Automating log collection from OBS

Aazam Thakur

Introduction

This is a recap of the work done during the first week of GSoC 2025. Log Detective is a project for analyzing build logs. The task is to automate the collection of logs from various openSUSE projects on OBS (the Open Build Service) and then feed each failed build log to Log Detective for analysis.

Automating the Process

To automate the task, I wrote a Python script that first runs an osc command to list the failed builds in the project. Each failed-build line is then parsed to construct the URL from which the log file can be downloaded via the public OBS endpoint.
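
As a quick illustration of that second step (the package name here is made up), a single failed-build line is split into its fields and mapped onto the public log URL like this:

line = "python-foo standard x86_64 failed"   # hypothetical osc output line
project = "openSUSE:Factory"

package, repository, arch = line.split()[:3]
url = f"https://build.opensuse.org/public/build/{project}/{repository}/{arch}/{package}/_log"
print(url)
# https://build.opensuse.org/public/build/openSUSE:Factory/standard/x86_64/python-foo/_log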

After the logs are downloaded, each one is passed to the Log Detective model for analysis. The resulting explanation is written to a text file and stored in a separate folder. The full script is below:

import os
import subprocess
import requests
import argparse

LOGS_DIR = "logs"
EXPLAIN_DIR = "explain"

def get_project_name():
    parser = argparse.ArgumentParser(description="Download and explain openSUSE build logs")
    parser.add_argument(
        "suffix",
        help="Suffix for the openSUSE project (e.g., Factory)",
    )
    args = parser.parse_args()
    return f"openSUSE:{args.suffix}"

def get_failed_builds(project_name):
    """Run `osc results -f` and return the lines describing failed builds."""
    try:
        result = subprocess.run(
            ["osc", "results", "-f", project_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            text=True
        )
        # Drop empty lines so an empty result is reported as "no failed builds"
        return [line for line in result.stdout.strip().split('\n') if line.strip()]
    except subprocess.CalledProcessError as e:
        print(f"❌ Error running osc: {e.stderr}")
        return []

def parse_build_failure(line: str, project: str):
    """Parse one failed-build line and derive the public log URL and a local filename."""
    parts = line.strip().split()
    if len(parts) < 4:
        raise ValueError("Expected format: '<package> <repository> <arch> failed'")

    package, repository, arch = parts[:3]
    # Public (unauthenticated) OBS endpoint serving the raw build log
    url = f"https://build.opensuse.org/public/build/{project}/{repository}/{arch}/{package}/_log"
    filename = f"{package}_{repository}_{arch}.log"
    return url, filename

def download_log(url: str, filename: str):
    """Download a single build log into LOGS_DIR and return its path."""
    os.makedirs(LOGS_DIR, exist_ok=True)
    path = os.path.join(LOGS_DIR, filename)

    print(f"📥 Downloading: {url}")
    response = requests.get(url)
    if response.status_code == 200:
        with open(path, "w", encoding="utf-8") as f:
            f.write(response.text)
        print(f"✅ Saved to {path}")
        return path
    else:
        print(f"❌ Failed: {response.status_code} - {url}")
        return None

def run_log_detective(log_path):
    """Run the logdetective CLI on a log file and save its analysis into EXPLAIN_DIR."""
    os.makedirs(EXPLAIN_DIR, exist_ok=True)
    output_filename = os.path.basename(log_path).replace('.log', '.txt')
    output_path = os.path.join(EXPLAIN_DIR, output_filename)

    try:
        result = subprocess.run(
            ["logdetective", log_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(result.stdout)
        print(f"🕵️ Analysis saved to {output_path}")
        return output_path
    except subprocess.CalledProcessError as e:
        print(f"❌ Error analyzing {log_path}: {e.stderr}")
        return None

# === Main Script ===
if __name__ == "__main__":
    project = get_project_name()
    print(f"🔍 Checking failed builds in {project}...\n")
    failures = get_failed_builds(project)

    if not failures:
        print("🎉 No failed builds found!")
    else:
        downloaded_files = []
        explained_files = []

        for line in failures:
            try:
                url, filename = parse_build_failure(line, project)
                log_path = download_log(url, filename)
                if log_path:
                    downloaded_files.append(log_path)
                    explained_path = run_log_detective(log_path)
                    if explained_path:
                        explained_files.append(explained_path)
            except ValueError as ve:
                print(f"⚠️ Skipping line: {line}\nReason: {ve}")
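
Running the script only requires the project suffix as an argument. Assuming it is saved as collect_logs.py (the filename here is just an example), checking openSUSE:Factory looks like this:

python collect_logs.py Factory

The raw logs end up in the logs/ folder and the corresponding analyses in the explain/ folder.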

Outcome

This script greatly improves our efficiency when analyzing large numbers of log files, since we no longer have to spend time downloading and inspecting each one individually.
