In [11]:
# pip install requests

In [12]:
import requests
import json
import os
from requests.utils import quote


In [13]:
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

In [14]:
GITHUB_REPO = "tulsyanshubham/TipyDo-Backend"
GITHUB_TOKEN = os.getenv("GITHUB_PAT")
HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"}

In [15]:
def get_default_branch():
    url = f"https://api.github.com/repos/{GITHUB_REPO}"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        return response.json().get("default_branch", "main")
    return "main"

DEFAULT_BRANCH = get_default_branch()
print(f"Default branch: {DEFAULT_BRANCH}")


Default branch: main


In [16]:
def get_repo_contents(path=""):
    url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{quote(path)}"
    response = requests.get(url, headers=HEADERS)
    
    if response.status_code == 200:
        return response.json()
    else:
        print(f"[ERROR] Failed to fetch {url}: {response.status_code}")
        return []


In [17]:
def get_file_content(file_path):
    url = f"https://raw.githubusercontent.com/{GITHUB_REPO}/{DEFAULT_BRANCH}/{file_path}"
    response = requests.get(url)

    if response.status_code == 200:
        return response.text
    return None


In [18]:
def analyze_repo(path=""):
    repo_structure = get_repo_contents(path)
    file_data = []
    dependencies = []

    skip_files = {
        ".gitignore", "README.md", "LICENSE", ".dockerignore",
        ".env", ".editorconfig", "package-lock.json", "node_modules"
    }
    skip_extensions = {
        ".png", ".jpg", ".jpeg", ".gif", ".ico", ".pdf", ".svg",
        ".zip", ".tar", ".gz", ".exe", ".dll", ".so", ".jar",
        ".bin", ".lock"
    }

    for item in repo_structure:
        if item["type"] == "dir":
            sub_file_data, sub_dependencies = analyze_repo(item["path"])
            file_data.extend(sub_file_data)
            dependencies.extend(sub_dependencies)

        elif item["type"] == "file":
            print(f"[INFO] Processing file: {item['path']}")
            name = item["name"]
            ext = "." + name.split(".")[-1] if "." in name else ""

            # Skip unneeded files
            if name in skip_files or ext.lower() in skip_extensions:
                continue

            file_content = get_file_content(item["path"])

            file_data.append({
                "name": name,
                "size": item["size"],
                "html_url": item["html_url"],
                "path": item["path"],
                "content": file_content
            })

            if name in ["package.json", "pom.xml", "requirements.txt"]:
                dependencies.append(item["path"])

    return file_data, dependencies


In [19]:
def generate_report():
    files, dep_files = analyze_repo()
    dependencies = {file: get_file_content(file) for file in dep_files}

    report = {
        "repo": GITHUB_REPO,
        "total_files": len(files),
        # "largest_files": sorted(files, key=lambda x: x["size"], reverse=True)[:5],
        "files": files,
        "dependencies": dependencies
    }

    repo_scan_report_path = os.path.join(output_dir, "repo_scan_report.json")
    with open(repo_scan_report_path, "w") as f:
        json.dump(report, f, indent=4)

    print("[INFO] Scan Completed: Report saved as repo_scan_report.json")


In [20]:
generate_report()

[INFO] Processing file: .gitignore
[INFO] Processing file: db.js
[INFO] Processing file: index.js
[INFO] Processing file: middleware/fetchuser.js
[INFO] Processing file: modals/count.js
[INFO] Processing file: modals/employee.js
[INFO] Processing file: modals/manager.js
[INFO] Processing file: modals/session.js
[INFO] Processing file: package-lock.json
[INFO] Processing file: package.json
[INFO] Scan Completed: Report saved as repo_scan_report.json
