In [1]:
import os, time
import requests

# Where search requests are sent and where downloaded files are stored.
SEARCH_URL = "https://api.github.com/search/code"
DOWNLOAD_DIR = "outputs/github-ts-output"

# Create the output directory up front; an existing directory is not an error.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Token is read from the environment once, at import time.
# NOTE: when GITHUB_TOKEN is unset this is None and the Authorization
# header below is rendered as the literal string "token None".
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

# Default headers: token authentication plus a request for raw file content.
HEADERS = {
    "Authorization": f"token {GITHUB_TOKEN}",
    "Accept": "application/vnd.github.v3.raw",
}

def search_ts_files(page=1):
    """Fetch one page of GitHub code-search results for TypeScript files.

    Args:
        page: 1-based index of the results page to request
            (100 results are requested per page).

    Returns:
        The decoded JSON body of the search response.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the server does not respond within 10 seconds.
    """
    params = {
        "q": "extension:ts language:TypeScript",
        "per_page": 100,
        "page": page,
    }
    resp = requests.get(
        SEARCH_URL,
        # Only the auth header is forwarded; the "raw" Accept type from
        # HEADERS is deliberately not sent to the search endpoint.
        headers={"Authorization": HEADERS["Authorization"]},
        params=params,
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json()

def download_file(item):
    """Download one search hit and save it under DOWNLOAD_DIR.

    Args:
        item: A search-result entry. The keys read here are ``html_url``,
            ``path`` and ``repository`` (with ``name`` and
            ``owner.login``).

    Raises:
        requests.RequestException: if the download fails, times out, or
            returns an error status.
        KeyError: if ``item`` lacks one of the expected keys.
        OSError: if the output file cannot be written.
    """
    # Turn the browser URL into its raw-content counterpart. Each replace is
    # limited to the first occurrence so that a directory literally named
    # "blob" further down the file path is left intact.
    raw_url = (
        item["html_url"]
        .replace("https://github.com/", "https://raw.githubusercontent.com/", 1)
        .replace("/blob/", "/", 1)
    )
    resp = requests.get(raw_url, headers=HEADERS, timeout=10)
    resp.raise_for_status()

    # Derive a flat, collision-resistant filename: owner__repo__path_with_underscores
    owner = item["repository"]["owner"]["login"]
    repo = item["repository"]["name"]
    path = item["path"].replace("/", "_")
    filename = f"{owner}__{repo}__{path}"

    # newline="" disables newline translation so the file's original line
    # endings are written unchanged (otherwise CRLF becomes "\r\r\n" on
    # Windows).
    target = os.path.join(DOWNLOAD_DIR, filename)
    with open(target, "w", encoding="utf-8", newline="") as f:
        f.write(resp.text)

def main(max_pages=10):
    """Crawl search results page by page and download every hit.

    Pages 1 through ``max_pages`` are requested in order; the crawl stops
    early at the first page that contains no items. A failure to download
    an individual file is reported on stdout and does not abort the run.

    Args:
        max_pages: Upper bound on the number of result pages to fetch.
    """
    page = 1
    while page <= max_pages:
        items = search_ts_files(page).get("items", [])
        if not items:
            # An empty page means there is nothing further to fetch.
            break

        for item in items:
            try:
                download_file(item)
            except Exception as e:
                # Best effort: report the failure and carry on with the rest.
                print(f"Failed {item['html_url']}: {e}")

        # Pause between pages to respect rate limits.
        time.sleep(2)
        page += 1

# Script entry point: run the crawl with its default page limit when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    main()