In [None]:
import requests
import time
from datetime import datetime, timedelta

def _retry_delay(response, default_delay=10):
    """Return the number of seconds to wait before retrying a failed request.

    For rate-limit responses (HTTP 403/429) the wait is taken from the
    ``Retry-After`` header, or from ``X-RateLimit-Reset`` when the remaining
    quota is reported as zero. In every other case (including connection
    errors, where ``response`` is None) ``default_delay`` is returned.
    """
    if response is None or response.status_code not in (403, 429):
        return default_delay

    retry_after = response.headers.get('Retry-After')
    if retry_after is not None:
        try:
            return max(float(retry_after), 1)
        except ValueError:
            return default_delay

    if response.headers.get('X-RateLimit-Remaining') == '0':
        try:
            reset_at = float(response.headers['X-RateLimit-Reset'])
        except (KeyError, ValueError):
            return default_delay
        # The reset time is an epoch timestamp; add a second of slack so the
        # retry does not land just before the window actually reopens.
        return max(reset_at - time.time(), 0) + 1

    return default_delay


def search_github_repos(search_term, api_key, max_repos=1000):
    """Collect repository URLs matching ``search_term`` and write them to a file.

    The GitHub search endpoint is queried one creation day at a time, from
    2008-01-01 through today, so that each individual query stays small.
    Unique ``html_url`` values are gathered in the order they are returned
    and written, one per line, to ``"<search_term>.txt"`` in the current
    working directory.

    Args:
        search_term: Free-text search query; also used as the output file stem.
        api_key: GitHub token. If empty, requests are sent unauthenticated.
        max_repos: Maximum number of URLs to collect and write.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    url = "https://api.github.com/search/repositories"
    per_page = 100
    # The search API serves at most 1,000 results per query, so asking for
    # pages beyond this only produces errors.
    max_pages = 1000 // per_page
    retries = 3  # Number of attempts for each request

    # Only authenticate when a token was actually supplied; an empty token
    # would otherwise produce a malformed Authorization header.
    headers = {}
    if api_key:
        headers['Authorization'] = f'token {api_key}'

    start_date = datetime(2008, 1, 1)  # GitHub launch date
    total_days = (datetime.now() - start_date).days

    repo_urls = []
    seen = set()  # guards against the same repository showing up twice

    # "+ 1" so that today's repositories are included as well.
    for offset in range(total_days + 1):
        if len(repo_urls) >= max_repos:
            break

        day = (start_date + timedelta(days=offset)).strftime("%Y-%m-%d")
        params = {
            # Query a single day: a "D..D+1" range is inclusive on both ends,
            # so consecutive ranges would overlap and return each repo twice.
            'q': f'{search_term} created:{day}',
            'sort': 'stars',
            'order': 'desc',
            'per_page': per_page
        }

        for page in range(1, max_pages + 1):
            params['page'] = page

            for attempt in range(1, retries + 1):
                try:
                    response = requests.get(url, headers=headers, params=params, timeout=30)
                    response.raise_for_status()
                    break  # Request succeeded
                except requests.exceptions.RequestException as e:
                    print(f"Request error on {day}, page {page}, attempt {attempt}: {e}")
                    # Honour the server's rate-limit hints when present.
                    time.sleep(_retry_delay(getattr(e, 'response', None)))
            else:
                # Every attempt failed: give up on this date and move on.
                print(f"Failed to retrieve data for {day}, page {page} after {retries} attempts.")
                break

            repos = response.json().get('items', [])
            if not repos:
                break

            for repo in repos:
                html_url = repo['html_url']
                if html_url not in seen:
                    seen.add(html_url)
                    repo_urls.append(html_url)
            print(f"Date {day}, Page {page}: Retrieved {len(repos)} repositories.")

            # A short page is the last page; skip the extra empty request.
            if len(repos) < per_page or len(repo_urls) >= max_repos:
                break

        if len(repo_urls) >= max_repos:
            break

        time.sleep(2)  # Stay under the search endpoint's request rate limit

    selected = repo_urls[:max_repos]
    with open(f"{search_term}.txt", 'w', encoding='utf-8') as file:
        file.writelines(f"{repo_url}\n" for repo_url in selected)

    print(f"Successfully wrote {len(selected)} URLs to {search_term}.txt")

# Run the crawl only when this file is executed directly (or as a notebook
# cell), not when it is imported, because it performs many network requests.
if __name__ == "__main__":
    import os  # local import: the environment is only read by this driver

    search_term = "Systems Programming"
    # Read the token from the GITHUB_TOKEN environment variable so that a real
    # credential never has to be pasted into (and committed with) the source.
    # Falls back to an empty string when the variable is not set.
    api_key = os.environ.get("GITHUB_TOKEN", "")
    search_github_repos(search_term, api_key, max_repos=100000)


Date 2008-04-11, Page 1: Retrieved 1 repositories.
Date 2008-04-12, Page 1: Retrieved 1 repositories.
Date 2008-04-29, Page 1: Retrieved 1 repositories.
Date 2008-04-30, Page 1: Retrieved 1 repositories.
Date 2008-11-03, Page 1: Retrieved 1 repositories.
Date 2008-11-04, Page 1: Retrieved 1 repositories.
Date 2008-12-25, Page 1: Retrieved 1 repositories.
Date 2008-12-26, Page 1: Retrieved 2 repositories.
Date 2008-12-27, Page 1: Retrieved 1 repositories.
Date 2009-01-02, Page 1: Retrieved 1 repositories.
Date 2009-01-03, Page 1: Retrieved 1 repositories.
Request error on 2009-01-17, page 1, attempt 1: 403 Client Error: Forbidden for url: https://api.github.com/search/repositories?q=Systems+Programming+created%3A2009-01-17..2009-01-18&sort=stars&order=desc&per_page=100&page=1
Date 2009-02-15, Page 1: Retrieved 1 repositories.
Date 2009-02-16, Page 1: Retrieved 1 repositories.
Date 2009-03-08, Page 1: Retrieved 1 repositories.
Date 2009-03-09, Page 1: Retrieved 1 repositories.
Request er