Before you run:
- sign in with the `gh` CLI (`gh auth login`); the notebook reads your access token via `gh auth token`

In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import subprocess

In [None]:
# Ask the GitHub CLI for the access token of the signed-in account. The token
# is sent as a Bearer credential by the API helper defined further down.
try:
    res = subprocess.run(['gh', 'auth', 'token'], capture_output=True, text=True)
except FileNotFoundError:
    # subprocess raises this when the `gh` executable is not on PATH.
    print("GitHub CLI (`gh`) was not found; install it and run `gh auth login`")
    raise SystemExit(1) from None
if res.returncode != 0:
    print("Please authenticate with GitHub CLI")
    # Raise SystemExit directly: the `exit` helper is injected by the `site`
    # module for interactive shells and is not meant to be used in programs.
    raise SystemExit(1)
token = res.stdout.strip()

In [None]:
def get_downloads():
    """Fetch every release of microsoft/dev-proxy from the GitHub REST API.

    Pages through the "list releases" endpoint, 30 releases per request,
    until the API returns an empty page. Requests are authenticated with the
    module-level ``token``.

    Returns:
        list: The decoded JSON release objects, in the order the API
        returned them.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.Timeout: If a request does not complete within the timeout.
    """
    url = "https://api.github.com/repos/microsoft/dev-proxy/releases"
    per_page = 30
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout_seconds = 30

    all_releases = []
    # One session reuses the connection and carries the loop-invariant headers.
    with requests.Session() as session:
        session.headers.update({
            'accept': 'application/vnd.github+json',
            'X-GitHub-Api-Version': '2022-11-28',
            'authorization': f"Bearer {token}",
        })

        page = 1
        while True:
            response = session.get(
                url,
                params={'per_page': per_page, 'page': page},
                timeout=timeout_seconds,
            )
            response.raise_for_status()
            releases = response.json()

            # An empty page means we have walked past the last release.
            if not releases:
                break

            all_releases.extend(releases)
            page += 1

    return all_releases

def get_os(asset_name: str) -> str:
    """Classify a release asset by the operating system named in its file name.

    The name is searched (case-sensitively) for the substrings "linux",
    "osx" and "win", in that order; the first hit decides the label.
    Names that contain none of them are reported as "Windows".
    """
    # Order matters: it is the precedence when a name contains several markers.
    markers = (
        ("linux", "Linux"),
        ("osx", "macOS"),
        ("win", "Windows"),
    )
    for needle, label in markers:
        if needle in asset_name:
            return label
    return "Windows"

In [None]:
# Fetch the full release list once; get_downloads() issues one HTTP request
# per page of results.
downloads = get_downloads()

In [None]:
# releases
# Flatten the API payload into one row per release asset.
releases = []
for release in downloads:
    # The publish timestamp belongs to the release, so parse it once here
    # rather than once per asset.
    published = pd.to_datetime(release['published_at'])
    for asset in release['assets']:
        # Named `asset_os` rather than `os` so the notebook-global name of the
        # standard-library `os` module is not overwritten by a string.
        asset_os = get_os(asset['name'])
        # Uncomment to exclude anomalous Linux downloads
        # if asset_os == "Linux" and (release['tag_name'] == "v0.22.0" or release['tag_name'] == "v0.23.0"):
        #     continue

        releases.append({
            'tag_name': release['tag_name'],
            'asset_name': asset['name'],
            'download_count': asset['download_count'],
            'os': asset_os,
            'date': published,
        })
# Listing the columns explicitly keeps the column names present even when no
# assets were returned (an empty list would otherwise give a column-less frame).
df = pd.DataFrame(
    releases,
    columns=['tag_name', 'asset_name', 'download_count', 'os', 'date'],
)

In [None]:
# Total downloads per release: one row per (tag_name, date) pair, ordered
# from the oldest release to the newest and re-indexed from zero.
df_by_tag = (
    df.groupby(['tag_name', 'date'], as_index=False)['download_count']
    .sum()
    .sort_values(by='date')
    .reset_index(drop=True)
)
print("Downloads per release:\n")
print(df_by_tag.to_string(index=False))

In [None]:
# Total downloads per operating system and release date; the grouping keys
# are kept as ordinary columns so every row is self-describing.
df_by_os_date = (
    df.groupby(['os', 'date'], as_index=False)['download_count']
    .sum()
)
print("\nDownloads per OS and date:\n")
print(df_by_os_date.to_string(index=False))

In [None]:
# Overall download total for each operating system, across all releases.
df_by_os = df.groupby('os', as_index=False)['download_count'].sum()
print("\nDownloads per OS:\n")
print(df_by_os.to_string(index=False))

In [None]:
# Bar chart of total downloads per release tag, in the row order of df_by_tag.
fig, ax = plt.subplots(figsize=(10, 6))
tags = df_by_tag['tag_name']
ax.bar(tags, df_by_tag['download_count'], zorder=3)
ax.set_xlabel('Tag Name')
ax.set_ylabel('Download Count')
ax.set_title('Download Count by Tag Name')
ax.tick_params(axis='x', labelrotation=90)
# Half a bar of margin on each side instead of the default padding.
ax.set_xlim(-0.5, len(tags) - 0.5)
# zorder below the bars' (3) so the horizontal grid is drawn behind them.
ax.grid(axis='y', alpha=0.7, zorder=0)
fig.tight_layout()
plt.show()


In [None]:
# Line chart of downloads over time, one line per operating system.
#
# `df` holds one row per asset, and a release can ship several assets for the
# same OS. Plotting those rows directly puts several points on the same date
# and makes each line zig-zag vertically, so sum them to a single point per
# (os, date) first and sort by date so every line runs left to right.
os_totals = (
    df.groupby(['os', 'date'], as_index=False)['download_count']
    .sum()
    .sort_values(by='date')
)

plt.figure(figsize=(10, 6))
# Iterate in first-appearance order so legend order and colours stay stable.
for os_name in df['os'].unique():
    os_data = os_totals[os_totals['os'] == os_name]
    plt.plot(os_data['date'], os_data['download_count'], label=os_name)

# Set x-axis limits to the range of the data
plt.xlim(df['date'].min(), df['date'].max())

plt.xlabel('Date')
plt.ylabel('Download Count')
plt.title('Download Count Over Time by OS')
plt.xticks(rotation=45)
plt.legend()
plt.grid(axis='y', alpha=0.7, zorder=0)  # Horizontal grid lines beneath the plotted lines
plt.tight_layout()  # Keep the rotated date labels inside the figure
plt.show()