In [2]:
! pip install matplotlib seaborn pandas

Collecting matplotlib
  Downloading matplotlib-3.10.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pandas
  Using cached pandas-2.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.59.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (108 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)
Collecting numpy>=1.23 (from matplotlib)
  Downloading 

In [2]:
import sys
import json
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

def load_and_prepare_data(json_file):
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    records = []
    for org in data:
        for repo in org.get("top_10_popular_repos", []):
            record = {
                "org_name": org.get("organization_name"),
                "org_total_repos": org.get("total_public_repos"),
                "repo_name": repo.get("name"),
                "stars": repo.get("stars", 0),
                "commits": repo.get("commit_count", 0),
                "forks": repo.get("forks", 0),
                "primary_language": repo.get("primary_language"),
                "last_activity_at": repo.get("last_activity_at")
            }
            records.append(record)
    
    df = pd.DataFrame(records)
    
    df['last_activity_at'] = pd.to_datetime(df['last_activity_at'], errors='coerce')
    
    df['days_since_activity'] = (pd.Timestamp.now(tz='UTC') - df['last_activity_at']).dt.days
    
    return df

def plot_org_overview(df, output_dir):
    print("[*] Generating Organization Overview plot...")
    org_summary = df.groupby('org_name').agg(
        total_stars=('stars', 'sum'),
        total_commits=('commits', 'sum'),
        total_repos=('org_total_repos', 'first') 
    ).sort_values('total_stars', ascending=False).head(20)

    fig, axes = plt.subplots(3, 1, figsize=(12, 18))
    fig.suptitle('Top 20 Organizations Overview', fontsize=16, y=0.95)

    sns.barplot(ax=axes[0], x=org_summary.index, y=org_summary['total_stars'], palette='viridis')
    axes[0].set_title('Total Stars (Top 10 Repos)')
    axes[0].set_ylabel('Stars')
    axes[0].tick_params(axis='x', rotation=45)

    sns.barplot(ax=axes[1], x=org_summary.index, y=org_summary['total_commits'], palette='plasma')
    axes[1].set_title('Total Commits (Top 10 Repos)')
    axes[1].set_ylabel('Commits')
    axes[1].tick_params(axis='x', rotation=45)
    
    sns.barplot(ax=axes[2], x=org_summary.index, y=org_summary['total_repos'], palette='magma')
    axes[2].set_title('Total Public Repositories (Entire Org)')
    axes[2].set_ylabel('Repo Count')
    axes[2].tick_params(axis='x', rotation=45)

    plt.tight_layout(rect=[0, 0, 1, 0.94])
    plt.savefig(os.path.join(output_dir, '1_organization_overview.png'))
    plt.close()

def plot_activity_recency(df, output_dir):
    print("[*] Generating Activity Recency plot...")
    most_recent = df.groupby('org_name')['days_since_activity'].min().sort_values().head(20)
    
    plt.figure(figsize=(12, 8))
    sns.barplot(x=most_recent.index, y=most_recent.values, palette='coolwarm')
    plt.title('Top 20 Most Recently Active Organizations')
    plt.ylabel('Days Since Last Activity (Lower is Better)')
    plt.xlabel('Organization')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, '2_activity_recency.png'))
    plt.close()

def plot_language_distribution(df, output_dir):
    print("[*] Generating Language Distribution plot...")
    lang_counts = df['primary_language'].dropna().value_counts().head(15)
    
    plt.figure(figsize=(12, 8))
    sns.barplot(x=lang_counts.index, y=lang_counts.values, palette='ocean')
    plt.title('Top 15 Primary Languages Used Across All Repos')
    plt.ylabel('Number of Repositories')
    plt.xlabel('Programming Language')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, '3_language_distribution.png'))
    plt.close()



        
json_file = "orgs_data.json"
output_dir = "."

if not os.path.exists(json_file):
    print(f"Error: Input file '{json_file}' not found.")
    sys.exit(1)
    
if not os.path.exists(output_dir):
    print(f"[*] Creating output directory: {output_dir}")
    os.makedirs(output_dir)

df = load_and_prepare_data(json_file)

if df.empty:
    print("[!] No data to visualize. Exiting.")
    
plot_org_overview(df, output_dir)
plot_activity_recency(df, output_dir)
plot_language_distribution(df, output_dir)

print(f"\n[+] Visualization complete. Charts saved in '{output_dir}' directory.")

[*] Generating Organization Overview plot...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(ax=axes[0], x=org_summary.index, y=org_summary['total_stars'], palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(ax=axes[1], x=org_summary.index, y=org_summary['total_commits'], palette='plasma')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(ax=axes[2], x=org_summary.index, y=org_summary['total_repos'], palette='magma')


[*] Generating Activity Recency plot...
[*] Generating Language Distribution plot...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=most_recent.index, y=most_recent.values, palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=lang_counts.index, y=lang_counts.values, palette='ocean')



[+] Visualization complete. Charts saved in '.' directory.
