In [None]:
import os
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Make the notebook's working directory importable so the project modules
# imported below resolve. The membership check keeps this cell idempotent:
# re-running it does not keep appending the same entry to sys.path.
_project_dir = os.getcwd()
if _project_dir not in sys.path:
    sys.path.append(_project_dir)

# Import custom modules
from data_loader import get_top_dependents
from network_builder import build_dependency_graph
from metrics import calculate_risk_scores
from simulation import simulate_attacks, calculate_single_node_impact
from utils import export_results
from visualize import (
    plot_network_structure, 
    plot_degree_distributions, 
    plot_correlations, 
    plot_top_risk_scores, 
    plot_simulation_results,
    plot_risk_vs_cascade
)

# --- Configuration ---
# Every value a reader might want to tune lives in this cell.
TOP_N = 1000            # Number of seed packages to fetch
MAX_DEPTH = 2           # Traversal depth for the crawler
BETWEENNESS_K = 100     # Sampling size for faster betweenness calculation
API_DELAY = 0.1         # Delay between API calls in seconds
RANDOM_SEED = 42        # Seed applied to the global random generators below

# --- Reproducibility ---
# Betweenness is computed from a sample (see BETWEENNESS_K), so scores can vary
# from run to run. Seeding the global generators makes Restart & Run All
# repeatable.
# NOTE(review): this only takes effect if the helper modules draw from the
# global `random` / NumPy generators -- confirm in metrics.py, simulation.py
# and visualize.py, and pass an explicit seed there if they accept one.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# --- Visualization Setup ---
# Shared look for every figure produced later in the notebook.
sns.set_theme(style="whitegrid", context="notebook", palette="deep")
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['figure.dpi'] = 100

## 2. Data Acquisition
Fetching seed packages from `ecosyste.ms` using `data_loader.py`.

In [None]:
# Retrieve the seed package list; its size and the pause between API calls
# both come from the configuration cell.
seed_packages = get_top_dependents(
    limit=TOP_N,
    api_delay=API_DELAY,
)

## 3. Network Construction
Building the dependency graph using `network_builder.py`.

In [None]:
# Turn the seed packages into the dependency graph used by every later cell.
# MAX_DEPTH is the crawler's traversal depth; API_DELAY paces the API calls.
G = build_dependency_graph(
    seed_packages,
    max_depth=MAX_DEPTH,
    api_delay=API_DELAY,
)

## 4. Metrics & Risk Scoring
Calculating centrality metrics and BRS using `metrics.py`.

**Formula:**
$$\text{BRS} = 0.5 \times \text{InDegree}_{\text{norm}} + 0.2 \times \text{OutDegree}_{\text{norm}} + 0.3 \times \text{Betweenness}_{\text{norm}}$$

In [None]:
# Compute the risk-score table for the graph; BETWEENNESS_K is handed through
# as the sampling size for the betweenness step.
risk_df = calculate_risk_scores(
    G,
    betweenness_k=BETWEENNESS_K,
)

# Show a heading, then the first ten rows of the table with rich display.
print("\nTop 10 Critical Packages:")
top_ten_rows = risk_df.head(10)
display(top_ten_rows)

## 5. Robustness Simulation
Simulating attacks using `simulation.py`.

In [None]:
# Parameters for this stage, named so they are easy to spot and tune.
NUM_REMOVALS = 50           # passed to simulate_attacks as num_removals
IMPACT_SAMPLE_SIZE = 100    # passed to calculate_single_node_impact as sample_size

# Attack simulation (LCC decay) over the graph and its risk table.
sim_results = simulate_attacks(
    G,
    risk_df,
    num_removals=NUM_REMOVALS,
)

# Per-node impact, used later to check whether high risk goes with high impact.
# NOTE(review): the original comment says these are the top 100 nodes by risk;
# how the sample is actually chosen is decided in simulation.py -- confirm there.
impact_df = calculate_single_node_impact(
    G,
    risk_df,
    sample_size=IMPACT_SAMPLE_SIZE,
)

## 6. Visualization & Reporting
Generating plots and exporting results to match the paper's requirements.

In [None]:
# Figures are produced in this fixed order; each plotting helper takes exactly
# one input object, so the whole sequence is driven from a single table.
figure_steps = (
    (plot_network_structure, G),           # 1. Network topology
    (plot_degree_distributions, G),        # 2. Degree distributions
    (plot_correlations, risk_df),          # 3. Correlations between metrics
    (plot_top_risk_scores, risk_df),       # 4. Top risk scores
    (plot_risk_vs_cascade, impact_df),     # 5. Risk vs cascade impact
    (plot_simulation_results, sim_results) # 6. Simulation results (robustness)
)
for draw_figure, figure_input in figure_steps:
    draw_figure(figure_input)

# 7. Export the risk table together with the graph, then signal completion.
export_results(risk_df, G)
print("\nAnalysis Complete!")