In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from pathlib import Path
import warnings

# Silence library warnings so the cell output stays readable.
# NOTE(review): this suppresses every warning category for the rest of the
# kernel session, including deprecation notices from pandas/plotly.
warnings.filterwarnings('ignore')

# Load data
# Input and output locations are relative to the current working directory.
data_dir = Path('data/raw')
outputs_dir = Path('notebooks/outputs')
# parents=True: on a fresh checkout the `notebooks/` folder may not exist yet,
# and mkdir() without it raises FileNotFoundError.
outputs_dir.mkdir(parents=True, exist_ok=True)

# Combine all player rankings CSVs
player_csvs = sorted(data_dir.glob('player_rankings_*.csv'))
print(f"Found {len(player_csvs)} player rankings files")

dfs = []
for p in player_csvs:
    try:
        df = pd.read_csv(p)
    except Exception as e:
        # Best effort: report the unreadable file and keep loading the rest.
        print(f"  {p.name}: ERROR - {e}")
    else:
        dfs.append(df)
        print(f"  {p.name}: {len(df)} rows")

# pd.concat() raises on an empty list, so fall back to an empty frame when
# no file could be read.
player_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
print(f"\nTotal player records: {len(player_df)}")
print(player_df.head())

In [None]:
# Read every Spotrac team cap CSV and stack them into a single frame,
# tagging each row with the year taken from its file name.
team_csvs = sorted(data_dir.glob('spotrac_team_cap_*.csv'))
print(f"Found {len(team_csvs)} team cap files")

team_dfs = []
for csv_path in team_csvs:
    try:
        cap_df = pd.read_csv(csv_path)
        # The year is the fourth underscore-separated token of the file stem
        # (the glob pattern fixes the first three: spotrac, team, cap).
        cap_df['Year'] = int(csv_path.stem.split('_')[3])
        team_dfs.append(cap_df)
        print(f"  {csv_path.name}: {len(cap_df)} rows")
    except Exception as err:
        # Covers both unreadable files and file names without a numeric year.
        print(f"  {csv_path.name}: ERROR - {err}")

if team_dfs:
    team_df = pd.concat(team_dfs, ignore_index=True)
else:
    # Nothing loaded: keep downstream cells working with an empty frame.
    team_df = pd.DataFrame()

print(f"\nTotal team records: {len(team_df)}")
print(team_df.head())

## Top 25 Players by Cap Hit (All Years)

In [None]:
# Top players across all years: the 25 rows with the largest CapValue.
top_players = player_df.nlargest(25, 'CapValue')[['Player', 'Team', 'Position', 'CapValue', 'Year']].copy()
top_players['CapValue_M'] = top_players['CapValue'] / 1e6  # Convert to millions

fig = go.Figure(data=[
    go.Bar(
        # Label each bar "Player (Year)" because rows span several years.
        y=top_players['Player'] + ' (' + top_players['Year'].astype(str) + ')',
        x=top_players['CapValue_M'],
        orientation='h',
        marker=dict(
            color=top_players['CapValue_M'],
            colorscale='Viridis',
            showscale=True
        ),
        text=[f"${v:.1f}M" for v in top_players['CapValue_M']],
        textposition='auto',
    )
])

fig.update_layout(
    title='Top 25 Players by Cap Hit (All Years)',
    xaxis_title='Cap Hit ($M)',
    yaxis_title='Player',
    # Plotly places the first category at the bottom of a horizontal bar
    # chart; reversing the axis puts the largest cap hit at the top.
    yaxis_autorange='reversed',
    height=600,
    hovermode='closest',
    template='plotly_white',
)

# Build the output path once so the saved file and the message cannot diverge.
top_players_path = outputs_dir / 'top_25_players.html'
fig.write_html(top_players_path)
fig.show()

print(f"\n✓ Saved to: {top_players_path}")

## Team Dead Money Trends by Year

In [None]:
# Plot league-wide dead money per year; skipped when the team data has no
# 'DeadMoneyTotal' column (which is also the case when no team file loaded).
if 'DeadMoneyTotal' in team_df.columns:
    # Sum across all teams within each year.
    team_trend = team_df.groupby('Year')['DeadMoneyTotal'].sum().reset_index()
    team_trend['DeadMoneyTotal_B'] = team_trend['DeadMoneyTotal'] / 1e9  # Billions

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=team_trend['Year'],
        y=team_trend['DeadMoneyTotal_B'],
        mode='lines+markers',
        name='Total League Dead Money',
        line=dict(color='#1f77b4', width=3),
        marker=dict(size=8)
    ))

    fig.update_layout(
        title='League Total Dead Money by Year',
        xaxis_title='Year',
        # Years are integers: one tick per year avoids fractional tick labels
        # on the otherwise continuous numeric axis.
        xaxis_dtick=1,
        yaxis_title='Dead Money ($B)',
        hovermode='x unified',
        template='plotly_white',
    )

    # Build the output path once so the saved file and the message agree.
    dead_money_path = outputs_dir / 'team_dead_money_trend.html'
    fig.write_html(dead_money_path)
    fig.show()
    print(f"\n✓ Saved to: {dead_money_path}")
else:
    print("Team dead money column not found; skipping trend chart.")

## Summary Statistics by Position

In [None]:
# Position breakdown: per-position count / mean / median / max of CapValue,
# plus the number of non-null Player entries.
position_stats = (
    player_df.groupby('Position')
    .agg({
        'CapValue': ['count', 'mean', 'median', 'max'],
        'Player': 'count'
    })
    .round(0)
    # Nullable Int64 instead of plain int: a position whose CapValue is
    # entirely missing aggregates to NaN, and astype(int) raises on NaN.
    .astype('Int64')
)

# Flatten the two-level column index produced by the dict aggregation.
position_stats.columns = ['Count', 'Mean Cap ($)', 'Median Cap ($)', 'Max Cap ($)', 'Players']
position_stats = position_stats.sort_values('Mean Cap ($)', ascending=False)

print("\n📊 Cap Hit Statistics by Position:")
print(position_stats.to_string())

# Save as HTML table
position_stats_path = outputs_dir / 'position_stats.html'
html_table = position_stats.to_html()
# Explicit encoding so the file does not depend on the platform default.
with open(position_stats_path, 'w', encoding='utf-8') as f:
    f.write(f"<h2>Cap Hit Statistics by Position</h2>{html_table}")
print(f"\n✓ Saved to: {position_stats_path}")

## Top 10 Teams by Total Player Cap Hit (Most Recent Year in Data)

In [None]:
# "Current year" is the latest Year value present in the player data.
current_year = player_df['Year'].max()
current_year_data = player_df[player_df['Year'] == current_year]

# Total cap hit and number of player rows per team, keeping the ten largest.
team_totals = current_year_data.groupby('Team')['CapValue'].agg(['sum', 'count']).reset_index()
team_totals.columns = ['Team', 'Total_Cap', 'Player_Count']
team_totals = team_totals.nlargest(10, 'Total_Cap')
team_totals['Total_Cap_M'] = team_totals['Total_Cap'] / 1e6

fig = go.Figure(data=[
    go.Bar(
        x=team_totals['Team'],
        y=team_totals['Total_Cap_M'],
        marker=dict(color=team_totals['Total_Cap_M'], colorscale='RdYlGn_r'),
        # Plotly text ignores "\n"; "<br>" is its line-break markup.
        text=[f"${v:.0f}M<br>({c} players)" for v, c in zip(team_totals['Total_Cap_M'], team_totals['Player_Count'])],
        textposition='auto',
    )
])

fig.update_layout(
    title=f'Top 10 Teams by Total Player Cap Hit ({current_year})',
    xaxis_title='Team',
    yaxis_title='Total Cap Hit ($M)',
    hovermode='x',
    template='plotly_white',
)

# Build the path once: the original message nested a plain (non-f) string, so
# it printed a literal "{current_year}" instead of the real file name.
top_teams_path = outputs_dir / f'top_teams_{current_year}.html'
fig.write_html(top_teams_path)
fig.show()
print(f"\n✓ Saved to: {top_teams_path}")

## Data Summary

In [None]:
# Final text summary of the loaded player data and the latest-year slice.
# Relies on `current_year` and `current_year_data` from the top-teams cell.
print("\n" + "="*70)
print("📊 PRODUCTION ANALYSIS SUMMARY")
print("="*70)

print("\n📋 Data Coverage:")
print(f"  Player Records: {len(player_df):,}")
# .tolist() converts NumPy scalars to plain Python values; otherwise the
# list prints as [np.int64(2021), ...] under NumPy 2.
print(f"  Years: {sorted(player_df['Year'].unique().tolist())}")
print(f"  Teams: {player_df['Team'].nunique()}")
print(f"  Positions: {player_df['Position'].nunique()}")

print("\n💰 Cap Hit Summary:")
print(f"  Total League Cap (All Years): ${player_df['CapValue'].sum()/1e9:.2f}B")
print(f"  Avg Player Cap Hit: ${player_df['CapValue'].mean()/1e6:.2f}M")
print(f"  Median: ${player_df['CapValue'].median()/1e6:.2f}M")
print(f"  Max: ${player_df['CapValue'].max()/1e6:.2f}M")

print(f"\n🏈 Current Year ({current_year}):")
print(f"  Players: {len(current_year_data):,}")
print(f"  Total Cap: ${current_year_data['CapValue'].sum()/1e9:.2f}B")
print(f"  Avg Cap/Player: ${current_year_data['CapValue'].mean()/1e6:.2f}M")

print(f"\n📁 Outputs saved to: {outputs_dir}")
print("\n✅ Analysis complete!")
print("="*70)