## Imports and data loading


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np



## Load dataset


In [None]:
# Load the data
# The path is relative, so it is resolved against the current working directory.
# The cells below read these columns from the resulting frame: Quadrant,
# HomeWins/HomeLosses, AwayWins/AwayLosses, NeutralWins/NeutralLosses,
# TotalWins/TotalLosses and OpponentTotalWins/OpponentTotalLosses.
df = pd.read_csv('ncaa_data_all.csv')



## 1. Win percentages by quadrant and location


In [None]:
# 1. Win Percentages by Quadrant and Location
# Grouped bar chart: one group per quadrant, one bar per location.
plt.figure(figsize=(12, 8))

quadrants = [1, 2, 3, 4]

# Legend label -> (wins column, losses column)
location_columns = {
    'Home': ('HomeWins', 'HomeLosses'),
    'Away': ('AwayWins', 'AwayLosses'),
    'Neutral': ('NeutralWins', 'NeutralLosses'),
    'Total': ('TotalWins', 'TotalLosses'),
}
# Horizontal shift of each location's bar inside a quadrant group
bar_offsets = [-0.3, -0.1, 0.1, 0.3]

# Issue one plt.bar call per *location* (not per quadrant): matplotlib assigns
# one colour and one legend handle per call, so this is what makes the legend
# entries line up with Home/Away/Neutral/Total instead of with the quadrants.
for (location, (wins_col, losses_col)), offset in zip(location_columns.items(), bar_offsets):
    win_pcts = []
    for q in quadrants:
        quad_data = df[df['Quadrant'] == q]
        wins = quad_data[wins_col].sum()
        games = wins + quad_data[losses_col].sum()
        # A quadrant with no recorded games gets a zero-height bar rather
        # than a 0/0 division.
        win_pcts.append(wins / games if games > 0 else 0)
    plt.bar(np.array(quadrants) + offset, win_pcts, width=0.15, label=location)

plt.xticks(quadrants, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.xlabel('Quadrant')
plt.ylabel('Win Percentage')
plt.title('Win Percentages by Quadrant and Location')
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig('win_percentages_by_quadrant.png', dpi=300, bbox_inches='tight')
plt.close()



## 2. Average wins and losses by quadrant


In [None]:
# 2. Average Wins and Losses by Quadrant
plt.figure(figsize=(12, 8))

# Output label -> source column whose per-quadrant mean is reported
stat_sources = {
    'Avg Home Wins': 'HomeWins',
    'Avg Home Losses': 'HomeLosses',
    'Avg Away Wins': 'AwayWins',
    'Avg Away Losses': 'AwayLosses',
    'Avg Neutral Wins': 'NeutralWins',
    'Avg Neutral Losses': 'NeutralLosses',
    'Avg Total Wins': 'TotalWins',
    'Avg Total Losses': 'TotalLosses',
}

quad_stats = {}
for q in range(1, 5):
    quadrant_rows = df[df['Quadrant'] == q]
    quad_stats[q] = {
        label: quadrant_rows[column].mean()
        for label, column in stat_sources.items()
    }

# Quadrants become rows, statistics become columns, and the quadrant number
# is exposed as a regular 'Quadrant' column.
avg_stats_df = pd.DataFrame(quad_stats).T
avg_stats_df = avg_stats_df.reset_index().rename(columns={'index': 'Quadrant'})

# Two side-by-side bars per quadrant: wins on the left, losses on the right
bar_width = 0.35
wins_positions = np.arange(4)
losses_positions = [pos + bar_width for pos in wins_positions]

plt.bar(wins_positions, avg_stats_df['Avg Total Wins'],
        width=bar_width, label='Avg Total Wins')
plt.bar(losses_positions, avg_stats_df['Avg Total Losses'],
        width=bar_width, label='Avg Total Losses')

plt.xlabel('Quadrant')
plt.ylabel('Average Count')
plt.title('Average Total Wins and Losses by Quadrant')
# Ticks sit midway between the two bars of each group
tick_positions = [slot + bar_width/2 for slot in range(4)]
plt.xticks(tick_positions, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig('avg_wins_losses_by_quadrant.png', dpi=300, bbox_inches='tight')
plt.close()



## 3. Home court advantage comparison


In [None]:
# 3. Home Court Advantage Comparison
plt.figure(figsize=(10, 6))

# Home advantage = pooled home win % divided by pooled away win %.
# A quadrant whose away win % is not positive is reported as 0.
home_advantage = []
for q in range(1, 5):
    subset = df[df['Quadrant'] == q]

    home_wins = subset['HomeWins'].sum()
    home_losses = subset['HomeLosses'].sum()
    home_rate = home_wins / (home_wins + home_losses)

    away_wins = subset['AwayWins'].sum()
    away_losses = subset['AwayLosses'].sum()
    away_rate = away_wins / (away_wins + away_losses)

    if away_rate > 0:
        home_advantage.append(home_rate / away_rate)
    else:
        home_advantage.append(0)

quadrant_ticks = range(1, 5)
plt.bar(quadrant_ticks, home_advantage, color='green')
# Reference line: a ratio of 1 means home and away win rates are equal
plt.axhline(y=1, color='r', linestyle='-', alpha=0.3, label='No Advantage')
plt.xlabel('Quadrant')
plt.ylabel('Home/Away Win Ratio')
plt.title('Home Court Advantage by Quadrant')
plt.xticks(quadrant_ticks, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.savefig('home_advantage_by_quadrant.png', dpi=300, bbox_inches='tight')
plt.close()



## 4. Win distribution boxplots


In [None]:
# 4. Win Distribution Boxplots
plt.figure(figsize=(14, 10))

# Reshape to long form: one row per (team row, location) with its win count
win_columns = ['HomeWins', 'AwayWins', 'NeutralWins']
df_wins_melted = df[['Quadrant'] + win_columns].melt(
    id_vars=['Quadrant'],
    value_vars=win_columns,
    var_name='Location',
    value_name='Wins',
)

# Grouped boxplot: quadrants along x, one box per location within each group
sns.boxplot(data=df_wins_melted, x='Quadrant', y='Wins', hue='Location')
plt.title('Distribution of Wins by Location and Quadrant')
plt.xlabel('Quadrant')
plt.ylabel('Number of Wins')
plt.savefig('win_distribution_boxplot.png', dpi=300, bbox_inches='tight')
plt.close()



## 5. Opponent strength comparison


In [None]:
# 5. Opponent Strength Comparison
# Grouped bar chart comparing each quadrant's pooled team win % with the
# pooled win % of its opponents.
plt.figure(figsize=(12, 8))

quadrants = [1, 2, 3, 4]
team_win_pcts = []
opponent_win_pcts = []
for q in quadrants:
    quad_data = df[df['Quadrant'] == q]

    team_wins = quad_data['TotalWins'].sum()
    team_games = team_wins + quad_data['TotalLosses'].sum()
    # Zero-height bar instead of a 0/0 division when no games are recorded
    team_win_pcts.append(team_wins / team_games if team_games > 0 else 0)

    opp_wins = quad_data['OpponentTotalWins'].sum()
    opp_games = opp_wins + quad_data['OpponentTotalLosses'].sum()
    opponent_win_pcts.append(opp_wins / opp_games if opp_games > 0 else 0)

# One plt.bar call per series (team / opponent) rather than per quadrant:
# matplotlib creates one colour and one legend handle per call, so this keeps
# the legend entries attached to the series they actually describe.
positions = np.array(quadrants)
plt.bar(positions - 0.15, team_win_pcts, width=0.25, label='Team Win %')
plt.bar(positions + 0.15, opponent_win_pcts, width=0.25, label='Opponent Win %')

plt.xticks(quadrants, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.xlabel('Quadrant')
plt.ylabel('Win Percentage')
plt.title('Team vs Opponent Win Percentages by Quadrant')
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig('team_vs_opponent_win_pct.png', dpi=300, bbox_inches='tight')
plt.close()



## 6. Venue performance heatmap


In [None]:
# 6. Venue Performance Heatmap
plt.figure(figsize=(12, 10))

# (heatmap column label, wins column, losses column)
venues = [
    ('Home', 'HomeWins', 'HomeLosses'),
    ('Away', 'AwayWins', 'AwayLosses'),
    ('Neutral', 'NeutralWins', 'NeutralLosses'),
]

# One row of pooled win percentages per quadrant, one entry per venue
quadrant_rows = []
for q in range(1, 5):
    subset = df[df['Quadrant'] == q]
    quadrant_rows.append([
        subset[wins_col].sum() / (subset[wins_col].sum() + subset[losses_col].sum())
        for _, wins_col, losses_col in venues
    ])

win_pct_df = pd.DataFrame(
    quadrant_rows,
    index=['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'],
    columns=[label for label, _, _ in venues],
)

# Colour scale is pinned to [0, 1] so cells are comparable as percentages
sns.heatmap(win_pct_df, annot=True, cmap='YlGnBu', fmt='.3f', vmin=0, vmax=1)
plt.title('Win Percentage by Venue and Quadrant')
plt.savefig('venue_win_pct_heatmap.png', dpi=300, bbox_inches='tight')
plt.close()



## 7. Win-loss ratio analysis


In [None]:
# 7. Win-Loss Ratio Analysis
plt.figure(figsize=(10, 6))

# Pooled wins divided by pooled losses per quadrant; a quadrant without any
# losses is recorded as infinity.
win_loss_ratio = []
for q in range(1, 5):
    in_quadrant = df['Quadrant'] == q
    wins_sum = df.loc[in_quadrant, 'TotalWins'].sum()
    losses_sum = df.loc[in_quadrant, 'TotalLosses'].sum()
    if losses_sum > 0:
        win_loss_ratio.append(wins_sum / losses_sum)
    else:
        win_loss_ratio.append(float('inf'))

quadrant_ticks = range(1, 5)
plt.bar(quadrant_ticks, win_loss_ratio, color='purple')
# Reference line: ratio 1 means as many wins as losses
plt.axhline(y=1, color='r', linestyle='-', alpha=0.3, label='Win-Loss Equilibrium')
plt.xlabel('Quadrant')
plt.ylabel('Win-Loss Ratio')
plt.title('Win-Loss Ratio by Quadrant')
plt.xticks(quadrant_ticks, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.savefig('win_loss_ratio_by_quadrant.png', dpi=300, bbox_inches='tight')
plt.close()



## 8. Performance radar chart


In [None]:
# 8. Performance Radar Chart
# One polygon per quadrant over five axes: home/away/neutral/total win % and a
# home-advantage score squeezed into [0, 1].
#
# Only one figure is created (via plt.subplots below). Creating an extra
# plt.figure first would leave an empty figure open, because the final close
# only affects the figure that was current at that point.
categories = ['Home Win %', 'Away Win %', 'Neutral Win %', 'Total Win %', 'Home Advantage']

quadrant_stats = []
for q in range(1, 5):
    quad_data = df[df['Quadrant'] == q]

    home_win_pct = quad_data['HomeWins'].sum() / (quad_data['HomeWins'].sum() + quad_data['HomeLosses'].sum())
    away_win_pct = quad_data['AwayWins'].sum() / (quad_data['AwayWins'].sum() + quad_data['AwayLosses'].sum())
    neutral_win_pct = quad_data['NeutralWins'].sum() / (quad_data['NeutralWins'].sum() + quad_data['NeutralLosses'].sum())
    total_win_pct = quad_data['TotalWins'].sum() / (quad_data['TotalWins'].sum() + quad_data['TotalLosses'].sum())
    # Ratio of home to away win %; falls back to 2 when the away win % is not
    # positive, which maps to the maximum score after normalization.
    home_advantage = home_win_pct / away_win_pct if away_win_pct > 0 else 2

    # Map the ratio onto [0, 1]: <= 1 -> 0, 2 or more -> 1, linear in between
    norm_home_advantage = min((home_advantage - 1) / 1, 1) if home_advantage > 1 else 0

    quadrant_stats.append([home_win_pct, away_win_pct, neutral_win_pct, total_win_pct, norm_home_advantage])

# Evenly spaced spoke angles; the first angle is repeated to close the polygon
angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist()
angles += angles[:1]

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

for quadrant_number, stats in enumerate(quadrant_stats, start=1):
    # Build a closed copy instead of appending in place, so quadrant_stats
    # keeps exactly one value per category.
    closed_stats = stats + stats[:1]
    ax.plot(angles, closed_stats, linewidth=2, label=f'Quadrant {quadrant_number}')
    ax.fill(angles, closed_stats, alpha=0.1)

# Set category labels
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories)
ax.set_yticklabels([])
ax.set_ylim(0, 1)

ax.legend(loc='upper right')
ax.set_title('Performance Radar Chart by Quadrant')
fig.savefig('performance_radar_chart.png', dpi=300, bbox_inches='tight')
plt.close(fig)



## 9. Summary statistics visualization


In [None]:
# 9. Statistical Aggregation and Visualization
# Collect one summary record per quadrant, then turn the records into a frame.
summary_records = []
for q in range(1, 5):
    members = df[df['Quadrant'] == q]
    summary_records.append({
        'Quadrant': q,
        'Teams': len(members),
        'Avg Total Wins': members['TotalWins'].mean(),
        'Avg Total Losses': members['TotalLosses'].mean(),
        'Avg Home Wins': members['HomeWins'].mean(),
        'Avg Away Wins': members['AwayWins'].mean(),
        'Avg Neutral Wins': members['NeutralWins'].mean(),
    })
metrics_df = pd.DataFrame(summary_records)

# Grouped bars: five metrics per quadrant
plt.figure(figsize=(14, 8))

x = np.arange(4)
width = 0.15

# (slot relative to the group centre, metric column); the column name doubles
# as the legend label.
bar_layout = [
    (-2, 'Avg Total Wins'),
    (-1, 'Avg Home Wins'),
    (0, 'Avg Away Wins'),
    (1, 'Avg Neutral Wins'),
    (2, 'Avg Total Losses'),
]
for slot, metric in bar_layout:
    plt.bar(x + slot*width, metrics_df[metric], width=width, label=metric)

plt.xlabel('Quadrant')
plt.ylabel('Average Count')
plt.title('Summary Statistics by Quadrant')
plt.xticks(x, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig('summary_statistics.png', dpi=300, bbox_inches='tight')
plt.close()



## 10. Win percentage by location violin plot


In [None]:
# 10. Advanced Analysis - Win Percentage by Location Violinplot
# Distribution of per-row win percentages, split by quadrant and location.
plt.figure(figsize=(16, 8))

# Legend label -> (wins column, losses column)
location_columns = {
    'Home': ('HomeWins', 'HomeLosses'),
    'Away': ('AwayWins', 'AwayLosses'),
    'Neutral': ('NeutralWins', 'NeutralLosses'),
}

# Compute the win percentage column-wise instead of looping with iterrows():
# this is faster and keeps each column's dtype (iterrows builds one Series per
# row, which can upcast an integer Quadrant to float).
location_frames = []
for location, (wins_col, losses_col) in location_columns.items():
    games_played = df[wins_col] + df[losses_col]
    location_frames.append(pd.DataFrame({
        'Quadrant': df['Quadrant'],
        'Location': location,
        # Rows with no games at this location become NaN (no division by
        # zero) and are dropped below.
        'Win_Pct': df[wins_col] / games_played.where(games_played > 0),
    }))

win_pct_df = pd.concat(location_frames, ignore_index=True).dropna(subset=['Win_Pct'])

# Create violin plots; hue_order pins the Home/Away/Neutral ordering so it
# does not depend on the order of rows in the data.
sns.violinplot(x='Quadrant', y='Win_Pct', hue='Location', data=win_pct_df, split=False,
               inner='quartile', palette="Set2", hue_order=list(location_columns))
plt.title('Win Percentage Distribution by Location and Quadrant')
plt.xlabel('Quadrant')
plt.ylabel('Win Percentage')
plt.savefig('win_pct_distribution_violinplot.png', dpi=300, bbox_inches='tight')
plt.close()



## 11. Opponent strength analysis


In [None]:
# 11. Opponent Strength Analysis
plt.figure(figsize=(12, 6))

# Pooled opponent win % per quadrant; 0 when no opponent games are recorded
quad_opponent_strength = []
for q in range(1, 5):
    subset = df[df['Quadrant'] == q]
    opponent_wins = subset['OpponentTotalWins'].sum()
    opponent_games = opponent_wins + subset['OpponentTotalLosses'].sum()
    if opponent_games > 0:
        quad_opponent_strength.append(opponent_wins / opponent_games)
    else:
        quad_opponent_strength.append(0)

quadrant_ticks = range(1, 5)
plt.bar(quadrant_ticks, quad_opponent_strength, color='darkblue')
plt.xlabel('Quadrant')
plt.ylabel('Average Opponent Win Percentage')
plt.title('Opponent Strength by Quadrant')
plt.xticks(quadrant_ticks, ['Quadrant 1', 'Quadrant 2', 'Quadrant 3', 'Quadrant 4'])
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig('opponent_strength_by_quadrant.png', dpi=300, bbox_inches='tight')
plt.close()



## 12. Comprehensive visualization dashboard


In [None]:
# 12. Comprehensive Data Visualization Dashboard
# Opens the dashboard figure; the panels that follow draw into a 3x2 grid on it.
plt.figure(figsize=(15, 12))

# Panel 1 of the 3x2 grid: pooled win % by location within each quadrant
plt.subplot(3, 2, 1)
quadrants = [1, 2, 3, 4]

# Legend label -> (wins column, losses column)
location_columns = {
    'Home': ('HomeWins', 'HomeLosses'),
    'Away': ('AwayWins', 'AwayLosses'),
    'Neutral': ('NeutralWins', 'NeutralLosses'),
}
# Horizontal shift of each location's bar inside a quadrant group
bar_offsets = [-0.25, 0, 0.25]

# One plt.bar call per *location* (not per quadrant): matplotlib assigns one
# colour and one legend handle per call, so this keeps the Home/Away/Neutral
# legend entries attached to the right bars.
for (location, (wins_col, losses_col)), offset in zip(location_columns.items(), bar_offsets):
    win_pcts = []
    for q in quadrants:
        quad_data = df[df['Quadrant'] == q]
        wins = quad_data[wins_col].sum()
        games = wins + quad_data[losses_col].sum()
        # Zero-height bar instead of a 0/0 division when no games are recorded
        win_pcts.append(wins / games if games > 0 else 0)
    plt.bar(np.array(quadrants) + offset, win_pcts, width=0.2, label=location)

plt.title('Win % by Location')
plt.xticks(quadrants, ['Q1', 'Q2', 'Q3', 'Q4'])
plt.legend(loc='lower left')
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Average wins plot (panel 2): mean wins per location for each quadrant
plt.subplot(3, 2, 2)
x = range(4)
width = 0.2

# (horizontal shift, source column, legend label) for each bar series
series_layout = (
    (-width, 'HomeWins', 'Home'),
    (0, 'AwayWins', 'Away'),
    (width, 'NeutralWins', 'Neutral'),
)
for shift, column, label in series_layout:
    bar_positions = [slot + shift for slot in x]
    # Slots are 0-based while quadrants are numbered from 1
    mean_wins = [df[df['Quadrant'] == slot + 1][column].mean() for slot in x]
    plt.bar(bar_positions, mean_wins, width=width, label=label)

plt.title('Average Wins by Location')
plt.xticks(x, ['Q1', 'Q2', 'Q3', 'Q4'])
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Home advantage ratio (panel 3): pooled home win % over pooled away win %
plt.subplot(3, 2, 3)
home_advantage = []
for q in range(1, 5):
    subset = df[df['Quadrant'] == q]

    home_wins = subset['HomeWins'].sum()
    home_rate = home_wins / (home_wins + subset['HomeLosses'].sum())

    away_wins = subset['AwayWins'].sum()
    away_rate = away_wins / (away_wins + subset['AwayLosses'].sum())

    # Quadrants whose away win % is not positive are reported as 0
    if away_rate > 0:
        home_advantage.append(home_rate / away_rate)
    else:
        home_advantage.append(0)

quadrant_ticks = range(1, 5)
plt.bar(quadrant_ticks, home_advantage, color='green')
# Reference line at ratio 1 (home and away win rates equal)
plt.axhline(y=1, color='r', linestyle='-', alpha=0.3)
plt.title('Home Advantage Ratio')
plt.xticks(quadrant_ticks, ['Q1', 'Q2', 'Q3', 'Q4'])
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Win-Loss Ratio (panel 4): pooled wins over pooled losses per quadrant
plt.subplot(3, 2, 4)
win_loss_ratio = []
for q in range(1, 5):
    in_quadrant = df['Quadrant'] == q
    wins_sum = df.loc[in_quadrant, 'TotalWins'].sum()
    losses_sum = df.loc[in_quadrant, 'TotalLosses'].sum()
    # A quadrant without any losses is recorded as infinity
    if losses_sum > 0:
        win_loss_ratio.append(wins_sum / losses_sum)
    else:
        win_loss_ratio.append(float('inf'))

quadrant_ticks = range(1, 5)
plt.bar(quadrant_ticks, win_loss_ratio, color='purple')
# Reference line at ratio 1 (as many wins as losses)
plt.axhline(y=1, color='r', linestyle='-', alpha=0.3)
plt.title('Win-Loss Ratio')
plt.xticks(quadrant_ticks, ['Q1', 'Q2', 'Q3', 'Q4'])
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Team vs Opponent Win % (panel 5)
plt.subplot(3, 2, 5)
team_win_pct = []
opp_win_pct = []

# (wins column, losses column, list that collects this side's percentages)
sides = (
    ('TotalWins', 'TotalLosses', team_win_pct),
    ('OpponentTotalWins', 'OpponentTotalLosses', opp_win_pct),
)
for q in range(1, 5):
    subset = df[df['Quadrant'] == q]
    for wins_col, losses_col, collected in sides:
        side_wins = subset[wins_col].sum()
        side_games = side_wins + subset[losses_col].sum()
        # 0 when the quadrant has no recorded games for this side
        collected.append(side_wins / side_games if side_games > 0 else 0)

x = range(1, 5)
width = 0.35
# Team bars sit half a bar-width left of the tick, opponent bars half right
plt.bar([tick - width/2 for tick in x], team_win_pct, width=width, label='Team')
plt.bar([tick + width/2 for tick in x], opp_win_pct, width=width, label='Opponent')
plt.legend()
plt.title('Team vs Opponent Win %')
plt.xticks(x, ['Q1', 'Q2', 'Q3', 'Q4'])
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Team Count (panel 6): number of rows in each quadrant
plt.subplot(3, 2, 6)
team_counts = []
for q in range(1, 5):
    # Counting the True entries of the mask equals the length of the filtered frame
    team_counts.append(int((df['Quadrant'] == q).sum()))

quadrant_ticks = range(1, 5)
plt.bar(quadrant_ticks, team_counts, color='orange')
plt.title('Number of Teams per Quadrant')
plt.xticks(quadrant_ticks, ['Q1', 'Q2', 'Q3', 'Q4'])
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Finalize the dashboard: tidy the spacing between panels, write the image,
# and release the figure.
plt.tight_layout()
plt.savefig('quadrant_analysis_dashboard.png', dpi=300, bbox_inches='tight')
plt.close()

print("Data visualization complete. All plots have been saved as PNG files.")