In [3]:
import pandas as pd
from functools import reduce
import matplotlib.pyplot as plt

In [4]:
# Source workbook; only spreadsheet columns A through T are read.
file_path = '../data/nba_points_2024_2025.xlsx'
games_raw = pd.read_excel(file_path, usecols="A:T")

# Keep only rows whose 'Game Count' is 1 (games that have been played and tracked)
# and strip surrounding whitespace from the Home / Away team labels.
# .assign() returns a new frame, so nothing is written into a filtered slice of the
# raw data (the original pattern triggers SettingWithCopyWarning) and the raw read
# stays available as `games_raw`.
df = (
    games_raw.loc[games_raw['Game Count'] == 1]
    .assign(
        Home=lambda games: games['Home'].str.strip(),
        Away=lambda games: games['Away'].str.strip(),
    )
)

In [5]:
# 1/0 flags for the winning side; when the points are equal or missing both flags are 0.
df['Home Win'] = df['Home Points'].gt(df['Away Points']).astype(int)
df['Away Win'] = df['Home Points'].lt(df['Away Points']).astype(int)

In [6]:
# Headline numbers over every row kept in df.
tracked_games = df['Game Count'].sum()
print('GAMES TRACKED: ', round(tracked_games))

# Column means, rounded to two decimals.
for label, column in (('AVG. OPEN TOTAL: ', 'Open Total'),
                      ('AVG. ACTUAL TOTAL: ', 'Actual Total')):
    print(label, round(df[column].mean(), 2))

# Sum of the 'Over' / 'Under' columns as a percentage of the tracked-game count.
for label, column in (('OVERS: ', 'Over'), ('UNDERS: ', 'Under')):
    print(label, round(df[column].sum() / tracked_games * 100, 2), '%')

GAMES TRACKED:  915
AVG. OPEN TOTAL:  226.56
AVG. ACTUAL TOTAL:  227.1
OVERS:  52.57 %
UNDERS:  47.32 %


In [7]:
# Per-crew-chief summary built with ONE grouped aggregation instead of a dozen
# separate groupby passes stitched together by a chain of merges.
chief_totals = df.groupby('Crew Chief').agg(
    games=('Game Count', 'sum'),
    over_hits=('Over', 'sum'),
    under_hits=('Under', 'sum'),
    over_by_avg=('Over Amount', 'mean'),
    over_by_std=('Over Amount', 'std'),
    under_by_avg=('Under Amount', 'mean'),
    under_by_std=('Under Amount', 'std'),
    mean_actual_total=('Actual Total', 'mean'),
    std_actual_total=('Actual Total', 'std'),
    mean_open_total=('Open Total', 'mean'),
)

# Summed 'Over' / 'Under' columns as a percentage of each chief's game count.
chief_totals['over_percentage'] = chief_totals['over_hits'] / chief_totals['games'] * 100
chief_totals['under_percentage'] = chief_totals['under_hits'] / chief_totals['games'] * 100

# Same columns, in the same order, as the previous merge chain produced.
# Every statistic is rounded to 2 decimals; 'games' is left unrounded, as before.
stat_columns = [
    'over_percentage', 'over_by_avg', 'over_by_std',
    'under_by_avg', 'under_by_std', 'under_percentage',
    'mean_actual_total', 'std_actual_total', 'mean_open_total',
]
chief_stats = (
    chief_totals[['games'] + stat_columns]
    .round({column: 2 for column in stat_columns})
    .reset_index()
)

In [8]:
# Crew chiefs with at least 15 games whose over OR under percentage exceeds 60,
# listed from lowest to highest over percentage.
chief_stats.loc[
    (chief_stats['games'] >= 15)
    & ((chief_stats['over_percentage'] > 60) | (chief_stats['under_percentage'] > 60)),
    ['Crew Chief', 'games', 'over_percentage', 'under_percentage'],
].sort_values(by='over_percentage')

Unnamed: 0,Crew Chief,games,over_percentage,under_percentage
9,Jacyn Goble,15.0,26.67,73.33
7,Ed Malloy,44.0,38.64,61.36
13,Josh Tiven,49.0,63.27,36.73
24,Scott Foster,40.0,65.0,35.0
28,Tyler Ford,50.0,66.0,34.0


In [9]:
def _over_rate(games):
    """Return mean of the 'Over' column times 100, or None when `games` has no rows."""
    return None if games.empty else games['Over'].mean() * 100


def _season_over_record(team):
    """Build one summary record for `team`: over % overall, at home, and away."""
    is_home = df['Home'] == team
    is_away = df['Away'] == team
    return {
        'Team': team,
        # No empty-guard here: every team comes from the Home/Away columns,
        # so it appears in at least one row.
        'Overall Over %': df[is_home | is_away]['Over'].mean() * 100,
        'Home Over %': _over_rate(df[is_home]),
        'Away Over %': _over_rate(df[is_away]),
    }


# Every team label that appears as either Home or Away
teams = pd.concat([df['Home'], df['Away']]).unique()

# One record per team, then a single DataFrame construction
stats = [_season_over_record(team) for team in teams]
stats_df = pd.DataFrame(stats)

In [19]:
# Teams ordered from highest to lowest overall over percentage.
stats_df.sort_values(
    by='Overall Over %',
    ascending=False,
)

Unnamed: 0,Team,Overall Over %,Home Over %,Away Over %
23,MEM,70.0,64.516129,75.862069
17,CLE,61.666667,62.5,60.714286
5,ATL,61.290323,68.965517,54.545455
14,DEN,60.655738,68.965517,53.125
9,UTA,60.655738,56.25,65.517241
7,NOP,58.333333,56.666667,60.0
18,NYK,58.333333,56.25,60.714286
3,PHI,57.377049,53.125,62.068966
28,OKC,56.451613,59.375,53.333333
2,DET,55.737705,53.333333,58.064516


In [16]:
# How many of each team's most recent games to summarise.
# NOTE: the output column labels below are hard-coded as "...Last10"; keep them in
# sync if this value is ever changed.
LAST_N_GAMES = 10

# Ensure the 'Date' column is datetime so sorting is chronological
df['Date'] = pd.to_datetime(df['Date'])

# Sort once instead of three times per team. Boolean filtering preserves row order,
# so every per-team slice taken from this frame is already in date order.
# Assumes a team has at most one game per date, so ties cannot change which rows
# land in the last-N window -- TODO confirm against the data.
games_by_date = df.sort_values('Date')


def _recent_over_pct(games):
    """Return mean of the 'Over' column times 100, or None when `games` has no rows."""
    return None if games.empty else games['Over'].mean() * 100


def _recent_form(team):
    """Summarise `team`'s most recent LAST_N_GAMES games as one record.

    Three windows are taken independently: the latest games overall (home or away),
    the latest home games, and the latest away games. The average totals are
    computed on the overall window only.
    """
    is_home = games_by_date['Home'] == team
    is_away = games_by_date['Away'] == team

    recent_all = games_by_date[is_home | is_away].tail(LAST_N_GAMES)
    recent_home = games_by_date[is_home].tail(LAST_N_GAMES)
    recent_away = games_by_date[is_away].tail(LAST_N_GAMES)

    return {
        'Team': team,
        'Overall Over % Last10': _recent_over_pct(recent_all),
        'Home Over % Last10': _recent_over_pct(recent_home),
        'Away Over % Last10': _recent_over_pct(recent_away),
        'Overall Avg Total Last10': recent_all['Actual Total'].mean(),
        'Overall Avg Open Last10': recent_all['Open Total'].mean(),
    }


# Get all unique team names from both Home and Away columns
teams = pd.concat([df['Home'], df['Away']]).unique()

# One record per team, converted to a DataFrame in a single step
stats = [_recent_form(team) for team in teams]
stats10_df = pd.DataFrame(stats)

In [None]:
# Ten teams with the highest average actual total over their last-10 window.
(
    stats10_df
    .sort_values(by='Overall Avg Total Last10', ascending=False)
    .head(10)
)

Unnamed: 0,Team,Overall Over % Last10,Home Over % Last10,Away Over % Last10,Overall Avg Total Last10,Overall Avg Open Last10
28,OKC,80.0,80.0,60.0,246.7,227.6
23,MEM,50.0,60.0,70.0,245.7,242.1
5,ATL,70.0,60.0,70.0,245.2,235.45
17,CLE,80.0,70.0,60.0,239.9,234.0
22,CHI,60.0,70.0,40.0,239.3,237.85
25,PHX,50.0,40.0,70.0,238.6,233.9
9,UTA,80.0,80.0,60.0,237.4,231.8
26,IND,50.0,50.0,50.0,237.2,237.5
24,SAS,50.0,70.0,50.0,235.7,232.95
18,NYK,50.0,60.0,70.0,235.5,235.95


In [21]:
# Ten teams with the LOWEST average actual total over their last-10 window.
# Note: this ranks by combined points scored, not by how often the game went under;
# sort by 'Overall Over % Last10' instead to rank by under frequency.
(
    stats10_df
    .sort_values(by='Overall Avg Total Last10')
    .head(10)
)

Unnamed: 0,Team,Overall Over % Last10,Home Over % Last10,Away Over % Last10,Overall Avg Total Last10,Overall Avg Open Last10
27,BKN,30.0,30.0,30.0,211.3,216.15
20,CHA,20.0,40.0,30.0,211.4,222.7
16,ORL,40.0,40.0,50.0,213.3,216.55
4,TOR,50.0,60.0,40.0,218.3,225.45
6,MIA,60.0,80.0,40.0,219.2,224.7
8,HOU,40.0,20.0,60.0,220.8,224.3
1,LAL,30.0,40.0,70.0,221.5,229.25
0,BOS,40.0,50.0,50.0,221.8,229.55
13,DAL,30.0,50.0,60.0,224.3,230.1
29,GSW,40.0,40.0,60.0,226.2,226.55


In [23]:
# Per crew chief: number of rows (games), sum of the 'Over' column, and the
# resulting over percentage.
crew_stats = (
    df.groupby('Crew Chief')
    .agg(
        games_count=('Crew Chief', 'size'),
        over_count=('Over', 'sum'),
    )
    .reset_index()
    .assign(
        over_percentage=lambda chiefs: (chiefs['over_count'] / chiefs['games_count']) * 100
    )
)

In [28]:
# Five crew chiefs (minimum 15 games) with the lowest over percentage.
(
    crew_stats
    .loc[crew_stats['games_count'].ge(15)]
    .sort_values(by='over_percentage')
    .head(5)
)

Unnamed: 0,Crew Chief,games_count,over_count,over_percentage
9,Jacyn Goble,15,4.0,26.666667
7,Ed Malloy,44,17.0,38.636364
11,James Williams,47,19.0,40.425532
6,David Guthrie,19,8.0,42.105263
8,Gediminas Petraitis,23,10.0,43.478261


In [29]:
# Five crew chiefs (minimum 15 games) with the highest over percentage.
(
    crew_stats
    .loc[crew_stats['games_count'].ge(15)]
    .sort_values(by='over_percentage', ascending=False)
    .head(5)
)

Unnamed: 0,Crew Chief,games_count,over_count,over_percentage
28,Tyler Ford,50,33.0,66.0
24,Scott Foster,40,26.0,65.0
13,Josh Tiven,49,31.0,63.265306
18,Marc Davis,45,27.0,60.0
19,Mark Lindsay,37,21.0,56.756757


In [34]:
# Rows for the most recent date present in df.
df.loc[
    df['Date'].eq(df['Date'].max()),
    ['Date', 'Weekday', 'Away', 'Home', 'Away Points', 'Home Points'],
]

Unnamed: 0,Date,Weekday,Away,Home,Away Points,Home Points
913,2025-03-04,Tuesday,TOR,ORL,114.0,113.0
914,2025-03-04,Tuesday,HOU,IND,102.0,115.0
915,2025-03-04,Tuesday,GSW,NYK,114.0,102.0
916,2025-03-04,Tuesday,MIL,ATL,127.0,121.0
917,2025-03-04,Tuesday,CLE,CHI,139.0,117.0
918,2025-03-04,Tuesday,PHI,MIN,112.0,126.0
919,2025-03-04,Tuesday,BKN,SAS,113.0,127.0
920,2025-03-04,Tuesday,LAC,PHX,117.0,119.0
921,2025-03-04,Tuesday,NOP,LAL,115.0,136.0
