In [1]:
import pandas as pd
import scipy.special
import math
from matplotlib.pyplot import plot,scatter
import statistics
from sklearn.cluster import KMeans
from kneed import KneeLocator

In [2]:
# Years whose standings CSVs are loaded for the leagues below: 2000 through 2020 inclusive.
years = range(2000, 2021)
# Leagues read by the loop in this cell; their CSVs carry "Home"/"Road" win-loss strings.
leagues = ["nba", "mlb"] # TODO: add other leagues

def get_home_boost(total_pct, home_wins, home_games):
    """Return the log of the binomial upper-tail probability of a home record.

    Treats each of ``home_games`` as an independent trial that is won with
    probability ``total_pct`` and computes ``log P(X >= home_wins)``.  The more
    negative the result, the less likely the home record is under the team's
    overall win rate.

    Args:
        total_pct: Overall win probability, between 0 and 1 inclusive.
        home_wins: Number of home wins; non-integers are rounded up.
        home_games: Number of home games played.

    Returns:
        The natural log of the tail probability; never greater than 0.

    Raises:
        ValueError: If ``total_pct`` is outside [0, 1], or if the tail
            probability is zero (for example ``home_wins > home_games``), in
            which case the logarithm is undefined.
    """
    if not 0 <= total_pct <= 1:
        raise ValueError(f"total_pct must be within [0, 1], got {total_pct!r}")

    # P(X >= k) for k <= 0 is the whole distribution, so start counting at 0.
    first_count = max(0, math.ceil(home_wins))

    # fsum keeps the rounding error of the many small terms under control.
    tail = math.fsum(
        scipy.special.binom(home_games, wins)
        * total_pct ** wins
        * (1 - total_pct) ** (home_games - wins)
        for wins in range(first_count, home_games + 1)
    )

    if tail <= 0:
        raise ValueError(
            f"{home_wins!r} wins in {home_games!r} home games has zero probability "
            f"at a win rate of {total_pct!r}; its log is undefined"
        )

    # Rounding can leave the sum a hair above 1; a log-probability must not be positive.
    return math.log(min(tail, 1.0))

# league -> team -> list of per-year home-boost values, in the order the years are read.
teamHomeBoosts = {}
for league in leagues:
    league_boosts = teamHomeBoosts[league] = {}
    for year in years:
        # Track the most negative boost seen in this league/year.
        min_team, min_boost = "", 0

        standings = pd.read_csv(f"data/{league}/{year}.csv")
        for team, home, road in zip(standings["Team"], standings["Home"], standings["Road"]):
            # "Home" and "Road" hold "<wins>-<losses>" strings.
            home_wins, home_losses = map(int, home.split("-"))
            road_wins, road_losses = map(int, road.split("-"))

            home_games = home_wins + home_losses
            total_wins = home_wins + road_wins
            total_games = total_wins + home_losses + road_losses
            total_pct = total_wins / total_games

            boost = get_home_boost(total_pct, home_wins, home_games)
            league_boosts.setdefault(team, []).append(boost)

            if boost < min_boost:
                min_team, min_boost = team, boost
        print(league, year, min_team, min_boost)
        
    

nba 2000 Indiana Pacers -5.631077324079996
nba 2001 Denver Nuggets -5.610862267082693
nba 2002 New Jersey Nets -4.228697343452214
nba 2003 Chicago Bulls -8.914917337805443
nba 2004 Dallas Mavericks -7.648357773084604
nba 2005 San Antonio Spurs -6.846028544961502
nba 2006 Orlando Magic -4.676365219094853
nba 2007 Golden State Warriors -5.661837441662076
nba 2008 Utah Jazz -8.016702264422117
nba 2009 Utah Jazz -5.966234135382336
nba 2010 Charlotte Bobcats -5.736040780142513
nba 2011 Washington Wizards -5.555142475073601
nba 2012 Utah Jazz -4.595523957659505
nba 2013 Denver Nuggets -7.929267254368207
nba 2014 Orlando Magic -4.65226216045649
nba 2015 Golden State Warriors -4.345799270948383
nba 2016 San Antonio Spurs -5.9627547043522755
nba 2017 Indiana Pacers -4.7275519246970665
nba 2018 San Antonio Spurs -6.4493585127408375
nba 2019 San Antonio Spurs -4.941070454782477
nba 2020 Philadelphia 76ers -8.91207340615599
mlb 2000 NYM -3.1183210435371995
mlb 2001 FLA -3.29193685603975
mlb 2002 C

In [3]:
# These leagues' CSVs carry numeric Games / Wins / Home Games / Home Wins columns.
other_years = range(2000, 2020)
other_leagues = ["nfl", "nhl"] # TODO: add other leagues

record_columns = ("Team", "Games", "Wins", "Home Games", "Home Wins")

for league in other_leagues:
    league_boosts = teamHomeBoosts[league] = {}
    for year in other_years:
        # NOTE(review): nhl/2005 is skipped — presumably there is no standings file for it; confirm.
        if (league, year) == ("nhl", 2005):
            continue

        # Track the most negative boost seen in this league/year.
        min_team, min_boost = "", 0

        standings = pd.read_csv(f"data/{league}/{year}.csv")
        rows = zip(*(standings[column] for column in record_columns))
        for team, games, wins, home_games, home_wins in rows:
            total_pct = wins / games

            boost = get_home_boost(total_pct, home_wins, home_games)
            league_boosts.setdefault(team, []).append(boost)

            if boost < min_boost:
                min_team, min_boost = team, boost
        print(league, year, min_team, min_boost)
        
    

nfl 2000 Arizona Cardinals -1.7359465419824767
nfl 2001 Minnesota Vikings -2.6845894198294498
nfl 2002 Miami Dolphins -2.625750466669077
nfl 2003 Seattle Seahawks -3.7600290339658846
nfl 2004 Arizona Cardinals -1.9847931707052622
nfl 2005 Baltimore Ravens -3.3236384462947344
nfl 2006 Indianapolis Colts -2.301456579614247
nfl 2007 Pittsburgh Steelers -2.002171116413511
nfl 2008 Carolina Panthers -2.301456579614247
nfl 2009 New England Patriots -3.7600290339658846
nfl 2010 Green Bay Packers -2.002171116413511
nfl 2011 Baltimore Ravens -2.301456579614247
nfl 2012 Seattle Seahawks -2.9975475955312856
nfl 2013 Cincinnati Bengals -2.9975475955312856
nfl 2014 Green Bay Packers -2.301456579614247
nfl 2015 Jacksonville Jaguars -1.5260741550126662
nfl 2016 Houston Texans -2.625750466669077
nfl 2017 New York Jets -1.5260741550126662
nfl 2018 New England Patriots -2.9975475955312856
nfl 2019 New York Jets -1.4395875229796313
nhl 2000 Pittsburgh Penguins -1.9908276130136056
nhl 2001 Tampa Bay Light

In [4]:
#Are there teams out of all leagues which consistently enjoy a larger home boost?
def computeOptimalClusters(testList):
    """Pick a KMeans cluster count for ``testList`` with the elbow method.

    Fits KMeans for every cluster count from 1 up to the number of points,
    records each fit's inertia, and asks ``KneeLocator`` for the knee of that
    convex, decreasing curve.  The chosen count is printed and returned.

    Args:
        testList: Sequence of points accepted by ``KMeans.fit``.

    Returns:
        The chosen number of clusters: the point count itself when there are
        fewer than two points (0 for empty input), the detected knee otherwise,
        or 1 when no knee could be detected.
    """
    n_points = len(testList)

    if n_points < 2:
        # No curve to search: zero or one point can only form that many clusters.
        knee = n_points
    else:
        cluster_counts = list(range(1, n_points + 1))
        inertias = [
            KMeans(n_clusters=count).fit(testList).inertia_
            for count in cluster_counts
        ]
        knee = KneeLocator(
            cluster_counts, inertias, curve='convex', direction='decreasing'
        ).knee
        if knee is None:
            # No elbow in the inertia curve; fall back to one cluster so
            # callers never pass None to KMeans(n_clusters=...).
            knee = 1

    print(knee)
    return knee

def evaluateConsistency(testList):
    """Cluster the points and return those in the cluster with the lowest second coordinate.

    The cluster count comes from ``computeOptimalClusters``.  The points are
    scatter-plotted coloured by cluster, and the cluster whose centre has the
    smallest second coordinate is selected (callers in this notebook pass
    ``[mean, variance]`` pairs, so that is the lowest-variance cluster).

    Args:
        testList: Sequence of two-element points.

    Returns:
        The members of ``testList`` (same objects, original order) that fall in
        the selected cluster; an empty list when ``testList`` is empty.
    """
    if len(testList) == 0:
        return []

    # Fall back to a single cluster if no knee was detected.
    n_clusters = computeOptimalClusters(testList) or 1

    # `algorithm` is left at the library default: the 'auto' option is
    # rejected by current scikit-learn releases.
    cluster = KMeans(n_clusters=n_clusters, max_iter=10000, tol=.000000001)
    cluster.fit(testList)
    labels = cluster.predict(testList)

    scatter([point[0] for point in testList], [point[1] for point in testList], c=labels)

    # Use one integer label (first on ties) rather than a list of indices, so
    # the membership test below is a plain scalar comparison.
    centre_heights = [centre[1] for centre in cluster.cluster_centers_]
    lowest_label = centre_heights.index(min(centre_heights))

    return [point for label, point in zip(labels, testList) if label == lowest_label]

# team -> [mean boost, boost variance]; consistencyList holds the same pairs as a flat list.
consistencyDict = {}
consistencyList = []
for league in teamHomeBoosts:
    for team, boosts in teamHomeBoosts[league].items():
        # Sample variance needs at least two seasons.
        if len(boosts) < 2:
            continue
        variance = statistics.variance(boosts)
        mean = statistics.mean(boosts)
        if mean <= 0:
            consistencyDict[team] = [mean, variance]
            consistencyList.append([mean, variance])

meanSet = statistics.mean([pair[0] for pair in consistencyList])
print(meanSet)

# A more negative mean is a larger home boost, so "greater" is the below-average side.
greaterMeanList = [pair for pair in consistencyList if pair[0] < meanSet]
lesserMeanList = [pair for pair in consistencyList if pair[0] > meanSet]
consistentValuesLesser = evaluateConsistency(lesserMeanList)
consistentValuesGreater = evaluateConsistency(greaterMeanList)

# Parallel lists for mapping a [mean, variance] pair back to the first team that has it.
team_names = list(consistencyDict.keys())
team_stats = list(consistencyDict.values())

print("Consistent high home boosts: ")
for point in consistentValuesGreater:
    print(point, team_names[team_stats.index(point)])
print("Consistent low home boosts: ")
for point in consistentValuesLesser:
    print(point, team_names[team_stats.index(point)])






-1.4390163112557597
9
7
Consistent high home boosts: 
[-1.7135300943272596, 0.42776039067617716] Seattle SuperSonics
[-1.7779924274765087, 0.6157135441788903] Toronto Raptors
[-1.471184180597055, 0.6857593767662269] Brooklyn Nets
[-1.4584714105614784, 0.564112660989192] SFG
[-1.43980941298216, 0.824523230827438] CHW
[-1.483274213444073, 0.8410130127713387] STL
[-1.538623415769982, 0.7152701624089012] OAK
[-1.5394543492123318, 0.43878716630043657] NYY
[-1.5205431931980988, 0.4323065925258201] TOR
[-1.4409707620104861, 0.8538168937780759] BAL
[-1.6664092482411896, 0.8267655562023609] TEX
[-1.4915516362807586, 0.9045335846769832] Tampa Bay Lightning
[-1.5388957949162745, 0.7360959890456762] Anaheim Ducks
[-1.555223441547544, 0.10888045585813846] Vegas Golden Knights
Consistent low home boosts: 
[-0.3680213795417536, 0.10540303296334232] Los Angeles Rams
[-0.3549249434612463, 0.08469397130221248] Los Angeles Chargers


In [5]:
# Which NBA teams enjoy consistently higher or lower home boosts?
# team -> [mean boost, boost variance]; consistencyList holds the same pairs as a flat list.
consistencyDict = {}
consistencyList = []

for team, boosts in teamHomeBoosts["nba"].items():
    # Sample variance needs at least two seasons.
    if len(boosts) < 2:
        continue
    variance = statistics.variance(boosts)
    mean = statistics.mean(boosts)
    if mean <= 0:
        consistencyDict[team] = [mean, variance]
        consistencyList.append([mean, variance])

scatter([pair[0] for pair in consistencyList], [pair[1] for pair in consistencyList])

meanSet = statistics.mean([pair[0] for pair in consistencyList])
print(meanSet)

# A more negative mean is a larger home boost, so "greater" is the below-average side.
greaterMeanList = [pair for pair in consistencyList if pair[0] < meanSet]
lesserMeanList = [pair for pair in consistencyList if pair[0] > meanSet]
consistentValuesLesser = evaluateConsistency(lesserMeanList)
consistentValuesGreater = evaluateConsistency(greaterMeanList)

# Parallel lists for mapping a [mean, variance] pair back to the first team that has it.
team_names = list(consistencyDict.keys())
team_stats = list(consistencyDict.values())

print("Consistent high home boosts: ")
for point in consistentValuesGreater:
    print(point, team_names[team_stats.index(point)])
print("Consistent low home boosts: ")
for point in consistentValuesLesser:
    print(point, team_names[team_stats.index(point)])


-2.1870050831905194
3
3
Consistent high home boosts: 
[-2.4762455958750644, 1.1404442632558969] Portland Trail Blazers
[-2.215768978810253, 0.9406476993508954] Milwaukee Bucks
[-2.5133270841841795, 1.1961587853000308] Cleveland Cavaliers
[-2.2720564120204325, 0.6586821939454445] Memphis Grizzlies
[-2.390434647143609, 0.03998196474712973] New Orleans/Oklahoma City Hornets
Consistent low home boosts: 
[-1.7135300943272596, 0.42776039067617716] Seattle SuperSonics
[-1.7779924274765087, 0.6157135441788903] Toronto Raptors
[-1.3869181660869967, 0.5384050674946775] Vancouver Grizzlies
[-1.873143353146407, 0.6308926469839363] New Orleans Hornets
[-1.8747733791275751, 0.6165427835431679] Oklahoma City Thunder
[-1.471184180597055, 0.6857593767662269] Brooklyn Nets


In [6]:
# Which MLB teams enjoy consistently higher or lower home boosts?
# team -> [mean boost, boost variance]; consistencyList holds the same pairs as a flat list.
consistencyDict = {}
consistencyList = []

for team, boosts in teamHomeBoosts["mlb"].items():
    # Sample variance needs at least two seasons.
    if len(boosts) < 2:
        continue
    variance = statistics.variance(boosts)
    mean = statistics.mean(boosts)
    if mean <= 0:
        consistencyDict[team] = [mean, variance]
        consistencyList.append([mean, variance])

scatter([pair[0] for pair in consistencyList], [pair[1] for pair in consistencyList])

meanSet = statistics.mean([pair[0] for pair in consistencyList])
print(meanSet)

# A more negative mean is a larger home boost, so "greater" is the below-average side.
greaterMeanList = [pair for pair in consistencyList if pair[0] < meanSet]
lesserMeanList = [pair for pair in consistencyList if pair[0] > meanSet]
consistentValuesLesser = evaluateConsistency(lesserMeanList)
consistentValuesGreater = evaluateConsistency(greaterMeanList)

# Parallel lists for mapping a [mean, variance] pair back to the first team that has it.
team_names = list(consistencyDict.keys())
team_stats = list(consistencyDict.values())

print("Consistent high home boosts: ")
for point in consistentValuesGreater:
    print(point, team_names[team_stats.index(point)])
print("Consistent low home boosts: ")
for point in consistentValuesLesser:
    print(point, team_names[team_stats.index(point)])

-1.4789554250230337
4
3
Consistent high home boosts: 
[-1.483274213444073, 0.8410130127713387] STL
[-1.538623415769982, 0.7152701624089012] OAK
[-1.5394543492123318, 0.43878716630043657] NYY
[-1.5205431931980988, 0.4323065925258201] TOR
[-1.6664092482411896, 0.8267655562023609] TEX
Consistent low home boosts: 
[-1.2015413262213155, 0.34976649620739236] SEA
[-1.2362857618230148, 0.32077086315661174] CLE
[-1.2147028203290045, 0.32248927416603757] CIN
[-1.233523340641114, 0.3598868718623436] KCR
[-1.167002183434528, 0.3406885197013865] LAA
[-1.2935378250527012, 0.3166009535556119] WSN


In [7]:
# Which NFL teams enjoy consistently higher or lower home boosts?
# team -> [mean boost, boost variance]; consistencyList holds the same pairs as a flat list.
consistencyDict = {}
consistencyList = []

for team, boosts in teamHomeBoosts["nfl"].items():
    # Sample variance needs at least two seasons.
    if len(boosts) < 2:
        continue
    variance = statistics.variance(boosts)
    mean = statistics.mean(boosts)
    if mean <= 0:
        consistencyDict[team] = [mean, variance]
        consistencyList.append([mean, variance])

scatter([pair[0] for pair in consistencyList], [pair[1] for pair in consistencyList])

meanSet = statistics.mean([pair[0] for pair in consistencyList])
print(meanSet)

# A more negative mean is a larger home boost, so "greater" is the below-average side.
greaterMeanList = [pair for pair in consistencyList if pair[0] < meanSet]
lesserMeanList = [pair for pair in consistencyList if pair[0] > meanSet]
consistentValuesLesser = evaluateConsistency(lesserMeanList)
consistentValuesGreater = evaluateConsistency(greaterMeanList)

# Parallel lists for mapping a [mean, variance] pair back to the first team that has it.
team_names = list(consistencyDict.keys())
team_stats = list(consistencyDict.values())

print("Consistent high home boosts: ")
for point in consistentValuesGreater:
    print(point, team_names[team_stats.index(point)])
print("Consistent low home boosts: ")
for point in consistentValuesLesser:
    print(point, team_names[team_stats.index(point)])

-0.854728477368492
4
5
Consistent high home boosts: 
[-0.8616903283589954, 0.29989284174863046] Oakland Raiders
[-0.8737591785354059, 0.16878736933611543] San Diego Chargers
[-0.8607533616148395, 0.2086997994123899] Buffalo Bills
Consistent low home boosts: 
[-0.3680213795417536, 0.10540303296334232] Los Angeles Rams
[-0.3549249434612463, 0.08469397130221248] Los Angeles Chargers


In [8]:
# Which NHL teams enjoy consistently higher or lower home boosts?
# team -> [mean boost, boost variance]; consistencyList holds the same pairs as a flat list.
consistencyDict = {}
consistencyList = []

for team, boosts in teamHomeBoosts["nhl"].items():
    # Sample variance needs at least two seasons.
    if len(boosts) < 2:
        continue
    variance = statistics.variance(boosts)
    mean = statistics.mean(boosts)
    if mean <= 0:
        consistencyDict[team] = [mean, variance]
        consistencyList.append([mean, variance])

scatter([pair[0] for pair in consistencyList], [pair[1] for pair in consistencyList])

meanSet = statistics.mean([pair[0] for pair in consistencyList])
print(meanSet)

# A more negative mean is a larger home boost, so "greater" is the below-average side.
greaterMeanList = [pair for pair in consistencyList if pair[0] < meanSet]
lesserMeanList = [pair for pair in consistencyList if pair[0] > meanSet]
consistentValuesLesser = evaluateConsistency(lesserMeanList)
consistentValuesGreater = evaluateConsistency(greaterMeanList)

# Parallel lists for mapping a [mean, variance] pair back to the first team that has it.
team_names = list(consistencyDict.keys())
team_stats = list(consistencyDict.values())

print("Consistent high home boosts: ")
for point in consistentValuesGreater:
    print(point, team_names[team_stats.index(point)])
print("Consistent low home boosts: ")
for point in consistentValuesLesser:
    print(point, team_names[team_stats.index(point)])

-1.1913769199154198
3
4
Consistent high home boosts: 
[-1.2423054267788045, 0.4549231930471226] New Jersey Devils
[-1.2521567666222084, 0.3338567318935942] Washington Capitals
[-1.555223441547544, 0.10888045585813846] Vegas Golden Knights
Consistent low home boosts: 
[-0.9422811045180297, 0.3235108412241624] Edmonton Oilers
[-0.8609439235021634, 0.2928911395130322] Atlanta Thrashers
[-0.9718387075777315, 0.40644528232428667] Boston Bruins
[-1.0059142806884263, 0.26490447152541746] Buffalo Sabres
[-1.0141213829563311, 0.4075086944090097] Montreal Canadiens
[-1.019034361732184, 0.3228016203991867] Ottawa Senators
[-1.108344810317906, 0.37469025868595096] Chicago Blackhawks
