In [1]:
# The season is identified by its two calendar years: FALL is the year it
# starts in, SPRING is the following year (both kept as strings for the file name).
FALL = "2017"
SPRING = f"{int(FALL) + 1}"

In [2]:
import pandas as pd

In [3]:
# Load the tab-separated games file for the configured FALL/SPRING season.
games_path = f"Data/games_cleaned_{FALL}_{SPRING}.tsv"
df = pd.read_csv(games_path, sep="\t", index_col=None)

# Dates are kept as strings so they can be compared against string literals
# such as the endDate used further down.
df["date"] = df["date"].astype("str")
#df["spread_bet"] = "NO BET"
#df["ml_bet"] = "NO BET"
df

Unnamed: 0,date,day,away,home,opening_spread,away_ML,home_ML,away_pts,home_pts,result,result_diff
0,20171017,0,Boston,Cleveland,-3.0,155,-175,99,102,-3,0.0
1,20171017,0,Houston,Golden State,-9.0,385,-485,122,121,1,10.0
2,20171018,1,Brooklyn,Indiana,-3.5,145,-165,131,140,-9,-5.5
3,20171018,1,Miami,Orlando,3.0,-150,130,109,116,-7,-10.0
4,20171018,1,Milwaukee,Boston,-5.5,205,-245,108,100,8,13.5
5,20171018,1,Atlanta,Dallas,-6.5,220,-260,117,111,6,12.5
6,20171018,1,Minnesota,San Antonio,-1.0,105,-125,99,107,-8,-7.0
7,20171018,1,Philadelphia,Washington,-7.0,255,-310,115,120,-5,2.0
8,20171018,1,Charlotte,Detroit,-3.0,145,-165,90,102,-12,-9.0
9,20171018,1,New Orleans,Memphis,-3.0,135,-155,91,103,-12,-9.0


In [4]:
# Per-team running totals, one row per team, sorted by team name.
# The index is built from BOTH the away and home columns so that every team in
# the schedule has a row (updateStats looks teams up by name and would raise a
# KeyError for a team that had only appeared as a home side).
team_names = pd.unique(pd.concat([df.away, df.home], ignore_index=True))

teams_df = pd.DataFrame(
    {
        # Counters start as real integer zeros instead of NaN-filled object
        # columns that depend on fillna() downcasting them afterwards.
        "wins": 0,
        "losses": 0,
        # Derived columns stay empty until updateStats() fills them in.
        "total": float("nan"),
        "pct": float("nan"),
        "ptDiff_total": 0,
        "ptDiff": float("nan"),
    },
    index=team_names,
).sort_index()

In [5]:
def updateStats(today_df):
    """Fold a batch of games into the module-level ``teams_df`` running totals.

    today_df: DataFrame of games (intended to be all games from one day) with
        "home", "away" and "result" columns. A negative "result" is counted as
        a home win, a positive one as an away win.

    Mutates ``teams_df`` in place and returns nothing. If a game's result is
    neither negative nor positive, prints "tie???" and fails an assertion.
    """
    for _, game in today_df.iterrows():
        home, away, margin = game["home"], game["away"], game["result"]

        home_won = margin < 0
        if not home_won and not margin > 0:
            print("tie???")
            assert False

        winner, loser = (home, away) if home_won else (away, home)
        teams_df.at[winner, "wins"] += 1
        teams_df.at[loser, "losses"] += 1

        # the home side accumulates the negated result, the away side the result as-is
        teams_df.at[home, "ptDiff_total"] += -1 * margin
        teams_df.at[away, "ptDiff_total"] += margin

    # refresh the derived columns for every team after the batch is applied
    games_played = teams_df["wins"] + teams_df["losses"]
    teams_df["total"] = games_played
    teams_df["pct"] = teams_df["wins"] / games_played
    teams_df["ptDiff"] = teams_df["ptDiff_total"] / teams_df["total"]

In [6]:
# fill out additional columns of df using teams_df as intermediary
# (only fills out teams_df for now, df is still to-do)
endDate = "20180411"

# Start from zeroed counters so that re-running this cell does not count every
# game a second time on top of the totals from the previous run.
teams_df[["wins", "losses", "ptDiff_total"]] = 0

# Walk the schedule one day at a time, in day order, folding each day's games
# into teams_df. Grouping by day (instead of comparing each row with the row at
# index-1) does not depend on the index being 0..n-1 or on the first day being
# numbered 0, and it cannot skip the last day of the data.
for day, day_games in df.groupby("day", sort=True):
    updateStats(day_games)

    # Stop after the day that contains endDate. If endDate is empty or never
    # appears in the data, every day is processed instead of silently leaving
    # the final day out.
    if endDate and (day_games["date"] == endDate).any():
        break

teams_df

Unnamed: 0,wins,losses,total,pct,ptDiff_total,ptDiff
Atlanta,24,58,82,0.292683,-447,-5.45122
Boston,55,27,82,0.670732,294,3.585366
Brooklyn,28,54,82,0.341463,-307,-3.743902
Charlotte,36,46,82,0.439024,21,0.256098
Chicago,27,55,82,0.329268,-577,-7.036585
Cleveland,50,32,82,0.609756,77,0.939024
Dallas,24,58,82,0.292683,-249,-3.036585
Denver,46,36,82,0.560976,121,1.47561
Detroit,39,43,82,0.47561,-12,-0.146341
Golden State,58,24,82,0.707317,490,5.97561


In [7]:
# begin data mining here

from sklearn.tree import DecisionTreeRegressor

# Remove the columns that are not used from here on.
# errors="ignore" keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop() would raise a KeyError.
df = df.drop(columns=["date", "result_diff"], errors="ignore")
df

Unnamed: 0,day,away,home,opening_spread,away_ML,home_ML,away_pts,home_pts,result
0,0,Boston,Cleveland,-3.0,155,-175,99,102,-3
1,0,Houston,Golden State,-9.0,385,-485,122,121,1
2,1,Brooklyn,Indiana,-3.5,145,-165,131,140,-9
3,1,Miami,Orlando,3.0,-150,130,109,116,-7
4,1,Milwaukee,Boston,-5.5,205,-245,108,100,8
5,1,Atlanta,Dallas,-6.5,220,-260,117,111,6
6,1,Minnesota,San Antonio,-1.0,105,-125,99,107,-8
7,1,Philadelphia,Washington,-7.0,255,-310,115,120,-5
8,1,Charlotte,Detroit,-3.0,145,-165,90,102,-12
9,1,New Orleans,Memphis,-3.0,135,-155,91,103,-12


In [8]:
# loop through df, predicting each game using only games from previous days as training data

feature_cols = ["day", "opening_spread", "away_ML", "home_ML"]

# Day 0 has no earlier games to train on, so its predictions fall back to the
# opening spread (truncated to int like every other prediction). For the two
# day-0 games of this season that reproduces the previously hard-coded
# [-3, -9], and it keeps working when a season has a different number of
# opening-day games.
is_first_day = df.day == 0
daily_predictions = [df.loc[is_first_day, "opening_spread"].astype(int)]

for day in range(1, int(df.day.max()) + 1):
    print(day)
    is_today = df.day == day
    is_earlier = df.day < day

    if is_today.any():
        # random_state is fixed so that re-running the notebook yields the same
        # trees (and therefore the same bets) every time.
        dtr = DecisionTreeRegressor(random_state=0).fit(
            df.loc[is_earlier, feature_cols],
            df.loc[is_earlier, "result"],
        )
        pred = dtr.predict(df.loc[is_today, feature_cols])
        print(pred)
        # Keep every prediction attached to the index label of its game;
        # astype(int) truncates toward zero, as int(p) did.
        daily_predictions.append(
            pd.Series(pred, index=df.index[is_today]).astype(int)
        )

    print()

# Index-aligned assignment: each prediction lands on its own game even if the
# rows are not sorted by day, instead of relying on list position.
df["prediction"] = pd.concat(daily_predictions)
df

1
[-3. -3. -3. -3. -3. -3. -3. -3. -3. -3. -3.]

2
[ 1.  1. -7.]

3
[ 48.   8.  -7.   8.   5.   8.  48.  -8.   8. -12.]

4
[  1. -17.   1.   8. -21. -21.  19.   5.  -3.  18. -17.]

5
[-5. -3. -9.]

6
[ -3.  -5.  10.   2.  -4.   6.   2. -12.]

7
[  2.   8.   5. -42. -11.   2.]

8
[ 30.  30.  30.  30.  30.  30.  30. -42.  30. -17.]

9
[ -4. -21. -21. -21. -21.]

10
[ -9.    4.5   1.    1.   -5.   -3.  -21. ]

11
[  7.  48. -21.  -3. -21.   5. -21. -21.]

12
[-27. -21.   7.  -5. -27.   1.  -7.]

13
[-7. 13. 19. -7. 13. -7. -7. -7. 13.]

14
[  8.  48. -14.  13.]

15
[  8.  16.   8.   8.  14.   8.   8.  19.  19. -20.  14.  19.]

16
[-3.  5.]

17
[ -9. -14.  20.  -5.   6. -10.  16. -18.   2.  22. -10.  -5.]

18
[  7.  -3.  22. -27.  -3.]

19
[-13.  -1.   5.  14.   3.  17.   3.  -7.  -1. -21.]

20
[ 15.  -9. -13.]

21
[  8.   6.   6.  -3.  -5. -17.  23. -13.  21.  32.]

22
[ 11. -10.   7.  -8. -17.]

23
[-24. -13. -13.  11. -13.]

24
[-11.  11.  -6. -17.  -4.  -4.  -4. -11.]

25
[ 17.   9.  -

Unnamed: 0,day,away,home,opening_spread,away_ML,home_ML,away_pts,home_pts,result,prediction
0,0,Boston,Cleveland,-3.0,155,-175,99,102,-3,-3
1,0,Houston,Golden State,-9.0,385,-485,122,121,1,-9
2,1,Brooklyn,Indiana,-3.5,145,-165,131,140,-9,-3
3,1,Miami,Orlando,3.0,-150,130,109,116,-7,-3
4,1,Milwaukee,Boston,-5.5,205,-245,108,100,8,-3
5,1,Atlanta,Dallas,-6.5,220,-260,117,111,6,-3
6,1,Minnesota,San Antonio,-1.0,105,-125,99,107,-8,-3
7,1,Philadelphia,Washington,-7.0,255,-310,115,120,-5,-3
8,1,Charlotte,Detroit,-3.0,145,-165,90,102,-12,-3
9,1,New Orleans,Memphis,-3.0,135,-155,91,103,-12,-3


In [9]:
# place bets

for cushion in range(1, 2):
    pick_col = "atsPick_cushion" + str(cushion)

    # Grade each game against the spread: "H" when the result is below the
    # opening spread, "A" when it is above, "P" when they are equal.
    df["atsResult"] = "P"
    df.loc[df["result"] < df["opening_spread"], "atsResult"] = "H"
    df.loc[df["result"] > df["opening_spread"], "atsResult"] = "A"

    # Bet only when the prediction differs from the spread by at least `cushion`.
    edge = df["prediction"].astype(float) - df["opening_spread"]
    df[pick_col] = "NO BET"
    # "A" is written first so that the "H" rule takes precedence if both
    # conditions were ever true for the same game.
    df.loc[edge >= cushion, pick_col] = "A"
    df.loc[edge <= (-1 * cushion), pick_col] = "H"

df

Unnamed: 0,day,away,home,opening_spread,away_ML,home_ML,away_pts,home_pts,result,prediction,atsResult,atsPick_cushion1
0,0,Boston,Cleveland,-3.0,155,-175,99,102,-3,-3,P,NO BET
1,0,Houston,Golden State,-9.0,385,-485,122,121,1,-9,A,NO BET
2,1,Brooklyn,Indiana,-3.5,145,-165,131,140,-9,-3,H,NO BET
3,1,Miami,Orlando,3.0,-150,130,109,116,-7,-3,H,H
4,1,Milwaukee,Boston,-5.5,205,-245,108,100,8,-3,A,A
5,1,Atlanta,Dallas,-6.5,220,-260,117,111,6,-3,A,A
6,1,Minnesota,San Antonio,-1.0,105,-125,99,107,-8,-3,H,H
7,1,Philadelphia,Washington,-7.0,255,-310,115,120,-5,-3,A,A
8,1,Charlotte,Detroit,-3.0,145,-165,90,102,-12,-3,H,NO BET
9,1,New Orleans,Memphis,-3.0,135,-155,91,103,-12,-3,H,NO BET


In [10]:
# Label-based slice: keep the rows from index label 100 onward, then only the
# games on which a pick was actually made.
later_games = df.loc[100:]
made_a_pick = later_games["atsPick_cushion1"] != "NO BET"
bets_df = later_games[made_a_pick]
bets_df

Unnamed: 0,day,away,home,opening_spread,away_ML,home_ML,away_pts,home_pts,result,prediction,atsResult,atsPick_cushion1
100,14,Sacramento,Indiana,-6.0,175,-210,83,101,-18,8,H,A
101,14,Oklahoma City,Milwaukee,1.0,-125,105,110,91,19,48,A,A
102,14,Phoenix,Brooklyn,-3.0,155,-175,122,114,8,-14,A,H
103,14,Detroit,L.A. Lakers,4.5,-160,140,93,113,-20,13,H,A
104,15,Phoenix,Washington,-13.0,750,-950,122,116,6,8,A,A
105,15,Atlanta,Philadelphia,-8.5,325,-360,109,119,-10,16,H,A
106,15,Sacramento,Boston,-12.5,800,-1000,86,113,-27,8,H,A
107,15,Orlando,Memphis,-4.0,145,-165,101,99,2,8,A,A
108,15,Minnesota,New Orleans,-1.5,115,-125,104,98,6,14,A,A
109,15,Portland,Utah,-4.0,160,-170,103,112,-9,8,H,A


In [11]:
# Tally the record of the placed bets: wins, pushes and losses.
graded = bets_df["atsResult"]
picked = bets_df["atsPick_cushion1"]
is_push = graded == "P"

w = int((graded == picked).sum())
p = int(is_push.sum())
l = int((~is_push & (graded != picked)).sum())

In [12]:
# Record as wins - losses - pushes, followed by the win rate over decided bets
# (pushes are left out of the denominator).
print(f"{w} - {l} - {p}")
decided = w + l
print(w / decided)

539 - 516 - 19
0.5109004739336492
