# Finding team strength with regression

The idea was adapted from - https://www.basketball-reference.com/blog/index6aa2.html?p=8070

Our goal is to find each team's strength. One possible way to do it is to create a linear equation for each game:

\begin{equation*}
Team_i - Team_j = \Delta Score
\end{equation*}
Where:
\begin{equation*}
\Delta Score = PointsScored_i - PointsScored_j
\end{equation*}

$Team_i$ and $Team_j$ indicate the strengths of teams i and j. Those are the variables we want to find. $\Delta Score$ is the margin between the two teams in that specific game.

Another consideration is how to treat home court advantage: we can fit a single global home court advantage, fit a separate home strength for each team, or not take home court into consideration at all.

In [156]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
pd.options.display.max_rows = 200
from sklearn.linear_model import Ridge,Lasso
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss,accuracy_score
import statsmodels.api as sm

%matplotlib inline

In [166]:
def dataset_name(path):
    """Return the file name of *path* without directories or extension.

    Both backslash and forward slash are treated as directory separators, so
    the resulting lookup keys are the same on Windows and on POSIX systems.
    Everything after the first '.' of the file name is dropped.
    """
    return path.replace("\\", "/").split("/")[-1].split(".")[0]


files = glob.glob('google-cloud-ncaa-march-madness-2020-division-1-mens-tournament/MDataFiles_Stage1/*')
# Map dataset name (e.g. 'MTeams') -> path of the corresponding data file.
file_dict = {dataset_name(f): f for f in files}
for i in file_dict:
    print(i)

Cities
Conferences
MConferenceTourneyGames
MGameCities
MMasseyOrdinals
MNCAATourneyCompactResults
MNCAATourneyDetailedResults
MNCAATourneySeedRoundSlots
MNCAATourneySeeds
MNCAATourneySlots
MRegularSeasonCompactResults
MRegularSeasonDetailedResults
MSeasons
MSecondaryTourneyCompactResults
MSecondaryTourneyTeams
MTeamCoaches
MTeamConferences
MTeams
MTeamSpellings


In [199]:
# Regular-season results; the strength functions read this table by name.
SeasonResults = pd.read_csv(filepath_or_buffer=file_dict['MRegularSeasonCompactResults'])
# Preview the first ten games.
SeasonResults.head(n=10)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0
5,1985,25,1218,79,1337,78,H,0
6,1985,25,1228,64,1226,44,N,0
7,1985,25,1242,58,1268,56,N,0
8,1985,25,1260,98,1133,80,H,0
9,1985,25,1305,97,1424,89,H,0


In [169]:
# Load the tournament results and give every game a canonical orientation:
#   TeamID1 - the lower of the two team IDs
#   TeamID2 - the higher of the two team IDs
#   result  - 1 when TeamID1 is the winner, 0 otherwise
#   ID      - '<Season>_<TeamID1>_<TeamID2>'
# Later keyword arguments of assign() may use the columns created by earlier ones.
TourneyCompactResults = (
    pd.read_csv(file_dict['MNCAATourneyCompactResults'])
    .assign(
        TeamID1=lambda games: np.minimum(games['WTeamID'], games['LTeamID']),
        TeamID2=lambda games: np.maximum(games['WTeamID'], games['LTeamID']),
        result=lambda games: np.where(games['WTeamID'] == games['TeamID1'], 1, 0),
        ID=lambda games: (games['Season'].astype(str)
                          + '_' + games['TeamID1'].astype(str)
                          + '_' + games['TeamID2'].astype(str)),
    )
)

In [330]:
def team_strength_regression(seasons,alpha=0.003,reg_type='Lasso',home=True):
    """Estimate per-season team strengths from regular-season point margins.

    Every game contributes one linear equation
        home_term + strength(TeamID1) - strength(TeamID2) = margin of TeamID1
    where TeamID1 is the lower of the two team IDs. Each season is fitted on
    its own with a regularized linear model.

    Input:
    seasons - a list/array of seasons to compute the strength for
    alpha (optional) - the regularization parameter passed to the linear model
    reg_type (optional) - 'Ridge' selects Ridge; any other value uses Lasso
    home (optional) - if True add one shared home court coefficient
    Output:
    DataFrame with the columns TeamID, strength, home (only when home=True)
    and Season, one row per team and season.
    Example:
    team_strength = team_strength_regression(np.arange(1985,2020),alpha=alpha,reg_type='Lasso')
    """
    # 'Ridge' is the only alternative model; everything else means Lasso.
    model_cls = Ridge if reg_type == 'Ridge' else Lasso
    location_sign = {'H': 1, 'A': -1, 'N': 0}

    per_season = []
    for season in seasons:
        games = SeasonResults[SeasonResults['Season'] == season].copy()

        # Orient every game as (lower team ID) vs (higher team ID).
        low_id = np.minimum(games['WTeamID'], games['LTeamID'])
        high_id = np.maximum(games['WTeamID'], games['LTeamID'])
        low_won = games['WTeamID'] == low_id

        # Point margin and court location from the point of view of low_id.
        margin_low = np.where(low_won,
                              games['WScore'] - games['LScore'],
                              games['LScore'] - games['WScore'])
        winner_loc = games['WLoc'].map(location_sign)
        loc_low = np.where(low_won, winner_loc, -1 * winner_loc)

        # One design-matrix column per team, in order of first appearance.
        unique_teams = pd.concat([low_id, high_id], axis=0).unique()
        n_games = len(games)
        n_teams = len(unique_teams)
        column_of = dict(zip(unique_teams, np.arange(n_teams)))

        A = np.zeros((n_games, n_teams + 1 if home else n_teams))
        if home:
            # The last column carries the shared home court term.
            A[:, -1] = loc_low
        game_rows = np.arange(n_games)
        A[game_rows, low_id.map(column_of).values.astype(int)] = 1
        A[game_rows, high_id.map(column_of).values.astype(int)] = -1

        lin = model_cls(alpha=alpha)
        lin.fit(A, margin_low)

        team_strength = pd.DataFrame({'TeamID': unique_teams})
        if home:
            team_strength['strength'] = lin.coef_[:-1]
            team_strength['home'] = lin.coef_[-1]
        else:
            team_strength['strength'] = lin.coef_
        team_strength['Season'] = season
        per_season.append(team_strength)

    return pd.concat(per_season, ignore_index=True)

def team_strength_logregression(seasons,alpha=2,home=True):
    """Estimate per-season team strengths from regular-season wins and losses.

    For every season a logistic regression without intercept is fitted on one
    row per game: +1 in the column of TeamID1 (the lower team ID), -1 in the
    column of TeamID2, with target 1 when TeamID1 outscored TeamID2. The
    fitted coefficients are reported as the team strengths.

    Input:
    seasons - a list/array of seasons to compute the strength for
    alpha (optional) - passed to LogisticRegression as C; in scikit-learn C is
        the inverse of the regularization strength, so a larger value means
        weaker regularization
    home (optional) - if True add one shared home court column to the model;
        its coefficient is fitted but not included in the output
    Output:
    DataFrame with the columns TeamID, exp_prob_strength, prob_strength and
    Season, where exp_prob_strength = exp(prob_strength).
    Example:
    team_strength = team_strength_logregression(np.arange(1985,2020),alpha=3.2,home=False)
    """
    location_sign = {'H': 1, 'A': -1, 'N': 0}

    per_season = []
    for season in seasons:
        games = SeasonResults[SeasonResults['Season'] == season].copy()

        # Orient every game as (lower team ID) vs (higher team ID).
        low_id = np.minimum(games['WTeamID'], games['LTeamID'])
        high_id = np.maximum(games['WTeamID'], games['LTeamID'])
        low_won = games['WTeamID'] == low_id

        # Point margin and court location from the point of view of low_id.
        margin_low = np.where(low_won,
                              games['WScore'] - games['LScore'],
                              games['LScore'] - games['WScore'])
        winner_loc = games['WLoc'].map(location_sign)
        loc_low = np.where(low_won, winner_loc, -1 * winner_loc)

        # One design-matrix column per team, in order of first appearance.
        unique_teams = pd.concat([low_id, high_id], axis=0).unique()
        n_games = len(games)
        n_teams = len(unique_teams)
        column_of = dict(zip(unique_teams, np.arange(n_teams)))

        A = np.zeros((n_games, n_teams + 1 if home else n_teams))
        if home:
            # The last column carries the shared home court term.
            A[:, -1] = loc_low
        game_rows = np.arange(n_games)
        A[game_rows, low_id.map(column_of).values.astype(int)] = 1
        A[game_rows, high_id.map(column_of).values.astype(int)] = -1

        # Binary target: did the lower-ID team win?
        y = np.where(margin_low > 0, 1, 0)

        lin = LogisticRegression(C=alpha, fit_intercept=False, solver='lbfgs', max_iter=500)
        lin.fit(A, y)

        coefficients = lin.coef_.ravel()
        # Drop the trailing home court coefficient when it was fitted.
        team_coefficients = coefficients[:-1] if home else coefficients

        team_strength = pd.DataFrame({'TeamID': unique_teams})
        team_strength['exp_prob_strength'] = np.exp(team_coefficients)
        team_strength['prob_strength'] = team_coefficients
        team_strength['Season'] = season
        per_season.append(team_strength)

    return pd.concat(per_season, ignore_index=True)

In [351]:
def team_strength_home(seasons,alpha=0.003,reg_type='Lasso',home=True):
    """Estimate per-season team strengths with a separate home court term per team.

    Every game contributes one linear equation
        home(host) + strength(TeamID1) - strength(TeamID2) = margin of TeamID1
    where TeamID1 is the lower of the two team IDs and the home term of the
    hosting team enters with sign +1 when TeamID1 hosts, -1 when TeamID2 hosts
    and 0 on a neutral court. Each season is fitted on its own.

    Input:
    seasons - a list/array of seasons to compute the strength for
    alpha (optional) - the regularization parameter passed to the linear model
    reg_type (optional) - 'Ridge' selects Ridge; any other value uses Lasso
    home (optional) - if True fit one home court coefficient per team,
        if False fit team strengths only
    Output:
    DataFrame with one row per team and season. With home=True the columns are
    TeamID, strength_h, home, Season; with home=False they are TeamID,
    strength, Season.
    Example:
    team_strength_h = team_strength_home(np.arange(1985,2020),alpha=0.005,reg_type='Lasso',home=True)
    """
    location_sign = {'H': 1, 'A': -1, 'N': 0}

    data = []
    for s in seasons:
        SingleSeason = SeasonResults[SeasonResults['Season']==s].copy()

        # Orient every game as (lower team ID) vs (higher team ID).
        team1 = np.minimum(SingleSeason['WTeamID'], SingleSeason['LTeamID'])
        team2 = np.maximum(SingleSeason['WTeamID'], SingleSeason['LTeamID'])
        team1_won = SingleSeason['WTeamID'] == team1

        # Point margin and court location from the point of view of team1.
        y = np.where(team1_won,
                     SingleSeason['WScore'] - SingleSeason['LScore'],
                     SingleSeason['LScore'] - SingleSeason['WScore'])
        winner_loc = SingleSeason['WLoc'].map(location_sign)
        loc1 = np.where(team1_won, winner_loc, -1 * winner_loc)

        unique_teams = pd.concat([team1, team2], axis=0).unique()
        # n is needed on both branches; the original only set it when home=True,
        # so home=False failed with a NameError while filling the matrix.
        n = len(unique_teams)
        team_dict = dict(zip(unique_teams, np.arange(n)))
        col1 = team1.map(team_dict).values.astype(int)
        col2 = team2.map(team_dict).values.astype(int)

        # Columns 0..n-1 hold team strengths; columns n..2n-1 (home=True only)
        # hold the per-team home court terms.
        A = np.zeros((len(SingleSeason), 2 * n if home else n))
        rows = np.arange(len(SingleSeason))
        A[rows, col1] = 1
        A[rows, col2] = -1
        if home:
            # The host is team1 when loc1 == 1, otherwise team2; neutral-court
            # games get a 0 entry, so their choice of column has no effect.
            home_col = np.where(loc1 == 1, col1, col2) + n
            A[rows, home_col] = loc1

        if reg_type == 'Ridge':
            lin = Ridge(alpha=alpha)
        else:
            # 'Lasso' and any unrecognised value use Lasso, as before.
            lin = Lasso(alpha=alpha)
        lin.fit(A, y)

        team_strength = pd.DataFrame({'TeamID': unique_teams})
        if home:
            team_strength['strength_h'] = lin.coef_[:n]
            team_strength['home'] = lin.coef_[n:]
        else:
            team_strength['strength'] = lin.coef_
        team_strength['Season'] = s
        data.append(team_strength)
    return pd.concat(data,ignore_index=True)

I'm going to try to find the best regularization parameter. The method I'm choosing is to run logistic regression on the strength difference between the teams. I'm going to test it for a few seasons by training the logistic regression on all the seasons before the season in question and then computing the log loss for that season.

For example:
* train for all the seasons between 1985 and 2013 and predict 2014 
* train for all the seasons between 1985 and 2014 and predict 2015
* etc.

## Regression with no home advantage or global home court advantage

In [197]:
all_scores = []

# Grid over the shared home court term (on/off) and the Lasso penalty.
for home in [True,False]:
    for alpha in [0.0005,0.001,0.003,0.005,0.007,0.009,0.011]:
        team_strength = team_strength_regression(
            np.arange(1985,2020),
            alpha=alpha,
            reg_type='Lasso',
            home=home,
        )

        # Attach the strength of TeamID1 first and of TeamID2 second; the
        # second merge suffixes the clashing columns with _x (TeamID1) and
        # _y (TeamID2).
        TourneyResults = (
            TourneyCompactResults
            .merge(team_strength, how='left',
                   left_on=['Season','TeamID1'], right_on=['Season','TeamID'])
            .drop(columns='TeamID')
        )
        TourneyResults = (
            TourneyResults
            .merge(team_strength, how='left',
                   left_on=['Season','TeamID2'], right_on=['Season','TeamID'])
            .drop(columns='TeamID')
            .copy()
        )

        TourneyResults['strength_diff'] = TourneyResults['strength_x'] - TourneyResults['strength_y']

        cols = ['strength_diff']

        X = TourneyResults[cols]
        y = TourneyResults['result'].values

        # C is very large, so this second-stage model is almost unregularized.
        lr = LogisticRegression(solver='lbfgs',C=1000000,random_state=0,max_iter=1500)

        # Walk forward: fit on every tournament before season s, score season s.
        scores = np.zeros((5,1))
        for ii,s in enumerate(range(2014,2019)):
            idxTrain = TourneyResults['Season'].lt(s)
            idxTest = TourneyResults['Season'].eq(s)

            lr.fit(X.loc[idxTrain],y[idxTrain])
            ypred_lr = lr.predict_proba(X.loc[idxTest])

            scores[ii,0] = log_loss(y[idxTest],ypred_lr[:,1])

        all_scores.append([alpha,home,np.mean(scores),np.std(scores)])

all_scores_df = pd.DataFrame(all_scores,columns=['alpha','home','log_loss_mean','log_loss_std'])

all_scores_df

Unnamed: 0,alpha,home,log_loss_mean,log_loss_std
0,0.0005,True,0.54879,0.039981
1,0.001,True,0.548509,0.039987
2,0.003,True,0.547813,0.039944
3,0.005,True,0.548045,0.040479
4,0.007,True,0.549172,0.040691
5,0.009,True,0.551058,0.040744
6,0.011,True,0.553285,0.040189
7,0.0005,False,0.549349,0.041545
8,0.001,False,0.549031,0.041624
9,0.003,False,0.547843,0.041618


**Best results are for Lasso with alpha = 0.009 without taking home court advantage!** <br>
You can test Ridge regression but the results are not as good

## Regression with home court advantage computed for each team

In [303]:
all_scores = []

# Only the per-team home court variant is evaluated in this sweep.
for home in [True]:
    for alpha in [0.0005,0.001,0.003,0.005,0.007,0.009,0.011]:
        team_strength = team_strength_home(np.arange(1985,2020),alpha=alpha,reg_type='Lasso',home=home)
        # Attach the strength of TeamID1 first and of TeamID2 second; the
        # second merge suffixes the clashing columns with _x (TeamID1) and
        # _y (TeamID2).
        TourneyResults = (TourneyCompactResults
                                 .merge(team_strength,left_on=['Season','TeamID1'],right_on=['Season','TeamID'],how='left')
                                 .drop('TeamID',axis=1)
                                 .merge(team_strength,left_on=['Season','TeamID2'],right_on=['Season','TeamID'],how='left')
                                 .drop('TeamID',axis=1)
                                ).copy()

        # team_strength_home(home=True) names its strength column 'strength_h',
        # so the merged columns are strength_h_x / strength_h_y; the previous
        # 'strength_x' / 'strength_y' lookups raised a KeyError.
        TourneyResults['strength_diff'] = TourneyResults['strength_h_x'] - TourneyResults['strength_h_y']

        cols = ['strength_diff']

        X = TourneyResults.loc[:,cols]
        y = TourneyResults[['result']].values.ravel()

        # C is very large, so this second-stage model is almost unregularized.
        lr = LogisticRegression(solver='lbfgs',C=1000000,random_state=42,max_iter=1500)

        # Walk forward: fit on every tournament before season s, score season s.
        scores = np.zeros((5,1))
        for ii,s in enumerate(range(2014,2019)):
            idxTrain = (TourneyResults['Season'] < s)
            idxTest = (TourneyResults['Season'] == s)

            lr.fit(X.loc[idxTrain],y[idxTrain])

            ypred_lr = lr.predict_proba(X.loc[idxTest])

            scores[ii,0] = log_loss(y[idxTest],ypred_lr[:,1])

        all_scores.append([alpha,home,np.mean(scores),np.std(scores)])

all_scores_df = pd.DataFrame(all_scores,columns=['alpha','home','log_loss_mean','log_loss_std'])

all_scores_df

Unnamed: 0,alpha,home,log_loss_mean,log_loss_std
0,0.0005,True,0.561086,0.01691
1,0.001,True,0.56006,0.016589
2,0.003,True,0.557077,0.016596
3,0.005,True,0.556666,0.017058
4,0.007,True,0.557345,0.017857
5,0.009,True,0.558032,0.018664
6,0.011,True,0.559026,0.019223


We can see that this only makes predictions worse. 

## Logistic Regression
Let's try the same thing using Raddar's solution - https://www.kaggle.com/c/google-cloud-ncaa-march-madness-2020-division-1-womens-tournament/discussion/130619 


Notice - I added the home court advantage parameter and a regularization parameter. Let's tune those hyperparameters.

In [332]:
all_scores = []

# Grid over the shared home court term (on/off) and the C value handed to
# the strength model.
for home in [True,False]:
    for alpha in [0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2]:
        team_strength = team_strength_logregression(
            np.arange(1985,2020),
            alpha=alpha,
            home=home,
        )

        # Attach the strengths of TeamID1 first and of TeamID2 second; the
        # second merge suffixes the clashing columns with _x (TeamID1) and
        # _y (TeamID2).
        TourneyResults = (
            TourneyCompactResults
            .merge(team_strength, how='left',
                   left_on=['Season','TeamID1'], right_on=['Season','TeamID'])
            .drop(columns='TeamID')
        )
        TourneyResults = (
            TourneyResults
            .merge(team_strength, how='left',
                   left_on=['Season','TeamID2'], right_on=['Season','TeamID'])
            .drop(columns='TeamID')
            .copy()
        )

        TourneyResults['strength_diff'] = TourneyResults['prob_strength_x'] - TourneyResults['prob_strength_y']

        cols = ['strength_diff']

        X = TourneyResults[cols]
        y = TourneyResults['result'].values

        # Second-stage model mapping the strength difference to a win
        # probability (liblinear solver, C=1.0).
        lr = LogisticRegression(solver='liblinear',C=1.0,random_state=0,max_iter=500)

        # Walk forward: fit on every tournament before season s, score season s.
        scores = np.zeros((5,1))
        for ii,s in enumerate(range(2014,2019)):
            idxTrain = TourneyResults['Season'].lt(s)
            idxTest = TourneyResults['Season'].eq(s)

            lr.fit(X.loc[idxTrain],y[idxTrain])
            ypred_lr = lr.predict_proba(X.loc[idxTest])

            scores[ii,0] = log_loss(y[idxTest],ypred_lr[:,1])

        all_scores.append([alpha,home,np.mean(scores),np.std(scores)])

all_scores_df = pd.DataFrame(all_scores,columns=['alpha','home','log_loss_mean','log_loss_std'])

all_scores_df

Unnamed: 0,alpha,home,log_loss_mean,log_loss_std
0,0.2,True,0.6174,0.008563
1,0.4,True,0.601587,0.011159
2,0.8,True,0.58734,0.014808
3,1.6,True,0.57751,0.018588
4,3.2,True,0.572254,0.021705
5,6.4,True,0.570171,0.02391
6,12.8,True,0.569969,0.025364
7,25.6,True,0.570735,0.026278
8,51.2,True,0.572247,0.027129
9,0.2,False,0.592039,0.011507


This method doesn't predict outcome as well.

## Compare to other ranking methods
Let's see how many games each method predicts correctly by always choosing the higher-ranked team.

Note: the regression methods I hypertuned used the data we are checking, which gives them an unfair advantage. An apples-to-apples comparison would be to tune the regression on earlier data and test all the methods on the last few years. The disadvantage of that method is that it leaves fewer seasons for the comparison itself.

In [None]:
ranking = pd.read_csv(file_dict['MMasseyOrdinals'])
rank_methods = ['COL','DOL','MOR','POM','RTH','SAG','WLK','WOL']

# Keep the day-133 rankings of the selected systems (presumably the last
# snapshot before the tournament - confirm) and pivot them to one row per
# (Season, TeamID) with one column per ranking system.
team_rank = (
    ranking
    .loc[lambda r: r['RankingDayNum'].eq(133) & r['SystemName'].isin(rank_methods)]
    .groupby(['Season','TeamID','SystemName'])['OrdinalRank']
    .mean()
    .unstack('SystemName')
    .reset_index()
)

In [358]:
# TourneyCompactResults = pd.read_csv(file_dict['MNCAATourneyCompactResults'])

# Strength estimates from the three models for the 2003-2019 seasons.
team_strength = team_strength_regression(np.arange(2003,2020), alpha=0.009, reg_type='Lasso', home=False)
team_strength_log = team_strength_logregression(np.arange(2003,2020), home=False, alpha=3.2)
team_strength_h = team_strength_home(np.arange(2003,2020), home=True, alpha=0.005)

# One row per (Season, TeamID) holding the ordinal ranks and all strengths.
data = team_rank.merge(team_strength, on=['Season','TeamID'], how='left')
data = data.merge(team_strength_log, on=['Season','TeamID'], how='left')
data = data.merge(team_strength_h, on=['Season','TeamID'], how='left')

# Join the winner's features first and the loser's second; the second merge
# suffixes the clashing columns with _x (winner) and _y (loser).
TourneyResults = (
    TourneyCompactResults
    .merge(data, how='left', left_on=['Season','WTeamID'], right_on=['Season','TeamID'])
    .drop(columns='TeamID')
    .merge(data, how='left', left_on=['Season','LTeamID'], right_on=['Season','TeamID'])
    .drop(columns='TeamID')
    .copy()
)

rdata = TourneyResults[TourneyResults['Season']>2002].copy()

# Ordinal ranks: a smaller number is better, so the pick is correct when the
# loser's rank number is larger than the winner's.
rdata = rdata.assign(**{
    system + 'r': np.where(rdata[system + '_y'] > rdata[system + '_x'], 1, 0)
    for system in ['POM','SAG','COL','DOL','MOR','RTH','WLK','WOL']
})

# Strength scores: a larger number is better, so the pick is correct when the
# winner's strength is larger than the loser's.
rdata = rdata.assign(**{
    label: np.where(rdata[column + '_x'] > rdata[column + '_y'], 1, 0)
    for label, column in [('SHAr','strength'),
                          ('SHA2r','strength_h'),
                          ('RADDARr','prob_strength')]
})

# Accuracy per season, then averaged over the seasons.
g = rdata.groupby('Season')[['POMr','SAGr','COLr','DOLr','MORr','RTHr','WLKr','WOLr','SHAr','SHA2r','RADDARr']].mean()
g.mean()

POMr       0.715170
SAGr       0.712371
COLr       0.701589
DOLr       0.707817
MORr       0.713414
RTHr       0.706102
WLKr       0.707103
WOLr       0.707062
SHAr       0.716967
SHA2r      0.700752
RADDARr    0.704511
dtype: float64

In [357]:
# Display the current contents of TourneyCompactResults.
TourneyCompactResults

Unnamed: 0,Season,RankingDayNum,SystemName,TeamID,OrdinalRank
0,2003,35,SEL,1102,159
1,2003,35,SEL,1103,229
2,2003,35,SEL,1104,12
3,2003,35,SEL,1105,314
4,2003,35,SEL,1106,260
...,...,...,...,...,...
3820914,2019,133,ZAM,1462,70
3820915,2019,133,ZAM,1463,87
3820916,2019,133,ZAM,1464,242
3820917,2019,133,ZAM,1465,198


In [86]:
# Label every game with its tournament round, keyed by DayNum; any DayNum
# that is not listed here maps to NaN.
rdata['Round'] = rdata['DayNum'].map({
    136: '1st', 137: '1st',
    138: '2nd', 139: '2nd',
    143: '3-S16', 144: '3-S16',
    145: '4-E8', 146: '4-E8',
    152: '5-F4',
    154: '6-CG',
})
#rdata[(rdata['Season']<=2018)&(rdata['DayNum']>=136)].groupby('Round')[['POMr','SAGr','COLr','DOLr','MORr','RTHr','WLKr','WOLr','SHAr']].mean()

Unnamed: 0_level_0,POMr,SAGr,COLr,DOLr,MORr,RTHr,WLKr,WOLr,SHAr
Round,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1st,0.765625,0.748047,0.742188,0.742188,0.753906,0.75,0.751953,0.748047,0.757812
2nd,0.707031,0.730469,0.714844,0.714844,0.699219,0.722656,0.730469,0.714844,0.710938
3-S16,0.671875,0.703125,0.6875,0.679688,0.679688,0.679688,0.695312,0.679688,0.703125
4-E8,0.546875,0.515625,0.5,0.5,0.546875,0.5,0.453125,0.546875,0.53125
5-F4,0.65625,0.625,0.6875,0.75,0.65625,0.65625,0.625,0.625,0.6875
6-CG,0.625,0.5625,0.5,0.5625,0.6875,0.5,0.5,0.75,0.75


In [115]:
# Write the team_strength DataFrame to a CSV file without the index column.
team_strength.to_csv('team_strength_regression.csv',index=False)