In [1]:
import pandas as pd
import numpy as np
import json
pd.set_option("display.max_columns", None)
from IPython.display import display
from joblib import Parallel, delayed

In [2]:
teams_dict = {}
with open("teams.json", 'r') as _f:
    teams_dict = json.load(_f)

def convert_team(name, year):
    convert_dict = {}
    for team in teams_dict:
        convert_dict[team['teamName']] = team['abbreviation']
    
    name_changes = {
        'Charlotte Hornets': 'CHA',
        'Charlotte Bobcats': 'CHN',
        'New Orleans Hornets': 'NOK',
        'New Orleans Pelicans': 'NOP'
    }
    if name in name_changes:
        return name_changes[name]
    
    elif name == 'Brooklyn Nets':
        if year == 2012:
            return 'BRK'
        else:
            return 'BKN'
    
    return convert_dict[name]

In [3]:
schedules = pd.read_csv('2012_2017_schedules.csv', index_col=0)
played_schedule = schedules.dropna()
played_schedule['start_time'] = pd.to_datetime(played_schedule['start_time'])\
                                .dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

box_scores = pd.read_csv('2012_2017_box_scores.csv', index_col=0)
box_scores['date'] = pd.to_datetime(box_scores['date'])

season_stats = pd.read_csv('2012_2017_season_player_stats.csv', index_col=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [4]:
def get_starters(home_team, visitor_team, date):
    df_slice = box_scores.query('team=="'+ home_team +'" or team=="'+ visitor_team +'"')
    game_players = df_slice[df_slice['date'] == date] 
    visitor_players = game_players[game_players['team']==visitor_team]
    home_players = game_players[game_players['team']==home_team]
    home_starters_df = home_players.sort_values('seconds_played', ascending=False).head(5)
    home_starters = [(p['first_name'], p['last_name']) for _,p in home_starters_df.iterrows()]
    visitor_starters_df = visitor_players.sort_values('seconds_played', ascending=False).head(5)
    visitor_starters = [(p['first_name'], p['last_name']) for _,p in visitor_starters_df.iterrows()]
    return home_starters, visitor_starters

In [32]:
def get_previous_wins(team, opponent, date):
    # TODO: Get past record this season against opponent
    lower_year = date.year
    if date.month < 7:
        lower_year -= 1
    lower_bound = pd.to_datetime('09-01-' + str(lower_year)).date()
    df_slice = played_schedule[(played_schedule['start_time'] < date) \
                               & (played_schedule['start_time'] > lower_bound)]
    our_team_slice = df_slice.query('home_team_name=="' + team + '" or visiting_team_name=="' + team + '"')
    
    home_wins = our_team_slice.apply(lambda row: row['home_team_name'] == team and\
                                     row['home_team_score'] > row['visiting_team_score'], axis=1)
    home_wins = home_wins.sum() if not home_wins.empty else 0
    visiting_wins = our_team_slice.apply(lambda row: row['visiting_team_name'] == team and\
                                     row['home_team_score'] < row['visiting_team_score'], axis=1)
    visiting_wins = visiting_wins.sum() if not visiting_wins.empty else 0
    
    win_streak = 0
    for _,row in our_team_slice.sort_values('start_time', ascending=False).iterrows():
        if row['home_team_name'] == team and row['home_team_score'] > row['visiting_team_score']:
            win_streak += 1
        elif row['visiting_team_name'] == team and row['home_team_score'] < row['visiting_team_score']:
            win_streak += 1
        else:
            break
    
    losing_streak = 0
    if win_streak > 0:
        pass
    else:
        for _,row in our_team_slice.sort_values('start_time', ascending=False).iterrows():
            if row['home_team_name'] == team and row['home_team_score'] < row['visiting_team_score']:
                losing_streak += 1
            elif row['visiting_team_name'] == team and row['home_team_score'] > row['visiting_team_score']:
                losing_streak += 1
            else:
                break
                
    
    season_series = our_team_slice.query('home_team_name=="' + opponent + '" or visiting_team_name=="' + opponent + '"')
    season_series_wins = 0
    season_series_losses = 0
    for _,row in season_series.iterrows():
        if row['home_team_name'] == team:
            if row['home_team_score'] > row['visiting_team_score']:
                season_series_wins += 1
            else:
                season_series_losses +=1
        elif row['visiting_team_name'] == team:
            if row['home_team_score'] < row['visiting_team_score']:
                season_series_wins += 1
            else:
                season_series_losses += 1
    
    return {
        'wins_as_home_team': home_wins,
        'wins_as_visiting_team': visiting_wins,
        'total_current_wins': (home_wins + visiting_wins),
        'current_win_streak': win_streak,
        'current_losing_streak': losing_streak,
        'season_series_wins': season_series_wins,
        'season_series_losses': season_series_losses
    }

In [33]:
X, Y = [], []
df_regr_list = []

for _, row in played_schedule.iterrows():    
    game_dict = {}
    # Get the teams and starting players for this game
    game_date = row['start_time'].date()
    home = convert_team(row['home_team_name'], game_date.year)
    visitors = convert_team(row['visiting_team_name'], game_date.year)
    home_starters, visitor_starters = get_starters(home, visitors, game_date)
    game_dict['home_team_name'] = row['home_team_name']
    game_dict['home_team_abbr'] = home
    game_dict['visiting_team_name'] = row['visiting_team_name']
    game_dict['visiting_team_abbr'] = visitors
    game_dict['game_date'] = game_date
    
    for player_i in xrange(len(home_starters)):
        first_name = home_starters[player_i][0]
        last_name = home_starters[player_i][1]
        df_player_slice = box_scores.query('first_name=="' + first_name + '" and last_name=="' + last_name +'"')
        current_player = df_player_slice[df_player_slice['date'] < game_date]
        features = ['assists','blocks','defensive_rebounds','field_goal_attempts','field_goals',
                   'free_throw_attempts','free_throws','offensive_rebounds','personal_fouls','points',
                   'steals','three_point_field_goal_attempts','three_point_field_goals','total_rebounds',
                    'turnovers']
        game_dict['visitor_starter_' + str(player_i)] = first_name + " " + last_name
        
        for f in features:
            desired_feat = current_player[f]
            career_mean_feat = np.mean(desired_feat)
            career_mean_feat = 0 if career_mean_feat != career_mean_feat else career_mean_feat
            game_dict['home_starter_' + str(player_i) + "_CAREER_" + f] = career_mean_feat
    
    for player_i in xrange(len(visitor_starters)):
        first_name = visitor_starters[player_i][0]
        last_name = visitor_starters[player_i][1]
        df_player_slice = box_scores.query('first_name=="' + first_name + '" and last_name=="' + last_name +'"')
        current_player = df_player_slice[df_player_slice['date'] < game_date]
        features = ['assists','blocks','defensive_rebounds','field_goal_attempts','field_goals',
                   'free_throw_attempts','free_throws','offensive_rebounds','personal_fouls','points',
                   'steals','three_point_field_goal_attempts','three_point_field_goals','total_rebounds',
                    'turnovers']
        game_dict['visitor_starter_' + str(player_i)] = first_name + " " + last_name
        
        for f in features:
            desired_feat = current_player[f]
            career_mean_feat = np.mean(desired_feat) 
            career_mean_feat = 0 if career_mean_feat != career_mean_feat else career_mean_feat
            game_dict['visitor_starter_' + str(player_i) + "_CAREER_" + f] = career_mean_feat
    
    game_dict['home_won'] = row['home_team_score'] > row['visiting_team_score']
    home_win_stats = get_previous_wins(row['home_team_name'], row['visiting_team_name'], game_date)
    game_dict['home_team_total_wins'] = home_win_stats['total_current_wins']
    game_dict['home_team_wins_as_home'] = home_win_stats['wins_as_home_team']
    game_dict['home_team_wins_as_visitor'] = home_win_stats['wins_as_visiting_team']
    game_dict['home_team_current_win_streak'] = home_win_stats['current_win_streak']
    game_dict['home_team_current_losing_streak'] = home_win_stats['current_losing_streak']
    game_dict['home_team_season_series_wins'] = home_win_stats['season_series_wins']
    game_dict['home_team_season_series_losses'] = home_win_stats['season_series_losses']
    visiting_win_stats = get_previous_wins(row['visiting_team_name'], row['home_team_name'], game_date)
    game_dict['visiting_team_total_wins'] = visiting_win_stats['total_current_wins']
    game_dict['visiting_team_wins_as_home'] = visiting_win_stats['wins_as_home_team']
    game_dict['visiting_team_wins_as_visitor'] = visiting_win_stats['wins_as_visiting_team']
    game_dict['visiting_team_current_win_streak'] = visiting_win_stats['current_win_streak']
    game_dict['visiting_team_current_losing_streak'] = visiting_win_stats['current_losing_streak']
    game_dict['visiting_team_season_series_wins'] = visiting_win_stats['season_series_wins']
    game_dict['visiting_team_season_series_losses'] = visiting_win_stats['season_series_losses']
    
    df_regr_list.append(game_dict)

KeyboardInterrupt: 

In [35]:
pd.DataFrame(df_regr_list)

Unnamed: 0,game_date,home_starter_0_CAREER_assists,home_starter_0_CAREER_blocks,home_starter_0_CAREER_defensive_rebounds,home_starter_0_CAREER_field_goal_attempts,home_starter_0_CAREER_field_goals,home_starter_0_CAREER_free_throw_attempts,home_starter_0_CAREER_free_throws,home_starter_0_CAREER_offensive_rebounds,home_starter_0_CAREER_personal_fouls,home_starter_0_CAREER_points,home_starter_0_CAREER_steals,home_starter_0_CAREER_three_point_field_goal_attempts,home_starter_0_CAREER_three_point_field_goals,home_starter_0_CAREER_total_rebounds,home_starter_0_CAREER_turnovers,home_starter_1_CAREER_assists,home_starter_1_CAREER_blocks,home_starter_1_CAREER_defensive_rebounds,home_starter_1_CAREER_field_goal_attempts,home_starter_1_CAREER_field_goals,home_starter_1_CAREER_free_throw_attempts,home_starter_1_CAREER_free_throws,home_starter_1_CAREER_offensive_rebounds,home_starter_1_CAREER_personal_fouls,home_starter_1_CAREER_points,home_starter_1_CAREER_steals,home_starter_1_CAREER_three_point_field_goal_attempts,home_starter_1_CAREER_three_point_field_goals,home_starter_1_CAREER_total_rebounds,home_starter_1_CAREER_turnovers,home_starter_2_CAREER_assists,home_starter_2_CAREER_blocks,home_starter_2_CAREER_defensive_rebounds,home_starter_2_CAREER_field_goal_attempts,home_starter_2_CAREER_field_goals,home_starter_2_CAREER_free_throw_attempts,home_starter_2_CAREER_free_throws,home_starter_2_CAREER_offensive_rebounds,home_starter_2_CAREER_personal_fouls,home_starter_2_CAREER_points,home_starter_2_CAREER_steals,home_starter_2_CAREER_three_point_field_goal_attempts,home_starter_2_CAREER_three_point_field_goals,home_starter_2_CAREER_total_rebounds,home_starter_2_CAREER_turnovers,home_starter_3_CAREER_assists,home_starter_3_CAREER_blocks,home_starter_3_CAREER_defensive_rebounds,home_starter_3_CAREER_field_goal_attempts,home_starter_3_CAREER_field_goals,home_starter_3_CAREER_free_throw_attempts,home_starter_3_CAREER_free_throws,home_starter_3_CAREER_offensive_rebounds,home_starter_3_CAREER_personal_fouls,home_starter_3_CAREER_points,home_starter_3_CAREER_steals,home_starter_3_CAREER_three_point_field_goal_attempts,home_starter_3_CAREER_three_point_field_goals,home_starter_3_CAREER_total_rebounds,home_starter_3_CAREER_turnovers,home_starter_4_CAREER_assists,home_starter_4_CAREER_blocks,home_starter_4_CAREER_defensive_rebounds,home_starter_4_CAREER_field_goal_attempts,home_starter_4_CAREER_field_goals,home_starter_4_CAREER_free_throw_attempts,home_starter_4_CAREER_free_throws,home_starter_4_CAREER_offensive_rebounds,home_starter_4_CAREER_personal_fouls,home_starter_4_CAREER_points,home_starter_4_CAREER_steals,home_starter_4_CAREER_three_point_field_goal_attempts,home_starter_4_CAREER_three_point_field_goals,home_starter_4_CAREER_total_rebounds,home_starter_4_CAREER_turnovers,home_team_abbr,home_team_current_losing_streak,home_team_current_win_streak,home_team_name,home_team_season_series_losses,home_team_season_series_wins,home_team_total_wins,home_team_wins_as_home,home_team_wins_as_visitor,home_won,visiting_team_abbr,visiting_team_current_losing_streak,visiting_team_current_win_streak,visiting_team_name,visiting_team_season_series_losses,visiting_team_season_series_wins,visiting_team_total_wins,visiting_team_wins_as_home,visiting_team_wins_as_visitor,visitor_starter_0,visitor_starter_0_CAREER_assists,visitor_starter_0_CAREER_blocks,visitor_starter_0_CAREER_defensive_rebounds,visitor_starter_0_CAREER_field_goal_attempts,visitor_starter_0_CAREER_field_goals,visitor_starter_0_CAREER_free_throw_attempts,visitor_starter_0_CAREER_free_throws,visitor_starter_0_CAREER_offensive_rebounds,visitor_starter_0_CAREER_personal_fouls,visitor_starter_0_CAREER_points,visitor_starter_0_CAREER_steals,visitor_starter_0_CAREER_three_point_field_goal_attempts,visitor_starter_0_CAREER_three_point_field_goals,visitor_starter_0_CAREER_total_rebounds,visitor_starter_0_CAREER_turnovers,visitor_starter_1,visitor_starter_1_CAREER_assists,visitor_starter_1_CAREER_blocks,visitor_starter_1_CAREER_defensive_rebounds,visitor_starter_1_CAREER_field_goal_attempts,visitor_starter_1_CAREER_field_goals,visitor_starter_1_CAREER_free_throw_attempts,visitor_starter_1_CAREER_free_throws,visitor_starter_1_CAREER_offensive_rebounds,visitor_starter_1_CAREER_personal_fouls,visitor_starter_1_CAREER_points,visitor_starter_1_CAREER_steals,visitor_starter_1_CAREER_three_point_field_goal_attempts,visitor_starter_1_CAREER_three_point_field_goals,visitor_starter_1_CAREER_total_rebounds,visitor_starter_1_CAREER_turnovers,visitor_starter_2,visitor_starter_2_CAREER_assists,visitor_starter_2_CAREER_blocks,visitor_starter_2_CAREER_defensive_rebounds,visitor_starter_2_CAREER_field_goal_attempts,visitor_starter_2_CAREER_field_goals,visitor_starter_2_CAREER_free_throw_attempts,visitor_starter_2_CAREER_free_throws,visitor_starter_2_CAREER_offensive_rebounds,visitor_starter_2_CAREER_personal_fouls,visitor_starter_2_CAREER_points,visitor_starter_2_CAREER_steals,visitor_starter_2_CAREER_three_point_field_goal_attempts,visitor_starter_2_CAREER_three_point_field_goals,visitor_starter_2_CAREER_total_rebounds,visitor_starter_2_CAREER_turnovers,visitor_starter_3,visitor_starter_3_CAREER_assists,visitor_starter_3_CAREER_blocks,visitor_starter_3_CAREER_defensive_rebounds,visitor_starter_3_CAREER_field_goal_attempts,visitor_starter_3_CAREER_field_goals,visitor_starter_3_CAREER_free_throw_attempts,visitor_starter_3_CAREER_free_throws,visitor_starter_3_CAREER_offensive_rebounds,visitor_starter_3_CAREER_personal_fouls,visitor_starter_3_CAREER_points,visitor_starter_3_CAREER_steals,visitor_starter_3_CAREER_three_point_field_goal_attempts,visitor_starter_3_CAREER_three_point_field_goals,visitor_starter_3_CAREER_total_rebounds,visitor_starter_3_CAREER_turnovers,visitor_starter_4,visitor_starter_4_CAREER_assists,visitor_starter_4_CAREER_blocks,visitor_starter_4_CAREER_defensive_rebounds,visitor_starter_4_CAREER_field_goal_attempts,visitor_starter_4_CAREER_field_goals,visitor_starter_4_CAREER_free_throw_attempts,visitor_starter_4_CAREER_free_throws,visitor_starter_4_CAREER_offensive_rebounds,visitor_starter_4_CAREER_personal_fouls,visitor_starter_4_CAREER_points,visitor_starter_4_CAREER_steals,visitor_starter_4_CAREER_three_point_field_goal_attempts,visitor_starter_4_CAREER_three_point_field_goals,visitor_starter_4_CAREER_total_rebounds,visitor_starter_4_CAREER_turnovers
0,2012-10-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CLE,0,0,Cleveland Cavaliers,0,0,0,0,0,True,WAS,0,0,Washington Wizards,0,0,0,0,0,A.J. Price,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jordan Crawford,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Trevor Ariza,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Emeka Okafor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Martell Webster,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2012-10-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,LAL,0,0,Los Angeles Lakers,0,0,0,0,0,False,DAL,0,0,Dallas Mavericks,0,0,0,0,0,Elton Brand,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Shawn Marion,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Darren Collison,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,O.J. Mayo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Vince Carter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2012-10-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MIA,0,0,Miami Heat,0,0,0,0,0,True,BOS,0,0,Boston Celtics,0,0,0,0,0,Rajon Rondo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Paul Pierce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Kevin Garnett,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Brandon Bass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jason Terry,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2012-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CHI,0,0,Chicago Bulls,0,0,0,0,0,True,SAC,0,0,Sacramento Kings,0,0,0,0,0,Tyreke Evans,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Marcus Thornton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jason Thompson,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DeMarcus Cousins,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Isaiah Thomas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2012-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DET,0,0,Detroit Pistons,0,0,0,0,0,False,HOU,0,0,Houston Rockets,0,0,0,0,0,James Harden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Chandler Parsons,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jeremy Lin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Omer Asik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Marcus Morris,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2012-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,LAC,0,0,Los Angeles Clippers,0,0,0,0,0,True,MEM,0,0,Memphis Grizzlies,0,0,0,0,0,Zach Randolph,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Marc Gasol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Rudy Gay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Mike Conley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Tony Allen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2012-10-31,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NOK,0,0,New Orleans Hornets,0,0,0,0,0,False,SAS,0,0,San Antonio Spurs,0,0,0,0,0,Tony Parker,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Tim Duncan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Kawhi Leonard,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Danny Green,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Boris Diaw,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2012-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,PHI,0,0,Philadelphia 76ers,0,0,0,0,0,True,DEN,0,0,Denver Nuggets,0,0,0,0,0,Ty Lawson,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Andre Iguodala,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Andre Miller,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Kosta Koufos,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Corey Brewer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2012-10-31,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHX,0,0,Phoenix Suns,0,0,0,0,0,False,GSW,0,0,Golden State Warriors,0,0,0,0,0,David Lee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Klay Thompson,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stephen Curry,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jarrett Jack,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Brandon Rush,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2012-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,POR,0,0,Portland Trail Blazers,0,0,0,0,0,True,LAL,1,0,Los Angeles Lakers,0,0,0,0,0,Dwight Howard,2.0,1.0,6.0,12.0,8.0,14.0,3.0,4.0,6.0,19.0,0.0,0.0,0.0,10.0,3.0,Pau Gasol,6.0,3.0,9.0,19.0,8.0,8.0,6.0,4.0,2.0,23.0,0.0,1.0,1.0,13.0,2.0,Kobe Bryant,0.0,0.0,1.0,14.0,11.0,0.0,0.0,0.0,3.0,22.0,1.0,0.0,0.0,1.0,2.0,Metta World,4.0,0.0,6.0,8.0,1.0,2.0,1.0,2.0,1.0,3.0,3.0,3.0,0.0,8.0,3.0,Steve Blake,6.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,0.0,1.0


In [None]:
#Notes for regression:
# x_train = list of lists (interior list is all of features) - features should be (aggregate typically) numbers - x-train are vars to predict
# y_train = list of results (0-loss or 1-win)
# feature generation - creating x_train and y_train
# plug in and chug 5 lines code almost
# Make sure x_train and y_train are same length

# Tasks: 
# (1) create initial list of variables that thought to be important (8-25 variables - choose easiest first)
# (2) create the interior lists of x-train - one for each game and which games chosen dependent on current perspective
# (3) create the corresponding y-train (0,1) - basically in same order whether game chosen in x-train won or loss
# (4) plug in and chug once x-train and y-train completed

# Methods/Approaches - (1) PCA/SVM (2) Random Forest (3) Linear Regression - within each, can tweak parameters
# test model on subset of data that was not put into x-train - testtrainsplit() function arbitrarily picks values 
# for x-test and y-test and for train model (model.fit()) and 
# then use remaining values not used are used for model.predict()

# Predict point spread potentially

# start out with generic features - team stats first 
# individual stats would have to go into box scores and check/iterate through who played in those game
# historical team performances vs away/home 
# account for collinearity 

# have 3 models that work that have about 60-70% or higher (have at least one to) 
# cannot use variables that are not accessible before game starts 

# some good ones: previous game W/L, is_home/is_away, etc. 