## Merge Scrapes and Send to SQLite - WEEK 3

Import pickles and then merge them together.   

Always start with ESPN dataframe and merge onto it to keep consistent with naming conventions.

In [1]:
import pandas as pd
import numpy as np

from sqlalchemy import create_engine

import time

### Import Data from Pickle Archives/Quick Checks
- Import data
- Quick check of how many players appear in all of the projection sources (expect a count in the high 300s based on initial name checks; if it is lower than that, go back and use the Scraping Check for Naming Overlaps to investigate further and make sure new formatting/scrape edge cases aren't breaking the scrape functions)

#### Week 3

In [2]:
#import week 3 projections
#each scrape was archived as a pickled object under pickle_archive/; the trailing part of each
#file name looks like the scrape timestamp (TODO confirm against the scraping notebook)
#NOTE(review): pd.read_pickle unpickles the file, which can execute arbitrary code - only load
#archives that were produced by this project
df_wk3_ppr_proj_espn = pd.read_pickle('pickle_archive/Week3_PPR_Projections_ESPN_2018-9-18-18-3.pkl')
df_wk3_ppr_proj_cbs = pd.read_pickle('pickle_archive/Week3_PPR_Projections_CBS_2018-9-18-18-4.pkl')
df_wk3_ppr_proj_sharks = pd.read_pickle('pickle_archive/Week3_PPR_Projections_Sharks_2018-9-18-18-4.pkl')
df_wk3_ppr_proj_scout = pd.read_pickle('pickle_archive/Week3_PPR_Projections_SCOUT_2018-9-18-18-5.pkl')
#the "previous week actuals" used for the week 3 merges come from the Week2 actuals archive
df_wk3_ppr_prvs_wk_actuals = pd.read_pickle('pickle_archive/Week2_Player_Actual_PPR_2018-9-18-17-59.pkl')

#week 3 FanDuel salaries (written to SQLite as-is later in the notebook, not merged)
df_wk3_salary_fanduel = pd.read_pickle('pickle_archive/Week3_Salary_FanDuel_2018-9-18-18-5.pkl')

In [3]:
#print (rows, columns) of every week 3 dataframe, one "label  shape" line per dataframe
print('\n'.join(
    f'{label} {frame.shape}'
    for label, frame in (
        ('Week3 ESPN: ', df_wk3_ppr_proj_espn),
        ('Week3 CBS: ', df_wk3_ppr_proj_cbs),
        ('Week3 SHARKS: ', df_wk3_ppr_proj_sharks),
        ('Week3 SCOUT: ', df_wk3_ppr_proj_scout),
        ('Week2 ACTUALS: ', df_wk3_ppr_prvs_wk_actuals),
        ('Week3 FANDUEL SALARY: ', df_wk3_salary_fanduel),
    )
))

Week3 ESPN:  (1009, 5)
Week3 CBS:  (830, 5)
Week3 SHARKS:  (971, 5)
Week3 SCOUT:  (374, 5)
Week2 ACTUALS:  (1009, 5)
Week3 FANDUEL SALARY:  (665, 5)


In [4]:
#count the players that appear in every one of the four projection sources
#(the previous week actual results are deliberately left out of this check)
#NOTE: this compares on the PLAYER name only, while the merge functions below join on
#PLAYER and POS, so this count does not have to equal the inner-merge row count
espn_cbs_sharks_scout_intersection = set.intersection(
    *(set(projections.PLAYER.tolist())
      for projections in (df_wk3_ppr_proj_espn,
                          df_wk3_ppr_proj_cbs,
                          df_wk3_ppr_proj_sharks,
                          df_wk3_ppr_proj_scout)))

print('Number of Overlapping Players in All Weekly Projections: ',
      len(espn_cbs_sharks_scout_intersection))

Number of Overlapping Players in All Weekly Projections:  359


### Merge Data & Save to SQLite
- Merge Data
- Save to SQLite Database

In [5]:
###FUNCTION CREATE TABLES IN OUR SQLITE DATABASE - USE A FIXED DATABASE NAME OF fantasy_football_2018.db###

#function inputs are the dataframe, the table_name, and the
#if_exists_action can be 'fail', 'replace', 'append' (default is set to 'append' if user doesn't put in anything)
def add_to_SQLite(df, table_name, if_exists_action='append'):
    """Write a dataframe to a table in the fixed fantasy_football_2018.db SQLite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to store. The dataframe index is not written (index=False).
    table_name : str
        Name of the destination table.
    if_exists_action : {'fail', 'replace', 'append'}, default 'append'
        Forwarded unchanged to ``DataFrame.to_sql(if_exists=...)``.

    Raises
    ------
    ValueError
        Per the pandas ``to_sql`` documentation, when ``if_exists_action='fail'``
        and the table already exists.

    Prints a confirmation line once the write has succeeded.
    """
    default_database_path = 'sqlite:///fantasy_football_2018.db'
    disk_engine = create_engine(default_database_path)

    try:
        #chunksize is the number of rows written per batch; the original author found
        #SQLite errored out with batches larger than 100
        df.to_sql(table_name, disk_engine,
                  if_exists=if_exists_action, index=False,
                  chunksize=100)
    finally:
        #a new engine is created on every call, so release its connection pool
        #even when the write fails
        disk_engine.dispose()

    print(f"Table {table_name} was added to the database at {default_database_path}")

In [6]:
###FUNCTION INNER MERGES ALL THE WEEKLY PROJECTION INFORMATION - TO GET ONLY PLAYERS THAT SHOW UP IN ALL PROJECTION DFs###

#want inputs to be the weekly ppr projection dataframes for each of ESPN, CBS, SHARKS, SCOUT,
#and then also the previous week fantasy score
def get_weekly_PPR_proj_df_inner_merge(df_espn_ppr_proj, df_cbs_ppr_proj,
                                         df_sharks_ppr_proj, df_scout_ppr_proj,
                                         df_ppr_prvs_wk_actuals):
    """Combine the weekly PPR projections, keeping only players present in every source.

    The ESPN frame is the base (PLAYER, POS, TEAM, FPTS_PPR_ESPN). CBS, Sharks and
    Scout are inner-merged onto it on PLAYER and POS, so TEAM always comes from ESPN.
    The previous week actuals are then left-merged so that a player without a
    previous-week row is kept, with 0.0 as the previous-week score.

    Parameters
    ----------
    df_espn_ppr_proj : DataFrame with PLAYER, POS, TEAM, FPTS_PPR_ESPN columns
    df_cbs_ppr_proj : DataFrame with PLAYER, POS, FPTS_PPR_CBS columns
    df_sharks_ppr_proj : DataFrame with PLAYER, POS, FPTS_PPR_SHARKS columns
    df_scout_ppr_proj : DataFrame with PLAYER, POS, FPTS_PPR_SCOUT columns
    df_ppr_prvs_wk_actuals : DataFrame with PLAYER, POS, FPTS_PPR_ACTUAL columns

    Returns
    -------
    DataFrame
        Columns PLAYER, POS, TEAM, FPTS_PPR_ESPN, FPTS_PPR_CBS, FPTS_PPR_SHARKS,
        FPTS_PPR_SCOUT, FPTS_PPR_PRVS_WK_ACTUAL. The input frames are not modified.

    Prints the shape after each of the three inner merges and the final shape.
    """
    merge_keys = ['PLAYER', 'POS']

    #start with espn proj so its TEAM column is the one carried through
    df_ppr_proj = df_espn_ppr_proj[['PLAYER', 'POS', 'TEAM', 'FPTS_PPR_ESPN']]

    #merge in cbs, sharks, scout in that order as inner (only keep overlapping players)
    for df_source, points_column in ((df_cbs_ppr_proj, 'FPTS_PPR_CBS'),
                                     (df_sharks_ppr_proj, 'FPTS_PPR_SHARKS'),
                                     (df_scout_ppr_proj, 'FPTS_PPR_SCOUT')):
        df_ppr_proj = pd.merge(df_ppr_proj,
                               df_source[merge_keys + [points_column]],
                               how='inner',
                               on=merge_keys)
        print(df_ppr_proj.shape)

    #previous week actual is a left merge: a player that didn't play last week
    #must not be dropped from this week
    df_ppr_proj = pd.merge(df_ppr_proj,
                           df_ppr_prvs_wk_actuals[merge_keys + ['FPTS_PPR_ACTUAL']],
                           how='left',
                           on=merge_keys)

    #rename column title so know it is previous week actual
    df_ppr_proj = df_ppr_proj.rename(
        columns={'FPTS_PPR_ACTUAL': 'FPTS_PPR_PRVS_WK_ACTUAL'})

    #players missing from the previous week actuals come out of the left merge as
    #None/NaN; they would have had zero points, so fill them with 0.0
    df_ppr_proj['FPTS_PPR_PRVS_WK_ACTUAL'] = (
        df_ppr_proj['FPTS_PPR_PRVS_WK_ACTUAL'].fillna(0.0))

    #print final shape
    print(df_ppr_proj.shape)
    return df_ppr_proj

In [7]:
###FUNCTION OUTER MERGES ALL THE WEEKLY PROJECTION INFORMATION - GETS ALL SCRAPED DATA PULLED###
##!!!DO NOT USE THIS FOR CUSTOM WEIGHTED PERCENTAGE CALCULATIONS AS WILL HAVE LOTS OF NULL OBJECTS
##UNLESS FILTER OUT NAN ROWS in SQL

#want inputs to be the weekly ppr projection dataframes for each of ESPN, CBS, SHARKS, SCOUT,
#and then also the previous week fantasy score
def get_weekly_PPR_proj_df_outer_merge(df_espn_ppr_proj, df_cbs_ppr_proj,
                                         df_sharks_ppr_proj, df_scout_ppr_proj,
                                         df_ppr_prvs_wk_actuals):
    """Combine the weekly PPR projections, keeping every scraped player.

    The ESPN frame is the base (PLAYER, POS, TEAM, FPTS_PPR_ESPN). CBS, Sharks, Scout
    and the previous week actuals are outer-merged onto it on PLAYER and POS, in that
    order. TEAM is only taken from the ESPN frame, and any value a source does not
    provide for a player is left missing.

    Returns a DataFrame with columns PLAYER, POS, TEAM, FPTS_PPR_ESPN, FPTS_PPR_CBS,
    FPTS_PPR_SHARKS, FPTS_PPR_SCOUT, FPTS_PPR_PRVS_WK_ACTUAL.

    Prints the shape after each of the three projection merges and the final shape.
    """
    join_columns = ['PLAYER', 'POS']
    projection_sources = (
        (df_cbs_ppr_proj, 'FPTS_PPR_CBS'),
        (df_sharks_ppr_proj, 'FPTS_PPR_SHARKS'),
        (df_scout_ppr_proj, 'FPTS_PPR_SCOUT'),
    )

    #espn proj is the starting point
    all_scraped = df_espn_ppr_proj[['PLAYER', 'POS', 'TEAM', 'FPTS_PPR_ESPN']]

    #fold in cbs, then sharks, then scout as outer (include all players scraped)
    for source_frame, points_column in projection_sources:
        all_scraped = pd.merge(all_scraped,
                               source_frame[join_columns + [points_column]],
                               how='outer',
                               on=join_columns)
        print(all_scraped.shape)

    #previous week actual is also merged as outer (include all players scraped)
    all_scraped = pd.merge(all_scraped,
                           df_ppr_prvs_wk_actuals[join_columns + ['FPTS_PPR_ACTUAL']],
                           how='outer',
                           on=join_columns)

    #label the actuals column as belonging to the previous week
    all_scraped = all_scraped.rename(
        columns={'FPTS_PPR_ACTUAL': 'FPTS_PPR_PRVS_WK_ACTUAL'})

    #print final shape
    print(all_scraped.shape)
    return all_scraped

#### Merge Week 3 Projection Information and Send to SQLite

In [8]:
###INNER MERGE WEEK 3 PROJECTION INFORMATION###
#keyword arguments make it explicit which scrape feeds which merge input
df_wk3_ppr_projections = get_weekly_PPR_proj_df_inner_merge(
    df_espn_ppr_proj=df_wk3_ppr_proj_espn,
    df_cbs_ppr_proj=df_wk3_ppr_proj_cbs,
    df_sharks_ppr_proj=df_wk3_ppr_proj_sharks,
    df_scout_ppr_proj=df_wk3_ppr_proj_scout,
    df_ppr_prvs_wk_actuals=df_wk3_ppr_prvs_wk_actuals,
)
df_wk3_ppr_projections.head()

(736, 5)
(493, 6)
(357, 7)
(357, 8)


Unnamed: 0,PLAYER,POS,TEAM,FPTS_PPR_ESPN,FPTS_PPR_CBS,FPTS_PPR_SHARKS,FPTS_PPR_SCOUT,FPTS_PPR_PRVS_WK_ACTUAL
0,Alvin Kamara,RB,NO,21.8,21.0,20.8,30.4,17.9
1,Ezekiel Elliott,RB,Dal,21.1,15.0,20.6,20.7,19.7
2,Tom Brady,QB,NE,20.9,23.0,23.6,26.4,16.4
3,Todd Gurley,RB,LAR,20.9,23.0,20.9,30.5,32.3
4,Antonio Brown,WR,Pit,20.2,23.0,22.9,25.1,15.7


In [10]:
###ADD WEEK 3 INNER MERGE PROJECTIONS TO SQLITE###
#if_exists_action='fail' is forwarded to DataFrame.to_sql(if_exists=...); per the pandas docs that raises
#ValueError when the table already exists, so re-running this cell errors instead of overwriting or duplicating rows
add_to_SQLite(df=df_wk3_ppr_projections,
              table_name='week3_ppr_projections',
              if_exists_action='fail')

Table week3_ppr_projections was added to the database at sqlite:///fantasy_football_2018.db


In [11]:
###OUTER MERGE WEEK 3 PROJECTION INFORMATION###
#keyword arguments make it explicit which scrape feeds which merge input
df_wk3_ppr_proj_all_scraped = get_weekly_PPR_proj_df_outer_merge(
    df_espn_ppr_proj=df_wk3_ppr_proj_espn,
    df_cbs_ppr_proj=df_wk3_ppr_proj_cbs,
    df_sharks_ppr_proj=df_wk3_ppr_proj_sharks,
    df_scout_ppr_proj=df_wk3_ppr_proj_scout,
    df_ppr_prvs_wk_actuals=df_wk3_ppr_prvs_wk_actuals,
)
df_wk3_ppr_proj_all_scraped.head()

(1103, 5)
(1567, 6)
(1569, 7)
(1569, 8)


Unnamed: 0,PLAYER,POS,TEAM,FPTS_PPR_ESPN,FPTS_PPR_CBS,FPTS_PPR_SHARKS,FPTS_PPR_SCOUT,FPTS_PPR_PRVS_WK_ACTUAL
0,Alvin Kamara,RB,NO,21.8,21.0,20.8,30.4,17.9
1,Ezekiel Elliott,RB,Dal,21.1,15.0,20.6,20.7,19.7
2,Tom Brady,QB,NE,20.9,23.0,23.6,26.4,16.4
3,Todd Gurley,RB,LAR,20.9,23.0,20.9,30.5,32.3
4,Antonio Brown,WR,Pit,20.2,23.0,22.9,25.1,15.7


In [12]:
###ADD WEEK 3 OUTER MERGE PROJECTIONS TO SQLITE###
#if_exists_action='fail' is forwarded to DataFrame.to_sql(if_exists=...); per the pandas docs that raises
#ValueError when the table already exists, so re-running this cell errors instead of overwriting or duplicating rows
add_to_SQLite(df=df_wk3_ppr_proj_all_scraped,
              table_name='week3_ppr_projections_all_scraped',
              if_exists_action='fail')

Table week3_ppr_projections_all_scraped was added to the database at sqlite:///fantasy_football_2018.db


#### Save Week 3 FanDuel Salary Information to DB

In [14]:
###SAVE WEEK 3 FANDUEL SALARY INFO TO SQLITE###
#the salary dataframe is stored exactly as loaded from its pickle (it is not merged with the projections)
#if_exists_action='fail' is forwarded to DataFrame.to_sql(if_exists=...); per the pandas docs that raises
#ValueError when the table already exists, so re-running this cell errors instead of overwriting or duplicating rows
add_to_SQLite(df=df_wk3_salary_fanduel,
              table_name='week3_salaries_fanduel',
              if_exists_action='fail')

Table week3_salaries_fanduel was added to the database at sqlite:///fantasy_football_2018.db


#### Save Actual Player PPR Results to DB for Week2

In [15]:
###SAVE WEEK 2 ACTUAL PPR RESULTS INFO TO SQLITE###

#above the actual ppr results for week 2 were loaded as df_wk3_ppr_prvs_wk_actuals
#(they are the "previous week" input of the week 3 merges)
#give them a week 2 name first, just to prevent confusion
#(this binds a second name to the same dataframe object; it is not a copy)
df_wk2_ppr_actuals = df_wk3_ppr_prvs_wk_actuals

#add to SQLite
#if_exists_action='fail' is forwarded to DataFrame.to_sql(if_exists=...); per the pandas docs that raises
#ValueError when the table already exists, so re-running this cell errors instead of overwriting or duplicating rows
add_to_SQLite(df=df_wk2_ppr_actuals,
              table_name='week2_ppr_actuals',
              if_exists_action='fail')

Table week2_ppr_actuals was added to the database at sqlite:///fantasy_football_2018.db
