In [None]:
import pandas as pd
import os
# from bs4 import BeautifulSoup
# from splinter import Browser
# import pprint

In [None]:
#Function to standardize names
def stdName(df,col_to_split):
    """Split a full-name column into upper-cased first name, last name and suffix.

    The frame is modified in place and also returned. Four columns are added
    (or overwritten):

    * ``FirstNm`` - text before the first space, upper-cased, '.' and ',' removed.
    * ``Last``    - everything after the first space, upper-cased, '.' and ','
      removed, with a trailing suffix rewritten as ``<name>,<suffix>``.
    * ``LastNm``  - ``Last`` without the suffix.
    * ``Suffix``  - the trailing suffix (JR, SR, II, III or V), missing when
      the name has none.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the name column.
    col_to_split : str
        Name of the column containing "First Last [Suffix]" strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the four columns added.
    """
    #Suffixes recognised at the END of the last name only, so a surname word
    #that merely starts with one of them (e.g. "DE VRIES") is left intact.
    suffix_pat = r"\s+(III|II|JR|SR|V)\s*$"

    #Separate out players first and last name for joining later.
    #reindex guarantees both columns exist even when no name contains a space;
    #astype(object) keeps the .str accessor usable on an all-missing column.
    parts = df[col_to_split].str.split(" ",n=1,expand=True).reindex(columns=[0,1]).astype(object)
    first = parts[0].str.upper()
    last = parts[1].str.upper()

    #Get rid of , and . which may be inconsistent based on sample.
    #regex=False: '.' has to be a literal dot, never the regex wildcard.
    for char in (".",","):
        first = first.str.replace(char,"",regex=False)
        last = last.str.replace(char,"",regex=False)

    df['FirstNm'] = first
    #Keep the historical "<name>,<suffix>" form in Last
    df['Last'] = last.str.replace(suffix_pat,r",\1",regex=True)
    #parse out the suffix from the last name; extract always yields one value
    #per row, so this works even when no row carries a suffix
    df['LastNm'] = last.str.replace(suffix_pat,"",regex=True)
    df['Suffix'] = last.str.extract(suffix_pat,expand=False)

    return df

In [None]:
#function to get csv files
def get_csv(filename):
    """Return the path of ``<filename>.csv`` inside the ``FPTS`` folder.

    The folder is looked up relative to the current working directory
    (``os.path.abspath('')``); the file itself is not opened or checked.

    Parameters
    ----------
    filename : str
        File name without the ``.csv`` extension.

    Returns
    -------
    str
        ``<working directory>/FPTS/<filename>.csv``
    """
    working_dir = os.path.abspath('')
    return "{}/FPTS/{}.csv".format(working_dir, filename)

In [None]:
#Function to replace null values with an average factored by a penalty
def fill_null_floats(df,col_headers,penalty=0,fill_other="NoRecord"):
    """Convert measurable columns to float and impute their missing values.

    Each column in ``col_headers`` is parsed as numeric, cast to float, and
    its missing entries are replaced by ``column mean - penalty``. The input
    frame is not modified; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    col_headers : iterable of str
        Columns to convert and impute. Values must be numeric or
        numeric-looking strings; anything else raises ``ValueError``.
    penalty : float, default 0
        Amount subtracted from the column mean before it is used as the fill
        value.
    fill_other : scalar or None, default "NoRecord"
        Value written into missing cells of every column NOT listed in
        ``col_headers``. The default reproduces the historical behaviour of
        this function; pass ``None`` to leave the other columns untouched.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the listed columns as float64 and imputed.
    """
    #Parse the measurables from the untouched input so the text sentinel
    #never ends up inside a numeric column.
    numeric = {col: pd.to_numeric(df[col]).astype(float) for col in col_headers}

    if fill_other is None:
        other_fill = {}
    else:
        other_fill = {col: fill_other for col in df.columns if col not in numeric}
    out = df.fillna(other_fill) if other_fill else df.copy()

    for col, values in numeric.items():
        #mean() skips missing values; an all-missing column stays missing
        out[col] = values.fillna(values.mean()-penalty)
    return out

In [None]:
#read in all files created of Year and fantasy points by player and concatenate together.
files = [f"{season}Fpts" for season in range(2008,2020)]
fpt_df = [pd.read_csv(get_csv(file)) for file in files]
#Put all the files into 1 dataframe (fresh 0..n-1 index instead of repeated per-file labels)
Fantasy_df = pd.concat(fpt_df, ignore_index=True)

#Strip some specific name editions that was attached to names.
#str.rstrip(' o') removes a trailing *set of characters*, so it also ate real
#letters ("Tony Romo" -> "Tony Rom"). Remove only a whole trailing marker that
#is preceded by whitespace. The .str methods also tolerate missing names.
#'III' is listed because the old rstrip(' II') stripped every trailing 'I'.
Fantasy_df['Player'] = Fantasy_df['Player'].str.rstrip()
for marker in ('PUP','SUS','o','III','II'):
    Fantasy_df['Player'] = Fantasy_df['Player'].str.replace(rf"\s+{marker}$","",regex=True)
#run overall name standardization
Fantasy_df=stdName(Fantasy_df,'Player')

# Save to a CSV file.  Commented out to prevent overwriting files
# Fantasy_df.to_csv("fantasypts2.csv")

#Read in and standardize names of college stats
college_df = pd.read_csv("allcollegestat.csv")
college_df=stdName(college_df,'Player')

#drop career games column because it's blank, then fill missing college stats with 0
college_df = college_df.drop(columns = ['careergames']).fillna(0)



In [None]:
combine_df = pd.read_csv('combine.csv')
combine_df=stdName(combine_df,'Player')

#Attach combine measurables to every college record (players without a combine row are kept)
collcomb = college_df.merge(combine_df, on = ['FirstNm','LastNm'], how = 'left')
#drop extra Mike Williamss after merge
# NOTE(review): 292 and 295 are row labels of this particular merge result; they
# must be re-checked whenever either input file changes.
collcomb.drop(index = [292,295], inplace = True)

#Limit dataset to draft classes 2009-2016.
#The years are integers: 'year' is compared and added to as a number everywhere
#else, and isin() does not match the string '2009' against the number 2009.
include3 = list(range(2009,2017))
# include4 = list(range(2009,2016))
#.copy() so the later clean-up works on independent frames, not views of collcomb
draft2020 = collcomb[collcomb['year']==2020].copy()
sim_data3 = collcomb[collcomb['year'].isin(include3)].copy()
# sim_data4 = collcomb[collcomb['year'].isin(include4)].copy()
#Row order is left as produced by the merge (the previous sort_values call discarded its result).

fname = sim_data3['FirstNm'].values
lname = sim_data3['LastNm'].values
draftyear = sim_data3['year'].values
#parallel arrays: position i describes one drafted player
name_yr_dict = {'firstnm':fname,'lastnm':lname,'draftyr':draftyear}
#quick check of the first player's second season
name_yr_dict['draftyr'][0]+1

In [None]:
#this code will go through the dictionary of individuals that were drafted and return the fantasy points scored in their
#first 3 seasons

#list to collect individual dfs
ind_df = []
no_stats = []
failures = []
success= []
#one summary frame per player (with or without stats) in loop order;
#concatenated once after the loop instead of growing a frame row by row
summary_frames = []
for i in range(len(fname)):
    #create a list of first 3 years in the league for each player
    draft_yr = name_yr_dict['draftyr'][i]
    years = [round(draft_yr+offset,0) for offset in range(3)]

    #get first and last name of each record
    first = name_yr_dict['firstnm'][i]
    last = name_yr_dict['lastnm'][i]

    try:
        #get a conditional dataframe of players
        indiv_df = Fantasy_df[(Fantasy_df['FirstNm']==first) & (Fantasy_df['LastNm']==last)]
        fant_indiv = indiv_df[indiv_df['Year'].isin(years)]
        # NOTE(review): sum() aggregates every column, not only FPTS; the drop
        # list further down may rely on the resulting column set, so it is kept as is.
        df = fant_indiv.groupby('Player').sum()
        df.reset_index(inplace =True)
        df.drop(columns = ['Year'], inplace = True)
        df['FirstNm']=first
        df['LastNm']=last
    except Exception as exc:
        #best effort: record the player and the reason, then carry on with the next one
        print(f"{first} {last}: failed ({exc!r})")
        failures.append(f"{first} {last}")
        continue

    if len(df)>0:
        ind_df.append(df)
        summary_frames.append(df)
        print(f"{first} {last}: success")
        success.append(f"{first} {last}")
    else:
        #player never scored in the window: keep them with zero points
        print(f"{first} {last}: no stats {years}")
        no_stats.append(f"{first} {last}")
        new_row = {'Player':'no points','FPTS':0,'FirstNm':first,'LastNm':last}
        summary_frames.append(pd.DataFrame([new_row]))

if summary_frames:
    fant_summ_df = pd.concat(summary_frames, ignore_index = True)
else:
    #nothing matched: still provide the join keys so the merge below works
    fant_summ_df = pd.DataFrame(columns = ['Player','FPTS','FirstNm','LastNm'])

#list of columns that need to be floats and adjusted to null values
col_replace = ['40YD','Vertical', 'BenchReps', 'Broad Jump', '3Cone', 'Shuttle']

#replace null values and convert measurables to floats using function defined above
sim_data3 = fill_null_floats(sim_data3,col_replace)
draft2020=fill_null_floats(draft2020,col_replace)

#keep only players present in both the college/combine data and the fantasy summary
final3 = sim_data3.merge(fant_summ_df, how = 'inner',on = ['FirstNm','LastNm'])

model = final3.drop(columns=['Unnamed: 0_x','Year_x','Pos_x','Last_x','Suffix_x','Unnamed: 0_y', 'Rk', 'Year_y','Player_y', 'Pos_y',
                    'School_y','Last_y', 'Suffix_y', 'Player','School_x','Team','Round'])

#reset index to get a column with a unique number and rename that column ID
model = model.reset_index().rename(columns = {'index':'ID'})

#create CSV file of data to read into model- NOTE: Commented out because of manual adjustments
# model.to_csv("modelPython.csv")
