In [49]:
import pandas as pd
import numpy as np
from datetime import datetime
import json
import time
import os
from tqdm import tqdm
from scipy import stats
import networkx as nx
from itertools import combinations

In [3]:
os.chdir('/home/valesco/Datasets/dk_downloads/')
contest = pd.read_csv('contest-standings-38904899.csv')
own_df = contest[['Player', '%Drafted', 'FPTS']]
own_df.columns = ['player_name', 'own_percent', 'actual_points']
contest_df = contest.drop(['Player', '%Drafted', 'Unnamed: 6',
                        'FPTS'], axis = 1)
contest_df

Unnamed: 0,Rank,EntryId,EntryName,TimeRemaining,Points,Lineup
0,1,712596001,Bnelly7896 (1/3),0,587.0,G Luke List G Justin Rose G Stewart Cink G Ric...
1,2,710808642,mateskec,0,582.5,G Peter Uihlein G Jon Rahm G Rickie Fowler G R...
2,3,711602570,jtbearord34 (2/2),0,582.0,G Justin Rose G Stewart Cink G Rickie Fowler G...
3,4,712651295,zflowers12,0,575.0,G Luke List G Phil Mickelson G Rickie Fowler G...
4,5,712883361,SonOfTheDon84,0,572.0,G Peter Uihlein G Justin Rose G Keegan Bradley...
5,6,712096192,tvagb (1/3),0,569.0,G J.B. Holmes G Rickie Fowler G Sung Kang G Ru...
6,7,710608030,RangerBella (1/3),0,566.5,G Luke List G Justin Rose G Rickie Fowler G Ru...
7,8,711430251,hus29 (1/3),0,565.5,G Peter Uihlein G Luke List G Patrick Reed G R...
8,9,712278702,Westicles,0,564.0,G Luke List G Phil Mickelson G Rickie Fowler G...
9,10,712523792,Mighty_Giants (3/3),0,563.5,G Justin Rose G Rickie Fowler G Russell Henley...


In [4]:
contest_df.columns = ['rank', 'entry_id', 'entry_name', 'time_remaining', 
                'team_points', 'lineup']

contest_df.dropna(inplace = True)
contest_df['lineup'] = contest_df['lineup'].apply(lambda x: x.replace(' G ',','))
contest_df['lineup'] = contest_df['lineup'].apply(lambda x:\
                x.replace('G ','').split(','))
contest_df[['player_1', 'player_2', 'player_3', 'player_4', 
           'player_5', 'player_6']] = contest_df['lineup'].apply(pd.Series)
contest_df

Unnamed: 0,rank,entry_id,entry_name,time_remaining,team_points,lineup,player_1,player_2,player_3,player_4,player_5,player_6
0,1,712596001,Bnelly7896 (1/3),0,587.0,"[Luke List, Justin Rose, Stewart Cink, Rickie ...",Luke List,Justin Rose,Stewart Cink,Rickie Fowler,Russell Henley,Tony Finau
1,2,710808642,mateskec,0,582.5,"[Peter Uihlein, Jon Rahm, Rickie Fowler, Russe...",Peter Uihlein,Jon Rahm,Rickie Fowler,Russell Henley,D.A. Points,Rafael Campos
2,3,711602570,jtbearord34 (2/2),0,582.0,"[Justin Rose, Stewart Cink, Rickie Fowler, Hud...",Justin Rose,Stewart Cink,Rickie Fowler,Hudson Swafford,Russell Henley,D.A. Points
3,4,712651295,zflowers12,0,575.0,"[Luke List, Phil Mickelson, Rickie Fowler, Hud...",Luke List,Phil Mickelson,Rickie Fowler,Hudson Swafford,Russell Henley,Tony Finau
4,5,712883361,SonOfTheDon84,0,572.0,"[Peter Uihlein, Justin Rose, Keegan Bradley, R...",Peter Uihlein,Justin Rose,Keegan Bradley,Rickie Fowler,Russell Henley,Jason Dufner
5,6,712096192,tvagb (1/3),0,569.0,"[J.B. Holmes, Rickie Fowler, Sung Kang, Russel...",J.B. Holmes,Rickie Fowler,Sung Kang,Russell Henley,Daniel Berger,Michael Kim
6,7,710608030,RangerBella (1/3),0,566.5,"[Luke List, Justin Rose, Rickie Fowler, Russel...",Luke List,Justin Rose,Rickie Fowler,Russell Henley,Jamie Lovemark,Kyle Stanley
7,8,711430251,hus29 (1/3),0,565.5,"[Peter Uihlein, Luke List, Patrick Reed, Ricki...",Peter Uihlein,Luke List,Patrick Reed,Rickie Fowler,Russell Henley,Daniel Berger
8,9,712278702,Westicles,0,564.0,"[Luke List, Phil Mickelson, Rickie Fowler, Rus...",Luke List,Phil Mickelson,Rickie Fowler,Russell Henley,Kyle Stanley,Tony Finau
9,10,712523792,Mighty_Giants (3/3),0,563.5,"[Justin Rose, Rickie Fowler, Russell Henley, K...",Justin Rose,Rickie Fowler,Russell Henley,Kyle Stanley,Jason Dufner,Tony Finau


In [5]:
for col in ['player_1', 'player_2', 'player_3', 'player_4', 
           'player_5', 'player_6']:
    contest_df = contest_df.merge(own_df[['player_name', 'own_percent']], left_on = col, 
                right_on = 'player_name')
    
for col in ['player_1', 'player_2', 'player_3', 'player_4', 
           'player_5', 'player_6']:
    contest_df = contest_df.merge(own_df[['player_name', 'actual_points']], left_on = col, 
                right_on = 'player_name')


In [6]:
contest_df.columns = ['team_rank', 'entry_id', 'dfs_player', 'time_remaining', 
        'team_points', 'lineup', 'player_1', 'player_2', 'player_3', 
        'player_4', 'player_5', 'player_6', 'p1', 'p1_own', 'p2', 'p2_own',
        'p3', 'p3_own', 'p4', 'p4_own', 'p5', 'p5_own', 'p6', 'p6_own',
        'p1a', 'p1_points', 'p2a', 'p2_points', 'p3', 'p3_points', 'p4', 
        'p4_points', 'p5', 'p5_points', 'p6', 'p6_points']

contest_df = contest_df[['team_rank', 'entry_id', 'dfs_player', 'time_remaining',
        'team_points', 'lineup', 'p1', 'p1_own', 'p1_points', 'p2', 'p2_own',
        'p2_points', 'p3', 'p3_own', 'p3_points', 'p4', 'p4_own', 'p4_points',
        'p5', 'p5_own', 'p5_points', 'p6', 'p6_own', 'p6_points']]



In [7]:
contest_df = contest_df.T.groupby(level=0).first().T

contest_df = contest_df[['team_rank', 'entry_id', 'dfs_player', 'time_remaining',
        'team_points', 'lineup', 'p1', 'p1_own', 'p1_points', 'p2', 'p2_own',
        'p2_points', 'p3', 'p3_own', 'p3_points', 'p4', 'p4_own', 'p4_points',
        'p5', 'p5_own', 'p5_points', 'p6', 'p6_own', 'p6_points']]

contest_df.sort_values(by = 'team_rank', inplace = True)

contest_df

Unnamed: 0,team_rank,entry_id,dfs_player,time_remaining,team_points,lineup,p1,p1_own,p1_points,p2,...,p3_points,p4,p4_own,p4_points,p5,p5_own,p5_points,p6,p6_own,p6_points
0,1,712596001,Bnelly7896 (1/3),0,587,"[Luke List, Justin Rose, Stewart Cink, Rickie ...",Luke List,4.11%,106,Justin Rose,...,78.5,Rickie Fowler,30.81%,121,Russell Henley,17.94%,135,Tony Finau,32.27%,65.5
11834,2,710808642,mateskec,0,582.5,"[Peter Uihlein, Jon Rahm, Rickie Fowler, Russe...",Peter Uihlein,19.11%,74,Jon Rahm,...,121,Russell Henley,17.94%,135,D.A. Points,4.10%,73.5,Rafael Campos,2.20%,96.5
5628,3,711602570,jtbearord34 (2/2),0,582,"[Justin Rose, Stewart Cink, Rickie Fowler, Hud...",Justin Rose,16.80%,81,Stewart Cink,...,121,Hudson Swafford,3.32%,93,Russell Henley,17.94%,135,D.A. Points,4.10%,73.5
132,4,712651295,zflowers12,0,575,"[Luke List, Phil Mickelson, Rickie Fowler, Hud...",Luke List,4.11%,106,Phil Mickelson,...,121,Hudson Swafford,3.32%,93,Russell Henley,17.94%,135,Tony Finau,32.27%,65.5
3336,5,712883361,SonOfTheDon84,0,572,"[Peter Uihlein, Justin Rose, Keegan Bradley, R...",Peter Uihlein,19.11%,74,Justin Rose,...,82,Rickie Fowler,30.81%,121,Russell Henley,17.94%,135,Jason Dufner,13.03%,79
5575,6,712096192,tvagb (1/3),0,569,"[J.B. Holmes, Rickie Fowler, Sung Kang, Russel...",J.B. Holmes,16.19%,23,Rickie Fowler,...,116,Russell Henley,17.94%,135,Daniel Berger,6.32%,103,Michael Kim,3.68%,71
7568,7,710608030,RangerBella (1/3),0,566.5,"[Luke List, Justin Rose, Rickie Fowler, Russel...",Luke List,4.11%,106,Justin Rose,...,121,Russell Henley,17.94%,135,Jamie Lovemark,6.08%,41.5,Kyle Stanley,6.96%,82
5873,8,711430251,hus29 (1/3),0,565.5,"[Peter Uihlein, Luke List, Patrick Reed, Ricki...",Peter Uihlein,19.11%,74,Luke List,...,26.5,Rickie Fowler,30.81%,121,Russell Henley,17.94%,135,Daniel Berger,6.32%,103
1491,9,712278702,Westicles,0,564,"[Luke List, Phil Mickelson, Rickie Fowler, Rus...",Luke List,4.11%,106,Phil Mickelson,...,121,Russell Henley,17.94%,135,Kyle Stanley,6.96%,82,Tony Finau,32.27%,65.5
1159,10,712844211,allanwattsdfs (2/3),0,563.5,"[Justin Rose, Rickie Fowler, Russell Henley, K...",Justin Rose,16.80%,81,Rickie Fowler,...,135,Kyle Stanley,6.96%,82,Jason Dufner,13.03%,79,Tony Finau,32.27%,65.5


In [8]:
player_ls = []

for col in ['p1', 'p2', 'p3', 'p4', 'p5', 'p6']:
    temp_unique = contest_df[col].unique()
    
    for player in temp_unique:
        if player not in player_ls:
            player_ls.extend([player])
            

In [19]:
corr_df = pd.DataFrame(columns = ['player', 'p_value', 'r_value', 'own_%', 'top_1%', 'top_5%', 
            'top_10%', 'top_20%'])
corr_count = 0

for player in tqdm(player_ls):
    temp_array = contest_df['lineup'].apply(lambda x: 1 if player in x else 0).values
    rank_array = contest_df['team_rank'].values.astype(int)

    slope, intercept, r_value, p_value, std_err = stats.linregress(rank_array, temp_array)
    
    entries = len(temp_array)
    
    own = sum(temp_array) / entries
    top_1 = sum(temp_array[:int(entries * .01)]) / (entries * .01)
    top_5 = sum(temp_array[:int(entries * .05)]) / (entries * .05)
    top_10 = sum(temp_array[:int(entries * .1)]) / (entries * .1)
    top_20 = sum(temp_array[:int(entries * .2)]) / (entries * .2)
    
    
    corr_df.loc[corr_count] = [player, p_value, r_value, own, top_1, top_5, top_10, top_20]
    corr_count += 1

100%|██████████| 140/140 [00:02<00:00, 67.24it/s]


In [20]:
corr_df.sort_values(by = 'p_value', ascending = True)

Unnamed: 0,player,p_value,r_value,own_%,top_1%,top_5%,top_10%,top_20%
54,Russell Henley,0.000000e+00,-0.436485,0.179753,0.851910,0.698566,0.584268,0.480974
7,Rickie Fowler,0.000000e+00,-0.490626,0.308817,0.880307,0.782337,0.738322,0.662715
18,Jordan Spieth,3.104162e-276,0.292672,0.239458,0.000000,0.012779,0.025557,0.057504
16,Patrick Reed,3.625609e-192,0.245353,0.286952,0.007099,0.061054,0.098680,0.119267
52,Matt Kuchar,1.890725e-139,0.209474,0.103862,0.000000,0.007099,0.016328,0.026267
17,Rafael Cabrera-Bello,1.366490e-126,0.199643,0.326281,0.021298,0.103649,0.138435,0.193809
21,Lee Westwood,1.307696e-123,0.197288,0.081712,0.000000,0.017038,0.015618,0.022008
6,Jon Rahm,1.095779e-114,-0.190051,0.282479,0.326565,0.386199,0.388329,0.379455
27,Henrik Stenson,4.664818e-114,0.189529,0.147238,0.000000,0.015618,0.023428,0.044725
0,Luke List,1.143842e-95,-0.173548,0.041176,0.369161,0.177481,0.139855,0.101164


In [22]:
player_edges = []

for index, row in contest_df.iterrows():
    for combo in combinations(row['lineup'], 2): 
        combo = combo + (entries - int(row['team_rank']),)
        player_edges.append(combo)
        
        
G = nx.Graph()

G.add_nodes_from(player_ls)
G.add_weighted_edges_from(player_edges)

In [23]:
cent = nx.eigenvector_centrality(G)

sorted(cent.items(), key = lambda x: x[1])

[('Peter Malnati', 0.0005428623274376995),
 ('Brett Stegmaier', 0.00477444775995518),
 ('Charlie Beljan', 0.009902025235216782),
 ('Fredrik Jacobson', 0.012853540775674242),
 ('Jason Bohn', 0.014124623737542434),
 ('Rich Berberian', 0.014629551385530024),
 ('Ben Willman', 0.015443128546678701),
 ('Greg Chalmers', 0.016797490860243586),
 ('Carl Pettersson', 0.018677202087622614),
 ('David Hearn', 0.019694452863680587),
 ('Stuart Appleby', 0.022074487498085767),
 ('Jordan Spieth', 0.022949805810243373),
 ('Ricky Barnes', 0.027650651447339206),
 ('Mark Hubbard', 0.028944921070425542),
 ('Henrik Stenson', 0.029823874577175172),
 ('K.J. Choi', 0.030545658912363504),
 ('Bob Estes', 0.0307510355276038),
 ('Robert Allenby', 0.031648861862722984),
 ('Andrew Loupe', 0.03234162707384483),
 ('Steven Bowditch', 0.03414996223066981),
 ('Rafael Cabrera-Bello', 0.035886014111902134),
 ('Lee Westwood', 0.03617110725825275),
 ('Patrick Reed', 0.03640600164011604),
 ('Steve Marino', 0.03746896781747714),

In [25]:
in_degree = nx.degree_centrality(G)

sorted(in_degree.items(), key = lambda x: x[1])

[('Charlie Beljan', 0.03597122302158273),
 ('Peter Malnati', 0.03597122302158273),
 ('David Hearn', 0.06474820143884892),
 ('Brett Stegmaier', 0.07194244604316546),
 ('Rich Berberian', 0.07194244604316546),
 ('Jason Bohn', 0.08633093525179857),
 ('Andrew Loupe', 0.08633093525179857),
 ('Ben Willman', 0.10791366906474821),
 ('Spencer Levin', 0.1223021582733813),
 ('Blayne Barber', 0.12949640287769784),
 ('Mark Hubbard', 0.12949640287769784),
 ('Matt Every', 0.1366906474820144),
 ('Steve Marino', 0.1366906474820144),
 ('Derek Fathauer', 0.14388489208633093),
 ('Troy Merritt', 0.14388489208633093),
 ('Carl Pettersson', 0.14388489208633093),
 ('Fredrik Jacobson', 0.1510791366906475),
 ('Bob Estes', 0.16546762589928057),
 ('Bryce Molder', 0.1798561151079137),
 ('Ken Duke', 0.18705035971223022),
 ('Ben Martin', 0.18705035971223022),
 ('Greg Chalmers', 0.19424460431654678),
 ('Stuart Appleby', 0.19424460431654678),
 ('Steven Bowditch', 0.20863309352517986),
 ('Angel Cabrera', 0.23021582733812

In [26]:
close_cent = nx.closeness_centrality(G)

sorted(close_cent.items(), key = lambda x: x[1])

[('Peter Malnati', 0.5036231884057971),
 ('Charlie Beljan', 0.5072992700729927),
 ('David Hearn', 0.5148148148148148),
 ('Rich Berberian', 0.516728624535316),
 ('Brett Stegmaier', 0.5186567164179104),
 ('Jason Bohn', 0.5225563909774437),
 ('Andrew Loupe', 0.5225563909774437),
 ('Ben Willman', 0.5285171102661597),
 ('Spencer Levin', 0.5325670498084292),
 ('Blayne Barber', 0.5346153846153846),
 ('Mark Hubbard', 0.5346153846153846),
 ('Matt Every', 0.5366795366795367),
 ('Steve Marino', 0.5366795366795367),
 ('Derek Fathauer', 0.5387596899224806),
 ('Troy Merritt', 0.5387596899224806),
 ('Carl Pettersson', 0.5387596899224806),
 ('Fredrik Jacobson', 0.5408560311284046),
 ('Bob Estes', 0.5450980392156862),
 ('Bryce Molder', 0.549407114624506),
 ('Ken Duke', 0.5515873015873016),
 ('Ben Martin', 0.5515873015873016),
 ('Greg Chalmers', 0.5537848605577689),
 ('Stuart Appleby', 0.5537848605577689),
 ('Steven Bowditch', 0.5582329317269076),
 ('Angel Cabrera', 0.5650406504065041),
 ('Geoff Ogilvy'

In [39]:
G.number_of_edges('Adam Scott', 'Tony Finau')

1

In [32]:
count = 0

for index, df in contest_df.iterrows():
    if 'Tony Finau' in df['lineup'] and 'Adam Scott' in df['lineup']:
        print(df['lineup'])
        count += 1
        
count

['Luke List', 'Rickie Fowler', 'Russell Henley', 'Kyle Stanley', 'Adam Scott', 'Tony Finau']
['Rickie Fowler', 'Jhonattan Vegas', 'Russell Henley', 'Jason Dufner', 'Adam Scott', 'Tony Finau']
['Peter Uihlein', 'Rickie Fowler', 'Russell Henley', 'Kyle Stanley', 'Adam Scott', 'Tony Finau']
['Peter Uihlein', 'Rickie Fowler', 'Jhonattan Vegas', 'Russell Henley', 'Adam Scott', 'Tony Finau']
['Rickie Fowler', 'Russell Henley', 'Billy Horschel', 'Jason Dufner', 'Adam Scott', 'Tony Finau']
['Peter Uihlein', 'Rickie Fowler', 'Russell Henley', 'Adam Scott', 'Tony Finau', 'J.J. Spaun']
['Rickie Fowler', 'Russell Henley', 'Jason Dufner', 'Adam Scott', 'Tony Finau', 'Jimmy Walker']
['Rickie Fowler', 'Cameron Tringale', 'Russell Henley', 'Kyle Stanley', 'Adam Scott', 'Tony Finau']
['Phil Mickelson', 'Russell Henley', 'Daniel Berger', 'Jason Dufner', 'Adam Scott', 'Tony Finau']
['Bryson DeChambeau', 'Rickie Fowler', 'Daniel Berger', 'Jason Dufner', 'Adam Scott', 'Tony Finau']
['Patrick Reed', 'Rickie

981

In [40]:
MG = nx.MultiGraph()

MG.add_nodes_from(player_ls)
MG.add_weighted_edges_from(player_edges)

In [43]:
mg_close_cent = nx.closeness_centrality(MG)

sorted(mg_close_cent.items(), key = lambda x: x[1])

[('Peter Malnati', 0.5036231884057971),
 ('Charlie Beljan', 0.5072992700729927),
 ('David Hearn', 0.5148148148148148),
 ('Rich Berberian', 0.516728624535316),
 ('Brett Stegmaier', 0.5186567164179104),
 ('Jason Bohn', 0.5225563909774437),
 ('Andrew Loupe', 0.5225563909774437),
 ('Ben Willman', 0.5285171102661597),
 ('Spencer Levin', 0.5325670498084292),
 ('Blayne Barber', 0.5346153846153846),
 ('Mark Hubbard', 0.5346153846153846),
 ('Matt Every', 0.5366795366795367),
 ('Steve Marino', 0.5366795366795367),
 ('Derek Fathauer', 0.5387596899224806),
 ('Troy Merritt', 0.5387596899224806),
 ('Carl Pettersson', 0.5387596899224806),
 ('Fredrik Jacobson', 0.5408560311284046),
 ('Bob Estes', 0.5450980392156862),
 ('Bryce Molder', 0.549407114624506),
 ('Ken Duke', 0.5515873015873016),
 ('Ben Martin', 0.5515873015873016),
 ('Greg Chalmers', 0.5537848605577689),
 ('Stuart Appleby', 0.5537848605577689),
 ('Steven Bowditch', 0.5582329317269076),
 ('Angel Cabrera', 0.5650406504065041),
 ('Geoff Ogilvy'

In [48]:
mg_degree_cent = nx.degree_centrality(MG)

sorted(mg_degree_cent.items(), key = lambda x: x[1])

[('Charlie Beljan', 0.03597122302158273),
 ('Peter Malnati', 0.03597122302158273),
 ('David Hearn', 0.07194244604316546),
 ('Rich Berberian', 0.07194244604316546),
 ('Jason Bohn', 0.10791366906474821),
 ('Brett Stegmaier', 0.10791366906474821),
 ('Andrew Loupe', 0.10791366906474821),
 ('Spencer Levin', 0.14388489208633093),
 ('Ben Willman', 0.14388489208633093),
 ('Mark Hubbard', 0.14388489208633093),
 ('Blayne Barber', 0.1798561151079137),
 ('Derek Fathauer', 0.1798561151079137),
 ('Troy Merritt', 0.1798561151079137),
 ('Carl Pettersson', 0.1798561151079137),
 ('Matt Every', 0.1798561151079137),
 ('Steve Marino', 0.1798561151079137),
 ('Bob Estes', 0.21582733812949642),
 ('Fredrik Jacobson', 0.21582733812949642),
 ('Bryce Molder', 0.2517985611510791),
 ('Stuart Appleby', 0.2517985611510791),
 ('Ken Duke', 0.28776978417266186),
 ('Ben Martin', 0.28776978417266186),
 ('Greg Chalmers', 0.32374100719424465),
 ('Steven Bowditch', 0.3597122302158274),
 ('Ricky Barnes', 0.43165467625899284),