In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import json
import time
import os
from tqdm import tqdm
from scipy import stats
import networkx as nx
from itertools import combinations
from collections import Counter

In [3]:
os.chdir('/home/valesco/Datasets/dk_downloads/')
contest = pd.read_csv('contest-standings-38904899.csv')
own_df = contest[['Player', '%Drafted', 'FPTS']]
own_df.columns = ['player_name', 'own_percent', 'actual_points']
contest_df = contest.drop(['Player', '%Drafted', 'Unnamed: 6',
                        'FPTS'], axis = 1)
contest_df

Unnamed: 0,Rank,EntryId,EntryName,TimeRemaining,Points,Lineup
0,1,712596001,Bnelly7896 (1/3),0,587.0,G Luke List G Justin Rose G Stewart Cink G Ric...
1,2,710808642,mateskec,0,582.5,G Peter Uihlein G Jon Rahm G Rickie Fowler G R...
2,3,711602570,jtbearord34 (2/2),0,582.0,G Justin Rose G Stewart Cink G Rickie Fowler G...
3,4,712651295,zflowers12,0,575.0,G Luke List G Phil Mickelson G Rickie Fowler G...
4,5,712883361,SonOfTheDon84,0,572.0,G Peter Uihlein G Justin Rose G Keegan Bradley...
5,6,712096192,tvagb (1/3),0,569.0,G J.B. Holmes G Rickie Fowler G Sung Kang G Ru...
6,7,710608030,RangerBella (1/3),0,566.5,G Luke List G Justin Rose G Rickie Fowler G Ru...
7,8,711430251,hus29 (1/3),0,565.5,G Peter Uihlein G Luke List G Patrick Reed G R...
8,9,712278702,Westicles,0,564.0,G Luke List G Phil Mickelson G Rickie Fowler G...
9,10,712523792,Mighty_Giants (3/3),0,563.5,G Justin Rose G Rickie Fowler G Russell Henley...


In [4]:
contest_df.columns = ['rank', 'entry_id', 'entry_name', 'time_remaining', 
                'team_points', 'lineup']

contest_df.dropna(inplace = True)
contest_df['lineup'] = contest_df['lineup'].apply(lambda x: x.replace(' G ',','))
contest_df['lineup'] = contest_df['lineup'].apply(lambda x:\
                x.replace('G ','').split(','))
contest_df[['player_1', 'player_2', 'player_3', 'player_4', 
           'player_5', 'player_6']] = contest_df['lineup'].apply(pd.Series)
contest_df

Unnamed: 0,rank,entry_id,entry_name,time_remaining,team_points,lineup,player_1,player_2,player_3,player_4,player_5,player_6
0,1,712596001,Bnelly7896 (1/3),0,587.0,"[Luke List, Justin Rose, Stewart Cink, Rickie ...",Luke List,Justin Rose,Stewart Cink,Rickie Fowler,Russell Henley,Tony Finau
1,2,710808642,mateskec,0,582.5,"[Peter Uihlein, Jon Rahm, Rickie Fowler, Russe...",Peter Uihlein,Jon Rahm,Rickie Fowler,Russell Henley,D.A. Points,Rafael Campos
2,3,711602570,jtbearord34 (2/2),0,582.0,"[Justin Rose, Stewart Cink, Rickie Fowler, Hud...",Justin Rose,Stewart Cink,Rickie Fowler,Hudson Swafford,Russell Henley,D.A. Points
3,4,712651295,zflowers12,0,575.0,"[Luke List, Phil Mickelson, Rickie Fowler, Hud...",Luke List,Phil Mickelson,Rickie Fowler,Hudson Swafford,Russell Henley,Tony Finau
4,5,712883361,SonOfTheDon84,0,572.0,"[Peter Uihlein, Justin Rose, Keegan Bradley, R...",Peter Uihlein,Justin Rose,Keegan Bradley,Rickie Fowler,Russell Henley,Jason Dufner
5,6,712096192,tvagb (1/3),0,569.0,"[J.B. Holmes, Rickie Fowler, Sung Kang, Russel...",J.B. Holmes,Rickie Fowler,Sung Kang,Russell Henley,Daniel Berger,Michael Kim
6,7,710608030,RangerBella (1/3),0,566.5,"[Luke List, Justin Rose, Rickie Fowler, Russel...",Luke List,Justin Rose,Rickie Fowler,Russell Henley,Jamie Lovemark,Kyle Stanley
7,8,711430251,hus29 (1/3),0,565.5,"[Peter Uihlein, Luke List, Patrick Reed, Ricki...",Peter Uihlein,Luke List,Patrick Reed,Rickie Fowler,Russell Henley,Daniel Berger
8,9,712278702,Westicles,0,564.0,"[Luke List, Phil Mickelson, Rickie Fowler, Rus...",Luke List,Phil Mickelson,Rickie Fowler,Russell Henley,Kyle Stanley,Tony Finau
9,10,712523792,Mighty_Giants (3/3),0,563.5,"[Justin Rose, Rickie Fowler, Russell Henley, K...",Justin Rose,Rickie Fowler,Russell Henley,Kyle Stanley,Jason Dufner,Tony Finau


In [5]:
for col in ['player_1', 'player_2', 'player_3', 'player_4', 
           'player_5', 'player_6']:
    contest_df = contest_df.merge(own_df[['player_name', 'own_percent']], left_on = col, 
                right_on = 'player_name')
    
for col in ['player_1', 'player_2', 'player_3', 'player_4', 
           'player_5', 'player_6']:
    contest_df = contest_df.merge(own_df[['player_name', 'actual_points']], left_on = col, 
                right_on = 'player_name')


In [6]:
contest_df.columns = ['team_rank', 'entry_id', 'dfs_player', 'time_remaining', 
        'team_points', 'lineup', 'player_1', 'player_2', 'player_3', 
        'player_4', 'player_5', 'player_6', 'p1', 'p1_own', 'p2', 'p2_own',
        'p3', 'p3_own', 'p4', 'p4_own', 'p5', 'p5_own', 'p6', 'p6_own',
        'p1a', 'p1_points', 'p2a', 'p2_points', 'p3', 'p3_points', 'p4', 
        'p4_points', 'p5', 'p5_points', 'p6', 'p6_points']

contest_df = contest_df[['team_rank', 'entry_id', 'dfs_player', 'time_remaining',
        'team_points', 'lineup', 'p1', 'p1_own', 'p1_points', 'p2', 'p2_own',
        'p2_points', 'p3', 'p3_own', 'p3_points', 'p4', 'p4_own', 'p4_points',
        'p5', 'p5_own', 'p5_points', 'p6', 'p6_own', 'p6_points']]



In [7]:
contest_df = contest_df.T.groupby(level=0).first().T

contest_df = contest_df[['team_rank', 'entry_id', 'dfs_player', 'time_remaining',
        'team_points', 'lineup', 'p1', 'p1_own', 'p1_points', 'p2', 'p2_own',
        'p2_points', 'p3', 'p3_own', 'p3_points', 'p4', 'p4_own', 'p4_points',
        'p5', 'p5_own', 'p5_points', 'p6', 'p6_own', 'p6_points']]

contest_df.sort_values(by = 'team_rank', inplace = True)

contest_df

Unnamed: 0,team_rank,entry_id,dfs_player,time_remaining,team_points,lineup,p1,p1_own,p1_points,p2,...,p3_points,p4,p4_own,p4_points,p5,p5_own,p5_points,p6,p6_own,p6_points
0,1,712596001,Bnelly7896 (1/3),0,587,"[Luke List, Justin Rose, Stewart Cink, Rickie ...",Luke List,4.11%,106,Justin Rose,...,78.5,Rickie Fowler,30.81%,121,Russell Henley,17.94%,135,Tony Finau,32.27%,65.5
11834,2,710808642,mateskec,0,582.5,"[Peter Uihlein, Jon Rahm, Rickie Fowler, Russe...",Peter Uihlein,19.11%,74,Jon Rahm,...,121,Russell Henley,17.94%,135,D.A. Points,4.10%,73.5,Rafael Campos,2.20%,96.5
5628,3,711602570,jtbearord34 (2/2),0,582,"[Justin Rose, Stewart Cink, Rickie Fowler, Hud...",Justin Rose,16.80%,81,Stewart Cink,...,121,Hudson Swafford,3.32%,93,Russell Henley,17.94%,135,D.A. Points,4.10%,73.5
132,4,712651295,zflowers12,0,575,"[Luke List, Phil Mickelson, Rickie Fowler, Hud...",Luke List,4.11%,106,Phil Mickelson,...,121,Hudson Swafford,3.32%,93,Russell Henley,17.94%,135,Tony Finau,32.27%,65.5
3336,5,712883361,SonOfTheDon84,0,572,"[Peter Uihlein, Justin Rose, Keegan Bradley, R...",Peter Uihlein,19.11%,74,Justin Rose,...,82,Rickie Fowler,30.81%,121,Russell Henley,17.94%,135,Jason Dufner,13.03%,79
5575,6,712096192,tvagb (1/3),0,569,"[J.B. Holmes, Rickie Fowler, Sung Kang, Russel...",J.B. Holmes,16.19%,23,Rickie Fowler,...,116,Russell Henley,17.94%,135,Daniel Berger,6.32%,103,Michael Kim,3.68%,71
7568,7,710608030,RangerBella (1/3),0,566.5,"[Luke List, Justin Rose, Rickie Fowler, Russel...",Luke List,4.11%,106,Justin Rose,...,121,Russell Henley,17.94%,135,Jamie Lovemark,6.08%,41.5,Kyle Stanley,6.96%,82
5873,8,711430251,hus29 (1/3),0,565.5,"[Peter Uihlein, Luke List, Patrick Reed, Ricki...",Peter Uihlein,19.11%,74,Luke List,...,26.5,Rickie Fowler,30.81%,121,Russell Henley,17.94%,135,Daniel Berger,6.32%,103
1491,9,712278702,Westicles,0,564,"[Luke List, Phil Mickelson, Rickie Fowler, Rus...",Luke List,4.11%,106,Phil Mickelson,...,121,Russell Henley,17.94%,135,Kyle Stanley,6.96%,82,Tony Finau,32.27%,65.5
1159,10,712844211,allanwattsdfs (2/3),0,563.5,"[Justin Rose, Rickie Fowler, Russell Henley, K...",Justin Rose,16.80%,81,Rickie Fowler,...,135,Kyle Stanley,6.96%,82,Jason Dufner,13.03%,79,Tony Finau,32.27%,65.5


In [8]:
player_ls = []

for col in ['p1', 'p2', 'p3', 'p4', 'p5', 'p6']:
    temp_unique = contest_df[col].unique()
    
    for player in temp_unique:
        if player not in player_ls:
            player_ls.extend([player])
            

In [10]:
corr_df = pd.DataFrame(columns = ['player', 'p_value', 'r_value', 'own_%', 'top_1%', 'top_5%', 
            'top_10%', 'top_20%'])
corr_count = 0

for player in tqdm(player_ls):
    temp_array = contest_df['lineup'].apply(lambda x: 1 if player in x else 0).values
    rank_array = contest_df['team_rank'].values.astype(int)

    slope, intercept, r_value, p_value, std_err = stats.linregress(rank_array, temp_array)
    
    entries = len(temp_array)
    
    own = sum(temp_array) / entries
    top_1 = sum(temp_array[:int(entries * .01)]) / (entries * .01)
    top_5 = sum(temp_array[:int(entries * .05)]) / (entries * .05)
    top_10 = sum(temp_array[:int(entries * .1)]) / (entries * .1)
    top_20 = sum(temp_array[:int(entries * .2)]) / (entries * .2)
    
    
    corr_df.loc[corr_count] = [player, p_value, r_value, own, top_1, top_5, top_10, top_20]
    corr_count += 1

100%|██████████| 140/140 [00:01<00:00, 73.64it/s]


In [11]:
corr_df.sort_values(by = 'own_%', ascending = False)

Unnamed: 0,player,p_value,r_value,own_%,top_1%,top_5%,top_10%,top_20%
9,Phil Mickelson,3.593854e-03,-0.024532,0.360571,0.291069,0.387619,0.369871,0.362062
17,Rafael Cabrera-Bello,1.366490e-126,0.199643,0.326281,0.021298,0.103649,0.138435,0.193809
92,Tony Finau,1.367888e-60,-0.137706,0.323371,0.404657,0.435894,0.413886,0.397913
7,Rickie Fowler,0.000000e+00,-0.490626,0.308817,0.880307,0.782337,0.738322,0.662715
16,Patrick Reed,3.625609e-192,0.245353,0.286952,0.007099,0.061054,0.098680,0.119267
6,Jon Rahm,1.095779e-114,-0.190051,0.282479,0.326565,0.386199,0.388329,0.379455
18,Jordan Spieth,3.104162e-276,0.292672,0.239458,0.000000,0.012779,0.025557,0.057504
95,Adam Scott,8.115435e-92,0.170000,0.204103,0.014198,0.036916,0.082351,0.104004
1,Peter Uihlein,8.498191e-22,-0.080709,0.191538,0.184580,0.242794,0.225756,0.224691
54,Russell Henley,0.000000e+00,-0.436485,0.179753,0.851910,0.698566,0.584268,0.480974


In [None]:
player_edges_ranks = []

for index, row in contest_df.iterrows():
    for combo in combinations(row['lineup'], 2): 
        combo = combo + (entries - int(row['team_rank']),)
        player_edges_ranks.append(combo)

player_edges_num = []
player_edges_ls = []
for player1 in tqdm(player_edges_ranks):
    player_distinct = sorted(player1[:2])
    if player_distinct not in player_edges_ls:
        temp_count = 0
        for player2 in player_edges_ranks:
            if player_distinct == sorted(player2[:2]):
                temp_count += 1
        
        percent_connect = temp_count / entries
        player_temp = tuple(player_distinct) + (percent_connect,)
        player_edges_ls.append(sorted(player1[:2]))
        player_edges_num.append(player_temp)

  0%|          | 259/211290 [00:11<2:16:45, 25.72it/s]

In [40]:
G = nx.Graph()

G.add_nodes_from(player_ls)
G.add_weighted_edges_from(player_edges)

In [41]:
cent = nx.eigenvector_centrality(G)

sorted(cent.items(), key = lambda x: x[1])

[('Peter Malnati', 0.0005428623274376993),
 ('Brett Stegmaier', 0.00477444775995518),
 ('Charlie Beljan', 0.009902025235216778),
 ('Fredrik Jacobson', 0.012853540775674242),
 ('Jason Bohn', 0.014124623737542433),
 ('Rich Berberian', 0.01462955138553002),
 ('Ben Willman', 0.0154431285466787),
 ('Greg Chalmers', 0.01679749086024358),
 ('Carl Pettersson', 0.018677202087622607),
 ('David Hearn', 0.019694452863680583),
 ('Stuart Appleby', 0.022074487498085757),
 ('Jordan Spieth', 0.02294980581024338),
 ('Ricky Barnes', 0.027650651447339192),
 ('Mark Hubbard', 0.028944921070425535),
 ('Henrik Stenson', 0.029823874577175162),
 ('K.J. Choi', 0.03054565891236349),
 ('Bob Estes', 0.030751035527603793),
 ('Robert Allenby', 0.03164886186272298),
 ('Andrew Loupe', 0.032341627073844824),
 ('Steven Bowditch', 0.0341499622306698),
 ('Rafael Cabrera-Bello', 0.035886014111902134),
 ('Lee Westwood', 0.03617110725825273),
 ('Patrick Reed', 0.03640600164011604),
 ('Steve Marino', 0.03746896781747713),
 ('M

In [44]:
for neigh in nx.common_neighbors(G, 'Luke List', 'Hudson Swafford'):
    print(neigh)

Kyle Stanley
Andy Sullivan
Chris Stroud
Johnson Wagner
Jason Dufner
Andrew Johnston
Rafael Cabrera-Bello
Sam Saunders
Adam Scott
Jimmy Walker
Jamie Lovemark
J.J. Henry
Tony Finau
D.A. Points
Jason Kokrak
Daniel Berger
Russell Henley
Bryson DeChambeau
Nick Watney
Phil Mickelson
Charley Hoffman
Ryan Blaum
Morgan Hoffmann
Justin Rose
Smylie Kaufman
Chez Reavie
Charles Howell
Kyle Reifers
J.B. Holmes
Martin Flores
Henrik Stenson
Jim Herman
Grayson Murray
Peter Uihlein
Kevin Chappell
William Lunde
Jordan Spieth
Chris Wood
Matt Jones
Zac Blair
Harris English
Ryan Palmer
Scott Brown
Roberto Castro
Michael Kim
Cameron Smith
Harold Varner
Jonas Blixt
Cody Gribble
Keegan Bradley
Whee Kim
JT Poston
Jon Rahm
Jhonattan Vegas
John Huh
Brian Harman
Billy Horschel
Stewart Cink
Danny Lee
Lee Westwood
Lucas Glover
Luke Donald
Matt Kuchar
J.J. Spaun
Rickie Fowler
Bernd Wiesberger
Nick Taylor
Sean O'Hair
Bud Cauley
Cameron Tringale
Mackenzie Hughes
Patrick Cantlay
Rafael Campos
Alex Cejka
Ollie Schniederj

In [36]:
own_G = nx.Graph()

own_G.add_nodes_from(player_ls)
own_G.add_weighted_edges_from(player_edges_num)

In [38]:
own_cent = nx.eigenvector_centrality(own_G)

sorted(own_cent.items(), key = lambda x: x[1], reverse = True)

[('Phil Mickelson', 0.3372622702888784),
 ('Tony Finau', 0.32105758205927637),
 ('Rafael Cabrera-Bello', 0.3197604642980249),
 ('Patrick Reed', 0.28729664386954273),
 ('Rickie Fowler', 0.2854823246007726),
 ('Jon Rahm', 0.25825538332354614),
 ('Peter Uihlein', 0.1948434224386053),
 ('Jordan Spieth', 0.19442017770107722),
 ('Adam Scott', 0.19435069361806853),
 ('Russell Henley', 0.1784084977643574),
 ('Jimmy Walker', 0.17728120255372667),
 ('Justin Rose', 0.16099389091486158),
 ('Bernd Wiesberger', 0.15985596005602032),
 ('J.B. Holmes', 0.15619952567902134),
 ('Jhonattan Vegas', 0.15237595893979297),
 ('Charles Howell', 0.14907866460089234),
 ('Jason Dufner', 0.1411381166492751),
 ('Henrik Stenson', 0.1288022669534448),
 ('JT Poston', 0.11717756938211393),
 ('Charley Hoffman', 0.11423820196520049),
 ('Matt Kuchar', 0.09768183102175353),
 ('Ollie Schniederjans', 0.09136513401445592),
 ('Lee Westwood', 0.08056513316357158),
 ('Lucas Glover', 0.08044155456126591),
 ('Billy Horschel', 0.080

In [11]:
player_combo_3 = []
player_combo_4 = []
player_combo_5 = []
player_combo_6 = []

for index, row in contest_df.iterrows():
    for combo in combinations(row['lineup'], 3): 
        player_combo_3.append(combo)
    for combo in combinations(row['lineup'], 4): 
        player_combo_4.append(combo)
    for combo in combinations(row['lineup'], 5): 
        player_combo_5.append(combo)
    for combo in combinations(row['lineup'], 6): 
        player_combo_6.append(combo)
        
combo_3_counts = list(Counter(elem for elem in player_combo_3).items())
combo_4_counts = list(Counter(elem for elem in player_combo_4).items())
combo_5_counts = list(Counter(elem for elem in player_combo_5).items())
combo_6_counts = list(Counter(elem for elem in player_combo_6).items())

In [12]:
combo_6_counts.sort(key = lambda x: x[1], reverse = True)
combo_6_counts

[(('Patrick Reed',
   'Phil Mickelson',
   'J.B. Holmes',
   'Rickie Fowler',
   'Jason Dufner',
   'Tony Finau'),
  47),
 (('Bernd Wiesberger',
   'Jon Rahm',
   'Phil Mickelson',
   'Rickie Fowler',
   'Tony Finau',
   'JT Poston'),
  25),
 (('Justin Rose',
   'Patrick Reed',
   'Rafael Cabrera-Bello',
   'Phil Mickelson',
   'Russell Henley',
   'Tony Finau'),
  24),
 (('Jon Rahm',
   'Patrick Reed',
   'Rafael Cabrera-Bello',
   'Phil Mickelson',
   'Tony Finau',
   'Jimmy Walker'),
  20),
 (('Jon Rahm',
   'Patrick Reed',
   'Rafael Cabrera-Bello',
   'Phil Mickelson',
   'Billy Horschel',
   'Tony Finau'),
  19),
 (('Patrick Reed',
   'Rafael Cabrera-Bello',
   'Phil Mickelson',
   'Rickie Fowler',
   'Tony Finau',
   'Jimmy Walker'),
  17),
 (('Patrick Reed',
   'Rafael Cabrera-Bello',
   'Phil Mickelson',
   'Rickie Fowler',
   'Charley Hoffman',
   'Tony Finau'),
  16),
 (('Jordan Spieth',
   'Bernd Wiesberger',
   'Jon Rahm',
   'William Lunde',
   'Tom Hoge',
   'Rafael Camp

In [None]:
for combo in combo_4_counts:
    if 'Luke List' in combo[0]:
        print(combo)

(('Jordan Spieth', 'Peter Uihlein', 'Luke List', 'Jhonattan Vegas'), 16)
(('Jordan Spieth', 'Luke List', 'Jhonattan Vegas', 'Tony Finau'), 15)
(('Luke List', 'Jon Rahm', 'Rickie Fowler', 'Tony Finau'), 14)
(('Luke List', 'Justin Rose', 'Rickie Fowler', 'Tony Finau'), 14)
(('Luke List', 'Phil Mickelson', 'Rickie Fowler', 'Adam Scott'), 13)
(('Luke List', 'Jon Rahm', 'Rickie Fowler', 'Jhonattan Vegas'), 12)
(('Jordan Spieth', 'Luke List', 'Phil Mickelson', 'Jamie Lovemark'), 12)
(('Peter Uihlein', 'Luke List', 'Phil Mickelson', 'Jhonattan Vegas'), 12)
(('Peter Uihlein', 'Luke List', 'Jon Rahm', 'Rickie Fowler'), 12)
(('Luke List', 'Rickie Fowler', 'Russell Henley', 'Tony Finau'), 12)
(('Luke List', 'Justin Rose', 'Jhonattan Vegas', 'Tony Finau'), 12)
(('Luke List', 'Stewart Cink', 'Rickie Fowler', 'Adam Scott'), 12)
(('Peter Uihlein', 'Luke List', 'Phil Mickelson', 'Rickie Fowler'), 12)
(('Luke List', 'Justin Rose', 'J.B. Holmes', 'Tony Finau'), 12)
(('Luke List', 'Jon Rahm', 'Phil Micke

In [13]:
complete_combos = []

for combo in combinations(player_ls, 4):
    complete_combos.append(combo)
    
len(complete_combos)

15329615

In [14]:
len(combo_4_counts)

88588

In [12]:
combo_ls = []

for combo in combo_4_counts:
    combo_ls.append(combo[0])

In [19]:

for combo1 in tqdm(complete_combos):
    if combo1 not in combo_ls: 
        combo_4_counts.append((combo1,0))

  0%|                              | 29125/15329615 [05:35<52:52:49, 80.37it/s]

KeyboardInterrupt: 

In [20]:
len(player_ls)

140