In [138]:
import json
import os
import glob
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
from random import randint
from pga.return_pid import return_pid
from pga.fuzz_match import return_tid
from sqlalchemy import create_engine
from pandas.io import sql


def return_tourny_avg(row):
    event_id = row['event_id']
    weekend = row['weekend']

    avg_points = grouped_df['actual_points']['mean'].loc[(grouped_df['event_id'] == event_id) & (grouped_df['weekend'] == weekend)].values[0]

    return avg_points

def return_tourny_std(row):
    event_id = row['event_id']
    weekend = row['weekend']

    std_points = grouped_df['actual_points']['std'].loc[(grouped_df['event_id'] == event_id) & (grouped_df['weekend'] == weekend)].values[0]

    return std_points

def norm_points(row):
    return (row['actual_points'] - row['tourny_avg_points']) / row['tourny_std_points']

now = datetime.now()
final_dict = {}
final_dict['salary'] = {}
final_dict['events'] = {}
final_dict['ownership'] = {}
final_dict['source'] = {}


#Retrieve data from last week

for i in range(1,2):
    date_str = (now - timedelta(i)).strftime('%-m_%-d_%Y')


    #Get salary data
    sal_url = 'http://www.fantasylabs.com/api/playermodel/5/{}/?modelId=313179'.format(date_str)

    sal_resp = requests.get(sal_url)
    sal_data = json.loads(sal_resp.text)
    
    try:
        if len(sal_data['PlayerModels']) > 0:
            sal_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/salary_data/{}_salary_data.json'.format(date_str)

            if os.path.isfile(sal_file_name) is False:

                final_dict['salary'][date_str] = sal_data
                with open(sal_file_name, 'w') as outfile:
                    json.dump(sal_data, outfile)

                print(date_str + ' salary file saved!')
    except:
        pass

    wait = 1
    time.sleep(wait)

    #Get sportevents data
    event_url = 'http://www.fantasylabs.com/api/sportevents/5/{}'.format(date_str)

    event_resp = requests.get(event_url)
    event_data = json.loads(event_resp.text)



    if len(event_data) > 0:
        event_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/sportevent_data/{}_sportevent_data.json'.format(date_str)

        if os.path.isfile(event_file_name) is False:

            final_dict['events'][date_str] = event_data

            with open(event_file_name, 'w') as outfile:
                json.dump(event_data, outfile)

                print(date_str + ' sportevents file saved')

    time.sleep(wait)

    #get ownership data
    own_url = 'http://www.fantasylabs.com/api/contest-ownership/5/{}/4'.format(date_str)

    own_resp = requests.get(own_url)
    own_data = json.loads(own_resp.text)

    if len(own_data) > 0:
        own_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/ownership_data/{}_ownership_data.json'.format(date_str)

        if os.path.isfile(own_file_name) is False:

            final_dict['ownership'][date_str] = own_data

            with open(own_file_name, 'w') as outfile:
                json.dump(own_data, outfile)

                print(date_str + ' ownership file saved')
                
                
    #get source data
    
    source_url = 'http://www.fantasylabs.com/api/sourcedata/5/{}'.format(date_str)
    
    source_data = requests.get(source_url).json()

    
    if len(source_data) > 0:
        source_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/source_data/{}_source_data.json'.format(date_str)
        
        if os.path.isfile(source_file_name) is False:
            
            final_dict['source'][date_str] = source_data
            
            with open(source_file_name, 'w') as outfile:
                json.dump(source_data, outfile)
                
                print(date_str + ' source data saved')
                
        
#Parse Results

sal_keys = final_dict['salary'].keys()


pga_salary = pd.DataFrame(columns = ['player_name', 'player_id', 'date_obj', 'event_id', 'tournament_name', 
                        'tournament_id', 'tourny_date', 'tourny_course', 'tour', 'salary', 'actual_points',
                        'imp_points', 'proj_points', 'score', 'plus_minus', 'injury_status', 'source',
                        'fantasy_id', 'weekend'])
count = 0

for sal_key in sal_keys:

    date_obj = datetime.strptime(sal_key, '%m_%d_%Y')

    for player in final_dict['salary'][sal_key]['PlayerModels']:
        switch = 0
        player_name = player['Properties']['Player_Name']
        player_id = return_pid(player_name)
        salary = player['Properties']['Salary']
        actual_points = player['Properties']['ActualPoints']
        imp_points = player['Properties']['ImpPts']
        proj_points = player['ProjPoints']
        score = player['Properties']['Score']
        injury_status = player['Properties']['InjuryStatus']
        plus_minus = player['Properties']['Plus_Minus']
        source = player['Properties']['SourceId']
        fantasy_id = player['FantasyResultId']


        if source == 4:
            event_id = player['Properties']['EventId']

            for event in final_dict['events'][sal_key]:
                if event['EventId'] == event_id:
                    tourny_name = event['HomeTeam']
                    tourny_date = event['EventDate']
                    tourny_course = event['StadiumName']
                    tournament_id = return_tid(tourny_name)
                    tour = event['VisitorTeam']
                    switch = 1

            if switch == 0:
                print('SWITCH ENGAGED!!!', tourny_name, tourny_date, file)
                tourny_name = np.nan
                tourny_date = np.nan
                tourny_course = np.nan
                tour = np.nan

            pga_salary.loc[count] = [player_name, player_id, date_obj, tourny_id, tourny_name, tournament_id,
                                     tourny_date, tourny_course, tour,
                                     salary, actual_points, imp_points, proj_points,
                                     score, plus_minus, injury_status, source, fantasy_id, 0]

            count += 1

#Find unique tournaments in df and retrieve year
tournaments = pga_salary['tournament_name'].unique()
year = pd.to_datetime(pga_salary['tourny_date'].loc[0]).year

#Connect to mysql db
engine = create_engine('mysql+pymysql://root:v1933@127.0.0.1:3306/pga?charset=utf8')
conn = engine.connect()

#Check if tournament and year exist in db, if not insert results
for tournament in tournaments:
    temp_results = conn.execute('SELECT * FROM pga.dk_salaries WHERE tournament_name = "{}" AND \
            YEAR(tourny_date) = {}'.format(tournament, year))

    temp_ls = []

    for result in temp_results:
        temp_ls.append(result)

    if len(temp_ls) < 1:
        agg_dict = {'actual_points': ['mean', 'std']}

        temp_df = pga_salary.loc[pga_salary['tournament_name'] == tournament]
        
        #Separate Weekend salaries from main contest salaries
        
        players = temp_df['player_name'].unique()

        temp_df.sort_values('fantasy_id', inplace = True)
        temp_df.reset_index(inplace = True, drop = True)

        for player in players:
            indices = sorted(temp_df.index[temp_df['player_name'] == player])
            
            if len(indices) > 1:
                temp_df['weekend'][indices[1]] = 1
        
        # Create grouped df to calc tourny std and avg
        grouped_df = temp_df[['event_id', 'weekend', 'actual_points']].groupby(['event_id', 'weekend']).agg(agg_dict)
        grouped_df.reset_index(inplace = True)

        temp_df['tourny_std_points'] = temp_df.apply(return_tourny_std, axis = 1)
        temp_df['tourny_avg_points'] = temp_df.apply(return_tourny_avg, axis = 1)
        temp_df['norm_points'] = temp_df.apply(norm_points, axis = 1)

        #temp_df.to_sql('dk_salaries', engine,
                        #if_exists = 'append', index = False)


7_23_2017 salary file saved!
7_23_2017 sportevents file saved


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [62]:
source_data

{'ContestGroups': [{'AdminEdit': False,
   'ContestEndDate': '2017-07-23T01:35:00',
   'ContestGroupId': 15077,
   'ContestStartDate': '2017-07-20T01:35:00',
   'ContestSuffix': ' (PGA)',
   'DisplayName': '1:35AM ET (PGA) - 4 Games',
   'DraftGroupId': 14130,
   'Events': [{'ContestGroupId': 15077, 'SportEventId': 3453281}],
   'GameCount': 4,
   'IsOpen': False,
   'IsPrimary': False,
   'IsProjected': False,
   'SourceId': 4,
   'SportId': 5},
  {'AdminEdit': False,
   'ContestEndDate': '2017-07-23T04:20:00',
   'ContestGroupId': 15235,
   'ContestStartDate': '2017-07-22T04:20:00',
   'ContestSuffix': ' (Weekend PGA)',
   'DisplayName': '4:20AM ET (Weekend PGA) - 2 Games',
   'DraftGroupId': 14354,
   'Events': [],
   'GameCount': 2,
   'IsOpen': False,
   'IsPrimary': False,
   'IsProjected': False,
   'SourceId': 4,
   'SportId': 5},
  {'AdminEdit': False,
   'ContestEndDate': '0001-01-01T00:00:00',
   'ContestGroupId': 15094,
   'ContestStartDate': '2017-07-20T01:35:00',
   'Cont

In [134]:
for p, player in enumerate(sal_data['PlayerModels']):
    if player['Properties']['SourceId'] == 4:
        if player['Properties']['Player_Name'] == 'Jimmy Walker':
            print(p)
            

307
308


In [70]:
main_dict = final_dict['salary']['7_23_2017']['PlayerModels'][300]

KeyError: '7_23_2017'

In [97]:
main_dict = sal_data['PlayerModels'][309]

In [98]:
weeknd_dict = sal_data['PlayerModels'][312]

In [99]:
##Iterate and find which fields are different

for key in main_dict.keys():
    if isinstance(main_dict[key], dict):
        for key1 in main_dict[key].keys():
            if main_dict[key][key1] != weeknd_dict[key][key1]:
                print('main_dict[{}][{}] = {}, weeknd_dict[{}][{}] = {}'.format(key, key1, 
                    main_dict[key][key1], key, key1, weeknd_dict[key][key1]))
                
    else:
        if main_dict[key] != weeknd_dict[key]:
            print('main_dict[{}] = {}, weeknd_dict[{}] = {}'.format(key, main_dict[key], key, weeknd_dict[key]))
    

main_dict[FantasyResultId] = 1005921, weeknd_dict[FantasyResultId] = 1005003
main_dict[Salary] = 10100, weeknd_dict[Salary] = 11600
main_dict[Properties][Site_Salary] = 77, weeknd_dict[Properties][Site_Salary] = 37
main_dict[Properties][Salary] = 10100, weeknd_dict[Properties][Salary] = 11600
main_dict[Properties][Month_Salary_Change] = -1300, weeknd_dict[Properties][Month_Salary_Change] = 200
main_dict[Properties][Season_Salary_Change] = -1400, weeknd_dict[Properties][Season_Salary_Change] = 100
main_dict[Properties][FantasyResultId] = 1005921, weeknd_dict[Properties][FantasyResultId] = 1005003
main_dict[Properties][Pro_Pct] = 91, weeknd_dict[Properties][Pro_Pct] = 87
main_dict[Properties][ImpPts] = 65.71, weeknd_dict[Properties][ImpPts] = 74.58


In [104]:
## Weekend salary has the higher FantasyResultId
for player in sal_data['PlayerModels']:
    for player1 in sal_data['PlayerModels']:
        if player['Properties']['Player_Name'] == player1['Properties']['Player_Name'] and player['Properties']['Salary'] != player1['Properties']['Salary'] and player['Properties']['SourceId'] == 4 and player1['Properties']['SourceId'] == 4:
            print(player['Properties']['Player_Name'], player['Properties']['Salary'], player['Properties']['FantasyResultId'])
            print(player1['Properties']['Player_Name'], player1['Properties']['Salary'], player1['Properties']['FantasyResultId'])
            print('\n')
            

Jon Rahm 11400 1005064
Jon Rahm 9600 1005981


Jon Rahm 9600 1005981
Jon Rahm 11400 1005064


Kyle Stanley 6600 1005047
Kyle Stanley 7400 1005964


Kyle Stanley 7400 1005964
Kyle Stanley 6600 1005047


Bernd Wiesberger 7600 1005039
Bernd Wiesberger 7500 1005955


Bernd Wiesberger 7500 1005955
Bernd Wiesberger 7600 1005039


Justin Rose 10300 1004968
Justin Rose 9200 1005888


Justin Rose 9200 1005888
Justin Rose 10300 1004968


Louis Oosthuizen 8300 1004983
Louis Oosthuizen 7200 1005901


Louis Oosthuizen 7200 1005901
Louis Oosthuizen 8300 1004983


Paul Casey 8100 1004976
Paul Casey 9100 1005895


Paul Casey 9100 1005895
Paul Casey 8100 1004976


Adam Hadwin 7200 1005922
Adam Hadwin 6800 1005004


Adam Hadwin 6800 1005004
Adam Hadwin 7200 1005922


David Horsey 6400 1005076
David Horsey 7200 1005994


David Horsey 7200 1005994
David Horsey 6400 1005076


Mike Lorenzo-Vera 6400 1005079
Mike Lorenzo-Vera 7100 1005997


Mike Lorenzo-Vera 7100 1005997
Mike Lorenzo-Vera 6400 1005079


Tony

In [141]:
temp_df.loc[temp_df['player_name'] == 'Sergio Garcia']

Unnamed: 0,index,player_name,player_id,date_obj,event_id,tournament_name,tourny_date,tourny_course,tour,salary,...,proj_points,score,plus_minus,injury_status,source,fantasy_id,weekend,tourny_std_points,tourny_avg_points,norm_points
53,23,Sergio Garcia,21209.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,10600.0,...,0.0,92.0,92.0,,4.0,1005019.0,0.0,25.149653,40.010274,0.854474
197,22,Sergio Garcia,21209.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,9400.0,...,0.0,92.0,92.0,,4.0,1005936.0,1.0,25.361548,40.319149,0.835156


In [126]:
temp_df

Unnamed: 0,player_name,player_id,date_obj,event_id,tournament_name,tourny_date,tourny_course,tour,salary,actual_points,...,score,plus_minus,injury_status,source,site_salary,fantasy_id,weekend,tourny_std_points,tourny_avg_points,norm_points
82,Richard Bland,45836.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7600.0,64.5,...,84.0,84.0,,4.0,1005998.0,0.0,0.0,25.339525,40.248227,0.957073
220,Austin Connelly,47906.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7400.0,73.0,...,40.0,40.0,,4.0,1005982.0,0.0,0.0,25.339525,40.248227,1.292517
254,Stuart Manley,26730.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7300.0,32.0,...,10.0,10.0,,4.0,1006005.0,0.0,0.0,25.339525,40.248227,-0.325508
187,Todd Hamilton,1473.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7000.0,4.0,...,50.0,50.0,,4.0,1005943.0,1.0,1.0,25.089066,40.208904,-1.443215
185,Adam Bland,26418.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7000.0,20.5,...,50.0,50.0,,4.0,1005944.0,1.0,1.0,25.089066,40.208904,-0.785558
124,Phachara Khongwatmai,47239.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7100.0,13.5,...,73.0,73.0,,4.0,1006012.0,1.0,0.0,25.339525,40.248227,-1.055593
192,Ashley Hall,37452.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7000.0,17.0,...,50.0,50.0,,4.0,1006026.0,1.0,0.0,25.339525,40.248227,-0.917469
276,Luca Cianchetti,49038.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7000.0,13.5,...,1.0,1.0,,4.0,1006013.0,1.0,0.0,25.339525,40.248227,-1.055593
195,Kent Bulle,35731.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7000.0,37.5,...,50.0,50.0,,4.0,1006009.0,1.0,0.0,25.339525,40.248227,-0.108456
242,Shaun Norris,26328.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7000.0,50.0,...,21.0,21.0,,4.0,1006016.0,1.0,0.0,25.339525,40.248227,0.384844


In [135]:
sal_data['PlayerModels'][307]

{'ActualPoints': 46.0,
 'Cnt': 0,
 'Correlation': 0.0,
 'CurrentExposure': 0.0,
 'Exposure': None,
 'FantasyResultId': 1004978,
 'FirstPosition': 'G',
 'FullName': '',
 'IsExcluded': False,
 'IsLiked': False,
 'IsLocked': False,
 'LineupCount': 0,
 'MinExposure': None,
 'PlayerId': 8872,
 'Points': 0.0,
 'Position': 'G',
 'PositionCount': 1,
 'PositionId': 501,
 'Positions': ['G'],
 'ProjPoints': 0.0,
 'Properties': {'ActualPoints': 46.0,
  'Consistency': 95,
  'EventId': 3453281,
  'EventTeamId': 0,
  'FantasyResultId': 1004978,
  'FullName': '',
  'ImpPts': 50.75,
  'InjuryStatus': None,
  'IsProjected': None,
  'Month_Count': 3,
  'Month_PPG': 60.2,
  'Month_Salary_Change': -800,
  'Month_X0': 33,
  'Month_X1': 66,
  'Month_X2': 66,
  'MyTrends': 0,
  'OwnRank': 90,
  'OwnRank_Slate': None,
  'PlayerId': 8872,
  'Player_Name': 'Jimmy Walker',
  'Plus_Minus': 53,
  'Position': 'G',
  'PositionId': 501,
  'ProTrendsPercentile_Slate': None,
  'Pro_Pct': 40,
  'Salary': 7000,
  'Score':

In [137]:
sal_data['PlayerModels'][308]

{'ActualPoints': 46.0,
 'Cnt': 0,
 'Correlation': 0.0,
 'CurrentExposure': 0.0,
 'Exposure': None,
 'FantasyResultId': 1005897,
 'FirstPosition': 'G',
 'FullName': '',
 'IsExcluded': False,
 'IsLiked': False,
 'IsLocked': False,
 'LineupCount': 0,
 'MinExposure': None,
 'PlayerId': 8872,
 'Points': 0.0,
 'Position': 'G',
 'PositionCount': 1,
 'PositionId': 501,
 'Positions': ['G'],
 'ProjPoints': 0.0,
 'Properties': {'ActualPoints': 46.0,
  'Consistency': 95,
  'EventId': 3453281,
  'EventTeamId': 0,
  'FantasyResultId': 1005897,
  'FullName': '',
  'ImpPts': 52.0,
  'InjuryStatus': None,
  'IsProjected': None,
  'Month_Count': 3,
  'Month_PPG': 60.2,
  'Month_Salary_Change': -500,
  'Month_X0': 33,
  'Month_X1': 66,
  'Month_X2': 66,
  'MyTrends': 0,
  'OwnRank': 90,
  'OwnRank_Slate': None,
  'PlayerId': 8872,
  'Player_Name': 'Jimmy Walker',
  'Plus_Minus': 53,
  'Position': 'G',
  'PositionId': 501,
  'ProTrendsPercentile_Slate': None,
  'Pro_Pct': 40,
  'Salary': 7300,
  'Score': 

In [142]:
#Retrieve data from last week

for i in range(80):
    date_str = (now - timedelta(i)).strftime('%-m_%-d_%Y')


    #Get salary data
    sal_url = 'http://www.fantasylabs.com/api/playermodel/5/{}/?modelId=313179'.format(date_str)

    sal_resp = requests.get(sal_url)
    sal_data = json.loads(sal_resp.text)
    
    try:
        if len(sal_data['PlayerModels']) > 0:
            sal_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/salary_data/{}_salary_data.json'.format(date_str)

            if os.path.isfile(sal_file_name) is False:

                final_dict['salary'][date_str] = sal_data
                with open(sal_file_name, 'w') as outfile:
                    json.dump(sal_data, outfile)

                print(date_str + ' salary file saved!')
    except:
        pass

    wait = 1
    time.sleep(wait)

    #Get sportevents data
    event_url = 'http://www.fantasylabs.com/api/sportevents/5/{}'.format(date_str)

    event_resp = requests.get(event_url)
    event_data = json.loads(event_resp.text)



    if len(event_data) > 0:
        event_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/sportevent_data/{}_sportevent_data.json'.format(date_str)

        if os.path.isfile(event_file_name) is False:

            final_dict['events'][date_str] = event_data

            with open(event_file_name, 'w') as outfile:
                json.dump(event_data, outfile)

                print(date_str + ' sportevents file saved')

    time.sleep(wait)

    #get ownership data
    own_url = 'http://www.fantasylabs.com/api/contest-ownership/5/{}/4'.format(date_str)

    own_resp = requests.get(own_url)
    own_data = json.loads(own_resp.text)

    if len(own_data) > 0:
        own_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/ownership_data/{}_ownership_data.json'.format(date_str)

        if os.path.isfile(own_file_name) is False:

            final_dict['ownership'][date_str] = own_data

            with open(own_file_name, 'w') as outfile:
                json.dump(own_data, outfile)

                print(date_str + ' ownership file saved')
                
                
    #get source data
    
    source_url = 'http://www.fantasylabs.com/api/sourcedata/5/{}'.format(date_str)
    
    source_data = requests.get(source_url).json()

    
    if len(source_data) > 0:
        source_file_name = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/source_data/{}_source_data.json'.format(date_str)
        
        if os.path.isfile(source_file_name) is False:
            
            final_dict['source'][date_str] = source_data
            
            with open(source_file_name, 'w') as outfile:
                json.dump(source_data, outfile)
                
                print(date_str + ' source data saved')

7_24_2017 source data saved
7_22_2017 source data saved
7_21_2017 source data saved
7_20_2017 source data saved
7_19_2017 source data saved
7_18_2017 source data saved
7_17_2017 source data saved
7_16_2017 salary file saved!
7_16_2017 sportevents file saved
7_16_2017 ownership file saved
7_16_2017 source data saved
7_15_2017 source data saved
7_14_2017 source data saved
7_13_2017 source data saved
7_12_2017 source data saved
7_11_2017 source data saved
7_10_2017 source data saved
7_9_2017 salary file saved!
7_9_2017 sportevents file saved
7_9_2017 ownership file saved
7_9_2017 source data saved
7_8_2017 source data saved
7_7_2017 source data saved
7_6_2017 source data saved
7_5_2017 source data saved
7_4_2017 source data saved
7_3_2017 source data saved
7_2_2017 salary file saved!
7_2_2017 sportevents file saved
7_2_2017 ownership file saved
7_2_2017 source data saved
7_1_2017 source data saved
6_30_2017 source data saved
6_29_2017 source data saved
6_28_2017 source data saved
6_27_201

In [145]:
from pga.fuzz_match import return_tid

return_tid('Humana Challenge')

2.0

In [1]:
import json
import os
import glob
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
from random import randint
from pga.return_pid import return_pid
from pga.fuzz_match import return_tid
from sqlalchemy import create_engine
from pandas.io import sql
from tqdm import tqdm
import numpy as np

def return_tourny_avg(row):
    event_id = row['event_id']
    weekend = row['weekend']

    avg_points = grouped_df['actual_points']['mean'].loc[(grouped_df['event_id'] == event_id) & (grouped_df['weekend'] == weekend)].values[0]

    return avg_points

def return_tourny_std(row):
    event_id = row['event_id']
    weekend = row['weekend']

    std_points = grouped_df['actual_points']['std'].loc[(grouped_df['event_id'] == event_id) & (grouped_df['weekend'] == weekend)].values[0]

    return std_points

def norm_points(row):
    return (row['actual_points'] - row['tourny_avg_points']) / row['tourny_std_points']

now = datetime.now()



os.chdir('/home/valesco/Datasets/PGA_Data/fantasylabs_data/salary_data/')

salary_folders = glob.glob('*')


for sal in tqdm(salary_folders):
    #try:
    final_dict = {}
    final_dict['salary'] = {}
    final_dict['events'] = {}
    
    sal_ls = sal.split('_') 
    sal_date = sal_ls[0] + '_' + sal_ls[1] + '_' + sal_ls[2]

    sal_file = open(sal)
    sal_data = json.load(sal_file)

    event_name_str = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/sportevent_data/{}'.format(sal_date + '_sportevent_data.json')
    event_file_name = open(event_name_str)
    event_data = json.load(event_file_name)

    #Parse Results

    final_dict['salary'][sal_date] = sal_data
    final_dict['events'][sal_date] = event_data

    sal_keys = final_dict['salary'].keys()


    pga_salary = pd.DataFrame(columns = ['player_name', 'player_id', 'date_obj', 'event_id', 'tournament_name', 
                            'tournament_id', 'tourny_date', 'tourny_course', 'tour', 'salary', 'actual_points',
                            'imp_points', 'proj_points', 'score', 'plus_minus', 'injury_status', 'source',
                            'fantasy_id', 'weekend'])
    count = 0

    for sal_key in sal_keys:

        date_obj = datetime.strptime(sal_key, '%m_%d_%Y')

        for player in final_dict['salary'][sal_key]['PlayerModels']:
            switch = 0
            event_id = player['Properties']['EventId']
            player_name = player['Properties']['Player_Name']
            try:
                player_id = return_pid(player_name)
            except:
                player_id = np.nan
            salary = player['Properties']['Salary']
            actual_points = player['Properties']['ActualPoints']
            imp_points = player['Properties']['ImpPts']
            proj_points = player['ProjPoints']
            score = player['Properties']['Score']
            injury_status = player['Properties']['InjuryStatus']
            plus_minus = player['Properties']['Plus_Minus']
            source = player['Properties']['SourceId']
            fantasy_id = player['FantasyResultId']


            if source == 4:
                event_id = player['Properties']['EventId']

                for event in final_dict['events'][sal_key]:
                    if event['EventId'] == event_id:
                        tourny_name = event['HomeTeam']
                        tourny_date = event['EventDate']
                        tourny_course = event['StadiumName']
                        try:
                            tournament_id = return_tid(tourny_name)
                        except:
                            tournament_id = -999
                        tour = event['VisitorTeam']
                        switch = 1

                if switch == 0:
                    print('SWITCH ENGAGED!!!', tourny_name, tourny_date, file)
                    tourny_name = np.nan
                    tourny_date = np.nan
                    tourny_course = np.nan
                    tour = np.nan

                pga_salary.loc[count] = [player_name, player_id, date_obj, event_id, tourny_name, tournament_id,
                                         tourny_date, tourny_course, tour,
                                         salary, actual_points, imp_points, proj_points,
                                         score, plus_minus, injury_status, source, fantasy_id, 0]

                count += 1

    #Find unique tournaments in df and retrieve year
    tournaments = pga_salary['tournament_name'].unique()
    year = pd.to_datetime(pga_salary['tourny_date'].loc[0]).year

    #Connect to mysql db
    engine = create_engine('mysql+pymysql://root:v1933@127.0.0.1:3306/pga?charset=utf8')
    conn = engine.connect()

    #Check if tournament and year exist in db, if not insert results
    #for tournament in tournaments:
        #temp_results = conn.execute('SELECT * FROM pga.dk_salaries WHERE tournament_name = "{}" AND \
                #YEAR(tourny_date) = {}'.format(tournament, year))

        #temp_ls = []

        #for result in temp_results:
         #   temp_ls.append(result)

        #if len(temp_ls) < 1:
    agg_dict = {'actual_points': ['mean', 'std']}

    temp_df = pga_salary.loc[pga_salary['tournament_name'] == tourny_name]

    #Separate Weekend salaries from main contest salaries

    players = temp_df['player_name'].unique()

    temp_df.sort_values('fantasy_id', inplace = True)
    temp_df.reset_index(inplace = True, drop = True)

    for player in players:
        indices = sorted(temp_df.index[temp_df['player_name'] == player])

        if len(indices) > 1:
            temp_df['weekend'][indices[1]] = 1

    # Create grouped df to calc tourny std and avg
    grouped_df = temp_df[['event_id', 'weekend', 'actual_points']].groupby(['event_id', 'weekend']).agg(agg_dict)
    grouped_df.reset_index(inplace = True)

    temp_df['tourny_std_points'] = temp_df.apply(return_tourny_std, axis = 1)
    temp_df['tourny_avg_points'] = temp_df.apply(return_tourny_avg, axis = 1)
    temp_df['norm_points'] = temp_df.apply(norm_points, axis = 1)

    temp_df.to_sql('dk_salaries', engine, if_exists = 'append', index = False)

    #except:
        #print(sal)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See th

Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International was not matched
Shenzhen International wa

 91%|█████████ | 139/153 [13:51<00:53,  3.81s/it]

Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan II was not matched
Trophee Hassan

100%|██████████| 153/153 [15:07<00:00,  3.05s/it]


In [154]:
os.chdir('/home/valesco/Datasets/PGA_Data/fantasylabs_data/salary_data/')

salary_folders = glob.glob('*')


for sal in salary_folders[:1]:
    sal_ls = sal.split('_') 
    sal_date = sal_ls[0] + '_' + sal_ls[1] + '_' + sal_ls[2]
    
    sal_file = open(sal)
    sal_data = json.load(sal_file)
    
    event_name_str = '/home/valesco/Datasets/PGA_Data/fantasylabs_data/sportevent_data/{}'.format(sal_date + '_sportevent_data.json')
    event_file_name = open(event_name_str)
    event_data = json.load(event_file_name)
    
sal_data

{'Key': '10406b82-61c4-4755-95e3-7d095e738605',
 'PlayerModels': [{'ActualPoints': 48.0,
   'Cnt': 0,
   'Correlation': 0.0,
   'CurrentExposure': 0.0,
   'ErrorList': {},
   'Exposure': None,
   'FantasyResultId': 549714,
   'FirstPosition': 'G',
   'FullName': '',
   'IsExcluded': False,
   'IsLiked': False,
   'IsLocked': False,
   'LineupCount': 0,
   'MinExposure': None,
   'PlayerId': 8832,
   'Points': 0.0,
   'Position': 'G',
   'PositionCount': 1,
   'PositionId': 501,
   'Positions': ['G'],
   'ProjPoints': 0.0,
   'Properties': {'ActualPoints': 48.0,
    'Consistency': 94,
    'EventId': -98998274,
    'EventTeamId': 0,
    'FantasyResultId': 549714,
    'FullName': '',
    'ImpPts': 46.59,
    'InjuryStatus': None,
    'IsProjected': None,
    'Month_Count': 1,
    'Month_PPG': 72.0,
    'Month_Salary_Change': -900,
    'Month_X0': 0,
    'Month_X1': 100,
    'Month_X2': 100,
    'MyTrends': 0,
    'PlayerId': 8832,
    'Player_Name': 'Davis Love III',
    'Plus_Minus': 39,

In [159]:
temp_df

Unnamed: 0,index,player_name,player_id,date_obj,event_id,tournament_name,tourny_date,tourny_course,tour,salary,...,proj_points,score,plus_minus,injury_status,source,fantasy_id,weekend,tourny_std_points,tourny_avg_points,norm_points
0,61,Phil Mickelson,1810.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,8700.0,...,0.0,87.0,87.0,,4.0,1004961.0,0.0,25.149653,40.010274,-0.974577
1,43,Stewart Cink,20229.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,6300.0,...,0.0,90.0,90.0,,4.0,1004962.0,0.0,25.149653,40.010274,-0.477552
2,140,Lee Westwood,20396.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7700.0,...,0.0,68.0,68.0,,4.0,1004963.0,0.0,25.149653,40.010274,0.874355
3,106,Padraig Harrington,20766.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7200.0,...,0.0,79.0,79.0,,4.0,1004964.0,0.0,25.149653,40.010274,-0.954696
4,97,Charley Hoffman,12716.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,6700.0,...,0.0,82.0,82.0,,4.0,1004966.0,0.0,25.149653,40.010274,1.093046
5,160,Matt Kuchar,23108.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7200.0,...,0.0,61.0,61.0,,4.0,1004967.0,0.0,25.149653,40.010274,2.385310
6,7,Justin Rose,22405.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,10300.0,...,0.0,94.0,94.0,,4.0,1004968.0,0.0,25.149653,40.010274,0.397211
7,151,Zach Johnson,24024.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7600.0,...,0.0,65.0,65.0,,4.0,1004969.0,0.0,25.149653,40.010274,0.933998
8,237,Henrik Stenson,21528.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,9600.0,...,0.0,26.0,26.0,,4.0,1004970.0,0.0,25.149653,40.010274,1.033403
9,110,Ian Poulter,24138.0,2017-07-23,3453281.0,British Open,2017-07-23T00:00:00,Royal Birkdale,PGA,7500.0,...,0.0,79.0,79.0,,4.0,1004971.0,0.0,25.149653,40.010274,0.874355
