
Commit

Update scripts to prevent connection errors and use base filenames
vaastav committed Dec 26, 2017
1 parent 6c8b534 commit 19304e0
Showing 3 changed files with 19 additions and 5 deletions.
cleaners.py (11 changes: 8 additions & 3 deletions)
@@ -1,5 +1,6 @@
 import csv
 import math
+import os
 
 def clean_players(filename, base_filename):
     """ Creates a file with only important data columns for each player
@@ -9,7 +10,9 @@ def clean_players(filename, base_filename):
     """
     headers = ['first_name', 'second_name', 'goals_scored', 'assists', 'total_points', 'minutes', 'goals_conceded', 'creativity', 'influence', 'threat', 'bonus', 'bps', 'ict_index', 'clean_sheets', 'red_cards', 'yellow_cards', 'selected_by_percent', 'now_cost']
     fin = open(filename, 'r+', encoding='utf-8')
-    fout = open(base_filename + 'cleaned_players.csv', 'w+', encoding='utf-8', newline='')
+    outname = base_filename + 'cleaned_players.csv'
+    os.makedirs(os.path.dirname(outname), exist_ok=True)
+    fout = open(outname, 'w+', encoding='utf-8', newline='')
     reader = csv.DictReader(fin)
     writer = csv.DictWriter(fout, headers, extrasaction='ignore')
     writer.writeheader()
@@ -24,7 +27,9 @@ def id_players(players_filename, base_filename):
     """
     headers = ['first_name', 'second_name', 'id']
     fin = open(players_filename, 'r+', encoding='utf-8')
-    fout = open(base_filename + 'player_idlist.csv', 'w+', encoding='utf-8', newline='')
+    outname = base_filename + 'player_idlist.csv'
+    os.makedirs(os.path.dirname(outname), exist_ok=True)
+    fout = open(outname, 'w+', encoding='utf-8', newline='')
     reader = csv.DictReader(fin)
     writer = csv.DictWriter(fout, headers, extrasaction='ignore')
     writer.writeheader()
@@ -39,7 +44,7 @@ def get_player_ids(base_filename):
     reader = csv.DictReader(fin)
     player_ids = {}
     for line in reader:
-        k = line['id']
+        k = int(line['id'])
         v = line['first_name'] + '_' + line['second_name']
         player_ids[k] = v
     return player_ids
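
The two fixes above address separate failure modes. The os.makedirs guard creates the output directory before the file is opened, so a fresh checkout no longer fails with FileNotFoundError when data/2017-18/ does not exist yet. The int() cast matters because csv.DictReader yields every field as a string, so integer lookups against the returned dictionary would otherwise miss. A minimal sketch of both points, using a hypothetical row:

    import os

    # Directory guard: open() cannot create missing parent directories.
    outname = 'data/2017-18/cleaned_players.csv'
    os.makedirs(os.path.dirname(outname), exist_ok=True)  # creates data/2017-18/ on first run, no-op afterwards
    fout = open(outname, 'w+', encoding='utf-8', newline='')

    # Key type: csv.DictReader yields strings, so cast ids before using
    # them as keys that will later be looked up with integers.
    row = {'id': '42', 'first_name': 'First', 'second_name': 'Last'}  # hypothetical row
    player_ids = {int(row['id']): row['first_name'] + '_' + row['second_name']}
    print(player_ids.get(42))   # 'First_Last'
    print({'42': 'x'}.get(42))  # None: the pre-fix behaviour with string keys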
global_scraper.py (9 changes: 8 additions & 1 deletion)
@@ -3,6 +3,7 @@
 from utility import uprint
 from parsers import *
 from cleaners import *
+import time
 
 def get_data():
     """ Retrieve the fpl player data from the hard-coded url
@@ -22,7 +23,12 @@ def get_individual_player_data(player_id):
     """
     base_url = "https://fantasy.premierleague.com/drf/element-summary/"
     full_url = base_url + str(player_id)
-    response = requests.get(full_url)
+    response = ''
+    while response == '':
+        try:
+            response = requests.get(full_url)
+        except:
+            time.sleep(5)
     if response.status_code != 200:
         raise Exception("Response was code " + str(response.status_code))
     data = json.loads(response.text)
@@ -31,6 +37,7 @@
 def parse_data():
     """ Parse and store all the data
     """
+    print("Getting data")
     data = get_data()
     season = '2017-18'
     base_filename = 'data/' + season + '/'
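
The while loop above retries indefinitely, which keeps a long scrape alive through transient connection failures, though the bare except: also swallows unrelated errors (including KeyboardInterrupt) and never gives up if the host stays unreachable. A minimal sketch of the same retry idea with a bounded attempt count and a narrower exception class; this is an illustrative alternative, not what the commit does:

    import time
    import requests

    def get_with_retry(url, attempts=5, delay=5):
        # Retry only on connection-level failures; other errors propagate.
        for _ in range(attempts):
            try:
                return requests.get(url)
            except requests.exceptions.ConnectionError:
                time.sleep(delay)
        raise Exception('No response from ' + url + ' after ' + str(attempts) + ' attempts')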
parsers.py (4 changes: 3 additions & 1 deletion)
@@ -15,7 +15,9 @@ def extract_stat_names(dict_of_stats):
 
 def parse_players(list_of_players, base_filename):
     stat_names = extract_stat_names(list_of_players[0])
-    f = open(base_filename + 'players_raw.csv', 'w', encoding='utf8', newline='')
+    filename = base_filename + 'players_raw.csv'
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    f = open(filename, 'w+', encoding='utf8', newline='')
     w = csv.DictWriter(f, sorted(stat_names))
     w.writeheader()
     for player in list_of_players:
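
This is the same directory guard used in cleaners.py. One assumption worth making explicit: os.path.dirname only yields a directory when the path actually contains one, so the guard relies on callers passing a base_filename like 'data/2017-18/' (as parse_data does). A short sketch of both cases:

    import os

    # With the base_filename set in parse_data there is a directory to create:
    print(os.path.dirname('data/2017-18/players_raw.csv'))  # 'data/2017-18'

    # With a bare filename the directory component is empty, and
    # os.makedirs('') would raise FileNotFoundError even with exist_ok=True.
    print(os.path.dirname('players_raw.csv'))  # ''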
