In [1]:
import requests
import numpy as np
from datetime import datetime, timedelta, date
import calendar
import io
import os
import re
from bs4 import BeautifulSoup
import pandas as pd
from tqdm.notebook import tqdm
# import urllib.request

pd.set_option('display.max_rows', 4000)
pd.set_option('display.max_columns', 3000)
pd.set_option('display.width', 1000)
pd.options.display.max_seq_items = 2000
pd.options.mode.chained_assignment = None

# 1. Utils from FPL API

In [2]:
fpl_api = f'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(fpl_api)
teams = pd.DataFrame(r.json()['teams'])
teams = teams[['id', 'name', 'short_name']]
teams = teams.set_index('id')
dict_teams = teams.to_dict(orient='index') # team names and teams abbreviations from API

events = pd.DataFrame(r.json()['events'])
events.deadline_time = events.deadline_time.str.replace('T', ' ')
events.deadline_time = events.deadline_time.str.replace('Z', ' ')
events.deadline_time = pd.to_datetime(events.deadline_time, format='%Y-%m-%d %H:%M:%S')
events.deadline_time = pd.to_datetime(events.deadline_time + timedelta(hours=3))
events['time_to_scrape'] = pd.to_datetime(events.deadline_time - timedelta(hours=10))
finish = events.loc[37, 'deadline_time'].to_pydatetime()

# get num of gameweek from API
def get_GW_num(query=["previous", "current"]): # current or previous
    mask = events[f'is_{query}'].values == True
    return events[mask].values[0][0]

# check status for update for future automatic update
def status_for_update(date_today, gw_max: int=38):
    times = events[['id', 'time_to_scrape']]

    gw_1 = times.loc[(times.time_to_scrape - pd.to_datetime(date_today)).abs().idxmin(), 'id']
    time_1 = times.loc[(times.time_to_scrape - pd.to_datetime(date_today)).abs().idxmin(), 'time_to_scrape']
    diff_with_gw_1 = date_today - time_1
    
    if diff_with_gw_1 > timedelta(days=0):
        gw_2 = gw_1 + 1
        gw_current = gw_1
        gw_next = gw_2
    else: 
        gw_2 = gw_1 - 1
        gw_current = gw_2
        gw_next = gw_1

    if gw_next > gw_max:
        print('End of the season!')
    else:
        when_to_upd = times.loc[(times.id == gw_next).idxmax, 'time_to_scrape']

        diff = when_to_upd - date_today

        days_wait, seconds_wait = abs(diff).days, abs(diff).seconds
        hours_wait = days_wait * 24 + seconds_wait // 3600
        minutes_wait = (seconds_wait % 3600) // 60

        print(f'Now (b/w GWs):   {gw_current} ... {gw_next}')
        print(f'Time to update:  {when_to_upd}')
        print(f'Wait for:        {hours_wait} h {minutes_wait} min')

print(f'Previous GW:     {get_GW_num("previous")}')
print(f'Current GW:      {get_GW_num("current")}')
print('--------------------------------------')
date_today = datetime.now()
status_for_update(date_today)

Previous GW:     37
Current GW:      38
--------------------------------------
End of the season!


# 2. Scrape code of all announced (non-postponed) fixtures

In [3]:
fpl_fixtures = f'https://fantasy.premierleague.com/api/fixtures/'
r = requests.get(fpl_fixtures)
calendar_df = pd.DataFrame(r.json())
calendar_df = calendar_df[['code', 'event', 'team_h', 'team_a']]
calendar_df.dropna(inplace=True)
calendar_df.event = calendar_df.event.astype(int, errors='ignore')
calendar_df.team_h = calendar_df.team_h.apply(lambda x: dict_teams[x]['short_name'])
calendar_df.team_a = calendar_df.team_a.apply(lambda x: dict_teams[x]['short_name'])

calendar_df.head()

Unnamed: 0,code,event,team_h,team_a
0,2210271,1,BRE,ARS
1,2210276,1,MUN,LEE
2,2210272,1,BUR,BHA
3,2210273,1,CHE,CRY
4,2210274,1,EVE,SOU


# 3. Scrape calendar from FBRef

In [4]:
fbref_calendar = 'https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures'
id_data_default = '11160'
table_id_calendar = f'sched_{id_data_default}_1'

response = requests.get(fbref_calendar)
soup = BeautifulSoup(response.content, 'html.parser')
table_calendar = soup.find_all('table', id=table_id_calendar)[0] # main data
response.close()

# the current calendar
df = pd.read_html(str(table_calendar), parse_dates=[2])[0]
df.dropna(thresh=2, inplace=True)
df.reset_index(drop=True, inplace=True)
df.columns = ['Week', 'Day', 'Date', 'Time', 'Home', 'xG(H)', 'Score', 
              'xG(A)', 'Away', 'Attendance', 'Venue', 'Referee', 'Match Report', 'Notes']

df = df[['Week', 'Date', 'Home', 'Score', 'Away', 'Match Report', 'Notes']]

# dict.fromkeys(df.columns, 'str') <-- вспомогательная строчка на память

col_type = {'Week': 'int',
             'Home': 'str',
             'Score': 'str',
             'Away': 'str',
             'Match Report': 'str',
             'Notes': 'str'}

df.Date = df.Date.dt.date
df = df.astype(col_type)

# unifying teamnames
dict_fbref_teams = {'Arsenal': 'ARS',
                     'Aston Villa': 'AVL',
                     'Brentford': 'BRE',
                     'Brighton': 'BHA',
                     'Burnley': 'BUR',
                     'Chelsea': 'CHE',
                     'Crystal Palace': 'CRY',
                     'Everton': 'EVE',
                     'Leeds United': 'LEE',
                     'Leicester City': 'LEI',
                     'Liverpool': 'LIV',
                     'Manchester City': 'MCI',
                     'Manchester Utd': 'MUN',
                     'Newcastle Utd': 'NEW',
                     'Norwich City': 'NOR',
                     'Southampton': 'SOU',
                     'Tottenham': 'TOT',
                     'Watford': 'WAT',
                     'West Ham': 'WHU',
                     'Wolves': 'WOL'}

df['Home'] = df['Home'].apply(lambda x: dict_fbref_teams[x])
df['Away'] = df['Away'].apply(lambda x: dict_fbref_teams[x])

# настройка перенесённых игр
df.loc[df['Notes'] == 'Match Postponed', 'Date'] = 'nan'
df.replace('nan', np.nan, inplace=True)

# фильтрация от перенесённых игр
df = df[(~df.Date.isna())]

# календарь с учётом переносов
df.head()

Unnamed: 0,Week,Date,Home,Score,Away,Match Report,Notes
0,1,2021-08-13,BRE,2–0,ARS,Match Report,
1,1,2021-08-14,MUN,5–1,LEE,Match Report,
2,1,2021-08-14,LEI,1–0,WOL,Match Report,
3,1,2021-08-14,BUR,1–2,BHA,Match Report,
4,1,2021-08-14,CHE,3–0,CRY,Match Report,


# 4. Find and add links to match reports into DataFrame

In [5]:
match_reports = []

for a in table_calendar.find_all('a', href=True):
     match_reports.append(a['href'])
        
filt = [i for i in match_reports if i.startswith('/en/matches/') and i.endswith('Premier-League')]
filt = filt[::2]

# добавление ссылок
len_df = df.shape[0]
len_filt = len(filt)
addition = [np.nan] * (len_df - len_filt)
filt.extend(addition)
df['Match Report'] = filt

df.head()

Unnamed: 0,Week,Date,Home,Score,Away,Match Report,Notes
0,1,2021-08-13,BRE,2–0,ARS,/en/matches/3adf2aa7/Brentford-Arsenal-August-...,
1,1,2021-08-14,MUN,5–1,LEE,/en/matches/e62685d4/Manchester-United-Leeds-U...,
2,1,2021-08-14,LEI,1–0,WOL,/en/matches/0b346a62/Leicester-City-Wolverhamp...,
3,1,2021-08-14,BUR,1–2,BHA,/en/matches/4eb36e37/Burnley-Brighton-and-Hove...,
4,1,2021-08-14,CHE,3–0,CRY,/en/matches/6f454493/Chelsea-Crystal-Palace-Au...,


# 5. Prepare data & merge

In [6]:
df.columns = [i.replace(' ', '_').lower() for i in df.columns]
calendar_df.columns = ['fixture_id', 'week_fpl', 'home', 'away']

merged_calendar = calendar_df.merge(df)

merged_calendar.head()

Unnamed: 0,fixture_id,week_fpl,home,away,week,date,score,match_report,notes
0,2210271,1,BRE,ARS,1,2021-08-13,2–0,/en/matches/3adf2aa7/Brentford-Arsenal-August-...,
1,2210276,1,MUN,LEE,1,2021-08-14,5–1,/en/matches/e62685d4/Manchester-United-Leeds-U...,
2,2210272,1,BUR,BHA,1,2021-08-14,1–2,/en/matches/4eb36e37/Burnley-Brighton-and-Hove...,
3,2210273,1,CHE,CRY,1,2021-08-14,3–0,/en/matches/6f454493/Chelsea-Crystal-Palace-Au...,
4,2210274,1,EVE,SOU,1,2021-08-14,3–1,/en/matches/c99ebbf5/Everton-Southampton-Augus...,


---

In [7]:
urls = ['a2d435b3/Leicester-City', 
        '47c64c55/Crystal-Palace', 
        '822bd0ba/Liverpool', 
        'b8fd03ef/Manchester-City', 
        'cff3d9bb/Chelsea', 
        '19538871/Manchester-United', 
        '8cec06e1/Wolverhampton-Wanderers', 
        'cd051869/Brentford', 
        '361ca564/Tottenham-Hotspur', 
        '18bb7c10/Arsenal', 
        '943e8050/Burnley', 
        'd3fd31cc/Everton', 
        'b2b47a98/Newcastle-United', 
        '33c895d4/Southampton', 
        'd07537b9/Brighton--Hove-Albion', 
        '7c21e445/West-Ham-United', 
        '2abfe087/Watford', 
        '5bfb9659/Leeds-United', 
        '8602292d/Aston-Villa', 
        '1c781004/Norwich-City']

shortname_to_name = {v: k for k, v in dict_fbref_teams.items()}

res = [tuple(i.split('/')) for i in urls]
hash_to_teamname = dict(sorted(res, key=lambda x: x[1]))
# teamname_to_hash = dict(sorted(res[::-1], key=lambda x: x[0]))

shortname_to_hash = dict(zip(dict_fbref_teams.values(), hash_to_teamname.keys()))
shortname_to_hash

{'ARS': '18bb7c10',
 'AVL': '8602292d',
 'BRE': 'cd051869',
 'BHA': 'd07537b9',
 'BUR': '943e8050',
 'CHE': 'cff3d9bb',
 'CRY': '47c64c55',
 'EVE': 'd3fd31cc',
 'LEE': '5bfb9659',
 'LEI': 'a2d435b3',
 'LIV': '822bd0ba',
 'MCI': 'b8fd03ef',
 'MUN': '19538871',
 'NEW': 'b2b47a98',
 'NOR': '1c781004',
 'SOU': '33c895d4',
 'TOT': '361ca564',
 'WAT': '2abfe087',
 'WHU': '7c21e445',
 'WOL': '8cec06e1'}

In [8]:
last_GW_games = merged_calendar[merged_calendar.week_fpl == get_GW_num('current')]
last_GW_games

Unnamed: 0,fixture_id,week_fpl,home,away,week,date,score,match_report,notes
370,2210641,38,ARS,EVE,38,2022-05-22,5–1,/en/matches/4100d195/Arsenal-Everton-May-22-20...,
371,2210642,38,BRE,LEE,38,2022-05-22,1–2,/en/matches/076ca089/Brentford-Leeds-United-Ma...,
372,2210643,38,BHA,WHU,38,2022-05-22,3–1,/en/matches/70a81794/Brighton-and-Hove-Albion-...,
373,2210644,38,BUR,NEW,38,2022-05-22,1–2,/en/matches/ad4ac82b/Burnley-Newcastle-United-...,
374,2210645,38,CHE,WAT,38,2022-05-22,2–1,/en/matches/df85b298/Chelsea-Watford-May-22-20...,
375,2210646,38,CRY,MUN,38,2022-05-22,1–0,/en/matches/19173099/Crystal-Palace-Manchester...,
376,2210647,38,LEI,SOU,38,2022-05-22,4–1,/en/matches/16e2761e/Leicester-City-Southampto...,
377,2210648,38,LIV,WOL,38,2022-05-22,3–1,/en/matches/8b0f3e28/Liverpool-Wolverhampton-W...,
378,2210649,38,MCI,AVL,38,2022-05-22,3–2,/en/matches/7b4b63d0/Manchester-City-Aston-Vil...,
379,2210650,38,NOR,TOT,38,2022-05-22,0–5,/en/matches/771ecae9/Norwich-City-Tottenham-Ho...,


In [9]:
# i = 0

# metadata = tuple(last_GW_games.values[i][[2, 3, 7, 4, 5, 6]])

# home_team = metadata[0]
# away_team = metadata[1]
# match_report = metadata[2]
# gameweek = metadata[3]
# date_fixture = metadata[4]
# result = metadata[5]

In [10]:
tabs = ['summary', 'passing', 'passing_types', 'defense', 'possession', 'misc']

# колонка Start с информацией, стартовал или нет
def get_start_typo(fixture_df, col='Min'): 
    ser = fixture_df[col].copy()
    lst = ser.tolist()
    start = []

    skip_next = False
    for ind, val in enumerate(lst):
        if skip_next == True:
            skip_next = False
            continue
        try:
            a = val
            b = lst[ind+1]

            sum_ = a+b
            if sum_ == 90:
                start.extend(['Y', 'N'])
                skip_next = True
                continue
            else:
                start.append('Y')

        except IndexError:
            start.append('Y')
    
    fixture_df.insert(fixture_df.columns.get_loc(col)+1, 'Start', start)
    
# маркировка капитанов звёздочками в созданной колонке Start
def get_n_mark_captains(fixture_df):
    nonBreakSpace = u'\xa0'
    lst_raw = soup.find_all('div', {'class' :'datapoint'})[1::2] # необработанный список 
    lst = [t.find('a').text.replace(nonBreakSpace, ' ') for t in lst_raw]
    
    for captain in lst:
        fixture_df.loc[fixture_df['Player'] == captain, 'Start'] = 'Y*'

# сбор данных с одной из вкладок (tabs) в форме таблиц
def tab_stats_processing(table):
    df = pd.read_html(str(table))[0]
    df = df.iloc[:-1, :]
    df.columns = df.columns.map(lambda x: '|'.join([str(i) for i in x if not i.startswith('Unnamed')]))
    df.dropna(axis=0, how='all', inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    return df

def get_profile_team(soup, team, teams_list, datetime, gameweek_num, result_game, tabs_list, tag=['Home', 'Away']):
    hash_team = shortname_to_hash[team]
    name_team = hash_to_teamname[hash_team]
    name_opponent = shortname_to_name[teams_list[1-teams_list.index(team)]]
    
    profile_team = pd.DataFrame()
    
    for tab_name in tabs_list:
        table_id = f'stats_{hash_team}_{tab_name}'
        table = soup.find_all('table', id=table_id) # main data
        data_i = tab_stats_processing(table)
        profile_team = pd.concat([profile_team, data_i], axis=1)
        
    profile_team = profile_team.loc[:,~profile_team.columns.duplicated()]
    profile_team.insert(0, 'Squad', name_team)
    profile_team.insert(2, 'Date', datetime)
    profile_team.insert(3, 'Day', calendar.day_name[datetime.weekday()][:3])
    profile_team.insert(4, 'Opponent', name_opponent)
    profile_team.insert(5, 'Round', f'Matchweek {gameweek_num}')
    profile_team.insert(6, 'Venue', tag)
    
    if tag == 'Home':
        profile_team.insert(7, 'Result', result_game)
    elif tag == 'Away':
        result_reversed = '–'.join(result.split('–')[::-1])
        profile_team.insert(7, 'Result', result_reversed)
    else:
        pass

    return profile_team

def players_team_stats_new(report, teams_list, datetime, gameweek_num, result_game, tabs_list):
    url = f'https://fbref.com{report}'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # формирование профилей команд
    home = get_profile_team(soup, teams_list[0], teams_list, datetime, gameweek_num, result_game, tabs_list, 'Home')
    away = get_profile_team(soup, teams_list[1], teams_list, datetime, gameweek_num, result_game, tabs_list, 'Away')
    stats_fixture = pd.concat([home, away])
    # добавление колонки Start
    get_start_typo(stats_fixture) 
    # поиск и маркировка капитанов в колонке Start звездочкой
    get_n_mark_captains(stats_fixture) 
    
    response.close()
    return stats_fixture


# def players_team_stats(report, teams_list, datetime, gameweek_num, tabs_list):
#     url = f'https://fbref.com{report}'
#     response = requests.get(url)
#     soup = BeautifulSoup(response.content, 'html.parser')
#     stats_fixture = pd.DataFrame()
    
#     for team in teams_list:
#         hash_team = shortname_to_hash[team]
#         name_team = hash_to_teamname[hash_team]
        
#         name_opponent = shortname_to_name[teams_list[1-teams_list.index(team)]]
                
#         profile_team = pd.DataFrame()
        
#         for tab_name in tabs_list:
#             table_id = f'stats_{hash_team}_{tab_name}'
#             table = soup.find_all('table', id=table_id) # main data
            
#             data_i = tab_stats_processing(table)
#             profile_team = pd.concat([profile_team, data_i], axis=1)

#         profile_team = profile_team.loc[:,~profile_team.columns.duplicated()]
#         profile_team.insert(0, 'Squad', name_team)
#         profile_team.insert(2, 'Date', datetime)
#         profile_team.insert(3, 'Day', calendar.day_name[datetime.weekday()][:3])
#         profile_team.insert(4, 'Opponent', name_opponent)
#         profile_team.insert(5, 'Round', f'Gameweek {gameweek_num}')
        
#         stats_fixture = pd.concat([stats_fixture, profile_team])
            
#     # добавление колонки Start
#     get_start_typo(stats_fixture) 
#     # поиск и маркировка капитанов в колонке Start звездочкой
#     get_n_mark_captains(stats_fixture) 
    
#     response.close()
    
#     return stats_fixture

In [11]:
# fixture_new = players_team_stats_new(match_report, [home_team, away_team], date_fixture, gameweek, result, tabs)
# fixture_new.head()

In [12]:
# fixture = players_team_stats(match_report, [home_team, away_team], date_fixture, gameweek, tabs)
# fixture.head()

In [13]:
GW_data = pd.DataFrame()

for i in tqdm(range(last_GW_games.shape[0])):
    
    metadata = tuple(last_GW_games.values[i][[2, 3, 7, 4, 5, 6]])

    home_team = metadata[0]
    away_team = metadata[1]
    match_report = metadata[2]
    gameweek = metadata[3]
    date_fixture = metadata[4]
    result = metadata[5]
    
    # защита от nan-ов
    if match_report != match_report:
        continue

    fixture = players_team_stats_new(match_report, [home_team, away_team], date_fixture, gameweek, result, tabs)
    
    GW_data = pd.concat([GW_data, fixture])    

  0%|          | 0/10 [00:00<?, ?it/s]

In [14]:
GW_data.head()

Unnamed: 0,Squad,Player,Date,Day,Opponent,Round,Venue,Result,#,Nation,Pos,Age,Min,Start,Performance|Gls,Performance|Ast,Performance|PK,Performance|PKatt,Performance|Sh,Performance|SoT,Performance|CrdY,Performance|CrdR,Performance|Touches,Performance|Press,Performance|Tkl,Performance|Int,Performance|Blocks,Expected|xG,Expected|npxG,Expected|xA,SCA|SCA,SCA|GCA,Passes|Cmp,Passes|Att,Passes|Cmp%,Passes|Prog,Carries|Carries,Carries|Prog,Dribbles|Succ,Dribbles|Att,Total|Cmp,Total|Att,Total|Cmp%,Total|TotDist,Total|PrgDist,Short|Cmp,Short|Att,Short|Cmp%,Medium|Cmp,Medium|Att,Medium|Cmp%,Long|Cmp,Long|Att,Long|Cmp%,Ast,xA,KP,1/3,PPA,CrsPA,Prog,Att,Pass Types|Live,Pass Types|Dead,Pass Types|FK,Pass Types|TB,Pass Types|Press,Pass Types|Sw,Pass Types|Crs,Pass Types|CK,Corner Kicks|In,Corner Kicks|Out,Corner Kicks|Str,Height|Ground,Height|Low,Height|High,Body Parts|Left,Body Parts|Right,Body Parts|Head,Body Parts|TI,Body Parts|Other,Outcomes|Cmp,Outcomes|Off,Outcomes|Out,Outcomes|Int,Outcomes|Blocks,Tackles|Tkl,Tackles|TklW,Tackles|Def 3rd,Tackles|Mid 3rd,Tackles|Att 3rd,Vs Dribbles|Tkl,Vs Dribbles|Att,Vs Dribbles|Tkl%,Vs Dribbles|Past,Pressures|Press,Pressures|Succ,Pressures|%,Pressures|Def 3rd,Pressures|Mid 3rd,Pressures|Att 3rd,Blocks|Blocks,Blocks|Sh,Blocks|ShSv,Blocks|Pass,Int,Tkl+Int,Clr,Err,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|TotDist,Carries|PrgDist,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%,Receiving|Prog,Performance|2CrdY,Performance|Fls,Performance|Fld,Performance|Off,Performance|Crs,Performance|TklW,Performance|PKwon,Performance|PKcon,Performance|OG,Performance|Recov,Aerial Duels|Won,Aerial Duels|Lost,Aerial Duels|Won%
0,Arsenal,Eddie Nketiah,2022-05-22,Sun,Everton,Matchweek 38,Home,5–1,30.0,eng ENG,FW,22-357,66,Y,1,0,0,0,3,1,0,0,32,13,0,1,1,0.3,0.3,0.1,3,0,16,22,72.7,1,21,1,1,3,16,22,72.7,215,31,8,12,66.7,7,8,87.5,0,1,0.0,0,0.1,1,1,0,0,1,22,22,0,0,0,1,0,1,0,0,0,0,16,3,3,4,14,3,0,1,16,0,0,1,0,0,0,0,0,0,0,0,,0,13,4,30.8,0,4,9,1,0,0,1,1,1,0,0,32,0,0,14,19,11,32,33.3,1,0,70,19,1,1,2,0,29,26,89.7,8,0,1,2,0,1,0,0,0,0,4,1,0,100.0
1,Arsenal,Alexandre Lacazette,2022-05-22,Sun,Everton,Matchweek 38,Home,5–1,9.0,fr FRA,FW,30-359,24,N,0,0,0,0,2,0,0,0,10,4,0,0,0,0.1,0.1,0.0,0,0,7,7,100.0,2,6,0,0,0,7,7,100.0,146,25,3,3,100.0,3,3,100.0,1,1,100.0,0,0.0,0,2,0,0,2,7,7,0,0,0,1,0,0,0,0,0,0,5,0,2,1,5,1,0,0,7,0,0,0,0,0,0,0,0,0,0,0,,0,4,1,25.0,0,3,1,0,0,0,0,0,0,0,0,10,0,1,6,4,2,10,,0,0,16,1,0,0,0,1,14,9,64.3,5,0,2,0,0,0,0,0,0,0,1,0,0,
2,Arsenal,Martinelli,2022-05-22,Sun,Everton,Matchweek 38,Home,5–1,35.0,br BRA,LW,20-338,90,Y,1,1,1,1,3,1,0,0,67,20,2,1,4,1.7,1.0,0.3,8,2,39,47,83.0,5,49,5,3,5,39,47,83.0,659,131,18,20,90.0,17,21,81.0,3,4,75.0,1,0.3,4,0,2,0,5,47,47,0,0,0,7,0,3,0,0,0,0,33,7,7,3,41,2,0,0,39,0,0,3,0,2,2,1,1,0,0,2,0.0,2,20,11,55.0,4,7,9,4,1,0,3,1,3,2,0,67,4,9,22,43,12,66,60.0,3,1,172,76,2,0,3,1,54,47,87.0,10,0,0,0,0,3,2,0,0,0,9,1,1,50.0
3,Arsenal,Bukayo Saka,2022-05-22,Sun,Everton,Matchweek 38,Home,5–1,7.0,eng ENG,RW,20-259,77,Y,0,1,0,0,3,0,0,0,56,6,0,1,1,0.2,0.2,0.1,7,2,37,46,80.4,4,32,9,1,2,37,46,80.4,590,198,17,21,81.0,17,18,94.4,3,6,50.0,1,0.1,2,2,2,0,4,46,35,11,0,0,7,2,1,8,3,0,2,27,10,9,30,10,0,3,1,37,0,0,0,0,0,0,0,0,0,0,0,,0,6,2,33.3,0,3,3,1,0,0,1,1,1,0,0,56,0,5,17,37,6,45,50.0,1,0,185,129,1,3,1,1,46,35,76.1,7,0,2,1,2,1,0,0,0,0,6,2,1,66.7
4,Arsenal,Nicolas Pépé,2022-05-22,Sun,Everton,Matchweek 38,Home,5–1,19.0,ci CIV,RW,26-358,13,N,0,1,0,0,1,1,0,0,12,4,0,0,0,0.0,0.0,0.1,2,1,10,10,100.0,0,11,4,1,1,10,10,100.0,127,6,5,5,100.0,4,4,100.0,0,0,,1,0.1,1,1,0,0,0,10,10,0,0,0,1,0,0,0,0,0,0,9,1,0,9,0,0,0,1,10,0,0,0,0,0,0,0,0,0,0,0,,0,4,1,25.0,1,2,1,0,0,0,0,0,0,0,0,12,0,1,5,8,2,12,100.0,1,0,86,37,1,1,0,0,13,10,76.9,5,0,0,0,0,0,0,0,0,0,1,0,0,


In [15]:
profile_cols = ['Date', 'Day', 'Round', 'Venue', 'Result', 'Squad', 'Opponent', 'Start', 'Pos', 'Min',\
                'Performance|Gls', 'Performance|Ast', 'Performance|PK', 'Performance|PKatt', 'Performance|Sh',\
                'Performance|SoT', 'Performance|CrdY', 'Performance|CrdR', 'Performance|Touches',\
                'Performance|Press', 'Performance|Tkl', 'Performance|Int', 'Performance|Blocks',\
                'Expected|xG', 'Expected|npxG', 'Expected|xA', 'SCA|SCA', 'SCA|GCA', 'Passes|Cmp',\
                'Passes|Att', 'Passes|Cmp%', 'Passes|Prog', 'Carries|Carries', 'Carries|Prog' \
                'Dribbles|Succ', 'Dribbles|Att', 'Total|Cmp', 'Total|Att', 'Total|Cmp%', 'Total|TotDist',\
                'Total|PrgDist', 'Short|Cmp', 'Short|Att', 'Short|Cmp%', 'Medium|Cmp', 'Medium|Att',\
                'Medium|Cmp%', 'Long|Cmp', 'Long|Att', 'Long|Cmp%', 'Ast', 'xA', 'KP', '1/3', 'PPA',\
                'CrsPA', 'Prog', 'Att', 'Pass Types|Live', 'Pass Types|Dead', 'Pass Types|FK',\
                'Pass Types|TB', 'Pass Types|Press', 'Pass Types|Sw', 'Pass Types|Crs',\
                'Pass Types|CK', 'Corner Kicks|In', 'Corner Kicks|Out', 'Corner Kicks|Str',\
                'Height|Ground', 'Height|Low', 'Height|High', 'Body Parts|Left', 'Body Parts|Right',\
                'Body Parts|Head', 'Body Parts|TI', 'Body Parts|Other', 'Outcomes|Cmp', 'Outcomes|Off',\
                'Outcomes|Out', 'Outcomes|Int', 'Outcomes|Blocks', 'Touches|Touches', 'Touches|Def Pen',\
                'Touches|Def 3rd', 'Touches|Mid 3rd', 'Touches|Att 3rd', 'Touches|Att Pen', 'Touches|Live',\
                'Dribbles|Succ%', 'Dribbles|#Pl', 'Dribbles|Megs', 'Carries|TotDist', 'Carries|PrgDist',\
                'Carries|1/3', 'Carries|CPA', 'Carries|Mis', 'Carries|Dis', 'Receiving|Targ', 'Receiving|Rec',\
                'Receiving|Rec%', 'Receiving|Prog', 'Performance|2CrdY', 'Performance|Fls', 'Performance|Fld',\
                'Performance|Off', 'Performance|Crs', 'Performance|TklW', 'Performance|PKwon', 'Performance|PKcon',\
                'Performance|OG', 'Performance|Recov', 'Aerial Duels|Won', 'Aerial Duels|Lost', 'Aerial Duels|Won%']

In [16]:
from itertools import product

def filler(word, from_char, to_char):
    options = [(c,) if c != from_char else (from_char, to_char) for c in word]
    return (''.join(o) for o in product(*options))

def get_pathway_new(pathway, season='2021-2022'):
    player_data = f'PlayerData_{season}'
    player_names_combinations = list(filler(pathway[1], ' ', '-'))
    team = pathway[0]
    # print(player_names_combinations, team)
    
    paths = [f'./{player_data}/{"/".join([team, i])}.csv'for i in player_names_combinations]
    try:
        existed_path = list(filter(lambda i: os.path.isfile(i), paths))[0]
        return existed_path
    except:
        return False
    
def get_profile_new(pathway, season='2021-2022'):
    player_data = f'PlayerData_{season}'
    filepath = get_pathway_new(pathway)
    
    # если такого файла нет с игроком
    if filepath == False:
        df_profile = pd.DataFrame(columns=profile_cols)
        filepath = f'./{player_data}/{"/".join(pathway)}.csv'
    else:
        df_profile = pd.read_csv(filepath)
        
    return df_profile, filepath

In [17]:
# i = 56
# for_path = list(GW_data.values[i][0:2])
# print(for_path)
# profile = get_profile_new(for_path)[0]
# gw_log = GW_data.iloc[i:i+1, :]
# profile.tail()

In [18]:
for i in tqdm(range(GW_data.shape[0])):
    for_path = list(GW_data.values[i][0:2])
    profile = get_profile_new(for_path)[0]
    gw_log = GW_data.iloc[i:i+1, :]
    
    # print('/'.join(for_path) + '.csv')
    
    profile = pd.concat([profile, gw_log], join='inner')
    profile.drop_duplicates(subset=['Day', 'Venue', 'Opponent'], inplace=True)
    save_to = get_profile_new(for_path)[1]
    profile.to_csv(save_to, index=False)

  0%|          | 0/280 [00:00<?, ?it/s]

---

**TODO:**

1. Итератор добавления данных в существующиеся csv 
2. Выяснить метод для поиска gameweek через API -- check
3. Автоматический запуск скрепинга -- уже сдвинулись
4. Начать переписывать в форме скриптов с описаниями
5. Добавил автосоздание профиля, если нет профиля в команде

---

In [76]:
def get_pathway(pathway, season='2021-2022'):
    player_data = f'PlayerData_{season}'
    player_names_combinations = list(filler(pathway[1], ' ', '-'))
    team = pathway[0]
    print(player_names_combinations, team)
    
    paths = [f'./{player_data}/{"/".join([team, i])}.csv'for i in player_names_combinations]
    existed_path = list(filter(lambda i: os.path.isfile(i), paths))[0]
    print(existed_path)
    
    return './' + player_data + '/' + '/'.join(pathway) + '.csv'

def get_profile(pathway, season='2021-2022'):
    player_data = f'PlayerData_{season}'
    #filepath = get_pathway(pathway)
    player_names_combinations = list(filler(pathway[1], ' ', '-'))
    team = pathway[0]
    
    i = 0
    while True:
        pathway_adj = [team, player_names_combinations[i]]
        filepath_adj = get_pathway(pathway_adj)
        #print(filepath_adj)
        i += 1
        try:
            df_profile = pd.read_csv(filepath_adj)
            break
        except FileNotFoundError:
            continue
    return df_profile, filepath_adj

from itertools import product

def filler(word, from_char, to_char):
    options = [(c,) if c != from_char else (from_char, to_char) for c in word]
    return (''.join(o) for o in product(*options))

# i = 56
# for_path = list(GW_data.values[i][0:2])
# print(for_path)
# profile = get_profile(for_path)[0]
# gw_log = GW_data.iloc[i:i+1, :]

# profile = pd.concat([profile, gw_log], join='inner')
# profile.drop_duplicates(subset=['Day', 'Venue', 'Opponent'], inplace=True)
# save_to = get_profile(for_path)[1]
# profile.to_csv(save_to, index=False)
# profile.tail()

In [19]:
# get_profile(for_path)[1]

In [23]:
GW_data.head()

Unnamed: 0,Squad,Player,Date,Day,Opponent,Round,Venue,Result,#,Nation,Pos,Age,Min,Start,Performance|Gls,Performance|Ast,Performance|PK,Performance|PKatt,Performance|Sh,Performance|SoT,Performance|CrdY,Performance|CrdR,Performance|Touches,Performance|Press,Performance|Tkl,Performance|Int,Performance|Blocks,Expected|xG,Expected|npxG,Expected|xA,SCA|SCA,SCA|GCA,Passes|Cmp,Passes|Att,Passes|Cmp%,Passes|Prog,Carries|Carries,Carries|Prog,Dribbles|Succ,Dribbles|Att,Total|Cmp,Total|Att,Total|Cmp%,Total|TotDist,Total|PrgDist,Short|Cmp,Short|Att,Short|Cmp%,Medium|Cmp,Medium|Att,Medium|Cmp%,Long|Cmp,Long|Att,Long|Cmp%,Ast,xA,KP,1/3,PPA,CrsPA,Prog,Att,Pass Types|Live,Pass Types|Dead,Pass Types|FK,Pass Types|TB,Pass Types|Press,Pass Types|Sw,Pass Types|Crs,Pass Types|CK,Corner Kicks|In,Corner Kicks|Out,Corner Kicks|Str,Height|Ground,Height|Low,Height|High,Body Parts|Left,Body Parts|Right,Body Parts|Head,Body Parts|TI,Body Parts|Other,Outcomes|Cmp,Outcomes|Off,Outcomes|Out,Outcomes|Int,Outcomes|Blocks,Tackles|Tkl,Tackles|TklW,Tackles|Def 3rd,Tackles|Mid 3rd,Tackles|Att 3rd,Vs Dribbles|Tkl,Vs Dribbles|Att,Vs Dribbles|Tkl%,Vs Dribbles|Past,Pressures|Press,Pressures|Succ,Pressures|%,Pressures|Def 3rd,Pressures|Mid 3rd,Pressures|Att 3rd,Blocks|Blocks,Blocks|Sh,Blocks|ShSv,Blocks|Pass,Int,Tkl+Int,Clr,Err,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|TotDist,Carries|PrgDist,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%,Receiving|Prog,Performance|2CrdY,Performance|Fls,Performance|Fld,Performance|Off,Performance|Crs,Performance|TklW,Performance|PKwon,Performance|PKcon,Performance|OG,Performance|Recov,Aerial Duels|Won,Aerial Duels|Lost,Aerial Duels|Won%
0,Brighton--Hove-Albion,Leandro Trossard,2022-01-14,Fri,Crystal Palace,Gameweek 22,Home,1–1,11.0,be BEL,"FW,LM,AM",27-041,90,Y,0,0,0,0,6,1,0,0,42,14,1,1,2,0.6,0.6,0.0,2,0,25,30,83.3,3,28,4,2,2,25,30,83.3,485,96,14,17,82.4,6,7,85.7,4,4,100.0,0,0.0,0,4,0,0,3,30,30,0,0,0,5,3,0,0,0,0,0,16,9,5,7,21,2,0,0,25,0,1,0,3,1,0,0,1,0,0,0,,0,14,5,35.7,1,4,9,2,0,0,2,1,2,0,0,42,0,2,14,29,7,42,100.0,2,0,133,74,1,2,1,2,41,30,73.2,5,0,2,0,1,0,0,0,0,0,8,0,1,0.0
1,Brighton--Hove-Albion,Neal Maupay,2022-01-14,Fri,Crystal Palace,Gameweek 22,Home,1–1,9.0,fr FRA,FW,25-153,90,Y,0,0,0,0,1,0,0,0,23,10,0,0,1,0.0,0.0,0.0,3,0,10,15,66.7,1,16,1,2,3,10,15,66.7,173,10,4,6,66.7,5,6,83.3,1,2,50.0,0,0.0,1,0,1,0,1,15,13,2,0,0,2,0,1,0,0,0,0,11,3,1,4,9,1,0,1,10,0,1,0,2,0,0,0,0,0,0,0,,0,10,1,10.0,0,3,7,1,0,0,1,0,0,1,0,23,0,1,8,15,4,21,66.7,2,0,59,44,1,0,1,2,30,14,46.7,4,0,2,2,0,1,0,0,0,0,4,0,2,0.0
2,Brighton--Hove-Albion,Jakub Moder,2022-01-14,Fri,Crystal Palace,Gameweek 22,Home,1–1,15.0,pl POL,"AM,DM",22-282,90,Y,0,0,0,0,2,1,0,0,54,14,1,1,2,0.3,0.3,0.0,3,0,29,41,70.7,2,33,3,4,5,29,41,70.7,522,93,18,24,75.0,6,8,75.0,5,8,62.5,0,0.0,0,2,3,1,2,41,41,0,0,0,5,2,4,0,0,0,0,26,8,7,13,24,2,0,0,29,0,1,0,0,1,1,0,1,0,0,1,0.0,1,14,7,50.0,4,6,4,2,0,0,2,1,2,0,0,54,1,2,24,31,4,54,80.0,4,0,113,67,2,0,3,1,48,37,77.1,6,0,1,0,0,4,1,0,0,0,9,2,3,40.0
3,Brighton--Hove-Albion,Alexis Mac Allister,2022-01-14,Fri,Crystal Palace,Gameweek 22,Home,1–1,10.0,ar ARG,"CM,DM",23-021,90,Y,0,0,0,0,1,0,0,0,67,25,7,1,2,0.0,0.0,0.3,4,0,44,55,80.0,7,33,4,0,0,44,55,80.0,885,190,19,21,90.5,18,19,94.7,7,15,46.7,0,0.3,2,5,2,1,7,55,48,7,1,1,7,5,1,6,3,2,1,35,9,11,6,46,1,0,0,44,0,1,0,0,7,6,2,3,2,2,4,50.0,2,25,13,52.0,7,13,5,2,0,0,2,1,8,0,0,67,0,7,34,30,1,60,,0,0,193,124,2,0,0,1,41,36,87.8,3,0,2,2,0,1,6,0,0,0,12,2,1,66.7
4,Brighton--Hove-Albion,Pascal Groß,2022-01-14,Fri,Crystal Palace,Gameweek 22,Home,1–1,13.0,de GER,"CM,DM",30-213,72,Y,0,0,0,1,1,0,0,0,60,15,3,0,1,0.8,0.0,0.3,5,0,38,51,74.5,1,41,4,0,2,38,51,74.5,729,171,14,19,73.7,14,15,93.3,8,14,57.1,0,0.3,4,0,2,2,1,51,47,4,2,0,13,3,6,2,0,2,0,32,9,10,10,38,1,0,0,38,0,2,2,1,3,3,1,2,0,2,4,50.0,2,15,9,60.0,5,6,4,1,0,0,1,0,3,1,0,60,0,9,23,29,4,55,0.0,0,1,162,56,0,0,0,0,46,40,87.0,7,0,1,1,0,6,3,0,0,0,9,2,3,40.0


In [32]:
GW_data.head(100)

Unnamed: 0,Squad,Player,Date,Day,Opponent,Round,Venue,Result,#,Nation,Pos,Age,Min,Start,Performance|Gls,Performance|Ast,Performance|PK,Performance|PKatt,Performance|Sh,Performance|SoT,Performance|CrdY,Performance|CrdR,Performance|Touches,Performance|Press,Performance|Tkl,Performance|Int,Performance|Blocks,Expected|xG,Expected|npxG,Expected|xA,SCA|SCA,SCA|GCA,Passes|Cmp,Passes|Att,Passes|Cmp%,Passes|Prog,Carries|Carries,Carries|Prog,Dribbles|Succ,Dribbles|Att,Total|Cmp,Total|Att,Total|Cmp%,Total|TotDist,Total|PrgDist,Short|Cmp,Short|Att,Short|Cmp%,Medium|Cmp,Medium|Att,Medium|Cmp%,Long|Cmp,Long|Att,Long|Cmp%,Ast,xA,KP,1/3,PPA,CrsPA,Prog,Att,Pass Types|Live,Pass Types|Dead,Pass Types|FK,Pass Types|TB,Pass Types|Press,Pass Types|Sw,Pass Types|Crs,Pass Types|CK,Corner Kicks|In,Corner Kicks|Out,Corner Kicks|Str,Height|Ground,Height|Low,Height|High,Body Parts|Left,Body Parts|Right,Body Parts|Head,Body Parts|TI,Body Parts|Other,Outcomes|Cmp,Outcomes|Off,Outcomes|Out,Outcomes|Int,Outcomes|Blocks,Tackles|Tkl,Tackles|TklW,Tackles|Def 3rd,Tackles|Mid 3rd,Tackles|Att 3rd,Vs Dribbles|Tkl,Vs Dribbles|Att,Vs Dribbles|Tkl%,Vs Dribbles|Past,Pressures|Press,Pressures|Succ,Pressures|%,Pressures|Def 3rd,Pressures|Mid 3rd,Pressures|Att 3rd,Blocks|Blocks,Blocks|Sh,Blocks|ShSv,Blocks|Pass,Int,Tkl+Int,Clr,Err,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|TotDist,Carries|PrgDist,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%,Receiving|Prog,Performance|2CrdY,Performance|Fls,Performance|Fld,Performance|Off,Performance|Crs,Performance|TklW,Performance|PKwon,Performance|PKcon,Performance|OG,Performance|Recov,Aerial Duels|Won,Aerial Duels|Lost,Aerial Duels|Won%
0,Brighton--Hove-Albion,Leandro Trossard,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,11.0,be BEL,"FW,LM,AM",27-041,90,Y,0,0,0,0,6,1,0,0,42,14,1,1,2,0.6,0.6,0.0,2,0,25,30,83.3,3,28,4,2,2,25,30,83.3,485,96,14,17,82.4,6,7,85.7,4,4,100.0,0,0.0,0,4,0,0,3,30,30,0,0,0,5,3,0,0,0,0,0,16,9,5,7,21,2,0,0,25,0,1,0,3,1,0,0,1,0,0,0,,0,14,5,35.7,1,4,9,2,0,0,2,1,2,0,0,42,0,2,14,29,7,42,100.0,2,0,133,74,1,2,1,2,41,30,73.2,5,0,2,0,1,0,0,0,0,0,8,0,1,0.0
1,Brighton--Hove-Albion,Neal Maupay,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,9.0,fr FRA,FW,25-153,90,Y,0,0,0,0,1,0,0,0,23,10,0,0,1,0.0,0.0,0.0,3,0,10,15,66.7,1,16,1,2,3,10,15,66.7,173,10,4,6,66.7,5,6,83.3,1,2,50.0,0,0.0,1,0,1,0,1,15,13,2,0,0,2,0,1,0,0,0,0,11,3,1,4,9,1,0,1,10,0,1,0,2,0,0,0,0,0,0,0,,0,10,1,10.0,0,3,7,1,0,0,1,0,0,1,0,23,0,1,8,15,4,21,66.7,2,0,59,44,1,0,1,2,30,14,46.7,4,0,2,2,0,1,0,0,0,0,4,0,2,0.0
2,Brighton--Hove-Albion,Jakub Moder,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,15.0,pl POL,"AM,DM",22-282,90,Y,0,0,0,0,2,1,0,0,54,14,1,1,2,0.3,0.3,0.0,3,0,29,41,70.7,2,33,3,4,5,29,41,70.7,522,93,18,24,75.0,6,8,75.0,5,8,62.5,0,0.0,0,2,3,1,2,41,41,0,0,0,5,2,4,0,0,0,0,26,8,7,13,24,2,0,0,29,0,1,0,0,1,1,0,1,0,0,1,0.0,1,14,7,50.0,4,6,4,2,0,0,2,1,2,0,0,54,1,2,24,31,4,54,80.0,4,0,113,67,2,0,3,1,48,37,77.1,6,0,1,0,0,4,1,0,0,0,9,2,3,40.0
3,Brighton--Hove-Albion,Alexis Mac Allister,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,10.0,ar ARG,"CM,DM",23-021,90,Y,0,0,0,0,1,0,0,0,67,25,7,1,2,0.0,0.0,0.3,4,0,44,55,80.0,7,33,4,0,0,44,55,80.0,885,190,19,21,90.5,18,19,94.7,7,15,46.7,0,0.3,2,5,2,1,7,55,48,7,1,1,7,5,1,6,3,2,1,35,9,11,6,46,1,0,0,44,0,1,0,0,7,6,2,3,2,2,4,50.0,2,25,13,52.0,7,13,5,2,0,0,2,1,8,0,0,67,0,7,34,30,1,60,,0,0,193,124,2,0,0,1,41,36,87.8,3,0,2,2,0,1,6,0,0,0,12,2,1,66.7
4,Brighton--Hove-Albion,Pascal Groß,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,13.0,de GER,"CM,DM",30-213,72,Y,0,0,0,1,1,0,0,0,60,15,3,0,1,0.8,0.0,0.3,5,0,38,51,74.5,1,41,4,0,2,38,51,74.5,729,171,14,19,73.7,14,15,93.3,8,14,57.1,0,0.3,4,0,2,2,1,51,47,4,2,0,13,3,6,2,0,2,0,32,9,10,10,38,1,0,0,38,0,2,2,1,3,3,1,2,0,2,4,50.0,2,15,9,60.0,5,6,4,1,0,0,1,0,3,1,0,60,0,9,23,29,4,55,0.0,0,1,162,56,0,0,0,0,46,40,87.0,7,0,1,1,0,6,3,0,0,0,9,2,3,40.0
5,Brighton--Hove-Albion,Solly March,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,20.0,eng ENG,RM,27-178,18,N,0,0,0,0,2,0,0,0,20,3,2,0,0,0.0,0.0,0.1,1,0,13,16,81.3,0,18,4,0,0,13,16,81.3,225,35,6,7,85.7,6,7,85.7,1,2,50.0,0,0.1,1,0,1,1,0,16,16,0,0,0,7,2,2,0,0,0,0,12,3,1,13,2,1,0,0,13,0,1,0,1,2,2,0,2,0,0,0,,0,3,2,66.7,1,2,0,0,0,0,0,0,2,0,0,20,0,1,3,17,3,20,,0,0,111,53,1,1,0,1,18,16,88.9,3,0,0,0,0,2,2,0,0,0,2,0,0,
6,Brighton--Hove-Albion,Adam Lallana,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,14.0,eng ENG,DM,33-249,55,Y,0,0,0,0,2,0,0,0,38,9,2,2,1,0.0,0.0,0.0,1,0,26,30,86.7,3,26,2,1,3,26,30,86.7,490,149,13,15,86.7,11,12,91.7,2,3,66.7,0,0.0,0,6,0,0,3,30,30,0,0,0,8,0,0,0,0,0,0,17,11,2,5,22,0,0,1,26,0,1,2,0,2,2,1,1,0,0,1,0.0,1,9,5,55.6,2,4,3,1,1,0,0,2,4,1,0,38,3,11,18,11,1,38,33.3,1,0,107,44,0,0,0,0,22,20,90.9,3,0,1,0,0,0,2,0,0,0,12,2,1,66.7
7,Brighton--Hove-Albion,Tariq Lamptey,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,2.0,eng ENG,"RB,WB",21-106,35,N,0,0,0,0,0,0,0,0,37,1,1,0,0,0.0,0.0,0.0,2,0,25,34,73.5,1,32,13,1,1,25,34,73.5,338,59,18,22,81.8,5,6,83.3,1,4,25.0,0,0.0,1,1,0,0,1,34,32,2,1,0,4,0,3,0,0,0,0,25,6,3,2,31,0,1,0,25,0,1,0,4,1,0,1,0,0,1,1,100.0,0,1,0,0.0,1,0,0,0,0,0,0,0,1,0,0,37,0,2,13,24,2,35,100.0,1,0,188,131,1,1,0,0,33,30,90.9,1,0,1,1,0,3,0,0,0,0,4,0,0,
8,Brighton--Hove-Albion,Marc Cucurella,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,3.0,es ESP,"LB,WB",23-176,90,Y,0,0,0,0,0,0,0,0,71,16,1,0,0,0.0,0.0,0.4,3,0,51,64,79.7,5,38,6,0,1,51,64,79.7,1140,307,20,24,83.3,21,26,80.8,10,11,90.9,0,0.4,2,5,1,0,5,64,48,16,1,0,8,3,2,1,0,0,0,37,16,11,38,8,2,14,0,51,0,0,2,3,1,1,1,0,0,0,1,0.0,1,16,4,25.0,9,5,2,0,0,0,0,0,1,1,0,71,1,14,35,26,1,55,0.0,0,1,201,148,3,0,0,1,41,37,90.2,1,0,0,1,0,2,1,0,0,0,13,2,2,50.0
9,Brighton--Hove-Albion,Dan Burn,2022-01-14,Fri,Crystal Palace,Matchweek 22,Home,1–1,33.0,eng ENG,CB,29-250,90,Y,0,0,0,0,1,1,0,0,90,9,1,2,2,0.1,0.1,0.0,0,0,77,82,93.9,2,69,5,0,0,77,82,93.9,1863,335,18,18,100.0,33,35,94.3,25,28,89.3,0,0.0,0,6,0,0,2,82,82,0,0,0,2,2,0,0,0,0,0,74,2,6,76,3,3,0,0,77,1,1,0,0,1,1,1,0,0,0,0,,0,9,4,44.4,5,2,2,2,0,0,2,2,3,2,1,90,5,33,57,5,1,90,,0,0,384,207,0,0,1,0,77,76,98.7,1,0,2,1,0,0,1,0,0,0,7,1,1,50.0


In [None]:
player_data = f'PlayerData_{season}'
#filepath = get_pathway(pathway)
player_names_combinations = list(filler(pathway[1], ' ', '-'))
team = pathway[0]


In [56]:
i = 56
for_path = list(GW_data.values[i][0:2])
#print(for_path)

#print(get_pathway(for_path))

profile = get_profile(for_path)[0]
gw_log = GW_data.iloc[i:i+1, :]
profile.tail()

['Allan Saint-Maximin', 'Allan-Saint-Maximin'] Newcastle-United
./PlayerData_2021-2022/Newcastle-United/Allan Saint-Maximin.csv


Unnamed: 0,Date,Day,Round,Venue,Result,Squad,Opponent,Start,Pos,Min,Performance|Gls,Performance|Ast,Performance|PK,Performance|PKatt,Performance|Sh,Performance|SoT,Performance|CrdY,Performance|CrdR,Performance|Touches,Performance|Press,Performance|Tkl,Performance|Int,Performance|Blocks,Expected|xG,Expected|npxG,Expected|xA,SCA|SCA,SCA|GCA,Passes|Cmp,Passes|Att,Passes|Cmp%,Passes|Prog,Carries|Carries,Carries|Prog,Dribbles|Succ,Dribbles|Att,Total|Cmp,Total|Att,Total|Cmp%,Total|TotDist,Total|PrgDist,Short|Cmp,Short|Att,Short|Cmp%,Medium|Cmp,Medium|Att,Medium|Cmp%,Long|Cmp,Long|Att,Long|Cmp%,Ast,xA,KP,1/3,PPA,CrsPA,Prog,Att,Pass Types|Live,Pass Types|Dead,Pass Types|FK,Pass Types|TB,Pass Types|Press,Pass Types|Sw,Pass Types|Crs,Pass Types|CK,Corner Kicks|In,Corner Kicks|Out,Corner Kicks|Str,Height|Ground,Height|Low,Height|High,Body Parts|Left,Body Parts|Right,Body Parts|Head,Body Parts|TI,Body Parts|Other,Outcomes|Cmp,Outcomes|Off,Outcomes|Out,Outcomes|Int,Outcomes|Blocks,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|TotDist,Carries|PrgDist,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%,Receiving|Prog,Performance|2CrdY,Performance|Fls,Performance|Fld,Performance|Off,Performance|Crs,Performance|TklW,Performance|PKwon,Performance|PKcon,Performance|OG,Performance|Recov,Aerial Duels|Won,Aerial Duels|Lost,Aerial Duels|Won%
15,2021-12-12,Sun,Matchweek 16,Away,L 0–4,Newcastle Utd,Leicester City,Y,LM,90,0,0,0,0,3,1,1,0,46,15,1,1,2,0.1,0.1,0.0,8,0,21,25,84.0,1,52,5,14,20,21,25,84.0,380,130,10,12,83.3,7,8,87.5,3,4,75.0,0,0.0,1,2,2,1,1,25,25,0,0,0,1,2,2,0,0,0,0,22,0,3,1,24,0,0,0,21,0,1,0,1,46,0,1,25,25,7,46,70.0,14,0,165,81,0,1,3,3,42,33,78.6,3,0,1,4,0,2,1,0,0,0,8,0,2,0.0
16,2021-12-16,Thu,Matchweek 17,Away,L 1–3,Newcastle Utd,Liverpool,Y,FW,78,0,0,0,0,1,1,0,0,23,8,1,2,0,0.2,0.2,0.0,2,0,9,14,64.3,2,20,5,3,4,9,14,64.3,211,56,3,6,50.0,3,4,75.0,3,3,100.0,0,0.0,1,2,1,0,2,14,12,2,0,0,4,0,2,0,0,0,0,8,5,1,2,11,0,0,0,9,0,0,3,1,23,1,2,20,8,1,21,75.0,3,0,200,123,3,1,2,1,25,16,64.0,1,0,0,2,1,2,0,0,0,0,1,1,1,50.0
17,2021-12-19,Sun,Matchweek 18,Home,L 0–4,Newcastle Utd,Manchester City,N,LM,45,0,0,0,0,1,0,0,0,16,1,0,0,0,0.0,0.0,0.0,0,0,7,10,70.0,1,17,4,3,6,7,10,70.0,124,22,3,4,75.0,4,4,100.0,0,2,0.0,0,0.0,0,0,1,0,1,10,10,0,0,0,0,0,1,0,0,0,0,6,1,3,1,8,1,0,0,7,0,0,0,0,16,0,1,8,11,1,16,50.0,3,0,85,44,0,0,1,0,14,14,100.0,2,0,1,1,0,1,0,0,0,0,2,0,0,
18,2021-12-27,Mon,Matchweek 19,Home,D 1–1,Newcastle Utd,Manchester Utd,Y,"LM,FW",82,1,0,0,0,5,4,0,0,36,11,0,1,3,0.9,0.9,0.0,0,0,7,13,53.8,0,32,5,2,6,7,13,53.8,123,38,3,6,50.0,2,3,66.7,1,3,33.3,0,0.0,0,0,0,0,0,13,13,0,0,0,6,3,0,0,0,0,0,9,2,2,2,11,0,0,0,7,1,2,0,0,36,0,4,18,20,5,36,33.3,3,0,129,102,3,1,7,2,35,29,82.9,6,0,0,1,0,0,0,0,0,0,3,0,0,
19,2022-01-15,Sat,Matchweek 22,Home,1–1,Newcastle-United,Watford,Y,LW,85,1,0,0,0,1,1,0,0,47,7,0,0,1,0.0,0.0,0.0,2,0,22,30,73.3,1,52,7,7,12,22,30,73.3,320,18,14,17,82.4,8,11,72.7,0,2,0.0,0,0.0,1,0,0,0,1,30,30,0,0,0,6,1,4,0,0,0,0,22,5,3,4,25,0,0,0,22,0,0,2,1,47,0,3,20,32,7,47,58.3,7,0,331,202,3,3,3,6,44,40,90.9,4,0,1,1,0,4,0,0,0,0,5,1,0,100.0


In [59]:
print(profile.columns.tolist())

['Date', 'Day', 'Round', 'Venue', 'Result', 'Squad', 'Opponent', 'Start', 'Pos', 'Min', 'Performance|Gls', 'Performance|Ast', 'Performance|PK', 'Performance|PKatt', 'Performance|Sh', 'Performance|SoT', 'Performance|CrdY', 'Performance|CrdR', 'Performance|Touches', 'Performance|Press', 'Performance|Tkl', 'Performance|Int', 'Performance|Blocks', 'Expected|xG', 'Expected|npxG', 'Expected|xA', 'SCA|SCA', 'SCA|GCA', 'Passes|Cmp', 'Passes|Att', 'Passes|Cmp%', 'Passes|Prog', 'Carries|Carries', 'Carries|Prog', 'Dribbles|Succ', 'Dribbles|Att', 'Total|Cmp', 'Total|Att', 'Total|Cmp%', 'Total|TotDist', 'Total|PrgDist', 'Short|Cmp', 'Short|Att', 'Short|Cmp%', 'Medium|Cmp', 'Medium|Att', 'Medium|Cmp%', 'Long|Cmp', 'Long|Att', 'Long|Cmp%', 'Ast', 'xA', 'KP', '1/3', 'PPA', 'CrsPA', 'Prog', 'Att', 'Pass Types|Live', 'Pass Types|Dead', 'Pass Types|FK', 'Pass Types|TB', 'Pass Types|Press', 'Pass Types|Sw', 'Pass Types|Crs', 'Pass Types|CK', 'Corner Kicks|In', 'Corner Kicks|Out', 'Corner Kicks|Str', 'Heig

In [33]:
for i in tqdm(range(GW_data.shape[0])):
    for_path = list(GW_data.values[i][0:2])
    profile = get_profile(for_path)[0]
    gw_log = GW_data.iloc[i:i+1, :]
    
    print('/'.join(for_path) + '.csv')
    
    profile = pd.concat([profile, gw_log], join='inner')
    profile.drop_duplicates(subset=['Day', 'Venue', 'Opponent'], inplace=True)
    save_to = get_profile(for_path)[1]
    profile.to_csv(save_to, index=False)

Widget Javascript not detected.  It may not be installed or enabled properly.


Brighton--Hove-Albion/Leandro Trossard.csv
Brighton--Hove-Albion/Neal Maupay.csv
Brighton--Hove-Albion/Jakub Moder.csv
Brighton--Hove-Albion/Alexis Mac Allister.csv
Brighton--Hove-Albion/Pascal Groß.csv
Brighton--Hove-Albion/Solly March.csv
Brighton--Hove-Albion/Adam Lallana.csv
Brighton--Hove-Albion/Tariq Lamptey.csv
Brighton--Hove-Albion/Marc Cucurella.csv
Brighton--Hove-Albion/Dan Burn.csv
Brighton--Hove-Albion/Adam Webster.csv
Brighton--Hove-Albion/Joël Veltman.csv
Brighton--Hove-Albion/Danny Welbeck.csv
Brighton--Hove-Albion/Robert Sánchez.csv
Crystal-Palace/Odsonne Édouard.csv
Crystal-Palace/Jean-Philippe Mateta.csv
Crystal-Palace/Eberechi Eze.csv
Crystal-Palace/Luka Milivojević.csv
Crystal-Palace/Michael Olise.csv
Crystal-Palace/Christian Benteke.csv
Crystal-Palace/Jeffrey Schlupp.csv
Crystal-Palace/Conor Gallagher.csv
Crystal-Palace/Will Hughes.csv
Crystal-Palace/Tyrick Mitchell.csv
Crystal-Palace/Marc Guéhi.csv
Crystal-Palace/Joachim Andersen.csv
Crystal-Palace/Joel Ward.csv
C

IndexError: list index out of range

In [50]:
type(get_profile(for_path))

NoneType

In [41]:
from itertools import product

def filler(word, from_char, to_char):
    options = [(c,) if c != from_char else (from_char, to_char) for c in word]
    return (''.join(o) for o in product(*options))

list(filler(for_path[1], ' ', '-'))

['Emile Smith Rowe',
 'Emile Smith-Rowe',
 'Emile-Smith Rowe',
 'Emile-Smith-Rowe']

In [38]:
[ for i in for_path[1] if i == ' ']

'Emile Smith Rowe'

In [35]:
get_pathway(for_path)

'./PlayerData_2021-2022/Arsenal/Alexandre Lacazette.csv'

In [13]:
for_path = list(GW_data.values[0][0:2])

season = '2021-2022'
player_data = f'PlayerData_{season}'
filepath = './' + player_data + '/' + '/'.join(for_path) + '.csv'

profile = pd.read_csv(filepath)
profile.tail()

Unnamed: 0,Date,Day,Round,Venue,Result,Squad,Opponent,Start,Pos,Min,Performance|Gls,Performance|Ast,Performance|PK,Performance|PKatt,Performance|Sh,Performance|SoT,Performance|CrdY,Performance|CrdR,Performance|Touches,Performance|Press,Performance|Tkl,Performance|Int,Performance|Blocks,Expected|xG,Expected|npxG,Expected|xA,SCA|SCA,SCA|GCA,Passes|Cmp,Passes|Att,Passes|Cmp%,Passes|Prog,Carries|Carries,Carries|Prog,Dribbles|Succ,Dribbles|Att,Total|Cmp,Total|Att,Total|Cmp%,Total|TotDist,Total|PrgDist,Short|Cmp,Short|Att,Short|Cmp%,Medium|Cmp,Medium|Att,Medium|Cmp%,Long|Cmp,Long|Att,Long|Cmp%,Ast,xA,KP,1/3,PPA,CrsPA,Prog,Att,Pass Types|Live,Pass Types|Dead,Pass Types|FK,Pass Types|TB,Pass Types|Press,Pass Types|Sw,Pass Types|Crs,Pass Types|CK,Corner Kicks|In,Corner Kicks|Out,Corner Kicks|Str,Height|Ground,Height|Low,Height|High,Body Parts|Left,Body Parts|Right,Body Parts|Head,Body Parts|TI,Body Parts|Other,Outcomes|Cmp,Outcomes|Off,Outcomes|Out,Outcomes|Int,Outcomes|Blocks,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|TotDist,Carries|PrgDist,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%,Receiving|Prog,Performance|2CrdY,Performance|Fls,Performance|Fld,Performance|Off,Performance|Crs,Performance|TklW,Performance|PKwon,Performance|PKcon,Performance|OG,Performance|Recov,Aerial Duels|Won,Aerial Duels|Lost,Aerial Duels|Won%
9,2021-12-06,Mon,Matchweek 15,Away,L 1–2,Arsenal,Everton,Y*,FW,84,0,0,0,0,0,0,0,0,20,19,3,0,1,0.0,0.0,0.0,2,1,12,15,80.0,2,13,0,0,0,12,15,80.0,237,61,5,6,83.3,5,5,100.0,2,2,100.0,0,0.0,0,4,0,0,2,15,15,0,0,0,2,1,0,0,0,0,0,11,4,0,1,13,0,0,1,12,0,0,1,0,20,0,1,13,7,1,20,,0,0,34,12,0,0,0,1,25,16,64.0,8,0,3,0,0,0,1,0,0,0,0,0,4,0.0
10,2021-12-11,Sat,Matchweek 16,Home,W 3–0,Arsenal,Southampton,Y*,FW,82,1,0,0,0,1,1,0,0,34,6,0,2,1,0.4,0.4,0.0,1,0,14,19,73.7,0,21,2,1,1,14,19,73.7,176,16,9,12,75.0,2,3,66.7,1,1,100.0,0,0.0,0,0,0,0,0,19,19,0,0,0,6,0,0,0,0,0,0,13,3,3,3,14,0,0,0,14,0,0,1,1,34,0,2,14,20,7,34,100.0,1,0,90,57,1,2,6,2,44,24,54.5,7,0,3,0,0,0,0,0,0,0,3,3,2,60.0
11,2021-12-15,Wed,Matchweek 17,Home,W 2–0,Arsenal,West Ham,Y*,FW,88,0,1,0,1,2,2,0,0,41,19,3,1,1,1.1,0.4,0.3,8,1,22,25,88.0,1,32,6,2,2,22,25,88.0,332,63,13,14,92.9,8,9,88.9,1,1,100.0,1,0.3,2,1,1,1,1,25,24,1,0,1,4,0,3,0,0,0,0,16,7,2,3,19,1,0,0,22,0,0,1,0,41,0,7,16,23,9,40,100.0,2,0,193,118,2,0,4,2,36,29,80.6,4,0,2,3,0,3,3,1,0,0,4,2,2,50.0
12,2021-12-18,Sat,Matchweek 18,Away,W 4–1,Arsenal,Leeds United,Y*,FW,90,0,1,0,0,2,2,0,0,41,28,3,2,1,0.9,0.9,0.1,4,1,27,32,84.4,3,22,1,1,2,27,32,84.4,388,105,17,19,89.5,6,8,75.0,2,2,100.0,1,0.1,1,2,1,0,3,32,32,0,0,0,12,0,0,0,0,0,0,24,4,4,5,25,1,0,0,27,1,0,3,1,41,0,5,18,21,6,41,50.0,1,0,84,29,1,1,0,0,41,29,70.7,3,0,4,1,0,0,1,0,0,0,5,1,7,12.5
13,2021-12-26,Sun,Matchweek 19,Away,W 5–0,Arsenal,Norwich City,Y*,FW,90,1,1,1,1,1,0,0,0,42,15,1,0,0,1.0,0.2,0.0,2,2,27,32,84.4,2,22,1,0,1,27,32,84.4,327,41,20,22,90.9,4,4,100.0,1,3,33.3,1,0.0,1,0,1,0,2,32,32,0,0,0,8,0,0,0,0,0,0,27,4,1,4,28,0,0,0,27,1,1,1,0,42,0,3,27,15,4,42,0.0,0,0,85,26,1,0,3,2,54,39,72.2,9,0,2,2,0,0,1,1,0,0,2,1,3,25.0


In [25]:
GW_data.iloc[0:1, :]

Unnamed: 0,Squad,Player,Date,Day,Opponent,Round,Venue,Result,#,Nation,Pos,Age,Min,Start,Performance|Gls,Performance|Ast,Performance|PK,Performance|PKatt,Performance|Sh,Performance|SoT,Performance|CrdY,Performance|CrdR,Performance|Touches,Performance|Press,Performance|Tkl,Performance|Int,Performance|Blocks,Expected|xG,Expected|npxG,Expected|xA,SCA|SCA,SCA|GCA,Passes|Cmp,Passes|Att,Passes|Cmp%,Passes|Prog,Carries|Carries,Carries|Prog,Dribbles|Succ,Dribbles|Att,Total|Cmp,Total|Att,Total|Cmp%,Total|TotDist,Total|PrgDist,Short|Cmp,Short|Att,Short|Cmp%,Medium|Cmp,Medium|Att,Medium|Cmp%,Long|Cmp,Long|Att,Long|Cmp%,Ast,xA,KP,1/3,PPA,CrsPA,Prog,Att,Pass Types|Live,Pass Types|Dead,Pass Types|FK,Pass Types|TB,Pass Types|Press,Pass Types|Sw,Pass Types|Crs,Pass Types|CK,Corner Kicks|In,Corner Kicks|Out,Corner Kicks|Str,Height|Ground,Height|Low,Height|High,Body Parts|Left,Body Parts|Right,Body Parts|Head,Body Parts|TI,Body Parts|Other,Outcomes|Cmp,Outcomes|Off,Outcomes|Out,Outcomes|Int,Outcomes|Blocks,Tackles|Tkl,Tackles|TklW,Tackles|Def 3rd,Tackles|Mid 3rd,Tackles|Att 3rd,Vs Dribbles|Tkl,Vs Dribbles|Att,Vs Dribbles|Tkl%,Vs Dribbles|Past,Pressures|Press,Pressures|Succ,Pressures|%,Pressures|Def 3rd,Pressures|Mid 3rd,Pressures|Att 3rd,Blocks|Blocks,Blocks|Sh,Blocks|ShSv,Blocks|Pass,Int,Tkl+Int,Clr,Err,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|TotDist,Carries|PrgDist,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%,Receiving|Prog,Performance|2CrdY,Performance|Fls,Performance|Fld,Performance|Off,Performance|Crs,Performance|TklW,Performance|PKwon,Performance|PKcon,Performance|OG,Performance|Recov,Aerial Duels|Won,Aerial Duels|Lost,Aerial Duels|Won%
0,Arsenal,Alexandre Lacazette,2022-01-01,Sat,Manchester City,Gameweek 21,Home,1–2,9.0,fr FRA,FW,30-218,70,Y,0,0,0,0,1,0,0,0,19,22,1,0,0,0.0,0.0,0.0,1,0,8,13,61.5,2,12,1,0,0,8,13,61.5,140,41,3,4,75.0,3,7,42.9,1,1,100.0,0,0.0,1,1,1,0,2,13,11,2,0,0,2,0,0,0,0,0,0,10,1,2,2,10,1,0,0,8,0,0,1,0,1,1,0,0,1,0,0,,0,22,3,13.6,0,6,16,0,0,0,0,0,1,1,0,19,1,1,8,11,1,17,,0,0,43,14,1,0,1,1,15,10,66.7,2,0,1,1,0,0,1,0,0,0,4,0,2,0.0


In [15]:
set(profile.columns) - set(GW_data.columns)

set()

---

In [54]:
match_report = f'https://fbref.com{link_to_report}'
hash_team = hash_to_shortname[home_team]
table_id = f'stats_{hash_team}_possession'

response = requests.get(match_report)
soup = BeautifulSoup(response.content, 'html.parser')
table = soup.find_all('table', id=table_id) # main data
response.close()

stats_team = pd.read_html(str(table))[0]
stats_team = stats_team.iloc[:-1, :-1]
stats_team.columns = stats_team.columns.map(lambda x: '|'.join([str(i) for i in x if not i.startswith('Unnamed')]))
stats_team.dropna(axis=0, how='all', inplace=True)
stats_team.reset_index(drop=True, inplace=True)

In [56]:
stats_team

Unnamed: 0,Player,#,Nation,Pos,Age,Min,Touches|Touches,Touches|Def Pen,Touches|Def 3rd,Touches|Mid 3rd,Touches|Att 3rd,Touches|Att Pen,Touches|Live,Dribbles|Succ,Dribbles|Att,Dribbles|Succ%,Dribbles|#Pl,Dribbles|Megs,Carries|Carries,Carries|TotDist,Carries|PrgDist,Carries|Prog,Carries|1/3,Carries|CPA,Carries|Mis,Carries|Dis,Receiving|Targ,Receiving|Rec,Receiving|Rec%
0,Alexandre Lacazette,9.0,fr FRA,FW,30-218,70,19,1,1,8,11,1,17,0,0,,0,0,12,43,14,1,1,0,1,1,15,10,66.7
1,Emile Smith Rowe,10.0,eng ENG,"LW,CM",21-157,20,10,0,4,2,4,0,8,0,0,,0,0,6,31,7,0,0,0,1,0,7,6,85.7
2,Martinelli,35.0,br BRA,"LW,FW",20-197,90,29,3,5,9,19,7,27,4,5,80.0,4,0,16,117,78,6,0,2,2,0,23,13,56.5
3,Bukayo Saka,7.0,eng ENG,RW,20-118,83,38,2,7,15,20,4,34,0,2,0.0,0,0,28,164,112,4,1,1,3,4,34,29,85.3
4,Mohamed Elneny,25.0,eg EGY,CM,29-174,7,7,0,0,3,4,0,7,0,0,,0,0,4,11,7,1,0,0,1,0,3,3,100.0
5,Martin Ødegaard,8.0,no NOR,AM,23-015,62,20,0,2,10,9,3,18,0,0,,0,0,14,34,21,0,0,0,0,2,14,12,85.7
6,Rob Holding,16.0,eng ENG,CB,26-103,28,10,2,6,2,2,1,10,0,0,,0,0,2,8,0,0,0,0,0,0,4,3,75.0
7,Granit Xhaka,34.0,ch SUI,"DM,CB",29-096,90,31,4,9,14,10,1,31,0,0,,0,0,19,102,49,3,2,1,1,0,20,19,95.0
8,Thomas Partey,5.0,gh GHA,DM,28-202,90,38,3,8,23,10,1,38,6,6,100.0,6,1,32,258,143,4,1,1,0,0,20,19,95.0
9,Kieran Tierney,3.0,sct SCO,"LB,WB",24-210,90,28,2,7,8,15,0,22,0,1,0.0,0,0,13,83,72,4,1,0,0,0,14,14,100.0


In [None]:
                for i in URL_list:
                    #print(i)
                    data_i = parse_data_csv(i, teamname, id_data_default)
                    profile = pd.concat([profile, data_i], axis=1)
                    
                profile = profile.loc[:,~profile.columns.duplicated()]