In [1]:
# from ipywidgets import Dropdown, VBox
from collections import OrderedDict
import numpy as np
import os
import pandas as pd
from statsmodels.api import OLS

# Work from the project directory so the relative data paths used below resolve.
# The location can be overridden with the FANTA_PROJECT_DIR environment variable;
# the original machine-specific path is kept as the default.
os.chdir(os.environ.get("FANTA_PROJECT_DIR", "C:/Users/zuk-8/Documents_AZ/Projects/Fanta"))

## Global variables

In [2]:
# Name of the grade column used when computing per-role averages and player scores.
VOTO_TYPE = 'Voto_Italia'

In [3]:
# Load the grades table; the path is relative to the working directory set in the first cell.
# NOTE(review): the saved output below looks like the tail of a pandas warning
# (possibly mixed column dtypes) — consider passing explicit dtypes. TODO confirm.
df = pd.read_csv('Output_FC/output_fc.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Number of distinct weeks recorded in each season, as a one-column ('Week') frame
# indexed by season.
GAMES_PS = df.groupby('Season')['Week'].nunique().to_frame()
GAMES_PS

Unnamed: 0_level_0,Week
Season,Unnamed: 1_level_1
2015,38
2016,38
2017,38
2018,38
2019,38
2020,2


## Penalties

In [5]:
# Per-season penalty totals (Rf = penalties scored, Rs = penalties missed).
# Select every row with at least one penalty exactly once: concatenating the
# "Rf > 0" and "Rs > 0" subsets would count a row twice when a player both
# scored and missed a penalty in the same game.
penalties = df.loc[(df['Rf'] > 0) | (df['Rs'] > 0), ['Season', 'Rf', 'Rs']].groupby('Season').sum()
penalties['Penalties'] = penalties['Rf'] + penalties['Rs']
# Share of penalties that were scored, in percent.
penalties['Score_Ratio'] = (penalties['Rf'] / penalties['Penalties']) * 100
# Net bonus points from penalties: +3 per penalty scored, -3 per penalty missed.
penalties['Points'] = 3*penalties['Rf'] - 3*penalties['Rs']
# Displayed as integers (this truncates Score_Ratio).
penalties.astype(int)

Unnamed: 0_level_0,Rf,Rs,Penalties,Score_Ratio,Points
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015,90,30,120,75,180
2016,97,39,136,71,174
2017,90,39,129,69,153
2018,98,26,124,79,216
2019,148,35,183,80,339
2020,5,0,5,100,15


## Grades

In [6]:
def get_bonus(row, penalty=True):
    """Return the bonus/malus total for one row of match statistics.

    The total is built from four components read from ``row``:

    * goals:     ``3*Gf - Gs - 2*Au``
    * penalties: ``3*Rp - 3*Rs + 3*Rf``
    * assists:   ``Ass + Asf``
    * malus:     ``-0.5*Amm - Esp``

    Only key lookups and arithmetic are used, so ``row`` may be any mapping of
    these keys to numbers (a dict, a pandas Series, ...).

    Parameters
    ----------
    row : mapping
        Must provide the keys 'Gf', 'Gs', 'Au', 'Rp', 'Rs', 'Rf', 'Ass', 'Asf',
        'Amm' and 'Esp'.
    penalty : bool, default True
        When True the penalty component is included, when False it is left out.
        ``numpy.bool_`` values are accepted as well as plain ``bool``.

    Raises
    ------
    ValueError
        If ``penalty`` is not a boolean.
    """
    # Validate with isinstance instead of `is True` / `is False` so numpy
    # booleans work too, while non-boolean flags are still rejected.
    if not isinstance(penalty, (bool, np.bool_)):
        raise ValueError("penalty must be either True or False")
    goals = 3*row['Gf'] - row['Gs'] - 2*row['Au']
    penalties = 3*row['Rp'] - 3*row['Rs'] + 3*row['Rf']
    assists = row['Ass'] + row['Asf']
    malus = - 0.5*row['Amm'] - row['Esp']
    if penalty:
        return goals + penalties + assists + malus
    return goals + assists + malus


def get_bonus_nop(row):
    """Return the bonus for ``row`` with the penalty component left out."""
    bonus_without_penalties = get_bonus(row, penalty=False)
    return bonus_without_penalties


In [7]:
# Add bonuses
# get_bonus only performs column lookups and arithmetic, so passing the whole
# frame computes every row at once instead of one Python call per row.
# astype(float) keeps the float dtype that the row-wise version produced.
df['Bonus'] = get_bonus(df).astype(float)
df['Bonus_nop'] = get_bonus_nop(df).astype(float)

In [8]:
# Stats per role
# Per-row total (grade + bonus without penalties), grouped by role. After
# reset_index() the unnamed total column is labelled 0, hence the [0] selection.
grade_by_role = df.set_index('Ruolo')[[VOTO_TYPE,'Bonus_nop']].sum(1).reset_index().groupby('Ruolo')[0]
avg_grade_role = grade_by_role.mean().round(2)
std_grade_role = grade_by_role.std().round(2)
# Per role and season: mean grade plus mean bonus. Uses VOTO_TYPE (not a
# hard-coded column name) so it stays consistent with the figures above.
avg_grade_role_season = df.groupby(['Ruolo', 'Season'])[[VOTO_TYPE,'Bonus_nop']].mean().sum(1).unstack().round(2)
# Replacement level: half a standard deviation below the role average.
REPLACEMENT_PER_ROLE = (avg_grade_role - 0.5 * std_grade_role).round(2)
print('Average grades per role')
print(avg_grade_role)
print('\nAverage grades per role for replacements')
print(REPLACEMENT_PER_ROLE)
print('Average grades per role by season')
print(avg_grade_role_season)

Average grades per role
Ruolo
A    6.66
C    6.17
D    5.89
P    4.76
Name: 0, dtype: float64

Average grades per role for replacements
Ruolo
A    5.63
C    5.49
D    5.38
P    3.97
Name: 0, dtype: float64
Average grades per role by season
Season  2015  2016  2017  2018  2019  2020
Ruolo                                     
A       6.60  6.78  6.53  6.61  6.75  6.90
C       6.16  6.20  6.21  6.12  6.17  6.26
D       5.85  5.86  5.89  5.94  5.91  5.96
P       4.88  4.65  4.81  4.82  4.65  4.35


In [9]:
def compute_score(raw_stats, expected_games=None, replacement_score=None):
    """Average score per expected game, filling missed games with a replacement score.

    Each row of ``raw_stats`` counts as one game played and contributes its
    ``VOTO_TYPE`` grade plus ``Bonus_nop``. Every expected game that was not
    played contributes ``replacement_score``. The total is divided by
    ``expected_games`` and rounded to two decimals.

    Parameters
    ----------
    raw_stats : DataFrame
        Rows with at least 'Season', 'Ruolo', 'Week', the ``VOTO_TYPE`` column
        and 'Bonus_nop'.
    expected_games : number, optional
        Defaults to the sum of ``GAMES_PS`` weeks over the seasons in ``raw_stats``.
    replacement_score : number, optional
        Defaults to ``REPLACEMENT_PER_ROLE`` for the role with the most rows.

    Raises
    ------
    ValueError
        If there are more rows than expected games.
    """
    if expected_games is None:
        seasons_present = raw_stats['Season'].unique()
        expected_games = sum(GAMES_PS.loc[season, 'Week'] for season in seasons_present)
    if replacement_score is None:
        games_per_role = raw_stats.groupby('Ruolo')['Week'].count()
        role = games_per_role.sort_values(ascending=False).index[0]
        replacement_score = REPLACEMENT_PER_ROLE[role]

    games_played = len(raw_stats.index)
    if games_played > expected_games:
        raise ValueError(f"games_played ({games_played}) > expected_games ({expected_games})")

    played_total = raw_stats[[VOTO_TYPE,'Bonus_nop']].sum(1).sum()
    games_missed = expected_games - games_played
    return ((played_total + games_missed * replacement_score) / expected_games).round(2)


def pred_score(stats, year=2019):
    """Predict the 'Score' for ``year`` from the previous one or two seasons.

    Parameters
    ----------
    stats : DataFrame
        Must have a 'Score' row; seasons are looked up as column labels.
    year : int, default 2019
        Season to predict.

    Returns
    -------
    float
        * NaN when ``year - 1`` is not a column of ``stats``;
        * the ``year - 1`` score when ``year - 2`` is not a column;
        * otherwise the weighted mean ``(2*score[year-1] + score[year-2]) / 3``.
    """
    if year-1 not in stats.columns:
        # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0.
        return np.nan
    if year-2 not in stats.columns:
        return stats.loc['Score', year-1]
    pred_last2 = (2*stats.loc['Score', year-1] + stats.loc['Score', year-2])/3
    return pred_last2

## Player class

In [10]:
def search_id_from_name(name):
    """Placeholder for looking up an id from a name; not implemented, returns None."""
    return None


def get_player_id_from_name(name):
    """Return the single 'Fantacalcio_id' whose 'Nome' equals ``name`` upper-cased.

    Surrounding whitespace in ``name`` is ignored for the lookup.

    Raises
    ------
    ValueError
        If no row matches, or if the name maps to more than one id.
    """
    wanted = name.strip().upper()
    ids = [int(x) for x in df.loc[df['Nome'] == wanted, 'Fantacalcio_id'].unique()]
    if len(ids) == 0:
        raise ValueError(f"Couldn't find any grades for player {name}")
    if len(ids) > 1:
        raise ValueError(f"Found more than 1 ID for player {name}: {ids}")
    # Exactly one id is left; the former catch-all "Unclear error" branch could never run.
    return ids[0]


def get_season_week_str(row):
    """Format a row's 'Season' and 'Week' as e.g. ``"2015-16 week 05"``.

    The season's starting year is followed by that year minus 1999, and the
    week is zero-padded to two digits.
    """
    season_value, week_value = row['Season'], row['Week']
    start_year = int(season_value)
    season_label = "-".join([str(start_year), str(start_year - 1999)])
    return "{} week {:02d}".format(season_label, week_value)


def get_first_last_team(team):
    """Describe the first consecutive stint at one team and return the rest.

    Parameters
    ----------
    team : DataFrame
        Rows with 'Team', 'Week' and 'Season' columns (must be non-empty).

    Returns
    -------
    (str, DataFrame or None)
        ``"<team> from <first game> to <last game>"`` for the leading run of
        rows (in Season/Week order) that share the first row's team, and the
        remaining rows in the same order, or ``None`` when nothing remains.
    """
    team = team.sort_values(by=['Season', 'Week'])
    team_name = team.iloc[0]['Team']
    same_team = (team['Team'] == team_name).to_numpy()
    if same_team.all():
        stint_len = len(team)
    else:
        # argmin on a boolean array is the position of the first False, i.e.
        # the first game at a different team. Always consume at least one row,
        # so a team value that does not compare equal to itself (e.g. NaN)
        # cannot stall the caller's loop.
        stint_len = max(int(same_team.argmin()), 1)
    first = get_season_week_str(team.iloc[0])
    last = get_season_week_str(team.iloc[stint_len - 1])
    # Slice once instead of dropping the stint's rows one at a time.
    remaining = team.iloc[stint_len:] if stint_len < len(team) else None
    return f"{team_name} from {first} to {last}", remaining


def get_teams_from_fcid(fc_id):
    """List the stints of player ``fc_id`` as strings, one per call to get_first_last_team.

    The player's 'Team'/'Week'/'Season' rows are handed to
    ``get_first_last_team`` repeatedly until it reports that no rows remain.
    """
    remaining = df.loc[df['Fantacalcio_id']==fc_id, ['Team', 'Week', 'Season']]
    stints = []
    while True:
        description, remaining = get_first_last_team(remaining)
        stints.append(description)
        if remaining is None:
            return stints


def get_roles_from_fcid(fc_id):
    """Return, per season, the first 'Ruolo' value recorded for player ``fc_id``."""
    player_rows = df.loc[df['Fantacalcio_id']==fc_id, ['Ruolo', 'Week', 'Season']]
    role_by_season = player_rows.groupby('Season')['Ruolo'].first()
    return role_by_season


def get_raw_stats_from_fcid(fc_id):
    """Return an independent copy of every row of ``df`` belonging to player ``fc_id``."""
    is_player = df['Fantacalcio_id']==fc_id
    return df.loc[is_player].copy()


def player_stats(raw_stats):
    """Summarise a set of game rows into an ordered mapping of statistics.

    The mapping contains, in order: 'Game' (number of rows), 'Score' (from
    ``compute_score``), the mean of every column whose name starts with
    'Voto_' or 'Bonus', role-specific totals, and finally the totals of own
    goals, red cards and yellow cards.

    Role-specific totals:

    * no row has role 'P': goals scored, penalties scored/missed, assists;
    * every row has role 'P': goals conceded and penalties saved.

    Raises
    ------
    ValueError
        If role 'P' appears in some rows but not in all of them.
    """
    stats = OrderedDict([])
    stats['Game'] = len(raw_stats.index)
    stats['Score'] = compute_score(raw_stats)
    for col in raw_stats.columns:
        if col.startswith(('Voto_', 'Bonus')):
            stats[col] = raw_stats[col].mean()
    # Explicit lookups, so a missing bonus column fails loudly here.
    stats['Bonus'] = raw_stats['Bonus'].mean()
    stats['Bonus_nop'] = raw_stats['Bonus_nop'].mean()
    is_keeper = raw_stats['Ruolo'] == 'P'
    if not is_keeper.any():
        stats['Goal_Scored'] = raw_stats['Gf'].sum()
        stats['Penalty_Scored'] = raw_stats['Rf'].sum()
        stats['Penalty_Missed'] = raw_stats['Rs'].sum()
        stats['Assist'] = raw_stats['Ass'].sum() + raw_stats['Asf'].sum()
    elif is_keeper.all():
        stats['Goal_Conceded'] = raw_stats['Gs'].sum()
        stats['Penalty_Saved'] = raw_stats['Rp'].sum()
    else:
        # Testing any(== 'P') first made this branch unreachable, so mixed-role
        # data was silently summarised as a goalkeeper.
        raise ValueError("Role is unclear: 'P' for some but not all games")
    stats['Own_Goal'] = raw_stats['Au'].sum()
    stats['Red_Card'] = raw_stats['Esp'].sum()
    stats['Yellow_Card'] = raw_stats['Amm'].sum()
    return stats


def analyse_stats(raw_stats):
    """Build a summary table of a player's statistics.

    Rows are the statistics from ``player_stats`` plus a final 'Ruolo' row.
    Columns are, in order: 'Total', one column per team (most recent first),
    one column per season (reverse order of first appearance), and 'Avg'.

    'Avg' is the per-game value of the counting statistics only, i.e. every
    'Total' entry other than 'Game', 'Score' and the grade/bonus means.

    Parameters
    ----------
    raw_stats : DataFrame
        The game rows of a single player.
    """
    # Total
    stats = player_stats(raw_stats)
    all_stats = pd.Series(stats).to_frame('Total')
    # Select the counting rows by name rather than by position, so the result
    # does not depend on how many 'Voto_'/'Bonus' columns the data has.
    count_rows = [key for key in stats
                  if key not in ('Game', 'Score') and not key.startswith(('Voto_', 'Bonus'))]
    all_stats['Avg'] = all_stats.loc[count_rows, 'Total'] / stats['Game']
    # Season — masks are built from raw_stats itself, not from the global frame,
    # so the function works for any frame regardless of its index.
    seasons = raw_stats['Season'].unique()[::-1]
    stats_py = OrderedDict([(season, player_stats(raw_stats.loc[raw_stats['Season']==season])) for season in seasons])
    all_stats = all_stats.join(pd.DataFrame(stats_py), how='right')
    # Team
    teams = raw_stats['Team'].iloc[::-1].unique()
    stats_team = OrderedDict([(team, player_stats(raw_stats.loc[raw_stats['Team']==team])) for team in teams])
    all_stats = all_stats.join(pd.DataFrame(stats_team))
    # Role: first recorded role per season, appended as a final 'Ruolo' row.
    ruolo = raw_stats.groupby('Season').first()[['Ruolo']].T
    # Concat
    all_stats = pd.concat([all_stats.round(2), ruolo])
    all_stats = all_stats[['Total'] + [t for t in teams] + [s for s in seasons] + ['Avg']]
    return all_stats


# Example report for a single player; the name is upper-cased during the id lookup.
name = 'skriniar'

fc_id = get_player_id_from_name(name)
teams = get_teams_from_fcid(fc_id)
roles = get_roles_from_fcid(fc_id)
raw_stats = get_raw_stats_from_fcid(fc_id)
stats = analyse_stats(raw_stats)
print("Teams:\n"+"\n".join(teams))
print("\nStats")
# Last expression of the cell: shown as the cell output, with missing values as '-'.
stats.fillna('-')

Teams:
SAMPDORIA from 2015-16 week 35 to 2016-17 week 38
INTER from 2017-18 week 01 to 2020-21 week 01

Stats


Unnamed: 0,Total,INTER,SAMPDORIA,2020,2019,2018,2017,2016,2015,Avg
Game,144,106,38,1,32,35,38,35,3,-
Score,5.86,6.07,5.53,5.69,5.78,5.92,6.53,5.71,5.35,-
Voto_Fantacalcio,6.05,6.17,5.7,6,6.14,6.1,6.28,5.7,5.67,-
Voto_Italia,6.06,6.15,5.8,6,6.06,6.09,6.29,5.83,5.5,-
Voto_Statistico,6.18,6.18,-,6,6.11,6.16,6.28,-,-,-
Bonus,-0.04,-0.01,-0.12,0,-0.2,-0.11,0.24,-0.09,-0.5,-
Bonus_nop,-0.04,-0.01,-0.12,0,-0.2,-0.11,0.24,-0.09,-0.5,-
Goal_Scored,4,4,0,0,0,0,4,0,0,0.03
Penalty_Scored,0,0,0,0,0,0,0,0,0,0
Penalty_Missed,0,0,0,0,0,0,0,0,0,0


## Predictions

In [11]:
def pred_score_from_id(fc_id, year=2019):
    """Return the predicted and observed season scores for one player.

    Parameters
    ----------
    fc_id : int
        Player id, matched against 'Fantacalcio_id'.
    year : int, default 2019
        Season to predict.

    Returns
    -------
    dict
        ``{'Pred': <prediction for year>, str(year): <score in year>,
        str(year-1): <score in year-1>}``. A season with no column in the
        player's stats table yields NaN, so a season with no data yet can be
        predicted without raising a KeyError.
    """
    raw_stats = get_raw_stats_from_fcid(fc_id)
    stats = analyse_stats(raw_stats)

    def season_score(season):
        # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0.
        return stats.loc['Score', season] if season in stats.columns else np.nan

    return {'Pred': pred_score(stats, year), str(year): season_score(year), str(year-1): season_score(year-1)}


# team_last_season = [team for team in raw_stats[raw_stats['Season']==2018, 'Team']][-1]
# team_next_season = [team for team in raw_stats[raw_stats['Season']==2019].loc[raw_stats['Week']<15, 'Team']][-1]

# Season to evaluate the predictions on.
year = 2019
# Per player: non-null counts of every column for the season, averaged across columns.
# NOTE(review): columns with missing values lower this average, so it is not
# exactly "games played" — confirm this is the intended filter.
matches_y = df[df['Season']==year].groupby('Nome').count().mean(1)
players_y = df[df['Season']==year][['Fantacalcio_id','Nome', 'Ruolo']].drop_duplicates().set_index('Nome')
players_y = players_y.loc[matches_y[matches_y>10].index]
# Pass `year` explicitly: relying on the function's default silently evaluated
# 2019 whatever value `year` had in this cell.
predictions_y = OrderedDict([(player_name, pred_score_from_id(int(player_id), year)) for player_name, player_id in players_y['Fantacalcio_id'].items()])
df_y = pd.DataFrame(predictions_y).round(2).T.dropna()
players_y = players_y.loc[df_y.index] # exclude nas

In [12]:
# All roles. Displayed tuple:
#   1. residual std of a no-intercept OLS of the actual score on the prediction,
#   2. residual std of a no-intercept OLS of the actual score on last season's score,
#   3. std of the actual scores.
(
    OLS(df_y[str(year)], exog=df_y['Pred'], hasconst=False).fit().resid.std(),
    OLS(df_y[str(year)], exog=df_y[str(year-1)], hasconst=False).fit().resid.std(),
    df_y[str(year)].std(),
)

(0.3923354403672717, 0.42058415809818933, 0.6237575015591074)

In [13]:
# Same three figures (fit on prediction, fit on last season, raw spread),
# restricted to players whose role is 'P'.
role = 'P'
players_y_role = players_y[players_y['Ruolo']==role].index
df_y_r = df_y.loc[players_y_role].dropna().sort_values(by=str(year-1), ascending=False)
(
    OLS(df_y_r[str(year)], exog=df_y_r['Pred'], hasconst=False).fit().resid.std(),
    OLS(df_y_r[str(year)], exog=df_y_r[str(year-1)], hasconst=False).fit().resid.std(),
    df_y_r[str(year)].std(),
)

(0.31961371456528515, 0.3177466815359551, 0.3334508564754491)

In [14]:
# Same three figures (fit on prediction, fit on last season, raw spread),
# restricted to players whose role is 'D'.
role = 'D'
players_y_role = players_y[players_y['Ruolo']==role].index
df_y_r = df_y.loc[players_y_role].dropna().sort_values(by=str(year-1), ascending=False)
(
    OLS(df_y_r[str(year)], exog=df_y_r['Pred'], hasconst=False).fit().resid.std(),
    OLS(df_y_r[str(year)], exog=df_y_r[str(year-1)], hasconst=False).fit().resid.std(),
    df_y_r[str(year)].std(),
)

(0.2651740976407376, 0.2807178430912763, 0.2848928580959155)

In [15]:
# Same three figures (fit on prediction, fit on last season, raw spread),
# restricted to players whose role is 'C'.
role = 'C'
players_y_role = players_y[players_y['Ruolo']==role].index
df_y_r = df_y.loc[players_y_role].dropna().sort_values(by=str(year-1), ascending=False)
(
    OLS(df_y_r[str(year)], exog=df_y_r['Pred'], hasconst=False).fit().resid.std(),
    OLS(df_y_r[str(year)], exog=df_y_r[str(year-1)], hasconst=False).fit().resid.std(),
    df_y_r[str(year)].std(),
)

(0.3501472295922671, 0.365048994296514, 0.39158866949082055)

In [16]:
# Same three figures (fit on prediction, fit on last season, raw spread),
# restricted to players whose role is 'A'.
role = 'A'
players_y_role = players_y[players_y['Ruolo']==role].index
df_y_r = df_y.loc[players_y_role].dropna().sort_values(by=str(year-1), ascending=False)
(
    OLS(df_y_r[str(year)], exog=df_y_r['Pred'], hasconst=False).fit().resid.std(),
    OLS(df_y_r[str(year)], exog=df_y_r[str(year-1)], hasconst=False).fit().resid.std(),
    df_y_r[str(year)].std(),
)

(0.6047457088885165, 0.6608494936725317, 0.7004591868541948)