### Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
import sys
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [2]:
# Directory the kernel is running in. The previous expression,
# os.path.dirname(os.path.abspath("__file__")), used the *string* "__file__",
# so it always resolved to the current working directory as well; os.getcwd()
# states that directly.
# NOTE(review): assumes the kernel's working directory is the notebook's own
# folder — confirm when launching Jupyter from elsewhere.
current_dir = os.getcwd()

# Absolute, normalised path of the sibling "lib" folder.
lib_dir = os.path.abspath(os.path.join(current_dir, "..", "lib"))

# Make "lib" importable; skip the append when the cell is re-run so sys.path
# does not accumulate duplicate entries.
if lib_dir not in sys.path:
    sys.path.append(lib_dir)

# Star import kept on purpose: later cells call helpers from Utility by bare name.
from Utility import *

## <b>Step 1</b> Away To Win Prediction

### Constants

In [3]:
# Input columns used for the Step 1 (away-to-win) model.
features = [
    "GoalRatioDifference",
    "RecentFormDiff",
]

# Per-class weights keyed by label (1 and 0). In this notebook the only
# consumer is the commented-out training call in the Model section.
classWeight = {
    1: 3,
    0: 1,
}

### Data

In [4]:
# Load the full dataset (working on a copy) and keep only rows where every
# model feature is present.
data = getFullData().copy().dropna(subset=features)
print(len(data))

# Binary target: 1 when the full-time result column FTR equals 'A', else 0.
data['isATW'] = np.where(data['FTR'] == 'A', 1, 0)

# Share of positive rows, as a whole-number percentage.
print('isATW:', round(100 * data['isATW'].mean()), '%')

# 70/30 shuffled split with a fixed seed. No stratify= argument is passed, so
# the class balance of the two parts is whatever the shuffle produces.
# Each of the four pieces is then re-sorted by index label.
x_train, x_test, y_train, y_test = (
    part.sort_index()
    for part in train_test_split(
        data[features],
        data['isATW'],
        test_size=0.3,
        random_state=42,
        shuffle=True,
    )
)

  data['UltimoScontroDiretto'] = data['UltimoScontroDiretto'].replace(


38192
isATW: 29 %


### Model

In [5]:
# Training is disabled in this cell: the two commented-out statements below
# would (presumably) fit a model on x_train / y_train with classWeight and
# save it under the same file name that is loaded afterwards.
# NOTE(review): nothing visible here guarantees the saved model was fitted on
# the current split — confirm before relying on the test metrics.
# logRegModel = trainLogRegModel(x_train, y_train, class_weight=classWeight)

# dumpModel("Step1-ATW-LogReg.joblib", logRegModel)
# Load the previously saved Step 1 model via the project helper loadModel.
# NOTE(review): the .joblib extension suggests pickle-based deserialisation —
# only load files from a trusted source.
logRegModel = loadModel('Step1-ATW-LogReg.joblib')

### Metrics

In [6]:
# Rebuild full-row train/test frames from the index labels of the split.
df_train = data[data.index.isin(x_train.index)].copy()
df_test = data[data.index.isin(x_test.index)].copy()
print('Train Length:', len(df_train))
print('Test Length:', len(df_test))

# Predict from each frame's own feature columns. predict() returns a plain
# array that is assigned positionally, so feeding it the very rows it is
# written back to keeps predictions aligned regardless of how `data` is ordered
# (the earlier form relied on x_train/x_test, sorted by index, happening to be
# in the same row order as these frames).
df_train['LogRegPred'] = logRegModel.predict(df_train[features])
df_test['LogRegPred'] = logRegModel.predict(df_test[features])

# Score against the target stored in the same frame, for the same reason.
test_accuracy = accuracy_score(df_test['isATW'], df_test['LogRegPred'])
print(f"Test LogReg Accuracy score: {round(100*test_accuracy,2)}%")

# Per-class accuracy table, one row per model.
# NOTE(review): class_accuracy is a project helper whose result is read with
# the keys 'Over' and 'Under'; presumably these correspond to class 1 / class 0
# of isATW here — confirm, the labels look inherited from another notebook.
models = ['LogReg']
rows = []
for model in models:
    per_class = class_accuracy(df_test['isATW'], df_test[model + 'Pred'])
    rows.append({
        "Modello": model,
        "Accuratezza_Over": per_class['Over'],
        "Accuratezza_Under": per_class['Under'],
    })

classAccuracyDf = pd.DataFrame(rows).set_index('Modello')

print('')
print(classAccuracyDf)

# Predicted probabilities for each class (not used further in this cell).
probabilita = logRegModel.predict_proba(x_test)

# Name of the prediction column handed to the gain helpers.
prediction = 'LogRegPred'

# Odds with fallbacks: Bet365 first, then the AvgC* column, then the Avg* column.
df_test['QuotaATW'] = df_test['B365A'].fillna(df_test['AvgCA']).fillna(df_test['AvgA'])
df_test['QuotaH'] = df_test['B365H'].fillna(df_test['AvgCH']).fillna(df_test['AvgH'])
df_test['QuotaX'] = df_test['B365D'].fillna(df_test['AvgCD']).fillna(df_test['AvgD'])
# Combined home-or-draw odds derived from the two single-outcome odds.
df_test['Quota1x'] = 1 / (1 / df_test['QuotaH'] + 1 / df_test['QuotaX'])

# Row-wise gain computed by the project helper calculate_gain_ATW.
df_test['GainATW'] = df_test.apply(calculate_gain_ATW, axis=1, args=(prediction,))

# Running total of the gain, in the row order of df_test.
df_test['EquityATW'] = df_test['GainATW'].cumsum()

# Count rows with a non-zero gain (treated as placed bets) and all rows with a
# gain value. Computed once as scalars; the constant columns are still written
# because df_test is concatenated into complete_df afterwards.
num_bets = df_test.loc[df_test['GainATW'] != 0, 'GainATW'].count()
num_rows = df_test['GainATW'].count()
df_test['NumOfBet'] = num_bets
df_test['NumOfRow'] = num_rows

print('EquityATW:', df_test['EquityATW'].iat[-1])
print('Bet number:', num_bets, 'on', num_rows)

Train Length: 26734
Test Length: 11458
Test LogReg Accuracy score: 52.44%

         Accuratezza_Over  Accuratezza_Under
Modello                                     
LogReg           0.361789           0.818784
EquityATW: -441.3999999999987
Bet number: 7180 on 11458


In [7]:
# Stack the train and test frames vertically (train rows first) and keep a
# Step-1-specific copy of the prediction column under the name ATW_LogRegPred.
complete_df = pd.concat([df_train, df_test]).assign(
    ATW_LogRegPred=lambda frame: frame['LogRegPred']
)
# exportExcelWithTimeStamp(complete_df, "../Dataframe/", "df.xlsx")

## <b>Step 2</b> Away To Score Prediction

In [8]:
# Input column for Step 2: the Step 1 prediction column.
featuresATS = [
    "ATW_LogRegPred",
]

# Rebinds classWeight, replacing the Step 1 value; not read by any of the
# cells that follow in this notebook.
classWeight = {
    1: 1,
    0: 2,
}

In [9]:
# Keep rows where the Step 1 inputs *and* the Step 2 input are present.
# The earlier form filtered on `features` only, so the Step 2 list featuresATS
# was never applied. .copy() makes dataATS an independent frame so the column
# assignment below cannot touch (or warn about) complete_df.
dataATS = complete_df.dropna(subset=features + featuresATS).copy()
print(len(dataATS))

# Binary target: 1 when FTAG is greater than 0, else 0.
# NOTE(review): a missing FTAG compares as False and is therefore labelled 0 —
# confirm that FTAG has no missing values.
dataATS['isATS'] = np.where(dataATS['FTAG'] > 0, 1, 0)

# Share of positive rows, as a whole-number percentage.
print('isATS:', round(100 * dataATS['isATS'].mean()), '%')

38192
isATS: 67 %


In [10]:
# Pricing parameters, previously inline literals.
# NOTE(review): both look like hand-tuned heuristics — the exponent maps the
# implied away-win probability to an away-to-score probability, and the factor
# scales down the derived "away not to score" odds. Confirm their origin.
ATS_PROB_EXPONENT = 1 / 4.8
ANTS_ODDS_FACTOR = 0.8

# Gain helpers read the prediction from the column named by `prediction`
# (set in the Metrics cell); refresh that column from the Step 1 copy.
complete_df['LogRegPred'] = complete_df['ATW_LogRegPred']

# Away odds with fallbacks: Bet365 first, then AvgCA, then AvgA.
complete_df['QuotaA'] = complete_df['B365A'].fillna(complete_df['AvgCA']).fillna(complete_df['AvgA'])

# Implied away-win probability and the derived score / not-score probabilities.
complete_df['PA'] = 1 / complete_df['QuotaA']
complete_df['PATS'] = complete_df['PA'] ** ATS_PROB_EXPONENT
complete_df['PANTS'] = 1 - complete_df['PATS']

# Odds derived from those probabilities; the not-score odds are scaled down.
complete_df['QuotaATS'] = 1 / complete_df['PATS']
complete_df['QuotaANTS'] = (1 / complete_df['PANTS']) * ANTS_ODDS_FACTOR

# Row-wise gains from the project helpers, one column per strategy.
complete_df['GainATS'] = complete_df.apply(calculate_gain_ATS, axis=1, args=(prediction,))
complete_df['GainANTS'] = complete_df.apply(calculate_gain_ANTS, axis=1, args=(prediction,))
complete_df['GainReverseATS'] = complete_df.apply(calculate_gain_ReverseATS, axis=1, args=(prediction,))

# Running totals in the row order of complete_df (train rows, then test rows).
# NOTE(review): complete_df is the concatenation of df_train and df_test, so
# these totals are not purely out-of-sample if the loaded model was fitted on
# the train rows — confirm before interpreting them as expected performance.
complete_df['EquityATS'] = complete_df['GainATS'].cumsum()
complete_df['EquityANTS'] = complete_df['GainANTS'].cumsum()
complete_df['EquityRevATS'] = complete_df['GainReverseATS'].cumsum()

# Print the final value of each running total as a scalar, matching how the
# Metrics cell reports EquityATW.
print('Equity ATS', complete_df['EquityATS'].iat[-1])
print('Equity ANTS', complete_df['EquityANTS'].iat[-1])
print('Equity Reverse ATS', complete_df['EquityRevATS'].iat[-1])

Equity ATS 51638   -179.087633
Name: EquityATS, dtype: float64
Equity ANTS 51638    1324.548366
Name: EquityANTS, dtype: float64
Equity Reverse ATS 51638    5158.555224
Name: EquityRevATS, dtype: float64
