In [5]:
from itertools import chain, combinations
import numpy as np  # Fundamental package for scientific computing with Python
import pandas as pd
import calendar
import time
from sklearn.metrics import accuracy_score, precision_score, f1_score

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import sys
import os
from sklearn.tree import DecisionTreeClassifier
# Folder the notebook is assumed to live in. The original expression
# os.path.dirname(os.path.abspath("__file__")) used the *string* "__file__"
# (presumably because the __file__ variable is not available in a notebook),
# which simply resolves to the normalised current working directory.
current_dir = os.path.abspath(os.getcwd())

# Absolute path of the sibling "lib" folder, one level above current_dir.
lib_dir = os.path.join(current_dir, "..", "lib")

# Make "lib" importable. The guard keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path each time.
if lib_dir not in sys.path:
    sys.path.append(lib_dir)

from Utility import *

In [6]:
# Empty results table that the evaluation loop appends to.
# Every column name is followed by a comma: two adjacent string literals are
# silently concatenated by Python, which previously merged 'UnderTestAccuracy'
# and 'OverTestAccuracy' into one bogus column.
dataframe = pd.DataFrame(columns=[
    'DataSetName',
    'TrainAccuracy', 'TrainPrecision', 'TrainF2', 'TrainRecall', 'TrainROC',
    'TestAccuracy', 'TestPrecision', 'TestF2', 'TestRecall', 'TestROC',
    'UnderTestAccuracy', 'OverTestAccuracy',
    'BestParams',
])

# Candidate feature columns. Commented-out names are currently excluded from
# the search; uncomment a line to include that column.
features = [
    'EloDiff',
    "PointsDifference",
    # "HomeGoalsRatio",
    # "AwayGoalsRatio",
    # "GoalsDifference",
    # "ConcededGoalsDifference",
    'GoalsAvgSum',
    'RecentFormRatio',
    # "EloRatio",
    # "HomeLast3Points",
    # "HomeAvgGoalsScored",
    # "HomeAvgGoalsConceded",
    # "HomeEwmaPoints",
    # "HomeEwmaGoalsScored",
    # "HomeEwmaGoalsConceded",
    # "AwayLast3Points",
    # "AwayAvgGoalsScored",
    # "AwayAvgGoalsConceded",
    # "AwayEwmaPoints",
    # "AwayEwmaGoalsScored",
    # "AwayEwmaGoalsConceded",
    # "HomeWins",
    # "HomeDraws",
    # "HomeLosses",
    # "AwayWins",
    # "AwayDraws",
    # "AwayLosses",
    "UltimoScontroDiretto",
    "Last3PointsDifference",
    "GoalRatioDifference",
    "EwmaGoalsSum",
    "GoalsSum",
    # "HomePointGap",
    # "AwayPointGap",
    # "HomeGoalGap",
    # "AwayGoalGap",
    "feat1",
    "feat2",
    'FormDiff',
    'RecentFormDiff',
    'GoalCumulativeSum',
    # 'GoalCumulativeSumPrev'
]
# features = ['AwayGoalsRatio', 'GoalsDifference', 'HomeEwmaGoalsScored']

# Largest subset size to enumerate. Set to len(features) to try every
# non-empty subset (the number of combinations grows very quickly).
MAX_COMBO_SIZE = 5

# Lazy, one-shot iterator over all feature subsets of size 1..MAX_COMBO_SIZE,
# smallest subsets first.
combinazioni = chain.from_iterable(
    combinations(features, r) for r in range(1, MAX_COMBO_SIZE + 1))

# Filled by a later cell with the materialised combinations.
combos = []

## Combo

In [7]:
# Drain the lazy `combinazioni` iterator into the `combos` list, logging each
# combination with its position and the local wall-clock time.
# `combinazioni` is a one-shot iterator: re-running only this cell finds it
# exhausted and leaves `combos` unchanged.
for idx, combinazione in enumerate(combinazioni):
    print(idx, time.strftime("%H:%M:%S", time.localtime()), ':', combinazione)
    combos.append(combinazione)

print('Length of combos', len(combos))

0 18:20:59 : ('EloDiff',)
1 18:20:59 : ('PointsDifference',)
2 18:20:59 : ('GoalsAvgSum',)
3 18:20:59 : ('RecentFormRatio',)
4 18:20:59 : ('UltimoScontroDiretto',)
5 18:20:59 : ('Last3PointsDifference',)
6 18:20:59 : ('GoalRatioDifference',)
7 18:20:59 : ('EwmaGoalsSum',)
8 18:20:59 : ('GoalsSum',)
9 18:20:59 : ('feat1',)
10 18:20:59 : ('feat2',)
11 18:20:59 : ('FormDiff',)
12 18:20:59 : ('RecentFormDiff',)
13 18:20:59 : ('GoalCumulativeSum',)
14 18:20:59 : ('EloDiff', 'PointsDifference')
15 18:20:59 : ('EloDiff', 'GoalsAvgSum')
16 18:20:59 : ('EloDiff', 'RecentFormRatio')
17 18:20:59 : ('EloDiff', 'UltimoScontroDiretto')
18 18:20:59 : ('EloDiff', 'Last3PointsDifference')
19 18:20:59 : ('EloDiff', 'GoalRatioDifference')
20 18:20:59 : ('EloDiff', 'EwmaGoalsSum')
21 18:20:59 : ('EloDiff', 'GoalsSum')
22 18:20:59 : ('EloDiff', 'feat1')
23 18:20:59 : ('EloDiff', 'feat2')
24 18:20:59 : ('EloDiff', 'FormDiff')
25 18:20:59 : ('EloDiff', 'RecentFormDiff')
26 18:20:59 : ('EloDiff', 'GoalCumulat

In [8]:
# Evaluate every feature combination with three classifiers (logistic
# regression, decision tree, random forest), collect train/test metrics for
# each, and write the whole result table to an Excel file at the end.

# Loaded once instead of once per combination.
# NOTE(review): assumes getFullData() (from the Utility star import) returns
# the same frame on every call -- confirm against lib/Utility.
full_data = getFullData()

# Short model names; '<name>Pred' is the prediction column for each of them.
models = ['LogReg', 'DecTree', 'RF']

# One dict per (combination, model); turned into a DataFrame once after the
# loop, because concatenating inside the loop re-copies the growing frame on
# every row.
rows = []

# Never filled in this cell; kept (created once) in case a later cell reads it.
classAccuracyDf = pd.DataFrame(
    columns=['Modello', 'Accuratezza_Under', 'Accuratezza_Over'])

for idx, combo in enumerate(combos):
    feat_cols = list(combo)

    # dropna returns a new frame, so full_data itself is never modified.
    # (Disabled alternative in the original: use the pre-split
    # getTrainOver1Data() / getTestOver1Data() frames instead of a random split.)
    data = full_data.dropna(subset=feat_cols)

    x_train, x_test, y_train, y_test = train_test_split(
        data[feat_cols], data['isOver'], test_size=0.3, random_state=42, shuffle=True)

    # Undo the shuffle order so features and targets are ordered by index.
    x_train = x_train.sort_index()
    x_test = x_test.sort_index()
    y_train = y_train.sort_index()
    y_test = y_test.sort_index()

    # Create and fit the decision tree classifier.
    decTreeModel = DecisionTreeClassifier(
        random_state=42, min_samples_split=80, max_depth=3).fit(x_train, y_train)

    # Create and fit the logistic regression model (helper from Utility).
    logRegModel = trainLogRegModel(x_train, y_train)

    # Create and fit the random forest classifier.
    rfModel = RandomForestClassifier(
        random_state=42, n_estimators=200,
        min_samples_split=80, max_depth=3).fit(x_train, y_train)

    fitted_models = {'LogReg': logRegModel, 'DecTree': decTreeModel, 'RF': rfModel}

    # Rows of `data` belonging to each split, in `data`'s own row order.
    # Predictions are assigned positionally in the sorted x_train / x_test order,
    # so rows and predictions line up only if `data`'s index is unique and
    # already sorted -- NOTE(review): confirm for the frame getFullData() returns.
    df_test = data[data.index.isin(x_test.index)].copy()
    df_train = data[data.index.isin(x_train.index)].copy()

    for model in models:
        df_train[model + 'Pred'] = fitted_models[model].predict(x_train)
        df_test[model + 'Pred'] = fitted_models[model].predict(x_test)

    for model in models:
        # Accuracies are stored as percentages rounded to two decimals.
        train_accuracy = round(
            100*accuracy_score(y_train, df_train[model+'Pred']), 2)
        test_accuracy = round(
            100*accuracy_score(y_test, df_test[model+'Pred']), 2)
        test_f1 = f1_score(y_test, df_test[model+'Pred'])
        print(idx, time.strftime("%H:%M:%S", time.localtime()),test_accuracy, 'Trying with following combo', combo)

        # Columns declared up front on `dataframe` that are not set here
        # simply stay empty (NaN) in the output.
        rows.append({
            'Model': model,
            'Combo': str(combo),
            'TrainAccuracy': train_accuracy,
            'TestAccuracy': test_accuracy,
            'TestF1': test_f1,
        })

# Single concatenation; rows already present in `dataframe` are kept.
if rows:
    dataframe = pd.concat([dataframe, pd.DataFrame(rows)], ignore_index=True)

# Epoch seconds (UTC) used to make the output file name unique. Computed here,
# once, so it is defined even when `combos` is empty.
time_stamp = calendar.timegm(time.gmtime())
dataframe.to_excel("../Dataframe/"+str(time_stamp) + "all.xlsx")

# #  print(result)

0 18:21:02 51.79 Trying with following combo ('EloDiff',)
0 18:21:02 52.34 Trying with following combo ('EloDiff',)
0 18:21:02 52.14 Trying with following combo ('EloDiff',)
1 18:21:02 51.75 Trying with following combo ('PointsDifference',)
1 18:21:02 52.1 Trying with following combo ('PointsDifference',)
1 18:21:02 52.1 Trying with following combo ('PointsDifference',)
2 18:21:03 53.74 Trying with following combo ('GoalsAvgSum',)
2 18:21:03 54.06 Trying with following combo ('GoalsAvgSum',)
2 18:21:03 54.06 Trying with following combo ('GoalsAvgSum',)
3 18:21:04 49.07 Trying with following combo ('RecentFormRatio',)
3 18:21:04 49.69 Trying with following combo ('RecentFormRatio',)
3 18:21:04 50.6 Trying with following combo ('RecentFormRatio',)
4 18:21:05 50.22 Trying with following combo ('UltimoScontroDiretto',)
4 18:21:05 51.27 Trying with following combo ('UltimoScontroDiretto',)
4 18:21:05 50.54 Trying with following combo ('UltimoScontroDiretto',)
5 18:21:05 50.83 Trying with fo

In [9]:
# dataframe.to_excel("../Dataframe/"+str(time_stamp) + "all.xlsx")