In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
import pandasql as pql
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

## Подгрузим датасеты, сделаем предобработку:
Нам не важно первый игрок или второй играют хорошо, поэтому сделаем аггрегацию по игрокам. Возьмем их средние характеристики, при этом, чтобы не потерять всю информацию, возьмем так же стандартное отклонение среди игроков

In [2]:
df_train = pd.read_csv('train.csv')

In [3]:
df_train

Unnamed: 0,map_id,team1_id,team2_id,map_name,who_win
0,289,6665,7718,Ancient,0
1,715,4411,10577,Inferno,0
2,157,11251,9455,Nuke,1
3,524,4608,7532,Mirage,0
4,404,8637,6667,Overpass,1
...,...,...,...,...,...
708,709,6667,4773,Inferno,0
709,528,9215,5995,Ancient,1
710,163,4869,9565,Mirage,1
711,96,10426,4991,Nuke,1


In [4]:
df_test = pd.read_csv('test.csv')

In [5]:
len(df_test)

30

### Проверим, что в тесте играют те же команды, иначе возникнут трудности при one hot encoding

In [6]:
len(set(list(df_train.team1_id.values)+list(df_train.team2_id.values)+list(df_test.team1_id.values)+list(df_test.team2_id.values)))

61

#### Все ок, новых команд нет


In [7]:
df = pd.concat([df_train, df_test],axis=0)

In [8]:
df_train = df

In [9]:
df_train = df_train.drop(columns=['index'])

In [10]:
players_feats = pd.read_csv('players_feats.csv')

In [11]:
players_feats

Unnamed: 0,p1_id,p1_total_kills,p1_headshots,p1_total_deaths,p1_kd_ratio,p1_damage_per_round,p1_grenade_damage_per_round,p1_maps_played,p1_rounds_played,p1_kills_per_round,...,p5_kill_death_difference,p5_total_opening_kills,p5_total_opening_deaths,p5_opening_kill_ratio,p5_opening_kill_rating,p5_team_win_percent_after_first_kill,p5_first_kill_in_won_rounds,team_id,map_name,map_id
0,4954,90,42.2,112,0.80,76.3,5.9,6,156,0.58,...,5,25,12,2.08,1.28,84.0,25.0,6665,Ancient,635
1,5794,45,60.0,57,0.79,82.3,10.9,3,68,0.66,...,96,54,34,1.59,1.17,70.4,16.7,7532,Ancient,635
2,4954,156,51.9,167,0.93,63.5,3.4,10,265,0.59,...,22,26,19,1.37,1.10,88.5,20.5,6665,Dust2,583
3,5794,449,53.5,427,1.05,86.7,13.1,23,618,0.73,...,104,62,49,1.27,1.10,79.0,17.4,7532,Dust2,583
4,7998,173,32.9,130,1.33,82.4,2.9,9,225,0.77,...,19,27,25,1.08,1.08,81.5,16.2,4608,Dust2,439
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1481,9031,69,49.3,70,0.99,69.8,5.7,4,114,0.61,...,-1,16,18,0.89,1.11,75.0,21.4,10503,Ancient,284
1482,7716,104,55.8,113,0.92,71.6,5.9,6,172,0.60,...,4,33,34,0.97,1.30,75.8,28.4,7020,Vertigo,27
1483,12521,141,37.6,159,0.89,79.7,7.4,8,210,0.67,...,5,9,15,0.60,0.77,55.6,4.3,8297,Vertigo,27
1484,7716,155,58.7,154,1.01,71.1,7.9,10,246,0.63,...,35,30,17,1.76,1.39,83.3,22.9,7020,Ancient,237


In [12]:
players_feats = players_feats.dropna()

In [13]:
len(players_feats)

1470

In [14]:
query = '''select * from 
players_feats, df_train
where (df_train.map_id = players_feats.map_id and df_train.team1_id = players_feats.team_id)
'''

In [15]:
totall_df = pql.sqldf(query)

In [16]:
list(totall_df.columns)

['p1_id',
 'p1_total_kills',
 'p1_headshots',
 'p1_total_deaths',
 'p1_kd_ratio',
 'p1_damage_per_round',
 'p1_grenade_damage_per_round',
 'p1_maps_played',
 'p1_rounds_played',
 'p1_kills_per_round',
 'p1_assists_per_round',
 'p1_deaths_per_round',
 'p1_saved_by_teammate_per_round',
 'p1_saved_teammates_per_round',
 'p1_rating',
 'p1_kill_death',
 'p1_kill_round',
 'p1_rounds_with_kills',
 'p1_kill_death_difference',
 'p1_total_opening_kills',
 'p1_total_opening_deaths',
 'p1_opening_kill_ratio',
 'p1_opening_kill_rating',
 'p1_team_win_percent_after_first_kill',
 'p1_first_kill_in_won_rounds',
 'p2_id',
 'p2_total_kills',
 'p2_headshots',
 'p2_total_deaths',
 'p2_kd_ratio',
 'p2_damage_per_round',
 'p2_grenade_damage_per_round',
 'p2_maps_played',
 'p2_rounds_played',
 'p2_kills_per_round',
 'p2_assists_per_round',
 'p2_deaths_per_round',
 'p2_saved_by_teammate_per_round',
 'p2_saved_teammates_per_round',
 'p2_rating',
 'p2_kill_death',
 'p2_kill_round',
 'p2_rounds_with_kills',
 'p2

In [17]:
list1 = [
 'p1_total_kills',
 'p1_headshots',
 'p1_total_deaths',
 'p1_kd_ratio',
 'p1_damage_per_round',
 'p1_grenade_damage_per_round',
 'p1_maps_played',
 'p1_rounds_played',
 'p1_kills_per_round',
 'p1_assists_per_round',
 'p1_deaths_per_round',
 'p1_saved_by_teammate_per_round',
 'p1_saved_teammates_per_round',
 'p1_rating',
 'p1_kill_death',
 'p1_kill_round',
 'p1_rounds_with_kills',
 'p1_kill_death_difference',
 'p1_total_opening_kills',
 'p1_total_opening_deaths',
 'p1_opening_kill_ratio',
 'p1_opening_kill_rating',
 'p1_team_win_percent_after_first_kill',
 'p1_first_kill_in_won_rounds']

In [18]:
def make_std_and_mean(df, list1, t='1'):
    '''Функция для рассчета средних и std'''
    for item in list1:
        suf = item[3:]
        list_columns = ['p{}_'.format(i)+suf for i in range(1, 6)]
        df['t{}_'.format(t)+suf+'_mean'] = np.mean(df[list_columns].to_numpy(), axis=1)
        df['t{}_'.format(t)+suf+'_std'] = np.std(df[list_columns].to_numpy(), axis=1)
        for i in range(1,6):
            df = df.drop(columns=['p{}_'.format(i)+suf])
    return df

In [19]:
totall_df2 = make_std_and_mean(totall_df, list1, t='1')

In [20]:
totall_df2 = totall_df2.drop(columns=['p1_id', 'p2_id', 'p3_id', 'p4_id', 'p5_id'])

In [21]:
query_3 = '''select * from 
totall_df2, players_feats
where (totall_df2.map_id = players_feats.map_id and totall_df2.team2_id = players_feats.team_id)
'''

In [22]:
totall_df_3 = pql.sqldf(query_3)

In [23]:
list(totall_df_3.columns)

['team_id',
 'map_name',
 'map_id',
 'team1_id',
 'team2_id',
 'who_win',
 't1_total_kills_mean',
 't1_total_kills_std',
 't1_headshots_mean',
 't1_headshots_std',
 't1_total_deaths_mean',
 't1_total_deaths_std',
 't1_kd_ratio_mean',
 't1_kd_ratio_std',
 't1_damage_per_round_mean',
 't1_damage_per_round_std',
 't1_grenade_damage_per_round_mean',
 't1_grenade_damage_per_round_std',
 't1_maps_played_mean',
 't1_maps_played_std',
 't1_rounds_played_mean',
 't1_rounds_played_std',
 't1_kills_per_round_mean',
 't1_kills_per_round_std',
 't1_assists_per_round_mean',
 't1_assists_per_round_std',
 't1_deaths_per_round_mean',
 't1_deaths_per_round_std',
 't1_saved_by_teammate_per_round_mean',
 't1_saved_by_teammate_per_round_std',
 't1_saved_teammates_per_round_mean',
 't1_saved_teammates_per_round_std',
 't1_rating_mean',
 't1_rating_std',
 't1_kill_death_mean',
 't1_kill_death_std',
 't1_kill_round_mean',
 't1_kill_round_std',
 't1_rounds_with_kills_mean',
 't1_rounds_with_kills_std',
 't1_ki

In [24]:
totall_df_3 = totall_df_3.drop(columns=['p1_id', 'p2_id', 'p3_id', 'p4_id', 'p5_id', 'team_id',
 'map_name'])
#  'map_id',
# 'team1_id',
#  'team2_id',])

In [25]:
list(totall_df_3.columns)

['map_id',
 'team1_id',
 'team2_id',
 'who_win',
 't1_total_kills_mean',
 't1_total_kills_std',
 't1_headshots_mean',
 't1_headshots_std',
 't1_total_deaths_mean',
 't1_total_deaths_std',
 't1_kd_ratio_mean',
 't1_kd_ratio_std',
 't1_damage_per_round_mean',
 't1_damage_per_round_std',
 't1_grenade_damage_per_round_mean',
 't1_grenade_damage_per_round_std',
 't1_maps_played_mean',
 't1_maps_played_std',
 't1_rounds_played_mean',
 't1_rounds_played_std',
 't1_kills_per_round_mean',
 't1_kills_per_round_std',
 't1_assists_per_round_mean',
 't1_assists_per_round_std',
 't1_deaths_per_round_mean',
 't1_deaths_per_round_std',
 't1_saved_by_teammate_per_round_mean',
 't1_saved_by_teammate_per_round_std',
 't1_saved_teammates_per_round_mean',
 't1_saved_teammates_per_round_std',
 't1_rating_mean',
 't1_rating_std',
 't1_kill_death_mean',
 't1_kill_death_std',
 't1_kill_round_mean',
 't1_kill_round_std',
 't1_rounds_with_kills_mean',
 't1_rounds_with_kills_std',
 't1_kill_death_difference_mean'

In [26]:
totall_df_3 = totall_df_3.iloc[:,:-1]

In [27]:
totall_df_4 = make_std_and_mean(totall_df_3, list1, t='2')
    

In [28]:
totall_df_4

Unnamed: 0,map_id,team1_id,team2_id,who_win,t1_total_kills_mean,t1_total_kills_std,t1_headshots_mean,t1_headshots_std,t1_total_deaths_mean,t1_total_deaths_std,...,t2_total_opening_deaths_mean,t2_total_opening_deaths_std,t2_opening_kill_ratio_mean,t2_opening_kill_ratio_std,t2_opening_kill_rating_mean,t2_opening_kill_rating_std,t2_team_win_percent_after_first_kill_mean,t2_team_win_percent_after_first_kill_std,t2_first_kill_in_won_rounds_mean,t2_first_kill_in_won_rounds_std
0,635,6665,7532,1.0,98.4,30.787010,44.96,10.351927,111.4,30.269457,...,13.8,10.514752,0.926,0.425986,0.934,0.128468,50.16,28.897515,14.34,8.668472
1,583,6665,7532,1.0,139.2,37.252651,41.70,7.852898,138.6,32.450578,...,59.2,8.885944,1.096,0.377815,1.036,0.157556,77.56,6.753547,16.14,5.309840
2,439,4608,9215,1.0,160.2,10.998182,49.44,11.350348,125.2,7.249828,...,2.0,1.897367,0.966,0.895893,0.642,0.538791,53.14,43.875260,10.40,9.329523
3,363,5995,4411,0.0,316.2,103.933440,46.80,10.648568,289.0,94.935768,...,28.8,8.840814,0.814,0.214252,0.940,0.149131,64.44,5.683872,13.12,4.507061
4,105,4608,6665,1.0,268.6,59.156065,46.94,10.264814,238.2,14.878172,...,29.6,13.001538,0.994,0.210580,1.010,0.154013,69.20,2.916162,15.02,6.225560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
722,357,4608,6667,,442.0,83.744851,47.08,9.787012,393.4,25.896718,...,57.0,20.069878,1.064,0.520715,0.968,0.111427,76.60,7.071916,13.94,3.283656
723,593,4608,6667,,213.6,31.283222,52.44,7.730873,194.4,13.001538,...,32.4,13.215143,1.488,0.349651,1.110,0.123612,78.58,3.946847,15.04,4.237263
724,284,6665,10503,1.0,86.2,30.353912,43.68,8.210091,92.8,30.694625,...,11.2,3.544009,1.040,0.498518,0.980,0.175271,81.90,10.629017,14.68,4.787233
725,27,7020,8297,0.0,114.6,13.078226,48.76,5.701438,110.6,10.965400,...,20.0,9.757049,1.076,0.295337,1.016,0.167404,70.62,10.617796,12.70,6.680120


In [29]:
totall_df_4

Unnamed: 0,map_id,team1_id,team2_id,who_win,t1_total_kills_mean,t1_total_kills_std,t1_headshots_mean,t1_headshots_std,t1_total_deaths_mean,t1_total_deaths_std,...,t2_total_opening_deaths_mean,t2_total_opening_deaths_std,t2_opening_kill_ratio_mean,t2_opening_kill_ratio_std,t2_opening_kill_rating_mean,t2_opening_kill_rating_std,t2_team_win_percent_after_first_kill_mean,t2_team_win_percent_after_first_kill_std,t2_first_kill_in_won_rounds_mean,t2_first_kill_in_won_rounds_std
0,635,6665,7532,1.0,98.4,30.787010,44.96,10.351927,111.4,30.269457,...,13.8,10.514752,0.926,0.425986,0.934,0.128468,50.16,28.897515,14.34,8.668472
1,583,6665,7532,1.0,139.2,37.252651,41.70,7.852898,138.6,32.450578,...,59.2,8.885944,1.096,0.377815,1.036,0.157556,77.56,6.753547,16.14,5.309840
2,439,4608,9215,1.0,160.2,10.998182,49.44,11.350348,125.2,7.249828,...,2.0,1.897367,0.966,0.895893,0.642,0.538791,53.14,43.875260,10.40,9.329523
3,363,5995,4411,0.0,316.2,103.933440,46.80,10.648568,289.0,94.935768,...,28.8,8.840814,0.814,0.214252,0.940,0.149131,64.44,5.683872,13.12,4.507061
4,105,4608,6665,1.0,268.6,59.156065,46.94,10.264814,238.2,14.878172,...,29.6,13.001538,0.994,0.210580,1.010,0.154013,69.20,2.916162,15.02,6.225560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
722,357,4608,6667,,442.0,83.744851,47.08,9.787012,393.4,25.896718,...,57.0,20.069878,1.064,0.520715,0.968,0.111427,76.60,7.071916,13.94,3.283656
723,593,4608,6667,,213.6,31.283222,52.44,7.730873,194.4,13.001538,...,32.4,13.215143,1.488,0.349651,1.110,0.123612,78.58,3.946847,15.04,4.237263
724,284,6665,10503,1.0,86.2,30.353912,43.68,8.210091,92.8,30.694625,...,11.2,3.544009,1.040,0.498518,0.980,0.175271,81.90,10.629017,14.68,4.787233
725,27,7020,8297,0.0,114.6,13.078226,48.76,5.701438,110.6,10.965400,...,20.0,9.757049,1.076,0.295337,1.016,0.167404,70.62,10.617796,12.70,6.680120


In [30]:
totall_df_5 = pd.get_dummies(totall_df_4, columns = ['map_id', 'team1_id', 'team2_id'], prefix=['map_id', 'team1_id', 'team2_id']) 

In [31]:
totall_df_5

Unnamed: 0,who_win,t1_total_kills_mean,t1_total_kills_std,t1_headshots_mean,t1_headshots_std,t1_total_deaths_mean,t1_total_deaths_std,t1_kd_ratio_mean,t1_kd_ratio_std,t1_damage_per_round_mean,...,team2_id_11135,team2_id_11251,team2_id_11309,team2_id_11312,team2_id_11501,team2_id_11518,team2_id_11585,team2_id_11588,team2_id_11595,team2_id_11654
0,1.0,98.4,30.787010,44.96,10.351927,111.4,30.269457,0.876,0.094149,71.54,...,0,0,0,0,0,0,0,0,0,0
1,1.0,139.2,37.252651,41.70,7.852898,138.6,32.450578,0.996,0.090686,70.62,...,0,0,0,0,0,0,0,0,0,0
2,1.0,160.2,10.998182,49.44,11.350348,125.2,7.249828,1.282,0.092390,76.76,...,0,0,0,0,0,0,0,0,0,0
3,0.0,316.2,103.933440,46.80,10.648568,289.0,94.935768,1.156,0.280614,78.66,...,0,0,0,0,0,0,0,0,0,0
4,1.0,268.6,59.156065,46.94,10.264814,238.2,14.878172,1.130,0.253850,74.70,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
722,,442.0,83.744851,47.08,9.787012,393.4,25.896718,1.124,0.206359,75.20,...,0,0,0,0,0,0,0,0,0,0
723,,213.6,31.283222,52.44,7.730873,194.4,13.001538,1.102,0.174287,75.80,...,0,0,0,0,0,0,0,0,0,0
724,1.0,86.2,30.353912,43.68,8.210091,92.8,30.694625,0.928,0.096623,74.04,...,0,0,0,0,0,0,0,0,0,0
725,0.0,114.6,13.078226,48.76,5.701438,110.6,10.965400,1.042,0.117541,73.74,...,0,0,0,0,0,0,0,0,0,0


In [32]:
Data_train = totall_df_5[totall_df_5.who_win.notna()]

In [33]:
Data_res = totall_df_5[totall_df_5.who_win.isna()]

In [34]:
Data_res = Data_res.iloc[:,1:]

In [35]:
Data_res

Unnamed: 0,t1_total_kills_mean,t1_total_kills_std,t1_headshots_mean,t1_headshots_std,t1_total_deaths_mean,t1_total_deaths_std,t1_kd_ratio_mean,t1_kd_ratio_std,t1_damage_per_round_mean,t1_damage_per_round_std,...,team2_id_11135,team2_id_11251,team2_id_11309,team2_id_11312,team2_id_11501,team2_id_11518,team2_id_11585,team2_id_11588,team2_id_11595,team2_id_11654
694,145.2,58.758489,41.54,11.878148,124.4,44.156993,1.16,0.124097,78.08,5.437426,...,0,0,0,0,0,0,0,0,0,0
695,249.4,68.610786,42.7,8.123054,223.4,42.683018,1.1,0.167809,78.36,7.285492,...,0,0,0,0,0,0,0,0,0,0
696,272.2,31.587339,42.66,7.83801,285.4,25.12051,0.964,0.155126,71.58,8.906941,...,0,0,0,0,0,0,0,0,0,0
697,285.8,21.027601,40.86,9.816843,283.6,19.815146,1.01,0.079498,71.74,5.77013,...,0,0,0,0,0,0,0,0,0,0
698,128.6,9.264988,45.24,10.479427,117.2,11.805084,1.106,0.123548,75.92,6.994398,...,0,0,0,0,0,0,0,0,0,0
699,261.2,62.792993,37.14,6.985871,241.4,54.529258,1.082,0.108333,73.54,4.910234,...,0,0,0,0,0,0,0,0,0,0
700,313.2,18.225257,43.36,8.201122,319.6,48.812294,1.0,0.133716,68.72,2.172004,...,0,0,0,0,0,0,0,0,0,0
701,300.4,47.407172,48.44,8.588737,292.8,40.434639,1.036,0.152132,72.78,4.990551,...,0,0,0,0,0,0,0,0,0,0
702,247.8,38.690567,50.78,7.039432,218.4,19.064102,1.132,0.120897,74.1,7.424823,...,0,0,0,0,0,0,0,0,0,0
703,397.2,79.393703,47.32,10.005678,350.2,24.514486,1.132,0.216832,75.12,10.017864,...,0,0,0,0,0,0,0,0,0,0


In [36]:
X = Data_train.iloc[:, 1:]

In [37]:
X

Unnamed: 0,t1_total_kills_mean,t1_total_kills_std,t1_headshots_mean,t1_headshots_std,t1_total_deaths_mean,t1_total_deaths_std,t1_kd_ratio_mean,t1_kd_ratio_std,t1_damage_per_round_mean,t1_damage_per_round_std,...,team2_id_11135,team2_id_11251,team2_id_11309,team2_id_11312,team2_id_11501,team2_id_11518,team2_id_11585,team2_id_11588,team2_id_11595,team2_id_11654
0,98.4,30.787010,44.96,10.351927,111.4,30.269457,0.876,0.094149,71.54,10.405114,...,0,0,0,0,0,0,0,0,0,0
1,139.2,37.252651,41.70,7.852898,138.6,32.450578,0.996,0.090686,70.62,8.839321,...,0,0,0,0,0,0,0,0,0,0
2,160.2,10.998182,49.44,11.350348,125.2,7.249828,1.282,0.092390,76.76,4.459865,...,0,0,0,0,0,0,0,0,0,0
3,316.2,103.933440,46.80,10.648568,289.0,94.935768,1.156,0.280614,78.66,10.803074,...,0,0,0,0,0,0,0,0,0,0
4,268.6,59.156065,46.94,10.264814,238.2,14.878172,1.130,0.253850,74.70,11.286275,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,203.4,31.270433,42.82,6.976933,204.4,13.836184,0.992,0.107592,74.86,7.487216,...,0,0,0,0,0,0,0,0,0,0
693,205.8,28.812497,46.18,10.662157,207.0,23.383755,0.994,0.067705,71.26,7.727768,...,0,0,0,0,0,0,0,0,0,0
724,86.2,30.353912,43.68,8.210091,92.8,30.694625,0.928,0.096623,74.04,10.292832,...,0,0,0,0,0,0,0,0,0,0
725,114.6,13.078226,48.76,5.701438,110.6,10.965400,1.042,0.117541,73.74,9.493493,...,0,0,0,0,0,0,0,0,0,0


In [38]:
Y = Data_train['who_win'].values

In [39]:
X.columns

Index(['t1_total_kills_mean', 't1_total_kills_std', 't1_headshots_mean',
       't1_headshots_std', 't1_total_deaths_mean', 't1_total_deaths_std',
       't1_kd_ratio_mean', 't1_kd_ratio_std', 't1_damage_per_round_mean',
       't1_damage_per_round_std',
       ...
       'team2_id_11135', 'team2_id_11251', 'team2_id_11309', 'team2_id_11312',
       'team2_id_11501', 'team2_id_11518', 'team2_id_11585', 'team2_id_11588',
       'team2_id_11595', 'team2_id_11654'],
      dtype='object', length=932)

In [40]:
list(X.columns)

['t1_total_kills_mean',
 't1_total_kills_std',
 't1_headshots_mean',
 't1_headshots_std',
 't1_total_deaths_mean',
 't1_total_deaths_std',
 't1_kd_ratio_mean',
 't1_kd_ratio_std',
 't1_damage_per_round_mean',
 't1_damage_per_round_std',
 't1_grenade_damage_per_round_mean',
 't1_grenade_damage_per_round_std',
 't1_maps_played_mean',
 't1_maps_played_std',
 't1_rounds_played_mean',
 't1_rounds_played_std',
 't1_kills_per_round_mean',
 't1_kills_per_round_std',
 't1_assists_per_round_mean',
 't1_assists_per_round_std',
 't1_deaths_per_round_mean',
 't1_deaths_per_round_std',
 't1_saved_by_teammate_per_round_mean',
 't1_saved_by_teammate_per_round_std',
 't1_saved_teammates_per_round_mean',
 't1_saved_teammates_per_round_std',
 't1_rating_mean',
 't1_rating_std',
 't1_kill_death_mean',
 't1_kill_death_std',
 't1_kill_round_mean',
 't1_kill_round_std',
 't1_rounds_with_kills_mean',
 't1_rounds_with_kills_std',
 't1_kill_death_difference_mean',
 't1_kill_death_difference_std',
 't1_total_ope

In [41]:
i = 0
for a in X.columns:
    i+=1
    if 'map_id' in a:
        print(i)
        break

97


In [42]:
X_tree_test = Data_res.iloc[:, 96:]

In [43]:
X_tree = X.iloc[:, 96:]

In [44]:
X_tree

Unnamed: 0,map_id_0,map_id_1,map_id_2,map_id_3,map_id_4,map_id_5,map_id_6,map_id_7,map_id_8,map_id_9,...,team2_id_11135,team2_id_11251,team2_id_11309,team2_id_11312,team2_id_11501,team2_id_11518,team2_id_11585,team2_id_11588,team2_id_11595,team2_id_11654
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
693,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
724,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
725,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [45]:
X_train, X_test, y_train, y_test = train_test_split(X_tree, Y, test_size=0.2, random_state=5)

In [46]:
from sklearn.model_selection import GridSearchCV

In [47]:
params={'n_estimators':[2, 3, 4, 5, 8, 10, 20, 30], 
    'max_depth':[2, 3, 4, 5, 6, 7, 8, 10]
}

In [48]:
model = XGBClassifier(use_label_encoder=False, verbose=True, random_state=2)
clf = GridSearchCV(model, params)
clf.fit(X_train, y_train)

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to X

GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, random_state=2,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, use_label_encoder=Fal

In [49]:
clf.best_params_

{'max_depth': 4, 'n_estimators': 8}

In [50]:
model = XGBClassifier(n_estimators = 8, max_depth = 4,
                      use_label_encoder=False, random_state=2)

In [51]:
model.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=8, n_jobs=8, num_parallel_tree=1, random_state=2,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

In [52]:
preds_train = model.predict_proba(X_train)


In [53]:
roc_auc_score(y_train, preds_train[:, 1])


0.6694947014732489

In [54]:
preds_test = model.predict(X_test)


In [55]:
np.sum(preds_test == y_test)/len(y_test)


0.6142857142857143

In [56]:
pred_proba = model.predict_proba(X_test)


In [57]:
roc_auc_score(y_test, pred_proba[:, 1])

0.5861326442721791

In [58]:
X_lin_res = Data_res.iloc[:, :96]

In [59]:
X_lin = X.iloc[:, :96]

### Добавим предсказания деревьев как новый признак для линейных моделей (ранее проверялось, что линейные модели показывают лучшее качество чем деревья, однако с one_hot признаками деревья должны справляться лучше

In [60]:
X_lin['new_feature'] = model.predict_proba(X_tree)[:, 1]

In [61]:
X_lin_res['new_feature'] = model.predict_proba(X_tree_test)[:, 1]

In [62]:
X_train, X_test, y_train, y_test = train_test_split(X_lin, Y, test_size=0.33, random_state=5)

## Протестируем линейную модель, данных довольно мало, поэтому она может показать хороший результат

In [63]:
from sklearn.linear_model import LogisticRegression

In [64]:
model = LogisticRegression()

In [65]:
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [66]:
preds_train = model.predict_proba(X_train)

In [67]:
roc_auc_score(y_train, preds_train[:, 1])

0.7131521157430475

In [68]:
preds_test = model.predict(X_test)

In [69]:
np.sum(preds_test == y_test)/len(y_test)

0.6147186147186147

In [70]:
pred_proba = model.predict_proba(X_test)

In [71]:
roc_auc_score(y_test, pred_proba[:, 1])

0.6525462962962962

In [72]:
pred_proba_res = model.predict_proba(X_lin_res)[:, 1]

In [73]:
Data_res['who_win'] = pred_proba_res

In [74]:
res_linear = Data_res.copy()

### Линейные модели в совокупности с деревьями дали ROC_AUC 0.65

## Попробуем добавить нелинейности с помощью нейросетей

In [75]:
X_train_t = X_train.to_numpy()
X_test_t = X_test.to_numpy()
y_train_t = y_train
y_test_t = y_test

In [76]:
X_res = X_lin_res.to_numpy()

In [77]:
X_res.shape

(30, 97)

In [78]:
X_train_t.shape

(466, 97)

In [79]:
X_train_t.shape

(466, 97)

In [80]:
import torch

In [81]:
X_train_t = torch.FloatTensor(X_train_t)
X_test_t = torch.FloatTensor(X_test_t)
y_train_t = torch.LongTensor(y_train_t)
y_test_t = torch.LongTensor(y_test_t)
X_res = torch.FloatTensor(X_res)

In [82]:
n = X_train_t.shape[1]

In [83]:
len(X_train_t)

466

In [84]:
n


97

In [85]:
import random

def init_random_seed(value=0):
    random.seed(value)
    np.random.seed(value)
    torch.manual_seed(value)
    torch.cuda.manual_seed(value)
    torch.backends.cudnn.deterministic = True

In [86]:
init_random_seed(value=500)

In [87]:
class Net(torch.nn.Module):
    def __init__(self, n_hidden_neurons):
        super(Net, self).__init__()
        
        self.fc1 = torch.nn.Linear(n, n_hidden_neurons)
        self.activ1 = torch.nn.Sigmoid()
        self.fc2 = torch.nn.Linear(n_hidden_neurons, 2)
        self.sm = torch.nn.Softmax(dim=1)
        
    def forward(self, x):
        
#         x = self.fc1(x)
#         x = self.activ1(x)
#         x = self.fc2(x)
        
        x = self.fc1(x)
        x = self.activ1(x)
        x = self.fc2(x)

        return x

    def inference(self, x):
        x = self.forward(x)
        x = self.sm(x)
        return x
    
net = Net(7)

In [88]:
loss = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(net.parameters(), 
                             lr=1.0e-4)

In [89]:
batch_size = 150

for epoch in range(2000):

    optimizer.zero_grad()

    x_batch = X_train_t
    y_batch = y_train_t

    preds = net.forward(x_batch) 

    loss_value = loss(preds, y_batch)
    loss_value.backward()

    optimizer.step()
        
    if epoch % 100 == 0:
        test_preds = net.forward(X_test_t)
        test_preds = test_preds.argmax(dim=1)
        print((test_preds == y_test_t).float().mean())
        print('train')
        train_preds = net.forward(X_train_t)
        train_preds = train_preds.argmax(dim=1)
        print((train_preds == y_train_t).float().mean())
        print()

tensor(0.4805)
train
tensor(0.4657)

tensor(0.5844)
train
tensor(0.5386)

tensor(0.6061)
train
tensor(0.5622)

tensor(0.6017)
train
tensor(0.5730)

tensor(0.6017)
train
tensor(0.5837)

tensor(0.6061)
train
tensor(0.5987)

tensor(0.6190)
train
tensor(0.6030)

tensor(0.6277)
train
tensor(0.6137)

tensor(0.6364)
train
tensor(0.6137)

tensor(0.6320)
train
tensor(0.6180)

tensor(0.6234)
train
tensor(0.6159)

tensor(0.6234)
train
tensor(0.6202)

tensor(0.6234)
train
tensor(0.6159)

tensor(0.6104)
train
tensor(0.6223)

tensor(0.6104)
train
tensor(0.6266)

tensor(0.6190)
train
tensor(0.6524)

tensor(0.6190)
train
tensor(0.6717)

tensor(0.6190)
train
tensor(0.6931)

tensor(0.6277)
train
tensor(0.7124)

tensor(0.6277)
train
tensor(0.7082)



In [90]:
X_test_t.shape

torch.Size([231, 97])

In [91]:
pred_proba_test = net.inference(X_test_t)
pred_test = pred_proba_test.argmax(dim=1)

In [92]:
pred_proba_test = pred_proba_test.detach().numpy()

In [93]:
roc_auc_score(y_test_t, pred_proba_test[:, 1])

0.6652391975308641

### Дополнительно удалось добавить несколько пунктов к ROC_AUC однако данных мало, это может быть не существенным

In [94]:
pred_proba_test = net.inference(X_res)

In [95]:
pred_proba_test[:, 1]

tensor([0.4602, 0.4560, 0.5267, 0.4665, 0.5998, 0.4783, 0.6248, 0.4515, 0.4516,
        0.4514, 0.4561, 0.4515, 0.4516, 0.5710, 0.4516, 0.4515, 0.4531, 0.4520,
        0.4603, 0.4537, 0.4521, 0.4516, 0.4520, 0.4514, 0.4514, 0.4638, 0.4515,
        0.4516, 0.4525, 0.4525], grad_fn=<SelectBackward>)

In [96]:
Data_res['who_win'] = np.array([0.4602, 0.4560, 0.5267, 0.4665, 0.5998, 0.4783, 0.6248, 0.4515, 0.4516,
        0.4514, 0.4561, 0.4515, 0.4516, 0.5710, 0.4516, 0.4515, 0.4531, 0.4520,
        0.4603, 0.4537, 0.4521, 0.4516, 0.4520, 0.4514, 0.4514, 0.4638, 0.4515,
        0.4516, 0.4525, 0.4525])

In [97]:
Data_res.to_csv('res_net.csv')

In [98]:
set_maps = set(df_test.map_id.values)

In [99]:
set_team1 = set(df_test.team1_id.values)

In [100]:
set_team2 = set(df_test.team2_id.values)

In [101]:
set_team2

{4411, 5752, 5973, 6665, 6667, 7020, 7718, 8297}

In [102]:
a = 'map_id_11309'

In [103]:
data_without_res = totall_df_4[totall_df_4.who_win.isna()]

In [104]:
data_without_res

Unnamed: 0,map_id,team1_id,team2_id,who_win,t1_total_kills_mean,t1_total_kills_std,t1_headshots_mean,t1_headshots_std,t1_total_deaths_mean,t1_total_deaths_std,...,t2_total_opening_deaths_mean,t2_total_opening_deaths_std,t2_opening_kill_ratio_mean,t2_opening_kill_ratio_std,t2_opening_kill_rating_mean,t2_opening_kill_rating_std,t2_team_win_percent_after_first_kill_mean,t2_team_win_percent_after_first_kill_std,t2_first_kill_in_won_rounds_mean,t2_first_kill_in_won_rounds_std
694,309,5973,5752,,145.2,58.758489,41.54,11.878148,124.4,44.156993,...,22.4,9.951884,1.942,1.310197,1.142,0.149318,73.32,3.023508,15.48,4.312957
695,541,5973,5752,,249.4,68.610786,42.7,8.123054,223.4,42.683018,...,26.2,14.344337,1.374,0.456491,1.058,0.125443,75.36,6.912771,14.32,4.508836
696,1,8297,7020,,272.2,31.587339,42.66,7.83801,285.4,25.12051,...,24.6,16.788091,1.092,0.237941,1.024,0.152394,77.6,11.44028,14.74,5.553954
697,392,8297,7020,,285.8,21.027601,40.86,9.816843,283.6,19.815146,...,27.4,15.53834,0.688,0.367336,0.742,0.372688,57.34,28.914259,10.12,5.410878
698,684,8297,7020,,128.6,9.264988,45.24,10.479427,117.2,11.805084,...,21.0,11.454257,1.14,0.462385,1.056,0.192624,74.38,15.047578,15.46,7.004456
699,85,4494,4411,,261.2,62.792993,37.14,6.985871,241.4,54.529258,...,30.2,11.496086,1.002,0.145657,1.004,0.072829,73.08,13.386172,15.22,3.804944
700,314,4494,4411,,313.2,18.225257,43.36,8.201122,319.6,48.812294,...,37.4,17.647663,1.168,0.437877,1.038,0.158921,73.28,8.224695,15.28,4.814727
701,403,4494,4411,,300.4,47.407172,48.44,8.588737,292.8,40.434639,...,27.8,7.520638,1.086,0.418693,1.03,0.176522,71.24,6.543577,15.24,5.8834
702,125,4608,7718,,247.8,38.690567,50.78,7.039432,218.4,19.064102,...,36.4,17.106724,1.106,0.19986,1.022,0.155358,76.74,3.211604,14.9,6.011655
703,241,4608,7718,,397.2,79.393703,47.32,10.005678,350.2,24.514486,...,33.6,8.138796,0.994,0.349148,0.976,0.111104,76.3,10.585273,14.36,2.366094


In [105]:
data_without_res.columns

Index(['map_id', 'team1_id', 'team2_id', 'who_win', 't1_total_kills_mean',
       't1_total_kills_std', 't1_headshots_mean', 't1_headshots_std',
       't1_total_deaths_mean', 't1_total_deaths_std', 't1_kd_ratio_mean',
       't1_kd_ratio_std', 't1_damage_per_round_mean',
       't1_damage_per_round_std', 't1_grenade_damage_per_round_mean',
       't1_grenade_damage_per_round_std', 't1_maps_played_mean',
       't1_maps_played_std', 't1_rounds_played_mean', 't1_rounds_played_std',
       't1_kills_per_round_mean', 't1_kills_per_round_std',
       't1_assists_per_round_mean', 't1_assists_per_round_std',
       't1_deaths_per_round_mean', 't1_deaths_per_round_std',
       't1_saved_by_teammate_per_round_mean',
       't1_saved_by_teammate_per_round_std',
       't1_saved_teammates_per_round_mean', 't1_saved_teammates_per_round_std',
       't1_rating_mean', 't1_rating_std', 't1_kill_death_mean',
       't1_kill_death_std', 't1_kill_round_mean', 't1_kill_round_std',
       't1_rounds_with

In [112]:
data_without_res = data_without_res.drop(columns=['who_win'])

In [113]:
query_res_net = '''select map_id, team1_id, team2_id, who_win from 
data_without_res, Data_res
where (data_without_res.t1_total_kills_std = Data_res.t1_total_kills_std and data_without_res.t2_total_kills_std = Data_res.t2_total_kills_std)
'''

In [114]:
query_res_linear = '''select map_id, team1_id, team2_id, who_win from 
data_without_res, res_linear
where (data_without_res.t1_total_kills_std = res_linear.t1_total_kills_std and data_without_res.t2_total_kills_std = res_linear.t2_total_kills_std)
'''

In [115]:
df_net = pql.sqldf(query_res_net)

In [116]:
df_linear = pql.sqldf(query_res_linear)

In [117]:
df_test['who_win'] = df_net['who_win'].values

In [118]:
df_test.to_csv('test_net.csv' ,index=False)

In [119]:
df_test['who_win'] = df_linear['who_win'].values

In [120]:
df_test.to_csv('test_Linear.csv' ,index=False)