In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the four CSV inputs from the working directory.
# NOTE: `xp` is loaded here but is not referenced by any later cell in this notebook.
train_matches, test_matches, gold, xp = map(
    pd.read_csv,
    ['train.csv', 'test.csv', 'gold.csv', 'xp.csv'],
)

In [3]:
# Keep only the rows of `gold` whose `times` value is 600
# (presumably the 600-second snapshot of each match -- TODO confirm the unit).
final_gold = gold[gold.times == 600]
# `times` is constant after the filter, so it carries no information.
# `axis` is passed by keyword: the positional form `drop('times', 1)` was
# deprecated in pandas 1.3 and removed in pandas 2.0.
final_gold = final_gold.drop('times', axis=1)

## Отсортируем игроков внутри каждой команды по количеству золота.

In [5]:
# Build a frame with the gold of players 0-4 sorted in ascending order within
# each row. The column names are kept, but after sorting `player_0` holds the
# smallest value of the five and `player_4` the largest, so they no longer
# refer to fixed player slots.
radiant_columns = ['player_0', 'player_1', 'player_2', 'player_3', 'player_4']
radiant_values = np.sort(final_gold[radiant_columns].values, axis=1)

# `mid` becomes a column vector so it can be placed in front of the sorted
# block; it is the key used to merge this frame back later.
mids = final_gold['mid'].values[:, np.newaxis]

rad_sorted_gold = pd.DataFrame(
    np.concatenate((mids, radiant_values), axis=1),
    index=final_gold.index,
    columns=['mid'] + radiant_columns,
)

In [6]:
# Build a frame with the gold of players 5-9 sorted in ascending order within
# each row. The column names are kept, but after sorting `player_5` holds the
# smallest value of the five and `player_9` the largest, so they no longer
# refer to fixed player slots.
dire_columns = ['player_5', 'player_6', 'player_7', 'player_8', 'player_9']
dire_values = np.sort(final_gold[dire_columns].values, axis=1)

# `mid` becomes a column vector so it can be placed in front of the sorted
# block; it is the key used to merge this frame back later.
mids = final_gold['mid'].values[:, np.newaxis]

dire_sorted_gold = pd.DataFrame(
    np.concatenate((mids, dire_values), axis=1),
    index=final_gold.index,
    columns=['mid'] + dire_columns,
)

## Вставим отсортированные данные вместо старых.

In [7]:
# Swap the original per-player gold columns for their per-team sorted versions.
# First remove all ten unsorted columns. `axis` is passed by keyword because
# the positional form `drop(labels, 1)` was deprecated in pandas 1.3 and
# removed in pandas 2.0.
final_gold = final_gold.drop(
    ['player_0', 'player_1', 'player_2', 'player_3', 'player_4',
     'player_5', 'player_6', 'player_7', 'player_8', 'player_9'],
    axis=1,
)
# Then bring the sorted columns back, matching rows on the match id.
# NOTE(review): this assumes `mid` is unique per row in each frame; duplicated
# keys would multiply rows in the merge -- confirm.
final_gold = pd.merge(final_gold, rad_sorted_gold, on='mid')
final_gold = pd.merge(final_gold, dire_sorted_gold, on='mid')

## Теперь посчитаем те же признаки, что и раньше, без изменений — но уже на отсортированных данных.

In [8]:
# Total gold per team: row-wise sum over each team's five player columns.
# Sorting within a team does not change these sums.
radiant_slots = ['player_0', 'player_1', 'player_2', 'player_3', 'player_4']
dire_slots = ['player_5', 'player_6', 'player_7', 'player_8', 'player_9']

radiant_gold = final_gold[radiant_slots].sum(axis=1)
dire_gold = final_gold[dire_slots].sum(axis=1)

# Column insertion order (radiant first, then dire) is kept, because the
# feature matrix built later depends on column order.
final_gold['radiant_gold'] = radiant_gold
final_gold['dire_gold'] = dire_gold

In [9]:
# Team-level comparison: absolute and relative gold advantage of radiant over dire.
radiant_total = final_gold['radiant_gold']
dire_total = final_gold['dire_gold']
final_gold['diff_gold'] = radiant_total - dire_total
final_gold['ratio_gold'] = radiant_total / dire_total

# Comparison of `player_4` and `player_9`. When the per-team sorting cells have
# been run, these columns hold the largest gold value of each team.
top_radiant = final_gold['player_4']
top_dire = final_gold['player_9']
final_gold['strongest_diff_gold'] = top_radiant - top_dire
final_gold['strongest_ratio_gold'] = top_radiant / top_dire

## Сложим все признаки, которые мы нагенерировали, в одну таблицу.

In [10]:
# `all_features` is the table that gets joined onto the train/test matches.
# It is another name for the same DataFrame object as `final_gold`; no copy is made.
all_features = final_gold

In [11]:
# Attach the features to the train and test matches by match id. The left join
# keeps one row per match in the order of `train_matches` / `test_matches`;
# matches with no row in `all_features` get NaN features.
# `mid` is only the join key, so it is dropped from the feature table.
# `axis` is passed by keyword: the positional form `drop(labels, 1)` was
# deprecated in pandas 1.3 and removed in pandas 2.0.
train = pd.merge(train_matches[['mid']], all_features, on='mid', how='left').drop(['mid'], axis=1)
test = pd.merge(test_matches[['mid']], all_features, on='mid', how='left').drop(['mid'], axis=1)

In [12]:
# Convert to plain NumPy arrays for scikit-learn: the target first, then the
# two feature matrices (which share the same column order).
y_train = train_matches['radiant_won'].values
x_train, x_test = train.values, test.values

In [14]:
# Random forest evaluated with 5-fold cross-validated ROC AUC on the training set.
# NOTE(review): this cell sets random_state=1224, but the recorded output of the
# later `clf.fit` cell shows random_state=1234, and the execution counts are out
# of order -- the saved outputs may come from a different version of this cell.
# Restart the kernel and run all cells to confirm the reported score.
clf = RandomForestClassifier(
    n_estimators=100,
    random_state=1224,
    n_jobs=-1,
)
fold_auc = cross_val_score(clf, x_train, y_train, scoring='roc_auc', cv=5)
fold_auc.mean()

0.69123919702673386

In [13]:
# Fit the classifier on the full training set. `cross_val_score` fits clones of
# the estimator, so `clf` itself is still unfitted before this call.
clf.fit(x_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=-1, oob_score=False,
            random_state=1234, verbose=0, warm_start=False)

In [14]:
# Store the predicted probability of the second class (`clf.classes_[1]`) as
# `radiant_won` -- presumably the "radiant won" label 1; confirm the training
# labels are 0/1.
test_proba = clf.predict_proba(x_test)
test_matches['radiant_won'] = test_proba[:, 1]

In [15]:
# Write the submission file: every column of `test_matches`, including the
# predicted `radiant_won`, without the DataFrame index.
# `index` is documented as a bool, so pass False explicitly instead of relying
# on None being falsy.
test_matches.to_csv('my_solution.csv', index=False)