**Import Libraries**

In [None]:
import numpy as np
import pandas as pd

from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.pipeline import make_pipeline

import pickle

**Load and Clean Data**

In [1]:
# Load the three CSV inputs in one pass: `df` holds the battles with a known
# winner, `test` the battles to predict, and `pk` the per-Pokemon attributes.
df, test, pk = map(
    pd.read_csv,
    ('data/train.csv', 'data/test.csv', 'data/pokemon.csv'),
)

In [2]:
# Clean the attribute table: replace missing 'Type 2' values with the literal
# string 'None', then remove the 'Generation' and 'Legendary' columns.
pk = (
    pk
    .assign(**{'Type 2': lambda stats: stats['Type 2'].fillna('None')})
    .drop(columns=['Generation', 'Legendary'])
)

In [3]:
# Preview the first five training rows to check the battle schema.
df.head(n=5)

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,5,49,5
1,119,5,5
2,130,52,130
3,123,139,139
4,166,108,166


In [4]:
# Binary target: 1 when the first-listed Pokemon is the recorded winner, else 0.
df['first_winner'] = df['First_pokemon'].eq(df['Winner']).astype(int)


In [5]:
# Attach the attributes of both combatants with two left joins on the Pokemon
# id column ('#'). The second join collides with the columns added by the
# first, so pandas applies its default suffixes: '_x' for the first Pokemon's
# attributes and '_y' for the second's.
data = (
    df
    .merge(pk, how='left', left_on='First_pokemon', right_on='#')
    .merge(pk, how='left', left_on='Second_pokemon', right_on='#')
)

In [6]:
# List the merged column names to confirm the '_x' / '_y' suffixes.
data.columns

Index(['First_pokemon', 'Second_pokemon', 'Winner', 'first_winner', '#_x',
       'Name_x', 'Type 1_x', 'Type 2_x', 'HP_x', 'Attack_x', 'Defense_x',
       'Sp. Atk_x', 'Sp. Def_x', 'Speed_x', '#_y', 'Name_y', 'Type 1_y',
       'Type 2_y', 'HP_y', 'Attack_y', 'Defense_y', 'Sp. Atk_y', 'Sp. Def_y',
       'Speed_y'],
      dtype='object')

In [7]:
# Keep the battle ids, the binary target and both Pokemon's attributes.
# 'Winner' is simply left out of the selection instead of being selected and
# then dropped: the select-then-drop form raised KeyError when the cell was
# executed a second time, because 'Winner' was already gone from `data`.
model_columns = [
    'First_pokemon', 'Second_pokemon', 'first_winner',
    '#_x', 'Name_x', 'Type 1_x', 'Type 2_x', 'HP_x', 'Attack_x', 'Defense_x',
    'Sp. Atk_x', 'Sp. Def_x', 'Speed_x',
    '#_y', 'Name_y', 'Type 1_y', 'Type 2_y', 'HP_y', 'Attack_y', 'Defense_y',
    'Sp. Atk_y', 'Sp. Def_y', 'Speed_y',
]
data = data[model_columns]
data.head()

Unnamed: 0,First_pokemon,Second_pokemon,first_winner,#_x,Name_x,Type 1_x,Type 2_x,HP_x,Attack_x,Defense_x,...,#_y,Name_y,Type 1_y,Type 2_y,HP_y,Attack_y,Defense_y,Sp. Atk_y,Sp. Def_y,Speed_y
0,5,49,1,5,Charmander,Fire,,39,52,43,...,49,Oddish,Grass,Poison,45,50,55,75,65,30
1,119,5,0,119,Weezing,Poison,,65,90,120,...,5,Charmander,Fire,,39,52,43,60,50,65
2,130,52,1,130,Staryu,Water,,30,45,55,...,52,Paras,Bug,Grass,35,70,55,45,55,25
3,123,139,0,123,Tangela,Grass,,65,55,115,...,139,Tauros,Normal,,75,100,95,40,70,110
4,166,108,1,166,Mew,Psychic,,100,100,100,...,108,Kingler,Water,,55,130,115,50,50,75


In [20]:
# Column groups per combatant ('_x' = first Pokemon, '_y' = second Pokemon).
type_cols_x = ['Type 1_x', 'Type 2_x']
stat_cols_x = ['HP_x', 'Attack_x', 'Defense_x', 'Sp. Atk_x', 'Sp. Def_x', 'Speed_x']
type_cols_y = ['Type 1_y', 'Type 2_y']
stat_cols_y = ['HP_y', 'Attack_y', 'Defense_y', 'Sp. Atk_y', 'Sp. Def_y', 'Speed_y']


def _binarized(columns):
    """Pair each column name with its own LabelBinarizer.

    The column is given as a bare string, which makes DataFrameMapper hand the
    transformer a 1-D array.
    """
    return [(column, LabelBinarizer()) for column in columns]


def _scaled(columns):
    """Pair each column with its own StandardScaler.

    The column is wrapped in a one-element list, which makes DataFrameMapper
    hand the transformer a 2-D array.
    """
    return [([column], StandardScaler()) for column in columns]


# Feature order: first Pokemon's types and stats, then the second Pokemon's.
mapper = DataFrameMapper(
    _binarized(type_cols_x) + _scaled(stat_cols_x)
    + _binarized(type_cols_y) + _scaled(stat_cols_y)
)

**Model**

In [9]:
# Separate the target from the candidate feature columns, then hold out 30% of
# the battles for evaluation (fixed seed so the split is repeatable).
y = data['first_winner']
X = data.drop(columns='first_winner')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=151,
)

In [24]:
# BaggingClassifier draws random bootstrap samples, so an unseeded instance
# gives a different fit (and different scores) on every run. Seed it with the
# same value used for the train/test split to keep the notebook reproducible.
model = BaggingClassifier(random_state=151)

# Chain preprocessing and the classifier so the column mapping is learned from
# the training rows only and re-applied unchanged at prediction time.
pipe = make_pipeline(mapper, model)
pipe.fit(X_train, y_train)


Pipeline(memory=None,
         steps=[('dataframemapper',
                 DataFrameMapper(default=False, df_out=False,
                                 features=[('Type 1_x',
                                            LabelBinarizer(neg_label=0,
                                                           pos_label=1,
                                                           sparse_output=False)),
                                           ('Type 2_x',
                                            LabelBinarizer(neg_label=0,
                                                           pos_label=1,
                                                           sparse_output=False)),
                                           (['HP_x'],
                                            StandardScaler(copy=True,
                                                           with_mean=True,
                                                           with_std=True)),
                                         

In [13]:
# Accuracy on the rows the pipeline was fitted on (optimistic by construction).
pipe.score(X=X_train, y=y_train)

0.9969418960244648

In [14]:
# Accuracy on the held-out 30% split; compare against the training score to
# gauge how much the model overfits.
pipe.score(X=X_test, y=y_test)

0.9121140142517815

In [49]:
# Serialize the fitted pipeline to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises; the previous
# bare open() call never closed it.
with open('pipe.pkl', 'wb') as pickle_file:
    pickle.dump(pipe, pickle_file)

DataFrameMapper(default=False, df_out=False,
                features=[('Type 1_x',
                           LabelBinarizer(neg_label=0, pos_label=1,
                                          sparse_output=False)),
                          ('Type 2_x',
                           LabelBinarizer(neg_label=0, pos_label=1,
                                          sparse_output=False)),
                          (['HP_x'],
                           StandardScaler(copy=True, with_mean=True,
                                          with_std=True)),
                          (['Attack_x'],
                           StandardScaler(copy=True, with_mean=True,
                                          with_std=True)),
                          (['Defense_x'],
                           St...
                           StandardScaler(copy=True, with_mean=True,
                                          with_std=True)),
                          (['Defense_y'],
                           Stand

In [25]:
# Build the prediction frame the same way as the training frame: two left
# joins on the Pokemon id ('#'), giving '_x' columns for the first Pokemon
# and '_y' columns for the second.
data_test = (
    test
    .merge(pk, how='left', left_on='First_pokemon', right_on='#')
    .merge(pk, how='left', left_on='Second_pokemon', right_on='#')
)

In [26]:
# Preview the first five merged rows of the prediction frame.
data_test.head(n=5)

Unnamed: 0,First_pokemon,Second_pokemon,#_x,Name_x,Type 1_x,Type 2_x,HP_x,Attack_x,Defense_x,Sp. Atk_x,...,#_y,Name_y,Type 1_y,Type 2_y,HP_y,Attack_y,Defense_y,Sp. Atk_y,Sp. Def_y,Speed_y
0,94,141,94,Seel,Water,,65,45,55,45,...,141,Gyarados,Water,Flying,95,125,79,60,100,81
1,104,124,104,Onix,Rock,Ground,35,45,160,30,...,124,Kangaskhan,Normal,,105,95,80,40,80,90
2,107,113,107,Krabby,Water,,30,105,90,25,...,113,Cubone,Ground,,50,50,95,40,50,35
3,74,52,74,Machoke,Fighting,,80,100,70,50,...,52,Paras,Bug,Grass,35,70,55,45,55,25
4,91,70,91,Farfetch'd,Normal,Flying,52,65,55,58,...,70,Kadabra,Psychic,,40,35,30,120,70,105


In [27]:
# Raw pipeline output for the unlabelled battles: one 0/1 label per row, in
# the same encoding as the 'first_winner' training target.
pipe.predict(X=data_test)

array([0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 1, 1, 0])

In [44]:
# Store the 0/1 predictions on the test frame under "Winner".
test = test.assign(Winner=pipe.predict(data_test))

In [45]:
# Translate the binary prediction into the winning Pokemon's id: label 0 means
# the second Pokemon wins, anything else means the first one does.
#
# The labels are taken from a fresh predict() call rather than read back from
# test["Winner"], because this cell overwrites that column with ids: reading it
# back made a second execution assign First_pokemon to every row. np.where
# also replaces the per-row Python loop, whose test[...][i] lookups were
# label-based and therefore only correct for a default RangeIndex.
second_wins = pipe.predict(data_test) == 0
test["Winner"] = np.where(second_wins, test["Second_pokemon"], test["First_pokemon"])

In [46]:
# Show a bounded preview of the predictions instead of rendering the whole
# frame; the full result is written to CSV at the end of the notebook.
test.head(10)

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,94,141,141
1,104,124,124
2,107,113,107
3,74,52,74
4,91,70,70
5,140,36,36
6,41,150,41
7,154,74,154
8,31,18,31
9,132,131,131


In [48]:
# Persist the predicted winners next to the input data.
# NOTE(review): to_csv writes the row index as an extra unnamed first column
# by default; confirm the consumer of this file expects it.
prediction_path = 'data/pokemon_prediction.csv'
test.to_csv(prediction_path)