In [3]:
import numpy as np
import pandas as pd
import collections

# Raise the pandas display limits so wide / long frames are not truncated
# when they are shown as cell output.
for option_name, option_value in {
    'display.max_columns': 100,
    'display.max_rows': 500,
}.items():
    pd.set_option(option_name, option_value)

import pprint
from collections import OrderedDict
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn import preprocessing

In [4]:
# Load the training and test datasets (train is read first, then test).
train, test = (
    pd.read_csv(csv_name) for csv_name in ('train_data.csv', 'test_data.csv')
)

In [5]:
# Load the stage and weapon reference tables.
stage_csv = 'stage_info.csv'
weapon_csv = 'statink-weapon2.csv'
stage_info = pd.read_csv(stage_csv)
weapon_info = pd.read_csv(weapon_csv)

In [6]:
# Rank is an ordinal scale, so encode each rank label as an increasing integer.
rank_mapping = {'c-': 1, 'c': 2, 'c+': 3, 'b-': 4, 'b': 5, 'b+': 6, 'a-': 7, 'a': 8, 'a+': 9, 's-':10, 's':11, 's+':12, 'x':13}

# Rank columns of the four players on each team.
rank_columns = [
    'A1-rank', 'A2-rank', 'A3-rank', 'A4-rank',
    'B1-rank', 'B2-rank', 'B3-rank', 'B4-rank',
]

# NOTE: this cell is not idempotent. Running it a second time maps the already
# encoded numbers (which are not keys of rank_mapping) to NaN and then to 0.
for rank_column in rank_columns:
    # map() yields NaN for missing values and for labels that are not in
    # rank_mapping; those are then encoded as 0.
    # The result is assigned back to the frame on purpose: calling
    # fillna(0, inplace=True) on train[rank_column] operates on a column
    # selection, which pandas warns about and which does not update `train`
    # when Copy-on-Write is enabled.
    train[rank_column] = train[rank_column].map(rank_mapping).fillna(0)

In [None]:
##### Build the team-size difference feature (team A size - team B size) #####

# Replace missing A4-level, B3-level and B4-level values with 0.
# Assign the result back instead of using fillna(0, inplace=True) on a column
# selection, which pandas warns about and which does not update `train` when
# Copy-on-Write is enabled.
for level_column in ['A4-level', 'B3-level', 'B4-level']:
    train[level_column] = train[level_column].fillna(0)

# Flag the slots whose level is 0 (this includes the values filled in above).
train_bool_A4 = (train['A4-level'] == 0)
train_bool_B3 = (train['B3-level'] == 0)
train_bool_B4 = (train['B4-level'] == 0)

# Each team has 4 slots; subtract the flagged ones.
# The flags are cast to int before adding: adding two booleans is a logical OR,
# so two flagged slots on team B would otherwise be counted as only one.
team_A = 4 - train_bool_A4.astype(int)
team_B = 4 - (train_bool_B3.astype(int) + train_bool_B4.astype(int))

# Vectorized over every row of `train`, whatever its length (the previous
# hard-coded loop bound skipped the last row and left it at 0).
# Stored as float to keep the dtype the column had before.
team_dif = (team_A - team_B).astype(float)

# Attach the feature to train.
train['team_dif'] = team_dif

In [None]:
##### Create the per-player special and sub columns and attach them to train #####

player_slots = ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']

# All special columns are produced first, then all sub columns, so any newly
# created columns are appended to train in that order.
for weapon_attribute, column_suffix in [('special', '-special'), ('subweapon', '-sub')]:
    # Lookup table: weapon key -> value of this attribute in weapon_info.
    dic = weapon_info.set_index('key')[weapon_attribute].to_dict()
    for player_slot in player_slots:
        # replace() swaps every weapon key found in the lookup for its
        # attribute value; values that are not keys of the lookup are kept as-is.
        train[player_slot + column_suffix] = train[player_slot + '-weapon'].replace(dic)

In [None]:
# Create the per-player weapon category columns and attach them to train.
# Lookup table: weapon key -> 'category2' value in weapon_info.
dic = weapon_info.set_index('key')['category2'].to_dict()
for player_slot in ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4']:
    # Values that are not keys of the lookup are left unchanged by replace().
    train[player_slot + '-category'] = train[player_slot + '-weapon'].replace(dic)

In [None]:
##### Build per-team flags: does anyone on the team have the 'armor' special? #####

# Per player: True where the special is 'armor'.
train_bool_A1special = (train['A1-special'] == 'armor')
train_bool_A2special = (train['A2-special'] == 'armor')
train_bool_A3special = (train['A3-special'] == 'armor')
train_bool_A4special = (train['A4-special'] == 'armor')
train_bool_B1special = (train['B1-special'] == 'armor')
train_bool_B2special = (train['B2-special'] == 'armor')
train_bool_B3special = (train['B3-special'] == 'armor')
train_bool_B4special = (train['B4-special'] == 'armor')

# A team is flagged when at least one of its four players is flagged.
# Computed with a vectorized OR over every row of `train`, whatever its length
# (the previous hard-coded loop bound skipped the last row and left it at 0).
# Stored as float (0.0 / 1.0) to keep the dtype the columns had before.
armor_A = (
    train_bool_A1special | train_bool_A2special
    | train_bool_A3special | train_bool_A4special
).astype(float)
armor_B = (
    train_bool_B1special | train_bool_B2special
    | train_bool_B3special | train_bool_B4special
).astype(float)

# Attach the flags to train.
train['armor_A'] = armor_A
train['armor_B'] = armor_B