## player前処理(7)

In [49]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
# Show up to 100 columns when a wide player frame is displayed.
# The canonical option key is used; the previous mixed-case spelling only
# resolved through pandas' case-insensitive pattern matching of option names.
pd.set_option('display.max_columns', 100)

In [50]:
# Read the train and test player tables and report their (rows, columns).
train_player = pd.read_feather('data/train_player.f')
test_player = pd.read_feather('data/test_player.f')
for player_frame in (train_player, test_player):
    print(player_frame.shape)

(911, 25)
(1846, 25)


In [51]:
# Stack the train and test rows into one frame so the features below are
# derived once for both. DataFrame.append was deprecated in pandas 1.4 and
# removed in 2.0; pd.concat with ignore_index=True produces the same result,
# including a fresh 0..n-1 index.
all_player = pd.concat([train_player, test_player], ignore_index=True)
print(all_player.shape)

(2757, 25)


### 2017年の成績

In [52]:
# Read the three intermediate 2017 tables (pit_/bat_/cat_ prefixes; presumably
# pitcher, batter and catcher statistics) and report their shapes.
pit_2017 = pd.read_feather('intermediate/pit_2017_2.f')
bat_2017 = pd.read_feather('intermediate/bat_2017_2.f')
cat_2017 = pd.read_feather('intermediate/cat_2017_2.f')
for stats_2017 in (pit_2017, bat_2017, cat_2017):
    print(stats_2017.shape)

(326, 19)
(466, 4)
(55, 19)


### 投手

In [53]:
# Left-join the pit_2017 columns onto every player row. Rows without a match
# keep NaN in '投手ID' and in every column that comes from pit_2017.
all_player = pd.merge(
    all_player,
    pit_2017,
    how='left',
    left_on='選手ID',
    right_on='投手ID',
)

#### 外国人助っ人

In [54]:
# foreigner = 1 when the country of origin is anything other than '日本'
# (a missing country also compares unequal and is therefore flagged as 1).
is_foreign = all_player['出身国'] != '日本'
all_player['foreigner'] = is_foreign.astype('int64')

#### 情報がない選手
- 外国人投手-> 投手ID=-1
- 日本人投手-> 投手ID=0
- 2017の全体平均で穴埋め

In [55]:
# Pitchers with no row in pit_2017 get a sentinel '投手ID':
#   -1 for foreign pitchers, 0 for everyone else.
pitcher_without_stats = (all_player['位置'] == '投手') & all_player['投手ID'].isnull()
foreign_player = all_player['foreigner'] == 1
all_player.loc[pitcher_without_stats & foreign_player, '投手ID'] = -1
all_player.loc[pitcher_without_stats & ~foreign_player, '投手ID'] = 0

In [56]:
# Rows carrying the sentinel 投手ID == 0 receive the column means of pit_2017
# for every pitching statistic.
pit_mean = pit_2017.mean()
pit_stat_cols = [
    'straight', 'curve', 'slider', 'shoot', 'fork',
    'changeup', 'sinker', 'cutball', 'total',
    'pit_game_cnt', 'pit_inning_cnt', 'pit_batter_cnt',
    'pit_inning_per_game', 'pit_batter_per_game', 'pit_ball_per_game',
    'pit_batter_per_inning', 'pit_ball_per_inning', 'pit_ball_per_batter',
]
pitcher_id_is_zero = all_player['投手ID'] == 0
for stat_col in pit_stat_cols:
    all_player.loc[pitcher_id_is_zero, stat_col] = pit_mean[stat_col]

#### 外国人平均

In [57]:
# Fill foreign pitchers that have no 2017 stats (sentinel 投手ID == -1) with the
# average of the foreign players that DO have 2017 pitching stats.
#
# The previous version averaged over the 投手ID == -1 rows themselves. Those
# rows are exactly the ones whose pitching columns are all NaN, so every mean
# was NaN and nothing was actually filled.
pit_stat_cols = [
    'straight', 'curve', 'slider', 'shoot', 'fork',
    'changeup', 'sinker', 'cutball', 'total',
    'pit_game_cnt', 'pit_inning_cnt', 'pit_batter_cnt',
    'pit_inning_per_game', 'pit_batter_per_game', 'pit_ball_per_game',
    'pit_batter_per_inning', 'pit_ball_per_inning', 'pit_ball_per_batter',
]

# Reference group: foreign players matched in pit_2017. NaN > 0 is False, so
# unmatched rows are excluded as well as the 0 / -1 sentinels.
# NOTE(review): assumes genuine pitcher IDs are positive - confirm.
foreign_with_stats = (all_player['foreigner'] == 1) & (all_player['投手ID'] > 0)

# Only the stat columns are averaged; calling .mean() on the whole frame would
# also hit the string columns, which raises on pandas >= 2.0.
foreigner_mean = all_player.loc[foreign_with_stats, pit_stat_cols].mean()

foreign_without_stats = all_player['投手ID'] == -1
for stat_col in pit_stat_cols:
    all_player.loc[foreign_without_stats, stat_col] = foreigner_mean[stat_col]

### 打者

In [58]:
# Left-join the bat_2017 columns; rows without a match keep NaN in '打者ID'
# and in the batting columns.
all_player = pd.merge(
    all_player,
    bat_2017,
    how='left',
    left_on='選手ID',
    right_on='打者ID',
)

In [59]:
# Non-pitchers with no row in bat_2017 get the sentinel 打者ID = 0 and the
# bat_2017 column means; rows whose 打者ID is still NaN afterwards (pitchers
# with no batting record) get 0 for every batting statistic.
bat_mean = bat_2017.mean()
non_pitcher_without_stats = (all_player['位置'] != '投手') & all_player['打者ID'].isnull()
all_player.loc[non_pitcher_without_stats, '打者ID'] = 0

# Both masks are taken after the sentinel assignment above.
batter_id_is_zero = all_player['打者ID'] == 0
batter_id_missing = all_player['打者ID'].isnull()
for stat_col in ('batter_cnt', 'bat_game_cnt', 'batter_cnt_per_game'):
    all_player.loc[batter_id_is_zero, stat_col] = bat_mean[stat_col]
    all_player.loc[batter_id_missing, stat_col] = 0

### 捕手

In [60]:
# Left-join the cat_2017 columns; rows without a match keep NaN in '捕手ID'
# and in the catcher columns.
all_player = pd.merge(
    all_player,
    cat_2017,
    how='left',
    left_on='選手ID',
    right_on='捕手ID',
)

In [61]:
# Catchers with no row in cat_2017 get the sentinel 捕手ID = 0 and the cat_2017
# column means for every catcher statistic. Other positions are left as NaN.
cat_mean = cat_2017.mean()
catcher_without_stats = (all_player['位置'] == '捕手') & all_player['捕手ID'].isnull()
all_player.loc[catcher_without_stats, '捕手ID'] = 0

cat_stat_cols = [
    'c_straight', 'c_curve', 'c_slider', 'c_shoot', 'c_fork',
    'c_changeup', 'c_sinker', 'c_cutball', 'c_total',
    'cat_game_cnt', 'cat_inning_cnt', 'cat_batter_cnt',
    'cat_inning_per_game', 'cat_batter_per_game', 'cat_ball_per_game',
    'cat_batter_per_inning', 'cat_ball_per_inning', 'cat_ball_per_batter',
]
catcher_id_is_zero = all_player['捕手ID'] == 0
for stat_col in cat_stat_cols:
    all_player.loc[catcher_id_is_zero, stat_col] = cat_mean[stat_col]

### 高卒・大卒・社会人

In [62]:
# company = 1 when the '社会人' field is populated, otherwise 0.
all_player['company'] = all_player['社会人'].notnull().astype('int64')

In [63]:
# univ = 1 when '出身大学ID' differs from 0, otherwise 0.
# NOTE(review): a NaN ID also compares unequal to 0 and would be flagged as 1 -
# presumably the column has no missing values; confirm.
attended_univ = all_player['出身大学ID'] != 0
all_player['univ'] = attended_univ.astype('int64')

In [64]:
# highsch = 1 only when the company, univ and foreigner flags are all 0.
none_of_the_above = all_player[['company', 'univ', 'foreigner']].eq(0).all(axis=1)
all_player['highsch'] = none_of_the_above.astype('int64')

### 年齢、現役年数

In [65]:
# Parse the birth date, then derive age as (season year - birth year).
# Month and day are ignored, so this is a calendar-year difference.
birth_day = pd.to_datetime(all_player['生年月日'])
all_player['birth_day'] = birth_day
all_player['age'] = all_player['年度'].sub(birth_day.dt.year)

In [66]:
# Years since the draft (season year - draft year). Rows with no draft year
# are assigned the fixed value 6.
years_since_draft = all_player['年度'] - all_player['ドラフト年']
all_player['play_year'] = years_since_draft.mask(all_player['ドラフト年'].isnull(), 6)

### 年俸

In [67]:
# Salary features: salary divided by, and multiplied by, play_year.
# NOTE(review): a play_year of 0 would make salary_year infinite - confirm that
# the draft year is always earlier than the season year.
salary = all_player['年俸']
play_year = all_player['play_year']
all_player['salary_year'] = salary.div(play_year)
all_player['salary_x_year'] = salary.mul(play_year)

### 身長・体重
BMI = 体重[kg] / (身長[m])^2 (身長は cm 単位のため、計算では 10000 を掛けている)

In [68]:
# BMI = weight * 10000 / height^2. The 10000 factor presumably converts a
# height given in centimetres to metres - confirm the units.
height = all_player['身長']
weight = all_player['体重']
all_player['bmi'] = weight * 10000 / (height * height)

### 不要な列を削除

In [69]:
# Remove the raw columns and the helper columns (birth_day and the three
# join-key ID columns) that the steps above have already consumed.
unused_columns = [
    'チームID', 'チーム名', '選手名', '背番号', '位置', '投', '打', '生年月日',
    '出身高校ID', '出身高校名', '出身大学ID', '出身大学名', '社会人',
    'ドラフト年', 'ドラフト種別',
    '出身国', '出身地', '血液型', 'birth_day',
    '投手ID', '打者ID', '捕手ID',
]
all_player = all_player.drop(columns=unused_columns)

### rename

In [70]:
# Give the remaining Japanese-named columns English names.
english_names = {
    '育成選手F': 'firm',
    '身長': 'height',
    '体重': 'weight',
    'ドラフト順位': 'draft_order',
    '年俸': 'salary',
}
all_player = all_player.rename(columns=english_names)

In [71]:
# Preview the first rows of the frame after the drop and rename steps.
all_player.head()

Unnamed: 0,年度,選手ID,firm,height,weight,draft_order,salary,straight,curve,slider,shoot,fork,changeup,sinker,cutball,total,pit_game_cnt,pit_inning_cnt,pit_batter_cnt,pit_inning_per_game,pit_batter_per_game,pit_ball_per_game,pit_batter_per_inning,pit_ball_per_inning,pit_ball_per_batter,foreigner,batter_cnt,bat_game_cnt,batter_cnt_per_game,c_straight,c_curve,c_slider,c_shoot,c_fork,c_changeup,c_sinker,c_cutball,c_total,cat_game_cnt,cat_inning_cnt,cat_batter_cnt,cat_inning_per_game,cat_batter_per_game,cat_ball_per_game,cat_batter_per_inning,cat_ball_per_inning,cat_ball_per_batter,company,univ,highsch,age,play_year,salary_year,salary_x_year,bmi
0,2017,11343,0,183,86,5.0,4500,,,,,,,,,,,,,,,,,,,0,41.0,28.0,1.464286,0.551799,0.082879,0.158124,0.023991,0.058888,0.074155,0.009815,0.040349,917.0,19.0,49.0,220.0,2.578947,11.578947,48.263158,4.489796,18.714286,4.168182,0,0,1,41,23.0,195.652174,103500.0,25.680074
1,2017,11726,0,177,85,1.0,3700,,,,,,,,,,,,,,,,,,,0,13.0,7.0,1.857143,0.440874,0.089974,0.178663,0.071979,0.088689,0.03856,0.002571,0.088689,778.0,11.0,46.0,196.0,4.181818,17.818182,70.727273,4.26087,16.913043,3.969388,0,0,1,36,19.0,194.736842,70300.0,27.131412
2,2017,12049,0,180,97,1.0,26000,,,,,,,,,,,,,,,,,,,0,516.0,129.0,4.0,,,,,,,,,,,,,,,,,,,0,1,0,38,17.0,1529.411765,442000.0,29.938272
3,2017,12107,0,175,82,3.0,5000,0.489293,0.058848,0.196568,0.066474,0.086211,0.051951,0.008841,0.041813,788.702454,20.766871,50.245399,199.684049,2.910101,12.114679,47.892106,4.080928,16.130663,3.953392,0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,1,0,0,37,16.0,312.5,80000.0,26.77551
4,2017,12179,0,184,94,1.0,7000,0.269266,0.095195,0.281052,0.268359,0.0,0.086129,0.0,0.0,1103.0,13.0,75.0,300.0,5.769231,23.076923,84.846154,4.0,14.706667,3.676667,0,26.0,13.0,2.0,,,,,,,,,,,,,,,,,,,0,0,1,34,16.0,437.5,112000.0,27.76465


### 出力

In [72]:
# Write the combined player frame to the intermediate directory as a feather file.
all_player.to_feather('intermediate/all_player_7.f')