## player前処理(11)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
# Show up to 100 columns when a wide DataFrame is displayed.
# Uses the canonical lower-case option key instead of relying on pandas'
# case-insensitive option-name matching.
pd.set_option('display.max_columns', 100)

In [2]:
# Read the train and test player tables from Feather files and print
# each table's (rows, columns) shape on its own line.
train_player = pd.read_feather('data/train_player.f')
test_player = pd.read_feather('data/test_player.f')
print(train_player.shape, test_player.shape, sep='\n')

(911, 25)
(1846, 25)


In [3]:
# Stack the train rows followed by the test rows into one frame with a fresh
# 0..n-1 index so that every feature below is computed once for both splits.
# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat is
# the supported equivalent.
all_player = pd.concat([train_player, test_player], ignore_index=True)
print(all_player.shape)

(2757, 25)


In [4]:
# Output paths (Feather files) written at the end of this notebook.
# OUT_PITCHER: pitcher-only feature table (one row per pitcher and pit_bat split).
OUT_PITCHER = 'intermediate/all_pitcher_player_1.f'
# OUT_ALLPLAYER: feature table for all players, including 2017 batting counts.
OUT_ALLPLAYER = 'intermediate/all_player_11.f'

### 2017年の成績

In [5]:
# Read the pre-aggregated 2017 pitching and batting tables produced by earlier
# steps, then print each table's (rows, columns) shape on its own line.
pit_2017 = pd.read_feather('intermediate/pit_2017_LR_1.f')
bat_2017 = pd.read_feather('intermediate/bat_2017_5.f')
print(pit_2017.shape, bat_2017.shape, sep='\n')

(651, 14)
(466, 3)


### 外国人助っ人

In [6]:
# foreigner = 1 when the country-of-origin column is anything other than '日本'
# (Japan), otherwise 0. A missing country also compares unequal and yields 1.
all_player['foreigner'] = (all_player['出身国'] != '日本').astype('int64')

### 高卒・大卒・社会人

In [7]:
# Came through a corporate (industrial-league) team: the '社会人' column is filled in.
all_player['company'] = all_player['社会人'].notnull().astype('int64')
# University graduate: the university ID is anything other than 0.
# NOTE(review): a missing (NaN) university ID also compares != 0 and is flagged
# as univ=1 -- confirm the column never contains NaN.
all_player['univ'] = (all_player['出身大学ID'] != 0).astype('int64')
# Straight from high school: not corporate, not university, and not foreign.
all_player['highsch'] = (
    (all_player['company'] == 0)
    & (all_player['univ'] == 0)
    & (all_player['foreigner'] == 0)
).astype('int64')

### 年齢、現役年数

In [8]:
# Age: season year minus birth year (calendar-year difference, not exact age).
# 'birth_day' is a helper column; it is removed again by the later column drop.
all_player['birth_day'] = pd.to_datetime(all_player['生年月日'])
all_player['age'] = all_player['年度'].sub(all_player['birth_day'].dt.year)
# Years since the draft: season year minus draft year. Rows without a draft
# year are assigned the fixed value 6.
all_player['play_year'] = (all_player['年度'] - all_player['ドラフト年']).where(
    all_player['ドラフト年'].notnull(), 6
)

### 年俸

In [9]:
# Salary per year since the draft, and salary multiplied by years since the draft.
# NOTE(review): play_year == 0 would make salary_year infinite -- confirm that
# the draft year is always earlier than the season year.
all_player['salary_year'] = all_player['年俸'].div(all_player['play_year'])
all_player['salary_x_year'] = all_player['年俸'].mul(all_player['play_year'])

### 身長・体重
BMI = 体重[kg] / (身長[m])^2(身長は cm 単位のため、コードでは 10000 を掛けて換算している)

In [10]:
# Body-mass index: weight * 10000 / height^2. The 10000 factor matches a height
# given in centimetres (presumably with weight in kilograms -- confirm units).
all_player['bmi'] = all_player['体重'] * 10000 / all_player['身長'] ** 2

### 不要な列を削除

In [11]:
# Remove identifier/text columns and the raw inputs of the engineered features;
# only the derived flags and numeric features are kept from here on.
all_player = all_player.drop(columns=[
    'チームID', 'チーム名', '選手名', '背番号', '打', '生年月日',
    '出身高校ID', '出身高校名', '出身大学ID', '出身大学名', '社会人',
    'ドラフト年', 'ドラフト種別',
    '出身国', '出身地', '血液型', 'birth_day',
])

### rename

In [12]:
# Give the remaining Japanese-named feature columns ASCII names.
all_player = all_player.rename(
    columns={
        '育成選手F': 'firm',
        '身長': 'height',
        '体重': 'weight',
        'ドラフト順位': 'draft_order',
        '年俸': 'salary',
    }
)

### 投手のみ

In [13]:
# Keep only the rows whose position ('位置') is '投手' (pitcher).
all_pitcher = all_player.loc[all_player['位置'].eq('投手')]

In [14]:
# Lookup table mapping the pitcher's throwing hand ('投': 右 = right, 左 = left)
# to two pit_bat codes each. The first letter of a code follows the throwing
# hand; the second letter is presumably the batter's side -- confirm against
# how pit_2017 was built.
dummy = pd.DataFrame(
    [('右', 'R_L'), ('右', 'R_R'), ('左', 'L_L'), ('左', 'L_R')],
    columns=['投', 'pit_bat'],
)
# Joining on the throwing hand duplicates every pitcher row once per pit_bat code.
all_pitcher = pd.merge(all_pitcher, dummy, on='投', how='outer')

In [15]:
# Attach the 2017 pitching statistics per (player, pit_bat) pair. The left join
# keeps pitchers without a 2017 record; their statistics and '投手ID' stay NaN.
all_pitcher = pd.merge(
    all_pitcher,
    pit_2017,
    how='left',
    left_on=['選手ID', 'pit_bat'],
    right_on=['投手ID', 'pit_bat'],
)

In [16]:
# Mark pitchers without a 2017 record with sentinel IDs so they can be imputed
# later: -1 for foreign pitchers, 0 for everyone else.
all_pitcher.loc[all_pitcher['投手ID'].isna() & all_pitcher['foreigner'].eq(1), '投手ID'] = -1
all_pitcher['投手ID'] = all_pitcher['投手ID'].fillna(0)

#### 情報がない選手
- 外国人投手-> 投手ID=-1
- 日本人投手-> 投手ID=0
- 2017の平均で穴埋め

In [17]:
def fill_ball(condition, source, target, columns=None):
    """Overwrite pitching-statistic columns of selected rows with fixed values.

    For every column name in ``columns``, the rows of ``target`` selected by
    ``condition`` are set to the single value ``source[column]``. ``target`` is
    modified in place.

    Parameters
    ----------
    condition : boolean mask usable as the row selector of ``target.loc``.
    source : mapping-like (e.g. a Series of column means) indexed by column name.
    target : DataFrame that is modified in place.
    columns : iterable of column names to fill. Defaults to the pitch-type
        counts and pitching totals used throughout this notebook.

    Returns
    -------
    None
    """
    if columns is None:
        columns = (
            'straight', 'curve', 'slider', 'shoot', 'fork', 'changeup',
            'sinker', 'cutball', 'total',
            'pit_game_cnt', 'pit_inning_cnt', 'pit_batter_cnt',
        )
    for column in columns:
        target.loc[condition, column] = source[column]

#### 日本人平均

In [18]:
# Impute Japanese pitchers without a 2017 record (sentinel 投手ID == 0) with
# the mean of the Japanese pitchers that do have one, separately for each
# pit_bat code.
RightLeft = ['R_L', 'R_R', 'L_R', 'L_L']
for RL in RightLeft:
    # numeric_only=True skips the string columns (e.g. pit_bat); without it
    # DataFrame.mean raises TypeError on pandas >= 2.0.
    pit_mean = all_pitcher[(all_pitcher['foreigner']==0)&(all_pitcher['投手ID']!=0)&(all_pitcher['pit_bat']==RL)].mean(numeric_only=True)
    condition = (all_pitcher['投手ID']==0)&(all_pitcher['pit_bat']==RL)
    fill_ball(condition, pit_mean, all_pitcher)

#### 外国人平均

In [19]:
# Impute foreign pitchers without a 2017 record (sentinel 投手ID == -1) with
# the mean of the foreign pitchers that do have one, separately for each
# pit_bat code.
for RL in RightLeft:
    # numeric_only=True skips the string columns (e.g. pit_bat); without it
    # DataFrame.mean raises TypeError on pandas >= 2.0.
    pit_mean = all_pitcher[(all_pitcher['foreigner']==1)&(all_pitcher['投手ID']!=-1)&(all_pitcher['pit_bat']==RL)].mean(numeric_only=True)
    condition = (all_pitcher['投手ID']==-1)&(all_pitcher['pit_bat']==RL)
    fill_ball(condition, pit_mean, all_pitcher)

### 各球種のストレートに対する比率

In [20]:
# Express every non-straight pitch type as a ratio to the 'straight' value of
# the same row.
# NOTE(review): a row with straight == 0 yields inf/NaN here -- confirm whether
# such rows exist.
ball_not_straight = ['curve', 'slider', 'shoot', 'fork', 'changeup', 'sinker', 'cutball']
all_pitcher[ball_not_straight] = all_pitcher[ball_not_straight].div(all_pitcher['straight'], axis=0)

### 不要な列を削除

In [21]:
# 'straight' is now the denominator of the ratios; the join key and the
# position/throwing-hand text columns are no longer needed either.
all_pitcher = all_pitcher.drop(columns=['straight', '投手ID', '位置', '投'])

In [22]:
# Preview the first rows of the pitcher feature table (one row per pitcher and pit_bat code).
all_pitcher.head()

Unnamed: 0,年度,選手ID,firm,height,weight,draft_order,salary,foreigner,company,univ,highsch,age,play_year,salary_year,salary_x_year,bmi,pit_bat,curve,slider,shoot,fork,changeup,sinker,cutball,total,pit_game_cnt,pit_inning_cnt,pit_batter_cnt
0,2017,12107,0,175,82,3.0,5000,0,1,0,0,37,16.0,312.5,80000.0,26.77551,L_L,0.092767,0.599706,0.081129,0.070261,0.076848,0.014881,0.085343,268.586667,19.377778,45.684444,67.946667
1,2017,12107,0,175,82,3.0,5000,0,1,0,0,37,16.0,312.5,80000.0,26.77551,L_R,0.111008,0.370771,0.134611,0.131097,0.312206,0.049194,0.056023,435.377778,19.377778,45.684444,108.626667
2,2017,400010,0,186,95,1.0,20000,0,1,0,0,35,14.0,1428.571429,280000.0,27.459822,L_L,0.0,0.528302,0.59434,0.009434,0.40566,0.0,0.556604,328.0,12.0,59.0,87.0
3,2017,400010,0,186,95,1.0,20000,0,1,0,0,35,14.0,1428.571429,280000.0,27.459822,L_R,0.0,1.118812,2.148515,0.079208,1.435644,0.0,0.554455,640.0,12.0,59.0,171.0
4,2017,600098,0,184,88,1.0,32000,0,0,0,1,34,12.0,2666.666667,384000.0,25.992439,L_L,0.0,3.818182,2.636364,0.090909,0.454545,0.0,0.0,88.0,18.0,19.0,26.0


### 投手のみ出力

In [23]:
# Write the pitcher feature table in Feather format to the path in OUT_PITCHER.
all_pitcher.to_feather(OUT_PITCHER)

### 打者(全選手)

In [24]:
# Attach the 2017 batting counts to every player. The left join keeps players
# without a 2017 batting record; their counts and '打者ID' stay NaN.
all_player = pd.merge(
    all_player,
    bat_2017,
    how='left',
    left_on='選手ID',
    right_on='打者ID',
)

In [25]:
# Players without a 2017 batting record get the sentinel batter ID 0.
all_player.loc[all_player['打者ID'].isnull(), '打者ID'] = 0
# Impute the missing batting counts with the mean of the players that do have
# a record, computed separately for non-pitchers (first) and pitchers (second).
# Only the two batting columns are averaged: calling mean() on the whole frame
# raises TypeError on pandas >= 2.0 because of the string columns.
for is_pitcher in (False, True):
    group = (all_player['位置'] == '投手') == is_pitcher
    bat_mean = all_player.loc[(all_player['打者ID'] != 0) & group, ['batter_cnt', 'bat_game_cnt']].mean()
    condition = (all_player['打者ID'] == 0) & group
    all_player.loc[condition, 'batter_cnt'] = bat_mean['batter_cnt']
    all_player.loc[condition, 'bat_game_cnt'] = bat_mean['bat_game_cnt']

### 不要な列を削除

In [26]:
# Drop the join key and the position/throwing-hand text columns.
all_player = all_player.drop(columns=['打者ID', '位置', '投'])

In [27]:
# Preview the first rows of the all-player feature table.
all_player.head()

Unnamed: 0,年度,選手ID,firm,height,weight,draft_order,salary,foreigner,company,univ,highsch,age,play_year,salary_year,salary_x_year,bmi,batter_cnt,bat_game_cnt
0,2017,11343,0,183,86,5.0,4500,0,0,0,1,41,23.0,195.652174,103500.0,25.680074,41.0,28.0
1,2017,11726,0,177,85,1.0,3700,0,0,0,1,36,19.0,194.736842,70300.0,27.131412,13.0,7.0
2,2017,12049,0,180,97,1.0,26000,0,0,1,0,38,17.0,1529.411765,442000.0,29.938272,516.0,129.0
3,2017,12107,0,175,82,3.0,5000,0,1,0,0,37,16.0,312.5,80000.0,26.77551,14.088,6.608
4,2017,12179,0,184,94,1.0,7000,0,0,0,1,34,16.0,437.5,112000.0,27.76465,26.0,13.0


### 全選手出力

In [28]:
# Write the all-player feature table in Feather format to the path in OUT_ALLPLAYER.
all_player.to_feather(OUT_ALLPLAYER)