## ボールカウントの2017年データの集計
#### ball_2017_5.f
- ストレートに対する比率、コースの種類のみ

#### ball_2017_4.f
- コースの種類

#### ball_2017_3.f
- 特徴量名変更

#### ball_2017_2.f
- 右左で集計を分ける

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
# Show up to 100 columns when a DataFrame is rendered.
# The registered option key is all lower-case; the previous mixed-case spelling
# only resolved because pandas falls back to a case-insensitive pattern match
# on option names.
pd.set_option('display.max_columns', 100)

In [2]:
# Read the pitch-level training table and print its (rows, columns) size.
TRAIN_PITCH_PATH = 'data/train_pitch.f'
train_pitch = pd.read_feather(TRAIN_PITCH_PATH)
print(train_pitch.shape)

(257117, 51)


In [3]:
# Destination path of the aggregated feature table written by the final cell.
# NOTE(review): the notebook heading lists this version as `ball_2017_5.f`
# while the path here is `pitch_2017_5.f` — confirm which name is intended.
OUTPUT = 'intermediate/pitch/pitch_2017_5.f'

In [4]:
# Give the pitch-type and pitch-location columns short ASCII names, then build
# the count key as "<strikes before play>-<balls before play>".
train_pitch = train_pitch.rename(columns={'球種': 'ball', '投球位置区域': 'course'})
train_pitch['ball_cnt'] = (
    train_pitch['プレイ前ストライク数'].astype(str)
    + '-'
    + train_pitch['プレイ前ボール数'].astype(str)
)

In [5]:
# Encode pitcher/batter handedness as 'L'/'R' and build the match-up key
# "<pitcher hand>_<batter hand>" (e.g. 'R_L').
# The replacement is limited to the two handedness columns: a frame-wide
# replace() scans every column and would also rewrite any other column whose
# cells equal '左' or '右'.
# NOTE(review): assumes nothing downstream relies on other columns having been
# recoded to 'L'/'R'; no later cell in this notebook reads them.
hand_cols = ['投手投球左右', '打者打席左右']
train_pitch[hand_cols] = train_pitch[hand_cols].replace({'左': 'L', '右': 'R'})
train_pitch['pit_bat'] = train_pitch['投手投球左右'] + '_' + train_pitch['打者打席左右']

In [6]:
# Preview the three key columns used by the aggregations that follow.
train_pitch.loc[:, ['ball', 'ball_cnt', 'pit_bat']].head(n=10)

Unnamed: 0,ball,ball_cnt,pit_bat
0,0,0-0,R_L
1,0,1-0,R_L
2,0,2-0,R_L
3,0,0-0,R_R
4,0,1-0,R_R
5,1,2-0,R_R
6,2,2-1,R_R
7,3,0-0,R_R
8,0,0-1,R_R
9,2,1-1,R_R


### 球種

In [7]:
# Count pitches for every (count, handedness match-up, pitch type) combination
# and expose the count as the `ball_sum` column.
train_ball_cnt = (
    train_pitch[['ball', 'ball_cnt', 'pit_bat']]
    .groupby(['ball_cnt', 'pit_bat', 'ball'])
    .size()
    .reset_index(name='ball_sum')
)

In [8]:
# Per (count, match-up) pitch totals and each pitch type's share of them.
# Only `ball_sum` is aggregated: the previous frame-wide .sum() also added up
# the numeric `ball` type codes, and when the cell was run a second time it
# summed the `total`/`rate` columns created below, leaving two columns named
# `total` after the rename.
ball_total = (
    train_ball_cnt
    .groupby(['ball_cnt', 'pit_bat'], as_index=False)['ball_sum']
    .sum()
    .rename(columns={'ball_sum': 'total'})
)
# transform('sum') broadcasts each group's total back onto its rows, so no
# merge is needed and re-running the cell simply overwrites `total` and `rate`.
train_ball_cnt['total'] = (
    train_ball_cnt.groupby(['ball_cnt', 'pit_bat'])['ball_sum'].transform('sum')
)
train_ball_cnt['rate'] = train_ball_cnt['ball_sum'] / train_ball_cnt['total']

In [9]:
# Reshape to one row per (count, match-up) with one rate column per pitch type,
# then swap the numeric pitch-type codes for readable feature names.
ball_code_names = {
    0: 'bc_straight',
    1: 'bc_curve',
    2: 'bc_slider',
    3: 'bc_shoot',
    4: 'bc_fork',
    5: 'bc_changeup',
    6: 'bc_sinker',
    7: 'bc_cutball',
}
train_ball_pivot = (
    train_ball_cnt[['ball_cnt', 'pit_bat', 'ball', 'rate']]
    .pivot_table(index=['ball_cnt', 'pit_bat'], columns='ball', values='rate')
    .reset_index()
    .rename(columns=ball_code_names)
)

In [10]:
# Cells left empty by the pivot (no row for that combination) become a rate of 0.
train_ball_pivot = train_ball_pivot.fillna(0)

In [11]:
# Show the first five rows of the pitch-type rate table.
train_ball_pivot.head(n=5)

ball,ball_cnt,pit_bat,bc_straight,bc_curve,bc_slider,bc_shoot,bc_fork,bc_changeup,bc_sinker,bc_cutball
0,0-0,L_L,0.531149,0.058586,0.296482,0.031311,0.015332,0.022272,0.005003,0.039864
1,0-0,L_R,0.474875,0.080828,0.192947,0.067679,0.029661,0.112833,0.014474,0.026705
2,0-0,R_L,0.464734,0.110359,0.151086,0.097383,0.067492,0.04321,0.012933,0.052803
3,0-0,R_R,0.475961,0.101792,0.226048,0.066617,0.042131,0.018345,0.003343,0.065762
4,0-1,L_L,0.529675,0.033333,0.298374,0.044715,0.017073,0.028049,0.004878,0.043902


In [11]:
# Re-express every non-straight pitch-type rate as a ratio to the straight rate
# of the same (count, match-up) row.
# NOTE: the rate columns are overwritten in place, so this cell must run exactly
# once after the pivot is built — running it again divides by `bc_straight` a
# second time.
ratio_cols = [
    'bc_curve', 'bc_slider', 'bc_shoot', 'bc_fork',
    'bc_changeup', 'bc_sinker', 'bc_cutball',
]
# A straight rate of 0 (possible once missing cells are filled with 0) would
# produce inf; masking it to NaN turns those ratios into missing values instead.
straight_rate = train_ball_pivot['bc_straight'].where(train_ball_pivot['bc_straight'] != 0)
# One row-aligned division replaces seven copy-pasted per-column statements.
train_ball_pivot[ratio_cols] = train_ball_pivot[ratio_cols].div(straight_rate, axis=0)

In [12]:
# The straight rate is not kept as a feature column; remove it from the table.
train_ball_pivot = train_ball_pivot.drop(columns='bc_straight')

In [13]:
# Display the (rows, columns) size of the pitch-type table as a sanity check.
train_ball_pivot.shape

(48, 9)

### コース

In [14]:
# Count pitches for every (count, handedness match-up, location code)
# combination and expose the count as the `course_sum` column.
train_course = (
    train_pitch[['course', 'ball_cnt', 'pit_bat']]
    .groupby(['ball_cnt', 'pit_bat', 'course'])
    .size()
    .reset_index(name='course_sum')
)

In [15]:
# Per (count, match-up) pitch totals and each location code's share of them.
# Only `course_sum` is aggregated: the previous frame-wide .sum() also added up
# the numeric `course` codes, and when the cell was run a second time it summed
# the `total`/`rate` columns created below, leaving two columns named `total`
# after the rename.
course_total = (
    train_course
    .groupby(['ball_cnt', 'pit_bat'], as_index=False)['course_sum']
    .sum()
    .rename(columns={'course_sum': 'total'})
)
# transform('sum') broadcasts each group's total back onto its rows, so no
# merge is needed and re-running the cell simply overwrites `total` and `rate`.
train_course['total'] = (
    train_course.groupby(['ball_cnt', 'pit_bat'])['course_sum'].transform('sum')
)
train_course['rate'] = train_course['course_sum'] / train_course['total']

In [16]:
# Reshape to one row per (count, match-up) with one rate column per location
# code, naming the columns bc_course00 ... bc_course12 (zero-padded code).
course_code_names = {code: 'bc_course%02d' % code for code in range(13)}
train_course_pivot = (
    train_course[['ball_cnt', 'pit_bat', 'course', 'rate']]
    .pivot_table(index=['ball_cnt', 'pit_bat'], columns='course', values='rate')
    .reset_index()
    .rename(columns=course_code_names)
)

In [17]:
# Cells left empty by the pivot (no row for that combination) become a rate of 0.
train_course_pivot = train_course_pivot.fillna(0)

### コースの種類

In [18]:
# Sum the per-location rates into five height-band features. Each entry is
# (new column, member location columns); members are added left to right.
# NOTE(review): which location code belongs to which band is taken as written
# here — verify against the data set's location-code definition.
height_bands = [
    ('bc_high_str', ['bc_course00', 'bc_course03', 'bc_course06']),
    ('bc_high_ball', ['bc_course09', 'bc_course10']),
    ('bc_mid_str', ['bc_course01', 'bc_course04', 'bc_course07']),
    ('bc_low_str', ['bc_course02', 'bc_course05', 'bc_course08']),
    ('bc_low_ball', ['bc_course11', 'bc_course12']),
]
for band_name, member_cols in height_bands:
    band_rate = train_course_pivot[member_cols[0]]
    for member in member_cols[1:]:
        band_rate = band_rate + train_course_pivot[member]
    train_course_pivot[band_name] = band_rate

In [19]:
# Sum the per-location rates into five left/center/right features. Each entry
# is (new column, member location columns); members are added left to right.
# NOTE(review): which location code belongs to which side is taken as written
# here — verify against the data set's location-code definition.
horizontal_bands = [
    ('bc_left_str', ['bc_course00', 'bc_course01', 'bc_course02']),
    ('bc_left_ball', ['bc_course09', 'bc_course11']),
    ('bc_center_str', ['bc_course03', 'bc_course04', 'bc_course05']),
    ('bc_right_str', ['bc_course06', 'bc_course07', 'bc_course08']),
    ('bc_right_ball', ['bc_course10', 'bc_course12']),
]
for band_name, member_cols in horizontal_bands:
    band_rate = train_course_pivot[member_cols[0]]
    for member in member_cols[1:]:
        band_rate = band_rate + train_course_pivot[member]
    train_course_pivot[band_name] = band_rate

In [20]:
# Remove the 13 raw per-location rate columns (bc_course00 ... bc_course12),
# keeping only the aggregated band features.
raw_course_cols = ['bc_course%02d' % code for code in range(13)]
train_course_pivot = train_course_pivot.drop(columns=raw_course_cols)

In [21]:
# Display the (rows, columns) size of the location-band table as a sanity check.
train_course_pivot.shape

(48, 12)

### マージ

In [22]:
# Attach the location-band features to the pitch-type ratios; both tables are
# keyed by (ball_cnt, pit_bat). Print the merged size, then preview ten rows.
ball_cnt_all = pd.merge(
    train_ball_pivot,
    train_course_pivot,
    how='left',
    on=['ball_cnt', 'pit_bat'],
)
print(ball_cnt_all.shape)
ball_cnt_all.head(n=10)

(48, 19)


Unnamed: 0,ball_cnt,pit_bat,bc_curve,bc_slider,bc_shoot,bc_fork,bc_changeup,bc_sinker,bc_cutball,bc_high_str,bc_high_ball,bc_mid_str,bc_low_str,bc_low_ball,bc_left_str,bc_left_ball,bc_center_str,bc_right_str,bc_right_ball
0,0-0,L_L,0.110301,0.558189,0.058949,0.028867,0.041933,0.00942,0.075053,0.142834,0.199968,0.214977,0.174306,0.267915,0.292285,0.322628,0.152518,0.087314,0.145255
1,0-0,L_R,0.170208,0.40631,0.14252,0.06246,0.237605,0.030479,0.056235,0.134951,0.211701,0.193456,0.172459,0.287432,0.150341,0.235858,0.161859,0.188666,0.263276
2,0-0,R_L,0.237468,0.325101,0.209547,0.145227,0.092978,0.027829,0.11362,0.135883,0.221489,0.204959,0.174254,0.263415,0.242045,0.294634,0.157681,0.11537,0.19027
3,0-0,R_R,0.213866,0.474931,0.139964,0.088519,0.038543,0.007023,0.138168,0.151308,0.196937,0.206965,0.157643,0.287147,0.105057,0.153718,0.164756,0.246104,0.330367
4,0-1,L_L,0.062932,0.563315,0.084421,0.032233,0.052955,0.00921,0.082886,0.155285,0.168699,0.211382,0.205691,0.258943,0.306911,0.281707,0.163008,0.102439,0.145935
5,0-1,L_R,0.099792,0.428794,0.192308,0.070166,0.234407,0.045218,0.074844,0.146802,0.175388,0.209302,0.194525,0.273983,0.172481,0.220446,0.178052,0.200097,0.228924
6,0-1,R_L,0.125221,0.375664,0.249336,0.133407,0.092699,0.036726,0.142699,0.152709,0.18165,0.22773,0.196223,0.241687,0.267139,0.246203,0.18319,0.126334,0.177135
7,0-1,R_R,0.152546,0.480313,0.190888,0.086168,0.049062,0.010307,0.170893,0.156521,0.16519,0.219226,0.189751,0.269312,0.125891,0.147659,0.190618,0.248989,0.286843
8,0-2,L_L,0.034286,0.335238,0.047619,0.019048,0.026667,0.005714,0.081905,0.178133,0.167076,0.237101,0.212531,0.20516,0.325553,0.249386,0.202703,0.099509,0.12285
9,0-2,L_R,0.043423,0.328225,0.168582,0.058748,0.141762,0.030651,0.06258,0.167131,0.181755,0.228412,0.208914,0.213788,0.167827,0.173398,0.219359,0.21727,0.222145


In [23]:
# Write the merged feature table to the path held in OUTPUT in Feather format.
ball_cnt_all.to_feather(OUTPUT)