## ボールカウントの2017年データの集計
#### ball_2017_3.f
- 特徴量名変更

#### ball_2017_2.f
- 右左で集計を分ける

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
# Show up to 100 columns when displaying wide frames.
# Use the canonical lowercase option key: the mixed-case spelling is not a
# registered option name and only resolved through pandas' pattern matching.
pd.set_option('display.max_columns', 100)

In [2]:
# Load the pitch-level training table and report its (rows, columns).
pitch_path = 'data/train_pitch.f'
train_pitch = pd.read_feather(pitch_path)
print(train_pitch.shape)

(257117, 51)


In [3]:
# Destination path of the aggregated feature table; consumed by the final
# `to_feather` call of this notebook.
OUTPUT = 'intermediate/ball_2017_3.f'

In [4]:
# Give the pitch-type and pitch-location-zone columns short ASCII names
# used by the aggregations below.
train_pitch = train_pitch.rename(columns={'球種': 'ball', '投球位置区域': 'course'})

# Count key in "<strikes>-<balls>" order (both taken before the play),
# e.g. '2-1' means 2 strikes and 1 ball.
strikes_before = train_pitch['プレイ前ストライク数'].astype(str)
balls_before = train_pitch['プレイ前ボール数'].astype(str)
train_pitch['ball_cnt'] = strikes_before + '-' + balls_before

In [5]:
# Recode the left/right labels to ASCII 'L'/'R'. Note the replacement is
# applied to every column of the frame, not only the two handedness columns.
train_pitch = train_pitch.replace('左', 'L').replace('右', 'R')

# Matchup key "<pitcher hand>_<batter hand>", e.g. 'R_L'.
pitcher_hand = train_pitch['投手投球左右']
batter_hand = train_pitch['打者打席左右']
train_pitch['pit_bat'] = pitcher_hand + '_' + batter_hand

In [6]:
# Spot-check the three columns that drive the aggregations below.
preview_cols = ['ball', 'ball_cnt', 'pit_bat']
train_pitch[preview_cols].head(10)

Unnamed: 0,ball,ball_cnt,pit_bat
0,0,0-0,R_L
1,0,1-0,R_L
2,0,2-0,R_L
3,0,0-0,R_R
4,0,1-0,R_R
5,1,2-0,R_R
6,2,2-1,R_R
7,3,0-0,R_R
8,0,0-1,R_R
9,2,1-1,R_R


### 球種

In [7]:
# Number of pitches of each pitch type for every (count, matchup) pair,
# as a flat frame with the count stored in 'ball_sum'.
train_ball_cnt = (
    train_pitch[['ball', 'ball_cnt', 'pit_bat']]
    .groupby(['ball_cnt', 'pit_bat', 'ball'])
    .size()
    .reset_index(name='ball_sum')
)

In [8]:
# Share of each pitch type within its (count, matchup) group.
#
# Only 'ball_sum' is aggregated: a bare `.sum()` would also add up the
# `ball` pitch-type codes, which is meaningless. The per-row total is
# assigned with `transform` instead of a merge so that re-running this cell
# does not create duplicate/suffixed 'total' columns.
count_matchup_keys = ['ball_cnt', 'pit_bat']

# Kept for backward compatibility: one row per group with its pitch total.
ball_total = (
    train_ball_cnt
    .groupby(count_matchup_keys, as_index=False)['ball_sum']
    .sum()
    .rename(columns={'ball_sum': 'total'})
)

train_ball_cnt['total'] = (
    train_ball_cnt.groupby(count_matchup_keys)['ball_sum'].transform('sum')
)
train_ball_cnt['rate'] = train_ball_cnt['ball_sum'] / train_ball_cnt['total']

In [9]:
# Wide layout: one row per (count, matchup), one rate column per pitch type.
# Pitch-type codes 0..7 are mapped, in order, to the feature names below.
ball_feature_names = [
    'bc_straight',
    'bc_curve',
    'bc_slider',
    'bc_shoot',
    'bc_fork',
    'bc_changeup',
    'bc_sinker',
    'bc_cutball',
]
train_ball_pivot = (
    pd.pivot_table(
        train_ball_cnt[['ball_cnt', 'pit_bat', 'ball', 'rate']],
        index=['ball_cnt', 'pit_bat'],
        columns='ball',
        values='rate',
    )
    .reset_index()
    .rename(columns=dict(enumerate(ball_feature_names)))
)

In [10]:
# A missing cell means that pitch type had no rows for the group, so its rate is 0.
train_ball_pivot = train_ball_pivot.fillna(0)

In [11]:
# Sanity check: rows are (ball_cnt, pit_bat) pairs; columns are the two keys
# plus one rate column per pitch type.
train_ball_pivot.shape

(48, 10)

### コース

In [12]:
# Number of pitches in each location zone for every (count, matchup) pair,
# as a flat frame with the count stored in 'course_sum'.
train_course = (
    train_pitch[['course', 'ball_cnt', 'pit_bat']]
    .groupby(['ball_cnt', 'pit_bat', 'course'])
    .size()
    .reset_index(name='course_sum')
)

In [13]:
# Share of each location zone within its (count, matchup) group.
#
# Only 'course_sum' is aggregated: a bare `.sum()` would also add up the
# `course` zone codes, which is meaningless. The per-row total is assigned
# with `transform` instead of a merge so that re-running this cell does not
# create duplicate/suffixed 'total' columns.
course_group_keys = ['ball_cnt', 'pit_bat']

# Kept for backward compatibility: one row per group with its pitch total.
course_total = (
    train_course
    .groupby(course_group_keys, as_index=False)['course_sum']
    .sum()
    .rename(columns={'course_sum': 'total'})
)

train_course['total'] = (
    train_course.groupby(course_group_keys)['course_sum'].transform('sum')
)
train_course['rate'] = train_course['course_sum'] / train_course['total']

In [14]:
# Wide layout: one row per (count, matchup), one rate column per location zone.
# Zone codes 0..12 become 'bc_course00' .. 'bc_course12'.
course_feature_names = {zone: f'bc_course{zone:02d}' for zone in range(13)}
train_course_pivot = (
    pd.pivot_table(
        train_course[['ball_cnt', 'pit_bat', 'course', 'rate']],
        index=['ball_cnt', 'pit_bat'],
        columns='course',
        values='rate',
    )
    .reset_index()
    .rename(columns=course_feature_names)
)

In [15]:
# A missing cell means that zone had no rows for the group, so its rate is 0.
train_course_pivot = train_course_pivot.fillna(0)

In [16]:
# Sanity check: rows are (ball_cnt, pit_bat) pairs; columns are the two keys
# plus one rate column per location zone.
train_course_pivot.shape

(48, 15)

### マージ

In [17]:
# Join pitch-type rates and location-zone rates on the shared (count, matchup)
# key, keeping every row of the pitch-type table.
merge_keys = ['ball_cnt', 'pit_bat']
ball_cnt_all = pd.merge(train_ball_pivot, train_course_pivot, how='left', on=merge_keys)
print(ball_cnt_all.shape)
ball_cnt_all.head(10)

(48, 23)


Unnamed: 0,ball_cnt,pit_bat,bc_straight,bc_curve,bc_slider,bc_shoot,bc_fork,bc_changeup,bc_sinker,bc_cutball,bc_course00,bc_course01,bc_course02,bc_course03,bc_course04,bc_course05,bc_course06,bc_course07,bc_course08,bc_course09,bc_course10,bc_course11,bc_course12
0,0-0,L_L,0.531149,0.058586,0.296482,0.031311,0.015332,0.022272,0.005003,0.039864,0.066172,0.112976,0.113138,0.04745,0.06601,0.039057,0.029212,0.035991,0.022111,0.098128,0.10184,0.2245,0.043415
1,0-0,L_R,0.474875,0.080828,0.192947,0.067679,0.029661,0.112833,0.014474,0.026705,0.035572,0.059321,0.055448,0.047396,0.059525,0.054938,0.051982,0.07461,0.062073,0.072572,0.13913,0.163286,0.124146
2,0-0,R_L,0.464734,0.110359,0.151086,0.097383,0.067492,0.04321,0.012933,0.052803,0.064837,0.095199,0.082009,0.040512,0.063252,0.053916,0.030534,0.046508,0.038328,0.163762,0.057728,0.130872,0.132543
3,0-0,R_R,0.475961,0.101792,0.226048,0.066617,0.042131,0.018345,0.003343,0.065762,0.034319,0.043647,0.02709,0.054336,0.065374,0.045046,0.062653,0.097944,0.085507,0.09841,0.098527,0.055307,0.23184
4,0-1,L_L,0.529675,0.033333,0.298374,0.044715,0.017073,0.028049,0.004878,0.043902,0.06626,0.11748,0.123171,0.05122,0.058943,0.052846,0.037805,0.034959,0.029675,0.078455,0.090244,0.203252,0.055691
5,0-1,L_R,0.466085,0.046512,0.199855,0.089632,0.032703,0.109254,0.021076,0.034884,0.039002,0.062742,0.070736,0.053295,0.066618,0.05814,0.054506,0.079942,0.065649,0.057413,0.117975,0.163033,0.11095
6,0-1,R_L,0.463875,0.058087,0.174261,0.115661,0.061884,0.043001,0.017036,0.066195,0.073071,0.107143,0.086925,0.048543,0.069376,0.065271,0.031096,0.051211,0.044027,0.131979,0.049672,0.114224,0.127463
7,0-1,R_R,0.467251,0.071277,0.224427,0.089193,0.040262,0.022924,0.004816,0.07985,0.040358,0.049509,0.036024,0.057696,0.075901,0.057022,0.058467,0.093816,0.096706,0.088037,0.077153,0.059622,0.20969
8,0-2,L_L,0.644963,0.022113,0.216216,0.030713,0.012285,0.017199,0.003686,0.052826,0.066339,0.121622,0.137592,0.066339,0.074939,0.061425,0.045455,0.040541,0.013514,0.084767,0.08231,0.164619,0.040541
9,0-2,L_R,0.545265,0.023677,0.178969,0.091922,0.032033,0.077298,0.016713,0.034123,0.049443,0.049443,0.068942,0.050836,0.091922,0.076602,0.066852,0.087047,0.06337,0.053621,0.128134,0.119777,0.094011


In [18]:
# Persist the merged per-(count, matchup) rate table to the OUTPUT path.
# NOTE(review): presumably the parent directory of OUTPUT must already exist — confirm.
ball_cnt_all.to_feather(OUTPUT)