## 2017年データのボールカウント別集計（球種・コースの割合）

In [48]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import feather
# Show up to 100 columns when a DataFrame is displayed, so wide tables are not
# truncated. Use the canonical lower-case option key: 'display.max_Columns' is
# not a registered option name and only resolved through pandas'
# case-insensitive pattern matching of option keys.
pd.set_option('display.max_columns', 100)

In [49]:
# Load the training pitch table from its Feather file and report (rows, columns).
train_pitch = pd.read_feather(path='data/train_pitch.f')
print(train_pitch.shape)

(257117, 51)


In [50]:
# Give the two columns aggregated below short ASCII names
# ('球種' = pitch type -> 'ball', '投球位置区域' = pitch location zone -> 'pitch_area')
# and add the count key 'ball_cnt', formatted "<strikes>-<balls>" from the
# pre-play strike and ball columns (e.g. "1-2").
train_pitch = (
    train_pitch
    .rename(columns={'球種': 'ball', '投球位置区域': 'pitch_area'})
    .assign(
        ball_cnt=lambda df: (
            df['プレイ前ストライク数'].astype(str)
            + '-'
            + df['プレイ前ボール数'].astype(str)
        )
    )
)

### 球種

In [51]:
# Count pitches for every (count, pitch type) pair.
# Result columns: ball_cnt, ball, ball_sum (number of pitches in the pair).
train_ball_cnt = (
    train_pitch
    .loc[:, ['ball', 'ball_cnt']]
    .groupby(['ball_cnt', 'ball'])
    .size()
    .reset_index(name='ball_sum')
)

In [52]:
# Total number of pitches per count. Only 'ball_sum' is summed: summing the
# whole frame would also add up the 'ball' code column, which is meaningless.
ball_total = (
    train_ball_cnt
    .groupby('ball_cnt', as_index=False)['ball_sum']
    .sum()
    .rename(columns={'ball_sum': 'total'})
)

# Attach the per-count total to every (count, pitch type) row and derive the
# share of each pitch type within its count.
# Any 'total'/'rate' columns left by a previous run of this cell are dropped
# first, so re-running the cell does not create duplicate 'total' columns.
train_ball_cnt = (
    train_ball_cnt
    .drop(columns=['total', 'rate'], errors='ignore')
    .merge(ball_total[['ball_cnt', 'total']], on='ball_cnt', how='left')
    .assign(rate=lambda df: df['ball_sum'] / df['total'])
)

In [53]:
# Reshape to one row per count with one rate column per pitch-type code, then
# replace the integer codes 0..7 with descriptive 'bc_*' column names.
# NOTE(review): the code -> name mapping is hard-coded here; presumably it
# follows the dataset's pitch-type coding — confirm against the data dictionary.
train_ball_pivot = (
    train_ball_cnt
    .pivot(index='ball_cnt', columns='ball', values='rate')
    .reset_index()
    .rename(columns=dict(enumerate([
        'bc_straight',
        'bc_curve',
        'bc_slider',
        'bc_shoot',
        'bc_fork',
        'bc_changeup',
        'bc_sinker',
        'bc_cutball',
    ])))
)

In [54]:
# Shape check of the pivoted table: rows are the distinct ball_cnt values,
# columns are the ball_cnt key plus one rate column per pitch-type code.
train_ball_pivot.shape

(12, 9)

### コース

In [55]:
# Count pitches for every (count, pitch location zone) pair.
# Result columns: ball_cnt, pitch_area, course_sum (number of pitches in the pair).
train_course = (
    train_pitch
    .loc[:, ['pitch_area', 'ball_cnt']]
    .groupby(['ball_cnt', 'pitch_area'])
    .size()
    .reset_index(name='course_sum')
)

In [56]:
# Total number of pitches per count. Only 'course_sum' is summed: summing the
# whole frame would also add up the 'pitch_area' code column, which is meaningless.
course_total = (
    train_course
    .groupby('ball_cnt', as_index=False)['course_sum']
    .sum()
    .rename(columns={'course_sum': 'total'})
)

# Attach the per-count total to every (count, zone) row and derive the share
# of each location zone within its count.
# Any 'total'/'rate' columns left by a previous run of this cell are dropped
# first, so re-running the cell does not create duplicate 'total' columns.
train_course = (
    train_course
    .drop(columns=['total', 'rate'], errors='ignore')
    .merge(course_total[['ball_cnt', 'total']], on='ball_cnt', how='left')
    .assign(rate=lambda df: df['course_sum'] / df['total'])
)

In [57]:
# Reshape to one row per count with one rate column per location-zone code,
# then rename the integer zone codes 0..12 to 'bc_area0' .. 'bc_area12'.
train_course_pivot = (
    train_course
    .pivot(index='ball_cnt', columns='pitch_area', values='rate')
    .reset_index()
    .rename(columns={area: 'bc_area' + str(area) for area in range(13)})
)

In [58]:
# Shape check of the pivoted table: rows are the distinct ball_cnt values,
# columns are the ball_cnt key plus one rate column per location-zone code.
train_course_pivot.shape

(12, 14)

### マージ

In [59]:
# Put the pitch-type rates and the location-zone rates side by side, keyed by
# count. Left join: every count present in the pitch-type table is kept.
ball_cnt_all = pd.merge(
    train_ball_pivot,
    train_course_pivot,
    how='left',
    on='ball_cnt',
)
print(ball_cnt_all.shape)
ball_cnt_all

(12, 22)


Unnamed: 0,ball_cnt,bc_straight,bc_curve,bc_slider,bc_shoot,bc_fork,bc_changeup,bc_sinker,bc_cutball,bc_area0,bc_area1,bc_area2,bc_area3,bc_area4,bc_area5,bc_area6,bc_area7,bc_area8,bc_area9,bc_area10,bc_area11,bc_area12
0,0-0,0.477023,0.097592,0.20087,0.074454,0.046799,0.041882,0.008619,0.05276,0.048489,0.071105,0.059259,0.047675,0.063792,0.04915,0.046338,0.070075,0.059013,0.117934,0.090325,0.1148,0.162045
1,0-1,0.471588,0.059145,0.209141,0.094819,0.044845,0.044059,0.011792,0.06461,0.054466,0.078835,0.067979,0.053081,0.070525,0.059819,0.045968,0.070712,0.066519,0.09845,0.074643,0.108744,0.150258
2,0-2,0.550569,0.033485,0.176196,0.097608,0.033371,0.03303,0.009453,0.066287,0.062642,0.079727,0.066515,0.057062,0.08246,0.066401,0.048064,0.072893,0.059681,0.111503,0.073576,0.092141,0.127335
3,0-3,0.836213,0.005874,0.061161,0.048721,0.010021,0.007602,0.002764,0.027643,0.067381,0.071873,0.053905,0.070145,0.081548,0.064271,0.04803,0.063234,0.057015,0.132688,0.101935,0.08293,0.105045
4,1-0,0.459832,0.090366,0.161893,0.068313,0.099045,0.061079,0.012505,0.046967,0.035008,0.05645,0.047706,0.035812,0.044074,0.034687,0.037484,0.058379,0.050407,0.112451,0.103867,0.1617,0.221976
5,1-1,0.416055,0.083327,0.191329,0.081656,0.092485,0.066175,0.012392,0.056581,0.047969,0.069191,0.059779,0.044226,0.062396,0.050004,0.043753,0.070281,0.059852,0.102297,0.073952,0.134203,0.182099
6,1-2,0.453179,0.053719,0.203854,0.09627,0.061747,0.053427,0.013722,0.064083,0.054084,0.079556,0.063207,0.054959,0.076564,0.055835,0.048172,0.074739,0.061528,0.100066,0.066419,0.112693,0.152179
7,1-3,0.572314,0.028634,0.152836,0.100857,0.037388,0.036841,0.010213,0.060916,0.055809,0.092832,0.062557,0.067846,0.084443,0.067481,0.047602,0.0766,0.060733,0.105599,0.070947,0.082619,0.124932
8,2-0,0.46973,0.066062,0.159395,0.031361,0.16294,0.062722,0.014112,0.033679,0.021884,0.035656,0.030884,0.020316,0.021748,0.021816,0.028429,0.036678,0.02952,0.133761,0.13117,0.201595,0.286542
9,2-1,0.431787,0.070655,0.177804,0.043295,0.149767,0.072798,0.013116,0.040777,0.033637,0.049609,0.046415,0.033449,0.039462,0.034426,0.038372,0.052353,0.047354,0.100646,0.088996,0.181412,0.253871


In [60]:
# Persist the per-count rate table as a Feather file for later steps.
# NOTE(review): assumes the 'intermediate/' directory already exists — confirm.
ball_cnt_all.to_feather('intermediate/ball_2017_1.f')