## 球種 Tuning
### 22

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import lightgbm as lgb
import optuna.integration.lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
import gc
%matplotlib inline
pd.set_option('display.max_Columns', 100)

### 管理番号

In [2]:
model_No = 22
sample_No = 1

### sub-modelを使用するかどうか

In [3]:
use_sub_model = True

In [4]:
if use_sub_model:
    ALL_MERGE = 'intermediate/{}/all_merge_{}_{}_sub.f'.format(model_No, model_No, sample_No)
else:
    ALL_MERGE = 'intermediate/{}/all_merge_{}_{}.f'.format(model_No, model_No, sample_No)

In [5]:
all_pitch = pd.read_feather(ALL_MERGE)
print(all_pitch.shape)

(778767, 211)


In [6]:
if use_sub_model:
    all_pitch['predict_high_str'] = all_pitch['predict_0'] + all_pitch['predict_3'] + all_pitch['predict_6'] 
    all_pitch['predict_high_ball'] = all_pitch['predict_9'] + all_pitch['predict_10'] 
    all_pitch['predict_mid_str'] = all_pitch['predict_1'] + all_pitch['predict_4'] + all_pitch['predict_7'] 
    all_pitch['predict_low_str'] = all_pitch['predict_2'] + all_pitch['predict_5'] + all_pitch['predict_8'] 
    all_pitch['predict_low_ball'] = all_pitch['predict_11'] + all_pitch['predict_12'] 

    all_pitch['predict_left_str'] = all_pitch['predict_0'] + all_pitch['predict_1'] + all_pitch['predict_2'] 
    all_pitch['predict_left_ball'] = all_pitch['predict_9'] + all_pitch['predict_11'] 
    all_pitch['predict_center_str'] = all_pitch['predict_3'] + all_pitch['predict_4'] + all_pitch['predict_5'] 
    all_pitch['predict_right_str'] = all_pitch['predict_6'] + all_pitch['predict_7'] + all_pitch['predict_8'] 
    all_pitch['predict_right_ball'] = all_pitch['predict_10'] + all_pitch['predict_12'] 

In [7]:
if use_sub_model:
    all_pitch.drop(columns=[
        'predict_straight', 'predict_curve', 'predict_slider', 'predict_shoot',
        'predict_fork', 'predict_changeup', 'predict_sinker', 'predict_cutball',
        'predict_0','predict_1','predict_2','predict_3','predict_4','predict_5','predict_6',
        'predict_7','predict_8','predict_9','predict_10','predict_11','predict_12'
    ], inplace=True)

### train
- 行数: 257117

In [8]:
train = all_pitch.dropna(subset=['course'])
train.shape

(257117, 200)

#### 特徴量に加えた部分を抜く

In [9]:
if sample_No == 1:
    train = train.query('index > 60000')
elif sample_No == 2:
    train = train.query('index <= 60000 | index > 120000')
elif sample_No == 3:
    train = train.query('index <= 120000 | index > 180000')
elif sample_No == 4:
    train = train.query('index <= 180000')
train.shape

(197116, 200)

In [10]:
del all_pitch
gc.collect()

36

In [11]:
train_d = train.drop([
    'No', 
    'course', 
    'ball'
], axis=1)

## Dataset作成

In [12]:
X_train, X_test, y_train, y_test = train_test_split(train_d, train['ball'])
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

### LGBM学習
- multiclass : クラス数=8

In [13]:
lgb_param = {
        'objective' : 'multiclass',
        'boosting_type': 'gbdt',
        'metric' : 'multi_logloss',
        'num_class' : 8,
    }

In [14]:
%%time
best_params, tuning_history = dict(), list()
lgb_model = lgb.train(lgb_param, lgb_train,
                      valid_sets=lgb_eval,
                      verbose_eval=0,
                      best_params=best_params,
                      tuning_history=tuning_history)
print('Best Params:', best_params)
#print('Tuning history:', tuning_history)

tune_feature_fraction, val_score: 1.131747:  14%|#4        | 1/7 [01:11<07:07, 71.21s/it][I 2020-05-17 10:06:08,806] Finished trial#0 with value: 1.131746595006951 with parameters: {'feature_fraction': 0.4}. Best is trial#0 with value: 1.131746595006951.
tune_feature_fraction, val_score: 1.131747:  29%|##8       | 2/7 [02:30<06:08, 73.70s/it][I 2020-05-17 10:07:28,340] Finished trial#1 with value: 1.1341611160469944 with parameters: {'feature_fraction': 0.5}. Best is trial#0 with value: 1.131746595006951.
tune_feature_fraction, val_score: 1.131747:  43%|####2     | 3/7 [03:58<05:12, 78.02s/it][I 2020-05-17 10:08:56,439] Finished trial#2 with value: 1.1332460541280465 with parameters: {'feature_fraction': 0.6}. Best is trial#0 with value: 1.131746595006951.
tune_feature_fraction, val_score: 1.131747:  57%|#####7    | 4/7 [05:35<04:10, 83.59s/it][I 2020-05-17 10:10:33,033] Finished trial#3 with value: 1.135283338993464 with parameters: {'feature_fraction': 0.7}. Best is trial#0 with valu

tune_bagging_fraction_and_bagging_freq, val_score: 1.123011:  40%|####      | 4/10 [03:50<05:58, 59.74s/it][I 2020-05-17 10:57:09,717] Finished trial#3 with value: 1.1245295755820792 with parameters: {'bagging_fraction': 0.9937079196503578, 'bagging_freq': 4}. Best is trial#0 with value: 1.1230105925469775.
tune_bagging_fraction_and_bagging_freq, val_score: 1.123011:  50%|#####     | 5/10 [04:41<04:45, 57.02s/it][I 2020-05-17 10:58:00,391] Finished trial#4 with value: 1.1295090598544313 with parameters: {'bagging_fraction': 0.6027465786796519, 'bagging_freq': 5}. Best is trial#0 with value: 1.1230105925469775.
tune_bagging_fraction_and_bagging_freq, val_score: 1.123011:  60%|######    | 6/10 [05:38<03:47, 56.97s/it][I 2020-05-17 10:58:57,231] Finished trial#5 with value: 1.1250355900226068 with parameters: {'bagging_fraction': 0.7854957606014913, 'bagging_freq': 3}. Best is trial#0 with value: 1.1230105925469775.
tune_bagging_fraction_and_bagging_freq, val_score: 1.123011:  70%|#######

tune_lambda_l1_and_lambda_l2, val_score: 1.117613:  85%|########5 | 17/20 [19:21<03:34, 71.58s/it][I 2020-05-17 11:25:32,808] Finished trial#16 with value: 1.1181854150613566 with parameters: {'lambda_l1': 7.326469492873381, 'lambda_l2': 0.20234898032951368}. Best is trial#14 with value: 1.117613025406842.
tune_lambda_l1_and_lambda_l2, val_score: 1.117613:  90%|######### | 18/20 [20:34<02:23, 71.95s/it][I 2020-05-17 11:26:45,636] Finished trial#17 with value: 1.1178965833546513 with parameters: {'lambda_l1': 7.122031743239788, 'lambda_l2': 0.005927174827392446}. Best is trial#14 with value: 1.117613025406842.
tune_lambda_l1_and_lambda_l2, val_score: 1.117613:  95%|#########5| 19/20 [21:41<01:10, 70.48s/it][I 2020-05-17 11:27:52,674] Finished trial#18 with value: 1.1200197839546422 with parameters: {'lambda_l1': 0.5574780747055201, 'lambda_l2': 1.0604136580685575}. Best is trial#14 with value: 1.117613025406842.
tune_lambda_l1_and_lambda_l2, val_score: 1.117504: 100%|##########| 20/20 [

Best Params: {'lambda_l1': 9.399895836269046, 'lambda_l2': 1.0011449849417051e-05, 'num_leaves': 17, 'feature_fraction': 0.4, 'bagging_fraction': 0.9757689578049016, 'bagging_freq': 1, 'min_child_samples': 5}
CPU times: user 23h 57min 43s, sys: 6min 29s, total: 1d 4min 13s
Wall time: 1h 30min 23s


### Feature Importance

In [15]:
fi = lgb_model.feature_importance()
fn = lgb_model.feature_name()
df_feature_importance = pd.DataFrame({'feat_name':fn, 'feat_imp':fi})
df_feature_importance.sort_values('feat_imp', inplace=True)
df_feature_importance.tail(30)

Unnamed: 0,feat_name,feat_imp
140,batter_cnt,1236
101,bmi_pit,1259
181,rank_x_year_dif_b-c,1260
174,bmi_dif_p-c,1300
103,slider,1359
166,bmi_dif_p-b,1381
182,bmi_dif_b-c,1434
42,elapsed_min,1489
0,pitch_cnt_in_game,1490
47,elapsed_batter,1516


## 結果出力

### Tuning結果(22-1_sub)
`
Best Params: {
    'lambda_l1': 9.399895836269046, 
    'lambda_l2': 1.0011449849417051e-05, 
    'num_leaves': 17, 
    'feature_fraction': 0.4, 
    'bagging_fraction': 0.9757689578049016, 
    'bagging_freq': 1, 
    'min_child_samples': 5
}
`
Wall time: 1h 30min 23s

### Tuning結果(15)
`
Best Params: {
    'lambda_l1': 9.466286566446003, 
    'lambda_l2': 0.15842866429240474, 
    'num_leaves': 21, 
    'feature_fraction': 0.41600000000000004, 
    'bagging_fraction': 0.8473186677812694, 
    'bagging_freq': 7, 
    'min_child_samples': 100
}
`
Wall time: 2h 14min 40s

### Tuning結果(10)
`
Best Params: {
    'lambda_l1': 2.972300673782286, 
    'lambda_l2': 0.0002499813121238942, 
    'num_leaves': 15, 
    'feature_fraction': 0.4, 
    'bagging_fraction': 1.0, 
    'bagging_freq': 0, 
    'min_child_samples': 100
}
`
Wall time: 1h 54min 11s

### Tuning結果(5)
`
Best Params: {
    'lambda_l1': 6.9923570049658075, 
    'lambda_l2': 0.002378623984798833, 
    'num_leaves': 18, 
    'feature_fraction': 0.45199999999999996, 
    'bagging_fraction': 0.9799724836460725, 
    'bagging_freq': 4, 
    'min_child_samples': 20
}
`