## 球種 Tuning

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import lightgbm as lgb
import optuna.integration.lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
import gc
%matplotlib inline
# Show up to 100 columns when a DataFrame is displayed. The exact registered
# option name is used rather than relying on pandas' case-insensitive pattern
# matching of option keys.
pd.set_option('display.max_columns', 100)

### 管理番号

In [32]:
# Run identifiers. model_No names the intermediate/<model_No>/ directory and is
# part of the merged file name; sample_No is also part of the file name and
# decides which index range is excluded from the training rows further down.
model_No = 32
sample_No = 1

### sub-modelを使用するかどうか

In [33]:
# When True, the "_sub" variant of the merged file is loaded and the per-zone
# prediction columns (predict_0 .. predict_12) are collapsed into aggregates.
use_sub_model = False

In [34]:
# Path of the merged feature file for this model/sample. The "_sub" file is
# the one read when sub-model predictions are in use.
ALL_MERGE = (
    'intermediate/{}/all_merge_{}_{}_sub.f'
    if use_sub_model
    else 'intermediate/{}/all_merge_{}_{}.f'
).format(model_No, model_No, sample_No)

In [35]:
# Load the merged feature table from the feather file selected above and
# print its (rows, columns) shape as a quick sanity check.
all_pitch = pd.read_feather(ALL_MERGE)
print(all_pitch.shape)

(778767, 584)


In [36]:
if use_sub_model:
    # Each aggregate column is the sum of the listed per-zone prediction
    # columns: first grouped by height (high/mid/low), then by side
    # (left/center/right), with strike-zone and ball-zone cells kept apart.
    zone_groups = {
        'predict_high_str': ['predict_0', 'predict_3', 'predict_6'],
        'predict_high_ball': ['predict_9', 'predict_10'],
        'predict_mid_str': ['predict_1', 'predict_4', 'predict_7'],
        'predict_low_str': ['predict_2', 'predict_5', 'predict_8'],
        'predict_low_ball': ['predict_11', 'predict_12'],

        'predict_left_str': ['predict_0', 'predict_1', 'predict_2'],
        'predict_left_ball': ['predict_9', 'predict_11'],
        'predict_center_str': ['predict_3', 'predict_4', 'predict_5'],
        'predict_right_str': ['predict_6', 'predict_7', 'predict_8'],
        'predict_right_ball': ['predict_10', 'predict_12'],
    }
    for agg_col, parts in zone_groups.items():
        # Add left to right with `+` so a NaN in any part yields NaN in the sum.
        total = all_pitch[parts[0]]
        for part in parts[1:]:
            total = total + all_pitch[part]
        all_pitch[agg_col] = total

In [37]:
if use_sub_model:
    # Remove the raw sub-model outputs: the per-pitch-type predictions and
    # the thirteen per-zone predictions.
    pitch_type_cols = [
        'predict_straight', 'predict_curve', 'predict_slider', 'predict_shoot',
        'predict_fork', 'predict_changeup', 'predict_sinker', 'predict_cutball',
    ]
    zone_cols = [
        'predict_0', 'predict_1', 'predict_2', 'predict_3', 'predict_4',
        'predict_5', 'predict_6', 'predict_7', 'predict_8', 'predict_9',
        'predict_10', 'predict_11', 'predict_12',
    ]
    all_pitch.drop(columns=pitch_type_cols + zone_cols, inplace=True)

### train
- 行数: 257117

In [38]:
# Training rows are the ones whose 'course' value is present.
train = all_pitch[all_pitch['course'].notna()]
train.shape

(257117, 584)

#### 特徴量に加えた部分を抜く

In [39]:
# Query that keeps every row EXCEPT the 60000-wide index range tied to each
# sample number.
# NOTE(review): any other sample_No leaves `train` unfiltered — confirm that
# is intended.
rows_to_keep = {
    1: 'index > 60000',
    2: 'index <= 60000 | index > 120000',
    3: 'index <= 120000 | index > 180000',
    4: 'index <= 180000',
}
if sample_No in rows_to_keep:
    train = train.query(rows_to_keep[sample_No])
train.shape

(197116, 584)

In [40]:
# The full table is no longer needed once `train` has been derived from it;
# drop the name and run a garbage-collection pass. The value shown as the cell
# output is gc.collect()'s return value.
del all_pitch
gc.collect()

35

In [41]:
# Feature matrix: everything except 'No', 'course' and the target 'ball'.
non_feature_cols = ['No', 'course', 'ball']
train_d = train.drop(columns=non_feature_cols)

## Dataset作成

In [42]:
# Hold out a validation split for the tuner (scikit-learn's default
# test_size applies, since none is given). The seed is fixed so the split,
# and therefore every reported val_score, is reproducible after a kernel
# restart; without it each run tunes against a different random split.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    train_d, train['ball'], random_state=SPLIT_SEED
)

# LightGBM datasets; the validation set references the training set.
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

### LGBM学習
- multiclass : クラス数=8

In [43]:
# Fixed (non-tuned) LightGBM settings: 8-class classification with gradient
# boosted trees, scored by multiclass log loss.
lgb_param = dict(
    objective='multiclass',
    boosting_type='gbdt',
    metric='multi_logloss',
    num_class=8,
)

In [44]:
%%time
# Run the Optuna LightGBM tuner (`lgb` is optuna.integration.lightgbm in this
# notebook) against the held-out validation set.
# best_params is filled in by the call and printed below; tuning_history is
# passed the same way — presumably it collects per-trial results (TODO confirm).
# NOTE(review): the best_params / tuning_history keyword arguments belong to
# the older Optuna integration API — confirm the installed Optuna version
# still accepts them before re-running.
# NOTE(review): lgb_param carries no seed, so tuning results may differ
# between runs.
best_params, tuning_history = dict(), list()
lgb_model = lgb.train(lgb_param, lgb_train,
                      valid_sets=lgb_eval,
                      verbose_eval=0,
                      best_params=best_params,
                      tuning_history=tuning_history)
print('Best Params:', best_params)
#print('Tuning history:', tuning_history)

tune_feature_fraction, val_score: 1.139761:  14%|#4        | 1/7 [02:15<13:33, 135.60s/it][I 2020-06-21 09:16:53,242] Finished trial#0 with value: 1.1397611050271996 with parameters: {'feature_fraction': 0.4}. Best is trial#0 with value: 1.1397611050271996.
tune_feature_fraction, val_score: 1.139128:  29%|##8       | 2/7 [04:49<11:45, 141.01s/it][I 2020-06-21 09:19:26,859] Finished trial#1 with value: 1.1391280654393483 with parameters: {'feature_fraction': 0.5}. Best is trial#1 with value: 1.1391280654393483.
tune_feature_fraction, val_score: 1.139128:  43%|####2     | 3/7 [07:48<10:09, 152.47s/it][I 2020-06-21 09:22:26,077] Finished trial#2 with value: 1.1402896332723012 with parameters: {'feature_fraction': 0.6}. Best is trial#1 with value: 1.1391280654393483.
tune_feature_fraction, val_score: 1.139128:  57%|#####7    | 4/7 [11:14<08:25, 168.49s/it][I 2020-06-21 09:25:51,960] Finished trial#3 with value: 1.143266493678482 with parameters: {'feature_fraction': 0.7}. Best is trial#1 w

tune_bagging_fraction_and_bagging_freq, val_score: 1.124015:  40%|####      | 4/10 [06:48<10:05, 100.84s/it][I 2020-06-21 11:16:25,749] Finished trial#3 with value: 1.1309744532773713 with parameters: {'bagging_fraction': 0.5473483567893099, 'bagging_freq': 5}. Best is trial#2 with value: 1.124014521398484.
tune_bagging_fraction_and_bagging_freq, val_score: 1.124015:  50%|#####     | 5/10 [08:39<08:40, 104.00s/it][I 2020-06-21 11:18:17,114] Finished trial#4 with value: 1.1244812222265743 with parameters: {'bagging_fraction': 0.8138455075479958, 'bagging_freq': 1}. Best is trial#2 with value: 1.124014521398484.
tune_bagging_fraction_and_bagging_freq, val_score: 1.124015:  60%|######    | 6/10 [10:04<06:32, 98.14s/it] [I 2020-06-21 11:19:41,568] Finished trial#5 with value: 1.1339637690748379 with parameters: {'bagging_fraction': 0.4928683025726941, 'bagging_freq': 7}. Best is trial#2 with value: 1.124014521398484.
tune_bagging_fraction_and_bagging_freq, val_score: 1.124015:  70%|#######

tune_lambda_l1_and_lambda_l2, val_score: 1.115048:  70%|#######   | 14/20 [25:48<11:23, 113.83s/it][I 2020-06-21 12:04:32,836] Finished trial#13 with value: 1.1185181036406735 with parameters: {'lambda_l1': 0.10121865800273966, 'lambda_l2': 6.700548741780498}. Best is trial#10 with value: 1.1150479325115839.
tune_lambda_l1_and_lambda_l2, val_score: 1.115048:  75%|#######5  | 15/20 [27:46<09:35, 115.10s/it][I 2020-06-21 12:06:30,870] Finished trial#14 with value: 1.115121913733998 with parameters: {'lambda_l1': 7.919344274760103, 'lambda_l2': 4.277910736757767e-06}. Best is trial#10 with value: 1.1150479325115839.
tune_lambda_l1_and_lambda_l2, val_score: 1.115048:  80%|########  | 16/20 [29:31<07:28, 112.19s/it][I 2020-06-21 12:08:16,292] Finished trial#15 with value: 1.1232369697532394 with parameters: {'lambda_l1': 0.012728563920094173, 'lambda_l2': 2.638675959220353e-06}. Best is trial#10 with value: 1.1150479325115839.
tune_lambda_l1_and_lambda_l2, val_score: 1.115048:  85%|########

Best Params: {'lambda_l1': 7.8708983359773494, 'lambda_l2': 7.364400432535952, 'num_leaves': 15, 'feature_fraction': 0.42, 'bagging_fraction': 0.9318179467445713, 'bagging_freq': 1, 'min_child_samples': 20}
CPU times: user 3d 3h 58min 29s, sys: 19min 18s, total: 3d 4h 17min 47s
Wall time: 3h 10min 54s


### Feature Importance

In [45]:
# Pair each feature name with its importance from the tuned model, order the
# table by ascending importance, and show the 30 highest-ranked features.
fi = lgb_model.feature_importance()
fn = lgb_model.feature_name()
df_feature_importance = pd.DataFrame(
    {'feat_name': fn, 'feat_imp': fi}
).sort_values('feat_imp')
df_feature_importance.tail(30)

Unnamed: 0,feat_name,feat_imp
84,bmi_pit,550
102,rank_x_year_bat,572
177,salary_x_year_dif_b-c,590
178,rank_year_dif_b-c,603
107,bat_game_cnt,606
5,player_cnt_in_game,629
163,rank_x_year_dif_p-b,641
103,salary_year_bat,642
176,salary_year_dif_b-c,647
179,rank_x_year_dif_b-c,648


## 結果出力

### gbdt (32)
```
Best Params: {
    'lambda_l1': 7.8708983359773494,
    'lambda_l2': 7.364400432535952,
    'num_leaves': 15,
    'feature_fraction': 0.42,
    'bagging_fraction': 0.9318179467445713,
    'bagging_freq': 1,
    'min_child_samples': 20
}
Wall time: 3h 10min 54s
```

### dart (32)
```
Best Params: {
    'lambda_l1': 3.2650173236383515,
    'lambda_l2': 0.0006692176426537234,
    'num_leaves': 39,
    'feature_fraction': 0.552,
    'bagging_fraction': 1.0,
    'bagging_freq': 0,
    'min_child_samples': 50
}
```
Wall time: 10h 33min 9s

### Tuning結果(22-1_sub)
```
Best Params: {
    'lambda_l1': 9.399895836269046,
    'lambda_l2': 1.0011449849417051e-05,
    'num_leaves': 17,
    'feature_fraction': 0.4,
    'bagging_fraction': 0.9757689578049016,
    'bagging_freq': 1,
    'min_child_samples': 5
}
```
Wall time: 1h 30min 23s

### Tuning結果(15)
```
Best Params: {
    'lambda_l1': 9.466286566446003,
    'lambda_l2': 0.15842866429240474,
    'num_leaves': 21,
    'feature_fraction': 0.41600000000000004,
    'bagging_fraction': 0.8473186677812694,
    'bagging_freq': 7,
    'min_child_samples': 100
}
```
Wall time: 2h 14min 40s

### Tuning結果(10)
```
Best Params: {
    'lambda_l1': 2.972300673782286,
    'lambda_l2': 0.0002499813121238942,
    'num_leaves': 15,
    'feature_fraction': 0.4,
    'bagging_fraction': 1.0,
    'bagging_freq': 0,
    'min_child_samples': 100
}
```
Wall time: 1h 54min 11s

### Tuning結果(5)
```
Best Params: {
    'lambda_l1': 6.9923570049658075,
    'lambda_l2': 0.002378623984798833,
    'num_leaves': 18,
    'feature_fraction': 0.45199999999999996,
    'bagging_fraction': 0.9799724836460725,
    'bagging_freq': 4,
    'min_child_samples': 20
}
```