## 球種 Tuning
### 22

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import lightgbm as lgb
import optuna.integration.lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
import gc
%matplotlib inline
# Show up to 100 columns when displaying wide frames.
# Use the canonical option key: the mixed-case spelling only resolved through
# pandas' case-insensitive pattern matching of option names.
pd.set_option('display.max_columns', 100)

### 管理番号

In [2]:
# Run identifiers. Both are substituted into the input path
# 'intermediate/<model_No>/all_merge_<model_No>_<sample_No>...' below.
model_No = 32
# sample_No also selects which index range is filtered out of `train`
# further down (values 1-4 are handled there).
sample_No = 1

### sub-modelを使用するかどうか

In [3]:
# When True, the '_sub' variant of the merged file is loaded, aggregated
# predict_* zone columns are added, and the raw predict_* columns are dropped.
use_sub_model = False

In [4]:
# Path of the merged feature file; the '_sub' variant is used when the
# sub-model features are enabled.
ALL_MERGE = (
    'intermediate/{}/all_merge_{}_{}_sub.f'
    if use_sub_model
    else 'intermediate/{}/all_merge_{}_{}.f'
).format(model_No, model_No, sample_No)

In [5]:
# Load the merged feature table selected by ALL_MERGE and report its
# (rows, columns) shape.
all_pitch = pd.read_feather(ALL_MERGE)
print(all_pitch.shape)

(778767, 584)


In [6]:
if use_sub_model:
    # Target column -> source prediction columns that are added together.
    # The first five targets group the sources by height (high/mid/low), the
    # last five by side (left/center/right), as named by the target columns.
    # NOTE(review): presumably predict_0..8 are strike-zone cells and
    # predict_9..12 are ball zones — confirm against the sub-model.
    _zone_groups = {
        'predict_high_str': ('predict_0', 'predict_3', 'predict_6'),
        'predict_high_ball': ('predict_9', 'predict_10'),
        'predict_mid_str': ('predict_1', 'predict_4', 'predict_7'),
        'predict_low_str': ('predict_2', 'predict_5', 'predict_8'),
        'predict_low_ball': ('predict_11', 'predict_12'),
        'predict_left_str': ('predict_0', 'predict_1', 'predict_2'),
        'predict_left_ball': ('predict_9', 'predict_11'),
        'predict_center_str': ('predict_3', 'predict_4', 'predict_5'),
        'predict_right_str': ('predict_6', 'predict_7', 'predict_8'),
        'predict_right_ball': ('predict_10', 'predict_12'),
    }
    for _target, (_first, *_rest) in _zone_groups.items():
        # Accumulate left to right so the arithmetic matches a plain a + b + c.
        _total = all_pitch[_first]
        for _col in _rest:
            _total = _total + all_pitch[_col]
        all_pitch[_target] = _total

In [7]:
if use_sub_model:
    # Remove the raw sub-model outputs from the table in place.
    all_pitch.drop(
        columns=(
            # columns named after pitch types
            [
                'predict_straight', 'predict_curve', 'predict_slider',
                'predict_shoot', 'predict_fork', 'predict_changeup',
                'predict_sinker', 'predict_cutball',
            ]
            # numbered columns predict_0 .. predict_12
            + ['predict_{}'.format(_i) for _i in range(13)]
        ),
        inplace=True,
    )

### train
- 行数: 257117

In [8]:
# Keep only the rows that have a 'course' value; these form the training set.
train = all_pitch.loc[all_pitch['course'].notna()]
train.shape

(257117, 584)

#### 特徴量に加えた部分を抜く

In [9]:
# Row filter per sample number: each query keeps everything except one block
# of index values (per the heading above this cell, the part that was already
# used to build features). Other sample numbers leave `train` unchanged.
_keep_query = {
    1: 'index > 60000',
    2: 'index <= 60000 | index > 120000',
    3: 'index <= 120000 | index > 180000',
    4: 'index <= 180000',
}.get(sample_No)
if _keep_query is not None:
    train = train.query(_keep_query)
train.shape

(197116, 584)

In [10]:
# `train` was derived from all_pitch above, so the full table is no longer
# referenced by later cells shown here; drop the name and run a GC pass
# (presumably to reduce memory before training).
del all_pitch
# Last expression of the cell: its return value is shown as the cell output.
gc.collect()

36

In [11]:
# Feature matrix: everything except the 'No' column, 'course', and 'ball'
# ('ball' is used as the label when the datasets are built).
train_d = train.drop(columns=['No', 'course', 'ball'])

## Dataset作成

In [12]:
# Hold out 25% of the rows (scikit-learn's default, made explicit here) as the
# validation set used during tuning. A fixed seed keeps the split identical
# across kernel restarts; without it every run evaluates on different rows.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    train_d, train['ball'], test_size=0.25, random_state=SPLIT_SEED
)
lgb_train = lgb.Dataset(X_train, y_train)
# The validation Dataset is built with the training Dataset as its reference.
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

### LGBM学習
- multiclass : クラス数=8

In [13]:
# Fixed (non-tuned) LightGBM settings: 8-class classification with the DART
# booster, scored by multiclass log loss.
lgb_param = dict(
    objective='multiclass',
    boosting_type='dart',
    metric='multi_logloss',
    num_class=8,
)

In [14]:
%%time
# Containers handed to the tuner. The output printed by this cell shows
# best_params populated after the call; tuning_history is presumably filled
# the same way (TODO confirm against the installed optuna version).
best_params = {}
tuning_history = []
lgb_model = lgb.train(
    lgb_param,
    lgb_train,
    valid_sets=lgb_eval,
    verbose_eval=0,
    best_params=best_params,
    tuning_history=tuning_history,
)
print('Best Params:', best_params)
# Uncomment to inspect the tuning history as well:
#print('Tuning history:', tuning_history)

tune_feature_fraction, val_score: 1.117044:  14%|#4        | 1/7 [06:41<40:07, 401.17s/it][I 2020-06-14 05:24:14,352] Finished trial#0 with value: 1.1170444925979564 with parameters: {'feature_fraction': 0.4}. Best is trial#0 with value: 1.1170444925979564.
tune_feature_fraction, val_score: 1.116399:  29%|##8       | 2/7 [13:41<33:55, 407.01s/it][I 2020-06-14 05:31:14,968] Finished trial#1 with value: 1.116399389871115 with parameters: {'feature_fraction': 0.5}. Best is trial#1 with value: 1.116399389871115.
tune_feature_fraction, val_score: 1.116088:  43%|####2     | 3/7 [21:21<28:10, 422.75s/it][I 2020-06-14 05:38:54,449] Finished trial#2 with value: 1.1160875344274357 with parameters: {'feature_fraction': 0.6}. Best is trial#2 with value: 1.1160875344274357.
tune_feature_fraction, val_score: 1.116088:  57%|#####7    | 4/7 [29:33<22:10, 443.62s/it][I 2020-06-14 05:47:06,786] Finished trial#3 with value: 1.1165650863949161 with parameters: {'feature_fraction': 0.7}. Best is trial#2 wi

tune_bagging_fraction_and_bagging_freq, val_score: 1.114345:  40%|####      | 4/10 [31:12<45:41, 456.93s/it][I 2020-06-14 10:39:53,118] Finished trial#3 with value: 1.1254766739204536 with parameters: {'bagging_fraction': 0.5337941003996957, 'bagging_freq': 5}. Best is trial#1 with value: 1.1150584480568844.
tune_bagging_fraction_and_bagging_freq, val_score: 1.114345:  50%|#####     | 5/10 [39:58<39:47, 477.53s/it][I 2020-06-14 10:48:38,725] Finished trial#4 with value: 1.1148413764278555 with parameters: {'bagging_fraction': 0.9457639797324203, 'bagging_freq': 4}. Best is trial#4 with value: 1.1148413764278555.
tune_bagging_fraction_and_bagging_freq, val_score: 1.114345:  60%|######    | 6/10 [47:58<31:53, 478.38s/it][I 2020-06-14 10:56:39,087] Finished trial#5 with value: 1.1180327959901366 with parameters: {'bagging_fraction': 0.7892361189890217, 'bagging_freq': 7}. Best is trial#4 with value: 1.1148413764278555.
tune_bagging_fraction_and_bagging_freq, val_score: 1.114345:  70%|####

tune_lambda_l1_and_lambda_l2, val_score: 1.111962:  70%|#######   | 14/20 [1:56:02<50:21, 503.64s/it][I 2020-06-14 14:16:29,661] Finished trial#13 with value: 1.114383001095492 with parameters: {'lambda_l1': 0.1933286978642118, 'lambda_l2': 1.1638715930833154e-08}. Best is trial#6 with value: 1.1119617891882005.
tune_lambda_l1_and_lambda_l2, val_score: 1.111962:  75%|#######5  | 15/20 [2:04:27<42:00, 504.20s/it][I 2020-06-14 14:24:55,148] Finished trial#14 with value: 1.1145594126792224 with parameters: {'lambda_l1': 0.015614098722588567, 'lambda_l2': 0.17647261373723178}. Best is trial#6 with value: 1.1119617891882005.
tune_lambda_l1_and_lambda_l2, val_score: 1.111962:  80%|########  | 16/20 [2:13:16<34:06, 511.58s/it][I 2020-06-14 14:33:43,953] Finished trial#15 with value: 1.1130407222015821 with parameters: {'lambda_l1': 1.3248133722226059, 'lambda_l2': 2.674749597318623e-05}. Best is trial#6 with value: 1.1119617891882005.
tune_lambda_l1_and_lambda_l2, val_score: 1.111962:  85%|##

Best Params: {'lambda_l1': 3.2650173236383515, 'lambda_l2': 0.0006692176426537234, 'num_leaves': 39, 'feature_fraction': 0.552, 'bagging_fraction': 1.0, 'bagging_freq': 0, 'min_child_samples': 50}
CPU times: user 10d 12h 24min 47s, sys: 43min 12s, total: 10d 13h 8min
Wall time: 10h 33min 9s


### Feature Importance

In [15]:
# Pair each feature name with its importance from the tuned model, order the
# table by ascending importance, and show the last 30 rows (the largest).
fn = lgb_model.feature_name()
fi = lgb_model.feature_importance()
df_feature_importance = (
    pd.DataFrame({'feat_name': fn, 'feat_imp': fi})
    .sort_values('feat_imp')
)
df_feature_importance.tail(30)

Unnamed: 0,feat_name,feat_imp
90,height_bat,1581
200,mul_shoot,1643
82,salary_year_pit,1682
71,draft_order_pit,1709
106,batter_cnt,1732
77,age_pit,1796
180,bmi_dif_b-c,1802
70,weight_pit,1807
43,point_diff,1869
91,weight_bat,1925


## 結果出力

### dart (32)
```
Best Params: {
    'lambda_l1': 3.2650173236383515, 
    'lambda_l2': 0.0006692176426537234, 
    'num_leaves': 39, 
    'feature_fraction': 0.552, 
    'bagging_fraction': 1.0, 
    'bagging_freq': 0, 
    'min_child_samples': 50
}
```
Wall time: 10h 33min 9s

### Tuning結果(22-1_sub)
```
Best Params: {
    'lambda_l1': 9.399895836269046, 
    'lambda_l2': 1.0011449849417051e-05, 
    'num_leaves': 17, 
    'feature_fraction': 0.4, 
    'bagging_fraction': 0.9757689578049016, 
    'bagging_freq': 1, 
    'min_child_samples': 5
}
```
Wall time: 1h 30min 23s

### Tuning結果(15)
```
Best Params: {
    'lambda_l1': 9.466286566446003, 
    'lambda_l2': 0.15842866429240474, 
    'num_leaves': 21, 
    'feature_fraction': 0.41600000000000004, 
    'bagging_fraction': 0.8473186677812694, 
    'bagging_freq': 7, 
    'min_child_samples': 100
}
```
Wall time: 2h 14min 40s

### Tuning結果(10)
```
Best Params: {
    'lambda_l1': 2.972300673782286, 
    'lambda_l2': 0.0002499813121238942, 
    'num_leaves': 15, 
    'feature_fraction': 0.4, 
    'bagging_fraction': 1.0, 
    'bagging_freq': 0, 
    'min_child_samples': 100
}
```
Wall time: 1h 54min 11s

### Tuning結果(5)
```
Best Params: {
    'lambda_l1': 6.9923570049658075, 
    'lambda_l2': 0.002378623984798833, 
    'num_leaves': 18, 
    'feature_fraction': 0.45199999999999996, 
    'bagging_fraction': 0.9799724836460725, 
    'bagging_freq': 4, 
    'min_child_samples': 20
}
```