# 导入包

In [30]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from scipy.stats import skew
from scipy.stats import kurtosis
from scipy.stats import mode
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import StratifiedKFold
import warnings 
warnings.filterwarnings("ignore")

# 导入数据

In [2]:
nrows = None

df_train = pd.read_csv('../data/sensor_train.csv',sep=',',nrows=nrows)
df_test = pd.read_csv('../data/sensor_test.csv',sep=',',nrows=nrows)
df_submit = pd.read_csv('../data/提交结果示例.csv',sep=',',nrows=nrows)

# 合并数据

In [3]:
df_train['flag'] = 'train'
df_test['flag'] = 'test'
df_test['behavior_id'] = -1
df_train_test = pd.concat([df_train, df_test])

In [4]:
df_train_test = df_train_test.sort_values(['flag','fragment_id','time_point'])

In [5]:
df_train_test['acc_all'] = (df_train_test['acc_x'] ** 2 + df_train_test['acc_y'] ** 2 + df_train_test['acc_z'] ** 2) ** 0.5
df_train_test['acc_allg'] = (df_train_test['acc_xg'] ** 2 + df_train_test['acc_yg'] ** 2 + df_train_test['acc_zg'] ** 2) ** 0.5

# 查看字段和数据

In [6]:
df_train_test.columns

Index(['acc_x', 'acc_xg', 'acc_y', 'acc_yg', 'acc_z', 'acc_zg', 'behavior_id',
       'flag', 'fragment_id', 'time_point', 'acc_all', 'acc_allg'],
      dtype='object')

In [7]:
df_train_test.head()

Unnamed: 0,acc_x,acc_xg,acc_y,acc_yg,acc_z,acc_zg,behavior_id,flag,fragment_id,time_point,acc_all,acc_allg
0,0.2,0.2,1.0,5.3,0.6,9.2,-1,test,0,71,1.183216,10.619322
1,0.0,0.2,1.0,6.0,-0.7,8.2,-1,test,0,150,1.220656,10.162677
2,-0.2,-0.4,0.8,5.3,-2.4,7.6,-1,test,0,244,2.537716,9.274158
3,2.3,2.5,-0.4,4.5,-1.6,3.4,-1,test,0,326,2.830194,6.169279
4,-0.3,0.2,-1.4,3.5,3.3,12.4,-1,test,0,409,3.597221,12.886039


# 数据聚合

In [8]:
agg_func = lambda x: list(x)
map_agg_func = {    
    'time_point' : agg_func,
    
    'acc_all' : agg_func,
    'acc_allg' : agg_func,
    
    'acc_x' : agg_func,
    'acc_y' : agg_func,
    'acc_z' : agg_func,
    
    'acc_xg' : agg_func,
    'acc_yg' : agg_func,
    'acc_zg' : agg_func
}
df_train_test_list = df_train_test.groupby(['flag','fragment_id','behavior_id']).agg(map_agg_func).reset_index()

In [9]:
df_train_test_list.head()

Unnamed: 0,flag,fragment_id,behavior_id,time_point,acc_all,acc_allg,acc_x,acc_y,acc_z,acc_xg,acc_yg,acc_zg
0,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
1,test,1,-1,"[151, 232, 318, 406, 493, 581, 661, 749, 833, ...","[0.1, 0.1, 0.0, 0.223606797749979, 0.316227766...","[9.629122493768579, 9.525229656024049, 9.56504...","[0.1, 0.0, 0.0, 0.2, 0.1, 0.0, 0.2, 0.1, 0.0, ...","[0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, -0.1, -0....","[0.0, 0.1, 0.0, 0.0, -0.3, 0.0, -0.1, 0.0, -0....","[-1.0, -0.9, -1.2, -0.8, -0.8, -1.0, -1.0, -0....","[4.6, 4.4, 4.6, 4.4, 4.4, 4.6, 4.5, 4.4, 4.6, ...","[8.4, 8.4, 8.3, 8.2, 8.2, 8.4, 8.3, 8.3, 8.6, ..."
2,test,2,-1,"[46, 135, 233, 315, 397, 483, 574, 659, 751, 8...","[0.1, 0.31622776601683794, 0.5, 0.583095189484...","[10.00299955013495, 9.7205966895042, 9.4403389...","[0.0, 0.3, 0.3, 0.3, 0.4, -0.1, 0.1, 0.4, -0.5...","[0.0, 0.0, 0.0, 0.3, 0.0, -0.1, 0.0, -0.2, 0.0...","[0.1, -0.1, -0.4, 0.4, 0.5, 0.0, 0.0, -0.2, -0...","[0.9, 1.2, 1.2, 1.3, 1.2, 0.9, 1.2, 1.4, 0.3, ...","[3.3, 3.2, 3.2, 3.2, 2.9, 2.8, 2.9, 2.9, 2.6, ...","[9.4, 9.1, 8.8, 9.6, 9.7, 9.4, 9.2, 9.1, 9.0, ..."
3,test,3,-1,"[91, 172, 264, 345, 436, 516, 618, 701, 782, 8...","[0.28284271247461906, 0.2, 0.0, 0.282842712474...","[9.642095207992918, 9.39095309327014, 9.404254...","[0.0, 0.0, 0.0, -0.2, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-0.2, -0.2, 0.0, 0.0, 0.0, -0.1, 0.0, -0.1, 0...","[0.2, 0.0, 0.0, -0.2, -0.2, -0.3, 0.2, 0.1, -0...","[0.2, 0.3, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.2, ...","[5.8, 5.9, 5.8, 5.9, 5.9, 5.8, 5.9, 5.9, 5.7, ...","[7.7, 7.3, 7.4, 7.5, 7.3, 7.1, 7.5, 7.6, 7.3, ..."
4,test,4,-1,"[38, 112, 205, 282, 364, 451, 530, 617, 700, 7...","[0.7348469228349533, 0.31622776601683794, 1.03...","[10.431203190428226, 9.633794683301073, 8.9386...","[0.2, 0.1, -0.1, -0.4, 2.3, 0.7, -0.9, 0.4, -0...","[-0.1, 0.0, 0.5, 1.0, -0.3, -0.5, -0.5, -1.3, ...","[0.7, -0.3, -0.9, 0.3, 1.7, 0.3, -1.4, 1.4, -1...","[3.7, 3.4, 3.0, 2.3, 5.6, 3.3, 2.3, 3.2, 1.9, ...","[4.6, 5.0, 5.1, 6.1, 5.0, 4.9, 5.4, 4.7, 5.4, ...","[8.6, 7.5, 6.7, 8.3, 9.1, 8.4, 5.9, 8.8, 6.4, ..."


# 抽取特征

In [41]:
map_features_fun = {
    # 时域
    'time_sum' : lambda x:np.sum(x),
    'time_mean' : lambda x: np.mean(x),
    'time_std' : lambda x: np.std(x),
    'time_var' : lambda x: np.var(x),
    'time_max' : lambda x: np.max(x),
    'time_min' : lambda x: np.min(x),
    'time_median' : lambda x: np.median(x),
    'time_energy' : lambda x: np.sum(np.power(x,2)),
    'time_mad' : lambda x: np.mean(np.absolute(x - np.mean(x))),
    'time_percent_9' : lambda x: np.percentile(x, 0.9),
    'time_percent_75' : lambda x: np.percentile(x, 0.75),
    'time_percent_25' : lambda x: np.percentile(x, 0.25),
    'time_percent_1' : lambda x: np.percentile(x, 0.1),
    'time_percent_75_25' : lambda x: np.percentile(x,75)-np.percentile(x,25),
    'time_range' : lambda x:np.max(x)-np.min(x),
    'time_zcr': lambda x: (np.diff(np.sign(x))!= 0).sum(),
    'time_mcr' : lambda x: (np.diff(np.sign(x-np.mean(x)))!= 0).sum(),
    'time_minind' : lambda x: np.argmin(x),
    'time_maxind' : lambda x: np.argmax(x),
    'time_skew' : lambda x: skew(x),
    'time_kurtosis' : lambda x: kurtosis(x),
    'time_zero_big' : lambda x: np.sum(np.sign(x)>0),
    'time_zero_small' : lambda x: np.sum(np.sign(x)<0),
    'time_len' : lambda x: np.size(x),
    
    # 频域
    'fft_dc' : lambda x: np.abs(np.fft.fft(x))[0],
    'fft_mean' : lambda x: np.mean(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_var' : lambda x: np.var(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_std' : lambda x: np.std(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_sum' : lambda x: np.sum(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_entropy' : lambda x: -1.0 * np.sum(np.log2(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]/np.sum(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]))),
    'fft_energy' : lambda x: np.sum(np.power(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1],2)),
    'fft_skew' : lambda x: skew(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_kurtosis' : lambda x: kurtosis(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_max' : lambda x: np.max(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_min' : lambda x: np.min(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_maxind' : lambda x: np.argmax(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_minind' : lambda x: np.argmin(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1])
}

In [11]:
df_train_test_features = df_train_test_list[['flag','fragment_id','behavior_id']]
for col in ['acc_all','acc_allg','acc_x','acc_y','acc_z','acc_xg','acc_yg','acc_zg']:
    for f_name, f_fun in tqdm(map_features_fun.items()):
        df_train_test_features[col + '_' + f_name] = df_train_test_list[col].map(f_fun)

100%|██████████| 37/37 [00:36<00:00,  1.00it/s]
100%|██████████| 37/37 [00:37<00:00,  1.01s/it]
100%|██████████| 37/37 [00:36<00:00,  1.01it/s]
100%|██████████| 37/37 [00:37<00:00,  1.01s/it]
100%|██████████| 37/37 [00:37<00:00,  1.02s/it]
100%|██████████| 37/37 [00:37<00:00,  1.02s/it]
100%|██████████| 37/37 [00:43<00:00,  1.17s/it]
100%|██████████| 37/37 [00:38<00:00,  1.03s/it]


In [12]:
df_train_test_features.head()

Unnamed: 0,flag,fragment_id,behavior_id,acc_all_time_sum,acc_all_time_mean,acc_all_time_std,acc_all_time_var,acc_all_time_max,acc_all_time_min,acc_all_time_median,...,acc_zg_fft_std,acc_zg_fft_sum,acc_zg_fft_entropy,acc_zg_fft_energy,acc_zg_fft_skew,acc_zg_fft_kurtosis,acc_zg_fft_max,acc_zg_fft_min,acc_zg_fft_maxind,acc_zg_fft_minind
0,test,0,-1,109.889781,1.894651,1.01495,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.5,0.748835,-0.40344,33.239467,5.058632,23,10
1,test,1,-1,9.328892,0.169616,0.102484,0.010503,0.412311,0.0,0.2,...,0.473385,26.632236,134.339388,32.32,0.458802,-0.111296,2.047783,0.141423,11,22
2,test,2,-1,21.264674,0.379726,0.368036,0.135451,2.083267,0.0,0.263896,...,1.472473,94.856892,139.783971,382.06,-0.075707,-0.781631,6.476364,0.707192,15,18
3,test,3,-1,10.755163,0.188687,0.135766,0.018432,0.519615,0.0,0.2,...,0.519421,33.706355,138.592552,48.13,0.902622,1.222909,2.783088,0.263222,17,3
4,test,4,-1,76.585242,1.276421,0.615156,0.378417,2.875761,0.223607,1.185515,...,3.033211,198.376073,152.614007,1587.78,0.86819,1.960154,16.666859,0.852075,17,21


# 模型参数

In [42]:
params = {
          'application': 'multiclass',
          'num_class': 19,
          'boosting': 'gbdt',
          #'metric': ['multi_logloss','multi_error'],
          'num_leaves': 63,
          'learning_rate': 0.1,
          'bagging_fraction': 0.8,
          'feature_fraction': 0.7,
          'min_split_gain': 0.01,
          'min_child_samples': 120,
          'min_child_weight': 0.01,
          'lambda_l2': 0.05,
          'verbosity': -1,
          'data_random_seed': 2020
         }  

# 获取训练和测试数据

In [43]:
cols = [c for c in df_train_test_features.columns if c not in ['flag','fragment_id','behavior_id']]

In [44]:
# cols = ['acc_yg_time_max','acc_xg_time_energy','acc_yg_time_energy','acc_y_time_percent_75_25','acc_yg_time_range','acc_xg_fft_dc',\
#  'acc_allg_time_mean','acc_allg_time_sum','acc_y_fft_max','acc_yg_time_percent_9','acc_xg_time_max','acc_yg_time_median','acc_zg_time_mean',\
#  'acc_yg_fft_dc','acc_yg_time_sum','acc_allg_time_median','acc_zg_time_sum','acc_xg_time_percent_9','acc_yg_time_percent_75','acc_yg_time_mean',\
#  'acc_allg_time_energy','acc_yg_time_std','acc_all_time_median','acc_xg_time_percent_75_25','acc_xg_time_mean','acc_zg_time_energy',\
#  'acc_allg_time_percent_75_25','acc_x_time_percent_75_25','acc_all_time_sum','acc_y_fft_var','acc_x_time_zcr','acc_y_fft_maxind',\
#  'acc_xg_time_sum','acc_zg_time_median','acc_y_time_sum','acc_y_time_zcr','acc_zg_time_percent_9','acc_xg_time_zero_big',\
#  'acc_xg_time_median','acc_yg_time_zero_big','acc_zg_time_mcr','acc_yg_time_min','acc_y_fft_skew','acc_x_fft_max','acc_xg_time_zero_small',\
#  'acc_x_time_sum','acc_yg_time_percent_1','acc_zg_time_percent_1','acc_x_time_mad','acc_allg_fft_maxind','acc_yg_time_zcr','acc_all_time_percent_9','acc_zg_fft_dc']

In [33]:
X = df_train_test_features[df_train_test_features['flag']=='train'][cols].values
y = df_train_test_features[df_train_test_features['flag']=='train']['behavior_id'].values
X_test = df_train_test_features[df_train_test_features['flag']=='test'][cols].values

In [34]:
seed = 2020
folds = 5
kfold = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

In [35]:
df_train_stacking = pd.DataFrame(np.zeros((X.shape[0],19)))
df_test_stacking = pd.DataFrame(np.zeros((X_test.shape[0],19)))

# 训练模型

In [36]:
for train_index, val_index in tqdm(kfold.split(X, y)):

    print('--------------- begin ---------------')
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val)
    
    watchlist = [lgb_train, lgb_val]
    model = lgb.train(params,
                      train_set = lgb_train, 
                      num_boost_round = 5000,
                      valid_sets = watchlist,
                      verbose_eval = 30,
                      early_stopping_rounds = 80)
    
    
    X_val_predict = model.predict(X_val)
    X_test_predict = model.predict(X_test)
    
    df_train_stacking.loc[val_index,:] = X_val_predict
    df_test_stacking[:] += X_test_predict / folds

0it [00:00, ?it/s]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.820034	valid_1's multi_logloss: 1.14641
[60]	training's multi_logloss: 0.402691	valid_1's multi_logloss: 0.887218
[90]	training's multi_logloss: 0.210557	valid_1's multi_logloss: 0.799803
[120]	training's multi_logloss: 0.107305	valid_1's multi_logloss: 0.765057
[150]	training's multi_logloss: 0.0545503	valid_1's multi_logloss: 0.753509
[180]	training's multi_logloss: 0.0280992	valid_1's multi_logloss: 0.758664
[210]	training's multi_logloss: 0.0151867	valid_1's multi_logloss: 0.772345
Early stopping, best iteration is:
[154]	training's multi_logloss: 0.0498137	valid_1's multi_logloss: 0.752277


1it [00:07,  7.81s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.821348	valid_1's multi_logloss: 1.15646
[60]	training's multi_logloss: 0.405651	valid_1's multi_logloss: 0.898301
[90]	training's multi_logloss: 0.211158	valid_1's multi_logloss: 0.809093
[120]	training's multi_logloss: 0.107917	valid_1's multi_logloss: 0.771659
[150]	training's multi_logloss: 0.0549982	valid_1's multi_logloss: 0.764
[180]	training's multi_logloss: 0.0285384	valid_1's multi_logloss: 0.771688
[210]	training's multi_logloss: 0.0155433	valid_1's multi_logloss: 0.785012
Early stopping, best iteration is:
[154]	training's multi_logloss: 0.0501906	valid_1's multi_logloss: 0.762774


2it [00:14,  7.55s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.826313	valid_1's multi_logloss: 1.11937
[60]	training's multi_logloss: 0.410896	valid_1's multi_logloss: 0.858196
[90]	training's multi_logloss: 0.213345	valid_1's multi_logloss: 0.767609
[120]	training's multi_logloss: 0.109414	valid_1's multi_logloss: 0.726163
[150]	training's multi_logloss: 0.0560842	valid_1's multi_logloss: 0.716018
[180]	training's multi_logloss: 0.0292661	valid_1's multi_logloss: 0.720736
[210]	training's multi_logloss: 0.0159187	valid_1's multi_logloss: 0.730494
Early stopping, best iteration is:
[157]	training's multi_logloss: 0.0478978	valid_1's multi_logloss: 0.714701


3it [00:21,  7.41s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.826319	valid_1's multi_logloss: 1.10649
[60]	training's multi_logloss: 0.407699	valid_1's multi_logloss: 0.843817
[90]	training's multi_logloss: 0.211226	valid_1's multi_logloss: 0.753986
[120]	training's multi_logloss: 0.10807	valid_1's multi_logloss: 0.718247
[150]	training's multi_logloss: 0.055062	valid_1's multi_logloss: 0.703977
[180]	training's multi_logloss: 0.0284493	valid_1's multi_logloss: 0.699156
[210]	training's multi_logloss: 0.0153408	valid_1's multi_logloss: 0.704487
[240]	training's multi_logloss: 0.00987334	valid_1's multi_logloss: 0.715411
Early stopping, best iteration is:
[175]	training's multi_logloss: 0.031564	valid_1's multi_logloss: 0.69778


4it [00:29,  7.54s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.82557	valid_1's multi_logloss: 1.11381
[60]	training's multi_logloss: 0.405821	valid_1's multi_logloss: 0.857218
[90]	training's multi_logloss: 0.208403	valid_1's multi_logloss: 0.760531
[120]	training's multi_logloss: 0.105595	valid_1's multi_logloss: 0.722221
[150]	training's multi_logloss: 0.0535625	valid_1's multi_logloss: 0.713287
[180]	training's multi_logloss: 0.0278624	valid_1's multi_logloss: 0.721058
[210]	training's multi_logloss: 0.0151284	valid_1's multi_logloss: 0.73398
Early stopping, best iteration is:
[143]	training's multi_logloss: 0.062867	valid_1's multi_logloss: 0.712599


5it [00:37,  7.42s/it]


# 查看结果

In [37]:
df_test_stacking.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,0.00033,0.000606,0.000134,0.00024,0.000484,0.000303,0.000317,0.000228,5.6e-05,0.000167,0.00038,0.001044,0.630674,0.321598,0.001968,0.00871,0.000195,0.000957,0.031611
1,0.076428,0.002349,6.2e-05,0.874932,0.001291,0.000271,0.039748,0.000293,3e-05,0.001053,0.000369,0.000725,7.6e-05,3.8e-05,0.000577,0.000152,4.1e-05,7e-05,0.001495
2,0.002588,0.378425,0.00081,0.107659,0.000592,0.007968,0.396513,0.001149,0.000807,0.01201,0.057861,0.024999,0.000499,0.000808,0.000109,0.006282,0.000182,0.000231,0.000508
3,0.13803,0.278533,0.00016,0.182032,0.000965,0.001181,0.357474,0.000904,0.000483,0.02715,0.000436,0.003688,0.000294,0.000747,0.000306,0.001448,0.000228,0.005061,0.000878
4,0.001229,0.000288,0.000111,0.00057,0.000375,0.000328,0.00022,0.000209,0.00026,9.5e-05,0.000142,0.000431,0.001368,0.086592,8.5e-05,0.003537,0.001659,0.0066,0.895901


# 模型得分和保存

In [38]:
def acc_combo(y, y_pred):
    # 数值ID与行为编码的对应关系
    mapping = {0: 'A_0', 1: 'A_1', 2: 'A_2', 3: 'A_3', 
        4: 'D_4', 5: 'A_5', 6: 'B_1',7: 'B_5', 
        8: 'B_2', 9: 'B_3', 10: 'B_0', 11: 'A_6', 
        12: 'C_1', 13: 'C_3', 14: 'C_0', 15: 'B_6', 
        16: 'C_2', 17: 'C_5', 18: 'C_6'}
    # 将行为ID转为编码
    code_y, code_y_pred = mapping[y], mapping[y_pred]
    if code_y == code_y_pred: #编码完全相同得分1.0
        return 1.0
    elif code_y.split("_")[0] == code_y_pred.split("_")[0]: #编码仅字母部分相同得分1.0/7
        return 1.0/7
    elif code_y.split("_")[1] == code_y_pred.split("_")[1]: #编码仅数字部分相同得分1.0/3
        return 1.0/3
    else:
        return 0.0

In [39]:
labels = np.argmax(df_test_stacking.values, axis=1)
pred_y = np.argmax(df_train_stacking.values, axis=1)


acc_scores = round(accuracy_score(y, pred_y), 5)
acc_combo_scores = round(sum(acc_combo(y_true, y_pred) for y_true, y_pred in zip(y, pred_y)) / len(list(y)),5)

print('--------')
print(' acc : ', acc_scores, 'acc_combo : ', acc_combo_scores)

df_out = df_train_test_features[df_train_test_features['flag']=='test'][['fragment_id']]
df_out['behavior_id'] = labels
df_out.to_csv('./submit_lgb_%.5f_%.5f.csv' % (acc_scores, acc_combo_scores), index=False)

--------
 acc :  0.76303 acc_combo :  0.79819
