In [3]:
# 包导入
import pandas as pd
import numpy as np
import tsfresh as tsf
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute

In [4]:
# Load the raw training set and the test-A set.
data_train, data_test_A = (
    pd.read_csv(csv_path) for csv_path in ("../data/train.csv", "../data/testA.csv")
)

# One shape per line, training set first.
print(data_train.shape, data_test_A.shape, sep="\n")

(100000, 3)
(20000, 2)


In [5]:
# Preview the first and last five rows in one table.
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same rows and index.
pd.concat([data_train.head(), data_train.tail()])

Unnamed: 0,id,heartbeat_signals,label
0,0,"0.9912297987616655,0.9435330436439665,0.764677...",0.0
1,1,"0.9714822034884503,0.9289687459588268,0.572932...",0.0
2,2,"1.0,0.9591487564065292,0.7013782792997189,0.23...",2.0
3,3,"0.9757952826275774,0.9340884687738161,0.659636...",0.0
4,4,"0.0,0.055816398940721094,0.26129357194994196,0...",2.0
99995,99995,"1.0,0.677705342021188,0.22239242747868546,0.25...",0.0
99996,99996,"0.9268571578157265,0.9063471198026871,0.636993...",2.0
99997,99997,"0.9258351628306013,0.5873839035878395,0.633226...",3.0
99998,99998,"1.0,0.9947621698382489,0.8297017704865509,0.45...",2.0
99999,99999,"0.9259994004527861,0.916476635326053,0.4042900...",0.0


# 将特征转化为时间序列特征

In [6]:
# Reshape the ECG signal from one comma-separated string per row into long form.
# split(",", expand=True) yields one column per time step; stack() then folds those
# columns into an inner index level, giving a Series keyed by (source row, time step).
# The values are still strings at this point.
train_heartbeat_df = data_train["heartbeat_signals"].str.split(",", expand=True).stack()


In [7]:
train_heartbeat_df[9999]

0      0.9775722316331337
1      0.9516195523866948
2      0.9089355099006714
3      0.8666344453400215
4      0.8003216508740236
              ...        
200                   0.0
201                   0.0
202                   0.0
203                   0.0
204                   0.0
Length: 205, dtype: object

In [8]:
# Turn both index levels into ordinary columns: level_0 (source row) and
# level_1 (time step); the signal values end up in the column named 0.
train_heartbeat_df = train_heartbeat_df.reset_index()

In [9]:
train_heartbeat_df

Unnamed: 0,level_0,level_1,0
0,0,0,0.9912297987616655
1,0,1,0.9435330436439665
2,0,2,0.7646772997256593
3,0,3,0.6185708990212999
4,0,4,0.3796321642826237
...,...,...,...
20499995,99999,200,0.0
20499996,99999,201,0.0
20499997,99999,202,0.0
20499998,99999,203,0.0


In [10]:
# Use the source-row number (level_0) as the row index.
train_heartbeat_df = train_heartbeat_df.set_index("level_0")

In [11]:
train_heartbeat_df

Unnamed: 0_level_0,level_1,0
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0.9912297987616655
0,1,0.9435330436439665
0,2,0.7646772997256593
0,3,0.6185708990212999
0,4,0.3796321642826237
...,...,...
99999,200,0.0
99999,201,0.0
99999,202,0.0
99999,203,0.0


In [12]:
# Clear the index name ("level_0") left over from set_index.
train_heartbeat_df.index.name = None
train_heartbeat_df

Unnamed: 0,level_1,0
0,0,0.9912297987616655
0,1,0.9435330436439665
0,2,0.7646772997256593
0,3,0.6185708990212999
0,4,0.3796321642826237
...,...,...
99999,200,0.0
99999,201,0.0
99999,202,0.0
99999,203,0.0


In [13]:
# Give the columns meaningful names: level_1 -> time, 0 -> heartbeat_signals.
train_heartbeat_df = train_heartbeat_df.rename(
    columns={"level_1": "time", 0: "heartbeat_signals"}
)
train_heartbeat_df

Unnamed: 0,time,heartbeat_signals
0,0,0.9912297987616655
0,1,0.9435330436439665
0,2,0.7646772997256593
0,3,0.6185708990212999
0,4,0.3796321642826237
...,...,...
99999,200,0.0
99999,201,0.0
99999,202,0.0
99999,203,0.0


In [14]:
train_heartbeat_df["heartbeat_signals"]

0        0.9912297987616655
0        0.9435330436439665
0        0.7646772997256593
0        0.6185708990212999
0        0.3796321642826237
                ...        
99999                   0.0
99999                   0.0
99999                   0.0
99999                   0.0
99999                   0.0
Name: heartbeat_signals, Length: 20500000, dtype: object

In [15]:
# The split values are strings (object dtype); convert them to floats.
train_heartbeat_df["heartbeat_signals"] = train_heartbeat_df["heartbeat_signals"].astype(float)

In [16]:
train_heartbeat_df["heartbeat_signals"].dtype

dtype('float64')

In [17]:
train_heartbeat_df

Unnamed: 0,time,heartbeat_signals
0,0,0.991230
0,1,0.943533
0,2,0.764677
0,3,0.618571
0,4,0.379632
...,...,...
99999,200,0.000000
99999,201,0.000000
99999,202,0.000000
99999,203,0.000000


In [18]:
# Set the labels aside, then swap the raw signal string for the long-format
# (time, heartbeat_signals) rows; the join aligns on the row index.
data_train_label = data_train["label"]
data_train = (
    data_train
    .drop(columns=["label", "heartbeat_signals"])
    .join(train_heartbeat_df)
)

data_train

Unnamed: 0,id,time,heartbeat_signals
0,0,0,0.991230
0,0,1,0.943533
0,0,2,0.764677
0,0,3,0.618571
0,0,4,0.379632
...,...,...,...
99999,99999,200,0.000000
99999,99999,201,0.000000
99999,99999,202,0.000000
99999,99999,203,0.000000


In [19]:
# Inspect one sample: each sample has 205 time steps (time runs 0-204).
data_train[data_train["id"]==1]

Unnamed: 0,id,time,heartbeat_signals
1,1,0,0.971482
1,1,1,0.928969
1,1,2,0.572933
1,1,3,0.178457
1,1,4,0.122962
...,...,...,...
1,1,200,0.000000
1,1,201,0.000000
1,1,202,0.000000
1,1,203,0.000000


## 使用 tsfresh 进行时间序列特征处理

In [20]:
from tsfresh import extract_features

In [21]:
# Feature extraction: compute tsfresh features per sample, grouping rows by `id`
# and ordering them by `time`. The recorded run of this cell took about 17.5 minutes.
train_features = extract_features(data_train, column_id='id', column_sort='time')
train_features

Feature Extraction: 100%|██████████| 240/240 [17:35<00:00,  4.40s/it]  


Unnamed: 0,heartbeat_signals__variance_larger_than_standard_deviation,heartbeat_signals__has_duplicate_max,heartbeat_signals__has_duplicate_min,heartbeat_signals__has_duplicate,heartbeat_signals__sum_values,heartbeat_signals__abs_energy,heartbeat_signals__mean_abs_change,heartbeat_signals__mean_change,heartbeat_signals__mean_second_derivative_central,heartbeat_signals__median,...,heartbeat_signals__permutation_entropy__dimension_5__tau_1,heartbeat_signals__permutation_entropy__dimension_6__tau_1,heartbeat_signals__permutation_entropy__dimension_7__tau_1,heartbeat_signals__query_similarity_count__query_None__threshold_0.0,"heartbeat_signals__matrix_profile__feature_""min""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""max""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""mean""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""median""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""25""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""75""__threshold_0.98"
0,0.0,0.0,1.0,1.0,38.927945,18.216197,0.019894,-0.004859,0.000117,0.125531,...,2.184420,2.500658,2.722686,,6.445546,12.165525,10.246524,10.746992,8.388625,11.484910
1,0.0,0.0,1.0,1.0,19.445634,7.705092,0.019952,-0.004762,0.000105,0.030481,...,2.710933,3.065802,3.224835,,3.209140,12.649111,9.031069,9.437545,6.723180,12.094899
2,0.0,0.0,1.0,1.0,21.192974,9.140423,0.009863,-0.004902,0.000101,0.000000,...,1.263370,1.406001,1.509478,,3.054539,8.246211,7.370478,8.246211,5.966122,8.246211
3,0.0,0.0,1.0,1.0,42.113066,15.757623,0.018743,-0.004783,0.000103,0.241397,...,2.986728,3.534354,3.854177,,3.010557,9.797959,6.331360,6.406440,5.266743,7.091706
4,0.0,0.0,1.0,1.0,69.756786,51.229616,0.014514,0.000000,-0.000137,0.000000,...,1.914511,2.165627,2.323993,,9.181236,13.429784,9.959913,9.516290,9.286013,10.270925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.0,0.0,1.0,1.0,63.323449,28.742238,0.023588,-0.004902,0.000794,0.388402,...,2.873602,3.391830,3.679969,,2.436377,9.591663,5.635231,6.366205,3.596982,7.033638
99996,0.0,0.0,1.0,1.0,69.657534,31.866323,0.017373,-0.004543,0.000051,0.421138,...,3.085504,3.728881,4.095457,,1.415410,7.483315,2.893592,2.684349,2.049241,3.334109
99997,0.0,0.0,1.0,1.0,40.897057,16.412857,0.019470,-0.004538,0.000834,0.213306,...,2.601062,2.996962,3.293562,,5.748652,12.165525,8.524637,7.983410,7.062217,10.081756
99998,0.0,0.0,1.0,1.0,42.333303,14.281281,0.017032,-0.004902,0.000013,0.264974,...,3.236950,3.793512,4.018302,,2.346822,8.246211,4.951374,4.727535,4.069786,5.615282


In [35]:
# Reshape the test-set ECG strings into long form, one row per (sample, time step):
# split into per-step columns, stack them, index by source row, name the columns
# and convert the string values to floats.
test_heartbeat_df = (
    data_test_A["heartbeat_signals"]
    .str.split(",", expand=True)
    .stack()
    .reset_index()
    .set_index("level_0")
    .rename_axis(None)
    .rename(columns={"level_1": "time", 0: "heartbeat_signals"})
    .astype({"heartbeat_signals": float})
)

# Swap the raw signal string in the test set for the long-format rows
# (the test set has no label column to set aside).
data_test = data_test_A.drop(columns=["heartbeat_signals"]).join(test_heartbeat_df)
test_heartbeat_df

Unnamed: 0,time,heartbeat_signals
0,0,0.991571
0,1,1.000000
0,2,0.631816
0,3,0.136230
0,4,0.041420
...,...,...
19999,200,0.000000
19999,201,0.000000
19999,202,0.000000
19999,203,0.000000


In [37]:
data_test

Unnamed: 0,id,time,heartbeat_signals
0,100000,0,0.991571
0,100000,1,1.000000
0,100000,2,0.631816
0,100000,3,0.136230
0,100000,4,0.041420
...,...,...,...
19999,119999,200,0.000000
19999,119999,201,0.000000
19999,119999,202,0.000000
19999,119999,203,0.000000


In [38]:
# Feature extraction for the test set, with the same id/time columns as for training.
# The recorded run of this cell took about 3.5 minutes.
test_features = extract_features(data_test, column_id='id', column_sort='time')
test_features

Feature Extraction: 100%|██████████| 239/239 [03:26<00:00,  1.15it/s]


Unnamed: 0,heartbeat_signals__variance_larger_than_standard_deviation,heartbeat_signals__has_duplicate_max,heartbeat_signals__has_duplicate_min,heartbeat_signals__has_duplicate,heartbeat_signals__sum_values,heartbeat_signals__abs_energy,heartbeat_signals__mean_abs_change,heartbeat_signals__mean_change,heartbeat_signals__mean_second_derivative_central,heartbeat_signals__median,...,heartbeat_signals__permutation_entropy__dimension_5__tau_1,heartbeat_signals__permutation_entropy__dimension_6__tau_1,heartbeat_signals__permutation_entropy__dimension_7__tau_1,heartbeat_signals__query_similarity_count__query_None__threshold_0.0,"heartbeat_signals__matrix_profile__feature_""min""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""max""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""mean""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""median""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""25""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""75""__threshold_0.98"
100000,0.0,0.0,1.0,1.0,19.229863,7.907934,0.018374,-0.004861,-0.000021,0.027745,...,2.021451,2.356864,2.587925,,2.692281,11.661904,9.456542,9.648131,8.525878,11.076665
100001,0.0,0.0,0.0,1.0,84.298932,38.292802,0.021483,-0.001195,0.000195,0.367241,...,4.099123,4.656875,4.882383,,0.939893,4.470801,2.584921,2.493456,1.542105,3.698142
100002,0.0,0.0,1.0,1.0,47.789921,21.287039,0.021610,-0.004781,0.000749,0.260611,...,2.900488,3.321028,3.516715,,5.684175,12.512693,9.751129,10.483467,8.221934,11.135462
100003,0.0,0.0,1.0,1.0,47.069011,28.749520,0.023874,-0.004881,0.000194,0.000000,...,1.530558,1.806294,1.979305,,0.909721,4.898979,3.531943,4.898979,1.675526,4.898979
100004,0.0,0.0,1.0,1.0,24.899397,10.177998,0.020548,-0.004902,0.000276,0.034859,...,2.626554,2.960568,3.168085,,5.033722,14.180087,11.450599,11.991037,11.825491,12.597792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119995,0.0,0.0,1.0,1.0,43.175130,18.967833,0.016106,-0.004902,0.000411,0.205399,...,3.150910,3.625398,3.843586,,3.687770,8.700294,5.991330,6.323450,4.155558,7.191577
119996,0.0,0.0,1.0,1.0,31.030782,14.413244,0.021473,-0.004902,0.000429,0.000000,...,1.732287,1.955659,2.081946,,10.456465,12.982197,11.338307,11.244766,10.763332,11.762948
119997,0.0,0.0,1.0,1.0,31.648623,13.083992,0.017566,-0.004665,0.000087,0.010807,...,2.248241,2.497097,2.663404,,6.037870,11.661904,9.312119,8.973721,8.064338,10.409977
119998,0.0,0.0,1.0,1.0,19.305442,6.700835,0.019937,-0.004547,0.000617,0.000000,...,2.538456,2.912829,3.021449,,10.350940,15.065584,12.961223,12.887409,12.118259,13.558463


In [39]:
# Persist the extracted test features so the slow extraction need not be repeated.
# NOTE(review): index=False leaves the row index out of the file, and the index of
# test_features holds the sample ids (100000, 100001, ...) - confirm dropping them is intended.
test_features.to_csv('test_787timefeature.csv',index=False)

In [42]:
features_filtered.columns

Index(['heartbeat_signals__sum_values',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_38',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_37',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_36',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_35',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_34',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_33',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_32',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_31',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_30',
       ...
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_84',
       'heartbeat_signals__fft_coefficient__attr_"imag"__coeff_97',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_90',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_94',
       'heartbeat_signals__fft_coefficient__attr_"abs"__coeff_92',
       'he

In [22]:
# Persist the extracted training features so the slow extraction need not be repeated.
# NOTE(review): index=False leaves the row index (the sample ids) out of the file -
# confirm dropping them is intended.
train_features.to_csv('787timefeature.csv',index=False)

In [23]:
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute

# Fill in (not drop) the NaN values among the extracted features. The return value
# is unused, so this relies on impute() modifying train_features in place.
impute(train_features)
# Keep only the features that pass tsfresh's relevance tests against the label.
features_filtered = select_features(train_features, data_train_label)



In [24]:
features_filtered

Unnamed: 0,heartbeat_signals__sum_values,"heartbeat_signals__fft_coefficient__attr_""abs""__coeff_38","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_37","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_36","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_35","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_34","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_33","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_32","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_31","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_30",...,"heartbeat_signals__fft_coefficient__attr_""abs""__coeff_84","heartbeat_signals__fft_coefficient__attr_""imag""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_90","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_94","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_75","heartbeat_signals__fft_coefficient__attr_""real""__coeff_88","heartbeat_signals__fft_coefficient__attr_""real""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_83"
0,38.927945,0.660949,1.090709,0.848728,1.168685,0.982133,1.223496,1.236300,1.104172,1.497129,...,0.531883,-0.047438,0.554370,0.307586,0.564596,0.562960,0.591859,0.504124,0.528450,0.473568
1,19.445634,1.718217,1.280923,1.850706,1.460752,1.924501,1.925485,1.715938,2.079957,1.818636,...,0.563590,-0.109579,0.697446,0.398073,0.640969,0.270192,0.224925,0.645082,0.635135,0.297325
2,21.192974,1.814281,1.619051,1.215343,1.787166,2.146987,1.686190,1.540137,2.291031,2.403422,...,0.712487,-0.074042,0.321703,0.390386,0.716929,0.316524,0.422077,0.722742,0.680590,0.383754
3,42.113066,2.109550,0.619634,2.366413,2.071539,1.000340,2.728281,1.391727,2.017176,2.610492,...,0.601499,-0.184248,0.564669,0.623353,0.466980,0.651774,0.308915,0.550097,0.466904,0.494024
4,69.756786,0.194549,0.348882,0.092119,0.653924,0.231422,1.080003,0.711244,1.357904,1.237998,...,0.015292,0.070505,0.065835,0.051780,0.092940,0.103773,0.179405,-0.089611,0.091841,0.056867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,63.323449,0.840651,1.186210,1.396236,0.417221,2.036034,1.659054,0.500584,1.693545,0.859932,...,0.779955,0.005525,0.486013,0.273372,0.705386,0.602898,0.447929,0.474844,0.564266,0.133969
99996,69.657534,1.557787,1.393960,0.989147,1.611333,1.793044,1.092325,0.507138,1.763940,2.677643,...,0.539489,0.114670,0.579498,0.417226,0.270110,0.556596,0.703258,0.462312,0.269719,0.539236
99997,40.897057,0.469758,1.000355,0.706395,1.190514,0.674603,1.632769,0.229008,2.027802,0.302457,...,0.282597,-0.474629,0.460647,0.478341,0.527891,0.904111,0.728529,0.178410,0.500813,0.773985
99998,42.333303,0.992948,1.354894,2.238589,1.237608,1.325212,2.785515,1.918571,0.814167,2.613950,...,0.594252,-0.162106,0.694276,0.681025,0.357196,0.498088,0.433297,0.406154,0.324771,0.340727


In [43]:
# Re-read the raw CSVs into fresh frames; `data_train` was reshaped to long format
# earlier, so the wide-format preprocessing below starts again from the files.
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/testA.csv")

In [28]:
# Wide view of the raw training signal: one string-valued column per time step.
# The original cell read from `data_train1`, which is not defined anywhere in this
# notebook; the raw frame loaded in the previous cell (`train`) is used instead.
feature_yuan = train["heartbeat_signals"].str.split(",", expand=True)

In [30]:
def reduce_mem_usage(df, use_float16=True):
    """Shrink ``df`` in place by downcasting its columns, and report the saving.

    Signed-integer columns are cast to the narrowest of int8/16/32/64 that holds
    their observed min and max (bounds inclusive). Float columns are cast to
    float16 or float32 when their range fits, otherwise float64. Object columns
    become ``category``. Every other dtype (bool, unsigned int, datetime,
    category, pandas extension dtypes) is left untouched, so the function can be
    run again on its own output.

    Args:
        df: DataFrame to shrink. It is modified in place.
        use_float16: When True (default, the historical behaviour) float columns
            may be narrowed to float16, which keeps only about three significant
            decimal digits. Pass False to stop at float32.

    Returns:
        The same ``df`` object, after downcasting.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain numpy signed-int and float columns are downcast; anything
        # else would either grow (bool -> float16) or fail in min()/comparison.
        kind = col_type.kind if isinstance(col_type, np.dtype) else None
        if kind == 'i':
            candidates, limits_of, fallback = int_candidates, np.iinfo, None
        elif kind == 'f':
            candidates, limits_of, fallback = float_candidates, np.finfo, np.float64
        else:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        for candidate in candidates:
            limits = limits_of(candidate)
            # Inclusive bounds: a column spanning exactly -128..127 fits in int8.
            if c_min >= limits.min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break
        else:
            # Nothing fitted (e.g. NaN/inf range or an empty column): floats fall
            # back to float64, integers keep their current dtype.
            if fallback is not None:
                df[col] = df[col].astype(fallback)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio in case the frame reports zero bytes.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

In [44]:
# Basic preprocessing: expand each comma-separated signal string into one numeric
# column per time step (s_0, s_1, ...), keeping the id and, for training, the label.
train_list = [
    [items[0], *(float(value) for value in items[1].split(',')), items[2]]
    for items in train.values
]
train = pd.DataFrame(
    np.array(train_list),
    columns=['id', *('s_' + str(step) for step in range(len(train_list[0]) - 2)), 'label'],
)
train = reduce_mem_usage(train)

# Same expansion for the test set, which has no label column.
test_list = [
    [items[0], *(float(value) for value in items[1].split(','))]
    for items in test.values
]
test = pd.DataFrame(
    np.array(test_list),
    columns=['id', *('s_' + str(step) for step in range(len(test_list[0]) - 1))],
)
test = reduce_mem_usage(test)

Memory usage of dataframe is 157.93 MB
Memory usage after optimization is: 39.67 MB
Decreased by 74.9%
Memory usage of dataframe is 31.43 MB
Memory usage after optimization is: 7.90 MB
Decreased by 74.9%


In [91]:
# Overwrite the id-valued index of test_features with test's row index so the two
# frames can be joined on it.
# NOTE(review): this pairs rows purely by position - verify both frames list the
# samples in the same order.
test_features.index=test.index

In [92]:
# Test design matrix: per-time-step columns plus all extracted tsfresh features
# (index-aligned join).
test_xin=test.join(test_features)

In [93]:
test_xin

Unnamed: 0,id,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,...,heartbeat_signals__permutation_entropy__dimension_5__tau_1,heartbeat_signals__permutation_entropy__dimension_6__tau_1,heartbeat_signals__permutation_entropy__dimension_7__tau_1,heartbeat_signals__query_similarity_count__query_None__threshold_0.0,"heartbeat_signals__matrix_profile__feature_""min""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""max""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""mean""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""median""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""25""__threshold_0.98","heartbeat_signals__matrix_profile__feature_""75""__threshold_0.98"
0,100000.0,0.991699,1.000000,0.631836,0.136230,0.041412,0.102722,0.120850,0.123413,0.107910,...,2.021451,2.356864,2.587925,,2.692281,11.661904,9.456542,9.648131,8.525878,11.076665
1,100001.0,0.607422,0.541504,0.340576,0.000000,0.090698,0.164917,0.195068,0.168823,0.198853,...,4.099123,4.656875,4.882383,,0.939893,4.470801,2.584921,2.493456,1.542105,3.698142
2,100002.0,0.975098,0.670898,0.686523,0.708496,0.718750,0.716797,0.720703,0.701660,0.596680,...,2.900488,3.321028,3.516715,,5.684175,12.512693,9.751129,10.483467,8.221934,11.135462
3,100003.0,0.995605,0.916992,0.520996,0.000000,0.221802,0.404053,0.490479,0.527344,0.518066,...,1.530558,1.806294,1.979305,,0.909721,4.898979,3.531943,4.898979,1.675526,4.898979
4,100004.0,1.000000,0.888184,0.745605,0.531738,0.380371,0.224609,0.091125,0.057648,0.003914,...,2.626554,2.960568,3.168085,,5.033722,14.180087,11.450599,11.991037,11.825491,12.597792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,119995.0,1.000000,0.833008,0.634277,0.639160,0.624023,0.598145,0.613770,0.624023,0.628906,...,3.150910,3.625398,3.843586,,3.687770,8.700294,5.991330,6.323450,4.155558,7.191577
19996,119996.0,1.000000,0.826172,0.452148,0.082214,0.000000,0.137085,0.201050,0.165649,0.158081,...,1.732287,1.955659,2.081946,,10.456465,12.982197,11.338307,11.244766,10.763332,11.762948
19997,119997.0,0.951660,0.916504,0.667480,0.352051,0.255371,0.197388,0.173584,0.141968,0.134521,...,2.248241,2.497097,2.663404,,6.037870,11.661904,9.312119,8.973721,8.064338,10.409977
19998,119998.0,0.927734,0.677246,0.242920,0.055359,0.102112,0.072266,0.021011,0.038300,0.048553,...,2.538456,2.912829,3.021449,,10.350940,15.065584,12.961223,12.887409,12.118259,13.558463


In [94]:
features_filtered

Unnamed: 0,heartbeat_signals__sum_values,"heartbeat_signals__fft_coefficient__attr_""abs""__coeff_38","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_37","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_36","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_35","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_34","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_33","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_32","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_31","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_30",...,"heartbeat_signals__fft_coefficient__attr_""abs""__coeff_84","heartbeat_signals__fft_coefficient__attr_""imag""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_90","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_94","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_75","heartbeat_signals__fft_coefficient__attr_""real""__coeff_88","heartbeat_signals__fft_coefficient__attr_""real""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_83"
0,38.927945,0.660949,1.090709,0.848728,1.168685,0.982133,1.223496,1.236300,1.104172,1.497129,...,0.531883,-0.047438,0.554370,0.307586,0.564596,0.562960,0.591859,0.504124,0.528450,0.473568
1,19.445634,1.718217,1.280923,1.850706,1.460752,1.924501,1.925485,1.715938,2.079957,1.818636,...,0.563590,-0.109579,0.697446,0.398073,0.640969,0.270192,0.224925,0.645082,0.635135,0.297325
2,21.192974,1.814281,1.619051,1.215343,1.787166,2.146987,1.686190,1.540137,2.291031,2.403422,...,0.712487,-0.074042,0.321703,0.390386,0.716929,0.316524,0.422077,0.722742,0.680590,0.383754
3,42.113066,2.109550,0.619634,2.366413,2.071539,1.000340,2.728281,1.391727,2.017176,2.610492,...,0.601499,-0.184248,0.564669,0.623353,0.466980,0.651774,0.308915,0.550097,0.466904,0.494024
4,69.756786,0.194549,0.348882,0.092119,0.653924,0.231422,1.080003,0.711244,1.357904,1.237998,...,0.015292,0.070505,0.065835,0.051780,0.092940,0.103773,0.179405,-0.089611,0.091841,0.056867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,63.323449,0.840651,1.186210,1.396236,0.417221,2.036034,1.659054,0.500584,1.693545,0.859932,...,0.779955,0.005525,0.486013,0.273372,0.705386,0.602898,0.447929,0.474844,0.564266,0.133969
99996,69.657534,1.557787,1.393960,0.989147,1.611333,1.793044,1.092325,0.507138,1.763940,2.677643,...,0.539489,0.114670,0.579498,0.417226,0.270110,0.556596,0.703258,0.462312,0.269719,0.539236
99997,40.897057,0.469758,1.000355,0.706395,1.190514,0.674603,1.632769,0.229008,2.027802,0.302457,...,0.282597,-0.474629,0.460647,0.478341,0.527891,0.904111,0.728529,0.178410,0.500813,0.773985
99998,42.333303,0.992948,1.354894,2.238589,1.237608,1.325212,2.785515,1.918571,0.814167,2.613950,...,0.594252,-0.162106,0.694276,0.681025,0.357196,0.498088,0.433297,0.406154,0.324771,0.340727


In [49]:
# Training design matrix: per-time-step columns plus the selected tsfresh features
# (index-aligned join).
train_xin=train.join(features_filtered)

In [95]:
train_xin

Unnamed: 0,id,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,...,"heartbeat_signals__fft_coefficient__attr_""abs""__coeff_84","heartbeat_signals__fft_coefficient__attr_""imag""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_90","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_94","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_75","heartbeat_signals__fft_coefficient__attr_""real""__coeff_88","heartbeat_signals__fft_coefficient__attr_""real""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_83"
0,0.0,0.991211,0.943359,0.764648,0.618652,0.379639,0.190796,0.040222,0.026001,0.031708,...,0.531883,-0.047438,0.554370,0.307586,0.564596,0.562960,0.591859,0.504124,0.528450,0.473568
1,1.0,0.971680,0.929199,0.572754,0.178467,0.122986,0.132324,0.094421,0.089600,0.030487,...,0.563590,-0.109579,0.697446,0.398073,0.640969,0.270192,0.224925,0.645082,0.635135,0.297325
2,2.0,1.000000,0.958984,0.701172,0.231812,0.000000,0.080688,0.128418,0.187500,0.280762,...,0.712487,-0.074042,0.321703,0.390386,0.716929,0.316524,0.422077,0.722742,0.680590,0.383754
3,3.0,0.975586,0.934082,0.659668,0.249878,0.237061,0.281494,0.249878,0.249878,0.241455,...,0.601499,-0.184248,0.564669,0.623353,0.466980,0.651774,0.308915,0.550097,0.466904,0.494024
4,4.0,0.000000,0.055817,0.261230,0.359863,0.433105,0.453613,0.499023,0.542969,0.616699,...,0.015292,0.070505,0.065835,0.051780,0.092940,0.103773,0.179405,-0.089611,0.091841,0.056867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,99995.0,1.000000,0.677734,0.222412,0.257080,0.204712,0.054657,0.026154,0.118164,0.244873,...,0.779955,0.005525,0.486013,0.273372,0.705386,0.602898,0.447929,0.474844,0.564266,0.133969
99996,99996.0,0.926758,0.906250,0.637207,0.415039,0.374756,0.382568,0.358887,0.341309,0.336426,...,0.539489,0.114670,0.579498,0.417226,0.270110,0.556596,0.703258,0.462312,0.269719,0.539236
99997,99997.0,0.925781,0.587402,0.633301,0.632324,0.639160,0.614258,0.599121,0.517578,0.403809,...,0.282597,-0.474629,0.460647,0.478341,0.527891,0.904111,0.728529,0.178410,0.500813,0.773985
99998,99998.0,1.000000,0.994629,0.829590,0.458252,0.264160,0.240234,0.213745,0.189331,0.203857,...,0.594252,-0.162106,0.694276,0.681025,0.357196,0.498088,0.433297,0.406154,0.324771,0.340727


In [50]:
train_xin["label"]

0        0.0
1        0.0
2        2.0
3        0.0
4        2.0
        ... 
99995    0.0
99996    2.0
99997    3.0
99998    2.0
99999    0.0
Name: label, Length: 100000, dtype: float16

# 方差过滤

In [55]:
# Feature matrix: every column except the target and the sample id.
X=train_xin.drop(["label","id"], axis=1)

In [58]:
X

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,s_9,...,"heartbeat_signals__fft_coefficient__attr_""abs""__coeff_84","heartbeat_signals__fft_coefficient__attr_""imag""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_90","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_94","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_97","heartbeat_signals__fft_coefficient__attr_""abs""__coeff_75","heartbeat_signals__fft_coefficient__attr_""real""__coeff_88","heartbeat_signals__fft_coefficient__attr_""real""__coeff_92","heartbeat_signals__fft_coefficient__attr_""real""__coeff_83"
0,0.991211,0.943359,0.764648,0.618652,0.379639,0.190796,0.040222,0.026001,0.031708,0.065552,...,0.531883,-0.047438,0.554370,0.307586,0.564596,0.562960,0.591859,0.504124,0.528450,0.473568
1,0.971680,0.929199,0.572754,0.178467,0.122986,0.132324,0.094421,0.089600,0.030487,0.040497,...,0.563590,-0.109579,0.697446,0.398073,0.640969,0.270192,0.224925,0.645082,0.635135,0.297325
2,1.000000,0.958984,0.701172,0.231812,0.000000,0.080688,0.128418,0.187500,0.280762,0.328369,...,0.712487,-0.074042,0.321703,0.390386,0.716929,0.316524,0.422077,0.722742,0.680590,0.383754
3,0.975586,0.934082,0.659668,0.249878,0.237061,0.281494,0.249878,0.249878,0.241455,0.230713,...,0.601499,-0.184248,0.564669,0.623353,0.466980,0.651774,0.308915,0.550097,0.466904,0.494024
4,0.000000,0.055817,0.261230,0.359863,0.433105,0.453613,0.499023,0.542969,0.616699,0.676758,...,0.015292,0.070505,0.065835,0.051780,0.092940,0.103773,0.179405,-0.089611,0.091841,0.056867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1.000000,0.677734,0.222412,0.257080,0.204712,0.054657,0.026154,0.118164,0.244873,0.328857,...,0.779955,0.005525,0.486013,0.273372,0.705386,0.602898,0.447929,0.474844,0.564266,0.133969
99996,0.926758,0.906250,0.637207,0.415039,0.374756,0.382568,0.358887,0.341309,0.336426,0.317139,...,0.539489,0.114670,0.579498,0.417226,0.270110,0.556596,0.703258,0.462312,0.269719,0.539236
99997,0.925781,0.587402,0.633301,0.632324,0.639160,0.614258,0.599121,0.517578,0.403809,0.253174,...,0.282597,-0.474629,0.460647,0.478341,0.527891,0.904111,0.728529,0.178410,0.500813,0.773985
99998,1.000000,0.994629,0.829590,0.458252,0.264160,0.240234,0.213745,0.189331,0.203857,0.210815,...,0.594252,-0.162106,0.694276,0.681025,0.357196,0.498088,0.433297,0.406154,0.324771,0.340727


In [63]:
from sklearn.feature_selection import VarianceThreshold
import numpy as np

# Variance filter with threshold 0.05; the cell displays the shape that remains.
# (A bare `X.var().values` expression used to sit here; its result was computed
# and thrown away, so it has been removed.)
X_fsvar = VarianceThreshold(0.05).fit_transform(X)
X_fsvar.shape


(100000, 540)

In [64]:
# Target vector: the label column of the training design matrix.
y=train_xin["label"]

In [104]:
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier as RFC
# Small random forest (10 trees, fixed seed), fitted on all features and used here
# to rank them by importance.
RFC_ = RFC(n_estimators =10,random_state=0)
RF_EM=RFC_.fit(X,y)
# One importance value per column of X.
RF_EM.feature_importances_


NameError: name 'rfc' is not defined

In [106]:
RF_EM.feature_importances_

array([8.03192811e-04, 2.26050818e-03, 9.54168935e-03, 1.77151213e-02,
       3.85274737e-03, 8.37329621e-04, 4.64758723e-03, 5.78802047e-04,
       3.26887069e-04, 1.71574720e-03, 7.75748188e-04, 1.35742809e-03,
       1.64481078e-04, 1.99641441e-04, 8.36635497e-04, 4.51153978e-04,
       6.26659839e-04, 6.11735538e-04, 3.59387347e-04, 6.91032438e-04,
       1.01051817e-03, 1.77201520e-03, 1.59049050e-03, 1.50578028e-03,
       8.37209322e-04, 1.34898425e-03, 2.60292498e-03, 6.45068946e-03,
       2.59886454e-03, 4.23142601e-03, 1.55365131e-03, 2.77599416e-04,
       4.82944077e-04, 1.85799135e-04, 9.58456188e-04, 1.34594399e-03,
       5.61290480e-04, 4.65629665e-04, 1.08186807e-03, 4.34070903e-03,
       2.20924520e-04, 5.48778882e-04, 4.24242604e-03, 2.59386426e-03,
       2.72944430e-04, 1.14892601e-03, 1.19524802e-04, 3.11716186e-04,
       2.55523806e-04, 2.98855026e-03, 4.40449139e-04, 1.32033181e-03,
       2.17110857e-04, 2.81811710e-04, 4.48508051e-04, 1.16678408e-03,
      

In [110]:
Filter_feature=X.columns[RF_EM.feature_importances_>0.0005]

from sklearn.model_selection import train_test_split,cross_val_score
import numpy as np
import matplotlib.pyplot as plt
# Sweep five importance thresholds, evenly spaced from 0 to the largest feature
# importance, and record the mean 5-fold cross-validation score obtained with
# the features kept at each threshold.
# The forest is fitted once here; the earlier extra fit whose result was
# discarded has been removed.
threshold = np.linspace(0, RFC_.fit(X, y).feature_importances_.max(), 5)
score = []
for i in threshold:
    X_embedded = SelectFromModel(RFC_, threshold=i).fit_transform(X, y)
    once = cross_val_score(RFC_, X_embedded, y, cv=5).mean()
    score.append(once)
plt.plot(threshold, score)
plt.xlabel("feature importance threshold")
plt.ylabel("mean 5-fold CV score")
plt.show()

In [112]:
# Bind the training names used by the rest of the notebook to the feature
# matrix and the label vector.
x_train, y_train = X, y

In [113]:
# Restrict both frames to the columns retained by the importance filter.
# The training frame is selected from X (which x_train aliases in the previous
# cell) rather than from x_train itself, so this cell can be re-run safely even
# after x_train's columns have been renamed.
x_test = test_xin[Filter_feature]
x_train = X[Filter_feature]

In [115]:
# Preview the first rows of the filtered training features.
x_train.head()

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_9,s_10,...,"heartbeat_signals__cwt_coefficients__coeff_8__w_10__widths_(2, 5, 10, 20)","heartbeat_signals__cwt_coefficients__coeff_9__w_10__widths_(2, 5, 10, 20)",heartbeat_signals__energy_ratio_by_chunks__num_segments_10__segment_focus_1,"heartbeat_signals__fft_coefficient__attr_""angle""__coeff_2","heartbeat_signals__cwt_coefficients__coeff_14__w_20__widths_(2, 5, 10, 20)","heartbeat_signals__cwt_coefficients__coeff_10__w_10__widths_(2, 5, 10, 20)","heartbeat_signals__fft_aggregated__aggtype_""kurtosis""","heartbeat_signals__cwt_coefficients__coeff_10__w_5__widths_(2, 5, 10, 20)","heartbeat_signals__agg_linear_trend__attr_""rvalue""__chunk_len_5__f_agg_""min""",heartbeat_signals__energy_ratio_by_chunks__num_segments_10__segment_focus_3
0,0.991211,0.943359,0.764648,0.618652,0.379639,0.190796,0.040222,0.026001,0.065552,0.125488,...,0.452152,0.358548,0.090086,66.094067,0.117148,0.260394,5.659187,-0.54585,-0.64712,0.134384
1,0.97168,0.929199,0.572754,0.178467,0.122986,0.132324,0.094421,0.0896,0.040497,0.020386,...,0.3732,0.260375,0.052871,17.747878,0.070248,0.143347,6.283027,-0.372693,-0.507989,0.039657
2,1.0,0.958984,0.701172,0.231812,0.0,0.080688,0.128418,0.1875,0.328369,0.320557,...,0.407512,0.373239,0.334134,-101.623335,0.880301,0.333296,5.731839,-0.173709,-0.639,0.0
3,0.975586,0.934082,0.659668,0.249878,0.237061,0.281494,0.249878,0.249878,0.230713,0.224243,...,0.779006,0.729366,0.019274,-42.14932,0.544149,0.670066,5.526573,-0.144205,-0.512126,0.126396
4,0.0,0.055817,0.26123,0.359863,0.433105,0.453613,0.499023,0.542969,0.676758,0.737793,...,0.376854,0.541694,0.286812,-143.204956,1.556982,0.694927,11.70839,0.472725,-0.799241,0.20546


In [124]:
# Rename the columns to s_0 .. s_{n-1}. The count is taken from the frame
# itself so the rename keeps working when the feature selection changes.
x_train.columns = ['s_' + str(i) for i in range(x_train.shape[1])]

In [125]:
# Rename the columns to s_0 .. s_{n-1}. The count is taken from the frame
# itself so the rename keeps working when the feature selection changes.
x_test.columns = ['s_' + str(i) for i in range(x_test.shape[1])]

In [126]:
# Display the column index of the training features.
x_train.columns

Index(['s_0', 's_1', 's_2', 's_3', 's_4', 's_5', 's_6', 's_7', 's_8', 's_9',
       ...
       's_227', 's_228', 's_229', 's_230', 's_231', 's_232', 's_233', 's_234',
       's_235', 's_236'],
      dtype='object', length=237)

In [79]:
def abs_sum(y_pre, y_tru):
    """Return the total absolute difference between two equally shaped arrays.

    Parameters
    ----------
    y_pre : array-like
        First array (any dimensionality).
    y_tru : array-like
        Second array, broadcast-compatible with ``y_pre``.

    Returns
    -------
    numpy scalar
        Sum of ``|y_pre - y_tru|`` over every element.
    """
    y_pre = np.asarray(y_pre)
    y_tru = np.asarray(y_tru)
    # A single vectorised reduction works for 1-D, 2-D and empty inputs alike;
    # nesting the builtin sum() only handled the 2-D case.
    return np.abs(y_pre - y_tru).sum()

In [80]:
def cv_model(clf, train_x, train_y, test_x, clf_name):
    """Train a 5-fold cross-validated LightGBM multiclass model.

    For every fold the model is trained with early stopping on the held-out
    part, scored with ``abs_sum`` against the one-hot encoded validation
    labels, and used to predict ``test_x``. The per-fold test predictions are
    averaged.

    Parameters
    ----------
    clf : module-like
        Object exposing ``Dataset`` and ``train`` (the notebook passes the
        ``lightgbm`` module).
    train_x : pandas.DataFrame
        Training features; rows are selected positionally via ``.iloc``.
    train_y : array-like
        Class labels with integer values ``0..3``, one per row of ``train_x``.
    test_x : DataFrame or array with ``.shape``
        Features to predict on.
    clf_name : str
        Model family; only ``"lgb"`` is implemented.

    Returns
    -------
    numpy.ndarray
        ``(test_x.shape[0], 4)`` array of class probabilities averaged over
        the folds.

    Raises
    ------
    ValueError
        If ``clf_name`` is not ``"lgb"``.
    """
    if clf_name != "lgb":
        # Previously an unsupported name surfaced as a NameError deep in the loop.
        raise ValueError("Unsupported clf_name %r; only 'lgb' is implemented" % (clf_name,))

    folds = 5
    seed = 2021
    num_class = 4
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    test = np.zeros((test_x.shape[0], num_class))

    # KFold yields positional indices, so index a plain array instead of doing
    # label-based lookups on a Series whose index might not be 0..n-1.
    labels = np.asarray(train_y)

    cv_scores = []
    for i, (train_index, valid_index) in enumerate(kf.split(train_x, labels)):
        print('************************************ {} ************************************'.format(str(i+1)))
        trn_x, val_x = train_x.iloc[train_index], train_x.iloc[valid_index]
        trn_y, val_y = labels[train_index], labels[valid_index]

        train_matrix = clf.Dataset(trn_x, label=trn_y)
        valid_matrix = clf.Dataset(val_x, label=val_y)

        params = {
            'boosting_type': 'gbdt',
            'objective': 'multiclass',
            'num_class': num_class,
            'num_leaves': 2 ** 5,
            'feature_fraction': 0.8,
            'bagging_fraction': 0.8,
            'bagging_freq': 4,
            'learning_rate': 0.1,
            'seed': seed,
            # NOTE(review): 'nthread' and 'n_jobs' both look like thread-count
            # settings with different values -- confirm which one is intended.
            'nthread': 28,
            'n_jobs': 24,
            'verbose': -1,
        }

        train_kwargs = {
            'params': params,
            'train_set': train_matrix,
            'valid_sets': [valid_matrix],
            'num_boost_round': 2000,
        }
        if hasattr(clf, 'log_evaluation'):
            # Newer LightGBM releases configure logging / early stopping via
            # callbacks and no longer accept the legacy keyword arguments.
            train_kwargs['callbacks'] = [
                clf.early_stopping(stopping_rounds=200),
                clf.log_evaluation(period=100),
            ]
        else:
            train_kwargs['verbose_eval'] = 100
            train_kwargs['early_stopping_rounds'] = 200

        model = clf.train(**train_kwargs)
        val_pred = model.predict(val_x, num_iteration=model.best_iteration)
        test_pred = model.predict(test_x, num_iteration=model.best_iteration)

        # Fixed-width one-hot encoding: always num_class columns, even if a
        # fold happens to miss a class, so it lines up with val_pred.
        val_onehot = np.eye(num_class)[val_y.astype(int)]
        print('预测的概率矩阵为：')
        print(test_pred)
        test += test_pred
        score = abs_sum(val_onehot, val_pred)
        cv_scores.append(score)
        print(cv_scores)
    print("%s_score_list:" % clf_name, cv_scores)
    print("%s_score_mean:" % clf_name, np.mean(cv_scores))
    print("%s_score_std:" % clf_name, np.std(cv_scores))
    test = test / kf.n_splits

    return test


In [82]:
import os
import gc
import math

import pandas as pd
import numpy as np

import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge
from sklearn.preprocessing import MinMaxScaler


from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from tqdm import tqdm
import matplotlib.pyplot as plt
import time
import warnings

In [127]:
# Preview the training features that are passed to the model below.
x_train.head()

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,s_9,...,s_227,s_228,s_229,s_230,s_231,s_232,s_233,s_234,s_235,s_236
0,0.991211,0.943359,0.764648,0.618652,0.379639,0.190796,0.040222,0.026001,0.065552,0.125488,...,0.452152,0.358548,0.090086,66.094067,0.117148,0.260394,5.659187,-0.54585,-0.64712,0.134384
1,0.97168,0.929199,0.572754,0.178467,0.122986,0.132324,0.094421,0.0896,0.040497,0.020386,...,0.3732,0.260375,0.052871,17.747878,0.070248,0.143347,6.283027,-0.372693,-0.507989,0.039657
2,1.0,0.958984,0.701172,0.231812,0.0,0.080688,0.128418,0.1875,0.328369,0.320557,...,0.407512,0.373239,0.334134,-101.623335,0.880301,0.333296,5.731839,-0.173709,-0.639,0.0
3,0.975586,0.934082,0.659668,0.249878,0.237061,0.281494,0.249878,0.249878,0.230713,0.224243,...,0.779006,0.729366,0.019274,-42.14932,0.544149,0.670066,5.526573,-0.144205,-0.512126,0.126396
4,0.0,0.055817,0.26123,0.359863,0.433105,0.453613,0.499023,0.542969,0.676758,0.737793,...,0.376854,0.541694,0.286812,-143.204956,1.556982,0.694927,11.70839,0.472725,-0.799241,0.20546


In [128]:
# Preview the training labels.
y_train.head()

0    0.0
1    0.0
2    2.0
3    0.0
4    2.0
Name: label, dtype: float16

In [129]:
# Preview the test features.
x_test.head()

Unnamed: 0,s_0,s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8,s_9,...,s_227,s_228,s_229,s_230,s_231,s_232,s_233,s_234,s_235,s_236
0,0.991699,1.0,0.631836,0.13623,0.041412,0.102722,0.12085,0.123413,0.110535,0.113098,...,0.554187,0.470683,0.019887,49.092351,0.207637,0.380693,6.619765,-0.27402,-0.543781,0.078372
1,0.607422,0.541504,0.340576,0.0,0.090698,0.164917,0.195068,0.168823,0.153564,0.19165,...,-0.146182,-0.133759,0.231403,-154.364944,0.477285,-0.120367,8.790172,-0.265964,-0.068635,0.076568
2,0.975098,0.670898,0.686523,0.708496,0.71875,0.716797,0.720703,0.70166,0.487061,0.294434,...,1.348305,1.259921,0.054273,-20.656689,0.867225,1.144652,4.706507,0.258345,-0.730225,0.125029
3,0.995605,0.916992,0.520996,0.0,0.221802,0.404053,0.490479,0.527344,0.54541,0.54541,...,0.533758,0.606014,0.286783,-131.477514,1.505772,0.672723,5.252847,0.003522,-0.763156,0.228841
4,1.0,0.888184,0.745605,0.531738,0.380371,0.224609,0.091125,0.057648,0.00782,0.027191,...,0.439304,0.301966,0.070697,34.060658,0.176048,0.159977,5.597247,-0.508088,-0.487694,0.111104


In [130]:
def lgb_model(x_train, y_train, x_test):
    """Run the cross-validated LightGBM pipeline and return its test predictions.

    Thin wrapper that forwards the data to ``cv_model`` with the ``lgb``
    module and the ``"lgb"`` model name.
    """
    return cv_model(lgb, x_train, y_train, x_test, "lgb")
# Run the cross-validated LightGBM training and keep the fold-averaged test predictions.
lgb_test = lgb_model(x_train, y_train, x_test)

************************************ 1 ************************************
Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.0393118
[200]	valid_0's multi_logloss: 0.0339911
[300]	valid_0's multi_logloss: 0.0353551
[400]	valid_0's multi_logloss: 0.0383097
Early stopping, best iteration is:
[214]	valid_0's multi_logloss: 0.0339586
预测的概率矩阵为：
[[9.99798361e-01 1.92101366e-04 7.44926255e-06 2.08793384e-06]
 [4.72634711e-06 5.97690802e-05 9.99934730e-01 7.74135290e-07]
 [2.38758477e-07 1.21907604e-06 1.52615194e-06 9.99997016e-01]
 ...
 [7.20866466e-02 7.17914188e-05 9.27837608e-01 3.95361773e-06]
 [9.99849395e-01 1.47658594e-04 2.28354575e-06 6.62880222e-07]
 [9.99077333e-01 1.71317621e-04 2.73704172e-04 4.77644859e-04]]
[513.864641104859]
************************************ 2 ************************************
Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.0419001
[200]	valid_0's multi_logloss:

In [131]:
# Build the submission file: copy the four predicted probability columns into
# the sample submission frame and write it out without the index.
temp = pd.DataFrame(lgb_test)
result = pd.read_csv('../data/sample_submit.csv')
for class_idx in range(4):
    result['label_' + str(class_idx)] = temp[class_idx]
result.to_csv('submit0320.csv', index=False)