In [2]:
import pandas as pd 
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from tqdm import tqdm 
import gc
import random
import lightgbm as lgbm
import re
from sklearn.metrics import *
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
def make_datetime(x):
    """Convert a ``YYYYMMDDHH[MMSS]`` timestamp into a datetime truncated to the hour.

    Parameters
    ----------
    x : int or str
        Timestamp whose string form starts with year (4 chars), month (2),
        day (2) and hour (2), e.g. ``20201101025616``. Anything after the
        hour is not used in the result.

    Returns
    -------
    datetime.datetime
        ``datetime(year, month, day, hour)``; minutes and seconds are zero.

    Raises
    ------
    ValueError
        If one of the four leading fields is missing, non-numeric, or out of range.
    """
    x = str(x)
    # Only the first ten characters feed the result. The minute/second fields
    # are deliberately not parsed: doing so shadowed the builtin ``min`` and
    # raised on strings that stop at the hour, without affecting the output.
    year = int(x[:4])
    month = int(x[4:6])
    day = int(x[6:8])
    hour = int(x[8:10])
    return dt.datetime(year, month, day, hour)

In [4]:
#정규표현식을 활용해 (,)( )과 같은 불필요한 데이터 정제

def string2num(x):
    """Keep only the ASCII digits of ``str(x)`` and return them as an int.

    Every run of non-digit characters (commas, parentheses, spaces, letters,
    signs, decimal points, ...) is removed before conversion, so ``"1,234"``
    becomes ``1234``. Returns ``0`` when no digit is left.
    """
    digits_only = re.sub(r"[^0-9]+", '', str(x))
    return int(digits_only) if digits_only != '' else 0

In [4]:
# Load the raw training error log, then list the distinct firmware version strings it contains.
train_err=pd.read_csv('train_err_data.csv')
train_err['fwver'].unique()

array(['05.15.2138', '04.33.1185', '04.33.1261', '04.22.1750',
       '04.22.1778', '04.16.3553', '04.33.1149', '04.16.3571',
       '05.66.3237', '05.66.3571', '03.11.1149', '04.22.1684',
       '03.11.1167', '04.82.1684', '04.82.1778', '04.33.1171',
       '04.73.2237', '10', '04.82.1730', '04.73.2571', '8.5.3',
       '04.22.1666', '03.11.1141', '05.15.2120', '04.33.1125',
       '04.16.3439', '04.22.1442', '04.33.1095', '04.16.3569',
       '05.15.2090', '05.15.3104', '05.15.2122', '04.22.1656',
       '04.16.2641', '05.15.2114', '04.16.3345', '05.15.2092'],
      dtype=object)

### ID,model,fwver에 따라 각 errtype의 빈도를 측정하려 한다.
### 확인 결과 test와 train의 errtype의 종류는 동일하다
### 시간변수를 무시하고 ID,model,fwver에 의해서만 group을 만들어주기 위해 사용할 변수를 생성한다.

### 우선 연산의 간편성을 위해 model_nm과 fwver는 label encoding을 해주고
#### (후에 decoding을 통해 원래 값으로 변환해준다)


In [5]:
from sklearn.preprocessing import LabelEncoder

# Replace the string columns `model_nm` and `fwver` with integer labels so they
# can be combined arithmetically into a group key. The fitted encoders are kept
# so the labels can be mapped back to the original strings later.
# Note: the columns are overwritten in place, so this cell is not idempotent.
encoder_model_tr = LabelEncoder()
label_model_tr = encoder_model_tr.fit_transform(train_err['model_nm'])
train_err['model_nm'] = label_model_tr

encoder_fwver_tr = LabelEncoder()
label_fwver_tr = encoder_fwver_tr.fit_transform(train_err['fwver'])
train_err['fwver'] = label_fwver_tr

print('train_model_nm의 라벨 값:',train_err['model_nm'].unique())
print('train_fwver의 라벨값:',train_err['fwver'].unique())

train_model_nm의 라벨 값: [3 2 0 1 7 4 5 8 6]
train_fwver의 라벨값: [31 19 20 13 14  6 17  8 33 34  1 12  2 23 25 18 21 35 24 22 36 11  0 29
 16  5  9 15  7 26 32 30 10  3 28  4 27]


### group을 나누기 위한 새로운 변수 group을 생성한다.

group=train_err['user_id']*1000+train_err['model_nm']*100+train_err['fwver'] 

->앞의 5자리는 user_id, 그다음 한 자리는 model_nm, 마지막 2자리는 fwver를 나타낸다 (예: 10000331 → user_id 10000, model_nm 3, fwver 31)

In [6]:
# Pack (user_id, model_nm, fwver) into one integer key: user_id * 1000,
# plus the model label in the hundreds digit, plus the fwver label in the
# last two digits. The key is collision-free only while the model label
# stays below 10 and the fwver label stays below 100.
group_key_train = (
    train_err['user_id'] * 1000
    + train_err['model_nm'] * 100
    + train_err['fwver']
)
train_err['group'] = group_key_train
print(train_err['group'].unique())
print(train_err['group'].value_counts())

[10000331 10001219 10001220 ... 24998013 24998014 24999331]
24934402    222186
20546331    195538
15570402    175730
20352402     83680
11041331     66093
             ...  
20331217         1
20099635         1
24435219         1
16010013         1
11157636         1
Name: group, Length: 24062, dtype: int64


### errtype별로 더하여 빈도수를 측정하기 위해서 errtype에 대해서 One-Hot 인코딩을 실시한다.

시간이 오래 걸리므로 코드는 주석처리를 했고, 따로 내보낸 csv파일을 사용한다.

In [7]:
# One-hot encode `errtype` so that summing the indicator columns per group yields per-type counts.
a=pd.get_dummies(train_err,columns=['errtype'])
a.shape


(16554663, 47)

In [9]:
# Inspect the column names produced by the one-hot encoding.
a.columns

Index(['user_id', 'time', 'model_nm', 'fwver', 'errcode', 'group', 'errtype_1',
       'errtype_2', 'errtype_3', 'errtype_4', 'errtype_5', 'errtype_6',
       'errtype_7', 'errtype_8', 'errtype_9', 'errtype_10', 'errtype_11',
       'errtype_12', 'errtype_13', 'errtype_14', 'errtype_15', 'errtype_16',
       'errtype_17', 'errtype_18', 'errtype_19', 'errtype_20', 'errtype_21',
       'errtype_22', 'errtype_23', 'errtype_24', 'errtype_25', 'errtype_26',
       'errtype_27', 'errtype_28', 'errtype_30', 'errtype_31', 'errtype_32',
       'errtype_33', 'errtype_34', 'errtype_35', 'errtype_36', 'errtype_37',
       'errtype_38', 'errtype_39', 'errtype_40', 'errtype_41', 'errtype_42'],
      dtype='object')

### group변수를 기준으로 groupby한 후 errtype별로 더해준다 

In [11]:
# Count each errtype per group by summing its one-hot indicator column.
# The column selection must be a list: selecting several columns from a
# groupby with a bare tuple (gb['a', 'b', ...]) was deprecated in pandas 1.0
# and raises in pandas 2.0.
# errtype_1 .. errtype_42 without errtype_29, which does not occur in the data.
errtype_columns = [f'errtype_{i}' for i in range(1, 43) if i != 29]
err_get = a.groupby(['group'])[errtype_columns].sum()

In [13]:
# Wrap the per-group errtype counts in a DataFrame under the name used by the following cells.
err_get_train=pd.DataFrame(err_get)

In [18]:
# Sanity check: one row per group, one column per errtype.
err_get_train.shape

(24062, 41)

In [19]:
# Summary statistics of the per-group errtype counts.
err_get_train.describe()

Unnamed: 0,errtype_1,errtype_2,errtype_3,errtype_4,errtype_5,errtype_6,errtype_7,errtype_8,errtype_9,errtype_10,...,errtype_33,errtype_34,errtype_35,errtype_36,errtype_37,errtype_38,errtype_39,errtype_40,errtype_41,errtype_42
count,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,...,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0,24062.0
mean,0.895395,1.084947,1.278281,53.892029,39.602693,2.097997,2.160793,0.006899,0.00453,5.544136,...,7.41738,6.879561,0.423365,0.387208,0.387998,0.188638,0.657468,30.354002,4.327903,1.42856
std,5.800314,35.918226,55.7025,1488.367087,154.116829,22.475164,22.694994,0.186495,0.152355,332.776447,...,8.037456,135.799312,6.233671,0.497004,0.498185,1.782659,8.640483,53.11241,16.620967,3.017265
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,6.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0
75%,0.0,0.0,0.0,1.0,12.0,1.0,1.0,0.0,0.0,0.0,...,11.0,0.0,0.0,1.0,1.0,0.0,0.0,39.0,3.0,1.0
max,171.0,2241.0,7284.0,195228.0,5549.0,1224.0,1542.0,19.0,17.0,49106.0,...,180.0,12356.0,602.0,5.0,5.0,181.0,542.0,1401.0,1342.0,33.0


## err_get_train에 group별 시간min(), model_nm,fwver,user_id 삽입

In [28]:
# Per-group minimum of `time`, `user_id`, `model_nm` and `fwver`, each kept
# both as a Series (*_tr) and as a one-column DataFrame (*_train).
# Because the group key is built from user_id/model_nm/fwver, each group is
# expected to hold a single value for those three columns.
train_by_group = train_err.groupby(['group'])

time_get_tr = train_by_group['time'].min()
id_get_tr = train_by_group['user_id'].min()
model_get_tr = train_by_group['model_nm'].min()
fw_get_tr = train_by_group['fwver'].min()

time_get_train = time_get_tr.to_frame()
id_get_train = id_get_tr.to_frame()
model_get_train = model_get_tr.to_frame()
fw_get_train = fw_get_tr.to_frame()

In [29]:
# All four frames should have one row per group and a single column.
print(*(frame.shape for frame in (time_get_train, id_get_train, model_get_train, fw_get_train)))

(24062, 1) (24062, 1) (24062, 1) (24062, 1)


In [30]:
# Attach the per-group metadata to the count table; both sides are indexed by `group`.
for column_name, per_group_frame in (
    ('time', time_get_train),
    ('user_id', id_get_train),
    ('model_nm', model_get_train),
    ('fwver', fw_get_train),
):
    err_get_train[column_name] = per_group_frame

In [31]:
# Display the combined table (errtype counts plus time, user_id, model_nm, fwver per group).
err_get_train

Unnamed: 0_level_0,errtype_1,errtype_2,errtype_3,errtype_4,errtype_5,errtype_6,errtype_7,errtype_8,errtype_9,errtype_10,...,errtype_37,errtype_38,errtype_39,errtype_40,errtype_41,errtype_42,time,user_id,model_nm,fwver
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000331,0.0,0.0,8.0,104.0,0.0,1.0,1.0,0.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101025616,10000,3,31
10001219,0.0,0.0,0.0,0.0,9.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,74.0,28.0,0.0,20201101020415,10001,2,19
10001220,0.0,0.0,0.0,0.0,44.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,39.0,28.0,1.0,20201112030617,10001,2,20
10002331,0.0,0.0,2.0,132.0,1.0,2.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101030251,10002,3,31
10003219,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,9.0,0.0,0.0,20201101182532,10003,2,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24997013,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,38.0,0.0,0.0,20201101071624,24997,0,13
24997014,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,20.0,8.0,5.0,20201119031831,24997,0,14
24998013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101050758,24998,0,13
24998014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,6.0,0.0,0.0,20201103051329,24998,0,14


## model_nm과 fwver를 decoding하여 원래 형태로 변환한다.
라벨 값이 수치적 의미로 해석되는 것을 막기 위해 원래 형태로 되돌려 주는 것이다.

In [34]:
# Map the integer labels back to the original model / firmware strings using
# the encoders fitted on the training data. Running this cell a second time
# would fail, because the columns then already hold strings.
decoded_model_nm = encoder_model_tr.inverse_transform(err_get_train['model_nm'])
decoded_fwver = encoder_fwver_tr.inverse_transform(err_get_train['fwver'])
err_get_train['model_nm'] = decoded_model_nm
err_get_train['fwver'] = decoded_fwver

err_get_train

Unnamed: 0_level_0,errtype_1,errtype_2,errtype_3,errtype_4,errtype_5,errtype_6,errtype_7,errtype_8,errtype_9,errtype_10,...,errtype_37,errtype_38,errtype_39,errtype_40,errtype_41,errtype_42,time,user_id,model_nm,fwver
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000331,0.0,0.0,8.0,104.0,0.0,1.0,1.0,0.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101025616,10000,model_3,05.15.2138
10001219,0.0,0.0,0.0,0.0,9.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,74.0,28.0,0.0,20201101020415,10001,model_2,04.33.1185
10001220,0.0,0.0,0.0,0.0,44.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,39.0,28.0,1.0,20201112030617,10001,model_2,04.33.1261
10002331,0.0,0.0,2.0,132.0,1.0,2.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101030251,10002,model_3,05.15.2138
10003219,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,9.0,0.0,0.0,20201101182532,10003,model_2,04.33.1185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24997013,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,38.0,0.0,0.0,20201101071624,24997,model_0,04.22.1750
24997014,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,20.0,8.0,5.0,20201119031831,24997,model_0,04.22.1778
24998013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101050758,24998,model_0,04.22.1750
24998014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,6.0,0.0,0.0,20201103051329,24998,model_0,04.22.1778


### index(group)을 제거하고 csv로 내보낸다.

In [41]:
# Write the grouped training features to disk; index=False leaves the `group` index out of the file.
err_get_train.to_csv(path_or_buf=r"err_group_train.csv", index=False)

# 동일한 방식으로 test data도 형성해준다


In [5]:
# Load the raw test error log.
test_err=pd.read_csv('test_err_data.csv')

In [6]:
# Label-encode model_nm and fwver of the test data.
from sklearn.preprocessing import LabelEncoder

# These encoders are fitted on the test data only, so the integer codes are
# meaningful only together with these encoder objects (they are used again
# later to decode the labels back to the original strings).
encoder_model_te=LabelEncoder()
encoder_model_te.fit(test_err['model_nm'])
label_model_te=encoder_model_te.transform(test_err['model_nm'])
test_err['model_nm']=label_model_te


encoder_fwver_te=LabelEncoder()
encoder_fwver_te.fit(test_err['fwver'])
label_fwver_te=encoder_fwver_te.transform(test_err['fwver'])
test_err['fwver']=label_fwver_te

# The printed captions previously said "train_" although the values come from the test data.
print('test_model_nm의 라벨 값:',test_err['model_nm'].unique())
print('test_fwver의 라벨값:',test_err['fwver'].unique())

train_model_nm의 라벨 값: [1 2 3 0 4 5 6 7 8]
train_fwver의 라벨값: [ 4  6 21 32 14 16 18  2 20 13 31 26 28 36 34 23 24 22 25 27  9 39 12 11
  1 19 35  3 29  0 10 33 17  8 30 37 38  5 15  7]


#### test에 fwver가 더 많이 존재함을 알 수 있다.

In [7]:
# Build the grouping key for the test data: user_id * 1000, plus the model
# label in the hundreds digit, plus the fwver label in the last two digits.
# The key is collision-free only while the model label stays below 10 and
# the fwver label stays below 100.
group_key_test = (
    test_err['user_id'] * 1000
    + test_err['model_nm'] * 100
    + test_err['fwver']
)
test_err['group'] = group_key_test
print(test_err['group'].unique())
print(test_err['group'].value_counts())

[30000104 30000106 30000221 ... 44997014 44997016 44998104]
41285332    396478
41591402    161516
41369402    159418
33363014     84166
30264221     83583
             ...  
44457636         1
32764220         1
36980218         1
39788013         1
34782013         1
Name: group, Length: 24096, dtype: int64


In [8]:
# One-hot encode `errtype` of the test data so that per-group sums yield per-type counts.
b=pd.get_dummies(test_err,columns=['errtype'])
b.shape

(16532648, 47)

In [9]:
# Inspect the column names produced by the one-hot encoding of the test data.
b.columns

Index(['user_id', 'time', 'model_nm', 'fwver', 'errcode', 'group', 'errtype_1',
       'errtype_2', 'errtype_3', 'errtype_4', 'errtype_5', 'errtype_6',
       'errtype_7', 'errtype_8', 'errtype_9', 'errtype_10', 'errtype_11',
       'errtype_12', 'errtype_13', 'errtype_14', 'errtype_15', 'errtype_16',
       'errtype_17', 'errtype_18', 'errtype_19', 'errtype_20', 'errtype_21',
       'errtype_22', 'errtype_23', 'errtype_24', 'errtype_25', 'errtype_26',
       'errtype_27', 'errtype_28', 'errtype_30', 'errtype_31', 'errtype_32',
       'errtype_33', 'errtype_34', 'errtype_35', 'errtype_36', 'errtype_37',
       'errtype_38', 'errtype_39', 'errtype_40', 'errtype_41', 'errtype_42'],
      dtype='object')

In [10]:
# Count each errtype per group in the test data by summing its one-hot column.
# The column selection must be a list: selecting several columns from a
# groupby with a bare tuple (gb['a', 'b', ...]) was deprecated in pandas 1.0
# and raises in pandas 2.0.
# errtype_1 .. errtype_42 without errtype_29, which does not occur in the data.
errtype_columns = [f'errtype_{i}' for i in range(1, 43) if i != 29]
err_get_te = b.groupby(['group'])[errtype_columns].sum()

In [11]:
# Wrap the per-group test counts in a DataFrame and show summary statistics.
err_get_test=pd.DataFrame(err_get_te)
err_get_test.describe()

Unnamed: 0,errtype_1,errtype_2,errtype_3,errtype_4,errtype_5,errtype_6,errtype_7,errtype_8,errtype_9,errtype_10,...,errtype_33,errtype_34,errtype_35,errtype_36,errtype_37,errtype_38,errtype_39,errtype_40,errtype_41,errtype_42
count,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,...,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0,24096.0
mean,0.918036,0.866077,0.970369,49.236346,37.799676,2.341177,2.347361,0.008176,0.003818,6.183931,...,7.398821,6.521207,0.508549,0.391227,0.391974,0.197294,0.850639,30.211902,4.492198,1.425174
std,5.987339,34.696152,22.965019,2575.415079,181.956588,26.611904,26.496412,0.218978,0.085381,312.972711,...,8.364981,120.037581,8.240654,0.514206,0.515814,1.715862,11.330104,53.939188,14.392031,3.009669
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,6.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0
75%,0.0,0.0,0.0,1.0,12.0,1.0,1.0,0.0,0.0,0.0,...,11.0,0.0,0.0,1.0,1.0,0.0,0.0,38.0,3.0,1.0
max,152.0,2770.0,1547.0,396441.0,14820.0,1670.0,1656.0,17.0,5.0,41571.0,...,350.0,15777.0,929.0,15.0,15.0,106.0,894.0,1326.0,1201.0,18.0


In [12]:
# Per-group minimum of `time`, `user_id`, `model_nm` and `fwver` for the test
# data, each kept both as a Series (*_te) and as a one-column DataFrame (*_test).
# Because the group key is built from user_id/model_nm/fwver, each group is
# expected to hold a single value for those three columns.
test_by_group = test_err.groupby(['group'])

time_get_te = test_by_group['time'].min()
id_get_te = test_by_group['user_id'].min()
model_get_te = test_by_group['model_nm'].min()
fw_get_te = test_by_group['fwver'].min()

time_get_test = time_get_te.to_frame()
id_get_test = id_get_te.to_frame()
model_get_test = model_get_te.to_frame()
fw_get_test = fw_get_te.to_frame()

In [13]:
# All four frames should have one row per group and a single column.
print(*(frame.shape for frame in (time_get_test, id_get_test, model_get_test, fw_get_test)))

(24096, 1) (24096, 1) (24096, 1) (24096, 1)


In [14]:
# Attach the per-group metadata to the test count table; both sides are indexed by `group`.
for column_name, per_group_frame in (
    ('time', time_get_test),
    ('user_id', id_get_test),
    ('model_nm', model_get_test),
    ('fwver', fw_get_test),
):
    err_get_test[column_name] = per_group_frame

In [15]:
# Display the combined test table (errtype counts plus time, user_id, model_nm, fwver per group).
err_get_test

Unnamed: 0_level_0,errtype_1,errtype_2,errtype_3,errtype_4,errtype_5,errtype_6,errtype_7,errtype_8,errtype_9,errtype_10,...,errtype_37,errtype_38,errtype_39,errtype_40,errtype_41,errtype_42,time,user_id,model_nm,fwver
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
30000104,0.0,0.0,0.0,0.0,60.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,46.0,0.0,0.0,20201101030227,30000,1,4
30000106,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,42.0,0.0,1.0,20201118073855,30000,1,6
30000221,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,4.0,0.0,1.0,20201127094708,30000,2,21
30001332,0.0,0.0,3.0,98.0,3.0,4.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101042135,30001,3,32
30002014,0.0,0.0,0.0,0.0,31.0,2.0,1.0,0.0,0.0,0.0,...,0.0,0.0,14.0,95.0,1.0,0.0,20201101003616,30002,0,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44996014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,156.0,1.0,0.0,20201101003242,44996,0,14
44996016,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,173.0,1.0,3.0,20201119053129,44996,0,16
44997014,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,...,0.0,3.0,0.0,28.0,44.0,0.0,20201101000032,44997,0,14
44997016,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,2.0,5.0,0.0,20201127040356,44997,0,16


## model_nm과 fwver를 원래형태로 변환

In [16]:
# Map the integer labels back to the original model / firmware strings using
# the encoders fitted on the test data. Running this cell a second time
# would fail, because the columns then already hold strings.
decoded_model_nm = encoder_model_te.inverse_transform(err_get_test['model_nm'])
decoded_fwver = encoder_fwver_te.inverse_transform(err_get_test['fwver'])
err_get_test['model_nm'] = decoded_model_nm
err_get_test['fwver'] = decoded_fwver

err_get_test

Unnamed: 0_level_0,errtype_1,errtype_2,errtype_3,errtype_4,errtype_5,errtype_6,errtype_7,errtype_8,errtype_9,errtype_10,...,errtype_37,errtype_38,errtype_39,errtype_40,errtype_41,errtype_42,time,user_id,model_nm,fwver
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
30000104,0.0,0.0,0.0,0.0,60.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,46.0,0.0,0.0,20201101030227,30000,model_1,04.16.3553
30000106,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,42.0,0.0,1.0,20201118073855,30000,model_1,04.16.3571
30000221,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,4.0,0.0,1.0,20201127094708,30000,model_2,04.33.1261
30001332,0.0,0.0,3.0,98.0,3.0,4.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,20201101042135,30001,model_3,05.15.2138
30002014,0.0,0.0,0.0,0.0,31.0,2.0,1.0,0.0,0.0,0.0,...,0.0,0.0,14.0,95.0,1.0,0.0,20201101003616,30002,model_0,04.22.1750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44996014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,156.0,1.0,0.0,20201101003242,44996,model_0,04.22.1750
44996016,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,173.0,1.0,3.0,20201119053129,44996,model_0,04.22.1778
44997014,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,...,0.0,3.0,0.0,28.0,44.0,0.0,20201101000032,44997,model_0,04.22.1750
44997016,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,2.0,5.0,0.0,20201127040356,44997,model_0,04.22.1778


In [17]:
# Write the grouped test features to disk; index=False leaves the `group` index out of the file.
err_get_test.to_csv(path_or_buf=r"err_group_test.csv", index=False)