In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pylab as plt
import sys, gc, warnings, random, math, time, datetime 
from tqdm import tqdm
START_DATE = datetime.datetime.strptime('2017-11-30', '%Y-%m-%d')

from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA
from scipy.stats.stats import pearsonr

import xgboost as xgb
import lightgbm as lgb

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from utils import *

import os
print(os.listdir("../input/features/"))

['train_y.pkl', 'train_uid_features.pkl', 'train_features.pkl', 'useful_features.npy', 'test_features.pkl', 'test_uid_features.pkl', 'train_basic_features.pkl', 'rm_features.npy', 'test.pkl', 'test_basic_features.pkl', 'train.pkl']


In [2]:
train = pd.read_pickle('../input/features/train.pkl')
test = pd.read_pickle('../input/features/test.pkl')
train_y = train['isFraud'].copy()

print(train.shape)
print(train_y.shape)
print(test.shape)

(590540, 433)
(590540,)
(506691, 433)


In [3]:
# fraud  =  train[train.isFraud==1]
# non_fraud  = train[train.isFraud==0]

# v_cols = ['V' + str(i) for i in range(1,340)]
# fraud  = fraud.drop(v_cols,  axis=1)
# non_fraud  = non_fraud.drop(v_cols,  axis=1)
# print(fraud.shape)
# print(non_fraud.shape)

In [4]:
# w_fraud = fraud[fraud.ProductCD=='R']
# w_non_fraud = non_fraud[non_fraud.ProductCD=='R']
# print(w_fraud.shape)
# print(w_non_fraud.shape)
# w_fraud.to_csv('r_fraud.csv')

# w_non_fraud_head = w_non_fraud.head(10000)
# w_non_fraud_head.to_csv('r_non_fraud.csv')

In [5]:
# col = 'D1'
# plt.plot(train[train_y==0][train.ProductCD=='W'][col], '.b')
# plt.plot(train[train_y==1][train.ProductCD=='W'][col], '.r')
# plt.plot(test[col][test.ProductCD=='W'], '.g')

In [6]:
# # %matplotlib notebook
# col = 'D1'
# plt.plot(train[col], '.b')
# plt.plot(train['DT_day_year'], '.r')
# plt.plot(test[col], '.b')

---

In [7]:
# fe with DT
def fe_dt(train_df, test_df, rm_features):

    print('==> processing DT...')
    
    for df in [train_df, test_df]:
        df['DT'] = df['TransactionDT'].apply(
            lambda x: (START_DATE + datetime.timedelta(seconds=x)))
        df['DT_month'] = (df['DT'].dt.year - 2017) * 12 + df['DT'].dt.month
        df['DT_week_year'] = (df['DT'].dt.year - 2017) * 52 + df['DT'].dt.weekofyear
        df['DT_day_year'] = (df['DT'].dt.year - 2017) * 365 + df['DT'].dt.dayofyear

        df['DT_hour'] = df['DT'].dt.hour
        df['DT_day_week'] = df['DT'].dt.dayofweek
        df['DT_day'] = df['DT'].dt.day

    for col in ['DT_hour', 'DT_day_week', 'DT_day']:
        temp_df = pd.concat([train_df[[col]], test_df[[col]]])
        fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
        fq_encode = fq_encode.to_dict()
        train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
        test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)
        
    tmp_rm = ['TransactionDT', 'DT', 'DT_week_year', 'DT_day_year'] # keep DT_month for now
    rm_features = rm_features + tmp_rm

    return train_df, test_df, rm_features
# rm_features = []
# train, test, rm_features = fe_dt(train, test, rm_features)

In [8]:
# fe with card 1-6
def fe_card(train_df, test_df, rm_features):
    
    print('==> processing card...')
    
    # remove noisy cards
    card_cols = ['card1', 'card2', 'card3', 'card4', 'card5', 'card6']
    for col in card_cols:
        valid_card = pd.concat([train_df[[col]], test_df[[col]]])
        valid_card = valid_card[col].value_counts()
        valid_card = valid_card[valid_card>2]
        valid_card = list(valid_card.index)
        
#         train_df[col] = np.where(train_df[col].isin(test_df[col]), train_df[col], np.nan)
#         test_df[col]  = np.where(test_df[col].isin(train_df[col]), test_df[col], np.nan)

        train_df[col] = np.where(train_df[col].isin(valid_card), train_df[col], np.nan)
        test_df[col]  = np.where(test_df[col].isin(valid_card), test_df[col], np.nan)
        
    # freq encoding
    for col in ['card4', 'card6']:
        temp_df = pd.concat([train_df[[col]], test_df[[col]]])
        col_encoded = temp_df[col].value_counts(dropna=False) / len(temp_df)
        col_encoded = col_encoded.to_dict()
        train_df[col] = train_df[col].map(col_encoded)
        test_df[col]  = test_df[col].map(col_encoded)
    
    for col in ['card1', 'card2', 'card3', 'card5']:
        temp_df = pd.concat([train_df[[col]], test_df[[col]]])
        fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
        fq_encode = fq_encode.to_dict()
        train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
        test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)

    # target encoding?
    
    tmp_rm = []
    rm_features = rm_features + tmp_rm
    
    return train_df, test_df, rm_features
# train, test, rm_features = fe_card(train, test, rm_features)

In [9]:
# fe with addr 1-2, dist 1-2
def fe_location(train_df, test_df, rm_features):
    
    print('==> processing location...')
    
    # remove noisy addr
    for col in ['addr1', 'addr2']:
        valid_addr = pd.concat([train_df[[col]], test_df[[col]]])
        valid_addr = valid_addr[col].value_counts()
        valid_addr = valid_addr[valid_addr>2]
        valid_addr = list(valid_addr.index)
        
#         train_df[col] = np.where(train_df[col].isin(test_df[col]), train_df[col], np.nan)
#         test_df[col]  = np.where(test_df[col].isin(train_df[col]), test_df[col], np.nan)

        train_df[col] = np.where(train_df[col].isin(valid_addr), train_df[col], np.nan)
        test_df[col]  = np.where(test_df[col].isin(valid_addr), test_df[col], np.nan)
        
    # freq encoding
    for col in ['addr1', 'addr2']:
        temp_df = pd.concat([train_df[[col]], test_df[[col]]])
        fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
        fq_encode = fq_encode.to_dict()
        train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
        test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)
    
    # target encoding?
    
    tmp_rm = []
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_location(train, test, rm_features)

In [10]:
# fe wtih P&R email domain
def fe_email(train_df, test_df, rm_features):
    
    print('==> processing P&R email domain...')
    
    p = 'P_emaildomain'
    r = 'R_emaildomain'
    
    for df in [train_df, test_df]:
#         df[p] = df[p].map(emails)
#         df[r] = df[r].map(emails)
        df['email_check'] = np.where(df[p]==df[r],1,0)

    # freq encoding
    for col in [p, r]:
        temp_df = pd.concat([train_df[[col]], test_df[[col]]])
        fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
        fq_encode = fq_encode.to_dict()
        train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
        test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)
    
    # encoding string cols
    for col in [p, r]:
        train_df[col] = train_df[col].fillna('unseen_before_label')
        test_df[col]  = test_df[col].fillna('unseen_before_label')
        
        train_df[col] = train_df[col].astype(str)
        test_df[col] = test_df[col].astype(str)
        
        le = LabelEncoder()
        le.fit(list(train_df[col])+list(test_df[col]))
        train_df[col] = le.transform(train_df[col])
        test_df[col]  = le.transform(test_df[col])
        
        train_df[col] = train_df[col].astype('category')
        test_df[col] = test_df[col].astype('category')
    
    tmp_rm = []
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_email(train, test, rm_features)

In [11]:
# fe with C 1-14
def fe_c(train_df, test_df, rm_features):
    
    print('==> processing C...')
    
    c_cols = ['C' + str(i) for i in range(1,15)]
    
#     # freq encoding? not good: numerical
#     for col in c_cols:
#         temp_df = pd.concat([train_df[[col]], test_df[[col]]])
#         fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
#         fq_encode = fq_encode.to_dict()
#         train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
#         test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)

    tmp_rm = []
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_c(train, test, rm_features)

In [12]:
# fe with D 1-15
def fe_d(train_df, test_df, rm_features):
    
    print('==> processing D...')
    
#     d_cols = ['D' + str(i) for i in range(1,16)]
    d_cols = ['D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15']

#     for df in [train_df, test_df]:        
#         for col in d_cols:
#             df[col + '_corrected'] = df[col] - df['DT_day_year'] + train_df['DT_day_year'].min()
#             df[col + '_corrected'] = df[col].apply(lambda y: max(0,y))
            
#             temp_df = pd.concat([train_df[[col]], test_df[[col]]])
#             fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
#             fq_encode = fq_encode.to_dict()
#             train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
#             test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)
    
    for df in [train_df, test_df]:
        for col in d_cols:
            # day map
            day_temp_dict = df.groupby('DT_day_year')[col].agg(['mean']).rename(columns={'mean':col + '_day_mean'})
            day_temp_dict = day_temp_dict[col + '_day_mean'].to_dict()
            df[col + '_day_mean'] = df['DT_day_year'].map(day_temp_dict)
            df[col + '_day_mean'] = df[col] - df[col + '_day_mean']

            df['DT_day_year_prdt'] = df['DT_day_year'].astype(str)+'_'+df['ProductCD'].astype(str)
            day_temp_dict_prdt = df.groupby('DT_day_year_prdt')[col].agg(['mean']).rename(columns={'mean':col + '_prdt_day_mean'})
            day_temp_dict_prdt = day_temp_dict_prdt[col + '_prdt_day_mean'].to_dict()
            df[col + '_prdt_day_mean'] = df['DT_day_year_prdt'].map(day_temp_dict_prdt)
            df[col + '_prdt_day_mean'] = df[col] - df[col + '_prdt_day_mean']

            # week map
            week_temp_dict = df.groupby('DT_week_year')[col].agg(['mean']).rename(columns={'mean':col + '_week_mean'})
            week_temp_dict = week_temp_dict[col + '_week_mean'].to_dict()
            df[col + '_week_mean'] = df['DT_week_year'].map(week_temp_dict)
            df[col + '_week_mean'] = df[col] - df[col + '_week_mean']

            df['DT_week_year_prdt'] = df['DT_week_year'].astype(str)+'_'+df['ProductCD'].astype(str)
            week_temp_dict_prdt = df.groupby('DT_week_year_prdt')[col].agg(['mean']).rename(columns={'mean':col + '_prdt_week_mean'})
            week_temp_dict_prdt = week_temp_dict_prdt[col + '_prdt_week_mean'].to_dict()
            df[col + '_prdt_week_mean'] = df['DT_week_year_prdt'].map(week_temp_dict_prdt)
            df[col + '_prdt_week_mean'] = df[col] - df[col + '_prdt_week_mean']
            
    tmp_rm = ['DT_day_year_prdt', 'DT_week_year_prdt']
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# rm_features = []
# train, test, rm_features = fe_d(train, test, rm_features)

In [13]:
# %matplotlib inline
# col = 'D1'
# plt.plot(train[train_y==0][train.ProductCD=='C'][col], '.b')
# plt.plot(train[train_y==1][train.ProductCD=='C'][col], '.r')
# plt.plot(test[col][test.ProductCD=='C'], '.g')

In [14]:
# fe with M 1-9
def fe_m(train_df, test_df, rm_features):
    
    print('==> processing M...')
    
    m_cols = ['M1','M2','M3','M5','M6','M7','M8','M9']
    
    for col in m_cols:
        train_df[col] = train_df[col].map({'T':1, 'F':0})
        test_df[col]  = test_df[col].map({'T':1, 'F':0})
    
    for df in [train_df, test_df]:
        df['M_sum'] = df[m_cols].sum(axis=1).astype(np.int8)
        df['M_na'] = df[m_cols].isna().sum(axis=1).astype(np.int8)
    
    # M4: freq encoding
    for col in ['M4', 'ProductCD']:
        temp_df = pd.concat([train_df[[col]], test_df[[col]]])
        fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
        fq_encode = fq_encode.to_dict()
        train_df[col] = train_df[col].map(fq_encode)
        test_df[col]  = test_df[col].map(fq_encode)
    
    tmp_rm = []
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_m(train, test, rm_features)

In [15]:
def process_v(train_df, test_df, v_list):
    n_com = 5
    pca = PCA(n_components=n_com, random_state=42)
    fill_val = 0

    temp_df = pd.concat([train_df[v_list], test_df[v_list]])
    pca.fit_transform(temp_df[v_list].fillna(fill_val))
    train_compressed = pca.transform(train_df[v_list].fillna(fill_val))
    test_compressed = pca.transform(test_df[v_list].fillna(fill_val))

    for i in range(n_com):
        train_df[str(v_list[0]) + '_pca_' + str(i)] = train_compressed[:, i]
        test_df[str(v_list[0]) + '_pca_' + str(i)] = test_compressed[:, i]

    return train_df, test_df

In [16]:
# fe with V 1-339
def fe_v(train_df, test_df, rm_features):
    
    print('==> processing V...')
    
    v_cols = ['V' + str(i) for i in range(1,340)]
       
    # V 1-11
    v_1_11 = ['V' + str(i) for i in range(1,12)]
    train_df, test_df = process_v(train_df, test_df, v_1_11)
    
    # V 12-34
    v_12_34 = ['V' + str(i) for i in range(12,35)]
    train_df, test_df = process_v(train_df, test_df, v_12_34)
    
    # V 35-52
    v_35_52 = ['V' + str(i) for i in range(35,53)]
    train_df, test_df = process_v(train_df, test_df, v_35_52)
    
    # V 53-74
    v_53_74 = ['V' + str(i) for i in range(53,75)]
    train_df, test_df = process_v(train_df, test_df, v_53_74)
    
    # V 75-94
    v_75_94 = ['V' + str(i) for i in range(75,95)]
    train_df, test_df = process_v(train_df, test_df, v_75_94)
    
    # V 95-137
    v_95_137 = ['V' + str(i) for i in range(95,138)]
    train_df, test_df = process_v(train_df, test_df, v_95_137)
    
    # V 138-166
    v_138_166 = ['V' + str(i) for i in range(138,167)]
    train_df, test_df = process_v(train_df, test_df, v_138_166)
    
    # V 167-216
    v_167_216 = ['V' + str(i) for i in range(167,217)]
    train_df, test_df = process_v(train_df, test_df, v_167_216)
    
    # V 217-278
    v_217_278 = ['V' + str(i) for i in range(217,279)]
    train_df, test_df = process_v(train_df, test_df, v_217_278)
    
    # V 279-321
    v_279_321 = ['V' + str(i) for i in range(279,322)]
    train_df, test_df = process_v(train_df, test_df, v_279_321)
    
    # V 322-339
    v_322_339 = ['V' + str(i) for i in range(322,339)]
    train_df, test_df = process_v(train_df, test_df, v_322_339)
    
    tmp_rm = []
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_v(train, test, rm_features)

In [17]:
# fe with id 1-11, 12-38
def fe_id(train_df, test_df, rm_features):
    
    print('==> processing id...')
    
    ## id 1-11
    
    
    ## id 12-38
    for df in [train_df, test_df]:
        df['id_12'] = df['id_12'].map({'Found':1, 'NotFound':0})
        df['id_15'] = df['id_15'].map({'New':2, 'Found':1, 'Unknown':0})
        df['id_16'] = df['id_16'].map({'Found':1, 'NotFound':0})

        df['id_23'] = df['id_23'].map({'IP_PROXY:TRANSPARENT':3, 'IP_PROXY:ANONYMOUS':2, 'IP_PROXY:HIDDEN':1})

        df['id_27'] = df['id_27'].map({'Found':1, 'NotFound':0})
        df['id_28'] = df['id_28'].map({'New':2, 'Found':1})

        df['id_29'] = df['id_29'].map({'Found':1, 'NotFound':0})

        df['id_35'] = df['id_35'].map({'T':1, 'F':0})
        df['id_36'] = df['id_36'].map({'T':1, 'F':0})
        df['id_37'] = df['id_37'].map({'T':1, 'F':0})
        df['id_38'] = df['id_38'].map({'T':1, 'F':0})

        df['id_34'] = df['id_34'].fillna(':0')
        df['id_34'] = df['id_34'].apply(lambda x: x.split(':')[1]).astype(np.int8)
        df['id_34'] = np.where(df['id_34']==0, np.nan, df['id_34'])

        df['id_33'] = df['id_33'].fillna('0x0')
        df['id_33_0'] = df['id_33'].apply(lambda x: x.split('x')[0]).astype(int)
        df['id_33_1'] = df['id_33'].apply(lambda x: x.split('x')[1]).astype(int)
        df['id_33'] = np.where(df['id_33']=='0x0', np.nan, df['id_33'])

        # id_30
        df['id_30'] = df['id_30'].fillna('unknown_device').str.lower()
        df['id_30_device'] = df['id_30'].apply(
            lambda x: ''.join([i for i in x if i.isalpha()]))
        df['id_30_version'] = df['id_30'].apply(
            lambda x: ''.join([i for i in x if i.isnumeric()]))
        
        # id_31 --> 'lastest_browser'
        df = setBrowser(df)

    for col in ['id_33', 'id_30_device', 'id_30_version']:
        train_df[col] = train_df[col].fillna('unseen_before_label')
        test_df[col]  = test_df[col].fillna('unseen_before_label')

        le = LabelEncoder()
        le.fit(list(train_df[col])+list(test_df[col]))
        train_df[col] = le.transform(train_df[col])
        test_df[col]  = le.transform(test_df[col])
    
    # freq encoding
    id_cols = ['id_12', 'id_13', 'id_14', 'id_15', 'id_16', 'id_17', 'id_18', 'id_19',
    'id_20', 'id_21','id_22', 'id_23', 'id_24', 'id_25', 'id_26', 'id_27', 'id_28', 'id_29',
    'id_30_device', 'id_30_version',
    'id_32', 'id_33', 'id_33_0', 'id_33_1', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38']
    
#     for col in id_cols:
#         temp_df = pd.concat([train_df[[col]], test_df[[col]]])
#         fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
#         fq_encode = fq_encode.to_dict()
#         train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
#         test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)
    
#     tmp_rm = [
#         'id_03', 'id_04', 'id_07','id_08','id_09', 'id_10',  'id_11', 'id_12',
#         'id_14', 'id_15', 'id_16', 'id_17', 'id_18', 'id_21','id_22', 'id_23', 'id_24', 'id_25', 'id_26', 'id_27', 'id_28', 'id_29',
#     'id_30_device', 'id_30_version', 'id_30', 'id_31',
#     'id_32', 'id_33', 'id_33_0', 'id_33_1', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38']
    
    tmp_rm = ['id_30', 'id_31']
    
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_id(train, test, rm_features)

In [18]:
 # fe with DeviceType, DeviceInfo
def fe_device(train_df, test_df, rm_features):
    
    print('==> processing device...')
    
    for df in [train_df, test_df]:
        # DeviceType
#         df['DeviceType'] = df['DeviceType'].map({'desktop':1, 'mobile':0})
        pass
#         # DeviceInfo --> "device_name"
#         df = setDevice(df)
    
#     for col in ['DeviceType', 'device_name']:
#         temp_df = pd.concat([train_df[[col]], test_df[[col]]])
#         fq_encode = temp_df[col].value_counts(dropna=False) / len(temp_df)
#         fq_encode = fq_encode.to_dict()
#         train_df[col+'_fq_enc'] = train_df[col].map(fq_encode)
#         test_df[col+'_fq_enc']  = test_df[col].map(fq_encode)
    
    tmp_rm = ['DeviceInfo', 'DeviceType']
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features
# train, test, rm_features = fe_device(train, test, rm_features)

In [19]:
# fe with amount (user id)
def fe_uid(train_df, test_df, rm_features):
    
    print('==> processing uid...')
    
    # user id, save for later aggregation
    for df in [train_df, test_df]:
        df['cents'] = df['TransactionAmt'].apply(lambda x: x - int(x))
        
        ### universal user
        # card
        df['uid1'] = df['card1'].astype(str)+'_'+df['card2'].astype(str)
        df['uid2'] = df['uid1'] + df['card3'].astype(str)+'_'+df['card5'].astype(str) #*
        df['uid3'] = df['card2'].astype(str)+'_'+df['card3'].astype(str)
        
        # addr
        df['uid4'] =  df['addr1'].astype(str)+'_'+df['addr2'].astype(str) # strong
        df['uid5'] =df['uid2'] + df['addr1'].astype(str)+'_'+df['addr2'].astype(str) # strong #*
                
        # email
        df['uid6'] = df['P_emaildomain'].astype(str)+'_'+df['R_emaildomain'].astype(str) # strong
        df['uid7'] = df['uid5'].astype(str)+'_'+df['uid6'].astype(str) # strong
        df['uid8'] = df['uid5'].astype(str)+'_'+df['P_emaildomain'].astype(str)
        df['uid9'] = df['uid5'].astype(str)+'_'+df['R_emaildomain'].astype(str)
        df['uid10'] = df['uid5'].astype(str)+'_'+df['uid6'].astype(str) #*
        
#         df['uid10'] = df['id_19'].astype(str)+'_'+df['id_20'].astype(str)  
#         df['uid11'] = df['uid4'].astype(str)+'_'+df['uid10'].astype(str)
        
#         df['uid12'] = df['C1'].astype(str)+'_'+df['C2'].astype(str)+'_'+df['uid4'].astype(str)
#         df['uid13'] = df['D1'].astype(str)+'_'+df['D2'].astype(str)+'_'+df['uid4'].astype(str)
        
#         df['uid14'] = df['ProductCD'].astype(str)+'_'+df['uid5'].astype(str)
#         df['uid15'] = df['ProductCD'].astype(str)+'_'+df['uid6'].astype(str)
#         df['uid16'] = df['ProductCD'].astype(str)+'_'+df['uid7'].astype(str)
#         df['uid17'] = df['ProductCD'].astype(str)+'_'+df['uid8'].astype(str)
        
#         df['uid18'] = df['DT_month'].astype(str)+'_'+df['uid5'].astype(str)
#         df['uid19'] = df['DT_week_year'].astype(str)+'_'+df['uid5'].astype(str)
#         df['uid20'] = df['DT_day_year'].astype(str)+'_'+df['uid5'].astype(str)

    uid_list = ['uid' + str(i) for i in range(1, 11)]

    tmp_rm = [] + uid_list
    
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features, tmp_rm
# train, test, rm_features = fe_uid(train, test, rm_features)

In [20]:
# group aggregation
def fe_agg(train_df, test_df, rm_features, uid_list):

    print('==> processing aggregation...')

    uid_cols = ['card1', 'card2', 'card3', 'card5'] + uid_list
    
    for col in tqdm_notebook(uid_cols):
        # aggr: mean, std, min, max, sum
        for agg_type in ['mean', 'std']:
            for agg_col in ['TransactionAmt', 'cents']:
                new_col_name = col + '_' + agg_col + '_' + agg_type
                temp_df = pd.concat(
                    [train_df[[col, agg_col]], test_df[[col, agg_col]]])
                temp_df = temp_df.groupby([col])[agg_col].agg([
                    agg_type
                ]).reset_index().rename(columns={agg_type: new_col_name})

                temp_df.index = list(temp_df[col])
                temp_df = temp_df[new_col_name].to_dict()

                train_df[new_col_name] = train_df[col].map(temp_df)
                test_df[new_col_name] = test_df[col].map(temp_df)  
                
    for col in tqdm_notebook(uid_cols):
        # aggr: value - mean
        for agg_type in ['mean']:
            for agg_col in ['TransactionAmt', 'cents']:
                new_col_name = col + '_' + agg_col + '_' + agg_type + '_diff'
                temp_df = pd.concat(
                    [train_df[[col, agg_col]], test_df[[col, agg_col]]])
                temp_df = temp_df.groupby([col])[agg_col].agg([
                    agg_type
                ]).reset_index().rename(columns={agg_type: new_col_name})

                temp_df.index = list(temp_df[col])
                temp_df = temp_df[new_col_name].to_dict()

                train_df[new_col_name] = train_df[col].map(temp_df)
                test_df[new_col_name] = test_df[col].map(temp_df) 
                train_df[new_col_name] = train_df[agg_col] - train_df[new_col_name]
                test_df[new_col_name] = test_df[agg_col] - test_df[new_col_name]
        
        # aggr: max - value
        for agg_type in ['max']:
            for agg_col in ['TransactionAmt', 'cents']:
                new_col_name = col + '_' + agg_col + '_' + agg_type + '_diff'
                temp_df = pd.concat(
                    [train_df[[col, agg_col]], test_df[[col, agg_col]]])
                temp_df = temp_df.groupby([col])[agg_col].agg([
                    agg_type
                ]).reset_index().rename(columns={agg_type: new_col_name})

                temp_df.index = list(temp_df[col])
                temp_df = temp_df[new_col_name].to_dict()

                train_df[new_col_name] = train_df[col].map(temp_df)
                test_df[new_col_name] = test_df[col].map(temp_df) 
                train_df[new_col_name] = train_df[new_col_name] - train_df[agg_col]
                test_df[new_col_name] = test_df[new_col_name] - test_df[agg_col] 
                
                
    count_cols = [] # seems like freq encoding
    for col in tqdm_notebook(uid_cols + count_cols):
        # count
        for agg_type in ['count']:
            for agg_col in ['TransactionDT']:
                new_col_name = col + '_' + agg_type
                temp_df = pd.concat(
                    [train_df[[col, agg_col]], test_df[[col, agg_col]]])
                temp_df = temp_df.groupby([col])[agg_col].agg([
                    agg_type
                ]).reset_index().rename(columns={agg_type: new_col_name})

                temp_df.index = list(temp_df[col])
                temp_df = temp_df[new_col_name].to_dict()

                train_df[new_col_name] = train_df[col].map(temp_df)
                test_df[new_col_name] = test_df[col].map(temp_df)
        
        
    train_df = train_df.replace(np.inf, 999)
    test_df = test_df.replace(np.inf, 999)

    tmp_rm = []
    rm_features = rm_features + tmp_rm
    return train_df, test_df, rm_features

# train, test, rm_features = fe_agg(train, test, rm_features)

In [21]:
def fe(train_df, test_df):
    
    rm_features = ['isFraud']
    
    # fe with DT
    train_df, test_df, rm_features = fe_dt(train_df, test_df, rm_features)
    
    # fe with card 1-6
    train_df, test_df, rm_features = fe_card(train_df, test_df, rm_features)
    
    # fe with addr 1-2, dist 1-2
    train_df, test_df, rm_features = fe_location(train_df, test_df, rm_features)
    
    # fe wtih P&R email domain
    train_df, test_df, rm_features = fe_email(train_df, test_df, rm_features)

    # fe with C 1-14
    train_df, test_df, rm_features = fe_c(train_df, test_df, rm_features)
    
    # fe with D 1-15
    train_df, test_df, rm_features = fe_d(train_df, test_df, rm_features)
    
    # fe with M 1-9
    train_df, test_df, rm_features = fe_m(train_df, test_df, rm_features)
    
    # fe with V 1-339
    train_df, test_df, rm_features = fe_v(train_df, test_df, rm_features)
    
    # fe with id 1-11, 12-38
    train_df, test_df, rm_features = fe_id(train_df, test_df, rm_features)
    
    # fe with DeviceType, DeviceInfo
    train_df, test_df, rm_features = fe_device(train_df, test_df, rm_features)
    
#     # fe with amount (user id)
#     train_df, test_df, rm_features, uid_list = fe_uid(train_df, test_df, rm_features)
    
#     # group aggregation
#     train_df, test_df, rm_features = fe_agg(train_df, test_df, rm_features, uid_list)
    
    return train_df, test_df, rm_features

train, test, rm_features = fe(train, test)

==> processing DT...
==> processing card...
==> processing location...
==> processing P&R email domain...
==> processing C...
==> processing D...
==> processing M...
==> processing V...
==> processing id...
==> processing device...


In [22]:
train.head()

Unnamed: 0_level_0,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,dist1,dist2,P_emaildomain,R_emaildomain,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15,M1,M2,M3,M4,M5,M6,M7,M8,M9,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V90,V91,V92,V93,V94,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V204,V205,V206,V207,V208,V209,V210,V211,V212,V213,V214,V215,V216,V217,V218,V219,V220,V221,V222,V223,V224,V225,V226,V227,V228,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V241,V242,V243,V244,V245,V246,V247,V248,V249,V250,V251,V252,V253,V254,V255,V256,V257,V258,V259,V260,V261,V262,V263,V264,V265,V266,V267,V268,V269,V270,V271,V272,V273,V274,V275,V276,V277,V278,V279,V280,V281,V282,V283,V284,V285,V286,V287,V288,V289,V290,V291,V292,V293,V294,V295,V296,V297,V298,V299,V300,V301,V302,V303,V304,V305,V306,V307,V308,V309,V310,V311,V312,V313,V314,V315,V316,V317,V318,V319,V320,V321,V322,V323,V324,V325,V326,V327,V328,V329,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339,id_01,id_02,id_03,id_04,id_05,id_06,id_07,id_08,id_09,id_10,id_11,id_12,id_13,id_14,id_15,id_16,id_17,id_18,id_19,id_20,id_21,id_22,id_23,id_24,id_25,id_26,id_27,id_28,id_29,id_30,id_31,id_32,id_33,id_34,id_35,id_36,id_37,id_38,DeviceType,DeviceInfo,DT,DT_month,DT_week_year,DT_day_year,DT_hour,DT_day_week,DT_day,DT_hour_fq_enc,DT_day_week_fq_enc,DT_day_fq_enc,card1_fq_enc,card2_fq_enc,card3_fq_enc,card5_fq_enc,addr1_fq_enc,addr2_fq_enc,email_check,P_emaildomain_fq_enc,R_emaildomain_fq_enc,D1_day_mean,DT_day_year_prdt,D1_prdt_day_mean,D1_week_mean,DT_week_year_prdt,D1_prdt_week_mean,D2_day_mean,D2_prdt_day_mean,D2_week_mean,D2_prdt_week_mean,D3_day_mean,D3_prdt_day_mean,D3_week_mean,D3_prdt_week_mean,D4_day_mean,D4_prdt_day_mean,D4_week_mean,D4_prdt_week_mean,D5_day_mean,D5_prdt_day_mean,D5_week_mean,D5_prdt_week_mean,D6_day_mean,D6_prdt_day_mean,D6_week_mean,D6_prdt_week_mean,D7_day_mean,D7_prdt_day_mean,D7_week_mean,D7_prdt_week_mean,D10_day_mean,D10_prdt_day_mean,D10_week_mean,D10_prdt_week_mean,D11_day_mean,D11_prdt_day_mean,D11_week_mean,D11_prdt_week_mean,D12_day_mean,D12_prdt_day_mean,D12_week_mean,D12_prdt_week_mean,D13_day_mean,D13_prdt_day_mean,D13_week_mean,D13_prdt_week_mean,D14_day_mean,D14_prdt_day_mean,D14_week_mean,D14_prdt_week_mean,D15_day_mean,D15_prdt_day_mean,D15_week_mean,D15_prdt_week_mean,M_sum,M_na,V1_pca_0,V1_pca_1,V1_pca_2,V1_pca_3,V1_pca_4,V12_pca_0,V12_pca_1,V12_pca_2,V12_pca_3,V12_pca_4,V35_pca_0,V35_pca_1,V35_pca_2,V35_pca_3,V35_pca_4,V53_pca_0,V53_pca_1,V53_pca_2,V53_pca_3,V53_pca_4,V75_pca_0,V75_pca_1,V75_pca_2,V75_pca_3,V75_pca_4,V95_pca_0,V95_pca_1,V95_pca_2,V95_pca_3,V95_pca_4,V138_pca_0,V138_pca_1,V138_pca_2,V138_pca_3,V138_pca_4,V167_pca_0,V167_pca_1,V167_pca_2,V167_pca_3,V167_pca_4,V217_pca_0,V217_pca_1,V217_pca_2,V217_pca_3,V217_pca_4,V279_pca_0,V279_pca_1,V279_pca_2,V279_pca_3,V279_pca_4,V322_pca_0,V322_pca_1,V322_pca_2,V322_pca_3,V322_pca_4,id_33_0,id_33_1,id_30_device,id_30_version,lastest_browser
TransactionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1,Unnamed: 523_level_1,Unnamed: 524_level_1,Unnamed: 525_level_1,Unnamed: 526_level_1,Unnamed: 527_level_1,Unnamed: 528_level_1,Unnamed: 529_level_1,Unnamed: 530_level_1,Unnamed: 531_level_1,Unnamed: 532_level_1,Unnamed: 533_level_1,Unnamed: 534_level_1,Unnamed: 535_level_1,Unnamed: 536_level_1,Unnamed: 537_level_1,Unnamed: 538_level_1,Unnamed: 539_level_1,Unnamed: 540_level_1,Unnamed: 541_level_1,Unnamed: 542_level_1,Unnamed: 543_level_1,Unnamed: 544_level_1,Unnamed: 545_level_1,Unnamed: 546_level_1,Unnamed: 547_level_1,Unnamed: 548_level_1,Unnamed: 549_level_1,Unnamed: 550_level_1,Unnamed: 551_level_1,Unnamed: 552_level_1,Unnamed: 553_level_1,Unnamed: 554_level_1,Unnamed: 555_level_1,Unnamed: 556_level_1,Unnamed: 557_level_1,Unnamed: 558_level_1,Unnamed: 559_level_1,Unnamed: 560_level_1,Unnamed: 561_level_1,Unnamed: 562_level_1,Unnamed: 563_level_1,Unnamed: 564_level_1,Unnamed: 565_level_1,Unnamed: 566_level_1,Unnamed: 567_level_1,Unnamed: 568_level_1
2987000,0,86400,68.5,0.729707,13926.0,,150.0,0.00868,142.0,0.24393,315.0,87.0,19.0,,49,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,14.0,,13.0,,,,,,,13.0,13.0,,,,0.0,1.0,1.0,1.0,0.112052,0.0,1.0,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,117.0,0.0,0.0,0.0,0.0,0.0,117.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,117.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:00:00,12,48,335,0,4,1,0.062342,0.163816,0.036737,5.1e-05,0.016029,0.872054,0.000282,0.039221,0.871662,0,0.149146,0.751045,-88.75,335_W,-106.9375,-82.4375,48_W,-104.75,,,,,-15.546875,-14.6875,-14.84375,-13.84375,,,,,,,,,,,,,,,,,-120.5,-134.25,-114.8125,-130.625,-142.125,-142.125,-148.5,-148.5,,,,,,,,,,,,,-195.75,-209.875,-199.375,-214.75,4,3,1.105709,-0.672874,0.09168,-0.041502,-0.112852,-0.517778,0.343313,-0.795382,-0.072634,-0.067785,-2.189012,0.570393,0.716233,-0.092708,0.086531,-0.443217,0.421562,-0.746061,-0.258886,-0.010403,-0.379178,0.51362,0.799062,-0.659385,0.07751,-252.980928,-96.751761,48.857959,-71.865657,68.485602,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,-330.417064,-99.633832,10.402851,-4.653791,-134.096963,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987001,0,86401,29.0,0.729707,2755.0,404.0,150.0,0.316602,102.0,0.24393,325.0,87.0,,,16,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,,,0.0,,,,,,0.0,,,,,0.0,,,,0.326084,1.0,1.0,,,,,,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:00:01,12,48,335,0,4,1,0.062342,0.163816,0.036737,0.001219,0.005097,0.872054,0.045105,0.070087,0.871662,0,0.397184,0.751045,-102.75,335_W,-120.9375,-96.4375,48_W,-118.75,,,,,,,,,-165.5,-183.0,-170.75,-188.625,,,,,,,,,,,,,-133.5,-147.25,-127.8125,-143.625,,,,,,,,,,,,,,,,,-195.75,-209.875,-199.375,-214.75,2,6,-1.808407,0.017792,0.05844,0.011721,0.039499,-0.001253,-0.244607,-0.219052,-0.873822,0.44775,0.130262,0.009971,0.415828,-0.15567,0.00395,-0.112093,-0.247761,-0.288684,0.123833,0.950714,-0.121519,-0.0752,0.31896,-0.211891,-0.104486,-370.136334,-98.921114,-51.135882,-83.446399,11.895101,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,-450.06876,-66.453267,-93.93723,22.487832,-118.160636,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987002,0,86469,59.0,0.729707,4663.0,490.0,150.0,0.655877,166.0,0.751855,330.0,87.0,287.0,,35,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,,,0.0,,,,,,0.0,315.0,,,,315.0,1.0,1.0,1.0,0.326084,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:01:09,12,48,335,0,4,1,0.062342,0.163816,0.036737,0.001635,0.064249,0.872054,0.093809,0.044099,0.871662,0,0.009054,0.751045,-102.75,335_W,-120.9375,-96.4375,48_W,-118.75,,,,,,,,,-165.5,-183.0,-170.75,-188.625,,,,,,,,,,,,,-133.5,-147.25,-127.8125,-143.625,159.875,159.875,153.5,153.5,,,,,,,,,,,,,119.25,105.125,115.625,100.25,3,0,1.105709,-0.672874,0.09168,-0.041502,-0.112852,-0.517778,0.343313,-0.795382,-0.072634,-0.067785,0.235662,-0.699503,-0.062825,-0.005122,-0.791631,-0.443217,0.421562,-0.746061,-0.258886,-0.010403,-0.145674,0.548502,-0.271033,-0.815928,0.005365,-370.136334,-98.921114,-51.135882,-83.446399,11.895101,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,-450.06876,-66.453267,-93.93723,22.487832,-118.160636,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987003,0,86499,50.0,0.729707,18132.0,567.0,150.0,0.316602,117.0,0.751855,476.0,87.0,,,55,49,2.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,1.0,0.0,25.0,1.0,112.0,112.0,0.0,94.0,0.0,,,,,84.0,,,,,111.0,,,,0.326084,1.0,0.0,,,,,,,,,,,,,,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,48.0,28.0,0.0,10.0,4.0,1.0,38.0,24.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,50.0,1758.0,925.0,0.0,354.0,135.0,50.0,1404.0,790.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,28.0,0.0,0.0,0.0,0.0,10.0,0.0,4.0,0.0,0.0,1.0,1.0,1.0,1.0,38.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,50.0,1758.0,925.0,0.0,354.0,0.0,135.0,0.0,0.0,0.0,50.0,1404.0,790.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:01:39,12,48,335,0,4,1,0.062342,0.163816,0.036737,0.006958,0.010287,0.872054,0.042891,0.015908,0.871662,0,0.166587,0.751045,9.25,335_W,-8.9375,15.5625,48_W,-6.75,-61.5,-62.0,-56.75,-59.0,-28.546875,-27.6875,-27.84375,-26.84375,-71.5,-89.0,-76.75,-94.625,-33.25,-32.25,-34.6875,-33.25,,,,,,,,,-49.5,-63.25,-43.8125,-59.625,,,,,,,,,,,,,,,,,-84.75,-98.875,-88.375,-103.75,1,6,-1.808407,0.017792,0.05844,0.011721,0.039499,-0.517778,0.343313,-0.795382,-0.072634,-0.067785,0.235662,-0.699503,-0.062825,-0.005122,-0.791631,-0.443217,0.421562,-0.746061,-0.258886,-0.010403,-0.145674,0.548502,-0.271033,-0.815928,0.005365,1925.297043,124.468905,1067.157082,102.326815,-223.719757,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,1863.511109,-251.389065,897.299588,329.070627,226.268036,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987004,0,86506,50.0,0.056868,4497.0,514.0,150.0,0.316602,102.0,0.24393,420.0,87.0,,,16,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,0.473181,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,18.0,140.0,0.0,0.0,0.0,0.0,1803.0,49.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,15560.0,169690.796875,0.0,0.0,0.0,515.0,5155.0,2840.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70787.0,,,,,,,,,100.0,0.0,,-480.0,2.0,0.0,166.0,,542.0,144.0,,,,,,,,2.0,0.0,android 7.0,samsung browser 6.2,32.0,267,2.0,1.0,0.0,1.0,1.0,mobile,SAMSUNG SM-G892A Build/NRD90M,2017-12-01 00:01:46,12,48,335,0,4,1,0.062342,0.163816,0.036737,2.7e-05,0.024812,0.872054,0.045105,0.006477,0.871662,0,0.397184,0.751045,-102.75,335_H,-0.019119,-96.4375,48_H,-0.056213,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,8,-1.808407,0.017792,0.05844,0.011721,0.039499,-0.340372,-2.497077,-0.024258,-0.106216,-0.454519,-2.189012,0.570393,0.716233,-0.092708,0.086531,-1.204924,-2.212143,-0.121934,0.538949,0.080589,-2.257252,-0.776626,1.105983,-0.058375,0.209016,-370.136334,-98.921114,-51.135882,-83.446399,11.895101,159992.800345,-132.61494,-6445.181416,2455.900081,-330.799086,-299.047003,-24.895019,-34.842355,-1.360958,13.03224,-117.756896,-14.16282,-9.818613,-9.720964,4.034351,-450.068726,-66.45329,-93.937212,22.487953,-118.16082,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,2220,1080,0,67,0.0


In [23]:
# # pearson correlation
# un_corr_cols = []
# for col in train.columns:
#     if (train[col].dtype != 'object' and col not in rm_features):
#         if(train[col].isnull().any()):
#             if(abs(pearsonr(train[col].fillna(-999), train_y)[1]) > 0.05 and col != 'DT_month'):
#                 un_corr_cols.append(col)
#             if(abs(pearsonr(train[col].fillna(-999), train_y)[0]) > 0.01 and col != 'DT_month'):
#                 print(col, pearsonr(train[col].fillna(-999), train_y))
#         else:
#             if(abs(pearsonr(train[col], train_y)[1]) > 0.05 and col != 'DT_month'):
#                 un_corr_cols.append(col)
#             if(abs(pearsonr(train[col], train_y)[0]) > 0.01 and col != 'DT_month'):
#                 print(col, pearsonr(train[col], train_y))
# print(len(rm_features + un_corr_cols), 'features are removed')

In [24]:
# train = train.drop(rm_features + un_corr_cols, axis=1)
# test = test.drop(rm_features + un_corr_cols, axis=1)

# print(train.shape)
# print(test.shape)

In [25]:
train.head()

Unnamed: 0_level_0,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,dist1,dist2,P_emaildomain,R_emaildomain,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15,M1,M2,M3,M4,M5,M6,M7,M8,M9,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V90,V91,V92,V93,V94,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V204,V205,V206,V207,V208,V209,V210,V211,V212,V213,V214,V215,V216,V217,V218,V219,V220,V221,V222,V223,V224,V225,V226,V227,V228,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V241,V242,V243,V244,V245,V246,V247,V248,V249,V250,V251,V252,V253,V254,V255,V256,V257,V258,V259,V260,V261,V262,V263,V264,V265,V266,V267,V268,V269,V270,V271,V272,V273,V274,V275,V276,V277,V278,V279,V280,V281,V282,V283,V284,V285,V286,V287,V288,V289,V290,V291,V292,V293,V294,V295,V296,V297,V298,V299,V300,V301,V302,V303,V304,V305,V306,V307,V308,V309,V310,V311,V312,V313,V314,V315,V316,V317,V318,V319,V320,V321,V322,V323,V324,V325,V326,V327,V328,V329,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339,id_01,id_02,id_03,id_04,id_05,id_06,id_07,id_08,id_09,id_10,id_11,id_12,id_13,id_14,id_15,id_16,id_17,id_18,id_19,id_20,id_21,id_22,id_23,id_24,id_25,id_26,id_27,id_28,id_29,id_30,id_31,id_32,id_33,id_34,id_35,id_36,id_37,id_38,DeviceType,DeviceInfo,DT,DT_month,DT_week_year,DT_day_year,DT_hour,DT_day_week,DT_day,DT_hour_fq_enc,DT_day_week_fq_enc,DT_day_fq_enc,card1_fq_enc,card2_fq_enc,card3_fq_enc,card5_fq_enc,addr1_fq_enc,addr2_fq_enc,email_check,P_emaildomain_fq_enc,R_emaildomain_fq_enc,D1_day_mean,DT_day_year_prdt,D1_prdt_day_mean,D1_week_mean,DT_week_year_prdt,D1_prdt_week_mean,D2_day_mean,D2_prdt_day_mean,D2_week_mean,D2_prdt_week_mean,D3_day_mean,D3_prdt_day_mean,D3_week_mean,D3_prdt_week_mean,D4_day_mean,D4_prdt_day_mean,D4_week_mean,D4_prdt_week_mean,D5_day_mean,D5_prdt_day_mean,D5_week_mean,D5_prdt_week_mean,D6_day_mean,D6_prdt_day_mean,D6_week_mean,D6_prdt_week_mean,D7_day_mean,D7_prdt_day_mean,D7_week_mean,D7_prdt_week_mean,D10_day_mean,D10_prdt_day_mean,D10_week_mean,D10_prdt_week_mean,D11_day_mean,D11_prdt_day_mean,D11_week_mean,D11_prdt_week_mean,D12_day_mean,D12_prdt_day_mean,D12_week_mean,D12_prdt_week_mean,D13_day_mean,D13_prdt_day_mean,D13_week_mean,D13_prdt_week_mean,D14_day_mean,D14_prdt_day_mean,D14_week_mean,D14_prdt_week_mean,D15_day_mean,D15_prdt_day_mean,D15_week_mean,D15_prdt_week_mean,M_sum,M_na,V1_pca_0,V1_pca_1,V1_pca_2,V1_pca_3,V1_pca_4,V12_pca_0,V12_pca_1,V12_pca_2,V12_pca_3,V12_pca_4,V35_pca_0,V35_pca_1,V35_pca_2,V35_pca_3,V35_pca_4,V53_pca_0,V53_pca_1,V53_pca_2,V53_pca_3,V53_pca_4,V75_pca_0,V75_pca_1,V75_pca_2,V75_pca_3,V75_pca_4,V95_pca_0,V95_pca_1,V95_pca_2,V95_pca_3,V95_pca_4,V138_pca_0,V138_pca_1,V138_pca_2,V138_pca_3,V138_pca_4,V167_pca_0,V167_pca_1,V167_pca_2,V167_pca_3,V167_pca_4,V217_pca_0,V217_pca_1,V217_pca_2,V217_pca_3,V217_pca_4,V279_pca_0,V279_pca_1,V279_pca_2,V279_pca_3,V279_pca_4,V322_pca_0,V322_pca_1,V322_pca_2,V322_pca_3,V322_pca_4,id_33_0,id_33_1,id_30_device,id_30_version,lastest_browser
TransactionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1,Unnamed: 523_level_1,Unnamed: 524_level_1,Unnamed: 525_level_1,Unnamed: 526_level_1,Unnamed: 527_level_1,Unnamed: 528_level_1,Unnamed: 529_level_1,Unnamed: 530_level_1,Unnamed: 531_level_1,Unnamed: 532_level_1,Unnamed: 533_level_1,Unnamed: 534_level_1,Unnamed: 535_level_1,Unnamed: 536_level_1,Unnamed: 537_level_1,Unnamed: 538_level_1,Unnamed: 539_level_1,Unnamed: 540_level_1,Unnamed: 541_level_1,Unnamed: 542_level_1,Unnamed: 543_level_1,Unnamed: 544_level_1,Unnamed: 545_level_1,Unnamed: 546_level_1,Unnamed: 547_level_1,Unnamed: 548_level_1,Unnamed: 549_level_1,Unnamed: 550_level_1,Unnamed: 551_level_1,Unnamed: 552_level_1,Unnamed: 553_level_1,Unnamed: 554_level_1,Unnamed: 555_level_1,Unnamed: 556_level_1,Unnamed: 557_level_1,Unnamed: 558_level_1,Unnamed: 559_level_1,Unnamed: 560_level_1,Unnamed: 561_level_1,Unnamed: 562_level_1,Unnamed: 563_level_1,Unnamed: 564_level_1,Unnamed: 565_level_1,Unnamed: 566_level_1,Unnamed: 567_level_1,Unnamed: 568_level_1
2987000,0,86400,68.5,0.729707,13926.0,,150.0,0.00868,142.0,0.24393,315.0,87.0,19.0,,49,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,14.0,,13.0,,,,,,,13.0,13.0,,,,0.0,1.0,1.0,1.0,0.112052,0.0,1.0,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,117.0,0.0,0.0,0.0,0.0,0.0,117.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,117.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:00:00,12,48,335,0,4,1,0.062342,0.163816,0.036737,5.1e-05,0.016029,0.872054,0.000282,0.039221,0.871662,0,0.149146,0.751045,-88.75,335_W,-106.9375,-82.4375,48_W,-104.75,,,,,-15.546875,-14.6875,-14.84375,-13.84375,,,,,,,,,,,,,,,,,-120.5,-134.25,-114.8125,-130.625,-142.125,-142.125,-148.5,-148.5,,,,,,,,,,,,,-195.75,-209.875,-199.375,-214.75,4,3,1.105709,-0.672874,0.09168,-0.041502,-0.112852,-0.517778,0.343313,-0.795382,-0.072634,-0.067785,-2.189012,0.570393,0.716233,-0.092708,0.086531,-0.443217,0.421562,-0.746061,-0.258886,-0.010403,-0.379178,0.51362,0.799062,-0.659385,0.07751,-252.980928,-96.751761,48.857959,-71.865657,68.485602,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,-330.417064,-99.633832,10.402851,-4.653791,-134.096963,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987001,0,86401,29.0,0.729707,2755.0,404.0,150.0,0.316602,102.0,0.24393,325.0,87.0,,,16,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,,,0.0,,,,,,0.0,,,,,0.0,,,,0.326084,1.0,1.0,,,,,,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:00:01,12,48,335,0,4,1,0.062342,0.163816,0.036737,0.001219,0.005097,0.872054,0.045105,0.070087,0.871662,0,0.397184,0.751045,-102.75,335_W,-120.9375,-96.4375,48_W,-118.75,,,,,,,,,-165.5,-183.0,-170.75,-188.625,,,,,,,,,,,,,-133.5,-147.25,-127.8125,-143.625,,,,,,,,,,,,,,,,,-195.75,-209.875,-199.375,-214.75,2,6,-1.808407,0.017792,0.05844,0.011721,0.039499,-0.001253,-0.244607,-0.219052,-0.873822,0.44775,0.130262,0.009971,0.415828,-0.15567,0.00395,-0.112093,-0.247761,-0.288684,0.123833,0.950714,-0.121519,-0.0752,0.31896,-0.211891,-0.104486,-370.136334,-98.921114,-51.135882,-83.446399,11.895101,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,-450.06876,-66.453267,-93.93723,22.487832,-118.160636,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987002,0,86469,59.0,0.729707,4663.0,490.0,150.0,0.655877,166.0,0.751855,330.0,87.0,287.0,,35,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,,,0.0,,,,,,0.0,315.0,,,,315.0,1.0,1.0,1.0,0.326084,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:01:09,12,48,335,0,4,1,0.062342,0.163816,0.036737,0.001635,0.064249,0.872054,0.093809,0.044099,0.871662,0,0.009054,0.751045,-102.75,335_W,-120.9375,-96.4375,48_W,-118.75,,,,,,,,,-165.5,-183.0,-170.75,-188.625,,,,,,,,,,,,,-133.5,-147.25,-127.8125,-143.625,159.875,159.875,153.5,153.5,,,,,,,,,,,,,119.25,105.125,115.625,100.25,3,0,1.105709,-0.672874,0.09168,-0.041502,-0.112852,-0.517778,0.343313,-0.795382,-0.072634,-0.067785,0.235662,-0.699503,-0.062825,-0.005122,-0.791631,-0.443217,0.421562,-0.746061,-0.258886,-0.010403,-0.145674,0.548502,-0.271033,-0.815928,0.005365,-370.136334,-98.921114,-51.135882,-83.446399,11.895101,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,-450.06876,-66.453267,-93.93723,22.487832,-118.160636,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987003,0,86499,50.0,0.729707,18132.0,567.0,150.0,0.316602,117.0,0.751855,476.0,87.0,,,55,49,2.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,1.0,0.0,25.0,1.0,112.0,112.0,0.0,94.0,0.0,,,,,84.0,,,,,111.0,,,,0.326084,1.0,0.0,,,,,,,,,,,,,,,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,48.0,28.0,0.0,10.0,4.0,1.0,38.0,24.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,50.0,1758.0,925.0,0.0,354.0,135.0,50.0,1404.0,790.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,28.0,0.0,0.0,0.0,0.0,10.0,0.0,4.0,0.0,0.0,1.0,1.0,1.0,1.0,38.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,50.0,1758.0,925.0,0.0,354.0,0.0,135.0,0.0,0.0,0.0,50.0,1404.0,790.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,unknown_device,,,460,,,,,,,,2017-12-01 00:01:39,12,48,335,0,4,1,0.062342,0.163816,0.036737,0.006958,0.010287,0.872054,0.042891,0.015908,0.871662,0,0.166587,0.751045,9.25,335_W,-8.9375,15.5625,48_W,-6.75,-61.5,-62.0,-56.75,-59.0,-28.546875,-27.6875,-27.84375,-26.84375,-71.5,-89.0,-76.75,-94.625,-33.25,-32.25,-34.6875,-33.25,,,,,,,,,-49.5,-63.25,-43.8125,-59.625,,,,,,,,,,,,,,,,,-84.75,-98.875,-88.375,-103.75,1,6,-1.808407,0.017792,0.05844,0.011721,0.039499,-0.517778,0.343313,-0.795382,-0.072634,-0.067785,0.235662,-0.699503,-0.062825,-0.005122,-0.791631,-0.443217,0.421562,-0.746061,-0.258886,-0.010403,-0.145674,0.548502,-0.271033,-0.815928,0.005365,1925.297043,124.468905,1067.157082,102.326815,-223.719757,-10376.501283,-105.779936,20.137186,-23.409744,1.18737,-299.047397,-24.895938,-34.844658,-1.362068,13.039909,-117.757307,-14.162958,-9.824124,-9.723595,4.034396,1863.511109,-251.389065,897.299588,329.070627,226.268036,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,0,0,7,0,0.0
2987004,0,86506,50.0,0.056868,4497.0,514.0,150.0,0.316602,102.0,0.24393,420.0,87.0,,,16,49,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,0.473181,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,18.0,140.0,0.0,0.0,0.0,0.0,1803.0,49.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,15560.0,169690.796875,0.0,0.0,0.0,515.0,5155.0,2840.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70787.0,,,,,,,,,100.0,0.0,,-480.0,2.0,0.0,166.0,,542.0,144.0,,,,,,,,2.0,0.0,android 7.0,samsung browser 6.2,32.0,267,2.0,1.0,0.0,1.0,1.0,mobile,SAMSUNG SM-G892A Build/NRD90M,2017-12-01 00:01:46,12,48,335,0,4,1,0.062342,0.163816,0.036737,2.7e-05,0.024812,0.872054,0.045105,0.006477,0.871662,0,0.397184,0.751045,-102.75,335_H,-0.019119,-96.4375,48_H,-0.056213,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,8,-1.808407,0.017792,0.05844,0.011721,0.039499,-0.340372,-2.497077,-0.024258,-0.106216,-0.454519,-2.189012,0.570393,0.716233,-0.092708,0.086531,-1.204924,-2.212143,-0.121934,0.538949,0.080589,-2.257252,-0.776626,1.105983,-0.058375,0.209016,-370.136334,-98.921114,-51.135882,-83.446399,11.895101,159992.800345,-132.61494,-6445.181416,2455.900081,-330.799086,-299.047003,-24.895019,-34.842355,-1.360958,13.03224,-117.756896,-14.16282,-9.818613,-9.720964,4.034351,-450.068726,-66.45329,-93.937212,22.487953,-118.16082,-186.381735,-8.941901,11.315498,-2.795801,-5.815186,2220,1080,0,67,0.0


In [26]:
v_cols = ['V' + str(i) for i in range(1,340)]
train  = train.drop(v_cols, axis=1)
test = test.drop(v_cols, axis=1)

In [27]:
print(train.shape)
print(test.shape)

(590540, 229)
(506691, 229)


In [28]:
train.to_pickle('../input/features/train_basic_features.pkl')
test.to_pickle('../input/features/test_basic_features.pkl')
train_y.to_pickle('../input/features/train_y.pkl')
np.save('../input/features/rm_features', rm_features)

In [29]:
# col ='D7'
# train['day'] = train[col] - train['DT_day_year'] + train['DT_day_year'].min()
# train['day'] = train['day'].apply(lambda y: max(0,y))
# plt.plot(train[train_y==0]['day'], '.b')
# plt.plot(train[train_y==1]['day'], '.r')
# plt.show()
# plt.plot(train[train_y==0][train.ProductCD=='W']['day'], '.b')
# plt.plot(train[train_y==1][train.ProductCD=='W']['day'], '.r')