In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pylab as plt
import sys, gc, warnings, random, math, time, datetime, os
START_DATE = datetime.datetime.strptime('2017-11-30', '%Y-%m-%d')
from tqdm import tqdm_notebook
from sklearn import preprocessing, metrics
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
import lightgbm as lgb

# Suppress every warning for the rest of the session so progress output stays readable.
warnings.filterwarnings('ignore')
# Do not truncate wide frames when they are displayed.
pd.set_option('display.max_columns', None)

# NOTE(review): the star import hides where helper names come from.
# seed_everything, target_encode and reduce_mem_usage are used in this notebook
# but not defined in it -- presumably they are provided by utils; confirm.
from utils import *

# Single seed shared by everything that seed_everything() initialises.
SEED = 42
seed_everything(SEED)

In [2]:
# Read the pickled train / test frames from ../input/reduced.
train = pd.read_pickle('../input/reduced/train.pkl')
test = pd.read_pickle('../input/reduced/test.pkl')

# Keep the label as its own Series and remove it from the feature frame.
train_y = train['isFraud'].copy()
train = train.drop(columns=['isFraud'])

# Report the shapes of everything that was just loaded.
for caption, dims in (
    ('training data size is ', train.shape),
    ('training label size is', train_y.shape),
    ('testing data size is', test.shape),
):
    print(caption, dims)

training data size is  (590540, 432)
training label size is (590540,)
testing data size is (506691, 432)


In [3]:
# Names of the numbered raw columns, grouped by prefix: C1..C14, D1..D15,
# M1..M9 and V1..V339.
C_book = ["C%d" % num for num in range(1, 15)]
D_book = ["D%d" % num for num in range(1, 16)]
M_book = ["M%d" % num for num in range(1, 10)]
V_book = ["V%d" % num for num in range(1, 340)]

# id_01..id_38: the numbers listed in id_book_str_idx go to id_book_str,
# all others go to id_book_non_str; id_book holds every name in order.
id_book_str_idx = [12, 15, 16, 23, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38]
id_book = []
id_book_non_str = []
id_book_str = []
for num in range(1, 39):
    if num in id_book_str_idx:
        # Names on the string list are built without zero padding.
        name = "id_" + str(num)
        id_book_str.append(name)
    else:
        # Single-digit numbers are zero padded: id_01 .. id_09.
        name = "id_%02d" % num
        id_book_non_str.append(name)
    id_book.append(name)

In [4]:
# Columns removed from both frames once the encoded features have been built.
drop_list = [
    "DT", "TransactionDT", "DT_day", "DT_dayofweek", "DT_hour",
    "TransactionAmt", "ProductCD",
    "card1", "card2", "card3", "card4", "card5", "card6",
    "addr1", "addr2", "dist1", "dist2",
    "P_emaildomain", "R_emaildomain",
    "DeviceType", "DeviceInfo",
    *C_book, *D_book, *M_book, *V_book, *id_book,
]

# Columns handed to target_encode_func.
target_code_book = [
    "DT_day", "DT_dayofweek", "DT_hour",
    "TransactionAmt",
    "card1", "card2", "card3", "card4", "card5", "card6",
    "addr1", "addr2", "dist1", "dist2",
    "P_emaildomain", "R_emaildomain",
    "DeviceType", "DeviceInfo",
    *C_book, *D_book, *M_book, *id_book,
]

# Columns handed to freq_encode_str_func.
freq_code_book_str = [
    "card4", "card6",
    "P_emaildomain", "R_emaildomain",
    "DeviceType", "DeviceInfo",
    *M_book, *id_book_str,
]

# Columns handed to freq_encode_func.
freq_code_book_non_str = [
    "DT_day", "DT_dayofweek", "DT_hour",
    "TransactionAmt",
    "card1", "card2", "card3", "card5",
    "addr1", "addr2", "dist1", "dist2",
    *C_book, *D_book, *id_book_non_str,
]

In [5]:
# Tags used in the trailing comments below:
#   tmp    - temporary column, to be deleted later
#   target - column to be target-encoded later
#   freq   - column to be frequency-encoded later


def fe(df):
    """Add calendar features derived from ``TransactionDT`` to ``df`` in place.

    ``TransactionDT`` is treated as a number of seconds elapsed since the
    module-level ``START_DATE``.  The columns written are:

    * ``DT``           -- absolute timestamp (tmp)
    * ``DT_month``     -- ``(year - 2017) * 12 + month``
                          (tmp, used for group k-fold index)
    * ``DT_day``       -- day of the month (target, freq)
    * ``DT_dayofweek`` -- day of the week as given by ``.dt.dayofweek``
                          (target, freq)
    * ``DT_hour``      -- hour of the day (target, freq)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``TransactionDT`` column.  Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the columns above added.
    """
    # deal with time variables
    # Vectorised offset arithmetic: avoids building one datetime.timedelta per
    # row in Python and always yields a datetime64 column, even for empty input.
    elapsed = pd.to_timedelta(df['TransactionDT'], unit='s')
    df['DT'] = elapsed + pd.Timestamp(START_DATE)  # tmp

    stamp = df['DT'].dt
    df['DT_month'] = (stamp.year - 2017) * 12 + stamp.month  # tmp, used for group k-fold index
    df['DT_day'] = stamp.day  # target, freq
    df['DT_dayofweek'] = stamp.dayofweek  # target, freq
    df['DT_hour'] = stamp.hour  # target, freq

    # transaction amount # target, freq
    # card 1 # target, freq

    return df


# Derive the same time features for both splits.
train, test = fe(train), fe(test)

In [6]:
def nan_encode_func(train, test, threshold=0.1):
    """Add ``<col>_nan_code`` missing-value indicator columns to both frames.

    For every column of ``test`` the share of missing values over the combined
    train + test rows is computed.  When that share is strictly greater than
    ``threshold``, a 0/1 integer column ``<col>_nan_code`` (1 = value missing)
    is added to ``train`` and to ``test``.

    The null counts are taken per frame and summed, so the two frames are never
    concatenated and no full copy of the data is made.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to extend; both are modified in place.  Every column of
        ``test`` must also exist in ``train``.
    threshold : float, default 0.1
        Minimum (exclusive) combined missing share for a column to get an
        indicator.

    Returns
    -------
    tuple of pandas.DataFrame
        The same ``train`` and ``test`` objects.
    """
    total_rows = len(train) + len(test)
    # test.columns is read once here; indicator columns added inside the loop
    # are therefore not visited themselves.
    for col in tqdm_notebook(test.columns):
        train_missing = train[col].isnull()
        test_missing = test[col].isnull()
        n_missing = train_missing.sum() + test_missing.sum()
        # Guard against empty input, where the share is undefined.
        if total_rows and n_missing / total_rows > threshold:
            train[col + "_nan_code"] = train_missing.astype(int)
            test[col + "_nan_code"] = test_missing.astype(int)

    return train, test
train, test = nan_encode_func(train=train, test=test)

HBox(children=(IntProgress(value=0, max=437), HTML(value='')))




In [7]:
# NOTE: this target encoding might leak label information (data leakage).
def target_encode_func(train, test, train_y, target_code_book):
    """Add a ``<col>_target_code`` column to both frames for each listed column.

    Each column named in ``target_code_book`` is passed, together with the
    label ``train_y``, to ``target_encode`` (not defined in this notebook --
    presumably provided by ``utils``).  The pair it returns is stored as
    ``<col>_target_code`` in ``train`` and ``test`` respectively.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to extend; both are modified in place.
    train_y : labels aligned with ``train``, forwarded as ``target``.
    target_code_book : iterable of str
        Names of the columns to encode.

    Returns
    -------
    tuple
        The same ``train`` and ``test`` objects.
    """
    for feature in tqdm_notebook(target_code_book):
        encoded_name = feature + "_target_code"
        encoded_train, encoded_test = target_encode(
            train[feature],
            test[feature],
            target=train_y,
            min_samples_leaf=100,
            smoothing=10,
            noise_level=0.01,
        )
        train[encoded_name] = encoded_train
        test[encoded_name] = encoded_test

    return train, test


# train, test = target_encode_func(train, test, train_y, target_code_book)

In [8]:
def freq_encode_func(train, test, freq_code_book_non_str):
    """Frequency-encode columns, counting over the combined train + test rows.

    For each column in ``freq_code_book_non_str`` a ``<col>_freq_code`` column
    is added to both frames.  It holds, for every row, the share of combined
    rows that carry the same value.  Missing values are encoded with the share
    of missing rows.

    Only the column being encoded is concatenated, and the lookup is done with
    the vectorised ``Series.map`` rather than a per-element Python callback.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to extend; both are modified in place.
    freq_code_book_non_str : iterable of str
        Names of the columns to encode; each must exist in both frames.

    Returns
    -------
    tuple of pandas.DataFrame
        The same ``train`` and ``test`` objects.
    """
    total_rows = len(train) + len(test)
    for col in tqdm_notebook(freq_code_book_non_str):
        combined = pd.concat([train[col], test[col]], axis=0)
        # value_counts() skips NaN, so the missing share is handled separately.
        freq_map = combined.value_counts() / total_rows
        nan_freq = combined.isnull().sum() / total_rows
        # Every non-missing value is present in freq_map, so after map() only
        # the rows that were NaN in the source are still NaN.
        train[col + "_freq_code"] = train[col].map(freq_map).fillna(nan_freq)
        test[col + "_freq_code"] = test[col].map(freq_map).fillna(nan_freq)

    return train, test


train, test = freq_encode_func(
    train=train,
    test=test,
    freq_code_book_non_str=freq_code_book_non_str,
)

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))




In [9]:
def freq_encode_str_func(train, test, freq_code_book_str):
    """Frequency-encode string columns over the combined train + test rows.

    For each column in ``freq_code_book_str`` the missing values are first
    replaced by the literal ``'unknown'`` in both frames; the source column
    itself is overwritten.  A ``<col>_freq_code`` column is then added that
    holds, for every row, the share of combined rows carrying the same value.

    Only the column being encoded is concatenated, and the lookup is done with
    the vectorised ``Series.map`` rather than a per-element Python callback.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to extend; both are modified in place.
    freq_code_book_str : iterable of str
        Names of the columns to encode; each must exist in both frames.

    Returns
    -------
    tuple of pandas.DataFrame
        The same ``train`` and ``test`` objects.
    """
    total_rows = len(train) + len(test)
    for col in tqdm_notebook(freq_code_book_str):
        # Missing values become their own 'unknown' category before counting.
        train[col] = train[col].fillna('unknown')
        test[col] = test[col].fillna('unknown')

        combined = pd.concat([train[col], test[col]], axis=0)
        freq_map = combined.value_counts() / total_rows

        train[col + "_freq_code"] = train[col].map(freq_map)
        test[col + "_freq_code"] = test[col].map(freq_map)

    return train, test

train, test = freq_encode_str_func(
    train=train,
    test=test,
    freq_code_book_str=freq_code_book_str,
)

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))




In [10]:
# Remove every column named in drop_list from both frames.
train = train.drop(columns=drop_list)
test = test.drop(columns=drop_list)

In [14]:
# Report the shapes after the raw columns were dropped.
for caption, dims in (
    ('training data size is ', train.shape),
    ('training label size is', train_y.shape),
    ('testing data size is', test.shape),
):
    print(caption, dims)

training data size is  (590540, 350)
training label size is (590540,)
testing data size is (506691, 350)


In [15]:
# Preview the first five engineered rows.
train.head(n=5)

Unnamed: 0_level_0,DT_month,addr1_nan_code,addr2_nan_code,dist1_nan_code,dist2_nan_code,P_emaildomain_nan_code,R_emaildomain_nan_code,D2_nan_code,D3_nan_code,D4_nan_code,D5_nan_code,D6_nan_code,D7_nan_code,D8_nan_code,D9_nan_code,D11_nan_code,D12_nan_code,D13_nan_code,D14_nan_code,M1_nan_code,M2_nan_code,M3_nan_code,M4_nan_code,M5_nan_code,M6_nan_code,M7_nan_code,M8_nan_code,M9_nan_code,V1_nan_code,V2_nan_code,V3_nan_code,V4_nan_code,V5_nan_code,V6_nan_code,V7_nan_code,V8_nan_code,V9_nan_code,V10_nan_code,V11_nan_code,V35_nan_code,V36_nan_code,V37_nan_code,V38_nan_code,V39_nan_code,V40_nan_code,V41_nan_code,V42_nan_code,V43_nan_code,V44_nan_code,V45_nan_code,V46_nan_code,V47_nan_code,V48_nan_code,V49_nan_code,V50_nan_code,V51_nan_code,V52_nan_code,V138_nan_code,V139_nan_code,V140_nan_code,V141_nan_code,V142_nan_code,V143_nan_code,V144_nan_code,V145_nan_code,V146_nan_code,V147_nan_code,V148_nan_code,V149_nan_code,V150_nan_code,V151_nan_code,V152_nan_code,V153_nan_code,V154_nan_code,V155_nan_code,V156_nan_code,V157_nan_code,V158_nan_code,V159_nan_code,V160_nan_code,V161_nan_code,V162_nan_code,V163_nan_code,V164_nan_code,V165_nan_code,V166_nan_code,V167_nan_code,V168_nan_code,V169_nan_code,V170_nan_code,V171_nan_code,V172_nan_code,V173_nan_code,V174_nan_code,V175_nan_code,V176_nan_code,V177_nan_code,V178_nan_code,V179_nan_code,V180_nan_code,V181_nan_code,V182_nan_code,V183_nan_code,V184_nan_code,V185_nan_code,V186_nan_code,V187_nan_code,V188_nan_code,V189_nan_code,V190_nan_code,V191_nan_code,V192_nan_code,V193_nan_code,V194_nan_code,V195_nan_code,V196_nan_code,V197_nan_code,V198_nan_code,V199_nan_code,V200_nan_code,V201_nan_code,V202_nan_code,V203_nan_code,V204_nan_code,V205_nan_code,V206_nan_code,V207_nan_code,V208_nan_code,V209_nan_code,V210_nan_code,V211_nan_code,V212_nan_code,V213_nan_code,V214_nan_code,V215_nan_code,V216_nan_code,V217_nan_code,V218_nan_code,V219_nan_code,V220_nan_code,V221_nan_code,V222_nan_code,V223_nan_code,V224_nan_code,V225_nan_code,V226_nan_c
ode,V227_nan_code,V228_nan_code,V229_nan_code,V230_nan_code,V231_nan_code,V232_nan_code,V233_nan_code,V234_nan_code,V235_nan_code,V236_nan_code,V237_nan_code,V238_nan_code,V239_nan_code,V240_nan_code,V241_nan_code,V242_nan_code,V243_nan_code,V244_nan_code,V245_nan_code,V246_nan_code,V247_nan_code,V248_nan_code,V249_nan_code,V250_nan_code,V251_nan_code,V252_nan_code,V253_nan_code,V254_nan_code,V255_nan_code,V256_nan_code,V257_nan_code,V258_nan_code,V259_nan_code,V260_nan_code,V261_nan_code,V262_nan_code,V263_nan_code,V264_nan_code,V265_nan_code,V266_nan_code,V267_nan_code,V268_nan_code,V269_nan_code,V270_nan_code,V271_nan_code,V272_nan_code,V273_nan_code,V274_nan_code,V275_nan_code,V276_nan_code,V277_nan_code,V278_nan_code,V322_nan_code,V323_nan_code,V324_nan_code,V325_nan_code,V326_nan_code,V327_nan_code,V328_nan_code,V329_nan_code,V330_nan_code,V331_nan_code,V332_nan_code,V333_nan_code,V334_nan_code,V335_nan_code,V336_nan_code,V337_nan_code,V338_nan_code,V339_nan_code,id_01_nan_code,id_02_nan_code,id_03_nan_code,id_04_nan_code,id_05_nan_code,id_06_nan_code,id_07_nan_code,id_08_nan_code,id_09_nan_code,id_10_nan_code,id_11_nan_code,id_12_nan_code,id_13_nan_code,id_14_nan_code,id_15_nan_code,id_16_nan_code,id_17_nan_code,id_18_nan_code,id_19_nan_code,id_20_nan_code,id_21_nan_code,id_22_nan_code,id_23_nan_code,id_24_nan_code,id_25_nan_code,id_26_nan_code,id_27_nan_code,id_28_nan_code,id_29_nan_code,id_30_nan_code,id_31_nan_code,id_32_nan_code,id_33_nan_code,id_34_nan_code,id_35_nan_code,id_36_nan_code,id_37_nan_code,id_38_nan_code,DeviceType_nan_code,DeviceInfo_nan_code,DT_day_freq_code,DT_dayofweek_freq_code,DT_hour_freq_code,TransactionAmt_freq_code,card1_freq_code,card2_freq_code,card3_freq_code,card5_freq_code,addr1_freq_code,addr2_freq_code,dist1_freq_code,dist2_freq_code,C1_freq_code,C2_freq_code,C3_freq_code,C4_freq_code,C5_freq_code,C6_freq_code,C7_freq_code,C8_freq_code,C9_freq_code,C10_freq_code,C11_freq_code,C12_freq_code,C13_freq_code,C14_freq_code,D1_freq_
code,D2_freq_code,D3_freq_code,D4_freq_code,D5_freq_code,D6_freq_code,D7_freq_code,D8_freq_code,D9_freq_code,D10_freq_code,D11_freq_code,D12_freq_code,D13_freq_code,D14_freq_code,D15_freq_code,id_01_freq_code,id_02_freq_code,id_03_freq_code,id_04_freq_code,id_05_freq_code,id_06_freq_code,id_07_freq_code,id_08_freq_code,id_09_freq_code,id_10_freq_code,id_11_freq_code,id_13_freq_code,id_14_freq_code,id_17_freq_code,id_18_freq_code,id_19_freq_code,id_20_freq_code,id_21_freq_code,id_22_freq_code,id_24_freq_code,id_25_freq_code,id_26_freq_code,id_32_freq_code,card4_freq_code,card6_freq_code,P_emaildomain_freq_code,R_emaildomain_freq_code,DeviceType_freq_code,DeviceInfo_freq_code,M1_freq_code,M2_freq_code,M3_freq_code,M4_freq_code,M5_freq_code,M6_freq_code,M7_freq_code,M8_freq_code,M9_freq_code,id_12_freq_code,id_15_freq_code,id_16_freq_code,id_23_freq_code,id_27_freq_code,id_28_freq_code,id_29_freq_code,id_30_freq_code,id_31_freq_code,id_33_freq_code,id_34_freq_code,id_35_freq_code,id_36_freq_code,id_37_freq_code,id_38_freq_code
TransactionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1
2987000,12,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.036737,0.163816,0.062342,0.001223,5.1e-05,0.016029,0.872054,0.000282,0.039221,0.871662,0.00429,0.9325,0.534748,0.530149,0.986018,0.750484,0.644357,0.5753,0.876057,0.743967,0.383104,0.751364,0.153824,0.780219,0.335041,0.545482,0.005056,0.469879,0.010504,0.223994,0.486877,0.819573,0.909727,0.863963,0.863963,0.002924,0.0021,0.877901,0.831088,0.838338,0.326245,0.739216,0.746773,0.878963,0.878963,0.752454,0.752454,0.990691,0.990691,0.863963,0.863963,0.746857,0.765222,0.862015,0.749064,0.912518,0.749165,0.749466,0.990687,0.990676,0.991354,0.99073,0.990695,0.864881,0.00868,0.24393,0.149146,0.751045,0.746871,0.786988,0.591886,0.536189,0.47232,0.112052,0.218874,0.318528,0.529773,0.529748,0.529748,0.739216,0.74667,0.767518,0.990676,0.990676,0.746857,0.746857,0.864911,0.747631,0.868797,0.86331,0.74667,0.74667,0.74667,0.74667
2987001,12,0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.036737,0.163816,0.062342,0.018053,0.001219,0.005097,0.872054,0.045105,0.070087,0.871662,0.586465,0.9325,0.534748,0.530149,0.986018,0.750484,0.644357,0.5753,0.876057,0.743967,0.311294,0.751364,0.666944,0.780219,0.335041,0.545482,0.462851,0.469879,0.424724,0.31429,0.486877,0.819573,0.909727,0.863963,0.863963,0.410059,0.415414,0.877901,0.831088,0.838338,0.326245,0.739216,0.746773,0.878963,0.878963,0.752454,0.752454,0.990691,0.990691,0.863963,0.863963,0.746857,0.765222,0.862015,0.749064,0.912518,0.749165,0.749466,0.990687,0.990676,0.991354,0.99073,0.990695,0.864881,0.316602,0.24393,0.397184,0.751045,0.746871,0.786988,0.408063,0.408063,0.408063,0.326084,0.179508,0.318528,0.529773,0.529748,0.529748,0.739216,0.74667,0.767518,0.990676,0.990676,0.746857,0.746857,0.864911,0.747631,0.868797,0.86331,0.74667,0.74667,0.74667,0.74667
2987002,12,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.036737,0.163816,0.062342,0.052826,0.001635,0.064249,0.872054,0.093809,0.044099,0.871662,3.5e-05,0.9325,0.534748,0.530149,0.986018,0.750484,0.644357,0.5753,0.876057,0.743967,0.383104,0.751364,0.666944,0.780219,0.335041,0.545482,0.462851,0.469879,0.424724,0.31429,0.486877,0.819573,0.909727,0.863963,0.863963,0.410059,0.000444,0.877901,0.831088,0.838338,0.000622,0.739216,0.746773,0.878963,0.878963,0.752454,0.752454,0.990691,0.990691,0.863963,0.863963,0.746857,0.765222,0.862015,0.749064,0.912518,0.749165,0.749466,0.990687,0.990676,0.991354,0.99073,0.990695,0.864881,0.655877,0.751855,0.009054,0.751045,0.746871,0.786988,0.591886,0.536189,0.47232,0.326084,0.218874,0.382265,0.405205,0.29497,0.067479,0.739216,0.74667,0.767518,0.990676,0.990676,0.746857,0.746857,0.864911,0.747631,0.868797,0.86331,0.74667,0.74667,0.74667,0.74667
2987003,12,0,0,1,1,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.036737,0.163816,0.062342,0.028701,0.006958,0.010287,0.872054,0.042891,0.015908,0.871662,0.586465,0.9325,0.180017,0.028875,0.986018,0.750484,0.644357,0.031192,0.876057,0.743967,0.383104,0.751364,0.666944,0.780219,0.005158,0.545482,0.001312,0.001252,0.133359,0.000815,0.126923,0.819573,0.909727,0.863963,0.863963,0.001447,0.415414,0.877901,0.831088,0.838338,0.000906,0.739216,0.746773,0.878963,0.878963,0.752454,0.752454,0.990691,0.990691,0.863963,0.863963,0.746857,0.765222,0.862015,0.749064,0.912518,0.749165,0.749466,0.990687,0.990676,0.991354,0.99073,0.990695,0.864881,0.316602,0.751855,0.166587,0.751045,0.746871,0.786988,0.408063,0.408063,0.408063,0.326084,0.179508,0.382265,0.529773,0.529748,0.529748,0.739216,0.74667,0.767518,0.990676,0.990676,0.746857,0.746857,0.864911,0.747631,0.868797,0.86331,0.74667,0.74667,0.74667,0.74667
2987004,12,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.036737,0.163816,0.062342,0.028701,2.7e-05,0.024812,0.872054,0.045105,0.006477,0.871662,0.586465,0.9325,0.534748,0.530149,0.986018,0.750484,0.644357,0.5753,0.876057,0.184397,0.311294,0.175509,0.666944,0.780219,0.335041,0.545482,0.462851,0.469879,0.424724,0.223994,0.486877,0.819573,0.909727,0.863963,0.863963,0.080719,0.415414,0.877901,0.831088,0.838338,0.092216,0.028759,3e-06,0.878963,0.878963,0.752454,0.752454,0.990691,0.990691,0.863963,0.863963,0.239226,0.765222,0.022097,0.137563,0.912518,0.008917,0.001196,0.990687,0.990676,0.991354,0.99073,0.990695,0.040171,0.316602,0.24393,0.397184,0.751045,0.107701,8e-06,0.408063,0.408063,0.408063,0.473181,0.601618,0.299207,0.529773,0.529748,0.529748,0.222305,0.108817,0.111446,0.990676,0.990676,0.114783,0.117106,0.003256,0.000972,0.001303,0.120471,0.136219,0.243662,0.196084,0.099325


In [16]:
# Run reduce_mem_usage on one frame at a time, so the previous version of a
# frame can be released before the next one is processed.
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

# Write the features and the label to ../input/reduced (train, label, test).
for payload, destination in (
    (train, '../input/reduced/train_features.pkl'),
    (train_y, '../input/reduced/train_y.pkl'),
    (test, '../input/reduced/test_features.pkl'),
):
    payload.to_pickle(destination)

Mem. usage decreased to 254.56 Mb (83.9% reduction)
Mem. usage decreased to 218.41 Mb (83.9% reduction)
