In [1]:
import gc
import numpy as np
import pandas as pd
import sys
import os
import utils
from utils import logger_func, get_categorical_features
# Module-level logger shared by the cleansing functions below.
logger = logger_func()
import eda
# Column-name constants; not referenced in the code visible here
# (presumably the join key and the label column — TODO confirm against callers).
key = 'SK_ID_CURR'
target = 'TARGET'

# Announce the start of this script to the project's utils helper
# (paired with utils.end() at the bottom of the cell).
utils.start(sys.argv[0])

#==============================================================================
# to pickle
#==============================================================================
def to_pkl():
    """Pickle every raw input CSV and write an EDA summary CSV for each.

    The application train and test files are stacked row-wise into a single
    table first; every other table is processed as-is.  Pickles go to
    ``../input`` through ``utils.to_df_pkl`` and the summaries produced by
    ``eda.df_info`` go to ``../eda``.
    """
    # Application data: train + test are concatenated before being stored.
    train_part = pd.read_csv('../input/application_train.csv')
    test_part = pd.read_csv('../input/application_test.csv')
    stacked = pd.concat([train_part, test_part], axis=0)
    utils.to_df_pkl(df=stacked, path='../input', fname='application_train_test')
    eda.df_info(stacked).to_csv('../eda/application_eda.csv')

    # Remaining tables: (raw csv, pickle name, EDA summary csv).
    single_tables = (
        ('../input/bureau.csv', 'bureau', '../eda/bureau_eda.csv'),
        ('../input/previous_application.csv', 'previous_application', '../eda/prev_eda.csv'),
        ('../input/installments_payments.csv', 'installments_payments', '../eda/install_eda.csv'),
        ('../input/credit_card_balance.csv', 'credit_card_balance', '../eda/credit_eda.csv'),
        ('../input/POS_CASH_balance.csv', 'POS_CASH_balance', '../eda/pos_eda.csv'),
    )
    for csv_path, pkl_name, summary_path in single_tables:
        table = pd.read_csv(csv_path)
        utils.to_df_pkl(df=table, path='../input', fname=pkl_name)
        eda.df_info(table).to_csv(summary_path)

# Convert the raw CSVs to pickles and write the EDA summaries before cleansing.
to_pkl()
# Uncomment to stop right after the conversion step and skip the cleansing below.
#  sys.exit()

#========================================================================
# CLEANSING & PROCESSING
#========================================================================
def clean_app(app):
    """Cleanse the stacked application table and pickle it as ``clean_application_train_test``.

    Steps:
      * ``CODE_GENDER`` value ``'XNA'`` is replaced by ``'F'``.
      * Missing values in every column returned by
        ``get_categorical_features`` are filled with ``'XNA'``.

    Args:
        app: application DataFrame; it is modified in place (columns are
            reassigned on the passed object).

    Returns:
        None.  The result is written under ``../input`` via ``utils.to_df_pkl``.
    """
    logger.info('''
    #==============================================================================
    # APPLICATION
    #==============================================================================''')

    # Assign the result back instead of calling replace/fillna with
    # inplace=True on a column selection: the chained in-place form is
    # deprecated in pandas and does not update the frame under copy-on-write.
    app['CODE_GENDER'] = app['CODE_GENDER'].replace('XNA', 'F')

    cat_cols = get_categorical_features(df=app, ignore_list=[])
    for col in cat_cols:
        app[col] = app[col].fillna('XNA')

    # Disabled experiment ("revo"): split the AMT_* columns of revolving loans
    # into separate revo_* columns.
    #  revo = 'Revolving loans'
    #  amt_list = ['AMT_ANNUITY', 'AMT_CREDIT', 'AMT_GOODS_PRICE']
    #  for col in amt_list:
    #      app[f'revo_{col}'] = app[col].where(app[f'NAME_CONTRACT_TYPE']==revo, np.nan)
    #      app[col] = app[col].where(app[f'NAME_CONTRACT_TYPE']!=revo, np.nan)

    utils.to_df_pkl(df=app, path='../input', fname='clean_application_train_test')


def clean_bureau(bur):
    """Null out implausible day offsets in the bureau table and pickle it as ``clean_bureau``.

    Values of ``DAYS_CREDIT_ENDDATE``, ``DAYS_ENDDATE_FACT`` and
    ``DAYS_CREDIT_UPDATE`` that are not greater than -36000 become NaN
    (existing NaN stay NaN).

    Args:
        bur: bureau DataFrame.  It is left untouched; the cleansing runs on a
            copy.  Passing ``None`` loads the table from ``../input/bureau*.p``.

    Returns:
        None.  The result is written under ``../input`` via ``utils.to_df_pkl``.
    """
    logger.info('''
    #==============================================================================
    # BUREAU CLEANSING
    #==============================================================================''')

    # Use the frame the caller already loaded instead of silently discarding
    # it and reading the same pickle from disk a second time.
    if bur is None:
        bur = utils.read_df_pkl(path='../input/bureau*.p')
    else:
        bur = bur.copy()

    # NOTE(review): -36000 days is roughly a century; presumably anything at or
    # below that is a data-entry artefact — confirm the threshold with the author.
    day_cols = ['DAYS_CREDIT_ENDDATE', 'DAYS_ENDDATE_FACT', 'DAYS_CREDIT_UPDATE']
    for col in day_cols:
        bur[col] = bur[col].where(bur[col] > -36000, np.nan)

    utils.to_df_pkl(df=bur, path='../input', fname='clean_bureau')


def clean_prev(pre):
    """Cleanse the previous-application table and pickle it as ``clean_prev``.

    Steps:
      * Amount / count / rate columns keep only strictly positive values;
        everything else (zero, negative, NaN) becomes NaN.
      * ``DAYS_*`` schedule columns keep only values below 100000; larger
        values become NaN.
      * Missing ``NAME_TYPE_SUITE`` and ``PRODUCT_COMBINATION`` are filled
        with ``'XNA'``.

    Args:
        pre: previous-application DataFrame.  It is left untouched; the
            cleansing runs on a copy.  Passing ``None`` loads the table from
            ``../input/previous*.p``.

    Returns:
        None.  The result is written under ``../input`` via ``utils.to_df_pkl``.
    """
    logger.info('''
    #==============================================================================
    # PREV CLEANSING
    #==============================================================================''')

    # Use the frame the caller already loaded instead of silently discarding
    # it and reading the same pickle from disk a second time.
    if pre is None:
        pre = utils.read_df_pkl(path='../input/previous*.p')
    else:
        pre = pre.copy()

    positive_only_cols = [
        'AMT_CREDIT',
        'AMT_ANNUITY',
        'AMT_APPLICATION',
        'CNT_PAYMENT',
        'AMT_DOWN_PAYMENT',
        'RATE_DOWN_PAYMENT',
    ]
    for col in positive_only_cols:
        pre[col] = pre[col].where(pre[col] > 0, np.nan)

    # NOTE(review): values >= 100000 days look like placeholder codes rather
    # than real offsets — confirm against the data dictionary.
    day_cols = [
        'DAYS_FIRST_DRAWING',
        'DAYS_FIRST_DUE',
        'DAYS_LAST_DUE_1ST_VERSION',
        'DAYS_LAST_DUE',
        'DAYS_TERMINATION',
    ]
    for col in day_cols:
        pre[col] = pre[col].where(pre[col] < 100000, np.nan)
    #  pre['SELLERPLACE_AREA']          = pre['SELLERPLACE_AREA'].where(pre['SELLERPLACE_AREA']     <200, 200)

    # Disabled experiment ("revo"): for Revolving loans, null out CNT_PAYMENT
    # and the AMT_* columns and move their values into revo_* columns, skipping
    # SK_ID_CURR, SK_ID_PREV, NAME_CONTRACT_TYPE and NAME_CONTRACT_STATUS.

    # Assign back instead of fillna(inplace=True) on a column selection: the
    # chained in-place form is deprecated and is a no-op under copy-on-write.
    for col in ('NAME_TYPE_SUITE', 'PRODUCT_COMBINATION'):
        pre[col] = pre[col].fillna('XNA')

    utils.to_df_pkl(df=pre, path='../input', fname='clean_prev')


def clean_pos(pos):
    """Normalise contract statuses in the POS/cash balance table and pickle it as ``clean_pos``.

    Steps:
      * Rows whose ``NAME_CONTRACT_STATUS`` is ``'Signed'``, ``'Approved'`` or
        ``'XNA'`` are dropped.
      * ``'Completed'`` rows that still have ``CNT_INSTALMENT_FUTURE != 0``
        are relabelled ``'Active'``.
      * Rows with ``CNT_INSTALMENT_FUTURE == 0`` are all labelled
        ``'Completed'`` and reduced to one row per ``SK_ID_PREV`` (the one
        with the smallest ``MONTHS_BALANCE``).
      * Rows with ``CNT_INSTALMENT_FUTURE > 0`` are kept unchanged.

    Rows whose ``CNT_INSTALMENT_FUTURE`` is NaN match neither of the last two
    selections and are therefore not part of the output.

    Args:
        pos: POS/cash balance DataFrame; it is not modified.

    Returns:
        None.  The result is written under ``../input`` via ``utils.to_df_pkl``.
    """
    # The banner previously said "PREV CLEANSING" (copy-paste from clean_prev).
    logger.info('''
    #==============================================================================
    # POS CLEANSING
    #==============================================================================''')

    # Take explicit copies of the query results before writing to them so the
    # assignments below act on independent frames (no SettingWithCopy
    # ambiguity, and well-defined under copy-on-write).
    pos = pos.query("NAME_CONTRACT_STATUS!='Signed' and NAME_CONTRACT_STATUS!='Approved' and NAME_CONTRACT_STATUS!='XNA'").copy()
    unfinished = (pos['NAME_CONTRACT_STATUS'] == 'Completed') & (pos['CNT_INSTALMENT_FUTURE'] != 0)
    pos.loc[unfinished, 'NAME_CONTRACT_STATUS'] = 'Active'

    pos_0 = pos.query('CNT_INSTALMENT_FUTURE==0').copy()
    pos_1 = pos.query('CNT_INSTALMENT_FUTURE>0')
    pos_0['NAME_CONTRACT_STATUS'] = 'Completed'
    # MONTHS_BALANCE is sorted descending within each contract, so keep='last'
    # retains the row with the smallest MONTHS_BALANCE per SK_ID_PREV.
    pos_0 = pos_0.sort_values(by=['SK_ID_PREV', 'MONTHS_BALANCE'], ascending=[True, False])
    pos_0 = pos_0.drop_duplicates('SK_ID_PREV', keep='last')
    pos = pd.concat([pos_0, pos_1], ignore_index=True)
    del pos_0, pos_1
    gc.collect()

    utils.to_df_pkl(df=pos, path='../input', fname='clean_pos')


def clean_ins(ins):
    """Keep only installment rows with a positive ``AMT_INSTALMENT`` and pickle them as ``clean_install``."""
    # It is unclear why some amounts are 0, and there are only about 290 such
    # rows, so they are simply dropped.
    has_amount = ins['AMT_INSTALMENT'] > 0
    kept_rows = ins[has_amount]

    utils.to_df_pkl(df=kept_rows, path='../input', fname='clean_install')


def clean_ccb(ccb):
    """Fill missing amounts/counts in the credit-card balance table with 0 and pickle it as ``clean_ccb``.

    Every column whose name contains ``'AMT'`` or ``'CNT'`` has its NaN
    values replaced by 0.

    Args:
        ccb: credit-card balance DataFrame; it is modified in place (columns
            are reassigned on the passed object).

    Returns:
        None.  The result is written under ``../input`` via ``utils.to_df_pkl``.
    """
    amt_cnt_cols = [col for col in ccb.columns if 'AMT' in col or 'CNT' in col]
    for col in amt_cnt_cols:
        # Assign back instead of fillna(inplace=True) on a column selection:
        # the chained in-place form is deprecated in pandas and does not
        # update the frame under copy-on-write.
        ccb[col] = ccb[col].fillna(0)

    utils.to_df_pkl(df=ccb, path='../input', fname='clean_ccb')

# Run each cleansing step in turn: load one pickled table, cleanse it, then
# release it before the next table is loaded.
cleansing_steps = (
    ('../input/application_train_test*.p', clean_app),
    ('../input/bureau*.p', clean_bureau),
    ('../input/prev*.p', clean_prev),
    ('../input/POS*.p', clean_pos),
    ('../input/install*.p', clean_ins),
    ('../input/credit*.p', clean_ccb),
)
for pkl_pattern, cleanser in cleansing_steps:
    loaded = utils.read_df_pkl(path=pkl_pattern)
    cleanser(loaded)
    del loaded
    gc.collect()
# Leave no helper names behind in the module namespace.
del cleansing_steps, pkl_pattern, cleanser

utils.end(sys.argv[0])

2018-10-27 09:12:53,447 utils 353 [INFO]    [logger_func] start 


SystemExit: 

In [2]:
# Reload the per-table EDA summaries from ../eda and render each one in turn.
eda_csv_paths = (
    '../eda/application_eda.csv',
    '../eda/bureau_eda.csv',
    '../eda/prev_eda.csv',
    '../eda/install_eda.csv',
    '../eda/credit_eda.csv',
    '../eda/pos_eda.csv',
)
app_eda, bur_eda, pre_eda, ins_eda, ccb_eda, pos_eda = map(pd.read_csv, eda_csv_paths)
for summary in (app_eda, bur_eda, pre_eda, ins_eda, ccb_eda, pos_eda):
    display(summary)

Unnamed: 0.1,Unnamed: 0,DataType,#Nulls,#Uniques,#Inf,#-Inf,Min,Mean,Max,Std,z 1.96 val,z -1.96 val,z 1.96 cnt,z -1.96 cnt,top10 val,top10 cnt,top10 ratio
0,AMT_ANNUITY,float64,36,14166,0,0,1615.500000,27425.560657,2.580255e+05,14732.808190,5.630186e+04,-1450.743395,14003,0,"[9000.0, 13500.0, 6750.0, 10125.0, 37800.0, 11...","[6450, 5556, 2404, 2052, 1612, 1508, 1454, 135...","[0.02, 0.02, 0.01, 0.01, 0.0, 0.0, 0.0, 0.0, 0..."
1,AMT_CREDIT,float64,0,6480,0,0,45000.000000,587767.413753,4.050000e+06,398623.693715,1.369070e+06,-193535.025929,15406,0,"[450000.0, 675000.0, 225000.0, 180000.0, 27000...","[11827, 10303, 9998, 7757, 7733, 7081, 4575, 4...","[0.029999999999999999, 0.029999999999999999, 0..."
2,AMT_GOODS_PRICE,float64,278,1291,0,0,40500.000000,528019.997800,4.050000e+06,366064.985656,1.245507e+06,-189467.374084,18719,0,"[450000.0, 225000.0, 675000.0, 900000.0, 27000...","[30711, 29732, 28434, 17404, 12653, 11111, 996...","[0.089999999999999997, 0.080000000000000002, 0..."
3,AMT_INCOME_TOTAL,float64,0,2741,0,0,25650.000000,170116.059860,1.170000e+08,223506.819009,6.081894e+05,-267957.305397,1836,0,"[135000.0, 112500.0, 157500.0, 180000.0, 90000...","[41312, 35870, 30991, 28924, 25427, 24462, 193...","[0.12, 0.10000000000000001, 0.0899999999999999..."
4,AMT_REQ_CREDIT_BUREAU_DAY,float64,47568,9,0,0,0.000000,0.006281,9.000000e+00,0.104250,2.106115e-01,-0.198049,1559,0,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 9.0, 8.0]","[307128, 1355, 113, 45, 26, 9, 8, 2, 1]","[0.85999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0,..."
5,AMT_REQ_CREDIT_BUREAU_HOUR,float64,47568,5,0,0,0.000000,0.005808,4.000000e+00,0.079736,1.620915e-01,-0.150475,1715,0,"[0.0, 1.0, 2.0, 3.0, 4.0]","[306972, 1648, 57, 9, 1]","[0.85999999999999999, 0.0, 0.0, 0.0, 0.0]"
6,AMT_REQ_CREDIT_BUREAU_MON,float64,47568,24,0,0,0.000000,0.231697,2.700000e+01,0.855949,1.909358e+00,-1.445963,10642,0,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, ...","[264574, 33471, 5409, 1995, 1077, 603, 344, 29...","[0.73999999999999999, 0.089999999999999997, 0...."
7,AMT_REQ_CREDIT_BUREAU_QRT,float64,47568,11,0,0,0.000000,0.304399,2.610000e+02,0.786915,1.846753e+00,-1.237955,20276,0,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[238976, 49435, 17410, 2212, 533, 75, 29, 8, 7...","[0.67000000000000004, 0.14000000000000001, 0.0..."
8,AMT_REQ_CREDIT_BUREAU_WEEK,float64,47568,9,0,0,0.000000,0.029995,8.000000e+00,0.191374,4.050872e-01,-0.345098,8652,0,"[0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 5.0, 8.0, 7.0]","[300035, 8321, 202, 58, 34, 20, 10, 5, 2]","[0.83999999999999997, 0.02, 0.0, 0.0, 0.0, 0.0..."
9,AMT_REQ_CREDIT_BUREAU_YEAR,float64,47568,25,0,0,0.000000,1.911564,2.500000e+01,1.865338,5.567627e+00,-1.744499,16341,0,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[82640, 72616, 58681, 39822, 24459, 14128, 809...","[0.23000000000000001, 0.20000000000000001, 0.1..."


Unnamed: 0.1,Unnamed: 0,DataType,#Nulls,#Uniques,#Inf,#-Inf,Min,Mean,Max,Std,z 1.96 val,z -1.96 val,z 1.96 cnt,z -1.96 cnt,top10 val,top10 cnt,top10 ratio
0,SK_ID_CURR,int64,0,305811,0,0,100001.0,278214.9,456255.0,102938.6,479974.5,76455.36,0,0,"[120860, 169704, 318065, 251643, 425396, 29580...","[116, 94, 78, 61, 60, 59, 58, 57, 56, 55]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,SK_ID_BUREAU,int64,0,1716428,0,0,5000000.0,5924434.0,6843457.0,532265.7,6967675.0,4881194.0,0,0,"[5000822, 6547158, 6487797, 6481654, 6483703, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,CREDIT_ACTIVE,object,0,4,0,0,,,,,,,0,0,"['Closed', 'Active', 'Sold', 'Bad debt']","[1079273, 630607, 6527, 21]","[0.63, 0.37, 0.0, 0.0]"
3,CREDIT_CURRENCY,object,0,4,0,0,,,,,,,0,0,"['currency 1', 'currency 2', 'currency 3', 'cu...","[1715020, 1224, 174, 10]","[1.0, 0.0, 0.0, 0.0]"
4,DAYS_CREDIT,int64,0,2923,0,0,-2922.0,-1142.108,0.0,795.1649,416.4156,-2700.631,0,72833,"[-364, -336, -273, -357, -343, -315, -371, -36...","[1330, 1248, 1238, 1218, 1203, 1202, 1196, 119...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5,CREDIT_DAY_OVERDUE,int64,0,942,0,0,0.0,0.8181666,2792.0,36.54443,72.44525,-70.80891,1404,0,"[0, 30, 60, 13, 8, 9, 7, 14, 17, 11]","[1712211, 311, 126, 103, 103, 93, 92, 91, 77, 75]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6,DAYS_CREDIT_ENDDATE,float64,105553,14096,0,0,-42060.0,510.5174,31199.0,4994.22,10299.19,-9278.154,48286,146,"[0.0, 3.0, -7.0, 1.0, -14.0, -10.0, 4.0, -2.0,...","[883, 845, 837, 830, 787, 782, 777, 772, 771, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
7,DAYS_ENDDATE_FACT,float64,633653,2917,0,0,-42023.0,-1017.437,0.0,714.0106,382.0237,-2416.898,0,49323,"[-329.0, -273.0, -301.0, -91.0, -84.0, -154.0,...","[811, 794, 791, 785, 783, 783, 782, 778, 778, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8,AMT_CREDIT_MAX_OVERDUE,float64,1124488,68251,0,0,0.0,3825.418,115987200.0,206031.6,407647.4,-399996.5,310,0,"[0.0, 1440.0, 225.0, 45.0, 4.5, 90.0, 4500.0, ...","[470650, 688, 405, 377, 315, 222, 220, 192, 19...","[0.27000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0,..."
9,CNT_CREDIT_PROLONG,int64,0,10,0,0,0.0,0.006410406,9.0,0.09622391,0.1950093,-0.1821884,9114,0,"[0, 1, 2, 3, 4, 5, 9, 6, 8, 7]","[1707314, 7620, 1222, 191, 54, 21, 2, 2, 1, 1]","[0.98999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0,..."


Unnamed: 0.1,Unnamed: 0,DataType,#Nulls,#Uniques,#Inf,#-Inf,Min,Mean,Max,Std,z 1.96 val,z -1.96 val,z 1.96 cnt,z -1.96 cnt,top10 val,top10 cnt,top10 ratio
0,SK_ID_PREV,int64,0,1670214,0,0,1000001.0,1923089.0,2845382.0,532597.958696,2966981.0,879197.136287,0,0,"[1000983, 2428426, 1026910, 1024863, 2448896, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,SK_ID_CURR,int64,0,338857,0,0,100001.0,278357.2,456255.0,102814.823849,479874.2,76840.119355,0,0,"[187868, 265681, 173680, 242412, 206783, 15636...","[77, 73, 72, 68, 67, 66, 64, 64, 63, 62]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,NAME_CONTRACT_TYPE,object,0,4,0,0,,,,,,,0,0,"['Cash loans', 'Consumer loans', 'Revolving lo...","[747553, 729151, 193164, 346]","[0.45000000000000001, 0.44, 0.12, 0.0]"
3,AMT_ANNUITY,float64,372235,357959,0,0,0.0,15955.12,418058.145,14782.137335,44928.11,-13017.868517,70564,0,"[2250.0, 11250.0, 6750.0, 9000.0, 22500.0, 450...","[31865, 13974, 13442, 12496, 11903, 10597, 717...","[0.02, 0.01, 0.01, 0.01, 0.01, 0.01, 0.0, 0.0,..."
4,AMT_APPLICATION,float64,0,93885,0,0,0.0,175233.9,6905160.0,292779.762387,749082.2,-398614.473919,90181,0,"[0.0, 45000.0, 225000.0, 135000.0, 450000.0, 9...","[392402, 47831, 43543, 40678, 38905, 29367, 24...","[0.23000000000000001, 0.029999999999999999, 0...."
5,AMT_CREDIT,float64,1,86803,0,0,0.0,196114.0,6905160.0,318574.616546,820520.3,-428292.227212,90691,0,"[0.0, 45000.0, 225000.0, 450000.0, 135000.0, 1...","[336768, 35051, 21094, 19954, 18720, 17085, 13...","[0.20000000000000001, 0.02, 0.01, 0.01, 0.01, ..."
6,AMT_DOWN_PAYMENT,float64,895844,29278,0,0,-0.9,6697.402,3060045.0,20921.49541,47703.53,-34308.728864,12685,0,"[0.0, 4500.0, 9000.0, 13500.0, 22500.0, 6750.0...","[369854, 21241, 14747, 9655, 8165, 7709, 6241,...","[0.22, 0.01, 0.01, 0.01, 0.0, 0.0, 0.0, 0.0, 0..."
7,AMT_GOODS_PRICE,float64,385515,93885,0,0,0.0,227847.3,6905160.0,315396.557937,846024.5,-390329.974272,77754,0,"[45000.0, 225000.0, 135000.0, 450000.0, 90000....","[47831, 43549, 40666, 38926, 29367, 24736, 205...","[0.029999999999999999, 0.029999999999999999, 0..."
8,WEEKDAY_APPR_PROCESS_START,object,0,7,0,0,,,,,,,77754,0,"['TUESDAY', 'WEDNESDAY', 'MONDAY', 'FRIDAY', '...","[255118, 255010, 253557, 252048, 249099, 24063...","[0.14999999999999999, 0.14999999999999999, 0.1..."
9,HOUR_APPR_PROCESS_START,int64,0,24,0,0,0.0,12.48418,23.0,3.334028,19.01888,5.949487,19539,31183,"[11, 12, 10, 13, 14, 15, 9, 16, 17, 8]","[192728, 185980, 181690, 172256, 157711, 14296...","[0.12, 0.11, 0.11, 0.10000000000000001, 0.0899..."


Unnamed: 0.1,Unnamed: 0,DataType,#Nulls,#Uniques,#Inf,#-Inf,Min,Mean,Max,Std,z 1.96 val,z -1.96 val,z 1.96 cnt,z -1.96 cnt,top10 val,top10 cnt,top10 ratio
0,SK_ID_PREV,int64,0,997752,0,0,1000001.0,1903365.0,2843499.0,536202.905546,2954323.0,852407.274679,0,0,"[2360056, 2592574, 1017477, 1449382, 1746731, ...","[293, 279, 248, 243, 236, 223, 222, 220, 219, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,SK_ID_CURR,int64,0,339587,0,0,100001.0,278444.9,456255.0,102718.310411,479772.8,77116.993332,0,0,"[145728, 296205, 453103, 189699, 186851, 17269...","[372, 350, 347, 344, 337, 336, 332, 324, 323, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,NUM_INSTALMENT_VERSION,float64,0,65,0,0,0.0,0.8566373,178.0,1.035216,2.88566,-1.172386,417616,0,"[1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, ...","[8485004, 4082498, 620283, 237063, 55274, 4840...","[0.62, 0.29999999999999999, 0.0500000000000000..."
3,NUM_INSTALMENT_NUMBER,int64,0,277,0,0,1.0,18.8709,277.0,26.664067,71.13247,-33.390675,932424,0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]","[1004160, 985716, 968279, 943502, 880007, 8279...","[0.070000000000000007, 0.070000000000000007, 0..."
4,DAYS_INSTALMENT,float64,0,2922,0,0,-2922.0,-1042.27,-1.0,800.946284,527.5847,-2612.124709,0,544170,"[-120.0, -180.0, -150.0, -119.0, -149.0, -210....","[11512, 11212, 11194, 11183, 11144, 11140, 111...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5,DAYS_ENTRY_PAYMENT,float64,2905,3039,0,0,-4921.0,-1051.114,-1.0,800.585883,518.0346,-2620.262015,0,540333,"[-91.0, -182.0, -154.0, -92.0, -245.0, -273.0,...","[13103, 13090, 13071, 12646, 12405, 12151, 119...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6,AMT_INSTALMENT,float64,0,902539,0,0,0.0,17050.91,3771487.845,50570.254429,116168.6,-82066.791691,171300,0,"[9000.0, 2250.0, 4500.0, 6750.0, 3375.0, 5625....","[254062, 179120, 174143, 173659, 149941, 96362...","[0.02, 0.01, 0.01, 0.01, 0.01, 0.01, 0.0, 0.0,..."
7,AMT_PAYMENT,float64,2905,944235,0,0,0.0,17238.22,3771487.845,54735.783981,124520.4,-90043.913353,190177,0,"[9000.0, 2250.0, 4500.0, 6750.0, 3375.0, 5625....","[248757, 182654, 178309, 170360, 141832, 91165...","[0.02, 0.01, 0.01, 0.01, 0.01, 0.01, 0.0, 0.0,..."


Unnamed: 0.1,Unnamed: 0,DataType,#Nulls,#Uniques,#Inf,#-Inf,Min,Mean,Max,Std,z 1.96 val,z -1.96 val,z 1.96 cnt,z -1.96 cnt,top10 val,top10 cnt,top10 ratio
0,SK_ID_PREV,int64,0,104307,0,0,1000018.0,1904504.0,2843496.0,536469.470563,2955984.0,853023.427596,0,0,"[1009171, 1348858, 1745395, 2526035, 1567893, ...","[96, 96, 96, 96, 96, 96, 96, 96, 96, 96]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,SK_ID_CURR,int64,0,103558,0,0,100006.0,278324.2,456250.0,102704.475133,479625.0,77023.436029,0,0,"[186401, 311118, 120076, 128827, 246089, 19182...","[192, 178, 140, 129, 128, 128, 128, 127, 127, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,MONTHS_BALANCE,int64,0,96,0,0,-96.0,-34.52192,-1.0,26.667751,17.74687,-86.790713,0,152148,"[-4, -5, -3, -6, -7, -2, -8, -9, -10, -11]","[102115, 100546, 100355, 98577, 95332, 94643, ...","[0.029999999999999999, 0.029999999999999999, 0..."
3,AMT_BALANCE,float64,0,1347904,0,0,-420250.185,58300.16,1505902.185,106307.031025,266661.9,-150061.625546,178607,8,"[0.0, 67.5, 130.5, 270.0, 135.0, 202.5, 450.0,...","[2156420, 16049, 3662, 2313, 921, 742, 536, 31...","[0.56000000000000005, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,AMT_CREDIT_LIMIT_ACTUAL,int64,0,181,0,0,0.0,153808.0,1350000.0,165145.699523,477493.5,-169877.613665,138559,0,"[0, 180000, 135000, 45000, 90000, 67500, 45000...","[753823, 529292, 430142, 329932, 319996, 30886...","[0.20000000000000001, 0.14000000000000001, 0.1..."
5,AMT_DRAWINGS_ATM_CURRENT,float64,749816,2267,0,0,-6827.31,5961.325,2115000.0,28225.688579,61283.67,-49361.024792,96233,0,"[0.0, 4500.0, 9000.0, 45000.0, 2250.0, 22500.0...","[2665718, 35851, 27726, 22946, 22854, 22676, 2...","[0.68999999999999995, 0.01, 0.01, 0.01, 0.01, ..."
6,AMT_DRAWINGS_CURRENT,float64,0,187005,0,0,-6211.62,7433.388,2287098.315,33846.077334,73771.7,-58904.923394,118470,0,"[0.0, 4500.0, 9000.0, 2250.0, 45000.0, 22500.0...","[3223443, 30257, 22968, 20212, 18947, 18670, 1...","[0.83999999999999997, 0.01, 0.01, 0.01, 0.0, 0..."
7,AMT_DRAWINGS_OTHER_CURRENT,float64,749816,1832,0,0,0.0,288.1696,1529847.0,8201.989345,16364.07,-15787.729533,7583,0,"[0.0, 3343.5, 6682.5, 4455.0, 8910.0, 46800.0,...","[3078163, 782, 460, 441, 319, 242, 235, 216, 2...","[0.80000000000000004, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8,AMT_DRAWINGS_POS_CURRENT,float64,749816,168748,0,0,0.0,2968.805,2239274.16,20796.887047,43730.7,-37793.093765,59736,0,"[0.0, 450.0, 900.0, 2250.0, 4500.0, 1350.0, 22...","[2825595, 1287, 976, 910, 801, 784, 609, 462, ...","[0.73999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0,..."
9,AMT_INST_MIN_REGULARITY,float64,305236,312266,0,0,0.0,3540.204,202882.005,5600.154122,14516.51,-7436.097951,149765,0,"[0.0, 9000.0, 6750.0, 3375.0, 4500.0, 2250.0, ...","[1928864, 225429, 147469, 127613, 124979, 1083...","[0.5, 0.059999999999999998, 0.0400000000000000..."


Unnamed: 0.1,Unnamed: 0,DataType,#Nulls,#Uniques,#Inf,#-Inf,Min,Mean,Max,Std,z 1.96 val,z -1.96 val,z 1.96 cnt,z -1.96 cnt,top10 val,top10 cnt,top10 ratio
0,SK_ID_PREV,int64,0,936325,0,0,1000001.0,1903217.0,2843499.0,535846.530722,2953476.0,852957.398742,0,0,"[1624618, 2746611, 1889497, 1235285, 2263451, ...","[96, 96, 96, 96, 96, 96, 96, 96, 96, 96]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,SK_ID_CURR,int64,0,337252,0,0,100001.0,278403.9,456255.0,102763.74509,479820.8,76986.92293,0,0,"[265042, 172612, 309133, 197583, 127659, 18518...","[295, 247, 246, 245, 245, 245, 244, 239, 237, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,MONTHS_BALANCE,int64,0,96,0,0,-96.0,-35.01259,-1.0,26.06657,16.07789,-86.103065,0,436635,"[-10, -11, -9, -12, -8, -13, -7, -14, -6, -15]","[216441, 216023, 215558, 214716, 214149, 21095...","[0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.0..."
3,CNT_INSTALMENT,float64,26071,73,0,0,1.0,17.08965,92.0,11.995056,40.59996,-6.420659,589927,0,"[12.0, 24.0, 10.0, 6.0, 18.0, 36.0, 8.0, 48.0,...","[2496845, 1517472, 1243449, 1065500, 727394, 5...","[0.25, 0.14999999999999999, 0.12, 0.11, 0.0700..."
4,CNT_INSTALMENT_FUTURE,float64,26087,79,0,0,0.0,10.48384,85.0,11.109058,32.25759,-11.289913,605599,0,"[0.0, 6.0, 4.0, 5.0, 3.0, 2.0, 1.0, 10.0, 8.0,...","[1185960, 614058, 613632, 600295, 582007, 5471...","[0.12, 0.059999999999999998, 0.059999999999999..."
5,NAME_CONTRACT_STATUS,object,0,9,0,0,,,,,,,605599,0,"['Active', 'Completed', 'Signed', 'Demand', 'R...","[9151119, 744883, 87260, 7065, 5461, 4917, 636...","[0.91000000000000003, 0.070000000000000007, 0...."
6,SK_DPD,int64,0,3400,0,0,0.0,11.60693,4231.0,132.714043,271.7265,-248.512597,96162,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","[9706131, 21872, 17358, 14403, 12350, 11046, 9...","[0.96999999999999997, 0.0, 0.0, 0.0, 0.0, 0.0,..."
7,SK_DPD_DEF,int64,0,2307,0,0,0.0,0.6544684,3595.0,32.762491,64.86895,-63.560013,4902,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]","[9887389, 22134, 14690, 11652, 9528, 8031, 662...","[0.98999999999999999, 0.0, 0.0, 0.0, 0.0, 0.0,..."
