# Week 8 Bank Data Case Study

## Load Packages

In [1]:
import pandas as pd
import numpy as np

## Read the Data

In this section we read in the data. 

In [2]:
# Load the case-study CSV (the path is relative to the notebook's working directory)
# and preview the first five rows.
df = pd.read_csv("../../../case_8.csv")
df.head()

Unnamed: 0,ID,target,v1,v2,v3,v4,v5,v6,v7,v8,...,v122,v123,v124,v125,v126,v127,v128,v129,v130,v131
0,3,1,1.335739,8.727474,C,3.921026,7.915266,2.599278,3.176895,0.012941,...,8.0,1.98978,0.035754,AU,1.804126,3.113719,2.024285,0,0.636365,2.857144
1,4,1,1.630686,7.464411,C,4.145098,9.191265,2.436402,2.483921,2.30163,...,6.822439,3.549938,0.598896,AF,1.672658,3.239542,1.957825,0,1.925763,1.739389
2,5,1,0.943877,5.310079,C,4.410969,5.326159,3.979592,3.928571,0.019645,...,9.333333,2.477596,0.013452,AE,1.773709,3.922193,1.120468,2,0.883118,1.176472
3,6,1,0.797415,8.304757,C,4.22593,11.627438,2.0977,1.987549,0.171947,...,7.018256,1.812795,0.002267,CJ,1.41523,2.954381,1.990847,1,1.677108,1.034483
4,8,1,1.630686,7.464411,C,4.145098,8.742359,2.436402,2.483921,1.496569,...,6.822439,3.549938,0.919812,Z,1.672658,3.239542,2.030373,0,1.925763,1.739389


No obvious issues such as parsing errors or missing values in the first few rows. Let's see what data types we have.

In [3]:
# verbose=True prints the full per-column summary instead of a shortened one.
df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114321 entries, 0 to 114320
Data columns (total 133 columns):
 #   Column  Dtype  
---  ------  -----  
 0   ID      int64  
 1   target  int64  
 2   v1      float64
 3   v2      float64
 4   v3      object 
 5   v4      float64
 6   v5      float64
 7   v6      float64
 8   v7      float64
 9   v8      float64
 10  v9      float64
 11  v10     float64
 12  v11     float64
 13  v12     float64
 14  v13     float64
 15  v14     float64
 16  v15     float64
 17  v16     float64
 18  v17     float64
 19  v18     float64
 20  v19     float64
 21  v20     float64
 22  v21     float64
 23  v22     object 
 24  v23     float64
 25  v24     object 
 26  v25     float64
 27  v26     float64
 28  v27     float64
 29  v28     float64
 30  v29     float64
 31  v30     object 
 32  v31     object 
 33  v32     float64
 34  v33     float64
 35  v34     float64
 36  v35     float64
 37  v36     float64
 38  v37     float64
 39  v38     int64  
 40  v

We see mostly floats. There are some object columns that we should probably recast. We have about 114K observations, which is plenty to work with. The variables are anonymized (no meaningful names), as expected. We were told there are no missing values, but let's check anyway.

In [4]:
# True if at least one cell anywhere in the frame is null.
df.isnull().values.any()

False

Fine, the claim holds: there are no missing values. How about the target variable?

In [46]:
# Class balance of the target: raw counts per class, then the share of rows with target == 1.
counts = df["target"].value_counts()
print(counts)
positive_share = counts[1] / counts.sum()
print(round(positive_share, 4))

1    87021
0    27300
Name: target, dtype: int64
0.7612


The target is binary and moderately imbalanced (about 76% of rows are class 1), but not severely so.

## Data Cleaning

In [6]:
# Show up to 500 columns and render floats with five decimals so the wide summary is readable.
pd.set_option('display.max_columns', 500)
pd.set_option('display.float_format', '{:.5f}'.format)

df.describe()

Unnamed: 0,ID,target,v1,v2,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v23,v25,v26,v27,v28,v29,v32,v33,v34,v35,v36,v37,v38,v39,v40,v41,v42,v43,v44,v45,v46,v48,v49,v50,v51,v53,v54,v55,v57,v58,v59,v60,v61,v62,v63,v64,v65,v67,v68,v69,v70,v72,v73,v76,v77,v78,v80,v81,v82,v83,v84,v85,v86,v87,v88,v89,v90,v92,v93,v94,v95,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v108,v109,v111,v114,v115,v116,v117,v118,v119,v120,v121,v122,v123,v124,v126,v127,v128,v129,v130,v131
count,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0
mean,114228.92823,0.7612,1.63069,7.46441,4.1451,8.74236,2.4364,2.48392,1.49657,9.03186,1.88305,15.44741,6.8813,3.7984,12.09428,2.08091,4.92322,3.83227,0.84105,0.2223,17.77359,7.02974,1.09309,1.69813,1.87603,2.74345,5.09333,8.20642,1.62215,2.16163,6.40624,8.12239,13.3756,0.74147,0.09093,1.23718,10.46593,7.18255,12.92497,2.2166,10.79517,9.14223,1.63053,12.53802,8.01655,1.50426,7.19816,15.7113,1.25386,1.55956,4.07783,7.70165,10.58794,1.71429,14.58303,1.03069,1.68733,6.34371,15.84756,9.28728,17.56412,9.44934,12.26996,1.43177,2.4333,2.40506,7.30737,13.33448,2.2097,7.28717,6.20836,2.17381,1.60796,2.82225,1.22018,10.18022,1.92418,1.51843,0.96691,0.58237,5.47518,3.85288,0.66576,6.45795,7.62255,7.66762,1.25072,12.09162,6.86641,2.89029,5.29672,2.64283,1.08105,11.79136,2.15262,4.18128,3.36531,13.57445,10.54805,2.29122,8.30386,8.36465,3.16897,1.29122,2.7376,6.82244,3.54994,0.91981,1.67266,3.23954,2.03037,0.31014,1.92576,1.73939
std,65934.48736,0.42635,0.81326,2.22504,0.86266,1.54344,0.45061,0.44271,2.10979,1.44954,1.39347,0.59338,0.92415,0.88317,1.44392,0.55045,1.34464,1.43607,0.46286,0.12868,0.86743,1.0694,2.98732,2.24158,0.41398,0.62666,2.01131,0.96545,0.42324,0.7397,2.0242,1.00628,1.78573,0.40657,0.58348,1.77108,3.16764,0.75443,0.7488,0.48667,1.58586,1.55058,2.19532,1.64993,0.67797,1.16789,1.87306,0.60036,1.7546,0.62668,0.50925,5.13806,1.5564,0.40378,1.59344,0.69624,2.24951,1.89742,1.4105,0.84371,1.71983,1.4267,1.75436,0.92227,0.59981,1.03956,0.94339,1.38423,0.80726,1.68567,2.78821,0.79785,0.70691,1.06186,0.34985,2.27357,0.78753,2.13245,0.13438,0.1804,1.23201,0.64216,0.19835,0.84155,1.44498,1.76276,0.34655,5.17341,1.76901,1.35412,0.92291,0.66527,1.70317,2.21935,0.69222,2.81395,1.11715,2.61288,1.42744,0.5034,2.74269,1.50358,3.1636,0.55455,1.0186,1.3487,1.94343,1.59155,0.37791,1.22123,0.81434,0.69326,0.94964,0.85182
min,3.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,1.51678,0.10618,-0.0,0.04104,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.06935,-0.0,-0.0,-0.0,-0.0,-0.0,0.01306,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.05306,-0.0,0.6593,-0.0,1.50136,-0.0,0.42709,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.8724,-0.0,0.02237,-0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,9e-05,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.01914,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0
25%,57280.0,1.0,1.34615,6.57577,4.0687,8.39409,2.34097,2.37659,0.26531,8.81356,1.05033,15.39823,6.32262,3.46409,11.25602,1.90569,4.70588,3.37983,0.71949,0.19241,17.77359,6.41875,0.0,0.27448,1.75553,2.56647,4.74236,8.11437,1.49129,1.83074,5.0558,7.89615,13.09935,0.59072,0.0,0.30522,8.41039,7.06762,12.81317,2.06897,10.54256,8.88634,0.2563,12.15693,7.914,0.65879,6.83727,15.66772,0.20819,1.2766,3.97665,4.06136,10.2168,1.5996,14.58303,1.0,0.27094,5.85294,15.84756,9.16798,17.56412,9.27007,12.08748,1.0,2.23612,2.06003,7.19023,13.15175,1.95122,7.20499,3.59298,1.81535,1.31804,2.44395,1.10841,9.55184,1.62351,0.22454,0.94962,0.51793,5.13587,3.65008,0.59461,6.36225,7.18954,7.29994,1.17271,12.09162,6.34055,2.32905,4.98815,2.43202,0.17355,11.70203,1.85201,2.72122,2.92386,11.99667,10.26667,2.13904,7.604,7.86517,1.16943,1.05263,2.28261,6.51961,2.57105,0.08471,1.57097,2.7625,1.68126,0.0,1.44948,1.46341
50%,114189.0,1.0,1.63069,7.46441,4.1451,8.74236,2.4364,2.48392,1.49657,9.03186,1.31291,15.44741,6.61324,3.7984,11.96783,2.08091,4.92322,3.83227,0.84105,0.2223,17.77359,7.03937,0.33059,1.69813,1.87603,2.74345,5.09333,8.20642,1.62215,2.16163,6.53443,8.12239,13.3756,0.74147,0.0,1.23718,10.33934,7.18255,12.92497,2.2166,10.79517,9.14223,1.63053,12.53802,8.01655,1.21194,7.19816,15.7113,1.25386,1.55956,4.07783,7.70165,10.58794,1.71429,14.58303,1.0,1.68733,6.34371,15.84756,9.28728,17.56412,9.44934,12.26996,1.0,2.4333,2.40506,7.30737,13.33448,2.2097,7.28717,6.20836,2.17381,1.60796,2.82225,1.22018,10.18022,1.92418,1.51843,0.96691,0.58237,5.47518,3.85288,0.66576,6.45795,7.62255,7.66762,1.25072,12.09162,6.86641,2.89029,5.29672,2.64283,1.08105,11.79136,2.15262,4.18128,3.36531,14.03888,10.54805,2.29122,8.30386,8.36465,3.16897,1.29122,2.7376,6.82244,3.54994,0.91981,1.67266,3.23954,2.03037,0.0,1.92576,1.73939
75%,171206.0,1.0,1.63069,7.5515,4.34023,8.9248,2.4847,2.52845,1.49657,9.30233,2.10066,15.5939,7.0194,3.7984,12.71577,2.08091,5.14286,3.83227,0.84105,0.2223,18.1546,7.66652,1.09309,1.69813,1.89891,2.7791,5.33034,8.47939,1.62215,2.16163,7.70145,8.25076,14.32492,0.74147,0.0,1.23718,12.76246,7.34477,13.04965,2.23749,11.0221,9.41516,1.63053,12.67463,8.13559,2.00572,7.41788,15.87156,1.25386,1.55956,4.15366,7.70165,10.83954,1.73502,15.31291,1.0,1.68733,6.3844,16.47085,9.46899,18.4375,9.73384,12.9166,2.0,2.43665,2.40506,7.55221,13.55932,2.24359,7.82301,6.20836,2.17381,1.60796,2.82225,1.22018,10.43359,1.92418,1.51843,0.9901,0.58237,5.47518,3.85288,0.66576,6.669,7.71084,8.00612,1.30167,15.69721,6.93119,2.89029,5.29672,2.64283,1.08105,12.44363,2.15262,4.18128,3.36531,15.37219,10.71895,2.31017,8.64537,8.41772,3.16897,1.29122,2.7376,7.0,3.54994,0.91981,1.67266,3.23954,2.03037,0.0,1.92576,1.73939
max,228713.0,1.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,18.53392,20.0,18.71055,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,19.29605,20.0,20.0,20.0,20.0,19.84819,20.0,17.56098,20.0,20.0,20.0,20.0,20.0,12.0,19.91553,20.0,20.0,20.0,20.0,19.83168,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,18.84696,7.0,20.0,20.0,20.0,20.0,20.0,20.0,19.81631,12.0,20.0,20.0,15.97351,20.0,20.0,20.0,20.0,20.0,20.0,20.0,17.56098,19.84275,20.0,20.0,6.30577,8.92384,20.0,19.01631,9.07054,20.0,20.0,19.0588,20.0,20.0,20.0,20.0,18.77525,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,10.39427,20.0,20.0,19.68607,20.0,15.63161,20.0,20.0,11.0,20.0,20.0


In [7]:
# Summarize only the object-dtype columns: count, number of unique levels, most frequent level and its frequency.
df.describe(include='object')

Unnamed: 0,v3,v22,v24,v30,v31,v47,v52,v56,v66,v71,v74,v75,v79,v91,v107,v110,v112,v113,v125
count,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321,114321
unique,3,18210,5,7,3,10,12,122,3,9,3,4,18,7,7,3,22,36,90
top,C,AGDF,E,C,A,C,J,BW,A,F,B,D,C,A,E,A,F,G,BM
freq,114041,2886,55177,92288,91804,55425,11106,18233,70353,75094,113560,75087,34561,27082,27082,55688,22053,71556,5836


In [8]:
#https://github.com/Sundar0989/WOE-and-IV/blob/master/WOE_IV.ipynb
import pandas.core.algorithms as algos
from pandas import Series
import scipy.stats.stats as stats
import re
import traceback
import string

max_bin = 20    # starting number of quantile buckets tried by mono_bin
force_bin = 3   # number of quantile edges used when the monotonic search ends with a single bucket

# define a binning function
def mono_bin(Y, X, n = max_bin):
    """Bin a numeric feature into monotonic quantile buckets and compute WOE / IV.

    Starting from ``n`` quantile buckets, the bucket count is reduced one at a
    time until the Spearman correlation between the per-bucket mean of ``X``
    and the per-bucket mean of ``Y`` reaches +/-1 (or is NaN). If that search
    ends with a single bucket (or never produced a binning), the feature is
    instead cut at ``force_bin`` evenly spaced quantile edges.

    Parameters
    ----------
    Y : array-like
        Binary event indicator (1 = event, 0 = non-event), aligned with ``X``.
    X : array-like
        Numeric feature values; nulls are reported in a separate final row.
    n : int, default ``max_bin``
        Number of quantile buckets to start the search from.

    Returns
    -------
    pandas.DataFrame
        One row per bucket (plus one row for missing ``X`` when present) with
        columns VAR_NAME, MIN_VALUE, MAX_VALUE, COUNT, EVENT, EVENT_RATE,
        NONEVENT, NON_EVENT_RATE, DIST_EVENT, DIST_NON_EVENT, WOE, IV.
        Infinite values are replaced by 0 and the IV column holds the summed
        information value repeated on every row.

    Raises
    ------
    ValueError
        If ``X`` has no non-missing values (there is nothing to bin).
    """
    df1 = pd.DataFrame({"X": X, "Y": Y})
    justmiss = df1[['X','Y']][df1.X.isnull()]
    notmiss = df1[['X','Y']][df1.X.notnull()]

    if notmiss.empty:
        # Without this guard the search below could never succeed and would spin forever.
        raise ValueError("mono_bin needs at least one non-missing X value to bin")

    r = 0
    d2 = None
    # The search is bounded: it stops once n drops below 1, so inputs that can
    # never be quantile-binned no longer loop forever.
    while np.abs(r) < 1 and n >= 1:
        try:
            d1 = pd.DataFrame({"X": notmiss.X, "Y": notmiss.Y, "Bucket": pd.qcut(notmiss.X, n)})
            # observed=False keeps every interval as a group regardless of the pandas default.
            d2 = d1.groupby('Bucket', as_index=True, observed=False)
            bucket_means = d2[["X", "Y"]].mean()
            r, _ = stats.spearmanr(bucket_means.X, bucket_means.Y)
        except Exception:
            # Best effort: a bucket count that cannot be built (e.g. duplicate
            # quantile edges) is skipped and the next smaller count is tried.
            pass
        n = n - 1

    if d2 is None or len(d2) == 1:
        n = force_bin
        # np.quantile replaces the private pandas.core.algorithms.quantile helper.
        bins = np.quantile(notmiss.X, np.linspace(0, 1, n))
        if len(np.unique(bins)) == 2:
            bins = np.insert(bins, 0, 1)
            bins[1] = bins[1]-(bins[1]/2)
        d1 = pd.DataFrame({"X": notmiss.X, "Y": notmiss.Y, "Bucket": pd.cut(notmiss.X, np.unique(bins),include_lowest=True)})
        d2 = d1.groupby('Bucket', as_index=True, observed=False)

    d3 = pd.DataFrame({
        "MIN_VALUE": d2["X"].min(),
        "MAX_VALUE": d2["X"].max(),
        "COUNT": d2["Y"].count(),
        "EVENT": d2["Y"].sum(),
    })
    d3["NONEVENT"] = d3["COUNT"] - d3["EVENT"]
    d3 = d3.reset_index(drop=True)

    if len(justmiss.index) > 0:
        # Missing X values get their own row with NaN bounds.
        miss_count = justmiss.Y.count()
        miss_event = justmiss.Y.sum()
        d4 = pd.DataFrame({
            "MIN_VALUE": np.nan,
            "MAX_VALUE": np.nan,
            "COUNT": miss_count,
            "EVENT": miss_event,
            "NONEVENT": miss_count - miss_event,
        }, index=[0])
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
        d3 = pd.concat([d3, d4], ignore_index=True)

    d3["EVENT_RATE"] = d3.EVENT/d3.COUNT
    d3["NON_EVENT_RATE"] = d3.NONEVENT/d3.COUNT
    d3["DIST_EVENT"] = d3.EVENT/d3.EVENT.sum()
    d3["DIST_NON_EVENT"] = d3.NONEVENT/d3.NONEVENT.sum()
    d3["WOE"] = np.log(d3.DIST_EVENT/d3.DIST_NON_EVENT)
    d3["IV"] = (d3.DIST_EVENT-d3.DIST_NON_EVENT)*d3.WOE
    d3["VAR_NAME"] = "VAR"
    d3 = d3[['VAR_NAME','MIN_VALUE', 'MAX_VALUE', 'COUNT', 'EVENT', 'EVENT_RATE', 'NONEVENT', 'NON_EVENT_RATE', 'DIST_EVENT','DIST_NON_EVENT','WOE', 'IV']]
    # Buckets with zero events or zero non-events produce +/-inf; they are zeroed out.
    d3 = d3.replace([np.inf, -np.inf], 0)
    d3["IV"] = d3["IV"].sum()

    return(d3)

def char_bin(Y, X):
    """Compute WOE / IV for a categorical (or low-cardinality) feature.

    Every distinct non-null value of ``X`` becomes its own group; null values
    of ``X``, when present, are reported in one extra final row.

    Parameters
    ----------
    Y : array-like
        Binary event indicator (1 = event, 0 = non-event), aligned with ``X``.
    X : array-like
        Feature values used as group labels.

    Returns
    -------
    pandas.DataFrame
        One row per level with columns VAR_NAME, MIN_VALUE, MAX_VALUE, COUNT,
        EVENT, EVENT_RATE, NONEVENT, NON_EVENT_RATE, DIST_EVENT,
        DIST_NON_EVENT, WOE, IV. MIN_VALUE and MAX_VALUE both hold the level
        label (NaN for the missing row). Infinite values are replaced by 0 and
        the IV column holds the summed information value on every row.
    """
    df1 = pd.DataFrame({"X": X, "Y": Y})
    justmiss = df1[['X','Y']][df1.X.isnull()]
    notmiss = df1[['X','Y']][df1.X.notnull()]
    # observed=False keeps every category as a group regardless of the pandas default.
    df2 = notmiss.groupby('X', as_index=True, observed=False)

    d3 = df2["Y"].count().to_frame("COUNT")
    d3["MIN_VALUE"] = d3.index
    d3["MAX_VALUE"] = d3["MIN_VALUE"]
    d3["EVENT"] = df2["Y"].sum()
    d3["NONEVENT"] = d3["COUNT"] - d3["EVENT"]
    d3 = d3.reset_index(drop=True)

    if len(justmiss.index) > 0:
        # Missing X values get their own row with NaN labels.
        miss_count = justmiss.Y.count()
        miss_event = justmiss.Y.sum()
        d4 = pd.DataFrame({
            "MIN_VALUE": np.nan,
            "MAX_VALUE": np.nan,
            "COUNT": miss_count,
            "EVENT": miss_event,
            "NONEVENT": miss_count - miss_event,
        }, index=[0])
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
        d3 = pd.concat([d3, d4], ignore_index=True)

    d3["EVENT_RATE"] = d3.EVENT/d3.COUNT
    d3["NON_EVENT_RATE"] = d3.NONEVENT/d3.COUNT
    d3["DIST_EVENT"] = d3.EVENT/d3.EVENT.sum()
    d3["DIST_NON_EVENT"] = d3.NONEVENT/d3.NONEVENT.sum()
    d3["WOE"] = np.log(d3.DIST_EVENT/d3.DIST_NON_EVENT)
    d3["IV"] = (d3.DIST_EVENT-d3.DIST_NON_EVENT)*d3.WOE
    d3["VAR_NAME"] = "VAR"
    d3 = d3[['VAR_NAME','MIN_VALUE', 'MAX_VALUE', 'COUNT', 'EVENT', 'EVENT_RATE', 'NONEVENT', 'NON_EVENT_RATE', 'DIST_EVENT','DIST_NON_EVENT','WOE', 'IV']]
    # Levels with zero events or zero non-events produce +/-inf; they are zeroed out.
    d3 = d3.replace([np.inf, -np.inf], 0)
    d3["IV"] = d3["IV"].sum()
    d3 = d3.reset_index(drop=True)

    return(d3)

def data_vars(df1, target):
    """Build WOE / IV tables for every feature column of ``df1``.

    Numeric columns with more than two distinct values are binned with
    ``mono_bin``; every other column is treated as categorical and handled by
    ``char_bin``. A column whose name matches the target's name
    (case-insensitively) is skipped.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Frame holding the candidate features (it may also contain the target).
    target : pandas.Series or array-like
        Binary event indicator aligned with ``df1``. When it has a ``name``
        attribute, that name identifies the target column to skip.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``iv_df``: the per-bucket WOE tables of all variables stacked together;
        ``iv``: one row per VAR_NAME with its information value.

    Raises
    ------
    ValueError
        If ``df1`` contains no column other than the target.
    """
    # Prefer the Series' own name: it does not depend on how the call was written.
    target_name = getattr(target, "name", None)
    if target_name is None:
        # Legacy fallback for unnamed targets: take the last identifier in the
        # caller's source line. Best effort only; it fails for multi-line calls.
        try:
            caller_line = traceback.extract_stack()[-2].line
            call_args = re.compile(r'\((.*?)\).*$').search(caller_line).groups()[0]
            target_name = re.findall(r"[\w']+", call_args)[-1]
        except (AttributeError, IndexError, TypeError):
            target_name = None

    skip_name = None if target_name is None else str(target_name).upper()

    tables = []
    for col in df1.columns:
        # Exact (case-insensitive) match; the earlier substring test also
        # dropped unrelated columns such as "T" when the target was "target".
        if skip_name is not None and str(col).upper() == skip_name:
            continue

        feature = df1[col]
        is_numeric = (
            pd.api.types.is_numeric_dtype(feature)
            and not pd.api.types.is_bool_dtype(feature)
        )
        if is_numeric and feature.nunique(dropna=False) > 2:
            conv = mono_bin(target, feature)
        else:
            conv = char_bin(target, feature)
        conv["VAR_NAME"] = col
        tables.append(conv)

    if not tables:
        raise ValueError("data_vars found no feature columns besides the target")

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    iv_df = pd.concat(tables, ignore_index=True)

    iv = pd.DataFrame({'IV':iv_df.groupby('VAR_NAME').IV.max()})
    iv = iv.reset_index()
    return(iv_df,iv)

In [9]:
# Raise the row display limit to 500 before showing the WOE table.
pd.set_option('display.max_rows', 500)
# Work on a two-column copy so the WOE/IV computation only sees v22 and the target.
forWOE = df[["v22","target"]].copy()

# final_iv: one WOE row per v22 level; IV: the per-variable information value summary.
final_iv, IV = data_vars(forWOE , forWOE.target)
# Order the levels from highest to lowest WOE.
final_iv.sort_values("WOE",ascending=False)

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,VAR_NAME,MIN_VALUE,MAX_VALUE,COUNT,EVENT,EVENT_RATE,NONEVENT,NON_EVENT_RATE,DIST_EVENT,DIST_NON_EVENT,WOE,IV
9055,v22,JEA,JEA,36,35,0.97222,1,0.02778,0.00040,0.00004,2.39609,0.29734
14740,v22,TPX,TPX,25,24,0.96000,1,0.04000,0.00028,0.00004,2.01879,0.29734
2624,v22,AEUR,AEUR,23,22,0.95652,1,0.04348,0.00025,0.00004,1.93178,0.29734
12366,v22,PEF,PEF,23,22,0.95652,1,0.04348,0.00025,0.00004,1.93178,0.29734
16518,v22,WWN,WWN,22,21,0.95455,1,0.04545,0.00024,0.00004,1.88526,0.29734
...,...,...,...,...,...,...,...,...,...,...,...,...
731,v22,ABIM,ABIM,6,1,0.16667,5,0.83333,0.00001,0.00018,-2.76870,0.29734
4876,v22,BMV,BMV,6,1,0.16667,5,0.83333,0.00001,0.00018,-2.76870,0.29734
1112,v22,ACBD,ACBD,6,1,0.16667,5,0.83333,0.00001,0.00018,-2.76870,0.29734
12304,v22,PAZ,PAZ,7,1,0.14286,6,0.85714,0.00001,0.00022,-2.95102,0.29734


## Data Prep

In [10]:
from sklearn.preprocessing import StandardScaler
def transform_data(data):
    """One-hot encode object columns and standardize the numeric features.

    Object-dtype columns are expanded with ``pd.get_dummies``; numeric columns
    other than "ID" and "target" are rescaled with a ``StandardScaler`` that is
    fit on the frame passed in. Columns of any other dtype are left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; must contain the "ID" and "target" columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the dummy columns appended and the numeric features scaled.
    """
    categorical_cols = data.select_dtypes(include='object').columns
    feature_frame = data.drop(columns=["ID","target"])
    numeric_cols = feature_frame.select_dtypes(include='number').columns

    encoded = pd.get_dummies(data, columns=categorical_cols)
    encoded[numeric_cols] = StandardScaler().fit_transform(encoded[numeric_cols])

    return encoded

In [11]:
# Cast v22 to 'category' so transform_data, which one-hot encodes only object-dtype
# columns, leaves this high-cardinality variable as a single column.
df['v22'] = df['v22'].astype('category')
df2 = transform_data(df)
# Attach each row's WOE by matching its v22 level against MIN_VALUE, which holds
# the level label for categorical variables in final_iv.
df3 = df2.merge(final_iv[["MIN_VALUE","WOE"]], how='left', left_on="v22",right_on="MIN_VALUE")
# Drop the raw level and the join key; only the numeric WOE encoding of v22 is kept.
preModel_data = df3.drop(columns=["v22","MIN_VALUE"]).copy()

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [12]:
# Preview the model-ready frame: scaled numeric features, one-hot columns and the WOE column for v22.
preModel_data.head()

Unnamed: 0,ID,target,v1,v2,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v23,v25,v26,v27,v28,v29,v32,v33,v34,v35,v36,v37,v38,v39,v40,v41,v42,v43,v44,v45,v46,v48,v49,v50,v51,v53,v54,v55,v57,v58,v59,v60,v61,v62,v63,v64,v65,v67,v68,v69,v70,v72,v73,v76,v77,v78,v80,v81,v82,v83,v84,v85,v86,v87,v88,v89,v90,v92,v93,v94,v95,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v108,v109,v111,v114,v115,v116,v117,v118,v119,v120,v121,v122,v123,v124,v126,v127,v128,v129,v130,v131,v3_A,v3_B,v3_C,v24_A,v24_B,v24_C,v24_D,v24_E,v30_A,v30_B,v30_C,v30_D,v30_E,v30_F,v30_G,v31_A,v31_B,v31_C,v47_A,v47_B,v47_C,v47_D,v47_E,v47_F,v47_G,v47_H,v47_I,v47_J,v52_A,v52_B,v52_C,v52_D,v52_E,v52_F,v52_G,v52_H,v52_I,v52_J,v52_K,v52_L,v56_A,v56_AA,v56_AB,v56_AC,v56_AE,v56_AF,v56_AG,v56_AH,v56_AI,v56_AJ,v56_AK,v56_AL,v56_AM,v56_AN,v56_AO,v56_AP,v56_AR,v56_AS,v56_AT,v56_AU,v56_AV,v56_AW,v56_AX,v56_AY,v56_AZ,v56_B,v56_BA,v56_BC,v56_BD,v56_BE,v56_BF,v56_BG,v56_BH,v56_BI,v56_BJ,v56_BK,v56_BL,v56_BM,v56_BN,v56_BO,v56_BP,v56_BQ,v56_BR,v56_BS,v56_BT,v56_BU,v56_BV,v56_BW,v56_BX,v56_BY,v56_BZ,v56_C,v56_CA,v56_CB,v56_CC,v56_CD,v56_CE,v56_CF,v56_CG,v56_CH,v56_CI,v56_CJ,v56_CK,v56_CL,v56_CM,v56_CN,v56_CO,v56_CP,v56_CQ,v56_CS,v56_CT,v56_CV,v56_CW,v56_CX,v56_CY,v56_CZ,v56_D,v56_DA,v56_DB,v56_DC,v56_DD,v56_DE,v56_DF,v56_DG,v56_DH,v56_DI,v56_DJ,v56_DK,v56_DL,v56_DM,v56_DN,v56_DO,v56_DP,v56_DQ,v56_DR,v56_DS,v56_DT,v56_DU,v56_DV,v56_DW,v56_DX,v56_DY,v56_DZ,v56_E,v56_F,v56_G,v56_H,v56_I,v56_L,v56_M,v56_N,v56_O,v56_P,v56_Q,v56_R,v56_T,v56_U,v56_V,v56_W,v56_X,v56_Y,v56_Z,v66_A,v66_B,v66_C,v71_A,v71_B,v71_C,v71_D,v71_F,v71_G,v71_I,v71_K,v71_L,v74_A,v74_B,v74_C,v75_A,v75_B,v75_C,v75_D,v79_A,v79_B,v79_C,v79_D,v79_E,v79_F,v79_G,v79_H,v79_I,v79_J,v79_K,v79_L,v79_M,v79_N,v79_O,v79_P,v79_Q,v79_R,v91_A,v91_B,v91_C,v91_D,v91_E,v91_F,v91_G,v107_A,v107_B,v107_C,v107_D,v107_E,v107_F,v107_G,v110_A,v110_B,v110_C,v112_A,v112_B,v112_C,v112_D,v112_E,v112_F,v112_G,v112_H,v112_I,v112_J,v112_K,v112_L,v112_M,v112_N,v112_O,v112_
P,v112_Q,v112_R,v112_S,v112_T,v112_U,v112_V,v113_A,v113_AA,v113_AB,v113_AC,v113_AD,v113_AE,v113_AF,v113_AG,v113_AH,v113_AI,v113_AJ,v113_AK,v113_B,v113_C,v113_D,v113_E,v113_F,v113_G,v113_H,v113_I,v113_J,v113_L,v113_M,v113_N,v113_O,v113_P,v113_Q,v113_R,v113_S,v113_T,v113_U,v113_V,v113_W,v113_X,v113_Y,v113_Z,v125_A,v125_AA,v125_AB,v125_AC,v125_AD,v125_AE,v125_AF,v125_AG,v125_AH,v125_AI,v125_AJ,v125_AK,v125_AL,v125_AM,v125_AN,v125_AO,v125_AP,v125_AQ,v125_AR,v125_AS,v125_AT,v125_AU,v125_AV,v125_AW,v125_AX,v125_AY,v125_AZ,v125_B,v125_BA,v125_BB,v125_BC,v125_BD,v125_BE,v125_BF,v125_BG,v125_BH,v125_BI,v125_BJ,v125_BK,v125_BL,v125_BM,v125_BN,v125_BO,v125_BP,v125_BQ,v125_BR,v125_BS,v125_BT,v125_BU,v125_BV,v125_BW,v125_BX,v125_BY,v125_BZ,v125_C,v125_CA,v125_CB,v125_CC,v125_CD,v125_CE,v125_CF,v125_CG,v125_CH,v125_CI,v125_CJ,v125_CK,v125_CL,v125_D,v125_E,v125_F,v125_G,v125_H,v125_I,v125_J,v125_K,v125_L,v125_M,v125_N,v125_O,v125_P,v125_Q,v125_R,v125_S,v125_T,v125_U,v125_V,v125_W,v125_X,v125_Y,v125_Z,WOE
0,3,1,-0.36267,0.56766,-0.25975,-0.53588,0.36146,1.56529,-0.70322,0.6679,-0.99017,1.66284,-0.8609,-1.0548,-0.31712,-1.31874,2.71315,-0.11275,-1.58649,-0.57054,1.26315,0.65568,-0.36591,-0.69537,-0.37492,1.03733,-2.23895,0.69859,-1.27378,-1.55579,0.42679,0.25149,-1.14744,-0.70572,-0.15584,1.5668,-0.86957,0.62416,-0.29031,-0.4132,-0.18717,0.4556,-0.691,-0.222,0.10339,-0.5179,0.04252,1.72675,-0.69347,-0.41475,-0.20954,-1.39583,0.19471,-0.31167,0.80023,-0.04409,-0.68187,0.01026,1.7415,0.03177,-1.35614,5.39255,-0.27669,-0.46816,-1.36429,-0.16749,-0.01584,-3.44096,0.97899,0.14306,0.95162,-1.91013,-0.43615,-1.04999,-1.01117,-0.27639,1.77406,-0.66719,-0.45817,-0.7767,0.27503,-0.52193,-1.02718,1.16318,-1.50037,0.68631,-0.17136,1.42626,0.86084,-0.09816,-0.99947,-1.60655,-0.63044,0.35498,0.33237,-0.08897,-2.62463,0.78858,-5.38792,-0.6754,-0.62415,-1.63315,-1.0017,-0.41766,-1.89871,0.87311,-0.80279,-0.55547,0.34788,-0.10303,-0.00748,-0.44737,-1.35778,1.3122,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
1,4,1,0.0,0.0,0.0,0.29085,0.0,0.0,0.38159,0.0,-0.40915,0.0,-0.40433,0.0,-0.31712,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.24933,-0.0,0.60583,0.0,-0.0,0.0,0.0,0.0,0.0,-1.3789,0.0,0.67417,0.0,-0.15584,-0.0,1.21221,-0.0,-0.0,-0.0,0.0,-0.0,0.37327,-0.0,0.0,-0.10708,-0.0,0.0,-0.07089,-0.0,-0.0,0.0,-0.0,0.0,-0.0,1.3922,0.38116,0.0,0.0,-0.0,0.0,0.0,-0.12347,0.61613,0.0,0.0,0.0,0.0,0.0,-0.00565,-0.99622,0.0,0.0,0.0,0.0,-0.14612,-0.0,0.54405,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.36099,-0.0,0.0,0.0,0.0,0.0,0.0,0.24912,0.0,-0.47277,0.02366,0.0,-1.25012,0.0,-0.0,0.8355,0.0,0.0,0.0,0.0,-0.0,0.0,-0.20164,0.0,-0.0,-0.08909,-0.44737,-0.0,-0.0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.02343
2,5,1,-0.84451,-0.96823,0.3082,-2.21338,3.42466,3.26318,-0.70004,2.50757,-0.80173,-1.16504,-0.5374,-1.46383,-1.72499,-0.17583,0.7133,-0.46058,-1.28873,-0.60648,0.20606,-1.66889,-0.36591,-0.70671,0.89102,4.08945,-2.1167,-0.7317,-0.3971,-0.57719,-1.16707,-0.16218,-0.36124,-1.18487,-0.15584,3.46784,0.82436,-1.33756,-0.77195,1.45938,-1.19659,-2.44968,-0.68532,0.10504,-1.74021,-0.77042,1.3024,-1.01483,-0.66584,-1.26739,-0.09271,-0.66644,-0.9525,1.08244,0.9367,-0.04409,-0.69512,-0.43521,0.37551,-1.09034,-3.79321,-2.50017,-2.17135,1.70042,-0.03282,-0.4243,-1.47236,-1.13405,1.39191,1.7247,0.73806,-0.80617,-0.10941,-0.36949,-0.4252,-0.76213,1.83253,-0.65982,-1.15688,-1.72334,-0.25862,0.56382,-1.79533,-0.94432,-1.14087,2.22449,-1.17988,0.65715,-0.55798,0.29724,0.68018,-1.46471,-0.62987,-0.05442,-1.12229,-1.06508,0.00182,-0.90662,1.67652,1.66475,-1.75921,-1.41721,-1.0017,1.52829,-0.48968,1.86172,-0.55178,-0.56948,0.26739,0.55899,-1.11736,2.43755,-1.09794,-0.66084,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
3,6,1,-1.0246,0.37768,0.0937,1.86926,-0.75165,-1.1212,-0.62785,-0.04577,3.34392,1.51685,2.99234,0.11878,1.38543,-0.24683,0.44177,-0.15423,0.82761,0.0725,0.69495,0.45576,-0.36591,-0.69116,-1.37146,-0.70185,1.9059,0.69202,-0.08153,-0.66915,1.13493,0.77124,-1.16076,-0.75691,-0.15584,-0.53632,0.33372,0.99213,0.0145,-1.5323,1.20655,0.3409,-0.69336,-0.20799,0.84872,1.56258,-1.29084,1.51642,-0.63498,-0.6086,-0.22054,-1.16183,0.76457,-1.20128,0.84369,-0.04409,-0.68774,-0.02674,0.82531,0.49281,0.58379,-0.01686,0.75513,0.61613,-0.26802,-0.20861,0.96062,0.08221,-0.3251,-1.47675,2.54913,-0.61763,-0.32345,-1.16326,0.06469,0.24936,-0.65539,-0.69375,0.56192,1.00643,0.01925,-0.66797,0.84075,1.09029,-0.60594,-0.41164,0.24208,1.19162,0.92757,-0.28597,-0.46002,-0.10396,-0.56781,0.34376,0.11288,-0.77868,-0.64596,0.07777,0.01868,-1.54977,-1.22298,-0.78741,-0.82295,-0.2253,-0.76681,0.14519,-0.89386,-0.57651,-0.68118,-0.2335,-0.04854,0.99509,-0.26184,-0.82753,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.30707
4,8,1,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.59759,0.0,-0.60728,0.0,-0.76402,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.57525,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,-0.15961,0.0,-0.0,0.0,-0.15584,-0.0,-0.10323,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.11964,-0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.04409,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.46816,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.20003,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.44737,-0.0,-0.0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1.23863


In [13]:
# Summary statistics of the model-ready frame, as a sanity check on the scaling and encoding.
preModel_data.describe()

Unnamed: 0,ID,target,v1,v2,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v23,v25,v26,v27,v28,v29,v32,v33,v34,v35,v36,v37,v38,v39,v40,v41,v42,v43,v44,v45,v46,v48,v49,v50,v51,v53,v54,v55,v57,v58,v59,v60,v61,v62,v63,v64,v65,v67,v68,v69,v70,v72,v73,v76,v77,v78,v80,v81,v82,v83,v84,v85,v86,v87,v88,v89,v90,v92,v93,v94,v95,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v108,v109,v111,v114,v115,v116,v117,v118,v119,v120,v121,v122,v123,v124,v126,v127,v128,v129,v130,v131,v3_A,v3_B,v3_C,v24_A,v24_B,v24_C,v24_D,v24_E,v30_A,v30_B,v30_C,v30_D,v30_E,v30_F,v30_G,v31_A,v31_B,v31_C,v47_A,v47_B,v47_C,v47_D,v47_E,v47_F,v47_G,v47_H,v47_I,v47_J,v52_A,v52_B,v52_C,v52_D,v52_E,v52_F,v52_G,v52_H,v52_I,v52_J,v52_K,v52_L,v56_A,v56_AA,v56_AB,v56_AC,v56_AE,v56_AF,v56_AG,v56_AH,v56_AI,v56_AJ,v56_AK,v56_AL,v56_AM,v56_AN,v56_AO,v56_AP,v56_AR,v56_AS,v56_AT,v56_AU,v56_AV,v56_AW,v56_AX,v56_AY,v56_AZ,v56_B,v56_BA,v56_BC,v56_BD,v56_BE,v56_BF,v56_BG,v56_BH,v56_BI,v56_BJ,v56_BK,v56_BL,v56_BM,v56_BN,v56_BO,v56_BP,v56_BQ,v56_BR,v56_BS,v56_BT,v56_BU,v56_BV,v56_BW,v56_BX,v56_BY,v56_BZ,v56_C,v56_CA,v56_CB,v56_CC,v56_CD,v56_CE,v56_CF,v56_CG,v56_CH,v56_CI,v56_CJ,v56_CK,v56_CL,v56_CM,v56_CN,v56_CO,v56_CP,v56_CQ,v56_CS,v56_CT,v56_CV,v56_CW,v56_CX,v56_CY,v56_CZ,v56_D,v56_DA,v56_DB,v56_DC,v56_DD,v56_DE,v56_DF,v56_DG,v56_DH,v56_DI,v56_DJ,v56_DK,v56_DL,v56_DM,v56_DN,v56_DO,v56_DP,v56_DQ,v56_DR,v56_DS,v56_DT,v56_DU,v56_DV,v56_DW,v56_DX,v56_DY,v56_DZ,v56_E,v56_F,v56_G,v56_H,v56_I,v56_L,v56_M,v56_N,v56_O,v56_P,v56_Q,v56_R,v56_T,v56_U,v56_V,v56_W,v56_X,v56_Y,v56_Z,v66_A,v66_B,v66_C,v71_A,v71_B,v71_C,v71_D,v71_F,v71_G,v71_I,v71_K,v71_L,v74_A,v74_B,v74_C,v75_A,v75_B,v75_C,v75_D,v79_A,v79_B,v79_C,v79_D,v79_E,v79_F,v79_G,v79_H,v79_I,v79_J,v79_K,v79_L,v79_M,v79_N,v79_O,v79_P,v79_Q,v79_R,v91_A,v91_B,v91_C,v91_D,v91_E,v91_F,v91_G,v107_A,v107_B,v107_C,v107_D,v107_E,v107_F,v107_G,v110_A,v110_B,v110_C,v112_A,v112_B,v112_C,v112_D,v112_E,v112_F,v112_G,v112_H,v112_I,v112_J,v112_K,v112_L,v112_M,v112_N,v112_O,v112_
P,v112_Q,v112_R,v112_S,v112_T,v112_U,v112_V,v113_A,v113_AA,v113_AB,v113_AC,v113_AD,v113_AE,v113_AF,v113_AG,v113_AH,v113_AI,v113_AJ,v113_AK,v113_B,v113_C,v113_D,v113_E,v113_F,v113_G,v113_H,v113_I,v113_J,v113_L,v113_M,v113_N,v113_O,v113_P,v113_Q,v113_R,v113_S,v113_T,v113_U,v113_V,v113_W,v113_X,v113_Y,v113_Z,v125_A,v125_AA,v125_AB,v125_AC,v125_AD,v125_AE,v125_AF,v125_AG,v125_AH,v125_AI,v125_AJ,v125_AK,v125_AL,v125_AM,v125_AN,v125_AO,v125_AP,v125_AQ,v125_AR,v125_AS,v125_AT,v125_AU,v125_AV,v125_AW,v125_AX,v125_AY,v125_AZ,v125_B,v125_BA,v125_BB,v125_BC,v125_BD,v125_BE,v125_BF,v125_BG,v125_BH,v125_BI,v125_BJ,v125_BK,v125_BL,v125_BM,v125_BN,v125_BO,v125_BP,v125_BQ,v125_BR,v125_BS,v125_BT,v125_BU,v125_BV,v125_BW,v125_BX,v125_BY,v125_BZ,v125_C,v125_CA,v125_CB,v125_CC,v125_CD,v125_CE,v125_CF,v125_CG,v125_CH,v125_CI,v125_CJ,v125_CK,v125_CL,v125_D,v125_E,v125_F,v125_G,v125_H,v125_I,v125_J,v125_K,v125_L,v125_M,v125_N,v125_O,v125_P,v125_Q,v125_R,v125_S,v125_T,v125_U,v125_V,v125_W,v125_X,v125_Y,v125_Z,WOE
count,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,11432
1.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.
0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0,114321.0
mean,114228.92823,0.7612,0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.00199,0.00046,0.99755,0.03314,0.07129,0.18257,0.23034,0.48265,0.02023,0.00179,0.80727,0.0457,0.02601,0.02265,0.07635,0.80304,0.16574,0.03123,0.00033,0.00044,0.48482,0.02762,0.04637,0.03781,0.03452,1e-05,0.34177,0.02633,0.07807,0.08209,0.08468,0.08404,0.08119,0.08578,0.08239,0.0728,0.08975,0.09715,0.07828,0.08378,0.00163,0.00052,2e-05,0.0001,2e-05,0.0206,0.01893,0.00022,0.00283,2e-05,0.0006,0.00845,2e-05,0.00156,0.00185,1e-05,0.00227,0.07726,3e-05,0.00015,4e-05,0.05571,1e-05,0.0003,0.00347,3e-05,0.00345,1e-05,0.00036,3e-05,0.00047,0.00054,0.00103,0.00085,0.02051,0.00806,0.03123,0.00767,3e-05,2e-05,5e-05,0.00539,5e-05,0.00055,2e-05,0.0005,0.01661,0.15949,0.01213,0.00053,0.06275,0.00092,0.00027,0.00011,0.00134,2e-05,1e-05,0.00123,1e-05,0.00054,0.00194,1e-05,1e-05,8e-05,0.00344,0.03276,0.00058,0.00523,5e-05,0.00611,8e-05,4e-05,8e-05,6e-05,0.0335,1e-05,1e-05,0.00607,1e-05,1e-05,2e-05,0.00016,0.00979,0.00023,0.01135,0.08971,0.01452,0.00075,0.00102,0.00089,0.00071,0.04626,0.04065,8e-05,0.00815,0.01323,3e-05,0.00013,0.00013,5e-05,0.01833,0.01724,0.0007,3e-05,0.0001,0.00674,2e-05,1e-05,2e-05,2e-05,0.01433,2e-05,0.04371,5e-05,0.0052,1e-05,0.01465,0.01292,6e-05,1e-05,0.00275,0.00174,0.6154,0.15976,0.22484,1e-05,0.26465,0.07826,1e-05,0.65687,4e-05,0.00014,1e-05,1e-05,0.00039,0.99334,0.00626,0.00016,0.34282,0.00021,0.65681,0.00365,0.22569,0.30232,0.04638,0.22093,0.00499,5e-05,0.01753,0.0399,0.00816,0.03768,1e-05,0.03452,0.00043,0.02914,0.01939,0.0088,0.00044
,0.23689,0.19841,0.20256,0.00201,0.02804,0.11737,0.2147,0.11737,0.19841,0.2147,0.20256,0.23689,0.02804,0.00201,0.48712,0.48483,0.02805,0.08349,0.02351,0.01798,0.06409,0.04153,0.1929,0.01489,0.04943,0.08943,0.0347,0.02845,0.03918,0.01044,0.07948,0.03202,0.04089,0.02052,0.03648,0.00729,0.03481,0.04201,0.01647,0.00292,1e-05,0.01114,0.0521,0.00232,0.00595,0.03121,0.01498,0.00697,0.00274,0.00763,1e-05,0.01188,0.00618,0.00124,0.00283,0.00204,0.62592,0.00036,0.02279,0.00282,0.00417,0.0645,0.01037,0.00087,0.01728,0.0052,0.00158,0.00537,0.01407,0.00535,0.01463,0.01309,0.0143,0.01121,0.00396,0.01337,0.00474,0.00165,0.01701,0.00425,0.00729,0.00976,0.00814,0.003,0.00792,0.00072,0.04668,0.00902,0.00388,0.01346,0.00563,0.02983,0.0041,0.0195,0.00374,0.00974,0.00971,0.00471,0.0067,0.00011,0.00911,0.02113,0.02077,0.00305,0.00059,0.00345,0.02145,0.00756,0.00447,0.00311,0.01002,0.0063,0.03906,0.01292,0.01062,0.05105,0.00796,0.00848,0.00441,0.00601,0.00562,0.0021,0.00356,0.01198,0.00705,0.02168,0.01135,0.02896,3e-05,0.0045,0.01332,0.00589,0.00946,0.01801,0.01018,0.00811,0.03347,0.00613,0.00347,0.01265,0.00545,0.00403,0.00848,0.02205,0.00193,0.02269,0.0281,0.00221,0.00793,0.0248,0.02189,0.00556,0.00904,0.00217,0.0128,0.00932,0.01065,0.00708,0.00538,0.00947,0.02829,0.00777,0.00758,0.0077,0.01395,-0.06075
std,65934.48736,0.42635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.04452,0.02153,0.04943,0.17901,0.25731,0.38632,0.42105,0.4997,0.1408,0.04231,0.39444,0.20884,0.15915,0.14878,0.26555,0.39771,0.37184,0.17393,0.01823,0.02091,0.49977,0.16387,0.21028,0.19073,0.18255,0.00296,0.4743,0.16011,0.26828,0.27451,0.27841,0.27744,0.27313,0.28003,0.27496,0.25982,0.28582,0.29616,0.26861,0.27706,0.0403,0.0229,0.00418,0.00981,0.00418,0.14204,0.13628,0.01479,0.05308,0.00418,0.02456,0.09153,0.00418,0.03943,0.04292,0.00296,0.04764,0.267,0.00592,0.01219,0.00661,0.22936,0.00296,0.01724,0.05883,0.00512,0.05861,0.00296,0.01893,0.00512,0.02173,0.02328,0.03211,0.02912,0.14175,0.08939,0.17393,0.08725,0.00512,0.00418,0.00724,0.07321,0.00724,0.02347,0.00418,0.02232,0.12781,0.36613,0.10948,0.02309,0.24252,0.03029,0.01646,0.01066,0.03656,0.00418,0.00296,0.0351,0.00296,0.02328,0.04402,0.00296,0.00296,0.00887,0.05853,0.17801,0.02402,0.07214,0.00724,0.0779,0.00887,0.00661,0.00887,0.00782,0.17994,0.00296,0.00296,0.07768,0.00296,0.00296,0.00418,0.01255,0.09845,0.01508,0.10595,0.28577,0.11962,0.02742,0.03197,0.02986,0.02661,0.21006,0.19748,0.00887,0.08992,0.11428,0.00512,0.01145,0.01145,0.00724,0.13413,0.13017,0.02644,0.00592,0.00981,0.08179,0.00418,0.00296,0.00418,0.00418,0.11884,0.00418,0.20445,0.00724,0.07196,0.00296,0.12015,0.11293,0.00782,0.00296,0.05234,0.04169,0.4865,0.36639,0.41748,0.00296,0.44115,0.26858,0.00296,0.47476,0.00661,0.01183,0.00296,0.00296,0.01984,0.08132,0.07889,0.01255,0.47466,0.01449,0.47478,0.06029,0.41804,0.45926,0.2103,0.41488,0.0705,0.00724,0.13123,0.19572,0.08997,0.19043,0.00
296,0.18255,0.0207,0.16819,0.1379,0.09339,0.02091,0.42518,0.39881,0.40191,0.04481,0.1651,0.32186,0.41062,0.32186,0.39881,0.41062,0.40191,0.42518,0.1651,0.04481,0.49984,0.49977,0.16512,0.27663,0.15153,0.13286,0.24492,0.19952,0.39458,0.1211,0.21677,0.28537,0.18302,0.16624,0.19402,0.10162,0.27048,0.17606,0.19804,0.14178,0.18747,0.08505,0.18331,0.20062,0.12728,0.05397,0.00296,0.10494,0.22223,0.04809,0.07689,0.17389,0.12145,0.0832,0.05225,0.087,0.00296,0.10834,0.0784,0.03522,0.05316,0.0451,0.48389,0.01893,0.14922,0.053,0.06446,0.24565,0.10133,0.02956,0.1303,0.0719,0.03976,0.07309,0.11776,0.07297,0.12008,0.11368,0.11873,0.1053,0.06282,0.11484,0.06869,0.04063,0.12932,0.06506,0.08505,0.09832,0.08987,0.05469,0.08862,0.02677,0.21096,0.09454,0.0622,0.11524,0.07484,0.17011,0.06392,0.13827,0.061,0.09823,0.09806,0.06844,0.08158,0.01066,0.09504,0.14383,0.1426,0.05517,0.02438,0.05861,0.14487,0.08661,0.06671,0.05564,0.09958,0.07911,0.19373,0.11293,0.1025,0.2201,0.08886,0.09172,0.06625,0.07729,0.07473,0.04577,0.05956,0.10877,0.08367,0.14562,0.10591,0.1677,0.00512,0.0669,0.11465,0.0765,0.09683,0.13299,0.10039,0.08968,0.17985,0.07807,0.05883,0.11175,0.07362,0.06337,0.09168,0.14685,0.04393,0.14892,0.16525,0.04699,0.08872,0.15551,0.14631,0.07438,0.09463,0.04653,0.1124,0.09611,0.10263,0.08382,0.07315,0.09687,0.1658,0.08779,0.08676,0.0874,0.11729,0.53476
min,3.0,0.0,-2.00512,-3.35475,-4.80503,-5.66423,-5.40688,-5.61068,-0.70935,-6.23087,-1.35135,-26.03288,-7.44615,-4.30087,-8.37603,-3.78039,-3.66138,-2.6686,-1.81705,-1.72754,-18.74145,-6.47426,-0.36591,-0.73925,-4.53167,-4.37794,-2.53235,-8.50018,-3.83268,-2.92234,-3.16484,-8.07173,-7.49031,-1.82372,-0.15584,-0.69855,-3.30403,-9.5206,-17.26109,-4.55465,-6.80717,-5.89602,-0.71114,-7.59918,-11.82434,-1.28803,-3.84302,-26.16992,-0.70717,-2.4886,-8.00749,-1.49895,-6.80288,-4.24564,-9.15197,-1.48037,-0.7265,-3.34335,-10.76808,-11.0077,-9.33977,-6.62324,-6.75056,-1.55245,-4.05679,-2.31355,-7.74593,-9.63319,-2.73728,-4.32304,-2.22666,-2.7246,-2.27464,-2.65786,-3.48773,-4.09394,-2.44333,-0.70157,-7.19516,-3.22824,-4.44412,-5.99993,-3.35651,-7.67393,-5.27521,-4.3498,-3.60909,-2.33727,-3.88152,-2.13445,-5.73915,-3.97258,-0.63467,-5.31301,-3.10976,-1.48592,-3.01242,-5.19523,-7.38951,-4.55148,-3.02764,-5.56318,-1.0017,-2.32842,-2.68761,-5.05855,-1.81679,-0.57794,-4.42606,-2.65271,-2.49328,-0.44737,-2.0279,-2.04198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.10517
25%,57280.0,1.0,-0.34987,-0.39938,-0.08856,-0.22565,-0.21179,-0.24245,-0.58359,-0.1506,-0.59759,-0.08289,-0.60454,-0.37853,-0.58055,-0.31833,-0.16163,-0.31506,-0.26263,-0.23227,0.0,-0.57134,-0.36591,-0.63511,-0.29107,-0.28242,-0.1745,-0.09534,-0.30918,-0.44734,-0.66715,-0.22483,-0.1547,-0.3708,-0.15584,-0.52622,-0.64892,-0.15235,-0.1493,-0.30335,-0.15929,-0.16503,-0.62598,-0.23098,-0.15125,-0.72393,-0.19268,-0.07259,-0.59596,-0.45152,-0.19868,-0.7085,-0.23847,-0.28404,-0.0,-0.04409,-0.62965,-0.25865,0.0,-0.14139,0.0,-0.12565,-0.10401,-0.46816,-0.32875,-0.3319,-0.12417,-0.13201,-0.32019,-0.04875,-0.93802,-0.44929,-0.41012,-0.35627,-0.31949,-0.27639,-0.38179,-0.60676,-0.12872,-0.35721,-0.27542,-0.31581,-0.35868,-0.11372,-0.29967,-0.20858,-0.22511,0.0,-0.29726,-0.41447,-0.33434,-0.31688,-0.53283,-0.04025,-0.43428,-0.51887,-0.39516,-0.60385,-0.19713,-0.30231,-0.25517,-0.3322,-0.63205,-0.43024,-0.44668,-0.22454,-0.50369,-0.52471,-0.26907,-0.39063,-0.42871,-0.44737,-0.50155,-0.32398,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.26317
50%,114189.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.40915,0.0,-0.29007,0.0,-0.08758,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.009,-0.25524,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.06333,0.0,-0.0,0.0,-0.15584,-0.0,-0.03996,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.2503,-0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.04409,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.46816,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.17775,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.44737,-0.0,-0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0
75%,171206.0,1.0,0.0,0.03914,0.2262,0.1182,0.10718,0.10057,-0.0,0.18659,0.15617,0.24686,0.14943,0.0,0.43042,-0.0,0.16334,-0.0,0.0,-0.0,0.43924,0.59546,-0.0,0.0,0.05527,0.05689,0.11784,0.28275,0.0,0.0,0.63987,0.12757,0.53162,0.0,-0.15584,-0.0,0.725,0.21503,0.16651,0.04293,0.1431,0.17602,0.0,0.0828,0.17559,0.42937,0.11731,0.26694,0.0,-0.0,0.14891,0.0,0.16165,0.05132,0.45805,-0.04409,-0.0,0.02144,0.4419,0.21538,0.50783,0.19942,0.36859,0.61613,0.00558,0.0,0.25954,0.16243,0.04198,0.31788,-0.0,0.0,0.0,0.0,0.0,0.11144,-0.0,0.0,0.17256,-0.0,-0.0,-0.0,0.0,0.25079,0.0611,0.19203,0.14701,0.69695,0.03661,0.0,0.0,0.0,0.0,0.2939,-0.0,0.0,0.0,0.68803,0.11973,0.03765,0.12452,0.0353,0.0,0.0,0.0,0.13165,0.0,-0.0,0.0,-0.0,0.0,-0.44737,-0.0,-0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15292
max,228713.0,1.0,22.58722,5.63391,18.37912,7.29389,38.97723,39.56531,8.77033,7.56666,11.9493,7.67229,12.80024,18.34485,5.4752,32.55355,11.21253,11.25839,41.39234,153.69603,2.56668,11.47031,6.32908,8.16475,43.77952,27.53761,7.33598,12.21575,37.6589,24.11594,6.71567,11.80353,3.70965,47.36829,20.41059,10.54637,3.00984,16.98975,9.4486,36.5412,5.69821,7.00241,8.36759,4.52263,17.67549,15.83695,6.83476,7.14358,10.68406,29.4256,31.2658,2.39359,6.04735,45.2866,2.67594,8.57362,8.14077,7.19733,2.94397,12.69721,1.41636,7.39518,4.3015,11.45902,29.28712,16.92553,9.18625,4.81535,22.03785,7.54175,4.94644,22.34289,26.01768,16.17717,46.70793,4.24996,22.95266,8.66685,39.72849,46.23938,11.78957,23.61334,42.37382,16.09189,8.56584,6.46215,54.10301,1.52867,7.42429,12.63535,14.60439,26.09049,11.10812,3.69869,25.78304,5.62156,14.89032,2.45919,6.62163,35.17832,4.26449,7.73847,5.32023,16.41525,16.9472,9.77061,8.30295,11.98845,36.93713,13.72436,22.06655,15.41972,19.0328,21.43725,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.39609


In [14]:
# True if any cell anywhere in the modelling frame is missing.
preModel_data.isna().to_numpy().any()

False

## PCA

In [15]:
from sklearn.decomposition import PCA
# A float n_components between 0 and 1 asks PCA to keep just enough components
# for their cumulative explained variance to reach that fraction (here 95%).
pca = PCA(n_components=0.95)

In [16]:
# Fit the PCA on the feature columns only (ID and target removed), then show
# the share of total variance captured by each retained component.
pca_inputs = preModel_data.drop(columns=["ID", "target"])
pca.fit(pca_inputs)
print(pca.explained_variance_ratio_)

[0.22034024 0.12644241 0.08581462 0.04406322 0.0431619  0.03365225
 0.0321402  0.02635147 0.02385634 0.0209489  0.0190249  0.01833979
 0.01788414 0.01622156 0.0148095  0.01404941 0.01267173 0.01193737
 0.00990282 0.00981178 0.00856013 0.00812542 0.00721521 0.00694633
 0.00670631 0.00653924 0.00614631 0.00585753 0.00550046 0.00518615
 0.00510923 0.00447577 0.00397673 0.00387005 0.00372242 0.00352734
 0.00340161 0.00325796 0.00325056 0.0032193  0.00308182 0.00287044
 0.00270511 0.0025849  0.0024307  0.00233644 0.00223537 0.00216156
 0.00207444 0.00198545 0.0018972  0.00179931 0.00174134 0.00157336
 0.00150845 0.0015001  0.00142716 0.00130001 0.00126502 0.00119665
 0.001179   0.00115983 0.00114766 0.00106608]


In [17]:
# Project every row onto the fitted principal components and wrap the
# resulting array in a DataFrame (default integer row and column labels).
component_scores = pca.transform(preModel_data.drop(columns=["ID", "target"]))
comps = pd.DataFrame(data=component_scores)
print(comps.shape)
comps.head()


(114321, 64)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63
0,-4.67521,-4.42316,-2.56967,-0.42495,0.10768,-1.10066,0.54249,2.01488,1.34618,-3.73007,2.48529,1.14113,-5.92842,2.41245,-0.91145,-2.00244,5.74793,-0.84317,-0.52969,0.69284,-0.45885,-2.01714,-0.83254,-0.29846,0.23222,-0.97979,0.42985,-0.36497,-0.20653,1.37445,0.097,0.89321,0.09423,0.46205,1.22828,0.70716,0.15069,0.08099,0.65915,0.98554,0.79729,-0.37835,0.03045,-0.64992,0.29371,-0.15711,0.14563,0.13255,-0.09164,-0.45285,-0.2178,0.25488,0.12207,0.37124,-0.76954,0.08354,-0.40151,0.08367,0.38448,-0.24121,-0.60589,-0.43156,0.38726,-0.72993
1,0.34148,0.40385,0.4447,-0.04688,-0.38871,-0.85327,0.62829,0.45386,-2.20958,-0.20191,-0.05656,0.05874,-0.1276,0.03665,0.80222,-0.08168,-0.08858,-0.48291,-0.51728,0.00332,-0.03774,1.06853,0.1855,-0.37927,0.04255,-0.66785,0.30051,0.21564,-0.62384,-0.73522,-0.38689,-0.12376,0.40287,-0.07726,-0.30608,-0.16177,-0.59317,-0.82828,0.92003,-0.04038,0.31212,-0.0724,-0.10838,0.35068,0.28051,0.14755,0.1681,-0.15932,0.27074,-0.56812,1.03276,0.65375,-0.00941,-0.06428,-0.17903,-0.28921,0.07178,0.05217,0.15021,-0.8005,0.06151,0.15666,0.17849,0.16239
2,-0.30705,-9.34969,2.63842,3.68972,-3.75452,-1.09231,-1.48383,1.85394,-2.08571,0.93333,1.2159,-0.9864,3.16252,3.82797,1.31405,1.1717,1.76311,-1.69686,-1.80154,-2.45263,-0.0569,-1.06809,-0.34601,-0.18512,0.88184,1.16053,0.89666,0.12922,1.81502,2.78841,1.28963,0.42413,0.59673,0.8662,-0.7714,-0.35673,1.01874,-0.60203,-0.28503,1.56328,0.3333,1.24724,-0.14812,0.19675,0.50096,0.2503,-0.26392,1.1619,0.23533,0.72426,0.35403,-0.03742,0.05735,-0.8276,-0.66126,-0.62819,0.50094,-0.83166,-0.05094,0.88839,-0.41541,-0.40948,0.63727,-0.20788
3,-3.41562,0.44885,-5.50561,-0.21501,-0.12642,5.2156,0.45818,-1.692,1.05529,-1.60035,-0.35209,-1.76878,-0.85401,0.11006,-0.03197,-0.6852,-1.09911,2.39201,1.29766,-1.30516,-0.05337,1.33156,-0.52766,0.32694,-0.37294,-0.77266,-0.31772,-0.22271,-1.33458,0.09043,0.14536,0.01762,0.22266,-0.5486,-0.26405,0.0283,-0.41658,-0.89165,0.14408,-0.86876,-0.40331,0.301,-0.35523,0.31364,0.0544,0.25933,0.18268,0.16609,-0.30955,0.36775,-0.24446,-0.87729,0.20601,-0.03037,0.43782,0.0259,0.05485,0.16113,-0.21692,-0.64373,0.26845,0.21199,0.2222,-0.34701
4,0.0737,-0.04897,-0.04236,-0.01466,0.03888,-1.51891,0.58119,-0.15342,0.01297,0.03177,0.01297,0.009,-0.08311,-0.15559,-0.48762,0.15413,-0.06744,-0.03269,0.07488,0.2249,0.57753,0.37423,-0.03749,0.59712,0.18606,-0.39753,0.08475,0.01055,0.59284,-0.10183,0.06019,0.02497,0.03461,-0.02617,-0.63273,-0.13955,0.95538,0.13455,0.03025,0.19039,-0.21236,0.21489,-0.02369,-0.79356,-0.0525,0.23389,1.21624,0.16896,-0.38835,0.26443,-0.48658,-0.41266,0.19676,0.01577,-0.26799,-0.16569,-0.00692,0.00647,0.18542,0.00215,0.08393,-0.0038,0.08698,0.07511


In [18]:
import seaborn

## Model prep

In [19]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score #https://scikit-learn.org/stable/modules/model_evaluation.html
from sklearn.svm import SVC #https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
from sklearn.svm import LinearSVC #https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import log_loss, accuracy_score, classification_report, confusion_matrix 
import pickle
import time

In [20]:
# Hold out a reproducible random half of the rows for validation; every row
# that was not sampled stays in the training pool.
validation = preModel_data.sample(frac=0.5, random_state=1, replace=False)
forTrain = preModel_data.drop(index=validation.index)
print(validation.shape, forTrain.shape, sep="\n")

(57160, 479)
(57161, 479)


In [21]:
# Training subsamples of increasing size, each drawn without replacement from
# the training pool with the same seed.
n1k, n2k, n5k, n10k, n20k = (
    forTrain.sample(n=sample_size, replace=False, random_state=1)
    for sample_size in (1000, 2000, 5000, 10000, 20000)
)

In [22]:
# Feature matrices: every column except the ID and the target.
# DataFrame.drop already returns a new frame, so no explicit .copy() is taken
# first; copying beforehand only duplicates each subset in memory (the full
# training pool included) and then throws the duplicate away.
non_feature_columns = ["ID", "target"]

X_1k = n1k.drop(columns=non_feature_columns)
X_2k = n2k.drop(columns=non_feature_columns)
X_5k = n5k.drop(columns=non_feature_columns)
X_10k = n10k.drop(columns=non_feature_columns)
X_20k = n20k.drop(columns=non_feature_columns)
X_57k = forTrain.drop(columns=non_feature_columns)

# Matching label vectors, copied so they are independent of the source frames.
y_1k = n1k.loc[:, "target"].copy()
y_2k = n2k.loc[:, "target"].copy()
y_5k = n5k.loc[:, "target"].copy()
y_10k = n10k.loc[:, "target"].copy()
y_20k = n20k.loc[:, "target"].copy()
y_57k = forTrain.loc[:, "target"].copy()


In [23]:
# Class balance of the 10k training subsample (row count per target value).
y_10k.value_counts()

1    7580
0    2420
Name: target, dtype: int64

## SVC
Fitting the SVC on all of the features never finishes. The number of features needs to be reduced before it can do anything useful.

In [24]:
# Candidate hyper-parameter values that the randomized searches draw from.
# NOTE(review): the gamma values jump from 0.1 to 0.001 (no 0.01) - confirm
# that the gap is intentional.
# NOTE(review): gamma should have no effect for the 'linear' kernel, so some
# sampled candidates are probably duplicates - verify against the SVC docs.
param_grid = {
    'C': [1, 10, 100, 1000],
    'gamma': [1.0, 0.1, 0.001, 0.0001],
    'kernel': ['linear', 'poly', 'rbf'],
}

# Base estimator shared by every search; class_weight='balanced' reweights the
# classes to offset the imbalance in the target.
svc = SVC(
    cache_size=1000,
    class_weight='balanced',
    random_state=42,
)

In [25]:
# Randomized SVC hyper-parameter search on the 1k training subsample:
# 12 sampled candidates, 2-fold cross-validation, best candidate refitted on
# the whole subsample. The wall-clock time of the search is recorded.
# NOTE(review): this cell is repeated almost verbatim for each sample size -
# a helper function taking (X, y, label) would remove the duplication.
start = time.time()

n_iter_search = 12
svc_random_search = RandomizedSearchCV(
    svc,
    param_distributions=param_grid,
    cv=2,
    random_state=42,
    n_iter=n_iter_search,
    refit=True,
    n_jobs=-1)

svc_random_search.fit(X_1k, y_1k)

end = time.time()
time_1k = end - start  # elapsed seconds for the whole search
print(time_1k)

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling raises.
filename = 'svc_random_search_1k.p'
with open(filename, 'wb') as pickle_file:
    pickle.dump(svc_random_search, pickle_file)

# Score the refitted best estimator on the held-out validation half.
# NOTE(review): in the recorded run almost every row is predicted as class 1,
# so the accuracy is essentially the share of class 1 in the validation set;
# accuracy alone does not show whether the model has learned anything.
preds = svc_random_search.predict(validation.drop(columns=["ID","target"]))
acc1k = accuracy_score(y_pred=preds,y_true=validation.target)
print("accuracy: " + str(round(acc1k,4)))
pd.crosstab(preds,validation.target)



4.147249937057495
accuracy: 0.7615


target,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2,0
1,13635,43523


In [26]:
# Randomized SVC hyper-parameter search on the 2k training subsample:
# 12 sampled candidates, 2-fold cross-validation, best candidate refitted on
# the whole subsample. The wall-clock time of the search is recorded.
start = time.time()

n_iter_search = 12
svc_random_search = RandomizedSearchCV(
    svc,
    param_distributions=param_grid,
    cv=2,
    random_state=42,
    n_iter=n_iter_search,
    refit=True,
    n_jobs=-1)

svc_random_search.fit(X_2k, y_2k)

end = time.time()
time_2k = end - start  # elapsed seconds for the whole search
print(time_2k)

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling raises.
filename = 'svc_random_search_2k.p'
with open(filename, 'wb') as pickle_file:
    pickle.dump(svc_random_search, pickle_file)

# Score the refitted best estimator on the held-out validation half.
preds = svc_random_search.predict(validation.drop(columns=["ID","target"]))
acc2k = accuracy_score(y_pred=preds,y_true=validation.target)
print("accuracy: " + str(round(acc2k,4)))
pd.crosstab(preds,validation.target)

118.19777274131775
accuracy: 0.7615


target,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,4,2
1,13633,43521


In [27]:
# Randomized SVC hyper-parameter search on the 5k training subsample:
# 12 sampled candidates, 2-fold cross-validation, best candidate refitted on
# the whole subsample. The wall-clock time of the search is recorded.
start = time.time()

n_iter_search = 12
svc_random_search = RandomizedSearchCV(
    svc,
    param_distributions=param_grid,
    cv=2,
    random_state=42,
    n_iter=n_iter_search,
    refit=True,
    n_jobs=-1)

svc_random_search.fit(X_5k, y_5k)

end = time.time()
time_5k = end - start  # elapsed seconds for the whole search
print(time_5k)

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling raises.
filename = 'svc_random_search_5k.p'
with open(filename, 'wb') as pickle_file:
    pickle.dump(svc_random_search, pickle_file)

# Score the refitted best estimator on the held-out validation half.
preds = svc_random_search.predict(validation.drop(columns=["ID","target"]))
acc5k = accuracy_score(y_pred=preds,y_true=validation.target)
print("accuracy: " + str(round(acc5k,4)))
pd.crosstab(preds,validation.target)

556.5973017215729
accuracy: 0.7616


target,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,15,4
1,13622,43519


In [28]:
# Randomized SVC hyper-parameter search on the 10k training subsample:
# 12 sampled candidates, 2-fold cross-validation, best candidate refitted on
# the whole subsample. The wall-clock time of the search is recorded.
start = time.time()

n_iter_search = 12
svc_random_search = RandomizedSearchCV(
    svc,
    param_distributions=param_grid,
    cv=2,
    random_state=42,
    n_iter=n_iter_search,
    refit=True,
    n_jobs=-1)

svc_random_search.fit(X_10k, y_10k)

end = time.time()
time_10k = end - start  # elapsed seconds for the whole search
print(time_10k)

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling raises.
filename = 'svc_random_search_10k.p'
with open(filename, 'wb') as pickle_file:
    pickle.dump(svc_random_search, pickle_file)

# Score the refitted best estimator on the held-out validation half.
preds = svc_random_search.predict(validation.drop(columns=["ID","target"]))
acc10k = accuracy_score(y_pred=preds,y_true=validation.target)
print("accuracy: " + str(round(acc10k,4)))
pd.crosstab(preds,validation.target)

1392.1399865150452
accuracy: 0.7617


target,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,24,7
1,13613,43516


In [29]:
# Search durations (seconds) and validation accuracies, both ordered by
# training-sample size: 1k, 2k, 5k, 10k.
times = [
    time_1k,
    time_2k,
    time_5k,
    time_10k,
]
accuracies = [
    acc1k,
    acc2k,
    acc5k,
    acc10k,
]

In [45]:
# One row per training-sample size (1k, 2k, 5k, 10k, in that order) with the
# search duration in seconds and the accuracy on the validation half.
svc_random_search_summary = pd.DataFrame(data={'Training Time':times, 'Accuracy':accuracies})

# Persist the summary. The context manager guarantees the file is flushed and
# closed even if pickling raises.
filename = 'svc_random_search_summary.p'
with open(filename, 'wb') as summary_file:
    pickle.dump(svc_random_search_summary, summary_file)

svc_random_search_summary

Unnamed: 0,Training Time,Accuracy
0,4.14725,0.76146
1,118.19777,0.76146
2,556.5973,0.76162
3,1392.13999,0.76172


In [35]:
# Re-evaluate the winning model of the 10k search on the validation half.
# The search was run with refit=True, so best_estimator_ has already been
# trained on all of X_10k / y_10k. Fitting it again here would only repeat
# that slow SVC training and give the same model (same data, same
# random_state), so no extra fit is performed.
best = svc_random_search.best_estimator_
preds = best.predict(validation.drop(columns=["ID","target"]))
acc = accuracy_score(y_pred=preds,y_true=validation.target)
print("accuracy: " + str(round(acc,4)))
pd.crosstab(preds,validation.target)

accuracy: 0.7617


target,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,24,7
1,13613,43516


In [36]:
# Hyper-parameters of the best candidate found by the most recently fitted search.
svc_random_search.best_params_

{'kernel': 'rbf', 'gamma': 1.0, 'C': 100}

In [49]:
# Reload the search that was fitted on the 1k subsample and show its winning
# hyper-parameters.
# NOTE(review): this reads from ../../../pickles/, while the search cells write
# their pickles into the working directory - confirm the file was moved there.
search_1k_path = '../../../pickles/svc_random_search_1k.p'
srs = pd.read_pickle(search_1k_path)
srs.best_params_

{'kernel': 'rbf', 'gamma': 1.0, 'C': 100}

In [52]:
# How much longer the search takes when the training sample doubles from 5k to
# 10k rows. Computed from the measured timings rather than hand-copied rounded
# numbers, so the ratio stays correct when the notebook is re-run.
time_10k / time_5k


2.4991023339317775