In [1]:
import pandas as pd
import numpy as np
import scipy
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.ensemble import IsolationForest, AdaBoostClassifier, BaggingClassifier, BaseEnsemble, ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from catboost import CatBoostClassifier
import eli5
import shap
from sklearn import model_selection
from imblearn.under_sampling import TomekLinks
import umap

pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

In [31]:
train = pd.read_csv('../data/CAX_MortgageModeling_Train.csv')

In [32]:
train.RESULT = train.RESULT.apply(lambda x: 1 if x == 'FUNDED' else 0)
train.RESULT = train.RESULT.apply(lambda x: 1-x)

In [33]:
test = pd.read_csv('../data/CAX_MortgageModeling_Test.csv')

In [34]:
train_len = train.shape[0]
traincols = list(train.columns[2:-1])

In [35]:
data = pd.concat([train[['Unique_ID'] + traincols+['RESULT']],test[['Unique_ID']+traincols+['RESULT']]],axis=0)

In [36]:
data['GDS'] = np.abs(data['GDS'])
data['GDS'] = np.clip(data.GDS,0,data['GDS'].max())

In [37]:
data['TDS'] = np.abs(data['TDS'])
data['TDS'] = np.clip(data.TDS,0,data['TDS'].max())

In [38]:
data['GDS'] = data['GDS']/100
data['LTV'] = data['LTV']/100
data['TDS'] = data['TDS']/100
data['RATE'] = data['RATE']/100

In [39]:
data['APPRAISED PROPERTY VALUE'] = data['MORTGAGE AMOUNT']/data['LTV']
data['CHANGE IN PROPERTY VALUE'] = (data['APPRAISED PROPERTY VALUE']-data['PROPERTY VALUE'])/data['PROPERTY VALUE']
data['ANNUALIZED HOUSING EXPENSE'] = data['GDS'] * data['INCOME']
data['MORTGAGE RATIO'] = data['MORTGAGE AMOUNT']/data['PROPERTY VALUE']
data['OTHER EXPENSE'] = data['INCOME'] * data['TDS'] - data['ANNUALIZED HOUSING EXPENSE']
data['RENT INCOME'] = data.apply(lambda x: np.abs(x['OTHER EXPENSE']) if x.GDS > x.TDS else 0, axis=1)
data['OTHER EXPENSE'] = np.clip(data['OTHER EXPENSE'],0,data['OTHER EXPENSE'].max())

In [40]:
def f(x):
    if x['PAYMENT FREQUENCY'] in ['Bi-Weekly','Bi-Weekly Accelerated','Semi-Monthly'] :
        return 2
    elif x['PAYMENT FREQUENCY'] in ['Weekly','Weekly Accelerated']:
        return 4
    else:
        return x['PAYMENT FREQUENCY IN MONTHS']
data['PAYMENT FREQUENCY IN MONTHS'] = 1
data['PAYMENT FREQUENCY IN MONTHS'] = data.apply(lambda x: f(x), axis=1)

In [41]:
data['MORTGAGE PAYMENT'] = data['PAYMENT FREQUENCY IN MONTHS'] * data['MORTGAGE PAYMENT']

In [42]:
data['ACTUAL MORTGAGE PAYMENT'] = data.apply(lambda x: x['MORTGAGE AMOUNT'] * np.exp(x.RATE*x.AMORTIZATION*1.0/12), axis=1)
data['TOTAL INTEREST'] = data.apply(lambda x: x['ACTUAL MORTGAGE PAYMENT'] - x['MORTGAGE AMOUNT'], axis=1)

In [43]:
data['MORTGAGE AMOUNT PER INCOME'] = data['MORTGAGE AMOUNT']/data['INCOME']
data['MORTGAGE PAYMENT PER INCOME'] = data['MORTGAGE PAYMENT']*12/data['INCOME']
data['PROPERTY VALUE PER INCOME'] = data['PROPERTY VALUE']/data['INCOME']
data['TOTAL INTEREST PER INCOME'] = data['TOTAL INTEREST']/data['INCOME']
data['OTHER EXPENSE PER INCOME'] = data['OTHER EXPENSE']/data['INCOME']
data['ANNUALIZED HOUSING EXPENSE PER INCOME'] = data['ANNUALIZED HOUSING EXPENSE']/data['INCOME']

In [44]:
data['ACTUAL MORTGAGE PAYMENT BY MORTGAGE'] = data['ACTUAL MORTGAGE PAYMENT']/data['MORTGAGE AMOUNT']

In [45]:
data['FSA'] = data['FSA'].apply(lambda x: str(x[:2]))

In [46]:
data.replace('Under 25',25,inplace=True)
data.replace('25-29',29,inplace=True)
data.replace('30-34',34,inplace=True)
data.replace('35-39',39,inplace=True)
data.replace('40-44',44,inplace=True)
data.replace('45-49',49,inplace=True)
data.replace('50-54',54,inplace=True)
data.replace('55-59',59,inplace=True)
data.replace('60-64',64,inplace=True)
data.replace('65-69',69,inplace=True)
data.replace('70 and over',75,inplace=True)

In [47]:
data.shape

(60856, 37)

In [48]:
print (data[['PROPERTY VALUE','INCOME','FSA','CREDIT SCORE']].drop_duplicates().shape)

(60525, 4)


In [49]:
cnt_cols = []
cat_cols = []
for col in data.columns:
    if data[col].nunique() > 50 and data[col].dtype != object:
        cnt_cols.append(col)
    elif col != 'RESULT':
        cat_cols.append(col)

In [50]:
cat_cols

In [51]:
cnt_cols

In [52]:
data['Is unemployed'] = data['AGE RANGE'].apply(lambda x: 1 if x == 25 else 0)
data['Is recently married'] = data['AGE RANGE'].apply(lambda x: 1 if x > 25 and x <= 34 else 0)
data['Is married'] = data['AGE RANGE'].apply(lambda x: 1 if x > 34 else 0)
data['Is established'] = data['AGE RANGE'].apply(lambda x: 1 if x >= 39 else 0)
data['Is about to retire'] = data['AGE RANGE'].apply(lambda x: 1 if x >= 55 else 0)
data['Is retired'] = data['AGE RANGE'].apply(lambda x: 1 if x >= 64 else 0)

In [53]:
data['REPAYMENT AGE'] = data.AMORTIZATION/12 + data['AGE RANGE']

In [54]:
for cat_col in ['MORTGAGE PURPOSE','PAYMENT FREQUENCY','PROPERTY TYPE','AGE RANGE','GENDER','INCOME TYPE','FSA']:
    for cnt_col in cnt_cols:
        temp = data.groupby([cat_col])[cnt_col].agg(['mean','var']).reset_index()
        temp.columns = [cat_col,cnt_col+' ' + cat_col+' MEAN',cnt_col+' ' + cat_col+' VAR']
        data = pd.merge(data,temp,how='left')
        data[cnt_col+' ' + cat_col+' Z SCORE'] = data.apply(lambda x: (x[cnt_col]-x[cnt_col+' ' + cat_col+' MEAN'])/np.sqrt(x[cnt_col+' ' + cat_col+' VAR']), axis=1)
        data = data.drop([cnt_col+' ' + cat_col+' MEAN',cnt_col+' ' + cat_col+' VAR'],axis=1)

In [55]:
data = pd.get_dummies(data,columns=['MORTGAGE PURPOSE','PAYMENT FREQUENCY','PROPERTY TYPE','FSA','GENDER','AMORTIZATION','TERM','INCOME TYPE'])

In [56]:
data = data.drop(['Unique_ID','PAYMENT FREQUENCY IN MONTHS','NAICS CODE'], axis=1)

In [57]:
traincols = list(data.columns)
traincols.remove('RESULT')

In [58]:
binary_cols = []
for col in traincols:
    if data[col].nunique() == 2:
        binary_cols.append(col)

In [59]:
#embedding = umap.UMAP().fit_transform(data[binary_cols])
data['umap_1'] = embedding[:,0]
data['umap_2'] = embedding[:,1]

In [60]:
data.replace(np.nan,0,inplace=True)
data.replace(np.inf,0,inplace=True)

In [61]:
data.to_csv('../data/data_full_final_1705.csv',index=False)

In [77]:
data = pd.read_csv('../data/data_full_final.csv')

In [62]:
data.head(10)

Unnamed: 0,PROPERTY VALUE,MORTGAGE PAYMENT,GDS,LTV,TDS,MORTGAGE AMOUNT,RATE,AGE RANGE,INCOME,CREDIT SCORE,RESULT,APPRAISED PROPERTY VALUE,CHANGE IN PROPERTY VALUE,ANNUALIZED HOUSING EXPENSE,MORTGAGE RATIO,OTHER EXPENSE,RENT INCOME,ACTUAL MORTGAGE PAYMENT,TOTAL INTEREST,MORTGAGE AMOUNT PER INCOME,MORTGAGE PAYMENT PER INCOME,PROPERTY VALUE PER INCOME,TOTAL INTEREST PER INCOME,OTHER EXPENSE PER INCOME,ANNUALIZED HOUSING EXPENSE PER INCOME,ACTUAL MORTGAGE PAYMENT BY MORTGAGE,Is unemployed,Is recently married,Is married,Is established,Is about to retire,Is retired,REPAYMENT AGE,PROPERTY VALUE MORTGAGE PURPOSE Z SCORE,MORTGAGE PAYMENT MORTGAGE PURPOSE Z SCORE,GDS MORTGAGE PURPOSE Z SCORE,LTV MORTGAGE PURPOSE Z SCORE,TDS MORTGAGE PURPOSE Z SCORE,MORTGAGE AMOUNT MORTGAGE PURPOSE Z SCORE,INCOME MORTGAGE PURPOSE Z SCORE,CREDIT SCORE MORTGAGE PURPOSE Z SCORE,APPRAISED PROPERTY VALUE MORTGAGE PURPOSE Z SCORE,CHANGE IN PROPERTY VALUE MORTGAGE PURPOSE Z SCORE,ANNUALIZED HOUSING EXPENSE MORTGAGE PURPOSE Z SCORE,MORTGAGE RATIO MORTGAGE PURPOSE Z SCORE,OTHER EXPENSE MORTGAGE PURPOSE Z SCORE,ACTUAL MORTGAGE PAYMENT MORTGAGE PURPOSE Z SCORE,TOTAL INTEREST MORTGAGE PURPOSE Z SCORE,MORTGAGE AMOUNT PER INCOME MORTGAGE PURPOSE Z SCORE,MORTGAGE PAYMENT PER INCOME MORTGAGE PURPOSE Z SCORE,PROPERTY VALUE PER INCOME MORTGAGE PURPOSE Z SCORE,TOTAL INTEREST PER INCOME MORTGAGE PURPOSE Z SCORE,OTHER EXPENSE PER INCOME MORTGAGE PURPOSE Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME MORTGAGE PURPOSE Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE MORTGAGE PURPOSE Z SCORE,PROPERTY VALUE PAYMENT FREQUENCY Z SCORE,MORTGAGE PAYMENT PAYMENT FREQUENCY Z SCORE,GDS PAYMENT FREQUENCY Z SCORE,LTV PAYMENT FREQUENCY Z SCORE,TDS PAYMENT FREQUENCY Z SCORE,MORTGAGE AMOUNT PAYMENT FREQUENCY Z SCORE,INCOME PAYMENT FREQUENCY Z SCORE,CREDIT SCORE PAYMENT FREQUENCY Z SCORE,APPRAISED PROPERTY VALUE PAYMENT FREQUENCY Z SCORE,CHANGE IN PROPERTY VALUE PAYMENT FREQUENCY Z SCORE,ANNUALIZED HOUSING EXPENSE PAYMENT FREQUENCY Z SCORE,MORTGAGE RATIO PAYMENT FREQUENCY Z SCORE,OTHER EXPENSE PAYMENT FREQUENCY Z SCORE,ACTUAL MORTGAGE PAYMENT PAYMENT FREQUENCY Z SCORE,TOTAL INTEREST PAYMENT FREQUENCY Z SCORE,MORTGAGE AMOUNT PER INCOME PAYMENT FREQUENCY Z SCORE,MORTGAGE PAYMENT PER INCOME PAYMENT FREQUENCY Z SCORE,PROPERTY VALUE PER INCOME PAYMENT FREQUENCY Z SCORE,TOTAL INTEREST PER INCOME PAYMENT FREQUENCY Z SCORE,OTHER EXPENSE PER INCOME PAYMENT FREQUENCY Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME PAYMENT FREQUENCY Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE PAYMENT FREQUENCY Z SCORE,PROPERTY VALUE PROPERTY TYPE Z SCORE,MORTGAGE PAYMENT PROPERTY TYPE Z SCORE,GDS PROPERTY TYPE Z SCORE,LTV PROPERTY TYPE Z SCORE,TDS PROPERTY TYPE Z SCORE,MORTGAGE AMOUNT PROPERTY TYPE Z SCORE,INCOME PROPERTY TYPE Z SCORE,CREDIT SCORE PROPERTY TYPE Z SCORE,APPRAISED PROPERTY VALUE PROPERTY TYPE Z SCORE,CHANGE IN PROPERTY VALUE PROPERTY TYPE Z SCORE,ANNUALIZED HOUSING EXPENSE PROPERTY TYPE Z SCORE,MORTGAGE RATIO PROPERTY TYPE Z SCORE,OTHER EXPENSE PROPERTY TYPE Z SCORE,ACTUAL MORTGAGE PAYMENT PROPERTY TYPE Z SCORE,TOTAL INTEREST PROPERTY TYPE Z SCORE,MORTGAGE AMOUNT PER INCOME PROPERTY TYPE Z SCORE,MORTGAGE PAYMENT PER INCOME PROPERTY TYPE Z SCORE,PROPERTY VALUE PER INCOME PROPERTY TYPE Z SCORE,TOTAL INTEREST PER INCOME PROPERTY TYPE Z SCORE,OTHER EXPENSE PER INCOME PROPERTY TYPE Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME PROPERTY TYPE Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE PROPERTY TYPE Z SCORE,PROPERTY VALUE AGE RANGE Z SCORE,MORTGAGE PAYMENT AGE RANGE Z SCORE,GDS AGE RANGE Z SCORE,LTV AGE RANGE Z SCORE,TDS AGE RANGE Z SCORE,MORTGAGE AMOUNT AGE RANGE Z SCORE,INCOME AGE RANGE Z SCORE,CREDIT SCORE AGE RANGE Z SCORE,APPRAISED PROPERTY VALUE AGE RANGE Z SCORE,CHANGE IN PROPERTY VALUE AGE RANGE Z SCORE,ANNUALIZED HOUSING EXPENSE AGE RANGE Z SCORE,MORTGAGE RATIO AGE RANGE Z SCORE,OTHER EXPENSE AGE RANGE Z SCORE,ACTUAL MORTGAGE PAYMENT AGE RANGE Z SCORE,TOTAL INTEREST AGE RANGE Z SCORE,MORTGAGE AMOUNT PER INCOME AGE RANGE Z SCORE,MORTGAGE PAYMENT PER INCOME AGE RANGE Z SCORE,PROPERTY VALUE PER INCOME AGE RANGE Z SCORE,TOTAL INTEREST PER INCOME AGE RANGE Z SCORE,OTHER EXPENSE PER INCOME AGE RANGE Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME AGE RANGE Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE AGE RANGE Z SCORE,PROPERTY VALUE GENDER Z SCORE,MORTGAGE PAYMENT GENDER Z SCORE,GDS GENDER Z SCORE,LTV GENDER Z SCORE,TDS GENDER Z SCORE,MORTGAGE AMOUNT GENDER Z SCORE,INCOME GENDER Z SCORE,CREDIT SCORE GENDER Z SCORE,APPRAISED PROPERTY VALUE GENDER Z SCORE,CHANGE IN PROPERTY VALUE GENDER Z SCORE,ANNUALIZED HOUSING EXPENSE GENDER Z SCORE,MORTGAGE RATIO GENDER Z SCORE,OTHER EXPENSE GENDER Z SCORE,ACTUAL MORTGAGE PAYMENT GENDER Z SCORE,TOTAL INTEREST GENDER Z SCORE,MORTGAGE AMOUNT PER INCOME GENDER Z SCORE,MORTGAGE PAYMENT PER INCOME GENDER Z SCORE,PROPERTY VALUE PER INCOME GENDER Z SCORE,TOTAL INTEREST PER INCOME GENDER Z SCORE,OTHER EXPENSE PER INCOME GENDER Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME GENDER Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE GENDER Z SCORE,PROPERTY VALUE INCOME TYPE Z SCORE,MORTGAGE PAYMENT INCOME TYPE Z SCORE,GDS INCOME TYPE Z SCORE,LTV INCOME TYPE Z SCORE,TDS INCOME TYPE Z SCORE,MORTGAGE AMOUNT INCOME TYPE Z SCORE,INCOME INCOME TYPE Z SCORE,CREDIT SCORE INCOME TYPE Z SCORE,APPRAISED PROPERTY VALUE INCOME TYPE Z SCORE,CHANGE IN PROPERTY VALUE INCOME TYPE Z SCORE,ANNUALIZED HOUSING EXPENSE INCOME TYPE Z SCORE,MORTGAGE RATIO INCOME TYPE Z SCORE,OTHER EXPENSE INCOME TYPE Z SCORE,ACTUAL MORTGAGE PAYMENT INCOME TYPE Z SCORE,TOTAL INTEREST INCOME TYPE Z SCORE,MORTGAGE AMOUNT PER INCOME INCOME TYPE Z SCORE,MORTGAGE PAYMENT PER INCOME INCOME TYPE Z SCORE,PROPERTY VALUE PER INCOME INCOME TYPE Z SCORE,TOTAL INTEREST PER INCOME INCOME TYPE Z SCORE,OTHER EXPENSE PER INCOME INCOME TYPE Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME INCOME TYPE Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE INCOME TYPE Z SCORE,PROPERTY VALUE FSA Z SCORE,MORTGAGE PAYMENT FSA Z SCORE,GDS FSA Z SCORE,LTV FSA Z SCORE,TDS FSA Z SCORE,MORTGAGE AMOUNT FSA Z SCORE,INCOME FSA Z SCORE,CREDIT SCORE FSA Z SCORE,APPRAISED PROPERTY VALUE FSA Z SCORE,CHANGE IN PROPERTY VALUE FSA Z SCORE,ANNUALIZED HOUSING EXPENSE FSA Z SCORE,MORTGAGE RATIO FSA Z SCORE,OTHER EXPENSE FSA Z SCORE,ACTUAL MORTGAGE PAYMENT FSA Z SCORE,TOTAL INTEREST FSA Z SCORE,MORTGAGE AMOUNT PER INCOME FSA Z SCORE,MORTGAGE PAYMENT PER INCOME FSA Z SCORE,PROPERTY VALUE PER INCOME FSA Z SCORE,TOTAL INTEREST PER INCOME FSA Z SCORE,OTHER EXPENSE PER INCOME FSA Z SCORE,ANNUALIZED HOUSING EXPENSE PER INCOME FSA Z SCORE,ACTUAL MORTGAGE PAYMENT BY MORTGAGE FSA Z SCORE,MORTGAGE PURPOSE_Purchase,MORTGAGE PURPOSE_Refinance,PAYMENT FREQUENCY_Bi-Weekly,PAYMENT FREQUENCY_Bi-Weekly Accelerated,PAYMENT FREQUENCY_Monthly,PAYMENT FREQUENCY_Semi-Monthly,PAYMENT FREQUENCY_Weekly,PAYMENT FREQUENCY_Weekly Accelerated,PROPERTY TYPE_Duplex,PROPERTY TYPE_Fourplex,PROPERTY TYPE_High Rise,PROPERTY TYPE_Semi-Detached,PROPERTY TYPE_Single Detached,PROPERTY TYPE_Townhouse Condominium,PROPERTY TYPE_Townhouse Freehold,PROPERTY TYPE_Triplex,FSA_A0,FSA_A1,FSA_A2,FSA_B0,FSA_B2,FSA_B3,FSA_B4,FSA_B6,FSA_B9,FSA_C0,FSA_C1,FSA_C4,FSA_E1,FSA_E2,FSA_E3,FSA_E4,FSA_E5,FSA_E6,FSA_G0,FSA_G1,FSA_G2,FSA_G3,FSA_G6,FSA_G7,FSA_G8,FSA_G9,FSA_H0,FSA_H1,FSA_H2,FSA_H3,FSA_H4,FSA_H7,FSA_H8,FSA_H9,FSA_J0,FSA_J1,FSA_J2,FSA_J3,FSA_J4,FSA_J5,FSA_J6,FSA_J7,FSA_J8,FSA_J9,FSA_K0,FSA_K1,FSA_K2,FSA_K4,FSA_K6,FSA_K7,FSA_K8,FSA_K9,FSA_L0,FSA_L1,FSA_L2,FSA_L3,FSA_L4,FSA_L5,FSA_L6,FSA_L7,FSA_L8,FSA_L9,FSA_M0,FSA_M1,FSA_M2,FSA_M3,FSA_M4,FSA_M5,FSA_M6,FSA_M7,FSA_M8,FSA_M9,FSA_N0,FSA_N1,FSA_N2,FSA_N3,FSA_N4,FSA_N5,FSA_N6,FSA_N7,FSA_N8,FSA_N9,FSA_P0,FSA_P1,FSA_P2,FSA_P3,FSA_P4,FSA_P5,FSA_P6,FSA_P7,FSA_R0,FSA_R1,FSA_R2,FSA_R3,FSA_R4,FSA_R5,FSA_R6,FSA_R7,FSA_T0,FSA_T1,FSA_T2,FSA_T3,FSA_T4,FSA_T5,FSA_T6,FSA_T7,FSA_T8,FSA_T9,FSA_V0,FSA_V1,FSA_V2,FSA_V3,FSA_V4,FSA_V5,FSA_V6,FSA_V7,FSA_V8,FSA_V9,FSA_W1,FSA_X0,FSA_X1,GENDER_Female,GENDER_Male,GENDER_Unknown,AMORTIZATION_60,AMORTIZATION_120,AMORTIZATION_180,AMORTIZATION_240,AMORTIZATION_300,AMORTIZATION_360,AMORTIZATION_420,AMORTIZATION_480,TERM_6,TERM_12,TERM_24,TERM_36,TERM_48,TERM_60,INCOME TYPE_-1,INCOME TYPE_1,INCOME TYPE_2,INCOME TYPE_3,INCOME TYPE_4,INCOME TYPE_5,INCOME TYPE_6,INCOME TYPE_7,INCOME TYPE_8,INCOME TYPE_9,INCOME TYPE_10,INCOME TYPE_11,INCOME TYPE_12,INCOME TYPE_13,INCOME TYPE_14,INCOME TYPE_15,INCOME TYPE_16,INCOME TYPE_17,INCOME TYPE_18,INCOME TYPE_19,INCOME TYPE_20,umap_1,umap_2
0,900000,5429,0.6198,0.65,0.7163,1040000.0,0.04,25,108000,681,0.0,1600000.0,0.7777778,66938.4,1.155556,10422.0,0.0,3452922.0,2412922.0,9.62963,0.603222,8.333333,22.341867,0.0965,0.6198,3.320117,1,0,0,0,0,0,55.0,0.654673,1.705486,-0.008679,-0.13534,-0.009545,1.989924,-0.060666,0.346419,1.914993,1.689258,-0.006501,1.469948,-0.007511,1.178559,0.940503,0.779529,0.455844,0.110304,0.400134,-0.007576,-0.008679,-0.815202,0.717169,1.757995,-0.006941,-0.492562,-0.008089,2.022816,-0.153133,0.13551,2.147397,2.253533,-0.004577,1.778288,-0.005914,1.292384,1.060317,0.735224,0.516335,0.163659,0.389707,-0.007419,-0.006941,-0.789173,0.481677,1.412705,-0.008078,-0.417141,-0.008477,1.649548,-0.207497,0.172895,1.749843,2.271147,-0.005516,1.776352,-0.005367,1.002527,0.801403,0.627905,0.4339,0.134677,0.325342,-0.006169,-0.008078,-0.774121,1.259671,2.506503,2.36383,-0.684193,2.92548,2.762873,0.008919,0.046712,3.076969,2.334171,1.784315,1.562472,-0.05367,1.938095,1.649064,1.846169,1.477189,0.418459,1.066183,0.135264,2.36383,-0.873903,0.664643,1.682507,-0.002894,-0.574844,-0.003075,1.919692,-0.213168,0.102812,2.07432,2.875535,0.001621,2.170682,-0.087339,1.222198,1.00022,1.01428,0.989372,0.305103,0.67538,-0.01314,-0.002894,-0.795365,0.419956,1.277336,-0.00519,-0.644435,-0.007725,1.495877,-0.372827,0.00211,1.608466,2.428543,-0.004881,1.866892,-0.008238,0.897933,0.71616,0.765734,0.550836,0.221727,0.537711,-0.007721,-0.00519,-0.734037,0.348211,1.192744,-0.012941,-0.551778,-0.013109,1.402129,-0.323439,0.078128,1.595739,2.744712,-0.008145,1.969579,-0.01467,0.848064,0.673459,0.409413,0.288271,0.092858,0.173939,-0.013286,-0.012941,-0.618118,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,-13.749108,-6.118732
1,386000,2179,0.3522,0.7429,0.4065,390000.0,0.045,75,78000,710,0.0,524969.7,0.3600252,27471.6,1.010363,4235.4,0.0,1504396.0,1114396.0,5.0,0.335231,4.948718,14.287128,0.0543,0.3522,3.857426,0,0,1,1,1,1,105.0,-0.426679,-0.151379,-0.008867,0.581221,-0.009747,-0.08406,-0.34103,0.567604,-0.214971,0.639703,-0.006522,0.961262,-0.00756,-0.158826,-0.176374,0.087311,0.024593,-0.117624,0.015283,-0.007713,-0.008867,-0.316767,-0.456776,-0.179305,-0.007199,0.310719,-0.008342,-0.12725,-0.413224,0.351521,-0.192885,0.934253,-0.004606,1.143271,-0.005983,-0.157431,-0.163102,0.105064,0.059561,-0.082895,0.061235,-0.007536,-0.007199,-0.200591,-0.558077,-0.318238,-0.008292,0.353701,-0.008692,-0.269377,-0.452361,0.390565,-0.328272,0.928711,-0.00554,1.145673,-0.005426,-0.282017,-0.280368,0.074117,0.035784,-0.09597,0.036186,-0.006293,-0.008292,-0.21184,-0.41428,-0.018259,-0.023327,0.805786,-0.023342,0.062858,-0.155326,0.347952,-0.165219,0.938013,-0.023331,1.648487,-0.023356,-0.029778,-0.054081,-0.025808,-0.125716,-0.377302,-0.12387,-0.023567,-0.023327,-0.363466,-0.488589,-0.218875,-0.005275,0.269226,-0.005783,-0.175855,-0.467528,0.324675,-0.22756,1.200933,-0.006946,1.387673,-0.155075,-0.195177,-0.197113,0.178444,0.166328,-0.077265,0.149892,-0.034048,-0.005275,-0.178556,-0.382314,0.0582,-0.00616,0.296889,-0.006185,0.141052,-0.296055,0.449483,0.035199,0.885956,-0.006202,1.066481,-0.006242,0.054639,0.027555,0.111871,0.059159,-0.110518,0.044497,-0.006572,-0.00616,-0.240987,-0.452063,-0.024036,-0.019145,0.269168,-0.019158,0.069293,-0.431635,0.397389,-0.029731,1.042914,-0.019164,1.255613,-0.01919,-0.000658,-0.020783,0.540433,0.387867,-0.132657,0.354525,-0.01935,-0.019145,-0.21571,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.218038,-14.646424
2,531000,2152,0.3097,0.8,0.3541,424800.0,0.035,39,87000,709,0.0,531000.0,0.0,26943.9,0.8,3862.8,0.0,1213930.0,789130.2,4.882759,0.296828,6.103448,9.070462,0.0444,0.3097,2.857651,0,0,1,1,0,0,69.0,-0.11346,-0.19859,-0.005826,0.667277,-0.006511,-0.027708,-0.393987,0.173527,-0.140547,-0.135358,-0.008528,0.25673,-0.007274,-0.379151,-0.47923,0.113782,0.034025,0.05091,-0.097399,-0.007113,-0.005826,-1.488835,-0.125605,-0.195399,-0.007239,0.804447,-0.008385,-0.012138,-0.335197,0.344072,-0.179757,-0.202722,-0.004606,0.223226,-0.005987,-0.373555,-0.469555,0.089106,-0.005895,0.001222,-0.151501,-0.007563,-0.007239,-1.295769,-0.264761,-0.332618,-0.008326,0.82749,-0.008728,-0.166641,-0.378901,0.383059,-0.316615,-0.22822,-0.005541,0.231915,-0.00543,-0.473503,-0.551339,0.060092,-0.021266,-0.01728,-0.151087,-0.006322,-0.008326,-1.25808,-0.145872,-0.252245,-0.011041,0.720313,-0.011514,-0.074441,-0.414617,0.332321,-0.199401,-0.198918,-0.012746,0.138651,-0.404553,-0.439664,-0.538237,0.390288,0.124051,0.193004,-0.356669,-0.455901,-0.011041,-1.303923,-0.072159,-0.125419,-0.011611,0.831686,-0.01365,0.07205,-0.252917,0.370282,-0.128853,-0.174907,-0.007918,0.230693,-0.010148,-0.327417,-0.436852,0.030249,-0.034379,-0.036034,-0.132827,-0.012809,-0.011611,-1.268101,0.132782,0.032949,-0.006192,0.809663,-0.006222,0.317607,-0.130041,0.442401,0.055864,-0.175477,-0.006202,0.207538,-0.006245,-0.288643,-0.458751,0.09274,-0.020352,0.005732,-0.163286,-0.006689,-0.006192,-1.308317,-0.129665,-0.172623,0.063791,0.731675,-0.210732,0.007451,-0.239164,0.320817,-0.213558,-0.17716,-0.127301,0.109389,-0.269879,-0.516782,-0.657312,0.105411,-0.065707,-0.064198,-0.502541,-0.322408,0.063791,-1.33828,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.346516,5.426757
3,1200000,5410,0.1904,0.75,0.3414,960000.0,0.055,49,300000,761,0.0,1280000.0,0.06666667,57120.0,0.8,45300.0,0.0,4998701.0,4038701.0,3.2,0.2164,4.0,13.462335,0.151,0.1904,5.20698,0,0,1,1,0,0,79.0,1.590024,1.877187,-0.006139,0.106102,-0.006527,1.84499,1.395364,0.572678,1.71139,0.190172,-0.001485,0.25673,-0.004598,2.769383,2.998036,-0.110303,-0.120627,-0.126743,0.071207,-0.006834,-0.006139,1.892821,1.402352,1.746669,-0.007354,0.372111,-0.008396,1.758193,1.511454,0.731403,1.450775,0.007814,-0.004584,0.223226,-0.005526,2.442532,2.592062,-0.139942,-0.142978,-0.152005,0.0276,-0.007268,-0.007354,1.277745,1.088537,1.402586,-0.008421,0.412613,-0.008737,1.413372,1.359631,0.773365,1.131259,-0.013988,-0.005522,0.231915,-0.005031,2.021565,2.155801,-0.141196,-0.140746,-0.160621,0.006577,-0.006009,-0.008421,1.200435,1.407028,1.745953,-0.011871,0.366611,-0.010139,1.756338,1.469811,0.774366,1.492558,0.009664,-0.005971,0.237472,-0.00891,2.462969,2.619175,-0.090395,-0.078784,-0.095146,0.034422,-0.009729,-0.011871,1.234625,1.337735,1.671391,-0.006715,0.333735,-0.006353,1.661779,1.414731,0.714848,1.389129,0.024977,-0.00051,0.25321,0.294537,2.346612,2.499304,-0.14653,-0.19862,-0.184443,0.096083,0.013861,-0.006715,1.370677,0.994916,1.267853,-0.010525,0.305504,-0.008736,1.272785,0.963736,0.687339,1.027101,-0.000627,-0.006444,0.179658,-0.007289,1.850711,1.981193,-0.078881,-0.075948,-0.092828,0.113214,-0.007549,-0.010525,1.43264,0.33828,0.484329,-0.025171,0.278453,-0.025164,0.489488,0.512016,0.549316,0.357909,0.001374,-0.025119,0.10753,-0.024241,0.836095,0.919367,-0.337811,-0.297804,-0.317203,0.077573,-0.024843,-0.025171,1.61595,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.714539,17.860504
4,350000,3342,0.2959,0.8,0.3485,592000.0,0.05,54,147000,762,0.0,740000.0,1.114286,43497.3,1.691429,7732.2,0.0,2653160.0,2061160.0,4.027211,0.272816,2.380952,14.021496,0.0526,0.2959,4.481689,0,0,1,1,0,0,84.0,-0.502415,0.513093,-0.008907,1.021649,-0.009785,0.56047,0.303806,0.964212,0.21107,2.534695,-0.006513,3.347392,-0.007533,0.629636,0.637949,-0.058139,-0.075844,-0.290544,0.002591,-0.007719,-0.008907,0.262331,-0.538998,0.513951,-0.007253,0.804447,-0.00839,0.540925,0.184987,0.738851,0.275224,3.31624,-0.004594,4.121988,-0.005944,0.697315,0.728901,-0.027346,-0.046821,-0.269945,0.050402,-0.007541,-0.007253,0.483243,-0.6309,0.301173,-0.008337,0.82749,-0.008732,0.326966,0.110826,0.780871,0.087398,3.352506,-0.00553,4.104038,-0.005393,0.475293,0.508359,-0.042247,-0.056936,-0.270953,0.02665,-0.006298,-0.008337,0.441436,-0.482158,0.480368,0.077575,0.882947,-0.189057,0.511104,0.183085,0.78335,0.231475,3.417507,0.476399,4.221512,-0.273525,0.636193,0.66096,-0.010595,-0.063047,-0.486892,0.115583,-0.392623,0.077575,0.394145,-0.56936,0.461527,-0.005776,0.788025,-0.00629,0.475377,0.117499,0.722498,0.232868,4.224459,-0.003467,5.060586,-0.116789,0.640444,0.675871,0.002816,-0.025357,-0.36735,0.132562,-0.03489,-0.005776,0.538072,-0.634137,0.235769,-0.009215,0.780474,-0.008717,0.24656,-0.101338,0.695905,0.046047,3.578061,-0.008613,4.409796,-0.008311,0.404981,0.442451,0.029784,0.015465,-0.210354,0.139945,-0.007859,-0.009215,0.599792,-0.766368,0.420999,-0.096752,0.67369,-0.321592,0.455243,0.177247,0.861114,0.189893,5.188485,0.333414,5.615124,-0.236024,0.673851,0.720295,-0.148145,-0.188917,-0.704702,0.110644,-0.292947,-0.096752,0.54469,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.943105,-16.649006
5,420000,2179,0.2616,0.6905,0.2616,290000.0,0.055,44,106000,570,0.0,419985.5,-3.448157e-05,27729.6,0.690476,0.0,0.0,871208.1,581208.1,2.735849,0.246679,3.962264,5.483096,0.0,0.2616,3.004166,0,0,1,1,0,0,64.0,-0.35515,-0.151379,-0.008931,0.177047,-0.009841,-0.403135,-0.079357,-0.500187,-0.422976,-0.264904,-0.006522,-0.159468,-0.007594,-0.593419,-0.634975,-0.251223,-0.117903,-0.184054,-0.405369,-0.007889,-0.008931,-1.108293,-0.379122,-0.179305,-0.007286,-0.142369,-0.008461,-0.458029,-0.170472,-0.691291,-0.42143,-0.202831,-0.004606,-0.255788,-0.00603,-0.628559,-0.665451,-0.203119,-0.091369,-0.154754,-0.297793,-0.007686,-0.007286,-1.135273,-0.489299,-0.318238,-0.008364,-0.08109,-0.008792,-0.564596,-0.223821,-0.660257,-0.531214,-0.228331,-0.00554,-0.243826,-0.005467,-0.699439,-0.724554,-0.196717,-0.095764,-0.163193,-0.279869,-0.006452,-0.008364,-1.104756,-0.407176,-0.206188,-0.050231,-0.273394,-0.818699,-0.491628,-0.230518,-0.769424,-0.443556,-0.20408,-0.151084,-0.31312,-0.484977,-0.636075,-0.663958,-0.475672,-0.161437,-0.350993,-0.735904,-0.770948,-0.050231,-1.125151,-0.335507,-0.107878,-0.011638,-0.066306,-0.013694,-0.412025,-0.062611,-0.667287,-0.384993,-0.175003,-0.007918,-0.187735,-0.010173,-0.608792,-0.653701,-0.183155,-0.09026,-0.142878,-0.232003,-0.01288,-0.011638,-1.10989,-0.261533,0.0582,-0.006228,-0.173677,-0.006288,-0.36629,0.220433,-0.542093,-0.324559,-0.175579,-0.006202,-0.239664,-0.006276,-0.693683,-0.769616,-0.257587,-0.124182,-0.209827,-0.306174,-0.007212,-0.006228,-1.151902,-0.575226,-0.14607,-0.287027,-0.274775,-0.95886,-0.658895,-0.036715,-0.766393,-0.629884,-0.177247,-0.087401,-0.257997,-0.410432,-0.924075,-0.970517,-0.723696,-0.346589,-0.517974,-0.90966,-0.733692,-0.287027,-1.159406,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5.073985,-4.611742
6,1600000,5275,0.3212,0.6923,0.339,900000.0,0.04,59,185000,812,0.0,1300014.0,-0.187491,59422.0,0.5625,3293.0,0.0,2988105.0,2088105.0,4.864865,0.342162,8.648649,11.287055,0.0178,0.3212,3.320117,0,0,1,1,1,0,89.0,2.12733,1.617499,-0.008889,0.190931,-0.009791,1.543219,0.658933,1.345565,1.32063,-0.735867,-0.006505,-0.607835,-0.007568,0.859529,0.661125,0.067106,0.035747,0.131538,-0.128059,-0.007831,-0.008889,-0.815202,2.315928,1.666197,-0.007228,-0.126805,-0.008398,1.559725,0.514436,1.111284,1.494345,-0.794826,-0.004582,-0.815506,-0.005993,0.946534,0.754288,0.086671,0.071375,0.186628,-0.061108,-0.007637,-0.007228,-0.789173,1.897684,1.330685,-0.008316,-0.066155,-0.008738,1.236241,0.420987,1.156164,1.169948,-0.830717,-0.005521,-0.79972,-0.005435,0.696102,0.530806,0.057952,0.046081,0.156165,-0.071513,-0.0064,-0.008316,-0.774121,2.134587,1.730792,-0.012555,0.088311,-0.012627,1.629651,0.535333,1.150744,1.403101,-0.510812,-0.006819,-0.478932,-0.015504,0.964862,0.762392,0.153922,0.122298,0.211558,-0.123588,-0.012702,-0.012555,-0.823905,2.464045,1.903467,-0.011605,-0.051544,-0.013657,1.778524,0.728664,1.139128,1.645469,-0.697441,-0.007904,-0.676658,-0.010151,1.12918,0.917897,0.02847,0.016138,0.09097,-0.071547,-0.012852,-0.011605,-0.768718,1.761529,1.200478,-0.0089,-0.242611,-0.008742,1.105466,0.16319,1.124173,1.063462,-0.868835,-0.006078,-0.947362,-0.008432,0.611433,0.463417,0.139821,0.127829,0.244615,0.009222,-0.007969,-0.0089,-0.734037,2.584741,1.77866,0.09326,-0.320699,-0.39296,1.660589,0.558252,1.273454,1.642141,-1.252515,1.018387,-1.274149,-0.416872,0.9686,0.750181,0.187923,0.182473,0.484567,-0.206438,-0.633612,0.09326,-0.722039,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,11.128749,-4.930684
7,1500000,4383,0.3568,0.5517,0.3781,644000.0,0.04,49,160000,683,0.0,1167301.0,-0.2217993,57088.0,0.429333,3408.0,0.0,2138155.0,1494155.0,4.025,0.328725,9.375,9.338471,0.0213,0.3568,3.320117,0,0,1,1,0,0,79.0,1.916951,1.107861,-0.008864,-0.893554,-0.009765,0.726389,0.425297,0.361673,1.057684,-0.822062,-0.006506,-1.074388,-0.007567,0.276159,0.150261,-0.05847,0.014124,0.180452,-0.221161,-0.00782,-0.008864,-0.815202,2.087534,1.134483,-0.007194,-1.342535,-0.008366,0.71293,0.297693,0.150407,1.205435,-0.903173,-0.004584,-1.397926,-0.005992,0.314122,0.194692,-0.027647,0.048472,0.239539,-0.140571,-0.007627,-0.007194,-0.789173,1.695397,0.855608,-0.008288,-1.232789,-0.008711,0.48048,0.216934,0.187906,0.913403,-0.940966,-0.005522,-1.37816,-0.005434,0.135782,0.036001,-0.042511,0.02612,0.205662,-0.141465,-0.00639,-0.008288,-0.774121,2.098844,1.127962,-0.008312,-1.453045,-0.009989,0.701774,0.251721,0.167707,1.23888,-1.113906,-0.005976,-1.618845,-0.010434,0.300933,0.180085,-0.008502,0.036447,0.187361,-0.09936,-0.010385,-0.008312,-0.769177,2.010828,1.070554,-0.005234,-1.467977,-0.006032,0.643021,0.227721,0.118113,1.147815,-1.131367,-0.000517,-1.745754,-0.164134,0.265825,0.153052,0.002417,0.146347,0.422782,-0.172956,-0.050397,-0.005234,-0.795365,1.569876,0.755305,-0.008458,-1.578225,-0.008637,0.39157,-0.010841,0.019241,0.822353,-0.986033,-0.006449,-1.579284,-0.008429,0.087546,0.00126,0.029493,0.106057,0.297341,-0.083933,-0.007958,-0.008458,-0.734037,2.221048,1.058062,0.209524,-1.388652,-0.030261,0.540646,0.211605,0.067904,1.201531,-1.100428,0.817751,-1.610537,-0.43404,0.21919,0.113501,-0.069065,0.039927,0.30915,-0.157135,-0.166943,0.209524,-0.657101,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.314829,-17.595125
8,790000,3092,0.3529,0.78481,0.354,620000.0,0.035,49,89000,684,1.0,790000.1,1.612903e-07,31408.1,0.78481,97.9,0.0,1771744.0,1151744.0,6.966292,0.416899,8.876404,12.94094,0.0011,0.3529,2.857651,0,0,1,1,0,0,79.0,0.423255,0.370258,-0.008867,0.904484,-0.009781,0.649811,-0.23823,0.3693,0.310136,-0.264817,-0.00652,0.171033,-0.007593,0.02467,-0.144251,0.381309,0.156013,0.146876,-0.049037,-0.007885,-0.008867,-1.244208,0.465936,0.364928,-0.007198,0.673103,-0.008385,0.633543,-0.317857,0.157856,0.384072,-0.202721,-0.004603,0.156792,-0.006029,0.041491,-0.127915,0.372705,0.198759,0.203219,0.006337,-0.007683,-0.007198,-1.295769,0.259162,0.168024,-0.008291,0.701451,-0.008728,0.409627,-0.362577,0.195412,0.184052,-0.228219,-0.005538,0.165934,-0.005466,-0.105771,-0.249254,0.309321,0.157107,0.171685,-0.012141,-0.006449,-0.008291,-1.25808,0.461546,0.351111,-0.008396,0.686038,-0.010087,0.62168,-0.366025,0.175484,0.3896,-0.250001,-0.009927,0.1614,-0.010554,0.023994,-0.148136,0.283464,0.126902,0.161155,0.017507,-0.010487,-0.008396,-1.260304,0.542319,0.48526,-0.011587,0.707115,-0.01365,0.773026,-0.232885,0.183669,0.46873,-0.174906,-0.007916,0.172661,-0.010172,0.130547,-0.058669,0.237353,0.099418,0.102334,-0.025823,-0.012878,-0.011587,-1.268101,1.052851,0.912048,-0.00616,0.673252,-0.006222,1.307938,-0.093149,0.265334,0.943402,-0.175477,-0.0062,0.145516,-0.006275,0.370599,0.083393,0.432725,0.228248,0.284894,-0.009122,-0.007199,-0.00616,-1.308317,0.007391,-0.072832,0.630766,0.663269,-0.185916,0.094798,-0.473111,-0.090105,-0.058383,-0.197092,-0.158873,0.125406,-0.559927,-0.274449,-0.367302,0.638059,0.471686,0.412481,-0.018264,-0.814164,0.630766,-1.204294,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.973521,-4.746644
9,650000,1839,0.3878,0.4185,0.3898,272000.0,0.055,44,60000,716,0.0,649940.3,-9.190332e-05,23268.0,0.418462,120.0,0.0,1416299.0,1144299.0,4.533333,0.3678,10.833333,19.071642,0.002,0.3878,5.20698,0,0,1,1,0,0,74.0,0.189551,-0.398013,-0.005622,-3.614489,-0.006464,-0.562365,-0.620806,0.227259,0.153538,-0.135806,-0.009385,-2.143687,-0.007516,-0.210802,-0.099175,0.067251,0.170496,0.450387,0.286551,-0.007224,-0.005622,1.892821,0.146184,-0.381976,-0.007164,-2.494278,-0.008356,-0.517569,-0.569279,0.396213,0.079169,-0.203012,-0.004609,-1.445475,-0.006029,-0.222981,-0.134929,0.041544,0.115073,0.345773,0.256347,-0.007681,-0.007164,1.277745,-0.02404,-0.499321,-0.008263,-2.338021,-0.008703,-0.617736,-0.599279,0.4356,-0.086695,-0.228515,-0.005543,-1.425384,-0.005466,-0.340094,-0.255457,0.018295,0.084168,0.305042,0.207945,-0.006446,-0.008263,1.200435,0.144461,-0.403446,0.8302,-2.890991,0.087302,-0.550287,-0.603135,0.396916,0.069861,-0.204267,-0.309767,-1.512302,-0.481765,-0.243635,-0.153651,0.236572,0.510248,1.296267,0.867374,-0.756761,0.8302,1.344866,0.103733,-0.417789,-0.004959,-2.678204,-0.005929,-0.556277,-0.620143,0.370578,0.04003,-0.242631,-0.007858,-1.804385,-0.200134,-0.25926,-0.169541,0.094191,0.266353,0.587533,0.462032,-0.059959,-0.004959,1.370677,0.555516,-0.259772,-0.006133,-2.616311,-0.006197,-0.457611,-0.628082,0.49198,0.463446,-0.175748,-0.006204,-1.350342,-0.006275,-0.049478,0.072262,0.035721,0.126592,0.481903,0.235068,-0.007189,-0.006133,1.199759,-0.13991,-0.591349,-0.013195,-2.515703,-0.013292,-0.721743,-0.704934,0.3678,-0.201669,-0.276537,-0.01612,-1.723442,-0.016631,-0.421155,-0.32719,0.000189,0.051351,0.215767,0.110396,-0.013394,-0.013195,1.244551,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.558757,14.982231


In [78]:
train = data.iloc[:train_len]
test = data.iloc[train_len:]

In [79]:
traincols = list(train.columns)
traincols.remove('RESULT')

In [80]:
print (len(traincols))

713


In [81]:
train2 = pd.concat([train[train.RESULT == 0].sample(frac=.3),train[train.RESULT == 1]],axis=0).sample(frac=1)

In [68]:
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(train):
    break

In [69]:
for train_index2, test_index2 in kf.split(train2):
    break

In [71]:
rf = RandomForestClassifier(n_estimators=1000)
rf.fit(train2[traincols].iloc[train_index2], train2.RESULT.iloc[train_index2])
pred = rf.predict(train2[traincols].iloc[test_index2])
print (accuracy_score(train2.RESULT.iloc[test_index2],pred))
print (confusion_matrix(train2.RESULT.iloc[test_index2],pred))
print (f1_score(train2.RESULT.iloc[test_index2],pred))

0.6551639745472344
[[1549  669]
 [ 740 1128]]
0.6155525238744884


In [72]:
gbm = GradientBoostingClassifier(max_depth=8,n_estimators=1000)
gbm.fit(train2[traincols].iloc[train_index2], train2.RESULT.iloc[train_index2])
pred = gbm.predict(train2[traincols].iloc[test_index2])
print (accuracy_score(train2.RESULT.iloc[test_index2],pred))
print (confusion_matrix(train2.RESULT.iloc[test_index2],pred))
print (f1_score(train2.RESULT.iloc[test_index2],pred))

0.6644640234948604
[[1584  634]
 [ 737 1131]]
0.6226259289843106


In [73]:
importances = gbm.feature_importances_
indices = importances.argsort()[::-1]
cols = traincols
count = 0
for i in range(len(cols)):
    count += 1
    if importances[indices[i]] > 0 and count < 50:
        print ("Column {} has importance {}".format(cols[indices[i]],importances[indices[i]]))

Column PAYMENT FREQUENCY_Monthly has importance 0.04153714293353343
Column CHANGE IN PROPERTY VALUE MORTGAGE PURPOSE Z SCORE has importance 0.028591708755899427
Column TOTAL INTEREST FSA Z SCORE has importance 0.025400252857281758
Column CHANGE IN PROPERTY VALUE PAYMENT FREQUENCY Z SCORE has importance 0.022421835531278703
Column umap_1 has importance 0.01706507535718056
Column TDS MORTGAGE PURPOSE Z SCORE has importance 0.016777248384419807
Column umap_2 has importance 0.016750919278299685
Column APPRAISED PROPERTY VALUE FSA Z SCORE has importance 0.013387466824094668
Column CREDIT SCORE INCOME TYPE Z SCORE has importance 0.012254142083782954
Column TDS FSA Z SCORE has importance 0.012237372973064846
Column MORTGAGE RATIO FSA Z SCORE has importance 0.011955920231004344
Column ACTUAL MORTGAGE PAYMENT FSA Z SCORE has importance 0.01167291984633375
Column CHANGE IN PROPERTY VALUE has importance 0.011378059964156344
Column TDS has importance 0.010886612588296575
Column CHANGE IN PROPERTY 

In [74]:
importances = rf.feature_importances_
indices = importances.argsort()[::-1]
cols = traincols
count = 0
for i in range(len(cols)):
    count += 1
    if importances[indices[i]] > 0 and count < 50:
        print ("Column {} has importance {}".format(cols[indices[i]],importances[indices[i]]))

Column CHANGE IN PROPERTY VALUE PAYMENT FREQUENCY Z SCORE has importance 0.010150019418283399
Column ACTUAL MORTGAGE PAYMENT FSA Z SCORE has importance 0.009866164450025926
Column TOTAL INTEREST FSA Z SCORE has importance 0.009689936086052951
Column APPRAISED PROPERTY VALUE FSA Z SCORE has importance 0.009639296170917553
Column CHANGE IN PROPERTY VALUE MORTGAGE PURPOSE Z SCORE has importance 0.009509047688697352
Column umap_2 has importance 0.009046347147320442
Column TDS MORTGAGE PURPOSE Z SCORE has importance 0.009042317952888604
Column ANNUALIZED HOUSING EXPENSE PAYMENT FREQUENCY Z SCORE has importance 0.008957846301750321
Column TDS PAYMENT FREQUENCY Z SCORE has importance 0.008936543793342907
Column CHANGE IN PROPERTY VALUE has importance 0.008829333890184464
Column PROPERTY VALUE FSA Z SCORE has importance 0.008211045417534065
Column OTHER EXPENSE PAYMENT FREQUENCY Z SCORE has importance 0.008186996462021071
Column OTHER EXPENSE PER INCOME PAYMENT FREQUENCY Z SCORE has importance

#### Hyper parameter tuning ####

In [75]:
from hyperopt import hp, tpe
from hyperopt.fmin import fmin

In [82]:
def objective(params):
    params = {'n_estimators': int(params['n_estimators']), 'max_depth': int(params['max_depth'])}
    clf = RandomForestClassifier(n_jobs=2,  **params, random_state=123)
    score = cross_val_score(clf, train2[traincols], train2.RESULT, scoring='f1', cv=KFold(5)).mean()
    print("F1 {:.3f} params {}".format(score, params))
    return score

space = {
    'n_estimators': hp.quniform('n_estimators', 25, 500, 25),
    'max_depth': hp.quniform('max_depth', 1, 10, 1)
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)

F1 0.566 params {'n_estimators': 75, 'max_depth': 2}
F1 0.598 params {'n_estimators': 300, 'max_depth': 3}
F1 0.594 params {'n_estimators': 200, 'max_depth': 4}
F1 0.600 params {'n_estimators': 200, 'max_depth': 6}
F1 0.610 params {'n_estimators': 300, 'max_depth': 9}
F1 0.599 params {'n_estimators': 300, 'max_depth': 6}
F1 0.562 params {'n_estimators': 275, 'max_depth': 2}
F1 0.596 params {'n_estimators': 150, 'max_depth': 4}
F1 0.613 params {'n_estimators': 325, 'max_depth': 10}
F1 0.596 params {'n_estimators': 350, 'max_depth': 4}


In [83]:
def objective(params):
    params = {
        'max_depth': int(params['max_depth']),
        'gamma': "{:.3f}".format(params['gamma']),
        'colsample_bytree': '{:.3f}'.format(params['colsample_bytree']),
        'n_estimators': int(params['n_estimators'])
    }
    
    clf = xgb.XGBClassifier(
        n_jobs=2,
        **params
    )
    
    score = cross_val_score(clf, train2[traincols], train2.RESULT, scoring='f1', cv=KFold(5)).mean()
    print("F1 {:.3f} params {}".format(score, params))
    return -score

space = {
    'max_depth': hp.quniform('max_depth', 2, 8, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.3, 1.0),
    'gamma': hp.uniform('gamma', 0.0, 0.5),
    'n_estimators': hp.quniform('n_estimators', 25, 500, 25)
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)

F1 0.635 params {'max_depth': 4, 'gamma': '0.146', 'colsample_bytree': '0.336', 'n_estimators': 375}
F1 0.630 params {'max_depth': 3, 'gamma': '0.315', 'colsample_bytree': '0.722', 'n_estimators': 150}
F1 0.637 params {'max_depth': 4, 'gamma': '0.196', 'colsample_bytree': '0.383', 'n_estimators': 475}
F1 0.634 params {'max_depth': 4, 'gamma': '0.318', 'colsample_bytree': '0.631', 'n_estimators': 100}
F1 0.625 params {'max_depth': 8, 'gamma': '0.026', 'colsample_bytree': '0.327', 'n_estimators': 25}
F1 0.631 params {'max_depth': 8, 'gamma': '0.309', 'colsample_bytree': '0.985', 'n_estimators': 50}
F1 0.637 params {'max_depth': 7, 'gamma': '0.479', 'colsample_bytree': '0.472', 'n_estimators': 375}
F1 0.636 params {'max_depth': 6, 'gamma': '0.241', 'colsample_bytree': '0.818', 'n_estimators': 100}
F1 0.628 params {'max_depth': 3, 'gamma': '0.098', 'colsample_bytree': '0.327', 'n_estimators': 125}
F1 0.630 params {'max_depth': 3, 'gamma': '0.392', 'colsample_bytree': '0.559', 'n_estimators

In [84]:
def objective(params):
    params = {
        'max_depth': int(params['max_depth']),
        'colsample_bytree': '{:.3f}'.format(params['colsample_bytree']),
        'n_estimators': int(params['n_estimators'])
    }
    
    clf = lgb.LGBMClassifier(
        **params
    )
    
    score = cross_val_score(clf, train2[traincols], train2.RESULT, scoring='f1', cv=KFold(5)).mean()
    print("F1 {:.3f} params {}".format(score, params))
    return -score

space = {
    'max_depth': hp.quniform('max_depth', 2, 8, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.3, 1.0),
    'n_estimators': hp.quniform('n_estimators', 25, 1000, 50)
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)

F1 0.634 params {'max_depth': 8, 'colsample_bytree': '0.460', 'n_estimators': 450}
F1 0.634 params {'max_depth': 7, 'colsample_bytree': '1.000', 'n_estimators': 400}
F1 0.636 params {'max_depth': 7, 'colsample_bytree': '0.851', 'n_estimators': 150}
F1 0.635 params {'max_depth': 6, 'colsample_bytree': '0.795', 'n_estimators': 200}
F1 0.632 params {'max_depth': 6, 'colsample_bytree': '0.413', 'n_estimators': 750}
F1 0.638 params {'max_depth': 4, 'colsample_bytree': '0.418', 'n_estimators': 250}
F1 0.635 params {'max_depth': 2, 'colsample_bytree': '0.787', 'n_estimators': 700}
F1 0.632 params {'max_depth': 8, 'colsample_bytree': '0.779', 'n_estimators': 400}
F1 0.630 params {'max_depth': 6, 'colsample_bytree': '0.686', 'n_estimators': 50}
F1 0.633 params {'max_depth': 7, 'colsample_bytree': '0.546', 'n_estimators': 650}


In [86]:
def objective(params):
    params = {'n_estimators': int(params['n_estimators']), 'max_depth': int(params['max_depth']),'max_features': float(params['max_features'])}
    clf = GradientBoostingClassifier(**params, random_state=123)
    score = cross_val_score(clf, train2[traincols], train2.RESULT, scoring='f1', cv=KFold(5)).mean()
    print("F1 {:.3f} params {}".format(score, params))
    return -score

space = {
    'n_estimators': hp.quniform('n_estimators', 25, 1000, 50),
    'max_depth': hp.quniform('max_depth', 1, 10, 1),
    'max_features': hp.quniform('max_features',.4,1,.1)
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)

KeyboardInterrupt: 

#### Ensemble ######

In [116]:
class SklearnHelper(object):
    def __init__(self, clf, seed=0, params=None):
        params['random_state'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        return self.clf.predict(x)
    
    def predict_proba(self, x):
        return self.clf.predict_proba(x)
    
    def fit(self,x,y):
        return self.clf.fit(x,y)
    
    def feature_importances(self,x,y):
        print(self.clf.fit(x,y).feature_importances_)

In [114]:
SEED = 123 # for reproducibility
NFOLDS = 5 # set folds for out-of-fold prediction
kf = KFold(n_splits= NFOLDS, random_state=SEED)

def get_oof(clf, x_train, y_train, x_test):
    ntrain = x_train.shape[0]
    ntest = x_test.shape[0]
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for i, (train_index, test_index) in enumerate(kf.split(x_train)):
        x_tr = x_train[train_index]
        y_tr = y_train[train_index]
        x_te = x_train[test_index]

        clf.train(x_tr, y_tr)

        oof_train[test_index] = clf.predict_proba(x_te)[:,1]
        oof_test_skf[i, :] = clf.predict_proba(x_test)[:,1]

    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)

In [90]:
rf_params = {'n_estimators': 325, 'max_depth': 10, 'random_state': 123}
gbm_params = {'n_estimators': 475, 'max_depth': 7, 'max_features': 0.9, 'random_state': 123}
xgb_params = {'max_depth': 7, 'gamma': '0.479', 'colsample_bytree': '0.472', 'n_estimators': 375, 'random_state':123}
lgb_params = {'max_depth': 4, 'colsample_bytree': '0.418', 'n_estimators': 250, 'random_state':123}
et_params = {'n_estimators': 500, 'max_depth': 10, 'random_state': 123}

In [118]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
rf = SklearnHelper(clf=RandomForestClassifier, seed=SEED, params=rf_params)
et = SklearnHelper(clf=ExtraTreesClassifier, seed=SEED, params=et_params)
gbm = SklearnHelper(clf=GradientBoostingClassifier, seed=SEED, params=gbm_params)
xgb = SklearnHelper(clf=XGBClassifier, seed=SEED, params=xgb_params)
lgb = SklearnHelper(clf=LGBMClassifier, seed=SEED, params=lgb_params)

In [111]:
train2.shape

(20426, 714)

In [94]:
x_train = train2[traincols].values
y_train = train2.RESULT.values
x_test = test[traincols].values

In [119]:
rf_oof_train, rf_oof_test = get_oof(rf, x_train, y_train, x_test)
et_oof_train, et_oof_test = get_oof(et, x_train, y_train, x_test)
gbm_oof_train, gbm_oof_test = get_oof(gbm, x_train, y_train, x_test)
xgb_oof_train, xgb_oof_test = get_oof(xgb, x_train, y_train, x_test)
lgb_oof_train, lgb_oof_test = get_oof(lgb, x_train, y_train, x_test)

In [121]:
train_pred = pd.DataFrame()
train_pred['rf'] = rf_oof_train.reshape(1,-1).tolist()[0]
train_pred['et'] = et_oof_train.reshape(1,-1).tolist()[0]
train_pred['gbm'] = gbm_oof_train.reshape(1,-1).tolist()[0]
train_pred['xgb'] = xgb_oof_train.reshape(1,-1).tolist()[0]
train_pred['lgb'] = lgb_oof_train.reshape(1,-1).tolist()[0]
train_pred['actual'] = train2.RESULT.values
train_pred.to_csv('../data/blend_train.csv',index=False)

In [123]:
test_pred = pd.DataFrame()
test_pred['rf'] = rf_oof_test.reshape(1,-1).tolist()[0]
test_pred['et'] = et_oof_test.reshape(1,-1).tolist()[0]
test_pred['gbm'] = gbm_oof_test.reshape(1,-1).tolist()[0]
test_pred['xgb'] = xgb_oof_test.reshape(1,-1).tolist()[0]
test_pred['lgb'] = lgb_oof_test.reshape(1,-1).tolist()[0]
test_pred.to_csv('../data/blend_test.csv',index=False)

In [135]:
train_pred['ensemble'] = (np.round(train_pred.rf) + np.round(train_pred.et) + np.round(train_pred.gbm) + np.round(train_pred.xgb) + np.round(train_pred.lgb))*.2

In [136]:
for col in ['rf','et','gbm','xgb','lgb','ensemble']:
    print (col, f1_score(train_pred.actual,np.round(train_pred[col].values)))
    cutoffs = []
    f1s = []
    for cutoff in np.arange(.1,.5,.01):
        cutoffs.append(cutoff)
        f1s.append(f1_score(train_pred.actual,train_pred[col].values>cutoff))
    print (max(f1s),cutoffs[np.array(f1s).argmax()])    

rf 0.6130380502632579
0.6716632363791698 0.3599999999999999
et 0.5959234479539443
0.6670373665480428 0.3799999999999999
gbm 0.6301325747410229
0.6785920369301789 0.24999999999999992
xgb 0.6343669250645996
0.67867794255832 0.2799999999999999
lgb 0.6401792545633402
0.684940394128619 0.32999999999999985
ensemble 0.638620839206332
0.672023757533409 0.1


In [134]:
test_pred['ensemble'] = (test_pred.rf + test_pred.et + test_pred.gbm + test_pred.xgb + test_pred.lgb)*.2
test_pred['actual'] = np.round(test_pred.ensemble) #test_pred.ensemble > .35
print (test_pred.actual.value_counts(normalize=True))

0.0    0.662285
1.0    0.337715
Name: actual, dtype: float64
