### Prediction

In [39]:
import pandas as pd
import matplotlib.pyplot as plt
import re
import time
import numpy as np
import seaborn as sns
import plotly.express as px
import gc

In [40]:
## Utility Function

# Refer :- https://www.kaggle.com/rinnqd/reduce-memory-usage

def reduce_memory_usage(df):
  
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

### Reading application test data

In [41]:
app_test_data = reduce_memory_usage(pd.read_csv("./dataset/application_test.csv"))
print(app_test_data.shape)
app_test_data.head()

Memory usage of dataframe is 45.00 MB
Memory usage after optimization is: 14.60 MB
Decreased by 67.6%
(48744, 121)


Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,...,0,0,0,0,,,,,,


In [42]:
app_test_data['NAME_INCOME_TYPE'].value_counts()

Working                 24533
Commercial associate    11402
Pensioner                9273
State servant            3532
Student                     2
Businessman                 1
Unemployed                  1
Name: NAME_INCOME_TYPE, dtype: int64

In [43]:
def fix_nulls_outliers(data):
        
    data['NAME_FAMILY_STATUS'].fillna('Data_Not_Available', inplace=True)
    data['NAME_HOUSING_TYPE'].fillna('Data_Not_Available', inplace=True)

    data['FLAG_MOBIL'].fillna('Data_Not_Available', inplace=True)
    data['FLAG_EMP_PHONE'].fillna('Data_Not_Available', inplace=True)
    data['FLAG_CONT_MOBILE'].fillna('Data_Not_Available', inplace=True)
    data['FLAG_EMAIL'].fillna('Data_Not_Available', inplace=True)

    data['OCCUPATION_TYPE'].fillna('Data_Not_Available', inplace=True)

    #Replace NA with the most frequently occuring class for Count of Client Family Members
    data['CNT_FAM_MEMBERS'].fillna(data['CNT_FAM_MEMBERS'].value_counts().idxmax(), \
                                             inplace=True)
        
    data.replace(max(data['DAYS_EMPLOYED'].values), np.nan, inplace=True)

    data['CODE_GENDER'].replace('XNA','M',inplace=True)
    #There are a total of 4 applicants with Gender provided as 'XNA'

    data['AMT_ANNUITY'].fillna(0, inplace=True)
    #A total of 36 datapoints are there where Annuity Amount is null.

    data['AMT_GOODS_PRICE'].fillna(0, inplace=True)
    #A total of 278 datapoints are there where Annuity Amount is null.

    data['NAME_TYPE_SUITE'].fillna('Unaccompanied', inplace=True)
    #Removing datapoints where 'Name_Type_Suite' is null.

    data['NAME_FAMILY_STATUS'].replace('Unknown','Married', inplace=True)
    #Removing datapoints where 'Name_Family_Status' is Unknown.

    data['OCCUPATION_TYPE'].fillna('Data_Not_Available', inplace=True)

    data['EXT_SOURCE_1'].fillna(0, inplace=True)
    data['EXT_SOURCE_2'].fillna(0, inplace=True)
    data['EXT_SOURCE_3'].fillna(0, inplace=True)
    
    return data

In [44]:
def FE_application_data(data):
    
    data['CREDIT_INCOME_PERCENT'] = data['AMT_CREDIT'] / data['AMT_INCOME_TOTAL']
    data['ANNUITY_INCOME_PERCENT'] = data['AMT_ANNUITY'] / data['AMT_INCOME_TOTAL']
    data['CREDIT_ANNUITY_PERCENT'] = data['AMT_CREDIT'] / data['AMT_ANNUITY']

    data['FAMILY_CNT_INCOME_PERCENT'] = data['AMT_INCOME_TOTAL'] / data['CNT_FAM_MEMBERS']
    data['CREDIT_TERM'] = data['AMT_ANNUITY'] / data['AMT_CREDIT']
    data['BIRTH_EMPLOYED_PERCENT'] = data['DAYS_EMPLOYED'] / data['DAYS_BIRTH']
    data['CHILDREN_CNT_INCOME_PERCENT'] = data['AMT_INCOME_TOTAL']/data['CNT_CHILDREN']

    data['CREDIT_GOODS_DIFF'] = data['AMT_CREDIT'] - data['AMT_GOODS_PRICE']
    data['EMPLOYED_REGISTRATION_PERCENT'] = data['DAYS_EMPLOYED'] / data['DAYS_REGISTRATION']
    data['BIRTH_REGISTRATION_PERCENT'] = data['DAYS_BIRTH'] / data['DAYS_REGISTRATION']
    data['ID_REGISTRATION_DIFF'] = data['DAYS_ID_PUBLISH'] - data['DAYS_REGISTRATION']

    data['ANNUITY_LENGTH_EMPLOYED_PERCENT'] = data['CREDIT_TERM']/ data['DAYS_EMPLOYED']

    data['AGE_LOAN_FINISH'] = data['DAYS_BIRTH']*(-1.0/365) + \
                         (data['AMT_CREDIT']/data['AMT_ANNUITY']) *(1.0/12)
    # (This basically refers to the client's age when he/she finishes loan repayment)

    data['CAR_AGE_EMP_PERCENT'] = data['OWN_CAR_AGE']/data['DAYS_EMPLOYED']
    data['CAR_AGE_BIRTH_PERCENT'] = data['OWN_CAR_AGE']/data['DAYS_BIRTH']
    data['PHONE_CHANGE_EMP_PERCENT'] = data['DAYS_LAST_PHONE_CHANGE']/data['DAYS_EMPLOYED']
    data['PHONE_CHANGE_BIRTH_PERCENT'] = data['DAYS_LAST_PHONE_CHANGE']/data['DAYS_BIRTH']
    
    income_by_contract = data[['AMT_INCOME_TOTAL', 'NAME_CONTRACT_TYPE']].groupby('NAME_CONTRACT_TYPE').median()['AMT_INCOME_TOTAL']
    data['MEDIAN_INCOME_CONTRACT_TYPE'] = data['NAME_CONTRACT_TYPE'].map(income_by_contract)
    
    income_by_suite = data[['AMT_INCOME_TOTAL', 'NAME_TYPE_SUITE']].groupby('NAME_TYPE_SUITE').median()['AMT_INCOME_TOTAL']
    data['MEDIAN_INCOME_SUITE_TYPE'] = data['NAME_TYPE_SUITE'].map(income_by_suite)
    
    income_by_housing = data[['AMT_INCOME_TOTAL', 'NAME_HOUSING_TYPE']].groupby('NAME_HOUSING_TYPE').median()['AMT_INCOME_TOTAL']
    data['MEDIAN_INCOME_HOUSING_TYPE'] = data['NAME_HOUSING_TYPE'].map(income_by_housing)
    
    income_by_org = data[['AMT_INCOME_TOTAL', 'ORGANIZATION_TYPE']].groupby('ORGANIZATION_TYPE').median()['AMT_INCOME_TOTAL']
    data['MEDIAN_INCOME_ORG_TYPE'] = data['ORGANIZATION_TYPE'].map(income_by_org)

    income_by_occu = data[['AMT_INCOME_TOTAL', 'OCCUPATION_TYPE']].groupby('OCCUPATION_TYPE').median()['AMT_INCOME_TOTAL']
    data['MEDIAN_INCOME_OCCU_TYPE'] = data['OCCUPATION_TYPE'].map(income_by_occu)

    income_by_education = data[['AMT_INCOME_TOTAL', 'NAME_EDUCATION_TYPE']].groupby('NAME_EDUCATION_TYPE').median()['AMT_INCOME_TOTAL']
    data['MEDIAN_INCOME_EDU_TYPE'] = data['NAME_EDUCATION_TYPE'].map(income_by_education)

    data['ORG_TYPE_INCOME_PERCENT'] = data['MEDIAN_INCOME_ORG_TYPE']/data['AMT_INCOME_TOTAL']
    data['OCCU_TYPE_INCOME_PERCENT'] = data['MEDIAN_INCOME_OCCU_TYPE']/data['AMT_INCOME_TOTAL']
    data['EDU_TYPE_INCOME_PERCENT'] = data['MEDIAN_INCOME_EDU_TYPE']/data['AMT_INCOME_TOTAL']

    data= data.drop(['FLAG_DOCUMENT_2','FLAG_DOCUMENT_4','FLAG_DOCUMENT_5','FLAG_DOCUMENT_6','FLAG_DOCUMENT_7',
    'FLAG_DOCUMENT_8','FLAG_DOCUMENT_9','FLAG_DOCUMENT_10', 'FLAG_DOCUMENT_11','FLAG_DOCUMENT_12','FLAG_DOCUMENT_13',
    'FLAG_DOCUMENT_14','FLAG_DOCUMENT_15','FLAG_DOCUMENT_16','FLAG_DOCUMENT_17','FLAG_DOCUMENT_18','FLAG_DOCUMENT_19',
    'FLAG_DOCUMENT_20','FLAG_DOCUMENT_21'],axis=1)

    cat_col = [category for category in data.columns if data[category].dtype == 'object']
    data = pd.get_dummies(data, columns= cat_col)
    
    return data

In [45]:
print("Before Feature Engineering on Application test data")
print(app_test_data.shape)

app_test_data_temp = fix_nulls_outliers(app_test_data)
app_test_data_fe = FE_application_data(app_test_data_temp)

print("After Feature Engineering on Application test data")
print(app_test_data_fe.shape)

Before Feature Engineering on Application test data
(48744, 121)
After Feature Engineering on Application test data
(48744, 250)


In [46]:
app_test_data_fe.columns.tolist()

['SK_ID_CURR',
 'CNT_CHILDREN',
 'AMT_INCOME_TOTAL',
 'AMT_CREDIT',
 'AMT_ANNUITY',
 'AMT_GOODS_PRICE',
 'REGION_POPULATION_RELATIVE',
 'DAYS_BIRTH',
 'DAYS_EMPLOYED',
 'DAYS_REGISTRATION',
 'DAYS_ID_PUBLISH',
 'OWN_CAR_AGE',
 'FLAG_MOBIL',
 'FLAG_EMP_PHONE',
 'FLAG_WORK_PHONE',
 'FLAG_CONT_MOBILE',
 'FLAG_PHONE',
 'FLAG_EMAIL',
 'CNT_FAM_MEMBERS',
 'REGION_RATING_CLIENT',
 'REGION_RATING_CLIENT_W_CITY',
 'HOUR_APPR_PROCESS_START',
 'REG_REGION_NOT_LIVE_REGION',
 'REG_REGION_NOT_WORK_REGION',
 'LIVE_REGION_NOT_WORK_REGION',
 'REG_CITY_NOT_LIVE_CITY',
 'REG_CITY_NOT_WORK_CITY',
 'LIVE_CITY_NOT_WORK_CITY',
 'EXT_SOURCE_1',
 'EXT_SOURCE_2',
 'EXT_SOURCE_3',
 'APARTMENTS_AVG',
 'BASEMENTAREA_AVG',
 'YEARS_BEGINEXPLUATATION_AVG',
 'YEARS_BUILD_AVG',
 'COMMONAREA_AVG',
 'ELEVATORS_AVG',
 'ENTRANCES_AVG',
 'FLOORSMAX_AVG',
 'FLOORSMIN_AVG',
 'LANDAREA_AVG',
 'LIVINGAPARTMENTS_AVG',
 'LIVINGAREA_AVG',
 'NONLIVINGAPARTMENTS_AVG',
 'NONLIVINGAREA_AVG',
 'APARTMENTS_MODE',
 'BASEMENTAREA_MODE',
 

### Reading other files for merging

In [47]:
bu_bal = pd.read_pickle('pickles/bureau_bal_grouped_data.pkl')
prev_appl = pd.read_pickle('pickles/prev_appl_data_grouped_data.pkl')
pos_cash_bal = pd.read_pickle('pickles/pos_cash_bal_grouped_data.pkl')
inst_paym = pd.read_pickle('pickles/inst_paym_grouped_data.pkl')
cc_bal = pd.read_pickle('pickles/cc_bal_grouped_data.pkl')

In [48]:
bu_bal.head()

Unnamed: 0_level_0,BUREAU_AMT_CREDIT_SUM_DEBT_mean,BUREAU_AMT_CREDIT_SUM_DEBT_sum,BUREAU_AMT_CREDIT_SUM_OVERDUE_mean,BUREAU_AMT_CREDIT_SUM_OVERDUE_sum,BUREAU_DAYS_CREDIT_mean,BUREAU_DAYS_CREDIT_var,BUREAU_DAYS_CREDIT_UPDATE_mean,BUREAU_DAYS_CREDIT_UPDATE_min,BUREAU_CREDIT_DAY_OVERDUE_mean,BUREAU_CREDIT_DAY_OVERDUE_min,...,C_AMT_CREDIT_SUM_LIMIT_MEAN,C_AMT_CREDIT_SUM_LIMIT_SUM,C_AMT_CREDIT_MAX_OVERDUE_MEAN,C_AMT_CREDIT_MAX_OVERDUE_MAX,C_AMT_ANNUITY_MAX,C_AMT_ANNUITY_MEAN,C_AMT_ANNUITY_SUM,C_AMT_CREDIT_SUM_MEAN,C_AMT_CREDIT_SUM_SUM,C_AMT_CREDIT_SUM_MAX
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100001,85240.929688,596686.5,0.0,0.0,-735.0,240043.666667,-93.142857,-155,0.0,0,...,0.0,0.0,,,0.0,0.0,0.0,142335.0,569340.0,279720.0
100002,30722.625,245781.0,0.0,0.0,-874.0,186150.0,-499.875,-1185,0.0,0,...,0.0,0.0,2091.161133,5043.64502,0.0,0.0,0.0,63844.5,383067.0,135000.0
100003,0.0,0.0,0.0,0.0,-1400.75,827783.583333,-816.0,-2131,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69133.5,207400.5,112500.0
100004,0.0,0.0,0.0,0.0,-867.0,421362.0,-532.0,-682,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94518.898438,189037.796875,94537.796875
100005,189469.5,568408.5,0.0,0.0,-190.666667,26340.333333,-54.333333,-121,0.0,0,...,0.0,0.0,,,0.0,0.0,0.0,58500.0,58500.0,58500.0


In [49]:
prev_appl.head()

Unnamed: 0,SK_ID_CURR,PREV_AMT_ANNUITY,PREV_AMT_APPLICATION,PREV_AMT_CREDIT,PREV_AMT_DOWN_PAYMENT,PREV_AMT_GOODS_PRICE,PREV_HOUR_APPR_PROCESS_START,PREV_NFLAG_LAST_APPL_IN_DAY,PREV_RATE_DOWN_PAYMENT,PREV_RATE_INTEREST_PRIMARY,PREV_RATE_INTEREST_PRIVILEGED,PREV_DAYS_DECISION,PREV_SELLERPLACE_AREA,PREV_CNT_PAYMENT,PREV_DAYS_FIRST_DRAWING,PREV_DAYS_FIRST_DUE,PREV_DAYS_LAST_DUE_1ST_VERSION,PREV_DAYS_LAST_DUE,PREV_DAYS_TERMINATION,PREV_NFLAG_INSURED_ON_APPROVAL
0,100001,3951.0,24835.5,23787.0,2520.0,24835.5,13.0,1.0,0.104309,,,-1740.0,23.0,8.0,0.0,-1709.0,-1499.0,-1619.0,-1612.0,0.0
1,100002,9251.775391,179055.0,179055.0,0.0,179055.0,9.0,1.0,0.0,,,-606.0,500.0,24.0,0.0,-565.0,125.0,-25.0,-17.0,0.0
2,100003,56553.988281,435436.5,484191.0,3442.5,435436.5,14.666667,1.0,0.050018,,,-1305.0,533.0,10.0,0.0,-1274.333374,-1004.333313,-1054.333374,-1047.333374,0.666667
3,100004,5357.25,24282.0,20106.0,4860.0,24282.0,5.0,1.0,0.212036,,,-815.0,30.0,4.0,0.0,-784.0,-694.0,-724.0,-714.0,0.0
4,100005,4813.200195,22308.75,20076.75,4464.0,44617.5,10.5,1.0,0.108948,,,-536.0,18.0,12.0,0.0,-706.0,-376.0,-466.0,-460.0,0.0


### Merging data

In [50]:
application_bureau = app_test_data_fe.join(bu_bal, how='left', on='SK_ID_CURR')

In [51]:
application_bureau_prev = application_bureau.merge(prev_appl, on=['SK_ID_CURR'], how='left')

In [52]:
app_bu_prev_pos = application_bureau_prev.merge(pos_cash_bal, on=['SK_ID_CURR'], how='left')

In [53]:
app_bu_prev_pos_instl = app_bu_prev_pos.merge(inst_paym, on =['SK_ID_CURR'], how = 'left')

In [54]:
app_bu_prev_pos_instl_cc = app_bu_prev_pos_instl.merge(cc_bal, on=['SK_ID_CURR'], how='left')
app_bu_prev_pos_instl_cc.shape

(48744, 546)

### Selecting top features

In [55]:
cols = np.loadtxt("kbest_best_columns.txt", dtype=int)

X_test_best = app_bu_prev_pos_instl_cc.iloc[:,cols]

print(X_test_best.shape)

(48744, 400)


### Scaling Data

In [56]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_test_std = scaler.fit_transform(X_test_best)

#replacing nan values with 0
X_test_std[np.isnan(X_test_std)] = 0

print ("X_test_std :", X_test_std.shape)

X_test_std : (48744, 400)


### Load Model

In [57]:
import pickle
algo_name = ["LR", "RF", "SVM", "GB", "KNN"]

with open('models/model_' + 'KNN', 'rb') as f:
    model = pickle.load(f)
print("Model loaded successfully.")

Model loaded successfully.


### Prediction

In [58]:
test_predict = model.predict(X_test_std)

In [63]:
test_predict

array([0, 0, 0, ..., 0, 0, 0], dtype=int8)

In [68]:
np.unique(test_predict)

array([0, 1], dtype=int8)

In [70]:
print("All index value of 1 is: ", np.where(test_predict == 1)[0])

All index value of 1 is:  [  205   215   264   320   525   562   716   729   764   786   860   876
   979  1109  1153  1248  1494  1528  1903  2431  2601  2799  2933  3100
  3241  3293  3300  3351  3374  3393  3780  3846  3862  4127  4503  4593
  4658  5017  5044  5045  5081  5306  5361  5602  5619  5741  5911  6087
  6105  6181  6237  6245  6290  6354  6561  6562  6911  7368  7780  7943
  8027  8031  8110  8391  8439  8479  8482  8560  8607  8700  8778  8779
  8796  9023  9040  9159  9280  9389  9476  9518  9593  9667  9958 10112
 10124 10147 10375 10665 11063 11084 11152 11195 11206 11428 11441 11491
 11607 11787 12001 12091 12244 12277 12376 12386 12455 12549 12565 12837
 12865 12874 13083 13126 13170 13214 13234 13261 13407 13410 13505 13522
 13561 13610 13715 13757 13797 13934 13968 14122 14211 14267 14279 14628
 14698 14795 14886 15179 15250 15304 15319 15425 15472 15892 15999 16171
 16468 16842 16970 16985 17417 17607 17649 17692 17707 17789 17808 17960
 17980 17991 18134 18137 

In [74]:
test_predict[215]

1

In [82]:
app_bu_prev_pos_instl_cc.iloc[199, :]

SK_ID_CURR                      101355.0
CNT_CHILDREN                         0.0
AMT_INCOME_TOTAL                225000.0
AMT_CREDIT                     1157958.0
AMT_ANNUITY                      49189.5
                                 ...    
CR_DRAWING_LIMIT_RATIO_MAX           NaN
CR_DRAWING_LIMIT_RATIO_MEAN          NaN
CR_DRAWING_LIMIT_RATIO_SUM           0.0
CR_DRAWING_LIMIT_RATIO_VAR           NaN
CREDIT_COUNT                        25.0
Name: 199, Length: 546, dtype: float64

In [105]:
sk_id_curr = '101427'
sk_id_curr = int(sk_id_curr)

sk_id_curr_best = app_bu_prev_pos_instl_cc[app_bu_prev_pos_instl_cc["SK_ID_CURR"] == sk_id_curr]

print(sk_id_curr_best["SK_ID_CURR"])

sk_id_curr_best = sk_id_curr_best.iloc[:,cols]


sk_id_curr_std = scaler.fit_transform(sk_id_curr_best)

sk_id_curr_std[np.isnan(sk_id_curr_std)] = 0

#replacing nan values with 0
#sk_id_curr_std[np.isnan(sk_id_curr_std)] = 0

model.predict(sk_id_curr_std)

215    101427
Name: SK_ID_CURR, dtype: int32


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


array([0], dtype=int8)

In [79]:


#sk_id_curr = request.form.to_dict()['SK_ID_CURR']
sk_id_curr = '101427'
sk_id_curr = int(sk_id_curr)
    
select_index = list(np.where(app_bu_prev_pos_instl_cc["SK_ID_CURR"] == sk_id_curr)[0])
print(select_index)
final_class_label = test_predict[select_index[0]]

#final_class_label = model.predict(X_test_std["SK_ID_CURR"])

if final_class_label == 1:
    prediction = 'The customer with this ID is a Potential Defaulter with label {}.'.format(final_class_label)
else:
    prediction = 'The customer with this ID is not a Potential Defaulter with label.'.format(final_class_label)
    
print(prediction)

[215]
The customer with this ID is a Potential Defaulter with label 1.


In [81]:
sk_id_curr = '102193'
sk_id_curr = int(sk_id_curr)
    
select_index = list(np.where(app_bu_prev_pos_instl_cc["SK_ID_CURR"] == sk_id_curr)[0])
print(select_index)
final_class_label = test_predict[select_index[0]]

#final_class_label = model.predict(X_test_std["SK_ID_CURR"])

if final_class_label == 1:
    prediction = 'The customer with this ID is a Potential Defaulter with label {}.'.format(final_class_label)
else:
    prediction = 'The customer with this ID is not a Potential Defaulter with label.'.format(final_class_label)
    
print(prediction)

[320]
The customer with this ID is a Potential Defaulter with label 1.


In [85]:
sk_id_curr = '101355'
sk_id_curr = int(sk_id_curr)
    
select_index = list(np.where(app_bu_prev_pos_instl_cc["SK_ID_CURR"] == sk_id_curr)[0])
print(select_index)
final_class_label = test_predict[select_index[0]]

#final_class_label = model.predict(X_test_std["SK_ID_CURR"])

if final_class_label == 1:
    prediction = 'The customer with this ID is a Potential Defaulter with label {}.'.format(final_class_label)
else:
    prediction = 'The customer with this ID is NOT a Potential Defaulter with label {}.'.format(final_class_label)
    
print(prediction)

[199]
The customer with this ID is NOT a Potential Defaulter with label 0.
