# Mortgage Loans: Predict on new data

In [2]:
import pandas as pd
import numpy as np
import pickle
import plotly.graph_objects as go

## Read in the pickle files

In [40]:
# dataframes for visualization
responded=pd.read_csv('../model_components/cust_responded.csv')
ignored=pd.read_csv('../model_components/cust_ignored.csv')

In [41]:
# random forest model
filename = open('../model_components/cust_response_rf_model.pkl', 'rb')
rf = pickle.load(filename)
filename.close()

In [42]:
# encoder1
filename = open('../model_components/cust_response_edu_onehot_encoder.pkl', 'rb')
encoder1 = pickle.load(filename)
filename.close()

In [43]:
# encoder2
filename = open('../model_components/cust_response_marital_onehot_encoder.pkl', 'rb')
encoder2 = pickle.load(filename)
filename.close()

In [44]:
filename = open('../model_components/age_ss_scaler.pkl', 'rb')
ss_scaler0 = pickle.load(filename)
filename.close()

filename = open('../model_components/income_ss_scaler.pkl', 'rb')
ss_scaler1 = pickle.load(filename)
filename.close()

filename = open('../model_components/recency_ss_scaler.pkl', 'rb')
ss_scaler2 = pickle.load(filename)
filename.close()

filename = open('../model_components/spend_ss_scaler.pkl', 'rb')
ss_scaler3 = pickle.load(filename)
filename.close()

filename = open('../model_components/yrs_cust_ss_scaler.pkl', 'rb')
ss_scaler30 = pickle.load(filename)
filename.close()


In [45]:

filename = open('../model_components/deals_ss_scaler.pkl', 'rb')
ss_scaler4 = pickle.load(filename)
filename.close()

filename = open('../model_components/web_ss_scaler.pkl', 'rb')
ss_scaler40 = pickle.load(filename)
filename.close()

filename = open('../model_components/catalog_ss_scaler.pkl', 'rb')
ss_scaler41 = pickle.load(filename)
filename.close()

filename = open('../model_components/store_ss_scaler.pkl', 'rb')
ss_scaler42 = pickle.load(filename)
filename.close()

filename = open('../model_components/visits_ss_scaler.pkl', 'rb')
ss_scaler43 = pickle.load(filename)
filename.close()

In [87]:
# Remember: the order of arguments must match the order of features
features = [
 'Kidhome',
 'Teenhome',
 'AcceptedCmp5',
 'Education',
 'Marital_Status',
 'age',
 'Income',
 'Recency',
 'Total_Spent',
 'Yrs_Customer',
 'NumDealsPurchases',
 'NumWebPurchases',
 'NumCatalogPurchases',
 'NumStorePurchases',
 'NumWebVisitsMonth'
]

In [88]:
# make some fake data
fake1=[1, 0, 0, 'PhD', 'Married', 45, 55000.0, 45, 2500.0, 5, 1, 0, 0, 1, 10]
fake2=[0, 1, 1, 'Master', 'Single', 35, 65000.0, 90, 5500.0, 10, 1, 2, 3, 1, 20]
fake3=[0, 0, 1, 'Graduation', 'Divorced', 55, 45000.0, 25, 1500.0, 2, 1, 5, 4, 1, 30]

## Write a function to preprocess and predict

In [91]:
## Create a function that can take list of valid inputs & make a prediction
import traceback

def make_predictions(listofargs, Threshold):
    try:
        # the order of the arguments must match the order of the features
        df = pd.DataFrame(columns=features) 
        
        df.loc[0] = listofargs
        
        # convert arguments from integers to floats:
        for var in ['Kidhome', 'Teenhome', 'AcceptedCmp5', 'age', 'Income', 'Recency', 'Total_Spent', 'Yrs_Customer', 'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth']:
            df[var]=int(df[var])

        # transform the categorical variable using the same encoder we trained previously
        ohe=pd.DataFrame(encoder1.transform(df[['Education']]).toarray())
        col_list = ['Education_{}'.format(item) for item in ['Graduation', 'PhD', 'Master', 'Unknown', 'Basic']] 
        ohe.columns=col_list
        df = pd.concat([df, ohe],axis=1)

        ohe=pd.DataFrame(encoder2.transform(df[['Marital_Status']]).toarray())
        col_list = ['Marital_Status_{}'.format(item) for item in ['Married', 'Together', 'Single', 'Divorced', 'Widow', 'Unknown']] 
        ohe.columns=col_list
        df = pd.concat([df, ohe],axis=1)
        
        # create new features using the scalers we trained earlier
        
        df['ln_age'] = ss_scaler0.transform(np.array(np.log(df['age'])).reshape(-1, 1))
        df['ln_Income'] = ss_scaler1.transform(np.array(np.log(df['Income'])).reshape(-1, 1))
        df['ln_Recency'] = ss_scaler2.transform(np.array(np.log(df['Recency'])).reshape(-1, 1))
        df['ln_Spending'] = ss_scaler3.transform(np.array(np.log(df['Total_Spent'])).reshape(-1, 1))
        df['ln_Yrs_Customer'] = ss_scaler3.transform(np.array(np.log(df['Yrs_Customer'])).reshape(-1, 1))
        
        df['ln_deals'] = ss_scaler4.transform(np.array(df['NumDealsPurchases']).reshape(-1, 1))
        df['ln_web'] = ss_scaler40.transform(np.array(df['NumWebPurchases']).reshape(-1, 1))
        df['ln_catalog'] = ss_scaler41.transform(np.array(df['NumCatalogPurchases']).reshape(-1, 1))
        df['ln_store'] = ss_scaler42.transform(np.array(df['NumStorePurchases']).reshape(-1, 1))
        df['ln_visits'] = ss_scaler43.transform(np.array(df['NumWebVisitsMonth']).reshape(-1, 1))
        

        # drop & rearrange the columns in the order expected by your trained model!
        df=df[['Kidhome', 'Teenhome', 'AcceptedCmp5', 'Education_Graduation', 'Education_PhD', 'Education_Master', 'Education_Unknown', 'Education_Basic',
               'Marital_Status_Married', 'Marital_Status_Together', 'Marital_Status_Single', 'Marital_Status_Divorced', 'Marital_Status_Widow', 'Marital_Status_Unknown',
               'ln_age', 'ln_Income', 'ln_Recency', 'ln_Spending', 'ln_Yrs_Customer',
               'ln_deals', 'ln_web', 'ln_catalog', 'ln_store', 'ln_visits']]

        prob = rf.predict_proba(df)
        raw_responded_prob=prob[0][1]
        Threshold=Threshold*.01
        respond_func = lambda y: 'Responded' if raw_responded_prob>Threshold else 'Ignored'
        formatted_ignored_prob = "{:,.1f}%".format(100*prob[0][0])
        formatted_responded_prob = "{:,.1f}%".format(100*prob[0][1])
        return respond_func(raw_responded_prob), formatted_responded_prob, formatted_ignored_prob

    except Exception: 
        traceback.print_exc()

## Predicting on new data

In [92]:
# example 1
make_predictions(fake1, 83)

('Ignored', '23.9%', '76.1%')

In [93]:
# example 2
make_predictions(fake2, 40)

('Responded', '46.0%', '54.0%')

In [94]:
# example 3
make_predictions(fake3, 70)

('Ignored', '60.2%', '39.8%')

## Visualize the new data

In [105]:
fake1

[1, 0, 0, 'PhD', 'Married', 45, 55000.0, 45, 2500.0, 5, 1, 0, 0, 1, 10]

In [106]:
newdata=pd.DataFrame([fake1], columns=features)
newdata

Unnamed: 0,Kidhome,Teenhome,AcceptedCmp5,Education,Marital_Status,age,Income,Recency,Total_Spent,Yrs_Customer,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
0,1,0,0,PhD,Married,45,55000.0,45,2500.0,5,1,0,0,1,10


In [107]:
# plotly visualization of Loan Status

def make_cube(*args):
    newdata=pd.DataFrame(args, columns=features)

    trace0=go.Scatter3d(
        x=responded['Income'],
        y=responded['Total_Spent'],
        z=responded['Recency'],
        name='responded',
        mode='markers',
        text = list(zip(
            ["Marital Status: {}".format(x) for x in responded['Marital_Status']],
            ["<br>Education: {}".format(x) for x in responded['Education']],
            ["<br>Yrs_Customer: {}".format(x) for x in responded['Yrs_Customer']],
            ["<br>Age: {}".format(x) for x in responded['age']],
            ["<br>Kid Home: {}".format(x) for x in responded['Kidhome']],
            ["<br>Teen Home: {}".format(x) for x in responded['Teenhome']]
            )) ,
        hovertemplate =
            '<b>Income: $%{x:.0f}K</b>'+
            '<br><b>Total Spent: $%{y:.0f}</b>'+
            '<br><b>Recency: %{z:.0f}</b>'+
            '<br>%{text}',
        hoverinfo='text',
        marker=dict(size=6, color='blue', opacity=0.4))


    trace1=go.Scatter3d(
        x=ignored['Income'],
        y=ignored['Total_Spent'],
        z=ignored['Recency'],
        name='ignored',
        mode='markers',
        text = list(zip(
            ["Marital Status: {}".format(x) for x in ignored['Marital_Status']],
            ["<br>Education: {}".format(x) for x in ignored['Education']],
            ["<br>Yrs_Customer: {}".format(x) for x in ignored['Yrs_Customer']],
            ["<br>Age: {}".format(x) for x in ignored['age']],
            ["<br>Kid Home: {}".format(x) for x in ignored['Kidhome']],
            ["<br>Teen Home: {}".format(x) for x in ignored['Teenhome']]
                )) ,
        hovertemplate =
            '<b>Income: $%{x:.0f}K</b>'+
            '<br><b>Total Spent: $%{y:.0f}</b>'+
            '<br><b>Recency: %{z:.0f}</b>'+
            '<br>%{text}',
        hoverinfo='text',
        marker=dict(size=6, color='red', opacity=0.4))

    trace2=go.Scatter3d(
        x=newdata['Income'],
        y=newdata['Total_Spent'],
        z=newdata['Recency'],
        name='Customer',
        mode='markers',
        text = list(zip(
            ["Marital Status: {}".format(x) for x in newdata['Marital_Status']],
            ["<br>Education: {}".format(x) for x in newdata['Education']],
            ["<br>Yrs_Customer: {}".format(x) for x in newdata['Yrs_Customer']],
            ["<br>Age: {}".format(x) for x in newdata['age']],
            ["<br>Kid Home: {}".format(x) for x in newdata['Kidhome']],
            ["<br>Teen Home: {}".format(x) for x in newdata['Teenhome']]
                )) ,
        hovertemplate =
            '<b>Income: $%{x:.0f}K</b>'+
            '<br><b>Total Spent: $%{y:.0f}</b>'+
            '<br><b>Recency: %{z:.0f}</b>'+
            '<br>%{text}',
        hoverinfo='text',
        marker=dict(size=15, color='yellow'))


    layout = go.Layout(title="Customer Responses",
                        showlegend=True,
                            scene = dict(
                            xaxis=dict(title='Income'),
                            yaxis=dict(title='Total Spent'),
                            zaxis=dict(title='Recency')
                    ))
    fig=go.Figure([trace0, trace1, trace2], layout)
    return fig

In [108]:
make_cube(fake1)