# Blacklight/MA Capital One Cash Deposits Marketing Predictive Analytics 


## Algorithmic Code for Predicting Total Deposits for Capital One Bank CD Offering


In [1]:
import pandas as pd
import numpy as np
%matplotlib inline

## Importing Datasets

In [2]:
path="Raw Data\RAW_COF-CDBankRate_Report_thru_1.06.2019.csv"

In [3]:
ls

 Volume in drive C has no label.
 Volume Serial Number is D00F-3EF0

 Directory of C:\Users\tchopra\Predictive Analytics Folder

01/08/2019  05:16 PM    <DIR>          .
01/08/2019  05:16 PM    <DIR>          ..
01/08/2019  05:16 PM    <DIR>          .ipynb_checkpoints
01/08/2019  05:16 PM            24,334 BankRate_LinearRegression_PredictiveModel.ipynb
01/08/2019  05:10 PM    <DIR>          Raw Data
               1 File(s)         24,334 bytes
               4 Dir(s)  111,412,314,112 bytes free


In [4]:
dfbr=pd.read_csv(path)

##  Reviewing Dataset from Capital One Bank

In [8]:
dfbr.columns

Index(['Campaign', 'Date', 'Site (DCM)', 'Placement', 'Impressions', 'Clicks',
       'Net Media Cost', 'Total Gross NABs', 'Total New Cust NABs (Gross)',
       'Total Fast App NABs (Gross)',
       ...
       'New Cust Deps (Post Amb, Post Cann)', 'Fast App Deps (Post Amb)',
       'Fast App Deposits (Post Amb, Post Cann)', 'Digital Channel', 'Weekday',
       'Month', 'Term', 'Site + Term', 'View Based NABs (gross)',
       'Click Based NABs (gross)'],
      dtype='object', length=103)

In [9]:
dfbr.dtypes

Campaign                                    object
Date                                        object
Site (DCM)                                  object
Placement                                   object
Impressions                                  int64
Clicks                                       int64
Net Media Cost                              object
Total Gross NABs                             int64
Total New Cust NABs (Gross)                  int64
Total Fast App NABs (Gross)                  int64
12M_New Customer_Desktop: Click              int64
12M_New Customer_Desktop: View               int64
12M_New Customer_Mobile: Click               int64
12M_New Customer_Mobile: View                int64
18M_New Customer_Desktop: Click              int64
18M_New Customer_Desktop: View               int64
18M_New Customer_Mobile: Click               int64
18M_New Customer_Mobile: View                int64
24M_New Customer_Desktop: Click              int64
24M_New Customer_Desktop: View 

In [10]:
obj_cols = dfbr.select_dtypes(['object']).columns

In [11]:
ndfbr = dfbr.drop(obj_cols, axis=1)

## Splitting the Dataset into 'Training Data' and 'Testing Data'

In [12]:
from sklearn.model_selection import train_test_split

# Extracting the target variable without encoding(we will have 2 for this dataset)

train_on= ndfbr
train_label= dfbr["New Cust Deps (Post Amb)"].astype(int) +dfbr["Fast App Deps (Post Amb)"].astype(int)
x_train, x_test, y_train, y_test = train_test_split(train_on, train_label, test_size=0.25, random_state=42)

## Defining Function to Evaluate Performance

In [13]:
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore', category = RuntimeWarning)

def metrics(train_pred, valid_pred, y_train, y_valid):
    """Calculate metrics: Root mean squared error and mean absolute percentage error"""
    
    # Root mean squared error
    train_rmse = np.sqrt(mean_squared_error(y_train, train_pred))
    valid_rmse = np.sqrt(mean_squared_error(y_valid, valid_pred))
    
    # Calculate absolute percentage error
    train_ape = abs((y_train - train_pred) / y_train)
    valid_ape = abs((y_valid - valid_pred) / y_valid)
    
    # Account for y values of 0
    train_ape[train_ape == np.inf] = 0
    train_ape[train_ape == -np.inf] = 0
    valid_ape[valid_ape == np.inf] = 0
    valid_ape[valid_ape == -np.inf] = 0
    
    train_mape = 100 * np.mean(train_ape)
    valid_mape = 100 * np.mean(valid_ape)
    
    return train_rmse, valid_rmse, train_mape, valid_mape

def evaluate(model, features, X_train, X_valid, y_train, y_valid):
    """Mean absolute percentage error"""
    
    # Make predictions
    train_pred = model.predict(X_train[features])
    valid_pred = model.predict(X_valid[features])
    
    # Get metrics
    train_rmse, valid_rmse, train_mape, valid_mape = metrics(train_pred, valid_pred,
                                                             y_train, y_valid)
    print(f'Training:   rmse = {round(train_rmse, 2)} \t mape = {round(train_mape, 2)}')
    print(f'Validation: rmse = {round(valid_rmse, 2)} \t mape = {round(valid_mape, 2)}')

## Creating Target Variable for Linear Regression

In [14]:
lr = LinearRegression()

In [15]:
# Training a simple (only using three methods)

lr.fit(x_train, y_train)

print('Intercept', round(lr.intercept_, 4))
print('abs_lat_diff coef: ', round(lr.coef_[0], 4), 
      '\tabs_lon_diff coef:', round(lr.coef_[1], 4),
      '\tpassenger_count coef:', round(lr.coef_[2], 4))

Intercept -0.036
abs_lat_diff coef:  -0.0002 	abs_lon_diff coef: -0.0291 	passenger_count coef: 0.0257


In [16]:
lr.coef_

array([-2.17046684e-04, -2.91421240e-02,  2.57023674e-02,  5.94933959e-02,
       -3.37910283e-02, -1.06855362e-01, -1.50380819e-10,  3.00569095e-01,
       -2.35664821e-11, -1.96140956e-01, -1.74705399e-01, -3.02809918e-01,
       -2.86864383e-01, -7.85741646e-02, -2.34751670e-02, -2.71055888e-02,
        1.29582109e-01,  7.41919830e-02, -3.54031788e-02,  3.63853882e-01,
       -1.33236375e-02,  3.17917175e-02,  2.26503005e-02, -1.33004189e-01,
       -1.27675648e-15, -4.67115183e-02,  3.11337326e-02,  3.20710059e-01,
       -7.66747776e-16, -1.74865363e-02, -2.18717601e-01, -4.12726888e-02,
       -3.86847061e-02, -1.59750319e-01,  5.79745108e-01,  8.80965412e-02,
        3.18044970e-01,  1.59682938e-02, -8.30889767e-02,  2.84984738e-02,
       -1.21795116e-01, -1.74650262e-03, -3.80885601e-02, -3.35272321e-02,
       -1.06046233e-01,  3.26128013e-16,  3.60822483e-16, -1.30101521e-02,
       -4.95003948e-02, -1.56604437e-02, -6.79397812e-02, -5.20544497e-02,
       -8.92974873e-02,  

## Evaluating Predictiion Accuarcy of Linear Regression Model

In [17]:
evaluate(lr, x_train.columns , x_train, x_test, y_train, y_test)

Training:   rmse = 0.25 	 mape = 0.0
Validation: rmse = 0.25 	 mape = 0.0
