# Week-5
This notebook is created during week-5 of MLzoomcamp - `Deploying Machine Learning Models`.

# Import modules

In [1]:
import pandas as pd
import numpy as np 

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Load utilities

In [2]:
import sys 
sys.path.append('../src/')

import utilities as utils

# Churn Prediction model from previous week

In [3]:
df = pd.read_csv('../data/03_data_customer_churn.csv')

# Standardize column names 
df.columns = df.columns.str.lower().str.replace(' ','_')

# get list of categorical features
features_cat = list(df.dtypes[df.dtypes == 'object'].index)

# standardize categorical features data
for f in features_cat:
    df[f] = df[f].str.lower().str.replace(' ','_')
    
df.totalcharges = pd.to_numeric(df.totalcharges, errors='coerce')
df.totalcharges = df.totalcharges.fillna(0)


df.churn = (df.churn == 'yes').astype('int')

In [4]:
# Train-Test-Split (80-20)
df_train_full, df_test = utils.train_test_split(df, test_size=0.2, random_state=1)

In [5]:
# Numericals 
list_features_num = ['tenure', 'monthlycharges', 'totalcharges']

# Categoical 
list_features_cat = list(set(df_train_full.columns) - set(list_features_num) - set(['churn','customerid']))

# All Features 
list_features = list_features_num + list_features_cat
list_features

['tenure',
 'monthlycharges',
 'totalcharges',
 'onlinebackup',
 'onlinesecurity',
 'multiplelines',
 'streamingmovies',
 'internetservice',
 'techsupport',
 'paymentmethod',
 'paperlessbilling',
 'phoneservice',
 'partner',
 'deviceprotection',
 'seniorcitizen',
 'streamingtv',
 'dependents',
 'contract',
 'gender']

In [6]:
# params for Kfold CLf function
nsplits = 5
list_C = [1.0]

# Call Kfold CLF
scores = utils.kfold_biclf_logisticreg(df_train_full, 'churn', list_features, nsplits, list_C)

  0%|          | 0/1 [00:00<?, ?it/s]

C =1.0 score_mean=84.041, score_std=0.854


In [7]:
# Train final model 
y_train_full = df_train_full.churn.values
C=1.0
dv, model = utils.train_biclf_logisticreg(df_train_full, y_train_full, list_features, C=C)
y_pred = utils.predict_clf(df_test, list_features, dv, model)

# AUC final model
y_test = df_test.churn.values
auc = utils.get_roc_auc_score(y_test, y_pred)

auc

0.8572386167896259

***

# Save the model

In [8]:
# File name of the output model
C=1.0
filename_model = f'../models/model_C={C}.bin'
filename_model

'../models/model_C=1.0.bin'

In [9]:
# call save model func
utils.save_model_pickle(dv, model, filename_model, verbose=True)

 >>> ML model [Saved]: 
	 - File: ../models/model_C=1.0.bin


# Load the model

In [10]:
import pickle

In [11]:
dv, model = utils.load_model_pickle(filename_model, verbose=True)

 >>> ML model [Loaded]: 
	 - File: ../models/model_C=1.0.bin


In [12]:
dv, model

(DictVectorizer(sparse=False), LogisticRegression(max_iter=1000))

In [13]:
customer = {
    'customerid': '8879-zkjof',
    'gender': 'female',
    'seniorcitizen': 0,
    'partner': 'no',
    'dependents': 'no',
    'tenure': 1,
    'phoneservice': 'yes',
    'multiplelines': 'no',
    'internetservice': 'dsl',
    'onlinesecurity': 'yes',
    'onlinebackup': 'no',
    'deviceprotection': 'yes',
    'techsupport': 'yes',
    'streamingtv': 'yes',
    'streamingmovies': 'yes',
    'contract': 'month-to-month',
    'paperlessbilling': 'yes',
    'paymentmethod': 'bank_transfer_(automatic)',
    'monthlycharges': 79.85,
    'totalcharges': 3320.75
}

In [14]:
X = dv.transform([customer])

In [15]:
model.predict_proba(X)[0,1]

0.5900396422629436

# Notebook into python script

- train script 
- predict script

***

# Web Service - ping pong

In [37]:
import webservice_ping as ping

ping.ping()

'PONG!'