In [None]:
# !pip install pandas numpy scikit-learn matplotlib
# !pip install kfserving --upgrade -q 
# !pip install keras==2.2.5 -q
# !pip install scikit-image 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn as sk
# Import the submodules reached through the `sk` alias explicitly, so that
# `sk.metrics` / `sk.model_selection` do not depend on import side effects.
import sklearn.metrics
import sklearn.model_selection

from sklearn import tree
from sklearn.linear_model import LogisticRegression

In [None]:
#Uncomment to fetch and unzip dataset
#Citation: [Moro et al., 2014] S. Moro, P. Cortez and P. Rita. A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems, Elsevier, 62:22-31, June 2014
#https://archive.ics.uci.edu/ml/datasets/Bank+Marketing

# !curl -O https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip
# !ls 
# !unzip bank.zip
# !mkdir -p data/bank; mv bank-full.csv data/bank
# !mv bank-names.txt data/bank
# !rm bank.zip bank.csv
# !ls

In [None]:
# Read the complete Bank Marketing dataset; the CSV uses ';' as its field separator.
source_bank_data = pd.read_csv('data/bank/bank-full.csv', sep=';')
# Encode the yes/no outcome column 'y' as 1/0 in a new 'label' column.
source_bank_data['label'] = source_bank_data['y'].map({'yes': 1, 'no': 0})

In [None]:
# Keep the target as its own Series before the frame is re-encoded.
label = source_bank_data['label']

# Replace each categorical feature with dummy (indicator) columns.
categorical_columns = [
    'housing', 'loan', 'education', 'job',
    'marital', 'default', 'contact', 'poutcome',
]
source_bank_data = pd.get_dummies(source_bank_data, columns=categorical_columns)

In [None]:
# Convert three-letter month abbreviations to calendar month numbers (jan=1 ... dec=12).
calend = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
          'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}

# Map into a temporary Series first so a bad value never overwrites the column with NaN.
month_numbers = source_bank_data['month'].map(calend)
if month_numbers.isna().any():
    # Series.map yields NaN for anything missing from the dict, which also happens if
    # this cell is re-run after 'month' has already been converted to numbers.
    raise ValueError("'month' holds values that are not known month abbreviations "
                     "(has this cell already been run?)")
source_bank_data['month'] = month_numbers

In [None]:
# Build the training frame from the source data with the target columns removed.
# errors='ignore' keeps this cell re-runnable: on a second run 'y' and 'label' are
# already gone and a plain drop would raise KeyError.
source_bank_data.drop(columns=['y', 'label'], inplace=True, errors='ignore')
# Copying is fine here because the sample is small; be careful with your own data volumes.
bank_df = source_bank_data.copy()

In [None]:
# Hold out 20% of the rows as a test set via scikit-learn; the fixed seed keeps the split repeatable.
split_parts = sk.model_selection.train_test_split(
    bank_df,
    label,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit an L2-penalised logistic regression with scikit-learn.
# No solver is passed, so the library default applies ("lbfgs" per the original note);
# the optimiser is allowed up to 15000 iterations.
logistic_regression = LogisticRegression(max_iter=15000, penalty='l2')
log_reg_trained = logistic_regression.fit(X=X_train, y=y_train)

In [None]:
# Evaluate the fitted logistic regression on the held-out test set.
log_test_pred = logistic_regression.predict(X_test)
# ROC AUC ranks examples by score, so feed it the positive-class probability
# (column 1 of predict_proba, i.e. label == 1) rather than hard 0/1 predictions.
log_test_proba = logistic_regression.predict_proba(X_test)[:, 1]

print('Logistic Regression Test Accuracy:', sk.metrics.accuracy_score(y_test, log_test_pred))
print('Logistic Regression ROC Score:', sk.metrics.roc_auc_score(y_test, log_test_proba))
print('Classification Report:\n', sk.metrics.classification_report(y_test, log_test_pred))


In [None]:
# ## Uncomment to view feature importance plots
# ##
# importance = logistic_regression.coef_
# # summarize feature importance
# for i,v in enumerate(importance[0]):
#     print('Feature: %0d, Score: %.5f' % (i,v))
# # plot feature importance
# plt.bar([x for x in range(len(importance[0]))], importance[0])
# plt.show()

In [None]:
# Train a decision tree classifier with scikit-learn.
# A fixed random_state (same seed as the train/test split) makes the fitted tree
# and the metrics below reproducible across runs.
decision_tree = tree.DecisionTreeClassifier(random_state=42)
tree_model = decision_tree.fit(X_train, y_train)

# Evaluate the fitted tree on the held-out test set.
tree_test_predictions = decision_tree.predict(X_test)
# ROC AUC needs a score per example, so use the positive-class probability
# (column 1 of predict_proba, i.e. label == 1) instead of hard 0/1 predictions.
tree_test_proba = decision_tree.predict_proba(X_test)[:, 1]

print('Decision Tree Test Accuracy:', sk.metrics.accuracy_score(y_test, tree_test_predictions))
print('Decision Tree ROC Score:', sk.metrics.roc_auc_score(y_test, tree_test_proba))
print('Classification Report:\n', sk.metrics.classification_report(y_test, tree_test_predictions))

In [None]:
# # Uncomment to view feature importance plots
# #
# importance = tree_model.feature_importances_
# # summarize feature importance
# for i,v in enumerate(importance):
#     print('Feature: %0d, Score: %.5f' % (i,v))
# # plot feature importance
# plt.bar([x for x in range(len(importance))], importance)
# plt.show()

In [None]:
from joblib import dump
# Persist the fitted decision tree to 'tree-model.joblib' (path is relative to the working directory).
dump(value=tree_model, filename='tree-model.joblib')

In [None]:
#Documentation for SKLearn Example using KFServing: 
#https://github.com/kubeflow/kfserving/tree/master/docs/samples/sklearn

#This notebook is from a workshop to familiarize people with Kubeflow
#and machine learning best practices. The last step is to deploy the model
#as part of an inference service using KFServing.

In [None]:
from kubernetes import client

from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2SKLearnSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements


In [None]:
#Fill Out to Create Inference Service