### **Importing Libraries** <a id="head1"></a>

In [2]:
import numpy as np
import pandas as pd
                                                                                   
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

import pickle

import warnings
warnings.filterwarnings('ignore')

### **Loading Data** <a id="head2"></a>

In [3]:
telco_customer = pd.read_csv("ChurnTrainDataset.csv")

In [4]:
telco_customer

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls,churn
0,OH,107.0,area_code_415,no,yes,26.0,161.6,123.0,27.47,195.5,103.0,16.62,254.4,103.0,11.45,13.7,3.0,3.70,1.0,no
1,NJ,137.0,area_code_415,no,no,0.0,243.4,114.0,41.38,121.2,110.0,10.30,162.6,104.0,7.32,12.2,5.0,3.29,0.0,no
2,OH,84.0,area_code_408,yes,no,0.0,299.4,71.0,50.90,61.9,88.0,5.26,196.9,89.0,8.86,6.6,7.0,1.78,2.0,no
3,OK,75.0,area_code_415,yes,no,0.0,166.7,113.0,28.34,148.3,122.0,12.61,186.9,121.0,8.41,10.1,3.0,2.73,3.0,no
4,MA,121.0,area_code_510,no,yes,24.0,218.2,88.0,37.09,348.5,108.0,29.62,212.6,118.0,9.57,7.5,7.0,2.03,3.0,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4245,MT,83.0,area_code_415,no,no,0.0,188.3,70.0,32.01,243.8,88.0,20.72,213.7,79.0,9.62,10.3,6.0,2.78,0.0,no
4246,WV,73.0,area_code_408,no,no,0.0,177.9,89.0,30.24,131.2,82.0,11.15,186.2,89.0,8.38,11.5,6.0,3.11,3.0,no
4247,NC,75.0,area_code_408,no,no,0.0,170.7,101.0,29.02,193.1,126.0,16.41,129.1,104.0,5.81,6.9,7.0,1.86,1.0,no
4248,HI,50.0,area_code_408,no,yes,40.0,235.7,127.0,40.07,223.0,126.0,18.96,297.5,116.0,13.39,9.9,5.0,2.67,2.0,no


In [5]:
telco_customer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4250 entries, 0 to 4249
Data columns (total 20 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   state                          4232 non-null   object 
 1   account_length                 4216 non-null   float64
 2   area_code                      4234 non-null   object 
 3   international_plan             4250 non-null   object 
 4   voice_mail_plan                4237 non-null   object 
 5   number_vmail_messages          4216 non-null   float64
 6   total_day_minutes              4240 non-null   float64
 7   total_day_calls                4248 non-null   float64
 8   total_day_charge               4242 non-null   float64
 9   total_eve_minutes              4215 non-null   float64
 10  total_eve_calls                4233 non-null   float64
 11  total_eve_charge               4242 non-null   float64
 12  total_night_minutes            4248 non-null   f

### **Preprocessing** <a id="head5"></a>

In [6]:
# Encoding categorical data using cat codes
for col in telco_customer.columns[telco_customer.dtypes == 'object']:
    if col!='churn':
        telco_customer[col]=telco_customer[col].astype('category').cat.codes

# Fill Null Values of target column
telco_customer['churn'] =  telco_customer['churn'].fillna(telco_customer['churn'].mode()[0])

# Manual encoding the target variable
a={'yes':1,'no':0}
telco_customer['churn']=telco_customer['churn'].map(a)

### **Seperate Features & Target Variable** <a id="head6"></a>

In [7]:
X = telco_customer.drop('churn',axis=1)
y=telco_customer['churn']

In [8]:
cat_cols = X.select_dtypes(include=['int8']).columns.values

In [9]:
num_cols = X.select_dtypes(include=['float64']).columns.values

### **Train Test Split**  <a id="head7"></a>

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=17)

## Feature Engineering pipeline

In [11]:
# Imputation Transformer to fill null values
fill_null_col = ColumnTransformer([
        ('FillCat',SimpleImputer(strategy='most_frequent'),cat_cols),
        ('FillNumeric',SimpleImputer(strategy='median'),num_cols),
    ],remainder='passthrough')

# Scaling
scale_col = ColumnTransformer([
    ('scale', StandardScaler(),slice(5,18))
])

# Principal Component Analysis
pca_col = ColumnTransformer([
    ('PCA', PCA(n_components=10),slice(0,18))
])

# # Model
logistic=LogisticRegression()
decision=DecisionTreeClassifier()
random = RandomForestClassifier()

### **Model Pipeline** <a id="head9"></a>

In [12]:
pipeline_model1 = Pipeline([('fill_null_col', fill_null_col),
                 ('scale_col', scale_col),
                 ('pca_col', pca_col),
                 ('classifier1', logistic)])

pipeline_model2 = Pipeline([('fill_null_col', fill_null_col),
                 ('scale_col', scale_col),
                 ('pca_col', pca_col),
                 ('classifier2', decision)])

pipeline_model3 = Pipeline([('fill_null_col', fill_null_col),
                 ('scale_col', scale_col),
                 ('pca_col', pca_col),
                 ('classifier3', random)])

In [13]:
pipelines=[pipeline_model1,pipeline_model2,pipeline_model3]

In [14]:
best_accuracy=0.0
best_classifier=0
best_pipeline=""

In [15]:
pipe_dict={0:'Logistic Regression',1:'Decision Tree',2:'Random Forest'}

# Fitting pipelines
for pipe in pipelines:
    pipe.fit(X_train,y_train)

In [16]:
for i,model in enumerate(pipelines):
    print("{} Test Accuracy : {}".format(pipe_dict[i],model.score(X_test,y_test)))

Logistic Regression Test Accuracy : 0.8552941176470589
Decision Tree Test Accuracy : 0.8141176470588235
Random Forest Test Accuracy : 0.8811764705882353


In [17]:
for i,model in enumerate(pipelines):
    pred=model.predict(X_test)
    print("\n{} Classification Report :\n {}".format(pipe_dict[i],classification_report(y_test,pred)))


Logistic Regression Classification Report :
               precision    recall  f1-score   support

           0       0.85      1.00      0.92       724
           1       1.00      0.02      0.05       126

    accuracy                           0.86       850
   macro avg       0.93      0.51      0.48       850
weighted avg       0.88      0.86      0.79       850


Decision Tree Classification Report :
               precision    recall  f1-score   support

           0       0.90      0.88      0.89       724
           1       0.39      0.44      0.41       126

    accuracy                           0.81       850
   macro avg       0.64      0.66      0.65       850
weighted avg       0.82      0.81      0.82       850


Random Forest Classification Report :
               precision    recall  f1-score   support

           0       0.88      0.99      0.93       724
           1       0.86      0.24      0.37       126

    accuracy                           0.88       850
  

In [18]:
for i,model in enumerate(pipelines):
    if model.score(X_test,y_test)>best_accuracy:
        best_accuracy=model.score(X_test,y_test)
        best_pipeline=model
        best_classifier=i
print('Classifier with the best accuracy:{}'.format(pipe_dict[best_classifier]))

Classifier with the best accuracy:Random Forest


### **Best Hyperparameters for Random Forest Classifier Using GridSearchCV using a pipeline** 
<a id="head10"></a>

In [19]:
# params = [{"classifier3__n_estimators": range(100, 501, 100),
#            "classifier3__max_depth":range(10,30,10),
#            "classifier3__min_samples_leaf":[1, 2, 4],
#            "classifier3__min_samples_split": [2, 5, 10],
#           }]
                 
# randomized_cv = GridSearchCV(estimator = pipeline_model3,
#                            param_grid = params,
#                            cv = 10 ,verbose = 1, n_jobs=-1)

# randomized_cv.fit(X_train,y_train)
# randomized_cv.best_params_

In [20]:
randomized_cv = Pipeline([('fill_null_col', fill_null_col),
                 ('scale_col', scale_col),
                 ('pca_col', pca_col),
                 ('classifier3', RandomForestClassifier(max_depth=20,min_samples_leaf=1,min_samples_split=5,n_estimators=400))])

randomized_cv.fit(X_train,y_train)
filename = 'modelv1.pkl'
pickle.dump(randomized_cv, open(filename, 'wb'))

### Load the model from disk

In [21]:
filename = 'modelv1.pkl'
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test, y_test)
print(result)

0.8776470588235294


In [22]:
# {0:
#     {
#     "predict":1,
#     "predict_prob":0.92,
#     "threshold":
#     },
#  1:
#  {
#    "predict":0,
#    "predict_prob":0.92,
#    "threshold":  
#  }
# }

In [23]:
#  {
#    "predict":0,
#    "predict_prob":0.92,
#    "threshold":  
#  }

### **Model Prediction**<a id="head11"></a>

In [24]:
user_input = telco_customer.sample(10).drop('churn',axis=1)
user_input

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls
3255,49,136.0,2,0,0,0.0,240.1,121.0,40.82,271.2,123.0,23.05,132.7,98.0,5.97,17.2,4.0,4.64,0.0
2128,15,168.0,1,0,0,0.0,183.2,131.0,31.14,179.2,73.0,15.23,292.8,100.0,13.18,9.9,5.0,2.67,2.0
1360,1,95.0,1,0,0,0.0,194.6,114.0,33.08,232.8,106.0,19.79,173.4,92.0,7.8,3.8,2.0,1.03,3.0
1341,0,76.0,1,0,0,0.0,143.7,55.0,24.43,173.1,108.0,14.71,239.1,95.0,10.76,5.8,6.0,1.57,1.0
789,33,81.0,1,0,1,28.0,167.9,147.0,28.54,190.7,105.0,16.21,193.0,103.0,8.69,9.2,6.0,2.48,4.0
3055,29,100.0,1,0,0,0.0,220.3,106.0,37.45,126.1,124.0,10.72,242.0,67.0,10.89,10.2,1.0,2.75,3.0
1885,22,116.0,0,0,0,0.0,133.3,94.0,22.66,247.8,126.0,21.06,219.0,78.0,9.86,11.3,5.0,3.05,5.0
3023,10,90.0,1,0,0,0.0,68.8,124.0,11.7,168.1,106.0,14.29,281.7,96.0,12.68,9.8,5.0,2.65,1.0
1219,47,136.0,0,0,0,0.0,152.6,97.0,25.94,208.9,85.0,17.76,119.1,99.0,5.36,5.0,10.0,1.35,1.0
1087,37,129.0,0,0,0,0.0,98.0,99.0,16.66,240.7,62.0,20.46,254.8,123.0,11.47,10.5,4.0,2.84,0.0


In [25]:
new =user_input.to_dict('records')
new

[{'state': 49,
  'account_length': 136.0,
  'area_code': 2,
  'international_plan': 0,
  'voice_mail_plan': 0,
  'number_vmail_messages': 0.0,
  'total_day_minutes': 240.1,
  'total_day_calls': 121.0,
  'total_day_charge': 40.82,
  'total_eve_minutes': 271.2,
  'total_eve_calls': 123.0,
  'total_eve_charge': 23.05,
  'total_night_minutes': 132.7,
  'total_night_calls': 98.0,
  'total_night_charge': 5.97,
  'total_intl_minutes': 17.2,
  'total_intl_calls': 4.0,
  'total_intl_charge': 4.64,
  'number_customer_service_calls': 0.0},
 {'state': 15,
  'account_length': 168.0,
  'area_code': 1,
  'international_plan': 0,
  'voice_mail_plan': 0,
  'number_vmail_messages': 0.0,
  'total_day_minutes': 183.2,
  'total_day_calls': 131.0,
  'total_day_charge': 31.14,
  'total_eve_minutes': 179.2,
  'total_eve_calls': 73.0,
  'total_eve_charge': 15.23,
  'total_night_minutes': 292.8,
  'total_night_calls': 100.0,
  'total_night_charge': 13.18,
  'total_intl_minutes': 9.9,
  'total_intl_calls': 5.0,


In [26]:
new2=[{'state': 'KS',
  'account_length': 18.0,
  'area_code': 'area_code_510',
  'international_plan': 'no',
  'voice_mail_plan': 'no',
  'number_vmail_messages': 0.0,
  'total_day_minutes': 180.2,
  'total_day_calls': 115.0,
  'total_day_charge': 30.63,
  'total_eve_minutes': 83.4,
  'total_eve_calls': 114.0,
  'total_eve_charge': 7.09,
  'total_night_minutes': 245.3,
  'total_night_calls': 111.0,
  'total_night_charge': 11.04,
  'total_intl_minutes': 11.9,
  'total_intl_calls': 6.0,
  'total_intl_charge': 3.21,
  'number_customer_service_calls': 1.0},
 {'state': 'LA',
  'account_length': 143.0,
  'area_code': 'area_code_415',
  'international_plan': 'no',
  'voice_mail_plan': 'no',
  'number_vmail_messages': 0.0,
  'total_day_minutes': 178.2,
  'total_day_calls': 109.0,
  'total_day_charge': 30.29,
  'total_eve_minutes': 254.6,
  'total_eve_calls': 93.0,
  'total_eve_charge': 21.64,
  'total_night_minutes': 241.3,
  'total_night_calls': 139.0,
  'total_night_charge': 10.86,
  'total_intl_minutes': 8.5,
  'total_intl_calls': 7.0,
  'total_intl_charge': 2.3,
  'number_customer_service_calls': 0.0},
 {'state': 'TX',
  'account_length': 104.0,
  'area_code': 'area_code_510',
  'international_plan': 'no',
  'voice_mail_plan': 'no',
  'number_vmail_messages': 0.0,
  'total_day_minutes': 111.9,
  'total_day_calls': 105.0,
  'total_day_charge': 19.02,
  'total_eve_minutes': 197.7,
  'total_eve_calls': 121.0,
  'total_eve_charge': 16.8,
  'total_night_minutes': 302.8,
  'total_night_calls': 108.0,
  'total_night_charge': 13.63,
  'total_intl_minutes': 10.1,
  'total_intl_calls': 5.0,
  'total_intl_charge': 2.73,
  'number_customer_service_calls': 1.0},
 {'state': 'RI',
  'account_length': 107.0,
  'area_code': 'area_code_415',
  'international_plan': 'no',
  'voice_mail_plan': 'yes',
  'number_vmail_messages': 37.0,
  'total_day_minutes': 60.0,
  'total_day_calls': 102.0,
  'total_day_charge': 10.2,
  'total_eve_minutes': 102.2,
  'total_eve_calls': 80.0,
  'total_eve_charge': 8.69,
  'total_night_minutes': 261.8,
  'total_night_calls': 106.0,
  'total_night_charge': 11.78,
  'total_intl_minutes': 11.1,
  'total_intl_calls': 3.0,
  'total_intl_charge': 3.0,
  'number_customer_service_calls': 0.0},
 {'state': 'WV',
  'account_length': 101.0,
  'area_code': 'area_code_415',
  'international_plan': 'no',
  'voice_mail_plan': 'yes',
  'number_vmail_messages': 28.0,
  'total_day_minutes': 220.3,
  'total_day_calls': 96.0,
  'total_day_charge': 37.45,
  'total_eve_minutes': 285.8,
  'total_eve_calls': 72.0,
  'total_eve_charge': 24.29,
  'total_night_minutes': 203.0,
  'total_night_calls': 111.0,
  'total_night_charge': 9.14,
  'total_intl_minutes': 9.4,
  'total_intl_calls': 6.0,
  'total_intl_charge': 2.54,
  'number_customer_service_calls': 4.0},
 {'state': 'IL',
  'account_length': 76.0,
  'area_code': 'area_code_510',
  'international_plan': 'no',
  'voice_mail_plan': 'no',
  'number_vmail_messages': 0.0,
  'total_day_minutes': 186.1,
  'total_day_calls': 96.0,
  'total_day_charge': 31.64,
  'total_eve_minutes': 211.6,
  'total_eve_calls': 100.0,
  'total_eve_charge': 17.99,
  'total_night_minutes': 230.6,
  'total_night_calls': 100.0,
  'total_night_charge': 10.38,
  'total_intl_minutes': 8.0,
  'total_intl_calls': 4.0,
  'total_intl_charge': 2.16,
  'number_customer_service_calls': 0.0},
 {'state': 'SC',
  'account_length': 19.0,
  'area_code': 'area_code_510',
  'international_plan': 'no',
  'voice_mail_plan': 'no',
  'number_vmail_messages': 0.0,
  'total_day_minutes': 259.4,
  'total_day_calls': 116.0,
  'total_day_charge': 44.1,
  'total_eve_minutes': 269.7,
  'total_eve_calls': 109.0,
  'total_eve_charge': 22.92,
  'total_night_minutes': 175.3,
  'total_night_calls': 130.0,
  'total_night_charge': 7.89,
  'total_intl_minutes': 9.5,
  'total_intl_calls': 3.0,
  'total_intl_charge': 2.57,
  'number_customer_service_calls': 1.0},
 {'state': 'WY',
  'account_length': 39.0,
  'area_code': 'area_code_510',
  'international_plan': 'no',
  'voice_mail_plan': 'yes',
  'number_vmail_messages': 38.0,
  'total_day_minutes': 201.8,
  'total_day_calls': 66.0,
  'total_day_charge': 34.31,
  'total_eve_minutes': 200.1,
  'total_eve_calls': 87.0,
  'total_eve_charge': 17.01,
  'total_night_minutes': 173.7,
  'total_night_calls': 112.0,
  'total_night_charge': 7.82,
  'total_intl_minutes': 9.5,
  'total_intl_calls': 3.0,
  'total_intl_charge': 2.57,
  'number_customer_service_calls': 0.0},
 {'state': 'OR',
  'account_length': 59.0,
  'area_code': 'area_code_408',
  'international_plan': 'no',
  'voice_mail_plan': 'yes',
  'number_vmail_messages': 28.0,
  'total_day_minutes': 120.9,
  'total_day_calls': 97.0,
  'total_day_charge': 20.55,
  'total_eve_minutes': 213.0,
  'total_eve_calls': 92.0,
  'total_eve_charge': 18.11,
  'total_night_minutes': 163.1,
  'total_night_calls': 116.0,
  'total_night_charge': 7.34,
  'total_intl_minutes': 8.5,
  'total_intl_calls': 5.0,
  'total_intl_charge': 2.3,
  'number_customer_service_calls': 2.0},
 {'state': 'MI',
  'account_length': 52.0,
  'area_code': 'area_code_415',
  'international_plan': 'no',
  'voice_mail_plan': 'no',
  'number_vmail_messages': 0.0,
  'total_day_minutes': 204.4,
  'total_day_calls': 97.0,
  'total_day_charge': 34.75,
  'total_eve_minutes': 273.2,
  'total_eve_calls': 128.0,
  'total_eve_charge': 23.22,
  'total_night_minutes': 179.6,
  'total_night_calls': 118.0,
  'total_night_charge': 8.08,
  'total_intl_minutes': 11.0,
  'total_intl_calls': 5.0,
  'total_intl_charge': 2.97,
  'number_customer_service_calls': 1.0}]

In [27]:
final = pd.DataFrame.from_dict(new2)
final

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls
0,KS,18.0,area_code_510,no,no,0.0,180.2,115.0,30.63,83.4,114.0,7.09,245.3,111.0,11.04,11.9,6.0,3.21,1.0
1,LA,143.0,area_code_415,no,no,0.0,178.2,109.0,30.29,254.6,93.0,21.64,241.3,139.0,10.86,8.5,7.0,2.3,0.0
2,TX,104.0,area_code_510,no,no,0.0,111.9,105.0,19.02,197.7,121.0,16.8,302.8,108.0,13.63,10.1,5.0,2.73,1.0
3,RI,107.0,area_code_415,no,yes,37.0,60.0,102.0,10.2,102.2,80.0,8.69,261.8,106.0,11.78,11.1,3.0,3.0,0.0
4,WV,101.0,area_code_415,no,yes,28.0,220.3,96.0,37.45,285.8,72.0,24.29,203.0,111.0,9.14,9.4,6.0,2.54,4.0
5,IL,76.0,area_code_510,no,no,0.0,186.1,96.0,31.64,211.6,100.0,17.99,230.6,100.0,10.38,8.0,4.0,2.16,0.0
6,SC,19.0,area_code_510,no,no,0.0,259.4,116.0,44.1,269.7,109.0,22.92,175.3,130.0,7.89,9.5,3.0,2.57,1.0
7,WY,39.0,area_code_510,no,yes,38.0,201.8,66.0,34.31,200.1,87.0,17.01,173.7,112.0,7.82,9.5,3.0,2.57,0.0
8,OR,59.0,area_code_408,no,yes,28.0,120.9,97.0,20.55,213.0,92.0,18.11,163.1,116.0,7.34,8.5,5.0,2.3,2.0
9,MI,52.0,area_code_415,no,no,0.0,204.4,97.0,34.75,273.2,128.0,23.22,179.6,118.0,8.08,11.0,5.0,2.97,1.0


In [28]:
for col in final.columns[final.dtypes == 'object']:
        final[col]=final[col].astype('category').cat.codes

In [29]:
predict_prob = model.predict_proba(final).tolist()
predict_prob

[[0.92, 0.08],
 [0.92, 0.08],
 [0.97, 0.03],
 [0.97, 0.03],
 [0.89, 0.11],
 [0.96, 0.04],
 [0.11, 0.89],
 [1.0, 0.0],
 [1.0, 0.0],
 [0.97, 0.03]]

In [30]:
predict_prob[0][1]

0.08

In [31]:
prediction = model.predict(final).tolist()
prediction

[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]

In [32]:
prediction[0]

0

In [33]:
threshold_val=0.5

In [34]:
arr=[]
for i in range(0,10):
    temp={
        "predict":prediction[i],
        "predict_prob":round(predict_prob[i][1],2),
        "threshold":threshold_val
           }
    arr.append(temp)

In [35]:
arr

[{'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.11, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.04, 'threshold': 0.5},
 {'predict': 1, 'predict_prob': 0.89, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5},
 {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5}]

In [36]:
arr= np.array(arr)
arr

array([{'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.11, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.04, 'threshold': 0.5},
       {'predict': 1, 'predict_prob': 0.89, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5},
       {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5}],
      dtype=object)

In [37]:
list(enumerate(arr, 0))

[(0, {'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5}),
 (1, {'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5}),
 (2, {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5}),
 (3, {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5}),
 (4, {'predict': 0, 'predict_prob': 0.11, 'threshold': 0.5}),
 (5, {'predict': 0, 'predict_prob': 0.04, 'threshold': 0.5}),
 (6, {'predict': 1, 'predict_prob': 0.89, 'threshold': 0.5}),
 (7, {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5}),
 (8, {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5}),
 (9, {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5})]

In [52]:
result = dict(enumerate(arr, 0))
result


{0: {'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5},
 1: {'predict': 0, 'predict_prob': 0.08, 'threshold': 0.5},
 2: {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5},
 3: {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5},
 4: {'predict': 0, 'predict_prob': 0.11, 'threshold': 0.5},
 5: {'predict': 0, 'predict_prob': 0.04, 'threshold': 0.5},
 6: {'predict': 1, 'predict_prob': 0.89, 'threshold': 0.5},
 7: {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5},
 8: {'predict': 0, 'predict_prob': 0.0, 'threshold': 0.5},
 9: {'predict': 0, 'predict_prob': 0.03, 'threshold': 0.5}}

In [39]:
user_input=[['MO', 85.0, 'area_code_510', 'no', 'no', 0.0, 165.8, 96.0, 28.19,
        190.0, 141.0, 16.15, 144.0, 116.0, 6.48, 10.9, 3.0, 2.94, 5.0]]

In [40]:
# user_input = np.array(telco_customer.sample(1).drop('churn',axis=1))
# user_input

In [41]:
col_names = ['state', 'account_length', 'area_code', 'international_plan',
       'voice_mail_plan', 'number_vmail_messages', 'total_day_minutes',
       'total_day_calls', 'total_day_charge', 'total_eve_minutes',
       'total_eve_calls', 'total_eve_charge', 'total_night_minutes',
       'total_night_calls', 'total_night_charge', 'total_intl_minutes',
       'total_intl_calls', 'total_intl_charge',
       'number_customer_service_calls']

In [42]:
user_df_input = pd.DataFrame(data=user_input,columns=col_names)
user_df_input

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls
0,MO,85.0,area_code_510,no,no,0.0,165.8,96.0,28.19,190.0,141.0,16.15,144.0,116.0,6.48,10.9,3.0,2.94,5.0


In [43]:
for col in user_df_input.columns[user_df_input.dtypes == 'object']:
        user_df_input[col]=user_df_input[col].astype('category').cat.codes
        print(col)

state
area_code
international_plan
voice_mail_plan


In [44]:
user_df_input

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls
0,0,85.0,0,0,0,0.0,165.8,96.0,28.19,190.0,141.0,16.15,144.0,116.0,6.48,10.9,3.0,2.94,5.0


In [45]:
# prediction = loaded_model.predict_proba(user_df_input)
# prediction

In [46]:
predict_prob = model.predict_proba(user_df_input).tolist()
prediction = model.predict(user_df_input).tolist()
threshold_val=0.5

In [47]:
predict_prob

[[0.35, 0.65]]

In [48]:
result={
        "predict":prediction[0],
        "predict_prob":round(predict_prob[0][1],2),
        "threshold":threshold_val
          }

In [49]:
result

{'predict': 1, 'predict_prob': 0.65, 'threshold': 0.5}

In [50]:
df=

SyntaxError: invalid syntax (Temp/ipykernel_10468/3220236241.py, line 1)

In [None]:
sample = telco_customer.sample(1)
sample

In [None]:
sample2=sample.drop('churn',axis=1)
sample2

In [None]:
prediction = loaded_model.predict(sample2)
prediction

In [None]:
prediction = loaded_model.predict_proba(sample2)
prediction

In [None]:
prediction = loaded_model.predict_proba(user_df_input)
prediction

### **Model Evaluation** <a id="head12"></a>

In [None]:
conf_matrix = confusion_matrix(y_test, prediction)
print("confusion matrix")
print(conf_matrix)
print(classification_report(y_test,prediction))

In [None]:
X_test