In [3]:
import pandas as pd
import matplotlib.pyplot as pltxx


In [4]:
Data=pd.read_csv("Data_Consumer_ChurnPrediction.csv")
Data

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


# Data Cleaning

In [5]:
Data.drop('customerID',axis=1,inplace = True) #customerId not much useful for prediction

In [6]:
Data['TotalCharges'] = pd.to_numeric(Data['TotalCharges'],errors='coerce') #Convert the string values to numerica and put NAN in place of the missing value
Data['TotalCharges'] = Data['TotalCharges'].fillna(Data['TotalCharges'].median()) #Replace the missing data with median value

boolean_mapping = {'Yes':1, 'No':0}
Data['Churn'] = Data['Churn'].map(boolean_mapping)
Data['Partner'] = Data['Partner'].map(boolean_mapping)
Data['Dependents'] = Data['Dependents'].map(boolean_mapping)
Data['PhoneService'] = Data['PhoneService'].map(boolean_mapping)
Data['PaperlessBilling'] = Data['PaperlessBilling'].map(boolean_mapping)
Data['gender'] = Data['gender'].map({'Male':1, 'Female':0})


CatVar =  ['MultipleLines','InternetService','OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport','StreamingTV','StreamingMovies','Contract','PaymentMethod']
Data = pd.get_dummies(Data, columns = CatVar, drop_first=False)
Data

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,...,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,1,0,1,29.85,29.85,0,...,1,0,0,1,0,0,0,0,1,0
1,1,0,0,0,34,1,0,56.95,1889.50,0,...,1,0,0,0,1,0,0,0,0,1
2,1,0,0,0,2,1,1,53.85,108.15,1,...,1,0,0,1,0,0,0,0,0,1
3,1,0,0,0,45,0,0,42.30,1840.75,0,...,1,0,0,0,1,0,1,0,0,0
4,0,0,0,0,2,1,1,70.70,151.65,1,...,1,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,24,1,1,84.80,1990.50,0,...,0,0,1,0,1,0,0,0,0,1
7039,0,0,1,1,72,1,1,103.20,7362.90,0,...,0,0,1,0,1,0,0,1,0,0
7040,0,0,1,1,11,0,1,29.60,346.45,0,...,1,0,0,1,0,0,0,0,1,0
7041,1,1,1,0,4,1,1,74.40,306.60,1,...,1,0,0,1,0,0,0,0,0,1


In [7]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()
Data['tenure'] = sc.fit_transform(Data[['tenure']])
Data['MonthlyCharges'] = sc.fit_transform(Data[['MonthlyCharges']])
Data['TotalCharges'] = sc.fit_transform(Data[['TotalCharges']])

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

X = Data.drop('Churn', axis=1)
y = Data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=50)

def train(alg, name):
    model = alg
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(name)
    print("=================================")
    print("accuracy: ",accuracy_score(y_test, y_pred))
    print("precision: ",precision_score(y_test, y_pred))
    print("recall: ",recall_score(y_test, y_pred))
    print("f1_score: ",f1_score(y_test, y_pred, average='weighted'))
    print("\n")
    return model


train(LogisticRegression, 'Logistic Regression')
print("")
train(SVC, 'SVC Classification')
print("")
train(RandomForestClassifier, "Random Forest Classification")
print("")
train(DecisionTreeClassifier, "Decision Tree Classification")
print("")
train(GaussianNB, "Naive Bayes Classification")
print("")
train(MultinomialNB, "MultinomialNB Classification")
print("")
train(BernoulliNB, "BernoulliNB Classification")
print("")
train(KNeighborsClassifier, "KNeighbors Classification")

NameError: name 'modeling' is not defined