## Importing Necessary libraries

In [1]:
import eda as eda
import dataframe as df
import train as t
import pandas as pd
#import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFECV,SequentialFeatureSelector
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Fetching Data and EDA

In [2]:
churn_df_new = df.get_churn_df()
churn_df = churn_df_new.copy()
churn_df.TotalCharges = pd.to_numeric(churn_df['TotalCharges'],errors='coerce')
churn_df=eda.drop_null_values(churn_df)
feature_churn_df = df.get_feature_df(churn_df,['customerID','Churn'])
target_churn_df = df.get_target_df(churn_df,'Churn')

feature_churn_df['SeniorCitizen'] = feature_churn_df['SeniorCitizen'].map({0:'No',1:'Yes'})
feature_churn_df_encoded = pd.get_dummies(feature_churn_df)

## Logistic Regression

In [3]:
X_train,X_test,y_train,y_test = t.get_train_test_split_data(feature_churn_df_encoded,target_churn_df)
lg = LogisticRegression()
lg.fit(X_train,y_train)
y_pred = lg.predict(X_test)
print("Accuracy score is:   ", round(accuracy_score(y_test,y_pred)*100,2,),"%")

Accuracy score is:    79.35 %


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## RFE

In [8]:
X_train,X_test,y_train,y_test = t.get_train_test_split_data(feature_churn_df_encoded,target_churn_df)
model = LogisticRegression(penalty='l1',solver='liblinear')
rfe = RFECV(model,min_features_to_select=5)
rfe.fit(X_train,y_train)

print("Optimal features:    ",rfe.n_features_)
#print("Selected_features:   ",X_train.columns[rfe.support_])

Optimal features:     43


In [9]:
X_train_rfe = pd.DataFrame(X_train,columns = X_train.columns[rfe.support_])
X_test_rfe = pd.DataFrame(X_test,columns=X_test.columns[rfe.support_])
rfe2 = rfe.fit(X_train_rfe,y_train)
rfe2_pred = rfe2.predict(X_test_rfe)
print("Accuracy score is:   ", round(accuracy_score(y_test,rfe2_pred)*100,2,),"%")


Accuracy score is:    79.01 %


## Sequential Feature Selector 

In [None]:
X_train,X_test,y_train,y_test = train_test_split(feature_churn_df_encoded,target_churn_df)
model_Sfs = LogisticRegression()
sfs = SequentialFeatureSelector(model_Sfs,n_features_to_select=5)
sfs.fit(X_train,y_train)
X_train_sfs = pd.DataFrame(sfs.transform(X_train),columns = sfs.get_feature_names_out(),index=X_train.index)
X_test_sfs = pd.DataFrame(sfs.transform(X_test),columns = sfs.get_feature_names_out(),index=X_test.index)
model_Sfs.fit(X_train_sfs,y_train)
y_pred_sfs = model_Sfs.predict(X_test_sfs)


In [7]:
print("Accuracy score is:   ", round(accuracy_score(y_test,y_pred_sfs)*100,2,),"%")

Accuracy score is:    78.95 %
