In [3]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [4]:
# Location of the Telco churn CSV. Set the TELCO_CHURN_CSV environment variable
# to point at the file on another machine; without it the original local path
# is used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    "TELCO_CHURN_CSV",
    "C:\\Users\\HP\\Downloads\\WA_Fn-UseC_-Telco-Customer-Churn.csv",
)
df = pd.read_csv(DATA_PATH)


In [5]:
# Convert 'TotalCharges' to numeric and fill missing values with 0.
# errors='coerce' turns entries that cannot be parsed as numbers into NaN
# (the default, errors='raise', would abort the cell on such entries), and the
# result is assigned back instead of using a chained in-place fillna, which
# pandas deprecates because it may act on a temporary copy.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)

# Map the 'Churn' labels to binary values. The dtype guard keeps this cell
# re-runnable: mapping an already-numeric column would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df['Churn']):
    df['Churn'] = df['Churn'].map({'No': 0, 'Yes': 1})

# Separate the feature columns (X) from the target column (y).
X = df.drop('Churn', axis=1)
y = df['Churn']


In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [7]:
# Columns to be one-hot encoded.
categorical = [
    'gender',
    'SeniorCitizen',
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaperlessBilling',
    'PaymentMethod',
]

# Columns to be standardised.
numerical = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]


In [11]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Create transformers for numerical and categorical columns.
# Numerical columns are standardised.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical columns are one-hot encoded. The encoder is left at its default
# output format because the keyword that controls it was renamed between
# scikit-learn releases (`sparse` -> `sparse_output`); dense output is requested
# on the ColumnTransformer below instead. handle_unknown='ignore' encodes a
# category that was not seen during fit as all zeros rather than raising at
# transform time.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create a column transformer that applies the transformers to the appropriate
# columns. Columns not listed in `numerical` or `categorical` are dropped
# (the ColumnTransformer default). sparse_threshold=0 makes the combined output
# always a dense array.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical),
        ('cat', categorical_transformer, categorical)
    ],
    sparse_threshold=0,
)










In [None]:
# Wrap the column transformer in a pipeline whose only step is 'preprocessor'.
preprocessing_steps = [('preprocessor', preprocessor)]
final_pipeline = Pipeline(preprocessing_steps)



In [None]:
# Fit the preprocessing pipeline on the training features and transform them,
# then apply the already-fitted pipeline to the test features. The test set is
# only transformed, never fitted, so nothing is learned from it.
X_train_transformed = final_pipeline.fit_transform(X_train)
X_test_transformed = final_pipeline.transform(X_test)


In [None]:

# Get the column names after one-hot encoding.
# The fitted encoder sits inside the 'cat' pipeline of the fitted preprocessor.
categorical_encoder = final_pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']
# `get_feature_names` was removed from newer scikit-learn releases in favour of
# `get_feature_names_out`; use the new method when available and fall back to
# the old one on older installations.
if hasattr(categorical_encoder, 'get_feature_names_out'):
    one_hot_encoded_cols = categorical_encoder.get_feature_names_out(categorical)
else:
    one_hot_encoded_cols = categorical_encoder.get_feature_names(input_features=categorical)


In [None]:
# Combine the numerical and one-hot encoded column names, in the same order the
# column transformer emits them ('num' block first, then 'cat').
all_column_names = numerical + list(one_hot_encoded_cols)

# Create DataFrames for the transformed data with the correct column names.
# The original row index is carried over so these frames stay label-aligned
# with y_train / y_test; a fresh 0..n-1 index would silently misalign any
# later join or concat with the targets.
X_train_final = pd.DataFrame(X_train_transformed, columns=all_column_names, index=X_train.index)
X_test_final = pd.DataFrame(X_test_transformed, columns=all_column_names, index=X_test.index)



In [13]:

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score


In [14]:
# Fit four tree-based classifiers on the preprocessed training data.
# Every estimator uses random_state=1; `fit` returns the estimator itself,
# so construction and fitting are chained.
rf_model = RandomForestClassifier(random_state=1).fit(X_train_final, y_train)
et_model = ExtraTreesClassifier(random_state=1).fit(X_train_final, y_train)
xgb_model = XGBClassifier(random_state=1).fit(X_train_final, y_train)
lgbm_model = LGBMClassifier(random_state=1).fit(X_train_final, y_train)

# Keep the last fitted estimator as the cell's displayed value.
lgbm_model


LGBMClassifier(random_state=1)

In [15]:
# Predict test-set labels with each fitted model (same order as training).
rf_pred, et_pred, xgb_pred, lgbm_pred = (
    fitted.predict(X_test_final)
    for fitted in (rf_model, et_model, xgb_model, lgbm_model)
)


In [16]:
# Score each model's predictions against the held-out labels.
rf_accuracy, et_accuracy, xgb_accuracy, lgbm_accuracy = (
    accuracy_score(y_test, predicted)
    for predicted in (rf_pred, et_pred, xgb_pred, lgbm_pred)
)

In [17]:
# Report each model's test accuracy, one line per model, rounded to two decimals.
accuracy_report = [
    f"Random Forest Accuracy: {rf_accuracy:.2f}",
    f"Extra Trees Accuracy: {et_accuracy:.2f}",
    f"XGBoost Accuracy: {xgb_accuracy:.2f}",
    f"LightGBM Accuracy: {lgbm_accuracy:.2f}",
]
print("\n".join(accuracy_report))


Random Forest Accuracy: 0.79
Extra Trees Accuracy: 0.77
XGBoost Accuracy: 0.79
LightGBM Accuracy: 0.80
