In [6]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train a random-forest churn classifier on the telecom dataset, report
# hold-out metrics, and persist the fitted model to "rf_model.pkl".
df = pd.read_csv("telecom_churn.csv")

print("Dataset Columns:", df.columns)

features = ["account length", "total day minutes", "total eve minutes",
            "total night minutes", "total intl charge", "customer service calls"]

X = df[features]
y = df["churn"]

# Text labels are encoded as 1/0; boolean or numeric targets pass through
# untouched. Checking both object and string dtypes keeps the encoding active
# when pandas stores the column with a dedicated string dtype.
if pd.api.types.is_object_dtype(y) or pd.api.types.is_string_dtype(y):
    # Normalise whitespace/case so "Yes", " no", etc. are accepted as well.
    normalized_labels = y.astype(str).str.strip().str.lower()
    encoded_labels = normalized_labels.map({"yes": 1, "no": 0})
    unmapped = encoded_labels.isna()
    if unmapped.any():
        # Series.map silently yields NaN for unknown keys; fail loudly here
        # instead of handing NaN targets to the classifier.
        bad_labels = sorted(y[unmapped].astype(str).unique())
        raise ValueError(f"Unexpected values in 'churn' column: {bad_labels}")
    y = encoded_labels.astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute AFTER splitting, using medians learned from the training rows only,
# so no statistic derived from the test rows leaks into training/evaluation.
# NOTE(review): these medians are not persisted with the model; inference code
# must handle missing values itself.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Serialise the fitted estimator; joblib.dump returns the list of written paths.
joblib.dump(model, "rf_model.pkl")


Dataset Columns: Index(['state', 'account length', 'area code', 'phone number',
       'international plan', 'voice mail plan', 'number vmail messages',
       'total day minutes', 'total day calls', 'total day charge',
       'total eve minutes', 'total eve calls', 'total eve charge',
       'total night minutes', 'total night calls', 'total night charge',
       'total intl minutes', 'total intl calls', 'total intl charge',
       'customer service calls', 'churn'],
      dtype='object')
Accuracy: 0.8980509745127436
Confusion Matrix:
 [[553  13]
 [ 55  46]]
Classification Report:
               precision    recall  f1-score   support

       False       0.91      0.98      0.94       566
        True       0.78      0.46      0.57       101

    accuracy                           0.90       667
   macro avg       0.84      0.72      0.76       667
weighted avg       0.89      0.90      0.89       667



['rf_model.pkl']