In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [3]:
# Load the raw churn dataset; the CSV is expected in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")

In [5]:
# Remove identifier columns that are not used as model features.
# Rebinding `df` (instead of `inplace=True`) together with `errors="ignore"`
# keeps this cell idempotent: re-running it after the columns are already gone
# no longer raises a KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [7]:
# Replace the two text columns with integer codes, one dedicated encoder per
# column so each fitted mapping stays available afterwards.
# LabelEncoder numbers the sorted unique values, so the expected mapping is
# France=0, Germany=1, Spain=2 and Female=0, Male=1 (assuming those are the
# only values present in the data).
le_geo, le_gender = LabelEncoder(), LabelEncoder()
for column, encoder in (('Geography', le_geo), ('Gender', le_gender)):
    df[column] = encoder.fit_transform(df[column])

In [9]:
# Split the frame into the target ('Exited') and the feature matrix
# (every remaining column).
y = df['Exited']
X = df.drop(columns='Exited')

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Standardize the features. The scaler is fitted on the training split only and
# then applied to both splits, so no statistics from the test rows are used.
# Note: X_train / X_test are rebound to the transformed output here, replacing
# the original (unscaled) splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [15]:
# Train a random forest on the scaled training split; the seed fixes the
# randomness used while building the trees.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

In [17]:
# Predict on the held-out split, compute every metric up front, then print
# them in a fixed order: per-class report, overall accuracy, confusion matrix.
y_pred = model.predict(X_test)

report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("\n✅ Classification Report:")
print(report)

print("\n✅ Accuracy:")
print("Accuracy:", accuracy)

print("\n✅ Confusion Matrix:")
print(matrix)


✅ Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.47      0.58       393

    accuracy                           0.86      2000
   macro avg       0.81      0.71      0.75      2000
weighted avg       0.85      0.86      0.85      2000


✅ Accuracy:
Accuracy: 0.8645

✅ Confusion Matrix:
[[1545   62]
 [ 209  184]]


In [19]:
# Export the held-out rows together with their true and predicted labels.
#
# X_test was rebound to the standardized array in the scaling cell, so building
# the export from it would write z-scores instead of real feature values.
# y_test still carries the original row labels in the same order as X_test and
# y_pred, so the unscaled feature rows are looked up in X by that index.
# (Geography and Gender remain the integer codes produced by the encoders.)
output = X.loc[y_test.index].copy()
output['Actual'] = y_test.values
output['Predicted'] = y_pred
output.to_csv("churn_predictions.csv", index=False)
print("\n✅ Results saved to 'churn_predictions.csv'")


✅ Results saved to 'churn_predictions.csv'
