In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

df = pd.read_csv('d:/data/churn/churn_sample.csv')

df_train, df_test = train_test_split(df, test_size = 0.2, random_state = 42, stratify = df['churn'])

X_train_raw = df_train.drop(columns = ['customer_id', 'churn'])
y_train = df_train['churn']
X_test_raw = df_test.drop(columns = ['customer_id', 'churn'])
y_test = df_test['churn']

categorical_cols = ['gender', 'contract_type', 'payment_method', 'internet_service']
encoder = OneHotEncoder(drop='first')
X_train_cat = encoder.fit_transform(X_train_raw[categorical_cols]).toarray()
X_test_cat = encoder.transform(X_test_raw[categorical_cols]).toarray()

X_train_num = X_train_raw.drop(columns = categorical_cols).values
X_test_num = X_test_raw.drop(columns = categorical_cols).values
X_train = np.hstack([X_train_num, X_train_cat])
X_test = np.hstack([X_test_num, X_test_cat])

best_acc = 0
best_depth = None
for depth in list(range(1, 21)) + [None]:
    model = RandomForestClassifier(n_estimators=100, max_depth=depth, random_state=42)
    model.fit(X_train, y_train)
    acc = accuracy_score(y_test, model.predict(X_test))
    if acc > best_acc:
        best_acc = acc
        best_depth = depth

final_model = RandomForestClassifier(n_estimators=100, max_depth=best_depth, random_state=42)
final_model.fit(X_train, y_train)
y_test_pred = final_model.predict(X_test)

model_path = 'd:/data/churn/rf_best_model.joblib'
joblib.dump(final_model, model_path)

pred_df = pd.DataFrame({
    'customer_id': df_test['customer_id'].values, 
    'actual_churn': y_test.values,
    'predicted_churn': y_test_pred
})

print("Best max_depth: ", best_depth, " Accuracy: ", round(best_acc, 4))
print("\nSample Predictions: ")
print(pred_df.head(10))

pred_csv_path = 'd:/data/churn/churn_predictions.csv'
pred_df.to_csv(pred_csv_path, index=False)

print(f"\nModel saved to: {model_path}")
print(f"Predictions saved to: {pred_csv_path}")

Best max_depth:  6  Accuracy:  0.73

Sample Predictions: 
   customer_id  actual_churn  predicted_churn
0          206             0                0
1          903             1                1
2          808             1                0
3          140             1                1
4          933             0                1
5          626             0                1
6          334             1                1
7          219             1                1
8          676             0                0
9          577             0                0

Model saved to: d:/data/churn/rf_best_model.joblib
Predictions saved to: d:/data/churn/churn_predictions.csv
