In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pandas as pd
import joblib

# Load the customer feature table from the processed-data folder.
# The path is written with forward slashes so that it resolves on Windows,
# macOS and Linux alike (a backslash-separated literal only works on Windows).
features = pd.read_csv('../data/processed/customers_features.csv')

In [2]:
# Target: the churn flag column.
y = features['is_churned']

# Predictors: every remaining column except the identifier and the target,
# cast to integer dtype in one pass.
# NOTE(review): astype('int') truncates any fractional values and raises on
# missing values — confirm every remaining column is meant to be an integer.
X = features.drop(columns=['customer_id', 'is_churned']).astype('int')

In [3]:
# Hold out 25% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)



In [5]:
# Random forest with shallow, well-populated leaves; class_weight='balanced'
# re-weights classes inversely to their frequency in the training labels.
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=8,
    min_samples_leaf=10,
    class_weight='balanced',
    random_state=42,
)
model.fit(X_train, y_train)

# Sanity check on partition sizes: full matrix, training part, test part.
print(X.shape, X_train.shape, X_test.shape)

(500, 10) (375, 10) (125, 10)


In [6]:
# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test)

# Show the raw label vector for a quick eyeball check.
print(y_pred)

[0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
 0 1 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0
 1 1 0 1 0 0 1 0 1 1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0]


In [7]:
# Overall share of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Accuracy alone hides per-class behaviour, which matters when the classes are
# unevenly represented: a model can score well while missing most of the
# minority class. The per-class precision / recall / F1 report makes that visible.
print(classification_report(y_test, y_pred))

Accuracy: 0.72


In [8]:
# Confusion matrix on the held-out split: rows are true classes, columns are
# predicted classes (scikit-learn's convention).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", cm)

Confusion Matrix: [[85 20]
 [15  5]]


In [9]:
# Pair each predictor column with the forest's importance score for it and
# list them from most to least important.
features_importance = (
    pd.DataFrame({'feature': X.columns, 'importance': model.feature_importances_})
    .sort_values(by='importance', ascending=False)
)
print(features_importance)

                feature  importance
8        total_spending    0.241205
9          avg_spending    0.230953
7    total_transactions    0.214162
0  customer_tenure_days    0.163257
5         city_Surabaya    0.046492
4             city_Solo    0.031084
1              gender_M    0.026497
6       city_Yogyakarta    0.016893
2          city_Jakarta    0.016096
3           city_Malang    0.013362


In [None]:
# Persist the fitted model to disk. joblib.dump returns the list of file names
# it wrote, which is displayed as the cell's result.
# The path uses forward slashes so the cell also runs on non-Windows hosts.
joblib.dump(model, '../models/best_model.pkl')

['..\\models\\best_model.pkl']