In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc
# Read the prepared dataset in a single pass (low_memory=False) and show it.
data_raw = pd.read_csv(filepath_or_buffer='prepped_data.csv', low_memory=False)

data_raw

In [None]:

# Build the modelling table from `data_raw` WITHOUT rebinding `data_raw`.
# Reassigning `data_raw` to its one-hot-encoded form made this cell fail on a
# second run (`pd.get_dummies` raised KeyError because the categorical source
# columns were already gone) and silently invalidated the frame displayed by
# the loading cell. Deriving `data` in one chain keeps the cell re-runnable.
categorical_columns = ['last_brand', 'last_type', 'last_fuel_type']
columns_to_drop = ['Unnamed: 0', 'policy_nr_hashed', 'last_data_year', 'first_data_year', 'control_group', 'last_product']

data = (
    data_raw
    # keep only rows where welcome_discount equals 1.0
    .loc[data_raw['welcome_discount'] == 1.0]
    # drop every column that has a missing value in the remaining rows
    .dropna(axis=1)
    # replace each categorical column by its indicator (dummy) columns
    .pipe(pd.get_dummies, columns=categorical_columns)
    # remove the columns that are excluded from modelling
    .drop(columns=columns_to_drop)
)

# Target vector and feature matrix.
y = data['churn']
X = data.drop(columns='churn')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — confirm this is intended
# given that class weighting is enabled for the model below.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# CatBoost hyper-parameters gathered in one place.
catboost_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 10,
    'loss_function': 'Logloss',
    'auto_class_weights': 'Balanced',  # class weighting to handle the class imbalance
    'random_state': 42,  # fixed seed for reproducibility
}
model = CatBoostClassifier(**catboost_params)

# Fit on the training partition with verbose output disabled.
model.fit(X_train, y_train, verbose=False)

# Evaluate on the held-out rows: class predictions, confusion matrix and
# support-weighted F1 (average='weighted').
predictions = model.predict(X_test)
eval_pair = {'y_true': y_test, 'y_pred': predictions}
cm = confusion_matrix(**eval_pair)
f1 = f1_score(average='weighted', **eval_pair)

# Report both metrics.
for heading, metric in (("Confusion Matrix:\n", cm), ("F1 Score:", f1)):
    print(heading, metric)

In [None]:
# Scores from column 1 of predict_proba feed the ROC analysis
# (presumably the churn == 1 class — verify against model.classes_).
probabilities = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, probabilities)
roc_auc = auc(fpr, tpr)

# Draw the curve through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(4, 4))
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # diagonal reference line
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()

In [None]:
# Show the confusion matrix and F1 score again.
for caption, result in (("Confusion Matrix:\n", cm), ("F1 Score:", f1)):
    print(caption, result)

In [None]:
# Total number of evaluated samples = sum of every confusion-matrix cell.
# Summing the whole matrix (rather than only rows 0 and 1) gives the same
# value for a 2x2 matrix and stays correct for any number of classes.
cm.sum()

In [None]:
# 20% of the modelling rows — the nominal hold-out size for a 0.2 test split.
0.2 * len(data)

In [None]:
# Display the modelling frame (after row filtering, one-hot encoding and column drops).
data