# Imports

In [1]:
from pathlib import Path

import pandas as pd

from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from mlops.pipelines.churn import ChurnPipeline

from mlops.config import MODEL_CONFIG

In [2]:
# Select the churn model's settings from the project-wide model config.
# Later cells read "target", "drop_features", "test_size" and "random_seed" from it.
model_configs = MODEL_CONFIG["models"]
config = model_configs["churn"]

## Data

In [3]:
# Location of the churn dataset. The path is relative, so it resolves against
# the directory the notebook kernel is running in.
data_path = Path("../data").joinpath("churn", "data.parquet")

# Load the full dataset (features and target column together) into memory.
dataset = pd.read_parquet(data_path)

In [4]:
def _column_positions(frame, dtypes, excluded):
    """Return positions in ``frame`` of its ``dtypes`` columns, minus ``excluded``.

    Args:
        frame: Feature DataFrame whose column order defines the positions.
        dtypes: Dtype names passed to ``DataFrame.select_dtypes(include=...)``.
        excluded: Column labels removed from the selected columns. Every label
            must be among the selected columns, otherwise ``drop`` raises
            ``KeyError``.

    Returns:
        List of ``frame.columns.get_loc`` results, in selected-column order.
    """
    selected = frame.select_dtypes(include=dtypes).drop(excluded, axis=1)
    return [frame.columns.get_loc(label) for label in selected.columns.values]


# Separate the target column from the feature matrix.
target_column = config["target"]
X = dataset.drop(columns=[target_column])
y = dataset[target_column]

# Features are handed to the pipeline as column positions within X, not as names.
excluded_features = config["drop_features"]
numeric_features = _column_positions(
    X, ["int64", "float64"], excluded_features["numerical"]
)
categorical_features = _column_positions(
    X, ["object"], excluded_features["categorical"]
)

## Pipeline


In [5]:
# Configure the churn pipeline with the positional feature lists computed above,
# then build the estimator object that is fitted and used for prediction below.
# NOTE(review): params={} presumably means "use the pipeline's defaults" - confirm in ChurnPipeline.
pipeline_builder = ChurnPipeline(
    params={},
    numeric_features=numeric_features,
    categorical_features=categorical_features,
)
pipeline = pipeline_builder.build()

In [6]:
# Hold out part of the data for evaluation; the size and seed come from the
# model config so the split is reproducible across runs.
# NOTE(review): the split is not stratified on y - consider stratify=y if the
# churn classes are imbalanced (this would change the resulting split).
split_options = {
    "test_size": config["test_size"],
    "random_state": config["random_seed"],
}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [7]:
# Fit the pipeline on the training split only; the test split stays unseen.
# The call is the cell's last expression, so the notebook displays whatever
# fit() returns (presumably the fitted pipeline itself - confirm in ChurnPipeline).
pipeline.fit(X_train, y_train)



## Prediction

In [8]:
# Score the held-out rows: predicted labels first, then per-class probabilities.
predictions = pipeline.predict(X_test)
probabilities = pipeline.predict_proba(X_test)

In [9]:
# Column 1 of predict_proba is taken as the probability of the positive class.
# NOTE(review): assumes the positive label is the second entry of the
# estimator's class ordering - confirm against the fitted pipeline.
positive_class_probability = probabilities[:, 1]

# One row per test sample; the frame's index comes from y_test.
results = pd.DataFrame(
    {
        "actual": y_test,
        "prediction": predictions,
        "probability": positive_class_probability,
    }
)

In [10]:
# Display actual label, predicted label and positive-class probability for the
# held-out test rows (the index shows the original dataset row labels).
# NOTE(review): this renders the whole frame (pandas truncates long output);
# consider results.head() if a short preview is enough.
results

Unnamed: 0,actual,prediction,probability
8369,1,0,0.084875
5395,0,1,0.542969
7996,0,0,0.031986
4157,0,0,0.018895
3324,0,0,0.279897
...,...,...,...
8336,1,0,0.451282
8221,1,1,0.982396
7049,1,0,0.148617
986,0,0,0.091119
