In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd

from sklearn.model_selection import train_test_split
from tabulate import tabulate, SEPARATING_LINE

from mads_telemarketing_assignment.config import (
    DATA_FILENAME,
    PROCESSED_DATA_DIR,
)
from mads_telemarketing_assignment.metrics import calculate_cost_estimates

In [2]:
# Seed shared by the notebook; it is handed to train_test_split below as its
# random_state argument so the train/test partition does not change between runs.
random_state = 42

In [3]:
# Read the processed dataset from the project's processed-data directory
processed_data_path = PROCESSED_DATA_DIR / DATA_FILENAME
df = pd.read_csv(processed_data_path)

In [4]:
# Separate the target column "y" from the remaining feature columns; both are
# passed to the cost-estimate calculation further down.
y = df["y"]
X = df.drop(columns="y")

print(f"X shape: {X.shape}, y shape: {y.shape}")

X shape: (41188, 22), y shape: (41188,)


In [5]:
# Hold out 20% of the rows as the test set. The split is seeded with
# random_state; no stratify argument is passed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)

In [6]:
# Run the cost-estimate calculation once per split (train first, then test)
train_cost_estimates, test_cost_estimates = (
    calculate_cost_estimates(features, target)
    for features, target in ((X_train, y_train), (X_test, y_test))
)

In [7]:
def print_cost_estimates(cost_estimates):
    """Print a summary of cost estimates as a GitHub-flavoured Markdown table.

    Args:
        cost_estimates: Positionally indexable result of
            ``calculate_cost_estimates``. The entries read here are
            0 (hourly wage), 1 (cost per call), 2 (revenue per success),
            5 (profit) and 6 (profit margin). Entries 3 and 4 are not shown.
            The profit margin is formatted with ``%``, so it is presumably a
            fraction rather than a percentage -- TODO confirm against
            ``calculate_cost_estimates``.

    Returns:
        None. The table is written to stdout.
    """
    # No SEPARATING_LINE row between the inputs and the results: with
    # tablefmt="github" it was emitted as a malformed one-cell row ("|  |"),
    # which breaks the Markdown table.
    rows = [
        ["Hourly Wage", round(cost_estimates[0], 2)],
        ["Cost Per Call", round(cost_estimates[1], 2)],
        ["Revenue Per Success", f"{cost_estimates[2]:.2f}"],
        ["Profit", f"{cost_estimates[5]:,.2f}"],
        # NOTE(review): the recorded outputs show a large negative profit next
        # to a 0% margin -- verify that index 6 really is the profit margin and
        # that whole-percent rounding is not hiding a small value.
        ["Profit Margin", f"{cost_estimates[6]:.0%}"],
    ]

    print(tabulate(rows, headers=["Metric", "Value"], tablefmt="github"))

In [8]:
# Summary table for the estimates computed on the training split
print_cost_estimates(train_cost_estimates)

| Metric              | Value         |
|---------------------|---------------|
| Hourly Wage         | 35            |
| Cost Per Call       | 100           |
| Revenue Per Success | 400.00        |
|  |
| Profit              | -3,295,000.00 |
| Profit Margin       | 0%            |


In [9]:
# Summary table for the estimates computed on the test split
print_cost_estimates(test_cost_estimates)

| Metric              | Value       |
|---------------------|-------------|
| Hourly Wage         | 35          |
| Cost Per Call       | 100         |
| Revenue Per Success | 400.00      |
|  |
| Profit              | -823,800.00 |
| Profit Margin       | 0%          |
