In [6]:
import pickle
import pandas as pd

# Restore the three objects this notebook needs from ../model/.
# pickle.load can run arbitrary code while unpickling, so these files must
# come from a trusted source.
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Model used for predict / predict_proba further down
model = _read_pickle('../model/random_forest_model.pkl')

# Scaler applied to the numerical columns before prediction
scaler = _read_pickle('../model/scaler.pkl')

# Column names (and their order) that encoded inputs are aligned to
dummy_columns = _read_pickle('../model/dummy_columns.pkl')


In [7]:
# Sanity check on the restored scaler: show the per-feature means it stores.
print(f"Scaler means: {scaler.mean_}")

Scaler means: [2.58285010e+02 5.16703591e+03 3.62129081e+00 8.18855006e-02
 1.72962999e-01]


In [9]:

# Example manual input: one record of raw (not yet encoded or scaled) values,
# keyed by feature name.

# Categorical features -- these get one-hot encoded at prediction time.
# NOTE(review): 'tertiary' may not be a category the training data knew about
# (features such as 'euribor3m' / 'nr.employed' suggest a dataset whose
# education levels look like 'university.degree') -- confirm that an
# 'education_tertiary' entry exists in dummy_columns; if it does not, this
# value is dropped without any error when columns are aligned.
sample_input = {
    'job': 'technician',
    'marital': 'married',
    'education': 'tertiary',
    'default': 'no',
    'contact': 'cellular',
    'month': 'may',
    'day_of_week': 'mon',
    'poutcome': 'nonexistent',
}

# Numerical features -- these get passed through the scaler.
sample_input.update({
    'duration': 320,
    'nr.employed': 5099.1,
    'euribor3m': 4.857,
    'emp.var.rate': 1.4,
    'previous': 0,
})

In [10]:

def predict_subscription(sample_input, model, scaler, dummy_columns):
    """Predict the subscription outcome for a single raw input record.

    The record is one-hot encoded, aligned to ``dummy_columns``, its numerical
    columns are scaled with ``scaler``, and the result is passed to ``model``.
    The prediction and probability are also printed.

    Parameters
    ----------
    sample_input : dict
        Raw feature values keyed by feature name. Must contain every
        categorical and numerical feature listed in the function body.
    model : object
        Must provide ``predict`` and ``predict_proba``.
    scaler : object
        Must provide ``transform`` for the five numerical columns, in the
        order listed in ``numerical`` below.
    dummy_columns : sequence of str
        Column names, in order, that the encoded frame is aligned to.

    Returns
    -------
    tuple
        ``(pred, proba)``: the first element of ``model.predict(...)`` and the
        second column of the first row of ``model.predict_proba(...)``.

    Raises
    ------
    KeyError
        If ``sample_input`` lacks a required feature, or if a numerical
        feature is absent from ``dummy_columns``.
    """
    categorical = [
        'job', 'marital', 'education', 'default',
        'contact', 'month', 'day_of_week', 'poutcome'
    ]
    numerical = ['duration', 'nr.employed', 'euribor3m', 'emp.var.rate', 'previous']

    # Convert to a one-row DataFrame
    input_df = pd.DataFrame([sample_input])

    # Fail loudly on absent inputs. Without this check a missing or misspelled
    # numerical key would be zero-filled by the column alignment below and
    # then scaled, yielding a prediction for data the caller never supplied.
    missing = [name for name in categorical + numerical if name not in input_df.columns]
    if missing:
        raise KeyError(f"sample_input is missing required feature(s): {missing}")

    # One-hot encode categorical features
    input_encoded = pd.get_dummies(input_df, columns=categorical)

    # Align to the expected layout in one step: absent dummy columns are added
    # as 0, columns are put in dummy_columns order, and anything not listed is
    # dropped.
    # NOTE(review): a category value with no matching entry in dummy_columns
    # is dropped here without any error -- verify inputs against the known
    # category values if that matters.
    input_encoded = input_encoded.reindex(columns=list(dummy_columns), fill_value=0)

    # Scale numerical columns
    input_encoded[numerical] = scaler.transform(input_encoded[numerical])

    # Predict
    # NOTE(review): assumes the model's labels are 0/1 and that column 1 of
    # predict_proba is the "subscribed" class -- confirm against model.classes_.
    pred = model.predict(input_encoded)[0]
    proba = model.predict_proba(input_encoded)[0][1]

    # Output
    prediction_text = "Subscribed (1)" if pred == 1 else "Not Subscribed (0)"
    print("Prediction:", prediction_text)
    print("Probability of subscribing:", round(proba, 4))

    return pred, proba


In [11]:
# Run the example record through the pipeline; the function prints the
# outcome and also returns it.
pred, proba = predict_subscription(
    sample_input=sample_input,
    model=model,
    scaler=scaler,
    dummy_columns=dummy_columns,
)


Prediction: Not Subscribed (0)
Probability of subscribing: 0.17
