### 1. Load data

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from joblib import load
from sklearn.pipeline import Pipeline

**Read the CSV splits and load the preprocessing pipeline**

In [4]:
# Zipped CSV files holding the features and targets of each split
feature_files = (
    '../data/processed/X_train.zip',
    '../data/processed/X_val.zip',
    '../data/processed/X_test.zip',
)
target_files = (
    '../data/processed/y_train.zip',
    '../data/processed/y_val.zip',
    '../data/processed/y_test.zip',
)

# Read train / validation / test in that order for features, then targets
X_train, X_val, X_test = map(pd.read_csv, feature_files)
y_train, y_val, y_test = map(pd.read_csv, target_files)

# Preprocessing pipeline stored with joblib.
# joblib.load unpickles the file, so only load artefacts from a trusted source.
preprocessor = load('../models/pipe.pkl')

In [4]:
# Report the shape of every split, features first and then targets
splits = {
    "X_train": X_train,
    "X_val": X_val,
    "X_test": X_test,
    "y_train": y_train,
    "y_val": y_val,
    "y_test": y_test,
}
for label, frame in splits.items():
    print(f"{label}:", frame.shape)

X_train: (2520168, 16)
X_val: (630043, 16)
X_test: (787553, 16)
y_train: (2520168, 1)
y_val: (630043, 1)
y_test: (787553, 1)


In [5]:
# Folder holding the extracted itineraries CSV files.
# The location is machine-specific, so it can be overridden through the
# ITINERARIES_CSV_DIR environment variable; the original path is the fallback.
# NOTE(review): none of the visible cells read `extract_path` - confirm it is still needed.
import os

extract_path = os.environ.get(
    'ITINERARIES_CSV_DIR',
    'C:\\Users\\ting_\\OneDrive - UTS\\2024 SPR SESSION\\36120 Advanced MLAA\\Assignments\\AT3 Data Product\\data\\itineraries_csv',
)

### 2. AdaBoost Regressor Model - default

In [6]:
from sklearn.ensemble import AdaBoostRegressor

#### [2.1] Apply pipeline

In [12]:
# AdaBoost regressor left at its default hyperparameters (only the seed is fixed),
# placed after the loaded preprocessing pipeline.
ada_default = AdaBoostRegressor(random_state=42)
ada_pipe = Pipeline(
    steps=[('preprocessor', preprocessor), ('ada', ada_default)]
).set_output(transform="pandas")

#### [2.2] Fit the model

In [7]:
# Flatten the single-column target into a 1-D array for fitting.
# np.asarray accepts both the DataFrame read from CSV and an already
# flattened array, so re-running this cell is harmless (the previous
# `.values` access raised AttributeError on a second run).
y_train = np.asarray(y_train).ravel()

In [14]:
# Fit the preprocessing step and the default AdaBoost model on the training split
ada_pipe.fit(X_train, y_train)

#### [2.3] Make predictions

In [15]:
# Predict on the training split first, then on the validation split
y_train_pred, y_val_pred = (
    ada_pipe.predict(features) for features in (X_train, X_val)
)

#### [2.4] Evaluation Metrics - RMSE and MAE

In [8]:
from sklearn.metrics import root_mean_squared_error as rmse
from sklearn.metrics import mean_absolute_error as mae

In [17]:
# RMSE and MAE of the default model on the training and validation splits.
# scikit-learn metrics are called as metric(y_true, y_pred). Both metrics here
# are symmetric, so the printed values do not change, but the conventional
# order stays correct if an asymmetric metric (e.g. r2_score) is added later.
print("RMSE_train:", f'{rmse(y_train, y_train_pred):.4f}')
print("RMSE_val:", f'{rmse(y_val, y_val_pred):.4f}')
print("MAE_train:", f'{mae(y_train, y_train_pred):.4f}')
print("MAE_val:", f'{mae(y_val, y_val_pred):.4f}')

RMSE_train: 309.5018
RMSE_val: 309.8439
MAE_train: 262.1196
MAE_val: 262.2014


### 3. AdaBoost Regressor Model - learning_rate=0.1, n_estimators=50, loss='square'

#### [3.1] Define Pipeline

In [8]:
# AdaBoost with learning_rate=0.1, 50 estimators and squared loss,
# placed after the loaded preprocessing pipeline.
ada_model2 = AdaBoostRegressor(
    learning_rate=0.1,
    n_estimators=50,
    loss='square',
    random_state=42,
)
ada_pipe2 = Pipeline(
    steps=[('preprocessor', preprocessor), ('ada', ada_model2)]
).set_output(transform='pandas')

#### [3.2] Fit the model and make prediction

In [9]:
# Fit the preprocessing step and the second AdaBoost configuration on the training split
ada_pipe2.fit(X_train, y_train)

In [10]:
# Predict on the training split first, then on the validation split
y_train_pred2, y_val_pred2 = (
    ada_pipe2.predict(features) for features in (X_train, X_val)
)

#### [3.3] Evaluation Metrics - RMSE and MAE

In [11]:
# RMSE and MAE of the second configuration on the training and validation splits.
# Arguments follow scikit-learn's metric(y_true, y_pred) order; the values are
# unchanged because both metrics are symmetric.
print("RMSE_train:", f'{rmse(y_train, y_train_pred2):.4f}')
print("RMSE_val:", f'{rmse(y_val, y_val_pred2):.4f}')
print("MAE_train:", f'{mae(y_train, y_train_pred2):.4f}')
print("MAE_val:", f'{mae(y_val, y_val_pred2):.4f}')

RMSE_train: 160.8980
RMSE_val: 161.3474
MAE_train: 117.2727
MAE_val: 117.2640


### 4. AdaBoost Regressor Model - learning_rate=0.1, n_estimators=100, loss='square'

In [8]:
# AdaBoost with learning_rate=0.1, 100 estimators and squared loss,
# placed after the loaded preprocessing pipeline.
ada_model3 = AdaBoostRegressor(
    learning_rate=0.1,
    n_estimators=100,
    loss='square',
    random_state=42,
)
ada_pipe3 = Pipeline(
    steps=[('preprocessor', preprocessor), ('ada', ada_model3)]
).set_output(transform='pandas')

In [9]:
# Fit the preprocessing step and the third AdaBoost configuration on the training split
ada_pipe3.fit(X_train, y_train)

In [10]:
# Predict on the training split first, then on the validation split
y_train_pred3, y_val_pred3 = (
    ada_pipe3.predict(features) for features in (X_train, X_val)
)

#### Evaluation Metrics - RMSE and MAE

In [11]:
# RMSE and MAE of the third configuration on the training and validation splits.
# Arguments follow scikit-learn's metric(y_true, y_pred) order; the values are
# unchanged because both metrics are symmetric.
print("RMSE_train:", f'{rmse(y_train, y_train_pred3):.4f}')
print("RMSE_val:", f'{rmse(y_val, y_val_pred3):.4f}')
print("MAE_train:", f'{mae(y_train, y_train_pred3):.4f}')
print("MAE_val:", f'{mae(y_val, y_val_pred3):.4f}')

RMSE_train: 163.5397
RMSE_val: 163.9740
MAE_train: 123.2337
MAE_val: 123.2509


### 5. AdaBoost Regressor Model - learning_rate=0.3, n_estimators=50, loss='square'

In [18]:
# AdaBoost with learning_rate=0.3, 50 estimators and squared loss,
# placed after the loaded preprocessing pipeline.
ada_model4 = AdaBoostRegressor(
    learning_rate=0.3,
    n_estimators=50,
    loss='square',
    random_state=42,
)
ada_pipe4 = Pipeline(
    steps=[('preprocessor', preprocessor), ('ada', ada_model4)]
).set_output(transform='pandas')

In [19]:
# Fit the preprocessing step and the fourth AdaBoost configuration on the training split
ada_pipe4.fit(X_train, y_train)

In [20]:
# Predict on the training split first, then on the validation split
y_train_pred4, y_val_pred4 = (
    ada_pipe4.predict(features) for features in (X_train, X_val)
)

#### Evaluation Metrics - RMSE and MAE

In [21]:
# RMSE and MAE of the fourth configuration on the training and validation splits.
# Arguments follow scikit-learn's metric(y_true, y_pred) order; the values are
# unchanged because both metrics are symmetric.
print("RMSE_train:", f'{rmse(y_train, y_train_pred4):.4f}')
print("RMSE_val:", f'{rmse(y_val, y_val_pred4):.4f}')
print("MAE_train:", f'{mae(y_train, y_train_pred4):.4f}')
print("MAE_val:", f'{mae(y_val, y_val_pred4):.4f}')

RMSE_train: 164.1764
RMSE_val: 164.6057
MAE_train: 124.2686
MAE_val: 124.2969


### 6. AdaBoost Regressor Model - learning_rate=0.01, n_estimators=50, loss='square'

In [9]:
# AdaBoost with learning_rate=0.01, 50 estimators and squared loss,
# placed after the loaded preprocessing pipeline.
ada_model5 = AdaBoostRegressor(
    learning_rate=0.01,
    n_estimators=50,
    loss='square',
    random_state=42,
)
ada_pipe5 = Pipeline(
    steps=[('preprocessor', preprocessor), ('ada', ada_model5)]
).set_output(transform='pandas')

In [10]:
# Fit the preprocessing step and the fifth AdaBoost configuration on the training split
ada_pipe5.fit(X_train, y_train)

In [11]:
# Predict on the training split first, then on the validation split
y_train_pred5, y_val_pred5 = (
    ada_pipe5.predict(features) for features in (X_train, X_val)
)

#### Evaluation Metrics - RMSE and MAE

In [12]:
# RMSE and MAE of the fifth configuration on the training and validation splits.
# Arguments follow scikit-learn's metric(y_true, y_pred) order; the values are
# unchanged because both metrics are symmetric.
print("RMSE_train:", f'{rmse(y_train, y_train_pred5):.4f}')
print("RMSE_val:", f'{rmse(y_val, y_val_pred5):.4f}')
print("MAE_train:", f'{mae(y_train, y_train_pred5):.4f}')
print("MAE_val:", f'{mae(y_val, y_val_pred5):.4f}')

RMSE_train: 157.7597
RMSE_val: 158.3534
MAE_train: 113.9365
MAE_val: 113.9646


##### Test Data - evaluate model 6 (`ada_pipe5`: learning_rate=0.01, n_estimators=50)

In [13]:
# Evaluate the fifth pipeline (learning_rate=0.01, n_estimators=50) on the
# held-out test split; it has the lowest validation RMSE of the runs recorded above.
# Metric arguments follow scikit-learn's metric(y_true, y_pred) order; the values
# are unchanged because both metrics are symmetric.
y_test_pred = ada_pipe5.predict(X_test)
print("RMSE_test:", f'{rmse(y_test, y_test_pred):.4f}')
print("MAE_test:", f'{mae(y_test, y_test_pred):.4f}')

RMSE_test: 157.6789
MAE_test: 113.9684
