In [1]:
import pandas as pd

- **`Loading` data from .csv file:**

In [2]:
data = pd.read_csv("API_Call_Dataset.csv")

- **`Parsing` Dates:**

In [None]:
data['Time of Call'] = pd.to_datetime(data['Time of Call'], format='%d-%m-%Y %H:%M')

- **Handling `Missing Data`:**

In [4]:
data.dropna()

Unnamed: 0,API Code,Time of Call
0,A1,2022-01-01 16:36:00
1,A1,2022-01-01 18:31:00
2,A1,2022-01-01 18:31:00
3,A1,2022-01-01 19:23:00
4,A1,2022-01-01 08:45:00
...,...,...
16123849,A9,2024-12-31 06:20:00
16123850,A9,2024-12-31 14:25:00
16123851,A9,2024-12-31 01:14:00
16123852,A9,2024-12-31 14:10:00


- **Handling `Duplicates`:**

In [5]:
data = data.drop_duplicates()

- **`Storing` data of each API into separate files:**

In [6]:
import os

# Create the directory if it doesn't exist
os.makedirs("separate_data", exist_ok=True)

api_names = data['API Code'].unique()

for api in api_names:
   filt = data['API Code'] == api
   sep_data = data[filt]
   sep_data = sep_data.rename(columns={'API Code': 'api_name', 'Time of Call': 'call_time'})
   sep_data.to_csv(f"separate_data/{api}.csv")

- **Importing neccessary `Libraries` for modelling:**

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from xgboost import XGBRegressor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.losses import MeanSquaredError
import warnings
warnings.filterwarnings("ignore")
import os
import json
import joblib

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


- **`Prophet` Model:**

In [8]:
from prophet import Prophet

def train_prophet(train_data):
   prophet_data = train_data.reset_index().rename(columns={'call_time': 'ds', 'call_count': 'y'})
   model = Prophet(yearly_seasonality=True, daily_seasonality=True)
   model.add_regressor('hour')
   model.add_regressor('is_night')
   model.add_regressor('is_weekend')
   model.add_regressor('day_of_month')
   model.fit(prophet_data)
   return model

- **`ARIMA` Model:**

In [9]:
from statsmodels.tsa.arima.model import ARIMA

def train_arima(train_data):
   model = ARIMA(train_data['call_count'], order=(1,1,1), seasonal_order=(1,1,1,24))  # Example order, adjust as needed
   arima_model = model.fit()
   return arima_model

- **`XGBoost` Model:**

In [10]:
from xgboost import XGBRegressor

def train_xgboost(train_data):
   xgb_data = train_data.copy()
   xgb_data['call_count_lag1'] = xgb_data['call_count'].shift(1)
   xgb_data.dropna(inplace=True)
   
   X_train = xgb_data[['hour', 'is_night', 'is_weekend', 'day_of_month', 'call_count_lag1']]
   y_train = xgb_data['call_count']
   
   xgb_model = XGBRegressor(n_estimators=100, max_depth=3)
   xgb_model.fit(X_train, y_train)
   return xgb_model

- **`LSTM` Model:**

In [11]:
def train_lstm(data, n_steps):
   scaler = MinMaxScaler()
   data = scaler.fit_transform(data)
   # data = data.to_numpy()
   X, y = [], []
   for i in range(len(data) - n_steps):
      X.append(data[i:i + n_steps, :])  # Include all features in X
      y.append(data[i + n_steps, 0])    # Target can still be based on 'call_count'
   X, y = np.array(X), np.array(y)

   split_idx = int(len(data) * 0.9)
   X_train = X[:split_idx]
   X_val = X[split_idx:]
   y_train = y[:split_idx]
   y_val = y[split_idx:]
    
   #  # Define and compile the LSTM model
   model = Sequential()
   model.add(InputLayer((n_steps, X.shape[2])))
   model.add(LSTM(64, return_sequences=True))
   model.add(LSTM(32))
   model.add(Dense(8,'relu'))
   model.add(Dense(1,'linear'))
   model.compile(optimizer=Adam(learning_rate=0.001), loss=MeanSquaredError(),metrics=[RootMeanSquaredError()])
   model.summary()
   history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, verbose=1)
   
   # Optional: Plot the loss over epochs to monitor training
   # import matplotlib.pyplot as plt
   # plt.plot(history.history['loss'], label='Train Loss')
   # plt.plot(history.history['val_loss'], label='Validation Loss')
   # plt.legend()
   # plt.show() 
   
   return model,scaler


- **`TCN` Model:**

In [12]:
def train_tcn(data, n_steps=24):
   
   scaler = MinMaxScaler()
   train_data = scaler.fit_transform(data)
   
   data = data.to_numpy()
   # Prepare the data
   X, y = [], []
   for i in range(len(data) - n_steps):
      X.append(data[i:i + n_steps, :])  # Include all features in X
      y.append(data[i + n_steps, 0])   # Target can still be based on 'call_count'
   X, y = np.array(X), np.array(y)

   split_idx = int(len(data) * 0.9)
   X_train = X[:split_idx]
   X_val = X[split_idx:]
   y_train = y[:split_idx]
   y_val = y[split_idx:]
   
    
   model = Sequential()
   model.add(InputLayer((n_steps, X.shape[2])))  # Input shape based on n_steps and feature count
   model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
   model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
   model.add(Flatten())
   model.add(Dense(8, activation='relu'))
   model.add(Dense(1, activation='linear'))
   model.compile(optimizer=Adam(learning_rate=0.001), loss=MeanSquaredError(), metrics=['mae'])

   history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, verbose=1)
    
   return model, scaler

- **`Forcasting` functions:**

In [13]:
def forecast_with_prophet(model, periods):
   future = model.make_future_dataframe(periods=periods, freq='H')
   future['hour'] = future['ds'].dt.hour
   future['is_night'] = future['hour'].apply(lambda x: 1 if (x >= 20 or x < 8) else 0)
   future['is_weekend'] = future['ds'].dt.dayofweek >5
   future['is_weekend'] = future['is_weekend'].astype(int)
   future['day_of_month'] = future['ds'].dt.day
   forecast = model.predict(future)
   return forecast[['ds', 'yhat']]

def forecast_with_arima(model, periods):
   forecast = model.forecast(steps=periods)
   return forecast

def forecast_with_xgboost(model, test_data, periods):
   forecast = []
   last_known_value = test_data['call_count'].iloc[-1]
   
   for i in range(periods):
      features = np.array([
         test_data.index[-1].hour,  # hour
         1 if test_data.index[-1].hour >= 20 or test_data.index[-1].hour < 6 else 0,  # is_night
         1 if test_data.index[-1].dayofweek >= 5 else 0,  # is_weekend
         test_data.index[-1].day,  # day_of_month
         last_known_value  # Lagged call_count
      ]).reshape(1, -1)
      
      pred = model.predict(features)
      forecast.append(pred[0])
      last_known_value = pred[0]
   return forecast

def forecast_lstm(model, scaler, test_data, n_steps=24):
   test_data = scaler.transform(test_data)  # Apply the same scaling to test data
   X, y = [], []
   for i in range(len(test_data) - n_steps):
      X.append(test_data[i:i + n_steps, :])
      y.append(test_data[i + n_steps, 0])
   X_test, y_test = np.array(X), np.array(y)

   # Generate predictions on test data
   test_predictions = model.predict(X_test).flatten()

   # Inverse scale predictions and actuals for interpretability
   test_predictions = scaler.inverse_transform(np.concatenate([test_predictions.reshape(-1, 1), np.zeros((len(test_predictions), test_data.shape[1] - 1))], axis=1))[:, 0]
   y_test = scaler.inverse_transform(np.concatenate([y_test.reshape(-1, 1), np.zeros((len(y_test), test_data.shape[1] - 1))], axis=1))[:, 0]

   # Compare predictions with actual values
   test_results = pd.DataFrame(data={'Test Predictions': test_predictions, 'Actuals': y_test})
   return test_predictions, y_test

def forecast_tcn(model, scaler, test_data, n_steps=24):
   # Normalize the test data
   test_data = scaler.transform(test_data)

   # Prepare the data for forecasting
   X, y = [], []
   for i in range(len(test_data) - n_steps):
      X.append(test_data[i:i + n_steps, :])
      y.append(test_data[i + n_steps, 0])
   X_test, y_test = np.array(X), np.array(y)

   # Generate predictions on the test data
   test_predictions = model.predict(X_test).flatten()

   # Inverse scale predictions and actuals for interpretability
   test_predictions = scaler.inverse_transform(
      np.concatenate([test_predictions.reshape(-1, 1), np.zeros((len(test_predictions), test_data.shape[1] - 1))], axis=1)
   )[:, 0]
   y_test = scaler.inverse_transform(
      np.concatenate([y_test.reshape(-1, 1), np.zeros((len(y_test), test_data.shape[1] - 1))], axis=1)
   )[:, 0]

   return test_predictions, y_test

- **Model `Selection` and `Evaluation`:**

In [14]:
# Evaluation function
def evaluate_model(predictions, actual):
    predictions = pd.Series(predictions).reset_index(drop=True)
    actual = pd.Series(actual).reset_index(drop=True)
    
    mae = mean_absolute_error(actual, predictions)
    rmse = np.sqrt(mean_squared_error(actual, predictions))
    
    # Define accuracy as the percentage of predictions within 10% of actual values
    tolerance = 0.1
    within_range = (abs(predictions - actual) <= tolerance * actual).sum()
    accuracy = within_range / len(actual) * 100
    
    return mae, rmse, accuracy

# Model training and evaluation loop
results = {}
forecast_periods = 24  # Define the forecast length
n_steps = 24

for api in api_names:
   print(f"Evaluating models for API: {api}")
   
   # Load and preprocess data
   data = pd.read_csv(f'separate_data/{api}.csv')
   data['call_time'] = pd.to_datetime(data['call_time'])
   data.set_index('call_time', inplace=True)

   # Resample to hourly frequency
   hourly_data = data.resample('H').size().to_frame('call_count')
   hourly_data['hour'] = hourly_data.index.hour
   hourly_data['is_night'] = hourly_data['hour'].apply(lambda x: 1 if (x >= 20 or x < 8) else 0)
   hourly_data['is_weekend'] = (hourly_data.index.dayofweek >= 5).astype(int)
   hourly_data['day_of_month'] = hourly_data.index.day
   hourly_data['month'] = hourly_data.index.month
   hourly_data['year'] = hourly_data.index.year
   
   # Split data into train and test sets
   split_idx = int(len(hourly_data) * 0.7)
   train_data = hourly_data[:split_idx]
   test_data = hourly_data[split_idx:]

   # Prophet Model
   prophet_model = train_prophet(train_data)
   prophet_predictions = forecast_with_prophet(prophet_model, periods=forecast_periods)['yhat']
   prophet_mae, prophet_rmse, prophet_accuracy = evaluate_model(prophet_predictions[:forecast_periods], test_data['call_count'][:forecast_periods])

   # ARIMA Model
   arima_model = train_arima(train_data)
   arima_predictions = forecast_with_arima(arima_model, periods=forecast_periods)
   arima_mae, arima_rmse, arima_accuracy = evaluate_model(arima_predictions[:forecast_periods], test_data['call_count'][:forecast_periods])

   # XGBoost Model
   xgb_model = train_xgboost(train_data)
   xgb_predictions = forecast_with_xgboost(xgb_model, test_data, periods=forecast_periods)
   xgb_mae, xgb_rmse, xgb_accuracy = evaluate_model(xgb_predictions[:forecast_periods], test_data['call_count'][:forecast_periods])

   # LSTM Model
   lstm_model,scaler=train_lstm(train_data,24)
   lstm_predictions, y_test = forecast_lstm(lstm_model, scaler, test_data, 24)
   lstm_mae, lstm_rmse, lstm_accuracy = evaluate_model(lstm_predictions, y_test)

   # TCN Model
   # tcn_model, scaler = train_tcn(train_data, n_steps=n_steps)
   # tcn_predictions, y_test = forecast_tcn(tcn_model, scaler, test_data, 24)
   # tcn_mae, tcn_rmse, tcn_accuracy = evaluate_model(tcn_predictions, y_test)

   # Store results
   results[api] = {
      'Prophet': {'MAE': prophet_mae, 'RMSE': prophet_rmse, 'Accuracy': prophet_accuracy},
      'ARIMA': {'MAE': arima_mae, 'RMSE': arima_rmse, 'Accuracy': arima_accuracy},
      'XGBoost': {'MAE': xgb_mae, 'RMSE': xgb_rmse, 'Accuracy': xgb_accuracy},
      'LSTM': {'MAE': lstm_mae, 'RMSE': lstm_rmse, 'Accuracy': lstm_accuracy},
      # 'TCN': {'MAE': tcn_mae, 'RMSE': tcn_rmse, 'Accuracy': tcn_accuracy}
   }

print("Model evaluation completed.")

Evaluating models for API: A1


18:04:16 - cmdstanpy - INFO - Chain [1] start processing
18:04:19 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 32ms/step - loss: 0.0424 - root_mean_squared_error: 0.1953 - val_loss: 0.0075 - val_root_mean_squared_error: 0.0867
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 30ms/step - loss: 0.0076 - root_mean_squared_error: 0.0871 - val_loss: 0.0060 - val_root_mean_squared_error: 0.0777
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 31ms/step - loss: 0.0058 - root_mean_squared_error: 0.0759 - val_loss: 0.0058 - val_root_mean_squared_error: 0.0763
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 30ms/step - loss: 0.0058 - root_mean_squared_error: 0.0761 - val_loss: 0.0054 - val_root_mean_squared_error: 0.0736
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 30ms/step - loss: 0.0055 - root_mean_squared_error: 0.0744 - val_loss: 0.0055 - val_root_mean_squared_error: 0.0739
Epoch 6/100
[1m518/518[

18:21:37 - cmdstanpy - INFO - Chain [1] start processing
18:21:39 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - loss: 0.0276 - root_mean_squared_error: 0.1591 - val_loss: 0.0060 - val_root_mean_squared_error: 0.0774
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.0059 - root_mean_squared_error: 0.0771 - val_loss: 0.0065 - val_root_mean_squared_error: 0.0808
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 0.0060 - root_mean_squared_error: 0.0771 - val_loss: 0.0053 - val_root_mean_squared_error: 0.0725
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.0054 - root_mean_squared_error: 0.0732 - val_loss: 0.0049 - val_root_mean_squared_error: 0.0698
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.0053 - root_mean_squared_error: 0.0725 - val_loss: 0.0052 - val_root_mean_squared_error: 0.0718
Epoch 6/100
[1m518/518[0m 

18:35:56 - cmdstanpy - INFO - Chain [1] start processing
18:36:01 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step - loss: 0.0576 - root_mean_squared_error: 0.2246 - val_loss: 0.0074 - val_root_mean_squared_error: 0.0859
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.0058 - root_mean_squared_error: 0.0761 - val_loss: 0.0062 - val_root_mean_squared_error: 0.0784
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 0.0054 - root_mean_squared_error: 0.0734 - val_loss: 0.0055 - val_root_mean_squared_error: 0.0742
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 0.0055 - root_mean_squared_error: 0.0738 - val_loss: 0.0057 - val_root_mean_squared_error: 0.0756
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 0.0052 - root_mean_squared_error: 0.0723 - val_loss: 0.0053 - val_root_mean_squared_error: 0.0728
Epoch 6/100
[1m518/518[0m 

18:51:26 - cmdstanpy - INFO - Chain [1] start processing
18:51:30 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - loss: 0.0529 - root_mean_squared_error: 0.2138 - val_loss: 0.0064 - val_root_mean_squared_error: 0.0800
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 0.0063 - root_mean_squared_error: 0.0792 - val_loss: 0.0078 - val_root_mean_squared_error: 0.0882
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 0.0061 - root_mean_squared_error: 0.0778 - val_loss: 0.0054 - val_root_mean_squared_error: 0.0736
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 0.0056 - root_mean_squared_error: 0.0748 - val_loss: 0.0053 - val_root_mean_squared_error: 0.0728
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.0055 - root_mean_squared_error: 0.0742 - val_loss: 0.0056 - val_root_mean_squared_error: 0.0748
Epoch 6/100
[1m518/518[0m 

19:07:33 - cmdstanpy - INFO - Chain [1] start processing
19:07:39 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 21ms/step - loss: 0.0381 - root_mean_squared_error: 0.1844 - val_loss: 0.0070 - val_root_mean_squared_error: 0.0835
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.0069 - root_mean_squared_error: 0.0833 - val_loss: 0.0060 - val_root_mean_squared_error: 0.0775
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.0059 - root_mean_squared_error: 0.0767 - val_loss: 0.0057 - val_root_mean_squared_error: 0.0753
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.0055 - root_mean_squared_error: 0.0744 - val_loss: 0.0058 - val_root_mean_squared_error: 0.0761
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.0057 - root_mean_squared_error: 0.0754 - val_loss: 0.0059 - val_root_mean_squared_error: 0.0766
Epoch 6/100
[1m518/518[0m 

19:25:05 - cmdstanpy - INFO - Chain [1] start processing
19:25:09 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 21ms/step - loss: 0.0350 - root_mean_squared_error: 0.1780 - val_loss: 0.0132 - val_root_mean_squared_error: 0.1151
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0061 - val_root_mean_squared_error: 0.0783
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 0.0057 - root_mean_squared_error: 0.0752 - val_loss: 0.0054 - val_root_mean_squared_error: 0.0736
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - loss: 0.0055 - root_mean_squared_error: 0.0745 - val_loss: 0.0054 - val_root_mean_squared_error: 0.0738
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.0053 - root_mean_squared_error: 0.0730 - val_loss: 0.0050 - val_root_mean_squared_error: 0.0709
Epoch 6/100
[1m518/518[0m

19:42:50 - cmdstanpy - INFO - Chain [1] start processing
19:42:56 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 21ms/step - loss: 0.2807 - root_mean_squared_error: 0.5268 - val_loss: 0.0937 - val_root_mean_squared_error: 0.3062
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - loss: 0.0770 - root_mean_squared_error: 0.2774 - val_loss: 0.0614 - val_root_mean_squared_error: 0.2478
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - loss: 0.0610 - root_mean_squared_error: 0.2469 - val_loss: 0.0604 - val_root_mean_squared_error: 0.2458
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - loss: 0.0610 - root_mean_squared_error: 0.2470 - val_loss: 0.0604 - val_root_mean_squared_error: 0.2457
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - loss: 0.0612 - root_mean_squared_error: 0.2474 - val_loss: 0.0604 - val_root_mean_squared_error: 0.2457
Epoch 6/100
[1m518/518[

20:00:50 - cmdstanpy - INFO - Chain [1] start processing
20:00:54 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 22ms/step - loss: 0.0442 - root_mean_squared_error: 0.1962 - val_loss: 0.0066 - val_root_mean_squared_error: 0.0809
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - loss: 0.0062 - root_mean_squared_error: 0.0789 - val_loss: 0.0056 - val_root_mean_squared_error: 0.0751
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - loss: 0.0066 - root_mean_squared_error: 0.0813 - val_loss: 0.0052 - val_root_mean_squared_error: 0.0724
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 22ms/step - loss: 0.0055 - root_mean_squared_error: 0.0744 - val_loss: 0.0054 - val_root_mean_squared_error: 0.0732
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - loss: 0.0054 - root_mean_squared_error: 0.0732 - val_loss: 0.0055 - val_root_mean_squared_error: 0.0741
Epoch 6/100
[1m518/518[

20:19:58 - cmdstanpy - INFO - Chain [1] start processing
20:20:02 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - loss: 0.0311 - root_mean_squared_error: 0.1687 - val_loss: 0.0064 - val_root_mean_squared_error: 0.0799
Epoch 2/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - loss: 0.0067 - root_mean_squared_error: 0.0817 - val_loss: 0.0055 - val_root_mean_squared_error: 0.0744
Epoch 3/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - loss: 0.0059 - root_mean_squared_error: 0.0767 - val_loss: 0.0060 - val_root_mean_squared_error: 0.0775
Epoch 4/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - loss: 0.0059 - root_mean_squared_error: 0.0769 - val_loss: 0.0051 - val_root_mean_squared_error: 0.0715
Epoch 5/100
[1m518/518[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - loss: 0.0055 - root_mean_squared_error: 0.0742 - val_loss: 0.0060 - val_root_mean_squared_error: 0.0775
Epoch 6/100
[1m518/518[

- **`Saving` the Models:**

In [None]:
# Directory to save models and results
model_dir = "saved_models"
os.makedirs(model_dir, exist_ok=True)

# Save all models and their evaluation results
for api, model_results in results.items():
   for model_name, metrics in model_results.items():
      # Save the model
      model_path = os.path.join(model_dir, f"{api}_{model_name}.joblib")
      joblib.dump(metrics.get("Model"), model_path)

      # Save the evaluation results
      results_path = os.path.join(model_dir, f"{api}_{model_name}_results.json")
      with open(results_path, "w") as f:
         json.dump({"RMSE": metrics["RMSE"], "Accuracy": metrics["Accuracy"]}, f)

NameError: name 'results' is not defined

- **Finding the `best models` for each API:**

In [6]:
import os
import joblib
import json

# Directory where models and metrics are saved
model_dir = "saved_models"

# Function to find the best model for each API
def find_best_models(model_dir):
    best_models = {}

    # Group results by API
    api_models = {}
    for file_name in os.listdir(model_dir):
        if file_name.endswith("_results.json"):
            api_name, model_name = file_name.rsplit("_", 2)[0:2]
            results_path = os.path.join(model_dir, file_name)
            
            # Load metrics
            with open(results_path, "r") as f:
                metrics = json.load(f)
            
            if api_name not in api_models:
                api_models[api_name] = {}
            api_models[api_name][model_name] = metrics

    # Find the best model for each API
    for api, models in api_models.items():
        best_model_name = None
        best_model_metrics = None

        # Filter models with RMSE < 8
        filtered_models = {name: metrics for name, metrics in models.items() if metrics["RMSE"] < 8}
        
        if filtered_models:
            # Compare by accuracy if RMSE is < 8 for multiple models
            best_model_name = max(filtered_models, key=lambda x: filtered_models[x]["Accuracy"])
            best_model_metrics = filtered_models[best_model_name]
        else:
            # Choose model with the lowest RMSE
            best_model_name = min(models, key=lambda x: models[x]["RMSE"])
            best_model_metrics = models[best_model_name]

        # Save the best model and metrics
        model_path = os.path.join(model_dir, f"{api}_{best_model_name}.joblib")
        best_models[api] = {
            "Model Name": best_model_name,
            "Metrics": best_model_metrics,
            "Model Path": model_path
        }

    return best_models

# Call the function
best_models = find_best_models(model_dir)

# Display the best models
for api, details in best_models.items():
    print(f"Best model for {api}:")
    print(f"  Model Name: {details['Model Name']}")
    print(f"  RMSE: {details['Metrics']['RMSE']}")
    print(f"  Accuracy: {details['Metrics']['Accuracy']}")
    print(f"  Saved at: {details['Model Path']}\n")


Best model for A1:
  Model Name: ARIMA
  RMSE: 2.979619155994401
  Accuracy: 79.16666666666666
  Saved at: saved_models\A1_ARIMA.joblib

Best model for A2:
  Model Name: ARIMA
  RMSE: 2.3159160481849614
  Accuracy: 83.33333333333334
  Saved at: saved_models\A2_ARIMA.joblib

Best model for A3:
  Model Name: ARIMA
  RMSE: 4.520202130125251
  Accuracy: 70.83333333333334
  Saved at: saved_models\A3_ARIMA.joblib

Best model for A4:
  Model Name: LSTM
  RMSE: 3.718873338809156
  Accuracy: 65.26436197254702
  Saved at: saved_models\A4_LSTM.joblib

Best model for A5:
  Model Name: ARIMA
  RMSE: 3.2269962394610454
  Accuracy: 83.33333333333334
  Saved at: saved_models\A5_ARIMA.joblib

Best model for A6:
  Model Name: LSTM
  RMSE: 3.812849170545218
  Accuracy: 63.38332486019319
  Saved at: saved_models\A6_LSTM.joblib

Best model for A7:
  Model Name: ARIMA
  RMSE: 2.723921556564258
  Accuracy: 70.83333333333334
  Saved at: saved_models\A7_ARIMA.joblib

Best model for A8:
  Model Name: LSTM
  RMS