In [3]:
from tqdm import tqdm
from utils.dataset import AI4VN_AirDataLoader
from utils.evaluation import eval_regression_model

# Value forwarded as `test_size` to the loader
# (presumably the fraction of samples held out for evaluation — TODO confirm).
TEST_SIZE = 0.15

# Shared data loader for the whole notebook; constructing it prints
# "Loading raw csv files...".
air_data_loader = AI4VN_AirDataLoader(test_size=TEST_SIZE)

Loading raw csv files...


## Scikit-learn ensemble models

In [None]:
from utils.models import EnsembleModel

### Train models

In [3]:
# Ask the shared loader for its scikit-learn splits, then unpack the four
# parts used by the training and evaluation cells.
sklearn_splits = air_data_loader.get_data_loader_sklearn()
X_train, X_test, y_train, y_test = sklearn_splits

In [4]:
# Build both ensemble regressors first, then fit each one on the same training
# split (construction and fitting happen in the same order as the log shows).
# NOTE(review): the training log reports the same checkpoint path
# (trained_models/EnsembleModel.pkl) for both fits, so the second save
# presumably overwrites the first — confirm in utils.models.EnsembleModel.
model_gb, model_rf = [
    EnsembleModel(name=ensemble_name)
    for ensemble_name in ("gradientboosting", "randomforest")
]
trained_model_gb, trained_model_rf = [
    ensemble.fit(X_train, y_train) for ensemble in (model_gb, model_rf)
]

Creating Gradient Boosting Regressor
Creating Random Forest Regressor
Training model...
Trained model has been saved at trained_models/EnsembleModel.pkl
Training model...
Trained model has been saved at trained_models/EnsembleModel.pkl


### Evaluate the trained models

In [5]:
# Print the regression metrics (explained variance, MSLE, R^2, MAE, MSE, RMSE,
# MAPE) of the fitted gradient boosting model on X_test / y_test.
eval_regression_model(trained_model_gb, X_test, y_test)

explained_variance:  0.3674
mean_squared_log_error:  0.4021
R^2:  0.36743302762436236
MAE:  20.917917685450405
MSE:  1070.0027049837165
RMSE:  32.71089581444868
MAPE:  67.01531528734091 %


In [6]:
# Print the same regression metrics for the fitted random forest model on
# X_test / y_test, so the two ensembles can be compared side by side.
eval_regression_model(trained_model_rf, X_test, y_test)

explained_variance:  0.2835
mean_squared_log_error:  0.4389
R^2:  0.28352027390667056
MAE:  22.225689794453338
MSE:  1211.9432067512423
RMSE:  34.81297468977971
MAPE:  71.93823773342694 %


## PyTorch Neural Network

In [4]:
import torch
import torch.nn as nn

from utils.models import NeuralNetwork

In [5]:
# Mini-batch size requested from the PyTorch data loaders.
BATCH_SIZE = 64

# Training and validation loaders built by the shared data loader.
train_loader, val_loader = air_data_loader.get_data_loader_pytorch(
    batch_size=BATCH_SIZE
)

In [6]:
# Hyper-parameters of the PyTorch run, named so they are easy to find and tune.
SEED = 42
NUM_INPUT_FEATURES = 2  # presumably the number of features per sample — TODO confirm
LEARNING_RATE = 1e-4
num_epochs = 10

# Seed PyTorch's global RNG before the network is built so that any random
# weight initialisation (and other torch-driven randomness that follows) is
# repeatable across "Restart Kernel -> Run All".
torch.manual_seed(SEED)

net = NeuralNetwork(num_input_feat=NUM_INPUT_FEATURES)
loss_function = nn.L1Loss()  # mean absolute error (MAE)
optimizer = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE)

In [9]:
# Train `net` for `num_epochs` passes over `train_loader`, minimising
# `loss_function` with `optimizer`. The mean training loss of every epoch is
# stored in `epoch_losses` and shown on the progress bar.
net.train()  # explicit train mode, in case an earlier cell switched the net to eval()
epoch_losses = []  # mean training loss per epoch, kept for later inspection/plotting

progress_bar = tqdm(range(num_epochs))
for epoch in progress_bar:
    current_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.float(), targets.float()
        # Give the targets an explicit trailing dimension of size 1
        # (presumably to match the network's (batch, 1) output — TODO confirm).
        targets = targets.reshape((targets.shape[0], 1))

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()

        optimizer.step()

        current_loss += loss.item()
        num_batches += 1

    # Average over the batches actually seen; the max() guards an empty loader.
    mean_loss = current_loss / max(num_batches, 1)
    epoch_losses.append(mean_loss)
    progress_bar.set_postfix(train_loss=f"{mean_loss:.4f}")

100%|██████████| 10/10 [00:19<00:00,  1.98s/it]
