In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the project package are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import logging
from pathlib import Path
# Absolute path of the directory one level above the kernel's working directory.
# NOTE(review): presumably the repository root, i.e. the notebook is expected to
# run from a sub-directory directly under it -- confirm.
PARENT_PATH = Path.cwd().parent.absolute()

from dnn.config import DNNConfig
from dnn.data_access import DataClass
from dnn.features import Features
from dnn.model import Model

# Log record layout: timestamp - logger name - level - message.
FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# Configure the root logger at INFO so progress messages show up in cell output.
logging.basicConfig(level=logging.INFO, format=FORMAT)

#### Build config

In [3]:
# Create the project configuration object shared by every later stage.
config = DNNConfig()
# Point the config at PARENT_PATH (parent of the working directory).
# NOTE(review): presumably all project paths are derived from this attribute --
# the recorded run log shows model/, reports/ and data/raw under it; confirm in DNNConfig.
config.CURRENT_PATH = PARENT_PATH

#### Parse raw data for pre-processing

In [4]:
# Data-access stage, driven by the shared config.
data = DataClass(config)
# Presumably creates the model and reports directories (the recorded run log
# reports both being created) -- confirm in DataClass.make_dirs.
data.make_dirs()
# Per the recorded run log, build() unpacks the raw archive, reads the raw data,
# splits it into train and validation sets and saves a CSV. Its return value is
# kept as `df` and passed to the feature stage.
# NOTE(review): the log prints a literal "{zip_file_path}" -- the logging call in
# dnn.data_access.fetch looks like it is missing string formatting; fix it there.
df = data.build()

2022-09-15 01:00:31,602 - dnn.data_access.fetch - INFO - Created model directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model
2022-09-15 01:00:31,603 - dnn.data_access.fetch - INFO - Created reports directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/reports
2022-09-15 01:00:31,605 - dnn.data_access.fetch - INFO - Once unzipped, raw data will be available here: /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/data/raw
2022-09-15 01:00:31,606 - dnn.data_access.fetch - INFO - Unpacking file {zip_file_path}
2022-09-15 01:00:32,354 - dnn.utils.utils - INFO - func:unzip took: 0.75 sec
2022-09-15 01:00:32,355 - dnn.data_access.fetch - INFO - Reading raw data into CSV for pre-processing
2022-09-15 01:00:33,719 - dnn.data_access.fetch - INFO - Splitting data available for model-building into train and validation sets
2022-09-15 01:00:41,230 - dnn.data_access.fetch - INFO - Saved CSV to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competi

#### Preprocess and build features

In [5]:
# Feature stage, driven by the shared config.
features = Features(config)
# Per the recorded run log, build() fits a processor on the train set and then
# transforms the data; the result is kept as `df_features` for model training.
df_features = features.build(df)

2022-09-15 01:00:43,047 - dnn.features.features - INFO - Fitting processor on train set
2022-09-15 01:00:43,396 - dnn.utils.utils - INFO - func:fit took: 0.29 sec
2022-09-15 01:00:43,397 - dnn.features.features - INFO - Transforming data
2022-09-15 01:00:47,198 - dnn.utils.utils - INFO - func:transform took: 3.79 sec
2022-09-15 01:01:04,228 - dnn.utils.utils - INFO - func:build took: 22.65 sec


#### Train model and generate submission file

In [6]:
# Modelling stage, driven by the shared config.
model = Model(config)
# Per the recorded run log, build() trains the model: it reports the feature
# count and train/validation sample counts, then per-epoch training and
# validation loss (the run shown is configured for 50 epochs).
# NOTE(review): the section heading also mentions generating a submission file;
# the visible log is truncated, so that step cannot be confirmed here -- TODO verify.
model.build(df_features)

2022-09-15 01:01:06,369 - dnn.model.core - INFO - Training model
2022-09-15 01:01:06,381 - dnn.model.core - INFO - Number of features: 610
2022-09-15 01:01:06,382 - dnn.model.core - INFO - Number of training samples: 320000
2022-09-15 01:01:06,384 - dnn.model.core - INFO - Number of validation samples: 80000
EPOCH:1/50 - Training Loss: 433.6363, Validation Loss: 375.0902
EPOCH:2/50 - Training Loss: 353.9997, Validation Loss: 355.1570
EPOCH:3/50 - Training Loss: 344.1858, Validation Loss: 349.0168
EPOCH:4/50 - Training Loss: 339.3187, Validation Loss: 347.0388
EPOCH:5/50 - Training Loss: 336.3177, Validation Loss: 346.9173
EPOCH:6/50 - Training Loss: 334.6770, Validation Loss: 349.7823
EPOCH:7/50 - Training Loss: 332.6425, Validation Loss: 345.4816
EPOCH:8/50 - Training Loss: 330.8800, Validation Loss: 341.6853
EPOCH:9/50 - Training Loss: 329.5342, Validation Loss: 342.6322
EPOCH:10/50 - Training Loss: 327.9129, Validation Loss: 343.9199
EPOCH:11/50 - Training Loss: 327.4806, Validation

#### Save features and model

In [7]:
# Persist the fitted feature processor (logged as model/processor.pickle).
features.save()
# Persist the trained model: the run log shows a state dictionary written to
# model/mlp.pth and the model parameters to model/mlp_params.json.
model.save()

2022-09-15 01:06:44,901 - dnn.features.features - INFO - Saving processor to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/processor.pickle
2022-09-15 01:06:44,903 - dnn.model.core - INFO - Saving model state dictionary to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp.pth
2022-09-15 01:06:44,922 - dnn.model.core - INFO - Saving model parameters to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp_params.json
2022-09-15 01:06:44,924 - dnn.utils.utils - INFO - func:save took: 0.02 sec
