In [1]:
# Load (or reload) IPython's autoreload extension. Mode 2 re-imports all modules
# before each cell executes, so edits to the local `dnn` package take effect
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import logging
from pathlib import Path
# Absolute path of the directory one level above the notebook's working directory
# (presumably the repository root — verify if the notebook is moved).
PARENT_PATH = Path.cwd().parent

from dnn.config import DNNConfig
from dnn.data_access import DataClass
from dnn.features import Features
from dnn.model import Model

# Layout shared by every log record: timestamp - logger name - level - message.
FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Configure the root logger so INFO-level messages from the `dnn` modules are displayed.
# Note: basicConfig is a no-op when the root logger already has handlers attached.
logging.basicConfig(
    level=logging.INFO,
    format=FORMAT,
)

#### Build config

In [3]:
# Create the project configuration object; the same instance is handed to
# DataClass, Features and Model in the cells that follow.
config = DNNConfig()
# Anchor the config at the parent of the working directory. The logged output paths
# (model/, reports/, data/raw) appear to be resolved relative to it — TODO confirm in dnn.config.
config.CURRENT_PATH = PARENT_PATH

#### Parse raw data for pre-processing

In [4]:
# Data-access stage, driven by the shared config.
data = DataClass(config)
# Must run before build(): per the captured log it creates the model and reports directories.
data.make_dirs()
# Per the captured log, build() unzips the raw archive, reads the raw data, splits it into
# train and validation sets and saves a CSV. The return value is presumably a DataFrame
# (hence `df`) — verify against dnn.data_access.
df = data.build()

2022-09-15 03:54:08,638 - dnn.data_access.fetch - INFO - Created model directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model
2022-09-15 03:54:08,640 - dnn.data_access.fetch - INFO - Created reports directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/reports
2022-09-15 03:54:08,641 - dnn.data_access.fetch - INFO - Once unzipped, raw data will be available here: /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/data/raw
2022-09-15 03:54:08,642 - dnn.data_access.fetch - INFO - Unpacking file {zip_file_path}
2022-09-15 03:54:09,425 - dnn.utils.utils - INFO - func:unzip took: 0.78 sec
2022-09-15 03:54:09,426 - dnn.data_access.fetch - INFO - Reading raw data into CSV for pre-processing
2022-09-15 03:54:10,390 - dnn.data_access.fetch - INFO - Splitting data available for model-building into train and validation sets
2022-09-15 03:54:17,213 - dnn.data_access.fetch - INFO - Saved CSV to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competi

#### Preprocess and build features

In [5]:
# Feature-engineering stage, driven by the shared config.
features = Features(config)
# Per the captured log, build() fits a processor on the train set and then transforms the data
# (about 16 s in the recorded run). The result is passed to Model.build() in the next section.
df_features = features.build(df)

2022-09-15 03:54:18,906 - dnn.features.features - INFO - Fitting processor on train set
2022-09-15 03:54:19,212 - dnn.utils.utils - INFO - func:fit took: 0.26 sec
2022-09-15 03:54:19,213 - dnn.features.features - INFO - Transforming data
2022-09-15 03:54:22,573 - dnn.utils.utils - INFO - func:transform took: 3.35 sec
2022-09-15 03:54:33,817 - dnn.utils.utils - INFO - func:build took: 16.32 sec


#### Train model and generate submission file

In [6]:
# Modelling stage, driven by the shared config.
model = Model(config)
# Trains on the engineered features and prints per-epoch training/validation loss.
# Long-running: judging by the log timestamps, the recorded run (up to 100 epochs) took
# roughly an hour.
# NOTE(review): the section title mentions a submission file; the visible output does not
# show where it is written — confirm in dnn.model.
model.build(df_features)

2022-09-15 03:54:36,232 - dnn.model.core - INFO - Training model
2022-09-15 03:54:36,246 - dnn.model.core - INFO - Number of features: 610
2022-09-15 03:54:36,246 - dnn.model.core - INFO - Number of training samples: 320000
2022-09-15 03:54:36,247 - dnn.model.core - INFO - Number of validation samples: 80000
EPOCH:1/100 - Training Loss: 434.5218, Validation Loss: 376.2603
EPOCH:2/100 - Training Loss: 353.1301, Validation Loss: 359.2219
EPOCH:3/100 - Training Loss: 342.8799, Validation Loss: 350.0489
EPOCH:4/100 - Training Loss: 338.4302, Validation Loss: 346.4148
EPOCH:5/100 - Training Loss: 335.9062, Validation Loss: 345.3732
EPOCH:6/100 - Training Loss: 333.5598, Validation Loss: 343.2729
EPOCH:7/100 - Training Loss: 331.5424, Validation Loss: 342.7682
EPOCH:8/100 - Training Loss: 329.7884, Validation Loss: 341.8892
EPOCH:9/100 - Training Loss: 328.4201, Validation Loss: 341.9203
EPOCH:10/100 - Training Loss: 327.5036, Validation Loss: 340.7039
EPOCH:11/100 - Training Loss: 325.8000,

#### Save features and model

In [7]:
# Persist the fitted feature processor (logged as model/processor.pickle).
features.save()
# Persist the trained model: per the log, a state dictionary (model/mlp.pth) and the
# model parameters (model/mlp_params.json).
model.save()

2022-09-15 04:59:10,835 - dnn.features.features - INFO - Saving processor to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/processor.pickle
2022-09-15 04:59:10,839 - dnn.model.core - INFO - Saving model state dictionary to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp.pth
2022-09-15 04:59:10,877 - dnn.model.core - INFO - Saving model parameters to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp_params.json
2022-09-15 04:59:10,882 - dnn.utils.utils - INFO - func:save took: 0.04 sec
