In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import logging
from pathlib import Path
# Directory one level above the kernel's working directory.
# Path.cwd() is already absolute, so no extra normalisation is required.
PARENT_PATH = Path.cwd().parent

from dnn.config import DNNConfig
from dnn.data_access import DataClass
from dnn.features import Features
from dnn.model import Model

# Root logger setup for the session: INFO level, with each record rendered as
# "<timestamp> - <logger name> - <level> - <message>".
FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)

#### Build config

In [3]:
# Create the project configuration and point its CURRENT_PATH at PARENT_PATH
# (the directory above the notebook's working directory).
# NOTE(review): presumably the data/, model/ and reports/ locations are
# resolved from CURRENT_PATH — confirm in DNNConfig.
config = DNNConfig()
config.CURRENT_PATH = PARENT_PATH

#### Parse raw data for pre-processing

In [4]:
# Judging by the captured log output: make_dirs() creates the model and reports
# directories, and build() unpacks the raw archive, reads it, splits it into
# train and validation sets and saves a CSV. The returned frame feeds the
# feature-building step.
# NOTE(review): the log line "Unpacking file {zip_file_path}" prints the
# placeholder literally — likely a missing f-string prefix in
# dnn.data_access.fetch; confirm and fix there.
data = DataClass(config)
data.make_dirs()
df = data.build()

2022-09-14 22:15:02,889 - dnn.data_access.fetch - INFO - Created model directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model
2022-09-14 22:15:02,890 - dnn.data_access.fetch - INFO - Created reports directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/reports
2022-09-14 22:15:02,891 - dnn.data_access.fetch - INFO - Once unzipped, raw data will be available here: /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/data/raw
2022-09-14 22:15:02,892 - dnn.data_access.fetch - INFO - Unpacking file {zip_file_path}
2022-09-14 22:15:03,726 - dnn.utils.utils - INFO - func:unzip took: 0.83 sec
2022-09-14 22:15:03,727 - dnn.data_access.fetch - INFO - Reading raw data into CSV for pre-processing
2022-09-14 22:15:04,695 - dnn.data_access.fetch - INFO - Splitting data available for model-building into train and validation sets
2022-09-14 22:15:11,726 - dnn.data_access.fetch - INFO - Saved CSV to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competi

#### Preprocess and build features

In [5]:
# Build model-ready features from the frame produced by the data step.
# Per the captured log output, build() fits the processor on the train set and
# then transforms the data.
features = Features(config)
df_features = features.build(df)

2022-09-14 22:15:13,501 - dnn.features.features - INFO - Fitting processor on train set
2022-09-14 22:15:13,814 - dnn.utils.utils - INFO - func:fit took: 0.26 sec
2022-09-14 22:15:13,815 - dnn.features.features - INFO - Transforming data
2022-09-14 22:15:17,185 - dnn.utils.utils - INFO - func:transform took: 3.36 sec
2022-09-14 22:15:28,695 - dnn.utils.utils - INFO - func:build took: 16.64 sec


#### Train model and generate submission file

In [6]:
# Train the model on the engineered features. Per the captured output, build()
# logs the feature and sample counts and prints training and validation loss
# after each epoch.
# NOTE(review): no random seed is set anywhere in this notebook; unless
# DNNConfig/Model seed internally, the losses will differ between runs — confirm.
model = Model(config)
model.build(df_features)

2022-09-14 22:15:30,609 - dnn.model.core - INFO - Training model
2022-09-14 22:15:30,620 - dnn.model.core - INFO - Number of features: 610
2022-09-14 22:15:30,621 - dnn.model.core - INFO - Number of training samples: 320000
2022-09-14 22:15:30,622 - dnn.model.core - INFO - Number of validation samples: 80000
EPOCH:1/10 - Training Loss: 432.6587, Validation Loss: 378.9439
EPOCH:2/10 - Training Loss: 353.1731, Validation Loss: 356.1632
EPOCH:3/10 - Training Loss: 343.8040, Validation Loss: 350.0914
EPOCH:4/10 - Training Loss: 339.1981, Validation Loss: 346.1640
EPOCH:5/10 - Training Loss: 336.2251, Validation Loss: 344.4388
EPOCH:6/10 - Training Loss: 333.6592, Validation Loss: 345.4611
EPOCH:7/10 - Training Loss: 331.4349, Validation Loss: 343.8223
EPOCH:8/10 - Training Loss: 329.7966, Validation Loss: 347.2539
EPOCH:9/10 - Training Loss: 328.4275, Validation Loss: 341.5088
EPOCH:10/10 - Training Loss: 327.0334, Validation Loss: 342.0826
2022-09-14 22:16:41,113 - dnn.model.core - INFO -

#### Save features and model

In [7]:
# Persist the artifacts produced above. Per the captured log output,
# features.save() writes the fitted processor as a pickle, and model.save()
# writes the model state dictionary (.pth) and its parameters (.json), all
# under the model directory.
features.save()
model.save()

2022-09-14 22:16:42,742 - dnn.features.features - INFO - Saving processor to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/processor.pickle
2022-09-14 22:16:42,745 - dnn.model.core - INFO - Saving model state dictionary to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp.pth
2022-09-14 22:16:42,775 - dnn.model.core - INFO - Saving model parameters to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp_params.json
2022-09-14 22:16:42,777 - dnn.utils.utils - INFO - func:save took: 0.03 sec
