In [8]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# imported project modules (the `dnn` package) are picked up live,
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [9]:
import os
import logging
from pathlib import Path
# Absolute path of the directory one level above the kernel's current
# working directory (presumably the project root when the notebook is
# launched from its own sub-folder -- confirm against the repo layout).
PARENT_PATH = Path.cwd().parent.absolute()

from dnn.config import DNNConfig
from dnn.data_access import DataClass
from dnn.features import Features
from dnn.model import Model

# Layout of every log record: timestamp, logger name, severity, message.
FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Configure the root logger at INFO so the project modules' progress
# messages are shown in the cell output.
logging.basicConfig(level=logging.INFO, format=FORMAT)

#### Build config

In [10]:
# Create the shared configuration object and point its CURRENT_PATH at
# PARENT_PATH (the parent of the kernel's working directory). The same
# `config` instance is handed to DataClass, Features and Model below.
config = DNNConfig()
config.CURRENT_PATH = PARENT_PATH

#### Parse raw data for preprocessing

In [11]:
# Data-access stage, driven by the shared `config`.
data = DataClass(config)
# Per the recorded log output, this step creates the model and reports
# directories (presumably under config.CURRENT_PATH -- confirm).
data.make_dirs()
# Per the recorded log output, this step unpacks the zipped raw data,
# reads it, splits it into train and validation sets, and saves a CSV.
# The returned `df` is the input to the feature-building cell.
# NOTE(review): the recorded log line "Unpacking file {zip_file_path}"
# shows an un-interpolated placeholder -- likely a missing f-string prefix
# in dnn.data_access.fetch; confirm and fix it in that module.
df = data.build()

2022-09-14 22:35:14,393 - dnn.data_access.fetch - INFO - Created model directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model
2022-09-14 22:35:14,394 - dnn.data_access.fetch - INFO - Created reports directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/reports
2022-09-14 22:35:14,395 - dnn.data_access.fetch - INFO - Once unzipped, raw data will be available here: /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/data/raw
2022-09-14 22:35:14,396 - dnn.data_access.fetch - INFO - Unpacking file {zip_file_path}
2022-09-14 22:35:15,220 - dnn.utils.utils - INFO - func:unzip took: 0.82 sec
2022-09-14 22:35:15,221 - dnn.data_access.fetch - INFO - Reading raw data into CSV for pre-processing
2022-09-14 22:35:16,164 - dnn.data_access.fetch - INFO - Splitting data available for model-building into train and validation sets
2022-09-14 22:35:23,263 - dnn.data_access.fetch - INFO - Saved CSV to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competi

#### Preprocess and build features

In [12]:
# Feature stage, driven by the shared `config`.
features = Features(config)
# Per the recorded log output, build() fits a processor on the train set
# and then transforms the data; `df_features` is what the model cell
# consumes. `features` is kept so the fitted processor can be saved later.
df_features = features.build(df)

2022-09-14 22:35:25,140 - dnn.features.features - INFO - Fitting processor on train set
2022-09-14 22:35:25,430 - dnn.utils.utils - INFO - func:fit took: 0.24 sec
2022-09-14 22:35:25,431 - dnn.features.features - INFO - Transforming data
2022-09-14 22:35:28,885 - dnn.utils.utils - INFO - func:transform took: 3.45 sec
2022-09-14 22:35:43,509 - dnn.utils.utils - INFO - func:build took: 19.74 sec


#### Train model and generate submission file

In [13]:
# Model stage, driven by the shared `config`.
model = Model(config)
# Trains on the engineered features. The recorded run logs 610 features,
# 320000 training / 80000 validation samples, and per-epoch training and
# validation loss for up to 100 epochs.
# NOTE(review): no random seed is set anywhere in the visible notebook;
# confirm whether seeding happens inside the dnn package, otherwise the
# recorded losses may not be reproducible on re-run.
model.build(df_features)

2022-09-14 22:35:45,573 - dnn.model.core - INFO - Training model
2022-09-14 22:35:45,583 - dnn.model.core - INFO - Number of features: 610
2022-09-14 22:35:45,584 - dnn.model.core - INFO - Number of training samples: 320000
2022-09-14 22:35:45,585 - dnn.model.core - INFO - Number of validation samples: 80000
EPOCH:1/100 - Training Loss: 436.1114, Validation Loss: 374.5657
EPOCH:2/100 - Training Loss: 353.8389, Validation Loss: 355.8509
EPOCH:3/100 - Training Loss: 343.9794, Validation Loss: 350.1673
EPOCH:4/100 - Training Loss: 339.1825, Validation Loss: 347.3331
EPOCH:5/100 - Training Loss: 336.6391, Validation Loss: 346.2542
EPOCH:6/100 - Training Loss: 333.5960, Validation Loss: 345.5748
EPOCH:7/100 - Training Loss: 332.6716, Validation Loss: 346.0030
EPOCH:8/100 - Training Loss: 330.6468, Validation Loss: 343.3890
EPOCH:9/100 - Training Loss: 329.5382, Validation Loss: 342.0117
EPOCH:10/100 - Training Loss: 328.5388, Validation Loss: 344.0612
EPOCH:11/100 - Training Loss: 326.8366,

#### Save features and model

In [14]:
# Persist the artifacts of this run. Per the recorded log output, the
# fitted processor is written to model/processor.pickle, and the model is
# written as a state dictionary (model/mlp.pth) plus a parameters file
# (model/mlp_params.json).
features.save()
model.save()

2022-09-14 22:47:08,442 - dnn.features.features - INFO - Saving processor to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/processor.pickle
2022-09-14 22:47:08,444 - dnn.model.core - INFO - Saving model state dictionary to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp.pth
2022-09-14 22:47:08,466 - dnn.model.core - INFO - Saving model parameters to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp_params.json
2022-09-14 22:47:08,468 - dnn.utils.utils - INFO - func:save took: 0.02 sec
