In [1]:
# Development convenience: (re)load IPython's autoreload extension and use
# mode 2, so edits to the imported `dnn` modules are picked up without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import gc
import logging
from pathlib import Path
# Absolute path of the directory one level above the kernel's working directory.
# NOTE(review): presumably the repository root when the notebook is launched
# from a sub-folder — confirm, since this depends on where the kernel starts.
PARENT_PATH = Path.cwd().parent.absolute()

import numpy as np
import pandas as pd
import torch

from dnn.config import DNNConfig
from dnn.data_access import DataClass
from dnn.features import Features
from dnn.model import Model

# Log record layout: timestamp - logger name - severity - message.
FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Configure the root logger at INFO so progress messages show up in cell output.
logging.basicConfig(
    level=logging.INFO,
    format=FORMAT,
)

#### Build config

In [3]:
# Seed the global NumPy and PyTorch RNGs before any data splitting or training
# so that repeated runs start from the same random state.
# NOTE(review): the dnn package may also seed or pass its own random_state
# internally — confirm, and keep the two values in sync if so.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Project configuration object shared by the data, feature and model stages.
config = DNNConfig()
# Point the config at the directory above the working directory; the logged
# model/, reports/ and data/raw paths further down sit directly under it.
config.CURRENT_PATH = PARENT_PATH

#### Parse raw data for pre-processing

In [4]:
# Data-access stage. Per the logged output of this cell, these calls create the
# model and reports directories, unzip the raw archive, read the raw data, split
# it into train and validation sets and save a CSV.
data = DataClass(config)
# Directories are created before build() runs.
# NOTE(review): presumably build() relies on them existing — confirm.
data.make_dirs()
# `df` is consumed by the feature-building cell further down.
df = data.build()

2022-09-14 21:26:31,115 - dnn.data_access.fetch - INFO - Created model directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model
2022-09-14 21:26:31,116 - dnn.data_access.fetch - INFO - Created reports directory /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/reports
2022-09-14 21:26:31,117 - dnn.data_access.fetch - INFO - Once unzipped, raw data will be available here: /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/data/raw
2022-09-14 21:26:31,120 - dnn.data_access.fetch - INFO - Unpacking file {zip_file_path}
2022-09-14 21:26:31,958 - dnn.utils.utils - INFO - func:unzip took: 0.84 sec
2022-09-14 21:26:31,959 - dnn.data_access.fetch - INFO - Reading raw data into CSV for pre-processing
2022-09-14 21:26:32,965 - dnn.data_access.fetch - INFO - Splitting data available for model-building into train and validation sets
2022-09-14 21:26:40,001 - dnn.data_access.fetch - INFO - Saved CSV to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competi

#### Preprocess and build features

In [5]:
# Feature stage. Per the logged output of this cell, build() fits a processor
# on the train set and then transforms the data (about 18 s in the recorded run).
features = Features(config)
# `df_features` is the input to the model cell; `features` is kept so that the
# fitted processor can be saved in the last cell.
df_features = features.build(df)

2022-09-14 21:26:41,704 - dnn.features.features - INFO - Fitting processor on train set
2022-09-14 21:26:42,009 - dnn.utils.utils - INFO - func:fit took: 0.25 sec
2022-09-14 21:26:42,010 - dnn.features.features - INFO - Transforming data
2022-09-14 21:26:45,383 - dnn.utils.utils - INFO - func:transform took: 3.37 sec
2022-09-14 21:26:58,025 - dnn.utils.utils - INFO - func:build took: 17.75 sec


#### Train model and generate submission file

In [6]:
# Training stage. The recorded run logged 610 features, 320000 training and
# 80000 validation samples, and 10 epochs of training/validation loss.
model = Model(config)
# The return value of build() is not captured here.
# NOTE(review): the section heading says a submission file is generated too, but
# the captured log is cut off before showing it — confirm against dnn.model.
model.build(df_features)

2022-09-14 21:26:59,935 - dnn.model.core - INFO - Training model
2022-09-14 21:26:59,946 - dnn.model.core - INFO - Number of features: 610
2022-09-14 21:26:59,947 - dnn.model.core - INFO - Number of training samples: 320000
2022-09-14 21:26:59,948 - dnn.model.core - INFO - Number of validation samples: 80000
EPOCH:1/10 - Training Loss: 437.0790, Validation Loss: 374.9083
EPOCH:2/10 - Training Loss: 354.0769, Validation Loss: 356.0378
EPOCH:3/10 - Training Loss: 343.3830, Validation Loss: 348.9504
EPOCH:4/10 - Training Loss: 340.1999, Validation Loss: 350.6764
EPOCH:5/10 - Training Loss: 336.5748, Validation Loss: 346.2588
EPOCH:6/10 - Training Loss: 334.3928, Validation Loss: 346.1929
EPOCH:7/10 - Training Loss: 332.2467, Validation Loss: 342.5577
EPOCH:8/10 - Training Loss: 331.0764, Validation Loss: 341.9297
EPOCH:9/10 - Training Loss: 330.0059, Validation Loss: 343.0682
EPOCH:10/10 - Training Loss: 328.8979, Validation Loss: 342.1826
2022-09-14 21:28:10,009 - dnn.model.core - INFO -

#### Save features and model

In [8]:
# Persist the artifacts. Per the logged output of this cell, this writes
# processor.pickle, mlp.pth and mlp_params.json into the model/ directory.
# NOTE(review): the execution count jumps from 6 to 8 and these logs are ~43
# minutes later than the end of training, so a cell was run in between and is no
# longer present — confirm the notebook reproduces under Restart & Run All.
features.save()
model.save()

2022-09-14 22:11:40,157 - dnn.features.features - INFO - Saving processor to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/processor.pickle
2022-09-14 22:11:40,159 - dnn.model.core - INFO - Saving model state dictionary to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp.pth
2022-09-14 22:11:40,187 - dnn.model.core - INFO - Saving model parameters to /home/vtyagi/repos/CS4771-Spring-2022-Regression-Competition/model/mlp_params.json
2022-09-14 22:11:40,189 - dnn.utils.utils - INFO - func:save took: 0.03 sec
