## Install Packages 

In [None]:
!pip install scikit-learn
!pip install numpy
!pip install pandas
!pip install torch
!pip install tqdm

## Import Packages

In [None]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

from tqdm import tqdm
import math
import random

## Useful Functions 

### Set seeds

In [None]:
def same_seeds(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    default generator; when CUDA is available the CUDA generators are seeded
    as well. Finally cuDNN benchmarking is switched off and deterministic
    mode is switched on.

    Args:
        seed: Seed value handed to each generator.
    """
    # CPU-side generators first.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # GPU generators: the current device, then all devices.
    if torch.cuda.is_available():
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

### Read Dataset from CSV Files

In [None]:
def readDataset(data_filepath, n_valid_files=14):
    """Load every data file in a directory and split the rows into train/validation.

    Files are visited in sorted filename order. Rows from the last
    ``n_valid_files`` files form the validation set and rows from all earlier
    files form the training set. Each file is parsed with ``pandas.read_csv``
    (so its first line is consumed as the header) and every row is returned as
    a plain Python list.

    Args:
        data_filepath: Directory that holds the data files. A trailing path
            separator is optional.
        n_valid_files: How many files, counted from the end of the sorted
            listing, are reserved for validation. Defaults to 14. If the
            directory holds fewer files than this, all of them are used for
            validation.

    Returns:
        A ``(train, valid)`` tuple; each element is a list of rows.

    Raises:
        FileNotFoundError: If ``data_filepath`` is not an existing directory.
        ValueError: If ``n_valid_files`` is negative.
    """
    if not os.path.isdir(data_filepath):
        raise FileNotFoundError(f'Dataset directory not found: {data_filepath}')
    if n_valid_files < 0:
        raise ValueError(f'n_valid_files must be non-negative, got {n_valid_files}')

    # Ignore hidden entries (e.g. macOS '.DS_Store') and sub-directories so that
    # only regular data files are parsed and counted for the split.
    filenames = sorted(
        name for name in os.listdir(data_filepath)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(data_filepath, name))
    )

    # Index of the first file that belongs to the validation split.
    first_valid_idx = max(len(filenames) - n_valid_files, 0)

    train, valid = [], []
    for idx, filename in enumerate(filenames):
        # os.path.join keeps the path correct with or without a trailing separator.
        rows = pd.read_csv(os.path.join(data_filepath, filename)).values.tolist()
        target = valid if idx >= first_valid_idx else train
        target.extend(rows)
    return train, valid

## My Youbike Dataset Class 

In [None]:
class YoubikeDataset(Dataset):
    """Map-style dataset over a sequence of YouBike record rows.

    Column layout of a row, as noted by the original author:
    [month, date, weekday, hr, min, lat, lng, act, ratio, sbi, tot, title, act_title].
    The first eight columns are served as the feature vector and column 8
    as the single-element label.
    """

    def __init__(self, data):
        super().__init__()
        self.data = data
        self.datasize = len(data)

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return self.datasize

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float tensors."""
        row = self.data[idx]
        target = [row[8]]   # column 8 wrapped in a list -> tensor of shape (1,)
        inputs = row[:8]    # columns 0..7 -> tensor of shape (8,)
        return torch.FloatTensor(inputs), torch.FloatTensor(target)

## My Model(s)

### DNN model

In [None]:
class My_Model(nn.Module):
    """Two-layer fully connected network: input_dim -> 4 -> 1, sigmoid after each layer."""

    def __init__(self, input_dim):
        super().__init__()
        # TODO: modify model's structure, be aware of dimensions.
        hidden_dim = 4
        # Built in forward order so the sub-modules keep the indices 0..3
        # inside ``self.layers``.
        stack = [
            nn.Linear(input_dim, hidden_dim),
            nn.Sigmoid(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

## Training function

In [None]:
def train(model, config, train_loader, valid_loader, device):
    """Train ``model`` with SGD and checkpoint the weights with the lowest validation loss.

    Each epoch runs one pass over ``train_loader`` (MSE loss, SGD update per
    batch), advances the cosine warm-restart scheduler once, and then measures
    the mean per-batch MSE over ``valid_loader``. Whenever that validation
    loss improves on the best value seen so far, the model's ``state_dict`` is
    written to ``config['save_dir']/config['model_name']``. Training stops
    early once ``config['early_stop']`` consecutive epochs bring no improvement.

    Args:
        model: Network to optimise. It is not moved by this function, so it
            must already be on ``device``.
        config: Mapping with the keys 'learning_rate', 'weight_decay',
            'save_dir', 'model_name', 'epochs' and 'early_stop'.
        train_loader: Iterable yielding ``(features, target)`` training batches.
        valid_loader: Iterable yielding ``(features, target)`` validation batches.
        device: Device every batch is moved to before the forward pass.

    Returns:
        None.

    Raises:
        ValueError: If ``train_loader`` or ``valid_loader`` yields no batches.
    """
    criterion = nn.MSELoss(reduction='mean') # Define your loss function, do not modify this.
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
    # Stepped once per epoch: the first cosine cycle lasts 1 epoch and every
    # following cycle is twice as long as the previous one.
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)

    # makedirs also creates missing parent directories and tolerates an
    # already existing target; join works with or without a trailing separator.
    os.makedirs(config['save_dir'], exist_ok=True)
    checkpoint_path = os.path.join(config['save_dir'], config['model_name'])

    n_epochs, best_loss, early_stop_count = config['epochs'], math.inf, 0

    for epoch in range(n_epochs):
        model.train() # Set your model to train mode.
        loss_record = []
        print(scheduler.get_last_lr())

        # tqdm is a package to visualize your training progress.
        train_pbar = tqdm(train_loader, position=0, leave=True)
        # The description is constant within an epoch, so set it once.
        train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')

        for x, y in train_pbar:
            optimizer.zero_grad()               # Set gradient to zero.
            x, y = x.to(device), y.to(device)   # Move your data to device.
            pred = model(x)
            loss = criterion(pred, y)
            loss.backward()                     # Compute gradient(backpropagation).
            optimizer.step()                    # Update parameters.

            batch_loss = loss.detach().item()
            loss_record.append(batch_loss)
            # Display the current batch loss on the tqdm progress bar.
            train_pbar.set_postfix({'loss': batch_loss})

        if not loss_record:
            raise ValueError('train_loader yielded no batches; cannot compute the training loss.')
        scheduler.step()
        mean_train_loss = sum(loss_record) / len(loss_record)

        model.eval() # Set your model to evaluation mode.
        loss_record = []
        with torch.no_grad():
            for x, y in valid_loader:
                x, y = x.to(device), y.to(device)
                pred = model(x)
                loss_record.append(criterion(pred, y).item())

        if not loss_record:
            raise ValueError('valid_loader yielded no batches; cannot compute the validation loss.')
        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), checkpoint_path) # Save your best model
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            print('\nModel is not improving, so we halt the training session.')
            print('best loss {:.3f}...'.format(best_loss))
            return

## Hyperparameters

In [None]:
# All tunable settings of the run are gathered in this single mapping.
config = dict(
    batch_size=8,
    # Kaggle input directory; for a local run point this at 'dataset_csv/' instead.
    data_filepath='/kaggle/input/dataset-1129/dataset_csv/',
    epochs=60,
    learning_rate=5e-4,
    weight_decay=5e-3,
    save_dir="./models/",
    model_name="1129-DNN.ckpt",
    early_stop=20,
    seeds=10901036,
)

# Fall back to the CPU unless a CUDA device is available.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

## Load Datasets

In [None]:
# Seed all generators before any data is shuffled so the run is repeatable.
same_seeds(config["seeds"])
train_data, valid_data = readDataset(config['data_filepath'])
print(f'train_data_size: {len(train_data)}, valid_data_size: {len(valid_data)}')
train_dataset, valid_dataset = YoubikeDataset(train_data), YoubikeDataset(valid_data)

# Pinned (page-locked) host memory only speeds up host-to-GPU copies, so it is
# requested only when batches will actually be moved to a CUDA device.
use_pinned_memory = device == 'cuda'
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=use_pinned_memory)
# Validation only averages the loss, so the rows are visited in a fixed order.
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=use_pinned_memory)

## Start Training

In [None]:
# The network takes 8 input features per sample.
model = My_Model(input_dim=8)
model = model.to(device) # put your model and data on the same computation device.

train(
    model=model,
    config=config,
    train_loader=train_loader,
    valid_loader=valid_loader,
    device=device,
)