In [1]:
import os
train_path = '../../raw_train_artifact'
test_path = '../../raw_test_artifact'
embedding_path = '../../embedding_artifact'
input_path = '../../input_artifact'
input_split_path = '../../input_artifact/input_split'
model_path = '../../model_artifact'
output_path = '../../output_artifact'

In [2]:
import sys
import gc
gc.enable()
import time
import re

import numpy as np
import pandas as pd
pd.set_option('display.max_columns',120)
pd.set_option('display.max_rows',2000)
pd.set_option('precision',5)
pd.set_option('float_format', '{:.5f}'.format)

import tqdm
import joblib
import json

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, roc_auc_score
from gensim.models import Word2Vec
import torch
from torch import nn
import torch.nn.functional as F

In [3]:
import logging

log_path = '[1.2]LSTM with Creative Embedding Sequence.log'
    
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s %(levelname)-s: %(message)s', datefmt='%H:%M:%S')

fh = logging.FileHandler(log_path)
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)
logger.addHandler(fh)

sh = logging.StreamHandler(sys.stdout)
sh.setLevel(logging.INFO)
sh.setFormatter(formatter)
logger.addHandler(sh)

logger.info(f'Restart notebook\n==========================\n{time.ctime()}\n==========================')

08:37:50 INFO: Restart notebook
Wed Jun  3 08:37:50 2020


In [4]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info('Device in Use: {}'.format(DEVICE))
torch.cuda.empty_cache()
t = torch.cuda.get_device_properties(DEVICE).total_memory/1024**3
c = torch.cuda.memory_cached(DEVICE)/1024**3
a = torch.cuda.memory_allocated(DEVICE)/1024**3
logger.info('CUDA Memory: Total {:.2f} GB, Cached {:.2f} GB, Allocated {:.2f} GB'.format(t,c,a))

08:37:50 INFO: Device in Use: cuda
08:37:50 INFO: CUDA Memory: Total 8.00 GB, Cached 0.00 GB, Allocated 0.00 GB


## Data Loader

In [5]:
creative_embedding_path = r'C:\JupyterNotebook\Tencent-Ads-Algo-Comp-2020\embedding_artifact\creative_id_embed_s160_w64_cbow_38168zon'

def prepare_data(split_id, max_seq=100, slient=False, logger=None):
    global input_split_path, creative_embedding_path

    start = time.time()
    if not slient and logger: logger.info(f'Processing Split-{split_id}')
    truth_path = os.path.join(input_split_path, f'train_truth_{split_id}.npy')
    with open(truth_path, 'rb') as f:
        truth = np.load(f)
    inp_user = truth[:,0]
    out_age = torch.from_numpy(truth[:,1]).long()
    out_gender = torch.from_numpy(truth[:,2]).long()
    if not slient and logger: logger.info(f'Target output ready after {time.time()-start:.2f}s')
    del truth
    _ = gc.collect()
    
    creative_embedding = Word2Vec.load(creative_embedding_path) 
    if not slient and logger: logger.info(f'Creative ID embedding artifact is loaded after {time.time()-start:.2f}s')
    creative_path = os.path.join(input_split_path, f'train_creative_agg_user_{split_id}.json')
    with open(creative_path, 'r') as f:
        creative = json.load(f)
    inp_creative = []
    for user in inp_user:
        inp_creative.append(torch.from_numpy(np.stack([creative_embedding.wv[key] for key in creative[str(user)][:max_seq]], axis=0)).float())
    inp_last_idx = np.array([i.shape[0] for i in inp_creative])-1
    if not slient and logger: logger.info(f'Creative embedding ready after {time.time()-start:.2f}s')
    del creative_embedding, creative, inp_user
    _ = gc.collect()
        
    return inp_creative, inp_last_idx, out_age, out_gender

## Model

In [6]:
class LSTM_Classifier(nn.Module):
    def __init__(self, embed_size, lstm_hidden_size, out_size, rnn_dropout=0.2, mlp_dropout=0.4, **kwargs):
        super(LSTM_Classifier, self).__init__(**kwargs)
        self.embed_size = embed_size
        self.lstm_hidden_size = lstm_hidden_size
        self.out_size = out_size
        self.rnn_dropout = rnn_dropout
        self.mlp_dropout = mlp_dropout
        
        self.bi_lstm = nn.LSTM(input_size=embed_size, hidden_size=lstm_hidden_size, bias=True, bidirectional=True)
        self.rnn_dropout_1 = nn.Dropout(p=rnn_dropout)
        self.layernorm_1 = nn.LayerNorm(2*lstm_hidden_size)
        self.lstm_1 = nn.LSTM(input_size=2*lstm_hidden_size, hidden_size=2*lstm_hidden_size)
        self.rnn_dropout_2 = nn.Dropout(p=rnn_dropout)
        self.layernorm_2 = nn.LayerNorm(2*lstm_hidden_size)
        self.lstm_2 = nn.LSTM(input_size=2*lstm_hidden_size, hidden_size=2*lstm_hidden_size)
        self.batchnorm_1 = nn.BatchNorm1d(2*lstm_hidden_size)
        self.mlp_dropout_1 = nn.Dropout(p=mlp_dropout)
        self.mlp_1 = nn.Linear(2*lstm_hidden_size, 1024)
        self.batchnorm_2 = nn.BatchNorm1d(1024)
        self.mlp_dropout_2 = nn.Dropout(p=mlp_dropout)
        self.mlp_2 = nn.Linear(1024, 512)
        self.batchnorm_3 = nn.BatchNorm1d(512)
        self.mlp_dropout_3 = nn.Dropout(p=mlp_dropout)
        self.mlp_3 = nn.Linear(512, out_size)
        
    def forward(self, inp_embed, inp_last_idx):
        bilstm_out, _ = self.bi_lstm(inp_embed.permute(1,0,2))                            # (max_seq_length, batch_size, embed_size) -> (max_seq_length, batch_size, 2*lstm_hidden_size)
        bilstm_out = self.rnn_dropout_1(bilstm_out)                                       # (max_seq_length, batch_size, 2*lstm_hidden_size)
        bilstm_out = self.layernorm_1(bilstm_out)                                         # (max_seq_length, batch_size, 2*lstm_hidden_size)
        lstm_out, _ = self.lstm_1(bilstm_out)                                             # (max_seq_length, batch_size, 2*lstm_hidden_size)
        lstm_out = self.rnn_dropout_2(lstm_out)                                           # (max_seq_length, batch_size, 2*lstm_hidden_size)
        lstm_out = self.layernorm_2(lstm_out+bilstm_out)                                  # (max_seq_length, batch_size, 2*lstm_hidden_size)
        lstm_out, _ = self.lstm_2(lstm_out)                                               # (max_seq_length, batch_size, 2*lstm_hidden_size)
        lstm_out = lstm_out.permute(1,0,2)[np.arange(len(inp_last_idx)), inp_last_idx,:]  # (batch_size, 2*lstm_hidden_size)
        lstm_out = self.mlp_dropout_1(F.relu(self.batchnorm_1(lstm_out)))                 # (batch_size, 2*lstm_hidden_size)
        mlp_out = self.mlp_1(lstm_out)                                                    # (batch_size, 1024)
        mlp_out = self.mlp_dropout_2(F.relu(self.batchnorm_2(mlp_out)))                   # (batch_size, 1024)
        mlp_out = self.mlp_2(mlp_out)                                                     # (batch_size, 512)
        mlp_out = self.mlp_dropout_3(F.relu(self.batchnorm_3(mlp_out)))                   # (batch_size, 512)
        mlp_out = self.mlp_3(mlp_out)                                                     # (batch_size, out_size)
        return mlp_out   

## Gender Model Training

In [7]:
EPOCHES = 5
BATCH_SIZE = 512
N_BATCH = 90000//BATCH_SIZE-1
TEST_SIZE = 90000%BATCH_SIZE + BATCH_SIZE

def train_gender(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=None, epoch_start=0):
    global EPOCHES, BATCH_SIZE, N_BATCH, TEST_SIZE
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)
    
    for epoch in range(1+epoch_start, EPOCHES+1+epoch_start):
        if logger: 
            logger.info('=========================')
            logger.info(f'Processing Epoch {epoch}/{EPOCHES+epoch_start}')
            logger.info('=========================')
            
        train_file = [1,2,3,4,5,6,7,8,9]
        test_file = [10]
            
        train_running_loss, train_n_batch = 0, 0
        pred_y, true_y = [], []
        for index, split_id in enumerate(train_file, start=1):
            inp_creative, inp_last_idx, out_age, out_gender = prepare_data(split_id)
            train_creative, test_creative = inp_creative[:-TEST_SIZE], inp_creative[-TEST_SIZE:]
            train_last_idx, test_last_idx = inp_last_idx[:-TEST_SIZE], inp_last_idx[-TEST_SIZE:]
            train_gender, test_gender = out_gender[:-TEST_SIZE], out_gender[-TEST_SIZE:]
            
            model.train()
            
            for batch_index in range(N_BATCH):
                x1 = torch.nn.utils.rnn.pad_sequence(train_creative[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE], batch_first=True, padding_value=0).to(device)
                x2 = train_last_idx[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE]
                y = train_gender[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE].to(device)
                optimizer.zero_grad()
                yp = F.softmax(model(x1, x2), 1)
                loss = loss_fn(yp, y)
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100)
                optimizer.step()
                
                train_running_loss += loss.item()
                train_n_batch += 1
                
                del x1, x2, y, yp
                _ = gc.collect()
                torch.cuda.empty_cache()
            
            model.eval()
            
            x1 = torch.nn.utils.rnn.pad_sequence(test_creative, batch_first=True, padding_value=0).to(device)
            x2 = test_last_idx
            y = test_gender.to(device)
            yp = F.softmax(model(x1, x2), 1)
            loss = loss_fn(yp, y)
            
            pred_y.extend(list(yp.cpu().detach().numpy()))
            true_y.extend(list(y.cpu().detach().numpy()))
            
            del x1, x2, y, yp
            _ = gc.collect()
            torch.cuda.empty_cache()
            
            prob = np.array(pred_y)[:,1]
            pred = np.argmax(np.array(pred_y), 1)
            true = np.array(true_y).reshape((-1,))
            roc_score = roc_auc_score(true, prob)
            acc_score = accuracy_score(true, pred)
            
            if logger:
                logger.info(f'Epoch {epoch}/{EPOCHES+epoch_start} - Training Split {index}/{len(train_file)} Done - Train Loss: {train_running_loss/train_n_batch:.6f}, Val Loss: {loss.item():.6f}, Val AUC: {roc_score:.6f}, Val Accuracy: {acc_score:.6f}')
            
            del inp_creative, inp_last_idx, out_age, out_gender, train_creative, test_creative, train_last_idx, test_last_idx, train_gender, test_gender
            _ = gc.collect()
            torch.cuda.empty_cache()   
        
        model.eval()
        test_running_loss, test_n_batch = 0, 0
        true_y, pred_y = [], []
        
        for index, split_id in enumerate(test_file, start=1):
            inp_creative, inp_last_idx, out_age, out_gender = prepare_data(split_id)
            for batch_index in range(N_BATCH+2):
                x1 = torch.nn.utils.rnn.pad_sequence(inp_creative[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE], batch_first=True, padding_value=0).to(device)
                x2 = inp_last_idx[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE]
                y = out_gender[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE].to(device)
                yp = F.softmax(model(x1, x2), 1)
                loss = loss_fn(yp, y)
            
                test_running_loss += loss.item()
                test_n_batch += 1
            
                pred_y.extend(list(yp.cpu().detach().numpy()))
                true_y.extend(list(y.cpu().detach().numpy()))
            
                del x1, x2, y, yp
                _ = gc.collect()
                torch.cuda.empty_cache()
            
            del inp_creative, inp_last_idx, out_age, out_gender
            _ = gc.collect()
            torch.cuda.empty_cache()
        
        prob = np.array(pred_y)[:,1]
        pred = np.argmax(np.array(pred_y), 1)
        true = np.array(true_y).reshape((-1,))
        roc_score = roc_auc_score(true, prob)
        acc_score = accuracy_score(true, pred)
        
        if logger:
            logger.info(f'Epoch {epoch}/{EPOCHES+epoch_start} Done - Test Loss: {test_running_loss/test_n_batch:.6f}, Test AUC: {roc_score:.6f}, Test Accuracy: {acc_score:.6f}')
            
        ck_file_name = f'{checkpoint_prefix}_{epoch}.pth'
        ck_file_path = os.path.join(checkpoint_dir, ck_file_name)
        
        torch.save(model.state_dict(), ck_file_path)

In [8]:
model = LSTM_Classifier(160, 256, 2).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
device = DEVICE
optimizer = torch.optim.Adam(model.parameters())
checkpoint_dir = os.path.join(model_path, 'LSTM_Classifier_Creative_Gender')
checkpoint_prefix = 'LSTM_Classifier_Creative_Gender'

train_gender(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=logger)

21:21:42 INFO: Processing Epoch 1/5
21:23:53 INFO: Epoch 1/5 - Training Split 1/9 Done - Train Loss: 0.411199, Val Loss: 0.379032, Val AUC: 0.971454, Val Accuracy: 0.929825
21:26:07 INFO: Epoch 1/5 - Training Split 2/9 Done - Train Loss: 0.401921, Val Loss: 0.402942, Val AUC: 0.963301, Val Accuracy: 0.917215
21:28:22 INFO: Epoch 1/5 - Training Split 3/9 Done - Train Loss: 0.396658, Val Loss: 0.387113, Val AUC: 0.965022, Val Accuracy: 0.919225
21:30:35 INFO: Epoch 1/5 - Training Split 4/9 Done - Train Loss: 0.392903, Val Loss: 0.371584, Val AUC: 0.968793, Val Accuracy: 0.924068
21:32:46 INFO: Epoch 1/5 - Training Split 5/9 Done - Train Loss: 0.390318, Val Loss: 0.380074, Val AUC: 0.968102, Val Accuracy: 0.924781
21:34:58 INFO: Epoch 1/5 - Training Split 6/9 Done - Train Loss: 0.388471, Val Loss: 0.377876, Val AUC: 0.968840, Val Accuracy: 0.926352
21:37:11 INFO: Epoch 1/5 - Training Split 7/9 Done - Train Loss: 0.387017, Val Loss: 0.377145, Val AUC: 0.967544, Val Accuracy: 0.927475
21:39

RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED (_cudnn_rnn_backward_input at ..\aten\src\ATen\native\cudnn\RNN.cpp:931)
(no backtrace available)

In [8]:
model = LSTM_Classifier(160, 256, 2)
checkpoint_dir = os.path.join(model_path, 'LSTM_Classifier_Creative_Gender')
checkpoint_prefix = 'LSTM_Classifier_Creative_Gender'
model.load_state_dict(torch.load(os.path.join(checkpoint_dir, f'{checkpoint_prefix}_2.pth')))

model = model.to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
device = DEVICE
optimizer = torch.optim.Adam(model.parameters())
train_gender(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=logger, epoch_start=2)

22:27:42 INFO: Processing Epoch 3/7
22:29:54 INFO: Epoch 3/7 - Training Split 1/9 Done - Train Loss: 0.374186, Val Loss: 0.368580, Val AUC: 0.973175, Val Accuracy: 0.941886
22:32:08 INFO: Epoch 3/7 - Training Split 2/9 Done - Train Loss: 0.374155, Val Loss: 0.396829, Val AUC: 0.967264, Val Accuracy: 0.927632
22:34:23 INFO: Epoch 3/7 - Training Split 3/9 Done - Train Loss: 0.373919, Val Loss: 0.376800, Val AUC: 0.970505, Val Accuracy: 0.929825
22:36:40 INFO: Epoch 3/7 - Training Split 4/9 Done - Train Loss: 0.373435, Val Loss: 0.368807, Val AUC: 0.972180, Val Accuracy: 0.933114
22:38:54 INFO: Epoch 3/7 - Training Split 5/9 Done - Train Loss: 0.373301, Val Loss: 0.375425, Val AUC: 0.971957, Val Accuracy: 0.933772
22:41:06 INFO: Epoch 3/7 - Training Split 6/9 Done - Train Loss: 0.373066, Val Loss: 0.376274, Val AUC: 0.971379, Val Accuracy: 0.933845
22:43:19 INFO: Epoch 3/7 - Training Split 7/9 Done - Train Loss: 0.372876, Val Loss: 0.376838, Val AUC: 0.970724, Val Accuracy: 0.933427
22:45

KeyboardInterrupt: 

In [8]:
model = LSTM_Classifier(160, 256, 2)
checkpoint_dir = os.path.join(model_path, 'LSTM_Classifier_Creative_Gender')
checkpoint_prefix = 'LSTM_Classifier_Creative_Gender'
model.load_state_dict(torch.load(os.path.join(checkpoint_dir, f'{checkpoint_prefix}_5.pth')))

model = model.to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
device = DEVICE
optimizer = torch.optim.Adam(model.parameters())
train_gender(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=logger, epoch_start=5)

07:11:35 INFO: Processing Epoch 6/10
07:13:46 INFO: Epoch 6/10 - Training Split 1/9 Done - Train Loss: 0.367703, Val Loss: 0.366074, Val AUC: 0.972564, Val Accuracy: 0.944079
07:15:58 INFO: Epoch 6/10 - Training Split 2/9 Done - Train Loss: 0.367952, Val Loss: 0.388708, Val AUC: 0.963354, Val Accuracy: 0.934211
07:18:10 INFO: Epoch 6/10 - Training Split 3/9 Done - Train Loss: 0.367923, Val Loss: 0.376057, Val AUC: 0.963560, Val Accuracy: 0.934942
07:20:21 INFO: Epoch 6/10 - Training Split 4/9 Done - Train Loss: 0.367549, Val Loss: 0.362159, Val AUC: 0.968822, Val Accuracy: 0.938596
07:22:32 INFO: Epoch 6/10 - Training Split 5/9 Done - Train Loss: 0.367529, Val Loss: 0.376939, Val AUC: 0.968141, Val Accuracy: 0.938158
07:24:41 INFO: Epoch 6/10 - Training Split 6/9 Done - Train Loss: 0.367303, Val Loss: 0.374775, Val AUC: 0.968516, Val Accuracy: 0.938048
07:26:50 INFO: Epoch 6/10 - Training Split 7/9 Done - Train Loss: 0.367093, Val Loss: 0.376527, Val AUC: 0.967549, Val Accuracy: 0.9375

RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED (_cudnn_rnn_backward_input at ..\aten\src\ATen\native\cudnn\RNN.cpp:931)
(no backtrace available)

In [8]:
model = LSTM_Classifier(160, 256, 2)
checkpoint_dir = os.path.join(model_path, 'LSTM_Classifier_Creative_Gender')
checkpoint_prefix = 'LSTM_Classifier_Creative_Gender'
model.load_state_dict(torch.load(os.path.join(checkpoint_dir, f'{checkpoint_prefix}_7.pth')))

model = model.to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
device = DEVICE
optimizer = torch.optim.Adam(model.parameters())

EPOCHES = 3

train_gender(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=logger, epoch_start=7)

08:05:00 INFO: Processing Epoch 8/10
08:07:07 INFO: Epoch 8/10 - Training Split 1/9 Done - Train Loss: 0.364662, Val Loss: 0.368567, Val AUC: 0.962465, Val Accuracy: 0.944079
08:09:17 INFO: Epoch 8/10 - Training Split 2/9 Done - Train Loss: 0.365105, Val Loss: 0.394047, Val AUC: 0.956590, Val Accuracy: 0.931469
08:11:27 INFO: Epoch 8/10 - Training Split 3/9 Done - Train Loss: 0.365040, Val Loss: 0.372571, Val AUC: 0.959230, Val Accuracy: 0.933114
08:13:38 INFO: Epoch 8/10 - Training Split 4/9 Done - Train Loss: 0.364682, Val Loss: 0.366505, Val AUC: 0.961847, Val Accuracy: 0.935855
08:15:46 INFO: Epoch 8/10 - Training Split 5/9 Done - Train Loss: 0.364662, Val Loss: 0.372614, Val AUC: 0.962568, Val Accuracy: 0.937061
08:17:55 INFO: Epoch 8/10 - Training Split 6/9 Done - Train Loss: 0.364575, Val Loss: 0.376573, Val AUC: 0.962644, Val Accuracy: 0.936952


RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED (_cudnn_rnn_backward_input at ..\aten\src\ATen\native\cudnn\RNN.cpp:931)
(no backtrace available)

## Age Model Training

In [7]:
EPOCHES = 5
BATCH_SIZE = 512
N_BATCH = 90000//BATCH_SIZE-1
TEST_SIZE = 90000%BATCH_SIZE + BATCH_SIZE

def train_age(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=None, epoch_start=0):
    global EPOCHES, BATCH_SIZE, N_BATCH, TEST_SIZE
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)
    
    for epoch in range(1+epoch_start, EPOCHES+1+epoch_start):
        if logger: 
            logger.info('=========================')
            logger.info(f'Processing Epoch {epoch}/{EPOCHES+epoch_start}')
            logger.info('=========================')
            
        train_file = [1,2,3,4,5,6,7,8,9]
        test_file = [10]
            
        train_running_loss, train_n_batch = 0, 0
        pred_y, true_y = [], []
        for index, split_id in enumerate(train_file, start=1):
            inp_creative, inp_last_idx, out_age, out_gender = prepare_data(split_id)
            train_creative, test_creative = inp_creative[:-TEST_SIZE], inp_creative[-TEST_SIZE:]
            train_last_idx, test_last_idx = inp_last_idx[:-TEST_SIZE], inp_last_idx[-TEST_SIZE:]
            train_age, test_age = out_age[:-TEST_SIZE], out_age[-TEST_SIZE:]
            
            model.train()
            
            for batch_index in range(N_BATCH):
                x1 = torch.nn.utils.rnn.pad_sequence(train_creative[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE], batch_first=True, padding_value=0).to(device)
                x2 = train_last_idx[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE]
                y = train_age[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE].to(device)
                optimizer.zero_grad()
                yp = F.softmax(model(x1, x2), 1)
                loss = loss_fn(yp, y)
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100)
                optimizer.step()
                
                train_running_loss += loss.item()
                train_n_batch += 1
                
                del x1, x2, y, yp
                _ = gc.collect()
                torch.cuda.empty_cache()
            
            model.eval()
            
            x1 = torch.nn.utils.rnn.pad_sequence(test_creative, batch_first=True, padding_value=0).to(device)
            x2 = test_last_idx
            y = test_age.to(device)
            yp = F.softmax(model(x1, x2), 1)
            loss = loss_fn(yp, y)
            
            pred_y.extend(list(yp.cpu().detach().numpy()))
            true_y.extend(list(y.cpu().detach().numpy()))
            
            del x1, x2, y, yp
            _ = gc.collect()
            torch.cuda.empty_cache()
            
            pred = np.argmax(np.array(pred_y), 1)
            true = np.array(true_y).reshape((-1,))
            acc_score = accuracy_score(true, pred)
            
            if logger:
                logger.info(f'Epoch {epoch}/{EPOCHES+epoch_start} - Training Split {index}/{len(train_file)} Done - Train Loss: {train_running_loss/train_n_batch:.6f}, Val Loss: {loss.item():.6f}, Val Accuracy: {acc_score:.6f}')
            
            del inp_creative, inp_last_idx, out_age, out_gender, train_creative, test_creative, train_last_idx, test_last_idx, train_age, test_age
            _ = gc.collect()
            torch.cuda.empty_cache()   
        
        model.eval()
        test_running_loss, test_n_batch = 0, 0
        true_y, pred_y = [], []
        
        for index, split_id in enumerate(test_file, start=1):
            inp_creative, inp_last_idx, out_age, out_gender = prepare_data(split_id)
            for batch_index in range(N_BATCH+2):
                x1 = torch.nn.utils.rnn.pad_sequence(inp_creative[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE], batch_first=True, padding_value=0).to(device)
                x2 = inp_last_idx[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE]
                y = out_age[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE].to(device)
                yp = F.softmax(model(x1, x2), 1)
                loss = loss_fn(yp, y)
            
                test_running_loss += loss.item()
                test_n_batch += 1
            
                pred_y.extend(list(yp.cpu().detach().numpy()))
                true_y.extend(list(y.cpu().detach().numpy()))
            
                del x1, x2, y, yp
                _ = gc.collect()
                torch.cuda.empty_cache()
            
            del inp_creative, inp_last_idx, out_age, out_gender
            _ = gc.collect()
            torch.cuda.empty_cache()
        
        pred = np.argmax(np.array(pred_y), 1)
        true = np.array(true_y).reshape((-1,))
        acc_score = accuracy_score(true, pred)
        
        if logger:
            logger.info(f'Epoch {epoch}/{EPOCHES+epoch_start} Done - Test Loss: {test_running_loss/test_n_batch:.6f}, Test Accuracy: {acc_score:.6f}')
            
        ck_file_name = f'{checkpoint_prefix}_{epoch}.pth'
        ck_file_path = os.path.join(checkpoint_dir, ck_file_name)
        
        torch.save(model.state_dict(), ck_file_path)

In [9]:
model = LSTM_Classifier(160, 256, 10).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
device = DEVICE
optimizer = torch.optim.Adam(model.parameters())
checkpoint_dir = os.path.join(model_path, 'LSTM_Classifier_Creative_Age')
checkpoint_prefix = 'LSTM_Classifier_Creative_Age'

train_age(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=logger)

08:38:12 INFO: Processing Epoch 1/5
08:40:21 INFO: Epoch 1/5 - Training Split 1/9 Done - Train Loss: 2.143261, Val Loss: 2.099092, Val Accuracy: 0.353070
08:42:32 INFO: Epoch 1/5 - Training Split 2/9 Done - Train Loss: 2.130610, Val Loss: 2.117119, Val Accuracy: 0.345943
08:44:43 INFO: Epoch 1/5 - Training Split 3/9 Done - Train Loss: 2.123347, Val Loss: 2.122414, Val Accuracy: 0.339912
08:46:55 INFO: Epoch 1/5 - Training Split 4/9 Done - Train Loss: 2.118455, Val Loss: 2.112529, Val Accuracy: 0.335800
08:49:05 INFO: Epoch 1/5 - Training Split 5/9 Done - Train Loss: 2.113458, Val Loss: 2.080825, Val Accuracy: 0.343421
08:51:14 INFO: Epoch 1/5 - Training Split 6/9 Done - Train Loss: 2.109293, Val Loss: 2.093206, Val Accuracy: 0.344846
08:53:22 INFO: Epoch 1/5 - Training Split 7/9 Done - Train Loss: 2.105581, Val Loss: 2.084407, Val Accuracy: 0.347744
08:55:30 INFO: Epoch 1/5 - Training Split 8/9 Done - Train Loss: 2.101984, Val Loss: 2.045729, Val Accuracy: 0.354989
08:57:39 INFO: Epoch

In [None]:
model = LSTM_Classifier(160, 256, 10)
checkpoint_dir = os.path.join(model_path, 'LSTM_Classifier_Creative_Age')
checkpoint_prefix = 'LSTM_Classifier_Creative_Age'
model.load_state_dict(torch.load(os.path.join(checkpoint_dir, f'{checkpoint_prefix}_5.pth')))

model = model.to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
device = DEVICE
optimizer = torch.optim.Adam(model.parameters())

EPOCHES = 5

train_age(model, loss_fn, optimizer, device, checkpoint_dir, checkpoint_prefix, logger=logger, epoch_start=5)

10:24:46 INFO: Processing Epoch 6/10
10:26:53 INFO: Epoch 6/10 - Training Split 1/9 Done - Train Loss: 2.031641, Val Loss: 2.028656, Val Accuracy: 0.429825
10:29:03 INFO: Epoch 6/10 - Training Split 2/9 Done - Train Loss: 2.031851, Val Loss: 2.082080, Val Accuracy: 0.398026
10:31:14 INFO: Epoch 6/10 - Training Split 3/9 Done - Train Loss: 2.031182, Val Loss: 2.046174, Val Accuracy: 0.401316
10:33:25 INFO: Epoch 6/10 - Training Split 4/9 Done - Train Loss: 2.030634, Val Loss: 2.080975, Val Accuracy: 0.391721
10:35:36 INFO: Epoch 6/10 - Training Split 5/9 Done - Train Loss: 2.030126, Val Loss: 2.035072, Val Accuracy: 0.396930
10:37:47 INFO: Epoch 6/10 - Training Split 6/9 Done - Train Loss: 2.029887, Val Loss: 2.033610, Val Accuracy: 0.400037
10:39:58 INFO: Epoch 6/10 - Training Split 7/9 Done - Train Loss: 2.028962, Val Loss: 2.051966, Val Accuracy: 0.401003
10:42:08 INFO: Epoch 6/10 - Training Split 8/9 Done - Train Loss: 2.027995, Val Loss: 2.021664, Val Accuracy: 0.406113
10:44:16 IN