### Module import

In [3]:
import torch
import os
import csv
import random
import numpy as np
import pandas as pd
import ast
from sklearn.model_selection import train_test_split

In [4]:
NUM_EPOCH = 200
BATCH_SIZE = 128 
LATENT_DIM = 512 
REDUCED_DIM = 16 
NUM_ITER = 300 
MODEL_NAME = 'model.pth'
lr = 5e-4

DEVICE_ID = 0
SEED = 5566

torch.cuda.set_device(DEVICE_ID)
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
random.seed(SEED)
np.random.seed(SEED)

AttributeError: module 'torch._C' has no attribute '_cuda_setDevice'

### Load train data and label

In [103]:
def load_train_label(df, train_dt, label_dt):
    train_dt = ["dt%d" % i for i in train_dt]
    label_dt = ["dt%d" % (label_dt - 1), "dt%d" % (label_dt)]
    label = df[label_dt].values.tolist()
    idx = df['chid'].tolist()
    tag_feat = df[train_dt].values.tolist()
    for i in range(len(tag_feat)):
        tag_feat[i] = np.array([ast.literal_eval(x) for x in tag_feat[i]])
    for j in range(len(label)):
        label[j][1] = np.array(ast.literal_eval(label[j][1]))
    return idx, tag_feat, [i[1] for i in label]

In [104]:
df_alldt = pd.read_csv("./data/dt_all.csv")
train_idx, train_tag, train_label = load_train_label(df_alldt, [1,2,3,4,5,6,7,8,9,10], 11) #dt=1~10 as input feature, dt=11 as label
train_idx, valid_idx, train_label_tag, valid_label_tag, train_label, valid_label = train_test_split(train_idx, train_tag, train_label, test_size=0.15)

### Model

In [None]:
class LSTM_Dataset(torch.utils.data.Dataset):
    def __init__(self, id_list, tag, labels):
        self.id_list = id_list
        self.tag = tag
        self.labels = labels
    
    def __getitem__(self, idx):
        return self.id_list[idx], self.tag[idx], self.labels[idx]
    
    def __len__(self):
        return len(self.id_list)

In [107]:
train_dataset, valid_dataset = LSTM_Dataset(train_idx, train_label_tag, train_label), LSTM_Dataset(valid_idx, valid_label_tag, valid_label)
train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                            batch_size = 128,
                                            shuffle = True)
valid_loader = torch.utils.data.DataLoader(dataset = valid_dataset,
                                            batch_size = 128,
                                            shuffle = False)

In [109]:
class LSTM_Backbone(torch.nn.Module):
    def __init__(self, hidden_dim, num_layers, bidirectional, fix_embedding=True):
        super(LSTM_Backbone, self).__init__()
        #self.dropout = torch.nn.Dropout(p=0.75) if not (fix_embedding) else torch.nn.Dropout(p=0)
        self.lstm = torch.nn.LSTM(16, hidden_dim, num_layers=num_layers, \
                                  bidirectional=bidirectional, batch_first=True, dropout = 0.55)
        
    def forward(self, inputs):
        #inputs = self.dropout(inputs)
        x, _ = self.lstm(inputs)
        return x
    
class Header(torch.nn.Module):
    def __init__(self, dropout, hidden_dim):
        super(Header, self).__init__()
        self.classifier = torch.nn.Sequential(torch.nn.Dropout(dropout),
                                  torch.nn.Linear(hidden_dim, 16),
                                  torch.nn.Sigmoid())
    
    def forward(self, inputs):
        # the input shape should be (N, L, D∗H)
        inputs = inputs.sum(dim=1)
        out = self.classifier(inputs)
        return out

### Training

In [195]:
def train(train_loader, backbone, header, optimizer, criterion, device, epoch):

    total_loss = []
    total_acc = []

    for idx, (idx, tags, labels) in enumerate(train_loader):
        tags, labels = tags.float().to(device), labels.float().to(device)

        optimizer.zero_grad()
        if not backbone is None:
            inputs = backbone(tags)
        soft_predicted = header(inputs)
        loss = criterion(soft_predicted, labels)
        total_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        
        #with torch.no_grad():
        '''
        hard_predicted = (soft_predicted >= 0.5).int()
        correct = sum(hard_predicted == labels).item()
        batch_size = len(labels)
        acc = correct * 100 / batch_size
        total_acc.append(acc)
        '''
        #print('[ Epoch {}: {}/{} ] loss:{:.3f} '.format(epoch+1, i+1, len(train_loader), loss.item()))
    return np.mean(total_loss)
def valid(valid_loader, backbone, header, criterion, device, epoch):
    backbone.eval()
    header.eval()
    with torch.no_grad():
        total_loss = []
        #total_acc = []
        
        for idx, (idx, tags, labels) in enumerate(train_loader):
            #print(tags[0])
            #print(labels[0])
            tags, labels = tags.float().to(device), labels.float().to(device)

            if not backbone is None:
                inputs = backbone(tags)
            soft_predicted = header(inputs)
            loss = criterion(soft_predicted, labels)
            total_loss.append(loss.item())
            '''
            hard_predicted = (soft_predicted >= 0.5).int()
            correct = sum(hard_predicted == labels).item()
            acc = correct * 100 / len(labels)
            total_acc.append(acc)
            
            print('[Validation in epoch {:}] loss:{:.3f} acc:{:.3f}'.format(epoch+1, np.mean(total_loss), np.mean(total_acc)), end='\r')
            '''
    backbone.train()
    header.train()
    return np.mean(total_loss)

            
def run_training(train_loader, valid_loader, backbone, header, epoch_num, lr, device):
    total_acc =[] 
    global patience
    patience = 0
    def is_stop(loss, acc):
        global patience
        if (acc < max(total_acc)):
          patience += 1
        else:
          patience = 0
        if (patience > 12):
          return True
        else:
          return False
    
    if backbone is None:
        trainable_paras = header.parameters()
    else:
        trainable_paras = list(backbone.parameters()) + list(header.parameters())
        
    optimizer = torch.optim.Adam(trainable_paras, lr=1e-5)
    
    backbone.train()
    header.train()
    backbone = backbone.to(device)
    header = header.to(device)
    criterion = torch.nn.MSELoss()
    for epoch in range(epoch_num):
        loss_t = train(train_loader, backbone, header, optimizer, criterion, device, epoch)
        loss = valid(valid_loader, backbone, header, criterion, device, epoch)
        print('[Training in epoch {:}] loss:{:.3f}'.format(epoch+1, loss_t))
        print('[Validation in epoch {:}] loss:{:.3f}'.format(epoch+1, loss))
    

In [254]:
def run_testing(test_loader, backbone, header, device, output_path):
    with open(output_path, 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(['chid', 'top1', 'top2', 'top3'])
        
        with torch.no_grad():
            for idx, (idx, tags, labels) in enumerate(test_loader):
                #print(tags[0])
                #print(labels[0])
                tags, labels = tags.float().to(device), labels.float().to(device)
                #print(tags[0:4])
                if not backbone is None:
                    inputs = backbone(tags)
                #print(inputs[0:4])
                soft_predicted = header(inputs)
                soft_predicted = soft_predicted.detach().cpu().numpy()
                #print(soft_predicted[0:4])
                
                pred1 = []
                pred2 = []
                pred3 = []
                tag_list = ["2","6","10","12","13","15","18","19","21","22","25","26","36","37","39","48"]
                for i in range(len(idx)):
                    #print(soft_predicted[i])
                    res = np.argpartition(soft_predicted[i], -3)[-3:]
                    #print(res)
                    pred1.append(tag_list[res[0]])
                    pred2.append(tag_list[res[1]])
                    pred3.append(tag_list[res[2]])
                #print(pred1, pred2, pred3)
                for i, p1, p2, p3 in zip(idx, pred1, pred2, pred3):
                    writer.writerow([str(i.item()), str(p3), str(p2), str(p1)])

In [None]:
backbone = LSTM_Backbone(hidden_dim=256, num_layers=4, bidirectional=True)
header = Header(dropout=0, hidden_dim=512)
run_training(train_loader, valid_loader, backbone, header, 100, lr, device)
torch.save({'backbone': backbone.state_dict(), 'header': header.state_dict()}, MODEL_NAME)

### Run testing (dt_all.csv, dt_10to22.csv)

In [None]:
df_alldt = pd.read_csv("./data/dt_all.csv")
test_idx, test_tag, test_label = load_train_label(df_alldt, [14,15,16,17,18,19,20,21,22,23], 23)
test_dataset = LSTM_Dataset(test_idx, test_tag, test_label)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                            batch_size = 128,
                                            shuffle = False)
run_testing(test_loader, backbone, header, device, "result_all.csv")

In [245]:
df_10to22 = pd.read_csv("./data/dt_10to22_post.csv")
test_idx, test_tag, test_label = load_train_label(df_10to22, [14,15,16,17,18,19,20,21,22,23], 23)
test_dataset = LSTM_Dataset(test_idx, test_tag, test_label)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                            batch_size = 128,
                                            shuffle = False)
run_testing(test_loader, backbone, header, device, "result_10to22.csv")

In [None]:
df_all = pd.read_csv("./data/dt_all.csv")
test_idx, test_tag, test_label = load_train_label(df_all, [14,15,16,17,18,19,20,21,22,23], 23)
test_dataset = LSTM_Dataset(test_idx, test_tag, test_label)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                            batch_size = 128,
                                            shuffle = False)
run_testing(test_loader, backbone, header, device, "result_all.csv")