In [1]:
import numpy as np
import sys
import datetime
from copy import deepcopy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from scipy.stats import pearsonr
from sklearn.metrics import f1_score, accuracy_score

# Run on the GPU whenever CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [0]:
class myDataset(Dataset):
    '''
    Dataset of (text, label, class index) triples held on the global `device`.

    `text` and `label` are converted with `torch.tensor` and moved to the
    device once at construction time. `class_idx` is the position of the
    largest entry of each label row (argmax over dim 1).
    '''

    def __init__(self, text, label):
        super().__init__()
        text_tensor = torch.tensor(text)
        label_tensor = torch.tensor(label)
        self.text = text_tensor.to(device)
        self.label = label_tensor.to(device)
        # Hard class target derived from the label rows.
        hard_class = torch.argmax(self.label, dim=1, keepdim=False)
        self.class_idx = hard_class.to(device)

    def __len__(self):
        # Number of samples == number of label rows.
        return self.label.size(0)

    def __getitem__(self, index):
        sample = (self.text[index], self.label[index], self.class_idx[index])
        return sample


class Stat():
    '''
    Collects per-batch losses and predictions and reduces them to metrics.
    '''

    def __init__(self):
        self.loss = []          # one value per appended batch
        self.labels = []        # per-sample label vectors (numpy rows)
        self.pred_labels = []   # per-sample model output vectors (numpy rows)
        self.classes = []       # per-sample argmax of the label vector
        self.pred_classes = []  # per-sample argmax of the model output

    def append(self, loss, label, pred_label):
        '''
        Record one batch.

        loss:       scalar stored as-is.
        label:      tensor whose rows are label vectors.
        pred_label: tensor whose rows are model outputs (may carry grad).
        '''
        self.loss.append(loss)

        # Row-wise copies on the CPU, detached from the autograd graph.
        label_rows = label.cpu().detach().numpy()
        pred_rows = pred_label.cpu().detach().numpy()
        self.labels.extend(label_rows)
        self.pred_labels.extend(pred_rows)

        # Hard classes are taken with torch's argmax along dim 1.
        self.classes.extend(label.argmax(dim=1).cpu().numpy())
        self.pred_classes.extend(
            pred_label.argmax(dim=1).cpu().detach().numpy())

    def eval(self):
        '''
        Return (mean loss, accuracy %, macro-F1 %, mean per-sample Pearson r).

        The correlation is computed between each predicted vector and its
        label vector and then averaged over all recorded samples.
        '''
        num_batches = len(self.loss)
        num_samples = len(self.labels)
        loss = sum(self.loss) / num_batches
        acc = accuracy_score(self.classes, self.pred_classes) * 100
        f1 = f1_score(self.classes, self.pred_classes,
                      average='macro') * 100
        corr_total = sum(pearsonr(pred, true)[0]
                         for pred, true in zip(self.pred_labels, self.labels))
        corr = corr_total / num_samples
        return loss, acc, f1, corr


In [3]:
print("====Load Data====")
# colab dir: each split is stored as a pair of .npy arrays (text, label).
text_train_tensor, label_train_tensor = (
    np.load('/content/drive/My Drive/PA3/text_train_tensor.npy'),
    np.load('/content/drive/My Drive/PA3/label_train_tensor.npy'),
)
text_dev_tensor, label_dev_tensor = (
    np.load('/content/drive/My Drive/PA3/text_dev_tensor.npy'),
    np.load('/content/drive/My Drive/PA3/label_dev_tensor.npy'),
)
text_test_tensor, label_test_tensor = (
    np.load('/content/drive/My Drive/PA3/text_test_tensor.npy'),
    np.load('/content/drive/My Drive/PA3/label_test_tensor.npy'),
)
# test_data = np.load('/content/drive/My Drive/PA3/test_data.npy')
# test_label = np.load('/content/drive/My Drive/PA3/test_label.npy')

# Wrap every split in a dataset.
train_set, dev_set, test_set = (
    myDataset(text_train_tensor, label_train_tensor),
    myDataset(text_dev_tensor, label_dev_tensor),
    myDataset(text_test_tensor, label_test_tensor),
)

# Only the training batches are shuffled; dev/test keep their stored order.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
dev_loader = DataLoader(dev_set, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=False)
print("====Load Finish====")

====Load Data====
====Load Finish====


In [0]:

# Fixed dimensions shared by the model classes below.
# Inputs are annotated in DAN.forward as [64, 512, 300], i.e. (batch, max_len, embed_size).
max_len, embed_size = 512, 300
num_class = 8    # width of every model's final linear layer
batch_size = 64  # matches the batch_size=64 passed to the DataLoaders



class MLP(nn.Module):
    '''
    Multilayer Perceptron

    Flattens each sample to a single vector of max_len * embed_size values and
    applies Linear -> ReLU -> Dropout -> Linear, ending in num_class outputs.

    params keys read: 'hidden_size', 'dropout'.
    '''
    def __init__(self, params):
        super().__init__()
        hidden_size, dropout = params['hidden_size'], params['dropout']
        layers = [
            nn.Linear(max_len * embed_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_class),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        # Collapse everything after the batch dimension into one axis.
        flat = x.view(x.size(0), -1)
        return self.fc(flat)


class DAN(nn.Module):
    '''
    Deep Average Network

    Averages the word vectors of a sample (after optional word dropout) and
    feeds the average through Linear -> ReLU -> Dropout -> Linear.

    params keys read: 'word_dropout' (probability of dropping a word during
    training), 'dropout', 'hidden_size'.
    '''

    def __init__(self, params):
        super().__init__()
        word_dropout = params['word_dropout']
        dropout = params['dropout']
        hidden_size = params['hidden_size']
        self.word_dropout = word_dropout
        self.fc = nn.Sequential(
            nn.Linear(embed_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_class)
        )

    def forward(self, x):
        '''
        x: (batch, seq_len, embed) tensor, e.g. [64, 512, 300].
        Returns the output of `self.fc` applied to the per-sample average of
        the kept word vectors. The input tensor is left unmodified.
        '''
        batch, seq_len = x.shape[0], x.shape[1]

        # Word dropout: keep each position with probability 1 - word_dropout
        # while training; keep everything in eval mode. The mask is built on
        # the input's own device and with its own sequence length, so the
        # layer does not depend on the module-level globals.
        if self.training:
            keep_prob = torch.full(
                (batch, seq_len), 1 - self.word_dropout,
                dtype=x.dtype, device=x.device)
            mask = torch.bernoulli(keep_prob)
        else:
            mask = torch.ones(batch, seq_len, dtype=x.dtype, device=x.device)

        # Out-of-place masking: the caller's batch must not be zeroed.
        x = x * mask.unsqueeze(-1)

        # Number of kept words per sample; clamp so a sample whose words were
        # all dropped yields a zero vector instead of 0/0 = NaN.
        word_count = mask.sum(dim=1, keepdim=True).clamp(min=1)
        x = x.sum(dim=1) / word_count
        x = self.fc(x)
        return x


class CNN(nn.Module):
    '''
    Convolutional Neural Network

    One Conv2d branch per entry of params['filter_size']; every branch spans
    the full embedding width, is max-pooled over the sequence axis, and the
    pooled branches are concatenated before Dropout -> Linear.

    params keys read: 'num_filters', 'filter_size', 'dropout'.
    '''

    def __init__(self, params):
        super().__init__()
        num_filters = params['num_filters']
        filter_size = params['filter_size']
        dropout = params['dropout']

        branches = []
        for k in filter_size:
            # Kernel covers k consecutive positions and the whole embedding.
            branches.append(nn.Conv2d(1, num_filters, (k, embed_size)))
        self.convs = nn.ModuleList(branches)

        pooled_width = num_filters * len(filter_size)
        self.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(pooled_width, num_class)
        )

    def conv_pool(self, x, conv):
        '''
        Apply one conv branch, ReLU, and max-pool over the remaining length.

        x:    (b, 1, seq_len, embed) as produced by `forward`.
        conv: one module from `self.convs`.
        Returns a (b, num_filters) tensor.
        '''
        # After the conv the last axis has size 1 (kernel spans the embedding),
        # and the length axis has shrunk to seq_len - k + 1.
        feature_map = F.relu(conv(x)).squeeze(3)
        window = feature_map.shape[2]
        pooled = F.max_pool1d(feature_map, window)
        return pooled.squeeze(2)

    def forward(self, x):
        # Add the single input channel expected by Conv2d.
        x = x.unsqueeze(1)
        pooled = [self.conv_pool(x, conv) for conv in self.convs]
        return self.fc(torch.cat(pooled, 1))



class RNN(nn.Module):
    '''
    BiLSTM with Self Attention

    A single-layer bidirectional LSTM followed by a linear classifier. When
    params['Attention'] is truthy, the LSTM outputs are combined with learned
    attention weights; otherwise only the output at the last time step is
    classified.

    params keys read: 'hidden_size', 'Attention'.
    '''

    def __init__(self, params):
        super().__init__()
        hidden_size = params['hidden_size']
        self.Attention = params['Attention']
        # Both directions are concatenated, hence twice the hidden size.
        feature_dim = hidden_size * 2
        # nn.LSTM(input_size, hidden_size); num_layers defaults to 1.
        self.lstm = nn.LSTM(embed_size, hidden_size,
                            bidirectional=True, batch_first=True)
        if self.Attention:
            # Attention query vector, initialised to zeros.
            self.w = nn.Parameter(torch.zeros(feature_dim))

        self.fc = nn.Linear(feature_dim, num_class)

    def forward(self, x):
        # The LSTM returns (output, (h_n, c_n)); h_0 and c_0 default to zero.
        H, _ = self.lstm(x)

        if not self.Attention:
            # No attention: classify the output of the final time step.
            return self.fc(H[:, -1, :])

        # Score every time step, normalise over the sequence axis, and take
        # the attention-weighted sum of the LSTM outputs.
        scores = torch.matmul(torch.tanh(H), self.w)
        alpha = F.softmax(scores, dim=1).unsqueeze(-1)
        context = torch.sum(H * alpha, dim=1)
        return self.fc(context)

class RCNN(nn.Module):
    '''
    Recurrent Convolutional Neural Network

    Concatenates each word vector with the bidirectional LSTM output at the
    same position, projects every position with a width-1 convolution, applies
    ReLU, max-pools over the sequence, and classifies the pooled features.

    params keys read: 'hidden_size', 'num_filters'.
    '''

    def __init__(self, params):
        super().__init__()
        hidden_size = params['hidden_size']
        num_filters = params['num_filters']
        self.lstm = nn.LSTM(embed_size, hidden_size,
                            bidirectional=True, batch_first=True)
        # Kernel covers one position and the full concatenated feature width.
        joint_width = 2 * hidden_size + embed_size
        self.conv = nn.Conv2d(1, num_filters, (1, joint_width))
        # Not used by forward(), which pools with F.max_pool1d instead.
        self.maxpool = nn.MaxPool1d(max_len)
        self.fc = nn.Linear(num_filters, num_class)

    def forward(self, x):
        H, _ = self.lstm(x)
        # Per position: [embedding; LSTM output], plus a channel axis for Conv2d.
        joint = torch.cat((x, H), 2).unsqueeze(1)
        # ReLU is the active nonlinearity here.
        activated = F.relu(self.conv(joint).squeeze(3))
        # Max over the whole sequence axis.
        pooled = F.max_pool1d(activated, activated.shape[2]).squeeze(2)
        return self.fc(pooled)


In [0]:
def run(params):
    '''
    Train one model, keep the checkpoint with the best dev accuracy, save it,
    and evaluate it on the test set.

    params: dict with 'model_type' (one of 'MLP', 'DAN', 'CNN', 'RNN',
            'RCNN'), 'lr', 'num_epochs', 'weight_decay', plus the keys read
            by the selected model class.

    Uses the module-level `train_loader`, `dev_loader`, `test_loader` and
    `device`. Writes the best state dict to
    '/content/drive/My Drive/PA3/<model_type>.pkl'.

    Returns (test accuracy %, test macro-F1 %, test mean Pearson r).
    Raises ValueError if 'model_type' is not a known model name.
    '''
    model_type = params['model_type']
    lr = params['lr']
    num_epochs = params['num_epochs']
    weight_decay = params['weight_decay']

    if(model_type == 'MLP'):
        model = MLP(params)
    elif(model_type == 'DAN'):
        model = DAN(params)
    elif(model_type == 'CNN'):
        model = CNN(params)
    elif(model_type == 'RNN'):
        model = RNN(params)
    elif(model_type == 'RCNN'):
        model = RCNN(params)
    else:
        # Fail here with a clear message instead of later on `None.to(...)`.
        raise ValueError(
            "unknown model_type %r; expected one of MLP, DAN, CNN, RNN, RCNN"
            % (model_type,))

    model.to(device)

    print("====Train Begin====")
    loss_function = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_acc = 0
    best_model = None

    for epoch in range(num_epochs):
        print("epoch",epoch)
        model.train()
        train_stat = Stat()
        for batch in train_loader:
            optimizer.zero_grad()
            text, label, class_idx = batch
            pred_label = model(text)

            # The loss uses the hard class index; the full label vector is
            # only recorded for the metrics.
            loss = loss_function(pred_label, class_idx)
            loss.backward()
            optimizer.step()
            train_stat.append(loss.item(), label, pred_label)
        t_loss, t_acc, t_f1, t_corr = train_stat.eval()
        print("TRAIN\tloss:%.4f, acc:%.4f, f1:%.4f, corr:%.4f" % (
            t_loss, t_acc, t_f1, t_corr))

        model.eval()
        dev_stat = Stat()
        with torch.no_grad():
            for batch in dev_loader:
                text, label, class_idx = batch
                pred_label = model(text)
                loss = loss_function(pred_label, class_idx)
                dev_stat.append(loss.item(), label, pred_label)
            d_loss, d_acc, d_f1, d_corr = dev_stat.eval()
            print("DEV  \tloss:%.4f, acc:%.4f, f1:%.4f, corr:%.4f" % (
                d_loss, d_acc, d_f1, d_corr))
            # Always keep the first epoch so a checkpoint exists even when
            # dev accuracy never rises above 0; afterwards require a strict
            # improvement.
            if(best_model is None or d_acc > best_acc):
                best_acc = d_acc
                best_model = deepcopy(model.state_dict())

    print("====Train End====")
    print("best acc:%.4f" % (best_acc))

    if best_model is None:
        # No epoch ran (num_epochs == 0): fall back to the current weights so
        # saving and reloading below still work.
        best_model = deepcopy(model.state_dict())

    torch.save(best_model, '/content/drive/My Drive/PA3/'+ model_type + '.pkl')
    model.load_state_dict(best_model)
    print("====Test====")
    test_stat = Stat()
    model.eval()
    with torch.no_grad():
        for batch in test_loader:
            text, label, class_idx = batch
            pred_label = model(text)
            # No loss is computed on the test set; 0 is a placeholder.
            test_stat.append(0, label, pred_label)
        t_loss, t_acc, t_f1, t_corr = test_stat.eval()
        print("TEST\tacc:%.4f, f1:%.4f, corr:%.4f" % (t_acc, t_f1, t_corr))
    return t_acc, t_f1, t_corr

In [0]:
import sys
def test_performance(params):
    acc_list = []
    f1_list = []
    corr_list = []
    save = sys.stdout
    print(params)
    for i in range(5):
        print("run test",i)
        sys.stdout = None
        acc, f1, corr = run(params)
        sys.stdout = save
        acc_list.append(acc)
        f1_list.append(f1)
        corr_list.append(corr)
    acc = sum(acc_list) / 5
    f1 = sum(f1_list) / 5
    corr = sum(corr_list)/ 5
    print("PERFORMANCE: acc:%.4f, f1:%.4f, corr:%.4f" %(acc, f1, corr))

In [6]:
# One hyperparameter dictionary per architecture. Each line groups, in order:
# model name, optimiser/training settings, then architecture settings.
MLP_params = {
    'model_type': 'MLP',
    'lr': 1e-3, 'weight_decay': 1e-4, 'num_epochs': 50,
    'hidden_size': 512, 'dropout': 0.5,
}
DAN_params = {
    'model_type': 'DAN',
    'lr': 1e-2, 'weight_decay': 0, 'num_epochs': 100,
    'hidden_size': 256, 'dropout': 0.5, 'word_dropout': 0.5,
}
CNN_params = {
    'model_type': 'CNN',
    'lr': 1e-3, 'weight_decay': 1e-3, 'num_epochs': 20,
    'num_filters': 128, 'filter_size': (2, 3, 4), 'dropout': 0.5,
}
RNN_params = {
    'model_type': 'RNN',
    'lr': 1e-3, 'weight_decay': 0, 'num_epochs': 50,
    'hidden_size': 256, 'Attention': True,
}
RCNN_params = {
    'model_type': 'RCNN',
    'lr': 1e-3, 'weight_decay': 1e-3, 'num_epochs': 20,
    'hidden_size': 128, 'num_filters': 128,
}
# test_performance(MLP_params)
# Single training run of the DAN configuration; the cell shows the returned
# (acc, f1, corr) tuple.
run(DAN_params)

====Train Begin====
epoch 0
TRAIN	loss:1.6291, acc:43.7325, f1:11.8778, corr:0.3943
DEV  	loss:1.3636, acc:55.6000, f1:16.6520, corr:0.4805
epoch 1
TRAIN	loss:1.4552, acc:50.1403, f1:14.9782, corr:0.4609
DEV  	loss:1.2775, acc:57.6000, f1:19.3064, corr:0.5211
epoch 2
TRAIN	loss:1.3853, acc:51.8709, f1:17.8804, corr:0.4873
DEV  	loss:1.2302, acc:57.7000, f1:19.5270, corr:0.5756
epoch 3
TRAIN	loss:1.3530, acc:53.5547, f1:20.9807, corr:0.5233
DEV  	loss:1.1916, acc:58.8000, f1:22.2374, corr:0.5862
epoch 4
TRAIN	loss:1.3291, acc:54.1628, f1:22.4002, corr:0.5287
DEV  	loss:1.1988, acc:59.3000, f1:20.2793, corr:0.5868
epoch 5
TRAIN	loss:1.3178, acc:54.0692, f1:22.2399, corr:0.5311
DEV  	loss:1.1718, acc:59.0000, f1:22.5356, corr:0.5806
epoch 6
TRAIN	loss:1.2878, acc:55.7063, f1:25.5297, corr:0.5403
DEV  	loss:1.1408, acc:59.5000, f1:23.1911, corr:0.5807
epoch 7
TRAIN	loss:1.2541, acc:56.6885, f1:27.3987, corr:0.5400
DEV  	loss:1.1617, acc:59.5000, f1:25.8532, corr:0.5918
epoch 8
TRAIN	loss:1

(64.32748538011695, 35.395789442382764, 0.6045400572427136)