In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
import time
import os, sys
import glob
import numpy as np
import logging
from args import get_args
from models import RNNSubjectRecognition
# from DataLoader import SubjectRecognitionLoader, load_pretrained_vectors
from predict import evaluation
sys.path.append("/home2/zyk/kbqa/entity_detection")
from seqLabelingLoader import SeqLabelingLoader

def set_logger(name):
    '''
    Write logs to checkpoint and console
    '''

    log_file = './log/%s.log' % name

    logging.basicConfig(
        format='%(asctime)s %(levelname)-8s %(message)s',
        level=logging.INFO,
        datefmt='%Y-%m-%d %H:%M:%S',
        filename=log_file,
        filemode='w'
    )
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    
class ARGS():
    def __init__(self):
        self.cuda = True
        self.gpu = 3
        self.seed = 1111
        self.train_file = '/home2/zyk/kbqa/entity_detection/data/train.entity_detection.pt'
        self.valid_file = '/home2/zyk/kbqa/entity_detection/data/valid.entity_detection.pt'
        self.test_file = '/home2/zyk/kbqa/entity_detection/data/test.entity_detection.pt'
        self.vocab_file = '../../data/vocab/word_vocab.pt'
        self.birnn = True
        self.rnn_type = 'lstm'
        self.d_embed=300
        self.d_hidden=200
        self.n_layers=2
        self.lr=1e-4
        self.dropout_prob=0.5

        self.word_vectors='../../data/vocab/glove.42B.300d.txt'
        self.vector_cache='./input_vectors.pt'
        self.word_normalize=True

        
args = ARGS()
torch.manual_seed(args.seed)
if not args.cuda:
    args.gpu = -1
    device = torch.device("cpu")
else:
    device = torch.device("cuda:%d" % args.gpu)

if torch.cuda.is_available() and args.cuda:
    print("Note: You are using GPU for training")
    torch.cuda.set_device(args.gpu)
    torch.cuda.manual_seed(args.seed)
if torch.cuda.is_available() and not args.cuda:
    print("Warning: You have Cuda but do not use it. You are using CPU for training")

train_loader = SeqLabelingLoader(args.train_file, args.gpu)
print('load train data, batch_num: %d\tbatch_size: %d'
      %(train_loader.batch_num, train_loader.batch_size))
valid_loader = SeqLabelingLoader(args.valid_file, args.gpu)
print('load valid data, batch_num: %d\tbatch_size: %d'
      %(valid_loader.batch_num, valid_loader.batch_size))

# load word vocab for questions
word_vocab = torch.load(args.vocab_file)
print('load word vocab, size: %s' % len(word_vocab))

Note: You are using GPU for training
<class 'list'>
load train data, batch_num: 566	batch_size: 128
<class 'list'>
load valid data, batch_num: 82	batch_size: 128
load word vocab, size: 61024


In [6]:
from torch import nn
from torch import autograd
import torch.nn.functional as F
from torch.autograd import Variable
import torch
import numpy as np
import sys
sys.path.append('../tools')
args.n_out = 2
args.n_cells = args.n_layers
if args.birnn:
    args.n_cells *= 2

model = SubjectRecognition(len(word_vocab), args)
if args.word_vectors:
    if os.path.isfile(args.vector_cache):
        pretrained = torch.load(args.vector_cache)
    else:
        pretrained = load_pretrained_vectors(args.word_vectors, binary=False,
                                             normalize=args.word_normalize)
        torch.save(pretrained, args.vector_cache)

    model.embed.weight.data.copy_(pretrained)
    logging.info('load pretrained word vectors from %s, pretrained size: %s' % (args.word_vectors,
                                                                         pretrained.size()))

model.to(device)
for name, param in model.named_parameters():
    print(name, param.size())
criterion = nn.NLLLoss() # negative log likelyhood loss function
optimizer = optim.Adam(model.parameters(), lr=args.lr)

embed.weight torch.Size([61024, 300])
rnn.weight_ih_l0 torch.Size([800, 300])
rnn.weight_hh_l0 torch.Size([800, 200])
rnn.bias_ih_l0 torch.Size([800])
rnn.bias_hh_l0 torch.Size([800])
rnn.weight_ih_l0_reverse torch.Size([800, 300])
rnn.weight_hh_l0_reverse torch.Size([800, 200])
rnn.bias_ih_l0_reverse torch.Size([800])
rnn.bias_hh_l0_reverse torch.Size([800])
rnn.weight_ih_l1 torch.Size([800, 400])
rnn.weight_hh_l1 torch.Size([800, 200])
rnn.bias_ih_l1 torch.Size([800])
rnn.bias_hh_l1 torch.Size([800])
rnn.weight_ih_l1_reverse torch.Size([800, 400])
rnn.weight_hh_l1_reverse torch.Size([800, 200])
rnn.bias_ih_l1_reverse torch.Size([800])
rnn.bias_hh_l1_reverse torch.Size([800])
hidden2tag.0.weight torch.Size([400, 400])
hidden2tag.0.bias torch.Size([400])
hidden2tag.1.weight torch.Size([400])
hidden2tag.1.bias torch.Size([400])
hidden2tag.4.weight torch.Size([2, 400])
hidden2tag.4.bias torch.Size([2])


In [19]:
# train the model
iterations = 0
best_dev_acc = 0
best_dev_F = 0
num_iters_in_epoch = train_loader.batch_num
patience = 5 * num_iters_in_epoch
iters_not_improved = 0
early_stop = False

snapshot_path = "subject_recognition.pth"
best_snapshot_path = "best_subject_recognition.pth"

for epoch in range(1, 10+1):
    if early_stop:
        logging.info("Early stopping. Epoch: {}, Best Dev. Acc: {}".format(epoch, best_dev_acc))
        break

    n_correct, n_total = 0, 0
    for batch_idx, batch in enumerate(train_loader.next_batch()):
        iterations += 1
        seq, label = batch
        seq_len = [seq.size()[1] for i in range(seq.size()[0]) ]
        model.train()
        optimizer.zero_grad()
        scores = model.forward(seq, seq_len)

        n_correct += ((torch.max(scores, dim=1)[1].view(label.size()).data == label.data).sum(dim=0) \
                      == label.size()[0]).sum()
        n_total += train_loader.batch_size
        train_acc = 100. * n_correct / n_total

        loss = criterion(scores, label.view(-1, 1)[:, 0])
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.6)
        optimizer.step()

        if iterations % 1000 == 0:
            torch.save(model, snapshot_path)

        if iterations % 900 == 0:
            model.eval()
            n_dev_correct = 0
            gold_list = []
            pred_list = []
            for valid_batch_idx, valid_batch in enumerate(valid_loader.next_batch()):
                valid_label = valid_batch[1]
                seq, valid_label = batch
                seq_len = [seq.size()[1] for i in range(seq.size()[0]) ]
                answer = model.forward(seq, seq_len)
                n_dev_correct += ((torch.max(answer, 1)[1].view(valid_label.size()).data == \
                                   valid_label.data).sum(dim=0) == valid_label.size()[0]).sum()
                index_tag = np.transpose(torch.max(answer, 1)[1].view(valid_label.size()).cpu().data.numpy())
                gold_list.append(np.transpose(valid_label.cpu().data.numpy()))
                pred_list.append(index_tag)
            P, R, F_ = evaluation(gold_list, pred_list)
            dev_acc = 100. * n_dev_correct / (valid_loader.batch_num * valid_loader.batch_size)
            print("iterations: %d" % iterations)
            print(dev_acc)
            print(P)
            print(R)
            print(F)
            if F_ > best_dev_F:
                best_dev_F = F_
                iters_not_improved = 0
                torch.save(model, best_snapshot_path)
            else:
                iters_not_improved += 1
                if iters_not_improved > patience:
                    early_stop = True
                    break



iterations: 900
tensor(7, device='cuda:3')
0.3364485981308411
0.28125
0.30638297872340425


  "type " + obj.__name__ + ". It won't be checked "


AttributeError: 'float' object has no attribute 'log_softmax'

In [18]:
seq.size()

torch.Size([9, 128])