In [None]:
import sys, os, time, gc
from torch.optim import Adam

In [None]:
# Make the parent directory importable so the project packages (utils, model) resolve.
sys.path.append(os.path.abspath(os.pardir))

In [None]:
from utils.args import init_args, add_argument_base
from utils.initialization import *
from utils.example import Example
from utils.batch import from_example_list
from utils.vocab import PAD
from model.slu_baseline_tagging import SLUTagging

In [None]:
def get_args(argv=None):
    """Build the experiment configuration from command-line style options.

    Args:
        argv: optional list of argument strings to parse (for example
            ``['--lr', '0.01']``). When omitted, an empty list is parsed so
            every option takes its default; ``sys.argv`` is never consulted,
            which keeps the call usable from inside a notebook kernel.

    Returns:
        argparse.Namespace holding one attribute per option declared below.
    """
    import argparse
    arg_parser = argparse.ArgumentParser()

    #### General configuration ####
    # Default paths are relative to the notebook's directory, hence the '../' prefix.
    arg_parser.add_argument('--dataroot', default='../data', help='root of data')
    arg_parser.add_argument('--word2vec_path', default='../word2vec-768.txt', help='path of word2vector file path')
    arg_parser.add_argument('--seed', default=999, type=int, help='Random seed')
    arg_parser.add_argument('--device', type=int, default=-1, help='Use which device: -1 -> cpu ; the index of gpu o.w.')
    arg_parser.add_argument('--testing', action='store_true', help='training or evaluation mode')
    #### Training Hyperparams ####
    arg_parser.add_argument('--batch_size', default=32, type=int, help='Batch size')
    arg_parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
    arg_parser.add_argument('--max_epoch', type=int, default=100, help='terminate after maximum epochs')
    #### Common Encoder Hyperparams ####
    # The help text here used to be a copy of the --dataroot description.
    arg_parser.add_argument('--encoder_cell', default='LSTM', choices=['LSTM', 'GRU', 'RNN'], help='type of recurrent encoder cell')
    arg_parser.add_argument('--dropout', type=float, default=0.2, help='feature dropout rate')
    arg_parser.add_argument('--embed_size', default=768, type=int, help='Size of word embeddings')
    arg_parser.add_argument('--hidden_size', default=512, type=int, help='hidden size')
    arg_parser.add_argument('--num_layer', default=2, type=int, help='number of layer')

    # Parse an explicit list (empty by default) rather than the process arguments.
    return arg_parser.parse_args([] if argv is None else argv)


def set_optimizer(model, args):
    """Create an Adam optimizer over the trainable parameters of ``model``.

    Args:
        model: a ``torch.nn.Module`` whose parameters should be optimised.
        args: configuration object; only ``args.lr`` (learning rate) is read.

    Returns:
        A ``torch.optim.Adam`` instance with a single parameter group.
    """
    # Keep the parameters in the module's own registration order. The previous
    # list(set(...)) produced an arbitrary order that can differ between runs,
    # which makes the positional entries of optimizer.state_dict() unreliable
    # when a checkpoint is restored. Module.parameters() already yields each
    # shared parameter only once, so no extra de-duplication is needed.
    trainable = [p for p in model.parameters() if p.requires_grad]
    grouped_params = [{'params': trainable}]
    return Adam(grouped_params, lr=args.lr)


def decode(choice):
    """Evaluate the notebook-global ``model`` on one data split.

    Relies on the module-level names ``model``, ``args``, ``device``,
    ``train_dataset`` and ``dev_dataset``.

    Args:
        choice: ``'train'`` or ``'dev'``; selects which dataset to run on.

    Returns:
        Tuple ``(metrics, mean_loss)``: ``metrics`` is whatever
        ``Example.evaluator.acc`` returns for all collected predictions and
        labels, and ``mean_loss`` is the summed per-batch loss divided by the
        number of batches.
    """
    assert choice in ['train', 'dev']
    model.eval()
    if choice == 'train':
        dataset = train_dataset
    else:
        dataset = dev_dataset

    predictions = []
    labels = []
    total_loss = 0
    num_batches = 0
    with torch.no_grad():
        # Walk the split in consecutive, unshuffled chunks of batch_size.
        for start in range(0, len(dataset), args.batch_size):
            chunk = dataset[start: start + args.batch_size]
            # train=True is passed so the batch carries the labels that
            # model.decode returns alongside the predictions (assumption from
            # the call below — TODO confirm in from_example_list).
            batch = from_example_list(args, chunk, device, train=True)
            pred, label, loss = model.decode(Example.label_vocab, batch)
            predictions.extend(pred)
            labels.extend(label)
            total_loss += loss
            num_batches += 1
        metrics = Example.evaluator.acc(predictions, labels)
    # Release cached GPU memory and unreachable Python objects after evaluation.
    torch.cuda.empty_cache()
    gc.collect()
    return metrics, total_loss / num_batches



In [None]:
# Build the configuration, seed the RNGs and pick the torch device.
args = get_args()
set_random_seed(args.seed)
device = set_torch_device(args.device)
print("Initialization finished ...")
print("Random seed is set to %d" % (args.seed))
# A non-negative index selects a GPU; -1 (the default) means CPU.
if args.device >= 0:
    print("Use GPU with index %s" % (args.device))
else:
    print("Use CPU as target torch device")

In [None]:
# Time the whole data-loading stage.
start_time = time.time()
train_path, dev_path = (
    os.path.join(args.dataroot, file_name)
    for file_name in ('train.json', 'development.json')
)
# Example.configuration is called before any split is loaded, as in the original order.
Example.configuration(args.dataroot, train_path=train_path, word2vec_path=args.word2vec_path)
train_dataset, dev_dataset = (
    Example.load_dataset(split_path) for split_path in (train_path, dev_path)
)

print("Load dataset and database finished, cost %.4fs ..." % (time.time() - start_time))
print("Dataset size: train -> %d ; dev -> %d" % (len(train_dataset), len(dev_dataset)))

In [60]:
# Pick a single training example (index 4) to inspect in the following cells.
d = train_dataset[4]

In [61]:
# Show the example's tag ids, slot values and utterance text side by side.
d.tag_id, d.slotvalue, d.utt

([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [], '第二个到塔季他这个道士观')

In [62]:
# Show the raw record this example was built from.
d.ex

{'utt_id': 1,
 'manual_transcript': '(side)(dialect)',
 'asr_1best': '第二个到塔季他这个道士观',
 'semantic': []}

In [52]:
# NOTE(review): this cell raises IndexError (see the recorded traceback). The
# values in d.input_idx (28..38 in the recorded output) exceed the length of
# d.utt (12 characters), so they look like vocabulary ids rather than positions
# in the string. TODO: decode them through the word vocabulary, or drop this cell.
[ d.utt[idx-1] for idx in d.input_idx ]

IndexError: string index out of range

In [63]:
# Show the integer ids stored for the utterance (one per character in the recorded output).
d.input_idx

[28, 29, 30, 31, 32, 33, 34, 35, 30, 36, 37, 38]

In [64]:
# Show the string tag assigned to each position of the utterance.
d.tags

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

In [66]:
# Size of the tag inventory (includes the '<pad>' and 'O' entries listed by idx2tag).
Example.label_vocab.num_tags

74

In [70]:
# List every tag string in index order to see how tag ids map to labels.
[Example.label_vocab.idx2tag[idx] for idx in range(Example.label_vocab.num_tags)]

['<pad>',
 'O',
 'B-inform-poi名称',
 'I-inform-poi名称',
 'B-inform-poi修饰',
 'I-inform-poi修饰',
 'B-inform-poi目标',
 'I-inform-poi目标',
 'B-inform-起点名称',
 'I-inform-起点名称',
 'B-inform-起点修饰',
 'I-inform-起点修饰',
 'B-inform-起点目标',
 'I-inform-起点目标',
 'B-inform-终点名称',
 'I-inform-终点名称',
 'B-inform-终点修饰',
 'I-inform-终点修饰',
 'B-inform-终点目标',
 'I-inform-终点目标',
 'B-inform-途经点名称',
 'I-inform-途经点名称',
 'B-inform-请求类型',
 'I-inform-请求类型',
 'B-inform-出行方式',
 'I-inform-出行方式',
 'B-inform-路线偏好',
 'I-inform-路线偏好',
 'B-inform-对象',
 'I-inform-对象',
 'B-inform-操作',
 'I-inform-操作',
 'B-inform-序列号',
 'I-inform-序列号',
 'B-inform-页码',
 'I-inform-页码',
 'B-inform-value',
 'I-inform-value',
 'B-deny-poi名称',
 'I-deny-poi名称',
 'B-deny-poi修饰',
 'I-deny-poi修饰',
 'B-deny-poi目标',
 'I-deny-poi目标',
 'B-deny-起点名称',
 'I-deny-起点名称',
 'B-deny-起点修饰',
 'I-deny-起点修饰',
 'B-deny-起点目标',
 'I-deny-起点目标',
 'B-deny-终点名称',
 'I-deny-终点名称',
 'B-deny-终点修饰',
 'I-deny-终点修饰',
 'B-deny-终点目标',
 'I-deny-终点目标',
 'B-deny-途经点名称',
 'I-deny-途经点名称',
 'B-deny-请求类

In [None]:
# Extend the configuration with sizes and padding ids taken from the loaded vocabularies.
args.vocab_size, args.pad_idx = Example.word_vocab.vocab_size, Example.word_vocab[PAD]
args.num_tags, args.tag_pad_idx = (
    Example.label_vocab.num_tags,
    Example.label_vocab.convert_tag_to_idx(PAD),
)

In [None]:
# Build the tagger, move it to the target device, then load the word2vec
# vectors into its embedding layer.
model = SLUTagging(args)
model = model.to(device)
Example.word2vec.load_embeddings(
    model.word_embed,
    Example.word_vocab,
    device=device,
)

In [None]:
# Originally guarded by `if not args.testing:`; computed unconditionally here.
# Batches per epoch are counted the same way the training loop slices the data
# (range with a step of batch_size), then multiplied by the number of epochs.
num_training_steps = len(range(0, len(train_dataset), args.batch_size)) * args.max_epoch
print('Total training steps: %d' % (num_training_steps))

In [None]:
# Optimizer plus the bookkeeping used by the training cells below.
optimizer = set_optimizer(model, args)
nsamples = len(train_dataset)
# Best dev scores seen so far; both start at zero.
best_result = {'dev_acc': 0., 'dev_f1': 0.}
# Index array that gets shuffled each epoch, and the slice width per step.
train_index = np.arange(nsamples)
step_size = args.batch_size

In [None]:
print('Start training ......')
# Single-epoch walkthrough: the loop `for i in range(args.max_epoch)` is
# unrolled by hand, with the epoch index pinned to 0.
i = 0
start_time = time.time()
# Running loss total and number of optimisation steps taken in this epoch.
epoch_loss, count = 0, 0
np.random.shuffle(train_index)
model.train()

In [None]:
# Single training step: the loop `for j in range(0, nsamples, step_size)` is
# unrolled by hand, with the batch offset pinned to 0.
# for j in range(0, nsamples, step_size):
j = 0
# Gather the examples addressed by the shuffled indices for this step.
cur_dataset = [train_dataset[k] for k in train_index[j: j + step_size]]
current_batch = from_example_list(args, cur_dataset, device, train=True)
# Forward pass; the model returns both its output and the loss.
output, loss = model(current_batch)
epoch_loss += loss.item()
# Backpropagate, apply the update, then clear gradients for the next step.
loss.backward()
optimizer.step()
optimizer.zero_grad()
count += 1

In [None]:
# Pull the batch fields into top-level names so the forward pass can be replayed by hand.
batch = current_batch
tag_ids, tag_mask = batch.tag_ids, batch.tag_mask
input_ids, lengths = batch.input_ids, batch.lengths

In [None]:
# Inspect the class-level `tags` attribute of Example (no output was recorded for this cell).
Example.tags

In [None]:
# Look at the first entry of each batch field.
tag_ids[0], tag_mask[0], input_ids[0], lengths[0]

In [None]:
# Flatten the tag ids into a single dimension.
tag_ids.view(-1)

In [None]:
# Scratch arithmetic: 576 divided by 32 (the default batch size). Presumably 576
# is the length of the flattened tag_ids, giving the padded length per example
# — TODO confirm where 576 comes from, or remove this cell.
576 / 32

In [None]:
# Start replaying the forward pass by hand: look up embeddings for the input ids.
embed = model.word_embed(input_ids)

In [None]:
import torch.nn.utils.rnn as rnn_utils

In [None]:
# Pack the padded embeddings so the RNN skips padding positions.
# NOTE(review): with the default enforce_sorted=True, pack_padded_sequence
# expects `lengths` in decreasing order; assumes from_example_list already
# sorts the batch that way — TODO confirm.
packed_inputs = rnn_utils.pack_padded_sequence(embed, lengths, batch_first=True)

In [None]:
# Display the packed representation.
packed_inputs

In [None]:
# Run the recurrent encoder on the packed batch. The result is still in packed
# form here; the "bsize x seqlen x dim" layout noted on this line presumably
# applies only after pad_packed_sequence unpacks it — TODO confirm.
packed_rnn_out, h_t_c_t = model.rnn(packed_inputs)  # bsize x seqlen x dim

In [None]:
# Unpack the encoder output back into a padded, batch-first tensor plus per-sequence lengths.
rnn_out, unpacked_len = rnn_utils.pad_packed_sequence(packed_rnn_out, batch_first=True)

In [None]:
# Apply the model's dropout layer to the encoder output. model.train() was
# called in an earlier cell, so dropout is presumably active — TODO confirm.
hiddens = model.dropout_layer(rnn_out)

In [None]:
# Feed the hidden states, the tag mask and the gold tag ids to the output layer.
tag_output = model.output_layer(hiddens, tag_mask, tag_ids)

In [None]:
# Unpack the output layer's result. Note this rebinds `output` and `loss`,
# replacing the values produced by the training-step cell.
output, loss = tag_output

In [None]:
# Check the shape of the output layer's first return value.
output.shape

In [None]:
# Shape of one slice: first batch entry, first position.
output[0][0].shape

In [None]:
# Report the epoch index, elapsed time and mean training loss per step.
print('Training: \tEpoch: %d\tTime: %.4f\tTraining Loss: %.4f' % (i, time.time() - start_time, epoch_loss / count))
# Release cached GPU memory and unreachable Python objects before evaluation.
# gc.collect() is the last expression, so its return value is the cell's output.
torch.cuda.empty_cache()
gc.collect()

In [None]:
# Evaluate on the development split and report accuracy plus precision/recall/F-score.
start_time = time.time()
metrics, dev_loss = decode('dev')
dev_acc = metrics['acc']
dev_fscore = metrics['fscore']
print(
    'Evaluation: \tEpoch: %d\tTime: %.4f\tDev acc: %.2f\tDev fscore(p/r/f): (%.2f/%.2f/%.2f)'
    % (
        i,
        time.time() - start_time,
        dev_acc,
        dev_fscore['precision'],
        dev_fscore['recall'],
        dev_fscore['fscore'],
    )
)

In [None]:
# Keep a checkpoint whenever dev accuracy beats the best value seen so far.
if dev_acc > best_result['dev_acc']:
    best_result['dev_loss'], best_result['dev_acc'], best_result['dev_f1'], best_result['iter'] = dev_loss, dev_acc, dev_fscore, i
    # Hand torch.save the path instead of an open() handle: the previous
    # open('model.bin', 'wb') was never closed, leaking the file handle and
    # leaving the flush to garbage collection.
    torch.save({
        'epoch': i, 'model': model.state_dict(),
        'optim': optimizer.state_dict(),
    }, 'model.bin')
    print('NEW BEST MODEL: \tEpoch: %d\tDev loss: %.4f\tDev acc: %.2f\tDev fscore(p/r/f): (%.2f/%.2f/%.2f)' % (i, dev_loss, dev_acc, dev_fscore['precision'], dev_fscore['recall'], dev_fscore['fscore']))

In [None]:
# Display the best dev results recorded so far.
best_result

In [None]:
# best_result only gains 'iter' and 'dev_loss' (and a dict-valued 'dev_f1')
# once an improved dev accuracy has been recorded. Guard the summary so it
# cannot fail with KeyError/TypeError when no improvement was ever seen.
if 'iter' in best_result:
    print('FINAL BEST RESULT: \tEpoch: %d\tDev loss: %.4f\tDev acc: %.4f\tDev fscore(p/r/f): (%.4f/%.4f/%.4f)' % (best_result['iter'], best_result['dev_loss'], best_result['dev_acc'], best_result['dev_f1']['precision'], best_result['dev_f1']['recall'], best_result['dev_f1']['fscore']))
else:
    print('FINAL BEST RESULT: \tno improvement over the initial dev acc of %.4f was recorded' % (best_result['dev_acc']))