In [1]:
import os
import datetime
import torch
import torch.nn.functional as F
import torchtext.data as data
from src import obj_dict, movie_reviews_dataset, model_cnn, train

# Run configuration: every tunable lives here and is read by attribute
# (e.g. args.batch_size) in the cells below.
args = obj_dict.objdict(dict(
    # Batch size used for the training iterator.
    batch_size=1024,
    # Use the GPU whenever one is visible; set to False to force CPU.
    cuda=torch.cuda.is_available(),
    # Device index handed to the data iterators when `cuda` is on.
    device=0,
    epochs=250,
    # Root directory under which the per-run snapshot directory is created.
    save_dir='./snapshots/cnn/',
    # Model settings, consumed by model_cnn.CNN_Text (see that module).
    static=False,
    kernel_sizes=[3, 4, 5],
    embed_dim=300,
    kernel_num=100,
    dropout=0.5,
    # Optimisation / reporting settings, consumed by train.train.
    lr=0.001,
    log_interval=100,
    test_interval=100,
    save_interval=500,
))

Set up the Movie Reviews dataset loader

In [2]:
def mr(text_field, label_field, **kargs):
    """Load the Movie Reviews (MR) dataset and wrap it in batch iterators.

    Args:
        text_field: field describing the review text; its vocabulary is
            built here from both splits.
        label_field: field describing the label; its vocabulary is built
            here from both splits.
        **kargs: forwarded unchanged to ``data.Iterator.splits``.

    Returns:
        A ``(train_iter, dev_iter)`` pair. The training iterator uses the
        notebook-level ``args.batch_size``; the dev iterator uses one batch
        that spans the whole dev split.
    """
    train_set, dev_set = movie_reviews_dataset.MR.splits(
        text_field, label_field, root='./data')

    # Both vocabularies are built over train + dev, text first, then labels.
    for field in (text_field, label_field):
        field.build_vocab(train_set, dev_set)

    # Dev batch size == size of the dev split, i.e. a single evaluation batch.
    sizes = (args.batch_size, len(dev_set))
    train_batches, dev_batches = data.Iterator.splits(
        (train_set, dev_set), batch_sizes=sizes, **kargs)
    return train_batches, dev_batches

# Echo the run configuration so it is recorded alongside the results.
print(args)

# load data
print("\nLoading data...")
# Review text is lower-cased; labels are a single non-sequential value.
text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
# Iterators go on the configured GPU when CUDA is enabled; otherwise -1 is
# passed (presumably the legacy torchtext code for CPU; confirm against the
# installed version).
train_iter, dev_iter = mr(text_field, label_field, device=args.device if args.cuda else -1, repeat=False)
# The number reported here is the text vocabulary size, not a sample count.
print("Loaded vocabulary with", len(text_field.vocab), "entries")

{'batch_size': 1024, 'cuda': True, 'device': 0, 'epochs': 250, 'save_dir': './snapshots/cnn/', 'static': False, 'kernel_sizes': [3, 4, 5], 'embed_dim': 300, 'kernel_num': 100, 'dropout': 0.5, 'lr': 0.001, 'log_interval': 100, 'test_interval': 100, 'save_interval': 500}

Loading data...


Loaded 21109 samples


In [3]:
# Each run gets its own snapshot directory, named after the launch time.
run_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Size the model from the vocabularies built while loading the data.
args.embed_num = len(text_field.vocab)
# One entry is subtracted from the label vocabulary (presumably the
# '<unk>' placeholder torchtext adds; TODO confirm).
args.class_num = len(label_field.vocab) - 1
args.snapshot_save_dir = os.path.join(args.save_dir, run_stamp)

# model
cnn = model_cnn.CNN_Text(args)
train.train(train_iter, dev_iter, cnn, args)

Batch[100] - loss: 0.694310  acc: 53.4211%(203/380)
Evaluation - loss: 0.689329  acc: 51.8762%(553/1066) 



Final Evaluation

In [4]:
# Score the trained model on the dev split. Judging by the printed summary,
# the displayed tuple is (loss, accuracy in percent, correct, total).
final_metrics = train.eval(dev_iter, cnn, args)
final_metrics


Evaluation - loss: 0.576529  acc: 73.4522%(783/1066) 



(0.5765285241447291, 73.45215759849906, 783, 1066)