In [28]:
from util import get_num_lines, get_vocab, embed_sequence, get_word2idx_idx2word, get_embedding_matrix
from util import TextDatasetWithGloveElmoSuffix as TextDataset
from util import evaluate
from model import RNNSequenceClassifier

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.nn.functional as F

import csv
import h5py
import random
import math
import numpy as np
import matplotlib

matplotlib.use('Agg')  # to avoid the error: _tkinter.TclError: no display name and no $DISPLAY environment variable
matplotlib.use('tkagg')  # to display the graph on remote server
import matplotlib.pyplot as plt

In [2]:
print("PyTorch version:")
print(torch.__version__)
print("GPU Detected:")
print(torch.cuda.is_available())
using_GPU = True

PyTorch version:
1.2.0
GPU Detected:
True


In [7]:
raw_poetry = []

# normal version
with open('../Poetry/poetry.csv') as f:
    lines = csv.reader(f)
    next(lines)
    for line in lines:
        raw_poetry.append([line[1].strip(), int(line[2]), int(line[3])])
print('Poetry dataset size: ', len(raw_poetry))
print(raw_poetry)

Poetry dataset size:  585
[['poise is a club .', 1, 1], ['destroying alexandria . sunlight is silence', 4, 1], ['feet are no anchor . gravity sucks at the mind', 1, 1], ["on the day 's horizon is a gesture of earth", 5, 1], ['he said good-by as if good-by is a number .', 6, 1], ['although your eyes be lakes , dies', 3, 1], ['ways ! as if the world were a taxi , you enter it , then', 6, 1], ['the poet is a man who feigns', 2, 0], ['man is an animal that needs a warden', 1, 1], ['my name is james a . wright , and i was born', 2, 0], ['earth is a door i cannot even face .', 1, 1], ['the hackles on my neck are fear , not grief .', 5, 1], ['the moon is a sow', 2, 1], ['earth is the birth of the blues ,  sang yellow bertha ,', 1, 1], ['about the nature of understanding . no one is that simple', 8, 0], ['the real terror of nature is humanity enraged , the true', 5, 1], ['love is a word another kind of open --', 1, 1], ['your goodbye is a promise of lightning', 2, 1], ['his broken body is the 

In [8]:
# vocab is a set of words
vocab = get_vocab(raw_poetry)
# two dictionaries. <PAD>: 0, <UNK>: 1
word2idx, idx2word = get_word2idx_idx2word(vocab)
# glove_embeddings a nn.Embeddings
glove_embeddings = get_embedding_matrix(word2idx, idx2word, normalization=False)
# elmo_embeddings
elmos_poetry = None
# suffix_embeddings: number of suffix tag is 2, and the suffix embedding dimension is 50
suffix_embeddings = nn.Embedding(2, 50)

vocab size:  1773


100%|██████████| 2196017/2196017 [00:25<00:00, 85044.60it/s]

Number of pre-trained word vectors loaded:  1713
Embeddings mean:  -0.004147875588387251
Embeddings stdev:  0.3719170391559601





In [11]:
random.seed(0)  # set a seed
random.shuffle(raw_poetry)

embedded_poetry = [[embed_sequence(example[0], example[1], word2idx, glove_embeddings,
                                   elmos_poetry, suffix_embeddings), example[2]]
                   for example in raw_poetry]
print(len(embedded_poetry))

585


In [12]:
sentences = [example[0] for example in embedded_poetry]
labels = [example[1] for example in embedded_poetry]
print(sentences)
print(labels)

[array([[-0.038548  ,  0.54252   , -0.21843   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       [ 0.18733   ,  0.40595   , -0.51174   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       [ 0.27294   ,  0.14161   , -0.36977   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       ...,
       [-0.082752  ,  0.67204   , -0.14987   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       [ 0.043798  ,  0.024779  , -0.20937   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       [ 0.52804   ,  0.036922  , -0.50694   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566]], dtype=float32), array([[-0.10648   , -0.016295  , -0.22755   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       [ 0.12223   ,  0.31658   , -0.55482   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       [ 0.27774   ,  0.2603    ,  0.01925   , ..., -0.5663904 ,
        -1.3034391 ,  0.63536566],
       ...,
       [ 0.022669  ,  0.26819   , -0.16665   , ..., -0.566

In [15]:
dataloader_sentences = TextDataset(sentences, labels)
print(dataloader_sentences)
poetry = DataLoader(dataset=dataloader_sentences, batch_size = 10, shuffle=True, collate_fn=TextDataset.collate_fn)
print(poetry)

<util.TextDatasetWithGloveElmoSuffix object at 0x7efb4e46c350>
<torch.utils.data.dataloader.DataLoader object at 0x7efb4ec70c50>


In [16]:
for (text, lengths, labels) in poetry:
    text = Variable(text)
    lengths = Variable(lengths)
print(text)
print('-'*10)
print(lengths)

tensor([[[-0.0995,  0.0282, -0.2319,  ..., -0.5664, -1.3034,  0.6354],
         [-0.4178, -0.0352, -0.1261,  ..., -0.5664, -1.3034,  0.6354],
         [-0.1857,  0.0660, -0.2521,  ..., -0.5664, -1.3034,  0.6354],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.0169,  0.1740, -0.3025,  ..., -0.5664, -1.3034,  0.6354],
         [ 0.1543, -0.0655,  0.0225,  ..., -0.5664, -1.3034,  0.6354],
         [-0.0441,  0.3661,  0.1803,  ..., -0.6758,  1.3187, -0.0266],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.1240,  0.0611, -0.1532,  ..., -0.5664, -1.3034,  0.6354],
         [-0.0620,  0.1188, -0.2467,  ..., -0

In [17]:
net = torch.load('../models/LSTMSuffixElmoAtt_Poetry_fold_0_epoch_23.pt')
net.eval()

RNNSequenceClassifier(
  (rnn): LSTM(350, 300, batch_first=True, bidirectional=True)
  (attention_weights): Linear(in_features=600, out_features=1, bias=True)
  (output_projection): Linear(in_features=600, out_features=2, bias=True)
  (dropout_on_input_to_LSTM): Dropout(p=0.2, inplace=False)
  (dropout_on_input_to_linear_layer): Dropout(p=0, inplace=False)
)

In [30]:
if using_GPU:
    text = text.cuda()
    lengths = lengths.cuda()

tensor([[4.0825e-03, 9.9592e-01],
        [9.8034e-01, 1.9659e-02],
        [3.4305e-01, 6.5695e-01],
        [1.0656e-01, 8.9344e-01],
        [9.9759e-04, 9.9900e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)


  """


In [31]:
predicted = net(text, lengths)
print(predicted)

tensor([[-5.5010e+00, -4.0908e-03],
        [-1.9854e-02, -3.9292e+00],
        [-1.0699e+00, -4.2015e-01],
        [-2.2391e+00, -1.1267e-01],
        [-6.9102e+00, -9.9802e-04]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward>)
