In [1]:
import os
os.environ['CHAINER_TYPE_CHECK'] = '0'

import sys
import numpy as np
import math
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import Variable
from chainer import cuda
from common.params_lstm import *
from common.utils import *

  util.experimental('cupy.core.fusion')


In [2]:
# Turn off Chainer's runtime type checking through the config object as well
# (the CHAINER_TYPE_CHECK environment variable is already set to '0' before
# chainer is imported).
chainer.config.type_check = False

In [3]:
# Raise the maximum workspace size to 512 * 1024 * 1024 (= 512 MiB if the unit
# is bytes). NOTE(review): presumably this is the cuDNN workspace limit; confirm
# against the installed Chainer version.
cuda.set_max_workspace_size(512 * 1024 * 1024)

In [4]:
# Record the software and hardware environment next to the benchmark numbers
# so that the timings below can be interpreted and reproduced.
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Chainer: ", chainer.__version__)
print("CuPy: ", chainer.cuda.cupy.__version__)
print("Numpy: ", np.__version__)
# get_gpu_name comes from the star-imported project helpers.
print("GPU: ", get_gpu_name())

OS:  linux
Python:  3.6.2 (default, Nov 13 2017, 16:19:19) 
[GCC 5.4.0 20160609]
Chainer:  4.0.0b1
CuPy:  4.0.0b1
Numpy:  1.13.3
GPU:  ['Quadro K420', 'GeForce GTX TITAN X', 'GeForce GTX TITAN X', 'GeForce GTX TITAN X', 'GeForce GTX TITAN X']


In [5]:
class SymbolModule(chainer.Chain):
    """Token embedding -> single-layer GRU -> linear layer with two outputs.

    The embedding width comes from the module-level ``EMBEDSIZE`` constant.
    ``__call__`` is the forward pass used by the notebook; ``___call__``
    (three leading underscores) is an alternative forward pass that is only
    reachable by calling it explicitly by name.
    """

    def __init__(self, n_vocab, n_units):
        """Create the embedding, GRU and output layers.

        Args:
            n_vocab: Number of rows in the embedding table (vocabulary size).
            n_units: Number of hidden units of the GRU, which is also the
                input size of the final linear layer.
        """
        super(SymbolModule, self).__init__()
        with self.init_scope():
            self.embed = L.EmbedID(n_vocab, EMBEDSIZE)
            # One GRU layer, dropout ratio 0.
            self.gru = L.NStepGRU(1, EMBEDSIZE, n_units, 0)
            self.l_out = L.Linear(n_units, 2)

    def ___call__(self, x_data):
        """Alternative forward pass that embeds every sequence separately.

        Not invoked by ``module(x)`` because of its triple-underscore name.

        Args:
            x_data: Iterable of per-sample token-id arrays.
                NOTE(review): presumably 1-D integer arrays already on the
                GPU; confirm against the caller.

        Returns:
            Output of the linear layer applied to the GRU output at the last
            time step of every sequence.
        """
        hx = None
        xs = []
        lengths = []
        for x in x_data:
            x = Variable(x)
            x = self.embed(x)
            xs.append(x)
            lengths.append(len(x))
        # GRU
        _hy, ys = self.gru(hx=hx, xs=xs)

        # Row index of each sequence's final step inside the concatenated
        # outputs: running total of the lengths, minus one.
        last_idx = np.cumsum(lengths).astype(np.int32) - 1
        last_idx = cuda.to_gpu(last_idx)

        # embed_id is used here as a row gather on the concatenated outputs.
        last_vecs = F.embed_id(last_idx, F.concat(ys, axis=0))
        y = self.l_out(last_vecs)
        return y

    def __call__(self, x_data):
        """Forward pass used for training and evaluation.

        Args:
            x_data: 2-D array of token ids.
                NOTE(review): assumed to be (batch, seq_len), as produced by
                the notebook's minibatch iterator; confirm.

        Returns:
            Output of the linear layer applied to ``hy[0]``, the first entry
            of the hidden state returned by the GRU.
        """
        x_data = x_data.T
        # One entry per row of the transposed input, each equal to the number
        # of columns (under the shape assumption above: one entry per time
        # step holding the batch size).
        lengths = np.full(len(x_data), x_data.shape[1], dtype='i')
        x_data = x_data.reshape(-1)
        xs = self.embed(x_data)

        # GRU
        # NOTE(review): passing a flat array plus ``lengths`` is not the
        # list-of-sequences calling convention; confirm that the installed
        # Chainer build accepts this form.
        hy, ys = self.gru(None, xs, lengths)
        last_vecs = hy[0]
        y = self.l_out(last_vecs)
        return y

In [6]:
def init_model(m):
    """Create an Adam optimizer and attach it to the given model.

    The hyper-parameters are read from the module-level constants ``LR``,
    ``BETA_1``, ``BETA_2`` and ``EPS``.

    Args:
        m: The link/chain whose parameters the optimizer should update.

    Returns:
        The Adam optimizer after ``setup(m)`` has been called on it.
    """
    adam_kwargs = dict(alpha=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS)
    adam = optimizers.Adam(**adam_kwargs)
    adam.setup(m)
    return adam

In [7]:
%%time
# Data into format for library
x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)
# Torch-specific
x_train = x_train.astype(np.int64)
x_test = x_test.astype(np.int64)
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)

Downloading https://s3.amazonaws.com/text-datasets/imdb.npz
Done.
Extracting files...
Done.
Trimming to 30000 max-features
Padding to length 150
(25000, 150) (25000, 150) (25000,) (25000,)
int64 int64 int64 int64
CPU times: user 5.45 s, sys: 668 ms, total: 6.12 s
Wall time: 12.5 s


In [8]:
%%time
# Create symbol
sym = SymbolModule(MAXFEATURES, NUMHIDDEN)
if GPU:
    chainer.cuda.get_device(0).use()  # Make a specified GPU current
    sym.to_gpu()  # Copy the model to the GPU

CPU times: user 796 ms, sys: 600 ms, total: 1.4 s
Wall time: 1.49 s


In [9]:
%%time
# Build the optimizer for the model via init_model (defined in an earlier cell).
optimizer = init_model(sym)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 119 µs


In [10]:
%%time
cuda.cupy.cuda.profiler.initialize('', 'rnn.nvvp', cuda.cupy.cuda.profiler.cudaKeyValuePair)
cuda.cupy.cuda.profiler.start()
for j in range(EPOCHS):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Get samples
        data = cuda.to_gpu(data)
        target = cuda.to_gpu(target)
        output = sym(data)
        loss = F.softmax_cross_entropy(output, target)
        sym.cleargrads()
        loss.backward()
        optimizer.update()
    # Log
    print(j)
cuda.cupy.cuda.profiler.stop()


0
1
2
CPU times: user 17.6 s, sys: 916 ms, total: 18.5 s
Wall time: 18.5 s


In [11]:
%%time
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
y_guess = np.zeros(n_samples, dtype=np.int)
y_truth = y_test[:n_samples]
c = 0

with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
    for data, target in yield_mb(x_test, y_test, BATCHSIZE):
        # Forwards
        pred = cuda.to_cpu(sym(cuda.to_gpu(data)).data.argmax(-1))
        # Collect results
        y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred
        c += 1

CPU times: user 1.82 s, sys: 88 ms, total: 1.91 s
Wall time: 1.91 s


In [12]:
# Fraction of predictions equal to the ground-truth labels. np.mean keeps the
# reduction vectorized instead of iterating over the array with builtin sum.
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.837820512821
