In [29]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [28]:
pip install mxnet



In [4]:
pip install gluonnlp

Collecting gluonnlp
[?25l  Downloading https://files.pythonhosted.org/packages/c6/27/07b57d22496ed6c98b247e578712122402487f5c265ec70a747900f97060/gluonnlp-0.9.1.tar.gz (252kB)
[K     |█▎                              | 10kB 19.3MB/s eta 0:00:01[K     |██▋                             | 20kB 1.7MB/s eta 0:00:01[K     |███▉                            | 30kB 2.3MB/s eta 0:00:01[K     |█████▏                          | 40kB 2.5MB/s eta 0:00:01[K     |██████▌                         | 51kB 2.0MB/s eta 0:00:01[K     |███████▊                        | 61kB 2.3MB/s eta 0:00:01[K     |█████████                       | 71kB 2.5MB/s eta 0:00:01[K     |██████████▍                     | 81kB 2.7MB/s eta 0:00:01[K     |███████████▋                    | 92kB 2.9MB/s eta 0:00:01[K     |█████████████                   | 102kB 2.8MB/s eta 0:00:01[K     |██████████████▎                 | 112kB 2.8MB/s eta 0:00:01[K     |███████████████▌                | 122kB 2.8MB/s eta 0:00:01[

In [5]:
import nltk
nltk.download('punkt')
  

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [6]:
import warnings
warnings.filterwarnings('ignore')

# NLP
import string
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from nltk.stem import SnowballStemmer
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

import random
import time
import multiprocessing as mp
import numpy as np
import pandas as pd

import mxnet as mx
from mxnet import nd, gluon, autograd

import gluonnlp as nlp
nlp.utils.check_version('0.7.0')

random.seed(123)
np.random.seed(123)
mx.random.seed(123)

In [7]:
class MeanPoolingLayer(gluon.HybridBlock):
    """A block for mean pooling of encoder features"""
    def __init__(self, prefix=None, params=None):
        super(MeanPoolingLayer, self).__init__(prefix=prefix, params=params)

    def hybrid_forward(self, F, data, valid_length): # pylint: disable=arguments-differ
        """Forward logic"""
        # Data will have shape (T, N, C)
        masked_encoded = F.SequenceMask(data,
                                        sequence_length=valid_length,
                                        use_sequence_length=True)
        agg_state = F.broadcast_div(F.sum(masked_encoded, axis=0),
                                    F.expand_dims(valid_length, axis=1))
        return agg_state


class SentimentNet(gluon.HybridBlock):
    """Network for sentiment analysis."""
    def __init__(self, dropout, prefix=None, params=None):
        super(SentimentNet, self).__init__(prefix=prefix, params=params)
        with self.name_scope():
            self.embedding = None # will set with lm embedding later
            self.encoder = None # will set with lm encoder later
            self.agg_layer = MeanPoolingLayer()
            self.output = gluon.nn.HybridSequential()
            with self.output.name_scope():
                self.output.add(gluon.nn.Dropout(dropout))
                self.output.add(gluon.nn.Dense(1, flatten=False))

    def hybrid_forward(self, F, data, valid_length): # pylint: disable=arguments-differ
        encoded = self.encoder(self.embedding(data))  # Shape(T, N, C)
        agg_state = self.agg_layer(encoded, valid_length)
        out = self.output(agg_state)
        return out

In [8]:
dropout = 0
language_model_name = 'standard_lstm_lm_200'
pretrained = True
learning_rate, batch_size = 0.005, 32
bucket_num, bucket_ratio = 10, 0.2
epochs = 100
grad_clip = None
log_interval = 100

In [9]:
context = mx.cpu()

In [10]:
lm_model, vocab = nlp.model.get_model(name=language_model_name,
                                      dataset_name='wikitext-2',
                                      pretrained=pretrained,
                                      ctx=context,
                                      dropout=dropout)

Vocab file is not found. Downloading.
Downloading /root/.mxnet/models/7729646630986104513/7729646630986104513_wikitext-2-be36dc52.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/vocab/wikitext-2-be36dc52.zip...
Downloading /root/.mxnet/models/standard_lstm_lm_200_wikitext-2-b233c700.zip75b98200-d0bf-4093-9689-adef852f4369 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/standard_lstm_lm_200_wikitext-2-b233c700.zip...


In [11]:
net = SentimentNet(dropout=dropout)
net.embedding = lm_model.embedding
net.encoder = lm_model.encoder
net.hybridize()
net.output.initialize(mx.init.Xavier(), ctx=context)
                      
print(net)

SentimentNet(
  (embedding): HybridSequential(
    (0): Embedding(33278 -> 200, float32)
  )
  (encoder): LSTM(200 -> 200, TNC, num_layers=2)
  (agg_layer): MeanPoolingLayer(
  
  )
  (output): HybridSequential(
    (0): Dropout(p = 0, axes=())
    (1): Dense(None -> 1, linear)
  )
)


In [14]:
# The tokenizer takes as input a string and outputs a list of tokens.
tokenizer = nlp.data.SpacyTokenizer('en')

# `length_clip` takes as input a list and outputs a list with maximum length 120.
length_clip = nlp.data.ClipSequence(120)

# Helper function to preprocess a single data point
def preprocess(x):
    data, label = x
    data = vocab[length_clip(tokenizer(data))]
    lengths = float(len(x[0]))
    return data, label

# Helper function for getting the length
def get_length(x):
    return float(len(x[0]))





In [15]:
# Loading the dataset
books_neg = pd.read_csv("/content/drive/My Drive/LSTM/books_negative.csv")
books_pos = pd.read_csv("/content/drive/My Drive/LSTM/books_positive.csv")
dvd_neg = pd.read_csv("/content/drive/My Drive/LSTM/dvd_negative.csv")
dvd_pos = pd.read_csv("/content/drive/My Drive/LSTM/dvd_positive.csv")
ele_neg = pd.read_csv("/content/drive/My Drive/LSTM/electronics_negative.csv")
ele_pos = pd.read_csv("/content/drive/My Drive/LSTM/electronics_positive.csv")
kit_neg = pd.read_csv("/content/drive/My Drive/LSTM/kitchen_negative.csv")
kit_pos = pd.read_csv("/content/drive/My Drive/LSTM/kitchen_positive.csv")
books_neg['label'] = 0 
books_pos['label'] = 1
books = pd.concat([books_neg,books_pos],axis = 0)
books['domain'] = 'books'
dvd_neg['label'] = 0
dvd_pos['label'] = 1
dvd = pd.concat([dvd_pos, dvd_neg],axis = 0)
dvd['domain'] = 'dvd'
ele_neg['label'] = 0
ele_pos['label'] = 1
ele = pd.concat([ele_neg, ele_pos],axis = 0)
ele['domain'] = 'electronics'
kit_neg['label'] = 0
kit_pos['label'] = 1
kit = pd.concat([kit_neg, kit_pos],axis = 0)
kit['domain'] = 'kitchen'

books =books.reset_index(drop=True)
dvd =dvd.reset_index(drop=True)
ele =ele.reset_index(drop=True)
kit =kit.reset_index(drop=True)
print('Tokenize using spaCy...')

Tokenize using spaCy...


In [16]:
x_books = books['review_text']
y_books = books['label']

x_dvd = dvd['review_text']
y_dvd = dvd['label']

x_ele = ele['review_text']
y_ele = ele['label']

x_kit = kit['review_text']
y_kit = kit['label']

In [17]:
def preprocess_dataset(dataset):
    start = time.time()
    with mp.Pool() as pool:
        # Each sample is processed in an asynchronous manner.
        dataset = gluon.data.SimpleDataset(pool.map(preprocess, dataset))
        lengths = gluon.data.SimpleDataset(pool.map(get_length, dataset))
    end = time.time()
    print('Done! Tokenizing Time={:.2f}s, #Sentences={}'.format(end - start, len(dataset)))
    return dataset, lengths


In [18]:
dat_books = []
for i in range(0,len(x_books)):
  Y = []
  Y.append(x_books[i])
  Y.append(y_books[i])
  dat_books.append(Y)

In [21]:
dat_dvd = []
for i in range(0,len(x_dvd)):
  Y = []
  Y.append(x_dvd[i])
  Y.append(y_dvd[i])
  dat_dvd.append(Y)

dat_ele = []
for i in range(0,len(x_ele)):
  Y = []
  Y.append(x_ele[i])
  Y.append(y_ele[i])
  dat_ele.append(Y)

dat_kit = []
for i in range(0,len(x_kit)):
  Y = []
  Y.append(x_kit[i])
  Y.append(y_kit[i])
  dat_kit.append(Y)

In [19]:
train_books = dat_books[:1700]
test_books = dat_books[1700:]

In [22]:
train_dvd = dat_dvd[:1700]
test_dvd = dat_dvd[1700:]

train_ele = dat_ele[:1700]
test_ele = dat_ele[1700:]

train_kit = dat_kit[:1700]
test_kit = dat_kit[1700:]

In [23]:
# Doing the actual pre-processing of the dataset
train_b, train_b_lengths = preprocess_dataset(train_books)
test_b, test_b_lengths = preprocess_dataset(test_books)

Done! Tokenizing Time=2.15s, #Sentences=1700
Done! Tokenizing Time=0.54s, #Sentences=300


In [24]:
train_e, train_e_lengths = preprocess_dataset(train_ele)
test_e, test_e_lengths = preprocess_dataset(test_ele)

train_d, train_d_lengths = preprocess_dataset(train_dvd)
test_d, test_d_lengths = preprocess_dataset(test_dvd)

train_k, train_k_lengths = preprocess_dataset(train_kit)
test_k, test_k_lengths = preprocess_dataset(test_kit)

Done! Tokenizing Time=1.24s, #Sentences=1700
Done! Tokenizing Time=0.34s, #Sentences=300
Done! Tokenizing Time=2.24s, #Sentences=1700
Done! Tokenizing Time=0.54s, #Sentences=300
Done! Tokenizing Time=1.04s, #Sentences=1700
Done! Tokenizing Time=0.34s, #Sentences=300


In [21]:
# Construct the DataLoader for books

def get_dataloader():

    # Pad data, stack label and lengths
    batchify_fn = nlp.data.batchify.Tuple(
        nlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
        nlp.data.batchify.Stack(dtype='float32'))
    batch_sampler = nlp.data.sampler.FixedBucketSampler(
        train_b_lengths,
        batch_size=batch_size,
        num_buckets=bucket_num,
        ratio=bucket_ratio,
        shuffle=True)
    print(batch_sampler.stats())

    # Construct a DataLoader object for both the training and test data
    train_dataloader = gluon.data.DataLoader(
        dataset=train_b,
        batch_sampler=batch_sampler,
        batchify_fn=batchify_fn)
    test_dataloader = gluon.data.DataLoader(
        dataset=test_b,
        batch_size=batch_size,
        shuffle=False,
        batchify_fn=batchify_fn)
    return train_dataloader, test_dataloader


train_b_dataloader, test_b_dataloader = get_dataloader()
 



FixedBucketSampler:
  sample_num=1700, batch_num=57
  key=[12, 24, 36, 48, 60, 72, 84, 96, 108, 120]
  cnt=[8, 42, 95, 75, 80, 95, 89, 87, 95, 1034]
  batch_size=[64, 32, 32, 32, 32, 32, 32, 32, 32, 32]


In [22]:
# Construct the DataLoader for dvd

def get_dataloader():

    # Pad data, stack label and lengths
    batchify_fn = nlp.data.batchify.Tuple(
        nlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
        nlp.data.batchify.Stack(dtype='float32'))
    batch_sampler = nlp.data.sampler.FixedBucketSampler(
        train_d_lengths,
        batch_size=batch_size,
        num_buckets=bucket_num,
        ratio=bucket_ratio,
        shuffle=True)
    print(batch_sampler.stats())

    # Construct a DataLoader object for both the training and test data
    train_dataloader = gluon.data.DataLoader(
        dataset=train_d,
        batch_sampler=batch_sampler,
        batchify_fn=batchify_fn)
    test_dataloader = gluon.data.DataLoader(
        dataset=test_d,
        batch_size=batch_size,
        shuffle=False,
        batchify_fn=batchify_fn)
    return train_dataloader, test_dataloader


train_d_dataloader, test_d_dataloader = get_dataloader()
 



FixedBucketSampler:
  sample_num=1700, batch_num=58
  key=[21, 32, 43, 54, 65, 76, 87, 98, 109, 120]
  cnt=[58, 72, 71, 95, 102, 91, 93, 82, 72, 964]
  batch_size=[36, 32, 32, 32, 32, 32, 32, 32, 32, 32]


In [23]:
# Construct the DataLoader for electronics

def get_dataloader():

    # Pad data, stack label and lengths
    batchify_fn = nlp.data.batchify.Tuple(
        nlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
        nlp.data.batchify.Stack(dtype='float32'))
    batch_sampler = nlp.data.sampler.FixedBucketSampler(
        train_e_lengths,
        batch_size=batch_size,
        num_buckets=bucket_num,
        ratio=bucket_ratio,
        shuffle=True)
    print(batch_sampler.stats())

    # Construct a DataLoader object for both the training and test data
    train_dataloader = gluon.data.DataLoader(
        dataset=train_e,
        batch_sampler=batch_sampler,
        batchify_fn=batchify_fn)
    test_dataloader = gluon.data.DataLoader(
        dataset=test_e,
        batch_size=batch_size,
        shuffle=False,
        batchify_fn=batchify_fn)
    return train_dataloader, test_dataloader


train_e_dataloader, test_e_dataloader = get_dataloader()
 

FixedBucketSampler:
  sample_num=1700, batch_num=58
  key=[21, 32, 43, 54, 65, 76, 87, 98, 109, 120]
  cnt=[98, 126, 120, 134, 138, 100, 101, 87, 88, 708]
  batch_size=[36, 32, 32, 32, 32, 32, 32, 32, 32, 32]


In [25]:

# Construct the DataLoader for kitchen appliance

def get_dataloader():

    # Pad data, stack label and lengths
    batchify_fn = nlp.data.batchify.Tuple(
        nlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
        nlp.data.batchify.Stack(dtype='float32'))
    batch_sampler = nlp.data.sampler.FixedBucketSampler(
        train_k_lengths,
        batch_size=batch_size,
        num_buckets=bucket_num,
        ratio=bucket_ratio,
        shuffle=True)
    print(batch_sampler.stats())

    # Construct a DataLoader object for both the training and test data
    train_dataloader = gluon.data.DataLoader(
        dataset=train_k,
        batch_sampler=batch_sampler,
        batchify_fn=batchify_fn)
    test_dataloader = gluon.data.DataLoader(
        dataset=test_k,
        batch_size=batch_size,
        shuffle=False,
        batchify_fn=batchify_fn)
    return train_dataloader, test_dataloader


train_k_dataloader, test_k_dataloader = get_dataloader()
 

FixedBucketSampler:
  sample_num=1700, batch_num=58
  key=[21, 32, 43, 54, 65, 76, 87, 98, 109, 120]
  cnt=[100, 129, 141, 152, 128, 142, 134, 109, 84, 581]
  batch_size=[36, 32, 32, 32, 32, 32, 32, 32, 32, 32]


In [26]:
def evaluate(net, dataloader, context):
    loss = gluon.loss.SigmoidBCELoss()
    total_L = 0.0
    total_sample_num = 0
    total_correct_num = 0
    start_log_interval_time = time.time()

    print('Begin Testing...')
    for i, ((data, valid_length), label) in enumerate(dataloader):
        data = mx.nd.transpose(data.as_in_context(context))
        valid_length = valid_length.as_in_context(context).astype(np.float32)
        label = label.as_in_context(context)
        output = net(data, valid_length)

        L = loss(output, label)
        pred = (output > 0.5).reshape(-1)
        total_L += L.sum().asscalar()
        total_sample_num += label.shape[0]
        total_correct_num += (pred == label).sum().asscalar()

        if (i + 1) % log_interval == 0:
            print('[Batch {}/{}] elapsed {:.2f} s'.format(
                i + 1, len(dataloader),
                time.time() - start_log_interval_time))
            start_log_interval_time = time.time()

    avg_L = total_L / float(total_sample_num)
    acc = total_correct_num / float(total_sample_num)

    return avg_L, acc

In [26]:
#pre-trained model performance in books domain
def train(net, context, epochs):
    trainer = gluon.Trainer(net.collect_params(), 'ftml',
                            {'learning_rate': learning_rate})
    loss = gluon.loss.SigmoidBCELoss()

    parameters = net.collect_params().values()

    # Training/Testing
    for epoch in range(epochs):
        # Epoch training stats
        start_epoch_time = time.time()
        epoch_L = 0.0
        epoch_sent_num = 0
        epoch_wc = 0
        # Log interval training stats
        start_log_interval_time = time.time()
        log_interval_wc = 0
        log_interval_sent_num = 0
        log_interval_L = 0.0

        for i, ((data, length), label) in enumerate(train_b_dataloader):
            L = 0
            wc = length.sum().asscalar()
            log_interval_wc += wc
            epoch_wc += wc
            log_interval_sent_num += data.shape[1]
            epoch_sent_num += data.shape[1]
            with autograd.record():
                output = net(data.as_in_context(context).T,
                             length.as_in_context(context)
                                   .astype(np.float32))
                L = L + loss(output, label.as_in_context(context)).mean()
            L.backward()
            # Clip gradient
            if grad_clip:
                gluon.utils.clip_global_norm(
                    [p.grad(context) for p in parameters],
                    grad_clip)
            # Update parameter
            trainer.step(1)
            log_interval_L += L.asscalar()
            epoch_L += L.asscalar()
            if (i + 1) % log_interval == 0:
                print(
                    '[Epoch {} Batch {}/{}] elapsed {:.2f} s, '
                    'avg loss {:.6f}, throughput {:.2f}K wps'.format(
                        epoch, i + 1, len(train_b_dataloader),
                        time.time() - start_log_interval_time,
                        log_interval_L / log_interval_sent_num, log_interval_wc
                        / 1000 / (time.time() - start_log_interval_time)))
                # Clear log interval training stats
                start_log_interval_time = time.time()
                log_interval_wc = 0
                log_interval_sent_num = 0
                log_interval_L = 0
        end_epoch_time = time.time()
        train_avg_L, train_acc = evaluate(net,train_b_dataloader,context)
        test_avg_L, test_acc = evaluate(net, test_b_dataloader, context)
        print('[Epoch {}] train avg loss {:.6f}, train acc{:.4f} test acc {:.4f}'
              'test avg loss {:.6f}, throughput {:.2f}K wps'.format(
                  epoch, epoch_L / epoch_sent_num, train_acc, test_acc, test_avg_L,
                  epoch_wc / 1000 / (end_epoch_time - start_epoch_time)))
        

history1 = train(net, context, epochs)

Begin Testing...
Begin Testing...
[Epoch 0] train avg loss 0.006833, train acc0.5888 test acc 0.0000test avg loss 0.873851, throughput 3.02K wps
Begin Testing...
Begin Testing...
[Epoch 1] train avg loss 0.005370, train acc0.9341 test acc 0.5433test avg loss 0.682347, throughput 3.01K wps
Begin Testing...
Begin Testing...
[Epoch 2] train avg loss 0.002082, train acc0.9935 test acc 0.6333test avg loss 0.918566, throughput 2.88K wps
Begin Testing...
Begin Testing...
[Epoch 3] train avg loss 0.000636, train acc0.9994 test acc 0.6667test avg loss 1.224674, throughput 2.82K wps
Begin Testing...
Begin Testing...
[Epoch 4] train avg loss 0.000186, train acc1.0000 test acc 0.6467test avg loss 1.980648, throughput 2.72K wps
Begin Testing...
Begin Testing...
[Epoch 5] train avg loss 0.000083, train acc1.0000 test acc 0.6367test avg loss 2.333720, throughput 2.72K wps
Begin Testing...
Begin Testing...
[Epoch 6] train avg loss 0.000117, train acc1.0000 test acc 0.6233test avg loss 1.830645, throug

In [45]:
# pre-trained model performance in dvd domain
def train(net, context, epochs):
    trainer = gluon.Trainer(net.collect_params(), 'ftml',
                            {'learning_rate': learning_rate})
    loss = gluon.loss.SigmoidBCELoss()

    parameters = net.collect_params().values()

    # Training/Testing
    for epoch in range(epochs):
        # Epoch training stats
        start_epoch_time = time.time()
        epoch_L = 0.0
        epoch_sent_num = 0
        epoch_wc = 0
        # Log interval training stats
        start_log_interval_time = time.time()
        log_interval_wc = 0
        log_interval_sent_num = 0
        log_interval_L = 0.0

        for i, ((data, length), label) in enumerate(train_d_dataloader):
            L = 0
            wc = length.sum().asscalar()
            log_interval_wc += wc
            epoch_wc += wc
            log_interval_sent_num += data.shape[1]
            epoch_sent_num += data.shape[1]
            with autograd.record():
                output = net(data.as_in_context(context).T,
                             length.as_in_context(context)
                                   .astype(np.float32))
                L = L + loss(output, label.as_in_context(context)).mean()
            L.backward()
            # Clip gradient
            if grad_clip:
                gluon.utils.clip_global_norm(
                    [p.grad(context) for p in parameters],
                    grad_clip)
            # Update parameter
            trainer.step(1)
            log_interval_L += L.asscalar()
            epoch_L += L.asscalar()
            if (i + 1) % log_interval == 0:
                print(
                    '[Epoch {} Batch {}/{}] elapsed {:.2f} s, '
                    'avg loss {:.6f}, throughput {:.2f}K wps'.format(
                        epoch, i + 1, len(train_d_dataloader),
                        time.time() - start_log_interval_time,
                        log_interval_L / log_interval_sent_num, log_interval_wc
                        / 1000 / (time.time() - start_log_interval_time)))
                # Clear log interval training stats
                start_log_interval_time = time.time()
                log_interval_wc = 0
                log_interval_sent_num = 0
                log_interval_L = 0
        end_epoch_time = time.time()
        train_avg_L, train_acc = evaluate(net, train_d_dataloader, context)
        test_avg_L, test_acc = evaluate(net, test_d_dataloader, context)
        print('[Epoch {}] train avg loss {:.6f}, train acc{:.4f}, test acc {:.4f}, '
              'test avg loss {:.6f}, throughput {:.2f}K wps'.format(
                  epoch, epoch_L / epoch_sent_num, train_acc, test_acc, test_avg_L,
                  epoch_wc / 1000 / (end_epoch_time - start_epoch_time)))
        

history2 = train(net, context, epochs)

Begin Testing...
Begin Testing...
[Epoch 0] train avg loss 0.000179, train acc0.9965, test acc 0.4533, test avg loss 3.978074, throughput 2.92K wps
Begin Testing...
Begin Testing...
[Epoch 1] train avg loss 0.000057, train acc0.9994, test acc 0.5633, test avg loss 2.792423, throughput 2.94K wps
Begin Testing...
Begin Testing...
[Epoch 2] train avg loss 0.000014, train acc1.0000, test acc 0.6267, test avg loss 1.921606, throughput 2.93K wps
Begin Testing...
Begin Testing...
[Epoch 3] train avg loss 0.000005, train acc1.0000, test acc 0.5800, test avg loss 2.286801, throughput 2.64K wps
Begin Testing...
Begin Testing...
[Epoch 4] train avg loss 0.000001, train acc1.0000, test acc 0.5800, test avg loss 2.324768, throughput 2.65K wps
Begin Testing...
Begin Testing...
[Epoch 5] train avg loss 0.000001, train acc1.0000, test acc 0.5767, test avg loss 2.356763, throughput 2.65K wps
Begin Testing...
Begin Testing...
[Epoch 6] train avg loss 0.000001, train acc1.0000, test acc 0.5767, test avg 

In [47]:
#pre-trained model performance in electronics domain
def train(net, context, epochs):
    trainer = gluon.Trainer(net.collect_params(), 'ftml',
                            {'learning_rate': learning_rate})
    loss = gluon.loss.SigmoidBCELoss()

    parameters = net.collect_params().values()

    # Training/Testing
    for epoch in range(epochs):
        # Epoch training stats
        start_epoch_time = time.time()
        epoch_L = 0.0
        epoch_sent_num = 0
        epoch_wc = 0
        # Log interval training stats
        start_log_interval_time = time.time()
        log_interval_wc = 0
        log_interval_sent_num = 0
        log_interval_L = 0.0

        for i, ((data, length), label) in enumerate(train_e_dataloader):
            L = 0
            wc = length.sum().asscalar()
            log_interval_wc += wc
            epoch_wc += wc
            log_interval_sent_num += data.shape[1]
            epoch_sent_num += data.shape[1]
            with autograd.record():
                output = net(data.as_in_context(context).T,
                             length.as_in_context(context)
                                   .astype(np.float32))
                L = L + loss(output, label.as_in_context(context)).mean()
            L.backward()
            # Clip gradient
            if grad_clip:
                gluon.utils.clip_global_norm(
                    [p.grad(context) for p in parameters],
                    grad_clip)
            # Update parameter
            trainer.step(1)
            log_interval_L += L.asscalar()
            epoch_L += L.asscalar()
            if (i + 1) % log_interval == 0:
                print(
                    '[Epoch {} Batch {}/{}] elapsed {:.2f} s, '
                    'avg loss {:.6f}, throughput {:.2f}K wps'.format(
                        epoch, i + 1, len(train_e_dataloader),
                        time.time() - start_log_interval_time,
                        log_interval_L / log_interval_sent_num, log_interval_wc
                        / 1000 / (time.time() - start_log_interval_time)))
                # Clear log interval training stats
                start_log_interval_time = time.time()
                log_interval_wc = 0
                log_interval_sent_num = 0
                log_interval_L = 0
        end_epoch_time = time.time()
        train_avg_L, train_acc = evaluate(net, train_e_dataloader, context)
        test_avg_L, test_acc = evaluate(net, test_e_dataloader, context)
        print('[Epoch {}] train avg loss {:.6f}, train acc{:.4f}, test acc {:.4f}, '
              'test avg loss {:.6f}, throughput {:.2f}K wps'.format(
                  epoch, epoch_L / epoch_sent_num, train_acc, test_acc, test_avg_L,
                  epoch_wc / 1000 / (end_epoch_time - start_epoch_time)))
        

history3 = train(net, context, epochs)

Begin Testing...
Begin Testing...
[Epoch 0] train avg loss 0.000186, train acc0.9982, test acc 0.5767, test avg loss 1.914706, throughput 2.76K wps
Begin Testing...
Begin Testing...
[Epoch 1] train avg loss 0.000320, train acc0.9994, test acc 0.6400, test avg loss 1.312313, throughput 2.76K wps
Begin Testing...
Begin Testing...
[Epoch 2] train avg loss 0.000054, train acc1.0000, test acc 0.5867, test avg loss 1.686159, throughput 2.77K wps
Begin Testing...
Begin Testing...
[Epoch 3] train avg loss 0.000013, train acc1.0000, test acc 0.5867, test avg loss 1.802204, throughput 2.57K wps
Begin Testing...
Begin Testing...
[Epoch 4] train avg loss 0.000009, train acc1.0000, test acc 0.5900, test avg loss 1.839378, throughput 2.48K wps
Begin Testing...
Begin Testing...
[Epoch 5] train avg loss 0.000007, train acc1.0000, test acc 0.5900, test avg loss 1.897036, throughput 2.47K wps
Begin Testing...
Begin Testing...
[Epoch 6] train avg loss 0.000006, train acc1.0000, test acc 0.5900, test avg 

In [27]:
#pre-trained model performance in kitchen appliance domain
def train(net, context, epochs):
    trainer = gluon.Trainer(net.collect_params(), 'ftml',
                            {'learning_rate': learning_rate})
    loss = gluon.loss.SigmoidBCELoss()

    parameters = net.collect_params().values()

    # Training/Testing
    for epoch in range(epochs):
        # Epoch training stats
        start_epoch_time = time.time()
        epoch_L = 0.0
        epoch_sent_num = 0
        epoch_wc = 0
        # Log interval training stats
        start_log_interval_time = time.time()
        log_interval_wc = 0
        log_interval_sent_num = 0
        log_interval_L = 0.0

        for i, ((data, length), label) in enumerate(train_k_dataloader):
            L = 0
            wc = length.sum().asscalar()
            log_interval_wc += wc
            epoch_wc += wc
            log_interval_sent_num += data.shape[1]
            epoch_sent_num += data.shape[1]
            with autograd.record():
                output = net(data.as_in_context(context).T,
                             length.as_in_context(context)
                                   .astype(np.float32))
                L = L + loss(output, label.as_in_context(context)).mean()
            L.backward()
            # Clip gradient
            if grad_clip:
                gluon.utils.clip_global_norm(
                    [p.grad(context) for p in parameters],
                    grad_clip)
            # Update parameter
            trainer.step(1)
            log_interval_L += L.asscalar()
            epoch_L += L.asscalar()
            if (i + 1) % log_interval == 0:
                print(
                    '[Epoch {} Batch {}/{}] elapsed {:.2f} s, '
                    'avg loss {:.6f}, throughput {:.2f}K wps'.format(
                        epoch, i + 1, len(train_e_dataloader),
                        time.time() - start_log_interval_time,
                        log_interval_L / log_interval_sent_num, log_interval_wc
                        / 1000 / (time.time() - start_log_interval_time)))
                # Clear log interval training stats
                start_log_interval_time = time.time()
                log_interval_wc = 0
                log_interval_sent_num = 0
                log_interval_L = 0
        end_epoch_time = time.time()
        train_avg_L, train_acc = evaluate(net, train_k_dataloader, context)
        test_avg_L, test_acc = evaluate(net, test_k_dataloader, context)
        print('[Epoch {}] train avg loss {:.6f}, train acc{:.4f}, test acc {:.4f}, '
              'test avg loss {:.6f}, throughput {:.2f}K wps'.format(
                  epoch, epoch_L / epoch_sent_num, train_acc, test_acc, test_avg_L,
                  epoch_wc / 1000 / (end_epoch_time - start_epoch_time)))
        

history4 = train(net, context, epochs)

Begin Testing...
Begin Testing...
[Epoch 0] train avg loss 0.008003, train acc0.5894, test acc 0.0000, test avg loss 0.860046, throughput 2.74K wps
Begin Testing...
Begin Testing...
[Epoch 1] train avg loss 0.005901, train acc0.9147, test acc 0.5633, test avg loss 0.681947, throughput 2.75K wps
Begin Testing...
Begin Testing...
[Epoch 2] train avg loss 0.002428, train acc0.9753, test acc 0.6233, test avg loss 0.695118, throughput 2.73K wps
Begin Testing...
Begin Testing...
[Epoch 3] train avg loss 0.000996, train acc0.9853, test acc 0.5833, test avg loss 1.403840, throughput 2.52K wps
Begin Testing...
Begin Testing...
[Epoch 4] train avg loss 0.000585, train acc0.9959, test acc 0.6600, test avg loss 1.167990, throughput 2.44K wps
Begin Testing...
Begin Testing...
[Epoch 5] train avg loss 0.000276, train acc0.9971, test acc 0.6167, test avg loss 1.871773, throughput 2.43K wps
Begin Testing...
Begin Testing...
[Epoch 6] train avg loss 0.000149, train acc0.9982, test acc 0.6867, test avg 