diff --git a/README.md b/README.md
index 3f45d35..ee2d55f 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,6 @@
 # PyTorch Sentiment Analysis
 
-## Note: This repo only works with torchtext 0.9 or above which requires PyTorch 1.8 or above. If you are using torchtext 0.8 then please use [this](https://github.com/bentrevett/pytorch-sentiment-analysis/tree/torchtext08) branch
-
-This repo contains tutorials covering how to do sentiment analysis using [PyTorch](https://github.com/pytorch/pytorch) 1.8 and [torchtext](https://github.com/pytorch/text) 0.9 using Python 3.7.
+This repo contains tutorials covering how to do sentiment analysis with [PyTorch](https://github.com/pytorch/pytorch) 1.7 and [TorchText](https://github.com/pytorch/text) 0.8, using Python 3.7.
 
 The first 2 tutorials will cover getting started with the de facto approach to sentiment analysis: recurrent neural networks (RNNs). The third notebook covers the [FastText](https://arxiv.org/abs/1607.01759) model and the final covers a [convolutional neural network](https://arxiv.org/abs/1408.5882) (CNN) model.
 
diff --git a/experimental/1_nbow.ipynb b/experimental/1_nbow.ipynb
deleted file mode 100644
index 791d259..0000000
--- a/experimental/1_nbow.ipynb
+++ /dev/null
@@ -1,1523 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 228
-    },
-    "colab_type": "code",
-    "id": "-V90fMxJdFl7",
-    "outputId": "2bbc3f28-84e3-47bd-97a2-ea0c2f0cf395"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.optim as optim\n",
-    "\n",
-    "import torchtext\n",
-    "import torchtext.experimental\n",
-    "import torchtext.experimental.vectors\n",
-    "from torchtext.experimental.datasets.raw.text_classification import RawTextIterableDataset\n",
-    "from torchtext.experimental.datasets.text_classification import TextClassificationDataset\n",
-    "from torchtext.experimental.functional import sequential_transforms, vocab_func, totensor\n",
-    "\n",
-    "import collections\n",
-    "import random\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "tOO7b-Z1dFmA"
-   },
-   "outputs": [],
-   "source": [
-    "seed = 1234\n",
-    "\n",
-    "torch.manual_seed(seed)\n",
-    "random.seed(seed)\n",
-    "torch.backends.cudnn.deterministic = True\n",
-    "torch.backends.cudnn.benchmark = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "FhBXG95YdFmD"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_test_data = torchtext.experimental.datasets.raw.IMDB()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "raw_train_data = list(raw_train_data)\n",
-    "raw_test_data = list(raw_test_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "('neg',\n",
-       " 'I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it\\'s not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.<br /><br />I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn\\'t have much of a plot.')"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "raw_train_data[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "('neg',\n",
-       " 'I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn\\'t match the background, and painfully one-dimensional characters cannot be overcome with a \\'sci-fi\\' setting. (I\\'m sure there are those of you out there who think Babylon 5 is good sci-fi TV. It\\'s not. It\\'s clichéd and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It\\'s really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it\\'s rubbish as they have to always say \"Gene Roddenberry\\'s Earth...\" otherwise people would not continue watching. Roddenberry\\'s ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again.')"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "raw_test_data[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of training examples: 25,000\n",
-      "Number of testing examples: 25,000\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'Number of training examples: {len(raw_train_data):,}')\n",
-    "print(f'Number of testing examples: {len(raw_test_data):,}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "rOTczrIEdFmY"
-   },
-   "outputs": [],
-   "source": [
-    "def get_train_valid_split(raw_train_data, split_ratio = 0.7):\n",
-    "        \n",
-    "    random.shuffle(raw_train_data)\n",
-    "        \n",
-    "    n_train_examples = int(len(raw_train_data) * split_ratio)\n",
-    "        \n",
-    "    train_data = raw_train_data[:n_train_examples]\n",
-    "    valid_data = raw_train_data[n_train_examples:]\n",
-    "    \n",
-    "    return train_data, valid_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "C6Tp4CyQdFma"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_valid_data = get_train_valid_split(raw_train_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of training examples: 17,500\n",
-      "Number of validation examples: 7,500\n",
-      "Number of testing examples: 25,000\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'Number of training examples: {len(raw_train_data):,}')\n",
-    "print(f'Number of validation examples: {len(raw_valid_data):,}')\n",
-    "print(f'Number of testing examples: {len(raw_test_data):,}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "LTJjCocRdFmh"
-   },
-   "outputs": [],
-   "source": [
-    "class Tokenizer:\n",
-    "    def __init__(self, tokenize_fn = 'basic_english', lower = True, max_length = None):\n",
-    "        \n",
-    "        self.tokenize_fn = torchtext.data.utils.get_tokenizer(tokenize_fn)\n",
-    "        self.lower = lower\n",
-    "        self.max_length = max_length\n",
-    "        \n",
-    "    def tokenize(self, s):\n",
-    "        \n",
-    "        tokens = self.tokenize_fn(s)\n",
-    "        \n",
-    "        if self.lower:\n",
-    "            tokens = [token.lower() for token in tokens]\n",
-    "            \n",
-    "        if self.max_length is not None:\n",
-    "            tokens = tokens[:self.max_length]\n",
-    "            \n",
-    "        return tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "5P2KumuDdFmj"
-   },
-   "outputs": [],
-   "source": [
-    "max_length = 250\n",
-    "\n",
-    "tokenizer = Tokenizer(max_length = max_length)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "V1albCvadFmm",
-    "outputId": "5c7c30f2-c6b7-4098-990d-7bfcdc2446f1"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['this', 'film', 'is', 'terrible', '.', 'i', 'hate', 'it', 'and', 'it', \"'\", 's', 'bad', '!']\n"
-     ]
-    }
-   ],
-   "source": [
-    "s = \"this film is terrible. i hate it and it's bad!\"\n",
-    "\n",
-    "print(tokenizer.tokenize(s))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "anC7_ViodFmp"
-   },
-   "outputs": [],
-   "source": [
-    "def build_vocab_from_data(raw_data, tokenizer, **vocab_kwargs):\n",
-    "        \n",
-    "    token_freqs = collections.Counter()\n",
-    "    \n",
-    "    for label, text in raw_data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        token_freqs.update(tokens)\n",
-    "                \n",
-    "    vocab = torchtext.vocab.Vocab(token_freqs, **vocab_kwargs)\n",
-    "    \n",
-    "    return vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "rgHPS1xzdFmt"
-   },
-   "outputs": [],
-   "source": [
-    "max_size = 25_000\n",
-    "\n",
-    "vocab = build_vocab_from_data(raw_train_data, tokenizer, max_size = max_size)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Unique tokens in vocab: 25,002\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'Unique tokens in vocab: {len(vocab):,}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 364
-    },
-    "colab_type": "code",
-    "id": "PsRQLrlddFmw",
-    "outputId": "5357c17c-b0ba-429d-b675-aa3fd9c39b72"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[('the', 165322),\n",
-       " ('.', 164239),\n",
-       " (',', 133647),\n",
-       " ('a', 81952),\n",
-       " ('and', 80334),\n",
-       " ('of', 71820),\n",
-       " ('to', 65662),\n",
-       " (\"'\", 64249),\n",
-       " ('is', 53598),\n",
-       " ('it', 49589),\n",
-       " ('i', 48810),\n",
-       " ('in', 45611),\n",
-       " ('this', 40868),\n",
-       " ('that', 35609),\n",
-       " ('s', 29273),\n",
-       " ('was', 26159),\n",
-       " ('movie', 24543),\n",
-       " ('as', 22276),\n",
-       " ('with', 21494),\n",
-       " ('for', 21332)]"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "vocab.freqs.most_common(20)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "FGS5tZKmdFmy",
-    "outputId": "5304c151-6696-4d2e-bd4e-ac9cfb2e3f23"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['<unk>', '<pad>', 'the', '.', ',', 'a', 'and', 'of', 'to', \"'\"]"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "vocab.itos[:10]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "izsoXluedFm3",
-    "outputId": "1ab77cea-612b-4d86-cca3-5273f0964fbe"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "vocab.stoi['the']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "xiW0GItTdFm6"
-   },
-   "outputs": [],
-   "source": [
-    "def raw_data_to_dataset(raw_data, tokenizer, vocab):\n",
-    "        \n",
-    "    text_transform = sequential_transforms(tokenizer.tokenize,\n",
-    "                                           vocab_func(vocab),\n",
-    "                                           totensor(dtype=torch.long))\n",
-    "    \n",
-    "    label_transform = sequential_transforms(lambda x: 1 if x == 'pos' else 0, \n",
-    "                                            totensor(dtype=torch.long))\n",
-    "\n",
-    "    transforms = (label_transform, text_transform)\n",
-    "\n",
-    "    dataset = TextClassificationDataset(raw_data,\n",
-    "                                        vocab,\n",
-    "                                        transforms)\n",
-    "    \n",
-    "    return dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "LCslagnudFm8"
-   },
-   "outputs": [],
-   "source": [
-    "train_data = raw_data_to_dataset(raw_train_data, tokenizer, vocab)\n",
-    "valid_data = raw_data_to_dataset(raw_valid_data, tokenizer, vocab)\n",
-    "test_data = raw_data_to_dataset(raw_test_data, tokenizer, vocab)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of training examples: 17,500\n",
-      "Number of validation examples: 7,500\n",
-      "Number of testing examples: 25,000\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'Number of training examples: {len(train_data):,}')\n",
-    "print(f'Number of validation examples: {len(valid_data):,}')\n",
-    "print(f'Number of testing examples: {len(test_data):,}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 416
-    },
-    "colab_type": "code",
-    "id": "FDsGUUeydFm_",
-    "outputId": "848655ba-b5b2-4307-ca5b-a827200fdef2"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([   12,   121,  1013,     6,   219,  1855,     8,   276,    70,    20,\n",
-      "            5,   177,     3,  1013,     0,    30,   541,     0,     4, 15259,\n",
-      "            6,  7022,     3,    12,   751,     8,    45,    14,     4,    12,\n",
-      "           69,   123,     4,    22,    11,    10,     8,    56,   241,  1013,\n",
-      "           19, 12534,   563,    10,     8,   338,  1803,    25,     2,   196,\n",
-      "           24,     3,   717,     0,     4,   745,  3428,   686,     4,  4315,\n",
-      "         3437,     4,  4258,    15,   170,     9,    28,  1209,     2,   951,\n",
-      "            4,     6,  2005,  5083,   113,   544,    35,  2957,    20,     5,\n",
-      "            9,  1013,     9,   925,     3,    25,    12,     9,   145,   255,\n",
-      "           46,    30,   160,     7,    26,    54,    46,    42,   107, 12534,\n",
-      "          563,    10,    56,  1013,   241,     3,    11,     9,    16,    29,\n",
-      "            3,    11,     9,    16,  2966,     6,  8018,     3,    24,   143,\n",
-      "          199,   773,   249,    45,  1364,     6,   120,   893,     4,  1013,\n",
-      "           10,     5,   516,    15,   135,    29,   205,   437,   599,    25,\n",
-      "        24229,     3,   338,  1803,    24,     3,    11,   222,  1655,   734,\n",
-      "         1296,     4,   265,    29,    19,     5,   618,  4793,     3,    11,\n",
-      "            9,    16,    69,   866,     8,   474,    47,     2,   113,   138,\n",
-      "           19,    39,    30,    29,   343,  6136,     4,    48,   984,     5,\n",
-      "         5212,     7,   122,     3,    77,  1894,     6,  3550,    30,  1650,\n",
-      "            6,   634,     4,   403,  1266,     8,   110,     3,     2,  1332,\n",
-      "            7,   649,   130,    11,     9,    16,  1834,    19,    39,    31,\n",
-      "            8,   215,   134,  1965, 13961,     9,    16,   649,     3,     3,\n",
-      "            3,   910,    81,    68,    29,  1677,   142,     3, 13961,     9,\n",
-      "           16, 13264,   208,    35,  1685,    13,    77, 13826,    19,    14,\n",
-      "          696,     4,   745,     4,   793,  2192,    25,   142,    11,   211])\n"
-     ]
-    }
-   ],
-   "source": [
-    "label, indexes = test_data[0]\n",
-    "\n",
-    "print(indexes)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 55
-    },
-    "colab_type": "code",
-    "id": "nXOay2JUdFnB",
-    "outputId": "148242f9-c657-46be-e71d-c7503f662fc9"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['i', 'love', 'sci-fi', 'and', 'am', 'willing', 'to', 'put', 'up', 'with', 'a', 'lot', '.', 'sci-fi', '<unk>', 'are', 'usually', '<unk>', ',', 'under-appreciated', 'and', 'misunderstood', '.', 'i', 'tried', 'to', 'like', 'this', ',', 'i', 'really', 'did', ',', 'but', 'it', 'is', 'to', 'good', 'tv', 'sci-fi', 'as', 'babylon', '5', 'is', 'to', 'star', 'trek', '(', 'the', 'original', ')', '.', 'silly', '<unk>', ',', 'cheap', 'cardboard', 'sets', ',', 'stilted', 'dialogues', ',', 'cg', 'that', 'doesn', \"'\", 't', 'match', 'the', 'background', ',', 'and', 'painfully', 'one-dimensional', 'characters', 'cannot', 'be', 'overcome', 'with', 'a', \"'\", 'sci-fi', \"'\", 'setting', '.', '(', 'i', \"'\", 'm', 'sure', 'there', 'are', 'those', 'of', 'you', 'out', 'there', 'who', 'think', 'babylon', '5', 'is', 'good', 'sci-fi', 'tv', '.', 'it', \"'\", 's', 'not', '.', 'it', \"'\", 's', 'clichéd', 'and', 'uninspiring', '.', ')', 'while', 'us', 'viewers', 'might', 'like', 'emotion', 'and', 'character', 'development', ',', 'sci-fi', 'is', 'a', 'genre', 'that', 'does', 'not', 'take', 'itself', 'seriously', '(', 'cf', '.', 'star', 'trek', ')', '.', 'it', 'may', 'treat', 'important', 'issues', ',', 'yet', 'not', 'as', 'a', 'serious', 'philosophy', '.', 'it', \"'\", 's', 'really', 'difficult', 'to', 'care', 'about', 'the', 'characters', 'here', 'as', 'they', 'are', 'not', 'simply', 'foolish', ',', 'just', 'missing', 'a', 'spark', 'of', 'life', '.', 'their', 'actions', 'and', 'reactions', 'are', 'wooden', 'and', 'predictable', ',', 'often', 'painful', 'to', 'watch', '.', 'the', 'makers', 'of', 'earth', 'know', 'it', \"'\", 's', 'rubbish', 'as', 'they', 'have', 'to', 'always', 'say', 'gene', 'roddenberry', \"'\", 's', 'earth', '.', '.', '.', 'otherwise', 'people', 'would', 'not', 'continue', 'watching', '.', 'roddenberry', \"'\", 's', 'ashes', 'must', 'be', 'turning', 'in', 'their', 'orbit', 'as', 'this', 'dull', ',', 'cheap', ',', 'poorly', 'edited', '(', 'watching', 'it', 
'without']\n"
-     ]
-    }
-   ],
-   "source": [
-    "print([vocab.itos[i] for i in indexes])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "egzlLweTdFnH"
-   },
-   "outputs": [],
-   "source": [
-    "class Collator:\n",
-    "    def __init__(self, pad_idx):\n",
-    "        \n",
-    "        self.pad_idx = pad_idx\n",
-    "        \n",
-    "    def collate(self, batch):\n",
-    "        \n",
-    "        labels, text = zip(*batch)\n",
-    "        \n",
-    "        labels = torch.LongTensor(labels)\n",
-    "        \n",
-    "        text = nn.utils.rnn.pad_sequence(text, padding_value = self.pad_idx)\n",
-    "        \n",
-    "        return labels, text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "TYLvjhoSdFnM"
-   },
-   "outputs": [],
-   "source": [
-    "pad_token = '<pad>'\n",
-    "pad_idx = vocab[pad_token]\n",
-    "\n",
-    "collator = Collator(pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "7Ly4l1I8dFnR"
-   },
-   "outputs": [],
-   "source": [
-    "batch_size = 256\n",
-    "\n",
-    "train_iterator = torch.utils.data.DataLoader(train_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = True, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "valid_iterator = torch.utils.data.DataLoader(valid_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = False, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "test_iterator = torch.utils.data.DataLoader(test_data, \n",
-    "                                            batch_size, \n",
-    "                                            shuffle = False, \n",
-    "                                            collate_fn = collator.collate)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "dbh38jHEdFnV"
-   },
-   "outputs": [],
-   "source": [
-    "class NBOW(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, output_dim, pad_idx):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.fc = nn.Linear(emb_dim, output_dim)\n",
-    "        \n",
-    "    def forward(self, text):\n",
-    "        \n",
-    "        # text = [seq len, batch size]\n",
-    "        \n",
-    "        embedded = self.embedding(text)\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        pooled = embedded.mean(0)\n",
-    "        \n",
-    "        # pooled = [batch size, emb dim]\n",
-    "        \n",
-    "        prediction = self.fc(pooled)\n",
-    "        \n",
-    "        # prediction = [batch size, output dim]\n",
-    "        \n",
-    "        return prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Ga1nXhindFnZ"
-   },
-   "outputs": [],
-   "source": [
-    "input_dim = len(vocab)\n",
-    "emb_dim = 100\n",
-    "output_dim = 2\n",
-    "\n",
-    "model = NBOW(input_dim, emb_dim, output_dim, pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "UyIJC0tYdFnc"
-   },
-   "outputs": [],
-   "source": [
-    "def count_parameters(model):\n",
-    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "1sJRLyewdFng",
-    "outputId": "e7e357e1-1cc7-4aa4-ff40-4d749209759d"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The model has 2,500,402 trainable parameters\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'The model has {count_parameters(model):,} trainable parameters')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "BPsihrZudFnl"
-   },
-   "outputs": [],
-   "source": [
-    "glove = torchtext.experimental.vectors.GloVe(name = '6B',\n",
-    "                                             dim = emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 243
-    },
-    "colab_type": "code",
-    "id": "hUIoXGkpdFno",
-    "outputId": "b58af33d-b40f-4783-b997-8e85a0edc583"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([-0.0382, -0.2449,  0.7281, -0.3996,  0.0832,  0.0440, -0.3914,  0.3344,\n",
-       "        -0.5755,  0.0875,  0.2879, -0.0673,  0.3091, -0.2638, -0.1323, -0.2076,\n",
-       "         0.3340, -0.3385, -0.3174, -0.4834,  0.1464, -0.3730,  0.3458,  0.0520,\n",
-       "         0.4495, -0.4697,  0.0263, -0.5415, -0.1552, -0.1411, -0.0397,  0.2828,\n",
-       "         0.1439,  0.2346, -0.3102,  0.0862,  0.2040,  0.5262,  0.1716, -0.0824,\n",
-       "        -0.7179, -0.4153,  0.2033, -0.1276,  0.4137,  0.5519,  0.5791, -0.3348,\n",
-       "        -0.3656, -0.5486, -0.0629,  0.2658,  0.3020,  0.9977, -0.8048, -3.0243,\n",
-       "         0.0125, -0.3694,  2.2167,  0.7220, -0.2498,  0.9214,  0.0345,  0.4674,\n",
-       "         1.1079, -0.1936, -0.0746,  0.2335, -0.0521, -0.2204,  0.0572, -0.1581,\n",
-       "        -0.3080, -0.4162,  0.3797,  0.1501, -0.5321, -0.2055, -1.2526,  0.0716,\n",
-       "         0.7056,  0.4974, -0.4206,  0.2615, -1.5380, -0.3022, -0.0734, -0.2831,\n",
-       "         0.3710, -0.2522,  0.0162, -0.0171, -0.3898,  0.8742, -0.7257, -0.5106,\n",
-       "        -0.5203, -0.1459,  0.8278,  0.2706])"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "glove['the']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 104
-    },
-    "colab_type": "code",
-    "id": "vz_X14INdFnq",
-    "outputId": "b41c1997-b970-4042-fab9-2d72f07540b0"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0.])"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "glove['shoggoth']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 104
-    },
-    "colab_type": "code",
-    "id": "iBKvWWCwdFnu",
-    "outputId": "821572aa-2743-4b1e-a03d-afeb5387bd9f"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-       "        0., 0., 0., 0.])"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "glove['The']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "glove_vocab = glove.vectors.get_stoi()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "'the' in glove_vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "'The' in glove_vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "4BFftRDMdFnx"
-   },
-   "outputs": [],
-   "source": [
-    "def get_pretrained_embedding(initial_embedding, pretrained_vectors, vocab, unk_token):\n",
-    "    \n",
-    "    pretrained_embedding = torch.FloatTensor(initial_embedding.weight.clone()).detach()    \n",
-    "    pretrained_vocab = pretrained_vectors.vectors.get_stoi()\n",
-    "    \n",
-    "    unk_tokens = []\n",
-    "    \n",
-    "    for idx, token in enumerate(vocab.itos):\n",
-    "        if token in pretrained_vocab:\n",
-    "            pretrained_vector = pretrained_vectors[token]\n",
-    "            pretrained_embedding[idx] = pretrained_vector\n",
-    "        else:\n",
-    "            unk_tokens.append(token)\n",
-    "        \n",
-    "    return pretrained_embedding, unk_tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "QRToW07JdFnz"
-   },
-   "outputs": [],
-   "source": [
-    "unk_token = '<unk>'\n",
-    "\n",
-    "pretrained_embedding, unk_tokens = get_pretrained_embedding(model.embedding, glove, vocab, unk_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.5903, -0.1947, -0.2415],\n",
-       "        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
-       "        [ 0.7289, -0.7336,  1.5624,  ..., -0.5592, -0.4480, -0.6476],\n",
-       "        ...,\n",
-       "        [ 0.0914,  1.5196,  0.4670,  ...,  0.6393, -0.0332,  0.0185],\n",
-       "        [-0.6290,  0.4650, -0.7165,  ..., -1.3171,  2.0381, -2.0497],\n",
-       "        [-1.1222, -0.0240, -1.0878,  ..., -0.4948, -0.3874,  0.0339]])"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.5903, -0.1947, -0.2415],\n",
-       "        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [ 0.4029,  0.1353,  0.6673,  ..., -0.3300,  0.7533, -0.1666],\n",
-       "        [ 0.1226,  0.0419,  0.0746,  ..., -0.0024, -0.2733, -1.0033],\n",
-       "        [-0.1009, -0.1484,  0.3141,  ..., -0.3414, -0.3768,  0.5605]])"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pretrained_embedding"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "j36jzQpPdFn3",
-    "outputId": "7ebe041d-b092-498e-ea16-0fce8c20ed33"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "734"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(unk_tokens)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "yzvhgf8tdFn5",
-    "outputId": "8c30dc4a-9a2b-4c11-8c7b-1d2cb3ba0aee"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['<unk>', '<pad>', '\\x96', '****', 'hadn', 'camera-work', '*1/2', '100%', '*****', '$1']\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(unk_tokens[:10])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 139
-    },
-    "colab_type": "code",
-    "id": "AnE6D4MAdFn_",
-    "outputId": "8b3fea1a-9bcb-4fd9-ba78-72baee94f96a"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.5903, -0.1947, -0.2415],\n",
-       "        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [ 0.4029,  0.1353,  0.6673,  ..., -0.3300,  0.7533, -0.1666],\n",
-       "        [ 0.1226,  0.0419,  0.0746,  ..., -0.0024, -0.2733, -1.0033],\n",
-       "        [-0.1009, -0.1484,  0.3141,  ..., -0.3414, -0.3768,  0.5605]])"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data.copy_(pretrained_embedding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "DJloauERdFoF"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = optim.Adam(model.parameters())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "fPPZ0cs_dFoH"
-   },
-   "outputs": [],
-   "source": [
-    "criterion = nn.CrossEntropyLoss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "HGUcFIupdFoK",
-    "outputId": "e5d9b842-689b-49ca-a4f4-08574f0524ee"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using: cuda\n"
-     ]
-    }
-   ],
-   "source": [
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
-    "\n",
-    "print(f'Using: {device}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Ynf7j6kQdFoM"
-   },
-   "outputs": [],
-   "source": [
-    "model = model.to(device)\n",
-    "criterion = criterion.to(device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "977iykeOdFoP"
-   },
-   "outputs": [],
-   "source": [
-    "def calculate_accuracy(predictions, labels):\n",
-    "    top_predictions = predictions.argmax(1, keepdim = True)\n",
-    "    correct = top_predictions.eq(labels.view_as(top_predictions)).sum()\n",
-    "    accuracy = correct.float() / labels.shape[0]\n",
-    "    return accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "HPNI8DJudFoS"
-   },
-   "outputs": [],
-   "source": [
-    "def train(model, iterator, optimizer, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.train()\n",
-    "    \n",
-    "    for labels, text in iterator:\n",
-    "        \n",
-    "        labels = labels.to(device)\n",
-    "        text = text.to(device)\n",
-    "        \n",
-    "        optimizer.zero_grad()\n",
-    "        \n",
-    "        predictions = model(text)\n",
-    "        \n",
-    "        loss = criterion(predictions, labels)\n",
-    "        \n",
-    "        acc = calculate_accuracy(predictions, labels)\n",
-    "        \n",
-    "        loss.backward()\n",
-    "        \n",
-    "        optimizer.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "kp6pV5xKdFoV"
-   },
-   "outputs": [],
-   "source": [
-    "def evaluate(model, iterator, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.eval()\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "    \n",
-    "        for labels, text in iterator:\n",
-    "\n",
-    "            labels = labels.to(device)\n",
-    "            text = text.to(device)\n",
-    "            \n",
-    "            predictions = model(text)\n",
-    "            \n",
-    "            loss = criterion(predictions, labels)\n",
-    "            \n",
-    "            acc = calculate_accuracy(predictions, labels)\n",
-    "\n",
-    "            epoch_loss += loss.item()\n",
-    "            epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "8YzL45gYdFoX"
-   },
-   "outputs": [],
-   "source": [
-    "def epoch_time(start_time, end_time):\n",
-    "    elapsed_time = end_time - start_time\n",
-    "    elapsed_mins = int(elapsed_time / 60)\n",
-    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
-    "    return elapsed_mins, elapsed_secs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 537
-    },
-    "colab_type": "code",
-    "id": "0A8wv7-xdFoa",
-    "outputId": "238f01bf-5438-482a-80ac-75c70cb20ed1"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 01 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.683 | Train Acc: 60.00%\n",
-      "\t Val. Loss: 0.669 |  Val. Acc: 67.02%\n",
-      "Epoch: 02 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.651 | Train Acc: 68.09%\n",
-      "\t Val. Loss: 0.632 |  Val. Acc: 71.31%\n",
-      "Epoch: 03 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.603 | Train Acc: 74.06%\n",
-      "\t Val. Loss: 0.582 |  Val. Acc: 74.86%\n",
-      "Epoch: 04 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.545 | Train Acc: 78.13%\n",
-      "\t Val. Loss: 0.528 |  Val. Acc: 78.88%\n",
-      "Epoch: 05 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.485 | Train Acc: 82.10%\n",
-      "\t Val. Loss: 0.477 |  Val. Acc: 81.64%\n",
-      "Epoch: 06 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.430 | Train Acc: 85.15%\n",
-      "\t Val. Loss: 0.437 |  Val. Acc: 83.25%\n",
-      "Epoch: 07 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.386 | Train Acc: 86.92%\n",
-      "\t Val. Loss: 0.404 |  Val. Acc: 84.59%\n",
-      "Epoch: 08 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.350 | Train Acc: 88.21%\n",
-      "\t Val. Loss: 0.383 |  Val. Acc: 85.19%\n",
-      "Epoch: 09 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.319 | Train Acc: 89.36%\n",
-      "\t Val. Loss: 0.363 |  Val. Acc: 85.86%\n",
-      "Epoch: 10 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.295 | Train Acc: 90.17%\n",
-      "\t Val. Loss: 0.349 |  Val. Acc: 86.27%\n"
-     ]
-    }
-   ],
-   "source": [
-    "n_epochs = 10\n",
-    "\n",
-    "best_valid_loss = float('inf')\n",
-    "\n",
-    "for epoch in range(n_epochs):\n",
-    "\n",
-    "    start_time = time.monotonic()\n",
-    "    \n",
-    "    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)\n",
-    "    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)\n",
-    "    \n",
-    "    end_time = time.monotonic()\n",
-    "\n",
-    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
-    "    \n",
-    "    if valid_loss < best_valid_loss:\n",
-    "        best_valid_loss = valid_loss\n",
-    "        torch.save(model.state_dict(), 'nbow-model.pt')\n",
-    "    \n",
-    "    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
-    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
-    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "oMHAuMFNdFoc",
-    "outputId": "58b32f9a-8c39-4818-b526-1a80e435f3ae"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Loss: 0.374 | Test Acc: 84.75%\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.load_state_dict(torch.load('nbow-model.pt'))\n",
-    "\n",
-    "test_loss, test_acc = evaluate(model, test_iterator, criterion, device)\n",
-    "\n",
-    "print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "sEDiSM3fdFog"
-   },
-   "outputs": [],
-   "source": [
-    "def predict_sentiment(tokenizer, vocab, model, device, sentence):\n",
-    "    model.eval()\n",
-    "    tokens = tokenizer.tokenize(sentence)\n",
-    "    indexes = [vocab.stoi[token] for token in tokens]\n",
-    "    tensor = torch.LongTensor(indexes).unsqueeze(-1).to(device)\n",
-    "    prediction = model(tensor)\n",
-    "    probabilities = nn.functional.softmax(prediction, dim = -1)\n",
-    "    pos_probability = probabilities.squeeze()[-1].item()\n",
-    "    return pos_probability"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "ycEAWhYIdFoi",
-    "outputId": "8a675641-fd79-46a6-b4e6-0b2006f866cc"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2.818893153744284e-05"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'the absolute worst movie of all time.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "cuMFqIoJdFok",
-    "outputId": "12c964fc-6788-459c-ad5e-ca0af366b1d4"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.9997795224189758"
-      ]
-     },
-     "execution_count": 58,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'one of the greatest films i have ever seen in my life.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 59,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "zausUPENdFoo",
-    "outputId": "2bdd06df-dab7-47ea-8952-8bd82d39bac2"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.6041761040687561"
-      ]
-     },
-     "execution_count": 59,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be one of the greatest films i have ever seen in my life, \\\n",
-    "but it was actually the absolute worst movie of all time.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 60,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "e15vpNJYdFor",
-    "outputId": "eed3ae38-d01a-4476-a235-8fd3582240f3"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.6041760444641113"
-      ]
-     },
-     "execution_count": 60,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be the absolute worst movie of all time, \\\n",
-    "but it was actually one of the greatest films i have ever seen in my life.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "machine_shape": "hm",
-   "name": "1_nbow.ipynb",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/experimental/2_rnn_gru.ipynb b/experimental/2_rnn_gru.ipynb
deleted file mode 100644
index 8084fdb..0000000
--- a/experimental/2_rnn_gru.ipynb
+++ /dev/null
@@ -1,1120 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 228
-    },
-    "colab_type": "code",
-    "id": "lIYdn1woOS1n",
-    "outputId": "05f43a3e-f111-4f96-ee3e-d95027c041c8"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.optim as optim\n",
-    "\n",
-    "import torchtext\n",
-    "import torchtext.experimental\n",
-    "import torchtext.experimental.vectors\n",
-    "from torchtext.experimental.datasets.raw.text_classification import RawTextIterableDataset\n",
-    "from torchtext.experimental.datasets.text_classification import TextClassificationDataset\n",
-    "from torchtext.experimental.functional import sequential_transforms, vocab_func, totensor\n",
-    "\n",
-    "import collections\n",
-    "import random\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "kjHAEB8BKbEY"
-   },
-   "outputs": [],
-   "source": [
-    "seed = 1234\n",
-    "\n",
-    "torch.manual_seed(seed)\n",
-    "random.seed(seed)\n",
-    "torch.backends.cudnn.deterministic = True\n",
-    "torch.backends.cudnn.benchmark = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "HRkCva2fJ_kr"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_test_data = torchtext.experimental.datasets.raw.IMDB()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "RkgVHXXSKAyU"
-   },
-   "outputs": [],
-   "source": [
-    "def get_train_valid_split(raw_train_data, split_ratio = 0.7):\n",
-    "\n",
-    "    raw_train_data = list(raw_train_data)\n",
-    "        \n",
-    "    random.shuffle(raw_train_data)\n",
-    "        \n",
-    "    n_train_examples = int(len(raw_train_data) * split_ratio)\n",
-    "        \n",
-    "    train_data = raw_train_data[:n_train_examples]\n",
-    "    valid_data = raw_train_data[n_train_examples:]\n",
-    "    \n",
-    "    train_data = RawTextIterableDataset(train_data)\n",
-    "    valid_data = RawTextIterableDataset(valid_data)\n",
-    "    \n",
-    "    return train_data, valid_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "T5fGSB1OKC77"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_valid_data = get_train_valid_split(raw_train_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "zvcEouXQLmHz"
-   },
-   "outputs": [],
-   "source": [
-    "class Tokenizer:\n",
-    "    def __init__(self, tokenize_fn = 'basic_english', lower = True, max_length = None):\n",
-    "        \n",
-    "        self.tokenize_fn = torchtext.data.utils.get_tokenizer(tokenize_fn)\n",
-    "        self.lower = lower\n",
-    "        self.max_length = max_length\n",
-    "        \n",
-    "    def tokenize(self, s):\n",
-    "        \n",
-    "        tokens = self.tokenize_fn(s)\n",
-    "        \n",
-    "        if self.lower:\n",
-    "            tokens = [token.lower() for token in tokens]\n",
-    "            \n",
-    "        if self.max_length is not None:\n",
-    "            tokens = tokens[:self.max_length]\n",
-    "            \n",
-    "        return tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "dnpijQRFLnXV"
-   },
-   "outputs": [],
-   "source": [
-    "max_length = 500\n",
-    "\n",
-    "tokenizer = Tokenizer(max_length = max_length)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "VOl6UxZoLdg_"
-   },
-   "outputs": [],
-   "source": [
-    "def build_vocab_from_data(raw_data, tokenizer, **vocab_kwargs):\n",
-    "    \n",
-    "    token_freqs = collections.Counter()\n",
-    "    \n",
-    "    for label, text in raw_data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        token_freqs.update(tokens)\n",
-    "                \n",
-    "    vocab = torchtext.vocab.Vocab(token_freqs, **vocab_kwargs)\n",
-    "    \n",
-    "    return vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "eNLrpvt2Lgsr"
-   },
-   "outputs": [],
-   "source": [
-    "max_size = 25_000\n",
-    "\n",
-    "vocab = build_vocab_from_data(raw_train_data, tokenizer, max_size = max_size)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "AN1YQiYfLr0_"
-   },
-   "outputs": [],
-   "source": [
-    "def process_raw_data(raw_data, tokenizer, vocab):\n",
-    "    \n",
-    "    raw_data = [(label, text) for (label, text) in raw_data]\n",
-    "\n",
-    "    text_transform = sequential_transforms(tokenizer.tokenize,\n",
-    "                                           vocab_func(vocab),\n",
-    "                                           totensor(dtype=torch.long))\n",
-    "    \n",
-    "    label_transform = sequential_transforms(totensor(dtype=torch.long))\n",
-    "\n",
-    "    transforms = (label_transform, text_transform)\n",
-    "\n",
-    "    dataset = TextClassificationDataset(raw_data,\n",
-    "                                        vocab,\n",
-    "                                        transforms)\n",
-    "    \n",
-    "    return dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "dlejEwWLMScW"
-   },
-   "outputs": [],
-   "source": [
-    "train_data = process_raw_data(raw_train_data, tokenizer, vocab)\n",
-    "valid_data = process_raw_data(raw_valid_data, tokenizer, vocab)\n",
-    "test_data = process_raw_data(raw_test_data, tokenizer, vocab)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "hggYldmOQahU"
-   },
-   "outputs": [],
-   "source": [
-    "class Collator:\n",
-    "    def __init__(self, pad_idx):\n",
-    "        \n",
-    "        self.pad_idx = pad_idx\n",
-    "        \n",
-    "    def collate(self, batch):\n",
-    "        \n",
-    "        labels, text = zip(*batch)\n",
-    "        \n",
-    "        labels = torch.LongTensor(labels)\n",
-    "        \n",
-    "        lengths = torch.LongTensor([len(x) for x in text])\n",
-    "\n",
-    "        text = nn.utils.rnn.pad_sequence(text, padding_value = self.pad_idx)\n",
-    "        \n",
-    "        return labels, text, lengths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "gw4LBXWAQiEC"
-   },
-   "outputs": [],
-   "source": [
-    "pad_token = '<pad>'\n",
-    "pad_idx = vocab[pad_token]\n",
-    "\n",
-    "collator = Collator(pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "d0dP9wnZQjaU"
-   },
-   "outputs": [],
-   "source": [
-    "batch_size = 256\n",
-    "\n",
-    "train_iterator = torch.utils.data.DataLoader(train_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = True, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "valid_iterator = torch.utils.data.DataLoader(valid_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = False, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "test_iterator = torch.utils.data.DataLoader(test_data, \n",
-    "                                            batch_size, \n",
-    "                                            shuffle = False, \n",
-    "                                            collate_fn = collator.collate)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class GRU(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, hid_dim, output_dim, pad_idx):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.gru = nn.GRUCell(emb_dim, hid_dim)\n",
-    "        self.fc = nn.Linear(hid_dim, output_dim)\n",
-    "\n",
-    "    def forward(self, text, lengths):\n",
-    "\n",
-    "        # text = [seq len, batch size]\n",
-    "        # lengths = [batch size]\n",
-    "\n",
-    "        embedded = self.embedding(text)\n",
-    "\n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        seq_len, batch_size, _ = embedded.shape\n",
-    "        hid_dim = self.gru.hidden_size\n",
-    "                \n",
-    "        hidden = torch.zeros(batch_size, hid_dim).to(embedded.device)\n",
-    "        \n",
-    "        for i in range(seq_len):\n",
-    "            x = embedded[i]\n",
-    "            hidden = self.gru(x, hidden)\n",
-    "        \n",
-    "        prediction = self.fc(hidden)\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class GRU(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, hid_dim, output_dim, pad_idx):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.gru = nn.GRU(emb_dim, hid_dim)\n",
-    "        self.fc = nn.Linear(hid_dim, output_dim)\n",
-    "\n",
-    "    def forward(self, text, lengths):\n",
-    "\n",
-    "        # text = [seq len, batch size]\n",
-    "        # lengths = [batch size]\n",
-    "\n",
-    "        embedded = self.embedding(text)\n",
-    "\n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        output, hidden = self.gru(embedded)\n",
-    "\n",
-    "        # output = [seq_len, batch size, n directions * hid dim]\n",
-    "        # hidden = [n layers * n directions, batch size, hid dim]\n",
-    "\n",
-    "        prediction = self.fc(hidden.squeeze(0))\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "LGQ5JkfBQll0"
-   },
-   "outputs": [],
-   "source": [
-    "class GRU(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, hid_dim, output_dim, pad_idx):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.gru = nn.GRU(emb_dim, hid_dim)\n",
-    "        self.fc = nn.Linear(hid_dim, output_dim)\n",
-    "\n",
-    "    def forward(self, text, lengths):\n",
-    "\n",
-    "        # text = [seq len, batch size]\n",
-    "        # lengths = [batch size]\n",
-    "\n",
-    "        embedded = self.embedding(text)\n",
-    "\n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, lengths, enforce_sorted = False)\n",
-    "\n",
-    "        packed_output, hidden = self.gru(packed_embedded)\n",
-    "\n",
-    "        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output)\n",
-    "\n",
-    "        # output = [seq_len, batch size, n directions * hid dim]\n",
-    "        # hidden = [n layers * n directions, batch size, hid dim]\n",
-    "\n",
-    "        prediction = self.fc(hidden.squeeze(0))\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "mEb-ff-bQtKL"
-   },
-   "outputs": [],
-   "source": [
-    "input_dim = len(vocab)\n",
-    "emb_dim = 100\n",
-    "hid_dim = 256\n",
-    "output_dim = 2\n",
-    "\n",
-    "model = GRU(input_dim, emb_dim, hid_dim, output_dim, pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "WEwnyJT_Tm8q"
-   },
-   "outputs": [],
-   "source": [
-    "def count_parameters(model):\n",
-    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "SJdVErKTTogS",
-    "outputId": "aaf74c2e-2b9f-47df-a672-b809ffffd6e5"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The model has 2,775,658 trainable parameters\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'The model has {count_parameters(model):,} trainable parameters')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "name: embedding.weight, shape: torch.Size([25002, 100])\n",
-      "name: gru.weight_ih_l0, shape: torch.Size([768, 100])\n",
-      "name: gru.weight_hh_l0, shape: torch.Size([768, 256])\n",
-      "name: gru.bias_ih_l0, shape: torch.Size([768])\n",
-      "name: gru.bias_hh_l0, shape: torch.Size([768])\n",
-      "name: fc.weight, shape: torch.Size([2, 256])\n",
-      "name: fc.bias, shape: torch.Size([2])\n"
-     ]
-    }
-   ],
-   "source": [
-    "for n, p in model.named_parameters():\n",
-    "    print(f'name: {n}, shape: {p.shape}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def initialize_parameters(m):\n",
-    "    if isinstance(m, nn.Embedding):\n",
-    "        nn.init.uniform_(m.weight, -0.05, 0.05)\n",
-    "    elif isinstance(m, nn.GRU):\n",
-    "        for n, p in m.named_parameters():\n",
-    "            if 'weight_ih' in n:\n",
-    "                r, z, n = p.chunk(3)\n",
-    "                nn.init.xavier_uniform_(r)\n",
-    "                nn.init.xavier_uniform_(z)\n",
-    "                nn.init.xavier_uniform_(n)\n",
-    "            elif 'weight_hh' in n:\n",
-    "                r, z, n = p.chunk(3)\n",
-    "                nn.init.orthogonal_(r)\n",
-    "                nn.init.orthogonal_(z)\n",
-    "                nn.init.orthogonal_(n)\n",
-    "            elif 'bias' in n:\n",
-    "                r, z, n = p.chunk(3)\n",
-    "                nn.init.zeros_(r)\n",
-    "                nn.init.zeros_(z)\n",
-    "                nn.init.zeros_(n)\n",
-    "    elif isinstance(m, nn.Linear):\n",
-    "        nn.init.xavier_uniform_(m.weight)\n",
-    "        nn.init.zeros_(m.bias)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "GRU(\n",
-       "  (embedding): Embedding(25002, 100, padding_idx=1)\n",
-       "  (gru): GRU(100, 256)\n",
-       "  (fc): Linear(in_features=256, out_features=2, bias=True)\n",
-       ")"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.apply(initialize_parameters)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "HE9cEN3XTpf7"
-   },
-   "outputs": [],
-   "source": [
-    "glove = torchtext.experimental.vectors.GloVe(name = '6B',\n",
-    "                                             dim = emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "AyI08bfvTrCV"
-   },
-   "outputs": [],
-   "source": [
-    "def get_pretrained_embedding(initial_embedding, pretrained_vectors, vocab, unk_token):\n",
-    "    \n",
-    "    pretrained_embedding = torch.FloatTensor(initial_embedding.weight.clone()).detach()    \n",
-    "    pretrained_vocab = pretrained_vectors.vectors.get_stoi()\n",
-    "    \n",
-    "    unk_tokens = []\n",
-    "    \n",
-    "    for idx, token in enumerate(vocab.itos):\n",
-    "        if token in pretrained_vocab:\n",
-    "            pretrained_vector = pretrained_vectors[token]\n",
-    "            pretrained_embedding[idx] = pretrained_vector\n",
-    "        else:\n",
-    "            unk_tokens.append(token)\n",
-    "        \n",
-    "    return pretrained_embedding, unk_tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "GPMcsd6HTtoC"
-   },
-   "outputs": [],
-   "source": [
-    "unk_token = '<unk>'\n",
-    "\n",
-    "pretrained_embedding, unk_tokens = get_pretrained_embedding(model.embedding, glove, vocab, unk_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 139
-    },
-    "colab_type": "code",
-    "id": "LhlnYb2ZTvPr",
-    "outputId": "8d56d0e2-6af1-40fe-ea1e-9ec7a42d8b15"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[ 0.0098,  0.0150, -0.0099,  ...,  0.0211, -0.0092,  0.0027],\n",
-       "        [ 0.0347,  0.0276,  0.0468,  ..., -0.0315, -0.0472, -0.0326],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [-0.2925,  0.1087,  0.7920,  ..., -0.3641,  0.1822, -0.4104],\n",
-       "        [-0.7250,  0.7545,  0.1637,  ..., -0.0144, -0.1761,  0.3418],\n",
-       "        [ 1.1753,  0.0460, -0.3542,  ...,  0.4510,  0.0485, -0.4015]])"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data.copy_(pretrained_embedding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.embedding.weight.data[pad_idx] = torch.zeros(emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[ 0.0098,  0.0150, -0.0099,  ...,  0.0211, -0.0092,  0.0027],\n",
-       "        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [-0.2925,  0.1087,  0.7920,  ..., -0.3641,  0.1822, -0.4104],\n",
-       "        [-0.7250,  0.7545,  0.1637,  ..., -0.0144, -0.1761,  0.3418],\n",
-       "        [ 1.1753,  0.0460, -0.3542,  ...,  0.4510,  0.0485, -0.4015]])"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Sji9nWvaTxcp"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = optim.Adam(model.parameters())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "a4Q-afN8Tyqr"
-   },
-   "outputs": [],
-   "source": [
-    "criterion = nn.CrossEntropyLoss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "PjZOAABMT0-T"
-   },
-   "outputs": [],
-   "source": [
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "6cYt2pfoT3TD"
-   },
-   "outputs": [],
-   "source": [
-    "model = model.to(device)\n",
-    "criterion = criterion.to(device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "SSdhLxTJT4mn"
-   },
-   "outputs": [],
-   "source": [
-    "def calculate_accuracy(predictions, labels):\n",
-    "    top_predictions = predictions.argmax(1, keepdim = True)\n",
-    "    correct = top_predictions.eq(labels.view_as(top_predictions)).sum()\n",
-    "    accuracy = correct.float() / labels.shape[0]\n",
-    "    return accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "EoJT5j-1T54w"
-   },
-   "outputs": [],
-   "source": [
-    "def train(model, iterator, optimizer, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.train()\n",
-    "    \n",
-    "    for labels, text, lengths in iterator:\n",
-    "        \n",
-    "        labels = labels.to(device)\n",
-    "        text = text.to(device)\n",
-    "\n",
-    "        optimizer.zero_grad()\n",
-    "        \n",
-    "        predictions = model(text, lengths)\n",
-    "        \n",
-    "        loss = criterion(predictions, labels)\n",
-    "        \n",
-    "        acc = calculate_accuracy(predictions, labels)\n",
-    "        \n",
-    "        loss.backward()\n",
-    "        \n",
-    "        optimizer.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        epoch_acc += acc.item()\n",
-    "\n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "UBh7g1cnUBMG"
-   },
-   "outputs": [],
-   "source": [
-    "def evaluate(model, iterator, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.eval()\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "    \n",
-    "        for labels, text, lengths in iterator:\n",
-    "\n",
-    "            labels = labels.to(device)\n",
-    "            text = text.to(device)\n",
-    "            \n",
-    "            predictions = model(text, lengths)\n",
-    "            \n",
-    "            loss = criterion(predictions, labels)\n",
-    "            \n",
-    "            acc = calculate_accuracy(predictions, labels)\n",
-    "\n",
-    "            epoch_loss += loss.item()\n",
-    "            epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "jSMtdoeSUDAH"
-   },
-   "outputs": [],
-   "source": [
-    "def epoch_time(start_time, end_time):\n",
-    "    elapsed_time = end_time - start_time\n",
-    "    elapsed_mins = int(elapsed_time / 60)\n",
-    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
-    "    return elapsed_mins, elapsed_secs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 537
-    },
-    "colab_type": "code",
-    "id": "lG-dJsjFUF8x",
-    "outputId": "c434d13f-4efa-4a7c-c346-5e886db0405d"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 01 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.654 | Train Acc: 60.73%\n",
-      "\t Val. Loss: 0.584 |  Val. Acc: 68.87%\n",
-      "Epoch: 02 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.423 | Train Acc: 80.73%\n",
-      "\t Val. Loss: 0.332 |  Val. Acc: 86.04%\n",
-      "Epoch: 03 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.252 | Train Acc: 90.15%\n",
-      "\t Val. Loss: 0.285 |  Val. Acc: 88.63%\n",
-      "Epoch: 04 | Epoch Time: 0m 8s\n",
-      "\tTrain Loss: 0.186 | Train Acc: 93.05%\n",
-      "\t Val. Loss: 0.286 |  Val. Acc: 89.40%\n",
-      "Epoch: 05 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.116 | Train Acc: 95.85%\n",
-      "\t Val. Loss: 0.307 |  Val. Acc: 89.56%\n",
-      "Epoch: 06 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.065 | Train Acc: 97.90%\n",
-      "\t Val. Loss: 0.354 |  Val. Acc: 89.64%\n",
-      "Epoch: 07 | Epoch Time: 0m 8s\n",
-      "\tTrain Loss: 0.042 | Train Acc: 98.74%\n",
-      "\t Val. Loss: 0.403 |  Val. Acc: 89.35%\n",
-      "Epoch: 08 | Epoch Time: 0m 8s\n",
-      "\tTrain Loss: 0.020 | Train Acc: 99.47%\n",
-      "\t Val. Loss: 0.408 |  Val. Acc: 89.35%\n",
-      "Epoch: 09 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.010 | Train Acc: 99.81%\n",
-      "\t Val. Loss: 0.505 |  Val. Acc: 88.53%\n",
-      "Epoch: 10 | Epoch Time: 0m 7s\n",
-      "\tTrain Loss: 0.007 | Train Acc: 99.85%\n",
-      "\t Val. Loss: 0.657 |  Val. Acc: 88.27%\n"
-     ]
-    }
-   ],
-   "source": [
-    "n_epochs = 10\n",
-    "\n",
-    "best_valid_loss = float('inf')\n",
-    "\n",
-    "for epoch in range(n_epochs):\n",
-    "\n",
-    "    start_time = time.monotonic()\n",
-    "    \n",
-    "    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)\n",
-    "    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)\n",
-    "    \n",
-    "    end_time = time.monotonic()\n",
-    "\n",
-    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
-    "    \n",
-    "    if valid_loss < best_valid_loss:\n",
-    "        best_valid_loss = valid_loss\n",
-    "        torch.save(model.state_dict(), 'gru-model.pt')\n",
-    "    \n",
-    "    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
-    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
-    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "PH7-0f6nUKRb",
-    "outputId": "faf1e6dd-c99e-4fda-c6f8-435a08ca0073"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Loss: 0.290 | Test Acc: 87.93%\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.load_state_dict(torch.load('gru-model.pt'))\n",
-    "\n",
-    "test_loss, test_acc = evaluate(model, test_iterator, criterion, device)\n",
-    "\n",
-    "print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "rnWNSo8kdcl_"
-   },
-   "outputs": [],
-   "source": [
-    "def predict_sentiment(tokenizer, vocab, model, device, sentence):\n",
-    "    model.eval()\n",
-    "    tokens = tokenizer.tokenize(sentence)\n",
-    "    length = torch.LongTensor([len(tokens)]).to(device)\n",
-    "    indexes = [vocab.stoi[token] for token in tokens]\n",
-    "    tensor = torch.LongTensor(indexes).unsqueeze(-1).to(device)\n",
-    "    prediction = model(tensor, length)\n",
-    "    probabilities = nn.functional.softmax(prediction, dim = -1)\n",
-    "    pos_probability = probabilities.squeeze()[-1].item()\n",
-    "    return pos_probability"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "hb7bC-aEeC1q",
-    "outputId": "059cccd1-efb4-404c-81f9-606983c23b33"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.06520231813192368"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'the absolute worst movie of all time.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "APEVZ3D4eEVw",
-    "outputId": "0d188e29-6e4e-4183-c7aa-467ea8f1afe6"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8539475798606873"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'one of the greatest films i have ever seen in my life.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "X7GMey_jebjg",
-    "outputId": "04ca4196-51f0-4661-ffe4-8f4dd199baf4"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.15590433776378632"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be one of the greatest films i have ever seen in my life, \\\n",
-    "but it was actually the absolute worst movie of all time.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "kOoESlQSxYx2",
-    "outputId": "e5826bef-5f9c-41f6-9eb0-795318280045"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.3470574617385864"
-      ]
-     },
-     "execution_count": 44,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be the absolute worst movie of all time, \\\n",
-    "but it was actually one of the greatest films i have ever seen in my life.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "machine_shape": "hm",
-   "name": "2_rnn_gru.ipynb",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/experimental/3_rnn_bilstm.ipynb b/experimental/3_rnn_bilstm.ipynb
deleted file mode 100644
index 54d3d95..0000000
--- a/experimental/3_rnn_bilstm.ipynb
+++ /dev/null
@@ -1,1049 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 228
-    },
-    "colab_type": "code",
-    "id": "lIYdn1woOS1n",
-    "outputId": "a30c21d5-b7cc-4ea6-a0d3-f9f1392ee04a"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.optim as optim\n",
-    "\n",
-    "import torchtext\n",
-    "import torchtext.experimental\n",
-    "import torchtext.experimental.vectors\n",
-    "from torchtext.experimental.datasets.raw.text_classification import RawTextIterableDataset\n",
-    "from torchtext.experimental.datasets.text_classification import TextClassificationDataset\n",
-    "from torchtext.experimental.functional import sequential_transforms, vocab_func, totensor\n",
-    "\n",
-    "import collections\n",
-    "import random\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "II-XIfhSkZS-"
-   },
-   "outputs": [],
-   "source": [
-    "seed = 1234\n",
-    "\n",
-    "torch.manual_seed(seed)\n",
-    "random.seed(seed)\n",
-    "torch.backends.cudnn.deterministic = True\n",
-    "torch.backends.cudnn.benchmark = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "kIkeEy2mkcT6"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_test_data = torchtext.experimental.datasets.raw.IMDB()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "_a5ucP1ZkeDv"
-   },
-   "outputs": [],
-   "source": [
-    "def get_train_valid_split(raw_train_data, split_ratio = 0.7):\n",
-    "\n",
-    "    raw_train_data = list(raw_train_data)\n",
-    "        \n",
-    "    random.shuffle(raw_train_data)\n",
-    "        \n",
-    "    n_train_examples = int(len(raw_train_data) * split_ratio)\n",
-    "        \n",
-    "    train_data = raw_train_data[:n_train_examples]\n",
-    "    valid_data = raw_train_data[n_train_examples:]\n",
-    "    \n",
-    "    train_data = RawTextIterableDataset(train_data)\n",
-    "    valid_data = RawTextIterableDataset(valid_data)\n",
-    "    \n",
-    "    return train_data, valid_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "1WP4nz-_kf_0"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_valid_data = get_train_valid_split(raw_train_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "pPvrMZlWkicJ"
-   },
-   "outputs": [],
-   "source": [
-    "class Tokenizer:\n",
-    "    def __init__(self, tokenize_fn = 'basic_english', lower = True, max_length = None):\n",
-    "        \n",
-    "        self.tokenize_fn = torchtext.data.utils.get_tokenizer(tokenize_fn)\n",
-    "        self.lower = lower\n",
-    "        self.max_length = max_length\n",
-    "        \n",
-    "    def tokenize(self, s):\n",
-    "        \n",
-    "        tokens = self.tokenize_fn(s)\n",
-    "        \n",
-    "        if self.lower:\n",
-    "            tokens = [token.lower() for token in tokens]\n",
-    "            \n",
-    "        if self.max_length is not None:\n",
-    "            tokens = tokens[:self.max_length]\n",
-    "            \n",
-    "        return tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "SMsMQSuSkkt3"
-   },
-   "outputs": [],
-   "source": [
-    "max_length = 500\n",
-    "\n",
-    "tokenizer = Tokenizer(max_length = max_length)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Yie7TKWKkmeK"
-   },
-   "outputs": [],
-   "source": [
-    "def build_vocab_from_data(raw_data, tokenizer, **vocab_kwargs):\n",
-    "    \n",
-    "    token_freqs = collections.Counter()\n",
-    "    \n",
-    "    for label, text in raw_data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        token_freqs.update(tokens)\n",
-    "                \n",
-    "    vocab = torchtext.vocab.Vocab(token_freqs, **vocab_kwargs)\n",
-    "    \n",
-    "    return vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "9jW7Ci7WkoSn"
-   },
-   "outputs": [],
-   "source": [
-    "max_size = 25_000\n",
-    "\n",
-    "vocab = build_vocab_from_data(raw_train_data, tokenizer, max_size = max_size)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "cvSZt_iFkqkt"
-   },
-   "outputs": [],
-   "source": [
-    "def process_raw_data(raw_data, tokenizer, vocab):\n",
-    "    \n",
-    "    raw_data = [(label, text) for (label, text) in raw_data]\n",
-    "\n",
-    "    text_transform = sequential_transforms(tokenizer.tokenize,\n",
-    "                                           vocab_func(vocab),\n",
-    "                                           totensor(dtype=torch.long))\n",
-    "    \n",
-    "    label_transform = sequential_transforms(totensor(dtype=torch.long))\n",
-    "\n",
-    "    transforms = (label_transform, text_transform)\n",
-    "\n",
-    "    dataset = TextClassificationDataset(raw_data,\n",
-    "                                        vocab,\n",
-    "                                        transforms)\n",
-    "    \n",
-    "    return dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "bwsSiBdkktRk"
-   },
-   "outputs": [],
-   "source": [
-    "train_data = process_raw_data(raw_train_data, tokenizer, vocab)\n",
-    "valid_data = process_raw_data(raw_valid_data, tokenizer, vocab)\n",
-    "test_data = process_raw_data(raw_test_data, tokenizer, vocab)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "5m3xRusSk8v3"
-   },
-   "outputs": [],
-   "source": [
-    "class Collator:\n",
-    "    def __init__(self, pad_idx):\n",
-    "        \n",
-    "        self.pad_idx = pad_idx\n",
-    "        \n",
-    "    def collate(self, batch):\n",
-    "        \n",
-    "        labels, text = zip(*batch)\n",
-    "        \n",
-    "        labels = torch.LongTensor(labels)\n",
-    "        \n",
-    "        lengths = torch.LongTensor([len(x) for x in text])\n",
-    "\n",
-    "        text = nn.utils.rnn.pad_sequence(text, padding_value = self.pad_idx)\n",
-    "        \n",
-    "        return labels, text, lengths"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "1ZMuZqZxk8-p"
-   },
-   "outputs": [],
-   "source": [
-    "pad_token = '<pad>'\n",
-    "pad_idx = vocab[pad_token]\n",
-    "\n",
-    "collator = Collator(pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "mxG97Si9lAI2"
-   },
-   "outputs": [],
-   "source": [
-    "batch_size = 256\n",
-    "\n",
-    "train_iterator = torch.utils.data.DataLoader(train_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = True, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "valid_iterator = torch.utils.data.DataLoader(valid_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = False, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "test_iterator = torch.utils.data.DataLoader(test_data, \n",
-    "                                            batch_size, \n",
-    "                                            shuffle = False, \n",
-    "                                            collate_fn = collator.collate)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "ty3NbheMlPYs"
-   },
-   "outputs": [],
-   "source": [
-    "class BiLSTM(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, hid_dim, output_dim, n_layers, dropout, pad_idx):\n",
-    "\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.lstm = nn.LSTM(emb_dim, hid_dim, num_layers = n_layers, bidirectional = True, dropout = dropout)\n",
-    "        self.fc = nn.Linear(2 * hid_dim, output_dim)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "\n",
-    "    def forward(self, text, lengths):\n",
-    "\n",
-    "        # text = [seq len, batch size]\n",
-    "        # lengths = [batch size]\n",
-    "\n",
-    "        embedded = self.dropout(self.embedding(text))\n",
-    "\n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, lengths, enforce_sorted = False)\n",
-    "\n",
-    "        packed_output, (hidden, cell) = self.lstm(packed_embedded)\n",
-    "\n",
-    "        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output)\n",
-    "\n",
-    "        # outputs = [seq_len, batch size, n directions * hid dim]\n",
-    "        # hidden = [n layers * n directions, batch size, hid dim]\n",
-    "\n",
-    "        hidden_fwd = hidden[-2]\n",
-    "        hidden_bck = hidden[-1]\n",
-    "\n",
-    "        # hidden_fwd/bck = [batch size, hid dim]\n",
-    "\n",
-    "        hidden = torch.cat((hidden_fwd, hidden_bck), dim = 1)\n",
-    "\n",
-    "        # hidden = [batch size, hid dim * 2]\n",
-    "\n",
-    "        prediction = self.fc(self.dropout(hidden))\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "trg6yTjBqOLZ"
-   },
-   "outputs": [],
-   "source": [
-    "input_dim = len(vocab)\n",
-    "emb_dim = 100\n",
-    "hid_dim = 256\n",
-    "output_dim = 2\n",
-    "n_layers = 2\n",
-    "dropout = 0.5\n",
-    "\n",
-    "model = BiLSTM(input_dim, emb_dim, hid_dim, output_dim, n_layers, dropout, pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "9dgdCRsqqQoD"
-   },
-   "outputs": [],
-   "source": [
-    "def count_parameters(model):\n",
-    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "bfiGzjvnqV-s",
-    "outputId": "168a3662-b95a-48de-d722-c76264e8c8ab"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The model has 4,811,370 trainable parameters\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'The model has {count_parameters(model):,} trainable parameters')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "name: embedding.weight, shape: torch.Size([25002, 100])\n",
-      "name: lstm.weight_ih_l0, shape: torch.Size([1024, 100])\n",
-      "name: lstm.weight_hh_l0, shape: torch.Size([1024, 256])\n",
-      "name: lstm.bias_ih_l0, shape: torch.Size([1024])\n",
-      "name: lstm.bias_hh_l0, shape: torch.Size([1024])\n",
-      "name: lstm.weight_ih_l0_reverse, shape: torch.Size([1024, 100])\n",
-      "name: lstm.weight_hh_l0_reverse, shape: torch.Size([1024, 256])\n",
-      "name: lstm.bias_ih_l0_reverse, shape: torch.Size([1024])\n",
-      "name: lstm.bias_hh_l0_reverse, shape: torch.Size([1024])\n",
-      "name: lstm.weight_ih_l1, shape: torch.Size([1024, 512])\n",
-      "name: lstm.weight_hh_l1, shape: torch.Size([1024, 256])\n",
-      "name: lstm.bias_ih_l1, shape: torch.Size([1024])\n",
-      "name: lstm.bias_hh_l1, shape: torch.Size([1024])\n",
-      "name: lstm.weight_ih_l1_reverse, shape: torch.Size([1024, 512])\n",
-      "name: lstm.weight_hh_l1_reverse, shape: torch.Size([1024, 256])\n",
-      "name: lstm.bias_ih_l1_reverse, shape: torch.Size([1024])\n",
-      "name: lstm.bias_hh_l1_reverse, shape: torch.Size([1024])\n",
-      "name: fc.weight, shape: torch.Size([2, 512])\n",
-      "name: fc.bias, shape: torch.Size([2])\n"
-     ]
-    }
-   ],
-   "source": [
-    "for n, p in model.named_parameters():\n",
-    "    print(f'name: {n}, shape: {p.shape}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def initialize_parameters(m):\n",
-    "    if isinstance(m, nn.Embedding):\n",
-    "        nn.init.uniform_(m.weight, -0.05, 0.05)\n",
-    "    elif isinstance(m, nn.LSTM):\n",
-    "        for n, p in m.named_parameters():\n",
-    "            if 'weight_ih' in n:\n",
-    "                i, f, g, o = p.chunk(4)\n",
-    "                nn.init.xavier_uniform_(i)\n",
-    "                nn.init.xavier_uniform_(f)\n",
-    "                nn.init.xavier_uniform_(g)\n",
-    "                nn.init.xavier_uniform_(o)\n",
-    "            elif 'weight_hh' in n:\n",
-    "                i, f, g, o = p.chunk(4)\n",
-    "                nn.init.orthogonal_(i)\n",
-    "                nn.init.orthogonal_(f)\n",
-    "                nn.init.orthogonal_(g)\n",
-    "                nn.init.orthogonal_(o)\n",
-    "            elif 'bias' in n:\n",
-    "                i, f, g, o = p.chunk(4)\n",
-    "                nn.init.zeros_(i)\n",
-    "                nn.init.ones_(f)\n",
-    "                nn.init.zeros_(g)\n",
-    "                nn.init.zeros_(o)\n",
-    "    elif isinstance(m, nn.Linear):\n",
-    "        nn.init.xavier_uniform_(m.weight)\n",
-    "        nn.init.zeros_(m.bias)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BiLSTM(\n",
-       "  (embedding): Embedding(25002, 100, padding_idx=1)\n",
-       "  (lstm): LSTM(100, 256, num_layers=2, dropout=0.5, bidirectional=True)\n",
-       "  (fc): Linear(in_features=512, out_features=2, bias=True)\n",
-       "  (dropout): Dropout(p=0.5, inplace=False)\n",
-       ")"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.apply(initialize_parameters)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Sah17A41qW5d"
-   },
-   "outputs": [],
-   "source": [
-    "glove = torchtext.experimental.vectors.GloVe(name = '6B',\n",
-    "                                             dim = emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "S1Dfcn2Nqabo"
-   },
-   "outputs": [],
-   "source": [
-    "def get_pretrained_embedding(initial_embedding, pretrained_vectors, vocab, unk_token):\n",
-    "    \n",
-    "    pretrained_embedding = torch.FloatTensor(initial_embedding.weight.clone()).detach()    \n",
-    "    pretrained_vocab = pretrained_vectors.vectors.get_stoi()\n",
-    "    \n",
-    "    unk_tokens = []\n",
-    "    \n",
-    "    for idx, token in enumerate(vocab.itos):\n",
-    "        if token in pretrained_vocab:\n",
-    "            pretrained_vector = pretrained_vectors[token]\n",
-    "            pretrained_embedding[idx] = pretrained_vector\n",
-    "        else:\n",
-    "            unk_tokens.append(token)\n",
-    "        \n",
-    "    return pretrained_embedding, unk_tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "sGyV94f7qvdr"
-   },
-   "outputs": [],
-   "source": [
-    "unk_token = '<unk>'\n",
-    "\n",
-    "pretrained_embedding, unk_tokens = get_pretrained_embedding(model.embedding, glove, vocab, unk_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 139
-    },
-    "colab_type": "code",
-    "id": "KYnGxbVisUsk",
-    "outputId": "e1a88c1c-0f3e-48c6-afcf-9d791fd54bb9"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.0398,  0.0357, -0.0046,  ..., -0.0485, -0.0088,  0.0329],\n",
-       "        [-0.0330,  0.0428,  0.0304,  ...,  0.0236,  0.0487,  0.0101],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [-0.2925,  0.1087,  0.7920,  ..., -0.3641,  0.1822, -0.4104],\n",
-       "        [-0.7250,  0.7545,  0.1637,  ..., -0.0144, -0.1761,  0.3418],\n",
-       "        [ 1.1753,  0.0460, -0.3542,  ...,  0.4510,  0.0485, -0.4015]])"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data.copy_(pretrained_embedding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.embedding.weight.data[pad_idx] = torch.zeros(emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "DTwNU41WseMS"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = optim.Adam(model.parameters())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Rxlx7a72s1ze"
-   },
-   "outputs": [],
-   "source": [
-    "criterion = nn.CrossEntropyLoss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "1CLimBxus2yX"
-   },
-   "outputs": [],
-   "source": [
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "108fm55ftBgO"
-   },
-   "outputs": [],
-   "source": [
-    "model = model.to(device)\n",
-    "criterion = criterion.to(device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "IYCxbvXUvE5v"
-   },
-   "outputs": [],
-   "source": [
-    "def calculate_accuracy(predictions, labels):\n",
-    "    top_predictions = predictions.argmax(1, keepdim = True)\n",
-    "    correct = top_predictions.eq(labels.view_as(top_predictions)).sum()\n",
-    "    accuracy = correct.float() / labels.shape[0]\n",
-    "    return accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Ik2JQo6TvGml"
-   },
-   "outputs": [],
-   "source": [
-    "def train(model, iterator, optimizer, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.train()\n",
-    "    \n",
-    "    for labels, text, lengths in iterator:\n",
-    "        \n",
-    "        labels = labels.to(device)\n",
-    "        text = text.to(device)\n",
-    "\n",
-    "        optimizer.zero_grad()\n",
-    "        \n",
-    "        predictions = model(text, lengths)\n",
-    "        \n",
-    "        loss = criterion(predictions, labels)\n",
-    "        \n",
-    "        acc = calculate_accuracy(predictions, labels)\n",
-    "        \n",
-    "        loss.backward()\n",
-    "        \n",
-    "        optimizer.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        epoch_acc += acc.item()\n",
-    "\n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "aGy1Zk6jvIf8"
-   },
-   "outputs": [],
-   "source": [
-    "def evaluate(model, iterator, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.eval()\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "    \n",
-    "        for labels, text, lengths in iterator:\n",
-    "\n",
-    "            labels = labels.to(device)\n",
-    "            text = text.to(device)\n",
-    "            \n",
-    "            predictions = model(text, lengths)\n",
-    "            \n",
-    "            loss = criterion(predictions, labels)\n",
-    "            \n",
-    "            acc = calculate_accuracy(predictions, labels)\n",
-    "\n",
-    "            epoch_loss += loss.item()\n",
-    "            epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "9MyMRRzbvKPx"
-   },
-   "outputs": [],
-   "source": [
-    "def epoch_time(start_time, end_time):\n",
-    "    elapsed_time = end_time - start_time\n",
-    "    elapsed_mins = int(elapsed_time / 60)\n",
-    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
-    "    return elapsed_mins, elapsed_secs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 537
-    },
-    "colab_type": "code",
-    "id": "dRKwD51WvMa3",
-    "outputId": "79389e66-c1bf-45c9-a919-63ee787ad660"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 01 | Epoch Time: 0m 23s\n",
-      "\tTrain Loss: 0.777 | Train Acc: 52.23%\n",
-      "\t Val. Loss: 0.683 |  Val. Acc: 53.70%\n",
-      "Epoch: 02 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.683 | Train Acc: 57.90%\n",
-      "\t Val. Loss: 0.676 |  Val. Acc: 53.47%\n",
-      "Epoch: 03 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.625 | Train Acc: 65.60%\n",
-      "\t Val. Loss: 0.482 |  Val. Acc: 78.27%\n",
-      "Epoch: 04 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.483 | Train Acc: 77.15%\n",
-      "\t Val. Loss: 0.410 |  Val. Acc: 82.67%\n",
-      "Epoch: 05 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.350 | Train Acc: 85.31%\n",
-      "\t Val. Loss: 0.315 |  Val. Acc: 86.75%\n",
-      "Epoch: 06 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.294 | Train Acc: 88.14%\n",
-      "\t Val. Loss: 0.288 |  Val. Acc: 88.41%\n",
-      "Epoch: 07 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.258 | Train Acc: 89.92%\n",
-      "\t Val. Loss: 0.277 |  Val. Acc: 89.14%\n",
-      "Epoch: 08 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.231 | Train Acc: 91.03%\n",
-      "\t Val. Loss: 0.280 |  Val. Acc: 88.89%\n",
-      "Epoch: 09 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.196 | Train Acc: 92.50%\n",
-      "\t Val. Loss: 0.285 |  Val. Acc: 89.27%\n",
-      "Epoch: 10 | Epoch Time: 0m 24s\n",
-      "\tTrain Loss: 0.175 | Train Acc: 93.53%\n",
-      "\t Val. Loss: 0.316 |  Val. Acc: 89.55%\n"
-     ]
-    }
-   ],
-   "source": [
-    "n_epochs = 10\n",
-    "\n",
-    "best_valid_loss = float('inf')\n",
-    "\n",
-    "for epoch in range(n_epochs):\n",
-    "\n",
-    "    start_time = time.monotonic()\n",
-    "    \n",
-    "    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)\n",
-    "    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)\n",
-    "    \n",
-    "    end_time = time.monotonic()\n",
-    "\n",
-    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
-    "    \n",
-    "    if valid_loss < best_valid_loss:\n",
-    "        best_valid_loss = valid_loss\n",
-    "        torch.save(model.state_dict(), 'bilstm-model.pt')\n",
-    "    \n",
-    "    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
-    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
-    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "hKOg4oARvPHJ",
-    "outputId": "7cfe4b85-de2f-47f3-8437-45589c32ceca"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Loss: 0.291 | Test Acc: 88.06%\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.load_state_dict(torch.load('bilstm-model.pt'))\n",
-    "\n",
-    "test_loss, test_acc = evaluate(model, test_iterator, criterion, device)\n",
-    "\n",
-    "print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "tQ4Jsf_vvWgB"
-   },
-   "outputs": [],
-   "source": [
-    "def predict_sentiment(tokenizer, vocab, model, device, sentence):\n",
-    "    model.eval()\n",
-    "    tokens = tokenizer.tokenize(sentence)\n",
-    "    length = torch.LongTensor([len(tokens)]).to(device)\n",
-    "    indexes = [vocab.stoi[token] for token in tokens]\n",
-    "    tensor = torch.LongTensor(indexes).unsqueeze(-1).to(device)\n",
-    "    prediction = model(tensor, length)\n",
-    "    probabilities = nn.functional.softmax(prediction, dim = -1)\n",
-    "    pos_probability = probabilities.squeeze()[-1].item()\n",
-    "    return pos_probability"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "Yy7_6rhovZTE",
-    "outputId": "78860852-39ea-4a7b-eb33-9a1a077fb9e0"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.06933268904685974"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'the absolute worst movie of all time.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "L3LmQxrgvau9",
-    "outputId": "0204aa17-0bc1-45f2-9be1-c014798af120"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.9730159640312195"
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'one of the greatest films i have ever seen in my life.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "t7Qoy21Bvb7v",
-    "outputId": "6094a141-4f37-4110-edc7-aa14b9a3c667"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.1614144891500473"
-      ]
-     },
-     "execution_count": 40,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be one of the greatest films i have ever seen in my life, \\\n",
-    "but it was actually the absolute worst movie of all time.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "EPGXBr18vdQT",
-    "outputId": "e5b3d210-0254-4d5f-bdbe-609c0b7d6a8a"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.5040232539176941"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be the absolute worst movie of all time, \\\n",
-    "but it was actually one of the greatest films i have ever seen in my life.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "machine_shape": "hm",
-   "name": "scratchpad",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/experimental/4_cnn.ipynb b/experimental/4_cnn.ipynb
deleted file mode 100644
index ae4d680..0000000
--- a/experimental/4_cnn.ipynb
+++ /dev/null
@@ -1,1028 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 228
-    },
-    "colab_type": "code",
-    "id": "lIYdn1woOS1n",
-    "outputId": "f9419fe4-7c0e-4706-a9b9-30fbc836d9a9"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.nn.functional as F\n",
-    "import torch.optim as optim\n",
-    "\n",
-    "import torchtext\n",
-    "import torchtext.experimental\n",
-    "import torchtext.experimental.vectors\n",
-    "from torchtext.experimental.datasets.raw.text_classification import RawTextIterableDataset\n",
-    "from torchtext.experimental.datasets.text_classification import TextClassificationDataset\n",
-    "from torchtext.experimental.functional import sequential_transforms, vocab_func, totensor\n",
-    "\n",
-    "import collections\n",
-    "import random\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "II-XIfhSkZS-"
-   },
-   "outputs": [],
-   "source": [
-    "seed = 1234\n",
-    "\n",
-    "torch.manual_seed(seed)\n",
-    "random.seed(seed)\n",
-    "torch.backends.cudnn.deterministic = True\n",
-    "torch.backends.cudnn.benchmark = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "kIkeEy2mkcT6"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_test_data = torchtext.experimental.datasets.raw.IMDB()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "_a5ucP1ZkeDv"
-   },
-   "outputs": [],
-   "source": [
-    "def get_train_valid_split(raw_train_data, split_ratio = 0.7):\n",
-    "\n",
-    "    raw_train_data = list(raw_train_data)\n",
-    "        \n",
-    "    random.shuffle(raw_train_data)\n",
-    "        \n",
-    "    n_train_examples = int(len(raw_train_data) * split_ratio)\n",
-    "        \n",
-    "    train_data = raw_train_data[:n_train_examples]\n",
-    "    valid_data = raw_train_data[n_train_examples:]\n",
-    "    \n",
-    "    train_data = RawTextIterableDataset(train_data)\n",
-    "    valid_data = RawTextIterableDataset(valid_data)\n",
-    "    \n",
-    "    return train_data, valid_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "1WP4nz-_kf_0"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_valid_data = get_train_valid_split(raw_train_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "pPvrMZlWkicJ"
-   },
-   "outputs": [],
-   "source": [
-    "class Tokenizer:\n",
-    "    def __init__(self, tokenize_fn = 'basic_english', lower = True, max_length = None):\n",
-    "        \n",
-    "        self.tokenize_fn = torchtext.data.utils.get_tokenizer(tokenize_fn)\n",
-    "        self.lower = lower\n",
-    "        self.max_length = max_length\n",
-    "        \n",
-    "    def tokenize(self, s):\n",
-    "        \n",
-    "        tokens = self.tokenize_fn(s)\n",
-    "        \n",
-    "        if self.lower:\n",
-    "            tokens = [token.lower() for token in tokens]\n",
-    "            \n",
-    "        if self.max_length is not None:\n",
-    "            tokens = tokens[:self.max_length]\n",
-    "            \n",
-    "        return tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "SMsMQSuSkkt3"
-   },
-   "outputs": [],
-   "source": [
-    "max_length = 500\n",
-    "\n",
-    "tokenizer = Tokenizer(max_length = max_length)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Yie7TKWKkmeK"
-   },
-   "outputs": [],
-   "source": [
-    "def build_vocab_from_data(raw_data, tokenizer, **vocab_kwargs):\n",
-    "    \n",
-    "    token_freqs = collections.Counter()\n",
-    "    \n",
-    "    for label, text in raw_data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        token_freqs.update(tokens)\n",
-    "                \n",
-    "    vocab = torchtext.vocab.Vocab(token_freqs, **vocab_kwargs)\n",
-    "    \n",
-    "    return vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "9jW7Ci7WkoSn"
-   },
-   "outputs": [],
-   "source": [
-    "max_size = 25_000\n",
-    "\n",
-    "vocab = build_vocab_from_data(raw_train_data, tokenizer, max_size = max_size)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "cvSZt_iFkqkt"
-   },
-   "outputs": [],
-   "source": [
-    "def process_raw_data(raw_data, tokenizer, vocab):\n",
-    "    \n",
-    "    raw_data = [(label, text) for (label, text) in raw_data]\n",
-    "\n",
-    "    text_transform = sequential_transforms(tokenizer.tokenize,\n",
-    "                                           vocab_func(vocab),\n",
-    "                                           totensor(dtype=torch.long))\n",
-    "    \n",
-    "    label_transform = sequential_transforms(totensor(dtype=torch.long))\n",
-    "\n",
-    "    transforms = (label_transform, text_transform)\n",
-    "\n",
-    "    dataset = TextClassificationDataset(raw_data,\n",
-    "                                        vocab,\n",
-    "                                        transforms)\n",
-    "    \n",
-    "    return dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "bwsSiBdkktRk"
-   },
-   "outputs": [],
-   "source": [
-    "train_data = process_raw_data(raw_train_data, tokenizer, vocab)\n",
-    "valid_data = process_raw_data(raw_valid_data, tokenizer, vocab)\n",
-    "test_data = process_raw_data(raw_test_data, tokenizer, vocab)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "5m3xRusSk8v3"
-   },
-   "outputs": [],
-   "source": [
-    "class Collator:\n",
-    "    def __init__(self, pad_idx, batch_first):\n",
-    "        \n",
-    "        self.pad_idx = pad_idx\n",
-    "        self.batch_first = batch_first\n",
-    "        \n",
-    "    def collate(self, batch):\n",
-    "        \n",
-    "        labels, text = zip(*batch)\n",
-    "        \n",
-    "        labels = torch.LongTensor(labels)\n",
-    "\n",
-    "        text = nn.utils.rnn.pad_sequence(text, \n",
-    "                                         padding_value = self.pad_idx,\n",
-    "                                         batch_first = self.batch_first)\n",
-    "        \n",
-    "        return labels, text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "1ZMuZqZxk8-p"
-   },
-   "outputs": [],
-   "source": [
-    "pad_token = '<pad>'\n",
-    "pad_idx = vocab[pad_token]\n",
-    "batch_first = True\n",
-    "\n",
-    "collator = Collator(pad_idx, batch_first)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "mxG97Si9lAI2"
-   },
-   "outputs": [],
-   "source": [
-    "batch_size = 256\n",
-    "\n",
-    "train_iterator = torch.utils.data.DataLoader(train_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = True, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "valid_iterator = torch.utils.data.DataLoader(valid_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = False, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "test_iterator = torch.utils.data.DataLoader(test_data, \n",
-    "                                            batch_size, \n",
-    "                                            shuffle = False, \n",
-    "                                            collate_fn = collator.collate)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "ty3NbheMlPYs"
-   },
-   "outputs": [],
-   "source": [
-    "class CNN(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, n_filters, filter_sizes, output_dim, dropout, pad_idx):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.convs = nn.ModuleList([nn.Conv1d(in_channels = emb_dim,\n",
-    "                                              out_channels = n_filters,\n",
-    "                                              kernel_size = filter_size)\n",
-    "                                    for filter_size in filter_sizes])\n",
-    "        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "\n",
-    "    def forward(self, text):\n",
-    "\n",
-    "        # text = [batch size, seq len]\n",
-    "                \n",
-    "        embedded = self.dropout(self.embedding(text))\n",
-    "\n",
-    "        # embedded = [batch size, seq len, emb dim]\n",
-    "\n",
-    "        embedded = embedded.permute(0, 2, 1)\n",
-    "\n",
-    "        # embedded = [batch size, emb dim, seq len]\n",
-    "\n",
-    "        conved = [F.relu(conv(embedded)) for conv in self.convs]\n",
-    "\n",
-    "        # conved[n] = [batch size, n filters, seq len - filter_sizes[n] + 1]\n",
-    "\n",
-    "        pooled = [F.max_pool1d(conv, conv.shape[-1]).squeeze(-1) for conv in conved]\n",
-    "\n",
-    "        # pooled[n] = [batch size, n filters]\n",
-    "\n",
-    "        cat = torch.cat(pooled, dim = -1)\n",
-    "\n",
-    "        # cat = [batch size, n filters * len(filter_sizes)]\n",
-    "\n",
-    "        prediction = self.fc(self.dropout(cat))\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "trg6yTjBqOLZ"
-   },
-   "outputs": [],
-   "source": [
-    "input_dim = len(vocab)\n",
-    "emb_dim = 100\n",
-    "n_filters = 100\n",
-    "filter_sizes = [3, 4, 5]\n",
-    "output_dim = 2\n",
-    "dropout = 0.5\n",
-    "pad_idx = pad_idx\n",
-    "\n",
-    "model = CNN(input_dim, emb_dim, n_filters, filter_sizes, output_dim, dropout, pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "9dgdCRsqqQoD"
-   },
-   "outputs": [],
-   "source": [
-    "def count_parameters(model):\n",
-    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "bfiGzjvnqV-s",
-    "outputId": "fffbb2a6-0a0a-432f-f182-7697a6903c75"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The model has 2,621,102 trainable parameters\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'The model has {count_parameters(model):,} trainable parameters')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "name: embedding.weight, shape: torch.Size([25002, 100])\n",
-      "name: convs.0.weight, shape: torch.Size([100, 100, 3])\n",
-      "name: convs.0.bias, shape: torch.Size([100])\n",
-      "name: convs.1.weight, shape: torch.Size([100, 100, 4])\n",
-      "name: convs.1.bias, shape: torch.Size([100])\n",
-      "name: convs.2.weight, shape: torch.Size([100, 100, 5])\n",
-      "name: convs.2.bias, shape: torch.Size([100])\n",
-      "name: fc.weight, shape: torch.Size([2, 300])\n",
-      "name: fc.bias, shape: torch.Size([2])\n"
-     ]
-    }
-   ],
-   "source": [
-    "for n, p in model.named_parameters():\n",
-    "    print(f'name: {n}, shape: {p.shape}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def initialize_parameters(m):\n",
-    "    if isinstance(m, nn.Embedding):\n",
-    "        nn.init.uniform_(m.weight, -0.05, 0.05)\n",
-    "    elif isinstance(m, nn.Conv1d):\n",
-    "        nn.init.xavier_uniform_(m.weight)\n",
-    "        nn.init.zeros_(m.bias)\n",
-    "    elif isinstance(m, nn.Linear):\n",
-    "        nn.init.xavier_uniform_(m.weight)\n",
-    "        nn.init.zeros_(m.bias)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "CNN(\n",
-       "  (embedding): Embedding(25002, 100, padding_idx=1)\n",
-       "  (convs): ModuleList(\n",
-       "    (0): Conv1d(100, 100, kernel_size=(3,), stride=(1,))\n",
-       "    (1): Conv1d(100, 100, kernel_size=(4,), stride=(1,))\n",
-       "    (2): Conv1d(100, 100, kernel_size=(5,), stride=(1,))\n",
-       "  )\n",
-       "  (fc): Linear(in_features=300, out_features=2, bias=True)\n",
-       "  (dropout): Dropout(p=0.5, inplace=False)\n",
-       ")"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.apply(initialize_parameters)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Sah17A41qW5d"
-   },
-   "outputs": [],
-   "source": [
-    "glove = torchtext.experimental.vectors.GloVe(name = '6B',\n",
-    "                                             dim = emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "S1Dfcn2Nqabo"
-   },
-   "outputs": [],
-   "source": [
-    "def get_pretrained_embedding(initial_embedding, pretrained_vectors, vocab, unk_token):\n",
-    "    \n",
-    "    pretrained_embedding = torch.FloatTensor(initial_embedding.weight.clone()).detach()    \n",
-    "    pretrained_vocab = pretrained_vectors.vectors.get_stoi()\n",
-    "    \n",
-    "    unk_tokens = []\n",
-    "    \n",
-    "    for idx, token in enumerate(vocab.itos):\n",
-    "        if token in pretrained_vocab:\n",
-    "            pretrained_vector = pretrained_vectors[token]\n",
-    "            pretrained_embedding[idx] = pretrained_vector\n",
-    "        else:\n",
-    "            unk_tokens.append(token)\n",
-    "        \n",
-    "    return pretrained_embedding, unk_tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "sGyV94f7qvdr"
-   },
-   "outputs": [],
-   "source": [
-    "unk_token = '<unk>'\n",
-    "\n",
-    "pretrained_embedding, unk_tokens = get_pretrained_embedding(model.embedding, glove, vocab, unk_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 139
-    },
-    "colab_type": "code",
-    "id": "KYnGxbVisUsk",
-    "outputId": "39d1354c-9a3a-4a6e-bf4a-8595d7f4eac9"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.0220, -0.0288, -0.0422,  ...,  0.0103,  0.0218, -0.0141],\n",
-       "        [ 0.0326,  0.0222,  0.0044,  ...,  0.0249,  0.0163,  0.0052],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [-0.2925,  0.1087,  0.7920,  ..., -0.3641,  0.1822, -0.4104],\n",
-       "        [-0.7250,  0.7545,  0.1637,  ..., -0.0144, -0.1761,  0.3418],\n",
-       "        [ 1.1753,  0.0460, -0.3542,  ...,  0.4510,  0.0485, -0.4015]])"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data.copy_(pretrained_embedding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.embedding.weight.data[pad_idx] = torch.zeros(emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "DTwNU41WseMS"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = optim.Adam(model.parameters())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Rxlx7a72s1ze"
-   },
-   "outputs": [],
-   "source": [
-    "criterion = nn.CrossEntropyLoss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "1CLimBxus2yX"
-   },
-   "outputs": [],
-   "source": [
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "108fm55ftBgO"
-   },
-   "outputs": [],
-   "source": [
-    "model = model.to(device)\n",
-    "criterion = criterion.to(device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "IYCxbvXUvE5v"
-   },
-   "outputs": [],
-   "source": [
-    "def calculate_accuracy(predictions, labels):\n",
-    "    top_predictions = predictions.argmax(1, keepdim = True)\n",
-    "    correct = top_predictions.eq(labels.view_as(top_predictions)).sum()\n",
-    "    accuracy = correct.float() / labels.shape[0]\n",
-    "    return accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Ik2JQo6TvGml"
-   },
-   "outputs": [],
-   "source": [
-    "def train(model, iterator, optimizer, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.train()\n",
-    "    \n",
-    "    for labels, text in iterator:\n",
-    "        \n",
-    "        labels = labels.to(device)\n",
-    "        text = text.to(device)\n",
-    "\n",
-    "        optimizer.zero_grad()\n",
-    "        \n",
-    "        predictions = model(text)\n",
-    "        \n",
-    "        loss = criterion(predictions, labels)\n",
-    "        \n",
-    "        acc = calculate_accuracy(predictions, labels)\n",
-    "        \n",
-    "        loss.backward()\n",
-    "        \n",
-    "        optimizer.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        epoch_acc += acc.item()\n",
-    "\n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "aGy1Zk6jvIf8"
-   },
-   "outputs": [],
-   "source": [
-    "def evaluate(model, iterator, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.eval()\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "    \n",
-    "        for labels, text in iterator:\n",
-    "\n",
-    "            labels = labels.to(device)\n",
-    "            text = text.to(device)\n",
-    "            \n",
-    "            predictions = model(text)\n",
-    "            \n",
-    "            loss = criterion(predictions, labels)\n",
-    "            \n",
-    "            acc = calculate_accuracy(predictions, labels)\n",
-    "\n",
-    "            epoch_loss += loss.item()\n",
-    "            epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "9MyMRRzbvKPx"
-   },
-   "outputs": [],
-   "source": [
-    "def epoch_time(start_time, end_time):\n",
-    "    elapsed_time = end_time - start_time\n",
-    "    elapsed_mins = int(elapsed_time / 60)\n",
-    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
-    "    return elapsed_mins, elapsed_secs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 537
-    },
-    "colab_type": "code",
-    "id": "dRKwD51WvMa3",
-    "outputId": "935b7d4b-c396-42d8-8041-802ec9575cd6"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 01 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 1.370 | Train Acc: 53.26%\n",
-      "\t Val. Loss: 0.588 |  Val. Acc: 69.31%\n",
-      "Epoch: 02 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.796 | Train Acc: 60.77%\n",
-      "\t Val. Loss: 0.562 |  Val. Acc: 73.82%\n",
-      "Epoch: 03 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.620 | Train Acc: 67.86%\n",
-      "\t Val. Loss: 0.523 |  Val. Acc: 78.67%\n",
-      "Epoch: 04 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.523 | Train Acc: 74.40%\n",
-      "\t Val. Loss: 0.459 |  Val. Acc: 81.48%\n",
-      "Epoch: 05 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.459 | Train Acc: 78.51%\n",
-      "\t Val. Loss: 0.416 |  Val. Acc: 83.35%\n",
-      "Epoch: 06 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.412 | Train Acc: 81.52%\n",
-      "\t Val. Loss: 0.381 |  Val. Acc: 84.52%\n",
-      "Epoch: 07 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.374 | Train Acc: 83.71%\n",
-      "\t Val. Loss: 0.369 |  Val. Acc: 84.95%\n",
-      "Epoch: 08 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.356 | Train Acc: 84.29%\n",
-      "\t Val. Loss: 0.356 |  Val. Acc: 85.49%\n",
-      "Epoch: 09 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.339 | Train Acc: 85.20%\n",
-      "\t Val. Loss: 0.344 |  Val. Acc: 85.92%\n",
-      "Epoch: 10 | Epoch Time: 0m 9s\n",
-      "\tTrain Loss: 0.318 | Train Acc: 86.43%\n",
-      "\t Val. Loss: 0.334 |  Val. Acc: 86.28%\n"
-     ]
-    }
-   ],
-   "source": [
-    "n_epochs = 10\n",
-    "\n",
-    "best_valid_loss = float('inf')\n",
-    "\n",
-    "for epoch in range(n_epochs):\n",
-    "\n",
-    "    start_time = time.monotonic()\n",
-    "    \n",
-    "    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)\n",
-    "    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)\n",
-    "    \n",
-    "    end_time = time.monotonic()\n",
-    "\n",
-    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
-    "    \n",
-    "    if valid_loss < best_valid_loss:\n",
-    "        best_valid_loss = valid_loss\n",
-    "        torch.save(model.state_dict(), 'cnn-model.pt')\n",
-    "    \n",
-    "    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
-    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
-    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "hKOg4oARvPHJ",
-    "outputId": "b5552b10-fcca-4c29-8d4b-4f5688ef53dd"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Loss: 0.338 | Test Acc: 85.99%\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.load_state_dict(torch.load('cnn-model.pt'))\n",
-    "\n",
-    "test_loss, test_acc = evaluate(model, test_iterator, criterion, device)\n",
-    "\n",
-    "print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "tQ4Jsf_vvWgB"
-   },
-   "outputs": [],
-   "source": [
-    "def predict_sentiment(tokenizer, vocab, model, device, sentence):\n",
-    "    model.eval()\n",
-    "    tokens = tokenizer.tokenize(sentence)\n",
-    "    indexes = [vocab.stoi[token] for token in tokens]\n",
-    "    tensor = torch.LongTensor(indexes).unsqueeze(0).to(device)\n",
-    "    prediction = model(tensor)\n",
-    "    probabilities = nn.functional.softmax(prediction, dim = -1)\n",
-    "    pos_probability = probabilities.squeeze()[-1].item()\n",
-    "    return pos_probability"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "Yy7_6rhovZTE",
-    "outputId": "4297c903-8ef3-4c94-8a9e-21fbb98a6be9"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.08827298134565353"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'the absolute worst movie of all time.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "L3LmQxrgvau9",
-    "outputId": "afee78c4-6c74-4900-dd3b-53ad1c1b7b26"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.6329940557479858"
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'one of the greatest films i have ever seen in my life.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "t7Qoy21Bvb7v",
-    "outputId": "d85a8a1b-b4dc-4aea-e58e-2597087b46c2"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.060872383415699005"
-      ]
-     },
-     "execution_count": 40,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be one of the greatest films i have ever seen in my life, \\\n",
-    "but it was actually the absolute worst movie of all time.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "EPGXBr18vdQT",
-    "outputId": "1b28c7d1-9e12-462f-d9ac-2b4876b3b6b4"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.07820437103509903"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be the absolute worst movie of all time, \\\n",
-    "but it was actually one of the greatest films i have ever seen in my life.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "machine_shape": "hm",
-   "name": "scratchpad",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/experimental/5_transformer.ipynb b/experimental/5_transformer.ipynb
deleted file mode 100644
index 85b635a..0000000
--- a/experimental/5_transformer.ipynb
+++ /dev/null
@@ -1,1601 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 228
-    },
-    "colab_type": "code",
-    "id": "lIYdn1woOS1n",
-    "outputId": "0e6cb755-1903-417a-8d37-a33ccb613ed6"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.nn.functional as F\n",
-    "import torch.optim as optim\n",
-    "\n",
-    "import torchtext\n",
-    "import torchtext.experimental\n",
-    "import torchtext.experimental.vectors\n",
-    "from torchtext.experimental.datasets.raw.text_classification import RawTextIterableDataset\n",
-    "from torchtext.experimental.datasets.text_classification import TextClassificationDataset\n",
-    "from torchtext.experimental.functional import sequential_transforms, vocab_func, totensor\n",
-    "\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "import collections\n",
-    "import copy\n",
-    "import random\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "7w0Fqtf5bosU"
-   },
-   "outputs": [],
-   "source": [
-    "seed = 1234\n",
-    "\n",
-    "torch.manual_seed(seed)\n",
-    "random.seed(seed)\n",
-    "torch.backends.cudnn.deterministic = True\n",
-    "torch.backends.cudnn.benchmark = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "COPd0_YIbp6G"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_test_data = torchtext.experimental.datasets.raw.IMDB()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "raw_train_data = list(raw_train_data)\n",
-    "raw_test_data = list(raw_test_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "sqImRLskbrAd"
-   },
-   "outputs": [],
-   "source": [
-    "def get_train_valid_split(raw_train_data, split_ratio = 0.7):\n",
-    "        \n",
-    "    random.shuffle(raw_train_data)\n",
-    "        \n",
-    "    n_train_examples = int(len(raw_train_data) * split_ratio)\n",
-    "        \n",
-    "    train_data = raw_train_data[:n_train_examples]\n",
-    "    valid_data = raw_train_data[n_train_examples:]\n",
-    "    \n",
-    "    return train_data, valid_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "YgKzkSjibsCh"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_valid_data = get_train_valid_split(raw_train_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Yh1olQ-EbtLT"
-   },
-   "outputs": [],
-   "source": [
-    "class Tokenizer:\n",
-    "    def __init__(self, tokenize_fn = 'basic_english', lower = True, max_length = None, sos_token = None):\n",
-    "        \n",
-    "        self.tokenize_fn = torchtext.data.utils.get_tokenizer(tokenize_fn)\n",
-    "        self.lower = lower\n",
-    "        self.max_length = max_length\n",
-    "        self.sos_token = sos_token\n",
-    "        \n",
-    "    def tokenize(self, s):\n",
-    "        \n",
-    "        tokens = self.tokenize_fn(s)\n",
-    "        \n",
-    "        if self.lower:\n",
-    "            tokens = [token.lower() for token in tokens]\n",
-    "            \n",
-    "        if self.sos_token is not None:\n",
-    "            tokens = [self.sos_token] + tokens\n",
-    "\n",
-    "        if self.max_length is not None:\n",
-    "            tokens = tokens[:self.max_length]\n",
-    "            \n",
-    "        return tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "hEKZjV-PbuMk"
-   },
-   "outputs": [],
-   "source": [
-    "max_length = 250\n",
-    "sos_token = '<sos>'\n",
-    "\n",
-    "tokenizer = Tokenizer(max_length = max_length, sos_token = sos_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "YMpskDMRoBOT",
-    "outputId": "68995c90-d409-432a-9ed3-4614244b23bc"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['<sos>', 'hello', 'world', ',', 'how', 'are', 'you', '?']"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "s = \"hello world, how are you?\"\n",
-    "\n",
-    "tokenizer.tokenize(s)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "DQx6B6JQbvSU"
-   },
-   "outputs": [],
-   "source": [
-    "def build_vocab_from_data(raw_data, tokenizer, **vocab_kwargs):\n",
-    "        \n",
-    "    token_freqs = collections.Counter()\n",
-    "    \n",
-    "    for label, text in raw_data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        token_freqs.update(tokens)\n",
-    "                \n",
-    "    vocab = torchtext.vocab.Vocab(token_freqs, **vocab_kwargs)\n",
-    "    \n",
-    "    return vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "unWu0DuZbwVB"
-   },
-   "outputs": [],
-   "source": [
-    "max_size = 25_000\n",
-    "\n",
-    "vocab = build_vocab_from_data(raw_train_data, \n",
-    "                              tokenizer, \n",
-    "                              max_size = max_size)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Unique tokens in vocab: 25,002\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'Unique tokens in vocab: {len(vocab):,}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "zOi2KdeebxZP"
-   },
-   "outputs": [],
-   "source": [
-    "def raw_data_to_dataset(raw_data, tokenizer, vocab):\n",
-    "        \n",
-    "    text_transform = sequential_transforms(tokenizer.tokenize,\n",
-    "                                           vocab_func(vocab),\n",
-    "                                           totensor(dtype=torch.long))\n",
-    "    \n",
-    "    label_transform = sequential_transforms(lambda x: 1 if x == 'pos' else 0, \n",
-    "                                            totensor(dtype=torch.long))\n",
-    "\n",
-    "    transforms = (label_transform, text_transform)\n",
-    "\n",
-    "    dataset = TextClassificationDataset(raw_data,\n",
-    "                                        vocab,\n",
-    "                                        transforms)\n",
-    "    \n",
-    "    return dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "zSodj_hBbzVk"
-   },
-   "outputs": [],
-   "source": [
-    "train_data = raw_data_to_dataset(raw_train_data, tokenizer, vocab)\n",
-    "valid_data = raw_data_to_dataset(raw_valid_data, tokenizer, vocab)\n",
-    "test_data = raw_data_to_dataset(raw_test_data, tokenizer, vocab)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of training examples: 17,500\n",
-      "Number of validation examples: 7,500\n",
-      "Number of testing examples: 25,000\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'Number of training examples: {len(train_data):,}')\n",
-    "print(f'Number of validation examples: {len(valid_data):,}')\n",
-    "print(f'Number of testing examples: {len(test_data):,}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "c_KIBjr4b2ks"
-   },
-   "outputs": [],
-   "source": [
-    "class Collator:\n",
-    "    def __init__(self, pad_idx, batch_first):\n",
-    "        \n",
-    "        self.pad_idx = pad_idx\n",
-    "        self.batch_first = batch_first\n",
-    "        \n",
-    "    def collate(self, batch):\n",
-    "        \n",
-    "        labels, text = zip(*batch)\n",
-    "        \n",
-    "        labels = torch.LongTensor(labels)\n",
-    "\n",
-    "        text = nn.utils.rnn.pad_sequence(text, \n",
-    "                                         padding_value = self.pad_idx,\n",
-    "                                         batch_first = self.batch_first)\n",
-    "        \n",
-    "        return labels, text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "i6j7fcFYb354"
-   },
-   "outputs": [],
-   "source": [
-    "pad_token = '<pad>'\n",
-    "pad_idx = vocab[pad_token]\n",
-    "batch_first = False\n",
-    "\n",
-    "collator = Collator(pad_idx, batch_first)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "FCyc9S85b5jd"
-   },
-   "outputs": [],
-   "source": [
-    "batch_size = 256\n",
-    "\n",
-    "train_iterator = torch.utils.data.DataLoader(train_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = True, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "valid_iterator = torch.utils.data.DataLoader(valid_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = False, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "test_iterator = torch.utils.data.DataLoader(test_data, \n",
-    "                                            batch_size, \n",
-    "                                            shuffle = False, \n",
-    "                                            collate_fn = collator.collate)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "ajddJOkZb7xe"
-   },
-   "outputs": [],
-   "source": [
-    "class DefaultTransformer(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, n_heads, hid_dim, n_layers, output_dim, dropout, max_length, pad_idx):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.tok_embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.pos_embedding = nn.Embedding(max_length, emb_dim)\n",
-    "        self.layer_norm = nn.LayerNorm(emb_dim)\n",
-    "        transformer_layer = nn.TransformerEncoderLayer(emb_dim, n_heads, hid_dim, activation = 'gelu')\n",
-    "        norm = nn.LayerNorm(emb_dim)\n",
-    "        self.transformer = nn.TransformerEncoder(transformer_layer, n_layers, norm)\n",
-    "        self.fc = nn.Linear(emb_dim, output_dim)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "\n",
-    "    def forward(self, text):\n",
-    "\n",
-    "        # text = [seq len, batch size]\n",
-    "\n",
-    "        seq_len, batch_size = text.shape\n",
-    "        \n",
-    "        pos = torch.arange(0, seq_len).unsqueeze(-1).repeat(1, batch_size).to(text.device)\n",
-    "        \n",
-    "        # pos = [seq len, batch size]\n",
-    "        \n",
-    "        embedded_pos = self.pos_embedding(pos)\n",
-    "        embedded_tok = self.tok_embedding(text)\n",
-    "        \n",
-    "        embedded = self.dropout(embedded_pos + embedded_tok)\n",
-    "\n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        emedded = self.layer_norm(embedded)\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        transformed = self.transformer(embedded)\n",
-    "\n",
-    "        # transformed = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        sos_transformed = transformed[0]\n",
-    "\n",
-    "        # sos_transformed = [batch size, emb dim]\n",
-    "\n",
-    "        prediction = self.fc(self.dropout(sos_transformed))\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class TransformerEncoder(nn.Module):\n",
-    "    def __init__(self, encoder_layer, n_layers, norm = None):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        self.encoder_layers = nn.ModuleList([copy.deepcopy(encoder_layer) \n",
-    "                                             for _ in range(n_layers)])\n",
-    "        self.norm = norm\n",
-    "        \n",
-    "    def forward(self, embedded):\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        for layer in self.encoder_layers:\n",
-    "            embedded, attention = layer(embedded)\n",
-    "            \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "            \n",
-    "        if self.norm is not None:\n",
-    "            embedded = self.norm(embedded)\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        return embedded, attention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class TransformerEncoderLayer(nn.Module):\n",
-    "    def __init__(self, emb_dim, ff_layer, attention_layer, dropout):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        self.attention_layer = attention_layer\n",
-    "        self.ff_layer = ff_layer\n",
-    "        self.layer_norm_1 = nn.LayerNorm(emb_dim)\n",
-    "        self.layer_norm_2 = nn.LayerNorm(emb_dim)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "        \n",
-    "    def forward(self, embedded):\n",
-    "        \n",
-    "        _embedded, attention = self.attention_layer(embedded, embedded, embedded)\n",
-    "        embedded = embedded + self.dropout(_embedded)\n",
-    "        embedded = self.layer_norm_1(embedded)\n",
-    "        _embedded = self.ff_layer(embedded)\n",
-    "        embedded = embedded + self.dropout(_embedded)\n",
-    "        embedded = self.layer_norm_2(embedded)\n",
-    "        \n",
-    "        return embedded, attention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class MultiHeadAttentionLayer(nn.Module):\n",
-    "    def __init__(self, emb_dim, n_heads, dropout):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        assert emb_dim % n_heads == 0\n",
-    "        \n",
-    "        self.n_heads = n_heads\n",
-    "        self.head_dim = emb_dim // n_heads\n",
-    "        \n",
-    "        self.fc_q = nn.Linear(emb_dim, emb_dim)\n",
-    "        self.fc_k = nn.Linear(emb_dim, emb_dim)\n",
-    "        self.fc_v = nn.Linear(emb_dim, emb_dim)\n",
-    "        self.fc_o = nn.Linear(emb_dim, emb_dim)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "        \n",
-    "        self.scale = self.head_dim ** -0.5\n",
-    "        \n",
-    "    def forward(self, query, key, value):\n",
-    "        \n",
-    "        # query/key/value = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        seq_len, batch_size, emb_dim = query.shape\n",
-    "        \n",
-    "        Q = self.fc_q(query)\n",
-    "        K = self.fc_k(key)\n",
-    "        V = self.fc_v(value)\n",
-    "        \n",
-    "        # Q/K/V = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        Q = Q.view(seq_len, batch_size, self.n_heads, self.head_dim)\n",
-    "        K = K.view(seq_len, batch_size, self.n_heads, self.head_dim)\n",
-    "        V = V.view(seq_len, batch_size, self.n_heads, self.head_dim)\n",
-    "        \n",
-    "        # Q/K/V = [seq len, batch size, n heads, head dim]\n",
-    "        \n",
-    "        Q = Q.permute(1, 2, 0, 3)\n",
-    "        K = K.permute(1, 2, 0, 3)\n",
-    "        V = V.permute(1, 2, 0, 3)\n",
-    "        \n",
-    "        # Q/K/V = [batch size, n heads, seq len, head dim]\n",
-    "        \n",
-    "        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) * self.scale\n",
-    "        \n",
-    "        # energy = [batch size, n heads, seq len, seq len]\n",
-    "        \n",
-    "        attention = torch.softmax(energy, dim = -1)\n",
-    "        \n",
-    "        # attention = [batch size, n heads, seq len, seq len]\n",
-    "        \n",
-    "        x = torch.matmul(self.dropout(attention), V)\n",
-    "        \n",
-    "        # x = [batch size, n heads, seq len, head dim]\n",
-    "        \n",
-    "        x = x.permute(2, 0, 1, 3).contiguous()\n",
-    "        \n",
-    "        # x = [seq len, batch size, n heads, head dim]\n",
-    "        \n",
-    "        x = x.view(seq_len, batch_size, emb_dim)\n",
-    "        \n",
-    "        # x = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        x = self.fc_o(x)\n",
-    "        \n",
-    "        # x = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        return x, attention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class FeedForwardLayer(nn.Module):\n",
-    "    def __init__(self, emb_dim, hid_dim, activation, dropout):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        self.fc_1 = nn.Linear(emb_dim, hid_dim)\n",
-    "        self.fc_2 = nn.Linear(hid_dim, emb_dim)\n",
-    "        self.activation = activation\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "        \n",
-    "    def forward(self, embedded):\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        embedded = self.dropout(self.activation(self.fc_1(embedded)))\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, hid dim]\n",
-    "        \n",
-    "        embedded = self.fc_2(embedded)\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        return embedded"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class Transformer(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, encoder, output_dim, dropout, max_length, pad_idx):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.tok_embedding = nn.Embedding(input_dim, emb_dim, padding_idx = pad_idx)\n",
-    "        self.pos_embedding = nn.Embedding(max_length, emb_dim)\n",
-    "        self.layer_norm = nn.LayerNorm(emb_dim)\n",
-    "        self.encoder = encoder\n",
-    "        self.fc = nn.Linear(emb_dim, output_dim)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "\n",
-    "    def forward(self, text):\n",
-    "\n",
-    "        # text = [seq len, batch size]\n",
-    "\n",
-    "        seq_len, batch_size = text.shape\n",
-    "        \n",
-    "        pos = torch.arange(0, seq_len).unsqueeze(-1).repeat(1, batch_size).to(text.device)\n",
-    "        \n",
-    "        # pos = [seq len, batch size]\n",
-    "        \n",
-    "        embedded_pos = self.pos_embedding(pos)\n",
-    "        embedded_tok = self.tok_embedding(text)\n",
-    "        \n",
-    "        embedded = self.dropout(embedded_pos + embedded_tok)\n",
-    "\n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        emedded = self.layer_norm(embedded)\n",
-    "        \n",
-    "        # embedded = [seq len, batch size, emb dim]\n",
-    "        \n",
-    "        encoded, attention = self.encoder(embedded)\n",
-    "\n",
-    "        # encoded = [seq len, batch size, emb dim]\n",
-    "\n",
-    "        sos_encoded = encoded[0]\n",
-    "\n",
-    "        # sos_encoded = [batch size, emb dim]\n",
-    "\n",
-    "        prediction = self.fc(self.dropout(sos_encoded))\n",
-    "\n",
-    "        # prediction = [batch size, output dim]\n",
-    "\n",
-    "        return prediction, attention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_dim = len(vocab)\n",
-    "emb_dim = 100\n",
-    "n_heads = 10\n",
-    "hid_dim = 1024\n",
-    "n_layers = 3\n",
-    "output_dim = 2\n",
-    "dropout = 0.1\n",
-    "pad_idx = pad_idx\n",
-    "ff_layer_activation = F.gelu\n",
-    "encoder_norm = nn.LayerNorm(emb_dim)\n",
-    "\n",
-    "ff_layer = FeedForwardLayer(emb_dim, hid_dim, ff_layer_activation, dropout)\n",
-    "attention_layer = MultiHeadAttentionLayer(emb_dim, n_heads, dropout)\n",
-    "transformer_layer = TransformerEncoderLayer(emb_dim, ff_layer, attention_layer, dropout)\n",
-    "transformer_encoder = TransformerEncoder(transformer_layer, n_layers, encoder_norm)\n",
-    "model = Transformer(input_dim, emb_dim, transformer_encoder, output_dim, dropout, max_length, pad_idx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Kubq6Vkjfec_"
-   },
-   "outputs": [],
-   "source": [
-    "def count_parameters(model):\n",
-    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "TPaxqpJMf0-6",
-    "outputId": "4db6b90c-b273-4c94-c12d-4f68be8e1477"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The model has 3,265,974 trainable parameters\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'The model has {count_parameters(model):,} trainable parameters')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "name: tok_embedding.weight, shape: torch.Size([25002, 100])\n",
-      "name: pos_embedding.weight, shape: torch.Size([250, 100])\n",
-      "name: layer_norm.weight, shape: torch.Size([100])\n",
-      "name: layer_norm.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_q.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_q.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_k.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_k.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_v.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_v.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_o.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.0.attention_layer.fc_o.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.ff_layer.fc_1.weight, shape: torch.Size([1024, 100])\n",
-      "name: encoder.encoder_layers.0.ff_layer.fc_1.bias, shape: torch.Size([1024])\n",
-      "name: encoder.encoder_layers.0.ff_layer.fc_2.weight, shape: torch.Size([100, 1024])\n",
-      "name: encoder.encoder_layers.0.ff_layer.fc_2.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.layer_norm_1.weight, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.layer_norm_1.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.layer_norm_2.weight, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.0.layer_norm_2.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_q.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_q.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_k.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_k.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_v.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_v.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_o.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.1.attention_layer.fc_o.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.ff_layer.fc_1.weight, shape: torch.Size([1024, 100])\n",
-      "name: encoder.encoder_layers.1.ff_layer.fc_1.bias, shape: torch.Size([1024])\n",
-      "name: encoder.encoder_layers.1.ff_layer.fc_2.weight, shape: torch.Size([100, 1024])\n",
-      "name: encoder.encoder_layers.1.ff_layer.fc_2.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.layer_norm_1.weight, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.layer_norm_1.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.layer_norm_2.weight, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.1.layer_norm_2.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_q.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_q.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_k.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_k.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_v.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_v.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_o.weight, shape: torch.Size([100, 100])\n",
-      "name: encoder.encoder_layers.2.attention_layer.fc_o.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.ff_layer.fc_1.weight, shape: torch.Size([1024, 100])\n",
-      "name: encoder.encoder_layers.2.ff_layer.fc_1.bias, shape: torch.Size([1024])\n",
-      "name: encoder.encoder_layers.2.ff_layer.fc_2.weight, shape: torch.Size([100, 1024])\n",
-      "name: encoder.encoder_layers.2.ff_layer.fc_2.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.layer_norm_1.weight, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.layer_norm_1.bias, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.layer_norm_2.weight, shape: torch.Size([100])\n",
-      "name: encoder.encoder_layers.2.layer_norm_2.bias, shape: torch.Size([100])\n",
-      "name: encoder.norm.weight, shape: torch.Size([100])\n",
-      "name: encoder.norm.bias, shape: torch.Size([100])\n",
-      "name: fc.weight, shape: torch.Size([2, 100])\n",
-      "name: fc.bias, shape: torch.Size([2])\n"
-     ]
-    }
-   ],
-   "source": [
-    "for n, p in model.named_parameters():\n",
-    "    print(f'name: {n}, shape: {p.shape}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def initialize_parameters(m):\n",
-    "    if isinstance(m, nn.Embedding):\n",
-    "        nn.init.normal_(m.weight, std = 0.02)\n",
-    "    elif isinstance(m, nn.Linear):\n",
-    "        nn.init.normal_(m.weight, std = 0.02)\n",
-    "        nn.init.zeros_(m.bias)\n",
-    "    elif isinstance(m, nn.LayerNorm):\n",
-    "        nn.init.ones_(m.weight)\n",
-    "        nn.init.zeros_(m.bias)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Transformer(\n",
-       "  (tok_embedding): Embedding(25002, 100, padding_idx=1)\n",
-       "  (pos_embedding): Embedding(250, 100)\n",
-       "  (layer_norm): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "  (encoder): TransformerEncoder(\n",
-       "    (encoder_layers): ModuleList(\n",
-       "      (0): TransformerEncoderLayer(\n",
-       "        (attention_layer): MultiHeadAttentionLayer(\n",
-       "          (fc_q): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_k): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_v): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_o): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (ff_layer): FeedForwardLayer(\n",
-       "          (fc_1): Linear(in_features=100, out_features=1024, bias=True)\n",
-       "          (fc_2): Linear(in_features=1024, out_features=100, bias=True)\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (layer_norm_1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "        (layer_norm_2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "        (dropout): Dropout(p=0.1, inplace=False)\n",
-       "      )\n",
-       "      (1): TransformerEncoderLayer(\n",
-       "        (attention_layer): MultiHeadAttentionLayer(\n",
-       "          (fc_q): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_k): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_v): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_o): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (ff_layer): FeedForwardLayer(\n",
-       "          (fc_1): Linear(in_features=100, out_features=1024, bias=True)\n",
-       "          (fc_2): Linear(in_features=1024, out_features=100, bias=True)\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (layer_norm_1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "        (layer_norm_2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "        (dropout): Dropout(p=0.1, inplace=False)\n",
-       "      )\n",
-       "      (2): TransformerEncoderLayer(\n",
-       "        (attention_layer): MultiHeadAttentionLayer(\n",
-       "          (fc_q): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_k): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_v): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (fc_o): Linear(in_features=100, out_features=100, bias=True)\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (ff_layer): FeedForwardLayer(\n",
-       "          (fc_1): Linear(in_features=100, out_features=1024, bias=True)\n",
-       "          (fc_2): Linear(in_features=1024, out_features=100, bias=True)\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (layer_norm_1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "        (layer_norm_2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "        (dropout): Dropout(p=0.1, inplace=False)\n",
-       "      )\n",
-       "    )\n",
-       "    (norm): LayerNorm((100,), eps=1e-05, elementwise_affine=True)\n",
-       "  )\n",
-       "  (fc): Linear(in_features=100, out_features=2, bias=True)\n",
-       "  (dropout): Dropout(p=0.1, inplace=False)\n",
-       ")"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.apply(initialize_parameters)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "o3ZggI59f3KY"
-   },
-   "outputs": [],
-   "source": [
-    "glove = torchtext.experimental.vectors.GloVe(name = '6B',\n",
-    "                                             dim = emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "5BQXMqvKf41y"
-   },
-   "outputs": [],
-   "source": [
-    "def get_pretrained_embedding(initial_embedding, pretrained_vectors, vocab, unk_token):\n",
-    "    \n",
-    "    pretrained_embedding = torch.FloatTensor(initial_embedding.weight.clone()).detach()    \n",
-    "    pretrained_vocab = pretrained_vectors.vectors.get_stoi()\n",
-    "    \n",
-    "    unk_tokens = []\n",
-    "    \n",
-    "    for idx, token in enumerate(vocab.itos):\n",
-    "        if token in pretrained_vocab:\n",
-    "            pretrained_vector = pretrained_vectors[token]\n",
-    "            pretrained_embedding[idx] = pretrained_vector\n",
-    "        else:\n",
-    "            unk_tokens.append(token)\n",
-    "        \n",
-    "    return pretrained_embedding, unk_tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "xsG6yriEf8dA"
-   },
-   "outputs": [],
-   "source": [
-    "unk_token = '<unk>'\n",
-    "\n",
-    "pretrained_embedding, unk_tokens = get_pretrained_embedding(model.tok_embedding, glove, vocab, unk_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 139
-    },
-    "colab_type": "code",
-    "id": "aZWfRQnPf99e",
-    "outputId": "b9fbce51-9abb-42a2-8e57-88f92f4d100b"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.0017, -0.0139, -0.0213,  ...,  0.0069,  0.0128, -0.0211],\n",
-       "        [ 0.0157, -0.0071, -0.0066,  ..., -0.0251,  0.0035,  0.0269],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [-0.0288, -0.0316,  0.4083,  ...,  0.6288, -0.5348, -0.8080],\n",
-       "        [-0.0152,  0.0155, -0.0168,  ..., -0.0409,  0.0031,  0.0127],\n",
-       "        [-0.2612,  0.6821, -0.2295,  ..., -0.5306,  0.0863,  0.4852]])"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.tok_embedding.weight.data.copy_(pretrained_embedding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.tok_embedding.weight.data[pad_idx] = torch.zeros(emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "xBmTbzpRf-5x"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = optim.Adam(model.parameters())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "H-2Pqg5VgABR"
-   },
-   "outputs": [],
-   "source": [
-    "criterion = nn.CrossEntropyLoss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "tj6Q8hfMgA5q"
-   },
-   "outputs": [],
-   "source": [
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "HSK--Y91gB7W"
-   },
-   "outputs": [],
-   "source": [
-    "model = model.to(device)\n",
-    "criterion = criterion.to(device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "X8m3rLRZgDGZ"
-   },
-   "outputs": [],
-   "source": [
-    "def calculate_accuracy(predictions, labels):\n",
-    "    top_predictions = predictions.argmax(1, keepdim = True)\n",
-    "    correct = top_predictions.eq(labels.view_as(top_predictions)).sum()\n",
-    "    accuracy = correct.float() / labels.shape[0]\n",
-    "    return accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "qONO5ahxgEbN"
-   },
-   "outputs": [],
-   "source": [
-    "def train(model, iterator, optimizer, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.train()\n",
-    "    \n",
-    "    for labels, text in iterator:\n",
-    "\n",
-    "        labels = labels.to(device)\n",
-    "        text = text.to(device)\n",
-    "\n",
-    "        optimizer.zero_grad()\n",
-    "        \n",
-    "        predictions, attention = model(text)\n",
-    "        \n",
-    "        loss = criterion(predictions, labels)\n",
-    "        \n",
-    "        acc = calculate_accuracy(predictions, labels)\n",
-    "        \n",
-    "        loss.backward()\n",
-    "        \n",
-    "        optimizer.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        epoch_acc += acc.item()\n",
-    "\n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "6g9SqCLBgFh3"
-   },
-   "outputs": [],
-   "source": [
-    "def evaluate(model, iterator, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.eval()\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "    \n",
-    "        for labels, text in iterator:\n",
-    "\n",
-    "            labels = labels.to(device)\n",
-    "            text = text.to(device)\n",
-    "            \n",
-    "            predictions, attention = model(text)\n",
-    "            \n",
-    "            loss = criterion(predictions, labels)\n",
-    "            \n",
-    "            acc = calculate_accuracy(predictions, labels)\n",
-    "\n",
-    "            epoch_loss += loss.item()\n",
-    "            epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "6UO5TMjqgGpT"
-   },
-   "outputs": [],
-   "source": [
-    "def epoch_time(start_time, end_time):\n",
-    "    elapsed_time = end_time - start_time\n",
-    "    elapsed_mins = int(elapsed_time / 60)\n",
-    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
-    "    return elapsed_mins, elapsed_secs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 537
-    },
-    "colab_type": "code",
-    "id": "XN6rcPDmgIR5",
-    "outputId": "586ca3f4-340c-4040-92b0-29f5e6d3fb54"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 01 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.604 | Train Acc: 64.21%\n",
-      "\t Val. Loss: 0.457 |  Val. Acc: 78.76%\n",
-      "Epoch: 02 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.364 | Train Acc: 84.16%\n",
-      "\t Val. Loss: 0.355 |  Val. Acc: 84.73%\n",
-      "Epoch: 03 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.270 | Train Acc: 89.23%\n",
-      "\t Val. Loss: 0.384 |  Val. Acc: 84.55%\n",
-      "Epoch: 04 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.206 | Train Acc: 92.15%\n",
-      "\t Val. Loss: 0.355 |  Val. Acc: 86.63%\n",
-      "Epoch: 05 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.150 | Train Acc: 94.58%\n",
-      "\t Val. Loss: 0.435 |  Val. Acc: 86.43%\n",
-      "Epoch: 06 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.101 | Train Acc: 96.54%\n",
-      "\t Val. Loss: 0.455 |  Val. Acc: 86.67%\n",
-      "Epoch: 07 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.084 | Train Acc: 97.17%\n",
-      "\t Val. Loss: 0.505 |  Val. Acc: 84.09%\n",
-      "Epoch: 08 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.066 | Train Acc: 97.82%\n",
-      "\t Val. Loss: 0.508 |  Val. Acc: 86.05%\n",
-      "Epoch: 09 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.041 | Train Acc: 98.78%\n",
-      "\t Val. Loss: 0.605 |  Val. Acc: 86.25%\n",
-      "Epoch: 10 | Epoch Time: 0m 25s\n",
-      "\tTrain Loss: 0.035 | Train Acc: 99.01%\n",
-      "\t Val. Loss: 0.681 |  Val. Acc: 85.79%\n"
-     ]
-    }
-   ],
-   "source": [
-    "n_epochs = 10\n",
-    "\n",
-    "best_valid_loss = float('inf')\n",
-    "\n",
-    "for epoch in range(n_epochs):\n",
-    "\n",
-    "    start_time = time.monotonic()\n",
-    "    \n",
-    "    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)\n",
-    "    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)\n",
-    "    \n",
-    "    end_time = time.monotonic()\n",
-    "\n",
-    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
-    "    \n",
-    "    if valid_loss < best_valid_loss:\n",
-    "        best_valid_loss = valid_loss\n",
-    "        torch.save(model.state_dict(), 'transformer-model.pt')\n",
-    "    \n",
-    "    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
-    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
-    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "lZdkI89ggJZj",
-    "outputId": "4a36eb35-d243-4da7-f16b-c72af5787809"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Loss: 0.371 | Test Acc: 85.62%\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.load_state_dict(torch.load('transformer-model.pt'))\n",
-    "\n",
-    "test_loss, test_acc = evaluate(model, test_iterator, criterion, device)\n",
-    "\n",
-    "print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "G6xX77_FigbW"
-   },
-   "outputs": [],
-   "source": [
-    "def predict_sentiment(tokenizer, vocab, model, device, sentence):\n",
-    "    model.eval()\n",
-    "    tokens = tokenizer.tokenize(sentence)\n",
-    "    indexes = [vocab.stoi[token] for token in tokens]\n",
-    "    tensor = torch.LongTensor(indexes).unsqueeze(-1).to(device)\n",
-    "    prediction, attention = model(tensor)\n",
-    "    probabilities = nn.functional.softmax(prediction, dim = -1)\n",
-    "    pos_probability = probabilities.squeeze()[-1].item()\n",
-    "    return pos_probability, attention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def display_attention(tokens, attention):\n",
-    "    \n",
-    "    fig = plt.figure(figsize=(10,20))\n",
-    "    \n",
-    "    attention = attention.cpu().detach().squeeze(0).mean(0)\n",
-    "    \n",
-    "    ax = fig.add_subplot(1,1,1)\n",
-    "    \n",
-    "    ax.matshow(attention, cmap='bone')\n",
-    "    \n",
-    "    ax.set_xticks(range(len(tokens)))\n",
-    "    ax.set_xticklabels(tokens, rotation=45)\n",
-    "    ax.set_yticks(range(len(tokens)))\n",
-    "    ax.set_yticklabels(tokens)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "ALLuVhn7m_OF",
-    "outputId": "62cee726-84fd-4ee0-9d36-8ae54a71a356"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.0068605015985667706\n"
-     ]
-    }
-   ],
-   "source": [
-    "sentence = 'the absolute worst movie of all time.'\n",
-    "\n",
-    "sentiment, attention = predict_sentiment(tokenizer, vocab, model, device, sentence)\n",
-    "print(sentiment)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 720x1440 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "display_attention(tokenizer.tokenize(sentence), attention)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "aLqml9PenBMp",
-    "outputId": "1614cf67-7583-4cb6-ab17-09ea8d1774a6"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.991644561290741\n"
-     ]
-    }
-   ],
-   "source": [
-    "sentence = 'one of the greatest films i have ever seen in my life.'\n",
-    "\n",
-    "sentiment, attention = predict_sentiment(tokenizer, vocab, model, device, sentence)\n",
-    "print(sentiment)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 720x1440 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "display_attention(tokenizer.tokenize(sentence), attention)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "MyjsYDeJnCui",
-    "outputId": "d87ccbee-9e91-4e64-fb2b-aaaf474f12e6"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.051942430436611176\n"
-     ]
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be one of the greatest films i have ever seen in my life, \\\n",
-    "but it was actually the absolute worst movie of all time.\"\n",
-    "\n",
-    "sentiment, attention = predict_sentiment(tokenizer, vocab, model, device, sentence)\n",
-    "print(sentiment)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 720x1440 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "display_attention(tokenizer.tokenize(sentence), attention)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "u1ezlkkknE2M",
-    "outputId": "1ba5ed25-cc74-4abc-ea51-f15798c45023"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.06497201323509216\n"
-     ]
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be the absolute worst movie of all time, \\\n",
-    "but it was actually one of the greatest films i have ever seen in my life.\"\n",
-    "\n",
-    "sentiment, attention = predict_sentiment(tokenizer, vocab, model, device, sentence)\n",
-    "print(sentiment)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 720x1440 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "display_attention(tokenizer.tokenize(sentence), attention)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def count_pos_neg_token(token, tokenizer, data):\n",
-    "    n_examples = {'pos': 0, 'neg': 0}\n",
-    "    n_count = {'pos': 0, 'neg': 0}\n",
-    "    for label, text in data.data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        count = tokens.count(token)\n",
-    "        if count > 0:\n",
-    "            n_examples[label] += 1\n",
-    "            n_count[label] += count\n",
-    "    return n_examples['pos'], n_count['pos'], n_examples['neg'], n_count['neg']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\"worst\" appears in 106 positive examples, 111 times in total.\n",
-      "\"worst\" appears in 1246 negative examples, 1457 times in total.\n"
-     ]
-    }
-   ],
-   "source": [
-    "token = 'worst'\n",
-    "\n",
-    "n_pos, count_pos, n_neg, count_neg = count_pos_neg_token(token, tokenizer, train_data)\n",
-    "\n",
-    "print(f'\"{token}\" appears in {n_pos} positive examples, {count_pos} times in total.')\n",
-    "print(f'\"{token}\" appears in {n_neg} negative examples, {count_neg} times in total.')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\"greatest\" appears in 242 positive examples, 264 times in total.\n",
-      "\"greatest\" appears in 86 negative examples, 91 times in total.\n"
-     ]
-    }
-   ],
-   "source": [
-    "token = 'greatest'\n",
-    "\n",
-    "n_pos, count_pos, n_neg, count_neg = count_pos_neg_token(token, tokenizer, train_data)\n",
-    "\n",
-    "print(f'\"{token}\" appears in {n_pos} positive examples, {count_pos} times in total.')\n",
-    "print(f'\"{token}\" appears in {n_neg} negative examples, {count_neg} times in total.')"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "machine_shape": "hm",
-   "name": "scratchpad",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/experimental/a_nbow-bag.ipynb b/experimental/a_nbow-bag.ipynb
deleted file mode 100644
index 097aec4..0000000
--- a/experimental/a_nbow-bag.ipynb
+++ /dev/null
@@ -1,950 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 228
-    },
-    "colab_type": "code",
-    "id": "-V90fMxJdFl7",
-    "outputId": "2bbc3f28-84e3-47bd-97a2-ea0c2f0cf395"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.optim as optim\n",
-    "\n",
-    "import torchtext\n",
-    "import torchtext.experimental\n",
-    "import torchtext.experimental.vectors\n",
-    "from torchtext.experimental.datasets.raw.text_classification import RawTextIterableDataset\n",
-    "from torchtext.experimental.datasets.text_classification import TextClassificationDataset\n",
-    "from torchtext.experimental.functional import sequential_transforms, vocab_func, totensor\n",
-    "\n",
-    "import collections\n",
-    "import random\n",
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "tOO7b-Z1dFmA"
-   },
-   "outputs": [],
-   "source": [
-    "seed = 1234\n",
-    "\n",
-    "torch.manual_seed(seed)\n",
-    "random.seed(seed)\n",
-    "torch.backends.cudnn.deterministic = True\n",
-    "torch.backends.cudnn.benchmark = False"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "FhBXG95YdFmD"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_test_data = torchtext.experimental.datasets.raw.IMDB()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "rOTczrIEdFmY"
-   },
-   "outputs": [],
-   "source": [
-    "def get_train_valid_split(raw_train_data, split_ratio = 0.7):\n",
-    "\n",
-    "    raw_train_data = list(raw_train_data)\n",
-    "        \n",
-    "    random.shuffle(raw_train_data)\n",
-    "        \n",
-    "    n_train_examples = int(len(raw_train_data) * split_ratio)\n",
-    "        \n",
-    "    train_data = raw_train_data[:n_train_examples]\n",
-    "    valid_data = raw_train_data[n_train_examples:]\n",
-    "    \n",
-    "    train_data = RawTextIterableDataset(train_data)\n",
-    "    valid_data = RawTextIterableDataset(valid_data)\n",
-    "    \n",
-    "    return train_data, valid_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "C6Tp4CyQdFma"
-   },
-   "outputs": [],
-   "source": [
-    "raw_train_data, raw_valid_data = get_train_valid_split(raw_train_data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "LTJjCocRdFmh"
-   },
-   "outputs": [],
-   "source": [
-    "class Tokenizer:\n",
-    "    def __init__(self, tokenize_fn = 'basic_english', lower = True, max_length = None):\n",
-    "        \n",
-    "        self.tokenize_fn = torchtext.data.utils.get_tokenizer(tokenize_fn)\n",
-    "        self.lower = lower\n",
-    "        self.max_length = max_length\n",
-    "        \n",
-    "    def tokenize(self, s):\n",
-    "        \n",
-    "        tokens = self.tokenize_fn(s)\n",
-    "        \n",
-    "        if self.lower:\n",
-    "            tokens = [token.lower() for token in tokens]\n",
-    "            \n",
-    "        if self.max_length is not None:\n",
-    "            tokens = tokens[:self.max_length]\n",
-    "            \n",
-    "        return tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "5P2KumuDdFmj"
-   },
-   "outputs": [],
-   "source": [
-    "max_length = 500\n",
-    "\n",
-    "tokenizer = Tokenizer(max_length = max_length)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "V1albCvadFmm",
-    "outputId": "5c7c30f2-c6b7-4098-990d-7bfcdc2446f1"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['this', 'film', 'is', 'terrible', '.', 'i', 'hate', 'it', 'and', 'it', \"'\", 's', 'bad', '!']\n"
-     ]
-    }
-   ],
-   "source": [
-    "s = \"this film is terrible. i hate it and it's bad!\"\n",
-    "\n",
-    "print(tokenizer.tokenize(s))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "anC7_ViodFmp"
-   },
-   "outputs": [],
-   "source": [
-    "def build_vocab_from_data(raw_data, tokenizer, **vocab_kwargs):\n",
-    "    \n",
-    "    token_freqs = collections.Counter()\n",
-    "    \n",
-    "    for label, text in raw_data:\n",
-    "        tokens = tokenizer.tokenize(text)\n",
-    "        token_freqs.update(tokens)\n",
-    "                \n",
-    "    vocab = torchtext.vocab.Vocab(token_freqs, **vocab_kwargs)\n",
-    "    \n",
-    "    return vocab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "rgHPS1xzdFmt"
-   },
-   "outputs": [],
-   "source": [
-    "max_size = 25_000\n",
-    "\n",
-    "vocab = build_vocab_from_data(raw_train_data, tokenizer, max_size = max_size)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "xiW0GItTdFm6"
-   },
-   "outputs": [],
-   "source": [
-    "def process_raw_data(raw_data, tokenizer, vocab):\n",
-    "    \n",
-    "    raw_data = [(label, text) for (label, text) in raw_data]\n",
-    "\n",
-    "    text_transform = sequential_transforms(tokenizer.tokenize,\n",
-    "                                           vocab_func(vocab),\n",
-    "                                           totensor(dtype=torch.long))\n",
-    "    \n",
-    "    label_transform = sequential_transforms(totensor(dtype=torch.long))\n",
-    "\n",
-    "    transforms = (label_transform, text_transform)\n",
-    "\n",
-    "    dataset = TextClassificationDataset(raw_data,\n",
-    "                                        vocab,\n",
-    "                                        transforms)\n",
-    "    \n",
-    "    return dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "4Rec_Wk6dFnD"
-   },
-   "outputs": [],
-   "source": [
-    "train_data = process_raw_data(raw_train_data, tokenizer, vocab)\n",
-    "valid_data = process_raw_data(raw_valid_data, tokenizer, vocab)\n",
-    "test_data = process_raw_data(raw_test_data, tokenizer, vocab)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "egzlLweTdFnH"
-   },
-   "outputs": [],
-   "source": [
-    "class Collator:      \n",
-    "    def collate(self, batch):\n",
-    "        \n",
-    "        labels, text = zip(*batch)\n",
-    "        \n",
-    "        labels = torch.LongTensor(labels)\n",
-    "        \n",
-    "        lengths = [len(x) for x in text]\n",
-    "        lengths = torch.LongTensor([0] + lengths[:-1])\n",
-    "        \n",
-    "        offsets = torch.cumsum(lengths, dim = 0)\n",
-    "                \n",
-    "        text = torch.cat(text)\n",
-    "     \n",
-    "        return labels, text, offsets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "TYLvjhoSdFnM"
-   },
-   "outputs": [],
-   "source": [
-    "collator = Collator()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "7Ly4l1I8dFnR"
-   },
-   "outputs": [],
-   "source": [
-    "batch_size = 256\n",
-    "\n",
-    "train_iterator = torch.utils.data.DataLoader(train_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = True, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "valid_iterator = torch.utils.data.DataLoader(valid_data, \n",
-    "                                             batch_size, \n",
-    "                                             shuffle = False, \n",
-    "                                             collate_fn = collator.collate)\n",
-    "\n",
-    "test_iterator = torch.utils.data.DataLoader(test_data, \n",
-    "                                            batch_size, \n",
-    "                                            shuffle = False, \n",
-    "                                            collate_fn = collator.collate)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "dbh38jHEdFnV"
-   },
-   "outputs": [],
-   "source": [
-    "class NBOW(nn.Module):\n",
-    "    def __init__(self, input_dim, emb_dim, output_dim):\n",
-    "        super().__init__()\n",
-    "        \n",
-    "        self.embedding = nn.EmbeddingBag(input_dim, emb_dim)\n",
-    "        self.fc = nn.Linear(emb_dim, output_dim)\n",
-    "        \n",
-    "    def forward(self, text, offsets):\n",
-    "        \n",
-    "        # text = [seq len * batch size]\n",
-    "        # offsets = [batch size]\n",
-    "        \n",
-    "        embedded = self.embedding(text, offsets)\n",
-    "        \n",
-    "        # embedded = [batch size, emb dim]\n",
-    "        \n",
-    "        prediction = self.fc(embedded)\n",
-    "        \n",
-    "        # prediction = [batch size, output dim]\n",
-    "        \n",
-    "        return prediction"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Ga1nXhindFnZ"
-   },
-   "outputs": [],
-   "source": [
-    "input_dim = len(vocab)\n",
-    "emb_dim = 100\n",
-    "output_dim = 2\n",
-    "\n",
-    "model = NBOW(input_dim, emb_dim, output_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "UyIJC0tYdFnc"
-   },
-   "outputs": [],
-   "source": [
-    "def count_parameters(model):\n",
-    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "1sJRLyewdFng",
-    "outputId": "e7e357e1-1cc7-4aa4-ff40-4d749209759d"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The model has 2,500,402 trainable parameters\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'The model has {count_parameters(model):,} trainable parameters')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "BPsihrZudFnl"
-   },
-   "outputs": [],
-   "source": [
-    "glove = torchtext.experimental.vectors.GloVe(name = '6B',\n",
-    "                                             dim = emb_dim)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "4BFftRDMdFnx"
-   },
-   "outputs": [],
-   "source": [
-    "def get_pretrained_embedding(initial_embedding, pretrained_vectors, vocab, unk_token):\n",
-    "    \n",
-    "    pretrained_embedding = torch.FloatTensor(initial_embedding.weight.clone()).detach()    \n",
-    "    pretrained_vocab = pretrained_vectors.vectors.get_stoi()\n",
-    "    \n",
-    "    unk_tokens = []\n",
-    "    \n",
-    "    for idx, token in enumerate(vocab.itos):\n",
-    "        if token in pretrained_vocab:\n",
-    "            pretrained_vector = pretrained_vectors[token]\n",
-    "            pretrained_embedding[idx] = pretrained_vector\n",
-    "        else:\n",
-    "            unk_tokens.append(token)\n",
-    "        \n",
-    "    return pretrained_embedding, unk_tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "QRToW07JdFnz"
-   },
-   "outputs": [],
-   "source": [
-    "unk_token = '<unk>'\n",
-    "\n",
-    "pretrained_embedding, unk_tokens = get_pretrained_embedding(model.embedding, glove, vocab, unk_token)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 139
-    },
-    "colab_type": "code",
-    "id": "AnE6D4MAdFn_",
-    "outputId": "8b3fea1a-9bcb-4fd9-ba78-72baee94f96a"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.5903, -0.1947, -0.2415],\n",
-       "        [ 1.3204,  1.5997, -1.0792,  ...,  0.6060,  0.2209, -0.8245],\n",
-       "        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],\n",
-       "        ...,\n",
-       "        [-0.2925,  0.1087,  0.7920,  ..., -0.3641,  0.1822, -0.4104],\n",
-       "        [-0.7250,  0.7545,  0.1637,  ..., -0.0144, -0.1761,  0.3418],\n",
-       "        [ 1.1753,  0.0460, -0.3542,  ...,  0.4510,  0.0485, -0.4015]])"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.embedding.weight.data.copy_(pretrained_embedding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "DJloauERdFoF"
-   },
-   "outputs": [],
-   "source": [
-    "optimizer = optim.Adam(model.parameters())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "fPPZ0cs_dFoH"
-   },
-   "outputs": [],
-   "source": [
-    "criterion = nn.CrossEntropyLoss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "HGUcFIupdFoK",
-    "outputId": "e5d9b842-689b-49ca-a4f4-08574f0524ee"
-   },
-   "outputs": [],
-   "source": [
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "Ynf7j6kQdFoM"
-   },
-   "outputs": [],
-   "source": [
-    "model = model.to(device)\n",
-    "criterion = criterion.to(device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "977iykeOdFoP"
-   },
-   "outputs": [],
-   "source": [
-    "def calculate_accuracy(predictions, labels):\n",
-    "    top_predictions = predictions.argmax(1, keepdim = True)\n",
-    "    correct = top_predictions.eq(labels.view_as(top_predictions)).sum()\n",
-    "    accuracy = correct.float() / labels.shape[0]\n",
-    "    return accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "HPNI8DJudFoS"
-   },
-   "outputs": [],
-   "source": [
-    "def train(model, iterator, optimizer, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.train()\n",
-    "    \n",
-    "    for labels, text, offsets in iterator:\n",
-    "        \n",
-    "        labels = labels.to(device)\n",
-    "        text = text.to(device)\n",
-    "        offsets = offsets.to(device)\n",
-    "        \n",
-    "        optimizer.zero_grad()\n",
-    "        \n",
-    "        predictions = model(text, offsets)\n",
-    "        \n",
-    "        loss = criterion(predictions, labels)\n",
-    "        \n",
-    "        acc = calculate_accuracy(predictions, labels)\n",
-    "        \n",
-    "        loss.backward()\n",
-    "        \n",
-    "        optimizer.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "kp6pV5xKdFoV"
-   },
-   "outputs": [],
-   "source": [
-    "def evaluate(model, iterator, criterion, device):\n",
-    "    \n",
-    "    epoch_loss = 0\n",
-    "    epoch_acc = 0\n",
-    "    \n",
-    "    model.eval()\n",
-    "    \n",
-    "    with torch.no_grad():\n",
-    "    \n",
-    "        for labels, text, offsets in iterator:\n",
-    "\n",
-    "            labels = labels.to(device)\n",
-    "            text = text.to(device)\n",
-    "            offsets = offsets.to(device)\n",
-    "            \n",
-    "            predictions = model(text, offsets)\n",
-    "            \n",
-    "            loss = criterion(predictions, labels)\n",
-    "            \n",
-    "            acc = calculate_accuracy(predictions, labels)\n",
-    "\n",
-    "            epoch_loss += loss.item()\n",
-    "            epoch_acc += acc.item()\n",
-    "        \n",
-    "    return epoch_loss / len(iterator), epoch_acc / len(iterator)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "8YzL45gYdFoX"
-   },
-   "outputs": [],
-   "source": [
-    "def epoch_time(start_time, end_time):\n",
-    "    elapsed_time = end_time - start_time\n",
-    "    elapsed_mins = int(elapsed_time / 60)\n",
-    "    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
-    "    return elapsed_mins, elapsed_secs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 537
-    },
-    "colab_type": "code",
-    "id": "0A8wv7-xdFoa",
-    "outputId": "238f01bf-5438-482a-80ac-75c70cb20ed1"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 01 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.678 | Train Acc: 63.94%\n",
-      "\t Val. Loss: 0.659 |  Val. Acc: 70.86%\n",
-      "Epoch: 02 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.634 | Train Acc: 72.62%\n",
-      "\t Val. Loss: 0.608 |  Val. Acc: 74.09%\n",
-      "Epoch: 03 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.570 | Train Acc: 77.51%\n",
-      "\t Val. Loss: 0.542 |  Val. Acc: 78.16%\n",
-      "Epoch: 04 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.497 | Train Acc: 81.81%\n",
-      "\t Val. Loss: 0.477 |  Val. Acc: 81.68%\n",
-      "Epoch: 05 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.430 | Train Acc: 84.98%\n",
-      "\t Val. Loss: 0.424 |  Val. Acc: 84.21%\n",
-      "Epoch: 06 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.375 | Train Acc: 87.18%\n",
-      "\t Val. Loss: 0.387 |  Val. Acc: 85.68%\n",
-      "Epoch: 07 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.334 | Train Acc: 88.58%\n",
-      "\t Val. Loss: 0.357 |  Val. Acc: 86.61%\n",
-      "Epoch: 08 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.302 | Train Acc: 89.62%\n",
-      "\t Val. Loss: 0.337 |  Val. Acc: 87.14%\n",
-      "Epoch: 09 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.274 | Train Acc: 90.65%\n",
-      "\t Val. Loss: 0.321 |  Val. Acc: 87.67%\n",
-      "Epoch: 10 | Epoch Time: 0m 4s\n",
-      "\tTrain Loss: 0.253 | Train Acc: 91.37%\n",
-      "\t Val. Loss: 0.308 |  Val. Acc: 88.14%\n"
-     ]
-    }
-   ],
-   "source": [
-    "n_epochs = 10\n",
-    "\n",
-    "best_valid_loss = float('inf')\n",
-    "\n",
-    "for epoch in range(n_epochs):\n",
-    "\n",
-    "    start_time = time.monotonic()\n",
-    "    \n",
-    "    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)\n",
-    "    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)\n",
-    "    \n",
-    "    end_time = time.monotonic()\n",
-    "\n",
-    "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
-    "    \n",
-    "    if valid_loss < best_valid_loss:\n",
-    "        best_valid_loss = valid_loss\n",
-    "        torch.save(model.state_dict(), 'nbow-model.pt')\n",
-    "    \n",
-    "    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n",
-    "    print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')\n",
-    "    print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "oMHAuMFNdFoc",
-    "outputId": "58b32f9a-8c39-4818-b526-1a80e435f3ae"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Loss: 0.327 | Test Acc: 86.80%\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.load_state_dict(torch.load('nbow-model.pt'))\n",
-    "\n",
-    "test_loss, test_acc = evaluate(model, test_iterator, criterion, device)\n",
-    "\n",
-    "print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "sEDiSM3fdFog"
-   },
-   "outputs": [],
-   "source": [
-    "def predict_sentiment(tokenizer, vocab, model, device, sentence):\n",
-    "    model.eval()\n",
-    "    tokens = tokenizer.tokenize(sentence)\n",
-    "    indexes = [vocab.stoi[token] for token in tokens]\n",
-    "    tensor = torch.LongTensor(indexes).to(device)\n",
-    "    offset = torch.LongTensor([0]).to(device)\n",
-    "    prediction = model(tensor, offset)\n",
-    "    probabilities = nn.functional.softmax(prediction, dim = -1)\n",
-    "    pos_probability = probabilities.squeeze()[-1].item()\n",
-    "    return pos_probability"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "ycEAWhYIdFoi",
-    "outputId": "8a675641-fd79-46a6-b4e6-0b2006f866cc"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.00038787935045547783"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'the absolute worst movie of all time.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "cuMFqIoJdFok",
-    "outputId": "12c964fc-6788-459c-ad5e-ca0af366b1d4"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.9986314177513123"
-      ]
-     },
-     "execution_count": 36,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = 'one of the greatest films i have ever seen in my life.'\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "zausUPENdFoo",
-    "outputId": "2bdd06df-dab7-47ea-8952-8bd82d39bac2"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.6374390721321106"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be one of the greatest films i have ever seen in my life, \\\n",
-    "but it was actually the absolute worst movie of all time.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 35
-    },
-    "colab_type": "code",
-    "id": "e15vpNJYdFor",
-    "outputId": "eed3ae38-d01a-4476-a235-8fd3582240f3"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.6374390721321106"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentence = \"i thought it was going to be the absolute worst movie of all time, \\\n",
-    "but it was actually one of the greatest films i have ever seen in my life.\"\n",
-    "\n",
-    "predict_sentiment(tokenizer, vocab, model, device, sentence)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "machine_shape": "hm",
-   "name": "1_nbow.ipynb",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}