[ENHANCEMENT] Fixes issue #19 partly

varunp2k · varunp2k · commit 00b8b02eef80 · 2021-01-10T10:30:04.000+05:30
diff --git a/Ch6/02_BERT_ATIS.ipynb b/Ch6/02_BERT_ATIS.ipynb
@@ -345,8 +345,6 @@
     }
    ],
    "source": [
-    "# from utils import fetch_data, read_method\n",
-    "\n",
     "sents,labels,intents = fetch_data('atis.test.w-intent.iob')\n",
     "\n",
     "test_sentences = [\" \".join(i) for i in sents]\n",
@@ -1080,7 +1078,6 @@
     "optimizer = BertAdam(optimizer_grouped_parameters, lr=3e-5)\n",
     "\n",
     "\n",
-    "\n",
     "# Function to calculate the accuracy of our predictions vs labels\n",
     "def flat_accuracy(preds, labels):\n",
     "    pred_flat = np.argmax(preds, axis=1).flatten()\n",
@@ -1179,7 +1176,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.10"
+   "version": "3.6.12"
   }
  },
  "nbformat": 4,
diff --git a/Ch6/03_BERT_ATIS_Binary.ipynb b/Ch6/03_BERT_ATIS_Binary.ipynb
@@ -11,6 +11,13 @@
     "In this notebook we build a binary classifier for the ATIS Dataset using [BERT](https://arxiv.org/abs/1810.04805), a pre-trained NLP model open-sourced by Google in late 2018 that can be used for [Transfer Learning](https://towardsdatascience.com/transfer-learning-in-nlp-fecc59f546e4) on text data. This notebook has been adapted from this [Article](https://towardsdatascience.com/bert-for-dummies-step-by-step-tutorial-fb90890ffe03). The link for the dataset can be found [here](https://www.kaggle.com/siddhadev/ms-cntk-atis/data#).<br> This notebook requires a GPU to run. We suggest running it on your local machine only if you have a GPU set up; otherwise, you can use Google Colab."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -115,8 +122,8 @@
     }
    ],
    "source": [
-    "#importing a few necessary packages and setting the DATA directory\n",
     "\n",
+    "# If not using Colab, comment out the line below\n",
     "%tensorflow_version 1.x\n",
     "\n",
     "from torch.nn import Adam\n",
@@ -150,6 +157,13 @@
     "torch.cuda.get_device_name(0)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Loading"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -345,6 +359,13 @@
     "query_data_test, intent_data_test, intent_data_label_test, slot_data_test = load_atis('atis.test.pkl')\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's look at a few training queries."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -381,6 +402,14 @@
     "query_data_train"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Pre-processing\n",
+    "We need to convert the sentences to tensors."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -431,15 +460,11 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "S9SMEwslo-ve"
-   },
-   "outputs": [],
-   "source": []
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "BERT expects data to be in a specific format, i.e., [CLS] token1, token2, ... [SEP]"
+   ]
   },
   {
    "cell_type": "code",
@@ -508,6 +533,13 @@
     "input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype=\"long\", truncating=\"post\", padding=\"post\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Creating the BERT attention masks"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -579,6 +611,13 @@
     "validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Training"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -913,6 +952,13 @@
     "model.cuda()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fine-Tuning BERT"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 0,
@@ -1149,7 +1195,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.10"
+   "version": "3.6.12"
   }
  },
  "nbformat": 4,
diff --git a/Ch6/04_CRF_SNIPS_slots.ipynb b/Ch6/04_CRF_SNIPS_slots.ipynb
@@ -2681,7 +2681,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.6.12"
   },
   "toc": {
    "base_numbering": 1,