### Importing required libraries

In [3]:
# Silence all warnings for the remainder of the session.
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for `warnings.warn`: accepts any arguments, returns None."""
    return None


# Install an 'ignore' filter, then swap `warnings.warn` for the no-op above so
# calls made through `warnings.warn` do nothing at all.
warnings.filterwarnings('ignore')
warnings.warn = warn

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from itertools import accumulate

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split

from torchtext.datasets import AG_NEWS
from torchtext.data.utils import get_tokenizer 
from torchtext.vocab import build_vocab_from_iterator
from torchtext.data.functional import to_map_style_dataset

from sklearn.manifold import TSNE
import plotly.graph_objs as go
from IPython.display import Markdown as md

### Defining helper functions

In [5]:
def plot(COST,ACC):
    """Draw loss and accuracy curves on one figure with two y-axes.

    Args:
        COST: sequence of loss values, plotted in red against the left y-axis
            (labelled 'total loss'). Values are plotted against their index;
            the x-axis is labelled 'epoch'.
        ACC: sequence of accuracy values, plotted in blue against the right
            y-axis (labelled 'accuracy').

    Returns:
        None. The figure is displayed with `plt.show()`.
    """
    fig, ax1 = plt.subplots()

    # Left axis: loss curve.
    color = 'tab:red'
    ax1.plot(COST, color=color)
    ax1.set_xlabel('epoch', color=color)
    ax1.set_ylabel('total loss', color=color)
    ax1.tick_params(axis='y', color=color)

    # Right axis: twinx() creates a second y-axis that shares ax1's x-axis,
    # so both curves can use independent vertical scales.
    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('accuracy', color=color)
    ax2.plot(ACC, color=color)
    ax2.tick_params(axis='y', color=color)

    # Keep the right-hand y-label from being clipped.
    fig.tight_layout()

    plt.show()

### The Dataset

#### Printing an example document from the AG NEWS dataset

In [15]:
# Human-readable names for the integer class ids, numbered from 1
ag_news_label = dict(enumerate(("World", "Sports", "Business", "Sci/Tec"), start=1))

# Iterator over the training split of AG_NEWS (from the "torchtext" library)
train_iter = iter(AG_NEWS(split='train'))

# Pull the first item and unpack it as (class id, document text)
y, text = next(train_iter)
print(f"Class: {ag_news_label[y]}\nText: {text}")

Class: Business
Text: Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again.


#### Tokenizing and building the vocabulary

In [29]:
# "basic_english" tokenizer provided by the "torchtext" library
tokenizer = get_tokenizer("basic_english")

# Fresh iterator over the AG_NEWS training split (from the "torchtext" library)
train_iter = iter(AG_NEWS(split='train'))

# generator that tokenizes one document at a time
def yield_tokens(data_iter):
    """Lazily yield `tokenizer(text)` for every item of `data_iter`.

    Each item is unpacked as a 2-tuple; the first element is ignored and the
    second is passed to the module-level `tokenizer`.
    """
    yield from (tokenizer(document) for _label, document in data_iter)

# Build the vocabulary with 'build_vocab_from_iterator' from the "torchtext" library,
# registering "<unk>" as a special token.
vocab = build_vocab_from_iterator(
    yield_tokens(train_iter),
    specials=["<unk>"],
)

# Out-of-vocabulary (OOV) lookups return the index of "<unk>"
vocab.set_default_index(vocab["<unk>"])

# printing some token indices