# Text Classification Task
In this task, you would require to claasify the BBC News text into 5 classes ['business' 'entertainment' 'politics' 'sport''tech'] For this task, the code skeleton has been given and you have to write your code in the #TODO part.

## Importing relevant libraries 
If any of the below list libraries is not installed already, then use "pip install #library_name" to install it

In [1]:
!pip install torch==1.6.0



In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score
from sklearn.feature_extraction.text import CountVectorizer
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Importing BBC News Dataset
Source data from public data set on BBC news articles:
D. Greene and P. Cunningham. "Practical Solutions to the Problem of Diagonal Dominance in Kernel Document Clustering", Proc. ICML 2006. [PDF] [BibTeX].

http://mlg.ucd.ie/datasets/bbc.html

Cleaned up version of the Dataset is given as csv file with the assignment

In [3]:
data_train = pd.read_csv("bbc-text_train.csv")
data_test= pd.read_csv("bbc-text_test.csv")

In [4]:
data_train.head()

Unnamed: 0,category,text
0,entertainment,farrell due to make us tv debut actor colin fa...
1,business,china continues rapid growth china s economy h...
2,business,ebbers aware of worldcom fraud former worldc...
3,entertainment,school tribute for tv host carson more than 1 ...
4,tech,broadband fuels online expression fast web acc...


In [5]:
data_train['category'].value_counts()

sport            413
business         409
politics         334
tech             319
entertainment    305
Name: category, dtype: int64

## Splitting training data into Train and validation set
Note: Validation set is surrogate to test set and while training the network , we evaluate the model on validation set

In [6]:
train_x_df,val_x_df,train_y_df,val_y_df = train_test_split(data_train['text'],data_train['category'],test_size=0.2,random_state=42)

## Encoding prediction classes/labels into integers


In [7]:
le =LabelEncoder()
le.fit(train_y_df)
print(le.classes_)
train_y=le.transform(train_y_df)
val_y=le.transform(val_y_df)
test_y=le.transform(data_test['category'])

['business' 'entertainment' 'politics' 'sport' 'tech']


## Converting News text into numerical vector using count vectorizer

In [8]:
vectorizer = CountVectorizer()
vectorizer.fit_transform(train_x_df)
train_x=vectorizer.transform(train_x_df)
val_x=vectorizer.transform(val_x_df)
test_x=vectorizer.transform(data_test['text'])

In [9]:
train_x.toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 4, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [10]:
train_x.toarray().shape

(1424, 24295)

In [11]:
class ClassificationNet(nn.Module):

    def __init__(self):
        super(ClassificationNet, self).__init__()
        '''
        Defining layers of neural network
        '''
        # TODO 1: change network to include three hidden layers with 
        # hidden dimension 256, 128 and 64 respectively
        self.fc1 = nn.Linear(in_features = 24295, out_features = 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 5)
        
        # TODO 2: Add layer normalization to 1st hidden layer (256 dim)
        self.bn1 = nn.LayerNorm(256)
        
        # TODO 3: Add dropout to 2nd (128 dim) and 3rd hidden (64 dim)
        # layers with dropout probability of 0.3 for both layers
        self.drop = nn.Dropout(p = 0.3)

    def forward(self, x):
        """The forward pass of the classifier
        
        Args:
            x_in (torch.Tensor): an input data tensor. 
                x_in.shape should be (data_points, num_features)
        Returns:
            the resulting tensor.
        """
        # TODO 4: Modify function call to use modified architecture
        # You can use ReLU activation function for all the hidden layers
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.drop(self.fc2(x)))
        x = F.relu(self.drop(self.fc3(x)))
        x = self.fc4(x)
        return x


net = ClassificationNet()

#define learning rate
# TODO 5: Add learning rate to be used for Adam optimizer 
# Typically adam_lr = 0.0001 x sgd_lr
sgd_lr = 0.5
adam_lr = 0.0001 * sgd_lr

#Construct an optimizer object
# TODO 6: Use Adam optimizer
optimizer = optim.Adam(net.parameters(), lr=adam_lr)

#Construct an loss/criterion object
criterion=nn.CrossEntropyLoss()

#define number of epochs/ number of training iteration
epochs=300

#converting train and validation set arrays to tensor
train_x_tensor=torch.tensor(train_x.toarray()).float()
train_y_tensor=torch.tensor(train_y)
val_x_tensor=torch.tensor(val_x.toarray()).float()
val_y_tensor=torch.tensor(val_y)


def evaluation_metrics(predict_y,ground_truth_y):
    '''
    Returns accuracy and f1 score metrics for evaluation
    '''
    accuracy=accuracy_score(ground_truth_y,predict_y)
    f1score=f1_score(ground_truth_y,predict_y,average='macro')
    
    return (accuracy,f1score)

## Training Loop

In [12]:
for i in range(epochs):
    # the training routine is these 5 steps:
    
    # step 1. zero the gradients
    optimizer.zero_grad()
    
    # step 2. compute the output
    output = net(train_x_tensor)
    
    # step 3. compute the loss
    loss = criterion(output, train_y_tensor)
    
    # step 4. use loss to produce gradients
    loss.backward()
    
    # step 5. use optimizer to take gradient step
    optimizer.step() 
    
    with torch.no_grad():
        # validation set evaluation:
        
        # compute the output
        output_val=net(val_x_tensor)
        
        # compute the loss
        loss_val = criterion(output_val, val_y_tensor)
        
        # compute the prediction
        predict_y= output_val.data.max(1, keepdim=True)[1]
        
        # Use the "evaluation_metrics" function to find accuracy and f1 score
        accuracy,f1score=evaluation_metrics(predict_y,val_y_tensor)
        
        print('Epoch %d/%d - Loss_train: %.3f   loss_val: %.3f   accuracy_val: %.3f f1score_val: %.3f   '% \
              (i + 1, epochs,loss.item(),loss_val.item(),accuracy,f1score))

Epoch 1/300 - Loss_train: 1.607   loss_val: 1.591   accuracy_val: 0.233 f1score_val: 0.180   
Epoch 2/300 - Loss_train: 1.593   loss_val: 1.582   accuracy_val: 0.292 f1score_val: 0.223   
Epoch 3/300 - Loss_train: 1.575   loss_val: 1.565   accuracy_val: 0.346 f1score_val: 0.265   
Epoch 4/300 - Loss_train: 1.561   loss_val: 1.561   accuracy_val: 0.334 f1score_val: 0.277   
Epoch 5/300 - Loss_train: 1.545   loss_val: 1.542   accuracy_val: 0.407 f1score_val: 0.324   
Epoch 6/300 - Loss_train: 1.525   loss_val: 1.526   accuracy_val: 0.455 f1score_val: 0.404   
Epoch 7/300 - Loss_train: 1.508   loss_val: 1.517   accuracy_val: 0.497 f1score_val: 0.444   
Epoch 8/300 - Loss_train: 1.489   loss_val: 1.494   accuracy_val: 0.559 f1score_val: 0.513   
Epoch 9/300 - Loss_train: 1.468   loss_val: 1.475   accuracy_val: 0.576 f1score_val: 0.552   
Epoch 10/300 - Loss_train: 1.453   loss_val: 1.465   accuracy_val: 0.598 f1score_val: 0.570   
Epoch 11/300 - Loss_train: 1.425   loss_val: 1.449   accura

#### Test set Prediction and Evaluation

In [13]:
test_x_tensor=torch.tensor(test_x.toarray()).float()
test_y_tensor=torch.tensor(test_y)

with torch.no_grad():
    # Test set evaluation:
    
    # compute the output
    output_test=net(test_x_tensor)
    
    # compute the prediction
    predict_test_y= output_test.data.max(1, keepdim=True)[1]
    
    # Use the "evaluation_metrics" function to find accuracy and f1 score
    accuracy,f1score=evaluation_metrics(predict_test_y,test_y_tensor)
    print('Accuracy_test: %.3f f1score_val: %.3f   '% (accuracy,f1score))

Accuracy_test: 0.969 f1score_val: 0.968   
