# NMSU CSCI-5435 Assignment 3 Task 1

## Relevant Information

In [1]:
#Name:               Tianjie Chen
#Email:              tvc5586@nmsu.edu
#File Creation Date: Feb/26/2025
#Purpose of File:    NMSU CSCI-5435 Assignment 3 Task 1
#Last Edit Date:     Feb/27/2025
#Last Edit Note:     Re-run experiments
#GenAI used:         False

## Load libraries

In [2]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import gensim.downloader
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score
from torch.autograd import Variable

ModuleNotFoundError: No module named 'sklearn'

## Setup

In [None]:
# Report how many CUDA devices are visible, then pick the first GPU when one
# is usable and fall back to the CPU otherwise.
print(torch.cuda.device_count())
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
batch_size = 1024  # Number of samples per mini-batch for every model in this notebook
epochs     = 50   # Number of training epochs

# Seed the RNGs that drive weight initialisation and DataLoader shuffling so
# that "Restart Kernel -> Run All" repeats the same experiment and the model
# comparisons below are not dominated by run-to-run noise.
# NOTE(review): some CUDA kernels may still be non-deterministic on GPU.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Load the "word2vec-google-news-300" vectors through gensim's downloader.
# Later cells index this object by word and pair it with a 300-dimensional
# zero vector for unknown words.
# NOTE(review): presumably this triggers a large download on first use — confirm.
word2vec = gensim.downloader.load("word2vec-google-news-300")

In [None]:
DATA_PATH = "News_Category_Dataset_v2.json"

# The file is read as JSON Lines: each line is parsed as one JSON object.
# X collects the "short_description" texts and Y the matching "category"
# labels, kept in the same order.
X, Y = [], []

with open(DATA_PATH, 'r', encoding='utf-8') as f:
    for line in f:
        item = json.loads(line)
        text, label = item["short_description"], item["category"]
        X.append(text)
        Y.append(label)

## Preprocessing

In [None]:
# Turn every description into a single 300-d vector: the mean of the vectors
# of its space-separated tokens. Tokens missing from word2vec contribute a
# zero vector, so they still count in the mean's denominator.
embedding_dim = 300
sentence_embeddings = []

for sentence in X:
    word_vectors = []
    for word in sentence.split(' '):
        try:
            word_vectors.append(word2vec[word])
        except KeyError:
            # Out-of-vocabulary token. Only the lookup failure is caught, so
            # unrelated errors are no longer silently swallowed.
            word_vectors.append(np.zeros(embedding_dim))
    # str.split(' ') always yields at least one token (possibly ''), so the
    # mean is never taken over an empty list.
    sentence_embeddings.append(np.mean(np.array(word_vectors), axis=0))

# NOTE: X is rebound from raw strings to the embedding matrix, so this cell
# must not be re-run without first re-running the data-loading cell.
X = np.array(sentence_embeddings)

In [None]:
# Map the category strings to integer ids. The fitted encoder is kept so the
# ids can be translated back to category names later.
label_encoder = LabelEncoder().fit(Y)
Y = label_encoder.transform(Y)
num_classes = len(label_encoder.classes_)
print("Number of classes:", num_classes)

In [None]:
# One-hot encode the integer labels: row i of the identity matrix is the
# one-hot vector for class i, so indexing it with Y gives one row per sample.
Y_encoded = np.eye(Y.max() + 1, dtype=int)[Y]

## Create dataset

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, Y_encoded, random_state=42, test_size=0.2)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
# CREATE DATASET CLASS FOR DATALOADERS
class Dataset(torch.utils.data.Dataset):
    """Pairs a feature container with a label container for use in a DataLoader.

    The base class is spelled out in full on purpose: this class reuses the
    name ``Dataset``, so writing ``class Dataset(Dataset)`` would make every
    re-run of the cell inherit from the previous notebook-defined class
    instead of the PyTorch one.

    Args:
        data1: Indexable container of features (supports ``len`` and ``[]``).
        data2: Indexable container of labels, aligned with ``data1``.
    """

    def __init__(self, data1, data2):
        self.data1 = data1
        self.data2 = data2

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.data1)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        X = self.data1[idx]
        y = self.data2[idx]

        return X, y

In [None]:
# Pinned host memory only helps when batches are copied to a CUDA device, so
# request it only when a GPU is actually available.
use_pinned_memory = torch.cuda.is_available()

# Training batches are reshuffled every epoch.
train_dataset = Dataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=use_pinned_memory)

# Evaluation does not need shuffling; a fixed order keeps the test batches
# identical from epoch to epoch.
test_dataset = Dataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=use_pinned_memory)

## 2-layer Model and Training

### Model with 2 layers

In [None]:
# define the model
class Model_2_Layer(nn.Module):
    """Feed-forward classifier with two ReLU hidden layers (input -> 300 -> 100 -> classes).

    Args:
        input_dim: Size of each input feature vector (default 300).
        num_classes: Number of output classes (default 41).

    ``forward`` returns raw, unnormalised logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, input_dim=300, num_classes=41):
        super().__init__()
        self.hidden1 = nn.Linear(input_dim, 300)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(300, 100)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(100, num_classes)
        # nn.CrossEntropyLoss expects unnormalised logits. A ReLU here would
        # clamp every negative logit to 0 and zero its gradient, so the output
        # is passed through unchanged. The attribute is kept so existing code
        # that refers to `act_output` still works.
        self.act_output = nn.Identity()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to class logits."""
        x = self.act1(self.hidden1(x))
        x = self.act2(self.hidden2(x))
        x = self.act_output(self.output(x))
        return x

In [None]:
# Build the 2-layer model and move its parameters onto the selected device
# (GPU when available, otherwise CPU). A single .to(device) call is enough.
model_2l = Model_2_Layer().to(device)

In [None]:
# trainModel reads `model_optim` and `CEloss` from the notebook globals, so
# both are rebound here immediately before the 2-layer model is trained.
CEloss = nn.CrossEntropyLoss()
model_optim = torch.optim.Adam(model_2l.parameters(), lr=1e-3)

### Training Function

In [None]:
# TRAINING THE FNN MODEL
def trainModel(
    train_dataloader, test_dataloader,
    X_train, X_test,
    epoch,
    model,
    optimizer=None,
    loss_fn=None,
):
    """Train ``model`` for ``epoch`` epochs and evaluate it after each one.

    Args:
        train_dataloader: Yields ``(inputs, one_hot_labels)`` training batches.
        test_dataloader: Yields ``(inputs, one_hot_labels)`` evaluation batches.
        X_train, X_test: Kept for backward compatibility with existing calls;
            sample counts are taken from the batches themselves.
        epoch: Number of passes over the training data.
        model: ``nn.Module`` mapping a float input batch to class logits.
        optimizer: Optimizer bound to ``model``'s parameters. Defaults to the
            notebook-level ``model_optim``.
        loss_fn: Loss taking ``(logits, one_hot_labels)``. Defaults to the
            notebook-level ``CEloss``.

    Returns:
        Test accuracy (fraction of correctly classified test samples) after
        the last epoch, or ``0.0`` when ``epoch`` is 0.

    Loss and accuracy are averaged per sample rather than per batch, so a
    shorter final batch no longer carries the same weight as a full one.
    """
    # Fall back to the globals the original implementation relied on.
    if optimizer is None:
        optimizer = model_optim
    if loss_fn is None:
        loss_fn = CEloss

    # Run on whatever device the model's parameters already live on.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    test_accuracy = 0.0  # defined even if no epoch runs

    for epoch_idx in range(epoch):
        train_loss, train_accuracy = _run_epoch(
            model, train_dataloader, loss_fn, run_device, optimizer
        )
        test_loss, test_accuracy = _run_epoch(
            model, test_dataloader, loss_fn, run_device
        )

        print(f' Epoch {epoch_idx + 1} '.center(70, '*'))
        print("Train Loss:", train_loss, "\tTrain Accuracy:", train_accuracy)
        print("Test Loss:", test_loss, "\tTest Accuracy:", test_accuracy)

    print("".center(70, '*'))
    print("Final test accuracy:", test_accuracy)

    return test_accuracy


def _run_epoch(model, dataloader, loss_fn, run_device, optimizer=None):
    """Run one pass over ``dataloader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch. Without one it is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    total_correct = 0
    total_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, label in dataloader:
            # Features and one-hot labels are cast to float32 before use.
            inputs = inputs.float().to(run_device)
            label = label.float().to(run_device)

            if training:
                optimizer.zero_grad()
            output = model(inputs)
            loss = loss_fn(output, label)
            if training:
                loss.backward()
                optimizer.step()

            batch_len = label.shape[0]
            # Assumes loss_fn returns the batch mean (the CrossEntropyLoss
            # default); scaling by batch size gives a per-sample average.
            total_loss += loss.item() * batch_len
            # Predicted class = arg-max logit; true class = position of the 1
            # in the one-hot label.
            predicted = output.argmax(dim=1)
            target = label.argmax(dim=1)
            total_correct += (predicted == target).sum().item()
            total_seen += batch_len

    if total_seen == 0:
        return 0.0, 0.0
    return total_loss / total_seen, total_correct / total_seen

### Train 2-Layer Model

In [None]:
# Train and evaluate the 2-layer ReLU model; the returned final test accuracy
# is kept for the comparison tables further down.
model_2l_test_acc = trainModel(train_dataloader, test_dataloader, X_train, X_test, epochs, model_2l)

## Compare with different number of layers

### Model with 1 layer

In [None]:
# define the model
class Model_1_Layer(nn.Module):
    """Feed-forward classifier with one ReLU hidden layer (input -> 100 -> classes).

    Args:
        input_dim: Size of each input feature vector (default 300).
        num_classes: Number of output classes (default 41).

    ``forward`` returns raw, unnormalised logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, input_dim=300, num_classes=41):
        super().__init__()
        self.hidden2 = nn.Linear(input_dim, 100)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(100, num_classes)
        # nn.CrossEntropyLoss expects unnormalised logits. A ReLU here would
        # clamp every negative logit to 0 and zero its gradient, so the output
        # is passed through unchanged. The attribute is kept so existing code
        # that refers to `act_output` still works.
        self.act_output = nn.Identity()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to class logits."""
        x = self.act2(self.hidden2(x))
        x = self.act_output(self.output(x))
        return x

In [None]:
# Build the 1-layer model and move its parameters onto the selected device
# (GPU when available, otherwise CPU). A single .to(device) call is enough.
model_1l = Model_1_Layer().to(device)

In [None]:
# trainModel reads `model_optim` and `CEloss` from the notebook globals, so
# both are rebound here immediately before the 1-layer model is trained.
CEloss = nn.CrossEntropyLoss()
model_optim = torch.optim.Adam(model_1l.parameters(), lr=1e-3)

In [None]:
# Train and evaluate the 1-layer model; the returned final test accuracy is
# kept for the depth comparison below.
model_1l_test_acc = trainModel(train_dataloader, test_dataloader, X_train, X_test, epochs, model_1l)

### Model with 3 layers

In [None]:
# define the model
class Model_3_Layer(nn.Module):
    """Feed-forward classifier with three ReLU hidden layers (input -> 300 -> 300 -> 100 -> classes).

    Args:
        input_dim: Size of each input feature vector (default 300).
        num_classes: Number of output classes (default 41).

    ``forward`` returns raw, unnormalised logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, input_dim=300, num_classes=41):
        super().__init__()
        self.hidden1 = nn.Linear(input_dim, 300)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(300, 300)
        self.act2 = nn.ReLU()
        self.hidden3 = nn.Linear(300, 100)
        self.act3 = nn.ReLU()
        self.output = nn.Linear(100, num_classes)
        # nn.CrossEntropyLoss expects unnormalised logits. A ReLU here would
        # clamp every negative logit to 0 and zero its gradient, so the output
        # is passed through unchanged. The attribute is kept so existing code
        # that refers to `act_output` still works.
        self.act_output = nn.Identity()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to class logits."""
        x = self.act1(self.hidden1(x))
        x = self.act2(self.hidden2(x))
        x = self.act3(self.hidden3(x))
        x = self.act_output(self.output(x))
        return x

In [None]:
# Build the 3-layer model and move its parameters onto the selected device
# (GPU when available, otherwise CPU). A single .to(device) call is enough.
model_3l = Model_3_Layer().to(device)

In [None]:
# trainModel reads `model_optim` and `CEloss` from the notebook globals, so
# both are rebound here immediately before the 3-layer model is trained.
CEloss = nn.CrossEntropyLoss()
model_optim = torch.optim.Adam(model_3l.parameters(), lr=1e-3)

In [None]:
# Train and evaluate the 3-layer model; the returned final test accuracy is
# kept for the depth comparison below.
model_3l_test_acc = trainModel(train_dataloader, test_dataloader, X_train, X_test, epochs, model_3l)

### Comparison Result

In [None]:
# Final test accuracy of each depth variant ("Lyaers" typo fixed in the output).
print(
    f"""Accuracy Comparison:\n
        1 Layer:\t{model_1l_test_acc}\n
        2 Layers:\t{model_2l_test_acc}\n
        3 Layers:\t{model_3l_test_acc}"""
)

## Compare with different activation functions

### 2 Layers with ELU

In [None]:
# define the model
class Model_2_Layer_ELU(nn.Module):
    """Two-hidden-layer classifier (input -> 300 -> 100 -> classes) using ELU activations.

    Args:
        input_dim: Size of each input feature vector (default 300).
        num_classes: Number of output classes (default 41).

    ``forward`` returns raw, unnormalised logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, input_dim=300, num_classes=41):
        super().__init__()
        self.hidden1 = nn.Linear(input_dim, 300)
        self.hidden2 = nn.Linear(300, 100)
        self.act = nn.ELU()  # one stateless activation shared by both hidden layers
        self.output = nn.Linear(100, num_classes)
        # nn.CrossEntropyLoss expects unnormalised logits. A ReLU here would
        # clamp every negative logit to 0 and zero its gradient, so the output
        # is passed through unchanged. The attribute is kept so existing code
        # that refers to `act_output` still works.
        self.act_output = nn.Identity()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to class logits."""
        x = self.act(self.hidden1(x))
        x = self.act(self.hidden2(x))
        x = self.act_output(self.output(x))
        return x

In [None]:
# Build the ELU variant and move its parameters onto the selected device
# (GPU when available, otherwise CPU). A single .to(device) call is enough.
model_2l_ELU = Model_2_Layer_ELU().to(device)

In [None]:
# trainModel reads `model_optim` and `CEloss` from the notebook globals, so
# both are rebound here immediately before the ELU model is trained.
CEloss = nn.CrossEntropyLoss()
model_optim = torch.optim.Adam(model_2l_ELU.parameters(), lr=1e-3)

In [None]:
# Train and evaluate the ELU variant; the returned final test accuracy is
# kept for the activation comparison below.
model_2l_elu_test_acc = trainModel(train_dataloader, test_dataloader, X_train, X_test, epochs, model_2l_ELU)

### 2 Layers with Swish

In [None]:
# define the model
class Model_2_Layer_Swish(nn.Module):
    """Two-hidden-layer classifier (input -> 300 -> 100 -> classes) using Swish (``nn.SiLU``) activations.

    Args:
        input_dim: Size of each input feature vector (default 300).
        num_classes: Number of output classes (default 41).

    ``forward`` returns raw, unnormalised logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, input_dim=300, num_classes=41):
        super().__init__()
        self.hidden1 = nn.Linear(input_dim, 300)
        self.hidden2 = nn.Linear(300, 100)
        self.act = nn.SiLU()  # one stateless activation shared by both hidden layers
        self.output = nn.Linear(100, num_classes)
        # nn.CrossEntropyLoss expects unnormalised logits. A ReLU here would
        # clamp every negative logit to 0 and zero its gradient, so the output
        # is passed through unchanged. The attribute is kept so existing code
        # that refers to `act_output` still works.
        self.act_output = nn.Identity()

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to class logits."""
        x = self.act(self.hidden1(x))
        x = self.act(self.hidden2(x))
        x = self.act_output(self.output(x))
        return x

In [None]:
# Build the Swish variant and move its parameters onto the selected device
# (GPU when available, otherwise CPU). A single .to(device) call is enough.
model_2l_Swish = Model_2_Layer_Swish().to(device)

In [None]:
# trainModel reads `model_optim` and `CEloss` from the notebook globals, so
# both are rebound here immediately before the Swish model is trained.
CEloss = nn.CrossEntropyLoss()
model_optim = torch.optim.Adam(model_2l_Swish.parameters(), lr=1e-3)

In [None]:
# Train and evaluate the Swish variant; the returned final test accuracy is
# kept for the activation comparison below.
model_2l_swish_test_acc = trainModel(train_dataloader, test_dataloader, X_train, X_test, epochs, model_2l_Swish)

### Comparison Result

In [None]:
# Final test accuracy of each activation variant ("Lyaers" typo fixed in the output).
print(
    f"""Accuracy Comparison:\n
        2 Layers ReLU:\t{model_2l_test_acc}\n
        2 Layers ELU:\t{model_2l_elu_test_acc}\n
        2 Layers Swish:\t{model_2l_swish_test_acc}"""
)

ELU gives the best performance