In [1]:
# Import the required packages
!pip install nltk==3.6.1
!pip install numpy==1.18.5
!pip install pandas==1.3.0
!pip install torch==1.9.0
!pip install tqdm==4.59.0
!pip install scikit_learn==1.0.2



In [1]:
# Import the required libraries
import re
import torch
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Notebook configuration: training hyperparameters, dataset column names,
# and the input/output file paths used by the cells below
lr = 0.0001  # Learning rate for training
input_size = 50  # Input size for the neural network
num_epochs = 50  # Number of training epochs
hidden_size = 50  # Size of the hidden layer in the neural network
label_col = "Product"  # Name of the label column in the dataset
tokens_path = "Output/tokens.pkl"  # Path to save token data
labels_path = "Output/labels.pkl"  # Path to save label data
data_path = "Input/complaints.csv"  # Path to the input data CSV file
rnn_model_path = "Output/model_rnn.pth"  # Path to save the RNN model
lstm_model_path = "Output/model_lstm.pth"  # Path to save the LSTM model
vocabulary_path = "Output/vocabulary.pkl"  # Path to save the vocabulary
embeddings_path = "Output/embeddings.pkl"  # Path to save word embeddings
glove_vector_path = "Input/glove.6B.50d.txt"  # Path to the GloVe word vectors file
text_col_name = "Consumer complaint narrative"  # Name of the text column in the dataset
label_encoder_path = "Output/label_encoder.pkl"  # Path to save the label encoder

# Mapping of product names to shorter labels.
# Several raw product names map to the same short label, so applying this
# mapping merges those products into a single class.
product_map = {
    'Vehicle loan or lease': 'vehicle_loan',
    'Credit reporting, credit repair services, or other personal consumer reports': 'credit_report',
    'Credit card or prepaid card': 'card',
    'Money transfer, virtual currency, or money service': 'money_transfer',
    'virtual currency': 'money_transfer',
    'Mortgage': 'mortgage',
    'Payday loan, title loan, or personal loan': 'loan',
    'Debt collection': 'debt_collection',
    'Checking or savings account': 'savings_account',
    'Credit card': 'card',
    'Bank account or service': 'savings_account',
    'Credit reporting': 'credit_report',
    'Prepaid card': 'card',
    'Payday loan': 'loan',
    'Other financial service': 'others',
    'Virtual currency': 'money_transfer',
    'Student loan': 'loan',
    'Consumer Loan': 'loan',
    'Money transfers': 'money_transfer'
}


In [None]:
# Define function for saving a file
def save_file(name, obj):
    """
    Pickle ``obj`` and write it to the file at path ``name``.

    :param name: Destination file path (opened in binary write mode)
    :param obj: Any picklable Python object
    """
    with open(name, mode='wb') as handle:
        pickle.dump(obj, handle)

# Define function for loading a file
def load_file(name):
    """
    Load and return a pickled object from the file at path ``name``.

    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.

    :param name: Path of the pickle file to read
    :return: The unpickled Python object
    """
    # NOTE: pickle.load can execute arbitrary code; only load files this
    # notebook (or another trusted source) produced.
    with open(name, "rb") as f:
        return pickle.load(f)

## Process glove embeddings
---

In [None]:
# Open the glove embeddings file and read all lines (one word vector per line).
# The encoding is given explicitly so the platform default (for example cp1252
# on Windows) cannot cause decoding errors on non-ASCII tokens.
with open(glove_vector_path, "rt", encoding="utf-8") as f:
    emb = f.readlines()

### The embeddings file contains 400,000 unique words

In [None]:
# length of embeddings
len(emb)

### Check the first record

In [None]:
# Check first record
emb[0]

In [None]:
# Split the first record and check for vocabulary
emb[0].split()[0]

In [None]:
# Split the first record and check for embeddings
emb[0].split()[1:]

### Separate embeddings and vocabulary

In [None]:
# Split every line of 'emb' on whitespace: the first field is the word/token,
# the remaining fields are the components of its embedding vector (as strings)
split_rows = [line.split() for line in emb]

# Words/tokens, in file order
vocabulary = [fields[0] for fields in split_rows]

# Embedding vectors, aligned with 'vocabulary'
embeddings = [fields[1:] for fields in split_rows]


### Convert embeddings to numpy float array

In [None]:
embeddings = np.array(embeddings, dtype=np.float32)

In [None]:
embeddings.shape

### Add embeddings for padding and unknown items

In [None]:
vocabulary[:10]

In [None]:
# Initialize the vocabulary list with special tokens "<pad>" and "<unk>"
vocabulary = ["<pad>", "<unk>"] + vocabulary


In [None]:
# Width of one embedding vector, taken from the loaded array instead of a
# hard-coded 50 so this cell also works with other GloVe dimensions
embedding_dim = embeddings.shape[1]

# Vector of ones, used as the embedding of the "<pad>" token (row 0)
ones_vector = np.ones(embedding_dim, dtype=np.float32)

# Mean of all embeddings, used as the embedding of the "<unk>" token (row 1)
mean_embedding = np.mean(embeddings, axis=0)

# Vertically stack the ones vector, mean embedding, and the original embeddings
# so that row i of 'embeddings' corresponds to vocabulary[i]
embeddings = np.vstack([ones_vector, mean_embedding, embeddings])


In [None]:
print(len(vocabulary), embeddings.shape)

### Save embeddings and vocabulary

In [None]:
save_file(embeddings_path, embeddings)
save_file(vocabulary_path, vocabulary)

---

## Process text data

### Read the data file

In [None]:
data = pd.read_csv(data_path)

### Drop rows where the text column is empty

In [None]:
data.dropna(subset=[text_col_name], inplace=True)

### Replace duplicate labels

In [None]:
# Replace values in the 'label_col' column of the 'data' DataFrame
data.replace({label_col: product_map}, inplace=True)


### Encode the label column and save the encoder and encoded labels

In [None]:
from sklearn.preprocessing import LabelEncoder

# Build the encoder and, in one step, learn the set of classes from the
# 'label_col' column and convert every label to its integer code
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(data[label_col])


In [None]:
labels[0]

In [None]:
label_encoder.classes_

In [None]:
data[label_col]

In [None]:
save_file(labels_path, labels)
save_file(label_encoder_path, label_encoder)

### Process the text column

In [None]:
# Extract the values from the 'text_col_name' column of the 'data' DataFrame
input_text = data[text_col_name]


### Convert text to lower case

In [None]:
# Convert each text item in 'input_text' to lowercase and display a progress bar
input_text = [i.lower() for i in tqdm(input_text)]


### Remove punctuation except apostrophes

In [None]:
# Remove non-alphanumeric characters (except for single quotes and spaces) from each text item in 'input_text' and display a progress bar
input_text = [re.sub(r"[^\w\d'\s]+", " ", i) for i in tqdm(input_text)]


### Remove digits

In [None]:
# Remove one or more consecutive digits from each text item in 'input_text' and display a progress bar.
# The pattern is a raw string: "\d" in a normal string literal is an invalid escape sequence.
input_text = [re.sub(r"\d+", "", i) for i in tqdm(input_text)]


### Remove runs of two or more consecutive 'x' characters

In [None]:
# Remove consecutive occurrences of 'x' (two or more 'x's in a row) from each text item in 'input_text' and display a progress bar
input_text = [re.sub(r'[x]{2,}', "", i) for i in tqdm(input_text)]


### Replace multiple spaces with single space

In [None]:
# Replace multiple consecutive spaces with a single space in each text item in 'input_text' and display a progress bar
input_text = [re.sub(' +', ' ', i) for i in tqdm(input_text)]


### Tokenize the text

In [None]:
# word_tokenize needs NLTK's "punkt" tokenizer data, which `pip install nltk`
# does not ship; fetch it once (no-op if already present) so this cell works
# on a fresh environment
import nltk
nltk.download("punkt", quiet=True)

# Tokenize each text item in 'input_text' using the word_tokenize function and display a progress bar
tokens = [word_tokenize(t) for t in tqdm(input_text)]


### Take the first 20 tokens in each complaint text

In [None]:
# Make every token sequence exactly 20 tokens long: sequences with 20 or more
# tokens keep only their first 20, shorter ones are left-padded with '<pad>'
tokens = [
    seq[:20] if len(seq) >= 20 else ['<pad>'] * (20 - len(seq)) + seq
    for seq in tqdm(tokens)
]


### Convert tokens to integer indices from vocabulary

In [None]:
from tqdm import tqdm

def token_index(tokens, vocabulary, missing='<unk>'):
    """
    Convert lists of word tokens into lists of integers representing their positions in the vocabulary.

    A word -> index dictionary is built once, so each token lookup is a
    constant-time dict access instead of a linear scan of the vocabulary list.

    :param tokens: List of token sequences (each sequence is a list of word tokens)
    :param vocabulary: List of words in the vocabulary
    :param missing: Token for words not present in the vocabulary
    :return: List of lists of integers representing the word tokens
    :raises ValueError: If a token is not in the vocabulary and ``missing`` is not in it either
    """
    # setdefault keeps the FIRST position of a duplicated word, matching list.index
    word_to_idx = {}
    for position, word in enumerate(vocabulary):
        word_to_idx.setdefault(word, position)

    # Index used for out-of-vocabulary tokens (None if 'missing' itself is absent)
    missing_idx = word_to_idx.get(missing)

    idx_token = []  # One list of indices per input sequence
    for text in tqdm(tokens):  # Iterate through the list of token sequences
        idx_text = []
        for token in text:
            idx = word_to_idx.get(token, missing_idx)
            if idx is None:
                # Same failure mode as vocabulary.index(missing) on an absent token
                raise ValueError(f"{missing!r} is not in vocabulary")
            idx_text.append(idx)
        idx_token.append(idx_text)

    return idx_token  # Return the list of lists of token indices


In [None]:
tokens = token_index(tokens, vocabulary)

In [None]:
len(tokens)

In [None]:
tokens[0]

In [None]:
data.head()

In [None]:
vocabulary[tokens[0][0]]

### Save the tokens

In [None]:
save_file(tokens_path, tokens)

---

## Create PyTorch Dataset

In [None]:
import torch

class TextDataset(torch.utils.data.Dataset):
    """
    Dataset that pairs each sample's label with the embedding rows selected
    by that sample's token indices.
    """

    def __init__(self, tokens, embeddings, labels):
        """
        Store the data backing the dataset.

        :param tokens: Per-sample token indices (indexable by sample position)
        :param embeddings: Embedding table, indexed as embeddings[indices, :]
        :param labels: Per-sample labels (indexable by sample position)
        """
        self.tokens, self.embeddings, self.labels = tokens, embeddings, labels

    def __len__(self):
        """
        :return: Number of samples, i.e. the length of ``tokens``
        """
        n_samples = len(self.tokens)
        return n_samples

    def __getitem__(self, idx):
        """
        Fetch one sample.

        :param idx: Position of the sample
        :return: Tuple ``(label, embedding)`` where ``embedding`` holds the
                 embedding-table rows for the sample's token indices
        """
        sample_label = self.labels[idx]
        row_ids = self.tokens[idx]
        return sample_label, self.embeddings[row_ids, :]


---

## Create Models

### RNN Model

In [None]:
import torch

class RNNNetwork(torch.nn.Module):
    """
    Single-layer RNN followed by a linear classification layer.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """
        Initialize the RNNNetwork model.

        :param input_size: Size of input embeddings
        :param hidden_size: Size of the hidden state
        :param num_classes: Number of output classes
        """
        # Must be __init__ (the original `__init()` raised AttributeError);
        # torch.nn.Module has to be initialized before sub-modules are assigned
        super(RNNNetwork, self).__init__()
        # RNN Layer (batch_first=True: the batch dimension comes first in the input)
        self.rnn = torch.nn.RNN(input_size=input_size,
                                hidden_size=hidden_size,
                                batch_first=True)
        # Linear Layer
        self.linear = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, input_data):
        """
        Forward pass of the model.

        :param input_data: Input data (sequences of embeddings)
        :return: Linear layer applied to the RNN's final hidden state. The
                 hidden state keeps its leading layer dimension, so for a
                 batched input the output has shape (1, batch, num_classes).
        """
        # The per-step outputs are discarded; only the final hidden state is used
        _, hidden = self.rnn(input_data)
        output = self.linear(hidden)
        return output


### TensorFlow alternative


In [None]:
import tensorflow as tf
from tensorflow.keras import layers

# NOTE: this class is deliberately NOT named `RNNNetwork`. The training and
# prediction cells below call `RNNNetwork(...)` and then use PyTorch-only
# methods (`.cuda()`, `.parameters()`, `.load_state_dict()`), so a Keras class
# with the same name would shadow the PyTorch model and break them.
class KerasRNNNetwork(tf.keras.Model):
    """
    tf.keras counterpart of the PyTorch RNN model: a SimpleRNN layer followed
    by a dense classification layer.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """
        Initialize the KerasRNNNetwork model.

        :param input_size: Size of input embeddings (not used by the layers here;
                           the input width is supplied when the model is built/called)
        :param hidden_size: Size of the hidden state
        :param num_classes: Number of output classes
        """
        super(KerasRNNNetwork, self).__init__()
        # RNN Layer; return_state=True makes the layer return (output, final_state)
        self.rnn = layers.SimpleRNN(units=hidden_size, 
                                    return_sequences=False, 
                                    return_state=True)
        # Linear Layer
        self.linear = layers.Dense(num_classes)

    def call(self, inputs, training=False):
        """
        Forward pass of the model.

        :param inputs: Input data (sequences of embeddings)
        :param training: Boolean to specify if the model is in training mode
        :return: Output of the model
        """
        _, hidden = self.rnn(inputs)
        output = self.linear(hidden)
        return output

# Example usage.
# Demo-only names are used so the notebook-wide `input_size`, `hidden_size`,
# `num_classes` and `model` variables are not overwritten.
demo_input_size = 10
demo_hidden_size = 20
demo_num_classes = 5

# Instantiate the model
keras_rnn_model = KerasRNNNetwork(demo_input_size, demo_hidden_size, demo_num_classes)

# Build the model by providing input shape
keras_rnn_model.build(input_shape=(None, None, demo_input_size))

# Print the model summary
keras_rnn_model.summary()


### LSTM Model

In [None]:
import torch

class LSTMNetwork(torch.nn.Module):
    """
    Single-layer LSTM followed by a linear classification layer.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """
        Initialize the LSTMNetwork model.

        :param input_size: Size of input embeddings
        :param hidden_size: Size of the hidden state
        :param num_classes: Number of output classes
        """
        # Must be __init__ (the original `__init()` raised AttributeError);
        # torch.nn.Module has to be initialized before sub-modules are assigned
        super(LSTMNetwork, self).__init__()
        # LSTM Layer (batch_first=True: the batch dimension comes first in the input)
        self.rnn = torch.nn.LSTM(input_size=input_size,
                                hidden_size=hidden_size,
                                batch_first=True)
        # Linear Layer
        self.linear = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, input_data):
        """
        Forward pass of the model.

        :param input_data: Input data (sequences of embeddings)
        :return: Linear layer applied to the last layer's final hidden state;
                 shape (batch, num_classes) for a batched input
        """
        # The LSTM returns (outputs, (hidden_state, cell_state)); only the
        # final hidden state is used
        _, (hidden, _) = self.rnn(input_data)
        output = self.linear(hidden[-1])
        return output


### TensorFlow alternative


In [None]:
import tensorflow as tf
from tensorflow.keras import layers

# NOTE: this class is deliberately NOT named `LSTMNetwork`. The training and
# prediction cells below call `LSTMNetwork(...)` and then use PyTorch-only
# methods (`.cuda()`, `.parameters()`, `.load_state_dict()`), so a Keras class
# with the same name would shadow the PyTorch model and break them.
class KerasLSTMNetwork(tf.keras.Model):
    """
    tf.keras counterpart of the PyTorch LSTM model: an LSTM layer followed by
    a dense classification layer.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """
        Initialize the KerasLSTMNetwork model.

        :param input_size: Size of input embeddings (not used by the layers here;
                           the input width is supplied when the model is built/called)
        :param hidden_size: Size of the hidden state
        :param num_classes: Number of output classes
        """
        super(KerasLSTMNetwork, self).__init__()
        # LSTM Layer; with return_sequences=False and return_state=False the
        # layer returns a single tensor
        self.lstm = layers.LSTM(units=hidden_size, 
                                return_sequences=False, 
                                return_state=False)
        # Linear Layer
        self.linear = layers.Dense(num_classes)

    def call(self, inputs, training=False):
        """
        Forward pass of the model.

        :param inputs: Input data (sequences of embeddings)
        :param training: Boolean to specify if the model is in training mode
        :return: Output of the model
        """
        hidden = self.lstm(inputs)
        output = self.linear(hidden)
        return output

# Example usage.
# Demo-only names are used so the notebook-wide `input_size`, `hidden_size`,
# `num_classes` and `model` variables are not overwritten.
demo_input_size = 10
demo_hidden_size = 20
demo_num_classes = 5

# Instantiate the model
keras_lstm_model = KerasLSTMNetwork(demo_input_size, demo_hidden_size, demo_num_classes)

# Build the model by providing input shape
keras_lstm_model.build(input_shape=(None, None, demo_input_size))

# Print the model summary
keras_lstm_model.summary()


### Define train function

In [None]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm

def train(train_loader, valid_loader, model, criterion, optimizer, device,
          num_epochs, model_path):
    """
    Function to train the model.

    Runs ``num_epochs`` epochs of training followed by validation, prints the
    mean losses after every epoch, and saves the model's state dict to
    ``model_path`` whenever the mean validation loss improves.

    :param train_loader: Data loader for the training dataset (yields (labels, data) batches)
    :param valid_loader: Data loader for the validation dataset (yields (labels, data) batches)
    :param model: Model object (expected to already be on ``device``)
    :param criterion: Loss function
    :param optimizer: Optimizer
    :param device: 'cuda' (GPU) or 'cpu' (CPU)
    :param num_epochs: Number of training epochs
    :param model_path: Path to save the model
    """
    best_loss = 1e8  # Initialize a variable to track the best validation loss

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1} of {num_epochs}")
        valid_loss, train_loss = [], []

        model.train()  # Set the model to training mode
        # Training loop
        for batch_labels, batch_data in tqdm(train_loader):
            # Move to the target device and cast to int64 in ONE call;
            # `.type(torch.LongTensor)` would move the labels back to the CPU
            batch_labels = batch_labels.to(device=device, dtype=torch.long)
            batch_data = batch_data.to(device)

            # Forward pass
            batch_output = model(batch_data)
            # Flatten to (batch, num_classes). Unlike torch.squeeze this keeps
            # the batch dimension when a batch holds a single sample.
            batch_output = batch_output.reshape(-1, batch_output.shape[-1])

            # Calculate the loss
            loss = criterion(batch_output, batch_labels)
            train_loss.append(loss.item())

            optimizer.zero_grad()
            # Backward pass and gradient update step
            loss.backward()
            optimizer.step()

        model.eval()  # Set the model to evaluation mode
        # Validation loop; no gradients are needed, so skip building the graph
        with torch.no_grad():
            for batch_labels, batch_data in tqdm(valid_loader):
                batch_labels = batch_labels.to(device=device, dtype=torch.long)
                batch_data = batch_data.to(device)

                # Forward pass
                batch_output = model(batch_data)
                batch_output = batch_output.reshape(-1, batch_output.shape[-1])

                # Calculate the loss
                loss = criterion(batch_output, batch_labels)
                valid_loss.append(loss.item())

        # Calculate mean losses for the epoch
        t_loss = np.mean(train_loss)
        v_loss = np.mean(valid_loss)
        print(f"Train Loss: {t_loss}, Validation Loss: {v_loss}")

        if v_loss < best_loss:
            best_loss = v_loss
            # Save the model if the validation loss improves
            torch.save(model.state_dict(), model_path)

        print(f"Best Validation Loss: {best_loss}")


### Define test function

In [None]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score

def test(test_loader, model, criterion, device):
    """
    Function to test the model.

    Computes the loss and accuracy of every batch and prints their means
    (each batch contributes equally to the mean).

    :param test_loader: Data loader for the test dataset (yields (labels, data) batches)
    :param model: Model object (expected to already be on ``device``)
    :param criterion: Loss function
    :param device: 'cuda' (GPU) or 'cpu' (CPU)
    """
    model.eval()  # Set the model to evaluation mode
    test_loss = []  # Initialize a list to store test losses
    test_accu = []  # Initialize a list to store test accuracies

    # Evaluation only: no gradients are needed
    with torch.no_grad():
        for batch_labels, batch_data in tqdm(test_loader):
            # Move to the target device and cast to int64 in ONE call;
            # `.type(torch.LongTensor)` would move the labels back to the CPU
            batch_labels = batch_labels.to(device=device, dtype=torch.long)
            batch_data = batch_data.to(device)

            # Forward pass
            batch_output = model(batch_data)
            # Flatten to (batch, num_classes). Unlike torch.squeeze this keeps
            # the batch dimension when a batch holds a single sample.
            batch_output = batch_output.reshape(-1, batch_output.shape[-1])

            # Calculate the loss
            loss = criterion(batch_output, batch_labels)
            test_loss.append(loss.item())

            # Compute batch predictions
            batch_preds = torch.argmax(batch_output, dim=1)

            # Compute accuracy on the CPU (.cpu() is a no-op for CPU tensors)
            # and append to the list
            test_accu.append(accuracy_score(batch_labels.cpu().numpy(),
                                            batch_preds.cpu().numpy()))

    # Calculate the mean test loss and test accuracy
    test_loss = np.mean(test_loss)
    test_accu = np.mean(test_accu)

    print(f"Test Loss: {test_loss}, Test Accuracy: {test_accu}")


---

## Train RNN Model

### Load the files

In [None]:
# Load tokenized text data from a file
tokens = load_file(tokens_path)

# Load label data from a file
labels = load_file(labels_path)

# Load word embeddings from a file
embeddings = load_file(embeddings_path)

# Load a label encoder object from a file
label_encoder = load_file(label_encoder_path)

# Calculate the number of unique classes based on the label encoder
num_classes = len(label_encoder.classes_)


### Split data into train, validation and test sets

In [None]:
# Hold out 20% of the data as the test set; the remaining 80% stays in X_train / y_train for now.
# NOTE: no random_state is passed, so the split is different on every run.
X_train, X_test, y_train, y_test = train_test_split(tokens, labels, test_size=0.2)

# Split the remaining 80% into a training set (75% of it = 60% of the original data)
# and a validation set (25% of it = 20% of the original data)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.25)


### Create PyTorch datasets

In [None]:
# Create a training dataset using X_train, word embeddings, and y_train
train_dataset = TextDataset(X_train, embeddings, y_train)

# Create a validation dataset using X_valid, word embeddings, and y_valid
valid_dataset = TextDataset(X_valid, embeddings, y_valid)

# Create a test dataset using X_test, word embeddings, and y_test
test_dataset = TextDataset(X_test, embeddings, y_test)


### Create data loaders

In [None]:
# Create a training data loader with batch size 16, shuffling the data, and dropping the last batch if it's smaller than the batch size
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)

# Create a validation data loader with batch size 16
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=16)

# Create a test data loader with batch size 16
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)


### Create model object

In [None]:
# Create an instance of the RNNNetwork model with the specified input size, hidden size, and number of output classes
model = RNNNetwork(input_size, hidden_size, num_classes)


### Move the model to GPU if available

In [None]:
if torch.cuda.is_available():
    model = model.cuda()

### Define loss function and optimizer

In [None]:
# Define the loss function (CrossEntropyLoss) for classification tasks
criterion = torch.nn.CrossEntropyLoss()

# Define the optimizer (Adam) for updating model parameters during training
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Determine the computing device - use GPU if available, otherwise, use CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


### Training loop

In [None]:
# Call the 'train' function to train the model
train(train_loader, valid_loader, model, criterion, optimizer, device, num_epochs, rnn_model_path)


---

## Train LSTM Model

In [None]:
# Create an instance of the LSTMNetwork model with the specified input size, hidden size, and number of output classes
model = LSTMNetwork(input_size, hidden_size, num_classes)


In [None]:
if torch.cuda.is_available():
    model = model.cuda()

In [None]:
# Define the loss function (CrossEntropyLoss) for classification tasks
criterion = torch.nn.CrossEntropyLoss()

# Define the optimizer (Adam) for updating model parameters during training
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Determine the computing device - use GPU if available, otherwise, use CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [None]:
# Call the 'train' function to train the LSTM model
train(train_loader, valid_loader, model, criterion, optimizer, device, num_epochs, lstm_model_path)


In [None]:
test(test_loader, model, criterion, device)

## Predict on new text
---

In [None]:
input_text = '''I am a victim of Identity Theft & currently have an Experian account that 
I can view my Experian Credit Report and getting notified when there is activity on 
my Experian Credit Report. For the past 3 days I've spent a total of approximately 9 
hours on the phone with Experian. Every time I call I get transferred repeatedly and 
then my last transfer and automated message states to press 1 and leave a message and 
someone would call me. Every time I press 1 I get an automatic message stating than you 
before I even leave a message and get disconnected. I call Experian again, explain what 
is happening and the process begins again with the same end result. I was trying to have 
this issue attended and resolved informally but I give up after 9 hours. There are hard 
hit inquiries on my Experian Credit Report that are fraud, I didn't authorize, or recall 
and I respectfully request that Experian remove the hard hit inquiries immediately just 
like they've done in the past when I was able to speak to a live Experian representative 
in the United States. The following are the hard hit inquiries : BK OF XXXX XX/XX/XXXX 
XXXX XXXX XXXX  XX/XX/XXXX XXXX  XXXX XXXX  XX/XX/XXXX XXXX  XX/XX/XXXX XXXX  XXXX 
XX/XX/XXXX'''

### Process input text

In [None]:
# Convert the input text to lowercase
input_text = input_text.lower()

# Replace non-alphanumeric characters (except for ' and spaces) with a space
input_text = re.sub(r"[^\w\d'\s]+", " ", input_text)

# Remove all digits from the text (raw string: "\d" in a normal string
# literal is an invalid escape sequence)
input_text = re.sub(r"\d+", "", input_text)

# Remove consecutive occurrences of 'x' with two or more repetitions
input_text = re.sub(r'[x]{2,}', "", input_text)

# Replace consecutive spaces with a single space
input_text = re.sub(' +', ' ', input_text)

# Tokenize the preprocessed text into a list of words
tokens = word_tokenize(input_text)


### Add padding if the length of tokens is less than 20

In [None]:
# Keep only the first 20 tokens, as is done for the training data, then
# left-pad shorter inputs with '<pad>' so the sequence is exactly 20 tokens long
tokens = tokens[:20]
tokens = ['<pad>'] * (20 - len(tokens)) + tokens


### Convert the input tokens to vocabulary indices

In [None]:
# Map every token to its position in the vocabulary; tokens that are not in
# the vocabulary are mapped to the position of the '<unk>' token
idx_token = [
    vocabulary.index(tok) if tok in vocabulary else vocabulary.index('<unk>')
    for tok in tokens
]


### Get embeddings for tokens

In [None]:
# Extract word embeddings from the 'embeddings' array for the tokens in 'idx_token'
token_emb = embeddings[idx_token, :]


### Convert to torch tensor

In [None]:
# Convert the 'token_emb' NumPy array into a PyTorch tensor
inp = torch.from_numpy(token_emb)


### Move the tensor to GPU if available

In [None]:
inp = inp.to(device)

### Create a batch of one record

In [None]:
# Add a new dimension to the 'inp' tensor
inp = torch.unsqueeze(inp, 0)


### Load label encoder

In [None]:
# Load a label encoder from a file
label_encoder = load_file(label_encoder_path)

# Determine the number of unique classes based on the loaded label encoder
num_classes = len(label_encoder.classes_)


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### RNN prediction

In [None]:
# Create a model object (RNNNetwork) with the specified input size, hidden size, and number of classes
model = RNNNetwork(input_size, hidden_size, num_classes)

# Load the trained model weights from the specified path.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load(rnn_model_path, map_location=device))

# Move the model to the same device as the input tensor
# (the original `torch.cuda.is available()` was a syntax error)
model = model.to(device)
model.eval()  # Inference mode

# Perform a forward pass with the input tensor (inp); no gradients are needed
with torch.no_grad():
    out = torch.squeeze(model(inp))

# Find the predicted class by taking the class with the highest score
# and mapping it back to the original class label using the label encoder.
# .item() converts the 0-d tensor into a plain Python int for indexing.
predicted_class_index = torch.argmax(out).item()
prediction = label_encoder.classes_[predicted_class_index]

# Print the predicted class
print(f"Predicted Class: {prediction}")


### LSTM prediction

In [None]:
# Create a model object (LSTMNetwork) with the specified input size, hidden size, and number of classes
model = LSTMNetwork(input_size, hidden_size, num_classes)

# Load the trained model weights from the specified path.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load(lstm_model_path, map_location=device))

# Move the model to the same device as the input tensor
model = model.to(device)
model.eval()  # Inference mode

# Perform a forward pass with the input tensor (inp); no gradients are needed
with torch.no_grad():
    out = torch.squeeze(model(inp))

# Find the predicted class by taking the class with the highest score
# and mapping it back to the original class label using the label encoder.
# .item() converts the 0-d tensor into a plain Python int for indexing.
predicted_class_index = torch.argmax(out).item()
prediction = label_encoder.classes_[predicted_class_index]

# Print the predicted class
print(f"Predicted Class: {prediction}")


---