In [1]:
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
import torch.nn as nn
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import json
import torch.nn.functional as F
import os
from torch.utils.data import Dataset
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [2]:
# Mount Google Drive at /content/drive so the files stored there can be read from this runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Date and time of the training run; also the name of that run's folder.
directory = '2024-07-04_08-07-28'

# Base folder on the mounted Drive that contains the run folder and the BERT folder.
path = '/content/drive/MyDrive/Progetto-Vascon/DataLog'

# Both working folders sit directly under `path`.
save_dir_base, save_dir_bert = (
    os.path.join(path, subfolder)
    for subfolder in (directory, "bert_sequence_classification_trained")
)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import json

class Attention(nn.Module):
    """
    Scores every time step of a sequence with a single linear unit and
    normalises the scores into attention weights.
    """

    def __init__(self, hidden_dim):
        """
        Build the scoring layer.
        Args:
            hidden_dim (int): Size of the last dimension of the tensors passed to `forward`.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.attn = nn.Linear(hidden_dim, 1)  # one scalar score per time step
        self.init_weights()

    def init_weights(self):
        """
        Reset the scoring layer: Xavier-uniform weights and a zero bias.
        """
        scorer = self.attn
        nn.init.xavier_uniform_(scorer.weight)
        if scorer.bias is None:
            return
        nn.init.constant_(scorer.bias, 0)

    def forward(self, lstm_output):
        """
        Turn recurrent outputs into one attention weight per time step.
        Args:
            lstm_output (torch.Tensor): Recurrent-layer output; the shapes noted
                below assume (batch_size, seq_len, hidden_dim).
        Returns:
            torch.Tensor: Softmax over dim 1 of the tanh-squashed scores,
                shape (batch_size, seq_len).
        """
        # (batch_size, seq_len, 1) -> tanh -> (batch_size, seq_len)
        scores = self.attn(lstm_output).tanh().squeeze(-1)
        # Normalise across the sequence so each row sums to 1
        return scores.softmax(dim=1)

class SentimentClassifierWithSoftAttention(nn.Module):
    """
    Recurrent classifier that pools the RNN outputs with soft attention.

    Two heads share the same RNN and attention layer:
    `forward` classifies from the attention-pooled RNN output alone (`fc`),
    `forward_with_cls` classifies from that vector concatenated with an
    externally supplied CLS embedding (`fc_cls`).
    """

    def __init__(
        self,
        embedding_dim=768,
        hidden_dim=256,
        output_dim=6,
        n_layers=1,
        bidirectional=True,
        dropout=0.0,
        rnn_type='LSTM',
    ):
        """
        Initialize the SentimentClassifierWithSoftAttention model.
        Args:
            embedding_dim (int): Dimension of the input embeddings.
            hidden_dim (int): Dimension of the hidden layer.
            output_dim (int): Dimension of the output layer.
            n_layers (int): Number of recurrent layers.
            bidirectional (bool): If True, use a bidirectional RNN.
            dropout (float): Dropout probability; only passed to the RNN when n_layers > 1.
            rnn_type (str): Type of RNN to use ('LSTM' or 'GRU').
        Raises:
            ValueError: If rnn_type is neither 'LSTM' nor 'GRU'.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.rnn_type = rnn_type

        # Feature size of each RNN output step: both directions are concatenated
        rnn_out_dim = hidden_dim * 2 if bidirectional else hidden_dim

        self.attention = Attention(rnn_out_dim)

        if rnn_type == 'LSTM':
            rnn_cls = nn.LSTM
        elif rnn_type == 'GRU':
            rnn_cls = nn.GRU
        else:
            raise ValueError("Choose a valid RNN type: LSTM or GRU")

        self.rnn = rnn_cls(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            bidirectional=bidirectional,
            # Inter-layer dropout is only meaningful with more than one layer
            dropout=dropout if n_layers > 1 else 0,
            batch_first=True,
        )

        # Head used by `forward`: pooled RNN output only
        self.fc = nn.Sequential(
            nn.Linear(rnn_out_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

        # Head used by `forward_with_cls`: pooled RNN output + CLS embedding
        self.fc_cls = nn.Sequential(
            nn.Linear(rnn_out_dim + embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

        self.init_weights()

    def init_weights(self):
        """
        Initialize weights for the RNN and both fully connected heads.

        Weights get Xavier-uniform values and biases are set to zero. Both
        `fc` and `fc_cls` are covered so the two heads start from the same
        initialization scheme.
        """
        for name, param in self.rnn.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

        for head in (self.fc, self.fc_cls):
            for layer in head:
                if isinstance(layer, nn.Linear):
                    nn.init.xavier_uniform_(layer.weight)
                    if layer.bias is not None:
                        nn.init.constant_(layer.bias, 0)

    def forward(self, embedded):
        """
        Forward pass for the model.
        Args:
            embedded (torch.Tensor): Input embeddings, (batch_size, seq_len, embedding_dim).
        Returns:
            torch.Tensor: Output logits, (batch_size, output_dim).
            torch.Tensor: Attention weights, (batch_size, seq_len).
        Raises:
            ValueError: If `embedded` is not 3-dimensional.
        """
        weighted_sum, attention_weights = self.prepare_data(embedded)
        dense_outputs = self.fc(weighted_sum)
        return dense_outputs, attention_weights

    def forward_with_cls(self, embedded, cls_token):
        """
        Forward pass for the model with CLS token.
        Args:
            embedded (torch.Tensor): Input embeddings, (batch_size, seq_len, embedding_dim).
            cls_token (torch.Tensor): CLS embedding; a size-1 dimension at index 1
                is squeezed away before concatenation.
        Returns:
            torch.Tensor: Output logits, (batch_size, output_dim).
            torch.Tensor: Attention weights, (batch_size, seq_len).
        Raises:
            ValueError: If `embedded` is not 3-dimensional.
        """
        weighted_sum, attention_weights = self.prepare_data(embedded)
        cls_token = cls_token.squeeze(1)
        # Concatenate the attention-pooled RNN vector with the CLS embedding
        weighted_sum_with_cls = torch.cat((weighted_sum, cls_token), dim=1)
        dense_outputs = self.fc_cls(weighted_sum_with_cls)
        return dense_outputs, attention_weights

    def prepare_data(self, embedded):
        """
        Prepare data by applying RNN and attention mechanism.
        Args:
            embedded (torch.Tensor): Input embeddings, (batch_size, seq_len, embedding_dim).
        Returns:
            torch.Tensor: Attention-weighted sum of the RNN outputs,
                (batch_size, num_directions * hidden_dim).
            torch.Tensor: Attention weights, (batch_size, seq_len).
        Raises:
            ValueError: If `embedded` is not 3-dimensional.
        """
        if len(embedded.shape) != 3:
            raise ValueError("Input shape must be 3D: (batch_size, seq_len, embedding_dim)")
        # (batch_size, seq_len, num_directions * hidden_dim)
        lstm_output, _ = self.rnn(embedded)
        # (batch_size, seq_len)
        attention_weights = self.attention(lstm_output)
        # (batch_size, 1, seq_len) so bmm can contract over the sequence axis
        attention_weights = attention_weights.unsqueeze(1)
        # (batch_size, 1, num_directions * hidden_dim)
        weighted = torch.bmm(attention_weights, lstm_output)
        # (batch_size, num_directions * hidden_dim)
        weighted_sum = weighted.squeeze(1)
        return weighted_sum, attention_weights.squeeze(1)

    def get(self):
        """
        Get the constructor hyperparameters as a JSON string.
        Returns:
            str: JSON string of model parameters.
        """
        params = {
            'embedding_dim': self.embedding_dim,
            'hidden_dim': self.hidden_dim,
            'output_dim': self.output_dim,
            'n_layers': self.n_layers,
            'bidirectional': self.bidirectional,
            'dropout': self.dropout,
            'rnn_type': self.rnn_type
        }
        return json.dumps(params)


In [5]:
import torch
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, BertModel
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm
from torch.utils.data import Dataset

# Text tokenization: load the 'bert-base-uncased' tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Load the trained model from its folder under `path`
save_dir_bert = os.path.join(path, "bert_sequence_classification_trained")
model_bert = BertForSequenceClassification.from_pretrained(save_dir_bert)


def get_emb_cls_function_trained(sentence):
    """
    Embed a sentence with the encoder (`model_bert.bert`) of the trained BERT model.
    Args:
        sentence: Text handed to `tokenizer`; the encoding is truncated to at most 256 tokens.
    Returns:
        tuple: (embeddings_words, embedding_cls). `embeddings_words` is the last
            hidden state without its first and last positions; `embedding_cls`
            is the last hidden state at position 0.
    """
    encoded = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True,  max_length=256)

    # Inference only: run the encoder without tracking gradients
    with torch.no_grad():
        hidden = model_bert.bert(**encoded).last_hidden_state

    # Position 0 is [CLS]; the first and last positions ([CLS] and [SEP]) are dropped from the word embeddings
    return hidden[:, 1:-1, :], hidden[:, 0, :]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [6]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import matplotlib

def plot_attention_in_sentence(sentence, attention_weights, tokenizer):
    """
    Plot the attention weights over the input sentence.

    Each token is drawn in a box coloured by its min-max normalised weight
    ('RdYlBu_r' colormap: red = highest, blue = lowest).

    Args:
        sentence (str): The input sentence.
        attention_weights (torch.Tensor): Attention weights from the model,
            one weight per token of `sentence` (1-D).
        tokenizer (PreTrainedTokenizer): Tokenizer used to tokenize the sentence.
    Raises:
        AssertionError: If the number of tokens differs from the number of weights.
    """
    attention_weights = attention_weights.detach().cpu().numpy()

    # Normalize attention weights to [0, 1] for better visualization.
    # A single weight, or weights that are all equal, have no spread to
    # normalize (dividing by max - min would yield NaN), so they are shown
    # at full intensity instead.
    lowest = np.min(attention_weights)
    spread = np.max(attention_weights) - lowest
    if len(attention_weights) == 1 or spread == 0:
        attention_weights = np.ones_like(attention_weights)
    else:
        attention_weights = (attention_weights - lowest) / spread

    # Tokenize the input sentence (no special tokens, no truncation)
    inputs = tokenizer(sentence, return_tensors="pt", add_special_tokens=False)
    input_ids = inputs["input_ids"][0]

    # Convert the input IDs back to tokens
    input_tokens = tokenizer.convert_ids_to_tokens(input_ids)

    # Ensure the number of tokens matches the number of attention weights.
    # The sentence is re-tokenized here without truncation, so weights that
    # were computed from a truncated encoding will fail this check.
    assert len(input_tokens) == len(attention_weights), "The number of tokens must match the number of attention weights."

    # Create a figure and axis; the width grows with the number of tokens
    fig, ax = plt.subplots(figsize=(len(input_tokens) * 1.5, 2))

    # Get colors from the colormap
    cmap = matplotlib.colormaps.get_cmap('RdYlBu_r')
    colors = cmap(attention_weights)

    # Plot the sentence with attention weights
    for i, token in enumerate(input_tokens):
        ax.text(i, 0.5, token, horizontalalignment='center', verticalalignment='center', size=12,
                color='black', bbox=dict(facecolor=colors[i], edgecolor='white', boxstyle='round,pad=0.3'))

    ax.set_xlim(-1, len(input_tokens))
    ax.set_ylim(0, 1)
    ax.axis('off')

    # Add a custom legend
    legend_elements = [plt.Line2D([0], [0], marker='o', color='w', label='High Attention (Red)', markersize=10, markerfacecolor=cmap(1.0)),
                       plt.Line2D([0], [0], marker='o', color='w', label='Low Attention (Blue)', markersize=10, markerfacecolor=cmap(0.0))]

    ax.legend(handles=legend_elements, loc='upper right')

    plt.show()


In [7]:
# Best LSTM checkpoint of the selected training run
filename = 'LSTM/best_model_LSTM.pth'
best_model_path_lstm = os.path.join(save_dir_base, filename)

best_model_lstm = SentimentClassifierWithSoftAttention(rnn_type='LSTM').to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads when only the CPU is available.
best_model_lstm.load_state_dict(torch.load(best_model_path_lstm, map_location=device))



<All keys matched successfully>

In [None]:
# Best GRU checkpoint of the selected training run
filename = 'GRU/best_model_GRU.pth'
best_model_path_gru = os.path.join(save_dir_base, filename)

best_model_gru = SentimentClassifierWithSoftAttention(rnn_type='GRU').to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads when only the CPU is available.
best_model_gru.load_state_dict(torch.load(best_model_path_gru, map_location=device))



<All keys matched successfully>

In [None]:
# Best LSTM_CLS checkpoint of the selected training run
filename = 'LSTM_CLS/best_model_LSTM_CLS.pth'
best_model_path_lstm_cls = os.path.join(save_dir_base, filename)

best_model_lstm_cls = SentimentClassifierWithSoftAttention(rnn_type='LSTM').to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads when only the CPU is available.
best_model_lstm_cls.load_state_dict(torch.load(best_model_path_lstm_cls, map_location=device))



<All keys matched successfully>

In [None]:
# Best GRU_CLS checkpoint of the selected training run
filename = 'GRU_CLS/best_model_GRU_CLS.pth'
best_model_path_gru_cls = os.path.join(save_dir_base, filename)

best_model_gru_cls = SentimentClassifierWithSoftAttention(rnn_type='GRU').to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads when only the CPU is available.
best_model_gru_cls.load_state_dict(torch.load(best_model_path_gru_cls, map_location=device))



<All keys matched successfully>

In [8]:
# Label names, positionally matched to the model outputs (entry i names output i).
label_mapping = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]

In [9]:
def process_sentence(sentence, model, cls = True):
    """
    Classify a sentence, plot its attention weights and describe the result.

    Args:
        sentence (str): Text to classify.
        model (nn.Module): Classifier exposing `forward` and `forward_with_cls`.
        cls (bool): If True, feed the CLS embedding to `model.forward_with_cls`;
            if False, call the model on the word embeddings only.
    Returns:
        str: 'no sentence' for None, empty or whitespace-only input,
            'no toxic' when no label is predicted, otherwise a message
            listing the predicted labels.
    Raises:
        ValueError: If `cls` equals neither True nor False.
    """
    # Nothing to classify. Whitespace-only text is rejected too: it yields no
    # word tokens, and the model cannot run on an empty sequence.
    if sentence is None or (isinstance(sentence, str) and not sentence.strip()):
        return 'no sentence'

    # Equality (not identity) is kept so that 1 / 0 remain accepted as before
    if cls == True:
        use_cls = True
    elif cls == False:
        use_cls = False
    else:
        raise ValueError('cls must be set to either True or False')

    example_embedding, example_cls_token = get_emb_cls_function_trained(sentence)

    # The embeddings are produced wherever the BERT model lives, while the
    # classifier may sit on another device (e.g. CUDA); move the inputs to it.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        example_embedding = example_embedding.to(first_param.device)
        example_cls_token = example_cls_token.to(first_param.device)

    # Get the model outputs and attention weights
    model.eval()
    with torch.no_grad():
        if use_cls:
            outputs, attention_weights = model.forward_with_cls(example_embedding, example_cls_token)
        else:
            outputs, attention_weights = model(example_embedding)

        # A label is predicted when its output is positive. The tensor is
        # brought back to the CPU before converting it to NumPy.
        predictions = (outputs > 0).float().cpu().numpy()

    plot_attention_in_sentence(sentence, attention_weights.squeeze(0), tokenizer)

    result_labels = [label for label, value in zip(label_mapping, predictions.flatten()) if value == 1]
    if not result_labels:
        return 'no toxic'
    return f"this sentence is toxic: {', '.join(result_labels)}"


In [10]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import torch

# Free-text field for the sentence to analyse
input_text = widgets.Text(
    description='Input:',
    placeholder='Type something',
    value='',
    layout=widgets.Layout(width='400px'),
    style={'description_width': 'initial'},
)

# Button that triggers the processing
process_button = widgets.Button(
    description='Process',
    icon='check',
    tooltip='Click to process',
    button_style='primary',
    disabled=False,
)

# Bordered output area in which the result is displayed
output_text = widgets.Output(
    layout={'width': '400px', 'padding': '10px', 'border': '1px solid black'},
)

# Click handler: classify the typed sentence and show the outcome
def on_button_click(b):
    """
    Process the current text of `input_text` and print the result in `output_text`.
    Args:
        b: The clicked button (unused).
    """
    sentence = input_text.value
    with output_text:
        # Drop whatever the previous click left in the output area
        clear_output()
        # Classify with the LSTM model (no CLS embedding) and show the message
        print(process_sentence(sentence, best_model_lstm, cls=False))

# Register the click handler on the button
process_button.on_click(on_button_click)

# Stack the three widgets vertically, centred across the full width
vbox_layout = widgets.VBox(
    children=[input_text, process_button, output_text],
    layout=widgets.Layout(width='100%', align_items='center'),
)

# Render the stacked widgets
display(vbox_layout)


VBox(children=(Text(value='', description='Input:', layout=Layout(width='400px'), placeholder='Type something'…