# Arabic SA with LSTM and LLMs
===============================================
Focus:
1) LSTM with LLMs embedding

2) Fine-tuning LLM for Arabic SA and evaluation (with Arabic tweets data).

## Part (1) LSTM with LLM embeddings for Arabic SA

In [None]:
! git clone  https://github.com/waheebedrees/NLP_lab.git

%cd  NLP_lab/



Cloning into 'NLP_lab'...
remote: Enumerating objects: 5, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 5 (delta 0), reused 5 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (5/5), 967.81 KiB | 53.77 MiB/s, done.
/content/NLP_lab


In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import random

def seed(seed: int = 42):
    """Set random seed for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False



seed()
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# This is a version of BERT model trained for Arabic SA
MODEL_NAME = "CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment"

train_data = pd.read_excel("AraSenti_all.xlsx")

test_data = pd.read_excel("HIAQatar_tweets.xlsx")

train_data.head(), test_data.head()


(                                               tweet  label
 0  اغلاق_المحلات_9_مساءمو كافي تقفل كل شوي عشان ا...      0
 1  إغلاق_المحلات_9_مساء لكل قرار سلبيات وإيجابيات...      0
 2  اغلاق_المحلات_9_مساء المولات بالذات قد تكون ال...      0
 3  اغلاق_المحلات_9_مساء طيب متى يفتح المحل السادس...      0
 4  اغلاق_المحلات_9_مساء وبما أن الجو اليوم حلو با...      0,
    ano3  ano2  ano1  agree text_label  label  \
 0     1     1     1      1   positive      1   
 1     1     1     1      1   positive      1   
 2     1     1     1      1   positive      1   
 3     0     0     0      1   negative      0   
 4     0     0     0      1   negative      0   
 
                                                tweet  
 0  @ @ @ مطار حمد الدولي ينضم لنادي الخمس نجوم، م...  
 1  @ بيض الله وجه إدارة المطار على حسن الاستقبال ...  
 2  @ مبادرة مميزة وغير مستغربه من إدارة مطار حمد ...  
 3  @ نرجو من إدارة مطار حمد عمل خطة بديلة لأحتكار...  
 4  @ نرجو من إدارة مطار حمد عمل خطة بديلة لأحتكار...  )

In [None]:
display(train_data.describe(), train_data.info(), train_data.isnull().sum())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15751 entries, 0 to 15750
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tweet   15751 non-null  object
 1   label   15751 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 246.2+ KB


Unnamed: 0,label
count,15751.0
mean,0.903752
std,0.822234
min,0.0
25%,0.0
50%,1.0
75%,2.0
max,2.0


None

Unnamed: 0,0
tweet,0
label,0


In [None]:
train_data.dropna(inplace=True)
test_data.dropna(inplace=True)


In [None]:
train_data.iloc[0]


Unnamed: 0,0
tweet,اغلاق_المحلات_9_مساءمو كافي تقفل كل شوي عشان ا...
label,0


In [None]:
train_data.label.value_counts()


Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,6155
1,4957
2,4639


In [None]:
# this will use it later for inverse mapping

inverse_mapping = {0: "negative", 1: "neutral", 2: "positive"}


mapping =  lambda x : "negative" if x == 0 else "neutral" if x == 1 else "positive" if x == 2 else -1


In [None]:
import re
# Data preprocessing function
# In this function you should clean the Arabic text and normlize it
# remove repeated letter, english letter, punctuation, etc.
# Input: unclean tweet
# Output: clean tweet


def clean_tweet(tweet: str) -> str:
    '''
    write your code here
    '''
    tweet = str(tweet)

    # remove english letters
    tweet = re.sub(r'[a-zA-Z0-9]+', '', tweet, flags=re.MULTILINE)

    # remove emoijis
    tweet = re.sub(
        "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags
        "]+",
        "",
        tweet
    )

    # Remove URLs
    tweet = re.sub(r'http\S+|www\S+|pic\.twitter\S+', '', tweet)

    # remove mentions
    tweet = re.sub(r'@\w+', '', tweet)
    # remove hashtags
    tweet = re.sub(r'#\w+', '', tweet)
    # Remove digits
    tweet = re.sub(r'\d+', '', tweet)

    # Remove extra symbols and punctuation
    # This keeps only letters, numbers, and spaces
    tweet = re.sub(r'[^\w\s]', '', tweet)

    # Remove repeated letters
    # https://stackoverflow.com/questions/39137851/how-to-deal-with-repeated-letters-in-arabic
    tweet = re.sub(r'(.)\1+', r'\1', tweet)

    # Replace underscores and hyphens with spaces
    tweet = re.sub(r'[_\-]', ' ', tweet)

    # Keep only Arabic letters and spaces
    tweet = re.sub(r'[^\u0600-\u06FF\s]', '', tweet)

    # Remove HTML entities
    tweet = re.sub(r'&[a-z]+;', '', tweet)
    # Remove extra whitespaces

    tweet = normalize_arabic(tweet)

    tweet = re.sub(r'\s+', ' ', tweet).strip()
    return tweet


def normalize_arabic(text):
    # Normalize different forms of Alef
    text = re.sub(r'[إأآا]', 'ا', text)
    # Normalize Yeh
    text = re.sub(r'[يى]', 'ي', text)
    # Normalize Teh Marbuta
    text = re.sub(r'ة', 'ه', text)
    # Normalize Hamza
    text = re.sub(r'ؤ', 'و', text)
    text = re.sub(r'ئ', 'ي', text)
    return text


# Test the function
text = "هذتيتيتا مثال على تنظيف التغريدات!!! Visit https://example.com #مثال @user"
cleaned_text = clean_tweet(text)
print(cleaned_text)


train_data['cleaned_text'] = train_data['tweet'].apply(clean_tweet)
test_data['cleaned_text'] = test_data['tweet'].apply(clean_tweet)
train_data.head()


هذتيتيتا مثال علي تنظيف التغريدات


Unnamed: 0,tweet,label,cleaned_text
0,اغلاق_المحلات_9_مساءمو كافي تقفل كل شوي عشان ا...,0,اغلاق المحلات مساءمو كافي تقفل كل شوي عشان الص...
1,إغلاق_المحلات_9_مساء لكل قرار سلبيات وإيجابيات...,0,اغلاق المحلات مساء لكل قرار سلبيات وايجابيات ا...
2,اغلاق_المحلات_9_مساء المولات بالذات قد تكون ال...,0,اغلاق المحلات مساء المولات بالذات قد تكون المت...
3,اغلاق_المحلات_9_مساء طيب متى يفتح المحل السادس...,0,اغلاق المحلات مساء طيب متي يفتح المحل السادسه ...
4,اغلاق_المحلات_9_مساء وبما أن الجو اليوم حلو با...,0,اغلاق المحلات مساء وبما ان الجو اليوم حلو بالس...


In [None]:

X = list(train_data['cleaned_text'])
y = list(train_data['label'])
print(f"train data size: {len(X)}")
print(f"test data size: {len(test_data)}")



train data size: 15751
test data size: 151


In [None]:
train_data['cleaned_text'].iloc[0], train_data['tweet'].iloc[0]


('اغلاق المحلات مساءمو كافي تقفل كل شوي عشان الصلاه متي يتسوقون الناس اجل',
 'اغلاق_المحلات_9_مساءمو كافي تقفل كل شوي عشان الصلاة متى يتسوقون الناس اجل?')

In [None]:

from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
bert = AutoModel.from_pretrained(MODEL_NAME)


bert.config.hidden_size

tokenizer_config.json:   0%|          | 0.00/86.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

768

In [None]:
# Bulid your Bidirectional LSTM with LLM embedding here
# 1) build datasets (train and test)
# 2) tokenize data
# 3) build the model
# 4) train the model
# 5) evaluate the model

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
MODEL_NAME = "CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment"


config = {
    'model_name': MODEL_NAME,
    # Since tweets are short texts, we limited the maximum sequence length to 128 tokens to reduce computational cost without affecting performance.
    'max_len': 128,
    'batch_size': 16,
    'num_epochs': 10,
    'learning_rate': 2e-5,
    'weight_decay': 1e-2,
    'hidden_size': 128,
    'num_layers': 2,
    'dropout': 0.3,
    'num_classes': 3

}


class SentimentDataset(Dataset):

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.long)
        }






seed()


train_size = int(0.8 * len(train_data))


val_size = len(train_data) - train_size

train, val = train_test_split(
    train_data, test_size=val_size, random_state=42)


print(f"train size: {len(train)}")
print(f"val size: {len(val)}")
print(f"test size: {len(test_data)}")


train_dataset = SentimentDataset(
    texts=list(train['cleaned_text']),
    labels=list(train['label']),
    tokenizer=tokenizer,
    max_len=config['max_len']
)


val_dataset = SentimentDataset(
    texts=list(val['cleaned_text']),
    labels=list(val['label']),
    tokenizer=tokenizer,
    max_len=config['max_len']
)



test_dataset = SentimentDataset(
    texts=list(test_data['cleaned_text']),
    labels=list(test_data['label']),
    tokenizer=tokenizer,
    max_len=config['max_len']
)


train_loader = DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    drop_last=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    drop_last=False
)



print(f"Train Loader size: {len(train_loader)}")
print(f"Val Loader size: {len(val_loader)}")
print(f"Test Loader size: {len(test_loader)}")



test_batch = next(iter(train_loader))
test_batch
print(len(train_data))
print("Keys in batch:", test_batch.keys())


print("input_ids shape:", test_batch['input_ids'].shape)
print("attention_mask shape:", test_batch['attention_mask'].shape)
print("labels shape:", test_batch['labels'].shape)


decoded_text = tokenizer.decode(
    test_batch['input_ids'][0], skip_special_tokens=True)
print("Decoded text example:", decoded_text)

print("Label:", test_batch['labels'][0].item())
print("Label:", mapping(test_batch['labels'][0].item()))


train size: 12600
val size: 3151
test size: 151
Train Loader size: 788
Val Loader size: 197
Test Loader size: 10
15751
Keys in batch: dict_keys(['input_ids', 'attention_mask', 'labels'])
input_ids shape: torch.Size([16, 128])
attention_mask shape: torch.Size([16, 128])
labels shape: torch.Size([16])
Decoded text example: حمله دربك خضر توصي باستخدام رمز الاستجابه السريع لتاكد من صحه بطاقه الاطارات
Label: 2
Label: positive


In [None]:

lengths = train_data['cleaned_text'].apply(
    lambda x: len(tokenizer.tokenize(x))
)

lengths.describe(percentiles=[0.9, 0.95, 0.99])


Unnamed: 0,cleaned_text
count,15751.0
mean,18.167545
std,8.635155
min,1.0
50%,17.0
90%,31.0
95%,33.0
99%,37.0
max,45.0


In [None]:
print(len(train_loader))
print(len(val_loader))
print(len(test_loader))

788
197
10


In [None]:
seed()
loss_fn = nn.CrossEntropyLoss()

test_batch = next(iter(train_loader))
input_ids = test_batch['input_ids'].to(DEVICE)
attention_mask = test_batch['attention_mask'].to(DEVICE)
labels = test_batch['labels'].to(DEVICE)

bert = bert.to(DEVICE)
for params in bert.parameters():
    params.requires_grad = False

bert_hidden_size = bert.config.hidden_size

lstm = nn.LSTM(
    input_size=bert_hidden_size,
    hidden_size=config['hidden_size'],
    num_layers=config['num_layers'],
    batch_first=True,
    dropout=config['dropout'],
    bidirectional=True).to(DEVICE)


attention = nn.Linear(config['hidden_size'] * 2, 1).to(DEVICE)


norm = nn.LayerNorm(config['hidden_size'] * 2).to(DEVICE)
fc = nn.Linear(config['hidden_size'] * 2, config['num_classes']).to(DEVICE)

dropout1 = nn.Dropout(config['dropout']).to(DEVICE)
dropout2 = nn.Dropout(config['dropout']).to(DEVICE)





out = bert(input_ids=input_ids, attention_mask=attention_mask)   # [batch_size, seq_len, bert_hidden_size]

lstm_out, _ = lstm(out.last_hidden_state) # [batch_size, seq_len, hidden_size*2]
lstm_out_norm = norm(lstm_out)
lstm_out = dropout1(lstm_out_norm)
print(lstm_out.shape)
attention_scores = attention(lstm_out) # [batch_size, seq_len, 1]

# Convert scores to probabilities using softmax
attention_weights = torch.softmax(attention_scores, dim=1)  # [batch_size, seq_len, 1]
attention_weights.shape

# Element-wise multiplication (broadcasting)
weighted_outputs = attention_weights * lstm_out # [batch_size, seq_len, hidden_size*2]

# Sum across sequence dimension to get context vector
context_vector = torch.sum(weighted_outputs, dim=1) # [batch_size, hidden_size*2]

context_vector = norm(context_vector)
context_vector = dropout2(context_vector)


logits = fc(context_vector) # [batch_size, num_classes]


loss  = loss_fn(logits, labels)
print(loss)


torch.Size([16, 128, 256])
tensor(0.9712, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:

import numpy as np

import torch
import torch.nn as nn

from transformers import AutoModel

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from tqdm import tqdm
from torch.optim import AdamW
from sklearn.utils.class_weight import compute_class_weight


class BiLSTMWithLLM(nn.Module):
    def __init__(self, model_name, hidden_size, num_layers, dropout, num_classes, freeze_bert_layers=6):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        # here we frezzing the layers
        for param in self.bert.parameters():
            param.requires_grad = False


        bert_hidden_size = self.bert.config.hidden_size

        self.lstm = nn.LSTM(
            input_size=bert_hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0
        )

        self.norm = nn.LayerNorm(hidden_size * 2)
        self.attention = nn.Linear(hidden_size * 2, 1)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)


    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) # [batch_size, seq_len, bert_hidden_size]

        sequence_output = outputs.last_hidden_state
        # LSTM
        lstm_out, _ = self.lstm(sequence_output)          # [batch_size, seq_len, hidden_size*2]
        lstm_out = self.norm(lstm_out)
        lstm_out = self.dropout1(lstm_out)



        # Attention mechanism
        attention_scores = self.attention(lstm_out)                 # [batch_size, seq_len, 1]
        attention_weights = torch.softmax(attention_scores, dim=1)  # [batch_size, seq_len, 1]
        weighted_outputs = attention_weights * lstm_out             # [batch_size, seq_len, hidden_size*2]

        context_vector = torch.sum(weighted_outputs, dim=1)         # [batch_size, hidden_size*2]

        # Classification
        context = self.norm(context_vector)
        context_vector = self.dropout2(context_vector)
        logits = self.fc(context_vector)

        return logits


def train_one_epoch(model, train_loader, val_loader, optimizer, criterion, device):

    model.train()
    total_train_loss = 0
    train_loop = tqdm(train_loader, desc="Train", leave=True)

    for batch in train_loop:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        train_loop.set_postfix(train_loss=f"{loss.item():.4f}")

    avg_train_loss = total_train_loss / len(train_loader)


    model.eval()
    total_val_loss = 0
    val_preds = []
    val_labels = []
    val_loop = tqdm(val_loader, desc="Val", leave=True)

    with torch.no_grad():
        for batch in val_loop:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

            total_val_loss += loss.item()
            _, preds = torch.max(logits, dim=1)

            val_preds.extend(preds.cpu().numpy())
            val_labels.extend(labels.cpu().numpy())

            batch_acc = (preds == labels).float().mean().item()
            val_loop.set_postfix(val_loss=f"{loss.item():.4f}", batch_acc=f"{batch_acc:.4f}")

    avg_val_loss = total_val_loss / len(val_loader)
    val_acc = accuracy_score(val_labels, val_preds)
    val_f1 = f1_score(val_labels, val_preds, average='weighted')

    return avg_train_loss, avg_val_loss, val_acc, val_f1



def evaluate(model, dataloader):
    model.eval()
    predictions = []
    true_labels = []

    loop = tqdm(dataloader, desc="Evaluating", leave=True)

    with torch.no_grad():
        for batch in loop:
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            labels = batch['labels'].to(DEVICE)
            logits = model(input_ids, attention_mask)
            _, preds = torch.max(logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            batch_acc = (preds == labels).float().mean().item()
            loop.set_postfix(batch_accuracy=f"{batch_acc:.4f}")


    accuracy = accuracy_score(true_labels, predictions)

    f1 = f1_score(true_labels, predictions, average='weighted')
    cm = confusion_matrix(true_labels, predictions)
    return accuracy, f1, cm




seed()


config = {
    'model_name': MODEL_NAME,
    'max_len': 128,
    'batch_size': 16,
    'num_epochs': 4,
    'learning_rate': 2e-5,
    'weight_decay': 1e-2,
    'hidden_size': 128,
    'num_layers': 2,
    'dropout': 0.3,
    'num_classes': 3

}

train_size = int(0.8 * len(train_data))


val_size = len(train_data) - train_size

train, val = train_test_split(
    train_data, test_size=val_size, random_state=42)


print(f"train size: {len(train)}")
print(f"val size: {len(val)}")
print(f"test size: {len(test_data)}")


train_dataset = SentimentDataset(
    texts=list(train['cleaned_text']),
    labels=list(train['label']),
    tokenizer=tokenizer,
    max_len=config['max_len']
)


val_dataset = SentimentDataset(
    texts=list(val['cleaned_text']),
    labels=list(val['label']),
    tokenizer=tokenizer,
    max_len=config['max_len']
)



test_dataset = SentimentDataset(
    texts=list(test_data['cleaned_text']),
    labels=list(test_data['label']),
    tokenizer=tokenizer,
    max_len=config['max_len']
)


train_loader = DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    drop_last=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    drop_last=False
)



print(f"Train Loader size: {len(train_loader)}")
print(f"Val Loader size: {len(val_loader)}")
print(f"Test Loader size: {len(test_loader)}\n")


model = BiLSTMWithLLM(
    model_name=config['model_name'],
    hidden_size=config['hidden_size'],
    num_layers=config['num_layers'],
    dropout=config['dropout'],
    num_classes=config['num_classes']
).to(DEVICE)





criterion = nn.CrossEntropyLoss()




optimizer = AdamW(
    model.parameters(),
    lr=config['learning_rate'],
    weight_decay=config['weight_decay']
)



best_accuarcy = 0.0


for epoch in range(config['num_epochs']):
    train_loss, val_loss, val_acc, val_f1 = train_one_epoch(
        model, train_loader, val_loader, optimizer, criterion, DEVICE
    )

    print(f"Epoch {epoch+1}")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Val Loss:   {val_loss:.4f}")
    print(f"Val Acc:    {val_acc:.4f}")
    print(f"Val F1:     {val_f1:.4f}")
    print("-" * 50)
    print()


    test_accuracy, test_f1, test_cm = evaluate(model, test_loader)
    if best_accuarcy < test_accuracy:
        best_accuarcy = test_accuracy
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"Saving best model weights")


    print("TEST RESULTS")
    print("-" * 50)
    print(f"TEST Accuracy: {test_accuracy:.4f}")
    print(f"TEST F1 Score: {test_f1:.4f}")
    print("TEST Confusion Matrix:")
    print(test_cm)
    print("-" * 50)
    print()



train size: 12600
val size: 3151
test size: 151
Train Loader size: 1575
Val Loader size: 394
Test Loader size: 19



Train: 100%|██████████| 1575/1575 [01:39<00:00, 15.81it/s, train_loss=0.2730]
Val: 100%|██████████| 394/394 [00:23<00:00, 16.94it/s, batch_acc=1.0000, val_loss=0.3232]


Epoch 1
Train Loss: 0.4781
Val Loss:   0.3682
Val Acc:    0.8600
Val F1:     0.8603
--------------------------------------------------



Evaluating: 100%|██████████| 19/19 [00:01<00:00, 17.24it/s, batch_accuracy=0.8571]


Saving best model weights
TEST RESULTS
--------------------------------------------------
TEST Accuracy: 0.8146
TEST F1 Score: 0.8222
TEST Confusion Matrix:
[[63  4 15]
 [ 2 34  4]
 [ 2  1 26]]
--------------------------------------------------



Train: 100%|██████████| 1575/1575 [01:39<00:00, 15.81it/s, train_loss=0.3104]
Val: 100%|██████████| 394/394 [00:23<00:00, 16.95it/s, batch_acc=1.0000, val_loss=0.2343]


Epoch 2
Train Loss: 0.3510
Val Loss:   0.3301
Val Acc:    0.8765
Val F1:     0.8768
--------------------------------------------------



Evaluating: 100%|██████████| 19/19 [00:01<00:00, 17.02it/s, batch_accuracy=1.0000]


Saving best model weights
TEST RESULTS
--------------------------------------------------
TEST Accuracy: 0.8278
TEST F1 Score: 0.8325
TEST Confusion Matrix:
[[69  6  7]
 [ 0 34  6]
 [ 4  3 22]]
--------------------------------------------------



Train: 100%|██████████| 1575/1575 [01:40<00:00, 15.73it/s, train_loss=0.0628]
Val: 100%|██████████| 394/394 [00:24<00:00, 16.34it/s, batch_acc=1.0000, val_loss=0.2248]


Epoch 3
Train Loss: 0.3107
Val Loss:   0.3138
Val Acc:    0.8785
Val F1:     0.8787
--------------------------------------------------



Evaluating: 100%|██████████| 19/19 [00:01<00:00, 15.77it/s, batch_accuracy=1.0000]


TEST RESULTS
--------------------------------------------------
TEST Accuracy: 0.8079
TEST F1 Score: 0.8147
TEST Confusion Matrix:
[[66  6 10]
 [ 0 34  6]
 [ 4  3 22]]
--------------------------------------------------



Train: 100%|██████████| 1575/1575 [01:47<00:00, 14.59it/s, train_loss=0.0862]
Val: 100%|██████████| 394/394 [00:23<00:00, 17.06it/s, batch_acc=1.0000, val_loss=0.2311]


Epoch 4
Train Loss: 0.2772
Val Loss:   0.3024
Val Acc:    0.8889
Val F1:     0.8891
--------------------------------------------------



Evaluating: 100%|██████████| 19/19 [00:01<00:00, 17.27it/s, batch_accuracy=1.0000]

TEST RESULTS
--------------------------------------------------
TEST Accuracy: 0.7881
TEST F1 Score: 0.7954
TEST Confusion Matrix:
[[63  8 11]
 [ 0 34  6]
 [ 4  3 22]]
--------------------------------------------------






In [None]:
best_accuarcy

0.8278145695364238

In [None]:
model = BiLSTMWithLLM(
    model_name=config['model_name'],
    hidden_size=config['hidden_size'],
    num_layers=config['num_layers'],
    dropout=config['dropout'],
    num_classes=config['num_classes']
).to(DEVICE)



model.load_state_dict(torch.load('best_model.pth'))


model.eval()
predictions = []
true_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        labels = batch['labels'].to(DEVICE)
        logits = model(input_ids, attention_mask)
        _, preds = torch.max(logits, dim=1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
        batch_acc = (preds == labels).float().mean().item()

    accuracy = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions, average='weighted')
    cm = confusion_matrix(true_labels, predictions)
    print("TEST RESULTS")
    print("-" * 50)
    print(f"TEST Accuracy: {accuracy:.4f}")
    print(f"TEST F1 Score: {f1:.4f}")
    print("TEST Confusion Matrix:")
    print(cm)
    print("-" * 50)
    print()



TEST RESULTS
--------------------------------------------------
TEST Accuracy: 0.8278
TEST F1 Score: 0.8325
TEST Confusion Matrix:
[[69  6  7]
 [ 0 34  6]
 [ 4  3 22]]
--------------------------------------------------



## Part (2) LLM for Arabic SA

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader


In [None]:
import re
## Data preprocessing function
## In this function you should clean the Arabic text and normlize it
## remove repeated letter, english letter, punctuation, etc.
## Input: unclean tweet
## Output: clean tweet

def clean_tweet(tweet):
    tweet = re.sub(r'[a-zA-Z0-9]+', '',tweet,flags=re.MULTILINE) #remove english letters
    '''
    write your code here
    '''
    return tweet


In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_NAME = "CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment"


In [None]:
## Bulid your Bidirectional LSTM with LLM embedding here
## 1) build datasets (train and test)
## 2) call tokenizer and pre-trained model
## 3) prepare trainer
## 4) train the model (call trainer)
## 5) evaluate the model


# datasets
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

train_dataset = SentimentDataset(train_data['cleaned_text'], train_data['label'], tokenizer)
test_dataset =  SentimentDataset(test_data['cleaned_text'], test_data['label'], tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


def train_epoch(model, dataloader):
    model.train()
    total_loss = 0
    loop = tqdm(dataloader, desc="Train", leave=True)

    for batch in loop:
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        labels = batch['labels'].to(DEVICE)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        logits = outputs.logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        loop.set_postfix(loss=f"{loss.item():.4f}")

    return total_loss / len(dataloader)


def evaluate(model, dataloader):
    model.eval()
    predictions = []
    true_labels = []

    loop = tqdm(dataloader, desc="Evaluating", leave=True)

    with torch.no_grad():
        for batch in loop:
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            labels = batch['labels'].to(DEVICE)
            outputs = model(input_ids, attention_mask)
            logits = outputs.logits
            _, preds = torch.max(logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
            batch_acc = (preds == labels).float().mean().item()
            loop.set_postfix(batch_accuracy=f"{batch_acc:.4f}")
    accuracy = accuracy_score(true_labels, predictions)

    f1 = f1_score(true_labels, predictions, average='weighted')
    cm = confusion_matrix(true_labels, predictions)
    return accuracy, f1, cm


model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=3
).to(DEVICE)


criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])

for epoch in range(config['num_epochs']):
    loss = train_epoch(model, train_loader)
    print(f"Epoch {epoch+1}/{config['num_epochs']} - Loss: {loss:.4f}")
    print("-" * 50)
    accuracy, f1, cm = evaluate(model, test_loader)
    print(f"Epoch {epoch+1}/{config['num_epochs']} - Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")
    print("Confusion Matrix:")
    print(cm)
    print("-" * 50)




Train: 100%|██████████| 985/985 [05:50<00:00,  2.81it/s, loss=0.0405]


Epoch 1/10 - Loss: 0.3063
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.46it/s, batch_accuracy=1.0000]


Epoch 1/10 - Accuracy: 0.8146, F1 Score: 0.8222
Confusion Matrix:
[[62  5 15]
 [ 0 36  4]
 [ 3  1 25]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:44<00:00,  2.86it/s, loss=0.0080]


Epoch 2/10 - Loss: 0.1203
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.91it/s, batch_accuracy=1.0000]


Epoch 2/10 - Accuracy: 0.7682, F1 Score: 0.7789
Confusion Matrix:
[[56  7 19]
 [ 0 34  6]
 [ 2  1 26]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:46<00:00,  2.84it/s, loss=0.0039]


Epoch 3/10 - Loss: 0.0512
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.89it/s, batch_accuracy=1.0000]


Epoch 3/10 - Accuracy: 0.7682, F1 Score: 0.7733
Confusion Matrix:
[[59 16  7]
 [ 0 35  5]
 [ 4  3 22]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.0037]


Epoch 4/10 - Loss: 0.0282
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.97it/s, batch_accuracy=1.0000]


Epoch 4/10 - Accuracy: 0.7351, F1 Score: 0.7393
Confusion Matrix:
[[50 14 18]
 [ 0 35  5]
 [ 2  1 26]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.0008]


Epoch 5/10 - Loss: 0.0224
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.92it/s, batch_accuracy=1.0000]


Epoch 5/10 - Accuracy: 0.7881, F1 Score: 0.7965
Confusion Matrix:
[[59  8 15]
 [ 0 34  6]
 [ 2  1 26]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.0018]


Epoch 6/10 - Loss: 0.0202
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.66it/s, batch_accuracy=1.0000]


Epoch 6/10 - Accuracy: 0.6954, F1 Score: 0.6935
Confusion Matrix:
[[44 23 15]
 [ 0 37  3]
 [ 2  3 24]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.0017]


Epoch 7/10 - Loss: 0.0160
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.96it/s, batch_accuracy=0.8571]


Epoch 7/10 - Accuracy: 0.6689, F1 Score: 0.6827
Confusion Matrix:
[[41  6 35]
 [ 0 34  6]
 [ 2  1 26]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.0006]


Epoch 8/10 - Loss: 0.0166
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.79it/s, batch_accuracy=0.8571]


Epoch 8/10 - Accuracy: 0.7086, F1 Score: 0.7200
Confusion Matrix:
[[52  7 23]
 [ 4 30  6]
 [ 3  1 25]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.0002]


Epoch 9/10 - Loss: 0.0144
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.99it/s, batch_accuracy=0.8571]


Epoch 9/10 - Accuracy: 0.7219, F1 Score: 0.7252
Confusion Matrix:
[[61  7 14]
 [10 25  5]
 [ 5  1 23]]
--------------------------------------------------


Train: 100%|██████████| 985/985 [05:43<00:00,  2.87it/s, loss=0.1168]


Epoch 10/10 - Loss: 0.0103
--------------------------------------------------


Evaluating: 100%|██████████| 10/10 [00:01<00:00,  8.90it/s, batch_accuracy=0.8571]

Epoch 10/10 - Accuracy: 0.7483, F1 Score: 0.7602
Confusion Matrix:
[[53  7 22]
 [ 0 34  6]
 [ 2  1 26]]
--------------------------------------------------



