### Initial Implementation

##### Imports

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, accuracy_score
from datasets import load_dataset

##### Load Dataset

In [10]:
# Load the "full" split of the "Full_Annotations" configuration of the
# shainar/BEAD dataset.
data = load_dataset(
    path="shainar/BEAD",
    name="Full_Annotations",
    split="full",
)

In [12]:
# Print the dataset summary (feature names and number of rows).
print(data)

Dataset({
    features: ['text', 'dimension', 'biased_words', 'aspect', 'label', 'sentiment', 'toxic', 'identity_mention'],
    num_rows: 3674927
})


In [13]:
# Convert the loaded dataset into a pandas DataFrame for the steps below.
df = data.to_pandas()

# Preview the first five rows as plain text.
print(df.head(n=5))

                                                text             dimension  \
0  Where will this leave the kids??? That my frie...  Ageism and sentiment   
1   I had a feeling of impending doom &amp; then ...  Ageism and sentiment   
2                                 I no feel good ..   Ageism and sentiment   
3  Back in Hotlanta for a about a week, then off ...  Ageism and sentiment   
4  Kids toys &amp; stamping stuff cleaned up... N...  Ageism and sentiment   

            biased_words          aspect            label sentiment  toxic  \
0                     []     Transphobia  Slightly Biased  Negative    0.0   
1  ['impending', 'doom']  Climate change    Highly Biased  Negative    1.0   
2                     []    Body-shaming          Neutral  Positive    0.0   
3                     []       Lifestyle  Slightly Biased  Negative    0.0   
4                     []           Hobby          Neutral   Neutral    1.0   

  identity_mention  
0               NO  
1               NO  

##### Text Pre-processing

In [14]:
# Replace missing texts with '' *before* casting to str. Casting first would
# turn NaN/None into the literal strings 'nan'/'None', leaving nothing for
# fillna() to fill and feeding those tokens to the tokenizer as real words.
df['text'] = df['text'].fillna('').astype(str)

##### One-Hot Encoding

In [15]:
# Learn the label vocabulary, then store each row's integer class id.
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
df['label_encoded'] = label_encoder.transform(df['label'])

# One-hot encode the class ids; the width equals the number of distinct labels.
num_classes = label_encoder.classes_.shape[0]
y = to_categorical(df['label_encoded'], num_classes=num_classes)

##### Tokenisation & Padding

In [11]:
MAX_VOCAB = 10_000  # vocabulary size limit passed to the tokenizer (num_words)
MAX_LEN = 100       # every sequence is padded/truncated to this many tokens

# NOTE(review): the tokenizer is fitted on the whole corpus, i.e. before the
# train/test split, so test texts contribute to the vocabulary. Consider
# fitting on the training texts only.
tokenizer = Tokenizer(num_words=MAX_VOCAB, oov_token="<OOV>")
tokenizer.fit_on_texts(df['text'])

# Texts -> integer id sequences -> fixed-width matrix; padding and truncation
# are both applied at the end of each sequence.
X = pad_sequences(
    tokenizer.texts_to_sequences(df['text']),
    maxlen=MAX_LEN,
    padding='post',
    truncating='post',
)

##### Train-Test Split

In [16]:
# Hold out 20% of the samples as the test set, keeping class proportions equal
# in both parts. Stratification uses the integer class ids recovered from the
# one-hot rows: handing scikit-learn the 2-D one-hot matrix itself makes it
# build a string key for every row, which is needlessly slow at this row count.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y.argmax(axis=1),
)

##### Model Architecture

In [None]:
embedding_dim = 128

# Embedding -> bidirectional LSTM encoder -> dense head with a softmax over
# `num_classes` labels, with dropout after the encoder and the hidden layer.
# NOTE(review): the Embedding layer does not mask padded positions, so the
# LSTM also reads the padding added by pad_sequences; consider mask_zero=True.
model = Sequential([
    # `input_length` is intentionally not passed: it is deprecated in Keras 3
    # and only triggers a warning. The sequence length is fixed by build() below.
    Embedding(input_dim=MAX_VOCAB, output_dim=embedding_dim),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

# Build with a fixed sequence length so summary() can report output shapes
# and parameter counts before any data is seen.
model.build(input_shape=(None, MAX_LEN))
# categorical_crossentropy matches the one-hot encoded targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()



##### Model Training

In [14]:
# Train for a fixed 5 epochs in mini-batches of 32, logging progress per epoch.
# NOTE(review): the test split doubles as validation data here, so the val_*
# metrics are computed on the same samples that the final evaluation uses.
# Consider carving a separate validation set out of the training data.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/5
91874/91874 ━━━━━━━━━━━━━━━━━━━━ 1368s 15ms/step - accuracy: 0.8021 - loss: 0.4893 - val_accuracy: 0.8496 - val_loss: 0.3836
Epoch 2/5
91874/91874 ━━━━━━━━━━━━━━━━━━━━ 1389s 15ms/step - accuracy: 0.8493 - loss: 0.3922 - val_accuracy: 0.8534 - val_loss: 0.3762
Epoch 3/5
91874/91874 ━━━━━━━━━━━━━━━━━━━━ 1398s 15ms/step - accuracy: 0.8544 - loss: 0.3795 - val_accuracy: 0.8548 - val_loss: 0.3726
Epoch 4/5
91874/91874 ━━━━━━━━━━━━━━━━━━━━ 1459s 15ms/step - accuracy: 0.8571 - loss: 0.3727 - val_accuracy: 0.8542 - val_loss: 0.3784
Epoch 5/5
91874/91874 ━━━━━━━━━━━━━━━━━━━━ 1404s 15ms/step - accuracy: 0.8593 - loss: 0.3677 - val_accuracy: 0.8549 - val_loss: 0.3715


##### Model Evaluation

In [15]:
# Class probabilities for every test sample.
y_pred_probs = model.predict(X_test)

# Collapse the one-hot targets and the predicted probabilities to class ids.
y_true = y_test.argmax(axis=1)
y_pred = y_pred_probs.argmax(axis=1)

# Overall accuracy, followed by per-class precision / recall / F1 labelled
# with the original label names.
print("Accuracy:", accuracy_score(y_true, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true, y_pred, target_names=label_encoder.classes_),
)

[1m22969/22969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 5ms/step
Accuracy: 0.8549305701060973

Classification Report:
                  precision    recall  f1-score   support

  Highly Biased       0.83      0.80      0.82    149484
        Neutral       0.91      0.94      0.92    377251
Slightly Biased       0.77      0.74      0.76    208251

       accuracy                           0.85    734986
      macro avg       0.84      0.83      0.83    734986
   weighted avg       0.85      0.85      0.85    734986

