#Machine Learning shared methods

In [None]:
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC # Import Support Vector Classifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier # Import Gradient Boosting and Random Forest Classifiers
from sklearn.linear_model import LogisticRegression # Import Logistic Regression
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier

def clean_text(text):
    # URL Removal
    # This pattern targets common URL formats (http(s)://, www., etc.)
    url_pattern = r'https?://\S+|www\.\S+|\S+\.(com|net|org|gov|edu|info|biz|co|me|io|ly|tv|pro|tel|aero|mobi|asia|int|mil|museum|name|jobs|travel|xyz|tk|cc|ws|fm|am|eu|us|ru|cn|jp|de|fr|uk|au|ca|it|es|nl|se|no|dk|fi|pl|cz|hu|ie|sg|hk|my|th|ph|vn|id|sa|ae|kw|qa|bh|om|eg|ma|dz|tn|sd|sy|iq|lb|jo|ye)\b'
    text = re.sub(url_pattern, ' ', text)
    # Diacritics (Tashkeel) Removal
    dia_pattren = r'[\u064B-\u0652\u06D6-\u06DC\u06DF-\u06E8\u06EA-\u06ED]'
    text = re.sub(dia_pattren, ' ', text)
    # Punctuation and Non-Arabic Characters Removal
    text = re.sub(r'[^\u0621-\u064A\u0660-\u0669\s]+', ' ', text)
    # Repeats Reduction, as they can indecate emotions intensity
    text = re.sub(r'(.)\1+', r'\1\1', text)
    # Trim Extra Spaces
    text = ' '.join(text.split())
    return text


def vectorize_text(corpus, vectorizer=None, ngram_range=(1, 4)):
    # Convert tokens into numeric features (TF-IDF, embeddings, etc.)
    if vectorizer is None:
        vectorizer = CountVectorizer(ngram_range=ngram_range)
        vectorized_data = vectorizer.fit_transform(corpus)
        return vectorizer, vectorized_data
    else:
        vectorized_data = vectorizer.transform(corpus)
        return vectorized_data

def train_model(X_train, y_train, model_type='naive_bayes'):
    if model_type == 'naive_bayes':
        model = MultinomialNB()
    elif model_type == 'svm':
        model = SVC()
    elif model_type == 'gradient_boosting':
        model = GradientBoostingClassifier(random_state=42)
    elif model_type in ['logistic_regression','logistic']:
        model = LogisticRegression(random_state=42, solver='liblinear') # Added Logistic Regression
    elif model_type == 'decision_tree':
        model = DecisionTreeClassifier(random_state=42) # Added Decision Tree Classifier
    elif model_type == 'random_forest':
        model = RandomForestClassifier(random_state=42) # Added Random Forest Classifier
    else:
        # Defaulting to a simple model or raising an error for unknown types
        raise ValueError(f"Unsupported model type: {model_type}")

    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    # Evaluate performance metrics like accuracy and F1-score
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return accuracy, report

def predict_text(model, vectorizer, text):
    # Run a single text prediction through the whole pipeline
    cleaned = clean_text(text)
    # Tokenization is now handled by CountVectorizer internally
    vectorized = vectorizer.transform([cleaned])
    return model.predict(vectorized)



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
dataset_path='sports_tweets.csv'
df = pd.read_csv('sports_tweets.csv')
df['cleaned_text'] = df['tweet'].apply(clean_text)
model_types=['naive_bayes','svm','gradient_boosting','logistic_regression','decision_tree','random_forest']
ngram_ranges=[(1,1),(1,2),(1,3),(1,4)]

#This is the parameters to setup

In [None]:

ngram_range = (1, 1) # use (1,n) for ngram setup, currently using unigram(1-gram)
model_type = 'naive_bayes'
# Vectorize cleaned text with the specified n-gram range
vectorizer, X_vectorized = vectorize_text(df['cleaned_text'], ngram_range=ngram_range)
y = df['label']

# Split data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)



#This runs and trains the ML Models

In [None]:

#we will temporarily use the naive_bayes
model = train_model(X_train, y_train, model_type=model_type)

# Evaluate the model
accuracy, report = evaluate_model(model, X_test, y_test)

print(f"Model Accuracy on {dataset_path} ({model_types[0]}) with {ngram_ranges[0]}-grams: {accuracy:.4f}")
print(f"\nClassification Report on {dataset_path} ({model_types[0]}) with {ngram_ranges[0]}-grams:")
print(report)

Model Accuracy on sports_tweets.csv (naive_bayes) with (1, 1)-grams: 0.9526

Classification Report on sports_tweets.csv (naive_bayes) with (1, 1)-grams:
              precision    recall  f1-score   support

        hate       0.95      0.97      0.96       266
         not       0.96      0.93      0.94       198

    accuracy                           0.95       464
   macro avg       0.95      0.95      0.95       464
weighted avg       0.95      0.95      0.95       464



In [None]:
all_results=list()
# Iterate through each model type
for model_type in model_types:
    # Iterate through each n-gram range
    for ngram_range in ngram_ranges:
        print(f"\n--- Evaluating Model: {model_type} with N-gram Range: {ngram_range} ---")

        # 2. Vectorize cleaned text with the current n-gram range
        # A new vectorizer is created for each ngram_range to ensure correct feature generation
        vectorizer, X_vectorized = vectorize_text(df['cleaned_text'], ngram_range=ngram_range)

        # 3. Split data into training and testing sets (80-20 split)
        X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

        # 4. Train the current model
        model = train_model(X_train, y_train, model_type=model_type)

        # 5. Evaluate the model
        accuracy, report = evaluate_model(model, X_test, y_test)

        # Store the results
        all_results.append({
            'model_type': model_type,
            'ngram_range': ngram_range,
            'accuracy': accuracy,
            'classification_report': report
        })

        print(f"Model Accuracy: {accuracy:.4f}")
        print("Classification Report:")
        print(report)

print("\n" + "="*50)
print("Comprehensive Evaluation Results Summary")
print("="*50)

# Print summary of all results
for result in all_results:
    print(f"\nModel: {result['model_type']}, N-gram Range: {result['ngram_range']}")
    print(f"Accuracy: {result['accuracy']:.4f}")
    print("Classification Report:")
    print(result['classification_report'])


--- Evaluating Model: naive_bayes with N-gram Range: (1, 1) ---
Model Accuracy: 0.9526
Classification Report:
              precision    recall  f1-score   support

        hate       0.95      0.97      0.96       266
         not       0.96      0.93      0.94       198

    accuracy                           0.95       464
   macro avg       0.95      0.95      0.95       464
weighted avg       0.95      0.95      0.95       464


--- Evaluating Model: naive_bayes with N-gram Range: (1, 2) ---
Model Accuracy: 0.9483
Classification Report:
              precision    recall  f1-score   support

        hate       0.94      0.98      0.96       266
         not       0.97      0.91      0.94       198

    accuracy                           0.95       464
   macro avg       0.95      0.94      0.95       464
weighted avg       0.95      0.95      0.95       464


--- Evaluating Model: naive_bayes with N-gram Range: (1, 3) ---
Model Accuracy: 0.9504
Classification Report:
             

In [None]:
best_accuracy = -1
best_result = None

for result in all_results:
    if result['accuracy'] > best_accuracy:
        best_accuracy = result['accuracy']
        best_result = result

print("\n" + "="*50)
print("Best Performing Model and N-gram Combination")
print("="*50)

if best_result:
    print(f"Model Type: {best_result['model_type']}")
    print(f"N-gram Range: {best_result['ngram_range']}")
    print(f"Accuracy: {best_result['accuracy']:.4f}")
    print("Classification Report:")
    print(best_result['classification_report'])
else:
    print("No results found to determine the best model.")


Best Performing Model and N-gram Combination
Model Type: naive_bayes
N-gram Range: (1, 1)
Accuracy: 0.9526
Classification Report:
              precision    recall  f1-score   support

        hate       0.95      0.97      0.96       266
         not       0.96      0.93      0.94       198

    accuracy                           0.95       464
   macro avg       0.95      0.95      0.95       464
weighted avg       0.95      0.95      0.95       464



#- Here are the Deep Learning

# Task
Install `transformers`, `datasets`, and `accelerate` libraries, and import them in the current environment.

## Install and Import Libraries

### Subtask:
Install necessary Python libraries for Hugging Face Transformers, Datasets, and Accelerate, and import them.


**Reasoning**:
To complete the subtask, I need to install the specified libraries using pip and then import them in a Python code block.



In [None]:
!pip install transformers datasets accelerate
import transformers
import datasets
import accelerate



## Authenticate Hugging Face

### Subtask:
Authenticate to Hugging Face using the 'HF_TOKEN' Colab secret for model uploads and downloads.


**Reasoning**:
To authenticate to Hugging Face, I need to import the `notebook_login` function and then call it. This function is designed to handle authentication within Colab environments by leveraging Colab secrets.



In [None]:
from huggingface_hub import login
from google.colab import userdata

login(userdata.get('HF_TOKEN'))

## Prepare Dataset for MBERT

### Subtask:
Tokenize the Arabic text data using the MBERT tokenizer and prepare it for model training. This will involve using the already cleaned text from the 'cleaned_text' column in the DataFrame and mapping the labels to integers.


**Reasoning**:
I will start by importing the `AutoTokenizer` class from the `transformers` library and loading the `bert-base-multilingual-cased` tokenizer. Then, I'll define a function to tokenize the text, handling padding and truncation.



In [None]:
from transformers import AutoTokenizer

# Load the MBERT tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')

def tokenize_function(examples):
    return tokenizer(examples['cleaned_text'], truncation=True, padding='max_length', max_length=128)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

# Task
Convert the pandas DataFrame to a Hugging Face Dataset, map the labels to integers, and then tokenize the dataset using the previously defined `tokenize_function`.

## Prepare Dataset for MBERT

### Subtask:
Tokenize the Arabic text data using the MBERT tokenizer and prepare it for model training. This will involve using the already cleaned text from the 'cleaned_text' column in the DataFrame and mapping the labels to integers.


**Reasoning**:
To prepare the dataset for MBERT, I need to convert the pandas DataFrame to a Hugging Face Dataset, map string labels to integers, tokenize the text, and then format the dataset for PyTorch.



In [None]:
from datasets import Dataset

# 1. Convert pandas DataFrame to Hugging Face Dataset
hf_dataset = Dataset.from_pandas(df)

# 2. Create a dictionary to map string labels to numerical integers
label_mapping = {'hate': 0, 'not': 1}

# 3. Apply this mapping to the 'label' column of the dataset
hf_dataset = hf_dataset.map(lambda examples: {'labels': label_mapping[examples['label']]})

# 4. Apply the tokenize_function to the dataset and remove unnecessary columns
tokenized_dataset = hf_dataset.map(tokenize_function, batched=True, remove_columns=['tweet', 'cleaned_text', 'label', 'lemmatized_tweet'])

# Set the format to 'torch'
tokenized_dataset.set_format('torch')

print("Dataset prepared successfully for MBERT training:")
print(tokenized_dataset)

Map:   0%|          | 0/2316 [00:00<?, ? examples/s]

Map:   0%|          | 0/2316 [00:00<?, ? examples/s]

Dataset prepared successfully for MBERT training:
Dataset({
    features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 2316
})


## Split Data

### Subtask:
Split the prepared dataset into training and testing sets for fine-tuning and evaluation.


**Reasoning**:
To split the dataset into training and testing sets, I will use the `train_test_split` method on the `tokenized_dataset` with the specified `test_size` and `seed`.



In [None]:
from datasets import DatasetDict

# Split the tokenized dataset into training and testing sets
train_test_split_dataset = tokenized_dataset.train_test_split(test_size=0.2, seed=42)

train_dataset = train_test_split_dataset['train']
test_dataset = train_test_split_dataset['test']

print("Dataset split successfully:")
print(f"Training dataset size: {len(train_dataset)}")
print(f"Testing dataset size: {len(test_dataset)}")

Dataset split successfully:
Training dataset size: 1852
Testing dataset size: 464


## Load MBERT Model and Define Training Arguments

### Subtask:
Load a pre-trained `bert-base-multilingual-cased` model for sequence classification and configure training arguments, including hyperparameters.


**Reasoning**:
To achieve the subtask, I need to import `AutoModelForSequenceClassification`, `TrainingArguments`, and `Trainer` from the transformers library. Then, I will load the MBERT model for sequence classification and define the training arguments as specified.



In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# 1. Load the pre-trained MBERT model for sequence classification
#    Set num_labels to 2 for binary classification (hate/not hate)
model = AutoModelForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=2)

# 2. Configure TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory for model predictions and checkpoints
    learning_rate=2e-5,              # The initial learning rate for AdamW optimizer
    per_device_train_batch_size=16,  # Batch size per GPU/TPU core/CPU for training
    per_device_eval_batch_size=16,   # Batch size per GPU/TPU core/CPU for evaluation
    num_train_epochs=3,              # Total number of training epochs to perform
    weight_decay=0.01,               # Strength of weight decay
    eval_strategy="epoch",     # Evaluation is done at the end of each epoch
    logging_dir='./logs',            # Directory for storing logs
    logging_steps=10,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


**Reasoning**:
To properly evaluate the model during training, I need to define a `compute_metrics` function. This function will take predictions and labels as input and return a dictionary of evaluation metrics, such as accuracy and F1-score, which are essential for tracking model performance. I will import `accuracy_score` and `f1_score` from `sklearn.metrics` for this purpose.



In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='weighted') # Using weighted average for F1-score
    return {'accuracy': accuracy, 'f1_score': f1}

**Reasoning**:
I have already defined the `compute_metrics` function in the previous step as requested. Now I need to integrate this function into the `Trainer` setup to use it during model training and evaluation.



In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# 1. Load the pre-trained MBERT model for sequence classification
#    Set num_labels to 2 for binary classification (hate/not hate)
model = AutoModelForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=2)

# 2. Configure TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory for model predictions and checkpoints
    learning_rate=2e-5,              # The initial learning rate for AdamW optimizer
    per_device_train_batch_size=16,  # Batch size per GPU/TPU core/CPU for training
    per_device_eval_batch_size=16,   # Batch size per GPU/TPU core/CPU for evaluation
    num_train_epochs=3,              # Total number of training epochs to perform
    weight_decay=0.01,               # Strength of weight decay
    eval_strategy="epoch",     # Evaluation is done at the end of each epoch
    logging_dir='./logs',            # Directory for storing logs
    logging_steps=10,
)

# 3. Create Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

print("Trainer initialized successfully.")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Trainer initialized successfully.


**Reasoning**:
The `Trainer` has been successfully initialized, despite a `FutureWarning`. The next logical step in the machine learning pipeline is to train the model using the configured `Trainer` instance.



In [None]:
trainer.train()
print("Model training complete.")

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33me33u33[0m ([33me33u33-king-saud-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
1,0.3138,0.487658,0.75,0.744126
2,0.1666,0.262635,0.918103,0.917924
3,0.103,0.288629,0.915948,0.916037




Model training complete.


**Reasoning**:
The model has been trained successfully. Now, I need to evaluate its performance on the test dataset using the `trainer.evaluate()` method to get the final metrics.



In [None]:
eval_results = trainer.evaluate()
print("Model evaluation complete:")
print(eval_results)



Model evaluation complete:
{'eval_loss': 0.2886291444301605, 'eval_accuracy': 0.915948275862069, 'eval_f1_score': 0.916036950074367, 'eval_runtime': 182.0152, 'eval_samples_per_second': 2.549, 'eval_steps_per_second': 0.159, 'epoch': 3.0}


**Reasoning**:
The model has been evaluated, and the `eval_results` dictionary contains the final performance metrics. I will extract and present the key metrics from this dictionary to summarize the deep learning model's performance.



In [None]:
eval_results = trainer.evaluate()
print("Model evaluation complete:")
print(eval_results)

# Extract and print key metrics for clarity
print(f"\nDeep Learning Model (MBERT) Performance:")
print(f"  Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"  F1-score (weighted): {eval_results['eval_f1_score']:.4f}")



Model evaluation complete:
{'eval_loss': 0.2886291444301605, 'eval_accuracy': 0.915948275862069, 'eval_f1_score': 0.916036950074367, 'eval_runtime': 176.4626, 'eval_samples_per_second': 2.629, 'eval_steps_per_second': 0.164, 'epoch': 3.0}

Deep Learning Model (MBERT) Performance:
  Accuracy: 0.9159
  F1-score (weighted): 0.9160


**Reasoning**:
The user wants to store the deep learning model's performance metrics in a variable for later use. I will extract the accuracy and F1-score from the `eval_results` and assign them to a new dictionary named `dl_results`.



In [None]:
eval_results = trainer.evaluate()
print("Model evaluation complete:")
print(eval_results)

# Extract and print key metrics for clarity
print(f"\nDeep Learning Model (MBERT) Performance:")
print(f"  Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"  F1-score (weighted): {eval_results['eval_f1_score']:.4f}")

dl_results = {
    'model_type': 'MBERT',
    'accuracy': eval_results['eval_accuracy'],
    'f1_score': eval_results['eval_f1_score']
}



Model evaluation complete:
{'eval_loss': 0.2886291444301605, 'eval_accuracy': 0.915948275862069, 'eval_f1_score': 0.916036950074367, 'eval_runtime': 177.1797, 'eval_samples_per_second': 2.619, 'eval_steps_per_second': 0.164, 'epoch': 3.0}

Deep Learning Model (MBERT) Performance:
  Accuracy: 0.9159
  F1-score (weighted): 0.9160


**Reasoning**:
I need to compare the deep learning model's performance with the best classical ML model. I will display the results of both and then determine and print which one achieved higher accuracy and F1-score.



In [None]:
print("\n" + "="*50)
print("Model Performance Comparison (Classical ML vs. Deep Learning)")
print("="*50)

# Display best Classical ML model results
if best_result:
    print(f"\nBest Classical ML Model ({best_result['model_type']} with {best_result['ngram_range']}-grams):")
    print(f"  Accuracy: {best_result['accuracy']:.4f}")
    print(f"  F1-score: {float(best_result['classification_report'].split()[-2]):.4f}") # Extract F1-score from report
else:
    print("No classical ML results found.")

# Display Deep Learning model results
if dl_results:
    print(f"\nDeep Learning Model ({dl_results['model_type']}):")
    print(f"  Accuracy: {dl_results['accuracy']:.4f}")
    print(f"  F1-score: {dl_results['f1_score']:.4f}")
else:
    print("No deep learning results found.")

# Compare and determine the best overall model
if best_result and dl_results:
    print("\n" + "-"*50)
    if dl_results['accuracy'] > best_result['accuracy']:
        print("The Deep Learning model performed better in Accuracy.")
    elif best_result['accuracy'] > dl_results['accuracy']:
        print("The Best Classical ML model performed better in Accuracy.")
    else:
        print("Both models achieved similar accuracy.")

    if dl_results['f1_score'] > float(best_result['classification_report'].split()[-2]):
        print("The Deep Learning model performed better in F1-score.")
    elif float(best_result['classification_report'].split()[-2]) > dl_results['f1_score']:
        print("The Best Classical ML model performed better in F1-score.")
    else:
        print("Both models achieved similar F1-score.")



Model Performance Comparison (Classical ML vs. Deep Learning)

Best Classical ML Model (naive_bayes with (1, 1)-grams):
  Accuracy: 0.9526
  F1-score: 0.9500

Deep Learning Model (MBERT):
  Accuracy: 0.9159
  F1-score: 0.9160

--------------------------------------------------
The Best Classical ML model performed better in Accuracy.
The Best Classical ML model performed better in F1-score.


## Evaluate MBERT Model

### Subtask:
Evaluate the fine-tuned MBERT model on the test set and report its accuracy, precision, recall, and F1-score.


**Reasoning**:
To evaluate the model as requested, I will use the `trainer.predict` method to get predictions on the test set, convert logits to class labels, and then generate a detailed classification report using `sklearn.metrics.classification_report`. Finally, I will update the `dl_results` dictionary with the full report and extracted precision/recall metrics.



In [None]:
from sklearn.metrics import classification_report

# 1. Use trainer.predict() on the test_dataset
predictions_output = trainer.predict(test_dataset)

# 2. Extract raw predictions (logits)
logits = predictions_output.predictions

# 3. Convert raw predictions (logits) into predicted class labels
predicted_labels = np.argmax(logits, axis=1)

# 4. Extract true labels
true_labels = predictions_output.label_ids

# 5. Generate a comprehensive classification report
dl_classification_report = classification_report(true_labels, predicted_labels, target_names=list(label_mapping.keys()))

print("Deep Learning Model (MBERT) Classification Report:")
print(dl_classification_report)

# 6. Update the dl_results dictionary
# Re-parse the report to get precision and recall for updating dl_results
report_dict = classification_report(true_labels, predicted_labels, target_names=list(label_mapping.keys()), output_dict=True)

dl_results.update({
    'classification_report': dl_classification_report,
    'precision_weighted': report_dict['weighted avg']['precision'],
    'recall_weighted': report_dict['weighted avg']['recall']
})

print("\nUpdated dl_results:")
print(dl_results)



Deep Learning Model (MBERT) Classification Report:
              precision    recall  f1-score   support

        hate       0.93      0.91      0.92       257
         not       0.90      0.92      0.91       207

    accuracy                           0.92       464
   macro avg       0.91      0.92      0.92       464
weighted avg       0.92      0.92      0.92       464


Updated dl_results:
{'model_type': 'MBERT', 'accuracy': 0.915948275862069, 'f1_score': 0.916036950074367, 'classification_report': '              precision    recall  f1-score   support\n\n        hate       0.93      0.91      0.92       257\n         not       0.90      0.92      0.91       207\n\n    accuracy                           0.92       464\n   macro avg       0.91      0.92      0.92       464\nweighted avg       0.92      0.92      0.92       464\n', 'precision_weighted': 0.9163395826233335, 'recall_weighted': 0.915948275862069}
