# pip install packages

In [None]:
!pip install -U datasets evaluate "ray[tune]" torch torchvision codecarbon hyperopt accelerate

# Load Dataset

In [None]:
from datasets import load_dataset, Dataset, concatenate_datasets, ClassLabel, Features, DatasetDict
# Fetch the "flirty_or_not" dataset from the Hugging Face Hub and drop its
# 'id' column before the splits are used below.
dataset = load_dataset("ieuniversity/flirty_or_not").remove_columns('id')

In [None]:
# Unpack the three named splits of the hub dataset into separate variables.
train_dataset, validation_dataset, test_dataset = (
    dataset[split_name] for split_name in ('train', 'validation', 'test')
)

In [None]:
import pandas as pd
# Read the locally rated examples, keep only the first row for every distinct
# 'texts' value, and wrap the resulting frame as a Hugging Face Dataset.
data = pd.read_csv('flirting_rated.csv').drop_duplicates(subset='texts')
dataset_2 = Dataset.from_pandas(data)
print(dataset_2.num_rows)

In [None]:
# Partition the CSV-derived data ONCE into 80% train / 10% validation / 10% test.
# train_test_split shuffles on every call, so all three subsets have to come
# from a single chain of splits: re-splitting dataset_2 independently for each
# subset draws unrelated shuffles and lets the same rows end up in more than
# one subset (train/validation/test leakage). The fixed seed makes the
# partition reproducible across notebook runs.
SPLIT_SEED = 42
train_vs_holdout = dataset_2.train_test_split(test_size=0.2, seed=SPLIT_SEED)
valid_vs_test = train_vs_holdout['test'].train_test_split(test_size=0.5, seed=SPLIT_SEED)

class_label = ClassLabel(names=['neutral', 'flirty'])

# Define features for the dataset
features = Features({
    'label': class_label,
    'texts': train_vs_holdout['train'].features['texts'],
})


def _with_shared_features(subset):
    """Rebuild *subset* keeping only 'label' and 'texts', typed by ``features``.

    Any other column carried over from the CSV / pandas conversion is dropped,
    and 'label' becomes a ClassLabel(neutral, flirty) column.
    """
    return Dataset.from_dict(
        {'label': subset['label'], 'texts': subset['texts']},
        features=features,
    )


split_train_dataset = _with_shared_features(train_vs_holdout['train'])
split_valid_dataset = _with_shared_features(valid_vs_test['train'])
split_test_dataset = _with_shared_features(valid_vs_test['test'])


# Show both schemas side by side so they can be compared before the
# CSV-derived splits are concatenated with the hub splits.
print(split_train_dataset.features)
print(train_dataset.features)


In [None]:
# Merge every hub split with the matching CSV-derived split.
# Row order is kept as written: CSV rows come first in the training set and
# last in the validation and test sets.
train_dataset_final = concatenate_datasets(
    [split_train_dataset, train_dataset]
)
validation_dataset_final = concatenate_datasets(
    [validation_dataset, split_valid_dataset]
)
test_dataset_final = concatenate_datasets(
    [test_dataset, split_test_dataset]
)

In [None]:
# Copies of each merged split carrying an extra constant 'dataset' column that
# names the split the row belongs to.
# NOTE(review): these tagged copies are not what goes into the DatasetDict
# below (it holds the untagged *_final datasets) — confirm that is intended.
train = train_dataset_final.add_column(
    'dataset', ['train'] * train_dataset_final.num_rows
)
validation = validation_dataset_final.add_column(
    'dataset', ['validation'] * validation_dataset_final.num_rows
)
test = test_dataset_final.add_column(
    'dataset', ['test'] * test_dataset_final.num_rows
)

# Re-bind `dataset` to the merged splits used by the rest of the notebook.
merged_splits = {
    'train': train_dataset_final,
    'validation': validation_dataset_final,
    'test': test_dataset_final,
}
dataset = DatasetDict(merged_splits)

# Prepare Dataset & Model

In [None]:
from transformers import AutoTokenizer
import numpy as np

# Tokenizer matching the BERT checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")


def tokenize_function(examples):
    """Tokenize the 'texts' field of a batch of examples.

    Every sequence is truncated and padded to the tokenizer's maximum length,
    so all encoded rows have the same size.
    """
    encoded = tokenizer(
        examples["texts"],
        truncation=True,
        padding="max_length",
    )
    return encoded

In [None]:
# Tokenize each split in batches; the order (train, validation, test) is the
# same as processing them one statement at a time.
train_ds, val_ds, test_ds = (
    dataset[split_name].map(tokenize_function, batched=True)
    for split_name in ('train', 'validation', 'test')
)

In [None]:
from transformers import AutoModelForSequenceClassification
# Load the pretrained BERT checkpoint with a two-class sequence-classification
# head (one output per label: neutral / flirty).
model = AutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased",
    num_labels=2,
)

# Fine-tune Model using Hugging Face

In [None]:
from transformers import TrainingArguments
# Hyperparameters for the fixed (non-tuned) fine-tuning run.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="training_results",
    # Schedule and regularisation.
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    # Batch sizes per device.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    # Log every 50 steps and evaluate at the end of every epoch.
    logging_steps=50,
    evaluation_strategy="epoch",
)

In [None]:
import numpy as np
import evaluate

# Accuracy metric from the `evaluate` library; the compute_metrics functions
# below call metric.compute(...) and read its 'accuracy' entry.
metric = evaluate.load("accuracy")

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

def compute_metrics(eval_pred):
    """Turn a (logits, labels) pair into a dictionary of evaluation metrics.

    The predicted class is the arg-max over the last logits axis. Precision,
    recall and F1 are computed with binary averaging; accuracy comes from the
    module-level ``metric``. The confusion matrix is returned as nested lists.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)

    prf = precision_recall_fscore_support(labels, predicted, average="binary")
    precision, recall, f1 = prf[0], prf[1], prf[2]
    accuracy = metric.compute(predictions=predicted, references=labels)['accuracy']

    results = {}
    results['Accuracy'] = accuracy
    results['F1 score'] = f1
    results['Precision'] = precision
    results['Recall'] = recall
    # Stored as a plain list rather than a NumPy array.
    results['Conf_matrix'] = confusion_matrix(labels, predicted).tolist()
    return results

In [None]:
from transformers import Trainer
# Trainer for the fixed-hyperparameter run: trains on train_ds, evaluates on
# val_ds (per training_args), and reports the metrics from compute_metrics.
trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=train_ds,
  eval_dataset=val_ds,
  compute_metrics=compute_metrics,
)

# Actually fine-tune the model. Constructing a Trainer does not train anything;
# without this call the evaluation and misclassification cells that follow
# would score a model whose weights were never updated on this data.
trainer.train()

In [None]:
# Evaluate the trainer's model on the test split (overriding the val_ds that
# was passed as eval_dataset); the returned metrics are shown as cell output.
trainer.evaluate(test_ds)

In [None]:
# Report every test example whose predicted label differs from its true label.
from scipy.special import softmax

predictions = trainer.predict(test_ds)
raw_predictions = predictions.predictions
# Class probabilities per row, then the most probable class.
class_probabilities = softmax(raw_predictions, axis=1)
predicted_labels = np.argmax(class_probabilities, axis=1)

true_labels = test_ds['label']

# Positions in the test set where prediction and ground truth disagree.
misclassified_indices = [
    position
    for position, (guess, truth) in enumerate(zip(predicted_labels, true_labels))
    if guess != truth
]

# Print one short report per misclassified example.
for index in misclassified_indices:
    entry = test_ds[index]
    # A wrong prediction of class 1 is a false positive; otherwise a false negative.
    if predicted_labels[index] == 1:
        error_str = 'false positive'
    else:
        error_str = 'false negative'
    print(
        f"Index: {index}",
        f"Error: {error_str}",
        f"Predicted label: {predicted_labels[index]}, True label: {true_labels[index]}",
        f"Text: {entry['texts']}\n",
        sep="\n",
    )

# Hyperparameter-tuning with RayTune

In [None]:
from transformers import TrainingArguments, Trainer
# Base arguments for the hyperparameter-search trainer: evaluate at the end of
# every epoch, five epochs by default, same output directory as the fixed run.
training_args = TrainingArguments(
    evaluation_strategy="epoch",
    num_train_epochs=5,
    output_dir="training_results",
)

In [None]:
def compute_metrics(eval_pred):
    """Return accuracy, F1, precision and recall for a (logits, labels) pair.

    This variant returns only scalar values (no confusion matrix). The
    predicted class is the arg-max over the last logits axis; precision,
    recall and F1 use binary averaging.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)

    accuracy = metric.compute(predictions=predicted, references=labels)['accuracy']
    prf = precision_recall_fscore_support(labels, predicted, average="binary")

    return dict(zip(
        ('Accuracy', 'F1 score', 'Precision', 'Recall'),
        (accuracy, prf[2], prf[0], prf[1]),
    ))

def model_init():
    """Build a new two-label BERT sequence classifier from the base checkpoint.

    Passed to the Trainer as ``model_init`` so a model can be created on demand
    instead of sharing one instance.
    """
    fresh_model = AutoModelForSequenceClassification.from_pretrained(
        "google-bert/bert-base-uncased",
        num_labels=2,
    )
    return fresh_model

# Trainer used for the hyperparameter search: it receives a model factory
# (model_init) rather than a model instance, plus the tokenizer.
trainer = Trainer(
    args=training_args,
    model_init=model_init,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_ds,
    eval_dataset=val_ds,
)


In [None]:
from ray.tune.search.hyperopt import HyperOptSearch

# Search for good hyperparameters with Ray Tune, using HyperOpt to propose
# trials and maximising the trainer's objective. Keep the returned best run:
# the search itself leaves no trained model on `trainer`, so the result has to
# be applied explicitly.
best_run = trainer.hyperparameter_search(
    direction="maximize",
    backend="ray",
    search_alg=HyperOptSearch(metric="objective", mode="max"),
    n_trials=3 # number of trials
)
print(best_run)

# Copy the winning hyperparameters onto the trainer's arguments and run one
# final training with them, so the model saved in the next section is the
# tuned, trained model rather than an untrained one.
for hp_name, hp_value in best_run.hyperparameters.items():
    setattr(trainer.args, hp_name, hp_value)
trainer.train()

# Save Model & Use for Inference

In [None]:
from transformers import pipeline
# Directory the trainer's model is written to and then reloaded from.
NAME = 'bert_flirt_detection_5epoch'
trainer.save_model(NAME)

# Reload the saved weights with human-readable label names attached.
new_model = AutoModelForSequenceClassification.from_pretrained(
    NAME,
    id2label={0: 'neutral', 1: 'flirty'},
)

# Wrap model and tokenizer in a text-classification pipeline and try it once.
classifier = pipeline(
    'text-classification',
    model=new_model,
    tokenizer=tokenizer,
)
classifier("Hey :)")