# Reviews Sentiment Analysis By Fine Tuning Bert

## Install packages

In [None]:
%pip install datasets
%pip install transformers[torch]
%pip install -q peft
%pip install -q evaluate
%pip install scikit-learn

# Preparation

Import all necessary libraries

In [None]:
import torch

import numpy as np

from datasets import load_dataset

from transformers import AutoTokenizer, BertForSequenceClassification, TrainingArguments, Trainer, BertForSequenceClassification

from peft import LoraConfig, TaskType, get_peft_model, PeftModel

import evaluate

Check which device is available (GPU if present, otherwise CPU)

In [None]:
# Pick the first CUDA GPU when torch reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(device)

## Data preprocessing

Load dataset. We use the IMDB dataset.

In [None]:
# Load the "imdb" dataset through `datasets.load_dataset`; its "train" and
# "test" splits are tokenized and sampled further down.
raw_datasets = load_dataset("imdb")

Tokenizer from BERT

In [None]:
# Tokenizer for the "bert-base-cased" checkpoint, the same checkpoint name the
# classification model is loaded from later.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

Test the tokenizer 

In [None]:
# Quick sanity check: tokenize one sentence with the same padding/truncation
# settings used for the dataset and print the resulting encoding.
sentence = "Good morning, today is a good day"
# Named `encoded` rather than `input` so the built-in input() is not shadowed
# for the rest of the notebook session.
encoded = tokenizer(sentence, padding="max_length", truncation=True)
print(encoded)

Prepare tokenized dataset from the IMDB dataset

In [None]:
def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Args:
        examples: Mapping that provides the raw texts under the "text" key
            (as passed by `Dataset.map(..., batched=True)`).

    Returns:
        The tokenizer output for the texts, padded to the maximum length and
        truncated where necessary.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)

# Tokenize every split in batches.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# Training uses the whole shuffled "train" split.
full_train_dataset = tokenized_datasets["train"].shuffle(seed=77)

# Evaluation uses only the first 320 examples of the shuffled "test" split
# (small sample size for testing).
shuffled_test_split = tokenized_datasets["test"].shuffle(seed=77)
full_eval_dataset = shuffled_test_split.select(range(320))

# Report the sizes of the complete tokenized splits (not of the sample above).
for split_name in ("train", "test"):
    print(len(tokenized_datasets[split_name]))

## Model and Training

LoRA (Low Rank Adaptation) is applied to fine tune BERT model

Configuration for LoRA. 
- `TaskType.SEQ_CLS` (sequence classification) is the task type used for fine-tuning the BERT model
- Rank is set to 1 for training (larger rank does not make much difference)


In [None]:
# LoRA settings for a sequence-classification task: rank-1 update matrices,
# alpha of 1, and 10% dropout on the LoRA layers.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=1,
    lora_alpha=1,
    lora_dropout=0.1,
)

Define pretrained BERT model

In [None]:

# Base "bert-base-cased" model with a 2-label sequence-classification head.
model = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

File paths definitions 
- `save_path` : Model path for saving after fine tuning

In [None]:
# Directory (relative to the notebook's working directory) that the trained
# model is written to after training.
save_path = "../fine_tuned_model"

In [None]:
# Wrap the base model according to `lora_config`; from here on `model` refers
# to the PEFT-wrapped model, which is what the Trainer receives.
model = get_peft_model(model, lora_config)

Metric for evaluation. We use the `accuracy` metric from the Hugging Face `evaluate` library (which relies on scikit-learn)

In [None]:
# Load the "accuracy" metric from the `evaluate` library; compute_metrics
# calls it on the predictions of each evaluation pass.
metric = evaluate.load("accuracy")

In [None]:
def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level accuracy metric.

    Args:
        eval_pred: Pair of (logits, labels) produced during evaluation.

    Returns:
        The result of `metric.compute` for the arg-max class predictions
        against the reference labels.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

Training Arguments

In [None]:
# One training epoch with an evaluation pass at the end of each epoch;
# batches of 32 per device for both training and evaluation. Trainer output
# goes to the "result" directory.
training_args = TrainingArguments(
    output_dir="result",
    eval_strategy="epoch",
    num_train_epochs=1,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
)

Define a trainer

In [None]:
# Assemble the Trainer from the pieces defined above.
trainer = Trainer(
    model=model,  # PEFT-wrapped model returned by get_peft_model
    args=training_args,
    train_dataset=full_train_dataset,  # shuffled "train" split
    eval_dataset=full_eval_dataset,  # 320-example sample of the "test" split
    compute_metrics=compute_metrics,  # accuracy on arg-max predictions
)

Start training

In [None]:
# Run fine-tuning with the settings in `training_args`.
trainer.train()

Save fine tuned model

In [None]:
# Persist the trained model held by the trainer to `save_path`.
# NOTE(review): because the model was wrapped by get_peft_model, this
# presumably writes the adapter files rather than a full checkpoint - confirm.
trainer.model.save_pretrained(save_path)

## Load a fine tuned model

In [None]:
# Directory the adapter was saved to after training.
load_path = "../fine_tuned_model"

# Rebuild the base classifier, then attach the saved adapter with PEFT's own
# loader. `PeftModel.from_pretrained` reads the adapter configuration stored
# in `load_path`, so this cell no longer depends on the in-memory `lora_config`
# object from the training section.
# NOTE(review): the PEFT loader is also expected to restore the classification
# head saved alongside the LoRA weights for SEQ_CLS adapters - confirm against
# the installed peft version.
base_model = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
fine_tuned_model = PeftModel.from_pretrained(base_model, load_path)

# Inference only from here on: make sure dropout layers are disabled.
fine_tuned_model.eval()

In [None]:
# Run the fine-tuned model on a single review and print the raw model output.
sentences = 'I am thoroughly impressed with this laptop! It is incredibly affordable yet delivers outstanding performance that surpasses other laptops in the same price range. Highly recommended for anyone looking for a budget-friendly yet powerful option!'
inputs = tokenizer(sentences, padding="max_length", truncation=True, return_tensors="pt")
inputs = inputs.to(device)

fine_tuned_model = fine_tuned_model.to(device)
# Switch to evaluation mode so dropout does not make the prediction random.
fine_tuned_model.eval()

# Label 1 is supplied as the reference so the printed output also contains
# the loss for this example.
labels = torch.tensor([1]).unsqueeze(0).to(device)

# No gradients are needed for inference; skipping autograd saves memory.
with torch.no_grad():
    pred = fine_tuned_model(**inputs, labels=labels)

print(pred)