# Semi-Supervised Sentiment Classification with PEFT

In [1]:
import evaluate
import numpy as np
import pandas as pd
from datasets import Dataset, DatasetDict
from dotenv import load_dotenv
import emoji
from spellchecker import SpellChecker
from peft import LoraConfig, get_peft_model, TaskType
from tqdm import tqdm
import torch
from transformers import (
    BertTokenizer,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
    pipeline,
)
from appvocai-genailab.shared.io import IOService
pd.options.display.max_rows=32

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "finiteautomata/bertweet-base-sentiment-analysis"
fp = "data/01_exp/review.csv"
task = TaskType.SEQ_CLS
output_dir = "models/sentiment/experiment"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
peft_model_path = "models/sentiment/experiment/checkpoint-4"

## Dataset

In [3]:
def truncate(text):
    return text[:128]

In [7]:

df = IOService.read(fp)
samples = list(df.sample(n=64)['content'])
#samples = list(sample.map(truncate))
samples[:10]


['I really like the app, but if it updated quicker and you can spectate that'd be rad',
 "Great app. I love Star Wars and collecting cards so this app is the best thing I've ever found based on Star Wars. my username is roborave.",
 'App works great. Innovative product.',
 "After reading others reviews I'm having the same problem wth messenger trying to open on iPhone..white screen and force close..!! Plzz fix this..!!",
 'This app is great! Keeps me super organized and the last update makes it THAT much more efficient !',
 'i wish there were more minutes we could use',
 'yuh',
 'Plenty of good funny sound affects GET THIS APP',
 'Muy buena App',
 "I normally don't write reviews but it's so lame to see all the negative reviews stating that this app doesn't work and some of them are recent so it has nothing to do with updates. I installed it, took a few minutes to acclimate myself with how to scroll through the different views, saved the one I liked, watched the little tutorial/ slide s

## Create Labeled Data

In [5]:
clf = pipeline("sentiment-analysis", model=model_name)
predictions = pd.DataFrame(clf(samples))
examples = pd.DataFrame(samples)
examples = pd.concat([examples,predictions["label"]],axis=1)
examples.columns = ["content", 'label']
examples




ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

## Create HuggingFace Dataset

In [None]:
dataset = Dataset.from_pandas(examples)
# Convert labels to integers if they are not already
# For example, if 'label' column contains strings like 'positive', 'negative', 'neutral'
label2id = {"NEG": 0, "NEU": 1, "POS": 2}
dataset = dataset.map(lambda example: {"label": label2id[example["label"]]})

# Create Training and Test Sets
dataset = dataset.train_test_split(train_size=0.8)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize the 'content' column
def tokenize_function(example):
    return tokenizer(example["content"], truncation=True)

dataset = dataset.map(tokenize_function, batched=True)

dataset.set_format(type="torch", columns=["input_ids", "token_type_ids", "attention_mask", "label"])

print(dataset)
print(dataset["train"][0])

                                                  

DatasetDict({
    train: Dataset({
        features: ['content', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 51
    })
    test: Dataset({
        features: ['content', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 13
    })
})
{'label': tensor(2), 'input_ids': tensor([    0,  7408,    94,   842,     8,    82,     9,   265,    11,   566,
         1250,  2231,     7,   698,   442,     7,  6624, 44204,   704,    66,
            8,   310,   566, 35358,   234,   433, 23564, 32188,     2]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1])}




## Create PEFT Model
Fine-tuning a model using PEFT (Pretrained Embeddings Fine-Tuning) involves loading a pre-trained model, replacing the classifier head to match the number of output classes, and then training the model on the specific dataset. Here's how you can adjust the code for sentiment analysis with three classes using PEFT:

In [None]:
# Load pre-trained BERT model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Create Peft Model
peft_config = LoraConfig(task_type=task, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1)
peft_model = get_peft_model(model, peft_config=peft_config)
peft_model.print_trainable_parameters()




trainable params: 1480710 || all params: 135790086 || trainable%: 1.0904404317116347


## Train Model

In [None]:

# Define training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Setup evaluation 
metric = evaluate.load("accuracy")

# Define training function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)
    
# The Data Collator converts the training samples to PyTorch tensors for faster training
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# Fine-tune the model
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.025577,1.0
2,No log,0.018807,1.0


TrainOutput(global_step=4, training_loss=0.09668834507465363, metrics={'train_runtime': 106.4224, 'train_samples_per_second': 0.958, 'train_steps_per_second': 0.038, 'total_flos': 2091938427540.0, 'train_loss': 0.09668834507465363, 'epoch': 2.0})

## Inference

In [None]:
# Test sentence
text = ["this app leaves a lot to be desired"]

inference_model = pipeline(model=peft_model_path)
inference_model(text)



[{'label': 'NEG', 'score': 0.8102602958679199}]