In [27]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [2]:
import torch
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments, DistilBertTokenizer
from datasets import load_dataset
from transformers import get_scheduler
import loralib as lora
import time

from datasets import concatenate_datasets

from sklearn.metrics import precision_recall_fscore_support
import numpy as np
import pandas as pd

# Create dataset

In [3]:
# Load the IMDB dataset
dataset = load_dataset("imdb")

# Initialize the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Tokenize the IMDB dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]



Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [4]:
# Sample 2500 from the first 12500 negative samples
negative_samples_train = train_dataset.select(range(12500)).shuffle(seed=42).select(range(2500))

# Sample 2500 from the second 12500 positive samples
positive_samples_train = train_dataset.select(range(12500, 25000)).shuffle(seed=42).select(range(2500))

balanced_train_dataset = concatenate_datasets([negative_samples_train, positive_samples_train])

In [5]:
# Sample 2500 from the first 12500 negative samples
negative_samples_test = test_dataset.select(range(12500)).shuffle(seed=42).select(range(2500))

# Sample 2500 from the second 12500 positive samples
positive_samples_test = test_dataset.select(range(12500, 25000)).shuffle(seed=42).select(range(2500))

balanced_test_dataset = concatenate_datasets([negative_samples_test, positive_samples_test])

In [6]:
balanced_train_dataset

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 5000
})

In [7]:
balanced_test_dataset

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 5000
})

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
import torch
torch.cuda.empty_cache()


In [10]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="binary", zero_division=1)

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }


In [None]:
5e7abf784a3f7805725b2a241d1a5b31d222be1d

# P tune

In [101]:

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from peft import (
    get_peft_model,
    IA3Config,
    TaskType,
)
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np

model_name = "distilbert-base-uncased"
num_epochs = 3
lr = 2e-5
batch_size = 8

In [102]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [103]:
peft_config = IA3Config(
    task_type=TaskType.SEQ_CLS, inference_mode=False,target_modules=["q_lin","k_lin","v_lin","lin1","lin2"], feedforward_modules=["lin1","lin2"]
)


In [104]:
# Get PEFT model
#model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config).to(device)
model.print_trainable_parameters()

trainable params: 628,994 || all params: 67,584,004 || trainable%: 0.9307


In [105]:


# Training arguments
training_args = TrainingArguments(
    output_dir="ia3_distilbert_imdb",
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=balanced_train_dataset,
    eval_dataset=balanced_test_dataset,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()

print("\nEvaluation Results:")
print(f"Precision: {eval_results['eval_precision']:.4f}")
print(f"Recall: {eval_results['eval_recall']:.4f}")
print(f"F1 Score: {eval_results['eval_f1']:.4f}")



Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.4292,0.301674,0.900429,0.8392,0.868737
2,0.3413,0.294495,0.872365,0.894,0.88305
3,0.3115,0.285858,0.892872,0.8768,0.884763



Evaluation Results:


KeyError: 'eval_accuracy'

In [107]:
print("\nEvaluation Results:")
print(f"Precision: {eval_results['eval_precision']:.4f}")
print(f"Recall: {eval_results['eval_recall']:.4f}")
print(f"F1 Score: {eval_results['eval_f1']:.4f}")


Evaluation Results:
Precision: 0.8929
Recall: 0.8768
F1 Score: 0.8848


In [106]:
eval_results

{'eval_loss': 0.2858583927154541,
 'eval_precision': 0.8928716904276985,
 'eval_recall': 0.8768,
 'eval_f1': 0.8847628657921291,
 'eval_runtime': 90.7795,
 'eval_samples_per_second': 55.079,
 'eval_steps_per_second': 6.885,
 'epoch': 3.0}