In [None]:
! pip install transformers datasets
! pip install evaluate
! pip install sentence-transformers
! pip install -U accelerate

# Part A: Fine-tune a pretrained model

## Yelp polarity

In [None]:
from datasets import load_dataset

# Load the Yelp polarity dataset; later cells read its "train" and "test" splits.
dataset = load_dataset(path="yelp_polarity")

In [None]:
# Inspect the first record of the "train" split to see which fields an example carries.
dataset["train"][0]

In [None]:
# Draw a reproducible random sample of `samples` rows from each split.
from datasets import Dataset
import numpy as np
import pandas as pd

samples = 300

# A seeded generator makes the sample repeatable after a kernel restart; 42 is the same
# seed the shuffle-based sampling in this notebook uses.
rng = np.random.default_rng(42)

# Sample over the full index range [0, len). Building the pool with
# `np.arange(0, len - 1)` would make the last row of each split unselectable.
train_sampled_indices = rng.choice(len(dataset["train"]), size=samples, replace=False)
test_sampled_indices = rng.choice(len(dataset["test"]), size=samples, replace=False)

# `select` takes the sampled rows directly from each split, without fetching them one
# by one and rebuilding a Dataset through pandas.
train_dataset = dataset["train"].select(train_sampled_indices.tolist())
test_dataset = dataset["test"].select(test_sampled_indices.tolist())

# NOTE(review): the next cell assigns `train_dataset` again (shuffle + select), so this
# sample is superseded when the notebook is run top to bottom.

In [None]:
# Fixed-seed 300-example subsets: shuffle each split, then keep the first 300 rows.
subset_rows = range(300)
shuffled_train_split = dataset["train"].shuffle(seed=42)
shuffled_test_split = dataset["test"].shuffle(seed=42)
train_dataset = shuffled_train_split.select(subset_rows)
eval_dataset = shuffled_test_split.select(subset_rows)

In [None]:
# Display the sampled training subset to confirm what was selected.
train_dataset

#  LLMs


In [None]:
# insert your code here

from transformers import AutoTokenizer, RobertaForSequenceClassification

# One checkpoint name feeds both calls so the tokenizer and the model always match.
checkpoint = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = RobertaForSequenceClassification.from_pretrained(checkpoint)

In [None]:
def tokenize_function(examples):
    """Tokenize the "text" entry of a batch with the module-level ``tokenizer``.

    Truncation is enabled and every sequence is padded with the "max_length" strategy.
    Returns whatever the tokenizer returns for that batch.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding="max_length")
    return encoded

# Run the tokenizer over both subsets, batch-wise.
train_dataset = train_dataset.map(function=tokenize_function, batched=True)
eval_dataset = eval_dataset.map(function=tokenize_function, batched=True)


In [None]:
# Display the training subset again, now after the tokenization `map` call.
train_dataset

In [None]:
pip install -U accelerate

In [None]:
import numpy as np
import evaluate
import torch
from tqdm import tqdm
import accelerate #import PartialState 
from transformers import pipeline

# Accuracy metric object; `compute_metrics` below calls `metric.compute(...)` on it.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Turn a (logits, labels) pair into the module-level ``metric``'s result.

    The predicted class of each row is the index of its largest logit along the last axis.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return metric.compute(references=gold, predictions=predicted)

In [None]:
from transformers import TrainingArguments, Trainer
from transformers import get_constant_schedule

import inspect

# Hyperparameters of the fine-tuning run, named so they can be changed in one place.
epochs = 10
learning_rate = 1e-5
train_batch_size = 8

# Newer transformers releases call this argument `eval_strategy`; older ones call it
# `evaluation_strategy`. The install cell does not pin a version, so use whichever name
# the installed TrainingArguments accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)

args = TrainingArguments(
    output_dir="test_trainer",
    per_device_train_batch_size=train_batch_size,
    num_train_epochs=epochs,
    **{_eval_strategy_key: "epoch"},  # evaluate at the end of every epoch
)

# Optimizer over the parameters of the current `model`.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Constant learning-rate schedule wrapped around that optimizer.
scheduler = get_constant_schedule(optimizer)
# etc

In [None]:
# Collect everything the Trainer needs, then build it in one call.
trainer_config = {
    "model": model,
    "args": args,
    "train_dataset": train_dataset,
    "eval_dataset": eval_dataset,
    "compute_metrics": compute_metrics,
    # Hand over the optimizer/scheduler pair built in the configuration cell.
    "optimizers": (optimizer, scheduler),
}
trainer = Trainer(**trainer_config)

In [None]:
# Recorded run: lr = 1e-4, epochs = 10, batch size = 16.
# NOTE(review): `trainer.train()` uses the optimizer and TrainingArguments built in the
# earlier cells, which currently set lr = 1e-5 and batch size 8, so this label only holds
# if those cells were edited before this run. The same `trainer` object is shared by all
# run cells. TODO confirm the model was re-created between runs.
trained_model=trainer.train()

In [None]:
# Recorded run: lr = 1e-5, epochs = 10, batch size = 16.
# NOTE(review): the configuration cell currently sets batch size 8, so this label only
# holds if that cell was edited before this run. This call reuses the same `trainer`.
# TODO confirm the model was re-created first.
trained_model=trainer.train()

In [None]:
# Recorded run: lr = 1e-5, epochs = 10, batch size = 8 (the values the configuration
# cell currently sets).
# NOTE(review): this call reuses the same `trainer` as the run cells above. TODO confirm
# the model was re-created first.
trained_model=trainer.train()

# Transfer Learning

## B1. Piqa dataset


In [None]:
# # insert your code here (load dataset)

# PIQA evaluation subset: 100 rows taken from the seed-42 shuffle of the "train" split.
dataset = load_dataset(path="piqa")
piqa_train_shuffled = dataset["train"].shuffle(seed=42)
eval_dataset = piqa_train_shuffled.select(range(100))

In [None]:
# insert your code here (models)

from transformers import AutoTokenizer, AutoModelForMultipleChoice

def _load_multiple_choice(checkpoint):
    """Return the (tokenizer, multiple-choice model) pair for one checkpoint name."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForMultipleChoice.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# One tokenizer/model pair per checkpoint compared on PIQA.
tokenizer_roberta, model_roberta = _load_multiple_choice("roberta-base")
tokenizer_yoso, model_yoso = _load_multiple_choice("uw-madison/yoso-4096")
tokenizer_mega, model_mega = _load_multiple_choice("mnaylor/mega-base-wikitext")
tokenizer_bert, model_bert = _load_multiple_choice("bert-base-uncased")
tokenizer_electra, model_electra = _load_multiple_choice("google/electra-small-discriminator")

In [None]:
# insert your code here (function for ending prediction)
def ending_prediction(dataset, model, tokenizer):
    """Score a two-choice model on goal/solution records and print the metric result.

    For every record the two (goal, solution) pairs are tokenized together, passed to the
    model as a single two-choice example, and the index of the largest logit is taken as
    the prediction.

    Args:
        dataset: iterable of records with "goal", "sol1", "sol2" and "label" entries.
        model: multiple-choice model; called with the tokenizer's tensors plus a leading
            batch dimension, and expected to expose ``.logits`` on its output.
        tokenizer: tokenizer matching ``model``.

    Returns:
        None. The result of the module-level ``metric.compute`` is printed.
    """
    # Padding needs a pad token. When one has to be added the embedding matrix must grow
    # with the vocabulary, otherwise the new token id has no embedding row to look up.
    if tokenizer.pad_token is None:
        if tokenizer.add_special_tokens({'pad_token': '[PAD]'}):
            model.resize_token_embeddings(len(tokenizer))

    model.eval()  # inference only: keep dropout and similar layers switched off
    pred = []
    true = []
    with torch.no_grad():  # evaluation needs no autograd graph
        for d in dataset:
            prompt = d["goal"]
            inputs = tokenizer(
                [[prompt, d["sol1"]], [prompt, d["sol2"]]],
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            # Add the batch dimension: (choices, seq) -> (1, choices, seq). No labels are
            # passed because the loss is never used here.
            outputs = model(**{k: v.unsqueeze(0) for k, v in inputs.items()})
            pred.append(outputs.logits.argmax().item())
            true.append(d["label"])

    print(metric.compute(predictions=pred, references=true))

    


In [None]:
# Evaluate every checkpoint on the same subset, printing a header before each score.
piqa_runs = (
    ("YOSO: ", model_yoso, tokenizer_yoso),
    ("Roberta: ", model_roberta, tokenizer_roberta),
    ("Mega: ", model_mega, tokenizer_mega),
    ("BERT: ", model_bert, tokenizer_bert),
    ("Electra: ", model_electra, tokenizer_electra),
)
for header, mc_model, mc_tokenizer in piqa_runs:
    print(header)
    ending_prediction(eval_dataset, mc_model, mc_tokenizer)

## B2. Truthful QA

### Sentence Transformers


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
def get_cosine_similarity(feature_vec_1, feature_vec_2):
    """Return the cosine similarity of two vectors as a single number.

    Each vector is reshaped into a one-row matrix before being handed to
    ``cosine_similarity``; the only entry of the resulting matrix is returned.
    """
    row_a = feature_vec_1.reshape(1, -1)
    row_b = feature_vec_2.reshape(1, -1)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

In [None]:
from sentence_transformers import SentenceTransformer
# Quick check of the embedding pipeline: encode two sentences and compare them.
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
embeddings = model.encode(sentences)

first_embedding, second_embedding = embeddings[0], embeddings[1]
get_cosine_similarity(first_embedding, second_embedding)

In [None]:
# insert your code here (load dataset)
# Load the "generation" configuration of TruthfulQA.
dataset = load_dataset("truthful_qa", 'generation')

# Build the evaluation subset from this dataset. Without this assignment `eval_dataset`
# would still be the subset left over from the PIQA section. TruthfulQA ships its data
# in a "validation" split (there is no "train" split to sample from).
eval_dataset = dataset["validation"].shuffle(seed=42).select(range(100))

eval_dataset[0]

In [None]:
# insert your code here (load models for semantic similarity and QA)
# Sentence-embedding model for the semantic-similarity step. This rebinds `model`,
# replacing the all-mpnet-base-v2 encoder loaded in the earlier demo cell.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# insert your code here (function for optimal correct answers & semantic similarity)

## B3. Winogrande dataset


In [None]:
# insert your code here (load dataset)
from datasets import Dataset
import numpy as np
# Winogrande ("winogrande_xs" configuration): 100 rows from the seed-42 shuffle of "train".
dataset = load_dataset("winogrande", name='winogrande_xs')
winogrande_train_shuffled = dataset["train"].shuffle(seed=42)
eval_dataset = winogrande_train_shuffled.select(range(100))

**Text Classification**

In [None]:
# insert your code here (load models)
from transformers import pipeline
# One text-classification pipeline per checkpoint under comparison.
classifier_bert = pipeline(task="text-classification", model="bert-base-uncased")
classifier_pavlov = pipeline(task="text-classification", model="DeepPavlov/roberta-large-winogrande")
classifier_roberta = pipeline(task="text-classification", model="roberta-base")

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Label-name mapping handed to the BERT and DistilBERT classification models below.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

tokenizer_pavlov = AutoTokenizer.from_pretrained("DeepPavlov/roberta-large-winogrande")
model_pavlov = AutoModelForSequenceClassification.from_pretrained("DeepPavlov/roberta-large-winogrande")

# `num_labels` sizes the model's classification head; it is not a tokenizer option, so
# only the model receives it.
tokenizer_bert = AutoTokenizer.from_pretrained("bert-base-uncased")
model_bert = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
)

model_distilbert = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
)
tokenizer_distilbert = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
# insert your code here (create pipelines)
import torch
            
def fill(dataset, classifier):
    """Print the accuracy of a text classifier on fill-in-the-blank records.

    Each record's "sentence" contains a "_" placeholder. The placeholder is replaced by
    "option1" and by "option2", both sentences are classified, and the record counts as
    correct only when the option named by "answer" is labelled 'LABEL_1' and the other
    option is labelled 'LABEL_0'.

    NOTE(review): a later cell defines another ``fill(dataset, model, tokenizer)``; once
    that cell has run, the name ``fill`` refers to that function instead of this one.

    Args:
        dataset: iterable of records with "sentence", "option1", "option2", "answer".
        classifier: callable returning a sequence whose first item has a "label" entry.

    Returns:
        None. The accuracy (correct records / records seen) is printed.
    """
    correct = 0
    total = 0
    for d in dataset:
        ans = int(d["answer"])
        # Split on the first placeholder only, so the text after it is kept whole.
        before, _, after = d["sentence"].partition("_")
        label1 = classifier(before + d["option1"] + after)[0]["label"]
        label2 = classifier(before + d["option2"] + after)[0]["label"]
        if ans == 1 and label1 == 'LABEL_1' and label2 == 'LABEL_0':
            correct += 1
        elif ans == 2 and label1 == 'LABEL_0' and label2 == 'LABEL_1':
            correct += 1
        total += 1

    # Divide by the number of records actually seen rather than a fixed 100, so the
    # score is right for any dataset size; an empty dataset scores 0.0.
    print(correct / total if total else 0.0)

**Multiple Choice**

In [None]:
from transformers import AutoTokenizer, AutoModelForMultipleChoice
import evaluate
# Accuracy metric used by `fill` below.
metric = evaluate.load("accuracy")

# Checkpoints compared on the multiple-choice formulation.
roberta_checkpoint = "roberta-base"
mega_checkpoint = "mnaylor/mega-base-wikitext"
bert_checkpoint = "bert-base-uncased"

tokenizer_roberta = AutoTokenizer.from_pretrained(roberta_checkpoint)
model_roberta = AutoModelForMultipleChoice.from_pretrained(roberta_checkpoint)

tokenizer_mega = AutoTokenizer.from_pretrained(mega_checkpoint)
model_mega = AutoModelForMultipleChoice.from_pretrained(mega_checkpoint)

tokenizer_bert = AutoTokenizer.from_pretrained(bert_checkpoint)
model_bert = AutoModelForMultipleChoice.from_pretrained(bert_checkpoint)

def fill(dataset, model, tokenizer):
    """Score a two-choice model on fill-in-the-blank records and print the metric result.

    Each record's "sentence" is split at its "_" placeholder: the text before it is the
    prompt, and each option followed by the text after it is one candidate ending. The
    index of the largest logit is the prediction; the reference is "answer" minus one.

    Args:
        dataset: iterable of records with "sentence", "option1", "option2", "answer".
        model: multiple-choice model; called with the tokenizer's tensors plus a leading
            batch dimension, and expected to expose ``.logits`` on its output.
        tokenizer: tokenizer matching ``model``.

    Returns:
        None. The result of the module-level ``metric.compute`` is printed.
    """
    # Padding needs a pad token. When one has to be added the embedding matrix must grow
    # with the vocabulary, otherwise the new token id has no embedding row to look up.
    if tokenizer.pad_token is None:
        if tokenizer.add_special_tokens({'pad_token': '[PAD]'}):
            model.resize_token_embeddings(len(tokenizer))

    model.eval()  # inference only: keep dropout and similar layers switched off
    pred = []
    true = []
    with torch.no_grad():  # evaluation needs no autograd graph
        for d in dataset:
            # Split on the first placeholder only, so the text after it is kept whole.
            prompt, _, remainder = d["sentence"].partition("_")
            candidate1 = d["option1"] + remainder
            candidate2 = d["option2"] + remainder
            ans = int(d["answer"]) - 1  # "answer" is 1-based, predictions are 0-based
            inputs = tokenizer(
                [[prompt, candidate1], [prompt, candidate2]],
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            # Add the batch dimension: (choices, seq) -> (1, choices, seq). No labels are
            # passed because the loss is never used here.
            outputs = model(**{k: v.unsqueeze(0) for k, v in inputs.items()})
            pred.append(outputs.logits.argmax().item())
            true.append(ans)

    print(metric.compute(predictions=pred, references=true))

In [None]:
# insert your code here (function for predicting best fill)
# Score each checkpoint on the same Winogrande subset, printing a header before each result.
winogrande_runs = (
    ("Roberta:", model_roberta, tokenizer_roberta),
    ("Bert:", model_bert, tokenizer_bert),
    ("Mega:", model_mega, tokenizer_mega),
)
for header, mc_model, mc_tokenizer in winogrande_runs:
    print(header)
    fill(eval_dataset, mc_model, mc_tokenizer)