In [1]:
import torch
import pandas as pd
from evaluate import load
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
from huggingface_hub import login

In [2]:
login(token=os.getenv('HF_TOKEN'))

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
data = pd.read_csv('yelp_parallel/test_en_parallel.txt', sep='\t')
data.columns = ['Style1', 'Style2']
data = data[:5000]

In [4]:
sentences_negative = data['Style1'].values.tolist()
sentences_positive = data['Style2'].values.tolist()

In [5]:
model_name = 'meta-llama/Llama-2-7b-hf'
bitsandbytes_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16,
                                         bnb_4bit_quant_type='nf4')

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             quantization_config=bitsandbytes_config,
                                             device_map='cuda')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
bleu = load('bleu')
bertscore = load('bertscore')

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [8]:
# Zero-shot
sample_sentence = sentences_negative[0]
sample_sentence

"ever since joes has changed hands it's just gotten worse and worse."

In [9]:
prompt = f'Transform the following negative sentence to positive: {sample_sentence}\nPositive: '
prompt

"Transform the following negative sentence to positive: ever since joes has changed hands it's just gotten worse and worse.\nPositive: "

In [10]:
tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
tokens

{'input_ids': tensor([[    1,  4103,   689,   278,  1494,  8178, 10541,   304,  6374, 29901,
          3926,  1951,  2958,   267,   756,  3939,  6567,   372, 29915, 29879,
           925,  2355,   841, 15029,   322, 15029, 29889,    13,  9135,  3321,
         29901, 29871]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [11]:
output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
output_ids

tensor([[    1,  4103,   689,   278,  1494,  8178, 10541,   304,  6374, 29901,
          3926,  1951,  2958,   267,   756,  3939,  6567,   372, 29915, 29879,
           925,  2355,   841, 15029,   322, 15029, 29889,    13,  9135,  3321,
         29901, 29871, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285,
         25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285,
         25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285,
         25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285,
         25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285, 25285,
         25285, 25285]], device='cuda:0')

In [12]:
tokenizer.decode(output_ids[0], skip_special_tokens=True)

"Transform the following negative sentence to positive: ever since joes has changed hands it's just gotten worse and worse.\nPositive: ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................"

In [13]:
predictions_0 = []
references_0 = []

In [14]:
for i in range(10):
    prompt = f'Transform the following negative sentence to positive: {sentences_negative[i]}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    pred = output.split('Positive:')[-1].strip() if 'Positive:' in output else output.split('\n')[-1].strip()
    predictions_0.append(pred)
    references_0.append(sentences_positive[i])

In [15]:
bleu.compute(predictions=predictions_0, references=[[ref] for ref in references_0])

{'bleu': 0.006770613426183067,
 'precisions': [0.02568397543271915,
  0.010662177328843996,
  0.0045121263395375075,
  0.0017006802721088435],
 'brevity_penalty': 1.0,
 'length_ratio': 17.91,
 'translation_length': 1791,
 'reference_length': 100}

In [16]:
bertscore.compute(predictions=predictions_0, references=references_0, model_type='microsoft/deberta-xlarge-mnli')



{'precision': [0.48212531208992004,
  0.18001851439476013,
  0.0,
  0.1839916706085205,
  0.7004492282867432,
  0.3902304172515869,
  0.3722982704639435,
  0.39336657524108887,
  0.6014003753662109,
  0.5318043828010559],
 'recall': [0.5934234857559204,
  0.3732936382293701,
  0.0,
  0.3349146246910095,
  0.8469482660293579,
  0.5931626558303833,
  0.6140822768211365,
  0.5618292689323425,
  0.6054982542991638,
  0.5594109296798706],
 'f1': [0.5320158004760742,
  0.24290001392364502,
  0.0,
  0.23750531673431396,
  0.7667638659477234,
  0.4707580506801605,
  0.4635569155216217,
  0.46274250745773315,
  0.603442370891571,
  0.5452584028244019],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [17]:
# One-shot
example = 'Negative: the food was terrible\nPositive: the food was excellent'
example

'Negative: the food was terrible\nPositive: the food was excellent'

In [18]:
sample_sentence = sentences_negative[1]
sample_sentence

'there is definitely not enough room in that part of the venue.'

In [19]:
prompt = f'{example}\n\nNegative: {sample_sentence}\nPositive: '
prompt

'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: there is definitely not enough room in that part of the venue.\nPositive: '

In [20]:
tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
tokenizer.decode(output_ids[0], skip_special_tokens=True)

'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: there is definitely not enough room in that part of the venue.\nPositive: \n\nNegative: \n\nNegative: \n\nNegative: \n\nNegative: \n\nNegative: \n\nNegative: \n\nNegative: \n'

In [21]:
predictions_1 = []
references_1 = []

In [22]:
for i in range(10):
    prompt = f'{example}\n\nNegative: {sentences_negative[i]}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    pred = output.split('Positive:')[-1].strip() if 'Positive:' in output else output.split('\n')[-1].strip()
    predictions_1.append(pred)
    references_1.append(sentences_positive[i])

In [23]:
bleu.compute(predictions=predictions_1, references=[[ref] for ref in references_1])

{'bleu': 0.0,
 'precisions': [0.1927710843373494,
  0.10526315789473684,
  0.02857142857142857,
  0.0],
 'brevity_penalty': 0.8147945551343462,
 'length_ratio': 0.83,
 'translation_length': 83,
 'reference_length': 100}

In [24]:
bertscore.compute(predictions=predictions_1, references=references_1, model_type='microsoft/deberta-xlarge-mnli')



{'precision': [0.0,
  0.48055315017700195,
  0.0,
  0.3161439001560211,
  0.7941712737083435,
  0.0,
  0.5118494629859924,
  0.25768721103668213,
  0.32971903681755066,
  0.2829959988594055],
 'recall': [0.0,
  0.5796743631362915,
  0.0,
  0.34851500391960144,
  0.7376935482025146,
  0.0,
  0.6840810775756836,
  0.2824844717979431,
  0.3182152807712555,
  0.3795325756072998],
 'f1': [0.0,
  0.5254802703857422,
  0.0,
  0.33154115080833435,
  0.7648912072181702,
  0.0,
  0.5855633616447449,
  0.26951664686203003,
  0.32386505603790283,
  0.3242311477661133],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [25]:
# Five-shot
examples_5 = 'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal'
examples_5

'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal'

In [26]:
sample_sentence = sentences_negative[2]
sample_sentence

'so basically tasted watered down.'

In [27]:
prompt = f'{examples_5}\n\nNegative: {sample_sentence}\nPositive: '
prompt

'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal\n\nNegative: so basically tasted watered down.\nPositive: '

In [28]:
tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
tokenizer.decode(output_ids[0], skip_special_tokens=True)

'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal\n\nNegative: so basically tasted watered down.\nPositive: \n\nNegative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNeg'

In [29]:
predictions_5 = []
references_5 = []

In [30]:
for i in range(10):
    prompt = f'{examples_5}\n\nNegative: {sentences_negative[i]}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    pred = output.split('Positive:')[-1].strip() if 'Positive:' in output else output.split('\n')[-1].strip()
    predictions_5.append(pred)
    references_5.append(sentences_positive[i])

In [31]:
bleu.compute(predictions=predictions_5, references=[[ref] for ref in references_5])

{'bleu': 0.0,
 'precisions': [0.23684210526315788, 0.06451612903225806, 0.0, 0.0],
 'brevity_penalty': 0.19562045574649367,
 'length_ratio': 0.38,
 'translation_length': 38,
 'reference_length': 100}

In [32]:
bertscore.compute(predictions=predictions_5, references=references_5, model_type='microsoft/deberta-xlarge-mnli')



{'precision': [0.29588770866394043,
  0.2990191578865051,
  0.0,
  0.0,
  0.525473952293396,
  0.30622485280036926,
  0.4155178964138031,
  0.0,
  0.47124630212783813,
  0.40099745988845825],
 'recall': [0.33613187074661255,
  0.36854127049446106,
  0.0,
  0.0,
  0.7323873043060303,
  0.412492573261261,
  0.5519180297851562,
  0.0,
  0.36022430658340454,
  0.45194560289382935],
 'f1': [0.3147284984588623,
  0.3301600515842438,
  0.0,
  0.0,
  0.6119123697280884,
  0.3515024483203888,
  0.47410231828689575,
  0.0,
  0.4083231985569,
  0.42494991421699524],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [33]:
# Ten-shot
examples_10 = 'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal\n\nNegative: horrible place\nPositive: wonderful place\n\nNegative: never coming back\nPositive: definitely coming back\n\nNegative: waste of money\nPositive: worth the money\n\nNegative: rude staff\nPositive: friendly staff\n\nNegative: cold food\nPositive: hot food'
examples_10

'Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal\n\nNegative: horrible place\nPositive: wonderful place\n\nNegative: never coming back\nPositive: definitely coming back\n\nNegative: waste of money\nPositive: worth the money\n\nNegative: rude staff\nPositive: friendly staff\n\nNegative: cold food\nPositive: hot food'

In [34]:
sample_sentence = sentences_negative[3]
sample_sentence

"she said she'd be back and disappeared for a few minutes."

In [35]:
prompt = f'{examples_10}\n\nNegative: {sample_sentence}\nPositive: '
prompt

"Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal\n\nNegative: horrible place\nPositive: wonderful place\n\nNegative: never coming back\nPositive: definitely coming back\n\nNegative: waste of money\nPositive: worth the money\n\nNegative: rude staff\nPositive: friendly staff\n\nNegative: cold food\nPositive: hot food\n\nNegative: she said she'd be back and disappeared for a few minutes.\nPositive: "

In [36]:
tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
tokenizer.decode(output_ids[0], skip_special_tokens=True)

"Negative: the food was terrible\nPositive: the food was excellent\n\nNegative: awful service\nPositive: great service\n\nNegative: worst experience\nPositive: best experience\n\nNegative: not good at all\nPositive: very good\n\nNegative: disappointing meal\nPositive: amazing meal\n\nNegative: horrible place\nPositive: wonderful place\n\nNegative: never coming back\nPositive: definitely coming back\n\nNegative: waste of money\nPositive: worth the money\n\nNegative: rude staff\nPositive: friendly staff\n\nNegative: cold food\nPositive: hot food\n\nNegative: she said she'd be back and disappeared for a few minutes.\nPositive:  she was back in 30 seconds.\n\nNegative: the food was terrible\nPositive: the food was delicious\n\nNegative: the service was terrible\nPositive: the service was excellent\n\nNeg"

In [37]:
predictions_10 = []
references_10 = []

In [38]:
for i in range(10):
    prompt = f'{examples_10}\n\nNegative: {sentences_negative[i]}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    pred = output.split('Positive:')[-1].strip() if 'Positive:' in output else output.split('\n')[-1].strip()
    predictions_10.append(pred)
    references_10.append(sentences_positive[i])

In [39]:
bleu.compute(predictions=predictions_10, references=[[ref] for ref in references_10])

{'bleu': 0.0,
 'precisions': [0.08163265306122448, 0.0, 0.0, 0.0],
 'brevity_penalty': 0.3531662652616454,
 'length_ratio': 0.49,
 'translation_length': 49,
 'reference_length': 100}

In [40]:
bertscore.compute(predictions=predictions_10, references=references_10, model_type='microsoft/deberta-xlarge-mnli')



{'precision': [0.3190222382545471,
  0.3569352328777313,
  0.3579205274581909,
  0.4621596038341522,
  0.3346001207828522,
  0.2986333966255188,
  0.32461729645729065,
  0.0,
  0.361230731010437,
  0.3829004168510437],
 'recall': [0.343608021736145,
  0.44789159297943115,
  0.5275716185569763,
  0.5749461054801941,
  0.43461373448371887,
  0.4130258858203888,
  0.3504026532173157,
  0.0,
  0.3670108914375305,
  0.49886780977249146],
 'f1': [0.3308590054512024,
  0.3972737193107605,
  0.4264943599700928,
  0.5124199986457825,
  0.3781050145626068,
  0.3466358780860901,
  0.3370174765586853,
  0.0,
  0.3640978932380676,
  0.4332582652568817],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [None]:
# Овој модел не е подобар од моделите во претходните лапараториски вежби