In [1]:
import torch
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer, util
from evaluate import load
from huggingface_hub import login




In [2]:
login(token=os.getenv('HF_TOKEN'))

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
data = pd.read_csv('yelp_parallel/test_en_parallel.txt', sep='\t')
data.columns = ['Style1', 'Style2']
data = data[:5000]

In [4]:
sentences_negative = data['Style1'].values.tolist()
sentences_positive = data['Style2'].values.tolist()

In [5]:
documents = sentences_positive

In [6]:
embeddings_model = SentenceTransformer('all-MiniLM-L6-v2')
document_embeddings = embeddings_model.encode(documents, batch_size=64, show_progress_bar=True)

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [7]:
model_name = 'meta-llama/Llama-2-7b-hf'
bitsandbytes_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16,
                                         bnb_4bit_quant_type='nf4')

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             quantization_config=bitsandbytes_config,
                                             device_map='cuda:0')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
bleu = load('bleu')
bertscore = load('bertscore')

In [10]:
# Експеримент 1: RAG со 5 документи
sample_sentence = sentences_negative[0]
sample_sentence

"ever since joes has changed hands it's just gotten worse and worse."

In [11]:
sample_embedding = embeddings_model.encode(sample_sentence, batch_size=64, show_progress_bar=True)
context_results = util.semantic_search(sample_embedding, document_embeddings, top_k=5)
context_results

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[[{'corpus_id': 0, 'score': 0.9348020553588867},
  {'corpus_id': 418, 'score': 0.27251461148262024},
  {'corpus_id': 130, 'score': 0.26488202810287476},
  {'corpus_id': 89, 'score': 0.2495049089193344},
  {'corpus_id': 361, 'score': 0.24265258014202118}]]

In [12]:
doc_ids = [c['corpus_id'] for c in context_results[0]]
doc_ids

[0, 418, 130, 89, 361]

In [13]:
docs = [documents[d] for d in doc_ids]
docs

["Ever since joes has changed hands it's gotten better and better.",
 'much more these days.',
 'i expected so much less from this ny staple.',
 'it seems it was warmed up a tad too bit',
 'great what has happened to this sandwich shop.']

In [14]:
context = f'Context:\nExample 1: {docs[0]}\nExample 2: {docs[1]}\nExample 3: {docs[2]}\nExample 4: {docs[3]}\nExample 5: {docs[4]}'
context

"Context:\nExample 1: Ever since joes has changed hands it's gotten better and better.\nExample 2: much more these days.\nExample 3: i expected so much less from this ny staple.\nExample 4: it seems it was warmed up a tad too bit\nExample 5: great what has happened to this sandwich shop."

In [15]:
prompt = f'{context}\n\nTransform the following negative sentence to positive: {sample_sentence}\nPositive: '
prompt

"Context:\nExample 1: Ever since joes has changed hands it's gotten better and better.\nExample 2: much more these days.\nExample 3: i expected so much less from this ny staple.\nExample 4: it seems it was warmed up a tad too bit\nExample 5: great what has happened to this sandwich shop.\n\nTransform the following negative sentence to positive: ever since joes has changed hands it's just gotten worse and worse.\nPositive: "

In [16]:
tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
tokens

{'input_ids': tensor([[    1, 15228, 29901,    13, 14023, 29871, 29896, 29901, 18274,  1951,
          2958,   267,   756,  3939,  6567,   372, 29915, 29879,  2355,   841,
          2253,   322,  2253, 29889,    13, 14023, 29871, 29906, 29901,  1568,
           901,  1438,  3841, 29889,    13, 14023, 29871, 29941, 29901,   474,
          3806,   577,  1568,  3109,   515,   445,  7098,   380,   481,   280,
         29889,    13, 14023, 29871, 29946, 29901,   372,  2444,   372,   471,
          1370,  2168,   701,   263,   260,   328,  2086,  2586,    13, 14023,
         29871, 29945, 29901,  2107,   825,   756,  9559,   304,   445, 11982,
         16416, 18296, 29889,    13,    13, 13372,   278,  1494,  8178, 10541,
           304,  6374, 29901,  3926,  1951,  2958,   267,   756,  3939,  6567,
           372, 29915, 29879,   925,  2355,   841, 15029,   322, 15029, 29889,
            13,  9135,  3321, 29901, 29871]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1,

In [17]:
output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
output_ids

tensor([[    1, 15228, 29901,    13, 14023, 29871, 29896, 29901, 18274,  1951,
          2958,   267,   756,  3939,  6567,   372, 29915, 29879,  2355,   841,
          2253,   322,  2253, 29889,    13, 14023, 29871, 29906, 29901,  1568,
           901,  1438,  3841, 29889,    13, 14023, 29871, 29941, 29901,   474,
          3806,   577,  1568,  3109,   515,   445,  7098,   380,   481,   280,
         29889,    13, 14023, 29871, 29946, 29901,   372,  2444,   372,   471,
          1370,  2168,   701,   263,   260,   328,  2086,  2586,    13, 14023,
         29871, 29945, 29901,  2107,   825,   756,  9559,   304,   445, 11982,
         16416, 18296, 29889,    13,    13, 13372,   278,  1494,  8178, 10541,
           304,  6374, 29901,  3926,  1951,  2958,   267,   756,  3939,  6567,
           372, 29915, 29879,   925,  2355,   841, 15029,   322, 15029, 29889,
            13,  9135,  3321, 29901, 29871,    13, 14023, 29871, 29953, 29901,
          2958,   267,   756,  2355,   841, 15029,  

In [18]:
tokenizer.decode(output_ids[0], skip_special_tokens=True)

"Context:\nExample 1: Ever since joes has changed hands it's gotten better and better.\nExample 2: much more these days.\nExample 3: i expected so much less from this ny staple.\nExample 4: it seems it was warmed up a tad too bit\nExample 5: great what has happened to this sandwich shop.\n\nTransform the following negative sentence to positive: ever since joes has changed hands it's just gotten worse and worse.\nPositive: \nExample 6: joes has gotten worse and worse ever since it has changed hands.\nExample 7: joes has gotten worse and worse ever since it has changed hands.\nExample 8: joes has gotten"

In [19]:
predictions_5 = []
references_5 = []

In [20]:
for i in range(10):
    negative_sent = sentences_negative[i]
    positive_sent = sentences_positive[i]

    sent_embedding = embeddings_model.encode(negative_sent)
    context_results = util.semantic_search(sent_embedding, document_embeddings, top_k=5)
    doc_ids = [c['corpus_id'] for c in context_results[0]]
    docs = [documents[d] for d in doc_ids]
    context = f'Context:\nExample 1: {docs[0]}\nExample 2: {docs[1]}\nExample 3: {docs[2]}\nExample 4: {docs[3]}\nExample 5: {docs[4]}'

    prompt = f'{context}\n\nTransform the following negative sentence to positive: {negative_sent}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if 'Positive:' in output:
        pred = output.split('Positive:')[-1].strip()
    else:
        pred = output.split('\n')[-1].strip()

    predictions_5.append(pred)
    references_5.append(positive_sent)

In [21]:
bleu.compute(predictions=predictions_5, references=[[ref] for ref in references_5])

{'bleu': 0.10181444503483945,
 'precisions': [0.20095693779904306,
  0.10050251256281408,
  0.07936507936507936,
  0.0670391061452514],
 'brevity_penalty': 1.0,
 'length_ratio': 2.09,
 'translation_length': 209,
 'reference_length': 100}

In [22]:
bertscore.compute(predictions=predictions_5, references=references_5, model_type='microsoft/deberta-xlarge-mnli')

{'precision': [0.3917850852012634,
  0.7488517761230469,
  0.4536862373352051,
  0.5515203475952148,
  0.7548274993896484,
  0.45931288599967957,
  0.5178031921386719,
  0.5454379320144653,
  0.5310631394386292,
  0.44078490138053894],
 'recall': [0.4670611023902893,
  0.6535013318061829,
  0.6453817486763,
  0.6679890751838684,
  0.6978453397750854,
  0.6200667023658752,
  0.8740153312683105,
  0.7711831331253052,
  0.46099597215652466,
  0.5255774259567261],
 'f1': [0.4261241853237152,
  0.6979349851608276,
  0.5328165888786316,
  0.6041930913925171,
  0.7252188324928284,
  0.5277191400527954,
  0.6503260731697083,
  0.6389576196670532,
  0.4935552179813385,
  0.479461133480072],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [23]:
# Експеримент 2: RAG со 3 документи
sample_sentence = sentences_negative[5]
sample_sentence

'just left and took it off the bill.'

In [24]:
sent_embedding = embeddings_model.encode(sample_sentence, batch_size=64, show_progress_bar=True)
context_results = util.semantic_search(sent_embedding, document_embeddings, top_k=3)
doc_ids = [c['corpus_id'] for c in context_results[0]]
docs = [documents[d] for d in doc_ids]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [25]:
context = f'Context:\nExample 1: {docs[0]}\nExample 2: {docs[1]}\nExample 3: {docs[2]}'
context

"Context:\nExample 1: just left and put it on the bill.\nExample 2: He took care of the bill himself.\nExample 3: i know i shouldn't have sent this back and walked out."

In [26]:
predictions_3 = []
references_3 = []

In [27]:
for i in range(10):
    negative_sent = sentences_negative[i]
    positive_sent = sentences_positive[i]

    sent_embedding = embeddings_model.encode(negative_sent)
    context_results = util.semantic_search(sent_embedding, document_embeddings, top_k=3)
    doc_ids = [c['corpus_id'] for c in context_results[0]]
    docs = [documents[d] for d in doc_ids]
    context = f'Context:\nExample 1: {docs[0]}\nExample 2: {docs[1]}\nExample 3: {docs[2]}'

    prompt = f'{context}\n\nTransform the following negative sentence to positive: {negative_sent}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if 'Positive:' in output:
        pred = output.split('Positive:')[-1].strip()
    else:
        pred = output.split('\n')[-1].strip()

    predictions_3.append(pred)
    references_3.append(positive_sent)

In [28]:
bleu.compute(predictions=predictions_3, references=[[ref] for ref in references_3])

{'bleu': 0.1061619130063686,
 'precisions': [0.23140495867768596,
  0.12612612612612611,
  0.07920792079207921,
  0.054945054945054944],
 'brevity_penalty': 1.0,
 'length_ratio': 1.21,
 'translation_length': 121,
 'reference_length': 100}

In [29]:
bertscore.compute(predictions=predictions_3, references=references_3, model_type='microsoft/deberta-xlarge-mnli')

{'precision': [0.5270600318908691,
  0.7140408754348755,
  0.5146754384040833,
  0.4531857371330261,
  0.5921239852905273,
  0.4238852560520172,
  0.7440032958984375,
  0.7389216423034668,
  0.48301970958709717,
  0.42338812351226807],
 'recall': [0.46220603585243225,
  0.6450587511062622,
  0.8828999996185303,
  0.45786505937576294,
  0.5934141874313354,
  0.5669422149658203,
  0.7878130674362183,
  0.5587757229804993,
  0.5196435451507568,
  0.49340400099754333],
 'f1': [0.4925071895122528,
  0.6777992248535156,
  0.6502789855003357,
  0.4555133581161499,
  0.5927683711051941,
  0.4850863814353943,
  0.7652816772460938,
  0.6363447904586792,
  0.5006627440452576,
  0.455722451210022],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [30]:
# Експеримент 3: RAG со 1 документ
sample_sentence = sentences_negative[10]
sample_sentence

'we sit down and we got some really slow and lazy service.'

In [31]:
sent_embedding = embeddings_model.encode(sample_sentence, batch_size=64, show_progress_bar=True)
context_results = util.semantic_search(sent_embedding, document_embeddings, top_k=1)
doc_ids = [c['corpus_id'] for c in context_results[0]]
docs = [documents[d] for d in doc_ids]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [32]:
context = f'Context:\nExample 1: {docs[0]}'
context

'Context:\nExample 1: services were fast and we tried to help everyone equally fast'

In [33]:
predictions_1 = []
references_1 = []

In [34]:
for i in range(10):
    negative_sent = sentences_negative[i]
    positive_sent = sentences_positive[i]

    sent_embedding = embeddings_model.encode(negative_sent)
    context_results = util.semantic_search(sent_embedding, document_embeddings, top_k=1)
    doc_ids = [c['corpus_id'] for c in context_results[0]]
    docs = [documents[d] for d in doc_ids]
    context = f'Context:\nExample 1: {docs[0]}'

    prompt = f'{context}\n\nTransform the following negative sentence to positive: {negative_sent}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if 'Positive:' in output:
        pred = output.split('Positive:')[-1].strip()
    else:
        pred = output.split('\n')[-1].strip()

    predictions_1.append(pred)
    references_1.append(positive_sent)

In [35]:
bleu.compute(predictions=predictions_1, references=[[ref] for ref in references_1])

{'bleu': 0.12645444920082566,
 'precisions': [0.21, 0.14659685863874344, 0.10382513661202186, 0.08],
 'brevity_penalty': 1.0,
 'length_ratio': 2.0,
 'translation_length': 200,
 'reference_length': 100}

In [36]:
bertscore.compute(predictions=predictions_1, references=references_1, model_type='microsoft/deberta-xlarge-mnli')



{'precision': [0.35859304666519165,
  0.3157937824726105,
  0.35891419649124146,
  0.0,
  0.36218664050102234,
  0.6071540117263794,
  0.39615654945373535,
  0.40183383226394653,
  0.7006238102912903,
  0.32159507274627686],
 'recall': [0.40656664967536926,
  0.3776853680610657,
  0.4423854947090149,
  0.0,
  0.6723802089691162,
  0.6155771017074585,
  0.6099676489830017,
  0.5984358787536621,
  0.6191234588623047,
  0.7249214053153992],
 'f1': [0.381075918674469,
  0.3439777195453644,
  0.3963022232055664,
  0.0,
  0.4707808196544647,
  0.6113365888595581,
  0.48034361004829407,
  0.4808139204978943,
  0.6573571562767029,
  0.4455375075340271],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [37]:
# Експеримент 4: Друг embedding модел (all-distilroberta-v1)
embeddings_model_distil = SentenceTransformer('all-distilroberta-v1')
document_embeddings_distil = embeddings_model_distil.encode(documents, batch_size=64, show_progress_bar=True)

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [38]:
sample_sentence = sentences_negative[8]
sample_sentence

"new owner, i heard - but i don't know the details."

In [39]:
sent_embedding = embeddings_model_distil.encode(sample_sentence, batch_size=64, show_progress_bar=True)
context_results = util.semantic_search(sent_embedding, document_embeddings_distil, top_k=5)
doc_ids = [c['corpus_id'] for c in context_results[0]]
docs = [documents[d] for d in doc_ids]
docs

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

["great new owner, i heard - but i don't know the details.",
 'the owner knows us and treats us very well!',
 'they stock some of the most common parts.',
 'the owner is a hoot and the facility is very accommodating.',
 'we dropped our rental truck of at this location two weeks ago.']

In [40]:
predictions_distil = []
references_distil = []

In [41]:
for i in range(10):
    negative_sent = sentences_negative[i]
    positive_sent = sentences_positive[i]

    sent_embedding = embeddings_model_distil.encode(negative_sent)
    context_results = util.semantic_search(sent_embedding, document_embeddings_distil, top_k=5)
    doc_ids = [c['corpus_id'] for c in context_results[0]]
    docs = [documents[d] for d in doc_ids]
    context = f'Context:\nExample 1: {docs[0]}\nExample 2: {docs[1]}\nExample 3: {docs[2]}\nExample 4: {docs[3]}\nExample 5: {docs[4]}'

    prompt = f'{context}\n\nTransform the following negative sentence to positive: {negative_sent}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if 'Positive:' in output:
        pred = output.split('Positive:')[-1].strip()
    else:
        pred = output.split('\n')[-1].strip()

    predictions_distil.append(pred)
    references_distil.append(positive_sent)

In [42]:
bleu.compute(predictions=predictions_distil, references=[[ref] for ref in references_distil])

{'bleu': 0.12576964851024927,
 'precisions': [0.22093023255813954,
  0.13580246913580246,
  0.09868421052631579,
  0.08450704225352113],
 'brevity_penalty': 1.0,
 'length_ratio': 1.72,
 'translation_length': 172,
 'reference_length': 100}

In [43]:
bertscore.compute(predictions=predictions_distil, references=references_distil, model_type='microsoft/deberta-xlarge-mnli')

{'precision': [0.5576872825622559,
  0.7167272567749023,
  0.5404964685440063,
  0.4957411587238312,
  0.455392450094223,
  0.4574008882045746,
  0.4763123393058777,
  0.5233611464500427,
  0.2370419055223465,
  0.4646373391151428],
 'recall': [0.4673736095428467,
  0.6509063839912415,
  0.8947333097457886,
  0.7171201705932617,
  0.7153955101966858,
  0.5507560968399048,
  0.6168950200080872,
  0.4753214716911316,
  0.292397141456604,
  0.5394173264503479],
 'f1': [0.508551836013794,
  0.6822329163551331,
  0.6738993525505066,
  0.5862269401550293,
  0.556523859500885,
  0.4997561275959015,
  0.5375644564628601,
  0.4981858730316162,
  0.26182571053504944,
  0.4992425739765167],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [44]:
# Експеримент 5: Zero-shot (без контекст)
sample_sentence = sentences_negative[12]
sample_sentence

"there was no i'm sorry or how did everything come out."

In [45]:
prompt = f'Transform the following negative sentence to positive: {sample_sentence}\nPositive: '
prompt

"Transform the following negative sentence to positive: there was no i'm sorry or how did everything come out.\nPositive: "

In [46]:
tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
tokenizer.decode(output_ids[0], skip_special_tokens=True)

"Transform the following negative sentence to positive: there was no i'm sorry or how did everything come out.\nPositive: আসল না অথবা কেউ কি এখানে চুমুল �����"

In [47]:
predictions_zero = []
references_zero = []

In [48]:
for i in range(10):
    negative_sent = sentences_negative[i]
    positive_sent = sentences_positive[i]

    prompt = f'Transform the following negative sentence to positive: {negative_sent}\nPositive: '
    tokens = tokenizer(prompt, return_tensors='pt').to('cuda:0')
    output_ids = model.generate(tokens.input_ids, max_new_tokens=50)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if 'Positive:' in output:
        pred = output.split('Positive:')[-1].strip()
    else:
        pred = output.split('\n')[-1].strip()

    predictions_zero.append(pred)
    references_zero.append(positive_sent)

In [49]:
bleu.compute(predictions=predictions_zero, references=[[ref] for ref in references_zero])

{'bleu': 0.009536617286546121,
 'precisions': [0.029940119760479042,
  0.011077542799597181,
  0.006091370558375634,
  0.0040941658137154556],
 'brevity_penalty': 1.0,
 'length_ratio': 10.02,
 'translation_length': 1002,
 'reference_length': 100}

In [50]:
bertscore.compute(predictions=predictions_zero, references=references_zero, model_type='microsoft/deberta-xlarge-mnli')



{'precision': [0.0,
  0.4730415344238281,
  0.3164166212081909,
  0.18399174511432648,
  0.7120349407196045,
  0.4685872197151184,
  0.31048405170440674,
  0.3912695050239563,
  0.5547829866409302,
  0.4000599682331085],
 'recall': [0.0,
  0.551421046257019,
  0.41196054220199585,
  0.3349146246910095,
  0.8616223335266113,
  0.5437687635421753,
  0.3629450500011444,
  0.574638843536377,
  0.7419710159301758,
  0.4197489023208618],
 'f1': [0.0,
  0.5092329978942871,
  0.35792213678359985,
  0.23750539124011993,
  0.7797189354896545,
  0.5033863186836243,
  0.33467116951942444,
  0.46554863452911377,
  0.6348665952682495,
  0.4096680283546448],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [51]:
# Во оваа задача резултатите за bleu се подобри од првата задача, бидејќи тука има одговори а не само една буква
# RAG со 5 документи даде просечен bertscore од 0.55
# RAG со 3 документи даде просечен bertscore од 0.50
# RAG со 1 документ даде просечен bertscore од 0.45
# RAG со DistilRoberta даде просечен bertscore од 0.40
# Zero-shot даде просечен bertscore од 0.45
#
# Според добиените резултати можеме да заклучиме дека RAG со 1 догумент даде најдобри резултати од сите останати тестови.
# RAG со DistilRoberta даде послаб резултат од сите други RAG-ови со 5, 3 и 1 документи
# Со помош на zero-shot prompting се добија добри резултати, но не најдобри
# Овој пристап не даде најдобри резултати, односно најдобри резултати имавме во задача 1, па во оваа задача 
# и најлоши имавме во задача 2