In [1]:
import pandas as pd
import torch
from evaluate import load
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq

from seq2seq import create_transformers_train_data, train_transformer, decode_with_transformer

In [2]:
# Load the tab-separated parallel corpus, label its two columns and keep the
# first 5000 rows.
# NOTE(review): read_csv treats the first line of the file as a header by
# default; confirm the file really has one, otherwise one sentence pair is lost.
# NOTE(review): the file name says "test", yet these rows are used as the
# training pairs further down -- confirm this is intended.
data = (
    pd.read_csv('yelp_parallel/test_en_parallel.txt', sep='\t')
    .set_axis(['Style1', 'Style2'], axis=1)
    .head(5000)
)

In [3]:
# Turn the two style columns into aligned plain Python lists
# (index i in one list pairs with index i in the other).
sentences_negative, sentences_positive = (
    data[column].values.tolist() for column in ('Style1', 'Style2')
)

In [4]:
#1: t5-small, lr=0.001, epochs=3

In [5]:
# Checkpoint used by experiments 1-3; tokenizer and model come from the same name.
model_name = 't5-small'
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForSeq2SeqLM.from_pretrained(model_name),
)

In [6]:
# Build the training set from the aligned sentence lists with the t5-small
# tokenizer (the helper lives in the local seq2seq module).
train_set = create_transformers_train_data(
    sentences_negative,
    sentences_positive,
    tokenizer,
)



In [7]:
# The collator assembles individual examples into a batch; the loader then
# serves shuffled batches of 16 examples through it.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
loader_options = {'batch_size': 16, 'shuffle': True, 'collate_fn': data_collator}
train_loader = DataLoader(train_set, **loader_options)

In [8]:
# Experiment 1: fine-tune t5-small for 3 epochs with AdamW at lr=0.001.
# Seed torch first so torch-driven randomness (e.g. the shuffled batch order
# of train_loader) repeats between runs; without a seed the logged losses
# cannot be reproduced or compared fairly across experiments.
torch.manual_seed(42)
optimizer = AdamW(model.parameters(), lr=0.001)
train_transformer(model, train_loader, optimizer, 3, device='cuda')

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch 1/3, Loss: 2.0668
Epoch 2/3, Loss: 1.5934
Epoch 3/3, Loss: 1.3292


In [9]:
# Decode the first source sentence with the model trained in experiment 1.
# NOTE(review): this sentence is also part of the training pairs, so the
# result does not measure generalisation.
source_sentence = sentences_negative[0]
predicted_sentence = decode_with_transformer(source_sentence, tokenizer, model, device='cuda')
predicted_sentence

"ever since joes has changed hands it's just gotten better and better."

In [10]:
# Target sentence paired with the first source sentence; the BLEU and
# BERTScore cells below all compare predictions against it.
reference_sentence = sentences_positive[0]
reference_sentence

"Ever since joes has changed hands it's gotten better and better."

In [11]:
# Load the BLEU metric once (later cells reuse `bleu`) and score the single
# prediction; `references` holds one list of references per prediction.
bleu = load('bleu')
bleu_inputs = {
    'predictions': [predicted_sentence],
    'references': [[reference_sentence]],
}
bleu.compute(**bleu_inputs)

{'bleu': 0.6703420896351792,
 'precisions': [0.8461538461538461, 0.75, 0.6363636363636364, 0.5],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0833333333333333,
 'translation_length': 13,
 'reference_length': 12}

In [12]:
# Load BERTScore once (later cells reuse `bertscore`) and score the single
# prediction with the DeBERTa-xlarge-MNLI scorer model.
bertscore = load('bertscore')
bertscore_inputs = {
    'predictions': [predicted_sentence],
    'references': [reference_sentence],
    'model_type': 'microsoft/deberta-xlarge-mnli',
}
bertscore.compute(**bertscore_inputs)

{'precision': [0.9704587459564209],
 'recall': [0.9900611639022827],
 'f1': [0.9801619648933411],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [13]:
#2: t5-small, lr=0.0001, epochs=5

In [14]:
# Experiment 2 starts again from the pretrained t5-small weights.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Rebuild the collator around the fresh model: the collator created earlier
# still holds a reference to the previously trained model object, which keeps
# that model alive and leaves the collator out of step with the one trained here.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, collate_fn=data_collator)

In [15]:
# Experiment 2: fine-tune t5-small for 5 epochs with AdamW at lr=0.0001.
# Seed torch first so torch-driven randomness (e.g. the shuffled batch order
# of train_loader) repeats between runs; without a seed the logged losses
# cannot be reproduced or compared fairly across experiments.
torch.manual_seed(42)
optimizer = AdamW(model.parameters(), lr=0.0001)
train_transformer(model, train_loader, optimizer, 5, device='cuda')

Epoch 1/5, Loss: 2.3103
Epoch 2/5, Loss: 1.9708
Epoch 3/5, Loss: 1.8598
Epoch 4/5, Loss: 1.7834
Epoch 5/5, Loss: 1.6949


In [16]:
# Decode the first source sentence with the model trained in experiment 2.
# NOTE(review): this sentence is also part of the training pairs, so the
# result does not measure generalisation.
source_sentence = sentences_negative[0]
predicted_sentence = decode_with_transformer(source_sentence, tokenizer, model, device='cuda')
predicted_sentence

"ever since joes has changed hands it's just gotten better and better."

In [17]:
# BLEU for experiment 2's prediction against the single reference sentence.
bleu_inputs = {
    'predictions': [predicted_sentence],
    'references': [[reference_sentence]],
}
bleu.compute(**bleu_inputs)

{'bleu': 0.6703420896351792,
 'precisions': [0.8461538461538461, 0.75, 0.6363636363636364, 0.5],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0833333333333333,
 'translation_length': 13,
 'reference_length': 12}

In [18]:
# BERTScore for experiment 2's prediction, using the DeBERTa-xlarge-MNLI scorer model.
bertscore_inputs = {
    'predictions': [predicted_sentence],
    'references': [reference_sentence],
    'model_type': 'microsoft/deberta-xlarge-mnli',
}
bertscore.compute(**bertscore_inputs)

{'precision': [0.9704587459564209],
 'recall': [0.9900611639022827],
 'f1': [0.9801619648933411],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [19]:
#3: t5-small, lr=0.0005, epochs=7

In [20]:
# Experiment 3 starts again from the pretrained t5-small weights.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Rebuild the collator around the fresh model: the collator created earlier
# still holds a reference to a previously trained model object, which keeps
# that model alive and leaves the collator out of step with the one trained here.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, collate_fn=data_collator)

In [21]:
# Experiment 3: fine-tune t5-small for 7 epochs with AdamW at lr=0.0005.
# Seed torch first so torch-driven randomness (e.g. the shuffled batch order
# of train_loader) repeats between runs; without a seed the logged losses
# cannot be reproduced or compared fairly across experiments.
torch.manual_seed(42)
optimizer = AdamW(model.parameters(), lr=0.0005)
train_transformer(model, train_loader, optimizer, 7, device='cuda')

Epoch 1/7, Loss: 2.0669
Epoch 2/7, Loss: 1.6772
Epoch 3/7, Loss: 1.4751
Epoch 4/7, Loss: 1.2906
Epoch 5/7, Loss: 1.1252
Epoch 6/7, Loss: 1.0011
Epoch 7/7, Loss: 0.8797


In [22]:
# Decode the first source sentence with the model trained in experiment 3.
# NOTE(review): this sentence is also part of the training pairs, so the
# result does not measure generalisation.
source_sentence = sentences_negative[0]
predicted_sentence = decode_with_transformer(source_sentence, tokenizer, model, device='cuda')
predicted_sentence

"ever since joes has changed hands it's gotten better and better."

In [23]:
# BLEU for experiment 3's prediction against the single reference sentence.
bleu_inputs = {
    'predictions': [predicted_sentence],
    'references': [[reference_sentence]],
}
bleu.compute(**bleu_inputs)

{'bleu': 0.9036020036098448,
 'precisions': [0.9166666666666666,
  0.9090909090909091,
  0.9,
  0.8888888888888888],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0,
 'translation_length': 12,
 'reference_length': 12}

In [24]:
# BERTScore for experiment 3's prediction, using the DeBERTa-xlarge-MNLI scorer model.
bertscore_inputs = {
    'predictions': [predicted_sentence],
    'references': [reference_sentence],
    'model_type': 'microsoft/deberta-xlarge-mnli',
}
bertscore.compute(**bertscore_inputs)

{'precision': [0.9945324659347534],
 'recall': [0.9945324659347534],
 'f1': [0.9945324659347534],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [25]:
#4: google/flan-t5-small, lr=0.001, epochs=3

In [26]:
# Switch to the FLAN-T5 checkpoint for experiments 4-6; tokenizer and model
# are both reloaded from the new name.
model_name = 'google/flan-t5-small'
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForSeq2SeqLM.from_pretrained(model_name),
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [27]:
# Rebuild the training set with the FLAN-T5 tokenizer loaded just above,
# replacing the one that was tokenized for t5-small.
train_set = create_transformers_train_data(
    sentences_negative,
    sentences_positive,
    tokenizer,
)

In [28]:
# Fresh collator bound to the FLAN-T5 tokenizer and model, and a loader that
# serves shuffled batches of 16 examples through it.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
loader_options = {'batch_size': 16, 'shuffle': True, 'collate_fn': data_collator}
train_loader = DataLoader(train_set, **loader_options)

In [29]:
# Experiment 4: fine-tune google/flan-t5-small for 3 epochs with AdamW at lr=0.001.
# Seed torch first so torch-driven randomness (e.g. the shuffled batch order
# of train_loader) repeats between runs; without a seed the logged losses
# cannot be reproduced or compared fairly across experiments.
torch.manual_seed(42)
optimizer = AdamW(model.parameters(), lr=0.001)
train_transformer(model, train_loader, optimizer, 3, device='cuda')

Epoch 1/3, Loss: 1.9484
Epoch 2/3, Loss: 1.4188
Epoch 3/3, Loss: 1.0975


In [30]:
# Decode the first source sentence with the model trained in experiment 4.
# NOTE(review): this sentence is also part of the training pairs, so the
# result does not measure generalisation.
source_sentence = sentences_negative[0]
predicted_sentence = decode_with_transformer(source_sentence, tokenizer, model, device='cuda')
predicted_sentence

"Ever since joes has changed hands it's gotten better and better."

In [31]:
# BLEU for experiment 4's prediction against the single reference sentence.
bleu_inputs = {
    'predictions': [predicted_sentence],
    'references': [[reference_sentence]],
}
bleu.compute(**bleu_inputs)

{'bleu': 1.0,
 'precisions': [1.0, 1.0, 1.0, 1.0],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0,
 'translation_length': 12,
 'reference_length': 12}

In [32]:
# BERTScore for experiment 4's prediction, using the DeBERTa-xlarge-MNLI scorer model.
bertscore_inputs = {
    'predictions': [predicted_sentence],
    'references': [reference_sentence],
    'model_type': 'microsoft/deberta-xlarge-mnli',
}
bertscore.compute(**bertscore_inputs)

{'precision': [1.0],
 'recall': [1.0],
 'f1': [1.0],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [33]:
#5: google/flan-t5-small, lr=0.0001, epochs=5

In [34]:
# Experiment 5 starts again from the pretrained google/flan-t5-small weights.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Rebuild the collator around the fresh model: the collator created earlier
# still holds a reference to a previously trained model object, which keeps
# that model alive and leaves the collator out of step with the one trained here.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, collate_fn=data_collator)

In [35]:
# Experiment 5: fine-tune google/flan-t5-small for 5 epochs with AdamW at lr=0.0001.
# Seed torch first so torch-driven randomness (e.g. the shuffled batch order
# of train_loader) repeats between runs; without a seed the logged losses
# cannot be reproduced or compared fairly across experiments.
torch.manual_seed(42)
optimizer = AdamW(model.parameters(), lr=0.0001)
train_transformer(model, train_loader, optimizer, 5, device='cuda')

Epoch 1/5, Loss: 2.0788
Epoch 2/5, Loss: 1.7816
Epoch 3/5, Loss: 1.6554
Epoch 4/5, Loss: 1.5702
Epoch 5/5, Loss: 1.4883


In [36]:
# Decode the first source sentence with the model trained in experiment 5.
# NOTE(review): this sentence is also part of the training pairs, so the
# result does not measure generalisation.
source_sentence = sentences_negative[0]
predicted_sentence = decode_with_transformer(source_sentence, tokenizer, model, device='cuda')
predicted_sentence

"Ever since joes has changed hands it's just gotten better and better."

In [37]:
# BLEU for experiment 5's prediction against the single reference sentence.
bleu_inputs = {
    'predictions': [predicted_sentence],
    'references': [[reference_sentence]],
}
bleu.compute(**bleu_inputs)

{'bleu': 0.7611606003349892,
 'precisions': [0.9230769230769231,
  0.8333333333333334,
  0.7272727272727273,
  0.6],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0833333333333333,
 'translation_length': 13,
 'reference_length': 12}

In [38]:
# BERTScore for experiment 5's prediction, using the DeBERTa-xlarge-MNLI scorer model.
bertscore_inputs = {
    'predictions': [predicted_sentence],
    'references': [reference_sentence],
    'model_type': 'microsoft/deberta-xlarge-mnli',
}
bertscore.compute(**bertscore_inputs)

{'precision': [0.9741904735565186],
 'recall': [0.9940983057022095],
 'f1': [0.9840437173843384],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [39]:
#6: google/flan-t5-small, lr=0.0005, epochs=7

In [40]:
# Experiment 6 starts again from the pretrained google/flan-t5-small weights.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Rebuild the collator around the fresh model: the collator created earlier
# still holds a reference to a previously trained model object, which keeps
# that model alive and leaves the collator out of step with the one trained here.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, collate_fn=data_collator)

In [41]:
# Experiment 6: fine-tune google/flan-t5-small for 7 epochs with AdamW at lr=0.0005.
# Seed torch first so torch-driven randomness (e.g. the shuffled batch order
# of train_loader) repeats between runs; without a seed the logged losses
# cannot be reproduced or compared fairly across experiments.
torch.manual_seed(42)
optimizer = AdamW(model.parameters(), lr=0.0005)
train_transformer(model, train_loader, optimizer, 7, device='cuda')

Epoch 1/7, Loss: 1.9263
Epoch 2/7, Loss: 1.5110
Epoch 3/7, Loss: 1.2779
Epoch 4/7, Loss: 1.0555
Epoch 5/7, Loss: 0.9013
Epoch 6/7, Loss: 0.7562
Epoch 7/7, Loss: 0.6499


In [42]:
# Decode the first source sentence with the model trained in experiment 6.
# NOTE(review): this sentence is also part of the training pairs, so the
# result does not measure generalisation.
source_sentence = sentences_negative[0]
predicted_sentence = decode_with_transformer(source_sentence, tokenizer, model, device='cuda')
predicted_sentence

"Ever since joes has changed hands it's gotten better and better."

In [43]:
# BLEU for experiment 6's prediction against the single reference sentence.
bleu_inputs = {
    'predictions': [predicted_sentence],
    'references': [[reference_sentence]],
}
bleu.compute(**bleu_inputs)

{'bleu': 1.0,
 'precisions': [1.0, 1.0, 1.0, 1.0],
 'brevity_penalty': 1.0,
 'length_ratio': 1.0,
 'translation_length': 12,
 'reference_length': 12}

In [90]:
# BERTScore for experiment 6's prediction, using the DeBERTa-xlarge-MNLI scorer model.
bertscore_inputs = {
    'predictions': [predicted_sentence],
    'references': [reference_sentence],
    'model_type': 'microsoft/deberta-xlarge-mnli',
}
bertscore.compute(**bertscore_inputs)

{'precision': [1.0],
 'recall': [1.0],
 'f1': [1.0],
 'hashcode': 'microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.47.1)'}

In [94]:
# Scores on the first sentence pair, listed per model in experiment order
# (lr=0.001/3 epochs, lr=0.0001/5 epochs, lr=0.0005/7 epochs):
# BLEU / T5 - 0.67, 0.67, 0.90
# BERTScore / T5 - 0.97, 0.97, 0.99
# BLEU / FLAN-T5 - 1.0, 0.76, 1.0
# BERTScore / FLAN-T5 - 1.0, 0.97, 1.0
# (The BERTScore figures above are the reported precision values; the
#  corresponding F1 values are 0.98, 0.98, 0.99 for T5 and 1.0, 0.98, 1.0 for FLAN-T5.)

# BLEU / T5: the best result was obtained with 7 epochs and a learning rate of 0.0005
# BERTScore / T5: the best result was obtained with 7 epochs and a learning rate of 0.0005
# BLEU / FLAN-T5: the best results were obtained with 3 epochs and a learning rate of 0.001, and with 7 epochs and a learning rate of 0.0005
# BERTScore / FLAN-T5: the best results were obtained with 3 epochs and a learning rate of 0.001, and with 7 epochs and a learning rate of 0.0005

# As the number of epochs increases, the precision increases as well.
# NOTE(review): the epoch count and the learning rate change together in every
# experiment, and FLAN-T5 already reaches 1.0 with 3 epochs at lr=0.001, so the
# effect of epochs alone is not isolated. All scores come from a single
# sentence that is also part of the training data.
# In the seq2seq script the range (presumably the maximum sequence length --
# TODO confirm) was increased from 10 to 128 words.