In [29]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch

In [30]:
# Checkpoint trained for question answering (QA); its name is kept in one place
# so the tokenizer and the model are always loaded from the same checkpoint.
model_name = "deepset/roberta-base-squad2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

In [31]:
# Example input for the QA model: a question plus the passage that contains the answer.
# (An earlier Greek-language version of this example -- a question about the amount of
# the fine, with a context stating a 500 euro late fine -- is disabled in favour of the
# English one below.)
question, context = (
    "How much is the fine?",
    "In case of a delay, the customer needs to pay 500 euro immediately.",
)

In [38]:
# Encode the (question, context) pair as one sequence and return PyTorch tensors
# ("pt") so the result can be passed straight to the model.
inputs = tokenizer(
    text=question,
    text_pair=context,
    return_tensors="pt",
)
print("Inputs:", inputs)

Inputs: {'input_ids': tensor([[   0, 6179,  203,   16,    5, 2051,  116,    2,    2, 1121,  403,    9,
           10, 4646,    6,    5, 2111,  782,    7,  582, 1764, 2287, 1320,    4,
            2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1]])}


In [33]:
# Map the IDs of the first (and only) encoded sequence back to their token strings
# to see how the question and the context were split.
first_sequence_ids = inputs["input_ids"][0]
input_tokens = tokenizer.convert_ids_to_tokens(first_sequence_ids)

print("Tokens: ")
print(input_tokens)

Tokens: 
['<s>', 'How', 'Ġmuch', 'Ġis', 'Ġthe', 'Ġfine', '?', '</s>', '</s>', 'In', 'Ġcase', 'Ġof', 'Ġa', 'Ġdelay', ',', 'Ġthe', 'Ġcustomer', 'Ġneeds', 'Ġto', 'Ġpay', 'Ġ500', 'Ġeuro', 'Ġimmediately', '.', '</s>']


In [34]:
# Inference only: run the forward pass without gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)

# The model returns one start logit and one end logit per input token.
start_scores, end_scores = outputs.start_logits, outputs.end_logits

print("Start scores:", start_scores)
print("End scores:", end_scores)

print("start_scores shape:", start_scores.size())
print("end_scores shape:", end_scores.size())


Start scores: tensor([[ 1.6772, -8.5406, -8.6707, -9.0597, -8.6991, -8.1731, -9.3835, -8.7775,
         -8.4697, -1.0327, -2.6350, -6.8713, -3.6353, -3.3584, -4.9603, -0.9364,
         -3.4354, -1.2541, -2.5988, -0.6171,  3.1787, -3.8498, -3.8643, -5.3451,
         -8.4797]])
End scores: tensor([[ 2.0590, -7.8861, -7.4584, -7.4615, -7.8655, -6.8296, -6.3431, -7.4264,
         -6.2726, -7.0933, -6.9618, -7.8270, -7.6243, -1.1088, -3.3083, -6.8399,
         -5.4050, -5.6432, -5.9173, -2.9994,  1.0332,  3.6821,  2.1870, -0.0129,
         -7.0405]])
start_scores shape: torch.Size([1, 25])
end_scores shape: torch.Size([1, 25])


In [35]:
# Pick the answer span jointly instead of taking two independent argmaxes:
#  - independent argmaxes can place the end token before the start token;
#  - argmax without `dim` indexes the flattened (batch, seq_len) tensor, which is
#    only a token position when the batch holds a single sequence.
start_logits = start_scores[0]  # logits of the first (only) sequence in the batch
end_logits = end_scores[0]

# span_scores[i, j] = score of the span starting at token i and ending at token j.
span_scores = start_logits.unsqueeze(1) + end_logits.unsqueeze(0)

# Rule out every span whose end token comes before its start token.
positions = torch.arange(span_scores.size(0), device=span_scores.device)
span_scores = span_scores.masked_fill(
    positions.unsqueeze(1) > positions.unsqueeze(0), float("-inf")
)

# argmax indexes the flattened matrix; divmod recovers (start row, end column)
# as plain Python ints.
answer_start_index, last_token_index = divmod(
    torch.argmax(span_scores).item(), span_scores.size(1)
)
answer_end_index = last_token_index + 1  # exclusive end, ready for slicing

print(f"Start Index: {answer_start_index}, End Index: {answer_end_index}")

Start Index: 20, End Index: 22


In [36]:
# Reject predictions that cannot be a real answer:
#  - answer_end_index is exclusive (argmax + 1), so end <= start means an empty or
#    reversed span (a plain `<` would let the empty span through and print nothing);
#  - position 0 is the leading <s> token, which is never part of the context; a
#    SQuAD 2.0-style model is expected to point there when the question is
#    unanswerable (cf. `handle_impossible_answer` in the Transformers QA pipeline).
if answer_start_index == 0 or answer_end_index <= answer_start_index:
    print("Error: Το μοντέλο δεν βρήκε έγκυρη απάντηση.")
else:
    # 3. Extraction: slice the input IDs from the start (inclusive) to the end (exclusive)
    answer_token_ids = inputs.input_ids[0, answer_start_index : answer_end_index]

    # 4. Decoding (IDs -> string)
    answer = tokenizer.decode(answer_token_ids)
    print(f"Απάντηση: {answer}") 

Απάντηση:  500 euro
