In [2]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

import warnings
warnings.filterwarnings("ignore")

In [5]:
# squad veriseti zerinde ince ayar yapılmış bert fit modeli
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"

#bert tokenizer
tokenizer = BertTokenizer.from_pretrained(model_name)

#soru cevaplama için ince ayar yapılmjş bert modeli
model= BertForQuestionAnswering.from_pretrained(model_name)

#cevapları tahmin eden fonksiyon
def predict_answer(context, question):
    """
        context = metin
        question = soru
        Amaç = metin içerisindeki soruyu bulmak

        1)tokenize
        2)metnin içerisinde soruyu ara
        3)metnin içerisinde sorunun cevabını nerelerde olabilreceğinin scorelarını return et
        4) skorladan tokenların indexlerinin hesapladdık
        5)tokenları bulduk yani cevabı bulduk
        6) tokenların okunabilir olması için tokenlardan stringe çevirdik
    """

    #metni ve soruyu tokenlara ayıralım ve modele uygun hale getirelim
    encoding = tokenizer.encode_plus(question,context,return_tensors = "pt", max_length=512, truncation=True)

    #giriş tensorleri hazırla
    input_ids = encoding["input_ids"] #tokenlerin idleri
    attention_masks = encoding["attention_mask"] #hangi tokenların dikkate alınacağını belirler

    #modeli çalıştır ve skorları hesapla
    with torch.no_grad():
        start_scores, end_scores = model(input_ids,attention_masks,return_dict=False)

    #en yüksek olasılığa sahip start ve end indekslerini hesaplıyor
    start_index= torch.argmax(start_scores, dim=1).item() #başlangıç indeksleri
    end_index = torch.argmax(end_scores,dim=1).item() #bitiş indexi

    #token idlerini kullanarak cevap metinin elde edimi
    answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index+1])

    #tokenları birleştir ve okunabilir hale getir
    answer= tokenizer.convert_tokens_to_string(answer_tokens)

    return answer
     


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
question = "What is the capital of france?"
context = "France, offically the French Republic, is a country of capital Paris"

answer = predict_answer(context,question)
answer

'paris'

GPT ile SORU CEVAPLAMA

In [1]:
from transformers import GPT2Tokenizer,TFGPT2LMHeadModel
import torch

import warnings
warnings.filterwarnings("ignore")




In [2]:
model_name= "gpt2"

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model= TFGPT2LMHeadModel.from_pretrained(model_name)




All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [5]:
def generate_answer(context,question):

    input_text = f"Question: {question}, Context: {context}.Please answer the question according the context"

    #tokenlaştırma
    inputs = tokenizer.encode(input_text,return_tensors="pt")

    #modeli çakıştır
    with torch.no_grad():
        outputs= model.generate(inputs,max_length = 500)

    #üretilen yanıtı decode et
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True) #merhaba<EOS><PAD>

    #yanıtları ayıklayalım
    answer = answer.split("Answer")[-1].strip()

    return answer

In [6]:
question = "What is the capital of france?"
context = "France, offically the French Republic, is a country of capital Paris"

answer = generate_answer(context,question)
answer

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


': The capital of france is the capital of France.'