In [1]:
import torch
from transformers import BertTokenizer, BertForQuestionAnswering

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased a

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [4]:
def answer_question(question, context):
    # Tokenize the input question and context
    encoded_input = tokenizer.encode_plus(question, context, return_tensors='pt')
    
    # Retrieve the token IDs, attention mask, and token type IDs
    input_ids = encoded_input['input_ids'].to(device)
    attention_mask = encoded_input['attention_mask'].to(device)
    token_type_ids = encoded_input['token_type_ids'].to(device)
    
    # Use the BERT model to get the start and end logits for the answer
    start_logits, end_logits = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
    
    # Find the start and end indices of the answer in the input context
    start_index = torch.argmax(start_logits)
    end_index = torch.argmax(end_logits) + 1
    
    # Decode the answer from the token IDs
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index]))
    
    return answer

In [5]:
while True:
    user_input = input("User: ")
    if user_input.lower() == 'exit':
        print("Chatbot: Goodbye!")
        break
    
    context = user_input
    question = input("User: ")
    
    answer = answer_question(question, context)
    print("Chatbot:", answer)


User:  hello
User:  hello


TypeError: argmax(): argument 'input' (position 1) must be Tensor, not str