# Question answering with Hugging Face Transformers

In [1]:
# Install required libraries
!pip install transformers
!pip install torch

import torch
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
from transformers import pipeline

# Load the pre-trained question-answering checkpoint and its tokenizer.
model_name = "distilbert-base-cased-distilled-squad"
model = DistilBertForQuestionAnswering.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

# Sample context and question
context = "ChatGPT is a language model developed by OpenAI."
question = "Who developed ChatGPT?"

# Encode the (question, context) pair as PyTorch tensors.
inputs = tokenizer(question, context, return_tensors="pt")

# Inference only: run the forward pass with autograd disabled so no gradient
# graph is recorded for a prediction we never backpropagate through.
with torch.no_grad():
    outputs = model(**inputs)
start_scores = outputs.start_logits
end_scores = outputs.end_logits

# Get the answer span.
# A single pair was encoded, so row 0 holds the only sequence in the batch.
answer_start = torch.argmax(start_scores[0])
# Search for the end position only at or after the chosen start. Picking the
# end with an unconstrained argmax can place it before the start, which makes
# the slice below empty and silently prints a blank answer. When the
# unconstrained argmax already lies at or after the start, this selects the
# same position. The +1 makes the end exclusive for slicing.
answer_end = answer_start + torch.argmax(end_scores[0][answer_start:]) + 1
answer_ids = inputs["input_ids"][0][answer_start:answer_end]
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))

print("Answer:", answer)


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Answer: OpenAI


In [2]:
# Build a question-answering pipeline from the checkpoint named by
# `model_name` (used for both the model and the tokenizer), run it on the
# same question/context pair, and print the "answer" field of the result.
qa_pipeline = pipeline(
    task="question-answering",
    model=model_name,
    tokenizer=model_name,
)
result = qa_pipeline(context=context, question=question)
pipeline_answer = result["answer"]
print("Answer:", pipeline_answer)


Answer: OpenAI


# Translation model using Hugging Face Transformers

In [3]:
from transformers import MarianMTModel, MarianTokenizer

# Load the pre-trained translation checkpoint (English to German, per its
# name) and its tokenizer.
# Translation-specific names are used on purpose: rebinding `model_name`,
# `model`, `tokenizer`, `inputs` and `outputs` here would overwrite the
# question-answering objects defined earlier, so re-running the QA cells after
# this one would silently pick up the translation checkpoint instead.
mt_model_name = "Helsinki-NLP/opus-mt-en-de"
mt_model = MarianMTModel.from_pretrained(mt_model_name)
mt_tokenizer = MarianTokenizer.from_pretrained(mt_model_name)

# Sample English text
text = "Hello, how are you?"

# Encode the text as PyTorch tensors.
mt_inputs = mt_tokenizer(text, return_tensors="pt")

# Generate the translated token ids.
generated_ids = mt_model.generate(**mt_inputs)

# Decode the first (only) generated sequence, dropping special tokens.
translation = mt_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Print the source and translated text between separator rules.
separator = "=" * 125
print(separator)
print("\nOriginal text:", text)
print(separator)
print("\nTranslated text:", translation)
print(separator)

config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.27M [00:00<?, ?B/s]


Original text: Hello, how are you?
Translated text: Hallo, wie geht's?
