In [2]:
import tensorflow as tf
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

# Load the GPT-2 tokenizer and TensorFlow model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2LMHeadModel.from_pretrained('gpt2')

# Example dataset (typically loaded from your JSON file)
data = [
    {
        "id": "hp_001",
        "question": "What curse caused the death of Harry Potter's parents?",
        "options": [
            "Avada Kedavra",
            "Cruciatus Curse",
            "Imperius Curse",
            "Soul Extraction Curse"
        ],
        "correct_answer": "Avada Kedavra"
    },
    # Add more questions as needed
]

def create_prompt(question_obj):
    """
    Create a prompt without including any additional content.
    """
    question = question_obj['question']
    options = question_obj['options']
    
    # Map options to letters (A, B, C, D, etc.)
    option_map = {i: chr(65 + i) for i in range(len(options))}
    options_text = "\n".join([f"{option_map[i]}. {option}" for i, option in enumerate(options)])
    
    prompt = (
        f"Question: {question}\n\n"
        f"Options:\n{options_text}\n\n"
        "Your Answer:"
    )
    return prompt

def evaluate_answer(generated_text, correct_answer):
    """
    Evaluate whether the generated answer contains the correct answer.
    This is a basic check; you can expand it for more robust evaluation.
    """
    return correct_answer.lower() in generated_text.lower()

results = []
for question_obj in data:
    prompt = create_prompt(question_obj)
    # Encode the prompt using the tokenizer
    input_ids = tokenizer.encode(prompt, return_tensors='tf')
    
    # Generate answer with GPT-2 (adjust max_length and other parameters as needed)
    output_ids = model.generate(input_ids, max_length=input_ids.shape[1] + 20, do_sample=True)
    
    # Decode the generated tokens to text
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    # Extract the answer portion after "Your Answer:"
    answer_generated = generated_text.split("Your Answer:")[-1].strip()
    
    # Evaluate if the answer is correct
    is_correct = evaluate_answer(answer_generated, question_obj["correct_answer"])
    
    results.append({
        "id": question_obj["id"],
        "prompt": prompt,
        "generated_answer": answer_generated,
        "evaluation": "Correct" if is_correct else "Incorrect"
    })

# Display the results
for res in results:
    print(f"Question ID: {res['id']}")
    print("Prompt:\n", res['prompt'])
    print("Generated Answer:", res['generated_answer'])
    print("Evaluation:", res['evaluation'])
    print("-" * 50)


All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence


Question ID: hp_001
Prompt:
 Question: What curse caused the death of Harry Potter's parents?

Options:
A. Avada Kedavra
B. Cruciatus Curse
C. Imperius Curse
D. Soul Extraction Curse

Your Answer:
Generated Answer: Q: Do you know where Harry Potter and Ron Weasley's bodies are?


A
Evaluation: Incorrect
--------------------------------------------------
