In [157]:
import json
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from dotenv import load_dotenv
from openai import OpenAI

In [158]:
load_dotenv(override=True)

True

In [159]:
openai = OpenAI()
model_name = "gpt-5-nano"

In [160]:
subject = input("Enter a subject for quiz:")
num_of_questions = input("Enter a number of questions:")

agent_state = {
    "questions": [],
    "choices": [],
    "current_index": 0,
    "user_responses": [],
    "correct_answers": []
}

In [161]:
def show(panel):
    try:
        Console(force_jupyter=True).print(panel)
    except Exception:
        print(panel)

In [162]:
def produce_grade():
    user_res = agent_state["user_responses"]

    if not user_res:
        show("No responses recorded")
        return

    score = round(user_res.count(True) * 100 / len(user_res))
    show(f"Score: {score}%")

    if score >= 75:
        grade = "A"
    elif 60 <= score < 75:
        grade = "B"
    elif 40 <= score < 60:
        grade = "C"
    else:
        grade = "D"

    text = Text(f"You scored {score}%. Grade: {grade}", justify="center")
    panel = Panel(text, title="RESULT", title_align="center")
    show(panel)



In [163]:
produce_grade()
show(subject)

In [164]:
def generate_questions(questions: list[str], choices: list[list[str]], correct_answers: list[str]) -> str :
    if correct_answers is None:
        raise RuntimeError(
            "generate_questions called without correct_answers. "
            "This is a model error."
        )

    if len(questions) != len(choices) != len(correct_answers):
        raise RuntimeError("Questions, choices, and answers length mismatch")
    agent_state["questions"] = questions
    agent_state["choices"] = choices
    agent_state["current_index"] = 0
    agent_state["user_responses"] = [False] * len(questions)
    agent_state["correct_answers"] = correct_answers
    return "QUESTIONS_STORED"

def ask_questions(index: int):
    print("DEBUG asking question index:", index)
    if not agent_state.get("questions") or not agent_state.get("choices"):
        raise RuntimeError(
            "Agent state not initialized. "
            "generate_questions must be called before ask_questions."
        )

    if len(agent_state["questions"]) == 0 or len(agent_state["choices"]) == 0:
        raise ValueError(
            "Questions or choices list is empty. Cannot ask questions."
        )
    q = agent_state["questions"][index]
    opts = agent_state["choices"][index]

    print(f"\nQ{index+1}: {q}")
    for i, opt in enumerate(opts):
        print(f"{chr(65+i)}. {opt}")

    return "QUESTION_DISPLAYED"

def record_user_response(index: int, answer: str):
    is_correct = evaluate_user_response(index, answer)
    agent_state["user_responses"][index] = is_correct

def evaluate_user_response(index: int, user_answer: str) -> bool:

    answer_map = {"A": 0, "B": 1, "C": 2, "D": 3}

    normalized = user_answer.strip().upper()
    if normalized not in answer_map:
        raise ValueError(f"Invalid answer received: {normalized}")

    selected_index = answer_map[normalized]
    correct_index = agent_state["correct_answers"][index]

    return selected_index == correct_index



In [165]:
generate_questions_json = {
    "name": "generate_questions",
    "description": "Tool to store generated questions and answer choices",
    "parameters": {
        "type": "object",
        "properties": {
            "questions": {
                "type": "array",
                "title": "Questions",
                "items": {
                    "type": "string"
                }
            },
            "choices": {
                "type": "array",
                "title": "Choices",
                "items": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                }
            },
            "correct_answers": {
                "type": "array",
                "title": "Correct Answers",
                "items": {
                    "type": "integer",
                    "minimum": 0,
                    "maximum": 3
                }
            },
        },
        "required": ["questions", "choices", "correct_answers"],
        "additionalProperties": False
    }
}

ask_questions_json = {
    "name": "ask_questions",
    "description": "Tool to ask questions one by one to the user",
    "parameters": {
        "type": "object",
        "properties": {
            "index": {
                "type": "integer",
            },
        },
        "required": ["index"],
        "additionalProperties": False
    }
}

produce_grade_json = {
    "name": "produce_grade",
    "description": "Tool to grade the user response"
}


In [166]:
tools = [{"type": "function", "function": generate_questions_json},
         {"type": "function", "function": ask_questions_json},
         {"type": "function", "function": produce_grade_json}]

In [167]:
def handle_tool_calls(tool_calls):
    results = []
    for tool_call in tool_calls:
        tool_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        tool = globals().get(tool_name)
        result = tool(**arguments) if tool else {}
        results.append({"role": "tool","content": json.dumps(result),"tool_call_id": tool_call.id})
    return results

def handle_each_tool_call(each_tool_call):
    tool_name = each_tool_call.function.name
    arguments = json.loads(each_tool_call.function.arguments)
    tool = globals().get(tool_name)
    result = tool(**arguments) if tool else {}
    return {"role": "tool","content": json.dumps(result),"tool_call_id": each_tool_call.id}

def run_quiz():
    total = len(agent_state["questions"])

    for idx in range(total):
        ask_questions(idx)

        while True:
            user_answer = input("Your answer (A/B/C/D): ").strip().upper()
            try:
                record_user_response(idx, user_answer)
                break
            except ValueError:
                print("❌ Invalid input. Please enter A, B, C, or D.")

    produce_grade()
    return

In [168]:
def loop(prompt):
    done = False
    while not done:
        res = openai.chat.completions.create(model=model_name, messages=prompt, tools=tools, reasoning_effort="minimal")

        finish_reason = res.choices[0].finish_reason
        message = res.choices[0].message
        if finish_reason=="tool_calls":
            prompt.append(message)
            tool_calls = message.tool_calls

            for each_tool_call in tool_calls:
                if each_tool_call.function.name == "generate_questions":
                    args = json.loads(each_tool_call.function.arguments)
                    result = generate_questions(
                        questions=args["questions"],
                        choices=args["choices"],
                        correct_answers=args["correct_answers"]
                    )

                    messages.append({
                        "role": "tool",
                        "name": "generate_questions",
                        "content": json.dumps(result),
                        "tool_call_id": each_tool_call.id
                  })

        run_quiz()
        return

In [169]:
system_prompt=f"""
You are a quiz-generation agent.

Your responsibilities are STRICTLY LIMITED to calling tools.
You must NOT ask the user questions or display text directly.

When given a subject, you MUST:

1. Generate EXACTLY {num_of_questions} multiple-choice questions for the given subject.
2. Each question MUST have EXACTLY 4 answer choices.
3. Each question MUST have ONLY ONE correct answer.

4. Call the tool `generate_questions` ONCE with:
   - questions: list of question strings
   - choices: list of lists of 4 strings
   - correct_answers: list of integers (0–3)

You MUST ALWAYS include `correct_answers`.

IMPORTANT:
- You MUST NOT attempt to control quiz flow.
- You MUST NOT loop.
- You MUST NOT call ask_questions more than once.
- You MUST NOT call produce_grade.
- You MUST ONLY generate quiz data.
"""
user_choice=f"""
{subject}
"""
messages =[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_choice}]

In [170]:

loop(messages)

DEBUG asking question index: 0

Q1: Which number is a prime number?
A. 4
B. 6
C. 9
D. 7
DEBUG asking question index: 1

Q2: What is the result of 8 + 5?
A. 12
B. 13
C. 11
D. 8
DEBUG asking question index: 2

Q3: How many sides does a triangle have?
A. 3
B. 4
C. 5
D. 6
❌ Invalid input. Please enter A, B, C, or D.
DEBUG asking question index: 3

Q4: Which planet is known as the Red Planet?
A. Earth
B. Mars
C. Jupiter
D. Venus
