# Grading Experiment
MCQ grading and feedback

Scenarios:
1. Single agent, 0-shot
2. Single agent, 1-shot
3. Two agents, 0-shot
4. Two agents, 1-shot (Grade -> Structured Feedback - Simulated)

In [1]:
%pip install google-genai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [75]:
import os
from google import genai
from google.genai import types
import json

In [76]:
# Model name passed to every generate_content call in this notebook.
base_model = "gemini-2.0-flash"

# The key comes from the environment; .get() yields None when the variable is unset.
api_key = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=api_key)

# setup study material files
# Order is significant: index 0 is the source text, index 1 is the rubric.
files = [
  client.files.upload(file=pdf_name)
  for pdf_name in ('section-loop.pdf', 'sample-rubric.pdf')
]

In [77]:
def setup_question_generator_config(system_prompt):
  """Build a generation config that requests JSON output under a system prompt.

  Args:
      system_prompt (str): Text to use as the model's system instruction.

  Returns:
      types.GenerateContentConfig: Config with response_mime_type set to
      "application/json" and `system_prompt` as its only system-instruction part.
  """
  instruction_part = types.Part.from_text(text=system_prompt)
  return types.GenerateContentConfig(
      system_instruction=[instruction_part],
      response_mime_type="application/json",
  )

In [78]:
def get_grading_result(questions, student_answers):
    """
    Grade student answers against questions and return detailed results.

    Questions and answers are paired by position. Pairing stops at the end of
    the shorter list, so unmatched trailing questions or answers are ignored.

    Args:
        questions (list): Question dictionaries; each must provide the keys
            'question_text' and 'correct_option'.
        student_answers (list): The student's selected option for each question,
            in the same order as `questions`.

    Returns:
        list: One dictionary per graded pair with the keys 'question_text',
        'student_answer', 'correct_answer' and 'is_correct' (True when the
        student's answer equals the question's 'correct_option').

    Raises:
        KeyError: If a question lacks 'question_text' or 'correct_option'.
    """
    return [
        {
            'question_text': question['question_text'],
            'student_answer': student_answer,
            'correct_answer': question['correct_option'],
            'is_correct': student_answer == question['correct_option'],
        }
        for question, student_answer in zip(questions, student_answers)
    ]

In [79]:
# Parse the question set and the student's selections; the `with` blocks close
# each file as soon as it has been read.
with open('sample-questions.json') as questions_file:
    questions = json.load(questions_file)['questions']
with open('sample-answer.json') as answers_file:
    student_answers = json.load(answers_file)['answers']

# Right/wrong grading is done locally; the model calls later in the notebook
# receive these results and only produce feedback text.
student_results = get_grading_result(questions, student_answers)

# Scenario 1

In [80]:
# Scenario 1 grader: read the 0-shot system prompt from disk.
system_prompt_scenario_1 = ""
with open('grader-0-shot-prompt.txt') as prompt_file:
    system_prompt_scenario_1 = prompt_file.read()

# Wrap the prompt in a JSON-output generation config.
scenario_1_config = setup_question_generator_config(
    system_prompt=system_prompt_scenario_1,
)

In [81]:
# Scenario 1 output: the grading loop that follows appends one parsed JSON response per student result.
result_scenario_1 = []

In [82]:
# Start from an empty list so that re-running this cell does not append duplicates.
result_scenario_1 = []

# The uploaded source text and rubric are attached unchanged to every request,
# so their parts are built once instead of once per question.
file_parts = [
    types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
    for uploaded in files
]

# Scenario 1: a single grader call per question, using the scenario 1 system prompt.
for student_result in student_results:
    user_text = f"""Generate a justification/feedback message for the student's answer to the MCQ below. Follow the guidelines in the referenced rubric file and use the source text file for context when needed. Use the JSON output format described in your instructions.
Question Text: "{student_result['question_text']}"
Correct Answer Key: "{student_result['correct_answer']}"
Student Selected Key: "{student_result['student_answer']}"
Was Student Correct?: {student_result['is_correct']}
(Source Text and Rubric files are referenced)
"""
    content_scenario_1 = [
        types.Content(
            role="user",
            parts=[*file_parts, types.Part.from_text(text=user_text)],
        )
    ]
    _result_1 = client.models.generate_content(
        model=base_model,
        contents=content_scenario_1,
        config=scenario_1_config,
    )
    # The config requests application/json, so the response text is parsed as JSON.
    result_scenario_1.append(json.loads(_result_1.text))



In [83]:
# Show each scenario 1 response on its own line.
for grader_output in result_scenario_1:
    print(grader_output)

{'justification_message': 'Correct! The while loop continues as long as the condition is true.'}
{'justification_message': 'Correct! The continue statement skips the rest of the current iteration and proceeds to the next one.'}
{'justification_message': 'The outer loop controls how many times the inner loop runs completely. Review section 5.3, which defines the roles of the outer and inner loops in nested structures.'}
{'justification_message': "The correct answer is C. The else block in a loop-else construct is executed when the loop completes its iterations without encountering a 'break' statement. If a 'break' statement is encountered during the loop's execution, the else block is skipped (page 137)."}
{'justification_message': 'Correct! The "else" block associated with the "while" loop is executed only when the loop finishes normally (i.e., the condition becomes false). In this case, the loop finishes when \'i\' is no longer less than 5, and "Loop finished!" is printed.'}


# Scenario 2

In [84]:
# Scenario 2 grader: read the 1-shot system prompt from disk.
system_prompt_scenario_2 = ""
with open('grader-1-shot-prompt.txt') as prompt_file:
    system_prompt_scenario_2 = prompt_file.read()

# Wrap the prompt in a JSON-output generation config.
scenario_2_config = setup_question_generator_config(
    system_prompt=system_prompt_scenario_2,
)

In [85]:
# Scenario 2 output: the grading loop that follows appends one parsed JSON response per student result.
result_scenario_2 = []

In [86]:
# Start from an empty list so that re-running this cell does not append duplicates.
result_scenario_2 = []

# The uploaded source text and rubric are attached unchanged to every request,
# so their parts are built once instead of once per question.
file_parts = [
    types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
    for uploaded in files
]

# Scenario 2: a single grader call per question; the user message carries one
# worked example of the expected JSON output.
for student_result in student_results:
    user_text = f"""Here is an example of the desired JSON output format and justification style (assuming it follows the rubric):
Example Output (for an incorrect answer):
{{
  "justification_message": "Not quite. The correct answer was C. The `break` statement exits the loop immediately. Your answer B describes the `continue` statement, which only skips the current iteration (see guideline 3 in the rubric file and the source text example on loop control)."
}}

Now, using that exact JSON format and justification style, generate a justification/feedback message for the student's answer to the MCQ below. Follow the guidelines in the referenced rubric file and use the source text file for context when needed.

Question Text: "{student_result['question_text']}"
Correct Answer Key: "{student_result['correct_answer']}"
Student Selected Key: "{student_result['student_answer']}"
Was Student Correct?: "{student_result['is_correct']}"
(Source Text and Rubric files are referenced)
"""
    content_scenario_2 = [
        types.Content(
            role="user",
            parts=[*file_parts, types.Part.from_text(text=user_text)],
        )
    ]
    _result_2 = client.models.generate_content(
        model=base_model,
        contents=content_scenario_2,
        config=scenario_2_config,
    )
    # The config requests application/json, so the response text is parsed as JSON.
    result_scenario_2.append(json.loads(_result_2.text))



In [87]:
# Show each scenario 2 response on its own line.
for grader_output in result_scenario_2:
    print(grader_output)

{'justification_message': 'Correct! The while loop continues as long as the condition is true.'}
{'justification_message': 'Correct! The `continue` statement skips the rest of the current iteration and proceeds to the next iteration of the loop.'}
{'justification_message': "Incorrect. The outer loop determines how many times the inner loop runs in its entirety. Review the 'Nested loops' section."}
{'justification_message': 'The correct answer is C. The `else` block in a loop `else` statement is executed only if the loop completes its iterations without encountering a `break` statement. Option B is incorrect; the `else` block *is* executed when the loop condition becomes false (naturally terminates). See section 5.5 in the text.'}
{'justification_message': 'Correct! The `else` block associated with the `while` loop is executed after the loop finishes normally (i.e., when the condition `i < 5` is no longer true).'}


# Scenario 3

In [88]:
# Scenario 3 reviewer: read the 0-shot reviewer system prompt from disk.
system_prompt_reviewer_scenario_3 = ""
with open('grading-reviewer-0-shot-prompt.txt') as prompt_file:
    system_prompt_reviewer_scenario_3 = prompt_file.read()

# Wrap the prompt in a JSON-output generation config.
scenario_3_config = setup_question_generator_config(
    system_prompt=system_prompt_reviewer_scenario_3,
)

In [89]:
# Scenario 3 reviewer output: the review loop that follows appends one parsed JSON assessment per scenario 1 response.
feedback_result_scenario_3 = []

In [90]:
# Start from an empty list so that re-running this cell does not append duplicates.
feedback_result_scenario_3 = []

# The uploaded source text and rubric are attached unchanged to every request,
# so their parts are built once instead of once per justification.
file_parts = [
    types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
    for uploaded in files
]

# Scenario 3, step 1: the reviewer assesses each scenario 1 response.
# `result` is interpolated as the Python repr of the parsed response dict.
for result in result_scenario_1:
    user_text = f"""Assess whether the following justification message adheres to all guidelines specified in the referenced rubric file. Use the JSON output format described in your instructions.
Justification to Review:

{result}

(Rubric file and study material are attached)
"""

    content_scenario_3 = [
        types.Content(
            role="user",
            parts=[*file_parts, types.Part.from_text(text=user_text)],
        )
    ]
    _result_3 = client.models.generate_content(
        model=base_model,
        contents=content_scenario_3,
        config=scenario_3_config,
    )
    print(f"{type(_result_3.text)} {_result_3.text}")
    # The config requests application/json, so the response text is parsed as JSON.
    feedback_result_scenario_3.append(json.loads(_result_3.text))


<class 'str'> {
  "adheres_to_rubric": false,
  "assessment_notes": "The justification is too short and doesn't properly explain the answer. The questions require explaining reasoning behind the selection and the generated answer does not attempt to do that."
}
<class 'str'> {
  "adheres_to_rubric": true,
  "assessment_notes": "The justification message is very brief but accurate in this case. It correctly identifies what the \"continue\" statement does. It's concise and doesn't violate any of the negative constraints."
}
<class 'str'> {
  "adheres_to_rubric": true,
  "assessment_notes": "The justification accurately and concisely refers to the location in the text where the answer can be found (section 5.3)."
}
<class 'str'> {
  "adheres_to_rubric": true,
  "assessment_notes": "The justification accurately and concisely explains the concept of a 'loop-else' construct. It correctly identifies that the 'else' block is executed only when the loop completes without a 'break' and correctly

In [91]:
# Show each scenario 3 reviewer assessment on its own line.
for assessment in feedback_result_scenario_3:
    print(assessment)

{'adheres_to_rubric': False, 'assessment_notes': "The justification is too short and doesn't properly explain the answer. The questions require explaining reasoning behind the selection and the generated answer does not attempt to do that."}
{'adheres_to_rubric': True, 'assessment_notes': 'The justification message is very brief but accurate in this case. It correctly identifies what the "continue" statement does. It\'s concise and doesn\'t violate any of the negative constraints.'}
{'adheres_to_rubric': True, 'assessment_notes': 'The justification accurately and concisely refers to the location in the text where the answer can be found (section 5.3).'}
{'adheres_to_rubric': True, 'assessment_notes': "The justification accurately and concisely explains the concept of a 'loop-else' construct. It correctly identifies that the 'else' block is executed only when the loop completes without a 'break' and correctly indicates the page where this is discussed. The justification is clear, and di

In [92]:
# Scenario 3 final output: the regeneration loop that follows appends one parsed JSON response per student result.
new_result_scenario_3 = []

In [94]:
# Start from an empty list so that re-running this cell does not append duplicates.
new_result_scenario_3 = []

# The uploaded source text and rubric are attached unchanged to every request,
# so their parts are built once instead of once per question.
file_parts = [
    types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
    for uploaded in files
]

# Scenario 3, step 2: the grader (scenario 1 config) answers again, this time
# with the reviewer's assessment for the same question appended to the request.
# NOTE(review): the request carries the reviewer's notes but not the
# justification text those notes refer to — confirm this is intended.
for i, student_result in enumerate(student_results):
    user_text = f"""Generate a justification/feedback message for the student's answer to the MCQ below. Follow the guidelines in the referenced rubric file and use the source text file for context when needed. Use the JSON output format described in your instructions.
Question Text: "{student_result['question_text']}"
Correct Answer Key: "{student_result['correct_answer']}"
Student Selected Key: "{student_result['student_answer']}"
Was Student Correct?: {student_result['is_correct']}
(Source Text and Rubric files are referenced)
"""
    # Index i pairs this student result with the reviewer assessment at the same position.
    feedback_text = f"""Feedback from review agent:{feedback_result_scenario_3[i]}"""
    content_scenario_3 = [
        types.Content(
            role="user",
            parts=[
                *file_parts,
                types.Part.from_text(text=user_text),
                types.Part.from_text(text=feedback_text),
            ]
        ),
    ]
    new_result_3 = client.models.generate_content(
        model=base_model,
        contents=content_scenario_3,
        config=scenario_1_config,
    )
    print(f"{type(new_result_3.text)} {new_result_3.text}")
    # The config requests application/json, so the response text is parsed as JSON.
    new_result_scenario_3.append(json.loads(new_result_3.text))



<class 'str'> {
"justification_message": "Correct! The while loop continues as long as the condition is true."
}
<class 'str'> {
  "justification_message": "Correct! The continue statement skips the rest of the current iteration and proceeds to the next one."
}
<class 'str'> {
"justification_message": "Incorrect. The outer loop controls the number of times the inner loop executes fully. See section 5.3."
}
<class 'str'> {
 "justification_message": "That's incorrect. According to section 5.5, the `else` block of a loop is executed only when the loop completes its iterations without encountering a `break` statement. If a `break` is encountered, the `else` block is skipped."
}
<class 'str'> {
"justification_message": "Correct! The 'else' block associated with a 'while' loop is executed only if the loop completes normally (i.e., the loop condition becomes false), and not if the loop is terminated by a 'break' statement."
}


In [95]:
# Show each regenerated scenario 3 response on its own line.
for regenerated in new_result_scenario_3:
    print(regenerated)

{'justification_message': 'Correct! The while loop continues as long as the condition is true.'}
{'justification_message': 'Correct! The continue statement skips the rest of the current iteration and proceeds to the next one.'}
{'justification_message': 'Incorrect. The outer loop controls the number of times the inner loop executes fully. See section 5.3.'}
{'justification_message': "That's incorrect. According to section 5.5, the `else` block of a loop is executed only when the loop completes its iterations without encountering a `break` statement. If a `break` is encountered, the `else` block is skipped."}
{'justification_message': "Correct! The 'else' block associated with a 'while' loop is executed only if the loop completes normally (i.e., the loop condition becomes false), and not if the loop is terminated by a 'break' statement."}


# Scenario 4

In [96]:
# Scenario 4 reviewer: read the 1-shot reviewer system prompt from disk.
system_prompt_reviewer_scenario_4 = ""
with open('grading-reviewer-1-shot-prompt.txt') as prompt_file:
    system_prompt_reviewer_scenario_4 = prompt_file.read()

# Wrap the prompt in a JSON-output generation config.
scenario_4_config = setup_question_generator_config(
    system_prompt=system_prompt_reviewer_scenario_4,
)

In [97]:
# Scenario 4 reviewer output: the review loop that follows appends one parsed JSON assessment per scenario 2 response.
feedback_result_scenario_4 = []

In [98]:
# Start from an empty list so that re-running this cell does not append duplicates.
feedback_result_scenario_4 = []

# The uploaded source text and rubric are attached unchanged to every request,
# so their parts are built once instead of once per justification.
file_parts = [
    types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
    for uploaded in files
]

# Scenario 4, step 1: the reviewer assesses each scenario 2 response; the user
# message carries example assessments in the expected JSON format.
# `result` is interpolated as the Python repr of the parsed response dict.
for result in result_scenario_2:
    user_text = f"""Here is an example of the desired JSON output format for the assessment:
Example Output (assuming adherence):
{{
  "adheres_to_rubric": true,
  "assessment_notes": "Adheres to all guidelines: Correct length, supportive tone, explains using source concepts, states correct answer."
}}
// OR (assuming non-adherence):
// {{
//  "adheres_to_rubric": false,
//  "assessment_notes": "Does not adhere: Failed to explain why the student's incorrect answer was wrong (guideline 2b) and used a neutral rather than supportive tone (guideline 1)."
// }}

Now, using that exact JSON format, assess whether the following justification message adheres to all guidelines specified in the referenced rubric file.

Justification to Review:
{result}

(Rubric file and study material are attached)
"""

    content_scenario_4 = [
        types.Content(
            role="user",
            parts=[*file_parts, types.Part.from_text(text=user_text)],
        )
    ]
    _result_4 = client.models.generate_content(
        model=base_model,
        contents=content_scenario_4,
        config=scenario_4_config,
    )
    # The config requests application/json, so the response text is parsed as JSON.
    feedback_result_scenario_4.append(json.loads(_result_4.text))



In [99]:
# Show each scenario 4 reviewer assessment on its own line.
for assessment in feedback_result_scenario_4:
    print(assessment)

{'adheres_to_rubric': False, 'assessment_notes': 'The justification is too short and does not provide enough explanation. It also does not refer to the source text. It is also too terse to be supportive.'}
{'adheres_to_rubric': True, 'assessment_notes': "The justification message is concise, uses a supportive tone ('Correct!'), and accurately describes the functionality of the `continue` statement in the context of loops. It directly relates to the concept being tested in the material."}
{'adheres_to_rubric': False, 'assessment_notes': "The justification uses a neutral, rather than supportive tone. It also doesn't state the correct answer or give specific guidance on how to arrive at it, which is especially important given that the response was incorrect."}
{'adheres_to_rubric': True, 'assessment_notes': 'Adheres to all guidelines. The justification is concise, uses a supportive tone, and correctly explains the logic behind the correct answer (C), while also explaining why option B is 

In [100]:
# Scenario 4 final output: the regeneration loop that follows appends one parsed JSON response per student result.
new_result_scenario_4 = []

In [101]:
# Start from an empty list so that re-running this cell does not append duplicates.
new_result_scenario_4 = []

# The uploaded source text and rubric are attached unchanged to every request,
# so their parts are built once instead of once per question.
file_parts = [
    types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
    for uploaded in files
]

# Scenario 4, step 2: the grader (scenario 2 config) answers again, this time
# with the reviewer's assessment for the same question appended to the request.
# The index must come from this loop: each student result is paired with the
# reviewer assessment at the same position, not with whatever value a global
# `i` happens to hold from another cell.
# NOTE(review): the request carries the reviewer's notes but not the
# justification text those notes refer to — confirm this is intended.
for i, student_result in enumerate(student_results):
    user_text = f"""Here is an example of the desired JSON output format and justification style (assuming it follows the rubric):
Example Output (for an incorrect answer):
{{
  "justification_message": "Not quite. The correct answer was C. The `break` statement exits the loop immediately. Your answer B describes the `continue` statement, which only skips the current iteration (see guideline 3 in the rubric file and the source text example on loop control)."
}}

Now, using that exact JSON format and justification style, generate a justification/feedback message for the student's answer to the MCQ below. Follow the guidelines in the referenced rubric file and use the source text file for context when needed.

Question Text: "{student_result['question_text']}"
Correct Answer Key: "{student_result['correct_answer']}"
Student Selected Key: "{student_result['student_answer']}"
Was Student Correct?: "{student_result['is_correct']}"
(Source Text and Rubric files are referenced)
"""
    feedback_text = f"""Feedback from review agent: {feedback_result_scenario_4[i]}"""
    content_scenario_4 = [
        types.Content(
            role="user",
            parts=[
                *file_parts,
                types.Part.from_text(text=user_text),
                types.Part.from_text(text=feedback_text),
            ]
        ),
    ]
    new_result_4 = client.models.generate_content(
        model=base_model,
        contents=content_scenario_4,
        config=scenario_2_config,
    )
    # The config requests application/json, so the response text is parsed as JSON.
    new_result_scenario_4.append(json.loads(new_result_4.text))



In [102]:
# Show each regenerated scenario 4 response on its own line.
for regenerated in new_result_scenario_4:
    print(regenerated)

{'justification_message': 'Correct! A while loop continues executing as long as its condition remains true.'}
{'justification_message': "That's correct! The `continue` statement skips the rest of the current iteration and proceeds to the next (see section 5.4 in the text)."}
{'justification_message': 'Not quite! The outer loop determines how many times the inner loop runs completely. Therefore, the correct answer is B (outer loop). Review section 5.3 in the text for more information on nested loops.'}
{'justification_message': "Incorrect. The correct answer is C: If the loop's execution is interrupted by a `break` statement. According to the text, a loop `else` statement runs after the loop's execution is completed without being interrupted by a `break` statement."}
{'justification_message': 'Correct! The `else` block associated with a `while` loop is executed only when the loop finishes normally (i.e., the loop condition becomes false), not if the loop is terminated by a `break` state