In [1]:
import os
import sys
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

# Make the project's `src` directory (a sibling of this notebook's folder)
# importable. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path each time.
src_dir = os.path.abspath(os.path.join('..', 'src'))
if src_dir not in sys.path:
    sys.path.append(src_dir)
print(os.getcwd())
# This import must come after the sys.path tweak above, since `llm_utils`
# is resolved through the path that was just added.
from llm_utils import analyze_student_work_structured
# Load environment variables from the `.env` file one directory up
load_dotenv(dotenv_path=os.path.join('..', '.env'))

print("Setup complete. Modules loaded.")

/Users/tianlongxu/SquirrelAi/context-aware-ai-conversation/notebooks
Setup complete. Modules loaded.


In [2]:
# Location of the student-draft CSV, relative to the notebook's working directory.
data_path = '../data/student_drafts.csv'
df = pd.read_csv(data_path)

# Quick sanity summary: frame dimensions and column names.
print(f"Original dataset loaded. Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
# Last expression of the cell: rich preview of the first rows.
df.head()

Original dataset loaded. Shape: (20, 8)
Columns: ['draft_id', 'problem_id', 'student_id', 'problem_text', 'student_answer', 'correct_answer', 'error_reason_analysis', 'student_draft_image_url']


Unnamed: 0,draft_id,problem_id,student_id,problem_text,student_answer,correct_answer,error_reason_analysis,student_draft_image_url
0,101,1,8882287307308084,A number is 35 less than 24 times 145. What is...,3345,3445,The student made a mistake during the multipli...,https://img1.classba.cn/cimages/90ad521c1e42c0...
1,102,2,8888025590665607,What is the area of the land shown in the figu...,72900,729,The student did not correctly understand the u...,https://img1.classba.cn/cimages/c2139f011daa7a...
2,103,3,8882287305793402,School is on a straight road. Alice's house is...,"925, 293","293, 925",The student showed a deviation in understandin...,https://img1.classba.cn/cimages/8e8bd724b9d3fc...
3,104,4,8882287294950916,There are 251 books. To distribute them evenly...,"5, 32","3, 31","The student misunderstood the question, failin...",https://img1.classba.cn/cimages/558d38a2e639d8...
4,105,5,8882287304803904,Calculate using long multiplication: 357 * 15.,3355,5355,The student did not correctly align the number...,https://img1.classba.cn/cimages/6fcc67f8414c0d...


In [3]:
# One result dictionary per processed draft, in the same order as `sample_df`.
results = []
# To speed up the live demo, let's process a sample of 2.
# For the full run, attendees can use: `for index, row in tqdm(df.iterrows(), total=df.shape[0]):`
sample_df = df.head(2).copy()

for index, row in tqdm(sample_df.iterrows(), total=sample_df.shape[0], desc="Analyzing Drafts"):
    problem_text = row['problem_text']
    image_url = row['student_draft_image_url']
    student_answer = row['student_answer']

    # Build the prompt with an f-string instead of `+` concatenation: if a
    # cell is not a str (e.g. the answer column was parsed as numeric, or a
    # value is missing and therefore NaN), `str + value` raises TypeError,
    # whereas formatting converts the value to text. For string inputs the
    # resulting prompt is identical to the concatenated form.
    problem_context = f"Problem Text: {problem_text}\nStudent Answer: {student_answer}"

    # Call our robust LLM utility function; a falsy result is treated as a
    # failed analysis below.
    analysis = analyze_student_work_structured(problem_context, image_url)

    if analysis:
        # If successful, convert the Pydantic object to a dictionary and append
        results.append(analysis.model_dump())
    else:
        # If there was an error, append a placeholder so `results` keeps one
        # entry per sample row
        results.append({'error_type': 'ANALYSIS_FAILED', 'detailed_explanation': 'See console for error details.'})

print("\nBatch analysis complete!")

E0000 00:00:1758318730.841132 31735859 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
Analyzing Drafts: 100%|██████████| 2/2 [00:42<00:00, 21.39s/it]


Batch analysis complete!





In [4]:
# Create a new DataFrame from the list of result dictionaries and rename its
# columns to avoid confusion with the original human-labeled data. The rename
# is chained (no `inplace=True`) so the frame is built in a single expression.
results_df = pd.DataFrame(results).rename(columns={
    'error_type': 'ai_error_type',
    'detailed_explanation': 'ai_detailed_explanation'
})

# The two frames are joined side by side by row position, so they must have
# the same number of rows. Without this check pd.concat(axis=1) would silently
# pad the shorter frame with NaN (e.g. after an interrupted batch run).
if len(results_df) != len(sample_df):
    raise ValueError(
        f"Row count mismatch: {len(sample_df)} sample rows vs "
        f"{len(results_df)} analysis results. Re-run the batch analysis cell."
    )

# Combine the original sample data with the new AI-generated results.
# The sample's index is reset so both frames share a 0..n-1 index and align.
enriched_df = pd.concat([sample_df.reset_index(drop=True), results_df], axis=1)

print("Enriched Dataset Preview:")
display(enriched_df.head())

# --- Save the final output ---
output_dir = '../results'
output_path = os.path.join(output_dir, 'student_drafts_with_analysis.csv')

# Create the results directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Save to CSV (without the positional index column)
enriched_df.to_csv(output_path, index=False)

print(f"\n✅ Successfully saved the enriched data to: {output_path}")

Enriched Dataset Preview:


Unnamed: 0,draft_id,problem_id,student_id,problem_text,student_answer,correct_answer,error_reason_analysis,student_draft_image_url,ai_error_type,ai_detailed_explanation
0,101,1,8882287307308084,A number is 35 less than 24 times 145. What is...,3345,3445,The student made a mistake during the multipli...,https://img1.classba.cn/cimages/90ad521c1e42c0...,Calculation Error,The student made a mistake in the multiplicati...
1,102,2,8888025590665607,What is the area of the land shown in the figu...,72900,729,The student did not correctly understand the u...,https://img1.classba.cn/cimages/c2139f011daa7a...,Procedural Error,The student correctly set up the multiplicatio...



✅ Successfully saved the enriched data to: ../results/student_drafts_with_analysis.csv


In [6]:
from IPython.display import display, Markdown, Image

def _md_table_cell(value):
    """Return ``value`` as text that is safe to place inside a Markdown table cell."""
    text = str(value)
    # A raw '|' would be read as a column separator and a raw line break would
    # terminate the table row, so escape the former and map the latter to <br>.
    text = text.replace('|', '\\|')
    return text.replace('\r\n', '<br>').replace('\n', '<br>').replace('\r', '<br>')


def visualize_analysis(record):
    """
    Displays a formatted 'report card' for a single record from the enriched DataFrame.

    Args:
        record: A mapping-like row (e.g. a pandas Series) providing the keys
            'draft_id', 'problem_text', 'student_answer', 'correct_answer',
            'student_draft_image_url', 'ai_error_type',
            'ai_detailed_explanation' and 'error_reason_analysis'.

    Returns:
        None. Everything is rendered through ``display``.
    """
    display(Markdown(f"--- \n### 📊 Analysis for Draft ID: {record['draft_id']}"))

    # --- Problem Information ---
    display(Markdown("#### 1. Problem & Student's Answer"))
    display(Markdown(f"**Problem Text:** {record['problem_text']}"))
    display(Markdown(f"**Student's Answer:** `{record['student_answer']}` (Correct Answer: `{record['correct_answer']}`)"))

    # --- Student's Draft Image ---
    display(Markdown("#### 2. Student's Handwritten Draft"))
    try:
        display(Image(url=record['student_draft_image_url'], width=400))
    except Exception:
        # Fall back to a text note if the image object cannot be built or shown.
        display(Markdown(f"*Could not load image from URL: {record['student_draft_image_url']}*"))

    # --- AI vs. Human Analysis ---
    display(Markdown("#### 3. AI Analysis vs. Human Label"))

    # Free-form text goes through _md_table_cell so that a '|' or a line break
    # inside an explanation cannot break the table layout.
    ai_error_type = _md_table_cell(record['ai_error_type'])
    ai_explanation = _md_table_cell(record['ai_detailed_explanation'])
    human_explanation = _md_table_cell(record['error_reason_analysis'])

    # Create a simple table using Markdown
    comparison_table = f"""
|               | **AI-Generated Analysis** | **Human-Labeled Analysis** |
|---------------|---------------------------|----------------------------|
| **Error Type**| `{ai_error_type}` | *(From Reason Below)* |
| **Explanation** | {ai_explanation} | {human_explanation} |
"""
    display(Markdown(comparison_table))

    # --- Quality Check ---
    # NOTE(review): this only distinguishes the 'ANALYSIS_FAILED' sentinel from
    # everything else; it does not compare the AI analysis with the human
    # label, so "PASS" means "an analysis was produced", not "verified".
    display(Markdown("#### 4. Quality Check"))
    if record['ai_error_type'] == 'ANALYSIS_FAILED':
        display(Markdown("🔴 **Quality: FAILED.** The AI could not process this record."))
    else:
        display(Markdown("🟢 **Quality: PASS.** The AI analysis seems plausible and correctly identifies a key mistake."))

In [7]:
# Announce the walkthrough, then render one report card per enriched row.
print("Displaying detailed analysis for each processed record...")

for row_label, draft_record in enriched_df.iterrows():
    visualize_analysis(draft_record)


Displaying detailed analysis for each processed record...


--- 
### 📊 Analysis for Draft ID: 101

#### 1. Problem & Student's Answer

**Problem Text:** A number is 35 less than 24 times 145. What is this number?

**Student's Answer:** `3345` (Correct Answer: `3445`)

#### 2. Student's Handwritten Draft

#### 3. AI Analysis vs. Human Label


|               | **AI-Generated Analysis** | **Human-Labeled Analysis** |
|---------------|---------------------------|----------------------------|
| **Error Type**| `Calculation Error` | *(From Reason Below)* |
| **Explanation** | The student made a mistake in the multiplication step. When calculating the first partial product (145 x 4), they correctly carried over a '1' from the tens column (4x4=16, plus the carried 2, makes 18). However, they forgot to add this carried '1' to the result of 4x1 in the hundreds column. They wrote 4 instead of 5, resulting in 480 instead of the correct 580. | The student made a mistake during the multiplication process, leading to an incorrect final answer. |


#### 4. Quality Check

🟢 **Quality: PASS.** The AI analysis seems plausible and correctly identifies a key mistake.

--- 
### 📊 Analysis for Draft ID: 102

#### 1. Problem & Student's Answer

**Problem Text:** What is the area of the land shown in the figure in square decimeters?

**Student's Answer:** `72900` (Correct Answer: `729`)

#### 2. Student's Handwritten Draft

#### 3. AI Analysis vs. Human Label


|               | **AI-Generated Analysis** | **Human-Labeled Analysis** |
|---------------|---------------------------|----------------------------|
| **Error Type**| `Procedural Error` | *(From Reason Below)* |
| **Explanation** | The student correctly set up the multiplication as 450 × 190 and accurately calculated the first partial product (450 × 90 = 40,500). The mistake occurred in the second step of the multiplication. Instead of calculating the second partial product by multiplying 450 by the 1 in the hundreds place (450 × 100 = 45,000), the student incorrectly used the value 32,400. The final addition was performed correctly on the wrong numbers. | The student did not correctly understand the units for the final answer and mistakenly converted the area from square decimeters to square centimeters, leading to an incorrect answer. |


#### 4. Quality Check

🟢 **Quality: PASS.** The AI analysis seems plausible and correctly identifies a key mistake.