In [1]:
import json
import os
import re

import pandas as pd
from openai import OpenAI


In [2]:
def _score_responses(records, role, option_scores):
    """Build a scored response frame for one respondent role.

    Args:
        records: List of response dicts; each must provide the keys
            "question_id", "dimension" and "selected_option".
        role: Label stored in the "role" column (e.g. "parent", "teen").
        option_scores: Dict mapping an option letter to its numeric score.

    Returns:
        DataFrame with columns question_id, dimension, selected_option,
        role and score. Options missing from ``option_scores`` get a NaN
        score (that is how ``Series.map`` treats unmapped values).
    """
    return (
        pd.DataFrame(records)[["question_id", "dimension", "selected_option"]]
        # .assign returns a new frame, so no column is ever set on a slice of
        # another frame (avoids chained-assignment warnings).
        .assign(
            role=role,
            score=lambda frame: frame["selected_option"].map(option_scores),
        )
    )


with open("response_data_20250705_assessment.json", "r", encoding="utf-8") as f:
    data = json.load(f)

parent = data["responses"]["parent"]
teen = data["responses"]["teenager"]

# Map options A–E to 1–5. Any other selected_option value becomes NaN.
mapping = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5}

df_parent = _score_responses(parent, "parent", mapping)
df_teen = _score_responses(teen, "teen", mapping)

# Mean score per dimension for each role. The two Series are aligned on the
# dimension name when combined, so a dimension answered by only one role (or
# whose answers all fall outside A–E) shows up as NaN in the table.
parent_mean = df_parent.groupby("dimension")["score"].mean()
teen_mean = df_teen.groupby("dimension")["score"].mean()
res = pd.DataFrame({"parent_mean": parent_mean, "teen_mean": teen_mean})

print(res)

                        parent_mean  teen_mean
dimension                                     
Acceptance                 2.000000   3.500000
Attention                  2.333333   2.500000
Attitude                   5.000000   2.333333
Behavior Management             NaN        NaN
Care                       5.000000   5.000000
Clarity                    1.000000   3.333333
Daily Living Skills        2.000000   2.000000
Dominating                 4.000000   2.000000
Emotional Stability        3.000000   1.000000
General                         NaN        NaN
Imagination                3.000000   3.000000
Inference                  2.500000   4.000000
Information Processing     3.333333   4.000000
Maturity of Judgment       5.000000   2.500000
Motivation                 3.000000   5.000000
Open-mindedness            3.000000   3.500000
Performance-Avoidance      2.666667   2.000000
Resilience                 1.333333   4.000000
Social Skills              3.333333   2.500000
Socialization

In [3]:
# Positive delta: the teen's mean score on that dimension is higher than the
# parent's; negative delta: the parent's is higher.
res["delta"] = res["teen_mean"] - res["parent_mean"]

# Dimensions without a numeric mean on either side have a NaN delta. They carry
# no information about agreement, so keep them out of the prompt instead of
# sending literal "NaN" rows to the model. res["delta"] itself is left intact.
delta_table = res["delta"].dropna().to_string()

prompt_text = f"""
You are a Psychology-Inspired Educational Assistant.
Do NOT give medical diagnoses.

### Example
Input:
some gaps: Emotional Stability (+2), Responsibility (-2), Daily Living Skills(0), etc...

Output (Step by step reasoning + final):
- Step 1: Agreements → Parent and teen mostly agree on daily routines and habits.
- Step 2: Disagreements → Teen sees themselves more stable emotionally, parent doubts responsibility.
- Step 3: Tips → Encourage open talks, set small shared goals, praise consistency.

---

### Your Task
Here is the processed data:
{delta_table}

Think step by step:
1. Identify agreements.
2. Identify biggest gaps.
3. Explain why these gaps might exist.
4. Suggest 2–3 simple parenting tips.

Provide your final structured answer in the same style as the example.
"""


In [None]:
def score_output(output_text, deltas):
    """Score an LLM answer with three simple heuristics, each in [0, 1].

    Args:
        output_text: The model's answer. ``None`` is treated as an empty string.
        deltas: Mapping of dimension name -> score gap; anything exposing
            ``.items()`` works (a dict or a pandas Series indexed by
            dimension). ``None`` is treated as empty.

    Returns:
        dict with the keys:
          * "confidence": fraction of "Step 1" .. "Step 4" markers found in
            the text (each marker present counts 1, divided by 4).
          * "reasoning_depth": 1.0 if at least two dimension names are
            mentioned, 0.5 if exactly one, 0.0 if none.
          * "breadth": share of the big gaps (|delta| > 1) whose dimension is
            mentioned. 1.0 when there are no big gaps, since there is nothing
            left to cover.

    Matching is case-insensitive and on whole words, so "Care" does not match
    "careful". NaN deltas never count as big gaps because ``abs(nan) > 1`` is
    False.
    """
    text = output_text or ""
    gaps = dict(deltas.items()) if deltas is not None else {}
    scores = {}

    def _mentions(name):
        # Escape the name so characters such as "-" are matched literally.
        pattern = rf"\b{re.escape(str(name))}\b"
        return re.search(pattern, text, flags=re.IGNORECASE) is not None

    # Confidence: did the answer follow steps 1-4?
    steps_found = sum(
        1
        for step in range(1, 5)
        if re.search(rf"\bstep\s*{step}\b", text, flags=re.IGNORECASE)
    )
    scores["confidence"] = steps_found / 4

    # Reasoning depth: how many of the known dimensions are referenced at all.
    mentioned_count = sum(1 for name in gaps if _mentions(name))
    if mentioned_count >= 2:
        scores["reasoning_depth"] = 1.0
    elif mentioned_count == 1:
        scores["reasoning_depth"] = 0.5
    else:
        scores["reasoning_depth"] = 0.0

    # Breadth: coverage of the dimensions with a large parent/teen gap.
    big_gaps = [name for name, gap in gaps.items() if abs(gap) > 1]
    if big_gaps:
        covered = sum(1 for name in big_gaps if _mentions(name))
        scores["breadth"] = covered / len(big_gaps)
    else:
        scores["breadth"] = 1.0

    return scores

In [None]:
# The key is read from the environment variable named YOUR_API_KEY, so no
# secret is stored in the notebook.
client = OpenAI(
    api_key=os.getenv('YOUR_API_KEY'),
)

# GPT-5 is a reasoning model: on Chat Completions it rejects the deprecated
# `max_tokens` parameter (use `max_completion_tokens`) and any non-default
# `temperature`, so the request no longer sends either.
# `max_completion_tokens` also counts hidden reasoning tokens, hence the
# budget is larger than the ~500 visible tokens originally intended.
response = client.chat.completions.create(
    model="gpt-5",
    messages=[
        {"role": "user", "content": prompt_text}
    ],
    max_completion_tokens=2000,
)

# Extract text; content can be None (e.g. when the token budget is exhausted
# before any visible output), so normalise it to an empty string.
llm_output = response.choices[0].message.content or ""

# Score the output against the parent/teen gaps computed earlier.
scores = score_output(llm_output, res["delta"])
