In [None]:
# Prompts about a child named Alex. Each entry first describes something Alex
# can do (see, hear, tell colors apart, ...) and then asks when he could do it
# for the first time.
#
# Multi-sentence prompts are built with implicit string concatenation inside
# parentheses. A backslash-newline *inside* a string literal would pull the next
# line's source indentation into the prompt, leaving runs of spaces in the
# middle of the text; adjacent literals avoid that, so every prompt is
# single-spaced and carries no leading or trailing whitespace.
questions_on_empiricism = [
    'Alex can see things with his eyes. When could Alex see with his eyes for the first time?',
    'When there is a sound close by, Alex can hear it. When could Alex hear sounds for the first time?',
    (
        'When seeing a red flower and a blue flower, Alex can tell that they are different colors. '
        'Alex can tell colors apart. '
        'When could Alex tell colors apart for the first time?'
    ),
    (
        'When there is a car approaching, Alex can tell that the car is getting closer. '
        'Alex can tell what is near and what is far. '
        'When could Alex tell near and far for the first time?'
    ),
    (
        'When Alex sees someone hold an object and then drop it, Alex thinks the object will fall. '
        'Alex thinks objects will fall if we let go of them. '
        'When could Alex think that for the first time?'
    ),
    (
        'If Alex sees a toy being hidden in a box, he will think the object is still there '
        'even though he can no longer see it. '
        'When could Alex think that for the first time?'
    ),
    (
        'If Alex sees two cookies, one with 5 chocolate chips in it and one with 20 chocolate chips in it, '
        'he can tell which cookie has more chocolate chips without counting. '
        'When could Alex tell which has more for the first time?'
    ),
    (
        'If Alex sees a turtle that is upside down and struggling to get on its feet, '
        'he thinks that he should help the turtle. '
        'Alex thinks that helping is the right thing to do. '
        'When could Alex think that for the first time?'
    ),
    'Alex can read books. When could Alex read for the first time?',
]

In [None]:
import kagglehub
import glob

# FGNET age progression dataset from kaggle
path = kagglehub.dataset_download("aiolapo/fgnet-dataset")


def _find_image(root, filename):
    """Return the first file named `filename` found anywhere under `root`.

    Args:
        root: Directory to search recursively.
        filename: Exact file name to look for (e.g. "080A00.JPG").

    Returns:
        The first matching path reported by a recursive glob.

    Raises:
        FileNotFoundError: If no file with that name exists under `root`.
    """
    matches = glob.glob(f"{root}/**/{filename}", recursive=True)
    if not matches:
        # Fail with the missing file's name instead of a bare IndexError.
        raise FileNotFoundError(f"{filename!r} not found under {root!r}")
    return matches[0]


# Age-stage label -> image file name inside the downloaded dataset.
# NOTE(review): the names look like "<subject>A<age>" (subject 080 at ages
# 0, 1, 2, 4 and 7) -- confirm against the dataset's naming convention.
_AGE_STAGE_FILES = {
    'newborn': "080A00.JPG",
    'older_infant': "080A01.JPG",
    'toddler': "080A02.JPG",
    'preschool_child': "080A04.JPG",
    'schoolage_child': "080A07.JPG",
}

# Age-stage label -> path of the matching image on disk.
age_paths = {
    stage: _find_image(path, filename)
    for stage, filename in _AGE_STAGE_FILES.items()
}

In [None]:
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from PIL import Image
import torch

# Template from https://huggingface.co/google/gemma-3-27b-it
model_id = "google/gemma-3-27b-it"

# Load the weights explicitly in bfloat16: the inputs are cast to bfloat16
# before generation, and leaving the dtype to the library default can load this
# model in float32, which needs roughly twice the memory.
# device_map="auto" lets the library place the weights on the available devices.
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16
).eval()

# The processor builds the model inputs from chat messages and decodes outputs.
processor = AutoProcessor.from_pretrained(model_id)

# Chat for one survey item: a system role instruction, then a user turn with
# three child photos followed by the first question. The prompt refers to the
# photos by position ("image 1, 2, 3"), so the order of the image entries
# matters.
# NOTE(review): local file paths are passed under the "url" key -- confirm the
# installed transformers version loads local paths from that key.
messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": "You are a human answering questions for a psychology survey."}]
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "url": age_paths['newborn']},
            {"type": "image", "url": age_paths['toddler']},
            {"type": "image", "url": age_paths['schoolage_child']},
            {
                "type": "text",
                # Adjacent literals instead of a backslash continuation inside
                # the string, so source indentation does not end up in the
                # prompt as a long run of spaces.
                "text": (
                    f'{questions_on_empiricism[0]} Pick from image 1, 2, 3. '
                    'You must reply with either 1, 2, or 3 and specify the age of the child.'
                ),
            },
        ]
    }
]

# Render the chat through the processor's chat template into a dict of
# PyTorch tensors, then move it to the model's device (cast to bfloat16).
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
)
inputs = inputs.to(model.device, dtype=torch.bfloat16)

# Number of prompt tokens; used below to cut the prompt off the output.
input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    # Deterministic decoding (do_sample=False), at most 100 new tokens.
    # Keep the first sequence and drop its first `input_len` positions so only
    # the newly generated tokens are decoded.
    generation = model.generate(
        **inputs,
        do_sample=False,
        max_new_tokens=100,
    )[0][input_len:]

decoded = processor.decode(generation, skip_special_tokens=True)
print(decoded)
