In [None]:
from openai import OpenAI
import os
import pandas as pd
import base64
import io
from PIL import Image
import os
import json
import re

# Connect to the OpenAI-compatible server listening on localhost
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
print(client.models.list())

# Model identifier sent with every request; it also names the results folder
model_name = 'llama-3.1-unhinged-vision-8b'
output_dir = f"results_{model_name}"

# Structured-output schema passed as `response_format`: the reply must be a JSON
# object carrying two required string fields, "Judgment" and "Reasons".
character_schema = dict(
    type="json_schema",
    json_schema=dict(
        name="Judgment",
        schema=dict(
            type="object",
            properties=dict(
                Judgment=dict(type="string"),
                Reasons=dict(type="string"),
            ),
            required=["Judgment", "Reasons"],
        ),
    ),
)

def escape_inner_quotes(json_str):
    """Repair unescaped double quotes inside the "Reasons" value of a JSON string.

    Models sometimes emit text such as ``"Reasons": "The "left" side ..."``,
    which is not valid JSON. This helper backslash-escapes those inner quotes
    so that ``json.loads`` can parse the result.

    Args:
        json_str: Text expected to hold a JSON object with a "Reasons" key.

    Returns:
        ``json_str`` unchanged when it already parses as JSON or when no
        "Reasons" value can be located; otherwise a copy in which every
        not-yet-escaped double quote inside the Reasons value is escaped.
    """
    # Already valid JSON: nothing to repair, and editing it could only break it.
    try:
        json.loads(json_str)
        return json_str
    except ValueError:
        pass

    # The value is delimited by the quote that precedes either the closing
    # brace or a following "Judgment" key. Anchoring the tail at the end of the
    # string is what lets the lazy body run past quotes that belong to the text.
    match = re.search(
        r'("Reasons"\s*:\s*")(.*?)("\s*(?:,\s*"Judgment"\s*:.*)?\}\s*)\Z',
        json_str,
        re.DOTALL,
    )
    if not match:
        return json_str

    prefix, reasons, suffix = match.groups()
    # Escape only quotes that are not already preceded by a backslash.
    escaped_reasons = re.sub(r'(?<!\\)"', lambda _m: '\\"', reasons)
    # Rebuild by position so identical text elsewhere in the string is untouched.
    return json_str[:match.start()] + prefix + escaped_reasons + suffix

# Create the results folder if it doesn't exist; exist_ok=True keeps re-runs from failing
os.makedirs(output_dir, exist_ok=True)

SyncPage[Model](data=[Model(id='google/gemma-3-27b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen2.5-vl-7b', created=None, object='model', owned_by='organization_owner'), Model(id='text-embedding-nomic-embed-text-v1.5@q4_k_m', created=None, object='model', owned_by='organization_owner'), Model(id='gemma-3-4b-it-qat', created=None, object='model', owned_by='organization_owner'), Model(id='llava-v1.5-7b', created=None, object='model', owned_by='organization_owner'), Model(id='granite-vision-3.2-2b', created=None, object='model', owned_by='organization_owner'), Model(id='text-embedding-nomic-embed-text-v1.5@q8_0', created=None, object='model', owned_by='organization_owner')], object='list')


: 

### Zero-Shot 1

In [104]:
# Load metadata
prompt_method = "Zero-shot1"
metadata_df = pd.read_csv("merged_metadata.csv")
image_dir = "merged_images"
results = []

# For every metadata row: send the merged image plus the prompt to the model,
# parse its JSON answer, and record whether it matches the ground-truth choice.
for _, row in metadata_df.iterrows():
    merged_index = row["merged_index"]
    study_question = row["study_question"]
    ground_truth = str(row["choice"]).strip().lower()
    image_path = os.path.join(image_dir, f"merged_{merged_index:03d}.jpg")

    # Encode image to base64. The context manager closes the file handle as
    # soon as the RGB copy exists instead of leaving one open per row.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="JPEG")
    base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")

    # Compose LLM prompt
    prompt_text = f"""
                    You are shown a side-by-side image with two street views: the left half and the right half.
                    Which side looks more {study_question}?

                    Answer with only one word: left or right. Then explain your reasoning.

                    Format:
                    Judgment. Reasons.
                    """

    # Call LLM
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text.strip()},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                ],
            },
        ],
        response_format=character_schema,
    )
    # `content` may be None; treat that like an empty reply.
    raw = (response.choices[0].message.content or "").strip()
    # Normalise typographic quotes so they can act as JSON delimiters.
    raw = (
        raw.replace("“", '"')
           .replace("”", '"')
           .replace("‘", "'")
           .replace("’", "'")
    )

    # Step 2: Load JSON safely
    # Greedy match: span from the first "{" to the LAST "}", so a "}" inside
    # the Reasons text cannot cut the object short.
    match = re.search(r'\{.*\}', raw, re.DOTALL)

    if match:
        json_str = match.group(0)
    else:
        print("❌ No valid JSON object found in model output.")
        print("🔎 Raw content was:\n", raw)
        json_str = "{}"  # fallback to empty JSON to avoid crash
    json_str = escape_inner_quotes(json_str)
    print(json_str)
    try:
        full_response = json.loads(json_str)
    except json.JSONDecodeError as err:
        # One malformed reply must not discard the results gathered so far.
        print(f"❌ Could not parse model output as JSON ({err}); recording an empty answer.")
        full_response = {}

    # Extract from dict; `or ""` and str() guard against null / non-string values
    model_judgement = str(full_response.get("Judgment") or "").strip().lower()
    model_reason = str(full_response.get("Reasons") or "").strip()

    results.append({
        "merged_index": merged_index,
        "left": row["left"],
        "right": row["right"],
        "study_question": study_question,
        "ground_truth": ground_truth,
        "left_vote": row["left_vote"],
        "right_vote": row["right_vote"],
        "model_judgement": model_judgement,
        "model_reason": model_reason,
        "validation": int(model_judgement == ground_truth)
    })

# Persist every per-row prediction for this prompt method
df_result = pd.DataFrame(results)
df_result.to_csv(f"{output_dir}/llm_predictions_{model_name}_{prompt_method}.csv", index=False)

# Share of all rows where the model judgement equals the ground truth
accuracy_all = df_result["validation"].mean()

# Same metric after dropping rows where either side is 'equal'
filtered_df = df_result[
    ~(df_result["ground_truth"].eq("equal") | df_result["model_judgement"].eq("equal"))
]
accuracy_excl_equal = filtered_df["validation"].mean()

# Report both figures
print(
    f"✅ Accuracy (all): {accuracy_all:.2%}",
    f"✅ Accuracy (excluding 'equal'): {accuracy_excl_equal:.2%}",
    sep="\n",
)

{ "Judgment": "left", "Reasons": "The left side of the image appears to have more vibrant colors and a greater sense of activity, as it shows a bustling street scene with people walking and cars driving by. In contrast, the right side is relatively empty and lacks any signs of life." }
{ "Judgment": "left", "Reasons": "The left side of the image appears to be a residential area with trees and houses, which suggests a safer environment compared to the right side. The right side shows an open road with no visible obstacles or hazards, but it also lacks any signs of human activity or infrastructure, making it seem more desolate and potentially unsafe." }
{ "Judgment": "left", "Reasons": "The left side of the image appears to be a residential area, with trees and buildings in the background, which suggests a safer environment compared to the right side. The right side shows a busy street with cars and buildings, indicating higher levels of traffic and potential danger." }
{ "Judgment": "le

### Zero-Shot 2

In [105]:
import pandas as pd
import base64
import io
from PIL import Image
import os

# Load metadata
prompt_method = "Zero_Shot2"
metadata_df = pd.read_csv("merged_metadata.csv")
image_dir = "merged_images"
results = []

# For every metadata row: send the merged image plus the prompt to the model,
# parse its JSON answer, and record whether it matches the ground-truth choice.
for _, row in metadata_df.iterrows():
    merged_index = row["merged_index"]
    study_question = row["study_question"]
    ground_truth = str(row["choice"]).strip().lower()
    image_path = os.path.join(image_dir, f"merged_{merged_index:03d}.jpg")

    # Encode image to base64. The context manager closes the file handle as
    # soon as the RGB copy exists instead of leaving one open per row.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="JPEG")
    base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")

    # Compose updated prompt
    prompt_text = f"""
        Compare two street view images. Based on the overall impression, decide which image better reflects the following quality:

        "{study_question}"

        Respond with a one-word judgment: left, or right. Then explain your reasoning.

        Format:
        Judgment. Reasons.
        """

    # Call LLM
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text.strip()},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                ],
            },
        ],
        response_format=character_schema,
    )
    # `content` may be None; treat that like an empty reply.
    raw = (response.choices[0].message.content or "").strip()
    # Normalise typographic quotes so they can act as JSON delimiters.
    raw = (
        raw.replace("“", '"')
           .replace("”", '"')
           .replace("‘", "'")
           .replace("’", "'")
    )

    # Step 2: Load JSON safely
    # Greedy match: span from the first "{" to the LAST "}", so a "}" inside
    # the Reasons text cannot cut the object short.
    match = re.search(r'\{.*\}', raw, re.DOTALL)

    if match:
        json_str = match.group(0)
    else:
        print("❌ No valid JSON object found in model output.")
        print("🔎 Raw content was:\n", raw)
        json_str = "{}"  # fallback to empty JSON to avoid crash
    json_str = escape_inner_quotes(json_str)
    print(json_str)
    try:
        full_response = json.loads(json_str)
    except json.JSONDecodeError as err:
        # One malformed reply must not discard the results gathered so far.
        print(f"❌ Could not parse model output as JSON ({err}); recording an empty answer.")
        full_response = {}

    # Extract from dict; `or ""` and str() guard against null / non-string values
    model_judgement = str(full_response.get("Judgment") or "").strip().lower()
    model_reason = str(full_response.get("Reasons") or "").strip()

    results.append({
        "merged_index": merged_index,
        "left": row["left"],
        "right": row["right"],
        "study_question": study_question,
        "ground_truth": ground_truth,
        "left_vote": row["left_vote"],
        "right_vote": row["right_vote"],
        "model_judgement": model_judgement,
        "model_reason": model_reason,
        "validation": int(model_judgement == ground_truth)
    })

# Persist every per-row prediction for this prompt method
df_result = pd.DataFrame(results)
df_result.to_csv(f"{output_dir}/llm_predictions_{model_name}_{prompt_method}.csv", index=False)

# Share of all rows where the model judgement equals the ground truth
accuracy_all = df_result["validation"].mean()

# Same metric after dropping rows where either side is 'equal'
filtered_df = df_result[
    ~(df_result["ground_truth"].eq("equal") | df_result["model_judgement"].eq("equal"))
]
accuracy_excl_equal = filtered_df["validation"].mean()

# Report both figures
print(
    f"✅ Accuracy (all): {accuracy_all:.2%}",
    f"✅ Accuracy (excluding 'equal'): {accuracy_excl_equal:.2%}",
    sep="\n",
)


{ "Judgment": "right", "Reasons": "The street view image on the right appears to be more lively and vibrant compared to the one on the left. The image on the right shows a bustling city street with people walking, cars driving by, and buildings in the background, giving an overall impression of energy and activity. In contrast, the image on the left is a static shot of a building, which lacks the dynamic elements that make the right image more engaging." }
{ "Judgment": "left", "Reasons": "The street view on the left appears to be in a more residential area with fewer cars and less traffic, suggesting a safer environment compared to the image on the right which shows a busier road with more vehicles." }
{ "Judgment": "left", "Reasons": "The street view on the left appears to be in a more residential area with fewer cars and buildings, giving an overall impression of being safer compared to the image on the right which shows a busier street with more vehicles and taller buildings." }
{ 

### Chain-of-Thought

In [106]:
import pandas as pd
import base64
import io
from PIL import Image
import os

# Load metadata
prompt_method = "COT"
metadata_df = pd.read_csv("merged_metadata.csv")
image_dir = "merged_images"
results = []

# For every metadata row: send the merged image plus the prompt to the model,
# parse its JSON answer, and record whether it matches the ground-truth choice.
for _, row in metadata_df.iterrows():
    merged_index = row["merged_index"]
    study_question = row["study_question"]
    ground_truth = str(row["choice"]).strip().lower()
    image_path = os.path.join(image_dir, f"merged_{merged_index:03d}.jpg")

    # Encode image to base64. The context manager closes the file handle as
    # soon as the RGB copy exists instead of leaving one open per row.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="JPEG")
    base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")

    # Compose updated prompt
    prompt_text = f"""
        Let’s think step by step.
        You are shown a side-by-side image with two street views: the left half and the right half.
        
        Which side looks more {study_question}?

        What features do you see in the left image? What features in the left image contribute to or detract from that quality?

        What features do you see in the right image? What features in the right image contribute to or detract from that quality?

        Based on your reasoning, which image better reflects the quality?

        Respond with a one-word judgment: left or right. Then explain your reasoning.

        """

    # Call LLM
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text.strip()},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                ],
            },
        ],
        response_format=character_schema,
    )
    # `content` may be None; treat that like an empty reply.
    raw = (response.choices[0].message.content or "").strip()
    # Normalise typographic quotes so they can act as JSON delimiters.
    raw = (
        raw.replace("“", '"')
           .replace("”", '"')
           .replace("‘", "'")
           .replace("’", "'")
    )

    # Step 2: Load JSON safely
    # Greedy match: span from the first "{" to the LAST "}", so a "}" inside
    # the Reasons text cannot cut the object short.
    match = re.search(r'\{.*\}', raw, re.DOTALL)

    if match:
        json_str = match.group(0)
    else:
        print("❌ No valid JSON object found in model output.")
        print("🔎 Raw content was:\n", raw)
        json_str = "{}"  # fallback to empty JSON to avoid crash
    json_str = escape_inner_quotes(json_str)
    print(json_str)
    try:
        full_response = json.loads(json_str)
    except json.JSONDecodeError as err:
        # One malformed reply must not discard the results gathered so far.
        print(f"❌ Could not parse model output as JSON ({err}); recording an empty answer.")
        full_response = {}

    # Extract from dict; `or ""` and str() guard against null / non-string values
    model_judgement = str(full_response.get("Judgment") or "").strip().lower()
    model_reason = str(full_response.get("Reasons") or "").strip()

    results.append({
        "merged_index": merged_index,
        "left": row["left"],
        "right": row["right"],
        "study_question": study_question,
        "ground_truth": ground_truth,
        "left_vote": row["left_vote"],
        "right_vote": row["right_vote"],
        "model_judgement": model_judgement,
        "model_reason": model_reason,
        "validation": int(model_judgement == ground_truth)
    })

# Save results
predictions_csv_path = f"{output_dir}/llm_predictions_{model_name}_{prompt_method}.csv"
df_result = pd.DataFrame(results)
df_result.to_csv(predictions_csv_path, index=False)

# Print accuracy
# Accuracy including all responses
accuracy_all = df_result["validation"].mean()

# Accuracy excluding any 'equal' in ground truth or model judgement
filtered_df = df_result[
    (df_result["ground_truth"] != "equal") &
    (df_result["model_judgement"] != "equal")
]
accuracy_excl_equal = filtered_df["validation"].mean()

# Print both
print(f"✅ Accuracy (all): {accuracy_all:.2%}")
print(f"✅ Accuracy (excluding 'equal'): {accuracy_excl_equal:.2%}")
# Report the path that was actually written rather than a fixed file name.
print(f"✅ Output saved to {predictions_csv_path}")


{ "Judgment": "left", "Reasons": "The left side of the image appears more lively due to several factors. Firstly, it has a clear and defined path that leads the viewer's attention towards the center of the image. This is in contrast to the right side, which seems to be lacking any clear direction or focal point. Additionally, the left side features a variety of colors and textures, such as the green grass, blue sky, and buildings, which contribute to its liveliness. On the other hand, the right side appears dull and monotonous with its grayish color palette and lack of visual interest." }
{ "Judgment": "left", "Reasons": "The left side of the image appears safer due to several features that contribute to this perception. Firstly, there are no pedestrians or vehicles in sight, which reduces the risk of accidents. Additionally, the road is well-lit and has a clear lane marking, indicating proper maintenance and safety measures. In contrast, the right side shows a busy street with multipl

### Rule-Based + In-Context Learning (RBIL)

In [107]:
import pandas as pd
import base64
import io
from PIL import Image
import os

# Load metadata
# Label for this prompting strategy; it becomes part of the output CSV file name.
prompt_method= "RBIL"
# One row per merged image pair to evaluate.
metadata_df = pd.read_csv("merged_metadata.csv")
# Folder holding the merged_XXX.jpg images referenced by `merged_index`.
image_dir = "merged_images"
# Accumulates one result dict per processed row.
results = []

def get_visual_perspective(study_question):
    """Return the rule text that describes how to judge a perception category.

    Args:
        study_question: Category label such as "safer" or "more beautiful".
            The lookup ignores letter case and surrounding whitespace, and any
            non-string value (e.g. a missing cell) is converted with ``str()``
            first, so it falls through to the default message.

    Returns:
        The rule description for the category, or
        "No rule found for this category." when the label is not recognised.
    """
    rules = {
        "wealthier": (
            "Look for expensive cars, clean sidewalks, modern buildings, well-maintained facades, greenery, and overall tidiness. "
            "Signs of poverty such as trash, broken sidewalks, and older buildings detract from the feeling of wealth."
        ),
        "more beautiful": (
            "Aesthetics matter: symmetry, architectural design, vibrant colors, trees, flowers, sunlight, and open space. "
            "Avoid cluttered, gray, damaged, or visually unappealing features."
        ),
        "livelier": (
            "Look for crowds, people walking or biking, street vendors, bright signage, open businesses, and dynamic movement. "
            "Quiet, empty, or static scenes are less lively."
        ),
        "more depressing": (
            "Indicators include dark lighting, gray tones, boarded-up buildings, graffiti, trash, empty streets, and general neglect. "
            "Fewer people and lack of activity can amplify the depressing feeling."
        ),
        "safer": (
            "Well-lit areas, visible pedestrians, clear pathways, greenery, maintained infrastructure, and surveillance signs suggest safety. "
            "Broken lights, isolated alleys, graffiti, and damaged property indicate unsafety."
        ),
        "more boring": (
            "Uniform design, lack of people or variation, blank walls, closed businesses, and monotone architecture signal boredom. "
            "Diversity in design and visible life make a place less boring."
        )
    }

    # Normalise the label so stray whitespace or a non-string cell cannot
    # raise or silently miss an otherwise valid key.
    category = str(study_question).strip().lower()
    return rules.get(category, "No rule found for this category.")

# For every metadata row: send the merged image plus the rule-augmented prompt
# to the model, parse its JSON answer, and record whether it matches the
# ground-truth choice.
for _, row in metadata_df.iterrows():
    merged_index = row["merged_index"]
    study_question = row["study_question"]
    ground_truth = str(row["choice"]).strip().lower()
    image_path = os.path.join(image_dir, f"merged_{merged_index:03d}.jpg")

    # Encode image to base64. The context manager closes the file handle as
    # soon as the RGB copy exists instead of leaving one open per row.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="JPEG")
    base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")

    # Compose updated prompt, embedding the rule text for this category
    visual_perspective = get_visual_perspective(study_question)
    prompt_text = f"""
        You are shown a side-by-side image with two street views: the left half and the right half.
        
        Which side looks more {study_question}?

        Use the following rules when making your decision:

        "{visual_perspective}"
        
        Then, apply the rules to the images and respond with a one-word judgment: left or right. Then explain your reasoning.

        """

    # Call LLM
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text.strip()},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                ],
            },
        ],
        response_format=character_schema,
    )
    # `content` may be None; treat that like an empty reply.
    raw = (response.choices[0].message.content or "").strip()
    # Normalise typographic quotes so they can act as JSON delimiters.
    raw = (
        raw.replace("“", '"')
           .replace("”", '"')
           .replace("‘", "'")
           .replace("’", "'")
    )

    # Step 2: Load JSON safely
    # Greedy match: span from the first "{" to the LAST "}", so a "}" inside
    # the Reasons text cannot cut the object short.
    match = re.search(r'\{.*\}', raw, re.DOTALL)

    if match:
        json_str = match.group(0)
    else:
        print("❌ No valid JSON object found in model output.")
        print("🔎 Raw content was:\n", raw)
        json_str = "{}"  # fallback to empty JSON to avoid crash
    json_str = escape_inner_quotes(json_str)
    print(json_str)
    try:
        full_response = json.loads(json_str)
    except json.JSONDecodeError as err:
        # One malformed reply must not discard the results gathered so far.
        print(f"❌ Could not parse model output as JSON ({err}); recording an empty answer.")
        full_response = {}

    # Extract from dict; `or ""` and str() guard against null / non-string values
    model_judgement = str(full_response.get("Judgment") or "").strip().lower()
    model_reason = str(full_response.get("Reasons") or "").strip()

    results.append({
        "merged_index": merged_index,
        "left": row["left"],
        "right": row["right"],
        "study_question": study_question,
        "ground_truth": ground_truth,
        "left_vote": row["left_vote"],
        "right_vote": row["right_vote"],
        "model_judgement": model_judgement,
        "model_reason": model_reason,
        "validation": int(model_judgement == ground_truth)
    })

# Save results
predictions_csv_path = f"{output_dir}/llm_predictions_{model_name}_{prompt_method}.csv"
df_result = pd.DataFrame(results)
df_result.to_csv(predictions_csv_path, index=False)

# Print accuracy
# Accuracy including all responses
accuracy_all = df_result["validation"].mean()

# Accuracy excluding any 'equal' in ground truth or model judgement
filtered_df = df_result[
    (df_result["ground_truth"] != "equal") &
    (df_result["model_judgement"] != "equal")
]
accuracy_excl_equal = filtered_df["validation"].mean()

# Print both
print(f"✅ Accuracy (all): {accuracy_all:.2%}")
print(f"✅ Accuracy (excluding 'equal'): {accuracy_excl_equal:.2%}")
# Report the path that was actually written rather than a fixed file name.
print(f"✅ Output saved to {predictions_csv_path}")


{ "Judgment": "right" , "Reasons": "The right half of the image appears more lively as it shows people walking on the sidewalk, street vendors, and open businesses. These dynamic elements indicate a sense of activity and energy, which is absent in the left half of the image. The left half is empty and static, with no signs of human presence or movement." }
{"Judgment": "left", "Reasons": "The left side of the image appears safer as it is well-lit, has visible pedestrians, clear pathways, greenery, maintained infrastructure, and surveillance signs. In contrast, the right side lacks these safety indicators, with broken lights, isolated alleys, graffiti, and damaged property, suggesting unsafety."}
{ "Judgment": "left", "Reasons": "The left side of the image appears safer as it is well-lit, has visible pedestrians, clear pathways, greenery, maintained infrastructure, and surveillance signs. In contrast, the right side lacks these safety indicators, with broken lights, isolated alleys, gra

### Rule-Based + In-Context Learning with Chain-of-Thought (RBIL_COT)

In [108]:
import pandas as pd
import base64
import io
from PIL import Image
import os

# Load metadata
# Label for this prompting strategy; it becomes part of the output CSV file name.
prompt_method= "RBIL_COT"
# One row per merged image pair to evaluate.
metadata_df = pd.read_csv("merged_metadata.csv")
# Folder holding the merged_XXX.jpg images referenced by `merged_index`.
image_dir = "merged_images"
# Accumulates one result dict per processed row.
results = []

def get_visual_perspective(study_question):
    """Return the rule text that describes how to judge a perception category.

    Args:
        study_question: Category label such as "safer" or "more beautiful".
            The lookup ignores letter case and surrounding whitespace, and any
            non-string value (e.g. a missing cell) is converted with ``str()``
            first, so it falls through to the default message.

    Returns:
        The rule description for the category, or
        "No rule found for this category." when the label is not recognised.
    """
    rules = {
        "wealthier": (
            "Look for expensive cars, clean sidewalks, modern buildings, well-maintained facades, greenery, and overall tidiness. "
            "Signs of poverty such as trash, broken sidewalks, and older buildings detract from the feeling of wealth."
        ),
        "more beautiful": (
            "Aesthetics matter: symmetry, architectural design, vibrant colors, trees, flowers, sunlight, and open space. "
            "Avoid cluttered, gray, damaged, or visually unappealing features."
        ),
        "livelier": (
            "Look for crowds, people walking or biking, street vendors, bright signage, open businesses, and dynamic movement. "
            "Quiet, empty, or static scenes are less lively."
        ),
        "more depressing": (
            "Indicators include dark lighting, gray tones, boarded-up buildings, graffiti, trash, empty streets, and general neglect. "
            "Fewer people and lack of activity can amplify the depressing feeling."
        ),
        "safer": (
            "Well-lit areas, visible pedestrians, clear pathways, greenery, maintained infrastructure, and surveillance signs suggest safety. "
            "Broken lights, isolated alleys, graffiti, and damaged property indicate unsafety."
        ),
        "more boring": (
            "Uniform design, lack of people or variation, blank walls, closed businesses, and monotone architecture signal boredom. "
            "Diversity in design and visible life make a place less boring."
        )
    }

    # Normalise the label so stray whitespace or a non-string cell cannot
    # raise or silently miss an otherwise valid key.
    category = str(study_question).strip().lower()
    return rules.get(category, "No rule found for this category.")

# For every metadata row: send the merged image plus the step-by-step,
# rule-augmented prompt to the model, parse its JSON answer, and record whether
# it matches the ground-truth choice.
for _, row in metadata_df.iterrows():
    merged_index = row["merged_index"]
    study_question = row["study_question"]
    ground_truth = str(row["choice"]).strip().lower()
    image_path = os.path.join(image_dir, f"merged_{merged_index:03d}.jpg")

    # Encode image to base64. The context manager closes the file handle as
    # soon as the RGB copy exists instead of leaving one open per row.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="JPEG")
    base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")

    # Compose updated prompt, embedding the rule text for this category
    visual_perspective = get_visual_perspective(study_question)
    prompt_text = f"""
        Let’s think step by step.
        You are shown a side-by-side image with two street views: the left half and the right half.
        
        Which side looks more {study_question}?

        What features do you see in the left image? What features in the left image contribute to or detract from that quality?

        What features do you see in the right image? What features in the right image contribute to or detract from that quality?

        Based on the following rules, which image has features that better reflect the quality?

        "{visual_perspective}"
        
        Then, apply the rules to the images and respond with a one-word judgment: left or right. Then explain your reasoning.

        """

    # Call LLM
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text.strip()},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                ],
            },
        ],
        response_format=character_schema,
    )
    # `content` may be None; treat that like an empty reply.
    raw = (response.choices[0].message.content or "").strip()
    # Normalise typographic quotes so they can act as JSON delimiters.
    raw = (
        raw.replace("“", '"')
           .replace("”", '"')
           .replace("‘", "'")
           .replace("’", "'")
    )

    # Step 2: Load JSON safely
    # Greedy match: span from the first "{" to the LAST "}", so a "}" inside
    # the Reasons text cannot cut the object short.
    match = re.search(r'\{.*\}', raw, re.DOTALL)

    if match:
        json_str = match.group(0)
    else:
        print("❌ No valid JSON object found in model output.")
        print("🔎 Raw content was:\n", raw)
        json_str = "{}"  # fallback to empty JSON to avoid crash
    json_str = escape_inner_quotes(json_str)
    print(json_str)
    try:
        full_response = json.loads(json_str)
    except json.JSONDecodeError as err:
        # One malformed reply must not discard the results gathered so far.
        print(f"❌ Could not parse model output as JSON ({err}); recording an empty answer.")
        full_response = {}

    # Extract from dict; `or ""` and str() guard against null / non-string values
    model_judgement = str(full_response.get("Judgment") or "").strip().lower()
    model_reason = str(full_response.get("Reasons") or "").strip()

    results.append({
        "merged_index": merged_index,
        "left": row["left"],
        "right": row["right"],
        "study_question": study_question,
        "ground_truth": ground_truth,
        "left_vote": row["left_vote"],
        "right_vote": row["right_vote"],
        "model_judgement": model_judgement,
        "model_reason": model_reason,
        "validation": int(model_judgement == ground_truth)
    })

# Save results
predictions_csv_path = f"{output_dir}/llm_predictions_{model_name}_{prompt_method}.csv"
df_result = pd.DataFrame(results)
df_result.to_csv(predictions_csv_path, index=False)

# Print accuracy
# Accuracy including all responses
accuracy_all = df_result["validation"].mean()

# Accuracy excluding any 'equal' in ground truth or model judgement
filtered_df = df_result[
    (df_result["ground_truth"] != "equal") &
    (df_result["model_judgement"] != "equal")
]
accuracy_excl_equal = filtered_df["validation"].mean()

# Print both
print(f"✅ Accuracy (all): {accuracy_all:.2%}")
print(f"✅ Accuracy (excluding 'equal'): {accuracy_excl_equal:.2%}")
# Report the path that was actually written rather than a fixed file name.
print(f"✅ Output saved to {predictions_csv_path}")


{ "Judgment": "left" , "Reasons": "The left image appears more lively due to the presence of crowds, people walking or biking, street vendors, bright signage, open businesses, and dynamic movement. These features contribute to a sense of energy and activity, which is in line with the provided rules. In contrast, the right image lacks these elements, presenting a quiet, empty, and static scene." }
{ "Judgment": "left", "Reasons": "The left image appears safer due to the following features: well-lit area, visible pedestrians, clear pathways, greenery, maintained infrastructure, and surveillance signs. These elements suggest a sense of security and order. In contrast, the right image lacks these features, with broken lights, isolated alleys, graffiti, and damaged property, which indicate unsafety." }
{ "Judgment": "left" , "Reasons": "The left image appears safer due to the following features: Well-lit areas (the street is well-illuminated), visible pedestrians, clear pathways, greenery (