In [None]:
import sys
sys.path.append('../')

from mappings import mappings

In [None]:
import os
from dotenv import load_dotenv

load_dotenv()  # By default, loads from .env in the current directory

# Report only whether each key is configured. Printing the values themselves
# would store the secrets in the notebook's saved output.
for key_name in ("OPENAI_API_KEY", "GOOGLE_API_KEY", "EXPECTED_PARROT_API_KEY"):
    print(f"{key_name}: {'set' if os.getenv(key_name) else 'MISSING'}")

<h1>Create Synthetic Twins</h1>

In [None]:
def get_mapped_value(column, value, mappings):
    """Translate a raw survey code into its human-readable label.

    Args:
        column: Column name; used as the key into ``mappings`` and in the
            fallback strings.
        value: Raw cell value. Missing values (per ``pd.notna``) are not
            looked up.
        mappings: Dict of the form ``{column: {code: label}}``.

    Returns:
        The label mapped to ``value``. ``"Unknown {column}"`` when the value
        is missing or has no mapping, ``"Invalid {column}"`` when a float
        value cannot be converted to an integer.
    """
    if not pd.notna(value):
        return f"Unknown {column}"

    # Ensure the value is an integer if the column mappings use integers as keys
    if isinstance(value, float):
        try:
            value = int(value)
        except (ValueError, OverflowError):
            # int(float("inf")) raises OverflowError, not ValueError
            return f"Invalid {column}"

    # Fetch the mapped value
    return mappings.get(column, {}).get(value, f"Unknown {column}")

def lowercase_first_char(s):
    """Return ``s`` with its first character lowercased.

    Falsy inputs (``""``, ``None``) are handed back unchanged.
    """
    return s[0].lower() + s[1:] if s else s


def create_synthetic_twin_agent(df, mappings, index):
    """Build one synthetic-twin ``Agent`` from a participant row.

    Args:
        df: Participant DataFrame; the row is read with ``df.iloc[index]``.
        mappings: ``{column: {code: label}}`` lookup handed to
            ``get_mapped_value``.
        index: Positional (0-based) row number within ``df``.

    Returns:
        An ``Agent`` named after the row's ``PROLIFIC_PID`` whose traits,
        traits presentation template and instruction are built from the row.

    Raises:
        KeyError / ValueError / TypeError: when a column read with
            ``row[...]`` (e.g. ``PROLIFIC_PID``, ``age``) is absent or
            cannot be converted.
    """
    row = df.iloc[index]

    def to_int_or_none(value):
        """Coerce a survey code to ``int``; ``None`` for missing or non-numeric values."""
        if pd.isna(value):
            return None
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def create_list_of_descriptions(prefix, range_end):
        """Collect the labels of the answered ``{prefix}1..{prefix}{range_end}`` columns."""
        descriptions = []
        for i in range(1, range_end + 1):
            column = f"{prefix}{i}"
            value = row.get(column, None)
            if pd.isna(value):
                continue
            desc = get_mapped_value(column, value, mappings)
            # get_mapped_value signals "no label" with these placeholders;
            # they must not end up in the comma-separated trait text.
            if desc in (f"Unknown {column}", f"Invalid {column}", "unknown"):
                continue
            descriptions.append(lowercase_first_char(desc))
        return descriptions

    def format_political_orientation_prompt(value):
        prompts = {
            1: "You identify as a Republican, reflecting alignment with conservative political ideologies.",
            2: "You identify as a Democrat, reflecting alignment with progressive political ideologies.",
            3: "You identify as an Independent, indicating a preference for policies that may transcend traditional party lines.",
            5: "You do not identify with any specific political orientation, indicating no particular preference for major political ideologies."
        }
        return prompts.get(value, "You do not identify with any specific political orientation, indicating no particular preference for major political ideologies.")

    def format_political_ideology_prompt(value):
        prompts = {
            1: "Your political views are extremely liberal, prioritizing progressive and transformative social policies.",
            2: "Your political views are liberal, favoring policies that emphasize equality, inclusivity, and progress.",
            3: "Your political views are moderately liberal, indicating a balanced approach toward progressive ideals.",
            4: "You identify as centrist, reflecting a pragmatic and neutral perspective on political issues.",
            5: "Your political views are moderately conservative, indicating a preference for traditional values with some openness to change.",
            6: "Your political views are conservative, favoring traditional values and limited government intervention.",
            7: "Your political views are extremely conservative, emphasizing deeply traditional and preservationist principles.",
            0: "You decline to specify your political ideology, leaving your views undefined.",
        }
        return prompts.get(value, "You have an undefined political ideology.")

    def format_republican_strength_prompt(value):
        prompts = {
            1: "You consider yourself a strong Republican, deeply aligned with the party's principles and policies.",
            2: "You consider yourself a not very strong Republican, showing some alignment with the party's principles but with nuanced perspectives.",
        }
        return prompts.get(value, "")

    def format_democrat_strength_prompt(value):
        prompts = {
            1: "You consider yourself a strong Democrat, firmly aligned with the party's progressive values and policies.",
            2: "You consider yourself a not very strong Democrat, supporting the party's values with some reservations or alternative perspectives.",
        }
        return prompts.get(value, "")

    def format_political_strength_prompt(row):
        # "political_orientaton" (sic) is the actual column name in the dataset.
        political_orientation = to_int_or_none(row.get("political_orientaton"))

        if political_orientation == 1:  # Republican
            republican_strength = row.get("republican_strength")
            return format_republican_strength_prompt(republican_strength)
        elif political_orientation == 2:  # Democrat
            democrat_strength = row.get("democrat_strength")
            return format_democrat_strength_prompt(democrat_strength)
        elif political_orientation == 3:  # Independent
            return "You identify as Independent, reflecting an independent political view."
        return "Your political strength and orientation are undefined."

    def format_voting_behavior_prompt(voted, vote_for):
        if voted == 1:  # Yes
            prompts = {
                1: "You voted in the 2024 presidential election for Donald Trump, reflecting alignment with Republican values.",
                2: "You voted in the 2024 presidential election for Kamala Harris, reflecting alignment with Democratic values.",
            }
            # Any other (or missing) candidate code must still yield text;
            # a bare None would be rendered as "None" in the template.
            return prompts.get(vote_for, "You voted in the 2024 presidential election.")
        elif voted == 2:  # No
            return "You did not vote in the 2024 presidential elections."
        return "Your voting behavior is undefined."

    def format_social_media_influence_prompt(value):
        prompts = {
            1: "Social media has no influence on your decision-making process when you buy online.",
            2: "Social media influences your decision-making process a little when you buy online.",
            3: "Social media somewhat influences your decision-making process when you buy online.",
            4: "Social media has quite a bit of influence on your decision-making process when you buy online.",
            5: "Social media has a strong influence on your decision-making process when you buy online.",
        }
        return prompts.get(value, "Unknown influence of social media.")

    def format_eco_friendly_importance_prompt(value):
        prompts = {
            1: "Eco-friendliness plays no role in your decision-making process when choosing products.",
            2: "Eco-friendliness has a minor influence on your decision-making process when choosing products.",
            3: "Eco-friendliness moderately influences your decision-making process when choosing products.",
            4: "Eco-friendliness is an important factor in your decision-making process when choosing products.",
            5: "Eco-friendliness is a key consideration in your decision-making process when choosing products.",
        }
        return prompts.get(value, "Unknown eco-friendliness importance.")

    def get_political_strength_prompt(row, mappings):
        """Return ``(trait_key, label)`` for whichever party-strength column has a label."""
        republican_strength = get_mapped_value("republican_strength", row.get("republican_strength"), mappings)
        democrat_strength = get_mapped_value("democrat_strength", row.get("democrat_strength"), mappings)

        if republican_strength != "Unknown republican_strength":
            return ("republican_strength", republican_strength)
        elif democrat_strength != "Unknown democrat_strength":
            return ("democrat_strength", democrat_strength)
        return (None, None)

    # NOTE: not called anywhere in this function; kept for reuse.
    def format_mini_ipip_personality_traits_prompt(row):
        extraversion_score = row.get("extraversion_score")
        agreeableness_score = row.get("agreeableness_score")
        conscientiousness_score = row.get("conscientiousness_score")
        neuroticism_score = row.get("neuroticism_score")
        openness_score = row.get("openness_score")

        prompt = (
            f"Your personality traits based on the Mini-IPIP raw scores are as follows: "
            f"Extraversion is {extraversion_score}, indicating your sociability and energy levels. "
            f"Agreeableness is {agreeableness_score}, reflecting your compassion and cooperation with others. "
            f"Conscientiousness is {conscientiousness_score}, representing your organization and dependability. "
            f"Neuroticism is {neuroticism_score}, showing your emotional stability and resilience to stress. "
            f"Openness to experience is {openness_score}, highlighting your creativity and appreciation for new experiences."
        )

        return prompt

    # A missing "children" value is treated like a missing column: no children.
    children_count = to_int_or_none(row.get("children", 0))

    traits = {
        "prolific_pid": row["PROLIFIC_PID"],
        "age": int(row["age"]),
        "gender": lowercase_first_char(get_mapped_value("gender", row["gender"], mappings)),
        "state": get_mapped_value("state", row.get("state"), mappings),
        "country": "U.S." if pd.notna(row.get("state", None)) else get_mapped_value("country", row.get("country", None), mappings),
        "ethnicity": ", ".join(create_list_of_descriptions("races_", 7)) or "not specified",
        "marital_status": get_mapped_value("marital_status", row["marital_status"], mappings).lower(),
        "children": "no children" if not children_count else f"{children_count} {'child' if children_count == 1 else 'children'}",
        "employment_status": lowercase_first_char(get_mapped_value("employment_status", row["employment_status"], mappings)),
        "education_level": lowercase_first_char(get_mapped_value("education_level", row["education_level"], mappings)),
        "household_income": get_mapped_value("household_income", row["household_income"], mappings).lower(),

        "political_orientation": get_mapped_value("political_orientaton", row.get("political_orientaton"), mappings),
        "political_ideology": get_mapped_value("political_Ideology_1", row.get("political_Ideology_1"), mappings),

        "shopping_frequency": get_mapped_value("shopping_freq", row.get("shopping_freq"), mappings).lower(),
        "monthly_spend": get_mapped_value("monthly_spend", row.get("monthly_spend"), mappings).lower(),
        "devices_used": ", ".join(create_list_of_descriptions("device_used_to_buy_", 4)) or "unspecified devices",
        "brand_preferences": ", ".join(create_list_of_descriptions("brands_type_pref_", 6)) or "no specific preferences",
        "social_media_influence": get_mapped_value("social_m_influence", row.get("social_m_influence"), mappings).lower(),
        "eco_friendly_importance": get_mapped_value("eco_friendly_imp", row.get("eco_friendly_imp"), mappings).lower(),

        # Mini-IPIP (BIG5s) scores:
        "extraversion_score": row.get("extraversion_score"),
        "agreeableness_score": row.get("agreeableness_score"),
        "conscientiousness_score": row.get("conscientiousness_score"),
        "neuroticism_score": row.get("neuroticism_score"),
        "openness_score": row.get("openness_score")
    }

    # Dynamically add republican_strength or democrat_strength if one exists
    political_strength_key, political_strength_value = get_political_strength_prompt(row, mappings)
    if political_strength_key and political_strength_value:
        traits[political_strength_key] = political_strength_value

    voted = to_int_or_none(row.get("voted"))
    if voted == 1:
        traits['vote'] = get_mapped_value("voted", row.get("voted"), mappings)
        traits['vote_for'] = get_mapped_value("vote_for", row.get("vote_for"), mappings)
    elif voted == 2:
        traits['vote'] = get_mapped_value("voted", voted, mappings)
        traits['vote_for'] = "No one"
    else:
        # Unexpected or missing "voted" code: log the participant and keep
        # going with whatever labels (or placeholders) the mappings provide.
        traits['vote'] = get_mapped_value("voted", row.get("voted"), mappings)
        traits['vote_for'] = get_mapped_value("vote_for", row.get("vote_for"), mappings)
        print(row.get("PROLIFIC_PID"))
        print(traits['vote'])
        print(traits['vote_for'])

    # Add formatted prompts
    traits.update({
        "political_orientation_prompt": format_political_orientation_prompt(to_int_or_none(row.get("political_orientaton"))),
        "political_ideology_prompt": format_political_ideology_prompt(row.get("political_Ideology_1")),
        "political_strength_prompt": format_political_strength_prompt(row),
        "voting_behavior_prompt": format_voting_behavior_prompt(row.get("voted"), row.get("vote_for")),
        "social_media_influence_prompt": format_social_media_influence_prompt(row.get("social_m_influence")),
        "eco_friendly_importance_prompt": format_eco_friendly_importance_prompt(row.get("eco_friendly_imp")),
    })

    def format_instruction_prompt():
        """Assemble the instruction text; it does not depend on the row."""
        # Key Sections of the Prompt
        introduction = (
            "***IMPORTANT INSTRUCTIONS:***\n"
            "You are a synthetic twin participating in a simulated online shopping experiment hosted on the Prolific platform. "
            "Participants on Prolific are incentivized by monetary payment, motivating them to balance speed and quality while completing tasks.\n\n"
            "Each ad includes three images, a title, and a textual description. Your task is to evaluate these ads based on your assigned "
            "personality traits, preferences, and values.\n"
        )

        behavioral_guidelines = (
            "**Key Behavioral Guidelines**:\n"
            "- You are incentivized to complete the task efficiently, but you must provide thoughtful evaluations that reflect your assigned traits.\n"
            "- You may rely on noticeable elements, like prominent images or key phrases, especially when ads feel repetitive.\n"
            "- Your attention might fluctuate as you progress, leading to less detailed evaluations for later ads.\n"
            "- Your responses **must** reflect a balance between following instructions carefully and completing the task in a timely manner.\n"
        )

        personality_instructions = (
            "**Specific Instructions for Personality Traits**:\n"
            "You **must** use your assigned personality trait levels to guide your evaluation. Specifically:\n\n"
            "1. **Extraversion**:\n"
            "   - If you have **high Extraversion**, prioritize elements that emphasize social engagement, fun, or excitement in the ad.\n"
            "   - If you have **low Extraversion**, focus on practicality and avoid overvaluing overly social or flashy aspects.\n"
            "2. **Agreeableness**:\n"
            "   - If you have **high Agreeableness**, look for signals of warmth, empathy, and positivity in the ad.\n"
            "   - If you have **low Agreeableness**, evaluate the ad critically, without being influenced by attempts to appeal emotionally.\n"
            "3. **Conscientiousness**:\n"
            "   - If you have **high Conscientiousness**, assess how detailed, accurate, and organized the ad is. Look for well-structured information.\n"
            "   - If you have **low Conscientiousness**, focus on overall impressions without getting too caught up in fine details or structure.\n"
            "4. **Neuroticism**:\n"
            "   - If you have **high Neuroticism**, consider whether the ad reduces uncertainty or worry. Look for reassuring or calming elements.\n"
            "   - If you have **low Neuroticism**, focus on practical features without being overly concerned about potential risks.\n"
            "5. **Openness**:\n"
            "   - If you have **high Openness**, evaluate the ad’s creativity, originality, and appeal to curiosity. Look for innovative or unique features.\n"
            "   - If you have **low Openness**, prioritize straightforward, familiar, and functional aspects of the ad.\n"
        )

        evaluation_criteria = (
            "**Evaluation Criteria**:\n"
            "For each ad, you will answer six 5-point Likert-scale questions. Your scores should reflect:\n"
            "1. **How well the ad aligns with your assigned personality traits**.\n"
            "2. **A balanced assessment if traits conflict** (e.g., high Openness encouraging creativity vs. high Conscientiousness valuing structure).\n"
            "3. **Efficiency and quality**, consistent with typical Prolific participants incentivized by payment.\n"
        )

        final_reminder = (
            "**Final Reminder**:\n"
            "You are a synthetic twin designed to reflect realistic participant behavior. Be consistent with your assigned personality "
            "traits while balancing thoughtful evaluation with timely completion."
        )

        return f"{introduction}\n{behavioral_guidelines}\n{personality_instructions}\n{evaluation_criteria}\n{final_reminder}"

    # Generate presentation template
    traits_presentation_template = (
        f"You are {traits['age']} years old, identifying as {traits['gender']}, living in {traits['state']}, {traits['country']}. "
        f"Your ethnicity is {traits['ethnicity']}, and you are {traits['marital_status']}, with {traits['children']}. "
        f"You are {traits['employment_status']} and have attained {traits['education_level']}. "
        f"Your household income is {traits['household_income']}. {traits['political_orientation_prompt']} "
        f"{traits['political_ideology_prompt']} {traits['political_strength_prompt']} "
        f"{traits['voting_behavior_prompt']} "
        f"As an online shopper, you shop {traits['shopping_frequency']} and spend {traits['monthly_spend']} per month. "
        f"You primarily use {traits['devices_used']} for purchases, favoring {traits['brand_preferences']} brands. "
        f"{traits['social_media_influence_prompt']} {traits['eco_friendly_importance_prompt']}\n\n"
        f"Your personality profile is characterized by:\n"
        f" - Extraversion: {traits['extraversion_score']}\n"
        f" - Agreeableness: {traits['agreeableness_score']}\n"
        f" - Conscientiousness: {traits['conscientiousness_score']}\n"
        f" - Neuroticism: {traits['neuroticism_score']}\n"
        f" - Openness: {traits['openness_score']}\n\n"
    )

    instruction = format_instruction_prompt()

    print(f'{traits_presentation_template}\n')
    print(f'{instruction}\n\n')

    agent = Agent(
        traits=traits,
        name=row['PROLIFIC_PID'],
        traits_presentation_template=traits_presentation_template,
        instruction=instruction
    )

    return agent

def create_synthetic_twins(df, mappings):
    """Create a synthetic-twin agent for every row of ``df``.

    Rows whose agent cannot be built are reported on stdout and skipped, so
    one bad participant does not abort the whole run.

    Args:
        df: Participant DataFrame.
        mappings: ``{column: {code: label}}`` lookup forwarded to
            ``create_synthetic_twin_agent``.

    Returns:
        List of the agents that were created successfully, in row order.
    """
    synthetic_twins = []

    # create_synthetic_twin_agent reads rows with df.iloc, so it must be
    # given the row *position*; the index label is only used for reporting.
    for position, label in enumerate(df.index):
        try:
            synthetic_twins.append(create_synthetic_twin_agent(df, mappings, position))
        except Exception as e:
            print(f"Error processing row {label}: {e}")

    print(f"Successfully created {len(synthetic_twins)} synthetic twin agents.")
    return synthetic_twins

<h1> Create a Survey </h1>

In [None]:
# Statements an agent rates for each ad; one scenario is generated per
# (product, trait, statement) combination when the ScenarioList is built.
STATEMENTS = [
    "I find this product advertisement to be persuasive.",
    "This is an effective advertisement.",
    "I would purchase this product after seeing this advertisement.",
    "Overall, I like this product advertisement.",
    "This advertisement has made me more interested in the product.",
    "I am interested in learning more about this product after seeing this advertisement."
]

# Product 1 Descriptions per trait
# One ad text per trait key. NOTE(review): the keys look like abbreviations of
# the Big Five traits (openness, conscientiousness, extraversion,
# agreeableness, neuroticism) — confirm against the study design.
TRAIT_PRODUCT_DESCRIPTIONS_1 = {
    "openness": "Designed for visionaries who appreciate iconic design innovation. The signature parallel grooves, inspired by early aviation, transform aluminum into flowing sculptural lines. The 35L design challenges you to curate essentials like a minimalist art piece. Each element tells a story - from the understated RIMOWA lettering to the precisely engineered corners. The multi-directional wheels move like brush strokes, while the anodized surface creates ever-changing light plays. Perfect for creative minds who see beauty in engineering.",
    "consc": "Exemplifies German engineering precision at its finest. Each of the 15 parallel grooves is exactly 14mm apart, providing optimal structural integrity while reducing weight by 26%. The aircraft-grade aluminum body offers a precise 35L capacity, optimized for carry-on efficiency. The TSA-approved locks feature 1,000 unique combinations, while the whisper-quiet wheels are tested for 832,000 rotations. The interior features a calibrated 50/50 split. Every measurement, every component is exactingly calculated.",
    "extr": "Turns heads and starts conversations wherever you go. Its iconic grooved design and gleaming aluminum finish make an unforgettable entrance in any setting. The sleek 35L profile is perfect for spontaneous weekend getaways or high-impact business trips. The distinctive silhouette has graced more Instagram stories than any luxury luggage, while the characteristic wheel sound announces your arrival with style. Each scratch tells a story of your adventures. Travel as the trendsetter you are.",
    "agree": "Embodies our commitment to responsible luxury and mindful travel. Our sustainable process uses recycled aluminum for the 35L design, crafted by artisans earning fair wages in family-owned facilities. The smooth-gliding wheels and ergonomic handle are designed with consideration for fellow travelers. We've partnered with global repair artisans to ensure local maintenance, supporting communities worldwide. Join a movement of conscious travelers who believe luxury should lift everyone up.",
    "neuro": "Eliminates travel worries through multiple protection layers. The 35L aircraft-grade aluminum shell, reinforced with 12 structural grooves, exceeds military impact standards while ensuring carry-on compliance. Our corner guards provide eight-point drop protection, with a watertight seal system for all weather. Dual TSA locks feature tamper-alert technology and embedded tracking for real-time location. The stabilized wheel system prevents tip-overs. Travel with absolute confidence."
}

# Product 2 Descriptions per trait
# Same trait keys as product 1. Not referenced by TRAIT_PRODUCT_DESCRIPTIONS
# while the product-2 entry there is commented out.
TRAIT_PRODUCT_DESCRIPTIONS_2 = {
    "openness": "Discover a revolutionary art form in travel organization with RIMOWA's visionary cubes. Like modernist sculpture, their fluid compression transforms space itself, challenging conventional boundaries. Each cube is a canvas of infinite possibilities, crafted from aerospace-inspired recycled materials. The avant-garde design morphs as you pack, creating dynamic spatial compositions. Perfect for creative pioneers who see packing as performance art. Break free from ordinary constraints and curate your journey like a gallery installation. Transform travel into pure innovation",
    "consc": "Experience the pinnacle of German precision with RIMOWA's meticulously engineered packing system. Each cube reduces volume by 35% while maintaining perfect structural integrity. The design includes precise compression markers, reinforced stress points tested to 12kg capacity, and premium YKK zippers rated for 35,000 cycles. The systematic organization maximizes your 35L cabin case space with efficient compartmentalization. Achieve flawless organization with engineering excellence, measured and tested to perfection.",
    "extr": "Become the star of every destination with RIMOWA's most viral travel innovation. These show-stopping cubes have sparked over 350M views across social media, dominated Instagram reels, and inspired countless #PackingWithRIMOWA moments worldwide. The intelligent compression system leaves room for spontaneous shopping discoveries and outfit changes. Their ultra-sleek design transforms hotel room unpacking into a content creator's dream. Perfect for jet-setters who collect likes as often as passport stamps. Pack to impress, travel to be seen.",
    "agree": "Join RIMOWA's global family of mindful travelers with our most conscientious creation yet. Each cube supports three local artisan communities and is crafted from ocean-recovered materials, preventing 12 plastic bottles from harming marine life. The collaborative design promotes shared packing experiences, while our fair-trade manufacturing empowers developing communities. Every purchase funds environmental restoration and educational initiatives. Travel with compassion, pack with purpose, share with love.",
    "neuro": "Achieve complete travel peace of mind with RIMOWA's most secure packing innovation. Features military-grade compression technology, antimicrobial Polygiene® treatment, and our patented triple-lock zipper system. Built-in stress indicators prevent over-compression, while RFID-traceable markers ensure nothing gets lost. The water-resistant barriers exceed IP67 standards, protecting from all environmental risks. Backed by our lifetime guarantee and 24/7 global support network. Transform uncertainty into absolute confidence."
}

# Product 3 Descriptions per trait
# Same trait keys as product 1. Not referenced by TRAIT_PRODUCT_DESCRIPTIONS
# while the product-3 entry there is commented out.
TRAIT_PRODUCT_DESCRIPTIONS_3 = {
    "openness": "Discover a sensory revolution in hydration with 24Bottles' artistic innovation. The Clima bottle is a modernist sculpture in stainless steel, featuring avant-garde temperature control that maintains cold for 24 hours or heat for 12. Each bottle's minimalist silhouette and custom-curated colors challenge conventional design boundaries. Perfect for creative spirits who see everyday objects as art pieces, this bottle transforms hydration into a statement of individual expression. Embrace innovation in every sip.",
    "consc": "Master your hydration with 24Bottles' precision-engineered Clima bottle. Features 316-grade stainless steel construction, maintaining exact temperatures (24 hours cold/12 hours hot) with 99.9% efficiency. The leak-proof cap system withstands 20,000+ open-close cycles, while the 1L capacity optimizes daily water intake requirements. Each bottle saves 0.08kg carbon footprint daily versus disposable alternatives. Achieve perfect hydration with Swiss-tested temperature control and German-engineered durability.",
    "extr": "Make every sip an opportunity with 24Bottles' most sociable hydration companion. The Clima bottle naturally starts conversations, whether you're at the office, gym, traveling, or meeting friends. Its distinctive design and premium finish draw admiring glances, while double-wall insulation keeps drinks cold for 24 hours or hot for 12 - perfect for impromptu coffee dates or extended adventures with friends. Ideal for those who love bringing people together and staying energized through every social moment. Turn every drink into a chance to connect.",
    "agree": "Join 24Bottles' compassionate hydration movement with the eco-conscious Clima bottle. Each purchase prevents 580 single-use plastics annually and supports global water conservation projects. Crafted through fair-labor practices, this bottle embodies our commitment to environmental harmony. The mindful design keeps drinks cold for 24 hours or hot for 12, while supporting clean water initiatives worldwide. Choose sustainable hydration that nurtures both people and planet. Every sip makes a difference.",
    "neuro": "Experience worry-free hydration with 24Bottles' most secure temperature-control innovation. The Clima bottle features antimicrobial coating, triple-sealed cap technology, and certified food-grade steel construction. Advanced vacuum insulation guarantees temperature stability (24 hours cold/12 hours hot), while the non-slip texture prevents accidents. Each bottle undergoes 25-point safety testing and includes our lifetime warranty. Stay perfectly hydrated while enjoying complete peace of mind."
}

# Trait descriptions dictionary for all products
# The three dictionaries below are keyed by the same product number and must
# stay in sync. Only product 1 is currently enabled; to add a product,
# uncomment its entry in all three.
TRAIT_PRODUCT_DESCRIPTIONS = {
    1: TRAIT_PRODUCT_DESCRIPTIONS_1,
    # 2: TRAIT_PRODUCT_DESCRIPTIONS_2,
    # 3: TRAIT_PRODUCT_DESCRIPTIONS_3
}

# Ad title shown for each product.
PRODUCT_TITLES = {
    1: "The RIMOWA Original",
    # 2: "Compressible Packing Cubes",
    # 3: "Clima Water Bottle"
}

# Three image identifiers per product, in display order (image_1..image_3).
IMAGE_UUIDS = {
    1: ['4f41ad44-6472-4403-8e59-0fb0e5eca575', 'e30bbefa-b135-4a46-bf32-ed9437be8733', 'a56079a4-1f50-4019-9344-947101bd3e4b'],
    # 2: ['7c046f64-1209-447d-a68a-b984f3508e0b', '82ffd7a6-87dc-4365-9c9f-2e76263908be', '851dedbe-3b32-45ca-8c32-4ddc510f7327'],
    # 3: ['2a81aec9-c164-4d29-9281-360547f04ac4', 'd34d3ec1-073d-45b9-baca-a0d3411e9e0c', 'ff0ab897-50b8-458e-bfa5-7550b8a48648']
}

In [None]:
from edsl import QuestionLinearScale, ScenarioList, Scenario, Model, FileStore

def prefetch_images(image_uuids):
    """Pull every image once via ``FileStore.pull``.

    Args:
        image_uuids: ``{product: [uuid, ...]}``.

    Returns:
        ``{product: {position: pulled_object}}`` where ``position`` is the
        0-based place of the uuid in that product's list.
    """
    fetched = {}
    for product, uuids in image_uuids.items():
        per_product = {}
        for position, uuid in enumerate(uuids):
            per_product[position] = FileStore.pull(uuid)
        fetched[product] = per_product
    return fetched

# Pull the ad images once up front so every scenario reuses the same objects.
pre_fetched_images = prefetch_images(IMAGE_UUIDS)

# A single 5-point agreement question; the ad content and the statement to
# rate are filled in from each scenario. Every entry of STATEMENTS already
# ends with a period, so none is added after {{ statement }}.
q = QuestionLinearScale(
    question_name="question",
    question_text="""
    Please evaluate the effectiveness of this product ad by indicating the extent to which you agree with the following statement: 
    {{ statement }}
    
    The ad includes three images:
    
    1. {{ image_1 }}
    2. {{ image_2 }}
    3. {{ image_3 }}
    
    It also has a title: {{ title }}, and a description: {{ description }}.
    """,
    question_options=[
        1, 2, 3, 4, 5
    ],
    option_labels={
        1: "Strongly disagree",
        2: "Disagree",
        3: "Neither agree nor disagree",
        4: "Agree",
        5: "Strongly agree"
    }
)

# One scenario per (product, trait description, statement). The scenario's
# "question_name" field (p_<product>_<trait>_item_<n>) identifies the
# combination in the results, since the question itself is always "question".
sl = ScenarioList([
    Scenario({
        "question_name": f"p_{product}_{trait}_item_{i + 1}",
        "image_1": pre_fetched_images[product][0],
        "image_2": pre_fetched_images[product][1],
        "image_3": pre_fetched_images[product][2],
        "title": PRODUCT_TITLES[product],
        "description": TRAIT_PRODUCT_DESCRIPTIONS[product][trait],
        "statement": statement
    })
    for product in IMAGE_UUIDS
    for trait in TRAIT_PRODUCT_DESCRIPTIONS[product]
    for i, statement in enumerate(STATEMENTS)
])

<h1>Create Agents and Run the Survey</h1>

In [None]:
import pandas as pd
from edsl import Agent, AgentList

# Load the participant data and build the agents from it. Rows for which
# create_synthetic_twins cannot build an agent are reported and left out.
df = pd.read_csv('../../../data/filtered_participants_dataset.csv')

agents = AgentList(create_synthetic_twins(df, mappings))

In [None]:
%%time
from edsl import Model

# Model that the survey job is run with (see `.by(m)` in process_agent).
m = Model("gemini-1.5-flash", temperature = 1)
m  # last expression of the cell: displays the model

In [None]:

batch_size = 1

# Only the first agent is processed here; widen or drop the slice to run more.
agents_to_process = agents[:1]
# Create batches of agents
agent_batches = [agents_to_process[i:i + batch_size] for i in range(0, len(agents_to_process), batch_size)]
print(f"Agent batches: {len(agent_batches)}")

In [None]:
import asyncio

async def process_agent(agent, agent_number):
    """Run the survey question over all scenarios for a single agent.

    Args:
        agent: Agent to survey; its ``name`` is logged.
        agent_number: Running number used only in the log line.

    Returns:
        Whatever ``run_async`` yields for the job built from ``q``, ``sl``,
        the agent and ``m``.
    """
    print(f"{agent_number} Processing agent {agent.name}")
    job = q.by(sl).by([agent]).by(m)
    responses = await job.run_async(disable_remote_cache=True)
    return responses

async def run():
    """Survey every agent found in ``agent_batches`` concurrently.

    Returns:
        List with one ``process_agent`` result per agent, in batch order.
    """
    pending = []
    agent_number = 0
    # Walk the batches in order so agents are numbered 1, 2, 3, ... across batches
    for batch in agent_batches:
        for agent in batch:
            agent_number += 1
            pending.append(process_agent(agent, agent_number))

    # Run all agents in parallel
    return await asyncio.gather(*pending)

# Execute the survey and keep one result per agent.
# NOTE(review): asyncio.run() raises RuntimeError when the calling thread
# already has a running event loop; confirm this cell's environment allows
# it (otherwise `await run()` is the notebook alternative).
all_responses = asyncio.run(run())

<h2>Store result</h2>

In [None]:
# Write one CSV per agent result, named after its position and Prolific ID.
output_dir = "../../synthetics_survey_results/gemini_1.5_flash/per_sta_csvs"
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories

for i, res in enumerate(all_responses, start=1):
    agent_name = res.select('agent.prolific_pid').first()
    res.to_pandas().to_csv(f"{output_dir}/{i}_prolific_pid_{agent_name}.csv", index=False)
    print(f"Saved: {agent_name}")

In [None]:
%%time

import os
import pandas as pd

# Directory containing the per-agent CSV files
input_folder = '../../synthetics_survey_results/gemini_1.5_flash/per_sta_csvs'

# Destination of the merged wide table
output_path = '../../synthetics_survey_results/gemini_1.5_flash/merged/gemini_1.5_flash.csv'

# Columns every per-agent CSV must provide for the long-to-wide pivot
required_columns = {'agent.prolific_pid', 'scenario.question_name', 'answer.question'}

# Agent-level fields copied into the wide table when present, mapped to their output names
agent_field_names = {
    "agent.prolific_pid": "PROLIFIC_PID",
    "agent.extraversion_score": "extraversion_score",
    "agent.agreeableness_score": "agreeableness_score",
    "agent.conscientiousness_score": "conscientiousness_score",
    "agent.neuroticism_score": "neuroticism_score",
    "agent.openness_score": "openness_score"
}

# One wide DataFrame per successfully processed file
all_transformed_data = []

# os.listdir returns entries in arbitrary order; sort so the merged row order is reproducible
for file_name in sorted(os.listdir(input_folder)):
    if not file_name.endswith('.csv'):
        continue

    data = pd.read_csv(os.path.join(input_folder, file_name))

    if not required_columns.issubset(data.columns):
        print(f"Required columns missing in file: {file_name}")
        continue

    # Long -> wide: one row per agent ID, one column per question name
    reshaped_data = data.pivot(
        index='agent.prolific_pid',
        columns='scenario.question_name',
        values='answer.question'
    )
    reshaped_data.columns = reshaped_data.columns.astype(str)

    # Record which file each row came from
    reshaped_data['source_file'] = file_name

    # Attach the first value per agent of each agent-level field; assignment aligns on the agent ID index
    per_agent = data.groupby('agent.prolific_pid')
    for field in agent_field_names:
        if field in data.columns:
            reshaped_data[field] = per_agent[field].first()

    # rename() ignores keys that are absent, so fields missing from this file are simply skipped
    reshaped_data = reshaped_data.rename(columns=agent_field_names)
    all_transformed_data.append(reshaped_data)

# pd.concat raises an opaque "No objects to concatenate" on an empty list; fail with a clearer message
if not all_transformed_data:
    raise ValueError(f"No usable CSV files found in {input_folder}")

# Combine all transformed data into a single DataFrame
final_dataframe = pd.concat(all_transformed_data, axis=0)

# to_csv does not create missing parent folders, so make sure the target folder exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
final_dataframe.to_csv(output_path)

# Set options to display all rows and columns (global pandas options; they also affect later cells)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

final_dataframe.head()


In [None]:
%%time

import os
import pandas as pd

# Directory containing the per-agent CSV files, and destination of the merged wide table.
# NOTE(review): the model and store cells above target gemini_1.5_flash, while this cell reads
# gemini_2.5_flash output — presumably produced by a separate run; confirm.
INPUT_PATH  = '../../synthetics_survey_results/gemini_2.5_flash/per_sta_csvs'
OUTPUT_PATH = '../../synthetics_survey_results/gemini_2.5_flash/merged/gemini_2.5_flash.csv'

# Columns every per-agent CSV must provide for the long-to-wide pivot
required_columns = {'agent.prolific_pid', 'scenario.question_name', 'answer.question'}

# Agent-level fields copied into the wide table when present, mapped to their output names
agent_field_names = {
    "agent.prolific_pid": "PROLIFIC_PID",
    "agent.extraversion_score": "extraversion_score",
    "agent.agreeableness_score": "agreeableness_score",
    "agent.conscientiousness_score": "conscientiousness_score",
    "agent.neuroticism_score": "neuroticism_score",
    "agent.openness_score": "openness_score"
}

# One wide DataFrame per successfully processed file
all_transformed_data = []

# os.listdir returns entries in arbitrary order; sort so the merged row order is reproducible
for file_name in sorted(os.listdir(INPUT_PATH)):
    if not file_name.endswith('.csv'):
        continue

    data = pd.read_csv(os.path.join(INPUT_PATH, file_name))

    if not required_columns.issubset(data.columns):
        print(f"Required columns missing in file: {file_name}")
        continue

    # Long -> wide: one row per agent ID, one column per question name
    reshaped_data = data.pivot(
        index='agent.prolific_pid',
        columns='scenario.question_name',
        values='answer.question'
    )
    reshaped_data.columns = reshaped_data.columns.astype(str)

    # Record which file each row came from
    reshaped_data['source_file'] = file_name

    # Attach the first value per agent of each agent-level field; assignment aligns on the agent ID index
    per_agent = data.groupby('agent.prolific_pid')
    for field in agent_field_names:
        if field in data.columns:
            reshaped_data[field] = per_agent[field].first()

    # rename() ignores keys that are absent, so fields missing from this file are simply skipped
    reshaped_data = reshaped_data.rename(columns=agent_field_names)
    all_transformed_data.append(reshaped_data)

# pd.concat raises an opaque "No objects to concatenate" on an empty list; fail with a clearer message
if not all_transformed_data:
    raise ValueError(f"No usable CSV files found in {INPUT_PATH}")

# Combine all transformed data into a single DataFrame
final_dataframe = pd.concat(all_transformed_data, axis=0)

# to_csv does not create missing parent folders, so make sure the target folder exists
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
final_dataframe.to_csv(OUTPUT_PATH)

# Set options to display all rows and columns (global pandas options; they also affect later cells)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

final_dataframe.head()
