In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:

# Folder holding the study's input CSVs. Kept in one place so the notebook can
# be pointed at another checkout by editing a single line.
DATA_DIR = "/Users/unasantos/Documents/GitHub/SURP2025/Data"

choices_df = pd.read_csv(f"{DATA_DIR}/ModelDF.csv")
sandwich_df = pd.read_csv(f"{DATA_DIR}/DecisionSandwiches.csv")

# Choice matrix: one row per participant, one column per 'Choice.*' column
# (the original notes say 80 trials per participant).
choice_cols = [col for col in choices_df.columns if col.startswith('Choice.')]
choice_matrix = choices_df[choice_cols].values

# Split the sandwich pairs into the vegetarian and the regular trial sets.
# The index is reset so that position k in a trial set lines up with the
# k-th choice column.
veg_df = sandwich_df[sandwich_df['RegOrVeg'] == 'v'].reset_index(drop=True)
reg_df = sandwich_df[sandwich_df['RegOrVeg'] == 'r'].reset_index(drop=True)

# Ingredient columns of the first ('s1*') and second ('s2*') sandwich.
s1_cols = [col for col in sandwich_df.columns if col.startswith('s1')]
s2_cols = [col for col in sandwich_df.columns if col.startswith('s2')]

# Every distinct, non-missing ingredient that appears anywhere in the dataset.
all_ingredients = pd.unique(sandwich_df[s1_cols + s2_cols].values.ravel())
all_ingredients = [ing for ing in all_ingredients if pd.notna(ing)]


def _ingredient_presence(trial_df, cols, ingredient):
    """Return a boolean array, one entry per trial: True where `ingredient`
    appears in any of the columns `cols` of `trial_df`."""
    return trial_df[cols].eq(ingredient).any(axis=1).to_numpy(dtype=bool)


def _dv_scores(choices, s1_has, s2_has):
    """Compute (D, V) for one participant and one ingredient.

    Parameters
    ----------
    choices : 1-D array of this participant's choices; missing entries are
        skipped. A value of 0 is matched against sandwich 1, a value of 1
        against sandwich 2.
    s1_has, s2_has : boolean arrays, per trial, telling whether the
        ingredient is in sandwich 1 / sandwich 2.

    Returns
    -------
    (D, V) where, over answered trials only:
      * D = (# trials where the chosen sandwich contains the ingredient)
            / (# trials where at least one sandwich contains it)
      * V = (same numerator) / (# answered trials)
    Either value is NaN when its denominator is zero.
    """
    # Only answered trials are looked up, so a trial set is indexed exactly
    # where the participant actually made a choice.
    answered = np.flatnonzero(~pd.isna(choices))
    picked = choices[answered]
    in_s1 = s1_has[answered]
    in_s2 = s2_has[answered]

    chose_s1 = np.asarray(picked == 0, dtype=bool)
    chose_s2 = np.asarray(picked == 1, dtype=bool)
    chose_with_ingredient = (chose_s1 & in_s1) | (chose_s2 & in_s2)

    # The numerator is shared: choosing a sandwich that has the ingredient
    # implies the ingredient is present in that trial.
    hits = int(chose_with_ingredient.sum())
    d_denominator = int((in_s1 | in_s2).sum())
    v_denominator = len(answered)

    D = hits / d_denominator if d_denominator > 0 else np.nan
    V = hits / v_denominator if v_denominator > 0 else np.nan
    return D, V


# Which trial set each participant saw ('y' -> vegetarian set, anything else
# -> regular set). Positional, matching the rows of choice_matrix.
participant_is_veg = choices_df['Vegetarian'].eq('y').to_numpy()

# Result container: column name -> list with one value per participant.
results = {}

for ingredient in all_ingredients:
    # Presence only depends on the ingredient and the trial set, not on the
    # participant, so compute it once per ingredient instead of once per
    # participant.
    presence_by_diet = {
        True: (_ingredient_presence(veg_df, s1_cols, ingredient),
               _ingredient_presence(veg_df, s2_cols, ingredient)),
        False: (_ingredient_presence(reg_df, s1_cols, ingredient),
                _ingredient_presence(reg_df, s2_cols, ingredient)),
    }

    d_list = []
    v_list = []

    for pid in range(choice_matrix.shape[0]):
        s1_has, s2_has = presence_by_diet[bool(participant_is_veg[pid])]
        D, V = _dv_scores(choice_matrix[pid], s1_has, s2_has)
        d_list.append(D)
        v_list.append(V)

    results[f"D_{ingredient}"] = d_list
    results[f"V_{ingredient}"] = v_list

# One row per participant, with D_/V_ columns for every ingredient.
final_results = pd.DataFrame(results)
final_results.insert(0, 'Participant', choices_df['Participant'])
final_results.insert(1, 'Vegetarian', choices_df['Vegetarian'])

# Separate tables per group; these are also what the plotting cells use.
final_results_veg = final_results[final_results['Vegetarian'] == 'y'].copy()
final_results_nonveg = final_results[final_results['Vegetarian'] == 'n'].copy()

final_results_veg.to_csv("DV_Results_Veg.csv", index=False)
final_results_nonveg.to_csv("DV_Results_NonVeg.csv", index=False)


Graphs

In [10]:
import os

# Destination folder for the non-vegetarian plots; created on first run and
# left untouched if it already exists.
output_directory = "/Users/unasantos/Documents/GitHub/SURP2025/D-vs-V/plots_nonveg"
os.makedirs(name=output_directory, exist_ok=True)

In [11]:
# The slider-ingredient file is read without a header row, so rows are
# addressed by position below.
slider_df = pd.read_csv(
    "/Users/unasantos/Documents/GitHub/SURP2025/Data/SliderIngs.csv",
    header=None,
)

# Row 1 is used as the non-vegetarian list and row 2 as the vegetarian list;
# empty cells are dropped and surrounding whitespace is trimmed from each name.
nonveg_ingredients = [name.strip() for name in slider_df.iloc[1].dropna().tolist()]
veg_ingredients = [name.strip() for name in slider_df.iloc[2].dropna().tolist()]

print("Non-Veg Ingredients:", nonveg_ingredients)
print("Veg Ingredients:", veg_ingredients)

def plot_dv_grouped(df,ingredients,group_name='Non-Vegetarian',color='red',output_dir=None):
    """Save one D-vs-V scatter plot per ingredient for a group of participants.

    For each ingredient, the columns ``D_<ingredient>`` and ``V_<ingredient>``
    of ``df`` are plotted against each other and written to
    ``DvV_<group_name>_<ingredient>.png``. Ingredients whose columns are not
    in ``df``, or that have no row with both values present, are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``D_*`` / ``V_*`` columns to plot.
    ingredients : iterable of str
        Ingredient names to look up in ``df``.
    group_name : str
        Used for the legend label, the title and the file name.
    color : str
        Marker colour passed to matplotlib.
    output_dir : str or None
        Folder the PNG files are written to. When None, the notebook-level
        ``output_directory`` variable is used.

    Returns
    -------
    None
    """
    target_dir = output_directory if output_dir is None else output_dir

    for ing in ingredients:
        d_col = f"D_{ing}"
        v_col = f"V_{ing}"

        # Look at the frame being plotted, not at a notebook-level global, so
        # the function works for whichever table is passed in.
        if d_col not in df.columns or v_col not in df.columns:
            continue

        # Keep only participants that have both values; skip the ingredient
        # when nothing is left to draw.
        filtered = df[[d_col, v_col]].dropna()
        if filtered.empty:
            continue

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(filtered[d_col], filtered[v_col], label=group_name, color=color)
        ax.set_xlabel(f'Discriminability (D) - {ing}')
        ax.set_ylabel(f'Validity (V) - {ing}')
        ax.set_title(f'D vs V for "{ing}" for {group_name}')
        ax.legend()
        ax.grid(True)

        filename = f"DvV_{group_name}_{ing}.png"
        filepath = os.path.join(target_dir, filename)
        fig.savefig(filepath, dpi=300, bbox_inches="tight")
        # Close explicitly: one figure is created per ingredient.
        plt.close(fig)

# Write one scatter plot per non-vegetarian slider ingredient.
plot_dv_grouped(
    final_results_nonveg,
    nonveg_ingredients,
    group_name='Non-Vegetarians',
    color='red',
)


Non-Veg Ingredients: ['Tomatoes', 'Mayonnaise', 'Turkey', 'Bacon', 'Chicken', 'Cheddar cheese', 'Swiss cheese', 'Lettuce', 'Provolone cheese', 'Spinach']
Veg Ingredients: ['Tomatoes', 'Onions', 'Cucumbers', 'Lettuce', 'Avocado', 'Sprouts', 'Provolone cheese', 'Peppers', 'Cheddar cheese', 'Mayonnaise']


In [12]:
# Destination folder for the vegetarian plots; created on first run and left
# untouched if it already exists. This rebinds the same variable the plotting
# function reads, so it must run before the vegetarian plots are drawn.
output_directory = "/Users/unasantos/Documents/GitHub/SURP2025/D-vs-V/plots_veg"
os.makedirs(name=output_directory, exist_ok=True)

In [13]:
# The slider-ingredient file is read without a header row, so rows are
# addressed by position below.
slider_df = pd.read_csv(
    "/Users/unasantos/Documents/GitHub/SURP2025/Data/SliderIngs.csv",
    header=None,
)

# Row 1 is used as the non-vegetarian list and row 2 as the vegetarian list;
# empty cells are dropped and surrounding whitespace is trimmed from each name.
nonveg_row = slider_df.iloc[1].dropna().tolist()
veg_row = slider_df.iloc[2].dropna().tolist()
nonveg_ingredients = [name.strip() for name in nonveg_row]
veg_ingredients = [name.strip() for name in veg_row]

print("Non-Veg Ingredients:", nonveg_ingredients)
print("Veg Ingredients:", veg_ingredients)

def plot_dv_grouped(df,ingredients,group_name='Vegetarian',color='green',output_dir=None):
    """Save one D-vs-V scatter plot per ingredient for a group of participants.

    For each ingredient, the columns ``D_<ingredient>`` and ``V_<ingredient>``
    of ``df`` are plotted against each other and written to
    ``DvV_<group_name>_<ingredient>.png``. Ingredients whose columns are not
    in ``df``, or that have no row with both values present, are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``D_*`` / ``V_*`` columns to plot.
    ingredients : iterable of str
        Ingredient names to look up in ``df``.
    group_name : str
        Used for the legend label, the title and the file name.
    color : str
        Marker colour passed to matplotlib.
    output_dir : str or None
        Folder the PNG files are written to. When None, the notebook-level
        ``output_directory`` variable is used.

    Returns
    -------
    None
    """
    target_dir = output_directory if output_dir is None else output_dir

    for ing in ingredients:
        d_col = f"D_{ing}"
        v_col = f"V_{ing}"

        # Look at the frame being plotted, not at a notebook-level global, so
        # the function works for whichever table is passed in.
        if d_col not in df.columns or v_col not in df.columns:
            continue

        # Keep only participants that have both values; skip the ingredient
        # when nothing is left to draw.
        filtered = df[[d_col, v_col]].dropna()
        if filtered.empty:
            continue

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(filtered[d_col], filtered[v_col], label=group_name, color=color)
        ax.set_xlabel(f'Discriminability (D) - {ing}')
        ax.set_ylabel(f'Validity (V) - {ing}')
        ax.set_title(f'D vs V for "{ing}" for {group_name}')
        ax.legend()
        ax.grid(True)

        filename = f"DvV_{group_name}_{ing}.png"
        filepath = os.path.join(target_dir, filename)
        fig.savefig(filepath, dpi=300, bbox_inches="tight")
        # Close explicitly: one figure is created per ingredient.
        plt.close(fig)

# Write one scatter plot per vegetarian slider ingredient.
plot_dv_grouped(
    final_results_veg,
    veg_ingredients,
    group_name='Vegetarian',
    color='green',
)


        


Non-Veg Ingredients: ['Tomatoes', 'Mayonnaise', 'Turkey', 'Bacon', 'Chicken', 'Cheddar cheese', 'Swiss cheese', 'Lettuce', 'Provolone cheese', 'Spinach']
Veg Ingredients: ['Tomatoes', 'Onions', 'Cucumbers', 'Lettuce', 'Avocado', 'Sprouts', 'Provolone cheese', 'Peppers', 'Cheddar cheese', 'Mayonnaise']
