In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Input tables: the sandwich definitions and the per-participant choice data.
sandwich_df = pd.read_csv("/Users/unasantos/Documents/GitHub/SURP2025/Data/DecisionSandwiches.csv")
choices_df = pd.read_csv("/Users/unasantos/Documents/GitHub/SURP2025/Data/ModelDF.csv")

# Gather every column whose name begins with 'Choice.' and expose those
# columns as a 2-D array (one array row per row of choices_df).
choice_cols = list(filter(lambda name: name.startswith('Choice.'), choices_df.columns))
choice_matrix = choices_df.loc[:, choice_cols].values
print(choice_matrix)

[[1 1 0 ... 1 1 0]
 [1 1 1 ... 1 1 1]
 [0 0 1 ... 1 1 1]
 ...
 [1 1 0 ... 1 0 0]
 [0 0 1 ... 0 0 1]
 [1 1 1 ... 1 1 0]]


In [5]:
# Partition the sandwich table's columns by name prefix: 's1...' vs 's2...'.
s1_col = list(filter(lambda name: name.startswith('s1'), sandwich_df.columns))
s2_col = list(filter(lambda name: name.startswith('s2'), sandwich_df.columns))

# Every distinct, non-missing cell value found in those columns is treated
# as one ingredient.
ingredient_cells = sandwich_df[s1_col + s2_col].values.ravel()
all_ingredients = [value for value in pd.unique(ingredient_cells) if pd.notna(value)]

# Accumulator for the output table; the identifier columns go in first and
# the per-ingredient columns are added later.
results = {
    'Participant: ': choices_df['Participant'],
    "Vegetarian": choices_df['Vegetarian'],
}

In [6]:
def row_has_ingredients(row, target=None):
    """Report whether an ingredient appears anywhere in one row of cells.

    Parameters
    ----------
    row : pandas.Series
        One row of cells, e.g. as handed over by
        ``DataFrame.apply(row_has_ingredients, axis=1)``.
    target : optional
        Value to look for. When omitted (``None``) the module-level variable
        ``ingredient`` is used, so existing one-argument calls keep working
        exactly as before. Passing it explicitly removes the dependency on
        that hidden global.

    Returns
    -------
    bool
        True when at least one cell of ``row`` equals the searched value.
    """
    wanted = ingredient if target is None else target
    return wanted in row.values

def _dv_scores(choices, s1_present, s2_present):
    """Compute per-participant D and V scores for a single ingredient.

    Parameters
    ----------
    choices : numpy.ndarray, shape (participants, trials)
        Choice codes; 0 is matched against sandwich 1, 1 against sandwich 2,
        and missing entries (``pd.isna``) are ignored.
    s1_present, s2_present : numpy.ndarray of bool, shape (trials,)
        Whether the ingredient is on sandwich 1 / sandwich 2 for each trial.

    Returns
    -------
    (list, list)
        ``d_list`` and ``v_list`` with one float per participant.
        D = picks of a sandwich containing the ingredient divided by the
        answered trials where the ingredient was on at least one sandwich.
        V = the same pick count divided by all answered trials.
        A zero denominator yields NaN.
    """
    answered = ~pd.isna(choices)
    present = s1_present | s2_present  # ingredient on at least one sandwich

    # Participant picked the sandwich that carried the ingredient.
    picked = answered & (
        ((choices == 0) & s1_present) | ((choices == 1) & s2_present)
    )

    picked_count = picked.sum(axis=1)
    d_total = (answered & present).sum(axis=1)
    v_total = answered.sum(axis=1)

    # Zero denominators are expected (ingredient never shown / nothing
    # answered); silence the warning and map those cases to NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        d_scores = np.where(d_total > 0, picked_count / d_total, np.nan)
        v_scores = np.where(v_total > 0, picked_count / v_total, np.nan)

    return d_scores.tolist(), v_scores.tolist()


n_trials = choice_matrix.shape[1]
if len(sandwich_df) < n_trials:
    # Previously this surfaced as an opaque KeyError deep inside the loop.
    raise ValueError(
        f"choice_matrix has {n_trials} trials but sandwich_df only has "
        f"{len(sandwich_df)} rows"
    )

# NOTE: the loop variable must stay named `ingredient` because
# row_has_ingredients reads it from module scope when called via apply().
for ingredient in all_ingredients:

    s1_has = sandwich_df[s1_col].apply(row_has_ingredients,axis=1)
    s2_has = sandwich_df[s2_col].apply(row_has_ingredients,axis=1)

    # Positional boolean arrays, trimmed to the trials present in choice_matrix.
    s1_flags = s1_has.to_numpy(dtype=bool)[:n_trials]
    s2_flags = s2_has.to_numpy(dtype=bool)[:n_trials]

    # d_list = discriminability per participant, v_list = validity per participant
    d_list, v_list = _dv_scores(choice_matrix, s1_flags, s2_flags)

    results[f"D_{ingredient}"] = d_list
    results[f"V_{ingredient}"] = v_list

# Turn the accumulated columns into one table, then split it on the
# 'Vegetarian' flag ('y' / 'n'); each subset is an independent copy.
final_results = pd.DataFrame(results)

veg_mask = final_results['Vegetarian'] == 'y'
nonveg_mask = final_results['Vegetarian'] == 'n'
final_results_veg = final_results.loc[veg_mask].copy()
final_results_nonveg = final_results.loc[nonveg_mask].copy()

def drop_cols(df):
    """Return a copy of ``df`` without the columns that hold only missing values.

    Identifier columns are never dropped, even when they are entirely NaN.
    The results table stores the participant id under the key
    ``'Participant: '``; the previous keep-list spelled it ``'Particpant'``
    and therefore protected nothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to prune; it is not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` minus every non-identifier column whose values are all NaN.
    """
    keep = ('Participant: ', 'Participant', 'Vegetarian')
    cols_to_drop = [col for col in df.columns if col not in keep and df[col].isna().all()]
    return df.drop(columns=cols_to_drop)

# Write each subset to its own CSV in the working directory, without the index.
# NOTE(review): drop_cols is defined above but never applied before saving —
# confirm whether all-NaN columns were meant to be stripped from these files.
for csv_name, frame in (
    ("DV_Results_Veg.csv", final_results_veg),
    ("DV_Results_NonVeg.csv", final_results_nonveg),
):
    frame.to_csv(csv_name, index=False)


Graphs

In [7]:
import os

# Destination folder for the non-vegetarian plots; created if it is missing.
# plot_dv_grouped reads this module-level name when it saves figures.
output_directory = "/Users/unasantos/Documents/GitHub/SURP2025/D-vs-V/plots_nonveg"
os.makedirs(name=output_directory, exist_ok=True)

In [8]:
# SliderIngs.csv is read without a header row; row 1 feeds the
# non-vegetarian list and row 2 the vegetarian list.
slider_df = pd.read_csv("/Users/unasantos/Documents/GitHub/SURP2025/Data/SliderIngs.csv", header=None)

nonveg_raw = slider_df.iloc[1].dropna().tolist()
veg_raw = slider_df.iloc[2].dropna().tolist()

# Trim surrounding whitespace from every entry.
nonveg_ingredients = [cell.strip() for cell in nonveg_raw]
veg_ingredients = [cell.strip() for cell in veg_raw]

print("Non-Veg Ingredients:", nonveg_ingredients)
print("Veg Ingredients:", veg_ingredients)

def plot_dv_grouped(df,ingredients,group_name='Non-Vegetarian',color='red'):
    """Save one D-vs-V scatter plot per ingredient for a group of participants.

    For every name in ``ingredients`` the columns ``D_<name>`` and
    ``V_<name>`` of ``df`` are plotted against each other and written as
    ``DvV_<group_name>_<name>.png`` into the module-level
    ``output_directory``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``D_*`` / ``V_*`` columns for the group.
    ingredients : iterable of str
        Ingredient names to plot. Names whose columns are absent from
        ``df``, or that have no row with both scores present, are skipped.
    group_name : str
        Label used in the legend, title and file name.
    color : str
        Marker colour passed to matplotlib.

    Returns
    -------
    None
    """
    for ing in ingredients:
        d_col = f"D_{ing}"
        v_col = f"V_{ing}"

        # Check the frame that was passed in, not a module-level frame.
        if d_col not in df.columns or v_col not in df.columns:
            continue

        # Keep only rows where both scores exist; nothing to draw otherwise.
        filtered = df[[d_col,v_col]].dropna()
        if filtered.empty:
            continue

        #plot
        fig, ax = plt.subplots(figsize=(8,6))
        ax.scatter(filtered[d_col],filtered[v_col],label=group_name,color=color)
        ax.set_xlabel(f'Discriminability (D) - {ing}')
        ax.set_ylabel(f'Validity (V) - {ing}')
        ax.set_title(f'D vs V for "{ing}" for {group_name}')
        ax.legend()
        ax.grid(True)

        filename = f"DvV_{group_name}_{ing}.png"
        filepath = os.path.join(output_directory,filename)
        fig.savefig(filepath,dpi=300,bbox_inches="tight")
        # Close explicitly so figures do not accumulate across the loop.
        plt.close(fig)

# Produce the non-vegetarian plots (one PNG per listed ingredient).
plot_dv_grouped(
    df=final_results_nonveg,
    ingredients=nonveg_ingredients,
    group_name='Non-Vegetarians',
    color='red',
)


Non-Veg Ingredients: ['Tomatoes', 'Mayonnaise', 'Turkey', 'Bacon', 'Chicken', 'Cheddar cheese', 'Swiss cheese', 'Lettuce', 'Provolone cheese', 'Spinach']
Veg Ingredients: ['Tomatoes', 'Onions', 'Cucumbers', 'Lettuce', 'Avocado', 'Sprouts', 'Provolone cheese', 'Peppers', 'Cheddar cheese', 'Mayonnaise']


In [9]:
# Destination folder for the vegetarian plots; created if it is missing.
# Rebinding this module-level name redirects where plot_dv_grouped saves.
output_directory = "/Users/unasantos/Documents/GitHub/SURP2025/D-vs-V/plots_veg"
os.makedirs(name=output_directory, exist_ok=True)

In [10]:
# Re-read SliderIngs.csv (no header row); row 1 feeds the non-vegetarian
# list and row 2 the vegetarian list.
slider_df = pd.read_csv("/Users/unasantos/Documents/GitHub/SURP2025/Data/SliderIngs.csv", header=None)

nonveg_raw, veg_raw = (slider_df.iloc[pos].dropna().tolist() for pos in (1, 2))

# Trim surrounding whitespace from every entry.
nonveg_ingredients = [cell.strip() for cell in nonveg_raw]
veg_ingredients = [cell.strip() for cell in veg_raw]

print("Non-Veg Ingredients:", nonveg_ingredients)
print("Veg Ingredients:", veg_ingredients)

def plot_dv_grouped(df,ingredients,group_name='Vegetarian',color='green'):
    """Save one D-vs-V scatter plot per ingredient for a group of participants.

    For every name in ``ingredients`` the columns ``D_<name>`` and
    ``V_<name>`` of ``df`` are plotted against each other and written as
    ``DvV_<group_name>_<name>.png`` into the module-level
    ``output_directory``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the ``D_*`` / ``V_*`` columns for the group.
    ingredients : iterable of str
        Ingredient names to plot. Names whose columns are absent from
        ``df``, or that have no row with both scores present, are skipped.
    group_name : str
        Label used in the legend, title and file name.
    color : str
        Marker colour passed to matplotlib.

    Returns
    -------
    None
    """
    for ing in ingredients:
        d_col = f"D_{ing}"
        v_col = f"V_{ing}"

        # Check the frame that was passed in, not a module-level frame.
        if d_col not in df.columns or v_col not in df.columns:
            continue

        # Keep only rows where both scores exist; nothing to draw otherwise.
        filtered = df[[d_col,v_col]].dropna()
        if filtered.empty:
            continue

        #plot
        fig, ax = plt.subplots(figsize=(8,6))
        ax.scatter(filtered[d_col],filtered[v_col],label=group_name,color=color)
        ax.set_xlabel(f'Discriminability (D) - {ing}')
        ax.set_ylabel(f'Validity (V) - {ing}')
        ax.set_title(f'D vs V for "{ing}" for {group_name}')
        ax.legend()
        ax.grid(True)

        filename = f"DvV_{group_name}_{ing}.png"
        filepath = os.path.join(output_directory,filename)
        fig.savefig(filepath,dpi=300,bbox_inches="tight")
        # Close explicitly so figures do not accumulate across the loop.
        plt.close(fig)

# Produce the vegetarian plots (one PNG per listed ingredient).
plot_dv_grouped(
    df=final_results_veg,
    ingredients=veg_ingredients,
    group_name='Vegetarian',
    color='green',
)


        


Non-Veg Ingredients: ['Tomatoes', 'Mayonnaise', 'Turkey', 'Bacon', 'Chicken', 'Cheddar cheese', 'Swiss cheese', 'Lettuce', 'Provolone cheese', 'Spinach']
Veg Ingredients: ['Tomatoes', 'Onions', 'Cucumbers', 'Lettuce', 'Avocado', 'Sprouts', 'Provolone cheese', 'Peppers', 'Cheddar cheese', 'Mayonnaise']
