Importing required packages

In [2]:
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
from scipy import stats
from scipy.stats import wilcoxon, t
import altair as alt
from altair import selection_point
print(f"Altair version: {alt.__version__}")

# Set Pandas options to display all columns and rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)  # Set to None to display all text in a column



Altair version: 5.4.1


Loading data

In [3]:
# Load the CSV file
file_path = './data/final-study-raw-2.csv'
df = pd.read_csv(file_path)

# df.head(10)


Add a field named isCorrectlyAnswered

Fixed few things
- Multiple answers should be considered correct in any order (e.g, Fatique, Anosmia or Anosmia,Fatigue)
- "The dataset contains 18 sets and 1745 elements, of which 8 sets are shown in the plot": for this type of sentece, if we ask set number, both "8" and "18" are correct answer
- The participant ID "665c7ba6b596a3558adbf2ec" has no data in reVISit/Firebase. Removing this participant

In [4]:
# List of factual trialIds
factual_trial_ids = [
    'Covid-Vis-Q1', 'Covid-Text-Q1', 'Covid-Both-Q1',
    'Tennis-Vis-Q1', 'Tennis-Text-Q1', 'Tennis-Both-Q1',
    'Organizations-Vis-Q1', 'Organizations-Text-Q1', 'Organizations-Both-Q1'
]

# List of factual responseIds
factual_response_ids = ['voq1', 'voq2', 'voq3', 'voq4', 'voq5', 'voq6', 'voq7', 'voq8']

training_trial_ids = ['Training 1', 'Training 2', 'Training 3']
training_response_ids = ['1', '2', '3']

# Defining the participantId to be removed because it doesn't hold answer for all the essemtial questions
participants_to_remove = ['665c7ba6b596a3558adbf2ec']
df = df[~df['participantId'].isin(participants_to_remove)]



# Function that determines if the answer is correct or not or should be marked as "undefined" for not factual questions
def is_correctly_answered(row):
    if (row['trialId'] in factual_trial_ids and row['responseId'] in factual_response_ids) or \
       (row['trialId'] in training_trial_ids and row['responseId'] in training_response_ids):
        if row['trialId'] in ['Covid-Vis-Q1', 'Covid-Text-Q1', 'Covid-Both-Q1',
                              'Organizations-Vis-Q1', 'Organizations-Text-Q1', 'Organizations-Both-Q1'] and \
           row['responseId'] in ['voq3']:  # Add any other responseId that requires split comparison

            # Multiple answer may come in any order
            correct_answer_set = set(row['correctAnswer'].split(','))
            answer_set = set(row['answer'].split(','))
            return correct_answer_set == answer_set
        elif row['trialId'] in ['Organizations-Vis-Q1', 'Organizations-Text-Q1', 'Organizations-Both-Q1'] and \
             row['responseId'] == 'voq1':
            if row['answer'] in ['8', '18']:
                return True  # Mark as correct if answer is "8" or "18"
            else:
                # Otherwise, do a direct string comparison
                return row['answer'] == row['correctAnswer']

        else:
            # For all other cases, do a direct string comparison
            return row['answer'] == row['correctAnswer']
    else:
        return "undefined"

# Adding the isCorrectlyAnswered column
df['isCorrectlyAnswered'] = df.apply(is_correctly_answered, axis=1)

# Reordering columns to place isCorrectlyAnswered right after correctAnswer
cols = df.columns.tolist()
correct_answer_index = cols.index('correctAnswer')
cols.insert(correct_answer_index + 1, cols.pop(cols.index('isCorrectlyAnswered')))
df = df[cols]
# # Checking
# filtered_df = df[
#     (df['trialId'].isin(['Organizations-Vis-Q1', 'Organizations-Text-Q1', 'Organizations-Both-Q1'])) &
#     (df['responseId'] == 'voq1')
# ]
# for index, row in filtered_df.iterrows():
#     print(f"ParticipantId: {row['participantId']}, TrialId: {row['trialId']}, ResponseId: {row['responseId']}, Answer: {row['answer']}")

# df.head(10)

- Filtering out the "completed" studies (removing the pending and rejected ones)
- Adding a column named dataset
- Deleting "configHash" colum
- Saving all of these in a new csv file

In [5]:
# Filtering rows where 'Status' is 'completed'
filtered_df = df[df['status'] == 'completed']

# Counting the number of unique participants
unique_participant_count = filtered_df['participantId'].nunique()
print(f"Number of unique participants: {unique_participant_count}")

# Adding the dataset column based on the conditions
def determine_dataset(trial_id):
    if 'Covid' in trial_id:
        return 'Covid Symptoms'
    elif 'Tennis' in trial_id:
        return 'Tennis Grand Slam Winners'
    elif 'Organizations' in trial_id:
        return 'International Organizations'
    elif 'Training' in trial_id:
        return 'Movie Genres'
    else:
        return 'Undefined'

filtered_df['dataset'] = filtered_df['trialId'].apply(determine_dataset)

# Deleting unnecessary columns
filtered_df = filtered_df.drop(columns=['instruction', 'configHash'])

# Save the filtered data to a new CSV file
new_file_path = './data/final-study.csv'
filtered_df.to_csv(new_file_path, index=False)

print(f"Filtered data saved to: {new_file_path}")


Number of unique participants: 83
Filtered data saved to: ./data/final-study.csv


1. Separating three conditions (Vis, Text, Both) in three different files
2. Using "Survey.csv" only to plot likert scale answers

In [6]:
conditions = ['Text', 'Both', 'Vis', 'Survey']

# Generating separate CSV files for each condition
for condition in conditions:
    condition_df = filtered_df[filtered_df['trialId'].str.contains(condition)]

    new_file_path = f'./data/{condition}_data.csv'
    condition_df.to_csv(new_file_path, index=False)

    print(f"Filtered data for condition '{condition}' saved to: {new_file_path}")

Filtered data for condition 'Text' saved to: ./data/Text_data.csv
Filtered data for condition 'Both' saved to: ./data/Both_data.csv
Filtered data for condition 'Vis' saved to: ./data/Vis_data.csv
Filtered data for condition 'Survey' saved to: ./data/Survey_data.csv


Counting post survey preferences and effectiveness

In [7]:
# data = pd.read_csv('./data/Survey_data.csv')
data = pd.read_csv('./data/Survey_data.csv')

# Filtering relevant columns for analysis
columns_of_interest = ['participantId', 'trialId', 'responseId', 'responsePrompt', 'answer']
filtered_data = data[columns_of_interest]

# Responses to "Which presentation modality did you find most effective?"
most_effective = filtered_data[filtered_data['responsePrompt'] == 'Which presentation modality did you find most effective?']['answer']

# Responses to "Which condition did you find the least effective?"
least_effective = filtered_data[filtered_data['responsePrompt'] == 'Which condition did you find the least effective?']['answer']

# Responses to "If you were to receive information in the future, which format would you prefer?"
preferred_format = filtered_data[filtered_data['responsePrompt'] == 'If you were to receive information in the future, which format would you prefer?']['answer']

# Counters for each question
most_effective_counts = Counter(most_effective)
least_effective_counts = Counter(least_effective)
preferred_format_counts = Counter(preferred_format)

most_effective_counts, least_effective_counts, preferred_format_counts


(Counter({'Text and Visualization Combined': 57,
          'Just Visualization': 18,
          'Just Text Description': 8}),
 Counter({'Just Text Description': 57,
          'Just Visualization': 23,
          'Text and Visualization Combined': 3}),
 Counter({'Text and Visualization Combined': 60,
          'Just Visualization': 15,
          'Just Text Description': 8}))

Plotting the result for pereferences and the effectiveness

In [8]:
conditions = ['Just Visualization', 'Just Text Description', 'Text and Visualization Combined',]
# Initialize lists for counts
most_effective_list = [most_effective_counts.get(cond, 0) for cond in conditions]
least_effective_list = [least_effective_counts.get(cond, 0) for cond in conditions]
preferred_format_list = [preferred_format_counts.get(cond, 0) for cond in conditions]

# Print results to verify
print('Most Effective:', most_effective_list)
print('Least Effective:', least_effective_list)
print('Preferred Format:', preferred_format_list)

data = {
    'Condition': conditions,
    'Most Effective': most_effective_list,
    'Least Effective': least_effective_list,
    'Preferred Format': preferred_format_list
}

Most Effective: [18, 8, 57]
Least Effective: [23, 57, 3]
Preferred Format: [15, 8, 60]


In [9]:
# Data in long format
data = {
    'Condition': ['JV', 'JT', 'TV'] * 3,  # Use short labels
    'Category': ['Most Effective'] * 3 + ['Least Effective'] * 3 + ['Preferred Format'] * 3,
    'Count': most_effective_list + least_effective_list + preferred_format_list
}

df_melted = pd.DataFrame(data)

# Function to create a chart for each category
def create_individual_chart(df, condition, color_map, show_y_axis):
    # Filter the data based on the specified category
    df_filtered = df[df['Category'] == condition].copy()

    # Convert the count to numeric for proper encoding
    df_filtered['Count'] = pd.to_numeric(df_filtered['Count'])

    # Define the Altair chart
    chart = alt.Chart(df_filtered).mark_bar(size=15).encode(
        x=alt.X('Count:Q', title=None, axis=alt.Axis(values=[0, 10, 20, 30, 40, 50, 60], orient='top')),
        y=alt.Y('Condition:N', title=None, axis=alt.Axis(labelAngle=0, labels=show_y_axis, domain=show_y_axis, ticks=False), sort=['JV', 'JT', 'TV']),  # Control y-axis visibility
        color=alt.Color('Condition:N', scale=alt.Scale(domain=['JV', 'JT', 'TV'],
                                                      range=[color_map['Visualization'], color_map['Text'], color_map['Combined']])),
        tooltip=['Condition', 'Category', 'Count']
    ).properties(
        width=200,
        height=70,
        title=alt.TitleParams(condition, orient='bottom')
    )

    # Text layer for displaying counts
    text = chart.mark_text(
        align='left',
        baseline='middle',
        dx=3  # Adjust the position of text
    ).encode(
        text='Count:Q'
    )

    return chart + text  # Layer bars with text

# Define color map for consistency
color_map = {
    'Visualization': '#A5021D',  # Color for JV
    'Text': '#2F88A6',           # Color for JT
    'Combined': '#635293'        # Color for TV
}

# Create individual charts for each category
most_effective_chart = create_individual_chart(df_melted, 'Most Effective', color_map, show_y_axis=True)
least_effective_chart = create_individual_chart(df_melted, 'Least Effective', color_map, show_y_axis=False)
preferred_format_chart = create_individual_chart(df_melted, 'Preferred Format', color_map, show_y_axis=False)

# Combine the charts horizontally
combined_chart = alt.hconcat(most_effective_chart, least_effective_chart, preferred_format_chart).resolve_scale(
    y='shared'
)

# Text annotation for number of trials
text_annotation = alt.Chart(pd.DataFrame({'text': ["For each condition, N=83"]})).mark_text(
    align='center',
    baseline='top',
    fontSize=12
).encode(
    text='text:N'
).properties(
    width=600
)

# Combine the charts with the text annotation
final_chart = alt.vconcat(combined_chart, text_annotation).configure_axis(
    grid=False
).configure_view(
    stroke=None
).configure_title(
    fontSize=14
).configure_legend(
    disable=True
)


final_chart.display()


In [10]:
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

columns_of_interest = ['participantId','trialId', 'responseId', 'responsePrompt', 'answer', 'correctAnswer','isCorrectlyAnswered','dataset']
ans_df = df[columns_of_interest]

# ans_df.head()


Percentage of Answering Correcty (based on Text, Both and Vis for each dataset (covid-text-Q1, covid-vis-Q1, etc. and training dataset)

In [11]:
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

# The trialIds of interest
trial_ids_of_interest = [
    'Covid-Vis-Q1', 'Covid-Text-Q1', 'Covid-Both-Q1',
    'Tennis-Vis-Q1', 'Tennis-Text-Q1', 'Tennis-Both-Q1',
    'Organizations-Vis-Q1', 'Organizations-Text-Q1', 'Organizations-Both-Q1'
]

dataset_df = df[df['trialId'].isin(trial_ids_of_interest)]

print("The below trialIds have factual questions:")
print(dataset_df['trialId'].value_counts()/8)
dataset_df.head(10)

# Converting 'isCorrectlyAnswered' from string to boolean
dataset_df['isCorrectlyAnswered'] = dataset_df['isCorrectlyAnswered'].apply(lambda x: True if x == 'True' else False if x == 'False' else x)
filterCorrectAnswer_df = dataset_df[dataset_df['isCorrectlyAnswered'].isin([True, False])]
filterCorrectAnswer_df.head(10)

# print(filterCorrectAnswer_df['isCorrectlyAnswered'].value_counts())

# Calculating the percentage of correctly answered trials for each trialId
percentage_correct_by_trialid = filterCorrectAnswer_df.groupby('trialId')['isCorrectlyAnswered'].mean() * 100

# The results
print("Percentage of correctly answered trials for each trialId of having factual questions:")
print(percentage_correct_by_trialid)


The below trialIds have factual questions:
trialId
Tennis-Text-Q1           30.0
Covid-Both-Q1            29.0
Organizations-Both-Q1    28.0
Covid-Vis-Q1             28.0
Organizations-Vis-Q1     28.0
Tennis-Vis-Q1            27.0
Organizations-Text-Q1    27.0
Covid-Text-Q1            26.0
Tennis-Both-Q1           26.0
Name: count, dtype: float64
Percentage of correctly answered trials for each trialId of having factual questions:
trialId
Covid-Both-Q1            95.689655
Covid-Text-Q1            86.057692
Covid-Vis-Q1             95.089286
Organizations-Both-Q1    95.535714
Organizations-Text-Q1    95.370370
Organizations-Vis-Q1     87.053571
Tennis-Both-Q1           81.730769
Tennis-Text-Q1           87.500000
Tennis-Vis-Q1            79.166667
Name: isCorrectlyAnswered, dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_df['isCorrectlyAnswered'] = dataset_df['isCorrectlyAnswered'].apply(lambda x: True if x == 'True' else False if x == 'False' else x)


Percentage of Answering Correcty (based on each Visual Contents - Vis, Text or Both)

In [12]:
def get_visual_content_type(trialId):
    if 'Vis' in trialId:
        return 'VIS'
    elif 'Both' in trialId:
        return 'BOTH'
    elif 'Text' in trialId:
        return 'TEXT'


filterCorrectAnswer_df['VisualContentType'] = filterCorrectAnswer_df['trialId'].apply(get_visual_content_type)

# Calculating the percentage of correctly answered trials for each Visual Content Type
percentage_correct_by_visual_content = filterCorrectAnswer_df.groupby('VisualContentType')['isCorrectlyAnswered'].mean() * 100

# The results
print("Percentage of correctly answered cumulatively for each Visual Content Type:")
print(percentage_correct_by_visual_content)

Percentage of correctly answered cumulatively for each Visual Content Type:
VisualContentType
BOTH    91.265060
TEXT    89.608434
VIS     87.198795
Name: isCorrectlyAnswered, dtype: float64


Percentage of Answering Correcty (based on datasets)

In [13]:

# Calculating the percentage of correctly answered trials for each dataset
percentage_correct_by_dataset = filterCorrectAnswer_df.groupby('dataset')['isCorrectlyAnswered'].mean() * 100

# The result
print("Percentage of correctly answered answer cumulatively for each dataset (of having factual questions only):")
print(percentage_correct_by_dataset)



Percentage of correctly answered answer cumulatively for each dataset (of having factual questions only):
dataset
Covid Symptoms                 92.469880
International Organizations    92.620482
Tennis Grand Slam Winners      82.981928
Name: isCorrectlyAnswered, dtype: float64


Saving the answers of "What are the insights and takeaways about the data"? (Q3 of all conditions) and Post survey qualitative questions Q7 & Q9

In [14]:
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

q3_trial_ids = [
    'Covid-Vis-Q3', 'Covid-Text-Q3', 'Covid-Both-Q3',
    'Tennis-Vis-Q3', 'Tennis-Text-Q3', 'Tennis-Both-Q3',
    'Organizations-Vis-Q3', 'Organizations-Text-Q3', 'Organizations-Both-Q3'
]

qualitative_df = df[
    ((df['trialId'].isin(q3_trial_ids)) & (df['responseId'] == 'voq1')) |
    ((df['trialId'] == 'Post Study Survey') & (df['responseId'].isin(['q7', 'q9'])))
]

output_path = './data/qualitative_responses.csv'

# DataFrame to CSV
qualitative_df[['participantId', 'trialId', 'responsePrompt', 'answer']].to_csv(output_path, index=False)

# Print the answers for all the Q3 trialIds
# (qualitative_df[['participantId','trialId','answer']])


Completion time VS overall correctness

In [15]:
# file_path = './data/final-study.csv'
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

trial_ids_of_interest = [
    'Covid-Vis-Q1', 'Covid-Text-Q1', 'Covid-Both-Q1',
    'Tennis-Vis-Q1', 'Tennis-Text-Q1', 'Tennis-Both-Q1',
    'Organizations-Vis-Q1', 'Organizations-Text-Q1', 'Organizations-Both-Q1'
]
overall_df = df[df['trialId'].isin(trial_ids_of_interest)]

# print("Only the below trialIds have factual questions:")
# print(overall_df['trialId'].value_counts()/8)
# print(overall_df.shape)

# Converting 'isCorrectlyAnswered' from string to boolean
overall_df['isCorrectlyAnswered'] = overall_df['isCorrectlyAnswered'].apply(lambda x: True if x == 'True' else False if x == 'False' else x)
overallCorrectAnswer_df = overall_df[overall_df['isCorrectlyAnswered'].isin([True, False])]
print(overallCorrectAnswer_df.shape)

# Calculating correctness per participant per condition
overallCorrectAnswer_df['correctAnswerNumeric'] = overallCorrectAnswer_df['isCorrectlyAnswered'].astype(int)  # Convert correctness to numeric (1 for True, 0 for False)
correctness_by_participant = overallCorrectAnswer_df.groupby(['participantId', 'trialId'])['correctAnswerNumeric'].sum()
# print(correctness_by_participant.shape)

# Pivot the table to get a better structure for summing up correctness
correctness_pivot = correctness_by_participant.unstack().fillna(0)

# Calculating the overall correctness percentage per participant (sum the scores and calculate percentage)
correctness_pivot['overallCorrectness'] = (correctness_pivot.sum(axis=1) / 24) * 100

# Select the first occurrence of each trialId per participant for duration calculation
first_occurrence_df = overallCorrectAnswer_df.groupby(['participantId', 'trialId']).first().reset_index()
# print(first_occurrence_df)
# print(first_occurrence_df.shape)

# Calculating the total completion time per participant in minutes by summing the first durations
total_time_by_participant = first_occurrence_df.groupby('participantId')['duration'].sum() / 60000
# print(total_time_by_participant)  # Convert milliseconds to minutes
print(total_time_by_participant.shape)

# Combining correctness and duration into a single DataFrame
summary_df = pd.DataFrame({
    'totalCompletionTime': total_time_by_participant,
    'overallCorrectness': correctness_pivot['overallCorrectness']
})

# added altair version to show simplicity
alt.Chart(summary_df).mark_point().encode(x = 'totalCompletionTime', y='overallCorrectness')


(1992, 17)
(83,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  overall_df['isCorrectlyAnswered'] = overall_df['isCorrectlyAnswered'].apply(lambda x: True if x == 'True' else False if x == 'False' else x)


- A single plot that has three sub-plots
- Each scatter plot shows completion time vs correctness
- The color coding is based on the datasets


In [16]:
# file_path = './data/final-study.csv'
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

conditions = {
    'VIS': ['Covid-Vis-Q1', 'Tennis-Vis-Q1', 'Organizations-Vis-Q1'],
    'Text': ['Covid-Text-Q1', 'Tennis-Text-Q1', 'Organizations-Text-Q1'],
    'Both': ['Covid-Both-Q1', 'Tennis-Both-Q1', 'Organizations-Both-Q1']
}

overall_df = df[df['trialId'].isin(sum(conditions.values(), []))]

# print("Only the below trialIds have factual questions:")
# print(overall_df['trialId'].value_counts()/8)

overall_df.loc[:, 'isCorrectlyAnswered'] = overall_df['isCorrectlyAnswered'].apply(lambda x: True if x == 'True' else False if x == 'False' else x)
overallCorrectAnswer_df = overall_df[overall_df['isCorrectlyAnswered'].isin([True, False])]

# Calculating correctness per participant per condition
overallCorrectAnswer_df['correctAnswerNumeric'] = overallCorrectAnswer_df['isCorrectlyAnswered'].astype(int)
correctness_by_participant = overallCorrectAnswer_df.groupby(['participantId', 'trialId'])['correctAnswerNumeric'].sum().reset_index()

# Select the first occurrence of each trialId per participant for duration calculation
first_occurrence_df = overallCorrectAnswer_df.groupby(['participantId', 'trialId']).first().reset_index()

# Calculating the total completion time per participant in minutes by summing the first durations
first_occurrence_df['duration'] = first_occurrence_df['duration'] / 60000  # Convert milliseconds to minutes


duration_table = first_occurrence_df[['participantId', 'trialId', 'duration']]
duration_table = duration_table.sort_values(['participantId', 'trialId'])

# print("\nParticipant Duration Table:")
# print(duration_table.to_string(index=False))

# Merging duration and correctness into one DataFrame, ensure correctness column is added
merged_df = pd.merge(first_occurrence_df, correctness_by_participant, on=['participantId', 'trialId'], suffixes=('', '_correct'))
merged_df['correctness'] = (merged_df['correctAnswerNumeric_correct'] / 8) * 100  # Calculate correctness percentage


In [17]:

# Duration to seconds
merged_df['duration_seconds'] = merged_df['duration'] * 60

# Calculating the maximum duration across all conditions
max_duration = merged_df['duration_seconds'].max()
x_max = np.ceil(max_duration / 60) * 60   # Round up to the nearest minute and add 1 minute

# The base chart
def create_subplot(data, condition):
    return alt.Chart(data).mark_circle().encode(
        x=alt.X('duration_seconds:Q',
                title='Completion Time (seconds)',
                scale=alt.Scale(domain=[0, x_max])),
        y=alt.Y('correctness:Q', title='Correctness (%)',
                scale=alt.Scale(domain=[0, 100]),
                axis=alt.Axis(values=list(range(20, 101, 10)))),
        color=alt.Color('dataset:N', scale=alt.Scale(scheme='viridis')),
        tooltip=['dataset', 'duration_seconds', 'correctness']
    ).properties(
        width=300,
        height=200,
        title=f'{condition} Condition'
    )

# Subplots for each condition
vis_chart = create_subplot(merged_df[merged_df['trialId'].isin(conditions['VIS'])], 'VIS')
text_chart = create_subplot(merged_df[merged_df['trialId'].isin(conditions['Text'])], 'Text')
both_chart = create_subplot(merged_df[merged_df['trialId'].isin(conditions['Both'])], 'Both')

combined_chart = alt.hconcat(vis_chart, text_chart, both_chart).resolve_scale(
    color='shared'
).properties(
    # title=f'Correctness by Condition (Max Time: {x_max} seconds)'
    title=f'Correctness by Condition'
)

combined_chart

Data pre-processing to plot 'Jitter plot'

In [18]:
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

conditions = {
    'VIS': ['Covid-Vis-Q1', 'Tennis-Vis-Q1', 'Organizations-Vis-Q1'],
    'Text': ['Covid-Text-Q1', 'Tennis-Text-Q1', 'Organizations-Text-Q1'],
    'Both': ['Covid-Both-Q1', 'Tennis-Both-Q1', 'Organizations-Both-Q1']
}
overall_df = df[df['trialId'].isin(sum(conditions.values(), []))]

overall_df.loc[:, 'isCorrectlyAnswered'] = overall_df['isCorrectlyAnswered'].apply(lambda x: True if x == 'True' else False if x == 'False' else x)
overallCorrectAnswer_df = overall_df[overall_df['isCorrectlyAnswered'].isin([True, False])]

overallCorrectAnswer_df['correctAnswerNumeric'] = overallCorrectAnswer_df['isCorrectlyAnswered'].astype(int)
correctness_by_participant = overallCorrectAnswer_df.groupby(['participantId', 'trialId'])['correctAnswerNumeric'].sum().reset_index()

first_occurrence_df = overallCorrectAnswer_df.groupby(['participantId', 'trialId']).first().reset_index()

first_occurrence_df['duration'] = first_occurrence_df['duration'] / 60000  # Convert milliseconds to minutes

merged_df = pd.merge(first_occurrence_df, correctness_by_participant, on=['participantId', 'trialId'], suffixes=('', '_correct'))

# Calculating correctness percentage and number of correctness
merged_df['correctness'] = (merged_df['correctAnswerNumeric_correct'] / 8) * 100  # Assuming 8 questions
merged_df.rename(columns={'correctAnswerNumeric_correct': 'correct_number_of_answers'}, inplace=True)

# Condition mapping based on trialId
condition_map = {
    'Covid-Vis-Q1': 'Vis', 'Tennis-Vis-Q1': 'Vis', 'Organizations-Vis-Q1': 'Vis',
    'Covid-Text-Q1': 'Text', 'Tennis-Text-Q1': 'Text', 'Organizations-Text-Q1': 'Text',
    'Covid-Both-Q1': 'Both', 'Tennis-Both-Q1': 'Both', 'Organizations-Both-Q1': 'Both'
}
merged_df['condition'] = merged_df['trialId'].map(condition_map)

merged_df = merged_df[['participantId', 'trialId', 'correct_number_of_answers', 'correctness', 'condition', 'dataset', 'duration']]

# Load the survey data
survey_data_path = './data/Survey_data.csv'
survey_data = pd.read_csv(survey_data_path)

# Filter relevant columns for analysis
columns_of_interest = ['participantId', 'responsePrompt', 'answer']
filtered_data = survey_data[columns_of_interest]

# Map the preferred format for each unique participant
preferred_format_data = filtered_data[filtered_data['responsePrompt'] == 'If you were to receive information in the future, which format would you prefer?']
preferred_format_map = preferred_format_data.set_index('participantId')['answer'].to_dict()

# Add the preferred format to the merged_df DataFrame
merged_df['preferred_format'] = merged_df['participantId'].map(preferred_format_map)

# Make the condition values consistent with the preferred format column values
condition_mapping = {
    'Both': 'Text and Visualization Combined',
    'Text': 'Just Text Description',
    'Vis': 'Just Visualization'
}

merged_df['condition'] = merged_df['condition'].map(condition_mapping)


print(merged_df.head())




              participantId                trialId  correct_number_of_answers  \
0  574dc90512d86b000f833ab0          Covid-Text-Q1                          6   
1  574dc90512d86b000f833ab0   Organizations-Vis-Q1                          8   
2  574dc90512d86b000f833ab0         Tennis-Both-Q1                          7   
3  5755c957eb80c4000741a9ce          Covid-Both-Q1                          8   
4  5755c957eb80c4000741a9ce  Organizations-Text-Q1                          8   

   correctness                        condition                      dataset  \
0         75.0            Just Text Description               Covid Symptoms   
1        100.0               Just Visualization  International Organizations   
2         87.5  Text and Visualization Combined    Tennis Grand Slam Winners   
3        100.0  Text and Visualization Combined               Covid Symptoms   
4        100.0            Just Text Description  International Organizations   

   duration                 pref


1. P-Value Calculation (Wilcoxon Signed-Rank Test)
    - Method Used: The p-value is calculated using the Wilcoxon Signed-Rank Test.
    - Purpose: This non-parametric test is used to compare two paired samples to determine whether their population mean ranks differ.
    - When Used: It's particularly useful when the data is not normally distributed, and you want to compare two related groups (e.g., the same participants under different conditions).
    - Implementation: The stats.wilcoxon() function from the scipy library is used to calculate the Wilcoxon Signed-Rank Test statistic (W) and the corresponding p-value.

2. Cohen's d Calculation
    - Method Used: Cohen's d is calculated using a custom function that computes the standardized mean difference between two groups.
    - Purpose: Cohen's d is a measure of effect size, which quantifies the difference between two group means in terms of standard deviation.
    - When Used: It is commonly used to express the magnitude of the difference between two groups.
3. Confidence Level
    - Method Used: A confidence interval (CI) for the mean correctness is calculated using the t-distribution.
    - Confidence Level: The confidence level used in the code is 95%.
    - Purpose: The confidence interval provides a range of values that likely contains the true mean difference between groups, considering a 95% confidence level.
    - Implementation: The t.interval() function from scipy.stats is used to calculate the CI:
        ci = t.interval(0.95, len(group)-1, loc=mean, scale=sem)

In [19]:
def add_stats(df):
    df = df.copy()

    def calculate_ci(group):
        sem = group['correctness'].sem()  # Standard Error of the Mean
        mean = group['mean'].iloc[0]  # Get the already calculated mean
        ci = t.interval(0.95, len(group)-1, loc=mean, scale=sem)  # 95% CI
        group['ci0'] = ci[0]
        group['ci1'] = ci[1]
        return group

    df['mean'] = df.groupby(['condition'], group_keys=False)['correctness'].transform('mean')
    df = df.groupby(['condition'], group_keys=False).apply(calculate_ci)
    df['mean_ci_formatted'] = df.apply(lambda row: f"{row['ci0']:.2f} — {row['mean']:.2f} — {row['ci1']:.2f}", axis=1)

    def cohens_d(group1, group2):
        mean1, mean2 = np.mean(group1), np.mean(group2)
        pooled_std = np.sqrt((np.std(group1, ddof=1) ** 2 + np.std(group2, ddof=1) ** 2) / 2)
        return (mean1 - mean2) / pooled_std

    conditions = df['condition'].unique()
    cohens_d_values = {}

    for i in range(len(conditions)):
        for j in range(i + 1, len(conditions)):
            cond1 = conditions[i]
            cond2 = conditions[j]
            group1 = df[df['condition'] == cond1]['correctness']
            group2 = df[df['condition'] == cond2]['correctness']
            cohens_d_values[(cond1, cond2)] = cohens_d(group1, group2)
            cohens_d_values[(cond2, cond1)] = cohens_d(group1, group2)  # Store both pairs


    # Pivot DataFrame to get paired samples in separate columns
    pivot_df = df.pivot(index='participantId', columns='condition', values='correctness').reset_index()
    # print("printing pivot df")
    # print(pivot_df)

    # Calculate Wilcoxon test for each pair
    wilcoxon_results = []

    # Diagnostic Code for Wilcoxon Test
    conditions_list = ['Just Visualization', 'Just Text Description', 'Text and Visualization Combined']
    missing_data_pairs = []  # To collect pairs with insufficient data

    for i in range(len(conditions)):
        for j in range(i + 1, len(conditions)):

            cond1 = conditions_list[i]
            cond2 = conditions_list[j]

            # Extract paired data and drop NaN values
            data1 = pivot_df[cond1].dropna()
            data2 = pivot_df[cond2].dropna()

            # Check if there are any valid pairs
            if len(data1) == 0 or len(data2) == 0:
                # print(f"No valid pairs available for comparison between {cond1} and {cond2}.")
                missing_data_pairs.append((cond1, cond2))  # Record the pair with missing data
                wilcoxon_results.append({'Condition 1': cond1, 'Condition 2': cond2, 'W': np.nan, 'p': np.nan, 'd': np.nan})
            else:
                # Perform the Wilcoxon Signed-Rank Test
                w_stat, p_value = wilcoxon(data1, data2)
                # print(f"W-statistic: {w_stat}, p-value: {p_value}")
                wilcoxon_results.append({'Condition 1': cond1, 'Condition 2': cond2, 'W': w_stat, 'p': p_value, 'd': cohens_d_values.get((cond1, cond2), 'NaN')})
                # Add the new check for identical values here
                identical_values = all(data1 == data2)
                # print("Are the values identical?:", identical_values)

    wilcoxon_df = pd.DataFrame(wilcoxon_results)
    # Before merging Wilcoxon test results
    print("Wilcoxon statistical test results:")
    print(wilcoxon_df)

    # Merge these results back to the original DataFrame
    df = df.merge(wilcoxon_df, left_on='condition', right_on='Condition 1', how='left')

    # Check if 'd' column exists, if not, create it
    if 'd' not in df.columns:
        print("'d' column not found. Creating it with NaN values.")
        df['d'] = np.nan
    else:
        print("'d' column found. Filling NaN values.")
        df['d'] = df['d'].fillna(np.nan)

    df['n'] = df.groupby('condition')['participantId'].transform('count')

    # Handle potential NaN in 'd' values
    df['test_results'] = df.apply(lambda row: f"n={int(row['n'])}\nW={row['W']:.2f}\np={row['p']:.3f}\nd={row['d'] if pd.notna(row['d']) else 'NaN'}", axis=1)




    # After processing, report the pairs that lacked sufficient data
    if missing_data_pairs:
        print("\nPairs with insufficient data for Wilcoxon Test:")
        for pair in missing_data_pairs:
            print(f"{pair[0]} vs {pair[1]}")

    return df, wilcoxon_df

# Apply this function to your `merged_df`
stats_df, wilcoxon_df = add_stats(merged_df)
# print(stats_df.head())


Wilcoxon statistical test results:
             Condition 1                      Condition 2      W         p  \
0     Just Visualization            Just Text Description  527.0  0.510888   
1     Just Visualization  Text and Visualization Combined  566.5  0.108850   
2  Just Text Description  Text and Visualization Combined  512.0  0.418882   

          d  
0  0.164363  
1 -0.269231  
2 -0.130513  
'd' column found. Filling NaN values.


  df = df.groupby(['condition'], group_keys=False).apply(calculate_ci)


Jitter Plot

In [20]:
stats_df['condition'] = stats_df['condition'].replace({
    'Just Visualization': 'JV',
    'Just Text Description': 'JT',
    'Text and Visualization Combined': 'TV'
})

def split_beeswarm_facet(data, split_by, stats=True, legend=True):

    base = alt.Chart(data).encode(
        y=alt.Y(f"{split_by}:N", title=None, sort=["JV", "JT", "TV"], axis=alt.Axis(titleFontSize=15, labelFontSize=15)),
        x=alt.X("correctness:Q",
                title="Correctness (%)",  # This adds the x-axis label
                scale=alt.Scale(domain=[0, 100], nice=False),
                axis=alt.Axis(values=list(range(0, 101, 10)), titleFontSize=15, labelFontSize=12))
    )

    if not legend:
        base = base.encode(
            color=alt.Color(f"{split_by}:N",
                            scale=alt.Scale(domain=["JV", "JT", "TV"],
                                            range=["#A5021D", "#2F88A6", "#635293"]),
                            legend=None)
        )
    else:
        base = base.encode(
            color=alt.Color(f"{split_by}:N",
                            scale=alt.Scale(domain=["JV", "JT", "TV"],
                                            range=["#A5021D", "#2F88A6", "#635293"]),
                            legend=alt.Legend(labelFontSize=15, titleFontSize=15))
        )

    point = base.mark_point(opacity=0.5, tooltip=True).transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
    ).encode(
        x=alt.X("correctness:Q", title="Correctness (%)", scale=alt.Scale(domain=[0, 100], nice=False),
                axis=alt.Axis(values=list(range(0, 101, 10)))),
        yOffset=alt.YOffset("jitter:Q"),
    )

    mean_point = base.mark_point(size=15).encode(
        x=alt.X("mean(correctness):Q", title="Correctness (%)", scale=alt.Scale(domain=[0, 100], nice=False),
                axis=alt.Axis(values=list(range(0, 101, 10))))
    )

    ci_rule = base.mark_rule(thickness=2, opacity=1, color="red", strokeWidth=2).encode(
        x=alt.X("ci0:Q", title="Correctness (%)", scale=alt.Scale(domain=[0, 100], nice=False),
                axis=alt.Axis(values=list(range(0, 101, 10)))),
        x2=alt.X2("ci1:Q", title=None),
    )

    chart = alt.layer(point, mean_point, ci_rule).properties(
        width=400,
        height=100,
    )

    if stats:
        mean_ci_text = base.mark_text(
            dx=-210, dy=8, size=10, align="right"  # Move text to the left
        ).encode(
            text=alt.Text("mean_ci_formatted:N"),
            x=alt.X("mean(correctness):Q", title="Correctness (%)", scale=alt.Scale(domain=[0, 100], nice=False),
                    axis=alt.Axis(values=list(range(0, 101, 10)))),
        )

        stat_text = base.mark_text(
            dx=0, dy=-40, size=12, align="center", fontWeight="bold"
        ).transform_filter(
            alt.datum.condition == "Both"
        ).encode(
            text=alt.Text("test_results:N"),
            x=alt.value(50),
            y=alt.value(-15),  # Positioning above the chart
            color=alt.value('gray'),
        )


        chart = alt.layer(point, mean_point, ci_rule, mean_ci_text, stat_text).properties(
            width=400,
            height=100,
        )

    return chart


In [21]:
# Generate the plot using the updated function
chart = split_beeswarm_facet(stats_df, "condition", stats=True, legend=False)
chart = chart.configure_axis(
    labelFontSize=12,
    titleFontSize=14,
    grid=True,  # Add gridlines
).configure_view(
    strokeWidth=0
).properties(
    title="Correctness by Condition"
)

chart.show()

This heatmap visualizes the total number of correct answers given by participants for each combination of Dataset and Condition.

- X-axis (Condition): Represents the different conditions under which participants answered the questions. These conditions could be "Text and Visualization Combined", "Just Text Description", or "Just Visualization".

- Y-axis (Dataset): Represents the different datasets (e.g., Covid, Tennis, Organizations) used in the study.

- Color (Number of Correct Answers): The color intensity represents the total number of correct answers given for a particular combination of dataset and condition. Darker colors (closer to the maximum value in the viridis color scale) indicate higher numbers of correct answers.

- Tooltips: When you hover over each rectangle in the heatmap, a tooltip will display the specific condition, dataset, and the number of correct answers associated with that combination.'

Explanation of the counted values:

- for each of the pair the partcipants faces 8 factual questions
- highest possible count of the correct answer = 8 * number of participants
- total correct answer (printed on heatmap) = number of correct answer (out of 8) * number of participants

In [22]:
# Calculate the sum of correct answers and count of participants by dataset and condition
summary_df = merged_df.groupby(['dataset', 'condition']).agg(
    total_correct_answers=('correct_number_of_answers', 'sum'),
    num_participants=('participantId', 'nunique')
).reset_index()

# Calculate total possible answers and correctness percentage
summary_df['total_possible_answers'] = summary_df['num_participants'] * 8  # Assuming 8 questions per participant
summary_df['correctness_percentage'] = (summary_df['total_correct_answers'] / summary_df['total_possible_answers']) * 100

# Print the summary information to the console
print("Summary of Correct Answers by Dataset and Condition:")
print(summary_df)

Summary of Correct Answers by Dataset and Condition:
                       dataset                        condition  \
0               Covid Symptoms            Just Text Description   
1               Covid Symptoms               Just Visualization   
2               Covid Symptoms  Text and Visualization Combined   
3  International Organizations            Just Text Description   
4  International Organizations               Just Visualization   
5  International Organizations  Text and Visualization Combined   
6    Tennis Grand Slam Winners            Just Text Description   
7    Tennis Grand Slam Winners               Just Visualization   
8    Tennis Grand Slam Winners  Text and Visualization Combined   

   total_correct_answers  num_participants  total_possible_answers  \
0                    179                26                     208   
1                    213                28                     224   
2                    222                29                     232

In [65]:
heatmap_correct = merged_df.groupby(['dataset', 'condition'])['correct_number_of_answers'].sum().reset_index()

heatmap_correct_chart = alt.Chart(heatmap_correct).mark_rect().encode(
    x=alt.X('condition:O', title='Condition'),
    y=alt.Y('dataset:O', title='Dataset'),
    color=alt.Color('correct_number_of_answers:Q', scale=alt.Scale(scheme='viridis'), title='Number of Correct Answers'),
    tooltip=['condition', 'dataset', 'correct_number_of_answers']
).properties(
    width=400,
    height=300,
    title='Heatmap of Number of Correct Answers by Dataset and Condition'
)

# Add the text labels
text_labels = alt.Chart(heatmap_correct).mark_text(baseline='middle').encode(
    x=alt.X('condition:O', title='Condition'),
    y=alt.Y('dataset:O', title='Dataset'),
    text=alt.Text('correct_number_of_answers:Q'),
    # color=alt.condition(
    #     alt.datum.correct_number_of_answers > heatmap_correct['correct_number_of_answers'].mean(),
    #     alt.value('black'),  # Set text color to black for better contrast
    #     alt.value('white')   # Set text color to white otherwise
    # )
)

# Layer the heatmap and text
final_chart = heatmap_correct_chart + text_labels

# Display the final chart
final_chart


In [66]:


# Group by dataset and condition, and calculate the mean correctness
heatmap_correctness = merged_df.groupby(['dataset', 'condition'])['correctness'].mean().reset_index()

# Create the heatmap for correctness
heatmap_correctness_chart = alt.Chart(heatmap_correctness).mark_rect().encode(
    x=alt.X('condition:O', title='Condition'),
    y=alt.Y('dataset:O', title='Dataset'),
    color=alt.Color('correctness:Q', scale=alt.Scale(scheme='viridis'), title='Correctness (%)'),
    tooltip=['condition', 'dataset', 'correctness']
).properties(
    width=400,
    height=300,
    title='Heatmap of Correctness (%) by Dataset and Condition'
)

# Add the text labels showing correctness percentage
text_labels = alt.Chart(heatmap_correctness).mark_text(baseline='middle').encode(
    x=alt.X('condition:O', title='Condition'),
    y=alt.Y('dataset:O', title='Dataset'),
    text=alt.Text('correctness:Q', format=".2f"),  # Format to show 2 decimal places
    # color=alt.condition(
    #     alt.datum.correctness > heatmap_correctness['correctness'].mean(),
    #     alt.value('black'),  # Set text color to black for better contrast
    #     alt.value('white')   # Set text color to white otherwise
    # )
)

# Layer the heatmap and text
final_chart = heatmap_correctness_chart + text_labels

# Display the final chart
final_chart.display()


This heatmap visualizes the total number of correct answers given by participants for each combination of Dataset and Preferred Format.

- X-axis (Preferred Format): Represents the different preferred formats under which participants answered the questions. These formats might include various presentation styles such as "Text and Visualization Combined", "Just Text Description", or "Just Visualization".

- Y-axis (Dataset): Represents the different datasets (e.g., Covid, Tennis, Organizations) used in the study.

- Color (Number of Correct Answers): The color intensity indicates the total number of correct answers given for a particular combination of dataset and preferred format. Darker colors (closer to the maximum value in the viridis color scale) represent higher numbers of correct answers.

- Tooltips: When hovering over each rectangle in the heatmap, a tooltip displays the specific preferred format, dataset, and the number of correct answers associated with that combination.

- Text Labels: The heatmap is overlaid with text labels showing the exact number of correct answers for each combination. The text color is dynamically set to black if the number of correct answers is above the average, ensuring better contrast against the background color, otherwise, the text is white.

Explanation of the counted values:

- for each of the pair the partcipants faces 8 factual questions
- highest possible count of the correct answer = 8 * number of participants
- total correct answer (printed on heatmap) = number of correct answer (out of 8) * number of participants

In [25]:
# Calculate the sum of correct answers and count of participants by dataset and preferred format
summary_df_format = merged_df.groupby(['dataset', 'preferred_format']).agg(
    total_correct_answers=('correct_number_of_answers', 'sum'),
    num_participants=('participantId', 'nunique')
).reset_index()

# Calculate total possible answers and correctness percentage
summary_df_format['total_possible_answers'] = summary_df_format['num_participants'] * 8  # Assuming 8 questions per participant
summary_df_format['correctness_percentage'] = (summary_df_format['total_correct_answers'] / summary_df_format['total_possible_answers']) * 100

# Print the summary information to the console
print("Summary of Correct Answers by Dataset and Preferred Format:")
print(summary_df_format)

Summary of Correct Answers by Dataset and Preferred Format:
                       dataset                 preferred_format  \
0               Covid Symptoms            Just Text Description   
1               Covid Symptoms               Just Visualization   
2               Covid Symptoms  Text and Visualization Combined   
3  International Organizations            Just Text Description   
4  International Organizations               Just Visualization   
5  International Organizations  Text and Visualization Combined   
6    Tennis Grand Slam Winners            Just Text Description   
7    Tennis Grand Slam Winners               Just Visualization   
8    Tennis Grand Slam Winners  Text and Visualization Combined   

   total_correct_answers  num_participants  total_possible_answers  \
0                     53                 8                      64   
1                    109                15                     120   
2                    452                60                 

In [67]:
heatmap_correct_format = merged_df.groupby(['dataset', 'preferred_format'])['correct_number_of_answers'].sum().reset_index()

heatmap_correct_format_chart = alt.Chart(heatmap_correct_format).mark_rect().encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('dataset:O', title='Dataset'),
    color=alt.Color('correct_number_of_answers:Q', scale=alt.Scale(scheme='viridis'), title='Number of Correct Answers'),
    tooltip=['preferred_format', 'dataset', 'correct_number_of_answers']
).properties(
    width=400,
    height=300,
    title='Heatmap of Number of Correct Answers by Dataset and Preferred Format'
)

# Add the text labels
text_labels_format = alt.Chart(heatmap_correct_format).mark_text(baseline='middle').encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('dataset:O', title='Dataset'),
    text=alt.Text('correct_number_of_answers:Q'),
    # color=alt.condition(
    #     alt.datum.correct_number_of_answers > heatmap_correct_format['correct_number_of_answers'].mean(),
    #     alt.value('black'),  # Set text color to black for better contrast
    #     alt.value('white')   # Set text color to white otherwise
    # )
)

# Layer the heatmap and text
final_chart_format = heatmap_correct_format_chart + text_labels_format

# Display the final chart
final_chart_format.display()

In [68]:


# Group by dataset and preferred format, and calculate the mean correctness
heatmap_correctness_format = merged_df.groupby(['dataset', 'preferred_format'])['correctness'].mean().reset_index()

# Create the heatmap for correctness
heatmap_correctness_format_chart = alt.Chart(heatmap_correctness_format).mark_rect().encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('dataset:O', title='Dataset'),
    color=alt.Color('correctness:Q', scale=alt.Scale(scheme='viridis'), title='Correctness (%)'),
    tooltip=['preferred_format', 'dataset', 'correctness']
).properties(
    width=400,
    height=300,
    title='Heatmap of Correctness (%) by Dataset and Preferred Format'
)

# Add the text labels showing correctness percentage
text_labels_format = alt.Chart(heatmap_correctness_format).mark_text(baseline='middle').encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('dataset:O', title='Dataset'),
    text=alt.Text('correctness:Q', format=".2f"),  # Format to show 2 decimal places
    # color=alt.condition(
    #     alt.datum.correctness > heatmap_correctness_format['correctness'].mean(),
    #     alt.value('black'),  # Set text color to black for better contrast
    #     alt.value('white')   # Set text color to white otherwise
    # )
)

# Layer the heatmap and text
final_chart_format = heatmap_correctness_format_chart + text_labels_format

# Display the final chart
final_chart_format.display()


This heatmap visualizes the total number of correct answers given by participants for each combination of Condition and Preferred Format.

- X-axis (Preferred Format): Represents the different preferred formats under which participants answered the questions. These formats might include various presentation styles such as "Text and Visualization Combined", "Just Text Description", or "Just Visualization".

- Y-axis (Condition): Represents the different conditions under which the study was conducted. These conditions could be different experimental setups or scenarios in which participants were placed.

- Color (Number of Correct Answers): The color intensity indicates the total number of correct answers given for a particular combination of condition and preferred format. Darker colors (closer to the maximum value in the viridis color scale) represent higher numbers of correct answers.

- Tooltips: When hovering over each rectangle in the heatmap, a tooltip displays the specific condition, preferred format, and the number of correct answers associated with that combination.

- Text Labels: The heatmap is overlaid with text labels showing the exact number of correct answers for each combination. The text color is dynamically set to black if the number of correct answers is above the average, ensuring better contrast against the background color, otherwise, the text is white.

Explanation of the counted values:

- for each of the pair the partcipants faces 8 factual questions
- highest possible count of the correct answer = 8 * number of participants
- total correct answer (printed on heatmap) = number of correct answer (out of 8) * number of participants

In [28]:
summary_df_condition_format = merged_df.groupby(['condition', 'preferred_format']).agg(
    total_correct_answers=('correct_number_of_answers', 'sum'),
    num_participants=('participantId', 'nunique')
).reset_index()

# Calculate total possible answers and correctness percentage
summary_df_condition_format['total_possible_answers'] = summary_df_condition_format['num_participants'] * 8  # Assuming 8 questions per participant
summary_df_condition_format['correctness_percentage'] = (summary_df_condition_format['total_correct_answers'] / summary_df_condition_format['total_possible_answers']) * 100

# Print the summary information to the console
print("Summary of Correct Answers by Condition and Preferred Format:")
print(summary_df_condition_format)

Summary of Correct Answers by Condition and Preferred Format:
                         condition                 preferred_format  \
0            Just Text Description            Just Text Description   
1            Just Text Description               Just Visualization   
2            Just Text Description  Text and Visualization Combined   
3               Just Visualization            Just Text Description   
4               Just Visualization               Just Visualization   
5               Just Visualization  Text and Visualization Combined   
6  Text and Visualization Combined            Just Text Description   
7  Text and Visualization Combined               Just Visualization   
8  Text and Visualization Combined  Text and Visualization Combined   

   total_correct_answers  num_participants  total_possible_answers  \
0                     53                 8                      64   
1                    106                15                     120   
2                

In [69]:
heatmap_correct_condition_format = merged_df.groupby(['condition', 'preferred_format'])['correct_number_of_answers'].sum().reset_index()

heatmap_correct_condition_format_chart = alt.Chart(heatmap_correct_condition_format).mark_rect().encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('condition:O', title='Condition'),
    color=alt.Color('correct_number_of_answers:Q', scale=alt.Scale(scheme='viridis'), title='Number of Correct Answers'),
    tooltip=['condition', 'preferred_format', 'correct_number_of_answers']
).properties(
    width=400,
    height=300,
    title='Heatmap of Number of Correct Answers by Condition and Preferred Format'
)

# Add the text labels
text_labels_condition_format = alt.Chart(heatmap_correct_condition_format).mark_text(baseline='middle').encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('condition:O', title='Condition'),
    text=alt.Text('correct_number_of_answers:Q'),
    # color=alt.condition(
    #     alt.datum.correct_number_of_answers > heatmap_correct_condition_format['correct_number_of_answers'].mean(),
    #     alt.value('black'),  # Set text color to black for better contrast
    #     alt.value('white')   # Set text color to white otherwise
    # )
)

# Layer the heatmap and text
final_chart_condition_format = heatmap_correct_condition_format_chart + text_labels_condition_format

# Display the final chart
final_chart_condition_format.display()

In [70]:


# Group by condition and preferred format, and calculate the mean correctness
heatmap_correctness_condition_format = merged_df.groupby(['condition', 'preferred_format'])['correctness'].mean().reset_index()

# Create the heatmap for correctness
heatmap_correctness_condition_format_chart = alt.Chart(heatmap_correctness_condition_format).mark_rect().encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('condition:O', title='Condition'),
    color=alt.Color('correctness:Q', scale=alt.Scale(scheme='viridis'), title='Correctness (%)'),
    tooltip=['condition', 'preferred_format', 'correctness']
).properties(
    width=400,
    height=300,
    title='Heatmap of Correctness (%) by Condition and Preferred Format'
)

# Add the text labels showing correctness percentage
text_labels_condition_format = alt.Chart(heatmap_correctness_condition_format).mark_text(baseline='middle').encode(
    x=alt.X('preferred_format:O', title='Preferred Format'),
    y=alt.Y('condition:O', title='Condition'),
    text=alt.Text('correctness:Q', format=".2f"),  # Format to show 2 decimal places
    # color=alt.condition(
    #     alt.datum.correctness > heatmap_correctness_condition_format['correctness'].mean(),
    #     alt.value('black'),  # Set text color to black for better contrast
    #     alt.value('white')   # Set text color to white otherwise
    # )
)

# Layer the heatmap and text
final_chart_condition_format = heatmap_correctness_condition_format_chart + text_labels_condition_format

# Display the final chart
final_chart_condition_format.display()


Bar plot
- there are three sub plots
- each of them are for each of the conditions
- each of them plots participant ids in X-axis, and Total number of correct answers for each of them in Y-axis

In [72]:
# # Map participantId to labels like P1, P2, etc.
# participant_map = {pid: f'P{i+1}' for i, pid in enumerate(merged_df['participantId'].unique())}
# merged_df['participant_label'] = merged_df['participantId'].map(participant_map)

# # Create a filtered DataFrame for each condition
# conditions = ["Just Visualization", "Just Text Description", "Text and Visualization Combined"]

# # Create a list to hold the individual bar charts for each condition
# charts = []

# for condition in conditions:
#     # Filter the DataFrame for the current condition
#     condition_df = merged_df[merged_df['condition'] == condition]

#     # Create a bar plot for the current condition
#     bar_chart = alt.Chart(condition_df).mark_bar(size=9).encode(
#         x=alt.X('participant_label:O', title='Participant', sort=participant_map.values(), axis=alt.Axis(labelAngle=0)),
#         y=alt.Y('sum(correct_number_of_answers):Q', title='Total Correct Answers'),
#         tooltip=['participant_label', 'sum(correct_number_of_answers)'],
#         color=alt.Color('participant_label:N', legend=None)
#     ).properties(
#         title=f'Total Correct Answers by Participant - {condition}',
#         width=1000,
#         height=300
#     )

#     # Append the chart to the list
#     charts.append(bar_chart)

# # Combine the charts into a vertical stack
# final_chart = alt.vconcat(*charts).resolve_scale(
#     y='independent'
# )

# # Display the final chart
# final_chart.display()


- Preparing data for correctness per condition per question (as a histogram)
- Each sub plot represents correctness per question for JV, JT, TV

In [32]:
def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)

    conditions = {
        'Just Visualization': ['Covid-Vis-Q1', 'Tennis-Vis-Q1', 'Organizations-Vis-Q1'],
        'Just Text Description': ['Covid-Text-Q1', 'Tennis-Text-Q1', 'Organizations-Text-Q1'],
        'Text and Visualization Combined': ['Covid-Both-Q1', 'Tennis-Both-Q1', 'Organizations-Both-Q1']
    }

    # Filter for relevant trialIds
    df = df[df['trialId'].isin(sum(conditions.values(), []))]

    # Convert 'isCorrectlyAnswered' to boolean
    df['isCorrectlyAnswered'] = df['isCorrectlyAnswered'].map({'True': True, 'False': False})

    # Create condition column
    condition_map = {trialId: condition for condition, trialIds in conditions.items() for trialId in trialIds}
    df['condition'] = df['trialId'].map(condition_map)

    return df

def calculate_correctness(df):
    # Group by condition and responseId, calculate percentage correct
    correctness = df.groupby(['condition', 'responseId'])['isCorrectlyAnswered'].agg(['sum', 'count'])
    correctness['percentage'] = (correctness['sum'] / correctness['count']) * 100

    # print(correctness)
    # Reset the index for the DataFrame
    return correctness.reset_index()


def calculate_ci(data, confidence=0.95):
    mean = np.mean(data)
    sem = stats.sem(data)
    ci = stats.t.interval(confidence, len(data)-1, loc=mean, scale=sem)
    return mean, ci[0], ci[1]

def prepare_histogram_data(correctness_df):
    histogram_data = []
    condition_stats = []

    for condition in correctness_df['condition'].unique():
        condition_data = correctness_df[correctness_df['condition'] == condition]

        for i in range(1, 9):  # 8 questions (voq1 to voq8)
            question_data = condition_data[condition_data['responseId'] == f'voq{i}']
            mean_percentage = question_data['percentage'].mean()

            histogram_data.append({
                'condition': condition,
                'question': f'Q{i}',
                'percentage': mean_percentage
            })
        # Calculate mean and CI for the condition
        all_percentages = condition_data['percentage'].values
        mean, ci_low, ci_high = calculate_ci(all_percentages)
        condition_stats.append({
            'condition': condition,
            'mean': mean,
            'ci_low': ci_low,
            'ci_high': ci_high,
            'mean_ci_formatted': f"{ci_low:.1f} — {mean:.1f} — {ci_high:.1f}"
        })

    return pd.DataFrame(histogram_data), pd.DataFrame(condition_stats)


In [33]:
def plot_histogram(data, condition_stats):
    # Replace Q1, Q2, ..., Q8 with actual questions using a mapping
    question_mapping = {
        'Q1': 'How many sets are there?',
        'Q2': 'What is the largest set?',
        'Q3': 'What is the largest intersection?',
        'Q4': 'How large is the largest intersection?',
        'Q5': 'How many sets make up the largest intersection?',
        'Q6': 'How similar are the set sizes?',
        'Q7': 'Is the largest set present in the largest intersection?',
        'Q8': 'Is the all-set intersection (intersection having all the sets) present?'
    }

    # Apply the mapping to the 'question' column
    data['question'] = data['question'].map(question_mapping)

    # Calculate overall correctness across all questions for each condition
    overall_correctness = data.groupby('condition')['percentage'].mean().reset_index()
    overall_correctness['question'] = 'Overall Correctness'  # Add an identifier for the overall correctness row
    overall_data = pd.concat([data, overall_correctness], ignore_index=True)

    # Function to create a chart for each condition
    def create_individual_chart(data, condition, title, color, show_y_labels):
        base = alt.Chart(data[data['condition'] == condition]).encode(
            y=alt.Y('question:N',
                    title=None,  # Remove the y-axis title
                    axis=alt.Axis(labelAngle=0, labels=show_y_labels, labelLimit=300),  # Control y-axis labels visibility and prevent truncation
                    sort=['How many sets are there?',
                          'What is the largest set?',
                          'What is the largest intersection?',
                          'How large is the largest intersection?',
                          'How many sets make up the largest intersection?',
                          'How similar are the set sizes?',
                          'Is the largest set present in the largest intersection?',
                          'Is the all-set intersection (intersection having all the sets) present?',
                          'Overall Correctness']),  # Ensure correct order
            x=alt.X('percentage:Q', title=None, scale=alt.Scale(domain=[0, 120]), axis=alt.Axis(orient='top')),  # Remove the x-axis title
            color=alt.value(color)  # Apply the specific color for each condition
        ).properties(
            width=100,
            height=300,
            title=title  # Add the condition name as the title
        )

        bars = base.mark_bar(size=20, orient='horizontal')

        # Create a conditional format for the text
        text = base.mark_text(
            align='left',
            baseline='middle',
            dx=3
        ).encode(
            text=alt.Text('percentage:Q', format=",.1~f")  # Conditional format to display decimals only if needed
        )

        # Draw a reference line for overall correctness
        ref_line = alt.Chart(overall_correctness[overall_correctness['condition'] == condition]).mark_rule(
            color='black',
            strokeDash=[4, 2],  # Dashed line style
        ).encode(
            x='percentage:Q',
            tooltip=['percentage:Q']
        )

        return bars + text + ref_line

    # Create individual charts for each condition with appropriate titles and colors
    chart_vis = create_individual_chart(overall_data, 'Just Visualization', title='JV', color='#A5021D', show_y_labels=True)  # Show y labels for the first chart
    chart_text = create_individual_chart(overall_data, 'Just Text Description', title='JT', color='#2F88A6', show_y_labels=False)  # Hide y labels for the second chart
    chart_combined = create_individual_chart(overall_data, 'Text and Visualization Combined', title='TV', color='#635293', show_y_labels=False)  # Hide y labels for the third chart

    # Concatenate the charts horizontally and share the y-axis
    combined_chart = alt.hconcat(
        chart_vis, chart_text, chart_combined
    ).resolve_scale(
        y='shared'  # Share the y-axis
    )

    # Create a text annotation for the number of participants
    text_annotation = alt.Chart(pd.DataFrame({'text': ["For each condition, N=83"]})).mark_text(
        align='center',
        baseline='top',
        fontSize=12
    ).encode(
        text='text:N'
    ).properties(
        width=300
    )

    # Combine the charts with the text annotation
    final_chart = alt.vconcat(combined_chart, text_annotation).configure_axis(
        grid=False
    ).configure_view(
        stroke=None
    ).configure_title(
        fontSize=14
    ).configure_legend(
        disable=True
    )

    return final_chart


file_path = './data/final-study.csv'
df = load_and_preprocess_data(file_path)
correctness_df = calculate_correctness(df)
histogram_data, condition_stats = prepare_histogram_data(correctness_df)

chart = plot_histogram(histogram_data, condition_stats)
chart

In [34]:
def plot_histogram_withoutOverall(data, condition_stats):
    # Replace Q1, Q2, ..., Q8 with actual questions using a mapping
    question_mapping = {
        'Q1': 'How many sets are there?',
        'Q2': 'What is the largest set?',
        'Q3': 'What is the largest intersection?',
        'Q4': 'How large is the largest intersection?',
        'Q5': 'How many sets make up the largest intersection?',
        'Q6': 'How similar are the set sizes?',
        'Q7': 'Is the largest set present in the largest intersection?',
        'Q8': 'Is the all-set intersection (intersection having all the sets) present?'
    }

    # Apply the mapping to the 'question' column
    data['question'] = data['question'].map(question_mapping)

    # Calculate overall correctness across all questions for each condition
    overall_correctness = data.groupby('condition')['percentage'].mean().reset_index()
    overall_correctness['question'] = 'Overall Correctness'  # Add an identifier for the overall correctness row
    overall_data = pd.concat([data, overall_correctness], ignore_index=True)

    # Function to create a chart for each condition
    def create_individual_chart(data, condition, title, color, show_y_labels):
        base = alt.Chart(data[data['condition'] == condition]).encode(
            y=alt.Y('question:N',
                    title=None,  # Remove the y-axis title
                    axis=alt.Axis(labelAngle=0, labels=show_y_labels, labelLimit=300),  # Control y-axis labels visibility and prevent truncation
                    sort=['How many sets are there?',
                          'What is the largest set?',
                          'What is the largest intersection?',
                          'How large is the largest intersection?',
                          'How many sets make up the largest intersection?',
                          'How similar are the set sizes?',
                          'Is the largest set present in the largest intersection?',
                          'Is the all-set intersection (intersection having all the sets) present?',
                          'Overall Correctness']),  # Ensure correct order
            x=alt.X('percentage:Q', title=None, scale=alt.Scale(domain=[0, 120]), axis=alt.Axis(orient='top')),  # Remove the x-axis title
            color=alt.value(color)  # Apply the specific color for each condition
        ).properties(
            width=100,
            height=300,
            title=title  # Add the condition name as the title
        )

        bars = base.mark_bar(size=20, orient='horizontal')

        # Create a conditional format for the text
        text = base.mark_text(
            align='left',
            baseline='middle',
            dx=3
        ).encode(
            text=alt.Text('percentage:Q', format=",.1~f")  # Conditional format to display decimals only if needed
        )



        return bars + text

    # Create individual charts for each condition with appropriate titles and colors
    chart_vis = create_individual_chart(overall_data, 'Just Visualization', title='JV', color='#A5021D', show_y_labels=True)  # Show y labels for the first chart
    chart_text = create_individual_chart(overall_data, 'Just Text Description', title='JT', color='#2F88A6', show_y_labels=False)  # Hide y labels for the second chart
    chart_combined = create_individual_chart(overall_data, 'Text and Visualization Combined', title='TV', color='#635293', show_y_labels=False)  # Hide y labels for the third chart

    # Concatenate the charts horizontally and share the y-axis
    combined_chart = alt.hconcat(
        chart_vis, chart_text, chart_combined
    ).resolve_scale(
        y='shared'  # Share the y-axis
    )

    # Create a text annotation for the number of participants
    text_annotation = alt.Chart(pd.DataFrame({'text': ["For each condition, N=83"]})).mark_text(
        align='center',
        baseline='top',
        fontSize=12
    ).encode(
        text='text:N'
    ).properties(
        width=300
    )

    # Combine the charts with the text annotation
    final_chart = alt.vconcat(combined_chart, text_annotation).configure_axis(
        grid=False
    ).configure_view(
        stroke=None
    ).configure_title(
        fontSize=14
    ).configure_legend(
        disable=True
    )

    return final_chart

file_path = './data/final-study.csv'
df = load_and_preprocess_data(file_path)
correctness_df = calculate_correctness(df)
histogram_data, condition_stats = prepare_histogram_data(correctness_df)

chart = plot_histogram_withoutOverall(histogram_data, condition_stats)
chart.display()



Bar plots with mean dot and interval (95% cI)
- Plots overall correctness in X-axis
- Plots number of participants in Y-axis


In [35]:
def calculate_overall_correctness(df):
    correctness = df.groupby(['participantId', 'condition'])['isCorrectlyAnswered'].sum().reset_index()

    distribution = []
    for condition in correctness['condition'].unique():
        condition_data = correctness[correctness['condition'] == condition]

        for i in range(9):  # 0/8 to 8/8
            count = (condition_data['isCorrectlyAnswered'] == i).sum()
            percentage = (i / 8) * 100
            distribution.append({
                'condition': condition,
                'correctness (x/8)': f"{i}/8 ({percentage:.1f}%)",
                'number-of-participants': count
            })

    result_df = pd.DataFrame(distribution)
    result_df = result_df.sort_values(['condition', 'correctness (x/8)'])
    # print(result_df)

    return result_df

def calculate_mean_and_ci(data):
    results = []

    for condition in data['condition'].unique():
        condition_data = data[data['condition'] == condition].copy()
        condition_data['correctness_percentage'] = condition_data['correctness (x/8)'].apply(
            lambda x: float(x.split('(')[1].strip('%)'))
        )

        total_participants = condition_data['number-of-participants'].sum()

        correctness_values = []
        for index, row in condition_data.iterrows():
            correctness_values.extend([row['correctness_percentage']] * int(row['number-of-participants']))

        mean, lower_bound, upper_bound = calculate_ci(correctness_values)

        results.append({
            'condition': condition,
            'mean': mean,
            'ci_low': lower_bound,
            'ci_high': upper_bound
        })

    results_df = pd.DataFrame(results)
    return results_df

def calculate_ci(data, confidence=0.95):

    mean = np.mean(data)
    sem = stats.sem(data)
    ci = stats.t.interval(confidence, len(data)-1, loc=mean, scale=sem)

    return mean, ci[0], ci[1]


In [36]:
def plot_histogram_distribution(data, ci_data):

    base = alt.Chart(data).encode(
        x=alt.X('correctness (x/8):O', title='Correctness'),
        y=alt.Y('number-of-participants:Q', title='Number of Participants'),
    ).properties(
        width=300,
        height=100
    )

    bars = base.mark_bar()

    text = base.mark_text(
        align='center',
        baseline='bottom',
        dy=-5
    ).encode(
        text=alt.Text('number-of-participants:Q')
    )

    # Creating mean point and CI for each condition
    mean_ci = alt.Chart(ci_data).encode(
        y=alt.Y('condition:N')
    )

    interval = mean_ci.mark_rule(color="condition:N").encode(
        x='ci_low:Q',
        x2='ci_high:Q'
    )

    mean_point = mean_ci.mark_point(color='condition:N', size=60).encode(
        x='mean:Q'
    )
    # return  interval + mean_point

    chart = (bars + text).facet(
        row=alt.Row('condition:N')
    ).resolve_scale(
        # y='independent'
    )
    return chart
    # return alt.layer(chart, interval, mean_point)


file_path = './data/final-study.csv'
df = load_and_preprocess_data(file_path)
correctness_distribution = calculate_overall_correctness(df)
ci_data = calculate_mean_and_ci(correctness_distribution)
ci_data['mean'] = ci_data['mean'] / 100 * 8
ci_data['ci_low'] = ci_data['ci_low'] / 100 * 8
ci_data['ci_high'] = ci_data['ci_high'] / 100 * 8
ci_data


plot_histogram_distribution(correctness_distribution, ci_data)

temp_df = df[df['isCorrectlyAnswered'] == True]

asd_df = temp_df.groupby(['participantId', 'condition']).aggregate(
    {"isCorrectlyAnswered": ['count']}).reset_index()
asd_df.columns = ['participantId', 'condition', 'numCorrect']
asd_df['numCorrect'] = asd_df['numCorrect'] / 8 * 100

# checking to see if question 6 changes things a lot
alt_asd_df = temp_df[temp_df['responseId'] != 'voq6'].groupby(['participantId', 'condition']).aggregate(
    {"isCorrectlyAnswered": ['count']}).reset_index()
alt_asd_df.columns = ['participantId', 'condition', 'numCorrect']
alt_asd_df['numCorrect'] = alt_asd_df['numCorrect'] / 7 * 100

conditions = asd_df['condition'].unique()
charts = []

for condition in conditions:
    local_df = asd_df[asd_df['condition']==condition]
    chart = alt.Chart(local_df).mark_bar().encode(
        # x=alt.X('numCorrect', scale=alt.Scale(domain=[0, 8])),
        x=alt.X('numCorrect', scale=alt.Scale(domain=[0, 100])),
        y=alt.Y('count()', scale=alt.Scale(domain=[0, 50])),
        color='condition:N',
        # row='condition:N',
        # tooltip=['condition', 'numCorrect', 'count()']
    ).properties(width=300, height=100)
    mean_line = alt.Chart(local_df).mark_rule().encode(x='mean(numCorrect)')
    mean_num = alt.Chart(local_df).mark_text().encode(x='mean(numCorrect)', y=alt.value(10), text=alt.Text('mean(numCorrect)', format=".1f"))
    ci0_num = alt.Chart(local_df).mark_text().encode(x='ci0(numCorrect)', y=alt.value(20), text=alt.Text('ci0(numCorrect)', format=".1f"))
    ci1_num = alt.Chart(local_df).mark_text().encode(x='ci1(numCorrect)', y=alt.value(
        30), text=alt.Text('ci1(numCorrect)', format=".1f"))

    ci_line = alt.Chart(local_df).mark_rule().encode(
        x='ci0(numCorrect)',
        x2='ci1(numCorrect)',
        tooltip=['ci0(numCorrect)', 'ci1(numCorrect)']

        # size=alt.value(2)
    )
    text_labels = alt.Chart(local_df).mark_text(baseline='middle').encode(
        x=alt.X('numCorrect'),
        y=alt.Y('count()'),
        text=alt.Text('count()'),
    )
    charts.append(chart + mean_line + ci_line +
                  text_labels + mean_num + ci0_num + ci1_num)

print(asd_df.aggregate({"numCorrect": ['mean', 'std', 'count']}))
print(alt_asd_df.aggregate({"numCorrect": ['mean', 'std', 'count']}))
alt.vconcat(*charts)
# alt.Chart(asd_df).mark_errorbar(extent="ci").encode(
#     x=alt.X('numCorrect'),
#     y="condition",
#     color='condition:N'
# ).properties(width=300, height=100)

       numCorrect
mean     89.35743
std      14.23233
count   249.00000
       numCorrect
mean    92.197361
std     14.277615
count  249.000000


In [50]:
df = load_and_preprocess_data(file_path)

def make_heatmap(df):
    # data prep for heatmap
    grouped_df = df.groupby(['dataset', 'condition', 'isCorrectlyAnswered']).size().reset_index()
    summed_df = df.groupby(['dataset', 'condition']).count().reset_index()
    summed_df = summed_df[['dataset', 'condition', 'participantId']]
    summed_df.columns = ['dataset', 'condition', 'totalParticipants']

    merged_df = grouped_df.merge(summed_df, on=['dataset', 'condition'])
    merged_df = merged_df[merged_df['isCorrectlyAnswered'] == True]
    merged_df.columns = ['dataset', 'condition',  'isCorrectlyAnswered', 'correct_number_of_answers', 'totalParticipants']
    merged_df['rate'] = merged_df['correct_number_of_answers'] / merged_df['totalParticipants'] * 100
    merged_df.reset_index()

    # plot 
    base = alt.Chart(merged_df).encode(y="dataset",x="condition")
    boxes = base.mark_rect().encode(color=alt.Color("rate", scale=alt.Scale(scheme="blues"))).properties(width=300, height=100)
    text = base.mark_text().encode(text=alt.Text("rate", format=".2f"))
    chart = boxes + text

    # data prep for margin
    margin_grouped_df = df.groupby(['dataset', 'isCorrectlyAnswered']).size().reset_index()
    margin_summed_df = df.groupby(['dataset']).count().reset_index()
    margin_summed_df = margin_summed_df[['dataset', 'participantId']]
    margin_summed_df.columns = ['dataset', 'totalParticipants']

    margin_merged_df = margin_grouped_df.merge(margin_summed_df, on=['dataset'])
    margin_merged_df = margin_merged_df[margin_merged_df['isCorrectlyAnswered'] == True]
    margin_merged_df.columns = ['dataset', 'isCorrectlyAnswered', 'correct_number_of_answers', 'totalParticipants']
    margin_merged_df['rate'] = margin_merged_df['correct_number_of_answers'] / margin_merged_df['totalParticipants'] * 100
    margin_merged_df.reset_index()

    # plot margin
    margin_base = alt.Chart(margin_merged_df).encode(y="dataset")
    margin_boxes = margin_base.mark_rect().encode(color=alt.Color("mean(rate)", scale=alt.Scale(scheme="blues", domain=[79, 97]))).properties(width=300, height=100)
    margin_text = margin_base.mark_text().encode(text=alt.Text("mean(rate)", format=".2f"))
    margin_chart = margin_boxes + margin_text
    return margin_chart
# 
make_heatmap(df) | make_heatmap(df[df['responseId'] != 'voq6'])

Plot survey result of Confidence, Understandability and Effectiveness of informatioon rating

In [38]:
file_path = './data/final-study.csv'
df = pd.read_csv(file_path)

# The trialIds of interest
score_ids_of_interest = {
    'Just Visualization':['Covid-Vis-Q2', 'Tennis-Vis-Q2', 'Organizations-Vis-Q2'],
    'Just Text Description': ['Covid-Text-Q2', 'Tennis-Text-Q2', 'Organizations-Text-Q2'],
    'Text and Visualization Combined':['Covid-Both-Q2', 'Tennis-Both-Q2', 'Organizations-Both-Q2']
}

def assign_condition(trial_id, score_ids_of_interest):
    for condition, ids in score_ids_of_interest.items():
        if trial_id in ids:
            return condition
    return None

score_df = df[df['trialId'].isin(sum(score_ids_of_interest.values(), []))].copy()
score_df['condition'] = score_df['trialId'].apply(lambda x: assign_condition(x, score_ids_of_interest))
score_df = score_df[['participantId', 'trialId','responseId','responsePrompt','answer','condition', 'dataset']]

# print(score_df.head(9))

def create_condition_df(condition):
    condition_df = score_df[score_df['condition'] == condition]

    grouped = condition_df.groupby('participantId')['answer'].apply(list).reset_index()

    # Get the questions for this condition
    questions = condition_df['responsePrompt'].unique()

    new_df = pd.DataFrame({
        'PID': grouped['participantId'],
        questions[0]: grouped['answer'].apply(lambda x: x[0]),
        questions[1]: grouped['answer'].apply(lambda x: x[1]),
        questions[2]: grouped['answer'].apply(lambda x: x[2])
    })

    return new_df


vis_score_df = create_condition_df('Just Visualization')
text_score_df = create_condition_df('Just Text Description')
both_score_df = create_condition_df('Text and Visualization Combined')

# Print the first few rows of each DataFrame
# print("Visualization Score DataFrame:")
# print(vis_score_df.head())
# print("\nText Description Score DataFrame:")
# print(text_score_df.head())
# print("\nCombined Score DataFrame:")
# print(both_score_df.head())


In [39]:
def create_score_chart(df, title, color):
    dims = df.columns[1:]  # Assuming PID is the first column and the rest are score columns


    # Create a mapping for shorter labels
    label_map = {
        dims[0]: "Confidence",
        dims[1]: "Understandability",
        dims[2]: "Effectiveness"
    }

    # Melt the dataframe to long format
    melted_df = df.melt(id_vars=['PID'], value_vars=dims, var_name='Score Type', value_name='Score')

    # Convert Score to string to treat it as a categorical variable
    melted_df['Score'] = melted_df['Score'].astype(str)

    # Apply the mapping to create a new column with short labels
    melted_df['Score Type'] = melted_df['Score Type'].map(label_map)


    return alt.Chart(melted_df).mark_text().encode(
        x=alt.X("Score:N", scale=alt.Scale(domain=['1','2','3','4','5']), axis=alt.Axis(labelAngle=0)),
        y=alt.Y("count():Q", title="Count of Participants", scale=alt.Scale(domain=[0, 50], nice=False)),
        column=alt.Column("Score Type:N", sort=list(label_map.values())),
        # color=alt.Color("Score:N", scale=alt.Scale(scheme=color)),
        color=alt.value(color),
        tooltip=['Score Type', 'Score', 'count()'],
        text='count()'
    ).properties(
        width=100,
        height=70,
        title=title
    )

    # return
    # text = chart.mark_text(align='center', baseline='bottom').encode(text='count()')
    # .configure_header(
    #     labelOrient='top',
    #     labelPadding=5,
    #     labelFontSize=12
    # ).configure_view(
    #     stroke=None
    # ).configure_title(
    #     fontSize=16,
    #     anchor='middle'
    # )

    # return chart + text

In [40]:
vis_chart = create_score_chart(vis_score_df, "Visualization Scores", "#A5021D")
text_chart = create_score_chart(text_score_df, "Text Description Scores", "#2F88A6")
both_chart = create_score_chart(both_score_df, "Text and Visualization Combined Scores", "#635293")

# Display the charts
alt.vconcat(*[vis_chart, text_chart, both_chart])  

In [106]:
def create_score_chart2(df, title):
    dims = df.columns[1:]  # Assuming PID is the first column and the rest are score columns

    # Create a mapping for shorter labels
    label_map = {
        dims[0]: "Confidence",
        dims[1]: "Understandability",
        dims[2]: "Effectiveness"
    }

    # Melt the dataframe to long format
    melted_df = df.melt(id_vars=['PID'], value_vars=dims, var_name='Score Type', value_name='Score')

    # calculate average score for each score type
    melted_df2 = melted_df.copy()
    melted_df2['Score'] = pd.to_numeric(melted_df['Score'], errors='coerce').astype('Int64')

    avg_score_df = melted_df2.groupby('Score Type')['Score'].mean().reset_index(name='Avg Score')
    avg_score_df['Avg Score'] = avg_score_df['Avg Score'].round(2)


    melted_df2['Score Type'] = melted_df2['Score Type'].map(label_map)
    avg_score_df['Score Type'] = avg_score_df['Score Type'].map(label_map)

    combined_df = melted_df2.merge(avg_score_df, on='Score Type', suffixes=('', '_avg'))
    # cast score as int
    combined_df['Score'] = pd.to_numeric(combined_df['Score'], errors='coerce').astype('Int64')

    charts = []
    for scoreType in combined_df['Score Type'].unique():
        # Create the base chart
        base = alt.Chart(combined_df[combined_df['Score Type'] == scoreType]).encode(
            x=alt.X("Score:O", scale=alt.Scale(domain=[1,2,3,4,5])))

        # Create the bar chart
        bars = base.mark_bar().encode(
            y=alt.Y("count()", title="Count of Participants", scale=alt.Scale(domain=[0, 50], nice=False)),
            color=alt.Color("Score:Q"),
            tooltip=['Score Type', 'Score', 'count()']
        )

        # Create the average score rule
        rule = base.mark_rule(color='red', size=2).encode(
            x='Avg Score:Q',
            tooltip=['Score Type', 'Avg Score:Q']
        )

        # Create text labels for average scores
        text = base.mark_text(align='left', dx=5, dy=-10, color='red').encode(
            x='Avg Score:Q',
            text=alt.Text('Avg Score:Q', format='.2f')
        )

        # Combine the layers
        chart = alt.layer(*[
            bars, 
            rule, 
            text
            ]).properties(width=100, height=70, title=title)
        charts.append(chart)
    return alt.hconcat(*charts)


vis_chart = create_score_chart2(vis_score_df, "Visualization Scores")
text_chart = create_score_chart2(text_score_df, "Text Description Scores")
both_chart = create_score_chart2(both_score_df, "Text and Visualization Combined Scores")

# Display the charts
vis_chart.display()
text_chart.display()
both_chart.display()

In [85]:
df = load_and_preprocess_data(file_path)
df.head(10)

Unnamed: 0,participantId,trialId,trialOrder,responseId,status,percentComplete,description,responsePrompt,answer,correctAnswer,isCorrectlyAnswered,responseMin,responseMax,startTime,endTime,duration,dataset,condition
11,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq1,completed,100.0,Content: Text Only,How many sets are shown in the description?,6,6,True,0,100,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
12,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq2,completed,100.0,Content: Text Only,What is the largest set?,Fatigue,Fatigue,True,undefined,undefined,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
13,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq3,completed,100.0,Content: Text Only,What is the largest intersection?,"Anosmia,Fatigue","Anosmia,Fatigue",True,undefined,undefined,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
14,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq4,completed,100.0,Content: Text Only,How large is the largest intersection?,281,281,True,0,1000000000,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
15,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq5,completed,100.0,Content: Text Only,How many sets make up the largest intersection?,It is the intersection of 2-3 sets,It is the intersection of 2-3 sets,True,undefined,undefined,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
16,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq6,completed,100.0,Content: Text Only,How similar are the set sizes?,Roughly equal,Diverging a lot,False,undefined,undefined,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
17,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq7,completed,100.0,Content: Text Only,Is the largest set present in the largest intersection?,Yes,Yes,True,undefined,undefined,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
18,574dc90512d86b000f833ab0,Covid-Text-Q1,7,voq8,completed,100.0,Content: Text Only,Is the all-set intersection (intersection having all the sets) present?,No,Yes,False,undefined,undefined,2024-08-21T23:02:01.910Z,2024-08-21T23:05:22.137Z,200227,Covid Symptoms,Just Text Description
23,574dc90512d86b000f833ab0,Organizations-Vis-Q1,10,voq1,completed,100.0,Content: Visualization Only,How many sets are shown in the upset plot?,8,8,True,0,100,2024-08-21T23:08:51.313Z,2024-08-21T23:10:02.643Z,71330,International Organizations,Just Visualization
24,574dc90512d86b000f833ab0,Organizations-Vis-Q1,10,voq2,completed,100.0,Content: Visualization Only,What is the largest set?,UPU,UPU,True,undefined,undefined,2024-08-21T23:08:51.313Z,2024-08-21T23:10:02.643Z,71330,International Organizations,Just Visualization
