# Extract Survey Scores

## DAS Scores

In [17]:
import pandas as pd

# Read the survey responses and trim stray whitespace from the column headers
df = pd.read_csv("Pilot Data/redcap_responses.csv").rename(columns=str.strip)

# Subscale items (by question number; item n is read from column das_<n>).
# The scoring function below passes only the *_negative lists through
# reverse_score(); every *_positive list is summed exactly as recorded.
# NOTE(review): confirm against the published DAS scoring key which items are
# meant to be reverse-keyed — this file previously annotated the emotional and
# initiation *_positive lists as reverse-scored, which the code does not do.
executive_positive = [1, 6, 11, 17, 19, 21, 23]  # summed as recorded
executive_negative = [10]  # passed through reverse_score()

emotional_positive = [3, 5, 7, 9, 20]  # summed as recorded
emotional_negative = [12, 15, 24]  # passed through reverse_score()

initiation_positive = [2, 4, 8, 13, 14, 16, 18, 22]  # summed as recorded

# Highest item response value (responses are assumed to run from 0 to 3)
MAX_SCORE = 3


def reverse_score(value):
    """Return the reverse-keyed value of a single item response.

    A response ``value`` maps to ``MAX_SCORE - value``. A missing response
    (anything ``pd.isnull`` reports as null) yields 0 rather than None.
    """
    return 0 if pd.isnull(value) else MAX_SCORE - value

# Per-participant DAS scoring
def score_participant(row):
    """Compute the DAS subscale and total scores for one participant row.

    Items are read from ``row`` as ``das_<n>``; a column that is absent from
    the row counts as 0. Items in the ``*_negative`` lists are passed through
    ``reverse_score``; all other items are summed as recorded.

    Returns a ``pd.Series`` holding, in order, the executive, emotional,
    initiation and total scores. If anything raises while scoring, the error
    is printed and a Series of four ``None`` values is returned instead.
    """
    def as_recorded(items):
        # Plain sum of the responses for the given question numbers.
        return sum(row.get(f'das_{n}', 0) for n in items)

    def reverse_keyed(items):
        # Sum of the responses after reverse_score() has been applied to each.
        return sum(reverse_score(row.get(f'das_{n}', 0)) for n in items)

    try:
        executive = as_recorded(executive_positive) + reverse_keyed(executive_negative)
        emotional = as_recorded(emotional_positive) + reverse_keyed(emotional_negative)
        initiation = as_recorded(initiation_positive)

        scores = [executive, emotional, initiation]
        scores.append(executive + emotional + initiation)
        return pd.Series(scores)
    except Exception as e:
        print(f"❌ Error: {e}")
        return pd.Series([None, None, None, None])

# Score every participant; the four values returned per row become new columns
das_score_columns = [
    'das_executive_score',
    'das_emotional_score',
    'das_initiation_score',
    'das_total_score',
]
df[das_score_columns] = df.apply(score_participant, axis=1)

# Keep only the ID and the scores, exposing the ID as participant_id
output_df = df[['prolific_id'] + das_score_columns]
output_df = output_df.rename(columns={'prolific_id': 'participant_id'})

# Write the scored table to disk
output_df.to_csv("Pilot Data/das_scored_results.csv", index=False)

print("✅ Scoring complete! Limited results saved to 'das_scored_results.csv'.")


✅ Scoring complete! Limited results saved to 'das_scored_results.csv'.


## QUIP Scores

In [7]:
import pandas as pd

# Read the survey responses and trim stray whitespace from the column headers
df = pd.read_csv("Pilot Data/redcap_responses.csv").rename(columns=str.strip)

# Correct a misspelled column name so that it matches the item list below
# ('repeating_activites_2' → 'repeating_activities_2')
df = df.rename(columns={'repeating_activites_2': 'repeating_activities_2'})


def _numbered(stem):
    """Return [stem, stem_2, stem_3, stem_4], the four item columns of a subscale."""
    return [stem] + [f'{stem}_{k}' for k in (2, 3, 4)]


# ICD subscales
gambling_items = _numbered('gambling')
sex_items = _numbered('sex')
buying_items = _numbered('buying')
eating_items = _numbered('eating')

# Punding / hobbyism is scored from two item sets. The performing-task column
# names are irregular, so they are spelled out explicitly.
performing_task_items = ['perform_task', 'performing_task_2', 'performing_tasks_3', 'performing_tasks_4']
repeating_activities_items = _numbered('repeating_activities')

# Scoring function
def score_quip_rs(row):
    """Compute the QUIP-RS subscale and total scores for one participant row.

    Each subscale is the sum of its item columns; a column that is absent
    from the row contributes 0.

    Returns a ``pd.Series`` of eight values, in order: prolific_id, gambling,
    sex, buying, eating, hobbyism/punding, ICD total (the four ICD subscales)
    and QUIP-RS total (ICD total plus hobbyism/punding).

    If anything raises while scoring (for example a missing ``prolific_id``),
    the error is printed and a Series of eight ``None`` values is returned,
    so the result always lines up with the eight columns the caller assigns.
    """
    try:
        prolific_id = row['prolific_id']

        def subscale(items):
            # Sum of the listed item columns for this participant.
            return sum(row.get(item, 0) for item in items)

        gambling_score = subscale(gambling_items)
        sex_score = subscale(sex_items)
        buying_score = subscale(buying_items)
        eating_score = subscale(eating_items)

        # Punding/hobbyism combines both of its item sets
        hobbyism_punding_score = (
            subscale(performing_task_items) +
            subscale(repeating_activities_items)
        )

        # ICD total: the four impulse-control subscales only
        icd_total = gambling_score + sex_score + buying_score + eating_score

        # QUIP-RS total: ICD total plus the punding/hobbyism score
        quip_rs_total = icd_total + hobbyism_punding_score

        return pd.Series([
            prolific_id,
            gambling_score,
            sex_score,
            buying_score,
            eating_score,
            hobbyism_punding_score,
            icd_total,
            quip_rs_total,
        ])

    except Exception as e:
        print(f"❌ Error in scoring row: {e}")
        # One placeholder per output value (8), matching the success path.
        return pd.Series([None] * 8)

# Score every participant; the eight values returned per row are written back
# under these column names (prolific_id is overwritten with the returned ID)
quip_output_columns = [
    'prolific_id',
    'gambling_score',
    'sex_score',
    'buying_score',
    'eating_score',
    'hobbyism_punding_score',
    'icd_total',
    'quip_rs_total',
]
df[quip_output_columns] = df.apply(score_quip_rs, axis=1)

# Final table: the ID (exposed as participant_id) followed by the scores
output_df = df[quip_output_columns].rename(columns={'prolific_id': 'participant_id'})

# Write the scored table to disk
output_df.to_csv("Pilot Data/quip_results_scored.csv", index=False)

print("✅ QUIP-RS scoring complete! Results saved to 'quip_results_scored.csv'.")


✅ QUIP-RS scoring complete! Results saved to 'quip_results_scored.csv'.


## HADS Scoring

In [8]:
import pandas as pd

# Read the survey responses and trim stray whitespace from the column headers
df = pd.read_csv("Pilot Data/redcap_responses.csv").rename(columns=str.strip)

# Response value → item score. "Reverse" flips the 0-3 range, "standard"
# keeps the response as its own score.
reverse_scoring = {response: 3 - response for response in range(4)}
standard_scoring = {response: response for response in range(4)}

# Anxiety subscale: odd-numbered items hads_1 … hads_13. Items 7 and 9 use the
# standard map; the rest use the reverse map.
# NOTE(review): keying taken as-is from this file — confirm against the HADS
# scoring sheet and the response coding used in the export.
anxiety_items = {
    f'hads_{n}': (standard_scoring if n in (7, 9) else reverse_scoring)
    for n in range(1, 14, 2)
}

# Depression subscale: even-numbered items hads_2 … hads_14. Items 6, 8 and 10
# use the reverse map; the rest use the standard map.
depression_items = {
    f'hads_{n}': (reverse_scoring if n in (6, 8, 10) else standard_scoring)
    for n in range(2, 15, 2)
}

# Scoring function for HADS
def score_hads(row):
    """Compute the HADS anxiety, depression and total scores for one row.

    Every item listed in ``anxiety_items`` / ``depression_items`` is looked up
    in ``row`` and converted through that item's scoring map. A missing or
    null response scores 0, as does a response the map does not contain.

    Returns a ``pd.Series`` of [anxiety, depression, total]. If anything
    raises while scoring, the error is printed and a Series of three ``None``
    values is returned instead.
    """
    def subscale_total(item_maps):
        # Add up the mapped score of each item in one subscale.
        total = 0
        for column, mapping in item_maps.items():
            answer = row.get(column, None)
            total += 0 if pd.isnull(answer) else mapping.get(answer, 0)
        return total

    try:
        anxiety = subscale_total(anxiety_items)
        depression = subscale_total(depression_items)
        return pd.Series([anxiety, depression, anxiety + depression])
    except Exception as e:
        print(f"❌ Error in scoring HADS row: {e}")
        return pd.Series([None, None, None])

# Score every participant; the three values returned per row become new columns
hads_score_columns = ['hads_anxiety_score', 'hads_depression_score', 'hads_total_score']
df[hads_score_columns] = df.apply(score_hads, axis=1)

# Final table: the ID (exposed as participant_id) followed by the scores
output_df = df[['prolific_id'] + hads_score_columns]
output_df = output_df.rename(columns={'prolific_id': 'participant_id'})

# Write the scored table to disk
output_df.to_csv("Pilot Data/hads_results_scored.csv", index=False)

print("✅ HADS scoring complete! Results saved to 'hads_results_scored.csv'.")


✅ HADS scoring complete! Results saved to 'hads_results_scored.csv'.


## LARS Scoring

In [10]:
import pandas as pd

# Load your data
df = pd.read_csv("Pilot Data/redcap_responses.csv")

# Normalize column names, as the other scoring cells do, so that stray
# whitespace in the CSV header cannot break the prefix matching or the
# 'prolific_id' lookup further down
df.columns = df.columns.str.strip()

# Columns listed here are reverse-keyed before scoring; none by default
reverse_scored_items = []  # Example: ['ic_i_3', 'ai_ep_2']
MAX_SCORE = 4


def reverse_score(row, col):
    """Return ``row[col]``, flipped to ``MAX_SCORE - row[col]`` when ``col``
    is listed in ``reverse_scored_items``."""
    value = row[col]
    return MAX_SCORE - value if col in reverse_scored_items else value

# Every LARS item column, recognised by its subscale prefix
LARS_ITEM_PREFIXES = ('e_er_', 'ai_ep_', 'ai_i_', 'ic_n_', 'ic_m_', 'ic_i_', 'ic_s_', 'sa_')
all_item_cols = [col for col in df.columns if col.startswith(LARS_ITEM_PREFIXES)]

# Reverse-key the configured item columns. This is done as whole-column
# arithmetic instead of a row-wise apply per column, so columns that are not
# listed in reverse_scored_items are left untouched at no cost.
for col in all_item_cols:
    if col in reverse_scored_items:
        df[col] = MAX_SCORE - df[col]


def _prefix_sum(*prefixes):
    """Row-wise sum of every column whose name starts with one of ``prefixes``."""
    cols = [col for col in df.columns if col.startswith(prefixes)]
    return df[cols].sum(axis=1)


# Score calculations. Most scores are a fixed constant minus the item sum.
# NOTE(review): the constants (125, 40, 20, ...) are taken as-is from this
# file; confirm them against the item counts and response range in the export.
# NOTE(review): DataFrame.sum presumably skips missing responses, which would
# raise a "constant minus sum" score — confirm how missing items should count.
df['Intellectual_Curiosity'] = 125 - _prefix_sum('ic_')  # matches all four ic_* groups
df['Motivation'] = 40 - _prefix_sum('ic_m_')
df['Interest'] = 20 - _prefix_sum('ic_i_')
df['Novelty_Seeking'] = 30 - _prefix_sum('ic_n_')
df['Social_Life'] = 35 - _prefix_sum('ic_s_')

# Unlike the other scores, this one is the plain item sum (no constant)
df['Emotional_Responsiveness'] = _prefix_sum('e_er_')
df['Emotional_Apathy'] = df['Emotional_Responsiveness']  # Same thing, different label

df['Self_Awareness'] = 40 - _prefix_sum('sa_')

df['Action_Initiation'] = 55 - _prefix_sum('ai_ep_', 'ai_i_')
df['Everyday_Productivity'] = 25 - _prefix_sum('ai_ep_')
df['Initiative'] = 30 - _prefix_sum('ai_i_')

# Total LARS Score
df['Total_LARS_Score'] = 255 - df[all_item_cols].sum(axis=1)

# Select output columns (Emotional_Apathy is computed above but not exported)
output_cols = [
    'prolific_id', 'Intellectual_Curiosity', 'Motivation', 'Interest',
    'Novelty_Seeking', 'Social_Life', 'Emotional_Responsiveness', 'Self_Awareness',
    'Action_Initiation', 'Everyday_Productivity', 'Initiative', 'Total_LARS_Score'
]

df_results = df[output_cols].rename(columns={'prolific_id': 'participant_id'})

# Save to CSV
df_results.to_csv('Pilot Data/lars_results_scored.csv', index=False)

print("Scoring complete! File saved as 'lars_results_scored.csv'.")


Scoring complete! File saved as 'lars_results_scored.csv'.


# Find Scale Correlation

## First we correlate DAS with overall Acceptance Rates

In [18]:
import pandas as pd
import scipy.stats as stats


# Trial-level EBDM task data
df = pd.read_csv("Pilot Data/all_trials.csv")

# Overall acceptance rate per participant: the mean of 'acceptance' across all
# of that participant's trials, stored as 'acceptance_rate'
participant_acceptance_rate = (
    df.groupby(['participant_id'])['acceptance']
    .mean()
    .reset_index()
    .rename(columns={'acceptance': 'acceptance_rate'})
)

# Scored DAS questionnaire, one row per participant
das_data = pd.read_csv("Pilot Data/das_scored_results.csv")

# Join scores to acceptance rates on participant_id. pd.merge defaults to an
# inner join, so participants missing from either table are dropped.
merged_data = pd.merge(das_data, participant_acceptance_rate, on=['participant_id'])
print(merged_data.head())

# Pearson correlation of each DAS score with the overall acceptance rate
correlation_results = {}
for score_name in (
    'das_executive_score',
    'das_emotional_score',
    'das_initiation_score',
    'das_total_score',
):
    correlation, p_value = stats.pearsonr(merged_data[score_name], merged_data['acceptance_rate'])
    correlation_results[score_name] = {'correlation': correlation, 'p_value': p_value}

# Report r and p for every score
print("Correlation between subscale scores and acceptance rates (with p-values):")
for label, outcome in correlation_results.items():
    print(f"{label}: Correlation = {outcome['correlation']:.3f}, p-value = {outcome['p_value']:.3f}")

             participant_id  das_executive_score  das_emotional_score  \
0  67ac2151e5b37e0b91a5af0e                    4                   12   
1  66b5a7ae36e5b931db863954                    3                   10   
2  678fa07cd72c816bac76bda5                    3                   10   
3  6785b7dd6705050739feb0ad                    5                   15   
4  5adef850eb60400001539109                    8                   18   

   das_initiation_score  das_total_score  acceptance_rate  
0                    10               26         0.698333  
1                    14               27         0.433333  
2                     7               20         0.665000  
3                    13               33         0.536667  
4                    13               39         0.073333  
Correlation between subscale scores and acceptance rates (with p-values):
das_executive_score: Correlation = 0.078, p-value = 0.830
das_emotional_score: Correlation = -0.551, p-value = 0.099
das_initia

## Second we correlate LARS with overall Acceptance Rates

In [21]:
import pandas as pd
import scipy.stats as stats


# Trial-level EBDM task data
df = pd.read_csv("Pilot Data/all_trials.csv")

# Overall acceptance rate per participant: the mean of 'acceptance' across all
# of that participant's trials, stored as 'acceptance_rate'
participant_acceptance_rate = (
    df.groupby(['participant_id'])['acceptance']
    .mean()
    .reset_index()
    .rename(columns={'acceptance': 'acceptance_rate'})
)

# Scored LARS questionnaire, one row per participant
lars_data = pd.read_csv("Pilot Data/lars_results_scored.csv")

# Join scores to acceptance rates on participant_id. pd.merge defaults to an
# inner join, so participants missing from either table are dropped.
merged_data = pd.merge(lars_data, participant_acceptance_rate, on=['participant_id'])
print(merged_data.head())

# Pearson correlation of each LARS score with the overall acceptance rate
correlation_results = {}
for score_name in (
    'Intellectual_Curiosity',
    'Emotional_Responsiveness',
    'Self_Awareness',
    'Action_Initiation',
    'Total_LARS_Score',
):
    correlation, p_value = stats.pearsonr(merged_data[score_name], merged_data['acceptance_rate'])
    correlation_results[score_name] = {'correlation': correlation, 'p_value': p_value}

# Report r and p for every score
print("Correlation between subscale scores and acceptance rates (with p-values):")
for label, outcome in correlation_results.items():
    print(f"{label}: Correlation = {outcome['correlation']:.3f}, p-value = {outcome['p_value']:.3f}")

             participant_id  Intellectual_Curiosity  Motivation  Interest  \
0  67ac2151e5b37e0b91a5af0e                      20           7         5   
1  66b5a7ae36e5b931db863954                      28           8         5   
2  678fa07cd72c816bac76bda5                      33           9         4   
3  6785b7dd6705050739feb0ad                      44           8         6   
4  5adef850eb60400001539109                      47          12         6   

   Novelty_Seeking  Social_Life  Emotional_Responsiveness  Self_Awareness  \
0                4            4                        24              16   
1                6            9                        35               4   
2                8           12                        27               6   
3               10           20                        24               7   
4               12           17                        25              16   

   Action_Initiation  Everyday_Productivity  Initiative  Total_LARS_Score 

## Third we correlate QUIP with overall Acceptance Rates

In [23]:
import pandas as pd
import scipy.stats as stats


# Trial-level EBDM task data
df = pd.read_csv("Pilot Data/all_trials.csv")

# Overall acceptance rate per participant: the mean of 'acceptance' across all
# of that participant's trials, stored as 'acceptance_rate'
participant_acceptance_rate = (
    df.groupby(['participant_id'])['acceptance']
    .mean()
    .reset_index()
    .rename(columns={'acceptance': 'acceptance_rate'})
)

# Scored QUIP-RS questionnaire, one row per participant
quip_data = pd.read_csv("Pilot Data/quip_results_scored.csv")

# Join scores to acceptance rates on participant_id. pd.merge defaults to an
# inner join, so participants missing from either table are dropped.
merged_data = pd.merge(quip_data, participant_acceptance_rate, on=['participant_id'])
print(merged_data.head())

# Pearson correlation of each QUIP total with the overall acceptance rate
correlation_results = {}
for score_name in ('icd_total', 'quip_rs_total'):
    correlation, p_value = stats.pearsonr(merged_data[score_name], merged_data['acceptance_rate'])
    correlation_results[score_name] = {'correlation': correlation, 'p_value': p_value}

# Report r and p for every score
print("Correlation between subscale scores and acceptance rates (with p-values):")
for label, outcome in correlation_results.items():
    print(f"{label}: Correlation = {outcome['correlation']:.3f}, p-value = {outcome['p_value']:.3f}")

             participant_id  gambling_score  sex_score  buying_score  \
0  67ac2151e5b37e0b91a5af0e               0          4             5   
1  66b5a7ae36e5b931db863954               0          2             2   
2  678fa07cd72c816bac76bda5               0          3             6   
3  6785b7dd6705050739feb0ad               0          1             2   
4  5adef850eb60400001539109               5          0             0   

   eating_score  hobbyism_punding_score  icd_total  quip_rs_total  \
0             7                      12         16             28   
1             3                       4          7             11   
2             8                      15         17             32   
3             2                       4          5              9   
4             0                       0          5              5   

   acceptance_rate  
0         0.698333  
1         0.433333  
2         0.665000  
3         0.536667  
4         0.073333  
Correlation between subsca

# Calculate correlations with difference in acceptance rates between conditions

## DAS

In [27]:
import pandas as pd
import scipy.stats as stats  # needed for pearsonr; imported here so the cell runs on its own

# Trial-level task data
df = pd.read_csv("Pilot Data/all_trials.csv")

# Step 1: mean acceptance per participant within each delay condition
acceptance_rates = df.groupby(['participant_id', 'delay'])['acceptance'].mean().reset_index()

# Step 2: pivot to wide format — one row per participant, one column per delay value
pivot_df = acceptance_rates.pivot(index='participant_id', columns='delay', values='acceptance').reset_index()
pivot_df.columns.name = None  # remove the leftover axis name

# NOTE(review): assumes 'delay' is coded as the integers 0 and 1; with any
# other coding the columns stay unrenamed and Step 3 raises a KeyError.
pivot_df = pivot_df.rename(columns={0: 'acceptance_delay_0', 1: 'acceptance_delay_1'})

# Step 3: acceptance rate at delay 0 minus acceptance rate at delay 1
pivot_df['acceptance_diff'] = pivot_df['acceptance_delay_0'] - pivot_df['acceptance_delay_1']
print(pivot_df[['participant_id', 'acceptance_diff']])

# Scored DAS questionnaire, one row per participant
das_data = pd.read_csv("Pilot Data/das_scored_results.csv")

# Join scores to the per-participant differences on participant_id. pd.merge
# defaults to an inner join, so participants missing from either table are dropped.
merged_data = pd.merge(das_data, pivot_df, on=['participant_id'])
print(merged_data.head())

# Pearson correlation of each DAS score with the delay-0 minus delay-1 difference
correlation_results = {}
for score_name in (
    'das_executive_score',
    'das_emotional_score',
    'das_initiation_score',
    'das_total_score',
):
    correlation, p_value = stats.pearsonr(merged_data[score_name], merged_data['acceptance_diff'])
    correlation_results[score_name] = {'correlation': correlation, 'p_value': p_value}

# Display the results
print("Correlation between subscale scores and acceptance rates (with p-values):")
for subscale, results in correlation_results.items():
    print(f"{subscale}: Correlation = {results['correlation']:.3f}, p-value = {results['p_value']:.3f}")


             participant_id  acceptance_diff
0  5adef850eb60400001539109         0.060000
1  5d7472ad3bc4020015f3bb56         0.170000
2  63cff262503b56190be3eb18         0.066667
3  6562025a6e3331528cf8fb3d         0.000000
4  6612c945cebe319d923b80f8         0.080000
5  66b5a7ae36e5b931db863954         0.020000
6  6773fc8590d7b54b0d580e6b        -0.006406
7  6785b7dd6705050739feb0ad         0.246667
8  678fa07cd72c816bac76bda5         0.063333
9  67ac2151e5b37e0b91a5af0e         0.183333
             participant_id  das_executive_score  das_emotional_score  \
0  67ac2151e5b37e0b91a5af0e                    4                   12   
1  66b5a7ae36e5b931db863954                    3                   10   
2  678fa07cd72c816bac76bda5                    3                   10   
3  6785b7dd6705050739feb0ad                    5                   15   
4  5adef850eb60400001539109                    8                   18   

   das_initiation_score  das_total_score  acceptance_delay_0  \
0 

## LARS

In [28]:
import pandas as pd
import scipy.stats as stats  # needed for pearsonr; imported here so the cell runs on its own

# Trial-level task data
df = pd.read_csv("Pilot Data/all_trials.csv")

# Step 1: mean acceptance per participant within each delay condition
acceptance_rates = df.groupby(['participant_id', 'delay'])['acceptance'].mean().reset_index()

# Step 2: pivot to wide format — one row per participant, one column per delay value
pivot_df = acceptance_rates.pivot(index='participant_id', columns='delay', values='acceptance').reset_index()
pivot_df.columns.name = None  # remove the leftover axis name

# NOTE(review): assumes 'delay' is coded as the integers 0 and 1; with any
# other coding the columns stay unrenamed and Step 3 raises a KeyError.
pivot_df = pivot_df.rename(columns={0: 'acceptance_delay_0', 1: 'acceptance_delay_1'})

# Step 3: acceptance rate at delay 0 minus acceptance rate at delay 1
pivot_df['acceptance_diff'] = pivot_df['acceptance_delay_0'] - pivot_df['acceptance_delay_1']
print(pivot_df[['participant_id', 'acceptance_diff']])

# Scored LARS questionnaire, one row per participant
lars_data = pd.read_csv("Pilot Data/lars_results_scored.csv")

# Join scores to the per-participant differences on participant_id. pd.merge
# defaults to an inner join, so participants missing from either table are dropped.
merged_data = pd.merge(lars_data, pivot_df, on=['participant_id'])
print(merged_data.head())

# Pearson correlation of each LARS score with the delay-0 minus delay-1 difference
correlation_results = {}
for score_name in (
    'Intellectual_Curiosity',
    'Emotional_Responsiveness',
    'Self_Awareness',
    'Action_Initiation',
    'Total_LARS_Score',
):
    correlation, p_value = stats.pearsonr(merged_data[score_name], merged_data['acceptance_diff'])
    correlation_results[score_name] = {'correlation': correlation, 'p_value': p_value}

# Display the results
print("Correlation between subscale scores and acceptance rates (with p-values):")
for subscale, results in correlation_results.items():
    print(f"{subscale}: Correlation = {results['correlation']:.3f}, p-value = {results['p_value']:.3f}")

             participant_id  acceptance_diff
0  5adef850eb60400001539109         0.060000
1  5d7472ad3bc4020015f3bb56         0.170000
2  63cff262503b56190be3eb18         0.066667
3  6562025a6e3331528cf8fb3d         0.000000
4  6612c945cebe319d923b80f8         0.080000
5  66b5a7ae36e5b931db863954         0.020000
6  6773fc8590d7b54b0d580e6b        -0.006406
7  6785b7dd6705050739feb0ad         0.246667
8  678fa07cd72c816bac76bda5         0.063333
9  67ac2151e5b37e0b91a5af0e         0.183333
             participant_id  Intellectual_Curiosity  Motivation  Interest  \
0  67ac2151e5b37e0b91a5af0e                      20           7         5   
1  66b5a7ae36e5b931db863954                      28           8         5   
2  678fa07cd72c816bac76bda5                      33           9         4   
3  6785b7dd6705050739feb0ad                      44           8         6   
4  5adef850eb60400001539109                      47          12         6   

   Novelty_Seeking  Social_Life  Emotional

## QUIP

In [29]:
import pandas as pd
import scipy.stats as stats  # needed for pearsonr; imported here so the cell runs on its own

# Trial-level task data
df = pd.read_csv("Pilot Data/all_trials.csv")

# Step 1: mean acceptance per participant within each delay condition
acceptance_rates = df.groupby(['participant_id', 'delay'])['acceptance'].mean().reset_index()

# Step 2: pivot to wide format — one row per participant, one column per delay value
pivot_df = acceptance_rates.pivot(index='participant_id', columns='delay', values='acceptance').reset_index()
pivot_df.columns.name = None  # remove the leftover axis name

# NOTE(review): assumes 'delay' is coded as the integers 0 and 1; with any
# other coding the columns stay unrenamed and Step 3 raises a KeyError.
pivot_df = pivot_df.rename(columns={0: 'acceptance_delay_0', 1: 'acceptance_delay_1'})

# Step 3: acceptance rate at delay 0 minus acceptance rate at delay 1
pivot_df['acceptance_diff'] = pivot_df['acceptance_delay_0'] - pivot_df['acceptance_delay_1']
print(pivot_df[['participant_id', 'acceptance_diff']])

# Scored QUIP-RS questionnaire, one row per participant
quip_data = pd.read_csv("Pilot Data/quip_results_scored.csv")

# Join scores to the per-participant differences on participant_id. pd.merge
# defaults to an inner join, so participants missing from either table are dropped.
merged_data = pd.merge(quip_data, pivot_df, on=['participant_id'])
print(merged_data.head())

# Pearson correlation of each QUIP total with the delay-0 minus delay-1 difference
correlation_results = {}
for score_name in ('icd_total', 'quip_rs_total'):
    correlation, p_value = stats.pearsonr(merged_data[score_name], merged_data['acceptance_diff'])
    correlation_results[score_name] = {'correlation': correlation, 'p_value': p_value}

# Display the results
print("Correlation between subscale scores and acceptance rates (with p-values):")
for subscale, results in correlation_results.items():
    print(f"{subscale}: Correlation = {results['correlation']:.3f}, p-value = {results['p_value']:.3f}")

             participant_id  acceptance_diff
0  5adef850eb60400001539109         0.060000
1  5d7472ad3bc4020015f3bb56         0.170000
2  63cff262503b56190be3eb18         0.066667
3  6562025a6e3331528cf8fb3d         0.000000
4  6612c945cebe319d923b80f8         0.080000
5  66b5a7ae36e5b931db863954         0.020000
6  6773fc8590d7b54b0d580e6b        -0.006406
7  6785b7dd6705050739feb0ad         0.246667
8  678fa07cd72c816bac76bda5         0.063333
9  67ac2151e5b37e0b91a5af0e         0.183333
             participant_id  gambling_score  sex_score  buying_score  \
0  67ac2151e5b37e0b91a5af0e               0          4             5   
1  66b5a7ae36e5b931db863954               0          2             2   
2  678fa07cd72c816bac76bda5               0          3             6   
3  6785b7dd6705050739feb0ad               0          1             2   
4  5adef850eb60400001539109               5          0             0   

   eating_score  hobbyism_punding_score  icd_total  quip_rs_total  \
0  