# Extract Survey Scores

## DAS Scores

In [11]:
import pandas as pd

# Read the raw REDCap export; surrounding whitespace is trimmed from every
# header so later column lookups match the names exactly.
df = pd.read_csv("Pilot Data/redcap_responses.csv").rename(columns=str.strip)

# Subscale items (by question number). The "negative" lists are reverse-scored
# (MAX_SCORE - response); the "positive" lists are summed as answered.
executive_negative = [1, 6, 11, 17, 19]  # Reverse-scored
executive_positive = [10, 21, 23]

emotional_negative = [3, 5, 7, 9, 20, 24]  # Reverse-scored
emotional_positive = [12, 15]

initiation_negative = [2, 4, 8, 13, 14, 16, 18, 22]  # Reverse-scored

# Highest response value; responses are assumed to be coded 0-3 (a 4-point scale)
MAX_SCORE = 3


def reverse_score(value):
    """Return the reverse-scored value (MAX_SCORE - value) of one response.

    A missing response (None/NaN) contributes 0.
    """
    if pd.isnull(value):
        return 0  # or None, depending on how you want to handle missing data
    return MAX_SCORE - value


def _das_item_score(row, item_number, reverse=False):
    """Return the score contributed by DAS item ``das_<item_number>``.

    An absent column and a NaN/None response are both treated as missing and
    contribute 0, for positively and reverse-scored items alike.
    """
    # Default of None (not 0): an absent reverse-scored column must not be
    # turned into MAX_SCORE points by the reversal.
    value = row.get(f'das_{item_number}')
    if reverse:
        return reverse_score(value)
    return 0 if pd.isnull(value) else value


def _das_subscale_score(row, positive_items, negative_items):
    """Sum one subscale: positive items as answered, negative items reversed."""
    positive_total = sum(_das_item_score(row, i) for i in positive_items)
    negative_total = sum(
        _das_item_score(row, i, reverse=True) for i in negative_items
    )
    return positive_total + negative_total


# Scoring Function
def score_participant(row):
    """Score one participant's DAS responses.

    Args:
        row: Mapping-like object with a ``get`` method (e.g. a DataFrame row)
            holding the responses under the keys ``das_1`` ... ``das_24``.

    Returns:
        pd.Series of four values in this order: executive, emotional and
        initiation subscale scores, then their sum. Missing responses
        contribute 0, so an incomplete row yields a partial sum rather than
        NaN. If scoring raises, the error is printed and a Series of four
        ``None`` values is returned.
    """
    try:
        das_executive_score = _das_subscale_score(
            row, executive_positive, executive_negative
        )
        das_emotional_score = _das_subscale_score(
            row, emotional_positive, emotional_negative
        )
        # The initiation subscale has no positively scored items.
        das_initiation_score = _das_subscale_score(row, [], initiation_negative)

        total_score = das_executive_score + das_emotional_score + das_initiation_score

        return pd.Series([das_executive_score, das_emotional_score, das_initiation_score, total_score])

    except Exception as e:
        # Best effort: one unscorable row (e.g. non-numeric responses) must not
        # abort the whole apply().
        print(f"❌ Error: {e}")
        return pd.Series([None, None, None, None])

# Score every row; the four values returned by score_participant are stored
# in these columns, in this order.
das_score_columns = ['das_executive_score', 'das_emotional_score', 'das_initiation_score', 'das_total_score']
df[das_score_columns] = df.apply(score_participant, axis=1)

# Keep only the identifier and the scores; prolific_id is exported as participant_id
output_df = df[['prolific_id', *das_score_columns]].rename(
    columns={'prolific_id': 'participant_id'}
)

# Save to CSV
output_df.to_csv("Pilot Data/das_scored_results.csv", index=False)

# The message names the file that is actually written above.
print("✅ Scoring complete! Limited results saved to 'das_scored_results.csv'.")


✅ Scoring complete! Limited results saved to 'scored_results_limited.csv'.


## QUIP Scores

In [14]:
import pandas as pd

# Read the raw REDCap export, trim whitespace from every header, then correct
# the misspelled column 'repeating_activites_2' → 'repeating_activities_2'.
df = (
    pd.read_csv("Pilot Data/redcap_responses.csv")
    .rename(columns=str.strip)
    .rename(columns={'repeating_activites_2': 'repeating_activities_2'})
)

# Define ICD subscales (column names of the four items in each subscale)
gambling_items = ['gambling', 'gambling_2', 'gambling_3', 'gambling_4']
sex_items = ['sex', 'sex_2', 'sex_3', 'sex_4']
buying_items = ['buying', 'buying_2', 'buying_3', 'buying_4']
eating_items = ['eating', 'eating_2', 'eating_3', 'eating_4']

# Punding / Hobbyism combines 'perform_task' + 'repeating_activities' across items
# NOTE(review): the names below mix 'perform_task', 'performing_task_' and
# 'performing_tasks_' — presumably mirroring the export's headers. A name that
# does not match a column is silently scored as 0, so confirm against the CSV.
performing_task_items = ['perform_task', 'performing_task_2', 'performing_tasks_3', 'performing_tasks_4']
repeating_activities_items = ['repeating_activities', 'repeating_activities_2', 'repeating_activities_3', 'repeating_activities_4']

# Number of score values returned after the participant id
_QUIP_SCORE_COUNT = 7


# Scoring function
def score_quip_rs(row):
    """Score one participant's QUIP-RS responses.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) holding
            ``prolific_id`` and the item columns listed above. An item whose
            column is absent counts as 0.

    Returns:
        pd.Series of eight values in this order: prolific_id, gambling, sex,
        buying, eating, hobbyism/punding, ICD total, QUIP-RS total. If scoring
        raises, the error is printed and the Series holds the participant id
        (when available) followed by seven ``None`` values, so both paths
        return the same number of values.
    """
    try:
        # Sum subscales
        prolific_id = row['prolific_id']
        gambling_score = sum(row.get(item, 0) for item in gambling_items)
        sex_score = sum(row.get(item, 0) for item in sex_items)
        buying_score = sum(row.get(item, 0) for item in buying_items)
        eating_score = sum(row.get(item, 0) for item in eating_items)

        # Punding/Hobbyism score (sum of both sets of items)
        hobbyism_punding_score = (
            sum(row.get(item, 0) for item in performing_task_items) +
            sum(row.get(item, 0) for item in repeating_activities_items)
        )

        # ICD Total (sum of the four ICD subscales)
        icd_total = gambling_score + sex_score + buying_score + eating_score

        # QUIP-RS Total: ICD total plus the hobbyism/punding score
        quip_rs_total = icd_total + hobbyism_punding_score

        return pd.Series([
            prolific_id,
            gambling_score,
            sex_score,
            buying_score,
            eating_score,
            hobbyism_punding_score,
            icd_total,
            quip_rs_total
        ])

    except Exception as e:
        print(f"❌ Error in scoring row: {e}")
        # Keep the id so the failed row stays traceable, and pad to the same
        # eight values the success path returns.
        return pd.Series([row.get('prolific_id')] + [None] * _QUIP_SCORE_COUNT)

# Columns filled from score_quip_rs, in the order that function returns them
quip_columns = [
    'prolific_id',
    'gambling_score',
    'sex_score',
    'buying_score',
    'eating_score',
    'hobbyism_punding_score',
    'icd_total',
    'quip_rs_total',
]

# Score each row and store the results on the frame
df[quip_columns] = df.apply(score_quip_rs, axis=1)

# Final output: the same columns, with prolific_id exported as participant_id
output_df = df[quip_columns].rename(columns={'prolific_id': 'participant_id'})

# Write the scored results next to the input data
output_df.to_csv("Pilot Data/quip_results_scored.csv", index=False)

print("✅ QUIP-RS scoring complete! Results saved to 'quip_results_scored.csv'.")


✅ QUIP-RS scoring complete! Results saved to 'quip_results_scored.csv'.


## HADS Scoring

In [16]:
import pandas as pd

# Read the raw REDCap export and trim surrounding whitespace from every header
df = pd.read_csv("Pilot Data/redcap_responses.csv").rename(columns=str.strip)

# Lookup tables from response index to item score: the reverse table maps
# 0..3 onto 3..0, the standard table maps each index onto itself.
reverse_scoring = {index: 3 - index for index in range(4)}
standard_scoring = {index: index for index in range(4)}

# Anxiety scale: item column → lookup table used to score it
anxiety_items = dict(
    hads_1=reverse_scoring,
    hads_3=reverse_scoring,
    hads_5=reverse_scoring,
    hads_7=standard_scoring,
    hads_9=standard_scoring,
    hads_11=reverse_scoring,
    hads_13=reverse_scoring,
)

# Depression scale: item column → lookup table used to score it
depression_items = dict(
    hads_2=standard_scoring,
    hads_4=standard_scoring,
    hads_6=reverse_scoring,
    hads_8=reverse_scoring,
    hads_10=reverse_scoring,
    hads_12=standard_scoring,
    hads_14=standard_scoring,
)


def score_hads(row):
    """Score one participant's HADS responses.

    Args:
        row: Mapping-like object with a ``get`` method (e.g. a DataFrame row)
            holding the response index of each ``hads_<n>`` item.

    Returns:
        pd.Series of three values: anxiety score, depression score and their
        sum. A missing response, or one that is not a key of the item's
        lookup table, contributes 0. If scoring raises, the error is printed
        and a Series of three ``None`` values is returned.
    """
    def scale_total(items):
        # Add up the looked-up score of every item on one scale.
        running = 0
        for column, lookup in items.items():
            answer = row.get(column, None)
            running += 0 if pd.isnull(answer) else lookup.get(answer, 0)
        return running

    try:
        anxiety = scale_total(anxiety_items)
        depression = scale_total(depression_items)
        return pd.Series([anxiety, depression, anxiety + depression])
    except Exception as e:
        print(f"❌ Error in scoring HADS row: {e}")
        return pd.Series([None] * 3)

# Columns filled from score_hads, in the order that function returns them
hads_columns = ['hads_anxiety_score', 'hads_depression_score', 'hads_total_score']

# Score each row and store the three results on the frame
df[hads_columns] = df.apply(score_hads, axis=1)

# Final output: identifier plus scores, with prolific_id exported as participant_id
output_df = df[['prolific_id'] + hads_columns].rename(
    columns={'prolific_id': 'participant_id'}
)

# Write the scored results next to the input data
output_df.to_csv("Pilot Data/hads_results_scored.csv", index=False)

print("✅ HADS scoring complete! Results saved to 'hads_results_scored.csv'.")


✅ HADS scoring complete! Results saved to 'hads_results_scored.csv'.
