In [None]:
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from matplotlib.backends.backend_pdf import PdfPages

Subject matching

In [None]:
# base path
# Directory that holds the question CSVs and the per-subject response CSVs.
# Set the FMRI_BEHAVIOR_DIR environment variable to run the notebook on another
# machine without editing this cell; the original location stays the default.
base_path = os.environ.get(
    'FMRI_BEHAVIOR_DIR',
    '/Users/zachary/Desktop/Princeton/2024-25/NEU 502B/neu502b-2025/fMRI Experiment/Behavior files',
)

# question versions and subject mappings
# subject number -> version of the question set that subject saw
#   subject2 - V1
#   subject4 - V2
#   subject5 - V3
subject_versions = {2: 1, 4: 2, 5: 3}

# Both file maps are derived from the single mapping above so the subject list
# cannot drift between them.
question_files = {
    subject: f'fmri_questions_V{version}.csv'
    for subject, version in subject_versions.items()
}

subject_files = {
    subject: f'subject{subject}_question_looper.csv'
    for subject in subject_versions
}

# answer key - necessary for matching because we didn't get questions labeled with "math" or "language" in our response metadata
# Maps the opening words of each question to the number of its correct answer.
# Insertion order matters: find_answer() returns the first key that matches.
answers_key = {
    "What does a rose": 1,
    "A gardener has found": 2,
    "What does this saying": 1,
    "A satellite completes": 2,
    "Which next sentence best fits after The music": 3,
    "An orchestra sells": 2,
    "During a class discussion": 2,
    "Emma's class started": 4,
    "As Liam walked": 1,
    "Liam usually walks": 2,
    "It is a truth universally": 4,
    "A soon-to-be husband": 1,
    "James arrived at the café a few minutes early for his meeting. He ordered a coffee and sat": 2,
    "James arrived at the café a few minutes early for his meeting. He ordered a coffee and one": 3,
    "Which next sentence best fits after The newborn": 3,
    "A small zoo has": 1,
    "When the museum tour began, Daniel stayed": 2,
    "When the museum tour began, Daniel walked": 4,
    "What does the line Once": 1,
    "A tired poet writes": 2
}

# normalize question text for matching
def normalize_text(text):
    """Return a lowercase, punctuation-stripped, single-spaced copy of ``text``.

    Values that ``pd.isna`` reports as missing become the empty string; any
    other value is converted with ``str`` first. Only straight double/single
    quotes, periods, commas and question marks are removed.
    """
    if pd.isna(text):
        return ""

    lowered = str(text).lower()
    # drop the punctuation that differs between the answer key and the CSV text
    stripped = re.sub(r'["\'\.,\?]', '', lowered)
    # collapse every whitespace run to one space and trim both ends
    return re.sub(r'\s+', ' ', stripped).strip()

# finds the correct answer for a question
def find_answer(question_text):
    """Look up the correct answer number for ``question_text``.

    Matching is attempted in three stages, each on the normalized text:
    a normalized ``answers_key`` prefix, then a normalized ``answers_key``
    substring, then a table of distinctive keyword combinations. Returns the
    answer number, or ``None`` (after printing a warning) when nothing matches.
    Missing or empty input returns ``None`` without a warning.
    """
    if pd.isna(question_text) or not question_text:
        return None

    normalized_question = normalize_text(question_text)

    # normalize each key once; keys that normalize to "" can never match
    candidates = [
        (normalized_key, answer)
        for normalized_key, answer in (
            (normalize_text(key_start), answer) for key_start, answer in answers_key.items()
        )
        if normalized_key
    ]

    # stage 1: question starts with a key; stage 2: key appears anywhere
    for is_match in (normalized_question.startswith, lambda key: key in normalized_question):
        for normalized_key, answer in candidates:
            if is_match(normalized_key):
                return answer

    # stage 3: distinctive phrases - every phrase of a rule must be present,
    # and the first satisfied rule wins
    keyword_rules = (
        (("rose by any other name",), 1),
        (("four roses",), 2),
        (("satellite", "orbit"), 2),
        (("music soared",), 3),
        (("orchestra", "tickets"), 2),
        (("emma raised",), 2),
        (("emma", "class started"), 4),
        (("liam", "rain"), 1),
        (("liam", "walks", "pace"), 2),
        (("truth universally acknowledged",), 4),
        (("husband and wife",), 1),
        (("james", "window"), 2),
        (("james", "croissant"), 3),
        (("newborn animals",), 3),
        (("zoo", "mammals"), 1),
        (("daniel stayed", "museum"), 2),
        (("daniel walked", "museum"), 4),
        (("midnight dreary",), 1),
        (("tired poet",), 2),
    )
    for phrases, answer in keyword_rules:
        if all(phrase in normalized_question for phrase in phrases):
            return answer

    print(f"WARNING: No answer found for question: {question_text[:50]}...")
    return None

# turn one recorded key press into an integer answer choice
def _parse_response(raw_response):
    """Return the first integer found in a recorded response, or ``None``.

    Numeric values (Python or NumPy scalars) are truncated with ``int``;
    strings such as ``'1'`` or ``'[1]'`` yield their first run of digits.
    Any other type, or a string without digits, gives ``None``.
    """
    if isinstance(raw_response, (int, float, np.integer, np.floating)):
        return int(raw_response)
    if isinstance(raw_response, str):
        digits = re.search(r'\d+', raw_response)
        return int(digits.group()) if digits else None
    return None


# for each subject
for subject_num in [2, 4, 5]:
    print(f"\nAnalyzing Subject {subject_num}...")

    # read the question file
    question_file_path = os.path.join(base_path, question_files[subject_num])
    questions_df = pd.read_csv(question_file_path)

    # read the subject response file
    subject_file_path = os.path.join(base_path, subject_files[subject_num])
    subject_df = pd.read_csv(subject_file_path)

    print(f"Found {len(questions_df)} questions and {len(subject_df)} subject responses for Subject {subject_num}")

    # create a dictionary to look up questions by their number
    question_lookup = dict(zip(questions_df['Number'], questions_df['Question']))

    # one dict per response row; turned into a dataframe after the loop
    matched_data = []

    # keep track of matched/unmatched questions
    matched_count = 0
    unmatched_count = 0

    # process each row in the subject file
    for _, row in subject_df.iterrows():
        question_number = row['Number']
        # rows whose 'Number' is missing or unknown look up to "" and are
        # reported as unmatched below
        question_text = question_lookup.get(question_number, "")

        # A blank 'Question' cell is read from the CSV as NaN (a float).
        # Coerce it to "" so the [:50] preview below cannot raise TypeError.
        if not isinstance(question_text, str):
            question_text = "" if pd.isna(question_text) else str(question_text)

        # get correct answer for this question
        correct_answer = find_answer(question_text)

        if correct_answer is not None:
            matched_count += 1
        else:
            unmatched_count += 1
            print(f"  Unable to match question {question_number}: {question_text[:50]}...")

        # get subject's response
        subject_response = row.get('key_resp.keys', None)

        # is_correct stays None when there is no response, no answer key entry,
        # or the response contains no number
        is_correct = None
        if pd.notna(subject_response) and correct_answer is not None:
            subject_response_int = _parse_response(subject_response)
            if subject_response_int is not None:
                is_correct = 1 if subject_response_int == correct_answer else 0

        matched_data.append({
            'question_number': question_number,
            'question_text': question_text,
            'question_started': row.get('Question_text.started', None),
            'question_ended': row.get('trial.stopped', None),
            'response_time': row.get('key_resp.rt', None),
            'subject_response': subject_response,
            'correct_answer': correct_answer,
            'is_correct': is_correct
        })

    # create the matched dataframe
    matched_df = pd.DataFrame(matched_data)

    # save as CSV
    output_file = os.path.join(base_path, f"subject{subject_num}_matched.csv")
    matched_df.to_csv(output_file, index=False)
    print(f"Created {output_file}")
    print(f"Summary: {matched_count} questions matched, {unmatched_count} questions unmatched")

print("\nsubjects processed")


Analyzing Subject 2...
Found 20 questions and 22 subject responses for Subject 2
  Unable to match question nan: ...
  Unable to match question nan: ...
Created /Users/zachary/Desktop/Princeton/2024-25/NEU 502B/neu502b-2025/fMRI Experiment/Behavior files/subject2_matched.csv
Summary: 20 questions matched, 2 questions unmatched

Analyzing Subject 4...
Found 20 questions and 22 subject responses for Subject 4
  Unable to match question nan: ...
  Unable to match question nan: ...
Created /Users/zachary/Desktop/Princeton/2024-25/NEU 502B/neu502b-2025/fMRI Experiment/Behavior files/subject4_matched.csv
Summary: 20 questions matched, 2 questions unmatched

Analyzing Subject 5...
Found 20 questions and 22 subject responses for Subject 5
  Unable to match question nan: ...
  Unable to match question nan: ...
Created /Users/zachary/Desktop/Princeton/2024-25/NEU 502B/neu502b-2025/fMRI Experiment/Behavior files/subject5_matched.csv
Summary: 20 questions matched, 2 questions unmatched

subjects 

Correct vs. Incorrect

In [None]:
# pdf to save all plots
# One multi-page PDF inside base_path. analyze_reaction_times() appends each
# figure to it through the module-level `pdf` name, and this cell closes it
# after the last analysis has run.
pdf_path = os.path.join(base_path, 'reaction_time_analysis.pdf')
pdf = PdfPages(pdf_path)

# create function to create boxplots and perform t-tests
def analyze_reaction_times(data, subject_id=None):
    """Compare reaction times of correct vs. incorrect answers.

    Rows lacking ``response_time`` or ``is_correct`` are ignored. When both
    groups have at least two observations, an unequal-variance (Welch) t-test
    is run, a boxplot with the individual data points is drawn, and the figure
    is appended to the module-level ``pdf`` (which must be open).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``response_time`` and ``is_correct`` (1 = correct,
        0 = incorrect) columns.
    subject_id : optional
        Used in the title and messages; ``None`` means the combined data set.

    Returns
    -------
    dict or None
        Means, sample SDs (pandas default, ddof=1), group sizes, t statistic
        and p-value; ``None`` if either group has fewer than two observations.
    """
    # `is not None` rather than truthiness, so a subject numbered 0 is not
    # mistaken for the combined data set
    is_single_subject = subject_id is not None

    # only include rows with valid reaction times and is_correct values
    filtered_data = data.dropna(subset=['response_time', 'is_correct'])

    # separate correct and incorrect answers
    correct_rt = filtered_data[filtered_data['is_correct'] == 1]['response_time']
    incorrect_rt = filtered_data[filtered_data['is_correct'] == 0]['response_time']

    # check if we have enough data for both categories
    if len(correct_rt) < 2 or len(incorrect_rt) < 2:
        print(f"Not enough data for analysis for {'Subject ' + str(subject_id) if is_single_subject else 'Combined'}")
        return None

    # perform t-test (equal_var=False -> Welch's t-test)
    t_stat, p_value = stats.ttest_ind(correct_rt, incorrect_rt, equal_var=False)

    # create boxplot
    fig, ax = plt.subplots(figsize=(8, 6))

    # The tick labels are set on the axis afterwards instead of through the
    # boxplot `labels=` keyword, which newer Matplotlib releases deprecate.
    boxplot = ax.boxplot(
        [correct_rt, incorrect_rt],
        patch_artist=True,
        zorder=5, widths=0.5  # Lower zorder than the data points
    )
    ax.set_xticks([1, 2])  # boxplot draws the two groups at x = 1 and x = 2
    ax.set_xticklabels(['Correct Answers', 'Incorrect Answers'])

    # anchor the y axis at zero
    y_min, y_max = ax.get_ylim()
    ax.set_ylim(0, y_max)

    colors = ['lightgreen', 'darkorange']
    for patch, color in zip(boxplot['boxes'], colors):
        patch.set_facecolor(color)

    for element in ['whiskers', 'caps', 'medians', 'fliers']:
        for item in boxplot[element]:
            item.set(color='black')

    # plot actual data points on the chart
    for i, data_points in enumerate([correct_rt, incorrect_rt]):
        # generate random x values for jitter around the box position
        x = np.random.normal(i + 1, 0.04, size=len(data_points))

        ax.scatter(x, data_points, alpha=1, s=25, color='black',
                   marker='o', edgecolor='black', linewidth=0.5, zorder=10)

    if is_single_subject:
        ax.set_title(f'Subject {subject_id}: Reaction Time Comparison - Correct vs. Incorrect Answers', color='black')
    else:
        ax.set_title('All Subjects: Reaction Time Comparison - Correct vs. Incorrect Answers', color='black')

    ax.set_ylabel('Reaction Time (s)', fontsize=16, color='black')

    stats_text = (
        f'Correct (n={len(correct_rt)}):\n'
        f'Mean = {correct_rt.mean():.3f}s\n'
        f'SD = {correct_rt.std():.3f}s\n\n'
        f'Incorrect (n={len(incorrect_rt)}):\n'
        f'Mean = {incorrect_rt.mean():.3f}s\n'
        f'SD = {incorrect_rt.std():.3f}s\n\n'
        f't-test: p = {p_value:.4f}'
    )

    ax.text(0.5, 0.98, stats_text,
            horizontalalignment='center',
            verticalalignment='top',
            transform=ax.transAxes,
            bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'),
            fontsize=13, zorder=15)

    ax.yaxis.grid(True, linestyle='--', alpha=0.3, color='black')

    for spine in ax.spines.values():
        spine.set_color('black')

    ax.tick_params(axis='both', colors='black', labelsize=14)

    plt.tight_layout()

    pdf.savefig(fig)

    # close this figure explicitly rather than whichever one is "current"
    plt.close(fig)

    return {
        'correct_mean': correct_rt.mean(),
        'correct_std': correct_rt.std(),
        'incorrect_mean': incorrect_rt.mean(),
        'incorrect_std': incorrect_rt.std(),
        't_stat': t_stat,
        'p_value': p_value,
        'n_correct': len(correct_rt),
        'n_incorrect': len(incorrect_rt)
    }

# load data for each subject
subject_data = {}
all_data = []

for subject_num in [2, 4, 5]:
    file_path = os.path.join(base_path, f"subject{subject_num}_matched.csv")

    if os.path.exists(file_path):
        print(f"Loading data for Subject {subject_num}...")
        df = pd.read_csv(file_path)

        # store data for individual subject analysis
        subject_data[subject_num] = df

        # add subject identifier and append to combined data
        # (same object as stored above, so it gains the column too)
        df['subject'] = subject_num
        all_data.append(df)
    else:
        print(f"Warning: File not found for Subject {subject_num}")

# combine all data
if all_data:
    combined_data = pd.concat(all_data, ignore_index=True)
    print(f"Combined data has {len(combined_data)} rows")
else:
    print("No data found for any subject")
    combined_data = None

results = {}
try:
    # analyze each subject individually
    for subject_num, data in subject_data.items():
        print(f"\nAnalyzing Subject {subject_num}...")
        results[subject_num] = analyze_reaction_times(data, subject_num)

    # analyze combined data
    if combined_data is not None:
        print("\nAnalyzing combined data...")
        results['combined'] = analyze_reaction_times(combined_data)
finally:
    # always close the PDF, even if an analysis raised, so the file handle is
    # released and the pages written so far are saved
    pdf.close()

print("\nSummary of T-test Results:")
print("-" * 80)
print(f"{'Subject':<10} {'Correct RT':<20} {'Incorrect RT':<20} {'t-stat':<10} {'p-value':<10} {'Significant?':<12}")
print("-" * 80)

for subject, result in results.items():
    # analyze_reaction_times returns None when a group had too little data
    if result:
        subject_label = subject if subject == 'combined' else f"Subject {subject}"
        correct_rt = f"{result['correct_mean']:.3f} ± {result['correct_std']:.3f}"
        incorrect_rt = f"{result['incorrect_mean']:.3f} ± {result['incorrect_std']:.3f}"
        significant = "Yes (p<0.05)" if result['p_value'] < 0.05 else "No"

        print(f"{subject_label:<10} {correct_rt:<20} {incorrect_rt:<20} {result['t_stat']:<10.3f} {result['p_value']:<10.4f} {significant:<12}")

print("-" * 80)
print(f"\nPlots saved to: {pdf_path}")

Loading data for Subject 2...
Loading data for Subject 4...
Loading data for Subject 5...
Combined data has 66 rows

Analyzing Subject 2...

Analyzing Subject 4...

Analyzing Subject 5...

Analyzing combined data...


  boxplot = ax.boxplot(
  boxplot = ax.boxplot(
  boxplot = ax.boxplot(
  boxplot = ax.boxplot(



Summary of T-test Results:
--------------------------------------------------------------------------------
Subject    Correct RT           Incorrect RT         t-stat     p-value    Significant?
--------------------------------------------------------------------------------
Subject 2  19.589 ± 5.881       19.312 ± 9.647       0.067      0.9479     No          
Subject 4  18.645 ± 7.736       17.031 ± 8.658       0.302      0.7849     No          
Subject 5  16.810 ± 7.664       19.055 ± 2.105       -0.842     0.4273     No          
combined   18.289 ± 7.211       18.698 ± 8.113       -0.156     0.8777     No          
--------------------------------------------------------------------------------

Plots saved to: /Users/zachary/Desktop/Princeton/2024-25/NEU 502B/neu502b-2025/fMRI Experiment/Behavior files/reaction_time_analysis.pdf


Math vs. Language

In [None]:
# pdf to save plots
# Rebinds the module-level `pdf_path` / `pdf` names to a new multi-page PDF in
# base_path; the plotting functions in this cell save their figures through `pdf`.
pdf_path = os.path.join(base_path, 'math_language_analysis.pdf')
pdf = PdfPages(pdf_path)

# Opening words of the ten math questions. get_question_type() looks for these
# inside each question's text; the strings mirror keys of answers_key.
math_questions = [
    "A gardener has found",
    "A satellite completes",
    "An orchestra sells",
    "Emma's class started",
    "Liam usually walks",
    "A soon-to-be husband",
    "James arrived at the café a few minutes early for his meeting. He ordered a coffee and one",
    "A small zoo has",
    "When the museum tour began, Daniel walked",
    "A tired poet writes"
]

# Opening words of the ten language questions, used the same way.
language_questions = [
    "What does a rose",
    "What does this saying",
    "Which next sentence best fits after The music",
    "During a class discussion",
    "As Liam walked",
    "It is a truth universally",
    "James arrived at the café a few minutes early for his meeting. He ordered a coffee and sat",
    "Which next sentence best fits after The newborn",
    "When the museum tour began, Daniel stayed",
    "What does the line Once"
]

# determine question type
def get_question_type(question_text):
    """Classify a question as ``"math"`` or ``"language"``.

    Both the question and the prefixes in ``math_questions`` /
    ``language_questions`` are passed through ``normalize_text`` before
    comparing, the same way ``find_answer`` matches ``answers_key``. The
    prefixes are written without quotation marks, so a raw substring test
    misses any question whose text contains them.

    Returns ``None`` for missing or non-string input and for questions that
    match neither list. Math prefixes are checked first.
    """
    if pd.isna(question_text) or not isinstance(question_text, str):
        return None

    normalized_question = normalize_text(question_text)

    for label, prefixes in (("math", math_questions), ("language", language_questions)):
        for prefix in prefixes:
            normalized_prefix = normalize_text(prefix)
            # an empty normalized prefix would match every question
            if normalized_prefix and normalized_prefix in normalized_question:
                return label

    # if no match is found after checking both
    return None

#  analyze and plot accuracy comparison
def plot_accuracy_comparison(data, subject_id=None):
    """Create a bar plot comparing math vs. language accuracy.

    Adds (or overwrites) a ``question_type`` column on ``data`` in place, then
    keeps rows that have both a question type and an ``is_correct`` value.
    When both types have data, the 2x2 correct/incorrect table is tested with
    Fisher's exact test if any cell is below 5, otherwise with a chi-square
    test. The figure is appended to the module-level ``pdf``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``question_text`` and ``is_correct`` columns.
    subject_id : optional
        Used in the title; ``None`` means the combined data set.

    Returns
    -------
    dict
        Accuracy and row count per question type, the p-value (``None`` when
        no test was run) and the name of the test.
    """
    data['question_type'] = data['question_text'].apply(get_question_type)

    # filter to only include rows with valid question_type and is_correct
    filtered_data = data.dropna(subset=['question_type', 'is_correct'])

    # get math and language subsets
    math_data = filtered_data[filtered_data['question_type'] == 'math']
    language_data = filtered_data[filtered_data['question_type'] == 'language']

    # counts
    math_count = len(math_data)
    language_count = len(language_data)

    # calculate accuracy for each type (0 stands in when a type has no rows)
    math_accuracy = math_data['is_correct'].mean() if math_count > 0 else 0
    language_accuracy = language_data['is_correct'].mean() if language_count > 0 else 0

    # correct counts
    math_correct = math_data['is_correct'].sum()
    language_correct = language_data['is_correct'].sum()

    if math_count > 0 and language_count > 0:
        # rows: math / language; columns: correct / incorrect
        contingency = np.array([
            [math_correct, math_count - math_correct],
            [language_correct, language_count - language_correct]
        ])

        # use Fisher's exact test for small samples
        if np.min(contingency) < 5:
            _, p_value = stats.fisher_exact(contingency)
            test_name = "Fisher's exact test"
        else:
            _, p_value, _, _ = stats.chi2_contingency(contingency)
            test_name = "Chi-square test"
    else:
        p_value = None
        test_name = "Not enough data"

    fig, ax = plt.subplots(figsize=(8, 6))

    bar_positions = [1, 2]
    bar_width = 0.6

    ax.bar(bar_positions,
           [math_accuracy, language_accuracy],
           width=bar_width,
           color=['lightskyblue', 'gold'],
           edgecolor='black',
           linewidth=1.5,
           zorder=3)

    # standard errors of a proportion, sqrt(p * (1 - p) / n), for the error bars
    math_se = np.sqrt(math_accuracy * (1 - math_accuracy) / math_count) if math_count > 0 else 0
    lang_se = np.sqrt(language_accuracy * (1 - language_accuracy) / language_count) if language_count > 0 else 0

    ax.errorbar(bar_positions,
                [math_accuracy, language_accuracy],
                yerr=[math_se, lang_se],
                fmt='none',
                ecolor='black',
                capsize=5,
                elinewidth=1.5,
                capthick=1.5,
                zorder=10)

    ax.set_ylim(0, 1.1)

    ax.set_ylabel('Proportion Correct', fontsize=15, color='black')

    # `is not None` rather than truthiness, so a subject numbered 0 keeps its title
    if subject_id is not None:
        ax.set_title(f'Subject {subject_id}: Math vs. Language Accuracy', fontsize=14, color='black')
    else:
        ax.set_title('All Subjects: Math vs. Language Accuracy', fontsize=14, color='black')

    ax.set_xticks(bar_positions)
    ax.set_xticklabels(['Math', 'Language'], fontsize=14, color='black')

    ax.yaxis.grid(True, linestyle='--', alpha=0.3, color='black')

    for spine in ax.spines.values():
        spine.set_color('black')

    ax.tick_params(axis='both', colors='black', labelsize=14)

    # Built line by line so only the last entry depends on p_value. A trailing
    # `if ... else` after adjacent string literals applies to the whole
    # concatenation, which dropped the counts and accuracies when no test ran.
    summary_lines = [
        f'Math (n={math_count}):',
        f'Accuracy = {math_accuracy:.3f}',
        '',
        f'Language (n={language_count}):',
        f'Accuracy = {language_accuracy:.3f}',
        '',
    ]
    if p_value is not None:
        summary_lines += [f'{test_name}:', f'p = {p_value:.4f}']
    else:
        summary_lines.append('Not enough data for statistical test')
    stats_text = '\n'.join(summary_lines)

    ax.text(0.5, 0.98, stats_text,
            horizontalalignment='center',
            verticalalignment='top',
            transform=ax.transAxes,
            bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'),
            fontsize=13, zorder=15)

    plt.tight_layout()

    pdf.savefig(fig)
    # close this figure explicitly rather than whichever one is "current"
    plt.close(fig)

    return {
        'math_accuracy': math_accuracy,
        'language_accuracy': language_accuracy,
        'math_count': math_count,
        'language_count': language_count,
        'p_value': p_value,
        'test_name': test_name
    }

# mean and sample SD of one group of reaction times
def _rt_mean_sd(values):
    """Return ``(mean, SD)`` of ``values`` as floats.

    The SD uses ddof=1, matching the pandas ``.std()`` default used by
    ``analyze_reaction_times``. 0.0 stands in where a statistic is undefined:
    the mean of an empty group, the SD of fewer than two values.
    """
    mean = float(np.mean(values)) if len(values) > 0 else 0.0
    sd = float(np.std(values, ddof=1)) if len(values) > 1 else 0.0
    return mean, sd


# function to plot reaction time comparison
def plot_rt_comparison(data, subject_id=None):
    """Create a boxplot comparing math vs. language reaction times.

    Adds (or overwrites) a ``question_type`` column on ``data`` in place, then
    keeps rows that have both a question type and a ``response_time``. When
    both types have more than one observation, an unequal-variance (Welch)
    t-test is run. The figure is appended to the module-level ``pdf``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``question_text`` and ``response_time`` columns.
    subject_id : optional
        Used in the title; ``None`` means the combined data set.

    Returns
    -------
    dict
        Mean, sample SD and count per question type, plus ``t_stat`` and
        ``p_value`` (both ``None`` when the test was not run).
    """
    # add question type to data
    data['question_type'] = data['question_text'].apply(get_question_type)

    # filter to only include rows with valid question_type and response_time
    filtered_data = data.dropna(subset=['question_type', 'response_time'])

    # reaction times per question type (empty arrays when a type has no rows)
    math_rt = filtered_data.loc[filtered_data['question_type'] == 'math', 'response_time'].values
    language_rt = filtered_data.loc[filtered_data['question_type'] == 'language', 'response_time'].values

    math_mean, math_sd = _rt_mean_sd(math_rt)
    language_mean, language_sd = _rt_mean_sd(language_rt)

    # perform t-test (equal_var=False -> Welch's t-test)
    if len(math_rt) > 1 and len(language_rt) > 1:
        t_stat, p_value = stats.ttest_ind(math_rt, language_rt, equal_var=False)
    else:
        t_stat = None
        p_value = None

    fig, ax = plt.subplots(figsize=(8, 6))

    # The tick labels are set on the axis afterwards instead of through the
    # boxplot `labels=` keyword, which newer Matplotlib releases deprecate.
    boxplot = ax.boxplot(
        [math_rt, language_rt],
        patch_artist=True,
        zorder=5, widths=0.5
    )
    ax.set_xticks([1, 2])  # boxplot draws the two groups at x = 1 and x = 2
    ax.set_xticklabels(['Math', 'Language'])

    colors = ['lightskyblue', 'gold']
    for patch, color in zip(boxplot['boxes'], colors):
        patch.set_facecolor(color)

    for element in ['whiskers', 'caps', 'medians', 'fliers']:
        for item in boxplot[element]:
            item.set(color='black')

    for i, data_points in enumerate([math_rt, language_rt]):
        if len(data_points) > 0:
            # Add some jitter to x position
            x = np.random.normal(i + 1, 0.04, size=len(data_points))
            # Plot data points with standard black dots
            ax.scatter(x, data_points, alpha=1, s=25, color='black', zorder=10)

    # anchor the y axis at zero
    y_min, y_max = ax.get_ylim()
    ax.set_ylim(0, y_max)

    # `is not None` rather than truthiness, so a subject numbered 0 keeps its title
    if subject_id is not None:
        ax.set_title(f'Subject {subject_id}: Math vs. Language Reaction Time', fontsize=14, color='black')
    else:
        ax.set_title('All Subjects: Math vs. Language Reaction Time', fontsize=14, color='black')

    ax.set_ylabel('Reaction Time (s)', fontsize=16, color='black')

    # Built line by line so only the last entry depends on p_value. A trailing
    # `if ... else` after adjacent string literals applies to the whole
    # concatenation, which dropped the group summaries when no test ran.
    summary_lines = []
    for label, values, mean, sd in (
        ('Math', math_rt, math_mean, math_sd),
        ('Language', language_rt, language_mean, language_sd),
    ):
        summary_lines.append(f'{label} (n={len(values)}):')
        if len(values) > 0:  # no mean/SD to report for an empty group
            summary_lines.append(f'Mean = {mean:.3f}s')
            summary_lines.append(f'SD = {sd:.3f}s')
        summary_lines.append('')
    if p_value is not None:
        summary_lines.append(f't-test: p = {p_value:.4f}')
    else:
        summary_lines.append('Not enough data for t-test')
    stats_text = '\n'.join(summary_lines)

    ax.text(0.5, 0.98, stats_text,
            horizontalalignment='center',
            verticalalignment='top',
            transform=ax.transAxes,
            bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'),
            fontsize=13, zorder=15)

    ax.yaxis.grid(True, linestyle='--', alpha=0.3, color='black')

    for spine in ax.spines.values():
        spine.set_color('black')

    ax.tick_params(axis='both', colors='black', labelsize=14)

    plt.tight_layout()

    # Save the figure to PDF
    pdf.savefig(fig)
    # close this figure explicitly rather than whichever one is "current"
    plt.close(fig)

    return {
        'math_rt_mean': math_mean,
        'math_rt_std': math_sd,
        'language_rt_mean': language_mean,
        'language_rt_std': language_sd,
        'math_rt_count': len(math_rt),
        'language_rt_count': len(language_rt),
        't_stat': t_stat,
        'p_value': p_value
    }

# Load data for each subject
subject_data = {}
all_data = []

print("Starting Math vs. Language Analysis...")

for subject_num in [2, 4, 5]:
    file_path = os.path.join(base_path, f"subject{subject_num}_matched.csv")

    if os.path.exists(file_path):
        print(f"Loading data for Subject {subject_num}...")
        df = pd.read_csv(file_path)

        # Store data for individual subject analysis
        subject_data[subject_num] = df

        # Add subject identifier and append to combined data
        # (same object as stored above, so it gains the column too)
        df['subject'] = subject_num
        all_data.append(df)
    else:
        print(f"Warning: File not found for Subject {subject_num}")

# Combine all data
if all_data:
    combined_data = pd.concat(all_data, ignore_index=True)
    print(f"Combined data has {len(combined_data)} rows")
else:
    print("No data found for any subject")
    combined_data = None

# Create dictionaries to store results
accuracy_results = {}
rt_results = {}

try:
    # Analyze each subject individually
    print("\n=== Analyzing Math vs. Language Question Performance ===")
    for subject_num, data in subject_data.items():
        print(f"\nAnalyzing Subject {subject_num}...")

        # Accuracy analysis
        print(f"  Analyzing accuracy for Subject {subject_num}...")
        accuracy_results[subject_num] = plot_accuracy_comparison(data, subject_num)

        # Reaction time analysis
        print(f"  Analyzing reaction times for Subject {subject_num}...")
        rt_results[subject_num] = plot_rt_comparison(data, subject_num)

    # Analyze combined data
    if combined_data is not None:
        print("\nAnalyzing combined data (all subjects)...")
        accuracy_results['all'] = plot_accuracy_comparison(combined_data)
        rt_results['all'] = plot_rt_comparison(combined_data)
finally:
    # Always close the PDF, even if an analysis raised, so the file handle is
    # released and the pages written so far are saved
    pdf.close()

# Print summary of accuracy results
# The 'Test' column is 20 wide so "Fisher's exact test" (19 characters) fits
# without pushing the later columns out of alignment.
print("\nSummary of Math vs. Language Accuracy Analysis:")
print("-" * 100)
print(f"{'Subject':<10} {'Math Acc.':<10} {'Lang Acc.':<10} {'Math n':<8} {'Lang n':<8} {'Test':<20} {'p-value':<10} {'Significant?':<12}")
print("-" * 100)

for subject, result in accuracy_results.items():
    if result:
        subject_label = "All" if subject == 'all' else f"Subject {subject}"
        math_acc = f"{result['math_accuracy']:.3f}"
        lang_acc = f"{result['language_accuracy']:.3f}"
        significant = "Yes (p<0.05)" if result['p_value'] is not None and result['p_value'] < 0.05 else "No"
        p_val = f"{result['p_value']:.4f}" if result['p_value'] is not None else "N/A"

        print(f"{subject_label:<10} {math_acc:<10} {lang_acc:<10} {result['math_count']:<8} {result['language_count']:<8} {result['test_name']:<20} {p_val:<10} {significant:<12}")

# closing rule, matching the reaction-time table below
print("-" * 100)

# Print summary of reaction time results
print("\nSummary of Math vs. Language Reaction Time Analysis:")
print("-" * 100)
print(f"{'Subject':<10} {'Math RT':<15} {'Lang RT':<15} {'Math n':<8} {'Lang n':<8} {'t-stat':<10} {'p-value':<10} {'Significant?':<12}")
print("-" * 100)

for subject, result in rt_results.items():
    if result:
        subject_label = "All" if subject == 'all' else f"Subject {subject}"
        math_rt = f"{result['math_rt_mean']:.3f} ± {result['math_rt_std']:.3f}"
        lang_rt = f"{result['language_rt_mean']:.3f} ± {result['language_rt_std']:.3f}"
        significant = "Yes (p<0.05)" if result['p_value'] is not None and result['p_value'] < 0.05 else "No"
        t_stat = f"{result['t_stat']:.3f}" if result['t_stat'] is not None else "N/A"
        p_val = f"{result['p_value']:.4f}" if result['p_value'] is not None else "N/A"

        print(f"{subject_label:<10} {math_rt:<15} {lang_rt:<15} {result['math_rt_count']:<8} {result['language_rt_count']:<8} {t_stat:<10} {p_val:<10} {significant:<12}")

print("-" * 100)
print(f"\nAll plots saved to: {pdf_path}")
print("Math vs. Language Analysis complete!")

Starting Math vs. Language Analysis...
Loading data for Subject 2...
Loading data for Subject 4...
Loading data for Subject 5...
Combined data has 66 rows

=== Analyzing Math vs. Language Question Performance ===

Analyzing Subject 2...
  Analyzing accuracy for Subject 2...
  Analyzing reaction times for Subject 2...

Analyzing Subject 4...
  Analyzing accuracy for Subject 4...
  Analyzing reaction times for Subject 4...

Analyzing Subject 5...
  Analyzing accuracy for Subject 5...


  boxplot = ax.boxplot(
  boxplot = ax.boxplot(
  boxplot = ax.boxplot(


  Analyzing reaction times for Subject 5...

Analyzing combined data (all subjects)...

Summary of Math vs. Language Accuracy Analysis:
----------------------------------------------------------------------------------------------------
Subject    Math Acc.  Lang Acc.  Math n   Lang n   Test            p-value    Significant?
----------------------------------------------------------------------------------------------------
Subject 2  0.714      0.250      7        4        Fisher's exact test 0.2424     No          
Subject 4  0.889      0.833      9        6        Fisher's exact test 1.0000     No          
Subject 5  0.875      0.667      8        3        Fisher's exact test 0.4909     No          
All        0.833      0.615      24       13       Fisher's exact test 0.2293     No          

Summary of Math vs. Language Reaction Time Analysis:
----------------------------------------------------------------------------------------------------
Subject    Math RT         Lang RT  

  boxplot = ax.boxplot(
