In [None]:
# Mount Google Drive so that its contents are reachable under /content/drive.
# The google.colab module is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

import glob
import os

import numpy as np
import pandas as pd
import scipy.io as sio

# Drive folders for the two stress conditions (PRE and POST sessions).
pre_stress_folder, post_stress_folder = (
    '/content/drive/My Drive/PRE',
    '/content/drive/My Drive/POST',
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
def _extract_raw_rewards(user_vars):
    """Return the raw ``rwrd`` entry of ``UserVars``, or ``None`` if there is none.

    Accepts either a plain dict or a NumPy structured array / record, i.e.
    anything whose ``dtype.names`` contains ``'rwrd'``. Arrays without named
    fields have ``dtype.names is None``; they are treated as "no rewards"
    instead of raising ``TypeError`` on the membership test.
    """
    if isinstance(user_vars, dict):
        return user_vars.get('rwrd')
    field_names = getattr(getattr(user_vars, 'dtype', None), 'names', None)
    if field_names and 'rwrd' in field_names:
        return user_vars['rwrd']
    return None


def _flatten_rewards(raw):
    """Flatten a (possibly nested) reward container into a 1-D float array.

    Handles scalars, lists/tuples, numeric arrays of any rank, and object
    arrays that wrap further arrays. 0-d arrays are supported as well, which
    is what indexing a field of a 0-d structured array yields. ``None``
    yields an empty array.
    """
    flat = []

    def _collect(item):
        if isinstance(item, np.ndarray):
            # ravel() turns a 0-d array into a length-1 array, so it becomes iterable.
            if item.dtype == object:
                for sub in item.ravel():
                    _collect(sub)
            else:
                flat.extend(item.ravel().tolist())
        elif isinstance(item, (list, tuple)):
            for sub in item:
                _collect(sub)
        elif item is not None:
            flat.append(item)

    _collect(raw)
    return np.asarray(flat, dtype=float)


def process_mat_file(mat_file_path, stress_cond):
    """Load one session ``.mat`` file and return its valid trials as a DataFrame.

    Parameters
    ----------
    mat_file_path : str
        Path to the ``.mat`` file. The file name without ``.mat`` is used as
        the subject id.
    stress_cond : str
        Label stored in the ``stress_cond`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per trial whose ``ReactionTime`` is not NaN, with columns
        ``subid``, ``stress_cond``, ``env``, ``trial_time``, ``reaction_time``
        and ``reward``. An empty DataFrame (no columns) is returned, and a
        message printed, when ``ReactionTime`` or ``AbsoluteTrialStartTime``
        is missing or empty.

    Notes
    -----
    * Trials before the first NaN reaction time are labelled ``'short'``, the
      remaining valid trials ``'long'``; without any NaN every trial is
      ``'short'``.
    * Rewards come from ``UserVars.rwrd`` and default to 0 where unavailable.
    * NOTE(review): rewards are matched positionally to the *valid* trials
      (first reward -> first non-NaN trial). This assumes ``rwrd`` has no
      entry for NaN trials -- confirm against the task code.
    """
    mat = sio.loadmat(mat_file_path, squeeze_me=True)
    subid = os.path.basename(mat_file_path).replace('.mat', '')

    # squeeze_me=True can collapse single-trial data to a scalar; ravel restores 1-D.
    all_RTs = np.ravel(mat.get("ReactionTime", np.array([])))
    all_start_times = np.ravel(mat.get("AbsoluteTrialStartTime", np.array([])))

    if len(all_RTs) == 0 or len(all_start_times) == 0:
        print(f"Skipping {subid}: missing ReactionTime or StartTime")
        return pd.DataFrame()

    nan_mask = np.isnan(all_RTs)
    valid_idx = np.flatnonzero(~nan_mask)
    n_valid = len(valid_idx)

    # Rewards default to zero; overwrite as many leading entries as rwrd provides.
    rewards = np.zeros(n_valid)
    flat_rewards = _flatten_rewards(_extract_raw_rewards(mat.get("UserVars", {})))
    copy_len = min(len(flat_rewards), n_valid)
    rewards[:copy_len] = flat_rewards[:copy_len]

    # Every trial before the first NaN is valid, so the index of that NaN is
    # also the number of valid trials in the first ('short') environment.
    nan_pos = np.flatnonzero(nan_mask)
    split = int(nan_pos[0]) if len(nan_pos) else n_valid
    envs = ['short'] * split + ['long'] * (n_valid - split)

    return pd.DataFrame({
        'subid': [subid] * n_valid,
        'stress_cond': [stress_cond] * n_valid,
        'env': envs,
        'trial_time': all_start_times[valid_idx],
        'reaction_time': all_RTs[valid_idx],
        'reward': rewards,
    })



In [36]:
# Define paths to your pre/post .mat files in Drive
pre_folder = '/content/drive/MyDrive/PRE'
post_folder = '/content/drive/MyDrive/POST'

# Get all .mat files (sorted so the row order is reproducible between runs)
pre_files = sorted(glob.glob(os.path.join(pre_folder, '*.mat')))
post_files = sorted(glob.glob(os.path.join(post_folder, '*.mat')))


def _combine_sessions(mat_files, stress_cond, folder):
    """Process every file of one stress condition and stack the results.

    pd.concat raises ValueError on an empty list, so a folder without any
    .mat file yields a warning and an empty DataFrame instead of a crash.
    """
    if not mat_files:
        print(f"Warning: no .mat files found in {folder}")
        return pd.DataFrame()
    frames = [process_mat_file(path, stress_cond) for path in mat_files]
    return pd.concat(frames, ignore_index=True)


# Process and combine
df_pre = _combine_sessions(pre_files, 'pre', pre_folder)
df_post = _combine_sessions(post_files, 'post', post_folder)
df_all = pd.concat([df_pre, df_post], ignore_index=True)

# Print a preview
print("Combined Data Preview:")
print(df_all.head())
print(f"\nTotal trials: {len(df_all)}")


Combined Data Preview:
             subid stress_cond    env    trial_time  reaction_time    reward
0  31730_prestress         pre  short      0.000000         1094.0  6.862794
1  31730_prestress         pre  short  11233.907955         1283.0  6.315538
2  31730_prestress         pre  short  19343.886757         1198.0  5.592621
3  31730_prestress         pre  short  27910.864160         1163.0  4.974894
4  31730_prestress         pre  short  35838.928248         1234.0  4.362309

Total trials: 923


In [35]:
# Save the combined trial table to Drive as CSV; index=False leaves out the row index.
df_all.to_csv(
    path_or_buf='/content/drive/MyDrive/trial_data_combined_final.csv',
    index=False,
)

