In [1]:
import pandas as pd
import numpy as np
import os

### Exclude participants based on Motion QC criteria

In [3]:
# Define the folder containing the confounds files
folder_path = "/pscratch/sd/p/pakmasha/ENIGMA_unzip/Rome_SLF/confounds"

# Initialize an empty list to store excluded participants
excluded = []

# Loop over all TSV files in the folder
for file_name in os.listdir(folder_path):
    if file_name.endswith("_confounds.tsv"):  # Process only confounds files
    # if file_name.endswith("confounds_timeseries.tsv"):  # for Vancouver_BCCHR
        
        file_path = os.path.join(folder_path, file_name)
        
        # Read the confounds file
        df = pd.read_csv(file_path, sep="\t")
        
        # Apply exclusion criteria
        max_translation = df[["trans_x", "trans_y", "trans_z"]].abs().max().max()
        max_rotation = df[["rot_x", "rot_y", "rot_z"]].abs().max().max() * (180 / np.pi)  # Convert to degrees
        exclude_motion = max_translation > 4 or max_rotation > 4

        average_fd = df["framewise_displacement"].mean()
        exclude_fd = average_fd > 0.3

        motion_unaffected = (df["framewise_displacement"] < 0.25).sum()
        exclude_volumes = motion_unaffected < 100

        exclude_participant = exclude_motion or exclude_fd or exclude_volumes

        # Print results for the current participant
        print(f"Participant {file_name}: Excluded = {exclude_participant}")
        
        # Extract subject name from the file_name
        subject_name = file_name.split("_confounds_timeseries.tsv")[0]
        # subject_name = file_name.split("_")[0] # for Vancouver_BCCHR
        
        # If the participant is excluded, add their name to the list
        if exclude_participant:
            excluded.append(subject_name)
            print("exclude_motion:", exclude_motion, "(", max_translation, max_rotation, ")")
            print("exclude_fd:", exclude_fd, "(", average_fd, ")")
            print("exclude_volumes:", exclude_volumes, "(", motion_unaffected, ")")
            
# Print the excluded participants
print("Excluded participants:", excluded)        
print(len(excluded))
            

Participant sub-subAOCD016_confounds.tsv: Excluded = False
Participant sub-subAOCD002_confounds.tsv: Excluded = False
Participant sub-subAOCD009_confounds.tsv: Excluded = False
Participant sub-subAOCD017_confounds.tsv: Excluded = False
Participant sub-subAOCD013_confounds.tsv: Excluded = True
exclude_motion: False ( 0.593259 0.9571431854998941 )
exclude_fd: False ( 0.29214418537430165 )
exclude_volumes: True ( 87 )
Participant sub-subAHC038_confounds.tsv: Excluded = False
Participant sub-subAOCD005_confounds.tsv: Excluded = True
exclude_motion: False ( 1.17013 2.109687897451204 )
exclude_fd: True ( 0.5381576233240224 )
exclude_volumes: True ( 43 )
Participant sub-subAOCD015_confounds.tsv: Excluded = False
Participant sub-subAOCD007_confounds.tsv: Excluded = False
Participant sub-subAHC032_confounds.tsv: Excluded = False
Participant sub-subAOCD006_confounds.tsv: Excluded = False
Participant sub-subAOCD012_confounds.tsv: Excluded = True
exclude_motion: False ( 0.583032 0.8138922775612857

### Format subject IDs if needed

In [4]:
formatted = []

for subject_id in excluded:
    add = "sub-" + subject_id
    formatted.append(add)
    
print(f"Formatted IDs: {formatted}")

Formatted IDs: ['sub-sub-subAOCD013_confounds.tsv', 'sub-sub-subAOCD005_confounds.tsv', 'sub-sub-subAOCD012_confounds.tsv']
