In [1]:
# Move the kernel's working directory up one level so that relative paths used
# later in the notebook resolve from the parent directory.
# NOTE: this is not idempotent - re-running the cell moves up one more level.
%cd ../

/home/zulqarnain/Code/ntfa_rest_template/ntfa_degeneracy


In [2]:
import collections
import logging
import htfa_torch.niidb as niidb
import htfa_torch.utils as utils
import glob
import os
import webdataset as wds
import torch
import pandas as pd
import numpy as np
import itertools

In [3]:
def sentinel(f):
    """Return ``f`` unchanged, or ``0.0`` when ``f`` is ``None``.

    Only ``None`` is substituted; other falsy values (``0``, ``''``, ...)
    are passed through as they are.
    """
    if f is None:
        return 0.0
    return f

In [4]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [5]:
# Emit INFO-and-above records, each prefixed with a month/day/year timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s %(message)s',
)

In [6]:
# Output archive; a relative path, so it is resolved against the kernel's
# current working directory.
tar_file = 'data/mini_aging_site1_blocks.tar'
# Mask passed to every block before it is loaded.
mask_file = '/data/zulqarnain/fmri_data/aging_data_fmriprep/masks/GM_fmriprep_spreng_rest_mask_N7.nii.gz'
# Directory holding one NIfTI file per (subject, session).
nifti_files = '/data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/'
# Tab-separated table with one row of demographics per participant.
participant_file = '/data/zulqarnain/fmri_data/aging_data_fmriprep/participants.tsv'
# printf-style template: subject is zero-padded to three digits, session is not padded.
FILENAME_TEMPLATE = 'sub-%03d_ses-%01d_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz'


def aging_filename(subject, ses):
    """Return the full path of the NIfTI file for one subject and session.

    Parameters
    ----------
    subject : int
        Subject number, substituted into ``FILENAME_TEMPLATE`` with ``%03d``.
    ses : int
        Session number, substituted into ``FILENAME_TEMPLATE`` with ``%01d``.

    Returns
    -------
    str
        ``nifti_files`` joined with the formatted file name.
    """
    # os.path.join rather than `+` so the result stays correct whether or not
    # `nifti_files` ends with a path separator.
    return os.path.join(nifti_files, FILENAME_TEMPLATE % (subject, ses))

In [7]:
# Number of TRs skipped at the start of every session before blocks are cut.
NUM_IGNORE_TRS = 2
# Session numbers processed for each subject.
sessions = [1, 2]
# Subject numbers included in this archive.
subjects = [193, 194, 195, 197, 154, 163, 220]

In [8]:
# Every (subject, session) pair, ordered subject-major (all sessions of the
# first subject, then all sessions of the next, ...).
subjects_sessions = list(itertools.product(subjects, sessions))
# One separate empty list per pair (presumably a placeholder for per-session
# lengths - nothing is stored in it here).
session_lengths = {pair: [] for pair in subjects_sessions}

In [9]:
# Set to False to keep an archive that already exists instead of rebuilding it.
OVERWRITE = True

# Must be checked BEFORE any writer is created: creating the writer on
# `tar_file` creates the file.
existed = os.path.isfile(tar_file)

# wds.TarWriter opens its target for writing as soon as it is constructed,
# which wipes a previously built archive. Only point it at `tar_file` when the
# archive is really going to be (re)built; otherwise use a throwaway writer on
# os.devnull so that later `sink.write(...)` / `sink.close()` calls remain
# valid while the existing archive is left untouched.
if existed and not OVERWRITE:
    sink = wds.TarWriter(os.devnull)
else:
    sink = wds.TarWriter(tar_file)

In [10]:
# Load the per-participant table; read_table parses tab-separated text by default.
participants_db = pd.read_table(participant_file)

In [11]:
# Preview the first rows to check the available columns and the format of
# the participant_id values.
participants_db.head()

Unnamed: 0,participant_id,sex,site,agegroup,age,education,openneuro,flair_slicediff,rest_voldiff
0,sub-01,F,1,Y,21,15.0,0,1,0
1,sub-02,F,1,O,73,16.0,0,0,0
2,sub-03,F,1,O,77,16.0,0,0,0
3,sub-04,M,1,O,68,16.0,0,0,0
4,sub-05,F,1,O,60,22.0,0,0,0


In [12]:
# Cut every (subject, session) recording into consecutive fixed-length blocks,
# write each block to the tar archive through `sink`, and save a companion
# '<tar_file>.meta' file holding the per-block metadata, the voxel locations
# and the total number of TRs written.

# TRs per block; the last block of a session is shorter when the remaining
# length is not a multiple of this.
BLOCK_LENGTH_TRS = 30

# Key stored in block.individual_differences -> column of participants_db.
# The keys are kept exactly as previously written (note 'flairslicediff').
INDIVIDUAL_DIFFERENCE_COLUMNS = {
    'sex': 'sex',
    'site': 'site',
    'agegroup': 'agegroup',
    'age': 'age',
    'education': 'education',
    'openneuro': 'openneuro',
    'flairslicediff': 'flair_slicediff',
    'rest_voldiff': 'rest_voldiff',
}

try:
    if not existed or OVERWRITE:
        total_trs = 0
        metadata = {
            'blocks': []
        }
        block_index = 0
        last_block = None
        for (sub, ses) in subjects_sessions:
            nifti_path = aging_filename(sub, ses)

            # Load the whole session once, only to learn how many TRs it has.
            session = niidb.FMriActivationBlock(zscore=True)
            session.filename = nifti_path
            session.subject = sub
            session.task = 'rest'
            session.run = ses
            session.load()
            session_length = session.activations.shape[0]
            del session  # release the full recording before loading blocks

            # The participant row is the same for every block of this session,
            # so look it up once here instead of once per field per block.
            # IDs in the previewed rows of participants.tsv are zero-padded to
            # two digits ('sub-01'); %02d matches that and leaves three-digit
            # subjects such as 193 unchanged.
            # TODO confirm the padding holds for the whole table.
            participant_id_ind = 'sub-%02d' % sub
            participant_rows = participants_db[
                participants_db['participant_id'] == participant_id_ind
            ]
            if participant_rows.empty:
                raise ValueError(
                    'No row for %s in %s' % (participant_id_ind, participant_file)
                )
            individual_differences = {
                key: participant_rows[column].to_numpy()[0]
                for key, column in INDIVIDUAL_DIFFERENCE_COLUMNS.items()
            }

            for s in range(NUM_IGNORE_TRS, session_length, BLOCK_LENGTH_TRS):
                block = niidb.FMriActivationBlock(zscore=True)
                block.filename = nifti_path
                block.mask = mask_file
                block.subject = sub
                block.task = 'rest'
                block.run = ses
                block.block = block_index
                block_index += 1

                block.start_time = s
                # Clamp the final block to the end of the recording.
                block.end_time = min(s + BLOCK_LENGTH_TRS, session_length)
                # Each block gets its own dict so blocks never share state.
                block.individual_differences = dict(individual_differences)
                block.load()
                metadata['blocks'].append(block.wds_metadata())
                for vals in block.format_wds():
                    sink.write(vals)
                block_trs = (block.end_time - block.start_time)
                total_trs += block_trs
                last_block = block

                # block_index was already incremented, so the number logged
                # here is 1-based while block.block is 0-based.
                logging.info('Block %d %s of run %d started at %f, ended at %f, contained %d TRs', block_index,
                             block.task, block.run, sentinel(block.start_time), sentinel(block.end_time),
                             block_trs)

        if last_block is None:
            # Without this guard the voxel locations would come from an
            # undefined name, or from a stale `block` left over from an
            # earlier run of this cell.
            raise RuntimeError('No blocks were produced; metadata not written')
        metadata['voxel_locations'] = last_block.locations
        metadata['num_times'] = total_trs
        torch.save(metadata, tar_file + '.meta')
        logging.info('Recorded metadata, including voxel locations')
finally:
    # Always close the writer, also when a load or write above fails, so the
    # archive is not left open.
    sink.close()

09/15/2023 10:26:11 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/sub-193_ses-1_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz with mask None (zscore=True, smooth=None, zscore_by_rest=False)
09/15/2023 10:26:23 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/sub-193_ses-1_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz with mask /data/zulqarnain/fmri_data/aging_data_fmriprep/masks/GM_fmriprep_spreng_rest_mask_N7.nii.gz (zscore=True, smooth=None, zscore_by_rest=False)
09/15/2023 10:26:30 Block 1 rest of run 1 started at 2.000000, ended at 32.000000, contained 30 TRs
09/15/2023 10:26:30 Block 2 rest of run 1 started at 32.000000, ended at 62.000000, contained 30 TRs
09/15/2023 10:26:31 Block 3 rest of run 1 started at 62.000000, ended at 92.000000, contained 30 TRs
09/15/2023 10:26:31 Block 4 rest of run 1 started at 92.000000, ended at 122.000000

09/15/2023 10:28:25 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/sub-197_ses-1_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz with mask /data/zulqarnain/fmri_data/aging_data_fmriprep/masks/GM_fmriprep_spreng_rest_mask_N7.nii.gz (zscore=True, smooth=None, zscore_by_rest=False)
09/15/2023 10:28:32 Block 43 rest of run 1 started at 2.000000, ended at 32.000000, contained 30 TRs
09/15/2023 10:28:32 Block 44 rest of run 1 started at 32.000000, ended at 62.000000, contained 30 TRs
09/15/2023 10:28:32 Block 45 rest of run 1 started at 62.000000, ended at 92.000000, contained 30 TRs
09/15/2023 10:28:32 Block 46 rest of run 1 started at 92.000000, ended at 122.000000, contained 30 TRs
09/15/2023 10:28:32 Block 47 rest of run 1 started at 122.000000, ended at 152.000000, contained 30 TRs
09/15/2023 10:28:32 Block 48 rest of run 1 started at 152.000000, ended at 182.000000, contained 30 TRs
09/15/2023 10:28:33 Block 49 rest

09/15/2023 10:30:33 Block 85 rest of run 1 started at 2.000000, ended at 32.000000, contained 30 TRs
09/15/2023 10:30:33 Block 86 rest of run 1 started at 32.000000, ended at 62.000000, contained 30 TRs
09/15/2023 10:30:33 Block 87 rest of run 1 started at 62.000000, ended at 92.000000, contained 30 TRs
09/15/2023 10:30:33 Block 88 rest of run 1 started at 92.000000, ended at 122.000000, contained 30 TRs
09/15/2023 10:30:33 Block 89 rest of run 1 started at 122.000000, ended at 152.000000, contained 30 TRs
09/15/2023 10:30:33 Block 90 rest of run 1 started at 152.000000, ended at 182.000000, contained 30 TRs
09/15/2023 10:30:34 Block 91 rest of run 1 started at 182.000000, ended at 204.000000, contained 22 TRs
09/15/2023 10:30:34 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/sub-220_ses-2_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz with mask None (zscore=True, smooth=None, zscore_by_rest=False)
09/15/2023 10:30

In [13]:
# Mark the end of the build in the log.
logging.info('Finished building NiiDb out of Spreng dataset')

09/15/2023 10:30:55 Finished building NiiDb out of Spreng dataset
