In [1]:
# Switch to the parent directory so relative paths such as 'data/...' resolve from there.
# Not idempotent: every re-run of this cell climbs one more level, so run it exactly
# once per kernel session.
%cd ../

/home/zulqarnain/Code/ntfa_rest/ntfa_degeneracy


In [2]:
import collections
import logging
import htfa_torch.niidb as niidb
import htfa_torch.utils as utils
import glob
import os
import webdataset as wds
import torch
import pandas as pd

In [3]:
def sentinel(f):
    """Return ``f`` unchanged, substituting ``0.0`` only when ``f`` is ``None``.

    Used to make possibly-missing timestamps safe for ``%f`` log formatting.
    Falsy values other than ``None`` (``0``, ``''``, ...) pass through as-is.
    """
    if f is None:
        return 0.0
    return f

In [4]:
# Render matplotlib figures inline in the cell output.
# NOTE(review): no plotting call is visible in this part of the notebook -- confirm
# this magic is still needed.
%matplotlib inline

In [5]:
# Emit INFO-level (and above) records prefixed with a month/day/year timestamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
)

In [6]:
# Output archive; a relative path, so it is resolved against the current working directory.
tar_file = 'data/mini_aging_site1.tar'
# Mask file assigned to every block (`block.mask`) when the archive is built.
mask_file = '/data/zulqarnain/fmri_data/aging_data_fmriprep/masks/GM_fmriprep_spreng_rest_mask_N7.nii.gz'
# Directory holding the NIfTI runs; aging_filename() builds per-run paths from it.
nifti_files = '/data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/'
# Tab-separated participants table (one row per participant_id).
# NOTE(review): these absolute paths are machine-specific; consider a configurable data root.
participant_file = '/data/zulqarnain/fmri_data/aging_data_fmriprep/participants.tsv'

# Must be evaluated BEFORE the writer below is created, otherwise the check would
# always see the file the writer itself just produced.
existed = os.path.isfile(tar_file)
# NOTE(review): the writer is opened unconditionally, before OVERWRITE is consulted.
# If TarWriter creates/truncates the file on open (likely -- confirm against the
# webdataset docs), an already-built archive is emptied even when OVERWRITE is False
# and the build cell skips rewriting it.
sink = wds.TarWriter(tar_file)

# printf-style pattern for one run's file name: subject zero-padded to three digits, then session.
FILENAME_TEMPLATE = 'sub-%03d_ses-%01d_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz'
def aging_filename(subject, ses):
    """Return the path of the NIfTI file for one subject/session.

    Args:
        subject: Integer subject number (formatted as ``sub-NNN``).
        ses: Integer session number (formatted as ``ses-N``).

    Returns:
        ``nifti_files`` joined with the formatted ``FILENAME_TEMPLATE``.
    """
    # os.path.join keeps the result correct whether or not `nifti_files` ends with a
    # path separator; plain concatenation silently glued the directory name to the file name.
    return os.path.join(nifti_files, FILENAME_TEMPLATE % (subject, ses))

# Rebuild the archive even if it is already on disk.
OVERWRITE = True
# Subject numbers to include, in the order their blocks are written.
subjects = [
    193,
    194,
    195,
    197,
    154,
    163,
    220,
]
# Session numbers processed for every subject.
sessions = [1, 2]



In [7]:
# Load the participants table; read_table parses tab-separated text by default.
participants_db = pd.read_table(participant_file)

In [8]:
# Preview the first five rows to check the parsed columns.
participants_db.head(n=5)

Unnamed: 0,participant_id,sex,site,agegroup,age,education,openneuro,flair_slicediff,rest_voldiff
0,sub-01,F,1,Y,21,15.0,0,1,0
1,sub-02,F,1,O,73,16.0,0,0,0
2,sub-03,F,1,O,77,16.0,0,0,0
3,sub-04,M,1,O,68,16.0,0,0,0
4,sub-05,F,1,O,60,22.0,0,0,0


In [9]:
# Spot-check: the `sex` entry for participant sub-220, returned as a Series that keeps the row index.
participants_db.loc[participants_db['participant_id'] == 'sub-220', 'sex']

219    F
Name: sex, dtype: object

In [10]:
# Same lookup expressed with DataFrame.query, unwrapped to a plain NumPy array.
participants_db.query("participant_id == 'sub-220'").loc[:, "sex"].to_numpy()

09/05/2023 12:25:00 Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
09/05/2023 12:25:00 NumExpr defaulting to 8 threads.


array(['F'], dtype=object)

In [11]:
# Maps each key stored in a block's `individual_differences` to its source column in
# `participants_db`. Keys are kept exactly as they were so consumers of the archive
# metadata see the same names.
# NOTE(review): 'flairslicediff' drops the underscore of its column 'flair_slicediff',
# and the table's 'age' column is not recorded -- confirm both are intentional.
INDIVIDUAL_DIFFERENCE_COLUMNS = {
    'sex': 'sex',
    'site': 'site',
    'agegroup': 'agegroup',
    'education': 'education',
    'openneuro': 'openneuro',
    'flairslicediff': 'flair_slicediff',
    'rest_voldiff': 'rest_voldiff',
}

# `sink` was opened in the configuration cell; the try/finally guarantees it is closed
# even when a block fails to load or write.
try:
    if not existed or OVERWRITE:
        total_trs = 0
        metadata = {
            'blocks': []
        }
        block_index = 0
        for sub in subjects:
            # IDs in the participants table are zero-padded to at least two digits
            # ('sub-01' ... 'sub-220'); three-digit subjects format exactly as before.
            participant_id_ind = 'sub-%02d' % sub
            # Filter the table once per subject instead of once per recorded column.
            participant_rows = participants_db[participants_db['participant_id'] == participant_id_ind]
            if participant_rows.empty:
                raise KeyError('%s not found in %s' % (participant_id_ind, participant_file))

            for ses in sessions:
                file = aging_filename(sub, ses)
                block = niidb.FMriActivationBlock(zscore=True)
                block.block = block_index
                block_index += 1
                block.filename = file
                block.subject = sub
                block.task = 'rest'
                block.mask = mask_file
                # Fresh dict per block; values stay the column's own scalar type
                # (first matching row), exactly as `.to_numpy()[0]` returned before.
                block.individual_differences = {
                    key: participant_rows[column].to_numpy()[0]
                    for key, column in INDIVIDUAL_DIFFERENCE_COLUMNS.items()
                }
                block.load()
                metadata['blocks'].append(block.wds_metadata())
                for vals in block.format_wds():
                    sink.write(vals)
                block_trs = (block.end_time - block.start_time)
                total_trs += block_trs

                # block_index was already incremented, so blocks are reported 1-based here.
                logging.info('Block %d %s of run %d started at %f, ended at %f, contained %d TRs', block_index,
                             block.task, block.run, sentinel(block.start_time), sentinel(block.end_time),
                             block_trs)

        if not metadata['blocks']:
            # Without this guard the line below would hit an undefined (or stale) `block`.
            raise ValueError('No blocks were written: `subjects` or `sessions` is empty')
        # Voxel locations are taken from the last block processed.
        # NOTE(review): this presumes every block shares the same mask/voxel grid -- confirm.
        metadata['voxel_locations'] = block.locations
        metadata['num_times'] = total_trs
        torch.save(metadata, tar_file + '.meta')
        logging.info('Recorded metadata, including voxel locations')
finally:
    sink.close()

09/05/2023 12:25:00 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/sub-193_ses-1_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz with mask /data/zulqarnain/fmri_data/aging_data_fmriprep/masks/GM_fmriprep_spreng_rest_mask_N7.nii.gz (zscore=True, smooth=None, zscore_by_rest=False)
09/05/2023 12:25:08 Block 1 rest of run 0 started at 0.000000, ended at 204.000000, contained 204 TRs
09/05/2023 12:25:08 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/clean_nifti_files/sub-193_ses-2_task-rest_space-MNI152NLin2009cAsym_res-2_desc-optcomDenoised_bold.nii.gz with mask /data/zulqarnain/fmri_data/aging_data_fmriprep/masks/GM_fmriprep_spreng_rest_mask_N7.nii.gz (zscore=True, smooth=None, zscore_by_rest=False)
09/05/2023 12:25:17 Block 2 rest of run 0 started at 0.000000, ended at 204.000000, contained 204 TRs
09/05/2023 12:25:17 Loading Nifti image /data/zulqarnain/fmri_data/aging_data_fmriprep/tedana/

In [12]:
# The previous message ("NiiDb out of synthetic dataset") did not match this notebook,
# which writes a WebDataset tar from the aging resting-state runs.
logging.info('Finished building WebDataset archive %s from the aging resting-state dataset', tar_file)

09/05/2023 12:26:54 Finished building NiiDb out of synthetic dataset


In [13]:
# Inspect the voxel coordinates of the last block handled by the build loop.
# `block` is the loop variable left over from that cell, so it only exists if the
# build actually ran; these are the values stored as metadata['voxel_locations'].
block.locations

tensor([[-68., -46.,  -2.],
        [-68., -46.,   0.],
        [-68., -46.,   2.],
        ...,
        [ 70., -16., -12.],
        [ 70., -16., -10.],
        [ 70., -14., -12.]])

In [14]:
 # Spot-check: `sex` of the last participant handled by the build loop
 # (`participant_id_ind` is left over from that cell).
 participants_db.loc[participants_db['participant_id'].eq(participant_id_ind), "sex"].to_numpy()[0]

'F'