In [88]:
import nilearn
import numpy as np
import pandas as pd
import os
import hcp_utils as hcp
import nibabel as nib
import json
from sklearn.impute import SimpleImputer

## Dataset Class

In [10]:
class RawDataset():
    def __init__(self, BIDS_path):
        self.BIDS_path = BIDS_path
        if self.BIDS_path is not None:
            pass
        else:
            raise ValueError("The path to the dataset in BIDS format must be specified (BIDS_path).")
        self.data_description_path = self.BIDS_path + '/dataset_description.json'
        self.participant_data_path = self.BIDS_path + '/participants.tsv'
        self._participant_data = pd.read_csv(self.participant_data_path, sep = '\t')
        self._name = None
        self._data_description = None
        self._subjects = None
        self._group = None

    @property
    def participant_data(self):
        if self._participant_data is None:
            self._participant_data = pd.read_csv(self.participant_data_path, sep = '\t')
        return self._participant_data

    @property
    def subjects(self):
        if self._subjects is None:
            self._subjects = self._participant_data['participant_id'].values
        return self._subjects

    @property
    def group(self):
        if self._group is None:
            self._group = np.unique(self._participant_data['group'].values)
        return self._group
    
    @property
    def data_description(self):
        if self._data_description is None:
            self._data_description = json.load(open(self.data_description_path))
        return self._data_description

    @property
    def name(self):
        if self._name is None:
            self._name = self.data_description['Name']
        return self._name
    
    def __repr__(self):
        return f'Dataset(Name={self.name},\nGroup(s)={self.group},\nSubjects={self.subjects},\nData_Path={self.BIDS_path})'


In [87]:
class FmriPreppedDataSet(RawDataset):

    def __init__(self, BIDS_path):
        super().__init__(BIDS_path)
        self.data_path = self.BIDS_path + '/derivatives'
        self.data_path = self._find_sub_dirs()
    def __repr__(self):
        return f'Dataset(Group(s)={self.group},\n Subjects={self.subjects},\n Data_Path={self.data_path})'
    
    def _find_sub_dirs(self):
        path_not_found = True
        while path_not_found:
            subdirs = os.listdir(self.data_path)
            for subdir in subdirs:
                if any(subdir.startswith('sub-') for subdir in subdirs):
                        path_not_found = False
                else:
                    if os.path.isdir(os.path.join(self.data_path, subdir)):
                        self.data_path = os.path.join(self.data_path, subdir)
        return self.data_path
    
    def get_sessions(self, subject):
        subject_dir = f'{self.data_path}/sub-{subject}'
        subdirs = os.listdir(subject_dir)
        session_names = []
        for subdir in subdirs:
            if subdir.startswith('ses-'):
                session_names.append(subdir[4:])
        return session_names
    
    def _impute_nans_confounds(dataframe, pick_confounds = None):
        imputer = SimpleImputer(strategy='mean')
        if pick_confounds == None:
            pick_confounds = np.loadtxt('PyConn/PyConn/preprocessing/default_confounds.txt', dtype = 'str')
        if isinstance(pick_confounds, (list, np.ndarray)):
            df_no_nans = pd.DataFrame(imputer.fit_transform(dataframe), columns=dataframe.columns)[pick_confounds]
        else:
            df_no_nans = pd.DataFrame(imputer.fit_transform(dataframe), columns=dataframe.columns)
        return df_no_nans
    
    def get_confounds(self, subject, pick_confounds = None, no_nans = True):
        subject = "sub-" + subject
        if pick_confounds == None:
            pick_confounds = np.loadtxt('PyConn/PyConn/preprocessing/default_confounds.txt', dtype = 'str')
        confounds_suffix = "confounds_timeseries.tsv"
        func_dir = os.path.join(self.data_path, subject, "func", "")
        confounds_path = os.path.join(func_dir, [filename for filename in os.listdir(func_dir) if confounds_suffix in filename][0])
        confounds = pd.read_csv(confounds_path, sep = '\t')
        return confounds
    
    def get_confounds(self, subject, no_nans = True, pick_columns = None):
        confound_paths = []
        confound_list = []
        subject_dir = os.path.join(self.data_path, f'sub-{subject}')
        session_names = get_sessions(subject)
        if len(session_names) != 0:
            for session_name in session_names:
                session_dir = os.path.join(subject_dir, f'ses-{session_name}', 'func')
                if os.path.exists(session_dir):
                    confound_files = [os.path.join(session_dir, f) for f in os.listdir(session_dir) if f.endswith('confounds_timeseries.tsv')]
                    confound_paths.extend(confound_files)
            if no_nans == True:
                for confounds_path in confound_paths:
                    confounds = pd.read_csv(confounds_path, sep = '\t')
                    confounds = self._impute_nans_confounds(confounds, pick_columns = pick_columns)
                    confound_list.append(confounds)
            else:
                for confounds_path in confound_paths:
                    confounds = pd.read_csv(confounds_path, sep = '\t')
                    confound_list.append(confounds)
        return confound_list
    
    
    

IndentationError: expected an indented block (2475362527.py, line 22)

In [83]:
dataset = FmriPreppedDataSet(BIDS_path = '/Users/VictoriaShevchenko/Documents/Python_pour_scientifiques/PyConn/PyConn/data/depression_bezmaternykh')

In [89]:
len([])

0