In [1]:
import os
import glob
import numpy as np
import pandas as pd
import nibabel as nib

import matplotlib.pyplot as plt

In [2]:
# Root of the preprocessed functional runs; convert() globs
# "<FEATURES_PATH>/sub-*/ses-*/func/*" beneath it.
FEATURES_PATH = "data/ds001246/derivatives/preproc-spm/output"
# Dataset root; convert() reads the per-run "*events*" tables from here.
TARGETS_PATH = "data/ds001246"
# Category lookup tables. convert() reads them as tab-separated files without a
# header row, joins column 1 against the events' "stim_id" and uses column 2 as
# the label.
TRAIN_CATEGORIES_PATH = "data/ds001246/stimulus_ImageNetTraining.csv"
TEST_CATEGORIES_PATH = "data/ds001246/stimulus_ImageNetTest.csv"
# Output root for the per-sample .npy files written by convert().
PROCESSED_PATH = "data/processed"

In [3]:
def convert(subject="01", session_id="01", task="perception", train=True):
    """Export one fMRI session as per-sample ``.npy`` feature and label files.

    For every run of the session the functional image is loaded, the first 8
    and last 2 volumes are dropped, and each group of three consecutive
    remaining volumes becomes one sample of shape ``(3, 50, 64, 64)``
    (assumes every volume is 64 x 64 x 50 voxels -- TODO confirm against the
    preprocessing output). Labels come from the run's events table: "rest"
    rows are removed and the remaining ``stim_id`` values are joined against
    column 1 of the categories table; column 2 of that table is the label.

    Samples are written to
    ``{PROCESSED_PATH}/sub-{subject}/ses-{session}/fmris/{i}.npy`` and labels to
    the sibling ``categories/{i}.npy``, with ``i`` counting across all runs.

    Labels are stored exactly as they appear in the categories table.
    NOTE(review): an earlier draft subtracted 1 when converting to tensors, so
    consumers presumably need to shift to 0-based classes -- confirm.

    Args:
        subject: Subject identifier used in ``sub-{subject}``.
        session_id: Session number appended to the session name.
        task: Task prefix of the session name (e.g. ``"perception"``).
        train: Selects the ``Training`` session and training categories table
            when true, otherwise the ``Test`` ones.

    Raises:
        ValueError: If no runs are found, if the number of feature runs and
            event files differ, or if a run yields a different number of
            samples than labels. Previously the last two cases were silently
            truncated by ``zip``, which could pair samples with wrong labels.
    """
    session = f"{task}{'Training' if train else 'Test'}{session_id}"
    session_dir = f"sub-{subject}/ses-{session}"

    # load data
    feature_runs = sorted(glob.glob(f"{FEATURES_PATH}/{session_dir}/func/*"))
    target_runs = sorted(glob.glob(f"{TARGETS_PATH}/{session_dir}/func/*events*"))
    categories = pd.read_csv(TRAIN_CATEGORIES_PATH if train else TEST_CATEGORIES_PATH, sep="\t", header=None)

    # Fail early with a readable message instead of an opaque np.vstack error.
    if not feature_runs or not target_runs:
        raise ValueError(
            f"no runs found for {session_dir}: "
            f"{len(feature_runs)} feature file(s), {len(target_runs)} events file(s)"
        )
    # zip() would silently drop the surplus runs and could pair the wrong files.
    if len(feature_runs) != len(target_runs):
        raise ValueError(
            f"run count mismatch for {session_dir}: "
            f"{len(feature_runs)} feature file(s) vs {len(target_runs)} events file(s)"
        )

    # process features and targets
    features = []
    targets = []

    for f_run, t_run in zip(feature_runs, target_runs):
        features_run = nib.load(f_run).get_fdata()
        targets_run = pd.read_csv(t_run, sep="\t")

        # remove resting states
        features_run_pp = features_run[:, :, :, 8:-2]
        targets_run_pp = targets_run[targets_run["event_type"] != "rest"]

        # reshape features into (N, C, D, W, H)
        features_run_pp = features_run_pp.transpose(3, 2, 1, 0).reshape(-1, 3, 50, 64, 64)

        # extract category labels (inner join: rows without a matching
        # category are dropped, duplicated category keys multiply rows)
        targets_run_pp = targets_run_pp.merge(categories, left_on="stim_id", right_on=1)[2]
        targets_run_pp = targets_run_pp.to_numpy().reshape(-1, 1)

        # Every sample needs exactly one label; otherwise all later samples
        # of the session would be shifted against their labels.
        if len(features_run_pp) != len(targets_run_pp):
            raise ValueError(
                f"sample/label count mismatch in {f_run}: "
                f"{len(features_run_pp)} sample(s) vs {len(targets_run_pp)} label(s) from {t_run}"
            )

        features.append(features_run_pp)
        targets.append(targets_run_pp)

    features = np.vstack(features)
    targets = np.vstack(targets)

    # flatten targets; reshape(-1) keeps a 1-D array even for a single sample,
    # whereas squeeze() would return a 0-d array that cannot be iterated
    targets = targets.reshape(-1)

    # save to disk
    fmris_dir = f"{PROCESSED_PATH}/{session_dir}/fmris"
    categories_dir = f"{PROCESSED_PATH}/{session_dir}/categories"
    os.makedirs(fmris_dir, exist_ok=True)
    os.makedirs(categories_dir, exist_ok=True)

    for i, (f, t) in enumerate(zip(features, targets)):
        np.save(f"{fmris_dir}/{i}.npy", f)
        np.save(f"{categories_dir}/{i}.npy", t)

In [4]:
# Export the training split of perception session 01 for subject 01
# (writes per-sample .npy files under PROCESSED_PATH).
convert(subject="01", session_id="01", task="perception", train=True)

In [5]:
# Export the test split of perception session 01 for subject 01
# (writes per-sample .npy files under PROCESSED_PATH).
convert(subject="01", session_id="01", task="perception", train=False)

In [6]:
from torchvision import transforms

In [8]:
# Sanity check: number of exported fMRI samples for sub-01, perception
# training session 01. Built from PROCESSED_PATH so the check follows the
# configured output root instead of a duplicated hard-coded path.
len(glob.glob(f"{PROCESSED_PATH}/sub-01/ses-perceptionTraining01/fmris/*"))

550