In [1]:
from torch.utils.data import Dataset
import numpy as np
import os
from scipy import interpolate
import json
import csv
import torch
from pathlib import Path
from PIL import Image
import torchvision.transforms as transforms

In [13]:
def printNpStats(matrix):
    """Print the shape, mean, variance, maximum and minimum of ``matrix``.

    Each statistic is written on its own line as ``<Label>: <value>``.
    Nothing is returned.
    """
    print("Shape:", matrix.shape)
    # Each reduction is evaluated right before it is printed, one line at a time.
    reducers = (
        ("Mean:", np.mean),
        ("Variance:", np.var),
        ("Max:", np.max),
        ("Min:", np.min),
    )
    for label, reduce_fn in reducers:
        print(label, reduce_fn(matrix))

In [17]:
def identity(x):
    """Return ``x`` unchanged (a no-op placeholder transform)."""
    return x
def pad_to_patch_size(x, patch_size):
    """Pad the second axis of a 2-D array up to a multiple of ``patch_size``.

    The extra columns are appended on the right and filled by wrapping around
    to the first columns of each row (``np.pad`` mode ``'wrap'``).

    Args:
        x: 2-D array; only axis 1 is padded.
        patch_size: positive integer the resulting width must be divisible by.

    Returns:
        A new array whose width is the smallest multiple of ``patch_size`` that
        is >= ``x.shape[1]``. An already-aligned input keeps its width (no
        spurious extra patch is appended).

    Raises:
        AssertionError: if ``x`` is not 2-D.
    """
    assert x.ndim == 2
    # (-n) % p is the distance from n to the next multiple of p, and is 0 when
    # n is already a multiple (p - n % p would wrongly give p in that case).
    missing = (-x.shape[1]) % patch_size
    return np.pad(x, ((0, 0), (0, missing)), 'wrap')
def normalize(x, mean=None, std=None):
    """Standardise ``x`` as ``(x - mean) / std``.

    Args:
        x: array to standardise.
        mean: value to subtract; when ``None`` the mean over all of ``x`` is used.
        std: value to divide by; when ``None`` the standard deviation over all
            of ``x`` is used.

    Returns:
        The shifted and scaled array.
    """
    if mean is None:
        mean = np.mean(x)
    if std is None:
        std = np.std(x)
    centred = x - mean
    return centred / (std * 1.0)

# NSD Dataset

In [7]:
# Directory holding one sub-folder per NSD subject.
path = '/mnt/isilon/CSC6/HelenZhouLab/HZLHD0/InternsnStudents/Interns/jonathan/datasets/NSD'
# Subject identifiers subj01 ... subj08.
subjects = ['subj{:02d}'.format(number) for number in range(1, 9)]

In [8]:
sub = "subj01"

# The training responses are stored as one .npy file per hemisphere.
fmri_dir = os.path.join(path, sub, 'training_split', 'training_fmri')
lh_fmri, rh_fmri = (
    np.load(os.path.join(fmri_dir, file_name))
    for file_name in ('lh_training_fmri.npy', 'rh_training_fmri.npy')
)

# sep='\n' puts the heading, the shape and the axis legend on separate lines.
print('LH training fMRI data shape:',
      lh_fmri.shape,
      '(Training stimulus images × LH vertices)',
      sep='\n')

print('\nRH training fMRI data shape:',
      rh_fmri.shape,
      '(Training stimulus images × RH vertices)',
      sep='\n')

In [10]:
# Join the two hemispheres column-wise: rows stay aligned, columns are LH followed by RH.
hemispheres = [lh_fmri, rh_fmri]
fmri_sub = np.concatenate(hemispheres, axis=1)
print(fmri_sub.shape)

In [14]:
# Summary statistics of the concatenated LH+RH array as loaded from disk.
printNpStats(fmri_sub)

Shape: (9841, 39548)
Mean: 0.0021823181
Variance: 0.50268227
Max: 6.3958163
Min: -6.224722


In [25]:
# patch_size is defined here because its only other assignment lives in a later
# cell; without this line the cell raises NameError on a fresh kernel
# (Restart & Run All).
patch_size = 16

# Pad the column axis to a multiple of patch_size, then standardise with the
# global mean/std of the padded array.
# NOTE: fmri_sub is overwritten, so re-running this cell applies the transform again.
fmri_sub = normalize(pad_to_patch_size(fmri_sub, patch_size))
printNpStats(fmri_sub)

Shape: (9841, 39552)
Mean: -5.3962856e-09
Variance: 1.0000061
Max: 9.017823
Min: -8.7826605


In [26]:
# Show the padded, standardised array (NumPy abbreviates large arrays with "...").
print(fmri_sub)

[[-1.2185764  -0.28965947 -0.8865727  ... -0.28965947 -0.8865727
  -1.9951377 ]
 [ 1.0158539  -0.4024323   1.3003923  ... -0.4024323   1.3003923
  -0.00702139]
 [-0.43811968 -1.3179294   2.42168    ... -1.3179294   2.42168
   0.13285132]
 ...
 [ 0.8680637   0.33460727  1.1759323  ...  0.33460727  1.1759323
  -0.60141593]
 [-0.397421   -0.23484501  0.83007205 ... -0.23484501  0.83007205
   1.5428822 ]
 [-1.9804912  -0.5291305  -0.94592166 ... -0.5291305  -0.94592166
  -1.3088195 ]]


# BOLD5000 Dataset

In [20]:
# --- BOLD5000 settings ---
# NOTE: `path` and `subjects` are re-assigned here and replace any earlier values.
path = '/mnt/isilon/CSC6/HelenZhouLab/HZLHD0/InternsnStudents/Interns/jonathan/datasets/BOLD5000'
patch_size = 16
image_transform = identity
subjects = ['CSI1', 'CSI2', 'CSI3', 'CSI4']
include_nonavg_test = False
roi_list = ['EarlyVis', 'LOC', 'OPA', 'PPA', 'RSC']

fmri_path = os.path.join(path, 'BOLD5000_GLMsingle_ROI_betas/py')
img_path = os.path.join(path, 'BOLD5000_Stimuli')

# img_dict.npy wraps a Python object (unwrapped with .item()), hence
# allow_pickle=True. Unpickling can execute arbitrary code, so only load this
# file from a trusted source.
imgs_dict = np.load(
    os.path.join(img_path, 'Scene_Stimuli/Presented_Stimuli/img_dict.npy'),
    allow_pickle=True,
).item()

# Names of the repeated stimuli, read as strings.
repeated_imgs_list = np.loadtxt(
    os.path.join(img_path, 'Scene_Stimuli', 'repeated_stimuli_113_list.txt'),
    dtype=str,
)

In [22]:
# Sorted so the files of each ROI are always visited in the same order.
fmri_files = sorted(f for f in os.listdir(fmri_path) if f.endswith('.npy'))

sub = "CSI1"

# load fmri
# One array per (ROI, file) pair whose file name mentions both the subject and
# the ROI; ROIs are visited in roi_list order.
fmri_data_sub = [
    np.load(os.path.join(fmri_path, npy))
    for roi in roi_list
    for npy in fmri_files
    if sub in npy and roi in npy
]
fmri_data_sub = np.concatenate(fmri_data_sub, axis=-1) # concatenate all rois

In [23]:
# Summary statistics of the ROI-concatenated array for `sub`, as loaded from disk.
printNpStats(fmri_data_sub)

Shape: (5254, 1685)
Mean: -6.253479e-10
Variance: 1.0000005
Max: 8.60656
Min: -10.2187605


In [24]:
# Step 1: pad the column axis to a multiple of patch_size.
# Step 2: standardise with the global mean/std of the padded array.
# NOTE: fmri_data_sub is overwritten, so re-running this cell applies both steps again.
fmri_data_sub = pad_to_patch_size(fmri_data_sub, patch_size)
fmri_data_sub = normalize(fmri_data_sub)
printNpStats(fmri_data_sub)

Shape: (5254, 1696)
Mean: -4.9905635e-10
Variance: 1.0
Max: 8.606558
Min: -10.218758


In [30]:
def get_stimuli_list(root, sub):
    """Return the stimulus names listed for ``sub``, in presentation-list order.

    Scans ``<root>/Stimuli_Presentation_Lists/<sub>/``: every sub-directory
    (visited in sorted name order) is searched for ``.txt`` files (also in
    sorted name order) and each file is read with ``np.loadtxt`` as strings.
    Non-directory entries directly under the subject folder are ignored.

    Args:
        root: directory that contains ``Stimuli_Presentation_Lists``.
        sub: subject folder name.

    Returns:
        A flat list of names with a single leading ``rep_`` prefix removed
        where present.
    """
    sub_dir = os.path.join(root, 'Stimuli_Presentation_Lists', sub)

    sti_name = []
    for folder in sorted(os.listdir(sub_dir)):
        folder_path = os.path.join(sub_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        for file in sorted(os.listdir(folder_path)):
            if not file.endswith('.txt'):
                continue
            loaded = np.loadtxt(os.path.join(folder_path, file), dtype=str)
            # loadtxt returns a 0-d array for a file with a single entry, which
            # cannot be iterated; atleast_1d makes that case a 1-element array.
            sti_name += list(np.atleast_1d(loaded))

    # Strip only a leading 'rep_' (count=1 leaves later occurrences untouched).
    return [
        name.replace('rep_', '', 1) if name.startswith('rep_') else name
        for name in sti_name
    ]

# load image
# Presentation-order stimulus names for this subject ...
img_files = get_stimuli_list(img_path, sub)
# ... and the matching entry of imgs_dict for each of them, in the same order.
img_data_sub = [imgs_dict[img_name] for img_name in img_files]

In [32]:
def list_get_all_index(list, value):
    """Return every position in ``list`` whose element equals ``value``.

    Positions are returned in ascending order; the result is empty when
    ``value`` does not occur.
    """
    positions = []
    for position, element in enumerate(list):
        if element == value:
            positions.append(position)
    return positions

# split train test
# For each repeated stimulus, the positions at which it occurs in img_files.
test_idx = [list_get_all_index(img_files, rep_img) for rep_img in repeated_imgs_list]
test_idx = [hits for hits in test_idx if hits] # remove empty lists for CSI4
# One test sample per repeated stimulus: the mean of its fMRI rows, paired with
# the image at its first occurrence.
test_fmri = np.stack([fmri_data_sub[hits].mean(axis=0) for hits in test_idx])
test_img = np.stack([img_data_sub[hits[0]] for hits in test_idx])

In [33]:
# Flatten the per-stimulus index lists into one list of every repeated-stimulus trial.
test_idx_flatten = [i for idx in test_idx for i in idx]
if include_nonavg_test:
    # Optionally append the individual (non-averaged) repeated trials to the test set.
    test_fmri = np.concatenate([test_fmri, fmri_data_sub[test_idx_flatten]], axis=0)
    test_img = np.concatenate([test_img, np.stack([img_data_sub[idx] for idx in test_idx_flatten])], axis=0)

# Every trial that is not a repeated-stimulus trial goes to the training split.
# Membership is checked against a set so the filter is linear in the number of
# trials instead of scanning the flattened list once per trial.
test_idx_lookup = set(test_idx_flatten)
train_idx = [i for i in range(len(img_files)) if i not in test_idx_lookup]
train_img = np.stack([img_data_sub[idx] for idx in train_idx])
train_fmri = fmri_data_sub[train_idx]

In [36]:
# Training images: the stimuli whose trial index is not in the repeated-stimulus set.
printNpStats(train_img)

Shape: (4803, 256, 256, 3)
Mean: 107.18298585843937
Variance: 3806.3570156922074
Max: 255
Min: 0


In [37]:
# fMRI rows for the training trials (same indices as train_img).
printNpStats(train_fmri)

Shape: (4803, 1696)
Mean: -0.0009539404
Variance: 1.0008868
Max: 8.606558
Min: -10.218758


In [38]:
# Test images: one per repeated stimulus (taken at its first occurrence).
printNpStats(test_img)

Shape: (113, 256, 256, 3)
Mean: 107.49450116452918
Variance: 4136.151750152919
Max: 255
Min: 0


In [39]:
# Test fMRI: each row is the mean over all occurrences of one repeated stimulus.
# NOTE(review): the lower variance than train_fmri is presumably a result of that
# averaging — confirm.
printNpStats(test_fmri)

Shape: (113, 1696)
Mean: 0.010219635
Variance: 0.32646358
Max: 2.624513
Min: -2.789681
