In [1]:
import nibabel as nib
import numpy as np
import scipy.stats as spc
import scipy.io as sio
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import os.path as op
import time
import urllib.request
from tqdm import tqdm # progress bars

In [2]:
def calc_vox_reliabilities(repindices, results_glmsingle, subj, output_folder):
    
    models = dict()
    models['fithrf_glmdenoise'] = results_glmsingle['typec']['betasmd'].reshape(136,136,80,152)
    models['fithrf_glmdenoise_rr'] = results_glmsingle['typed']['betasmd'].reshape(136,136,80,152)

    out_fname = op.join(output_folder, 'vox_reliabilities_' + subj + '.npy')
    vox_reliabilities = [] # output variable for reliability values

    if not op.exists(out_fname):

        modelnames = list(models.keys())

        # for each beta version...
        for m in range(len(modelnames)):

            print(f'computing reliability for beta version: {modelnames[m]}')
            time.sleep(1)

            # get the repeated-condition GLM betas using our repindices variable
            betas = models[modelnames[m]][:,:,:,repindices] # automatically reshapes to (X x Y x Z x 2 x nConditions)
            x,y,z = betas.shape[:3] 

            rels = np.full((x,y,z),np.nan)

            # loop through voxels in the 3D volume...
            for xx in tqdm(range(x)):
                for yy in range(y):
                    for zz in range(z):

                        # reliability at a given voxel is pearson correlation between response profiles from first and 
                        # second image presentations (dim = 25 repeated conditions)
                        rels[xx,yy,zz] = np.corrcoef(betas[xx,yy,zz,0],
                                                     betas[xx,yy,zz,1])[1,0]

            vox_reliabilities.append(rels)

        vox_reliabilities = np.array(vox_reliabilities)
        
        np.save(out_fname, vox_reliabilities)

    else:
        vox_reliabilities = np.load(out_fname)
        
    return vox_reliabilities

In [3]:
def condition_info(designAll):
    # construct a vector containing 0-indexed condition numbers in chronological order
    corder = []
    for p in range(designALL.shape[0]):
        if np.any(designALL[p]):
            corder.append(np.argwhere(designALL[p])[0,0])

    corder = np.array(corder)

    # in order to compute split-half reliability, we have to do some indexing.
    # we want to find images with least two repetitions and then prepare a
    # useful matrix of indices that refer to when these occur.

    repindices = [] # 2 x images containing stimulus trial indices.

    # the first row refers to the first presentation; the second row refers to
    # the second presentation.
    for p in range(designALL.shape[1]): # loop over every condition

        temp = np.argwhere(corder==p)[:,0] # find indices where this condition was shown in the condition order

        # note that for conditions with 3 presentations, we are simply ignoring the third trial
        if len(temp) >= 2:
            repindices.append([temp[0], temp[1]]) 

    repindices = np.vstack(np.array(repindices)).T 
    
    images = [str(j+1) + k for j in range(181) for k in 'abc']
    lureindices = [] # 2 x images containing stimulus trial indices.
    lures = []

    # the first row refers to the first presentation; the second row refers to
    # the second presentation.
    for i, im in enumerate(images): # loop over every condition
        temp = []

        temp = np.argwhere(corder==i)[:,0] # find indices where this condition was shown

        if len(temp) == 1: # check if this exact image was shown only once (otherwise it's a repeat)
            image_version = im[-1]

            lureindices.append(temp[0])
            lures.append(im)

    lureindices = np.array(lureindices).reshape(48,2).T
    lures = np.array(lures).reshape(48,2).T
    
    return corder, repindices, lureindices, lures

In [22]:
def neuronal_pattern_sim_mat(betas, mask, vox_rel, out_fname, thr=-2):
    
    print(f'vox rel min {np.nanmax(vox_rel)}')
    # get betas from within mask
    betas_thr = betas.reshape(136,136,80,152)[((mask==1) & (vox_rel > thr)), :].T
    print(f'Thresholded betas shape: {betas_thr.shape}')
    
    # condition-wise similarity matrix
    corr_thr = np.corrcoef(betas_thr)
    print(f'Corr mat shape: {betas_thr.shape}')
    
    if corr_thr.shape[0]!=152:
        print(f'Error: Number of rows \({corr_rr_thr.shape[0]}\) does not match expected number of conditions')
    else: 
        np.save(out_fname, corr_thr)
        return corr_thr


In [5]:
def get_sim_bins(lures, ratings):
    sim_bins = []

    for img1, img2 in zip(lures[0], lures[1]):

        img1_type = img1[:-1]
        img1_ver = img1[-1]

        img2_type = img2[:-1]
        img2_ver = img2[-1]

        if img1_type != img2_type:
            print('Error: types don\'t match')

        abc = img1_ver + img2_ver
        #print(img1_type, abc)

        sim_bin = ratings['Rating_Bins'].loc[(ratings['ImageType'] == int(img1_type)) & (ratings['abc'] == abc)].reset_index(drop=True)[0]

        #print(sim_bin)
        sim_bins.append(sim_bin)
    
    target_bins = ['target'] * 25
    sim_bins = sim_bins + target_bins 
    
    return sim_bins

In [6]:
similarity_path =  op.join('C:\\', 'Users','Zsuzsa', 'Documents', 'miniTRK', 'Data', 'SimilarityRating')
ratings_fname = op.join(similarity_path, 'SimilarityRating_FinalSample.csv')
ratings = pd.read_csv(ratings_fname)

In [16]:
base_folder = op.join('D:\\', 'Zsuzsa', 'HCCCL', 'miniTRK', 'Results')
design_folder = op.join(base_folder, '02_APS_MRI_Logs', 'single_trials')
out_path = op.join(base_folder,'01_MRI','fMRI_RSA', 'BetaCorrMats')
#subjects = np.loadtxt('test_subjects.txt', dtype=str)
subjects = ['760384']
print(subjects)

['760384']


In [8]:
task = 'OBJ'
acq = 'ENC'
stimdur = 3.0
tr = 1

In [17]:
frames = []
for subj in subjects:
    
    print(subj)
    mask_folder = op.join(base_folder, '01_MRI', 'ANTS_REG', 'ROIS', subj)
    designs = []
    
    # load designs
    for r in ['1','2']:
        design_file =  subj + '_' + task + '_SingleTrials_run_' + r + '_Upsampled.csv' 
        fname = op.join(design_folder, design_file)
        design = pd.read_csv(fname).to_numpy()
        designs.append(design)
        
    # consolidate design matrices
    designALL = np.concatenate(designs,axis=0)

    corder, repindices, lureindices, lures = condition_info(designALL)
    print(repindices)
    
    # load GLMsingle outputs (only type C and type D)
    outputdir_glmsingle = op.join(base_folder,'01_MRI','fMRI_RSA','GLMsingle', subj, task + '_' + acq)
    
    # load existing file outputs if they exist
    results_glmsingle = dict()
    
    if not op.exists(outputdir_glmsingle):

        print('No GLMsingle output. Please run GLMsingle')

    else:
        print(f'loading existing GLMsingle outputs from directory:\n\t{outputdir_glmsingle}')
        
        results_glmsingle['typea'] = np.load(op.join(outputdir_glmsingle,'TYPEA_ONOFF.npy'),allow_pickle=True).item()
        results_glmsingle['typec'] = np.load(op.join(outputdir_glmsingle,'TYPEC_FITHRF_GLMDENOISE.npy'),allow_pickle=True).item()
        results_glmsingle['typed'] = np.load(op.join(outputdir_glmsingle,'TYPED_FITHRF_GLMDENOISE_RR.npy'),allow_pickle=True).item()


760384
[[122  78  60  11  71 137  18  79  13 104  29   2  49  36   0  97  91 139
   21 128 116  63   4 127 106]
 [126  85  64  16  74 140  25  86  17 110  31   8  56  42   7  99  96 142
   23 133 121  65  20 131 109]]
loading existing GLMsingle outputs from directory:
	D:\Zsuzsa\HCCCL\miniTRK\Results\01_MRI\fMRI_RSA\GLMsingle\760384\OBJ_ENC


In [20]:
results_glmsingle['typed']['betasmd'][50,50,50,:]

array([-1.4431019 , -0.33924586, -1.2115256 , -0.45679832,  1.176322  ,
       -0.9310342 ,  0.37455937,  1.0106702 , -1.0097609 ,  3.611507  ,
       -0.06361842,  3.4416943 ,  1.1861079 ,  2.024457  , -3.9822187 ,
       -3.553678  , -0.60248065, -0.05945471, -2.6391644 , -0.5704894 ,
       -1.9350622 ,  0.8514701 ,  1.519816  , -2.327885  , -2.719463  ,
       -3.062109  , -0.9431678 ,  0.92221546,  1.9906017 , -0.84569633,
       -1.0204957 ,  4.2035832 ,  2.3697858 ,  1.5256041 , -1.0744642 ,
        0.76115036,  3.9610102 ,  1.0791007 , -0.03571042, -3.0276864 ,
       -5.86808   , -0.4654776 ,  0.7162816 , -1.2108122 ,  0.6977863 ,
        0.13026191,  1.3412912 ,  2.2844217 ,  2.9267282 , -1.2492111 ,
        0.18719688,  0.40718335, -0.5638015 ,  0.52235055, -3.42464   ,
        0.46384534,  1.0185826 , -2.0684748 , -1.4357524 ,  0.07018877,
        3.9480646 , -0.6862677 , -2.488529  , -1.0600749 , -1.6876966 ,
       -1.1470199 , -1.1901504 , -0.393207  ,  0.49962905, -0.98

In [23]:
frames = []
for subj in subjects:
    
    print(subj)
    mask_folder = op.join(base_folder, '01_MRI', 'ANTS_REG', 'ROIS', subj)
    designs = []
    
    # load designs
    for r in ['1','2']:
        design_file =  subj + '_' + task + '_SingleTrials_run_' + r + '_Upsampled.csv' 
        fname = op.join(design_folder, design_file)
        design = pd.read_csv(fname).to_numpy()
        designs.append(design)
        
    # consolidate design matrices
    designALL = np.concatenate(designs,axis=0)

    corder, repindices, lureindices, lures = condition_info(designALL)
    print(repindices)
    
    # load GLMsingle outputs (only type C and type D)
    outputdir_glmsingle = op.join(base_folder,'01_MRI','fMRI_RSA','GLMsingle', subj, task + '_' + acq)
    
    # load existing file outputs if they exist
    results_glmsingle = dict()
    
    if not op.exists(outputdir_glmsingle):

        print('No GLMsingle output. Please run GLMsingle')

    else:
        print(f'loading existing GLMsingle outputs from directory:\n\t{outputdir_glmsingle}')
        
        results_glmsingle['typec'] = np.load(op.join(outputdir_glmsingle,'TYPEC_FITHRF_GLMDENOISE.npy'),allow_pickle=True).item()
        results_glmsingle['typed'] = np.load(op.join(outputdir_glmsingle,'TYPED_FITHRF_GLMDENOISE_RR.npy'),allow_pickle=True).item()

    rel_output_folder = op.join(base_folder,'01_MRI','fMRI_RSA', 'vox_reliabilities')
    vox_reliabilities = calc_vox_reliabilities(repindices, results_glmsingle, subj, rel_output_folder)
    
    print(vox_reliabilities[1,50,50,:])
    
    for hemi in ['left', 'right']:
        
        print(hemi)
        
        for area in ['CA3DG', 'CA1', 'ERC', 'SUB', 'PHC', 'PRC']:
            
            print(area)
            
            mask_file = 'C-mask_' + subj + '_' + hemi + '_' + area + '-To-meanFunc.nii.gz'
            fname = op.join(mask_folder, mask_file)
            img = nib.load(fname)
            mask = img.get_fdata()
            
            mask = mask.astype(float)
            print(f'mask sum: {mask.sum()}')

            # convert voxels outside ROI to nan for overlay plotting
            mask[mask>0.1] = np.nan 
            
            out_fname = op.join(out_path, subj + '_' + hemi + '_' + area + '_RR.npy')
            corr_rr = neuronal_pattern_sim_mat(results_glmsingle['typed']['betasmd'], mask, vox_reliabilities[1], out_fname)
            corr_rr_thr = neuronal_pattern_sim_mat(results_glmsingle['typed']['betasmd'], mask, vox_reliabilities[1], out_fname, thr=0.0)
            
            # Lure Similarity
            lure_pattern_sim = corr_rr[lureindices[0], lureindices[1]]
            lure_pattern_sim_thr = corr_rr_thr[lureindices[0], lureindices[1]]
            
            # Repetition Similarity
            rep_pattern_sim = corr_rr[repindices[0], repindices[1]]
            rep_pattern_sim_thr = corr_rr_thr[repindices[0], repindices[1]]
            
            sim_bins = get_sim_bins(lures, ratings)
            
            data = np.array([np.concatenate((lure_pattern_sim, lure_pattern_sim_thr)),
                             np.concatenate((rep_pattern_sim, rep_pattern_sim_thr))]).T
            print(data.shape)
            sim_frame = pd.DataFrame(data=data, columns=['PatternSim', 'PatternSim_THR'])
            sim_frame['Rating_Bins'] = sim_bins
            sim_frame['ID'] = subj
            sim_frame['Hemi'] = hemi
            sim_frame['Area'] = area
            sim_frame['Mask'] = hemi + '_' + area
            
            frames.append(sim_frame)

760384
[[122  78  60  11  71 137  18  79  13 104  29   2  49  36   0  97  91 139
   21 128 116  63   4 127 106]
 [126  85  64  16  74 140  25  86  17 110  31   8  56  42   7  99  96 142
   23 133 121  65  20 131 109]]
loading existing GLMsingle outputs from directory:
	D:\Zsuzsa\HCCCL\miniTRK\Results\01_MRI\fMRI_RSA\GLMsingle\760384\OBJ_ENC
[ 0.28081076  0.19016633 -0.10525645 -0.04813297 -0.04973302 -0.25629422
 -0.27404128  0.04710536 -0.15646467 -0.04261089  0.30474282 -0.23187092
  0.08797818 -0.21383761 -0.21274008 -0.1369628  -0.13465936  0.25328215
  0.09322416 -0.15121395 -0.20933228 -0.1649299   0.26592724 -0.19431238
 -0.08317897  0.09186536 -0.07015201 -0.40321449  0.00979905  0.01150918
 -0.339011   -0.1591746  -0.18522193 -0.14400895  0.55905799 -0.07509759
 -0.14468919  0.01837774  0.03889038 -0.14278559 -0.24180377  0.08375235
 -0.21028552  0.09525798  0.00499684 -0.07478319 -0.5391845  -0.26367105
  0.06347869 -0.05050093 -0.18558096  0.08616631  0.07773692 -0.22639996


  avg = a.mean(axis)
  ret = um.true_divide(
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


vox rel min 0.8404014617025067
Thresholded betas shape: (152, 0)
Corr mat shape: (152, 0)
(2,)


  data = np.array([np.concatenate((lure_pattern_sim, lure_pattern_sim_thr)),


ValueError: Shape of passed values is (2, 1), indices imply (2, 2)

In [None]:
m = np.load(op.join(outputdir_glmsingle,'TYPEC_FITHRF_GLMDENOISE.npy'),allow_pickle=True)

In [None]:
op.exists(op.join(outputdir_glmsingle,'TYPEC_FITHRF_GLMDENOISE.npy'))

In [None]:
m