In [None]:
import glob
import pandas as pd
import os
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# --- SETTING THINGS UP (SPINS) ---

# Root directory that is searched recursively for the per-participant
# resting-state time series CSVs.
ts_root = "/projects/tsecara/RS_SPINS_SPASD/data/SPINS2/"

# Reads the files: collect every matching CSV and sort the paths (plain string sort).
files_EA_resid_ts = sorted(glob.glob(ts_root + "**/sub-*_RS_2mm_GSR_glasser_tian_meants.csv", recursive = True)) #for SPINS

# Region names: the 'atlas_roi' column of the atlas table becomes the header row of the final matrix.
data = pd.read_csv("/projects/tsecara/SPINS_ASD_Project2/G_atlas_info_reorder_correct.csv")
list_of_region_names = data['atlas_roi'].tolist()

# Empty-file check: filtered_files lists every 0-byte CSV (it should print as []).
# pd.read_csv raises EmptyDataError on a 0-byte file, so such files are reported
# and left out instead of crashing the load further down.
filtered_files = [file for file in files_EA_resid_ts if os.path.getsize(file) == 0]
if filtered_files:
    print("Skipping empty time series files:", filtered_files)
    files_EA_resid_ts = [file for file in files_EA_resid_ts if file not in filtered_files]

# Creates list of IDs: the ID is cut out of the path right after "<ts_root>sub-"
# (character 49 of the absolute path): 3 characters, then the next 4 characters.
# NOTE(review): assumes the path component directly under ts_root starts with
# "sub-" followed by a 3-character code and a 4-character number - confirm against the data layout.
id_start = len(ts_root) + len("sub-")
ptlist = ["SPN01_" + file[id_start:id_start + 3] + "_" + file[id_start + 3:id_start + 7] for file in files_EA_resid_ts]

# Reads the time series files (no header row in the CSVs, so columns are labelled 0..N-1).
resid_ts = [pd.read_csv(file, header=None) for file in files_EA_resid_ts] #checks out with Teo's code

# Names the data: dictionary of participant ID -> time series table for that participant.
resid_ts_named = dict(zip(ptlist, resid_ts))
if len(resid_ts_named) != len(ptlist):
    # A dict keeps only the last value per key, so duplicates would silently drop participants.
    print("Warning: duplicate participant IDs were derived from the file paths; later files overwrite earlier ones.")

# Changes the display limit - to see the final matrix in full.
pd.options.display.max_rows = 999


In [None]:
'''TESTING MENU'''

'''FUNCTIONS'''
def mean_centering(participant):
    """Mean-centre each row of a participant's table.

    Every row is treated as one series (the same orientation that z_score and
    mssd_value use with axis=1): the row's own mean is subtracted from each of
    its values, so every row of the result has mean 0.

    The previous implementation used ``x - x.mean()``; for a DataFrame
    ``mean()`` defaults to axis=0, so it subtracted the across-row mean of each
    column instead of each row's mean.

    Parameters
    ----------
    participant : pandas.DataFrame or array-like
        2-D table with one series per row.

    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        Row-centred data; a DataFrame is returned for DataFrame input,
        otherwise a float ndarray.
    """
    if isinstance(participant, pd.DataFrame):
        # sub(..., axis=0) aligns the per-row means with the row index.
        return participant.sub(participant.mean(axis=1), axis=0)
    values = np.asarray(participant, dtype=float)
    # keepdims keeps the means broadcastable against the rows.
    return values - values.mean(axis=-1, keepdims=True)

def z_score(participant):
    """Standardise every row of a participant's table.

    The input is converted to a NumPy array and passed to
    ``scipy.stats.zscore`` with ``axis=1``, i.e. each row is z-scored
    against its own mean and standard deviation.

    Parameters
    ----------
    participant : array-like
        2-D table with one series per row.

    Returns
    -------
    numpy.ndarray
        Array of the same shape holding the row-wise z-scores.
    """
    as_array = np.array(participant)
    return stats.zscore(as_array, axis=1)

def mssd_value(participant_scores):
    """Per-row variability from successive differences.

    For each row: take the differences between consecutive values, square
    them, average them while ignoring NaNs, and return the square root of
    that average (so the value returned is the root of the mean squared
    successive difference).

    Parameters
    ----------
    participant_scores : array-like
        2-D table with one series per row.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per row.
    """
    squared_steps = np.square(np.diff(participant_scores, axis=-1))
    # nanmean so that NaN differences do not wipe out the whole row.
    return np.sqrt(np.nanmean(squared_steps, axis=1))

# --- MAIN LOOP ---
# Accumulators filled while looping over the participants.
z_scores_list = []
centered_data_list = []
mssd_values_list = [] #for z-scoring
mssd_second_values = [] #for mean-centering

if not resid_ts_named:
    raise ValueError("resid_ts_named is empty: no participant time series were loaded.")

# Number of regions = number of rows in a participant's table. It is taken from
# the first loaded participant instead of a hard-coded participant ID, so the
# cell still runs when that particular participant is not in the data set.
n_regions = len(next(iter(resid_ts_named.values())))
if len(list_of_region_names) != n_regions:
    raise ValueError("Found {} region names but {} regions per participant.".format(len(list_of_region_names), n_regions))

# One row per participant; column 0 holds the participant ID, the remaining
# columns hold one value per region. dtype=object because strings and numbers are mixed.
mssd_matrix = np.zeros((len(resid_ts_named), n_regions + 1), dtype=object)
mssd_matrix_mean = np.zeros((len(resid_ts_named), n_regions + 1), dtype=object)

# Ask which normalisation to use. Only the parsing of the answer is guarded, so
# a ValueError raised later by the numerical code is not mistaken for bad input.
while True:
    try:
        user = int(input("Would you like the data to be z_scored or mean-centered? Enter 1 for z_score and 2 for mean-centered: "))
    except ValueError:
        print("Invalid input. Please enter either 1 for z_score or 2 for mean-centered (integer only): ")
        continue
    if user in (1, 2):
        break
    print("Invalid input. Please enter either 1 for z_score or 2 for mean-centered (integer only): ")

if user == 1:
    # z-score every participant, then compute the per-region MSSD values.
    for i, (key, participant) in enumerate(resid_ts_named.items()):
        z_scores = z_score(participant)
        z_scores_list.append(z_scores)
        mssd_values_list.append(mssd_value(z_scores))

        mssd_matrix[i, 0] = key
        mssd_matrix[i, 1:] = np.round(mssd_values_list[-1], decimals=10)  # Round to 10 decimal places
    mssd_values = np.array(mssd_values_list)

    # Region names become the first row; [0][0] stays empty above the participant-ID column.
    mssd_matrix = np.vstack(([''] + list_of_region_names, mssd_matrix))
    df_mssd = pd.DataFrame(mssd_matrix)
elif user == 2:
    # mean-centre every participant, then compute the per-region MSSD values.
    for i, (key, participant) in enumerate(resid_ts_named.items()):
        centered_data = mean_centering(participant)
        centered_data_list.append(centered_data)
        mssd_second_values.append(mssd_value(centered_data))

        mssd_matrix_mean[i, 0] = key
        mssd_matrix_mean[i, 1:] = np.round(mssd_second_values[-1], decimals=10)  # Round to 10 decimal places
    mssd_values_mean = np.array(mssd_second_values)

    # Region names become the first row; [0][0] stays empty above the participant-ID column.
    mssd_matrix_mean = np.vstack(([''] + list_of_region_names, mssd_matrix_mean))
    df_mssd = pd.DataFrame(mssd_matrix_mean)


print(df_mssd)

In [None]:
# GETTING CSVs: write the table held in df_mssd to disk.
# to_csv is called with its default options.
spins_mssd_csv = "/projects/tsecara/SPINS_ASD_Project2/data/SPINS/RS_regional_MSSD/SPINS_regional_MSSD_RS.csv"
pd.DataFrame(df_mssd).to_csv(spins_mssd_csv)

In [None]:
#The same code was run to extract resting state MSSD for SPINS-ASD participants 

# --- SETTING THINGS UP (SPINS-ASD) ---

# Root directory that is searched recursively for the per-participant
# resting-state time series CSVs.
ts_root = "/projects/tsecara/RS_SPINS_SPASD/data/SPASD2/"

# Reads the files: collect every matching CSV and sort the paths (plain string sort).
files_EA_resid_ts = sorted(glob.glob(ts_root + "**/sub-*_RS_2mm_GSR_glasser_tian_meants.csv", recursive = True)) #for SPASD

# Region names: the 'atlas_roi' column of the atlas table becomes the header row of the final matrix.
data = pd.read_csv("/projects/tsecara/SPINS_ASD_Project2/G_atlas_info_reorder_correct.csv")
list_of_region_names = data['atlas_roi'].tolist()

# Empty-file check: filtered_files lists every 0-byte CSV (it should print as []).
# pd.read_csv raises EmptyDataError on a 0-byte file, so such files are reported
# and left out instead of crashing the load further down.
filtered_files = [file for file in files_EA_resid_ts if os.path.getsize(file) == 0]
if filtered_files:
    print("Skipping empty time series files:", filtered_files)
    files_EA_resid_ts = [file for file in files_EA_resid_ts if file not in filtered_files]

# Creates list of IDs: the ID is cut out of the path right after "<ts_root>sub-"
# (character 49 of the absolute path): 3 characters, then the next 4 characters.
# NOTE(review): assumes the path component directly under ts_root starts with
# "sub-" followed by a 3-character code and a 4-character number - confirm against the data layout.
id_start = len(ts_root) + len("sub-")
ptlist = ["SPASD_" + file[id_start:id_start + 3] + "_" + file[id_start + 3:id_start + 7] for file in files_EA_resid_ts]

# Reads the time series files (no header row in the CSVs, so columns are labelled 0..N-1).
resid_ts = [pd.read_csv(file, header=None) for file in files_EA_resid_ts]

# Names the data: dictionary of participant ID -> time series table for that participant.
resid_ts_named = dict(zip(ptlist, resid_ts))
if len(resid_ts_named) != len(ptlist):
    # A dict keeps only the last value per key, so duplicates would silently drop participants.
    print("Warning: duplicate participant IDs were derived from the file paths; later files overwrite earlier ones.")

# Changes the display limit - to see the final matrix in full.
pd.options.display.max_rows = 999


In [None]:
'''FUNCTIONS'''
def mean_centering(participant):
    """Mean-centre each row of a participant's table.

    Every row is treated as one series (the same orientation that z_score and
    mssd_value use with axis=1): the row's own mean is subtracted from each of
    its values, so every row of the result has mean 0.

    The previous implementation used ``x - x.mean()``; for a DataFrame
    ``mean()`` defaults to axis=0, so it subtracted the across-row mean of each
    column instead of each row's mean.

    Parameters
    ----------
    participant : pandas.DataFrame or array-like
        2-D table with one series per row.

    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        Row-centred data; a DataFrame is returned for DataFrame input,
        otherwise a float ndarray.
    """
    if isinstance(participant, pd.DataFrame):
        # sub(..., axis=0) aligns the per-row means with the row index.
        return participant.sub(participant.mean(axis=1), axis=0)
    values = np.asarray(participant, dtype=float)
    # keepdims keeps the means broadcastable against the rows.
    return values - values.mean(axis=-1, keepdims=True)

def z_score(participant):
    """Standardise every row of a participant's table.

    The input is converted to a NumPy array and passed to
    ``scipy.stats.zscore`` with ``axis=1``, i.e. each row is z-scored
    against its own mean and standard deviation.

    Parameters
    ----------
    participant : array-like
        2-D table with one series per row.

    Returns
    -------
    numpy.ndarray
        Array of the same shape holding the row-wise z-scores.
    """
    as_array = np.array(participant)
    return stats.zscore(as_array, axis=1)

def mssd_value(participant_scores):
    """Per-row variability from successive differences.

    For each row: take the differences between consecutive values, square
    them, average them while ignoring NaNs, and return the square root of
    that average (so the value returned is the root of the mean squared
    successive difference).

    Parameters
    ----------
    participant_scores : array-like
        2-D table with one series per row.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per row.
    """
    squared_steps = np.square(np.diff(participant_scores, axis=-1))
    # nanmean so that NaN differences do not wipe out the whole row.
    return np.sqrt(np.nanmean(squared_steps, axis=1))

# --- MAIN LOOP ---
# Accumulators filled while looping over the participants.
z_scores_list = []
centered_data_list = []
mssd_values_list = [] #for z-scoring
mssd_second_values = [] #for mean-centering

if not resid_ts_named:
    raise ValueError("resid_ts_named is empty: no participant time series were loaded.")

# Number of regions = number of rows in a participant's table. It is taken from
# the first loaded participant instead of a hard-coded participant ID, so the
# cell still runs when that particular participant is not in the data set.
n_regions = len(next(iter(resid_ts_named.values())))
if len(list_of_region_names) != n_regions:
    raise ValueError("Found {} region names but {} regions per participant.".format(len(list_of_region_names), n_regions))

# One row per participant; column 0 holds the participant ID, the remaining
# columns hold one value per region. dtype=object because strings and numbers are mixed.
mssd_matrix = np.zeros((len(resid_ts_named), n_regions + 1), dtype=object)
mssd_matrix_mean = np.zeros((len(resid_ts_named), n_regions + 1), dtype=object)

# Ask which normalisation to use. Only the parsing of the answer is guarded, so
# a ValueError raised later by the numerical code is not mistaken for bad input.
while True:
    try:
        user = int(input("Would you like the data to be z_scored or mean-centered? Enter 1 for z_score and 2 for mean-centered: "))
    except ValueError:
        print("Invalid input. Please enter either 1 for z_score or 2 for mean-centered (integer only): ")
        continue
    if user in (1, 2):
        break
    print("Invalid input. Please enter either 1 for z_score or 2 for mean-centered (integer only): ")

if user == 1:
    # z-score every participant, then compute the per-region MSSD values.
    for i, (key, participant) in enumerate(resid_ts_named.items()):
        z_scores = z_score(participant)
        z_scores_list.append(z_scores)
        mssd_values_list.append(mssd_value(z_scores))

        mssd_matrix[i, 0] = key
        mssd_matrix[i, 1:] = np.round(mssd_values_list[-1], decimals=10)  # Round to 10 decimal places
    mssd_values = np.array(mssd_values_list)

    # Region names become the first row; [0][0] stays empty above the participant-ID column.
    mssd_matrix = np.vstack(([''] + list_of_region_names, mssd_matrix))
    df_mssd = pd.DataFrame(mssd_matrix)
elif user == 2:
    # mean-centre every participant, then compute the per-region MSSD values.
    for i, (key, participant) in enumerate(resid_ts_named.items()):
        centered_data = mean_centering(participant)
        centered_data_list.append(centered_data)
        mssd_second_values.append(mssd_value(centered_data))

        mssd_matrix_mean[i, 0] = key
        mssd_matrix_mean[i, 1:] = np.round(mssd_second_values[-1], decimals=10)  # Round to 10 decimal places
    mssd_values_mean = np.array(mssd_second_values)

    # Region names become the first row; [0][0] stays empty above the participant-ID column.
    mssd_matrix_mean = np.vstack(([''] + list_of_region_names, mssd_matrix_mean))
    df_mssd = pd.DataFrame(mssd_matrix_mean)



print(df_mssd)

In [None]:
# GETTING CSVs - make sure to check which matrix variable is produced.
# Writes the table held in df_mssd to disk; to_csv is called with its default options.
spasd_mssd_csv = "/projects/tsecara/SPINS_ASD_Project2/data/SPASD/RS_regional_MSSD/SPASD_regional_MSSD_RS.csv"
pd.DataFrame(df_mssd).to_csv(spasd_mssd_csv)