In [66]:
import csv
import os
import pandas as pd
import numpy as np
import glob
from itertools import combinations


# Get a count of participants of file ending number
def calc_num_files_ending_with(folder_path: str, ending: int) -> list[int]:
    count: int = 0
    count_list: list[int] = []
    # Loop through folder to find csv file
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            # Files look like this 'M10011_rest_jhu.csv', extract the number
            parts = filename.split('_')
            if len(parts) > 0:
                # print(parts[0])
                num_str = ''.join(filter(str.isdigit, parts[0]))
                # print(num_str)
                if (len(num_str) == 5 and num_str.endswith(str(ending))):
                    # count_list.append(int(num_str[:-1]))
                    count_list.append(int(num_str))
                    count += 1
 
    print(count)                
    return count_list

# Save a list to csv
def save_list_to_csv(input_list, output_folder: str, filename: str) -> None:
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(f"{output_folder}/{filename}", "w", newline='\n') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Participant ID'])
        for participant in input_list:
            writer.writerow([participant])
    return
        
# Check if two csv is identical
def is_csv_identical(csv1: str, csv2: str) -> bool:
    # Open both csv
    with open(csv1, 'r') as f1, open(csv2, 'r') as f2:
        reader1 = csv.reader(f1)
        reader2 = csv.reader(f2)
        
        for row1, row2 in zip(reader1, reader2):
            if row1 != row2:
                return False
        # Check if there are more lines
        try:
            next(reader1)
            return False  
        except StopIteration:
            pass
        
        try:
            next(reader2)
            return False  
        except StopIteration:
            pass  
    print('True')
    return True

# Produce the connectome matrix
def produce_connectome_csv(participant_csv: str, input_folder: str, output_folder: str, filename: str) -> None:
    # The brain regions responsible for language and speech in the JHU atlas. Started with 1 index
    speech_regions = [1, 2, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31,
                    32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 49, 50, 69, 70, 71, 72, 184,
                    185, 186, 187]
    # There are 40 brain regions. Each region is connected to another region.
    # So that gives 40 choose 2 = 780 features
    # Calculate the combinations of the brain regions
    region_combination = list(combinations(speech_regions, 2))
    # Convert combination tuples to column names
    column_names = [f"{x}_{y}" for x, y in region_combination]
    # Initialize the table to store all the rest state fmri features
    result_df = pd.DataFrame(columns = column_names)
    # Use the participant_csv to open the mri data
    participant_df = pd.read_csv(participant_csv, index_col=False)
    for index, row in participant_df.iterrows():
       # Files look like this 'M10011_rest_jhu.csv', recreate it
        participant_id = row['Participant ID']
        mri_pattern = f"{input_folder}/M{participant_id}*.csv"
        matching_files = glob.glob(mri_pattern)
        mri_csv = matching_files[0]
        mri_df = pd.read_csv(mri_csv, index_col=False, header=None)
       
        # Create a numpy array to hold 780 features 
        features_array = []
         
        # Loop through all the brain region combination
        for row_ , col in region_combination:
            # Append the value to array. Make it 0 indexed.
            features_array.append(mri_df.iloc[row_ - 1, col - 1])
        # Convert the array to dataframe
        features_array_df = pd.DataFrame([features_array], columns=result_df.columns, index=[participant_id])
        result_df = pd.concat([result_df,features_array_df])
    result_df.to_csv(f"{output_folder}/{filename}.csv")

# Produce the prediction target csv
def produce_polar_csv(participant_csv: str, polar_csv: str, output_folder: str, filename: str) -> None:
    participant_df = pd.read_csv(participant_csv, index_col=False)
    polar_df = pd.read_csv(polar_csv, index_col='POLAR ID Number')
    
    print(participant_df.shape)
    result_df = pd.DataFrame()
    column_names = [
        "information content",
        "fluency rating",
        "spontaneous speech rating",
        "comprehension yes/no questions",
        "comprehension auditory words",
        "comprehension sequential commands",
        "comprehension subscore",
        "repetition subscore",
        "object naming",
        "word fluency",
        "sentence completion",
        "responsive speech",
        "naming subscore",
        "Aphasia quotient",
        "Aphasia Type from WAB"
    ]
    df_list = []
    for index, row in participant_df.iterrows():
        participant_id = int(str(row['Participant ID'])[:-1])
        # df_list.append(polar_df.loc[[participant_id], column_names])
        result_df = pd.concat([result_df, polar_df.loc[[participant_id], column_names]])
    result_df.to_csv(f"{output_folder}/{filename}.csv")
    print(result_df.shape)

In [33]:
# produce_connectome_csv(r"..\Processed Data\rest_participants_first.csv",
#                        r"..\ConnectivityMatrix\rest_jhu",
#                        r"..\Processed Data",
#                        "rest_101_participants_40_regions")
# produce_connectome_csv(r"..\Processed Data\dti_participants_first.csv",
#                        r"..\ConnectivityMatrix\dti_jhu",
#                        r"..\Processed Data",
#                        "dti_101_participants_40_regions")
# produce_connectome_csv(r"..\Processed Data\rest_participants_fourth.csv",
#                        r"..\ConnectivityMatrix\rest_jhu",
#                        r"..\Processed Data",
#                        "rest_61_participants_40_regions_fourth_vist")
# produce_connectome_csv(r"..\Processed Data\dti_participants_fourth.csv",
#                        r"..\ConnectivityMatrix\dti_jhu",
#                        r"..\Processed Data",
#                        "dti_61_participants_40_regions_fourth_vist")
produce_connectome_csv(r"..\Processed Data\rest_participants_61_first_visit.csv",
                       r"..\ConnectivityMatrix\rest_jhu",
                       r"..\Processed Data",
                       "rest_61_participants_40_regions_first_vist")
produce_connectome_csv(r"..\Processed Data\dti_participants_61_first_visit.csv",
                       r"..\ConnectivityMatrix\dti_jhu",
                       r"..\Processed Data",
                       "dti_61_participants_40_regions_first_vist")

  result_df = pd.concat([result_df,features_array_df])
  result_df = pd.concat([result_df,features_array_df])


In [19]:
participant_list = calc_num_files_ending_with(r"..\ConnectivityMatrix\rest_jhu", 1)
save_list_to_csv(participant_list, r"..\Processed Data", 'rest_participants_first.csv') 
participant_list = calc_num_files_ending_with(r"..\ConnectivityMatrix\rest_jhu", 4)
save_list_to_csv(participant_list, r"..\Processed Data", 'rest_participants_fourth.csv')
participant_list = calc_num_files_ending_with(r"..\ConnectivityMatrix\dti_jhu", 1)
save_list_to_csv(participant_list, r"..\Processed Data", 'dti_participants_first.csv') 
participant_list = calc_num_files_ending_with(r"..\ConnectivityMatrix\dti_jhu", 4)
save_list_to_csv(participant_list, r"..\Processed Data", 'dti_participants_fourth.csv') 

101
61
101
61


In [23]:
is_csv_identical(r"..\Processed Data\rest_participants_first.csv", 
                 r"..\Processed Data\dti_participants_first.csv")
is_csv_identical(r"..\Processed Data\rest_participants_fourth.csv", 
                 r"..\Processed Data\dti_participants_fourth.csv")

True
True


True

In [67]:
produce_polar_csv(r"..\Processed Data\rest_participants_first.csv",
                r"..\ConnectivityMatrix\POLAR_measures.csv",
                r"..\Processed Data",
                '101_participants_40_regions_target_variables')

(101, 1)
(103, 15)
