In [None]:
# Imports: DNAnexus client, file/process handling, neuroimaging I/O and numerics
import dxpy
import os
import subprocess
import zipfile
import shutil

import nibabel as nib
import numpy as np
import pandas as pd



In [None]:
# Local working area and remote pipeline root.

# Name of the local download folder (created inside the current directory).
folder_name = "File_DownLoad"  # Change this to the name you want

# Root folder of the pipeline on the platform. Later cells build remote paths
# as f'{project_id}:/{phenotype_origin_folder}/...', so the value is kept as is.
phenotype_origin_folder = "/GWAS_pipeline"

# Absolute location of the download folder, derived from where the notebook runs.
current_directory = os.getcwd()
print(f"Current directory: {current_directory}")
saving_dir = '/'.join([current_directory, folder_name])

# Create the folder on first run; on later runs just report that it is there.
if os.path.exists(folder_name):
    print(f"Folder already exists: {saving_dir}")
else:
    os.mkdir(folder_name)
    print(f"Folder created: {saving_dir}")

# Freesurfer config

In [None]:
# Resolve the FreeSurfer installation directory directly from this process's
# environment. A child shell running `echo $FREESURFER_HOME` can only report the
# same inherited value, so no subprocess is needed.
freesurfer_path = os.environ.get('FREESURFER_HOME', '').strip()
if not freesurfer_path:
    # Fail here with a clear message instead of letting the copy below run
    # against the meaningless path "/subjects/fsaverage".
    raise RuntimeError("FREESURFER_HOME is not set; cannot locate the FreeSurfer installation")

# Print the result
print(f"Freesurfer path is: {freesurfer_path}")

# Copy the fsaverage template into the working folder. Arguments are passed as
# a list (no shell), so paths containing spaces or shell metacharacters are safe.
subprocess.run(
    ['cp', '-a', os.path.join(freesurfer_path, 'subjects', 'fsaverage'), saving_dir],
    check=True,
)

# Download the mask annotation files and the FreeSurfer license.
project_id = 'project-GxqpVq0Jpp5Py82xVbZV198y'  # Replace with the actual project ID
for remote_name in ('mask_lh.annot', 'mask_rh.annot', 'license.txt'):
    remote_path = f'{project_id}:/{phenotype_origin_folder}/Freesurfer_related/{remote_name}'
    # -f lets the cell be re-run: without it `dx download` refuses to replace a
    # file that an earlier run already placed in saving_dir.
    subprocess.run(['dx', 'download', remote_path, '-o', saving_dir, '-f'], check=True)

# Copy the license into the FreeSurfer installation so the tools can find it.
shutil.copy(os.path.join(saving_dir, 'license.txt'), freesurfer_path)


# Environment setting

In [None]:
# Environment setting

os.environ['FREESURFER_HOME'] = freesurfer_path
os.environ['SUBJECTS_DIR'] = saving_dir+'/'

print(f"freesurfer path exists: {freesurfer_path}")
print(f"subject path exists: {saving_dir}")

# Source the FreeSurfer setup script and import what it exports.
# Running sources.sh as a plain child process (as was done before) has no
# lasting effect: its variables vanish when that child exits. Instead, source
# it inside bash, dump the resulting environment NUL-separated, and copy the
# entries that changed into this process so later subprocess calls inherit them.
sourced_env = subprocess.run(
    ['bash', '-c', 'source "$1" >/dev/null && env -0', 'bash',
     os.path.join(freesurfer_path, 'sources.sh')],
    check=True, capture_output=True, text=True,
).stdout

# Bookkeeping variables of the helper shell itself; not part of the setup.
shell_internal = ('_', 'SHLVL', 'PWD', 'OLDPWD')
for entry in sourced_env.split('\0'):
    key, separator, value = entry.partition('=')
    if not key or not separator or key in shell_internal:
        continue
    if os.environ.get(key) != value:
        os.environ[key] = value

# Re-assert the two values chosen above in case the script redefined them.
os.environ['FREESURFER_HOME'] = freesurfer_path
os.environ['SUBJECTS_DIR'] = saving_dir+'/'

# Surf transform

In [None]:
def surf_transform(file_name,subj_path,freesurfer_path):
    """Build the two `mri_surf2surf` command lines (lh, rh) for one subject.

    Nothing is executed here; the argument lists are printed and returned so
    the caller can pass them to `subprocess.run`.

    Parameters
    ----------
    file_name : str
        Target subject name; also used as the stem of the output annot files.
    subj_path : str
        Folder holding `mask_lh.annot` / `mask_rh.annot`; the output paths
        `<file_name>_lh.annot` / `<file_name>_rh.annot` point into it as well.
    freesurfer_path : str
        FreeSurfer installation root; the binary is taken from `<root>/bin/`.

    Returns
    -------
    tuple[list[str], list[str]]
        (left-hemisphere command, right-hemisphere command).
    """
    executable = freesurfer_path+'/bin/'+'mri_surf2surf'

    def build(hemi):
        # Source is the fsaverage mask annotation, target is the subject's own file.
        return [executable,
                '--srcsubject','fsaverage',
                '--trgsubject',file_name,
                '--sval-annot',subj_path+'/mask_'+hemi+'.annot',
                '--trgsurfval',subj_path+'/'+file_name+'_'+hemi+'.annot',
                '--hemi',hemi]

    lh_command = build('lh')
    print(f"surf transform in lh: {lh_command}")

    rh_command = build('rh')
    print(f"surf transform in rh: {rh_command}")

    return lh_command, rh_command

# Information Extraction

In [None]:
def data_information_extraction(information, template):
    """Summarise `information` region by region.

    Parameters
    ----------
    information : indexable (e.g. numpy.ndarray)
        Values to summarise; it is indexed with each value of `template`.
    template : dict
        Maps a region key to the index (or index array) selecting that
        region's entries in `information`.

    Returns
    -------
    tuple[dict, dict]
        (average per region, sum per region), both keyed like `template`.
    """
    per_region_average = {
        region: np.average(information[selector])
        for region, selector in template.items()
    }
    per_region_sum = {
        region: np.sum(information[selector])
        for region, selector in template.items()
    }
    return per_region_average, per_region_sum

In [None]:
#######################################
# Read the files

# Region keys used for the all-NaN placeholder rows when a measurement file is
# missing: -1 and 1..11 (0 is skipped).
# NOTE(review): presumably these are the labels stored in the mask annotation
# files; confirm against mask_lh.annot / mask_rh.annot.
_PLACEHOLDER_REGION_LABELS = tuple(i for i in range(-1, 12) if i != 0)

# (output row prefix, file suffix under <subject>/surf/), in output row order.
_SURFACE_MEASURES = (
    ('area', 'area'),
    ('curve', 'curv'),
    ('thickness', 'thickness'),
    ('volume', 'volume'),
)


def _load_region_indices(annot_path):
    """Map each label found in an annotation file to the positions carrying it.

    Uses the first element returned by `read_annot` (the label array).
    """
    labels = nib.freesurfer.io.read_annot(annot_path)[0]
    return {value: np.where(labels == value)[0] for value in np.unique(labels)}


def _summarise_hemisphere(surf_dir, hemi, region_indices):
    """Build the 8-row summary frame (4 averages, then 4 sums) for one hemisphere.

    Each measurement file is checked on its own, per hemisphere; a missing file
    yields all-NaN placeholder rows instead of an exception.
    """
    averages, sums = [], []
    for _prefix, suffix in _SURFACE_MEASURES:
        morph_path = os.path.join(surf_dir, hemi + '.' + suffix)
        if os.path.exists(morph_path):
            values = nib.freesurfer.io.read_morph_data(morph_path)
            region_average, region_sum = data_information_extraction(values, region_indices)
        else:
            region_average = {label: np.nan for label in _PLACEHOLDER_REGION_LABELS}
            region_sum = dict(region_average)
        averages.append(region_average)
        sums.append(region_sum)

    # Row order matches the index below: all averages first, then all sums.
    frame = pd.DataFrame(averages + sums)
    frame.index = ([prefix + '_avg' for prefix, _ in _SURFACE_MEASURES]
                   + [prefix + '_sum' for prefix, _ in _SURFACE_MEASURES])
    return frame


def data_loading(saving_dir, file_name):
    """Summarise a subject's surface measurements per annotated region.

    Reads `<saving_dir>/<first two chars of file_name>_<file_name>_{lh,rh}.annot`
    to group positions by label, then, for each of area / curv / thickness /
    volume under `<saving_dir>/<file_name>/surf/`, computes the per-region
    average and sum via `data_information_extraction`.

    Parameters
    ----------
    saving_dir : str
        Working folder containing the annot files and the subject folder.
    file_name : str
        Subject folder name.

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        (left hemisphere, right hemisphere). Rows are 'area_avg', 'curve_avg',
        'thickness_avg', 'volume_avg', 'area_sum', 'curve_sum',
        'thickness_sum', 'volume_sum'; columns are the region keys.

    Raises
    ------
    Whatever `nibabel` raises when an annot file is missing or unreadable.
    Missing measurement files do not raise; their rows are NaN.
    """
    annot_stem = file_name[0:2] + '_' + file_name
    surf_dir = os.path.join(saving_dir, file_name, 'surf')

    frames = []
    for hemi in ('lh', 'rh'):
        annot_path = os.path.join(saving_dir, annot_stem + '_' + hemi + '.annot')
        region_indices = _load_region_indices(annot_path)
        frames.append(_summarise_hemisphere(surf_dir, hemi, region_indices))

    df_lh, df_rh = frames
    return df_lh, df_rh

# Main Job

In [None]:
# Project to read from, and the bulk folder whose sub-folders are processed.
project_id = 'project-GxqpVq0Jpp5Py82xVbZV198y'  # Replace with the actual project ID
folder_path = '/Bulk/Brain MRI/T1/'

# Handle on the project, then the listing of the bulk folder; the main loop
# iterates over folder_contents['folders'].
project = dxpy.DXProject(project_id)
folder_contents = project.list_folder(folder_path)

In [None]:
# Download the list of already-existing annotation files and load it.
# The command is passed as an argument list (no shell), and -f makes the cell
# re-runnable: without it `dx download` refuses to replace the copy left in the
# current directory by a previous run.
subject_list_csv = 'subject_list_annot_existed_latest.csv'
subprocess.run(
    ['dx', 'download', f'{project_id}:/{phenotype_origin_folder}/{subject_list_csv}', '-f'],
    check=True,
)
df_annot_list = pd.read_csv(subject_list_csv)

In [None]:
# List the contents of the annotation folder.
# NOTE(review): `annot_folder_contents` is not read anywhere in this cell; it is
# kept so it can still be inspected interactively.
annot_folder_path = f'/{phenotype_origin_folder}/Annotate_files'
annot_folder_contents = project.list_folder(annot_folder_path)


def _run_dx(*args, quiet=False, echo=False):
    """Run a `dx` CLI command as an argument list (no shell).

    args  -- arguments following `dx`, one list element each, so local paths
             with spaces or shell metacharacters need no quoting
    quiet -- capture stdout/stderr instead of showing them in the notebook
    echo  -- print the command before running it

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    command = ['dx', *args]
    if echo:
        print(' '.join(command))
    pipes = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE} if quiet else {}
    return subprocess.run(command, check=True, **pipes)


def _directory_has_entries(path):
    """Return True when `path` is an existing directory with at least one entry.

    os.path.getsize() on a directory reports the size of the directory node
    itself, not of its contents, so it cannot be used to detect emptiness.
    """
    return os.path.isdir(path) and bool(os.listdir(path))


def _ensure_remote_folder(remote_folder):
    """Create `remote_folder` on the platform when `dx ls` cannot list it."""
    try:
        _run_dx('ls', remote_folder, quiet=True)
    except subprocess.CalledProcessError:
        _run_dx('mkdir', remote_folder, quiet=True)


def _publish_subject(subject_name, output_stem, local_dir, remote_prefix):
    """Summarise one subject, upload the results, then delete the local copies.

    subject_name  -- name of the extracted subject folder inside `local_dir`
    output_stem   -- '<bulk folder>_<subject>' prefix of the uploaded file names
    local_dir     -- working folder holding the subject folder and annot files
    remote_prefix -- 'project-id:/path/' string prepended to each uploaded name

    Upload order: lh CSV, rh CSV, lh annot, rh annot.
    """
    data_lh, data_rh = data_loading(local_dir, subject_name)

    # data_loading reads '<first two chars>_<subject>_<hemi>.annot'; upload
    # exactly those files so reading and uploading cannot drift apart.
    annot_stem = subject_name[0:2] + '_' + subject_name

    outputs = []  # (local path, remote file name)
    for hemi, frame in (('lh', data_lh), ('rh', data_rh)):
        csv_name = f'{output_stem}_Template_{hemi}.csv'
        csv_path = os.path.join(local_dir, csv_name)
        frame.to_csv(csv_path, index=True)
        outputs.append((csv_path, csv_name))
    for hemi in ('lh', 'rh'):
        outputs.append((os.path.join(local_dir, f'{annot_stem}_{hemi}.annot'),
                        f'{output_stem}_{hemi}.annot'))

    for local_path, remote_name in outputs:
        _run_dx('upload', local_path, '-o', remote_prefix + remote_name)

    # Local copies are only removed once every upload has succeeded.
    for local_path, _ in outputs:
        os.remove(local_path)


for item in folder_contents['folders']:  # one bulk sub-folder at a time
    bulk_folder = item.split('/')[-1]

    # Make sure the matching output folder exists on the platform.
    remote_folder = f'{project_id}:/{phenotype_origin_folder}/Measurement_template/{bulk_folder}'
    _ensure_remote_folder(remote_folder)
    uploading_path = remote_folder + '/'

    # Subjects whose annot files had to be generated in this run.
    df_subj_annot = []

    for file in dxpy.find_data_objects(classname='file', folder=item, project=project_id):
        file_metadata = dxpy.DXFile(file["id"]).describe()
        zip_name = file_metadata['name']

        # According to the file name, only extract freesurfer files.
        if '_20263_' not in zip_name:
            continue
        print(f"File Name: {zip_name}")

        subject_name = zip_name[:-4]          # archive name without ".zip"
        name_list = subject_name.split('_')
        output_stem = bulk_folder + '_' + subject_name
        subject_dir = os.path.join(saving_dir, subject_name)
        zip_path = os.path.join(saving_dir, zip_name)

        # Download and unpack the archive, then name the extracted 'FreeSurfer'
        # folder after the subject and drop the archive.
        _run_dx('download', f'{project_id}:{item}/{file_metadata["id"]}',
                '-o', saving_dir, '-f', echo=True)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(saving_dir)
        os.rename(os.path.join(saving_dir, 'FreeSurfer'), subject_dir)
        os.remove(zip_path)

        try:
            # Only process subjects that actually ship surface files.
            if _directory_has_entries(os.path.join(subject_dir, 'surf')):

                # Look the subject up in the list of existing annot files.
                # regex=False: the name is a literal substring, not a pattern;
                # na=False: rows without a name simply do not match.
                name_matches = df_annot_list['name'].str.contains(subject_name, regex=False, na=False)
                check_lh = df_annot_list[name_matches & (df_annot_list['hemis'] == 'lh')]
                check_rh = df_annot_list[name_matches & (df_annot_list['hemis'] == 'rh')]

                annot_generated = check_lh.empty or check_rh.empty
                if annot_generated:
                    # No stored annot for at least one hemisphere: generate both
                    # from the fsaverage mask. check=True stops here if the tool
                    # fails, rather than at the rename of a file that was never written.
                    for hemi_command in surf_transform(subject_name, saving_dir, freesurfer_path):
                        subprocess.run(hemi_command, check=True)

                    # Rename to the '<first two chars>_<subject>_<hemi>.annot'
                    # form that data_loading expects.
                    annot_stem = subject_name[0:2] + '_' + subject_name
                    for hemi in ('lh', 'rh'):
                        os.rename(os.path.join(saving_dir, f'{subject_name}_{hemi}.annot'),
                                  os.path.join(saving_dir, f'{annot_stem}_{hemi}.annot'))
                else:
                    # Stored annot files exist: fetch the first match per
                    # hemisphere and ignore repeated entries.
                    for matches in (check_lh, check_rh):
                        annot_file_id = matches.iloc[0]["file_id"]
                        _run_dx('download',
                                f'{project_id}:/{phenotype_origin_folder}/Annotate_files/{annot_file_id}',
                                '-o', saving_dir, '-f', echo=True)

                _publish_subject(subject_name, output_stem, saving_dir, uploading_path)

                if annot_generated:
                    df_subj_annot.append([bulk_folder, name_list[0], name_list[1], name_list[2], name_list[3]])
            else:
                print(f"No surface files found for {subject_name}; skipped")
        finally:
            # Always drop the extracted folder, also on failure, so a re-run
            # does not trip over leftovers when renaming 'FreeSurfer'.
            shutil.rmtree(subject_dir)

    ## Save the list of subjects whose annot files were generated
    miss_subj = pd.DataFrame(df_subj_annot)
    miss_subj_path = os.path.join(saving_dir, 'miss_subj.csv')
    miss_subj.to_csv(miss_subj_path, index=True)

    _run_dx('upload', miss_subj_path, '-o', uploading_path + 'miss_subj.csv', echo=True)
    
        