In [1]:
import pandas as pd
import pickle
import argparse
import os
import glob
import pydicom
import sys

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Upload the JSON file

In [3]:
# Maps each combined ROI name ('<Side>-<Region>') to the set of segmentation
# labels that are merged into it. Left and right entries are mirror images,
# so each region is described once and expanded for both hemispheres:
#   * cortical labels become 'ctx-lh-<label>' / 'ctx-rh-<label>'
#   * subcortical labels become 'Left-<label>' / 'Right-<label>'
# Key order is Left then Right for every region, in the order listed below.
# NOTE(review): label names look like FreeSurfer segmentation names — confirm.
combined_roi_dict = {
    side + '-' + region: (
        {'ctx-' + hemi + '-' + label for label in cortical_labels}
        | {side + '-' + label for label in subcortical_labels}
    )
    for region, cortical_labels, subcortical_labels in (
        (
            'Cingulate',
            ('caudalanteriorcingulate', 'rostralanteriorcingulate',
             'isthmuscingulate', 'posteriorcingulate'),
            (),
        ),
        (
            'Frontal',
            ('caudalmiddlefrontal', 'lateralorbitofrontal', 'medialorbitofrontal',
             'parsopercularis', 'parsorbitalis', 'parstriangularis', 'precentral',
             'rostralmiddlefrontal', 'superiorfrontal', 'paracentral'),
            (),
        ),
        (
            'Temporal',
            ('entorhinal', 'fusiform', 'inferiortemporal', 'middletemporal',
             'superiortemporal', 'transversetemporal', 'parahippocampal'),
            ('Hippocampus', 'Amygdala'),
        ),
        (
            'Parietal',
            ('inferiorparietal', 'postcentral', 'superiorparietal',
             'supramarginal', 'precuneus'),
            (),
        ),
        (
            'Occipital',
            ('cuneus', 'lateraloccipital', 'lingual', 'pericalcarine'),
            (),
        ),
        ('Insula', ('insula',), ()),
        ('Cerebellum', (), ('Cerebellum-White-Matter', 'Cerebellum-Cortex')),
        ('Combined-Cerebral-WM', (), ('Cerebral-White-Matter', 'WM-hypointensities')),
        ('Total-Lateral-Ventricle', (), ('Lateral-Ventricle', 'Inf-Lat-Vent')),
    )
    for side, hemi in (('Left', 'lh'), ('Right', 'rh'))
}
# Maps '<side>_cortical_gm' to the set of lowercase per-lobe names
# ('<side>_<lobe>') grouped under it, for the left and then the right side.
gray_matter_index = {
    side + '_cortical_gm': {
        side + '_' + lobe
        for lobe in ('cingulate', 'frontal', 'temporal', 'parietal', 'occipital', 'insula')
    }
    for side in ('left', 'right')
}

# Display variant of the combined-ROI mapping: '<Side>-<Region>' -> set of
# segmentation labels. Here the temporal regions contain cortical labels only
# and there is no lateral-ventricle entry. Left and right entries are mirror
# images, so each region is described once and expanded for both hemispheres:
#   * cortical labels become 'ctx-lh-<label>' / 'ctx-rh-<label>'
#   * subcortical labels become 'Left-<label>' / 'Right-<label>'
# Key order is Left then Right for every region, in the order listed below.
combined_roi_display_dict = {
    side + '-' + region: (
        {'ctx-' + hemi + '-' + label for label in cortical_labels}
        | {side + '-' + label for label in subcortical_labels}
    )
    for region, cortical_labels, subcortical_labels in (
        (
            'Cingulate',
            ('caudalanteriorcingulate', 'rostralanteriorcingulate',
             'isthmuscingulate', 'posteriorcingulate'),
            (),
        ),
        (
            'Frontal',
            ('caudalmiddlefrontal', 'lateralorbitofrontal', 'medialorbitofrontal',
             'parsopercularis', 'parsorbitalis', 'parstriangularis', 'precentral',
             'rostralmiddlefrontal', 'superiorfrontal', 'paracentral'),
            (),
        ),
        (
            'Temporal',
            ('entorhinal', 'fusiform', 'inferiortemporal', 'middletemporal',
             'superiortemporal', 'transversetemporal', 'parahippocampal'),
            (),
        ),
        (
            'Parietal',
            ('inferiorparietal', 'postcentral', 'superiorparietal',
             'supramarginal', 'precuneus'),
            (),
        ),
        (
            'Occipital',
            ('cuneus', 'lateraloccipital', 'lingual', 'pericalcarine'),
            (),
        ),
        ('Insula', ('insula',), ()),
        ('Cerebellum', (), ('Cerebellum-White-Matter', 'Cerebellum-Cortex')),
        ('Combined-Cerebral-WM', (), ('Cerebral-White-Matter', 'WM-hypointensities')),
    )
    for side, hemi in (('Left', 'lh'), ('Right', 'rh'))
}

In [4]:
def load_pickle_file(filepath):
    """Open ``filepath`` in binary mode and return the unpickled object.

    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(filepath, 'rb') as handle:
        return pickle.load(handle)
# Open the file (it is stored in pickle format).

In [5]:
import pandas as pd
import pickle

def load_pickle_file(file_path):
    """Read the pickle file at ``file_path`` and return the object it contains.

    This definition replaces the same-named helper from the earlier cell.
    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(file_path, 'rb') as stream:
        payload = pickle.load(stream)
    return payload
        
def find_volumes_files(root_path):
    """Return the paths of all files named 'volumes.pickle' under ``root_path``.

    The tree is traversed with ``os.walk``; results are listed in the order
    the walk encounters them. A missing ``root_path`` yields an empty list.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(root_path)
        for filename in filenames
        if filename == 'volumes.pickle'
    ]

def extract_subject_data(volumes_file_path):
    """Load one volumes pickle and tag it with its subject name.

    The subject name is the third-from-last component of the directory that
    contains ``volumes_file_path`` (split on ``os.sep``). It is stored on the
    loaded object under the 'subject' key, and that object is returned.
    """
    containing_dir = os.path.dirname(volumes_file_path)
    path_components = containing_dir.split(os.sep)
    # Resolve the name before loading so a too-shallow path fails first.
    subject_name = path_components[-3]
    record = load_pickle_file(volumes_file_path)
    record['subject'] = subject_name
    return record

def compile_data(volumes_files):
    """Build a DataFrame from the records of every path in ``volumes_files``.

    Each path is passed to ``extract_subject_data`` and the results, in input
    order, are handed to ``pd.DataFrame`` as a list.
    """
    records = [extract_subject_data(path) for path in volumes_files]
    # The frame is returned as constructed; reshape downstream if needed.
    return pd.DataFrame(records)

# Directory that is searched recursively for 'volumes.pickle' files.
# NOTE(review): hard-coded absolute path into one user's home directory —
# consider moving it to a configurable constant.
root_path = '/home/limseoyoung/drwelly1/SeoyoungLim/drwelly-debug/drwelly_oasis1average_after_swift'
# Collect every matching pickle path, then combine the loaded records into one DataFrame.
volumes_files = find_volumes_files(root_path)
df = compile_data(volumes_files)

In [6]:
# Show the compiled frame; 'subject' still carries the processing-tag suffix here.
df

Unnamed: 0,total_intracranial_volume,unknown,left_cerebral_white_matter,left_lateral_ventricle,left_inf_lat_vent,left_cerebellum_white_matter,left_cerebellum_cortex,left_thalamus,left_caudate,left_putamen,...,cerebellum,cerebellum_white_matter,cerebellum_cortex,lateral_ventricle,inf_lat_vent,total_lateral_ventricle,wm_hypointensities,HippocampusOccupancyScore,HippocampusAsymmetryIndex,subject
0,1.483843e+06,1.533272e+07,223501.532581,6197.666683,725.444963,13403.753315,52939.646295,7991.213592,3569.600818,5298.663729,...,133842.709162,26811.965630,107030.743532,11727.855401,1325.351932,13053.207333,1309.573933,0.840818,0.005738,OAS1_0014_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
1,1.558668e+06,1.525790e+07,222830.281616,17250.498119,944.278952,13765.618297,55144.107183,7159.781634,3436.173824,4697.727760,...,138828.556907,27367.625602,111460.931305,32996.598314,1741.410911,34738.009225,1615.872917,0.776536,0.007439,OAS1_0262_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
2,1.321234e+06,1.549533e+07,194998.234038,5536.019717,711.381964,12196.736377,47697.234563,7361.808624,3327.099830,4496.386770,...,118853.266928,23964.379776,94888.887152,10139.422482,1399.782928,11539.205410,955.254951,0.818062,0.039804,OAS1_0192_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
3,1.796660e+06,1.501991e+07,259754.572729,11667.830404,1004.989949,14546.286257,65018.047678,8732.436554,4104.680790,5717.466708,...,160376.158806,28548.574541,131827.584265,19807.905988,1830.590906,21638.496894,1332.211932,0.802083,0.101329,OAS1_0370_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
4,1.546352e+06,1.527022e+07,212213.403158,4669.944761,951.824951,12662.187353,56784.333099,8313.290575,3867.324802,5208.797734,...,139498.435873,25197.807713,114300.628160,8892.617546,1813.440907,10706.058453,1432.367927,0.792653,0.012716,OAS1_0261_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,1.499820e+06,1.531675e+07,206317.233459,8475.872567,707.951964,13673.351301,53510.741266,8081.422587,3747.274809,5544.937717,...,136479.350027,27201.270610,109278.079417,19722.498992,1547.272921,21269.771913,946.679952,0.833776,0.001593,OAS1_0079_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
432,1.594108e+06,1.522246e+07,232747.440109,11206.495427,704.864964,12704.033351,46889.469604,7623.517611,3977.770797,5231.092733,...,120291.808854,24914.146727,95377.662127,21157.953919,1447.459926,22605.413845,1393.265929,0.838793,0.010796,OAS1_0190_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
433,1.481450e+06,1.533512e+07,198674.850850,13551.929308,824.571958,11787.537398,44785.850712,6730.002656,3438.917824,4013.099795,...,114226.883164,23450.908802,90775.974362,28101.988564,1651.887916,29753.876480,8994.831540,0.764022,0.011286,OAS1_0082_MR1_Tag_pr100_sr100_ppf100_spf100_ip...
434,1.697462e+06,1.511911e+07,262428.600593,5216.686733,662.332966,15163.343225,57755.023049,9078.180536,3050.641844,4917.247749,...,146580.699511,29640.000486,116940.699026,9801.910499,1339.071932,11140.982431,914.780953,0.852596,0.016517,OAS1_0385_MR1_Tag_pr100_sr100_ppf100_spf100_ip...


In [7]:
# Strip the processing-tag suffix so 'subject' holds only the bare ID
# (e.g. 'OAS1_0014_MR1'). regex=False pins a literal match, because the
# default of `regex` differs between pandas versions.
df['subject'] = df['subject'].str.replace(
    '_Tag_pr100_sr100_ppf100_spf100_ipf100_sipf143', '', regex=False
)


In [8]:
# Show the frame again to check the shortened 'subject' values.
df

Unnamed: 0,total_intracranial_volume,unknown,left_cerebral_white_matter,left_lateral_ventricle,left_inf_lat_vent,left_cerebellum_white_matter,left_cerebellum_cortex,left_thalamus,left_caudate,left_putamen,...,cerebellum,cerebellum_white_matter,cerebellum_cortex,lateral_ventricle,inf_lat_vent,total_lateral_ventricle,wm_hypointensities,HippocampusOccupancyScore,HippocampusAsymmetryIndex,subject
0,1.483843e+06,1.533272e+07,223501.532581,6197.666683,725.444963,13403.753315,52939.646295,7991.213592,3569.600818,5298.663729,...,133842.709162,26811.965630,107030.743532,11727.855401,1325.351932,13053.207333,1309.573933,0.840818,0.005738,OAS1_0014_MR1
1,1.558668e+06,1.525790e+07,222830.281616,17250.498119,944.278952,13765.618297,55144.107183,7159.781634,3436.173824,4697.727760,...,138828.556907,27367.625602,111460.931305,32996.598314,1741.410911,34738.009225,1615.872917,0.776536,0.007439,OAS1_0262_MR1
2,1.321234e+06,1.549533e+07,194998.234038,5536.019717,711.381964,12196.736377,47697.234563,7361.808624,3327.099830,4496.386770,...,118853.266928,23964.379776,94888.887152,10139.422482,1399.782928,11539.205410,955.254951,0.818062,0.039804,OAS1_0192_MR1
3,1.796660e+06,1.501991e+07,259754.572729,11667.830404,1004.989949,14546.286257,65018.047678,8732.436554,4104.680790,5717.466708,...,160376.158806,28548.574541,131827.584265,19807.905988,1830.590906,21638.496894,1332.211932,0.802083,0.101329,OAS1_0370_MR1
4,1.546352e+06,1.527022e+07,212213.403158,4669.944761,951.824951,12662.187353,56784.333099,8313.290575,3867.324802,5208.797734,...,139498.435873,25197.807713,114300.628160,8892.617546,1813.440907,10706.058453,1432.367927,0.792653,0.012716,OAS1_0261_MR1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,1.499820e+06,1.531675e+07,206317.233459,8475.872567,707.951964,13673.351301,53510.741266,8081.422587,3747.274809,5544.937717,...,136479.350027,27201.270610,109278.079417,19722.498992,1547.272921,21269.771913,946.679952,0.833776,0.001593,OAS1_0079_MR1
432,1.594108e+06,1.522246e+07,232747.440109,11206.495427,704.864964,12704.033351,46889.469604,7623.517611,3977.770797,5231.092733,...,120291.808854,24914.146727,95377.662127,21157.953919,1447.459926,22605.413845,1393.265929,0.838793,0.010796,OAS1_0190_MR1
433,1.481450e+06,1.533512e+07,198674.850850,13551.929308,824.571958,11787.537398,44785.850712,6730.002656,3438.917824,4013.099795,...,114226.883164,23450.908802,90775.974362,28101.988564,1651.887916,29753.876480,8994.831540,0.764022,0.011286,OAS1_0082_MR1
434,1.697462e+06,1.511911e+07,262428.600593,5216.686733,662.332966,15163.343225,57755.023049,9078.180536,3050.641844,4917.247749,...,146580.699511,29640.000486,116940.699026,9801.910499,1339.071932,11140.982431,914.780953,0.852596,0.016517,OAS1_0385_MR1


In [9]:

def extract_number(subj):
    """Return the number embedded in the second '_'-separated field of an ID.

    For example 'OAS1_0014_MR1' -> 14: the ID is split on '_', the decimal
    digits of the second field are joined, and the result is parsed as int.

    Args:
        subj: Subject identifier string.

    Returns:
        The parsed integer, or None when ``subj`` is not a string, has no
        second field, or that field contains no decimal digits.
    """
    # Missing values (None / NaN) can reach this function through
    # Series.apply; report "no number" instead of failing on .split().
    if not isinstance(subj, str):
        return None
    parts = subj.split('_')
    if len(parts) < 2:
        return None
    # Keep only characters int() can parse. str.isdigit also accepts characters
    # such as superscript digits, which would make int() raise ValueError.
    digits = ''.join(ch for ch in parts[1] if ch.isdecimal())
    return int(digits) if digits else None

In [10]:

# Sort the rows by the numeric part of the subject ID.
# NOTE(review): 'sort_key' is dropped only from df_final; the column stays on df.
# NOTE(review): the later cells shown in this notebook pass df, not df_final,
# to prepare_filtered_df — confirm which frame is meant to be exported.
df['sort_key'] = df['subject'].apply(extract_number)  # build the sort key
df_final= df .sort_values(by='sort_key').drop(columns=['sort_key'])  # sort, then drop the sort-key column


In [16]:
# Columns kept in the filtered table, in output order.
# 'total_cerebral_white_matter' is derived inside prepare_filtered_df; every
# other name must already be a column of the frame passed to it.
columns_to_include = [
    'csf', '3rd_ventricle', 'inf_lat_vent', '4th_ventricle', 'frontal', 'temporal', 'occipital',
    'insula', 'thalamus', 'caudate', 'putamen', 'pallidum', 'hippocampus', 'amygdala', 'accumbens_area',
    'ventraldc', 'cingulate', 'cerebellum', 'subject', 'brain_stem', 'parietal', 'lateral_ventricle',
    'total_cerebral_white_matter'
]


def prepare_filtered_df(volume_df):
    """Return a new frame restricted to ``columns_to_include``.

    'total_cerebral_white_matter' is computed as the sum of the
    'left_cerebral_white_matter' and 'right_cerebral_white_matter' columns.
    The input frame is not modified.

    Args:
        volume_df: DataFrame holding the left/right cerebral white matter
            columns plus every non-derived column in ``columns_to_include``.

    Returns:
        An independent DataFrame whose columns are exactly
        ``columns_to_include``, in that order.

    Raises:
        KeyError: if a required column is missing from ``volume_df``.
    """
    total_wm = (
        volume_df['left_cerebral_white_matter']
        + volume_df['right_cerebral_white_matter']
    )
    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    with_total = volume_df.assign(total_cerebral_white_matter=total_wm)
    return with_total[columns_to_include].copy()




In [17]:
# Build the export table from df.
# NOTE(review): this passes df, not the sorted df_final created earlier —
# confirm that exporting the unsorted rows is intended.
df_all = prepare_filtered_df(df)

In [18]:
# Show the filtered export table.
df_all

Unnamed: 0,csf,3rd_ventricle,inf_lat_vent,4th_ventricle,frontal,temporal,occipital,insula,thalamus,caudate,...,amygdala,accumbens_area,ventraldc,cingulate,cerebellum,subject,brain_stem,parietal,lateral_ventricle,total_cerebral_white_matter
0,873.963955,876.707955,1325.351932,2050.110895,173647.857128,119952.238872,50038.209444,10917.003442,15522.464207,7258.222629,...,3206.706836,1142.532942,7645.126609,21201.857917,133842.709162,OAS1_0014_MR1,21047.164925,116488.968049,11727.855401,449740.891023
1,1192.267939,1086.623944,1741.410911,1220.393938,160165.213817,111271.938315,53488.446267,12104.126382,13894.243290,7016.750642,...,3310.635831,1050.951946,7255.135629,20225.336967,138828.556907,OAS1_0262_MR1,19889.539984,109535.329404,32996.598314,444122.894310
2,756.314961,751.512962,1399.782928,2630.466866,146554.974513,96529.456068,38933.585011,10111.982483,14152.522277,6605.836663,...,2560.494869,959.370951,7617.343611,17645.634098,118853.266928,OAS1_0192_MR1,18181.743071,93619.444217,10139.422482,391066.971021
3,1176.489940,894.543954,1830.590906,2155.068890,192126.981184,149341.506370,49756.606458,13541.982308,17080.713127,8305.401576,...,3424.854825,1246.118936,9515.162514,26968.716622,160376.158806,OAS1_0370_MR1,25595.001692,135306.633087,19807.905988,517523.518560
4,1071.188945,830.402958,1813.440907,1534.238922,170251.471302,123195.646706,46359.877631,13230.538324,15919.315187,7668.450608,...,3167.947838,1255.036936,8165.800583,22196.214866,139498.435873,OAS1_0261_MR1,19919.380982,108621.234451,8892.617546,423439.309367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,1031.743947,830.402958,1547.272921,2426.038876,177676.391923,124532.660638,42264.457841,12751.024349,15655.205200,7609.454611,...,3516.435820,1370.284930,8795.205551,23564.098796,136479.350027,OAS1_0079_MR1,21982.182877,111215.343318,19722.498992,413142.449893
432,1307.172933,1336.670932,1447.459926,1345.588931,155578.618052,113375.214208,46773.878610,11733.000401,14907.465238,7947.995594,...,3069.506843,1057.125946,8265.613578,21336.313910,120291.808854,OAS1_0190_MR1,20660.260944,106852.383541,21157.953919,466379.134173
433,1381.260929,1489.991924,1651.887916,1563.050920,143905.642648,82929.849763,45212.885690,9429.069518,13060.067333,6706.335657,...,1838.822906,823.199958,6836.675651,20512.084952,114226.883164,OAS1_0082_MR1,20075.102974,95359.826128,28101.988564,400109.136559
434,1048.550946,856.813956,1339.071932,1757.874910,184918.493553,120910.237823,57733.757050,11848.248395,17393.186111,6260.778680,...,3267.760833,1086.623944,8925.202544,22920.630829,146580.699511,OAS1_0385_MR1,21936.563879,126804.349522,9801.910499,523867.989236


In [19]:
# Write the export table to a CSV file; the relative path resolves against the
# current working directory. With to_csv defaults the DataFrame index is
# written as the first column.
df_all.to_csv('all_oasis1_average_after_swift_20240430_final_111.csv')

In [20]:
# IPython automagic (%pwd): shows the current working directory, i.e. where the CSV above was written.
pwd

'/home/limseoyoung/debug'