### Notebook to discover acquisition (acq) protocol names
- Uses the `.heudiconv` directory from the stage 1 run

In [42]:
import pandas as pd
import numpy as np
from pathlib import Path

In [43]:
# Path configuration for this notebook. Directory strings end with "/" so
# they can be joined by plain f-string concatenation further down.
# NOTE(review): dataset_root is an absolute, user-specific path; adjust it
# for your environment before running.
dataset_root = "/home/nikhil/projects/NIMHANS/data/PD_YLO/"
bids_dir = f"{dataset_root}bids/"
heudiconv_dir = f"{bids_dir}.heudiconv/"
# Template appended to heudiconv_dir; the two slots are filled with
# (participant, session) via str.format.
dicominfo_file = "{}/info/dicominfo_ses-{}.tsv"
# dataset_root already ends with "/", so no leading slash is added here
# (the previous form produced ".../PD_YLO//tabular/...").
mr_proc_manifest = f"{dataset_root}tabular/demographics/mr_proc_manifest.csv"

In [44]:
def get_dicominfo(f):
    """Split one dicominfo TSV into single-series and multi-series protocols.

    Reads the tab-separated file ``f`` and keeps only the ``protocol_name``,
    ``series_description`` and ``image_type`` columns. A protocol name that
    appears on more than one row of the file is treated as "complex"; every
    other row is "simple".

    Parameters
    ----------
    f : path or file-like accepted by ``pandas.read_csv``

    Returns
    -------
    (simple_df, complex_df) : tuple of DataFrame
        Rows whose protocol name occurs once, and rows whose protocol name
        occurs more than once. The original row index is preserved in both.

    Raises
    ------
    KeyError
        If any of the three expected columns is missing from the file.
    """
    wanted = ["protocol_name", "series_description", "image_type"]
    series_df = pd.read_csv(f, sep="\t")[wanted]

    # Protocol names that label more than one series in this file.
    occurrences = series_df["protocol_name"].value_counts()
    repeated_names = list(occurrences[occurrences > 1].index)

    # One boolean mask, used directly and negated, yields the two partitions.
    is_repeated = series_df["protocol_name"].isin(repeated_names)
    return series_df[~is_repeated], series_df[is_repeated]


In [45]:
# Read the current participant manifest and collect the dicominfo tables of
# every participant for one session.
manifest_df = pd.read_csv(mr_proc_manifest)
participants = manifest_df["participant_id"].str.strip().values
n_participants = len(participants)

ses="01"
# Accumulate per-participant frames in lists and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and was quadratic inside a loop.
simple_frames = []
complex_frames = []
for participant in participants:
    f = f"{heudiconv_dir}{dicominfo_file.format(participant, ses)}"
    try:
        s_df, c_df = get_dicominfo(f)
    except Exception as err:
        # Best-effort: report and skip this participant. Without the
        # `continue`, the frames of the previous participant would be added a
        # second time (or a NameError raised if the first read fails).
        print(f"could not read dicominfo for {participant}: {err!r}")
        continue

    simple_frames.append(s_df)
    complex_frames.append(c_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# dicominfo file could be read. The original row index is kept (no reset).
simple_dicominfo_df = pd.concat(simple_frames) if simple_frames else pd.DataFrame()
complex_dicominfo_df = pd.concat(complex_frames) if complex_frames else pd.DataFrame()

print(f"n_simple_procols: {len(simple_dicominfo_df)}")
print(f"n_complex_procols: {len(complex_dicominfo_df)}")

# Identical rows (same protocol_name / series_description / image_type)
# coming from different participants collapse to a single row.
print("Dropping common rows across participants")
simple_dicominfo_df = simple_dicominfo_df.drop_duplicates()
complex_dicominfo_df = complex_dicominfo_df.drop_duplicates()

print(f"n_simple_procols: {len(simple_dicominfo_df)}")
print(f"n_complex_procols: {len(complex_dicominfo_df)}")


n_simple_procols: 78
n_complex_procols: 47
Dropping common rows across participants
n_simple_procols: 9
n_complex_procols: 5


In [46]:
# Display the de-duplicated rows whose protocol name occurred only once
# within a participant's dicominfo file.
simple_dicominfo_df

Unnamed: 0,protocol_name,series_description,image_type
0,MPRAGE GRAPPA2,MPRAGE GRAPPA2,"('ORIGINAL', 'PRIMARY', 'M', 'NONE')"
1,t2_space_dark-fluid_sag_p2_ns-t2prep,t2_space_dark-fluid_sag_p2_ns-t2prep,"('ORIGINAL', 'PRIMARY', 'M', 'NONE')"
4,MB_ep2d_bold_s8,MB_ep2d_bold_s8,"('ORIGINAL', 'PRIMARY', 'FMRI', 'NONE')"
5,t2_tse_tra_512,t2_tse_tra_512,"('ORIGINAL', 'PRIMARY', 'M', 'NONE')"
6,Diffusion_Kurtosis_FW_S2_modifide,Diffusion_Kurtosis_FW_S2_modifide,"('ORIGINAL', 'PRIMARY', 'DIFFUSION', 'NONE')"
7,Diffusion_Kurtosis_FW_S2_modifide_rev,Diffusion_Kurtosis_FW_S2_modifide_rev,"('ORIGINAL', 'PRIMARY', 'DIFFUSION', 'NONE')"
8,pcasl_3d_singleTI,pcasl_3d_singleTI,"('ORIGINAL', 'PRIMARY', 'ASL', 'NONE')"
0,AAhead_scout_64 CHANNEL,AAhead_scout_64 CHANNEL,"('ORIGINAL', 'PRIMARY', 'M', 'NONE')"
13,Phoenix Document,PhoenixZIPReport,"('ORIGINAL', 'PRIMARY', 'OTHER', 'CSA REPORT')"


In [47]:
# Display the de-duplicated rows whose protocol name occurred more than once
# within a participant's dicominfo file.
complex_dicominfo_df

Unnamed: 0,protocol_name,series_description,image_type
2,Field_Mapping,Field_Mapping,"('ORIGINAL', 'PRIMARY', 'M', 'NONE')"
3,Field_Mapping,Field_Mapping,"('ORIGINAL', 'PRIMARY', 'P', 'NONE')"
9,t2_swi_tra_p2_2mm,t2_swi_tra_p2_2mm_Mag,"('ORIGINAL', 'PRIMARY', 'T2_STAR', 'NONE')"
10,t2_swi_tra_p2_2mm,t2_swi_tra_p2_2mm_Pha,"('ORIGINAL', 'PRIMARY', 'T2_STAR', 'NONE')"
11,t2_swi_tra_p2_2mm,t2_swi_tra_p2_2mm_SWI,"('ORIGINAL', 'PRIMARY', 'SWI', 'NONE')"
