## Notebook to keep track of MRI derivatives

### Raw tabular data is here
    `/home/nikhil/projects/Parkinsons/ppmi/derivatives`

### Currently collating and tracking the following pipelines:
    - BIDS
    - fMRIPrep
    - freesurfer

In [3]:
import numpy as np
import pandas as pd
from itertools import product

In [22]:
# Root of the PPMI dataset; every other location below is derived from it.
ppmi_dir = "/home/nikhil/projects/Parkinsons/ppmi/"

# BIDS dataset and its participants listing.
bids_dir = f"{ppmi_dir}bids/"
bids_csv = f"{bids_dir}participants.tsv"

# Tabular demographics.
demographics_dir = f"{ppmi_dir}tabular/demographics/"

# Pipeline derivatives.
derivatives_dir = f"{ppmi_dir}derivatives/"

# Temporary: subject-ID lists used for the fmriprep subject counts.
fmriprep_ses_subject_ids_dir = f"{derivatives_dir}subject_ids/"

# TODO: status_dir

In [11]:
# Load the current mr_proc manifest from the demographics directory.
current_mr_proc_manifest_csv = f"{demographics_dir}/mr_proc_manifest.csv"
manifest_all_groups_df = pd.read_csv(current_mr_proc_manifest_csv)

# Unique participants and group labels before any filtering.
n_participants = manifest_all_groups_df["participant_id"].nunique(dropna=False)
print(f"n_participants: {n_participants}")

dx_groups = manifest_all_groups_df["group"].unique()
print(f"groups: {dx_groups}")

# Keep only the rows whose group is one of the selected groups.
select_dex_groups = ['PD', 'Prodromal', 'Control', 'SWEDD']
print(f"select groups: {select_dex_groups}")
in_selected_group = manifest_all_groups_df["group"].isin(select_dex_groups)
current_mr_proc_manifest_df = manifest_all_groups_df[in_selected_group]

# Unique participants remaining after the group filter.
n_participants = current_mr_proc_manifest_df["participant_id"].nunique(dropna=False)
print(f"n_participants: {n_participants}")
current_mr_proc_manifest_df.head()

n_participants: 1240
groups: ['Phantom' 'PD' 'Prodromal' 'Control' 'SWEDD' 'GenReg Unaff' 'AV133']
select groups: ['PD', 'Prodromal', 'Control', 'SWEDD']
n_participants: 1216


Unnamed: 0,participant_id,age,sex,group
3,sub-100001,67,M,PD
4,sub-100005,53,M,PD
5,sub-100006,56,F,PD
6,sub-100007,67,M,PD
7,sub-100012,66,F,PD


### Check BIDS participants

In [43]:
# Unique participant IDs listed in the tab-separated BIDS participants file.
bids_df = pd.read_table(bids_csv)
bids_ids = bids_df["participant_id"].unique()
n_bids_ids = len(bids_ids)
print(f"n_participants: {n_bids_ids}")

# Per-group participant counts for manifest rows whose ID appears in BIDS.
in_bids = current_mr_proc_manifest_df["participant_id"].isin(bids_ids)
proc_df = current_mr_proc_manifest_df[in_bids]
proc_df.groupby("group")["participant_id"].count()

n_participants: 560


group
Control       89
PD           285
Prodromal    140
SWEDD         42
Name: participant_id, dtype: int64

### Check number of sessions
- 0,,'Screening'
- 1,0,'Baseline'
- 5,12,'Month 12'
- 7,24,'Month 24'
- 9,36,'Month 36'
- 11,48,'Month 48'
- 30,,'Premature Withdrawal'
- 21,,'Symptomatic Therapy'
- 90,,'Unscheduled Visit 01'

In [37]:
# Session id -> visit name for the sessions counted below.
session_dict = {
    0: "Screening",
    1: "Baseline",
    5: "Month 12",
    7: "Month 24",
    9: "Month 36",
    11: "Month 48",
}

# For each session, read its participant-ID list (one ID per row, no header)
# and print the number of unique IDs.
for ses_id in session_dict:
    ses_name = session_dict[ses_id]
    ses_participant_ids_txt = f"{fmriprep_ses_subject_ids_dir}/PPMI_ses-{ses_id}_fmriprep_anat_20.2.7_participant_ids.txt"
    ses_ids_df = pd.read_csv(ses_participant_ids_txt, header=None)
    participants_list = list(ses_ids_df[0].unique())
    n_participants = len(participants_list)

    print(f"{ses_name}, n_participants: {n_participants}")

Screening, n_participants: 3
Baseline, n_participants: 430
Month 12, n_participants: 238
Month 24, n_participants: 237
Month 36, n_participants: 2
Month 48, n_participants: 157


### Baseline fmriprep proc

In [41]:
# Unique participant IDs from the ses-1 list file (no header row; IDs are in
# the first column).
ses_participant_ids_txt = f"{fmriprep_ses_subject_ids_dir}/PPMI_ses-1_fmriprep_anat_20.2.7_participant_ids.txt"
baseline_ids_df = pd.read_csv(ses_participant_ids_txt, header=None)
baseline_participants_list = list(baseline_ids_df[0].unique())
n_participants = len(baseline_participants_list)
print(f"n_participants: {n_participants}")

n_participants: 430


In [46]:
# Demographics of processed fmriprep ids
# Manifest rows whose participant_id appears in the baseline ID list.
is_baseline_processed = current_mr_proc_manifest_df["participant_id"].isin(baseline_participants_list)
proc_df = current_mr_proc_manifest_df[is_baseline_processed]
print(f"n_proc_df: {len(proc_df)}")

# The isin() filter silently drops baseline IDs that have no row in the
# manifest frame; report them so a mismatch between the ID-list count and
# n_proc_df can be traced to specific participants.
manifest_ids = set(current_mr_proc_manifest_df["participant_id"])
missing_ids = [pid for pid in baseline_participants_list if pid not in manifest_ids]
print(f"n_missing_from_manifest: {len(missing_ids)} {missing_ids[:10]}")

# Per-group participant counts for the matched rows.
proc_df.groupby("group")["participant_id"].count()

n_proc_df: 429


group
Control       75
PD           223
Prodromal     92
SWEDD         39
Name: participant_id, dtype: int64