In [1]:
import pydicom 
import numpy as np 
import pandas as pd 
import os 
import sys 
import glob 
import re 
from datetime import datetime 

In [2]:
DATA_ROOT = '/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations'
LDCT_ROOT = os.path.join(DATA_ROOT, 'Low-Dose_CT')
CECT_ROOT = os.path.join(DATA_ROOT, 'Contrast-Enhanced_CT')

# Checking the number of timepoints and scans

In [3]:
def check_annotations(data_root_dir):
    for patient_id in os.listdir(data_root_dir): 
        print(f"Patient: {patient_id}")
        patient_path = os.path.join(data_root_dir, patient_id)
        if not os.path.isdir(patient_path):
            print(f"{patient_path} is not a directory. Pass.")
            pass 
        for timepoint in os.listdir(patient_path):
            timepoint_path = os.path.join(patient_path, timepoint)
            print(f"\tTimepoint: {timepoint}")
            scan_count = 0
            annotations_available = False

            for scan in os.listdir(timepoint_path):
                scan_path = os.path.join(timepoint_path, scan)

                if os.path.isdir(scan_path):
                    scan_count += 1

                    contours_path = os.path.join(scan_path, 'Contours')
                    if os.path.exists(contours_path):
                        annotations_available = True
                        print(f"\t\tScan: {scan} - Annotation Available")
                    else:
                        print(f"\t\tScan: {scan} - No Annotation")


In [4]:
check_annotations(LDCT_ROOT)

Patient: 10056_1_6BZR9799
	Timepoint: 2022-10-10
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011015204533300001054 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008810 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008782 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991 - No Annotation
	Timepoint: 2021-02-15
		Scan: 1.3.12.2.1107.5.1.4.64297.30000020051816572913900001376 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000020051816572913900001719 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.64297.30000020051816

		Scan: 1.3.12.2.1107.5.1.4.51964.30000021092714024362500001634 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021092715343240600021566 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021092715343240600021715 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021092714024362500001631 - No Annotation
Patient: 10057_1_B3672FL6
	Timepoint: 2022-04-29
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070114350864000002253 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070115225617100019669 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070115225617100019140 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070114350864000002250 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070115225617100019812 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070115225617100018469 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000021070115225617100019988 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.51964.3

In [5]:
check_annotations(CECT_ROOT)

Patient: 10064_1_UTI1Q537
	Timepoint: 2023-09-28
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314421944100023532 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314421944100023500 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314421944100022755 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314421944100023567 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314262280400002944 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314421944100022861 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.75487.30000023032314421944100023320 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.75487.30000023032314421944100023006 - Annotation Available
Patient: 10064_1_0XVHE7T8
	Timepoint: 2023-11-27
		Scan: 1.3.12.2.1107.5.1.4.76430.30000023021314165347900033329 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.76430.30000023021314053084100002218 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.76430.3000002302131416534790003

		Scan: CT_Full_(thin)_Chest_at_TLC_Supine_Low_Dose--1.3.12.2.1107.5.1.4.60120.30000016120121073584700015798 - Annotation Available
		Scan: 1.3.12.2.1107.5.1.4.60120.30000016120121073584700016182 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.60120.30000016120121073584700015669 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.60120.30000016120121073584700016308 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.60120.30000016120119284092100006930 - No Annotation
Patient: 10064_1_25C08LV7
	Timepoint: 2019-05-21
		Scan: 1.3.12.2.1107.5.1.4.95559.30000019010813312986200003156 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.95559.30000019010813312986200003272 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.95559.30000019010813312986200003443 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.95559.30000019010813312986200003417 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.95559.30000019010813240537500000554 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.95559.30000019010813312986200002846 - No Annotation
		Scan: 1.3.12.2.11

		Scan: 1.3.12.2.1107.5.1.4.55174.30000020071812101573400025493 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.55174.30000020071812101573400025193 - No Annotation
Patient: 10057_1_85XB4VJF
	Timepoint: 2021-12-25
		Scan: 1.3.12.2.1107.5.1.4.76430.30000021112017152295500014296 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.76430.30000021112016560991100001616 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.76430.30000021112017152295500014613 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.76430.30000021112017152295500014006 - Annotation Available
		Scan: 1.3.12.2.1107.5.1.4.76430.30000021112017152295500014586 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.76430.30000021112017152295500014423 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.76430.30000021112017152295500013908 - No Annotation
Patient: 10057_1_PR15SFW4
	Timepoint: 2016-09-27
		Scan: 1.3.6.1.4.1.56261.103.10057.872605622319560207527663964475416548 - No Annotation
		Scan: 1.3.6.1.4.1.56261.103.10057.11802324751948

	Timepoint: 2017-07-13
		Scan: 1.2.392.200036.9116.2.2.2.1762657897.1447895177.913227 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.2.392.200036.9116.2.2.2.1762657897.1447894829.27785 - Annotation Available
		Scan: 1.3.6.1.4.1.56261.103.10057.311378423341220787192802885485894385 - No Annotation
		Scan: 1.2.392.200036.9116.2.2.2.1762657897.1447894518.443511 - No Annotation
		Scan: 1.3.6.1.4.1.56261.103.10057.407228670565030940787262088776812614 - No Annotation
		Scan: 1.2.392.200036.9116.2.2.2.1762657897.1447894693.271174 - No Annotation
		Scan: 1.3.6.1.4.1.56261.103.10057.685026823025312184100995593470027493 - No Annotation
		Scan: 1.3.6.1.4.1.56261.103.10057.625637264507220527831305264236641117 - No Annotation
Patient: 10057_1_AKWTH610
	Timepoint: 2019-06-17
		Scan: 1.3.12.2.1107.5.1.4.66859.30050018052115250646000000969 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050018052115250646000001091 - Annotation Available
		Scan: 1.3

		Scan: 1.3.12.2.1107.5.1.4.55174.30000021040621381048400007249 - No Annotation
Patient: 10057_1_PY067NU6
	Timepoint: 2020-05-28
		Scan: 1.3.12.2.1107.5.1.4.51964.30000019122616093562500020418 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000019122614150745300001750 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000019122616093562500019203 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000019122616093562500020624 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.51964.30000019122616093562500019882 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.51964.30000019122616072204600005131 - Annotation Available
		Scan: 1.3.12.2.1107.5.1.4.51964.30000019122614150745300001768 - No Annotation
Patient: 10056_1_2D6033B8
	Timepoint: 2021-11-07
		Scan: 1.3.12.2.1107.5.1.4.73938.30050020062815422282900026392 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.73938.30050020062815422282900026593 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.73938.3005002006281527228450000

		Scan: 1.3.12.2.1107.5.1.7.123405.30000024012210363170700000168 - No Annotation
		Scan: 1.3.12.2.1107.5.1.7.123405.30000024012210363170700001031 - No Annotation
		Scan: 1.3.12.2.1107.5.1.7.123405.30000024012210363170700000363 - No Annotation
		Scan: 1.3.12.2.1107.5.1.7.123405.30000024012210363170700000765 - No Annotation
		Scan: 1.3.12.2.1107.5.1.7.123405.30000024012210363170700001033 - No Annotation
Patient: 10057_1_ZN1M7487
	Timepoint: 2018-08-15
		Scan: 1.3.12.2.1107.5.1.4.60120.30000018041114364546500011309 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.60120.30000018041114364546500012002 - No Annotation
		Scan: CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.60120.30000018041114364546500011520 - Annotation Available
		Scan: 1.3.12.2.1107.5.1.4.60120.30000018041114364546500012170 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.60120.30000018041114364546500012204 - No Annotation
		Scan: 1.3.12.2.1107.5.1.4.60120.30000018041114200132600001000 - No Annotation
		Scan: 1.3.12.2.1107.5

# Filter UCLA LDCT cases based on kernel, slice thickness, and orientation 

In [6]:
# Get paths to each DICOM folder and first .dcm file 
def get_first_dcm_per_scan(root_directory): 
    dcm_paths = [] 
    dcm_dir_paths = []
    for pid in os.listdir(root_directory): 
        patient_dir_path = os.path.join(root_directory, pid)
        for timepoint in os.listdir(patient_dir_path): 
            timepoint_dir_path = os.path.join(patient_dir_path, timepoint)
            
            for scan in os.listdir(timepoint_dir_path): 
                scan_dir_path = os.path.join(timepoint_dir_path, scan)
                dicom_dir_path = os.path.join(scan_dir_path, 'DICOM')
                if os.path.exists(dicom_dir_path): # this should always be true 
                    dcm_filenames = sorted(os.listdir(dicom_dir_path))
                    dcm_paths.append(os.path.join(dicom_dir_path, dcm_filenames[0]))
                    dcm_dir_paths.append(dicom_dir_path)
                else: 
                    print(f" Warning: path does not exist --{dicom_dir_path}")
    return dcm_dir_paths, dcm_paths 

In [7]:
dicom_dir_paths_ldct, first_files_ldct = get_first_dcm_per_scan(LDCT_ROOT) 
print(len(dicom_dir_paths_ldct))
print(dicom_dir_paths_ldct[0:5])
print(len(first_files_ldct))
print(first_files_ldct[0:5])

444
['/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844/DICOM', '/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM', '/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011015204533300001054/DICOM', '/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620/DICOM', '/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008810/DICOM']
444
['/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64

In [8]:
#make list of dicom files based on list of directories

first_dicom_per_ldct_file = []

for scan in first_files_ldct:
        first_dicom_per_ldct_file.append(pydicom.dcmread(scan,force=True))
    
print(first_dicom_per_ldct_file[0])

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 200
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: CT Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008854
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.276.0.7230010.3.0.3.6.4
(0002, 0013) Implementation Version Name         SH: 'OFFIS_DCMTK_364'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'PRIMARY', 'AXIAL', 'CT_SOM5 MIP']
(0008, 0016) SOP Class UID                       UI: CT Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.12.2.1107.5.1.4.64297.30000022011016375941900008854
(0008, 0020) Study Date

In [10]:
def make_dicom_metadata_df(root_directory): 
    dicom_metadata_list = []
    for pid in os.listdir(root_directory): 
        patient_dir_path = os.path.join(root_directory, pid)
        for timepoint in os.listdir(patient_dir_path): 
            timepoint_dir_path = os.path.join(patient_dir_path, timepoint)
            
            for scan in os.listdir(timepoint_dir_path): 
                scan_dir_path = os.path.join(timepoint_dir_path, scan)
                dicom_dir_path = os.path.join(scan_dir_path, 'DICOM')
                
                try:
                    dcm_filenames = sorted(os.listdir(dicom_dir_path))
                    
                    # read the first .dcm file 
                    dcm_path = os.path.join(dicom_dir_path, dcm_filenames[0])
                    dicom = pydicom.dcmread(dcm_path, stop_before_pixels=True)
                
                    accession_number = getattr(dicom, 'AccessionNumber', 'missing')
                    slice_thickness = getattr(dicom, 'SliceThickness', 'missing')
                    convolution_kernel = getattr(dicom, 'ConvolutionKernel', 'missing')
                    image_orientation = getattr(dicom, 'ImageOrientationPatient', 'missing')
                    patient_id = getattr(dicom, 'PatientID', 'missing')
                    
                    dicom_metadata_list.append([
                        pid, 
                        timepoint, 
                        scan, 
                        accession_number,
                        slice_thickness,
                        convolution_kernel,
                        image_orientation,
                        patient_id,
                        dicom_dir_path
                    ])
                    
                except Exception as e:
                    # Ignore the scan if there's an error
                    print(f"Error reading DICOM file in {dicom_dir_path}: {e}")
                    continue
                                        
    dicom_metadata_df = pd.DataFrame(dicom_metadata_list, columns=[
        'pid', 
        'timepoint',
        'scan',
        'AccessionNumber', 
        'SliceThickness', 
        'ConvolutionKernel',
        'ImageOrientationPatient', 
        'PatientID', 
        "Directory",
    ])
    
    return dicom_metadata_df


In [12]:
ldct_dicom_metadata_df = make_dicom_metadata_df(LDCT_ROOT)
ldct_dicom_metadata_df

Error reading DICOM file in /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_JLZKV012/2023-03-25/1.3.12.2.1107.5.1.4.66859.30050022022618071138600005003/DICOM: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=True to force reading.
Error reading DICOM file in /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_JLZKV012/2023-03-25/1.3.12.2.1107.5.1.4.66859.30050022022618071138600004227/DICOM: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=True to force reading.
Error reading DICOM file in /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_JLZKV012/2023-03-25/1.3.12.2.1107.5.1.4.66859.30050022022618071138600005064/DICOM: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=True to force reading.
Error r

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory
0,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941...,A_W2ZWE00V,10.0,Br46f,"[0.99929074812244, -0.035267941082, -0.0131974...",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Im...
1,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941...,A_W2ZWE00V,3.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Im...
2,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011015204533...,A_W2ZWE00V,,Tr20f,"[1, 6.123233996e-017, 0, 0, 0, -1]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Im...
3,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941...,A_W2ZWE00V,2.0,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013...",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Im...
4,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941...,A_W2ZWE00V,10.0,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013...",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Im...
...,...,...,...,...,...,...,...,...,...
430,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818...,A_6461R8MR,10.0,B45f,"[0.99874851280103, 0.021294114512, 0.045254489...",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Im...
431,10057_1_X4DQH49F,2020-07-26,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1...,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Im...
432,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818...,A_6461R8MR,2.0,B45f,"[-0.021315952809, 0.99977278926556, 0, 0.04524...",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Im...
433,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818...,A_6461R8MR,2.0,B45f,"[0.998748512801, 0.021294114513, 0.04525448999...",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Im...


In [13]:

pd.set_option('display.max_colwidth', None)
print(ldct_dicom_metadata_df['Directory'])


0                                          /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844/DICOM
1                                          /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM
2                                          /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011015204533300001054/DICOM
3                                          /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620/DICOM
4                                          /workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.

In [14]:
#function to determine ct orientation based on 'ImageOrientationPatient' column

def get_ct_orientation(df, index):

    image_orientation_patient_string = str(df['ImageOrientationPatient'].iloc[index])
    image_vectors = image_orientation_patient_string.replace('[','').replace(']','').split(',')
    image_vectors = [round(float(x)) for x in image_vectors]

    #[1,0,0,0,1,0]
    if image_vectors[0] == 1 and image_vectors[4] == 1: 
        return 'axial'
    
    #[0,1,0,0,0,-1]
    elif image_vectors[1] == 1 and image_vectors[5] == -1: 
        return 'sagittal'
    
    #[1,0,0,0,0,-1]
    elif image_vectors[0] == 1 and image_vectors[5] == -1: 
        return 'coronal'
    
    else:
        return 'unknown'

In [15]:
#add column for scan orientation

def add_orientation_column(dicom_df):
    orientation = []
    
    for index in range(len(dicom_df)):
        if str(dicom_df['ImageOrientationPatient'].iloc[index]) == 'missing':
            orientation.append('missing')
        else:
            orientation.append(get_ct_orientation(dicom_df, index))

    dicom_df['Orientation'] = orientation
    return dicom_df

In [16]:
ldct_dicom_metadata_df = add_orientation_column(ldct_dicom_metadata_df)
# ldct_dicom_metadata_df = add_timepoint(ldct_dicom_metadata_df)
ldct_dicom_metadata_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
0,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844,A_W2ZWE00V,10.0,Br46f,"[0.99929074812244, -0.035267941082, -0.013197463723, 0.03527101284501, 0.99937778425022, -2.2e-011]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844/DICOM,axial
1,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385,A_W2ZWE00V,3.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM,axial
2,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011015204533300001054,A_W2ZWE00V,,Tr20f,"[1, 6.123233996e-017, 0, 0, 0, -1]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011015204533300001054/DICOM,coronal
3,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620,A_W2ZWE00V,2.0,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013189252362, 0.000465487902, -0.9999129096792]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620/DICOM,sagittal
4,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008810,A_W2ZWE00V,10.0,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013189252362, 0.000465487902, -0.9999129096792]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008810/DICOM,sagittal
...,...,...,...,...,...,...,...,...,...,...
430,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100002076,A_6461R8MR,10.0,B45f,"[0.99874851280103, 0.021294114512, 0.045254489997, -0.021315952903, 0.99977278926356, -4e-012]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100002076/DICOM,axial
431,10057_1_X4DQH49F,2020-07-26,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
432,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864,A_6461R8MR,2.0,B45f,"[-0.021315952809, 0.99977278926556, 0, 0.045244208353, 0.000964642587, -0.998975490728]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864/DICOM,sagittal
433,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746,A_6461R8MR,2.0,B45f,"[0.998748512801, 0.021294114513, 0.045254489997, 0.045244207691, 0.000964642581, -0.998975490758]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746/DICOM,coronal


In [17]:
# for feature counts, we need to convert everything to strings 
ldct_dicom_metadata_df['SliceThickness'] = ldct_dicom_metadata_df['SliceThickness'].astype(str)
ldct_dicom_metadata_df['ConvolutionKernel'] = ldct_dicom_metadata_df['ConvolutionKernel'].astype(str)
ldct_dicom_metadata_df['Orientation'] = ldct_dicom_metadata_df['Orientation'].astype(str)

# calculate the number of cases for each combination 
feature_count_ldct_df = (
    ldct_dicom_metadata_df
    .groupby(['SliceThickness', 'ConvolutionKernel', 'Orientation'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

feature_count_ldct_df = feature_count_ldct_df.sort_values('count', ascending=False)
feature_count_ldct_df

Unnamed: 0,SliceThickness,ConvolutionKernel,Orientation,count
1,1,B45f,axial,22
7,10,B45f,axial,21
30,3,B45f,axial,20
8,10,B45f,coronal,20
22,2,B45f,sagittal,19
9,10,B45f,sagittal,19
21,2,B45f,coronal,19
37,,Tr20f,coronal,19
3,1,Br46f,axial,18
36,,T20f,coronal,18


## Exclude cases whose slice thickness is "missing" or None

In [18]:
# Filter out rows where SliceThickness is "missing"
filtered_df = ldct_dicom_metadata_df[ldct_dicom_metadata_df['SliceThickness'] != 'missing']

# Convert columns to strings to ensure they are hashable
filtered_df['SliceThickness'] = filtered_df['SliceThickness'].astype(str)
filtered_df['ConvolutionKernel'] = filtered_df['ConvolutionKernel'].astype(str)
filtered_df['Orientation'] = filtered_df['Orientation'].astype(str)

# Now group by the specified columns and calculate the counts
feature_count_ldct_df = (
    filtered_df
    .groupby(['SliceThickness', 'ConvolutionKernel', 'Orientation'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

# Sort the DataFrame by the count column in descending order
feature_count_ldct_df = feature_count_ldct_df.sort_values('count', ascending=False)

# Display the result
feature_count_ldct_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['SliceThickness'] = filtered_df['SliceThickness'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ConvolutionKernel'] = filtered_df['ConvolutionKernel'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Orientation'] = filtered_df['Orientati

Unnamed: 0,SliceThickness,ConvolutionKernel,Orientation,count
1,1.0,B45f,axial,22
7,10.0,B45f,axial,21
30,3.0,B45f,axial,20
8,10.0,B45f,coronal,20
22,2.0,B45f,sagittal,19
21,2.0,B45f,coronal,19
9,10.0,B45f,sagittal,19
37,,Tr20f,coronal,19
36,,T20f,coronal,18
3,1.0,Br46f,axial,18


In [19]:
# Filter out rows where SliceThickness is "missing"
filtered_df = filtered_df[filtered_df['SliceThickness'] != 'None']

# Now group by the specified columns and calculate the counts
feature_count_ldct_df = (
    filtered_df
    .groupby(['SliceThickness', 'ConvolutionKernel', 'Orientation'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

# Sort the DataFrame by the count column in descending order
feature_count_ldct_df = feature_count_ldct_df.sort_values('count', ascending=False)

# Display the result
feature_count_ldct_df


Unnamed: 0,SliceThickness,ConvolutionKernel,Orientation,count
1,1.0,B45f,axial,22
7,10.0,B45f,axial,21
8,10.0,B45f,coronal,20
30,3.0,B45f,axial,20
9,10.0,B45f,sagittal,19
22,2.0,B45f,sagittal,19
21,2.0,B45f,coronal,19
3,1.0,Br46f,axial,18
14,10.0,Br46f,axial,16
2,1.0,B46f,axial,15


In [20]:
filtered_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
0,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844,A_W2ZWE00V,10,Br46f,"[0.99929074812244, -0.035267941082, -0.013197463723, 0.03527101284501, 0.99937778425022, -2.2e-011]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008844/DICOM,axial
1,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385,A_W2ZWE00V,3,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM,axial
3,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620,A_W2ZWE00V,2,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013189252362, 0.000465487902, -0.9999129096792]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620/DICOM,sagittal
4,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008810,A_W2ZWE00V,10,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013189252362, 0.000465487902, -0.9999129096792]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008810/DICOM,sagittal
5,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491,A_W2ZWE00V,2,Br46f,"[0.99929074812244, -0.035267941082, -0.013197463723, -0.013189252054, 0.000465487901, -0.9999129096833]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491/DICOM,coronal
...,...,...,...,...,...,...,...,...,...,...
430,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100002076,A_6461R8MR,10,B45f,"[0.99874851280103, 0.021294114512, 0.045254489997, -0.021315952903, 0.99977278926356, -4e-012]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100002076/DICOM,axial
431,10057_1_X4DQH49F,2020-07-26,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
432,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864,A_6461R8MR,2,B45f,"[-0.021315952809, 0.99977278926556, 0, 0.045244208353, 0.000964642587, -0.998975490728]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864/DICOM,sagittal
433,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746,A_6461R8MR,2,B45f,"[0.998748512801, 0.021294114513, 0.045254489997, 0.045244207691, 0.000964642581, -0.998975490758]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746/DICOM,coronal


## Exclude slice thickness > 3

In [21]:
filtered_df['SliceThickness'] = pd.to_numeric(filtered_df['SliceThickness'], errors='coerce')

# only keep slice thickness <= 3 
filtered_df = filtered_df[filtered_df['SliceThickness'] <= 3]

# convert back to str for counting 
filtered_df['SliceThickness'] = filtered_df['SliceThickness'].astype(str)
filtered_df['ConvolutionKernel'] = filtered_df['ConvolutionKernel'].astype(str)
filtered_df['Orientation'] = filtered_df['Orientation'].astype(str)

feature_count_ldct_df = (
    filtered_df
    .groupby(['SliceThickness', 'ConvolutionKernel', 'Orientation'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)
feature_count_ldct_df = feature_count_ldct_df.sort_values('count', ascending=False)
feature_count_ldct_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['SliceThickness'] = filtered_df['SliceThickness'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ConvolutionKernel'] = filtered_df['ConvolutionKernel'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Orientation'] = filtered_df['Orientati

Unnamed: 0,SliceThickness,ConvolutionKernel,Orientation,count
1,1.0,B45f,axial,22
17,3.0,B45f,axial,20
8,2.0,B45f,coronal,19
9,2.0,B45f,sagittal,19
3,1.0,Br46f,axial,18
2,1.0,B46f,axial,15
22,3.0,Br46f,axial,14
18,3.0,B46f,axial,14
5,1.0,T20s,coronal,14
10,2.0,B46f,coronal,13


In [22]:
filtered_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
1,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385,A_W2ZWE00V,3.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM,axial
3,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620,A_W2ZWE00V,2.0,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013189252362, 0.000465487902, -0.9999129096792]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620/DICOM,sagittal
5,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491,A_W2ZWE00V,2.0,Br46f,"[0.99929074812244, -0.035267941082, -0.013197463723, -0.013189252054, 0.000465487901, -0.9999129096833]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491/DICOM,coronal
7,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991,A_W2ZWE00V,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991/DICOM,axial
8,10056_1_6BZR9799,2021-02-15,1.3.12.2.1107.5.1.4.64297.30000020051816572913900001376,A_G7C0161N,2.0,B45f,"[0.99924525146225, 0.03878823317501, 0.002097712397, 0.002096133763, 8.1366745e-005, -0.9999977997989]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2021-02-15/1.3.12.2.1107.5.1.4.64297.30000020051816572913900001376/DICOM,coronal
...,...,...,...,...,...,...,...,...,...,...
426,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
431,10057_1_X4DQH49F,2020-07-26,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
432,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864,A_6461R8MR,2.0,B45f,"[-0.021315952809, 0.99977278926556, 0, 0.045244208353, 0.000964642587, -0.998975490728]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864/DICOM,sagittal
433,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746,A_6461R8MR,2.0,B45f,"[0.998748512801, 0.021294114513, 0.045254489997, 0.045244207691, 0.000964642581, -0.998975490758]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746/DICOM,coronal


## Exclude scans with missing convolution kernel 

In [23]:
# Filter out rows where SliceThickness is "missing"
filtered_df = filtered_df[filtered_df['ConvolutionKernel'] != 'None' ]
filtered_df = filtered_df[filtered_df['ConvolutionKernel'] != 'missing' ]

# Now group by the specified columns and calculate the counts
feature_count_ldct_df = (
    filtered_df
    .groupby(['SliceThickness', 'ConvolutionKernel', 'Orientation'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

# Sort the DataFrame by the count column in descending order
feature_count_ldct_df = feature_count_ldct_df.sort_values('count', ascending=False)

# Display the result
feature_count_ldct_df


Unnamed: 0,SliceThickness,ConvolutionKernel,Orientation,count
1,1.0,B45f,axial,22
17,3.0,B45f,axial,20
8,2.0,B45f,coronal,19
9,2.0,B45f,sagittal,19
3,1.0,Br46f,axial,18
2,1.0,B46f,axial,15
22,3.0,Br46f,axial,14
18,3.0,B46f,axial,14
5,1.0,T20s,coronal,14
10,2.0,B46f,coronal,13


In [24]:
filtered_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
1,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385,A_W2ZWE00V,3.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM,axial
3,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620,A_W2ZWE00V,2.0,Br46f,"[0.03527101119402, 0.99937778430849, 0, -0.013189252362, 0.000465487902, -0.9999129096792]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008620/DICOM,sagittal
5,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491,A_W2ZWE00V,2.0,Br46f,"[0.99929074812244, -0.035267941082, -0.013197463723, -0.013189252054, 0.000465487901, -0.9999129096833]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008491/DICOM,coronal
7,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991,A_W2ZWE00V,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991/DICOM,axial
8,10056_1_6BZR9799,2021-02-15,1.3.12.2.1107.5.1.4.64297.30000020051816572913900001376,A_G7C0161N,2.0,B45f,"[0.99924525146225, 0.03878823317501, 0.002097712397, 0.002096133763, 8.1366745e-005, -0.9999977997989]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2021-02-15/1.3.12.2.1107.5.1.4.64297.30000020051816572913900001376/DICOM,coronal
...,...,...,...,...,...,...,...,...,...,...
426,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
431,10057_1_X4DQH49F,2020-07-26,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
432,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864,A_6461R8MR,2.0,B45f,"[-0.021315952809, 0.99977278926556, 0, 0.045244208353, 0.000964642587, -0.998975490728]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001864/DICOM,sagittal
433,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746,A_6461R8MR,2.0,B45f,"[0.998748512801, 0.021294114513, 0.045254489997, 0.045244207691, 0.000964642581, -0.998975490758]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001746/DICOM,coronal


## Exclude orientation != axial 

In [25]:
# Filter out rows where SliceThickness is "missing"
filtered_df = filtered_df[filtered_df['Orientation'] == 'axial' ]

# Now group by the specified columns and calculate the counts
feature_count_ldct_df = (
    filtered_df
    .groupby(['SliceThickness', 'ConvolutionKernel', 'Orientation'])
    .size()
    .reset_index()
    .rename(columns={0: 'count'})
)

# Sort the DataFrame by the count column in descending order
feature_count_ldct_df = feature_count_ldct_df.sort_values('count', ascending=False)

# Display the result
feature_count_ldct_df


Unnamed: 0,SliceThickness,ConvolutionKernel,Orientation,count
0,1.0,B45f,axial,22
7,3.0,B45f,axial,20
2,1.0,Br46f,axial,18
1,1.0,B46f,axial,15
8,3.0,B46f,axial,14
10,3.0,Br46f,axial,14
3,1.0,Br49d,axial,5
4,1.0,"['Br49d', '3']",axial,5
9,3.0,Br36d,axial,5
6,2.0,Br46f,axial,4


In [26]:
filtered_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
1,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385,A_W2ZWE00V,3.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900008385/DICOM,axial
7,10056_1_6BZR9799,2022-10-10,1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991,A_W2ZWE00V,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2022-10-10/1.3.12.2.1107.5.1.4.64297.30000022011016375941900007991/DICOM,axial
9,10056_1_6BZR9799,2021-02-15,1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831,A_G7C0161N,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2021-02-15/1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831/DICOM,axial
14,10056_1_6BZR9799,2021-02-15,1.3.12.2.1107.5.1.4.64297.30000020051816572913900001260,A_G7C0161N,3.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2021-02-15/1.3.12.2.1107.5.1.4.64297.30000020051816572913900001260/DICOM,axial
16,10056_1_JRZ5OB12,2024-03-27,1.3.12.2.1107.5.1.4.76430.30000022112314094492500038041,A_YFQXW9V8,1.0,Br49d,"[1, 0, 0, 0, 1, 0]",10056_1_JRZ5OB12,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_JRZ5OB12/2024-03-27/1.3.12.2.1107.5.1.4.76430.30000022112314094492500038041/DICOM,axial
...,...,...,...,...,...,...,...,...,...,...
420,10056_1_0QX10Y18,2022-03-31,1.3.12.2.1107.5.1.4.64297.30000022021721044408300002396,A_XM008L5D,3.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_0QX10Y18,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_0QX10Y18/2022-03-31/1.3.12.2.1107.5.1.4.64297.30000022021721044408300002396/DICOM,axial
422,10056_1_0QX10Y18,2022-03-31,1.3.12.2.1107.5.1.4.64297.30000022021721044408300001954,A_XM008L5D,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_0QX10Y18,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_0QX10Y18/2022-03-31/1.3.12.2.1107.5.1.4.64297.30000022021721044408300001954/DICOM,axial
426,10057_1_X4DQH49F,2020-07-26,1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial
431,10057_1_X4DQH49F,2020-07-26,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305,A_6461R8MR,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10057_1_X4DQH49F,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10057_1_X4DQH49F/2020-07-26/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.66859.30050020011217220818100001305/DICOM,axial


In [27]:
unique_pid_count = filtered_df['pid'].nunique()
unique_pid_count

50

In [28]:
filtered_df = filtered_df.sort_values(by=['pid', 'timepoint', 'SliceThickness'], ascending=[True, True, True])

# Drop duplicates by keeping the first occurrence of each combination of 'pid' and 'timepoint' (which has the lowest SliceThickness)
filtered_df = filtered_df.drop_duplicates(subset=['pid', 'timepoint'], keep='first')


In [29]:
filtered_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
197,10056_1_01326Z62,2021-06-25,1.3.12.2.1107.5.1.4.73443.30000021062516500543700005303,A_09MP206N,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_01326Z62,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_01326Z62/2021-06-25/1.3.12.2.1107.5.1.4.73443.30000021062516500543700005303/DICOM,axial
293,10056_1_04XX307B,2022-07-12,1.3.12.2.1107.5.1.4.73938.30050022022215494056700002042,A_0I9J387Z,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_04XX307B,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_04XX307B/2022-07-12/1.3.12.2.1107.5.1.4.73938.30050022022215494056700002042/DICOM,axial
422,10056_1_0QX10Y18,2022-03-31,1.3.12.2.1107.5.1.4.64297.30000022021721044408300001954,A_XM008L5D,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_0QX10Y18,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_0QX10Y18/2022-03-31/1.3.12.2.1107.5.1.4.64297.30000022021721044408300001954/DICOM,axial
126,10056_1_1BU645Q2,2023-08-11,1.3.12.2.1107.5.1.4.75487.30000021121714403770300028794,A_7Y39L5Z6,1.0,"['Br49d', '3']","[1, 0, 0, 0, 1, 0]",10056_1_1BU645Q2,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_1BU645Q2/2023-08-11/1.3.12.2.1107.5.1.4.75487.30000021121714403770300028794/DICOM,axial
238,10056_1_21KE72W9,2022-09-05,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.51964.30000021092715330693700005545,A_6DX60RV0,1.0,B46f,"[1, 0, 0, 0, 1, 0]",10056_1_21KE72W9,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_21KE72W9/2022-09-05/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.51964.30000021092715330693700005545/DICOM,axial
218,10056_1_2P4IW5MT,2022-05-16,1.3.12.2.1107.5.1.4.51964.30000021101815224675000001937,A_68D4KQ6X,1.0,B46f,"[1, 0, 0, 0, 1, 0]",10056_1_2P4IW5MT,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_2P4IW5MT/2022-05-16/1.3.12.2.1107.5.1.4.51964.30000021101815224675000001937/DICOM,axial
308,10056_1_4U815U02,2021-10-28,CT_Chest_at_TLC_Supine_Post_Contrast--1.3.12.2.1107.5.1.4.51964.30000021042915181512500003229,A_16NA6O17,1.0,B46f,"[1, 0, 0, 0, 1, 0]",10056_1_4U815U02,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_4U815U02/2021-10-28/CT_Chest_at_TLC_Supine_Post_Contrast--1.3.12.2.1107.5.1.4.51964.30000021042915181512500003229/DICOM,axial
109,10056_1_5924808N,2020-12-12,1.3.12.2.1107.5.1.4.64297.30000020121216165563500010067,A_6T7IN2PX,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_5924808N,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_5924808N/2020-12-12/1.3.12.2.1107.5.1.4.64297.30000020121216165563500010067/DICOM,axial
161,10056_1_61RQR13T,2018-05-22,CT_Chest_at_TLC_Supine_Post_Contrast--1.2.276.0.7230010.3.1.3.2372643234.11500.1543450006.774,A_1T875SZ8,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_61RQR13T,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_61RQR13T/2018-05-22/CT_Chest_at_TLC_Supine_Post_Contrast--1.2.276.0.7230010.3.1.3.2372643234.11500.1543450006.774/DICOM,axial
9,10056_1_6BZR9799,2021-02-15,1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831,A_G7C0161N,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2021-02-15/1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831/DICOM,axial


In [30]:
duplicates = filtered_df.duplicated(subset=['pid', 'timepoint'])
if duplicates.any():
    print(filtered_df[duplicates])
else:
    print("All rows have unique combinations of pid and timepoint.")

All rows have unique combinations of pid and timepoint.


## Save all cases and only the first timepoint 

In [31]:
LDCT_OUT = '/workspace/home/tengyuezhang/sybil_cect/data/ucla_ldct'
CECT_OUT = '/workspace/home/tengyuezhang/sybil_cect/data/ucla_cect'

In [32]:
filtered_df.to_csv(os.path.join(LDCT_OUT, 'ucla_ldct_cases.csv'), index=False)

In [33]:
baseline_df = filtered_df.sort_values(by=['pid', 'timepoint'])
baseline_df = baseline_df.drop_duplicates(subset=['pid'], keep='first')
baseline_df

Unnamed: 0,pid,timepoint,scan,AccessionNumber,SliceThickness,ConvolutionKernel,ImageOrientationPatient,PatientID,Directory,Orientation
197,10056_1_01326Z62,2021-06-25,1.3.12.2.1107.5.1.4.73443.30000021062516500543700005303,A_09MP206N,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_01326Z62,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_01326Z62/2021-06-25/1.3.12.2.1107.5.1.4.73443.30000021062516500543700005303/DICOM,axial
293,10056_1_04XX307B,2022-07-12,1.3.12.2.1107.5.1.4.73938.30050022022215494056700002042,A_0I9J387Z,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_04XX307B,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_04XX307B/2022-07-12/1.3.12.2.1107.5.1.4.73938.30050022022215494056700002042/DICOM,axial
422,10056_1_0QX10Y18,2022-03-31,1.3.12.2.1107.5.1.4.64297.30000022021721044408300001954,A_XM008L5D,1.0,Br46f,"[1, 0, 0, 0, 1, 0]",10056_1_0QX10Y18,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_0QX10Y18/2022-03-31/1.3.12.2.1107.5.1.4.64297.30000022021721044408300001954/DICOM,axial
126,10056_1_1BU645Q2,2023-08-11,1.3.12.2.1107.5.1.4.75487.30000021121714403770300028794,A_7Y39L5Z6,1.0,"['Br49d', '3']","[1, 0, 0, 0, 1, 0]",10056_1_1BU645Q2,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_1BU645Q2/2023-08-11/1.3.12.2.1107.5.1.4.75487.30000021121714403770300028794/DICOM,axial
238,10056_1_21KE72W9,2022-09-05,CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.51964.30000021092715330693700005545,A_6DX60RV0,1.0,B46f,"[1, 0, 0, 0, 1, 0]",10056_1_21KE72W9,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_21KE72W9/2022-09-05/CT_Full_(thin)_Chest_at_TLC_Supine--1.3.12.2.1107.5.1.4.51964.30000021092715330693700005545/DICOM,axial
218,10056_1_2P4IW5MT,2022-05-16,1.3.12.2.1107.5.1.4.51964.30000021101815224675000001937,A_68D4KQ6X,1.0,B46f,"[1, 0, 0, 0, 1, 0]",10056_1_2P4IW5MT,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_2P4IW5MT/2022-05-16/1.3.12.2.1107.5.1.4.51964.30000021101815224675000001937/DICOM,axial
308,10056_1_4U815U02,2021-10-28,CT_Chest_at_TLC_Supine_Post_Contrast--1.3.12.2.1107.5.1.4.51964.30000021042915181512500003229,A_16NA6O17,1.0,B46f,"[1, 0, 0, 0, 1, 0]",10056_1_4U815U02,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_4U815U02/2021-10-28/CT_Chest_at_TLC_Supine_Post_Contrast--1.3.12.2.1107.5.1.4.51964.30000021042915181512500003229/DICOM,axial
109,10056_1_5924808N,2020-12-12,1.3.12.2.1107.5.1.4.64297.30000020121216165563500010067,A_6T7IN2PX,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_5924808N,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_5924808N/2020-12-12/1.3.12.2.1107.5.1.4.64297.30000020121216165563500010067/DICOM,axial
161,10056_1_61RQR13T,2018-05-22,CT_Chest_at_TLC_Supine_Post_Contrast--1.2.276.0.7230010.3.1.3.2372643234.11500.1543450006.774,A_1T875SZ8,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_61RQR13T,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_61RQR13T/2018-05-22/CT_Chest_at_TLC_Supine_Post_Contrast--1.2.276.0.7230010.3.1.3.2372643234.11500.1543450006.774/DICOM,axial
9,10056_1_6BZR9799,2021-02-15,1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831,A_G7C0161N,1.0,B45f,"[1, 0, 0, 0, 1, 0]",10056_1_6BZR9799,/workspace/radraid/whsu/idx_data_request_79/Images_and_Annotations/Low-Dose_CT/10056_1_6BZR9799/2021-02-15/1.3.12.2.1107.5.1.4.64297.30000020051816572913900000831/DICOM,axial


In [34]:
print(baseline_df)

                  pid   timepoint  \
197  10056_1_01326Z62  2021-06-25   
293  10056_1_04XX307B  2022-07-12   
422  10056_1_0QX10Y18  2022-03-31   
126  10056_1_1BU645Q2  2023-08-11   
238  10056_1_21KE72W9  2022-09-05   
218  10056_1_2P4IW5MT  2022-05-16   
308  10056_1_4U815U02  2021-10-28   
109  10056_1_5924808N  2020-12-12   
161  10056_1_61RQR13T  2018-05-22   
9    10056_1_6BZR9799  2021-02-15   
408  10056_1_7092NE0M  2020-08-07   
372  10056_1_8C438KJF  2020-09-08   
80   10056_1_99QLKE94  2019-07-19   
188  10056_1_DF886X1E  2024-01-29   
342  10056_1_E8OU903K  2023-02-14   
183  10056_1_H69G11Q2  2020-08-18   
226  10056_1_J33378V4  2020-04-17   
16   10056_1_JRZ5OB12  2024-03-27   
92   10056_1_O0685VZ9  2024-01-24   
330  10056_1_O219DW1N  2021-03-01   
150  10056_1_O7M08O92  2021-02-01   
172  10056_1_VJE021Z7  2021-10-14   
209  10056_1_YETY7C91  2021-09-16   
72   10056_1_Z6GE4899  2022-06-09   
284  10057_1_0811T07Y  2020-11-02   
273  10057_1_08ZP58U0  2022-04-24   
4

In [35]:
baseline_df.to_csv(os.path.join(LDCT_OUT, 'ucla_ldct_baseline_cases.csv'), index=False)