In [None]:
# NOTEBOOK: Contains scripts adapted from the nnDetection and MONAI GitHub repositories.
# NOTE: A much easier workflow is available through 3D Slicer with the MONAI plugin.
# LINK: https://medium.com/@davesimms44/using-monai-bundle-and-the-monai-model-zoo-to-segment-medical-imaging-data-7ade49699248

In [None]:
#Install dependencies; might not need all
# Each `!` line is executed as a shell command from the notebook.
# No versions are pinned, so pip installs whatever it resolves at run time.
!pip install SimpleITK
!pip install loguru
!pip install pydicom
!pip install monai
# Clones nnDetection into the current working directory; a later cell adds
# os.path.abspath('nnDetection') to sys.path so that `nndet` can be imported.
!git clone https://github.com/MIC-DKFZ/nnDetection.git
!pip install omegaconf
!pip install gitpython
!pip install hydra-core
!pip install fire
!pip install pytorch-ignite

Collecting SimpleITK
  Downloading SimpleITK-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (52.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SimpleITK
Successfully installed SimpleITK-2.3.1


In [None]:
#Mount google drive
from google.colab import drive
import SimpleITK
# Attach Google Drive at /content/drive so the '/content/drive/My Drive/...' paths used by the other cells resolve.
drive.mount('/content/drive')
# Path below LUNG_DICOM on the mounted Drive; it is not referenced again in the cells shown in this notebook.
file_path1 = '/content/drive/My Drive/LUNG_DICOM/4-24533'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Convert to Raw and Mhd Images; Might need for model training/inference
import os
import SimpleITK as sitk
def convert_dicom_to_mhd(dicom_directory, output_directory, output_filename):
    """Read a DICOM series from a folder and write it out as a MetaImage (.mhd).

    Args:
        dicom_directory: Folder scanned by SimpleITK/GDCM for the series files.
        output_directory: Folder the result is written to; created when missing.
        output_filename: Base name of the output; ".mhd" is appended.

    Raises:
        FileNotFoundError: If GDCM finds no series files in ``dicom_directory``.
    """
    # Read the DICOM series
    reader = sitk.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(dicom_directory)
    if not dicom_names:
        # Fail early with a clear message instead of letting Execute() choke on an empty file list.
        raise FileNotFoundError(f"No DICOM series files found in: {dicom_directory}")
    reader.SetFileNames(dicom_names)
    image = reader.Execute()

    # Write to .mhd and .raw files (the destination folder is created on demand)
    os.makedirs(output_directory, exist_ok=True)
    sitk.WriteImage(image, os.path.join(output_directory, output_filename + ".mhd"))


# Destination first: folder and base name of the converted volume.
output_filename = "Output_image"
output_directory = "/content/drive/My Drive/LUNG_DICOM/organized"
# Folder holding the DICOM slices to convert.
dicom_directory = "/content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES"

# Create the destination only when nothing exists at that path yet.
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

convert_dicom_to_mhd(
    dicom_directory=dicom_directory,
    output_directory=output_directory,
    output_filename=output_filename,
)


In [None]:
#Check to see if you have the 'nnDetection' repo installed; need for creating json files based on NIFTI/DICOM images
import sys
import os

# Absolute location of the nnDetection checkout, resolved against the current working directory
repo_path = os.path.abspath('nnDetection')

# Register the checkout on the import path only once, so re-running the cell adds no duplicate entry
if sys.path.count(repo_path) == 0:
    sys.path.append(repo_path)


from nndet.io.itk import create_circle_mask_itk
from nndet.io.load import save_pickle, save_json, save_yaml, load_json


In [None]:
#script that should make json files based on your image data
import argparse
import os
import sys
import traceback
from collections import defaultdict
from itertools import repeat
from multiprocessing.pool import Pool

import pandas as pd
import SimpleITK as sitk
from pathlib import Path
import torch
from loguru import logger

from nndet.io.itk import create_circle_mask_itk
from nndet.io.load import save_pickle, save_json, save_yaml, load_json
from nndet.utils.check import env_guard


def create_masks(source: Path, target: Path, df: pd.DataFrame, num_processes: int):
    """Collect the .mhd files of subset0..subset9 and create one mask per file.

    Also records which subset every case came from and saves that mapping as
    ``splits.json`` two levels above ``target``.

    Args:
        source: Folder expected to contain ``subset0`` ... ``subset9``.
        target: Folder handed to ``_create_mask`` for the mask and json output.
        df: Annotation table; rows are selected with ``df.loc`` using the .mhd
            file name without its extension, and the columns ``coordX``,
            ``coordY``, ``coordZ`` and ``diameter_mm`` are read.
        num_processes: Size of the worker pool.
    """
    mhd_files = []
    fold_of_case = {}
    for fold in range(10):
        subset_dir = source / f"subset{fold}"
        if subset_dir.is_dir():
            subset_files = list(subset_dir.glob('*.mhd'))
            mhd_files.extend(subset_files)
            # case id = file stem with dots replaced, mapped to its subset number
            fold_of_case.update({path.stem.replace('.', '_'): fold for path in subset_files})
        else:
            logger.error(f"{subset_dir} is not s valid subset directory!")
    save_json(fold_of_case, target.parent.parent / "splits.json")

    centers = []
    rads = []
    for mhd_path in mhd_files:
        case_centers, case_radii = [], []
        series_key = mhd_path.name.rsplit('.', 1)[0]
        try:
            series_df = df.loc[[series_key]]
        except KeyError:
            # No annotation rows for this series: keep both lists empty.
            series_df = None
        if series_df is not None:
            for _, row in series_df.iterrows():
                case_centers.append(
                    (float(row['coordX']), float(row['coordY']), float(row['coordZ'])))
                # radius is half of the annotated diameter
                case_radii.append(float(row['diameter_mm']) / 2)
        centers.append(case_centers)
        rads.append(case_radii)

    assert len(mhd_files) == len(centers) == len(rads)
    jobs = zip(mhd_files, repeat(target), centers, rads)
    with Pool(processes=num_processes) as pool:
        pool.starmap(_create_mask, jobs)
    # Sequential alternative (handy for debugging):
    # for t in zip(files, repeat(target), centers, rads):
    #     _create_mask(*t)


def _create_mask(source, target, centers, rads):
    """Create the mask volume and the instance json for one image file.

    Any exception is logged together with its traceback and not re-raised.

    Args:
        source: Path of the image to read.
        target: Folder receiving ``<case>.nii.gz`` and ``<case>.json``.
        centers: Centers passed on to ``create_circle_mask_itk``.
        rads: Radii passed on to ``create_circle_mask_itk``.
    """
    try:
        logger.info(f"Processing {source.stem}")
        image = sitk.ReadImage(str(source))
        mask = create_circle_mask_itk(image, centers, rads, ndim=3)
        # output names use the file stem with dots replaced by underscores
        case_id = source.stem.replace('.', '_')
        sitk.WriteImage(mask, str(target / f"{case_id}.nii.gz"))
        # instance ids start at 1; every instance is assigned class 0
        instances = {str(number): 0 for number in range(1, len(centers) + 1)}
        save_json({"instances": instances}, target / f"{case_id}.json")
    except Exception as e:
        logger.error(f"Case {source.stem} failed with {e} and {traceback.format_exc()}")


def create_splits(source, target):
    """Build rotating train/val/test splits over the ``subset*`` folders and pickle them.

    Fold ``i`` is the test set, fold ``(i + 1) % n`` the validation set and all
    remaining folds form the training set. Subset folders and their cases are
    sorted so that the resulting splits are reproducible across runs.

    Args:
        source: Folder (``pathlib.Path``) containing the ``subset*`` directories.
        target: Destination handed to ``save_pickle``.

    Raises:
        ValueError: If exactly one subset folder exists, because validation and
            test would then be the same fold.
    """
    fold_files = []
    for subset_dir in sorted(Path(p) for p in source.glob('subset*')):
        if not subset_dir.is_dir():
            continue
        # case id = full path of the .mhd file without its extension
        fold_files.append(sorted(
            str(entry).rsplit('.', 1)[0]
            for entry in subset_dir.iterdir() if entry.suffix == ".mhd"
        ))

    num_folds = len(fold_files)
    if num_folds == 1:
        raise ValueError(
            f"Need at least two subset folders in {source} to build train/val/test splits.")

    splits = []
    for test_idx in range(num_folds):
        val_idx = (test_idx + 1) % num_folds
        # Exclude by fold number. Popping twice by position removed the wrong
        # fold after the first pop had shifted the remaining indices.
        held_out = (test_idx, val_idx)
        train = [
            case
            for fold_idx, cases in enumerate(fold_files) if fold_idx not in held_out
            for case in cases
        ]
        splits.append({"train": train, "val": fold_files[val_idx], "test": fold_files[test_idx]})
    save_pickle(splits, target)


def convert_data(source: Path, target: Path, num_processes: int):
    """Convert every ``*.mhd`` file found in the ``subset*`` folders of ``source``.

    A worker pool is opened per subset folder and ``_convert_data`` is called
    for each file together with ``target``.

    Args:
        source: Folder searched for entries matching ``subset*``.
        target: Output folder forwarded to ``_convert_data``.
        num_processes: Size of each worker pool.
    """
    candidates = (Path(entry) for entry in source.glob('subset*'))
    for subset_dir in candidates:
        # entries matching the pattern that are not folders are ignored
        if subset_dir.is_dir():
            with Pool(processes=num_processes) as pool:
                jobs = zip(subset_dir.glob('*.mhd'), repeat(target))
                pool.starmap(_convert_data, jobs)


def _convert_data(f, target):
    """Read one image and write it to ``target`` as ``<stem>_0000.nii.gz``.

    Dots in the file stem are replaced by underscores. Failures are logged
    with their traceback and not re-raised.

    Args:
        f: Path of the image to read.
        target: Folder the converted file is written to.
    """
    logger.info(f"Converting {f}")
    try:
        image = sitk.ReadImage(str(f))
        case_id = f.stem.replace('.', '_')
        destination = target / f"{case_id}_0000.nii.gz"
        sitk.WriteImage(image, str(destination))
    except Exception as e:
        logger.error(f"Case {f} failed with {e} and {traceback.format_exc()}")


@env_guard
def main():
    """Prepare the raw Luna data below ``$det_data/Task016_Luna`` for nnDetection.

    Steps: validate the ``raw`` folder layout, create the output folders, write
    ``dataset.json``, convert the images, create the masks from
    ``annotations.csv`` and finally derive ten train/val splits from the
    subset membership stored in ``splits.json``.

    Raises:
        RuntimeError: If the ``raw`` folder does not exist.
        ValueError: If a ``subset0`` ... ``subset9`` folder or
            ``annotations.csv`` is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_processes', type=int, default=4, required=False,
                        help="Number of processes to use for preparation.")
    # parse_known_args keeps this runnable inside a notebook, where the kernel
    # places its own options on sys.argv; unknown options are ignored.
    args, _ = parser.parse_known_args()
    num_processes = args.num_processes

    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task016_Luna"
    source_data_dir = task_data_dir / "raw"

    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")
    for i in range(10):
        # A Path object is always truthy, so the folder has to be tested with is_dir().
        if not (p := source_data_dir / f"subset{i}").is_dir():
            raise ValueError(f"Expected {p} to contain Luna data")
    if not (p := source_data_dir / "annotations.csv").is_file():
        raise ValueError(f"Exptected {p} to exist.")

    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)
    target_preprocessed_dir = task_data_dir / "preprocessed"
    target_preprocessed_dir.mkdir(exist_ok=True)

    # Log INFO and above to stdout and everything from DEBUG up to prepare.log.
    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(task_data_dir / "prepare.log", level="DEBUG")

    meta = {
        "name": "Luna",
        "task": "Task016_Luna",

        "target_class": None,
        "test_labels": False,

        "labels": {
            "0": "lesion",
        },
        "modalities": {
            "0": "CT",
        },
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # prepare data and labels
    csv = source_data_dir / "annotations.csv"
    convert_data(source_data_dir, target_data_dir, num_processes=num_processes)

    df = pd.read_csv(csv, index_col='seriesuid')
    create_masks(source_data_dir, target_label_dir, df, num_processes=num_processes)

    # generate split
    logger.info("Generating luna splits... ")
    # splits.json is written by create_masks and maps case id -> subset number
    saved_original_splits = load_json(task_data_dir / "splits.json")
    logger.info(f"Found {len(list(saved_original_splits.keys()))} ids in splits.json")
    original_fold_ids = defaultdict(list)
    for cid, fid in saved_original_splits.items():
        original_fold_ids[fid].append(cid)

    splits = []
    for test_fold in range(10):
        # every fold except the held-out one goes into training
        all_folds = list(range(10))
        all_folds.pop(test_fold)

        train_ids = []
        for af in all_folds:
            train_ids.extend(original_fold_ids[af])
        splits.append({
            "train": train_ids,
            "val": original_fold_ids[test_fold],
        })
    save_pickle(splits, target_preprocessed_dir / "splits_final.pkl")
    save_json(splits, target_preprocessed_dir / "splits_final.json")

if __name__ == '__main__':
    main()

RuntimeError: 'det_data' environment variable not set. Please refer to the installation instructions. 

In [None]:
#Input: Either DICOM Images or mhd
#Configures the JSON environment files

import os
import json
import logging
import sys

def main():
    """Write the environment JSON files used for image resampling and for inference.

    All locations are hard-coded below; edit them to match your Drive layout.
    Two files are produced: ``resampled.json`` and ``inference.json``.
    """
    #  ------------- Modification starts -------------
    raw_data_base_dir = "/content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES"  # the directory of the raw images
    resampled_data_base_dir = "/content/drive/My Drive/LUNG_DICOM/organized"  # the directory of the resampled images
    downloaded_datasplit_dir = "/content/drive/My Drive/LUNG_DICOM/organized"  # the directory of downloaded data split files

    out_inference_result_dir = "/content/drive/My Drive/LUNG_DICOM/organized"  # the directory to save predicted boxes for inference

    dicom_meta_data_csv = '/content/drive/My Drive/LUNG_DICOM/annotations.csv'

    # makedirs also creates missing parent folders and tolerates an existing folder
    os.makedirs(out_inference_result_dir, exist_ok=True)

    # generate env json file for image resampling
    out_file = "/content/drive/My Drive/LUNG_DICOM/resampled.json"
    env_dict = {}
    env_dict["orig_data_base_dir"] = raw_data_base_dir
    env_dict["data_base_dir"] = resampled_data_base_dir
    if dicom_meta_data_csv is not None:
        # DICOM input: use the DICOM data list and record the metadata csv
        env_dict["data_list_file_path"] = os.path.join(downloaded_datasplit_dir, "dicom_original/dataset_fold0.json")
        env_dict["dicom_meta_data_csv"] = dicom_meta_data_csv
    else:
        env_dict["data_list_file_path"] = os.path.join(downloaded_datasplit_dir, "mhd_original/dataset_fold0.json")
    with open(out_file, "w") as outfile:
        json.dump(env_dict, outfile, indent=4)

    # generate env json file for inference
    out_file = "/content/drive/My Drive/LUNG_DICOM/inference.json"
    env_dict = {}
    env_dict["model_path"] = "/content/drive/My Drive/lung_nodule_ct_detection_v0.5.9/lung_nodule_ct_detection/models/model.pt"  # Adjust this path to your actual model path
    env_dict["data_base_dir"] = resampled_data_base_dir
    env_dict["data_list_file_path"] = os.path.join(downloaded_datasplit_dir, "dataset_inference.json")  # Adjust this path as needed
    env_dict["result_list_file_path"] = os.path.join(out_inference_result_dir, "result_luna16_inference.json")
    with open(out_file, "w") as outfile:
        json.dump(env_dict, outfile, indent=4)

# Send log records of INFO and above to stdout with a timestamped format.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="[%(asctime)s.%(msecs)03d][%(levelname)5s](%(name)s) - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

if __name__ == "__main__":
    main()

In [None]:
#Rewrite DICOM images into the reformatted NIFTI file
import argparse
import json
import logging
import sys
import os
import csv
from pathlib import Path

import monai
import torch
from monai.data import DataLoader, Dataset, load_decathlon_datalist, NibabelWriter
from monai.data.utils import no_collation
from monai.transforms import (
    Compose,
    EnsureChannelFirstd,
    EnsureTyped,
    LoadImaged,
    Orientationd,
    Spacingd,
)

def main():
    """Resample the images of the "validation" data list and save them as NIfTI.

    The environment and config JSON files are read, their entries are copied
    onto ``args``, each image is loaded, reoriented to RAS and resampled to
    ``args.spacing``, and the result is written below ``args.data_base_dir``.
    """
    parser = argparse.ArgumentParser(description="LUNA16 Detection Image Resampling")
    parser.add_argument(
        "-e",
        "--environment-file",
        default="/content/drive/My Drive/LUNG_DICOM/resampled.json",  # Updated to your environment file
        help="environment json file that stores environment path",
    )
    parser.add_argument(
        "-c",
        "--config-file",
        default="/content/drive/My Drive/lung_nodule_ct_detection_v0.5.9/lung_nodule_ct_detection/configs/config1.json",  # Path to your config file if needed
        help="config json file that stores hyper-parameters",
    )
    args, _ = parser.parse_known_args()  # This will parse known args and ignore the rest

    monai.config.print_config()

    # Context managers close both files; json.load(open(...)) left the handles open.
    with open(args.environment_file, "r") as env_file:
        env_dict = json.load(env_file)
    with open(args.config_file, "r") as config_file:
        config_dict = json.load(config_file)

    # Expose every entry of both files as an attribute of args (config wins on clashes).
    for k, v in env_dict.items():
        setattr(args, k, v)
    for k, v in config_dict.items():
        setattr(args, k, v)

    # 1. define transform
    process_transforms = Compose(
        [
            LoadImaged(
                keys=["image"],
                image_only=True,
                reader="itkreader",
                affine_lps_to_ras=True,
            ),
            EnsureChannelFirstd(keys=["image"]),
            EnsureTyped(keys=["image"], dtype=torch.float16),
            Orientationd(keys=["image"], axcodes="RAS"),
            # args.spacing is not defined here; presumably it comes from the config file - verify
            Spacingd(keys=["image"], pixdim=args.spacing, padding_mode="border")
        ]
    )

    # 2. prepare data
    # Metadata rows keyed by series UID (a later row with the same UID replaces an earlier one).
    meta_dict = {}
    with open(env_dict["dicom_meta_data_csv"], newline="") as csvfile:
        print("open " + env_dict["dicom_meta_data_csv"])
        reader = csv.DictReader(csvfile)
        for row in reader:
            meta_dict[row["seriesuid"]] = {
                "coordX": row["coordX"],
                "coordY": row["coordY"],
                "coordZ": row["coordZ"],
                "diameter_mm": row["diameter_mm"]
            }

    # Process only the validation data
    process_data = load_decathlon_datalist(
        data_list_file_path=env_dict["data_list_file_path"],
        is_segmentation=True,
        data_list_key="validation",
        base_dir=env_dict["orig_data_base_dir"],
    )

    process_ds = Dataset(
        data=process_data,
        transform=process_transforms,
    )

    process_loader = DataLoader(
        process_ds,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=False,
        collate_fn=no_collation,
    )

    print("-" * 10)
    for batch_data in process_loader:
        for batch_data_i in batch_data:
            # NOTE(review): the series UID is hard-coded, so every item of the data
            # list is written to the same output file and replaces the previous one.
            # Presumably it should be derived per item - confirm the intended source.
            series_uid = '1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533'
            if series_uid not in meta_dict:
                print(f"Warning: SeriesInstanceUID {series_uid} not found in metadata.")
                continue

            subj_id = series_uid
            new_path = os.path.join(args.data_base_dir, subj_id)
            Path(new_path).mkdir(parents=True, exist_ok=True)
            new_filename = os.path.join(new_path, subj_id + ".nii.gz")
            writer = NibabelWriter()
            writer.set_data_array(data_array=batch_data_i["image"])
            writer.set_metadata(meta_dict=batch_data_i["image"].meta)
            writer.write(new_filename, verbose=True)

if __name__ == "__main__":
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="[%(asctime)s.%(msecs)03d][%(levelname)5s](%(name)s) - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    main()

MONAI version: 1.3.1
Numpy version: 1.25.2
Pytorch version: 2.3.0+cu121
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 96bfda00c6bd290297f5e3514ea227c6be4d08b4
MONAI __file__: /usr/local/lib/python3.10/dist-packages/monai/__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: 4.0.2
scikit-image version: 0.19.3
scipy version: 1.11.4
Pillow version: 9.4.0
Tensorboard version: 2.15.2
gdown version: 4.7.3
TorchVision version: 0.18.0+cu121
tqdm version: 4.66.4
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 5.9.5
pandas version: 2.0.3
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: 4.41.1
mlflow version: 2.13.1
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.
clearml version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the optional dependencies, please visit:
    https://docs.monai.io/en/l

Orientation: spatial shape = (512, 512), channels = 1,please make sure the input is in the channel-first format.
  self.pid = os.fork()


2024-05-31 22:48:49,615 INFO image_writer.py:197 - writing: /content/drive/My Drive/LUNG_DICOM/organized/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533.nii.gz
2024-05-31 22:48:49,922 INFO image_writer.py:197 - writing: /content/drive/My Drive/LUNG_DICOM/organized/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533.nii.gz
2024-05-31 22:48:50,122 INFO image_writer.py:197 - writing: /content/drive/My Drive/LUNG_DICOM/organized/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533.nii.gz
2024-05-31 22:48:50,384 INFO image_writer.py:197 - writing: /content/drive/My Drive/LUNG_DICOM/organized/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533.nii.gz
2024-05-31 22:48:50,598 INFO image_writer.py:197 - writing: /con

In [None]:
#Creates files based on DICOM Series UID (Organize by patient)
import os
import shutil
import pydicom
from pathlib import Path

# Source folder with the unsorted DICOM files and the root folder for the sorted result
dicom_directory = r'/content/drive/My Drive/LUNG_DICOM/DICOM'
output_directory = r'/content/drive/My Drive/LUNG_DICOM/organized'

# Create the destination root up front
os.makedirs(output_directory, exist_ok=True)
if os.path.exists(dicom_directory):
    print(f"Directory exists: {dicom_directory}")

    # Report what the source folder currently holds
    dir_contents = os.listdir(dicom_directory)
    if dir_contents:
        print(f"Contents of the directory: {dir_contents}")
    else:
        print(f"The directory is empty: {dicom_directory}")
else:
    print(f"Directory does not exist: {dicom_directory}")
# Visit every folder below the source and move each .dcm file into a folder named after its series UID
print(f"Walking through the directory: {dicom_directory}")
for root, dirs, files in os.walk(dicom_directory):
    print(f"Current directory: {root}")
    print(f"Directories: {dirs}")
    print(f"Files: {files}")
    for file in files:
        # only names ending in ".dcm" are handled
        if not file.endswith(".dcm"):
            continue
        file_path = os.path.join(root, file)
        print(f"Processing file: {file_path}")

        try:
            # Parse the file and pull out the identifiers that get printed
            dicom = pydicom.dcmread(file_path)
            series_uid = dicom.SeriesInstanceUID
            print(series_uid)
            patient_id = dicom.PatientID
            print(patient_id)

            # One folder per series UID, created on first use
            series_dir = os.path.join(output_directory, series_uid)
            os.makedirs(series_dir, exist_ok=True)

            # Relocate the file, keeping its original name
            shutil.move(file_path, os.path.join(series_dir, file))
        except Exception as e:
            # a failing file is reported and the walk continues
            print(f"Failed to process {file_path}: {e}")

print("DICOM files organized by Series UID.")



Directory exists: /content/drive/My Drive/LUNG_DICOM/DICOM
The directory is empty: /content/drive/My Drive/LUNG_DICOM/DICOM
Walking through the directory: /content/drive/My Drive/LUNG_DICOM/DICOM
Current directory: /content/drive/My Drive/LUNG_DICOM/DICOM
Directories: []
Files: []
DICOM files organized by Series UID.


In [None]:
import os
import json

# Folder of one organized series and the JSON file that will describe it
organized_directory = r'/content/drive/My Drive/LUNG_DICOM/organized/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533'
output_json = r'/content/drive/My Drive/LUNG_DICOM/data_list.json'

# The data list has a single "validation" section
data_list = {"validation": []}

# Directory listing of the series folder
files = os.listdir(organized_directory)

# Add one entry per file whose name ends in ".gz"
for file in files:
    file_path = os.path.join(organized_directory, file)
    if not file.endswith(".gz"):
        continue
    # "label" stays None; update this if you have corresponding label files
    data_list['validation'].append(dict(image=file_path, label=None))

# Serialize the data list with 4-space indentation
with open(output_json, 'w') as json_file:
    json_file.write(json.dumps(data_list, indent=4))

print(f"Data list saved to {output_json}")


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/My Drive/LUNG_DICOM/organized/1.3.6.1.4.1.9328.50.1.83304264089411327530730818890072724533'

In [None]:
import monai
import torch

In [None]:
import os
import numpy as np
import pydicom

def convert_images_to_channel_first_and_save(directory):
    """Save a ``channel_first_<name>`` copy of every DICOM file in ``directory``.

    For each file the pixel array is read, a trailing channel axis is added to
    2-D images, the array is transposed with axes ``(2, 0, 1)`` and the result
    is written back into the dataset before it is saved next to the original.

    Sub-folders and files that already carry the ``channel_first_`` prefix are
    skipped, so running the function again does not convert its own output.

    Args:
        directory: Folder containing the DICOM files; output is written there too.
    """
    output_prefix = "channel_first_"

    # The listing is taken once, so files written below are not revisited in this run.
    for file in os.listdir(directory):
        file_path = os.path.join(directory, file)

        # Skip folders and the results of a previous run.
        if file.startswith(output_prefix) or not os.path.isfile(file_path):
            continue

        # Load the DICOM file
        ds = pydicom.dcmread(file_path)

        # Extract the image data
        image = ds.pixel_array

        # Add channel dimension if it doesn't exist (assuming grayscale images)
        if image.ndim == 2:
            image = np.expand_dims(image, axis=-1)  # (H, W) -> (H, W, 1)

        # Convert to channel-first format (C, H, W)
        # NOTE(review): assumes any 3-D pixel array is laid out (H, W, C) - confirm for multi-frame data.
        image_channel_first = np.transpose(image, (2, 0, 1))

        # Update the pixel array in the DICOM object
        ds.PixelData = image_channel_first.tobytes()

        # Update the Rows and Columns attributes to match the new shape
        ds.Rows, ds.Columns = image_channel_first.shape[1], image_channel_first.shape[2]

        # Save the modified DICOM file
        new_file_path = os.path.join(directory, f"{output_prefix}{file}")
        ds.save_as(new_file_path)

        # Print confirmation
        print(f"Saved channel-first format for {file} as {new_file_path}")

# Folder whose DICOM files get a channel_first_* copy
directory = '/content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES'

# Run the conversion for every file in that folder
convert_images_to_channel_first_and_save(directory=directory)

Saved channel-first format for 000001.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000001.dcm
Saved channel-first format for 000002.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000002.dcm
Saved channel-first format for 000000.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000000.dcm
Saved channel-first format for 000032.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000032.dcm
Saved channel-first format for 000046.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000046.dcm
Saved channel-first format for 000021.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000021.dcm
Saved channel-first format for 000051.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/channel_first_000051.dcm
Saved channel-first format for 000044.dcm as /content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES/c

In [None]:
#preprocess Dicom images based on json files
import os
import numpy as np
import torch
import pydicom
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, Orientationd,
    Spacingd, ScaleIntensityRanged, EnsureTyped
)
from monai.data import Dataset, DataLoader
from monai.utils import set_determinism
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Set determinism for reproducibility
set_determinism(seed=0)

# Define the preprocessing pipeline
# Applied to dict samples; every transform operates on the "image" entry.
preprocessing = Compose([
    # NOTE(review): this cell's output reports KeyError 'image_meta_dict' with only an
    # 'image' key per batch, so meta_key_postfix does not appear to produce a separate
    # metadata entry with the installed MONAI version - confirm.
    LoadImaged(keys="image", meta_key_postfix="meta_dict", reader="PydicomReader", affine_lps_to_ras=True),
    EnsureChannelFirstd(keys="image"),
    # Reorient to RAS axis codes, then resample to the given pixdim (presumably mm - verify).
    Orientationd(keys="image", axcodes="RAS"),
    Spacingd(keys="image", pixdim=(0.703125, 0.703125, 1.25)),
    # Map intensities from [-1024, 300] to [0, 1]; values outside the range are clipped.
    ScaleIntensityRanged(keys="image", a_min=-1024.0, a_max=300.0, b_min=0.0, b_max=1.0, clip=True),
    EnsureTyped(keys="image")
])

def preprocess_and_save_images(input_directory, output_directory):
    """Run the module-level ``preprocessing`` pipeline on each file and save the result.

    Every entry of ``input_directory`` is loaded through a MONAI ``Dataset`` /
    ``DataLoader`` (batch size 1). The processed tensor is written into the
    pixel data of the source DICOM file, which is saved to ``output_directory``
    as ``channel_first_<name>``. Failures are logged and the loop continues.

    Args:
        input_directory: Folder whose entries are processed.
        output_directory: Folder for the saved files; created when missing.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # List all files in the input directory
    files = [os.path.join(input_directory, f) for f in os.listdir(input_directory)]

    # Prepare the dataset and dataloader
    data = [{"image": file} for file in files]
    dataset = Dataset(data=data, transform=preprocessing)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

    # Process and save each image
    for batch in dataloader:
        # Reset per batch so the error handler never reports a stale or unbound path.
        file_path = None
        try:
            # Print keys in the batch to debug
            logging.info(f"Batch keys: {batch.keys()}")

            image_tensor = batch["image"]
            # Metadata travels on the tensor itself (MetaTensor.meta) when no
            # separate "image_meta_dict" entry is produced; fall back to the
            # dictionary entry otherwise.
            meta_data = getattr(image_tensor, "meta", None)
            if meta_data is None:
                meta_data = batch["image_meta_dict"]
            file_path = meta_data["filename_or_obj"]
            if isinstance(file_path, (list, tuple)):
                # collated batch of size 1: take its only entry
                file_path = file_path[0]
            file_name = os.path.basename(file_path)

            logging.info(f"Processing file: {file_path}")

            # Update the pixel data in the DICOM object
            # NOTE(review): the tensor holds rescaled values from ScaleIntensityRanged;
            # presumably the pixel-format tags of the header no longer describe these
            # bytes - confirm that the saved file is readable.
            ds = pydicom.dcmread(file_path)
            ds.PixelData = image_tensor.numpy().tobytes()
            ds.Rows, ds.Columns = image_tensor.shape[2], image_tensor.shape[3]

            # Save the modified DICOM file
            output_file_path = os.path.join(output_directory, f"channel_first_{file_name}")
            ds.save_as(output_file_path)

            logging.info(f"Saved channel-first format for {file_name} as {output_file_path}")

        except KeyError as e:
            logging.error(f"KeyError: {str(e)} - batch keys available: {batch.keys()}")
        except Exception as e:
            logging.error(f"Failed to process file {file_path}: {str(e)}")

# Where the processed files are written, and where the source files are read from
output_directory = '/content/drive/My Drive/LUNG_DICOM/organized/NEW_DICOM'
input_directory = '/content/drive/My Drive/LUNG_DICOM/organized/DICOM_IMAGES'

# Run the preprocessing pipeline over the whole input folder
preprocess_and_save_images(
    input_directory=input_directory,
    output_directory=output_directory,
)


Orientation: spatial shape = (512, 512), channels = 1,please make sure the input is in the channel-first format.
Orientation: spatial shape = (512, 512), channels = 1,please make sure the input is in the channel-first format.
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyError: 'image_meta_dict' - batch keys available: dict_keys(['image'])
ERROR:root:KeyErro

In [None]:
#Run inference using model
#Better to run these on 3D-slicer
import os
# Switch into the bundle folder so the relative path configs/inference.json below resolves there.
os.chdir('/content/drive/My Drive/lung_nodule_ct_detection_v0.5.9/lung_nodule_ct_detection')
# Shell command: start the MONAI bundle runner with the bundle's inference config.
!python -m monai.bundle run --config_file configs/inference.json


2024-06-03 20:41:02.819209: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-03 20:41:02.819258: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-03 20:41:02.820726: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-03 20:41:05,604 - INFO - --- input summary of monai.bundle.scripts.run ---
2024-06-03 20:41:05,605 - INFO - > config_file: 'configs/inference.json'
2024-06-03 20:41:05,605 - INFO - ---


2024-06-03 20:41:05,606 - INFO - Setting logging properties based on config: configs/logging.conf.
2024-06-03 20:41:07,859 - root - INFO - Restored all variables from ./m