In [1]:
import monai

In [None]:
# Make a copick project
import os
import shutil

config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
          "name" : "beta-amylase",
            "is_particle": true,
            "pdb_id": "8ZRZ",
            "label": 2,
            "color": [255, 255, 255, 128],
            "radius": 90,
            "map_threshold": 0.0578  
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        },
        {
            "name": "membrane",
            "is_particle": false,
            "label": 8,
            "color": [100, 100, 100, 128]
        },
        {
            "name": "background",
            "is_particle": false,
            "label": 9,
            "color": [10, 150, 200, 128]
        }
    ],

    "overlay_root": "./kaggle/working/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "./kaggle/input/czii-cryo-et-object-identification/train/static"
}"""

copick_config_path = "./kaggle/working/copick.config"
output_overlay = "./kaggle/working/overlay"


with open(copick_config_path, "w") as f:
    f.write(config_blob)
    
# Update the overlay
# Define source and destination directories
source_dir = './kaggle/input/czii-cryo-et-object-identification/train/overlay'
destination_dir = './kaggle/working/overlay'

# Walk through the source directory
for root, dirs, files in os.walk(source_dir):
    # Create corresponding subdirectories in the destination
    relative_path = os.path.relpath(root, source_dir)
    target_dir = os.path.join(destination_dir, relative_path)
    os.makedirs(target_dir, exist_ok=True)
    
    # Copy and rename each file
    for file in files:
        if file.startswith("curation_0_"):
            new_filename = file
        else:
            new_filename = f"curation_0_{file}"
            
        
        # Define full paths for the source and destination files
        source_file = os.path.join(root, file)
        destination_file = os.path.join(target_dir, new_filename)
        
        # Copy the file with the new name
        shutil.copy2(source_file, destination_file)
        print(f"Copied {source_file} to {destination_file}")

Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay\ExperimentRuns\TS_5_4\Picks\apo-ferritin.json to ./kaggle/working/overlay\ExperimentRuns\TS_5_4\Picks\curation_0_apo-ferritin.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay\ExperimentRuns\TS_5_4\Picks\beta-amylase.json to ./kaggle/working/overlay\ExperimentRuns\TS_5_4\Picks\curation_0_beta-amylase.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay\ExperimentRuns\TS_5_4\Picks\beta-galactosidase.json to ./kaggle/working/overlay\ExperimentRuns\TS_5_4\Picks\curation_0_beta-galactosidase.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay\ExperimentRuns\TS_5_4\Picks\ribosome.json to ./kaggle/working/overlay\ExperimentRuns\TS_5_4\Picks\curation_0_ribosome.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay\ExperimentRuns\TS_5_4\Picks\thyroglobulin.json to ./kaggle/working/overlay\ExperimentRuns\TS_5_4\Picks\curation_0_thy

In [3]:
import copick
import numpy as np
from tqdm import tqdm

root = copick.from_file(copick_config_path)

copick_user_name = "copickUtils"
copick_segmentation_name = "paintedPicks"
voxel_size = 10

In [4]:
len(root.runs)

7

In [5]:
from copick_utils.segmentation import segmentation_from_picks
import copick_utils.writers.write as write
from collections import defaultdict

# Just do this once
generate_masks = True
tomo_type_list = ["ctfdeconvolved", "denoised", "isonetcorrected", "wbp"]
for tomo_type in tomo_type_list:

    if generate_masks:
        target_objects = defaultdict(dict)
        for object in root.pickable_objects:
            if object.is_particle:
                target_objects[object.name]['label'] = object.label
                target_objects[object.name]['radius'] = object.radius


        for run in tqdm(root.runs):
            tomo = run.get_voxel_spacing(10)
            tomo = tomo.get_tomogram(tomo_type).numpy()
            target = np.zeros(tomo.shape, dtype=np.uint8)
            for pickable_object in root.pickable_objects:
                pick = run.get_picks(object_name=pickable_object.name, user_id="curation")
                if len(pick):  
                    target = segmentation_from_picks.from_picks(pick[0], 
                                                                target, 
                                                                target_objects[pickable_object.name]['radius'] * 0.8,
                                                                target_objects[pickable_object.name]['label']
                                                                )
            write.segmentation(run, target, copick_user_name, name=copick_segmentation_name)

100%|██████████| 7/7 [00:03<00:00,  1.83it/s]
100%|██████████| 7/7 [00:03<00:00,  2.10it/s]
100%|██████████| 7/7 [00:03<00:00,  2.13it/s]
100%|██████████| 7/7 [00:03<00:00,  2.09it/s]


In [6]:
from pathlib import Path
import numpy as np

tomo_type_list = ["ctfdeconvolved", "denoised", "isonetcorrected", "wbp"]
# Define directories for saving numpy arrays
train_image_dir = Path('./datasets/train/images')
train_label_dir = Path('./datasets/train/labels')
val_image_dir = Path('./datasets/val/images')
val_label_dir = Path('./datasets/val/labels')

for dir_path in [train_image_dir, train_label_dir, val_image_dir, val_label_dir]:
    dir_path.mkdir(parents=True, exist_ok=True)

# root.runs의 개수 출력
print(f"Number of runs: {len(root.runs)}")

for tomo_type in tomo_type_list:
    print(f"Processing \"{tomo_type}\" data...")
    for vol_idx, run in enumerate(root.runs):
        # Load image and label data
        tomogram = run.get_voxel_spacing(voxel_size).get_tomogram(tomo_type).numpy()
        segmentation = run.get_segmentations(
            name=copick_segmentation_name,
            user_id=copick_user_name,
            voxel_size=voxel_size,
            is_multilabel=True
        )[0].numpy()

        # Format run name
        run_name = run.name.replace("\\", "_").replace("/", "_")

        # Determine if this is the last volume
        is_last_volume = (vol_idx == len(root.runs) - 1)

        # Set directories based on whether it's the last volume
        image_dir = val_image_dir if is_last_volume else train_image_dir
        label_dir = val_label_dir if is_last_volume else train_label_dir

        # Save tomogram and segmentation as numpy arrays
        image_path = image_dir / f"{tomo_type}_{run_name}_image.npy"
        label_path = label_dir / f"{tomo_type}_{run_name}_label.npy"
        np.save(image_path, tomogram)
        np.save(label_path, segmentation)

        # 저장된 파일 경로 출력
        print(f"Saved image: {image_path}")
        print(f"Saved label: {label_path}")

# 저장된 파일의 개수 출력
print(f"Number of files in train images: {len(list(train_image_dir.glob('*.npy')))}")
print(f"Number of files in train labels: {len(list(train_label_dir.glob('*.npy')))}")
print(f"Number of files in val images: {len(list(val_image_dir.glob('*.npy')))}")
print(f"Number of files in val labels: {len(list(val_label_dir.glob('*.npy')))}")

print("Processing complete.")

Number of runs: 7
Processing "ctfdeconvolved" data...
Saved image: datasets\train\images\ctfdeconvolved_TS_5_4_image.npy
Saved label: datasets\train\labels\ctfdeconvolved_TS_5_4_label.npy
Saved image: datasets\train\images\ctfdeconvolved_TS_69_2_image.npy
Saved label: datasets\train\labels\ctfdeconvolved_TS_69_2_label.npy
Saved image: datasets\train\images\ctfdeconvolved_TS_6_4_image.npy
Saved label: datasets\train\labels\ctfdeconvolved_TS_6_4_label.npy
Saved image: datasets\train\images\ctfdeconvolved_TS_6_6_image.npy
Saved label: datasets\train\labels\ctfdeconvolved_TS_6_6_label.npy
Saved image: datasets\train\images\ctfdeconvolved_TS_73_6_image.npy
Saved label: datasets\train\labels\ctfdeconvolved_TS_73_6_label.npy
Saved image: datasets\train\images\ctfdeconvolved_TS_86_3_image.npy
Saved label: datasets\train\labels\ctfdeconvolved_TS_86_3_label.npy
Saved image: datasets\val\images\ctfdeconvolved_TS_99_9_image.npy
Saved label: datasets\val\labels\ctfdeconvolved_TS_99_9_label.npy
Proc

In [12]:
# Make a copick project
import os
import shutil
from pathlib import Path

config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
          "name" : "beta-amylase",
            "is_particle": true,
            "pdb_id": "8ZRZ",
            "label": 2,
            "color": [255, 255, 255, 128],
            "radius": 90,
            "map_threshold": 0.0578  
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        },
        {
            "name": "membrane",
            "is_particle": false,
            "label": 8,
            "color": [100, 100, 100, 128]
        },
        {
            "name": "background",
            "is_particle": false,
            "label": 9,
            "color": [10, 150, 200, 128]
        }
    ],

    "overlay_root": "./kaggle/working/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "./kaggle/input/czii-cryo-et-object-identification/test/static"
}"""

copick_config_path = "./kaggle/working/copick.config"
output_overlay = "./kaggle/working/overlay"


with open(copick_config_path, "w") as f:
    f.write(config_blob)
    
import copick
import numpy as np
from tqdm import tqdm

root = copick.from_file(copick_config_path)
print(len(root.runs))

run = root.runs[0]
print(run.name)
tomo = run.get_voxel_spacing(10).get_tomogram("denoised").numpy()
tomo.shape

7
TS_5_4


(184, 630, 630)

In [13]:

copick_user_name = "copickUtils"
copick_segmentation_name = "paintedPicks"
voxel_size = 10
tomo_type_list = ["denoised"]
task_dir = Path('./datasets/task/images')
for dir_path in [task_dir]:
    dir_path.mkdir(parents=True, exist_ok=True)

for tomo_type in tomo_type_list:
    print(f"Processing \"{tomo_type}\" data...")
    for run in root.runs:
        # Load image and label data
        tomogram = run.get_voxel_spacing(voxel_size).get_tomogram(tomo_type).numpy()

        # Format run name
        run_name = run.name.replace("\\", "_").replace("/", "_")

        # Save tomogram as numpy array
        image_path = task_dir / f"{tomo_type}_{run_name}_image.npy"
        np.save(image_path, tomogram)

        # 저장된 파일 경로 출력
        print(f"Saved image: {image_path}")
print("Processing complete.")


Processing "denoised" data...
Saved image: datasets\task\images\denoised_TS_5_4_image.npy
Saved image: datasets\task\images\denoised_TS_69_2_image.npy
Saved image: datasets\task\images\denoised_TS_6_4_image.npy


AttributeError: 'NoneType' object has no attribute 'get_tomogram'