In [1]:
# Make a copick project
import os
import shutil

config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
          "name" : "beta-amylase",
            "is_particle": true,
            "pdb_id": "8ZRZ",
            "label": 2,
            "color": [255, 255, 255, 128],
            "radius": 90,
            "map_threshold": 0.0578  
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        },
        {
            "name": "membrane",
            "is_particle": false,
            "label": 8,
            "color": [100, 100, 100, 128]
        },
        {
            "name": "background",
            "is_particle": false,
            "label": 9,
            "color": [10, 150, 200, 128]
        }
    ],

    "overlay_root": "./kaggle/working/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "./kaggle/input/czii-cryo-et-object-identification/train/static"
}"""

copick_config_path = "./kaggle/working/copick.config"
output_overlay = "./kaggle/working/overlay"


with open(copick_config_path, "w") as f:
    f.write(config_blob)
    
# Update the overlay
# Define source and destination directories
source_dir = './kaggle/input/czii-cryo-et-object-identification/train/overlay'
destination_dir = './kaggle/working/overlay'

# Walk through the source directory
for root, dirs, files in os.walk(source_dir):
    # Create corresponding subdirectories in the destination
    relative_path = os.path.relpath(root, source_dir)
    target_dir = os.path.join(destination_dir, relative_path)
    os.makedirs(target_dir, exist_ok=True)
    
    # Copy and rename each file
    for file in files:
        if file.startswith("curation_0_"):
            new_filename = file
        else:
            new_filename = f"curation_0_{file}"
            
        
        # Define full paths for the source and destination files
        source_file = os.path.join(root, file)
        destination_file = os.path.join(target_dir, new_filename)
        
        # Copy the file with the new name
        shutil.copy2(source_file, destination_file)
        print(f"Copied {source_file} to {destination_file}")

Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/TS_5_4/Picks/ribosome.json to ./kaggle/working/overlay/ExperimentRuns/TS_5_4/Picks/curation_0_ribosome.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/TS_5_4/Picks/virus-like-particle.json to ./kaggle/working/overlay/ExperimentRuns/TS_5_4/Picks/curation_0_virus-like-particle.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/TS_5_4/Picks/beta-galactosidase.json to ./kaggle/working/overlay/ExperimentRuns/TS_5_4/Picks/curation_0_beta-galactosidase.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/TS_5_4/Picks/beta-amylase.json to ./kaggle/working/overlay/ExperimentRuns/TS_5_4/Picks/curation_0_beta-amylase.json
Copied ./kaggle/input/czii-cryo-et-object-identification/train/overlay/ExperimentRuns/TS_5_4/Picks/apo-ferritin.json to ./kaggle/working/overlay/ExperimentRuns/TS_5_4/Picks/c

In [2]:
import copick
import numpy as np
from tqdm import tqdm

root = copick.from_file(copick_config_path)

copick_user_name = "copickUtils"
copick_segmentation_name = "paintedPicks"
voxel_size = 10

  root = copick.from_file(copick_config_path)


In [5]:
import os
import numpy as np
from tqdm import tqdm
from pathlib import Path
import matplotlib.pyplot as plt

# Define tomogram types
tomo_tpye_list = ["ctfdeconvolved", "denoised", "isonetcorrected", "wbp"]

# Configuration for directories
train_label_dir = Path('./datasets/labels/train')
train_image_dir = Path('./datasets/images/train')
val_label_dir = Path('./datasets/labels/val')
val_image_dir = Path('./datasets/images/val')

for dir_path in [train_label_dir, train_image_dir, val_label_dir, val_image_dir]:
    dir_path.mkdir(parents=True, exist_ok=True)

# Iterate over all tomogram types
for tomo_type in tomo_tpye_list:
    print(f"Processing \"{tomo_type}\" data...")
    for vol_idx, run in enumerate(root.runs):
        # Load image and label data
        tomogram = run.get_voxel_spacing(voxel_size).get_tomogram(tomo_type).numpy()
        segmentation = run.get_segmentations(
            name=copick_segmentation_name,
            user_id=copick_user_name,
            voxel_size=voxel_size,
            is_multilabel=True
        )[0].numpy()

        data_dict = {"image": tomogram, "label": segmentation}

        # Determine dataset type (train/val)
        is_test = (vol_idx == len(root.runs) - 1)
        label_dir = val_label_dir if is_test else train_label_dir
        image_dir = val_image_dir if is_test else train_image_dir

        # Save slices for current tomogram type
        for slice_idx in range(data_dict["image"].shape[0]):  # Iterate over slices
            base_filename = f"{tomo_type}_vol_{vol_idx:01d}_slice_{slice_idx:03d}"

            # Save label as PNG
            label_slice = data_dict["label"][slice_idx]
            plt.imsave(label_dir / f"{base_filename}.png", label_slice, cmap='gray')

            # Normalize and save image
            slice_img = data_dict["image"][slice_idx]
            norm_img = ((slice_img - slice_img.min()) / (slice_img.max() - slice_img.min()) * 255).astype(np.uint8)
            plt.imsave(image_dir / f"{base_filename}.png", norm_img, cmap='gray')

        print(f"Processed volume {vol_idx} for \"{tomo_type}\" dataset.")

Processing "ctfdeconvolved" data...
Processed volume 0 for "ctfdeconvolved" dataset.
Processed volume 1 for "ctfdeconvolved" dataset.
Processed volume 2 for "ctfdeconvolved" dataset.
Processed volume 3 for "ctfdeconvolved" dataset.
Processed volume 4 for "ctfdeconvolved" dataset.
Processed volume 5 for "ctfdeconvolved" dataset.
Processed volume 6 for "ctfdeconvolved" dataset.
Processing "denoised" data...
Processed volume 0 for "denoised" dataset.
Processed volume 1 for "denoised" dataset.
Processed volume 2 for "denoised" dataset.
Processed volume 3 for "denoised" dataset.
Processed volume 4 for "denoised" dataset.
Processed volume 5 for "denoised" dataset.
Processed volume 6 for "denoised" dataset.
Processing "isonetcorrected" data...
Processed volume 0 for "isonetcorrected" dataset.
Processed volume 1 for "isonetcorrected" dataset.
Processed volume 2 for "isonetcorrected" dataset.
Processed volume 3 for "isonetcorrected" dataset.
Processed volume 4 for "isonetcorrected" dataset.
Pro

In [1]:
image_dir = Path(image_dir)
label_dir = Path(label_dir) if label_dir else None





# Collect sorted file paths for images
image_files = sorted(list(image_dir.glob("*.png")))
image_files[:194]

NameError: name 'Path' is not defined

In [None]:
from PIL import Image

label = Image.open('./datasets/labels/train/ctfdeconvolved_vol_0_slice_000.png').convert('L')
label_tensor = torch.Tensor(np.array(label))

print(f"Label min value: {label_tensor.min().item()}, Label max value: {label_tensor.max().item()}")

Label shape: torch.Size([630, 630]), Label dtype: torch.float32
Label min value: 0.0, Label max value: 255.0


In [88]:
import torch
from PIL import Image
import numpy as np
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

class MultiChannelCryoETDataset(Dataset):
    def __init__(self, image_dir, label_dir=None, num_channels=11, slice_size=(224, 224), stride=112, transform=None):
        self.image_dir = Path(image_dir)
        self.label_dir = Path(label_dir) if label_dir else None
        self.num_channels = num_channels
        self.slice_size = slice_size
        self.stride = stride
        self.transform = transform

        # 볼륨별 슬라이스 그룹화
        self.volume_slices = self._group_slices_by_volume()
        self.slices = self._generate_slices()

        # 유효 클래스 정의 및 매핑
        self.valid_classes = [0, 42, 85, 128, 170, 213, 255]  # 원본 값
        self.class_map = {v: i for i, v in enumerate(self.valid_classes)}  # 클래스 매핑

    def _group_slices_by_volume(self):
        volume_groups = defaultdict(list)
        for file_path in sorted(self.image_dir.glob("*.png")):
            parts = file_path.stem.split("_")
            volume_id = parts[2]  # "vol_0"
            slice_id = int(parts[4])  # "slice_001"
            volume_groups[volume_id].append((slice_id, file_path))
        for key in volume_groups:
            volume_groups[key] = sorted(volume_groups[key], key=lambda x: x[0])
        return volume_groups

    def _generate_slices(self):
        slices = []
        for volume_id, slice_list in self.volume_slices.items():
            for i in range(0, len(slice_list) - self.num_channels + 1):
                slice_range = slice_list[i:i + self.num_channels]
                slices.append({"volume_id": volume_id, "slice_range": slice_range})
        return slices

    def _extract_patches(self, image, size, stride):
        """이미지에서 슬라이싱된 패치들을 추출"""
        patches = []
        h, w = image.shape
        ph, pw = size
        for y in range(0, h - ph + 1, stride):
            for x in range(0, w - pw + 1, stride):
                patches.append(image[y:y + ph, x:x + pw])
        return patches

    def __len__(self):
        return len(self.slices)

    def __getitem__(self, idx):
        slice_info = self.slices[idx]
        slice_range = slice_info["slice_range"]

        # 다채널 이미지 생성
        channels = []
        for _, file_path in slice_range:
            img = np.array(Image.open(file_path).convert("L"), dtype=np.float32)
            channels.append(img)

        # 다채널 입력 이미지 생성
        input_image = np.stack(channels, axis=0)  # (num_channels, H, W)

        # 슬라이싱
        input_patches = self._extract_patches(input_image, self.slice_size, self.stride)

        # 라벨 처리
        if self.label_dir:
            label_path = self.label_dir / slice_range[0][1].name
            label = np.array(Image.open(label_path), dtype=np.int64)

            # 슬라이싱
            label_patches = self._extract_patches(label, self.slice_size, self.stride)

            # 클래스 매핑
            label_patches = [self._remap_labels(patch) for patch in label_patches]
        else:
            label_patches = [np.zeros(self.slice_size, dtype=np.int64) for _ in input_patches]

        # Transform 적용
        if self.transform:
            input_patches, label_patches = self.transform(input_patches, label_patches)

        # Tensor 변환
        input_patches = torch.tensor(input_patches, dtype=torch.float32)  # (num_patches, num_channels, H, W)
        label_patches = torch.tensor(label_patches, dtype=torch.long)  # (num_patches, H, W)

        return input_patches, label_patches

    def _remap_labels(self, label):
        """라벨 데이터를 유효 클래스 값으로 매핑"""
        remapped_label = np.zeros_like(label, dtype=np.int64)
        for original, new_class in self.class_map.items():
            remapped_label[label == original] = new_class
        return remapped_label

# 데이터 확인
for images, labels in dataloader:
    print(f"Images shape: {images.shape}")  # (B, C, H, W)
    print(f"Labels shape: {labels.shape}")  # (B, H, W)
    print(f"Unique label values: {torch.unique(labels)}")  # 라벨 값 확인
    break

Images shape: torch.Size([2, 11, 224, 224])
Labels shape: torch.Size([2, 224, 224])
Unique label values: tensor([0, 1, 2, 3, 4, 5, 6])


In [89]:
import torch
import torch.nn as nn
from monai.networks.nets import UNet

class UNet2_5D_v2(nn.Module):
    def __init__(self, out_channels=6):
        super().__init__()
        
        # 초기 3D 처리 레이어
        self.init_3d = nn.Sequential(
            nn.Conv3d(1, 64, kernel_size=(11, 3, 3), padding=(0, 1, 1)),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True)
        )
        
        # 2D UNet
        self.unet = UNet(
            spatial_dims=2,
            in_channels=64,  # 3D 컨볼루션 출력 채널
            out_channels=out_channels,
            channels=(64, 128, 256, 512),
            strides=(2, 2, 2, 2),
            num_res_units=2
        )

    def forward(self, x):
        # x shape: (batch, 1, 11, H, W)
        # 3D 처리
        x = x.unsqueeze(1)
        x = self.init_3d(x)  # (batch, 64, 1, H, W)
        x = x.squeeze(2)     # (batch, 64, H, W)
        
        # 2D UNet
        return self.unet(x)

# 테스트 코드
if __name__ == "__main__":
    model = UNet2_5D_v2(out_channels=7)
    x = torch.randn(2, 11, 224, 224)
    output = model(x)
    print(f"Output shape: {output.shape}")  # Expected: (8, 6, 256, 256)



Output shape: torch.Size([2, 7, 224, 224])


In [75]:
for inputs, targets in dataloader:
    print(f"Max label value: {targets.max().item()}")
    print(f"Num classes (model output): {outputs.shape[1]}")
    break

Max label value: 6
Num classes (model output): 7


In [90]:
from monai.losses import DiceLoss
from torch import optim
import torch.nn.functional as F

# Loss and Optimizer
criterion = DiceLoss(to_onehot_y=True, softmax=True)  # Dice Loss with softmax
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training Loop
# Training Loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in dataloader:
        optimizer.zero_grad()

        # 라벨 데이터 정수형 확인 및 변환
        targets = targets.long()  # 정수형으로 변환
        print(f"Targets unique values: {torch.unique(targets)}, dtype: {targets.dtype}")

        # 모델 출력
        outputs = model(inputs)  # outputs: (B, 7, H, W)

        # 손실 계산
        loss = criterion(outputs, targets)  # targets: (B, H, W)
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(dataloader):.4f}")

Targets unique values: tensor([0, 1, 2, 3, 5, 6]), dtype: torch.int64


AssertionError: labels should have a channel with length equal to one.

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.utils.data import DataLoader, Dataset
# import numpy as np

# # Define the model (U-Net)
# class DoubleConv(nn.Module):
#     def __init__(self, in_channels, out_channels):
#         super(DoubleConv, self).__init__()
#         self.double_conv = nn.Sequential(
#             nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
#             nn.ReLU(inplace=True)
#         )

#     def forward(self, x):
#         return self.double_conv(x)


# class UNet(nn.Module):
#     def __init__(self, in_channels, out_channels):
#         super(UNet, self).__init__()
#         self.enc1 = DoubleConv(in_channels, 64)
#         self.pool1 = nn.MaxPool2d(2)
#         self.enc2 = DoubleConv(64, 128)
#         self.pool2 = nn.MaxPool2d(2)
#         self.enc3 = DoubleConv(128, 256)
#         self.pool3 = nn.MaxPool2d(2)
#         self.bridge = DoubleConv(256, 512)
#         self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
#         self.dec3 = DoubleConv(512, 256)
#         self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
#         self.dec2 = DoubleConv(256, 128)
#         self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
#         self.dec1 = DoubleConv(128, 64)
#         self.out_conv = nn.Conv2d(64, out_channels, kernel_size=1)

#     def forward(self, x):
#         enc1 = self.enc1(x)
#         enc2 = self.enc2(self.pool1(enc1))
#         enc3 = self.enc3(self.pool2(enc2))
#         bridge = self.bridge(self.pool3(enc3))
#         dec3 = self.dec3(torch.cat([self.upconv3(bridge), enc3], dim=1))
#         dec2 = self.dec2(torch.cat([self.upconv2(dec3), enc2], dim=1))
#         dec1 = self.dec1(torch.cat([self.upconv1(dec2), enc1], dim=1))
#         return self.out_conv(dec1)


# # Instantiate model, loss, and optimizer
# model = UNet(in_channels=11, out_channels=2)  # 2 classes for segmentation
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=1e-4)


# # Training Loop
# for epoch in range(5):  # 5 epochs for demonstration
#     model.train()
#     running_loss = 0.0
#     for inputs, targets in dataloader:
#         optimizer.zero_grad()
#         outputs = model(inputs)  # Forward pass
#         loss = criterion(outputs, targets)  # Compute loss
#         loss.backward()  # Backpropagation
#         optimizer.step()  # Update weights
#         running_loss += loss.item()

#     print(f"Epoch [{epoch+1}/5], Loss: {running_loss/len(dataloader):.4f}")

Epoch [1/5], Loss: 0.6931
Epoch [2/5], Loss: 0.6931
Epoch [3/5], Loss: 0.6931
Epoch [4/5], Loss: 0.6931
Epoch [5/5], Loss: 0.6931
