In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pneumo import dataset
from torch.utils.data import DataLoader
import os
import glob
import albumentations as A

In [3]:
IMG_PATH = "/home/wilmer-linux/Projects/data/pneumo_s1/dicom-images-train/"
LABEL_PATH = os.path.join(IMG_PATH, '../train-rle.csv')

fns = glob.glob(os.path.join(IMG_PATH, "**/*.dcm"), recursive=True)

In [4]:
from sklearn.model_selection import train_test_split

train_fns, valid_fns = train_test_split(fns, train_size=0.8, random_state=42, shuffle=True)
print(len(train_fns), len(valid_fns))

9671 2418


In [5]:
config = {
    "lr" : 1e-3,
    "batch_size": 16,
}

In [6]:
from albumentations import (
    Compose, HorizontalFlip,
    RandomBrightness, RandomContrast, RandomGamma,OneOf,
    ShiftScaleRotate,GridDistortion, ElasticTransform, JpegCompression, HueSaturationValue,
    RandomBrightness, RandomContrast, Blur, MotionBlur, MedianBlur, GaussNoise,CenterCrop,
    IAAAdditiveGaussianNoise,GaussNoise,OpticalDistortion,RandomSizedCrop, Resize, RandomBrightnessContrast, ToFloat
)

IMG_SIZE= (256, 256)

TRAIN_AUGS = Compose([
    ToFloat(max_value=1),
    Resize(IMG_SIZE[0], IMG_SIZE[1]),
    HorizontalFlip(p=0.3),
    ShiftScaleRotate(p=0.2, rotate_limit=30),
    OneOf([
        RandomGamma((90,110)),
        RandomBrightnessContrast(0.1, 0.1),
         ], p=0.2),
],p=1)

VALID_AUGS = Compose([
    ToFloat(max_value=1),
    Resize(IMG_SIZE[0], IMG_SIZE[1])
])

In [7]:
train_ds = dataset.PneumoDataset(LABEL_PATH, train_fns, transforms=TRAIN_AUGS)
valid_ds = dataset.PneumoDataset(LABEL_PATH, valid_fns, transforms=VALID_AUGS)

train_loader = DataLoader(train_ds, batch_size=config["batch_size"], shuffle=True, num_workers=3, multiprocessing_context='spawn')
valid_loader = DataLoader(valid_ds, batch_size=config["batch_size"], shuffle=False, num_workers=0)

In [8]:
from pneumo.models import UNetLightning
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

model_name = "unet++_effb2_dropout0_mixedlogdiceloss"
ckpt_path = "/home/wilmer-linux/Projects/code/pneumo/ckpt/"

unet = UNetLightning(learning_rate=config["lr"]).cuda()

wandb_logger = WandbLogger(project="pnuemo_2", name=model_name)
es = EarlyStopping(monitor="valid_loss", min_delta=0.001, patience=20)
checkpoint = ModelCheckpoint(dirpath=ckpt_path, filename=model_name, monitor="valid_dice_coeff", save_top_k=1)

trainer = Trainer(

    gpus=1,
    auto_select_gpus=True, 
    precision=16,
    logger=wandb_logger,
    callbacks=[es, checkpoint]
)

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [9]:
trainer.fit(unet, train_dataloader=train_loader, val_dataloaders=valid_loader)

  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[34m[1mwandb[0m: Currently logged in as: [33mwilmer[0m (use `wandb login --relogin` to force relogin)



  | Name     | Type                | Params
-------------------------------------------------
0 | encoder  | EfficientNetEncoder | 7.7 M 
1 | decoder  | UnetPlusPlusDecoder | 2.7 M 
2 | seg_head | Sequential          | 145   
-------------------------------------------------
10.4 M    Trainable params
0         Non-trainable params
10.4 M    Total params
20.801    Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

RuntimeError: Caught RuntimeError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/home/wilmer-linux/miniconda3/envs/faceai-nada/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/wilmer-linux/miniconda3/envs/faceai-nada/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/wilmer-linux/miniconda3/envs/faceai-nada/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/wilmer-linux/Projects/code/pneumo/pneumo/dataset.py", line 73, in __getitem__
    return self.to_tensors(dcm_data)
  File "/home/wilmer-linux/Projects/code/pneumo/pneumo/dataset.py", line 77, in to_tensors
    "img": torch.as_tensor(data["img"]).cuda().half(),
RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.


In [None]:
# optimize threshold for dice coeff