In [5]:
# Put the parent directory first on the module search path so the `src.*`
# imports in the following cell can be resolved.
# NOTE(review): assumes the `src/` package lives one level above this notebook — confirm.
import sys
sys.path.insert(0, "../")

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from pathlib import Path
import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import tifffile
from torch.utils.data import DataLoader
from src.loops.loops import validation_full_image, inference, inference_overlap
from src.utils.utils import rle2mask, IMAGE_SIZES, mask2rle
from src.transforms.transform import base_transform, valid_transform
from src.datasets.dataset import ImageDataset, SingleTiffDataset
from src.utils.metrics import dice_numpy
import gc
from src.datasets.zarr_dataset import ZarrValidDataset

In [7]:
!ls -l ../../submission/fold_0_4096to1024_epoch_49_score_0.9339.pth

-rw-rw-r-- 1 trytolose trytolose 97921014 Mar 25 20:37 ../../submission/fold_0_4096to1024_epoch_49_score_0.9339.pth


In [8]:
# Tiling / loader settings shared by the inference cells below.
FACTOR = 1                       # common multiplier applied to both sizes
IMG_TRAIN_SIZE = FACTOR * 1024   # size handed to valid_transform(...)
CROP_SIZE = 4 * IMG_TRAIN_SIZE   # crop size and step given to SingleTiffDataset
BATCH_SIZE = 24                  # DataLoader batch size
NUM_WORKERS = 0                  # DataLoader worker processes

In [9]:
# Load the competition tables.
# Every file hangs off a single root so the notebook can be pointed at a
# different data location by editing one line instead of three.
DATA_DIR = Path("/hdd/kaggle/hubmap/input_v2")

# Training table indexed by image id; the validation loop reads its
# `encoding` column.
df = pd.read_csv(DATA_DIR / "train.csv").set_index("id", drop=True)

# Submission template indexed by image id; the test-inference loop writes
# its predictions into this frame.
df_sub = pd.read_csv(DATA_DIR / "sample_submission.csv").set_index("id", drop=True)

# Split table: keep only the rows assigned to fold 0 and list the distinct
# image ids found there.
df_crops = pd.read_csv(DATA_DIR / "train_v1_1024" / "split_v2.csv")
df_valid = df_crops[df_crops["fold"] == 0].reset_index(drop=True)
img_ids = df_valid["img_id"].unique()

In [10]:
# Image ids evaluated for each fold. The position of a row in the list is the
# fold number, so the resulting dict is keyed 0..4.
fold_imgs = dict(enumerate([
    ["4ef6695ce", "0486052bb", "2f6ecfcdf"],
    ["c68fe75ea", "095bf7a1f", "aaa6a05cc"],
    ["afa5e8098", "1e2425f28", "b2dc8411c"],
    ["cb2d976f4", "8242609fa", "54f2eec69"],
    ["26dc41664", "b9a3865fc", "e79de561c"],
]))

In [2]:
!ls -l ../../submission/1024_256_score_0.9166.pth

total 765056
-rw-rw-r-- 1 trytolose trytolose 97921016 Mar 31 23:03 1024_256_score_0.9166.pth
-rw-rw-r-- 1 trytolose trytolose 97921014 Apr  2 23:46 1024_weighted_epoch_36_score_0.9188.pth
-rw-rw-r-- 1 trytolose trytolose 97921015 Mar 31 20:18 512_weighted_08_score_0.9263.pth
-rw-rw-r-- 1 trytolose trytolose 97921014 Mar 31 18:06 base_train_weighted_batch_0.8_0.2_e32_score_0.9307.pth
-rw-rw-r-- 1 trytolose trytolose      122 Mar 20 23:08 dataset-metadata.json
-rw-rw-r-- 1 trytolose trytolose 97921014 Mar 25 17:22 fold_0_2048to512_epoch_48_score_0.9249.pth
-rw-rw-r-- 1 trytolose trytolose 97921014 Mar 25 20:37 fold_0_4096to1024_epoch_49_score_0.9339.pth
-rw-rw-r-- 1 trytolose trytolose 97921014 Apr  2 14:41 pseudo_score_0.9200.pth
-rw-rw-r-- 1 trytolose trytolose       57 Mar 27 21:16 upload.sh
-rw-rw-r-- 1 trytolose trytolose 97921013 Mar 31 02:02 zarr_e37_outlier_full_dice_score_0.9444.pth


In [15]:
# Build the segmentation network and restore trained weights.
# encoder_weights=None: no pretrained encoder is requested; every parameter
# comes from the checkpoint loaded below.
model = smp.Unet("resnet34", encoder_weights=None).cuda()

# Checkpoints tried earlier, kept for reference:
# w_path = "../../submission/fold_0_zarr_epoch_23_score_0.9407.pth"
# w_path = "../../submission/fold_0_old_loader_epoch_38_score_0.9254.pth"
# w_path = "../../submission/fold_0_zarr_pdf_epoch_34_score_0.9123.pth"
w_path = "../weights/4096_1028_random_zarr_zarr_4096_1024/0/epoch_35_score_0.9311.pth"

# Deserialize onto the CPU: this works no matter which device the checkpoint
# was saved from and avoids holding a second copy of the weights on the GPU.
# load_state_dict then copies the values into the model's own parameters.
state_dict = torch.load(w_path, map_location="cpu")

# This notebook only runs inference, so switch to evaluation mode explicitly.
# NOTE(review): harmless if the `inference` helper already does this — confirm.
model.eval()

# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [14]:
!ls -l ../weights/4096_1028_random_zarr_zarr_4096_1024/0/epoch_35_score_0.9311.pth

total 478140
-rw-rw-r-- 1 trytolose trytolose 97921013 Apr  8 14:17 epoch_27_score_0.9168.pth
-rw-rw-r-- 1 trytolose trytolose 97921013 Apr  8 14:18 epoch_28_score_0.9183.pth
-rw-rw-r-- 1 trytolose trytolose 97921013 Apr  8 14:25 epoch_33_score_0.9218.pth
-rw-rw-r-- 1 trytolose trytolose 97921013 Apr  8 14:26 epoch_34_score_0.9221.pth
-rw-rw-r-- 1 trytolose trytolose 97921013 Apr  8 14:28 epoch_35_score_0.9311.pth


In [9]:
!ls /hdd/kaggle/hubmap/input_v2/test

2ec3f1bb9-anatomical-structure.json  aa05346ff-anatomical-structure.json
2ec3f1bb9.tiff			     aa05346ff.tiff
3589adb90-anatomical-structure.json  d488c759a-anatomical-structure.json
3589adb90.tiff			     d488c759a.tiff
57512b7f1-anatomical-structure.json  sample_submission.csv
57512b7f1.tiff


In [16]:
# Full-image validation for one fold: predict each image listed in
# fold_imgs[fold], compare with the ground-truth mask decoded from the
# training table, and report the dice score per image plus the fold mean.
fold = 0
print(f"FOLD {fold}")

# img_id -> full-image dice. Kept so the fold average is computed here rather
# than retyped by hand in a later cell.
dice_by_image = {}

for img_id in fold_imgs[fold]:
    rle = df.loc[img_id, 'encoding']

    img_ds = SingleTiffDataset(
        tiff_path=f"/hdd/kaggle/hubmap/input_v2/train/{img_id}.tiff",
        transform=valid_transform(IMG_TRAIN_SIZE),
        crop_size=CROP_SIZE,
        step=CROP_SIZE,  # step == crop size
    )
    img_loader = DataLoader(
        dataset=img_ds,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )
    mask_pred = inference(img_loader, model, CROP_SIZE, IMG_TRAIN_SIZE).astype(np.float32)

    # Ground-truth mask decoded with the dataset's (w, h).
    mask_true = rle2mask(rle, (img_loader.dataset.w, img_loader.dataset.h))
    dice_full_image = dice_numpy(mask_pred, mask_true)
    dice_by_image[img_id] = dice_full_image
    print(f"{img_id}: {dice_full_image:.4f}")

    # Drop the full-resolution arrays before the next image is processed.
    del mask_pred, mask_true
    gc.collect()

print(f"mean: {np.mean(list(dice_by_image.values())):.4f}")
    

  0%|                                           | 0/6 [00:00<?, ?it/s]

FOLD 0


100%|███████████████████████████████████| 6/6 [01:20<00:00, 13.39s/it]
  0%|                                           | 0/3 [00:00<?, ?it/s]

4ef6695ce: 0.9363


100%|███████████████████████████████████| 3/3 [00:31<00:00, 10.55s/it]
  0%|                                           | 0/3 [00:00<?, ?it/s]

0486052bb: 0.9407


100%|███████████████████████████████████| 3/3 [00:27<00:00,  9.28s/it]


2f6ecfcdf: 0.9518


In [11]:
# Binarize the prediction at 0.5 and run-length encode it.
# NOTE(review): this needs a `mask_pred` that is still alive in the kernel.
# Both inference loops in this notebook `del mask_pred` at the end of every
# iteration, so on a fresh top-to-bottom run the name is undefined here.
mask_pred = np.greater(mask_pred, 0.5).astype(np.uint8)
rle_pred = mask2rle(mask_pred)

In [15]:
# Save a one-row table (img_id, encoding) holding `rle_pred` for image
# 57512b7f1 to no_resize_pred.csv.
single_pred = pd.DataFrame(
    {"img_id": "57512b7f1", "encoding": rle_pred},
    index=[0],
    columns=["img_id", "encoding"],
)
single_pred.to_csv("no_resize_pred.csv")

In [9]:
# Print the plain average of three scores for each experiment variant.
# The sum is written out explicitly, left to right, so the floating-point
# result matches a hand-written (a + b + c) / 3.
for first, second, third in (
    (0.9398, 0.9395, 0.9474),  # base
    (0.9352, 0.9421, 0.9494),  # zarr dl
    (0.9404, 0.9394, 0.9520),  # avg
):
    print((first + second + third) / 3)

0.9422333333333333
0.9422333333333333
0.9439333333333333


In [10]:
# Mean of the three scores labelled "zarr dl" in the averaging cell.
np.asarray([0.9352, 0.9421, 0.9494]).mean()

0.9422333333333333

In [4]:
# Mean of the three scores labelled "base" in the averaging cell.
np.asarray([0.9398, 0.9395, 0.9474]).mean()

0.9422333333333333

In [11]:
!ls -l

total 107668
-rw-rw-r-- 1 trytolose trytolose 14233992 Mar 22 11:37 1.0-eda.ipynb
-rw-rw-r-- 1 trytolose trytolose    22354 Mar 23 19:50 2.0-train_split.ipynb
-rw-rw-r-- 1 trytolose trytolose  1963004 Mar 19 16:06 256x256-images.ipynb
-rw-rw-r-- 1 trytolose trytolose 18634436 Mar 23 20:00 3.0-inference.ipynb
-rw-rw-r-- 1 trytolose trytolose 70047264 Mar 23 19:48 bayanof.zip
drwxrwxr-x 2 trytolose trytolose     4096 Mar 23 13:48 oof_predicts
-rw-rw-r-- 1 trytolose trytolose    92472 Mar 21 14:17 pred_mask_3.png
drwxrwxr-x 3 trytolose trytolose     4096 Mar 23 19:49 rinat
-rw-rw-r-- 1 trytolose trytolose  5228237 Mar 21 17:18 submission.csv
drwxrwxr-x 2 trytolose trytolose     4096 Mar 20 18:37 test


In [10]:

# Predict every image listed in the submission template and store the
# run-length encoding of its thresholded mask in the `predicted` column.

# DataLoader options that do not change from one image to the next.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

for img_id in df_sub.index:
    img_ds = SingleTiffDataset(
        tiff_path=f"/hdd/kaggle/hubmap/input_v2/test/{img_id}.tiff",
        transform=valid_transform(IMG_TRAIN_SIZE),
        crop_size=CROP_SIZE,
        step=CROP_SIZE,
    )
    img_loader = DataLoader(dataset=img_ds, **loader_kwargs)

    # Threshold in the same expression so the float32 array is not kept
    # alive under a second name.
    mask_pred = (
        inference(img_loader, model, CROP_SIZE, IMG_TRAIN_SIZE).astype(np.float32) > 0.5
    ).astype(np.uint8)

    mask_rle = mask2rle(mask_pred)
    df_sub.loc[img_id, "predicted"] = mask_rle

    # Release the full-size mask before moving on to the next image.
    del mask_pred
    gc.collect()
    gc.collect()
    

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
100%|███████████████████████████████████| 3/3 [02:05<00:00, 41.72s/it]
100%|███████████████████████████████████| 2/2 [01:15<00:00, 37.97s/it]
100%|███████████████████████████████████| 4/4 [02:29<00:00, 37.44s/it]
100%|███████████████████████████████████| 4/4 [02:37<00:00, 39.45s/it]
100%|███████████████████████████████████| 5/5 [02:27<00:00, 29.53s/it]


In [11]:
# Write the submission table to 1024_resize.csv in the current working directory.
df_sub.to_csv("1024_resize.csv")

In [12]:
# Display the submission table for a quick visual check.
df_sub

Unnamed: 0_level_0,predicted
id,Unnamed: 1_level_1
2ec3f1bb9,60762295 15 60786278 34 60810260 47 60834241 6...
3589adb90,68658992 23 68688384 70 68717813 77 68747243 8...
d488c759a,191139775 58 191186435 58 191233095 58 1912797...
aa05346ff,52856702 6 52887407 36 52918120 49 52948833 61...
57512b7f1,271347246 6 271380483 13 271413720 20 27144695...


In [23]:
# Scratch check: allocate a zero-filled 10000 x 10000 int8 zarr array stored
# in 1000 x 1000 chunks.
import zarr

z = zarr.zeros(
    shape=(10000, 10000),
    chunks=(1000, 1000),
    dtype="i1",
)

In [24]:
# Display the array's repr to confirm its shape and dtype.
z

<zarr.core.Array (10000, 10000) int8>