In [1]:
import sys
sys.path.insert(0, "../")

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from pathlib import Path
import tifffile
from torch.utils.data import DataLoader
from src.loops.loops import validation_full_image, inference, inference_overlap
from src.utils.utils import rle2mask, IMAGE_SIZES, mask2rle
from src.transforms.transform import base_transform, valid_transform
from src.datasets.dataset import ImageDataset, SingleTiffDataset
from src.utils.metrics import dice_numpy
import gc
import rasterio
from tqdm import tqdm

In [3]:
df = pd.read_csv("/hdd/kaggle/hubmap/input_v2/train.csv").set_index("id", drop=True)
df_sub = pd.read_csv("/hdd/kaggle/hubmap/input_v2/sample_submission.csv").set_index("id", drop=True)
df_crops = pd.read_csv("/hdd/kaggle/hubmap/input_v2/train_v1_1024/split_v2.csv")
df_valid = df_crops[df_crops["fold"] == 0].reset_index(drop=True)
img_ids = df_valid["img_id"].unique()

In [4]:
def draw_contours(img, mask, color=(0, 0, 255), thinkness=10):

    contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
#     print(len(contours))
#     print(type(img), img.dtype)
    for i in range(0, len(contours)):
#         print(i)
        cv2.polylines(img, np.int32(contours[i]), True, color, thinkness)

    
#     return img

In [5]:
# img_id = "57512b7f1"
def get_tiff(img_id, path="/hdd/kaggle/hubmap/input_v2/test"):
    img_path = Path(path) / f"{img_id}.tiff"
    dataset = rasterio.open(img_path, num_threads="all_cpus")
    # mask_main, mask_second = create_masks(img_id, rle, path)

    h, w = dataset.height, dataset.width
    if dataset.count == 3:
        image = dataset.read()#.transpose(1, 2, 0).astype(np.uint8)
        image = np.stack([x for x in image], axis=2).astype(np.uint8)
    else:
        layers = []
        subdatasets = dataset.subdatasets
        if len(subdatasets) > 0:
            for i, subdataset in enumerate(subdatasets, 0):
                layers.append(rasterio.open(subdataset))
        image = np.stack([x.read().squeeze() for x in layers], axis=2).astype(np.uint8)
    return image

In [None]:
df_public = pd.read_csv("submission.csv").set_index("id", drop=True)
df_pred_no_resize = pd.read_csv("1024_no_resize.csv").set_index('id')
df_pred_resize = pd.read_csv("1024_resize.csv").set_index("id", drop=True)

for img_id in df_public.index[1:]:
#     print(img_id)
    image = get_tiff(img_id)
    h, w = image.shape[:2]
    
    mask_no_resize = rle2mask(df_pred_no_resize.loc[img_id, "predicted"], (w, h))
    mask_public = rle2mask(df_public.loc[img_id, "predicted"], (w, h))
    mask_resize = rle2mask(df_pred_resize.loc[img_id, "predicted"], (w, h))

    draw_contours(image, mask_public, (0, 255, 0), thinkness=17)
    draw_contours(image, mask_resize, (255, 0, 0), thinkness=14)
    draw_contours(image, mask_no_resize, (0, 0, 255), thinkness=10)
    cv2.imwrite(f"/hdd/kaggle/hubmap/public_predicted/{img_id}.tiff", image)
    
    del image, mask_no_resize, mask_public, mask_resize
    gc.collect()
    gc.collect()

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


## Draw contours on train images

In [10]:
for img_id in df.index:
    print(img_id)
    image = get_tiff(img_id, path="/hdd/kaggle/hubmap/input_v2/train")
    h, w = image.shape[:2]
    
    mask_gt = rle2mask(df.loc[img_id, "encoding"], (w, h))
    draw_contours(image, mask_gt, (0, 0, 255), thinkness=10)
    cv2.imwrite(f"/hdd/kaggle/hubmap/train_with_mask/{img_id}.tiff", image)
    
    del image, mask_gt
    gc.collect()
    gc.collect()

2f6ecfcdf
<class 'numpy.ndarray'> uint8
8242609fa
<class 'numpy.ndarray'> uint8
aaa6a05cc
<class 'numpy.ndarray'> uint8
cb2d976f4
<class 'numpy.ndarray'> uint8
b9a3865fc
<class 'numpy.ndarray'> uint8
b2dc8411c
<class 'numpy.ndarray'> uint8
0486052bb
<class 'numpy.ndarray'> uint8
e79de561c
<class 'numpy.ndarray'> uint8
095bf7a1f
<class 'numpy.ndarray'> uint8
54f2eec69
<class 'numpy.ndarray'> uint8
4ef6695ce
<class 'numpy.ndarray'> uint8
26dc41664
<class 'numpy.ndarray'> uint8
c68fe75ea
<class 'numpy.ndarray'> uint8
afa5e8098
<class 'numpy.ndarray'> uint8
1e2425f28
<class 'numpy.ndarray'> uint8


In [1]:
!ls /hdd/kaggle/hubmap/train_with_mask/

0486052bb.tiff	26dc41664.tiff	54f2eec69.tiff	afa5e8098.tiff	c68fe75ea.tiff
095bf7a1f.tiff	2f6ecfcdf.tiff	8242609fa.tiff	b2dc8411c.tiff	cb2d976f4.tiff
1e2425f28.tiff	4ef6695ce.tiff	aaa6a05cc.tiff	b9a3865fc.tiff	e79de561c.tiff


In [6]:
!ls -l public_predicts/

total 66672
-rw-rw-r-- 1 trytolose trytolose 6411831 Apr 11 11:18 1024_256_score_0.9166_lb_0.923.csv
-rw-rw-r-- 1 trytolose trytolose 5915930 Apr 26 13:09 1024_512_2_head_897lb.csv
-rw-rw-r-- 1 trytolose trytolose 5403366 Apr 23 21:55 2048_1024_b3.csv
-rw-rw-r-- 1 trytolose trytolose 6372875 Apr 12 11:17 924lb.csv
-rw-rw-r-- 1 trytolose trytolose     141 Apr 14 13:00 dataset-metadata.json
-rw-rw-r-- 1 trytolose trytolose 6961344 Apr 11 21:51 effnet-0.csv
-rw-rw-r-- 1 trytolose trytolose 6378633 Apr 11 22:20 effnet-1.csv
-rw-rw-r-- 1 trytolose trytolose 6256218 Apr 11 19:44 hard_augs.csv
-rw-rw-r-- 1 trytolose trytolose 5586532 Apr 12 23:25 no_resize_1024_lb_920.csv
-rw-rw-r-- 1 trytolose trytolose 6314221 Apr 11 13:49 public_aug_w0.5_epoch_34_score_0.9175_lb_0.921.csv
-rw-rw-r-- 1 trytolose trytolose 6355181 Apr 11 11:19 public_aug_w0.5_myloss_epoch_34_score_0.9175_lb_0.918.csv
-rw-rw-r-- 1 trytolose trytolose 6288705 Apr 11 20:01 random_4096_1024_lb_921.csv


In [7]:
df_pred_923 = pd.read_csv("public_predicts/924lb.csv").set_index('id')
df_pred_aug = pd.read_csv("public_predicts/1024_512_2_head_897lb.csv").set_index('id')


for img_id in tqdm(df_pred_923.index):
#     print(img_id)
    image = get_tiff(img_id)
    h, w = image.shape[:2]
    
    mask_923 = rle2mask(df_pred_923.loc[img_id, "predicted"], (w, h))
    mask_aug = rle2mask(df_pred_aug.loc[img_id, "predicted"], (w, h))

    draw_contours(image, mask_923, (0, 255, 0), thinkness=10)
    draw_contours(image, mask_aug, (255, 0, 0), thinkness=8)
    cv2.imwrite(f"/hdd/kaggle/hubmap/public_924_and_2048_b3/{img_id}.tiff", image)
    
    del image, mask_923, mask_aug
    gc.collect()
    gc.collect()

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
100%|██████████| 5/5 [05:27<00:00, 65.50s/it]


In [7]:
!mkdir /hdd/kaggle/hubmap/public_924_and_2048_b3

In [8]:
!ls /hdd/kaggle/hubmap/ -l

total 16
drwxrwxr-x 5 trytolose trytolose 4096 Nov 22 16:56 input
drwxrwxr-x 7 trytolose trytolose 4096 Mar 25 19:39 input_v2
drwxrwxr-x 2 trytolose trytolose 4096 Apr  3 21:02 public_predicted
drwxrwxr-x 2 trytolose trytolose 4096 Apr  5 12:29 train_with_mask


In [9]:
!rm -rf /hdd/kaggle/hubmap/public_923_4096 /hdd/kaggle/hubmap/public_923_effnet_0 /hdd/kaggle/hubmap/public_predicted 

In [9]:
!ls ../../input/d48_hand_labelled.csv

calc_coords	       train_1024_256_pseudo_v1  zarr_pdf
d48_hand_labelled.csv  train_1024_512		 zarr_train
train_1024_256	       train_v3_4096_1024	 zarr_train_orig


In [7]:
df_sub

Unnamed: 0_level_0,predicted
id,Unnamed: 1_level_1
2ec3f1bb9,
3589adb90,
d488c759a,
aa05346ff,
57512b7f1,


In [6]:
df_pred_923 = pd.read_csv("public_predicts/blend.csv").set_index('id')
df_pred_aug = pd.read_csv("public_predicts/random_4096_1024_lb_921.csv").set_index('id')


for img_id in tqdm(['aa05346ff']):
#     print(img_id)
    image = get_tiff(img_id)
    h, w = image.shape[:2]
    
    mask_923 = rle2mask(df_pred_923.loc[img_id, "predicted"], (w, h))
    mask_aug = rle2mask(df_pred_aug.loc[img_id, "predicted"], (w, h))

    draw_contours(image, mask_923, (0, 255, 0), thinkness=10)
    draw_contours(image, mask_aug, (255, 0, 0), thinkness=8)
    cv2.imwrite(f"/hdd/kaggle/hubmap/vitaly_timur/{img_id}.tiff", image)
    
    del image, mask_923, mask_aug
    gc.collect()
    gc.collect()

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
100%|██████████| 1/1 [01:13<00:00, 73.89s/it]


In [2]:
!mkdir /hdd/kaggle/hubmap/vitaly_timur/

mkdir: cannot create directory '/hdd/kaggle/hubmap/vitaly_timur/': File exists
