In [2]:
import shutil
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas_path import path
from pathlib import Path
from PIL import Image
import pytorch_lightning as pl
import torch
import rasterio
import pyproj
import xarray
import xrspatial.multispectral as ms
import segmentation_models_pytorch as smp

from torchvision import transforms
from torchvision.transforms import ToTensor, ToPILImage

import albumentations as A

from cloud_model import CloudModel

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Data root, resolved from the parent of the current working directory.
DATA_DIR = Path.cwd().parent.resolve().joinpath("CloudCover/data")
# Feature and label rasters live in a nested "data/" folder under DATA_DIR.
TRAIN_FEATURES = DATA_DIR.joinpath("data/train_features")
TRAIN_LABELS = DATA_DIR.joinpath("data/train_labels")

# Band names used to build the per-band "{band}_path" columns.
BANDS = ["B02", "B03", "B04", "B08"]

# Metadata table; it must contain a "chip_id" column (read by add_paths).
train_meta = pd.read_csv(DATA_DIR.joinpath("train_metadata.csv"))

def add_paths(df, feature_dir, label_dir=None, bands=BANDS):
    """
    Add per-band feature paths (and optionally a label path) to a chip dataframe.

    For every band in `bands`, a "{band}_path" column (eg "B02_path") is set to
    feature_dir / chip_id / "{band}.tif". When `label_dir` is provided, a
    "label_path" column is set to label_dir / "{chip_id}.tif".

    `df` must have a "chip_id" column. It is modified in place and also returned.

    Raises AssertionError if any generated path does not exist; the message
    reports how many paths are missing and shows one example.
    """
    def _assert_all_exist(column):
        # `.path` is the Series accessor registered by `from pandas_path import path`.
        # Cast to bool so that `~` is a logical negation whatever dtype comes back.
        exists = df[column].path.exists().astype(bool)
        assert exists.all(), (
            f"{int((~exists).sum())} of {len(df)} paths in '{column}' do not exist, "
            f"e.g. {df.loc[~exists, column].iloc[0]}"
        )

    for band in bands:
        df[f"{band}_path"] = feature_dir / df["chip_id"] / f"{band}.tif"
        _assert_all_exist(f"{band}_path")
    if label_dir is not None:
        df["label_path"] = label_dir / (df["chip_id"] + ".tif")
        _assert_all_exist("label_path")

    return df

# Attach band and label paths to the metadata (every path is checked on disk).
train_meta = add_paths(df=train_meta, feature_dir=TRAIN_FEATURES, label_dir=TRAIN_LABELS)

In [3]:
# Remove junk labels (disabled): this code computes the mean of band 1 of each
# label raster, keeps only rows whose mean is strictly between 0 and 1, and
# writes the result to train_meta_clear.csv.
# def get_label_mean(path):
#     with rasterio.open(path) as lp:
#         label = lp.read(1).astype("float32")
#     return label.mean()
# train_meta['label_mean'] = train_meta["label_path"].apply(get_label_mean)
# train_meta = train_meta[(train_meta['label_mean'] < 1) & (train_meta['label_mean'] > 0)]
# train_meta.to_csv('train_meta_clear.csv', index=False)

In [4]:
# Load the filtered metadata: rows whose mean label value is strictly between
# 0 and 1. The CSV acts as a cache; when it is missing it is rebuilt from the
# current `train_meta` (which must already have a "label_path" column), so the
# notebook still runs top to bottom on a fresh checkout.
TRAIN_META_CLEAR_CSV = Path('train_meta_clear.csv')

if not TRAIN_META_CLEAR_CSV.exists():
    def _label_mean(label_path):
        """Return the mean of band 1 of the label raster at `label_path`."""
        with rasterio.open(label_path) as src:
            return src.read(1).astype("float32").mean()

    _with_mean = train_meta.assign(
        label_mean=train_meta["label_path"].apply(_label_mean)
    )
    _keep = (_with_mean["label_mean"] < 1) & (_with_mean["label_mean"] > 0)
    _with_mean[_keep].to_csv(TRAIN_META_CLEAR_CSV, index=False)

# Always read back from disk so column dtypes are the same whether the file was
# just rebuilt or already existed.
train_meta = pd.read_csv(TRAIN_META_CLEAR_CSV)

In [13]:
# Train / validation split, done per chip_id.
SPLIT_SEED = 11      # previously tried: 10
VAL_FRACTION = 0.2   # share of unique chips held out for validation

# Seed Python's global RNG so the sampled validation chips are reproducible.
random.seed(SPLIT_SEED)
chip_ids = train_meta.chip_id.unique().tolist()
val_chip_ids = random.sample(chip_ids, round(len(chip_ids) * VAL_FRACTION))
val_mask = train_meta.chip_id.isin(val_chip_ids)
val = train_meta[val_mask].copy().reset_index(drop=True)
train = train_meta[~val_mask].copy().reset_index(drop=True)

# Sanity checks: every row lands in exactly one split, and no chip is in both.
assert len(train) + len(val) == len(train_meta)
assert not set(train.chip_id) & set(val.chip_id), "chip_id present in both splits"

# separate features from labels
feature_cols = ["chip_id"] + [f"{band}_path" for band in BANDS]
val_x = val[feature_cols].copy()
val_y = val[["chip_id", "label_path"]].copy()
train_x = train[feature_cols].copy()
train_y = train[["chip_id", "label_path"]].copy()
print((train_x.shape, train_y.shape),(val_x.shape, val_y.shape))

((6370, 5), (6370, 2)) ((1593, 5), (1593, 2))


In [14]:
# Augmentation pipeline: random horizontal flip, random 90-degree rotation,
# then a random-sized crop (crop height drawn from 128..512) resized to 512x512.
a1 = A.HorizontalFlip(p=0.5)
a2 = A.RandomRotate90(p=0.7)
a3 = A.RandomSizedCrop(min_max_height=(128, 512), height=512, width=512, p=0.7)
a_comp = A.Compose(transforms=[a1, a2, a3])

In [15]:
# Build the CloudModel, warm-start it from previously saved weights, and set up
# a pytorch_lightning.Trainer with checkpointing and early stopping, both
# driven by the "iou_epoch" metric (higher is better).
cloud_model = CloudModel(
    bands=BANDS,
    x_train=train_x,
    y_train=train_y,
    x_val=val_x,
    y_val=val_y,
    hparams={
        "backbone": "resnet34",
        "weights": None,
        "gpu": False,
        "num_workers": 2,
        "batch_size": 32,
        "transform": a_comp,
    },
)

# Resolve the warm-start weights relative to DATA_DIR (the same "assets" folder
# the final weights are saved to) instead of a machine-specific absolute path.
# map_location="cpu" lets the file load even if it was saved from a GPU run,
# which matches the "gpu": False setting above.
pretrained_weight_path = DATA_DIR / "assets" / "cloud_model_album2.pt"
cloud_model.load_state_dict(torch.load(pretrained_weight_path, map_location="cpu"))
cloud_model.transform = a_comp
# # cloud_model['train_dataset'].transforms = a_comp
# NOTE(review): trainer.fit() is expected to put the module back into train
# mode, which would make this eval() call redundant - confirm before removing.
cloud_model.eval()

# Keep the checkpoint with the highest "iou_epoch".
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="iou_epoch", mode="max", verbose=True
)

# Stop when "iou_epoch" has not improved for twice the model's own patience.
early_stopping_callback = pl.callbacks.early_stopping.EarlyStopping(
    monitor="iou_epoch",
    patience=(cloud_model.patience * 2),
    mode="max",
    verbose=True,
)

trainer = pl.Trainer(
    fast_dev_run=False,
    callbacks=[checkpoint_callback, early_stopping_callback],
)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [16]:
# Train the model; the checkpoint and early-stopping callbacks were attached
# when the trainer was constructed.
trainer.fit(cloud_model)

Epoch 0:   3%|▎         | 7/250 [04:06<2:04:57, 30.85s/it, loss=0.201, v_num=55, loss_step=0.139]
Epoch 0:   0%|          | 0/250 [00:55<3:53:09, 55.96s/it]



  | Name  | Type | Params
-------------------------------
0 | model | Unet | 24.4 M
-------------------------------
24.4 M    Trainable params
0         Non-trainable params
24.4 M    Total params
97.759    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 250/250 [1:09:13<00:00, 16.55s/it, loss=0.248, v_num=57, loss_step=0.445, iou_step=0.770, iou_epoch=0.850]

Metric iou_epoch improved. New best score: 0.850
Epoch 0, global step 199: iou_epoch reached 0.85001 (best 0.85001), saving model to "j:\CloudCover\lightning_logs\version_57\checkpoints\epoch=0-step=199.ckpt" as top 1


Epoch 1: 100%|██████████| 250/250 [1:10:43<00:00, 16.90s/it, loss=0.231, v_num=57, loss_step=0.402, iou_step=0.773, iou_epoch=0.862, loss_epoch=0.221]

Metric iou_epoch improved by 0.012 >= min_delta = 0.0. New best score: 0.862
Epoch 1, global step 399: iou_epoch reached 0.86197 (best 0.86197), saving model to "j:\CloudCover\lightning_logs\version_57\checkpoints\epoch=1-step=399.ckpt" as top 1


Epoch 2: 100%|██████████| 250/250 [1:12:34<00:00, 17.35s/it, loss=0.234, v_num=57, loss_step=0.0808, iou_step=0.777, iou_epoch=0.860, loss_epoch=0.219]

Epoch 2, global step 599: iou_epoch was not in top 1


Epoch 3: 100%|██████████| 250/250 [1:11:15<00:00, 17.03s/it, loss=0.237, v_num=57, loss_step=0.750, iou_step=0.780, iou_epoch=0.859, loss_epoch=0.217] 

Epoch 3, global step 799: iou_epoch was not in top 1


Epoch 4: 100%|██████████| 250/250 [1:09:53<00:00, 16.71s/it, loss=0.209, v_num=57, loss_step=0.177, iou_step=0.779, iou_epoch=0.868, loss_epoch=0.216]

Metric iou_epoch improved by 0.006 >= min_delta = 0.0. New best score: 0.868
Epoch 4, global step 999: iou_epoch reached 0.86762 (best 0.86762), saving model to "j:\CloudCover\lightning_logs\version_57\checkpoints\epoch=4-step=999.ckpt" as top 1


Epoch 5: 100%|██████████| 250/250 [1:10:26<00:00, 16.84s/it, loss=0.204, v_num=57, loss_step=0.269, iou_step=0.780, iou_epoch=0.867, loss_epoch=0.211]

Epoch 5, global step 1199: iou_epoch was not in top 1


Epoch 6: 100%|██████████| 250/250 [1:10:42<00:00, 16.90s/it, loss=0.214, v_num=57, loss_step=0.674, iou_step=0.781, iou_epoch=0.870, loss_epoch=0.204]

Metric iou_epoch improved by 0.002 >= min_delta = 0.0. New best score: 0.870
Epoch 6, global step 1399: iou_epoch reached 0.86977 (best 0.86977), saving model to "j:\CloudCover\lightning_logs\version_57\checkpoints\epoch=6-step=1399.ckpt" as top 1


Epoch 7: 100%|██████████| 250/250 [1:09:56<00:00, 16.72s/it, loss=0.187, v_num=57, loss_step=0.211, iou_step=0.779, iou_epoch=0.869, loss_epoch=0.204]

Epoch 7, global step 1599: iou_epoch was not in top 1


Epoch 8: 100%|██████████| 250/250 [1:12:04<00:00, 17.23s/it, loss=0.21, v_num=57, loss_step=0.0495, iou_step=0.781, iou_epoch=0.871, loss_epoch=0.201]

Metric iou_epoch improved by 0.001 >= min_delta = 0.0. New best score: 0.871
Epoch 8, global step 1799: iou_epoch reached 0.87123 (best 0.87123), saving model to "j:\CloudCover\lightning_logs\version_57\checkpoints\epoch=8-step=1799.ckpt" as top 1


Epoch 9: 100%|██████████| 250/250 [1:10:12<00:00, 16.78s/it, loss=0.238, v_num=57, loss_step=0.749, iou_step=0.782, iou_epoch=0.871, loss_epoch=0.196]

Epoch 9, global step 1999: iou_epoch was not in top 1


Epoch 10: 100%|██████████| 250/250 [1:11:37<00:00, 17.12s/it, loss=0.208, v_num=57, loss_step=0.161, iou_step=0.778, iou_epoch=0.868, loss_epoch=0.198]

Epoch 10, global step 2199: iou_epoch was not in top 1


Epoch 11: 100%|██████████| 250/250 [1:12:32<00:00, 17.34s/it, loss=0.186, v_num=57, loss_step=0.0819, iou_step=0.782, iou_epoch=0.871, loss_epoch=0.194]

Metric iou_epoch improved by 0.000 >= min_delta = 0.0. New best score: 0.871
Epoch 11, global step 2399: iou_epoch reached 0.87136 (best 0.87136), saving model to "j:\CloudCover\lightning_logs\version_57\checkpoints\epoch=11-step=2399.ckpt" as top 1


Epoch 12:   0%|          | 1/250 [00:29<1:00:20, 14.54s/it, loss=0.185, v_num=57, loss_step=0.145, iou_step=0.782, iou_epoch=0.871, loss_epoch=0.191]   

In [8]:
# Save the model weights (state_dict only) into the "assets" folder under DATA_DIR.
DATA_DIR = Path.cwd().parent.resolve() / "CloudCover/data"
submission_assets_dir = DATA_DIR / "assets"
model_weight_path = submission_assets_dir / "cloud_model_resnet34_final.pt"

# Create the folder (and any missing parents) before writing.
submission_assets_dir.mkdir(parents=True, exist_ok=True)
torch.save(cloud_model.state_dict(), model_weight_path)