In [1]:
import shutil
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas_path import path
from pathlib import Path
from PIL import Image
import pytorch_lightning as pl
import torch
import rasterio
import pyproj
import xarray
import xrspatial.multispectral as ms
import segmentation_models_pytorch as smp

from torchvision import transforms
from torchvision.transforms import ToTensor, ToPILImage

from cloud_model import CloudModel

import warnings
warnings.filterwarnings("ignore")

In [2]:
# All data locations are derived from the parent of the current working directory.
DATA_DIR = Path.cwd().parent.resolve().joinpath("CloudCover/data")
TRAIN_FEATURES = DATA_DIR.joinpath("data/train_features")
TRAIN_LABELS = DATA_DIR.joinpath("data/train_labels")

# Band identifiers; add_paths() creates one "<band>_path" column for each of them.
BANDS = [
    "B02",
    "B03",
    "B04",
    "B08",
]

# Chip metadata table; add_paths() expects it to provide a "chip_id" column.
train_meta = pd.read_csv(DATA_DIR.joinpath("train_metadata.csv"))

def add_paths(df, feature_dir, label_dir=None, bands=None):
    """
    Add file-path columns for every chip listed in ``df``.

    For each band a column named "{band}_path" (e.g. "B02_path") is added,
    holding ``feature_dir / chip_id / "{band}.tif"``. If ``label_dir`` is
    provided, a "label_path" column holding ``label_dir / "{chip_id}.tif"``
    is added as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "chip_id" column. The new columns are added to this
        frame in place and the same object is returned.
    feature_dir : str or pathlib.Path
        Directory containing one sub-directory per chip.
    label_dir : str or pathlib.Path, optional
        Directory containing one "{chip_id}.tif" label per chip.
    bands : iterable of str, optional
        Band names to add. Defaults to the module-level ``BANDS``, looked up
        when the function is called.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the path columns added.

    Raises
    ------
    FileNotFoundError
        If any of the expected files does not exist. ``df`` is left
        unmodified in that case.
    """
    # Resolve the default at call time so a re-run of the BANDS cell is honoured.
    if bands is None:
        bands = BANDS

    feature_dir = Path(feature_dir)
    chip_ids = df["chip_id"]

    # Build every new column first; nothing is written to df until all files are verified.
    new_columns = {}
    for band in bands:
        new_columns[f"{band}_path"] = chip_ids.map(
            lambda chip_id, band=band: feature_dir / chip_id / f"{band}.tif"
        )
    if label_dir is not None:
        label_dir = Path(label_dir)
        new_columns["label_path"] = chip_ids.map(
            lambda chip_id: label_dir / f"{chip_id}.tif"
        )

    # Explicit check instead of `assert`: it survives `python -O` and names the missing files.
    missing = [
        str(file_path)
        for column in new_columns.values()
        for file_path in column
        if not file_path.exists()
    ]
    if missing:
        shown = ", ".join(missing[:5])
        raise FileNotFoundError(
            f"{len(missing)} expected TIF file(s) not found, e.g.: {shown}"
        )

    for column_name, column in new_columns.items():
        df[column_name] = column

    return df

# Attach the per-band feature paths and the label path to every metadata row.
train_meta = add_paths(
    train_meta,
    feature_dir=TRAIN_FEATURES,
    label_dir=TRAIN_LABELS,
)

In [3]:
# Remove junk labels: compute each label's mean so rows can be filtered on it afterwards.
def get_label_mean(path):
    """Return the mean of the first band of the raster at ``path``.

    The band is cast to float32 before the mean is taken.
    """
    with rasterio.open(path) as src:
        first_band = src.read(1)
    return first_band.astype("float32").mean()

# Store the mean of every chip's label raster next to its paths.
train_meta["label_mean"] = train_meta["label_path"].map(get_label_mean)

In [4]:
# Keep only chips whose label mean is strictly between 0 and 1.
# (presumably labels are 0/1 masks, so this drops all-0 and all-1 chips — TODO confirm)
train_meta = train_meta.loc[
    train_meta["label_mean"].gt(0) & train_meta["label_mean"].lt(1)
]

In [5]:
# Seed Python's global RNG so the sampled validation chips are reproducible.
random.seed(9)

# Sample 33% of the distinct chip ids (about one third) for validation.
chip_ids = train_meta["chip_id"].unique().tolist()
val_chip_ids = random.sample(chip_ids, round(0.33 * len(chip_ids)))

# Rows whose chip was sampled go to `val`; every other row goes to `train`.
val_mask = train_meta["chip_id"].isin(val_chip_ids)
train = train_meta.loc[~val_mask].copy().reset_index(drop=True)
val = train_meta.loc[val_mask].copy().reset_index(drop=True)

# Split each subset into feature columns (x) and label columns (y).
feature_cols = ["chip_id", *(f"{band}_path" for band in BANDS)]

train_x = train[feature_cols].copy()
train_y = train[["chip_id", "label_path"]].copy()

val_x = val[feature_cols].copy()
val_y = val[["chip_id", "label_path"]].copy()

# Report the (rows, columns) shape of every piece.
print((train_x.shape, train_y.shape), (val_x.shape, val_y.shape))

((5335, 5), (5335, 2)) ((2628, 5), (2628, 2))


In [8]:
# Build the CloudModel that the Trainer further down will fit, handing it the
# train/validation frames and its hyperparameters.
cloud_model = CloudModel(
    bands=BANDS,
    x_train=train_x,
    y_train=train_y,
    x_val=val_x,
    y_val=val_y,
    hparams=dict(
        backbone="resnet34",
        weights="imagenet",
        gpu=False,  # NOTE(review): the recorded run shows a GPU available but unused — confirm CPU is intended
        num_workers=2,
        batch_size=32,
        transform=None,
    ),
)

# cloud_model.load_state_dict(torch.load('J:/CloudCover/data/assets/cloud_model.pt'))
# cloud_model.transform = a1
# cloud_model['train_dataset'].transforms = a1
# cloud_model.eval()

# Both callbacks monitor the "iou_epoch" metric in "max" mode (higher is better).
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="iou_epoch",
    mode="max",
    verbose=True,
)

# Early stopping uses three times the model's own patience value.
early_stopping_callback = pl.callbacks.EarlyStopping(
    monitor="iou_epoch",
    mode="max",
    patience=cloud_model.patience * 3,
    verbose=True,
)

# Trainer limited to 10 epochs, with checkpointing and early stopping attached.
# NOTE(review): no GPU/accelerator argument is passed here; the recorded output
# reports "GPU available: True, used: False" — confirm CPU training is intended.
trainer = pl.Trainer(
    max_epochs=10,
    fast_dev_run=False,
    callbacks=[checkpoint_callback, early_stopping_callback],
)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [9]:
#cloud_model.hparams

In [10]:
# Train the model with the Trainer configured above.
trainer.fit(cloud_model)


  | Name  | Type | Params
-------------------------------
0 | model | Unet | 24.4 M
-------------------------------
24.4 M    Trainable params
0         Non-trainable params
24.4 M    Total params
97.759    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 250/250 [1:03:18<00:00, 15.13s/it, loss=0.254, v_num=47, loss_step=0.323, iou_step=0.929, iou_epoch=0.831]

Metric iou_epoch improved. New best score: 0.831
Epoch 0, global step 166: iou_epoch reached 0.83103 (best 0.83103), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=0-step=166.ckpt" as top 1


Epoch 1: 100%|██████████| 250/250 [1:02:52<00:00, 15.03s/it, loss=0.234, v_num=47, loss_step=0.185, iou_step=0.929, iou_epoch=0.825, loss_epoch=0.290]

Epoch 1, global step 333: iou_epoch was not in top 1


Epoch 2: 100%|██████████| 250/250 [1:03:37<00:00, 15.21s/it, loss=0.237, v_num=47, loss_step=0.166, iou_step=0.936, iou_epoch=0.849, loss_epoch=0.245]

Metric iou_epoch improved by 0.018 >= min_delta = 0.0. New best score: 0.849
Epoch 2, global step 500: iou_epoch reached 0.84862 (best 0.84862), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=2-step=500.ckpt" as top 1


Epoch 3: 100%|██████████| 250/250 [1:06:47<00:00, 15.97s/it, loss=0.221, v_num=47, loss_step=0.196, iou_step=0.938, iou_epoch=0.853, loss_epoch=0.224]

Metric iou_epoch improved by 0.005 >= min_delta = 0.0. New best score: 0.853
Epoch 3, global step 667: iou_epoch reached 0.85332 (best 0.85332), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=3-step=667.ckpt" as top 1


Epoch 4: 100%|██████████| 250/250 [1:06:48<00:00, 15.97s/it, loss=0.214, v_num=47, loss_step=0.143, iou_step=0.935, iou_epoch=0.847, loss_epoch=0.217]

Epoch 4, global step 834: iou_epoch was not in top 1


Epoch 5: 100%|██████████| 250/250 [1:06:51<00:00, 15.98s/it, loss=0.218, v_num=47, loss_step=0.144, iou_step=0.933, iou_epoch=0.858, loss_epoch=0.210]

Metric iou_epoch improved by 0.005 >= min_delta = 0.0. New best score: 0.858
Epoch 5, global step 1001: iou_epoch reached 0.85787 (best 0.85787), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=5-step=1001.ckpt" as top 1


Epoch 6: 100%|██████████| 250/250 [1:07:15<00:00, 16.08s/it, loss=0.191, v_num=47, loss_step=0.104, iou_step=0.943, iou_epoch=0.863, loss_epoch=0.206]

Metric iou_epoch improved by 0.006 >= min_delta = 0.0. New best score: 0.863
Epoch 6, global step 1168: iou_epoch reached 0.86344 (best 0.86344), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=6-step=1168.ckpt" as top 1


Epoch 7: 100%|██████████| 250/250 [1:08:31<00:00, 16.38s/it, loss=0.185, v_num=47, loss_step=0.154, iou_step=0.940, iou_epoch=0.867, loss_epoch=0.201]

Metric iou_epoch improved by 0.003 >= min_delta = 0.0. New best score: 0.867
Epoch 7, global step 1335: iou_epoch reached 0.86685 (best 0.86685), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=7-step=1335.ckpt" as top 1


Epoch 8: 100%|██████████| 250/250 [1:06:04<00:00, 15.80s/it, loss=0.208, v_num=47, loss_step=0.227, iou_step=0.936, iou_epoch=0.867, loss_epoch=0.196]

Metric iou_epoch improved by 0.000 >= min_delta = 0.0. New best score: 0.867
Epoch 8, global step 1502: iou_epoch reached 0.86725 (best 0.86725), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=8-step=1502.ckpt" as top 1


Epoch 9: 100%|██████████| 250/250 [1:08:13<00:00, 16.31s/it, loss=0.182, v_num=47, loss_step=0.145, iou_step=0.940, iou_epoch=0.868, loss_epoch=0.191]

Metric iou_epoch improved by 0.001 >= min_delta = 0.0. New best score: 0.868
Epoch 9, global step 1669: iou_epoch reached 0.86849 (best 0.86849), saving model to "j:\CloudCover\lightning_logs\version_47\checkpoints\epoch=9-step=1669.ckpt" as top 1


Epoch 9: 100%|██████████| 250/250 [1:08:15<00:00, 16.32s/it, loss=0.182, v_num=47, loss_step=0.145, iou_step=0.940, iou_epoch=0.868, loss_epoch=0.191]


In [11]:
# Save the model weights (state_dict only) under <DATA_DIR>/assets.
# NOTE(review): this stores the in-memory model as it stands after fitting, which
# need not be the checkpoint that ModelCheckpoint ranked best — confirm that is intended.
DATA_DIR = Path.cwd().parent.resolve().joinpath("CloudCover/data")
submission_assets_dir = DATA_DIR.joinpath("assets")
submission_assets_dir.mkdir(exist_ok=True, parents=True)

model_weight_path = submission_assets_dir.joinpath("cloud_model_clear.pt")
torch.save(cloud_model.state_dict(), model_weight_path)