In [1]:
import shutil
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas_path import path
from pathlib import Path
import pytorch_lightning as pl
import torch

import albumentations as A

from cloud_model import CloudModel

import warnings

# Silence every warning for the rest of the session.
warnings.filterwarnings(action="ignore")

# Data locations, resolved from the parent of the current working directory.
# NOTE(review): DATA_DIR already ends in "data" and the two paths below add
# another "data/" segment; TRAIN_FEATURES / TRAIN_LABELS are also not used in
# the visible cells — confirm the layout on disk.
DATA_DIR = Path.cwd().parent.resolve().joinpath("CloudCover/data")
TRAIN_FEATURES = DATA_DIR.joinpath("data/train_features")
TRAIN_LABELS = DATA_DIR.joinpath("data/train_labels")

# Band identifiers; they are passed to CloudModel and used to build the
# "<band>_path" feature column names.
BANDS = [
    "B02",
    "B03",
    "B04",
    "B08",
]

In [None]:
# Training schedule: first train on clean data without rotations,
# then add 90-degree rotations and flips,
# then add crops covering 1/4 to 3/4 of the image;
# change the random seed slightly at every stage.
# If any version reaches 90% quality, it can be used to label an additional dataset.

In [2]:
# Metadata table; later cells read its chip_id, "<band>_path" and label_path
# columns. The CSV path is relative to the current working directory.
train_meta = pd.read_csv("train_meta_clear.csv")

In [3]:
# Train/validation split, made on unique chip_id values so that all rows of a
# chip land on the same side of the split.
SPLIT_SEED = 9      # fixed seed so the split is reproducible
VAL_FRACTION = 0.2  # share of unique chips held out for validation (20%)

random.seed(SPLIT_SEED)
chip_ids = train_meta.chip_id.unique().tolist()
val_chip_ids = random.sample(chip_ids, round(len(chip_ids) * VAL_FRACTION))
val_mask = train_meta.chip_id.isin(val_chip_ids)
val = train_meta[val_mask].copy().reset_index(drop=True)
train = train_meta[~val_mask].copy().reset_index(drop=True)

# Separate features (chip id + one path column per band) from labels.
feature_cols = ["chip_id"] + [f"{band}_path" for band in BANDS]
val_x = val[feature_cols].copy()
val_y = val[["chip_id", "label_path"]].copy()
train_x = train[feature_cols].copy()
train_y = train[["chip_id", "label_path"]].copy()

# Report (features, labels) shapes for the train and validation parts.
print((train_x.shape, train_y.shape),(val_x.shape, val_y.shape))

((6370, 5), (6370, 2)) ((1593, 5), (1593, 2))


In [4]:
# Augmentation pipelines. Only `aug1` is active; the commented-out variants
# are stronger pipelines kept for reference.

# Earliest sequential variant (Compose instead of SomeOf):
# a1 = A.HorizontalFlip(p=0.5)
# a2 = A.RandomRotate90(p=0.7)
# a3 = A.RandomSizedCrop((128,512), 512,512, p=0.7)
# a_comp = A.Compose([a1, a2, a3])

# Active pipeline: flips and 90-degree rotations only; SomeOf is configured
# with n=1 over the three transforms below.
# NOTE(review): how the per-transform `p` values interact with SomeOf's
# selection depends on the albumentations version — confirm.
aug1 = A.SomeOf(
    transforms=[
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomRotate90(p=0.75),
        # A.RandomSizedCrop((256,512), 512,512, p=0.7)
    ],
    n=1,
)

# Variant with random sized crops added:
# aug = A.SomeOf([
#         A.VerticalFlip(p=0.5),
#         A.HorizontalFlip(p=0.5),
#         A.RandomRotate90(p=0.75),
#         A.RandomSizedCrop((256,512), 512,512, p=0.7)
#      ], n=1)

# Variant with crops and cutout, applying two transforms per call (n=2):
# aug = A.SomeOf([
#         A.VerticalFlip(p=0.5),
#         A.HorizontalFlip(p=0.5),
#         A.RandomRotate90(p=0.75),
#         A.RandomSizedCrop((256,512), 512,512, p=0.7),
#         A.Cutout(p=0.7, num_holes=10, max_h_size=40, max_w_size=40)
#      ], n=2)

In [5]:
# Set up pytorch_lightning.Trainer object
cloud_model = CloudModel(
    bands=BANDS,
    x_train=train_x,
    y_train=train_y,
    x_val=val_x,
    y_val=val_y,
    hparams = {
        "backbone":'efficientnet-b3',
        "weights": 'imagenet',
        "gpu":False,
        "num_workers":2,
        "batch_size":32,
        "transform":aug1
    }
)

# cloud_model.load_state_dict(torch.load('J:/CloudCover/data/assets/cloud_model_efnetb0_e10_cutout.pt'))
# cloud_model.transform = aug
# cloud_model.eval()

checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="iou_epoch", mode="max", verbose=True
)

early_stopping_callback = pl.callbacks.early_stopping.EarlyStopping(
    monitor="iou_epoch",
    patience=(cloud_model.patience * 3),
    mode="max",
    verbose=True,
)

trainer = pl.Trainer(
    fast_dev_run=False,
    callbacks=[checkpoint_callback, early_stopping_callback],
    max_epochs = 20
    )

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [7]:
# Run training. No dataloaders are passed here; presumably CloudModel supplies
# them itself — confirm in cloud_model.py.
trainer.fit(cloud_model)


  | Name  | Type | Params
-------------------------------
0 | model | Unet | 13.2 M
-------------------------------
13.2 M    Trainable params
0         Non-trainable params
13.2 M    Total params
52.638    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [16]:
# Save the trained weights (state_dict only) into the assets directory.
# DATA_DIR comes from the configuration cell at the top of the notebook.
submission_assets_dir = DATA_DIR / "assets"
submission_assets_dir.mkdir(parents=True, exist_ok=True)

# The file name reflects the backbone configured for this run
# ("efficientnet-b3"); the previous name said "b4", which did not match.
model_weight_path = submission_assets_dir / "cloud_model_efnetb3.pt"
torch.save(cloud_model.state_dict(), model_weight_path)