## Импорт библиотек

In [1]:
import numpy as np 
import pandas as pd 
import cv2
from scipy import ndimage
from transformers import pipeline
from PIL import Image
import requests
import os
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import shutil
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch.optim as optim
from torch import nn
import csv


2024-05-03 19:09:36.387831: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-03 19:09:36.387930: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-03 19:09:36.523479: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Метрики

In [2]:
import scipy.signal as sps

def evaluate_model(inference):
    """Average the depth metrics of ``inference`` over the NYU test folder.

    Every file whose name contains ``"colors"`` is opened with PIL and passed
    to ``inference``; the matching ``"depth"`` file is loaded, inverted and
    normalised by its maximum, and both are handed to ``get_metrics``.

    Args:
        inference: callable taking a PIL image and returning the predicted
            depth in whatever array layout ``get_metrics`` accepts.

    Returns:
        dict mapping metric name to its mean over the evaluated images, or an
        empty dict when the folder holds no ``"colors"`` files.
    """
    test_path = "/kaggle/input/nyu-depth-v2/nyu_data/data/nyu2_test"
    metrics = []
    for filename in tqdm(os.listdir(test_path)):
        if "colors" not in filename:
            continue
        image = Image.open(os.path.join(test_path, filename))
        depth = inference(image)

        depth_filename = filename.replace("colors", "depth")
        depth_image = Image.open(os.path.join(test_path, depth_filename))
        ground_truth_depth = np.asarray(depth_image)

        # NOTE(review): zero-valued depth pixels turn into inf here and
        # poison the max-normalisation below — confirm the data has none.
        ground_truth_depth = 1 / ground_truth_depth
        ground_truth_depth = ground_truth_depth / ground_truth_depth.max()

        metrics.append(get_metrics(depth, ground_truth_depth))

    if not metrics:
        return {}

    # Average over the images actually evaluated; the directory listing also
    # contains the depth files, so its length is not the sample count.
    evaluated = len(metrics)
    return {
        key: sum(entry[key] for entry in metrics) / evaluated
        for key in metrics[0]
    }


def get_metrics(x, y, size=5):
    """Compute all depth metrics for a batch and sum each over the batch.

    The helper metrics reduce over ``axis=(1, 2)``, so ``x`` and ``y`` are
    indexed as 3-D arrays (batch first). Pixels where ``y == 0`` are zeroed
    in a private copy of ``x`` before any metric is computed.

    Args:
        x: predicted depth array.
        y: ground-truth depth array of the same shape.
        size: window size forwarded to ``rank_and_census``.

    Returns:
        dict with keys ``rmse``, ``mae``, ``mre``, ``gradient``, ``rank``,
        ``census``, ``delta1``, ``delta2``, ``delta3``; every value is the
        sum of the per-image metric over the batch.
    """
    # Work on a copy so the caller's prediction array is not modified.
    x = np.array(x, copy=True)
    x[y == 0] = 0

    rank, census = rank_and_census(x, y, size)
    return {
        'rmse': np.sum(rmse(x, y)),
        'mae': np.sum(mae(x, y)),
        'mre': np.sum(mre(x, y)),
        'gradient': np.sum(gradient_metric(x, y)),
        'rank': np.sum(rank),
        'census': np.sum(census),
        'delta1': np.sum(get_delta(x, y, 1.25)),
        'delta2': np.sum(get_delta(x, y, 1.25 ** 2)),
        'delta3': np.sum(get_delta(x, y, 1.25 ** 3)),
    }

def get_delta(x, y, delta):
    """Return, per image, the fraction of pixels within the ``delta`` ratio.

    For every pixel the larger of ``x / (y + 1e-6)`` and ``y / (x + 1e-6)``
    is compared against ``delta``; the count of pixels strictly below it is
    divided by the number of pixels in one image (axes 1 and 2).
    """
    ratio_pred_over_gt = x / (y + 1e-6)
    ratio_gt_over_pred = y / (x + 1e-6)
    worst_ratio = np.stack((ratio_pred_over_gt, ratio_gt_over_pred), axis=-1).max(axis=-1)

    pixels_per_image = x.shape[1] * x.shape[2]
    within = (worst_ratio < delta).sum(axis=(1, 2))
    return within / pixels_per_image

def rmse(x, y):
    """Return the per-image root-mean-square error over axes 1 and 2."""
    squared_error = (x - y) ** 2
    pixels_per_image = x.shape[1] * x.shape[2]
    mean_squared = np.sum(squared_error, axis=(1, 2)) / pixels_per_image
    return np.sqrt(mean_squared)

def mae(x, y):
    """Return the per-image mean absolute error over axes 1 and 2."""
    absolute_error = np.abs(x - y)
    pixels_per_image = x.shape[1] * x.shape[2]
    return np.sum(absolute_error, axis=(1, 2)) / pixels_per_image

def mre(x, y):
    """Return the per-image mean relative error ``|x - y| / (y + 1e-7)``."""
    relative_error = np.abs(x - y) / (y + 1e-7)
    pixels_per_image = x.shape[1] * x.shape[2]
    return np.sum(relative_error, axis=(1, 2)) / pixels_per_image

def convolution(image, conv):
    """Slide ``conv`` over a zero-padded ``image`` and sum the products.

    The kernel is applied without flipping; the output has the same height
    and width as ``image``. The padding width is ``conv.shape[0] // 2``.
    """
    rows, cols = image.shape[0], image.shape[1]
    pad = conv.shape[0] // 2
    window = 2 * pad + 1

    padded = np.pad(image, pad, 'constant')
    result = np.zeros((rows, cols))
    for r in range(rows):
        for c in range(cols):
            patch = padded[r:r + window, c:c + window]
            result[r][c] = np.sum(patch * conv)
    return result

def gradient_metric(x, y):
    """Return the per-image mean absolute difference of Sobel responses.

    Both inputs are convolved (``fftconvolve``, ``mode='same'``) with two
    3x3 Sobel kernels carrying a leading singleton axis, so each image of the
    batch is filtered independently. The absolute differences of the two
    responses are added and averaged over axes 1 and 2.
    """
    sobel_kernels = (
        np.array([[[1, 0, -1],
                   [2, 0, -2],
                   [1, 0, -1]]]),
        np.array([[[1, 2, 1],
                   [0, 0, 0],
                   [-1, -2, -1]]]),
    )

    first, second = (
        np.abs(sps.fftconvolve(x, kernel, mode='same') - sps.fftconvolve(y, kernel, mode='same'))
        for kernel in sobel_kernels
    )

    pixels_per_image = x.shape[1] * x.shape[2]
    return np.sum(first + second, axis=(1, 2)) / pixels_per_image

def neighborhood(x, size=5):
    """Gather, for every pixel, the values of its square neighbourhood.

    ``x`` is zero-padded by ``size // 2`` along its last two axes. The
    result has a new axis 1 that enumerates the window offsets in row-major
    order (row offset outermost), so for odd ``size`` the centre pixel sits
    at index ``size ** 2 // 2``.
    """
    half = size // 2
    rows, cols = np.indices(x.shape[1:])
    padded = np.pad(x, ((0,), (half,), (half,)), 'constant')

    # Coordinates are shifted by `half` to address the padded array.
    offsets = range(-half, half + 1)
    row_maps = np.array([rows + half + d_row for d_row in offsets for _ in offsets])
    col_maps = np.array([cols + half + d_col for _ in offsets for d_col in offsets])

    return padded[:, row_maps, col_maps]
    
def one_hot(x, size):
    """Encode each neighbour as 1 when it is smaller than the window centre.

    Axis 1 of ``x`` enumerates the window positions; the centre position
    (index ``size ** 2 // 2``) is removed from the returned encoding.
    """
    center_index = size ** 2 // 2
    center = x[:, center_index][:, np.newaxis]
    below_center = (x < center).astype(int)

    keep = np.arange(x.shape[1]) != center_index
    return below_center[:, keep]
    
def rank_metric(en_x, en_y, size=5):
    """Return the per-image mean absolute difference of pixel ranks.

    A pixel's rank is the sum of its encoding along axis 1. ``size`` is
    accepted for signature symmetry and is not used.
    """
    rank_difference = np.abs(en_x.sum(axis=1) - en_y.sum(axis=1))
    pixels_per_image = en_x.shape[2] * en_x.shape[3]
    return rank_difference.sum(axis=(1, 2)) / pixels_per_image

def census_metric(en_x, en_y, size=5):
    """Return the per-image mean count of differing encoding bits per pixel.

    ``size`` is accepted for signature symmetry and is not used.
    """
    mismatches = (en_x != en_y).astype(int)
    pixels_per_image = en_x.shape[2] * en_x.shape[3]
    return mismatches.sum(axis=(1, 2, 3)) / pixels_per_image

def rank_and_census(x, y, size=5):
    """Compute the rank and census metrics of ``x`` against ``y``.

    Both arrays are expanded into per-pixel neighbourhoods, encoded with
    ``one_hot`` and compared.

    Returns:
        tuple ``(rank, census)`` of per-image metric arrays.
    """
    encoded_x = one_hot(neighborhood(x, size), size)
    encoded_y = one_hot(neighborhood(y, size), size)

    rank = rank_metric(encoded_x, encoded_y, size)
    census = census_metric(encoded_x, encoded_y, size)
    return rank, census

In [3]:
# Sanity check: a constant prediction of 1.26**2 against a ground truth of ones.
# The ratio lies between 1.25**2 and 1.25**3, so only delta3 counts the pixels.
get_metrics(np.ones((4, 10, 10)) * 1.26 ** 2, np.ones((4, 10, 10)))

{'rmse': 2.3504,
 'mae': 2.350400000000001,
 'mre': 2.350399764960024,
 'gradient': 3.572608000000003,
 'rank': 0.0,
 'census': 0.0,
 'delta1': 0.0,
 'delta2': 0.0,
 'delta3': 4.0}

In [4]:
y.dtype

NameError: name 'y' is not defined

In [None]:
np.sum(x+y)

## Модели

### GLPN

In [None]:
from transformers import GLPNImageProcessor, GLPNForDepthEstimation
import torch
import numpy as np
from PIL import Image
import requests

# Sample RGB frame used to smoke-test the pretrained model.
path = "/kaggle/input/nyuv2-official-split-dataset/train/bathroom_0007/rgb_00001.png"
image = Image.open(path)

# `processor` and `model` are module-level globals; `processor` is read by
# glpn_cuda_inference and `model` is passed to the evaluation cells.
processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

# prepare image for the model
inputs = processor(images=image, return_tensors="pt")


# Single forward pass; the prediction is converted to a NumPy array.
outputs = model(**inputs)
predicted_depth = outputs.predicted_depth.cpu().detach().numpy()



In [None]:
import cv2
def image2depth(path):
    """Load a depth PNG and rescale it to a float32 map in ``[0, 10]``.

    The raw pixel values are divided by ``2**16 - 1`` and multiplied by 10
    (presumably metres for 16-bit NYU depth maps — confirm).

    Args:
        path: path of the depth image.

    Returns:
        float32 array with the rescaled depth.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None instead of raising.
    if raw is None:
        raise FileNotFoundError(f"Could not read depth image: {path}")

    depth = raw.astype('float32')
    depth /= (2**16 - 1)
    depth *= 10.0
    return depth

In [None]:
def glpn_cuda_inference(pathes, model, device='cuda'):
    """Run ``model`` on the images at ``pathes`` without tracking gradients.

    The images are preprocessed with the module-level ``processor`` and the
    resulting batch is moved to ``device``; the model is used as passed in.

    Returns:
        the ``predicted_depth`` tensor of the model output.
    """
    batch = processor(
        images=[Image.open(image_path) for image_path in pathes],
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        return model(**batch).predicted_depth


In [None]:
pred = glpn_cuda_inference(["/kaggle/working/rgb_00766.png"], model)
plt.imshow(pred.cpu().numpy()[0])

In [None]:
plt.imshow(np.asarray(Image.open("/kaggle/input/nyuv2-official-split-dataset/test/official/depth_00766.png")))

## Оценка

In [None]:
DEVICE='cuda'
from tqdm import tqdm
from random import choice

class CarvanaDataset(Dataset):
    """Dataset over the NYUv2 official test split with optional camera noise.

    Each item is ``(depth, img_path, depth_path)``: the ground-truth depth
    map plus the *paths* of the RGB and depth images. When ``dn`` is truthy,
    a noisy copy of the RGB image is written to ``out_root`` with
    ``make_noise`` and its path is returned instead of the original.
    """

    def __init__(self, dn):
        """Store the noise level ``dn`` and index the RGB files."""
        self.test_root = "/kaggle/input/nyuv2-official-split-dataset/test/official"
        self.out_root = "/kaggle/working"
        self.dn = dn
        self.init_path()

    def image2depth(self, path):
        """Load a depth PNG and rescale it to a float32 map in ``[0, 10]``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        # cv2.imread signals failure by returning None instead of raising.
        if raw is None:
            raise FileNotFoundError(f"Could not read depth image: {path}")
        depth = raw.astype('float32')
        depth /= (2**16 - 1)
        depth *= 10.0
        return depth

    def init_path(self):
        """Collect the RGB file names of ``test_root`` in sorted order."""
        # os.listdir order is arbitrary; sorting keeps indices reproducible.
        self.images = sorted(f for f in os.listdir(self.test_root) if "rgb" in f)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        filename = self.images[index]
        clean_path = os.path.join(self.test_root, filename)

        img_path = clean_path
        if self.dn:
            # The noisy copy keeps the original file name under out_root.
            img_path = os.path.join(self.out_root, filename)
            make_noise(clean_path, img_path, self.dn)

        # Substitute only inside the file name so a directory that happens
        # to contain "rgb" cannot corrupt the depth path.
        depth_path = os.path.join(self.test_root, filename.replace("rgb", "depth"))
        depth = self.image2depth(depth_path)

        return depth, img_path, depth_path

def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Announce the save on stdout and serialise ``state`` to ``filename``."""
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)

def train_fn(loader, model, optimizer, loss_fn, scaler):
    """Run one training epoch and return the loss of the last batch.

    Args:
        loader: iterable yielding ``(depth, img_path, depth_path)`` batches.
        model: network forwarded through ``depth_anything_cuda_inference``.
        optimizer: optimizer stepped through ``scaler``.
        loss_fn: callable ``(predictions, targets)`` returning per-sample
            losses; their mean is back-propagated.
        scaler: gradient scaler used for mixed-precision training.

    Returns:
        float loss value of the final batch.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    loop = tqdm(loader)
    last_loss = None

    for depth, img_path, depth_path in loop:
        targets = depth.float().to(device=DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            # NOTE(review): depth_anything_cuda_inference is not defined in
            # the visible part of the file; if it wraps the forward pass in
            # torch.no_grad() like glpn_cuda_inference does, backward() fails.
            predictions = depth_anything_cuda_inference(img_path, model)
            # Use the loss supplied by the caller rather than a fixed one.
            loss = torch.mean(loss_fn(predictions, targets))

        # backward
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # update tqdm loop
        last_loss = loss.item()
        loop.set_postfix(loss=last_loss)

    if last_loss is None:
        raise ValueError("train_fn received an empty loader")
    return last_loss

def write_demo(preds, pathes, dir_root):
    """Save each prediction as an 8-bit image named after its source file.

    Every prediction is rescaled so that its maximum maps to 255 and is
    written to ``dir_root`` under the base name of the matching path.

    Args:
        preds: iterable of 2-D NumPy arrays.
        pathes: iterable of source image paths, aligned with ``preds``.
        dir_root: existing directory receiving the images.
    """
    for pred, path in zip(preds, pathes):
        out_path = os.path.join(dir_root, os.path.basename(path))

        peak = pred.max()
        if peak > 0:
            # Clip so negative predictions cannot wrap around in uint8.
            scaled = np.clip(pred * 255 / peak, 0, 255)
        else:
            # An all-zero (or non-positive) map would divide by zero.
            scaled = np.zeros_like(pred)

        Image.fromarray(scaled.astype(np.uint8)).save(out_path)


def check_acc(loader, model, dir_root=None, device="cuda"):
    """Evaluate ``model`` on ``loader`` and return the mean of every metric.

    Args:
        loader: iterable yielding ``(depth, img_path, depth_path)`` batches.
        model: network run through ``glpn_cuda_inference``.
        dir_root: directory receiving the demo images; when ``None`` no
            images are written.
        device: device the preprocessed inputs are moved to.

    Returns:
        dict mapping metric name to its mean over all evaluated samples, or
        an empty dict when the loader yields nothing.
    """
    model.eval()
    totals = {}
    n_samples = 0
    try:
        with torch.no_grad():
            for depth, img_path, depth_path in tqdm(loader):
                preds = glpn_cuda_inference(img_path, model, device)
                preds = preds.cpu().detach().numpy()
                if dir_root is not None:
                    write_demo(preds, img_path, dir_root)
                y = depth.cpu().detach().numpy()

                # get_metrics returns per-batch sums, so accumulate them and
                # divide by the sample count once at the end.
                for k, v in get_metrics(preds, y).items():
                    totals[k] = totals.get(k, 0.0) + v
                n_samples += len(y)
    finally:
        # Restore training mode even if evaluation fails midway.
        model.train()

    if n_samples == 0:
        return {}
    return {k: v / n_samples for k, v in totals.items()}


def get_loader(batch_size, dn, num_workers=1, pin_memory=True):
    """Return a shuffling ``DataLoader`` over ``CarvanaDataset(dn)``.

    Args:
        batch_size: number of samples per batch.
        dn: noise level forwarded to the dataset.
        num_workers: number of loader worker processes.
        pin_memory: forwarded to ``DataLoader``.
    """
    return DataLoader(
        CarvanaDataset(dn),
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=True,
    )


def load_weights(model, weights_path):
    """Load model weights from a checkpoint file into ``model`` in place.

    Args:
        model: module whose parameters are overwritten.
        weights_path: checkpoint holding the weights under ``'state_dict'``.
    """
    # Map to CPU first so a checkpoint saved on a GPU also loads on a host
    # without CUDA; load_state_dict copies onto the model's own device.
    # NOTE: torch.load unpickles the file — only load trusted checkpoints.
    weights = torch.load(weights_path, map_location="cpu")
    model.load_state_dict(weights['state_dict'])

def get_device():
    """Return the first CUDA device when available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    return torch.device("cpu")

def get_loaders(
        batch_size,
        transform_x,
        transform_y,
        num_workers=4,
        pin_memory=True,
):
    """Build the training and validation loaders through ``get_loader``.

    Returns:
        tuple ``(train_loader, val_loader)``.
    """
    # NOTE(review): get_loader in this file accepts only
    # (batch_size, dn, num_workers, pin_memory), so these six-argument calls
    # raise TypeError. They look written for an earlier get_loader that took
    # a split name and two transforms — confirm the intended signature.
    train_loader = get_loader(batch_size, "train", transform_x, transform_y, num_workers,pin_memory)
    val_loader = get_loader(batch_size, "test", transform_x, transform_y, num_workers,pin_memory)

    return train_loader, val_loader


def overlay_mask(image_dir_path, name, mask_true, mask_predict):
    """Return a side-by-side overlay of two masks on the same image.

    The image ``name`` from ``image_dir_path`` is resized to the mask size
    and placed twice, side by side. The green channel of the left half is
    blended with the first element of ``mask_true`` and that of the right
    half with the first element of ``mask_predict``.

    Returns:
        float array of shape ``(height, 2 * width, 3)``.
    """
    alpha = 0.7
    true_np = mask_true[0].cpu().numpy()
    predicted_np = mask_predict[0].cpu().numpy()

    height, width = true_np.shape[0], true_np.shape[1]
    image = cv2.resize(cv2.imread(os.path.join(image_dir_path, name)), (width, height))

    result = np.zeros((height, width * 2, 3))
    for offset, mask in ((0, true_np), (width, predicted_np)):
        half = slice(offset, offset + width)
        result[:, half, :] = image
        result[:, half, 1] = image[:, :, 1] * alpha + mask * (1 - alpha) * 255

    return result




In [None]:
import os, shutil

def eval_model(model, model_name, batch_size=4):
    """Evaluate ``model`` for noise levels 0 through 10.

    A fresh ``/kaggle/working/<model_name>`` directory is created (any
    existing one is removed), with one sub-directory per noise level that
    receives the demo images written by ``check_acc``.

    Returns:
        list with one metrics dict per noise level, in increasing order.
    """
    model = model.to("cuda")

    demo_dir = os.path.join("/kaggle/working", model_name)
    if os.path.exists(demo_dir):
        shutil.rmtree(demo_dir)
    os.mkdir(demo_dir)

    all_metrics = []
    for dn in range(11):
        print("Noise " + str(dn) + " processing...")
        loader = get_loader(batch_size, dn)

        noise_dir = os.path.join(demo_dir, str(dn))
        os.mkdir(noise_dir)

        level_metrics = check_acc(loader, model, noise_dir)
        print(level_metrics)
        all_metrics.append(level_metrics)
    return all_metrics

In [None]:
eval_model(model, "GLPN")

In [None]:
os.mkdir("/kaggle/working/GLPN")

In [None]:
len(metrics)

In [None]:
import json

with open("/kaggle/working/GLPN/metrics.json", "w") as f:
    json.dump(metrics, f)

In [None]:
!rm /kaggle/working/rgb_01196.png

In [None]:
from IPython.display import FileLink
FileLink(r'/kaggle/working/glpn.zip')

In [None]:
import os
os.path.exists("/kaggle/working/glpn.zip")

In [None]:
!zip -r glpn.zip /kaggle/working/GLPN

In [None]:
!ls /kaggle/working

In [None]:
metrics = [{'rmse': 0.4814945667935402,
  'mae': 0.3206941941170881,
  'mre': 0.12280936993590187,
  'gradient': 0.22522862391157691,
  'rank': 3.168125129411955,
  'census': 7.754340118501523,
  'delta1': 0.8665908412175075,
  'delta2': 0.9709242601618244,
  'delta3': 0.9909827444094043},
 {'rmse': 0.5275929915385921,
  'mae': 0.3598546702611458,
  'mre': 0.1379353528698407,
  'gradient': 0.23750153419198558,
  'rank': 3.474044611286314,
  'census': 8.505195252771408,
  'delta1': 0.8301682753567787,
  'delta2': 0.9615586723926478,
  'delta3': 0.9887387262280197},
 {'rmse': 0.5377926300181683,
  'mae': 0.367930374388544,
  'mre': 0.14073925063029225,
  'gradient': 0.23871184553152233,
  'rank': 3.4880335306367845,
  'census': 8.577604002413036,
  'delta1': 0.8230637582823643,
  'delta2': 0.9587884502819188,
  'delta3': 0.9881602896438586},
 {'rmse': 0.5544007623581283,
  'mae': 0.38123224451879717,
  'mre': 0.1455959778045903,
  'gradient': 0.24045466460292766,
  'rank': 3.499783966655515,
  'census': 8.657543875629141,
  'delta1': 0.8099954357399975,
  'delta2': 0.953052131116208,
  'delta3': 0.9862977828746178},
 {'rmse': 0.5719793274779729,
  'mae': 0.3950834902006064,
  'mre': 0.14992653350460658,
  'gradient': 0.24201325792359007,
  'rank': 3.5107412169103616,
  'census': 8.743372360991659,
  'delta1': 0.7954188218734076,
  'delta2': 0.9476947898748083,
  'delta3': 0.9857629431861622},
 {'rmse': 0.5917600336299903,
  'mae': 0.4107609790642569,
  'mre': 0.15522922813345522,
  'gradient': 0.24378511090153396,
  'rank': 3.520897765553325,
  'census': 8.832286933175649,
  'delta1': 0.7823839722301861,
  'delta2': 0.9435883445623087,
  'delta3': 0.9833232839975157},
 {'rmse': 0.6049998643679008,
  'mae': 0.42264275757909187,
  'mre': 0.15952416215346368,
  'gradient': 0.24530417233800264,
  'rank': 3.5302816601363407,
  'census': 8.915712960308362,
  'delta1': 0.770624780995158,
  'delta2': 0.938203563009684,
  'delta3': 0.9820106087936422},
 {'rmse': 0.6281002518102758,
  'mae': 0.44030902300648966,
  'mre': 0.1656248971810764,
  'gradient': 0.2470700483328357,
  'rank': 3.5396002862990583,
  'census': 8.995570736573008,
  'delta1': 0.7571966086104741,
  'delta2': 0.929031361493374,
  'delta3': 0.9784937245158001},
 {'rmse': 0.6451089990249491,
  'mae': 0.45517374847520997,
  'mre': 0.17064408669777964,
  'gradient': 0.24900534846005584,
  'rank': 3.5495331563376014,
  'census': 9.068534384755665,
  'delta1': 0.742680748678007,
  'delta2': 0.9222271348990191,
  'delta3': 0.9760562802624868},
 {'rmse': 0.6628112682641608,
  'mae': 0.4711292466329874,
  'mre': 0.1770529132953842,
  'gradient': 0.2501698531528833,
  'rank': 3.556274593248279,
  'census': 9.140041824947438,
  'delta1': 0.7283142201834861,
  'delta2': 0.9141798119345693,
  'delta3': 0.9737691579462916},
 {'rmse': 0.6797625624358083,
  'mae': 0.4842705799145558,
  'mre': 0.18058565123244044,
  'gradient': 0.2517166397587941,
  'rank': 3.5677298157173802,
  'census': 9.223527630447249,
  'delta1': 0.7175993286506119,
  'delta2': 0.9107334223289372,
  'delta3': 0.9710204032874621}]

In [None]:
!rm -r /kaggle/working

In [None]:
model = model.to("cuda")
loader = get_loader(4, 10)
# check_acc needs a directory for its demo images; a dedicated folder keeps
# them apart from the noisy inputs the dataset writes to /kaggle/working.
demo_dir = "/kaggle/working/GLPN_noise_10"
os.makedirs(demo_dir, exist_ok=True)
metrics = check_acc(loader, model, demo_dir)


In [None]:
metrics # dn = 0

In [None]:
metrics # dn = 2

In [None]:
metrics # dn = 5

In [None]:
metrics # dn = 10

## Шумы

In [None]:
import numpy as np
from PIL import Image

def adu2photons(image, qe=0.69, sensitivity=5.88):
    """Convert ADU values to a photon estimate.

    ``image`` is divided by ``qe * sensitivity`` (plus 1e-7 to avoid a zero
    denominator).
    """
    adu_per_photon = qe * sensitivity + 1e-7
    return image / adu_per_photon

def add_camera_noise(input_irrad_photons, qe=0.69, sensitivity=5.88,
                     dark_noise=2.29, bitdepth=8, baseline=100,
                     rs=np.random.RandomState(seed=42)):
    """Simulate camera noise on an array of photon counts.

    Poisson shot noise is drawn around the input, scaled by ``qe`` to
    electrons, Gaussian noise with standard deviation ``dark_noise`` is
    added, and the result is multiplied by ``sensitivity``, truncated to
    integers and clipped to ``[0, 2**bitdepth - 1]``.

    ``baseline`` is accepted but not applied. The default ``rs`` is created
    once at definition time, so calls relying on it share one random stream.

    Returns:
        integer array of ADU values with the shape of the input.
    """
    # Shot noise (drawn first so the random stream order is preserved).
    shot_photons = rs.poisson(input_irrad_photons, size=input_irrad_photons.shape)

    # Photons -> electrons, then additive Gaussian dark noise.
    signal_electrons = qe * shot_photons
    noisy_electrons = rs.normal(scale=dark_noise, size=signal_electrons.shape) + signal_electrons

    # Electrons -> discrete ADU, limited to the sensor's bit depth.
    adu = (noisy_electrons * sensitivity).astype(int)
    return np.clip(adu, 0, 2**bitdepth - 1)

In [None]:
import matplotlib.pyplot as plt

# Load one test RGB frame as a NumPy array and display it.
filename = "/kaggle/input/nyuv2-official-split-dataset/test/official/rgb_00017.png"
image = Image.open(filename)
image = np.asarray(image)
plt.imshow(image)



In [None]:
# Convert the frame to photon counts, add camera noise with dark_noise=5
# and display the noisy result.
photons = adu2photons(image)
noised = add_camera_noise(photons, dark_noise=5)
plt.imshow(noised)

In [None]:
def make_noise(path_in, path_out, dn):
    """Write a noisy copy of the image at ``path_in`` to ``path_out``.

    The pixels are converted with ``adu2photons``, passed through
    ``add_camera_noise`` with ``dark_noise=dn`` and saved as 8-bit data.
    """
    pixels = np.asarray(Image.open(path_in))
    noisy = add_camera_noise(adu2photons(pixels), dark_noise=dn)
    Image.fromarray(noisy.astype(np.uint8)).save(path_out)

In [None]:
make_noise(filename, "/kaggle/working/result.png", 5)

In [None]:

# Constants
# NOTE(review): IMAGE_HEIGHT, IMAGE_WIDTH, TRAIN_CSV and VAL_CSV are not
# referenced in the visible part of the file — confirm they are still needed.
IMAGE_HEIGHT = 480
IMAGE_WIDTH = 640
PIN_MEMORY = True
NUM_WORKERS = 1
NUM_EPOCHS = 20
BATCH_SIZE = 4
LEARNING_RATE = 7e-5
TRAIN_CSV, VAL_CSV = "/kaggle/input/nyu-resized/content/nyu_data/data/nyu2_train.csv", "/kaggle/input/nyu-resized/content/nyu_data/data/nyu2_test.csv"
# Device the training tensors and the model are moved to.
DEVICE='cuda'
# Directory where train_model saves its checkpoint.
WEIGHTS_DIR = "/kaggle/working"

# Loss function
def loss_fn(predictions, targets):
    """Return the per-sample mean absolute error after normalisation.

    Both ``predictions`` and ``targets`` are shifted by the per-sample median
    of ``targets`` and divided by the per-sample mean absolute deviation of
    ``targets`` from that median (plus 1e-6).

    Args:
        predictions: tensor indexed as ``(batch, H, W)``.
        targets: 4-D tensor reshaped to ``(batch, shape[2], shape[3])``.

    Returns:
        tensor of shape ``(batch,)`` with one loss value per sample.
    """
    targets = targets.reshape((targets.shape[0], targets.shape[2], targets.shape[3]))

    batch_size = predictions.size()[0]
    H, W = predictions.size()[1], predictions.size()[2]

    # Per-sample shift: median of the ground truth.
    t_d = torch.median(torch.flatten(targets, start_dim=1), dim=1).values
    t_d = t_d.reshape((batch_size, 1, 1))

    # Per-sample scale: mean absolute deviation from that median.
    s_d = torch.abs(targets - t_d).sum(1).sum(1) / (H * W)
    s_d = s_d.reshape((batch_size, 1, 1))

    x_norm = (predictions - t_d) / (s_d + 1e-6)
    y_norm = (targets - t_d) / (s_d + 1e-6)

    return torch.abs(x_norm - y_norm).sum(1).sum(1) / (H * W)

def glpn_loss_fn(pred, targets):
    """Return the per-sample scale-invariant log loss.

    With ``d = log(pred) - log(targets)`` over the valid pixels, the loss is
    ``sum(d**2) / n - sum(d)**2 / (2 * n**2)``, reduced over the last two
    axes. A pixel is valid when both ``pred`` and ``targets`` are positive;
    ``n`` is the number of valid pixels of the sample, which equals
    ``640 * 480`` for a fully valid 480x640 map.

    Args:
        pred: predicted depth tensor.
        targets: ground-truth depth tensor of the same shape.

    Returns:
        tensor with the last two axes reduced away.
    """
    # log() of a non-positive depth is -inf/NaN; such pixels get a zero
    # log-difference so they cannot poison the sums.
    valid = (pred > 0) & (targets > 0)
    safe_pred = torch.where(valid, pred, torch.ones_like(pred))
    safe_targets = torch.where(valid, targets, torch.ones_like(targets))
    d_diff = torch.log(safe_pred) - torch.log(safe_targets)

    # clamp guards against a sample without any valid pixel.
    n = valid.sum(dim=(-2, -1)).clamp(min=1).to(d_diff.dtype)

    squared_term = (d_diff ** 2).sum(-1).sum(-1) / n
    mean_term = d_diff.sum(-1).sum(-1) ** 2 / (2 * n ** 2)
    return squared_term - mean_term


# Model training

def train_model(model):
    """Train ``model`` for ``NUM_EPOCHS`` epochs with periodic validation.

    Every fifth epoch the model is evaluated with ``check_acc``; whenever the
    validation ``mae`` improves, the model and optimizer state are saved to
    ``WEIGHTS_DIR``.

    Args:
        model: network to train; it is moved to ``DEVICE``.

    Returns:
        tuple ``(metrics_list, loss_list)`` where ``metrics_list`` holds
        ``(epoch, metrics)`` pairs and ``loss_list`` one loss per epoch.
    """
    model = model.to(DEVICE)
    transform = A.Compose(
        [
            ToTensorV2(),
        ],
        is_check_shapes=False
    )

    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        BATCH_SIZE,
        transform,
        transform,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    scaler = torch.cuda.amp.GradScaler()
    metrics_list = []
    loss_list = []
    # Lowest validation MAE seen so far; starts above any plausible value.
    best_mae = 1000

    # check_acc writes its demo images to a directory, so give it a
    # dedicated one instead of omitting the argument.
    demo_dir = os.path.join(WEIGHTS_DIR, "val_demo")
    os.makedirs(demo_dir, exist_ok=True)

    for epoch in range(NUM_EPOCHS):
        print(f"Training epoch {epoch}")

        # Train for one epoch and record the loss of its last batch.
        loss = train_fn(train_loader, model, optimizer, loss_fn, scaler)
        loss_list.append(float(loss))
        print("Loss value: ", float(loss))

        if (epoch + 1) % 5 == 0:
            new_acc = check_acc(val_loader, model, demo_dir, device=DEVICE)
            metrics_list.append((epoch+1, new_acc))
            print("Metrics: ", new_acc)

            if new_acc['mae'] < best_mae:
                best_mae = new_acc['mae']
                # save model
                checkpoint = {
                    "state_dict": model.state_dict(),
                    "optimizer":optimizer.state_dict(),
                }
                log_name  = os.path.join(WEIGHTS_DIR, "yoloX_MSE_loss__checkpoint.pth.tar")
                save_checkpoint(checkpoint, filename=log_name)

    return metrics_list, loss_list