In [1]:
!pip install timm -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/2.2 MB[0m [31m15.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.2/2.2 MB[0m [31m47.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### References
[Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer](https://arxiv.org/abs/1907.01341)

[Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413)

Please cite our papers if you use our models:

In [4]:
import torch
model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)

midas = torch.hub.load("intel-isl/MiDaS", model_type)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

Downloading: "https://github.com/intel-isl/MiDaS/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt" to /root/.cache/torch/hub/checkpoints/dpt_large_384.pt
100%|██████████| 1.28G/1.28G [00:08<00:00, 154MB/s] 


DPTDepthModel(
  (pretrained): Module(
    (model): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_featur

In [5]:
from PIL import Image
import numpy as np
def save_image(tensor, filename):
  array_data = tensor
  array_data_scaled = (array_data - np.min(array_data)) / (np.max(array_data) - np.min(array_data)) * 255
  array_data_uint8 = array_data_scaled.astype(np.uint8)
  image = Image.fromarray(array_data_uint8)
  image.save(filename)  # To save the image as a file


In [6]:
import torch
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


In [7]:
import os
import cv2
from torch.utils.data import Dataset
class load_data(Dataset):
    def __init__(self, data_dir, transform=None):
      self.data_dir = data_dir
      self.transform = transform
      self.image_paths = []
      self.labels = []

      for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith(('jpg', 'png', 'webp', 'jpeg')):
                self.image_paths.append(os.path.join(root, file))
                # Extract label from the directory name or file name as needed
                if '0_real' in root:
                  label = 0
                else:
                  label = 1

                self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # Read the image
        #img = Image.open(img_path).convert("RGB")
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #img = cv2.resize(img, (512, 512))

        # Apply transformations if specified
        if self.transform is not None:
            img = self.transform(img)

        img = torch.squeeze(img, dim=0)
        return img, img_path


In [None]:
!unzip /content/drive/MyDrive/DATASETS/Common/real_gen_dataset.zip -d /content

In [9]:
data_dir = r'/content/real_gen_dataset'
dataset = load_data(data_dir = data_dir, transform = transform)


In [15]:
dataset[0][0].size()

torch.Size([3, 384, 384])

In [12]:
from torch.utils.data import DataLoader
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)


In [None]:
from tqdm import tqdm
for img, path in tqdm(dataloader):

  with torch.no_grad():
    prediction = midas(img.to('cuda'))
    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=img.shape[-2:],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
  new_path = path[0].replace('real_gen_dataset', r'result/real_gen_dataset')
  #print(new_path)
  #print(prediction.size())
  output = prediction.cpu().numpy()
  dir = os.path.dirname(new_path)
  os.makedirs(dir, exist_ok=True)
  save_image(output, new_path)

#output = prediction.cpu().numpy()

  9%|▊         | 381/4478 [01:04<11:04,  6.16it/s]