In [1]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

from tqdm import tqdm

In [2]:
# Root directory of the dataset (a relative path, so it is resolved against the
# notebook's working directory). The cells in this notebook read from
# <dataset_dir>/train/images, <dataset_dir>/train/masks and <dataset_dir>/test/images.
dataset_dir = '../dataset/'

In [3]:
# List the training images and masks. os.listdir returns entries in arbitrary
# order, so both listings are sorted to make the iteration order (and therefore
# any output derived from it) reproducible between runs and machines.
image_names = sorted(os.listdir(os.path.join(dataset_dir, 'train', 'images')))
mask_names = sorted(os.listdir(os.path.join(dataset_dir, 'train', 'masks')))

# Each image needs a mask stored under exactly the same file name.
assert set(image_names) == set(mask_names), 'Image names do not match mask names'

# Keep only the image files; `names` inherits the sorted order of `image_names`.
names = [name for name in image_names if name.endswith(('.jpg', '.JPG', '.png'))]

In [4]:
# Scan the training set once and collect two distributions:
#   image_shapes: img.shape tuple -> number of images with that shape
#   pixel_values: grey level found in a mask -> total pixel count over all masks
image_shapes = {}
pixel_values = {}
for name in tqdm(names):
    img_path = os.path.join(dataset_dir, 'train', 'images', name)
    mask_path = os.path.join(dataset_dir, 'train', 'masks', name)

    img = cv2.imread(img_path)
    mask = cv2.imread(mask_path)

    # cv2.imread does not raise on a missing or undecodable file; it returns
    # None. Fail here with the offending path instead of a cryptic error later.
    if img is None:
        raise IOError('Could not read image {}'.format(img_path))
    if mask is None:
        raise IOError('Could not read mask {}'.format(mask_path))

    # The mask is loaded as a 3-channel image; collapse it to one grey channel.
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

    assert img.shape[:2] == mask.shape[:2], 'Image shape {} does not match mask shape {}'.format(img.shape, mask.shape)

    # Get image shape distribution
    image_shapes[img.shape] = image_shapes.get(img.shape, 0) + 1

    # Get pixel value distribution
    uniques, counts = np.unique(mask, return_counts=True)
    for pixel_value, count in zip(uniques, counts):
        pixel_values[pixel_value] = pixel_values.get(pixel_value, 0) + count

100%|██████████| 398/398 [00:13<00:00, 28.95it/s]


In [5]:
# Print one row per distinct image shape found in `image_shapes`.
# The keys are `img.shape` tuples; only the first two entries (the spatial
# dimensions) are used here.
for shape, count in image_shapes.items():
    height, width = shape[:2]
    # "To square" is the difference between the longer and the shorter spatial
    # side. Both are taken from the spatial dimensions only, so the trailing
    # channel entry of the shape can never take part in the comparison.
    to_square = max(height, width) - min(height, width)
    print('Shape: {:6} {:8} | Aspect ratio: {:10.2f} | Count: {:5} | To square: {}'.format(
        height, width, width / height, count, to_square))

Shape:    480      640 | Aspect ratio:       1.33 | Count:   131 | To square: 160
Shape:    256      456 | Aspect ratio:       1.78 | Count:    72 | To square: 200
Shape:    720     1280 | Aspect ratio:       1.78 | Count:    84 | To square: 560
Shape:    612      816 | Aspect ratio:       1.33 | Count:    40 | To square: 204
Shape:   2448     3264 | Aspect ratio:       1.33 | Count:    17 | To square: 816
Shape:   1920     1080 | Aspect ratio:       0.56 | Count:    25 | To square: 840
Shape:   2432     4320 | Aspect ratio:       1.78 | Count:    10 | To square: 1888
Shape:   2048     2048 | Aspect ratio:       1.00 | Count:     4 | To square: 0
Shape:    750     2048 | Aspect ratio:       2.73 | Count:    15 | To square: 1298


In [6]:
# Total number of pixels counted in `pixel_values`.
total_pixels = sum(pixel_values.values())
# Distinct grey levels in ascending order.
values_list = sorted(pixel_values)

# Report the share of every grey level as a percentage of all counted pixels.
for grey_level in values_list:
    share = pixel_values[grey_level] / total_pixels * 100
    print('{:3} - {:.4f}%'.format(grey_level, share))

  0 - 91.1513%
  1 - 0.0265%
  2 - 0.0166%
  3 - 0.0079%
  4 - 0.0031%
  5 - 0.0013%
  6 - 0.0003%
  7 - 0.0002%
  8 - 0.0000%
 10 - 0.0000%
120 - 0.0000%
121 - 0.0002%
122 - 0.0003%
123 - 0.0012%
124 - 0.0028%
125 - 0.0072%
126 - 0.0151%
127 - 0.0240%
128 - 6.7967%
129 - 0.0251%
130 - 0.0148%
131 - 0.0079%
132 - 0.0031%
133 - 0.0011%
134 - 0.0002%
135 - 0.0000%
136 - 0.0000%
137 - 0.0000%
170 - 0.0027%
246 - 0.0000%
248 - 0.0000%
249 - 0.0000%
250 - 0.0001%
251 - 0.0001%
252 - 0.0004%
253 - 0.0008%
254 - 0.0014%
255 - 1.8875%


In [7]:
# Percentage of pixels per class after bucketing the grey levels:
#   value <= 64        -> 'background'
#   64 < value <= 192  -> 'egg'
#   value > 192        -> 'pan'
thresholded = dict.fromkeys(('background', 'egg', 'pan'), 0.0)

for grey_level in values_list:
    # Choose the bucket first, then add this grey level's share to it.
    if grey_level > 192:
        label = 'pan'
    elif grey_level > 64:
        label = 'egg'
    else:
        label = 'background'
    thresholded[label] += pixel_values[grey_level] / total_pixels * 100

for label, percentage in thresholded.items():
    print('{:3} - {:.4f}%'.format(label, percentage))

background - 91.2072%
egg - 6.9024%
pan - 1.8904%


In [44]:
# List the test images. os.listdir returns entries in arbitrary order, so the
# listing is sorted to keep the iteration order reproducible between runs.
# NOTE: this rebinds `image_names` and `names`; from here on they refer to the
# test split.
image_names = sorted(os.listdir(os.path.join(dataset_dir, 'test', 'images')))
# Keep only the image files; `names` inherits the sorted order of `image_names`.
names = [name for name in image_names if name.endswith(('.jpg', '.JPG', '.png'))]

In [45]:
# Collect the shape distribution of the test images:
#   image_shapes: img.shape tuple -> number of images with that shape
# NOTE: this rebinds `image_shapes`, discarding whatever it held before.
image_shapes = {}
for name in tqdm(names):
    img_path = os.path.join(dataset_dir, 'test', 'images', name)
    img = cv2.imread(img_path)

    # cv2.imread does not raise on a missing or undecodable file; it returns
    # None. Fail here with the offending path instead of an AttributeError.
    if img is None:
        raise IOError('Could not read image {}'.format(img_path))

    # Get image shape distribution
    image_shapes[img.shape] = image_shapes.get(img.shape, 0) + 1

100%|██████████| 102/102 [00:01<00:00, 72.84it/s]


In [46]:
# Print one row per distinct image shape found in `image_shapes`.
# The keys are `img.shape` tuples; only the first two entries (the spatial
# dimensions) are used here.
for shape, count in image_shapes.items():
    height, width = shape[:2]
    # "To square" is the difference between the longer and the shorter spatial
    # side. Both are taken from the spatial dimensions only, so the trailing
    # channel entry of the shape can never take part in the comparison.
    to_square = max(height, width) - min(height, width)
    print('Shape: {:6} {:8} | Aspect ratio: {:10.2f} | Count: {:5} | To square: {}'.format(
        height, width, width / height, count, to_square))

Shape:   2048     2048 | Aspect ratio:       1.00 | Count:     3 | To square: 0
Shape:    256      456 | Aspect ratio:       1.78 | Count:    28 | To square: 200
Shape:    480      640 | Aspect ratio:       1.33 | Count:    36 | To square: 160
Shape:    612      816 | Aspect ratio:       1.33 | Count:    10 | To square: 204
Shape:   2432     4320 | Aspect ratio:       1.78 | Count:     4 | To square: 1888
Shape:   1920     1080 | Aspect ratio:       0.56 | Count:     3 | To square: 840
Shape:    720     1280 | Aspect ratio:       1.78 | Count:    10 | To square: 560
Shape:   2448     3264 | Aspect ratio:       1.33 | Count:     3 | To square: 816
Shape:    750     2048 | Aspect ratio:       2.73 | Count:     5 | To square: 1298
