# Calculate the per-channel mean and std of an image dataset

In [4]:
import numpy as np
from os import listdir
from os.path import join, isdir
from glob import glob
import cv2
from tqdm import tqdm 

CHANNEL_NUM = 3  # colour channels per image (images are read by cv2 in BGR order)


def cal_dir_stat(root):
    """Compute per-channel mean and std over the ``*.jpg`` images under ``root``.

    ``root`` must contain one sub-directory per class; only ``*.jpg`` files
    located directly inside those sub-directories are read. Every image is
    divided by 255 before being accumulated.

    Args:
        root: Path of the dataset directory.

    Returns:
        A tuple ``(rgb_mean, rgb_std, channel_sum, channel_sum_squared,
        pixel_num)``:

        * ``rgb_mean`` / ``rgb_std``: lists of per-channel statistics in RGB
          order.
        * ``channel_sum`` / ``channel_sum_squared``: the raw accumulators, in
          BGR order, so results from several directories can be pooled.
        * ``pixel_num``: number of pixels (per channel) that were accumulated.

    Raises:
        ValueError: If no readable image was found, since the statistics
            would otherwise silently come out as NaN.
    """
    import warnings  # local import: only needed to report unreadable files

    cls_dirs = [d for d in listdir(root) if isdir(join(root, d))]
    pixel_num = 0  # total number of pixels seen in the dataset
    channel_sum = np.zeros(CHANNEL_NUM)
    channel_sum_squared = np.zeros(CHANNEL_NUM)

    for d in cls_dirs:
        im_pths = glob(join(root, d, "*.jpg"))
        for path in tqdm(im_pths):
            # image in M*N*CHANNEL_NUM shape, channel in BGR order
            im = cv2.imread(path)
            if im is None:
                # cv2.imread signals an unreadable/corrupt file with None
                # instead of raising; skip it rather than abort a long run.
                warnings.warn("Skipping unreadable image: {}".format(path))
                continue
            im = im / 255.0
            pixel_num += im.size // CHANNEL_NUM
            channel_sum += np.sum(im, axis=(0, 1))
            channel_sum_squared += np.sum(np.square(im), axis=(0, 1))

    if pixel_num == 0:
        raise ValueError(
            "No readable '*.jpg' images found in the class directories "
            "under {!r}".format(root)
        )

    bgr_mean = channel_sum / pixel_num
    # Var[x] = E[x^2] - E[x]^2; rounding can push a near-zero variance slightly
    # below zero, so clamp before taking the square root to avoid NaN.
    bgr_var = channel_sum_squared / pixel_num - np.square(bgr_mean)
    bgr_std = np.sqrt(np.maximum(bgr_var, 0.0))

    # change the format from bgr to rgb
    rgb_mean = list(bgr_mean)[::-1]
    rgb_std = list(bgr_std)[::-1]

    return rgb_mean, rgb_std, channel_sum, channel_sum_squared, pixel_num

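# The script assumes that the root directory passed to cal_dir_stat (e.g. train_root)
# contains one sub-directory per class of images; only the `*.jpg` files directly
# inside those sub-directories are counted.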


## Calculate mean and std on train data

In [5]:
# Location of the training split; it must contain one sub-directory per class.
train_root = "/home/w/Projects/Courses/made/2_term/cv/MADE/homework_1/data/train"

# cal_dir_stat returns (rgb_mean, rgb_std, channel_sum, channel_sum_squared, pixel_num).
# The last three are kept so they can be pooled with another split's accumulators.
train_stats = cal_dir_stat(train_root)
rgb_mean, rgb_std = train_stats[0], train_stats[1]
channel_sum, channel_sum_squared, pixel_num = train_stats[2], train_stats[3], train_stats[4]

100%|██████████| 393930/393930 [2:08:47<00:00, 50.98it/s]  


In [6]:
# Training-set per-channel mean and std, already reordered to RGB by cal_dir_stat.
print(rgb_mean, rgb_std)

[0.39986175012900305, 0.3202357665077171, 0.2829780781417735] [0.33425172239907414, 0.28654204127102256, 0.2700744122414718]


## Calculate mean and std on test data

In [None]:
# Location of the test split; cal_dir_stat only looks inside its sub-directories.
test_root = "/home/w/Projects/Courses/made/2_term/cv/MADE/homework_1/data/test"

# Same five-element result as for the training split, stored under *_test names.
test_stats = cal_dir_stat(test_root)
rgb_mean_test, rgb_std_test = test_stats[0], test_stats[1]
channel_sum_test, channel_sum_squared_test, pixel_num_test = (
    test_stats[2],
    test_stats[3],
    test_stats[4],
)
print(rgb_mean_test, rgb_std_test)

In [18]:
# Pool the train and test accumulators to get statistics over both splits.
# channel_sum / channel_sum_squared are summed straight from cv2 images, i.e. in
# BGR channel order, so the pooled mean/std below are BGR-ordered as well.
total_pixel_num = pixel_num + pixel_num_test
bgr_mean_all = (channel_sum + channel_sum_test) / total_pixel_num
bgr_std_all = np.sqrt(
    (channel_sum_squared + channel_sum_squared_test) / total_pixel_num
    - np.square(bgr_mean_all)
)

# Aliases under the original (misleading) names: other cells refer to these
# arrays as rgb_*_all and reverse them with [::-1] before printing. They hold
# BGR-ordered values.
rgb_mean_all = bgr_mean_all
rgb_std_all = bgr_std_all

In [9]:
# Display the test-set per-channel mean and std (RGB order) returned by cal_dir_stat.
print(rgb_mean_test, rgb_std_test)

[0.3987713047901443, 0.3188266994750084, 0.280821570740049] [0.3339860318707827, 0.286073095094479, 0.2690661347078621]


## Calculate mean and std on the whole dataset (train + test)

In [17]:
# rgb_mean_all is built from cv2's BGR-ordered channel sums, so despite its name
# it is in BGR order; [::-1] flips it to RGB for display.
print(rgb_mean_all[::-1])

[0.39963884 0.31994772 0.28253724]


In [20]:
# rgb_std_all is built from cv2's BGR-ordered channel sums, so despite its name
# it is in BGR order; [::-1] flips it to RGB for display.
print(rgb_std_all[::-1])

[0.33419772 0.2864468  0.26987   ]
