In [1]:
import json

def get_classes_from_coco_json(json_path):
    """Return the category names listed in a COCO-format annotation file.

    Args:
        json_path: Path to a JSON file whose top-level object has a
            'categories' list; each entry must carry a 'name' key.

    Returns:
        A list of the 'name' values, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If 'categories' or a category's 'name' is missing.
    """
    # Binary mode lets the json module detect the encoding itself (UTF-8,
    # UTF-8 with BOM, UTF-16, UTF-32) instead of depending on the locale's
    # default text encoding.
    with open(json_path, 'rb') as file:
        coco_data = json.load(file)

    return [category['name'] for category in coco_data['categories']]

# Path to the COCO-format annotation file whose category names are listed.
# Replace it with the actual path to your COCO JSON file.
coco_json_path = '/home/yaniv.sheinfeld/data/je/annotations/instances_train2017.json'
class_labels = get_classes_from_coco_json(coco_json_path)

# Print the class labels, one per line, in file order
for label in class_labels:
    print(label)

chip
crack
grinding
contamination
dent
sticky


In [6]:
import cv2
import numpy as np
import os
from tqdm import tqdm
import concurrent.futures
import math

def calculate_dataset_std(image_folder):
    """Return the average per-image grayscale standard deviation of a folder.

    Every entry of ``image_folder`` is loaded as a single-channel image and
    its pixel standard deviation is computed; the function returns the
    arithmetic mean of those per-image values. Pixel values are used as
    loaded (no rescaling to [0, 1]).

    Note: this is the mean of per-image standard deviations, which is not
    the same quantity as the standard deviation pooled over all pixels of
    the dataset.

    Args:
        image_folder: Directory whose entries are read with ``cv2.imread``.

    Returns:
        The mean of the per-image standard deviations.

    Raises:
        ValueError: If no entry of the folder could be read as an image.
    """
    file_list = os.listdir(image_folder)
    std_values = []

    for file_name in tqdm(file_list, desc='Calculating Standard Deviation'):
        image_path = os.path.join(image_folder, file_name)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable entry (sub-directory, non-image
            # file, corrupt image) by returning None; skip it rather than
            # letting np.std(None) raise a TypeError.
            continue
        std_values.append(np.std(image))

    if not std_values:
        # Without this check np.mean([]) would quietly return nan.
        raise ValueError("No images could be read from the directory")

    return np.mean(std_values)

# Run the single-process computation on the train2017 image folder and print the result.
print(calculate_dataset_std('/home/yaniv.sheinfeld/data/je/train2017/'))

Calculating Standard Deviation: 100%|██████████| 13053/13053 [03:06<00:00, 70.10it/s]

42.92271917776942





In [9]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool

def read_and_get_mean_and_std(img_path):
    """Load one image as grayscale and return its pixel mean and std.

    Args:
        img_path: Path passed to ``cv2.imread``.

    Returns:
        A ``(mean, std)`` tuple computed over all pixels, or ``None`` when
        ``cv2.imread`` could not read the file, so that callers can filter
        out failures with an ``is not None`` check.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # imread signals failure by returning None instead of raising.
        return None
    return np.mean(img), np.std(img)

def calc_mean(imgs_path, num_processes=8):
    """Average the per-image mean and std over a directory, in parallel.

    Each entry of ``imgs_path`` is handed to ``read_and_get_mean_and_std``
    in a pool of worker processes. Results that are ``None`` are treated as
    unreadable files and skipped.

    Args:
        imgs_path: Directory whose entries are processed.
        num_processes: Number of worker processes in the pool.

    Returns:
        A ``(mean_of_means, mean_of_stds)`` tuple over the images that were
        read successfully.

    Raises:
        ValueError: If no image in the directory could be read.
    """
    img_list = [os.path.join(imgs_path, img) for img in os.listdir(imgs_path)]
    with Pool(num_processes) as p:
        results = list(tqdm(p.imap(read_and_get_mean_and_std, img_list), total=len(img_list), desc="Calculating mean"))

    # Filter out any failures from imread *before* unzipping: unpacking
    # zip(*[]) into two names raises its own ValueError and would hide the
    # explicit message below.
    valid = [result for result in results if result is not None]
    if not valid:
        raise ValueError("No images could be read from the directory")

    means, stds = zip(*valid)
    return np.mean(means), np.mean(stds)

# Run the multi-process computation on the train2017 image folder and
# print the averaged per-image mean and std.
imgs_path = '/home/yaniv.sheinfeld/data/je/train2017'
mean, std = calc_mean(imgs_path)
print(mean, std)


Calculating mean: 100%|██████████| 13053/13053 [00:50<00:00, 257.78it/s]

64.34243205922937 42.92271917776942



