In [None]:
import os
import cv2
import src.preprocessing as ppc
from tqdm.notebook import tqdm
import json
import pandas as pd

# Config

In [None]:
TEST_SET = True

# Dataset-dependent settings: the test set and the training set use parallel directory trees.
OUT_DIR_PREFIX = 'test_' if TEST_SET else ''
CFP_DIR = './data/test_cfp' if TEST_SET else './data/cfp'
IS_SHUFFLED = not TEST_SET  # only the training label files are translated through the shuffle map
YOLO_DIR = "data/labels_test_finalModel" if TEST_SET else "data/labels_train_finalModel"
IMG_INFO_CSV = (
    './data/test_shuffled_square_png_640_1/img_info.csv'
    if TEST_SET
    else './data/shuffled_square_png_640_1/img_info.csv'
)

# Tunable parameters
MULTIPROCESSING_WORKERS = 8  # values <= 1 process the files serially
USE_FOV_DIST = True  # True: crop size from the fovea-to-OD distance; False: from the OD diameter
MAX_OD_DIAMETER_PROP = 2.0  # crop side expressed in OD diameters
MIN_FOVEA_CONF = 0.75  # fovea detections below this confidence are rejected when USE_FOV_DIST is on

# Derived values -- don't change these unless you know exactly what you are doing.
# Enlarging by slightly more than sqrt(2) means a rotation of the area of interest
# will not show an artificial border.
CROP_ENLARGMENT_FACTOR = 2**0.5 * 1.01
# Ratio between the average OD diameter and the average distance between fovea center and OD center,
# see https://en.wikipedia.org/wiki/Fovea_centralis#/media/File:Macula.svg
MAX_FOV_DIST_PROP = MAX_OD_DIAMETER_PROP * 1.92 / ((4.5**2 + 0.65**2)**0.5)

# Implementation

In [None]:
if IS_SHUFFLED:
    # Load the stored mapping (presumably original name -> shuffled name; verify against
    # the script that wrote original_shuffled_map.json).
    with open('original_shuffled_map.json') as map_file:
        original_shuffled_map = json.load(map_file)

    # Invert it so that a shuffled name can be translated back to its original name.
    # If two entries share a value, the one iterated last wins.
    shuffled_orig_map = {
        shuffled: original for original, shuffled in original_shuffled_map.items()
    }

    shuffled_orig_map

In [None]:
def _best_detections(label_lines, source='<labels>'):
    """Pick the most confident class-0 and class-1 detection from the lines of one label file.

    Every non-blank line is expected to read ``class x y width height confidence``.
    Class 0 fills the ``odc_*`` columns and class 1 the ``fovea_*`` columns; lines of
    any other class are ignored.

    Args:
        label_lines: iterable of text lines from one label file.
        source: name of the file, used only in error messages.

    Returns:
        ``(odc, fovea)``: two 5-element lists ``[x, y, width, height, conf]``. A class
        without any detection is returned as five ``None`` values.

    Raises:
        ValueError: if a class 0/1 line does not carry exactly five values after the
            class id (e.g. confidences were not saved); such a row would not line up
            with the DataFrame columns.
    """
    n_fields = 5  # x, y, width, height, confidence
    # Keys compare equal to the float class ids parsed below (0.0 == 0, 1.0 == 1).
    best = {0: [None] * n_fields, 1: [None] * n_fields}
    for line in label_lines:
        pred = [float(number) for number in line.split()]
        if not pred:
            continue  # blank line
        class_id, detection = pred[0], pred[1:]
        if class_id not in best:
            continue
        if len(detection) != n_fields:
            raise ValueError(
                f"{source}: expected {n_fields} values after the class id, "
                f"got {len(detection)} (were confidences saved?)"
            )
        best_conf = best[class_id][-1]
        # ">=" keeps the later line on ties, so files with one detection per class
        # behave exactly as before.
        if best_conf is None or detection[-1] >= best_conf:
            best[class_id] = detection
    return best[0], best[1]


records = []
for filename in tqdm(os.listdir(YOLO_DIR)):
    # Label file name without its extension; for shuffled sets translate it back
    # to the original file name.
    key = os.path.splitext(filename)[0]
    if IS_SHUFFLED:
        key = shuffled_orig_map[key]
    with open(f"{YOLO_DIR}/{filename}", "r") as f:
        odc, fovea = _best_detections(f, source=filename)
    records.append((key, *odc, *fovea))

df_detector = pd.DataFrame.from_records(records, columns=[
    'filename',
    'odc_x_ratio', 'odc_y_ratio', 'odc_width_ratio', 'odc_height_ratio', 'odc_conf',
    'fovea_x_ratio', 'fovea_y_ratio', 'fovea_width_ratio', 'fovea_height_ratio', 'fovea_conf'])

df_detector = df_detector.set_index('filename')
df_detector

In [None]:
# Image-info table read from IMG_INFO_CSV, indexed by the original file name.
# The columns 'scaling', 'new_file' and 'side' are not used further on and are dropped.
df_square = (
    pd.read_csv(IMG_INFO_CSV)
    .drop(columns=['scaling', 'new_file', 'side'])
    .set_index('orig_file')
)
df_square

In [None]:
# The output directory name encodes the crop basis (FOV distance or OD diameter)
# together with the parameters that influence the crop.
if USE_FOV_DIST:
    OUT_PATH = f'data/{OUT_DIR_PREFIX}cfp_od_crop_FOV_f{MAX_OD_DIAMETER_PROP}_conf{MIN_FOVEA_CONF}'
else:
    OUT_PATH = f'data/{OUT_DIR_PREFIX}cfp_od_crop_OD_f{MAX_OD_DIAMETER_PROP}'

if not os.path.isdir(OUT_PATH):
    print(f'{OUT_PATH} does not exist, creating dir')
    # makedirs also creates missing parent directories, and exist_ok tolerates the
    # directory appearing between the check above and this call.
    os.makedirs(OUT_PATH, exist_ok=True)

In [None]:
from multiprocessing import Pool
import math

def _process_file(filename):
    """Crop one image from CFP_DIR around its detected optic disc (OD) and save it as PNG.

    The crop side is derived either from the fovea-to-OD distance (USE_FOV_DIST) or
    from the mean of the OD box width and height, scaled to original-image pixels via
    ``orig_crop_side`` and enlarged by CROP_ENLARGMENT_FACTOR. Files without an OD
    detection, or (with USE_FOV_DIST) without a sufficiently confident fovea
    detection, are reported and skipped.

    Args:
        filename: name of a file inside CFP_DIR; it must be present in the index of
            both ``df_square`` and ``df_detector``.

    Returns:
        None. The crop is written to ``OUT_PATH/<filename without extension>.png``.

    Raises:
        Any exception raised while processing is printed together with the file name
        and re-raised (e.g. KeyError for a file missing from one of the tables,
        OSError if the image cannot be read or the crop cannot be written).
    """
    try:
        sq_info = df_square.loc[filename]
        detect_info = df_detector.loc[filename]

        # pd.isna handles both NaN and None (an all-missing column stays object dtype).
        if pd.isna(detect_info.odc_x_ratio) or pd.isna(detect_info.odc_y_ratio):
            print(f"No OD for {filename}")
            return

        if USE_FOV_DIST:
            has_fovea = not (pd.isna(detect_info.fovea_x_ratio) or pd.isna(detect_info.fovea_y_ratio))
            if not (has_fovea and detect_info.fovea_conf >= MIN_FOVEA_CONF):
                print(f"No FOV (conf {detect_info.fovea_conf} < {MIN_FOVEA_CONF}) for {filename}")
                return
            x2, x1 = detect_info.fovea_x_ratio, detect_info.odc_x_ratio
            y2, y1 = detect_info.fovea_y_ratio, detect_info.odc_y_ratio
            # distance between the fovea and OD center, as a fraction of the square side
            side = ((x2 - x1)**2 + (y2 - y1)**2)**0.5
            side = side * MAX_FOV_DIST_PROP * sq_info.orig_crop_side
        else:
            side = (detect_info.odc_width_ratio + detect_info.odc_height_ratio)/2
            side = side * MAX_OD_DIAMETER_PROP * sq_info.orig_crop_side
        side = int(side * CROP_ENLARGMENT_FACTOR)

        # OD center in original-image pixels.
        # NOTE(review): x is corrected with delta_y and y with delta_x -- this looks
        # crossed; confirm against how img_info.csv defines delta_x / delta_y.
        odc_x = int(round(sq_info.orig_crop_side * detect_info.odc_x_ratio - sq_info.delta_y))
        odc_y = int(round(sq_info.orig_crop_side * detect_info.odc_y_ratio - sq_info.delta_x))

        # Decode the image only now, so skipped files cost no image I/O.
        file = f"{CFP_DIR}/{filename}"
        img = cv2.imread(file)
        if img is None:  # cv2.imread signals failure by returning None, not by raising
            raise OSError(f"cv2.imread could not read {file}")

        square_img = ppc.crop_od_fill_if_needed(img, odc_x, odc_y, side)

        # splitext instead of [:-4] so extensions such as ".jpeg" are stripped correctly.
        out_file = f"{OUT_PATH}/{os.path.splitext(filename)[0]}.png"
        if not cv2.imwrite(out_file, square_img):  # imwrite reports failure via its return value
            raise OSError(f"cv2.imwrite could not write {out_file}")

    except Exception as e:
        print('error ', filename, e)
        raise
        
# All entries of CFP_DIR, in sorted order so that runs are processed deterministically.
l_files = os.listdir(CFP_DIR)
l_files.sort()

In [None]:
if MULTIPROCESSING_WORKERS <= 1:
    # Serial fallback: process the files one by one in this process.
    for f in tqdm(l_files):
        _process_file(f)
else:
    with Pool(MULTIPROCESSING_WORKERS) as pool:
        # imap is lazy; draining the iterator drives the work and advances the progress bar.
        # An exception raised in a worker surfaces here.
        for _ in tqdm(pool.imap(_process_file, l_files), total=len(l_files)):
            pass

print('Finished.')