In [1]:
import os
import cv2
import src.preprocessing as ppc
from tqdm.notebook import tqdm
import json
import pandas as pd

# Config

In [2]:
# Toggle between the submission (test) set and the training set.
SUBMISSION_SET = False

# Dataset-dependent settings, picked as one tuple so that both variants stay
# side by side: output prefix, image dir, whether file names were shuffled,
# detector label dir, and the CSV describing the square crops.
(OUT_DIR_PREFIX, CFP_DIR, IS_SHUFFLED, YOLO_DIR, IMG_INFO_CSV) = (
    (
        'subm_',
        './data/subm_cfp',
        False,
        "data/labels_test_finalModel",
        './data/subm_shuffled_square_png_640_1/img_info.csv',
    )
    if SUBMISSION_SET
    else (
        '',
        './data/cfp',
        True,
        "data/labels_train_finalModel",
        './data/shuffled_square_png_640_1/img_info.csv',
    )
)

# Tunable parameters
MULTIPROCESSING_WORKERS = 8
# True: crop size is based on the fovea-to-OD distance.
# False: crop size is based on the OD diameter.
USE_FOV_DIST = True
MAX_OD_DIAMETER_PROP = 2.0
MIN_FOVEA_CONF = 0.75

# Derived values: do not change unless you know exactly what you are doing.
# Enlarged by sqrt(2) (plus 1%) so that a rotation of the area of interest
# will not show an artificial border.
CROP_ENLARGMENT_FACTOR = 2**0.5 * 1.01
# Ratio between the average OD diameter and the average distance between the
# fovea center and the OD center, see
# https://en.wikipedia.org/wiki/Fovea_centralis#/media/File:Macula.svg
MAX_FOV_DIST_PROP = MAX_OD_DIAMETER_PROP * 1.92/((4.5**2+0.65**2)**0.5)

# Implementation

In [3]:
# When the images were shuffled, invert the original -> shuffled mapping so
# that a shuffled name can be translated back to the original file name.
if IS_SHUFFLED:
    with open('original_shuffled_map.json') as fp:
        original_shuffled_map = json.load(fp)

    shuffled_orig_map = {
        shuffled: original for original, shuffled in original_shuffled_map.items()
    }

In [4]:
# Parse the detector label files into one row per image.
# Each non-empty line holds: class, x, y, width, height, confidence
# (class 0 = optic disc center, class 1 = fovea).
ODC_CLASS, FOVEA_CLASS = 0, 1

records = []
for filename in tqdm(os.listdir(YOLO_DIR)):
    key = os.path.splitext(filename)[0]
    if IS_SHUFFLED:
        key = shuffled_orig_map[key]

    # Five Nones per class keep the record width constant when a class is
    # missing; pandas turns them into NaN.
    best = {ODC_CLASS: [None] * 5, FOVEA_CLASS: [None] * 5}
    with open(os.path.join(YOLO_DIR, filename), "r") as f:
        for line in f:
            pred = [float(number) for number in line.split()]
            if not pred:
                # Blank line (e.g. trailing newline): nothing to parse.
                continue
            cls, values = pred[0], pred[1:]
            if cls not in best:
                continue
            # Keep the most confident detection per class instead of relying
            # on line order; `>=` lets a later line win ties, as before.
            current_conf = best[cls][-1]
            if current_conf is None or values[-1] >= current_conf:
                best[cls] = values

    records.append((key, *best[ODC_CLASS], *best[FOVEA_CLASS]))

df_detector = pd.DataFrame.from_records(records, columns=[
    'filename',
    'odc_x_ratio', 'odc_y_ratio', 'odc_width_ratio', 'odc_height_ratio', 'odc_conf',
    'fovea_x_ratio', 'fovea_y_ratio', 'fovea_width_ratio', 'fovea_height_ratio', 'fovea_conf'])

df_detector = df_detector.set_index('filename')
df_detector

  0%|          | 0/15000 [00:00<?, ?it/s]

Unnamed: 0_level_0,odc_x_ratio,odc_y_ratio,odc_width_ratio,odc_height_ratio,odc_conf,fovea_x_ratio,fovea_y_ratio,fovea_width_ratio,fovea_height_ratio,fovea_conf
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
DEV04766.jpg,0.792188,0.431250,0.168750,0.168750,0.945757,0.380469,0.485938,0.167187,0.165625,0.887069
DEV12882.jpg,0.514844,0.510938,0.139062,0.140625,0.927009,0.818750,0.553125,0.134375,0.140625,0.421091
DEV09597.jpg,0.751562,0.446094,0.140625,0.139062,0.916023,0.387500,0.514844,0.150000,0.151562,0.901883
DEV11890.jpg,0.234375,0.503906,0.175000,0.176563,0.928579,0.618750,0.521875,0.165625,0.171875,0.848114
DEV05139.jpg,0.175781,0.646094,0.160938,0.160938,0.928293,0.502344,0.610156,0.157813,0.157813,0.907048
...,...,...,...,...,...,...,...,...,...,...
DEV14891.jpg,0.720312,0.482031,0.175000,0.173438,0.941393,0.339844,0.525000,0.170312,0.175000,0.891026
DEV14264.jpg,0.900781,0.531250,0.185937,0.193750,0.923445,0.503906,0.554688,0.182812,0.184375,0.875659
DEV12131.jpg,0.325000,0.465625,0.178125,0.181250,0.935431,0.690625,0.528906,0.171875,0.176563,0.878764
DEV00367.jpg,0.886719,0.492969,0.179688,0.179688,0.943721,0.537500,0.423438,0.171875,0.175000,0.792016


In [5]:
# Load the square-crop info, drop the columns not used in this notebook and
# index the rows by the original file name.
df_square = (
    pd.read_csv(IMG_INFO_CSV)
    .drop(columns=['scaling', 'new_file', 'side'])
    .set_index('orig_file')
)
df_square

Unnamed: 0_level_0,delta_x,delta_y,orig_crop_side
orig_file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DEV13781.jpg,176,-144,2272
DEV12858.jpg,-89,-904,3150
DEV02899.jpg,-59,-685,2486
DEV12129.jpg,168,-152,2256
DEV04490.jpg,-66,-886,3166
...,...,...,...
DEV04506.jpg,-44,-905,3175
DEV12149.jpg,-45,-857,3207
DEV00409.jpg,-95,-870,2987
DEV01824.jpg,197,-127,2338


In [6]:
# The output directory name encodes the crop mode and its parameters.
if USE_FOV_DIST:
    OUT_PATH = f'data/{OUT_DIR_PREFIX}cfp_od_crop_FOV_f{MAX_OD_DIAMETER_PROP}_conf{MIN_FOVEA_CONF}'
else:
    OUT_PATH = f'data/{OUT_DIR_PREFIX}cfp_od_crop_OD_f{MAX_OD_DIAMETER_PROP}'

if not os.path.isdir(OUT_PATH):
    print(f'{OUT_PATH} does not exist, creating dir')
    # makedirs also creates a missing parent directory, and exist_ok avoids a
    # failure if the directory appears between the check and the creation.
    os.makedirs(OUT_PATH, exist_ok=True)

In [7]:
from multiprocessing import Pool
import math

# Left-join the detector predictions onto the square-crop info by file name;
# images without a detector row get NaN in every detector column.
df_info = df_square.merge(df_detector, how='left', left_index=True, right_index=True)

def _process_file(filename):
    """Crop one image around its detected optic disc (OD) and save it as PNG.

    The crop is centered on the OD center. Its side is derived either from the
    fovea-to-OD distance (``USE_FOV_DIST``) or from the mean of the OD box
    width and height, then enlarged by ``CROP_ENLARGMENT_FACTOR``.

    The image is skipped (a message is printed, nothing is written) when no OD
    was detected or, in fovea mode, when the fovea is missing or its confidence
    is below ``MIN_FOVEA_CONF``.

    Parameters
    ----------
    filename : str
        Name of a file inside ``CFP_DIR``; also the index key into ``df_info``.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Any error is printed together with the file name and re-raised.
    """
    try:
        img_info = df_info.loc[filename]

        # A NaN coordinate means the detector produced no OD for this image.
        if math.isnan(img_info.odc_x_ratio + img_info.odc_y_ratio):
            print(f"No OD for {filename}")
            return

        if USE_FOV_DIST:
            if not math.isnan(img_info.fovea_x_ratio + img_info.fovea_y_ratio) and img_info.fovea_conf >= MIN_FOVEA_CONF:
                x2, x1 = img_info.fovea_x_ratio, img_info.odc_x_ratio
                y2, y1 = img_info.fovea_y_ratio, img_info.odc_y_ratio
                # distance between the fovea and OD center
                side = ((x2 - x1)**2 + (y2 - y1)**2)**0.5
                side = side * MAX_FOV_DIST_PROP * img_info.orig_crop_side
            else:
                print(f"No FOV (conf {img_info.fovea_conf} < {MIN_FOVEA_CONF}) for {filename}")
                return
        else:
            side = (img_info.odc_width_ratio + img_info.odc_height_ratio)/2
            side = side * MAX_OD_DIAMETER_PROP * img_info.orig_crop_side
        side = int(side * CROP_ENLARGMENT_FACTOR)

        # Map the OD center from square-crop ratios to pixel coordinates.
        # NOTE(review): x is corrected with delta_y and y with delta_x;
        # presumably the CSV stores the deltas in (row, col) order. Confirm
        # against the code that wrote img_info.csv.
        odc_x = int(round(img_info.orig_crop_side * img_info.odc_x_ratio - img_info.delta_y))
        odc_y = int(round(img_info.orig_crop_side * img_info.odc_y_ratio - img_info.delta_x))

        # Decode the image only now, so skipped files cost no image I/O.
        file = f"{CFP_DIR}/{filename}"
        img = cv2.imread(file)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError(f"could not read image {file}")

        square_img = ppc.crop_od_fill_if_needed(img, odc_x, odc_y, side)

        out_file = f"{OUT_PATH}/{os.path.splitext(filename)[0]}.png"
        if not cv2.imwrite(out_file, square_img):
            # cv2.imwrite returns False on failure; do not lose output silently.
            raise IOError(f"could not write image {out_file}")

    except Exception as e:
        print('error ', filename, e)
        raise
        
# Every entry of CFP_DIR, in sorted name order; this is the work list that is
# passed to _process_file.
l_files = sorted(os.listdir(CFP_DIR))

In [8]:
# Process every file, in a worker pool when more than one worker is
# configured and sequentially otherwise.
if MULTIPROCESSING_WORKERS > 1:
    with Pool(MULTIPROCESSING_WORKERS) as pool:
        results = pool.imap(_process_file, l_files)
        # Drain the iterator so that the progress bar advances and worker
        # exceptions surface here; the results themselves are not needed.
        for _ in tqdm(results, total=len(l_files)):
            pass
else:
    for f in tqdm(l_files):
        _process_file(f)

print('Finished.')

  0%|          | 0/15000 [00:00<?, ?it/s]

No FOV (conf 0.617985 < 0.75) for DEV00011.jpg
No FOV (conf 0.744447 < 0.75) for DEV00012.jpg
No FOV (conf 0.436658 < 0.75) for DEV00021.jpg
No FOV (conf 0.720483 < 0.75) for DEV00032.jpg
No FOV (conf 0.688288 < 0.75) for DEV00046.jpg
No FOV (conf nan < 0.75) for DEV00077.jpg
No FOV (conf 0.662897 < 0.75) for DEV00086.jpg
No FOV (conf 0.704871 < 0.75) for DEV00089.jpg
No FOV (conf nan < 0.75) for DEV00104.jpg
No FOV (conf 0.326223 < 0.75) for DEV00113.jpg
No FOV (conf 0.719752 < 0.75) for DEV00114.jpg
No FOV (conf 0.724608 < 0.75) for DEV00117.jpg
No FOV (conf 0.707861 < 0.75) for DEV00116.jpg
No FOV (conf 0.627108 < 0.75) for DEV00123.jpg
No FOV (conf 0.664482 < 0.75) for DEV00120.jpg
No FOV (conf 0.742995 < 0.75) for DEV00127.jpg
No FOV (conf 0.595564 < 0.75) for DEV00134.jpg
No FOV (conf 0.409111 < 0.75) for DEV00152.jpg
No FOV (conf 0.633094 < 0.75) for DEV00161.jpg
No FOV (conf 0.688142 < 0.75) for DEV00170.jpg
No FOV (conf 0.701932 < 0.75) for DEV00171.jpg
No FOV (conf nan < 0.75