In [1]:
import os
import cv2

import src.preprocessing as ppc
from tqdm.notebook import tqdm
import json
import pandas as pd

In [57]:
IS_SHUFFLED = True

if IS_SHUFFLED:
    # Read the stored mapping and build its inverse: every value of
    # original_shuffled_map becomes a key that points back to its former key.
    with open('original_shuffled_map.json') as fp:
        original_shuffled_map = json.load(fp)

    shuffled_orig_map = {
        shuffled: original
        for original, shuffled in original_shuffled_map.items()
    }

In [3]:
# Classification labels indexed by image file name ('<aimi_id>.jpg');
# label_num is 1 where the class is 'RG' and 0 otherwise.
cls_labels = (
    pd.read_csv('data/dev_labels.csv')
    .assign(
        filename=lambda frame: frame['aimi_id'] + '.jpg',
        label_num=lambda frame: (frame['class'] == 'RG').astype(int),
    )
    .drop(columns=['aimi_id', 'class'])
    .set_index('filename')
)
cls_labels

Unnamed: 0_level_0,label_num
filename,Unnamed: 1_level_1
DEV00000.jpg,0
DEV00001.jpg,1
DEV00002.jpg,0
DEV00003.jpg,0
DEV00004.jpg,0
...,...
DEV14995.jpg,0
DEV14996.jpg,1
DEV14997.jpg,0
DEV14998.jpg,0


In [27]:
YOLO_DIR = "data/labels"
# Values expected after the class id on each label line:
# x, y, width, height, confidence (see the column names used below).
N_BOX_FIELDS = 5


def _best_detections(lines, source="<lines>"):
    """Pick the most confident optic-disc and fovea boxes from label lines.

    Each non-blank line is parsed as whitespace-separated floats: a class id
    followed by ``N_BOX_FIELDS`` values, the last of which is treated as the
    confidence. Class 0 is the optic disc ("odc"), class 1 the fovea; other
    class ids are ignored.

    Parameters
    ----------
    lines : iterable of str
        Lines of one label file.
    source : str
        Name used in error messages only.

    Returns
    -------
    tuple(list, list)
        ``(odc, fovea)``; each is a list of ``N_BOX_FIELDS`` floats, or a list
        of ``None`` when that class was not detected.

    Raises
    ------
    ValueError
        If a line for class 0 or 1 does not carry exactly ``N_BOX_FIELDS``
        values (it could not fill the output columns anyway).
    """
    best = {0: [None] * N_BOX_FIELDS, 1: [None] * N_BOX_FIELDS}
    for line in lines:
        pred = [float(number) for number in line.split()]
        if not pred:
            continue  # blank line (e.g. trailing newline)
        cls_id, box = pred[0], pred[1:]
        if cls_id not in best:
            continue
        if len(box) != N_BOX_FIELDS:
            raise ValueError(
                f"{source}: expected {N_BOX_FIELDS} values after the class id, got {len(box)}"
            )
        # Keep the highest-confidence box per class so the result does not
        # depend on the order in which the detector wrote its lines.
        current_conf = best[cls_id][-1]
        if current_conf is None or box[-1] > current_conf:
            best[cls_id] = box
    return best[0], best[1]


records = []
# Only real label files: os.listdir also returns things like .ipynb_checkpoints.
label_files = sorted(name for name in os.listdir(YOLO_DIR) if name.endswith(".txt"))
for filename in tqdm(label_files):
    stem = os.path.splitext(filename)[0]
    if IS_SHUFFLED:
        key = shuffled_orig_map[stem]
    else:
        # cls_labels is keyed by '<id>.jpg', so a bare stem would never match
        # in the merge below.
        key = f"{stem}.jpg"
    with open(os.path.join(YOLO_DIR, filename), "r") as f:
        odc, fovea = _best_detections(f, source=filename)

    records.append((key, *odc, *fovea))

df_to_join = pd.DataFrame.from_records(records, columns=[
    'filename',
    'odc_x_ratio', 'odc_y_ratio', 'odc_width_ratio', 'odc_height_ratio', 'odc_conf',
    'fovea_x_ratio', 'fovea_y_ratio', 'fovea_width_ratio', 'fovea_height_ratio', 'fovea_conf'])

max_fovea = df_to_join.fovea_conf.max()
max_odx = df_to_join.odc_conf.max()
# Left merge: every labelled image keeps a row; images without a label file
# (or without a detection of a class) get NaN in the corresponding columns.
df_detector = pd.merge(cls_labels, df_to_join, how='left', on='filename')
df_detector = df_detector.set_index('filename')
df_detector

  0%|          | 0/14999 [00:00<?, ?it/s]

Unnamed: 0_level_0,label_num,odc_x_ratio,odc_y_ratio,odc_width_ratio,odc_height_ratio,odc_conf,fovea_x_ratio,fovea_y_ratio,fovea_width_ratio,fovea_height_ratio,fovea_conf
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DEV00000.jpg,0,0.275000,0.455469,0.190625,0.189063,0.857565,0.628125,0.503906,0.193750,0.195312,0.829064
DEV00001.jpg,1,0.726562,0.455469,0.162500,0.154687,0.881377,0.336719,0.519531,0.167187,0.170312,0.755090
DEV00002.jpg,0,0.318750,0.448438,0.153125,0.156250,0.899614,0.610156,0.501562,0.154687,0.159375,0.828746
DEV00003.jpg,0,0.292188,0.517969,0.134375,0.142188,0.898935,0.650000,0.467187,0.140625,0.143750,0.818837
DEV00004.jpg,0,0.346094,0.474219,0.167187,0.164062,0.885468,0.688281,0.533594,0.164062,0.167187,0.837267
...,...,...,...,...,...,...,...,...,...,...,...
DEV14995.jpg,0,0.738281,0.496094,0.198437,0.198437,0.883838,0.329688,0.522656,0.196875,0.204688,0.828903
DEV14996.jpg,1,0.687500,0.551562,0.140625,0.146875,0.907235,0.338281,0.464063,0.145313,0.150000,0.535397
DEV14997.jpg,0,0.118750,0.460938,0.187500,0.187500,0.890861,0.502344,0.474219,0.185937,0.189063,0.830783
DEV14998.jpg,0,0.713281,0.431250,0.160938,0.162500,0.898426,0.338281,0.528125,0.157813,0.159375,0.800794


In [5]:
IMG_INFO_CSV = './data/shuffled_square_png_640/img_info.csv'
# Per-image square-crop metadata keyed by original file name; the columns
# 'scaling', 'new_file' and 'side' are not used further and are discarded.
df_square = (
    pd.read_csv(IMG_INFO_CSV)
    .set_index('orig_file')
    .drop(columns=['scaling', 'new_file', 'side'])
)
df_square

Unnamed: 0_level_0,delta_x,delta_y,orig_crop_side
orig_file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DEV13781.jpg,176,-144,2272
DEV12858.jpg,-89,-904,3150
DEV02899.jpg,-59,-685,2486
DEV12129.jpg,168,-152,2256
DEV04490.jpg,-66,-886,3166
...,...,...,...
DEV04506.jpg,-44,-905,3175
DEV12149.jpg,-45,-857,3207
DEV00409.jpg,-95,-870,2987
DEV01824.jpg,197,-127,2338


In [61]:
# --- Tunable parameters (change these) ---------------------------------------
# Number of worker processes; values <= 1 make the run cell process files serially.
MULTIPROCESSING_WORKERS = 8
USE_FOV_DIST = True  # if False, then uses the OD disk diameter as a basis for the crop size
# Crop side expressed as a multiple of the OD diameter.
MAX_OD_DIAMETER_PROP = 2.0
# Images whose fovea detection confidence is below this are skipped when USE_FOV_DIST is True.
MIN_FOVEA_CONF = 0.75

# --- Derived constants (don't change these) ----------------------------------
CROP_ENLARGMENT_FACTOR = 2**0.5 * 1.01  # so that a rotation of the area of interest will not show an artificial border
# Converts MAX_OD_DIAMETER_PROP into a multiple of the fovea-to-OD-centre distance,
# using the ratio between average OD diameter and average distance between fovea
# center and OD center
# https://en.wikipedia.org/wiki/Fovea_centralis#/media/File:Macula.svg
# NOTE(review): presumably 1.92 is the OD diameter and (4.5, 0.65) the fovea offset
# read off that figure -- confirm.
MAX_FOV_DIST_PROP = MAX_OD_DIAMETER_PROP * 1.92/((4.5**2+0.65**2)**0.5)


# The output directory name encodes the settings that produced it.
if USE_FOV_DIST:
    OUT_PATH = f'data/cfp_od_crop_FOV_f{MAX_OD_DIAMETER_PROP}_conf{MIN_FOVEA_CONF}'
else:
    OUT_PATH = f'data/cfp_od_crop_OD_f{MAX_OD_DIAMETER_PROP}'

if not os.path.isdir(OUT_PATH):
    print(f'{OUT_PATH} does not exist, creating dir')
    # makedirs also creates missing parent directories, where os.mkdir would fail.
    os.makedirs(OUT_PATH, exist_ok=True)

data/cfp_od_crop_FOV_f2.0_conf0.75 does not exist, creating dir


In [62]:
from multiprocessing import Pool
import math

def _process_file(filename):
    """Crop one image around its detected optic disc (OD) and save it as PNG.

    The crop is a square centred on the OD centre. Its side is derived either
    from the fovea-to-OD distance (``USE_FOV_DIST`` true) or from the larger
    side of the OD box, then scaled by ``CROP_ENLARGMENT_FACTOR``. Detection
    ratios are converted to pixels with ``orig_crop_side`` from ``df_square``.

    Parameters
    ----------
    filename : str
        File name inside ``data/cfp``; also the index key used to look up
        ``df_square`` and ``df_detector``.

    Returns
    -------
    None
        The crop is written to ``{OUT_PATH}/<stem>.png``. A message is printed
        and nothing is written when the file has no metadata row, cannot be
        read, has no OD detection, or (with ``USE_FOV_DIST``) has no fovea
        detection with confidence >= ``MIN_FOVEA_CONF``.

    Raises
    ------
    Exception
        Any unexpected error is printed together with the file name and
        re-raised.
    """
    try:
        # Skip directory entries that have no metadata instead of letting a
        # KeyError abort the whole (possibly multi-process) run.
        if filename not in df_square.index or filename not in df_detector.index:
            print(f"No metadata for {filename}")
            return
        sq_info = df_square.loc[filename]
        detect_info = df_detector.loc[filename]

        # NaN + anything is NaN, so one isnan covers both coordinates.
        if math.isnan(detect_info.odc_x_ratio + detect_info.odc_y_ratio):
            print(f"No OD for {filename}")
            return

        if USE_FOV_DIST:
            if not math.isnan(detect_info.fovea_x_ratio + detect_info.fovea_y_ratio) and detect_info.fovea_conf >= MIN_FOVEA_CONF:
                x2, x1 = detect_info.fovea_x_ratio, detect_info.odc_x_ratio
                y2, y1 = detect_info.fovea_y_ratio, detect_info.odc_y_ratio
                # distance between the fovea and OD center
                side = ((x2 - x1)**2 + (y2 - y1)**2)**0.5
                side = side * MAX_FOV_DIST_PROP * sq_info.orig_crop_side
            else:
                print(f"No FOV (conf {detect_info.fovea_conf} < {MIN_FOVEA_CONF}) for {filename}")
                return
        else:
            side = max(detect_info.odc_width_ratio, detect_info.odc_height_ratio)
            side = side * MAX_OD_DIAMETER_PROP * sq_info.orig_crop_side
        side = int(side * CROP_ENLARGMENT_FACTOR)

        # OD centre in pixels of the image on disk. Note the cross-over:
        # delta_y shifts the x coordinate and delta_x shifts the y coordinate.
        # NOTE(review): presumably delta_x/delta_y in img_info.csv are
        # row/column offsets of the square crop -- confirm against
        # src.preprocessing.
        odc_x = int(round(sq_info.orig_crop_side * detect_info.odc_x_ratio - sq_info.delta_y))
        odc_y = int(round(sq_info.orig_crop_side * detect_info.odc_y_ratio - sq_info.delta_x))

        # Decode the image only once we know it will be cropped.
        file = f"data/cfp/{filename}"
        img = cv2.imread(file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Could not read image {file}")
            return

        square_img = ppc.crop_od_fill_if_needed(img, odc_x, odc_y, side)

        out_file = f"{OUT_PATH}/{os.path.splitext(filename)[0]}.png"
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(out_file, square_img):
            print(f"Could not write {out_file}")

    except Exception as e:
        print('error ', filename, e)
        raise
        
# Every entry of data/cfp, in sorted order.
l_files = os.listdir("data/cfp")
l_files.sort()

In [63]:
# Process every file: serially when at most one worker is configured,
# otherwise through a process pool. The progress bar advances as results
# are consumed.
if MULTIPROCESSING_WORKERS <= 1:
    for f in tqdm(l_files):
        _process_file(f)
else:
    with Pool(MULTIPROCESSING_WORKERS) as pool:
        progress = tqdm(pool.imap(_process_file, l_files), total=len(l_files))
        # Draining the iterator is what actually drives the work.
        _ = list(progress)

print('Finished.')

  0%|          | 0/15000 [00:00<?, ?it/s]

No FOV (conf 0.434758 < 0.75) for DEV00011.jpg
No FOV (conf 0.70666 < 0.75) for DEV00015.jpg
No FOV (conf 0.630659 < 0.75) for DEV00012.jpg
No FOV (conf 0.673894 < 0.75) for DEV00021.jpg
No FOV (conf 0.697643 < 0.75) for DEV00028.jpg
No FOV (conf 0.723576 < 0.75) for DEV00032.jpg
No FOV (conf 0.729238 < 0.75) for DEV00044.jpg
No FOV (conf 0.680674 < 0.75) for DEV00046.jpg
No FOV (conf 0.701644 < 0.75) for DEV00048.jpg
No FOV (conf 0.726913 < 0.75) for DEV00053.jpg
No FOV (conf 0.682523 < 0.75) for DEV00067.jpg
No FOV (conf nan < 0.75) for DEV00077.jpg
No FOV (conf 0.681547 < 0.75) for DEV00086.jpg
No FOV (conf 0.736927 < 0.75) for DEV00089.jpg
No FOV (conf nan < 0.75) for DEV00104.jpg
No FOV (conf nan < 0.75) for DEV00113.jpg
No FOV (conf 0.592389 < 0.75) for DEV00115.jpg
No FOV (conf 0.359209 < 0.75) for DEV00117.jpg
No FOV (conf 0.670931 < 0.75) for DEV00114.jpg
No FOV (conf 0.723937 < 0.75) for DEV00121.jpgNo FOV (conf 0.580037 < 0.75) for DEV00116.jpgNo FOV (conf 0.61894 < 0.75) fo