# Add YOLO results to dataframe, compute optic disc (OD) position in original image, save images

In [24]:
import pandas as pd
import os
from tqdm import tqdm
from collections import Counter
import src.preprocessing as ppc
import cv2
import math

In [25]:
# Load the per-image metadata table; the first CSV column is used as the index.
df = pd.read_csv("data/shuffled_square_75/img_info.csv", index_col=0)
df

Unnamed: 0,orig_file,new_file,delta_x,delta_y,orig_crop_side,side,scaling
0,DEV01452.jpg,SHUF02915,-88,-904,3152,3152,1.0
1,DEV08405.jpg,SHUF04819,171,-144,2262,2262,1.0
2,DEV10680.jpg,SHUF00929,-138,-650,2797,2797,1.0
3,DEV13443.jpg,SHUF14397,168,-152,2256,2256,1.0
4,DEV08466.jpg,SHUF05169,195,-130,2334,2334,1.0
...,...,...,...,...,...,...,...
14995,DEV05006.jpg,SHUF00628,-34,-56,1871,1871,1.0
14996,DEV04121.jpg,SHUF09316,-131,-875,2974,2974,1.0
14997,DEV14861.jpg,SHUF04089,-91,-624,2193,2193,1.0
14998,DEV02406.jpg,SHUF09274,196,-128,2336,2336,1.0


## Extract predictions and add to dataframe

In [26]:
# Number of predictions found in each label file (one entry per file).
leng = []

for filename in tqdm(os.listdir("data/labels_train_finalModel")):
    # Use a context manager so the file handle is closed after each file.
    with open(f"data/labels_train_finalModel/{filename}", "r") as label_file:
        # splitlines() keeps the last prediction even when the file has no
        # trailing newline; blank lines are not counted as predictions.
        predictions = [
            line.split()
            for line in label_file.read().splitlines()
            if line.strip()
        ]
    leng.append(len(predictions))

In [27]:
count = Counter(leng)
count # files with only 1 prediction: originally noted as 117, but the recorded output below shows 81 -- NOTE(review): confirm which run is current
# (Checked manually: these are roughly half fovea, half OD, so about 50 ODs are missing; in that case we can just use the full retina.)

Counter({2: 14791, 3: 127, 1: 81, 4: 1})

In [28]:
# Sanity check: number of entries in the label directory.
len(os.listdir("data/labels_train_finalModel"))

15000

In [29]:
def _pick_box(current, candidate):
    """Return the box to keep for one class: the more confident of the two.

    Both arguments are 5-element lists whose last element is the confidence
    (or ``None`` when unknown). The candidate wins ties and wins whenever a
    confidence is missing, so a file with a single box per class yields the
    same result as simply taking that box.
    """
    current_conf, candidate_conf = current[4], candidate[4]
    if (
        current_conf is not None
        and candidate_conf is not None
        and candidate_conf < current_conf
    ):
        return current
    return candidate


# Build one record per label file:
# (new_file, 5 values for class 0 / optic disc, 5 values for class 1 / fovea).
records = []
for filename in tqdm(os.listdir("data/labels_train_finalModel")):
    # Strip the extension (whatever its length) to recover the image key.
    key = os.path.splitext(filename)[0]
    # Classes that are not predicted in this file stay all-None (NaN later).
    odc = [None] * 5
    fovea = [None] * 5
    with open(f"data/labels_train_finalModel/{filename}", "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of raising IndexError
            pred = [float(number) for number in fields]
            # Pad/truncate to exactly 5 values so every record has 11 columns
            # even if a line lacks the trailing confidence value.
            box = (pred[1:] + [None] * 5)[:5]
            # Some files contain several boxes of the same class; keep the
            # most confident one rather than whichever happens to come last.
            if pred[0] == 0:
                odc = _pick_box(odc, box)
            elif pred[0] == 1:
                fovea = _pick_box(fovea, box)

    records.append((key, *odc, *fovea))
print(records[0])

df_to_join = pd.DataFrame.from_records(records, columns=[
    'new_file',
    'odc_x_ratio', 'odc_y_ratio', 'odc_width_ratio', 'odc_height_ratio', 'odc_conf',
    'fovea_x_ratio', 'fovea_y_ratio', 'fovea_width_ratio', 'fovea_height_ratio', 'fovea_conf'])


# Highest confidence seen per class (kept for inspection).
max_fovea = df_to_join.fovea_conf.max()
max_odx = df_to_join.odc_conf.max()
# reset_index() turns the current index into a regular "index" column; the
# left join keeps every row of df, leaving NaN where no label file matched.
df = pd.merge(df.reset_index(), df_to_join, how='left', on='new_file')
df

100%|██████████| 15000/15000 [00:02<00:00, 7332.54it/s]

('SHUF00000', 0.7875, 0.457812, 0.1875, 0.190625, 0.940194, 0.395312, 0.502344, 0.190625, 0.195312, 0.840121)





Unnamed: 0,index,orig_file,new_file,delta_x,delta_y,orig_crop_side,side,scaling,odc_x_ratio,odc_y_ratio,odc_width_ratio,odc_height_ratio,odc_conf,fovea_x_ratio,fovea_y_ratio,fovea_width_ratio,fovea_height_ratio,fovea_conf
0,0,DEV01452.jpg,SHUF02915,-88,-904,3152,3152,1.0,0.227344,0.433594,0.167187,0.167187,0.947662,0.629687,0.521094,0.165625,0.170312,0.877343
1,1,DEV08405.jpg,SHUF04819,171,-144,2262,2262,1.0,0.309375,0.423438,0.159375,0.162500,0.948557,0.639844,0.480469,0.164062,0.167187,0.840693
2,2,DEV10680.jpg,SHUF00929,-138,-650,2797,2797,1.0,0.732813,0.479688,0.156250,0.153125,0.924706,0.371875,0.500000,0.150000,0.153125,0.893917
3,3,DEV13443.jpg,SHUF14397,168,-152,2256,2256,1.0,0.857031,0.439844,0.195312,0.204688,0.931552,0.484375,0.489844,0.187500,0.189063,0.865631
4,4,DEV08466.jpg,SHUF05169,195,-130,2334,2334,1.0,0.689844,0.546094,0.145313,0.145313,0.937109,0.385156,0.562500,0.148438,0.153125,0.761766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,14995,DEV05006.jpg,SHUF00628,-34,-56,1871,1871,1.0,0.231250,0.475781,0.162500,0.164062,0.938521,0.603125,0.525781,0.162500,0.167187,0.876266
14996,14996,DEV04121.jpg,SHUF09316,-131,-875,2974,2974,1.0,0.271875,0.473437,0.209375,0.215625,0.914925,0.672656,0.516406,0.192188,0.192188,0.874059
14997,14997,DEV14861.jpg,SHUF04089,-91,-624,2193,2193,1.0,0.275000,0.480469,0.162500,0.164062,0.924159,0.668750,0.528906,0.162500,0.167187,0.879345
14998,14998,DEV02406.jpg,SHUF09274,196,-128,2336,2336,1.0,0.261719,0.404687,0.173438,0.168750,0.948592,0.629687,0.541406,0.162500,0.167187,0.743861


## Calculate OD size and center on original image

In [30]:
# Convert the ratio-based predictions into pixel values by multiplying with
# orig_crop_side, rounded to whole pixels.
df["fovea_x_square"] = df["orig_crop_side"].mul(df["fovea_x_ratio"]).round(0)
df["odc_x_square"] = df["orig_crop_side"].mul(df["odc_x_ratio"]).round(0)
df["fovea_y_square"] = df["orig_crop_side"].mul(df["fovea_y_ratio"]).round(0)
df["odc_y_square"] = df["orig_crop_side"].mul(df["odc_y_ratio"]).round(0)

# Mean of the predicted box height and width ratios, and the same in pixels.
df["od_side_ratio_avg"] = df["odc_height_ratio"].add(df["odc_width_ratio"]).div(2)
df["odc_side_pxl"] = df["orig_crop_side"].mul(df["od_side_ratio_avg"]).round(0)

# Note the swapped deltas: it seems like delta_x refers to how many pixels are
# cut off from the top and delta_y to how many are cut off from the left.
df["odc_x_rect"] = df["odc_x_square"].sub(df["delta_y"])
df["odc_y_rect"] = df["odc_y_square"].sub(df["delta_x"])

df

Unnamed: 0,index,orig_file,new_file,delta_x,delta_y,orig_crop_side,side,scaling,odc_x_ratio,odc_y_ratio,...,fovea_height_ratio,fovea_conf,fovea_x_square,odc_x_square,fovea_y_square,odc_y_square,od_side_ratio_avg,odc_side_pxl,odc_x_rect,odc_y_rect
0,0,DEV01452.jpg,SHUF02915,-88,-904,3152,3152,1.0,0.227344,0.433594,...,0.170312,0.877343,1985.0,717.0,1642.0,1367.0,0.167187,527.0,1621.0,1455.0
1,1,DEV08405.jpg,SHUF04819,171,-144,2262,2262,1.0,0.309375,0.423438,...,0.167187,0.840693,1447.0,700.0,1087.0,958.0,0.160938,364.0,844.0,787.0
2,2,DEV10680.jpg,SHUF00929,-138,-650,2797,2797,1.0,0.732813,0.479688,...,0.153125,0.893917,1040.0,2050.0,1398.0,1342.0,0.154688,433.0,2700.0,1480.0
3,3,DEV13443.jpg,SHUF14397,168,-152,2256,2256,1.0,0.857031,0.439844,...,0.189063,0.865631,1093.0,1933.0,1105.0,992.0,0.200000,451.0,2085.0,824.0
4,4,DEV08466.jpg,SHUF05169,195,-130,2334,2334,1.0,0.689844,0.546094,...,0.153125,0.761766,899.0,1610.0,1313.0,1275.0,0.145313,339.0,1740.0,1080.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,14995,DEV05006.jpg,SHUF00628,-34,-56,1871,1871,1.0,0.231250,0.475781,...,0.167187,0.876266,1128.0,433.0,984.0,890.0,0.163281,305.0,489.0,924.0
14996,14996,DEV04121.jpg,SHUF09316,-131,-875,2974,2974,1.0,0.271875,0.473437,...,0.192188,0.874059,2000.0,809.0,1536.0,1408.0,0.212500,632.0,1684.0,1539.0
14997,14997,DEV14861.jpg,SHUF04089,-91,-624,2193,2193,1.0,0.275000,0.480469,...,0.167187,0.879345,1467.0,603.0,1160.0,1054.0,0.163281,358.0,1227.0,1145.0
14998,14998,DEV02406.jpg,SHUF09274,196,-128,2336,2336,1.0,0.261719,0.404687,...,0.167187,0.743861,1471.0,611.0,1265.0,945.0,0.171094,400.0,739.0,749.0


In [31]:
# Persist the extended table (metadata + predictions + derived pixel columns).
df.to_csv("data/img_info_extended.csv")

In [32]:
# Reload the extended table from disk; the first CSV column is used as the index.
df = pd.read_csv("data/img_info_extended.csv", index_col=0)
df

Unnamed: 0,index,orig_file,new_file,delta_x,delta_y,orig_crop_side,side,scaling,odc_x_ratio,odc_y_ratio,...,fovea_height_ratio,fovea_conf,fovea_x_square,odc_x_square,fovea_y_square,odc_y_square,od_side_ratio_avg,odc_side_pxl,odc_x_rect,odc_y_rect
0,0,DEV01452.jpg,SHUF02915,-88,-904,3152,3152,1.0,0.227344,0.433594,...,0.170312,0.877343,1985.0,717.0,1642.0,1367.0,0.167187,527.0,1621.0,1455.0
1,1,DEV08405.jpg,SHUF04819,171,-144,2262,2262,1.0,0.309375,0.423438,...,0.167187,0.840693,1447.0,700.0,1087.0,958.0,0.160938,364.0,844.0,787.0
2,2,DEV10680.jpg,SHUF00929,-138,-650,2797,2797,1.0,0.732813,0.479688,...,0.153125,0.893917,1040.0,2050.0,1398.0,1342.0,0.154688,433.0,2700.0,1480.0
3,3,DEV13443.jpg,SHUF14397,168,-152,2256,2256,1.0,0.857031,0.439844,...,0.189063,0.865631,1093.0,1933.0,1105.0,992.0,0.200000,451.0,2085.0,824.0
4,4,DEV08466.jpg,SHUF05169,195,-130,2334,2334,1.0,0.689844,0.546094,...,0.153125,0.761766,899.0,1610.0,1313.0,1275.0,0.145313,339.0,1740.0,1080.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,14995,DEV05006.jpg,SHUF00628,-34,-56,1871,1871,1.0,0.231250,0.475781,...,0.167187,0.876266,1128.0,433.0,984.0,890.0,0.163281,305.0,489.0,924.0
14996,14996,DEV04121.jpg,SHUF09316,-131,-875,2974,2974,1.0,0.271875,0.473437,...,0.192188,0.874059,2000.0,809.0,1536.0,1408.0,0.212500,632.0,1684.0,1539.0
14997,14997,DEV14861.jpg,SHUF04089,-91,-624,2193,2193,1.0,0.275000,0.480469,...,0.167187,0.879345,1467.0,603.0,1160.0,1054.0,0.163281,358.0,1227.0,1145.0
14998,14998,DEV02406.jpg,SHUF09274,196,-128,2336,2336,1.0,0.261719,0.404687,...,0.167187,0.743861,1471.0,611.0,1265.0,945.0,0.171094,400.0,739.0,749.0


## Crop original images

In [51]:
# Crop configuration.
FACTOR = 1  # multiplier applied to the OD-fovea distance in process_file
THRESHOLD = 10  # passed to ppc.make_square in process_file
MULTIPROCESSING_WORKERS = 8  # > 1 runs the crops in a multiprocessing Pool
RESOLUTION = 384  # passed to ppc.resize_square in process_file
# NOTE(review): within this notebook the interpolation constant only ends up in
# the output directory name; it is not passed to any resize call -- confirm
# that ppc.resize_square actually uses the intended method.
INTERPOLATION_METHOD = cv2.INTER_CUBIC
DATA_DIR = f'./data/ods_center_{FACTOR}_{RESOLUTION}_{INTERPOLATION_METHOD}'
if not os.path.isdir(DATA_DIR):
    print(f'{DATA_DIR} does not exist, creating dir')
    # makedirs also creates missing parent directories and, with exist_ok,
    # does not fail if the directory appears between the check and the call.
    os.makedirs(DATA_DIR, exist_ok=True)

def process_file(filename):
    """Crop one image from ``data/cfp`` and write the result to ``DATA_DIR``.

    The row of the global ``df`` whose ``orig_file`` equals ``filename``
    supplies the output name (``new_file`` + ``.png``) and the predicted
    optic-disc / fovea positions.

    * If either prediction is missing, the image is passed through
      ``ppc.make_square`` and ``ppc.resize_square`` and written as is.
    * Otherwise the image is passed to ``ppc.crop_od_fill_if_needed`` with the
      optic-disc centre and ``int(distance * FACTOR)``, where ``distance`` is
      the optic-disc-to-fovea distance in pixels, then resized and written.

    Files that have no row in ``df`` or that OpenCV cannot decode are skipped
    with a printed message instead of raising, so a single bad file does not
    abort a whole batch run.

    Args:
        filename: File name (not path) of an image inside ``data/cfp``.

    Returns:
        None. The only effect is the PNG written to ``DATA_DIR``.
    """
    matches = df.loc[df.orig_file == filename]
    if matches.empty:
        print(f'Skipping {filename}: no matching row in the image table')
        return
    # Work with scalars from the first matching row rather than 1-element
    # Series, so the int() conversions below do not rely on Series coercion.
    row = matches.iloc[0]

    img = cv2.imread(f"data/cfp/{filename}")
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print(f'Skipping {filename}: could not be read as an image')
        return

    write_filename = f"{row['new_file']}.png"

    if pd.isna(row['odc_x_rect']) or pd.isna(row['fovea_x_square']):
        # TODO: This is not correct, a separate model should be trained for images without an optic disc being detected
        img_square, _, _ = ppc.make_square(img, THRESHOLD)
        img_res = ppc.resize_square(img_square, RESOLUTION)
        cv2.imwrite(f"{DATA_DIR}/{write_filename}", img_res)
        return

    # Offsets of the centred square inside the full image
    # (img.shape is (height, width, ...)).
    side = row['side']
    add_top = (img.shape[0] - side) / 2
    add_left = (img.shape[1] - side) / 2

    odc_x_rect = int(row['odc_x_square'] + add_left)
    odc_y_rect = int(row['odc_y_square'] + add_top)
    fovea_x_rect = int(row['fovea_x_square'] + add_left)
    fovea_y_rect = int(row['fovea_y_square'] + add_top)

    # The crop size is derived from the OD-fovea distance rather than from the
    # predicted OD box size (odc_side_pxl).
    distance = math.sqrt((odc_x_rect - fovea_x_rect) ** 2 + (odc_y_rect - fovea_y_rect) ** 2)
    od = ppc.crop_od_fill_if_needed(img, odc_x_rect, odc_y_rect, int(distance * FACTOR))

    od_res = ppc.resize_square(od, RESOLUTION)
    cv2.imwrite(f"{DATA_DIR}/{write_filename}", od_res)  # png because it is lossless


# List the input images once so both execution paths process the same files.
cfp_files = os.listdir("data/cfp")
if MULTIPROCESSING_WORKERS > 1:
    from multiprocessing import Pool
    with Pool(MULTIPROCESSING_WORKERS) as pool:
        # imap yields results in input order, which lets tqdm show progress;
        # process_file returns None, so op_metadata is a list of None values.
        op_metadata = list(tqdm(pool.imap(process_file, cfp_files), total=len(cfp_files)))
    print('Finished.')
else:
    # Serial fallback: reuse the listing above instead of re-reading the dir.
    for filename in tqdm(cfp_files):
        process_file(filename)
 

  1%|          | 180/15000 [00:17<23:35, 10.47it/s]


KeyboardInterrupt: 