# Add YOLO results to the dataframe, compute the optic disc (OD) in the original image, and save the images

In [None]:
import pandas as pd
import os
from tqdm import tqdm
from collections import Counter
import src.preprocessing as ppc
import cv2

In [None]:
# Load the per-image metadata table; the first CSV column becomes the index.
# Later cells read the columns new_file, orig_file, orig_crop_side, delta_x,
# delta_y and side from this table.
df = pd.read_csv("data/shuffled_square_75/img_info.csv", index_col=0)
df

## Extract predictions and add to dataframe

In [None]:
# Count how many predictions each label file contains (one prediction per line).
leng = []

for filename in tqdm(os.listdir("data/labels")):
    # A context manager guarantees the handle is closed; the previous bare
    # open(...).read() left one open file per label file behind.
    with open(f"data/labels/{filename}", "r") as label_file:
        # Skip blank lines instead of blindly dropping the last split element,
        # so a file without a trailing newline does not lose its last prediction.
        predictions = [line.split() for line in label_file if line.strip()]
    leng.append(len(predictions))

In [None]:
# Tally how many label files contain each number of predictions.
count = Counter(leng)
count # author's note from one run: in 117 instances there is only 1 prediction
# (Checked manually: it is roughly half fovea-only and half OD-only, so about 50 ODs
# are missing; in those cases the full retina image can be used instead.)

In [None]:
# Number of entries in the labels directory (one label file per entry).
len(os.listdir("data/labels"))

In [None]:
# Parse every label file into one row per image, keeping at most one optic-disc
# centre (class 0) and one fovea (class 1) detection, then left-join the result
# onto the metadata table via the 'new_file' key.
records = []
for filename in tqdm(os.listdir("data/labels")):
    # Strip the extension properly rather than assuming a 4-character suffix.
    key = os.path.splitext(filename)[0]
    # Five placeholders per class: the values that follow the class id on a
    # label line (x, y, width, height ratios and the confidence, matching the
    # column names below). They stay None when the class was not detected.
    odc = [None] * 5
    fovea = [None] * 5
    with open(f"data/labels/{filename}", "r") as f:
        for line in f:
            pred = [float(number) for number in line.split()]
            if not pred:
                # Blank line (e.g. trailing newline at the end of the file).
                continue
            class_id, values = pred[0], pred[1:]
            # When a class is detected more than once, keep the detection with
            # the highest confidence (last value) instead of whichever line
            # happens to come last in the file.
            if class_id == 0:
                if odc[-1] is None or values[-1] > odc[-1]:
                    odc = values
            elif class_id == 1:
                if fovea[-1] is None or values[-1] > fovea[-1]:
                    fovea = values

    records.append((key, *odc, *fovea))

df_to_join = pd.DataFrame.from_records(records, columns=[
    'new_file',
    'odc_x_ratio', 'odc_y_ratio', 'odc_width_ratio', 'odc_height_ratio', 'odc_conf',
    'fovea_x_ratio', 'fovea_y_ratio', 'fovea_width_ratio', 'fovea_height_ratio', 'fovea_conf'])

# Highest confidences seen per class; not used further in the visible cells but
# kept for interactive inspection.
max_fovea = df_to_join.fovea_conf.max()
max_odx = df_to_join.odc_conf.max()
# Left join: images without a label file keep NaN in all prediction columns.
df = pd.merge(df.reset_index(), df_to_join, how='left', on='new_file')
df

## Calculate OD size and center on original image

In [None]:
# Scale the predicted ratios by the side length of the original square crop to
# obtain pixel values, rounded to whole pixels.
crop_side = df["orig_crop_side"]

# target pixel column -> source ratio column (insertion order = column order)
square_from_ratio = {
    "fovea_x_square": "fovea_x_ratio",
    "odc_x_square": "odc_x_ratio",
    "fovea_y_square": "fovea_y_ratio",
    "odc_y_square": "odc_y_ratio",
}
for square_col, ratio_col in square_from_ratio.items():
    df[square_col] = (crop_side * df[ratio_col]).round(0)

# The OD side is taken as the mean of the predicted box height and width.
df["od_side_ratio_avg"] = (df["odc_height_ratio"] + df["odc_width_ratio"]) / 2
df["odc_side_pxl"] = (crop_side * df["od_side_ratio_avg"]).round(0)

# NOTE: the pairing is deliberately crossed. It seems that delta_x refers to
# how many pixels are cut off from the top and delta_y to how many are cut off
# from the left, so x is corrected with delta_y and y with delta_x.
df["odc_x_rect"] = df["odc_x_square"] - df["delta_y"]
df["odc_y_rect"] = df["odc_y_square"] - df["delta_x"]

df

In [None]:
# Persist the metadata table extended with the prediction and pixel columns.
df.to_csv("data/img_info_extended.csv")

In [None]:
# Reload the extended table from disk so the cropping step below can be run
# without re-executing the cells above.
df = pd.read_csv("data/img_info_extended.csv", index_col=0)
df

## Crop original images

In [None]:
# Settings for the cropping step.
FACTOR = 1.5  # crop side length = detected OD side (odc_side_pxl) * FACTOR
THRESHOLD = 10  # forwarded to ppc.make_square for images without a detected OD
MULTIPROCESSING_WORKERS = 8  # > 1 runs process_file in a process pool, otherwise serially
RESOLUTION = 384  # forwarded to ppc.resize_square
# NOTE(review): in the visible code this constant only ends up in the output
# directory name; it is not passed to ppc.resize_square - confirm the resize
# really uses this interpolation.
INTERPOLATION_METHOD = cv2.INTER_CUBIC
DATA_DIR = f'./data/ods_center_{FACTOR}_{RESOLUTION}_{INTERPOLATION_METHOD}'
if not os.path.isdir(DATA_DIR):
    print(f'{DATA_DIR} does not exist, creating dir')
    # makedirs also creates missing parents, and exist_ok avoids a crash if the
    # directory appears between the check above and this call.
    os.makedirs(DATA_DIR, exist_ok=True)

def process_file(filename):
    """Crop one original image around its optic disc and save it as a PNG.

    The image ``data/cfp/<filename>`` is looked up in the global ``df`` via
    its ``orig_file`` column. If an optic-disc centre is available, a square
    of side ``odc_side_pxl * FACTOR`` around it is cropped with
    ``ppc.crop_od_fill_if_needed``; otherwise the whole image is squared with
    ``ppc.make_square``. Either way the result is resized with
    ``ppc.resize_square`` to ``RESOLUTION`` and written to
    ``DATA_DIR/<new_file>.png``.

    Args:
        filename: Name of a file inside ``data/cfp``.

    Returns:
        None. Files without a metadata row in ``df`` are skipped with a
        printed message.

    Raises:
        OSError: If the image is listed in ``df`` but cannot be read.
    """
    matches = df.loc[df.orig_file == filename]
    if matches.empty:
        # Stray files in the image directory would otherwise abort the whole
        # run with an IndexError.
        print(f'No metadata found for {filename}, skipping')
        return
    # Work with scalars of the first matching row; calling int() on a one-row
    # Series is deprecated in pandas and fails for multi-row matches.
    row = matches.iloc[0]

    img = cv2.imread(f"data/cfp/{filename}")
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f'Could not read image data/cfp/{filename}')

    write_filename = f'{row["new_file"]}.png'
    if pd.isna(row["odc_x_rect"]):
        # TODO: This is not correct, a separate model should be trained for images without an optic disc being detected
        img_square, _, _ = ppc.make_square(img, THRESHOLD)
        img_res = ppc.resize_square(img_square, RESOLUTION)
        cv2.imwrite(f"{DATA_DIR}/{write_filename}", img_res)
    else:
        # Shift the centre by half the difference between the image
        # height/width and the 'side' column to get coordinates in the
        # original image.
        add_top = (img.shape[0] - row["side"]) / 2
        add_left = (img.shape[1] - row["side"]) / 2
        odc_x_rect = int(row["odc_x_square"] + add_left)
        odc_y_rect = int(row["odc_y_square"] + add_top)

        # ppc.crop_od_fill_if_needed is used here rather than ppc.crop_od.
        od = ppc.crop_od_fill_if_needed(img, odc_x_rect, odc_y_rect, int(row["odc_side_pxl"] * FACTOR))

        od_res = ppc.resize_square(od, RESOLUTION)
        cv2.imwrite(f"{DATA_DIR}/{write_filename}", od_res) # png because it is lossless

# Run process_file over every file in data/cfp, in parallel when more than one
# worker is configured. The directory is listed once so both paths process the
# same set of files.
cfp_files = os.listdir("data/cfp")
if MULTIPROCESSING_WORKERS > 1:
    from multiprocessing import Pool
    with Pool(MULTIPROCESSING_WORKERS) as pool:
        # list(...) drains the lazy imap iterator so tqdm can report progress.
        op_metadata = list(tqdm(pool.imap(process_file, cfp_files), total=len(cfp_files)))
else:
    # The serial path now also defines op_metadata, like the parallel path.
    op_metadata = [process_file(filename) for filename in tqdm(cfp_files)]
print('Finished.')