## Script to fix issue with image logging during training providing corrupted source images. 

Navigate through train-data to find and match the correct source to the corrupted logged images

In [1]:
import os, json, sys
sys.path.append("../")
import numpy as np
import cv2
from tqdm import tqdm 

In [6]:
# where we will save all uncorrupted sources found
uncorr_dir = "../image_log/uncorr"
os.makedirs(uncorr_dir,exist_ok=True)

# get paths to all corrupted sources in image_log
image_log_files = os.listdir("../image_log")
source_corr_paths_mask = [image_log_files[i].startswith("control") for i in range(len(image_log_files))]
source_corr_paths = np.array(image_log_files)[source_corr_paths_mask]

# get all paths to training targets
train_target_paths = np.array(os.listdir("../train-data/target"))

# for keeping track of files to match
match_table = {}

for k in tqdm(range(len(source_corr_paths))):

    try:
        source_corr_path = source_corr_paths[k]

        # where we will save uncorrupted image to
        source_uncorr_path = os.path.join(uncorr_dir, source_corr_path)

        # target corresponding to corrupted source
        target_path = os.path.join("../image_log", source_corr_path.replace("control","reconstruction"))
        # load target as vector
        target = np.array(cv2.cvtColor(cv2.imread(target_path), code=cv2.COLOR_BGR2RGB).ravel(), dtype=float)

        # keep track of best match in train targets
        best_match_i = -1
        best_association = np.inf
        for (i,train_target_path) in enumerate(train_target_paths):

            # get training target full path
            train_target_path = os.path.join("../train-data/target", train_target_path)
            train_target = np.array(cv2.cvtColor(cv2.imread(train_target_path), code=cv2.COLOR_BGR2RGB).ravel(), dtype=float)

            association = np.linalg.norm(target - train_target, 1)
            if association < best_association:
                best_match_i = i
                best_association = association

        # read and save best matching uncorruted source
        best_match_path = os.path.join("../train-data/source", train_target_paths[best_match_i])
        best_match = cv2.cvtColor(cv2.imread(best_match_path), code=cv2.COLOR_BGR2RGB)
        cv2.imwrite(source_uncorr_path, best_match)

        # save corresponding target
        best_match_target_path = os.path.join("../train-data/target", train_target_paths[best_match_i])
        best_match_target_name = os.path.join(uncorr_dir, source_corr_path.replace("control","reconstruction"))
        cv2.imwrite(best_match_target_name, cv2.cvtColor(cv2.imread(best_match_target_path), code=cv2.COLOR_BGR2RGB))

        # update match table
        match_table[source_corr_path] = best_match_path
        
    except:
        print(f"Problem for {source_corr_paths[k]}")

# save corrleation match table
with open("../image_log/uncorr/corruption_match_table.json", "w") as outfile:
    json.dump(match_table, outfile)

print("All uncorrupted images saved.")


100%|██████████| 165/165 [1:37:56<00:00, 35.62s/it]

All uncorrupted images saved.



