In [1]:
import numpy as np
import os
import pandas as pd

In [2]:
# Sample ids are 1-based and run from 1 to 1000 inclusive.
ids = list(range(1, 1001))

In [3]:
# Split the ids into 3 contiguous folds (no shuffling). np.array_split accepts a
# length that is not divisible by the fold count: 1000 ids give folds of
# 334, 333 and 333 elements.
folds = np.array_split(ids, 3)

In [4]:
# Sanity check: number of folds produced by the split.
len(folds)

3

## Make fold CSVs for real data

In [5]:
# Source directories for the real dataset. make_df looks up "<id>.jpg" in each
# of them, so an image and its mask are expected to share the same file name.
# NOTE(review): absolute, machine-specific paths - adjust when running elsewhere.
img_dir = "/work/vajira/DATA/sinGAN_polyps/singan_test_2/real_images"
mask_dir = "/work/vajira/DATA/sinGAN_polyps/singan_test_2/real_masks"

In [6]:
def make_df(img_dir, mask_dir, fold_ids):
    """Build a table of matching image/mask paths for one fold.

    For every id in ``fold_ids`` the files ``<img_dir>/<id>.jpg`` and
    ``<mask_dir>/<id>.jpg`` are checked. A row is added only when both files
    exist; otherwise the two candidate paths are printed and the id is skipped.

    Parameters
    ----------
    img_dir : str
        Directory containing the images.
    mask_dir : str
        Directory containing the masks.
    fold_ids : iterable
        Ids of the samples belonging to the fold; each is converted with
        ``str()`` to form the file name.

    Returns
    -------
    pandas.DataFrame
        Columns ``image_path`` and ``mask_path``, one row per id whose image
        and mask were both found, in the order of ``fold_ids``. The columns
        are present even when no row was collected.
    """
    # Rows are gathered in a plain list and the frame is built once at the end:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    records = []

    for data_id in fold_ids:

        file_name = str(data_id) + ".jpg"
        img_path = os.path.join(img_dir, file_name)
        mask_path = os.path.join(mask_dir, file_name)

        if os.path.exists(img_path) and os.path.exists(mask_path):
            records.append({"image_path": img_path,
                            "mask_path": mask_path})
        else:
            print("Found mismatch files")
            print("Image path=", img_path)
            print("mask_path", mask_path)

    # Passing `columns` keeps the header stable for an empty fold.
    return pd.DataFrame(records, columns=["image_path", "mask_path"])

In [8]:
def make_and_save_folds(img_dir, mask_dir, folds_id_array, basic_name="REAL"):
    """Write one CSV of image/mask paths per fold.

    Each entry of ``folds_id_array`` is passed to ``make_df`` and the
    resulting table is saved as ``<basic_name>_fold_<k>.csv`` (``k`` being the
    position of the fold), without the index column and with a header row.
    The file name is relative, so it is resolved against the current working
    directory.
    """
    for fold_index, ids_in_fold in enumerate(folds_id_array):
        fold_table = make_df(img_dir, mask_dir, ids_in_fold)
        csv_name = f"{basic_name}_fold_{fold_index}.csv"
        fold_table.to_csv(csv_name, index=False, header=True)

In [9]:
# Writes REAL_fold_0.csv, REAL_fold_1.csv and REAL_fold_2.csv (one per fold)
# into the current working directory.
make_and_save_folds(img_dir, mask_dir, folds, basic_name="REAL")

In [7]:
#df = make_df(img_dir, mask_dir, folds[0])

In [14]:
#df.to_csv("test.csv",index=False, header=True)

## Make fold CSVs for synthetic data

In [19]:
# Directory with the generated samples. make_df_from_synthetic expects files
# named "chk_id_<id>_gen_scale_<scale>_<n>_img.png" and
# "chk_id_<id>_gen_scale_<scale>_<n>_mask.png" directly inside it.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
src_dir = "/work/vajira/DATA/sinGAN_polyps/singan_out"


In [20]:
def make_df_from_synthetic(src_dir, fold_ids, scales = [0, 1], num_samples_per_img=2):
    """Build a table of matching synthetic image/mask paths for one fold.

    For every id in ``fold_ids``, every sample index ``n`` in
    ``range(num_samples_per_img)`` and every ``scale`` in ``scales`` the files
    ``<src_dir>/chk_id_<id>_gen_scale_<scale>_<n>_img.png`` and
    ``<src_dir>/chk_id_<id>_gen_scale_<scale>_<n>_mask.png`` are checked. A row
    is added only when both exist; otherwise the two candidate paths are
    printed and the combination is skipped.

    Parameters
    ----------
    src_dir : str
        Directory containing the generated images and masks.
    fold_ids : iterable
        Ids of the samples belonging to the fold.
    scales : iterable, optional
        Scale values used in the file names. The default list is only read,
        never modified.
    num_samples_per_img : int, optional
        Number of sample indices (starting at 0) to look up per id.

    Returns
    -------
    pandas.DataFrame
        Columns ``image_path`` and ``mask_path``. Rows are ordered by id, then
        sample index, then scale. The columns are present even when no row
        was collected.
    """
    # Rows are gathered in a plain list and the frame is built once at the end:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    records = []

    for data_id in fold_ids:

        for n in range(num_samples_per_img):

            for scale in scales:

                prefix = f"{src_dir}/chk_id_{data_id}_gen_scale_{scale}_{n}"
                img_path = f"{prefix}_img.png"
                mask_path = f"{prefix}_mask.png"

                if os.path.exists(img_path) and os.path.exists(mask_path):
                    records.append({"image_path": img_path,
                                    "mask_path": mask_path})
                else:
                    print("Found mismatch files")
                    print("Image path=", img_path)
                    print("mask_path", mask_path)

    # Passing `columns` keeps the header stable for an empty fold.
    return pd.DataFrame(records, columns=["image_path", "mask_path"])
    
    

In [23]:
def make_and_save_folds_synth(src_dir, folds_id_array, scales = [0], num_samples_per_img=1, basic_name="FAKE_scale_0_num_img_1"):
    """Write one CSV of synthetic image/mask paths per fold.

    Each entry of ``folds_id_array`` is passed to ``make_df_from_synthetic``
    together with ``scales`` and ``num_samples_per_img``; the resulting table
    is saved as ``<basic_name>_fold_<k>.csv`` (``k`` being the position of the
    fold), without the index column and with a header row. The file name is
    relative, so it is resolved against the current working directory.
    """
    for fold_index, ids_in_fold in enumerate(folds_id_array):
        fold_table = make_df_from_synthetic(
            src_dir, ids_in_fold, scales, num_samples_per_img
        )
        csv_name = f"{basic_name}_fold_{fold_index}.csv"
        fold_table.to_csv(csv_name, index=False, header=True)

In [24]:
# Relies on the defaults (scales=[0], num_samples_per_img=1), so each fold CSV
# lists one generated sample per id. Files are written to the current working
# directory as FAKE_scale_0_num_img_1_fold_<k>.csv.
make_and_save_folds_synth(src_dir, folds)

In [15]:
#df =make_df_from_synthetic(src_dir, folds[0])

In [16]:
#df

In [17]:
#df.to_csv("test_2.csv",index=False, header=True)