# Copy Images for G-CPA

## Copy Images

In [1]:
from glob import glob
from tqdm import tqdm

import os
import shutil
import random

In [16]:
# Root directory under which both the ImageNet source tree and the output live.
DATA_DIR = "/home/ubuntu/data"
# Destination directory for the copied per-class image subset.
OUT_DIR = DATA_DIR + "/ood-cv-cls/occluded_bird"

In [17]:
# Read one text file per group (one entry per line) into a set keyed by the
# group name. Paths are relative to the current working directory.
OCCLUDED_CLS = {}
for group in ("all", "bird", "dog", "cat", "others"):
    with open(f"occluded_classes/{group}.txt", "r") as f:
        OCCLUDED_CLS[group] = set(f.read().splitlines())

# Print the size of every group on one line, in insertion order:
# all, bird, dog, cat, others.
print(*map(len, OCCLUDED_CLS.values()))

143 15 116 5 7


In [18]:
# Per-class sampling quota for the groups that get subsampled. Classes that
# belong to no group listed here (e.g. "cat" and "others") keep every image.
NUM_SAMPLE = {"bird": 450, "dog": 60}
img_paths = []
for img_dir in tqdm(glob(f"{DATA_DIR}/imagenet/ILSVRC/Data/CLS-LOC/train/*")):
    # The last path component of each train sub-directory is the class ID.
    cls = img_dir.split("/")[-1]
    if cls not in OCCLUDED_CLS["all"]:
        continue

    img_paths_sub = glob(f"{img_dir}/*")

    # The first matching group wins (bird is checked before dog, following
    # NUM_SAMPLE's insertion order).
    for group, quota in NUM_SAMPLE.items():
        if cls in OCCLUDED_CLS[group]:
            # Cap the sample size at the number of available files:
            # random.sample raises ValueError when asked for more items than
            # the population holds (e.g. an incomplete class folder).
            img_paths_sub = random.sample(
                img_paths_sub, min(quota, len(img_paths_sub))
            )
            break

    img_paths.extend(img_paths_sub)

# Bare expression so the notebook displays the number of selected images.
len(img_paths)

100%|██████████| 1000/1000 [00:00<00:00, 15571.83it/s]


6750

In [19]:
# Start from a clean output tree: remove any previous copy, then create one
# (initially empty) directory per class ID.
if os.path.isdir(OUT_DIR):
    shutil.rmtree(OUT_DIR)
for cls in OCCLUDED_CLS["all"]:
    os.makedirs(f"{OUT_DIR}/{cls}")

# Copy every selected image into the directory named after its parent folder.
for img_path in tqdm(img_paths):
    cls_dir, fname = os.path.split(img_path)
    cls = os.path.basename(cls_dir)
    target_path = "/".join((OUT_DIR, cls, fname))
    shutil.copyfile(img_path, target_path)

100%|██████████| 6750/6750 [00:07<00:00, 868.26it/s] 


## Confirm Sample Images

In [2]:
import numpy as np
import cv2

def get_mixed_img(
    img_path,
    mask_path,
    bg_path="imgs/n02112018_138.JPEG",
    out_dir="imgs_cam",
):
    """Cut the masked object out of an image and paste it onto a background.

    Three JPEG files are written to ``out_dir``, each prefixed with the file
    stem of ``img_path``:

    * ``<stem>_cam.jpg``   - the image multiplied element-wise by the mask.
    * ``<stem>_rect.jpg``  - the masked image cropped to the bounding box of
      the first contour returned by ``cv2.findContours``.
    * ``<stem>_mixed.jpg`` - the background with that crop resized and pasted
      over its central region (starting a quarter of the way in and spanning
      half of each dimension). Crop values equal to 0 let the background
      show through.

    Args:
        img_path: Path of the source image.
        mask_path: Path of an ``.npz`` archive; its first array is used as
            the mask. It must broadcast against the image array - presumably
            a 0/1 mask with the image's shape; TODO confirm.
        bg_path: Path of the background image the object is pasted onto.
        out_dir: Directory receiving the output files; created if missing.

    Raises:
        FileNotFoundError: If the source or background image cannot be read.
        ValueError: If no contour is found in the masked image.
    """
    # Derive the output prefix from the argument instead of relying on a
    # module-level ``fname`` set by whoever happens to call this function.
    stem = os.path.splitext(os.path.basename(img_path))[0]
    os.makedirs(out_dir, exist_ok=True)

    img = cv2.imread(img_path)
    if img is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError(f"cannot read image: {img_path}")

    # Close the archive once the first stored array has been extracted.
    with np.load(mask_path) as archive:
        mask = archive[archive.files[0]]

    occ_obj = img * mask
    cv2.imwrite(f"{out_dir}/{stem}_cam.jpg", occ_obj)

    gray_img = cv2.cvtColor(occ_obj.astype(np.uint8), cv2.COLOR_BGR2GRAY)
    contours, _ = cv2.findContours(
        gray_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_TC89_KCOS
    )
    if not contours:
        raise ValueError(f"no contour found after applying mask: {mask_path}")
    x, y, w, h = cv2.boundingRect(contours[0])
    rect = occ_obj[y:y + h, x:x + w]
    cv2.imwrite(f"{out_dir}/{stem}_rect.jpg", rect)

    new_img = cv2.imread(bg_path)
    if new_img is None:
        raise FileNotFoundError(f"cannot read background image: {bg_path}")

    # Paste window: the central half of the background in both dimensions.
    bg_h, bg_w = new_img.shape[:2]
    x, w = bg_w // 4, bg_w // 2
    y, h = bg_h // 4, bg_h // 2

    rect_resized = cv2.resize(rect, (w, h))
    region = new_img[y:y + h, x:x + w, :]
    # Wherever the resized crop is exactly 0 (per channel), keep the
    # background value; elsewhere use the crop.
    new_img[y:y + h, x:x + w, :] = np.where(rect_resized == 0, region, rect_resized)
    cv2.imwrite(f"{out_dir}/{stem}_mixed.jpg", new_img)

In [5]:
# Preview cell: print the image/mask path pair derived from the first mask
# file and stop, without processing anything.
DATA_DIR = "/home/ubuntu/data/ood-cv-cls/occluded"
# NOTE: glob is called without recursive=True, so "**" behaves like "*" and
# matches exactly one directory level (<DATA_DIR>/<class>/<file>.npz).
mask_paths = glob(f"{DATA_DIR}/**/*.npz")
for mask_path in tqdm(mask_paths):
    cls = os.path.dirname(mask_path).split("/")[-1]
    fname = os.path.basename(mask_path).replace(".npz", "")
    img_path = f"{DATA_DIR}/{cls}/{fname}.JPEG"
    print(img_path)
    print(mask_path)
    # Only the first pair is inspected. The get_mixed_img call that used to
    # follow this break could never run and has been removed.
    break

  0%|          | 0/1980 [00:00<?, ?it/s]

/home/ubuntu/data/ood-cv-cls/occluded/n02101006/n02101006_764.JPEG
/home/ubuntu/data/ood-cv-cls/occluded/n02101006/n02101006_764.npz





In [3]:
# Build the mixed image for every mask archive found one directory level
# below DATA_DIR. The image is expected next to its mask, with the same file
# stem and a .JPEG extension.
DATA_DIR = "/home/ubuntu/data/ood-cv-cls/occluded"
mask_paths = glob(f"{DATA_DIR}/**/*.npz")
for mask_path in tqdm(mask_paths):
    # Class ID = name of the directory that contains the mask.
    cls = os.path.basename(os.path.dirname(mask_path))
    # File stem shared by the mask and its image.
    fname = os.path.basename(mask_path).replace(".npz", "")
    img_path = "/".join((DATA_DIR, cls, fname + ".JPEG"))
    get_mixed_img(img_path, mask_path)

100%|██████████| 3/3 [00:01<00:00,  2.29it/s]
