In [2]:
from collections import defaultdict
import os
import shutil
import cv2
import matplotlib.pyplot as plt
import numpy as np
import xmltodict

In [3]:
# The notebook's working directory is the dataset root,
# e.g. '/Users/Username/dataset/afhq'.
root_directory = os.getcwd()
# Source images are read from <root>/data/afhq/<task>/<category>.
images_directory = os.path.join(root_directory, *('data', 'afhq'))
# images_train_dir = os.path.join(images_directory, 'train')
# images_test_dir  = os.path.join(images_directory, 'test')

In [4]:
# Output directory layout
# -----------------------
# One directory per augmentation is created under the dataset root, named
# data_<aug>, each holding a <task>/<category> tree:
#   data_grey - greyscale images                (filled by to_grey)
#   data_edge - Canny edge maps                 (filled by to_edge)
#   data_part - centre crop, resized back       (filled by to_part)
#   data_txtr - texture patch, resized back     (filled by to_txtr)

aug_dir_names = ['grey', 'edge', 'part', 'txtr']
aug_dir = [os.path.join(root_directory, f'data_{name}') for name in aug_dir_names]

tasks = ['train', 'val']
categories = ['cat', 'dog', 'wild']
# `aug_root` (not `dir`) so the builtin dir() stays usable in later cells.
for aug_root in aug_dir:
    # exist_ok makes the cell safe to re-run and avoids the check-then-create race.
    os.makedirs(aug_root, exist_ok=True)
    for task in tasks:
        for category in categories:
            os.makedirs(os.path.join(aug_root, task, category), exist_ok=True)

In [7]:
# Single-folder preview: list the usable images of one task/category pair.
# NOTE(review): `type` shadows the Python builtin for the rest of the notebook;
# the name is kept because the commented-out example calls under the to_*
# helpers refer to it.
type     = 'train'
category = 'cat'
path     = os.path.join(images_directory, type, category)
print(path)
filenames = sorted(os.listdir(path))

# Keep only '.jpg' files that OpenCV can decode. The extension test runs first
# so that non-image files (e.g. '.mat') are never handed to cv2.imread.
filepaths = [
    name for name in filenames
    if os.path.splitext(name)[1] == '.jpg'
    and cv2.imread(os.path.join(path, name)) is not None
]

# Argument mapping for the to_* helpers:
#   images_filenames -> filenames (or the validated `filepaths`)
#   images_directory -> path
#   target_directory -> the matching data_<aug>/<task>/<category> folder

/Users/woolee/biases-are-features-dataset/afhq/data/afhq/train/cat


In [6]:
def to_grey(images_filenames, images_directory, target_directory):
    """Write a greyscale copy of every '.jpg' image into ``target_directory``.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the results are written to, under the
            same file names. It is not created here.

    Entries whose extension is not '.jpg' (e.g. '.mat') are ignored. Files
    that OpenCV cannot decode are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        if os.path.splitext(image_filename)[1] != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # passing that on to cvtColor would abort the whole batch.
            print(f'skipping unreadable image: {image_filename}')
            continue
        grey_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(os.path.join(target_directory, image_filename), grey_image)
# to_grey(filenames, path, os.path.join(root_directory, f'data_grey', type, category))

In [7]:
def to_edge(images_filenames, images_directory, target_directory,
            low_threshold=100, high_threshold=200):
    """Write a Canny edge map of every '.jpg' image into ``target_directory``.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the edge maps are written to, under the
            same file names. It is not created here.
        low_threshold: First threshold passed to ``cv2.Canny`` (default 100).
        high_threshold: Second threshold passed to ``cv2.Canny`` (default 200).

    Entries whose extension is not '.jpg' (e.g. '.mat') are ignored. Files
    that OpenCV cannot decode are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        if os.path.splitext(image_filename)[1] != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            # cv2.imread returns None on failure; Canny cannot process that.
            print(f'skipping unreadable image: {image_filename}')
            continue
        edges = cv2.Canny(image, low_threshold, high_threshold)
        cv2.imwrite(os.path.join(target_directory, image_filename), edges)
# to_edge(filenames, path, os.path.join(root_directory, f'data_edge', type, category))

In [8]:
def to_txtr(images_filenames, images_directory, target_directory):
    """Write a "texture" version of every '.jpg' image into ``target_directory``.

    A patch covering rows 2/8..4/8 of the height and columns 3/8..5/8 of the
    width is cut out and stretched back to the original image size.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the results are written to, under the
            same file names. It is not created here.

    Entries whose extension is not '.jpg' (e.g. '.mat') are ignored. Files
    that OpenCV cannot decode are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        # Everything below must run only for '.jpg' entries; otherwise a
        # non-image entry would reuse the previous iteration's pixels.
        if os.path.splitext(image_filename)[1] != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            print(f'skipping unreadable image: {image_filename}')
            continue

        # OpenCV arrays are indexed (row, column): shape[0] is the height.
        height, width = image.shape[:2]
        top = int(height * 2 / 8)
        bottom = height - int(height * 4 / 8)
        left = int(width * 3 / 8)
        right = width - int(width * 3 / 8)

        txtred_image = image[top:bottom, left:right]
        # dsize is given as (width, height).
        txtred_image = cv2.resize(txtred_image, dsize=(width, height),
                                  interpolation=cv2.INTER_LINEAR)

        cv2.imwrite(os.path.join(target_directory, image_filename), txtred_image)
# to_txtr(filenames, path, os.path.join(root_directory, f'data_txtr', type, category))

In [9]:
def to_part(images_filenames, images_directory, target_directory):
    """Write a "part" version of every '.jpg' image into ``target_directory``.

    A central patch is cut out - 5/12 of the height is trimmed from the top
    and from the bottom, 1/4 of the width from the left and from the right -
    and stretched back to the original image size.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the results are written to, under the
            same file names. It is not created here.

    Entries whose extension is not '.jpg' (e.g. '.mat') are ignored. Files
    that OpenCV cannot decode are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        # Everything below must run only for '.jpg' entries; otherwise a
        # non-image entry would reuse the previous iteration's pixels.
        if os.path.splitext(image_filename)[1] != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            print(f'skipping unreadable image: {image_filename}')
            continue

        # OpenCV arrays are indexed (row, column): shape[0] is the height.
        height, width = image.shape[:2]
        margin_y = int(5 / 12 * height)
        margin_x = int(1 / 4 * width)

        parted_image = image[margin_y: height - margin_y,
                             margin_x: width - margin_x]
        # dsize is given as (width, height).
        parted_image = cv2.resize(parted_image, dsize=(width, height),
                                  interpolation=cv2.INTER_LINEAR)

        cv2.imwrite(os.path.join(target_directory, image_filename), parted_image)
# to_part(filenames, path, os.path.join(root_directory, f'data_part', type, category))

In [13]:
tasks = ['train', 'val']
categories = ['cat', 'dog', 'wild']

if __name__ == "__main__":

    print('-------- Image Generation Start --------')
    for task in tasks:
        for category in categories:

            path = os.path.join(images_directory, task, category)
            print(f'looking at {path}')
            filenames = sorted(os.listdir(path))

            # Only '.jpg' files that OpenCV can decode are converted; the
            # extension is tested first so other files are never decoded.
            filepaths = [
                name for name in filenames
                if os.path.splitext(name)[1] == '.jpg'
                and cv2.imread(os.path.join(path, name)) is not None
            ]

            # Hand the validated list (not the raw directory listing) to every
            # converter, each writing into its own data_<aug>/<task>/<category>.
            for suffix, convert in (('grey', to_grey), ('edge', to_edge),
                                    ('txtr', to_txtr), ('part', to_part)):
                convert(filepaths, path,
                        os.path.join(root_directory, f'data_{suffix}', task, category))
    print('-------- Image Generation Done --------')

-------- Image Generation Start --------
looking at /Users/woolee/biases-are-features-dataset/afhq/data/afhq/train/cat
looking at /Users/woolee/biases-are-features-dataset/afhq/data/afhq/train/dog
looking at /Users/woolee/biases-are-features-dataset/afhq/data/afhq/train/wild
looking at /Users/woolee/biases-are-features-dataset/afhq/data/afhq/val/cat
looking at /Users/woolee/biases-are-features-dataset/afhq/data/afhq/val/dog
looking at /Users/woolee/biases-are-features-dataset/afhq/data/afhq/val/wild
-------- Image Generation Done --------


In [None]:

# Layout used by the mask / bounding-box based augmentations below:
#   <root>/images               - source images
#   <root>/annotations/trimaps  - per-image '.png' masks
#   <root>/annotations/xmls     - per-image '.xml' bounding boxes
# NOTE(review): this rebinds root_directory and images_directory, which the
# cells above also define with different values.
root_directory = os.getcwd() # '/Users/Username/dataset'
images_directory = os.path.join(root_directory, "images")
masks_directory = os.path.join(root_directory, "annotations", "trimaps")
xml_directory = os.path.join(root_directory, "annotations", "xmls")


images_filenames = sorted(os.listdir(images_directory))
# Keep only '.jpg' files that OpenCV can decode. The extension test runs first
# so that non-image files are never handed to cv2.imread.
correct_images_filenames = [
    name for name in images_filenames
    if os.path.splitext(name)[1] == '.jpg'
    and cv2.imread(os.path.join(images_directory, name)) is not None
]

"""## SILO"""

# Mask label values:
#   1 -> cat region
#   2 -> background region
#   3 -> border region
def preprocess_mask_silo(mask):
    """Convert a label mask into a silhouette image.

    The mask is cast to uint8, then label 1 becomes 255 and labels 2 and 3
    become 0. Any other value is passed through unchanged. A new array is
    returned; the argument itself is not modified.
    """
    labels = mask.astype(np.uint8)
    is_subject = labels == 1
    is_surroundings = (labels == 2) | (labels == 3)
    blanked = np.where(is_surroundings, np.uint8(0), labels)
    return np.where(is_subject, np.uint8(255), blanked)

def to_silo(images_filenames, images_directory, target_directory):
    """Write a silhouette image for every '.jpg' entry into ``target_directory``.

    For each image file name the mask with the same stem and a '.png'
    extension is read from ``masks_directory``, converted with
    ``preprocess_mask_silo`` and written under the image's file name.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Unused here - only the mask is needed - but kept so
            all to_* helpers share one signature.
        target_directory: Directory the results are written to. It is not
            created here.

    Entries whose extension is not '.jpg' are ignored. Entries whose mask is
    missing or cannot be decoded are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        stem, extension = os.path.splitext(image_filename)
        if extension != '.jpg':
            continue
        # Build the mask name from the stem so that a '.jpg' occurring inside
        # the stem is not rewritten as well.
        mask_path = os.path.join(masks_directory, stem + ".png")
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            # cv2.imread returns None when the file is missing or unreadable.
            print(f'skipping {image_filename}: no readable mask at {mask_path}')
            continue
        silouetted = preprocess_mask_silo(mask)
        cv2.imwrite(os.path.join(target_directory, image_filename), silouetted)



"""## BACK"""

def preprocess_mask_back(mask):
    """Convert a label mask into a background-selection mask.

    The mask is cast to uint8, then label 2 becomes 255 and labels 1 and 3
    become 0. Any other value is passed through unchanged. A new array is
    returned; the argument itself is not modified.
    """
    labels = mask.astype(np.uint8)
    is_background = labels == 2
    is_excluded = (labels == 1) | (labels == 3)
    cleared = np.where(is_excluded, np.uint8(0), labels)
    return np.where(is_background, np.uint8(255), cleared)

def to_back(images_filenames, images_directory, target_directory):
    """Write a background-only version of every '.jpg' image.

    For each image the mask with the same stem and a '.png' extension is read
    from ``masks_directory`` and converted with ``preprocess_mask_back``. The
    image is then AND-ed with itself under that mask, so pixels where the
    mask is zero come out black.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the results are written to, under the
            same file names. It is not created here.

    Entries whose extension is not '.jpg' are ignored. Entries whose image or
    mask cannot be decoded are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        stem, extension = os.path.splitext(image_filename)
        if extension != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            # cv2.imread returns None when the file is missing or unreadable.
            print(f'skipping unreadable image: {image_filename}')
            continue
        # Build the mask name from the stem so that a '.jpg' occurring inside
        # the stem is not rewritten as well.
        mask_path = os.path.join(masks_directory, stem + ".png")
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            print(f'skipping {image_filename}: no readable mask at {mask_path}')
            continue
        mask = preprocess_mask_back(mask)
        background_only = cv2.bitwise_and(image, image, mask=mask)
        cv2.imwrite(os.path.join(target_directory, image_filename), background_only)



"""## GREY"""

def to_grey(images_filenames, images_directory, target_directory):
    """Write a greyscale copy of every '.jpg' image into ``target_directory``.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; whichever definition is executed last is the one in effect.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the results are written to, under the
            same file names. It is not created here.

    Entries whose extension is not '.jpg' are ignored. Files that OpenCV
    cannot decode are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        if os.path.splitext(image_filename)[1] != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # passing that on to cvtColor would abort the whole batch.
            print(f'skipping unreadable image: {image_filename}')
            continue
        grey_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(os.path.join(target_directory, image_filename), grey_image)



"""## TXTR"""

def get_bb(image_filename):
    '''
    About: Look up the bounding box of an image in its XML annotation file,
           i.e. the file in xml_directory with the same stem and a '.xml'
           extension.
    Input: image_filename - bare file name of the image.
    Output: (xmin, ymin, xmax, ymax) as ints. (0, 0, 0, 0) is returned when
            the annotation file is missing, cannot be parsed, or does not
            contain annotation/object/bndbox with four integer fields.
    '''
    stem = os.path.splitext(image_filename)[0]
    xml_path = os.path.join(xml_directory, stem + ".xml")
    try:
        # The context manager closes the file even when parsing fails.
        with open(xml_path) as f:
            doc = xmltodict.parse(f.read())
        bndbox = doc['annotation']['object']['bndbox']
        return tuple(int(bndbox[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax'))
    except Exception:
        # Best-effort lookup: any ordinary failure means "no box". Unlike a
        # bare except, KeyboardInterrupt and SystemExit still propagate.
        return 0, 0, 0, 0

def to_txtr(images_filenames, images_directory, target_directory):
    """Write a "texture" version of every '.jpg' image into ``target_directory``.

    The bounding box returned by ``get_bb`` is shrunk by a quarter of its
    size on every side, and that inner patch is stretched back to the
    original image size. When ``get_bb`` returns all zeros the whole image is
    used as the box.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; whichever definition is executed last is the one in effect.

    Args:
        images_filenames: Iterable of bare file names (not paths).
        images_directory: Directory the source images are read from.
        target_directory: Directory the results are written to, under the
            same file names. It is not created here.

    Entries whose extension is not '.jpg' are ignored. Files that OpenCV
    cannot decode are skipped and reported on stdout.
    """
    for image_filename in images_filenames:
        # Everything below must run only for '.jpg' entries; otherwise a
        # non-image entry would reuse the previous iteration's pixels.
        if os.path.splitext(image_filename)[1] != '.jpg':
            continue
        image = cv2.imread(os.path.join(images_directory, image_filename))
        if image is None:
            print(f'skipping unreadable image: {image_filename}')
            continue

        # OpenCV arrays are indexed (row, column): shape[0] is the height.
        height, width = image.shape[:2]
        xmin, ymin, xmax, ymax = get_bb(image_filename)
        if xmin == 0 and ymin == 0 and xmax == 0 and ymax == 0:
            # No annotation available: fall back to the full frame.
            xmax, ymax = width, height

        margin_y = int((ymax - ymin) / 4)
        margin_x = int((xmax - xmin) / 4)
        txtred_image = image[ymin + margin_y: ymax - margin_y,
                             xmin + margin_x: xmax - margin_x]
        # dsize is given as (width, height).
        txtred_image = cv2.resize(txtred_image, dsize=(width, height),
                                  interpolation=cv2.INTER_LINEAR)
        cv2.imwrite(os.path.join(target_directory, image_filename), txtred_image)


if __name__ == "__main__":
    print('-------- Image Generation Start --------')

    # (converter, output folder under root_directory, completion message)
    jobs = (
        (to_grey, "images_grey", '-- Greyscaled Image Generation Done'),
        (to_silo, "images_silo", '-- Silouetted Image Generation Done'),
        (to_back, "images_back", '-- Background Only Image Generation Done'),
        (to_txtr, "images_txtr", '-- Texture Only Image Generation Done'),
    )
    for convert, folder, done_message in jobs:
        target_directory = os.path.join(root_directory, folder)
        # The converters do not create their output folder, and cv2.imwrite
        # only returns False (no exception) when the folder is missing.
        os.makedirs(target_directory, exist_ok=True)
        convert(correct_images_filenames, images_directory, target_directory)
        print(done_message)

    print('-------- Image Generation Done --------')