In [1]:
import math
import os
import shutil
import time

import numpy as np
from osgeo import gdal
from tqdm import tqdm

In [2]:
def load_image(image):
    """Open the raster file at path ``image`` with GDAL in read-only mode.

    Args:
        image: Path of the raster file to open.

    Returns:
        Whatever ``gdal.Open`` returns for that path.
    """
    dataset = gdal.Open(image, gdal.GA_ReadOnly)
    return dataset
    
def convert_to_array(dataset):
    """Read every raster band of ``dataset`` and stack them along axis 2.

    Args:
        dataset: Object exposing ``RasterCount`` and ``GetRasterBand(n)``
            (band numbers start at 1), e.g. the result of ``load_image``.

    Returns:
        A numpy array built with ``np.stack(..., axis=2)``, one slice per band
        in band order.
    """
    band_arrays = []
    # GDAL band numbers are 1-based.
    for band_number in range(1, dataset.RasterCount + 1):
        band = dataset.GetRasterBand(band_number)
        band_arrays.append(band.ReadAsArray())
    return np.stack(band_arrays, axis=2)

In [3]:
def _remove_file_quietly(path):
    """Delete the file at ``path``; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def delete_empty_images(images_path, masks_path):
    """Delete image/mask pairs whose mask is empty or almost empty.

    For every file directly inside ``images_path``, the mask with the same
    file name is read from ``masks_path``. The image and its mask are both
    removed from disk when either:

    * the mask contains no non-zero value at all, or
    * the ratio (pixels equal to 1) / (pixels equal to 0) is below 0.01.

    Args:
        images_path: Directory containing the images.
        masks_path: Directory containing the masks, named like their images.

    Side effects:
        Removes files from both directories and prints how many pairs were
        deleted.
    """
    file_names = next(os.walk(images_path))[2]

    num_deleted = 0
    for image_file in file_names:
        image = os.path.join(images_path, image_file)
        mask = os.path.join(masks_path, image_file)

        mask_dataset = load_image(mask)
        mask_data = convert_to_array(mask_dataset)
        # Release the GDAL handle before the underlying file may be removed.
        mask_dataset = None

        if np.any(mask_data):
            # NOTE(review): only the value 1 counts as "white". A mask encoded
            # as 0/255 would count zero white pixels and always be deleted --
            # confirm the mask encoding.
            num_white_pixels = np.count_nonzero(mask_data == 1)
            num_black_pixels = np.count_nonzero(mask_data == 0)
            if num_black_pixels == 0:
                num_black_pixels = 1  # avoid dividing by zero
            mostly_empty = num_white_pixels / num_black_pixels < 0.01
        else:
            # The mask has no non-zero pixel at all.
            mostly_empty = True

        if mostly_empty:
            _remove_file_quietly(image)
            _remove_file_quietly(mask)
            num_deleted += 1

    print("\n{} images were deleted.".format(num_deleted))

In [4]:
def crop_images(images_path, out_path, image_name, tilesize=256):
    """Cut one raster into ``tilesize`` x ``tilesize`` GeoTIFF tiles.

    The raster ``image_name`` inside ``images_path`` is walked on a grid with
    step ``tilesize`` in both directions. Each window is written to
    ``out_path`` as ``"<x offset>_<y offset>_<image_name>"``.

    Tiles are produced with ``gdal.Translate`` on the already-open dataset
    instead of spawning one ``gdal_translate`` shell command per tile. This
    avoids breaking on paths with spaces or shell metacharacters, and avoids
    relying on the command line accepting ``-srcwin`` values with trailing
    commas.

    Args:
        images_path: Directory containing the source raster.
        out_path: Directory that receives the tiles.
        image_name: File name of the source raster.
        tilesize: Width and height of the window requested for every tile.

    Raises:
        RuntimeError: If GDAL returns no dataset for the source raster.
    """
    source_path = os.path.join(images_path, image_name)
    raster = gdal.Open(source_path)

    print('path', source_path)

    if raster is None:
        raise RuntimeError("GDAL could not open raster: {}".format(source_path))

    width = raster.RasterXSize
    height = raster.RasterYSize

    for x_offset in range(0, width, tilesize):
        for y_offset in range(0, height, tilesize):
            tile_name = "{}_{}_{}".format(x_offset, y_offset, image_name)
            tile_path = os.path.join(out_path, tile_name)
            # Windows on the right/bottom border are requested at full size,
            # exactly as the original -srcwin arguments did.
            tile = gdal.Translate(
                tile_path,
                raster,
                format="GTiff",
                srcWin=[x_offset, y_offset, tilesize, tilesize],
            )
            # Dropping the reference makes GDAL flush and close the tile.
            tile = None

    raster = None

In [5]:
def crop_and_save(images_path, masks_path, new_images_path, new_masks_path, tilesize, degrowth_factor, first=False, last=False):
    """Tile every image and its mask at ``tilesize`` and store the results.

    Args:
        images_path: Directory of source images (used only when ``first``).
        masks_path: Directory of source masks (used only when ``first``).
        new_images_path: Root output directory for image tiles.
        new_masks_path: Root output directory for mask tiles.
        tilesize: Tile edge length passed to ``crop_images``.
        degrowth_factor: Factor between the previous pass's tile size and this
            one; used to locate the previous pass's output directory.
        first: When False, the sources are read from the
            ``<tilesize * degrowth_factor>/`` sub-directories of the output
            roots, and those sub-directories are deleted once tiling is done.
        last: When False, tiles are written to ``<tilesize>/`` sub-directories
            of the output roots (created if missing). When True they are
            written directly into the output roots.

    Side effects:
        Creates directories, writes tiles, may delete the previous pass's
        directories, and prints progress information.
    """
    print("Clipping tilesize: {}".format(tilesize))

    if not first:
        # Read from the output of the previous (larger) pass. The trailing ""
        # keeps a trailing separator on the directory path.
        previous_size = str(tilesize * degrowth_factor)
        images_path = os.path.join(new_images_path, previous_size, "")
        masks_path = os.path.join(new_masks_path, previous_size, "")

    if not last:
        new_images_path = os.path.join(new_images_path, str(tilesize), "")
        new_masks_path = os.path.join(new_masks_path, str(tilesize), "")
        for path in [new_images_path, new_masks_path]:
            # makedirs also creates missing parents and tolerates existing dirs.
            os.makedirs(path, exist_ok=True)

    files = next(os.walk(images_path))[2]
    print('Total number of files =',len(files))

    for image_name in tqdm(files, total = len(files)):
        crop_images(images_path, new_images_path, image_name, tilesize)
        crop_images(masks_path, new_masks_path, image_name, tilesize)

    # Removing empty tiles is currently disabled:
    #delete_empty_images(new_images_path, new_masks_path)

    if not first:
        # The intermediate tiles of the previous pass are no longer needed.
        shutil.rmtree(images_path, ignore_errors=True)
        shutil.rmtree(masks_path, ignore_errors=True)


In [6]:
if __name__ == "__main__":
    # Configuration
    root_data_path = "Data/DesforestationAlerts/Prediction/"
    root_new_data_path = "Data/BuildingsDataSet/Prediction/"
    test_to_train_ratio = 0  # not used by the code in this cell
    tilesize_start = 256
    degrowth_factor = 2

    # Path information
    images_path = root_data_path + "Images/"
    masks_path = root_data_path + "Masks/"
    new_images_path = root_new_data_path + "Images/"
    new_masks_path = root_new_data_path + "Masks/"

    for path in [new_images_path, new_masks_path]:
        if not os.path.exists(path):
            # makedirs (not mkdir) so a missing parent directory is created too.
            os.makedirs(path)
            print("DIRECTORY CREATED: {}".format(path))
        else:
            print("DIRECTORY ALREADY EXISTS: {}".format(path))

    print("Building Dataset.")

    # Crop the images into tiles. Only a single 256x256 pass is run, so it is
    # both the first and the last pass and tiles go straight into the output
    # roots.
    tilesize = tilesize_start
    crop_and_save(images_path, masks_path, new_images_path, new_masks_path, tilesize, degrowth_factor, first=True, last=True)

    # Further passes at smaller tile sizes (128, 64, 32) are disabled. To chain
    # them, run the pass above with last=False, then for each smaller size do
    # `tilesize //= degrowth_factor` and call crop_and_save with first=False,
    # passing last=True only on the final pass.
    
    
    
    
    

  0%|          | 0/1 [00:00<?, ?it/s]

DIRECTORY ALREADY EXISTS: Data/BuildingsDataSet/Prediction/Images/
DIRECTORY ALREADY EXISTS: Data/BuildingsDataSet/Prediction/Masks/
Building Dataset.
Clipping tilesize: 256
Total number of files = 1
path Data/DesforestationAlerts/Prediction/Images/1.2.tif
path Data/DesforestationAlerts/Prediction/Masks/1.2.tif


100%|██████████| 1/1 [00:38<00:00, 38.37s/it]
