## Preprocessing for Feature extraction

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import skimage.exposure as skie
import skimage
import numpy as np
import glob

from pathlib import Path
from PIL import Image,ImageOps

In [None]:
# Example crop used to try out the preprocessing steps interactively.
img_path = 'data/Schiefer/CFB184_ortho_rgb_0003.png'
# Alternative absolute location for the same file name (disabled).
#img_path = '/home/richard/data/Schiefer/CFB184clipped_raster_files/CFB184_ortho_rgb_0003.png'
# Load the image with matplotlib; `img` is reused by the cells below.
img = plt.imread(img_path)
# Optional grayscale conversion (disabled).
#img = skimage.color.rgb2gray(img) # preserves luminance of img

#### Histogram normalization

- with CLAHE (Contrast Limited Adaptive Histogram Equalization), which has two benefits:
  - it normalizes brightness, reducing the differences between darker and lighter images
  - it enhances the contrast of the image (stronger responses in conv layers)

This is mostly useful for square images; otherwise black pixels appear too often. Even then, the brightness is still normalized.

In [None]:
def show(img):
    """Display an image side by side with the histogram of its values.

    Args:
        img: Array-like image; it must provide ``ravel()`` and ``max()``
            (as NumPy arrays do).

    The figure is rendered immediately with ``plt.show()``; nothing is
    returned.
    """
    figure, axes = plt.subplots(1, 2, figsize=(12, 3))
    image_axis, histogram_axis = axes

    # Left panel: the image itself, drawn with a gray colormap and no axes.
    image_axis.imshow(img, cmap=plt.cm.gray)
    image_axis.set_axis_off()

    # Right panel: 256-bin histogram over all values of the flattened image.
    flat_values = img.ravel()
    histogram_axis.hist(flat_values, lw=0, bins=256)
    # The x-axis spans from 0 up to the largest value present in the image.
    histogram_axis.set_xlim(0, img.max())
    # Bin counts are not of interest here, so the y ticks are hidden.
    histogram_axis.set_yticks([])

    plt.show()

# Baseline: the unprocessed image next to its histogram.
show(img)

In [None]:
# Apply Contrast Limited Adaptive Histogram Equalization (CLAHE) to the image
# and display the result next to its histogram.
show(skie.equalize_adapthist(img))
# NOTE(review): the original note here says that rescale_intensity works
# "under the hood" of equalize_adapthist — confirm against the
# skimage.exposure documentation.

In [None]:
# Equalize the image again and keep the result for conversion to a PIL image.
new = skie.equalize_adapthist(img)
print(type(new))
# Scale by 255 and cast to uint8 before handing the array to PIL
# (assumes the equalized values lie in [0, 1] — TODO confirm).
PIL_image = Image.fromarray((new * 255).astype(np.uint8))
plt.imshow(PIL_image)
# Print the size reported by the PIL image.
print(PIL_image.size)

#### Resize images

The default input size of VGG16 and ResNet is 224. Use this size for the square crowns. For whole polygons, resize the images to 448, so that they can be fed directly into the CNN.

#### _____________________

In [None]:
# Input directories for the two kinds of crops. These are absolute,
# machine-specific paths; adjust them when running elsewhere.
square_crops_path = '/home/richard/data/Schiefer/CFB184clipped_raster_files/'
polygon_crops_path = '/home/richard/data/Schiefer/clipped_raster_files_polygon/'

# Preview a 'preprocessed' directory path located next to the square-crop directory.
print(Path(square_crops_path).parent / 'preprocessed')

In [None]:
def padding(img, expected_size):
    """Centre ``img`` inside a square of side ``expected_size`` by adding a border.

    Args:
        img: Image exposing ``size`` as ``(width, height)``, as PIL images do.
        expected_size: Side length of the target square, in pixels.

    Returns:
        The image returned by ``ImageOps.expand`` for the computed border.

    The missing width and height are each split in two halves; when the
    amount is odd, the extra pixel goes to the right / bottom edge. No check
    is made that ``img`` already fits inside the target square.
    """
    width, height = img.size
    missing_w = expected_size - width
    missing_h = expected_size - height
    left = missing_w // 2
    top = missing_h // 2
    # Border order expected by ImageOps.expand: (left, top, right, bottom).
    border = (left, top, missing_w - left, missing_h - top)
    return ImageOps.expand(img, border)

def resize_with_padding(img, expected_size):
    """Fit an image into ``expected_size`` and pad it to exactly that size.

    Args:
        img: PIL image to process. It is left unmodified.
        expected_size: ``(width, height)`` of the output image, in pixels.

    Returns:
        A new image of size ``expected_size`` with the (possibly shrunken)
        input centred and the remaining area filled by ``ImageOps.expand``.

    Per the Pillow documentation, ``Image.thumbnail`` preserves the aspect
    ratio and only ever shrinks, so an input smaller than ``expected_size``
    is padded but not enlarged.
    """
    # Image.thumbnail resizes in place; work on a copy so the caller's image
    # object is not silently changed as a side effect.
    fitted = img.copy()
    fitted.thumbnail((expected_size[0], expected_size[1]))
    delta_width = expected_size[0] - fitted.size[0]
    delta_height = expected_size[1] - fitted.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    # Border order is (left, top, right, bottom); for odd differences the
    # extra pixel goes to the right / bottom edge.
    border = (pad_width, pad_height, delta_width - pad_width, delta_height - pad_height)
    return ImageOps.expand(fitted, border)

def get_png_file_names(file_dir):
    """Return the paths of all ``.png`` files directly inside ``file_dir``.

    Args:
        file_dir: Directory to search, given as a string or ``Path``, with or
            without a trailing path separator.

    Returns:
        Sorted list of the matching file paths as strings. The number of
        matches is also printed.
    """
    # Build the pattern with pathlib rather than string concatenation, so a
    # directory passed without a trailing separator is still searched
    # (plain concatenation would yield '<dir>*.png' and match nothing).
    pattern = str(Path(file_dir) / '*.png')
    # glob returns matches in arbitrary order; sort for reproducible runs.
    files = sorted(glob.glob(pattern))
    print("num files",len(files))
    return files

def preprocess_images(images_path,expected_size):
    """Equalize, resize and save every PNG found in ``images_path``.

    Args:
        images_path: Directory containing the input ``.png`` files.
        expected_size: Side length of the square output images, in pixels.

    Results are written to a ``preprocessed_<expected_size>`` directory
    located next to ``images_path`` (created if missing). Each output file is
    named ``<original stem>_preprocessed.png``. Nothing is returned.
    """
    target_dir = Path(images_path).parent / Path(f'preprocessed_{expected_size}')
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    for png_path in get_png_file_names(images_path):
        stem = Path(png_path).stem
        pixels = plt.imread(png_path)
        # Normalize brightness with adaptive histogram equalization.
        equalized = skie.equalize_adapthist(pixels)
        # Scale by 255 and cast to uint8 for PIL
        # (assumes the equalized values lie in [0, 1] — TODO confirm).
        as_uint8 = (equalized * 255).astype(np.uint8)
        # Resize to a square of side expected_size.
        squared = resize_with_padding(Image.fromarray(as_uint8), (expected_size, expected_size))
        squared.save(f'{target_dir}/{stem}_preprocessed.png')

In [None]:
# Preprocess the polygon crops into 224 x 224 output images.
# NOTE(review): the resize notes in this file suggest 448 for whole polygons —
# confirm that 224 is intended here.
preprocess_images(polygon_crops_path,224)