In [11]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from os.path import isfile, join
import errno

import csv

def get_dataset(dataset_folder, ids=False):
    """Index a dataset laid out as one sub-directory per taxon.

    Every immediate sub-directory of ``dataset_folder`` is treated as a
    class; its name is used as the label for each regular file it contains.

    Progress is reported through a ``display`` callable that must already be
    available in the namespace (presumably the IPython/Jupyter built-in --
    confirm before using this outside a notebook).

    Args:
        dataset_folder: Root directory holding one sub-directory per taxon.
        ids: Unused by this function.

    Returns:
        A tuple ``(x_set, y_set, result)`` where ``x_set`` is a NumPy array
        of file paths, ``y_set`` is a NumPy array with the taxon name of each
        path, and ``result`` maps each taxon name to the list of its file
        paths (taxa without files get no entry).
    """
    paths = []
    labels = []
    by_taxon = {}
    print("Retrieving dataset from:", dataset_folder)

    # Only the first os.walk() entry is needed: it lists the immediate
    # sub-directories of the dataset root.
    _, taxon_names, _ = next(os.walk(dataset_folder))
    total = len(taxon_names)
    progress = display("0/" + str(total), display_id=True)

    for done, taxon_id in enumerate(taxon_names, start=1):
        taxon_dir = join(dataset_folder, taxon_id)
        for entry in os.listdir(taxon_dir):
            full_path = join(taxon_dir, entry)
            if not isfile(full_path):
                continue  # skip nested directories and other non-files
            paths.append(full_path)
            labels.append(taxon_id)
            by_taxon.setdefault(taxon_id, []).append(full_path)
        progress.update(str(done) + "/" + str(total))

    return np.array(paths), np.array(labels), by_taxon

def get_last_epoch(log_file):
    """Return the epoch number stored in the last row of a CSV log.

    The epoch is read from the first column of the last non-empty row.

    Args:
        log_file: Path to a comma-delimited log file.

    Returns:
        The first field of the last non-empty row converted to ``int``, or
        ``0`` when the file does not exist or contains no rows.

    Raises:
        ValueError: If the first field of the last row is not an integer
            (for example when the file holds only a header line).
    """
    if not os.path.exists(log_file):
        return 0

    last_row = None
    # The context manager guarantees the handle is closed; newline='' is the
    # mode the csv module expects for files it reads.
    with open(log_file, newline='') as log:
        # Stream the rows instead of materialising the whole log in memory.
        for row in csv.reader(log, delimiter=','):
            if row:  # ignore blank lines, e.g. a trailing newline
                last_row = row

    if last_row is None:
        return 0
    return int(last_row[0])
    
def save_model(model, root):
    """Store a model's architecture and weights inside ``root``.

    The architecture returned by ``model.to_json()`` is written to
    ``<root>/model.json`` and the weights are saved through
    ``model.save_weights`` to ``<root>/model.h5``.

    Args:
        model: Object exposing ``to_json()`` and ``save_weights(path)``
            (presumably a Keras model -- confirm against callers).
        root: Directory that receives both files.
    """
    weights_path = join(root, "model.h5")
    model_path = join(root, "model.json")
    # check_dirs is defined elsewhere; presumably it creates the missing
    # parent directories of the given path -- verify against its definition.
    check_dirs(model_path)

    # Serialise before opening the file so a failure here leaves no
    # truncated model.json behind.
    serialized = model.to_json()
    with open(model_path, "w") as out:
        out.write(serialized)
    model.save_weights(weights_path)

    print("Saved model to", model_path)
    print("Saved weights to", weights_path)

def get_taxa_list(list_path):
    """Read a space-delimited taxa file into one flat list.

    The file is parsed with the ``csv`` module using a space as delimiter
    and ``|`` as quote character; the fields of all rows are concatenated
    in file order.

    Args:
        list_path: Path of the file to read.

    Returns:
        A list with every field found in the file.
    """
    with open(list_path, newline='') as handle:
        rows = csv.reader(handle, delimiter=' ', quotechar='|')
        # Flatten row by row while the file is still open.
        return [field for row in rows for field in row]

def balance_dataset(X_train, y_train, max_samples=None):
    """Resample a labelled dataset so every class has the same size.

    Samples are grouped by label. Each class is repeated enough times to
    reach ``max_samples`` elements, shuffled with ``np.random.shuffle`` and
    truncated to exactly ``max_samples`` elements, so larger classes are
    randomly down-sampled and smaller ones are over-sampled by duplication.

    Args:
        X_train: Iterable of samples (e.g. file paths).
        y_train: Iterable of labels, aligned with ``X_train``.
        max_samples: Number of samples to keep per class. When ``None`` the
            size of the largest class is used.

    Returns:
        A tuple ``(X_balanced, y_balanced, max_samples)``: the balanced
        samples and labels as lists (classes appear in first-seen order) and
        the per-class sample count that was applied. Empty input yields
        ``([], [], 0)`` when ``max_samples`` is ``None``.
    """
    # Group the samples by label, preserving first-seen label order.
    train_dict = {}
    for sample, label in zip(X_train, y_train):
        train_dict.setdefault(label, []).append(sample)

    if max_samples is None:
        max_samples = max(
            (len(samples) for samples in train_dict.values()), default=0
        )

    X_balanced = []
    y_balanced = []
    for taxon_id, samples in train_dict.items():
        # Integer ceiling division: np.repeat refuses float repeat counts
        # (they cannot be safely cast to an integer dtype).
        ratio = -(-max_samples // len(samples))
        tmp = np.repeat(samples, ratio)
        np.random.shuffle(tmp)
        X_balanced.extend(tmp[:max_samples])
        y_balanced.extend([taxon_id] * max_samples)

    print("Balanced to", max_samples, "samples per class!")
    return X_balanced, y_balanced, max_samples

In [None]:
def cv2_clipped_zoom(img, zoom_factor):
    """
    Center zoom in/out of the given image, returning an enlarged/shrunken
    view of the image without changing its dimensions.

    When zooming in (``zoom_factor > 1``) the central region is cropped and
    scaled up to the original size; when zooming out (``zoom_factor < 1``)
    the image is scaled down and padded back to the original size by
    replicating its edge pixels.

    Args:
        img : Image array whose first two axes are height and width
        zoom_factor : amount of zoom as a ratio, strictly greater than 0

    Returns:
        Array with the same height and width as ``img``.

    Raises:
        ValueError: If ``zoom_factor`` is not strictly positive.
    """
    if zoom_factor <= 0:
        raise ValueError("zoom_factor must be strictly positive")

    height, width = img.shape[:2]  # It's also the final desired shape
    # Keep at least one pixel so cv2.resize never gets an empty target size.
    new_height = max(1, int(height * zoom_factor))
    new_width = max(1, int(width * zoom_factor))

    ### Crop only the part that will remain in the result (more efficient)
    # Centered bbox of the final desired size in resized (larger/smaller)
    # image coordinates
    y1, x1 = max(0, new_height - height) // 2, max(0, new_width - width) // 2
    y2, x2 = y1 + height, x1 + width
    bbox = np.array([y1, x1, y2, x2])
    # Map back to original image coordinates. The builtin ``int`` replaces
    # the ``np.int`` alias, which no longer exists in current NumPy.
    bbox = (bbox / zoom_factor).astype(int)
    y1, x1, y2, x2 = bbox
    cropped_img = img[y1:y2, x1:x2]

    # Handle padding when downscaling
    resize_height, resize_width = min(new_height, height), min(new_width, width)
    pad_height1, pad_width1 = (height - resize_height) // 2, (width - resize_width) // 2
    pad_height2, pad_width2 = (height - resize_height) - pad_height1, (width - resize_width) - pad_width1
    # Pad only the two spatial axes; any further axes are left untouched.
    pad_spec = [(pad_height1, pad_height2), (pad_width1, pad_width2)] + [(0, 0)] * (img.ndim - 2)

    # cv2.resize takes the target size as (width, height).
    result = cv2.resize(cropped_img, (resize_width, resize_height))
    result = np.pad(result, pad_spec, mode='edge')
    assert result.shape[0] == height and result.shape[1] == width
    return result