In [None]:
import csv
import datetime
import itertools
import numpy as np
import os.path
import pandas as pd
import pathlib

In [None]:
# Root directory that is searched recursively for 'mean.csv' result files.
# NOTE(review): absolute, machine-specific path -- adjust before running on another machine.
path = '/home/xandao/Documentos/resultados_gimp/identificacao_george/especie/acima-10/RGB'

In [None]:
# Every regular file named 'mean.csv' found anywhere below `path`.
list_files = list(filter(lambda candidate: candidate.is_file(), pathlib.Path(path).rglob('mean.csv')))
len(list_files)

In [None]:
# Each feature extractor mapped to the numbers of features that get a row in the sheets.
list_extractor = dict(
    lbp=[59],
    surf64=[128, 256, 257],
    surf128=[128, 256, 513],
    mobilenetv2=[128, 256, 512, 1024, 1280],
    resnet50v2=[128, 256, 512, 1024, 2048],
    vgg16=[128, 256, 512],
)

# Row labels '<extractor>_<n_features>_<mean|std>': extractors in dict order,
# feature counts in reverse of the listed order, 'mean' row before 'std' row.
index = [
    '_'.join((extractor_name, str(n_feat), stat))
    for extractor_name, feature_counts in list_extractor.items()
    for n_feat in reversed(feature_counts)
    for stat in ('mean', 'std')
]

In [None]:
# Classifiers, image sizes and segmentation variants that make up the sheet columns.
list_classifier = [
    'DecisionTreeClassifier',
    'KNeighborsClassifier',
    'MLPClassifier',
    'RandomForestClassifier',
    'SVC',
]
list_dim = [256, 400, 512]
# list_segmented = ['manual', 'unet']
list_segmented = ['unet']

# Column labels '<classifier>_<image size>_<segmentation>'; the classifier varies
# slowest, then the (sorted) segmentation, and the image size varies fastest.
columns = [
    '_'.join((clf, str(dim), seg))
    for clf, seg, dim in itertools.product(list_classifier, sorted(list_segmented), list_dim)
]

In [None]:
def create_sheet():
    """Build one set of empty result sheets.

    Returns:
        dict: the keys 'mean', 'time' and 'folder', each mapped to its own
        DataFrame with the module-level ``index`` as rows and ``columns`` as
        columns. No data is supplied, so every cell starts out as NaN.
    """
    return {
        sheet_name: pd.DataFrame(index=index, columns=columns)
        for sheet_name in ('mean', 'time', 'folder')
    }

# One independent set of sheets per colour mode; show the (still empty) RGB 'mean' sheet.
sheet_rgb, sheet_grayscale = create_sheet(), create_sheet()
display(sheet_rgb['mean'])

In [None]:
# Number of decimal places passed to the spreadsheet ROUND(...) formulas built by the helpers below.
ROUND_VALUE = 3

def round_mean(value):
    """Return a spreadsheet formula that scales ``value`` by 100 and rounds it.

    Args:
        value: the number (or its textual form) to embed; converted with ``str``.

    Returns:
        str: ``=ROUND(<value> * 100; <ROUND_VALUE>)`` using ';' as the
        argument separator.
    """
    pieces = ['=ROUND(', str(value), ' * 100; ', str(ROUND_VALUE), ')']
    return ''.join(pieces)


def round_time(value):
    """Return a spreadsheet formula that rounds ``value`` without scaling it.

    Args:
        value: the number (or its textual form) to embed; converted with ``str``.

    Returns:
        str: ``=ROUND(<value>; <ROUND_VALUE>)`` using ';' as the argument
        separator.
    """
    pieces = ['=ROUND(', str(value), '; ', str(ROUND_VALUE), ')']
    return ''.join(pieces)


def plus_minus_std(value, top_k, total_top_k):
    """Return a spreadsheet formula showing the deviation and the top-k ratio.

    The formula concatenates a plus-minus sign, ``value`` scaled by 100 and
    rounded to ``ROUND_VALUE`` decimals, and the text `` (<top_k>/<total_top_k>)``.

    Args:
        value: standard deviation to embed; converted with ``str``.
        top_k: numerator of the ratio; converted with ``str``.
        total_top_k: denominator of the ratio; converted with ``str`` so that
            numeric values are accepted as well as strings.

    Returns:
        str: ``=CONCATENATE("±"; ROUND(<value> * 100; <ROUND_VALUE>); " (<top_k>/<total_top_k>)")``.
    """
    # '\u00b1' is the plus-minus sign, written as an escape so it cannot be
    # corrupted by an encoding round-trip of the notebook file.
    return (
        '=CONCATENATE("\u00b1"; ROUND(' + str(value) + ' * 100; ' + str(ROUND_VALUE)
        + '); " (' + str(top_k) + '/' + str(total_top_k) + ')")'
    )


def get_classifier(path):
    """Find which known classifier name occurs in ``path``.

    The comparison is case-insensitive and candidates are tried in the order
    of the module-level ``list_classifier``; the first match wins.

    Args:
        path: anything whose ``str()`` form is the path to inspect.

    Returns:
        str: the matching entry of ``list_classifier`` (original casing).

    Raises:
        ValueError: if no classifier name occurs in the path.
    """
    haystack = str(path).lower()
    for candidate in list_classifier:
        if candidate.lower() in haystack:
            return candidate

    raise ValueError('classifier not available in list')


def is_date(string):
    """Tell whether ``string`` parses as a ``day-month-year-hour-minute-second`` stamp.

    Args:
        string (str): text to check against the format '%d-%m-%Y-%H-%M-%S'.

    Returns:
        bool: True when ``strptime`` accepts the text, False when it raises
        ``ValueError``.
    """
    try:
        datetime.datetime.strptime(string, '%d-%m-%Y-%H-%M-%S')
    except ValueError:
        return False
    else:
        return True


def get_date(path):
    """Return the first path component that is a date stamp.

    The path is split into components with ``pathlib`` instead of a
    hard-coded '/' so that paths using the platform's native separator are
    handled as well.

    Args:
        path: anything whose ``str()`` form is the path to inspect.

    Returns:
        str or None: the first component accepted by ``is_date``, or None
        when no component qualifies.
    """
    for component in pathlib.PurePath(str(path)).parts:
        if is_date(component):
            return component
    return None


def set_values_sheet(column, date, index_folder, index_mean, index_std, mean, mean_time_search_best_params, mean_time_train_valid, sheet, std, top_k, total_top_k):
    """Write one run's results into a sheet set.

    Args:
        column: column label to write into in every sheet.
        date: value stored in the 'folder' sheet.
        index_folder: row label used in the 'folder' sheet.
        index_mean: row label of the "mean" row in the 'mean' and 'time' sheets.
        index_std: row label of the "std" row in the 'mean' and 'time' sheets.
        mean: score written (as a rounding formula, scaled by 100) to the 'mean' sheet.
        mean_time_search_best_params: time written to the ``index_std`` row of the 'time' sheet.
        mean_time_train_valid: time written to the ``index_mean`` row of the 'time' sheet.
        sheet: dict with the DataFrames under the keys 'mean', 'time' and 'folder'.
        std: deviation written to the ``index_std`` row of the 'mean' sheet.
        top_k: numerator shown next to the deviation.
        total_top_k: denominator shown next to the deviation.

    Returns:
        None. The DataFrames inside ``sheet`` are modified in place.
    """
    # Score sheet: mean on one row, "± std (top_k/total)" on the row below it.
    scores = sheet['mean']
    scores.loc[index_mean, column] = round_mean(mean)
    scores.loc[index_std, column] = plus_minus_std(std, top_k, total_top_k)

    # Time sheet: train/validation time on the mean row, parameter-search time on the std row.
    timings = sheet['time']
    timings.loc[index_mean, column] = round_time(mean_time_train_valid)
    timings.loc[index_std, column] = round_time(mean_time_search_best_params)

    # Folder sheet: remember which dated run produced the numbers.
    folders = sheet['folder']
    folders.loc[index_folder, column] = date


def fill_sheet_mean_std(classifier, color_mode, date, filename, image_size, extractor, n_features, n_patch, segmented):
    """Read one run's CSV results and store them in the matching global sheet.

    Three ';'-separated files are read: ``filename`` itself, plus
    'mean_top_k/mean_top_k_sum.csv' and '0/top_k/sum/info_top_k_sum.csv',
    whose paths are obtained by replacing 'mean.csv' in ``filename``.

    Args:
        classifier: classifier name, first part of the column label.
        color_mode: 'grayscale' selects ``sheet_grayscale``; any other value
            selects ``sheet_rgb``.
        date: run identifier stored in the 'folder' sheet.
        filename: path of the run's 'mean.csv'.
        image_size: image size, middle part of the column label.
        extractor: extractor name, first part of the row labels.
        n_features: number of features, middle part of the row labels.
        n_patch: accepted but not used by this function.
        segmented: segmentation variant, last part of the column label.

    Returns:
        None. The selected global sheet set is modified in place.
    """
    def first_value(frame, row_label):
        # Value in the column labelled 1 of the row labelled ``row_label``.
        return frame.loc[row_label][1]

    summary = pd.read_csv(filename, sep=';', index_col=0, header=None)
    mean, mean_time_search_best_params, mean_time_train_valid, std = (
        first_value(summary, row_label)
        for row_label in ('mean_f1_sum', 'mean_time_search_best_params', 'mean_time_train_valid', 'std_f1_sum')
    )

    # The top-k files live next to 'mean.csv'; derive their paths from it.
    mean_csv_path = str(filename)
    top_k_table = pd.read_csv(mean_csv_path.replace('mean.csv', 'mean_top_k/mean_top_k_sum.csv'), sep=';', index_col=0, header=0)
    top_k_info = pd.read_csv(mean_csv_path.replace('mean.csv', '0/top_k/sum/info_top_k_sum.csv'), sep=';', index_col=0, header=None)
    # 'top_k' is taken from the second data row of the top-k table.
    top_k = top_k_table.iloc[1]['top_k']
    total_top_k = first_value(top_k_info, 'total')

    row_prefix = extractor + '_' + n_features + '_'
    index_mean = row_prefix + 'mean'
    index_std = row_prefix + 'std'
    column = classifier + '_' + image_size + '_' + segmented

    # Same call for both colour modes; only the destination sheet set differs.
    # The 'folder' entry is stored on the "mean" row, hence index_mean is passed twice.
    target_sheet = sheet_grayscale if color_mode == 'grayscale' else sheet_rgb
    set_values_sheet(column, date, index_mean, index_mean, index_std, mean, mean_time_search_best_params, mean_time_train_valid, target_sheet, std, top_k, total_top_k)

In [None]:
# Walk the result files in sorted order and copy each run's metrics into the sheets.
for file in sorted(list_files):
    file_text = str(file)

    # The run's settings are read from 'info.csv', located by replacing 'mean.csv' in the path.
    sheet_info = pd.read_csv(file_text.replace('mean.csv', 'info.csv'), sep=';', index_col=0, header=None)

    classifier = get_classifier(file)
    color_mode = sheet_info.loc['color_mode'][1]
    image_size = sheet_info.loc['dim_image'][1]
    extractor = sheet_info.loc['extractor'][1]
    n_features = sheet_info.loc['data_n_features'][1]
    n_patch = sheet_info.loc['n_patch'][1]
    slice_patch = sheet_info.loc['slice'][1]

    # The segmentation variant is inferred from the path text (case-insensitive).
    file_text_lower = file_text.lower()
    if 'unet' in file_text_lower or 'u-net' in file_text_lower:
        segmented = 'unet'
    else:
        segmented = 'manual'
    date = get_date(file)

    fill_sheet_mean_std(classifier, color_mode, date, file, image_size, extractor, n_features, n_patch, segmented)

In [None]:
# Write the selected sheets to 'out/' as both CSV and XLSX.
# Only the grayscale 'mean', RGB 'mean' and RGB 'time' sheets are exported here.
pathlib.Path('out').mkdir(exist_ok=True, parents=True)
for frame, csv_target, xlsx_target in (
    (sheet_grayscale['mean'], 'out/grayscale.csv', 'out/grayscale.xlsx'),
    (sheet_rgb['mean'], 'out/rgb.csv', 'out/rgb.xlsx'),
    (sheet_rgb['time'], 'out/rgb_t.csv', 'out/rgb_t.xlsx'),
):
    frame.to_csv(csv_target, sep=';', na_rep='', quoting=csv.QUOTE_ALL)
    frame.to_excel(xlsx_target, na_rep='', engine='xlsxwriter')