In [None]:
import math
import os
import pandas as pd
import pathlib
import re

In [None]:
# Colour mode of the runs to summarise; compared (case-insensitively) with the
# 'color_mode' entry of each info.csv and used in the output file name.
color_mode = 'grayscale'

# Keys read from each mean.csv for the score and its dispersion.
metric, std_metric = 'mean_f1_sum', 'std_f1_sum'

# Extractor name -> numeric suffixes used to build the row labels.
# Insertion order matters: it fixes the row order of the summary tables.
list_extractor = dict(
    lbp=[59],
    surf64=[128, 256, 257],
    surf128=[128, 256, 513],
    mobilenetv2=[128, 256, 512, 1024, 1280],
    resnet50v2=[128, 256, 512, 1024, 2048],
    vgg16=[128, 256, 512],
)

# Classifier names; they appear in the column labels and are searched for in
# the result paths.
list_classifier = [
    'DecisionTreeClassifier',
    'KNeighborsClassifier',
    'MLPClassifier',
    'RandomForestClassifier',
    'SVC',
]

# Remaining components of the row/column labels.
list_dim = [256, 400, 512]
list_metrics = ['mean', 'std']
list_source = ['manual', 'unet']
list_time = ['search_best_params', 'train_valid']

# Root directory that is searched recursively for mean.csv files.
path = 'resultados'

In [None]:
# Column labels: '<classifier>_<dim>_<source>', grouped by classifier, then by
# source (alphabetical), then by image dimension.
columns = [
    f'{classifier}_{dim}_{source}'
    for classifier in list_classifier
    for source in sorted(list_source)
    for dim in list_dim
]

# Row stems: '<extractor>_<n>', each extractor's sizes listed from last to first.
index_folder = [
    f'{extractor}_{size}'
    for extractor, sizes in list_extractor.items()
    for size in reversed(sizes)
]

# Score rows: one per stem and metric ('<stem>_mean', '<stem>_std').
index = [f'{stem}_{stat}' for stem in index_folder for stat in list_metrics]

# Timing rows: one per stem and timing key, timing keys in reverse
# alphabetical order ('train_valid' before 'search_best_params').
index_time = [
    f'{stem}_{timing}'
    for stem in index_folder
    for timing in sorted(list_time, reverse=True)
]

In [None]:
def new(columns, index, index_folder, index_time):
    """
    Create the three empty summary tables.

    All tables share the same column labels and differ only in their rows.

    :param columns: list of column labels shared by every table
    :param index: row labels of the score table
    :param index_folder: row labels of the folder table
    :param index_time: row labels of the timing table
    :return: tuple (df_folder, df_mean, df_mean_time) of empty DataFrames
    """
    row_sets = (index_folder, index, index_time)
    return tuple(pd.DataFrame(columns=columns, index=rows) for rows in row_sets)

def file_is_colormode_and_slice_and_patch(color, file):
    """
    Tell whether a result file belongs to the configuration being summarised.

    The decision is taken from the info.csv whose path is obtained by
    replacing 'mean.csv' with 'info.csv' in ``file``. That file is read as a
    ';'-separated table without header whose first column is the key and whose
    column labelled 1 is the value.

    A file is accepted when all of the following hold:
      * its 'color_mode' equals ``color`` (case-insensitive);
      * its 'dim_image' is not 'SEM_RESIZE';
      * its 'slice' is 'horizontal' or empty;
      * its 'n_patch' is '3' or empty.

    :param color: str, colour mode to keep
    :param file: str or path-like, location of a mean.csv
    :return: bool, True when the file should be kept
    :raises KeyError: if one of the four keys is absent from info.csv
    """
    info_path = str(file).replace('mean.csv', 'info.csv')
    data_info = pd.read_csv(info_path, sep=';', header=None, index_col=0)

    color_mode = data_info.loc['color_mode', 1]
    dim = data_info.loc['dim_image', 1]
    n_patch = data_info.loc['n_patch', 1]
    slice_mode = data_info.loc['slice', 1]

    # str() keeps an empty (NaN) colour mode from raising on .lower().
    same_color = str(color_mode).lower() == color.lower()
    resized = dim != 'SEM_RESIZE'
    # pd.isna accepts any scalar; math.isnan raised TypeError for filled-in
    # string values such as 'vertical' or '5' instead of rejecting the file.
    slice_ok = slice_mode == 'horizontal' or pd.isna(slice_mode)
    patch_ok = n_patch == '3' or pd.isna(n_patch)

    return bool(same_color and resized and slice_ok and patch_ok)

def save(color_mode, df_folder, df_mean, df_mean_time):
    """
    Write the three summary tables to 'mean/mean-<color_mode>.xlsx'.

    The output directory is created when missing. Each table goes to its own
    sheet ('folder', 'mean', 'mean_time') and empty cells are written blank.

    :param color_mode: str, used in the output file name
    :param df_folder: DataFrame written to the 'folder' sheet
    :param df_mean: DataFrame written to the 'mean' sheet
    :param df_mean_time: DataFrame written to the 'mean_time' sheet
    """
    path_outfile = 'mean'
    pathlib.Path(path_outfile).mkdir(parents=True, exist_ok=True)
    filename = os.path.join(path_outfile, f'mean-{color_mode}.xlsx')

    sheets = (
        ('folder', df_folder),
        ('mean', df_mean),
        ('mean_time', df_mean_time),
    )
    # The context manager saves and closes the workbook, also when a sheet
    # fails to write; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, sheet_name=sheet_name, na_rep='')

In [None]:
# Start from empty tables; the cells are filled in by the loop over the result files.
df_folder, df_mean, df_mean_time = new(
    columns=columns,
    index=index,
    index_folder=index_folder,
    index_time=index_time,
)

In [None]:
import datetime

def is_date(string, fuzzy=False):
    """
    Return whether the string is a timestamp in the form 'DD-MM-YYYY-HH-MM-SS'.

    :param string: str, text to check; any non-string value yields False
    :param fuzzy: bool, accepted for backward compatibility only; it has no
        effect because the format is matched strictly
    :return: bool, True when the whole string matches the format
    """
    try:
        datetime.datetime.strptime(string, '%d-%m-%Y-%H-%M-%S')
    except (TypeError, ValueError):
        # ValueError: text does not match the format;
        # TypeError: the input is not a string at all (e.g. None).
        return False
    return True

def search_info(list_info, info):
    """
    Return the first candidate that occurs inside ``info``, ignoring case.

    :param list_info: list of str, candidates in priority order
    :param info: object whose string form is searched
    :return: the first matching candidate as written in ``list_info``,
        or None when no candidate occurs
    """
    haystack = str(info).lower()
    matches = [candidate for candidate in list_info if candidate.lower() in haystack]
    if not matches:
        return None
    return matches[0]

def get_info(path):
    """
    Derive run metadata from the location of a result file.

    Every field except the date is found by a case-insensitive substring
    search over the whole path (see search_info): the first candidate that
    occurs anywhere in the path wins, and the field is None when none occurs.
    The date is the last path component accepted by is_date, or '' when there
    is none.

    :param path: str or path-like, location of a result file
    :return: tuple (date, segmented, dim, extractor, classifier, extractor);
        the extractor is repeated in the last slot so that existing callers,
        which unpack six values, keep working
    """
    text = str(path)

    segmented = search_info(['manual', 'unet'], text)
    dim = search_info(['256', '400', '512'], text)
    # NOTE(review): only 'surf' is searched for, whereas list_extractor names
    # the variants 'surf64' and 'surf128' -- confirm how the variant is
    # encoded in the path before relying on the returned extractor for them.
    extractor = search_info(['lbp', 'surf', 'mobilenetv2', 'resnet50v2', 'vgg16'], text)
    classifier = search_info(list_classifier, text)

    # Split with pathlib so the components are found whatever separator the
    # platform uses; splitting on '/' never matched on Windows-style paths.
    date = ''
    for part in pathlib.PurePath(text).parts:
        if is_date(part):
            date = part

    return date, segmented, dim, extractor, classifier, extractor

# Every mean.csv below `path` whose sibling info.csv matches the selected
# colour mode, slice and patch settings.
list_files = [
    candidate
    for candidate in pathlib.Path(path).rglob('mean.csv')
    if candidate.is_file() and file_is_colormode_and_slice_and_patch(color_mode, candidate)
]
len(list_files)

In [None]:
# Fill the three summary tables from every selected result file, then export
# them to a single workbook.
for file in list_files:
    # info.csv and mean.csv are ';'-separated key/value files without header:
    # the key is the index and the value sits in the column labelled 1.
    # Reading them as frames and addressing (key, 1) works whether or not the
    # file carries extra columns, unlike squeeze() followed by [1].
    filename_info = str(file).replace('mean.csv', 'info.csv')
    data_info = pd.read_csv(filename_info, sep=';', header=None, index_col=0)
    n_features = data_info.loc['data_n_features', 1]

    # get_info returns the extractor twice; the last slot is ignored.
    date, segmented, dim, extractor, classifier, _ = get_info(file)
    if None in (segmented, dim, extractor, classifier):
        # Without every field no cell can be addressed; skip this file rather
        # than abort the whole run with a TypeError.
        print(f'skipping {file}: could not identify every field from its path')
        continue

    data = pd.read_csv(file, sep=';', header=None, index_col=0)

    # Named row_label (not `index`) so the module-level `index` list used to
    # build the tables is not overwritten when this cell runs.
    row_label = extractor + '_' + str(n_features)
    column = classifier + '_' + dim + '_' + segmented

    if row_label not in df_folder.index or column not in df_folder.columns:
        # .loc assignment would silently enlarge the tables; make it visible.
        print(f'warning: {row_label!r} / {column!r} is not a pre-declared cell; it will be appended')

    mean_value = data.loc[metric, 1]
    std_value = data.loc[std_metric, 1]
    print(date, segmented, dim, extractor, classifier, n_features, mean_value, std_value)

    # Cells hold spreadsheet formulas so rounding happens in the workbook.
    df_mean.loc[row_label + '_mean', column] = f'=ROUND({mean_value} * 100, 2)'
    df_mean.loc[row_label + '_std', column] = f'="±"&ROUND({std_value} * 100, 2)'

    df_folder.loc[row_label, column] = date

    time_train_valid = data.loc['mean_time_train_valid', 1]
    time_search_best_params = data.loc['mean_time_search_best_params', 1]
    df_mean_time.loc[row_label + '_train_valid', column] = f'=ROUND({time_train_valid}, 2)'
    df_mean_time.loc[row_label + '_search_best_params', column] = f'=ROUND({time_search_best_params}, 2)'

save(color_mode, df_folder, df_mean, df_mean_time)