In [23]:
import math
import os
import pandas as pd
import pathlib
import re

In [24]:
# Extractor name -> numeric variants listed for it. Insertion order matters:
# the row labels of the result tables are generated by iterating this mapping.
list_extractor = dict([
    ('lbp', [59]),
    ('surf64', [128, 256, 257]),
    ('surf128', [128, 256, 513]),
    ('mobilenetv2', [128, 256, 512, 1024, 1280]),
    ('resnet50v2', [128, 256, 512, 1024, 2048]),
    ('vgg16', [128, 256, 512]),
])

# Classifier names, in the order their columns appear in the result tables.
list_classifier = [
    'DecisionTreeClassifier',
    'KNeighborsClassifier',
    'MLPClassifier',
    'RandomForestClassifier',
    'SVC',
]

list_dim = [256, 400, 512]                            # image-size part of column labels
list_metrics = ['mean', 'std']                        # suffixes of the score rows
list_source = ['manual', 'unet']                      # source part of column labels
list_time = ['search_best_params', 'train_valid']     # suffixes of the timing rows

# Root folder searched recursively for 'mean.csv' result files.
path = 'resultados'

In [25]:
# Column labels follow '<classifier>_<image dim>_<source>'. Sources are
# iterated in alphabetical order, image dims in their listed order.
columns = [
    f'{clf}_{dim}_{src}'
    for clf in list_classifier
    for src in sorted(list_source)
    for dim in list_dim
]

# One '<extractor>_<size>' stem per extractor/size pair; the sizes of each
# extractor are walked in reverse of their listed order.
index_folder = [
    f'{name}_{size}'
    for name, sizes in list_extractor.items()
    for size in reversed(sizes)
]
# Every stem expanded with each metric suffix.
index = [f'{stem}_{metric}' for stem in index_folder for metric in list_metrics]
# Every stem expanded with each timing suffix, in reverse alphabetical order.
index_time = [
    f'{stem}_{timing}'
    for stem in index_folder
    for timing in sorted(list_time, reverse=True)
]

In [26]:
def new(columns, index, index_folder, index_time):
    """Create the three empty result tables.

    All tables share the same ``columns``; they differ only in row labels.

    Parameters
    ----------
    columns : list
        Column labels used by every table.
    index : list
        Row labels of the mean/std table.
    index_folder : list
        Row labels of the folder table.
    index_time : list
        Row labels of the timing table.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_folder, df_mean, df_mean_time)``, all filled with NaN.
    """
    # Order of the row-label sets fixes the order of the returned tuple.
    return tuple(
        pd.DataFrame(index=row_labels, columns=columns)
        for row_labels in (index_folder, index, index_time)
    )

def file_is_colormode_and_slice_and_patch(color, file):
    """Tell whether the run that produced ``file`` matches the wanted settings.

    The settings are read from the ``info.csv`` file whose path is obtained by
    replacing ``mean.csv`` with ``info.csv`` in ``file``. That file is parsed
    as a ``;``-separated key/value table without a header row.

    Parameters
    ----------
    color : str
        Color mode to keep; compared case-insensitively with the
        ``color_mode`` entry of ``info.csv``.
    file : str or pathlib.Path
        Path of a ``mean.csv`` result file.

    Returns
    -------
    bool
        True only when all of these hold: ``color_mode`` equals ``color``
        (ignoring case), ``dim_image`` is not ``'SEM_RESIZE'``, ``slice`` is
        ``'horizontal'`` or empty, and ``n_patch`` is ``'3'`` or empty.

    Raises
    ------
    KeyError
        If ``info.csv`` lacks one of the four entries read here.
    """
    info_path = str(file).replace('mean.csv', 'info.csv')
    data_info = pd.read_csv(info_path, sep=";", header=None, index_col=0)

    color_mode = data_info.loc['color_mode'][1]
    dim = data_info.loc['dim_image'][1]
    n_patch = data_info.loc['n_patch'][1]
    slice_mode = data_info.loc['slice'][1]

    # str() keeps a missing (NaN) color_mode from raising on .lower().
    same_color = str(color_mode).lower() == color.lower()
    is_resized = dim != 'SEM_RESIZE'
    # pd.isna accepts any scalar; math.isnan raises TypeError on strings,
    # which made non-matching values such as 'vertical' or '5' crash.
    slice_ok = slice_mode == 'horizontal' or pd.isna(slice_mode)
    patch_ok = n_patch == '3' or pd.isna(n_patch)

    return bool(same_color and is_resized and slice_ok and patch_ok)

def save(color_mode, df_folder, df_mean, df_mean_time):
    """Write the three result tables to ``mean/mean-<color_mode>.xlsx``.

    The ``mean`` output folder is created when it does not exist. Each table
    goes to its own sheet (``folder``, ``mean``, ``mean_time``) and missing
    values are written as empty cells.

    Parameters
    ----------
    color_mode : str
        Used to build the workbook file name.
    df_folder, df_mean, df_mean_time : pandas.DataFrame
        Tables written to the ``folder``, ``mean`` and ``mean_time`` sheets.
    """
    path_outfile = 'mean'
    pathlib.Path(path_outfile).mkdir(parents=True, exist_ok=True)
    outfile = os.path.join(path_outfile, f'mean-{color_mode}.xlsx')

    # The context manager saves and closes the workbook on exit, also when a
    # to_excel call fails; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(outfile, engine='xlsxwriter') as writer:
        df_folder.to_excel(writer, sheet_name='folder', na_rep='')
        df_mean.to_excel(writer, sheet_name='mean', na_rep='')
        df_mean_time.to_excel(writer, sheet_name='mean_time', na_rep='')

In [27]:
# Start from three empty tables that share the same columns.
df_folder, df_mean, df_mean_time = new(
    columns=columns,
    index=index,
    index_folder=index_folder,
    index_time=index_time,
)

In [28]:
# Collect every 'mean.csv' under `path` whose sibling 'info.csv' matches the
# wanted color mode, then copy its figures into the three result tables.
color_mode = "rgb"
list_files = [p for p in pathlib.Path(path).rglob('mean.csv') if p.is_file()]
list_files = [file for file in list_files if file_is_colormode_and_slice_and_patch(color_mode, file)]
for file in list_files:
    print(file)

    # Path.parts splits on the platform's own separator, so this also works
    # where paths are not written with '/'.
    parts = file.parts
    if len(parts) == 10:
        _, date, _, source, dim_image, extractor, classifier, _, n_features, _ = parts
    else:
        # Longer layout: one extra folder sits between the source folder and
        # the image size (e.g. '.../segmented_manual/RGB/400/...').
        _, date, _, source, _, dim_image, extractor, classifier, _, n_features, _ = parts
    print(date, source, dim_image, extractor, classifier, n_features)
    source = source.replace('segmented_', '')
    data = pd.read_csv(file, sep=';', header=None, index_col=0).squeeze()

    # Local keys; the module-level `index` list is deliberately left intact
    # so the cell that builds the empty tables can be re-run afterwards.
    row_key = extractor + '_' + str(n_features)
    column = classifier + '_' + dim_image + '_' + source

    mean_sum = data.loc['mean_sum'][1]
    std_sum = data.loc['std_sum'][1]
    print(mean_sum, std_sum)
    index_mean = row_key + '_mean'
    index_std = row_key + '_std'
    print(index_mean, index_std)
    # Cells hold spreadsheet formulas: the value is scaled by 100 and rounded
    # to two decimals by the spreadsheet; the std cell is prefixed with '±'.
    df_mean.loc[index_mean, column] = f'=ROUND({mean_sum} * 100, 2)'
    df_mean.loc[index_std, column] = f'="±"&ROUND({std_sum} * 100, 2)'

    # Remember which run folder each cell came from.
    df_folder.loc[row_key, column] = date
    print(row_key, column, date)

    index_mean_time_train_valid = row_key + '_train_valid'
    index_mean_time_search_best_params = row_key + '_search_best_params'
    time_train_valid = data.loc['mean_time_train_valid'][1]
    time_search_best_params = data.loc['mean_time_search_best_params'][1]
    df_mean_time.loc[index_mean_time_train_valid, column] = f'=ROUND({time_train_valid}, 2)'
    df_mean_time.loc[index_mean_time_search_best_params, column] = f'=ROUND({time_search_best_params}, 2)'
    print(" ")
save(color_mode, df_folder, df_mean, df_mean_time)

resultados/13-10-2022-19-06-08/imagens_sp/segmented_manual/RGB/400/resnet50v2/KNeighborsClassifier/patch=3/1024/mean.csv
13-10-2022-19-06-08 segmented_manual mean.csv 400 resnet50v2 KNeighborsClassifier 1024
0.6853333333333333 0.038273866918419504
resnet50v2_1024_mean resnet50v2_1024_std
resnet50v2_1024 KNeighborsClassifier_400_manual 13-10-2022-19-06-08
 
resultados/13-10-2022-19-06-08/imagens_sp/segmented_manual/RGB/400/resnet50v2/KNeighborsClassifier/patch=3/128/mean.csv
13-10-2022-19-06-08 segmented_manual mean.csv 400 resnet50v2 KNeighborsClassifier 128
0.752 0.045879068090893996
resnet50v2_128_mean resnet50v2_128_std
resnet50v2_128 KNeighborsClassifier_400_manual 13-10-2022-19-06-08
 
resultados/13-10-2022-19-06-08/imagens_sp/segmented_manual/RGB/400/resnet50v2/KNeighborsClassifier/patch=3/512/mean.csv
13-10-2022-19-06-08 segmented_manual mean.csv 400 resnet50v2 KNeighborsClassifier 512
0.7066666666666667 0.05962847939999442
resnet50v2_512_mean resnet50v2_512_std
resnet50v2_512 K