In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ranksums
import seaborn as sns
from tabulate import tabulate

# Load the consolidated results CSV into a DataFrame.
file_path = (
    "/media/williancaddd/CODES/projects/malaria-pibiti/"
    "6_resultados/analitycs/by_test/all_results.csv"
)
data = pd.read_csv(file_path)

# Validation metrics (CSV column names) that are compared in the analysis.
metrics_to_compare = [
    "val_accuracy",
    "val_precision",
    "val_specificity",
    "val_f1_score",
    "val_auc",
    "val_sensitivity",
]

# Função para testar hipóteses considerando a média global
def test_hypothesis_with_mean(main_pd, roi, alpha=0.01):
    normalized_values = pd.DataFrame()

    # Calcular a média de cada métrica para todo o dataset
    overall_means = main_pd.groupby('network')[metrics_to_compare].mean()

    for network in main_pd['network'].unique():
        network_data = main_pd[main_pd['network'] == network]
        roi_data = network_data[network_data['dataset'] == roi]

        # Avaliar todas as métricas em comparação com a média global
        for column in metrics_to_compare:
            overall_mean = overall_means.loc[network, column]
            p_value = ranksums(roi_data[column], [overall_mean] * len(roi_data), alternative='two-sided').pvalue
            h = 0 if p_value < alpha else 1
            normalized_values = normalized_values._append({
                'network': network,
                'roi': roi,
                'column': column,
                'p_value': p_value,
                'h': h
            }, ignore_index=True)
    
    return normalized_values

# Run the hypothesis test for every ROI except the 100.0 one.
mean_based_results = []
other_rois = data[data['dataset'] != 100.0]['dataset'].unique()

for roi in sorted(other_rois):
    result = test_hypothesis_with_mean(data, roi)
    mean_based_results.append(result)

# Stack the per-ROI results into a single table.
mean_final_results_df = pd.concat(mean_based_results, ignore_index=True)

# Keep only the tests where the null hypothesis was not rejected (h == 1).
mean_non_rejected_results = mean_final_results_df[mean_final_results_df['h'] == 1]

# For each model and metric, find the largest ROI value that was not rejected.
mean_reduction_summary = []

for network in mean_non_rejected_results['network'].unique():
    network_data = mean_non_rejected_results[mean_non_rejected_results['network'] == network]
    for column in mean_non_rejected_results['column'].unique():
        metric_data = network_data[network_data['column'] == column]
        if not metric_data.empty:
            last_non_rejected_roi = metric_data['roi'].max()
            mean_reduction_summary.append({
                'Model': network,
                'Metric': column,
                'Last Non-Rejected ROI': last_non_rejected_roi
            })

# Convert to a DataFrame.
mean_reduction_summary_df = pd.DataFrame(mean_reduction_summary)

# A ROI qualifies when at least half of the metrics are preserved. The
# division is rounded up so an odd number of metrics still needs a true half
# (e.g. 3 out of 5, not 2).
num_metrics = len(metrics_to_compare)
threshold = (num_metrics + 1) // 2

# Build the per-model summary.
final_summary_with_pvalues = []

for model in mean_final_results_df['network'].unique():
    model_data = mean_final_results_df[mean_final_results_df['network'] == model]

    # Walk the ROI values from largest to smallest and stop at the first one
    # that keeps enough metrics.
    for roi in sorted(other_rois, reverse=True):
        roi_data = model_data[(model_data['roi'] == roi) & (model_data['h'] == 1)]

        if len(roi_data) >= threshold:
            # p-value of every preserved metric; metrics whose null hypothesis
            # was rejected for this ROI are reported as None.
            preserved_pvalues = roi_data.groupby('column')['p_value'].mean()
            pvalues = {metric: preserved_pvalues.get(metric) for metric in metrics_to_compare}

            # NOTE: 'Metrics with Significant Performance' holds the number of
            # metrics whose null hypothesis was NOT rejected (h == 1).
            final_summary_with_pvalues.append({
                'Model': model,
                'Max ROI Reduction': roi,
                'Metrics with Significant Performance': len(roi_data),
                **pvalues
            })
            break

# Convert to a DataFrame.
final_reduction_with_pvalues_df = pd.DataFrame(final_summary_with_pvalues)

# Print the final summary table.
print(tabulate(final_reduction_with_pvalues_df, headers='keys', tablefmt='psql'))


+----+-------------------+---------------------+----------------------------------------+----------------+-----------------+-------------------+----------------+-------------+-------------------+
|    | Model             |   Max ROI Reduction |   Metrics with Significant Performance |   val_accuracy |   val_precision |   val_specificity |   val_f1_score |     val_auc |   val_sensitivity |
|----+-------------------+---------------------+----------------------------------------+----------------+-----------------+-------------------+----------------+-------------+-------------------|
|  0 | DenseNet201       |                  35 |                                      5 |      0.62507   |       0.221823  |         0.142639  |      0.221823  | nan         |         0.328393  |
|  1 | MobileNetV2       |                  40 |                                      5 |      0.463548  |       0.221823  |         0.142639  |      0.221823  | nan         |         0.0278744 |
|  2 | InceptionV3  