# Packages

In [1]:
import os
import itertools
import numpy as np
import math as math
import pandas as pd 
import random as random
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon
from utils.Auxiliary import *

# Input


In [2]:
### Set Up ###
# Root folder holding the analyzed results. The original author's location is
# kept as the default; set the RASHOMON_RESULTS_DIR environment variable to run
# the notebook elsewhere without editing this cell.
# os.path.join(..., "") guarantees a trailing path separator, matching the
# format of the original hard-coded value.
BaseDirectory = os.path.join(
    os.environ.get(
        "RASHOMON_RESULTS_DIR",
        "/Users/simondn/Documents/RashomonActiveLearning/Results/",
    ),
    "",
)

# Each dataset below is loaded three times through LoadAnalyzedData:
#   * "RandomForestClassification" with tag "PLA0"  -> PassiveLearning_<name>
#   * "RandomForestClassification" with tag "RFA0"  -> RandomForest_<name>
#   * "TreeFarms" with a numeric fourth argument    -> AnalyzedDataUNREALDUREAL_<name>
# NOTE(review): the numeric TreeFarms argument differs per dataset and is
# presumably a model-specific threshold -- confirm against LoadAnalyzedData.

### Monk1 ###
PassiveLearning_Monk1 = LoadAnalyzedData("MONK1", BaseDirectory, "RandomForestClassification", "PLA0")
RandomForest_Monk1 = LoadAnalyzedData("MONK1", BaseDirectory, "RandomForestClassification", "RFA0")
AnalyzedDataUNREALDUREAL_Monk1 = LoadAnalyzedData("MONK1", BaseDirectory, "TreeFarms", 0.03)

### Monk3 ###
PassiveLearning_Monk3 = LoadAnalyzedData("MONK3", BaseDirectory, "RandomForestClassification", "PLA0")
RandomForest_Monk3 = LoadAnalyzedData("MONK3", BaseDirectory, "RandomForestClassification", "RFA0")
AnalyzedDataUNREALDUREAL_Monk3 = LoadAnalyzedData("MONK3", BaseDirectory, "TreeFarms",  0.019)

### Iris ###
PassiveLearning_Iris = LoadAnalyzedData("Iris", BaseDirectory, "RandomForestClassification", "PLA0")
RandomForest_Iris = LoadAnalyzedData("Iris", BaseDirectory, "RandomForestClassification", "RFA0")
AnalyzedDataUNREALDUREAL_Iris = LoadAnalyzedData("Iris", BaseDirectory, "TreeFarms", 0.025)

### Bar7 ###
PassiveLearning_Bar7 = LoadAnalyzedData("Bar7", BaseDirectory, "RandomForestClassification", "PLA0")
RandomForest_Bar7 = LoadAnalyzedData("Bar7", BaseDirectory, "RandomForestClassification", "RFA0")
AnalyzedDataUNREALDUREAL_Bar7 = LoadAnalyzedData("Bar7", BaseDirectory, "TreeFarms", 0.02)

# ### BreastCancer ###
# PassiveLearning_BreastCancer = LoadAnalyzedData("BreastCancer", BaseDirectory, "RandomForestClassification", "PLA0")
# RandomForest_BreastCancer = LoadAnalyzedData("BreastCancer", BaseDirectory, "RandomForestClassification", "RFA0")
# AnalyzedDataUNREALDUREAL_BreastCancer = LoadAnalyzedData("BreastCancer", BaseDirectory, "TreeFarms", 0.03)

# ### COMPAS ###
# PassiveLearning_COMPAS = LoadAnalyzedData("COMPAS", BaseDirectory, "RandomForestClassification", "PLA0")
# RandomForest_COMPAS = LoadAnalyzedData("COMPAS", BaseDirectory, "RandomForestClassification", "RFA0")
# AnalyzedDataUNREALDUREAL_COMPAS = LoadAnalyzedData("COMPAS", BaseDirectory, "TreeFarms", 0.03)

# Stopping Criteria

In [3]:
# Shared settings for the stopping-criterion analysis; the analysis cell
# unpacks this mapping as keyword arguments.
params = dict(
    window_size=3,
    performance_threshold=0.01,
    plateau_count=3,
    initial_data_percentage=20.0,
)

In [4]:

# Registry of the datasets to analyze. Each entry pairs a dataset label with
# the three result objects loaded for it earlier in the notebook
# (passive-learning run, random-forest run, TreeFarms run).
datasets_info = [
    dict(name=label, passive=passive_run, random_forest=forest_run, unreal_dureal=treefarms_run)
    for label, passive_run, forest_run, treefarms_run in (
        ('MONK1', PassiveLearning_Monk1, RandomForest_Monk1, AnalyzedDataUNREALDUREAL_Monk1),
        ('MONK3', PassiveLearning_Monk3, RandomForest_Monk3, AnalyzedDataUNREALDUREAL_Monk3),
        ('Iris', PassiveLearning_Iris, RandomForest_Iris, AnalyzedDataUNREALDUREAL_Iris),
        ('Bar7', PassiveLearning_Bar7, RandomForest_Bar7, AnalyzedDataUNREALDUREAL_Bar7),
    )
]

# Run the stopping analysis once per registered dataset, in list order,
# passing the shared settings from `params` as extra keyword arguments.
all_results = [
    analyze_dataset_methods(
        dataset_name=entry['name'],
        passive_data=entry['passive'],
        random_forest_data=entry['random_forest'],
        unreal_dureal_data=entry['unreal_dureal'],
        **params
    )
    for entry in datasets_info
]

# Stack the per-dataset results into a single table with a fresh 0..n-1 index
final_results = pd.concat(all_results, ignore_index=True)

# Emit the combined table as plain text, then as LaTeX source
print("\nResults as DataFrame:")
print(final_results)

print("\nLaTeX table:")
print(create_latex_table(final_results))


Analysis with parameters:
Window size: 3
Performance threshold: 0.01
Plateau count: 3
--------------------------------------------------
Recommended stopping at iteration: 13
Final mean F1 score: 0.7712
Percentage of data used: 37.5%

Analysis with parameters:
Window size: 3
Performance threshold: 0.01
Plateau count: 3
--------------------------------------------------
Recommended stopping at iteration: 17
Final mean F1 score: 0.8119
Percentage of data used: 42.5%

Analysis with parameters:
Window size: 3
Performance threshold: 0.01
Plateau count: 3
--------------------------------------------------
Recommended stopping at iteration: 16
Final mean F1 score: 0.9677
Percentage of data used: 41.2%

Analysis with parameters:
Window size: 3
Performance threshold: 0.01
Plateau count: 3
--------------------------------------------------
Recommended stopping at iteration: 17
Final mean F1 score: 0.9697
Percentage of data used: 42.5%

Analysis with parameters:
Window size: 3
Performance thresh

  start_performance = window[0]
  end_performance = window[-1]


In [5]:
# Show the MONK1 rows by dataset label instead of by hard-coded row positions,
# so the view stays correct if the number or order of rows per dataset changes.
final_results[final_results["Dataset"] == "MONK1"]

Unnamed: 0,Dataset,Method,Stopping Iteration,Final F1,Data_Usage_Percent (math wrong)
0,MONK1,PassiveLearning,13,0.7712,37.5
1,MONK1,RandomForest,17,0.811867,42.5
2,MONK1,UNREAL,16,0.967733,41.25
3,MONK1,DUREAL,17,0.969733,42.5


In [6]:
# Show the MONK3 rows by dataset label instead of by hard-coded row positions,
# so the view stays correct if the number or order of rows per dataset changes.
final_results[final_results["Dataset"] == "MONK3"]

Unnamed: 0,Dataset,Method,Stopping Iteration,Final F1,Data_Usage_Percent (math wrong)
4,MONK3,PassiveLearning,26,0.7264,54.615385
5,MONK3,RandomForest,25,0.766267,53.333333
6,MONK3,UNREAL,38,0.875867,70.0
7,MONK3,DUREAL,31,0.852,61.025641


In [7]:
# Show the Iris rows by dataset label instead of by hard-coded row positions,
# so the view stays correct if the number or order of rows per dataset changes.
final_results[final_results["Dataset"] == "Iris"]

Unnamed: 0,Dataset,Method,Stopping Iteration,Final F1,Data_Usage_Percent (math wrong)
8,Iris,PassiveLearning,12,0.958889,33.541667
9,Iris,RandomForest,12,0.972222,33.541667
10,Iris,UNREAL,12,0.973333,33.541667
11,Iris,DUREAL,12,0.974111,33.541667


In [8]:
# Show the Bar7 rows by dataset label instead of by hard-coded row positions,
# so the view stays correct if the number or order of rows per dataset changes.
final_results[final_results["Dataset"] == "Bar7"]

Unnamed: 0,Dataset,Method,Stopping Iteration,Final F1,Data_Usage_Percent (math wrong)
12,Bar7,PassiveLearning,12,0.596667,30.15625
13,Bar7,RandomForest,12,0.648333,30.15625
14,Bar7,UNREAL,12,0.639444,30.15625
15,Bar7,DUREAL,12,0.632778,30.15625


In [9]:
# # Start LaTeX table
# latex_str = """
# \\begin{table}[!htp]
#     \\centering
#     \\begin{tabular}{l c c}
#         \\toprule
#         Method & Stopping Iteration & Final F1\\\\
#         \\midrule
# """

# # Group by dataset and format each section
# for dataset, group in final_results.groupby("Dataset"):
#     latex_str += f"        \\multicolumn{{3}}{{l}}{{\\textbf{{{dataset}}}}} \\\\\n"
#     for _, row in group.iterrows():
#         latex_str += f"        {row['Method']} & {row['Stopping Iteration']} & {row['Final F1']:.5f} \\\\\n"
#     latex_str += "        \\midrule\n"

# # Remove last \midrule and close table
# latex_str = latex_str.rsplit("        \\midrule\n", 1)[0]
# latex_str += """
#         \\bottomrule
#     \\end{tabular}
# """
# import pandas as pd

# # Assuming final_results is already defined
# datasets = ["MONK1", "MONK3", "Iris", "Bar7"]

# latex_table = []
# latex_table.append("\\begin{table}[!htp]")
# latex_table.append("    \\centering")
# latex_table.append("    \\begin{tabular}{l c c}")
# latex_table.append("        \\toprule")
# latex_table.append("        Method & Stopping Iteration & Final F1\\\\")
# latex_table.append("        \\midrule")

# for dataset in datasets:
#     latex_table.append(f"        \\multicolumn{{3}}{{l}}{{\\textbf{{{dataset}}}}} \\\\")
    
#     subset = final_results[final_results["Dataset"] == dataset]
#     for _, row in subset.iterrows():
#         method = row["Method"]
#         stop_iter = row["Stopping Iteration"]
#         final_f1 = f"{row['Final F1']:.5f}"  # Formatting F1 score to 5 decimal places
#         latex_table.append(f"        {method} & {stop_iter} & {final_f1} \\\\")
    
#     latex_table.append("        \\midrule")

# # Remove last \midrule and close table
# latex_table.pop()  # Remove last midrule
# latex_table.append("        \\bottomrule")
# latex_table.append("    \\end{tabular}")
# latex_table.append("    \\caption{Comparison of active learning methods based on stopping iteration and final F1 score.}")
# latex_table.append("    \\label{tab:ActiveLearningComparison}")
# latex_table.append("\\end{table}")

# # Print or save to file
# latex_output = "\n".join(latex_table)
# print(latex_output)
# """"
#     \caption{Comparison of active learning methods based on stopping iteration and final F1 score.}
#     \label{tab:ActiveLearningComparison}
# \end{table}
# """
