In [1]:
### Import libraries ###
import os
import pickle
import argparse
import numpy as np
import pandas as pd

In [2]:
### Extract Error and Time Function ###
def ExtractErrorAndTime(files):
    ErrorVec = []
    TimeVec = []
    for file in files:
        try:
            with open(file, "rb") as f:
                data = pickle.load(f)
                ErrorVec.append(data["ErrorVec"])
                TimeVec.append(data["ElapsedTime"])
        except Exception as e:
            print(f"Error loading file {file}: {e}")
    return np.array(ErrorVec), np.array(TimeVec)

In [3]:
# Simulate argparse args object
class Args:
    def __init__(self, DataType, ModelType, Categories):
        self.DataType = DataType
        self.ModelType = ModelType
        self.Categories = Categories

# Manually setting up args
args = Args(
    DataType="BostonHousing",
    ModelType="TreeFarms",
    Categories= "MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl"
)

# Parsing arguments
data_type = args.DataType
model_type = args.ModelType
categories = args.Categories

# Display arguments (for testing)
print("DataType:", data_type)
print("ModelType:", model_type)
print("Categories:", categories)

# Add the main script logic below using `data_type`, `model_type`, and `categories`.


DataType: BostonHousing
ModelType: TreeFarms
Categories: MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl


In [4]:
args.Categories

'MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl'

In [5]:
### Set Up ###
cwd = os.getcwd()
ResultsDirectory = os.path.join(cwd, "Results", args.DataType, args.ModelType)
OutputDirectory = os.path.join(ResultsDirectory, "ProcessedResults")
# RawDirectory = os.path.join(ResultsDirectory, "Raw")
RawDirectory = "/Users/simondn/Documents/RashomonActiveLearning/Results/BostonHousing/TreeFarms/Raw"
Category = args.Categories


In [6]:
### Extract File Names ###
CategoryFileNames = []
for filename in os.listdir(RawDirectory):
    if filename.endswith(".pkl") and filename.endswith(Category):
        CategoryFileNames.append(os.path.join(RawDirectory, filename))

In [7]:
# ### Extract Data ###
if not CategoryFileNames:
    print(f"Warning: No files found for category {Category}. Exiting.")
    exit(1)
print(f"Processing category: {Category} with {len(CategoryFileNames)} files")
ErrorVec, TimeVec = ExtractErrorAndTime(CategoryFileNames)
ErrorMatrix = pd.DataFrame(ErrorVec.squeeze())
TimeMatrix = pd.DataFrame(TimeVec.squeeze())

### Save ###
# ErrorMatrix.to_csv(os.path.join(OutputDirectory, f"{Category.replace('.pkl', '')}_ErrorMatrix.csv"), index=False)
# TimeMatrix.to_csv(os.path.join(OutputDirectory, f"{Category.replace('.pkl', '')}_TimeMatrix.csv"), index=False)

Processing category: MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl with 3 files


In [8]:
ErrorMatrix[14]

0                 0.078431
1    [0.17647058823529416]
2                 0.117647
Name: 14, dtype: object

In [9]:
Sim0 = "/Users/simondn/Documents/RashomonActiveLearning/Results/BostonHousing/TreeFarms/Raw/Seed0_DataBostonHousing_TP0.2_CP0.8_STTreeEnsembleQBC_MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl"
Sim1 = "/Users/simondn/Documents/RashomonActiveLearning/Results/BostonHousing/TreeFarms/Raw/Seed1_DataBostonHousing_TP0.2_CP0.8_STTreeEnsembleQBC_MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl"
Sim2 = "/Users/simondn/Documents/RashomonActiveLearning/Results/BostonHousing/TreeFarms/Raw/Seed2_DataBostonHousing_TP0.2_CP0.8_STTreeEnsembleQBC_MTTreeFarms_UEI0_NE100_Reg0.01_RBA0.01.pkl"


In [10]:
with open(Sim1, "rb") as f:
    data = pickle.load(f)
    data1 = data["ErrorVec"]


In [13]:
data1.head(20)

Unnamed: 0,Error
0,0.156863
1,0.156863
2,0.156863
3,0.156863
4,0.156863
5,0.156863
6,0.156863
7,0.156863
8,0.156863
9,0.156863


In [11]:
data["SimulationParameters"]

{'DataFileInput': 'BostonHousing',
 'Seed': '1',
 'TestProportion': '0.2',
 'CandidateProportion': '0.8',
 'SelectorType': 'TreeEnsembleQBCFunction',
 'ModelType': 'TreeFarmsFunction',
 'UniqueErrorsInput': '0',
 'n_estimators': '100',
 'regularization': '0.01',
 'rashomon_bound_adder': '0.01',
 'Type': 'Classification'}