In [28]:
### Import libraries ###
import os
import pickle
import argparse
import numpy as np
import pandas as pd

# Arguments 

In [29]:
# Stand-in for parsed command-line arguments: build the Namespace directly
# with the values the rest of the notebook reads.
args = argparse.Namespace(
    DataType="BostonHousing",  # Replace with the actual value if known
    ModelType="TreeFarms",
    Categories="DPL0.01.pkl",
)

# Echo the configuration in use
print(f"DataType: {args.DataType}")
print(f"ModelType: {args.ModelType}")
print(f"Categories: {args.Categories}")

DataType: BostonHousing
ModelType: TreeFarms
Categories: DPL0.01.pkl


# Define Function

In [30]:
### Extract Error and Time Function ###
def ExtractInformation(files):
    """Load result pickles and gather their fields into parallel arrays.

    Each file is unpickled and indexed with the keys "ErrorVec",
    "ElapsedTime", "SelectionHistory" and "TreeCount" (the latter indexed
    with "AllTreeCount" and "UniqueTreeCount").

    A file that cannot be opened, unpickled, or indexed with one of those
    keys is reported with a printed message and skipped as a whole, so the
    five returned arrays always hold one entry per successfully loaded
    file, in the same order.

    Args:
        files: Iterable of paths to pickle files.

    Returns:
        Tuple of five numpy arrays, in this order: error vectors, elapsed
        times, selection histories, all-tree counts, unique-tree counts.
    """

    ### Set Up ###
    ErrorVec = []
    TimeVec = []
    SelectionHistoryVec = []
    AllTreeCountVec = []
    UniqueTreeCounttVec = []
    for file in files:
        try:
            # NOTE: pickle.load can execute arbitrary code; only point this
            # at result files produced by a trusted run.
            with open(file, "rb") as f:
                data = pickle.load(f)
            # Read every field before appending anything: a missing key
            # must not leave the lists with different lengths.
            error = data["ErrorVec"]
            elapsed = data["ElapsedTime"]
            history = data["SelectionHistory"]
            all_trees = data["TreeCount"]["AllTreeCount"]
            unique_trees = data["TreeCount"]["UniqueTreeCount"]
        except Exception as e:
            print(f"Error loading file {file}: {e}")
            continue
        ErrorVec.append(error)
        TimeVec.append(elapsed)
        SelectionHistoryVec.append(history)
        AllTreeCountVec.append(all_trees)
        UniqueTreeCounttVec.append(unique_trees)
    return np.array(ErrorVec), np.array(TimeVec), np.array(SelectionHistoryVec), np.array(AllTreeCountVec), np.array(UniqueTreeCounttVec)

# Extract Data

In [31]:
### Set Up ###
# NOTE(review): the project root is an absolute, machine-specific path;
# anyone running this elsewhere has to edit it.
cwd = '/Users/simondn/Documents/RashomonActiveLearning/'

# Category suffix used to pick the raw result files
Category = args.Categories

# Results/<DataType>/<ModelType> holds the raw pickles ("Raw") and the
# CSVs this notebook writes ("ProcessedResults").
ResultsDirectory = os.path.join(cwd, "Results", args.DataType, args.ModelType)
RawDirectory = os.path.join(ResultsDirectory, "Raw")
OutputDirectory = os.path.join(ResultsDirectory, "ProcessedResults")

In [32]:
### Extract File Names ###
# Full paths of the raw pickles whose names end with the category suffix.
# os.listdir returns entries in arbitrary order, so the listing is sorted
# to keep the row order of the matrices built from these files
# reproducible from run to run.
CategoryFileNames = [
    os.path.join(RawDirectory, filename)
    for filename in sorted(os.listdir(RawDirectory))
    if filename.endswith(".pkl") and filename.endswith(Category)
]

In [33]:
### Extract Data ###
# Stop here when no raw file matched the category: nothing to process.
if not CategoryFileNames:
    print(f"Warning: No files found for category {Category}. Exiting.")
    # `exit` is a helper injected for interactive shells and is not meant
    # for program control flow; raise SystemExit directly to stop with
    # status 1 regardless of how this code is run.
    raise SystemExit(1)
print(f"Processing category: {Category} with {len(CategoryFileNames)} files")

# One entry per successfully loaded file in each returned array.
ErrorVec, TimeVec, SelectionHistoryVec, AllTreeCountVec, UniqueTreeCounttVec = ExtractInformation(CategoryFileNames)

# Drop length-1 axes, then wrap each array in a DataFrame for CSV export.
ErrorMatrix = pd.DataFrame(ErrorVec.squeeze())
TimeMatrix = pd.DataFrame(TimeVec.squeeze())
SelectionHistoryVec = pd.DataFrame(SelectionHistoryVec.squeeze())
AllTreeCountVec = pd.DataFrame(AllTreeCountVec.squeeze())
UniqueTreeCounttVec = pd.DataFrame(UniqueTreeCounttVec.squeeze())


Processing category: DPL0.01.pkl with 84 files


In [34]:

### Save ###
# Output files are named after the category with its ".pkl" text removed.
CategoryName = Category.replace('.pkl', '')

# (frame to write, sub-folder of OutputDirectory, file-name suffix)
SaveTargets = [
    (ErrorMatrix, "ErrorVec", "ErrorMatrix"),
    (TimeMatrix, "ElapsedTime", "TimeMatrix"),
    (AllTreeCountVec, "TreeCount", "AllTreeCount"),
    (UniqueTreeCounttVec, "TreeCount", "UniqueTreeCount"),
    (SelectionHistoryVec, "SelectionHistory", "SelectionHistory"),
]
for Frame, SubDirectory, Suffix in SaveTargets:
    TargetDirectory = os.path.join(OutputDirectory, SubDirectory)
    # to_csv does not create missing folders; make sure the target exists
    # so a first run on a fresh results tree does not fail.
    os.makedirs(TargetDirectory, exist_ok=True)
    Frame.to_csv(os.path.join(TargetDirectory, f"{CategoryName}_{Suffix}.csv"), index=False)
