In [2]:
### Import Packages ###
import os
import ast
import numpy as np
import math as math
import pandas as pd
import random as random
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

### Local Packages ###
from utils.Prediction import *
from utils.Selector import *
from utils.Auxiliary import *
from utils.Main import *


### Add the `treeFarms` directory to sys.path ###
import sys
from pathlib import Path
sys.path.append(str("/Users/simondn/Documents/RashomonActiveLearning/Code/treeFarms"))
from treeFarms.treefarms.model.treefarms import TREEFARMS
from utils.Prediction.TreeFARMS import TreeFarmsFunction

In [5]:
### Get Directory ###
# The project root is taken to be one level above the current working directory.
cwd = os.getcwd()
ParentDirectory = os.path.abspath(os.path.join(cwd, os.pardir))

### DataType ###
# Selects which parameter-vector CSV is read below.
DataTypeInput = "MONK2"

### Parameter Vector ###
# Read <ParentDirectory>/Data/ParameterVectors/ParameterVector<DataTypeInput>.csv
ParameterVectorFile = f"ParameterVector{DataTypeInput}.csv"
ParameterVectorPath = os.path.join(ParentDirectory, "Data", "ParameterVectors", ParameterVectorFile)
ParameterVector = pd.read_csv(ParameterVectorPath)

# Test Learning Process

## Input

In [None]:
### Data ###
DataTypeInput = "MONK2"
DataFileInput = DataTypeInput                # any value other than "Simulate" is handed to LoadData
DataArgs = {}                                # only unpacked into DataGeneratingProcess when simulating

### Seed and Split Proportions ###
Seed = 1
TestProportion = 0.2
CandidateProportion = 0.8

### Selector ###
SelectorType = RashomonQBCFunction
SelectorArgs = dict(TopCModels=10)

### Model ###
ModelType = TreeFarmsFunction
ModelArgs = {
    "TopCModels": 10,
    "config": {
        "regularization": 0.01,
        "rashomon_bound_multiplier": 0.03,
    },
    "Type": "Classification",
}



In [None]:
### Purpose ###
# Seeds the random number generators, loads (or simulates) the data, splits it into
# train / test / candidate sets, and wires those splits into the selector and model
# argument dictionaries defined in the input cell.

### Run Time ###
import time                                                              ### NOTE: `time` is not imported anywhere in the visible notebook; presumably it leaked in through a star import
StartTime = time.time()

### Set Up ###
random.seed(Seed)
np.random.seed(Seed)
ErrorVec = []
SelectedObservationHistory = []

### Generate Data ###
if(DataFileInput == "Simulate"):
    from utils.Main import DataGeneratingProcess                             ### NOTE: Why is this not imported from utils.Main import *
    df = DataGeneratingProcess(**DataArgs)
else:
    df = LoadData(DataFileInput)

### Train Test Candidate Split
from utils.Main import TrainTestCandidateSplit                           ### NOTE: Why is this not imported from utils.Main import *
df_Train, df_Test, df_Candidate = TrainTestCandidateSplit(df, TestProportion, CandidateProportion)

### Selector Arguments ###
# The selector must see the training split. This key previously received df_Test,
# which handed the held-out test data to the selector as its training data.
SelectorArgs["df_Train"] = df_Train
SelectorArgs["df_Candidate"] = df_Candidate
SelectorArgs["Model"] = ModelType
# SelectorArgsFiltered = FilterArguments(SelectorType, SelectorArgs)

### Model Arguments ###
ModelArgs['df_Train'] = df_Train
# ModelArgsFiltered = FilterArguments(ModelType, ModelArgs)

In [None]:
### Fit TREEFARMS directly on the training split ###
TreeFarmsConfig = {"regularization": 0.01, "rashomon_bound_multiplier": 0.03}
Model = TREEFARMS(TreeFarmsConfig)

### Every column except the label "Y" is used as a feature ###
IsFeatureColumn = df_Train.columns != "Y"
Model.fit(df_Train.loc[:, IsFeatureColumn], df_Train["Y"])

In [7]:
### Learning Process ###
from utils.Main import LearningProcedure                                 ### NOTE: Why is this not imported from utils.Main import *

### Arguments ###
# Only the first 10 rows of the training and test splits are passed in;
# the candidate split is passed whole.
LearningProcedureArgs = {
    "df_Train": df_Train[0:10],
    "df_Test": df_Test[0:10],
    "df_Candidate": df_Candidate,
    "SelectorType": SelectorType,
    "SelectorArgs": SelectorArgs,
    "ModelType": ModelType,
    "ModelArgs": ModelArgs,
}

### Run ###
ErrorVec, SelectedObservationHistory = LearningProcedure(**LearningProcedureArgs)

Iteration: 0
null
Finding Optimal Objective...
treefarms reported successful execution
training completed. Number of trees in the Rashomon set: 272
Model :<treeFarms.treefarms.model.treefarms.TREEFARMS object at 0x10794a550>
{
  "false": {
    "false": {
      "complexity": 0.009999999776482582,
      "loss": 0.1556561142206192,
      "name": "Y",
      "prediction": 0
    },
    "feature": 2,
    "name": "age:<23",
    "reference": 1,
    "relation": "==",
    "true": {
      "complexity": 0.009999999776482582,
      "loss": 0.07963801175355911,
      "name": "Y",
      "prediction": 1
    },
    "type": "integral"
  },
  "feature": 11,
  "model_objective": 0.375701367855072,
  "name": "priors:>3",
  "reference": 1,
  "relation": "==",
  "true": {
    "complexity": 0.009999999776482582,
    "loss": 0.11040724068880081,
    "name": "Y",
    "prediction": 1
  },
  "type": "integral"
}
{
  "false": {
    "false": {
      "complexity": 0.009999999776482582,
      "loss": 0.155656114220619

KeyboardInterrupt: 

# Simulation

In [None]:
### Set Up ###
ErrorVecSimulation, HistoryVecSimulation = [], []

### Run Code ###
for i in range(13):

    ### Progress ###
    print(i)

    ### Parameters for this run: row i of the parameter vector ###
    Row = ParameterVector.iloc[i]

    ### Arguments ###
    # Selector and model are looked up by name in the notebook namespace (None if absent).
    # The *Args columns are parsed with two literal_eval passes; square brackets are
    # stripped from the selector and model strings before parsing.
    RunArgs = {
        "DataFileInput": Row["Data"],
        "Seed": int(Row["Seed"]),
        "TestProportion": Row["TestProportion"],
        "CandidateProportion": Row["CandidateProportion"],
        "SelectorType": globals().get(Row["SelectorType"], None),
        "ModelType": globals().get(Row["ModelType"], None),
        "DataArgs": ast.literal_eval(ast.literal_eval(Row["DataArgs"])),
        "SelectorArgs": ast.literal_eval(ast.literal_eval(Row["SelectorArgs"].replace("[","").replace("]",""))),
        "ModelArgs": ast.literal_eval(ast.literal_eval(Row["ModelArgs"].replace("[","").replace("]",""))),
    }

    ### Sequential Learning Process ###
    SimulationResults = OneIterationFunction(**RunArgs)

    ### Collect Results ###
    ErrorVecSimulation.append(SimulationResults["ErrorVec"])
    HistoryVecSimulation.append(SimulationResults["SelectionHistory"])

    # ErrorVec.to_csv(os.path.join(ParentDirectory, 
    #          "Results",
    #          str(DataFileInput).replace("Function", ""),
    #          "RandomForest",
    #          ) + str(ParameterVector.iloc[i]["Output"]))

0
1
2
3
4
5
6
7
8
