In [1]:
### Import Packages ###
import ast
import os
import pickle
import time
import math as math
import random as random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

### Local Packages ###
from utils.Main import *
from utils.Selector import *
from utils.Auxiliary import *
from utils.Prediction import *


In [2]:
### Directories ###
# Working directory of the kernel and the folder one level above it.
cwd = os.getcwd()
ParentDirectory = os.path.abspath(os.path.join(cwd, ".."))

### Data Set ###
# Selects which ParameterVector<DataTypeInput>.csv file is read below.
DataTypeInput = "BostonHousing"

### Parameter Vector ###
# One row per simulation; the columns are read by name in the cells that follow.
ParameterVectorFile = "ParameterVector" + DataTypeInput + ".csv"
ParameterVectorPath = os.path.join(ParentDirectory, "Data", "ParameterVectors", ParameterVectorFile)
ParameterVector = pd.read_csv(ParameterVectorPath)

# Simulation

In [None]:
### Set Up ###
# Per-simulation results, appended in the same order as the rows of ParameterVector.
ErrorVecSimulation = []
HistoryVecSimulation = []

# Run at most 13 simulations, but never index past the last row of ParameterVector:
# a fixed range(0,13) raises IndexError on .iloc when the file has fewer rows.
NumberOfSimulations = min(13, len(ParameterVector))

### Run Code ###
for i in range(NumberOfSimulations):

    ### Set Up ###
    print(i)                                     # progress indicator
    ParameterRow = ParameterVector.iloc[i]       # look the row up once instead of once per argument

    ### Sequential Learning Process ###
    # SelectorType / ModelType are stored as names and resolved against the notebook
    # namespace; an unknown name resolves to None.
    # literal_eval is applied twice to the *Args columns, so each cell must hold a quoted
    # string whose content is itself a Python literal. Square brackets are stripped from
    # SelectorArgs and ModelArgs before parsing.
    SimulationResults = OneIterationFunction(
        DataFileInput = ParameterRow["Data"],
        Seed = int(ParameterRow["Seed"]),
        TestProportion = ParameterRow["TestProportion"],
        CandidateProportion = ParameterRow["CandidateProportion"],
        SelectorType = globals().get(ParameterRow["SelectorType"], None),
        ModelType = globals().get(ParameterRow["ModelType"], None),
        DataArgs = ast.literal_eval(ast.literal_eval(ParameterRow["DataArgs"])),
        SelectorArgs = ast.literal_eval(ast.literal_eval(ParameterRow["SelectorArgs"].replace("[","").replace("]",""))),
        ModelArgs = ast.literal_eval(ast.literal_eval(ParameterRow["ModelArgs"].replace("[","").replace("]","")))
    )

    ErrorVecSimulation.append(SimulationResults["ErrorVec"])
    HistoryVecSimulation.append(SimulationResults["SelectionHistory"])

    # TODO: results are only kept in memory. The previous (disabled) snippet wrote ErrorVec
    # to <ParentDirectory>/Results/<data>/RandomForest/<Output>, but it referenced ErrorVec
    # and DataFileInput, neither of which is defined in this cell.

0
1
2
3
4
5
6
7
8


# One Iteration

## Inputs

In [13]:
i=1
DataFileInput = ParameterVector.iloc[i]["Data"],
Seed = ParameterVector.iloc[i]["Seed"],
TestProportion = ParameterVector.iloc[i]["TestProportion"],
CandidateProportion = ParameterVector.iloc[i]["CandidateProportion"],
SelectorType = globals().get(ParameterVector.iloc[i]["SelectorType"], None), 
ModelType = globals().get(ParameterVector.iloc[i]["ModelType"], None), 
DataArgs = ast.literal_eval(ast.literal_eval(ParameterVector.iloc[i]["DataArgs"])),
SelectorArgs = ast.literal_eval(ast.literal_eval(ParameterVector.iloc[i]["SelectorArgs"].replace("[","").replace("]",""))),
ModelArgs = ast.literal_eval(ast.literal_eval(ParameterVector.iloc[i]["ModelArgs"].replace("[","").replace("]","")))


In [15]:
Seed = int(Seed[0])

## One Iteration Function

### Set Up

In [16]:
### Run Time ###
# Wall-clock start; the Return cell subtracts this to obtain ElapsedTime.
StartTime = time.time()

### Set Up ###
# Seed both the standard-library and the NumPy global generators with the same value.
random.seed(Seed); np.random.seed(Seed)
# Empty accumulators; both names are reassigned by the Learning Process cell.
ErrorVec, SelectedObservationHistory = [], []

### Load Data

In [19]:
# Manual override: replaces the DataFileInput read from ParameterVector in the Inputs cell,
# so the loading cells below work on the "Auto" data set.
DataFileInput = "Auto"

In [20]:

from utils.Main import DataGeneratingProcess                             ### NOTE: Why is this not imported from utils.Main import *
### Generate Data ###
# "Simulate" calls DataGeneratingProcess with DataArgs; any other value is handed to LoadData.
UseSimulatedData = DataFileInput == "Simulate"
df = DataGeneratingProcess(**DataArgs) if UseSimulatedData else LoadData(DataFileInput)

/Users/simondn/Documents/RashomonActiveLearning/Data/processed/Auto.pkl


In [21]:
### Directory ###
# cwd is the kernel's working directory; ParentDirectory is the folder one level above it.
cwd = os.getcwd()
# NOTE: FOR LOCAL SIMULATIONS CurrentDirectory is the parent folder.
# NOTE: FOR THE CLUSTER use `CurrentDirectory = cwd` instead.
CurrentDirectory = ParentDirectory = os.path.abspath(os.path.join(cwd, "../"))


In [None]:
# Build the path of the processed pickle for the selected data set and display it.
PickleName = DataFileInput + ".pkl"
filepath = os.path.join(CurrentDirectory, "Data", "processed", PickleName)
filepath

'/Users/simondn/Documents/RashomonActiveLearning/Data/processed/Auto.pkl'

In [7]:
# NOTE(review): `filename` is not assigned anywhere in the visible notebook, and this cell's
# execution count (7) is out of sequence, so it depends on state left over in the kernel
# and raises NameError after Restart & Run All.
filename

'RidgeRegression.py'

In [None]:

### File Path ###
# Debugging copy of a loading step (it previously ended in `return data`, which looks like
# a pasted function body -- TODO confirm against LoadData in utils).
# DataFileInput replaces `filename`, which is never assigned in this notebook and only
# existed as stale kernel state.
filepath = os.path.join(CurrentDirectory, "Data","processed", DataFileInput +".pkl")
print(filepath)
# NOTE: pickle.load can execute arbitrary code; only open pickles produced by this project.
# NOTE(review): ERROR HERE WITH NOT FINDING PANDAS SOMETHING -- presumably the pickle was
# written by a different pandas version; confirm, and consider pd.read_pickle(filepath).
with open(filepath, 'rb') as file:
    data = pickle.load(file).dropna()
# `return` is a SyntaxError outside a function; show a preview of the result instead.
data.head()

### Train Test Split

In [None]:

### Train Test Candidate Split
from utils.Main import TrainTestCandidateSplit                           ### NOTE: Why is this not imported from utils.Main import *
# The helper returns three frames, unpacked in the order train, test, candidate.
SplitFrames = TrainTestCandidateSplit(df, TestProportion, CandidateProportion)
df_Train, df_Test, df_Candidate = SplitFrames

### Argument Processing

In [None]:

### Selector Arguments ###
# The df_Train key now receives df_Train. It was previously given df_Test, which would hand
# the selector the held-out test set under the training key.
SelectorArgs["df_Train"] = df_Train
SelectorArgs["df_Candidate"] = df_Candidate
SelectorArgs["Model"] = ModelType
# FilterArguments is a project helper; presumably it keeps only the keys SelectorType accepts
# -- TODO confirm.
SelectorArgsFiltered = FilterArguments(SelectorType, SelectorArgs)

### Model Arguments ###
ModelArgs['df_Train'] = df_Train
ModelArgsFiltered = FilterArguments(ModelType, ModelArgs)


### Learning Process

In [None]:

### Learning Process ###
from utils.Main import LearningProcedure                                 ### NOTE: Why is this not imported from utils.Main import *
# Collect the keyword arguments first, then make a single call.
LearningInputs = {
    "df_Train" : df_Train,
    "df_Test" : df_Test,
    "df_Candidate" : df_Candidate,
    "SelectorType" : SelectorType,
    "SelectorArgs" : SelectorArgsFiltered,
    "ModelType" : ModelType,
    "ModelArgs" : ModelArgsFiltered,
}
# Returns the error vector and the history of selected observations, in that order.
ErrorVec, SelectedObservationHistory = LearningProcedure(**LearningInputs)

### Return

In [None]:

# Model arguments to record, without the training frame.
# dict.pop returns the removed value, so str(ModelArgsFiltered.pop('df_Train', None)) stored
# the training frame itself (or "None" on a re-run) and mutated ModelArgsFiltered.
# Building a filtered copy records the actual arguments and keeps this cell idempotent.
ModelArgsRecorded = {Key : Value for Key, Value in ModelArgsFiltered.items() if Key != "df_Train"}

# String snapshot of the settings used for this run.
SimulationParameters = {"DataFileInput" : str(DataFileInput),
                        "Seed" : str(Seed),
                        "TestProportion" : str(TestProportion),
                        "CandidateProportion" : str(CandidateProportion),
                        "SelectorType" : str(SelectorType),
                        "ModelType" : str(ModelType),
                        "DataArgs" : str(DataArgs),
                        # "SelectorArgs" : str(SelectorArgs),
                        "ModelArgs" : str(ModelArgsRecorded)
                        }

# Seconds elapsed since StartTime was taken in the Set Up cell.
ElapsedTime = time.time() - StartTime

### Return Dictionary ###
# Same keys as read from the OneIterationFunction result in the simulation loop
# ("ErrorVec", "SelectionHistory"), plus the parameter snapshot and the run time.
SimulationResults = {"ErrorVec" : pd.DataFrame(ErrorVec, columns =["Error"]),
                            "SelectionHistory" : pd.DataFrame(SelectedObservationHistory, columns = ["ObservationID"]),
                            "SimulationParameters" : SimulationParameters,
                            "ElapsedTime" : ElapsedTime}