In [1]:
### Import packages ###
import numpy as np
import math as math
import pandas as pd
import random as random
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

### Import functions ###
from utils.Main import *
from utils.Selector import *
from utils.Prediction import *

# Import Functions

## Input 

In [2]:
import inspect

def FilterArguments(Func, ArgumentDictionary):
    """Keep only the entries of ArgumentDictionary that Func names as parameters.

    Parameters
    ----------
    Func : callable
        Callable whose signature decides which keys survive.
    ArgumentDictionary : dict
        Candidate keyword arguments, possibly containing keys Func does not accept.

    Returns
    -------
    dict
        New dictionary, ordered like Func's signature, holding only the keys
        that appear both in the signature and in ArgumentDictionary.
    """

    ### Parameter names declared by Func, in declaration order ###
    AcceptedNames = inspect.signature(Func).parameters

    ### Filter and Return ###
    return {Name: ArgumentDictionary[Name]
            for Name in AcceptedNames
            if Name in ArgumentDictionary}

In [3]:
def OneIterationFunction(DataFileInput,
                         Seed,
                         TestProportion,
                         CandidateProportion,
                         SelectorType, 
                         ModelType, 
                         DataArgs,
                         SelectorArgs,
                         ModelArgs):
    """Run one seeded learning iteration and return its error and selection histories.

    Parameters
    ----------
    DataFileInput : str or data object
        The string "Simulate" makes the function build data with
        DataGeneratingProcess(**DataArgs); anything else is used directly as
        the data set handed to TrainTestCandidateSplit.
    Seed : int
        Seed applied to both `random` and `numpy.random` before any work is done.
    TestProportion, CandidateProportion
        Forwarded unchanged to TrainTestCandidateSplit.
    SelectorType : callable
        Selector function; only the arguments named in its signature are forwarded.
    ModelType : callable
        Model function; only the arguments named in its signature are forwarded.
    DataArgs : dict or None
        Keyword arguments for DataGeneratingProcess (used only when simulating).
    SelectorArgs : dict or None
        Extra selector arguments. The caller's dictionary is not modified.
    ModelArgs : dict or None
        Extra model arguments. The caller's dictionary is not modified;
        'df_Train' is always set to the training split.

    Returns
    -------
    tuple
        (ErrorVec, SelectedObservationHistory) as produced by LearningProcedure.
    """

    ### Set Up ###
    random.seed(Seed)
    np.random.seed(Seed)

    ### Generate Data ###
    # The isinstance guard matters: `DataFrame == "Simulate"` is an elementwise
    # comparison, and using its result in `if` raises ValueError.
    if isinstance(DataFileInput, str) and DataFileInput == "Simulate":
        df = DataGeneratingProcess(**(DataArgs or {}))
    else:
        df = DataFileInput

    ### Train Test Candidate Split ###
    df_Train, df_Test, df_Candidate = TrainTestCandidateSplit(df, TestProportion, CandidateProportion)

    ### Selector Arguments ###
    # Work on a copy so the dictionary passed in by the caller is left untouched.
    SelectorArgs = dict(SelectorArgs or {})
    # NOTE(review): the selector's 'df_Train' is deliberately left as df_Test to
    # preserve existing results - confirm whether df_Train was intended here.
    SelectorArgs["df_Train"] = df_Test
    SelectorArgs["df_Candidate"] = df_Candidate
    SelectorArgs["Model"] = ModelType
    SelectorArgsFiltered = FilterArguments(SelectorType, SelectorArgs)

    ### Model Arguments ###
    # Merge instead of overwrite, so caller-supplied model arguments survive.
    ModelArgs = dict(ModelArgs or {})
    ModelArgs["df_Train"] = df_Train
    ModelArgsFiltered = FilterArguments(ModelType, ModelArgs)
    
    ### Learning Process ###
    ErrorVec, SelectedObservationHistory = LearningProcedure(df_Train = df_Train, 
                                                                df_Test = df_Test, 
                                                                df_Candidate = df_Candidate, 
                                                                SelectorType = SelectorType, 
                                                                SelectorArgs = SelectorArgsFiltered,
                                                                ModelType = ModelType, 
                                                                ModelArgs = ModelArgsFiltered
                                                                )
    return ErrorVec, SelectedObservationHistory

In [7]:
# Single simulated run: passive selection with a linear-regression model.
# The returned (ErrorVec, SelectedObservationHistory) pair is displayed as the cell output.
OneIterationFunction(
    DataFileInput="Simulate",
    Seed=1,
    TestProportion=0.2,
    CandidateProportion=0.8,
    SelectorType=PassiveLearning,
    ModelType=LinearRegressionFunction,
    DataArgs=dict(N=1000, K=2),
    SelectorArgs=dict(distance='euclidean'),
    ModelArgs=dict(),
)

([1.9220321300813552,
  1.9208721965927997,
  1.9168842598130442,
  1.9094635545954872,
  1.9106644650410363,
  1.9129314670590487,
  1.9154898568889296,
  1.9151777169831155,
  1.9151412664181684,
  1.916432081758606,
  1.9157947809678892,
  1.9119556220050193,
  1.9126892634417565,
  1.9135917782545042,
  1.9149246362363967,
  1.9197709112716677,
  1.918956655138399,
  1.9209323886357617,
  1.9191414051525981,
  1.9224148551748386,
  1.9235830066445094,
  1.9246666816773113,
  1.92063590647547,
  1.920886143743761,
  1.9201021646915737,
  1.921561458635481,
  1.9190538517904847,
  1.9175375922808804,
  1.9206458890636202,
  1.920871080414846,
  1.9219451209047855,
  1.9209898730724317,
  1.924537149105842,
  1.9238036299373695,
  1.9244172798739183,
  1.9261422198318727,
  1.926550606693545,
  1.9275355069929998,
  1.9259987238152652,
  1.925754679696044,
  1.9260928657837075,
  1.92666851963204,
  1.927159962386956,
  1.9254164996312075,
  1.9230129612838527,
  1.9220822311298387,
 

## Set Up

In [None]:
# ### Input ###
# SimulationN = 2
# N= 1000
# K = 3
# TestProportion = 0.2
# CandidateProportion = 0.8
# SimulationErrorResultsPassive = []
# SimulationErrorResultsGSx = []
# SimulationErrorResultsGSy_Test = []
# SimulationErrorResultsGSy = []
# SimulationErrorResultsiGS = []
# SimulationErrorResultsiGS_Test = []

## Simulation

In [None]:
# for seed in range(0,SimulationN):

#     print("Simulation iter: ", seed)

#     ### Set Seed ###
#     random.seed(seed)
#     np.random.seed(seed)

#     ### Set Up ###
#     ErrorVec = []
#     SelectedObservationHistory = []

#     ### Generate Data ###
#     df = DataGeneratingProcess(N,K)
#     df_Train, df_Test, df_Candidate = TrainTestCandidateSplit(df, TestProportion, CandidateProportion)

#     ### Learning Algorithm ###

#     ## Passive ##
#     ErrorVecPassive, SelectedObservationHistoryPassive = RunLearningProcedure(df_Train = df_Train, 
#                      df_Test = df_Test, 
#                      df_Candidate = df_Candidate, 
#                      SelectorType = PassiveLearning, 
#                      SelectorArgs = {'df_Candidate' : df_Candidate},
#                      ModelType = LinearRegressionFunction, 
#                      ModelArgs = {'df_Train' : df_Train}
#                      )
    
#     ## GSx ##
#     ErrorVecGSx, SelectedObservationHistoryGSx = RunLearningProcedure(df_Train = df_Train, 
#                      df_Test = df_Test, 
#                      df_Candidate = df_Candidate, 
#                      SelectorType = GSxFunction, 
#                      SelectorArgs = {'df_Train' : df_Train,
#                                       'df_Candidate' : df_Candidate,
#                                       'distance' : 'euclidean'},
#                      ModelType = LinearRegressionFunction, 
#                      ModelArgs = {'df_Train' : df_Train}
#                      )
    
#     ## GSy - Training ##
#     ErrorVecGSy, SelectedObservationHistoryGSy = RunLearningProcedure(df_Train = df_Train, 
#                      df_Test = df_Test, 
#                      df_Candidate = df_Candidate, 
#                      SelectorType = GSyFunction, 
#                      SelectorArgs = {'df_Train' : df_Train,                  #  NOTE: or should this be df_Test
#                                       'df_Candidate' : df_Candidate,
#                                       'Model': None,
#                                       'distance' : 'euclidean'},
#                      ModelType = LinearRegressionFunction, 
#                      ModelArgs = {'df_Train' : df_Train}
#                      )
    
#     ## GSy - Test ##
#     ErrorVecGSy_Test, SelectedObservationHistoryGSy_Test = RunLearningProcedure(df_Train = df_Train, 
#                      df_Test = df_Test, 
#                      df_Candidate = df_Candidate, 
#                      SelectorType = GSyFunction, 
#                      SelectorArgs = {'df_Train' : df_Test,                  #  NOTE: Test variant - df_Test is passed as df_Train here
#                                       'df_Candidate' : df_Candidate,
#                                       'Model': None,
#                                       'distance' : 'euclidean'},
#                      ModelType = LinearRegressionFunction, 
#                      ModelArgs = {'df_Train' : df_Train}
#                      )
    
#     ## iGS ##
#     ErrorVeciGS, SelectedObservationHistoryiGS = RunLearningProcedure(df_Train = df_Train, 
#                      df_Test = df_Test, 
#                      df_Candidate = df_Candidate, 
#                      SelectorType = iGSFunction, 
#                      SelectorArgs = {'df_Train' : df_Train,                  #  NOTE: or should this be df_Test
#                                       'df_Candidate' : df_Candidate,
#                                       'Model': None,
#                                       'distance' : 'euclidean'},
#                      ModelType = LinearRegressionFunction, 
#                      ModelArgs = {'df_Train' : df_Train}
#                      )
    
#     ## iGS - Test ##
#     ErrorVeciGS_Test, SelectedObservationHistoryiGS_Test = RunLearningProcedure(df_Train = df_Train, 
#                      df_Test = df_Test, 
#                      df_Candidate = df_Candidate, 
#                      SelectorType = iGSFunction, 
#                      SelectorArgs = {'df_Train' : df_Test,                  #  NOTE: Test variant - df_Test is passed as df_Train here
#                                       'df_Candidate' : df_Candidate,
#                                       'Model': None,
#                                       'distance' : 'euclidean'},
#                      ModelType = LinearRegressionFunction, 
#                      ModelArgs = {'df_Train' : df_Train}
#                      )
    
#     ### Save ###
#     SimulationErrorResultsPassive.append(ErrorVecPassive)
#     SimulationErrorResultsGSx.append(ErrorVecGSx)
#     SimulationErrorResultsGSy.append(ErrorVecGSy)
#     SimulationErrorResultsGSy_Test.append(ErrorVecGSy_Test)
#     SimulationErrorResultsiGS.append(ErrorVeciGS)
#     SimulationErrorResultsiGS_Test.append(ErrorVeciGS_Test)


Simulation iter:  0
Simulation iter:  1


In [None]:
# np.savetxt("/Users/simondn/Documents/RashomonActiveLearning/PythonResults/Passive.csv", 
#               SimulationErrorResultsPassive, 
#               delimiter=",")
# np.savetxt("/Users/simondn/Documents/RashomonActiveLearning/PythonResults/GSx.csv", 
#               SimulationErrorResultsGSx, 
#               delimiter=",")
# np.savetxt("/Users/simondn/Documents/RashomonActiveLearning/PythonResults/GSy.csv", 
#               SimulationErrorResultsGSy,
#               delimiter=",")
# np.savetxt("/Users/simondn/Documents/RashomonActiveLearning/PythonResults/GSy_Test.csv", 
#               SimulationErrorResultsGSy_Test, 
#               delimiter=",")
# np.savetxt("/Users/simondn/Documents/RashomonActiveLearning/PythonResults/iGS.csv", 
#               SimulationErrorResultsiGS, 
#               delimiter=",")
# np.savetxt("/Users/simondn/Documents/RashomonActiveLearning/PythonResults/iGS_Test.csv", 
#               SimulationErrorResultsiGS_Test, 
#               delimiter=",")