In [1]:
### Import packages ###
import time
import numpy as np
import math as math
import pandas as pd
import random as random

### Append Path ###
import sys
sys.path.append('..')

### Import functions ###
from utils.Main import *
from utils.Selector import *
from utils.Auxiliary import *
from utils.Prediction import *

  from .autonotebook import tqdm as notebook_tqdm


# Inputs

In [2]:
### Get Directory ###
cwd = os.getcwd()
ParentDirectory = os.path.abspath(os.path.join(cwd, "..",".."))
DataDirectory = os.path.join(ParentDirectory,"Data","processed")

### Get Data Files ###
data_list = [f for f in os.listdir(DataDirectory) if f.endswith('.pkl')]
data_list = [s.rstrip(".pkl") for s in data_list] 
data_list


['concrete_cs',
 'wine_red',
 'dgp_two_regime',
 'bodyfat',
 'dgp_misspecified',
 'concrete_4',
 'beer',
 'qsar',
 'dgp_low_noise',
 'mpg',
 'housing',
 'concrete_flow',
 'cps_wage',
 'yacht',
 'dgp_correct',
 'wine_white',
 'concrete_slum']

In [3]:
### DataType ###
METRIC_TO_PLOT = 'RMSE' # RMSE, CC, R2, MAE
DataFileInput = "dgp_low_noise"
NSim = 5

# Simulation

In [4]:
all_results = RunSimulationFunction(DataFileInput = DataFileInput,
                                    Seed = 1,
                                    machine_learning_model = 'LinearRegressionPredictor',
                                    test_proportion = 0.2,
                                    candidate_proportion = 0.8)

In [8]:
all_results

{'Passive Learning': {'ErrorVec':          RMSE       MAE        R2       CC
  0    0.271191  0.182876  0.391446  0.65377
  1    0.271786  0.182604  0.388777  0.65377
  2    0.271807  0.182601  0.388678  0.65377
  3    0.271743  0.182525  0.388967  0.65377
  4    0.271976  0.182491  0.387922  0.65377
  ..        ...       ...       ...      ...
  315  0.263188  0.190176  0.426836  0.65377
  316  0.263149  0.190132  0.427005  0.65377
  317  0.263145  0.190137  0.427023  0.65377
  318  0.263139  0.190136  0.427047  0.65377
  319  0.263136  0.190106  0.427061  0.65377
  
  [320 rows x 4 columns],
  'SelectionHistory': [[143],
   [180],
   [426],
   [210],
   [268],
   [466],
   [457],
   [408],
   [150],
   [52],
   [287],
   [185],
   [474],
   [303],
   [336],
   [30],
   [105],
   [172],
   [54],
   [377],
   [188],
   [19],
   [23],
   [440],
   [327],
   [399],
   [7],
   [396],
   [267],
   [321],
   [156],
   [432],
   [282],
   [410],
   [484],
   [232],
   [297],
   [24],
   [413

# Simulation Results

## Graph Parameters

In [5]:
### Labels ###
Y_Label = "RMSE"
PlotSubtitle = "Active Learning Performance Comparison on MPG Dataset"


### Colors ###
colors = {
    # Original methods
    'Passive Learning': 'gray',
    'GSx': 'cornflowerblue',
    'GSy': 'salmon',
    'iGS': 'red',

    # WiGS Static methods
    'WiGS (Static w_x=0.75)': 'lightgreen',
    'WiGS (Static w_x=0.5)': 'forestgreen',
    'WiGS (Static w_x=0.25)': 'darkgreen',

    # WiGS Time-Decay methods
    'WiGS (Time-Decay, Linear)': 'orange',
    'WiGS (Time-Decay, Exponential)': 'saddlebrown',

    # MAB 
    'WiGS (MAB-UCB1, c=0.5)': 'orchid',
    'WiGS (MAB-UCB1, c=2.0)': 'darkviolet',
    'WiGS (MAB-UCB1, c=5.0)': 'indigo',
}

### Line styles ###
linestyles = {
    'Passive Learning': ':',
    'GSx': ':',
    'GSy': ':',
    'iGS': '-',
    'WiGS (Static w_x=0.75)': '-',
    'WiGS (Static w_x=0.5)': '-.',
    'WiGS (Static w_x=0.25)': '--',
    'WiGS (Time-Decay, Linear)': '-',
    'WiGS (Time-Decay, Exponential)': '-.',
    'WiGS (MAB-UCB1, c=0.5)': '-',
    'WiGS (MAB-UCB1, c=2.0)': '-',
    'WiGS (MAB-UCB1, c=5.0)': '-'}

### Legend ###
LegendMapping = {
    'Passive Learning': 'Random',
    'GSx': 'GSx',
    'GSy': 'GSy',
    'iGS': 'iGS',
    'WiGS (Static w_x=0.75)': 'WiGS (Static, w_x=0.75)',
    'WiGS (Static w_x=0.5)': 'WiGS (Static, w_x=0.5)',
    'WiGS (Static w_x=0.25)': 'WiGS (Static, w_x=0.25)',
    'WiGS (Time-Decay, Linear)': 'WiGS (Linear Decay)',
    'WiGS (Time-Decay, Exponential)': 'WiGS (Exponential Decay)',
    'WiGS (MAB-UCB1, c=0.5)': 'WiGS (MAB-UCB1, c=0.5)',
    'WiGS (MAB-UCB1, c=2.0)': 'WiGS (MAB-UCB1, c=2.0)',
    'WiGS (MAB-UCB1, c=5.0)': 'WiGS (MAB-UCB1, c=5.0)' }

## Graph Inputs

In [6]:
### Plots ###
strategies_to_plot = [
    ## Original Methods ##
    # 'Passive Learning',
    # 'GSx',
    # 'GSy',
    'iGS',

    ## WIGS Methods (Static) ##
    'WiGS (Static w_x=0.75)',  # Favors exploration (GSx)
    # 'WiGS (Static w_x=0.5)',   # Balanced approach
    # 'WiGS (Static w_x=0.25)',  # Favors exploitation (GSy)

    ## WiGS Methods (Time-decayed) ##
    'WiGS (Time-Decay, Linear)',
    'WiGS (Time-Decay, Exponential)',
    
    ## WiGS Methods (MAB) ##
    'WiGS (MAB-UCB1, c=0.5)',
    'WiGS (MAB-UCB1, c=2.0)',
    'WiGS (MAB-UCB1, c=5.0)'
]

In [14]:
filtered_results_to_plot = {
    key: all_results[key]["ErrorVec"][METRIC_TO_PLOT] 
    for key in strategies_to_plot if key in all_results
}


## Plot

In [15]:
TracePlotMean, TracePlotVariance = MeanVariancePlot(
    RelativeError="iGS",
    Colors=colors,
    LegendMapping=LegendMapping,
    Linestyles=linestyles,
    Y_Label=Y_Label,
    Subtitle="Comparison of WiGS Strategies vs. iGS", 
    TransparencyVal=0.00,
    VarInput=True,
    CriticalValue=1.96,
    initial_train_proportion=0.16,
    candidate_pool_proportion=0.64,
    **filtered_results_to_plot 
)


ValueError: No axis named 1 for object type Series

## Save

In [54]:
# ### Save Simulations ###
# file_path = os.path.join("/Users/simondn/Documents/WeightedGreedySampling/Results/simulation_results", f"{DataFileInput}_results.pkl")
# with open(file_path, 'wb') as file:
#     pickle.dump(all_results, file)
    
# ### Save Trace Plot ###
# trace_plot_path = os.path.join("/Users/simondn/Documents/WeightedGreedySampling/Results/images/trace", f"{DataFileInput}_TracePlot.png")
# TracePlotMean.savefig(trace_plot_path, bbox_inches='tight', dpi=300)

# ### Save Variance Plot ###
# variance_plot_path = os.path.join("/Users/simondn/Documents/WeightedGreedySampling/Results/images/variance", f"{DataFileInput}_VariancePlot.png")
# TracePlotVariance.savefig(variance_plot_path, bbox_inches='tight', dpi=300)