# Solver Performance Comparisons

This notebook tests and compares the performance of different general-purpose solvers.

### Imports and Setup

In [1]:
# Third party
import matplotlib.pyplot as plt
import matplotlib_inline

import pandas as pd
import seaborn as sns 

# Local
import flexibleSubsetSelection as fss

# Initialize notebook settings
sns.set_theme() # set seaborn theme
matplotlib_inline.backend_inline.set_matplotlib_formats('svg') # vector plots
# Render matplotlib figures inline in the notebook output
%matplotlib inline 
# Load the autoreload extension and use mode 2 so that imported modules are
# reloaded before each cell runs (picks up edits to local modules without
# restarting the kernel)
%load_ext autoreload
%autoreload 2

In [4]:
# --- Experiment parameters ---
directory = "Fig4-performance"  # data directory for this notebook
subsetSize = 10  # size of subset selected
verbose = False  # verbosity of solvers
seed = 123  # random generation seed for replicability
numTrials = 5  # number of repeated trials

# --- Dataset: seeded uniform random data of size (1000, 10) ---
dataset = fss.Dataset(
    randTypes="uniform",
    size=(1000, 10),
    seed=seed,
)
dataset.save(f"{directory}/dataset")

# --- Loss functions ---
# Each loss is created directly after its preprocess call because it reads the
# dataset attribute of the same name (dataset.mean / dataset.range).
# NOTE(review): presumably preprocess(name=fn) stores fn's result on the
# dataset under `name` -- confirm against the fss.Dataset implementation.
dataset.preprocess(mean=fss.metric.mean)
meanLoss = fss.UniCriterion(
    objective=fss.objective.preserveMetric,
    metric=fss.metric.mean,
    datasetMetric=dataset.mean,
)

dataset.preprocess(range=fss.metric.range)
rangeLoss = fss.UniCriterion(
    objective=fss.objective.preserveMetric,
    metric=fss.metric.range,
    datasetMetric=dataset.range,
)

lossFunctions = [meanLoss, rangeLoss]

# A single solver instance, initialised with the greedy swap algorithm and the
# mean loss.
solver = fss.Solver(algorithm=fss.algorithm.greedySwap, loss=meanLoss)

In [5]:
# Algorithms run for every (trial, loss function) pair, in execution order.
algorithms = (
    fss.algorithm.greedySwap,
    # fss.algorithm.optimization,  # currently disabled
    fss.algorithm.bestOfRandom,
    fss.algorithm.worstOfRandom,
)

for trial in range(numTrials):
    for lossFunction in lossFunctions:
        # Reuse the same solver, swapping in the loss and then each algorithm.
        solver.loss = lossFunction
        for algorithm in algorithms:
            solver.algorithm = algorithm
            subset = solver.solve(dataset, subsetSize=subsetSize)


Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.13s, loss=0.41).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.03s, loss=1.1).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.03s, loss=5.54).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.11s, loss=0.56).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.02s, loss=3.13).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.03s, loss=13.91).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.13s, loss=0.42).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.03s, loss=1.01).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.03s, loss=5.28).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.11s, loss=0.55).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.02s, loss=3.75).
Solved for Subset(size: (np.float64(10.0), 10), solve time: 0.02s, loss=12.24).
Solved for Subset(size: (np.float64(10.0), 10), sol

In [None]:
def list_to_string(objective):
    """Collapse an objective specification into a canonical string key.

    A list of objective names is sorted alphabetically and joined with
    underscores, so the same set of objectives always maps to the same key
    regardless of its original order (e.g. ``["range", "mean"]`` becomes
    ``"mean_range"``).

    Args:
        objective: One of
            * a list of objective name strings,
            * a string holding a single name (``"mean"``) or the textual
              form of a list, as found after a CSV round trip
              (``"['range', 'mean']"`` or ``'["range", "mean"]'``),
            * any other value (e.g. a missing-value marker).

    Returns:
        The underscore-joined, sorted names for list and string inputs;
        any other value is returned unchanged.
    """
    if isinstance(objective, str):
        # Trim surrounding whitespace first so the brackets are really at the
        # ends of the string when they are stripped.
        inner = objective.strip().strip("[]")
        # Remove both quote styles and spaces, leaving bare comma-separated
        # names.
        for char in ("'", '"', " "):
            inner = inner.replace(char, "")
        # Discard empty tokens (from "[]" or a trailing comma) so they do not
        # turn into stray underscores in the key.
        objective = [name for name in inner.split(",") if name]
    if isinstance(objective, list):
        return "_".join(sorted(objective))
    return objective

In [None]:
# Colour scheme handed to the fss plotting helpers
palette = {"green": "#8dd3c7", "orange": "#fb8072", "yellow": "#fdb462",
           "blue": "#8dadd3", "grey": "#eff0f2"}
color = fss.plot.Color(palette)
fss.plot.initialize(color)

# 2 x 3 grid of panels, flattened so panels can be paired with objectives
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 7))
axs = axs.ravel()

# Objective keys (in the canonical form produced by list_to_string) and the
# title shown above the corresponding panel
metrics = ["mean", "range", "mean_range", "coverage", "distribution",
           "coverage_distribution_mean_range"]
titles = ["Mean", "Range", "Range and Mean", "Coverage", "Distribution",
          "Range, Mean, Coverage and Distribution"]

# Load the recorded results and normalise the objective column to those keys
df = pd.read_csv(f"../data/{directory}/computationData.csv")
df['Objective'] = df['Objective'].apply(list_to_string)
print(df["Objective"].unique())

# One scatter plot of loss against computation time per objective
for ax, metric, title in zip(axs, metrics, titles):
    data = df[df["Objective"] == metric]
    sns.scatterplot(data=data,
                    x="Computation Time",
                    y="Loss",
                    hue="Algorithm",
                    palette=color.palette.values(),
                    alpha=0.5,
                    s=75,
                    ax=ax)

    ax.set_title(title, fontsize=16)
    ax.set_ylabel("Loss")
    ax.set_xlabel("Time (s)")
    ax.set_xlim([0, 1000])
    # Hide the per-panel legend
    ax.legend().set_visible(False)