In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.neighbors import KernelDensity
from synthetic_fun import base_fun
import matplotlib.patches as mpatches
import pickle
import pathlib
import os
%load_ext autoreload
%autoreload 2

In [None]:
# Matplotlib configuration: route all text through LaTeX and load the `bm`
# package in the preamble (the spline titles further down use `\bm{...}`),
# then switch seaborn to its "white" style.
plt.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{bm}',
})
sns.set_style("white")

# Sampling Comparison

To compute the results run:
```
python no_inter_forest_train.py
python no_inter_samp_comp.py
```

## Setup

In [None]:
# Set `save_pdf` to False to skip writing the figure to disk.
save_pdf = True
if save_pdf:
    file_out = "plots/sampling_comparison.pdf"
    # Create the output directory up front so the later savefig cannot fail on it
    pathlib.Path(file_out).parent.mkdir(parents=True, exist_ok=True)

# Load the precomputed sampling results (see the scripts listed above).
# NOTE: pickle.load can execute arbitrary code; only open pickles you produced yourself.
pickle_path = "precomputed_results/sampling_comparison.pickle"
with pathlib.Path(pickle_path).open('rb') as results_file:
    sampling_comparison_results = pickle.load(results_file)

## Plot

In [None]:
# Font sizes have to be configured BEFORE the figure is built: matplotlib reads
# rcParams when the figure, titles, ticks and legend are created, so updating
# them after plotting leaves them (partly) unapplied on a fresh kernel and only
# affects figures drawn by later cells.
params = {'legend.fontsize': 25,
          'figure.figsize': (20, 5),
          'axes.titlesize': 30,
          'xtick.labelsize': 18,
          'ytick.labelsize': 20}
plt.rcParams.update(params)

# Panel titles, in the same order as `sampling_methods`
labels = [r"\emph{All-Thresholds}",
          r"\emph{$K$-Quantile}",
          r"\emph{Equi-Width}",
          r"\emph{$K$-Means}",
          r"\emph{Equi-Size}"]

sampling_methods = ["all", "quantile", "equal", "kmeans", "equi_size"]

# Feature whose thresholds are visualized
feat = "x_2"

# One panel per sampling method, all sharing the y axis
fig, axs = plt.subplots(1, len(sampling_methods), sharey="all", tight_layout=True, figsize=(20, 5))

# Real function behind the data
fun = base_fun(2)

# Points used to plot the real function (evaluated once, reused in every panel)
xs = np.linspace(0, 1, 1000)
ys = fun(xs)

# Compute the KDE of the real thresholds used by the forest.
# The thresholds are sorted so that the curve is drawn left-to-right; with
# unsorted x values `plot`/`fill_between` would zigzag across the panel.
all_thresholds = sampling_comparison_results["all"].get_feature_thresholds()
feat_thresholds = np.sort(np.array(all_thresholds[feat]))
kde = KernelDensity(kernel='gaussian', bandwidth=0.3)
kde.fit(feat_thresholds.reshape(-1, 1))
# NOTE(review): score_samples returns LOG-density values, so the curve below is
# the log-density shifted by `kde_offset`, not the density itself. Confirm this
# is intended (the alternative would be np.exp(...) of these values).
y_kde_plot = kde.score_samples(feat_thresholds.reshape(-1, 1))

kde_offset = 1
y_lim = [-0.2, 1.7]

# Define colors
_, rug_plot_col = sns.color_palette(n_colors=2)
points_col = rug_plot_col
kde_fill_col, dist_fun_color, kde_contour_col = sns.color_palette("Blues", n_colors=3)

# For each sampling method, plot its visualization in its own panel
for ax, sampling_method, label in zip(axs, sampling_methods, labels):
    ax.set_ylim(y_lim)

    # Background: KDE curve of all forest thresholds, filled down to y=0
    ax.plot(feat_thresholds, y_kde_plot + kde_offset, color=kde_contour_col)
    ax.fill_between(feat_thresholds, y_kde_plot + kde_offset, y2=0, color=kde_fill_col)

    sampling_results = np.array(sampling_comparison_results[sampling_method].sampled[feat])

    # Rug of the sampled thresholds; "all" gets a much thinner line width
    points_y = fun(sampling_results)
    linewidth = 0.05 if sampling_method == "all" else 1
    sns.rugplot(x=sampling_results, ax=ax, color=rug_plot_col, height=0.07, linewidth=linewidth)

    # Sampled thresholds placed on the real function
    ax.scatter(sampling_results, points_y, color=points_col, marker="o", s=10)

    # Real function
    ax.plot(xs, ys, color=dist_fun_color, marker=None)

    ax.set_title(label)

# Figure-level legend built from proxy patches
blue_patch = mpatches.Patch(color=dist_fun_color, label='Original thresholds distribution')
orange_patch = mpatches.Patch(color=points_col, label='Sampled thresholds distribution')
fig.legend(handles=[blue_patch, orange_patch], bbox_to_anchor=(0.8, 0.05), ncol=2, facecolor='white', framealpha=1)

# Save figure if needed
if save_pdf:
    fig.savefig(file_out, bbox_inches="tight")

# Sampling strategies comparison

To compute the results run:
```
python no_inter_forest_train.py

python no_inter_samp_comp_var.py
```

## Setup

In [None]:
# Load the precomputed per-method series that the next cell plots against `range_m`.
# NOTE: pickle.load can execute arbitrary code; only open pickles you produced yourself.
PRECOMPUTED_PATH = "precomputed_results/sampling_comparison_var.pickle"
with pathlib.Path(PRECOMPUTED_PATH).open('rb') as results_file:
    acc_methods = pickle.load(results_file)

# Keys of `acc_methods`, in plotting order
sampling_methods = ["all", "quantile", "equal", "kmeans", "equi_size"]

# x-axis values (labelled K in the plot): 500, 1250, ..., 20000
range_m = range(500, 20001, 750)

# Set `save_pdf` to False to skip writing the figure to disk.
save_pdf = True
if save_pdf:
    file_out = "plots/rmse_sampling_comparison.pdf"
    pathlib.Path(file_out).parent.mkdir(parents=True, exist_ok=True)

## Plot

In [None]:
# Figure size and font sizes have to be configured BEFORE the figure is built:
# matplotlib reads rcParams when the figure, axis labels, ticks and legend are
# created. Updating them after plotting means a fresh "Run All" would draw this
# figure with whatever settings an earlier cell left behind.
params = {'legend.fontsize': 32,
          'figure.figsize': (15, 10),
          'axes.labelsize': 32,
          'xtick.labelsize': 32,
          'ytick.labelsize': 20}
plt.rcParams.update(params)

# Legend entry for each sampling method
labels_dict = {"all": r"\emph{All-Thresholds}",
               "quantile": r"\emph{$K$-Quantile}",
               "equal": r"\emph{Equi-Width}",
               "kmeans": r"\emph{$K$-Means}",
               "equi_size": r"\emph{Equi-Size}"}
markers = ["o", "s", "*", "x", "D"]
colors = sns.color_palette(n_colors=len(sampling_methods))

# Explicit figure/axes so the curves can never land on a stale current figure
fig, ax = plt.subplots()

# One dashed curve per sampling method, with hollow markers
for sampling_method, marker, color in zip(sampling_methods, markers, colors):
    ax.plot(range_m,
            acc_methods[sampling_method],
            f'{marker}--',
            mfc='none',
            mec=color,
            ms=11,
            label=labels_dict[sampling_method])
ax.set_xlabel("$K$")
ax.set_ylabel("RMSE")

# Legend placed below the axes
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3, fancybox=False, shadow=False, facecolor='white')
ax.grid(visible=True)
fig.tight_layout()

# Save figure if needed. `file_out` comes from the setup cell; honouring
# `save_pdf` here keeps this cell consistent with the other plot cells and
# avoids writing into a directory that was never created.
if save_pdf:
    fig.savefig(file_out)

# Spline reconstructions

To compute the results run:
```
python no_inter_forest_train.py
python no_inter_final_explain.py
```

## Setup

In [None]:
# Load the precomputed explainer object used by the plot cell below.
# NOTE: pickle.load can execute arbitrary code; only open pickles you produced yourself.
PRECOMPUTED_PATH = "precomputed_results/no_inter_explainer.pickle"
with pathlib.Path(PRECOMPUTED_PATH).open('rb') as explainer_file:
    explainer = pickle.load(explainer_file)

# Set `save_pdf` to False to skip writing the figure; `file_out` is then None.
save_pdf = True
file_out = "plots/generators.pdf" if save_pdf else None
if save_pdf:
    pathlib.Path(file_out).parent.mkdir(parents=True, exist_ok=True)

## Plot

In [None]:
# Font sizes and title placement have to be configured BEFORE the figure is
# built: matplotlib reads rcParams when titles, ticks and the legend are
# created, so updating them after plotting leaves them (partly) unapplied on a
# fresh kernel.
params = {'xtick.labelsize': 28,
          'ytick.labelsize': 28,
          'legend.fontsize': 30,
          'axes.titley': 1,
          'axes.titlepad': 26,
          'axes.labelsize': 32}
plt.rcParams.update(params)

n_row, n_col = 1, 5

# Panel titles (indexed by feature): LaTeX form of each generating function
texts = [r"$\bm{x}_1$",
         r"$\sin\left(20\bm{x}_2\right)$",
         r"$\frac{\exp\left(50(\bm{x}_3 -0.5)\right)}{\exp\left(50(\bm{x}_3 -0.5)\right) + 1}$",
         r"$\frac{\arctan\left(10\bm{x}_4\right)- \sin\left(10\bm{x}_4\right)}{2}$",
         r"$\frac{2}{\bm{x}_5 +1}$"]

# Title font size per feature
font_sizes = [30, 30, 40, 40, 40]
# Plain-text fallback titles when LaTeX rendering is disabled
if not plt.rcParams['text.usetex']:
    texts = ["First spline", "Second spline", "Third spline", "Fourth spline", "Fifth spline"]

fig = plt.figure(figsize=(30, 6))

# (index, term) pairs of the univariate terms only, ordered by feature.
# Keeping the original index matters because `generate_X_grid` and
# `partial_dependence` are addressed by the term's position in `gam.terms`.
# (presumably `explainer.gam` is a pyGAM model — confirm)
terms = [(i, x) for i, x in enumerate(explainer.gam.terms) if not x.isintercept and not x.istensor]
terms.sort(key=lambda x: x[1].feature)

# squeeze=False + ravel gives a flat array of axes whatever n_row/n_col are
axes = fig.subplots(n_row, n_col, sharey=True, squeeze=False).ravel()

# One panel per univariate term. Iterating over the filtered list (instead of
# indexing gam.terms by panel number) guarantees an intercept or tensor term is
# never plotted by accident.
for ax, (term_idx, term) in zip(axes, terms):
    feature = term.feature

    # Learned spline, centered on its mean so it is comparable with the real function
    grid = explainer.gam.generate_X_grid(term=term_idx)
    pdep, _ = explainer.gam.partial_dependence(term=term_idx, X=grid, width=0.95)
    centered_pdep = pdep - np.average(pdep)
    ax.plot(grid[:, feature], centered_pdep, label="Spline learned", lw=3)

    # Generating function evaluated on the same grid, centered the same way
    real_fun_aux = base_fun(feature)(grid[:, feature])
    real_fun_centered = real_fun_aux - np.average(real_fun_aux)
    ax.plot(grid[:, feature], real_fun_centered, label="Generating function", ls='--', lw=3)

    ax.set_title(texts[feature], loc='center', fontsize=font_sizes[feature])

# Single legend, anchored relative to the first panel
axes[0].legend(bbox_to_anchor=(3.9, -0.1), ncol=2, facecolor='white')

fig.subplots_adjust(hspace=0.3)
if save_pdf:
    fig.savefig(file_out, bbox_inches="tight")