In [1]:
%config Completer.use_jedi = False

In [2]:
import math
import numpy as np
import pandas as pd
from cmaes import CMA
import seaborn as sns
from scipy.spatial.distance import pdist

In [3]:
def errorfcn(alpha, Xbar, n, m):
    f1 = np.reshape(alpha[(2 * n):], (m, 2))
    f2 = np.reshape(alpha[0:2 * n], (2, n))
    f3 = Xbar[:, 0:n].T
    r = (Xbar - np.dot(f1, np.dot(f2, f3)).T) ** 2
    return np.nanmean(np.nanmean(r, axis=0))

In [5]:
df = pd.read_csv('datasets/credit-g_metadata.csv').drop(columns = ['instances'])
performance_columns = [col for col in df.columns if col.startswith('algo')]
X = df.drop(columns = performance_columns)
Y = df[performance_columns]

In [14]:
%%time
Xbar = np.hstack((X, Y))
n = X.shape[1]
m = Xbar.shape[1]

search_space = 32.768
bounds = list()
bounds.append([[-search_space, search_space] for i in range(2 * m + 2 * n)])
bounds = np.array(bounds).reshape((2 * m + 2 * n, 2))

lower_bounds, upper_bounds = bounds[:, 0], bounds[:, 1]
mean = lower_bounds + (np.random.rand(2 * m + 2 * n) * (upper_bounds - lower_bounds))
sigma = search_space * 2 / 5  # 1/5 of the domain width

optimizer = CMA(mean=mean, sigma=sigma, bounds=bounds, seed=0)
ntries = 5

perf = np.zeros(ntries)
alpha = np.zeros((2 * m + 2 * n, ntries))
Hd = pdist(X)[np.newaxis].T

n_restarts = 30  # A small restart doesn't count in the n_restarts
small_n_eval, large_n_eval = 0, 0
popsize0 = optimizer.population_size
inc_popsize = 2

# Initial run is with "normal" population size; it is
# the large population before first doubling, but its
# budget accounting is the same as in case of small
# population.
poptype = "small"

for generation in range(200):
    solutions = []
    for _ in range(optimizer.population_size):
        alpha[:, generation] = optimizer.ask()
        
        # Calculate function value
        value = errorfcn(alpha = alpha[:, generation], Xbar = Xbar, n = n, m = m)
        solutions.append((alpha[:, generation], value))
#         print(f"#{generation} {value} (x1={x[0]}, x2 = {x[1]})")
    optimizer.tell(solutions)

    aux = alpha[:, [generation]]
    A = np.reshape(aux[0:2 * n], (2, n))
    Z = np.dot(X, A.T)
    perf[generation] = np.corrcoef(Hd, pdist(Z)[np.newaxis].T, rowvar=False)[0][1]
    
    if optimizer.should_stop():
        n_eval = optimizer.population_size * optimizer.generation
        if poptype == "small":
            small_n_eval += n_eval
        else:  # poptype == "large"
            large_n_eval += n_eval

        if small_n_eval < large_n_eval:
            poptype = "small"
            popsize_multiplier = inc_popsize ** n_restarts
            popsize = math.floor(
                popsize0 * popsize_multiplier ** (np.random.uniform() ** 2)
            )
        else:
            poptype = "large"
            n_restarts += 1
            popsize = popsize0 * (inc_popsize ** n_restarts)

        mean = lower_bounds + (np.random.rand(2) * (upper_bounds - lower_bounds))
        optimizer = CMA(
            mean=mean,
            sigma=sigma,
            bounds=bounds,
            population_size=popsize,
        )
        print("Restart CMA-ES with popsize={} ({})".format(popsize, poptype))


IndexError: index 5 is out of bounds for axis 1 with size 5