In [None]:
# Papermill params (defaults; intended to be overridden when the notebook is run through papermill)
nevals = 100         # Number of hyperparameter sets to evaluate (passed to skopt as `n_calls`)
n = 4                # Output size handed to `KoopmanModel`
ratio = 0.9          # Train-test split ratio handed to `DataGenerator`
n_runs = 3           # Number of data generators / training runs per hyperparameter set

# Hyperparameter optimisation

In this notebook we set up the neural networks and evaluate different hyperparameter choices.

In [None]:
%run model.py

In [None]:
import skopt
from skopt import space as sp
from skopt.plots import plot_convergence, plot_objective

In [None]:
class CheckpointSaver:
    """
    Callback that writes the optimization result to disk every time it is
    called, using `skopt.utils.dump`.

    Example usage:
        checkpoint_callback = CheckpointSaver("./result.pkl")
        skopt.gp_minimize(obj_fun, dims, callback=[checkpoint_callback])

    Parameters
    ----------
    * `checkpoint_path`: location where checkpoint will be saved to;
    * `dump_options`: extra keyword options forwarded unchanged to
      `skopt.utils.dump`, like `compress=9`
    """
    def __init__(self, checkpoint_path, **dump_options):
        # Nothing is written here; the file is only touched in `__call__`.
        self.dump_options = dump_options
        self.checkpoint_path = checkpoint_path

    def __call__(self, res):
        """
        Dump `res` to `self.checkpoint_path`.

        Parameters
        ----------
        * `res` [`OptimizeResult`, scipy object]:
            The optimization as a OptimizeResult object.
        """
        target = self.checkpoint_path
        options = self.dump_options
        skopt.utils.dump(res, target, **options)

## Data
### Trajectories
Trajectories were acquired in five rounds of 1024 simulations each, totalling 5119 runs (one simulation failed to run) at 278 K in the $NVT$ ensemble. Postprocessing involved removing water, subsampling to 250 ps timesteps, and making molecules whole.

In [None]:
# Trajectory files matching the glob pattern, sorted so the order is deterministic
trajs = sorted(glob("trajectories/red/r?/traj*.xtc"))
# Topology file handed to the featurizer
top = "trajectories/red/topol.gro"
KBT = 2.311420 # k_B*T at 278 K (presumably in kJ/mol — TODO confirm units)

We use minimum distances as features for the neural network:

In [None]:
# Build a featurizer from the topology and register residue minimum distances as the feature
feat = pe.coordinates.featurizer(top)
feat.add_residue_mindist()
# Data source over all trajectory files, using the featurizer above
inpcon = pe.coordinates.source(trajs, feat)

# Switch for full version:
# lengths = sort_lengths(inpcon.trajectory_lengths(), [1024, 1023, 1024, 1024, 1024])
# Reduced version: all trajectory lengths are kept together as a single group
lengths = [inpcon.trajectory_lengths()]
# Total number of frames, summed over all trajectories
nframes = inpcon.trajectory_lengths().sum()

In [None]:
# Summary of the loaded data; each frame is weighted with 0.00025 to express the total time in µs.
# `nframes` already holds the sum of all trajectory lengths, so it is reused here.
print("Trajectories: {0}".format(len(trajs)))
print("Frames: {0}".format(nframes))
print("Time: {0:5.3f} µs".format(nframes * 0.00025))

## VAMPNet
VAMPNet[1] is composed of two lobes, one reading the system features $\mathbf{x}$ at a timepoint $t$ and the other after some lag time $\tau$. In this case the network reads all minimum inter-residue distances (780 values) and sends them through 5 layers with 256 nodes each. The final layer uses between 2 and 6 *softmax* outputs to yield a state assignment vector $\chi: \mathbb{R}^m \to \Delta^{n}$ where $\Delta^{n} = \{ s \in \mathbb{R}^n \mid 0 \le s_i \le 1, \sum_i^n s_i = 1 \}$ representing the probability of a state assignment. One lobe thus transforms a system state into a state occupation probability. We can also view this value as a kind of reverse ambiguity, i.e. how sure the network is that the system is part of a certain cluster. These outputs are then used as the input for the VAMP scoring function. We use the new enhanced version with physical constraints[2], particularly the ones for positive entries and reversibility.

[1] Mardt, A., Pasquali, L., Wu, H. & Noé, F. VAMPnets for deep learning of molecular kinetics. Nat Comms 1–11 (2017). doi:10.1038/s41467-017-02388-1

[2] Mardt, A., Pasquali, L., Noé, F. & Wu, H. Deep learning Markov and Koopman models with physical constraints. arXiv:1912.07392 [physics] (2019).

### Data preparation
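We use minimum residue distances as input ($\frac{(N-2)(N-3)}{2}$ values, where $N$ is the number of residues, because pairs of first and second sequence neighbours are excluded; this gives 780 values for $N = 42$) and first normalize the data: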

In [None]:
# Cache of the featurized trajectories: the features are only computed (and
# stacked into one array) when the file is not on disk yet.
filename = "intermediate/mindist-780-red.npy"
if not os.path.exists(filename):
    print("No mindist file for red ensemble, calculating from scratch...")
    # Make sure the target directory exists *before* featurizing, so the
    # computation is not lost to a failing `np.save` on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    con = np.vstack(inpcon.get_output())
    np.save(filename, con)

In [None]:
# Load the cached features and standardize every column with the statistics
# of the full data set.
raw = np.load("intermediate/mindist-780-red.npy")
raw_mean = raw.mean(axis=0)
raw_std = raw.std(axis=0)
# NOTE(review): a column with zero standard deviation would produce inf/NaN here — confirm none exists.

# Flat version: all frames in one array
input_flat = (raw - raw_mean) / raw_std
# Grouped version: the same normalization applied to each chunk returned by `unflatten`
input_data = [
    (chunk - raw_mean) / raw_std
    for chunk in unflatten(raw, lengths)
]

### Neural network hyperparameters
To allow for a larger hyperparameter search space, we use the self-normalizing neural network approach by Klambauer *et al.* [3], thus using SELU units, `AlphaDropout` and normalized `LeCun` weight initialization. The other hyperparameters are defined at the beginning of this notebook.

[3] Klambauer, G., Unterthiner, T., Mayr, A. & Hochreiter, S. Self-Normalizing Neural Networks. arXiv.org cs.LG, (2017).

In [None]:
lag = 20                         # Lag time, passed to `KoopmanModel` as `network_lag` (presumably in frames — TODO confirm)
n_dims = input_data[0].shape[1]  # Input dimension (size of the second axis of the normalized data)
nres = 42                        # Number of residues
epsilon = 1e-7                   # Floating point noise
dt = 0.25                        # Trajectory timestep in ns
bs_frames = 1000000              # Number of frames to use (replaced by `nframes` in the reduced version below)

# Candidate output sizes and lag times.
# NOTE(review): `lags`, `outsizes`, `nres`, `epsilon` and `attempts` are not referenced by the cells visible here.
outsizes = np.array([2, 3, 4, 5, 6])
lags = np.array([1, 2, 5, 10, 20, 50, 100])

# Reduced-version overrides (comment out for the full version):
# use every available frame and a single output size.
bs_frames = nframes
attempts = 2
outsizes = np.array([4])

### Run
We run the training several times with different train/test splits to get an error estimate; this is referred to as bootstrap aggregating (*bagging*).

In [None]:
# One data generator per run, all built with identical arguments
# (presumably each draws its own train/test split internally — TODO confirm in `DataGenerator`).
generators = [DataGenerator(input_data, ratio=ratio, dt=dt, max_frames=bs_frames) for _ in range(n_runs)]

In [None]:
# Discrete hyperparameter search space. The dimension names are the keys
# that `test_model` receives through `skopt.utils.use_named_args`.
space = [
    sp.Categorical([2, 4, 6, 8], name="depth"),
    sp.Categorical([128, 512, 1024], name="width"),
    sp.Categorical([1e-1, 1e-2, 1e-3], name="learning_rate"),
    sp.Categorical([1e-6, 1e-8, 1e-10], name="regularization"),
    sp.Categorical([0.0, 0.1], name="dropout"),
]

In [None]:
@skopt.utils.use_named_args(space)
def test_model(**params):
    """
    Objective function for the hyperparameter search.

    Trains one `KoopmanModel` per data generator in `generators` with the
    given hyperparameters and returns the mean of the scores of all runs
    that completed. Failed runs are reported with their traceback and
    excluded from the mean.

    Parameters
    ----------
    **params
        Hyperparameters keyed by the dimension names of the search space:
        `depth`, `width`, `learning_rate`, `regularization`, `dropout`.

    Returns
    -------
    float
        Mean score over the successful runs, or NaN if no run produced a
        score.

    Notes
    -----
    NOTE(review): this function is handed to `skopt.dummy_minimize`, which
    treats lower values as better — confirm that `KoopmanModel.score()`
    follows that convention. A NaN return value is also kept as-is in the
    optimizer's results and should be filtered before picking a best point.
    """
    # Local import so this cell does not depend on an extra notebook-level import.
    import traceback

    print("Parameters: {0}".format(params))
    # The categorical values may arrive as numpy integers; the network
    # arguments are passed on as plain Python ints.
    width, depth = int(params["width"]), int(params["depth"])
    learning_rate = params["learning_rate"]
    regularization = params["regularization"]
    dropout = params["dropout"]

    # NaN marks "no score"; entries are only overwritten by successful runs.
    scores = np.full(n_runs, np.nan)
    for i in range(n_runs):
        koop = KoopmanModel(n, verbose=1, network_lag=lag, nnargs=dict(
            width=width, depth=depth, learning_rate=learning_rate,
            regularization=regularization, dropout=dropout,
            batchnorm=True, lr_factor=5e-3))
        try:
            koop.fit(generators[i])
            scores[i] = koop.score()
        except Exception:
            # Best effort: one failing run must not abort the whole search,
            # but keep the full traceback so the failure can be diagnosed.
            print("Run {0}/{1} failed:".format(i + 1, n_runs))
            traceback.print_exc()
        finally:
            # Drop the model explicitly and collect garbage before the next run.
            del koop
            gc.collect()

    if np.isnan(scores).all():
        # Same value `np.nanmean` would give, without its RuntimeWarning
        # and with an explicit message.
        print("No run produced a valid score for parameters: {0}".format(params))
        return np.nan

    return np.nanmean(scores)

In [None]:
# Random search over `space`, checkpointed after every evaluation so an
# interrupted run can be picked up again.
checkpoint_file = "hyperpar-0.pkl"
chkp = CheckpointSaver(checkpoint_file)

if os.path.exists(checkpoint_file):
    print("Checkpoint file exists, continuing optimization...")
    # Seed the new search with the points and values evaluated so far.
    # NOTE(review): confirm whether `n_calls` counts these restored points
    # or requests that many additional evaluations.
    res = skopt.load(checkpoint_file)
    warm_start = dict(x0=res.x_iters, y0=res.func_vals)
else:
    print("Creating new checkpoint file...")
    warm_start = {}

res = skopt.dummy_minimize(
    test_model,
    space,
    n_calls=nevals,
    callback=[chkp],
    verbose=True,
    **warm_start
)