Loss function clarity #28

Merged
merged 29 commits into dev from loss-function-clarity on Feb 7, 2022
Changes from 22 commits
Commits
29 commits
210b570
enabling general np.ndarray as params in scans, not just 1D
MSRudolph Dec 6, 2021
645b3e8
docstring length
MSRudolph Dec 6, 2021
ae46603
enabling PCA for ND np.ndarray parameters
MSRudolph Dec 6, 2021
3e437cc
whitespace
MSRudolph Dec 6, 2021
6bde08c
specify that Hessian calculation is not supported for ND numpy arrays
MSRudolph Dec 6, 2021
9acc21d
style
MSRudolph Dec 6, 2021
c7563b7
style
MSRudolph Dec 6, 2021
da2825d
possibly enabling ND parameter vectors
MSRudolph Dec 6, 2021
96f5e14
Merge branch 'dev' into loss-function-clarity
MSRudolph Jan 20, 2022
62de902
fixing gradient for ND numpy arrays
MSRudolph Jan 20, 2022
31519d6
enabling ND numpy array for Hessians
MSRudolph Jan 21, 2022
4919e36
implementing custom ND tensor norm
MSRudolph Jan 21, 2022
a66752c
Adding new Aliases
MSRudolph Jan 21, 2022
d29f036
defining return types
MSRudolph Jan 21, 2022
9156347
Starting with FAQ section
MSRudolph Jan 21, 2022
b8d16e2
comments for aliases
MSRudolph Jan 21, 2022
c1fbe0f
Revert EvalFunction to LossFunction
MSRudolph Jan 21, 2022
2206feb
Update README.md
MSRudolph Jan 27, 2022
8c3035d
addressing minor comments
MSRudolph Jan 27, 2022
91d105e
Merge branch 'loss-function-clarity' of https://github.com/zapatacomp…
MSRudolph Jan 27, 2022
9288d64
merge conflicts
MSRudolph Jan 27, 2022
a5019ea
update the loss_function documentation according to the new LossFunct…
MSRudolph Jan 27, 2022
9cbb003
Added optional to perform_1D_scan.
mstechly Feb 5, 2022
e17194f
Fixing style issues.
mstechly Feb 5, 2022
034f42a
Fixed typing issues.
mstechly Feb 5, 2022
3470fcc
Added dev dependencies to style workflow.
mstechly Feb 5, 2022
0f09414
fixup! Added dev dependencies to style workflow.
mstechly Feb 5, 2022
b642843
eigenvectors are direction vectors
MSRudolph Feb 7, 2022
3778fe0
make numpy array so that list of parameter vectors is valid
MSRudolph Feb 7, 2022
11 changes: 11 additions & 0 deletions README.md
@@ -59,6 +59,17 @@ This code results in the following plot:

![Image](docs/example_plot.png)

## FAQ

**What are the expected type and shape for the parameters?**\
Parameters should be a `numpy.ndarray` of real numbers. In recent releases, the parameter array can have any shape that `numpy` supports, i.e., ragged arrays with inconsistent sizes per dimension are not allowed. Up to version `0.1.1`, the parameter array had to be one-dimensional.
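
For instance, a minimal sketch of what counts as a valid parameter array (plain `numpy`, no orqviz-specific code):

```python
import numpy as np

params_1d = np.array([0.1, -0.5, 2.3])                     # always valid
params_2d = np.random.uniform(-np.pi, np.pi, size=(4, 3))  # valid in recent releases

# Not valid: a "ragged" array with inconsistent sizes per dimension,
# e.g. [[0.1, 0.2], [0.3]], does not form a numeric numpy.ndarray.
```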

**What is the format of the `loss_function` that most `orqviz` methods expect?**\
We define a `loss_function` as a function that receives only the parameters of the model and returns a real number (float). That value could, for example, be the cost function of an optimization problem, the prediction of a classifier, or the fidelity with respect to a fixed quantum state. All computation required to arrive at that value must happen inside your function. Check out the above code for a full minimal example; a schematic sketch follows below.
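
```python
import numpy as np

def loss_function(params: np.ndarray) -> float:
    # Everything needed to evaluate the model happens inside this function;
    # only the parameters come in, and a single real number comes out.
    return float(np.sum(np.sin(params) ** 2))
```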

**What can I do if my loss function requires additional arguments?**\
In that case, you need to wrap the function in another function such that it again receives only the parameters of the model. We provide a wrapper class called `LossFunctionWrapper` that you can import from `orqviz.loss_function`. It is a thin wrapper with helpful perks, such as measuring the average evaluation time of a single loss function call and the total number of calls.
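
For example, a sketch using `functools.partial` to fix the extra arguments; the commented-out `LossFunctionWrapper` call is hypothetical in its exact signature, which is not shown here:

```python
from functools import partial

import numpy as np

def loss_with_data(params: np.ndarray, data: np.ndarray) -> float:
    return float(np.mean((np.sin(params) - data) ** 2))

data = np.array([0.1, 0.4, -0.3])
# Fix the extra argument so the result again receives only the parameters:
loss_function = partial(loss_with_data, data=data)

# Hypothetical wrapper usage; check the class for its exact constructor:
# from orqviz.loss_function import LossFunctionWrapper
# loss_function = LossFunctionWrapper(partial(loss_with_data, data=data))
```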

## Authors

The leading developer of this package is Manuel Rudolph at Zapata Computing.\
263 changes: 175 additions & 88 deletions docs/examples/advanced_example_notebook.ipynb

Large diffs are not rendered by default.

20 changes: 14 additions & 6 deletions docs/examples/gradient_descent_optimizer.py
@@ -1,23 +1,31 @@
from typing import Callable, Optional, Tuple

import numpy as np

from orqviz.aliases import (
ArrayOfParameterVectors,
FullGradientFunction,
LossFunction,
ParameterVector,
)
from orqviz.gradients import calculate_full_gradient


def gradient_descent_optimizer(
init_params: np.ndarray,
loss_function: Callable[[np.ndarray], float],
init_params: ParameterVector,
loss_function: LossFunction,
n_iters: int,
learning_rate: float = 0.1,
full_gradient_function: Optional[Callable] = None,
full_gradient_function: FullGradientFunction = None,
eval_loss_during_training: bool = True,
) -> Tuple[np.ndarray, np.ndarray]:
) -> Tuple[ArrayOfParameterVectors, np.ndarray]:
"""Function perform gradient descent optimization on a loss function.

Args:
init_params: Initial parameter vector from which to start the optimization.
loss_function: Loss function with respect to which the gradient is calculated.
loss_function: Function with respect to which the gradient is calculated. It must receive only a
numpy.ndarray of parameters, and return a real number.
If your function requires more arguments, consider using the 'LossFunctionWrapper'
class from 'orqviz.loss_function'.
n_iters: Number of iterations to optimize.
learning_rate: Learning rate for gradient descent. The calculated gradient
is multiplied with this value and then updates the parameter vector.
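
For context, a usage sketch of the updated signature (the loss function here is a made-up example; with `full_gradient_function` left as `None`, the optimizer presumably falls back to a finite-difference gradient via the imported `calculate_full_gradient`):

```python
import numpy as np

def loss_function(params: np.ndarray) -> float:
    return float(np.sum(np.sin(params) ** 2))

init_params = np.random.uniform(-np.pi, np.pi, size=(2, 3))  # ND arrays now allowed
parameter_trajectory, losses = gradient_descent_optimizer(
    init_params, loss_function, n_iters=100, learning_rate=0.1
)
```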
18 changes: 15 additions & 3 deletions src/orqviz/aliases.py
@@ -1,3 +1,5 @@
from typing import Callable

import numpy as np

"""
@@ -8,7 +10,17 @@
the only dimension, is always of size number_of_parameters, while the other dimensions
indicate how many of them there are.
"""
ParameterVector = np.ndarray # 1D array
ArrayOfParameterVectors = np.ndarray # 2D array
GridOfParameterVectors = np.ndarray # 3D array
ParameterVector = np.ndarray # ND array
ArrayOfParameterVectors = np.ndarray # Array of ND arrays
GridOfParameterVectors = np.ndarray # Grid of ND arrays
Weights = np.ndarray # 1D vector of floats from 0-1
DirectionVector = np.ndarray # ND array with same shape as ParameterVector
LossFunction = Callable[
[ParameterVector], float
] # Function that can be scanned with orqviz
GradientFunction = Callable[
[ParameterVector, DirectionVector], float
] # Returns partial derivative of LossFunction wrt DirectionVector
FullGradientFunction = Callable[
[ParameterVector], np.ndarray
] # Returns all partial derivatives of LossFunction wrt each parameter
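
As an illustration, functions satisfying the new aliases could look like the following (a finite-difference sketch, not code from this PR):

```python
import numpy as np

def my_loss(params: np.ndarray) -> float:
    """Satisfies LossFunction: parameters in, real number out."""
    return float(np.sum(params ** 2))

def my_full_gradient(params: np.ndarray, eps: float = 1e-3) -> np.ndarray:
    """Satisfies FullGradientFunction: one partial derivative per parameter."""
    grad = np.zeros_like(params)
    for idx in np.ndindex(params.shape):
        shift = np.zeros_like(params)
        shift[idx] = eps
        grad[idx] = (my_loss(params + shift) - my_loss(params - shift)) / (2 * eps)
    return grad
```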
18 changes: 12 additions & 6 deletions src/orqviz/elastic_band/auto_neb.py
@@ -3,16 +3,16 @@
import numpy as np
from scipy.interpolate import interp1d

from ..aliases import ParameterVector
from ..aliases import FullGradientFunction, LossFunction, ParameterVector
from .data_structures import Chain
from .neb import run_NEB


# Nudged-Elastic-Band
def run_AutoNEB(
init_chain: Chain,
loss_function: Callable[[ParameterVector], float],
full_gradient_function: Optional[Callable[[ParameterVector], np.ndarray]] = None,
loss_function: LossFunction,
full_gradient_function: FullGradientFunction = None,
n_cycles: int = 4,
n_iters_per_cycle: int = 10,
max_new_pivots: int = 1,
@@ -39,7 +39,10 @@ def run_AutoNEB(

Args:
init_chain: Initial chain that is optimized with the algorithm.
loss_function: Loss function that is used to optimize the chain.
loss_function: Function that is used to optimize the chain. It must receive only a
numpy.ndarray of parameters, and return a real number.
If your function requires more arguments, consider using the 'LossFunctionWrapper'
class from 'orqviz.loss_function'.
full_gradient_function: Function to calculate the gradient w.r.t.
the loss function for all parameters. Defaults to None.
n_cycles: Number of cycles between which new pivots can be inserted.
@@ -118,7 +121,7 @@ def run_AutoNEB(

def _insert_pivots_to_improve_approximation(
chain: Chain,
loss_function: Callable[[ParameterVector], float],
loss_function: LossFunction,
max_new_pivots: int = 1,
percentage_tol: float = 0.2,
absolute_tol: float = 0.0,
@@ -129,7 +132,10 @@ def _insert_pivots_to_improve_approximation(

Args:
chain: Current Chain
loss_function: Loss function for the NEB training
loss_function: Function for NEB training. It must receive only a
numpy.ndarray of parameters, and return a real number.
If your function requires more arguments, consider using the 'LossFunctionWrapper'
class from 'orqviz.loss_function'.
max_new_pivots: Maximum number of pivots inserted to Chain. Defaults to 1.
percentage_tol: Percentage error threshold to insert new pivots.
Be mindful of the magnitude and sign of typical loss values.
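
For context, a hypothetical call with ND parameters (assumes `Chain` from `data_structures` below, and that `run_AutoNEB` returns the optimized chain, which this diff does not show):

```python
import numpy as np

def loss_function(params: np.ndarray) -> float:
    return float(np.sum(np.sin(params) ** 2))

# Interpolate 8 pivots between two (2, 3)-shaped parameter arrays:
pivots = np.linspace(np.zeros((2, 3)), np.ones((2, 3)), num=8)
init_chain = Chain(pivots=pivots)

trained_chain = run_AutoNEB(init_chain, loss_function, n_cycles=4, n_iters_per_cycle=10)
```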
21 changes: 14 additions & 7 deletions src/orqviz/elastic_band/data_structures.py
@@ -1,11 +1,12 @@
from __future__ import annotations

from typing import Callable, NamedTuple
from typing import Callable, NamedTuple, Tuple

import numpy as np
from scipy.interpolate import interp1d

from ..aliases import ArrayOfParameterVectors, Weights
from ..aliases import ArrayOfParameterVectors, LossFunction, ParameterVector, Weights
from ..geometric import _norm_of_arrayofparametervectors
Contributor:
Since you are using _norm_of_arrayofparametervectors in a different module than the one it comes from (i.e., in elastic_band and not in geometric), I think it makes more sense to not make it private? Not 100% sure though, thoughts @alexjuda?

Contributor Author:
That's a good point. It is only a helper function for us, though. I doubt that anyone would need it apart from us.

Contributor:
I can recommend a 3-fold distinction for project symbols – public/internal/fileprivate – based on a convention with leading underscores in the module and symbol names:
  • public: module, function
  • internal: _module, function
  • fileprivate: w/e, _function
The above use case looks like an "internal" one :). Thoughts?

Contributor:
@alexjuda In our case the content of the geometric package should be public, so not really ;)
@MSRudolph fine 🤷‍♂️

from ..scans import eval_points_on_path


@@ -21,14 +22,14 @@ class Chain(NamedTuple):
pivots: ArrayOfParameterVectors

def get_weights(self) -> Weights:
chain_weights = np.linalg.norm(np.diff(self.pivots, axis=0), axis=1)
chain_weights = _norm_of_arrayofparametervectors(np.diff(self.pivots, axis=0))
chain_weights /= np.sum(chain_weights)
cum_weights = np.cumsum(chain_weights)
matching_cum_weights = np.insert(cum_weights, 0, 0)
matching_cum_weights[-1] = 1
return matching_cum_weights

def evaluate_on_pivots(self, loss_function: Callable) -> np.ndarray:
def evaluate_on_pivots(self, loss_function: LossFunction) -> np.ndarray:
return eval_points_on_path(self.pivots, loss_function)

@property
@@ -37,7 +38,11 @@ def n_pivots(self) -> int:

@property
def n_params(self) -> int:
return len(self.pivots[0])
return np.prod(self.param_shape)

@property
def param_shape(self) -> Tuple[int, ...]:
return np.atleast_1d(self.pivots[0]).shape


class ChainPath(NamedTuple):
@@ -65,7 +70,7 @@ def generate_uniform_chain(self, n_points: int) -> Chain:
return self._get_chain_from_weights(weights)

def evaluate_points_on_path(
self, n_points: int, loss_function: Callable, weighted: bool = False
self, n_points: int, loss_function: LossFunction, weighted: bool = False
) -> np.ndarray:
if weighted:
chain = self.generate_chain(n_points)
@@ -74,8 +79,10 @@ def evaluate_points_on_path(
return chain.evaluate_on_pivots(loss_function)

def _get_chain_from_weights(self, weights: Weights) -> Chain:
distance_between_pivots = np.diff(self.primary_chain.pivots, axis=0)

chain_diff = np.cumsum(
np.linalg.norm(np.diff(self.primary_chain.pivots, axis=0), axis=1)
_norm_of_arrayofparametervectors(distance_between_pivots)
)
chain_diff /= max(chain_diff)
chain_diff = np.insert(chain_diff, 0, 0)
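
To illustrate the new `param_shape` and `n_params` properties, a sketch (that `n_pivots` counts the pivots is an assumption consistent with the class above):

```python
import numpy as np

pivots = np.random.normal(size=(5, 2, 3))  # 5 pivots, each of shape (2, 3)
chain = Chain(pivots=pivots)

assert chain.param_shape == (2, 3)
assert chain.n_params == 6  # np.prod((2, 3))
```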
38 changes: 27 additions & 11 deletions src/orqviz/elastic_band/neb.py
@@ -2,17 +2,23 @@

import numpy as np

from ..aliases import ParameterVector, Weights
from ..aliases import (
DirectionVector,
FullGradientFunction,
LossFunction,
ParameterVector,
Weights,
)
from ..gradients import calculate_full_gradient
from .data_structures import Chain, ChainPath


def run_NEB(
init_chain: Chain,
loss_function: Callable[[ParameterVector], float],
full_gradient_function: Optional[Callable[[ParameterVector], np.ndarray]] = None,
loss_function: LossFunction,
full_gradient_function: FullGradientFunction = None,
n_iters: int = 10,
eps: float = 0.1,
eps: float = 1e-3,
learning_rate: float = 0.1,
stochastic: bool = False,
calibrate_tangential: bool = False,
@@ -29,12 +35,16 @@

Args:
init_chain: Initial chain that is optimized with the algorithm.
loss_function: Loss function that is used to optimize the chain.
loss_function: Function that is used to optimize the chain. It must receive only a
numpy.ndarray of parameters, and return a real number.
If your function requires more arguments, consider using the 'LossFunctionWrapper'
class from 'orqviz.loss_function'.
full_gradient_function: Function to calculate the gradient w.r.t.
the loss function for all parameters. Defaults to None.
n_iters: Number of optimization iterations. Defaults to 10.
eps: Stencil for finite difference gradient if full_gradient_function
is not provided. Defaults to 0.1.
is not provided. For noisy loss functions,
we recommend increasing this value. Defaults to 1e-3.
learning_rate: Learning rate/ step size for the gradient descent optimization.
Defaults to 0.1.
stochastic: Flag to indicate whether to perform stochastic gradient descent
@@ -86,16 +96,19 @@ def _full_gradient_function(pars: ParameterVector) -> ParameterVector:

def _get_gradients_on_pivots(
chain: Chain,
loss_function: Callable[[ParameterVector], float],
full_gradient_function: Callable[[ParameterVector], np.ndarray],
loss_function: LossFunction,
full_gradient_function: FullGradientFunction,
calibrate_tangential: bool = False,
) -> np.ndarray:
"""Calculates gradient for every pivot on the chain w.r.t. the loss function
using the gradient function.

Args:
chain: Chain to calculate the gradients on.
loss_function: Loss function for which to calculate the gradient.
loss_function: Function that is used to optimize the chain. It must receive only a
numpy.ndarray of parameters, and return a real number.
If your function requires more arguments, consider using the 'LossFunctionWrapper'
class from 'orqviz.loss_function'.
full_gradient_function: Function to calculate the gradient w.r.t.
the loss function for all parameters.
calibrate_tangential: Flag to indicate whether next neighbor for finding
@@ -105,7 +118,7 @@ def _get_gradients_on_pivots(

# We initialize with zeros, as we always want first and last gradient
# to be equal to 0.
gradients_on_pivots = np.zeros(shape=(chain.n_pivots, chain.n_params))
gradients_on_pivots = np.zeros(shape=(chain.n_pivots, *chain.param_shape))

for ii in range(1, chain.n_pivots - 1):
before = chain.pivots[ii - 1]
@@ -118,7 +131,10 @@
if calibrate_tangential and loss_function(after) > loss_function(before):
tan = after - this
tan /= np.linalg.norm(tan)
tangential_grad = np.dot(full_grad, tan) * tan
ax_indices = tuple(range(len(full_grad.shape)))
tangential_grad = (
np.tensordot(full_grad, tan, axes=(ax_indices, ax_indices)) * tan
)
# save update
gradients_on_pivots[ii] = full_grad - tangential_grad

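
The full `tensordot` contraction generalizes the previous `np.dot` to ND parameter arrays; a small sketch of the equivalence (made-up arrays):

```python
import numpy as np

full_grad = np.random.normal(size=(2, 3))
tan = np.random.normal(size=(2, 3))
tan /= np.linalg.norm(tan)

ax_indices = tuple(range(full_grad.ndim))
# Contracting over all axes yields a scalar, like np.dot on flattened vectors:
overlap = np.tensordot(full_grad, tan, axes=(ax_indices, ax_indices))
assert np.isclose(overlap, np.dot(full_grad.flatten(), tan.flatten()))
tangential_grad = overlap * tan  # component of the gradient along `tan`
```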
9 changes: 6 additions & 3 deletions src/orqviz/elastic_band/plots.py
@@ -3,23 +3,26 @@
import matplotlib
import numpy as np

from ..aliases import ParameterVector
from ..aliases import LossFunction, ParameterVector
from ..plot_utils import _check_and_create_fig_ax
from ..scans import eval_points_on_path
from .neb import Chain


def plot_all_chains_losses(
all_chains: List[Chain],
loss_function: Callable[[ParameterVector], float],
loss_function: LossFunction,
ax: Optional[matplotlib.axes.Axes] = None,
**plot_kwargs,
) -> None:
"""Function to plot

Args:
all_chains: List of Chains to evaluate the loss on.
loss_function: Loss function to evaluate the Chains
loss_function: Function to evaluate the chain pivots on. It must receive only a
numpy.ndarray of parameters, and return a real number.
If your function requires more arguments, consider using the 'LossFunctionWrapper'
class from 'orqviz.loss_function'.
ax: Matplotlib axis to plot on. If None, a new axis is created
from the current figure. Defaults to None.
plot_kwargs: kwargs for plotting with matplotlib.pyplot.plot (plt.plot)
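
A usage sketch (assumes `loss_function` and the chains from the earlier run_AutoNEB sketch; keyword arguments are passed through to `plt.plot`):

```python
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
plot_all_chains_losses([init_chain, trained_chain], loss_function, ax=ax, marker="o")
ax.set_ylabel("Loss")
plt.show()
```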
33 changes: 24 additions & 9 deletions src/orqviz/geometric.py
@@ -1,24 +1,24 @@
from typing import Optional, Tuple
from typing import Optional, Tuple, Union

import numpy as np
from scipy.interpolate import interp1d

from .aliases import ArrayOfParameterVectors, ParameterVector
from .aliases import ArrayOfParameterVectors, DirectionVector, ParameterVector


def get_random_normal_vector(dimension: int) -> ParameterVector:
def get_random_normal_vector(dimension: Union[int, Tuple]) -> DirectionVector:
"""Helper function to generate a vector with a specified dimension and norm=1."""
random_vector = np.random.normal(0, 1, size=dimension)
return random_vector / np.linalg.norm(random_vector)


def get_random_orthonormal_vector(base_vector: ParameterVector) -> ParameterVector:
def get_random_orthonormal_vector(base_vector: DirectionVector) -> DirectionVector:
"""Helper function to generate a random orthogonal vector with respect to
a provided base vector."""
random_vector = np.random.normal(size=base_vector.shape)
random_vector = np.random.normal(size=np.shape(base_vector))
new_vector = (
random_vector
- np.dot(random_vector, base_vector)
- np.dot(random_vector.flatten(), base_vector.flatten())
* base_vector
/ np.linalg.norm(base_vector) ** 2
)
@@ -87,7 +87,7 @@ def relative_periodic_trajectory_wrap(

def get_coordinates_on_direction(
points: ArrayOfParameterVectors,
direction: np.ndarray,
direction: DirectionVector,
origin: Optional[ParameterVector] = None,
in_units_of_direction: bool = False,
) -> np.ndarray:
@@ -107,12 +107,19 @@ def get_coordinates_on_direction(
norm_direction = np.linalg.norm(direction)
if in_units_of_direction:
direction = direction / norm_direction
return np.dot(points, direction) / norm_direction
return (
np.tensordot(
points,
direction,
axes=(range(1, len(points.shape)), range(len(direction.shape))),
)
/ norm_direction
)


def direction_linspace(
origin: ParameterVector,
direction: np.ndarray,
direction: DirectionVector,
n_points: int,
endpoints: Tuple[float, float] = (-1, 1),
) -> ArrayOfParameterVectors:
@@ -148,3 +155,11 @@ def uniformly_distribute_trajectory(
)
eval_points = np.linspace(0, 1, num=n_points)
return weight_interpolator(eval_points)


def _norm_of_arrayofparametervectors(param_array: ArrayOfParameterVectors):
Contributor:
Not sure about the name of the function... Thoughts, @alexjuda?

Contributor:
How about just `def _norm(param_array: ArrayOfParameterVectors):`?

ax_indices = tuple(range(len(param_array.shape)))
t_dot = np.tensordot(
param_array, param_array, axes=(ax_indices[1:], ax_indices[1:])
)
return np.array(np.sqrt(np.diag(t_dot)))
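
For intuition, the helper returns one Euclidean norm per ND entry; a sketch of an equivalence check (note the `tensordot` route also computes cross terms that `np.diag` then discards):

```python
import numpy as np

param_array = np.random.normal(size=(5, 2, 3))  # 5 entries of shape (2, 3)

norms = _norm_of_arrayofparametervectors(param_array)

# Equivalent: flatten each entry and take its vector norm.
expected = np.linalg.norm(param_array.reshape(len(param_array), -1), axis=1)
assert np.allclose(norms, expected)
```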