From ebf116cf48093eac5078ab4938222bceb854c1ad Mon Sep 17 00:00:00 2001
From: Jonas Eschle
Date: Fri, 19 Apr 2024 22:56:56 -0400
Subject: [PATCH 1/2] fix multithreading and minor bug (#561)

* wip: add improved loss

Signed-off-by: Jonas Eschle

* fix: typos

Signed-off-by: Jonas Eschle

* fix: typo

Signed-off-by: Jonas Eschle

---------

Signed-off-by: Jonas Eschle
---
 CHANGELOG.rst            |   4 ++
 zfit/__init__.py         |   1 -
 zfit/core/data.py        |   2 +
 zfit/core/loss.py        | 124 +++++++++++++++++++++++++++++++++------
 zfit/minimizers/ipopt.py |   4 +-
 zfit/models/basic.py     |   3 +-
 zfit/util/ztyping.py     |   2 -
 7 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index defdc8586..8dbeef720 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -10,6 +10,7 @@ Develop
 Major Features and Improvements
 -------------------------------
 
+
 Breaking changes
 ------------------
 
@@ -18,6 +19,8 @@ Deprecations
 
 Bug fixes and small changes
 ---------------------------
+- enhanced loss: a ``SimpleLoss`` can take a gradient and a hessian function, and the default base loss provides fallbacks that work correctly between ``value_gradient`` and ``gradient``. This mainly matters if you've implemented a custom loss and should fix any issues with it.
+- multiprocessing would get stuck due to an `upstream bug in TensorFlow <https://github.com/tensorflow/tensorflow/issues/66115>`_. This is worked around by disabling an unused piece of code.
 
 
 Experimental
@@ -28,6 +31,7 @@ Requirement changes
 
 Thanks
 ------
+- acampoverde for finding the bug in the multiprocessing
 
 0.20.2 (16 Apr 2024)
 ========================
diff --git a/zfit/__init__.py b/zfit/__init__.py
index 4fe33603e..535294540 100644
--- a/zfit/__init__.py
+++ b/zfit/__init__.py
@@ -83,7 +83,6 @@ def _maybe_disable_warnings():
         f"You are using TensorFlow version {_tf.__version__}. This zfit version ({__version__}) works"
         f" only with TF >= 2"
     )
-
 from . import z  # initialize first
 from . import (
     constraint,
diff --git a/zfit/core/data.py b/zfit/core/data.py
index ca7738581..c5e3f5f65 100644
--- a/zfit/core/data.py
+++ b/zfit/core/data.py
@@ -739,6 +739,8 @@ def from_numpy(
        #     warn_once("The order of the arguments `obs` and `array` has been swapped, array goes first (as any other `from_` constructor.", identifier="data_from_numpy")
        #     obs, array = array, obs
        # # legacy end
+        if isinstance(array, (float, int)):
+            array = np.array([array])
         if not isinstance(array, (np.ndarray)) and not (tf.is_tensor(array) and hasattr(array, "numpy")):
             msg = f"`array` has to be a `np.ndarray`. Is currently {type(array)}"
             raise TypeError(msg)
diff --git a/zfit/core/loss.py b/zfit/core/loss.py
index c0086f860..1ecf181bc 100644
--- a/zfit/core/loss.py
+++ b/zfit/core/loss.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from contextlib import suppress
 from functools import partial
 from typing import TYPE_CHECKING, Literal, Optional, Union
 
@@ -9,7 +10,7 @@
 from pydantic import Field
 from tensorflow.python.util.deprecation import deprecated
 
-from ..exception import OutsideLimitsError
+from ..exception import OutsideLimitsError, SpecificFunctionNotImplementedError
 from ..serialization.serializer import BaseRepr, Serializer
 from .data import convert_to_data
 from .serialmixin import SerializableMixin
@@ -40,9 +41,11 @@
 from ..util.warnings import warn_advanced_feature
 from ..z.math import (
     autodiff_gradient,
+    autodiff_hessian,
     autodiff_value_gradients,
     automatic_value_gradients_hessian,
     numerical_gradient,
+    numerical_hessian,
     numerical_value_gradient,
     numerical_value_gradients_hessian,
 )
@@ -156,6 +159,22 @@ def _check_container(cls, v):
         return v
 
 
+class GradientNotImplementedError(SpecificFunctionNotImplementedError):
+    pass
+
+
+class ValueGradientNotImplementedError(SpecificFunctionNotImplementedError):
+    pass
+
+
+class ValueGradientHessianNotImplementedError(SpecificFunctionNotImplementedError):
+    pass
+
+
+class HessianNotImplementedError(SpecificFunctionNotImplementedError):
+    pass
+
+
 class BaseLoss(ZfitLoss, BaseNumeric):
     def __init__(
         self,
@@ -569,14 +588,25 @@ def gradient(
         numgrad = self._options["numgrad"] if numgrad is None else numgrad
         paramvals, checked = self.check_precompile(params=paramvals)
         with self._check_set_input_params(paramvals, guarantee_checked=checked):
+            return self._call_gradient(params, numgrad)
+
+    @z.function(wraps="loss")
+    def _call_gradient(self, params, numgrad):
+        with suppress(GradientNotImplementedError):
             return self._gradient(params=params, numgrad=numgrad)
+        with suppress(ValueGradientNotImplementedError):
+            return self._value_gradient(params=params, numgrad=numgrad, full=False)[1]
+        return self._fallback_gradient(params=params, numgrad=numgrad)
+
     def gradients(self, *_, **__):
         msg = "`gradients` is deprecated, use `gradient` instead."
         raise BreakingAPIChangeError(msg)
 
-    @z.function(wraps="loss")
-    def _gradient(self, params, numgrad):
+    def _gradient(self, params, numgrad):  # noqa: ARG002
+        raise GradientNotImplementedError
+
+    def _fallback_gradient(self, params, numgrad):
         self_value = partial(self.value, full=False)
         if numgrad:
             gradient = numerical_gradient(self_value, params=params)
@@ -622,15 +652,26 @@ def value_gradient(
             full = DEFAULT_FULL_ARG
         paramvals, checked = self.check_precompile(params=paramvals)
         with self._check_set_input_params(paramvals, guarantee_checked=checked):
-            value, gradient = self._value_gradient(params=params, numgrad=numgrad, full=full)
+            value, gradient = self._call_value_gradient(params, numgrad, full)
         return value, gradient
 
+    @z.function(wraps="loss")
+    def _call_value_gradient(self, params, numgrad, full):
+        with suppress(ValueGradientNotImplementedError):
+            return self._value_gradient(params=params, numgrad=numgrad, full=full)
+        with suppress(GradientNotImplementedError):
+            gradient = self._gradient(params=params, numgrad=numgrad)
+            return self.value(full=full), gradient
+        return self._fallback_value_gradient(params=params, numgrad=numgrad, full=full)
+
     def value_gradients(self, *_, **__):
         msg = "`value_gradients` is deprecated, use `value_gradient` instead."
         raise BreakingAPIChangeError(msg)
 
-    @z.function(wraps="loss")
-    def _value_gradient(self, params, numgrad=False, *, full: bool | None = None):
+    def _value_gradient(self, params, numgrad, full):  # noqa: ARG002
+        raise ValueGradientNotImplementedError
+
+    def _fallback_value_gradient(self, params, numgrad=False, *, full: bool | None = None):
         if full is None:
             full = DEFAULT_FULL_ARG
         self_value = partial(self.value, full=full)
@@ -665,13 +706,31 @@ def hessian(
         numgrad = self._options["numgrad"] if numgrad is None else numgrad
         paramvals, checked = self.check_precompile(params=paramvals)
         with self._check_set_input_params(paramvals, guarantee_checked=checked):
-            return self.value_gradient_hessian(params=params, hessian=hessian, full=False, numgrad=numgrad)[2]
+            return self._call_hessian(params, numgrad, hessian)
+
+    def _call_hessian(self, params, numgrad, hessian):
+        with suppress(HessianNotImplementedError):
+            return self._hessian(params=params, hessian=hessian, numgrad=numgrad)
+        with suppress(ValueGradientHessianNotImplementedError):
+            return self._value_gradient_hessian(params=params, hessian=hessian, numerical=numgrad, full=False)[2]
+        return self._fallback_hessian(params=params, hessian=hessian, numgrad=numgrad)
+
+    def _hessian(self, params, hessian, numgrad):  # noqa: ARG002
+        raise HessianNotImplementedError
+
+    def _fallback_hessian(self, params, hessian, numgrad):
+        self_value = partial(self.value, full=False)
+        if numgrad:
+            hessian = numerical_hessian(self_value, params=params, hessian=hessian)
+        else:
+            hessian = autodiff_hessian(self_value, params=params, hessian=hessian)
+        return hessian
 
     def value_gradient_hessian(
         self,
         params: ztyping.ParamTypeInput = None,
-        hessian=None,
         *,
+        hessian=None,
         full: bool | None = None,
         numgrad=None,
         paramvals: ztyping.ParamTypeInput = None,
@@ -707,16 +766,27 @@ def value_gradient_hessian(
             full = DEFAULT_FULL_ARG
         paramvals, checked = self.check_precompile(params=paramvals)
         with self._check_set_input_params(paramvals, guarantee_checked=checked):
+            return self._call_value_gradient_hessian(params, numgrad, full, hessian)
+
+    @z.function(wraps="loss")
+    def _call_value_gradient_hessian(self, params, numgrad, full, hessian):
+        with suppress(ValueGradientHessianNotImplementedError):
             return self._value_gradient_hessian(params=params, hessian=hessian, numerical=numgrad, full=full)
+        with suppress(HessianNotImplementedError):
+            hessian = self._hessian(params=params, hessian=hessian, numgrad=numgrad)
+            return *self.value_gradient(params=params, numgrad=numgrad, full=full), hessian
+        return self._fallback_value_gradient_hessian(params=params, hessian=hessian, numgrad=numgrad, full=full)
 
     def value_gradients_hessian(self, *_, **__):
         msg = "`value_gradients_hessian` is deprecated, use `value_gradient_hessian` instead."
         raise BreakingAPIChangeError(msg)
 
-    @z.function(wraps="loss")
-    def _value_gradient_hessian(self, params, hessian, numerical=False, *, full: bool | None = None):
+    def _value_gradient_hessian(self, params, hessian, numerical=False, full: bool | None = None):  # noqa: ARG002
+        raise ValueGradientHessianNotImplementedError
+
+    def _fallback_value_gradient_hessian(self, params, hessian, numgrad=False, *, full: bool | None = None):
         self_value = partial(self.value, full=full)
-        if numerical:
+        if numgrad:
             return numerical_value_gradients_hessian(
                 func=self_value, gradient=self.gradient, params=params, hessian=hessian
             )
@@ -1111,6 +1181,9 @@ def __init__(
         func: Callable,
         params: Iterable[zfit.Parameter] | None = None,
         errordef: float | None = None,
+        *,
+        gradient: Callable | None = None,
+        hessian: Callable | None = None,
         # legacy
         deps: Iterable[zfit.Parameter] = NONE,
         dependents: Iterable[zfit.Parameter] = NONE,
@@ -1126,10 +1199,14 @@ def __init__(
                 the ``func`` depends on.
             errordef: Definition of which change in the loss corresponds to a change of 1 sigma.
                 For example, 1 for Chi squared, 0.5 for negative log-likelihood.
+            gradient: Function that calculates the gradient of the loss with respect to the parameters. If not given,
+                the gradient will be calculated automatically.
+            hessian: Function that calculates the hessian of the loss with respect to the parameters.
+                If not given, the hessian will be calculated automatically.
 
         Usage:
 
-        .. code:: python
+        .. code-block:: python
 
             import zfit
             import zfit.z.numpy as znp
@@ -1188,6 +1265,8 @@ def squared_loss(params):
 
         self._simple_func = func
         self._errordef = errordef
+        self._grad_fn = gradient
+        self._hess_fn = hessian
         params = convert_to_parameters(params, prefer_constant=False)
         self._params = params
         self._simple_func_params = _extract_dependencies(params)
@@ -1205,6 +1284,18 @@ def _get_params(
         own_params = extract_filter_params(self._params, floating=floating, extract_independent=extract_independent)
         return params.union(own_params)
 
+    def _gradient(self, params, numgrad):
+        del numgrad
+        if self._grad_fn is not None:
+            return self._grad_fn(params)
+        raise GradientNotImplementedError
+
+    def _hessian(self, params, hessian, numgrad):
+        del hessian, numgrad
+        if self._hess_fn is not None:
+            return self._hess_fn(params)
+        raise HessianNotImplementedError
+
     @property
     def errordef(self):
         errordef = self._errordef
@@ -1213,11 +1304,10 @@ def errordef(self):
             raise RuntimeError(msg)
         return errordef
 
-    # @z.function(wraps='loss')
     def _loss_func(self, model, data, fit_range, constraints=None, log_offset=None):  # noqa: ARG002
-        if log_offset not in (None, False):
-            msg = "log_offset is not allowed for a SimpleLoss"
-            raise ValueError(msg)
+        if log_offset is not None and log_offset is not False:
+            pass
+            # raise ValueError(msg)
         try:
             params = self._simple_func_params
             params = tuple(params)
@@ -1227,7 +1317,7 @@ def _loss_func(self, model, data, fit_range, constraints=None, log_offset=None):
                 value = self._simple_func()
             else:
                 raise error
-        return z.convert_to_tensor(value)
+        return znp.asarray(value)
 
     def __add__(self, other):
         msg = "Cannot add a SimpleLoss, 'addition' of losses can mean anything." "Add them manually"
diff --git a/zfit/minimizers/ipopt.py b/zfit/minimizers/ipopt.py
index 1fb40fdf8..51bc0491f 100644
--- a/zfit/minimizers/ipopt.py
+++ b/zfit/minimizers/ipopt.py
@@ -379,7 +379,9 @@ def hessian_inplace(x, out):
                 minimizer.set(**{option: "yes" for option in warm_start_options})
 
             # update the tolerances
-            self._update_tol_inplace(criterion_value=criterion_value, internal_tol=internal_tol)
+            self._update_tol_inplace(
+                criterion_value=criterion_value, internal_tol=internal_tol
+            )  # hand-tuned 0.1 factor
 
         else:
             valid = False
diff --git a/zfit/models/basic.py b/zfit/models/basic.py
index a539d6e42..5497c9f9d 100644
--- a/zfit/models/basic.py
+++ b/zfit/models/basic.py
@@ -337,7 +337,8 @@ def _voigt_integral_from_inf_to_inf(limits, params, model):
     return sigma * np.sqrt(2 * np.pi)
 
 
-limits = Space(axes=0, limits=(-znp.inf, znp.inf))
+# do NOT uncomment, this can lead to deadlocks. No joke: https://github.com/tensorflow/tensorflow/issues/66115
+# limits = Space(axes=0, limits=(-znp.inf, znp.inf))
 
 
 # todo: this only works if executing eagerly, which fails for at least the binned PDFs
diff --git a/zfit/util/ztyping.py b/zfit/util/ztyping.py
index ec4c2aa18..3ba73882e 100644
--- a/zfit/util/ztyping.py
+++ b/zfit/util/ztyping.py
@@ -133,7 +133,6 @@
     Iterable["zfit.core.interfaces.ZfitGraphCachable"],
 ]  #:
 
-
 LimitsDictAxes = Dict[Tuple[int], "zfit.core.interfaces.ZfitLimit"]  #:
 LimitsDictObs = Dict[Tuple[str], "zfit.core.interfaces.ZfitLimit"]  #:
 LimitsDictNoCoords = Union[LimitsDictAxes, LimitsDictObs]  #:
@@ -149,5 +148,4 @@
 ]  #:
 
 ArrayLike = tf.types.experimental.TensorLike  #:
-
 ParamValuesMap = Optional[Mapping[Union[str, "zfit.core.interfaces.ZfitParameter"], NumericalScalarType]]

From 57c3ae2cd49bb613e19d354559029add2bb21969 Mon Sep 17 00:00:00 2001
From: Jonas Eschle
Date: Fri, 19 Apr 2024 23:07:08 -0400
Subject: [PATCH 2/2] release: 0.20.3

Signed-off-by: Jonas Eschle
---
 CHANGELOG.rst | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8dbeef720..6ba5c4620 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -19,8 +19,6 @@ Deprecations
 
 Bug fixes and small changes
 ---------------------------
-- enhanced loss: a ``SimpleLoss`` can take a gradient and a hessian function, and the default base loss provides fallbacks that work correctly between ``value_gradient`` and ``gradient``. This mainly matters if you've implemented a custom loss and should fix any issues with it.
-- multiprocessing would get stuck due to an `upstream bug in TensorFlow <https://github.com/tensorflow/tensorflow/issues/66115>`_. This is worked around by disabling an unused piece of code.
 
 
 Experimental
@@ -29,6 +27,17 @@ Experimental
 Requirement changes
 -------------------
 
+Thanks
+------
+
+0.20.3 (19 Apr 2024)
+========================
+
+Bug fixes and small changes
+---------------------------
+- consistent behavior in loss: a ``SimpleLoss`` can take a gradient and a hessian function, and the default base loss provides fallbacks that work correctly between ``value_gradient`` and ``gradient``. This mainly matters if you've implemented a custom loss and should fix any issues with it.
+- multiprocessing would get stuck due to an `upstream bug in TensorFlow <https://github.com/tensorflow/tensorflow/issues/66115>`_. This is worked around by disabling an unused piece of code.
+
 Thanks
 ------
 - acampoverde for finding the bug in the multiprocessing