Eigen type caster improvements #215

Merged
merged 14 commits on Jun 7, 2023
Changes from all commits
241 changes: 186 additions & 55 deletions include/nanobind/eigen/dense.h
@@ -26,46 +26,73 @@ template <typename T> using DMap = Eigen::Map<T, 0, DStride>;
NAMESPACE_BEGIN(detail)

template <typename T>
-constexpr int NumDimensions = bool(T::IsVectorAtCompileTime) ? 1 : 2;
+constexpr int num_dimensions = bool(T::IsVectorAtCompileTime) ? 1 : 2;

-template <typename T>
-using array_for_eigen_t = ndarray<
template<typename T> struct StrideExtr {
using Type = Eigen::Stride<0, 0>;
};

template <typename T, int Options, typename StrideType> struct StrideExtr<Eigen::Map<T, Options, StrideType>> {
using Type = StrideType;
};

template<typename T> using Stride = typename StrideExtr<T>::Type;

/// Is true for Eigen types that are known at compile-time to hold contiguous memory only, which includes all specializations of Matrix and Array,
/// and specializations of Map and Ref with matching stride types and shapes. A (compile-time) stride of 0 means "contiguous" to Eigen.
template<typename T> constexpr bool requires_contig_memory =
(Stride<T>::InnerStrideAtCompileTime == 0 || Stride<T>::InnerStrideAtCompileTime == 1) &&
(num_dimensions<T> == 1 ||
Stride<T>::OuterStrideAtCompileTime == 0 ||
     (Stride<T>::OuterStrideAtCompileTime != Eigen::Dynamic && Stride<T>::OuterStrideAtCompileTime == T::InnerSizeAtCompileTime));
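
An illustrative sketch (editor's addition, not part of the diff; assumes Eigen/Dense and the aliases above) of how requires_contig_memory evaluates for some common types:

static_assert(requires_contig_memory<Eigen::MatrixXf>);              // plain Matrix: always contiguous
static_assert(requires_contig_memory<Eigen::Map<Eigen::MatrixXf>>);  // default Map stride is Stride<0, 0>, i.e. contiguous
static_assert(!requires_contig_memory<Eigen::Map<Eigen::MatrixXf, 0, Eigen::OuterStride<>>>);  // dynamic outer stride
static_assert(!requires_contig_memory<Eigen::Map<Eigen::VectorXf, 0, Eigen::InnerStride<>>>);  // dynamic inner stride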

/// Is true for StrideTypes that can describe the contiguous memory layout of the plain Eigen type T.
template<typename StrideType, typename T> constexpr bool can_map_contig_memory =
(StrideType::InnerStrideAtCompileTime == 0 || StrideType::InnerStrideAtCompileTime == 1 || StrideType::InnerStrideAtCompileTime == Eigen::Dynamic) &&
(num_dimensions<T> == 1 ||
StrideType::OuterStrideAtCompileTime == 0 ||
StrideType::OuterStrideAtCompileTime == Eigen::Dynamic ||
StrideType::OuterStrideAtCompileTime == T::InnerSizeAtCompileTime);
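
A matching sketch (editor's addition, not part of the diff) for can_map_contig_memory:

static_assert(can_map_contig_memory<Eigen::Stride<0, 0>, Eigen::MatrixXf>);    // "contiguous" strides always fit
static_assert(can_map_contig_memory<Eigen::OuterStride<>, Eigen::MatrixXf>);   // a dynamic outer stride can take the contiguous value at run-time
static_assert(!can_map_contig_memory<Eigen::InnerStride<2>, Eigen::VectorXf>); // a fixed inner stride of 2 can never describe contiguous memory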

/// The ndarray alias for a given Eigen type, used by type_caster<EigenType>::from_python, which calls type_caster<array_for_eigen_t<EigenType>>::from_python.
/// If the Eigen type is known at compile-time to handle contiguous memory only, this alias makes type_caster<array_for_eigen_t<EigenType>>::from_python
/// either fail or provide an ndarray with contiguous memory, triggering a conversion if necessary and permitted by flags.
/// Otherwise, this alias makes type_caster<array_for_eigen_t<EigenType>>::from_python either fail or provide an ndarray with arbitrary strides,
/// which then need to be checked for compatibility, since there is no way to ask type_caster<ndarray> for specific strides other than c_contig and f_contig.
/// Hence, if an Eigen type requires non-contiguous strides (at compile-time) and type_caster<array_for_eigen_t<EigenType>> provides an ndarray with unsuitable strides (at run-time),
/// then type_caster<EigenType>::from_python simply fails. Note, however, that this case is rather unusual, since the default stride type of Map requires contiguous memory,
/// and that of Ref requires a contiguous inner stride while accepting any outer stride.
template <typename T> using array_for_eigen_t = ndarray<
typename T::Scalar,
numpy,
std::conditional_t<
-        NumDimensions<T> == 1,
+        num_dimensions<T> == 1,
shape<(size_t) T::SizeAtCompileTime>,
shape<(size_t) T::RowsAtCompileTime,
(size_t) T::ColsAtCompileTime>>,
std::conditional_t<
-        T::InnerStrideAtCompileTime == Eigen::Dynamic,
-        any_contig,
+        requires_contig_memory<T>,
        std::conditional_t<
-            T::IsRowMajor || NumDimensions<T> == 1,
+            num_dimensions<T> == 1 || T::IsRowMajor,
            c_contig,
-            f_contig
-        >
-    >
->;
+            f_contig>,
+        any_contig>>;
Review comment from the repository owner: This part wasn't clear to me. Why any_contig if requiresContigMemory==false?
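
To make the selection concrete, a sketch (editor's addition, not part of the diff; the dynamic shape<...> spellings are abbreviated) of what the alias picks:

// array_for_eigen_t<Eigen::Matrix4f>
//     -> ndarray<float, numpy, shape<4, 4>, f_contig>   (column-major, contiguous)
// array_for_eigen_t<Eigen::Vector3f>
//     -> ndarray<float, numpy, shape<3>, c_contig>      (one dimension)
// array_for_eigen_t<Eigen::Map<Eigen::MatrixXf, 0, Eigen::OuterStride<>>>
//     -> ndarray<float, numpy, shape<..., ...>, any_contig>
//        (requires_contig_memory is false, so any strides are accepted at the ndarray
//         level and checked against StrideType later, in type_caster<Map>::from_python_)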


/// Any kind of Eigen class
-template <typename T> constexpr bool is_eigen_v =
-    is_base_of_template_v<T, Eigen::EigenBase>;
+template <typename T> constexpr bool is_eigen_v = is_base_of_template_v<T, Eigen::EigenBase>;

/// Detects Eigen::Array, Eigen::Matrix, etc.
-template <typename T> constexpr bool is_eigen_plain_v =
-    is_base_of_template_v<T, Eigen::PlainObjectBase>;
+template <typename T> constexpr bool is_eigen_plain_v = is_base_of_template_v<T, Eigen::PlainObjectBase>;

/// Detects Eigen::SparseMatrix
-template <typename T> constexpr bool is_eigen_sparse_v =
-    is_base_of_template_v<T, Eigen::SparseMatrixBase>;
+template <typename T> constexpr bool is_eigen_sparse_v = is_base_of_template_v<T, Eigen::SparseMatrixBase>;

/// Detects expression templates
template <typename T> constexpr bool is_eigen_xpr_v =
is_eigen_v<T> && !is_eigen_plain_v<T> && !is_eigen_sparse_v<T> &&
!std::is_base_of_v<Eigen::MapBase<T, Eigen::ReadOnlyAccessors>, T>;

-template <typename T> struct type_caster<T, enable_if_t<is_eigen_plain_v<T>>> {
+template <typename T> struct type_caster<T, enable_if_t<is_eigen_plain_v<T> && is_ndarray_scalar_v<typename T::Scalar>>> {
using Scalar = typename T::Scalar;
using NDArray = array_for_eigen_t<T>;
using NDArrayCaster = make_caster<NDArray>;
@@ -77,17 +104,12 @@ template <typename T> struct type_caster<T, enable_if_t<is_eigen_plain_v<T>>> {
if (!caster.from_python(src, flags, cleanup))
return false;
const NDArray &array = caster.value;

-        if constexpr (NumDimensions<T> == 1) {
+        if constexpr (num_dimensions<T> == 1)
            value.resize(array.shape(0));
-            memcpy(value.data(), array.data(),
-                   array.shape(0) * sizeof(Scalar));
-        } else {
+        else
            value.resize(array.shape(0), array.shape(1));
-            memcpy(value.data(), array.data(),
-                   array.shape(0) * array.shape(1) * sizeof(Scalar));
-        }

+        // array_for_eigen_t<T> ensures that array holds contiguous memory.
+        memcpy(value.data(), array.data(), array.size() * sizeof(Scalar));
return true;
}

@@ -100,10 +122,10 @@ template <typename T> struct type_caster<T, enable_if_t<is_eigen_plain_v<T>>> {
}

static handle from_cpp(const T &v, rv_policy policy, cleanup_list *cleanup) noexcept {
-        size_t shape[NumDimensions<T>];
-        int64_t strides[NumDimensions<T>];
+        size_t shape[num_dimensions<T>];
+        int64_t strides[num_dimensions<T>];

-        if constexpr (NumDimensions<T> == 1) {
+        if constexpr (num_dimensions<T> == 1) {
shape[0] = v.size();
strides[0] = v.innerStride();
} else {
@@ -148,18 +170,19 @@ template <typename T> struct type_caster<T, enable_if_t<is_eigen_plain_v<T>>> {
policy == rv_policy::move ? rv_policy::reference : policy;

object o = steal(NDArrayCaster::from_cpp(
-            NDArray(ptr, NumDimensions<T>, shape, owner, strides),
+            NDArray(ptr, num_dimensions<T>, shape, owner, strides),
array_rv_policy, cleanup));

return o.release();
}
};
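
A usage sketch (editor's addition; the module name and function are hypothetical): plain Eigen types are copied across the language boundary in both directions.

#include <nanobind/nanobind.h>
#include <nanobind/eigen/dense.h>

namespace nb = nanobind;

NB_MODULE(demo, m) {
    // from_python copies the NumPy array into a fresh MatrixXd;
    // from_cpp copies the result back into a new NumPy array.
    m.def("scale", [](const Eigen::MatrixXd &a, double s) -> Eigen::MatrixXd {
        return a * s;
    });
}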

/// Caster for Eigen expression templates
-template <typename T> struct type_caster<T, enable_if_t<is_eigen_xpr_v<T>>> {
+template <typename T> struct type_caster<T, enable_if_t<is_eigen_xpr_v<T> && is_ndarray_scalar_v<typename T::Scalar>>> {
using Array = Eigen::Array<typename T::Scalar, T::RowsAtCompileTime,
T::ColsAtCompileTime>;
using Caster = make_caster<Array>;
static constexpr bool IsClass = false;
static constexpr auto Name = Caster::Name;
template <typename T_> using Cast = T;

@@ -174,25 +197,60 @@

/// Caster for Eigen::Map<T>
template <typename T, int Options, typename StrideType>
-struct type_caster<Eigen::Map<T, Options, StrideType>, enable_if_t<is_eigen_plain_v<T>>> {
+struct type_caster<Eigen::Map<T, Options, StrideType>, enable_if_t<is_eigen_plain_v<T> && is_ndarray_scalar_v<typename T::Scalar>>> {
using Map = Eigen::Map<T, Options, StrideType>;
using NDArray = array_for_eigen_t<Map>;
using NDArrayCaster = type_caster<NDArray>;
static constexpr bool IsClass = false;
static constexpr auto Name = NDArrayCaster::Name;
template <typename T_> using Cast = Map;

NDArrayCaster caster;

-    bool from_python(handle src, uint8_t flags,
-                     cleanup_list *cleanup) noexcept {
-        return caster.from_python(src, flags, cleanup);
+    bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
+        // Conversions result in an Eigen::Map pointing into a temporary ndarray.
+        // If src is not a bound function argument, but e.g. an argument of cast(), that temporary would be destroyed upon returning from cast().
+        // Hence, conversions cannot be supported in this case.
+        // If src is a bound function argument, the cleanup list would keep the temporary alive until the bound function returns.
+        // Hence, conversions could be supported in this case, but the bound function would then alter the Map without any effect on the Python side.
+        // That behaviour would be surprising, as a bound function expecting a Map most probably expects it to point into the caller's data.
+        // Hence, do not support conversions in any case.
+        return from_python_(src, flags & ~(uint8_t)cast_flags::convert, cleanup);
}

bool from_python_(handle src, uint8_t flags, cleanup_list* cleanup) noexcept {
if (!caster.from_python(src, flags, cleanup))
return false;

        // Check whether StrideType can cope with the strides of caster.value. Avoid this check if the types guarantee that it succeeds.

// If requires_contig_memory<Map> is true, then StrideType is known at compile-time to only cope with contiguous memory.
// Then since caster.from_python has succeeded, caster.value now surely provides contiguous memory, and so its strides surely fit.
if constexpr (!requires_contig_memory<Map>) {
// A stride that is dynamic at compile-time copes with any stride at run-time.
if constexpr (StrideType::InnerStrideAtCompileTime != Eigen::Dynamic) {
// A stride of 0 at compile-time means "contiguous" to Eigen, which is always 1 for the inner stride.
int64_t expected_inner_stride = StrideType::InnerStrideAtCompileTime == 0 ? 1 : StrideType::InnerStrideAtCompileTime;
if (expected_inner_stride != (num_dimensions<T> == 1 || !T::IsRowMajor ? caster.value.stride(0) : caster.value.stride(1)))
return false;
}
if constexpr (num_dimensions<T> == 2 && StrideType::OuterStrideAtCompileTime != Eigen::Dynamic) {
int64_t expected_outer_stride =
StrideType::OuterStrideAtCompileTime == 0
? T::IsRowMajor ? caster.value.shape(1) : caster.value.shape(0)
: StrideType::OuterStrideAtCompileTime;
if (expected_outer_stride != (T::IsRowMajor ? caster.value.stride(0) : caster.value.stride(1)))
return false;
}
}
return true;
}
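
    // Worked example (editor's note, not part of the diff): for
    // Map<Matrix<float, Dynamic, Dynamic, RowMajor>, 0, Eigen::OuterStride<8>> and an incoming
    // 3x5 row-major ndarray with element strides (5, 1), the inner check passes
    // (expected 1 == stride(1)), but the outer check fails (expected 8 != stride(0) == 5),
    // so from_python_ returns false.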

static handle from_cpp(const Map &v, rv_policy, cleanup_list *cleanup) noexcept {
-        size_t shape[NumDimensions<T>];
-        int64_t strides[NumDimensions<T>];
+        size_t shape[num_dimensions<T>];
+        int64_t strides[num_dimensions<T>];

-        if constexpr (NumDimensions<T> == 1) {
+        if constexpr (num_dimensions<T> == 1) {
shape[0] = v.size();
strides[0] = v.innerStride();
} else {
Expand All @@ -203,53 +261,126 @@ struct type_caster<Eigen::Map<T, Options, StrideType>, enable_if_t<is_eigen_plai
}

return NDArrayCaster::from_cpp(
-            NDArray((void *) v.data(), NumDimensions<T>, shape, handle(), strides),
+            NDArray((void *) v.data(), num_dimensions<T>, shape, handle(), strides),
rv_policy::reference, cleanup);
}

StrideType strides() const {
-        constexpr int IS = StrideType::InnerStrideAtCompileTime,
-                      OS = StrideType::OuterStrideAtCompileTime;
+        constexpr int is = StrideType::InnerStrideAtCompileTime,
+                      os = StrideType::OuterStrideAtCompileTime;

        int64_t inner = caster.value.stride(0),
-                outer = caster.value.stride(1);
-        (void) outer;
+                outer;
+        if constexpr (num_dimensions<T> == 1)
+            outer = caster.value.shape(0);
+        else
+            outer = caster.value.stride(1);

-        if constexpr (T::IsRowMajor)
+        if constexpr (num_dimensions<T> == 2 && T::IsRowMajor)
std::swap(inner, outer);

-        if constexpr (std::is_same_v<StrideType, Eigen::InnerStride<IS>>)
+        // Compile-time strides of 0 must be passed as such to constructors of StrideType, to avoid assertions in Eigen.
+        if constexpr (is == 0) {
+            // Ensured by the stride checks in from_python_:
+            // assert(inner == 1);
+            inner = 0;
+        }
+
+        if constexpr (os == 0) {
+            // Ensured by the stride checks in from_python_:
+            // assert(num_dimensions<T> == 1 || outer == (T::IsRowMajor ? int64_t(caster.value.shape(1)) : int64_t(caster.value.shape(0))));
+            outer = 0;
+        }
+
+        if constexpr (std::is_same_v<StrideType, Eigen::InnerStride<is>>)
            return StrideType(inner);
-        else if constexpr (std::is_same_v<StrideType, Eigen::OuterStride<OS>>)
+        else if constexpr (std::is_same_v<StrideType, Eigen::OuterStride<os>>)
return StrideType(outer);
else
return StrideType(outer, inner);
}
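
    // Worked example (editor's note, not part of the diff): for
    // Map<Eigen::VectorXf, 0, Eigen::InnerStride<>> over a 1D ndarray with element stride 3,
    // is == Eigen::Dynamic and os == 0, so strides() returns Eigen::InnerStride<>(3).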

operator Map() {
NDArray &t = caster.value;
-        return Map(t.data(), t.shape(0), t.ndim() == 1 ? 1 : t.shape(1),
-                   strides());
+        if constexpr (num_dimensions<T> == 1)
+            return Map(t.data(), t.shape(0), strides());
+        else
+            return Map(t.data(), t.shape(0), t.shape(1), strides());
}
};
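
A behavioural sketch (editor's addition; the module object `m` and the Python calls are hypothetical): a Map argument never copies, so the array must already match in dtype, layout, and strides.

m.def("double_in_place", [](Eigen::Map<Eigen::MatrixXd> a) {
    a *= 2.0;  // mutates the caller's NumPy array directly
});
// double_in_place(np.zeros((2, 3), order='F'))  # ok: float64, column-major contiguous
// double_in_place(np.zeros((2, 3)))             # TypeError: C-ordered, and conversion is disabled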


/// Caster for Eigen::Ref<T>
template <typename T, int Options, typename StrideType>
-struct type_caster<Eigen::Ref<T, Options, StrideType>, enable_if_t<is_eigen_plain_v<T>>> {
+struct type_caster<Eigen::Ref<T, Options, StrideType>, enable_if_t<is_eigen_plain_v<T> && is_ndarray_scalar_v<typename T::Scalar>>> {
using Ref = Eigen::Ref<T, Options, StrideType>;
using Map = Eigen::Map<T, Options, StrideType>;
using DMap = Eigen::Map<T, Options, DStride>;
using MapCaster = make_caster<Map>;
-    static constexpr auto Name = MapCaster::Name;
using DMapCaster = make_caster<DMap>;
using DmapMatches = typename Eigen::internal::traits<Ref>::template match<DMap>::type;
static constexpr bool can_map_contig_mem = can_map_contig_memory<StrideType, T>;
static constexpr bool IsClass = false;
static constexpr auto Name = const_name<std::is_const_v<T>>(DMapCaster::Name, MapCaster::Name);
template <typename T_> using Cast = Ref;

MapCaster caster;
DMapCaster dcaster;


/// In short:
/// - type_caster<Ref<T>> supports no conversions, independent of flags.
/// - type_caster<Ref<T const>>
/// + supports stride conversions, independent of flags, except for uncommon strides.
/// + It additionally supports conversions to T::Scalar if flags say so,
/// and if either a cleanup_list is passed, or if Ref is guaranteed to map its own data.
///
/// type_caster<Ref<T const>> supports stride conversions independent of flags, because if the intention was to not allow them,
/// then the bound function would most probably expect a Map instead of a Ref.
///
/// Both Ref<T> and Ref<T const> map data.
/// Like for Map, type_caster<Ref<T>>::from_python does not support conversions, and for the same reasons.
/// But unlike Ref<T>, instead of mapping external data, Ref<T const> may alternatively map data that it owns itself.
/// Ref<T const> then maps its member variable m_object, having copy-constructed it from the passed Eigen type.
/// The primary use case of Ref<T const> is as function argument that either maps the caller's data, or a suitably converted copy thereof.
/// Hence, unlike with Map and Ref<T>, a Ref<T const> that maps a (converted) copy is intended,
/// and thus, type_caster<Ref<T const>>::from_python may support conversions.
/// It first calls the type_caster for matching strides, not supporting conversions.
/// If that fails, it calls the one for arbitrary strides. Since conversions to T::Scalar create a temporary ndarray,
/// conversions are supported only if flags say so, and if either a cleanup_list is passed (that keeps the temporary alive),
/// or if Ref<T const> is guaranteed to map its own data (having copied the temporary), which is ensured only if DmapMatches::value is false.
///
/// Unfortunately, if src's scalar type needs to be converted, then the latter means that e.g.
/// cast<Eigen::Ref<const Eigen::VectorXi>>(src) succeeds, while
/// cast< DRef<const Eigen::VectorXi>>(src) fails -
/// even though DRef would be expected to support a superset of the types supported by Ref.
///
/// Ref<T const>::m_object holds contiguous memory, which Ref silently fails to map if this is impossible given StrideType
/// and the passed object's shape. If mapping fails, then Ref is left with mapping nullptr.
/// While this could be considered below, it is not done for efficiency reasons:
/// due to Ref's missing move constructor, its unusual copy constructor, and since C++ does not guarantee named return value optimizations,
/// the Ref would need to be created only for checking it, and created a second time for returning it,
/// which seems too costly for a Ref that owns its data.
/// Instead of checking thoroughly after construction, conversion fails if it is known at compile-time that mapping may fail,
/// even though it may actually succeed in some of these cases at run-time (e.g. StrideType::OuterStrideAtCompileTime==4,
/// and a row-major Matrix with a dynamic number of columns and 4 columns at run-time).
/// Once Ref<T const> defines a move constructor https://gitlab.com/libeigen/eigen/-/issues/2668, this restriction may be lifted.
bool from_python(handle src, uint8_t flags, cleanup_list *cleanup) noexcept {
if constexpr (std::is_const_v<T>)
return caster.from_python(src, flags, cleanup) ||
can_map_contig_mem &&
dcaster.from_python_(src, (!DmapMatches::value || cleanup) ? flags : flags & ~(uint8_t)cast_flags::convert, cleanup);
else
return caster.from_python(src, flags, cleanup);
}

-    bool from_python(handle src, uint8_t flags,
-                     cleanup_list *cleanup) noexcept {
-        return caster.from_python(src, flags, cleanup);
operator Ref() {
if constexpr (std::is_const_v<T>)
if (dcaster.caster.value.is_valid())
return Ref(dcaster.operator DMap());
return Ref(caster.operator Map());
}

-    operator Ref() { return Ref(caster.operator Map()); }
};
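
A contrasting sketch (editor's addition; module `m` and the functions are hypothetical): Ref<const T> converts when necessary, Ref<T> never does.

m.def("sum", [](const Eigen::Ref<const Eigen::VectorXd> &v) {
    return v.sum();  // accepts float64 of any stride; other dtypes are copied into
                     // Ref's own storage when conversions are enabled
});
m.def("fill_ones", [](Eigen::Ref<Eigen::VectorXd> v) {
    v.setOnes();     // must alias the caller's data: only a contiguous float64
                     // array is accepted, and the write is visible in Python
});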

NAMESPACE_END(detail)
9 changes: 8 additions & 1 deletion include/nanobind/ndarray.h
@@ -78,9 +78,16 @@ struct tensorflow { };
struct pytorch { };
struct jax { };

NAMESPACE_BEGIN(detail)

template<typename T> constexpr bool is_ndarray_scalar_v =
std::is_floating_point_v<T> || std::is_integral_v<T>;

NAMESPACE_END(detail)

template <typename T> constexpr dlpack::dtype dtype() {
static_assert(
-        std::is_floating_point_v<T> || std::is_integral_v<T>,
+        detail::is_ndarray_scalar_v<T>,
"nanobind::dtype<T>: T must be a floating point or integer variable!"
);
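
For instance (editor's sketch, assuming <complex> and <cstdint> are included), the shared predicate keeps dtype<T> and the Eigen casters in agreement:

static_assert(detail::is_ndarray_scalar_v<float>);
static_assert(detail::is_ndarray_scalar_v<int32_t>);
static_assert(!detail::is_ndarray_scalar_v<std::complex<float>>);  // complex scalars are rejected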
