pandas-dev · mroeschke · Apr 3, 2025 · Feb 22, 2025 · Mar 1, 2025 · Mar 1, 2025
diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py
@@ -67,6 +67,14 @@ class NumericEngineIndexing:
     def setup(self, engine_and_dtype, index_type, unique, N):
         engine, dtype = engine_and_dtype
 
+        if (
+            index_type == "non_monotonic"
+            and dtype in [np.int16, np.int8, np.uint8]
+            and unique
+        ):
+            # Values overflow these narrow integer dtypes; asv skips the benchmark
+            raise NotImplementedError
+
         if index_type == "monotonic_incr":
             if unique:
                 arr = np.arange(N * 3, dtype=dtype)
@@ -115,6 +123,14 @@ def setup(self, engine_and_dtype, index_type, unique, N):
         engine, dtype = engine_and_dtype
         dtype = dtype.lower()
 
+        if (
+            index_type == "non_monotonic"
+            and dtype in ["int16", "int8", "uint8"]
+            and unique
+        ):
+            # Values overflow these narrow integer dtypes; asv skips the benchmark
+            raise NotImplementedError
+
         if index_type == "monotonic_incr":
             if unique:
                 arr = np.arange(N * 3, dtype=dtype)

diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst
@@ -383,7 +383,7 @@ In Python, since ``a`` is a list, you can simply use list comprehension.
 
 .. ipython:: python
 
-   a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4)
+   a = np.array(list(range(1, 24)) + [np.nan]).reshape(2, 3, 4)
    pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)])
 
 meltlist
@@ -402,7 +402,7 @@ In Python, this list would be a list of tuples, so
 
 .. ipython:: python
 
-   a = list(enumerate(list(range(1, 5)) + [np.NAN]))
+   a = list(enumerate(list(range(1, 5)) + [np.nan]))
    pd.DataFrame(a)
 
 For more details and examples see :ref:`the Intro to Data Structures

diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
@@ -2064,12 +2064,12 @@ different numeric dtypes will **NOT** be combined. The following example will gi
 
 .. ipython:: python
 
-   df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float32")
+   df1 = pd.DataFrame(np.random.randn(8, 1), columns=["A"], dtype="float64")
    df1
    df1.dtypes
    df2 = pd.DataFrame(
        {
-           "A": pd.Series(np.random.randn(8), dtype="float16"),
+           "A": pd.Series(np.random.randn(8), dtype="float32"),
            "B": pd.Series(np.random.randn(8)),
            "C": pd.Series(np.random.randint(0, 255, size=8), dtype="uint8"),  # [0,255] (range of uint8)
        }

diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
@@ -171,6 +171,7 @@ can be improved by passing an ``np.ndarray``.
    In [4]: %%cython
       ...: cimport numpy as np
       ...: import numpy as np
+      ...: np.import_array()
       ...: cdef double f_typed(double x) except? -2:
       ...:     return x * (x - 1)
       ...: cpdef double integrate_f_typed(double a, double b, int N):
@@ -225,6 +226,7 @@ and ``wraparound`` checks can yield more performance.
       ...: cimport cython
       ...: cimport numpy as np
       ...: import numpy as np
+      ...: np.import_array()
       ...: cdef np.float64_t f_typed(np.float64_t x) except? -2:
       ...:     return x * (x - 1)
       ...: cpdef np.float64_t integrate_f_typed(np.float64_t a, np.float64_t b, np.int64_t N):

diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst
@@ -74,10 +74,10 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe
 
 .. ipython:: python
 
-   df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
+   df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float64')
    df1
    df1.dtypes
-   df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'),
+   df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float32'),
                        'B': pd.Series(np.random.randn(8)),
                        'C': pd.Series(range(8), dtype='uint8')})
    df2

diff --git a/environment.yml b/environment.yml
@@ -23,7 +23,7 @@ dependencies:
 
   # required dependencies
   - python-dateutil
-  - numpy<2
+  - numpy<3
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
@@ -36,8 +36,8 @@
                 r".*In the future `np\.long` will be defined as.*",
                 FutureWarning,
             )
-            np_long = np.long  # type: ignore[attr-defined]
-            np_ulong = np.ulong  # type: ignore[attr-defined]
+            np_long = np.long
+            np_ulong = np.ulong
     except AttributeError:
         np_long = np.int_
         np_ulong = np.uint

diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
@@ -351,7 +351,7 @@ def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]:
 AttributeError: The series must contain integer data only.
 >>> df = pd.Series([1, 2, 3])
 >>> df.int_accessor.sum()
-6"""
+np.int64(6)"""
 
 
 @doc(_register_accessor, klass="Series", examples=_register_series_examples)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -941,7 +941,7 @@ def argmin(self, skipna: bool = True) -> int:
         --------
         >>> arr = pd.array([3, 1, 2, 5, 4])
         >>> arr.argmin()
-        1
+        np.int64(1)
         """
         # Implementer note: You have two places to override the behavior of
         # argmin.
@@ -975,7 +975,7 @@ def argmax(self, skipna: bool = True) -> int:
         --------
         >>> arr = pd.array([3, 1, 2, 5, 4])
         >>> arr.argmax()
-        3
+        np.int64(3)
         """
         # Implementer note: You have two places to override the behavior of
         # argmax.
@@ -1959,10 +1959,10 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
         --------
         >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray):
         ...     def _formatter(self, boxed=False):
-        ...         return lambda x: "*" + str(x) + "*" if boxed else repr(x) + "*"
+        ...         return lambda x: "*" + str(x) + "*"
         >>> MyExtensionArray(np.array([1, 2, 3, 4]))
         <MyExtensionArray>
-        [1*, 2*, 3*, 4*]
+        [*1*, *2*, *3*, *4*]
         Length: 4, dtype: int64
         """
         if boxed:
@@ -2176,15 +2176,15 @@ def _reduce(
         Examples
         --------
         >>> pd.array([1, 2, 3])._reduce("min")
-        1
+        np.int64(1)
         >>> pd.array([1, 2, 3])._reduce("max")
-        3
+        np.int64(3)
         >>> pd.array([1, 2, 3])._reduce("sum")
-        6
+        np.int64(6)
         >>> pd.array([1, 2, 3])._reduce("mean")
-        2.0
+        np.float64(2.0)
         >>> pd.array([1, 2, 3])._reduce("median")
-        2.0
+        np.float64(2.0)
         """
         meth = getattr(self, name, None)
         if meth is None:

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -275,7 +275,7 @@ def _unbox_scalar(
         --------
         >>> arr = pd.array(np.array(["1970-01-01"], "datetime64[ns]"))
         >>> arr._unbox_scalar(arr[0])
-        numpy.datetime64('1970-01-01T00:00:00.000000000')
+        np.datetime64('1970-01-01T00:00:00.000000000')
         """
         raise AbstractMethodError(self)
 

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -1775,7 +1775,8 @@ def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
         [(0, 1], (1, 2]]
         Length: 2, dtype: interval[int64, right]
         >>> idx.to_tuples()
-        array([(0, 1), (1, 2)], dtype=object)
+        array([(np.int64(0), np.int64(1)), (np.int64(1), np.int64(2))],
+              dtype=object)
 
         For :class:`pandas.IntervalIndex`:
 

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -1378,25 +1378,25 @@ def any(
         skips NAs):
 
         >>> pd.array([True, False, True]).any()
-        True
+        np.True_
         >>> pd.array([True, False, pd.NA]).any()
-        True
+        np.True_
         >>> pd.array([False, False, pd.NA]).any()
-        False
+        np.False_
         >>> pd.array([], dtype="boolean").any()
-        False
+        np.False_
         >>> pd.array([pd.NA], dtype="boolean").any()
-        False
+        np.False_
         >>> pd.array([pd.NA], dtype="Float64").any()
-        False
+        np.False_
 
         With ``skipna=False``, the result can be NA if this is logically
         required (whether ``pd.NA`` is True or False influences the result):
 
         >>> pd.array([True, False, pd.NA]).any(skipna=False)
-        True
+        np.True_
         >>> pd.array([1, 0, pd.NA]).any(skipna=False)
-        True
+        np.True_
         >>> pd.array([False, False, pd.NA]).any(skipna=False)
         <NA>
         >>> pd.array([0, 0, pd.NA]).any(skipna=False)
@@ -1466,17 +1466,17 @@ def all(
         skips NAs):
 
         >>> pd.array([True, True, pd.NA]).all()
-        True
+        np.True_
         >>> pd.array([1, 1, pd.NA]).all()
-        True
+        np.True_
         >>> pd.array([True, False, pd.NA]).all()
-        False
+        np.False_
         >>> pd.array([], dtype="boolean").all()
-        True
+        np.True_
         >>> pd.array([pd.NA], dtype="boolean").all()
-        True
+        np.True_
         >>> pd.array([pd.NA], dtype="Float64").all()
-        True
+        np.True_
 
         With ``skipna=False``, the result can be NA if this is logically
         required (whether ``pd.NA`` is True or False influences the result):
@@ -1486,9 +1486,9 @@ def all(
         >>> pd.array([1, 1, pd.NA]).all(skipna=False)
         <NA>
         >>> pd.array([True, False, pd.NA]).all(skipna=False)
-        False
+        np.False_
         >>> pd.array([1, 0, pd.NA]).all(skipna=False)
-        False
+        np.False_
         """
         nv.validate_all((), kwargs)
 

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
@@ -297,7 +297,7 @@ class SparseFrameAccessor(BaseAccessor, PandasDelegate):
     --------
     >>> df = pd.DataFrame({"a": [1, 2, 0, 0], "b": [3, 0, 0, 4]}, dtype="Sparse[int]")
     >>> df.sparse.density
-    0.5
+    np.float64(0.5)
     """
 
     def _validate(self, data) -> None:
@@ -459,7 +459,7 @@ def density(self) -> float:
         --------
         >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0, 1])})
         >>> df.sparse.density
-        0.5
+        np.float64(0.5)
         """
         tmp = np.mean([column.array.density for _, column in self._parent.items()])
         return tmp

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -804,9 +804,9 @@ def argmax(
         dtype: float64
 
         >>> s.argmax()
-        2
+        np.int64(2)
         >>> s.argmin()
-        0
+        np.int64(0)
 
         The maximum cereal calories is the third element and
         the minimum cereal calories is the first element,
@@ -1360,7 +1360,7 @@ def factorize(
         dtype: int64
 
         >>> ser.searchsorted(4)
-        3
+        np.int64(3)
 
         >>> ser.searchsorted([0, 4])
         array([0, 3])
@@ -1379,7 +1379,7 @@ def factorize(
         dtype: datetime64[s]
 
         >>> ser.searchsorted('3/14/2000')
-        3
+        np.int64(3)
 
         >>> ser = pd.Categorical(
         ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
@@ -1389,7 +1389,7 @@ def factorize(
         Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']
 
         >>> ser.searchsorted('bread')
-        1
+        np.int64(1)
 
         >>> ser.searchsorted(['bread'], side='right')
         array([3])

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -246,7 +246,8 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi
         with warnings.catch_warnings():
             # Can remove warning filter once NumPy 1.24 is min version
             if not np_version_gte1p24:
-                warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
+                # In NumPy 2.0, VisibleDeprecationWarning lives only in np.exceptions
+                warnings.simplefilter("ignore", np.VisibleDeprecationWarning)  # type: ignore[attr-defined]
             result = np.asarray(values, dtype=dtype)
     except ValueError:
         # Using try/except since it's more performant than checking is_list_like

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -428,9 +428,9 @@ def array_equivalent(
     Examples
     --------
     >>> array_equivalent(np.array([1, 2, np.nan]), np.array([1, 2, np.nan]))
-    True
+    np.True_
     >>> array_equivalent(np.array([1, np.nan, 2]), np.array([1, 2, np.nan]))
-    False
+    np.False_
     """
     left, right = np.asarray(left), np.asarray(right)
 
@@ -626,7 +626,7 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
     >>> na_value_for_dtype(np.dtype("bool"))
     False
     >>> na_value_for_dtype(np.dtype("datetime64[ns]"))
-    numpy.datetime64('NaT')
+    np.datetime64('NaT')
     """
 
     if isinstance(dtype, ExtensionDtype):

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -887,7 +887,7 @@ def squeeze(self, axis: Axis | None = None) -> Scalar | Series | DataFrame:
         dtype: int64
 
         >>> even_primes.squeeze()
-        2
+        np.int64(2)
 
         Squeezing objects with more than one value in every axis does nothing:
 
@@ -945,7 +945,7 @@ def squeeze(self, axis: Axis | None = None) -> Scalar | Series | DataFrame:
         Squeezing all axes will project directly into a scalar:
 
         >>> df_0a.squeeze()
-        1
+        np.int64(1)
         """
         axes = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),)
         result = self.iloc[
@@ -7954,7 +7954,7 @@ def asof(self, where, subset=None):
         dtype: float64
 
         >>> s.asof(20)
-        2.0
+        np.float64(2.0)
 
         For a sequence `where`, a Series is returned. The first value is
         NaN, because the first element of `where` is before the first
@@ -7969,7 +7969,7 @@ def asof(self, where, subset=None):
         NaN, even though NaN is at the index location for ``30``.
 
         >>> s.asof(30)
-        2.0
+        np.float64(2.0)
 
         Take all columns into consideration