From d0608ba3a673e19b707997e774afac9601d1b8ac Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Apr 2022 17:31:37 -0700 Subject: [PATCH] ENH: ints_to_pytimedelta support non-nano (#46828) --- pandas/_libs/tslibs/timedeltas.pyi | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 50 +++++++++++++++++++++++--- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/timedeltas.py | 4 +-- pandas/tests/tslibs/test_timedeltas.py | 28 +++++++++++++++ 5 files changed, 77 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index f4711f728907d..a04104915cf1f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -63,7 +63,7 @@ UnitChoices = Literal[ _S = TypeVar("_S", bound=timedelta) def ints_to_pytimedelta( - arr: npt.NDArray[np.int64], # const int64_t[:] + arr: npt.NDArray[np.timedelta64], box: bool = ..., ) -> npt.NDArray[np.object_]: ... def array_to_timedelta64( diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7402ec86c036f..8f145d0d66acc 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -49,6 +49,7 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_scalar, get_datetime64_unit, get_timedelta64_value, + get_unit_from_dtype, npy_datetimestruct, pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, @@ -141,14 +142,14 @@ _no_input = object() @cython.boundscheck(False) @cython.wraparound(False) -def ints_to_pytimedelta(const int64_t[:] arr, box=False): +def ints_to_pytimedelta(ndarray m8values, box=False): """ convert an i8 repr to an ndarray of timedelta or Timedelta (if box == True) Parameters ---------- - arr : ndarray[int64_t] + m8values : ndarray[timedelta64] box : bool, default False Returns @@ -157,9 +158,12 @@ def ints_to_pytimedelta(const int64_t[:] arr, box=False): array of Timedelta or timedeltas objects """ cdef: - Py_ssize_t i, n = len(arr) + Py_ssize_t i, n =
m8values.size int64_t value object[::1] result = np.empty(n, dtype=object) + NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype) + + arr = m8values.view("i8") for i in range(n): @@ -168,9 +172,26 @@ def ints_to_pytimedelta(const int64_t[:] arr, box=False): result[i] = NaT else: if box: - result[i] = Timedelta(value) - else: + result[i] = _timedelta_from_value_and_reso(value, reso=reso) + elif reso == NPY_DATETIMEUNIT.NPY_FR_ns: result[i] = timedelta(microseconds=int(value) / 1000) + elif reso == NPY_DATETIMEUNIT.NPY_FR_us: + result[i] = timedelta(microseconds=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + result[i] = timedelta(milliseconds=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + result[i] = timedelta(seconds=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_m: + result[i] = timedelta(minutes=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_h: + result[i] = timedelta(hours=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_D: + result[i] = timedelta(days=value) + elif reso == NPY_DATETIMEUNIT.NPY_FR_W: + result[i] = timedelta(weeks=value) + else: + # Month, Year, NPY_FR_GENERIC, pico, femto, atto + raise NotImplementedError(reso) return result.base # .base to access underlying np.ndarray @@ -1530,6 +1551,9 @@ class Timedelta(_Timedelta): int64_t result, unit, remainder ndarray[int64_t] arr + if self._reso != NPY_FR_ns: + raise NotImplementedError + from pandas._libs.tslibs.offsets import to_offset unit = to_offset(freq).nanos @@ -1620,6 +1644,8 @@ class Timedelta(_Timedelta): elif is_integer_object(other) or is_float_object(other): # integers or floats + if self._reso != NPY_FR_ns: + raise NotImplementedError return Timedelta(self.value / other, unit='ns') elif is_array(other): @@ -1633,6 +1659,8 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: return np.nan + if self._reso != NPY_FR_ns: + raise NotImplementedError return float(other.value) / self.value elif is_array(other): @@ -1651,17 +1679,25 @@ class Timedelta(_Timedelta):
other = Timedelta(other) if other is NaT: return np.nan + if self._reso != NPY_FR_ns: + raise NotImplementedError return self.value // other.value elif is_integer_object(other) or is_float_object(other): + if self._reso != NPY_FR_ns: + raise NotImplementedError return Timedelta(self.value // other, unit='ns') elif is_array(other): if other.dtype.kind == 'm': # also timedelta-like + if self._reso != NPY_FR_ns: + raise NotImplementedError return _broadcast_floordiv_td64(self.value, other, _floordiv) elif other.dtype.kind in ['i', 'u', 'f']: if other.ndim == 0: + if self._reso != NPY_FR_ns: + raise NotImplementedError return Timedelta(self.value // other) else: return self.to_timedelta64() // other @@ -1678,11 +1714,15 @@ class Timedelta(_Timedelta): other = Timedelta(other) if other is NaT: return np.nan + if self._reso != NPY_FR_ns: + raise NotImplementedError return other.value // self.value elif is_array(other): if other.dtype.kind == 'm': # also timedelta-like + if self._reso != NPY_FR_ns: + raise NotImplementedError return _broadcast_floordiv_td64(self.value, other, _rfloordiv) # Includes integer array // Timedelta, disallowed in GH#19761 diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 27260f8ed62ca..9ced8f225c3a8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -432,7 +432,7 @@ def astype(self, dtype, copy: bool = True): elif self.dtype.kind == "m": i8data = self.asi8.ravel() - converted = ints_to_pytimedelta(i8data, box=True) + converted = ints_to_pytimedelta(self._ndarray.ravel(), box=True) return converted.reshape(self.shape) return self._box_values(self.asi8.ravel()).reshape(self.shape) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index bae67bdac68eb..2c6e7119b478d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -369,7 +369,7 @@ def __iter__(self): yield self[i] else: # convert in chunks of 10k for 
efficiency - data = self.asi8 + data = self._ndarray length = len(self) chunksize = 10000 chunks = (length // chunksize) + 1 @@ -886,7 +886,7 @@ def to_pytimedelta(self) -> np.ndarray: ------- timedeltas : ndarray[object] """ - return tslibs.ints_to_pytimedelta(self.asi8) + return tslibs.ints_to_pytimedelta(self._ndarray) days = _field_accessor("days", "days", "Number of days for each element.") seconds = _field_accessor( diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py index 7d7de7492a07b..d9e86d53f2587 100644 --- a/pandas/tests/tslibs/test_timedeltas.py +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -6,12 +6,14 @@ from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, delta_to_nanoseconds, + ints_to_pytimedelta, ) from pandas import ( Timedelta, offsets, ) +import pandas._testing as tm @pytest.mark.parametrize( @@ -89,3 +91,29 @@ def test_array_to_timedelta64_non_object_raises(self): msg = "'values' must have object dtype" with pytest.raises(TypeError, match=msg): array_to_timedelta64(values) + + +@pytest.mark.parametrize("unit", ["s", "ms", "us"]) +def test_ints_to_pytimedelta(unit): + # tests for non-nanosecond cases + arr = np.arange(6, dtype=np.int64).view(f"m8[{unit}]") + + res = ints_to_pytimedelta(arr, box=False) + # For non-nanosecond, .astype(object) gives pytimedelta objects + # instead of integers + expected = arr.astype(object) + tm.assert_numpy_array_equal(res, expected) + + res = ints_to_pytimedelta(arr, box=True) + expected = np.array([Timedelta(x) for x in arr], dtype=object) + tm.assert_numpy_array_equal(res, expected) + + +@pytest.mark.parametrize("unit", ["Y", "M", "ps", "fs", "as"]) +def test_ints_to_pytimedelta_unsupported(unit): + arr = np.arange(6, dtype=np.int64).view(f"m8[{unit}]") + + with pytest.raises(NotImplementedError, match=r"\d{1,2}"): + ints_to_pytimedelta(arr, box=False) + with pytest.raises(NotImplementedError, match=r"\d{1,2}"): + ints_to_pytimedelta(arr, box=True)