Skip to content

Commit

Permalink
ENH: ints_to_pytimedelta support non-nano (pandas-dev#46828)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and yehoshuadimarsky committed Jul 13, 2022
1 parent 7efe3bb commit d0608ba
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ UnitChoices = Literal[
_S = TypeVar("_S", bound=timedelta)

def ints_to_pytimedelta(
arr: npt.NDArray[np.int64], # const int64_t[:]
arr: npt.NDArray[np.timedelta64],
box: bool = ...,
) -> npt.NDArray[np.object_]: ...
def array_to_timedelta64(
Expand Down
50 changes: 45 additions & 5 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ from pandas._libs.tslibs.np_datetime cimport (
cmp_scalar,
get_datetime64_unit,
get_timedelta64_value,
get_unit_from_dtype,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
pandas_timedelta_to_timedeltastruct,
Expand Down Expand Up @@ -141,14 +142,14 @@ _no_input = object()

@cython.boundscheck(False)
@cython.wraparound(False)
def ints_to_pytimedelta(const int64_t[:] arr, box=False):
def ints_to_pytimedelta(ndarray m8values, box=False):
"""
convert an i8 repr to an ndarray of timedelta or Timedelta (if box ==
True)
Parameters
----------
arr : ndarray[int64_t]
m8values : ndarray[timedelta64]
box : bool, default False
Returns
Expand All @@ -157,9 +158,12 @@ def ints_to_pytimedelta(const int64_t[:] arr, box=False):
array of Timedelta or timedeltas objects
"""
cdef:
Py_ssize_t i, n = len(arr)
Py_ssize_t i, n = m8values.size
int64_t value
object[::1] result = np.empty(n, dtype=object)
NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype)

arr = m8values.view("i8")

for i in range(n):

Expand All @@ -168,9 +172,26 @@ def ints_to_pytimedelta(const int64_t[:] arr, box=False):
result[i] = <object>NaT
else:
if box:
result[i] = Timedelta(value)
else:
result[i] = _timedelta_from_value_and_reso(value, reso=reso)
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns:
result[i] = timedelta(microseconds=int(value) / 1000)
elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
result[i] = timedelta(microseconds=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
result[i] = timedelta(milliseconds=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
result[i] = timedelta(seconds=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
result[i] = timedelta(minutes=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
result[i] = timedelta(hours=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
result[i] = timedelta(days=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
result[i] = timedelta(weeks=value)
else:
# Month, Year, NPY_FR_GENERIC, pico, femto, atto
raise NotImplementedError(reso)

return result.base # .base to access underlying np.ndarray

Expand Down Expand Up @@ -1530,6 +1551,9 @@ class Timedelta(_Timedelta):
int64_t result, unit, remainder
ndarray[int64_t] arr

if self._reso != NPY_FR_ns:
raise NotImplementedError

from pandas._libs.tslibs.offsets import to_offset
unit = to_offset(freq).nanos

Expand Down Expand Up @@ -1620,6 +1644,8 @@ class Timedelta(_Timedelta):

elif is_integer_object(other) or is_float_object(other):
# integers or floats
if self._reso != NPY_FR_ns:
raise NotImplementedError
return Timedelta(self.value / other, unit='ns')

elif is_array(other):
Expand All @@ -1633,6 +1659,8 @@ class Timedelta(_Timedelta):
other = Timedelta(other)
if other is NaT:
return np.nan
if self._reso != NPY_FR_ns:
raise NotImplementedError
return float(other.value) / self.value

elif is_array(other):
Expand All @@ -1651,17 +1679,25 @@ class Timedelta(_Timedelta):
other = Timedelta(other)
if other is NaT:
return np.nan
if self._reso != NPY_FR_ns:
raise NotImplementedError
return self.value // other.value

elif is_integer_object(other) or is_float_object(other):
if self._reso != NPY_FR_ns:
raise NotImplementedError
return Timedelta(self.value // other, unit='ns')

elif is_array(other):
if other.dtype.kind == 'm':
# also timedelta-like
if self._reso != NPY_FR_ns:
raise NotImplementedError
return _broadcast_floordiv_td64(self.value, other, _floordiv)
elif other.dtype.kind in ['i', 'u', 'f']:
if other.ndim == 0:
if self._reso != NPY_FR_ns:
raise NotImplementedError
return Timedelta(self.value // other)
else:
return self.to_timedelta64() // other
Expand All @@ -1678,11 +1714,15 @@ class Timedelta(_Timedelta):
other = Timedelta(other)
if other is NaT:
return np.nan
if self._reso != NPY_FR_ns:
raise NotImplementedError
return other.value // self.value

elif is_array(other):
if other.dtype.kind == 'm':
# also timedelta-like
if self._reso != NPY_FR_ns:
raise NotImplementedError
return _broadcast_floordiv_td64(self.value, other, _rfloordiv)

# Includes integer array // Timedelta, disallowed in GH#19761
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def astype(self, dtype, copy: bool = True):

elif self.dtype.kind == "m":
i8data = self.asi8.ravel()
converted = ints_to_pytimedelta(i8data, box=True)
converted = ints_to_pytimedelta(self._ndarray.ravel(), box=True)
return converted.reshape(self.shape)

return self._box_values(self.asi8.ravel()).reshape(self.shape)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def __iter__(self):
yield self[i]
else:
# convert in chunks of 10k for efficiency
data = self.asi8
data = self._ndarray
length = len(self)
chunksize = 10000
chunks = (length // chunksize) + 1
Expand Down Expand Up @@ -886,7 +886,7 @@ def to_pytimedelta(self) -> np.ndarray:
-------
timedeltas : ndarray[object]
"""
return tslibs.ints_to_pytimedelta(self.asi8)
return tslibs.ints_to_pytimedelta(self._ndarray)

days = _field_accessor("days", "days", "Number of days for each element.")
seconds = _field_accessor(
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/tslibs/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from pandas._libs.tslibs.timedeltas import (
array_to_timedelta64,
delta_to_nanoseconds,
ints_to_pytimedelta,
)

from pandas import (
Timedelta,
offsets,
)
import pandas._testing as tm


@pytest.mark.parametrize(
Expand Down Expand Up @@ -89,3 +91,29 @@ def test_array_to_timedelta64_non_object_raises(self):
msg = "'values' must have object dtype"
with pytest.raises(TypeError, match=msg):
array_to_timedelta64(values)


@pytest.mark.parametrize("unit", ["s", "ms", "us"])
def test_ints_to_pytimedelta(unit):
    """Check ``ints_to_pytimedelta`` on non-nanosecond timedelta64 input.

    Both the unboxed (``box=False``) and boxed (``box=True``) results are
    compared element-wise against independently constructed object arrays.
    """
    m8values = np.arange(6, dtype=np.int64).view(f"m8[{unit}]")

    # Unboxed path: for non-nanosecond units, ``.astype(object)`` gives
    # pytimedelta objects instead of integers, so it serves as the reference.
    unboxed = ints_to_pytimedelta(m8values, box=False)
    unboxed_expected = m8values.astype(object)
    tm.assert_numpy_array_equal(unboxed, unboxed_expected)

    # Boxed path: every element is compared with a Timedelta built from the
    # corresponding timedelta64 scalar.
    boxed = ints_to_pytimedelta(m8values, box=True)
    boxed_expected = np.array(
        [Timedelta(item) for item in m8values],
        dtype=object,
    )
    tm.assert_numpy_array_equal(boxed, boxed_expected)


@pytest.mark.parametrize("unit", ["Y", "M", "ps", "fs", "as"])
def test_ints_to_pytimedelta_unsupported(unit):
    """Check that these units raise ``NotImplementedError``.

    The error is expected for both ``box=False`` and ``box=True``, and its
    message must contain a one- or two-digit number.
    """
    m8values = np.arange(6, dtype=np.int64).view(f"m8[{unit}]")

    # Same expectation regardless of boxing, so exercise both settings.
    for box in (False, True):
        with pytest.raises(NotImplementedError, match=r"\d{1,2}"):
            ints_to_pytimedelta(m8values, box=box)

0 comments on commit d0608ba

Please sign in to comment.