Skip to content

Commit

Permalink
REF: delta_to_nanoseconds handle non-nano (pandas-dev#47191)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and yehoshuadimarsky committed Jul 13, 2022
1 parent a7edc1f commit 155fce3
Show file tree
Hide file tree
Showing 13 changed files with 123 additions and 59 deletions.
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def to_offset(freq: None) -> None: ...
def to_offset(freq: timedelta | BaseOffset | str) -> BaseOffset: ...

class Tick(SingleConstructorOffset):
_reso: int
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
@property
def delta(self) -> Timedelta: ...
Expand Down
7 changes: 7 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -968,42 +968,49 @@ cdef class Day(Tick):
_nanos_inc = 24 * 3600 * 1_000_000_000
_prefix = "D"
_period_dtype_code = PeriodDtypeCode.D
_reso = NPY_DATETIMEUNIT.NPY_FR_D


cdef class Hour(Tick):
_nanos_inc = 3600 * 1_000_000_000
_prefix = "H"
_period_dtype_code = PeriodDtypeCode.H
_reso = NPY_DATETIMEUNIT.NPY_FR_h


cdef class Minute(Tick):
_nanos_inc = 60 * 1_000_000_000
_prefix = "T"
_period_dtype_code = PeriodDtypeCode.T
_reso = NPY_DATETIMEUNIT.NPY_FR_m


cdef class Second(Tick):
_nanos_inc = 1_000_000_000
_prefix = "S"
_period_dtype_code = PeriodDtypeCode.S
_reso = NPY_DATETIMEUNIT.NPY_FR_s


cdef class Milli(Tick):
_nanos_inc = 1_000_000
_prefix = "L"
_period_dtype_code = PeriodDtypeCode.L
_reso = NPY_DATETIMEUNIT.NPY_FR_ms


cdef class Micro(Tick):
_nanos_inc = 1000
_prefix = "U"
_period_dtype_code = PeriodDtypeCode.U
_reso = NPY_DATETIMEUNIT.NPY_FR_us


cdef class Nano(Tick):
_nanos_inc = 1
_prefix = "N"
_period_dtype_code = PeriodDtypeCode.N
_reso = NPY_DATETIMEUNIT.NPY_FR_ns


def delta_to_tick(delta: timedelta) -> Tick:
Expand Down
15 changes: 9 additions & 6 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1680,14 +1680,17 @@ cdef class _Period(PeriodMixin):

def _add_timedeltalike_scalar(self, other) -> "Period":
cdef:
int64_t nanos, base_nanos
int64_t inc

if is_tick_object(self.freq):
nanos = delta_to_nanoseconds(other)
base_nanos = self.freq.base.nanos
if nanos % base_nanos == 0:
ordinal = self.ordinal + (nanos // base_nanos)
return Period(ordinal=ordinal, freq=self.freq)
try:
inc = delta_to_nanoseconds(other, reso=self.freq._reso, round_ok=False)
except ValueError as err:
raise IncompatibleFrequency("Input cannot be converted to "
f"Period(freq={self.freqstr})") from err
# TODO: overflow-check here
ordinal = self.ordinal + inc
return Period(ordinal=ordinal, freq=self.freq)
raise IncompatibleFrequency("Input cannot be converted to "
f"Period(freq={self.freqstr})")

Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ from .np_datetime cimport NPY_DATETIMEUNIT


# Exposed for tslib, not intended for outside use.
cpdef int64_t delta_to_nanoseconds(delta) except? -1
cpdef int64_t delta_to_nanoseconds(
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*, bint allow_year_month=*
) except? -1
cdef convert_to_timedelta64(object ts, str unit)
cdef bint is_any_td_scalar(object obj)

Expand Down
7 changes: 6 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,12 @@ def array_to_timedelta64(
errors: str = ...,
) -> np.ndarray: ... # np.ndarray[m8ns]
def parse_timedelta_unit(unit: str | None) -> UnitChoices: ...
def delta_to_nanoseconds(delta: np.timedelta64 | timedelta | Tick) -> int: ...
def delta_to_nanoseconds(
delta: np.timedelta64 | timedelta | Tick,
reso: int = ..., # NPY_DATETIMEUNIT
round_ok: bool = ...,
allow_year_month: bool = ...,
) -> int: ...

class Timedelta(timedelta):
min: ClassVar[Timedelta]
Expand Down
75 changes: 62 additions & 13 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -201,28 +201,76 @@ def ints_to_pytimedelta(ndarray m8values, box=False):

# ----------------------------------------------------------------------

cpdef int64_t delta_to_nanoseconds(delta) except? -1:
if is_tick_object(delta):
return delta.nanos
if isinstance(delta, _Timedelta):
if delta._reso == NPY_FR_ns:
return delta.value
raise NotImplementedError(delta._reso)

if is_timedelta64_object(delta):
return get_timedelta64_value(ensure_td64ns(delta))
cpdef int64_t delta_to_nanoseconds(
delta,
NPY_DATETIMEUNIT reso=NPY_FR_ns,
bint round_ok=True,
bint allow_year_month=False,
) except? -1:
cdef:
_Timedelta td
NPY_DATETIMEUNIT in_reso
int64_t n

if is_tick_object(delta):
n = delta.n
in_reso = delta._reso
if in_reso == reso:
return n
else:
td = Timedelta._from_value_and_reso(delta.n, reso=in_reso)

elif isinstance(delta, _Timedelta):
td = delta
n = delta.value
in_reso = delta._reso
if in_reso == reso:
return n

elif is_timedelta64_object(delta):
in_reso = get_datetime64_unit(delta)
n = get_timedelta64_value(delta)
if in_reso == reso:
return n
else:
# _from_value_and_reso does not support Year, Month, or unit-less,
# so we have special handling if specified
try:
td = Timedelta._from_value_and_reso(n, reso=in_reso)
except NotImplementedError:
if allow_year_month:
td64 = ensure_td64ns(delta)
return delta_to_nanoseconds(td64, reso=reso)
else:
raise

if PyDelta_Check(delta):
elif PyDelta_Check(delta):
in_reso = NPY_DATETIMEUNIT.NPY_FR_us
try:
return (
n = (
delta.days * 24 * 3600 * 1_000_000
+ delta.seconds * 1_000_000
+ delta.microseconds
) * 1000
)
except OverflowError as err:
raise OutOfBoundsTimedelta(*err.args) from err

raise TypeError(type(delta))
if in_reso == reso:
return n
else:
td = Timedelta._from_value_and_reso(n, reso=in_reso)

else:
raise TypeError(type(delta))

try:
return td._as_reso(reso, round_ok=round_ok).value
except OverflowError as err:
unit_str = npy_unit_to_abbrev(reso)
raise OutOfBoundsTimedelta(
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
) from err


@cython.overflowcheck(True)
Expand Down Expand Up @@ -1411,6 +1459,7 @@ cdef class _Timedelta(timedelta):
else:
mult = get_conversion_factor(self._reso, reso)
with cython.overflowcheck(True):
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
value = self.value * mult
return type(self)._from_value_and_reso(value, reso=reso)

Expand Down
12 changes: 11 additions & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,17 @@ cdef class _Timestamp(ABCTimestamp):
raise NotImplementedError(self._reso)

if is_any_td_scalar(other):
nanos = delta_to_nanoseconds(other)
if (
is_timedelta64_object(other)
and get_datetime64_unit(other) == NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
# TODO: deprecate allowing this? We only get here
# with test_timedelta_add_timestamp_interval
other = np.timedelta64(other.view("i8"), "ns")
# TODO: disallow round_ok, allow_year_month?
nanos = delta_to_nanoseconds(
other, reso=self._reso, round_ok=True, allow_year_month=True
)
try:
result = type(self)(self.value + nanos, tz=self.tzinfo)
except OverflowError:
Expand Down
25 changes: 3 additions & 22 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,28 +1120,9 @@ def _add_timedeltalike_scalar(self, other):
new_values.fill(iNaT)
return type(self)(new_values, dtype=self.dtype)

# FIXME: this may overflow with non-nano
inc = delta_to_nanoseconds(other)

if not is_period_dtype(self.dtype):
# FIXME: don't hardcode 7, 8, 9, 10 here
# TODO: maybe patch delta_to_nanoseconds to take reso?

# error: "DatetimeLikeArrayMixin" has no attribute "_reso"
reso = self._reso # type: ignore[attr-defined]
if reso == 10:
pass
elif reso == 9:
# microsecond
inc = inc // 1000
elif reso == 8:
# millisecond
inc = inc // 1_000_000
elif reso == 7:
# second
inc = inc // 1_000_000_000
else:
raise NotImplementedError(reso)
# PeriodArray overrides, so we only get here with DTA/TDA
# error: "DatetimeLikeArrayMixin" has no attribute "_reso"
inc = delta_to_nanoseconds(other, reso=self._reso) # type: ignore[attr-defined]

new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan)
new_values = new_values.view("i8")
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ def _add_offset(self, other: BaseOffset):
self._require_matching_freq(other, base=True)
return self._addsub_int_array_or_scalar(other.n, operator.add)

# TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
def _add_timedeltalike_scalar(self, other):
"""
Parameters
Expand All @@ -797,10 +798,15 @@ def _add_timedeltalike_scalar(self, other):
raise raise_on_incompatible(self, other)

if notna(other):
# special handling for np.timedelta64("NaT"), avoid calling
# _check_timedeltalike_freq_compat as that would raise TypeError
other = self._check_timedeltalike_freq_compat(other)
other = np.timedelta64(other, "ns")
# Convert to an integer increment of our own freq, disallowing
# e.g. 30seconds if our freq is minutes.
try:
inc = delta_to_nanoseconds(other, reso=self.freq._reso, round_ok=False)
except ValueError as err:
# "Cannot losslessly convert units"
raise raise_on_incompatible(self, other) from err

return self._addsub_int_array_or_scalar(inc, operator.add)

return super()._add_timedeltalike_scalar(other)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/scalar/timedelta/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def test_td_add_timestamp_overflow(self):
with pytest.raises(OverflowError, match=msg):
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + timedelta(days=13 * 19999)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def test_overflow_on_construction():
with pytest.raises(OverflowError, match=msg):
Timedelta(7 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(timedelta(days=13 * 19999))

Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/scalar/timestamp/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pandas._libs.tslibs import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
Timedelta,
Timestamp,
offsets,
Expand Down Expand Up @@ -45,16 +46,20 @@ def test_overflow_offset_raises(self):
"will overflow"
)
lmsg = "|".join(
["Python int too large to convert to C long", "int too big to convert"]
[
"Python int too large to convert to C (long|int)",
"int too big to convert",
]
)
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"

with pytest.raises(OverflowError, match=lmsg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp + offset_overflow

with pytest.raises(OverflowError, match=msg):
offset_overflow + stamp

with pytest.raises(OverflowError, match=lmsg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp - offset_overflow

# xref https://github.com/pandas-dev/pandas/issues/14080
Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1847,14 +1847,7 @@ def test_to_datetime_list_of_integers(self):
def test_to_datetime_overflow(self):
# gh-17637
# we are overflowing Timedelta range here

msg = "|".join(
[
"Python int too large to convert to C long",
"long too big to convert",
"int too big to convert",
]
)
msg = "Cannot cast 139999 days, 0:00:00 to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
date_range(start="1/1/1700", freq="B", periods=100000)

Expand Down

0 comments on commit 155fce3

Please sign in to comment.