Description
Bug report
Bug description:
The builtin min()
and max()
functions always iterate over their argument. This makes sense for list
s and tuple
s, but for containers where elements are created as needed, such as range
s, this approach is often unnecessary and can take a very, very long time (up to six or seven thousand years, in the case of a range
.)
Where iteration can take place entirely in native functions and methods (as with range
), the Python interpreter can't be stopped with ^C (SIGINT
); it has to be kill(1)
ed.
I imagine the "right" way to fix this is to have min()
/max()
delegate to __min__()
/__max__()
where the argument's class provides that method.
A simple reproduction case:
import time
r = range(1 << 29)
before = time.perf_counter()
assert min(r) == 0
# should be instantaneous, but takes several seconds
# `max()` is similar
after = time.perf_counter()
duration = after - before
print(f"That took {duration:.3f}s.")
# That took 7.390s.
It doesn't have to be like this:
"""Demonstrates that finding the min/max of a `range` should be instantaneous
Prints (for example):
The new version is 33,200× faster.
"""
import time
def min_range(r: range) -> int:
if not r:
return min(())
return min((r[0], r[-1]))
def max_range(r: range) -> int:
if not r:
return max(())
return max((r[0], r[-1]))
new_time_s = 0.0
builtin_time_s = 0.0
for power in range(24+1):
r = range(1 << power)
before = time.perf_counter()
got_builtin = min(r), max(r)
builtin_time_s += time.perf_counter() - before
before = time.perf_counter()
got_new = min_range(r), max_range(r)
new_time_s += time.perf_counter() - before
assert got_new == got_builtin
ratio = int(round(builtin_time_s / new_time_s, -2))
print(f"The new version is {ratio:,}× faster.")
# The new version is 33,200× faster.
A motivating example:
"""Motivating example
"""
import random
import sys
from typing import Final
def float_cent() -> float:
"""Return the smallest positive `float`"""
float_info: Final = sys.float_info
float_cent = float_info.min / (1 << (float_info.mant_dig - 1))
assert float_cent / 2 in (0, float_cent)
# no representable value exists between zero, `float_cent`
return float_cent
def random_subnormal() -> float:
"""Return a random, positive, subnormal float
"""
# (Yes, there are ways to write this that avoid `range` entirely;
# that's not really the point.)
cent: Final = float_cent()
range_ = range(1, round(sys.float_info.min / cent))
# length: roughly 2**52
lowest = min(range_) * cent # ← interpreter hangs here for a few years
highest = max(range_) * cent # ← hangs again for another few years
assert lowest == cent
assert highest == sys.float_info.min - cent
return random.choice(range_) * cent
Estimates of min(<range>)
or max(<range>)
runtime:
(below ; Python 3.13.3, Apple M1 Max; Python 3.14.0a7 takes about 1/3rd less time)
1 << 10: 23 us
1 << 11: 46 us
1 << 12: 92 us
1 << 13: 180 us
1 << 14: 370 us
1 << 15: 740 us
1 << 16: 1.5 ms
1 << 17: 2.9 ms
1 << 18: 5.9 ms
1 << 19: 11 ms
1 << 20: 23 ms
1 << 21: 47 ms
1 << 22: 94 ms
1 << 23: 190 ms
1 << 24: 380 ms
1 << 25: 750 ms
1 << 26: 1.5 seconds
1 << 27: 3.0 seconds
1 << 28: 6.0 seconds
1 << 29: 12 seconds
1 << 30: 24 seconds
1 << 31: 48 seconds
1 << 32: 1.6 mins
1 << 33: 3.2 mins
1 << 34: 6.4 mins
1 << 35: 12 mins
1 << 36: 25 mins
1 << 37: 51 mins
1 << 38: 1.7 hours
1 << 39: 3.4 hours
1 << 40: 6.9 hours
1 << 41: 13 hours
1 << 42: 1.1 days
1 << 43: 2.3 days
1 << 44: 4.6 days
1 << 45: 1.3 weeks
1 << 46: 2.6 weeks
1 << 47: 5.2 weeks
1 << 48: 10 weeks
1 << 49: 20 weeks
1 << 50: 41 weeks
1 << 51: 1.6 years
1 << 52: 3.2 years
1 << 53: 6.4 years
1 << 54: 12 years
1 << 55: 25 years
1 << 56: 51 years
1 << 57: 100 years
1 << 58: 210 years
1 << 59: 410 years
1 << 60: 820 years
1 << 61: 1600 years
1 << 62: 3300 years
1 << 63: 6600 years
The above was generated using:
"""Print estimated time required to compute `min(<range>)` or `max(<range>)`
for power-of-two `<range>` lengths
Sample output [[ removed; appears above. ]]
"""
import functools
import time
@functools.cache
def min_max_range_time_per_element_s() -> float:
"""Measure time required to run `min()`/`max()` on a `range`
Returns:
average time required per `range` element (seconds); accurate
for longer ranges (roughly 10⁵ elements or more)
"""
# rough, but good enough:
scale = 1 << 24 # takes about 300ms on my system
range_ = range(scale)
before = time.perf_counter()
min(range_)
after = time.perf_counter()
duration = after - before
duration_per_cycle = duration / scale
return duration_per_cycle
def est_time_min_max_range_s(elements: int) -> float:
"""Estimate time to run `min()` or `max()` on a `range()` with given length
Args:
elements: length of the `range`
Returns:
estimated runtime of `min(<range>)` or `max(<range>)` (seconds)
"""
per_element_s = min_max_range_time_per_element_s()
total_s = elements * per_element_s
return total_s
def fmt_time_s(total_s: float) -> str:
unit_divs = {
'years': 31_556_736, 'weeks': 604_800, 'days': 86_400,
'hours': 3600, 'mins': 60, 'seconds': 1, 'ms': 1/1000,
'us': 1/1_000_000,
}
unit_name = value = None
for unit_name, div in unit_divs.items():
if (value := total_s / div) >= 1:
break
assert (unit_name is not None) and (value is not None) # pyright
if value >= 1000:
fmt_value = f"{int(round(value, -2))}"
elif value >= 100:
fmt_value = f"{int(round(value, -1))}"
elif value >= 10:
fmt_value = f"{int(value)}"
elif value >= 1:
fmt_value = f"{value:.1f}"
else:
fmt_value = f"{value:.2f}"
assert (value < 0.1) or (0.8 <= float(fmt_value) / value <= 1.2)
if '.' not in fmt_value:
shift = 4 - len(fmt_value)
pad = 3
else:
index = fmt_value.index('.')
shift = 4 - index
pad = 3 - (len(fmt_value) - index)
shifts = shift * ' '
pads = pad * ' '
return f"{shifts}{fmt_value}{pads}{unit_name}"
def main() -> None:
for power in range(10, 63+1):
time_s = est_time_min_max_range_s(1 << power)
time_str = fmt_time_s(time_s)
print(f"1 << {power:2d}: {time_str}")
if __name__ == '__main__':
main()
CPython versions tested on:
3.13, 3.12, 3.11, 3.10, 3.9, 3.14
Operating systems tested on:
macOS