Skip to content

min(<range>) and max(<range>) can cause indefinite, uninterruptable interpreter hangs #135824

Closed as not planned
@finite-state-machine

Description

@finite-state-machine

Bug report

Bug description:

The builtin min() and max() functions always iterate over their argument. This makes sense for lists and tuples, but for containers where elements are created as needed, such as ranges, this approach is often unnecessary and can take a very, very long time (up to six or seven thousand years, in the case of a range.)

Where iteration can take place entirely in native functions and methods (as with range), the Python interpreter can't be stopped with ^C (SIGINT); it has to be kill(1)ed.

I imagine the "right" way to fix this is to have min()/max() delegate to __min__()/__max__() where the argument's class provides that method.

A simple reproduction case:

import time

r = range(1 << 29)
before = time.perf_counter()
assert min(r) == 0
        # should be instantaneous, but takes several seconds
        # `max()` is similar
after = time.perf_counter()
duration = after - before
print(f"That took {duration:.3f}s.")
# That took 7.390s.

It doesn't have to be like this:

"""Demonstrates that finding the min/max of a `range` should be instantaneous

Prints (for example):

        The new version is 33,200× faster.
"""
import time

def min_range(r: range) -> int:
    if not r:
        return min(())
    return min((r[0], r[-1]))

def max_range(r: range) -> int:
    if not r:
        return max(())
    return max((r[0], r[-1]))

new_time_s = 0.0
builtin_time_s = 0.0
for power in range(24+1):

    r = range(1 << power)

    before = time.perf_counter()
    got_builtin = min(r), max(r)
    builtin_time_s += time.perf_counter() - before

    before = time.perf_counter()
    got_new = min_range(r), max_range(r)
    new_time_s += time.perf_counter() - before

    assert got_new == got_builtin

ratio = int(round(builtin_time_s / new_time_s, -2))
print(f"The new version is {ratio:,}× faster.")
# The new version is 33,200× faster.

A motivating example:

"""Motivating example
"""
import random
import sys
from typing import Final


def float_cent() -> float:
    """Return the smallest positive `float`"""
    float_info: Final = sys.float_info
    float_cent = float_info.min / (1 << (float_info.mant_dig - 1))
    assert float_cent / 2 in (0, float_cent)
            # no representable value exists between zero, `float_cent`
    return float_cent

def random_subnormal() -> float:
    """Return a random, positive, subnormal float
    """
    # (Yes, there are ways to write this that avoid `range` entirely;
    #  that's not really the point.)
    cent: Final = float_cent()
    range_ = range(1, round(sys.float_info.min / cent))
            # length: roughly 2**52
    lowest = min(range_) * cent                                         # ← interpreter hangs here for a few years
    highest = max(range_) * cent                                        # ← hangs again for another few years
    assert lowest == cent
    assert highest == sys.float_info.min - cent
    return random.choice(range_) * cent

Estimates of min(<range>) or max(<range>) runtime:
(below ; Python 3.13.3, Apple M1 Max; Python 3.14.0a7 takes about 1/3rd less time)

1 << 10:    23   us
1 << 11:    46   us
1 << 12:    92   us
1 << 13:   180   us
1 << 14:   370   us
1 << 15:   740   us
1 << 16:     1.5 ms
1 << 17:     2.9 ms
1 << 18:     5.9 ms
1 << 19:    11   ms
1 << 20:    23   ms
1 << 21:    47   ms
1 << 22:    94   ms
1 << 23:   190   ms
1 << 24:   380   ms
1 << 25:   750   ms
1 << 26:     1.5 seconds
1 << 27:     3.0 seconds
1 << 28:     6.0 seconds
1 << 29:    12   seconds
1 << 30:    24   seconds
1 << 31:    48   seconds
1 << 32:     1.6 mins
1 << 33:     3.2 mins
1 << 34:     6.4 mins
1 << 35:    12   mins
1 << 36:    25   mins
1 << 37:    51   mins
1 << 38:     1.7 hours
1 << 39:     3.4 hours
1 << 40:     6.9 hours
1 << 41:    13   hours
1 << 42:     1.1 days
1 << 43:     2.3 days
1 << 44:     4.6 days
1 << 45:     1.3 weeks
1 << 46:     2.6 weeks
1 << 47:     5.2 weeks
1 << 48:    10   weeks
1 << 49:    20   weeks
1 << 50:    41   weeks
1 << 51:     1.6 years
1 << 52:     3.2 years
1 << 53:     6.4 years
1 << 54:    12   years
1 << 55:    25   years
1 << 56:    51   years
1 << 57:   100   years
1 << 58:   210   years
1 << 59:   410   years
1 << 60:   820   years
1 << 61:  1600   years
1 << 62:  3300   years
1 << 63:  6600   years

The above was generated using:

"""Print estimated time required to compute `min(<range>)` or `max(<range>)`
   for power-of-two `<range>` lengths

Sample output [[ removed; appears above. ]]
"""
import functools
import time

@functools.cache
def min_max_range_time_per_element_s() -> float:
    """Measure time required to run `min()`/`max()` on a `range`

    Returns:
        average time required per `range` element (seconds); accurate
        for longer ranges (roughly 10⁵ elements or more)
    """
    # rough, but good enough:
    scale = 1 << 24  # takes about 300ms on my system
    range_ = range(scale)

    before = time.perf_counter()
    min(range_)
    after = time.perf_counter()

    duration = after - before
    duration_per_cycle = duration / scale
    return duration_per_cycle

def est_time_min_max_range_s(elements: int) -> float:
    """Estimate time to run `min()` or `max()` on a `range()` with given length

    Args:
        elements: length of the `range`
    Returns:
        estimated runtime of `min(<range>)` or `max(<range>)` (seconds)
    """
    per_element_s = min_max_range_time_per_element_s()
    total_s = elements * per_element_s
    return total_s

def fmt_time_s(total_s: float) -> str:
    unit_divs = {
            'years': 31_556_736, 'weeks': 604_800, 'days': 86_400,
            'hours': 3600, 'mins': 60, 'seconds': 1, 'ms': 1/1000,
            'us': 1/1_000_000,
            }
    unit_name = value = None
    for unit_name, div in unit_divs.items():
        if (value := total_s / div) >= 1:
            break
    assert (unit_name is not None) and (value is not None)  # pyright
    if value >= 1000:
        fmt_value = f"{int(round(value, -2))}"
    elif value >= 100:
        fmt_value = f"{int(round(value, -1))}"
    elif value >= 10:
        fmt_value = f"{int(value)}"
    elif value >= 1:
        fmt_value = f"{value:.1f}"
    else:
        fmt_value = f"{value:.2f}"
    assert (value < 0.1) or (0.8 <= float(fmt_value) / value <= 1.2)
    if '.' not in fmt_value:
        shift = 4 - len(fmt_value)
        pad = 3
    else:
        index = fmt_value.index('.')
        shift = 4 - index
        pad = 3 - (len(fmt_value) - index)
    shifts = shift * ' '
    pads = pad * ' '
    return f"{shifts}{fmt_value}{pads}{unit_name}"

def main() -> None:

    for power in range(10, 63+1):
        time_s = est_time_min_max_range_s(1 << power)
        time_str = fmt_time_s(time_s)
        print(f"1 << {power:2d}:  {time_str}")

if __name__ == '__main__':
    main()

CPython versions tested on:

3.13, 3.12, 3.11, 3.10, 3.9, 3.14

Operating systems tested on:

macOS

Metadata

Metadata

Assignees

No one assigned

    Labels

    interpreter-core(Objects, Python, Grammar, and Parser dirs)performancePerformance or resource usagetype-featureA feature request or enhancement

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions