In [1]:
from datetime import datetime
from typing import List, Tuple

from libs import test_acceleration, print_estimation, bootstrap_growth_parameters, sliding_window_analysis

print('Loaded libraries')

Loaded libraries


In [2]:
observed_models: List[Tuple[str, datetime, float, float]] = [
    # model                       # release date         # task length at 50%  # task length at 80% (in hrs)
    ("GPT‑2",                     datetime(2019, 2, 14),  2/3600,              0.1/3600),
    ("GPT-3",                     datetime(2020, 5, 28),  9/3600,                2/3600),
    ("GPT‑3.5 Turbo",             datetime(2023, 3, 1),  36/3600,               10/3600),
    ("GPT-4",                     datetime(2023, 3, 14),  6/60,                  1/60  ),
    ("GPT-4-Nov23",               datetime(2023, 11, 6),  8/60,                  1/60  ),
    ("Claude 3 Opus",             datetime(2024, 3,  4),  6/60,                  1/60  ),
    ("GPT‑4o",                    datetime(2024, 5, 13),  9/60,                  2/60  ),
    ("Claude 3.5 Sonnet (old)",   datetime(2024, 6, 20), 18/60,                  3/60  ),
    ("o1 preview",                datetime(2024, 9, 12), 22/60,                  4/60  ),
    ("Claude 3.5 Sonnet (new)",   datetime(2024,10, 22), 28/60,                  5/60  ),
    ("o1",                        datetime(2024,12,  5), 39/60,                  6/60  ),
    ("Claude 3.7 Sonnet",         datetime(2025, 2, 24), 59/60,                 15/60  ),
    ("o3",                        datetime(2025, 4, 16),  1+45/60,              20/60  ),
    ("Claude 4 Sonnet",           datetime(2025, 5, 22),  1+7/60,               16/60  ),
    ("Claude 4 Opus",             datetime(2025, 5, 22),  1+19/60,              20/60  ),
    ("Gemini 2.5 Pro",            datetime(2025, 6, 5),  39/60,                  9/60  ),
    ("Grok 4",                    datetime(2025, 7, 9),   1+50/60,              15/60  ),
    ("GPT5",                      datetime(2025, 8, 7),   2+17/60,              25/60  ),
]

print("=== 50% Reliability ===")
print_estimation(observed_models)
print_estimation(observed_models[1:])
print_estimation(observed_models[3:])
print_estimation(observed_models[5:])

print_estimation(observed_models[1:-3])
print_estimation(observed_models[3:-3])
print_estimation(observed_models[5:-3])
print_estimation(observed_models[5:-3])

print("\n=== 80% Reliability ===")
print_estimation(observed_models, "80%")
print_estimation(observed_models[1:], "80%")
print_estimation(observed_models[3:], "80%")
print_estimation(observed_models[5:], "80%")

print_estimation(observed_models[1:-3], "80%")
print_estimation(observed_models[3:-3], "80%")
print_estimation(observed_models[5:-3], "80%")
print_estimation(observed_models[5:-3], "80%")

=== 50% Reliability ===
GPT‑2 to GPT5 (50%): (318, 0.906)
GPT-3 to GPT5 (50%): (296, 0.9)
GPT-4 to GPT5 (50%): (254, 0.9)
Claude 3 Opus to GPT5 (50%): (115, 1.0)
GPT-3 to Claude 4 Opus (50%): (293, 0.9)
GPT-4 to Claude 4 Opus (50%): (255, 0.9)
Claude 3 Opus to Claude 4 Opus (50%): (109, 1.0)
Claude 3 Opus to Claude 4 Opus (50%): (109, 1.0)

=== 80% Reliability ===
GPT‑2 to GPT5 (80%): (204, 0.974)
GPT-3 to GPT5 (80%): (302, 0.9)
GPT-4 to GPT5 (80%): (240, 0.9)
Claude 3 Opus to GPT5 (80%): (108, 1.0)
GPT-3 to Claude 4 Opus (80%): (300, 0.9)
GPT-4 to Claude 4 Opus (80%): (235, 0.9)
Claude 3 Opus to Claude 4 Opus (80%): (99, 1.0)
Claude 3 Opus to Claude 4 Opus (80%): (99, 1.0)


In [3]:
print("=== Bootstrap Analysis ===")
current_date = datetime.today()
print(f"Current date: {current_date.strftime('%Y-%m-%d')}")

for rel in ["50%", "80%"]:
    print(f"\n{rel} Reliability:")
    full_results = bootstrap_growth_parameters(observed_models, reliability_level=rel, current_date=current_date)
    print(f"Full dataset: {full_results['median']} (95% CI: {full_results['ci_95']})")
    recent_models = [m for m in observed_models if m[1] >= datetime(2024, 1, 1)]
    recent_results = bootstrap_growth_parameters(recent_models, reliability_level=rel, current_date=current_date)
    print(f"2024+ models: {recent_results['median']} (95% CI: {recent_results['ci_95']})")

df = sliding_window_analysis(observed_models)
print("\n=== Parameter stability by time window ===")
print(df.groupby('window')[['doubling_time', 'acceleration']].agg(['mean', 'std']))

=== Bootstrap Analysis ===
Current date: 2025-08-07

50% Reliability:
Full dataset: (294, 0.904) (95% CI: {'doubling_time': (108, 332), 'acceleration': (0.9, 1.0)})
2024+ models: (128, 0.93) (95% CI: {'doubling_time': (108, 171), 'acceleration': (0.9, 1.0)})

80% Reliability:
Full dataset: (229, 0.944) (95% CI: {'doubling_time': (118, 306), 'acceleration': (0.9, 1.0)})
2024+ models: (127, 0.9) (95% CI: {'doubling_time': (102, 156), 'acceleration': (0.9, 1.0)})

=== Parameter stability by time window ===
       doubling_time             acceleration          
                mean         std         mean       std
window                                                 
6         155.076923  101.871211     0.946154  0.051887
8         177.090909   83.641443     0.935091  0.048325
10        186.888889   83.553044     0.928556  0.044596
12        199.000000   91.471307     0.942571  0.053100


In [5]:
test_acceleration(
    start_task_length=2/60/60, # GPT2
    agi_task_length=167, 
    initial_doubling_time=260,
    acceleration=0.95,
    start_date="2019-02-14", # GPT2
)

Step |    Date    |    Day |       Task | τ (d)
-----------------------------------------------
   0 | 2019‑02‑14 |      0 |       2sec | 260.0
   1 | 2019‑11‑01 |    260 |       4sec | 247.0
   2 | 2020‑07‑05 |    507 |       8sec | 234.6
   3 | 2021‑02‑24 |    741 |      16sec | 222.9
   4 | 2021‑10‑05 |    964 |      32sec | 211.8
   5 | 2022‑05‑05 |   1176 |    1.07min | 201.2
   6 | 2022‑11‑22 |   1377 |    2.13min | 191.1
   7 | 2023‑06‑01 |   1568 |    4.27min | 181.6
   8 | 2023‑11‑30 |   1750 |    8.53min | 172.5
   9 | 2024‑05‑20 |   1922 |   17.07min | 163.9
  10 | 2024‑10‑31 |   2086 |   34.13min | 155.7
  11 | 2025‑04‑05 |   2242 |     1.14hr | 147.9
  12 | 2025‑08‑31 |   2390 |     2.28hr | 140.5
  13 | 2026‑01‑18 |   2530 |     4.55hr | 133.5
  14 | 2026‑06‑01 |   2664 |     9.10hr | 126.8
  15 | 2026‑10‑05 |   2790 |    18.20hr | 120.5
  16 | 2027‑02‑03 |   2911 |    36.41hr | 114.4
  17 | 2027‑05‑28 |   3025 |    72.82hr | 108.7
  18 | 2027‑09‑14 |   3134 |   145.64hr 