In [1]:
from datetime import datetime

from libs import (
    test_acceleration,
    print_estimation,
    bootstrap_growth_parameters,
    sliding_window_analysis,
)
from model_data import model_data

# Sanity marker: only printed if every import above succeeded.
print("Loaded libraries")

Loaded libraries


In [2]:
# Construct observed_models from model_data.
# Format: (model_name, release_date, task_length_50%, task_length_80%) in hours
# Only entries with a 50% measurement are kept; the 80% value is NOT checked
# here, so it may still be None for some models — TODO confirm that
# print_estimation tolerates that for the "80%" runs.
observed_models = [
    (model['name'], model['launch_date'], model['performance_50p'], model['performance_80p'])
    for model in model_data.values()
    if model['performance_50p'] is not None  # Exclude models without data
]

# Sub-ranges of observed_models to fit, expressed as slices so the 50% and 80%
# sections are guaranteed to use the same ranges. The first four keep the
# newest model and drop progressively more of the oldest ones; the last three
# additionally drop the newest model.
# NOTE(review): positional slicing presumably relies on model_data being
# ordered by launch date — confirm against model_data.
# The previous hand-copied call list evaluated the [5:-1] range twice per
# reliability level, printing the same estimate twice; each range now appears
# exactly once.
estimation_slices = [
    slice(None),
    slice(1, None),
    slice(3, None),
    slice(5, None),
    slice(1, -1),
    slice(3, -1),
    slice(5, -1),
]

print("=== 50% Reliability ===")
for model_range in estimation_slices:
    # No reliability argument: print_estimation's own default is used,
    # exactly as in the original calls.
    print_estimation(observed_models[model_range])

print("\n=== 80% Reliability ===")
for model_range in estimation_slices:
    print_estimation(observed_models[model_range], "80%")

=== 50% Reliability ===
GPT‑2 to GPT5 (50%): (316, 0.908)
GPT-3 to GPT5 (50%): (296, 0.9)
GPT-4 to GPT5 (50%): (253, 0.9)
Claude 3 Opus to GPT5 (50%): (116, 1.0)
GPT-3 to Claude 4.1 Opus (50%): (296, 0.9)
GPT-4 to Claude 4.1 Opus (50%): (256, 0.9)
Claude 3 Opus to Claude 4.1 Opus (50%): (117, 1.0)
Claude 3 Opus to Claude 4.1 Opus (50%): (117, 1.0)

=== 80% Reliability ===
GPT‑2 to GPT5 (80%): (204, 0.974)
GPT-3 to GPT5 (80%): (302, 0.9)
GPT-4 to GPT5 (80%): (239, 0.9)
Claude 3 Opus to GPT5 (80%): (109, 1.0)
GPT-3 to Claude 4.1 Opus (80%): (302, 0.9)
GPT-4 to Claude 4.1 Opus (80%): (241, 0.9)
Claude 3 Opus to Claude 4.1 Opus (80%): (108, 1.0)
Claude 3 Opus to Claude 4.1 Opus (80%): (108, 1.0)


In [3]:
print("=== Bootstrap Analysis ===")
# Wall-clock date at execution time. It is forwarded to every bootstrap call
# below, so the reported numbers presumably depend on the day the notebook is
# run — pin a fixed datetime here if exact reproducibility is required.
current_date = datetime.today()
print(f"Current date: {current_date.strftime('%Y-%m-%d')}")

# Subset of models released on or after the cutoff (element 1 of each tuple is
# the launch date). The filter does not depend on the reliability level, so it
# is built once instead of being recomputed on every loop iteration.
recent_cutoff = datetime(2024, 1, 1)
recent_models = [m for m in observed_models if m[1] >= recent_cutoff]

# NOTE(review): no random seed is passed to bootstrap_growth_parameters; if it
# resamples randomly, medians/CIs may differ between runs — TODO confirm.
for rel in ["50%", "80%"]:
    print(f"\n{rel} Reliability:")

    # Bootstrap over every observed model.
    full_results = bootstrap_growth_parameters(
        observed_models, reliability_level=rel, current_date=current_date
    )
    print(f"Full dataset: {full_results['median']} (95% CI: {full_results['ci_95']})")

    # Same bootstrap restricted to the recent subset.
    recent_results = bootstrap_growth_parameters(
        recent_models, reliability_level=rel, current_date=current_date
    )
    print(
        f"2024+ models: {recent_results['median']} (95% CI: {recent_results['ci_95']})"
    )

# Sliding-window fit over all models, summarised per window size as the
# mean and standard deviation of the fitted doubling time and acceleration.
df = sliding_window_analysis(observed_models)
print("\n=== Parameter stability by time window ===")
print(df.groupby("window")[["doubling_time", "acceleration"]].agg(["mean", "std"]))

=== Bootstrap Analysis ===
Current date: 2025-10-07

50% Reliability:
Full dataset: (297, 0.901) (95% CI: {'doubling_time': (110, 333), 'acceleration': (0.9, 1.0)})
2024+ models: (125, 0.956) (95% CI: {'doubling_time': (110, 172), 'acceleration': (0.9, 1.0)})

80% Reliability:
Full dataset: (231, 0.947) (95% CI: {'doubling_time': (121, 306), 'acceleration': (0.9, 1.0)})
2024+ models: (127, 0.922) (95% CI: {'doubling_time': (105, 157), 'acceleration': (0.9, 1.0)})

=== Parameter stability by time window ===
       doubling_time             acceleration          
                mean         std         mean       std
window                                                 
6         148.071429  101.323901      0.95000  0.051887
8         172.666667   81.422951      0.94050  0.049740
10        182.500000   79.971175      0.92570  0.043004
12        194.875000   85.318625      0.93725  0.051414


In [4]:
# Project an accelerating-doubling scenario anchored on the GPT-2 entry:
# start from its 50% task length and launch date, begin with a 260-day
# doubling time, and scale the doubling time by 0.95 after every doubling
# (the printed τ column goes 260.0 -> 247.0 -> 234.6 ...).
# NOTE(review): agi_task_length=167 is presumably in the same unit as the
# task lengths in model_data (hours) — confirm.
baseline_model = model_data['gpt2']
test_acceleration(
    start_task_length=baseline_model['performance_50p'],
    start_date=baseline_model['launch_date'].strftime('%Y-%m-%d'),
    initial_doubling_time=260,
    acceleration=0.95,
    agi_task_length=167,
)

Step |    Date    |    Day |       Task | τ (d)
-----------------------------------------------
   0 | 2019‑02‑14 |      0 |       2sec | 260.0
   1 | 2019‑11‑01 |    260 |       4sec | 247.0
   2 | 2020‑07‑05 |    507 |       8sec | 234.6
   3 | 2021‑02‑24 |    741 |      16sec | 222.9
   4 | 2021‑10‑05 |    964 |      32sec | 211.8
   5 | 2022‑05‑05 |   1176 |    1.07min | 201.2
   6 | 2022‑11‑22 |   1377 |    2.13min | 191.1
   7 | 2023‑06‑01 |   1568 |    4.27min | 181.6
   8 | 2023‑11‑30 |   1750 |    8.53min | 172.5
   9 | 2024‑05‑20 |   1922 |   17.07min | 163.9
  10 | 2024‑10‑31 |   2086 |   34.13min | 155.7
  11 | 2025‑04‑05 |   2242 |     1.14hr | 147.9
  12 | 2025‑08‑31 |   2390 |     2.28hr | 140.5
  13 | 2026‑01‑18 |   2530 |     4.55hr | 133.5
  14 | 2026‑06‑01 |   2664 |     9.10hr | 126.8
  15 | 2026‑10‑05 |   2790 |    18.20hr | 120.5
  16 | 2027‑02‑03 |   2911 |    36.41hr | 114.4
  17 | 2027‑05‑28 |   3025 |    72.82hr | 108.7
  18 | 2027‑09‑14 |   3134 |   145.64hr 