In [16]:
import numpy as np
import pandas as pd

from datetime import datetime, timedelta
from typing import List, Tuple
from scipy.optimize import minimize, Bounds


def _pretty_time(hours: float) -> str:
    """Return a string with value + unit, choosing h / min / s."""
    if hours >= 1:
        return f"{hours:6.2f}hr"
    minutes = hours * 60
    if minutes >= 1:
        return f"{minutes:6.2f}min"
    seconds = minutes * 60
    return f"{seconds:6.0f}sec"

    
def test_acceleration(
    start_task_length: float,
    agi_task_length: float,
    initial_doubling_time: float,
    acceleration: float = 1.0,
    start_date: str | datetime | None = None,
    date_fmt: str = "%Y‑%m‑%d",
):
    # Anchor date
    if start_date is None:
        start_date = datetime.today()
    elif isinstance(start_date, str):
        start_date = datetime.fromisoformat(start_date)

    current_task = start_task_length
    days_elapsed = 0.0
    tau = initial_doubling_time
    step = 0

    header = f"{'Step':>4} | {'Date':^10} | {'Day':>6} | {'Task':>10} | τ (d)"
    print(header)
    print("-" * len(header))

    while current_task < agi_task_length:
        date = start_date + timedelta(days=days_elapsed)
        print(f"{step:4d} | {date.strftime(date_fmt)} | "
              f"{int(days_elapsed):6d} | {_pretty_time(current_task):>10} | {tau:5.1f}")

        current_task *= 2            # actual doubling
        days_elapsed += tau
        tau *= acceleration          # super‑/sub‑exponential effect
        step += 1

    # final line after exceeding target
    date = start_date + timedelta(days=days_elapsed)
    print(f"{step:4d} | {date.strftime(date_fmt)} | "
          f"{int(days_elapsed):6d} | {_pretty_time(current_task):>10} | {tau:5.1f}  <-- reached target")


def estimate_growth_parameters(
    observations,
    baseline_date=None,
    baseline_task_hours=None,
    reliability_level="50%",
    *,
    initial_guess=(260.0, 0.95),
    acceleration_bounds=(0.9, 1.0),
):
    """Fit an initial doubling time and an acceleration factor to observations.

    The model assumes the k-th doubling after the baseline takes
    ``doubling_time * acceleration**k`` days, so reaching ``d`` doublings
    takes ``doubling_time * (1 - acceleration**d) / (1 - acceleration)`` days
    (``d * doubling_time`` when the acceleration is 1). The parameters are
    chosen to minimise the mean squared error, in days, between predicted
    and observed elapsed time.

    Args:
        observations: Sequence of ``(name, date, hours_at_50, hours_at_80)``
            records; only positions 1, 2 and 3 are read here.
        baseline_date: Date counted as day 0. Defaults to the date of the
            first observation.
        baseline_task_hours: Task length counted as zero doublings. Defaults
            to the task length of the first observation.
        reliability_level: ``"50%"`` or ``"80%"``; selects the task-length
            column (position 2 or 3).
        initial_guess: ``(doubling_time, acceleration)`` starting point for
            the optimiser; it is clipped into the bounds.
        acceleration_bounds: ``(low, high)`` limits for the fitted
            acceleration. The loss is infinite outside the open interval
            (0, 2), so useful bounds lie inside it.

    Returns:
        ``(initial_doubling_time, acceleration)`` as a plain ``int`` number
        of days and a ``float`` rounded to three decimals.

    Raises:
        ValueError: If ``reliability_level`` is not ``"50%"`` or ``"80%"``.
        IndexError: If ``observations`` is empty and a baseline has to be
            taken from its first element.
    """
    # Pick the task-length column that matches the reliability level.
    column_by_level = {"50%": 2, "80%": 3}
    if reliability_level not in column_by_level:
        raise ValueError("reliability_level must be '50%' or '80%'")
    hours_idx = column_by_level[reliability_level]

    dates = [obs[1] for obs in observations]
    task_hours = [obs[hours_idx] for obs in observations]

    if baseline_date is None:
        baseline_date = dates[0]
    if baseline_task_hours is None:
        baseline_task_hours = task_hours[0]

    # Number of doublings relative to the baseline (negative if shorter).
    doublings = np.log([hours / baseline_task_hours for hours in task_hours]) / np.log(2)
    elapsed_days = np.array([(date - baseline_date).days for date in dates], dtype=float)

    def mse_loss(params):
        doubling_time, accel = params
        if doubling_time <= 0 or not 0 < accel < 2:
            return np.inf
        if np.isclose(accel, 1.0):
            # Limit of the geometric sum below as accel -> 1 (avoids 0/0).
            prediction = doublings * doubling_time
        else:
            prediction = doubling_time * (1 - accel**doublings) / (1 - accel)
        return np.mean((prediction - elapsed_days)**2)

    accel_low, accel_high = acceleration_bounds
    lower = [1e-6, accel_low]
    upper = [np.inf, accel_high]
    # L-BFGS-B needs a feasible starting point.
    x0 = np.clip(list(initial_guess), lower, upper)
    result = minimize(mse_loss, x0=x0, method="L-BFGS-B", bounds=Bounds(lower, upper))

    # Convert NumPy scalars to built-in numbers so the returned tuple prints
    # as e.g. (324, 0.9) regardless of the NumPy version's scalar repr.
    doubling_time, acceleration = (float(value) for value in result.x)
    return round(doubling_time), round(acceleration, 3)


def print_estimation(data, reliability_level="50%"):
    """Fit growth parameters to ``data`` and print them on one line.

    The line names the first and last record of ``data`` (position 0 of
    each), the reliability level, and the tuple returned by
    ``estimate_growth_parameters``.
    """
    first_model, last_model = data[0][0], data[-1][0]
    fitted = estimate_growth_parameters(data, reliability_level=reliability_level)
    print(f"{first_model} to {last_model} ({reliability_level}): {fitted}")


def bootstrap_growth_parameters(observations, n_bootstrap=1000, reliability_level="50%",
                               min_models=5, recent_weight=2.0, seed=None):
    """
    Bootstrap summary statistics for the growth parameters.

    Observations are resampled with replacement ``n_bootstrap`` times, with
    later positions in ``observations`` drawn more often, and
    ``estimate_growth_parameters`` is fitted to every accepted resample.

    Args:
        observations: Records accepted by ``estimate_growth_parameters``.
        n_bootstrap: Number of resamples to draw.
        reliability_level: Forwarded to ``estimate_growth_parameters``.
        min_models: Minimum number of distinct observations a resample must
            contain to be fitted.
        recent_weight: Ratio (approximately) between the sampling weight of
            the last and the first observation.
        seed: Optional seed for a private random generator. ``None`` keeps
            drawing from NumPy's global random state.

    Returns:
        ``None`` when there are no observations or no resample produced a
        usable fit; otherwise a dict with ``'median'``, ``'mean'`` and
        ``'std'`` as ``(doubling_time, acceleration)`` pairs and ``'ci_95'``
        holding the 2.5 / 97.5 percentile pair for each parameter.
    """
    n_obs = len(observations)
    if n_obs == 0:
        # Nothing to resample; same "no result" answer as when every fit fails.
        return None

    rng = np.random if seed is None else np.random.default_rng(seed)

    # Weight later positions higher: from 1 up to just under recent_weight.
    weights = np.array([recent_weight ** (i / n_obs) for i in range(n_obs)])
    weights /= weights.sum()

    fits = []
    for _ in range(n_bootstrap):
        # Sample with replacement, weighted by position.
        indices = rng.choice(n_obs, size=n_obs, replace=True, p=weights)

        # Only fit resamples that contain enough distinct observations.
        if len(set(indices)) < min_models:
            continue

        # Sorting keeps the resample in the original observation order, so
        # its first element (the fit's baseline) is the earliest one drawn.
        bootstrap_sample = [observations[i] for i in sorted(indices)]
        try:
            params = estimate_growth_parameters(bootstrap_sample, reliability_level=reliability_level)
        except (ValueError, ArithmeticError, IndexError):
            # A failed fit on one resample is skipped; other errors (and
            # KeyboardInterrupt) are allowed to propagate.
            continue
        if params[0] < 1000:  # Filter out degenerate fits
            fits.append(params)

    if not fits:
        return None

    def as_days(value):
        """Round to whole days as a built-in int."""
        return round(float(value))

    def as_accel(value):
        """Round to three decimals as a built-in float."""
        return round(float(value), 3)

    fits = np.array(fits)
    low, median, high = np.percentile(fits, [2.5, 50, 97.5], axis=0)
    doubling_times, accelerations = fits[:, 0], fits[:, 1]

    return {
        'median': (as_days(median[0]), as_accel(median[1])),
        'ci_95': {
            'doubling_time': (as_days(low[0]), as_days(high[0])),
            'acceleration': (as_accel(low[1]), as_accel(high[1]))
        },
        'mean': (as_days(doubling_times.mean()), as_accel(accelerations.mean())),
        'std': (as_days(doubling_times.std()), as_accel(accelerations.std()))
    }


def sliding_window_analysis(observations, window_sizes=(6, 8, 10, 12), reliability_level="50%"):
    """
    Fit growth parameters on every contiguous window of the observations.

    For each size in ``window_sizes`` that fits within ``observations``,
    every run of that many consecutive records is passed to
    ``estimate_growth_parameters``.

    Args:
        observations: Records accepted by ``estimate_growth_parameters``;
            position 0 is used as the model name and position 1 as its date.
        window_sizes: Window lengths to try; sizes larger than the number of
            observations are skipped.
        reliability_level: Forwarded to ``estimate_growth_parameters``.

    Returns:
        A ``pandas.DataFrame`` with one row per window and the columns
        ``window``, ``start_model``, ``end_model``, ``time_span_days``,
        ``doubling_time`` and ``acceleration``. The columns are present even
        when no window fits, so downstream ``groupby('window')`` still works.
    """
    columns = ['window', 'start_model', 'end_model', 'time_span_days',
               'doubling_time', 'acceleration']
    rows = []
    n_obs = len(observations)

    for window in window_sizes:
        if window > n_obs:
            continue
        # Try all possible windows of this size.
        for start in range(n_obs - window + 1):
            subset = observations[start:start + window]
            doubling_time, acceleration = estimate_growth_parameters(
                subset, reliability_level=reliability_level
            )
            first, last = subset[0], subset[-1]
            rows.append({
                'window': window,
                'start_model': first[0],
                'end_model': last[0],
                'time_span_days': (last[1] - first[1]).days,
                'doubling_time': doubling_time,
                'acceleration': acceleration
            })

    return pd.DataFrame(rows, columns=columns)


# Confirmation message printed once the definitions above have run.
print("Loaded libraries")

Loaded libraries


In [14]:
# One record per model: (name, release date, task length at 50% reliability,
# task length at 80% reliability), with both task lengths given in hours.
# NOTE(review): "GPT‑3.5 Turbo" is dated 2023-06-13, later than the GPT-4
# entry that follows it, so the list is not strictly chronological - confirm
# whether that date is intended.
observed_models: List[Tuple[str, datetime, float, float]] = [
    # model                       # release date         # task length at 50%  # task length at 80% (in hrs)
    ("GPT‑2",                     datetime(2019, 2, 14),  2/3600,              0.1/3600),
    ("GPT-3",                     datetime(2020, 5, 28),  9/3600,                2/3600),
    ("GPT‑3.5 Turbo",             datetime(2023, 6, 13), 36/3600,               10/3600),
    ("GPT-4",                     datetime(2023, 3, 14),  6/60,                  1/60  ),
    ("GPT-4-Nov23",               datetime(2023, 11, 6),  8/60,                  1/60  ),
    ("Claude 3 Opus",             datetime(2024, 3,  4),  6/60,                  1/60  ),
    ("GPT‑4o",                    datetime(2024, 5, 13),  9/60,                  2/60  ),
    ("Claude 3.5 Sonnet (old)",   datetime(2024, 6, 20), 18/60,                  3/60  ),
    ("o1 preview",                datetime(2024, 9, 12), 22/60,                  4/60  ),
    ("Claude 3.5 Sonnet (new)",   datetime(2024,10, 22), 28/60,                  5/60  ),
    ("o1",                        datetime(2024,12,  5), 39/60,                  6/60  ),
    ("Claude 3.7 Sonnet",         datetime(2025, 2, 24), 59/60,                 15/60  ),
    ("o3",                        datetime(2025, 4, 16),  1+45/60,              20/60  ),
    ("Claude 4 Sonnet",           datetime(2025, 5, 22),  1+7/60,               16/60  ),
    ("Claude 4 Opus",             datetime(2025, 5, 22),  1+19/60,              20/60  ),
]

# Sub-ranges of `observed_models` to fit, in the order they are reported.
# The [3:] range appears twice, so its line is printed twice per section.
_fit_ranges = (
    slice(None),
    slice(1, None),
    slice(3, None),
    slice(5, None),
    slice(3, -2),
    slice(6, -2),
    slice(8, -2),
    slice(3, None),
    slice(6, None),
    slice(8, None),
)

# Same sequence of fits for each reliability level, each under its own header.
_sections = (
    ("50%", "=== 50% Reliability ==="),
    ("80%", "\n=== 80% Reliability ==="),
)

for _level, _header in _sections:
    print(_header)
    for _fit_range in _fit_ranges:
        print_estimation(observed_models[_fit_range], _level)

=== 50% Reliability ===
GPT‑2 to Claude 4 Opus (50%): (324, 0.9)
GPT-3 to Claude 4 Opus (50%): (294, 0.9)
GPT-4 to Claude 4 Opus (50%): (255, 0.9)
Claude 3 Opus to Claude 4 Opus (50%): (109, 1.0)
GPT-4 to o3 (50%): (256, 0.9)
GPT‑4o to o3 (50%): (97, 1.0)
o1 preview to o3 (50%): (107, 0.9)
GPT-4 to Claude 4 Opus (50%): (255, 0.9)
GPT‑4o to Claude 4 Opus (50%): (107, 1.0)
o1 preview to Claude 4 Opus (50%): (126, 0.9)

=== 80% Reliability ===
GPT‑2 to Claude 4 Opus (80%): (213, 0.965)
GPT-3 to Claude 4 Opus (80%): (301, 0.9)
GPT-4 to Claude 4 Opus (80%): (235, 0.9)
Claude 3 Opus to Claude 4 Opus (80%): (99, 1.0)
GPT-4 to o3 (80%): (241, 0.9)
GPT‑4o to o3 (80%): (115, 0.9)
o1 preview to o3 (80%): (98, 0.9)
GPT-4 to Claude 4 Opus (80%): (235, 0.9)
GPT‑4o to Claude 4 Opus (80%): (122, 0.905)
o1 preview to Claude 4 Opus (80%): (111, 0.9)


In [17]:
print("=== Bootstrap Analysis ===")

# The 2024+ subset does not depend on the reliability level, so build it once.
recent_models = [m for m in observed_models if m[1] >= datetime(2024, 1, 1)]

for rel in ["50%", "80%"]:
    print(f"\n{rel} Reliability:")
    full_results = bootstrap_growth_parameters(observed_models, reliability_level=rel)
    recent_results = bootstrap_growth_parameters(recent_models, reliability_level=rel)
    for label, summary in (("Full dataset", full_results), ("2024+ models", recent_results)):
        # bootstrap_growth_parameters returns None when no resample could be fitted.
        if summary is None:
            print(f"{label}: no valid bootstrap fits")
        else:
            print(f"{label}: {summary['median']} (95% CI: {summary['ci_95']})")

# Mean and spread of the fitted parameters for each window size.
df = sliding_window_analysis(observed_models)
print("\n=== Parameter stability by time window ===")
print(df.groupby('window')[['doubling_time', 'acceleration']].agg(['mean', 'std']))

=== Bootstrap Analysis ===

50% Reliability:
Full dataset: (293, 0.9) (95% CI: {'doubling_time': (85, 332), 'acceleration': (0.9, 1.0)})
2024+ models: (112, 1.0) (95% CI: {'doubling_time': (101, 165), 'acceleration': (0.9, 1.0)})

80% Reliability:
Full dataset: (231, 0.934) (95% CI: {'doubling_time': (95, 304), 'acceleration': (0.9, 1.0)})
2024+ models: (105, 0.983) (95% CI: {'doubling_time': (95, 139), 'acceleration': (0.9, 1.0)})

=== Parameter stability by time window ===
       doubling_time             acceleration          
                mean         std         mean       std
window                                                 
6            178.700  102.563855     0.940000  0.051640
8            189.875   99.900576     0.947875  0.051482
10           207.500   99.885434     0.933333  0.051640
12           238.500  106.591119     0.925000  0.050000


In [15]:
# Projection anchored at GPT2: start from its release date and its task
# length, and double until the target length is reached.
test_acceleration(
    start_date="2019-02-14",    # GPT2
    start_task_length=2/60/60,  # GPT2 (2 seconds, expressed in hours)
    # NOTE(review): 167 hours is presumably about one working month - confirm.
    agi_task_length=167,
    initial_doubling_time=260,  # days for the first doubling
    acceleration=0.95,          # each doubling takes 95% of the previous one
)

Step |    Date    |    Day |       Task | τ (d)
-----------------------------------------------
   0 | 2019‑02‑14 |      0 |       2sec | 260.0
   1 | 2019‑11‑01 |    260 |       4sec | 247.0
   2 | 2020‑07‑05 |    507 |       8sec | 234.6
   3 | 2021‑02‑24 |    741 |      16sec | 222.9
   4 | 2021‑10‑05 |    964 |      32sec | 211.8
   5 | 2022‑05‑05 |   1176 |    1.07min | 201.2
   6 | 2022‑11‑22 |   1377 |    2.13min | 191.1
   7 | 2023‑06‑01 |   1568 |    4.27min | 181.6
   8 | 2023‑11‑30 |   1750 |    8.53min | 172.5
   9 | 2024‑05‑20 |   1922 |   17.07min | 163.9
  10 | 2024‑10‑31 |   2086 |   34.13min | 155.7
  11 | 2025‑04‑05 |   2242 |     1.14hr | 147.9
  12 | 2025‑08‑31 |   2390 |     2.28hr | 140.5
  13 | 2026‑01‑18 |   2530 |     4.55hr | 133.5
  14 | 2026‑06‑01 |   2664 |     9.10hr | 126.8
  15 | 2026‑10‑05 |   2790 |    18.20hr | 120.5
  16 | 2027‑02‑03 |   2911 |    36.41hr | 114.4
  17 | 2027‑05‑28 |   3025 |    72.82hr | 108.7
  18 | 2027‑09‑14 |   3134 |   145.64hr 