In [None]:
# --- repo bootstrap ---------------------------------------------------------
from pathlib import Path
from dotenv import load_dotenv
import os, sys

def repo_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that marks the repo root.

    A directory counts as the repo root when it contains a ``.env`` or a
    ``.git`` entry.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The resolved path of the first matching directory.

    Raises:
        RuntimeError: If no directory up to and including the filesystem root
            contains one of the markers.
    """
    here = start.resolve()
    # `parents` ends with the filesystem root, so a repo mounted at "/" is
    # found too (the previous `while cur != cur.parent` loop never tested it).
    for candidate in (here, *here.parents):
        if (candidate / ".env").exists() or (candidate / ".git").exists():
            return candidate
    raise RuntimeError("repo root not found")

# Locate the repository from the current working directory and wire up the
# environment / import path relative to it.
ROOT = repo_root(Path.cwd())
load_dotenv(ROOT / ".env")             # loads secrets

# Re-running this cell must not keep appending the same entry to sys.path.
_src_dir = str(ROOT / "src")           # optional helpers
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

DATA_DIR = ROOT / "data"
OUT_DIR  = ROOT / "outputs"
FIG_DIR  = OUT_DIR / "figs"
# parents=True: when outputs/ does not exist yet, a plain mkdir() would raise
# FileNotFoundError because the parent directory is missing.
FIG_DIR.mkdir(parents=True, exist_ok=True)

print("Repo root:", ROOT)

## Reload Cached Scores (up to 4/20/2025)

In [None]:
# ---------------------------------------------------------------
# FAST-PATH ▸ reuse cached daily-index if it exists
# ---------------------------------------------------------------
from pathlib import Path
import pandas as pd, matplotlib.pyplot as plt

CSV_CACHE = ROOT / "outputs" / "ukraine_escalation_daily.csv"   # always RELATIVE to repo root

if not CSV_CACHE.exists():
    # No cache on disk: flag it so later cells can decide to run the full batch logic.
    print("⚠️  No cached CSV found – full batch logic will run.")
    USE_CACHE_ONLY = False
else:
    print(f"📄  Loading cached scores from {CSV_CACHE.relative_to(ROOT)}")

    # The first CSV column is used as the index; headers are lower-cased and
    # a "score" column is renamed to "escalation".
    df = pd.read_csv(CSV_CACHE, index_col=0)
    df = df.rename(columns=str.lower)
    df = df.rename(columns={"score": "escalation"})

    # Parse the index into datetimes (raising on bad values) and name it "date".
    df.index = pd.to_datetime(df.index, errors="raise").rename("date")

    # Only compute the 7-row rolling mean when the CSV did not already carry one.
    if "roll7" not in df:
        df["roll7"] = df["escalation"].rolling(7, min_periods=1).mean()

    # Show the first rows and plot both series.
    display(df.head())
    df.plot(
        y=["escalation", "roll7"],
        figsize=(10, 4),
        label=["Daily", "7-day mean"],
        title="Cached escalation series",
    )
    plt.tight_layout()
    plt.show()

    USE_CACHE_ONLY = True      # later cells can skip the batch-download logic

## Score Recent Headlines (up to 5/27/2025)
### Load cache & figure out what remains to score

In [None]:
# ---------------------------------------------------------------
# repo paths  (ROOT was defined in the earlier bootstrap cell)
# ---------------------------------------------------------------
import pandas as pd, numpy as np, json, time, datetime as dt, openai, pathlib
OPENAI_CLIENT = openai.OpenAI()

CSV_CACHE   = ROOT / "outputs" / "ukraine_escalation_daily.csv"
NEW_RAW_CSV = ROOT / "outputs" / "raw_headlines_4_21_to_5_27.csv"

# --- load existing daily index ----------------------------------
# First CSV column is the index; headers are lower-cased and a "score"
# column is renamed to "escalation".
df_existing = (pd.read_csv(CSV_CACHE, index_col=0)
                 .rename(columns=str.lower)
                 .rename(columns={"score": "escalation"}))
df_existing.index = pd.to_datetime(df_existing.index)
df_existing.index.name = "date"

print("✅ loaded cached index →", df_existing.index.min().date(),
      "…", df_existing.index.max().date())

# --- load the NEW headlines ------------------------------------
raw_new = pd.read_csv(NEW_RAW_CSV, parse_dates=["date"])
raw_new["title"] = raw_new["title"].astype(str)

# Collapse every timestamp to midnight of its calendar day before comparing
# against the cache index and before grouping.  Without this, a headline
# stamped with a time-of-day would not match the cached date and would also
# split one day into several groups.
# NOTE(review): assumes the cache index holds midnight timestamps, as written
# by the merge cell's date_format="%Y-%m-%d" – confirm for older cache files.
raw_days = raw_new.assign(date=raw_new["date"].dt.normalize())

# keep only dates that are NOT in the cache, one list of titles per day
todo = (raw_days[~raw_days["date"].isin(df_existing.index)]
          .groupby("date")["title"].apply(list)
          .sort_index())

print(f"🆕 need to score {len(todo)} new day(s) • {raw_new.shape[0]:,} headlines total")

if not len(todo):
    print("🎉 cache already up-to-date – nothing to do"); raise SystemExit

### Build one JSONL & launch a batch

In [None]:
# ---------------------------------------------------------------
# Write one Batch-API request per day in `todo` to a JSONL file.
# Each request carries at most MAX_HEADLINES of that day's titles.
# ---------------------------------------------------------------
SYSTEM_PROMPT = """
You are an analyst quantifying RUSSIA–UKRAINE military-escalation RISK.

Scale (integer only):
0  = No fighting / purely diplomatic
2  = Low-level skirmishes
4  = Noticeable escalation (large drone / missile strike)
6  = Major battlefield offensive or big weapons shipment
8  = Strategic escalation (Crimea bridge hit, WMD threat)
10 = Nuclear rhetoric, nuclear alert or actual WMD use

Return ONLY the integer 0-10.
"""

MAX_HEADLINES = 120
jsonl_path = ROOT / "outputs" / "ua_tasks_2025Q2.jsonl"

with jsonl_path.open("w") as sink:
    for day, titles in todo.items():
        # Bullet list of the day's (capped) headlines, sent as the user message.
        bullets = "\n".join(f"- {t}" for t in titles[:MAX_HEADLINES])
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": "### Headlines TODAY\n" + bullets},
        ]
        body = {
            "model": "gpt-4o-mini",
            "temperature": 0,
            "max_tokens": 3,
            "messages": messages,
        }
        # custom_id is the ISO date of the day being scored.
        request = {
            "custom_id": str(day.date()),
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": body,
        }
        print(json.dumps(request), file=sink)

print("✅ wrote JSONL →", jsonl_path.name)

# ------------- launch batch & poll -----------------------------
# Upload the JSONL inside a context manager so the file handle is closed
# once the upload call returns.
with jsonl_path.open("rb") as upload_fh:
    file_id = OPENAI_CLIENT.files.create(file=upload_fh,
                                         purpose="batch").id
batch   = OPENAI_CLIENT.batches.create(
            input_file_id=file_id,
            endpoint="/v1/chat/completions",
            completion_window="24h")

# Every status after which the batch will not change any more.  "cancelled"
# was missing before, so a cancelled batch kept this loop polling forever.
TERMINAL_STATUSES = {"completed", "failed", "expired", "cancelled"}

print("🚀 batch", batch.id, "submitted – polling…")
while True:
    b = OPENAI_CLIENT.batches.retrieve(batch.id)
    rc = b.request_counts
    # Fall back to 0/0 if the API has not reported request counts yet.
    n_done, n_total = (rc.completed, rc.total) if rc is not None else (0, 0)
    print(f"\r{dt.datetime.now():%H:%M:%S}  {b.status:<10}"
          f"{n_done}/{n_total}", end="")
    if b.status in TERMINAL_STATUSES:
        print()
        if b.status != "completed":
            # One formatted message instead of a tuple of args.
            raise RuntimeError(f"Batch {batch.id} finished with status {b.status}")
        break
    time.sleep(10)

In [None]:
# ------------------------------------------------------------------
# DIRECT CALLS FOR SMALL BACK-FILL WINDOWS  (≤ ~100 days)
# ------------------------------------------------------------------
import time, json, pandas as pd, numpy as np, datetime as dt, openai, tqdm

SYSTEM_PROMPT = """You are an analyst quantifying RUSSIA–UKRAINE military-escalation RISK.

Scale (integer only):
0  = No active fighting; purely diplomatic headlines
2  = Low-level skirmishes, no major offensives
4  = Noticeable escalation (large drone / missile strikes)
6  = Major battlefield offensive OR significant weapons shipment
8  = Strategic escalation (Crimea bridge hit, use of banned weapons)
10 = Nuclear rhetoric, nuclear forces on alert, or actual WMD use

Return ONLY the integer (0-10)."""

MAX_HEADLINES = 120            # cap on headlines sent per day
RATE_DELAY    = 0.8            # pause (seconds) after each day's request
RETRIES       = 3              # attempts per day before giving up
TIMEOUT       = 40             # per-request timeout passed to the client

openai_client = openai.OpenAI()

# new_scores maps day → integer score (NaN on failure); lat collects the
# wall-clock latency of each successful request.
new_scores, lat = {}, []
pbar = tqdm.tqdm(todo.items(), total=len(todo), ncols=90, desc="Scoring")

for day, titles in pbar:
    stamp = pd.to_datetime(day)
    bullets = [f"- {t}" for t in titles[:MAX_HEADLINES]]
    user_block = "### Headlines TODAY\n" + "\n".join(bullets)
    chat = [{"role":"system","content":SYSTEM_PROMPT},
            {"role":"user",  "content":user_block}]

    for attempt in range(1, RETRIES + 1):
        started = time.time()
        try:
            rsp = openai_client.chat.completions.create(
                    model       ="gpt-4o-mini",
                    temperature =0,
                    max_tokens  =3,
                    timeout     =TIMEOUT,
                    messages    =chat,
            )
            lat.append(time.time() - started)
            new_scores[stamp] = int(rsp.choices[0].message.content.strip())
        except Exception as err:
            if attempt < RETRIES:
                # Linear back-off: 3 s, 6 s, … before the next attempt.
                pause = 3 * attempt
                pbar.write(f"⚠️ {day.date()} attempt {attempt} error: {err}; retrying in {pause}s…")
                time.sleep(pause)
                continue
            # Out of attempts: record the day as missing.
            pbar.write(f"❌ {day.date()} failed ({err.__class__.__name__}); NaN stored")
            new_scores[stamp] = np.nan
        else:
            break                                   # success → leave the retry loop

    time.sleep(RATE_DELAY)

pbar.close()
mean_latency = np.nanmean(lat)
n_failed = pd.isna(list(new_scores.values())).sum()
print(f"✅ finished   mean latency = {mean_latency:.2f}s   failures = {n_failed}")

# ------------------------------------------------------------------
# MERGE with existing index and save
# ------------------------------------------------------------------
ser_new = pd.Series(new_scores, name="escalation").sort_index()

# Days whose scoring failed are stored as NaN in new_scores.  Writing them to
# the cache would mark those dates as "already cached", so the todo filter
# would never pick them up again.  Drop them here so a later run retries them.
n_unscored = int(ser_new.isna().sum())
if n_unscored:
    print(f"⚠️ {n_unscored} day(s) failed to score – left out of the cache so they are retried next run")
ser_new = ser_new.dropna()

combined = pd.concat([df_existing["escalation"], ser_new])
# If a date appears twice, keep the later entry (the freshly scored value),
# then sort chronologically.
combined = combined[~combined.index.duplicated(keep="last")].sort_index()

df_merged = combined.to_frame()
df_merged["roll7"] = df_merged["escalation"].rolling(7, min_periods=1).mean()

cache_path = ROOT / "outputs" / "ukraine_escalation_daily.csv"
df_merged.to_csv(cache_path, date_format="%Y-%m-%d")
print("📄 updated cache →", cache_path.relative_to(ROOT))

# quick sanity plot
df_merged["escalation"].plot(figsize=(12,3), alpha=.5, lw=.8)
df_merged["roll7"].plot(color="tab:red", lw=2)
plt.title("Updated escalation index (incl. new back-fill)")
plt.tight_layout(); plt.show()

In [None]:
# ╔══════════════════════════════════════════════════════════╗
# ║  Embed ~40 000 headlines  →  text-embedding-3-small      ║
# ╚══════════════════════════════════════════════════════════╝
from pathlib import Path
import json, tqdm, numpy as np, openai

# Use the same repo-root detection as the bootstrap cell instead of assuming
# the notebook lives exactly one level below the root; overwriting ROOT with
# cwd.parent would change the paths used by every later cell.
ROOT        = repo_root(Path.cwd())
JSONL_PATH  = ROOT / "outputs" / "filter_tasks_part01.jsonl"
EMB_MODEL   = "text-embedding-3-small"
CHUNK_N     = 1_000                                 # titles sent per embeddings request
OUT_NPY     = ROOT / "outputs" / "emb_part01.npy"
OUT_IDX     = ROOT / "outputs" / "idx_part01.npy"

client = openai.OpenAI()

# ---------- load titles -------------------------------------------
# Each JSONL line is a request whose second message holds the headline and
# whose custom_id is the integer row id.  Blank lines and empty or non-string
# titles are skipped.
idxs, titles, skipped = [], [], 0
with JSONL_PATH.open() as fh:
    for ln in fh:
        if not ln.strip():
            continue
        obj   = json.loads(ln)
        title = obj["body"]["messages"][1]["content"]
        if not isinstance(title, str) or not title.strip():
            skipped += 1
            continue
        idxs.append(int(obj["custom_id"]))
        titles.append(title.strip())

n = len(titles)
print(f"📝 kept {n:,} titles   |  skipped {skipped}")

# ---------- embed --------------------------------------------------
if OUT_NPY.exists() and OUT_IDX.exists():
    print("✅ embeddings already on disk – skipping")
else:
    print(f"🔄 embedding {n:,} titles with {EMB_MODEL} …")
    embeds = None                                   # allocated once the vector width is known

    for i in tqdm.tqdm(range(0, n, CHUNK_N), ncols=90):
        chunk = titles[i:i+CHUNK_N]
        resp  = client.embeddings.create(model=EMB_MODEL, input=chunk)
        if len(resp.data) != len(chunk):
            raise RuntimeError(
                f"embedding response has {len(resp.data)} vectors for {len(chunk)} inputs")
        # Order by the item's `index` field so row i+k always belongs to titles[i+k].
        ordered = sorted(resp.data, key=lambda item: item.index)
        vecs    = np.asarray([item.embedding for item in ordered], dtype=np.float32)
        if embeds is None:
            # Take the width from the response rather than hard-coding 1536.
            embeds = np.empty((n, vecs.shape[1]), dtype=np.float32)
        embeds[i : i+len(chunk)] = vecs              # write exact slice

    if embeds is None:                               # no titles at all
        embeds = np.empty((0, 0), dtype=np.float32)

    np.save(OUT_NPY, embeds)
    np.save(OUT_IDX, np.asarray(idxs, dtype=np.int32))
    print(f"💾 saved {embeds.shape[0]:,} × {embeds.shape[1]} matrix → {OUT_NPY.name}")


In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Quick pilot: score a small, time-stratified sample          ║
# ║            (change SAMPLE_N as you wish)                    ║
# ╚══════════════════════════════════════════════════════════════╝
import pandas as pd, numpy as np, matplotlib.pyplot as plt, time, json, openai, tqdm
from pathlib import Path

# ---------- parameters -------------------------------------------------------
SAMPLE_N     = 200                         # 200 → ~1 headline / 6 days
SEED         = 42                          # reproducible random
OUT_DIR      = ROOT / "outputs"
OUT_CSV      = OUT_DIR / "sample_headline_scores.csv"
MAX_TOKENS_R = 3                           # model only returns “0” … “10”
DELAY_SEC    = 0.7                         # pause (seconds) between requests

# ---------- load filtered headline universe ----------------------------------
# Resolve the CSV against the repo root, like every other output path in this
# notebook, instead of relative to the kernel's working directory.
DF_ALL = pd.read_csv(OUT_DIR / "headlines_ru_ua.csv", parse_dates=["date"])
DF_ALL = DF_ALL.sort_values("date").reset_index(drop=True)
if DF_ALL.empty:
    raise ValueError("headlines_ru_ua.csv contains no rows – nothing to sample")

# ---------- even-time sampling -----------------------------------------------
# Evenly spaced row positions over the date-sorted frame.  np.unique removes
# repeated positions, which only occur when the frame has fewer rows than
# SAMPLE_N; otherwise it leaves the positions unchanged.
lin_idx  = np.unique(np.linspace(0, len(DF_ALL)-1, SAMPLE_N, dtype=int))
sample   = DF_ALL.loc[lin_idx].sample(frac=1, random_state=SEED)  # shuffle once

print(f"🎯 sampled {len(sample)} headlines  ({sample['date'].min().date()} → {sample['date'].max().date()})")

# ---------- gold-set exemplars: one (level, headline) pair per score 0–10 ----
GOLD_EX = [
    (0,  "Vessel to collect first humanitarian wheat shipment under Ukraine grain deal"),
    (1,  "UN offers Putin SWIFT access in exchange for Black Sea grain extension"),
    (2,  "Russia: Person injured in Ukrainian drone attack on Belgorod region"),
    (3,  "Russian shelling kills Kherson residents, Zelenskyy denounces ‘terrorist attacks’"),
    (4,  "Russia blames Ukraine drone attack for major Crimea fuel-depot fire"),
    (5,  "U.S. Eyes Giving Ukraine Patriot Missile Defense Systems"),
    (6,  "Ukraine launches ‘main thrust’ of counter-offensive, punches through Russian defences"),
    (7,  "Russia unleashes country-wide missile barrage on Ukraine energy grid"),
    (8,  "Putin blames Ukraine for Crimea Bridge blast, calls it a ‘terrorist act’"),
    (9,  "Russia planning ‘provocations’ at Zaporizhzhia nuclear plant, Zelensky warns"),
    (10, "Russia threatens US nuclear war by Christmas, analysts say"),
]

# The system prompt is: task statement, an "Examples:" header, one
# "<level> → <headline>" line per exemplar, then the output instruction.
sys_lines = [
    "You are an analyst scoring how *escalatory* a single headline is on a **0-10 integer scale**.",
    "Examples:",
    *[f"{level} → {headline}" for level, headline in GOLD_EX],
    "Return **only** the integer (0-10).",
]
SYSTEM_PROMPT = "\n".join(sys_lines)

# ---------- score sample ------------------------------------------------------
client   = openai.OpenAI()
scores   = []                 # one entry per sampled headline, NaN when unparsable
latency  = []                 # wall-clock seconds per request

pbar = tqdm.tqdm(sample.itertuples(index=False), total=len(sample), ncols=90, desc="Scoring")
for row in pbar:
    user_msg = f"[{row.date.date()}] {row.title}"
    t0       = time.time()
    rsp      = client.chat.completions.create(
                  # "gpt-o4-mini" is not a valid model id; the other scoring
                  # cells in this notebook use "gpt-4o-mini".
                  model="gpt-4o-mini",
                  temperature=0,
                  max_tokens=MAX_TOKENS_R,
                  messages=[{"role":"system","content":SYSTEM_PROMPT},
                            {"role":"user",  "content":user_msg}],
              )
    latency.append(time.time()-t0)
    try:
        s = int(rsp.choices[0].message.content.strip())
    except (ValueError, AttributeError):
        # ValueError: reply is not an integer; AttributeError: content is None.
        s = np.nan
    scores.append(s)
    time.sleep(DELAY_SEC)

sample = sample.assign(score=scores)

print(f"✅ finished   mean latency = {np.mean(latency):.2f}s   NaNs = {sample['score'].isna().sum()}")

sample.to_csv(OUT_CSV, index=False)
# Print the path as-is: OUT_CSV.relative_to(Path.cwd()) raises ValueError
# whenever the outputs folder is not below the working directory, which would
# abort the cell after all the scoring has already been done.
print(f"💾 wrote {OUT_CSV}")

# ---------- quick visual ------------------------------------------------------
# Mean score per calendar day of the sampled headlines.
daily_small = (sample.groupby(sample["date"].dt.date)["score"]
                        .mean()
                        .rename("sample_mean"))

plt.figure(figsize=(10,4))
plt.plot(daily_small.index, daily_small, marker='o', lw=.8, alpha=.7)
plt.title(f"Pilot {SAMPLE_N}-headline escalation scores")
plt.ylabel("0 = low … 10 = high")
plt.grid(alpha=.3); plt.tight_layout(); plt.show()


In [None]:
# Write the scored sample to disk and report the full path.
sample.to_csv(OUT_CSV, index=False)
print(f"💾 wrote {OUT_CSV}")          # simpler & always works

# ---------- quick visual ------------------------------------------------------
# Average the scores of all sampled headlines that fall on the same calendar day.
daily_small = (
    sample["score"]
    .groupby(sample["date"].dt.date)
    .mean()
    .rename("sample_mean")
)

# Location of the event timeline file; it is only defined here, not read.
TIMELINE   = Path("../src/ukraine-war-timeline.json")          # JSONL

plt.figure(figsize=(10,4))
plt.plot(daily_small.index, daily_small, marker='o', lw=.8, alpha=.7)
plt.title(f"Pilot {SAMPLE_N}-headline escalation scores")
plt.ylabel("0 = low … 10 = high")
plt.grid(alpha=.3)
plt.tight_layout()
plt.show()

In [None]:
# ────────────────────────────────────────────────────────────────
#  Save sample-scores CSV (already computed above)
# ────────────────────────────────────────────────────────────────
sample.to_csv(OUT_CSV, index=False)
print(f"💾 wrote {OUT_CSV}")

# ────────────────────────────────────────────────────────────────
#  Build per-day mean from the sampled headlines
# ────────────────────────────────────────────────────────────────
daily_small = (sample
               .groupby(sample["date"].dt.date)["score"]
               .mean()
               .rename("sample_mean"))

# ────────────────────────────────────────────────────────────────
#  Load major-event timeline (JSON-lines)  ➜ list of dicts
# ────────────────────────────────────────────────────────────────
# Anchor the path at the repo root (the bootstrap cell also uses ROOT / "src")
# so the cell does not depend on the kernel's working directory.
TIMELINE = ROOT / "src" / "ukraine-war-timeline.json"
with TIMELINE.open() as fh:
    # One JSON object per line; blank lines (e.g. a trailing newline) are skipped.
    events = [json.loads(ln) for ln in fh if ln.strip()]

# Keep only rows flagged major and parse their "date" field into timestamps.
tl = (pd.DataFrame(events)
        .query("major == True")                          # keep only major
        .assign(date = lambda d: pd.to_datetime(d["date"])))

# ────────────────────────────────────────────────────────────────
#  Plot: scatter + red verticals
# ────────────────────────────────────────────────────────────────
plt.figure(figsize=(10,4))
plt.plot(daily_small.index, daily_small, marker='o', lw=.8, alpha=.8)

for _, r in tl.iterrows():
    plt.axvline(r["date"], color="crimson", lw=1.4, alpha=.8)
    # Events without a label end up as NaN in the frame; draw nothing rather
    # than the text "nan".
    label = r.get("label", "")
    if pd.isna(label):
        label = ""
    plt.text(r["date"], 10.3, label, rotation=90,
             ha="right", va="bottom", color="crimson", fontsize=7)

plt.title(f"Pilot {SAMPLE_N}-headline escalation scores  +  major events")
plt.ylabel("0 = low … 10 = high")
plt.ylim(-0.5, 10.5)
plt.grid(alpha=.3)
plt.tight_layout()
plt.show()


In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  7-day rolling mean  +  simple pre/post deltas               ║
# ╚══════════════════════════════════════════════════════════════╝

# ---------------------------------------------------------------
# 1) daily series & 7-day rolling mean
# ---------------------------------------------------------------
# Mean score per calendar day, in chronological order.
daily = (sample
         .groupby(sample["date"].dt.date)["score"]
         .mean()
         .rename("daily_mean")
         .sort_index())

# Rolling mean over 7 consecutive rows of `daily` (rows are observed days).
roll7 = daily.rolling(7, min_periods=1).mean().rename("roll7")

fig, ax = plt.subplots(figsize=(10,4))
ax.plot(daily.index, roll7, lw=2, color="tab:red", label="7-day mean")
ax.scatter(daily.index, daily, s=18, alpha=.6, label="daily mean")
for _, r in tl.iterrows():                         # one vertical line per event in tl
    ax.axvline(r["date"], color="crimson", lw=.7, alpha=.6)
ax.set_title("Daily & 7-day rolling escalation index   +  major events")
ax.set_ylabel("Escalation  (0 = low … 10 = high)")
ax.grid(alpha=.3); ax.legend(); plt.tight_layout(); plt.show()

# ---------------------------------------------------------------
# 2) naive ±7-day pre/post comparison around each *major* event
# ---------------------------------------------------------------
rows = []
for _, ev in tl.iterrows():
    d0   = ev["date"].date()
    # Days whose mean is NaN (no parsable score) are dropped so they do not
    # count towards the "at least 3 observations" requirement.
    pre  = daily.loc[(daily.index >= d0 - pd.Timedelta(days=7)) &
                     (daily.index <  d0)].dropna()
    post = daily.loc[(daily.index >  d0) &
                     (daily.index <= d0 + pd.Timedelta(days=7))].dropna()
    if len(pre) >= 3 and len(post) >= 3:           # need enough points
        rows.append({
            "date"     : d0,
            "label"    : ev.get("label",""),
            "pre_mean" : pre.mean(),
            "post_mean": post.mean(),
            "delta"    : post.mean() - pre.mean()
        })

# Events that did not have enough observations never reach `rows`; count them
# separately so the summary can report them.
n_skipped = len(tl) - len(rows)

if rows:                                           # -------- only if we have data
    event_effects = (pd.DataFrame(rows)
                       .sort_values("date")
                       .reset_index(drop=True))

    display(event_effects.style.format({"pre_mean":"{:.2f}",
                                        "post_mean":"{:.2f}",
                                        "delta":"{:+.2f}"}))

    n_pos  = (event_effects["delta"] >  0).sum()
    n_neg  = (event_effects["delta"] <  0).sum()
    n_zero = len(event_effects) - n_pos - n_neg
    # Previously the remainder was labelled "zero/insufficient" even though
    # events with insufficient data are not part of event_effects at all.
    print(f"\n📊  Pre/post window (±7 d)   ↑ delta: {n_pos}   ↓ delta: {n_neg}   "
          f"zero: {n_zero}   skipped (insufficient data): {n_skipped}")
else:
    print("⚠️  No events have ≥3 pre- and post-day observations in this 200-headline pilot\n"
          "    — re-run after full scoring for meaningful statistics.")


In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  SCORE all ~40 000 RU–UA headlines with gpt-4o-mini   (uses GOLD_EX) ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path, PurePath
import json, time, datetime as dt, pandas as pd, openai, tqdm, math

# ── repo paths ─────────────────────────────────────────────────────────
# Reuse the bootstrap cell's repo-root detection rather than assuming the
# notebook sits exactly one directory below the root; overwriting ROOT with
# cwd.parent would change the paths used by every later cell.
ROOT       = repo_root(Path.cwd())
HEAD_CSV   = ROOT / "outputs" / "headlines_ru_ua.csv"       # headlines to score
CHUNK_DIR  = ROOT / "outputs" / "score_chunks"              # JSONL chunks + progress file
OUT_CSV    = ROOT / "outputs" / "headline_scores_full.csv"  # final merged scores
STATE_JSON = CHUNK_DIR / "_progress.json"                   # chunks / launched / done bookkeeping

# ── model / batch params ───────────────────────────────────────────────
MODEL      = "gpt-4o-mini"      # model written into every batch request
# Requests written per JSONL chunk (one batch is submitted per chunk).
# NOTE(review): the original comment called this the "platform limit" –
# confirm against the current Batch API per-file request and size caps.
MAX_REQ    = 8_000
COMPL_WIN  = "24h"              # completion_window passed to batches.create

# ── your exemplar rubric ───────────────────────────────────────────────
GOLD_EX = """
You are a defence analyst scoring how *escalatory* ONE Ukraine-war-related headline is.

Two example sets to establish scale (integer only):
---
```
0  Vessel to collect first humanitarian wheat shipment under Ukraine grain deal
1  UN Offers Putin SWIFT access in exchange for Black Sea grain extension, sources say
2  Russia: Person injured in Ukrainian drone attack on Belgorod region
3  Russian shelling kills Kherson residents, Zelenskyy denounces ‘terrorist attacks’
4  Russia blames Ukraine drone attack for major Crimea fuel-depot fire – Reuters
5  U.S. Eyes Giving Ukraine Patriot Missile Defense Systems
6  Ukraine launches ‘main thrust’ of counter-offensive, punches through Russian defenses
7  Russia unleashes country-wide missile barrage on Ukraine as Putin addresses security conference
8  Putin blames Ukraine for Crimea Bridge blast, calls it a ‘terrorist act’
9  Russia planning ‘provocations’ at nuclear plant, Zelensky tells Macron
10 Russia threatens US nuclear war by Christmas? Here’s what we know of threats and possible responses
```
---
```
[
  {
    "level": 0,
    "headline": "How did Russia and the West agree to a prisoner exchange?",
    "reason": "Purely diplomatic focus on a de-escalatory prisoner exchange agreement, with no active fighting reported. [cite: 82]"
  },
  {
    "level": 1,
    "headline": "Russia bans access to over 80 Western media outlets",
    "reason": "An act of information warfare (banning media outlets) directly related to the conflict, escalating tensions beyond diplomacy but without kinetic military action. [cite: 78]"
  },
  {
    "level": 2,
    "headline": "Russia: Person injured in Ukrainian drone attack on Belgorod region",
    "reason": "Reports an isolated drone attack causing a single injury, clearly fitting the 'low-level skirmish, few casualties' definition."
  },
  {
    "level": 3,
    "headline": "Russian strikes kill at least three in Kharkiv and injure dozens",
    "reason": "Describes Russian strikes resulting in multiple deaths and dozens of injuries, significantly escalating beyond a low-level skirmish (L2) but not specified as a targeted logistics strike (L4)."
  },
  {
    "level": 4,
    "headline": "Russia blames Ukraine drone attack for major Crimea fuel depot fire - Reuters",
    "reason": "A clear example of a single strike on a key logistics target (fuel depot), matching the rubric's definition for this level."
  },
  {
    "level": 5,
    "headline": "Ukraine brings war to Russia in ‘massive’ drone strike on military, government targets - Fox News",
    "reason": "A 'massive' drone strike on multiple military and government targets inside Russia implies a coordinated attack of greater scale and impact than a single logistics strike (L4), but not a major theater-wide offensive (L6)."
  },
  {
    "level": 6,
    "headline": "U.S. To Send “Massive Surge” Of Weapons To Ukraine",
    "reason": "The term \"Massive Surge\" directly indicates a major shipment of weapons, fulfilling one of the criteria for this level (\"major weapons shipment\"). [cite: 94]"
  },
  {
    "level": 7,
    "headline": "Russia unleashes country-wide missile barrage on Ukraine as Putin addresses security conference",
    "reason": "A \"country-wide missile barrage\" signifies a broad and intense series of attacks aimed at overwhelming defenses or causing widespread disruption across Ukraine, an escalation beyond a standard major offensive (L6) and impacting national infrastructure/morale. [cite: 49]"
  },
  {
    "level": 8,
    "headline": "Putin Blames Ukraine For Crimea Bridge Blast, Calls It A \"Terrorist Act\"",
    "reason": "Directly refers to an attack on the Crimea Bridge, a key strategic infrastructure target explicitly mentioned as an example for this level of escalation in the rubric."
  },
  {
    "level": 9,
    "headline": "West criticizes Russia at U.N. for Ukraine war after missile crash in Poland",
    "reason": "A missile incident in a NATO member state (Poland), causing fatalities and attributed to the conflict, represents a severe strategic escalation due to the high risk of wider conflict (e.g., NATO involvement), just short of explicit nuclear threats (L10)."
  },
  {
    "level": 10,
    "headline": "Russia threatens US nuclear war by Christmas? Here's what we know of threats and possible responses",
    "reason": "Explicitly reports on Russia making threats of nuclear war, fitting the 'nuclear rhetoric' criterion for the highest escalation level. [cite: 92]"
  }
]
```

Output only the matching integer 0-10.
""".strip()

# ── create temp dir if first run ───────────────────────────────────────
# parents=True so a missing outputs/ directory does not make this fail.
CHUNK_DIR.mkdir(parents=True, exist_ok=True)

# ------------------------------------------------------------------ #
# 1) build / discover JSONL chunks (idempotent)                      #
# ------------------------------------------------------------------ #
df = pd.read_csv(HEAD_CSV, parse_dates=["date"])
if not STATE_JSON.exists():                        # first ever run
    chunks = []
    # Slice the frame into consecutive blocks of at most MAX_REQ rows and
    # write one JSONL file per block.  Iterating over slices means:
    #   * every chunk uses the same file-name pattern (before, the first file
    #     was "new_score_part01" and the rest "score_partNN"),
    #   * no empty trailing file is produced when the row count is an exact
    #     multiple of MAX_REQ,
    #   * each file handle is closed by its `with` block.
    for part, start in enumerate(range(0, len(df), MAX_REQ), start=1):
        chunk_path = CHUNK_DIR / f"new_score_part{part:02d}.jsonl"
        with chunk_path.open("w") as fh:
            for idx, row in df.iloc[start:start + MAX_REQ].iterrows():
                task = {
                    # custom_id is the frame's index label; the merge step
                    # uses it to map each score back onto its row.
                    "custom_id": str(idx),
                    "method":   "POST",
                    "url":      "/v1/chat/completions",
                    "body": {
                        "model": MODEL,
                        "temperature": 0,
                        "max_tokens": 3,
                        "messages": [
                            {"role":"system", "content": GOLD_EX},
                            # headline text, truncated to 500 characters
                            {"role":"user",   "content": str(row["title"])[:500]}
                        ]
                    }
                }
                fh.write(json.dumps(task) + "\n")
        chunks.append(str(chunk_path))

    # Initial progress file: all chunk paths, nothing launched or done yet.
    with STATE_JSON.open("w") as state_fh:
        json.dump({"chunks": chunks, "launched": {}, "done": {}},
                  state_fh, indent=2)
    print(f"📝 prepared {len(chunks)} JSONL chunks for scoring")

# ------------------------------------------------------------------ #
# 2) resume-safe batch launcher                                      #
# ------------------------------------------------------------------ #
with STATE_JSON.open() as state_fh:
    state = json.load(state_fh)
client = openai.OpenAI()


def _save_state(snapshot: dict) -> None:
    """Write the progress dict to STATE_JSON, closing the file afterwards."""
    with STATE_JSON.open("w") as out_fh:
        json.dump(snapshot, out_fh, indent=2)


# Statuses after which a batch will never complete.  Only "failed" was
# handled before, so an expired or cancelled batch kept the loop polling.
DEAD_STATUSES = {"failed", "expired", "cancelled"}

for path in state["chunks"]:
    fname = PurePath(path).name
    if fname in state["done"]:                       # already finished
        continue

    if fname in state["launched"]:                  # running / finalising
        bid = state["launched"][fname]
    else:                                           # need to launch
        with open(path, "rb") as upload_fh:
            fid = client.files.create(file=upload_fh, purpose="batch").id
        bid  = client.batches.create(
                  input_file_id=fid,
                  endpoint="/v1/chat/completions",
                  completion_window=COMPL_WIN).id
        # Persist the batch id immediately so a re-run resumes polling
        # instead of launching the same chunk again.
        state["launched"][fname] = bid
        _save_state(state)
        print("🚀 launched", bid, "for", fname)

    bar = tqdm.tqdm(total=MAX_REQ, desc=f"{fname} → {bid[:8]}", unit="req")
    while True:
        b   = client.batches.retrieve(bid)
        rc  = b.request_counts
        if rc is not None:                          # counts may not be reported yet
            bar.total = rc.total or bar.total
            bar.n     = rc.completed + rc.failed
            bar.refresh()
        if b.status == "completed":
            bar.close(); print("✅", fname, "done")
            state["done"][fname] = bid
            _save_state(state)
            break
        if b.status in DEAD_STATUSES:
            bar.close()
            # Forget the dead batch so the next run launches this chunk again
            # rather than polling a batch that can no longer finish.
            state["launched"].pop(fname, None)
            _save_state(state)
            raise RuntimeError(f"Batch {bid} ended with status {b.status!r}")
        time.sleep(15)

print("\n🎉 every batch completed – assembling final CSV")

# ------------------------------------------------------------------ #
# 3) download & merge                                                #
# ------------------------------------------------------------------ #
# scores maps the integer custom_id (the row label in df) to the parsed
# integer score, or NaN when the request produced no usable integer.
scores = {}
for fname, bid in state["done"].items():
    out_id = client.batches.retrieve(bid).output_file_id
    if out_id is None:
        # No output file was produced for this batch; its rows stay missing.
        print(f"⚠️ batch {bid} ({fname}) has no output file – rows left unscored")
        continue
    out_txt = client.files.content(out_id).content
    for ln in out_txt.splitlines():
        if not ln.strip():
            continue
        rec = json.loads(ln)
        try:
            scores[int(rec["custom_id"])] = int(
                rec["response"]["body"]["choices"][0]["message"]["content"].strip())
        except (ValueError, TypeError, KeyError, IndexError, AttributeError):
            # ValueError: reply is not an integer.  The other exceptions cover
            # records whose response is missing or lacks a choices/message/
            # content entry; those must not abort the whole merge.
            scores[int(rec["custom_id"])] = math.nan

# Align scores to df by index label; rows without a score become <NA>.
# dtype=float keeps the conversion to nullable Int64 valid even when `scores`
# is empty or contains NaN.
df_out = df.assign(score = pd.Series(scores, dtype="float64").astype("Int64"))
df_out.to_csv(OUT_CSV, index=False)

print(f"💾 wrote {OUT_CSV.relative_to(ROOT.parent)}   "
      f"(NaNs: {df_out['score'].isna().sum()})")


In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  SCORE all ~40,000 RU–UA headlines with Claude Haiku 3.5 (uses GOLD_EX) ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import json, time, datetime as dt, pandas as pd, tqdm, math
import anthropic
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

# ── repo paths ─────────────────────────────────────────────────────────
# ROOT is the parent of the current working directory; every input and
# output of this cell lives under <ROOT>/outputs.
ROOT       = Path.cwd().resolve().parents[0]
HEAD_CSV   = ROOT.joinpath("outputs", "headlines_ru_ua.csv")            # headlines to score
CHUNK_DIR  = ROOT.joinpath("outputs", "score_chunks_anthropic")         # per-batch request files
OUT_CSV    = ROOT.joinpath("outputs", "headline_scores_anthropic.csv")  # final scored CSV
STATE_JSON = CHUNK_DIR.joinpath("_progress.json")                       # resume bookkeeping

# ── model / batch params ───────────────────────────────────────────────
# Cost-effective model for a simple task.
MODEL      = "claude-3-5-haiku-20241022"
# Conservative number of requests per batch (Anthropic allows up to 100k).
MAX_REQ    = 10_000

# ── your exemplar rubric ───────────────────────────────────────────────
# System prompt sent with every scoring request. It holds two exemplar sets
# for the 0-10 escalation scale: a flat "level  headline" list, then a
# JSON-style list that adds a short reason per level. The final line tells
# the model to reply with the integer only.
# NOTE(review): this is a normal (non-raw) string, so each \" below reaches
# the model as a plain ", and the level-6 headline already has unescaped
# inner quotes — the second exemplar block is therefore not strictly valid
# JSON. The "[cite: N]" markers look like leftover citation artifacts —
# confirm they are intended. Any edit to this text changes the prompt and
# may shift scores, so it is left byte-for-byte as is.
GOLD_EX = """
You are a defence analyst scoring how *escalatory* ONE Ukraine-war-related headline is.

Two example sets to establish scale (integer only):
---
```
0  Vessel to collect first humanitarian wheat shipment under Ukraine grain deal
1  UN Offers Putin SWIFT access in exchange for Black Sea grain extension, sources say
2  Russia: Person injured in Ukrainian drone attack on Belgorod region
3  Russian shelling kills Kherson residents, Zelenskyy denounces 'terrorist attacks'
4  Russia blames Ukraine drone attack for major Crimea fuel-depot fire – Reuters
5  U.S. Eyes Giving Ukraine Patriot Missile Defense Systems
6  Ukraine launches 'main thrust' of counter-offensive, punches through Russian defenses
7  Russia unleashes country-wide missile barrage on Ukraine as Putin addresses security conference
8  Putin blames Ukraine for Crimea Bridge blast, calls it a 'terrorist act'
9  Russia planning 'provocations' at nuclear plant, Zelensky tells Macron
10 Russia threatens US nuclear war by Christmas? Here's what we know of threats and possible responses
```
---
```
[
  {
    "level": 0,
    "headline": "How did Russia and the West agree to a prisoner exchange?",
    "reason": "Purely diplomatic focus on a de-escalatory prisoner exchange agreement, with no active fighting reported. [cite: 82]"
  },
  {
    "level": 1,
    "headline": "Russia bans access to over 80 Western media outlets",
    "reason": "An act of information warfare (banning media outlets) directly related to the conflict, escalating tensions beyond diplomacy but without kinetic military action. [cite: 78]"
  },
  {
    "level": 2,
    "headline": "Russia: Person injured in Ukrainian drone attack on Belgorod region",
    "reason": "Reports an isolated drone attack causing a single injury, clearly fitting the 'low-level skirmish, few casualties' definition."
  },
  {
    "level": 3,
    "headline": "Russian strikes kill at least three in Kharkiv and injure dozens",
    "reason": "Describes Russian strikes resulting in multiple deaths and dozens of injuries, significantly escalating beyond a low-level skirmish (L2) but not specified as a targeted logistics strike (L4)."
  },
  {
    "level": 4,
    "headline": "Russia blames Ukraine drone attack for major Crimea fuel depot fire - Reuters",
    "reason": "A clear example of a single strike on a key logistics target (fuel depot), matching the rubric's definition for this level."
  },
  {
    "level": 5,
    "headline": "Ukraine brings war to Russia in 'massive' drone strike on military, government targets - Fox News",
    "reason": "A 'massive' drone strike on multiple military and government targets inside Russia implies a coordinated attack of greater scale and impact than a single logistics strike (L4), but not a major theater-wide offensive (L6)."
  },
  {
    "level": 6,
    "headline": "U.S. To Send "Massive Surge" Of Weapons To Ukraine",
    "reason": "The term \"Massive Surge\" directly indicates a major shipment of weapons, fulfilling one of the criteria for this level (\"major weapons shipment\"). [cite: 94]"
  },
  {
    "level": 7,
    "headline": "Russia unleashes country-wide missile barrage on Ukraine as Putin addresses security conference",
    "reason": "A \"country-wide missile barrage\" signifies a broad and intense series of attacks aimed at overwhelming defenses or causing widespread disruption across Ukraine, an escalation beyond a standard major offensive (L6) and impacting national infrastructure/morale. [cite: 49]"
  },
  {
    "level": 8,
    "headline": "Putin Blames Ukraine For Crimea Bridge Blast, Calls It A \"Terrorist Act\"",
    "reason": "Directly refers to an attack on the Crimea Bridge, a key strategic infrastructure target explicitly mentioned as an example for this level of escalation in the rubric."
  },
  {
    "level": 9,
    "headline": "West criticizes Russia at U.N. for Ukraine war after missile crash in Poland",
    "reason": "A missile incident in a NATO member state (Poland), causing fatalities and attributed to the conflict, represents a severe strategic escalation due to the high risk of wider conflict (e.g., NATO involvement), just short of explicit nuclear threats (L10)."
  },
  {
    "level": 10,
    "headline": "Russia threatens US nuclear war by Christmas? Here's what we know of threats and possible responses",
    "reason": "Explicitly reports on Russia making threats of nuclear war, fitting the 'nuclear rhetoric' criterion for the highest escalation level. [cite: 92]"
  }
]
```

Output only the matching integer 0-10.
""".strip()  # strip() drops the newline right after the opening quotes and the one before the closing quotes

# ── create temp dir if first run ───────────────────────────────────────
CHUNK_DIR.mkdir(exist_ok=True, parents=True)

# ------------------------------------------------------------------ #
# 1) build / discover batches (idempotent)                           #
# ------------------------------------------------------------------ #
# On the first run (no state file yet) turn every headline into one batch
# request, write the requests to disk in chunks of at most MAX_REQ, and
# record the chunk files in STATE_JSON. Later runs skip this step and reuse
# the existing chunk files and state.
df = pd.read_csv(HEAD_CSV, parse_dates=["date"])
if not STATE_JSON.exists():                        # first ever run
    all_requests = []
    for idx, title in df["title"].items():
        # Skip missing/blank titles rather than asking the model to score
        # the literal text "nan"; those rows end up with an <NA> score.
        if pd.isna(title) or not str(title).strip():
            continue
        all_requests.append({
            "custom_id": str(idx),                 # row index, used to re-join scores
            "params": {
                "model": MODEL,
                "max_tokens": 10,                  # Just need a single digit
                "temperature": 0,
                "system": GOLD_EX,
                "messages": [
                    {"role": "user", "content": str(title)[:500]}
                ],
            },
        })

    batches = []
    chunk_starts = range(0, len(all_requests), MAX_REQ)
    for batch_num, start in enumerate(chunk_starts, start=1):
        batch_file = CHUNK_DIR / f"batch_{batch_num:03d}.json"
        # `with` closes (and therefore flushes) the file before it is read back
        with batch_file.open("w") as fh:
            json.dump({"requests": all_requests[start:start + MAX_REQ]}, fh)
        batches.append(str(batch_file))

    # The state file is written last, so an interrupted build is simply redone.
    with STATE_JSON.open("w") as fh:
        json.dump({"batches": batches, "launched": {}, "done": {}}, fh, indent=2)
    print(f"📝 prepared {len(batches)} batch files for scoring")

# ------------------------------------------------------------------ #
# 2) resume-safe batch launcher                                      #
# ------------------------------------------------------------------ #
# `state` has three keys: "batches" (chunk file paths), "launched"
# (chunk file name → batch id) and "done" (chunk file name → batch id).
# It is rewritten to STATE_JSON after every launch and every completion so
# that an interrupted run can pick up where it stopped.
state = json.loads(STATE_JSON.read_text())        # read_text() closes the file
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

for batch_path in state["batches"]:
    batch_name = Path(batch_path).name
    if batch_name in state["done"]:                 # already finished
        continue

    # Read the chunk once: it is needed to launch the batch and to size the
    # progress bar.
    with open(batch_path, 'r') as f:
        batch_requests = json.load(f)["requests"]

    if batch_name in state["launched"]:            # running / finalizing
        batch_id = state["launched"][batch_name]
    else:                                           # need to launch
        batch = client.messages.batches.create(requests=batch_requests)
        batch_id = batch.id
        state["launched"][batch_name] = batch_id
        STATE_JSON.write_text(json.dumps(state, indent=2))
        print(f"🚀 launched {batch_id} for {batch_name}")

    # Poll for completion
    bar = tqdm.tqdm(total=len(batch_requests),
                    desc=f"{batch_name} → {batch_id[:8]}", unit="req")
    try:
        while True:
            batch = client.messages.batches.retrieve(batch_id)

            # Every request that has reached a final state counts as progress,
            # whether it succeeded or not.
            counts = batch.request_counts
            bar.n = (counts.succeeded + counts.errored +
                     counts.canceled + counts.expired)
            bar.refresh()

            if batch.processing_status == "ended":
                print(f"✅ {batch_name} done")
                state["done"][batch_name] = batch_id
                STATE_JSON.write_text(json.dumps(state, indent=2))
                break

            time.sleep(10)  # Check every 10 seconds
    finally:
        bar.close()                                 # also runs if polling raises

print("\n🎉 every batch completed – assembling final CSV")

# ------------------------------------------------------------------ #
# 3) download & merge results                                        #
# ------------------------------------------------------------------ #
# Collect one score per headline row from every finished batch, then attach
# the scores to the headline frame and write OUT_CSV.
import re  # local: this cell's import line does not bring in `re`

scores = {}
for batch_name, batch_id in state["done"].items():
    batch = client.messages.batches.retrieve(batch_id)

    if not batch.results_url:                       # nothing to download
        continue

    # Stream the results through the SDK so the request is authenticated;
    # each entry carries the `custom_id` set at build time and a `result`
    # whose `type` is "succeeded" when a message came back.
    for entry in client.messages.batches.results(batch_id):
        custom_id = int(entry.custom_id)
        score = math.nan                            # default: no usable score

        if entry.result.type == "succeeded":
            blocks = entry.result.message.content
            text = getattr(blocks[0], "text", "").strip() if blocks else ""
            # Use the first integer in the reply. Concatenating every digit
            # would turn a reply like "7 (of 10)" into 710 and discard it.
            found = re.search(r"\d+", text)
            if found and 0 <= int(found.group()) <= 10:
                score = int(found.group())

        scores[custom_id] = score

# Create output DataFrame (scores align to rows by index; gaps become <NA>)
df_out = df.assign(score=pd.Series(scores).astype("Int64"))
df_out.to_csv(OUT_CSV, index=False)

print(f"💾 wrote {OUT_CSV.relative_to(ROOT.parent)}   "
      f"(NaNs: {df_out['score'].isna().sum()})")

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  RESUME BATCH PROCESSING - Continue from where you left off           ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import json, time, pandas as pd, tqdm, math
import anthropic
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

# ── repo paths ─────────────────────────────────────────────────────────
ROOT       = Path.cwd().resolve().parents[0]
HEAD_CSV   = ROOT / "outputs" / "headlines_ru_ua.csv"
CHUNK_DIR  = ROOT / "outputs" / "score_chunks_anthropic"
OUT_CSV    = ROOT / "outputs" / "headline_scores_full.csv"
STATE_JSON = CHUNK_DIR / "_progress.json"

# Model
MODEL = "claude-3-5-haiku-20241022"

# Initialize client
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Load state
# `raise SystemExit(...)` is used instead of `exit(...)`: in a plain script
# the two are equivalent, but in a Jupyter/IPython kernel `exit` is a helper
# that asks the kernel to shut down and does not stop the cell at this point,
# so the code below would keep running.
if not STATE_JSON.exists():
    print("❌ No state file found. Run the initial batch processing script first.")
    raise SystemExit(1)

state = json.loads(STATE_JSON.read_text())        # read_text() closes the file

# Check what's left to process: (chunk path, chunk file name) for every
# chunk that is not yet recorded under state["done"].
remaining_batches = [
    (batch_path, Path(batch_path).name)
    for batch_path in state["batches"]
    if Path(batch_path).name not in state["done"]
]

print(f"📊 Processing Status:")
print(f"   Total batches: {len(state['batches'])}")
print(f"   Completed: {len(state['done'])}")
print(f"   Remaining: {len(remaining_batches)}")

if not remaining_batches:
    print("\n✅ All batches have been completed!")
    raise SystemExit(0)

print(f"\n🚀 Resuming processing of {len(remaining_batches)} remaining batches...")

# Process remaining batches
# For each unfinished chunk: reuse the recorded batch id when there is one,
# otherwise launch the chunk, then poll until the batch reports "ended".
# `state` is rewritten to STATE_JSON after every change.
for batch_path, batch_name in remaining_batches:
    print(f"\n📦 Processing {batch_name}...")

    batch_requests = None   # chunk payload; loaded at most once per batch

    if batch_name in state["launched"]:
        # Check if this batch is still running or completed
        batch_id = state["launched"][batch_name]
        try:
            batch = client.messages.batches.retrieve(batch_id)
        except anthropic.NotFoundError as e:
            # The API does not know this id: forget it so the chunk is
            # launched again below.
            print(f"   ❌ Error checking batch status: {str(e)}")
            print(f"   🔄 Will try to relaunch this batch...")
            del state["launched"][batch_name]
            STATE_JSON.write_text(json.dumps(state, indent=2))
        except Exception as e:
            # Any other failure (network, rate limit, ...) says nothing about
            # the batch itself. Keep its id and skip it for now: relaunching
            # here would submit, and bill, the same requests a second time.
            print(f"   ❌ Error checking batch status: {str(e)}")
            print(f"   ⏭️  Keeping batch id {batch_id}; run again to re-check it.")
            continue
        else:
            if batch.processing_status == "ended":
                # Batch completed while we were away
                print(f"   ✅ Batch already completed!")
                state["done"][batch_name] = batch_id
                STATE_JSON.write_text(json.dumps(state, indent=2))
                continue
            elif batch.processing_status == "in_progress":
                print(f"   ⏳ Batch still processing, waiting for completion...")
            else:
                print(f"   ❌ Batch in unexpected state: {batch.processing_status}")
                continue

    # Launch or wait for batch
    if batch_name not in state["launched"]:
        # Need to launch this batch
        try:
            with open(batch_path, 'r') as f:
                batch_requests = json.load(f)["requests"]

            batch = client.messages.batches.create(requests=batch_requests)
            batch_id = batch.id
            state["launched"][batch_name] = batch_id
            STATE_JSON.write_text(json.dumps(state, indent=2))
            print(f"   🚀 Launched new batch: {batch_id}")

        except Exception as e:
            print(f"   ❌ Error launching batch: {str(e)}")
            if "rate_limit" in str(e).lower():
                print("   💡 Hit rate limit. You may need to wait or upgrade your plan.")
            elif "insufficient_credit" in str(e).lower() or "payment" in str(e).lower():
                print("   💳 Insufficient credits. Please add more credits to your account.")
                print(f"   📊 You've completed {len(state['done'])}/{len(state['batches'])} batches so far.")
                break          # no point trying the remaining chunks
            continue
    else:
        batch_id = state["launched"][batch_name]

    # Wait for completion
    if batch_requests is None:  # batch was launched by an earlier run
        with open(batch_path, 'r') as f:
            batch_requests = json.load(f)["requests"]

    bar = tqdm.tqdm(total=len(batch_requests),
                    desc=f"{batch_name} → {batch_id[:8]}", unit="req")

    while True:
        try:
            batch = client.messages.batches.retrieve(batch_id)

            # Every request that has reached a final state counts as progress
            counts = batch.request_counts
            bar.n = (counts.succeeded + counts.errored +
                     counts.canceled + counts.expired)
            bar.refresh()

            if batch.processing_status == "ended":
                bar.close()
                print(f"   ✅ Batch completed!")
                state["done"][batch_name] = batch_id
                STATE_JSON.write_text(json.dumps(state, indent=2))
                break

        except Exception as e:
            # Stop waiting on this batch; it stays under "launched" and is
            # picked up again on the next run.
            bar.close()
            print(f"   ❌ Error checking batch status: {str(e)}")
            break

        time.sleep(10)

# Final summary
print(f"\n📊 Final Status:")
print(f"   Completed: {len(state['done'])}/{len(state['batches'])} batches")

if len(state['done']) == len(state['batches']):
    print("\n🎉 All batches completed! Running final assembly...")

    # Run the retrieval script with the same interpreter as this kernel.
    # `sys` is imported here because this cell's own import lines do not
    # include it.
    # NOTE(review): the script path is relative, so it is resolved against
    # the current working directory — confirm that is where it lives.
    import subprocess
    import sys
    completed = subprocess.run([sys.executable, "retrieve_partial_results.py"])
    if completed.returncode != 0:
        # Surface a failed assembly instead of ending silently
        print(f"   ❌ retrieve_partial_results.py exited with code {completed.returncode}")
else:
    print(f"\n⏸️  Processing paused. {len(state['batches']) - len(state['done'])} batches remaining.")
    print("   Run this script again when you have more credits.")

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  DEBUG RETRIEVE RESULTS - See what's in the batch responses           ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import json, pandas as pd, math
import anthropic
from dotenv import load_dotenv
import os
import requests

# Load environment variables
load_dotenv()

# ── repo paths ─────────────────────────────────────────────────────────
ROOT       = Path.cwd().resolve().parents[0]
HEAD_CSV   = ROOT / "outputs" / "headlines_ru_ua.csv"
CHUNK_DIR  = ROOT / "outputs" / "score_chunks_anthropic"
STATE_JSON = CHUNK_DIR / "_progress.json"
PARTIAL_CSV = ROOT / "outputs" / "headline_scores_partial.csv"

# Initialize client
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Load state
state = json.loads(STATE_JSON.read_text())        # read_text() closes the file
print(f"📊 Found {len(state['done'])} completed batches")

# Let's debug the first batch: download its raw results, save them next to
# the chunk files, and print the structure of the first three records.
if state["done"]:
    batch_name, batch_id = next(iter(state["done"].items()))
    print(f"\n🔍 Debugging batch: {batch_name} (ID: {batch_id})")

    try:
        # Retrieve batch
        batch = client.messages.batches.retrieve(batch_id)
        print(f"✅ Batch retrieved successfully")
        print(f"   Status: {batch.processing_status}")
        print(f"   Results URL: {batch.results_url}")

        if batch.results_url:
            # Method 1: Using requests with API key
            print("\n📥 Attempting to download results...")
            headers = {
                "x-api-key": os.getenv("ANTHROPIC_API_KEY"),
                "anthropic-version": "2023-06-01"
            }

            # A timeout keeps a stalled connection from hanging the cell forever
            response = requests.get(batch.results_url, headers=headers, timeout=60)
            print(f"   Response status: {response.status_code}")

            if response.status_code == 200:
                # Save raw response for inspection. The encoding is explicit
                # because replies may contain non-ASCII text and the platform
                # default encoding may not be able to represent it.
                debug_file = CHUNK_DIR / f"debug_{batch_name}_results.txt"
                with open(debug_file, 'w', encoding="utf-8") as f:
                    f.write(response.text)
                print(f"   Saved raw response to: {debug_file}")

                # Try to parse first few lines
                lines = response.text.strip().split('\n')
                print(f"\n📄 Found {len(lines)} result lines")
                print("\n🔍 First 3 results (raw):")
                for i, line in enumerate(lines[:3]):
                    print(f"\nLine {i}:")
                    print(line[:200] + "..." if len(line) > 200 else line)

                    try:
                        result = json.loads(line)
                        print(f"✅ Parsed successfully")
                        print(f"   Keys: {list(result.keys())}")
                        if 'result' in result:
                            print(f"   Result keys: {list(result['result'].keys())}")
                            print(f"   Result type: {result['result'].get('type', 'N/A')}")
                    except Exception as e:
                        print(f"❌ Parse error: {e}")
            else:
                print(f"❌ Failed to download: {response.status_code}")
                print(f"   Response: {response.text[:500]}")

    except Exception as e:
        # Debug helper: report the failure with a traceback and carry on
        print(f"\n❌ Error: {type(e).__name__}: {str(e)}")
        import traceback
        traceback.print_exc()

# Now let's create a corrected retrieval function
# Download the results of every finished batch and collect one score per
# headline row in `scores` (row index → int 0-10, or NaN when the request
# failed or the reply held no usable number).
import re  # local: this cell's import lines do not bring in `re`

print("\n\n" + "="*60)
print("📊 ATTEMPTING FULL RETRIEVAL WITH CORRECTED PARSING")
print("="*60)

scores = {}
total_processed = 0
total_errors = 0

# Same auth headers for every download, so build them once
headers = {
    "x-api-key": os.getenv("ANTHROPIC_API_KEY"),
    "anthropic-version": "2023-06-01"
}

for batch_name, batch_id in state["done"].items():
    print(f"\n📦 Processing {batch_name}...")

    try:
        batch = client.messages.batches.retrieve(batch_id)

        if batch.results_url:
            # Download results (timeout so a stalled connection cannot hang the cell)
            response = requests.get(batch.results_url, headers=headers, timeout=60)

            if response.status_code == 200:
                lines = response.text.strip().split('\n')
                batch_scores = 0
                batch_errors = 0

                for line in lines:
                    if line:
                        try:
                            data = json.loads(line)
                            custom_id = int(data["custom_id"])
                            score = math.nan

                            if data["result"]["type"] == "succeeded":
                                # Extract content from the message
                                message = data["result"]["message"]
                                content = message["content"][0]["text"].strip()

                                # Use the first integer in the reply.
                                # Concatenating every digit would turn
                                # "7 (of 10)" into 710 and discard it.
                                found = re.search(r"\d+", content)
                                if found and 0 <= int(found.group()) <= 10:
                                    score = int(found.group())

                            scores[custom_id] = score
                            if math.isnan(score):
                                batch_errors += 1
                            else:
                                batch_scores += 1

                        except Exception as e:
                            batch_errors += 1
                            if "custom_id" in line:  # Only print if it looks like valid data
                                print(f"   ⚠️ Parse error: {str(e)[:100]}")

                print(f"   ✅ Processed: {batch_scores} scores, {batch_errors} errors")
                total_processed += batch_scores
                total_errors += batch_errors
            else:
                print(f"   ❌ HTTP {response.status_code}: {response.text[:200]}")
        else:
            print(f"   ❌ No results URL available")

    except Exception as e:
        print(f"   ❌ Error: {type(e).__name__}: {str(e)}")

# Save results
# When at least one score was collected, join the scores onto the headline
# rows by index, write PARTIAL_CSV, and print how many headlines fell on
# each level.
if not scores:
    print("\n⚠️ No scores retrieved")
else:
    print(f"\n📊 Total scores retrieved: {len(scores)}")
    print(f"   Successful: {total_processed}")
    print(f"   Errors: {total_errors}")

    df = pd.read_csv(HEAD_CSV, parse_dates=["date"])
    score_column = pd.Series(scores).astype("Int64")   # NaN entries become <NA>
    df_out = df.assign(score=score_column)
    df_out.to_csv(PARTIAL_CSV, index=False)

    print(f"\n💾 Saved to: {PARTIAL_CSV.relative_to(ROOT.parent)}")
    print(f"   Headlines scored: {df_out['score'].notna().sum()}/{len(df_out)}")

    # Show distribution, lowest level first
    print("\n📊 Score distribution:")
    score_counts = df_out['score'].value_counts().sort_index()
    for level in score_counts.index:
        print(f"   Level {level}: {score_counts[level]:,} headlines")

## New Interchangeable Headline Scoring for Anthropic

In [None]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  SCORE Ukraine War Headlines - Batch API Version                      ║
# ╚══════════════════════════════════════════════════════════════════════╝
from pathlib import Path
import json, time, pandas as pd, tqdm, re
import anthropic
from dotenv import load_dotenv
import os
import requests
import logging

load_dotenv()

# Configuration - change MODEL to switch models. The output file name is
# derived from MODEL, so runs with different models do not overwrite each
# other's scores.
ROOT = Path.cwd().resolve().parents[0]
HEADLINES_CSV = ROOT / "outputs" / "headlines_ru_ua.csv"
MODEL = "claude-sonnet-4-20250514"  # Can change to sonnet-4 or opus-4
OUT_CSV = ROOT / "outputs" / f"headline_scores_anthropic_{MODEL}.csv"

# Gold set exemplar rubric (exact same as original)
# System prompt sent with every scoring request. It holds two exemplar sets
# for the 0-10 escalation scale: a flat "level  headline" list, then a
# JSON-style list that adds a short reason per level. The final line tells
# the model to reply with the integer only.
# NOTE(review): this is a normal (non-raw) string, so each \" below reaches
# the model as a plain ", and the level-6 headline already has unescaped
# inner quotes — the second exemplar block is therefore not strictly valid
# JSON. The "[cite: N]" markers look like leftover citation artifacts —
# confirm they are intended. Any edit to this text changes the prompt and
# may shift scores, so it is left byte-for-byte as is.
GOLD_EX = """You are a defence analyst scoring how *escalatory* ONE Ukraine-war-related headline is.

Two example sets to establish scale (integer only):
---
```
0  Vessel to collect first humanitarian wheat shipment under Ukraine grain deal
1  UN Offers Putin SWIFT access in exchange for Black Sea grain extension, sources say
2  Russia: Person injured in Ukrainian drone attack on Belgorod region
3  Russian shelling kills Kherson residents, Zelenskyy denounces 'terrorist attacks'
4  Russia blames Ukraine drone attack for major Crimea fuel-depot fire – Reuters
5  U.S. Eyes Giving Ukraine Patriot Missile Defense Systems
6  Ukraine launches 'main thrust' of counter-offensive, punches through Russian defenses
7  Russia unleashes country-wide missile barrage on Ukraine as Putin addresses security conference
8  Putin blames Ukraine for Crimea Bridge blast, calls it a 'terrorist act'
9  Russia planning 'provocations' at nuclear plant, Zelensky tells Macron
10 Russia threatens US nuclear war by Christmas? Here's what we know of threats and possible responses
```
---
```
[
  {
    "level": 0,
    "headline": "How did Russia and the West agree to a prisoner exchange?",
    "reason": "Purely diplomatic focus on a de-escalatory prisoner exchange agreement, with no active fighting reported. [cite: 82]"
  },
  {
    "level": 1,
    "headline": "Russia bans access to over 80 Western media outlets",
    "reason": "An act of information warfare (banning media outlets) directly related to the conflict, escalating tensions beyond diplomacy but without kinetic military action. [cite: 78]"
  },
  {
    "level": 2,
    "headline": "Russia: Person injured in Ukrainian drone attack on Belgorod region",
    "reason": "Reports an isolated drone attack causing a single injury, clearly fitting the 'low-level skirmish, few casualties' definition."
  },
  {
    "level": 3,
    "headline": "Russian strikes kill at least three in Kharkiv and injure dozens",
    "reason": "Describes Russian strikes resulting in multiple deaths and dozens of injuries, significantly escalating beyond a low-level skirmish (L2) but not specified as a targeted logistics strike (L4)."
  },
  {
    "level": 4,
    "headline": "Russia blames Ukraine drone attack for major Crimea fuel depot fire - Reuters",
    "reason": "A clear example of a single strike on a key logistics target (fuel depot), matching the rubric's definition for this level."
  },
  {
    "level": 5,
    "headline": "Ukraine brings war to Russia in 'massive' drone strike on military, government targets - Fox News",
    "reason": "A 'massive' drone strike on multiple military and government targets inside Russia implies a coordinated attack of greater scale and impact than a single logistics strike (L4), but not a major theater-wide offensive (L6)."
  },
  {
    "level": 6,
    "headline": "U.S. To Send "Massive Surge" Of Weapons To Ukraine",
    "reason": "The term \"Massive Surge\" directly indicates a major shipment of weapons, fulfilling one of the criteria for this level (\"major weapons shipment\"). [cite: 94]"
  },
  {
    "level": 7,
    "headline": "Russia unleashes country-wide missile barrage on Ukraine as Putin addresses security conference",
    "reason": "A \"country-wide missile barrage\" signifies a broad and intense series of attacks aimed at overwhelming defenses or causing widespread disruption across Ukraine, an escalation beyond a standard major offensive (L6) and impacting national infrastructure/morale. [cite: 49]"
  },
  {
    "level": 8,
    "headline": "Putin Blames Ukraine For Crimea Bridge Blast, Calls It A \"Terrorist Act\"",
    "reason": "Directly refers to an attack on the Crimea Bridge, a key strategic infrastructure target explicitly mentioned as an example for this level of escalation in the rubric."
  },
  {
    "level": 9,
    "headline": "West criticizes Russia at U.N. for Ukraine war after missile crash in Poland",
    "reason": "A missile incident in a NATO member state (Poland), causing fatalities and attributed to the conflict, represents a severe strategic escalation due to the high risk of wider conflict (e.g., NATO involvement), just short of explicit nuclear threats (L10)."
  },
  {
    "level": 10,
    "headline": "Russia threatens US nuclear war by Christmas? Here's what we know of threats and possible responses",
    "reason": "Explicitly reports on Russia making threats of nuclear war, fitting the 'nuclear rhetoric' criterion for the highest escalation level. [cite: 92]"
  }
]
```

Output only the matching integer 0-10."""

# Load data
df = pd.read_csv(HEADLINES_CSV, parse_dates=["date"])
print(f"📊 Processing {len(df)} headlines")

# Initialize client
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Quiet the httpx + anthropic client: only warnings and above get through
for logger_name in ["httpx", "anthropic"]:
    noisy_logger = logging.getLogger(logger_name)
    noisy_logger.setLevel(logging.WARNING)

# Add index for tracking: a positional id per row, echoed back as
# `custom_id` so results can be matched to rows later
df["batch_idx"] = range(len(df))

# Prepare batch requests: one per row that has a non-blank title
requests_list = []
for _, record in df.iterrows():
    title = record.get("title")
    if pd.isna(title):                  # missing title (or no such column)
        continue
    headline = str(title)
    if headline.strip() == "":          # whitespace-only title
        continue

    params = {
        "model": MODEL,
        "max_tokens": 10,               # Just need a single digit 0-10
        "temperature": 0,
        "system": GOLD_EX,
        "messages": [
            {"role": "user", "content": headline[:500]}
        ],
    }
    requests_list.append({"custom_id": str(record["batch_idx"]), "params": params})

print(f"📝 Prepared {len(requests_list)} requests")

# Create batch: all prepared requests are submitted as a single batch
batch = client.messages.batches.create(requests=requests_list)
print(f"🚀 Launched batch {batch.id}")

# Monitor progress: poll until the batch reports "ended", moving the bar to
# the number of requests that have reached any final state
bar = tqdm.tqdm(total=len(requests_list), desc="Processing", unit="headline")
start_time = time.time()
finished = False
while not finished:
    batch_status = client.messages.batches.retrieve(batch.id)
    counts = batch_status.request_counts
    completed = sum((counts.succeeded, counts.errored,
                     counts.canceled, counts.expired))
    bar.n = completed
    bar.refresh()

    finished = batch_status.processing_status == "ended"
    if not finished:
        time.sleep(5)                   # wait before the next poll
bar.close()

elapsed_time = time.time() - start_time
print(f"✅ Batch processing complete in {elapsed_time/60:.1f} minutes")

# Parse results
# `scores` maps batch_idx → integer score 0-10; `parse_errors` collects a
# human-readable message for every result that yielded no score.
scores = {}
parse_errors = []

# Retrieve the final batch status
batch_final = client.messages.batches.retrieve(batch.id)

if batch_final.results_url:
    print(f"📥 Fetching results from batch {batch.id}")

    headers = {
        "x-api-key": os.getenv("ANTHROPIC_API_KEY"),
        "anthropic-version": "2023-06-01"
    }

    # With stream=True the connection stays open until the body is consumed
    # or the response is closed, so it is wrapped in `with`. The timeout
    # keeps a stalled connection from hanging the cell.
    with requests.get(batch_final.results_url, headers=headers,
                      stream=True, timeout=60) as response:
        if response.status_code == 200:
            # Process JSONL results line by line
            for line in response.iter_lines():
                if not line:
                    continue

                try:
                    result = json.loads(line)
                    custom_id = result.get("custom_id")

                    if custom_id is None:
                        continue

                    idx = int(custom_id)

                    # Check if request succeeded
                    if result.get("result", {}).get("type") != "succeeded":
                        parse_errors.append(f"Request {custom_id} failed: {result.get('result', {}).get('type')}")
                        continue

                    # Extract the response text
                    message_content = result["result"]["message"]["content"][0]["text"].strip()

                    # Parse the score - should be a single integer 0-10
                    # Try direct int conversion first
                    try:
                        score = int(message_content)
                        if 0 <= score <= 10:
                            scores[idx] = score
                        else:
                            parse_errors.append(f"Invalid score {score} for request {custom_id}")
                    except ValueError:
                        # If that fails, fall back to the first run of digits
                        digits = re.findall(r'\d+', message_content)
                        if digits and 0 <= int(digits[0]) <= 10:
                            scores[idx] = int(digits[0])
                        else:
                            parse_errors.append(f"Could not parse score from: {message_content}")

                except Exception as e:
                    # One malformed line must not abort the run; record it
                    parse_errors.append(f"Error parsing result: {e}")
                    continue

            print(f"✅ Successfully parsed {len(scores)} results")
        else:
            print(f"❌ Error fetching results: HTTP {response.status_code}")
else:
    print("❌ No results URL available")

# Look up each row's batch_idx in `scores`; rows with no entry come back
# missing, which the nullable Int64 dtype keeps as <NA>.
matched_scores = df["batch_idx"].map(scores)
df["score"] = matched_scores.astype("Int64")

# Write the scored frame to OUT_CSV without the batch_idx column or the index.
df_out = df.drop(columns="batch_idx")
df_out.to_csv(OUT_CSV, index=False)

# Headline counts: rows in the frame versus entries in the `scores` dict.
total_rows = len(df)
scored_rows = len(scores)
print("\n✅ Scoring complete")
print(f"   Total headlines: {total_rows}")
print(f"   Successfully scored: {scored_rows}")
print(f"   Failed: {total_rows - scored_rows}")
print(f"   Success rate: {scored_rows/total_rows*100:.1f}%")

# Show at most the first five collected error messages, then a count of the rest.
error_total = len(parse_errors)
if error_total:
    print(f"\n⚠️  Parse errors encountered ({error_total} total):")
    print("\n".join(f"   - {message}" for message in parse_errors[:5]))
    if error_total > 5:
        print(f"   ... and {error_total - 5} more errors")

# Descriptive statistics and a per-value tally of the parsed scores.
if scores:
    print("\n📊 Score distribution:")
    score_series = pd.Series(scores.values())
    print(f"   Mean: {score_series.mean():.2f}")
    print(f"   Std: {score_series.std():.2f}")
    print(f"   Range: {score_series.min()}-{score_series.max()}")

    # One line per possible score 0..10; values that never occur print as 0.
    print("\n   Score counts:")
    tally = score_series.value_counts().reindex(range(11), fill_value=0)
    n_scored = len(score_series)
    for value, occurrences in tally.items():
        share = occurrences / n_scored * 100
        print(f"   {value:2d}: {occurrences:6d} ({share:5.1f}%)")

print(f"\n📁 Results saved to: {OUT_CSV}")