# Tracking Ukraine-War Escalation in US Headlines (Feb 2022 → Apr 2025)
**Goal** : build a **daily “escalation-risk” index** from mainstream English headlines about the Russia–Ukraine war, scoring each day on a 0-10 scale (0 = diplomacy, 10 = nuclear threat) using GPT-4o-mini.  
We will later compare this index to sentiment on Reddit war-discussion subreddits to test whether grassroots discourse **anticipates, mirrors, or lags** mainstream coverage.

Data pipeline (current PoC):
1. Download Ukraine-related headlines via **NewsAPI**.  
2. Cache `date | source | title` to CSV.  
3. Prompt GPT-4o-mini with headline batches; receive a single 0-10 score per day.  
4. Save the daily scores and their 7-day rolling mean to `ukraine_escalation_daily.csv`, then plot both series.


In [None]:
import os, json, time, requests, pandas as pd, numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from openai import OpenAI

In [None]:
# --- .env loader that works in notebooks ------------------------------------
from pathlib import Path
from dotenv import load_dotenv
import os, sys

def find_repo_root(start: Path, marker=".git") -> Path:
    """Return the closest directory at or above *start* that contains *marker*.

    Parameters
    ----------
    start : Path
        Directory where the upward search begins (resolved to an absolute path).
    marker : str
        File or folder name whose presence identifies the root, e.g. ``.git``
        or ``.env``.

    Raises
    ------
    FileNotFoundError
        If no directory from *start* up to and including the filesystem root
        contains *marker*.
    """
    origin = start.resolve()
    # `parents` ends with the filesystem root, so the root itself is inspected
    # too (a `while cur != cur.parent` walk stops one level short of it).
    for candidate in (origin, *origin.parents):
        if (candidate / marker).exists():
            return candidate
    raise FileNotFoundError(f"Repository root with {marker} not found from {start}")

# 1) locate the repo root: the nearest ancestor of the working directory that
#    holds a `.env` file (only `.env` is searched for in this call, not `.git`)
repo_root = find_repo_root(Path.cwd(), marker=".env")

# 2) load the variables declared in that `.env`
load_dotenv(repo_root / ".env")

# 3) make src/ importable when the folder exists (optional)
src_path = repo_root / "src"
if src_path.exists():
    sys.path.append(str(src_path))

# 4) mandatory secrets — fail immediately when any is unset or empty
REQUIRED = ["OPENAI_API_KEY", "NEWSAPI_KEY"]
CREDS = dict(zip(REQUIRED, map(os.getenv, REQUIRED)))
missing = [name for name in REQUIRED if not CREDS[name]]
if missing:
    raise RuntimeError(f"Missing secrets in .env: {', '.join(missing)}")

# handy variables (the Reddit values are optional and may be None)
OPENAI_KEY, NEWSAPI_KEY = CREDS["OPENAI_API_KEY"], CREDS["NEWSAPI_KEY"]
REDDIT_CLIENT_ID, REDDIT_SECRET, REDDIT_AGENT = map(
    os.getenv,
    ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT"),
)

print(f"✅  .env loaded from {repo_root}")

In [None]:
# --- configuration -------------------------------------------------
# Study window: the day of the full-scale invasion through the cut-off date.
start_date, end_date = datetime(2022, 2, 24), datetime(2025, 4, 20)

# Broad keyword search (no source/domain filter): country, city and leader
# spellings joined with OR.
# NOTE: the fetch cell further down assigns start_date / end_date / base_query
# again, with a narrower query that omits putin / russia / invasion.
base_query = " OR ".join([
    "ukraine", "kyiv", "kiev", "lviv", "odessa", "donbas", "donbass",
    '"volodymyr zelensky"', "zelenskyy", "putin", "russia", "invasion",
])


### 1. Fetch & cache headlines

In [None]:
# ---------------------------------------------------------------
# 1.  Fetch & cache *all* Ukraine‑related headlines (24 Feb 2022 → Apr 20 2025)
#      – pulls from EVERY English source in NewsAPI, paged day‑by‑day
# ---------------------------------------------------------------
import sys, time, urllib.parse, requests, pandas as pd
from datetime import datetime, timedelta

# API key must already be present in the environment.
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY")
assert NEWSAPI_KEY, "➡️  set NEWSAPI_KEY env‑var first!"

# date range: invasion day through the cut-off date
start_date, end_date = datetime(2022, 2, 24), datetime(2025, 4, 20)

# keyword query: country, city and leader spellings joined with OR
base_query = " OR ".join((
    "ukraine", "kyiv", "kiev", "lviv", "odessa", "donbas", "donbass",
    '"volodymyr zelensky"', "zelenskyy",
))
ENC_QUERY = urllib.parse.quote_plus(base_query)   # URL-safe form of the query

# URL template for one page of one day's results; filled in with str.format
DAY_URL = "https://newsapi.org/v2/everything?" + "&".join((
    "q={q}",
    "from={f}",
    "to={t}",
    "language=en",
    "sortBy=publishedAt",
    "pageSize=100",
    "page={pg}",
))

def daterange(start, end):
    """Yield *start*, *start* + 1 day, ... up to *end* inclusive.

    Nothing is yielded when *end* lies before *start*.
    """
    whole_days = (end - start).days
    offset = 0
    while offset <= whole_days:
        yield start + timedelta(days=offset)
        offset += 1

# Accumulators: one dict per headline plus a running (pre-dedup) article count.
records = []
total   = 0
for day in daterange(start_date, end_date):
    d_str = day.strftime('%Y-%m-%d')
    # NOTE(review): `to` is the *next* day's date — confirm NewsAPI treats a bare
    # date as midnight so next-day stories are not attributed to this day.
    next_str = (day + timedelta(1)).strftime('%Y-%m-%d')

    pg = 1
    while True:
        url = DAY_URL.format(q=ENC_QUERY, f=d_str, t=next_str, pg=pg)

        # Throttle before *every* request; sleeping only between pages lets
        # single-page days fire back-to-back with no pause at all.
        time.sleep(0.3)
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            resp = requests.get(url, headers={'X-Api-Key': NEWSAPI_KEY}, timeout=30)
        except requests.RequestException as exc:
            sys.stderr.write(f"\n⚠️  {d_str} page {pg} → {type(exc).__name__}: {exc}\n")
            break                                    # skip the rest of this day

        if resp.status_code != 200:
            # Error bodies are not guaranteed to be JSON; fall back to raw text
            # instead of letting the decode error abort the multi-year crawl.
            try:
                detail = resp.json().get('message', '')
            except ValueError:
                detail = resp.text[:200]
            sys.stderr.write(f"\n⚠️  {d_str} page {pg} → {resp.status_code}: {detail}\n")
            break

        arts = resp.json().get('articles', [])
        if not arts:
            break

        for a in arts:
            records.append({
                "date"  : d_str,
                "source": a["source"]["id"] or "unknown",
                "title" : a["title"]
            })
        total += len(arts)
        if len(arts) < 100:                          # short page ⇒ last page
            break
        pg += 1

    sys.stdout.write(f"\r{d_str} ✓ {total:,} headlines so far"); sys.stdout.flush()

print(f"\nDone! Collected {total:,} headlines.")
# Exact (date, source, title) duplicates are dropped before caching to CSV.
df = pd.DataFrame(records).drop_duplicates()
df.to_csv("raw_headlines.csv", index=False)
df.head()

In [None]:
# ================================================================
# Helper — write ONE jsonl file that contains 1 task per calendar day
# ================================================================
import json, pathlib, datetime as dt, pandas as pd, numpy as np, openai

def build_tasks_jsonl(date_start:str,
                      date_end  :str,
                      out_path  :str,
                      csv_path  ="raw_headlines.csv",
                      max_today =120,      # headline cap per day
                      max_look  =30):      # how many prev headlines to feed
    """
    Write the Batch-API JSONL for the per-day escalation-index prompt.

    One task is written for every date between date_start and date_end
    (inclusive) that has at least one headline in the CSV; dates without
    headlines produce no task.

    - date_start / date_end: 'YYYY-MM-DD' strings
    - out_path: file to create (overwritten if it exists)
    - csv_path: headline cache with at least `date` and `title` columns
    - max_today: cap on the number of same-day headlines per task
    - max_look: cap on the number of headlines taken from the 3 preceding days
      (filled oldest day first)

    Each task's custom_id is the ISO date, so results can be keyed by day.
    """
    # ---- Load headlines & construct dict {date: [list of titles]} -----------
    df = pd.read_csv(csv_path, parse_dates=["date"])
    # Rows without a title would otherwise be rendered as "- nan" bullets.
    df = df.dropna(subset=["title"])
    day_mask = (df["date"]>=date_start) & (df["date"]<=date_end)
    daily_titles = (df.loc[day_mask]
                      .groupby("date")["title"]
                      .apply(list)
                      .sort_index())

    all_days = daily_titles.index.to_list()

    # ---- system prompt once -----------------------------------------------
    sys_prompt = ("""
    You are an analyst quantifying RUSSIA–UKRAINE military-escalation RISK.
    
    Scale (integer only):
    0  = No active fighting; purely diplomatic headlines
    2  = Low-level skirmishes, no major offensives
    4  = Noticeable escalation (large drone / missile strikes)
    6  = Major battlefield offensive OR significant weapons shipment
    8  = Strategic escalation (Crimea bridge hit, use of banned weapons)
    10 = Nuclear rhetoric, nuclear forces on alert, or actual WMD use
    
    Output ONE integer from 0-10.
    
    Examples
    DATE: 2022-02-24
    HEADLINES: Russia invades Ukraine, tanks cross border … → 10
    
    DATE: 2022-07-22
    HEADLINES: Grain-export deal signed in Istanbul → 2
    """)

    lookback = dt.timedelta(days=3)

    # ---- iterate day → task -----------------------------------------------
    # `with` guarantees the file is flushed and closed even if a task fails.
    with pathlib.Path(out_path).open("w", encoding="utf-8") as out:
        for day in all_days:
            today_list = daily_titles.loc[day][:max_today]

            # Headlines of the (up to) three preceding days that have data,
            # flattened in one pass instead of repeated list concatenation.
            window_start = day - lookback
            prev_list = [
                title
                for d in all_days if window_start <= d < day
                for title in daily_titles.loc[d]
            ][:max_look]

            user_block = (
                f"### Headlines TODAY ({day.date()})\n"
                + "\n".join(f"- {h}" for h in today_list)
                + "\n\n### Headlines PREVIOUS 3 days\n"
                + "\n".join(f"- {h}" for h in prev_list)
            )

            task = {
                "custom_id": str(day.date()),          # easy key later
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": "gpt-4o-mini",
                    "temperature": 0,
                    "max_tokens": 3,
                    "messages": [
                        {"role":"system", "content": sys_prompt},
                        {"role":"user",   "content": user_block}
                    ]
                }
            }
            out.write(json.dumps(task) + "\n")

    print(f"✅ Wrote {len(all_days)} day‑tasks → {out_path}")

In [None]:
# ================================================================
# Launch ≤2 M‑token batches sequentially
# ================================================================
import time, datetime as dt, openai, pathlib, json

client = openai.OpenAI()

# One batch per (first day, last day) slice, submitted one after another.
date_slices = [                        # adjust if you like
    ("2022-02-24", "2022-12-31"),
    ("2023-01-01", "2023-12-31"),
    ("2024-01-01", "2024-12-31"),
    ("2025-01-01", "2025-04-20"),
]

launched_ids = []

for idx, (d0, d1) in enumerate(date_slices, 1):
    fname = f"daily_escalation_tasks_{idx}.jsonl"
    build_tasks_jsonl(d0, d1, fname)                # <- now defined!

    # Close the upload handle as soon as the file has been sent.
    with open(fname, "rb") as fh:
        file_id = client.files.create(file=fh, purpose="batch").id
    batch   = client.batches.create(
                input_file_id=file_id,
                endpoint="/v1/chat/completions",
                completion_window="24h")
    launched_ids.append(batch.id)
    print(f"\n📤  [{idx}/{len(date_slices)}] Batch {batch.id} submitted — validating")

    # ----- poll every 20 s until finished -----------------------
    while True:
        b = client.batches.retrieve(batch.id)
        c = b.request_counts
        ts = dt.datetime.now().strftime("%H:%M:%S")
        print(f"\r   {ts}  {b.status:<10}  {c.completed:>5}/{c.total}  (failed {c.failed})",
              end="", flush=True)
        # "cancelled" is terminal too; without it a cancelled batch (e.g. one
        # stopped by the cancel cell in this notebook) would be polled forever.
        if b.status in ("completed", "failed", "expired", "cancelled"):
            print()     # newline
            if b.status != "completed":
                raise RuntimeError(f"Batch {batch.id} ended with {b.status}")
            break
        time.sleep(20)

print("\n🎉  All slices done.  Batch IDs →", launched_ids)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  A. Launch (or re-launch) the 4th/last batch only            ║
# ╚══════════════════════════════════════════════════════════════╝
import datetime as dt, time, json, openai, pathlib

client = openai.OpenAI()
d0, d1   = "2025-01-01", "2025-04-20"
fname    = "daily_escalation_tasks_4.jsonl"

# — build / overwrite the JSONL for this slice —
build_tasks_jsonl(d0, d1, fname)

# — upload + create batch (handle closed right after the upload) —
with open(fname, "rb") as fh:
    file_id = client.files.create(file=fh, purpose="batch").id
batch4  = client.batches.create(
            input_file_id     = file_id,
            endpoint          = "/v1/chat/completions",
            completion_window = "24h")
print("🆕  Batch-4 ID:", batch4.id, "— validating")

# — poll until done; auto-retry on transient connection hiccups —
# Only the network call sits inside `try`.  With the status check inside a
# blanket `except Exception`, the RuntimeError raised for a failed/expired
# batch was itself caught as "transient" and the loop never ended.
while True:
    try:
        b = client.batches.retrieve(batch4.id)
    except (openai.APIConnectionError,        # includes request timeouts
            openai.RateLimitError,
            openai.InternalServerError) as e:
        print("\n⚠️  transient error:", type(e).__name__, "-- retrying in 30 s")
        time.sleep(30)
        continue

    c = b.request_counts
    ts = dt.datetime.now().strftime("%H:%M:%S")
    print(f"\r{ts}  {b.status:<10}  {c.completed}/{c.total}  (failed {c.failed})",
          end="", flush=True)
    # "cancelled" is a terminal state as well and must stop the polling.
    if b.status in ("completed", "failed", "expired", "cancelled"):
        print()                       # newline
        if b.status != "completed":
            raise RuntimeError(f"Batch-4 ended with status {b.status}")
        break
    time.sleep(25)

print("✅  Batch-4 finished.")

In [None]:

# ------------------------------------------------------------------
# 4.  Download results & assemble daily index
# ------------------------------------------------------------------
# `b` is whatever batch object the most recently executed launch cell left behind.
content = client.files.content(b.output_file_id).content
scores  = {}
for line in content.splitlines():
    rec  = json.loads(line)
    cid  = rec["custom_id"]
    # The task builders in this notebook write the bare ISO date as custom_id.
    # Drop a legacy "d-" prefix only when it is really there: slicing [2:]
    # unconditionally turned "2025-01-01" into "25-01-01" and mangled the date.
    day  = cid[2:] if cid.startswith("d-") else cid
    s    = rec["response"]["body"]["choices"][0]["message"]["content"].strip()
    try:
        scores[pd.to_datetime(day)] = int(s)
    except ValueError:
        # Non-integer reply ⇒ keep the day, mark the score as missing.
        scores[pd.to_datetime(day)] = np.nan

daily_idx = (pd.Series(scores)
               .sort_index()
               .to_frame("escalation"))

# 7-row rolling mean (rows are days that have a score)
daily_idx["roll7"] = daily_idx["escalation"].rolling(7, min_periods=1).mean()

In [None]:
# ------------------------------------------------------------------
# 5.  Plot
# ------------------------------------------------------------------
# Draw the raw daily scores and their rolling mean on one set of axes.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(daily_idx.index, daily_idx["escalation"],
        marker='.', lw=0.8, alpha=0.6, label="Daily index")
ax.plot(daily_idx.index, daily_idx["roll7"],
        color="tab:red", lw=2.5, label="7‑day rolling mean")

ax.set_ylabel("Escalation index  (0 = lower … 10 = higher)")
ax.set_title("Per‑day ‘Escalation‑Risk’ Signal in Ukraine‑War Headlines")
ax.grid(True, alpha=.3)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# ------------------------------------------------------------------
# Merge ALL 4 batches   →   full escalation index 2022-02-24 … 2025-04-20
# ------------------------------------------------------------------
import json, pandas as pd, numpy as np, matplotlib.pyplot as plt, datetime as dt, openai, collections

openai.api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI()

# <<<—— put your four finished batch IDs here ———————————————————
BATCH_IDS = [
    "batch_6807f1772ca88190b2ad0fa77855d67a",   # 2022 slice
    "batch_6807f1ca7a508190a4292c2a721ab9c2",   # 2023 slice
    "batch_6807f4e2abc881908e73abd7270a0c22",   # 2024 slice
    "batch_680a7d198fd08190b8493d3abf137548",   # 2025-Q1 slice  << new
]
# ------------------------------------------------------------------


def _score_or_nan(text):
    """Return int(text), or NaN when the model reply is not a plain integer."""
    try:
        return int(text)
    except ValueError:
        return np.nan


# Collect one score per day; a later batch overwrites an earlier one for the same day.
all_scores = {}          # {Timestamp : int | NaN}
for bid in BATCH_IDS:
    b = client.batches.retrieve(bid)
    assert b.status == "completed", f"{bid} not completed (status={b.status})"
    print(f"⬇️  downloading {bid} …")
    data = client.files.content(b.output_file_id).content
    for rec in map(json.loads, data.splitlines()):
        day = pd.to_datetime(rec["custom_id"])          # YYYY-MM-DD
        reply = rec["response"]["body"]["choices"][0]["message"]["content"]
        all_scores[day] = _score_or_nan(reply.strip())

print(f"✅ merged {len(all_scores):,} daily scores")

# — tidy DataFrame: raw score plus a 7-row rolling mean —
ser = pd.Series(all_scores, name="escalation").sort_index()
df  = ser.to_frame()
df["roll7"] = df["escalation"].rolling(7, min_periods=1).mean()

df.to_csv("ukraine_escalation_daily.csv")
print("📝 wrote ukraine_escalation_daily.csv")

# — quick sanity-check of the score distribution —
print("\nScore distribution (value_counts):")
print(df["escalation"].value_counts().sort_index())

# — plot —
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df.index, df["escalation"], lw=.8, alpha=.6,
        marker='.', markersize=3, label="Daily index")
ax.plot(df.index, df["roll7"], color="tab:red", lw=2.5,
        label="7-day rolling mean")
ax.set_ylabel("Escalation risk   (0 = low … 10 = much higher)")
ax.set_title("LLM-derived Ukraine Escalation Index  (full war period)")
ax.grid(alpha=.3)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# ---------------------------------------------------------------
# Inspect the 2025-only slice that used the new prompt
# ---------------------------------------------------------------
import json, pandas as pd, numpy as np, matplotlib.pyplot as plt, openai, datetime as dt

openai.api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI()

BATCH4_ID = "batch_680a7d198fd08190b8493d3abf137548"   #  ◀︎ paste the real ID

# ── download & parse ────────────────────────────────────────────
b = client.batches.retrieve(BATCH4_ID)
assert b.status == "completed", f"batch-4 not completed (status={b.status})"

data = client.files.content(b.output_file_id).content


def _score_or_nan(text):
    """Return int(text), or NaN when the model reply is not a plain integer."""
    try:
        return int(text)
    except ValueError:
        return np.nan


# One entry per output line, keyed by the day encoded in custom_id.
scores = {}
for rec in map(json.loads, data.splitlines()):
    day = pd.to_datetime(rec["custom_id"])
    reply = rec["response"]["body"]["choices"][0]["message"]["content"]
    scores[day] = _score_or_nan(reply.strip())

ser = pd.Series(scores, name="escalation").sort_index()
df  = ser.to_frame()
df["roll7"] = df["escalation"].rolling(7, min_periods=1).mean()

print("Value-counts for 2025 slice:")
print(df["escalation"].value_counts().sort_index())

# ── plot ────────────────────────────────────────────────────────
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df.index, df["escalation"], marker='.', lw=.8, alpha=.6,
        label="Daily index")
ax.plot(df.index, df["roll7"], color="tab:red", lw=2.2,
        label="7-day rolling mean")
ax.set_ylabel("Escalation risk   (0 = low … 10 = much higher)")
ax.set_title("Ukraine-War Escalation Index — 2025-Jan-01 → Apr-20  (new prompt)")
ax.grid(alpha=.3)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
import openai, os, pprint
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Cancel every batch that is still being validated or processed.
# Iterating the list result itself lets the SDK fetch follow-up pages; reading
# `.data` only exposes the first page, so older active batches were missed.
for b in client.batches.list():               # no status filter available
    if b.status in {"in_progress", "validating"}:
        print("cancelling", b.id, b.status)
        client.batches.cancel(b.id)


In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Build day-level JSONL(s)  •  launch batches  •  analyse     ║
# ╚══════════════════════════════════════════════════════════════╝
import os, json, math, time, pathlib, datetime as dt
import pandas as pd, numpy as np, openai

client = openai.OpenAI()

# ────────────────────────────────────────────────────────────────
# 1.  Load cached headlines  → list-of-titles per day
# ────────────────────────────────────────────────────────────────
# A headline repeated on the same date (e.g. by several sources) is kept once.
df = pd.read_csv("raw_headlines.csv", parse_dates=["date"])
df = df.drop_duplicates(subset=["date", "title"])

# Series indexed by date; each value is that day's list of headlines.
titles_by_date = df.groupby("date")["title"]
daily_titles = titles_by_date.apply(list).sort_index()

# ────────────────────────────────────────────────────────────────
# 2.  Rich 0-10 rubric (system prompt) + one exemplar per score level (11 in total)
# ────────────────────────────────────────────────────────────────
SYSTEM_PROMPT = """

You are a veteran conflict-desk editor and media analyst rating how much today's HEADLINES
(together, on average) suggest an escalation in Russia-Ukraine military risk. 

Rate TODAY’s overall escalation-risk signal in the Russia-Ukraine war on a 0-10 integer scale, 
using the following examples as your reference guide:

**How to score**  
0  — Status-quo calm / clear signs of de-escalation  
  *Ex.*  UN aid convoy reaches Kherson unimpeded for second day, agencies say fighting “silent”  
1  — Small de-escalatory move (prisoner swap, minor corridor reopened)  
  *Ex.*  200 Russian & Ukrainian soldiers freed in surprise prisoner-exchange brokered by UAE  
2  — Cease-fire or peace-talk gesture, but fragile or localised  
  *Ex.*  Turkey offers to host new Russia-Ukraine peace talks as Erdoğan meets Zelensky  
3  — Routine frontline shelling & rhetoric; no notable change vs. recent days  
  *Ex.*  UN warns Black-Sea grain deal at risk after Russia pauses participation for inspection row  
4  — Limited strike / drone attack away from front lines; casualties low  
  *Ex.*  Russian drones hit Kyiv power grid overnight, no deaths reported, officials say  
5  — Large, coordinated missile/drone barrage OR major battlefield push  
  *Ex.*  Kremlin launches 40-missile wave across 11 Ukrainian cities after rail-depot blast  
6  — Strategic shift (partial mobilisation, new weapons promised, major aid cut-off)  
  *Ex.*  Putin announces “partial mobilisation” of 300 000 reservists for Ukraine war effort  
7  — Widest attack since war’s start OR deliberate strike on NATO-bordering region  
  *Ex.*  Ukraine says Russia fires 90 missiles in biggest attack on Kyiv & Lviv since February ’22  
8  — Open nuclear or WW-3 rhetoric / forward deployment of tactical nukes  
  *Ex.*  Putin says Russia will station tactical nuclear weapons in Belarus this summer  
9  — Nuclear warheads physically moved / command-and-control escalations; NATO emergency meet  
  *Ex.*  Kremlin transfers nuclear warheads to Belarus; NATO calls urgent summit in Brussels  
10 — Actual nuclear use order or confirmed detonation / direct NATO-Russia clash announced  
  *Ex.*  Moscow conducts “live tactical nuclear strike drill”; White House vows “severe response”

Return **only** the integer (0-10).  No extra words, no units.

"""

# ────────────────────────────────────────────────────────────────
# 3.  Rough token sizing  → split the days into several JSONL files
#    • at most MAX_HEADLINES (120) headlines are fed per day
#    • each headline is budgeted at TOKENS_PER_HEAD (15) tokens
#    • a day therefore costs 80 + 120 × 15 = 1 880 tokens in this estimate
#    NOTE(review): TOKENS_SYS = 80 looks low for the long SYSTEM_PROMPT —
#    confirm the estimate against real token counts.
# ────────────────────────────────────────────────────────────────
MAX_HEADLINES   = 120            # truncate per-day list
TOKENS_PER_HEAD = 15             # budget per headline
TOKENS_SYS      = 80             # budget for prompt, wrappers, misc
TOKENS_DAY      = TOKENS_SYS + MAX_HEADLINES * TOKENS_PER_HEAD   # = 1 880
TOKENS_LIMIT    = 1_000_000      # per-file budget (cushion under the 2 M mentioned above)

DAYS_PER_FILE   = TOKENS_LIMIT // TOKENS_DAY      # = 531

# -- cut the per-day Series into consecutive blocks of DAYS_PER_FILE days --
day_blocks = []
for offset in range(0, len(daily_titles), DAYS_PER_FILE):
    day_blocks.append(daily_titles.iloc[offset : offset + DAYS_PER_FILE])

jsonl_paths = []

# One JSONL file per block; one chat-completion task per day inside it.
for i, block in enumerate(day_blocks, 1):
    fname = f"ua_daily_tasks_{i}.jsonl"
    task_lines = []
    for day, titles in block.items():
        bullets = [f"- {t}" for t in titles[:MAX_HEADLINES]]
        user_block = "### Headlines TODAY\n" + "\n".join(bullets)

        task = {
            "custom_id": str(day.date()),        # ISO date → key for the results
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "temperature": 0,
                "max_tokens": 3,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user",   "content": user_block}
                ]
            }
        }
        task_lines.append(json.dumps(task) + "\n")

    with open(fname, "w") as fh:
        fh.writelines(task_lines)
    jsonl_paths.append(fname)
    print(f"✅ wrote {len(block):,} day-tasks → {fname}")

# ────────────────────────────────────────────────────────────────
# 4.  Launch one batch per JSONL, poll until each finishes
# ────────────────────────────────────────────────────────────────
batch_ids = []

for p in jsonl_paths:
    # Close the upload handle as soon as the file has been sent.
    with open(p, "rb") as fh:
        fid = client.files.create(file=fh, purpose="batch").id
    batch = client.batches.create(
        input_file_id=fid,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    batch_ids.append(batch.id)
    print(f"\n📤 Batch {batch.id} → validating")

    # simple poll loop
    while True:
        b  = client.batches.retrieve(batch.id)
        rc = b.request_counts
        done  = rc.completed + rc.failed
        total = rc.total
        ts = dt.datetime.now().strftime("%H:%M:%S")
        print(f"\r {ts} {b.status:<10} {done}/{total}", end="", flush=True)
        # "cancelled" is terminal too; without it a cancelled batch is polled forever.
        if b.status in {"completed", "failed", "expired", "cancelled"}:
            print()
            if b.status != "completed":
                # One formatted string: passing two positional arguments made
                # the exception message render as a tuple.
                raise RuntimeError(f"batch ended: {b.status}")
            break
        time.sleep(10)

print("\n🎉  All daily batches finished →", batch_ids)

# ────────────────────────────────────────────────────────────────
# 5.  Download scores  →  daily DataFrame
# ────────────────────────────────────────────────────────────────
def _score_or_nan(text):
    """Return int(text), or NaN when the model reply is not a plain integer."""
    try:
        return int(text)
    except ValueError:
        return np.nan


scores = {}
for bid in batch_ids:
    out = client.batches.retrieve(bid).output_file_id
    for rec in map(json.loads, client.files.content(out).content.splitlines()):
        day = pd.to_datetime(rec["custom_id"])
        reply = rec["response"]["body"]["choices"][0]["message"]["content"]
        scores[day] = _score_or_nan(reply.strip())

# Date-indexed frame: raw score plus a 7-row rolling mean.
daily = pd.Series(scores, name="score").sort_index().to_frame()
daily["roll7"] = daily["score"].rolling(7, min_periods=1).mean()

daily.to_csv("ukraine_escalation_daily_v2.csv")
print("📝 wrote ukraine_escalation_daily_v2.csv")

# quick distribution check
print("\nValue counts:")
print(daily["score"].value_counts().sort_index())

# ────────────────────────────────────────────────────────────────
# 6.  Plot
# ────────────────────────────────────────────────────────────────
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(daily.index, daily["score"], lw=.8, alpha=.6,
        marker='.', markersize=3, label="Daily score")
ax.plot(daily["roll7"], color="tab:red", lw=2.5, label="7-day rolling mean")
ax.set_ylabel("Escalation risk  (0 = low … 10 = higher)")
ax.set_title("LLM-derived Ukraine Escalation Index  (Feb 2022 → Apr 2025)")
ax.grid(alpha=.3)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# ---------------------------------------------------------------
# helper: create a batch & poll; auto-retry on token-limit error
# ---------------------------------------------------------------
def _batch_error_text(batch):
    """Return all error codes/messages attached to *batch* as one lower-cased string."""
    found = []
    legacy = getattr(batch, "message", None)      # tolerated if a top-level field exists
    if legacy:
        found.append(str(legacy))
    errors = getattr(batch, "errors", None)
    for err in (getattr(errors, "data", None) or []):
        for field in ("code", "message"):
            value = getattr(err, field, None)
            if value:
                found.append(str(value))
    return " ".join(found).lower()


def launch_batch(file_id):
    """Create a batch for *file_id*, poll it, and return the completed batch.

    The batch is re-created from the same uploaded file whenever the enqueued
    token limit is reported, either as a BadRequestError at creation time or
    as the failure reason of the batch itself.  Between attempts the function
    sleeps `backoff` seconds.

    Relies on the module-level `client`, `openai`, `time` and `dt` names.

    Raises
    ------
    openai.BadRequestError
        For creation errors other than the enqueued-token limit.
    RuntimeError
        If the batch fails for another reason, expires, or is cancelled.
    """
    backoff = 90
    while True:
        # ---- create ---------------------------------------------------------
        try:
            batch = client.batches.create(
                input_file_id=file_id,
                endpoint="/v1/chat/completions",
                completion_window="24h",
            )
        except openai.BadRequestError as e:
            if "enqueued token limit reached" in str(e).lower():
                print(f"🔄 token envelope busy — sleeping {backoff}s …")
                time.sleep(backoff)
                continue
            raise

        # ---- poll -----------------------------------------------------------
        print(f"\n📤 Batch {batch.id} → validating")
        while True:
            b  = client.batches.retrieve(batch.id)
            rc = b.request_counts
            ts = dt.datetime.now().strftime("%H:%M:%S")
            completed = rc.completed if rc else 0
            total     = (rc.total if rc else 0) or '?'
            print(f"\r {ts} {b.status:<10} {completed}/{total}",
                  end="", flush=True)
            if b.status == "completed":
                print()
                return b                        # success
            if b.status == "failed":
                # ───── automatic token-limit retry ───────────────
                # Failure details are read from `errors.data`; the previous
                # `b.message` lookup is not a documented batch field.
                reason = _batch_error_text(b)
                if "enqueued token limit" in reason or "token_limit_exceeded" in reason:
                    print(f"\n⚠️  token envelope still hot — sleeping {backoff}s "
                          "and retrying same file …")
                    time.sleep(backoff)
                    break                       # exit poll loop ⇒ outer while retries
                raise RuntimeError(f"batch {b.id} ended: failed")
            if b.status in ("expired", "cancelled"):
                # Terminal states that would otherwise be polled forever.
                print()
                raise RuntimeError(f"batch {b.id} ended: {b.status}")
            time.sleep(10)


In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  A.  Load + plot *completed* batch_1                         ║
# ║  B.  Queue leftover days in safer ≤350-day batches           ║
# ╚══════════════════════════════════════════════════════════════╝
import os, json, time, datetime as dt, pathlib
import pandas as pd, numpy as np, matplotlib.pyplot as plt
import openai
client = openai.OpenAI()

# ────────────────────────────────────────────────────────────────
# A.  Download results from the *finished* batch and plot
# ────────────────────────────────────────────────────────────────
BATCH1_ID = "batch_680a9515e434819086bd06e8b10354ac"            # ⬅︎ your success ID

out_id  = client.batches.retrieve(BATCH1_ID).output_file_id
lines   = client.files.content(out_id).content.splitlines()


def _score_or_nan(text):
    """Return int(text), or NaN when the model reply is not a plain integer."""
    try:
        return int(text)
    except ValueError:
        return np.nan


# One entry per output line, keyed by the day encoded in custom_id.
daily1 = {}
for rec in map(json.loads, lines):
    day = pd.to_datetime(rec["custom_id"])
    val = rec["response"]["body"]["choices"][0]["message"]["content"].strip()
    daily1[day] = _score_or_nan(val)

ser1 = pd.Series(daily1).sort_index().rename("score")
ser1_7 = ser1.rolling(7, min_periods=1).mean()      # 7-row rolling mean

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(ser1.index, ser1, marker='.', lw=.8, alpha=.6,
        label="Daily score")
ax.plot(ser1.index, ser1_7, color="tab:red", lw=2.2,
        label="7-day rolling mean")
ax.set_title("Ukraine Escalation Index — first 531 days (batch 1)")
ax.set_ylabel("Escalation risk  (0 … 10)")
ax.grid(alpha=.3)
ax.legend()
fig.tight_layout()
plt.show()

print("✅ plotted", len(ser1), "days")

# ────────────────────────────────────────────────────────────────
# B.  Work out which days still need a score
#     (every day in the headline cache minus the days returned by batch 1)
# ────────────────────────────────────────────────────────────────
headlines = pd.read_csv("raw_headlines.csv", parse_dates=["date"])
headlines = headlines.drop_duplicates(subset=["date", "title"])
all_titles = headlines.groupby("date")["title"].apply(list).sort_index()

done_days   = set(ser1.index.date)             # from batch 1

# --- boolean mask: True where the day has not been scored yet ---
mask = np.isin(all_titles.index.date, list(done_days), invert=True)
todo_series = all_titles[mask]
print("⏩  remaining days to score:", len(todo_series))

# ────────────────────────────────────────────────────────────────
# B.  Settings for re-batching the leftover days in ≤200-day slices
# ────────────────────────────────────────────────────────────────
CHUNK_DAYS   = 200         # days per batch (lowered from 350)
COOLDOWN_SEC = 75          # intended pause, in seconds, after each batch finishes


# Batches that have already finished; their days must not be queued again.
ALREADY_COMPLETE = [
    "batch_680a9515e434819086bd06e8b10354ac",   # 531-day master
    "batch_680a97f72b5c819098c99c763d6da397",   # first 350-day slice
    "batch_680a9fb4fb088190b2525a8b97b027ac",   # recent 200-day slice ✅
]

# Start from the days of batch 1, then add every custom_id date found in the
# output file of each finished batch.
done_days = set(ser1.index.date)                # first 531 days

for bid in ALREADY_COMPLETE:
    out_id = client.batches.retrieve(bid).output_file_id
    done_days.update(
        pd.to_datetime(json.loads(ln)["custom_id"]).date()
        for ln in client.files.content(out_id).content.splitlines()
    )

def remaining_days(scored_dates):
    """Return the entries of ``all_titles`` whose date is not in *scored_dates*.

    The selector is rebuilt from the module-level ``all_titles`` on every
    call, so the result always reflects the current contents of
    *scored_dates* (an iterable of ``datetime.date`` objects).
    """
    unscored = np.isin(all_titles.index.date, list(scored_dates), invert=True)
    return all_titles[unscored]

todo_series = remaining_days(done_days)
print("⏩  remaining days to score:", len(todo_series))

# Ids of the batches launched by this loop, in launch order.
left_batch_ids = []

# Peel off up to CHUNK_DAYS unscored days per iteration until none remain.
while len(todo_series):
    block        = todo_series.iloc[:CHUNK_DAYS]
    block_no     = len(left_batch_ids) + 1
    fname        = f"ua_daily_tasks_left_{block_no}.jsonl"

    # ---------- write JSONL ---------------------------------------------------
    # One request per day; custom_id carries the ISO date so the score can be
    # matched back to its day when the output file is parsed.
    with open(fname, "w", encoding="utf-8") as fh:
        for day, titles in block.items():
            task = {
                "custom_id": str(day.date()),
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": "gpt-4o-mini",
                    "temperature": 0,
                    "max_tokens": 3,
                    "messages": [
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user",   "content":
                           "### Headlines TODAY\n" +
                           "\n".join(f"- {t}" for t in titles[:MAX_HEADLINES])
                        }
                    ]
                }
            }
            fh.write(json.dumps(task) + "\n")
    print(f"✅ wrote {len(block):,} day-tasks → {fname}")

    # ---------- launch & poll -------------------------------------------------
    # Upload inside a context manager so the read handle is closed as soon as
    # the upload call returns (the bare open() used before leaked one handle
    # per iteration).
    with open(fname, "rb") as upload_fh:
        fid = client.files.create(file=upload_fh, purpose="batch").id

    # launch with safety wrapper (launch_batch is defined elsewhere in the file)
    completed_batch = launch_batch(fid)
    left_batch_ids.append(completed_batch.id)

    # mark these days as done & recalc remainder
    done_days.update(block.index.date)
    todo_series = remaining_days(done_days)

print("\n🎉  All remaining batches finished →", left_batch_ids)

# ╔══════════════════════════════════════════════════════════════╗
# ║  C.  Download + merge *all* scores, save & re-plot           ║
# ╚══════════════════════════════════════════════════════════════╝
# Collect scores from every batch that contributed to done_days: the ones
# listed in ALREADY_COMPLETE as well as the ones launched just above.
# Reading only left_batch_ids would silently drop the days that were skipped
# in the loop because an earlier batch had already scored them.
scores_rest = {}
for bid in [*ALREADY_COMPLETE, *left_batch_ids]:
    out = client.batches.retrieve(bid).output_file_id
    for ln in client.files.content(out).content.splitlines():
        rec = json.loads(ln)
        day = pd.to_datetime(rec["custom_id"])
        try:
            scores_rest[day] = int(
                rec["response"]["body"]["choices"][0]["message"]["content"].strip()
            )
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            # Non-numeric reply, or a line without a usable completion
            # (e.g. a null response) → keep the day, mark the score missing.
            scores_rest[day] = np.nan

ser_rest = (pd.Series(scores_rest).sort_index()
              .rename("score"))

# combine with the first-batch series `ser1` from section A; where a day is
# present in both, the ser1 value is kept (it comes first in the concat).
pieces = [ser1, ser_rest] if len(ser_rest) else [ser1]
full = pd.concat(pieces)
full = (full[~full.index.duplicated(keep="first")]
          .sort_index()
          .rename("score"))

full_7 = full.rolling(7, min_periods=1).mean()
full.to_csv("ukraine_escalation_daily_v2.csv")
print(f"📝 wrote ukraine_escalation_daily_v2.csv  ({len(full)} days)")

# fresh plot
plt.figure(figsize=(14,6))
plt.plot(full.index, full, marker='.', lw=.8, alpha=.6, label="Daily score")
plt.plot(full.index, full_7, color="tab:red", lw=2.5, label="7-day rolling mean")
plt.title("LLM-derived Ukraine Escalation Index  (Feb 2022 → Apr 2025)")
plt.ylabel("Escalation risk  (0 = low … 10 = high)")
plt.grid(alpha=.3); plt.legend(); plt.tight_layout(); plt.show()


In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  D.  Finish the last 71 days with plain chat completions     ║
# ╚══════════════════════════════════════════════════════════════╝
import time, json, pandas as pd, numpy as np, datetime as dt

# -- rebuild scored series from *all* completed batches -----------
COMPLETED_BATCHES = [
    "batch_680a9515e434819086bd06e8b10354ac",   # 531-day
    "batch_680a97f72b5c819098c99c763d6da397",   # 350-day
    "batch_680a9fb4fb088190b2525a8b97b027ac",   # 200-day
]

scored = {}           # day → score
for bid in COMPLETED_BATCHES:
    out_id = client.batches.retrieve(bid).output_file_id
    for ln in client.files.content(out_id).content.splitlines():
        rec  = json.loads(ln)
        day  = pd.to_datetime(rec["custom_id"])
        try:
            val = rec["response"]["body"]["choices"][0]["message"]["content"].strip()
        except (AttributeError, IndexError, KeyError, TypeError):
            # The line carries no usable completion (e.g. null response).
            # Leave the day out of `scored` so it lands in todo_series below
            # and is scored by the direct-call loop instead of crashing here.
            continue
        try:
            scored[day] = int(val)
        except ValueError:
            # A reply was returned but it is not an integer.
            scored[day] = np.nan

# -- identify remaining days (should be 71) -----------------------
done_days   = {d.date() for d in scored}
mask        = ~np.isin(all_titles.index.date, list(done_days))
todo_series = all_titles[mask]
print("⚙️  days left:", len(todo_series))

# -- call chat completions one-by-one ------------------------------
# One synchronous request per remaining day, using the same prompt layout
# as the batch tasks.
scores_tail = {}
for i, (day, titles) in enumerate(todo_series.items(), 1):
    user_block = "### Headlines TODAY\n" + \
                 "\n".join(f"- {t}" for t in titles[:MAX_HEADLINES])

    rsp = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=3,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user",   "content": user_block},
        ],
    )
    # A non-numeric (or empty) reply must not abort the loop and throw away
    # the scores gathered so far; record NaN, as the batch parsing does.
    try:
        score = int(rsp.choices[0].message.content.strip())
    except (AttributeError, IndexError, ValueError):
        score = np.nan
    scores_tail[pd.to_datetime(day)] = score
    print(f"{i:02}/{len(todo_series)} {day.date()} → {score}")
    time.sleep(0.6)            # ≤100 req/min safety cushion

print("✅ direct calls finished")

# -- combine batch scores with direct-call scores, then persist ----
ser_done = pd.Series(scored, name="score")
ser_tail = pd.Series(scores_tail, name="score")
full = pd.concat([ser_done, ser_tail])
full = full.sort_index()

# 7-observation trailing mean; defined from the very first observation.
full_7 = full.rolling(7, min_periods=1).mean()

full.to_csv("ukraine_escalation_daily_v2.csv")
print("📝 wrote ukraine_escalation_daily_v2.csv  (", len(full), "days)")

# -- draw raw series with its rolling mean on top -------------------
import matplotlib.pyplot as plt
plt.figure(figsize=(14,6))
plt.plot(full.index, full, marker='.', lw=.8, alpha=.6, label="Daily score")
plt.plot(full.index, full_7, color="tab:red", lw=2.5, label="7-day rolling mean")
plt.title("LLM-derived Ukraine Escalation Index  (Feb 2022 → Apr 2025)")
plt.ylabel("Escalation risk  (0 = low … 10 = high)")
plt.grid(alpha=.3)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# -- merge everything & save --------------------------------------
# Batch-derived scores plus the direct-call tail, ordered by date.
ser_done  = pd.Series(scored,      name="score")
ser_tail  = pd.Series(scores_tail, name="score")
full      = pd.concat([ser_done, ser_tail]).sort_index()

full_7 = full.rolling(7, min_periods=1).mean()
full.to_csv("ukraine_escalation_daily_v2.csv")
print("📝 wrote ukraine_escalation_daily_v2.csv  (", len(full), "days)")

# -- plot ---------------------------------------------------------
import time, json, pandas as pd, numpy as np, datetime as dt
import matplotlib.pyplot as plt
plt.figure(figsize=(14,6))
plt.plot(full.index, full, marker='.', lw=.8, alpha=.6, label="Daily score")
plt.plot(full.index, full_7, color="tab:red", lw=2.5, label="7-day rolling mean")
plt.title("LLM-derived Ukraine Escalation Index  (Feb 2022 → Apr 2025)")
plt.ylabel("Escalation risk  (0 = low … 10 = high)")
plt.grid(alpha=.3); plt.legend(); plt.tight_layout()
# Write the PNG while the figure is still current: once show() has run the
# figure may already be released, and a later savefig() then writes an
# empty canvas.
plt.savefig("fig_escalation7d.png", dpi=300)
plt.show()

## Richer views of the 0-10 series

### What the next cell shows
1. Histogram & annual box-plots – are some years genuinely “hotter”?

2. 30-day z-score (standardised anomaly) – highlights departures from the local mean; spikes often line up with mobilisation / large aid packages.

3. Signal‐change heat-map – each cell = day-to-day Δscore; reds = jumps, blues = dips.

4. Rolling volatility – 14-day stdev exposes periods of uncertainty even when the mean is flat.

*Why it matters*

- A stable headline score but rising volatility flags fog-of-war periods (e.g., Kharkiv counter-offensive rumours).

- Z-scores strip out long-run drift: you can overlay them on external series (oil prices, aid deliveries) without scale headaches.


In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Visual deep-dive into Escalation Index                      ║
# ╚══════════════════════════════════════════════════════════════╝
import matplotlib.pyplot as plt
import numpy as np

# 1. histogram + yearly boxes -----------------------------------
fig, ax = plt.subplots(1, 2, figsize=(14,4))

# -- histogram --
# Explicit edges at -0.5, 0.5, …, 10.5 give one bar per integer score 0-10.
# A bare bins=11 would spread 11 equal bins over the observed min..max, so
# the bars only line up with integers when the data happens to span 0..10.
score_bins = np.arange(-0.5, 11.5, 1.0)
full.plot.hist(bins=score_bins, rwidth=.8, ax=ax[0])
ax[0].set_title("Score histogram"); ax[0].set_xlabel("Score"); ax[0].set_ylabel("Days")

# -- yearly box-plots --
full_df = full.to_frame(name="score")
full_df["year"] = full_df.index.year          # helper col used for grouping
full_df.boxplot(column="score", by="year", ax=ax[1], showfliers=False)

ax[1].set_title("Year-by-year distribution")
ax[1].set_xlabel("Year"); ax[1].set_ylabel("Score")
plt.suptitle("")                              # clear the suptitle added by boxplot(by=...)
plt.tight_layout(); plt.show()

# 2. rolling z-score --------------------------------------------
# Standardise each score against the trailing 30-observation window
# (at least 15 observations are required before a value is produced).
window30 = full.rolling(30, min_periods=15)
z = (full - window30.mean()) / window30.std()

plt.figure(figsize=(14,3))
plt.plot(z.index, z, lw=1)
plt.axhline(0, c='k', lw=.7)          # zero line
plt.axhline(2, c='r', ls="--")        # +2σ reference
plt.title("30-day z-score (de-seasoned anomalies)")
plt.ylabel("σ")
plt.tight_layout()
plt.show()

# 3. heat-map of daily changes ----------------------------------
# Day-to-day change, first row set to 0 and values clipped to ±3 so a few
# extreme jumps do not wash out the colour scale.
delta = full.diff().fillna(0).clip(-3, 3)

# ---- lay the series out row by row, n_cols observations per row ----
n_days   = len(delta)
n_cols   = 52                         # consecutive observations per row
n_rows   = int(np.ceil(n_days / n_cols))
pad_len  = n_rows * n_cols - n_days   # NaN cells needed to fill the last row

padded   = np.concatenate([delta.values, np.full(pad_len, np.nan)])
mat      = padded.reshape(n_rows, n_cols)

plt.figure(figsize=(14, 2.8))
plt.imshow(mat, cmap="bwr", aspect="auto", vmin=-3, vmax=3)
plt.colorbar(label="Δ score (day-to-day)")
# Label each row with the date of its first cell.  Row i starts at
# observation i * n_cols, so stepping the label by one week per row (as
# before) mislabelled every row after the first.
plt.yticks(range(n_rows),
           [delta.index[i * n_cols].strftime("%Y-%m-%d")
            for i in range(n_rows)])
plt.title("Day-to-day jump heat-map (reds ↑, blues ↓)")
plt.tight_layout(); plt.show()

# 4. rolling 14-day volatility ----------------------------------
# Standard deviation over the trailing 14 observations; no value is
# produced until at least 7 observations are available.
vol = full.rolling(14, min_periods=7).std()

plt.figure(figsize=(14,3))
plt.plot(vol.index, vol, color="orange")
plt.title("14-day volatility of escalation score")
plt.ylabel("σ")
plt.tight_layout()
plt.show()

## Results Comparison with GPT 3.5 Turbo

In [None]:
import time
import openai, pandas as pd, numpy as np, matplotlib.pyplot as plt

# ---- settings for the GPT-3.5 comparison run --------------------
client = OpenAI()
MODEL       = "gpt-3.5-turbo-0125"
TIMEOUT_SEC = 40       # per-request timeout handed to the SDK
MAX_RETRIES = 5        # attempts per day before giving up
BACKOFF_SEC = 10       # base wait, multiplied by the attempt number
SPACING_SEC = 0.7      # pause between days; keep < 90 req/min

print("🚀 Starting script", flush=True)

# One single-token request up front: if credentials or connectivity are
# broken, fail here (re-raising) rather than inside the long loop.
ping_request = dict(
    model=MODEL,
    temperature=0,
    max_tokens=1,
    messages=[{"role":"user","content":"ping"}],
    timeout=TIMEOUT_SEC,
)
try:
    print("⚡️ Warm-up ping…", flush=True)
    client.chat.completions.create(**ping_request)
    print("✅ Ping OK", flush=True)
except Exception as e:
    print(f"❌ Ping failed: {e}", flush=True)
    raise

print(f"🔢 Loaded all_titles: {len(all_titles)} days", flush=True)

# Score every day with MODEL.  Transport/API errors are retried with a
# linearly growing back-off; a reply that arrives but is not an integer is
# recorded as NaN immediately (retrying it would only burn the back-off
# budget on a request that did not fail).
scores35 = {}       # day (Timestamp) → score or NaN
latencies = []      # seconds per successful request

total_days = len(all_titles)

for day_index, (day, titles) in enumerate(all_titles.items(), 1):
    print(f"\n[{day_index}/{total_days}] Processing {day.date()}")

    user_block = "### Headlines TODAY\n" + "\n".join(
        f"- {t}" for t in titles[:MAX_HEADLINES]
    )

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            t0 = time.time()
            rsp = client.chat.completions.create(
                model=MODEL,
                temperature=0,
                max_tokens=3,
                timeout=TIMEOUT_SEC,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user",   "content": user_block},
                ],
            )
            latency = time.time() - t0
        except Exception as e:   # API boundary: any request failure is retried
            if attempt == 1:
                print(f" ⚠️  {day.date()} attempt {attempt} error: {type(e).__name__}: {e}")
            if attempt == MAX_RETRIES:
                print(f" ❌  {day.date()} failed after {MAX_RETRIES} attempts; storing NaN")
                scores35[pd.to_datetime(day)] = np.nan
            else:
                backoff = BACKOFF_SEC * attempt
                print(f"    retrying in {backoff}s...")
                time.sleep(backoff)
            continue

        # The request itself succeeded from here on.
        latencies.append(latency)
        try:
            score = int(rsp.choices[0].message.content.strip())
        except (AttributeError, IndexError, ValueError):
            print(f" ⚠️  {day.date()} reply was not an integer; storing NaN")
            score = np.nan
        else:
            print(f" → Success (latency {latency:.2f}s): score = {score}")
        scores35[pd.to_datetime(day)] = score
        break

    time.sleep(SPACING_SEC)

# Guard the mean so an empty latency list prints nan without a RuntimeWarning.
mean_latency = np.mean(latencies) if latencies else float("nan")
print(f"\n✅ Run done – mean latency {mean_latency:.2f}s  |  failures {sum(pd.isna(list(scores35.values())))}")

# ---- compare to 4-o-mini --------------------------------------
# Align both series on date and keep only days where both have a score.
turbo = pd.Series(scores35).sort_index()
df_cmp = pd.DataFrame({"mini": full, "turbo": turbo}).dropna()
# Use the built-in round(): corr() can hand back a plain float NaN when there
# are too few overlapping days, and a plain float has no .round() method.
rho   = round(float(df_cmp["mini"].corr(df_cmp["turbo"])), 3)
med_d = (df_cmp["mini"] - df_cmp["turbo"]).abs().median()

print(f"\nPearson ρ = {rho}   |   median |Δ| = {med_d}")

ax = df_cmp.plot.scatter("mini", "turbo", alpha=0.35, figsize=(4, 4))
ax.plot([0, 10], [0, 10], "r--", lw=1)      # y = x reference line
ax.set_xlabel("GPT-4o-mini score")
ax.set_ylabel("3.5-turbo score")
ax.set_title("Daily escalation scores: 4-o-mini vs 3.5-turbo")
plt.tight_layout()
plt.show()


## Results Comparison with Claude-3-Haiku

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Cheap benchmark — Claude-3-Haiku vs. GPT-4o-mini            ║
# ╚══════════════════════════════════════════════════════════════╝
from tqdm.auto import tqdm
import anthropic, time, pandas as pd, numpy as np, matplotlib.pyplot as plt, datetime as dt

# ---------- settings for the Claude 3 Haiku run ------------------------------
MODEL_ID       = "claude-3-haiku-20240307"   # pinned snapshot id
TIMEOUT_SEC    = 40                          # per-request timeout
MAX_RETRIES    = 5                           # attempts per day
BACKOFF_SEC    = 10                          # base wait × attempt number
SPACING_SEC    = 0.6                         # pause between days (author's note: 100 req/min soft limit)
MAX_TOKENS_OUT = 3                           # the reply is just the integer score

# NOTE(review): this rebinds `client`, which the earlier cells use as the
# OpenAI client — re-running those cells after this one will hit the
# Anthropic client instead.
client = anthropic.Anthropic()               # author's note: reads ANTHROPIC_API_KEY from env

# ---------- connectivity check: one 1-token request --------------------------
print("⚡️ Haiku warm-up ping…", flush=True)
_ = client.messages.create(
    model=MODEL_ID,
    max_tokens=1,
    system="ping-test",
    messages=[{"role":"user","content":"ping"}],
    timeout=TIMEOUT_SEC,
)
print("✅ ping OK\n", flush=True)

# ---------- dry run on the first five days ------------------------------------
# Prints the raw reply text per day (no integer parsing) so prompt or
# formatting problems show up before the full loop starts.
print("★ 5-day smoke-test (Claude 3 Haiku)")
for d, titles in all_titles.iloc[:5].items():
    bullet_lines = "\n".join(f"- {t}" for t in titles[:MAX_HEADLINES])
    block = "### Headlines TODAY\n" + bullet_lines
    rsp = client.messages.create(
        model=MODEL_ID,
        max_tokens=MAX_TOKENS_OUT,
        system=SYSTEM_PROMPT,
        messages=[{"role":"user","content": block}],
        timeout=TIMEOUT_SEC,
    )
    score = rsp.content[0].text.strip()
    print(f"{d.date()} → {score}")
print("★ smoke-test OK — starting full loop\n")

# ---------- full run ----------------------------------------------------------
# Request failures are retried with a linearly growing back-off.  A reply that
# arrives but is not an integer is stored as NaN straight away instead of
# being retried as if the request had failed.
scores_haiku, latencies = {}, []
pbar = tqdm(total=len(all_titles), desc="Haiku scoring", ncols=100)

for day, titles in all_titles.items():
    user_block = "### Headlines TODAY\n" + "\n".join(f"- {t}" for t in titles[:MAX_HEADLINES])

    for attempt in range(1, MAX_RETRIES+1):
        try:
            t0  = time.time()
            rsp = client.messages.create(
                    model      = MODEL_ID,
                    max_tokens = MAX_TOKENS_OUT,
                    system     = SYSTEM_PROMPT,
                    messages   = [{"role":"user","content": user_block}],
                    timeout    = TIMEOUT_SEC,
                  )
            elapsed = time.time() - t0
        except Exception as e:   # API boundary: any request failure is retried
            if attempt == 1:
                pbar.write(f"⚠️ {day.date()} err: {type(e).__name__}: {e}")
            if attempt == MAX_RETRIES:
                pbar.write(f"❌ {day.date()} → NaN")
                scores_haiku[pd.to_datetime(day)] = np.nan
            else:
                time.sleep(BACKOFF_SEC * attempt)
            continue

        # The request itself succeeded from here on.
        latencies.append(elapsed)
        try:
            scores_haiku[pd.to_datetime(day)] = int(rsp.content[0].text.strip())
        except (AttributeError, IndexError, ValueError):
            pbar.write(f"⚠️ {day.date()} reply was not an integer → NaN")
            scores_haiku[pd.to_datetime(day)] = np.nan
        break

    pbar.update(1)
    time.sleep(SPACING_SEC)

pbar.close()
# Guard the mean so an empty latency list prints nan without a RuntimeWarning.
mean_latency = np.mean(latencies) if latencies else float("nan")
print(f"✅ run done – mean latency {mean_latency:.2f}s | failures {pd.isna(list(scores_haiku.values())).sum()}")

# ---------- simple cost estimate ---------------------------------------------
# Back-of-envelope only: assumes a flat 1 900 input tokens per day and that
# every reply uses the full MAX_TOKENS_OUT.
TOK_IN  = len(all_titles) * 1900      # rough 1 900 input tok / day
TOK_OUT = len(all_titles) * MAX_TOKENS_OUT
cost_in  = TOK_IN  / 1_000_000 * 0.25   # $0.25/M input tok  (author's figure — check current Anthropic pricing)
cost_out = TOK_OUT / 1_000_000 * 1.25   # $1.25/M output tok (author's figure — check current Anthropic pricing)
print(f"💲 approx cost ≈ ${cost_in+cost_out:0.2f}")

# ---------- compare to GPT-4o-mini -------------------------------------------
# Align on date and keep only days where both models produced a score.
haiku = pd.Series(scores_haiku).sort_index()
cmp   = pd.DataFrame({"mini": full, "haiku": haiku}).dropna()

# Built-in round(): corr() can return a plain float NaN when there are too
# few overlapping days, and a plain float has no .round() method.
rho_s  = round(float(cmp["mini"].corr(cmp["haiku"], method="spearman")), 3)
rho_p  = round(float(cmp["mini"].corr(cmp["haiku"], method="pearson")), 3)
print(f"\nPearson ρ = {rho_p}   |   Spearman ρ = {rho_s}")

ax = cmp.plot.scatter("mini","haiku",alpha=.35,figsize=(4,4))
ax.plot([0,10],[0,10],'r--',lw=1)       # y = x reference line
ax.set_xlabel("GPT-4o-mini score"); ax.set_ylabel("Claude 3 Haiku score")
ax.set_title("Daily escalation scores: 4-o-mini vs Claude Haiku")
plt.tight_layout(); plt.show()

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Compare trend & volatility – 4-o-mini vs 3.5-turbo          ║
# ╚══════════════════════════════════════════════════════════════╝
import matplotlib.pyplot as plt
import pandas as pd

# --- put both models on the 4-o-mini date index ------------------
mini  = full.copy()
turbo = turbo.reindex(mini.index)

# --- 7-observation mean and 14-observation std for each model ----
mini_7, turbo_7 = (s.rolling(7, min_periods=1).mean() for s in (mini, turbo))
mini_vol, turbo_vol = (s.rolling(14, min_periods=7).std() for s in (mini, turbo))

# --- two stacked panels sharing the x-axis: means on top, σ below
fig, ax = plt.subplots(2, 1, figsize=(14,6), sharex=True,
                       gridspec_kw={"height_ratios":[2,1]})
mean_ax, vol_ax = ax

mean_ax.plot(mini_7.index,  mini_7,  label="4-o-mini (7-day mean)",  lw=2.0, c="tab:red")
mean_ax.plot(turbo_7.index, turbo_7, label="3.5-turbo (7-day mean)", lw=1.8, c="tab:blue")
mean_ax.set_title("7-day rolling average of escalation score")
mean_ax.set_ylabel("Mean score")
mean_ax.grid(alpha=.3)
mean_ax.legend()

vol_ax.plot(mini_vol.index,  mini_vol,  label="4-o-mini (14-day σ)",  c="tab:red")
vol_ax.plot(turbo_vol.index, turbo_vol, label="3.5-turbo (14-day σ)", c="tab:blue")
vol_ax.set_title("14-day volatility of escalation score")
vol_ax.set_ylabel("σ")
vol_ax.grid(alpha=.3)
vol_ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Trend & regime visuals – EWMA • rolling pct • CUSUM         ║
# ║            (4-o-mini vs 3.5-turbo)                           ║
# ╚══════════════════════════════════════════════════════════════╝
import matplotlib.pyplot as plt, numpy as np, pandas as pd, datetime as dt
from matplotlib.colors import ListedColormap

# ----------------------------------------------------------------
# 0. put both series on the same index
mini  = mini.reindex(turbo.index)
turbo = turbo.reindex(mini.index)

# ----------------------------------------------------------------
# 1. exponentially-weighted mean, half-life of `hl` observations
hl = 7
ewma_mini, ewma_turbo = (s.ewm(halflife=hl).mean() for s in (mini, turbo))

fig, ax = plt.subplots(figsize=(14,3))
ax.plot(ewma_mini,  c="tab:red",  lw=2, label="4-o-mini EWMA (7-d hl)")
ax.plot(ewma_turbo, c="tab:blue", lw=1.5, label="3.5-turbo EWMA")
ax.set_title(f"Exponentially-weighted mean (half-life = {hl} days)")
ax.set_ylabel("Score")
ax.grid(alpha=.3)
ax.legend()
plt.tight_layout()
plt.show()

# ----------------------------------------------------------------
# 2. rolling percentile rank over the trailing `win` observations
win = 90


def _rank_of_latest(window):
    """Percentile rank of the newest value within its own window."""
    return window.rank(pct=True).iloc[-1]


pct_mini  = mini.rolling(win).apply(_rank_of_latest)
pct_turbo = turbo.rolling(win).apply(_rank_of_latest)

fig,ax = plt.subplots(figsize=(14,3))
ax.plot(pct_mini,  c="tab:red",  label="4-o-mini")
ax.plot(pct_turbo, c="tab:blue", label="3.5-turbo")
ax.set_ylim(0,1)
ax.set_ylabel("Percentile")
ax.set_title(f"{win}-day rolling percentile rank")
ax.grid(alpha=.3)
ax.legend()
plt.tight_layout()
plt.show()

# ----------------------------------------------------------------
# 3. running sum of each model's deviation from its own overall mean
mu_mini, mu_turbo = mini.mean(), turbo.mean()

cusum_mini  = (mini - mu_mini).cumsum()
cusum_turbo = (turbo - mu_turbo).cumsum()

fig,ax = plt.subplots(figsize=(14,3))
ax.plot(cusum_mini,  c="tab:red",  lw=2, label=f"4-o-mini (μ={mu_mini:.2f})")
ax.plot(cusum_turbo, c="tab:blue", lw=1.5, label=f"3.5-turbo (μ={mu_turbo:.2f})")
ax.axhline(0,c='k',lw=.7)             # zero reference
ax.set_title("CUSUM of deviations (regime-shift visual)")
ax.set_ylabel("Cumulative deviation")
ax.grid(alpha=.3)
ax.legend()
plt.tight_layout()
plt.show()

# ----------------------------------------------------------------
# 4. Change-point detection (ruptures) ----------------------------
# Only the import is guarded, so an unrelated error raised while fitting or
# plotting is not mistaken for "ruptures not installed".
try:
    import ruptures as rpt
except ImportError:
    print("⚠️  ruptures not installed – skip change-point plot")
else:
    # Days whose score is NaN (unparseable / failed replies) are dropped
    # before fitting so they cannot contaminate the kernel cost; the break
    # positions returned below therefore index into `scored_only`.
    scored_only = mini.dropna()
    model = (scored_only - scored_only.mean()).values  # centred series
    algo  = rpt.KernelCPD(kernel="rbf").fit(model)
    cpts  = algo.predict(pen=5)[:-1]      # last point is len(series)
    fig, ax = plt.subplots(figsize=(14,2.5))
    ax.plot(mini.index, mini, lw=.8)
    for cp in cpts:
        ax.axvline(scored_only.index[cp], c="tab:red", lw=1)
    ax.set_title("Change-point detection (vertical bars) – 4-o-mini")
    plt.tight_layout(); plt.show()

# ----------------------------------------------------------------
# 5. STL decomposition of the 4-o-mini series (period = 7) --------
# Skipped with a message when statsmodels cannot be imported.
try:
    from statsmodels.tsa.seasonal import STL
    stl = STL(mini, period=7, robust=True).fit()
    fig = stl.plot()
    fig.set_size_inches(14,4)
    top_panel = fig.axes[0]
    top_panel.set_title("STL decomposition (trend & remainder) – 4-o-mini")
    plt.tight_layout()
    plt.show()
except ImportError:
    print("⚠️  statsmodels not installed – skip STL plot")

# ----------------------------------------------------------------
# 6. Calendar heat-map (4-o-mini raw scores) ---------------------
# One row per calendar year, one column per day-of-year.
years     = sorted(mini.index.year.unique())
day_cols  = 366                              # leap-year length; shorter years are NaN-padded
rows      = []

for y in years:
    # every calendar day of year y; days without a score become NaN
    yr_range = pd.date_range(f"{y}-01-01", f"{y}-12-31", freq="D")
    yr_vals  = mini.reindex(yr_range).values
    # copy into a fixed-width NaN row so all rows have equal length
    row = np.full(day_cols, np.nan)
    row[:len(yr_vals)] = yr_vals
    rows.append(row)

mat = np.vstack(rows)                         # shape: (len(years), 366)

fig, ax = plt.subplots(figsize=(14, len(years)*0.5))
cmap = ListedColormap(plt.cm.viridis(np.linspace(0,1,11)))   # 11 discrete colours for scores 0-10
im   = ax.imshow(mat, aspect="auto", cmap=cmap, vmin=0, vmax=10)

ax.set_yticks(range(len(years))); ax.set_yticklabels(years)
# Ticks sit on the first day of each labelled month, using non-leap-year
# day offsets (leap years are one column later from March on).  The "Dec"
# tick used to be at 355 (22 Dec); 1 Dec is column 334.
ax.set_xticks([0, 90, 181, 273, 334])
ax.set_xticklabels(["Jan","Apr","Jul","Oct","Dec"])
ax.set_title("Calendar heat-map – 4-o-mini escalation score")
plt.colorbar(im, ax=ax, label="Score"); plt.tight_layout(); plt.show()


# ----------------------------------------------------------------
# 7. correlation between the two models over a trailing 30-observation window
roll_corr = mini.rolling(30).corr(turbo)

plt.figure(figsize=(14,2.5))
plt.plot(roll_corr.index, roll_corr, lw=1.2, c="purple")
plt.axhline(0, c='k', lw=.6)          # zero-correlation reference
plt.ylim(-1,1)
plt.title("30-day rolling correlation (4-o-mini vs. 3.5-turbo)")
plt.ylabel("ρ")
plt.grid(alpha=.3)
plt.tight_layout()
plt.show()

# ----------------------------------------------------------------
# 8. Event “lollipop” overlay – ≥ ±3 point jumps -----------------
# `thr` and `events` are not defined anywhere else in the visible part of
# this notebook, so they are derived here from the section heading: an event
# is a day whose score moved by at least `thr` points versus the previous
# observation.  NOTE(review): the stem height is taken to be that day's
# score (so markers sit on the line) — confirm this matches the intent.
thr = 3
events = mini[mini.diff().abs() >= thr]

fig, ax = plt.subplots(figsize=(14,2.5))
ax.plot(mini.index, mini, lw=.8)
if len(events):                    # nothing to draw when no jump reaches thr
    ax.stem(events.index, events.values,
            basefmt=" ",               # hide baseline
            markerfmt="C3o",           # red circle markers
            linefmt="C3-")             # red stems
ax.set_title(f"Lollipop events (|Δ| ≥ {thr}) – 4-o-mini")
ax.set_ylabel("Score"); ax.grid(alpha=.3)
plt.tight_layout(); plt.show()




### Save Model Comparison Results for Future Use

In [None]:
# Environment check: print the path of the running interpreter and the
# installed pyarrow version.  (pprint and importlib are imported here but not
# used on this line.)
import sys, pyarrow, pprint, importlib; print(sys.executable); print(pyarrow.__version__)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Persist comparison results (4-o-mini vs 3.5-turbo)          ║
# ╚══════════════════════════════════════════════════════════════╝
from pathlib import Path
import pandas as pd, matplotlib.pyplot as plt, datetime as dt

# Output files are stamped with today's ISO date and written under
# ./model_benchmarks (created on first use).
today_tag = dt.date.today().isoformat()
out_dir   = Path("model_benchmarks")
out_dir.mkdir(exist_ok=True)

# ---------- one row per day: date, both scores, absolute gap ---
abs_gap = (df_cmp["mini"] - df_cmp["turbo"]).abs()
df_save = pd.DataFrame({
    "date"      : df_cmp.index,
    "score_4o"  : df_cmp["mini"],
    "score_35"  : df_cmp["turbo"],
    "abs_diff"  : abs_gap,
})
df_save = df_save.reset_index(drop=True)      # the date lives in its own column

csv_path     = out_dir / f"ukraine_escalation_compare_4o_vs_turbo_{today_tag}.csv"
feather_path = out_dir / f"ukraine_escalation_compare_4o_vs_turbo_{today_tag}.feather"

df_save.to_csv(csv_path, index=False)
print(f"📁 Saved CSV → {csv_path}")

# ---------- feather copy is best-effort ------------------------
try:
    df_save.to_feather(feather_path)    # requires pyarrow
    print(f"📁 Saved feather → {feather_path}")
except ImportError:
    print("⚠️  pyarrow not installed – feather file skipped.")

# ---------- scatter of the two models' daily scores -------------
fig, ax = plt.subplots(figsize=(4,4))
ax.scatter(df_cmp["mini"], df_cmp["turbo"], alpha=.35)
ax.plot([0,10],[0,10],'r--',lw=1)             # y = x reference line
ax.set_xlabel("GPT-4o-mini score")
ax.set_ylabel("3.5-turbo score")
ax.set_title("Daily escalation scores: 4-o-mini vs 3.5-turbo")
plt.tight_layout()

# The PNG is written through the figure object before it is displayed.
png_path = out_dir / f"scatter_4o_vs_turbo_{today_tag}.png"
fig.savefig(png_path, dpi=300)
plt.show()
print(f"🖼️  Scatter plot saved → {png_path}")

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  Extra agreement metrics: 4-o-mini vs 3.5-turbo              ║
# ╚══════════════════════════════════════════════════════════════╝
import numpy as np, pandas as pd, scipy.stats as st

# aligned series ------------------------------------------------
# df_cmp already holds only the days on which both models have a score.
s4  = df_cmp["mini"]
s35 = df_cmp["turbo"]

# 1. Spearman & Kendall -----------------------------------------
spearman = st.spearmanr(s4,  s35).correlation
kendall  = st.kendalltau(s4, s35).correlation

# First differences, without the leading row: that row has no predecessor,
# so its difference is undefined.  Filling it with 0 (as before) made it
# count as a guaranteed "0 == 0" agreement in the directional accuracy.
diff4  = s4.diff().iloc[1:]
diff35 = s35.diff().iloc[1:]

# 2. Directional Accuracy ---------------------------------------
d4  = np.sign(diff4)
d35 = np.sign(diff35)
DA  = (d4 == d35).mean()            # fraction of matching signs

# 3. Correlation of first differences ---------------------------
delta_r = st.pearsonr(diff4, diff35)[0]

# 4. Up / Down capture ratios -----------------------------------
# Mean 3.5-turbo move divided by mean 4-o-mini move, taken separately over
# the days on which 4-o-mini rose and the days on which it fell.
ups   = d4 > 0
downs = d4 < 0
up_cap   = diff35[ups].mean()   / diff4[ups].mean()
down_cap = diff35[downs].mean() / diff4[downs].mean()

print(f"\n--- Direction-oriented agreement metrics ---")
print(f"Spearman ρ (ranks)        : {spearman: .3f}")
print(f"Kendall τ                 : {kendall: .3f}")
print(f"Directional accuracy      : {DA: .3%}")
print(f"Corr of first differences : {delta_r: .3f}")
print(f"Up-capture ratio          : {up_cap: .2f}")
print(f"Down-capture ratio        : {down_cap: .2f}")

**Interpretation tips** :
* A downward slope after 2 Apr 2025 for a source indicates more critical framing of tariffs.
* Compare amplitudes—Fox vs CNN—to detect partisan divergence.
* Check `df[df.tone.abs()>1.5]` to view the strongest headlines driving spikes.