# In [None]:
# WFGY 3.0 Singularity demo: Q127 Synthetic Worlds Entropy Gauge
# One-cell Colab MVP: three tiny Gaussian worlds, one small MLP per world,
# entropy-aware tension observable T_entropy, and a cross-world dashboard.
#
# If you do not have PyTorch / pandas / matplotlib in this runtime, uncomment:
# !pip install --quiet torch pandas matplotlib

import math
import random
from typing import Dict, List

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


# -------------------------------------------------------------------
# 0. Header text: what this demo is and how to use it
# -------------------------------------------------------------------

# Banner describing the experiment. The string is printed verbatim below,
# so any edit to its contents changes the program's output.
header_text = """
WFGY 3.0 Singularity demo: Q127 Synthetic Worlds Entropy Gauge
==============================================================

This notebook is a small, fully inspectable MVP for TU Q127.
It lives entirely at the effective layer. No weights are modified
outside this script and there is no fine-tuning loop over real data.

What Q127-A is asking
---------------------

We build three tiny synthetic worlds W1, W2 and W3:

  - W1: balanced clean labels (class prior 0.5 / 0.5, no label noise).
  - W2: imbalanced clean labels (class prior 0.1 / 0.9, no label noise).
  - W3: balanced but noisy labels (class prior 0.5 / 0.5, labels flipped
        with probability 0.2).

Each world is a simple 2D Gaussian mixture binary classification problem.

For each world W_i we train a small MLP classifier on samples from that
world only. Then we evaluate every trained classifier on all three worlds.

For every train->test pair we compute:

  - accuracy on the test world,
  - average cross entropy in bits,
  - KL divergence in bits between the model label distribution and the
    empirical label distribution of the test world,
  - the difference in label entropies |H_label(train) - H_label(test)|,
  - a scalar tension observable T_entropy in [0, 1] built from these pieces.

We also define a very simple "effective correctness" flag:

  - a pair is marked correct if accuracy >= 0.8 and KL <= 0.05.

This is not a benchmark. The exact numbers move with the seed,
the model size, and the sample counts. What matters is the pattern:

  - T_entropy is small when a classifier is evaluated on the world it was
    trained on, or on a very similar world,
  - T_entropy grows when the classifier carries the "wrong" class balance
    or noise structure in its beliefs about the test world.

How to use this notebook
------------------------

1. Run this cell once, top to bottom.

2. The script will:

   - define the three worlds,
   - print basic world statistics (class priors and entropies),
   - train one small MLP per world,
   - evaluate all train->test pairs and assemble a DataFrame,
   - print a compact table of accuracy, CE, KL, deltaH and T_entropy,
   - draw a 3x3 heatmap of T_entropy(train_world -> test_world).

3. You can treat T_entropy as a crude "world detector":

   - low tension on train==test pairs,
   - higher tension when train and test worlds differ in class balance
     or label noise.

4. If you want to extend the experiment, you can change:

   - the world definitions (means, priors, noise),
   - the model (deeper MLP, different optimizer),
   - the weights inside the T_entropy functional.

Formal disclaimer
-----------------

This notebook does not claim to solve TU Q127 as a mathematical object
or as a full benchmark. It only provides one small effective-layer
experiment that can be inspected and re-run line by line.
"""

# Show the banner once, before any data generation or training output.
print(header_text)


# -------------------------------------------------------------------
# 1. Reproducibility setup
# -------------------------------------------------------------------

SEED = 127

# Seed, in order, Python's `random`, NumPy's global generator and PyTorch
# with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)


# -------------------------------------------------------------------
# 2. Synthetic world generators
# -------------------------------------------------------------------

def binary_entropy(p: float) -> float:
    """Entropy, in bits, of a two-outcome variable that is 1 with probability p.

    Values of p at or below 0, or at or above 1, are treated as carrying
    no uncertainty and give 0.0.
    """
    is_degenerate = p <= 0.0 or p >= 1.0
    if is_degenerate:
        return 0.0

    q = 1.0 - p
    head = p * math.log(p, 2.0)
    tail = q * math.log(q, 2.0)
    return -(head + tail)


def generate_world(name: str,
                   prior1: float,
                   noise: float,
                   n_samples: int = 600) -> Dict[str, object]:
    """
    Sample one two-class toy dataset in R^2 and summarise its labels.

    Sampling steps (all draws use NumPy's global random state):
      1. Clean labels: each sample is class 1 when a uniform draw falls
         below `prior1`, otherwise class 0.
      2. Features: class 0 points are Gaussian around [-1, 0], class 1
         points around [+1, 0], both with covariance 0.5 * I.
      3. Observed labels: when `noise` > 0, each clean label is flipped
         when a further uniform draw falls below `noise`.

    Returns a dict with the inputs (`name`, `prior1`, `noise`), the
    float32 features `x`, the int64 observed labels `y`, the observed
    class-1 fraction `p1_empirical`, its binary entropy `H_label`, and
    the binary entropy of the flip probability `H_noise` (all in bits).
    """
    # Step 1: clean labels from the class-1 prior.
    clean_labels = (np.random.rand(n_samples) < prior1).astype(np.int64)

    # Step 2: features. Only the centre differs between the two classes.
    shared_cov = 0.5 * np.eye(2)
    centres = {
        0: np.array([-1.0, 0.0]),
        1: np.array([+1.0, 0.0]),
    }
    features = np.zeros((n_samples, 2), dtype=np.float32)
    # Class 0 is sampled before class 1 so the random stream is consumed
    # in a fixed order.
    for label in (0, 1):
        members = np.where(clean_labels == label)[0]
        features[members, :] = np.random.multivariate_normal(
            centres[label], shared_cov, size=len(members)
        )

    # Step 3: flip a random subset of the labels.
    observed_labels = clean_labels.copy()
    if noise > 0.0:
        flip_mask = np.random.rand(n_samples) < noise
        observed_labels[flip_mask] = 1 - observed_labels[flip_mask]

    # Label statistics are measured on the observed (post-noise) labels.
    observed_p1 = float(observed_labels.mean())
    if 0.0 < noise < 1.0:
        noise_entropy = binary_entropy(noise)
    else:
        noise_entropy = 0.0

    return {
        "name": name,
        "prior1": float(prior1),
        "noise": float(noise),
        "x": features,
        "y": observed_labels,
        "p1_empirical": observed_p1,
        "H_label": binary_entropy(observed_p1),
        "H_noise": noise_entropy,
    }


def make_worlds(n_samples: int = 600) -> List[Dict[str, object]]:
    """Build the three Q127-A worlds, each with `n_samples` points."""
    # (name, class-1 prior, label-flip probability), in generation order.
    specs = (
        ("W1_balanced_clean", 0.5, 0.0),
        ("W2_imbalanced_clean", 0.9, 0.0),
        ("W3_balanced_noisy", 0.5, 0.2),
    )
    return [
        generate_world(world_name, prior1=prior, noise=flip_prob, n_samples=n_samples)
        for world_name, prior, flip_prob in specs
    ]


# Generate the three worlds once; everything below reuses this list.
worlds = make_worlds(n_samples=600)

print("\nWorld definitions (empirical after noise):")
for w in worlds:
    # One summary line per world: configured prior/noise, then the
    # statistics measured on the sampled labels.
    summary = (
        f"  {w['name']}: "
        f"prior1={w['prior1']:.2f}, noise={w['noise']:.2f}, "
        f"p1_emp={w['p1_empirical']:.3f}, "
        f"H_label={w['H_label']:.3f} bits, "
        f"H_noise={w['H_noise']:.3f} bits"
    )
    print(summary)


# -------------------------------------------------------------------
# 3. Tiny MLP classifier per world
# -------------------------------------------------------------------

class TinyMLP(nn.Module):
    """Compact fully connected binary classifier for 2D inputs.

    Two hidden ReLU layers of width 16 feed a single sigmoid unit, so the
    forward pass yields one value per input row.
    """

    def __init__(self) -> None:
        super().__init__()
        hidden = 16
        stack = [
            nn.Linear(2, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
            nn.Sigmoid(),
        ]
        # Stored as `net` so parameter names keep the form net.<index>.*
        self.net = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the input through the layer stack and return its sigmoid output."""
        out = self.net(x)
        return out


def train_classifier(world: Dict[str, object],
                     epochs: int = 300,
                     batch_size: int = 64,
                     lr: float = 1e-2) -> TinyMLP:
    """Fit a fresh TinyMLP to one world's samples and return it.

    Reads the features from `world["x"]` and the labels from `world["y"]`,
    then runs `epochs` passes of shuffled mini-batches of size
    `batch_size`, minimising binary cross entropy with Adam at learning
    rate `lr`.
    """
    features = torch.from_numpy(world["x"]).float()
    # BCELoss takes float targets; unsqueeze adds the trailing axis so the
    # targets line up with the model's one-column output.
    targets = torch.from_numpy(world["y"]).float().unsqueeze(1)

    loader = DataLoader(
        TensorDataset(features, targets),
        batch_size=batch_size,
        shuffle=True,
    )

    model = TinyMLP()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    for _ in range(epochs):
        model.train()
        for batch_x, batch_y in loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

    return model


print("\nTraining one TinyMLP per world...")
# Trained classifiers, keyed by the name of the world they were fitted on.
models: Dict[str, TinyMLP] = {}
for w in worlds:
    print(f"  Training on {w['name']} ...")
    models[w["name"]] = train_classifier(w, epochs=300)


# -------------------------------------------------------------------
# 4. Effective-layer metrics and T_entropy
# -------------------------------------------------------------------

def evaluate_on_world(model: nn.Module,
                      train_world: Dict[str, object],
                      test_world: Dict[str, object]) -> Dict[str, float]:
    """
    Evaluate a trained classifier on a test world.

    Parameters:
      - model: a module mapping an (N, 2) float tensor to per-row class-1
        probabilities (for example a TinyMLP). It is switched to eval mode.
      - train_world: world the model was trained on; only "H_label" is read.
      - test_world: world to evaluate on; "x", "y" and "H_label" are read.

    Returns a dict with:
      - "accuracy": fraction of test labels matched at threshold 0.5
      - "ce_bits": mean cross entropy of the predictions, in bits
      - "kl_bits": KL(p_true || p_hat) in bits, where p_true is the
        empirical label distribution of the test world and p_hat is the
        mean predicted label distribution
      - "deltaH": |H_label(train) - H_label(test)|
      - "T_entropy": weighted sum of the capped, normalised CE, KL and
        deltaH terms, clamped to [0, 1]
      - "is_correct": True when accuracy >= 0.8 and kl_bits <= 0.05
      - "p_true1", "p_hat1": the raw class-1 masses behind the KL term
    """
    model.eval()
    x_np = test_world["x"]
    y_np = test_world["y"]

    with torch.no_grad():
        probs = model(torch.from_numpy(x_np).float()).numpy().reshape(-1)

    # Classification accuracy
    preds = (probs >= 0.5).astype(np.int64)
    acc = float((preds == y_np).mean())

    # Cross entropy in bits; eps keeps the logarithms finite when a
    # prediction saturates at exactly 0 or 1.
    eps = 1e-8
    ce_nat = -(
        y_np * np.log(probs + eps) +
        (1.0 - y_np) * np.log(1.0 - probs + eps)
    ).mean()
    ce_bits = float(ce_nat / math.log(2.0))

    # Marginal label distributions: model belief vs. test-world labels.
    p_hat1 = float(probs.mean())
    p_true1 = float(y_np.mean())

    # KL(p_true || p_hat) in bits.
    # The model marginal is clipped away from 0 and 1 first. Skipping the
    # terms where p_hat is exactly 0 would drop the dominant contribution,
    # and the remaining sum could go negative, so a saturated model would
    # look well calibrated. A true class with positive mass is always
    # counted; only classes with p_true == 0 contribute nothing.
    p_hat1_safe = min(max(p_hat1, eps), 1.0 - eps)
    p_true = (1.0 - p_true1, p_true1)
    p_hat = (1.0 - p_hat1_safe, p_hat1_safe)
    kl = sum(
        pt * math.log2(pt / ph)
        for pt, ph in zip(p_true, p_hat)
        if pt > 0.0
    )
    # KL is non-negative by definition; clamp away rounding noise below 0.
    kl_bits = max(0.0, float(kl))

    # Entropy gap |H_label(train) - H_label(test)|
    deltaH = abs(float(train_world["H_label"]) - float(test_world["H_label"]))

    # Normalise the three pieces to [0, 1] using simple caps
    CE_MAX = 1.5   # bits
    KL_MAX = 1.0   # bits
    DELTAH_MAX = 1.0  # bits for a Bernoulli

    ce_norm = min(ce_bits / CE_MAX, 1.0)
    kl_norm = min(kl_bits / KL_MAX, 1.0)
    deltaH_norm = min(deltaH / DELTAH_MAX, 1.0)

    # Tension weights (sum to 1.0)
    b_ce = 0.4
    b_kl = 0.4
    b_deltaH = 0.2

    T_entropy = b_ce * ce_norm + b_kl * kl_norm + b_deltaH * deltaH_norm

    # Effective-layer correctness: "B-lite" notion
    is_correct = (acc >= 0.8) and (kl_bits <= 0.05)

    return {
        "accuracy": acc,
        "ce_bits": ce_bits,
        "kl_bits": kl_bits,
        "deltaH": deltaH,
        "T_entropy": float(max(0.0, min(1.0, T_entropy))),
        "is_correct": bool(is_correct),
        "p_true1": p_true1,
        "p_hat1": p_hat1,
    }


# -------------------------------------------------------------------
# 5. Cross-world evaluation and dashboard
# -------------------------------------------------------------------

# One record per (train world, test world) pair, in row-major order:
# the outer loop fixes the trained model, the inner loop sweeps the test worlds.
rows: List[Dict[str, object]] = [
    {
        "train_world": train_w["name"],
        "test_world": test_w["name"],
        **evaluate_on_world(
            model=models[train_w["name"]],
            train_world=train_w,
            test_world=test_w,
        ),
    }
    for train_w in worlds
    for test_w in worlds
]

df = pd.DataFrame(rows)

# Columns shown in the printed table; p_true1 / p_hat1 stay in `df` only.
report_columns = [
    "train_world",
    "test_world",
    "accuracy",
    "ce_bits",
    "kl_bits",
    "deltaH",
    "T_entropy",
    "is_correct",
]

print("\nCross-world results (one row per train->test pair):\n")
pd.set_option("display.width", 160)
pd.set_option("display.max_rows", None)
print(df[report_columns].to_string(index=False))

# -------------------------------------------------------------------
# 6. Simple T_entropy heatmap
# -------------------------------------------------------------------

# Reshape the long table into a train-by-test grid of tension values.
pivot = df.pivot(index="train_world", columns="test_world", values="T_entropy")
tension_grid = pivot.values

plt.figure(figsize=(6, 5))
im = plt.imshow(tension_grid, origin="upper", interpolation="nearest")
plt.colorbar(im, label="T_entropy (train -> test)")

# Rows are train worlds (y axis), columns are test worlds (x axis).
plt.xticks(range(len(pivot.columns)), pivot.columns, rotation=45, ha="right")
plt.yticks(range(len(pivot.index)), pivot.index)
plt.title("Q127-A: T_entropy for train_world -> test_world")

# Write each cell's value on top of the heatmap; text takes (x, y), so the
# column index comes first.
for (row_idx, col_idx), value in np.ndenumerate(tension_grid):
    plt.text(col_idx, row_idx, f"{value:.2f}", ha="center", va="center", fontsize=8)

plt.tight_layout()
plt.show()
