In [None]:
# Refresh the apt package index, then install the zstd package non-interactively (-y).
# NOTE(review): zstd is presumably a prerequisite of the Ollama install script run in the next cell - confirm.
!sudo apt-get update
!sudo apt-get install -y zstd

In [None]:
# Download the Ollama install script and pipe it straight into sh.
# curl flags: -f fail on HTTP errors, -s silent, -S still print errors, -L follow redirects.
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Mount Google Drive at /content/drive; force_remount=True re-mounts even if it is already mounted.
# Every hard-coded /content/drive/MyDrive/... path used later depends on this mount.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# import os

# BASE_DIR = "/content/drive/MyDrive/anki_mindmap_LLM"

# OLLAMA_DIR = f"{BASE_DIR}/ollama"
# MODELS_DIR = f"{OLLAMA_DIR}/models"

# # os.makedirs(MODELS_DIR, exist_ok=True)

# print("✅ Ollama model directory ready")
# print("MODELS:", MODELS_DIR)


In [None]:
# Set OLLAMA_MODELS in the kernel's environment so it points at a models folder on the mounted Drive.
# Keep this comment on its own line: text after the value on the %env line would become part of the value.
%env OLLAMA_MODELS=/content/drive/MyDrive/anki_mindmap_LLM/ollama/models


In [None]:
# Start the Ollama server in the background (nohup + trailing &) so the cell returns immediately;
# stdout and stderr are both redirected to ollama.log in the current working directory.
!nohup /usr/local/bin/ollama serve > ollama.log 2>&1 &

In [None]:
# List the models the Ollama CLI can see.
# NOTE(review): the server is started in the background by the previous cell with no wait;
# if this fails right after startup, re-run the cell once the server is up.
!ollama list


In [None]:
# Install bloom-filter2 (imported as `bloom_filter2` by the configuration cell, which falls back
# to set-based deduplication when the import fails).
!pip install bloom-filter2

In [None]:
from pathlib import Path

# Diagnostic cell: check that the metadata CSV is reachable on the mounted Drive.
path = Path("/content/drive/MyDrive/anki_mindmap_LLM/input/metadata.csv")

# Print each check separately so a failure can be narrowed down to the file or its parent folder.
print("Exists:", path.exists())
print("Is file:", path.is_file())
print("Parent exists:", path.parent.exists())
# Directory listing of the parent folder (non-recursive glob).
print("Parent contents:", list(path.parent.glob("*")))

In [None]:
# ============================================================
# CELL 1: CONFIGURATION & CONSTANTS (FIXED VERSION)
# ============================================================
import json
import time
import subprocess
import requests
import pandas as pd
import sys
import hashlib
import re
import os
import logging
from pathlib import Path
from typing import Dict, List, Set, Optional, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from enum import Enum
from collections import defaultdict
from threading import Lock
from functools import lru_cache
from types import MappingProxyType

# Setup logging before anything else
# Configure the root logger at INFO with two handlers: a StreamHandler (default stream)
# and a FileHandler writing to /tmp/anki_pipeline.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/anki_pipeline.log')
    ]
)
# Named logger used by the code below in this notebook.
logger = logging.getLogger("anki-pipeline")

# Optional bloom filter (graceful degradation if not available)
# BLOOM_AVAILABLE records whether the import succeeded; HybridDuplicateDetector checks it
# before building or loading a BloomFilter.
try:
    from bloom_filter2 import BloomFilter
    BLOOM_AVAILABLE = True
except ImportError:
    BLOOM_AVAILABLE = False
    logger.warning("bloom_filter2 not installed - using set-based deduplication")

# ============================================================
# CONFIG - IMMUTABLE
# ============================================================
def load_config():
    """Build the pipeline configuration dictionary.

    CSV resolution (the first path that exists wins):
      1. the path in the ``ANKI_CSV_PATH`` environment variable, when set -
         an explicit override must beat the built-in locations,
      2. the default Google Drive location,
      3. ``./metadata.csv`` in the current working directory.
    When none of them exists, the default Drive location is used as-is
    (no existence check is made on it).

    The base directory comes from ``ANKI_BASE_DIR`` and defaults to the
    Drive project folder.

    Side effects:
        Creates ``OUT_DIR`` (with parents) and ``OUT_DIR / "cache"`` if missing.

    Returns:
        dict: a plain, mutable dict of settings, including the derived
        ``OUT_DIR`` and ``CACHE_DIR`` paths.
    """
    default_csv = "/content/drive/MyDrive/anki_mindmap_LLM/input/metadata.csv"

    # Explicit override first, then the built-in fallbacks.
    csv_candidates = [
        os.getenv("ANKI_CSV_PATH", ""),
        default_csv,
        "./metadata.csv",
    ]
    csv_file = next(
        (loc for loc in csv_candidates if loc and Path(loc).exists()),
        default_csv,
    )

    # Determine base directory
    base_dir = Path(os.getenv("ANKI_BASE_DIR", "/content/drive/MyDrive/anki_mindmap_LLM"))

    config = {
        # Core paths
        "OLLAMA_URL": "http://127.0.0.1:11434",
        "CSV_FILE": csv_file,
        "BASE_DIR": base_dir,
        "OUT_DIR": base_dir / "output",

        # Processing limits
        "BATCH_SIZE": 5,
        "MAX_WORKERS": 4,
        "MAX_REELS": 1000,
        "CONFIDENCE_THRESHOLD": 0.65,
        "FINGERPRINT_BATCH_SIZE": 100,
        "CACHE_VERSION": "v4_fixed",

        # Circuit breaker settings
        "CIRCUIT_BREAKER_THRESHOLD": 5,
        "CIRCUIT_BREAKER_TIMEOUT": 60,

        # PRODUCTION SAFETY SWITCHES
        "ENABLE_ENRICHMENT": True,
        "ENABLE_REJECTION_LEARNING": True,
        "ENABLE_TRADEOFFS": True,
        "ENABLE_PROMPT_ROUTING": True,
        "ENABLE_FOUNDATION_EXPANSION": True,

        # PHASE 1-3: ADVANCED CONTENT PROCESSING
        "ENABLE_CONTENT_FILTERING": True,
        "ENABLE_TRANSCRIPT_NORMALIZATION": True,
        "ENABLE_HYBRID_ROUTING": True,

        # ENRICHMENT CONTROLS
        "MAX_ENRICHMENTS_PER_CONCEPT": {
            "foundation": 1,
            "intermediate": 2,
            "advanced": 2
        },
        "MAX_RETRIES": 3,

        # COMPLETION THRESHOLDS
        "MIN_CARDS_FOR_FULL": 3,
        "MIN_CARDS_FOR_PARTIAL": 2,

        # NORMALIZATION
        "NORMALIZE_TECH_SCORES": True,

        # Content density thresholds
        "DENSE_CONTENT_MIN_WORDS": 150,
        "LIGHT_CONTENT_MAX_WORDS": 100,

        # Validation
        "MIN_TRANSCRIPT_LENGTH": 80,
        "MIN_CATEGORY_CONFIDENCE": 70,
    }

    # Create the output directory, then the cache directory inside it.
    config["OUT_DIR"].mkdir(parents=True, exist_ok=True)

    cache_dir = config["OUT_DIR"] / "cache"
    cache_dir.mkdir(exist_ok=True)
    config["CACHE_DIR"] = cache_dir

    return config

# Load and make config immutable
# MappingProxyType is a read-only view of the top-level mapping only: nested values
# (e.g. the MAX_ENRICHMENTS_PER_CONCEPT dict) stay mutable, and writes to _CONFIG
# itself would still show through CONFIG.
_CONFIG = load_config()
CONFIG = MappingProxyType(_CONFIG)
logger.info(f"Loaded configuration from {CONFIG['CSV_FILE']}")

# File paths
# Persistent state files, all located directly under OUT_DIR.
PROGRESS_FILE = CONFIG["OUT_DIR"] / "processed.txt"
CARD_FINGERPRINTS_FILE = CONFIG["OUT_DIR"] / "card_fingerprints.json"
BLOOM_FILE = CONFIG["OUT_DIR"] / "card_bloom.bin"
REJECTION_MEMORY_FILE = CONFIG["OUT_DIR"] / "rejection_memory.json"
PROMPT_VERSION_FILE = CONFIG["OUT_DIR"] / "prompt_version_stats.json"
ROUTING_METRICS_FILE = CONFIG["OUT_DIR"] / "routing_metrics.json"
TERMINAL_REJECTIONS_FILE = CONFIG["OUT_DIR"] / "terminal_rejections.json"
CONFIDENCE_CALIBRATION_FILE = CONFIG["OUT_DIR"] / "confidence_calibration.json"
ERROR_LOG_FILE = CONFIG["OUT_DIR"] / "error_log.json"

# ============================================================
# CIRCUIT BREAKER PATTERN
# ============================================================
class CircuitBreaker:
    """Three-state guard (CLOSED / OPEN / HALF_OPEN) around a failing dependency.

    * CLOSED: calls pass through; each success lowers the failure count by one.
    * OPEN: entered once ``failures`` reaches ``failure_threshold``; ``call``
      refuses to run the wrapped function.
    * HALF_OPEN: entered by ``is_open`` once more than ``reset_timeout`` seconds
      have passed since the last failure; a success closes the breaker and
      clears the failure count.

    All state changes happen while holding ``self.lock``.
    """

    def __init__(self, name, failure_threshold=5, reset_timeout=60):
        # Configuration
        self.name = name
        self.failure_threshold = failure_threshold
        self.reset_timeout = reset_timeout
        # Mutable state
        self.state = "CLOSED"  # CLOSED, OPEN, HALF_OPEN
        self.failures = 0
        self.last_failure_time = 0
        self.lock = Lock()

    def is_open(self):
        """Return True while calls must be rejected.

        Also performs the OPEN -> HALF_OPEN transition once the reset timeout
        has elapsed since the last recorded failure.
        """
        with self.lock:
            if self.state != "OPEN":
                return False

            elapsed = time.time() - self.last_failure_time
            if elapsed > self.reset_timeout:
                self.state = "HALF_OPEN"
                logger.info(f"Circuit breaker {self.name} moving to HALF_OPEN")
                return False

            return True

    def record_failure(self):
        """Count one failure and open the breaker when the threshold is reached."""
        with self.lock:
            self.failures += 1
            self.last_failure_time = time.time()

            threshold_hit = self.failures >= self.failure_threshold
            if threshold_hit and self.state != "OPEN":
                self.state = "OPEN"
                logger.warning(f"Circuit breaker {self.name} OPENED after {self.failures} failures")

    def record_success(self):
        """Count one success: close a HALF_OPEN breaker, or decay failures when CLOSED."""
        with self.lock:
            current = self.state
            if current == "HALF_OPEN":
                self.state = "CLOSED"
                self.failures = 0
                logger.info(f"Circuit breaker {self.name} CLOSED after successful trial")
            elif current == "CLOSED":
                # Decay by one per success, never below zero.
                self.failures = max(0, self.failures - 1)

    def call(self, func, *args, **kwargs):
        """Run ``func(*args, **kwargs)`` under breaker protection.

        Raises:
            RuntimeError: if the breaker is open (``func`` is not invoked).
            Exception: whatever ``func`` raises, re-raised after the failure
                has been recorded.
        """
        if self.is_open():
            raise RuntimeError(f"Circuit breaker {self.name} is OPEN")

        try:
            outcome = func(*args, **kwargs)
            self.record_success()
            return outcome
        except Exception:
            self.record_failure()
            raise

# Initialize circuit breakers
# Module-level breaker named "ollama"; its threshold and timeout come from CONFIG
# (CIRCUIT_BREAKER_THRESHOLD / CIRCUIT_BREAKER_TIMEOUT).
OLLAMA_CIRCUIT_BREAKER = CircuitBreaker(
    "ollama",
    failure_threshold=CONFIG["CIRCUIT_BREAKER_THRESHOLD"],
    reset_timeout=CONFIG["CIRCUIT_BREAKER_TIMEOUT"]
)

# ============================================================
# ERROR TRACKING
# ============================================================
class ErrorTracker:
    """Track and categorize errors for observability.

    Errors are kept in memory (newest ``MAX_IN_MEMORY`` entries) together with
    a per-type counter, and are flushed to ``error_file`` as JSON every
    ``SAVE_EVERY`` recorded errors or on an explicit ``save()``.

    Thread-safety: all mutation and persistence happens under ``self.lock``.
    ``self.lock`` is a non-reentrant ``Lock``, so code that already holds it
    must call ``_save_locked()`` rather than ``save()``.
    """

    MAX_IN_MEMORY = 1000   # in-memory history cap
    MAX_PERSISTED = 500    # newest entries written to disk
    SAVE_EVERY = 50        # flush after this many new records

    def __init__(self, error_file: Path):
        self.error_file = error_file
        self.errors = []
        self.error_counts = defaultdict(int)
        self.lock = Lock()
        # Records added since the last successful flush. Counting new records
        # (instead of testing len(self.errors)) keeps the cadence correct once
        # the history is pinned at MAX_IN_MEMORY.
        self._unsaved = 0
        self._load()

    def _load(self):
        """Load existing errors; a missing or unreadable file leaves the tracker empty."""
        if self.error_file.exists():
            try:
                with open(self.error_file, 'r') as f:
                    data = json.load(f)
                    self.errors = data.get("errors", [])
                    self.error_counts = defaultdict(int, data.get("counts", {}))
            except Exception as e:
                logger.error(f"Failed to load error tracker: {e}")

    def record(self, error_type: str, message: str, context: Optional[Dict] = None):
        """Record one error and flush to disk every ``SAVE_EVERY`` records.

        Args:
            error_type: category key used for the per-type counter.
            message: human-readable description.
            context: optional extra data stored with the entry (defaults to ``{}``).
        """
        with self.lock:
            error_entry = {
                "type": error_type,
                "message": message,
                "context": context or {},
                "timestamp": time.time(),
                "iso_timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
            }

            self.errors.append(error_entry)
            self.error_counts[error_type] += 1

            # Keep only the newest entries in memory.
            if len(self.errors) > self.MAX_IN_MEMORY:
                self.errors = self.errors[-self.MAX_IN_MEMORY:]

            # Periodic save. The lock is already held here, so use the
            # lock-free helper; calling save() would block forever.
            self._unsaved += 1
            if self._unsaved >= self.SAVE_EVERY:
                self._save_locked()

    def _save_locked(self):
        """Write the snapshot to disk. Caller must hold ``self.lock``."""
        try:
            with open(self.error_file, 'w') as f:
                json.dump({
                    "errors": self.errors[-self.MAX_PERSISTED:],  # Keep only recent errors
                    "counts": dict(self.error_counts),
                    "total_errors": len(self.errors)
                }, f, indent=2)
            self._unsaved = 0
        except Exception as e:
            # Best-effort persistence: log and keep running.
            logger.error(f"Failed to save error tracker: {e}")

    def save(self):
        """Save errors to disk (acquires the lock)."""
        with self.lock:
            self._save_locked()

    def get_stats(self) -> Dict:
        """Return total count, per-type counts and the 20 most recent entries."""
        with self.lock:
            return {
                "total_errors": len(self.errors),
                "error_counts": dict(self.error_counts),
                "recent_errors": self.errors[-20:] if self.errors else []
            }

# Initialize error tracker
# Module-level tracker backed by ERROR_LOG_FILE; constructing it loads any previously saved errors.
error_tracker = ErrorTracker(ERROR_LOG_FILE)

# ============================================================
# ATOMIC FILE OPERATIONS
# ============================================================
def atomic_write(data, filepath: Path):
    """Write ``data`` to ``filepath`` via a temporary file plus rename.

    The payload is written to a uniquely named temporary file in the same
    directory as the target, flushed and fsync'ed, and then moved over the
    target with ``Path.replace``. A unique name (rather than a fixed
    ``<name>.tmp``) keeps two concurrent writers of the same target from
    overwriting each other's half-written temp file.

    Args:
        data: ``dict``/``list`` values are serialized as indented JSON;
            anything else is written as ``str(data)``.
        filepath: destination path (``Path`` or ``str``).

    Raises:
        Exception: any error is logged, the temp file is removed, and the
            original exception is re-raised. The target is left untouched.

    Note:
        The temp file is created by ``tempfile.mkstemp``, so the resulting
        file has owner-only permissions on POSIX.
    """
    import tempfile

    filepath = Path(filepath)
    temp_path = None
    try:
        fd, temp_name = tempfile.mkstemp(
            dir=str(filepath.parent),
            prefix=filepath.name + '.',
            suffix='.tmp',
        )
        temp_path = Path(temp_name)
        with os.fdopen(fd, 'w') as f:
            if isinstance(data, (dict, list)):
                json.dump(data, f, indent=2)
            else:
                f.write(str(data))
            # Make sure the bytes are on disk before the rename publishes them.
            f.flush()
            os.fsync(f.fileno())
        temp_path.replace(filepath)
    except Exception as e:
        logger.error(f"Atomic write failed: {e}")
        # Clean up temp file
        if temp_path is not None and temp_path.exists():
            temp_path.unlink()
        raise

def atomic_read(filepath: Path, default=None):
    """Read a file, returning ``default`` when it is missing or unusable.

    Args:
        filepath: ``Path`` to read. Files with a ``.json`` suffix are parsed
            as JSON; everything else is returned as text.
        default: value returned when the file does not exist, cannot be
            read, or cannot be decoded.

    Returns:
        The parsed JSON value, the file text, or ``default``.

    Recovery behaviour:
        * Undecodable content (invalid JSON or invalid text encoding) is
          treated as corruption: the file is renamed to ``<stem>.corrupted``
          so the next write starts clean, and ``default`` is returned.
        * Other I/O errors (permissions, transient filesystem errors) return
          ``default`` but leave the file in place, because the content itself
          may be perfectly fine.
    """
    if not filepath.exists():
        return default

    try:
        with open(filepath, 'r') as f:
            if filepath.suffix == '.json':
                return json.load(f)
            return f.read()
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        logger.warning(f"File {filepath} corrupted, attempting recovery: {e}")
        # Try to backup corrupted file
        backup_path = filepath.with_suffix('.corrupted')
        try:
            filepath.rename(backup_path)
        except OSError as backup_err:
            # Best-effort: report instead of silently swallowing.
            logger.warning(f"Could not move corrupted file {filepath} to {backup_path}: {backup_err}")
        return default
    except OSError as e:
        logger.warning(f"File {filepath} could not be read: {e}")
        return default

# ============================================================
# INITIALIZATION STATUS
# ============================================================
# Startup banner: log the effective output directory, CSV path, reel limit and
# circuit-breaker settings once the configuration cell has run.
logger.info("=" * 60)
logger.info("ANKI GENERATION PIPELINE - INITIALIZED")
logger.info("=" * 60)
logger.info(f"Output directory: {CONFIG['OUT_DIR']}")
logger.info(f"CSV file: {CONFIG['CSV_FILE']}")
logger.info(f"Max reels: {CONFIG['MAX_REELS']}")
logger.info(f"Circuit breaker: {CONFIG['CIRCUIT_BREAKER_THRESHOLD']} failures / {CONFIG['CIRCUIT_BREAKER_TIMEOUT']}s timeout")
logger.info("=" * 60)

In [None]:
# ============================================================
# CELL 2: DATA TYPES & SCHEMAS
# ============================================================
class CompletionState(Enum):
    """Three completion levels; each member's value is its lowercase string form.

    NOTE(review): presumably derived from the MIN_CARDS_FOR_FULL /
    MIN_CARDS_FOR_PARTIAL settings in CONFIG - confirm against the code that assigns it.
    """
    FULL = "full"
    PARTIAL = "partial"
    INCOMPLETE = "incomplete"

class RejectionType(Enum):
    """Categories of rejection; the values are the string labels for each category.

    Two members use a "rejected_" prefix and MECHANICAL uses "error_".
    NOTE(review): the distinction presumably separates content rejections from
    processing errors - confirm with the code that records them.
    """
    SEMANTIC = "rejected_semantic"
    STRUCTURAL = "rejected_structural"
    MECHANICAL = "error_mechanical"

class ContentDensity(Enum):
    """Content density labels returned by classify_content_density().

    Per that function's docstring: DENSE content goes through the full pipeline,
    LIGHT content gets reference-only cards, SKIP content is not processed.
    """
    DENSE = "dense"
    LIGHT = "light"
    SKIP = "skip"

class PromptStrategy(Enum):
    """Identifiers for the available prompt variants (values are short string codes).

    NOTE(review): the prompts these codes select are not defined in this part of
    the notebook - see the code that consumes PromptStrategy.
    """
    STRICT_ADVANCED = "A_STRICT"
    FOUNDATION_AWARE = "B_FOUNDATION"
    DSA_FOCUSED = "C_DSA"

@dataclass
class QualityDimensions:
    """Two-axis quality score for one concept, plus a weighted combination.

    Attributes:
        correctness_score: 0-1, from the presence of core atom fields.
        richness_score: 0-1, from card count and optional extras.
        combined_score: ``0.6 * correctness + 0.4 * richness``.

    All three are rounded to two decimals.
    """
    correctness_score: float
    richness_score: float
    combined_score: float

    @classmethod
    def calculate(cls, atoms: Dict, cards: List[Dict]):
        """Score ``atoms`` and ``cards``.

        Correctness: +0.4 for a non-empty ``definition``, +0.4 for at least
        three ``technical_points``, +0.2 for at least one ``solutions`` entry.

        Richness: ``min(len(cards) / 3, 0.5)``, +0.3 if any
        ``related_concepts`` exist, +0.2 if ``has_tradeoffs`` is truthy,
        capped at 1.0.

        Keys that are missing *or explicitly null* are treated as empty, and
        ``atoms``/``cards`` themselves may be ``None``.

        Args:
            atoms: extracted concept fields.
            cards: generated cards for the concept.

        Returns:
            QualityDimensions: the rounded scores.
        """
        atoms = atoms or {}
        # `or []` also covers keys that are present with a None value, which
        # a plain .get(key, []) would pass straight to len().
        technical_points = atoms.get("technical_points") or []
        solutions = atoms.get("solutions") or []
        related_concepts = atoms.get("related_concepts") or []
        card_count = len(cards or [])

        has_definition = bool(atoms.get("definition"))
        has_tech_points = len(technical_points) >= 3
        has_solutions = len(solutions) >= 1

        correctness = (
            (0.4 if has_definition else 0) +
            (0.4 if has_tech_points else 0) +
            (0.2 if has_solutions else 0)
        )

        has_related = len(related_concepts) > 0
        has_tradeoffs = atoms.get("has_tradeoffs", False)

        richness = (
            min(card_count / 3, 0.5) +
            (0.3 if has_related else 0) +
            (0.2 if has_tradeoffs else 0)
        )
        richness = min(richness, 1.0)

        combined = correctness * 0.6 + richness * 0.4

        return cls(
            correctness_score=round(correctness, 2),
            richness_score=round(richness, 2),
            combined_score=round(combined, 2)
        )

@dataclass
class ModelConfig:
    """Per-model generation settings; only ``name`` is required."""
    # Model identifier. NOTE(review): presumably an Ollama model tag - confirm at the call site.
    name: str
    # NOTE(review): -1 presumably means "offload all layers to the GPU" - confirm.
    gpu_layers: int = -1
    # Sampling parameters (defaults: temperature 0.1, top_p 0.9).
    temperature: float = 0.1
    # NOTE(review): presumably the maximum number of tokens to generate - confirm.
    num_predict: int = 2000
    top_p: float = 0.9

class BaseSchema:
    """Minimal key-presence schema.

    Subclasses override ``required`` with the set of keys a dict must contain.
    Only key presence is checked; values are not inspected.
    """
    required: set = set()

    @classmethod
    def validate(cls, data: dict) -> bool:
        """Return True when ``data`` is a dict containing every required key."""
        if not isinstance(data, dict):
            return False
        return all(key in data for key in cls.required)

class AtomsSchema(BaseSchema):
    """Keys an "atoms" dict must contain to pass validate().

    Includes the fields QualityDimensions.calculate reads (definition,
    technical_points, solutions, has_tradeoffs) plus concept, category and impact.
    """
    required = {
        "concept", "category", "definition",
        "technical_points", "solutions", "impact", "has_tradeoffs",
    }

class BasicCardSchema(BaseSchema):
    """Required keys for a card dict with a front and a back side."""
    required = {"type", "front", "back"}

class ClozeCardSchema(BaseSchema):
    """Required keys for a cloze card dict (the cloze text lives under "cloze")."""
    required = {"type", "cloze"}

class TradeoffCardSchema(BaseSchema):
    """Required keys for a tradeoff card dict (entries live under "tradeoffs")."""
    required = {"type", "tradeoffs"}

class CardsContainerSchema(BaseSchema):
    """Required key for a wrapper dict that carries its payload under "cards"."""
    required = {"cards"}

# ============================================================
# GLOBAL STATE (Minimal)
# ============================================================
# Process-wide mutable state. Re-running this cell resets all of it.

# Integer counters keyed by metric name (missing keys start at 0).
# HybridDuplicateDetector.is_duplicate updates ROUTING_METRICS.
ROUTING_METRICS = defaultdict(int)
ROUTING_COUNTS = defaultdict(int)

# Caches / stats, each paired with the Lock next to it.
# NOTE(review): the pairing is by naming only - the code using them is outside this section.
MODEL_CAPABILITY_CACHE = {}
MODEL_CAPABILITY_LOCK = Lock()
PROMPT_VERSION_STATS = {}
PROMPT_VERSION_LOCK = Lock()

# Per-confidence-bucket tallies: "total" and "accepted" counts for five 0.1-wide
# buckets covering 0.5 through 1.0.
CONFIDENCE_CALIBRATION = {
    "buckets": {
        "0.5-0.6": {"total": 0, "accepted": 0},
        "0.6-0.7": {"total": 0, "accepted": 0},
        "0.7-0.8": {"total": 0, "accepted": 0},
        "0.8-0.9": {"total": 0, "accepted": 0},
        "0.9-1.0": {"total": 0, "accepted": 0}
    }
}

# Enrichment bookkeeping: int counters and float timestamps, both defaulting to 0.
# NOTE(review): presumably keyed per concept and limited by CONFIG["MAX_ENRICHMENTS_PER_CONCEPT"] - confirm.
ENRICHMENT_BUDGET = defaultdict(int)
ENRICHMENT_TIMESTAMPS = defaultdict(float)
ENRICHMENT_BUDGET_LOCK = Lock()
ENRICHMENT_BUDGET_RESET_DAYS = 7

In [None]:
# ============================================================
# CELL 3: CONTENT PROCESSING & CLASSIFICATION
# ============================================================

# Mechanism markers for deep content detection
# Matched as lowercase substrings by technical_score(): each marker found adds 2 points,
# capped at +4 in total.
MECHANISM_MARKERS = [
    "contract", "invariant", "guarantee", "violates", "breaks", "ensures",
    "depends on", "requires", "only if", "undefined behavior", "happens-before",
    "race condition", "memory model", "visibility", "ordering", "consistency",
    "semantics", "specification", "constraint", "precondition", "postcondition"
]

# Topic classification configuration
# Declaration order matters: classify_topic() walks these entries in reverse
# (advanced -> intermediate -> foundation) and returns the first class with a
# keyword that occurs as a substring of the lowercased caption + transcript.
# NOTE(review): threshold_adjustment, expected_depth and expected_cards are not read
# in this section of the notebook - confirm where they are consumed.
TOPIC_CLASSES = {
    "foundation": {
        "keywords": [
            "immutability", "string", "array", "oops", "collection",
            "hashmap", "equals", "hashcode", "sorting", "basics",
            "fundamentals", "introduction", "inheritance", "polymorphism",
            "encapsulation", "abstraction", "list", "set", "queue"
        ],
        "threshold_adjustment": -3,
        "expected_depth": "intermediate",
        "expected_cards": 2
    },
    "intermediate": {
        "keywords": [
            "concurrency", "thread", "lock", "synchronization", "volatile",
            "atomic", "concurrent", "deadlock", "race condition",
            "memory model", "jvm", "garbage collection", "classloader"
        ],
        "threshold_adjustment": -1,
        "expected_depth": "deep",
        "expected_cards": 3
    },
    "advanced": {
        "keywords": [
            "distributed", "saga", "circuit breaker", "event sourcing",
            "cqrs", "microservice", "kubernetes", "docker", "kafka",
            "consistency model", "partition tolerance", "cap theorem"
        ],
        "threshold_adjustment": 0,
        "expected_depth": "very_deep",
        "expected_cards": 4
    }
}

# Technical keywords for scoring
# Used by technical_score(): for each category, the number of keywords found as
# lowercase substrings is added to the score, capped at 3 per category.
TECHNICAL_KEYWORDS = {
    "java": [
        "heap", "stack", "garbage collection", "jvm", "bytecode",
        "classloader", "reflection", "synchronized", "volatile",
        "thread", "immutable", "serialization", "generics"
    ],
    "spring": [
        "dependency injection", "bean", "autowired", "aop",
        "transactional", "repository", "service", "controller",
        "configuration", "component scan"
    ],
    "microservices": [
        "saga", "event sourcing", "cqrs", "api gateway",
        "service mesh", "circuit breaker", "bulkhead", "rate limiting",
        "idempotency", "distributed transaction"
    ],
    "algorithms": [
        "time complexity", "space complexity", "big o", "recursion",
        "dynamic programming", "backtracking", "greedy", "divide and conquer"
    ],
    "system_design": [
        "scalability", "availability", "consistency", "partition tolerance",
        "load balancer", "caching", "sharding", "replication",
        "eventual consistency", "cap theorem"
    ]
}

# Foundation-specific keywords for better scoring
# Used by technical_score(): the number of these found as lowercase substrings is
# added to the score, capped at +4. Several entries ("stack", "volatile",
# "synchronized", "immutable") also appear in TECHNICAL_KEYWORDS["java"], so a
# single occurrence can contribute to both bonuses.
FOUNDATION_ENHANCEMENT_KEYWORDS = [
    "string", "array", "list", "map", "set", "queue", "stack",
    "equals", "hashcode", "immutable", "mutable", "inheritance",
    "polymorphism", "encapsulation", "abstraction", "interface",
    "abstract class", "constructor", "static", "final", "volatile",
    "transient", "synchronized", "exception", "try", "catch", "finally",
    "stream", "lambda", "functional interface", "optional", "generic"
]

def classify_topic(caption: str, category: str, transcript: str = "") -> str:
    """Assign a depth class to a reel from its caption and transcript.

    Resolution order:
      1. Keyword hit in ``TOPIC_CLASSES``, checking the last-declared class
         first (advanced, then intermediate, then foundation).
      2. Otherwise, heuristics on transcript length combined with
         tradeoff / decision / depth phrases.
      3. Otherwise, transcript length alone.

    Args:
        caption: reel caption.
        category: accepted for interface compatibility; not used here.
        transcript: reel transcript (may be empty).

    Returns:
        ``'foundation'``, ``'intermediate'`` or ``'advanced'``.
    """
    haystack = f"{caption} {transcript}".lower()

    # Step 1: configured keywords, most advanced class first.
    for level in reversed(list(TOPIC_CLASSES)):
        if any(keyword in haystack for keyword in TOPIC_CLASSES[level]["keywords"]):
            return level

    def mentions_any(phrases) -> bool:
        """True when at least one phrase occurs as a substring of the text."""
        return any(phrase in haystack for phrase in phrases)

    # Step 2: phrase-based signals.
    # NOTE(review): these are plain substring tests, so short entries such as
    # "vs" and "or " also match inside longer words ("canvas", "for ").
    has_tradeoff = mentions_any((
        "vs", " vs ", "versus", "or ", "instead", "trade-off", "tradeoff",
        "compare", "comparison", "difference between", "when to", "choose",
    ))
    has_decision = mentions_any((
        "decide", "decision", "which", "what if", "scenario", "use case",
        "depends on", "context", "situation",
    ))
    depth_count = sum(
        marker in haystack
        for marker in (
            "internal", "mechanism", "how it works", "under the hood",
            "implementation", "architecture", "design pattern", "best practice",
        )
    )

    transcript_len = len(transcript.strip())

    # Long transcript with a tradeoff or several depth markers.
    if transcript_len > 400 and (has_tradeoff or depth_count >= 2):
        return "advanced"

    # Medium transcript with any decision, tradeoff or depth signal.
    if transcript_len > 200 and (has_decision or has_tradeoff or depth_count >= 1):
        return "intermediate"

    # Step 3: no signals at all - anything longer than 250 characters is
    # still treated as intermediate rather than foundation.
    return "intermediate" if transcript_len > 250 else "foundation"

def classify_content_density(caption: str, transcript: str, category: str = "") -> ContentDensity:
    """Label a reel as DENSE or LIGHT for multi-track routing.

    Six "dense" signals (code vocabulary, problem solving, concrete examples,
    mechanism talk, step structure, long form) and four "light" signals
    (overview, motivational, list-based, short form) are each counted as 0/1.

    Returns:
        ``ContentDensity.DENSE`` when at least three dense signals fire, or
        when at least two fire and at most one light signal does;
        ``ContentDensity.LIGHT`` in every other case.

    Note:
        ``ContentDensity.SKIP`` is never returned by this function, and
        ``category`` is accepted but not used.
    """
    text = f"{caption} {transcript}".lower()
    word_count = len(transcript.split())

    def mentions(*markers) -> bool:
        """True when any marker occurs as a substring of the lowercased text."""
        return any(marker in text for marker in markers)

    # Tutorial / problem-solving indicators.
    dense_score = sum((
        # code_present
        mentions("code", "function", "method", "class", "variable",
                 "algorithm", "implementation", "syntax", "example"),
        # problem_solving
        mentions("question", "problem", "solve", "solution", "approach",
                 "step 1", "step 2", "algorithm", "complexity"),
        # concrete_example
        mentions("for example", "let's say", "consider", "here is",
                 "output", "result", "returns"),
        # mechanism
        mentions("how it works", "internally", "mechanism", "under the hood",
                 "behind the scenes", "what happens"),
        # structured
        "step" in text and word_count > 150,
        # long_form
        word_count > 300,
    ))

    # Overview / motivational indicators.
    light_score = sum((
        # overview
        mentions("introduction", "overview", "basics", "fundamentals",
                 "what is", "definition"),
        # motivational
        mentions("you must know", "important to", "should learn",
                 "every developer", "trust me"),
        # list_based
        mentions("top 10", "top 5", "things to", "tips", "must know"),
        # short_form
        word_count < 150,
    ))

    if dense_score >= 3 or (dense_score >= 2 and light_score <= 1):
        return ContentDensity.DENSE

    # Everything else - clearly light or simply unclear - is routed as LIGHT.
    return ContentDensity.LIGHT

def normalize_learning_key(text: str) -> str:
    """Reduce a caption to a canonical key so that variants share one memory entry.

    Applied in order:
      1. lowercase,
      2. drop hashtags (``#word``),
      3. drop "part N" markers,
      4. turn every character that is neither a word character nor whitespace
         into a space (this removes punctuation and emoji),
      5. collapse whitespace runs and trim.
    """
    rewrites = (
        (r'#\w+', '', 0),                         # hashtags
        (r'\bpart\s*\d+\b', '', re.IGNORECASE),   # "Part 1", "part2", ...
        (r'[^\w\s]', ' ', 0),                     # punctuation / symbols -> space
        (r'\s+', ' ', 0),                         # collapse whitespace
    )

    key = text.lower()
    for pattern, replacement, flags in rewrites:
        key = re.sub(pattern, replacement, key, flags=flags)
    return key.strip()

@lru_cache(maxsize=10000)
def _technical_score_cached(text: str) -> int:
    """Score a non-empty string; memoized because ``str`` is hashable."""
    text_lower = text.lower()
    score = 0

    # Keyword matching (base score): up to 3 points per category.
    for keywords in TECHNICAL_KEYWORDS.values():
        matches = sum(1 for kw in keywords if kw in text_lower)
        score += min(matches, 3)

    # Foundation-specific keyword bonus: up to +4.
    foundation_matches = sum(1 for kw in FOUNDATION_ENHANCEMENT_KEYWORDS if kw in text_lower)
    score += min(foundation_matches, 4)

    # Mechanism marker bonus: 2 points per marker, up to +4.
    mechanism_count = sum(1 for marker in MECHANISM_MARKERS if marker in text_lower)
    if mechanism_count > 0:
        score += min(mechanism_count * 2, 4)

    # Length normalization: damp short texts, boost long ones.
    word_count = len(text.split())
    if word_count < 50:
        score = score * 0.7
    elif word_count > 200:
        score = score * 1.2

    return min(max(int(score), 1), 10)  # Ensure at least 1, max 10


def technical_score(text: str) -> int:
    """Calculate a technical density score.

    Args:
        text: the text to score. Non-string values (dicts, lists, ...) are
            serialized with ``json.dumps`` first.

    Returns:
        ``0`` for empty/falsy input, otherwise an int from 1 to 10.

    The conversion to ``str`` happens *before* the memoized helper is called:
    ``lru_cache`` hashes its arguments, so passing a dict or list directly to
    a cached function raises ``TypeError`` before the body can convert it.
    ``cache_info`` and ``cache_clear`` are re-exported on this function so
    existing callers of those attributes keep working.
    """
    if not text:
        return 0

    if not isinstance(text, str):
        text = json.dumps(text, ensure_ascii=False)

    return _technical_score_cached(text)


# Keep the lru_cache management API available under the public name.
technical_score.cache_info = _technical_score_cached.cache_info
technical_score.cache_clear = _technical_score_cached.cache_clear

In [None]:
# ============================================================
# CELL 4: MEMORY & LEARNING SYSTEMS
# ============================================================
import pickle

class HybridDuplicateDetector:
    """
    Hybrid duplicate detection:
    - Bloom filter (when bloom_filter2 is installed) as a fast "definitely new" check
    - Exact fingerprint set for final verification
    - Batch persistence via save_if_dirty()

    A card's fingerprint is the MD5 hex digest of its normalized content; MD5
    is used purely as a content hash here, not for security.
    """
    def __init__(self, fingerprints_file: Path, bloom_file: Path):
        self.fingerprints_file = fingerprints_file
        self.bloom_file = bloom_file
        self.lock = Lock()

        self.fingerprints: Set[str] = self._load_fingerprints()

        if BLOOM_AVAILABLE:
            self.bloom = self._load_bloom()
        else:
            self.bloom = None

        # Unsaved-changes tracking for batch persistence.
        self._dirty = False
        self._dirty_count = 0

    def _load_fingerprints(self) -> Set[str]:
        """Load the exact fingerprint set; any read/parse error yields an empty set."""
        if self.fingerprints_file.exists():
            try:
                with open(self.fingerprints_file, 'r') as f:
                    return set(json.load(f))
            except Exception as e:
                print(f"‚ö†Ô∏è  Error loading fingerprints: {e}")
                return set()
        return set()

    def _load_bloom(self):
        """Load the pickled bloom filter, or create a fresh one."""
        if self.bloom_file.exists():
            try:
                # SECURITY NOTE(review): pickle.load executes arbitrary code from
                # the file. Only safe while bloom_file is written exclusively by
                # this pipeline; do not point it at untrusted data.
                with open(self.bloom_file, "rb") as f:
                    return pickle.load(f)
            except Exception as e:
                print(f"‚ö†Ô∏è  Error loading bloom filter: {e}")

        return BloomFilter(max_elements=2_000_000, error_rate=0.001)

    def _normalize_text(self, text) -> str:
        """Lowercase, strip punctuation and collapse whitespace (non-str input is JSON-encoded first)."""
        if not isinstance(text, str):
            text = json.dumps(text, ensure_ascii=False)

        text = re.sub(r'[^\w\s]', '', text.lower())
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def _create_fingerprint(self, card: Dict) -> str:
        """Return the MD5 hex fingerprint of a card's normalized content.

        * basic: normalized front + first 300 chars of the normalized back
        * cloze: cloze text with ``{{cN::...}}`` markers unwrapped, normalized
        * tradeoff: normalized front + the raw ``approach`` of each tradeoff
        * any other type: the normalized JSON of the whole card, so that
          distinct cards of an unrecognised type do not all share the
          fingerprint of the empty string

        A missing ``type`` is treated as ``"basic"``.
        """
        card_type = card.get("type", "basic")

        if card_type == "basic":
            front = self._normalize_text(card.get("front", ""))
            back = self._normalize_text(card.get("back", ""))[:300]
            content = f"{front}|{back}"

        elif card_type == "cloze":
            cloze = card.get("cloze", "")
            if not isinstance(cloze, str):
                # re.sub needs a string; mirror _normalize_text's conversion.
                cloze = json.dumps(cloze, ensure_ascii=False)
            clean = re.sub(r'\{\{c\d+::(.*?)\}\}', r'\1', cloze)
            content = self._normalize_text(clean)

        elif card_type == "tradeoff":
            front = self._normalize_text(card.get("front", ""))
            # Tolerate a null list and non-dict entries.
            approaches = [
                str(option.get("approach", "")) if isinstance(option, dict) else str(option)
                for option in (card.get("tradeoffs") or [])
            ]
            content = f"{front}|{' '.join(approaches)}"

        else:
            content = self._normalize_text(
                json.dumps(card, sort_keys=True, ensure_ascii=False, default=str)
            )

        return hashlib.md5(content.encode()).hexdigest()

    def is_duplicate(self, card: Dict) -> bool:
        """Return True when the card's fingerprint is in the exact set.

        When a bloom filter is present it is consulted first: a miss there
        returns False without touching the exact set. Updates the
        ``_bloom_total_checks`` and ``bloom_false_positive_suspected``
        counters in ROUTING_METRICS.
        """
        fingerprint = self._create_fingerprint(card)

        ROUTING_METRICS["_bloom_total_checks"] = \
            ROUTING_METRICS.get("_bloom_total_checks", 0) + 1

        # `is not None` rather than truthiness: the filter's presence is what matters.
        if self.bloom is not None and fingerprint not in self.bloom:
            return False

        with self.lock:
            is_dup = fingerprint in self.fingerprints

            if self.bloom is not None and not is_dup:
                ROUTING_METRICS["bloom_false_positive_suspected"] = \
                    ROUTING_METRICS.get("bloom_false_positive_suspected", 0) + 1

            return is_dup

    def add_fingerprint(self, fingerprint: str):
        """Register a fingerprint in the exact set (and bloom filter) and mark state dirty."""
        self.fingerprints.add(fingerprint)

        if self.bloom is not None:
            self.bloom.add(fingerprint)

        self._dirty = True
        self._dirty_count += 1

    def save_if_dirty(self, force: bool = False):
        """Persist fingerprints (and bloom filter) when enough changes accumulated.

        Saves only when there are unsaved changes and either ``force`` is True
        or at least ``CONFIG["FINGERPRINT_BATCH_SIZE"]`` fingerprints were added
        since the last save. Save errors are printed, not raised.
        """
        if not self._dirty:
            return

        if not force and self._dirty_count < CONFIG["FINGERPRINT_BATCH_SIZE"]:
            return

        with self.lock:
            try:
                with open(self.fingerprints_file, 'w') as f:
                    json.dump(list(self.fingerprints), f, indent=2)

                if self.bloom is not None:
                    with open(self.bloom_file, "wb") as f:
                        pickle.dump(self.bloom, f)

                self._dirty = False
                self._dirty_count = 0
            except Exception as e:
                print(f"‚ö†Ô∏è  Error saving fingerprints: {e}")

class TerminalRejectionTracker:
    """
    Registry of reels that have been rejected for good.

    Entries are keyed by reel id and store the reason, stage, rejection type
    and a timestamp. There is no method to remove an entry, so a reel marked
    terminal stays terminal for the lifetime of the backing file.
    """
    def __init__(self, file_path: Path):
        self.file_path = file_path
        self.terminal_reels: Dict[str, Dict] = self._load()
        self.lock = Lock()

    def _load(self) -> Dict[str, Dict]:
        """Read the ``terminal_reels`` mapping from disk; missing or unreadable file -> ``{}``."""
        if not self.file_path.exists():
            return {}
        try:
            with open(self.file_path, 'r') as f:
                payload = json.load(f)
                return payload.get("terminal_reels", {})
        except Exception:
            return {}

    def mark_terminal(self, reel_id: str, reason: str = "", stage: str = "stage1", rejection_type: str = "STRUCTURAL"):
        """Record (or overwrite) the terminal entry for ``reel_id``."""
        entry = {
            "reason": reason,
            "stage": stage,
            "rejection_type": rejection_type,
        }
        with self.lock:
            entry["timestamp"] = time.time()
            self.terminal_reels[reel_id] = entry

    def is_terminal(self, reel_id: str) -> bool:
        """True when ``reel_id`` has been marked terminal."""
        with self.lock:
            return reel_id in self.terminal_reels

    def get_stats(self) -> Dict:
        """Return the total count plus breakdowns by reason and by stage."""
        with self.lock:
            by_reason = defaultdict(int)
            by_stage = defaultdict(int)
            for record in self.terminal_reels.values():
                by_reason[record.get("reason", "unknown")] += 1
                by_stage[record.get("stage", "unknown")] += 1

            return {
                "total": len(self.terminal_reels),
                "by_reason": dict(by_reason),
                "by_stage": dict(by_stage)
            }

    def save(self):
        """Write all entries to ``file_path`` as JSON; failures are printed, not raised."""
        with self.lock:
            try:
                snapshot = {
                    "terminal_reels": self.terminal_reels,
                    "count": len(self.terminal_reels)
                }
                with open(self.file_path, 'w') as f:
                    json.dump(snapshot, f, indent=2)
            except Exception as e:
                print(f"‚ö†Ô∏è  Failed to save terminal rejections: {e}")

class RejectionMemory:
    """
    Consistent memory key across all operations
    Key format: concept::category::topic_class

    Each entry records the rejections seen for that key and, once one is
    recorded, the strategy that eventually succeeded. The whole mapping is
    persisted to ``memory_file`` as JSON by ``save()``.
    """
    def __init__(self, memory_file: Path):
        self.memory_file = memory_file
        self.memory = self._load()
        self.lock = Lock()

    def _load(self) -> Dict:
        """Return the saved memory mapping, or ``{}`` if the file is absent, unreadable or not a JSON object."""
        if self.memory_file.exists():
            try:
                with open(self.memory_file, 'r') as f:
                    loaded = json.load(f)
                    # Every other method treats self.memory as a dict.
                    return loaded if isinstance(loaded, dict) else {}
            except Exception:
                return {}
        return {}

    def _make_key(self, concept: str, category: str = "", topic_class: str = "") -> str:
        """Build the storage key; empty parts are kept empty (e.g. ``concept::::topic``)."""
        return f"{concept}::{category}::{topic_class}"

    def _make_canonical_key(self, concept: str, category: str = "") -> str:
        """Build a ``concept::category`` label, substituting ``'unknown'`` for an empty category.

        NOTE: this is NOT a prefix of ``_make_key`` output when category is empty,
        so it must not be used to search stored keys.
        """
        return f"{concept}::{category or 'unknown'}"

    def record_rejection(self, concept: str, score: int, reason: str,
                        category: str = "", topic_class: str = ""):
        """Append a rejection (score, reason, timestamp) to the entry for this key, creating the entry if needed."""
        with self.lock:
            key = self._make_key(concept, category, topic_class)

            if key not in self.memory:
                self.memory[key] = {
                    "concept": concept,
                    "category": category,
                    "topic_class": topic_class,
                    "rejections": [],
                    "successful_strategy": None,
                    "first_seen_ts": time.time(),
                    "first_success_ts": None,
                    "attempts_until_success": None
                }

            self.memory[key]["rejections"].append({
                "score": score,
                "reason": reason,
                "timestamp": time.time()
            })

    def record_success(self, concept: str, strategy: str,
                      category: str = "", topic_class: str = "",
                      delta_score: int = 0):
        """Store the strategy that succeeded for this key.

        Only keys that already have an entry (i.e. at least one recorded
        rejection) are updated; a success for an unknown key is ignored.
        """
        with self.lock:
            key = self._make_key(concept, category, topic_class)

            if key in self.memory:
                entry = self.memory[key]
                entry["successful_strategy"] = {
                    "strategy": strategy,
                    "delta_score": delta_score,
                    "topic_class": topic_class,
                    "timestamp": time.time()
                }

                # Learning-velocity bookkeeping is fixed at the FIRST success only.
                if entry.get("first_success_ts") is None:
                    entry["first_success_ts"] = time.time()
                    entry["attempts_until_success"] = len(entry.get("rejections", []))

                success_matrix = entry.setdefault("success_by_topic_class", {})
                tc = topic_class or "unknown"
                success_matrix[tc] = success_matrix.get(tc, 0) + 1

    def get_strategy(self, concept: str, category: str = "", topic_class: str = "") -> Optional[str]:
        """Look up a previously successful strategy for this concept.

        Tries the exact key first, then falls back to any stored entry for the
        same concept and category under a different topic_class.

        Returns the stored strategy record as written by ``record_success``
        (a dict with "strategy", "delta_score", "topic_class", "timestamp"),
        or None when nothing has succeeded yet.
        """
        key = self._make_key(concept, category, topic_class)
        entry = self.memory.get(key)
        if entry and entry.get("successful_strategy"):
            success_matrix = entry.get("success_by_topic_class", {})
            if topic_class and success_matrix.get(topic_class, 0) > 0:
                return entry.get("successful_strategy")
            elif not topic_class or not success_matrix:
                return entry.get("successful_strategy")

        # Build the fallback prefix with the SAME formatter used for storage so that
        # an empty category ("concept::::") matches stored keys. The previous
        # 'unknown'-substituting prefix could never match those keys.
        prefix = self._make_key(concept, category)
        # Snapshot the items: writers may insert keys from other threads, and
        # iterating a dict while it grows raises RuntimeError.
        for stored_key, stored_entry in list(self.memory.items()):
            if stored_key.startswith(prefix):
                if stored_entry.get("successful_strategy"):
                    return stored_entry.get("successful_strategy")

        return None

    def should_skip(self, concept: str, category: str = "", topic_class: str = "") -> bool:
        """Return True when this exact key has 3+ rejections and no recorded success."""
        key = self._make_key(concept, category, topic_class)
        entry = self.memory.get(key)
        if not entry:
            return False

        rejection_count = len(entry.get("rejections", []))
        has_success = entry.get("successful_strategy") is not None

        return rejection_count >= 3 and not has_success

    def get_rejection_count(self, concept: str, category: str = "", topic_class: str = "") -> int:
        """Return how many rejections are recorded for this exact key (0 if unknown)."""
        key = self._make_key(concept, category, topic_class)
        entry = self.memory.get(key)
        if not entry:
            return 0
        return len(entry.get("rejections", []))

    def save(self, prune: bool = True):
        """Persist the memory to ``memory_file``.

        With ``prune=True`` (default), entries first seen more than 90 days ago
        that never succeeded are dropped before writing. Write failures are
        reported, not raised.
        """
        with self.lock:
            if prune:
                TTL_DAYS = 90
                now = time.time()
                ttl_seconds = TTL_DAYS * 24 * 60 * 60

                keys_to_remove = []
                for key, entry in self.memory.items():
                    first_seen = entry.get("first_seen_ts", now)
                    age_seconds = now - first_seen
                    has_success = entry.get("successful_strategy") is not None

                    if age_seconds > ttl_seconds and not has_success:
                        keys_to_remove.append(key)

                for key in keys_to_remove:
                    del self.memory[key]

                if keys_to_remove:
                    print(f"      üßπ Pruned {len(keys_to_remove)} stale rejection memory entries (90+ days old, no success)")

            try:
                # Serialize before opening: open(..., 'w') truncates, so a failure
                # inside json.dump would leave a partial file that _load() reads as {}.
                payload = json.dumps(self.memory, indent=2)
                with open(self.memory_file, 'w') as f:
                    f.write(payload)
            except Exception as e:
                print(f"‚ö†Ô∏è  Failed to save rejection memory: {e}")

    def get_stats(self) -> Dict:
        """Summarize entry/rejection totals, counts per successful strategy, and learning velocity."""
        with self.lock:
            total_concepts = len(self.memory)
            total_rejections = sum(len(e.get("rejections", [])) for e in self.memory.values())

            strategies = defaultdict(int)
            for entry in self.memory.values():
                strategy = entry.get("successful_strategy")
                if strategy:
                    # Accepts both the record dict written by record_success and a plain string.
                    if isinstance(strategy, dict):
                        strategies[strategy.get("strategy", "unknown")] += 1
                    else:
                        strategies[strategy] += 1

            learning_velocities = [
                entry["attempts_until_success"]
                for entry in self.memory.values()
                if entry.get("attempts_until_success") is not None
            ]

            avg_attempts = sum(learning_velocities) / len(learning_velocities) if learning_velocities else 0

            return {
                "total_concepts": total_concepts,
                "total_rejections": total_rejections,
                "strategies": dict(strategies),
                "learning_velocity": {
                    "avg_attempts_until_success": round(avg_attempts, 2),
                    "concepts_learned": len(learning_velocities),
                    "concepts_still_learning": total_concepts - len(learning_velocities)
                }
            }

# Module-level singletons shared by the pipeline stages
duplicate_detector = HybridDuplicateDetector(CARD_FINGERPRINTS_FILE, BLOOM_FILE)
terminal_rejections = TerminalRejectionTracker(TERMINAL_REJECTIONS_FILE)

# Rejection learning is optional; the name stays bound to None when it is disabled
rejection_memory = (
    RejectionMemory(REJECTION_MEMORY_FILE)
    if CONFIG["ENABLE_REJECTION_LEARNING"]
    else None
)

In [None]:
# ============================================================
# CELL 5: LLM & MODEL MANAGEMENT
# ============================================================

# Model configuration
# Maps each pipeline stage name to the model settings used for it. llm() forwards
# `name` as the Ollama model and `temperature` / `num_predict` as generation options.
# validate_and_fix_models() may overwrite `.name` in place with a fallback model.
PIPELINE_MODELS = {
    # Stage 1 atom extraction (also used by transcript normalization)
    'extract': ModelConfig(
        name='mistral:7b-instruct-v0.2',
        temperature=0.1,
        num_predict=2000
    ),
    # Same model as 'extract', slightly higher temperature and a shorter output budget
    'extract_retry': ModelConfig(
        name='mistral:7b-instruct-v0.2',
        temperature=0.15,
        num_predict=1500
    ),
    # Stages whose key contains "generate" get an extra generate-endpoint check
    # in validate_and_fix_models() and warmup_models()
    'generate_basic': ModelConfig(
        name='mistral:7b-instruct',
        temperature=0.2,
        num_predict=1500
    ),
    'generate_cloze': ModelConfig(
        name='mistral:7b-instruct',
        temperature=0.2,
        num_predict=800
    ),
    'generate_tradeoff': ModelConfig(
        name='qwen2.5:7b-instruct',
        temperature=0.2,
        num_predict=1200
    ),
    # Lowest temperature and smallest output budget of all stages
    'validate': ModelConfig(
        name='qwen2.5:7b-instruct',
        temperature=0.05,
        num_predict=500
    ),
}

# Ordered replacement candidates per preferred model. validate_and_fix_models()
# looks a stage's model up here when it is not installed (or cannot generate) and
# takes the first candidate that is available.
MODEL_FALLBACKS = {
    'mistral:7b-instruct-v0.2': ['mistral:7b-instruct', 'mistral:latest'],
    # 'mistral:7b-instruct' is the configured model for the generate_basic and
    # generate_cloze stages; without an entry here those stages had no fallback at all.
    'mistral:7b-instruct': [
        'mistral:7b-instruct-v0.2',
        'mistral:latest',
        'qwen2.5:7b-instruct'
    ],
    'llama3.1:8b-instruct-q8_0': [
        'mistral:7b-instruct',
        'qwen2.5:7b-instruct',
        'llama3.1:8b-instruct-q4_0',
        'llama3.1:latest'
    ],
    'qwen2.5:7b-instruct-q4_K_M': [
        'qwen2.5:7b-instruct',
        'mistral:7b-instruct',
        'llama3.1:8b-instruct-q4_0'
    ],
    'qwen2.5:7b-instruct': [
        'qwen2.5:latest',
        'mistral:7b-instruct',
        'phi3:mini'
    ],
}

# Shared HTTP session used by llm() so repeated Ollama calls can reuse a connection
session = requests.Session()
session.headers["Connection"] = "keep-alive"

def gpu_available() -> bool:
    """Return True if `nvidia-smi` can be executed successfully, False otherwise."""
    try:
        subprocess.check_output(["nvidia-smi"], stderr=subprocess.DEVNULL)
        return True
    # OSError: binary missing / not executable; SubprocessError: non-zero exit.
    # A bare `except` here also swallowed KeyboardInterrupt and SystemExit.
    except (OSError, subprocess.SubprocessError):
        return False

def get_gpu_memory() -> Optional[int]:
    """Return the total memory of the first GPU reported by `nvidia-smi`, divided by 1024.

    The query asks for `memory.total` without units; the reported number is
    integer-divided by 1024 before being returned. Returns None when
    `nvidia-smi` cannot be run or its output cannot be parsed.
    """
    try:
        output = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits"],
            stderr=subprocess.DEVNULL
        ).decode()
        # nvidia-smi prints one line per GPU; int() on the whole multi-line
        # output raised ValueError and made multi-GPU hosts look like "no GPU".
        first_gpu = output.strip().splitlines()[0]
        return int(first_gpu.strip()) // 1024
    except (OSError, subprocess.SubprocessError, ValueError, IndexError):
        return None

def model_supports_generate(model_name: str) -> bool:
    """
    Check if model supports /api/generate endpoint.
    Memoized for stability.

    This is a name-based heuristic (no request is made): names containing a
    chat-only or embedding hint are rejected, otherwise the name must contain
    one of the known model-family substrings. Results are cached per name in
    MODEL_CAPABILITY_CACHE under MODEL_CAPABILITY_LOCK.
    """
    with MODEL_CAPABILITY_LOCK:
        if model_name in MODEL_CAPABILITY_CACHE:
            return MODEL_CAPABILITY_CACHE[model_name]

    model_lower = model_name.lower()

    CHAT_ONLY_HINTS = ["chat", "assistant", "instruct-chat"]
    if any(hint in model_lower for hint in CHAT_ONLY_HINTS):
        result = False
    elif "embed" in model_lower:  # also covers "embedding"
        result = False
    else:
        # "llama" was missing although MODEL_FALLBACKS lists llama3.1 models as
        # fallbacks, so select_best_model() could never pick any of them.
        GENERATE_MODELS = [
            "mistral", "mixtral", "qwen", "phi",
            "deepseek", "gemma", "codellama", "llama", "yi"
        ]
        result = any(key in model_lower for key in GENERATE_MODELS)

    with MODEL_CAPABILITY_LOCK:
        MODEL_CAPABILITY_CACHE[model_name] = result

    return result

def check_ollama_health() -> bool:
    """Return True if the Ollama server answers GET /api/tags with HTTP 200 within 5 seconds."""
    try:
        r = requests.get(f"{CONFIG['OLLAMA_URL']}/api/tags", timeout=5)
        return r.status_code == 200
    # Connection refused, timeout, invalid URL, ... all mean "not healthy".
    # A bare `except` here also swallowed KeyboardInterrupt and SystemExit.
    except requests.exceptions.RequestException:
        return False

def get_available_models() -> List[str]:
    """Return the model names listed by the Ollama server's /api/tags endpoint.

    Best-effort: any request or parsing failure yields an empty list.
    """
    try:
        r = requests.get(f"{CONFIG['OLLAMA_URL']}/api/tags", timeout=5)
        r.raise_for_status()
        return [m["name"] for m in r.json().get("models", [])]
    # `Exception` keeps the best-effort contract (network errors, bad JSON,
    # unexpected payload shape) while letting KeyboardInterrupt/SystemExit
    # through, which the previous bare `except` swallowed.
    except Exception:
        return []

def select_best_model(preferred: str, fallbacks: List[str]) -> Optional[str]:
    """
    Pick the first usable model among `preferred` and then `fallbacks` (in order).

    A model is usable when it is installed on the Ollama server and passes
    model_supports_generate(). Returns the chosen model name, or None when no
    candidate qualifies.
    """
    installed = set(get_available_models())

    if preferred in installed:
        if model_supports_generate(preferred):
            return preferred
        print(f"   ‚ö†Ô∏è  '{preferred}' exists but doesn't support generate endpoint")

    for candidate in fallbacks:
        if candidate not in installed:
            continue

        if not model_supports_generate(candidate):
            print(f"   ‚ö†Ô∏è  '{candidate}' doesn't support generate endpoint, skipping")
            continue

        print(f"   ‚ö†Ô∏è  '{preferred}' not usable, using '{candidate}'")
        return candidate

    return None

def validate_and_fix_models():
    """
    Check every stage in PIPELINE_MODELS and swap in a fallback where needed.

    A stage needs a replacement when its model is not installed, or when the
    stage name contains "generate" and the model fails model_supports_generate().
    Replacements are written to the stage's config in place.

    Returns a list of (stage, model description) tuples for stages that could
    not be fixed.
    """
    available = set(get_available_models())
    missing = []

    for stage, config in PIPELINE_MODELS.items():
        model_name = config.name
        installed = model_name in available

        if installed:
            lacks_generate = "generate" in stage and not model_supports_generate(model_name)
            if not lacks_generate:
                continue
            print(f"   üö´ WARNING: {model_name} doesn't support generate endpoint!")
            print(f"      Attempting to find generate-capable replacement...")

        candidates = MODEL_FALLBACKS.get(model_name, [])
        replacement = select_best_model(model_name, candidates)

        if replacement:
            PIPELINE_MODELS[stage].name = replacement
            if installed:
                print(f"      ‚úÖ Replaced with: {replacement}")
        elif installed:
            missing.append((stage, model_name + " (no generate support)"))
        else:
            missing.append((stage, model_name))

    return missing

def warmup_models():
    """
    Warm up models AND verify generate capability.

    Sends a one-token /api/generate request to every distinct model named in
    PIPELINE_MODELS. Models that answer with a qualifying HTTP 400 (or raise an
    error mentioning "400"/"support") are collected as failures and reported.

    Raises:
        RuntimeError: if any failed model is used by a stage whose name
            contains "generate".
    """
    print("\nüî• Warming up models...")
    # Several stages share a model; each distinct model is probed once
    unique_models = set(config.name for config in PIPELINE_MODELS.values())

    failed_models = []
    for model in unique_models:
        try:
            r = requests.post(
                f"{CONFIG['OLLAMA_URL']}/api/generate",
                json={"model": model, "prompt": "test", "stream": False, "options": {"num_predict": 1}},
                timeout=30
            )

            if r.status_code == 400:
                error_msg = r.json().get("error", r.text)
                # NOTE(review): the "not" substring test is very broad; most 400 messages will match it
                if "support" in error_msg.lower() or "not" in error_msg.lower():
                    print(f"   üö´ {model} - Does not support /api/generate")
                    failed_models.append((model, "no generate support"))
                    continue

            # Any remaining non-2xx status (including an unmatched 400) raises here
            # and is classified by the generic handler below
            r.raise_for_status()
            print(f"   ‚úÖ {model}")

        except requests.exceptions.Timeout:
            # A timeout is not counted as a failure
            print(f"   ‚è±Ô∏è  {model} - Slow to load (continuing anyway)")
        except Exception as e:
            error_str = str(e)
            # Classification is by substring match on the exception text
            if "400" in error_str or "support" in error_str.lower():
                print(f"   üö´ {model} - Likely no generate support")
                failed_models.append((model, str(e)))
            else:
                # Other errors are reported but do not mark the model as failed
                print(f"   ‚ö†Ô∏è  {model} - Error: {e}")

    if failed_models:
        print(f"\nüö® CRITICAL: {len(failed_models)} model(s) don't support /api/generate:")

        generate_stage_failures = []
        for model, reason in failed_models:
            print(f"   ‚ùå {model}: {reason}")
            stages_using = [stage for stage, cfg in PIPELINE_MODELS.items() if cfg.name == model]
            if stages_using:
                print(f"      Used in stages: {', '.join(stages_using)}")

                # Only failures affecting a "generate" stage are fatal
                if any("generate" in stage for stage in stages_using):
                    generate_stage_failures.append((model, stages_using))

        if generate_stage_failures:
            print(f"\n‚ùå FATAL: Generation stages have non-functional models:")
            for model, stages in generate_stage_failures:
                print(f"   {model} ‚Üí {', '.join(stages)}")
            print(f"\nüí° FIX: Run with different models or update PIPELINE_MODELS config")
            raise RuntimeError("Cannot proceed with broken generate-stage models")

        print()

def llm(prompt: str, model_config: ModelConfig, timeout=600, max_retries=3) -> str:
    """
    LLM interface with retry logic.
    CRITICAL FIX #1: Removed invalid 'num_gpu' parameter

    Sends `prompt` to Ollama's /api/generate (non-streaming) using the model
    and sampling options from `model_config`, and returns the "response" text.

    Args:
        prompt: Prompt text. Prompts over 15000 characters are truncated and a
            "Return ONLY valid JSON:" suffix is appended.
        model_config: Supplies name, temperature, top_p and num_predict.
        timeout: Per-request timeout in seconds.
        max_retries: Maximum number of attempts.

    Raises:
        RuntimeError: model not found (HTTP 404), timeout on the last attempt,
            or no attempt was made (max_retries <= 0).
        requests.exceptions.HTTPError: HTTP 400 (never retried), or any other
            HTTP error on the last attempt.
        Exception: any other error on the last attempt is re-raised as is.
    """
    prompt_len = len(prompt)
    if prompt_len > 8000:
        print(f"         ‚ö†Ô∏è  Very long prompt ({prompt_len} chars) - may cause context issues")
        if prompt_len > 15000:
            print(f"         üî¥ Prompt exceeds 15K chars - truncating to prevent failure")
            prompt = prompt[:15000] + "\n\nReturn ONLY valid JSON:"

    for attempt in range(max_retries):
        try:
            payload = {
                "model": model_config.name,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": model_config.temperature,
                    "top_p": model_config.top_p,
                    "num_predict": model_config.num_predict
                },
            }

            r = session.post(
                f"{CONFIG['OLLAMA_URL']}/api/generate",
                json=payload,
                timeout=timeout,
            )
            r.raise_for_status()
            return r.json()["response"]
        except requests.exceptions.HTTPError as e:
            # One handler for all HTTP errors. The 404 case used to live in a
            # second `except HTTPError` clause that could never run, so a
            # missing model was retried and surfaced as a bare HTTPError.
            status = e.response.status_code if e.response is not None else None

            if status == 404:
                raise RuntimeError(
                    f"Model '{model_config.name}' not found. "
                    f"Pull: ollama pull {model_config.name}"
                ) from e

            if status == 400:
                error_detail = ""
                try:
                    error_detail = e.response.json()
                except ValueError:
                    # Body is not JSON; fall back to the raw text
                    error_detail = e.response.text

                print(f"         üî¥ HTTP 400 Error from Ollama:")
                print(f"            Model: {model_config.name}")
                print(f"            Error: {error_detail}")
                print(f"            Prompt length: {len(prompt)} chars")

                if "context" in str(error_detail).lower() or "too long" in str(error_detail).lower():
                    print(f"            üí° Likely cause: Prompt exceeds model context window")

                # A bad request will not succeed on retry
                raise

            # Other HTTP errors: exponential backoff, re-raise on the last attempt
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
            else:
                raise

        except requests.exceptions.Timeout:
            if attempt == max_retries - 1:
                raise RuntimeError(f"Timeout after {max_retries} attempts")
            wait = 5 * (attempt + 1)
            print(f"      ‚è≥ Timeout, retrying in {wait}s...")
            time.sleep(wait)

        except Exception as e:
            if attempt == max_retries - 1:
                raise
            wait = 2 ** attempt
            print(f"      üîÑ Retry {attempt + 1}/{max_retries} in {wait}s...")
            time.sleep(wait)

    raise RuntimeError("Max retries exceeded")

def extract_json(text: str, lenient: bool = False) -> Dict:
    """Extract JSON from LLM response

    Takes the span from the first '{' to the last '}' (if any), strips ASCII
    control characters, and parses it.

    Args:
        text: Raw model output; anything that is not a string yields {}.
        lenient: When True and strict parsing fails, retry after removing
            trailing and doubled commas.

    Returns:
        The parsed JSON object, or {} when nothing parseable is found or the
        parsed value is not an object.
    """
    if not isinstance(text, str):
        return {}

    text = text.strip()

    json_match = re.search(r'\{.*\}', text, re.DOTALL)
    if json_match:
        text = json_match.group()

    text = re.sub(r'[\x00-\x1F\x7F]', '', text)

    candidates = [text]
    if lenient:
        repaired = re.sub(r',(\s*[}\]])', r'\1', text)
        repaired = re.sub(r'([{\[,])\s*,', r'\1', repaired)
        candidates.append(repaired)

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Callers use .get() on the result; a bare list/number/string is not usable.
        return parsed if isinstance(parsed, dict) else {}

    # The greedy brace match runs to the LAST '}', so commentary containing braces
    # after the JSON breaks parsing. Fall back to decoding only the first JSON
    # value that starts at the first '{'.
    start = text.find('{')
    if start != -1:
        try:
            parsed, _ = json.JSONDecoder().raw_decode(text, start)
        except json.JSONDecodeError:
            return {}
        if isinstance(parsed, dict):
            return parsed

    return {}

In [None]:
# ============================================================
# CELL 6: PROMPT MANAGEMENT & GENERATION
# ============================================================

# Progress tracker
class ProgressTracker:
    """Remember which reel ids were processed, persisted as one id per line.

    New ids are buffered in memory and appended to `progress_file` once the
    buffer reaches `_buffer_size` entries, or when flush() is called.
    """

    def __init__(self, progress_file: Path):
        self.progress_file = progress_file
        self.processed: Set[str] = self._load()
        self.lock = Lock()
        self._buffer: List[str] = []
        self._buffer_size = 50

    def _load(self) -> Set[str]:
        """Read the non-blank, stripped lines of the progress file into a set."""
        if not self.progress_file.exists():
            return set()

        try:
            with open(self.progress_file, 'r') as handle:
                return {raw.strip() for raw in handle if raw.strip()}
        except Exception as e:
            print(f"‚ö†Ô∏è  Error loading progress: {e}")
            return set()

    def mark_processed(self, reel_id: str):
        """Record `reel_id` once; flushes to disk when the buffer is full."""
        with self.lock:
            if reel_id in self.processed:
                return

            self.processed.add(reel_id)
            self._buffer.append(reel_id)

            if len(self._buffer) < self._buffer_size:
                return
            self.flush()

    def flush(self):
        """Append buffered ids to the progress file; the buffer is kept if writing fails."""
        if len(self._buffer) == 0:
            return

        try:
            with open(self.progress_file, 'a') as handle:
                pending = '\n'.join(self._buffer)
                handle.write(pending + '\n')
            self._buffer.clear()
        except Exception as e:
            print(f"‚ö†Ô∏è  Error flushing progress: {e}")

progress_tracker = ProgressTracker(PROGRESS_FILE)

# Caching
# Per-reel, per-stage JSON results are stored under <OUT_DIR>/cache.
CACHE_DIR = CONFIG["OUT_DIR"] / "cache"
# parents=True: do not fail with FileNotFoundError when OUT_DIR itself has not
# been created yet (e.g. first run on a fresh Drive folder).
CACHE_DIR.mkdir(parents=True, exist_ok=True)

def get_cached_result(reel_id: str, stage: str) -> Optional[Dict]:
    """Return the cached JSON result for (reel_id, stage), or None.

    None is returned when no cache file exists or when the file cannot be
    read or parsed; a broken cache entry is treated as a cache miss.
    """
    cache_file = CACHE_DIR / f"{reel_id}_{stage}.json"
    if cache_file.exists():
        try:
            with open(cache_file, 'r') as f:
                return json.load(f)
        # OSError: unreadable file; ValueError: invalid JSON or undecodable bytes.
        # The previous bare `except` also swallowed KeyboardInterrupt/SystemExit.
        except (OSError, ValueError):
            pass
    return None

def save_cached_result(reel_id: str, stage: str, data: Dict):
    """Write `data` as JSON to the cache file for (reel_id, stage).

    Best-effort: failures are printed, never raised.
    """
    cache_file = CACHE_DIR / f"{reel_id}_{stage}.json"
    try:
        # Serialize before opening: open(..., 'w') truncates, so a value that
        # cannot be serialized used to wipe a previously valid cache entry and
        # leave partial JSON behind.
        payload = json.dumps(data, indent=2)
        with open(cache_file, 'w') as f:
            f.write(payload)
    except Exception as e:
        print(f"      ‚ö†Ô∏è  Cache save failed: {e}")

# Prompt version lifecycle management
def load_prompt_version_stats():
    """Read prompt version statistics from PROMPT_VERSION_FILE.

    Returns the parsed JSON content, or {} when the file is missing or
    cannot be read/parsed.
    """
    if not PROMPT_VERSION_FILE.exists():
        return {}

    try:
        with open(PROMPT_VERSION_FILE, 'r') as handle:
            stats = json.load(handle)
    except Exception:
        return {}
    return stats

def save_prompt_version_stats(stats):
    """Save prompt version statistics

    Best-effort: failures are printed, never raised.
    """
    try:
        # Serialize before opening: open(..., 'w') truncates, so a failure
        # inside json.dump would destroy the previously saved statistics.
        payload = json.dumps(stats, indent=2)
        with open(PROMPT_VERSION_FILE, 'w') as f:
            f.write(payload)
    except Exception as e:
        print(f"      ‚ö†Ô∏è  Failed to save prompt version stats: {e}")

def record_prompt_version_result(version: str, success: bool, stage: str = "unknown"):
    """
    Track success rates by prompt version for auto-deprecation.
    Mechanical failures don't count against prompt quality.

    Args:
        version: Prompt version identifier; a stats entry is created on first use.
        success: Whether the attempt succeeded (ignored when stage == "mechanical").
        stage: Stage label. "mechanical" only increments the mechanical-failure
            counter; any other value is stored as `last_failure_stage` on failure.

    Side effects:
        Mutates PROMPT_VERSION_STATS under PROMPT_VERSION_LOCK, may print a
        deprecation / un-deprecation notice, and saves the stats to disk every
        10th counted attempt.
    """
    with PROMPT_VERSION_LOCK:
        if version not in PROMPT_VERSION_STATS:
            PROMPT_VERSION_STATS[version] = {
                "attempts": 0,
                "successes": 0,
                "deprecated": False,
                "success_rate": 0.0,
                "last_failure_stage": None,
                "mechanical_failures": 0
            }

        stats = PROMPT_VERSION_STATS[version]

        # Mechanical failures are counted separately and skip everything below
        # (attempts, success rate, deprecation checks and the periodic save)
        if stage == "mechanical":
            stats["mechanical_failures"] += 1
            return

        stats["attempts"] += 1
        if success:
            stats["successes"] += 1
        else:
            stats["last_failure_stage"] = stage

        stats["success_rate"] = stats["successes"] / stats["attempts"] if stats["attempts"] > 0 else 0.0

        # Deprecate once there are at least 8 attempts and the success rate is below 35%
        if stats["attempts"] >= 8 and stats["success_rate"] < 0.35 and not stats["deprecated"]:
            stats["deprecated"] = True
            print(f"      üö´ Auto-deprecated prompt version '{version}' (success rate: {stats['success_rate']:.1%} after {stats['attempts']} attempts)")

        # Recovery is only evaluated on every 50th attempt and requires a rate of at least 45%
        if stats["deprecated"] and stats["attempts"] % 50 == 0:
            if stats["success_rate"] >= 0.45:
                stats["deprecated"] = False
                print(f"      ‚ôªÔ∏è  Un-deprecated prompt version '{version}' (success rate recovered: {stats['success_rate']:.1%})")

        # Persist every 10th counted attempt; the save runs while the lock is held
        if stats["attempts"] % 10 == 0:
            save_prompt_version_stats(PROMPT_VERSION_STATS)

def is_prompt_version_deprecated(version: str) -> bool:
    """Return the `deprecated` flag recorded for `version` (False for unknown versions)."""
    version_stats = PROMPT_VERSION_STATS.get(version, {})
    return version_stats.get("deprecated", False)

# Confidence calibration
def load_confidence_calibration():
    """Read confidence calibration data from CONFIDENCE_CALIBRATION_FILE.

    Falls back to the current module-level CONFIDENCE_CALIBRATION value when
    the file is missing or cannot be read/parsed.
    """
    if not CONFIDENCE_CALIBRATION_FILE.exists():
        return CONFIDENCE_CALIBRATION

    try:
        with open(CONFIDENCE_CALIBRATION_FILE, 'r') as handle:
            calibration = json.load(handle)
    except Exception:
        return CONFIDENCE_CALIBRATION
    return calibration

def save_confidence_calibration():
    """Save confidence calibration data

    Writes CONFIDENCE_CALIBRATION to CONFIDENCE_CALIBRATION_FILE while holding
    CONFIDENCE_CALIBRATION_LOCK. Best-effort: failures are printed, never raised.
    """
    with CONFIDENCE_CALIBRATION_LOCK:
        try:
            # Serialize before opening: open(..., 'w') truncates, so a failure
            # inside json.dump would destroy the previously saved calibration.
            payload = json.dumps(CONFIDENCE_CALIBRATION, indent=2)
            with open(CONFIDENCE_CALIBRATION_FILE, 'w') as f:
                f.write(payload)
        except Exception as e:
            print(f"‚ö†Ô∏è  Failed to save confidence calibration: {e}")

# Load existing calibration data
# Rebinds the module-level CONFIDENCE_CALIBRATION. load_confidence_calibration()
# returns the value already bound to this name when the calibration file is
# missing or unreadable, so the name must be defined before this line runs.
CONFIDENCE_CALIBRATION = load_confidence_calibration()

def track_confidence_outcome(confidence: float, accepted: bool = False):
    """
    Record one calibration sample for the bucket that `confidence` falls into.
    Call this when you have human feedback on whether a card was accepted.

    Buckets are 0.1 wide from 0.5 upward; the last one also includes 1.0.
    Values outside [0.5, 1.0] are ignored.
    """
    half_open_buckets = (
        (0.5, 0.6, "0.5-0.6"),
        (0.6, 0.7, "0.6-0.7"),
        (0.7, 0.8, "0.7-0.8"),
        (0.8, 0.9, "0.8-0.9"),
    )

    bucket = None
    for lower, upper, label in half_open_buckets:
        if lower <= confidence < upper:
            bucket = label
            break
    else:
        # Top bucket is closed on both ends
        if 0.9 <= confidence <= 1.0:
            bucket = "0.9-1.0"

    if not bucket:
        return

    with CONFIDENCE_CALIBRATION_LOCK:
        counters = CONFIDENCE_CALIBRATION["buckets"][bucket]
        counters["total"] += 1
        if accepted:
            counters["accepted"] += 1

# Stage 0: Transcript normalization
def normalize_transcript(transcript: str, caption: str) -> str:
    """
    PHASE 2: Rewrite a spoken reel transcript as declarative technical prose.

    The original transcript is returned unchanged when it is empty or shorter
    than 50 characters, when it has fewer than 2 filler markers and more than
    100 words, or when the LLM result is missing, too short, outside the
    0.5x-2.0x length window, identical to the input, or an error occurs.
    """
    if not transcript or len(transcript.strip()) < 50:
        return transcript

    lowered = transcript.lower()
    filler_markers = ("so ", "well ", "let's ", "hello ", " follow ", " follow @")
    filler_count = sum(lowered.count(marker) for marker in filler_markers)

    word_total = len(transcript.split())
    if filler_count < 2 and word_total > 100:
        return transcript

    prompt = f"""You are a technical content normalizer for educational flashcards. Transform spoken Instagram Reel transcripts into structured technical prose.

INPUT TRANSCRIPT (spoken/conversational):
{transcript}

TOPIC/CAPTION:
{caption}

CRITICAL RULES:
1. Convert speech ‚Üí declarative statements
2. Remove ALL non-technical content:
   - Filler: "so", "well", "hello", "let's see", "right?", "okay"
   - Self-reference: "I have created", "I will show", "comment link", "DM for"
   - CTAs: "go ahead", "link in bio", "check out", "share with your friends"
3. Extract ONLY what was explicitly said or clearly implied
4. Preserve: ALL technical terms, code examples, specific numbers/metrics
5. Format as neutral technical explanation

OUTPUT FORMAT (JSON):
{{
  "normalized_content": "Technical explanation in declarative prose",
  "key_concepts": ["term1", "term2"],
  "has_code_example": true/false,
  "content_type": "explanation|warning|comparison|definition"
}}

Return ONLY valid JSON:"""

    try:
        raw = llm(prompt, PIPELINE_MODELS['extract'])
        result = extract_json(raw, lenient=True)

        normalized = result.get("normalized_content", "").strip()
        content_type = result.get("content_type", "explanation")

        if not normalized or len(normalized) < 50:
            print(f"      ‚è≠Ô∏è  Normalization skipped: no technical content extracted")
            return transcript

        original_words = len(transcript.split())
        normalized_words = len(normalized.split())
        if original_words > 0:
            ratio = normalized_words / original_words
        else:
            ratio = 0

        # Too much removed: the model probably dropped real content
        if ratio < 0.5:
            print(f"      ‚ö†Ô∏è  Normalization suspicious: {ratio:.1%} of original length")
            return transcript

        # Too much added: the model probably invented content
        if ratio > 2.0:
            print(f"      ‚ö†Ô∏è  Normalization rejected: {ratio:.1%} of original (hallucination risk)")
            return transcript

        unchanged = normalized.lower().strip() == transcript.lower().strip()
        if unchanged:
            return transcript

        print(f"      ‚ú® Normalized: {original_words}w ‚Üí {normalized_words}w ({ratio:.0%}, {content_type})")
        return normalized

    except Exception as e:
        print(f"      ‚ö†Ô∏è  Normalization error: {e}")
        return transcript

# Stage 1: Atom extraction prompts
def stage1_prompt_a_strict(caption: str, transcript: str) -> str:
    """PROMPT A - STRICT/ADVANCED

    Build the Stage 1 atom-extraction prompt for advanced topics. The prompt
    asks for at least 4 technical points and 2 solutions and describes the JSON
    schema the model must return.

    Args:
        caption: Reel caption, inserted verbatim near the end of the prompt.
        transcript: Reel transcript, inserted verbatim after the caption.

    Returns:
        The complete prompt text.
    """
    # Doubled braces ({{ }}) are f-string escapes that render as literal JSON braces
    return f"""Extract COMPREHENSIVE technical atoms for ADVANCED topics.

CRITICAL RULES:
1. IGNORE: Company names, celebrity names, dates, marketing language
2. Focus on CONCRETE mechanisms and SPECIFIC implementations
3. Provide DEEP technical details with internal behavior
4. DO NOT introduce unrelated or orthogonal concepts
5. MUST extract AT LEAST 4 technical points and 2 solutions

PRIMARY CONCEPT:
- Main concept name (precise technical term)
- Definition (one clear sentence with technical precision)
- Detailed mechanisms (4-6 points explaining HOW it works internally)
- Concrete solutions with SPECIFIC implementations
- System impact (quantifiable: latency, throughput, availability)

RELATED CONCEPTS (1-3 directly connected topics):
Only if they are direct prerequisites, consequences, or internal mechanisms.

OUTPUT SCHEMA:
{{
  "valid": true|false,
  "reject_reason": "string if invalid, empty otherwise",
  "concept": "Technical term",
  "category": "java_spring|azure|microservices|system_design|dsa|java_programming|java_developer|algorithms",
  "definition": "Precise one-sentence definition",
  "technical_points": ["...", "...", "...", "..."],
  "solutions": ["...", "..."],
  "impact": ["...", "...", "..."],
  "has_tradeoffs": true|false,
  "related_concepts": [
    {{
      "name": "Related concept",
      "why_relevant": "Connection reason",
      "key_points": ["...", "...", "..."]
    }}
  ]
}}

Caption: {caption}
Transcript: {transcript}

Return ONLY valid JSON:"""

def stage1_prompt_b_foundation(caption: str, transcript: str) -> str:
    """PROMPT B - FOUNDATION-AWARE

    Build the Stage 1 atom-extraction prompt for foundational Java interview
    concepts. The prompt asks for at least 3 technical points and 1 solution,
    requires a decision/tradeoff/failure mode/counterexample, and instructs the
    model to set valid=false with reason "motivational_content" for purely
    motivational or generic content.

    Args:
        caption: Reel caption, inserted verbatim near the end of the prompt.
        transcript: Reel transcript, inserted verbatim after the caption.

    Returns:
        The complete prompt text.
    """
    # Doubled braces ({{ }}) are f-string escapes that render as literal JSON braces
    return f"""Extract technical atoms for FOUNDATIONAL Java interview concepts.

CRITICAL RULES:
1. IGNORE: Company names, celebrity names, dates, marketing language
2. Focus on INTERNAL BEHAVIOR appropriate to concept level
3. Provide technical details that explain WHY and HOW
4. DO NOT invent JVM internals unless mentioned
5. REJECT: Generic advice, motivational content, "just do X" statements
6. REQUIRE: At least one of: decision, tradeoff, failure mode, or counterexample
7. MUST extract AT LEAST 3 technical points and 1 solution

MANDATORY EXTRACTION REQUIREMENT:
Extract AT LEAST ONE of the following:
- A technical decision (when to use X vs Y)
- A tradeoff (pros/cons of approach)
- A failure mode (what goes wrong if...)
- A counterexample (when this doesn't apply)

If the content is purely motivational or generic advice, set valid=false with reason "motivational_content".

PRIMARY CONCEPT:
- Main concept name (precise technical term)
- Definition (one clear sentence)
- Internal mechanisms (3-5 points on HOW it works)
- Concrete examples with code snippets or specific scenarios
- Practical impact (thread safety, memory, performance)

RELATED CONCEPTS (2-3 pedagogically connected topics):
Only if commonly taught together or frequently co-asked.

OUTPUT SCHEMA:
{{
  "valid": true|false,
  "reject_reason": "string if invalid, empty otherwise",
  "concept": "Technical term",
  "category": "java_spring|azure|microservices|system_design|dsa|java_programming|java_developer|algorithms",
  "definition": "Clear one-sentence definition",
  "technical_points": ["...", "...", "..."],
  "solutions": ["...", "..."],
  "impact": ["...", "..."],
  "has_tradeoffs": true|false,
  "related_concepts": [
    {{
      "name": "Pedagogically connected concept",
      "relation_type": "pedagogical",
      "why_relevant": "How they're taught together",
      "key_points": ["...", "...", "..."]
    }}
  ]
}}

Caption: {caption}
Transcript: {transcript}

Return ONLY valid JSON:"""

def stage1_prompt_c_dsa(caption: str, transcript: str) -> str:
    """Render Stage-1 "Prompt C", the DSA-focused extraction prompt.

    The prompt asks the model for step-by-step logic, Big-O complexity and
    edge cases, and to reply with one JSON object that follows the schema
    embedded in the text.

    Args:
        caption: Caption text, inserted verbatim after ``Caption:``.
        transcript: Transcript text, inserted verbatim after ``Transcript:``.

    Returns:
        The fully rendered prompt string.
    """
    # Doubled braces are literal JSON braces in the rendered prompt; the only
    # substitution fields are {caption} and {transcript}.
    template = """Extract technical atoms for ALGORITHM/DATA STRUCTURE concepts.

CRITICAL RULES:
1. IGNORE: Company names, celebrity names, dates
2. Focus on STEP-BY-STEP logic and COMPLEXITY analysis
3. Provide concrete examples and edge cases
4. Include time/space complexity
5. MUST extract AT LEAST 4 technical points including complexity

PRIMARY CONCEPT:
- Algorithm/data structure name
- Definition (one clear sentence)
- Step-by-step logic (4-6 steps)
- Time and space complexity (MUST include Big O notation)
- Edge cases and optimizations

OUTPUT SCHEMA:
{{
  "valid": true|false,
  "reject_reason": "string if invalid, empty otherwise",
  "concept": "Algorithm/Data structure name",
  "category": "dsa|algorithms",
  "definition": "Clear definition",
  "technical_points": ["Step 1...", "Step 2...", "Complexity: O(...)"],
  "solutions": ["Code example", "Optimization"],
  "impact": ["Performance characteristics"],
  "has_tradeoffs": true|false,
  "related_concepts": [
    {{
      "name": "Related algorithm/structure",
      "why_relevant": "Comparison or prerequisite",
      "key_points": ["...", "..."]
    }}
  ]
}}

Caption: {caption}
Transcript: {transcript}

Return ONLY valid JSON:"""
    return template.format(caption=caption, transcript=transcript)

def create_enrichment_prompt(atoms: Dict, strategy: Optional[str] = None) -> str:
    """Build the follow-up prompt that asks the model for a deeper re-extraction.

    Args:
        atoms: Previously extracted atoms; embedded as pretty-printed JSON.
        strategy: Optional strategy hint. When truthy it is appended as a
            trailing ``Use strategy: ...`` paragraph.

    Returns:
        The enrichment prompt text.
    """
    sections = [
        "RE-EXTRACT with MORE TECHNICAL DEPTH.",
        "",
        "Previous extraction was too shallow. Provide:",
        "1. Deeper technical mechanisms",
        "2. More concrete examples",
        "3. Specific edge cases",
        "4. Implementation details",
        "5. MUST extract at least 4 technical points and 2 solutions",
        "",
        "Previous atoms:",
        json.dumps(atoms, indent=2),
        "",
        "Return enhanced JSON with same schema.",
    ]
    prompt = "\n".join(sections)

    if not strategy:
        return prompt
    return f"{prompt}\n\nUse strategy: {strategy}"

# Prompt routing
def select_prompt_strategy(caption: str, transcript: str, category: str,
                          learned_override: Optional[str] = None) -> Tuple[PromptStrategy, str]:
    """
    Choose the Stage-1 prompt variant for a reel and say why.

    A learned override wins when prompt routing is enabled, its prompt version
    is not deprecated, and it names a valid PromptStrategy. Otherwise the
    lower-cased caption + transcript is scanned for keyword groups in a fixed
    order (DSA, foundation with a short transcript, advanced/system design,
    framework names); the first matching group decides. With no match the
    result is FOUNDATION_AWARE.

    Side effects: increments counters in ROUTING_METRICS and ROUTING_COUNTS.

    Args:
        caption: Reel caption.
        transcript: Reel transcript.
        category: Not consulted by this function; kept in the signature.
        learned_override: Strategy value suggested by earlier runs, if any.

    Returns:
        (strategy, human-readable routing reason)
    """
    def _routed(counter_key, chosen, reason):
        # Each routing decision is tallied in both global counters.
        ROUTING_METRICS[counter_key] += 1
        ROUTING_COUNTS[counter_key] += 1
        return chosen, reason

    def _note_signal(metric_key):
        # Informational content signals never influence the chosen strategy.
        ROUTING_METRICS[metric_key] = ROUTING_METRICS.get(metric_key, 0) + 1

    if CONFIG["ENABLE_PROMPT_ROUTING"] and learned_override:
        override_version = f"stage1_{learned_override}_{CONFIG['CACHE_VERSION']}"
        if is_prompt_version_deprecated(override_version):
            print(f"      üö´ Learned strategy '{learned_override}' is deprecated - using heuristics")
            learned_override = None
        else:
            try:
                # PromptStrategy(...) raises ValueError for unknown values
                # before anything is counted; fall through to heuristics then.
                return _routed(
                    learned_override,
                    PromptStrategy(learned_override),
                    "learned from past success on similar concept",
                )
            except ValueError:
                pass

    haystack = f"{caption} {transcript}".lower()

    def _mentions_any(needles):
        # Plain substring test against the combined lower-cased text.
        return any(needle in haystack for needle in needles)

    # Content signals: recorded for observability only.
    if _mentions_any((
        " vs ", " vs.", "versus", "instead of", "rather than",
        "trade-off", "tradeoff", "compare", "comparison",
        "difference between", "advantage", "disadvantage",
    )):
        _note_signal("content_signal::tradeoff_detected")

    if _mentions_any((
        "when to", "when should", "which to", "choose between",
        "depends on", "if you", "use case", "scenario",
    )):
        _note_signal("content_signal::decision_detected")

    # 1) DSA / algorithms -> DSA-focused prompt
    if _mentions_any((
        "algorithm", "complexity", "time complexity", "space complexity",
        "binary search", "tree", "graph", "heap", "sort", "dynamic programming",
        "big o", "o(n)", "o(log n)", "recursion", "backtracking",
    )):
        return _routed("C_DSA", PromptStrategy.DSA_FOCUSED, "DSA/algorithm signals detected")

    # 2) Foundation vocabulary in a short (< 150 words) transcript
    word_count = len(transcript.split())
    if word_count < 150 and _mentions_any((
        "string", "array", "list", "hashmap", "equals", "hashcode",
        "inheritance", "polymorphism", "encapsulation", "interface",
        "immutability", "immutable", "collection", "basics", "introduction",
    )):
        return _routed(
            "B_FOUNDATION",
            PromptStrategy.FOUNDATION_AWARE,
            "foundation topic with short transcript",
        )

    # 3) Advanced / system-design vocabulary -> strict prompt
    if _mentions_any((
        "distributed", "microservice", "scalability", "consistency",
        "saga", "cdc", "event sourcing", "kafka", "redis cluster",
        "kubernetes", "docker", "load balancer", "api gateway",
        "circuit breaker", "service mesh", "eventual consistency",
    )):
        return _routed("A_STRICT", PromptStrategy.STRICT_ADVANCED, "advanced/system design signals")

    # 4) Framework names -> foundation-aware prompt
    if _mentions_any((
        "spring", "@autowired", "@transactional", "jpa", "hibernate",
        "azure", "aks", "service bus", "cosmos db",
    )):
        return _routed("B_FOUNDATION", PromptStrategy.FOUNDATION_AWARE, "technical framework signals")

    # 5) Nothing matched: foundation-aware either way; only the reason differs.
    ROUTING_METRICS["B_FOUNDATION"] += 1
    ROUTING_COUNTS["B_FOUNDATION"] += 1
    fallback_version = f"stage1_{PromptStrategy.FOUNDATION_AWARE.value}_{CONFIG['CACHE_VERSION']}"
    reason = (
        "forced fallback (all others deprecated)"
        if is_prompt_version_deprecated(fallback_version)
        else "default (safest for mixed content)"
    )
    return PromptStrategy.FOUNDATION_AWARE, reason

In [None]:
# ============================================================
# CELL 7: MAIN PIPELINE & ORCHESTRATION
# ============================================================

def compact_atoms_for_generation(atoms: Dict, max_chars: int = 2000) -> Dict:
    """
    Compact atoms before Stage-2 to prevent context overflow.

    Keeps a bounded subset of the extraction: concept (200 chars), category,
    definition (300 chars), up to 4 technical points, 2 solutions, 2 impact
    entries, the has_tradeoffs flag and up to 2 related concepts (name 100
    chars, why_relevant 150 chars, 2 key points each).

    LLM output is not trusted to be well-typed: ``None`` or non-string text
    fields become (clipped) strings, ``None`` lists become empty lists, a
    scalar where a list was expected is wrapped, and related-concept entries
    that are not dicts are dropped. Well-formed atoms compact exactly as before.

    Args:
        atoms: Stage-1 atoms dict.
        max_chars: Soft budget for the JSON-serialised result. If exceeded,
            technical points are cut to 2 and related concepts to 1; if still
            exceeded, related concepts are removed. Individual list items are
            not truncated, so the budget is best-effort.

    Returns:
        A new dict; ``atoms`` is not modified.
    """
    def _clip(value, limit: int) -> str:
        # Text field capped at `limit` characters; None -> "".
        if value is None:
            return ""
        text = value if isinstance(value, str) else str(value)
        return text[:limit]

    def _as_list(value) -> list:
        # List view of a field that should be a list.
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    related_dicts = [
        rc for rc in _as_list(atoms.get("related_concepts"))
        if isinstance(rc, dict)
    ]

    compacted = {
        "concept": _clip(atoms.get("concept"), 200),
        "category": atoms.get("category", ""),
        "definition": _clip(atoms.get("definition"), 300),
        "technical_points": _as_list(atoms.get("technical_points"))[:4],
        "solutions": _as_list(atoms.get("solutions"))[:2],
        "impact": _as_list(atoms.get("impact"))[:2],
        "has_tradeoffs": atoms.get("has_tradeoffs", False),
        "related_concepts": [
            {
                "name": _clip(rc.get("name"), 100),
                "why_relevant": _clip(rc.get("why_relevant"), 150),
                "key_points": _as_list(rc.get("key_points"))[:2]
            }
            for rc in related_dicts[:2]
        ]
    }

    # Progressive trimming: drop the least essential material first.
    if len(json.dumps(compacted)) > max_chars:
        compacted["technical_points"] = compacted["technical_points"][:2]
        compacted["related_concepts"] = compacted["related_concepts"][:1]

        if len(json.dumps(compacted)) > max_chars:
            compacted["related_concepts"] = []

    return compacted

def validate_cloze(card: Dict, topic_class: str) -> bool:
    """
    Topic-aware cloze validation.

    Foundation: 2 clozes minimum, 60 chars minimum.
    Intermediate/Advanced (any other class): 3 clozes minimum, 90 chars minimum.

    Only real Anki deletion markers of the form ``{{c<number>::`` are counted,
    so text such as ``{{code}}`` no longer passes as a deletion. A card that is
    not a dict, or whose ``cloze`` field is missing, ``None`` or not a string,
    is invalid rather than an error.

    Args:
        card: Candidate card; the cloze text is read from ``card["cloze"]``.
        topic_class: Topic class of the source atoms.

    Returns:
        True when the card meets the minimums for its topic class.
    """
    if not isinstance(card, dict):
        return False

    cloze = card.get("cloze")
    if not isinstance(cloze, str):
        return False

    cloze_count = len(re.findall(r"\{\{c\d+::", cloze))

    if topic_class == "foundation":
        min_clozes, min_chars = 2, 60
    else:
        min_clozes, min_chars = 3, 90

    return cloze_count >= min_clozes and len(cloze) >= min_chars

# Stage 2: Card generation prompts
def stage2a_basic_prompt(atoms: Dict) -> str:
    """Build the Stage-2a prompt that asks for 2-3 basic (front/back) cards.

    Args:
        atoms: Stage-1 atoms; compacted with compact_atoms_for_generation
            before being embedded as JSON.

    Returns:
        The prompt text, ending with an instruction to return only JSON.
    """
    clean_atoms = compact_atoms_for_generation(atoms)

    # The compacted dict always carries "concept" and "category" keys, so a
    # dict.get default would never apply; fall back on empty values instead.
    concept = clean_atoms.get('concept') or 'concept'
    category = clean_atoms.get('category') or 'java'

    return f"""Generate 2-3 BASIC interview cards.

Input:
{json.dumps(clean_atoms, indent=2)}

CRITICAL RULES:
1. Generate 2‚Äì3 cards for the PRIMARY concept
2. Each card must be interview-ready with clear front/back
3. Use bullet points for clarity in back
4. Include concrete examples or code snippets
5. Focus on understanding, not just facts

OUTPUT JSON:
{{
  "cards": [
    {{
      "type": "basic",
      "concept_source": "primary",
      "front": "Clear question about {concept}",
      "back": "Detailed answer with bullet points and examples",
      "tags": ["domain:{category}", "difficulty:medium"]
    }}
  ]
}}

Return ONLY valid JSON."""

def stage2b_cloze_prompt(atoms: Dict) -> str:
    """Build the Stage-2b prompt that asks for 1-2 cloze-deletion cards.

    The minimum number of deletions and the minimum length stated in the
    prompt follow the topic class read from ``atoms["topic_class"]``
    (default "intermediate"): 2 deletions / 60 chars for "foundation",
    otherwise 3 deletions / 90 chars.

    Args:
        atoms: Stage-1 atoms; compacted with compact_atoms_for_generation
            before being embedded as JSON.

    Returns:
        The prompt text, ending with an instruction to return only JSON.
    """
    clean_atoms = compact_atoms_for_generation(atoms)

    # The compacted dict always carries a "category" key, so a dict.get
    # default would never apply; fall back on empty values instead.
    category = clean_atoms.get('category') or 'java'

    if atoms.get("topic_class", "intermediate") == "foundation":
        min_deletions, min_length = 2, 60
    else:
        min_deletions, min_length = 3, 90

    return f"""Generate 1-2 CLOZE deletion cards.

Input:
{json.dumps(clean_atoms, indent=2)}

CRITICAL RULES:
1. Each card MUST have at least {min_deletions} cloze deletions
2. Prefer mechanisms, definitions, and key concepts for cloze
3. Length {min_length}-180 characters
4. Avoid filler text - focus on core technical content
5. Make deletions meaningful (key terms, not obvious fillers)

FORMAT:
Use {{{{c1:: }}}}, {{{{c2:: }}}}, {{{{c3:: }}}} syntax

OUTPUT JSON:
{{
  "cards": [
    {{
      "type": "cloze",
      "concept_source": "primary",
      "cloze": "Text with {{{{c1::term}}}}, {{{{c2::another}}}} and {{{{c3::third}}}} deletion...",
      "tags": ["domain:{category}", "difficulty:medium"]
    }}
  ]
}}

Return ONLY valid JSON."""

def stage2c_tradeoff_prompt(atoms: Dict) -> str:
    """Build the Stage-2c prompt that asks for one tradeoff card.

    Args:
        atoms: Stage-1 atoms; compacted with compact_atoms_for_generation
            before being embedded as JSON.

    Returns:
        The prompt text, or an empty string when ``atoms["has_tradeoffs"]``
        is missing or falsy.
    """
    if not atoms.get("has_tradeoffs"):
        return ""

    clean_atoms = compact_atoms_for_generation(atoms)

    # The compacted dict always carries "concept" and "category" keys, so a
    # dict.get default would never apply; fall back on empty values instead.
    concept = clean_atoms.get('concept') or 'this concept'
    category = clean_atoms.get('category') or 'java'

    return f"""Generate ONE TRADEOFF card.

Input:
{json.dumps(clean_atoms, indent=2)}

RULES:
1. Compare exactly 2‚Äì3 approaches
2. Each approach MUST have 2 PROS and 2 CONS
3. Focus on engineering tradeoffs (performance, complexity, maintainability)
4. If no explicit alternative exists, infer the most common real-world alternative

OUTPUT JSON:
{{
  "cards": [
    {{
      "type": "tradeoff",
      "concept_source": "primary",
      "front": "What are the trade-offs when implementing {concept}?",
      "tradeoffs": [
        {{
          "approach": "Approach name",
          "pros": ["Performance benefit...", "Simplicity..."],
          "cons": ["Memory overhead...", "Complexity..."]
        }}
      ],
      "tags": ["domain:{category}", "difficulty:senior"]
    }}
  ]
}}

Return ONLY valid JSON."""

def generate_basic_cards(atoms: Dict) -> List[Dict]:
    """Ask the basic-card model for cards and return them as a list of dicts.

    Accepts either response shape from extract_json: an object with a
    ``"cards"`` list, or a bare JSON array of cards. The shape is checked
    before any dict-only method is called, so a bare array is no longer lost
    to a swallowed AttributeError. Entries that are not dicts are dropped.

    Args:
        atoms: Stage-1 atoms passed to stage2a_basic_prompt.

    Returns:
        The generated cards; an empty list when nothing usable came back or
        any step raised (the error is printed, not propagated).
    """
    try:
        prompt = stage2a_basic_prompt(atoms)
        raw = llm(prompt, PIPELINE_MODELS['generate_basic'])
        output = extract_json(raw, lenient=True)

        if isinstance(output, dict):
            cards = output.get("cards")
        elif isinstance(output, list):
            cards = output
        else:
            cards = None

        if not isinstance(cards, list):
            return []

        return [card for card in cards if isinstance(card, dict)]
    except Exception as e:
        print(f"         ‚ö†Ô∏è  Basic generation failed: {e}")
        return []

def generate_cloze_cards(atoms: Dict) -> List[Dict]:
    """Generate cloze cards and keep only those that pass validate_cloze.

    The model is called only when the atoms look suitable:
    - "foundation" topics need a definition of at least 5 words, or an
      " is " / " are " statement, or a cause/effect word in the definition
      plus technical points;
    - every other topic class needs at least 2 technical points.

    Accepts either response shape from extract_json (object with a
    ``"cards"`` list, or a bare array). Entries that are not dicts are
    skipped individually instead of discarding the whole batch.

    Args:
        atoms: Stage-1 atoms; ``topic_class`` defaults to "intermediate".

    Returns:
        Valid cloze cards; an empty list when the content is unsuitable,
        nothing usable came back, or any step raised (the error is printed).
    """
    try:
        topic_class = atoms.get("topic_class", "intermediate")
        # Treat null fields from the extraction as empty.
        tech_points = atoms.get("technical_points") or []
        definition = atoms.get("definition") or ""

        if topic_class == "foundation":
            combined_text = f"{definition} {' '.join(str(p) for p in tech_points)}".lower()
            has_definition = len(definition.split()) >= 5
            has_is_statement = " is " in combined_text or " are " in combined_text
            has_cause_effect = any(
                word in combined_text
                for word in ["because", "therefore", "causes", "leads to", "results in"]
            )
            has_suitable_content = has_definition or has_is_statement or has_cause_effect
        else:
            has_suitable_content = len(tech_points) >= 2

        if not has_suitable_content:
            return []

        prompt = stage2b_cloze_prompt(atoms)
        raw = llm(prompt, PIPELINE_MODELS['generate_cloze'])
        output = extract_json(raw, lenient=True)

        # Check the response shape before using dict-only methods.
        if isinstance(output, dict):
            cards = output.get("cards")
        elif isinstance(output, list):
            cards = output
        else:
            cards = None

        if not isinstance(cards, list):
            return []

        return [
            card for card in cards
            if isinstance(card, dict) and validate_cloze(card, topic_class)
        ]
    except Exception as e:
        print(f"         ‚ö†Ô∏è  Cloze generation failed: {e}")
        return []

def generate_tradeoff_cards(atoms: Dict) -> List[Dict]:
    """Generate tradeoff cards and keep only the structurally complete ones.

    A card is kept when it lists at least two approaches and every approach
    is a dict with at least two ``pros`` and two ``cons`` (both lists).

    Accepts either response shape from extract_json (object with a
    ``"cards"`` list, or a bare array). Malformed cards or approaches are
    rejected individually instead of discarding the whole batch.

    Args:
        atoms: Stage-1 atoms; nothing is generated unless ``has_tradeoffs``
            is truthy and CONFIG["ENABLE_TRADEOFFS"] is on.

    Returns:
        Valid tradeoff cards; an empty list when disabled, nothing usable
        came back, or any step raised (the error is printed).
    """
    if not atoms.get("has_tradeoffs") or not CONFIG["ENABLE_TRADEOFFS"]:
        return []

    def _has_two(entries) -> bool:
        # A pros/cons field counts only if it is a list with >= 2 entries.
        return isinstance(entries, (list, tuple)) and len(entries) >= 2

    def _is_complete(card) -> bool:
        if not isinstance(card, dict):
            return False
        tradeoffs = card.get("tradeoffs")
        if not isinstance(tradeoffs, list) or len(tradeoffs) < 2:
            return False
        return all(
            isinstance(t, dict) and _has_two(t.get("pros")) and _has_two(t.get("cons"))
            for t in tradeoffs
        )

    try:
        prompt = stage2c_tradeoff_prompt(atoms)
        raw = llm(prompt, PIPELINE_MODELS['generate_tradeoff'])
        output = extract_json(raw, lenient=True)

        # Check the response shape before using dict-only methods.
        if isinstance(output, dict):
            cards = output.get("cards")
        elif isinstance(output, list):
            cards = output
        else:
            cards = None

        if not isinstance(cards, list):
            return []

        return [card for card in cards if _is_complete(card)]
    except Exception as e:
        print(f"         ‚ö†Ô∏è  Tradeoff generation failed: {e}")
        return []

def generate_adjacent_card(atoms: Dict) -> Optional[Dict]:
    """Ask the basic-card model for one extra "adjacent" card on the same concept.

    The response may be a single card object (recognised by a "type" key),
    a non-empty array (its first element is used) or an object with a
    "cards" array (its first element is used).

    Args:
        atoms: Stage-1 atoms; concept, definition and the first three
            technical points are embedded in the prompt.

    Returns:
        The card, or None when nothing usable came back or any step raised
        (the error is printed, not propagated).
    """
    try:
        concept = atoms.get("concept", "")
        definition = atoms.get("definition", "")
        key_points = atoms.get("technical_points", [])[:3]

        prompt = f"""Generate ONE adjacent basic card for a foundation concept.

Original concept: {concept}
Definition: {definition}
Key points: {key_points}

Rules:
1. Must directly relate to the SAME concept
2. Focus on practical application, common mistake, or edge case
3. Keep it simple and interview-ready
4. No new concepts - only extensions of the original

OUTPUT JSON:
{{
  "type": "basic",
  "concept_source": "adjacent",
  "front": "Question about practical application of {concept}",
  "back": "Answer with practical insight, common mistake, or edge case",
  "tags": ["domain:foundation", "type:adjacent"]
}}

Return ONLY valid JSON:"""

        response = extract_json(llm(prompt, PIPELINE_MODELS['generate_basic']), lenient=True)

        # Empty / missing responses (None, {}, []) carry no card.
        if not response:
            return None

        if isinstance(response, list):
            return response[0]

        if isinstance(response, dict):
            if "type" in response:
                return response
            if "cards" in response:
                wrapped = response["cards"]
                return wrapped[0] if wrapped else None

        return None
    except Exception as e:
        print(f"         ‚ö†Ô∏è  Adjacent card generation failed: {e}")
        return None

def determine_completion_state(card_count: int, has_tradeoffs: bool, tradeoff_count: int,
                              duplicates_filtered: int = 0, topic_class: str = "intermediate") -> Tuple[CompletionState, str]:
    """
    Classify how complete a reel's card set is.

    Returns a (state, reason) pair:
    - INCOMPLETE below CONFIG["MIN_CARDS_FOR_PARTIAL"]; the reason is
      "duplicates_filtered" when any duplicates were removed, otherwise
      "low_card_count".
    - PARTIAL / "partial_cards" below CONFIG["MIN_CARDS_FOR_FULL"].
    - PARTIAL / "missing_tradeoff" when the full count is reached but a
      non-foundation topic flagged tradeoffs, produced no tradeoff card and
      tradeoffs are enabled.
    - FULL / "complete" otherwise.
    """
    if card_count < CONFIG["MIN_CARDS_FOR_PARTIAL"]:
        reason = "duplicates_filtered" if duplicates_filtered > 0 else "low_card_count"
        return CompletionState.INCOMPLETE, reason

    if card_count < CONFIG["MIN_CARDS_FOR_FULL"]:
        return CompletionState.PARTIAL, "partial_cards"

    tradeoff_missing = (
        topic_class != "foundation"
        and has_tradeoffs
        and tradeoff_count == 0
        and CONFIG["ENABLE_TRADEOFFS"]
    )
    if tradeoff_missing:
        return CompletionState.PARTIAL, "missing_tradeoff"

    return CompletionState.FULL, "complete"

def quality_score_fallback(card: Dict) -> int:
    """Heuristic 0-100 quality score for a generated card.

    Points come from keyword hits in the card text (technical keywords: 6
    each, capped at 30; foundation keywords: 3 each, capped at 15),
    type-specific structure (basic: long back, bullet characters, backticks;
    cloze: number of ``{{c`` markers, long text) and the shape of the front
    (question word, at least five words). A card with no text scores 0.
    """
    front = card.get("front") or ""
    back = card.get("back") or ""
    cloze = card.get("cloze") or ""

    combined = f"{front} {back} {cloze}".strip()
    if not combined:
        return 0

    haystack = combined.lower()

    # Keyword density: one hit per (category, keyword) pair found in the text.
    tech_hits = sum(
        1
        for keywords in TECHNICAL_KEYWORDS.values()
        for keyword in keywords
        if keyword in haystack
    )
    foundation_hits = sum(
        1 for keyword in FOUNDATION_ENHANCEMENT_KEYWORDS if keyword in haystack
    )

    points = min(tech_hits * 6, 30) + min(foundation_hits * 3, 15)

    kind = card.get("type", "basic")
    if kind == "basic":
        if len(back) >= 150:
            points += 15
        bullets = sum(back.count(marker) for marker in ("‚Ä¢", "-", "*"))
        points += min(bullets * 3, 25)
        if "`" in back or "```" in back:
            points += 10
    elif kind == "cloze":
        points += min(cloze.count("{{c") * 8, 25)
        if len(cloze) >= 120:
            points += 15

    # Shape of the front, applied to every card type.
    front_lower = front.lower()
    question_words = ("what", "how", "why", "explain", "compare", "difference")
    if any(word in front_lower for word in question_words):
        points += 20
    if len(front.split()) >= 5:
        points += 10

    return min(points, 100)

def assign_priority(quality: int) -> str:
    """Map a quality score to a priority bucket: >=85 "P0", >=70 "P1", else "P2"."""
    for threshold, label in ((85, "P0"), (70, "P1")):
        if quality >= threshold:
            return label
    return "P2"

def calculate_confidence(atoms: Dict, cards: List[Dict], quality_dims: QualityDimensions) -> float:
    """
    Topic-aware confidence for a generated card set, in the range [floor, 1.0].

    Starts from a weighted blend (40% card-count ratio against the topic's
    expected card count, 30% mean quality_score_fallback, 30%
    quality_dims.combined_score), then applies, in this order: a 0.6 floor for
    high correctness, foundation boosts, a 0.65 floor for good foundation
    cards, a penalty for an INCOMPLETE completion state, and finally the
    per-topic minimum. Each adjustment prints a progress line.
    """
    topic_class = atoms.get("topic_class", "intermediate")
    is_foundation = topic_class == "foundation"
    expected_cards = TOPIC_CLASSES.get(topic_class, {}).get("expected_cards", 3)

    n_cards = len(cards)
    mean_quality = sum(quality_score_fallback(card) for card in cards) / max(n_cards, 1)

    # Weighted base score
    coverage = min(n_cards / expected_cards, 1.0)
    quality_ratio = mean_quality / 100
    confidence = (
        coverage * 0.4 +
        quality_ratio * 0.3 +
        quality_dims.combined_score * 0.3
    )

    correctness = quality_dims.correctness_score

    # Confidence must not fall far below a highly correct result
    if correctness >= 0.9 and n_cards >= 1:
        confidence = max(confidence, 0.6)
        print(f"      üéØ High correctness ({correctness:.2f}) ‚Üí confidence floor 0.6")

    # Foundation topics get a multiplicative boost that scales with quality
    if is_foundation:
        if correctness >= 0.95 and quality_dims.richness_score >= 0.75:
            confidence *= 1.35
            print(f"      üöÄ Foundation quality boost: {confidence:.2f} (correctness={correctness:.2f}, richness={quality_dims.richness_score:.2f})")
        elif correctness >= 0.9:
            confidence *= 1.2
            print(f"      üìà Foundation moderate boost: {confidence:.2f}")

    # Good foundation cards are never scored below 0.65 at this point
    if n_cards >= 1 and mean_quality >= 70 and is_foundation:
        confidence = max(confidence, 0.65)
        print(f"      üõ°Ô∏è  Foundation quality floor: {confidence:.2f}")

    # An INCOMPLETE card set scales confidence down (less so for foundation)
    if atoms.get("_completion_state_obj") == CompletionState.INCOMPLETE:
        confidence *= 0.85 if is_foundation else 0.6
        print(f"      üìâ Confidence reduced due to incomplete state ({topic_class}): {confidence:.2f}")

    # Per-topic minimum, applied last
    floors_by_topic = {
        "foundation": 0.45,
        "intermediate": 0.50,
        "advanced": 0.55
    }
    floor = floors_by_topic.get(topic_class, 0.50)
    if confidence < floor:
        print(f"      üõ°Ô∏è  Applying confidence floor: {confidence:.2f} ‚Üí {floor:.2f} ({topic_class})")
        confidence = floor

    return min(confidence, 1.0)

def extract_atoms_with_retry(reel: Dict) -> Dict:
    """
    Extract Stage-1 atoms for one reel, with adaptive prompt routing and at
    most one enrichment retry.

    Flow: normalise the transcript, honour terminal rejections and the
    Stage-1 cache, classify the topic, route to a prompt, call the extraction
    model, validate the result, score its technical depth and, when the
    result looks weak and the per-concept enrichment budget allows it, ask the
    retry model for a deeper extraction. The enriched atoms replace the
    originals only if they reach the threshold and improve the score by more
    than 1.

    Args:
        reel: Mapping with ``reel_id`` (or ``id``), ``caption``,
            ``transcript`` and ``category``; every value is coerced to str.

    Returns:
        On success, the atoms dict with ``status == "extracted"`` plus
        bookkeeping fields (reel_id, tech_score, topic_class, prompt_strategy,
        prompt_version, routing_reason); this dict is also cached.
        Otherwise a small dict with ``reel_id``, ``status`` and ``reason``,
        where status is one of "terminal_rejected", "error", "rejected",
        RejectionType.STRUCTURAL.value or RejectionType.SEMANTIC.value.
        Exceptions raised inside the main block become ``status == "error"``.
    """
    reel_id = str(reel.get("reel_id", reel.get("id", "")))
    caption = str(reel.get("caption", ""))
    transcript = str(reel.get("transcript", ""))
    category = str(reel.get("category", ""))

    # PHASE 2: Apply transcript normalization
    if CONFIG.get("ENABLE_TRANSCRIPT_NORMALIZATION", True):
        original_transcript = transcript
        transcript = normalize_transcript(transcript, caption)

        if len(transcript) != len(original_transcript):
            normalization_delta = len(transcript) - len(original_transcript)
            ROUTING_METRICS["content_signal::transcript_normalized"] += 1
            # NOTE(review): this halves the running value on every update, so
            # it weights recent deltas more heavily; it is not an arithmetic mean.
            ROUTING_METRICS["content_signal::normalization_delta_avg"] = \
                (ROUTING_METRICS.get("content_signal::normalization_delta_avg", 0) + normalization_delta) / 2

    try:
        # Check terminal rejections FIRST
        if terminal_rejections.is_terminal(reel_id):
            return {
                "reel_id": reel_id,
                "status": "terminal_rejected",
                "reason": "Previously rejected by logic - no retry allowed"
            }

        # Check cache first
        cached = get_cached_result(reel_id, "stage1_atoms")
        if cached and cached.get("status") == "extracted":
            return cached

        # Classify topic
        topic_class = classify_topic(caption, category, transcript)

        # Create normalized concept key upfront
        normalized_concept = normalize_learning_key(caption)

        # Get learned strategy from rejection memory
        learned_override = None
        if rejection_memory:
            learned_override = rejection_memory.get_strategy(
                concept=normalized_concept,
                category=category,
                topic_class=topic_class
            )

        # Select prompt strategy
        strategy, routing_reason = select_prompt_strategy(
            caption, transcript, category, learned_override
        )

        # Build prompt
        if strategy == PromptStrategy.STRICT_ADVANCED:
            prompt = stage1_prompt_a_strict(caption, transcript)
        elif strategy == PromptStrategy.DSA_FOCUSED:
            prompt = stage1_prompt_c_dsa(caption, transcript)
        else:
            prompt = stage1_prompt_b_foundation(caption, transcript)

        # Call LLM
        raw = llm(prompt, PIPELINE_MODELS['extract'])
        atoms = extract_json(raw, lenient=True)

        # Use caption as canonical learning_key, not LLM output
        if AtomsSchema.validate(atoms):
            atoms["learning_key"] = normalized_concept

            # Entropy-based augmentation for generic captions: short keys are
            # disambiguated with the extracted concept name.
            word_count = len(normalized_concept.split())
            concept_name = atoms.get("concept", "")
            if word_count < 4 and concept_name:
                concept_normalized = re.sub(r'[^\w\s]', ' ', concept_name.lower()).strip()
                atoms["learning_key"] = f"{normalized_concept}::{concept_normalized}"
                normalized_concept = atoms["learning_key"]

        # Validate
        if not AtomsSchema.validate(atoms):
            return {
                "reel_id": reel_id,
                "status": "error",
                "reason": "Invalid schema"
            }

        if not atoms.get("valid"):
            reject_reason = atoms.get("reject_reason", "Unknown")

            terminal_rejections.mark_terminal(
                reel_id,
                reason=f"handled_by_logic: {reject_reason}",
                stage="stage1",
                rejection_type="SEMANTIC_TERMINAL"
            )

            return {
                "reel_id": reel_id,
                "status": "rejected",
                "reason": reject_reason,
                "handled_by_logic": True
            }

        # Calculate technical score
        tech_score_val = technical_score(
            f"{atoms.get('concept','')} {atoms.get('definition','')} "
            f"{' '.join(atoms.get('technical_points', []))}"
        )

        # Topic-aware thresholds
        min_score = 4
        if CONFIG.get("NORMALIZE_TECH_SCORES"):
            adjustment = TOPIC_CLASSES.get(topic_class, {}).get("threshold_adjustment", 0)
            min_score += adjustment

        # Foundation topics: disable tradeoffs only if truly basic
        if topic_class == "foundation":
            has_comparison = any(word in transcript.lower() for word in ["vs", "versus", "instead", "rather than", "compared"])
            if not has_comparison:
                atoms["has_tradeoffs"] = False
            else:
                atoms["has_tradeoffs"] = (
                    atoms.get("has_tradeoffs", False) and
                    len(atoms.get("related_concepts", [])) >= 1
                )

        # Structural impossibility gate
        char_length = len(transcript.strip())
        if char_length < 80 and topic_class != "foundation":
            print(f"      üö´ Structural impossibility: transcript too short ({char_length} chars)")
            terminal_rejections.mark_terminal(
                reel_id,
                reason="insufficient_source_material",
                stage="stage1",
                rejection_type="STRUCTURAL"
            )
            return {
                "reel_id": reel_id,
                "status": RejectionType.STRUCTURAL.value,
                "reason": "insufficient_source_material",
                "skip_enrichment": True
            }

        print(f"      üìä Tech score: {tech_score_val} | class: {topic_class} | threshold: {min_score}")

        # Check if below threshold
        if tech_score_val < min_score:
            if topic_class == "foundation":
                is_valuable = (
                    len(atoms.get("related_concepts", [])) > 0 or
                    len(atoms.get("technical_points", [])) >= 3 or
                    atoms.get("definition", "") != ""
                )

                if is_valuable:
                    print(f"      ‚úÖ Foundation topic - pedagogically valuable (accepted despite score)")
                    atoms["low_density_but_valid"] = True
                else:
                    print(f"      ‚ùå Foundation topic - insufficient pedagogical value (rejected)")
            else:
                print(f"      ‚ö†Ô∏è  Below depth threshold (rejected unless enriched)")
        else:
            print(f"      ‚úÖ Passed depth threshold (accepted)")

        # Enrichment budget is per-concept, not per-reel
        has_rejection_history = (
            rejection_memory and
            rejection_memory.get_rejection_count(normalized_concept, category, topic_class) > 0
        )

        # Predictive confidence check
        would_have_low_confidence = (
            len(atoms.get("technical_points", [])) < 3 or
            len(atoms.get("solutions", [])) < 1 or
            (topic_class != "foundation" and not atoms.get("has_tradeoffs", False))
        )

        should_enrich = (
            (tech_score_val < min_score or has_rejection_history or would_have_low_confidence) and
            CONFIG["ENABLE_ENRICHMENT"] and
            not atoms.get("low_density_but_valid", False)
        )

        if should_enrich:
            concept = atoms.get("learning_key")
            enrichment_key = f"{concept}::{category or 'unknown'}"

            # Topic-aware enrichment budget
            max_enrichments = CONFIG["MAX_ENRICHMENTS_PER_CONCEPT"].get(
                topic_class,
                CONFIG["MAX_ENRICHMENTS_PER_CONCEPT"].get("intermediate", 2)
            )

            # Check per-concept enrichment budget with temporal spacing
            with ENRICHMENT_BUDGET_LOCK:
                now = time.time()
                last_enrichment = ENRICHMENT_TIMESTAMPS.get(enrichment_key, 0)
                days_since_last = (now - last_enrichment) / (24 * 60 * 60)

                if days_since_last > ENRICHMENT_BUDGET_RESET_DAYS:
                    if ENRICHMENT_BUDGET[enrichment_key] > 0:
                        print(f"      üîÑ Resetting enrichment budget ({days_since_last:.1f} days since last attempt)")
                    ENRICHMENT_BUDGET[enrichment_key] = 0

                concept_enrichments = ENRICHMENT_BUDGET[enrichment_key]
                already_enriched = concept_enrichments >= max_enrichments

            if already_enriched:
                print(f"      ‚è≠Ô∏è  Skipping enrichment - concept budget exhausted ({concept_enrichments}/{max_enrichments} for {topic_class})")
                print(f"         Budget will reset in {max(0, ENRICHMENT_BUDGET_RESET_DAYS - days_since_last):.1f} days")
            else:
                # Check rejection memory
                if rejection_memory:
                    if rejection_memory.should_skip(concept, category, topic_class):
                        print(f"      ‚è≠Ô∏è  Skipping - too many rejections")

                        terminal_rejections.mark_terminal(
                            reel_id,
                            reason="repeatedly_failed_enrichment",
                            stage="stage1_enrichment",
                            rejection_type="SEMANTIC_TERMINAL"
                        )

                        return {
                            "reel_id": reel_id,
                            "status": RejectionType.SEMANTIC.value,
                            "reason": "repeatedly_failed"
                        }

                    strategy_hint = rejection_memory.get_strategy(concept, category, topic_class)
                else:
                    strategy_hint = None

                enrichment_reason = (
                    f"low_score ({tech_score_val})" if tech_score_val < min_score
                    else "predicted_low_confidence" if would_have_low_confidence
                    else "rejection_history"
                )
                print(f"      üîÑ Enriching (reason: {enrichment_reason}, strategy: {strategy_hint or 'default'})")

                # Increment using canonical key
                with ENRICHMENT_BUDGET_LOCK:
                    ENRICHMENT_BUDGET[enrichment_key] += 1
                    ENRICHMENT_TIMESTAMPS[enrichment_key] = time.time()

                ROUTING_METRICS["enriched"] = ROUTING_METRICS.get("enriched", 0) + 1

                # The retry runs only in this budget-available branch. It used
                # to sit one level up, so an exhausted budget still reached it
                # and failed on the unset strategy_hint.
                enrichment_start = time.time()

                try:
                    enrichment_prompt = create_enrichment_prompt(atoms, strategy_hint)
                    enriched_raw = llm(enrichment_prompt, PIPELINE_MODELS['extract_retry'])
                    enriched_atoms = extract_json(enriched_raw, lenient=True)

                    if AtomsSchema.validate(enriched_atoms) and enriched_atoms.get("valid"):
                        new_score = technical_score(
                            f"{enriched_atoms.get('concept','')} {enriched_atoms.get('definition','')} "
                            f"{' '.join(enriched_atoms.get('technical_points', []))}"
                        )

                        print(f"      ‚ú® Enriched score: {new_score} (was: {tech_score_val})")

                        score_improvement = new_score - tech_score_val
                        if new_score >= min_score and score_improvement > 1:
                            # Keep the caption-derived learning key; the model's
                            # re-extraction must not replace or drop it.
                            enriched_atoms["learning_key"] = atoms.get("learning_key", normalized_concept)
                            atoms = enriched_atoms
                            tech_score_val = new_score
                            atoms["was_enriched"] = True
                            atoms["_enrichment_attempts"] = 1
                            atoms["_elapsed_enrichment"] = round(time.time() - enrichment_start, 2)
                            atoms["_delta_score"] = score_improvement
                            print(f"      ‚úÖ Enrichment successful! (Œî={score_improvement})")
                        elif new_score >= min_score:
                            print(f"      ‚ö†Ô∏è  Enrichment marginal (Œî={score_improvement}) - not learning from noise")
                        else:
                            print(f"      ‚ö†Ô∏è  Still below threshold")
                except Exception as e:
                    # Best effort: a failed retry keeps the original atoms.
                    print(f"      ‚ö†Ô∏è  Enrichment failed: {e}")

        # Final threshold check - SEMANTIC rejection
        if tech_score_val < min_score and not atoms.get("low_density_but_valid", False):
            if rejection_memory:
                concept_key = atoms.get("learning_key", normalized_concept)
                rejection_memory.record_rejection(
                    concept_key,
                    tech_score_val,
                    f"Low technical signal ({tech_score_val})",
                    category=category,
                    topic_class=topic_class
                )

            return {
                "reel_id": reel_id,
                "status": RejectionType.SEMANTIC.value,
                "reason": f"Low technical signal ({tech_score_val})"
            }

        # Success
        atoms["reel_id"] = reel_id
        atoms["status"] = "extracted"
        atoms["tech_score"] = tech_score_val
        atoms["topic_class"] = topic_class
        atoms["prompt_strategy"] = strategy.value

        # Model-aware prompt versioning
        model_name = PIPELINE_MODELS['extract'].name.replace(":", "_").replace(".", "_")
        atoms["prompt_version"] = f"stage1_{strategy.value}_{model_name}_{CONFIG['CACHE_VERSION']}"
        atoms["routing_reason"] = routing_reason

        ROUTING_METRICS[f"reason::{routing_reason}"] = ROUTING_METRICS.get(f"reason::{routing_reason}", 0) + 1

        save_cached_result(reel_id, "stage1_atoms", atoms)

        return atoms

    except Exception as e:
        return {
            "reel_id": reel_id,
            "status": "error",
            "reason": str(e)
        }

def generate_cards_from_atoms(atoms: Dict) -> Dict:
    """
    Generate all card types from atoms with density-aware routing.

    Args:
        atoms: Stage-1 extraction result. Read keys include "reel_id",
            "caption", "transcript", "category", "topic_class", "tech_score",
            "was_enriched", "has_tradeoffs", "prompt_version",
            "prompt_strategy", "learning_key", "concept" and "_delta_score".
            The dict is mutated: "completion_state", "completion_reason" and
            "_completion_state_obj" are written to it.

    Returns:
        A dict that always carries "reel_id" and "status":
          * the cached stage-2 result when one with status "success" exists;
          * status "rejected" (with "reason") when the content is too sparse,
            no cards were generated, or every card was a duplicate;
          * status "success" with the cards, per-type counts and scores;
          * status "error" (with "reason") when any exception is raised.
    """
    reel_id = atoms.get("reel_id", "")
    prompt_recorded = False

    try:
        # Check cache
        cached = get_cached_result(reel_id, "stage2_cards")
        if cached and cached.get("status") == "success":
            return cached

        print(f"   üé¥ Generating cards...")

        # Each routing branch fills only the card types it generates. Starting
        # from empty lists keeps every later len() valid; previously the LIGHT
        # branch left cloze/tradeoff unbound and always ended in an "error".
        basic_cards: List[Dict] = []
        cloze_cards: List[Dict] = []
        tradeoff_cards: List[Dict] = []

        # PHASE 3: Determine content density and route accordingly
        if CONFIG.get("ENABLE_HYBRID_ROUTING", True):
            caption = atoms.get("caption", "")
            transcript = atoms.get("transcript", "")
            category = atoms.get("category", "")

            content_density = classify_content_density(caption, transcript, category)
            # .get(..., 0) + 1 works for both a plain dict and a defaultdict,
            # matching how the other routing counters in this file are bumped.
            density_key = f"content_density::{content_density.value}"
            ROUTING_METRICS[density_key] = ROUTING_METRICS.get(density_key, 0) + 1

            if content_density == ContentDensity.DENSE:
                print(f"      üìö Dense content ‚Üí Full card generation (basic + cloze + tradeoff)")
                basic_cards = generate_basic_cards(atoms)
                cloze_cards = generate_cloze_cards(atoms)
                tradeoff_cards = generate_tradeoff_cards(atoms)
                all_cards = basic_cards + cloze_cards + tradeoff_cards

            elif content_density == ContentDensity.LIGHT:
                print(f"      üìÑ Light content ‚Üí Reference cards only (basic)")
                basic_cards = generate_basic_cards(atoms)
                all_cards = basic_cards
                for card in all_cards:
                    card["content_density"] = "light"

            else:
                return {
                    "reel_id": reel_id,
                    "status": "rejected",
                    "reason": "Content too sparse for card generation"
                }
        else:
            print(f"      üìö Hybrid routing disabled ‚Üí Full card generation")
            basic_cards = generate_basic_cards(atoms)
            cloze_cards = generate_cloze_cards(atoms)
            tradeoff_cards = generate_tradeoff_cards(atoms)
            all_cards = basic_cards + cloze_cards + tradeoff_cards
            content_density = ContentDensity.DENSE

        print(f"      Basic: {len(basic_cards)}, "
              f"Cloze: {len(cloze_cards)}, "
              f"Tradeoff: {len(tradeoff_cards)}")

        # Foundation Expansion (only for DENSE content)
        topic_class = atoms.get("topic_class", "intermediate")
        if (content_density == ContentDensity.DENSE and
            topic_class == "foundation" and
            len(all_cards) <= 2 and
            CONFIG.get("ENABLE_FOUNDATION_EXPANSION", True)):

            temp_dims = QualityDimensions.calculate(atoms, all_cards)

            was_enriched = atoms.get("was_enriched", False)
            tech_score = atoms.get("tech_score", 0)
            min_score_for_expansion = 3

            # Expansion only adds to content that is already correct but thin,
            # was not enriched, and has a sufficient technical score.
            can_expand = (
                temp_dims.correctness_score >= 0.9 and
                temp_dims.richness_score < 0.7 and
                not was_enriched and
                tech_score >= min_score_for_expansion
            )

            if can_expand:
                print(f"      üå± Attempting foundation expansion (correct but thin, no enrichment needed)...")
                adjacent_card = generate_adjacent_card(atoms)
                if adjacent_card:
                    all_cards.append(adjacent_card)
                    print(f"      ‚úÖ Added adjacent card")
            elif was_enriched:
                print(f"      ‚è≠Ô∏è  Skipping expansion - content was enriched (expansion is polish, not repair)")
            elif tech_score < min_score_for_expansion:
                print(f"      ‚è≠Ô∏è  Skipping expansion - tech score too low ({tech_score} < {min_score_for_expansion})")

        if not all_cards:
            return {
                "reel_id": reel_id,
                "status": "rejected",
                "reason": "No valid cards generated"
            }

        # Filter duplicates but DEFER fingerprint registration
        unique_cards = []
        duplicates_found = 0
        pending_fingerprints = []

        for card in all_cards:
            if not duplicate_detector.is_duplicate(card):
                unique_cards.append(card)
                fingerprint = duplicate_detector._create_fingerprint(card)
                pending_fingerprints.append(fingerprint)
            else:
                duplicates_found += 1

        if duplicates_found > 0:
            print(f"      üîÑ Filtered {duplicates_found} duplicates")

        final_count = len(unique_cards)

        if final_count == 0:
            return {
                "reel_id": reel_id,
                "status": "rejected",
                "reason": "All cards were duplicates"
            }

        # Quality scoring
        quality_dims = QualityDimensions.calculate(atoms, unique_cards)

        # Completion state with explicit reason
        completion_state, completion_reason = determine_completion_state(
            final_count,
            atoms.get("has_tradeoffs", False),
            len(tradeoff_cards),
            duplicates_found,
            topic_class=atoms.get("topic_class", "intermediate")
        )

        atoms["completion_state"] = completion_state.value
        atoms["completion_reason"] = completion_reason
        atoms["_completion_state_obj"] = completion_state

        # Confidence calculation
        confidence = calculate_confidence(atoms, unique_cards, quality_dims)

        # Assign priorities
        for card in unique_cards:
            card["quality"] = quality_score_fallback(card)
            card["priority"] = assign_priority(card["quality"])
            assert reel_id, f"CRITICAL: Card created without reel_id - {card.get('type', 'unknown')}"
            card["reel_id"] = reel_id

        # NOW register fingerprints after cards passed all quality gates
        for fingerprint in pending_fingerprints:
            duplicate_detector.add_fingerprint(fingerprint)

        # Explicit approval logic
        auto_approved = (
            confidence >= 0.85 and
            quality_dims.correctness_score >= 0.9
        )

        result = {
            "reel_id": reel_id,
            "status": "success",
            "cards": unique_cards,
            "basic_count": len(basic_cards),
            "cloze_count": len(cloze_cards),
            "tradeoff_count": len(tradeoff_cards),
            "duplicates_filtered": duplicates_found,
            "final_count": final_count,
            "confidence": round(confidence, 2),
            "completion_state": completion_state.value,
            "completion_reason": completion_reason,
            "correctness_score": quality_dims.correctness_score,
            "richness_score": quality_dims.richness_score,
            "attempted_enrichment": atoms.get("was_enriched", False),
            "topic_class": atoms.get("topic_class", "intermediate"),
            "prompt_strategy": atoms.get("prompt_strategy", "unknown"),
            "prompt_version": atoms.get("prompt_version", "unknown"),
            "auto_approved": auto_approved
        }

        save_cached_result(reel_id, "stage2_cards", result)

        # Record prompt version success ONLY if cards meet minimum threshold
        prompt_version = atoms.get("prompt_version")
        rejection_type = result.get("status", "")

        if prompt_version and rejection_type != RejectionType.MECHANICAL.value and not prompt_recorded:
            if final_count >= CONFIG["MIN_CARDS_FOR_PARTIAL"]:
                record_prompt_version_result(prompt_version, True, stage="complete")
            else:
                record_prompt_version_result(prompt_version, False, stage="generation")
            prompt_recorded = True

        # Record success in rejection memory if enriched
        if atoms.get("was_enriched") and rejection_memory:
            concept_key = atoms.get("learning_key", atoms.get("concept", ""))
            delta_score = atoms.get("_delta_score", 0)
            rejection_memory.record_success(
                concept_key,
                atoms.get("prompt_strategy", ""),
                category=atoms.get("category", ""),
                topic_class=atoms.get("topic_class", ""),
                delta_score=delta_score
            )

        return result

    except Exception as e:
        # Any failure here is recorded against the prompt version as a
        # "mechanical" failure, unless an outcome was already recorded above.
        prompt_version = atoms.get("prompt_version")
        if prompt_version and not prompt_recorded:
            record_prompt_version_result(prompt_version, False, stage="mechanical")

        return {
            "reel_id": reel_id,
            "status": "error",
            "reason": str(e)
        }

def process_single_reel(reel: Dict) -> Dict:
    """
    Run a single reel through both pipeline stages.

    Stage 1 extracts atoms; if its status is not "extracted" that result is
    returned unchanged. Otherwise stage 2 generates cards and its result is
    returned, whether or not it succeeded. Progress is printed along the way.
    """
    reel_id = str(reel.get("reel_id", reel.get("id", "")))
    caption_preview = reel.get('caption', '')[:50]

    print(f"\nüé¨ Reel {reel_id}: {caption_preview}...")

    # Stage 1: atom extraction - bail out early on anything but "extracted"
    atoms = extract_atoms_with_retry(reel)
    extraction_status = atoms.get("status")

    if extraction_status != "extracted":
        print(f"   ‚ùå {extraction_status}: {atoms.get('reason')}")
        return atoms

    print(f"   ‚úÖ Atoms extracted (score: {atoms.get('tech_score')}, class: {atoms.get('topic_class')})")

    # Stage 2: card generation
    card_result = generate_cards_from_atoms(atoms)

    if card_result.get("status") != "success":
        print(f"   ‚ùå {card_result.get('status')}: {card_result.get('reason')}")
        return card_result

    print(f"   ‚úÖ {card_result['final_count']} cards (confidence: {card_result['confidence']:.2f})")
    return card_result

def process_batch(reels: List[Dict]) -> List[Dict]:
    """
    Process reels in parallel and persist progress as each one finishes.

    Args:
        reels: Reel dicts to hand to ``process_single_reel``.

    Returns:
        One result dict per reel, in completion order. A reel whose worker
        raised is represented by ``{"reel_id", "status": "error", "reason"}``
        and is not marked as processed.
    """
    results = []

    with ThreadPoolExecutor(max_workers=CONFIG["MAX_WORKERS"]) as executor:
        futures = {
            executor.submit(process_single_reel, reel): reel
            for reel in reels
        }

        for future in as_completed(futures):
            # Only the worker call is guarded here. Persistence is handled
            # separately below so a save failure cannot add a second, bogus
            # "error" entry for a reel whose result was already recorded.
            try:
                result = future.result()
            except Exception as e:
                reel = futures[future]
                reel_id = str(reel.get("reel_id", reel.get("id", "")))
                print(f"‚ùå Error processing reel {reel_id}: {e}")
                results.append({
                    "reel_id": reel_id,
                    "status": "error",
                    "reason": str(e)
                })
                continue

            results.append(result)

            try:
                # Mark first, then flush, so the flushed state includes the
                # reel that just finished.
                if result.get("reel_id"):
                    progress_tracker.mark_processed(result["reel_id"])

                # Periodic saves
                duplicate_detector.save_if_dirty()
                progress_tracker.flush()
            except Exception as e:
                # Best effort: the result is kept, and state is saved again at
                # the end of the run.
                print(f"‚ö†Ô∏è  Failed to persist progress: {e}")

    return results

def is_card_worthy(reel: Dict) -> Tuple[bool, str]:
    """
    PHASE 1: Pre-filter to identify reels that are worth processing into Anki cards.

    Args:
        reel: Reel record. Reads "transcript", "caption" and
            "category_confidence". Missing or None text fields are treated as
            empty. The confidence may be an int, a float, or a numeric string
            such as "85.0"; anything unparseable counts as 0.

    Returns:
        ``(True, "")`` when the reel passes every filter, otherwise
        ``(False, reason)`` where reason is one of
        ``"promotional_cta: <pattern>"``, ``"too_short: <n> words"``,
        ``"low_confidence: <n>"`` or ``"motivational_only"``.
    """
    # `or ""` guards against None; str() guards against non-string cell values.
    transcript = str(reel.get("transcript") or "").strip()
    caption = str(reel.get("caption") or "").strip()

    raw_confidence = reel.get("category_confidence", 0)
    try:
        # Go through float() first: a value such as "85.0" makes int() raise,
        # which used to collapse the confidence to 0 and reject the reel.
        category_confidence = int(float(raw_confidence)) if raw_confidence else 0
    except (TypeError, ValueError, OverflowError):
        category_confidence = 0

    # Filter 1: Promotional CTA patterns
    cta_patterns = [
        "comment link",
        "link in bio",
        "check out this document",
        "get the link",
        "dm for",
        "link in comment",
        "go ahead and check",
    ]

    combined_text = (transcript + " " + caption).lower()

    # A CTA phrase only disqualifies short content (< 150 words combined).
    if len(combined_text.split()) < 150:
        for pattern in cta_patterns:
            if pattern in combined_text:
                return False, f"promotional_cta: {pattern}"

    # Filter 2: Length check (transcript only, caption excluded)
    word_count = len(transcript.split())
    if word_count < 80:
        return False, f"too_short: {word_count} words"

    # Filter 3: Category confidence
    if category_confidence < 70:
        return False, f"low_confidence: {category_confidence}"

    # Filter 4: Pure motivational patterns
    motivational_only = [
        "you must know",
        "stop scrolling",
        "trust me",
        "no one is talking about",
    ]

    technical_indicators = [
        "code",
        "method",
        "function",
        "class",
        "variable",
        "algorithm",
        "data structure",
        "exception",
        "annotation",
        "interface",
    ]

    # Plain substring checks against the lower-cased transcript + caption.
    has_motivational = any(pattern in combined_text for pattern in motivational_only)
    has_technical = any(indicator in combined_text for indicator in technical_indicators)

    if has_motivational and not has_technical and word_count < 120:
        return False, "motivational_only"

    return True, ""

def diagnose_csv_file(csv_path: str, sep: Optional[str] = None) -> Dict:
    """
    Diagnose CSV file issues and provide actionable feedback.

    Args:
        csv_path: Path of the delimited text file to inspect.
        sep: Field delimiter. When None (default) it is inferred from the
            header line: "|" if the header contains a pipe, otherwise ",".
            This keeps the check meaningful for pipe-delimited files, whose
            free-text fields may contain commas.

    Returns:
        A dict with "exists", "readable", "line_count", "issues" (list of
        messages) and "sample_lines" (list of previews). When the file has at
        least two lines it also carries "delimiter" and "expected_fields".
        Read errors are reported through "issues" rather than raised.
    """
    diagnosis = {
        "exists": False,
        "readable": False,
        "line_count": 0,
        "issues": [],
        "sample_lines": []
    }

    try:
        csv_file = Path(csv_path)
        diagnosis["exists"] = csv_file.exists()

        if not diagnosis["exists"]:
            diagnosis["issues"].append(f"File not found: {csv_path}")
            return diagnosis

        # errors='ignore' drops undecodable bytes so the structural check can
        # still run on a file with encoding problems.
        with open(csv_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()

        diagnosis["readable"] = True
        diagnosis["line_count"] = len(lines)

        if len(lines) < 2:
            diagnosis["issues"].append("File has fewer than 2 lines (need header + data)")
            return diagnosis

        header = lines[0].strip()
        if sep is None:
            sep = "|" if "|" in header else ","
        diagnosis["delimiter"] = sep

        expected_fields = len(header.split(sep))
        diagnosis["expected_fields"] = expected_fields
        diagnosis["sample_lines"].append(f"Header: {header[:100]}...")

        # Only the first 10 data rows are sampled. This is a naive split that
        # ignores quoting, so it is a hint, not a full parse.
        bad_lines = []
        for i, line in enumerate(lines[1:11], start=2):
            field_count = len(line.rstrip("\r\n").split(sep))
            if field_count != expected_fields:
                bad_lines.append((i, field_count, line.strip()[:80]))

        if bad_lines:
            diagnosis["issues"].append(f"Found {len(bad_lines)} lines with inconsistent field counts")
            for line_num, count, preview in bad_lines[:3]:
                diagnosis["sample_lines"].append(
                    f"Line {line_num}: has {count} fields (expected {expected_fields}): {preview}..."
                )

    except Exception as e:
        diagnosis["issues"].append(f"Error reading file: {e}")

    return diagnosis

def main():
    """
    Run the full Anki generation pipeline end to end.

    Steps, in order:
      1. Load the pipe-delimited CSV at CONFIG["CSV_FILE"], falling back to a
         lenient parse (after printing a diagnosis) on a parser error.
      2. Drop reels already recorded in ``progress_tracker.processed``.
      3. Optionally pre-filter reels with ``is_card_worthy``; skipped reels
         are marked as processed.
      4. Process up to CONFIG["MAX_REELS"] reels in CONFIG["BATCH_SIZE"] batches.
      5. Persist state, write ``anki_cards.json`` (and ``needs_review.json``
         when applicable) under CONFIG["OUT_DIR"], and print a report.

    Exits the process with status 1 when the CSV is missing or unparseable.
    """
    import math  # local import: only normalize_reel below needs it

    start_time = time.time()

    print("=" * 60)
    print("STARTING ANKI GENERATION PIPELINE")
    print("=" * 60)
    print()

    # Load CSV
    if not Path(CONFIG["CSV_FILE"]).exists():
        print(f"‚ùå CSV not found: {CONFIG['CSV_FILE']}")
        sys.exit(1)

    # Robust CSV parsing with error handling
    try:
        df = pd.read_csv(CONFIG["CSV_FILE"], sep="|")
    except pd.errors.ParserError as e:
        print(f"‚ö†Ô∏è  CSV parsing error: {e}")

        print(f"\nüîç Diagnosing CSV file...")
        diagnosis = diagnose_csv_file(CONFIG["CSV_FILE"])

        if diagnosis["issues"]:
            print(f"   Found {len(diagnosis['issues'])} issue(s):")
            for issue in diagnosis["issues"]:
                print(f"   ‚ùå {issue}")

        if diagnosis["sample_lines"]:
            print(f"\n   Sample lines:")
            for sample in diagnosis["sample_lines"]:
                print(f"   {sample}")

        print(f"\n   Attempting robust parsing...")
        try:
            df = pd.read_csv(
                CONFIG["CSV_FILE"],
                sep="|",
                on_bad_lines='skip',
                engine='python',
                quoting=1,  # csv.QUOTE_ALL
                encoding='utf-8'
            )
            print(f"   ‚úÖ Recovered with lenient parsing ({len(df)} rows)")
        except Exception as e2:
            print(f"   ‚ùå Lenient parsing also failed: {e2}")
            print(f"\nüí° SUGGESTION: Fix your CSV file:")
            print(f"   1. Ensure all text fields with commas are properly quoted")
            print(f"   2. Check that all rows have the same number of columns")
            sys.exit(1)

    print(f"üìÑ Loaded {len(df)} reels from CSV")

    # Normalize NaN values to prevent float crashes
    def normalize_reel(reel: dict) -> dict:
        """Replace NaN floats with "" and stringify every other float value."""
        normalized = {}
        for k, v in reel.items():
            if isinstance(v, float):
                normalized[k] = "" if math.isnan(v) else str(v)
            else:
                normalized[k] = v
        return normalized

    # Filter unprocessed
    unprocessed = []
    for _, row in df.iterrows():
        reel_dict = normalize_reel(row.to_dict())
        # Same "reel_id" -> "id" fallback the rest of the pipeline uses, so the
        # id checked here matches the id that gets marked as processed.
        reel_id = str(reel_dict.get("reel_id", reel_dict.get("id", "")))
        if reel_id not in progress_tracker.processed:
            unprocessed.append(reel_dict)

    print(f"‚ôªÔ∏è  {len(progress_tracker.processed)} already processed")
    print(f"üÜï {len(unprocessed)} remaining")
    print()

    if not unprocessed:
        print("‚úÖ All reels already processed!")
        return

    # PHASE 1: Apply pre-filtering
    if CONFIG.get("ENABLE_CONTENT_FILTERING", True):
        print(f"üîç Applying content quality filters...")
        filtered_reels = []
        skip_stats = defaultdict(int)

        for reel in unprocessed:
            is_worthy, skip_reason = is_card_worthy(reel)
            if is_worthy:
                filtered_reels.append(reel)
            else:
                skip_stats[skip_reason] += 1
                reel_id = str(reel.get("reel_id", reel.get("id", "")))
                if reel_id:
                    progress_tracker.mark_processed(reel_id)

        print(f"‚úÖ {len(filtered_reels)} reels passed quality filters")
        print(f"‚è≠Ô∏è  {len(unprocessed) - len(filtered_reels)} reels skipped:")
        for reason, count in sorted(skip_stats.items(), key=lambda x: x[1], reverse=True):
            print(f"   - {reason}: {count}")
        print()

        if not filtered_reels:
            print("‚ö†Ô∏è  No reels passed quality filters!")
            # Flush the skip marks made above; this early return would
            # otherwise bypass the flush at the end of the run.
            progress_tracker.flush()
            return
    else:
        print(f"‚è≠Ô∏è  Content filtering disabled - processing all reels")
        filtered_reels = unprocessed

    # Limit to MAX_REELS
    to_process = filtered_reels[:CONFIG["MAX_REELS"]]

    # Compare against the filtered list: only truncation by MAX_REELS should
    # trigger this warning, not reels dropped by the quality filters.
    if len(to_process) < len(filtered_reels):
        print(f"‚ö†Ô∏è  Processing first {CONFIG['MAX_REELS']} reels (use MAX_REELS config to adjust)")

    # Process in batches
    all_results = []
    batch_size = CONFIG["BATCH_SIZE"]

    for i in range(0, len(to_process), batch_size):
        batch = to_process[i:i + batch_size]
        print(f"\nüì¶ Batch {i // batch_size + 1}/{(len(to_process) - 1) // batch_size + 1}")

        batch_results = process_batch(batch)
        all_results.extend(batch_results)

    # Save final state
    duplicate_detector.save_if_dirty(force=True)
    progress_tracker.flush()
    if rejection_memory:
        rejection_memory.save()

    # Save routing metrics
    try:
        with open(ROUTING_METRICS_FILE, 'w') as f:
            json.dump(dict(ROUTING_METRICS), f, indent=2)
        print(f"\nüíæ Routing metrics saved to: {ROUTING_METRICS_FILE}")
    except Exception as e:
        print(f"‚ö†Ô∏è  Failed to save routing metrics: {e}")

    # Generate output
    successful = [r for r in all_results if r.get("status") == "success"]

    all_cards = []
    for result in successful:
        all_cards.extend(result.get("cards", []))

    output_file = CONFIG["OUT_DIR"] / "anki_cards.json"
    with open(output_file, 'w') as f:
        json.dump(all_cards, f, indent=2)

    # Needs review
    needs_review = [r for r in successful if 0.65 <= r.get("confidence", 0) < 0.85]
    if needs_review:
        review_file = CONFIG["OUT_DIR"] / "needs_review.json"
        with open(review_file, 'w') as f:
            json.dump(needs_review, f, indent=2)

    # Summary stats
    elapsed = time.time() - start_time

    total_cards = len(all_cards)
    basic_cards = sum(1 for c in all_cards if c.get("type") == "basic")
    cloze_cards = sum(1 for c in all_cards if c.get("type") == "cloze")
    tradeoff_cards = sum(1 for c in all_cards if c.get("type") == "tradeoff")

    total_duplicates = sum(r.get("duplicates_filtered", 0) for r in successful)

    # max(..., 1) keeps the averages defined when nothing succeeded.
    avg_quality = sum(c.get("quality", 0) for c in all_cards) / max(len(all_cards), 1)
    avg_confidence = sum(r.get("confidence", 0) for r in successful) / max(len(successful), 1)

    p0_cards = sum(1 for c in all_cards if c.get("priority") == "P0")
    p1_cards = sum(1 for c in all_cards if c.get("priority") == "P1")
    p2_cards = sum(1 for c in all_cards if c.get("priority") == "P2")

    high_confidence = [r for r in successful if r.get("confidence", 0) >= 0.85]
    low_confidence = [r for r in successful if r.get("confidence", 0) < 0.65]

    auto_approved_count = len([r for r in successful if r.get("auto_approved", False)])

    topic_class_dist = defaultdict(int)
    for r in successful:
        topic_class_dist[r.get("topic_class", "unknown")] += 1

    completion_states = defaultdict(int)
    for r in successful:
        completion_states[r.get("completion_state", "unknown")] += 1

    enrichment_attempts = len([r for r in all_results if r.get("attempted_enrichment")])
    enrichment_successes = len([r for r in successful if r.get("attempted_enrichment")])

    avg_correctness = sum(r.get("correctness_score", 0) for r in successful) / max(len(successful), 1)
    avg_richness = sum(r.get("richness_score", 0) for r in successful) / max(len(successful), 1)

    # Print report
    print("\n" + "=" * 60)
    print("FINAL STATS")
    print("=" * 60)
    print(f"‚è±Ô∏è  Time: {elapsed:.1f}s")
    print(f"üé¨ Processed: {len(all_results)} reels")
    print(f"‚úÖ Success: {len(successful)} reels")
    # .get() so a result without a "status" key cannot abort the report.
    print(f"‚ùå Errors: {len([r for r in all_results if r.get('status') == 'error'])}")
    print(f"üö´ Rejected: {len([r for r in all_results if r.get('status') == 'rejected'])}")
    print()
    print(f"üé¥ Cards Generated:")
    print(f"   Total: {total_cards}")
    print(f"   Basic: {basic_cards}")
    print(f"   Cloze: {cloze_cards}")
    print(f"   Tradeoff: {tradeoff_cards}")
    print()
    print(f"üîÑ Duplicates filtered: {total_duplicates}")
    print(f"‚≠ê Avg Quality: {avg_quality:.1f}/100")
    print(f"üìä Avg Confidence: {avg_confidence:.2f}")
    print(f"üöÄ Throughput: {len(all_results)/elapsed:.2f} reels/s")
    print()
    print(f"üìä QUALITY DIMENSIONS:")
    print(f"   Correctness: {avg_correctness:.2f}/1.0")
    print(f"   Richness: {avg_richness:.2f}/1.0")
    print()
    print(f"üéØ COMPLETION STATES:")
    for state, count in sorted(completion_states.items()):
        pct = (count / len(successful) * 100) if successful else 0
        print(f"   {state}: {count} ({pct:.1f}%)")
    print()
    print(f"üìå Priority Distribution:")
    print(f"   P0 (‚â•85): {p0_cards} cards")
    print(f"   P1 (70-84): {p1_cards} cards")
    print(f"   P2 (<70): {p2_cards} cards")
    print()
    print(f"üìä Confidence Breakdown:")
    print(f"   High (‚â•0.85): {len(high_confidence)} reels ‚Äî Auto-approved ‚úÖ")
    print(f"   Medium (0.65-0.84): {len(needs_review)} reels ‚Äî Review needed ‚ö†Ô∏è")
    print(f"   Low (<0.65): {len(low_confidence)} reels ‚Äî Consider re-prompt üîÑ")
    print(f"   Auto-approved (confidence ‚â•0.85 AND correctness ‚â•0.9): {auto_approved_count} reels üéØ")
    print()
    print("üß† ADAPTIVE LEARNING STATS")
    print("-" * 60)
    print(f"üîÑ Enrichment attempts: {enrichment_attempts}")
    print(f"‚úÖ Enrichment successes: {enrichment_successes}")
    if enrichment_attempts > 0:
        success_rate = (enrichment_successes / enrichment_attempts) * 100
        print(f"üìà Success rate: {success_rate:.1f}%")

    print(f"\nüéØ Prompt Routing Distribution:")
    total_routed = sum(ROUTING_COUNTS.values())
    for strategy, count in sorted(ROUTING_COUNTS.items()):
        if count > 0:
            pct = (count / total_routed * 100) if total_routed > 0 else 0
            print(f"   {strategy}: {count} ({pct:.1f}%)")

    if ROUTING_METRICS.get("enriched", 0) > 0:
        print(f"   Enriched (after initial): {ROUTING_METRICS['enriched']}")

    print(f"\nüìä Content Signals Detected:")
    content_signals = {k: v for k, v in ROUTING_METRICS.items() if k.startswith("content_signal::")}
    for signal, count in sorted(content_signals.items()):
        print(f"   {signal.replace('content_signal::', '')}: {count}")

    print(f"\nüöÄ PHASE 1-3 ENHANCEMENTS:")
    print(f"=" * 60)

    density_metrics = {k: v for k, v in ROUTING_METRICS.items() if k.startswith("content_density::")}
    if density_metrics:
        print(f"üìä Content Density Distribution:")
        for density, count in sorted(density_metrics.items()):
            print(f"   {density.replace('content_density::', '').upper()}: {count} reels")

    if ROUTING_METRICS.get("content_signal::transcript_normalized", 0) > 0:
        normalized_count = ROUTING_METRICS["content_signal::transcript_normalized"]
        avg_delta = ROUTING_METRICS.get("content_signal::normalization_delta_avg", 0)
        print(f"\nüìù Transcript Normalization:")
        print(f"   Normalized: {normalized_count} transcripts")
        print(f"   Avg delta: {avg_delta:.0f} chars")

    print(f"\n‚öôÔ∏è  Feature Status:")
    print(f"   Content Filtering: {'‚úÖ' if CONFIG.get('ENABLE_CONTENT_FILTERING') else '‚ùå'}")
    print(f"   Transcript Normalization: {'‚úÖ' if CONFIG.get('ENABLE_TRANSCRIPT_NORMALIZATION') else '‚ùå'}")
    print(f"   Hybrid Routing: {'‚úÖ' if CONFIG.get('ENABLE_HYBRID_ROUTING') else '‚ùå'}")
    print(f"=" * 60)

    if rejection_memory:
        mem_stats = rejection_memory.get_stats()
        print(f"\nüß† Rejection memory:")
        print(f"   Concepts tracked: {mem_stats['total_concepts']}")
        print(f"   Total rejections: {mem_stats['total_rejections']}")
        if mem_stats['strategies']:
            print(f"   Strategies learned:")
            for strategy, count in mem_stats['strategies'].items():
                print(f"     - {strategy}: {count} concepts")
        if 'learning_velocity' in mem_stats:
            lv = mem_stats['learning_velocity']
            print(f"   Learning velocity:")
            print(f"     - Avg attempts to success: {lv['avg_attempts_until_success']}")
            print(f"     - Concepts learned: {lv['concepts_learned']}")
            print(f"     - Still learning: {lv['concepts_still_learning']}")

    term_stats = terminal_rejections.get_stats()
    if term_stats['total'] > 0:
        print(f"\nüö´ Terminal Rejections (never retry):")
        print(f"   Total: {term_stats['total']}")
        print(f"   By stage:")
        for stage, count in sorted(term_stats['by_stage'].items()):
            print(f"     - {stage}: {count}")
        if term_stats['by_reason']:
            print(f"   Top reasons:")
            sorted_reasons = sorted(term_stats['by_reason'].items(), key=lambda x: x[1], reverse=True)
            for reason, count in sorted_reasons[:5]:
                print(f"     - {reason}: {count}")

    print(f"\nüìö Topic Class Distribution:")
    for topic_class, count in sorted(topic_class_dist.items()):
        print(f"   {topic_class}: {count} reels")

    if PROMPT_VERSION_STATS:
        print(f"\nüîÑ PROMPT VERSION LIFECYCLE:")
        print("-" * 60)
        # Ten most-attempted prompt versions
        sorted_versions = sorted(
            PROMPT_VERSION_STATS.items(),
            key=lambda x: x[1].get("attempts", 0),
            reverse=True
        )[:10]
        for version, stats in sorted_versions:
            status = "üö´ DEPRECATED" if stats.get("deprecated") else "‚úÖ"
            print(f"   {status} {version[:40]:40s} | "
                  f"attempts: {stats['attempts']:3d} | "
                  f"success: {stats['success_rate']:5.1%}")

        save_prompt_version_stats(PROMPT_VERSION_STATS)
        print(f"   üíæ Saved to: {PROMPT_VERSION_FILE}")

    print(f"\nüìÅ Output: {output_file}")
    if needs_review:
        print(f"‚ö†Ô∏è  Review: {CONFIG['OUT_DIR']}/needs_review.json")
    print(f"üîç Fingerprints: {len(duplicate_detector.fingerprints)} unique cards")

    if BLOOM_AVAILABLE:
        bloom_stats = duplicate_detector.get_bloom_stats()
        print(f"üå∏ Bloom filter: {bloom_stats['false_positives']}/{bloom_stats['total_checks']} FP ({bloom_stats['fp_rate_pct']:.3f}%)")
        if bloom_stats['needs_rebuild']:
            print(f"   ‚ö†Ô∏è  WARNING: Bloom filter saturated (>1% FP rate) - rebuilding...")
            duplicate_detector.rebuild_bloom_if_needed()

    if rejection_memory:
        print(f"üß† Rejection memory: {REJECTION_MEMORY_FILE}")
    terminal_rejections.save()
    print(f"üö´ Terminal rejections: {TERMINAL_REJECTIONS_FILE}")
    if PROMPT_VERSION_STATS:
        print(f"üîÑ Prompt versions: {PROMPT_VERSION_FILE}")
    print(f"üìä Routing metrics: {ROUTING_METRICS_FILE}")
    print("\n‚úÖ DONE\n")

def _persist_pipeline_state() -> None:
    """
    Best-effort save of every piece of resumable pipeline state.

    Each store is saved independently and a failure is reported instead of
    raised, so one failing save cannot prevent the others from running.
    """
    def _write_routing_metrics():
        with open(ROUTING_METRICS_FILE, 'w') as f:
            json.dump(dict(ROUTING_METRICS), f, indent=2)

    # Lambdas defer every name lookup until the step actually runs, so a
    # missing global is reported per step rather than aborting the whole save.
    save_steps = [
        ("duplicate fingerprints", lambda: duplicate_detector.save_if_dirty(force=True)),
        ("progress", lambda: progress_tracker.flush()),
        ("rejection memory", lambda: rejection_memory.save() if rejection_memory else None),
        ("terminal rejections", lambda: terminal_rejections.save()),
        ("prompt version stats", lambda: save_prompt_version_stats(PROMPT_VERSION_STATS)),
        ("routing metrics", _write_routing_metrics),
    ]

    for label, save_step in save_steps:
        try:
            save_step()
        except Exception as save_error:
            print(f"‚ö†Ô∏è  Failed to save {label}: {save_error}")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n‚ö†Ô∏è  Interrupted - saving state...")
        _persist_pipeline_state()
        sys.exit(0)
    except Exception as e:
        print(f"\n\n‚ùå ERROR: {e}")
        import traceback
        traceback.print_exc()
        # Save the same state as on interrupt. Previously the crash path
        # skipped the progress flush and the terminal-rejection save.
        _persist_pipeline_state()
        sys.exit(1)