# 1. Chunking

In [None]:
# chunk.py
# Step-1
# Generate all possible sentence pairs between [news1, conflict_news, other_news1, other_news2, other_news3]
# Sentence pairs within the same document are not generated

import json
import os
import spacy
from pathlib import Path

# ============================================================
# PATH SETTINGS
# ============================================================

# Using Path for better cross-platform compatibility
# Dataset root: replace the placeholder with the real dataset folder path.
BASE_DIR = Path("input Low Intensity Factoid Conflict Dataset Folder Path")
DATA_DIR = BASE_DIR.joinpath("data")

# Raw event file read by this step, and the folder that receives one JSON
# file of sentence pairs per event.
INPUT_PATH = DATA_DIR.joinpath("Low_Intensity_Factoid_Conflict.json")
OUTPUT_DIR = DATA_DIR.joinpath("Chunk")

# The output folder (and any missing parents) is created as soon as this runs.
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)


# ============================================================
# LOAD spaCy
# ============================================================
# Shared spaCy pipeline, loaded once and reused by chunk_sentences() to split
# article text into sentences.
NLP = spacy.load("en_core_web_sm")


# ============================================================
# 1. Sentence Chunking
# ============================================================
def chunk_sentences(text, doc_id):
    """Split *text* into sentences with spaCy and tag each with its document.

    Args:
        text: Article text to segment.
        doc_id: Identifier stored alongside every sentence.

    Returns:
        list[dict]: One ``{"text": ..., "doc_id": ...}`` entry per sentence, in
        document order; sentences that are empty after stripping are omitted.
    """
    stripped = (span.text.strip() for span in NLP(text).sents)
    return [{"text": sentence, "doc_id": doc_id} for sentence in stripped if sentence]


# ============================================================
# 2. Generate ALL Document Sentence Pairs
# ============================================================
def generate_all_sentence_pairs(doc_sentences):
    """Pair every sentence of each document with every sentence of each later one.

    Args:
        doc_sentences: Mapping of document key to its list of sentences, e.g.
            ``{"news1": [sent1, sent2, ...], "conflict_news": [...], ...}``.

    Returns:
        list[dict]: One record per cross-document sentence pair. Ids start at 1
        and follow generation order; the score/filter/label fields are left as
        empty strings. Sentences of the same document are never paired, and
        each unordered document pair is visited once (A-B but not B-A).
    """
    names = list(doc_sentences)
    records = []

    for position, first_doc in enumerate(names):
        # Only documents that come later in the key order, so no pair repeats.
        for second_doc in names[position + 1:]:
            for left in doc_sentences[first_doc]:
                for right in doc_sentences[second_doc]:
                    records.append({
                        "id": len(records) + 1,
                        "sentence_1": left,
                        "sentence_2": right,
                        "semantic_score": "",
                        "semantic_filtered": "",
                        "cosine_score": "",
                        "cosine_filtered": "",
                        "conflict_label": ""
                    })

    return records


# ============================================================
# 3. Process Single Event
# ============================================================
def process_event(event):
    """Chunk one event's articles and save all cross-document sentence pairs.

    Args:
        event: Event record with an ``"id"`` and a ``"news"`` mapping whose
            entries hold the text under ``"article"``.

    Writes ``event_<id>.json`` into OUTPUT_DIR and prints progress; returns
    nothing.
    """
    event_id = event["id"]
    print(f"=== Processing event {event_id} ===")

    articles = event["news"]

    # Documents taking part in the all-to-all matching, in pairing order.
    wanted = ("news1", "conflict_news", "other_news1", "other_news2", "other_news3")

    # Documents that are absent, or that carry no "article" field, are skipped.
    doc_sentences = {
        name: chunk_sentences(articles[name]["article"], name)
        for name in wanted
        if name in articles and "article" in articles[name]
    }

    pairs = generate_all_sentence_pairs(doc_sentences)
    print(f"Generated {len(pairs)} sentence pairs (ALL combinations)")

    output_file = os.path.join(OUTPUT_DIR, f"event_{event_id}.json")
    with open(output_file, "w", encoding="utf-8") as sink:
        json.dump(pairs, sink, ensure_ascii=False, indent=2)

    print(f"Saved → {output_file}")


# ============================================================
# MAIN
# ============================================================
if __name__ == "__main__":
    print("Loading dataset:", INPUT_PATH)

    # Parse the whole dataset file, then handle its entries one at a time.
    events = json.loads(INPUT_PATH.read_text(encoding="utf-8"))

    for record in events:
        process_event(record)


# 2. Semantic Filtering

In [None]:
# semantic_filtering.py
# Step-2
# Filtering criterion: pairs with score > threshold are marked semantic_filtered = 1

import json
import os
import torch
import numpy as np
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tqdm import tqdm

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# BGE reranker checkpoint: a Hugging Face model name or a local path.
# NOTE(review): there is no organisation prefix here; if this is meant as a
# Hub id rather than a local folder it may need to be
# "BAAI/bge-reranker-v2-m3" - confirm.
BGE_RERANKER = "bge-reranker-v2-m3"

# Folder layout: reads the per-event pair files from "Chunk" and writes the
# scored copies to "Semantic_Filtered" (created here if missing).
BASE_DIR = Path("input Low Intensity Factoid Conflict Dataset Folder Path")
DATA_DIR = BASE_DIR.joinpath("data")
INPUT_DIR = DATA_DIR.joinpath("Chunk")
OUTPUT_DIR = DATA_DIR.joinpath("Semantic_Filtered")
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)


# ============================================================
# Load Model
# ============================================================
def load_model():
    """Load the BGE reranker and its tokenizer.

    Returns:
        tuple: ``(tokenizer, model)``; the model has been moved to DEVICE and
        switched to evaluation mode.
    """
    print(f"Loading Reranker: {BGE_RERANKER}")
    tok = AutoTokenizer.from_pretrained(BGE_RERANKER)
    reranker = AutoModelForSequenceClassification.from_pretrained(BGE_RERANKER)
    reranker = reranker.to(DEVICE)
    reranker.eval()
    return tok, reranker


# ============================================================
# Compute Semantic Scores
# ============================================================
@torch.no_grad()
def compute_semantic_scores(tokenizer, model, pairs, batch_size=16):
    """Score every sentence pair with the reranker, batch by batch.

    Args:
        tokenizer: Tokenizer called on two parallel lists of sentence texts.
        model: Model whose output exposes ``.logits``.
        pairs: Pair records holding ``sentence_1["text"]`` and
            ``sentence_2["text"]``.
        batch_size: Number of pairs sent through the model at once.

    Returns:
        list: The model logits (last dimension squeezed), one entry per pair
        in input order.
    """
    left_texts = [pair["sentence_1"]["text"] for pair in pairs]
    right_texts = [pair["sentence_2"]["text"] for pair in pairs]
    total = len(pairs)

    collected = []

    for lo in tqdm(range(0, total, batch_size), ncols=120):
        hi = min(lo + batch_size, total)

        encoded = tokenizer(
            left_texts[lo:hi],
            right_texts[lo:hi],
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        encoded = encoded.to(DEVICE)

        batch_logits = model(**encoded).logits
        collected.extend(batch_logits.squeeze(-1).cpu().tolist())

    return collected


# ============================================================
# Gap-based Threshold Filtering
# ============================================================
def gap_threshold(values):
    """Return the midpoint of the largest drop in the descending-sorted scores.

    Args:
        values: Iterable of numeric scores.

    Returns:
        The midpoint between the two adjacent scores (after sorting in
        descending order) that are furthest apart. With a single score that
        score itself is returned, and with no scores ``0.0`` is returned,
        because no gap exists in either case.
    """
    sorted_vals = sorted(values, reverse=True)

    # Fewer than two scores leave no adjacent pair: np.diff would be empty and
    # np.argmin would raise ValueError, so fall back explicitly.
    if len(sorted_vals) < 2:
        return sorted_vals[0] if sorted_vals else 0.0

    # Differences are negative (descending order), so the most negative one
    # marks the largest drop.
    diffs = np.diff(sorted_vals)
    idx = int(np.argmin(diffs))
    return (sorted_vals[idx] + sorted_vals[idx + 1]) / 2


# ============================================================
# Process Single File
# ============================================================
def process_file(input_path, output_path, tokenizer, model):
    """Score one event file with the reranker and store the filter flags.

    Args:
        input_path: JSON file of sentence pairs to read.
        output_path: Destination JSON file for the annotated pairs.
        tokenizer: Tokenizer passed to compute_semantic_scores.
        model: Model passed to compute_semantic_scores.
    """
    print("\nProcessing:", input_path)

    with open(input_path, "r", encoding="utf-8") as src:
        pairs = json.load(src)

    scores = compute_semantic_scores(tokenizer, model, pairs)

    # Per-file cut-off taken from the largest gap in the score distribution.
    cutoff = gap_threshold(scores)
    print("Semantic threshold:", cutoff)

    # The flag is 1 for scores strictly above the cut-off, otherwise 0.
    # NOTE(review): the GPT detection step selects pairs whose flag is 0 -
    # confirm this polarity is the intended one.
    for record, value in zip(pairs, scores):
        record["semantic_score"] = value
        record["semantic_filtered"] = int(value > cutoff)

    with open(output_path, "w", encoding="utf-8") as dst:
        json.dump(pairs, dst, ensure_ascii=False, indent=2)

    print("Saved:", output_path)


# ============================================================
# MAIN
# ============================================================
if __name__ == "__main__":
    tokenizer, model = load_model()

    # Every JSON file in the chunk output directory, in name order.
    files = sorted(name for name in os.listdir(INPUT_DIR) if name.endswith(".json"))

    for fname in files:
        process_file(INPUT_DIR / fname, OUTPUT_DIR / fname, tokenizer, model)

    print("\nProcessing complete.")

# 3. Vector(Cosine) Filtering

In [None]:
# vector_filtering.py
# Step-3
# Filtering: 1) Auto-filter cosine=1.0, 2) Calculate gap threshold for remaining and filter score < threshold

import json
import os
from pathlib import Path
from tqdm import tqdm
import torch
import numpy as np
from sentence_transformers import SentenceTransformer, util

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# BGE embedding checkpoint: a Hugging Face model name or a local path.
BGE_EMBED_MODEL = "BAAI/bge-base-en-v1.5"

# Folder layout (same root as the earlier steps): reads the files in
# "Semantic_Filtered" and writes to "Cosine_Filtered" (created here if missing).
BASE_DIR = Path("input Low Intensity Factoid Conflict Dataset Folder Path")
DATA_DIR = BASE_DIR.joinpath("data")
INPUT_DIR = DATA_DIR.joinpath("Semantic_Filtered")
OUTPUT_DIR = DATA_DIR.joinpath("Cosine_Filtered")
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)


# ============================================================
# Load Embedding Model
# ============================================================
def load_embedding_model():
    """Create the BGE sentence-embedding model and move it to DEVICE.

    Returns:
        SentenceTransformer: The model used for cosine similarity computation.
    """
    print(f"Loading BGE Embedding Model: {BGE_EMBED_MODEL}")
    embedder = SentenceTransformer(BGE_EMBED_MODEL)
    embedder.to(DEVICE)
    return embedder


# ============================================================
# Compute Cosine Similarity (per event)
# ============================================================
def compute_cosine_similarity_event(model, pairs):
    """Compute the cosine similarity of every sentence pair in an event.

    Args:
        model: SentenceTransformer model used to embed the sentences.
        pairs: List of sentence pair dictionaries.

    Returns:
        list: Cosine similarity scores, one per pair in input order.
    """
    batch_size = 32
    firsts = [pair["sentence_1"]["text"] for pair in pairs]
    seconds = [pair["sentence_2"]["text"] for pair in pairs]

    cosine_scores = []

    for lo in range(0, len(pairs), batch_size):
        window = slice(lo, min(lo + batch_size, len(pairs)))

        vecs_a = model.encode(firsts[window], convert_to_tensor=True)
        vecs_b = model.encode(seconds[window], convert_to_tensor=True)

        # cos_sim yields the full batch-by-batch matrix; the diagonal holds
        # the similarity of each sentence with its own partner.
        sim_matrix = util.cos_sim(vecs_a, vecs_b)
        cosine_scores.extend(sim_matrix.diagonal().cpu().tolist())

    return cosine_scores


# ============================================================
# Modified Cosine Threshold (excluding 1.0 values)
# ============================================================
def cosine_gap_threshold_excluding_ones(scores):
    """Calculate the largest-gap threshold, ignoring near-identical pairs.

    Args:
        scores: List of cosine similarity scores.

    Returns:
        float: Midpoint of the widest gap between adjacent scores among those
        below 0.9999, or 0.9999 when fewer than two such scores remain.
    """
    # Scores of 0.9999 and above are treated as identical and left out.
    near_identical = 0.9999
    ranked = sorted((s for s in scores if s < near_identical), reverse=True)

    # No gap can be measured with fewer than two scores.
    if len(ranked) < 2:
        return near_identical

    # Descending order makes every difference non-positive, so the smallest
    # difference is the widest gap.
    widest = np.argmin(np.diff(ranked))
    upper, lower = ranked[widest], ranked[widest + 1]
    return (upper + lower) / 2


# ============================================================
# Extract Event ID from Filename
# ============================================================
def extract_event_id(filename):
    """Return the numeric event id encoded in a path's stem, for sorting.

    Args:
        filename: Object exposing ``.stem`` (e.g. ``Path("event_12.json")``).

    Returns:
        int: The stem with "event_" removed, parsed as an integer; 999999999
        when the stem is not numeric or *filename* has no usable ``.stem``, so
        that such entries sort last.
    """
    try:
        return int(filename.stem.replace("event_", ""))
    except (AttributeError, ValueError):
        # Only the expected failures are caught; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        return 999999999


# ============================================================
# Process Single File
# ============================================================
def process_file(input_path, output_path, model):
    """Annotate one event file with cosine scores and filter flags.

    Args:
        input_path: Path to input JSON file.
        output_path: Path to output JSON file.
        model: SentenceTransformer model.

    Returns:
        dict: ``perfect_matches`` and ``threshold_filtered`` counts for this
        file, plus the ``threshold`` that was applied.
    """
    with open(input_path, "r", encoding="utf-8") as src:
        pairs = json.load(src)

    cosine_scores = compute_cosine_similarity_event(model, pairs)
    threshold = cosine_gap_threshold_excluding_ones(cosine_scores)

    tallies = {"perfect_matches": 0, "threshold_filtered": 0}

    for record, value in zip(pairs, cosine_scores):
        record["cosine_score"] = float(value)

        if value >= 0.9999:
            # Near-identical sentences are filtered automatically.
            reason = "perfect_matches"
        elif value < threshold:
            # Below the gap threshold: filtered as well.
            reason = "threshold_filtered"
        else:
            # At or above the threshold: the pair survives.
            reason = None

        record["cosine_filtered"] = 0 if reason is None else 1
        if reason is not None:
            tallies[reason] += 1

    with open(output_path, "w", encoding="utf-8") as dst:
        json.dump(pairs, dst, ensure_ascii=False, indent=2)

    return {**tallies, "threshold": threshold}


# ============================================================
# MAIN
# ============================================================
if __name__ == "__main__":

    model = load_embedding_model()

    # JSON files of the semantic-filtering step, ordered by numeric event id.
    files = sorted(
        (entry for entry in INPUT_DIR.iterdir() if entry.suffix == ".json"),
        key=extract_event_id,
    )

    if not files:
        print(f"No JSON files found in {INPUT_DIR}")
        exit(1)

    # Running totals across all event files.
    perfect_match_count = 0
    threshold_filtered_count = 0

    print(f"\nTotal event files: {len(files)}\n")

    pbar = tqdm(files, desc="Processing events", ncols=120)

    for input_file in pbar:
        # Show the event currently being processed next to the progress bar.
        pbar.set_postfix({"event": input_file.stem})

        stats = process_file(input_file, OUTPUT_DIR / input_file.name, model)

        perfect_match_count += stats["perfect_matches"]
        threshold_filtered_count += stats["threshold_filtered"]

    # Final report
    rule = "=" * 60
    print("\n" + rule)
    print(" " * 20 + "FINAL REPORT")
    print(rule)

    print("\nFiltering Statistics:")
    print(f"- Perfect matches (cosine≈1.0) filtered: {perfect_match_count} pairs")
    print(f"- Threshold-based filtered: {threshold_filtered_count} pairs")
    print(f"- Total filtered: {perfect_match_count + threshold_filtered_count} pairs")

    print("\n" + rule + "\n")

# 4. Sentence Conflict Detection

In [None]:
# sentence_conflict_detection.py
# Step-4: Conflict Detection using GPT
# Only verify conflict candidates (semantic_filtered=0 AND cosine_filtered=0) with GPT

import json
import os
from pathlib import Path
from tqdm import tqdm
from openai import OpenAI

# ==========================================
# CONFIG
# ==========================================
# The API key is read from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into this file. The placeholder is only a
# fallback: set the variable (or replace the placeholder) before running.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"))

# Path settings (matching previous scripts)
BASE_DIR = Path("input Low Intensity Factoid Conflict Dataset Folder Path")
DATA_DIR = BASE_DIR / "data"
INPUT_DIR = DATA_DIR / "Cosine_Filtered"  # Input from vector_filtering.py output
OUTPUT_DIR = DATA_DIR / "GPT_Detected"  # Output directory
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Chat model used for the pairwise conflict check.
MODEL_NAME = "gpt-3.5-turbo"
# Alternative: MODEL_NAME = "gpt-4o"

# ==========================================
# GPT Prompt
# ==========================================
def build_prompt_zero_shot(s1, s2):
    """Build the zero-shot conflict-detection prompt for one sentence pair.

    Args:
        s1: Text inserted as Sentence A.
        s2: Text inserted as Sentence B.

    Returns:
        str: The instructions followed by both sentences in double quotes and
        a trailing "Answer:" cue; the prompt asks for a single character,
        1 for conflict and 0 otherwise.
    """
    return f"""You are a conflict detector for sentence pairs.

TASK: Decide if the two sentences make mutually incompatible claims about the same single fact.

CONFLICT = 1 when:
- For the same event/entity/measure, they give different numbers/dates/times/locations/agents; or
- One asserts occurrence while the other denies it; or
- Core outcome for the same event is incompatible (e.g., winner/responsible party/casualty count).

NO CONFLICT = 0 when:
- They discuss different facts/events or different scopes (subset/superset) without contradiction;
- Quantifiers/hedges explain the difference (“at least/around/at most/estimated/reported” vs an exact value);
- A plausible timepoint/update difference explains it;
- Modal/hedged statements don’t directly contradict a categorical claim.

Rules:
- Use only the two sentences; no outside knowledge.
- Normalize numbers/dates/units; treat synonyms/pronouns as the same referent when clear.
- If uncertain they refer to the same fact, output 0.
- Output only one character with no explanation: 1 for conflict, 0 otherwise.

Sentence A: "{s1}"
Sentence B: "{s2}"

Answer:"""

# ==========================================
# GPT Request Function
# ==========================================
def ask_gpt_for_conflict(s1, s2):
    """Ask the chat model whether two sentences conflict.

    Args:
        s1: First sentence text.
        s2: Second sentence text.

    Returns:
        int: 1 when the stripped reply is exactly "1"; 0 for any other reply
        and also when the request raises (the error is printed, not re-raised).
    """
    prompt = build_prompt_zero_shot(s1, s2)

    try:
        reply = client.chat.completions.create(
            max_tokens=1,
            temperature=0,
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
        )
        verdict = reply.choices[0].message.content.strip()
    except Exception as e:
        # Best effort: a failed request counts as "no conflict".
        print(f"GPT API Error: {e}")
        return 0

    return int(verdict == "1")


# ==========================================
# Process Event File
# ==========================================
def process_event_file(input_path, output_path):
    """Label the candidate pairs of one event file with GPT and save the result.

    Args:
        input_path: Path to input JSON file.
        output_path: Path to output JSON file.

    Returns:
        int: Number of conflicts detected.
    """
    with open(input_path, "r", encoding="utf-8") as src:
        pairs = json.load(src)

    # Every pair starts at label 0; only pairs whose semantic and cosine
    # flags are both 0 are sent to GPT.
    gpt_targets = []
    for record in pairs:
        record["conflict_label"] = 0
        if record.get("semantic_filtered") == 0 and record.get("cosine_filtered") == 0:
            gpt_targets.append(record)

    print(f"Processing {len(gpt_targets)} candidate pairs with GPT...")

    progress = tqdm(gpt_targets, desc=f"GPT Detection ({input_path.name})", ncols=120)
    for record in progress:
        record["conflict_label"] = ask_gpt_for_conflict(
            record["sentence_1"]["text"],
            record["sentence_2"]["text"],
        )

    conflicts_detected = sum(1 for record in gpt_targets if record["conflict_label"] == 1)

    with open(output_path, "w", encoding="utf-8") as dst:
        json.dump(pairs, dst, ensure_ascii=False, indent=2)

    return conflicts_detected


# ==========================================
# Extract Event ID for Sorting
# ==========================================
def extract_event_id(filename):
    """Return the numeric event id encoded in a path's stem, for sorting.

    Args:
        filename: Object exposing ``.stem`` (e.g. ``Path("event_12.json")``).

    Returns:
        int: The stem with "event_" removed, parsed as an integer; 999999999
        when the stem is not numeric or *filename* has no usable ``.stem``, so
        that such entries sort last.
    """
    try:
        return int(filename.stem.replace("event_", ""))
    except (AttributeError, ValueError):
        # Only the expected failures are caught; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        return 999999999


# ==========================================
# MAIN
# ==========================================
if __name__ == "__main__":

    # JSON files of the cosine-filtering step, ordered by numeric event id.
    files = sorted(
        (entry for entry in INPUT_DIR.iterdir() if entry.suffix == ".json"),
        key=extract_event_id,
    )

    if not files:
        print(f"No JSON files found in {INPUT_DIR}")
        exit(1)

    print(f"Found {len(files)} event files")

    total_conflicts = 0

    for input_file in files:
        conflicts_detected = process_event_file(input_file, OUTPUT_DIR / input_file.name)
        total_conflicts += conflicts_detected

        print(f"Event {input_file.name}: {conflicts_detected} conflicts detected")

    # Final statistics
    rule = "=" * 60
    print("\n" + rule)
    print(" " * 20 + "FINAL REPORT")
    print(rule)
    print(f"Total events processed: {len(files)}")
    print(f"Total conflicts detected: {total_conflicts}")
    print(rule)

# 5. Document Conflict Detection

In [None]:
# document_conflict_detection.py
# Document-level conflict detection using filtered sentence pairs

import os, json, random, time, re
from pathlib import Path
from tqdm import tqdm
from openai import OpenAI

# ===========================
# Basic Settings
# ===========================
# Dataset root (replace the placeholder with the real folder). It is declared
# once so the three locations below cannot drift apart.
BASE_DIR = Path("input Low Intensity Factoid Conflict Dataset Folder Path")

DATA_PATH = BASE_DIR / "data" / "Low_Intensity_Factoid_Conflict.json"
CHUNK_DIR = BASE_DIR / "data" / "GPT_Detected"

SAVE_DIR = BASE_DIR / "results"
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# The API key is read from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into this file. The placeholder is only a
# fallback: set the variable (or replace the placeholder) before running.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"))
MODELS = ["gpt-3.5-turbo"]
GEN_PARAMS = dict(temperature=0.0, max_tokens=512)
DOC_COUNT = 5  # upper bound for document indices accepted by intify_list

# ===========================
# Controlled Random Seeds
# ===========================
# Only ID_SHUFFLE_SEED_BASE is referenced by the code in this section; the
# other two seeds are not used here.
MASTER_SEED = 42
SAMPLE_ORDER_SEED = 100
ID_SHUFFLE_SEED_BASE = 1000


# ================================================================
# Load Candidate Pairs
# ================================================================
def load_candidate_pairs(event_id):
    """Load the sentence pairs that GPT labelled as conflicts for one event.

    Args:
        event_id: Event identifier used in the ``event_<id>.json`` file name.

    Returns:
        list: One dict per pair with ``conflict_label == 1`` holding both
        sentence texts (``s1``, ``s2``) and their document ids (``doc_id_1``,
        ``doc_id_2``); empty when the event file does not exist.
    """
    path = CHUNK_DIR / f"event_{event_id}.json"
    if not path.exists():
        return []

    with open(path, "r", encoding="utf-8") as src:
        records = json.load(src)

    return [
        {
            "s1": record["sentence_1"]["text"],
            "s2": record["sentence_2"]["text"],
            "doc_id_1": record["sentence_1"]["doc_id"],
            "doc_id_2": record["sentence_2"]["doc_id"],
        }
        for record in records
        if record.get("conflict_label") == 1
    ]


# ================================================================
# Document Formatting
# ================================================================
def format_docs(docs):
    """Render ``(tag, text, doc_id)`` triples as numbered document blocks.

    Documents are numbered from 1 in the order given and separated by a blank
    line.
    """
    blocks = []
    for position, entry in enumerate(docs, start=1):
        tag, txt, doc_id = entry
        blocks.append(f"[Doc {position}] ({tag}, id={doc_id}): {txt}")
    return "\n\n".join(blocks)


def map_candidates_to_indices(cands, doc_id_to_idx):
    """Translate the document ids of candidate pairs into document indices.

    Returns:
        tuple: ``(used, dropped)``. ``used`` holds the pairs whose two
        documents both map to a truthy index, with the indices stored as
        ``i1``/``i2``; ``dropped`` holds the remaining candidates unchanged.
    """
    used = []
    dropped = []
    for cand in cands:
        first = doc_id_to_idx.get(cand["doc_id_1"])
        second = doc_id_to_idx.get(cand["doc_id_2"])
        if first and second:
            used.append({
                "s1": cand["s1"], "s2": cand["s2"],
                "i1": first, "i2": second
            })
        else:
            dropped.append(cand)
    return used, dropped


def format_focus_pairs(mapped):
    """Render mapped candidate pairs as numbered blocks for the GPT prompt.

    Returns an empty string when *mapped* is empty or None; otherwise one
    "[Pair n]" block per pair, separated by blank lines.
    """
    if not mapped:
        return ""
    return "\n\n".join(
        f"[Pair {number}]\n"
        f"Sentence A (Doc {pair['i1']}): {pair['s1']}\n"
        f"Sentence B (Doc {pair['i2']}): {pair['s2']}"
        for number, pair in enumerate(mapped, 1)
    )


# ================================================================
# Prompts
# ================================================================
# Document-level prompt used when no candidate sentence pairs are available.
# It is a str.format template: {docs} receives the formatted documents, and
# the doubled braces produce the literal braces of the JSON answer skeleton.
PROMPT_BASE = """
You are an expert factual conflict detector.

You will be given 5 documents (Doc 1..5). Some may describe the SAME event; others may be unrelated.

Your task:
1) Determine whether any documents contain factual contradictions about the SAME event or entity.
2) Translation/paraphrase differences are NOT contradictions.
3) Only explicit factual disagreements count.

Analyze the following documents:
{docs}

Output ONLY valid JSON:
{{
  "conflict_exists": true/false,
  "conflicting_docs": [integers],
  "reason": "one concise sentence"
}}
""".strip()


# Document-level prompt used when candidate sentence pairs are available.
# It is a str.format template: {docs} receives the formatted documents and
# {focus} the formatted candidate pairs, which the prompt presents as
# reference-only hints. The doubled braces produce the literal braces of the
# JSON answer skeleton.
PROMPT_WITH_HINTS = """
You are an expert factual conflict detector.

You will be given 5 documents (Doc 1..5). Some may describe the SAME event; others may be unrelated.

Your task:
1) Determine whether any documents contain factual contradictions about the SAME event or entity.
2) Translation/paraphrase differences are NOT contradictions.
3) Only explicit factual disagreements count.

Analyze the following documents:
{docs}

IMPORTANT: Some sentence pairs are provided below for reference only. These may or may not indicate actual conflicts. 
You MUST independently analyze ALL documents and make your own judgment.

{focus}

Output ONLY valid JSON:
{{
  "conflict_exists": true/false,
  "conflicting_docs": [integers],
  "reason": "one concise sentence"
}}
""".strip()


# ================================================================
# Utility Functions
# ================================================================
def parse_json(text):
    """Extract the JSON object contained in a GPT response.

    Args:
        text: Raw response text; may be None, wrapped in a Markdown code
            fence, or surrounded by extra prose.

    Returns:
        dict: The object parsed from the first "{" through the last "}", or
        an empty dict when no braces are found or the span is not valid JSON.
    """
    text = (text or "").strip()

    # Drop a leading ``` / ```json fence and a trailing ``` fence.
    if text.startswith("```"):
        text = re.sub(r"^```(json)?|```$", "", text, flags=re.DOTALL).strip()

    first, last = text.find("{"), text.rfind("}")
    if first == -1 or last == -1:
        return {}

    try:
        return json.loads(text[first:last + 1])
    except (ValueError, RecursionError):
        # Malformed (or absurdly nested) JSON yields the empty result. Other
        # exceptions, e.g. KeyboardInterrupt, are no longer swallowed.
        return {}


def to_bool(x):
    """Interpret a parsed JSON value as a boolean.

    Booleans are returned unchanged; strings are true when, stripped and
    lower-cased, they equal "true", "1" or "yes"; every other type is False.
    """
    if isinstance(x, str):
        return x.strip().lower() in {"true", "1", "yes"}
    return x if isinstance(x, bool) else False


def intify_list(xs):
    """Extract valid document indices from a loosely formatted value.

    Args:
        xs: None, a single int or string, or an iterable of such items.

    Returns:
        list: Sorted, de-duplicated indices within ``1..DOC_COUNT``. Integer
        items are taken as they are; for anything else the first run of
        digits in its string form is used, and items without digits are
        skipped.
    """
    if xs is None:
        return []
    if isinstance(xs, (int, str)):
        xs = [xs]

    found = set()
    for item in xs:
        if isinstance(item, int):
            number = item
        else:
            hit = re.search(r"\d+", str(item))
            if hit is None:
                continue
            number = int(hit.group())
        if 1 <= number <= DOC_COUNT:
            found.add(number)
    return sorted(found)


def call_gpt(model, prompt):
    """Send *prompt* to the chat model, trying up to three times.

    Returns:
        The message content of the first successful response, or the string
        "{}" when all three attempts raise. Each failure is printed and is
        followed by a 3-second pause.
    """
    conversation = [
        {"role":"system", "content":"Return ONLY valid JSON."},
        {"role":"user", "content":prompt}
    ]

    attempts_left = 3
    while attempts_left:
        attempts_left -= 1
        try:
            resp = client.chat.completions.create(
                model=model,
                messages=conversation,
                **GEN_PARAMS
            )
            return resp.choices[0].message.content
        except Exception as e:
            print("Error:", e)
            time.sleep(3)
    return "{}"


# ================================================================
# Build Document Sample
# ================================================================
def build_document_sample(item):
    """Build the shuffled five-document sample for one event.

    Args:
        item: Event record with an ``"id"`` and a ``"news"`` mapping that
            contains the five articles under ``"article"``.

    Returns:
        tuple: ``(docs, doc_id_map)``. ``docs`` is the shuffled list of
        ``(tag, article, doc_id)`` triples; ``doc_id_map`` maps each doc_id
        to its 1-based position in that list.
    """
    import zlib  # local import: only needed for the stable per-event seed

    n = item["news"]
    docs = [
        ("news1", n["news1"]["article"], "news1"),
        ("conflict_news", n["conflict_news"]["article"], "conflict_news"),
        ("other1", n["other_news1"]["article"], "other_news1"),
        ("other2", n["other_news2"]["article"], "other_news2"),
        ("other3", n["other_news3"]["article"], "other_news3"),
    ]

    event_id = item["id"]
    if isinstance(event_id, str):
        # hash() of a str is salted per process (PYTHONHASHSEED), which made
        # the shuffle differ between runs; CRC32 of the text is stable.
        id_hash = zlib.crc32(event_id.encode("utf-8"))
    else:
        # Non-string ids keep the previous seed derivation, so existing
        # shuffles for integer ids are unchanged.
        id_hash = hash(event_id)
    seed = ID_SHUFFLE_SEED_BASE + id_hash % 10000

    # A dedicated generator gives the same order as seeding the module-level
    # RNG with `seed`, without disturbing global random state.
    random.Random(seed).shuffle(docs)

    doc_id_map = {doc_id: i for i, (_, _, doc_id) in enumerate(docs, 1)}

    return docs, doc_id_map


# ================================================================
# Process Single Event
# ================================================================
def process_event(model, item):
    """Run document-level conflict detection for a single event.

    Args:
        model: Model name passed to the GPT call.
        item: Event data item.

    Returns:
        dict: The event id, model, shuffled document order, number of
        candidate pairs used as hints, the parsed prediction
        (``conflict_exists``, ``conflicting_docs``, ``reason``) and the raw
        model output.
    """
    docs, doc_id_map = build_document_sample(item)

    # Candidate pairs from the sentence-level step, expressed as Doc indices.
    raw_pairs = load_candidate_pairs(item["id"])
    mapped_pairs, _ = map_candidates_to_indices(raw_pairs, doc_id_map)

    # The hint-bearing prompt is used only when at least one pair mapped.
    docs_text = format_docs(docs)
    if mapped_pairs:
        prompt = PROMPT_WITH_HINTS.format(
            docs=docs_text,
            focus=format_focus_pairs(mapped_pairs),
        )
    else:
        prompt = PROMPT_BASE.format(docs=docs_text)

    raw_out = call_gpt(model, prompt)
    parsed = parse_json(raw_out)

    return {
        "id": item["id"],
        "model": model,
        "doc_order": [entry[0] for entry in docs],
        "candidate_pairs_used": len(mapped_pairs),
        "conflict_exists": to_bool(parsed.get("conflict_exists")),
        "conflicting_docs": intify_list(parsed.get("conflicting_docs", [])),
        "reason": parsed.get("reason", ""),
        "raw_output": raw_out
    }


# ================================================================
# MAIN
# ================================================================
def main():
    """Run document-level conflict detection for every model in MODELS.

    Loads the dataset, processes each event with each model, writes one
    results file per model into SAVE_DIR and prints a short summary.
    """
    with open(DATA_PATH, "r", encoding="utf-8") as src:
        data = json.load(src)

    # A file holding a single event object is treated as a one-item list.
    if isinstance(data, dict):
        data = [data]

    print(f"Loaded {len(data)} events")

    heavy_rule = "#" * 70
    rule = "=" * 60

    for model in MODELS:
        print(f"\n{heavy_rule}")
        print(f"# MODEL: {model}")
        print(heavy_rule)

        results = [
            process_event(model, item)
            for item in tqdm(data, desc=f"Processing events - {model}")
        ]

        # Hyphens in the model name become underscores in the file name.
        output_path = SAVE_DIR / f"conflict_detection_{model.replace('-', '_')}.json"
        with open(output_path, "w", encoding="utf-8") as dst:
            json.dump(results, dst, indent=2, ensure_ascii=False)

        conflicts_found = sum(1 for entry in results if entry["conflict_exists"])

        print(f"\n{rule}")
        print(f"RESULTS - {model}")
        print(rule)
        print(f"Total events processed: {len(results)}")
        print(f"Events with conflicts detected: {conflicts_found}")
        print(f"Results saved → {output_path}")
        print(f"{rule}\n")


if __name__ == "__main__":
    main()