# Week 4.
**Deliverables**
* Refine prompts for Binary & Multitype

## 1. Prompts

In [None]:
# (1) Zero-shot binary prompt: an IPV definition plus the output contract, with no
# labeled examples. "{text}" is a literal placeholder for the sentence to classify.
# The answer format is described in words here, so the placeholder is the only
# brace pair in this template.
prompt_binary_zeroshot = """
You are analyzing whether a sentence describes Intimate Partner Violence (IPV).

Definition:
Intimate Partner Violence includes any form of physical, emotional, or sexual harm
inflicted by a partner. This may involve control, manipulation, threats, humiliation,
or physical aggression.

Decide if the following sentence involves IPV or not.

Sentence: "{text}"

Return ONLY one JSON object, enclosed between <json> and </json>.
Format: a single key "label" with value "IPV" or "NOT_IPV".
Print nothing else.
""".strip()


# (2) Few-shot binary prompt: four labeled example sentences (two IPV, two NOT_IPV)
# precede the target sentence.
# The "Format:" line contains literal JSON braces, so "{text}" must be filled with
# str.replace; str.format would treat {"label": ...} as a replacement field and fail.
prompt_binary_fewshot = """
You are classifying sentences as describing Intimate Partner Violence (IPV) or NOT_IPV.

Definition:
IPV occurs when one partner harms or controls another physically, emotionally, or sexually.

Here are examples (for guidance only):
1. "He pushed me against the wall and shouted that I’m worthless." → IPV
2. "She often insults me and forbids me from seeing my friends." → IPV
3. "We argued but both apologized and moved on peacefully." → NOT_IPV
4. "He surprised me with flowers after work." → NOT_IPV

Now analyze this sentence:
"{text}"

Return ONLY one JSON object, enclosed between <json> and </json>.
Format: {"label": "IPV"} or {"label": "NOT_IPV"}.
Print nothing else.
""".strip()


# (3) Chain-of-thought binary prompt: lists three guiding questions but instructs the
# model to reason silently and emit only the final JSON label.
# Unlike the other templates, the "{text}" placeholder comes after the output
# instructions. The "Format:" line contains literal JSON braces, so fill "{text}" with
# str.replace rather than str.format.
prompt_binary_cot = """
You are reasoning step-by-step to decide if a sentence describes Intimate Partner Violence (IPV).

Guidelines:
- IPV includes threats, coercion, physical harm, humiliation, or emotional manipulation.
- NOT_IPV describes healthy, neutral, or unrelated situations.

Think internally (do NOT show your reasoning) about:
1. Does the sentence show any behavior that causes harm, fear, or control?
2. Is there a partner/relationship context?
3. Does it express affection or support instead of harm?

After thinking silently, return ONLY one JSON object, enclosed between <json> and </json>.
Format: {"label": "IPV"} or {"label": "NOT_IPV"}.
Print nothing else.

Sentence: "{text}"
""".strip()


# (4) Meta binary prompt: assigns an analyst persona and a cautious decision rule in
# which uncertain sentences are labeled NOT_IPV.
# The "Format:" line contains literal JSON braces, so fill "{text}" with str.replace
# rather than str.format.
prompt_binary_meta = """
You are a social-behavioral analyst evaluating sentences for signs of Intimate Partner Violence (IPV).

Your objective is to be accurate but cautious.
- If the sentence clearly involves harm, coercion, or control → label as IPV.
- If the sentence shows affection, neutrality, or uncertainty → label as NOT_IPV.

Reflect internally before answering; do NOT print your reasoning.

Sentence: "{text}"

Return ONLY one JSON object, enclosed between <json> and </json>.
Format: {"label": "IPV"} or {"label": "NOT_IPV"}.
Print nothing else.
""".strip()


# (5) Self-consistency binary prompt: asks the model to evaluate the sentence several
# times internally and report the majority label.
# NOTE(review): the voting is only requested inside this single prompt; confirm whether
# sampling several generations and voting in code was intended instead.
# The "Format:" line contains literal JSON braces, so fill "{text}" with str.replace
# rather than str.format.
prompt_binary_selfconsistency = """
You will internally evaluate the sentence for Intimate Partner Violence (IPV) multiple times
and choose the majority label as your final answer.

Guidelines:
- IPV → signs of physical, emotional, or sexual harm, threats, or coercion.
- NOT_IPV → supportive, neutral, or unrelated content.

Do NOT reveal thoughts or votes.

Sentence: "{text}"

Return ONLY one JSON object, enclosed between <json> and </json>.
Format: {"label": "IPV"} or {"label": "NOT_IPV"}.
Print nothing else.
""".strip()

## 2. System & Model Setup

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd
import re
import math
import numpy as np
from datetime import datetime
from tqdm import tqdm
from pathlib import Path
import json
import os
import time
from datetime import datetime
from __future__ import annotations
from typing import Dict, List, Optional, Tuple

In [None]:
# Hugging Face model identifier shared by the tokenizer and the weights below
model_name = "Qwen/Qwen2.5-7B-Instruct"

# Load the tokenizer and the causal LM. Weights are requested in bfloat16 and
# device_map="auto" leaves device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

## 3. Prediction Generation

In [None]:
# Clone the project repository and change into its evaluation folder.
# NOTE(review): `!` and `%cd` are IPython syntax, so this cell only runs in a notebook.
# The `%cd` target is a relative path, so re-running the cell resolves it from the
# directory the previous run ended in.
!git clone https://github.com/zelaneroz/ipvresearch25
%cd ipvresearch25/1_LLM_Eval
# Load the dataset; the path is relative to the directory entered by `%cd` above.
filename = "../Dataset/617points.csv"
df = pd.read_csv(filename)

In [None]:
# Map each prompting strategy name to its binary (IPV / NOT_IPV) template.
# The keys are used as the `prompt_type` value and in the output file names.
binary_prompts = {
    "zeroshot": prompt_binary_zeroshot,
    "fewshot": prompt_binary_fewshot,
    "cot": prompt_binary_cot,
    "meta": prompt_binary_meta,
    "selfconsistency": prompt_binary_selfconsistency
}

# Same strategy names mapped to the multilabel templates.
# NOTE(review): the prompt_multilabel_* names are not defined in the cells shown here;
# confirm they are defined before this cell runs, otherwise it raises NameError.
multilabel_prompts = {
    "zeroshot": prompt_multilabel_zeroshot,
    "fewshot": prompt_multilabel_fewshot,
    "cot": prompt_multilabel_cot,
    "meta": prompt_multilabel_meta,
    "selfconsistency": prompt_multilabel_selfconsistency
}

### 3.1 Binary Prediction Generation

In [None]:
def _keyword_label(text):
    """Return "NOT_IPV" or "IPV" if that keyword occurs in *text*, else None."""
    upper = text.upper()
    # "NOT_IPV" contains "IPV", so the longer label has to be checked first.
    if "NOT_IPV" in upper:
        return "NOT_IPV"
    if "IPV" in upper:
        return "IPV"
    return None


def _normalize_label(value):
    """Reduce a parsed JSON value to a label string, or None if it holds no label."""
    if isinstance(value, (list, tuple)):
        value = value[0] if value else None
    if not isinstance(value, str):
        return None
    cleaned = value.strip()
    if not cleaned:
        return None
    canonical = cleaned.upper()
    # Fold case variants such as "ipv" onto the two expected labels; keep anything
    # else verbatim so unexpected model answers stay visible in the results.
    return canonical if canonical in ("IPV", "NOT_IPV") else cleaned


def _extract_binary_label(result_text):
    """Extract the label from a model response, falling back to "UNKNOWN"."""
    import json
    import re

    match = re.search(r"<json[^>]*>\s*(.*?)\s*</json>", result_text, re.DOTALL | re.IGNORECASE)
    if not match:
        # No <json> block at all: look for the keywords anywhere in the response.
        return _keyword_label(result_text) or "UNKNOWN"

    block = match.group(1).strip()
    try:
        parsed = json.loads(block)
    except json.JSONDecodeError:
        # JSON-like but invalid content: look for the keywords inside the block.
        return _keyword_label(block) or "UNKNOWN"

    if isinstance(parsed, dict):
        candidate = parsed.get("label") or parsed.get("labels")
    else:
        candidate = parsed
    # Valid JSON without a usable label still gets a keyword scan of the block.
    return _normalize_label(candidate) or _keyword_label(block) or "UNKNOWN"


def test_binary_prompts(df, n_samples=3, path="../1_LLM_Eval/test_results"):
    """
    Run every binary prompt type on the first rows of *df* and save the outputs.

    For each entry of the module-level ``binary_prompts`` dict, the sentence is
    substituted into the template, the module-level ``model`` generates a
    response with greedy decoding, and the IPV / NOT_IPV label is extracted from
    the ``<json>...</json>`` block. One file ``binary_<prompt_type>.json`` is
    written per prompt type.

    Args:
        df: DataFrame holding the sentences; the "items" column is used when
            present, otherwise the first column.
        n_samples: Number of leading rows to evaluate.
        path: Directory for the result files; created if missing.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    import json
    from pathlib import Path

    df_subset = df.head(n_samples)
    results_dir = Path(path)
    results_dir.mkdir(parents=True, exist_ok=True)

    print("Running binary classification tests...")
    print(f"Number of samples: {len(df_subset)}")
    print(f"Results will be saved in: {results_dir}\n")

    has_items_column = "items" in df.columns

    for prompt_type, template in binary_prompts.items():
        print(f"Testing prompt type: {prompt_type}")

        records = []

        for i, row in df_subset.iterrows():
            # str() keeps non-string cells (e.g. NaN) from breaking str.replace.
            text = str(row["items"]) if has_items_column else str(row.iloc[0])
            # str.replace, not str.format: the templates contain literal JSON braces.
            prompt_text = template.replace("{text}", text)

            # NOTE(review): the prompt is tokenized as raw text, without the tokenizer's
            # chat template — confirm this is intended for an Instruct checkpoint.
            try:
                inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
                # Greedy decoding; temperature only applies when sampling.
                output = model.generate(
                    **inputs,
                    max_new_tokens=128,
                    do_sample=False,
                )
                # Keep only the newly generated tokens, not the echoed prompt.
                gen_tokens = output[0][inputs["input_ids"].shape[-1]:]
                result_text = tokenizer.decode(gen_tokens, skip_special_tokens=True)
                label = _extract_binary_label(result_text)
            except Exception as e:
                # Best effort: record the failure and continue with the next row.
                # The error message is not scanned for keywords.
                result_text = f"ERROR: {e}"
                label = "UNKNOWN"

            records.append({
                "id": int(i),
                "prompt_type": prompt_type,
                "extracted_label": label,
                "raw_response": result_text
            })

        # Save per prompt type
        output_path = results_dir / f"binary_{prompt_type}.json"
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=4, ensure_ascii=False)

        print(f"Saved results for '{prompt_type}' to {output_path}")

    print("\nAll binary prompt tests completed.")

### 3.2 Multitype Prediction Generation

In [None]:
# Load the Qwen tokenizer and weights used by the multitype functions below.
# NOTE(review): the same checkpoint is already loaded into `tokenizer` / `model` in the
# "System & Model Setup" cell; running both cells loads the weights a second time.
model_name = "Qwen/Qwen2.5-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")


# ---------- Stage 1: Classification ----------
def multitype_predict(sentence, sample_id=None):
    """
    Classify *sentence* into emotional / physical / sexual abuse flags.

    Builds the classification prompt, generates a response with the module-level
    ``model`` using greedy decoding, and parses the response as JSON.

    Args:
        sentence: Text to classify.
        sample_id: Identifier echoed into the prompt's "id" field; a falsy value
            is rendered as 0.

    Returns:
        The dict parsed from the model response, or
        ``{"emotional": 0, "physical": 0, "sexual": 0}`` when no JSON object can
        be recovered from it.
    """
    import re

    prompt = f"""
You are an expert classifier identifying types of Intimate Partner Violence (IPV) from text.

Decide which of the following apply (multiple may apply):

1. Emotional abuse — verbal or nonverbal behaviors that harm a partner’s self-worth (e.g., humiliation, control, threats, neglect).
2. Physical abuse — intentional physical force causing harm or fear (e.g., hitting, pushing, choking, restraining).
3. Sexual abuse — unwanted sexual acts or coercion (e.g., pressuring, forcing sex, violating consent).

If none apply, set all to 0.

Respond **only** with a valid JSON dictionary:
{{
  "id": "{sample_id or 0}",
  "emotional": 0 or 1,
  "physical": 0 or 1,
  "sexual": 0 or 1
}}

Sentence: "{sentence}"
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=False  # use greedy decoding instead of temperature=0
    )
    # Decode only the continuation, not the echoed prompt.
    prompt_len = inputs["input_ids"].shape[-1]
    decoded = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Try the whole response first, then progressively narrower brace-delimited
    # spans in case the model wrapped the JSON in extra text.
    candidates = [decoded]
    for pattern in (r"\{.*\}", r"\{.*?\}"):
        found = re.search(pattern, decoded, re.DOTALL)
        if found:
            candidates.append(found.group())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Callers index the result by key, so only a JSON object is acceptable.
        if isinstance(parsed, dict):
            return parsed

    return {"emotional": 0, "physical": 0, "sexual": 0}


# ---------- Stage 2: Confidence ----------
def logprob_confidence(prompt, generated_text):
    """
    Score how likely the model finds *generated_text* as a continuation of *prompt*.

    The score is exp(mean log-probability) over the tokens of *generated_text*
    only, each conditioned on the prompt and the preceding generated tokens.
    Prompt tokens are context and do not enter the average.

    Args:
        prompt: Text the model is conditioned on.
        generated_text: Continuation whose tokens are scored.

    Returns:
        A float in [0, 1]; 0.0 when there is no token to score.
    """
    if not generated_text:
        return 0.0

    # Tokenize the two parts separately so the boundary between context and scored
    # tokens is exact, then feed them to the model as one sequence.
    prompt_ids = tokenizer(prompt)["input_ids"]
    gen_ids = tokenizer(generated_text, add_special_tokens=False)["input_ids"]
    all_ids = list(prompt_ids) + list(gen_ids)

    # The first token of the sequence has no preceding position to predict it from.
    start = max(len(prompt_ids), 1)
    if len(all_ids) <= start:
        return 0.0

    input_ids = torch.tensor([all_ids], dtype=torch.long, device=model.device)
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits

    # Logits at position i - 1 predict token i; compute in float32 for stable sums.
    scored_logits = logits[0, start - 1:len(all_ids) - 1].float()
    log_probs = torch.log_softmax(scored_logits, dim=-1)
    targets = input_ids[0, start:].unsqueeze(-1)
    token_logps = log_probs.gather(-1, targets).squeeze(-1)

    avg_logp = token_logps.mean().item()
    confidence = math.exp(avg_logp)
    # Clamp to [0, 1]
    return float(max(0.0, min(1.0, confidence)))


# ---------- Combined Function ----------
def multitype_classify(sentence, sample_id=None):
    """
    Classify *sentence* and attach a confidence score to the prediction.

    Calls ``multitype_predict`` for the labels, then ``logprob_confidence`` on the
    classification prompt and the JSON-serialized prediction.

    Args:
        sentence: Text to classify.
        sample_id: Identifier stored under "id"; a falsy value is stored as 0.

    Returns:
        A dict with the predicted fields plus "confidence_score" (rounded to four
        decimals) and "id".
    """
    raw_pred = multitype_predict(sentence, sample_id)
    # The prediction is parsed model output, which is not guaranteed to be a JSON
    # object. Fall back to the all-zero prediction instead of failing on key
    # assignment, and copy so the returned dict is independent of the input.
    if isinstance(raw_pred, dict):
        pred = dict(raw_pred)
    else:
        pred = {"emotional": 0, "physical": 0, "sexual": 0}

    # This prompt must stay identical to the one built in multitype_predict, so the
    # confidence is computed against the same context that produced the prediction.
    classification_prompt = f"""
You are an expert classifier identifying types of Intimate Partner Violence (IPV) from text.

Decide which of the following apply (multiple may apply):

1. Emotional abuse — verbal or nonverbal behaviors that harm a partner’s self-worth (e.g., humiliation, control, threats, neglect).
2. Physical abuse — intentional physical force causing harm or fear (e.g., hitting, pushing, choking, restraining).
3. Sexual abuse — unwanted sexual acts or coercion (e.g., pressuring, forcing sex, violating consent).

If none apply, set all to 0.

Respond **only** with a valid JSON dictionary:
{{
  "id": "{sample_id or 0}",
  "emotional": 0 or 1,
  "physical": 0 or 1,
  "sexual": 0 or 1
}}

Sentence: "{sentence}"
"""
    # NOTE(review): the scored string is the re-serialized prediction, not the raw
    # model response — confirm that this is the intended quantity.
    output_str = json.dumps(pred)
    conf = logprob_confidence(classification_prompt, output_str)
    pred["confidence_score"] = round(conf, 4)
    pred["id"] = sample_id or 0
    return pred

## 4. Results
* Show a table of results
* Append the results to the appropriate JSON file
* Add visualizations

* This section should use `eval_llm_pipeline.py`.