## LLM classifier

In [None]:
from dotenv import load_dotenv
from typing import List, Dict
import os
load_dotenv()

from openai import OpenAI



# -------------------------
# Config
# -------------------------
# Directory holding the annotation-guideline markdown files. Despite the name this is a
# directory path, not a glob pattern: load_definitions() applies the "*.md" pattern itself.
ANNOTATION_GLOB = "prompts/annotation_guidelines"
# Output CSV filename (not referenced by any cell visible in this notebook).
OUT_CSV = "antisemitism_placeholders_dataset.csv"

# Number of post templates requested per definition. It is interpolated into TASK_PROMPT
# once, when the f-string below is evaluated; changing it later does not update the prompt.
NUM_POSTS_PER_DEFINITION = 1
# Generation instructions (not referenced by any cell visible in this notebook).
TASK_PROMPT = f"""Generate {NUM_POSTS_PER_DEFINITION} social media post templates that is antisemitic. IMPORTANT, make sure this post complies with the definition below of antisemitism.
First, read the definition carefully. Then, generate the post. 
keep the posts short (1-2 sentences), and make them as realistic as possible. 
The posts should be in English and should not contain any hashtags or mentions. Each post should be different in terms of content and style."""



from pathlib import Path
def load_definitions(definitions_dir: str) -> Dict[str, str]:
    """Load antisemitism definitions from the markdown files in a directory.

    Args:
        definitions_dir: Directory searched (non-recursively) for ``*.md`` files.

    Returns:
        Mapping of each file's stem (filename without ``.md``) to its UTF-8 text
        with surrounding whitespace stripped, in sorted path order. The mapping
        is empty when the directory contains no ``*.md`` files.

    Raises:
        FileNotFoundError: If ``definitions_dir`` does not exist.
    """
    definitions_path = Path(definitions_dir)
    if not definitions_path.exists():
        raise FileNotFoundError(f"Directory {definitions_dir} not found")

    # Path.glob yields entries in arbitrary filesystem order. Sorting keeps the
    # dict, and any prompt serialised from it, identical across runs and machines.
    return {
        md_file.stem: md_file.read_text(encoding="utf-8").strip()
        for md_file in sorted(definitions_path.glob("*.md"))
    }


# Map each guideline file's stem to its text; ANNOTATION_GLOB is the directory that is searched.
annotations = load_definitions(ANNOTATION_GLOB)

{'id': 'gen-1757053508-U0RPFjwEJWJYxBQG3rbS', 'provider': 'OpenAI', 'model': 'openai/gpt-3.5-turbo', 'object': 'chat.completion', 'created': 1757053508, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': 'The meaning of life is subjective and can vary from person to person. Some believe that the meaning of life is to find happiness, fulfillment, and personal growth. Others believe that it is to serve a higher purpose or to make a positive impact on the world. Ultimately, it is up to each individual to determine the meaning of their own life.', 'refusal': None, 'reasoning': None}}], 'system_fingerprint': None, 'usage': {'prompt_tokens': 14, 'completion_tokens': 69, 'total_tokens': 83, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0}}}


In [None]:
import os
import json
import requests
from typing import Dict, List, Optional, Generator, Any

# Root of the OpenRouter REST API; llm() appends "/chat/completions" to its base URL.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"


def _build_headers(
    api_key: Optional[str] = None,
    extra_headers: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
    """Compose required and optional headers."""
    key = api_key or os.getenv("OPENROUTER_API_KEY")
    if not key:
        raise RuntimeError("Missing OPENROUTER_API_KEY")

    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    # Optional attribution headers (helpful for rankings/analytics)
    # https://openrouter.ai/docs/quickstart • https://openrouter.ai/docs/app-attribution
    site_url = os.getenv("OPENROUTER_SITE_URL")
    site_name = os.getenv("OPENROUTER_SITE_NAME")
    if site_url:
        headers["HTTP-Referer"] = site_url
    if site_name:
        headers["X-Title"] = site_name

    if extra_headers:
        headers.update(extra_headers)

    return headers


def llm(
    messages: List[Dict[str, Any]],
    model: str,
    *,
    api_key: Optional[str] = None,
    base_url: str = OPENROUTER_BASE_URL,
    timeout: int = 90,
    extra_headers: Optional[Dict[str, str]] = None,
    return_json: bool = False,
    **kwargs: Any,
) -> Any:
    """
    Send one chat-completion request to OpenRouter and return the reply.

    Args:
      - messages: OpenAI-style messages [{"role": "...", "content": "..."}]
      - model: model id, e.g. "openai/gpt-4o-mini"
      - api_key / extra_headers: passed to _build_headers()
      - base_url: API root; "/chat/completions" is appended to it
      - timeout: passed to requests.post
      - return_json: return the whole parsed response instead of the text
      - **kwargs: extra request-body fields (temperature, max_tokens, top_p,
        stop, seed, response_format, tools, tool_choice, ...); entries whose
        value is None are left out

    Returns:
      - By default, the "content" of the first choice's message
      - If return_json=True, the full parsed JSON response

    Raises:
      - ValueError: messages is not a list
      - RuntimeError: non-OK HTTP status, or (when return_json is False) a body
        without choices[0].message.content

    Docs: https://openrouter.ai/docs/api-reference/chat-completion
    """
    if not isinstance(messages, list):
        raise ValueError("messages must be a list of dicts")

    endpoint = base_url.rstrip("/") + "/chat/completions"

    # Required fields first, then every caller-supplied option that is not None.
    body: Dict[str, Any] = {"model": model, "messages": messages}
    for option, value in kwargs.items():
        if value is not None:
            body[option] = value

    request_headers = _build_headers(api_key=api_key, extra_headers=extra_headers)
    resp = requests.post(endpoint, json=body, headers=request_headers, timeout=timeout)

    if not resp.ok:
        # Prefer the server's JSON error body; fall back to the raw text.
        try:
            detail = resp.json()
        except Exception:
            detail = resp.text
        raise RuntimeError(f"OpenRouter error {resp.status_code}: {detail}")

    data = resp.json()
    if return_json:
        return data

    # Standard OpenAI-style shape
    try:
        first_choice = data["choices"][0]
        return first_choice["message"]["content"]
    except Exception:
        raise RuntimeError(f"Unexpected response shape: {json.dumps(data)[:500]}")



# Smoke test: request a small JSON object and print the token usage reported in the response.
demo_messages = [
    {"role": "system", "content": "you are a helpful assitant"},
    {"role": "user", "content": "Give me a JSON with fields a,b."},
]
demo_options = {
    "response_format": {"type": "json_object"},
    "temperature": 0.2,
    "max_tokens": 200,
}
resp = llm(demo_messages, "openai/gpt-4o-mini", return_json=True, **demo_options)
print(resp["usage"])




{'prompt_tokens': 26, 'completion_tokens': 18, 'total_tokens': 44, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0}}


In [15]:
import pandas as pd
"""
‘TweetID’: Represents the tweet ID. 

‘Username’: Represents the username who published the tweet.  

‘Text’: Represents the full text of the tweet (not pre-processed).

‘CreateDate’: Represents the date the tweet was created.  

‘Biased’: Represents the label assigned by the annotators: whether the tweet is antisemitic or non-antisemitic.

‘Keyword’: Represents the keyword that was used in the query. The keyword can be in the text, including mentioned names, or the username.  

https://zenodo.org/records/7932888
 
"""
# Read the gold-standard tweet dataset (decoded as cp1252) and preview the first rows.
twitter_df = pd.read_csv(
    "GoldStanderDataSet.csv",
    encoding="cp1252",
)
twitter_df.head()


Unnamed: 0,TweetID,Username,Text,CreateDate,Biased,Keyword
0,1228740093357092865,Celtic_Films,AIPAC should be registered as a foreign agent ...,2020-02-15 17:57:21+00:00,1,Israel
1,1239547900012589056,zariths__,"RT @qiss0rkid: go to israel pls , we don't nee...",2020-03-16 13:43:43+00:00,1,Israel
2,1216559517887954945,kelownascott,"The world, including Canada, has given Israel ...",2020-01-13 03:16:06+00:00,1,Israel
3,1217123508754534400,KeishaJake,These children avoid walking over the US &amp;...,2020-01-14 16:37:12+00:00,0,Israel
4,1232258532273090560,amit2nirvana,RT @IDF: The last 24 hours in Israel: https://...,2020-02-25 10:58:23+00:00,0,Israel


In [16]:
# Number of tweets per value of the Keyword column.
twitter_df["Keyword"].value_counts()

Keyword
Jews       4605
Israel     1524
ZioNazi     529
Kikes       283
Name: count, dtype: int64

In [24]:
# Draw one tweet labelled Biased == 1; the fixed seed makes the draw repeatable.
biased_mask = twitter_df["Biased"] == 1
samples = twitter_df[biased_mask].sample(n=1, random_state=42)
samples


Unnamed: 0,TweetID,Username,Text,CreateDate,Biased,Keyword
6036,1465147721724178432,rulajebreal,Herzog lit Hanukkah candles in the heart of th...,2021-11-29 02:36:52+00:00,1,Jews


In [None]:

from __future__ import annotations
from typing import Dict, Tuple
import json
import os
import re
import pandas as pd
from pathlib import Path
from openai import OpenAI

# -------------------------
# Config
# -------------------------
# Directory holding the annotation-guideline markdown files (a directory path, not a glob pattern).
ANNOTATION_GLOB = "prompts/annotation_guidelines"
# OpenRouter model ids are written "<provider>/<model>"; a dash-joined id is not a valid model name.
MODEL = "openai/gpt-4o-mini"
CSV_PATH = "GoldStanderDataSet.csv"  # update if needed

# -------------------------
# Load defs
# -------------------------
def load_definitions(definitions_dir: str) -> Dict[str, str]:
    """Load annotation definitions from the markdown files in a directory.

    Args:
        definitions_dir: Directory searched (non-recursively) for ``*.md`` files.

    Returns:
        Mapping of each file's stem to its stripped UTF-8 text, in sorted path order.

    Raises:
        FileNotFoundError: If ``definitions_dir`` does not exist.
        RuntimeError: If the directory contains no ``*.md`` files.
    """
    p = Path(definitions_dir)
    if not p.exists():
        raise FileNotFoundError(f"Directory {definitions_dir} not found")

    # Path.glob yields entries in arbitrary filesystem order. Sorting keeps the
    # dict, and the prompt serialised from it, identical across runs and machines.
    d: Dict[str, str] = {
        md.stem: md.read_text(encoding="utf-8").strip()
        for md in sorted(p.glob("*.md"))
    }
    if not d:
        raise RuntimeError("No definitions found")
    return d

# Map each guideline file's stem to its text; these keys are the labels the classifier may answer with.
annotations = load_definitions(ANNOTATION_GLOB)

# -------------------------
# Prompt builder
# -------------------------
# System message for the classifier: the model must answer with a JSON object holding
# "answer" (one key of the annotations mapping) and "description" (a short justification).
CLASSIFIER_SYSTEM = """You are a careful text classifier.

You will be given:
- a JSON object named "annotations" mapping names to definitions
- a "text" string to classify

Task:
- Pick exactly one key from annotations as the best match for text.
- If several could fit, choose the stricter or more specific one.
- Output JSON only, no extra words.

Output schema:
{
  "answer": "<one key from annotations>",
  "description": "<1 to 3 sentences explaining the choice>"
}

Rules:
- answer must be exactly one of annotations keys
- description must cite concrete cues from the text
"""

def generate_prompt(text: str, annotations: Dict[str, str]) -> str:
    """Build the user message: the definitions as pretty-printed JSON, then the text to classify.

    Everything is returned as a single string so it is easy to log.
    """
    definitions_json = json.dumps(annotations, ensure_ascii=False, indent=2)
    sections = [
        "annotations:\n",
        definitions_json,
        "\n\ntext:\n",
        text,
        "\n\nRespond with JSON only matching the schema.",
    ]
    return "".join(sections)



def extract_pred_and_desc(json_text: str, allowed_keys: Dict[str, str]) -> Tuple[str, str]:
    """Parse the model's JSON reply into ``(answer, description)``.

    Args:
        json_text: Raw reply content, expected to be a JSON object with the
            keys ``"answer"`` and ``"description"``.
        allowed_keys: The annotations mapping; its keys are the valid answers.

    Returns:
        ``(answer, description)``. The answer is returned exactly as the model
        gave it, even when it is not one of ``allowed_keys``, so no label is
        ever substituted silently. When the reply carries no description, a
        placeholder is returned that also says whether the answer is a known key.
        When ``json_text`` is not a JSON object, the raw input is returned in
        both positions so it can be inspected.
    """
    try:
        obj = json.loads(json_text)
        answer = obj.get("answer", "")
        desc = obj.get("description", "")
    except Exception:
        # Deliberate best effort: keep the unparsed reply for debugging.
        return json_text, json_text

    if not desc:
        # isinstance guard: an unhashable answer (list/dict) cannot be looked up.
        if isinstance(answer, str) and answer in allowed_keys:
            desc = "Model returned no description."
        else:
            desc = "Model returned no description, and the answer is not one of the annotation keys."
    return answer, desc
def get_answer(resp):
    """Return the ``content`` of the first choice's message in an OpenAI-style response dict."""
    first_choice = resp["choices"][0]
    message = first_choice["message"]
    return message["content"]


# -------------------------
# One-sample demo (simple to debug)
# -------------------------
# Load data (only once)
# Load the dataset for this one-row demo.
twitter_df = pd.read_csv(CSV_PATH, encoding="cp1252", low_memory=False)
# TODO: log the frame's dimensions

# Draw one tweet labelled Biased == 1; the fixed seed makes the draw repeatable.
sampled = twitter_df[twitter_df["Biased"] == 1].sample(1, random_state=42)
# Read the row from `sampled`, the frame drawn on the line above, so this cell
# does not depend on a variable left behind by another cell.
row = sampled.iloc[0]
text = str(row["Text"])
keyword = row.get("Keyword", None)
# TODO: log the text and keyword

prompt = generate_prompt(text, annotations)
# TODO: log the prompt
resp = llm(
    [{"role": "system", "content": CLASSIFIER_SYSTEM},{"role": "user", "content": prompt}],
    MODEL,
    response_format={"type": "json_object"},
    temperature=0.1,
    max_tokens=800,
    return_json=True,
)
# TODO: log the response
resp_text = get_answer(resp)
prediction, desc = extract_pred_and_desc(resp_text, annotations)
# TODO: log the columns that will be written into the results dataframe
# print("TEXT:", text)
# print("KEYWORD:", keyword)
# print("PREDICTION:", prediction)
# print("DESC:", desc)




annotations:
{
  "Jerusalem Declaration": "# Annotation Guidelines for Identifying Antisemitism Based on the Jerusalem Declaration\n\n## 1. Task Description\n\n### Overview\nThese guidelines are designed to help annotators identify antisemitic content based on the Jerusalem Declaration on Antisemitism (JDA). The JDA provides a clear, concise, and historically-informed definition that aims to strengthen the fight against antisemitism while protecting space for legitimate political discourse, particularly regarding Israel and Palestine.\n\n### Purpose\nThe purpose of these annotations is to accurately identify content that constitutes discrimination, prejudice, hostility, or violence against Jews as Jews, while distinguishing this from legitimate criticism of states, policies, or political ideologies. These guidelines will help create consistent, nuanced assessments that recognize both the serious nature of antisemitism and the importance of protecting free political speech.\n\n### What 

In [53]:
# Show the frame; the output below it includes the derived text_length column.
twitter_df

Unnamed: 0,TweetID,Username,Text,CreateDate,Biased,Keyword,text_length
0,1228740093357092865,Celtic_Films,AIPAC should be registered as a foreign agent ...,2020-02-15 17:57:21+00:00,1,Israel,246
1,1239547900012589056,zariths__,"RT : go to israel pls , we don't need you",2020-03-16 13:43:43+00:00,1,Israel,41
2,1216559517887954945,kelownascott,"The world, including Canada, has given Israel ...",2020-01-13 03:16:06+00:00,1,Israel,164
3,1217123508754534400,KeishaJake,These children avoid walking over the US &amp;...,2020-01-14 16:37:12+00:00,0,Israel,314
4,1232258532273090560,amit2nirvana,RT : The last 24 hours in Israel: https://t.co...,2020-02-25 10:58:23+00:00,0,Israel,57
...,...,...,...,...,...,...,...
6936,1241770877336711168,CitizenJane99,"""Use your socks as face masks""\n Zionazi Isr...",2020-03-22 16:57:02+00:00,1,ZioNazi,88
6937,1241902809592803328,CHUCK_1967,THE GREAT DEPRESSION. THESE ZIONAZI JEWS AND T...,2020-03-23 01:41:17+00:00,1,ZioNazi,112
6938,1243432780127707136,FeralGlance10,Not when it's run by blairites and Zionazis.,2020-03-27 07:00:51+00:00,1,ZioNazi,44
6939,1244685065470107648,Nables_S,This Zionazi criminal is the most wanted terro...,2020-03-30 17:56:59+00:00,1,ZioNazi,61


In [66]:

from __future__ import annotations
from typing import Dict, Tuple
import json
import logging
from logging.handlers import RotatingFileHandler
import os
import pandas as pd
from pathlib import Path

# If you use OpenAI directly elsewhere, leave this import
from openai import OpenAI  # noqa: F401

# -------------------------
# Logging setup (to file)
# -------------------------
LOG_DIR = "logs"
LOG_FILE = os.path.join(LOG_DIR, "classifier.log")
os.makedirs(LOG_DIR, exist_ok=True)

logger = logging.getLogger("grok_classifier")
logger.setLevel(logging.DEBUG)  # let DEBUG and above reach the file handler below

# Re-running this cell must not stack handlers (every record would be written
# several times). Each old handler is also closed, not just dropped: a dropped
# RotatingFileHandler would keep its log file open for the life of the kernel.
for _stale_handler in list(logger.handlers):
    logger.removeHandler(_stale_handler)
    _stale_handler.close()

fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

# File handler with rotation: roll over at about 5 MB, keep 3 backups.
file_handler = RotatingFileHandler(
    LOG_FILE, maxBytes=5_000_000, backupCount=3, encoding="utf-8"
)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(fmt)
logger.addHandler(file_handler)

def _truncate(s: str | None, n: int = 1500) -> str:
    if not s:
        return ""
    return (s[:n] + "...") if len(s) > n else s

# -------------------------
# Config
# -------------------------
# Directory holding the annotation-guideline markdown files (a directory path, not a glob pattern).
ANNOTATION_GLOB = "prompts/annotation_guidelines"
MODEL = "openai/gpt-4o-mini"  # model id passed to llm() as its second argument
CSV_PATH = "GoldStanderDataSet.csv"  # update if needed
TEMPERATURE = 0.1  # forwarded to llm() as `temperature`
MAX_TOKENS = 800  # forwarded to llm() as `max_tokens`

# -------------------------
# Load defs
# -------------------------
def load_definitions(definitions_dir: str) -> Dict[str, str]:
    """Load annotation definitions from the markdown files in a directory.

    Args:
        definitions_dir: Directory searched (non-recursively) for ``*.md`` files.

    Returns:
        Mapping of each file's stem to its stripped UTF-8 text, in sorted path order.

    Raises:
        FileNotFoundError: If ``definitions_dir`` does not exist.
        RuntimeError: If the directory contains no ``*.md`` files.
    """
    p = Path(definitions_dir)
    if not p.exists():
        raise FileNotFoundError(f"Directory {definitions_dir} not found")

    # Path.glob yields entries in arbitrary filesystem order. Sorting keeps the
    # dict, and the prompt serialised from it, identical across runs and machines.
    d: Dict[str, str] = {
        md.stem: md.read_text(encoding="utf-8").strip()
        for md in sorted(p.glob("*.md"))
    }
    if not d:
        raise RuntimeError("No definitions found")
    return d

# Map each guideline file's stem to its text; these keys are the labels the classifier may answer with.
annotations = load_definitions(ANNOTATION_GLOB)

# -------------------------
# Prompt builder
# -------------------------
# System message for the classifier: the model must answer with a JSON object holding
# "answer" (one key of the annotations mapping) and "description" (a short justification).
# NOTE(review): the first rule hard-codes the allowed keys ("IHRA Definition",
# "Jerusalem Declaration"); they must be kept in step with the keys of `annotations`,
# which come from the markdown filenames.
CLASSIFIER_SYSTEM = """You are a careful text classifier.

You will be given:
- a JSON object named "annotations" mapping names to definitions
- a "text" string to classify

Task:
- Pick exactly one key from annotations as the best match for text.
- If several could fit, choose the stricter or more specific one.
- Output JSON only, no extra words.

Output schema:
{
  "answer": "<one key from annotations>",
  "description": "<1 to 3 sentences explaining the choice>"
}

Rules:
- answer MUST be exactly one of annotations keys: IHRA Definition, Jerusalem Declaration
- description must cite concrete cues from the text
- Dont say if this text is antisemitic or not, just pick the best fitting definition!
"""

def generate_prompt(text: str, annotations: Dict[str, str]) -> str:
    """Build the user message: the definitions as pretty-printed JSON, then the text to classify.

    Everything is returned as a single string so it is easy to log.
    """
    definitions_json = json.dumps(annotations, ensure_ascii=False, indent=2)
    header = "annotations:\n" + definitions_json
    body = "\n\ntext:\n" + text
    footer = "\n\nRespond with JSON only matching the schema."
    return header + body + footer

def extract_pred_and_desc(json_text: str, allowed_keys: Dict[str, str]) -> Tuple[str, str]:
    """Parse the model's JSON reply into ``(answer, description)``.

    Args:
        json_text: Raw reply content, expected to be a JSON object with the
            keys ``"answer"`` and ``"description"``.
        allowed_keys: The annotations mapping; its keys are the valid answers.

    Returns:
        ``(answer, description)``. The answer is returned exactly as given,
        even when it is not one of ``allowed_keys``, so sentinel answers pass
        through and no label is ever substituted silently. When the reply
        carries no description, a placeholder is returned that also says
        whether the answer is a known key. When ``json_text`` is not a JSON
        object, the raw input is returned in both positions.
    """
    try:
        obj = json.loads(json_text)
        answer = obj.get("answer", "")
        desc = obj.get("description", "")
    except Exception:
        # If the model did not return a JSON object, store the raw text for debugging
        return json_text, json_text

    if not desc:
        # isinstance guard: an unhashable answer (list/dict) cannot be looked up.
        if isinstance(answer, str) and answer in allowed_keys:
            desc = "Model returned no description."
        else:
            desc = "Model returned no description, and the answer is not one of the annotation keys."
    return answer, desc

def get_answer(resp):
    """Return the ``content`` of the first choice's message."""
    # expects OpenAI-style dict payload
    first_choice = resp["choices"][0]
    message = first_choice["message"]
    return message["content"]

# -------------------------
# Load data once
# -------------------------
twitter_df = pd.read_csv(CSV_PATH, encoding="cp1252", low_memory=False)

# Remove @mentions, then trim surrounding whitespace. The pandas string methods
# leave missing values as they are and need no `re` import (this cell has none).
# NOTE(review): assumes Text is read as a string column — confirm against the CSV.
twitter_df["Text"] = (
    twitter_df["Text"]
    .str.replace(r"@\w+", "", regex=True)
    .str.strip()
)
# Character count of the cleaned text; missing text counts as 0.
twitter_df["text_length"] = twitter_df["Text"].str.len().fillna(0).astype("int64")

# add log about the dimensions
logger.info("Loaded CSV: path=%s shape=%s columns=%s",
            CSV_PATH, twitter_df.shape, list(twitter_df.columns))

# Row selection happens in the next cell, which samples N_SAMPLES rows with
# Biased == 1, non-null Text and text_length > MIN_CHARS.


2025-09-05 13:25:04,604 INFO Loaded CSV: path=GoldStanderDataSet.csv shape=(6941, 7) columns=['TweetID', 'Username', 'Text', 'CreateDate', 'Biased', 'Keyword', 'text_length']


In [None]:
N_SAMPLES = 40
MIN_CHARS = 100

# Keep rows with non-null text, labelled Biased == 1, and strictly longer than MIN_CHARS,
# then draw N_SAMPLES of them with a fixed seed so the selection is repeatable.
eligible_mask = (
    twitter_df["Text"].notna()
    & (twitter_df["Biased"] == 1)
    & (twitter_df["text_length"] > MIN_CHARS)
)
samples = twitter_df[eligible_mask].sample(N_SAMPLES, random_state=42)
samples


Unnamed: 0,TweetID,Username,Text,CreateDate,Biased,Keyword,text_length
0,1228740093357092865,Celtic_Films,AIPAC should be registered as a foreign agent ...,2020-02-15 17:57:21+00:00,1,Israel,246
1,1239547900012589056,zariths__,"RT : go to israel pls , we don't need you",2020-03-16 13:43:43+00:00,1,Israel,41
2,1216559517887954945,kelownascott,"The world, including Canada, has given Israel ...",2020-01-13 03:16:06+00:00,1,Israel,164
3,1217123508754534400,KeishaJake,These children avoid walking over the US &amp;...,2020-01-14 16:37:12+00:00,0,Israel,314
4,1232258532273090560,amit2nirvana,RT : The last 24 hours in Israel: https://t.co...,2020-02-25 10:58:23+00:00,0,Israel,57
...,...,...,...,...,...,...,...
6936,1241770877336711168,CitizenJane99,"""Use your socks as face masks""\n Zionazi Isr...",2020-03-22 16:57:02+00:00,1,ZioNazi,88
6937,1241902809592803328,CHUCK_1967,THE GREAT DEPRESSION. THESE ZIONAZI JEWS AND T...,2020-03-23 01:41:17+00:00,1,ZioNazi,112
6938,1243432780127707136,FeralGlance10,Not when it's run by blairites and Zionazis.,2020-03-27 07:00:51+00:00,1,ZioNazi,44
6939,1244685065470107648,Nables_S,This Zionazi criminal is the most wanted terro...,2020-03-30 17:56:59+00:00,1,ZioNazi,61


In [64]:
# -------------------------
# Iterate over all samples (NOT inside a function)
# -------------------------
results_rows = []
logger.info(f'System prompt for all prompts {CLASSIFIER_SYSTEM}')

# Run settings stamped on every result row, whether the call succeeded or not.
run_settings = {"model": MODEL, "max_tokens": MAX_TOKENS, "temperature": TEMPERATURE}

for idx, row in samples.iterrows():
    text = str(row["Text"])
    # A missing or NaN keyword is recorded as None.
    has_keyword = "Keyword" in row and pd.notna(row["Keyword"])
    keyword = row["Keyword"] if has_keyword else None
    logger.info("Row %s: text=%s | keyword=%s", idx, _truncate(text, 200), keyword)

    prompt = generate_prompt(text, annotations)
    logger.debug("Prompt (truncated): %s", _truncate(prompt, 800))

    chat_messages = [
        {"role": "system", "content": CLASSIFIER_SYSTEM},
        {"role": "user", "content": prompt},
    ]
    try:
        resp = llm(
            chat_messages,
            MODEL,
            response_format={"type": "json_object"},
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            return_json=True,
        )
    except Exception as e:
        # Record the failure as its own row and move on to the next sample.
        logger.exception("llm() call failed on row %s: %s", idx, e)
        results_rows.append({
            "text": text,
            "keyword": keyword,
            "prediction": "LLM_ERROR",
            "description": str(e),
            **run_settings,
        })
        continue

    try:
        resp_text = get_answer(resp)
    except Exception as e:
        # Re-encode the failure as a JSON reply so the parser below handles it uniformly.
        logger.exception("Failed extracting answer on row %s: %s", idx, e)
        resp_text = json.dumps({"answer": "PARSING_ERROR", "description": str(e)})

    logger.debug("Raw model content (truncated): %s", _truncate(resp_text, 800))

    prediction, desc = extract_pred_and_desc(resp_text, annotations)

    result_row = {
        "text": text,
        "keyword": keyword,
        "prediction": prediction,
        "description": desc,
        **run_settings,
    }
    # Same row with the long fields shortened, for the log only.
    log_row_preview = {
        **result_row,
        "text": _truncate(text, 160),
        "description": _truncate(desc, 200),
    }
    logger.info("Result row: %s", log_row_preview)
    results_rows.append(result_row)

# Build the results DataFrame in the requested column order
result_columns = ["text", "keyword", "prediction", "description", "model", "max_tokens", "temperature"]
results_df = pd.DataFrame(results_rows, columns=result_columns)

logger.info("Finished. results_df shape=%s", results_df.shape)
# If you want, you can save it:
# results_df.to_csv("classification_results.csv", index=False)


2025-09-05 10:14:23,340 INFO System prompt for all prompts You are a careful text classifier.

You will be given:
- a JSON object named "annotations" mapping names to definitions
- a "text" string to classify

Task:
- Pick exactly one key from annotations as the best match for text.
- If several could fit, choose the stricter or more specific one.
- Output JSON only, no extra words.

Output schema:
{
  "answer": "<one key from annotations>",
  "description": "<1 to 3 sentences explaining the choice>"
}

Rules:
- answer MUST be exactly one of annotations keys: IHRA Definition, Jerusalem Declaration
- description must cite concrete cues from the text
- Dont say if this text is antisemitic or not, just pick the best fitting definition!

2025-09-05 10:14:23,341 INFO Row 3910: text=Lol y'all still in denial about who the true Jews are, huh? | keyword=Jews
2025-09-05 10:14:23,342 DEBUG Prompt (truncated): annotations:
{
  "Jerusalem Declaration": "# Annotation Guidelines for Identifying Anti

In [65]:
results_df

Unnamed: 0,text,keyword,prediction,description,model,max_tokens,temperature
0,Lol y'all still in denial about who the true J...,Jews,IHRA Definition,The text suggests a denial of the identity of ...,openai/gpt-4o-mini,800,0.1
1,"So far from what I’ve read, I’ve gathered that...",Jews,Jerusalem Declaration,The text discusses the historical coexistence ...,openai/gpt-4o-mini,800,0.1
