In [1]:
# ======================================================================
#  ARBOPEDIA LLM-AGENT ‚Äî PIPELINE COMPLETO (IA1..IA5 + RAG + AVALIA√á√ÉO)
# ======================================================================
#
# Este arquivo integra:
#   - IA 1  ‚Üí PLANNER (PLANNER.Modelfile)
#   - MINI-RAG DE SCHEMA ‚Üí para ajudar a IA 2
#   - IA 2  ‚Üí CYPHER_GENERATOR (CYPHER_GENERATOR.Modelfile)
#   - Neo4j ‚Üí execu√ß√£o segura (somente leitura)
#   - RAG normativo ‚Üí Chroma Cloud (cole√ß√£o "arbopedia")
#   - IA 3  ‚Üí ANSWER (ANSWER.Modelfile)
#   - IA 5  ‚Üí OBSERVER (OBSERVABILITY.Modelfile)
#   - IA 4  ‚Üí EVALUATOR (EVALUATOR.Modelfile) + datasets JSON
#
# O fluxo suporta:
#   - graph_only         ‚Üí s√≥ grafo Neo4j
#   - rag_only           ‚Üí s√≥ RAG normativo
#   - hybrid_graph_rag   ‚Üí combina grafo + RAG
#
# E j√° est√° preparado para:
#   - logging por sess√£o (IA 5),
#   - avalia√ß√£o autom√°tica (IA 4) com dataset_evaluation.json
#   - m√©tricas como Accuracy backend, KG, Recall@k e RAGAS.
# ======================================================================

import os
import re
import json
import time
import uuid
import math
import datetime
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from sentence_transformers import SentenceTransformer

from neo4j import GraphDatabase, basic_auth
from neo4j.exceptions import Neo4jError
import chromadb
import ollama


In [2]:
# =========================
# C√âLULA 1 ‚Äî VARI√ÅVEIS DE AMBIENTE E CONFIG GERAL
# =========================

# Modelos (Modelfiles j√° carregados no Ollama)
MODEL_NAME_PLANNER   = os.getenv("MODEL_NAME_PLANNER",   "PLANNER")
MODEL_NAME_CYPHER    = os.getenv("MODEL_NAME_CYPHER",    "CYPHER_GENERATOR")
MODEL_NAME_ANSWER    = os.getenv("MODEL_NAME_ANSWER",    "ANSWER")
MODEL_NAME_OBSERVER  = os.getenv("MODEL_NAME_OBSERVER",  "OBSERVER")
MODEL_NAME_EVALUATOR = os.getenv("MODEL_NAME_EVALUATOR", "EVALUATOR")

# Neo4j (somente leitura)
NEO4J_URI      = os.getenv("NEO4J_URI",      "bolt://localhost:7687")
NEO4J_USER     = os.getenv("NEO4J_USER",     "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "zWF$yls*J;K:DtC3")

# Chroma Cloud (RAG normativo)
CHROMA_API_KEY        = os.getenv("CHROMA_API_KEY",        "ck-13V15SvUh23Zc7MXYoio9uoNGHgyJLVNcwJw9ZxYr2Z2")
CHROMA_TENANT         = os.getenv("CHROMA_TENANT",         "c3e00254-1f1b-49fb-8f51-c9fbad3c8d76")
CHROMA_DATABASE       = os.getenv("CHROMA_DATABASE",       "arbopedia")
CHROMA_COLLECTION_NAME= os.getenv("CHROMA_COLLECTION_NAME","arbopedia")

# Diret√≥rios gerais
LOGS_BASE_DIR = Path("logs_sessions")
LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)


In [3]:
# =========================
# C√âLULA 2 ‚Äî FUN√á√ïES AUXILIARES GERAIS
# =========================

def log_info(msg: str) -> None:
    print(f"[INFO] {msg}")

def log_warn(msg: str) -> None:
    print(f"[WARN] {msg}")

def log_error(msg: str) -> None:
    print(f"[ERROR] {msg}")

def safe_json_loads(text: str) -> Any:
    """
    Faz o poss√≠vel para transformar um texto em JSON.
    """
    text = (text or "").strip()

    # 1) Tenta direto
    try:
        return json.loads(text)
    except Exception:
        pass

    # 2) Tenta remover ```json ... ``` ou ``` ... ```
    m = re.search(r"```json(.*?)```", text, flags=re.DOTALL | re.IGNORECASE)
    if not m:
        m = re.search(r"```(.*?)```", text, flags=re.DOTALL)
    if m:
        inner = m.group(1).strip()
        try:
            return json.loads(inner)
        except Exception:
            pass

    # 3) Tenta achar o primeiro {...} bem-formado
    m = re.search(r"(\{.*\})", text, flags=re.DOTALL)
    if m:
        candidate = m.group(1)
        try:
            return json.loads(candidate)
        except Exception:
            pass

    raise ValueError(f"N√£o consegui interpretar o texto como JSON. Amostra: {text[:200]}")

def new_session_id() -> str:
    """
    Gera um ID √∫nico de sess√£o, usado para os logs e relat√≥rios.
    """
    return uuid.uuid4().hex[:12]

def prepare_text_for_model(text: str) -> str:
    """
    Normaliza o texto antes de gerar embeddings (tira quebras de linha extras, etc).
    """
    if not text:
        return ""
    s = str(text).replace("\n", " ")
    s = " ".join(s.split())
    return s.strip()



def sanitize_cypher(q: str) -> str:
    if not q:
        return q

    q = q.strip()

    # Remove fences ```cypher ... ```
    q = re.sub(r"^\s*```(?:cypher)?\s*", "", q, flags=re.IGNORECASE)
    q = re.sub(r"\s*```\s*$", "", q)

    # Remove prefixos tipo "Cypher:" no in√≠cio
    q = re.sub(r"^\s*cypher\s*:\s*", "", q, flags=re.IGNORECASE)

    # Remove backticks que envolvem a query
    q = q.strip().strip("`").strip()

    # Remove ; final
    q = q.rstrip(";").strip()

    # Remove aspas perdidas nas pontas
    q = q.strip().strip('"').strip("'").strip()

    return q


def is_cypher_suspicious(q: str) -> Tuple[bool, List[str]]:
    issues = []
    if not q:
        return True, ["cypher_empty"]

    uq = q.upper()

    if "MATCH P =" in uq or "MATCH P=" in uq:
        issues.append("uses_path_variable_p")
    if "SUM(P." in uq or "AVG(P." in uq or "COUNT(P." in uq:
        issues.append("aggregates_over_path_p")

    if "RETURN DISTINCT" in uq and ("SUM(" in uq or "AVG(" in uq or "COUNT(" in uq):
        issues.append("distinct_with_aggregation")

    if "ATIVIDADERESUMO" in uq:
        issues.append("uses_AtividadeResumo_label")

    if "CASOS_EM*" in uq or "CASOS_NO_DIA" in uq:
        issues.append("uses_old_rel_names")

    if "CREATE " in uq or "DELETE " in uq or "MERGE " in uq or "SET " in uq:
        issues.append("write_query_detected")

    return (len(issues) > 0), issues


In [4]:
# =========================
# C√âLULA 3 ‚Äî EMBEDDINGS BGE-M3 PARA RAG NORMATIVO
# =========================

EMBED_MODEL_NAME = "BAAI/bge-m3"
_embed_lock = threading.Lock()
_embed_model: Optional[SentenceTransformer] = None

def _load_embed_model() -> SentenceTransformer:
    global _embed_model
    if _embed_model is None:
        with _embed_lock:
            if _embed_model is None:
                log_info(f"Carregando modelo de embedding: {EMBED_MODEL_NAME}")
                _embed_model = SentenceTransformer(EMBED_MODEL_NAME)
    return _embed_model

def embed_normative_query(text: str) -> List[float]:
    """
    Gera o embedding (1024 dims) da pergunta para o RAG normativo,
    usando BAAI/bge-m3.

    Retorna uma lista de floats, pronto pra passar em query_embeddings.
    """
    prepared = prepare_text_for_model(text)
    model = _load_embed_model()
    vec = model.encode(prepared, normalize_embeddings=True)
    return vec.astype(float).tolist()


In [5]:
# =========================
# C√âLULA 4 ‚Äî CONEX√ÉO COM CHROMA CLOUD (RAG NORMATIVO)
# =========================

chroma_client = chromadb.CloudClient(
    api_key=CHROMA_API_KEY,
    tenant=CHROMA_TENANT,
    database=CHROMA_DATABASE,
)

rag_collection = chroma_client.get_collection(name=CHROMA_COLLECTION_NAME)


In [6]:
# =========================
# C√âLULA 5 ‚Äî MINI-RAG DE SCHEMA (PARA IA 2)
# =========================

SCHEMA_INDEX_DIR      = Path("schema_vetorizado")
SCHEMA_EMB_PATH       = SCHEMA_INDEX_DIR / "schema_emb.npy"
SCHEMA_CHUNKS_PATH    = SCHEMA_INDEX_DIR / "schema_chunks.json"

if not SCHEMA_EMB_PATH.exists() or not SCHEMA_CHUNKS_PATH.exists():
    log_warn("Arquivos de MINI-RAG de schema n√£o encontrados; IA 2 usar√° apenas o texto de base.")

_schema_embeddings: Optional[np.ndarray] = None
_schema_chunks: Optional[List[Dict[str, Any]]] = None

def _load_schema_index() -> Tuple[np.ndarray, List[Dict[str, Any]]]:
    global _schema_embeddings, _schema_chunks
    if _schema_embeddings is None or _schema_chunks is None:
        if SCHEMA_EMB_PATH.exists() and SCHEMA_CHUNKS_PATH.exists():
            log_info("Carregando √≠ndice vetorizado do schema...")
            _schema_embeddings = np.load(SCHEMA_EMB_PATH)
            with SCHEMA_CHUNKS_PATH.open("r", encoding="utf-8") as f:
                _schema_chunks = json.load(f)
        else:
            _schema_embeddings = np.zeros((0, 1024), dtype=float)
            _schema_chunks = []
    return _schema_embeddings, _schema_chunks

def mini_rag_schema(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Recupera trechos da documenta√ß√£o da ontologia do grafo para IA 2.
    """
    embs, chunks = _load_schema_index()
    if embs.shape[0] == 0:
        return []

    q_emb = np.array(embed_normative_query(query), dtype=float)
    scores = embs @ q_emb
    idxs = np.argsort(-scores)[:top_k]
    return [chunks[int(i)] for i in idxs]


In [7]:
# =========================
# C√âLULA 6 ‚Äî NEO4J (CONEX√ÉO E EXECU√á√ÉO SEGURA)
# =========================

neo4j_driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=basic_auth(NEO4J_USER, NEO4J_PASSWORD),
    encrypted=False,
    max_connection_lifetime=3600,
)

def run_cypher_safe(cypher: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
    """
    Executa a consulta Cypher em modo somente leitura.
    """
    params = params or {}
    log_info(f"Executando Cypher (read-only): {cypher[:200]}...")

    with neo4j_driver.session(default_access_mode="READ") as session:
        try:
            result = session.run(cypher, **params)
            records = [r.data() for r in result]
            return records
        except Exception as e:
            log_error(f"Erro ao executar Cypher: {e}")
            return [{"error": str(e)}]


In [8]:
# =========================
# C√âLULA 7 ‚Äî IA 1 (PLANNER) ‚Üí AISchema
# =========================

_schema_llm_lock = threading.Lock()

def _warm_up_planner() -> None:
    """
    Chamada ‚Äúdummy‚Äù para evitar lat√™ncia muito alta na primeira requisi√ß√£o do PLANNER.
    """
    pass  # pode chamar o modelo com uma pergunta simples se quiser

def infer_needs_flags_from_schema(data: Dict[str, Any]) -> Tuple[str, bool, bool]:
    """
    Normaliza backend_mode / needs_query / needs_query_rag a partir do JSON retornado pela IA 1.
    """
    backend_mode = data.get("backend_mode") or data.get("backend") or "graph_only"
    backend_mode = backend_mode.strip().lower()

    needs_query     = bool(data.get("needs_query"))
    needs_query_rag = bool(data.get("needs_query_rag"))

    # Heur√≠sticas simples de fallback
    if backend_mode == "graph_only":
        needs_query     = True
        needs_query_rag = False
    elif backend_mode == "rag_only":
        needs_query     = False
        needs_query_rag = True
    elif backend_mode == "hybrid_graph_rag":
        needs_query     = True
        needs_query_rag = True

    return backend_mode, needs_query, needs_query_rag

def get_ai_schema_dict(question: str) -> Dict[str, Any]:
    """
    Chama o PLANNER (IA 1) e retorna o AISchema j√° com:
      - backend_mode normalizado,
      - needs_query / needs_query_rag coerentes,
      - rag_plan sempre presente (dict ou None),
      - _generation_time (segundos) para debug.
    """
    with _schema_llm_lock:
        _warm_up_planner()
        start = time.time()
        resp = ollama.chat(
            model=MODEL_NAME_PLANNER,
            messages=[{"role": "user", "content": question}],
            format="json",
        )
        elapsed = time.time() - start

    data = safe_json_loads(resp.get("message", {}).get("content", "{}"))

    backend_mode, needs_query, needs_query_rag = infer_needs_flags_from_schema(data)
    data["backend_mode"] = backend_mode
    data["needs_query"] = needs_query
    data["needs_query_rag"] = needs_query_rag
    if "rag_plan" not in data:
        data["rag_plan"] = None

    data["_generation_time"] = elapsed
    return data


In [9]:

# =========================
# C√âLULA 8 ‚Äî IA 2 (CYPHER_GENERATOR) + MINI-RAG DE SCHEMA
# =========================

def retrieve_schema_context_for_cypher(
    question: str,
    aischema: Dict[str, Any],
    top_k: int = 6,
) -> Tuple[str, List[Dict[str, Any]]]:
    """
    Usa o mini-RAG de schema para recuperar trechos relevantes da ontologia
    que v√£o ajudar a IA 2 a gerar a query Cypher.

    Retorna:
      - schema_context: string √∫nica concatenando os chunks (para ir no prompt)
      - schema_chunks: lista de dicts brutos (para log / auditoria)
    """
    query = f"{question}\n\nAISchema:\n{json.dumps(aischema, ensure_ascii=False)}"
    schema_chunks = mini_rag_schema(query, top_k=top_k)

    if not schema_chunks:
        return "(schema context n√£o encontrado ‚Äî Cypher ser√° gerado de forma mais gen√©rica)", []

    docs_str = "\n\n".join(
        f"[CHUNK {i+1}] {(c.get('text') or c.get('content') or '').strip()}"
        for i, c in enumerate(schema_chunks)
    )
    return docs_str.strip(), schema_chunks


def build_cypher_prompt(
    question: str,
    aischema: Dict[str, Any],
    schema_context: Optional[str] = None,
) -> str:
    """
    Monta o prompt da IA 2 (CYPHER_GENERATOR), recebendo opcionalmente
    o schema_context j√° recuperado pelo mini-RAG.

    Se schema_context vier None, ele √© recuperado aqui.
    """
    if schema_context is None:
        schema_context, _ = retrieve_schema_context_for_cypher(
            question,
            aischema,
            top_k=6,
        )

    templates = """
TEMPLATES CORRETOS (prefira estes padr√µes):

(1) Backbone Munic√≠pio -> Dia
MATCH (m:Municipio)-[:TEM_DADO_NO_DIA]->(d:Dia)

(2) Casos por agravo
MATCH (m)-[:TEM_CASOS]->(c:Casos)
MATCH (d)-[:TEM_CASOS]->(c)
MATCH (c)-[:E_DO_AGRAVO]->(a:Agravo)

(3) Agrega√ß√£o mensal segura
RETURN date.truncate('month', d.date) AS periodo, m.nome AS municipio, sum(c.qtd) AS total
ORDER BY periodo ASC, municipio

(4) Meteorologia
MATCH (d)-[:TEM_METEOROLOGIA]->(me:Meteo)

(5) Atividades
MATCH (m)-[:EXECUTOU_ATIVIDADE]->(ae:AtividadeExec)
MATCH (ae)-[:NO_DIA]->(d:Dia)

(6) Notifica√ß√µes
MATCH (d)-[:TEM_NOTIFICACAO]->(n:Notificacao)
MATCH (n)-[:E_DO_AGRAVO]->(a:Agravo)
""".strip()

    hard_rules = """
REGRAS OBRIGAT√ìRIAS:
- Retorne APENAS a Cypher final (texto puro). N√ÉO use ``` e N√ÉO use `...`.
- N√ÉO use MATCH p = ... e nunca agregue (sum/avg/count) sobre Path.
- Evite RETURN DISTINCT com SUM/AVG/COUNT (use WITH DISTINCT antes, se precisar).
- ORDER BY s√≥ pode usar vari√°veis/aliases presentes no MESMO WITH/RETURN.
- N√£o invente labels/relacionamentos fora do mini-RAG.
- Query READ-ONLY: sem CREATE/MERGE/SET/DELETE.
""".strip()

    return f"""
Voc√™ √© a IA 2 (CYPHER_GENERATOR), especialista em gerar consultas Cypher para Neo4j.

{hard_rules}

Pergunta:
{question}

AISchema (IA 1):
{json.dumps(aischema, ensure_ascii=False, indent=2)}

Mini-RAG (contexto do grafo):
{schema_context}

TEMPLATES CORRETOS (prefira estes padr√µes):
{templates}

Tarefa:
- Gere a Cypher mais simples poss√≠vel que responda ao AISchema.
- Respeite filtros (munic√≠pio, datas, agravos) e granularidade.
- Retorne somente a Cypher final.
""".strip()


def get_cypher_from_schema(
    question: str,
    aischema: Dict[str, Any],
) -> Tuple[str, List[str], Dict[str, Any]]:
    """
    Gera Cypher via IA 2, garantindo que:
      - o schema_context (mini-RAG) foi de fato usado no prompt;
      - retornamos metadados ricos para auditoria/log (debug).
    """
    # 1) mini-RAG de schema (contexto + chunks brutos)
    schema_context, schema_chunks = retrieve_schema_context_for_cypher(
        question,
        aischema,
        top_k=6,
    )

    # 2) prompt j√° recebendo explicitamente o schema_context
    prompt = build_cypher_prompt(
        question=question,
        aischema=aischema,
        schema_context=schema_context,
    )

    # 3) chamada ao modelo de Cypher
    resp = ollama.chat(
        model=MODEL_NAME_CYPHER,
        messages=[{"role": "user", "content": prompt}],
    )
    raw = resp.get("message", {}).get("content", "") or ""

    # 4) extrai de fences (se vier)
    m = re.search(r"```cypher(.*?)```", raw, flags=re.DOTALL | re.IGNORECASE)
    if not m:
        m = re.search(r"```(.*?)```", raw, flags=re.DOTALL)
    cypher = (m.group(1).strip() if m else raw.strip())

    # 5) sanitiza SEMPRE
    cypher = sanitize_cypher(cypher)

    warnings: List[str] = []

    # 6) heur√≠stica de suspeita (para log e poss√≠vel retry)
    suspicious, issues = is_cypher_suspicious(cypher)
    if suspicious:
        warnings.extend(issues)

    # 7) pacote de debug para observabilidade
    debug: Dict[str, Any] = {
        "schema_context": schema_context,
        "schema_chunks": schema_chunks,   # lista de chunks do mini-RAG
        "prompt": prompt,                 # prompt completo enviado para IA 2
        "raw_model_output": raw[:8000],   # truncado para evitar JSON gigante
    }

    return cypher, warnings, debug


In [10]:
# %%
# =========================
# C√âLULA 9 ‚Äî RAG NORMATIVO (CHROMA CLOUD)
# =========================


def normalize_chroma_where(where: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Chroma Cloud pode exigir que o 'where' tenha exatamente 1 operador no topo.
    Se vierem m√∫ltiplos campos no topo, embrulhamos em {"$and": [{"k": v}, ...]}.
    """
    if not where or not isinstance(where, dict):
        return None

    # Se j√° √© um operador ($and / $or / etc), mant√©m
    top_keys = list(where.keys())
    if len(top_keys) == 1 and isinstance(top_keys[0], str) and top_keys[0].startswith("$"):
        return where

    # Se tem exatamente 1 campo no topo, mant√©m
    if len(top_keys) == 1:
        return where

    # M√∫ltiplos campos no topo -> vira $and
    return {"$and": [{k: where[k]} for k in top_keys]}


def run_rag_for_question(
    question: str,
    rag_plan: Optional[Dict[str, Any]] = None,
    n_results: int = 5,
    rag_query_hint: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Busca documentos normativos relevantes no Chroma Cloud.

    Comportamento:
      - Usa, quando existir, o filtro `where` vindo do PLANNER (AISchema.rag_plan.where).
      - Se a busca com `where` n√£o retornar nenhum id, aplica fallback sem where
        (para pelo menos recuperar algum contexto e permitir avalia√ß√£o).
      - Injeta opcionalmente um `rag_query_hint` (ex.: resumo do grafo) no texto
        usado para gerar o embedding.
    """
    where_original: Optional[Dict[str, Any]] = None
    if rag_plan and isinstance(rag_plan, dict):
        where_original = rag_plan.get("where")

    # Normaliza where para formato aceito pelo Chroma
    where_original = normalize_chroma_where(where_original)

    def _query_chroma(where_used: Optional[Dict[str, Any]], query_text: str) -> Dict[str, Any]:
        q_emb = embed_normative_query(query_text)
        results = rag_collection.query(
            query_embeddings=[q_emb],
            n_results=n_results,
            where=where_used or None,
        )

        docs_raw = results.get("documents", [[]])
        metas_raw = results.get("metadatas", [[]])
        ids_raw = results.get("ids", [[]])
        dists_raw = results.get("distances", [[]])

        docs = docs_raw[0] if docs_raw else []
        metas = metas_raw[0] if metas_raw else []
        ids = ids_raw[0] if ids_raw else []
        dists = dists_raw[0] if dists_raw else []

        return {
            "documents": docs,
            "metadatas": metas,
            "ids": ids,
            "distances": dists,
            "where_used": where_used,
            "query_used": query_text,
            "error": None,
        }

    # definimos aqui para tamb√©m ser usado no bloco de except
    query_text = question
    if rag_query_hint:
        query_text = f"{question}\n\nCONTEXTO DO GRAFO (RESUMO):\n{rag_query_hint}"

    try:
        # 1) tentativa: com where_original do planner
        r1 = _query_chroma(where_original, query_text)
        ids1 = _flatten_chroma_list(r1.get("ids"))
        if ids1:
            r1["rag_fallback_used"] = False
            r1["where_original"] = where_original
            return r1

        # 2) fallback: sem where (relaxa filtros)
        r2 = _query_chroma(None, query_text)
        r2["rag_fallback_used"] = True
        r2["where_original"] = where_original
        r2["fallback_reason"] = "0 resultados com where_original (prov√°vel mismatch de metadata/valores)"
        return r2

    except Exception as e:
        log_error(f"Falha ao consultar Chroma Cloud: {e}")
        return {
            "documents": [],
            "metadatas": [],
            "ids": [],
            "distances": [],
            "where_used": where_original,
            "where_original": where_original,
            "rag_fallback_used": False,
            "query_used": query_text,
            "error": str(e),
        }


In [11]:
# =========================
# C√âLULA 10 ‚Äî IA 3 (ANSWER), IA 5 (OBSERVER) E LOGS DE PIPELINE
# (com sanitize_cypher + h√≠brido: grafo -> resumo -> RAG -> ANSWER)
# =========================

from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
import datetime
import json
import re
import time

# Garanta que LOGS_BASE_DIR exista (pode j√° existir em c√©lulas anteriores)
try:
    LOGS_BASE_DIR
except NameError:
    LOGS_BASE_DIR = Path("logs_sessions")

LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)


# ---------------------------------------------------------------------
# 0) Sanitiza√ß√£o e ‚Äúguardrails‚Äù de Cypher
# ---------------------------------------------------------------------
def sanitize_cypher(q: str) -> str:
    if not q:
        return q

    q = q.strip()

    # Remove code fences ```cypher ... ```
    q = re.sub(r"^\s*```(?:cypher)?\s*", "", q, flags=re.IGNORECASE)
    q = re.sub(r"\s*```\s*$", "", q)

    # Remove r√≥tulos tipo "Cypher:" no come√ßo
    q = re.sub(r"^\s*cypher\s*:\s*", "", q, flags=re.IGNORECASE)

    q = re.sub(r"(\b\w+\b)\s*:\s*(COUNT|SUM)\s*\(\s*(CASE\b.*?\bEND)\s*\)\s+AS\s+(\b\w+\b)",  r"\4: \2(\3)",q,
               flags=re.IGNORECASE | re.DOTALL
    )

    # Remove crases/backticks que envolvem a query inteira (ou sobras nas pontas)
    q = q.strip().strip("`").strip()

    # Remove ; final (opcional)
    q = q.rstrip(";").strip()

    return q


def cypher_is_read_only(q: str) -> bool:
    """
    Guardrail simples: impede queries destrutivas.
    """
    if not q:
        return True
    bad = [
        r"\bCREATE\b", r"\bMERGE\b", r"\bDELETE\b", r"\bDETACH\b",
        r"\bSET\b", r"\bDROP\b", r"\bREMOVE\b", r"\bCALL\s+db\.",
        r"\bLOAD\s+CSV\b", r"\bAPOC\."
    ]
    for pat in bad:
        if re.search(pat, q, flags=re.IGNORECASE):
            return False
    return True


def try_repair_common_cypher_issues(q: str) -> str:
    """
    ‚ÄúRepair‚Äù leve e seguro: corrige alguns padr√µes comuns que geram erro,
    sem tentar ‚Äúinventar‚Äù ontologia.
    """
    if not q:
        return q
    qq = q.strip()

    # 1) Datas sem aspas em compara√ß√µes: d.data >= 2021-01-01  -> d.data >= date('2021-01-01')
    #    cobre >= e <= e = e BETWEEN.
    def _wrap_date(m):
        op = m.group(1)
        dt = m.group(2)
        return f"{op} date('{dt}')"

    qq = re.sub(
        r"(\b>=\s*|\b<=\s*|\b=\s*)(\d{4}-\d{2}-\d{2})\b",
        _wrap_date,
        qq
    )

    # BETWEEN 2021-01-01 AND 2021-12-31
    qq = re.sub(
        r"\bBETWEEN\s+(\d{4}-\d{2}-\d{2})\s+AND\s+(\d{4}-\d{2}-\d{2})\b",
        lambda m: f"BETWEEN date('{m.group(1)}') AND date('{m.group(2)}')",
        qq,
        flags=re.IGNORECASE
    )

    # 2) Se tiver aspas duplas para strings e voc√™ preferir padronizar:
    #    (opcional) manter como est√° ‚Äî aqui N√ÉO vamos mexer agressivamente.

    return qq


# ---------------------------------------------------------------------
# 1) Utilit√°rios de preview e resumo do grafo (para ANSWER e para RAG)
# ---------------------------------------------------------------------
def build_graph_preview_table(graph_rows: List[Dict[str, Any]], max_rows: int = 50) -> str:
    if not graph_rows:
        return "(sem dados do grafo ou consulta n√£o executada)"

    header = list(graph_rows[0].keys())
    lines = [" | ".join(header)]
    for r in graph_rows[:max_rows]:
        row = [str(r.get(h, "")) for h in header]
        lines.append(" | ".join(row))
    return "\n".join(lines)


def build_graph_summary_for_rag(
    question: str,
    aischema: Dict[str, Any],
    cypher: Optional[str],
    graph_rows: List[Dict[str, Any]],
    neo4j_error: Optional[str],
    max_chars: int = 2000
) -> str:
    """
    Resumo curto (tipo ‚Äúbriefing‚Äù) para orientar a busca normativa no RAG,
    especialmente no cen√°rio h√≠brido.
    """
    parts = []
    parts.append("=== CONTEXTO DO GRAFO (RESUMO) ===")
    parts.append(f"Pergunta: {question}")

    # do aischema, extrair coisas-chave
    filtros = (aischema or {}).get("filters", {}) or {}
    targets = (aischema or {}).get("targets", {}) or {}
    municipios = filtros.get("municipios") or []
    datef = filtros.get("date") or {}
    agravos = (targets.get("agravos") or []) or (filtros.get("agravos") or [])

    if municipios:
        parts.append(f"Munic√≠pio(s): {municipios}")
    if agravos:
        parts.append(f"Agravos/Doen√ßas: {agravos}")
    if datef:
        parts.append(f"Filtro de data: {datef}")

    if cypher:
        parts.append("Cypher executada (higienizada):")
        parts.append(cypher)

    if neo4j_error:
        parts.append("STATUS: ERRO ao executar no Neo4j.")
        parts.append(f"Erro: {neo4j_error}")
    else:
        parts.append("STATUS: OK (sem erro Neo4j reportado).")

    if graph_rows:
        parts.append("Amostra (primeiras linhas):")
        parts.append(build_graph_preview_table(graph_rows, max_rows=10))
    else:
        parts.append("Amostra: (sem linhas retornadas)")

    text = "\n".join(parts)
    return text[:max_chars]


def merge_rag_query_with_graph_context(question: str, graph_summary: str) -> str:
    """
    Injeta o resumo do grafo como ‚Äúcontexto adicional‚Äù para ajudar o retrieval.
    """
    if not graph_summary:
        return question
    return (
        f"{question}\n\n"
        f"{graph_summary}\n\n"
        "Tarefa do retrieval: recuperar trechos normativos diretamente relacionados "
        "aos achados acima (ou √† aus√™ncia deles), priorizando recomenda√ß√µes operacionais."
    )


# ---------------------------------------------------------------------
# 2) IA 3 (ANSWER)
# ---------------------------------------------------------------------
def build_final_answer(
    question: str,
    aischema: Dict[str, Any],
    backend_mode: str,
    graph_rows: List[Dict[str, Any]],
    rag_context: Dict[str, Any],
    neo4j_error: Optional[str] = None,
) -> str:
    """
    Chama a IA 3 (ANSWER) para integrar:
      - pergunta,
      - plano (AISchema),
      - resultado do grafo (quando houver),
      - trechos normativos via RAG (quando houver).
    """

    graph_preview = build_graph_preview_table(graph_rows, max_rows=80)

    rag_docs = rag_context.get("documents") or []
    rag_metas = rag_context.get("metadatas") or []
    rag_ids   = rag_context.get("ids") or []

    # Chroma √†s vezes devolve lista de listas
    if rag_ids and isinstance(rag_ids, list) and len(rag_ids) > 0 and isinstance(rag_ids[0], list):
        rag_ids = rag_ids[0]
    if rag_docs and isinstance(rag_docs, list) and len(rag_docs) > 0 and isinstance(rag_docs[0], list):
        rag_docs = rag_docs[0]
    if rag_metas and isinstance(rag_metas, list) and len(rag_metas) > 0 and isinstance(rag_metas[0], list):
        rag_metas = rag_metas[0]

    rag_snippets = []
    for i, (doc, meta, rid) in enumerate(zip(rag_docs, rag_metas, rag_ids), start=1):
        src = meta.get("source") if isinstance(meta, dict) else meta
        rag_snippets.append(f"[DOC {i} | id={rid} | fonte={src}]\n{doc}")

    rag_block = "\n\n".join(rag_snippets) if rag_snippets else "(nenhum trecho normativo recuperado)"

    prompt = (
        "Voc√™ √© a IA 3 (ANSWER), respons√°vel por produzir a resposta final ao usu√°rio, "
        "integrando resultados do grafo de conhecimento (se houver) com trechos normativos recuperados (se houver).\n\n"
        f"Pergunta original do usu√°rio:\n{question}\n\n"
        "Plano produzido pela IA 1 (AISchema):\n"
        f"{json.dumps(aischema, ensure_ascii=False, indent=2)}\n\n"
        f"Backend selecionado: {backend_mode}\n\n"
        f"Neo4j_error (se houver): {neo4j_error}\n\n"
        "Pr√©via dos resultados do grafo (primeiras linhas):\n"
        f"{graph_preview}\n\n"
        "Trechos normativos recuperados (RAG):\n"
        f"{rag_block}\n\n"
        "Tarefa:\n"
        "- Responda de forma clara, t√©cnica e rastre√°vel;\n"
        "- Quando houver s√©rie temporal, apresente em tabela (per√≠odo, munic√≠pio, m√©trica) e diga a unidade/defini√ß√£o se estiver no grafo;\n"
        "- Em cen√°rio h√≠brido, conecte explicitamente o diagn√≥stico quantitativo (grafo) com recomenda√ß√µes normativas (RAG);\n"
        "- Se o grafo falhar (neo4j_error) ou n√£o retornar linhas, deixe isso expl√≠cito e N√ÉO invente n√∫meros;\n"
        "- Se o RAG n√£o retornar trechos, deixe expl√≠cito e N√ÉO cite diretrizes inexistentes;\n"
        "- Quando usar evid√™ncia normativa, cite pelo marcador [DOC X] e id;\n"
        "- N√£o invente entidades, rela√ß√µes, datas ou m√©tricas."
    )

    resp = ollama.chat(
        model=MODEL_NAME_ANSWER,
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.get("message", {}).get("content", "").strip()


# ---------------------------------------------------------------------
# 3) IA 5 (OBSERVER)
# ---------------------------------------------------------------------
def build_observer_input(pipeline_output: Dict[str, Any]) -> str:
    """
    Constr√≥i um prompt detalhado para a IA 5 (OBSERVER), para que ela
    reconstrua o racioc√≠nio do agente de forma textual.
    """
    schema_ctx = (pipeline_output.get("schema_context_for_ia2") or "")[:6000]
    return (
        "Voc√™ √© a IA 5 (OBSERVER), respons√°vel por descrever, em linguagem natural, "
        "o racioc√≠nio seguido pelo LLM-Agent ao responder a pergunta.\n\n"
        "Abaixo est√£o os principais artefatos da intera√ß√£o:\n\n"
        f"Pergunta: {pipeline_output.get('question')}\n\n"
        f"AISchema (IA 1):\n{json.dumps(pipeline_output.get('aischema', {}), ensure_ascii=False, indent=2)}\n\n"
        f"Backend_mode: {pipeline_output.get('backend_mode')}\n\n"
        f"Cypher (IA 2):\n{pipeline_output.get('cypher')}\n\n"
        f"Cypher_warnings: {pipeline_output.get('cypher_warnings')}\n\n"
        f"Mini-RAG schema (passado para IA 2):\n{schema_ctx}\n\n"
        f"Neo4j_error: {pipeline_output.get('neo4j_error')}\n\n"
        f"graph_result_len: {pipeline_output.get('graph_result_len')}\n"
        f"graph_result_has_error_key: {pipeline_output.get('graph_result_has_error_key')}\n\n"
        f"Resultado do grafo (Neo4j):\n{json.dumps(pipeline_output.get('graph_result', []), ensure_ascii=False, indent=2)[:4000]}\n\n"
        f"Resultado do RAG normativo:\n{json.dumps(pipeline_output.get('rag_result', {}), ensure_ascii=False, indent=2)[:4000]}\n\n"
        f"Resposta final (IA 3):\n{pipeline_output.get('final_answer')}\n\n"
        "Explique passo a passo como o agente:\n"
        "- interpretou a pergunta,\n"
        "- escolheu o backend,\n"
        "- construiu a consulta ao grafo (se houver), incluindo como o schema_context foi usado,\n"
        "- executou no Neo4j (inclua erros se existirem),\n"
        "- (se h√≠brido) como o resultado/resumo do grafo guiou a consulta normativa,\n"
        "- utilizou os documentos normativos recuperados,\n"
        "- chegou √† resposta final.\n"
        "Se houver erros aparentes, inconsist√™ncias de cen√°rio, ou limita√ß√µes, comente tamb√©m."
    )

def get_observer_trace(pipeline_output: Dict[str, Any]) -> str:
    user_prompt = build_observer_input(pipeline_output)
    resp = ollama.chat(
        model=MODEL_NAME_OBSERVER,
        messages=[{"role": "user", "content": user_prompt}],
    )
    return resp.get("message", {}).get("content", "").strip()


# ---------------------------------------------------------------------
# 4) Salvamento ‚Äúpor sess√£o‚Äù (observability / evaluation na mesma pasta)
# ---------------------------------------------------------------------
def save_session_artifact(session_id: str, tag: str, payload: Dict[str, Any]) -> Path:
    """
    Salva um artefato JSON dentro de logs_sessions/<session_id>/timestamp__<tag>.json
    """
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    session_dir = LOGS_BASE_DIR / session_id
    session_dir.mkdir(parents=True, exist_ok=True)

    safe_tag = re.sub(r"[^a-zA-Z0-9_\-]+", "_", tag)[:80]
    fname = session_dir / f"{ts}__{safe_tag}.json"

    with fname.open("w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2, default=str)

    return fname


# ---------------------------------------------------------------------
# 5) Pipeline principal
# ---------------------------------------------------------------------
def ask_arbo_agent(
    question: str,
    session_id: Optional[str] = None,
    tag: Optional[str] = None,
    eval_case: Optional[Dict[str, Any]] = None,
    k_retrieval: int = 5,
) -> Dict[str, Any]:
    """
    Pipeline completo (IA1..IA3 + RAG + OBSERVER):

    1) PLANNER (IA 1) ‚Üí AISchema (backend_mode, needs_query_graph, needs_query_rag, rag_plan).
    2) Se usar grafo ‚Üí CYPHER_GENERATOR + Neo4j (IA 2).
    3) Se usar RAG ‚Üí Chroma Cloud (RAG normativo).
       - No h√≠brido: roda grafo primeiro e injeta resumo do grafo como contexto do retrieval.
    4) ANSWER (IA 3) ‚Üí resposta final integrando grafo + RAG.
    5) OBSERVER (IA 5) ‚Üí reconstru√ß√£o textual do racioc√≠nio.
    """
    if session_id is None:
        session_id = new_session_id()

    timers: Dict[str, float] = {}
    t0 = time.time()

    # --- IA 1: PLANNER ---
    t_planner = time.time()
    aischema = get_ai_schema_dict(question)
    timers["planner_s"] = time.time() - t_planner

    backend_mode = aischema.get("backend_mode") or "graph_only"

    # flags (padroniza: use needs_query_graph / needs_query_rag como fonte principal)
    needs_query_graph = bool(aischema.get("needs_query_graph") or (backend_mode in ("graph_only", "hybrid_graph_rag") and aischema.get("needs_query")))
    needs_query_rag   = bool(aischema.get("needs_query_rag") or (backend_mode in ("rag_only", "hybrid_graph_rag")))

    # hard-guard: se o PLANNER marcou como h√≠brido, for√ßamos execu√ß√£o de grafo + RAG
    if backend_mode == "hybrid_graph_rag":
        needs_query_graph = True
        needs_query_rag = True

    rag_plan = aischema.get("rag_plan") or {}

    cypher: Optional[str] = None
    cypher_warnings: List[str] = []
    graph_result: List[Dict[str, Any]] = []
    neo4j_error: Optional[str] = None
    cypher_debug: Dict[str, Any] = {}
  # auditoria: schema_context/prompt usados na IA2

    # --- IA 2 + NEO4J (grafo) ---
    if backend_mode in ("graph_only", "hybrid_graph_rag") and needs_query_graph:
        try:
            t_cypher = time.time()
            cypher, cypher_warnings, cypher_debug = get_cypher_from_schema(question, aischema)
            timers["cypher_s"] = time.time() - t_cypher

            cypher = sanitize_cypher(cypher)
            cypher = try_repair_common_cypher_issues(cypher)

            if not cypher_is_read_only(cypher):
                neo4j_error = "Cypher bloqueada por guardrail (n√£o-read-only)."
                graph_result = []
            else:
                t_neo = time.time()
                graph_result = run_cypher_safe(cypher)
                timers["neo4j_s"] = time.time() - t_neo

                # run_cypher_safe √†s vezes devolve [{"error": "..."}]
                if graph_result and isinstance(graph_result[0], dict) and "error" in graph_result[0]:
                    neo4j_error = graph_result[0]["error"]
        except Exception as e:
            neo4j_error = str(e)
            graph_result = []


    # --- Logs adicionais do grafo (para observabilidade / avalia√ß√£o) ---
    graph_result_len = len(graph_result) if isinstance(graph_result, list) else 0
    graph_result_has_error_key = bool(
        graph_result and isinstance(graph_result[0], dict) and "error" in graph_result[0]
    )

    schema_context_for_ia2 = cypher_debug.get("schema_context")
    schema_chunks_for_ia2  = cypher_debug.get("schema_chunks")
    cypher_prompt_for_ia2  = (cypher_debug.get("prompt") or "")[:8000]

    # --- RAG normativo ---
    rag_result: Dict[str, Any] = {}
    if backend_mode in ("rag_only", "hybrid_graph_rag") and needs_query_rag:
        t_rag = time.time()

        graph_summary = ""
        if backend_mode == "hybrid_graph_rag":
            graph_summary = build_graph_summary_for_rag(
                question=question,
                aischema=aischema,
                cypher=cypher,
                graph_rows=graph_result,
                neo4j_error=neo4j_error,
                max_chars=2000
            )
            rag_question = merge_rag_query_with_graph_context(question, graph_summary)
        else:
            rag_question = question

        rag_result = run_rag_for_question(
            rag_question,
            rag_plan=rag_plan,
            n_results=k_retrieval,
        )
        timers["rag_s"] = time.time() - t_rag

    # --- IA 3: resposta final ---
    t_ans = time.time()
    final_answer = build_final_answer(
        question=question,
        aischema=aischema,
        backend_mode=backend_mode,
        graph_rows=graph_result,
        rag_context=rag_result,
        neo4j_error=neo4j_error,
    )
    timers["answer_s"] = time.time() - t_ans
    timers["total_s"] = time.time() - t0

    # --- OBSERVER: reconstru√ß√£o do racioc√≠nio ---
    pipeline_output: Dict[str, Any] = {
        "session_id":        session_id,
        "tag":               tag,
        "question":          question,
        "aischema":          aischema,
        "backend_mode":      backend_mode,
        "needs_query_graph": needs_query_graph,
        "needs_query_rag":   needs_query_rag,
        "cypher":            cypher,
        "cypher_warnings":   cypher_warnings,
        "cypher_debug":      cypher_debug,
        "graph_result":      graph_result,
        "rag_result":        rag_result,
        "final_answer":      final_answer,
        "neo4j_error":       neo4j_error,
        "graph_result_len":  graph_result_len,
        "graph_result_has_error_key": graph_result_has_error_key,
        "schema_context_for_ia2": schema_context_for_ia2,
        "schema_chunks_for_ia2":  schema_chunks_for_ia2,
        "cypher_prompt_for_ia2":  cypher_prompt_for_ia2,
        "timers":            timers,
        "eval_case":         eval_case,
    }

    t_obs = time.time()
    observer_trace = get_observer_trace(pipeline_output)
    timers["observer_s"] = time.time() - t_obs
    pipeline_output["observer_trace"] = observer_trace

    # Salva ‚Äúobservability‚Äù na pasta da sess√£o
    try:
        p = save_session_artifact(session_id, "observability", pipeline_output)
        log_info(f"Execu√ß√£o salva em {p}")
    except Exception as e:
        log_warn(f"Falha ao salvar log de observabilidade: {e}")

    return pipeline_output

In [12]:
# ====== C√âLULA 11 ======

# =========================
# C√âLULA 11 ‚Äî IA 4 (EVALUATOR) + SU√çTE DE AVALIA√á√ÉO
# (com Recall@K + RAGAS-like (op√ß√£o A) + pesos por cen√°rio + salvar na sess√£o)
# =========================

import os
from typing import Any, Dict, List, Optional
import json
import datetime
import re

BASE_EVAL_PATH = os.path.join("eval_datasets", "JSON Datasets")

def _load_eval_dataset(filename: str):
    path = os.path.join(BASE_EVAL_PATH, filename)
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

# dataset unificado com 24 cen√°rios (graph_only, rag_only, hybrid_graph_rag)
try:
    UNIFIED_EVAL_DATASET: List[Dict[str, Any]] = _load_eval_dataset("dataset_evaluation.json")
except FileNotFoundError:
    UNIFIED_EVAL_DATASET = []
    log_warn(
        f"dataset_evaluation.json n√£o encontrado em {os.path.join(BASE_EVAL_PATH, 'dataset_evaluation.json')} ‚Äî "
        "UNIFIED_EVAL_DATASET ficar√° vazio."
    )


# ---------------------------------------------------------------------
# 0) Helpers
# ---------------------------------------------------------------------
def _safe_float(x: Any, default: float = 0.0) -> float:
    try:
        return float(x)
    except Exception:
        return default


def _flatten_chroma_list(x: Any) -> List[Any]:
    """
    Chroma √†s vezes devolve listas aninhadas (ex.: [ [id1, id2...] ]).
    """
    if x is None:
        return []
    if isinstance(x, list) and len(x) > 0 and isinstance(x[0], list):
        return x[0]
    if isinstance(x, list):
        return x
    return []


def compute_retrieval_metrics(
    rag_result: Dict[str, Any],
    gold_chunk_ids: List[str],
    k: int
) -> Dict[str, Any]:
    retrieved_ids = _flatten_chroma_list(rag_result.get("ids"))
    topk = retrieved_ids[:k] if retrieved_ids else []
    gold = list(gold_chunk_ids or [])

    gold_set = set(gold)
    topk_set = set(topk)

    hits = [rid for rid in topk if rid in gold_set]
    hit_at_k = 1.0 if len(hits) > 0 else 0.0

    if len(gold_set) == 0:
        recall_at_k = None
    else:
        recall_at_k = len(set(hits)) / float(len(gold_set))

    precision_at_k = (len(set(hits)) / float(len(topk))) if topk else 0.0

    return {
        "k": k,
        "retrieved_ids_topk": topk,
        "gold_chunk_ids": gold,
        "hits": hits,
        "hit_at_k": hit_at_k,
        "precision_at_k": precision_at_k,
        "recall_at_k": recall_at_k,
    }


def judge_ragas_like_option_a(
    question: str,
    answer: str,
    contexts: List[str],
    gold_answer: Optional[Dict[str, Any]] = None,
    max_context_chars: int = 6000
) -> Dict[str, Any]:
    """
    RAGAS-like (op√ß√£o A): usa um ‚Äújudge‚Äù LLM (o mesmo MODEL_NAME_EVALUATOR)
    para pontuar m√©tricas no estilo RAGAS, retornando JSON determin√≠stico no formato.
    """
    # compacta contexts
    ctx_joined = "\n\n---\n\n".join([c for c in (contexts or []) if isinstance(c, str)])
    ctx_joined = ctx_joined[:max_context_chars]

    gold_text = ""
    gold_kps = []
    if isinstance(gold_answer, dict):
        gold_text = gold_answer.get("text") or ""
        gold_kps = gold_answer.get("key_points") or []

    judge_prompt = {
        "task": "ragas_like_judge",
        "instructions": (
            "Voc√™ √© um avaliador de RAG no estilo RAGAS, mas deve produzir SOMENTE JSON v√°lido. "
            "Pontue cada m√©trica de 0.0 a 1.0 com base nas defini√ß√µes:\n"
            "- faithfulness: o quanto a resposta √© sustentada pelos CONTEXTOS fornecidos (n√£o inventar).\n"
            "- answer_relevancy: o quanto a resposta atende a PERGUNTA.\n"
            "- context_relevancy: o quanto os CONTEXTOS s√£o relevantes para a PERGUNTA.\n"
            "- context_precision: propor√ß√£o dos CONTEXTOS que realmente s√£o √∫teis/relevantes.\n"
            "- context_recall: o quanto os CONTEXTOS cobrem os pontos essenciais do gabarito (se fornecido).\n\n"
            "Regras:\n"
            "- Se contexts estiver vazio, context_* deve ser 0.0 e faithfulness deve ser 0.0.\n"
            "- N√£o use markdown. N√£o use texto fora do JSON.\n"
        ),
        "question": question,
        "answer": answer,
        "contexts": ctx_joined,
        "gold_answer_text": gold_text,
        "gold_key_points": gold_kps,
        "output_schema": {
            "faithfulness": "float 0..1",
            "answer_relevancy": "float 0..1",
            "context_relevancy": "float 0..1",
            "context_precision": "float 0..1",
            "context_recall": "float 0..1",
            "ragas_like_score": "float 0..1 (m√©dia simples das 5 m√©tricas)",
            "issues": "list[str]",
            "notes": "str"
        }
    }

    resp = ollama.chat(
        model=MODEL_NAME_EVALUATOR,
        messages=[{"role": "user", "content": json.dumps(judge_prompt, ensure_ascii=False)}],
        format="json",
    )
    raw = resp.get("message", {}).get("content", "{}")
    try:
        out = json.loads(raw)
    except Exception:
        out = {"raw": raw}

    # p√≥s-padroniza√ß√£o m√≠nima
    for k in ["faithfulness", "answer_relevancy", "context_relevancy", "context_precision", "context_recall", "ragas_like_score"]:
        if k in out and out[k] is not None:
            out[k] = _safe_float(out[k], 0.0)

    if "issues" not in out or not isinstance(out["issues"], list):
        out["issues"] = []

    return out


def compute_graph_execution_score(pipeline_output: Dict[str, Any]) -> float:
    """
    Score determin√≠stico de execu√ß√£o do grafo.

    Regras:
    - 0.0 se h√° neo4j_error
    - 0.0 se graph_result cont√©m chave "error" (defensivo)
    - 1.0 caso contr√°rio (mesmo que retorne 0 linhas)
    """
    if pipeline_output.get("neo4j_error"):
        return 0.0
    if pipeline_output.get("graph_result_has_error_key"):
        return 0.0
    return 1.0

def mark_not_applicable_blocks(report: Dict[str, Any]) -> None:
    scenario = report.get("scenario") or (report.get("eval_case", {}) or {}).get("scenario")

    if scenario == "graph_only":
        report.setdefault("rag_evaluation", {})
        report["rag_evaluation"]["not_applicable"] = True

    elif scenario == "rag_only":
        report.setdefault("graph_query_evaluation", {})
        report["graph_query_evaluation"]["not_applicable"] = True

    # hybrid_graph_rag: nenhum bloco √© N/A


def recompute_global_score(report: Dict[str, Any]) -> None:
    """
    Recalcula global_score_post com pesos por cen√°rio.
    Usa preferencialmente scores p√≥s-processados (graph_score_post, rag_score_post).
    """
    scenario = report.get("scenario") or (report.get("eval_case", {}) or {}).get("scenario")

    pe = _safe_float((report.get("planner_evaluation") or {}).get("overall_planner_score", 0.0), 0.0)

    gqe = report.get("graph_query_evaluation") or {}
    rge = report.get("rag_evaluation") or {}
    fae = report.get("final_answer_evaluation") or {}

    ge_model = _safe_float(gqe.get("overall_graph_score", 0.0), 0.0)
    ge_post  = _safe_float(gqe.get("overall_graph_score_post", ge_model), ge_model)

    re_model = _safe_float(rge.get("overall_rag_score", 0.0), 0.0)
    re_post  = _safe_float(rge.get("overall_rag_score_post", rge.get("rag_score_post", re_model)), re_model)

    ae = _safe_float(fae.get("overall_answer_score", 0.0), 0.0)

    # facilita export/log: replica componentes no bloco overall_evaluation
    report.setdefault("overall_evaluation", {})
    report["overall_evaluation"]["overall_planner_score"] = pe
    report["overall_evaluation"]["overall_graph_score_model"] = ge_model
    report["overall_evaluation"]["overall_graph_score_post"] = ge_post
    report["overall_evaluation"]["overall_rag_score_model"] = re_model
    report["overall_evaluation"]["overall_rag_score_post"] = re_post
    report["overall_evaluation"]["overall_answer_score"] = ae

    rag_na = bool(rge.get("not_applicable"))
    grafo_na = bool(gqe.get("not_applicable"))

    # Pesos ajustados por cen√°rio (mais coerentes com a natureza da tarefa)
    if scenario == "graph_only":
        # Mais peso no grafo + resposta final
        global_score = 0.25 * pe + 0.55 * ge_post + 0.20 * ae

    elif scenario == "rag_only":
        # Mais peso em RAG (retrieval + qualidade) + resposta final
        global_score = 0.20 * pe + 0.55 * re_post + 0.25 * ae

    else:
        # hybrid_graph_rag
        w_planner = 0.15
        w_graph   = 0.35 if not grafo_na else 0.0
        w_rag     = 0.30 if not rag_na else 0.0
        w_answer  = 0.20

        w_sum = w_planner + w_graph + w_rag + w_answer
        if w_sum <= 0:
            global_score = 0.0
        else:
            global_score = (w_planner * pe + w_graph * ge_post + w_rag * re_post + w_answer * ae) / w_sum

    report.setdefault("overall_evaluation", {})
    report["overall_evaluation"]["global_score_post"] = global_score


# ---------------------------------------------------------------------
# 1) Payload para IA 4 (EVALUATOR)
# ---------------------------------------------------------------------
def build_evaluator_input(
    pipeline_output: Dict[str, Any],
    eval_case: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    return {
        "question":       pipeline_output.get("question"),
        "aischema":       pipeline_output.get("aischema", {}),
        "backend_mode":   pipeline_output.get("backend_mode"),
        "needs_query_graph": pipeline_output.get("needs_query_graph"),
        "needs_query_rag":   pipeline_output.get("needs_query_rag"),
        "cypher":         pipeline_output.get("cypher"),
        "neo4j_error":    pipeline_output.get("neo4j_error"),
        "graph_result":   pipeline_output.get("graph_result"),
        "graph_result_len": pipeline_output.get("graph_result_len"),
        "graph_result_has_error_key": pipeline_output.get("graph_result_has_error_key"),
        "schema_context_for_ia2": (pipeline_output.get("schema_context_for_ia2") or "")[:6000],
        "rag_result":     pipeline_output.get("rag_result"),
        "final_answer":   pipeline_output.get("final_answer"),
        "observer_trace": pipeline_output.get("observer_trace"),
        "timers":         pipeline_output.get("timers", {}),
        "eval_case":      eval_case or pipeline_output.get("eval_case") or {},
    }

def evaluate_pipeline_interaction(
    pipeline_output: Dict[str, Any],
    eval_case: Optional[Dict[str, Any]] = None,
    k_retrieval: int = 5,
) -> Dict[str, Any]:

    payload = build_evaluator_input(pipeline_output, eval_case=eval_case)

    # 2.1) chamada base do EVALUATOR (mant√©m suas notas de planner/grafo/resposta)
    resp = ollama.chat(
        model=MODEL_NAME_EVALUATOR,
        messages=[{
            "role": "user",
            "content": json.dumps(payload, ensure_ascii=False, indent=2),
        }],
        format="json",
    )

    raw = resp.get("message", {}).get("content", "{}")
    try:
        report = json.loads(raw)
    except Exception:
        report = {"raw": raw}

    # 2.2) metadados para auditoria
    report["session_id"]   = pipeline_output.get("session_id") or new_session_id()
    report["question"]     = pipeline_output.get("question")
    report["backend_mode"] = pipeline_output.get("backend_mode")

    # auditoria (campos pedidos p/ an√°lise de execu√ß√£o do grafo)
    report["neo4j_error"] = pipeline_output.get("neo4j_error")
    report["graph_result_len"] = pipeline_output.get("graph_result_len")
    report["graph_result_has_error_key"] = pipeline_output.get("graph_result_has_error_key")
    report["cypher_warnings"] = pipeline_output.get("cypher_warnings")
    report["schema_context_for_ia2"] = (pipeline_output.get("schema_context_for_ia2") or "")[:6000]

    if eval_case is not None:
        report["eval_case_id"] = eval_case.get("id")
        report["scenario"]     = eval_case.get("scenario")
        report["eval_case"]    = eval_case

    # 2.3) N/A por cen√°rio
    mark_not_applicable_blocks(report)

    # -----------------------------------------------------------------
    # 2.4) P√≥s-processamento determin√≠stico do GRAFO (execu√ß√£o)
    #      (evita ‚Äúnota alta‚Äù quando Neo4j falhou)
    # -----------------------------------------------------------------
    gqe = report.setdefault("graph_query_evaluation", {})
    if not bool(gqe.get("not_applicable")):
        exec_score = compute_graph_execution_score(pipeline_output)
        gqe["graph_execution_score"] = exec_score

        ge_model = _safe_float(gqe.get("overall_graph_score", 0.0), 0.0)

        # Se houve erro Neo4j, derruba o score p√≥s-processado do grafo
        if exec_score <= 0.0:
            ge_post = min(0.2, ge_model)  # clamp forte
            gqe.setdefault("issues", [])
            gqe["issues"].append("Neo4j retornou erro (execu√ß√£o do grafo falhou).")
        else:
            # mistura leve: mant√©m coer√™ncia do modelo, mas registra execu√ß√£o OK
            ge_post = 0.8 * ge_model + 0.2 * exec_score

        gqe["overall_graph_score_post"] = ge_post
    else:
        gqe["overall_graph_score_post"] = None

    # -----------------------------------------------------------------
    # 2.5) P√≥s-processamento determin√≠stico do RAG (Recall@K + RAGAS-like)
    #      (somente se aplic√°vel e se o caso pedir)
    # -----------------------------------------------------------------
    rge = report.setdefault("rag_evaluation", {})
    scenario = report.get("scenario") or (report.get("eval_case", {}) or {}).get("scenario")

    metrics_cfg = (eval_case or {}).get("metrics_to_compute", {}) if isinstance(eval_case, dict) else {}
    compute_recall = bool(metrics_cfg.get("recall_at_k"))
    compute_ragas  = bool(metrics_cfg.get("ragas"))

    # campos padr√£o (sempre presentes, mesmo quando None)
    rge.setdefault("compute_recall_at_k", compute_recall)
    rge.setdefault("compute_ragas_like", compute_ragas)

    if rge.get("not_applicable") or scenario == "graph_only":
        # n√£o calcula nada
        rge["recall_at_k"] = None
        rge["precision_at_k"] = None
        rge["hit_at_k"] = None
        rge["retrieved_ids_topk"] = []
        rge["gold_chunk_ids"] = (eval_case or {}).get("gold_chunk_ids", []) if isinstance(eval_case, dict) else []
        rge["ragas_like_score"] = None
        rge["overall_rag_score_post"] = 0.0
        report.setdefault("overall_evaluation", {})
        report["overall_evaluation"]["rag_score_post"] = 0.0

    else:
        rag_result = pipeline_output.get("rag_result") or {}
        gold_chunk_ids = (eval_case or {}).get("gold_chunk_ids", []) if isinstance(eval_case, dict) else []

        docs = _flatten_chroma_list(rag_result.get("documents"))
        retrieved_any = len(_flatten_chroma_list(rag_result.get("ids"))) > 0

        # Recall@K
        if compute_recall:
            rm = compute_retrieval_metrics(rag_result, gold_chunk_ids, k=k_retrieval)
            rge["recall_at_k"] = rm["recall_at_k"]
            rge["precision_at_k"] = rm["precision_at_k"]
            rge["hit_at_k"] = rm["hit_at_k"]
            rge["retrieved_ids_topk"] = rm["retrieved_ids_topk"]
            rge["gold_chunk_ids"] = rm["gold_chunk_ids"]
            rge["hits"] = rm["hits"]
        else:
            rge["recall_at_k"] = None
            rge["precision_at_k"] = None
            rge["hit_at_k"] = None
            rge["retrieved_ids_topk"] = _flatten_chroma_list(rag_result.get("ids"))[:k_retrieval]
            rge["gold_chunk_ids"] = gold_chunk_ids
            rge["hits"] = []

        # RAGAS-like (op√ß√£o A)
        if compute_ragas:
            j = judge_ragas_like_option_a(
                question=pipeline_output.get("question") or "",
                answer=pipeline_output.get("final_answer") or "",
                contexts=docs,
                gold_answer=(eval_case or {}).get("gold_answer") if isinstance(eval_case, dict) else None
            )
            rge["faithfulness"] = j.get("faithfulness")
            rge["answer_relevancy"] = j.get("answer_relevancy")
            rge["context_relevancy"] = j.get("context_relevancy")
            rge["context_precision"] = j.get("context_precision")
            rge["context_recall"] = j.get("context_recall")
            rge["ragas_like_score"] = j.get("ragas_like_score")
            rge.setdefault("issues", [])
            rge["issues"].extend(j.get("issues") or [])
        else:
            rge["faithfulness"] = None
            rge["answer_relevancy"] = None
            rge["context_relevancy"] = None
            rge["context_precision"] = None
            rge["context_recall"] = None
            rge["ragas_like_score"] = None

        # Se deveria usar RAG, mas n√£o recuperou nada, zera score p√≥s
        should_use_rag = True if scenario in ("rag_only", "hybrid_graph_rag") else bool(rge.get("should_use_rag_expected"))
        if should_use_rag and not retrieved_any:
            rge.setdefault("issues", [])
            rge["issues"].append("RAG era esperado, mas nenhum trecho foi recuperado.")
            rag_score_post = 0.0
        else:
            # combina Recall@K + RAGAS-like
            recall_val = rge.get("recall_at_k")
            recall_val = _safe_float(recall_val, 0.0) if recall_val is not None else 0.0

            ragas_like = rge.get("ragas_like_score")
            ragas_like = _safe_float(ragas_like, 0.0) if ragas_like is not None else 0.0

            # pesos (ajuste fino): enfatiza qualidade sem√¢ntica (ragas-like)
            rag_score_post = 0.40 * recall_val + 0.60 * ragas_like

        rge["overall_rag_score_post"] = rag_score_post
        report.setdefault("overall_evaluation", {})
        report["overall_evaluation"]["rag_score_post"] = rag_score_post

        # Mant√©m compatibilidade: overall_rag_score (modelo) pode existir ou n√£o
        if "overall_rag_score" not in rge:
            rge["overall_rag_score"] = None

    # 2.6) recomputa global_score_post com pesos por cen√°rio
    recompute_global_score(report)

    return report


# ---------------------------------------------------------------------
# 3) Salvamento: avalia√ß√£o junto da sess√£o (logs_sessions/<session_id>/...)
# ---------------------------------------------------------------------
def save_eval_report(report: Dict[str, Any], folder: Optional[str] = None, tag: str = "evaluation") -> str:
    """
    Salva o relat√≥rio:
      - se folder=None: em logs_sessions/<session_id>/timestamp__evaluation.json
      - se folder fornecido: em folder/<session_id>.json (modo legado)
    """
    sid = report.get("session_id") or new_session_id()

    if folder is None:
        # salva dentro da pasta da sess√£o (mesma do observability)
        try:
            p = save_session_artifact(sid, tag, report)  # fun√ß√£o da c√©lula 10
            log_info(f"Execu√ß√£o salva em {p}")
            return str(p)
        except Exception as e:
            log_warn(f"Falha ao salvar evaluation na sess√£o: {e}")

    # fallback legado
    os.makedirs(folder or "reports", exist_ok=True)
    fname = f"{sid}.json"
    path  = os.path.join(folder or "reports", fname)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2, default=str)
    return path


# ---------------------------------------------------------------------
# 4) Runner principal (dataset_evaluation.json)
# ---------------------------------------------------------------------
def run_eval_from_dataset_evaluation(
    k_retrieval: int = 5,
    reports_folder: str = "reports_dataset_eval",
    titulo: str = "TESTES PIPELINE (IA1..IA5 + RAG + ANSWER + OBSERVER + EVALUATOR)",
):
    """
    - Roda o pipeline (ask_arbo_agent) para cada caso
    - Avalia com IA4 + p√≥s-processamento (Recall@K + RAGAS-like quando aplic√°vel)
    - Salva:
        * observability: logs_sessions/<sid>/...__observability.json  (pela c√©lula 10)
        * evaluation:    logs_sessions/<sid>/...__evaluation.json     (aqui)
      e mant√©m um _summary_all_cases.json em reports_folder.
    """
    os.makedirs(reports_folder, exist_ok=True)

    print(f"\n===== {titulo} =====")
    print(f"Total de testes: {len(UNIFIED_EVAL_DATASET)}")

    all_reports: List[Dict[str, Any]] = []

    for i, case in enumerate(UNIFIED_EVAL_DATASET, start=1):
        q        = case["question"]
        cid      = case.get("id", f"case_{i:03d}")
        scenario = case.get("scenario")
        expected = case.get("expected", {})

        # 1) Executa o pipeline
        session_id = new_session_id()
        pipeline_output = ask_arbo_agent(
            q,
            session_id=session_id,
            tag=cid,
            eval_case=case,
            k_retrieval=k_retrieval,
        )

        # 2) Avalia√ß√£o + p√≥s-processamento
        report = evaluate_pipeline_interaction(pipeline_output, eval_case=case, k_retrieval=k_retrieval)

        # 3) Salva relat√≥rio na PASTA DA SESS√ÉO
        path_eval = save_eval_report(report, folder=None, tag="evaluation")

        # 4) Scores (modelo vs p√≥s)
        overall = report.get("overall_evaluation", {}) or {}
        score_model = overall.get("global_score") or overall.get("score")
        score_post  = overall.get("global_score_post")
        rag_post    = overall.get("rag_score_post")

        # 5) Impress√£o detalhada
        print("\n" + "=" * 80)

        aischema = pipeline_output.get("aischema", {}) or {}
        backend_mode_obs = pipeline_output.get("backend_mode")
        needs_query_graph_obs = bool(pipeline_output.get("needs_query_graph"))
        needs_query_rag_obs   = bool(pipeline_output.get("needs_query_rag"))

        backend_mode_exp    = expected.get("backend_mode")
        needs_query_exp     = expected.get("needs_query")
        needs_query_rag_exp = expected.get("needs_query_rag")

        backend_ok = (backend_mode_obs == backend_mode_exp) if backend_mode_exp else True
        status_symbol = "‚úÖ" if backend_ok else "‚ùå"

        timers = pipeline_output.get("timers", {}) or {}
        t_planner  = float(timers.get("planner_s", 0.0) or 0.0)
        t_cypher   = float(timers.get("cypher_s", 0.0) or 0.0)
        t_neo4j    = float(timers.get("neo4j_s", 0.0) or 0.0)
        t_rag      = float(timers.get("rag_s", 0.0) or 0.0)
        t_answer   = float(timers.get("answer_s", 0.0) or 0.0)
        t_observer = float(timers.get("observer_s", 0.0) or 0.0)
        t_total    = float(timers.get("total_s", 0.0) or 0.0)

        neo4j_error = pipeline_output.get("neo4j_error")
        neo4j_err_line = f" | neo4j_error={neo4j_error}" if neo4j_error else ""

        print(f"[{i:03d}] {status_symbol} PERGUNTA: {q}")
        print(
            f"      (IA1: {t_planner:.2f}s | IA2: {t_cypher:.2f}s | Neo4j: {t_neo4j:.2f}s | "
            f"RAG: {t_rag:.2f}s | ANSWER: {t_answer:.2f}s | OBS: {t_observer:.2f}s | TOTAL: {t_total:.2f}s){neo4j_err_line}"
        )
        print(
            f"      backend_mode='{backend_mode_obs}' (esperado='{backend_mode_exp}') | "
            f"needs_query_graph={needs_query_graph_obs} (esperado={needs_query_exp}) | "
            f"needs_query_rag={needs_query_rag_obs} (esperado={needs_query_rag_exp})"
        )
        if score_model is not None:
            print(f"      Score global (EVALUATOR/modelo): {score_model}")
        if score_post is not None:
            print(f"      Score global (post-processado): {score_post}")
        if rag_post is not None:
            print(f"      rag_score_post (Recall@k + RAGAS-like): {rag_post}")

        print(f"      Relat√≥rio evaluation salvo em: {path_eval}")
        print("-" * 80)

        print("üîπ [IA 1] SCHEMA JSON (PLANNER):")
        print(json.dumps(aischema, ensure_ascii=False, indent=2))
        print("----------------------------------------")

        cypher = pipeline_output.get("cypher")
        if backend_mode_obs in ("graph_only", "hybrid_graph_rag") and cypher:
            print("üîπ [IA 2] CONSULTA CYPHER (CYPHER_GENERATOR):")
            print(cypher)
        else:
            print("üîπ [IA 2] N/A (sem consulta ao grafo para este caso)")
        print("----------------------------------------")

        final_answer = pipeline_output.get("final_answer") or ""
        print("üîπ [IA 3] RESPOSTA FINAL (completa):")
        print(final_answer)
        print("----------------------------------------")

        print("üîπ [IA 4] RELAT√ìRIO DE AVALIA√á√ÉO (todas as m√©tricas):")
        print(json.dumps(report, ensure_ascii=False, indent=2))
        print("----------------------------------------")

        all_reports.append({
            "id": cid,
            "scenario": scenario,
            "question": q,
            "score_model": score_model,
            "score_post": score_post,
            "rag_score_post": rag_post,
            "session_id": report.get("session_id"),
            "evaluation_path": path_eval,
        })

    summary_path = os.path.join(reports_folder, "_summary_all_cases.json")
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump(all_reports, f, ensure_ascii=False, indent=2, default=str)

    print(f"\nResumo geral salvo em: {summary_path}")
    return all_reports

In [13]:
# =========================
# C√âLULA 12 ‚Äî TESTES E RUNNER
# =========================

"""
Aqui continuam os testes antigos (TESTES_GRAPH_ONLY, TESTES_RAG_ONLY, TESTES_HYBRID_GRAPH_RAG)
e a fun√ß√£o run_tests_pipeline_layout_final, que voc√™ j√° tinha para depura√ß√£o manual.

Eles foram mantidos, mas a execu√ß√£o padr√£o abaixo agora usa o dataset_evaluation.json.
"""

# [...] (todo o bloco de defini√ß√£o de TESTES_GRAPH_ONLY / RAG_ONLY / HYBRID_GRAPH_RAG,
#       mais run_tests_pipeline_layout_final, permanece igual ao seu c√≥digo atual.)

# Execu√ß√£o t√≠pica no notebook / script para o TCC:
#   - usa o dataset_evaluation.json (24 cen√°rios),
#   - gera um relat√≥rio JSON por pergunta em reports_dataset_eval/,
#   - e um resumo geral em reports_dataset_eval/_summary_all_cases.json
run_eval_from_dataset_evaluation(
    k_retrieval=5
)



===== TESTES PIPELINE (IA1..IA5 + RAG + ANSWER + OBSERVER + EVALUATOR) =====
Total de testes: 4
[INFO] Carregando √≠ndice vetorizado do schema...
[INFO] Carregando modelo de embedding: BAAI/bge-m3
[INFO] Executando Cypher (read-only): MATCH (n:Notificacao)-[:TEM_NO_DIA]->(d:Dia), 
    (m:Municipio)<-[:LOCALIZA]-(n),
    (a:Agravo {nome:'DENGUE'})<-[:E_DO_AGRAVO]-(c:Casos)
WHERE d.data > date() - 7 * day
AND m.nome = 'SAO JOSE DO RI...
[ERROR] Erro ao executar Cypher: {neo4j_code: Neo.ClientError.Statement.SyntaxError} {message: Variable `day` not defined (line 4, column 29 (offset: 168))
"WHERE d.data > date() - 7 * day"
                             ^} {gql_status: 42001} {gql_status_description: error: syntax error or access rule violation - invalid syntax}
[INFO] Execu√ß√£o salva em logs_sessions\378aadf57175\20251213-054207__observability.json
[INFO] Execu√ß√£o salva em logs_sessions\378aadf57175\20251213-054309__evaluation.json

[001] ‚úÖ PERGUNTA: Considerando as notifica√ß√µes d



[INFO] Execu√ß√£o salva em logs_sessions\2a4b90558f17\20251213-054626__observability.json
[INFO] Execu√ß√£o salva em logs_sessions\2a4b90558f17\20251213-054707__evaluation.json

[002] ‚úÖ PERGUNTA: Ao analisar a quantidade mensal de a√ß√µes de educa√ß√£o em sa√∫de relacionadas ao controle do Aedes em S√£o Jos√© do Rio Preto em 2023, essas a√ß√µes est√£o compat√≠veis com o papel da APS descrito no "Guia Pr√°tico de Arboviroses Urbanas na APS"?
      (IA1: 44.63s | IA2: 16.76s | Neo4j: 0.80s | RAG: 2.97s | ANSWER: 35.78s | OBS: 96.01s | TOTAL: 100.94s)
      backend_mode='hybrid_graph_rag' (esperado='hybrid_graph_rag') | needs_query_graph=True (esperado=True) | needs_query_rag=True (esperado=True)
      Score global (post-processado): 0.20499999999999996
      rag_score_post (Recall@k + RAGAS-like): 0.44999999999999996
      Relat√≥rio evaluation salvo em: logs_sessions\2a4b90558f17\20251213-054707__evaluation.json
-------------------------------------------------------------------------



[INFO] Execu√ß√£o salva em logs_sessions\bf4991287a62\20251213-055506__observability.json
[INFO] Execu√ß√£o salva em logs_sessions\bf4991287a62\20251213-055544__evaluation.json

[004] ‚úÖ PERGUNTA: Analisando a radia√ß√£o solar di√°ria em S√£o Jos√© do Rio Preto no m√™s de agosto de 2020, e as orienta√ß√µes das Diretrizes Nacionais e do Guia Pr√°tico de Arboviroses Urbanas na APS, como esses dados podem apoiar o planejamento das a√ß√µes de preven√ß√£o para o per√≠odo pr√©-epid√™mico?
      (IA1: 47.32s | IA2: 13.03s | Neo4j: 0.23s | RAG: 3.01s | ANSWER: 43.99s | OBS: 91.35s | TOTAL: 107.57s)
      backend_mode='hybrid_graph_rag' (esperado='hybrid_graph_rag') | needs_query_graph=True (esperado=True) | needs_query_rag=True (esperado=True)
      Score global (post-processado): 0.20499999999999996
      rag_score_post (Recall@k + RAGAS-like): 0.44999999999999996
      Relat√≥rio evaluation salvo em: logs_sessions\bf4991287a62\20251213-055544__evaluation.json
-------------------------------

[{'id': 'hybrid_sjrp_004',
  'scenario': 'hybrid_graph_rag',
  'question': 'Considerando as notifica√ß√µes di√°rias de dengue em S√£o Jos√© do Rio Preto nos √∫ltimos 7 dias, como a Aten√ß√£o Prim√°ria √† Sa√∫de deve organizar o acolhimento e a classifica√ß√£o de risco dos pacientes, de acordo com o documento "Dengue: diagn√≥stico e manejo cl√≠nico de adultos e crian√ßas"?',
  'score_model': None,
  'score_post': 0.13499999999999998,
  'rag_score_post': 0.44999999999999996,
  'session_id': '378aadf57175',
  'evaluation_path': 'logs_sessions\\378aadf57175\\20251213-054309__evaluation.json'},
 {'id': 'hybrid_sjrp_005',
  'scenario': 'hybrid_graph_rag',
  'question': 'Ao analisar a quantidade mensal de a√ß√µes de educa√ß√£o em sa√∫de relacionadas ao controle do Aedes em S√£o Jos√© do Rio Preto em 2023, essas a√ß√µes est√£o compat√≠veis com o papel da APS descrito no "Guia Pr√°tico de Arboviroses Urbanas na APS"?',
  'score_model': None,
  'score_post': 0.20499999999999996,
  'rag_score_pos