In [40]:
import pandas as pd

In [41]:
import os
import pandas as pd
from datetime import datetime, timedelta
from langfuse import Langfuse
from dotenv import load_dotenv

load_dotenv()

langfuse = Langfuse()  # Auto-reads LANGFUSE_* env vars

# def download_all_traces_to_csv(
#     output_file: str = "my_langfuse_traces_nov2025.csv",
#     days: int = 180,
#     limit_per_page: int = 100,
# ):
output_file: str = "my_langfuse_traces_nov2025.csv",
days= 1
limit_per_page: int = 100,
all_traces = []
page = 1
from_dt = datetime.utcnow() - timedelta(days=days)

print(f"Downloading all traces from the last {days} days (since {from_dt.isoformat()}Z)...")

while True:
    print(f"Fetching page {page} (limit={limit_per_page})...")

    response = langfuse.api.trace.list(
        page=page,
        limit=limit_per_page,
        from_timestamp=from_dt,
        order_by="timestamp.DESC",
        # THIS IS THE KEY: explicitly ask for token usage & cost
        fields="core,io,metrics,scores",
        # core = id, timestamp, name, etc.
        # io    = input/output
        # metrics = "latency, totalCost, inputTokens, outputTokens, etc",
        # scores = optional
    )

    if not response.data or len(response.data) == 0:
        print("No more traces. Done!")
        break

    print(f"  → Got {len(response.data)} traces (total so far: {len(all_traces) + len(response.data)})")

    for trace in response.data:
        # Now these fields exist because we requested "metrics"
        all_traces.append({
            "trace_id": trace.id,
            "timestamp": trace.timestamp.isoformat() if trace.timestamp else None,
            "name": trace.name or "",
            "user_id": trace.user_id or "",
            "session_id": trace.session_id or "",
            "input": trace.input,
            "output": trace.output,
            "model": getattr(trace, "model", None),  
            "input_tokens": getattr(trace, "input_tokens", None),
            "output_tokens": getattr(trace, "output_tokens", None),
            "total_tokens": getattr(trace, "total_tokens", None),
            "input_cost_usd": getattr(trace, "input_cost", None),
            "output_cost_usd": getattr(trace, "output_cost", None),
            "total_cost_usd": getattr(trace, "total_cost", None),
            "latency_seconds": getattr(trace, "latency", None),
            "metadata": str(trace.metadata) if trace.metadata else "",
            "tags": ", ".join(trace.tags) if hasattr(trace, "tags") and trace.tags else "",
            "release": trace.release or "",
            "version": trace.version or "",
        })

    page += 1

# Save
# if all_traces:
df = pd.DataFrame(all_traces)
#     print(df)
#     # df.to_csv(output_file, index=False)
#     total_cost = df["total_cost_usd"].sum()
#     print(f"\nSUCCESS! Exported {len(df):,} traces → {output_file}")
#     print(f"Total LLM cost in period: ${total_cost:,.6f}")
# else:
#     print("No traces found.")


# RUN
# if __name__ == "__main__":
#     print(download_all_traces_to_csv(days=180, limit_per_page=100))
 

Downloading all traces from the last 1 days (since 2025-11-17T08:45:12.514363Z)...
Fetching page 1 (limit=(100,))...
  → Got 18 traces (total so far: 18)
Fetching page 2 (limit=(100,))...
No more traces. Done!


In [42]:
df.iloc[6]

trace_id                            9712c7004f63738901b225577c27835a
timestamp                           2025-11-18T07:53:19.784000+00:00
name                                                        call_llm
user_id                                                             
session_id                                                          
input              {'model': 'azure/gpt-4o-mini', 'config': {'sys...
output             {'content': {'parts': [{'text': 'Here is the i...
model                                                           None
input_tokens                                                    None
output_tokens                                                   None
total_tokens                                                    None
input_cost_usd                                                  None
output_cost_usd                                                 None
total_cost_usd                                                   0.0
latency_seconds                   

In [43]:
obs = langfuse.api.observations.get_many()

In [44]:
for o in obs.data:
    print(o)
    

id='887c384728cbb338' trace_id='5f98442073757da5118c817fc12713d2' type='GENERATION' name='call_llm' start_time=datetime.datetime(2025, 11, 18, 8, 41, 22, 568000, tzinfo=datetime.timezone.utc) end_time=datetime.datetime(2025, 11, 18, 8, 41, 23, 897000, tzinfo=datetime.timezone.utc) completion_start_time=None model='azure/gpt-4o-mini' model_parameters={} input={'model': 'azure/gpt-4o-mini', 'config': {'system_instruction': '\n        **Role:** Host agent for the agent-to-agent protocol; delegates queries to specialized remote agents with maximum efficiency.\n\n**Core Directives:**\n\n* **Task Delegation:** Use the `send_message` function to assign precise, actionable tasks to remote agents.\n* **Full Context Provision:** If an agent repeatedly asks for user confirmation, it likely lacks conversation history. Include all relevant context in the task to prevent this.\n* **Autonomous Multi-Agent Engagement:** Engage any required agents directly—never seek user permission or preference. If m

In [45]:
obs.dict()

{'data': [{'id': '887c384728cbb338',
   'traceId': '5f98442073757da5118c817fc12713d2',
   'type': 'GENERATION',
   'name': 'call_llm',
   'startTime': datetime.datetime(2025, 11, 18, 8, 41, 22, 568000, tzinfo=datetime.timezone.utc),
   'endTime': datetime.datetime(2025, 11, 18, 8, 41, 23, 897000, tzinfo=datetime.timezone.utc),
   'completionStartTime': None,
   'model': 'azure/gpt-4o-mini',
   'modelParameters': {},
   'input': {'model': 'azure/gpt-4o-mini',
    'config': {'system_instruction': '\n        **Role:** Host agent for the agent-to-agent protocol; delegates queries to specialized remote agents with maximum efficiency.\n\n**Core Directives:**\n\n* **Task Delegation:** Use the `send_message` function to assign precise, actionable tasks to remote agents.\n* **Full Context Provision:** If an agent repeatedly asks for user confirmation, it likely lacks conversation history. Include all relevant context in the task to prevent this.\n* **Autonomous Multi-Agent Engagement:** Engage 

In [61]:
from langfuse import Langfuse
import json
import os
from openai import AzureOpenAI   # or your actual client

langfuse = Langfuse()
trace_id = "5f98442073757da5118c817fc12713d2"

# 1. Fetch the trace
trace = langfuse.api.trace.get(trace_id=trace_id)

# === SMART EXTRACTION OF INPUT & OUTPUT (handles all common formats) ===
raw_input  = trace.input
raw_output = trace.output

# Extract clean question / input
if isinstance(raw_input, dict):
    question = raw_input.get("question") or raw_input.get("messages", [{}])[-1].get("content", "") or str(raw_input)
elif hasattr(raw_input, 'model_dump'):  # Pydantic objects
    question = str(raw_input)
else:
    question = str(raw_input or "")

# Extract clean answer text (handles Gemini, OpenAI, LiteLLM, etc.)
if isinstance(raw_output, dict):
    # Gemini / Vertex AI style
    if "content" in raw_output and "parts" in raw_output["content"]:
        answer = "".join(part.get("text", "") for part in raw_output["content"]["parts"])
    # OpenAI style
    elif "choices" in raw_output:
        answer = raw_output["choices"][0]["message"]["content"]
    else:
        answer = raw_output.get("content", str(raw_output))
else:
    answer = str(raw_output or "No output")

print(f"Question: {question}")
print(f"Answer:   {answer}\n")

# === Rest is the same – Azure LLM judge ===
azure_client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-08-01-preview"
)

judge_response = azure_client.chat.completions.create(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
    temperature=0.0,
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": f"""
You are an expert fact-checker. Score factual correctness from 0.0 to 1.0.

Question: {question}
Answer: {answer}

Respond ONLY with JSON: {{"score": 0.94, "explanation": "short reason"}}
        """.strip()}
    ]
)

result = json.loads(judge_response.choices[0].message.content)
score = float(result["score"])

langfuse.create_score(
    trace_id=trace_id,
    name="correctness",
    value=score,
    comment=f"Auto LLM judge: {result.get('explanation', '')}",
    data_type="NUMERIC"
)

print(f"Final LLM Judge Score → {score}")

Question: {'model': 'azure/gpt-4o-mini', 'config': {'system_instruction': '\n        **Role:** Host agent for the agent-to-agent protocol; delegates queries to specialized remote agents with maximum efficiency.\n\n**Core Directives:**\n\n* **Task Delegation:** Use the `send_message` function to assign precise, actionable tasks to remote agents.\n* **Full Context Provision:** If an agent repeatedly asks for user confirmation, it likely lacks conversation history. Include all relevant context in the task to prevent this.\n* **Autonomous Multi-Agent Engagement:** Engage any required agents directly—never seek user permission or preference. If multiple agents are needed, orchestrate them seamlessly.\n* **Intelligent Inter-Agent Collaboration:** Instruct agents to determine if they need data from another agent. **If Agent A says "I need X to proceed" and Agent B can provide X, immediately query Agent B, then resubmit the updated task to Agent A.**\n* **Transparent Output:** Deliver the full

In [52]:
x = main()

None
None
