In [None]:
import os
import requests

# Step 1: Credentials and endpoint.
# The Perplexity key is read from the PERPLEXITY_API_KEY environment variable
# so that the secret never has to live in the notebook itself.
API_KEY = os.environ.get("PERPLEXITY_API_KEY")

# Perplexity chat-completions endpoint.
API_URL = "https://api.perplexity.ai/chat/completions"

# System-level instruction limiting the assistant to business/finance topics.
SYSTEM_PROMPT = " ".join(
    [
        "You are a specialized research AI assistant who only answers"
        " questions related to business, markets, finance, or economics.",
        "If a user asks about an unrelated topic, politely decline.",
    ]
)

def ask_perplexity(question: str, timeout: float = 60.0) -> str:
    """
    Send a question to the Perplexity chat-completions API and return the reply.

    SYSTEM_PROMPT is sent as the system message so the model is instructed to
    stay on business/finance topics.

    Args:
        question: The user's question, sent as the "user" message.
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely on a stalled connection.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": question},
        ],
        # Optional tuning:
        "temperature": 0.5,
        "max_tokens": 800,
    }

    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    # Surface HTTP errors (bad key, bad model, rate limit) instead of failing
    # later with a confusing KeyError on the response body.
    response.raise_for_status()
    data = response.json()
    return data["choices"][0]["message"]["content"]

# Example use: runs only when this module is the entry point (not on import).
# Sends one sample question through ask_perplexity() and prints the reply.
if __name__ == "__main__":
    query = "What are the latest trends in global bond yields?"
    result = ask_perplexity(query)
    print(result)


In [None]:
import os
import requests

# --- Credentials (read from the environment) ---
PERPLEXITY_KEY = os.environ.get("PERPLEXITY_API_KEY")
SERPAPI_KEY = os.environ.get("SERPAPI_KEY")  # issued by serpapi.com

# --- Service endpoints ---
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
SERPAPI_URL = "https://serpapi.com/search"

# --- System prompt: restricts scope and points the model at the search context ---
SYSTEM_PROMPT = " ".join(
    [
        "You are an intelligent financial research assistant.",
        "Only answer questions related to markets, finance, or economics.",
        "Use the live search context below to ensure data accuracy.",
    ]
)

def fetch_web_context(query: str, num_results: int = 3, timeout: float = 10.0) -> str:
    """
    Retrieve live search results related to the query from SerpAPI.

    Args:
        query: Search text sent as the `q` parameter.
        num_results: Maximum number of organic results to include.
        timeout: Seconds to wait for the HTTP request; prevents the call
            from blocking forever on a stalled connection.

    Returns:
        One line per result in the form "title — snippet (link)", joined by
        newlines. Empty string when the response has no "organic_results".

    Raises:
        requests.HTTPError: If SerpAPI responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    params = {
        "q": query,
        "api_key": SERPAPI_KEY,
        "num": num_results,
    }
    resp = requests.get(SERPAPI_URL, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # A missing "organic_results" key simply yields no snippets.
    organic = data.get("organic_results", [])
    snippets = [
        f"{item.get('title', '')} — {item.get('snippet', '')} ({item.get('link', '')})"
        for item in organic[:num_results]
    ]
    return "\n".join(snippets)


def ask_perplexity_with_live_context(question: str, timeout: float = 60.0) -> str:
    """
    Answer a question with Perplexity, grounding it in fresh search results.

    The question is first sent to fetch_web_context(); the returned snippets
    are appended to SYSTEM_PROMPT under a "Context:" heading and the combined
    text is used as the system message.

    Args:
        question: The user's question (also used as the search query).
        timeout: Seconds to wait for the Perplexity HTTP request. Without a
            timeout, `requests` may block indefinitely.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If either service responds with a 4xx/5xx status.
        requests.Timeout: If Perplexity does not answer within `timeout`.
    """
    # Step 2 → Fetch the latest contextual information
    web_context = fetch_web_context(question)

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "messages": [
            {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nContext:\n{web_context}"},
            {"role": "user", "content": question},
        ],
        "temperature": 0.5,
        "max_tokens": 1000,
    }

    response = requests.post(
        PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    data = response.json()
    return data["choices"][0]["message"]["content"]


# --- Example use ---
# Runs only when this module is the entry point (not on import): sends one
# sample question through ask_perplexity_with_live_context() and prints it.
if __name__ == "__main__":
    query = "How are global inflation forecasts changing as of October 2025?"
    output = ask_perplexity_with_live_context(query)
    print("\n=== ASSISTANT RESPONSE ===\n")
    print(output)


In [None]:
import os
import requests
from transformers import pipeline

# ---------- CONFIG ----------
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_KEY = os.environ.get("PERPLEXITY_API_KEY")

SYSTEM_PROMPT = " ".join(
    [
        "You are a research AI assistant focused only on markets,"
        " finance, business, and economics topics.",
        "Decline unrelated questions.",
    ]
)

# ---------- DOMAIN CLASSIFIER ----------
# Step 3 domain filter: a zero-shot classifier scores each query against the
# candidate labels below to decide whether it is a finance/business question.
domain_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate labels handed to the classifier.
ALLOWED_TOPICS = [
    "finance", "economics", "business",
    "investment", "stock market", "macroeconomics",
    "corporate strategy", "markets", "financial regulation",
]


def is_financial_query(question: str, threshold: float = 0.65) -> bool:
    """Return True if the question matches allowed financial/economic classes.

    The classifier is run with ``multi_label=True`` so every label in
    ALLOWED_TOPICS is scored independently. With the default single-label
    mode the scores are normalized to sum to 1 across the candidate labels;
    since all candidates are finance-related, that score only says which
    finance label fits best, and a genuinely financial question can be
    rejected because its probability is split across near-synonymous labels.

    Args:
        question: The user query to classify.
        threshold: Minimum score of the best-matching label to accept.

    Returns:
        True when the highest label score is at least `threshold`.
    """
    result = domain_classifier(question, ALLOWED_TOPICS, multi_label=True)
    # Labels/scores come back sorted by descending score; index 0 is the best.
    label, score = result["labels"][0], result["scores"][0]
    print(f"Detected domain: {label} (score={score:.2f})")
    return score >= threshold


# ---------- PERPLEXITY CALL ----------
def query_perplexity(question: str, timeout: float = 60.0) -> str:
    """Send a question to Perplexity with SYSTEM_PROMPT and return the reply text.

    Args:
        question: The user's question, sent as the "user" message.
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely on a stalled connection.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": question},
        ],
        "temperature": 0.6,
        "max_tokens": 700,
    }

    response = requests.post(
        PERPLEXITY_URL, headers=headers, json=data, timeout=timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


# ---------- MAIN GATEWAY ----------
def financial_assistant(question: str) -> str:
    """Gate a question through the domain classifier, then answer it.

    Args:
        question: The user's question.

    Returns:
        A fixed refusal message when is_financial_query() rejects the
        question; otherwise the reply from query_perplexity().
    """
    if not is_financial_query(question):
        return (
            "I'm a research assistant specialized in markets, finance, "
            "and economics only. Your question appears unrelated."
        )
    # Previously the function ended here and returned None for every
    # in-domain question; forward accepted questions to the model.
    return query_perplexity(question)


In [None]:
# ------------------------------
# AI Financial Research Assistant (Steps 1–5)
# ------------------------------
import os
import requests
import streamlit as st
from transformers import pipeline

# ===== STEP 1: Base model setup (Perplexity Sonar API) =====
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_KEY = os.environ.get("PERPLEXITY_API_KEY")

# System message: scope restriction for the assistant.
SYSTEM_PROMPT = " ".join(
    [
        "You are a financial research assistant that provides up‑to‑date,"
        " fact‑checked insights strictly related to markets, finance,"
        " and economics.",
        "Decline questions outside this scope.",
    ]
)

# ===== STEP 2: Live web context retrieval (SerpAPI or similar) =====
SERPAPI_KEY = os.environ.get("SERPAPI_KEY")
SERPAPI_URL = "https://serpapi.com/search"

def get_live_context(query: str, n: int = 3) -> str:
    """Retrieve fresh context from the web using SerpAPI.

    Best-effort: never raises. On any failure a short diagnostic string is
    returned in place of the search snippets.

    Args:
        query: Search text sent as the `q` parameter.
        n: Maximum number of organic results to include.

    Returns:
        One "title: snippet (link)" line per result joined by newlines, or a
        diagnostic message when the key is missing or the request fails.
    """
    if not SERPAPI_KEY:
        return "No live context (SerpAPI key missing)."
    params = {"q": query, "api_key": SERPAPI_KEY, "num": n}
    try:
        resp = requests.get(SERPAPI_URL, params=params, timeout=10)
        # Treat 4xx/5xx as failures rather than silently yielding no context.
        resp.raise_for_status()
        data = resp.json().get("organic_results", [])
        results = [
            f"{r.get('title','')}: {r.get('snippet','')} ({r.get('link','')})"
            for r in data[:n]
        ]
        return "\n".join(results)
    except Exception as e:
        # requests error messages embed the request URL, which carries
        # api_key; redact it because callers show this text in the UI and
        # forward it to the LLM.
        detail = str(e).replace(SERPAPI_KEY, "[REDACTED]")
        return f"Error getting context: {detail}"

# ===== STEP 3: Domain classifier (gatekeeper) =====
# Candidate labels the zero-shot classifier scores each query against.
FINANCE_TOPICS = [
    "finance",
    "economics",
    "markets",
    "investing",
    "macroeconomics",
    "business",
    "corporate strategy",
]

# Zero-shot classification pipeline; built once at module load.
domain_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

def is_finance_query(query: str, threshold: float = 0.65) -> bool:
    """Check whether the user query fits the finance/economic categories.

    Runs the classifier with ``multi_label=True`` so each label in
    FINANCE_TOPICS is scored independently. In the default single-label mode
    scores are normalized to sum to 1 across the candidates; because every
    candidate is finance-related, that top score does not measure whether
    the query is financial at all and is diluted by overlapping labels.

    Args:
        query: The user query to classify.
        threshold: Minimum score of the best-matching label to accept.

    Returns:
        True when the highest label score is at least `threshold`.
    """
    res = domain_classifier(query, FINANCE_TOPICS, multi_label=True)
    # Scores are sorted in descending order; index 0 is the best match.
    return res["scores"][0] >= threshold

# ===== STEP 4: Query Perplexity with contextual web data =====
def ask_perplexity(question: str, context: str = "", timeout: float = 60.0) -> str:
    """Send the composed request to Perplexity Sonar API.

    Args:
        question: The user's question, sent as the "user" message.
        context: Text appended to SYSTEM_PROMPT under "Latest context:".
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely and freeze the dashboard.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "messages": [
            {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nLatest context:\n{context}"},
            {"role": "user", "content": question},
        ],
        "temperature": 0.5,
        "max_tokens": 1000,
    }
    resp = requests.post(
        PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

# ===== STEP 5: Streamlit dashboard interface =====
def main():
    """Build the Streamlit page: sidebar options, question box, and results.

    When "Analyze" is pressed with a non-empty question, the question is
    checked by is_finance_query() (using the sidebar strictness as the
    threshold), optionally enriched via get_live_context(), answered by
    ask_perplexity(), and the answer plus context are written to the page.
    """
    st.set_page_config(page_title="AI Financial Research Assistant", layout="wide")
    st.title("📊 AI Financial Research Assistant (Powered by Perplexity Sonar)")

    # Sidebar controls.
    sidebar = st.sidebar
    sidebar.header("Configuration")
    enable_live = sidebar.checkbox("Enable Live Web Search (SerpAPI)", value=True)
    strict_mode = sidebar.slider("Finance Domain Strictness", 0.5, 0.9, 0.65)

    query = st.text_input("Enter your question about markets, finance, or economics:")

    # Nothing to do until the button is pressed with a question present.
    analyze_clicked = st.button("Analyze")
    if not (analyze_clicked and query):
        return

    with st.spinner("Processing..."):
        # Gatekeeper: reject off-topic questions before spending API calls.
        if not is_finance_query(query, strict_mode):
            st.warning("This question does not appear financial/economic in nature.")
            return

        if enable_live:
            context = get_live_context(query, n=3)
        else:
            context = ""
        answer = ask_perplexity(query, context)

        st.subheader("Perplexity Response:")
        st.write(answer)

        st.subheader("Live Context (Recent Sources):")
        st.text(context)

# ----- Run the dashboard -----
# Call main() only when this module is the entry point, not when imported.
if __name__ == "__main__":
    main()


In [None]:
import os
import requests
import streamlit as st
from transformers import pipeline

# ===== Step 1: Base LLM configuration =====
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_KEY = os.environ.get("PERPLEXITY_API_KEY")

# JSON skeleton the model is instructed to fill in for every answer.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Answer strictly in this JSON format:

{
  "summary": "<brief summary of the topic>",
  "key_metrics": {
    "gdp_growth": "<value>",
    "inflation_rate": "<value>",
    "unemployment_rate": "<value>"
  },
  "sources": ["<URL 1>", "<URL 2>", ...]
}

Provide no explanation outside the JSON.
"""

# Scope restriction followed by the output template.
SYSTEM_PROMPT = "".join(
    [
        "You only answer questions related to finance, markets, or economics.\n",
        "Return your answer exactly as the JSON template below:\n",
        RESPONSE_TEMPLATE,
    ]
)

# ===== Step 2: Live web search context (example using SerpAPI) =====
SERPAPI_URL = "https://serpapi.com/search"
SERPAPI_KEY = os.environ.get("SERPAPI_KEY")

def get_live_context(query: str, num_results: int = 3) -> str:
    """Fetch search snippets for `query` from SerpAPI (best-effort).

    Never raises: on any failure a short diagnostic string is returned in
    place of the snippets.

    Args:
        query: Search text sent as the `q` parameter.
        num_results: Maximum number of organic results to include.

    Returns:
        One "title — snippet (link)" line per result joined by newlines, or
        a diagnostic message when the key is missing or the request fails.
    """
    if not SERPAPI_KEY:
        return "Live search disabled: missing SERPAPI_KEY."
    try:
        params = {"q": query, "api_key": SERPAPI_KEY, "num": num_results}
        resp = requests.get(SERPAPI_URL, params=params, timeout=10)
        resp.raise_for_status()
        results = resp.json().get("organic_results", [])
        snippets = [
            f"{r.get('title', '')} — {r.get('snippet', '')} ({r.get('link', '')})"
            for r in results[:num_results]
        ]
        return "\n".join(snippets)
    except Exception as e:
        # requests error messages (e.g. from raise_for_status) embed the
        # request URL, which carries api_key; redact it because callers show
        # this text in the UI and forward it to the LLM.
        detail = str(e).replace(SERPAPI_KEY, "[REDACTED]")
        return f"Failed to fetch live context: {detail}"

# ===== Step 3: Domain filter to restrict to finance =====
# Candidate labels the zero-shot classifier scores each query against.
ALLOWED_TOPICS = [
    "finance",
    "markets",
    "economics",
    "investment",
    "macroeconomics",
    "business",
    "financial regulation",
]

# Zero-shot classification pipeline; built once at module load.
domain_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

def is_finance_query(query: str, threshold: float = 0.65) -> bool:
    """Return True when `query` is classified as a finance/economics topic.

    Runs the classifier with ``multi_label=True`` so each label in
    ALLOWED_TOPICS is scored independently. In the default single-label mode
    scores are normalized to sum to 1 across the candidates; because every
    candidate is finance-related, that top score does not measure whether
    the query is financial at all and is diluted by overlapping labels.

    Args:
        query: The user query to classify.
        threshold: Minimum score of the best-matching label to accept.
    """
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    # Scores are sorted in descending order; index 0 is the best match.
    return result["scores"][0] >= threshold

# ===== Step 4: Query Perplexity API with context & formatting =====
def ask_perplexity(query: str, context: str = "", timeout: float = 60.0) -> str:
    """Query Perplexity with the JSON-format system prompt plus live context.

    Args:
        query: The user's question, sent as the "user" message.
        context: Text appended to SYSTEM_PROMPT under "Latest context:".
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely and freeze the dashboard.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }
    system_content = f"{SYSTEM_PROMPT}\n\nLatest context:\n{context}"
    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "messages": [
            {"role": "system", "content": system_content},
            {"role": "user", "content": query},
        ],
        "temperature": 0,
        "max_tokens": 1000,
    }
    response = requests.post(
        PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# ===== Step 5: Streamlit Dashboard =====
def main():
    """Build the Streamlit page and answer one question per button press.

    On "Get Answer" with a non-empty question: the question is checked by
    is_finance_query(), optionally enriched via get_live_context(), sent to
    ask_perplexity(), and the raw reply is shown as a JSON code block
    followed by the context that was used (if any).
    """
    st.title("AI Financial Research Assistant Dashboard")
    st.markdown("Enter finance, markets, or economics questions. Responses are structured JSON.")

    query = st.text_input("Your Question:")
    enable_live_search = st.checkbox("Enable live web search context", value=True)

    # Nothing to do until the button is pressed with a question present.
    submitted = st.button("Get Answer")
    if not (submitted and query):
        return

    # Gatekeeper: reject off-topic questions before spending API calls.
    if not is_finance_query(query):
        st.error("Query filtered out: not related to finance or economics.")
        return

    if enable_live_search:
        context = get_live_context(query)
    else:
        context = ""

    with st.spinner("Getting response from Perplexity..."):
        response = ask_perplexity(query, context)

    st.subheader("Response (JSON format):")
    st.code(response, language="json")

    # The context section is omitted when live search produced nothing.
    if not context:
        return
    st.subheader("Live Web Context Used:")
    st.text(context)

# Call main() only when this module is the entry point, not when imported.
if __name__ == "__main__":
    main()


In [None]:
import json
import pandas as pd
import plotly.express as px
import streamlit as st

def render_dashboard(response_text):
    """Render a consistent dashboard layout based on JSON structure.

    Parses `response_text` as JSON and writes six sections to the Streamlit
    page: summary, key insights, metrics, trend chart, table, and sources.
    Missing or empty sections degrade to an informational note instead of
    raising.

    Args:
        response_text: Raw model output, expected to be a JSON object with
            optional keys "summary", "key_insights", "metrics",
            "visual_data" ("labels"/"values"), "table", and "sources".
    """
    try:
        data = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        data = None
    # Valid JSON that is not an object (list, string, number) has no .get().
    if not isinstance(data, dict):
        st.error("Response not in valid JSON; showing raw text:")
        st.text(response_text)
        return

    # --- Section 1: Summary ---
    st.header("Summary")
    st.write(data.get("summary", "No summary available."))

    # --- Section 2: Key Insights ---
    st.subheader("Key Insights")
    for item in data.get("key_insights") or []:
        st.markdown(f"- {item}")

    # --- Section 3: Key Metrics ---
    st.subheader("Economic Metrics")
    metrics = data.get("metrics") or {}
    if isinstance(metrics, dict) and metrics:
        # st.columns() rejects a count of 0, so only build columns when
        # there is at least one metric to show.
        cols = st.columns(len(metrics))
        for col, (key, val) in zip(cols, metrics.items()):
            col.metric(key, val)
    else:
        st.info("No metrics provided.")

    # --- Section 4: Visualization ---
    st.subheader("Trend Chart")
    visual = data.get("visual_data") or {}
    if visual and "labels" in visual and "values" in visual:
        df = pd.DataFrame({"Period": visual["labels"], "Value": visual["values"]})
        chart = px.line(df, x="Period", y="Value", title="Trend Over Time", markers=True)
        st.plotly_chart(chart, use_container_width=True)
    else:
        st.info("No visual data provided.")

    # --- Section 5: Data Table ---
    st.subheader("Comparative Table")
    table_data = data.get("table") or []
    if table_data:
        df_table = pd.DataFrame(table_data)
        st.dataframe(df_table, use_container_width=True)
    else:
        st.info("No table data provided.")

    # --- Section 6: Sources ---
    st.subheader("Sources")
    for src in data.get("sources") or []:
        st.markdown(f"- [{src}]({src})")



In [None]:
# ============================================
# AI FINANCIAL RESEARCH ASSISTANT DASHBOARD
# Integrating Steps 1–5 from previous builds
# ============================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline

# ----------------------------
# STEP 1: Base Perplexity Configuration
# ----------------------------
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_KEY = os.environ.get("PERPLEXITY_API_KEY")

# JSON skeleton the model is told to follow; its keys are the ones
# render_dashboard() reads.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return responses ONLY in VALID JSON formatted exactly as follows:
{
  "summary": "Brief text overview of recent market or economic findings.",
  "key_insights": ["Insight 1", "Insight 2", "Insight 3"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2","Q3","Q4"], "values": [2.3,2.6,3.1,2.9]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://www.imf.org", "https://www.reuters.com/markets"],
  "confidence_score": "Provide a number from 0 to 100 indicating how confident you are in the accuracy of this response."

}
Do not include any text or explanation outside of JSON.
"""

# Scope restriction, formatting instruction, then the template itself.
SYSTEM_PROMPT = "".join(
    [
        "You are a financial research assistant that provides up‑to‑date, ",
        "fact‑checked insights strictly related to markets, finance, business, ",
        "stock market, macroeconomics and economics. Decline questions outside this scope.\n",
        "Format your answer exactly using the following JSON template:\n",
        RESPONSE_TEMPLATE,
    ]
)

# ----------------------------
# STEP 2: Real-Time Search Context
# ----------------------------

# ScrapingDog settings (not referenced by the functions in this cell).
SCRAPING_DOG_KEY = os.environ.get("SCRAPING_DOG_KEY")
SCRAPING_DOG_URL = "https://api.scrapingdog.com/scrape"

# SerpAPI settings used by get_live_context().
SERPAPI_URL = "https://serpapi.com/search"
SERPAPI_KEY = os.environ.get("SERPAPI_KEY")

def get_live_context(search_query: str, num_results: int = 3) -> str:
    """Fetch live data snippets for the query to add context (best-effort).

    Never raises: on any failure a short diagnostic string is returned in
    place of the snippets.

    Args:
        search_query: Search text sent as the `q` parameter.
        num_results: Maximum number of organic results to include.

    Returns:
        One "title: snippet (link)" line per result joined by newlines, or a
        diagnostic message when the key is missing or the request fails.
    """
    if not SERPAPI_KEY:
        return "Live web search disabled (SERPAPI_KEY missing)."
    try:
        params = {"q": search_query, "api_key": SERPAPI_KEY, "num": num_results}
        resp = requests.get(SERPAPI_URL, params=params, timeout=10)
        # Treat 4xx/5xx as failures rather than silently yielding no context.
        resp.raise_for_status()
        data = resp.json().get("organic_results", [])
        snippets = [
            f"{r.get('title','')}: {r.get('snippet','')} ({r.get('link','')})"
            for r in data[:num_results]
        ]
        return "\n".join(snippets)
    except Exception as e:
        # requests error messages embed the request URL, which carries
        # api_key; redact it before the text leaves this function.
        detail = str(e).replace(SERPAPI_KEY, "[REDACTED]")
        return f"[Context Warning] Failed to retrieve search data: {detail}"

# ----------------------------
# STEP 3: Domain Filtering
# ----------------------------
# Candidate labels the zero-shot classifier scores each query against.
ALLOWED_TOPICS = [
    "finance",
    "economics",
    "markets",
    "trade",
    "business",
    "macroeconomics",
    "investment",
    "monetary policy",
]

# Zero-shot classification pipeline; built once at module load.
domain_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

def is_financial_query(query: str, threshold: float = 0.65) -> bool:
    """Check whether a query belongs to the financial domain.

    Runs the classifier with ``multi_label=True`` so each label in
    ALLOWED_TOPICS is scored independently. In the default single-label mode
    scores are normalized to sum to 1 across the candidates; because every
    candidate is finance-related, that top score does not measure whether
    the query is financial at all and is diluted by overlapping labels.

    Args:
        query: The user query to classify.
        threshold: Minimum score of the best-matching label to accept.

    Returns:
        True when the highest label score is at least `threshold`.
    """
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    # Labels/scores come back sorted by descending score; index 0 is the best.
    label, score = result["labels"][0], result["scores"][0]
    print(f"[DomainCheck] Detected topic: {label} ({score:.2f})")
    return score >= threshold

# ----------------------------
# STEP 4: Query Perplexity with Context and Formatting
# ----------------------------
def query_perplexity(prompt: str, query: str, timeout: float = 60.0) -> str:
    """Send structured query to Perplexity with optional background.

    Args:
        prompt: Full system message (instructions plus any live context).
            Previously this argument was accepted but ignored, so the live
            context assembled by the caller never reached the model. An
            empty value falls back to SYSTEM_PROMPT.
        query: The user's question, sent as the "user" message.
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely and freeze the dashboard.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "temperature": 0,
        "max_tokens": 1000,
        "messages": [
            {"role": "system", "content": prompt or SYSTEM_PROMPT},
            {"role": "user", "content": query},
        ],
    }
    print("[Perplexity] Sending request...")
    response = requests.post(
        PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# ----------------------------
# STEP 5: Streamlit Rendering Logic
# ----------------------------
def render_dashboard(response_text):
    """Parse and render structured financial report from model output.

    Writes seven sections to the Streamlit page: summary, key insights,
    metrics, trend chart, table, sources, and the model's confidence score.
    Missing sections degrade to an informational note instead of raising.

    Args:
        response_text: Raw model output, expected to be a JSON object with
            optional keys "summary", "key_insights", "metrics",
            "visual_data" ("labels"/"values"), "table", "sources", and
            "confidence_score".
    """
    try:
        data = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        data = None
    # Valid JSON that is not an object (list, string, number) has no .get().
    if not isinstance(data, dict):
        st.error("⚠️ Model output was not valid JSON format. Here's the raw response:")
        st.text(response_text)
        return

    # ---- Section 1: Summary
    st.header("📈 Market Summary")
    st.write(data.get("summary", "No summary available."))

    # ---- Section 2: Key Insights
    st.subheader("Key Insights")
    insights = data.get("key_insights") or []
    if insights:
        for item in insights:
            st.markdown(f"- {item}")
    else:
        st.text("No notable insights found.")

    # ---- Section 3: Metrics Display
    st.subheader("Current Economic Metrics")
    metrics = data.get("metrics") or {}
    if isinstance(metrics, dict) and metrics:
        metric_cols = st.columns(len(metrics))
        for col, (key, value) in zip(metric_cols, metrics.items()):
            col.metric(key, value)
    else:
        st.info("Model did not produce metrics data.")

    # ---- Section 4: Trend Chart
    st.subheader("Economic Trends")
    visual_data = data.get("visual_data") or {}
    if visual_data and "labels" in visual_data and "values" in visual_data:
        df_chart = pd.DataFrame({
            "Period": visual_data["labels"],
            "Value": visual_data["values"]
        })
        fig = px.line(df_chart, x="Period", y="Value", title="Quarterly Trend", markers=True)
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("No visual data to plot.")

    # ---- Section 5: Comparative Table
    st.subheader("Comparative Data Table")
    table_data = data.get("table", [])
    if isinstance(table_data, list) and table_data:
        df_table = pd.DataFrame(table_data)
        st.dataframe(df_table, use_container_width=True)
    else:
        st.warning("No tabular data found.")

    # ---- Section 6: Sources
    st.subheader("Sources & References")
    for src in data.get("sources") or []:
        st.markdown(f"- [{src}]({src})")

    # ---- Section 7: Confidence Score
    st.subheader("Model Confidence Score")
    score = data.get("confidence_score", None)
    # Compare against None/"" explicitly: a reported confidence of 0 is
    # meaningful and must still be displayed.
    if score is not None and score != "":
        st.metric("Overall Confidence", f"{score}%")
    else:
        st.info("Model did not report a confidence score.")


# ----------------------------
# COMBINED MAIN APP
# ----------------------------
def main():
    """Build the Streamlit page and produce one report per button press.

    On "Generate Report" with a non-empty question: the question is checked
    by is_financial_query(), a context string is obtained (live search or a
    fixed placeholder), query_perplexity() is called with SYSTEM_PROMPT plus
    that context, and the reply is passed to render_dashboard().
    """
    st.set_page_config(page_title="AI Financial Research Assistant", layout="wide")
    st.title("💹 AI‑Powered Financial Research Assistant Dashboard")

    query = st.text_input("Enter a finance, markets, or economics question:")
    enable_live = st.checkbox("Enable live market context search", True)

    # Nothing to do until the button is pressed with a question present.
    report_requested = st.button("Generate Report")
    if not (report_requested and query):
        return

    with st.spinner("Fetching AI‑driven financial insights..."):
        # --- Step 3: Domain Filtering
        if not is_financial_query(query):
            st.error("Query denied: not a financial or economic topic.")
            return

        # --- Step 2: Add Web Context if enabled
        if enable_live:
            context = get_live_context(query)
        else:
            context = "Live context not applied."

        # --- Step 4: Get LLM Response
        full_prompt = SYSTEM_PROMPT + "\n" + context
        raw_output = query_perplexity(full_prompt, query)

        # --- Step 5: Structured Dashboard Rendering
        render_dashboard(raw_output)

# Entry point: call main() only when this module is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()


In [None]:
# ============================================
# AI FINANCIAL RESEARCH ASSISTANT DASHBOARD v2
# Includes Steps 1–6 (cross‑model validation)
# ============================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util  # Cross‑model validation

# ----------------------------
# STEP 1: Base Perplexity Configuration
# ----------------------------
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_KEY = os.environ.get("PERPLEXITY_API_KEY")

# JSON skeleton the model is told to follow; its keys are the ones
# render_dashboard() reads.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return only valid JSON formatted as:
{
  "summary": "Brief summary of findings.",
  "key_insights": ["Insight 1", "Insight 2"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Provide a 0–100 confidence measure of your own answer"
}
"""

# Scope restriction followed by the output template.
SYSTEM_PROMPT = "".join(
    [
        "You respond only to topics on finance, markets, or economics.\n",
        "Always output structured JSON in the exact format below:\n",
        RESPONSE_TEMPLATE,
    ]
)

# ----------------------------
# STEP 2: Live Context Fetch (Optional)
# ----------------------------
SERPAPI_URL = "https://serpapi.com/search"
SERPAPI_KEY = os.environ.get("SERPAPI_KEY")

def get_live_context(query: str, num_results: int = 3) -> str:
    """Pull live search context from SerpAPI (best-effort).

    Never raises: on any failure a short diagnostic string is returned in
    place of the snippets.

    Args:
        query: Search text sent as the `q` parameter.
        num_results: Maximum number of organic results to include.

    Returns:
        One "title: snippet (link)" line per result joined by newlines, or a
        diagnostic message when the key is missing or the request fails.
    """
    if not SERPAPI_KEY:
        return "Live context disabled (no SerpAPI key)."
    try:
        params = {"q": query, "api_key": SERPAPI_KEY, "num": num_results}
        resp = requests.get(SERPAPI_URL, params=params, timeout=10)
        # Treat 4xx/5xx as failures rather than silently yielding no context.
        resp.raise_for_status()
        data = resp.json().get("organic_results", [])
        return "\n".join(
            f"{r.get('title','')}: {r.get('snippet','')} ({r.get('link','')})"
            for r in data[:num_results]
        )
    except Exception as e:
        # requests error messages embed the request URL, which carries
        # api_key; redact it before the text leaves this function.
        detail = str(e).replace(SERPAPI_KEY, "[REDACTED]")
        return f"[Context fetch error] {detail}"

# ----------------------------
# STEP 3: Domain Filtering
# ----------------------------
# Candidate labels the zero-shot classifier scores each query against.
ALLOWED_TOPICS = [
    "finance",
    "economics",
    "business",
    "markets",
    "macroeconomics",
    "monetary policy",
    "banking",
    "investments",
]

# Zero-shot classification pipeline; built once at module load.
domain_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

def is_finance_query(query: str, threshold: float = 0.65) -> bool:
    """Return True when `query` is classified as a finance/economics topic.

    Runs the classifier with ``multi_label=True`` so each label in
    ALLOWED_TOPICS is scored independently. In the default single-label mode
    scores are normalized to sum to 1 across the candidates; because every
    candidate is finance-related, that top score does not measure whether
    the query is financial at all and is diluted by overlapping labels.

    Args:
        query: The user query to classify.
        threshold: Minimum score of the best-matching label to accept.
    """
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    # Labels/scores come back sorted by descending score; index 0 is the best.
    label, score = result["labels"][0], result["scores"][0]
    print(f"[DomainCheck] {label} = {score:.2f}")
    return score >= threshold

# ----------------------------
# STEP 4: Query Perplexity (Primary Model)
# ----------------------------
def query_perplexity(query: str, context: str = "", timeout: float = 60.0) -> str:
    """Ask the primary Perplexity model, with SYSTEM_PROMPT plus live context.

    Args:
        query: The user's question, sent as the "user" message.
        context: Text appended to SYSTEM_PROMPT under "Context:".
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely and freeze the dashboard.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-large",
        "temperature": 0,
        "max_tokens": 1000,
        "messages": [
            {"role": "system", "content": f"{SYSTEM_PROMPT}\nContext:\n{context}"},
            {"role": "user", "content": query},
        ],
    }
    resp = requests.post(
        PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

# ----------------------------
# STEP 5: Secondary Model for Cross‑Validation
# ----------------------------
def query_secondary_model(query: str, timeout: float = 60.0) -> str:
    """Ask a second model the same question for cross-validation.

    Currently a stand-in: it calls the same Perplexity endpoint with a
    different model id and a shorter, free-text system prompt. Replace with
    a call to a different provider when one is available.

    Args:
        query: The user's question, sent as the "user" message.
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            `requests` may block indefinitely and freeze the dashboard.

    Returns:
        The "content" string of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        # NOTE(review): confirm this model id is valid for your Perplexity plan.
        "model": "sonar-small",
        "temperature": 0,
        "max_tokens": 800,
        "messages": [
            {"role": "system", "content": "You are a financial assistant returning short analytical summaries."},
            {"role": "user", "content": query},
        ],
    }
    resp = requests.post(
        PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

# ----------------------------
# STEP 6: Cross‑Model Validation
# ----------------------------
# Sentence-embedding model used by cross_model_confidence(); instantiated
# once at module load rather than on every comparison.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def cross_model_confidence(primary_text: str, secondary_text: str) -> float:
    """Score agreement between two model outputs as a percentage.

    Both texts are embedded with the module-level `embedder`; their cosine
    similarity is multiplied by 100 and rounded to two decimals. The score
    is also printed for debugging.
    """
    vectors = embedder.encode([primary_text, secondary_text])
    primary_vec, secondary_vec = vectors
    similarity = util.cos_sim(primary_vec, secondary_vec).item()
    final_score = round(100 * similarity, 2)
    print(f"[Cross‑Model Confidence] {final_score}%")
    return final_score

# ----------------------------
# Dashboard Rendering Function
# ----------------------------
def render_dashboard(response_text: str, cross_conf: float):
    """Display AI results in rich layout.

    Parses `response_text` as JSON and writes the confidence metric,
    summary, insights, metrics, trend chart, table, and sources to the
    Streamlit page.

    Args:
        response_text: Raw primary-model output, expected to be a JSON
            object following RESPONSE_TEMPLATE.
        cross_conf: Cross-model agreement score (0–100). A falsy value means
            "not available" and only the model's self-reported confidence
            is shown.
    """
    try:
        data = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        data = None
    # Valid JSON that is not an object (list, string, number) has no .get().
    if not isinstance(data, dict):
        st.error("Model output invalid. Raw response below:")
        st.text(response_text)
        return

    # ---- Top Section: Confidence
    # The model may return the score as a number, a numeric string, "85%",
    # null, or free text; anything that cannot be read as a number counts
    # as 0 instead of crashing the page.
    raw_conf = data.get("confidence_score", 0)
    try:
        self_conf = float(str(raw_conf).strip().rstrip("%"))
    except ValueError:
        self_conf = 0.0
    avg_conf = (self_conf + cross_conf) / 2 if cross_conf else self_conf
    st.metric("Overall Confidence", f"{avg_conf:.1f} %")

    # ---- Summary Section
    st.header("📈 Executive Summary")
    st.write(data.get("summary", "No summary."))

    # ---- Insights
    st.subheader("Key Insights")
    for insight in data.get("key_insights") or []:
        st.markdown(f"- {insight}")

    # ---- Metrics
    st.subheader("Macroeconomic Indicators")
    metrics = data.get("metrics") or {}
    if isinstance(metrics, dict) and metrics:
        # st.columns() rejects a count of 0, hence the emptiness check.
        cols = st.columns(len(metrics))
        for col, (k, v) in zip(cols, metrics.items()):
            col.metric(k, v)

    # ---- Chart
    st.subheader("Trends")
    vis = data.get("visual_data") or {}
    if vis and "labels" in vis and "values" in vis:
        df_plot = pd.DataFrame({"Period": vis["labels"], "Value": vis["values"]})
        fig = px.line(df_plot, x="Period", y="Value", title="Trend Data", markers=True)
        st.plotly_chart(fig, use_container_width=True)

    # ---- Table
    st.subheader("Comparative Table")
    table_data = data.get("table") or []
    if table_data:
        st.dataframe(pd.DataFrame(table_data), use_container_width=True)

    # ---- Sources
    st.subheader("Sources & References")
    for s in data.get("sources") or []:
        st.markdown(f"- [{s}]({s})")

# ----------------------------
# MAIN APP
# ----------------------------
def main():
    """Build the Streamlit page and produce one validated report per press.

    On "Generate Report" with a non-empty question: the question is checked
    by is_finance_query(), optionally enriched via get_live_context(),
    answered by both query_perplexity() and query_secondary_model(), scored
    with cross_model_confidence(), and rendered by render_dashboard().
    """
    st.set_page_config(page_title="AI Financial Research Assistant v2", layout="wide")
    st.title("💹 AI Financial Research Assistant — With Cross‑Model Validation")

    q = st.text_input("Enter a financial or market question:")
    live_context = st.checkbox("Enable Live Web Search", True)

    # Nothing to do until the button is pressed with a question present.
    report_requested = st.button("Generate Report")
    if not (report_requested and q):
        return

    # Gatekeeper: reject off-topic questions before spending API calls.
    if not is_finance_query(q):
        st.error("❌ Query not recognized as financial/economic.")
        return

    if live_context:
        context = get_live_context(q)
    else:
        context = ""

    with st.spinner("Getting analysis from primary LLM..."):
        res_primary = query_perplexity(q, context)

    with st.spinner("Validating with secondary LLM..."):
        res_secondary = query_secondary_model(q)
        cross_conf = cross_model_confidence(res_primary, res_secondary)

    render_dashboard(res_primary, cross_conf)

# Entry: call main() only when this module is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()


In [None]:
# ============================================
# AI Financial Research Assistant Dashboard v3
# Adds Cross-Model Fact/Numeric Accuracy Validation
# ============================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# ----------------------------
# STEP 1: Base Perplexity Configuration
# ----------------------------
# API key is read from the environment; os.getenv yields None when it is unset.
PERPLEXITY_KEY = os.getenv("PERPLEXITY_API_KEY")
# Chat-completions endpoint that query_perplexity / query_secondary_model POST to.
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"

# JSON shape the model is asked to return. The keys listed here are the ones
# render_dashboard reads from the parsed reply. The bare `number` placeholders
# and the worded confidence_score value are instructions to the model, not
# literal JSON.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return only valid JSON formatted as:
{
  "summary": "Brief summary of findings.",
  "key_insights": ["Insight 1", "Insight 2"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Provide a 0–100 confidence measure of your own answer"
}
"""

# System message for the primary model: topic restriction followed by the
# response template above.
SYSTEM_PROMPT = (
    "You respond only to topics on finance, markets, or economics.\n"
    "Always output structured JSON in the exact format below:\n"
    f"{RESPONSE_TEMPLATE}"
)

# ----------------------------
# STEP 2: Real-Time Search Context
# ----------------------------
# SerpAPI credentials and endpoint used by get_live_context; the key is None
# when the environment variable is unset, which disables live context.
SERPAPI_KEY = os.getenv("SERPAPI_KEY")
SERPAPI_URL = "https://serpapi.com/search"

def get_live_context(query: str, num_results: int = 3) -> str:
    """Fetch a few web search snippets from SerpAPI to use as prompt context.

    Args:
        query: Search query sent to SerpAPI.
        num_results: Maximum number of organic results to include.

    Returns:
        One ``title: snippet (link)`` line per result; a fixed notice when no
        SerpAPI key is configured; or an empty string when the lookup fails.
    """
    if not SERPAPI_KEY:
        return "Live context disabled (no SerpAPI key)."
    try:
        params = {"q": query, "api_key": SERPAPI_KEY, "num": num_results}
        resp = requests.get(SERPAPI_URL, params=params, timeout=10)
        resp.raise_for_status()
        results = resp.json().get("organic_results", [])
        return "\n".join(
            f"{r.get('title','')}: {r.get('snippet','')} ({r.get('link','')})"
            for r in results[:num_results]
        )
    except (requests.RequestException, ValueError, AttributeError, TypeError) as e:
        # The return value is embedded in the LLM system prompt by the caller.
        # requests' error messages can include the request URL (and with it
        # the api_key query parameter), so never return the message itself:
        # log only the error type and fall back to "no context".
        print(f"[Context fetch error] {type(e).__name__}")
        return ""

# ----------------------------
# STEP 3: Domain Filtering
# ----------------------------
# Zero-shot classifier used by is_finance_query; built when this module runs.
# NOTE(review): Streamlit presumably re-executes the script on every
# interaction, which would rebuild this model each time — confirm and
# consider caching the loader.
domain_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Candidate labels handed to the classifier for the domain check.
ALLOWED_TOPICS = [
    "finance", "economics", "business", "markets",
    "macroeconomics", "monetary policy", "banking", "investments"
]

def is_finance_query(query: str, threshold: float = 0.65) -> bool:
    """Decide whether a query belongs to the finance/economics domain.

    Args:
        query: The user's question.
        threshold: Minimum score the best-matching topic must reach.

    Returns:
        True when the highest-scoring allowed topic scores at least
        ``threshold``.
    """
    # multi_label=True scores every topic independently. In the default
    # single-label mode the scores are normalised to sum to 1 across the
    # candidate labels; because every candidate is a finance topic, that
    # normalised score only says which finance label fits best, not whether
    # the query is about finance at all.
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    label, score = result["labels"][0], result["scores"][0]
    print(f"[DomainCheck] {label} = {score:.2f}")
    return score >= threshold

# ----------------------------
# STEP 4: Query Perplexity (Primary Model)
# ----------------------------
def query_perplexity(query: str, context: str = "", timeout: float = 60) -> str:
    """Ask the primary Perplexity model for a JSON-formatted answer.

    Args:
        query: The user's question, sent as the user message.
        context: Extra text appended to the system prompt under "Context:".
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "sonar-large",
        "temperature": 0,
        "max_tokens": 1000,
        "messages": [
            {"role": "system", "content": f"{SYSTEM_PROMPT}\nContext:\n{context}"},
            {"role": "user", "content": query}
        ]
    }
    # Without a timeout requests waits indefinitely, which would freeze the
    # Streamlit spinner if the API stalls.
    resp = requests.post(PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

# ----------------------------
# STEP 5: Secondary Model for Cross-Validation
# ----------------------------
def query_secondary_model(query: str, timeout: float = 60) -> str:
    """Ask the secondary (validation) model the same question.

    For the demo this uses a smaller Perplexity Sonar variant; replace with a
    different LLM API if one is available.

    NOTE(review): the system prompt here does not request the JSON template,
    so a caller that runs ``json.loads`` on this reply will likely fail and
    skip numeric validation — confirm whether that is intended.

    Args:
        query: The user's question.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "sonar-small",
        "temperature": 0,
        "max_tokens": 800,
        "messages": [
            {"role": "system", "content": "You are a financial assistant generating concise summaries."},
            {"role": "user", "content": query}
        ]
    }
    # Bound the wait so a stalled API cannot hang the app indefinitely.
    resp = requests.post(PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

# ----------------------------
# STEP 6a: Semantic similarity calculation
# ----------------------------
# Sentence-embedding model used by cross_model_confidence; built when this module runs.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def cross_model_confidence(primary_text: str, secondary_text: str) -> float:
    """Score how similar two model answers are, as a percentage.

    Both texts are embedded together and compared by cosine similarity; the
    result is scaled by 100, rounded to two decimals, logged, and returned.
    """
    vectors = embedder.encode([primary_text, secondary_text])
    primary_vec, secondary_vec = vectors
    cosine = util.cos_sim(primary_vec, secondary_vec).item()
    percent = round(cosine * 100, 2)  # Convert to percentage
    print(f"[Cross-Model Semantic Similarity] {percent}%")
    return percent

# ----------------------------
# STEP 6b: Numeric alignment score
# ----------------------------
def numeric_alignment_score(primary_json, secondary_json):
    """Compare the shared "metrics" of two parsed answers.

    For every metric name present in both answers the relative difference
    ``|a - b| / max(|a|, |b|)`` is taken (0 when both values are 0).  Values
    that cannot be converted to float are skipped.

    Returns:
        ``100 * (1 - mean relative difference)``, floored at 0 and rounded to
        two decimals, or None when either side has no metrics or no metric
        could be compared.
    """
    ours = primary_json.get("metrics", {})
    theirs = secondary_json.get("metrics", {})
    if not (ours and theirs):
        return None

    diff_sum = 0
    compared = 0
    for name in ours:
        if name not in theirs:
            continue
        try:
            a = float(ours[name])
            b = float(theirs[name])
        except (ValueError, TypeError):
            # Non-numeric metric value: leave it out of the average.
            continue
        gap = 0 if (a == 0 and b == 0) else abs(a - b) / max(abs(a), abs(b))
        diff_sum += gap
        compared += 1

    if compared == 0:
        return None

    mean_gap = diff_sum / compared
    similarity_score = max(0.0, 1.0 - mean_gap) * 100
    print(f"[Numeric Alignment Score] {similarity_score}%")
    return round(similarity_score, 2)

# ----------------------------
# Dashboard Rendering
# ----------------------------
def render_dashboard(response_text: str, semantic_conf: float, numeric_conf: float):
    """Render the primary model's JSON report as a Streamlit dashboard.

    Args:
        response_text: Raw model output, expected to be a JSON object.
        semantic_conf: Cross-model semantic similarity, as a percentage.
        numeric_conf: Numeric alignment percentage, or None when unavailable.
    """
    try:
        data = json.loads(response_text)
    except json.JSONDecodeError:
        data = None
    # Valid JSON that is not an object (e.g. a bare list) cannot be rendered
    # either, so it takes the same error path as unparseable text.
    if not isinstance(data, dict):
        st.error("Model output invalid JSON. Raw output:")
        st.text(response_text)
        return

    # Show combined confidence. The value comes from the model and may not be
    # parseable by float() (e.g. "85%" or null); treat that as 0 instead of
    # crashing the whole page.
    try:
        self_conf = float(data.get("confidence_score", 0))
    except (TypeError, ValueError):
        self_conf = 0.0
    combined_conf = (self_conf + semantic_conf) / 2
    if numeric_conf is not None:
        # Averaged against the running score, so numeric alignment carries
        # half of the final weight.
        combined_conf = (combined_conf + numeric_conf) / 2

    st.metric("Overall Confidence Score (%)", f"{combined_conf:.1f}")

    st.header("📈 Executive Summary")
    st.write(data.get("summary", "No summary provided."))

    st.subheader("Key Insights")
    for insight in data.get("key_insights") or []:
        st.markdown(f"- {insight}")

    st.subheader("Macroeconomic Metrics")
    metrics = data.get("metrics") or {}
    if isinstance(metrics, dict) and metrics:
        metric_cols = st.columns(len(metrics))
        for col, (k, v) in zip(metric_cols, metrics.items()):
            col.metric(k, v)

    st.subheader("Trends Over Time")
    visual = data.get("visual_data") or {}
    if not isinstance(visual, dict):
        visual = {}
    labels, values = visual.get("labels"), visual.get("values")
    # Only plot when both series are lists of equal length; otherwise the
    # DataFrame constructor would raise.
    if isinstance(labels, list) and isinstance(values, list) and len(labels) == len(values):
        df_plot = pd.DataFrame({"Period": labels, "Value": values})
        fig = px.line(df_plot, x="Period", y="Value", markers=True, title="Trend")
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("No visual data available.")

    st.subheader("Comparative Data Table")
    table_data = data.get("table") or []
    if table_data:
        st.dataframe(pd.DataFrame(table_data), use_container_width=True)
    else:
        st.info("No tabular data present.")

    st.subheader("Sources")
    for source in data.get("sources") or []:
        st.markdown(f"- [{source}]({source})")

# ----------------------------
# Main App
# ----------------------------
def main():
    """Run the v3 Streamlit page with semantic and numeric cross-validation.

    After the domain check, the primary and secondary models are queried,
    their answers are compared semantically and (when both parse as JSON)
    numerically, and the primary answer is rendered with those scores.
    """
    st.set_page_config(page_title="AI Financial Research Assistant v3", layout="wide")
    st.title("💹 AI Financial Research Assistant Dashboard with Fact and Semantic Validation")

    user_query = st.text_input("Enter your market, finance, or economics question:")
    live_search_enabled = st.checkbox("Enable live context web search", value=True)

    # Nothing to do until the button is pressed with a non-empty question.
    if not (st.button("Get Report") and user_query):
        return

    if not is_finance_query(user_query):
        st.error("Query rejected: not classified as finance/economics domain.")
        return

    context = ""
    if live_search_enabled:
        context = get_live_context(user_query)

    with st.spinner("Querying primary LLM..."):
        primary_response = query_perplexity(user_query, context)

    with st.spinner("Querying secondary LLM for validation..."):
        secondary_response = query_secondary_model(user_query)

    semantic_score = cross_model_confidence(primary_response, secondary_response)

    # Numeric comparison needs both answers as parsed JSON; any failure just
    # skips the numeric score and tells the user why.
    numeric_score = None
    try:
        numeric_score = numeric_alignment_score(
            json.loads(primary_response),
            json.loads(secondary_response),
        )
    except Exception as err:
        st.warning(f"Numeric validation skipped due to parsing error: {err}")

    render_dashboard(primary_response, semantic_score, numeric_score)

if __name__ == "__main__":
    main()


In [None]:
# ============================================
# AI Financial Research Assistant Dashboard v3
# Adds Cross-Model Fact/Numeric Accuracy Validation + Self-Consistency Prompting
# ============================================


import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from collections import Counter

# --- Config & API keys ---
# Keys are read from the environment; os.getenv yields None when unset.
PERPLEXITY_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
SERPAPI_KEY = os.getenv("SERPAPI_KEY")
SERPAPI_URL = "https://serpapi.com/search"

# --- Prompts ---
# JSON shape the model is asked to return; render_dashboard and
# parse_confidence read these keys from the parsed reply. The `number`
# placeholders and the worded confidence_score are instructions to the
# model, not literal JSON.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return ONLY valid JSON formatted exactly as:
{
  "summary": "...",
  "key_insights": ["...", "..."],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Give a numeric confidence level 0-100 for your answer"
}
"""
# System message: topic restriction followed by the template above.
SYSTEM_PROMPT = (
    "You answer business, finance, markets, or economics queries ONLY.\n"
    "Format your response EXACTLY as the JSON template below:\n"
    f"{RESPONSE_TEMPLATE}"
)

# --- Classifier and embedding for domain filtering and validation ---
# Both models are constructed when this module runs.
domain_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
ALLOWED_TOPICS = ["finance", "economics", "business", "markets", "investment"]
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# --- Helper functions ---

def is_finance_query(query: str, threshold: float = 0.65) -> bool:
    """Return True when the query's best-matching allowed topic scores >= threshold.

    Args:
        query: The user's question.
        threshold: Minimum classifier score required to accept the query.
    """
    # Score each topic independently (multi_label=True). In single-label mode
    # the scores are normalised to sum to 1 over the candidate labels, and
    # since all candidates are finance topics the top score would not reveal
    # whether the query is about finance at all.
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    return result["scores"][0] >= threshold

def get_live_context(query: str, num_results: int = 3) -> str:
    """Collect up to ``num_results`` SerpAPI search snippets as prompt context.

    Args:
        query: Search query sent to SerpAPI.
        num_results: Maximum number of organic results to include.

    Returns:
        Newline-separated ``title: snippet (link)`` lines; a fixed notice when
        no SerpAPI key is configured; or an empty string when the lookup fails.
    """
    if not SERPAPI_KEY:
        return "Live context disabled (no SerpAPI key)."
    try:
        params = {"q": query, "api_key": SERPAPI_KEY, "num": num_results}
        resp = requests.get(SERPAPI_URL, params=params, timeout=10)
        resp.raise_for_status()
        results = resp.json().get("organic_results", [])
        return "\n".join(
            f"{r.get('title','')}: {r.get('snippet','')} ({r.get('link','')})"
            for r in results[:num_results]
        )
    except (requests.RequestException, ValueError, AttributeError, TypeError) as e:
        # Whatever is returned here is placed into the system prompt sent to
        # the LLM. Exception messages from requests can contain the full
        # request URL, api_key included, so report only the error type and
        # return no context.
        print(f"Context fetch error: {type(e).__name__}")
        return ""

def query_perplexity(query: str, context: str = "", temperature: float = 0, timeout: float = 60) -> str:
    """Send one chat-completion request to Perplexity and return its text.

    Args:
        query: The user's question, sent as the user message.
        context: Extra text appended to the system prompt under "Context:".
        temperature: Sampling temperature forwarded to the API.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "sonar-large",
        "temperature": temperature,
        "max_tokens": 1000,
        "messages": [
            {"role": "system", "content": f"{SYSTEM_PROMPT}\nContext:\n{context}"},
            {"role": "user", "content": query}
        ]
    }
    # This function is called once per self-consistency sample; a timeout
    # keeps one stalled request from hanging the whole run.
    resp = requests.post(PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

def cross_model_confidence(text1: str, text2: str) -> float:
    """Return the cosine similarity of the two texts' embeddings as a percentage.

    The value is the similarity times 100, rounded to two decimals.
    """
    first_vec, second_vec = embedder.encode([text1, text2])
    cosine = util.cos_sim(first_vec, second_vec).item()
    percent = cosine * 100
    return round(percent, 2)

def numeric_alignment_score(primary_json, secondary_json):
    """Score agreement between the "metrics" of two parsed answers.

    Metrics present in both answers are compared by relative difference
    ``|a - b| / max(|a|, |b|)`` (0 when both are 0); any metric whose
    comparison raises is skipped.

    Returns:
        ``100 * (1 - mean relative difference)``, floored at 0 and rounded to
        two decimals, or None when either side has no metrics or nothing
        could be compared.
    """
    left = primary_json.get("metrics", {})
    right = secondary_json.get("metrics", {})
    if not (left and right):
        return None

    accumulated = 0
    matched = 0
    for name in left:
        if name not in right:
            continue
        try:
            a = float(left[name])
            b = float(right[name])
            gap = 0 if (a == 0 and b == 0) else abs(a - b) / max(abs(a), abs(b))
            accumulated += gap
            matched += 1
        except Exception:
            # Best effort: an uncomparable metric is simply left out.
            continue

    if matched == 0:
        return None
    mean_gap = accumulated / matched
    return round(max(0, 1 - mean_gap) * 100, 2)

def majority_vote(responses):
    """Return the most frequent response after stripping surrounding whitespace.

    When several responses are equally frequent, the one seen first wins.
    An empty ``responses`` raises IndexError.
    """
    tally = Counter(text.strip() for text in responses)
    winner, _occurrences = tally.most_common(1)[0]
    return winner

def parse_confidence(response_text):
    """Extract the model's self-reported confidence from its JSON reply.

    Returns the "confidence_score" field as a float (0 when the field is
    absent), or 0.0 when the text is not JSON or the value cannot be
    converted.
    """
    try:
        payload = json.loads(response_text)
        raw_score = payload.get("confidence_score", 0)
        return float(raw_score)
    except Exception:
        # Any parsing or conversion problem counts as "no confidence".
        return 0.0

def render_dashboard(response_text: str, semantic_conf: float, numeric_conf: float, final_conf: float):
    """Render the chosen model answer as a Streamlit dashboard.

    Args:
        response_text: Raw model output, expected to be a JSON object.
        semantic_conf: Semantic similarity score (accepted but not displayed).
        numeric_conf: Numeric alignment score (accepted but not displayed).
        final_conf: Combined confidence shown at the top of the page.
    """
    try:
        data = json.loads(response_text)
    except Exception:
        data = None
    # A reply that parses but is not a JSON object has none of the expected
    # keys, so it is reported the same way as unparseable text.
    if not isinstance(data, dict):
        st.error("Invalid JSON output. Raw output:")
        st.text(response_text)
        return

    st.metric("Overall Confidence (%)", f"{final_conf:.1f}")
    st.header("📈 Summary")
    st.write(data.get("summary", "No summary."))

    # `or` fallbacks below cover fields the model returned as null.
    st.subheader("Key Insights")
    for insight in data.get("key_insights") or []:
        st.markdown(f"- {insight}")

    st.subheader("Metrics")
    metrics = data.get("metrics") or {}
    if isinstance(metrics, dict) and metrics:
        cols = st.columns(len(metrics))
        for col, (k, v) in zip(cols, metrics.items()):
            col.metric(k, v)

    st.subheader("Trends")
    vis = data.get("visual_data") or {}
    if not isinstance(vis, dict):
        vis = {}
    labels, values = vis.get("labels"), vis.get("values")
    # Plot only equal-length lists; anything else would make the DataFrame
    # constructor raise.
    if isinstance(labels, list) and isinstance(values, list) and len(labels) == len(values):
        df = pd.DataFrame({"Period": labels, "Value": values})
        fig = px.line(df, x="Period", y="Value", title="Trend Over Time", markers=True)
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("No visual data available.")

    st.subheader("Comparative Table")
    table = data.get("table") or []
    if table:
        st.dataframe(pd.DataFrame(table), use_container_width=True)
    else:
        st.info("No table data available.")

    st.subheader("Sources")
    for src in data.get("sources") or []:
        st.markdown(f"- [{src}]({src})")


### Main

def main():
    """Run the self-consistency Streamlit page.

    Samples the primary model several times, keeps the sample reporting the
    highest confidence, validates it against the secondary model, and renders
    it with a combined confidence score.
    """
    st.set_page_config(page_title="AI Financial Assistant (Self-Consistency)", layout="wide")
    st.title("💹 AI Financial Research Assistant with Self-Consistency")

    user_query = st.text_input("Enter finance or market related question:")
    enable_live_context = st.checkbox("Enable live context search", True)
    n_samples = st.slider("Number of model samples (self-consistency)", 3, 10, 5)

    # Nothing to do until the button is pressed with a non-empty question.
    if not (st.button("Generate Report") and user_query):
        return

    if not is_finance_query(user_query):
        st.error("Query rejected: not related to finance/economics.")
        return

    context = ""
    if enable_live_context:
        context = get_live_context(user_query)

    responses = []
    confidences = []
    with st.spinner("Generating multiple responses for self-consistency..."):
        for _sample in range(n_samples):
            sample = query_perplexity(user_query, context, temperature=0.7)  # sampling
            responses.append(sample)
            confidences.append(parse_confidence(sample))

    # Two candidate selections are computed; only the highest-confidence one
    # is used below (the majority vote is the documented alternative).
    final_response = majority_vote(responses)
    top_index = confidences.index(max(confidences))
    chosen_response = responses[top_index]  # or final_response

    # Validation with secondary model to get semantic and numeric scores
    st.info("Validating response with secondary model for confidence...")
    secondary_resp = query_secondary_model(user_query)
    semantic_conf = cross_model_confidence(chosen_response, secondary_resp)

    numeric_conf = None
    try:
        numeric_conf = numeric_alignment_score(
            json.loads(chosen_response),
            json.loads(secondary_resp),
        )
    except Exception:
        numeric_conf = None

    # Compute final combined confidence
    self_conf = max(confidences) if confidences else 0
    avg_confidence = (semantic_conf + self_conf) / 2
    if numeric_conf is not None:
        avg_confidence = (avg_confidence + numeric_conf) / 2

    render_dashboard(chosen_response, semantic_conf, numeric_conf, avg_confidence)


if __name__ == "__main__":
    main()


In [None]:
# ============================================================
# AI FINANCIAL RESEARCH ASSISTANT DASHBOARD v4
# Uses Perplexity Sonar (primary) + Gemini 2.0 Flash (secondary)
# Adds self-consistency, numeric fact alignment, semantic validation
# ============================================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from collections import Counter
from openai import OpenAI

# ----------------------------
# STEP 1: API KEYS AND CONFIG
# ----------------------------
# Keys are read from the environment; os.getenv yields None when unset.
PERPLEXITY_KEY = os.getenv("PERPLEXITY_API_KEY")
GEMINI_KEY = os.getenv("GEMINI_API_KEY")
PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"

# Gemini is reached through the OpenAI SDK client pointed at Google's
# OpenAI-compatible base URL.
gemini = OpenAI(
    api_key=GEMINI_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# JSON shape both models are asked to return; render_dashboard and parse_conf
# read these keys. The `number` placeholders and the worded confidence_score
# are instructions to the model, not literal JSON.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return ONLY valid JSON formatted as:
{
  "summary": "Brief summary of findings.",
  "key_insights": ["Insight 1", "Insight 2"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Provide a 0–100 confidence measure of your own answer"
}
"""

# System message shared by the Perplexity and Gemini calls.
SYSTEM_PROMPT = (
    "You respond only to topics on business, finance, markets, or economics.\n"
    "Always output structured JSON in the exact format below:\n"
    f"{RESPONSE_TEMPLATE}"
)

# ----------------------------
# STEP 2: Domain Classifier, Embedder
# ----------------------------
# Both models are constructed when this module runs.
domain_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
ALLOWED_TOPICS = ["finance", "economics", "markets", "business", "macroeconomics"]
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# STEP 3: Helper Functions
# ----------------------------
def is_finance_query(query: str, threshold=0.65):
    """Return True when the query's best-matching allowed topic scores >= threshold.

    Args:
        query: The user's question.
        threshold: Minimum classifier score required to accept the query.
    """
    # multi_label=True makes each topic's score independent. The default
    # single-label scores are normalised to sum to 1 across the (all
    # finance-related) candidates, so they cannot tell a finance query from
    # an unrelated one.
    res = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    return res["scores"][0] >= threshold

def get_live_context(query: str):
    """Placeholder for live web context: ignores ``query`` and returns no context."""
    no_context = ""
    return no_context

def query_perplexity(query: str, context: str = "", temperature: float = 0.7, timeout: float = 60):
    """Send one chat-completion request to Perplexity and return its text.

    Args:
        query: The user's question, sent as the user message.
        context: Extra text appended to the system prompt under "Context:".
        temperature: Sampling temperature forwarded to the API.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    headers = {"Authorization": f"Bearer {PERPLEXITY_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": "sonar-large",
        "temperature": temperature,
        "max_tokens": 1000,
        "messages": [
            {"role": "system", "content": f"{SYSTEM_PROMPT}\nContext:\n{context}"},
            {"role": "user", "content": query}
        ]
    }
    # requests has no default timeout; bound the wait so a stalled API cannot
    # hang the self-consistency loop.
    resp = requests.post(PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

# ----------------------------
# STEP 4: Gemini 2.0 Flash (Secondary Model)
# ----------------------------
def query_gemini_secondary(query: str, context: str = "") -> str:
    """Ask Gemini 2.0 Flash the same question for cross-model validation.

    Args:
        query: The user's question, sent as the user message.
        context: Extra text appended to the system prompt under "Context:".

    Returns:
        The reply text, or an empty string when the reply has no content.
    """
    completion = gemini.chat.completions.create(
        model="gemini-2.0-flash",
        messages=[
            {"role": "system", "content": f"{SYSTEM_PROMPT}\nContext:\n{context}"},
            {"role": "user", "content": query}
        ],
        temperature=0.3
    )
    # The chat-completions message carries its text directly in `.content`.
    # Indexing it as `content[0].text` picks a single character of that
    # string and fails with AttributeError.
    content = completion.choices[0].message.content
    if isinstance(content, list):
        # Defensive: tolerate a list of parts exposing `.text`, should the
        # endpoint ever return one.
        return "".join(getattr(part, "text", "") or "" for part in content)
    return content or ""

# ----------------------------
# STEP 5: Validation Functions
# ----------------------------
def cross_model_confidence(text1, text2):
    """Return the cosine similarity of the two texts' embeddings, times 100.

    The result is rounded to two decimals.
    """
    embeddings = embedder.encode([text1, text2])
    vec_a, vec_b = embeddings
    similarity = util.cos_sim(vec_a, vec_b).item()
    return round(similarity * 100, 2)

def numeric_alignment_score(p_json, s_json):
    """Score agreement between the "metrics" of two parsed answers, 0-100.

    Metrics present in both answers are compared by relative difference
    ``|a - b| / max(|a|, |b|)`` (0 when both are 0).  Non-numeric and
    non-finite values are skipped.

    Args:
        p_json: Parsed primary answer (dict with an optional "metrics" dict).
        s_json: Parsed secondary answer, same shape.

    Returns:
        ``100 * (1 - mean relative difference)`` clamped to be non-negative
        and rounded to two decimals, or None when either side has no usable
        metrics or nothing could be compared.
    """
    import math

    p_metrics, s_metrics = p_json.get("metrics", {}), s_json.get("metrics", {})
    if not isinstance(p_metrics, dict) or not isinstance(s_metrics, dict):
        return None
    if not p_metrics or not s_metrics:
        return None

    diffs, count = 0.0, 0
    for k in p_metrics:
        if k not in s_metrics:
            continue
        try:
            v1, v2 = float(p_metrics[k]), float(s_metrics[k])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric metric value: leave it out of the average.
            continue
        if not (math.isfinite(v1) and math.isfinite(v2)):
            # NaN/inf would turn the whole score into NaN.
            continue
        if v1 == v2 == 0:
            d = 0.0
        else:
            d = abs(v1 - v2) / max(abs(v1), abs(v2))
        diffs += d
        count += 1

    if count == 0:
        return None
    # The relative difference reaches 2 when the values have opposite signs,
    # so clamp to keep the score from going negative.
    score = max(0.0, 1 - (diffs / count)) * 100
    return round(score, 2)

def parse_conf(resp):
    """Read the "confidence_score" field of a JSON reply as a float.

    Returns 0 (as a float) when the field is absent, and 0.0 when the reply
    is not JSON or the value cannot be converted.
    """
    try:
        parsed = json.loads(resp)
        reported = parsed.get("confidence_score", 0)
        return float(reported)
    except Exception:
        # Unparseable reply or value: treat as no confidence.
        return 0.0

def majority_vote(responses):
    """Pick the response text that occurs most often, ignoring outer whitespace.

    Equal counts resolve to the earliest response; an empty list raises
    IndexError.
    """
    counts = Counter()
    for raw in responses:
        counts[raw.strip()] += 1
    ranked = counts.most_common(1)
    return ranked[0][0]

# ----------------------------
# STEP 6: Dashboard Rendering
# ----------------------------
def render_dashboard(resp_text, sem_conf, num_conf, final_conf):
    """Render the chosen model answer as a Streamlit dashboard.

    ``resp_text`` is parsed as JSON; on failure the raw text is shown with an
    error.  Only ``final_conf`` is displayed; ``sem_conf`` and ``num_conf``
    are accepted but not shown.
    """
    try:
        report = json.loads(resp_text)
    except Exception:
        st.error("Invalid model JSON output. Showing raw text:")
        st.text(resp_text)
        return

    st.metric("Overall Confidence", f"{final_conf:.1f}%")
    st.header("📈 Executive Summary")
    st.write(report.get("summary", "No summary available."))

    st.subheader("Key Insights")
    for point in report.get("key_insights", []):
        st.markdown(f"- {point}")

    st.subheader("Metrics")
    metric_values = report.get("metrics", {})
    if metric_values:
        # One column per metric, filled left to right.
        columns = st.columns(len(metric_values))
        for column, (label, value) in zip(columns, metric_values.items()):
            column.metric(label, value)

    st.subheader("Trends Over Time")
    trend = report.get("visual_data", {})
    if trend and "labels" in trend and "values" in trend:
        frame = pd.DataFrame({"Period": trend["labels"], "Value": trend["values"]})
        chart = px.line(frame, x="Period", y="Value", title="Trend", markers=True)
        st.plotly_chart(chart, use_container_width=True)

    st.subheader("Data Table")
    rows = report.get("table", [])
    if rows:
        st.dataframe(pd.DataFrame(rows), use_container_width=True)

    st.subheader("Sources")
    for url in report.get("sources", []):
        st.markdown(f"- [{url}]({url})")

# ----------------------------
# STEP 7: Self-Consistency Loop and Main
# ----------------------------
def main():
    """Run the v4 Streamlit page: Perplexity self-consistency + Gemini validation.

    Samples Perplexity ``n_samples`` times, keeps the sample reporting the
    highest confidence, compares it with a Gemini answer, and renders it with
    the average of the available confidence scores.
    """
    st.set_page_config(page_title="Finance AI Assistant (Gemini Validation)", layout="wide")
    st.title("💹 Financial AI Assistant – Perplexity x Gemini Self-Consistent Reasoning")

    query = st.text_input("Enter financial or economics question:")
    n_samples = st.slider("Number of self-consistency samples", 3, 10, 5)

    # Require a non-empty question so an empty string is never sent to the
    # domain classifier or the models.
    if st.button("Generate") and query:
        if not is_finance_query(query):
            st.error("Query rejected (out of domain).")
            return

        st.info("Generating multiple Perplexity Sonar responses...")
        responses, confidences = [], []
        for _ in range(n_samples):
            r = query_perplexity(query, temperature=0.7)
            responses.append(r)
            confidences.append(parse_conf(r))

        base_conf = max(confidences)
        chosen_response = responses[confidences.index(base_conf)]

        st.info("Validating via Gemini 2.0 Flash...")
        secondary_resp = query_gemini_secondary(query)
        sem_conf = cross_model_confidence(chosen_response, secondary_resp)
        try:
            p_json, s_json = json.loads(chosen_response), json.loads(secondary_resp)
            num_conf = numeric_alignment_score(p_json, s_json)
        except Exception:
            num_conf = None

        # Average exactly the scores that exist. Summing a stand-in third
        # term while dividing by 2 pushed the result past 100, and a
        # truthiness test treated a legitimate numeric score of 0.0 as
        # missing.
        components = [base_conf, sem_conf]
        if num_conf is not None:
            components.append(num_conf)
        final_conf = sum(components) / len(components)
        render_dashboard(chosen_response, sem_conf, num_conf, final_conf)

# ----------------------------
# Run Streamlit App
# ----------------------------
if __name__ == "__main__":
    main()


In [None]:
# =========================================================
# AI FINANCIAL RESEARCH ASSISTANT – HYBRID VERIFICATION v5
# Combines:
#   - Self‑consistency reasoning (Perplexity Sonar)
#   - Independent cross‑model validation (Gemini 2.0 Flash)
# =========================================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
from collections import Counter
from openai import OpenAI

# ----------------------------
# STEP 1: CONFIGURATION
# ----------------------------
# Keys are read from the environment; os.getenv yields None when unset.
PERPLEXITY_KEY = os.getenv("PERPLEXITY_API_KEY")
GEMINI_KEY = os.getenv("GEMINI_API_KEY")

PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
# OpenAI SDK client pointed at Google's OpenAI-compatible base URL.
gemini = OpenAI(
    api_key=GEMINI_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# JSON shape both models are asked to return; render_dashboard and
# parse_confidence read these keys. The `number` placeholders and the worded
# confidence_score are instructions to the model, not literal JSON.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return ONLY valid JSON formatted as:
{
  "summary": "Brief summary of findings.",
  "key_insights": ["Insight 1", "Insight 2"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Provide a 0–100 confidence measure of your own answer"
}
"""
# System message shared by the Perplexity and Gemini calls.
SYSTEM_PROMPT = (
    "You only answer topics in finance, economics, or markets.\n"
    "Output strictly in the JSON structure below:\n"
    f"{RESPONSE_TEMPLATE}"
)

# Domain checking & embeddings for scoring
# Both models are constructed when this module runs.
domain_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
ALLOWED_TOPICS = ["finance", "economics", "markets", "business", "macroeconomics"]
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------------
# STEP 2: CORE HELPERS
# ----------------------------
def is_finance_query(query: str, threshold=0.65):
    """Return True when the query's best-matching allowed topic scores >= threshold.

    Args:
        query: The user's question.
        threshold: Minimum classifier score required to accept the query.
    """
    # Use independent per-label scores (multi_label=True). Single-label
    # scores are normalised to sum to 1 over the candidates, all of which are
    # finance topics here, so the top score would not indicate whether the
    # query is in the finance domain at all.
    r = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    return r["scores"][0] >= threshold

def query_perplexity(query: str, temperature=0.7, timeout=60):
    """Send one chat-completion request to Perplexity and return its text.

    Args:
        query: The user's question, sent as the user message.
        temperature: Sampling temperature forwarded to the API.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    headers = {"Authorization": f"Bearer {PERPLEXITY_KEY}",
               "Content-Type": "application/json"}
    payload = {
        "model": "sonar-large",
        "temperature": temperature,
        "max_tokens": 1000,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": query}
        ]
    }
    # requests waits forever by default; bound each sample so one stalled
    # call cannot hang the whole analysis.
    resp = requests.post(PERPLEXITY_URL, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

def query_gemini(query: str):
    """Ask Gemini 2.0 Flash the question and return the reply's content.

    If the message content is a list, the ``text`` of its first element is
    returned; otherwise the content itself is returned.  A message without a
    ``content`` attribute is returned as ``str(message)``.
    """
    chat_messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": query}
    ]
    comp = gemini.chat.completions.create(
        model="gemini-2.0-flash",
        temperature=0.3,
        max_tokens=1000,
        messages=chat_messages,
    )
    message = comp.choices[0].message
    if not hasattr(message, "content"):
        return str(message)
    content = message.content
    if isinstance(content, list):
        return content[0].text
    return content

# ----------------------------
# STEP 3: SELF‑CONSISTENCY PROMPTING
# ----------------------------
def generate_self_consistent_responses(query, n=5):
    """Sample the Perplexity model ``n`` times at temperature 0.8.

    Returns:
        A pair ``(responses, scores)``: the raw replies and, in the same
        order, the confidence parsed from each reply.
    """
    st.info(f"Generating {n} independent Perplexity analyst responses...")
    answers = []
    confidences = []
    for _sample in range(n):
        answer = query_perplexity(query, temperature=0.8)
        answers.append(answer)
        confidences.append(parse_confidence(answer))
    return answers, confidences

def majority_vote(responses):
    """Return the whitespace-stripped response that appears most often.

    The earliest response wins a tie; an empty list raises IndexError.
    """
    stripped = map(str.strip, responses) if False else (r.strip() for r in responses)
    top_entry = Counter(stripped).most_common(1)[0]
    return top_entry[0]

def parse_confidence(text):
    """Return the reply's "confidence_score" as a float.

    A missing field yields 0 as a float; text that is not JSON, or a value
    that cannot be converted, yields 0.0.
    """
    fallback = 0.0
    try:
        document = json.loads(text)
        score = float(document.get("confidence_score", 0))
    except Exception:
        return fallback
    return score

# ----------------------------
# STEP 4: HYBRID VALIDATION SCORING
# ----------------------------
def semantic_similarity_score(a, b):
    """Return the cosine similarity of the embeddings of ``a`` and ``b``, times 100.

    The result is rounded to two decimals.
    """
    encoded = embedder.encode([a, b])
    first, second = encoded
    cosine = util.cos_sim(first, second).item()
    return round(cosine * 100, 2)

def numeric_alignment_score(j1, j2):
    """Score agreement between the "metrics" of two parsed answers, 0-100.

    Metrics present in both answers are compared by relative difference
    ``|a - b| / max(|a|, |b|)`` (0 when both are 0).  Non-numeric and
    non-finite values are skipped.

    Args:
        j1: Parsed primary answer (dict with an optional "metrics" dict).
        j2: Parsed secondary answer, same shape.

    Returns:
        ``100 * (1 - mean relative difference)`` clamped to be non-negative
        and rounded to two decimals, or None when either side has no usable
        metrics or nothing could be compared.
    """
    import math

    m1, m2 = j1.get("metrics", {}), j2.get("metrics", {})
    if not isinstance(m1, dict) or not isinstance(m2, dict):
        return None
    if not m1 or not m2:
        return None

    total, count = 0.0, 0
    for k in m1:
        if k not in m2:
            continue
        try:
            v1, v2 = float(m1[k]), float(m2[k])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric metric value: leave it out of the average.
            continue
        if not (math.isfinite(v1) and math.isfinite(v2)):
            # NaN/inf would turn the whole score into NaN.
            continue
        if v1 == v2 == 0:
            d = 0.0
        else:
            d = abs(v1 - v2) / max(abs(v1), abs(v2))
        total += d
        count += 1

    if not count:
        return None
    # Opposite-sign values give a relative difference of up to 2; clamp so
    # the percentage never drops below 0.
    return round(max(0.0, 1 - (total / count)) * 100, 2)

# ----------------------------
# STEP 5: DASHBOARD RENDERER
# ----------------------------
def render_dashboard(response, final_conf, sem_conf, num_conf):
    """Render the chosen model answer and its validation scores in Streamlit.

    Args:
        response: Raw model output, expected to be a JSON object.
        final_conf: Combined confidence shown at the top of the page.
        sem_conf: Semantic similarity percentage, shown at the bottom.
        num_conf: Numeric alignment percentage, shown when not None.
    """
    try:
        data = json.loads(response)
    except Exception:
        data = None
    # JSON that is not an object has none of the expected keys; report it the
    # same way as text that fails to parse.
    if not isinstance(data, dict):
        st.error("Invalid JSON returned by model.")
        st.text(response)
        return

    st.metric("Overall Confidence (%)", f"{final_conf:.1f}")
    st.header("📊 Financial Summary")
    st.write(data.get("summary", "No summary."))

    # `or` fallbacks below cover fields the model returned as null.
    st.subheader("Key Insights")
    for insight in data.get("key_insights") or []:
        st.markdown(f"- {insight}")

    st.subheader("Metrics")
    mets = data.get("metrics") or {}
    if isinstance(mets, dict) and mets:
        cols = st.columns(len(mets))
        for col, (k, v) in zip(cols, mets.items()):
            col.metric(k, v)

    st.subheader("Trend Visualization")
    vis = data.get("visual_data") or {}
    if not isinstance(vis, dict):
        vis = {}
    labels, values = vis.get("labels"), vis.get("values")
    # Plot only equal-length lists; anything else would make the DataFrame
    # constructor raise.
    if isinstance(labels, list) and isinstance(values, list) and len(labels) == len(values):
        df = pd.DataFrame({"Period": labels, "Value": values})
        fig = px.line(df, x="Period", y="Value", title="Quarterly Trends", markers=True)
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("Data Table")
    tab = data.get("table") or []
    if tab:
        st.dataframe(pd.DataFrame(tab), use_container_width=True)

    st.subheader("Sources")
    for s in data.get("sources") or []:
        st.markdown(f"- [{s}]({s})")

    st.write(f"Semantic Similarity: {sem_conf:.2f}%")
    if num_conf is not None:
        st.write(f"Numeric Alignment: {num_conf:.2f}%")

# ----------------------------
# STEP 6: MAIN WORKFLOW
# ----------------------------
def main():
    """Run the Streamlit page: collect a question, query both models, score, render.

    Flow: domain gate -> several Perplexity samples -> pick the sample with the
    highest self-reported confidence (majority vote as fallback) -> Gemini
    cross-check -> combine the available confidence components -> dashboard.
    """
    st.set_page_config(page_title="Hybrid Financial Research Assistant", layout="wide")
    st.title("💹 Hybrid AI Financial Analyst – Self‑Consistency + Cross‑Model Verification")

    q = st.text_input("Enter your question about markets, finance, or economics:")
    n_paths = st.slider("Number of self‑consistent analysts (Perplexity)", 3, 10, 5)

    if st.button("Analyze") and q:
        if not is_finance_query(q):
            st.error("Query not recognized as financial domain.")
            return

        # 1. --- SELF-CONSISTENT REASONING ---
        responses, scores = generate_self_consistent_responses(q, n_paths)
        voted_response = majority_vote(responses)
        # `scores` may be empty when every primary call failed; max() would raise.
        best_response = responses[scores.index(max(scores))] if scores else ""
        chosen_primary = best_response or voted_response

        if not chosen_primary:
            st.error("Primary model failed to generate responses.")
            return

        # 2. --- INDEPENDENT SECONDARY VALIDATION (GEMINI) ---
        st.info("Cross‑verifying via Gemini 2.0 Flash...")
        secondary_resp = query_gemini(q)

        # 3. --- SCORING ---
        sem_conf = semantic_similarity_score(chosen_primary, secondary_resp)
        try:
            j1, j2 = json.loads(chosen_primary), json.loads(secondary_resp)
            num_conf = numeric_alignment_score(j1, j2)
        except Exception:
            # Best effort: numeric alignment is optional when either reply is not usable JSON.
            num_conf = None

        base_conf = max(scores) if scores else 0
        # Average only the components that exist. Test against None explicitly:
        # a numeric alignment of 0.0 is a real (bad) score, not a missing one.
        components = [base_conf, sem_conf]
        if num_conf is not None:
            components.append(num_conf)
        final_conf = sum(components) / len(components)

        # 4. --- DISPLAY ---
        render_dashboard(chosen_primary, final_conf, sem_conf, num_conf)

# ----------------------------
# RUN STREAMLIT
# ----------------------------
# Start the app only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


In [None]:
# =========================================================
# AI FINANCIAL RESEARCH ASSISTANT – HYBRID VERIFICATION v5.1
# Combines:
#   - Self‑consistency reasoning (Perplexity Sonar)
#   - Cross‑model validation (Gemini 2.0 Flash)
# With bug fixes, timeouts, and validation guards.
# =========================================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
from collections import Counter
from openai import OpenAI
import numpy as np

# ----------------------------
# STEP 1: CONFIGURATION
# ----------------------------
# API credentials are read from the environment; nothing is hard-coded here.
PERPLEXITY_KEY = os.getenv("PERPLEXITY_API_KEY")
GEMINI_KEY = os.getenv("GEMINI_API_KEY")

# Halt this script run early when either credential is absent.
if not PERPLEXITY_KEY or not GEMINI_KEY:
    st.error("Missing API keys. Please set PERPLEXITY_API_KEY and GEMINI_API_KEY.")
    st.stop()

PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
# Gemini is called through its OpenAI-compatible endpoint, hence the OpenAI client.
gemini = OpenAI(
    api_key=GEMINI_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# JSON schema both models are instructed to follow; the dashboard reads these keys.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return ONLY valid JSON formatted as:
{
  "summary": "Brief summary of findings.",
  "key_insights": ["Insight 1", "Insight 2"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Provide a 0–100 confidence measure of your own answer"
}
"""

SYSTEM_PROMPT = (
    "You only answer topics in finance, economics, or markets.\n"
    "Output strictly in the JSON structure below:\n"
    f"{RESPONSE_TEMPLATE}"
)


# Domain classifier and embedder
@st.cache_resource(show_spinner=False)
def _load_domain_classifier():
    """Build the zero-shot domain classifier once and reuse it across reruns."""
    # device=-1 keeps inference on the CPU.
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)


@st.cache_resource(show_spinner=False)
def _load_embedder():
    """Build the sentence embedder once and reuse it across reruns."""
    return SentenceTransformer("all-MiniLM-L6-v2")


# Streamlit re-executes this script on every widget interaction; without the
# cache both models would be reloaded from disk on each rerun.
domain_classifier = _load_domain_classifier()
ALLOWED_TOPICS = ["finance", "economics", "markets", "business", "macroeconomics"]
embedder = _load_embedder()

# ----------------------------
# STEP 2: CORE HELPERS
# ----------------------------
def is_finance_query(query: str, threshold=0.65):
    """Return True when `query` is judged to belong to an allowed topic.

    Args:
        query: The user's question.
        threshold: Minimum classifier score for any allowed topic.

    The labels are scored independently (`multi_label=True`). In the default
    single-label mode the scores are normalised to sum to 1 across the
    candidate labels, so the probability mass is split between the overlapping
    finance labels and the top score does not measure "is this on topic".
    """
    # A blank query has no domain; skip the classifier call entirely.
    if not query or not query.strip():
        return False
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    return max(result["scores"]) >= threshold

def query_perplexity(query: str, temperature=0.7):
    """Ask Perplexity's chat-completions endpoint and return the reply text.

    Args:
        query: User question, sent as the single user message after SYSTEM_PROMPT.
        temperature: Sampling temperature forwarded in the request body.

    Returns:
        The `content` of the first choice in the JSON response.

    Errors from the HTTP layer (including the 30-second timeout and non-2xx
    statuses via `raise_for_status`) propagate to the caller.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": query},
    ]
    # NOTE(review): confirm "sonar-large" is a model id available on the account in use.
    request_body = {
        "model": "sonar-large",
        "temperature": temperature,
        "max_tokens": 1000,
        "messages": conversation,
    }
    auth_headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }

    reply = requests.post(PERPLEXITY_URL, headers=auth_headers, json=request_body, timeout=30)
    reply.raise_for_status()
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"]

def query_gemini(query: str):
    """Ask Gemini (via the OpenAI-compatible client) and return the reply text.

    Args:
        query: User question, sent as the single user message after SYSTEM_PROMPT.

    Returns:
        The reply as a string. If the request fails, times out, or yields no
        usable content, a JSON string with empty fields and a confidence of 0
        is returned instead, so callers always receive parseable text.
    """
    try:
        # The request lives inside the try block so that network/API failures
        # take the same fallback path as malformed responses.
        comp = gemini.chat.completions.create(
            model="gemini-2.0-flash",
            temperature=0.3,
            max_tokens=1000,
            timeout=30,  # same budget as the Perplexity request
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": query}
            ]
        )
        content = comp.choices[0].message.content
        if isinstance(content, list):
            # Multi-part content: join the textual parts only.
            return " ".join(part.text for part in content if hasattr(part, "text"))
        if isinstance(content, str):
            return content
        if content is None:
            # str(None) would yield the literal "None" and pollute the scoring.
            raise ValueError("Gemini returned no content")
        return str(content)
    except Exception as e:
        print(f"[Gemini Parsing Error] {e}")
        return json.dumps({
            "summary": "Gemini parsing error.",
            "key_insights": [],
            "metrics": {},
            "visual_data": {},
            "table": [],
            "sources": [],
            "confidence_score": 0
        })

# ----------------------------
# STEP 3: SELF‑CONSISTENCY PROMPTING
# ----------------------------
def generate_self_consistent_responses(query, n=5):
    """Sample up to `n` Perplexity answers for `query` together with their confidences.

    Each attempt is independent: a failing attempt is reported as a Streamlit
    warning and skipped, so the returned lists can hold fewer than `n` entries
    (or none at all).

    Returns:
        (responses, scores): the raw reply texts and, for each one, the value
        produced by `parse_confidence`.
    """
    st.info(f"Generating {n} independent Perplexity analyst responses...")
    responses = []
    scores = []
    for _attempt in range(n):
        try:
            answer = query_perplexity(query, temperature=0.8)
            responses.append(answer)
            scores.append(parse_confidence(answer))
        except Exception as err:
            st.warning(f"Perplexity API error: {err}")
    return responses, scores

def majority_vote(responses):
    """Return the most frequent response text, or "" when there is nothing to count.

    Falsy entries are ignored and the remaining ones are compared after
    stripping surrounding whitespace. Ties go to the text seen first.
    """
    tally = Counter()
    for candidate in responses or ():
        if candidate:
            tally[candidate.strip()] += 1
    if not tally:
        return ""
    winner, _votes = tally.most_common(1)[0]
    return winner

def parse_confidence(text):
    """Extract the self-reported confidence from a model reply as a float in [0, 100].

    Args:
        text: Raw model reply, expected to be a JSON object with a
            "confidence_score" field.

    Returns:
        The score clamped to the 0-100 range. 0.0 is returned when the reply is
        not a JSON object, the field is missing, or the value is not a finite
        number. String values such as "85" or "85%" are accepted because the
        prompt template shows the field as a string.
    """
    import math

    try:
        payload = json.loads(text)
    except (TypeError, ValueError):
        return 0.0
    if not isinstance(payload, dict):
        return 0.0

    raw = payload.get("confidence_score", 0)
    if isinstance(raw, str):
        raw = raw.strip().rstrip("%").strip()
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        return 0.0

    # NaN/inf would poison max() and the averaged confidence downstream.
    if not math.isfinite(value):
        return 0.0
    return min(max(value, 0.0), 100.0)

# ----------------------------
# STEP 4: VALIDATION FUNCTIONS
# ----------------------------
def semantic_similarity_score(a, b):
    """Return the cosine similarity of the embeddings of `a` and `b`, as a percentage.

    The value is rounded to two decimals. Any failure while embedding or
    comparing is printed and reported as 0.0 instead of being raised.
    """
    try:
        embeddings = embedder.encode([a, b])
        first_vec, second_vec = embeddings
        cosine = float(util.cos_sim(first_vec, second_vec))
    except Exception as e:
        print(f"Embedding error: {e}")
        return 0.0
    return round(cosine * 100, 2)

def numeric_alignment_score(j1, j2):
    """Score how closely the shared "metrics" of two parsed replies agree (0-100).

    Args:
        j1, j2: Parsed model replies. Anything that is not a dict with a
            dict-valued "metrics" entry is treated as having no metrics.

    Returns:
        100 minus the mean relative difference (in percent) over the metric
        keys present in both replies, rounded to two decimals; None when no
        key could be compared.

    The relative difference of one pair is |v1 - v2| / max(|v1|, |v2|), capped
    at 1. Without the cap, values of opposite sign produce a difference of up
    to 2 and therefore a negative "percentage".
    """
    import math

    if not isinstance(j1, dict) or not isinstance(j2, dict):
        return None
    m1, m2 = j1.get("metrics"), j2.get("metrics")
    if not isinstance(m1, dict) or not isinstance(m2, dict) or not m1 or not m2:
        return None

    total, count = 0.0, 0
    for key, raw1 in m1.items():
        if key not in m2:
            continue
        try:
            v1, v2 = float(raw1), float(m2[key])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric metric (e.g. "n/a"): leave it out of the average.
            continue
        if not (math.isfinite(v1) and math.isfinite(v2)):
            continue
        scale = max(abs(v1), abs(v2))
        diff = 0.0 if scale == 0 else min(abs(v1 - v2) / scale, 1.0)
        total += diff
        count += 1

    if count == 0:
        return None
    return round((1 - (total / count)) * 100, 2)

# ----------------------------
# STEP 5: DASHBOARD RENDERER
# ----------------------------
def render_dashboard(response, final_conf, sem_conf, num_conf):
    """Render the chosen model answer and its confidence figures in Streamlit.

    Args:
        response: Raw model reply; expected to be a JSON object serialized as text.
        final_conf: Combined confidence, shown with one decimal.
        sem_conf: Semantic-similarity percentage, shown with two decimals.
        num_conf: Numeric-alignment percentage, or None to omit that line.

    The reply comes from a language model, so every field is validated before
    use: a missing, null, or wrongly-typed field is skipped rather than allowed
    to raise in the middle of rendering.
    """
    try:
        data = json.loads(response)
    except (TypeError, ValueError):
        data = None
    # Valid JSON that is not an object (list, string, number) is just as unusable.
    if not isinstance(data, dict):
        st.error("Invalid JSON returned by model.")
        st.text(response)
        return

    st.metric("Overall Confidence (%)", f"{final_conf:.1f}")
    st.header("📊 Financial Summary")
    st.write(data.get("summary") or "No summary.")

    st.subheader("Key Insights")
    insights = data.get("key_insights")
    for insight in insights if isinstance(insights, list) else []:
        st.markdown(f"- {insight}")

    st.subheader("Metrics")
    mets = data.get("metrics")
    if isinstance(mets, dict) and mets:
        cols = st.columns(len(mets))
        for col, (label, value) in zip(cols, mets.items()):
            # Nested containers are stringified so the metric widget gets a scalar.
            if value is not None and not isinstance(value, (int, float, str)):
                value = str(value)
            col.metric(label, value)

    st.subheader("Trend Visualization")
    vis = data.get("visual_data")
    labels = vis.get("labels") if isinstance(vis, dict) else None
    values = vis.get("values") if isinstance(vis, dict) else None
    if isinstance(labels, list) and isinstance(values, list) and labels and values:
        # DataFrame construction fails on unequal column lengths; keep the common prefix.
        n_points = min(len(labels), len(values))
        df = pd.DataFrame({"Period": labels[:n_points], "Value": values[:n_points]})
        fig = px.line(df, x="Period", y="Value", title="Quarterly Trends", markers=True)
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("No visual data available.")

    st.subheader("Data Table")
    tab = data.get("table")
    if isinstance(tab, list) and tab:
        st.dataframe(pd.DataFrame(tab), use_container_width=True)

    st.subheader("Sources")
    sources = data.get("sources")
    for s in sources if isinstance(sources, list) else []:
        st.markdown(f"- [{s}]({s})")

    st.write(f"Semantic Similarity: {sem_conf:.2f}%")
    if num_conf is not None:
        st.write(f"Numeric Alignment: {num_conf:.2f}%")

# ----------------------------
# STEP 6: MAIN WORKFLOW
# ----------------------------
def main():
    """Drive the Streamlit page from question input to the rendered dashboard.

    Steps: draw the inputs, gate the question on domain, sample the primary
    model, pick the answer with the highest self-reported confidence (falling
    back to the majority vote), cross-check with Gemini, average the available
    confidence components, and render.
    """
    st.set_page_config(page_title="Hybrid Financial Research Assistant", layout="wide")
    st.title("💹 Hybrid AI Financial Analyst – Self‑Consistency + Cross‑Model Verification")

    q = st.text_input("Enter your question about markets, finance, or economics:")
    n_paths = st.slider("Number of self‑consistent analysts (Perplexity)", 3, 10, 5)

    # Nothing to do until the button is pressed with a non-empty question.
    analyze_clicked = st.button("Analyze")
    if not (analyze_clicked and q):
        return

    if not is_finance_query(q):
        st.error("Query not recognized as financial domain.")
        return

    # --- Self‑Consistency Stage ---
    responses, scores = generate_self_consistent_responses(q, n_paths)
    voted_response = majority_vote(responses)
    top_score = max(scores) if scores else 0
    best_response = responses[scores.index(top_score)] if scores else ""
    chosen_primary = best_response if best_response else voted_response

    if not chosen_primary:
        st.error("Primary model failed to generate responses.")
        return

    # --- Independent Validation Stage ---
    st.info("Cross‑verifying via Gemini 2.0 Flash...")
    secondary_resp = query_gemini(q)

    # --- Scoring ---
    sem_conf = semantic_similarity_score(chosen_primary, secondary_resp)

    # Replies that fail to parse are scored as empty objects.
    parsed = []
    for raw_reply in (chosen_primary, secondary_resp):
        try:
            parsed.append(json.loads(raw_reply))
        except Exception:
            parsed.append({})
    num_conf = numeric_alignment_score(parsed[0], parsed[1])

    # Numeric alignment joins the average only when it could be computed.
    conf_parts = [top_score, sem_conf]
    if num_conf is not None:
        conf_parts.append(num_conf)
    final_conf = np.mean(conf_parts)

    # --- Display ---
    render_dashboard(chosen_primary, final_conf, sem_conf, num_conf)

# ----------------------------
# RUN STREAMLIT
# ----------------------------
# Start the app only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


In [None]:
# =========================================================
# AI FINANCIAL RESEARCH ASSISTANT – HYBRID VERIFICATION v6
# Combines:
#   - Self‑consistency reasoning (Perplexity Sonar)
#   - Cross‑model validation (Gemini 2.0 Flash)
# With bug fixes, timeouts, and validation guards.
# Validated and fixed by Claude
# =========================================================

import os
import json
import requests
import pandas as pd
import plotly.express as px
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
from collections import Counter
from openai import OpenAI
import numpy as np

# ----------------------------
# STEP 1: CONFIGURATION
# ----------------------------
# API credentials are read from the environment; nothing is hard-coded here.
PERPLEXITY_KEY = os.getenv("PERPLEXITY_API_KEY")
GEMINI_KEY = os.getenv("GEMINI_API_KEY")

# Halt this script run early when either credential is absent.
if not PERPLEXITY_KEY or not GEMINI_KEY:
    st.error("Missing API keys. Please set PERPLEXITY_API_KEY and GEMINI_API_KEY.")
    st.stop()

PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
# Gemini is called through its OpenAI-compatible endpoint, hence the OpenAI client.
gemini = OpenAI(
    api_key=GEMINI_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# JSON schema both models are instructed to follow; the dashboard reads these keys.
RESPONSE_TEMPLATE = """
You are a financial research assistant. Return ONLY valid JSON formatted as:
{
  "summary": "Brief summary of findings.",
  "key_insights": ["Insight 1", "Insight 2"],
  "metrics": {"GDP Growth (%)": number, "Inflation (%)": number, "Unemployment (%)": number},
  "visual_data": {"labels": ["Q1","Q2"], "values": [2.3,2.5]},
  "table": [{"Country": "US", "GDP": 25.5, "Inflation": 3.4}],
  "sources": ["https://imf.org", "https://reuters.com"],
  "confidence_score": "Provide a 0–100 confidence measure of your own answer"
}
"""

SYSTEM_PROMPT = (
    "You only answer topics in finance, economics, or markets.\n"
    "Output strictly in the JSON structure below:\n"
    f"{RESPONSE_TEMPLATE}"
)


# Domain classifier and embedder
@st.cache_resource(show_spinner=False)
def _load_domain_classifier():
    """Build the zero-shot domain classifier once and reuse it across reruns."""
    # device=-1 keeps inference on the CPU.
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)


@st.cache_resource(show_spinner=False)
def _load_embedder():
    """Build the sentence embedder once and reuse it across reruns."""
    return SentenceTransformer("all-MiniLM-L6-v2")


# Streamlit re-executes this script on every widget interaction; without the
# cache both models would be reloaded from disk on each rerun.
domain_classifier = _load_domain_classifier()
ALLOWED_TOPICS = ["finance", "economics", "markets", "business", "macroeconomics"]
embedder = _load_embedder()

# ----------------------------
# STEP 2: CORE HELPERS
# ----------------------------
def is_finance_query(query: str, threshold=0.65):
    """Return True when `query` is judged to belong to an allowed topic.

    Args:
        query: The user's question.
        threshold: Minimum classifier score for any allowed topic.

    The labels are scored independently (`multi_label=True`). In the default
    single-label mode the scores are normalised to sum to 1 across the
    candidate labels, so the probability mass is split between the overlapping
    finance labels and the top score does not measure "is this on topic".
    """
    # A blank query has no domain; skip the classifier call entirely.
    if not query or not query.strip():
        return False
    result = domain_classifier(query, ALLOWED_TOPICS, multi_label=True)
    return max(result["scores"]) >= threshold

def query_perplexity(query: str, temperature=0.7):
    """Ask Perplexity's chat-completions endpoint and return the reply text.

    Args:
        query: User question, sent as the single user message after SYSTEM_PROMPT.
        temperature: Sampling temperature forwarded in the request body.

    Returns:
        The `content` of the first choice in the JSON response.

    Errors from the HTTP layer (including the 30-second timeout and non-2xx
    statuses via `raise_for_status`) propagate to the caller.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": query},
    ]
    # NOTE(review): confirm "sonar-large" is a model id available on the account in use.
    request_body = {
        "model": "sonar-large",
        "temperature": temperature,
        "max_tokens": 1000,
        "messages": conversation,
    }
    auth_headers = {
        "Authorization": f"Bearer {PERPLEXITY_KEY}",
        "Content-Type": "application/json",
    }

    reply = requests.post(PERPLEXITY_URL, headers=auth_headers, json=request_body, timeout=30)
    reply.raise_for_status()
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"]

def query_gemini(query: str):
    """Ask Gemini (via the OpenAI-compatible client) and return the reply text.

    Args:
        query: User question, sent as the single user message after SYSTEM_PROMPT.

    Returns:
        The reply as a string. If the request fails, times out, or yields no
        usable content, a JSON string with empty fields and a confidence of 0
        is returned instead, so callers always receive parseable text.
    """
    try:
        # The request lives inside the try block so that network/API failures
        # take the same fallback path as malformed responses.
        comp = gemini.chat.completions.create(
            model="gemini-2.0-flash",
            temperature=0.3,
            max_tokens=1000,
            timeout=30,  # same budget as the Perplexity request
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": query}
            ]
        )
        content = comp.choices[0].message.content
        if isinstance(content, list):
            # Multi-part content: join the textual parts only.
            return " ".join(part.text for part in content if hasattr(part, "text"))
        if isinstance(content, str):
            return content
        if content is None:
            # str(None) would yield the literal "None" and pollute the scoring.
            raise ValueError("Gemini returned no content")
        return str(content)
    except Exception as e:
        print(f"[Gemini Parsing Error] {e}")
        return json.dumps({
            "summary": "Gemini parsing error.",
            "key_insights": [],
            "metrics": {},
            "visual_data": {},
            "table": [],
            "sources": [],
            "confidence_score": 0
        })

# ----------------------------
# STEP 3: SELF‑CONSISTENCY PROMPTING
# ----------------------------
def generate_self_consistent_responses(query, n=5):
    """Sample up to `n` Perplexity answers for `query` together with their confidences.

    Each attempt is independent: a failing attempt is reported as a Streamlit
    warning and skipped, so the returned lists can hold fewer than `n` entries
    (or none at all).

    Returns:
        (responses, scores): the raw reply texts and, for each one, the value
        produced by `parse_confidence`.
    """
    st.info(f"Generating {n} independent Perplexity analyst responses...")
    responses = []
    scores = []
    for _attempt in range(n):
        try:
            answer = query_perplexity(query, temperature=0.8)
            responses.append(answer)
            scores.append(parse_confidence(answer))
        except Exception as err:
            st.warning(f"Perplexity API error: {err}")
    return responses, scores

def majority_vote(responses):
    """Return the most frequent response text, or "" when there is nothing to count.

    Falsy entries are ignored and the remaining ones are compared after
    stripping surrounding whitespace. Ties go to the text seen first.
    """
    tally = Counter()
    for candidate in responses or ():
        if candidate:
            tally[candidate.strip()] += 1
    if not tally:
        return ""
    winner, _votes = tally.most_common(1)[0]
    return winner

def parse_confidence(text):
    """Extract the self-reported confidence from a model reply as a float in [0, 100].

    Args:
        text: Raw model reply, expected to be a JSON object with a
            "confidence_score" field.

    Returns:
        The score clamped to the 0-100 range. 0.0 is returned when the reply is
        not a JSON object, the field is missing, or the value is not a finite
        number. String values such as "85" or "85%" are accepted because the
        prompt template shows the field as a string.
    """
    import math

    try:
        payload = json.loads(text)
    except (TypeError, ValueError):
        return 0.0
    if not isinstance(payload, dict):
        return 0.0

    raw = payload.get("confidence_score", 0)
    if isinstance(raw, str):
        raw = raw.strip().rstrip("%").strip()
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        return 0.0

    # NaN/inf would poison max() and the averaged confidence downstream.
    if not math.isfinite(value):
        return 0.0
    return min(max(value, 0.0), 100.0)

# ----------------------------
# STEP 4: VALIDATION FUNCTIONS
# ----------------------------
def semantic_similarity_score(a, b):
    """Return the cosine similarity of the embeddings of `a` and `b`, as a percentage.

    The value is rounded to two decimals. Any failure while embedding or
    comparing is printed and reported as 0.0 instead of being raised.
    """
    try:
        embeddings = embedder.encode([a, b])
        first_vec, second_vec = embeddings
        cosine = float(util.cos_sim(first_vec, second_vec))
    except Exception as e:
        print(f"Embedding error: {e}")
        return 0.0
    return round(cosine * 100, 2)

def numeric_alignment_score(j1, j2):
    """Score how closely the shared "metrics" of two parsed replies agree (0-100).

    Args:
        j1, j2: Parsed model replies. Anything that is not a dict with a
            dict-valued "metrics" entry is treated as having no metrics.

    Returns:
        100 minus the mean relative difference (in percent) over the metric
        keys present in both replies, rounded to two decimals; None when no
        key could be compared.

    The relative difference of one pair is |v1 - v2| / max(|v1|, |v2|), capped
    at 1. Without the cap, values of opposite sign produce a difference of up
    to 2 and therefore a negative "percentage".
    """
    import math

    if not isinstance(j1, dict) or not isinstance(j2, dict):
        return None
    m1, m2 = j1.get("metrics"), j2.get("metrics")
    if not isinstance(m1, dict) or not isinstance(m2, dict) or not m1 or not m2:
        return None

    total, count = 0.0, 0
    for key, raw1 in m1.items():
        if key not in m2:
            continue
        try:
            v1, v2 = float(raw1), float(m2[key])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric metric (e.g. "n/a"): leave it out of the average.
            continue
        if not (math.isfinite(v1) and math.isfinite(v2)):
            continue
        scale = max(abs(v1), abs(v2))
        diff = 0.0 if scale == 0 else min(abs(v1 - v2) / scale, 1.0)
        total += diff
        count += 1

    if count == 0:
        return None
    return round((1 - (total / count)) * 100, 2)

# ----------------------------
# STEP 5: DASHBOARD RENDERER
# ----------------------------
def render_dashboard(response, final_conf, sem_conf, num_conf):
    """Render the chosen model answer and its confidence figures in Streamlit.

    Args:
        response: Raw model reply; expected to be a JSON object serialized as text.
        final_conf: Combined confidence, shown with one decimal.
        sem_conf: Semantic-similarity percentage, shown with two decimals.
        num_conf: Numeric-alignment percentage, or None to omit that line.

    The reply comes from a language model, so every field is validated before
    use: a missing, null, or wrongly-typed field is skipped rather than allowed
    to raise in the middle of rendering.
    """
    try:
        data = json.loads(response)
    except (TypeError, ValueError):
        data = None
    # Valid JSON that is not an object (list, string, number) is just as unusable.
    if not isinstance(data, dict):
        st.error("Invalid JSON returned by model.")
        st.text(response)
        return

    st.metric("Overall Confidence (%)", f"{final_conf:.1f}")
    st.header("📊 Financial Summary")
    st.write(data.get("summary") or "No summary.")

    st.subheader("Key Insights")
    insights = data.get("key_insights")
    for insight in insights if isinstance(insights, list) else []:
        st.markdown(f"- {insight}")

    st.subheader("Metrics")
    mets = data.get("metrics")
    if isinstance(mets, dict) and mets:
        cols = st.columns(len(mets))
        for col, (label, value) in zip(cols, mets.items()):
            # Nested containers are stringified so the metric widget gets a scalar.
            if value is not None and not isinstance(value, (int, float, str)):
                value = str(value)
            col.metric(label, value)

    st.subheader("Trend Visualization")
    vis = data.get("visual_data")
    labels = vis.get("labels") if isinstance(vis, dict) else None
    values = vis.get("values") if isinstance(vis, dict) else None
    if isinstance(labels, list) and isinstance(values, list) and labels and values:
        # DataFrame construction fails on unequal column lengths; keep the common prefix.
        n_points = min(len(labels), len(values))
        df = pd.DataFrame({"Period": labels[:n_points], "Value": values[:n_points]})
        fig = px.line(df, x="Period", y="Value", title="Quarterly Trends", markers=True)
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("No visual data available.")

    st.subheader("Data Table")
    tab = data.get("table")
    if isinstance(tab, list) and tab:
        st.dataframe(pd.DataFrame(tab), use_container_width=True)

    st.subheader("Sources")
    sources = data.get("sources")
    for s in sources if isinstance(sources, list) else []:
        st.markdown(f"- [{s}]({s})")

    st.write(f"Semantic Similarity: {sem_conf:.2f}%")
    if num_conf is not None:
        st.write(f"Numeric Alignment: {num_conf:.2f}%")

# ----------------------------
# STEP 6: MAIN WORKFLOW
# ----------------------------
def main():
    """Drive the Streamlit page from question input to the rendered dashboard.

    Steps: draw the inputs, gate the question on domain, sample the primary
    model, pick the answer with the highest self-reported confidence (falling
    back to the majority vote), cross-check with Gemini, average the available
    confidence components, and render.
    """
    st.set_page_config(page_title="Hybrid Financial Research Assistant", layout="wide")
    st.title("💹 Hybrid AI Financial Analyst – Self‑Consistency + Cross‑Model Verification")

    q = st.text_input("Enter your question about markets, finance, or economics:")
    n_paths = st.slider("Number of self‑consistent analysts (Perplexity)", 3, 10, 5)

    # Nothing to do until the button is pressed with a non-empty question.
    analyze_clicked = st.button("Analyze")
    if not (analyze_clicked and q):
        return

    if not is_finance_query(q):
        st.error("Query not recognized as financial domain.")
        return

    # --- Self‑Consistency Stage ---
    responses, scores = generate_self_consistent_responses(q, n_paths)
    voted_response = majority_vote(responses)
    top_score = max(scores) if scores else 0
    best_response = responses[scores.index(top_score)] if scores else ""
    chosen_primary = best_response if best_response else voted_response

    if not chosen_primary:
        st.error("Primary model failed to generate responses.")
        return

    # --- Independent Validation Stage ---
    st.info("Cross‑verifying via Gemini 2.0 Flash...")
    secondary_resp = query_gemini(q)

    # --- Scoring ---
    sem_conf = semantic_similarity_score(chosen_primary, secondary_resp)

    # Replies that fail to parse are scored as empty objects.
    parsed = []
    for raw_reply in (chosen_primary, secondary_resp):
        try:
            parsed.append(json.loads(raw_reply))
        except Exception:
            parsed.append({})
    num_conf = numeric_alignment_score(parsed[0], parsed[1])

    # Numeric alignment joins the average only when it could be computed.
    conf_parts = [top_score, sem_conf]
    if num_conf is not None:
        conf_parts.append(num_conf)
    final_conf = np.mean(conf_parts)

    # --- Display ---
    render_dashboard(chosen_primary, final_conf, sem_conf, num_conf)

# ----------------------------
# RUN STREAMLIT
# ----------------------------
# Start the app only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
