# PROJEKTS: Knowledge Hub



## Situācija
Tu esi tikko pieņemts darbā par MI inženieri. Uzņēmuma vadītājs tev ir iedevis mapi UNSORTED_DATA, kurā valda haoss — sapulču ieraksti, tāfeles fotogrāfijas un līgumu melnraksti.

Nevienam nav laika to visu lasīt vai klausīties.

## Tavs Uzdevums:

Uzbūvēt Multimodālu RAG Sistēmu, kas "apēd" šos failus un ļauj direktoram čatā uzdot jautājumus (piemēram: "Cik mēs esam parādā?" vai "Ko nolēma par mārketingu?"), saņemot precīzas atbildes ar atsaucēm.

## Vides sagatavošana

In [1]:
# Mount Google Drive at /content/drive so later cells can read the project
# files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Instalējam nepieciešamās bibliotēkas
!pip install openai langchain langchain-community langchain-openai chromadb pypdf pymupdf openai-whisper tiktoken > null
!apt-get install ffmpeg -y  > null # Nepieciešams priekš Whisper audio apstrādes

In [5]:
import os
import getpass
from pathlib import Path
from google.colab import drive

# 1. Mount Google Drive (only if the mount point does not exist yet)
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# 2. Define the working folder that holds the input files
BASE_PATH = Path("/content/drive/MyDrive/Colab Notebooks/BDA_M1/Lab26_KnowledgeHub")

# 3. Ask for the OpenAI API key without echoing it and expose it through the
#    environment variable
os.environ["OPENAI_API_KEY"] = getpass.getpass("Ievadi OpenAI API Key: ")

# 4. Check that every expected input file is present in BASE_PATH
required_files = ["project.pdf", "meeting.mp3", "whiteboard.png"] # the whiteboard image is a .png
missing_files = [f for f in required_files if not (BASE_PATH / f).exists()]

if missing_files:
    print(f"❌ TRŪKST FAILU mapē {BASE_PATH}: {missing_files}")
    print("Pārbaudi failu nosaukumus!")
else:
    print(f"✅ Visi faili atrasti mapē: {BASE_PATH}")
    # Store the full paths as strings for convenient use in later cells.
    # NOTE: these names are only defined on this success branch, so later
    # cells that reference them raise NameError when files are missing.
    pdf_path = str(BASE_PATH / "project.pdf")
    audio_path = str(BASE_PATH / "meeting.mp3")
    image_path = str(BASE_PATH / "whiteboard.png")

Ievadi OpenAI API Key: ··········
✅ Visi faili atrasti mapē: /content/drive/MyDrive/Colab Notebooks/BDA_M1/Lab26_KnowledgeHub


## Izstrādes soļi (Ingestion & Vector Store)
Šis kods "apēd" failus no Google Drive mapes.
* Whisper lasa audio no Drive ceļa.
* GPT-4o lasa attēlu no Drive ceļa.
* PyMuPDFLoader (rezerves variants — PyPDFLoader) lasa PDF no Drive ceļa; lapām bez teksta slāņa tekstu nolasa GPT-4o (OCR).

In [6]:
import whisper
from langchain_community.document_loaders import PyPDFLoader
try:
    from langchain_community.document_loaders import PyMuPDFLoader
except Exception:
    PyMuPDFLoader = None
try:
    import fitz  # PyMuPDF, used for PDF page -> image OCR fallback
except Exception:
    fitz = None
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from openai import OpenAI
import base64
import logging
import re

# Create the OpenAI client used for the image / vision requests
client = OpenAI()

print(".......... Sāku datu apstrādi... (Tas var aizņemt pāris minūtes)")
# Collects one Document per ingested source (audio, image, PDF pages)
all_documents = []

# --- A. DATA INGESTION ---

# 1. AUDIO PROCESSING (Whisper)
print(f".......... Apstrādāju audio: {os.path.basename(audio_path)}...")
whisper_model = whisper.load_model("base")
# Whisper reads the file directly from its Drive path.
# NOTE(review): the transcription language is fixed to English — confirm that
# this matches the recording.
result = whisper_model.transcribe(
    audio_path,
    task="transcribe",
    language="en",
    fp16=False,
    condition_on_previous_text=True,
)
segments = result.get("segments", [])
# One line per segment, prefixed with "[start-end]" in seconds (one decimal);
# extract_first_audio_timestamp() later parses this prefix out of each chunk.
# If no segments are returned, fall back to the plain transcript text.
audio_text = "\n".join([f"[{s['start']:.1f}-{s['end']:.1f}] {s['text'].strip()}" for s in segments]) if segments else result.get("text", "")
all_documents.append(Document(page_content=audio_text, metadata={"source": "meeting.mp3"}))


# 2. IMAGE PROCESSING (GPT-4o Vision)
print(f".......... Apstrādāju attēlu: {os.path.basename(image_path)}...")

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file content, decoded to a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Encode the image read from Drive so it can be embedded in a data URL
base64_image = encode_image(image_path)

# Ask GPT-4o to transcribe the whiteboard. The prompt requests the visible
# text verbatim plus a trailing NUMBERS_FOUND section listing every number,
# so numbers may appear twice in the returned description.
response = client.chat.completions.create(
    model="gpt-4o",
    temperature=0,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Extract all visible text exactly from this image. If there is a table, transcribe each row. Keep every number exactly as written (including commas, currency symbols, and dates). Then provide a short section named NUMBERS_FOUND that lists all numbers you see.",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{base64_image}", "detail": "high"},
                },
            ],
        }
    ],
    max_tokens=1600,
)
image_description = response.choices[0].message.content
# Add retrieval keywords so numeric queries match this chunk more reliably.
image_text_for_index = (
    "Keywords: numbers, digits, amounts, budget, costs, estimated cost, actual, total budget, current spend.\n\n"
    f"{image_description}"
)
# The whole description is stored as a single Document tagged with its source.
all_documents.append(Document(page_content=image_text_for_index, metadata={"source": "whiteboard.png"}))


def ocr_pdf_page_with_vision(pdf_file_path, page_index):
    """Transcribe one PDF page by rendering it to PNG and sending it to GPT-4o.

    Used as a fallback for pages from which the PDF loader extracted no text.

    Args:
        pdf_file_path: Path of the PDF file to open with PyMuPDF (``fitz``).
        page_index: Zero-based index of the page to render.

    Returns:
        The stripped transcription, or ``""`` when PyMuPDF is unavailable, the
        index is out of range, rendering fails, or the API call fails.
        Failures are printed, not raised.
    """
    # PyMuPDF was imported optionally; without it no page can be rendered.
    if fitz is None:
        return ""

    # Step 1: render the page to an in-memory PNG and base64-encode it.
    try:
        with fitz.open(pdf_file_path) as pdf_doc:
            if page_index < 0 or page_index >= len(pdf_doc):
                return ""
            page = pdf_doc.load_page(page_index)
            # Matrix(2, 2) renders at twice the default scale on both axes.
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2), alpha=False)
            page_b64 = base64.b64encode(pix.tobytes("png")).decode("utf-8")
    except Exception as e:
        # Messages report the page number 1-based.
        print(f"   [PDF OCR render fallback] lapa {page_index + 1}: {e}")
        return ""

    # Step 2: ask the vision model for a verbatim transcription of the image.
    try:
        ocr_response = client.chat.completions.create(
            model="gpt-4o",
            temperature=0,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Extract all visible text exactly from this PDF page image. Return only the transcribed text. If no readable text is present, return an empty string.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{page_b64}", "detail": "high"},
                        },
                    ],
                }
            ],
            max_tokens=1600,
        )
        # The message content may be None; normalise it to an empty string.
        return (ocr_response.choices[0].message.content or "").strip()
    except Exception as e:
        print(f"   [PDF OCR API fallback] lapa {page_index + 1}: {e}")
        return ""


# 3. PDF PROCESSING
print(f".......... Apstrādāju PDF: {os.path.basename(pdf_path)}...")
# Silence pypdf log messages below ERROR level.
logging.getLogger("pypdf").setLevel(logging.ERROR)
pdf_docs = []
pdf_loader_used = "PyPDFLoader"
# Prefer PyMuPDFLoader; on any failure (including the loader not being
# importable) fall back to PyPDFLoader.
try:
    if PyMuPDFLoader is not None:
        pdf_docs = PyMuPDFLoader(pdf_path).load()
        pdf_loader_used = "PyMuPDFLoader"
    else:
        raise ImportError("PyMuPDFLoader not available")
except Exception as e:
    print(f"   [PDF fallback] {e}")
    pdf_docs = PyPDFLoader(pdf_path).load()

clean_pdf_docs = []
for idx, d in enumerate(pdf_docs):
    text = (d.page_content or "").strip()
    if not text:
        # No extractable text: try vision OCR for this page. The page index
        # comes from the loader metadata when usable, else from the position
        # in the loaded list.
        page_idx = d.metadata.get("page", idx)
        if not isinstance(page_idx, int):
            try:
                page_idx = int(page_idx)
            except Exception:
                page_idx = idx

        ocr_text = ocr_pdf_page_with_vision(pdf_path, page_idx)
        if not ocr_text:
            # Still nothing readable: drop the page entirely.
            continue

        text = ocr_text
        # Mark pages whose text came from OCR.
        d.metadata = {**d.metadata, "ocr": True}
    # Strip every line and drop blank lines.
    d.page_content = "\n".join([line.strip() for line in text.splitlines() if line.strip()])
    # Overwrite the loader-provided source with the short file name that the
    # retrieval code filters on.
    d.metadata = {**d.metadata, "source": "project.pdf"}
    clean_pdf_docs.append(d)

pdf_docs = clean_pdf_docs
print(f".......... PDF loaded with {pdf_loader_used}; pages kept: {len(pdf_docs)}")
# Add the PDF pages to the shared document list
all_documents.extend(pdf_docs)


# --- B. KNOWLEDGE BASE (Vector Store) ---

print(".......... Veidoju Vektoru Datubāzi (ChromaDB)...")

# Split the text into chunks of at most 1000 characters, with 200 characters
# of overlap between neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
splits = text_splitter.split_documents(all_documents)

def _sec_label(value):
    """Return ``value`` as text with every '.' replaced by ':'."""
    return ":".join(str(value).split("."))


def extract_first_audio_timestamp(text):
    """Return a label for the first ``[start-end]`` marker found in ``text``.

    Args:
        text: Transcript chunk; ``None`` is treated as an empty string.

    Returns:
        ``"<start>-<end> sekundes"`` with the decimal points of both numbers
        rendered as ':', or ``None`` when the text holds no such marker.
    """
    found = re.search(r"\[(\d+(?:\.\d+)?)\s*-\s*(\d+(?:\.\d+)?)\]", text or "")
    if found is None:
        return None
    labels = [_sec_label(raw) for raw in found.groups()]
    return f"{labels[0]}-{labels[1]} sekundes"

# Tag every audio chunk with the first "[start-end]" marker it contains so
# that answers can cite a time range for meeting.mp3.
for d in splits:
    if d.metadata.get("source") == "meeting.mp3":
        timestamp = extract_first_audio_timestamp(d.page_content)
        if timestamp:
            d.metadata["timestamp"] = timestamp

# Build the Chroma store (no persist directory is passed, i.e. kept in memory)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small")
)

print(f"Datu bāze gatava! Apstrādāti {len(splits)} informācijas gabali.")

.......... Sāku datu apstrādi... (Tas var aizņemt pāris minūtes)
.......... Apstrādāju audio: meeting.mp3...


100%|████████████████████████████████████████| 139M/139M [00:00<00:00, 274MiB/s]


.......... Apstrādāju attēlu: whiteboard.png...
.......... Apstrādāju PDF: project.pdf...
.......... PDF loaded with PyMuPDFLoader; pages kept: 13
.......... Veidoju Vektoru Datubāzi (ChromaDB)...
Datu bāze gatava! Apstrādāti 40 informācijas gabali.


## Kods čata interfeisam (RAG funkcija)

In [7]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import re


# --- RAG SISTĒMAS UZBŪVE ---

# 1. Define the LLM (temperature 0)
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# 2. Define the retriever: MMR search returning k=10 chunks chosen out of
#    fetch_k=40 candidates.
#    NOTE(review): per the LangChain docs a low lambda_mult favours diversity
#    over similarity — confirm 0.15 is the intended trade-off.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 10, "fetch_k": 40, "lambda_mult": 0.15}
)


def list_sources():
    """Return the distinct source names of all indexed chunks, sorted.

    Reads the module-level ``splits`` list; chunks without a truthy
    ``"source"`` metadata entry are ignored.
    """
    names = set()
    for chunk in splits:
        if chunk.metadata.get("source"):
            names.add(chunk.metadata.get("source", "Nezināms"))
    return sorted(names)


def dedupe_docs(docs):
    """Drop repeated documents while keeping first-occurrence order.

    Two documents count as the same when they share the source, the page and
    the first 240 characters of their stripped content.

    Args:
        docs: Iterable of objects exposing ``metadata`` and ``page_content``.

    Returns:
        A list with the first document seen for every distinct fingerprint.
    """
    first_seen = {}
    for doc in docs:
        fingerprint = (
            doc.metadata.get("source", "Nezināms"),
            doc.metadata.get("page", None),
            (doc.page_content or "").strip()[:240],
        )
        # setdefault keeps the earliest document; dicts preserve insert order.
        first_seen.setdefault(fingerprint, doc)
    return list(first_seen.values())


def format_used_sources(docs):
    """Return the distinct source names of ``docs`` as a comma-separated string.

    Documents without a truthy ``"source"`` are skipped. Names are sorted;
    ``"Neviens"`` is returned when no document carries a source.
    """
    names = set()
    for doc in docs:
        if doc.metadata.get("source"):
            names.add(doc.metadata.get("source", "Nezināms"))
    if not names:
        return "Neviens"
    return ", ".join(sorted(names))


def enrich_audio_citations(answer, docs):
    """Normalise meeting.mp3 citations in ``answer`` and fill in missing ranges.

    Every ``Avots: meeting.mp3, <a>-<b>`` range is rewritten so that '.' in
    the numbers becomes ':' and the unit ``sekundes`` follows. Bare
    ``[Avots: meeting.mp3]`` / ``(Avots: meeting.mp3)`` citations receive the
    timestamp of the first meeting.mp3 document that has one.

    Args:
        answer: Answer text produced by the model.
        docs: Retrieved documents; only those with source ``meeting.mp3`` and
            a truthy ``"timestamp"`` metadata entry are considered.

    Returns:
        The rewritten answer text.
    """
    range_re = re.compile(
        r"(?i)(Avots:\s*meeting\.mp3,\s*)(\d+(?:[\.:]\d+)?)\s*-\s*(\d+(?:[\.:]\d+)?)(?:\s*sekundes)?"
    )

    def _colonise(hit):
        # Rebuild the citation with ':' separators and the unit appended.
        begin = hit.group(2).replace('.', ':')
        finish = hit.group(3).replace('.', ':')
        return f"{hit.group(1)}{begin}-{finish} sekundes"

    answer = range_re.sub(_colonise, answer)

    # Only the first usable timestamp is ever applied as the default.
    fallback = next(
        (
            doc.metadata.get("timestamp")
            for doc in docs
            if doc.metadata.get("source") == "meeting.mp3"
            and doc.metadata.get("timestamp")
        ),
        None,
    )
    if fallback is None:
        return answer

    bare_citations = (
        (r"\[Avots:\s*meeting\.mp3\s*\]", f"[Avots: meeting.mp3, {fallback}]"),
        (r"\(Avots:\s*meeting\.mp3\s*\)", f"(Avots: meeting.mp3, {fallback})"),
    )
    for bare_pattern, filled in bare_citations:
        answer = re.sub(bare_pattern, filled, answer, flags=re.IGNORECASE)

    # Second pass so the freshly inserted ranges share the same format.
    return range_re.sub(_colonise, answer)


def route_sources(question):
    """Pick the sources whose keyword stems occur in ``question``.

    Matching is a case-insensitive substring test. Sources are returned in
    the fixed order meeting.mp3, whiteboard.png, project.pdf. When no stem
    matches, every indexed source (``list_sources()``) is returned instead.
    """
    lowered = (question or "").lower()
    stems_by_source = {
        "meeting.mp3": ["sapulc", "meeting", "nolēma", "teica", "diskut"],
        "whiteboard.png": ["tāfel", "rēķin", "skait", "sum", "kopā", "budžet", "$", "izmaks"],
        "project.pdf": ["projek", "plān", "roadmap", "mērķ", "dokumen", "pdf"],
    }

    matched = []
    for source_name, stems in stems_by_source.items():
        for stem in stems:
            if stem in lowered:
                matched.append(source_name)
                break

    if matched:
        return matched
    return list_sources()


def _keyword_set(text):
    """Return the set of lower-cased words in ``text`` usable as keywords.

    Only words of three or more word characters are kept, and the listed
    Latvian stop words are removed. ``None`` is treated as empty text.
    """
    ignored = {
        "kas", "kā", "vai", "par", "pie", "un", "ar", "to", "tas", "tā", "šo",
        "ir", "bija", "būs", "man", "mums", "jūs", "viņi", "kur", "kad", "kāpēc",
    }
    lowered = (text or "").lower()
    words = set(re.findall(r"\b[\wāčēģīķļņōŗšūž]{3,}\b", lowered))
    return words - ignored


def rerank_docs(question, docs, top_k=6):
    """Order ``docs`` by keyword overlap with ``question`` and keep the best.

    A document scores one point per question keyword found in its lower-cased
    text, plus one point when its source is among the sources routed for the
    question. Ties keep the original order. Documents scoring zero are
    dropped unless every document scores zero.

    Args:
        question: The query text.
        docs: Candidate documents.
        top_k: Maximum number of documents returned.

    Returns:
        At most ``top_k`` documents, best first; ``[]`` for empty input.
    """
    if not docs:
        return []

    keywords = _keyword_set(question)
    favoured = set(route_sources(question))
    pool = list(docs)

    points = []
    for doc in pool:
        body = (doc.page_content or "").lower()
        hits = len([word for word in keywords if word in body])
        bonus = 1 if doc.metadata.get("source") in favoured else 0
        points.append(hits + bonus)

    # Highest score first; equal scores keep their original relative order.
    order = sorted(range(len(pool)), key=lambda pos: (-points[pos], pos))
    winners = [pool[pos] for pos in order if points[pos] > 0]
    if not winners:
        winners = [pool[pos] for pos in order]
    return winners[:top_k]


def retrieve_docs(question, per_source_k=2, global_k=3, final_k=6):
    """Retrieve, de-duplicate and re-rank chunks relevant to ``question``.

    For every source chosen by ``route_sources`` a source-filtered similarity
    search is tried first; when it fails or returns nothing, the hits of the
    global MMR retriever for that source are used instead. A few global
    candidates are then added before de-duplication and re-ranking.

    The global retriever is invoked at most once per call and its result is
    reused, instead of being re-run for every empty source and once more for
    the global candidates.

    Args:
        question: Query text (possibly enriched with chat history).
        per_source_k: Maximum chunks taken per routed source.
        global_k: Number of global retriever hits appended.
        final_k: Maximum chunks returned after re-ranking.

    Returns:
        ``(ranked_docs, source_hits)`` where ``source_hits`` maps each routed
        source to the chunks found for it (possibly an empty list).
    """
    global_cache = []

    def global_candidates():
        # Lazily run the MMR retriever once and memoise its result.
        if not global_cache:
            global_cache.append(retriever.invoke(question))
        return global_cache[0]

    combined_docs = []
    source_hits = {}

    # 1) Search only in the sources that are relevant for this question
    for source in route_sources(question):
        try:
            docs = vectorstore.similarity_search(
                question,
                k=per_source_k,
                filter={"source": source},
            )
        except Exception:
            # Deliberate best-effort: a failing filtered search falls through
            # to the global-retriever fallback below.
            docs = []

        if not docs:
            docs = [
                d for d in global_candidates()
                if d.metadata.get("source") == source
            ][:per_source_k]

        source_hits[source] = docs
        combined_docs.extend(docs)

    # 2) Add a few global candidates, then de-duplicate and re-rank
    combined_docs.extend(global_candidates()[:global_k])
    unique_docs = dedupe_docs(combined_docs)
    ranked_docs = rerank_docs(question, unique_docs, top_k=final_k)
    return ranked_docs, source_hits


def format_docs(docs):
    """Render ``docs`` as prompt context text, grouped by source.

    Sources appear in sorted order, each under a ``=== Avots: <source> ===``
    header. Every chunk line carries a per-source running number and a
    locator: the timestamp for meeting.mp3 chunks that have one, otherwise
    the 1-based page number when ``page`` is an int, otherwise nothing.

    Returns:
        The formatted text, stripped of surrounding whitespace.
    """
    by_source = {}
    for doc in docs:
        name = doc.metadata.get("source", "Nezināms")
        if name not in by_source:
            by_source[name] = []
        by_source[name].append(doc)

    lines = []
    for name in sorted(by_source):
        lines.append(f"=== Avots: {name} ===")
        for position, doc in enumerate(by_source[name], start=1):
            stamp = doc.metadata.get("timestamp")
            page_no = doc.metadata.get("page")
            if name == "meeting.mp3" and stamp:
                where = f", laiks {stamp}"
            elif isinstance(page_no, int):
                where = f", lapa {page_no + 1}"
            else:
                where = ""
            lines.append(f"[{name}#{position}{where}] {doc.page_content}")
        # Blank separator line after every source group.
        lines.append("")

    return "\n".join(lines).strip()


def is_followup_question(question):
    """Tell whether ``question`` looks like a follow-up to an earlier turn.

    The stripped, lower-cased question is tested against a list of Latvian
    and English cue patterns; any match counts.

    Returns:
        ``True`` when a cue matches, ``False`` otherwise (including for
        empty or ``None`` input).
    """
    normalized = (question or "").strip().lower()
    if normalized == "":
        return False

    cues = (
        r"^un\b",
        r"^vēl\b",
        r"\biepriekš\w*\b",
        r"\btas\b",
        r"\btā\b",
        r"\bšo\b",
        r"\bkā ar\b",
        r"\bcik tas\b",
        r"\bun cik\b",
        r"\bwhat about\b",
        r"\band what\b",
        r"\bit\b",
        r"\bthat\b",
    )
    for cue in cues:
        if re.search(cue, normalized) is not None:
            return True
    return False


def format_chat_history(chat_history, max_turns=1):
    """Render the most recent chat turns as numbered prompt text.

    Args:
        chat_history: List of dicts with ``"question"`` and ``"answer"`` keys.
        max_turns: How many of the latest turns to include. Zero or a
            negative value yields the "no history" text (a plain
            ``[-max_turns:]`` slice would return the whole history for 0).

    Returns:
        One ``"<n>. Lietotājs: ..."`` line per question and one indented
        ``"Asistents: ..."`` line per answer (whitespace collapsed, cut to
        260 characters), or ``"Nav iepriekšējas čata vēstures."`` when there
        is nothing to show.
    """
    empty_note = "Nav iepriekšējas čata vēstures."
    if not chat_history or max_turns <= 0:
        return empty_note

    recent_turns = chat_history[-max_turns:]
    lines = []
    for idx, turn in enumerate(recent_turns, start=1):
        q = (turn.get("question") or "").strip()
        a = (turn.get("answer") or "").strip()
        if q:
            lines.append(f"{idx}. Lietotājs: {q}")
        if a:
            # Collapse whitespace runs so the answer stays on a single line.
            short_answer = re.sub(r"\s+", " ", a)[:260]
            lines.append(f"   Asistents: {short_answer}")
    return "\n".join(lines) if lines else empty_note


def build_retrieval_query(question, chat_history):
    """Return the text to use for retrieval, adding history for follow-ups.

    The question is returned unchanged when there is no history, when it is
    not recognised as a follow-up, or when the last turn holds neither a
    question nor an answer. Otherwise a multi-line query is built from the
    previous question, the first 260 characters of the previous answer and
    the current question.
    """
    if not chat_history:
        return question
    if not is_followup_question(question):
        return question

    previous = chat_history[-1]
    earlier_q = (previous.get("question") or "").strip()
    earlier_a = (previous.get("answer") or "").strip()
    if not (earlier_q or earlier_a):
        return question

    lines = ["Šis ir turpinājuma jautājums. Interpretē atsauces uz iepriekšējo soli."]
    if earlier_q:
        lines.append(f"Iepriekšējais jautājums: {earlier_q}")
    if earlier_a:
        lines.append(f"Iepriekšējā atbilde: {earlier_a[:260]}")
    lines.append(f"Pašreizējais jautājums: {question}")
    return "\n".join(lines)


def build_context(question):
    """Assemble the context text that is fed into the prompt for ``question``.

    Uses the module-level ``chat_history``. The result consists of an
    optional note listing routed sources that produced no hits, an optional
    chat-history section (only for follow-up questions when history exists)
    and the formatted retrieved chunks. When nothing is retrieved, a fixed
    "nothing found" message is returned instead.
    """
    include_history = bool(chat_history) and is_followup_question(question)
    docs, source_hits = retrieve_docs(build_retrieval_query(question, chat_history))
    if not docs:
        return "NAV ATRASTS NEVIENS RELEVANTS FRAGMENTS."

    sections = []

    empty_sources = [name for name, hits in source_hits.items() if not hits]
    if empty_sources:
        sections.append(
            "PIEZĪME: šiem avotiem semantiskā meklēšana neatgrieza fragmentus: "
            + ", ".join(empty_sources)
            + "\n\n"
        )

    if include_history:
        sections.append(
            "=== Čata vēsture (iepriekšējais solis) ===\n"
            + format_chat_history(chat_history)
            + "\n\n"
        )

    sections.append(format_docs(docs))
    return "".join(sections)


def is_sum_question(question):
    """Tell whether ``question`` asks for a total / sum.

    Recognised (case-insensitively): ``kopā``, ``saskaiti`` and ``total`` as
    substrings, the Latvian stem ``summ`` (summa, summu, kopsumma, ...) and
    the English whole words ``sum`` / ``sums``.

    The bare substring ``"sum"`` is deliberately not used: it would also fire
    on "summary", "summarize" or "assume" and route such questions into the
    deterministic sum shortcut.

    Returns:
        ``True`` for sum questions, ``False`` otherwise (also for empty or
        ``None`` input).
    """
    q = (question or "").lower()
    # "summ(?!ar)" keeps the Latvian stem while excluding "summar..." words.
    return re.search(r"kopā|summ(?!ar)|saskaiti|\bsums?\b|total", q) is not None


def extract_money_amounts_from_whiteboard(docs):
    """Extract the ``$`` amounts from the longest whiteboard.png chunk.

    The whiteboard text is produced by a vision prompt that asks for the
    transcription followed by a ``NUMBERS_FOUND`` recap of every number. To
    avoid counting each amount twice, only the text before that recap is
    scanned; the full text is used only when the part before the recap holds
    no amounts (e.g. the chunk consists of the recap alone).

    Args:
        docs: Retrieved documents; only those whose source is
            ``whiteboard.png`` are considered.

    Returns:
        The amounts as ints, in order of appearance (thousands separators
        removed), or ``[]`` when there is no whiteboard chunk or no amount.
    """
    whiteboard_texts = [
        (d.page_content or "")
        for d in docs
        if d.metadata.get("source") == "whiteboard.png"
    ]
    if not whiteboard_texts:
        return []

    # Take the longest whiteboard chunk to limit double counting caused by
    # the overlap between neighbouring chunks.
    text = max(whiteboard_texts, key=len)
    amount_pattern = r"\$\s*(\d{1,3}(?:,\d{3})+|\d+)"

    recap = re.search(r"NUMBERS[_ ]FOUND", text, flags=re.IGNORECASE)
    transcription = text[:recap.start()] if recap else text
    raw_amounts = re.findall(amount_pattern, transcription)
    if not raw_amounts and recap:
        # Nothing before the recap: fall back to the recap itself.
        raw_amounts = re.findall(amount_pattern, text)
    return [int(a.replace(",", "")) for a in raw_amounts]


def deterministic_sum_answer(question, docs):
    """Answer sum questions directly from the whiteboard amounts.

    Returns:
        A ready-made answer string with the total of all ``$`` amounts found
        in the whiteboard chunk, or ``None`` when the question is not a sum
        question or no amounts are available (the caller then uses the LLM).
    """
    if is_sum_question(question):
        values = extract_money_amounts_from_whiteboard(docs)
        if values:
            grand_total = sum(values)
            headline = f"Kopējā summa ir ${grand_total:,.0f} [Avots: whiteboard.png]. "
            footnote = f"Summā iekļauti tikai naudas ieraksti ar '$' ({len(values)} vērtības)."
            return headline + footnote
    return None


# 3. Define the prompt. It has two placeholders: {context}, filled by
#    build_context(), and {question}, the user's question passed through as-is.
template = """Tu esi uzņēmuma stratēģiskais AI asistents.
Tavs uzdevums ir atbildēt uz jautājumiem, izmantojot TIKAI zemāk sniegto kontekstu.

Noteikumi:
1) Izveido vienu apkopotu atbildi no visiem avotiem, kuros ir atbilstoša informācija.
2) Ja viens fakts redzams vairākos avotos, apvieno to viena teikumā un pievieno visus avotus.
3) Ja avoti savstarpēji pretrunojas, to arī norādi.
4) Pēc katra būtiska fakta norādi avotu iekavās. MP3 avotam VIENMĒR pievieno sekunžu intervālu ar '.' un vienību 'sekundes' (piem., [Avots: meeting.mp3, 292.7-305.4 sekundes], [Avots: project.pdf]).
5) Ja atbildes nav kontekstā, saki: "Diemžēl failos šādas informācijas nav."
6) Ja jautājums ir turpinājums iepriekšējam (piem., "Un cik tas ir dolāros?"), interpretē to pēc čata vēstures.
7) Atbildi TIKAI uz pašreizējo jautājumu. Ja jautājums ir jauns un nav turpinājums, ignorē čata vēsturi.
8) Nemini avotus, kas nav izmantoti atbildes faktu pamatošanai.
9) Ja jautājums ir par summēšanu, summē tikai naudas vērtības (ar '$'), nevis datumus vai citus skaitļus.

Konteksts:
{context}

Jautājums: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# 4. RAG chain: the input question is mapped to {context, question}, rendered
#    through the prompt, sent to the LLM and parsed into a plain string.
rag_chain = (
    {"context": RunnableLambda(build_context), "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


# --- NOTEBOOK CHAT (no input loop) ---

print("Projekta Smadzenes ir gatavas. Jautā man jebko par failiem!")
print("Notebook režīms: lieto ask_question(\"tavs jautājums\") nākamajā šūnā.")

# Conversation memory: a list of {"question", "answer"} dicts that
# ask_question() appends to and build_context() reads.
chat_history = []
# State of the duplicate-submission guard in ask_question(): the last question
# text and the time.monotonic() value at which it was accepted.
_ask_guard = {"last_q": "", "last_ts": 0.0}


def ask_question(user_question):
    """Answer one user question, print the result and record it in the history.

    Args:
        user_question: The question text; ``None`` or blank input is rejected.

    Returns:
        A dict with ``"question"``, ``"answer"`` and ``"used_sources"``, or
        ``None`` when the input is empty, is an exit command, repeats the
        previous question within one second, or an exception occurred
        (exceptions are printed, not raised).
    """
    user_question = (user_question or "").strip()
    if not user_question:
        print("Kļūda: tukšs jautājums.")
        return None

    # Exit commands end the chat without producing an answer.
    if user_question.lower() in ["exit", "quit", "beigt", "-"]:
        print("Čats pabeigts.")
        return None

    try:
        import time
        now = time.monotonic()
        # Duplicate guard: ignore the same question submitted again within
        # one second of the previous accepted one.
        if user_question == _ask_guard["last_q"] and (now - _ask_guard["last_ts"]) < 1.0:
            return None
        _ask_guard["last_q"] = user_question
        _ask_guard["last_ts"] = now

        print(f"   Saņemts jautājums: {user_question}")
        print("   Analizēju datus...", flush=True)

        # Retrieval done here feeds the sum shortcut, the citation enrichment
        # and the source list. rag_chain retrieves again on its own through
        # build_context().
        retrieval_query = build_retrieval_query(user_question, chat_history)
        retrieved_docs, _ = retrieve_docs(retrieval_query)

        # Cheap, deterministic path for number-summing questions.
        response = deterministic_sum_answer(user_question, retrieved_docs)
        if response is None:
            response = rag_chain.invoke(user_question)
            response = enrich_audio_citations(response, retrieved_docs)

        # Lists the sources of all retrieved chunks.
        used_sources = format_used_sources(retrieved_docs)

        print(f"\nJautājums:\n{user_question}")
        print(f"\nAtbilde:\n{response}")
        print(f"\nIzmantotie avoti: {used_sources}")
        print("-" * 50)

        chat_history.append({"question": user_question, "answer": response})
        # Keep only the six most recent turns, trimming the list in place.
        chat_history[:] = chat_history[-6:]

        return {
            "question": user_question,
            "answer": response,
            "used_sources": used_sources,
        }
    except Exception as e:
        print(f"Kļūda: {e}")
        return None


def reset_chat_history():
    """Empty the module-level chat history in place and confirm on stdout."""
    del chat_history[:]
    print("Čata vēsture notīrīta.")


Projekta Smadzenes ir gatavas. Jautā man jebko par failiem!
Notebook režīms: lieto ask_question("tavs jautājums") nākamajā šūnā.


In [8]:
# Interaktīvs čata panelis ar VS Code fallback
import os

def start_text_chat():
    """Run a blocking text chat loop on stdin until the user exits.

    Blank input is ignored. An exit command (``exit``, ``quit``, ``beigt``,
    ``-``), end of input or Ctrl+C ends the loop; every other line is passed
    to ``ask_question``.
    """
    exit_commands = ["exit", "quit", "beigt", "-"]
    print("Teksta čats gatavs. Raksti jautājumu un spied Enter. Lai beigtu: exit")

    running = True
    while running:
        try:
            typed = input("Jautājums: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nČats pabeigts.")
            running = False
        else:
            if typed.lower() in exit_commands:
                print("Čats pabeigts.")
                running = False
            elif typed:
                ask_question(typed)

# Treat the session as VS Code when the VSCODE_PID environment variable is set.
is_vscode = bool(os.environ.get("VSCODE_PID"))

if is_vscode:
    print("VS Code vidē widgets var nerenderēties. Startēju teksta čatu.")
    start_text_chat()
else:
    # Try the widget UI; fall back to the text chat when the widget
    # libraries cannot be imported.
    try:
        import ipywidgets as widgets
        from IPython.display import display, HTML
    except Exception as e:
        print(f"UI nav pieejams: {e}")
        print("Pāreju uz teksta čatu.")
        start_text_chat()
    else:
        question_box = widgets.Text(
            placeholder="Ieraksti jautājumu un spied Send",
            description="Jautājums:",
            layout=widgets.Layout(width="70%"),
        )
        send_btn = widgets.Button(description="Send", button_style="primary")
        clear_btn = widgets.Button(description="Notīrīt vēsturi")
        # Bordered output area, scrollable once content exceeds 420px.
        output = widgets.Output(
            layout=widgets.Layout(
                border="1px solid #444",
                padding="8px",
                max_height="420px",
                overflow_y="auto",
                overflow_x="auto",
            )
        )
        # CSS hook used by the <style> block displayed below.
        output.add_class("kh-chat-output")

        # UI-level guard state: whether a question is currently being
        # processed, plus the last submitted question and its time.
        _ui_state = {"busy": False, "last_q": "", "last_ts": 0.0}

        def _run_question(_=None):
            """Send the text box content to ask_question inside the output area."""
            import time
            q = (question_box.value or "").strip()
            if not q:
                return

            now = time.monotonic()
            # Ignore clicks while a question is still being processed.
            if _ui_state["busy"]:
                return
            # Ignore the same question submitted again within one second.
            if q == _ui_state["last_q"] and (now - _ui_state["last_ts"]) < 1.0:
                return

            _ui_state["busy"] = True
            _ui_state["last_q"] = q
            _ui_state["last_ts"] = now
            question_box.value = ""
            try:
                with output:
                    ask_question(q)
            finally:
                # Always release the busy flag, even if ask_question raised.
                _ui_state["busy"] = False

        def _clear_history(_=None):
            """Reset the chat history, printing the confirmation in the output area."""
            with output:
                reset_chat_history()

        # Unregister first, then register each callback.
        # NOTE(review): both buttons are created just above, so the removal
        # presumably has nothing to remove here — confirm whether it is needed.
        send_btn.on_click(_run_question, remove=True)
        clear_btn.on_click(_clear_history, remove=True)
        send_btn.on_click(_run_question)
        clear_btn.on_click(_clear_history)
        # Make long lines wrap inside the output area.
        display(HTML("<style>.kh-chat-output pre, .kh-chat-output .jp-OutputArea-output pre {white-space: pre-wrap !important; word-break: break-word !important;}</style>"))
        display(widgets.HBox([question_box, send_btn, clear_btn]))
        display(output)
        print("Čata panelis gatavs. Raksti jautājumu laukā un spied Send.")


HBox(children=(Text(value='', description='Jautājums:', layout=Layout(width='70%'), placeholder='Ieraksti jaut…

Output(layout=Layout(border='1px solid #444', max_height='420px', overflow_x='auto', overflow_y='auto', paddin…

Čata panelis gatavs. Raksti jautājumu laukā un spied Send.
