# Hypothetical Prompt Embeddings (HyPE)

In [None]:
import os
import sys
app_path = os.path.abspath('..')
sys.path.insert(0, app_path)

from datetime import datetime, UTC, timedelta

from tqdm import tqdm
from langchain import PromptTemplate
from langchain_ollama import ChatOllama

from app.chroma_client import get_embeddings, get_client

In [None]:
# Telegram channel this notebook works with; its name and its id are the same string.
channel_name = channel_id = 'relocationexpert'

In [None]:
# Embedding model shared by the vector stores created in this notebook.
# Alternative model name: "cointegrated/rubert-tiny2".
ru_model_name = "intfloat/multilingual-e5-large"
embeddings_model = get_embeddings(ru_model_name)

# Vector store with the messages of the channel (collection "telegram_<channel_name>").
chroma_client_from_telegram = get_client(
    'telegram_' + channel_name,
    embeddings_model,
)

In [None]:
# Ollama chat models: qwen3:8b answers the final question,
# llama3.2:3b generates the hypothetical questions for each message.
llm_qwen3_8b, llm_llama3_3b = (
    ChatOllama(model=model_tag) for model_tag in ("qwen3:8b", "llama3.2:3b")
)

## Get channel messages from the vector DB
See [./telegram_crawler.ipynb](telegram_crawler.ipynb) for how the messages were added to the DB.

In [None]:
# Only messages whose "date" metadata (a POSIX timestamp) falls within
# the last 90 days are loaded.
oldest_allowed = datetime.now(UTC) - timedelta(days=90)
all_docs = chroma_client_from_telegram.get(
    where={"date": {"$gte": oldest_allowed.timestamp()}},
)

# Pair every message with its metadata as (metadata, text) tuples.
docs_with_meta = [
    (doc_meta, doc_text)
    for doc_meta, doc_text in zip(all_docs['metadatas'], all_docs['documents'])
]
len(docs_with_meta)

In [None]:
# Separate "<...>_PE" collection that receives the question embeddings.
prompt_embedding_collection_name = f'telegram_{channel_name}_PE'
chroma_client_prompt_embedding = get_client(prompt_embedding_collection_name, embeddings_model)

# To rebuild the collection from scratch, uncomment the next line:
# chroma_client_prompt_embedding._client.delete_collection(name=prompt_embedding_collection_name)

# Raw collection handle, used to add records with precomputed embeddings.
collection = chroma_client_prompt_embedding._client.get_or_create_collection(
    name=prompt_embedding_collection_name,
)

In [None]:
def generate_and_add_records_to_db(meta, text):
    """Generate hypothetical questions for a message and index them.

    The message is sent to ``llm_llama3_3b`` with a prompt asking for the
    questions the text answers. Every non-blank line of the reply is treated
    as one question. The questions are embedded with ``embeddings_model`` and
    one record per question is added to ``collection``: the vector is the
    question embedding, while the stored document and metadata are the
    original message ``text`` and ``meta``.

    Args:
        meta: Metadata dict of the message. Must contain an ``id`` key; record
            ids are built as ``"<id>_<question index>"``.
        text: Text of the message.

    Returns:
        None. If the model returns no usable question, nothing is added.
    """
    # NOTE: the backslash continuations keep the leading spaces of the
    # following source lines inside the prompt text.
    ru_telegram_prompt_expanding = PromptTemplate(
        input_variables=["text"],
        template="Проанализируйте текст и сгенерируйте вопросы, которые, если на них ответить, \
    отразят основные аспекты текста. В выводе должны быть только вопросы и ничего больше. Каждый вопрос должен быть одной строкой, без нумерации или префиксов.\n\n \
    Текст:\n{text}\n\nВопросы:\n",
    )

    hype_chain = ru_telegram_prompt_expanding | llm_llama3_3b
    raw_answer = hype_chain.invoke({"text": text}).content

    # One question per line; surrounding whitespace and blank lines are dropped.
    stripped_lines = (line.strip() for line in raw_answer.split("\n"))
    hypothetical_questions = [question for question in stripped_lines if question]

    print(hypothetical_questions)

    if not hypothetical_questions:
        # Nothing to index for this message; avoid calling collection.add
        # with empty lists.
        return

    question_embeddings = embeddings_model.embed_documents(hypothetical_questions)
    record_count = len(question_embeddings)

    collection.add(
        # Every question record points back at the same source message.
        documents=[text] * record_count,
        embeddings=question_embeddings,
        metadatas=[meta] * record_count,
        ids=[f"{meta['id']}_{i}" for i in range(record_count)],
    )


# Sequential run: process the loaded messages one by one with a progress bar.
for doc_record in tqdm(docs_with_meta):
    # Each record is a (metadata, text) pair.
    generate_and_add_records_to_db(*doc_record)

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# Same processing as a plain loop over docs_with_meta, but spread over
# two worker threads.
with ThreadPoolExecutor(max_workers=2) as pool:
    futures = [
        pool.submit(generate_and_add_records_to_db, meta, text)
        for meta, text in docs_with_meta
    ]

    # The progress bar total is the number of submitted tasks.
    for finished in tqdm(as_completed(futures), total=len(futures)):
        # Report worker failures instead of dropping them silently; one failed
        # message does not stop the remaining ones.
        error = finished.exception()
        if error is not None:
            tqdm.write(f"generate_and_add_records_to_db failed: {error!r}")

In [None]:
related_docs = chroma_client_prompt_embedding.similarity_search_with_relevance_scores(user_query, k=10)

unique_docs = {}
for doc, relevance in related_docs:
    if doc.metadata['id'] in unique_docs:
        continue
    unique_docs[doc.metadata['id']] = (doc, relevance)

list(unique_docs.values())

In [None]:
filtered_related_docs = filter(lambda doc_score: doc_score[-1] > 0.3, unique_docs.values())

context = "\n\n---\n\n".join(f"{datetime.fromtimestamp(doc.metadata['date'], UTC)} - {doc.page_content}" for doc, _score in filtered_related_docs)

In [None]:
user_query = "Когда Кипр входит в Шенгенскую зону?"

In [None]:
# Answering prompt (in Russian): the model is told to answer the user's
# question from the supplied context (relevant messages from the Telegram
# chat) and to say so when the context does not contain enough information.
ru_telegram_prompt = PromptTemplate(
    template="""
    Ты полезный AI ассистент, который отвечает на вопросы пользователя на основе контекста.
    Контекст это релевантные вопросу сообщения из телеграм чата.

    Контекст:
    {context}

    Вопрос пользователя:
    {user_query}

    Если в контексте недостаточно информации, чтобы ответить на вопрос пользователя, то скажи, что недостаточно информации.

    Ответ:
    """,
    input_variables=["context", "user_query"],
)

# NOTE: despite the "hyde"/"r1" in its name, this chain pipes the prompt
# into llm_qwen3_8b.
hyde_chain_r1_8b = ru_telegram_prompt | llm_qwen3_8b
answer_message = hyde_chain_r1_8b.invoke({"context": context, "user_query": user_query})
llm_response = answer_message.content
print(llm_response)