# Astrophysics Chat Application 

This module creates a web application that lets other users interact with the application built in the previous module.
This can be accomplished with any web framework, such as Django, Flask, or Streamlit.

The example code below uses Panel, a powerful open-source Python library for creating web-based apps.


In [None]:
# Include relevant libraries and packages

import textwrap
from uuid import uuid4
import warnings

import panel as pn
from qdrant_client import QdrantClient

from langchain_core.runnables import RunnablePassthrough
from langchain_core.callbacks import CallbackManager
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_qdrant import Qdrant
from langchain_huggingface import HuggingFaceEmbeddings

from ssec_tutorials import OLMO_MODEL, QDRANT_PATH, QDRANT_COLLECTION_NAME
from ssec_tutorials import download_olmo_model, download_qdrant_data

In [None]:
# Suppress every Python warning from this point on (presumably to keep the
# notebook and chat output free of library warnings).
warnings.filterwarnings("ignore")

In [None]:
# Load the Panel extension so that Panel components can render in the notebook
pn.extension()

In [None]:
# Helpers provided by ssec_tutorials; judging by their names they fetch the
# OLMo model file and the Qdrant data that the cells below load.
# NOTE(review): presumably a no-op when the files are already present -- confirm.
download_olmo_model()
download_qdrant_data()

In [None]:
# Module-level aliases for the tutorial constants:
#   qdrant_path / qdrant_collection are read by get_vector_store()
#   model_path is read by get_chain()
qdrant_path = QDRANT_PATH
qdrant_collection = QDRANT_COLLECTION_NAME
model_path = OLMO_MODEL

In [None]:
# Embedding model handed to the vector store in get_vector_store().
# NOTE(review): presumably this must be the same model that produced the
# vectors already stored in the collection -- confirm.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")

In [None]:
# Panel's cache decorator memoizes the expensive database load below
@pn.cache
def get_vector_store():
    """Open the Qdrant collection and wrap it as a LangChain vector store.

    Returns:
        A ``Qdrant`` vector store bound to ``qdrant_collection`` that uses the
        module-level ``embedding`` model.
    """
    # File-based Qdrant client pointing at the database stored at qdrant_path
    local_client = QdrantClient(path=str(qdrant_path))
    return Qdrant(
        client=local_client,
        collection_name=qdrant_collection,
        embeddings=embedding,
    )

In [None]:
# Module-level vector store; get_chain() builds its retriever from this global
db = get_vector_store()

In [None]:
def get_chain(callbacks):
    """Assemble the retrieval-augmented generation chain for one chat request.

    The chain retrieves documents from the module-level ``db`` vector store,
    places their text into the model's chat template as context, and sends
    the resulting prompt to a model loaded through LlamaCpp.

    Args:
        callbacks: Callback handlers that are notified of the retrieved
            documents and receive the model's output.

    Returns:
        A runnable that takes the question as input and returns the model's
        output.
    """

    def format_docs(docs):
        # Merge the text of the retrieved documents into one context string
        return "\n\n".join(doc.page_content for doc in docs)

    def show_docs(docs):
        # Workaround for the linked issue: report the retrieved documents to
        # every handler by hand, then pass them through unchanged
        # https://github.com/langchain-ai/langchain/issues/7290
        for handler in callbacks:
            handler.on_retriever_end(docs, run_id=uuid4())
        return docs

    # Retriever using mmr (maximal marginal relevance) search, limited to the top 2 results
    doc_retriever = db.as_retriever(
        callbacks=callbacks,
        search_type="mmr",
        search_kwargs={"k": 2},
    )

    # LlamaCpp is LangChain's wrapper around the llama.cpp Python bindings
    llm = LlamaCpp(
        model_path=str(model_path),
        # The callback manager is what enables token-wise streaming to the handlers
        callback_manager=CallbackManager(callbacks),
        # Higher temperature means more random output
        temperature=0.8,
        # Size of the model's context window, in tokens
        n_ctx=4096,
        # Upper bound on the number of generated tokens
        max_tokens=512,
        verbose=False,
        # Do not repeat the input prompt in the output
        echo=False,
    )

    # The chat template ships with the model file and is written in jinja2
    chat_template = PromptTemplate.from_template(
        template=llm.client.metadata["tokenizer.chat_template"],
        template_format="jinja2",
        partial_variables={"add_generation_prompt": True, "eos_token": "<|endoftext|>"},
    )

    # Single user turn; {context} and {question} stay as placeholders for now
    user_message = {
        "role": "user",
        "content": textwrap.dedent(
            """\
                    You are an astrophysics expert. Please answer the question on astrophysics based on the following context:
                    
                    {context}
                    
                    Question: {question}"""
        ),
    }

    # Render the chat template once, then treat the rendered text as a new
    # template whose variables are the remaining {context} and {question}
    rendered_template = chat_template.partial(messages=[user_message]).format()
    rag_prompt = PromptTemplate.from_template(rendered_template)

    context_pipeline = doc_retriever | show_docs | format_docs
    chain_inputs = {
        "context": context_pipeline,
        "question": RunnablePassthrough(),
    }
    return chain_inputs | rag_prompt | llm

In [None]:
async def callback(contents, user, instance):
    """Answer one chat message by running the chain with a Panel callback handler.

    Args:
        contents: The submitted message; passed unchanged to the chain as its input.
        user: Unused here; kept because it is part of the callback signature.
        instance: The chat component handed to the Panel callback handler,
            which posts the reply as user "OLMo".
    """

    def _ignore_llm_end(response, *args, **kwargs):
        # Intentionally does nothing
        return None

    panel_handler = pn.chat.langchain.PanelCallbackHandler(
        instance, user="OLMo", avatar="🌳"
    )
    # Replace the end-of-generation hook with a no-op so the finished result
    # is not posted again after it has already been streamed
    panel_handler.on_llm_end = _ignore_llm_end

    rag_chain = get_chain(callbacks=[panel_handler])
    await rag_chain.ainvoke(contents)

In [None]:
# Build the chat UI, route every submitted message to callback(), and mark the
# component as servable so it can be deployed with `panel serve`
pn.chat.ChatInterface(callback=callback).servable()