In [1]:
import textwrap
from uuid import uuid4
import warnings
from pathlib import Path

from langchain_core.runnables import RunnablePassthrough
from langchain_core.callbacks import CallbackManager, BaseCallbackHandler
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_qdrant import Qdrant
from langchain_huggingface import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from langchain.globals import set_verbose, set_debug

set_debug(True)
set_verbose(True)

from ssec_tutorials import download_olmo_model

warnings.filterwarnings("ignore")

import panel as pn

def get_chain(callback_handlers: list[BaseCallbackHandler], input_prompt_template: str):
    # 1. Set up the vector database retriever.
    # This line of code will create a retriever object that
    # will be used to retrieve documents from the vector database.
    print("[DEBUG] Setting up the retriever...")
    retriever = db.as_retriever(
        callbacks=callback_handlers,  # pass the result of the retrieval to the callback handler
        search_type="mmr",  # the mmr (maximal marginal relevance, a typical information retrieval tactic) search
        search_kwargs={"k": 2},  # return top 2 results
    )
    print("[DEBUG] Retriever setup completed.")
    # 2. Setup the Langchain callback manager to handle callbacks from Langchain LLM object.
    # At which results are passed to the callback handler.
    callback_manager = CallbackManager(callback_handlers)

    # 3. Setup the Langchain llama.cpp model object.
    # In our case, we are using the `OLMo-7B-Instruct` model.
    # llama-cpp-python is a Python binding for llama.cpp C++ library as mentioned in previous modules.
    olmo = LlamaCpp(
        model_path=str(model_path),  # the path to the OLMo model in GGUF file format
        callback_manager=callback_manager,  # set the callback manager to handle callbacks
        temperature=0.8,  # set the randomness of the model's output
        n_ctx=4096,  # set limit for the length of the input context
        max_tokens=512,  # set limit for the length of the generated text
        verbose=False,  # determines whether the model should print out debug information
        echo=False,  # determines whether the input prompt should be included in the output
    )

    # 4. Set up the initial Langchain Prompt Template using text based jinja2 format
    prompt_template = PromptTemplate.from_template(
        template=olmo.client.metadata[
            "tokenizer.chat_template"
        ],  # get the chat template from the model metadata
        template_format="jinja2",  # set the template format to jinja2
        partial_variables={
            "add_generation_prompt": True,  # add generation prompt to the template, this option is from the model metadata
            "eos_token": "<|endoftext|>",  # set the end of sentence token
        },
    )

    # 5. Transform the Prompt Template to include the user role and the context
    # This will allow the model to generate text based on the context provided.
    # However, after setting this new template, the model will be limited to
    # generating text based on the created prompt template with input of
    # `context` and `question` keys.
    transformed_prompt_template = PromptTemplate.from_template(
        prompt_template.partial(
            # The default chat template takes a list of messages with a role and content
            # to setup this particular app, we will only pass a single message with the user role
            # and the input prompt content
            messages=[
                {
                    "role": "user",  # set the role to user, this allows for user input to be passed to the model
                    "content": input_prompt_template,  # the input prompt template, must have `context` and `question` keys to work
                }
            ]
        ).format()
    )


    # 6. Define the `format_docs` function to format the retrieved Langchain documents object to simple string
    def format_docs(docs):
        text = "\n\n".join([d.page_content for d in docs])
        return text

    # 7. Define the `show_docs` function to display the retrieved documents to app panel
    # this is currently a small hack to display the retrieved documents to the app panel
    # as mentioned in https://github.com/langchain-ai/langchain/issues/7290
    def show_docs(docs):
        for callback_handler in callback_handlers:
            callback_handler.on_retriever_end(
                docs,  # pass the retrieved documents to the callback handler
                run_id=uuid4(),  # generate a random run id
            )
        return docs

    # 8. Adding expanded query which adds LSST to the query if Rubin telescope is present
    def expand_query_with_synonyms(query):
        if "Rubin" in query:
            query += " LSST Large Synoptic Survey Telescope"
        return query
    
    # 8. Return the Langchain chain object
    # The way the chain reads is as follows:
    return (
        {
            # The Vector Database retriever documents,
            # which is then passed to the `show_docs` function,
            # which is then passed to the `format_docs` function for formatting
            "context": expand_query_with_synonyms| retriever | show_docs | format_docs,
            # The Question asked by the user from the Chat Text Input Interface is passed in as well
            "question": RunnablePassthrough(),
        }
        # The dictionary above that contains text values for `context` and `question` is now passed
        # to the transformed prompt template so that the final prompt text can be generated
        | transformed_prompt_template
        # The full final prompt text with both context and question is passed to the OLMo model
        # for generation of the final output. Note that this final prompt text cannot exceed the maximum
        # `n_ctx` input context value set in the OLMo model above.
        | olmo
    )

async def callback(contents, user, instance):
    # 1. Create a panel callback handler
    # The Langchain PanelCallbackHandler is useful for rendering and streaming the chain of thought
    # from Langchain objects like Tools, Agents, and Chains.
    # It inherits from Langchain’s BaseCallbackHandler.
    # Here we set the user to be the model name "OLMo" with an avatar of a tree emoji "🌳"
    # for the tree of knowledge.
    callback_handler = pn.chat.langchain.PanelCallbackHandler(
        instance, user="OLMo", avatar="🌳"
    )
    print(f"[DEBUG] Callback received contents: {contents}")
    # 2. Set to not return the full generated result at the end of the generation;
    # this prevents the model from repeating the result in the interface
    callback_handler.on_llm_end = lambda response, *args, **kwargs: None

    # 3. Create and setup the Langchain chain object with the callback handler and input prompt template
    chain = get_chain(
        callback_handlers=[callback_handler],
        input_prompt_template=input_prompt_template,
    )
    print("[DEBUG] Running chain with contents...")
    # 4. Run the chain with the input contents
    _ = await chain.ainvoke(contents)
    print("[DEBUG] Chain invocation completed.")


pn.extension()

model_path = download_olmo_model()
qdrant_path = Path("/workspaces/Rubin-RAG/resources/rubin_qdrant")
qdrant_collection = "rubin_telescope"

# #NOTE: This model requires trusting remote code, not recommneded for non public documents
# embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2") 

# Replacing HuggingFaceEmbeddings with SentenceTransformer embeddings
embedding = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")

# db = Qdrant.from_existing_collection(
#     collection_name=qdrant_collection, embedding=embedding, path=qdrant_path
# )

client = QdrantClient(path=str(qdrant_path))
db = Qdrant(
    client=client,
    collection_name=qdrant_collection,
    embeddings=embedding.encode # Using the custom embedding function
)

# Debug to confirm the embedding function is being used correctly
print("[DEBUG] Qdrant initialized with embedding function")

input_prompt_template = textwrap.dedent(
    """\
You are an astrophysics expert with a focus on the Rubin telescope project (formerly known as Large Synoptic Survey Telescope - LSST). Please answer the question on astrophysics based on the following context:

{context}

Question: {question}
"""
)

chat_interface = pn.chat.ChatInterface(callback=callback)

# Enable serving the app on a web URL
if __name__ == '__main__':
    pn.serve({'/': chat_interface}, port=5006, websocket_origin='*', show=False)

  from tqdm.autonotebook import tqdm, trange


Model already exists at /home/mambauser/.cache/ssec_tutorials/OLMo-7B-Instruct-Q4_K_M.gguf




[DEBUG] Qdrant initialized with embedding function
Launching server at http://localhost:5006


[DEBUG] Callback received contents: What is the expected image quality of the optical system in the Rubin Telescope
[DEBUG] Setting up the retriever...
[DEBUG] Retriever setup completed.
[DEBUG] Running chain with contents...
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What is the expected image quality of the optical system in the Rubin Telescope"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] Entering Chain run with input:
[0m{
  "input": "What is the expected image quality of the optical system in the Rubin Telescope"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What is the expected image quality of the optical system in the Rubin Telescope"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<con

In [1]:
import textwrap
from uuid import uuid4
import warnings
from pathlib import Path

from langchain_core.runnables import RunnablePassthrough
from langchain_core.callbacks import CallbackManager, BaseCallbackHandler
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_qdrant import Qdrant
from langchain_huggingface import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from langchain.globals import set_verbose, set_debug

set_debug(True)
set_verbose(True)

from ssec_tutorials import download_olmo_model

warnings.filterwarnings("ignore")

import panel as pn


def get_chain(callback_handlers: list[BaseCallbackHandler], input_prompt_template: str):
    # 1. Set up the vector database retriever.
    # This line of code will create a retriever object that
    # will be used to retrieve documents from the vector database.
    print("[DEBUG] Setting up the retriever...")
    retriever = db.as_retriever(
        callbacks=callback_handlers,  # pass the result of the retrieval to the callback handler
        search_type="mmr",  # the mmr (maximal marginal relevance, a typical information retrieval tactic) search
        search_kwargs={"k": 2},  # return top 2 results
    )
    print("[DEBUG] Retriever setup completed.")
    # 2. Setup the Langchain callback manager to handle callbacks from Langchain LLM object.
    # At which results are passed to the callback handler.
    callback_manager = CallbackManager(callback_handlers)

    # 3. Setup the Langchain llama.cpp model object.
    # In our case, we are using the `OLMo-7B-Instruct` model.
    # llama-cpp-python is a Python binding for llama.cpp C++ library as mentioned in previous modules.
    olmo = LlamaCpp(
        model_path=str(model_path),  # the path to the OLMo model in GGUF file format
        callback_manager=callback_manager,  # set the callback manager to handle callbacks
        temperature=0.8,  # set the randomness of the model's output
        n_ctx=4096,  # set limit for the length of the input context
        max_tokens=512,  # set limit for the length of the generated text
        verbose=False,  # determines whether the model should print out debug information
        echo=False,  # determines whether the input prompt should be included in the output
    )

    # 4. Set up the initial Langchain Prompt Template using text based jinja2 format
    prompt_template = PromptTemplate.from_template(
        template=olmo.client.metadata[
            "tokenizer.chat_template"
        ],  # get the chat template from the model metadata
        template_format="jinja2",  # set the template format to jinja2
        partial_variables={
            "add_generation_prompt": True,  # add generation prompt to the template, this option is from the model metadata
            "eos_token": "<|endoftext|>",  # set the end of sentence token
        },
    )

    # 5. Transform the Prompt Template to include the user role and the context
    # This will allow the model to generate text based on the context provided.
    # However, after setting this new template, the model will be limited to
    # generating text based on the created prompt template with input of
    # `context` and `question` keys.
    transformed_prompt_template = PromptTemplate.from_template(
        prompt_template.partial(
            # The default chat template takes a list of messages with a role and content
            # to setup this particular app, we will only pass a single message with the user role
            # and the input prompt content
            messages=[
                {
                    "role": "user",  # set the role to user, this allows for user input to be passed to the model
                    "content": input_prompt_template,  # the input prompt template, must have `context` and `question` keys to work
                }
            ]
        ).format()
    )


    # 6. Define the `format_docs` function to format the retrieved Langchain documents object to simple string
    def format_docs(docs):
        text = "\n\n".join([d.page_content for d in docs])
        return text

    # 7. Define the `show_docs` function to display the retrieved documents to app panel
    # this is currently a small hack to display the retrieved documents to the app panel
    # as mentioned in https://github.com/langchain-ai/langchain/issues/7290
    def show_docs(docs):
        for callback_handler in callback_handlers:
            callback_handler.on_retriever_end(
                docs,  # pass the retrieved documents to the callback handler
                run_id=uuid4(),  # generate a random run id
            )
        return docs

    # 8. Adding expanded query which adds LSST to the query if Rubin telescope is present
    def expand_query_with_synonyms(query):
        if "Rubin" in query:
            query += " LSST Large Synoptic Survey Telescope"
        return query
    
    # 8. Return the Langchain chain object
    # The way the chain reads is as follows:
    return (
        {
            # The Vector Database retriever documents,
            # which is then passed to the `show_docs` function,
            # which is then passed to the `format_docs` function for formatting
            "context": expand_query_with_synonyms| retriever | show_docs | format_docs,
            # The Question asked by the user from the Chat Text Input Interface is passed in as well
            "question": RunnablePassthrough(),
        }
        # The dictionary above that contains text values for `context` and `question` is now passed
        # to the transformed prompt template so that the final prompt text can be generated
        | transformed_prompt_template
        # The full final prompt text with both context and question is passed to the OLMo model
        # for generation of the final output. Note that this final prompt text cannot exceed the maximum
        # `n_ctx` input context value set in the OLMo model above.
        | olmo
    )

async def callback(contents, user, instance):
    # 1. Create a panel callback handler
    # The Langchain PanelCallbackHandler is useful for rendering and streaming the chain of thought
    # from Langchain objects like Tools, Agents, and Chains.
    # It inherits from Langchain’s BaseCallbackHandler.
    # Here we set the user to be the model name "OLMo" with an avatar of a tree emoji "🌳"
    # for the tree of knowledge.
    callback_handler = pn.chat.langchain.PanelCallbackHandler(
        instance, user="OLMo", avatar="🌳"
    )
    print(f"[DEBUG] Callback received contents: {contents}")
    # 2. Set to not return the full generated result at the end of the generation;
    # this prevents the model from repeating the result in the interface
    callback_handler.on_llm_end = lambda response, *args, **kwargs: None

    # 3. Create and setup the Langchain chain object with the callback handler and input prompt template
    chain = get_chain(
        callback_handlers=[callback_handler],
        input_prompt_template=input_prompt_template,
    )
    print("[DEBUG] Running chain with contents...")
    # 4. Run the chain with the input contents
    _ = await chain.ainvoke(contents)
    print("[DEBUG] Chain invocation completed.")


pn.extension()

model_path = download_olmo_model()
qdrant_path = Path("/workspaces/Rubin-RAG/resources/rubin_qdrant")
qdrant_collection = "rubin_telescope"

# #NOTE: This model requires trusting remote code, not recommneded for non public documents
# embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2") 

# Replacing HuggingFaceEmbeddings with SentenceTransformer embeddings
embedding =  SentenceTransformer(
                "dunzhang/stella_en_400M_v5",
                trust_remote_code=True,
                device="cpu",
                config_kwargs={"use_memory_efficient_attention": False, "unpad_inputs": False}
            )
# db = Qdrant.from_existing_collection(
#     collection_name=qdrant_collection, embedding=embedding, path=qdrant_path
# )

client = QdrantClient(path=str(qdrant_path))
db = Qdrant(
    client=client,
    collection_name=qdrant_collection,
    embeddings=embedding.encode # Using the custom embedding function
)

# Debug to confirm the embedding function is being used correctly
print("[DEBUG] Qdrant initialized with embedding function")

input_prompt_template = textwrap.dedent(
    """\
You are an astrophysics expert with a focus on the Rubin telescope project (formerly known as Large Synoptic Survey Telescope - LSST). Please answer the question on astrophysics based on the following context:

{context}

Question: {question}
"""
)

chat_interface = pn.chat.ChatInterface(callback=callback)

# Enable serving the app on a web URL
if __name__ == '__main__':
    pn.serve({'/': chat_interface}, port=5006, websocket_origin='*', show=False)

  from tqdm.autonotebook import tqdm, trange


Model already exists at /home/mambauser/.cache/ssec_tutorials/OLMo-7B-Instruct-Q4_K_M.gguf


Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[DEBUG] Qdrant initialized with embedding function
Launching server at http://localhost:5006


[DEBUG] Callback received contents: What is the expected image quality of optical system in the rubin telescope
[DEBUG] Setting up the retriever...
[DEBUG] Retriever setup completed.
[DEBUG] Running chain with contents...
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What is the expected image quality of optical system in the rubin telescope"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] Entering Chain run with input:
[0m{
  "input": "What is the expected image quality of optical system in the rubin telescope"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What is the expected image quality of optical system in the rubin telescope"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> >



In [1]:
!pwd

/workspaces/Rubin-RAG/rubin-chat


In [1]:
# Create new qdrant database for new embedding

import os
from langchain_community.document_loaders import PyMuPDFLoader

pdf_folder_path = "/workspaces/Rubin-RAG/resources/raw-data/Rubin"  

# Process the extracted PDF files
documents = []
for file in os.listdir(pdf_folder_path):
    if file.endswith('.pdf'):
        pdf_path = os.path.join(pdf_folder_path, file)
        loader = PyMuPDFLoader(pdf_path)
        documents.extend(loader.load())

# Print the metadata of each document
for each in documents:
    # print(each.page_content)  # Uncomment this line to see the individual page_content
    print(each.metadata)



{'source': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'file_path': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'page': 0, 'total_pages': 11, 'format': 'PDF 1.5', 'title': 'title', 'author': 'author', 'subject': '', 'keywords': 'keywords', 'creator': 'LaTeX with hyperref package', 'producer': 'xdvipdfmx (0.7.9)', 'creationDate': "D:20190923215906-00'00'", 'modDate': '', 'trapped': ''}
{'source': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'file_path': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'page': 1, 'total_pages': 11, 'format': 'PDF 1.5', 'title': 'title', 'author': 'author', 'subject': '', 'keywords': 'keywords', 'creator': 'LaTeX with hyperref package', 'producer': 'xdvipdfmx (0.7.9)', 'creationDate': "D:20190923215906-00'00'", 'modDate': '', 'trapped': ''}
{'source': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'file_path': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf'

In [3]:
len(documents)

13340

In [4]:
documents[0]

Document(metadata={'source': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'file_path': '/workspaces/Rubin-RAG/resources/raw-data/Rubin/PSTN-010.pdf', 'page': 0, 'total_pages': 11, 'format': 'PDF 1.5', 'title': 'title', 'author': 'author', 'subject': '', 'keywords': 'keywords', 'creator': 'LaTeX with hyperref package', 'producer': 'xdvipdfmx (0.7.9)', 'creationDate': "D:20190923215906-00'00'", 'modDate': '', 'trapped': ''}, page_content='Draft version September 23, 2019\nTypeset using LATEX modern style in AASTeX62\nLSST Camera Optics\nJustin Wolfe1\n1SLAC National Accelerator Laboratory, 2575 Sand Hill Rd., Menlo Park, CA 94025, USA\n(Dated: September 23, 2019)\nABSTRACT\nAs the Commissioning Execution Plan (LSE-390) says, ”The project team shall\ndeliver all reports documenting the as-built hardware and software including: draw-\nings, source code, modiﬁcations, compliance exceptions, and recommendations for\nimprovement.” As a ﬁrst step towards the delivery of docum

In [None]:
import os
from pathlib import Path
from langchain_qdrant import Qdrant
from qdrant_client import QdrantClient, models 
from sentence_transformers import SentenceTransformer

# Setup the embedding model
embedding =  SentenceTransformer(
                "dunzhang/stella_en_400M_v5",
                trust_remote_code=True,
                device="cpu",
                config_kwargs={"use_memory_efficient_attention": False, "unpad_inputs": False}
            )


# Define the path to the Qdrant collection
qdrant_path = Path("/workspaces/Rubin-RAG/resources/rubin_qdrant_stella")
qdrant_collection = "rubin_telescope_stella"

# if qdrant_path.exists():
#     print(f"Qdrant Vector Database Collection already exists in {qdrant_path}, load it")
#     client = QdrantClient(path=str(qdrant_path))
#     qdrant = Qdrant(
#         client=client,
#         collection_name=qdrant_collection,
#         embeddings=embedding.encode
#     )
# else:
print(f"Creating new Qdrant collection '{qdrant_collection}' from {len(documents)} documents")
# Create the directory if it doesn't exist
qdrant_path.mkdir(parents=True, exist_ok=True)

client = QdrantClient(path=str(qdrant_path))

client.create_collection(
    collection_name=qdrant_collection,
    vectors_config=models.VectorParams(
        size=embedding.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    )
)
client.upload_points(
    collection_name=qdrant_collection,
    points=[
        models.PointStruct(
            id=idx, 
            vector=embedding.encode(doc.page_content).tolist(),  # Use doc.page_content instead of doc["page_content"]
            payload=doc.metadata  # Assuming you want to store metadata as the payload
        )
        for idx, doc in enumerate(documents)
    ],
)

# Load the documents into a Qdrant Vector Database Collection
# this will save locally in the qdrant_path as sqlite
qdrant = Qdrant.from_documents(
    documents=documents,
    embedding=embedding.encode,
    path=str(qdrant_path),
    collection_name=qdrant_collection,
)


Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Creating new Qdrant collection 'rubin_telescope_stella' from 13340 documents




In [4]:
client.delete_collection(collection_name=qdrant_collection)
print(f"Deleted collection '{qdrant_collection}'.")

Deleted collection 'rubin_telescope_stella'.


In [None]:
from qdrant_client import QdrantClient

# Connect to the Qdrant client
qdrant_path = "/workspaces/Rubin-RAG/resources/rubin_qdrant_stella"
client = QdrantClient(path=qdrant_path)

collection_info = client.get_collection(collection_name="rubin_telescope_stella")
print("Collection info:", collection_info)


In [4]:
## NOTE DOES NOT RUN ON CODESPACES

from testcontainers.qdrant import QdrantContainer
import os
from pathlib import Path
from langchain_qdrant import Qdrant
from qdrant_client import models 
from sentence_transformers import SentenceTransformer

# Setup the embedding model
embedding =  SentenceTransformer(
                "dunzhang/stella_en_400M_v5",
                trust_remote_code=True,
                device="cpu",
                config_kwargs={"use_memory_efficient_attention": False, "unpad_inputs": False}
            )

qdrant = QdrantContainer()
qdrant.start()
client = qdrant.get_client()

qdrant_path = Path("/workspaces/Rubin-RAG/resources/rubin_qdrant_stella")
qdrant_collection = "rubin_telescope_stella"

print(f"Creating new Qdrant collection '{qdrant_collection}' from {len(documents)} documents")
# Create the directory if it doesn't exist
qdrant_path.mkdir(parents=True, exist_ok=True)

client.create_collection(
    collection_name=qdrant_collection,
    vectors_config=models.VectorParams(
        size=embedding.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    )
)
client.upload_points(
    collection_name=qdrant_collection,
    points=[
        models.PointStruct(
            id=idx, 
            vector=embedding.encode(doc.page_content).tolist(),  # Use doc.page_content instead of doc["page_content"]
            payload=doc.metadata  # Assuming you want to store metadata as the payload
        )
        for idx, doc in enumerate(documents)
    ],
)

# Load the documents into a Qdrant Vector Database Collection
# this will save locally in the qdrant_path as sqlite
qdrant = Qdrant.from_documents(
    documents=documents,
    embedding=embedding.encode,
    path=str(qdrant_path),
    collection_name=qdrant_collection,
)

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/170k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/51.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/892 [00:00<?, ?B/s]

configuration.py:   0%|          | 0.00/7.13k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/dunzhang/stella_en_400M_v5:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling.py:   0%|          | 0.00/57.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/dunzhang/stella_en_400M_v5:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/186 [00:00<?, ?B/s]

2_Dense_1024/config.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.20M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.20M [00:00<?, ?B/s]

Pulling image testcontainers/ryuk:0.8.1
Container started: 9acbf56ca0fc
Waiting for container <Container: 9acbf56ca0fc> with image testcontainers/ryuk:0.8.1 to be ready ...
Pulling image qdrant/qdrant:v1.8.3
Container started: 1413224b2226
Waiting for container <Container: 1413224b2226> with image qdrant/qdrant:v1.8.3 to be ready ...
Waiting for container <Container: 1413224b2226> with image qdrant/qdrant:v1.8.3 to be ready ...
Waiting for container <Container: 1413224b2226> with image qdrant/qdrant:v1.8.3 to be ready ...


Creating new Qdrant collection 'rubin_telescope_stella' from 13340 documents




KeyboardInterrupt: 

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

# Initialize LangChain embedding model
lc_embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")

# Initialize Sentence Transformers model
st_model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")

# Define a sample text
sample_text = "This is a test sentence for comparison."

# Get embeddings
lc_embedding_vector = lc_embedding.embed_query(sample_text)  # LangChain
st_embedding_vector = st_model.encode(sample_text).tolist()  # SentenceTransformer

# Compare embeddings
cosine_similarity = 1 - cosine(lc_embedding_vector, st_embedding_vector)
print("Cosine Similarity between embeddings:", cosine_similarity)

# Optionally, check value-by-value difference
difference = [abs(a - b) for a, b in zip(lc_embedding_vector, st_embedding_vector)]
max_difference = max(difference)
print("Maximum difference in embedding values:", max_difference)


  from tqdm.autonotebook import tqdm, trange
  lc_embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")


Cosine Similarity between embeddings: 1.0
Maximum difference in embedding values: 0.0
