In [5]:
import nest_asyncio

# Apply the nest_asyncio patch before any other cell runs.
# NOTE(review): presumably needed because the notebook kernel already owns a
# running event loop and later library calls start their own — confirm.
nest_asyncio.apply()

In [6]:
import qdrant_client

# A Qdrant collection plays the role of a database table: it groups a set of vectors.
# "chat_with_docs" is the collection meant to hold the document embeddings that
# support query-based information retrieval.
collection_name = "chat_with_docs"

# Create the QdrantClient, pointing it at the Qdrant server running locally.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

In [7]:
# SimpleDirectoryReader scans a directory, filters for specific file types,
# and loads the document content into a format we can work with.
from llama_index.core import SimpleDirectoryReader

input_dir_path = './docs'

# Only PDF files are requested, and sub-directories are searched as well.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    required_exts=[".pdf"],
    recursive=True,
)

# load_data() reads the PDF content and returns it in a structured form;
# the result is kept in `docs`.
docs = loader.load_data()

In [8]:
# Sanity check: container type of `docs` and how many entries were loaded.
type(docs), len(docs)

(list, 32)

In [9]:
# Show the content of the first loaded entry to confirm the PDF text was extracted.
docs[0].get_content()

'Preprint\nDSP Y: C OMPILING DECLARATIVE LANGUAGE\nMODEL CALLS INTO SELF -IMPROVING PIPELINES\nOmar Khattab,1 Arnav Singhvi,2\nParidhi Maheshwari,4 Zhiyuan Zhang,1\nKeshav Santhanam,1 Sri Vardhamanan,6 Saiful Haq,6\nAshutosh Sharma,6 Thomas T. Joshi,7 Hanna Moazam,8\nHeather Miller,3,9 Matei Zaharia,2 Christopher Potts1\n1Stanford University, 2UC Berkeley, 3Carnegie Mellon University,\n4Amazon Alexa AI, 5Dashworks Technologies, Inc.,\n6IIT Bombay, 7Calera Capital, 8Microsoft, 9Two Sigma Investments\nokhattab@cs.stanford.edu\nABSTRACT\nThe ML community is rapidly exploring techniques for prompting language mod-\nels (LMs) and for stacking them into pipelines that solve complex tasks. Un-\nfortunately, existing LM pipelines are typically implemented using hard-coded\n“prompt templates”, i.e. lengthy strings discovered via trial and error. Toward a\nmore systematic approach for developing and optimizing LM pipelines, we intro-\nduce DSPy, a programming model that abstracts LM pipelines as

In [10]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext
# from llama_index.core.node_parser import SimpleNodeParser


def create_index(documents, reuse_existing=True):
    """Return a VectorStoreIndex backed by the notebook's Qdrant collection.

    Uses the module-level ``client`` and ``collection_name``. On a first run the
    documents are embedded and written to the collection. Because the Qdrant
    server keeps its data between kernel restarts, ingesting again on every run
    would store each chunk one more time; to avoid that, an already-populated
    collection is reused as-is.

    No node parser is passed here, so whatever chunking is configured globally
    (LlamaIndex's defaults unless changed) is applied during ingestion.

    Args:
        documents: Documents to embed and store, e.g. the list returned by
            ``SimpleDirectoryReader.load_data()``.
        reuse_existing: When True (default) and the collection already contains
            points, build the index from the stored vectors and skip ingestion.
            Pass False to ingest ``documents`` unconditionally (the previous
            behaviour); this adds to whatever the collection already holds.

    Returns:
        A ``VectorStoreIndex`` whose vectors live in the Qdrant collection.
    """
    # Check for stored data before touching the collection through the vector store.
    has_stored_vectors = (
        reuse_existing
        and client.collection_exists(collection_name)
        and client.count(collection_name=collection_name, exact=True).count > 0
    )

    vector_store = QdrantVectorStore(client=client,
                                     collection_name=collection_name)

    if has_stored_vectors:
        # Collection is already populated: attach to it instead of re-embedding.
        return VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # Use the Qdrant vector store as the storage backend for the new index.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Embed each document and store the resulting vectors in Qdrant.
    return VectorStoreIndex.from_documents(documents,
                                           storage_context=storage_context)

In [11]:
# Create the embeddings and store them in the Qdrant vector store.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# trust_remote_code is deliberately left at its default (off): this checkpoint
# loads with the stock model classes, so there is no reason to allow code shipped
# in the model repository to execute locally.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

# Register embed_model as the default embedding model in Settings so the same
# model is used throughout the RAG pipeline (indexing and querying).
Settings.embed_model = embed_model

index = create_index(docs)

In [14]:
# Display the embedding model currently registered in Settings.
Settings.embed_model

HuggingFaceEmbedding(model_name='BAAI/bge-large-en-v1.5', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000013CB1C4FC90>, num_workers=None, max_length=512, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None, show_progress_bar=False)

In [12]:
from llama_index.llms.ollama import Ollama

# LLM served through Ollama; request_timeout is raised to 120.0 for slow responses.
llm = Ollama(
    model="llama3.2:1b",
    request_timeout=120.0,
)

# Register it as the default LLM in Settings.
Settings.llm = llm

In [13]:
from llama_index.core import PromptTemplate

# QA prompt with two placeholders: {context_str} (retrieved context) and
# {query_str} (the user question).
# The text is kept flush-left on purpose: inside a triple-quoted string any
# source indentation becomes part of the prompt sent to the model.
template = """Context information is below:
---------------------
{context_str}
---------------------
Given the context information above I want you to think
step by step to answer the query in a crisp manner,
in case you don't know the answer say 'I don't know!'

Query: {query_str}

Answer:"""

qa_prompt_tmpl = PromptTemplate(template)

In [14]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder reranker used as a node post-processor; configured with top_n=3.
rerank = SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=3)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [15]:
# The engine retrieves the top 10 most similar document chunks by vector
# similarity to the query, then hands them to the `rerank` post-processor.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank],
)

# Replace the response synthesizer's text_qa_template with the custom QA prompt.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

response = query_engine.query("What exactly is DSPy?")

In [16]:
from IPython.display import Markdown, display

# Render the response text as Markdown in the cell output.
display(Markdown(str(response)))

DSPy stands for Declarative Signatures and Parameterized Significance, which refers to a programming model used for natural language processing (NLP) tasks. It's an approach that allows users to define the structure of their prompts or questions using natural language signatures, rather than writing explicit code for each prompt.

In simpler terms, DSPy provides a way to abstract and automate the process of prompting Natural Language Models (LMs), such as those used in chatbots, text analysis, and other NLP tasks. This is achieved by defining a set of declarative instructions or signatures that specify how to transform input data into output results.

These signatures are represented as tuples of fields, each with its own metadata, which include information about the field's purpose, description, and any optional constraints on its value. By using DSPy, developers can write concise and expressive code that defines their NLP tasks, rather than relying on explicit programming or scripting.

The key features of DSPy include:

1. Natural language signatures: Define prompts or questions as declarative instructions using natural language.
2. Structured formatting: Use placeholders for fields to indicate the expected input format.
3. Optional constraints: Specify limits on field values, such as integer types or specific data formats.
4. Parameterized significance: Allow users to re-use and combine DSPy signatures across different tasks.

Overall, DSPy provides a powerful tool for building and automating NLP workflows, making it easier to develop complex natural language processing tasks with minimal coding effort.