In [7]:
!pip install openai langchain langchain-community langchain-openai langchain-qdrant qdrant-client python-dotenv


In [None]:

import os
from pathlib import Path

from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from openai import OpenAI
from qdrant_client import QdrantClient



In [11]:

# Configuration: everything a reader might want to tune lives here.
PDF_PATH = "python.pdf"                      # PDF file that gets loaded and indexed
COLLECTION_NAME = "learning_langchain-hyde"  # Qdrant collection used for storage and search
QDRANT_URL = "http://localhost:6333"         # URL of the Qdrant server
THRESHOLD = 0.50                             # minimum score a search hit must reach to be shown
QUERY = "How to loop over two lists together?"

# Credentials: never hard-code the API key in the notebook.
# load_dotenv() reads a local .env file (if present) into the process environment;
# the key is then taken from the OPENAI_API_KEY environment variable.
load_dotenv()
key = os.getenv("OPENAI_API_KEY")
if not key:
    # Fail here with a clear message instead of on the first API request.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your shell or put it in a .env file "
        "next to this notebook."
    )

# Initialize clients
# Embedding model used both for indexing documents and for embedding search queries.
embedder = OpenAIEmbeddings(api_key=key, model="text-embedding-3-small")
qdrant_client = QdrantClient(url=QDRANT_URL)
openai_client = OpenAI(api_key=key)

# System prompt used when asking the chat model for a hypothetical answer.
system_prompt = """
You are an AI Assistant who can take users' Python documentation queries and answer how to perform operations in Python.
The user might be a newbie and may ask vague or worded-differently queries. Answer them properly.
Your answers must be short and correct.
"""


In [12]:

def load_and_split_pdf(pdf_path: str, split: bool = False, chunk_size: int = 100, chunk_overlap: int = 20):
    """Load a PDF and return its documents, optionally split into smaller chunks.

    Args:
        pdf_path: Path of the PDF file to load.
        split: When False (the default) the documents are returned exactly as the
            loader produced them. Set to True for large documents to break them
            into chunks with RecursiveCharacterTextSplitter.
        chunk_size: Chunk size passed to the splitter; only used when split is True.
        chunk_overlap: Chunk overlap passed to the splitter; only used when split is True.

    Returns:
        The loaded documents, or the chunks produced by the splitter when split is True.
    """
    docs = PyPDFLoader(file_path=pdf_path).load()
    if not split:
        return docs

    # Build the splitter only when it is actually needed.
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(docs)


In [13]:

def inject_into_qdrant(split_docs):
    """Store the given documents in the Qdrant collection COLLECTION_NAME.

    The vector store is first created with an empty document list and
    force_recreate=True, then split_docs is added to it. Progress messages
    are printed before and after. Returns None.

    Args:
        split_docs: Documents to embed (with the shared embedder) and store.
    """
    print("⚙️ Injecting documents into Qdrant...")

    store_options = {
        "url": QDRANT_URL,
        "collection_name": COLLECTION_NAME,
        "embedding": embedder,
        "force_recreate": True,
    }
    # Create the store without any documents, then add the real ones in a second step.
    store = QdrantVectorStore.from_documents(documents=[], **store_options)
    store.add_documents(documents=split_docs)

    print("✅ Injection done!")


In [14]:

def search_with_threshold(query: str):
    """Search the Qdrant collection for `query` and print the hits that pass THRESHOLD.

    Opens the existing collection COLLECTION_NAME, requests the top 3 scored
    matches and keeps only those whose score is at least THRESHOLD. Each kept
    hit is printed with its score and text; if none is kept, a "no relevant
    data" message is printed instead. Returns None.

    Args:
        query: Text to search for.
    """
    store = QdrantVectorStore.from_existing_collection(
        embedding=embedder,
        collection_name=COLLECTION_NAME,
        url=QDRANT_URL,
    )
    hits = store.similarity_search_with_score(query, k=3)

    # Keep only the hits whose score reaches the configured threshold.
    kept = []
    for doc, score in hits:
        if score >= THRESHOLD:
            kept.append((doc, score))

    if not kept:
        print("\n❌ No relevant data found for this query.")
        return

    print("\n✅ Relevant Chunks Found:\n")
    separator = "-" * 50
    for doc, score in kept:
        print(f"Score: {score:.2f}")
        print("Chunk:", doc.page_content)
        print(separator)


In [15]:

def generate_hypothetical_answer(query: str) -> str:
    """Ask the chat model for a hypothetical answer to `query` (the HyDE step).

    Sends the shared system prompt plus the user's query to the "gpt-4" chat
    model, prints the content of the first returned choice and returns it.

    Args:
        query: The user's question.

    Returns:
        The message content of the first choice in the model response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    completion = openai_client.chat.completions.create(model="gpt-4", messages=conversation)

    hypothetical = completion.choices[0].message.content
    print("\n🤖 Hypothetical Answer (HyDE):\n", hypothetical)
    return hypothetical


In [16]:

# Step 1: Load the PDF at PDF_PATH and store its documents in Qdrant
docs = load_and_split_pdf(PDF_PATH)
inject_into_qdrant(docs)

# Step 2: Baseline search using the raw user query (no HyDE)
print("\n🔍 Searching without HyDE:")
search_with_threshold(QUERY)

# Step 3: Ask the LLM for a hypothetical answer to the same query (HyDE)
hypo_query = generate_hypothetical_answer(QUERY)

# Step 4: Search again, this time using the hypothetical answer as the search text
print("\n🔍 Searching with HyDE-enhanced query:")
search_with_threshold(hypo_query)


Ignoring wrong pointing object 14 0 (offset 0)


⚙️ Injecting documents into Qdrant...
✅ Injection done!

🔍 Searching without HyDE:

❌ No relevant data found for this query.

🤖 Hypothetical Answer (HyDE):
 You can loop over two lists together using the `zip()` function in Python. Here's an example:

```python
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
for item1, item2 in zip(list1, list2):
    print(item1, item2)
```

This will print:
```
a 1
b 2
c 3
```
Please note that if the lists are not of the same length, `zip()` stops creating tuples when the first list ends.

🔍 Searching with HyDE-enhanced query:

✅ Relevant Chunks Found:

Score: 0.52
Chunk: "The
 
zip()
 
function
 
in
 
Python
 
allows
 
you
 
to
 
iterate
 
over
 
multiple
 
iterables
 
(like
 
lists)
 
in
 
parallel."
 
 
--------------------------------------------------
