In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import chain
from typing import List
from dotenv import load_dotenv
import os

In [16]:
# Read key/value pairs from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
# The call's return value is displayed as this cell's output.
load_dotenv(override=True)

True

## Documents and Document Loaders

In [17]:
# Two hand-written sample documents; both carry the same "source" tag.
documents = [
    Document(page_content=text, metadata={"source": "mammal-pets-doc"})
    for text in (
        "Dogs are great companions, known for their loyalty and friendliness.",
        "Cats are independent pets that often enjoy their own space.",
    )
]

In [18]:
# Load the sample PDF from a path relative to the notebook's working directory.
file_path = "./example_data/langchain_test_file.pdf"
loader = PyPDFLoader(file_path)

# load() returns a list of Document objects; for this file it yields one per
# page (each carries 'page' / 'total_pages' metadata).
docs = loader.load()

# Number of Documents loaded.
print(len(docs))

3


In [19]:
# Preview the first 200 characters of the first loaded Document, followed by a
# blank line, then show the metadata attached to it.
print(f"{docs[0].page_content[:200]}\n")
print(docs[0].metadata)

1 Introduction to LangChain
LangChain is an open-source framework designed to simplify the development
of applications powered by large language models (LLMs). It provides a mod-
ular and flexible str

{'producer': 'xdvipdfmx (20220710)', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-10-02T15:38:43+00:00', 'source': './example_data/langchain_test_file.pdf', 'total_pages': 3, 'page': 0, 'page_label': '1'}


## Splitting

In [84]:
# Split the loaded pages into overlapping chunks for embedding.
# chunk_size / chunk_overlap are passed as 1000 / 200 (measured in characters
# with the splitter's default length function — TODO confirm if customised).
# add_start_index=True adds a 'start_index' entry to each chunk's metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

# NOTE(review): the bare expression below renders every chunk in full, which
# makes for a very large cell output; consider showing a slice or len() instead.
all_splits

[Document(metadata={'producer': 'xdvipdfmx (20220710)', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-10-02T15:38:43+00:00', 'source': './example_data/langchain_test_file.pdf', 'total_pages': 3, 'page': 0, 'page_label': '1', 'start_index': 0}, page_content='1 Introduction to LangChain\nLangChain is an open-source framework designed to simplify the development\nof applications powered by large language models (LLMs). It provides a mod-\nular and flexible structure for integrating LLMs with external tools, memory,\nand data sources, enabling developers to create context-aware and interactive\napplications.\n1.1 Core Components of LangChain\nLangChain’s architecture revolves around several key components:\n• LLM Wrappers: Interfaces to interact with various language models, such\nas those from OpenAI, Hugging Face, or Anthropic.\n• Prompt Templates: Tools for creating dynamic prompts to guide LLM re-\nsponses.\n• Memory: Mechanisms to maintain context across interactions, such a

In [85]:
# Embedding model used for both indexing and querying below.
# Alternative kept for reference (not executed):
#   OpenAIEmbeddings(model="text-embedding-3-small")
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [86]:
# Sanity check before indexing: embed the first two chunks and confirm that
# both vectors come back with the same dimensionality.
vector_1, vector_2 = (
    embedding_model.embed_query(all_splits[i].page_content) for i in (0, 1)
)

assert len(vector_1) == len(vector_2)
print(f"Generated vectors of length {len(vector_1)}\n")
print(vector_1[:10])

Generated vectors of length 384

[-0.0013769008219242096, -0.04395429044961929, 0.008399590849876404, -0.02527850866317749, 0.06378486007452011, -0.05110172927379608, -0.012214490212500095, 0.05130786448717117, 0.04954077675938606, -0.06765234470367432]


## Vector stores

In [87]:
# Create an in-memory vector store that uses the embedding model defined above.
vector_store = InMemoryVectorStore(embedding=embedding_model)

In [88]:
# Index every chunk; add_documents returns one id string per stored chunk.
# NOTE(review): no explicit ids are passed, so re-running this cell without
# re-creating vector_store presumably inserts the same chunks again under
# new ids — confirm, and re-run the store-creation cell first if so.
ids = vector_store.add_documents(documents=all_splits)
ids

['dd7a8902-260f-4c94-9817-bf2e2425ae14',
 '594e0b46-c616-4436-9fec-3a12628bb1ef',
 '21e365ae-5bd0-4976-8fdc-9ed234cc7fec',
 'f9244bf7-a99d-41fe-9fc5-c5d45471ac7a',
 '01aacc8e-1ec1-4ebc-98a8-ebf210e36c27',
 '9e9a1f11-57d2-486f-b3f0-e0c71bcdbc6d']

In [89]:
# Look up the chunks most similar to the question and print the first hit.
results = vector_store.similarity_search(query="What is Prompt Templates?")
print(results[0])

page_content='1 Introduction to LangChain
LangChain is an open-source framework designed to simplify the development
of applications powered by large language models (LLMs). It provides a mod-
ular and flexible structure for integrating LLMs with external tools, memory,
and data sources, enabling developers to create context-aware and interactive
applications.
1.1 Core Components of LangChain
LangChain’s architecture revolves around several key components:
• LLM Wrappers: Interfaces to interact with various language models, such
as those from OpenAI, Hugging Face, or Anthropic.
• Prompt Templates: Tools for creating dynamic prompts to guide LLM re-
sponses.
• Memory: Mechanisms to maintain context across interactions, such as con-
versation history.
• Tools and Agents: Integrations with external APIs, databases, or search
engines, allowing LLMs to perform tasks like web searches or calculations.
• Chains: Sequences of operations that combine prompts, LLMs, and tools to
achieve complex 

In [90]:
# Same query as the previous cell, but each hit is returned as a
# (document, score) pair. Note that this rebinds `results` to a list of pairs,
# not the list of Documents from the previous cell.
results = vector_store.similarity_search_with_score("What is Prompt Templates?")
doc, score = results[0]
# NOTE(review): score semantics depend on the vector store; for the in-memory
# store this is presumably cosine similarity (higher = closer) — confirm.
print(f"Score: {score}\n")
print(doc)

Score: 0.32155087494172074

page_content='1 Introduction to LangChain
LangChain is an open-source framework designed to simplify the development
of applications powered by large language models (LLMs). It provides a mod-
ular and flexible structure for integrating LLMs with external tools, memory,
and data sources, enabling developers to create context-aware and interactive
applications.
1.1 Core Components of LangChain
LangChain’s architecture revolves around several key components:
• LLM Wrappers: Interfaces to interact with various language models, such
as those from OpenAI, Hugging Face, or Anthropic.
• Prompt Templates: Tools for creating dynamic prompts to guide LLM re-
sponses.
• Memory: Mechanisms to maintain context across interactions, such as con-
versation history.
• Tools and Agents: Integrations with external APIs, databases, or search
engines, allowing LLMs to perform tasks like web searches or calculations.
• Chains: Sequences of operations that combine prompts, LLMs, a

## Retrievers