In [None]:
import getpass
import os

# Prompt for the Google API key only when the environment does not already
# provide one, so the secret never has to be written into the notebook.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Enter your Google API key: "
    )

In [None]:
from langchain_google_genai.llms import GoogleGenerativeAI

# Gemini text-generation client. Authentication is presumably taken from the
# GOOGLE_API_KEY environment variable set in the first cell — TODO confirm.
model = GoogleGenerativeAI(model="gemini-2.0-flash")

In [None]:
# Text Loader
from langchain_community.document_loaders import TextLoader

# Load the local text file as LangChain documents. The encoding is pinned to
# UTF-8 because the file contains non-ASCII punctuation (e.g. ’); without an
# explicit encoding the result depends on the platform's default codec.
loader = TextLoader("./gemini.txt", encoding="utf-8")
loader.load()

[Document(metadata={'source': './gemini.txt'}, page_content="An overview of the Gemini app\nWe have long seen the potential of AI to make information and computing more accessible and useful to people. We have made pioneering advances on large language models (LLMs) and have seen great progress across Google and in this field more broadly. For several years, we have applied LLMs in the background to improve many of our products, such as autocompleting sentences in Gmail, expanding Google Translate, and helping us better understand queries in Google Search. We continue using LLMs for many Google services, as well as to power the Gemini app, which allows people to collaborate directly with generative AI. We want the Gemini app to be the most helpful and personal AI assistant, giving users direct access to Google’s latest AI models.\n\nWhile we’re at an important inflection point and encouraged by the widespread excitement around generative AI, it’s still early days for this technology. T

In [None]:
# Web Loader
from langchain_community.document_loaders import WebBaseLoader

# Fetch the Gemini overview page and return its extracted text as documents.
loader = WebBaseLoader(web_path="https://gemini.google/overview/")
loader.load()

[Document(metadata={'source': 'https://gemini.google/overview/', 'title': 'What is Gemini and how it works', 'description': "Learn about Gemini: its capabilities, how it works, and its limitations. Explore Google's approach to AI, from model training to safety guidelines and future developments.", 'language': 'en'}, page_content="\n\n\n\nWhat is Gemini and how it works\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip to main content\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nWhat Gemini Can Do\n\n\n\n\n\n\n\nGemini Live\n\n\nImage Generation\n\n\nVideo Generation\n\n\nDeep Research\n\n\nPersonalization\n\n\nCanvas\n\n\nApps\n\n\nGems\n\n\nGemini in Chrome\n\n\nLong Context\n\n\n\n\n\n\nSubscriptions\n\n\nAbout Gemini\n\n\n\n\n\n\n\nOverview\n\n\nOur Approach\n\n\nPolicy Guidelines\n\n\nLatest News\n\n\n\n\n\n\n\n\nTry Gemini\n\n\n\n\n\n\n\n\n\nIntroduction\nWhat Gemini is\nHow Gemini works\nLimitations\nWhat’s next\n\n\n\nIntroduction\nWhat Gemini i

In [None]:
# PDF Loader
from langchain_community.document_loaders import PyPDFLoader

# Read the PDF into a list of documents and show how many were produced.
loader = PyPDFLoader(file_path="./sagemaker.pdf")
pages = loader.load()
len(pages)

16

In [None]:
# Text Splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the PDF pages into chunks of at most 1000 characters, with 200
# characters shared between neighbouring chunks, then report the chunk count.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splitted_docs = splitter.split_documents(documents=pages)
len(splitted_docs)

35

In [None]:
# Text Splitter for programming languages
from langchain_text_splitters import (
    Language,
    RecursiveCharacterTextSplitter,
)

# Splitter configured with Python-specific separators; the small chunk size
# forces this short sample to be cut into several pieces.
python_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.PYTHON,
    chunk_overlap=0,
    chunk_size=50,
)

# Sample source code to split.
code = """
def faktorial(n):
    if n == 0:
        return 1
    else:
        return n * faktorial(n - 1)
faktorial(5)
"""

python_docs = python_splitter.create_documents(texts=[code])
len(python_docs)

3

In [None]:
from langchain_text_splitters import Language

# Splitter configured with Markdown-specific separators.
md_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.MARKDOWN,
    chunk_overlap=0,
    chunk_size=60,
)

# Sample Markdown document to split.
markdown_text = """
LangChain is a framework for building LLM-powered applications. It helps you chain
together interoperable components and third-party integrations to simplify AI
application development —  all while future-proofing decisions as the underlying
technology evolves.

```bash
pip install -U langchain
```

To learn more about LangChain, check out
[the docs](https://python.langchain.com/docs/introduction/). If you’re looking for more
advanced customization or agent orchestration, check out
[LangGraph](https://langchain-ai.github.io/langgraph/), our framework for building
controllable agent workflows.
"""

# One metadata dict per input text; it is attached to the chunks produced
# from that text, as the first chunk displayed below shows.
md_docs = md_splitter.create_documents(
    texts=[markdown_text],
    metadatas=[{"source": "https://www.langchain.com"}],
)
md_docs[0]

Document(metadata={'source': 'https://www.langchain.com'}, page_content='LangChain is a framework for building LLM-powered')

In [None]:
# Generate text embeddings
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

# Embedding client used for the rest of the notebook. The stable
# "gemini-embedding-001" model replaces the experimental
# "gemini-embedding-exp-03-07" preview, which has been deprecated.
embeddings_model = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001"
)

# Embed three sample sentences; the result holds one vector per input text.
vector = embeddings_model.embed_documents(
    [
        "LangChain is a framework for building LLM-powered applications.",
        "It helps you chain together interoperable components and third-party integrations.",
        "To learn more about LangChain, check out the docs.",
    ]
)

# Preview the first 10 components of the first sentence's vector.
vector[0][:10]

[-0.013569727540016174,
 0.007698915898799896,
 0.004195104818791151,
 -0.0801468938589096,
 -0.009787925519049168,
 0.0188269205391407,
 -0.01276074256747961,
 0.010045362636446953,
 0.021251028403639793,
 0.006386178079992533]

In [None]:
# Chunk the PDF pages (1000-character chunks, 200-character overlap) and embed
# the text of every chunk; the final expression shows how many vectors came back.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
chunks = text_splitter.split_documents(documents=pages)

embeddings = embeddings_model.embed_documents(
    [doc.page_content for doc in chunks]
)
len(embeddings)

35

In [None]:
# Number of chunks currently bound to `chunks`.
# NOTE(review): `chunks` is assigned from the PDF pages in the previous cell and
# rebound to the gemini.txt chunks in a later cell, so this value depends on
# which of those cells ran last — re-run top to bottom to confirm the output.
len(chunks)

6

In [None]:
# Database connection
from langchain_postgres.vectorstores import PGVector

# Reload the Gemini overview text and split it with the shared `text_splitter`.
# The encoding is pinned to UTF-8 because the file contains non-ASCII
# punctuation (e.g. ’); without it the result depends on the platform default.
# Note: this rebinds `chunks`, which previously held the PDF chunks.
loader = TextLoader("./gemini.txt", encoding="utf-8")
chunks = text_splitter.split_documents(loader.load())
chunks

[Document(metadata={'source': './gemini.txt'}, page_content='An overview of the Gemini app\nWe have long seen the potential of AI to make information and computing more accessible and useful to people. We have made pioneering advances on large language models (LLMs) and have seen great progress across Google and in this field more broadly. For several years, we have applied LLMs in the background to improve many of our products, such as autocompleting sentences in Gmail, expanding Google Translate, and helping us better understand queries in Google Search. We continue using LLMs for many Google services, as well as to power the Gemini app, which allows people to collaborate directly with generative AI. We want the Gemini app to be the most helpful and personal AI assistant, giving users direct access to Google’s latest AI models.'),
 Document(metadata={'source': './gemini.txt'}, page_content='While we’re at an important inflection point and encouraged by the widespread excitement aroun

In [None]:
# Connection URL for the local pgvector instance. It can be overridden through
# the PGVECTOR_CONNECTION environment variable so real credentials never need
# to be committed; the fallback is the original local development URL.
connection = os.environ.get(
    "PGVECTOR_CONNECTION",
    "postgresql+psycopg://langchain:langchain@localhost:6024/langchain",
)

# Index the chunks in a dedicated collection and clear that collection first.
# Without this, every run appends to the shared default collection, so re-runs
# create duplicates and searches can return documents from unrelated sources.
db = PGVector.from_documents(
    chunks,
    embeddings_model,
    connection=connection,
    collection_name="gemini_overview",
    pre_delete_collection=True,
)
db

<langchain_postgres.vectorstores.PGVector at 0x75bf93eb6dd0>

In [None]:
# Query the vector store for up to 4 documents (k=4) for the query "test".
# NOTE(review): the saved output below shows a ./sagemaker.pdf document even
# though the preceding cell indexes gemini.txt chunks — presumably the
# collection already held data from an earlier run; confirm on a fresh database.
db.similarity_search("test", k=4)

[Document(id='b7922c37-e14e-4924-9884-74576655a242', metadata={'page': 8, 'title': 'Amazon SageMaker - User Guide', 'author': 'AWS', 'source': './sagemaker.pdf', 'creator': 'ZonBook XSL Stylesheets with Apache FOP', 'keywords': 'Amazon SageMaker, next-generation-sagemaker', 'producer': 'Apache FOP Version 2.6', 'page_label': '6', 'total_pages': 16, 'creationdate': '2025-06-13T16:55:55+00:00'}, page_content='Amazon SageMaker User Guide\nGet started with Amazon SageMaker\nYou can view demos of Amazon SageMaker and get started by setting up a domain and project.\nView demos of Amazon SageMaker\nTo see Amazon SageMaker before using it yourself, you can review the following clickthrough \ndemos:\n• For an end-to-end demo, see the Amazon SageMaker detailed clickthrough experience. This \ndemo includes Amazon SageMaker Lakehouse, Amazon SageMaker Catalog, and more in Amazon \nSageMaker Uniﬁed Studio.\n• For a demo of Amazon SageMaker Lakehouse, see Amazon SageMaker: Access data in your \nlake