# FastAPI Docs RAG

In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API credentials used by the
# Google GenAI clients later in the notebook — confirm.
load_dotenv()

True

## Get All Tutorial URLs from FastAPI Learn

In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def get_fastapi_docs_urls_requests(timeout=30):
    """Collect the documentation URLs listed under FastAPI's "Learn" navigation.

    Downloads https://fastapi.tiangolo.com/learn/, locates the primary
    navigation menu and gathers every link nested under the
    "Tutorial - User Guide" and "Advanced User Guide" sections.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on an
            unresponsive host.

    Returns:
        A sorted list of unique absolute URLs. An empty list is returned when
        the page cannot be fetched (including on timeout) or when the primary
        navigation menu is not found.
    """
    base_url = "https://fastapi.tiangolo.com/learn/"
    print("Carregando a página com a biblioteca 'requests'...")
    try:
        response = requests.get(
            base_url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')
    except requests.exceptions.RequestException as e:
        # Timeouts, connection errors and HTTP error statuses all land here.
        print(f"Erro ao carregar a página: {e}")
        return []

    # Locate the primary navigation menu.
    nav_container = soup.find("nav", {"class": "md-nav--primary"})
    if not nav_container:
        print("Falha: Menu de navegação primário não encontrado.")
        return []

    # A set de-duplicates links that appear more than once in the menu.
    urls = set()

    # Walk the tutorial and advanced-guide sections of the menu.
    sections_to_find = ["Tutorial - User Guide", "Advanced User Guide"]
    for section_text in sections_to_find:
        span = nav_container.find(
            "span",
            class_="md-ellipsis",
            string=lambda t: t and section_text in t.strip(),
        )
        if not span:
            continue

        # Climb to the enclosing <li>, which holds every link of the section.
        container = span.find_parent('li')
        if not container:
            continue

        for item in container.find_all("a", class_="md-nav__link"):
            href = item.get("href")
            if href:
                # Turn the menu href into an absolute URL on the docs site.
                urls.add(urljoin("https://fastapi.tiangolo.com/", href))

    # sorted() accepts any iterable, so no intermediate list is needed.
    unique_urls = sorted(urls)
    print(f"Encontradas {len(unique_urls)} URLs únicas para carregar.")
    return unique_urls

In [5]:
# Scrape the navigation menu once and keep the URL list for the loader cell.
urls_to_load = get_fastapi_docs_urls_requests()
# Bare expression so the notebook displays the collected URLs.
urls_to_load

Carregando a página com a biblioteca 'requests'...
Encontradas 78 URLs únicas para carregar.


['https://fastapi.tiangolo.com/advanced/',
 'https://fastapi.tiangolo.com/advanced/additional-responses/',
 'https://fastapi.tiangolo.com/advanced/additional-status-codes/',
 'https://fastapi.tiangolo.com/advanced/advanced-dependencies/',
 'https://fastapi.tiangolo.com/advanced/async-tests/',
 'https://fastapi.tiangolo.com/advanced/behind-a-proxy/',
 'https://fastapi.tiangolo.com/advanced/custom-response/',
 'https://fastapi.tiangolo.com/advanced/dataclasses/',
 'https://fastapi.tiangolo.com/advanced/events/',
 'https://fastapi.tiangolo.com/advanced/generate-clients/',
 'https://fastapi.tiangolo.com/advanced/middleware/',
 'https://fastapi.tiangolo.com/advanced/openapi-callbacks/',
 'https://fastapi.tiangolo.com/advanced/openapi-webhooks/',
 'https://fastapi.tiangolo.com/advanced/path-operation-advanced-configuration/',
 'https://fastapi.tiangolo.com/advanced/response-change-status-code/',
 'https://fastapi.tiangolo.com/advanced/response-cookies/',
 'https://fastapi.tiangolo.com/advanc

## Load Content of URLs

In [4]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Restrict parsing to elements carrying the "md-content" class.
# NOTE(review): presumably this is the main article body of each docs page — confirm.
bs4_strainer = bs4.SoupStrainer(class_="md-content")

# Fetch every collected URL and turn each page into a LangChain document.
loader = WebBaseLoader(
    web_paths=urls_to_load,
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Report the size of each loaded page to spot empty or oversized documents.
for loaded_doc in docs:
    print(f"Total characters: {len(loaded_doc.page_content)}")

Total characters: 4850
Total characters: 12194
Total characters: 5516
Total characters: 3513
Total characters: 14585
Total characters: 10751
Total characters: 2542
Total characters: 2351
Total characters: 1793
Total characters: 9209
Total characters: 582
Total characters: 270213
Total characters: 12940
Total characters: 2276
Total characters: 5578
Total characters: 4040
Total characters: 3181
Total characters: 11530
Total characters: 3734
Total characters: 28049
Total characters: 9841
Total characters: 1088
Total characters: 788
Total characters: 10078
Total characters: 4353
Total characters: 14267
Total characters: 12133
Total characters: 3117
Total characters: 1295
Total characters: 14593


In [6]:
# Display the extracted text of the second loaded page as a sanity check.
docs[1].page_content



## Indexing Data

### Splitting into Chunks

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split every page into overlapping chunks for embedding: chunk_size=1000 with
# chunk_overlap=200 (the splitter's default length function counts characters).
# add_start_index=True stores each chunk's offset within its source page under
# the "start_index" metadata key.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True
)
split_docs = splitter.split_documents(docs)
# Display the first chunk to check its content and metadata.
split_docs[0]

Document(metadata={'source': 'https://fastapi.tiangolo.com/tutorial/', 'start_index': 10}, page_content='FastAPI\n  \n\n\n\n\n\n    Learn\n  \n\n\n\n\n\n    Tutorial - User Guide\n  \n\n\n\n\n\nTutorial - User Guide¶\nThis tutorial shows you how to use FastAPI with most of its features, step by step.\nEach section gradually builds on the previous ones, but it\'s structured to separate topics, so that you can go directly to any specific one to solve your specific API needs.\nIt is also built to work as a future reference so you can come back and see exactly what you need.\nRun the code¶\nAll the code blocks can be copied and used directly (they are actually tested Python files).\nTo run any of the examples, copy the code to a file main.py, and start fastapi dev with:\n\n$ <font color="#4E9A06">fastapi</font> dev <u style="text-decoration-style:solid">main.py</u>\n\n  <span style="background-color:#009485"><font color="#D3D7CF"> FastAPI </font></span>  Starting development server 🚀')

### Embedding Model

In [8]:
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

# Embedding model handed to the vector store as its embedding function.
llm_embeddings = GoogleGenerativeAIEmbeddings(model='models/gemini-embedding-001')

### Vector Store

In [9]:
from langchain_chroma import Chroma

# Chroma collection "fastapi_docs", persisted on disk under ./chroma_fastapi_db
# and configured to embed texts with `llm_embeddings`.
vector_store = Chroma(
    collection_name="fastapi_docs",
    embedding_function=llm_embeddings,
    persist_directory="./chroma_fastapi_db",
)

In [None]:
import time

def add_documents_in_batches(vector_store, documents, batch_size=100, delay_seconds=20):
    """
    Add documents to the vector store in batches to avoid rate-limit errors.

    The function pauses for ``delay_seconds`` between consecutive batches.
    No pause is made after the final batch, since nothing else is sent.

    Args:
        vector_store: Object exposing ``add_documents(list_of_documents)``.
        documents: Sliceable sequence of documents to add.
        batch_size: Maximum number of documents sent per call.
        delay_seconds: Seconds to wait between two consecutive batches.

    Raises:
        ValueError: If ``batch_size`` is not positive. A negative value would
            otherwise make the loop a silent no-op.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    total = len(documents)
    for i in range(0, total, batch_size):
        batch = documents[i:i + batch_size]

        print(f"Processando lote de {i} a {i + len(batch)}...")

        vector_store.add_documents(batch)

        # Last batch: report completion and skip the pointless wait.
        if i + batch_size >= total:
            print("Lote processado.")
            break

        print(f"Lote processado. Aguardando {delay_seconds} segundos...")

        # Pause to stay within the API rate limit before the next batch.
        time.sleep(delay_seconds)

# Index every chunk in batches of 20 documents with a 35-second pause.
# NOTE(review): the saved output of this cell shows batches of 5 and a
# 20-second wait, so it appears to come from an earlier run with other arguments.
# NOTE(review): any exception is only printed, so a partially filled index can
# go unnoticed by later cells; re-running the cell presumably adds the same
# chunks to the persisted collection again — confirm.
try:
    add_documents_in_batches(vector_store, split_docs, 20, 35)
    print("Todos os documentos foram adicionados com sucesso!")
except Exception as e:
    print(f"Ocorreu um erro durante o processamento: {e}")

Processando lote de 0 a 5...
Lote processado. Aguardando 20 segundos...
Processando lote de 5 a 10...
Lote processado. Aguardando 20 segundos...
Processando lote de 10 a 15...
Lote processado. Aguardando 20 segundos...
Processando lote de 15 a 20...
Lote processado. Aguardando 20 segundos...
Processando lote de 20 a 25...
Lote processado. Aguardando 20 segundos...
Processando lote de 25 a 30...
Lote processado. Aguardando 20 segundos...
Processando lote de 30 a 35...
Lote processado. Aguardando 20 segundos...
Processando lote de 35 a 40...
Lote processado. Aguardando 20 segundos...
Processando lote de 40 a 45...
Lote processado. Aguardando 20 segundos...
Processando lote de 45 a 50...
Lote processado. Aguardando 20 segundos...
Processando lote de 50 a 55...
Lote processado. Aguardando 20 segundos...
Processando lote de 55 a 60...
Lote processado. Aguardando 20 segundos...
Processando lote de 60 a 65...
Lote processado. Aguardando 20 segundos...
Processando lote de 65 a 70...
Lote proce

## Chat Model

In [20]:
from langchain.chat_models import init_chat_model

# Chat model handed to the question-answering chain.
llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

## Prompt

In [21]:
from langchain import hub

# Pull the "rlm/rag-prompt" template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Render the template with placeholder values to preview the final prompt text.
example_messages = prompt.invoke(
    {"context": "(context goes here)", "question": "(question goes here)"}
).to_messages()

# The template is expected to render to exactly one message.
assert len(example_messages) == 1
print(example_messages[0].content)



You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:


In [25]:
from langchain.chains.retrieval_qa.base import RetrievalQA

# Wire the retriever and the chat model into a question-answering chain.
# The RAG prompt pulled from the hub (`prompt`) is passed explicitly; without
# it RetrievalQA falls back to its built-in default prompt and the pulled
# template is never used.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vector_store.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

# Ask a sample question; the chain returns a dict with "query" and "result".
pergunta = "How can i add security, authentication and authorization in my API?"
resultado = qa_chain.invoke({"query": pergunta})
print(resultado)

{'query': 'How can i add security, authentication and authorization in my API?', 'result': "I'm sorry, but the provided context does not contain information on how to add security, authentication, or authorization to your FastAPI API."}
