In [65]:
from dotenv import load_dotenv
import os

DATA_DIR="./data"
load_dotenv() 

if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = os.getenv("GEMINI_API_KEY")

In [81]:
# Let's first load the document
from langchain_community.document_loaders import PyPDFLoader, TextLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

file_path = DATA_DIR + "/_1_cv_yuri_barsotti_mendes.pdf"
pdf_loader = PyPDFLoader(file_path,
                    mode="single")

txt_loader = DirectoryLoader(DATA_DIR, glob='**/*.txt', loader_cls=TextLoader)
pdf_doc = pdf_loader.load()
txt_docs = txt_loader.load()

# Joining all docs since I have more than one type
all_docs = pdf_doc + txt_docs

# It's a good practice to break the text into smaller chinks
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
split_docs = splitter.split_documents(all_docs)


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [84]:
# embedding text
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from uuid import uuid4
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Cannot use This AIEmbedding for now, quota excceeded
# embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")
vector_store = Chroma(
    collection_name="yuri_data_1",
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)

# Is this a good idea?
ids = [str(uuid4()) for _ in range(len(split_docs))]

vector_store.add_documents(documents=split_docs, ids=ids)

['46fe028f-ef63-4456-9fe4-1dc059d0feb1',
 '5f9c8f9b-631c-4e84-8657-5e1d4918f939',
 'f3db0539-aa7a-47b4-b3c9-5eb86231b132',
 '5e7ffff5-cb2f-4f73-89d7-d1052f4ff05c',
 '96b71abd-2ab0-4f49-964a-0c78b952c7fc',
 '80b94b9b-3347-4690-919f-c496558a58a5',
 '642685ad-a13c-4707-aa0a-ec81761c5a69',
 'cd0e980b-f243-4164-8f2b-b23a959f0511',
 '3863a8be-5e5f-4cff-9b1d-b7420a8d943a',
 'a5feb1f4-0ca2-4266-81a2-89f10de7bd00',
 'c2d3a3d1-9c43-4510-9b43-fd619a2d39e4',
 '5dd98d63-19fa-42a1-b38b-c04dd9f7be55',
 '443d76b4-3f48-4157-8037-a4785f280443',
 'd1ba12ce-3d6f-4bb9-a165-e284c3c4664a',
 'a3e6c840-74e2-4cc2-8805-13eccf6db33d',
 'd28fa955-74a1-46ae-8679-73ebf4d6fd11',
 '78bca169-9520-40e7-9952-30ae2c93cd6d',
 '5971b9c6-1a58-4e1c-adab-03ef2bf45926',
 '93813d20-84d9-4fb3-99fb-03dfe19b64c7',
 'e939226c-b721-4af6-80f6-200bd2876991',
 '2f9de72c-e65b-47f4-8080-a541b9bef75c',
 'cecde43c-a745-4ce0-99b3-3f81db28c3ac',
 '7a5c3d85-92ea-4644-abdd-3cb85be1d91c',
 '7b9bd238-cced-447e-94b4-2c8f54894b25',
 '0925a7a8-1896-

In [106]:
# Query data
retriever = vector_store.as_retriever(search_kwargs={"k": 4})


Description: Developed during a game jam, URUBU is a 3D horror game set in Brazil with PS1-style visuals and an eerie, immersive atmosphere. The game was voted the winner of the jam and featured in a full article: https://www.nerdmaldito.com/2024/03/urubu-terror-ambientado-no-brasil.html. As a team, we built a complete MVP demo featuring a gripping intro, gameplay loop, and narrative hook for future expansion. This was my first 3D game, where I learned key concepts in game architecture and
Role: Designed and developed the entire game loop using Godot and GDScript, focusing on performance and gameplay feel. Adapted external assets and built the scene structure from the ground up.
Tech stack: ['Godot', 'Aseprite', 'GDScript']
Category: Game
------------------------------------------
Title: URUBU – Brazilian Horror Game
Company: Gamejam Team (noname)
Date: Aug 2024 - Sep 2024
Education
FullCycle University, Remote Jan 2024 - Dec 2025
MBA in Software Architecture
Anhembi Morumbi University

In [112]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain import hub

system_instructions = ''
with open("./llm_instructions/system_prompt.txt", "r") as llm_instructions_f:
    system_instructions = llm_instructions_f.read()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            system_instructions,
        ),
        ("human", "{input}"),
    ]
)

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.5,
    max_tokens=300, # Reduced a bit, it's talking too much
    max_retries=2
)

combine_docs_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

answer = retrieval_chain.invoke({
    "input": "How many years of experience does he have with Python?"
})
print(answer)


{'input': 'How many years of experience he has with Python?', 'context': [Document(id='cd0e980b-f243-4164-8f2b-b23a959f0511', metadata={'source': 'data/tech_experience.txt'}, page_content='Python: 6 years\nJavaScript: 6 years\nTypeScript: 5 years\nGolang: 1 years\nDjango: 5 years\nNestJS: 4 years\nReact: 5 years\nNext.js: 4 years\nNode.js: 5 years\nAWS: 4 years\nDocker: 5 years\nCI/CD: 5 years\nRabbitMQ: 2 years\nRelational database: 7 years\nRedis: 5 years\nUML: 7 years\nC4: 5 years\nDDD: 4 years\nClean Architecture: 4 years\nMicroservices: 1 years'), Document(id='443d76b4-3f48-4157-8037-a4785f280443', metadata={'source': 'data/projects.txt'}, page_content="Role: Focused on hands-on implementations and collaborative decision-making with the team to define architectural standards and optimize project structures. Developed automation scripts in Python to streamline repetitive tasks, enhancing team efficiency and delivery speed. Recognized for proactive contributions, high-quality delive