# RAG + HR Program

In [3]:
# !pip install langchain
# !pip install langchain-openai
# !pip install langchain-community
# !pip install chromadb
# !pip install python-dotenv

In [4]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [5]:
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("Missing OPENAI_API_KEY in environment")

llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo", temperature=0)

In [6]:
loader = TextLoader("hr_policy_long.txt")
docs = loader.load()

In [7]:
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs = splitter.split_documents(docs)

In [8]:
print(f"Number of chunks created: {len(split_docs)}")

Number of chunks created: 10


In [9]:
embeddings = OpenAIEmbeddings(api_key=api_key)
vectordb = Chroma.from_documents(split_docs, embeddings, persist_directory="./hr_chroma_db")

In [10]:
retriever = vectordb.as_retriever()

In [11]:
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an HR assistant. Use the following HR policy context to answer the question at the end.
If the answer is not in the document, respond with 'I don't know.'

Context:
{context}

Question:
{question}

Answer:
"""
)

In [12]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt}
)

In [14]:
query = "What is the company policy on overtime pay?"
response = qa_chain.invoke(query)


print("Q:", query)
print("A:", response)

Q: How many sick days do employees get each year?
A: {'query': 'How many sick days do employees get each year?', 'result': 'Employees receive 5 sick days per year.'}
