In [1]:
import os
import getpass
from openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

In [2]:
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

client = OpenAI()

Enter your OpenAI API key:  ········


In [3]:
persist_directory = './persist'

# Load the Chroma vector store
chroma_vs = Chroma(persist_directory=persist_directory, embedding_function=OpenAIEmbeddings())

  chroma_vs = Chroma(persist_directory=persist_directory, embedding_function=OpenAIEmbeddings())


In [8]:
query = "What's the maternity leave policy?"

results = chroma_vs.similarity_search(query=query)

for result in results:
    print(result.page_content+"\n")

In [5]:
def get_response(messages):
    """
    Generate a response using the chat completions API
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return None

In [6]:
def append_message(messages, role, content):
    """Append a message to the conversation history."""
    messages.append({"role": role, "content": content})
    return messages

In [7]:
# Obtain answer from RAG
rag_results = chroma_vs.similarity_search(query=query)
rag_result = rag_results[0].page_content

# Initialize conversation
messages = [
    {"role": "system", "content": "You are an assistant summarizing policies into employee-friendly explanations."},
    {"role": "user", "content": rag_result}
]

response = get_response(messages)
messages = append_message(messages, "assistant", response)

IndexError: list index out of range

In [None]:
response

In [None]:
rag_result

In [None]:
query_2 = "What if I am adopting a child?"
messages = append_message(messages, "user", query_2)
get_response(messages)

In [None]:
rag_results_aux = chroma_vs.similarity_search(query=query_2)
rag_results_aux[0].page_content

In [None]:
query_2 = "What if I am adopting a child?"
query_2 += "\n" + rag_results_aux[0].page_content


messages = append_message(messages, "user", query_2)
get_response(messages)

In [None]:
# Initialize conversation
messages = [
    {"role": "system", "content": "You are an assistant summarizing policies into employee-friendly explanations."}
]
while True:
    # Let the user ask a question
    query = input()

    # Get the most similar answer from the RAG
    rag_results = chroma_vs.similarity_search(query=query)
    rag_result = rag_results[0].page_content # Get the most similar one

    # Concatenate the user query with the rag response
    final_query = query + "\n" + rag_result

    # Append user message
    messages = append_message(messages, "user", final_query)

    # Get response from the chatbot and add it to the message to continue the context
    response = get_response(messages)
    messages = append_message(messages, "assistant", response)
    print(response+"\n")

In [None]:
# STARTS HERE

In [16]:
# Load the collection
from typing import List
from langchain_core.documents import Document

def load_chroma_collection(name: str, directory: str) -> Chroma:
    """
    Load an existing Chroma collection.

    Args:
        name (str): Name of the collection.
        directory (str): Directory where the collection is persisted.

    Returns:
        Chroma: The loaded Chroma vectorstore.
    """
    persist_directory = os.path.join(directory, name)
    if not os.path.exists(persist_directory):
        raise ValueError(f"Collection '{name}' does not exist in '{directory}'.")

    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

    collection = Chroma(
        collection_name=name,
        embedding_function=embeddings,
        persist_directory=persist_directory
    )
    return collection


# Load retriever from the collection
def load_retriever_from_collection(
    collection_name: str,
    search_type: str = "similarity_score_threshold",
    score_threshold: float = 0.3,
    top_k: int = 5
):
    """
    Load a retriever from a Chroma collection with configurable retrieval behavior.

    Args:
        collection_name (str): Name of the Chroma collection.
        search_type (str): Retrieval type (similarity_score_threshold or mmr).
        score_threshold (float): Minimum similarity score for retrieval.
        top_k (int): Number of documents to return.

    Returns:
        Retriever: Configured retriever.
    """

    # Load the persisted collection
    collection = load_chroma_collection(name=collection_name, directory="./persist")
    
    # Build retriever with configurable behavior
    retriever = collection.as_retriever(
        search_type=search_type,
        search_kwargs={
            "score_threshold": score_threshold,
            "k": top_k
        }
    )
    return retriever

# load retirever with metadata filtering
def load_retriever_with_metadata_from_collection(
    collection_name: str,
    search_type: str = "similarity_score_threshold",
    score_threshold: float = 0.3,
    top_k: int = 5,
    metadata_filter: dict = None
):
    """
    Load a retriever from a Chroma collection with configurable retrieval behavior
    and optional metadata filtering.

    Args:
        collection_name (str): Name of the Chroma collection.
        search_type (str): Retrieval type ("similarity_score_threshold" or "mmr").
        score_threshold (float): Minimum similarity score for retrieval.
        top_k (int): Number of documents to return.
        metadata_filter (dict): Optional filter, e.g. {"source": "assets/documents/vacation-policy.pdf"}

    Returns:
        Retriever: Configured retriever.
    """
    collection = load_chroma_collection(name=collection_name, directory="./persist")
    
    retriever = collection.as_retriever(
        search_type=search_type,
        search_kwargs={
            "score_threshold": score_threshold,
            "k": top_k,
            "filter": metadata_filter  # <-- apply metadata filter
        }
    )
    return retriever


# Retrieve with expanded queries
def retrieve_with_expanded_queries(
    collection_name: str,
    queries: List[str],
    search_type: str = "similarity_score_threshold",
    score_threshold: float = 0.3,
    top_k: int = 5,
    metadata_filter: dict = None
) -> List[Document]:
    """
    Retrieve relevant documents from a Chroma collection using one or more expanded queries.

    Args:
        collection_name (str): Name of the Chroma collection.
        queries (List[str]): List of queries, e.g., original query + expanded terms.
        search_type (str): Retrieval type ("similarity_score_threshold" or "mmr").
        score_threshold (float): Minimum similarity score.
        top_k (int): Number of documents to return per query.
        metadata_filter (dict): Optional metadata filter.

    Returns:
        List[Document]: Aggregated, deduplicated documents.
    """
    retriever = load_retriever_from_collection(
        collection_name=collection_name,
        search_type=search_type,
        score_threshold=score_threshold,
        top_k=top_k
    )
    
    results = []
    for q in queries:
        docs = retriever.get_relevant_documents(q)
        results.extend(docs)
    
    # Deduplicate by source or content
    unique_results = {d.metadata.get("source", d.page_content): d for d in results}
    return list(unique_results.values())

# Simple query expansion function
def expand_query(query: str, n_terms: int = 5) -> list[str]: 
    """ Use LLM to generate related terms for query expansion. """ 
    client = OpenAI() 
    prompt = f""" 
        Generate {n_terms} synonyms of the core word/phrase of the following query for use in document retrieval. 
        Keep them short, noun-phrases. Query: "{query}" """ 
    
    response = client.chat.completions.create( model="gpt-4o-mini", messages=[{"role":"user","content": prompt}], max_tokens=100 ) 
    text = response.choices[0].message.content.strip() 
    
    return [t.strip("-• ") for t in text.split("\n") if t.strip()] 

In [34]:
query = "Maternity leave policy"
expanded_terms = expand_query(query, n_terms=3)
all_queries = [query] + expanded_terms

retriever = load_retriever_from_collection(
    collection_name="benefits_collection",
    score_threshold=0.6,
    top_k=3
)

docs = retriever.get_relevant_documents(query)

for i, doc in enumerate(docs, 1):
    print(f"\nResult {i}:")
    print(doc.page_content[:300], "...")
    print("Metadata:", doc.metadata)


Result 1:
of paid maternity leave, while non-birth parents receive six weeks of paid paternity leave. adoptive parents receive eight weeks of paid leave that can be shared between both parents. employees must have been with the company for at least 12 months to qualify for paid parental leave, though unpaid l ...
Metadata: {'source': 'assets/documents/childcare-policy.pdf'}

Result 2:
for ﬁnding specialized care providers in the community. this policy is eﬀective as of [current date] and may be modiﬁed as business needs and legal requirements change. employees will receive 30 days advance notice of any signiﬁcant changes to childcare beneﬁts. for speciﬁc questions about your situ ...
Metadata: {'source': 'assets/documents/childcare-policy.pdf'}

Result 3:
launch periods, and other critical business periods that will be communicated to employees at least 60 days in advance. while we try to minimize blackout periods, these restrictions help ensure we can meet our commitments to clients 

In [None]:
# Initialize conversation
messages = [
    {"role": "system", "content": "You are an assistant summarizing policies into employee-friendly explanations."}
]
while True:
    # Let the user ask a question
    query = input()

    # Get the most similar answer from the RAG
    rag_results = retriever.get_relevant_documents(query)
    rag_result = rag_results[0].page_content # Get the most similar one

    # Concatenate the user query with the rag response
    final_query = query + "\n" + rag_result

    # Append user message
    messages = append_message(messages, "user", final_query)

    # Get response from the chatbot and add it to the message to continue the context
    response = get_response(messages)
    messages = append_message(messages, "assistant", response)
    print(response+"\n")

 Maternity leave policy


**Maternity Leave Policy Summary**

At our company, we offer supportive parental leave options to help new parents transition during this important time.

- **Paid Leave**: 
  - **Birth Parents**: Eligible employees can take 12 weeks of paid maternity leave.
  - **Non-Birth Parents**: These employees are entitled to six weeks of paid paternity leave.
  - **Adoptive Parents**: They can receive eight weeks of paid leave, which can be shared between partners.

- **Eligibility**: To qualify for paid leave, you must have been with the company for at least 12 months. However, if you've been with us for less time, you may have options for unpaid leave under FMLA guidelines.

- **Benefits During Leave**: Your health insurance and other benefits remain active while you're on leave, and we guarantee you a spot in our on-site childcare center when you return.

- **Return-to-Work Flexibility**: We understand the transition back to work can be challenging, so we offer flexible return-to-work arrang

 what if I want to adopt a child?


**Adoption Support Policy Summary**

If you are considering adopting a child, our company is here to support you through the process. Here’s what you need to know:

- **Adoption Assistance**: We provide up to $5,000 in reimbursement for adoption-related expenses. This can help cover costs like agency fees and legal services.

- **Referrals**: We offer referrals to attorneys who specialize in adoption, ensuring you have access to knowledgeable support during the legal aspects of the process.

- **Travel for International Adoptions**: If you’re adopting internationally, you may be eligible for additional unpaid leave for necessary travel. We also offer limited reimbursement for travel expenses associated with the adoption.

- **Support for Nursing Mothers**: For those who may be nursing after bringing a child home, we have dedicated lactation rooms equipped with hospital-grade pumps, refrigeration, and comfortable seating. These rooms can be reserved online to ensure privacy.

- **Flexib

 What if I adopt a dog?


**Pet Adoption Support Policy Summary**

While our company has comprehensive support for human adoptions, we understand that bringing a pet into your family is also a significant commitment. Here's how we are here to support you if you adopt a dog:

- **Pet Ownership Support**: While we don't offer specific reimbursement for pet adoption expenses like we do for human adoptions, we encourage our employees to take advantage of our flexible work schedules to help acclimate their new pet to their home environment.

- **Flexible Work Arrangements**: If you're adopting a dog, you may consider utilizing hybrid work options or flexible hours to make the transition smoother for both you and your new pet.

- **Pet-Friendly Culture**: We aim to foster a pet-friendly workplace culture. Check with your manager about policies regarding pets in the office or any pet-related events our company may host.

We are committed to promoting a healthy work-life balance and understanding the joys and responsib