<a href="https://colab.research.google.com/github/vanshika1302/GrociFun/blob/main/Chatbot_HR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

!pip install langchain openai chromadb pandas

In [8]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import os

In [9]:
!pip install -U langchain-community



In [10]:
import getpass
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


In [11]:
sample_text = """
Company HR Policy:

1. Employees are entitled to 20 days of paid annual leave.
2. Sick leave requires a medical certificate if longer than 2 days.
3. Work from home is allowed 2 days per week with manager approval.
4. Payroll is processed on the 25th of each month.
5. Overtime will be compensated as per company guidelines.
"""

with open("hr_policy.txt", "w") as f:
    f.write(sample_text)

In [12]:
loader = TextLoader("hr_policy.txt")
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
chunks = splitter.split_documents(docs)

print(f"Number of chunks: {len(chunks)}")
print(chunks[0].page_content)

Number of chunks: 3
Company HR Policy:


In [13]:
!pip install chromadb



In [14]:
from huggingface_hub import notebook_login
notebook_login()  # will ask you to paste your token

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
!pip install huggingface_hub transformers



In [16]:
!pip install sentence-transformers transformers accelerate scipy



In [17]:
# 1) Read the chunks we created earlier (if you used the splitter, use that list 'chunks')
# If you followed earlier steps you have `chunks` that are Document objects with .page_content
# For safety, we'll create a simple chunks list from the file if not present:

try:
    texts = [c.page_content for c in chunks]   # if you already created `chunks` via LangChain
except NameError:
    with open("hr_policy.txt", "r") as f:
        txt = f.read()
    # small manual split (for tiny doc)
    texts = [txt]

# 2) Load a sentence-transformer model for embeddings
from sentence_transformers import SentenceTransformer
embed_model = SentenceTransformer("all-MiniLM-L6-v2")   # small, fast, good for semantics

# 3) Compute embeddings for each chunk
embeddings = embed_model.encode(texts, convert_to_numpy=True)
print("Loaded", len(texts), "chunks and computed embeddings; embedding_dim =", embeddings.shape[1])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loaded 3 chunks and computed embeddings; embedding_dim = 384


In [18]:
import numpy as np
from scipy.spatial.distance import cdist

def retrieve_top_k(question, k=2):
    """
    - embeds the question
    - computes cosine similarity with document chunk embeddings
    - returns top-k chunk texts (most similar)
    """
    q_emb = embed_model.encode([question], convert_to_numpy=True)  # shape (1, dim)
    # compute cosine distances between question and all chunks (cdist gives distances; 1 - similarity)
    distances = cdist(q_emb, embeddings, metric="cosine")[0]       # shape (n_chunks,)
    # smaller distance => more similar. get k smallest distances
    topk_idx = np.argsort(distances)[:k]
    topk_texts = [texts[i] for i in topk_idx]
    topk_scores = [1 - float(distances[i]) for i in topk_idx]     # convert to similarity score
    return topk_texts, topk_scores

In [19]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# load the small Flan-T5 text2text model (runs in Colab)
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# create a text2text pipeline for generation
generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=0 if __import__("torch").cuda.is_available() else -1)

def answer_question(question, k=2, max_length=256):
    # 1. retrieve relevant context
    top_texts, scores = retrieve_top_k(question, k=k)
    # 2. build prompt: provide context + question (clear instruction)
    context_text = "\n".join([f"Context {i+1}:\n{t}" for i, t in enumerate(top_texts)])
    prompt = "You are an assistant that answers HR policy questions based on the provided context.\n\n"
    prompt += f"{context_text}\n\n"
    prompt += f"Question: {question}\nAnswer concisely:"
    # 3. generate an answer from the model
    out = generator(prompt, max_new_tokens=max_length, do_sample=False)
    return out[0]["generated_text"], scores

# Quick local test
q = "When is payroll processed?"
ans, sims = answer_question(q, k=2)
print("Similarity scores:", sims)
print("Answer:\n", ans)

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu


Similarity scores: [0.6995278764780954, 0.2607640868064236]
Answer:
 25th of each month


In [20]:
questions = [
    "What are important documents employee should submit while onboarding?",
    "What is the sick leave rule?",
    "Can I work from home more than 2 days?",
    "When is payroll processed?"
]

for q in questions:
    ans, sims = answer_question(q, k=2)
    print("Q:", q)
    print("Similarity:", sims)
    print("A:", ans.strip(), "\n" + "-"*60)

Q: What are important documents employee should submit while onboarding?
Similarity: [0.22635944046703682, 0.10504236923941102]
A: Payroll 
------------------------------------------------------------
Q: What is the sick leave rule?
Similarity: [0.6863082285132348, 0.3384559336543933]
A: Sick leave requires a medical certificate if longer than 2 days. 
------------------------------------------------------------
Q: Can I work from home more than 2 days?
Similarity: [0.5124700198850847, 0.22876836508725862]
A: Yes 
------------------------------------------------------------
Q: When is payroll processed?
Similarity: [0.6995278764780954, 0.2607640868064236]
A: 25th of each month 
------------------------------------------------------------


fastapi
uvicorn[standard]
gradio
sentence-transformers
transformers
torch
chromadb
requests
python-multipart

In [21]:
%%writefile requirements.txt
fastapi
uvicorn[standard]
gradio
sentence-transformers
transformers
torch
chromadb
requests
python-multipart

Writing requirements.txt


In [22]:
pip install -r requirements.txt



In [24]:
# ingest.py
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import os
import pickle

# 1. Load text
loader = TextLoader("hr_policy.txt", encoding="utf-8")
docs = loader.load()   # list of Document objects (page_content)

# 2. Split into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)
chunks = splitter.split_documents(docs)
texts = [c.page_content for c in chunks]

# 3. Embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(texts, convert_to_numpy=True)

# 4. Persist embeddings + texts (simple)
os.makedirs("vector_data", exist_ok=True)
# Save texts
with open("vector_data/texts.pkl", "wb") as f:
    pickle.dump(texts, f)
# Save embeddings (numpy)
import numpy as np
np.save("vector_data/embeddings.npy", embeddings)

print("Ingested:", len(texts), "chunks. Embeddings saved in vector_data/")

Ingested: 1 chunks. Embeddings saved in vector_data/


In [26]:
# app/chat_backend.py
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from scipy.spatial.distance import cdist
import pickle, os

# Load persisted data
# BASE = os.path.dirname(os.path.dirname(__file__))  # project root when run from app/
BASE = "/content" # Assuming the data is saved in /content/vector_data
texts_path = os.path.join(BASE, "vector_data", "texts.pkl")
emb_path = os.path.join(BASE, "vector_data", "embeddings.npy")

with open(texts_path, "rb") as f:
    TEXTS = pickle.load(f)
EMBEDDINGS = np.load(emb_path)

# Load embedding model for queries (same model so vectors comparable)
EMBED_MODEL = SentenceTransformer("all-MiniLM-L6-v2")

# Load generator (Flan-T5)
MODEL_NAME = "google/flan-t5-small"
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
MODEL = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
GENERATOR = pipeline("text2text-generation", model=MODEL, tokenizer=TOKENIZER, device=0 if __import__("torch").cuda.is_available() else -1)

def retrieve_top_k(question, k=2):
    q_emb = EMBED_MODEL.encode([question], convert_to_numpy=True)  # (1, dim)
    distances = cdist(q_emb, EMBEDDINGS, metric="cosine")[0]       # (n_chunks,)
    topk_idx = np.argsort(distances)[:k]
    top_texts = [TEXTS[i] for i in topk_idx]
    top_scores = [1 - float(distances[i]) for i in topk_idx]
    return top_texts, top_scores

def build_prompt(contexts, question):
    # strict prompt: only use context
    prompt = "You are an assistant that answers questions using ONLY the provided context. If answer not present, say 'I don't know based on the provided documents.'\n\n"
    for i, c in enumerate(contexts, 1):
        prompt += f"[Context {i}]\n{c}\n\n"
    prompt += f"Question: {question}\nAnswer concisely and cite the context like [Context 1]."
    return prompt

def answer_question(question, k=2, max_length=256):
    contexts, scores = retrieve_top_k(question, k=k)
    prompt = build_prompt(contexts, question)
    out = GENERATOR(prompt, max_length=max_length, do_sample=False)
    answer = out[0]["generated_text"]
    return {"answer": answer, "sources": [{"text": contexts[i], "score": scores[i], "idx": i} for i in range(len(contexts))]}

Device set to use cpu


In [27]:
# Test the answer_question function
question = "What is the sick leave policy?"
response = answer_question(question)
print("Answer:", response["answer"])
print("Sources:", response["sources"])

Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Answer: 1. Employees are entitled to 20 days of paid annual leave. 2. Sick leave requires a medical certificate if longer than 2 days. 3. Work from home is allowed 2 days per week with manager approval. 4. Payroll is processed on the 25th of each month. 5. Overtime will be compensated as per company guidelines.
Sources: [{'text': 'Company HR Policy:\n\n1. Employees are entitled to 20 days of paid annual leave.\n2. Sick leave requires a medical certificate if longer than 2 days.\n3. Work from home is allowed 2 days per week with manager approval.\n4. Payroll is processed on the 25th of each month.\n5. Overtime will be compensated as per company guidelines.', 'score': 0.6614773052440638, 'idx': 0}]


In [50]:
# app/main.py
from fastapi import FastAPI
from pydantic import BaseModel
# from app.chat_backend import answer_question # Remove this line
# from X6zU2KstllfW import answer_question # Import directly from the cell where it's defined


app = FastAPI(title="HR RAG Chatbot")

class Query(BaseModel):
    question: str
    k: int = 2

@app.post("/chat")
async def chat(q: Query):
    res = answer_question(q.question, k=q.k)
    return res

 # uvicorn app.main:app --reload --port 8000

In [None]:
# Run the FastAPI application with uvicorn
# This will expose the application to the internet via ngrok (automatically handled by Colab)
!uvicorn __main__:app --reload --port 8000

[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://127.0.0.1:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m12033[0m] using [36m[1mWatchFiles[0m
[31mERROR[0m:    Error loading ASGI app. Attribute "app" not found in module "__main__".


In [None]:
import gradio as gr
import requests

# Define the FastAPI endpoint URL
# In Colab, this will be the public URL provided by ngrok when you run FastAPI
# You'll need to get this URL after running the uvicorn cell (bc02b01d)
FASTAPI_URL = "http://127.0.0.1:8000" # Replace with your actual URL from the uvicorn output

def get_answer_from_backend(question, k):
    """Sends a question to the FastAPI backend and returns the response."""
    try:
        response = requests.post(f"{FASTAPI_URL}/chat", json={"question": question, "k": k})
        response.raise_for_status() # Raise an exception for bad status codes
        data = response.json()
        answer = data.get("answer", "No answer found.")
        sources = data.get("sources", [])
        source_text = "\n\nSources:\n" + "\n".join([f"- Context {s['idx'] + 1}: {s['text']}" for s in sources])
        return answer + source_text
    except requests.exceptions.RequestException as e:
        return f"Error communicating with backend: {e}"

# Create the Gradio interface
iface = gr.Interface(
    fn=get_answer_from_backend,
    inputs=[
        gr.Textbox(label="Ask a question about the HR Policy"),
        gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Number of source chunks (k)")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="HR Policy Chatbot"
)

# Launch the Gradio app
iface.launch(inline=True)

**Note:** After running the cell above, you will see a public URL generated by Gradio. You can use this URL to access your chatbot interface in a separate browser tab. Remember to replace `"YOUR_FASTAPI_URL_HERE"` in the code with the actual URL provided by ngrok when you run the FastAPI application in cell `bc02b01d`.