In [1]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.chat_models import ChatPerplexity
from langchain.chains import RetrievalQA
import os

# Minimal retrieval-augmented QA pipeline over a single resume PDF:
# load -> split -> embed -> FAISS index -> retriever -> Perplexity LLM -> RetrievalQA.

# Loader
# Forward slash on purpose: in a normal string literal "\H" is an invalid escape
# sequence (a warning today, an error in future Python versions) and a backslash
# separator only works on Windows. "/" works on every platform.
loader = PyMuPDFLoader("data/Huy_Bui_Resume.pdf")
documents = loader.load()

# Split
# NOTE(review): chunk_size is measured in characters here; the embedding model
# below presumably truncates long inputs (all-mpnet-base-v2 is documented with a
# few-hundred-token limit), so 5000-character chunks may only be partially
# embedded - confirm and consider a smaller chunk_size.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
chunks = text_splitter.split_documents(documents)

# Embedding
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}
# Vectors are left un-normalized, so the index compares raw Euclidean distances;
# set this to True to get cosine-similarity-equivalent ranking.
encode_kwargs = {"normalize_embeddings": False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Vector store: build the index in memory and persist a copy to disk.
vector_store = FAISS.from_documents(chunks, hf)
vector_store.save_local("faiss_index_open")

# Retriever: return the 3 nearest chunks for each query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# LLM
# The API key is read from the environment instead of being committed with the
# notebook. A key that was previously hard-coded here must be treated as leaked
# and rotated. Raises KeyError if PPLX_API_KEY is not set.
llm = ChatPerplexity(
    model="sonar",
    pplx_api_key=os.environ["PPLX_API_KEY"],
    temperature=0.2,
)

# QA chain: "stuff" concatenates all retrieved chunks into a single prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

# Candidate test queries. Only the last one is sent, which matches the original
# behavior where earlier assignments to `query` were overwritten before use.
sample_queries = [
    "How many times do the vowels 'ae' appear in 'I am a data analyst and engineer'?",
    "What are Huy Bui’s certifications from the resume?",
    "Dont make up the answer if you dont know. Question: What are Huy Bui's top skills?",
]
query = sample_queries[-1]
response = qa_chain.invoke(query)

# Show the answer followed by the chunks that were handed to the LLM.
print("Response", response['result'])
print("############################")
for i, doc in enumerate(response['source_documents']):
    print(f"######DOC {i+1}######")
    print(doc)

  hf = HuggingFaceEmbeddings(
  llm = ChatPerplexity(


Response Based on the provided information, Huy Bui's top skills appear to be in **data science and programming**, particularly in:

- **Python programming**: Utilizing libraries like Pandas, Numpy, and Scikit-learn.
- **Data analysis and modeling**: Developing regression models and leveraging machine learning techniques.
- **Cloud computing**: Proficient in AWS services such as S3 and Lambda.
- **Database management**: Skilled in SQL and BigQuery.
- **Web development**: Experienced with React and JavaScript for UI/UX enhancements.
- **Leadership and team management**: Proven ability to lead cross-functional teams in fast-paced environments.

However, if you are referring to a different Huy Bui, such as the professor at Parsons or the barber, their skills would be different and not detailed in the provided context.
############################
######DOC 1######
page_content='Huy Bui
williamhuybui@gmail.com | linkedin.com/in/huy-bui-ds
Experience
Publicis Groupe
Remote
Senior Data Scien

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.retrievers import MultiQueryRetriever
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatPerplexity
import os

# Retrieval-augmented QA over a resume PDF with LLM-based contextual compression:
# load -> token split -> BGE embeddings -> FAISS -> compression retriever -> RetrievalQA.

# 1. Document Loading
loader = PyPDFLoader("data/Huy_Bui_Resume.pdf")
documents = loader.load()

# 2. Text Splitting
# NOTE(review): TokenTextSplitter counts tokens, and bge-base-en presumably
# truncates inputs well below 5000 tokens (documented limit is 512) - confirm
# and consider a smaller chunk_size so whole chunks are actually embedded.
text_splitter = TokenTextSplitter(chunk_size=5000, chunk_overlap=500)
chunks = text_splitter.split_documents(documents)

# 3. Embeddings (normalized, so distance ranking is equivalent to cosine similarity)
model_name = "BAAI/bge-base-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# 4. Vector Store (should replace with other model - Chromadb)
# NOTE(review): this writes to the same "faiss_index_open" directory the first
# cell uses, so an index built with a different embedding model is overwritten.
vector_store = FAISS.from_documents(chunks, hf)
vector_store.save_local("faiss_index_open")

# 5. Prompt Template (Need to fix)
prompt_template = """
You are an expert at analyzing resumes. Answer based on this context:
{context}

Question: {question}
If the answer isn't in the context, say "I don't know"."""
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

# 6. Retrieval Setup
base_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
# Local Ollama model; not referenced again in this cell (presumably intended for
# a MultiQueryRetriever - kept in case later cells rely on it).
qretriever_llm = Ollama(model="llama3")

# Perplexity LLM
# The API key comes from the environment rather than the notebook source. A key
# that was previously hard-coded here must be treated as leaked and rotated.
# Raises KeyError if PPLX_API_KEY is not set.
perplexity_llm = ChatPerplexity(
    model="sonar",
    pplx_api_key=os.environ["PPLX_API_KEY"],
    temperature=0.2
)

# Contextual compression (using multi_retriever or base_retriever)
# NOTE(review): the compressor rewrites each retrieved chunk with the same
# Perplexity model; the recorded output suggests it can inject text that is not
# in the PDF - verify, and consider a non-web-grounded model for extraction.
compressor = LLMChainExtractor.from_llm(perplexity_llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever
)

# 7. QA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=perplexity_llm,
    chain_type="stuff",
    retriever=compression_retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True
)

# Test queries
queries = [
    "What are Huy Bui's certifications?",
    "What are Huy Bui's top skills?",
    "How many years of experience does Huy Bui have?"
]

for query in queries:
    print(f"\nQuery: {query}")
    try:
        # .invoke replaces the deprecated direct call qa_chain({...}) and
        # matches how the chain is called in the first cell.
        response = qa_chain.invoke({"query": query})
    except Exception as e:
        # Best-effort demo loop: report the failure and move on to the next query.
        print(f"Error processing query: {e}")
        continue

    print("Answer:", response['result'])
    for i, doc in enumerate(response['source_documents']):
        print(f"\nSource {i+1}:")
        print(doc.page_content)


Query: What are Huy Bui's certifications?
Answer: Based on the context provided, there is no mention of Huy Bui's certifications in the available sources. The information about a "Bootcamp certification in Data Science from Flatiron School" is not referenced in the provided links or summaries[1][2][3]. If you are referring to a specific Huy Bui, please provide more details.

**Answer:**  
I don't know. The available context does not specify Huy Bui's certifications. If you are referring to a different Huy Bui, please clarify.

Source 1:
There is no specific mention of Huy Bui's certifications in the provided context. However, based on other available information, Huy Bui has a **Bootcamp certification in Data Science from Flatiron School**[1]. If you are referring to a different Huy Bui, please provide more context or details.

Query: What are Huy Bui's top skills?
Answer: Huy Bui's top skills, based on his resume, include:

- **Programming and Data Science Languages:** Python (with l