In [17]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.chat_models import ChatPerplexity
from langchain.chains import RetrievalQA
import os

# --- Load: read the resume PDF into LangChain documents ---
loader = PyMuPDFLoader("data/Huy_Bui_Resume.pdf")
documents = loader.load()

# --- Split: cut the documents into overlapping character chunks ---
# NOTE(review): 5000-character chunks may be longer than the embedding model
# accepts, in which case the tail of each chunk would not be embedded -- confirm.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=5000,
    chunk_overlap=500,
)
chunks = splitter.split_documents(documents)

# --- Embedding: sentence-transformers model, run on CPU ---
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cpu")
# Author's note on normalize_embeddings: False -> Euclidean, True -> cosine similarity.
encode_kwargs = dict(normalize_embeddings=False)
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# --- Vector store: index the chunks in FAISS and persist the index to disk ---
vector_store = FAISS.from_documents(chunks, hf)
vector_store.save_local("faiss_index_open")

# --- Retriever: return the 3 closest chunks per query ---
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

#LLM
# The Perplexity API key is read from the PPLX_API_KEY environment variable so
# that no credential is stored in the notebook (or in its git history).
# NOTE: a key was previously hard-coded in this cell; treat that key as
# compromised and rotate it in the Perplexity dashboard.
pplx_api_key = os.environ.get("PPLX_API_KEY")
if not pplx_api_key:
    # Fail here with an actionable message instead of an opaque auth error later.
    raise RuntimeError(
        "PPLX_API_KEY is not set. Export it in the environment that starts the "
        "Jupyter kernel before running this cell."
    )

llm = ChatPerplexity(
    model="sonar",
    pplx_api_key=pplx_api_key,
    temperature=0.2,
)

# QA chain: combine the LLM and the retriever using the "stuff" chain type.
# return_source_documents=True makes the response carry the retrieved
# documents under 'source_documents' next to the answer under 'result'.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


  hf = HuggingFaceEmbeddings(
  llm = ChatPerplexity(


In [18]:
# Only one question is sent to the chain per run. The alternatives are kept as
# comments rather than as assignments that are immediately overwritten.
# query = "How many times do the vowels 'ae' appear in 'I am a data analyst and engineer'?"
# query = "What are Huy Bui’s certifications from the resume?"
query = "Dont make up the answer if you dont know. Question: What are Huy Bui's top skills?"
response = qa_chain.invoke(query)

In [19]:
# Alternative questions to try (swap with the active one below):
# query = "What are Huy Bui’s certifications from the resume?"
# query = "What are Huy Bui's top skills?"
query = "How many times do the vowels 'ae' appear in 'I am a data analyst and engineer'?"
response = qa_chain.invoke(query)

# Print the answer first, then every retrieved source document, numbered from 1.
answer = response['result']
print("Response", answer)
print("############################")
for doc_number, doc in enumerate(response['source_documents'], start=1):
    print(f"######DOC {doc_number}######")
    print(doc)

Response Based on the provided context, Huy Bui's top skills appear to be in **data science and programming**, specifically:

- **Programming Languages**: Python, JavaScript, SQL, and React.
- **Data Science Tools**: Pandas, Numpy, Scikit-learn, BigQuery, and AWS services like S3 and Lambda.
- **Data Analysis and Modeling**: Regression models, time series analysis, and hybrid models.
- **Cloud and Infrastructure**: AWS, OpenAI API, and CI/CD tools like TeamCity and GitHub.
- **Leadership and Team Management**: Leading cross-functional teams and mentoring.

However, if you are referring to a different Huy Bui, I don't have enough information to provide their specific skills.
############################
######DOC 1######
page_content='Huy Bui
williamhuybui@gmail.com | linkedin.com/in/huy-bui-ds
Experience
Publicis Groupe
Remote
Senior Data Scientist
June 2022 – Present
– Developed 7 client-agnostic applications using Python and Dash, empowering 100+ analysts to streamline data
analysis 

In [20]:
import os 

In [21]:
# List the notebook's working directory (e.g. to check that the FAISS index folder was written).
os.listdir(".")

['.DS_Store',
 'indexing.ipynb',
 'pdf_qa_simple.ipynb',
 'faiss_index_open',
 '.git',
 'data']