In [25]:
from langchain.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

# Location of the certification catalogue PDF that is chunked and indexed below.
PDF_PATH = "app/assets/certifications.pdf"

# Read the PDF and split it into a list of Document chunks.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load_and_split()

In [26]:
# Peek at the first chunk to sanity-check what the loader produced
# (metadata plus the extracted page text).
docs[0]

Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® Word 2016', 'creationdate': '2025-07-27T02:17:26+00:00', 'author': 'html-to-docx', 'keywords': 'html-to-docx', 'moddate': '2025-07-27T02:17:26+00:00', 'source': 'app/assets/certifications.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Cisco Networking Certifications \nCisco Certified Support Technician (CCST) \n\uf0b7 Description (from official site): \n"The Cisco Certified Support Technician (CCST) certification validates foundational knowledge in \nnetworking, cybersecurity, and data analytics and is ideal for those starting their IT careers." \n\uf0b7 Official Vendor Name & Price: \nCisco: $125 \n\uf0b7 Training Providers: \no Learning Tree \nLink: https://www.learningtree.ca/courses/cisco-certified-support-technician-ccst-training/ \nPrice: $2,195 \no Udemy \nLink: https://www.udemy.com/course/cisco-certified-support-technician/ \nPrice: $14.99–$49.99 \nCisco Certified Network Associate

In [27]:
# Chat model used to write the final answers.
# NOTE(review): temperature=0.8 makes answers vary between runs; consider a
# lower value if reproducible, context-grounded output matters - confirm intent.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.8)

# Embedding model (library defaults) used to vectorise chunks and queries.
embeddings = OpenAIEmbeddings()

In [28]:
from langchain_chroma import Chroma

# Build (or reopen) the persisted collection and index the PDF chunks.
#
# Deterministic ids make this cell idempotent: without explicit ids every
# re-run generates fresh random ids and appends another copy of each chunk to
# the on-disk collection, so similarity_search starts returning duplicates.
# With stable ids a re-run overwrites the existing entries instead.
# NOTE(review): copies written by earlier runs (random ids) are not removed by
# this change; clear the "data" directory once if the collection already has them.
chunk_ids = [f"certifications-{position}" for position in range(len(docs))]

chroma_db = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="data",
    collection_name="certificates1",
)

In [29]:
# Broad smoke-test question used for the first retrieval check.
query = "What is this document about?"

In [30]:
# Retrieve the chunks most similar to the smoke-test query.
# The hits get their own name on purpose: rebinding `docs` here would replace
# the loaded PDF chunks with a handful of search results, so re-running the
# indexing cell afterwards would index the wrong data.
overview_matches = chroma_db.similarity_search(query)
print(overview_matches)

[Document(id='f943e6e1-9722-453a-91b1-5e2fd24a2105', metadata={'creationdate': '2025-07-27T02:17:26+00:00', 'keywords': 'html-to-docx', 'page_label': '15', 'producer': 'www.ilovepdf.com', 'page': 14, 'author': 'html-to-docx', 'total_pages': 15, 'source': 'app/assets/certifications.pdf', 'moddate': '2025-07-27T02:17:26+00:00', 'creator': 'Microsoft® Word 2016'}, page_content='o GreyCampus \nLink: https://www.greycampus.com/ \nPrice: ~$600–$900'), Document(id='714decad-7cc4-403b-9ab0-61620c942a26', metadata={'keywords': 'html-to-docx', 'moddate': '2025-07-27T02:17:26+00:00', 'producer': 'www.ilovepdf.com', 'source': 'app/assets/certifications.pdf', 'total_pages': 15, 'author': 'html-to-docx', 'creationdate': '2025-07-27T02:17:26+00:00', 'page_label': '11', 'creator': 'Microsoft® Word 2016', 'page': 10}, page_content='\uf0b7 Description (from official site): \n"Proves expertise in designing cloud and hybrid solutions that run on Microsoft Azure, including \ncompute, network, storage, moni

In [47]:
# System prompt text for the certification assistant.
# `{context}` is a template field that is filled in when the prompt is
# formatted with a `context` argument; everything else is sent verbatim.
prompt = """

    You help the user let them choose a certification based on the context provided. You are not allowed to use any knowledge other than the context provided.
    Answer the user query based on the context and analyzie the chat history when required.
    if the answer dosent exist in the context and chat history then say i dont know
    if the answer is related to chat history then give an appropriate answer.

    Format answers in proper format with links, prices and resources from the context provided.
    If you dont have information about the query which user asked. In the context provided then you should say i dont know.

    give response in a proper and clean markdown format.
    
    context:{context}

    """

# Fetch the chunks closest to the current query and show them for inspection.
source_docs = chroma_db.similarity_search(query)

print(source_docs)

[Document(id='383ba26b-b860-49ee-9f4e-e25e4a6568c5', metadata={'creator': 'Microsoft® Word 2016', 'creationdate': '2025-07-27T02:17:26+00:00', 'keywords': 'html-to-docx', 'source': 'app/assets/certifications.pdf', 'moddate': '2025-07-27T02:17:26+00:00', 'page': 13, 'author': 'html-to-docx', 'producer': 'www.ilovepdf.com', 'total_pages': 15, 'page_label': '14'}, page_content='o Simplilearn \nLink: https://www.simplilearn.com/certified-ethical-hacker-ceh-certification-course \nPrice: $1,099 \no Udemy \nLink: https://www.udemy.com/courses/search/?q=ceh \nPrice: $14.99–$49.99 \nOSCP (Offensive Security Certified Professional) \n\uf0b7 Description (from official site): \n"The OSCP is a hands-on penetration testing certification for professionals who want to prove their \npractical and technical skills." \n\uf0b7 Official Vendor Name & Price: \nOffensive Security: $1,599 (exam & 90 days lab access) \n\uf0b7 Training Providers: \no Hacker101 \nLink: https://www.hacker101.com/ \nPrice: Free \n

In [48]:
from langchain_core.messages import AIMessage, HumanMessage
# Import the prompt classes from langchain_core (already a dependency of this
# notebook) rather than the legacy `langchain.prompts` re-export path.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Chat prompt layout: system instructions (with the {context} field), then the
# running conversation, then the user's current question.
template = ChatPromptTemplate.from_messages([
        ("system", prompt),
        MessagesPlaceholder("history"),
        ("human", "{question}")
    ])

# Conversation memory; later cells append HumanMessage/AIMessage pairs to it.
chat_history = []


In [49]:
# Comparative question: set the query, retrieve its supporting chunks, and
# print them so the retrieved context can be inspected.
query = "Which certification has the most affordable training options?"
source_docs = chroma_db.similarity_search(query)
print(source_docs)

[Document(id='383ba26b-b860-49ee-9f4e-e25e4a6568c5', metadata={'creator': 'Microsoft® Word 2016', 'page_label': '14', 'moddate': '2025-07-27T02:17:26+00:00', 'page': 13, 'author': 'html-to-docx', 'keywords': 'html-to-docx', 'total_pages': 15, 'creationdate': '2025-07-27T02:17:26+00:00', 'source': 'app/assets/certifications.pdf', 'producer': 'www.ilovepdf.com'}, page_content='o Simplilearn \nLink: https://www.simplilearn.com/certified-ethical-hacker-ceh-certification-course \nPrice: $1,099 \no Udemy \nLink: https://www.udemy.com/courses/search/?q=ceh \nPrice: $14.99–$49.99 \nOSCP (Offensive Security Certified Professional) \n\uf0b7 Description (from official site): \n"The OSCP is a hands-on penetration testing certification for professionals who want to prove their \npractical and technical skills." \n\uf0b7 Official Vendor Name & Price: \nOffensive Security: $1,599 (exam & 90 days lab access) \n\uf0b7 Training Providers: \no Hacker101 \nLink: https://www.hacker101.com/ \nPrice: Free \n

In [50]:
# Give the model only the chunk text as context; formatting the Document list
# directly would inject its repr (ids, metadata dicts) into the prompt.
context_text = "\n\n".join(doc.page_content for doc in source_docs)

# format_messages() keeps the system / history / human roles as separate chat
# messages. format() would flatten everything into a single "System: ...
# Human: ..." string that the chat model receives as one user message.
messages = template.format_messages(
    question=query,
    history=chat_history,
    context=context_text,
)
res = llm.invoke(messages)
print(res.content)

The certification with the most affordable training options based on the provided context is the **CompTIA Advanced Security Practitioner (CASP+)**. The training for CASP+ is available on Udemy for a price range of $14.99–$49.99. The official vendor price for the CASP+ certification is $494. 

You can find more information about the CASP+ certification and training options at the following links:
- [Official Vendor Price and Description](https://www.comptia.org/certifications/comptia-advanced-security-practitioner)
- [Udemy Training Options](https://www.udemy.com/courses/search/?q=comptia%20casp%2B)


In [23]:
# Record the finished turn (user question, then model answer) in the
# conversation memory so later prompts can refer back to it.
chat_history.extend([
    HumanMessage(content=query),
    AIMessage(content=res.content),
])

In [24]:
# Follow-up that can only be answered from the chat history; retrieval is
# still run so the prompt's context field has a value.
query = "What was my 1st question ?"
source_docs = chroma_db.similarity_search(query)


In [25]:
# Give the model only the chunk text as context; formatting the Document list
# directly would inject its repr (ids, metadata dicts) into the prompt.
context_text = "\n\n".join(doc.page_content for doc in source_docs)

# format_messages() keeps the system / history / human roles as separate chat
# messages. Flattening with format() sends one "System: ... Human: ... AI: ..."
# string, which lets the model echo role prefixes such as "AI:" in its reply.
messages = template.format_messages(
    question=query,
    history=chat_history,
    context=context_text,
)
res = llm.invoke(messages)
print(res.content)

AI: Your first question was "What is this document about?"


In [15]:
from langchain.chains import RetrievalQA

# Retrieval-QA chain: fetch chunks via the Chroma retriever and "stuff" them
# all into a single prompt for the LLM.
chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type="stuff",
                                    retriever=chroma_db.as_retriever())

# Use .invoke() with the chain's explicit "query" input key; calling the chain
# object directly (chain(query)) is the deprecated entry point. The result is
# the same dict carrying the "query" and "result" keys.
response = chain.invoke({"query": query})

print(response)


{'query': 'What is this document about?', 'result': 'This document provides information about Jeff Bezos, the founder of Amazon, his business ventures, leadership style, early life, education, and involvement in space exploration through Blue Origin. It covers various aspects of his personal and professional life, including his management philosophies, investments, and contributions to different industries such as media, technology, and space travel.'}
