In [1]:
#! pip install -U langchain-nomic langchain_community tiktoken chromadb langchain langgraph tavily-python gpt4all firecrawl-py

In [2]:
import os 

In [3]:
os.environ['LANGCHAIN_TRACING_V2'] = 'True'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com/'
os.environ['LANGCHAIN_API_KEY'] = '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'


In [4]:
# local_llm = 'llama3'
local_llm ='mistral'

#### Retrieve Docs , split them , filter metadata, create a vectorDB and add the embeddings to the vectorstore


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.document_loaders import FireCrawlLoader
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_community.docstore.document import Document


urls = [
    'https://www.ai-jason.com/learning-ai/how-to-reduce-llm-cos',
    'https://www.ai-jason.com/learning-ai/how-to-build-ai-agent-tutorial-3',
    'https://www.ai-jason.com/learning-ai/gpt5-llm'
]
url = 'https://www.ai-jason.com/learning-ai/gpt5-llm'

docs = [FireCrawlLoader(api_key="fc-92da0e383e4943868bdfe0601d0cad6e", url=url, mode="scrape").load()]
#docs = [FireCrawlLoader(api_key="fc-92da0e383e4943868bdfe0601d0cad6e", url=url, mode="scrape").load() for url in urls]


#split documents
docs_list = [item for sublist in docs for item in sublist]


# Split documents 
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size = 250 , chunk_overlap =0
)
doc_splits = text_splitter.split_documents(docs_list)

# Filter out complex metadata and ensure proper document formatting
filtered_docs = []
for doc in doc_splits: 
    # Ensure the doc is an instance of Document and has a metadata attribute
    if isinstance(doc, Document) and hasattr(doc, 'metadata'):
        clean_metadata = {
            k:v for k,v in doc.metadata.items() if isinstance (v, (str, int ,float, bool))
        }
        
        filtered_docs.append(Document(page_content= doc.page_content, metadata = clean_metadata))


# Add to vectorDB 
vectorstore =Chroma.from_documents(
    documents= filtered_docs,
    collection_name="rag-chroma", 
    embedding=GPT4AllEmbeddings(),
)

retriever = vectorstore.as_retriever()

### Retrieval Grader

In [6]:
# Retrieval Grader 
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

llm = ChatOllama(model = local_llm, format="json", temperature=0)


prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.  \n 
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination. 
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document : \n\n {document} \n\n 
    Here is the user question : {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables= ["question", "document"], 
)

retrieval_grader = prompt |llm |JsonOutputParser()
question = "how to save llm cost?"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question , "document": doc_txt}))

{'score': 'no'}


#### Generate answer 

In [7]:
from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser


# prompt 
prompt = PromptTemplate (
    template = """<|begin_of_text|><|start_header_id|>System<|end_header_id|> You are an assistant for question-answering tasks. \n
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    USe three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question : {question}
    Context : {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables= ["question", "document"], 
)
llm = ChatOllama(model =local_llm , temperature=0)

#post-preprocessing 
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

#Chain 
rag_chain = prompt |llm | StrOutputParser()

#Run 
question = "give me three main difference between GPT4 and GPT5 in bulletpoints"
docs = retriever.invoke(question)
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

 * GPT-5 aims to enhance reasoning abilities and introduce system two thinking compared to the limited reasoning capabilities and reliability of GPT-4.
* Prompt engineering and communicative agents are strategies to promote system two thinking in large language models like GPT-4, but they will be more effective with GPT-5 due to its enhanced reasoning abilities.
* GPT-5 holds the promise of enabling large language models to solve complex problems more effectively by bridging the gap between system one and system two thinking.


#### web search via tavily

In [8]:
os.environ['TAVILY_API_KEY'] = "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"
from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)

### Hallucination grader

In [9]:
import time
llm = ChatOllama(model =local_llm , format = "json", temprature = 0) 

# Prompt
prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)
start = time.time()
hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader_response = hallucination_grader.invoke({"documents": docs, "generation": generation})
end = time.time()
print(f"The time required to generate response by the generation chain in seconds:{end - start}")
print(hallucination_grader_response)

The time required to generate response by the generation chain in seconds:101.66108012199402
{'score': 'yes'}


### Answer grader

In [10]:
# answer grader 
 #LLM 
llm = ChatOllama(model=local_llm, format = "json", temperature=0)
 
#prompt
prompt = PromptTemplate(
    template = """ <|begin_of_text|><|start_header_id|>System<|end_header_id|> You are a grader assessing whether an answer is useful to resolve a question. 
    Give a binary score 'yes' or 'no' score to indicate whether the answer is useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation 
    <|eot_id|><|start_header_id|>user <|end_header_id|>
    Here is the answer : 
    \n --------------------  \n
    {generation}
    \n --------------------  \n
    
    Here is the answer : {question}<|eot_id|><|start_header_id|>assistant <|end_header_id|> """, 
    input_variables= ["generation" , "question"] , 
)
answer_grader = prompt |llm | JsonOutputParser()
answer_grader.invoke({"question": question , "generation" : generation})

{'score': 'yes'}

### Lang graph  -Setup states and nodes 


In [17]:
from typing_extensions import TypedDict
from typing import List 
from pprint import pprint

class GraphState(TypedDict): 
    """ 
        Represents the state of our graph 
        Attributes:
            question : question
            generation : LLM generation
            web_search : whether to add search 
            documents: List of documents
    """
    question :str
    generation : str
    web_search :str
    documents :List[str]
    
from langchain.schema import Document 

def retrieve(state):
    """
    Retrieve documents from vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.invoke(question)
    return {"documents": documents, "question": question}
#
def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    
    # RAG generation
    generation = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}
#
def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]
    
    # Score each doc
    filtered_docs = []
    web_search = "No"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score['score']
        # Document relevant
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        # Document not relevant
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            # We do not include the document in filtered_docs
            # We set a flag to indicate that we want to run web search
            web_search = "Yes"
            continue
    return {"documents": filtered_docs, "question": question, "web_search": web_search}
#
def web_search(state):
    """
    Web search based based on the question

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended web results to documents
    """

    print("---WEB SEARCH---")
    question = state["question"]
    documents = state["documents"]

    # Web search
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}
#s

def decide_to_generate(state):
    """ 
    Determines whether to generate an answer, or add web search 
    Args:
        state(Dict): The current graoh state
    Returns:   
        str: Binary decision for next node to call
    """
    print("---- ASSESS GRADED DOCUMENTS----")
    question = state["question"]
    web_search = state["web_search"]
    filtered_documents = state["documents"]
    if web_search == "yes":
        
        print("----DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION. INCLUDE WEB SEARCH-----")
        return "websearch"
    else: 
        print("-----DECISION: GENRATE ------")
        return "generate"
    
def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    grade = score['score']

# Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question,"generation": generation})
        grade = score['score']
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"
    
    
from langgraph.graph import StateGraph, END
workflow = StateGraph(GraphState)

#DEfine the nodes 
workflow.add_node("websearch", web_search)
workflow.add_node("retrieve", retrieve)
workflow.add_node("grade_documents", grade_documents)
workflow.add_node("generate", generate)
    

            

In [18]:
# Build graph 
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve" , "grade_documents") 
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate, 
    {
        "websearch" : "websearch",
        "generate" : "generate", 
    },
)

workflow.add_edge("websearch","generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported" : "generate", 
        "useful" : END,
        "not useful" : "websearch",
    },
)

In [19]:
#Compile 
app = workflow.compile()

#Test
from pprint import pprint
inputs = {"question" :"Tell me one thing about GPT5"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running :{key}:")
print(value["generation"])

---RETRIEVE---
'Finished running :retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---- ASSESS GRADED DOCUMENTS----
-----DECISION: GENRATE ------
'Finished running :grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running :generate:'
 GPT-5 is a proposed upgrade to the current large language model, GPT-4. It aims to enhance reasoning abilities and introduce system two thinking for greater reliability and problem-solving capabilities. However, its development is ongoing, and strategies like prompt engineering and communicative agents are being used today to promote system two thinking in existing models.


In [20]:
app = workflow.compile()

#Test
from pprint import pprint
inputs = {"question" :"Who is lotfi bouchnak?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running :{key}:")
print(value["generation"])

---RETRIEVE---
'Finished running :retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---- ASSESS GRADED DOCUMENTS----
-----DECISION: GENRATE ------
'Finished running :grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running :generate:'
 Lotfi Bouchnak is a renowned Tunisian pianist and composer known for his virtuosity and unique interpretations of classical music. He has won numerous international competitions and has given performances in prestigious venues around the world.
