In [10]:
# replace the standard sqlite3 module with pysqlite3
# for compatibility with Chroma
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import json
import langchain
import os
import bs4
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_google_vertexai import VertexAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_community.vectorstores import FAISS
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_vertexai import ChatVertexAI
from langgraph.prebuilt import chat_agent_executor
from langchain_core.messages import HumanMessage
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import PipelinePromptTemplate, PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_core.output_parsers import JsonOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain_core.tools import StructuredTool
from typing import Optional, Type
from langchain.tools import BaseTool
from langchain_core.messages import SystemMessage
from langgraph.checkpoint import MemorySaver  # an in-memory checkpointer
from langgraph.prebuilt import create_react_agent

# load environment variables
load_dotenv()

True

In [2]:
# Enable LangSmith tracing for every chain/agent run in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Service-account credentials for Vertex AI (not required inside a Vertex AI
# workbench). A value already present in the environment wins; the path below
# is only a fallback so the notebook is not tied to one machine's layout.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/workspaces/LLM-agent-with-Gemini/fleet-anagram-244304-7dafcc771b2f.json",
)

# API keys are read from the environment; each is None when unset.
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")  # only needed for Google's text embedding model

# os.environ accepts only str values: assigning the result of os.getenv()
# directly raises TypeError when the key is missing, so guard the write.
if GOOGLE_API_KEY is not None:
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [3]:
# Chat model shared by every chain and agent in the cells below.
# Alternative backend (Vertex AI Gemini), kept for quick switching:
# llm = ChatVertexAI(model="gemini-1.5-flash")
llm = ChatOpenAI(model="gpt-4o")

### Example of Chain / Runnable Sequence

In [4]:

# Skeleton of the final prompt: three named slots, each filled by one of the
# sub-prompts defined below.
full_template = """{introduction}

{example}

{start}"""


full_prompt = PromptTemplate.from_template(full_template)

introduction_template = """You are a helpful assistant that can help me to complete the following API payload:"""
introduction_prompt = PromptTemplate.from_template(introduction_template)

# Few-shot examples. Every answer follows the same
# "name:<key>, value:<value>" pattern so the model is not shown a malformed
# answer to imitate.
example_template = """Here are some examples of interactions you might have with me:

Q: B16C, 1126911, FPP
A: name:p.dsid, value:B16C, name:p.lot, value:1126911, name:p.pid, value:FPP

Q: Y42M, 11952591, FQQP
A: name:p.dsid, value:Y42M, name:p.lot, value:11952591, name:p.pid, value:FQQP

Q: Y42M, 1252391, FPC
A: name:p.dsid, value:Y42M, name:p.lot, value:1252391, name:p.pid, value:FPC

Wrap the answer in a dictionary with the structure of the example above. The input will be a string with the format "dsid, lot, pid" 
and the output should be a dictionary with the keys "name:p.dsid", "name:p.lot", and "name:p.pid" with the corresponding values from the input string.

"""


example_prompt = PromptTemplate.from_template(example_template)

# The only sub-prompt with a free variable: the user's "{input}" string.
start_template = """Now, do this for real!

Q: {input}
A:"""
start_prompt = PromptTemplate.from_template(start_template)

# (slot name, sub-prompt) pairs; each name matches a placeholder in full_template.
input_prompts = [
    ("introduction", introduction_prompt),
    ("example", example_prompt),
    ("start", start_prompt),
]
pipeline_prompt = PipelinePromptTemplate(
    final_prompt=full_prompt, pipeline_prompts=input_prompts
)


In [5]:
# Variables the caller still has to supply after the pipeline fills the
# introduction/example/start slots from its sub-prompts.
pipeline_prompt.input_variables

['input']

In [6]:
# Render the composed prompt for one sample input and show the resulting text.
rendered_prompt = pipeline_prompt.format(input="B16C, 1126911, FPP")
print(rendered_prompt)

You are a helpful assistant that can help me to complete the following API payload:

Here are some examples of interactions you might have with me:

Q: B16C, 1126911, FPP
A: name:p.dsid,value, value:B16C, name:p.lot, value:1126911, name:p.pid, value:FPP

Q: Y42M, 11952591, FQQP
A: name:p.dsid, value:Y42M, name:p.lot, value:11952591, name:p.pid, value:FQQP

Q: Y42M, 1252391, FPC
A: name:p.dsid, value:Y42M, name:p.lot, value:1252391, name:p.pid, value:FPC

Wrap the answer in a dictionary with the structure of the example above. The input will be a string with the format "dsid, lot, pid" 
and the output should be a dictionary with the keys "name:p.dsid", "name:p.lot", and "name:p.pid" with the corresponding values from the input string.



Now, do this for real!

Q: B16C, 1126911, FPP
A:


In [7]:
# Runnable sequence: prompt -> chat model -> plain string.
chain = pipeline_prompt | llm | StrOutputParser()

# Well-formed "dsid,lot,pid" input.
output = chain.invoke({"input": "B47R,1952591,FPC"})

print(output)


Sure! Based on the format you provided, here is the output:

```python
{
    "name:p.dsid": "B47R",
    "name:p.lot": "1952591",
    "name:p.pid": "FPC"
}
```


In [8]:
# Input that does not follow the "dsid, lot, pid" format, to see how the
# model reacts.
output = chain.invoke({"input": "testing"})
print(output)

It looks like the input "testing" does not follow the expected format "dsid, lot, pid". Please provide the input in the correct format.

Here's an example of how the input should look: "A12B, 123456, XYZ"

If you provide an input in this format, I can generate the corresponding dictionary for you.

Let's try again! Please provide a valid input.


In [9]:
# Off-topic question: the prompt-only chain has nothing constraining the
# model to the payload task.
output = chain.invoke({"input": "What is langchain"})
print(output)

It appears you want to create an API payload based on the input string format. However, the question "What is langchain?" doesn't match the task provided. 

If you meant to ask about "langchain" instead of constructing an API payload, I'll answer that first:

**LangChain** is a framework that enables the development of applications powered by language models. It allows developers to build applications that can understand and generate human language, facilitating the creation of chatbots, virtual assistants, and other language-based applications. LangChain provides tools and abstractions to streamline the integration of language models into software, making it easier to leverage their capabilities for various use cases.

If you need assistance with the API payload, please provide the correct input string, and I can help you construct the appropriate dictionary.

For example, if you provide the input string "A12B, 345678, XYZ", I can generate the corresponding dictionary as follows:

```

### Retrieval Augmented Generation

In [12]:
# Knowledge base for retrieval: the Google style guides for Python and R.

web_paths = [
    "https://google.github.io/styleguide/pyguide.html",
    "https://google.github.io/styleguide/Rguide.html",
]

# One loader per URL; loaded pages are concatenated in the order of web_paths.
docs = [
    page
    for url in web_paths
    for page in WebBaseLoader(web_paths=(url,)).load()
]

In [13]:
# Split the loaded pages into overlapping chunks (sizes are measured by the
# splitter's length function -- presumably characters; confirm if it matters).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
# Embed every chunk with OpenAI embeddings and index them in a FAISS store.
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings())
# Retriever interface over the store; reused by the RAG chain and, later, by
# the agent's retriever tool.
retriever = vectorstore.as_retriever()

In [14]:
# Rewrite the latest user question into a standalone question using the chat
# history. This matters when the question refers back to earlier turns
# (e.g. "what about R?"), because the retriever only sees the rewritten text.

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: system instruction, then prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Combines the model, the FAISS retriever and the rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [15]:
# System instruction for the answering step; "{context}" is the slot where
# the retrieved documents are inserted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

In [16]:
# Answering prompt: system instruction (with retrieved context), prior turns,
# then the user's question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


# Chain that feeds the retrieved documents into qa_prompt and calls the model.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full RAG pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [17]:
# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``store``.

    Returns:
        The history object registered under ``session_id``; a new empty
        ``ChatMessageHistory`` is registered when the id is not known yet.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap the RAG chain so that each call reads and appends to the per-session
# history returned by get_session_history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",  # key of the user question in the input dict
    history_messages_key="chat_history",  # matches MessagesPlaceholder("chat_history")
    output_messages_key="answer",  # key of the chain output stored as the AI turn
)

In [18]:
# Ask one question in session "testing" and display only the answer text.
conversational_rag_chain.invoke(
    {"input": "What are the best practices for Python code style?"},
    config={
        "configurable": {"session_id": "testing"}
    },  # creates the key "testing" in `store` on first use to hold this session's history
)["answer"]

'The best practices for Python code style include:\n\n1. **Linting:** Use pylint to check your code for errors and enforce coding standards.\n2. **Comments and Docstrings:** Use proper docstring format as per PEP 257, starting with a summary line and following with more details if necessary, all within triple double quotes.\n3. **Consistency:** Maintain consistency with the existing code style in the project, ensuring that your additions blend seamlessly with the surrounding code.\n\nFor more detailed guidelines, refer to the Google Python Style Guide.'

### Agent with custom tools

In [19]:
# Argument schema for the url_navigator tool (referenced by its args_schema).
# NOTE(review): the class docstring and the Field description are presumably
# surfaced to the model through the tool schema -- treat them as runtime text.
class GenericInputSchema(BaseModel):
    """Inputs for url navigation tool."""
    domain: str = Field(
        description="The domain of the website you want to navigate to.")

# Argument schema for the payload_formatter tool (referenced by its args_schema).
# NOTE(review): the class docstring and the Field description are presumably
# surfaced to the model through the tool schema -- treat them as runtime text.
class ParamInputSchema(BaseModel):
    """Inputs API payload."""
    # Single free-form string; payload_formatter.format_input splits and classifies it.
    params: str = Field(
        description="The input string with the format 'dsid, lot, type', which can be used to create a dictionary with the keys 'name:p.dsid', 'name:p.lot', and 'name:p.type'.")
    
# Tool that turns a loosely formatted "dsid, lot, type" string into the API
# payload dictionary. The three parts may appear in any order.
class payload_formatter(BaseTool):
    name: str = "payload_formatter"
    args_schema: Optional[Type[BaseModel]] = ParamInputSchema
    description: str = """
    
    payload formatter is a tool that takes an input string with the format 'dsid, lot, type' and returns a dictionary with the keys 'name:p.dsid', 'name:p.lot', and 'name:p.type'.
    """

    def format_input(self, input_string: str) -> dict:
        """Classify the comma-separated parts of ``input_string`` and build the payload.

        Classification rules, applied to each part independently of position:
          * dsid -- first and last characters are letters, everything in
            between is digits (e.g. ``B47R``);
          * lot  -- digits only (e.g. ``1952591``);
          * type -- anything else (e.g. ``FPC``).

        Args:
            input_string: Raw text such as ``"1952591,,,,,B47R,FPC"``. Dots are
                removed and empty fragments between commas are ignored.

        Returns:
            A dict with the keys ``"name:p.dsid"``, ``"name:p.lot"`` and
            ``"name:p.type"``.

        Raises:
            ValueError: If the input does not contain exactly three non-empty
                parts, or if the parts do not provide one dsid, one lot and
                one type.
        """
        fragments = (part.strip() for part in input_string.replace('.', '').split(','))
        parts = [part for part in fragments if part]
        if len(parts) != 3:
            raise ValueError("Please check that the input contains exactly three parts: dsid, lot, and type.")

        # Start unset so a missing category is reported explicitly instead of
        # surfacing as an UnboundLocalError when the payload is assembled.
        dsid = lot = type_ = None
        for part in parts:
            if part[0].isalpha() and part[-1].isalpha() and part[1:-1].isdigit():
                dsid = part
            elif part.isdigit():
                lot = part
            else:
                type_ = part

        # With exactly three parts, a missing category also means another
        # category matched more than once.
        missing = [
            label
            for label, value in (("dsid", dsid), ("lot", lot), ("type", type_))
            if value is None
        ]
        if missing:
            raise ValueError(
                "Could not identify the following part(s): "
                + ", ".join(missing)
                + ". Expected one dsid (letter, digits, letter), one lot (digits only) and one type."
            )

        return {
            "name:p.dsid": dsid,
            "name:p.lot": lot,
            "name:p.type": type_,
        }

    def _run(self, params: str):
        # Tool entry point: delegates to format_input.
        return self.format_input(params)
    
# Tool that maps a bare domain name to a URL string.
# NOTE(review): the registered tool name is "map_viewer_url", which differs
# from the class name and from the "url navigator" wording in the description
# and system prompts -- confirm this is intended.
class url_navigator(BaseTool):
    name: str = "map_viewer_url"
    args_schema: Optional[Type[BaseModel]] = GenericInputSchema
    description: str = """
    
    url navigator is a tool that takes a domain as input and returns the url of the website you want to navigate to.
    """

    def _run(self, domain: str):
        # Tool entry point. The result has no scheme and always uses the
        # ".com" suffix, e.g. "langchain" -> "www.langchain.com".
        return f"www.{domain}.com"

In [20]:
tools = [url_navigator(), payload_formatter()] # tool instances handed to create_react_agent below

In [21]:
# System message passed to the agent via messages_modifier. It is optional
# and, in this cell, does not mention the tools.
system_message = "You are a helpful assistant"

# In-memory checkpointer: conversation state is kept per thread_id for the
# lifetime of this object only.
memory = MemorySaver()
# ReAct-style agent over the two custom tools.
app = create_react_agent(
    llm, tools, messages_modifier=system_message, checkpointer=memory
)


In [22]:
# thread_id selects which conversation the checkpointer reads and updates.
config = {"configurable": {"thread_id": "testing123"}}

# Messy input (parts out of order, repeated commas) to exercise
# payload_formatter; only the final message's text is printed.
print(
    app.invoke(
        {
            "messages": [
                ("user", "1952591,,,,,B47R,FPC")
            ]
        },
        config,
    )["messages"][-1].content)

Here is the formatted payload:

- **name:p.dsid**: B47R
- **name:p.lot**: 1952591
- **name:p.type**: FPC


In [23]:
# Ask the agent to use the URL tool; only the final message's text is printed.
# The message is an explicit (role, content) pair: a parenthesised string on
# its own is just a str, not a tuple, and only worked through implicit coercion.
print(
    app.invoke(
        {
            "messages": [
                ("user", "Navigate to langchain")
            ]
        },
        config,
    )["messages"][-1].content
)

You can navigate to [LangChain](http://www.langchain.com) by clicking on the link.


### Adding in other built-in tools (e.g., search)

In [24]:
# Built-in web search tool, limited to two results per query.
search = TavilySearchResults(max_results=2)

# Rebinds `tools`: the two custom tools plus web search.
tools = [url_navigator(), payload_formatter(), search]

system_message = "You are a helpful assistant and you know how to use url navigator tool, payload formatter tool and tavily search."
# Optional: the system message only reminds the model which tools it has access to.

# Fresh checkpointer and agent; this rebinds `memory` and `app` from the
# previous section.
memory = MemorySaver()
app = create_react_agent(
    llm, tools, messages_modifier=system_message, checkpointer=memory
)

# New conversation thread for the search examples.
config = {"configurable": {"thread_id": "test-thread"}}



In [25]:
# Ask the agent for a web search; only the final message's text is printed.
# The message is an explicit (role, content) pair: a parenthesised string on
# its own is just a str, not a tuple, and only worked through implicit coercion.
print(
    app.invoke(
        {
            "messages": [
                ("user", "Search for the latest Conor McGregor news")
            ]
        },
        config,
    )["messages"][-1].content
)

Here are some of the latest news articles about Conor McGregor:

1. **Conor McGregor's Silence: A Sign of Renewed Focus?**
   - Chael Sonnen and Daniel Cormier discuss McGregor's silence and its implications for his upcoming return.
   - [Read more](https://www.mmafighting.com/2024/6/4/24171284/chael-sonnen-wonders-conor-mcgregor-silence-sign-of-renewed-focus-daniel-cormier-finds-it-worrisome)

2. **Conor McGregor's Return Teased in Promo for UFC 303**
   - Conor McGregor is set to return to the octagon on June 29 to fight Michael Chandler at UFC 303.
   - [Read more](https://www.mmafighting.com/2024/5/5/24149315/watch-conor-mcgregor-return-teased-in-spine-tingling-promo-ahead-of-ufc-303)

Feel free to check out the articles for more details!


In [30]:
# NOTE(review): duplicate of the import in the first cell; notebook imports
# normally live in one cell at the top.
from langchain.tools.retriever import create_retriever_tool

# Expose the FAISS retriever built in the RAG section as an agent tool; the
# second and third arguments are the tool's name and description.
knowledge_retriever = create_retriever_tool(
    retriever,
    "style_guide_retriever",
    "Searches and returns key pointers for programming languages from google style guide",
)
# Rebinds `tools`: custom tools, web search and the style-guide retriever.
tools = [url_navigator(), payload_formatter(), search, knowledge_retriever]

In [31]:
# System message listing all four tools for the model.
system_message = "You are a helpful assistant and you know how to use url navigator tool, payload formatter tool, tavily search and google style guide retriever."


# Rebuilds the agent with the extended tool list. `memory` is the
# checkpointer created in the search section and is reused here.
app = create_react_agent(
    llm, tools, messages_modifier=system_message, checkpointer=memory
)

# New conversation thread for the style-guide examples.
config = {"configurable": {"thread_id": "chat-thread"}}

In [33]:
# Ask a question about the style guides; only the final message's text is
# printed. The message is an explicit (role, content) pair: a parenthesised
# string on its own is just a str, not a tuple, and only worked through
# implicit coercion.
print(
    app.invoke(
        {
            "messages": [
                ("user", "What are some differences between Python and R's style guide?")
            ]
        },
        config,
    )["messages"][-1].content
)

### Key Differences Between Python and R Style Guides

1. **Naming Conventions**:
   - **Python**: Typically uses `snake_case` for variables and functions.
   - **R**: Uses `BigCamelCase` for functions and prefixes private functions with a dot (`.`).

2. **Assignment Operators**:
   - **Python**: Uses the standard `=` for assignment.
   - **R**: Uses `<-` for assignment. The right-hand assignment (`->`) is discouraged.

3. **Docstrings and Comments**:
   - **Python**: Emphasizes the use of docstrings (`"""`) for documenting modules, classes, and functions. Inline comments should be used sparingly and should be clear.
   - **R**: Focuses more on comments and ensuring they are clear, well-written, and use proper punctuation and grammar.

4. **Power Features and Advanced Techniques**:
   - **Python**: Advises against using advanced features like custom metaclasses, access to bytecode, reflection, and dynamic inheritance as they can make code harder to read and maintain.
   - **R**: Does n