In [None]:
import os
from openai import AzureOpenAI
from dotenv import load_dotenv, find_dotenv

load_dotenv()

# Azure OpenAI connection settings, read from environment variables.
aoai_api_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
aoai_api_key = os.getenv("AZURE_OPENAI_API_KEY")
# Prefer the Azure-specific variable; fall back to OPENAI_API_VERSION when it
# is unset or empty.
aoai_api_version = os.getenv("AZURE_OPENAI_API_VERSION") or os.getenv(
    "OPENAI_API_VERSION"
)
aoai_deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
aoai_emb_deployment_name = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")

# Echo the resolved settings so a misconfigured environment is visible right
# away. The API key is intentionally not printed.
print("=== Initialized AzureOpenAI client ===")
print(f"AZURE_OPENAI_ENDPOINT={aoai_api_endpoint}")
print(f"AZURE_OPENAI_API_VERSION={aoai_api_version}")
print(f"AZURE_OPENAI_DEPLOYMENT_NAME={aoai_deployment_name}")
print(f"AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME={aoai_emb_deployment_name}")

In [None]:
from azure_genai_utils.rag.pdf import PDFRetrievalChain

# Relative path to the AutoGen paper PDF that the retrieval chain is built over.
pdf_path = "../../sample-docs/AutoGen-paper.pdf"

# Configure the PDFRetrievalChain helper and build it with create_chain().
# NOTE(review): the model and embedding names are hard-coded here instead of
# using the AZURE_OPENAI_* deployment settings read earlier — confirm intended.
pdf = PDFRetrievalChain(
    source_uri=[pdf_path],
    loader_type="PDFPlumber",
    model_name="gpt-4o-mini",
    embedding_name="text-embedding-3-large",
    chunk_size=500,
    chunk_overlap=50,
).create_chain()

# Keep handles to the retriever and the chain exposed by the built object.
pdf_retriever = pdf.retriever
pdf_chain = pdf.chain

In [None]:
import json
from typing import List, Literal, Optional

import tiktoken
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.messages import get_buffer_string
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode

In [None]:
# Embedding client backing the recall-memory store.
# `azure_deployment` uses the AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME setting
# read at the top of the notebook; when that variable is unset the value is
# None, which is the same as not passing the argument at all.
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    azure_deployment=aoai_emb_deployment_name,
    chunk_size=1000,
)
# Vector store that holds the saved memories, embedded with the client above.
recall_vector_store = InMemoryVectorStore(embeddings)

### Define tools

In [None]:
import uuid


def get_user_id(config: RunnableConfig) -> str:
    """Return the user ID stored under ``config["configurable"]["user_id"]``.

    Args:
        config: Runtime configuration; the ID is read from its
            ``"configurable"`` mapping.

    Returns:
        The configured user ID.

    Raises:
        ValueError: If the ``"configurable"`` section is missing or ``None``,
            or if it has no ``"user_id"`` (or the value is ``None``).
    """
    # Treat a missing/None "configurable" section like a missing user ID so the
    # caller gets the descriptive ValueError rather than a KeyError or
    # AttributeError.
    configurable = config.get("configurable") or {}
    user_id = configurable.get("user_id")
    if user_id is None:
        raise ValueError("User ID needs to be provided to save a memory.")

    return user_id


@tool
def save_recall_memory(memory: str, config: RunnableConfig) -> str:
    """Save memory to vectorstore for later semantic retrieval."""
    # Tag the memory with its owner so searches can be restricted per user.
    # get_user_id raises ValueError when the config carries no user ID.
    owner_metadata = {"user_id": get_user_id(config)}
    # Each stored memory gets its own freshly generated UUID as document ID.
    memory_id = str(uuid.uuid4())
    recall_vector_store.add_documents(
        [Document(page_content=memory, id=memory_id, metadata=owner_metadata)]
    )
    # The saved text is returned unchanged as the tool result.
    return memory


@tool
def search_recall_memories(query: str, config: RunnableConfig) -> List[str]:
    """Search for relevant memories."""
    # get_user_id raises ValueError when the config carries no user ID.
    owner_id = get_user_id(config)

    # Fetch up to three matches, keeping only documents whose metadata marks
    # them as belonging to this user.
    hits = recall_vector_store.similarity_search(
        query,
        k=3,
        filter=lambda doc: doc.metadata.get("user_id") == owner_id,
    )

    # Return just the stored text of each hit.
    texts = []
    for hit in hits:
        texts.append(hit.page_content)
    return texts


@tool
def pdf_retrieve(query: str, config: RunnableConfig):
    """Retrieve information regarding AutoGen paper. If the query asks for details about AutoGen, use this tool."""
    # Marker printed on every call so tool usage shows up in the cell output.
    print("\n==== [RETRIEVE] ====\n")
    # `config` is accepted but not read by this tool.
    passages = []
    for hit in pdf_retriever.invoke(query):
        passages.append(hit.page_content)
    return passages

In [None]:
from azure_genai_utils.tools import BingSearch

# Value passed to BingSearch as `format_output` below.
WEB_SEARCH_FORMAT_OUTPUT = False

# Web search tool built from the project's BingSearch helper; it is added to
# the agent's tool list in a later cell.
web_search_tool = BingSearch(
    max_results=1,
    locale="en-US",
    include_news=False,
    include_entity=False,
    format_output=WEB_SEARCH_FORMAT_OUTPUT,
)

In [None]:
# search = TavilySearchResults(max_results=1)
# Tools made available to the agent: memory save/search, PDF retrieval, and web search.
tools = [save_recall_memory, search_recall_memories, pdf_retrieve, web_search_tool]

In [None]:
from typing import List
from typing_extensions import TypedDict, Annotated


# Graph state: everything MessagesState provides plus the recalled memories.
class State(MessagesState):
    # add memories that will be retrieved based on the conversation context
    # (written by the load_memories node, read by the agent node)
    recall_memories: Annotated[List[str], "List of recall memories"]
    # documents: Annotated[List[str], "List of documents"]

In [None]:
# System prompt for the memory agent. `{recall_memories}` is the only template
# slot; it is filled with the memories retrieved for the current conversation.
# The tool names in guideline 1 match the memory tools actually bound to the
# model (save_recall_memory, search_recall_memories).
# The variable name keeps its original spelling because the prompt template
# below refers to it.
system_prpmpt = """
You are a helpful assistant with advanced long-term memory capabilities. 
Powered by a stateless LLM, you must rely on external memory to store information between conversations. 
Utilize the available memory tools to store and retrieve important details that will help you better attend to the user's needs and understand their context.

## Memory Usage Guidelines:
1. Actively use memory tools (save_recall_memory, search_recall_memories) to build a comprehensive understanding of the user.
2. Make informed suppositions and extrapolations based on stored memories.
3. Regularly reflect on past interactions to identify patterns and preferences.
4. Update your mental model of the user with each new piece of information.
5. Cross-reference new information with existing memories for consistency.
6. Prioritize storing emotional context and personal values alongside facts.
7. Use memory to anticipate needs and tailor responses to the user's style.
8. Recognize and acknowledge changes in the user's situation or perspectives over time.
9. Leverage memories to provide personalized examples and analogies.
10. Recall past challenges or successes to inform current problem-solving.

## Constraint
1. Review the provided context thoroughly and extract key details related to the question.
2. Craft a precise answer based on the relevant information.
3. Keep the answer concise but logical/natural/in-depth.
4. If the retrieved context does not contain relevant information or no context is available, respond with: 'I can't find the answer to that question in the context.'

## Recall Memories
Recall memories are contextually retrieved based on the current conversation:
{recall_memories}

## Instructions
Engage with the user naturally, as a trusted colleague or friend. There's no need to explicitly mention your memory capabilities. 
Instead, seamlessly incorporate your understanding of the user into your responses. 
Be attentive to subtle cues and underlying emotions. Adapt your communication style to match the user's preferences and current emotional state. 
Use tools to persist information you want to retain in the next conversation. 
If you do call tools, all text preceding the tool call is an internal message. 
Respond AFTER calling the tool, once you have confirmation that the tool completed successfully.
"""

# Define the prompt template for the agent
# The system text carries the {recall_memories} slot; the "placeholder" entry
# is filled from the `messages` input supplied when the prompt is invoked.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prpmpt),
        ("placeholder", "{messages}"),
    ]
)

In [None]:
# Chat model configured with the value read from AZURE_OPENAI_DEPLOYMENT_NAME.
model = AzureChatOpenAI(model_name=aoai_deployment_name)
# Bind the tool list so the model can emit tool calls for them.
model_with_tools = model.bind_tools(tools)
# Tokenizer used by load_memories to cap the length of the memory-search query.
# NOTE(review): fixed to the "gpt-4o" encoding regardless of the deployment — confirm.
tokenizer = tiktoken.encoding_for_model("gpt-4o")

from typing import List
from typing_extensions import TypedDict, Annotated


# Graph state: everything MessagesState provides plus the recalled memories.
# NOTE(review): this repeats the State class already declared in an earlier cell.
class State(MessagesState):
    # add memories that will be retrieved based on the conversation context
    recall_memories: Annotated[List[str], "List of recall memories"]


def agent(state: State) -> State:
    """Run the tool-enabled chat model on the conversation and return its reply.

    Args:
        state: Current graph state; ``"messages"`` and ``"recall_memories"``
            are read from it.

    Returns:
        A state update whose ``"messages"`` list contains only the model's
        new message.
    """
    chain = prompt | model_with_tools

    # Wrap the recalled memories, one per line, in <recall_memory> tags; this
    # text fills the prompt's {recall_memories} slot.
    memory_lines = "\n".join(state["recall_memories"])
    recall_block = f"<recall_memory>\n{memory_lines}\n</recall_memory>"

    prompt_inputs = {
        "messages": state["messages"],
        "recall_memories": recall_block,
    }
    reply = chain.invoke(prompt_inputs)
    return {"messages": [reply]}


def load_memories(state: State, config: RunnableConfig) -> State:
    """Look up stored memories that match the conversation so far.

    Args:
        state: Current graph state; ``"messages"`` is read from it.
        config: Runtime configuration, forwarded to the memory search tool.

    Returns:
        A state update with the search results under ``"recall_memories"``.
    """
    transcript = get_buffer_string(state["messages"])
    # Only the first 2048 tokens of the transcript are used as the search query.
    token_ids = tokenizer.encode(transcript)
    query_text = tokenizer.decode(token_ids[:2048])
    return {"recall_memories": search_recall_memories.invoke(query_text, config)}


def route_tools(state: State):
    """Pick the next graph step from the most recent message.

    Args:
        state: Current graph state; only the last entry of ``"messages"``
            is inspected.

    Returns:
        ``"tools"`` when the last message has tool calls, otherwise ``END``.
    """
    last_message = state["messages"][-1]
    return "tools" if last_message.tool_calls else END

In [None]:
# Assemble the workflow: load memories, then loop between agent and tools.
builder = StateGraph(State)

# Register each node under an explicit name (the same names the edges use).
builder.add_node("load_memories", load_memories)
builder.add_node("agent", agent)
builder.add_node("tools", ToolNode(tools))

# Entry goes through memory loading; after the agent runs, route_tools picks
# either the tools node or the end, and tool results go back to the agent.
builder.add_edge(START, "load_memories")
builder.add_edge("load_memories", "agent")
builder.add_conditional_edges("agent", route_tools, ["tools", END])
builder.add_edge("tools", "agent")

# Compile with a MemorySaver checkpointer.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [None]:
# from IPython.display import Image, display

# display(Image(graph.get_graph().draw_mermaid_png()))

In [None]:
def pretty_print_stream_chunk(chunk):
    """Print every node update contained in one streamed chunk.

    Args:
        chunk: Mapping of node name to that node's update. When an update has
            a ``"messages"`` entry, only its last message is pretty-printed;
            otherwise the raw update is printed.
    """
    for node_name in chunk:
        node_update = chunk[node_name]
        print(f"Update from node: {node_name}")
        if "messages" not in node_update:
            print(node_update)
        else:
            node_update["messages"][-1].pretty_print()

        print("\n")

In [None]:
# NOTE: we're specifying `user_id` to save memories for a given user
# The same config also carries the `thread_id` and is reused by the next cells.
config = {"configurable": {"user_id": "1", "thread_id": "1"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream(
    {"messages": [("user", "Daekeun is a Machine Learning geek. He loves to learn AIML new things.")]},
    config=config,
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Second message, sent with the `config` assigned in the previous cell.
for chunk in graph.stream(
    {"messages": [("user", "Daekeun provides AIML technology support at Microsoft.")]},
    config=config,
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Introduce the AutoGen topic (same `config` as the earlier cells).
for chunk in graph.stream(
    {"messages": [("user", "Daekeun wants to know AutoGen")]}, config=config
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Ask a question about AutoGen itself (same `config` as the earlier cells).
for chunk in graph.stream(
    {"messages": [("user", "What is AutoGen's main featrues?")]}, config=config
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Ask for a four-week study plan (same `config` as the earlier cells).
for chunk in graph.stream(
    {
        "messages": [
            ("user", "Daekeun wants to study AutoGen in 4 weeks. How can he does?")
        ]
    },
    config=config,
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Re-assign the config for user "1" on thread "1" (same values as the earlier cell).
config = {"configurable": {"user_id": "1", "thread_id": "1"}}

In [None]:
# Ask for learning-material recommendations using the `config` assigned above.
for chunk in graph.stream(
    {
        "messages": [
            ("user", "Daekeun wants to study AutoGen in 2 weeks. Please recommend Microsoft's website or appropriate learning material.")
        ]
    },
    config=config,
):
    pretty_print_stream_chunk(chunk)

Next, we switch to a different user. Recall memories are filtered by `user_id`, so only memories saved for this user are retrieved. Let's try it out:

In [None]:
# A different user gets a separate thread: the checkpointer stores conversation
# history per `thread_id`, so reusing thread "1" would carry user 1's messages
# into user 2's conversation.
config = {"configurable": {"user_id": "2", "thread_id": "2"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream(
    {"messages": [("user", "Hyo is a big fan of Microsoft")]}, config=config
):
    pretty_print_stream_chunk(chunk)

In [None]:
# User "2" talks on a separate thread: the checkpointer stores conversation
# history per `thread_id`, so thread "1" (used by user "1") is not reused here.
config = {"configurable": {"user_id": "2", "thread_id": "2"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream(
    {"messages": [("user", "Hyo is interested in AutoGen and Semantic Kernel")]}, config=config
):
    pretty_print_stream_chunk(chunk)

In [None]:
# User "2" talks on a separate thread: the checkpointer stores conversation
# history per `thread_id`, so thread "1" (used by user "1") is not reused here.
config = {"configurable": {"user_id": "2", "thread_id": "2"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream(
    {"messages": [("user", "Where is learning materials?")]}, config=config
):
    pretty_print_stream_chunk(chunk)

Notice how the agent loads the most relevant memories before answering and, in our case, suggests dinner recommendations based on both the food preferences and the location.

Finally, let's use the search tool together with the rest of the conversation context and memory to find the location of a pizzeria:

에이전트가 답변하기 전에 가장 관련성 높은 기억을 불러오는 방식에 주목하세요. 이 경우에는 음식 선호도와 위치를 모두 고려하여 저녁 식사 추천을 제안합니다.

마지막으로, 검색 도구를 나머지 대화 맥락 및 기억과 함께 사용하여 피자 가게의 위치를 찾아 보겠습니다.

In [None]:
# `config` is not redefined in this cell; whichever value was assigned most
# recently is used.
for chunk in graph.stream(
    {"messages": [("user", "what's the address for joe's in greenwich village?")]},
    config=config,
):
    pretty_print_stream_chunk(chunk)

### Adding structured memories

In [None]:
# Rebind recall_vector_store to a new InMemoryVectorStore. The memory tools
# look this name up when they run, so they use the new store from here on.
recall_vector_store = InMemoryVectorStore(embeddings)

In [None]:
from typing_extensions import TypedDict


# One structured memory: three string fields. The plotting cell at the end of
# the notebook draws an edge from `subject` to `object_` labelled with `predicate`.
class KnowledgeTriple(TypedDict):
    subject: str
    predicate: str
    # Trailing underscore: presumably to avoid shadowing the builtin `object` — confirm.
    object_: str

# @tool
# def save_recall_memory(memory: str, config: RunnableConfig) -> str:
#     """Save memory to vectorstore for later semantic retrieval."""
#     user_id = get_user_id(config)
#     document = Document(
#         page_content=memory, id=str(uuid.uuid4()), metadata={"user_id": user_id}
#     )
#     recall_vector_store.add_documents([document])
#     return memory


@tool
def save_recall_memory(
    memories: List[KnowledgeTriple], config: RunnableConfig
) -> List[KnowledgeTriple]:
    """Save memory to vectorstore for later semantic retrieval."""
    # get_user_id raises ValueError when the config carries no user ID.
    user_id = get_user_id(config)

    # One document per triple: the text is the triple's values joined by
    # spaces, and the triple's fields are copied into the metadata next to
    # the owning user ID.
    documents = [
        Document(
            page_content=" ".join(memory.values()),
            id=str(uuid.uuid4()),
            metadata={
                "user_id": user_id,
                **memory,
            },
        )
        for memory in memories
    ]

    # Store all triples in a single call instead of one call per triple;
    # nothing is sent when the list is empty.
    if documents:
        recall_vector_store.add_documents(documents)

    # The input triples are returned unchanged as the tool result.
    return memories

In [None]:
# Rebuild the tool list so it picks up the redefined save_recall_memory, and
# rebind the model to it (the agent node reads `model_with_tools` when it runs).
tools = [save_recall_memory, search_recall_memories, pdf_retrieve, web_search_tool]
model_with_tools = model.bind_tools(tools)

# Assemble the workflow again with the new tools.
builder = StateGraph(State)

# Register each node under an explicit name (the same names the edges use).
builder.add_node("load_memories", load_memories)
builder.add_node("agent", agent)
builder.add_node("tools", ToolNode(tools))

# Entry goes through memory loading; after the agent runs, route_tools picks
# either the tools node or the end, and tool results go back to the agent.
builder.add_edge(START, "load_memories")
builder.add_edge("load_memories", "agent")
builder.add_conditional_edges("agent", route_tools, ["tools", END])
builder.add_edge("tools", "agent")

# Compile with a new MemorySaver checkpointer.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [None]:
# New user "3" on thread "1", run against the graph rebuilt in the previous cell.
config = {"configurable": {"user_id": "3", "thread_id": "1"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream({"messages": [("user", "Hi I am Wonchan.")]}, config=config):
    pretty_print_stream_chunk(chunk)

In [None]:
# Follow-up message from the same user, sent with the `config` assigned above.
for chunk in graph.stream(
    {"messages": [("user", "I am non-tech, but interested in Microsoft's multi-agent strategy and tech stack like AutoGen.")]}, config=config
):
    pretty_print_stream_chunk(chunk)

As before, the memories generated from one thread are accessed in another thread from the same user:

In [None]:
# Same user "3", but a different thread ("2").
config = {"configurable": {"user_id": "3", "thread_id": "2"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream(
    {"messages": [("user", "Recommend me a website where I can easily try AutoGen hands-on")]}, config=config
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Same user "3" and thread "2" as the previous cell (config re-assigned with identical values).
config = {"configurable": {"user_id": "3", "thread_id": "2"}}

# Stream the graph's per-node updates for this user message and print each one.
for chunk in graph.stream(
    {"messages": [("user", "Recommend other multi-agent frameworks to me inorder to learn about other companies' multi-agent strategies")]}, config=config
):
    pretty_print_stream_chunk(chunk)

In [None]:
# Query the recall store directly: up to three documents matching
# "multi-agent" whose metadata `user_id` is "3".
records = recall_vector_store.similarity_search(
    "multi-agent", k=3, filter=lambda doc: doc.metadata["user_id"] == "3"
)
# Print the raw documents to inspect what was stored.
print(records)

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

# Pull up to three stored triples for user "3" that match "multi-agent".
records = recall_vector_store.similarity_search(
    "multi-agent",
    k=3,
    filter=lambda doc: doc.metadata["user_id"] == "3",
)

# Build a directed graph with one edge per triple: subject -> object_,
# labelled with the predicate.
G = nx.DiGraph()
G.add_edges_from(
    (
        record.metadata["subject"],
        record.metadata["object_"],
        {"label": record.metadata["predicate"]},
    )
    for record in records
)

# Draw the nodes and edges on a small figure.
plt.figure(figsize=(6, 4), dpi=80)
pos = nx.spring_layout(G)
nx.draw(
    G,
    pos,
    with_labels=True,
    node_size=3000,
    node_color="lightblue",
    font_size=10,
    font_weight="bold",
    arrows=True,
)

# Overlay the predicate labels on the edges.
edge_labels = nx.get_edge_attributes(G, "label")
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color="red")
plt.show()