In [None]:
from dotenv import load_dotenv

import os

load_dotenv()

def key_preview(name: str, length: int = 10) -> str:
    """Return the first `length` characters of environment variable `name`.

    `os.getenv` returns None for an unset variable, which cannot be sliced,
    so a missing or empty value yields the placeholder "<not set>" instead of
    raising TypeError.
    """
    value = os.getenv(name)
    return value[:length] if value else "<not set>"


# NOTE: these previews end up in the saved cell output; clear the output
# before sharing the notebook.
wandb_key_preview = key_preview("WANDB_API_KEY")
print(f"First 10 characters of W&B key: {wandb_key_preview}")

groq_key_preview = key_preview("GROQ_API_KEY")
print(f"First 10 characters of Groq key: {groq_key_preview}")

### Installing Ollama dependencies

1. `pciutils` is required by Ollama to detect the GPU type.
2. Ollama itself is installed in the runtime instance by running `curl -fsSL https://ollama.com/install.sh | sh`.

In [None]:
import sys
# True when the google.colab module has already been loaded into this kernel,
# which is used here as the signal that the notebook runs on Google Colab.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
  # Colab: install pciutils, then run the official Ollama install script.
  !sudo apt update -qq
  !sudo apt install -qq -y pciutils
  !curl -fsSL https://ollama.com/install.sh | sh
else:
    print("Not running in Google Colab")
    # Outside Colab nothing is installed; only check that the `ollama` binary is callable.
    # NOTE(review): `exit 1` presumably ends only the shell spawned by `!`,
    # not the notebook run, so later cells still execute — confirm.
    ! if ! ollama --version; then echo "ollama is not installed" && exit 1; fi

### Starting Ollama
---

To use Ollama, it must run as a background service in parallel with your scripts. Because Jupyter notebooks run cells in sequence, it is difficult to run two cells at the same time. As a workaround, we start the service with Python's `subprocess` module so that it doesn't block any other cell from running.

The service is started with the command `ollama serve`.

`time.sleep(5)` adds some delay to get the Ollama service up before downloading the model.

In [None]:
import threading
import subprocess
import time
import requests

def run_ollama_serve():
  """Spawn `ollama serve` as a child process and return without waiting for it."""
  serve_command = ["ollama", "serve"]
  subprocess.Popen(serve_command)

# Start Ollama only if nothing answers on its default local port yet.
try:
  # A timeout keeps the cell from hanging if the port accepts the connection
  # but never replies.
  response = requests.get('http://localhost:11434', timeout=5)
except requests.exceptions.RequestException:
  # Connection refused or timed out: no server is listening, so launch one.
  # Only request errors are caught; KeyboardInterrupt and programming errors
  # still propagate.
  print("Ollama is not running")
  thread = threading.Thread(target=run_ollama_serve)
  thread.start()
  # Give the service a moment to come up before later cells talk to it.
  time.sleep(5)
else:
  if response.status_code == 200:
    print("Ollama is running")
  else:
    # Something answered on the port but not with 200; report it instead of
    # passing silently, and do not start a second server on the same port.
    print(f"Unexpected response from http://localhost:11434 (HTTP {response.status_code}); Ollama was not started")

### Define tools

In [None]:
from typing import Annotated

from langchain_core.tools import tool
# from langchain_experimental.utilities.python import PythonREPL

# repl = PythonREPL()

# @tool
# def python_repl_tool(
#     code: Annotated[str, "The python code to execute to generate your chart."],
# ):
#     """Use this to execute python code. If you want to see the output of a value,
#     you should print it out with `print(...)`. This is visible to the user."""
#     try:
#         result = repl.run(code)
#     except BaseException as e:
#         return f"Failed to execute. Error: {repr(e)}"
#     result_str = f"Successfully executed:\n```python\n{code}\n```\nStdout: {result}"
#     return (
#         result_str + "\n\nIf you have completed all tasks, respond with FINAL ANSWER."
#     )

@tool
def search_process(id: str) -> str:
    """
    Search for a SEI process folder in the file system.

    Use this function to locate administrative process documents by their reference number.
    The function handles both separated (166/2025) and compact (1662025) ID formats.

    Args:
        id (str): Process number in either format:
            - Separated format: "166/2025"
            - Compact format: "1662025" (the last four digits are the year)
            The number part is zero-padded to five digits automatically.

    Returns:
        str: One of:
            - Folder name (e.g., "SEI_00166_2025") if the process folder exists
            - "Process not found" if the folder does not exist or the ID is malformed

    Example:
        To find process 166/2025:
        > search_process("166/2025")
        Returns: "SEI_00166_2025"

        To find same process with compact format:
        > search_process("1662025")
        Returns: "SEI_00166_2025"
    """
    import os

    not_found = "Process not found"
    # Process folders live in "processos" under the current working directory.
    processes_path = os.path.join(os.path.abspath(""), "processos")
    try:
        process_id = id.strip()
        if "/" in process_id:
            number, _, year = process_id.partition("/")
        else:
            # Compact format: everything before the trailing 4-digit year is the number.
            number, year = process_id[:-4], process_id[-4:]
        number, year = number.strip(), year.strip()

        # Only digits may reach the folder name; this also rejects empty parts,
        # extra slashes, and path fragments such as "..".
        if not (number.isdigit() and year.isdigit()):
            return not_found

        # Always pad the number part itself, so "1234/2025" becomes
        # SEI_01234_2025 regardless of the overall length of the ID string.
        folder = f"SEI_{number.zfill(5)}_{year}"
        if os.path.exists(os.path.join(processes_path, folder)):
            return folder
        return not_found
    except Exception as e:
        # A tool must not crash the agent loop: report the problem and answer "not found".
        print(f"Error: {e}")
        return not_found

@tool
def get_documents_from_process(
    parameters: str
) -> list[str]:
    """
    Retrieve PDF documents from a SEI process folder with pagination support.

    Use this function to get a list of PDF documents within a process folder.
    Results can be paginated using limit and offset.
    Typically used after locating a process folder with search_process().

    Args:
        parameters (str): A single comma-separated string:
            "process_folder,limit,offset"
            - process_folder: The name of the process folder (e.g., "SEI_00166_2025")
            - limit: The maximum number of documents to return (optional, default: 10)
            - offset: The number of documents to skip (optional, default: 0)

    Returns:
        Union[list[str], str]: One of:
            - List of PDF filenames (sorted by name) in the requested range
            - "Invalid parameters" if parameters are incorrect
            - "Process folder not found" if folder doesn't exist or error occurs

    Example:
        # Get first 10 documents
        > get_documents_from_process("SEI_00166_2025")
        Returns: ["doc1.pdf", "doc2.pdf", ...]

        # Get next 10 documents
        > get_documents_from_process("SEI_00166_2025,10,10")
        Returns: ["doc11.pdf", "doc12.pdf", ...]

    Note:
        - Only returns PDF files
        - Use with search_process() to locate folder first
        - Empty list means no documents found in range
    """
    import os

    try:
        parts = [part.strip() for part in parameters.split(",")]
        if len(parts) > 3:
            raise ValueError(
                f"expected at most 3 comma-separated values, got {len(parts)}"
            )
        process_folder = parts[0]
        # limit and offset are optional and fall back to the documented defaults.
        limit = int(parts[1]) if len(parts) > 1 and parts[1] else 10
        offset = int(parts[2]) if len(parts) > 2 and parts[2] else 0
        if limit < 0 or offset < 0:
            # Negative values would silently turn into from-the-end slicing.
            raise ValueError("limit and offset must be non-negative")
    except Exception as e:
        print(f"Error: {e}")
        return "Invalid parameters"

    try:
        # The folder name comes from the model; accept only a plain directory
        # name so the lookup cannot leave the "processos" directory.
        if (
            not process_folder
            or process_folder in (".", "..")
            or os.path.basename(process_folder) != process_folder
        ):
            return "Process folder not found"

        folder_path = os.path.join(os.path.abspath(""), "processos", process_folder)
        # os.walk yields nothing for a missing directory instead of raising,
        # so the existence check has to be explicit.
        if not os.path.isdir(folder_path):
            return "Process folder not found"

        # Each file is collected exactly once; sorting gives a stable order so
        # that consecutive pages neither overlap nor skip documents.
        documents = sorted(
            file
            for _root, _dirs, files in os.walk(folder_path)
            for file in files
            if file.endswith(".pdf")
        )
        return documents[offset:offset + limit]
    except Exception as e:
        print(f"Error: {e}")
        return "Process folder not found"

In [None]:
# Smoke-test the tools outside the agent. Objects created with @tool are
# invoked through .invoke(); calling the tool object directly is deprecated
# in langchain-core.
folder = search_process.invoke({"id": "166/2025"})

# Documents 11-15 of that process: limit=5, offset=10.
get_documents_from_process.invoke({"parameters": f"{folder},5,10"})

In [None]:
def make_system_prompt(suffix: str) -> str:
    """Build the system prompt shared by the collaborating agents.

    Args:
        suffix: Agent-specific instructions, appended on a new line after the
            shared collaboration preamble.

    Returns:
        The shared preamble, a newline, and `suffix`.
    """
    shared_instructions = "".join(
        [
            "You are a helpful AI assistant, collaborating with other assistants.",
            " Use the provided tools to progress towards answering the question.",
            " If you are unable to fully answer, that's OK, another assistant with different tools ",
            " will help where you left off. Execute what you can to make progress.",
            " If you or any of the other assistants have the final answer or deliverable,",
            " make sure to prefix your response with FINAL ANSWER so the team knows to stop.",
        ]
    )
    return f"{shared_instructions}\n{suffix}"

In [None]:
from typing import Literal
from langchain_core.messages import BaseMessage, HumanMessage
from langgraph.prebuilt import create_react_agent
from langgraph.graph import MessagesState, END
from langgraph.types import Command

### Choose model

In [None]:
from langchain_ollama import ChatOllama

# Local backend: chat model served by the Ollama instance started earlier.
# NOTE: `llm` is assigned again in the ChatGroq cell below, so when every cell
# is run in order the Groq model is the one the agent ends up using.
model_name = 'llama3.1'  # Change this to the model you want to use

# Pull the model so the local Ollama server can serve it
# (the Python variable model_name is substituted into the shell command)
!ollama pull $model_name

llm = ChatOllama(
    model=model_name,  # Same model name that was pulled above
    base_url="http://localhost:11434",  # URL where Ollama is running locally
    verbose=False,
)

In [None]:
from langchain_groq import ChatGroq

# Hosted backend: Groq chat model.
# NOTE: this rebinds `llm`, replacing the ChatOllama instance from the previous
# cell; run only the cell for the backend you want the agent to use.
# NOTE(review): presumably authenticates with the GROQ_API_KEY loaded by
# load_dotenv() at the top of the notebook — confirm.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.60,
)

In [None]:
def get_next_node(last_message: BaseMessage, goto: str):
    """Choose the next graph node from an agent's last message.

    Returns END when the message content contains the marker "FINAL ANSWER",
    otherwise returns `goto` unchanged.
    """
    finished = "FINAL ANSWER" in last_message.content
    return END if finished else goto

In [None]:
# Research agent: built from `llm` with create_react_agent and limited to the
# two SEI lookup tools defined above. The agent-specific sentence is appended
# to the shared system prompt by make_system_prompt.
research_agent = create_react_agent(
    llm,
    tools=[search_process, get_documents_from_process],
    prompt=make_system_prompt(
        "You can only search for SEI processes and get documents from them."
    ),
)


def research_node(
    state: MessagesState,
) -> Command[Literal["researcher", END]]:
    """Graph node that runs the research agent once and picks the next hop.

    The agent's full message history is written back to the graph state.
    Routing is decided by get_next_node on the agent's last message: END once
    it contains "FINAL ANSWER", otherwise back to "researcher".
    """
    agent_output = research_agent.invoke(state)
    history = agent_output["messages"]
    next_hop = get_next_node(history[-1], "researcher")
    # The last AI message is forwarded as-is. Re-wrapping it as a HumanMessage
    # (needed only by providers that reject a trailing AI message) is
    # intentionally not done here.
    return Command(
        # share internal message history of research agent with other agents
        update={"messages": history},
        goto=next_hop,
    )
    
# # Chart generator agent and node
# # NOTE: THIS PERFORMS ARBITRARY CODE EXECUTION, WHICH CAN BE UNSAFE WHEN NOT SANDBOXED
# chart_agent = create_react_agent(
#     llm,
#     [python_repl_tool],
#     prompt=make_system_prompt(
#         "You can only generate charts. You are working with a researcher colleague."
#     ),
# )


# def chart_node(state: MessagesState) -> Command[Literal["researcher", END]]:
#     result = chart_agent.invoke(state)
#     goto = get_next_node(result["messages"][-1], "researcher")
#     # wrap in a human message, as not all providers allow
#     # AI message at the last position of the input messages list
#     result["messages"][-1] = HumanMessage(
#         content=result["messages"][-1].content, name="chart_generator"
#     )
#     return Command(
#         update={
#             # share internal message history of chart agent with other agents
#             "messages": result["messages"],
#         },
#         goto=goto,
#     )

In [None]:
from langgraph.graph import StateGraph, START

# Single-node graph: START -> "researcher".
# No explicit edge out of "researcher" is declared; research_node returns a
# Command whose `goto` is either "researcher" again or END.
workflow = StateGraph(MessagesState)
workflow.add_node("researcher", research_node)
# workflow.add_node("chart_generator", chart_node)

workflow.add_edge(START, "researcher")
graph = workflow.compile()

In [None]:
from IPython.display import Image, display
from langchain_core.runnables.graph import CurveStyle

try:
    # Render the compiled workflow as a PNG:
    # - get_graph(xray=True) is called on `graph` (the compiled workflow from the previous cell)
    # - draw_mermaid_png turns that graph into PNG image data via Mermaid
    # - CurveStyle.NATURAL selects the curve style used for the edges
    graph_image = graph.get_graph(xray=True).draw_mermaid_png(
        curve_style=CurveStyle.NATURAL
    )

    # Display the generated image in the Jupyter notebook
    display(Image(graph_image))
except Exception as e:
    # Rendering is optional: on any failure print the error and let the
    # notebook continue instead of stopping here.
    print(e)

In [None]:
def print_stream(stream):
    """Print the newest message of every state snapshot in `stream`.

    Each item must be a mapping with a "messages" sequence. Tuple messages are
    printed with print(); any other message is shown via its pretty_print().
    """
    for snapshot in stream:
        latest = snapshot["messages"][-1]
        if not isinstance(latest, tuple):
            latest.pretty_print()
            continue
        print(latest)

In [None]:
# Initial graph state: one user message given as a (role, content) tuple.
# "files 11 to 15" corresponds to limit=5, offset=10 in get_documents_from_process.
inputs = {
        "messages": [
            (
                "user",
                "Is there a process with the number 166/2025? If yes, please list the files 11 to 15.",
            )
        ],
    }

In [None]:
# Run the graph in streaming mode and print the newest message of each event.
# NOTE(review): with stream_mode="values" each event is presumably the full
# state, which is why print_stream can index event["messages"] — confirm.
events = graph.stream(inputs, stream_mode="values")

print_stream(events)

In [None]:
# Run the same request again without streaming; the bare `run` expression
# makes the returned value the cell's displayed output.
run = graph.invoke(inputs)

run