In [6]:
from llama_cpp import Llama

# Load the model on GPU
# The GGUF file is resolved relative to the notebook's working directory.
# This single `llm` instance is shared by every tool and by the agent below.
llm = Llama(
  model_path="../qwen2.5-7b-instruct-q4_k_m.gguf",
  # NOTE(review): 9999 presumably exceeds the model's layer count so that all
  # layers are offloaded to the GPU -- confirm against the llama-cpp-python docs.
  n_gpu_layers=9999,
  seed=23,
  # Context window in tokens; llama.cpp reports that this is below the model's
  # training context (131072), so long prompts will not fit.
  n_ctx=4000,
  verbose=False
)

llama_init_from_model: n_ctx_per_seq (4000) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


In [7]:
from pydantic import BaseModel
from typing import List, Dict, Any, Optional

class AgentTool:
  """Base class for a tool that an agent can select and invoke.

  Subclasses are expected to override ``__call__`` (run the tool on a piece
  of text) and ``get_tool_input`` (turn raw text into an ``input_schema``
  instance). The base implementations are no-ops that return ``None``.

  Attributes:
    llm: Language-model handle the tool may use.
    name: Short identifier of the tool.
    description: Human-readable summary of what the tool does.
    input_schema: Class describing the tool's structured input.
    output_schema: Class describing the tool's structured output.
  """

  def __init__(self, llm: Any, name: str, description: str, input_schema: type, output_schema: type):
    self.llm = llm
    self.name = name
    self.description = description
    self.input_schema = input_schema
    self.output_schema = output_schema

  def __call__(self, text_input: Optional[str] = None):
    """Run the tool on ``text_input``; the base implementation returns ``None``.

    The parameter is optional so that the historical zero-argument call keeps
    working while the signature also accepts the single text argument that
    subclass overrides take.
    """
    return None

  def get_tool_input(self, text_input):
    """Convert raw text into structured input; the base implementation returns ``None``."""
    return None

In [10]:
class ChatbotInput(BaseModel):
  """Input schema for the Chatbot tool."""
  # Raw user text; Chatbot sends it to the model as the user message.
  text: str

class ChatbotOutput(BaseModel):
  """Output schema for the Chatbot tool."""
  # Text content of the model's reply.
  response: str

class Chatbot(AgentTool):
  """Tool that forwards the user's text to the LLM and wraps the reply.

  The tool is bound to the notebook-level ``llm`` at construction time.
  """

  def __init__(self):
    # Collect the tool metadata first, then hand it to the AgentTool base class.
    tool_config = dict(
      llm=llm,
      name="Chatbot",
      description="An intelligent AI chatbot that can generate human-like responses to text input and can be used to provide information, answer questions, and engage in conversation.",
      input_schema=ChatbotInput,
      output_schema=ChatbotOutput,
    )
    super().__init__(**tool_config)

  def get_tool_input(self, text_input):
    """Wrap the raw text in a ``ChatbotInput``."""
    return ChatbotInput(text=text_input)

  def __call__(self, text):
    """Ask the model to answer ``text`` and return a ``ChatbotOutput``."""
    prompt = self.get_tool_input(text).text
    conversation = [
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": prompt},
    ]
    completion = self.llm.create_chat_completion(
      messages=conversation,
      max_tokens=1000,
    )
    # Only the first choice is used.
    first_choice = completion['choices'][0]
    return ChatbotOutput(response=first_choice['message']['content'])

In [11]:
# Test the chatbot
# Smoke test: build the tool and send a single greeting through the model.
# `chatbot` is reused later when the agent's tool list is assembled.
chatbot = Chatbot()
response = chatbot("Hello, how are you?")
# Printing a ChatbotOutput shows its fields, e.g. response='...'.
print(response)

response="Hello! I'm just a computer program, so I don't have feelings or consciousness. But I'm here and ready to help you with any questions or conversations you'd like to have. How can I assist you today?"


In [12]:
class CalculatorInput(BaseModel):
  """Input schema for the Calculator tool: one binary arithmetic operation."""
  # Operation name; Calculator recognises "add", "subtract", "multiply" and "divide".
  operation: str
  # Left-hand operand.
  a: float
  # Right-hand operand (the divisor for "divide").
  b: float

class CalculatorOutput(BaseModel):
  """Output schema for the Calculator tool."""
  # Numeric result; None when the requested operation is not recognised.
  result: Optional[float]

class Calculator(AgentTool):
  """Arithmetic tool: the LLM extracts an operation and two operands from text.

  ``get_tool_input`` asks the model to emit ``<operation>``, ``<a>`` and
  ``<b>`` tags, and ``__call__`` dispatches to the matching arithmetic method.
  """

  def __init__(self):
    super().__init__(
      llm=llm,
      name="Calculator",
      description="A simple calculator tool for arithmetic operations.",
      input_schema=CalculatorInput,
      output_schema=CalculatorOutput
    )

  def __call__(self, input_text):
    """Parse ``input_text`` and evaluate the requested operation.

    Returns:
      A ``CalculatorOutput`` whose ``result`` is ``None`` when the operation
      is not one of the supported names or when dividing by zero.

    Raises:
      ValueError: if the model reply cannot be parsed (see ``get_tool_input``).
    """
    calculator_input = self.get_tool_input(input_text)
    operations = {
      "add": self.add,
      "subtract": self.subtract,
      "multiply": self.multiply,
      "divide": self.divide,
    }
    handler = operations.get(calculator_input.operation)
    if handler is None:
      return CalculatorOutput(result=None)
    try:
      result = handler(calculator_input.a, calculator_input.b)
    except ZeroDivisionError:
      # The output schema allows None, so report "no result" instead of
      # letting the exception escape into the agent loop.
      result = None
    return CalculatorOutput(result=result)

  def get_tool_input(self, input_text):
    """Ask the model to extract the operation and operands from ``input_text``.

    Returns:
      An ``input_schema`` instance with a normalised (lower-case) operation.

    Raises:
      ValueError: if a required tag is missing from the model reply or an
        operand is not a valid number.
    """
    # Use the instance's model handle rather than the notebook global so the
    # tool stays consistent with what was registered in __init__.
    response = self.llm.create_chat_completion(
      messages = [
        {"role": "system", "content": """You are a helpful assistant who excels at extracting the operation and numbers for an arithmetic operation from the user input. 
         Think step-by-step about what operation is being requested and what numbers are being used.
         Provide your step-by-step reasoning and response in the following XML format: <think>{step-by-step reasoning}</think> <response><operation>{'add', 'subtract', 'multiply' or 'divide'}</operation><a>{operand a}</a><b>{operand b}</b></response>"""},
        {
          "role": "user",
          "content": f"Please extract the operation and numbers from the following text: <text>{input_text}</text>. <response>"
        }
      ]
    )
    response_text = response['choices'][0]['message']['content']
    # The prompt shows the operation names in quotes, so tolerate a reply that
    # echoes the quotes or uses different casing.
    operation = self._extract_tag(response_text, "operation").strip("'\"").lower()
    a = float(self._extract_tag(response_text, "a"))
    b = float(self._extract_tag(response_text, "b"))
    return self.input_schema(operation=operation, a=a, b=b)

  @staticmethod
  def _extract_tag(text, tag):
    """Return the stripped text between the first ``<tag>`` and ``</tag>``."""
    _, opener, remainder = text.partition(f"<{tag}>")
    value, closer, _ = remainder.partition(f"</{tag}>")
    if not opener or not closer:
      raise ValueError(f"Model response is missing a <{tag}>...</{tag}> element: {text!r}")
    return value.strip()

  def add(self, a, b):
    """Return ``a + b``."""
    return a + b

  def subtract(self, a, b):
    """Return ``a - b``."""
    return a - b

  def multiply(self, a, b):
    """Return ``a * b``."""
    return a * b

  def divide(self, a, b):
    """Return ``a / b``; raises ``ZeroDivisionError`` when ``b`` is zero."""
    return a / b

In [13]:
# Test calculator tool
# Smoke test: the model must extract operation="add", a=5, b=3 from the sentence.
# `calculator` is reused later when the agent's tool list is assembled.
calculator = Calculator()
calculator("What is 5 plus 3?")

CalculatorOutput(result=8.0)

In [55]:
import re

class Agent:
  """Tool-using agent driven by a chat LLM.

  For every query the agent repeatedly (1) asks the LLM to pick one of the
  registered tools, (2) runs that tool on the raw query, (3) records the
  result in ``self.memory`` and (4) asks the LLM whether the memory now holds
  enough information. Once it does, the LLM writes the final answer.

  ``self.memory`` maps a tool name to a list of ``{"input", "response"}``
  records. It is created once in ``__init__`` and never cleared, so records
  from earlier queries remain visible to later ones.

  Args:
    llm: Object exposing ``create_chat_completion(messages=...)``.
    tools: Callables with ``name`` and ``description`` attributes that accept
      the query string.
    max_iterations: Upper bound on select/run/evaluate rounds per query, so a
      model that never judges the information sufficient cannot loop forever.
  """

  def __init__(self, llm, tools: list, max_iterations: int = 5):
    self.llm = llm
    self.tools = tools
    self.memory = {}
    self.max_iterations = max_iterations

  def __call__(self, input: str):
    """Answer ``input``, returning the final response text.

    Returns "I am unable to help you." when no known tool is selected or when
    ``max_iterations`` rounds pass without sufficient information.
    """
    for _ in range(self.max_iterations):
      # select tool
      tool = self.get_tool(input, self.tools, self.memory)
      if tool is None:
        return "I am unable to help you."

      # use tool
      response = tool(input)
      print("\n\nTool used: ", tool.name)
      print("Tool input: ", input)
      print("Tool response: ", response)

      # store input and response in memory
      self.memory.setdefault(tool.name, []).append({"input": input, "response": response})

      # evaluate response
      if self.evaluate_response(input, self.memory):
        # generate final response if there is sufficient information
        print("\n\nGenerating final response...")
        return self.generate_final_response(input, self.memory)

    return "I am unable to help you."

  def _chat(self, system_prompt, user_prompt):
    """Send one system + user exchange to the LLM and return the reply text."""
    completion = self.llm.create_chat_completion(
      messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
      ]
    )
    return completion['choices'][0]['message']['content']

  @staticmethod
  def _extract_tag(text, tag):
    """Return the stripped content of the first ``<tag>...</tag>``, or ``None``.

    ``re.DOTALL`` lets the content span several lines, which model replies
    routinely do; the non-greedy match stops at the first closing tag.
    """
    match = re.search(rf'<{tag}>(.*?)</{tag}>', text, re.DOTALL)
    return match.group(1).strip() if match else None

  def get_tool(self, input: str, tools: list, memory: dict):
    """Ask the LLM which tool fits ``input``.

    Returns the matching tool object, or ``None`` when the reply has no
    ``<tool>`` tag or names a tool that is not in ``tools``.
    """
    # Describe the tools by name and description; interpolating the tool
    # objects themselves would only show their default reprs to the model.
    tools_dict = [
      {"name": tool.name, "description": tool.description}
      for tool in tools
    ]

    response = self._chat(
      """You are an intelligent AI agent that can make decisions and use tools to solve problems. 
        Given the following problem and existing memory from past interactions, please select the most appropriate tool to solve it. 
        Think step-by-step before making a decision.
        Provide your step-by-step reasoning and response in the following XML format: <think>{step-by-step reasoning}</think> <tool>{tool name}</tool>.
        The tool name provided should be one of the tools given in exact.""",
      f"Given the following tools: <tools>{tools_dict}</tools>, existing memory: <memory>{memory}</memory>, and the following problem: <problem>{input}</problem>, which tool would you use to solve the problem?"
    )
    print("\n\nChoose tool: ", response)

    # extract tool name from response
    tool_name = self._extract_tag(response, "tool")
    if tool_name is None:
      return None

    # get tool object from tool name
    for tool in tools:
      if tool.name == tool_name:
        return tool

    return None

  def evaluate_response(self, input, memory):
    """Ask the LLM whether ``memory`` suffices to answer ``input``.

    Returns ``True`` only for an explicit "yes"; a missing or unparsable
    ``<response>`` tag is treated as "no".
    """
    response = self._chat(
      """You are an intelligent AI agent that can evaluate whether there is sufficient information to provide a final response based on the input query and the memory of past interactions. 
         Think step-by-step before providing your response. 
         Provide your step-by-step reasoning and response, a yes or no only, in the following XML format: <think>{step-by-step reasoning}</think> <response>{yes or no}</response>.""",
      f"Given the following input: <input>{input}</input>, and the memory of past interactions: <memory>{memory}</memory>, is there sufficient information to provide a final response?"
    )
    print("\n\nEvaluate response: ", response)
    answer = self._extract_tag(response, "response")
    return answer is not None and answer.lower().startswith("yes")

  def generate_final_response(self, input, memory):
    """Ask the LLM to write the final answer from ``input`` and ``memory``.

    Returns the content of the ``<response>`` tag. If the tag is missing
    (for example because the reply was cut off), the reply text with any
    ``<think>`` section removed is returned instead.
    """
    response = self._chat(
      """You are an intelligent AI agent that can generate a final response based on the input query and the memory of past interactions. 
         Think step-by-step before providing your response. 
         Provide your step-by-step reasoning and response in the following XML format: <think>{step-by-step reasoning}</think> <response>{final response}</response>.""",
      f"Given the following input: <input>{input}</input>, and the memory of past interactions: <memory>{memory}</memory>, think step-by-step and provide a final response: "
    )

    think = self._extract_tag(response, "think")
    answer = self._extract_tag(response, "response")

    if think is not None:
      print("Step-by-step reasoning: ", think)
    if answer is None:
      # Best effort: return whatever the model wrote outside its reasoning.
      answer = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL).strip()
    return answer

In [56]:
# Tool instances the agent may choose from; both were created in earlier cells.
tools = [chatbot, calculator]
# Sanity check: show the name and description of the first tool.
print(tools[0].name)
print(tools[0].description)

Chatbot
An intelligent AI chatbot that can generate human-like responses to text input and can be used to provide information, answer questions, and engage in conversation.


In [57]:
# Test agent
# The agent shares the notebook-level model with the tools it orchestrates.
agent = Agent(llm=llm, tools=tools)

# Arithmetic query. The agent also prints its intermediate steps while running.
print("\n\nWhat is 3x3?: ", agent("What is 3x3?"))



Choose tool:  <think>To solve the problem "What is 3x3?", I need to determine which tool can perform multiplication. The existing memory does not provide any relevant information, so I will focus on the available tools. The Chatbot does not have the capability to perform calculations, so it is not suitable for this task. The Calculator, on the other hand, is designed to perform arithmetic operations, including multiplication. Therefore, the Calculator is the appropriate tool to use for this problem.</think>
<tool>Calculator</tool>


Tool used:  Calculator
Tool input:  What is 3x3?
Tool response:  result=9.0


Evaluate response:  <think>The input query is "What is 3x3?" and the memory contains a past interaction where the same query was used and the response was 9.0. This indicates that the system has the necessary information to respond to the query.</think>
<response>yes</response>


Generating final response...
Step-by-step reasoning:  First, I will analyze the input query "What is

In [58]:
# General-knowledge query on the same `agent` instance; its memory still holds
# the record stored by the previous query.
print("\n\nAre whales fishes?: ", agent("Are whales fishes?"))



Choose tool:  <think>To solve the problem "Are whales fishes?", we need to consider the nature of the question. This is a factual question about biology, not a mathematical or conversational one. The existing memory contains a calculation, which is unrelated to the question at hand. Therefore, neither the Chatbot nor the Calculator is suitable for answering this biological question. However, since we must choose from the given tools, we should select the most appropriate one based on the type of question. In this case, the most appropriate tool would be the Chatbot, as it can provide a conversational response to a factual question, even though it may not have the correct information stored in its memory.</think>
<tool>Chatbot</tool>


Tool used:  Chatbot
Tool input:  Are whales fishes?
Tool response:  response='No, whales are not fish. Whales are mammals, specifically marine mammals. They breathe air, give birth to live young, and nurse their offspring with milk. Fish, on the other h