## Setup environment

In [None]:
# Build llama-cpp-python from source with -DLLAMA_CUBLAS=ON passed to CMake through CMAKE_ARGS.
# NOTE(review): the package version is not pinned and newer llama.cpp releases may have renamed
# this CMake option - confirm the flag still applies to the version that gets installed.
!CMAKE_ARGS="-DLLAMA_CUBLAS=ON" pip install llama-cpp-python

Download the model weights. This example uses Mistral 7B Instruct v0.2 in the Q6_K GGUF quantization.

In [None]:
!wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q6_K.gguf

## Setup simulation

In [None]:
class GridSimulation:
    """A square grid world in which a user agent walks towards a fixed goal cell.

    Cell values in ``grid``: 0 = empty, 1 = user agent, 2 = goal.  The agent
    starts in the top-left corner (row 0, column 0) and the goal sits in the
    bottom-right corner (row ``size - 1``, column ``size - 1``).  Once the agent
    steps onto the goal, ``simulation_completed`` becomes True and further moves
    are refused.
    """

    # (row delta, column delta) for every direction accepted by move().
    _DELTAS = {
        "left": (0, -1),
        "right": (0, 1),
        "top": (-1, 0),
        "bottom": (1, 0),
    }

    def __init__(self, size=4):
        """Create a ``size`` x ``size`` grid (4 x 4 by default).

        Raises:
            ValueError: if ``size`` is smaller than 1.
        """
        if size < 1:
            raise ValueError("size must be at least 1")
        self.size = size
        self.grid = [[0] * size for _ in range(size)]
        # Agent starts in the top-left cell, goal is the bottom-right cell.
        self.user_row = 0
        self.user_col = 0
        self.goal_row = size - 1
        self.goal_col = size - 1
        self.simulation_completed = False
        self.update_grid()

    def update_grid(self):
        """Redraw the grid from the current positions and refresh the completion flag."""
        # Always redraw, even on completion, so no stale agent marker is left behind.
        for row in self.grid:
            for col in range(len(row)):
                row[col] = 0
        self.grid[self.user_row][self.user_col] = 1
        # Written last so the goal marker wins when the agent stands on the goal.
        self.grid[self.goal_row][self.goal_col] = 2

        if (self.user_row, self.user_col) == (self.goal_row, self.goal_col):
            self.simulation_completed = True

    def move(self, direction):
        """Move the agent one cell in ``direction`` if the move is legal.

        Args:
            direction: One of "left", "right", "top" or "bottom".

        Returns:
            The new state as returned by ``get_state()`` after a legal move, or
            an explanatory message when the simulation is already completed,
            the direction is unknown, or the move would leave the grid.  The
            agent does not move in any of the message cases.
        """
        if self.simulation_completed:
            return "Simulation already completed."

        # Non-string input (e.g. None or a list) is treated like any other unknown direction.
        delta = self._DELTAS.get(direction) if isinstance(direction, str) else None
        if delta is None:
            return "Invalid direction. Please choose left, right, top, or bottom."

        new_row = self.user_row + delta[0]
        new_col = self.user_col + delta[1]
        if not (0 <= new_row < self.size and 0 <= new_col < self.size):
            return "Illegal move: Cannot move outside the grid."

        self.user_row, self.user_col = new_row, new_col
        self.update_grid()
        return self.get_state()

    def get_state(self):
        """Return the grid as text (one row per line), or a completion message."""
        if self.simulation_completed:
            return "Simulation completed."
        return '\n'.join(' '.join(str(cell) for cell in row) for row in self.grid)


## Setup agent to run the simulation

In [None]:
from llama_cpp import Llama

# Load the quantized Mistral weights from the working directory.
llm = Llama(
    model_path="./mistral-7b-instruct-v0.2.Q6_K.gguf",
    n_ctx=8192,
    use_mlock=True,
    n_gpu_layers=33,
    embedding=False,
)

In [None]:
# Function-style descriptions of the two simulation methods the agent may call.
# Each spec is built on its own and then collected into `tools`.
_get_state_tool = {
    "type": "function",
    "function": {
        "name": "get_state",
        "description": "Retrieves the current state of the grid simulation. If the simulation is not completed, it returns a visual representation of the grid with the user agent and goal positions. If the simulation is completed, it returns a message indicating completion.",
        "parameters": {},
    },
}

_move_tool = {
    "type": "function",
    "function": {
        "name": "move",
        "description": "Moves the user agent in a specified direction on the grid. The method updates the user agent's position and checks for a win condition. If the move is illegal or outside the grid boundaries, it returns an error message. If the move leads to the user agent reaching the goal, it completes the simulation.",
        "parameters": {
            "type": "object",
            "title": "move",
            "properties": {
                "direction": {
                    "title": "Direction",
                    "type": "string",
                    "enum": ["left", "right", "top", "bottom"],
                    "description": "The direction in which to move the user agent. Valid options are 'left', 'right', 'top', or 'bottom'.",
                },
            },
            "required": ["direction"],
        },
    },
}

tools = [_get_state_tool, _move_tool]

In [None]:
# JSON schema that constrains every model reply to one well-formed tool call.
#
# The two calls are spelled out as separate `oneOf` branches instead of relying on
# `if` / `then` / `else` / `not`: llama.cpp's schema-to-grammar conversion does not
# translate those conditional keywords, so "move needs parameters, get_state takes
# none" would not actually be enforced during sampling.
tool_format = {
    "type": "json_object",
    "schema": {
        "oneOf": [
            # get_state: no arguments.
            {
                "type": "object",
                "properties": {
                    "function": {"type": "string", "const": "get_state"},
                },
                "required": ["function"],
                "additionalProperties": False,
            },
            # move: exactly one argument, the direction.
            {
                "type": "object",
                "properties": {
                    "function": {"type": "string", "const": "move"},
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "direction": {
                                "type": "string",
                                "enum": ["top", "bottom", "left", "right"],
                            },
                        },
                        "required": ["direction"],
                        "additionalProperties": False,
                    },
                },
                "required": ["function", "parameters"],
                "additionalProperties": False,
            },
        ],
    },
}


In [None]:
# Create a simulation with the agent at its starting position.
simulation = GridSimulation()

In [None]:
# One-off request: check that the model answers with a tool call in the constrained JSON format.
llm.create_chat_completion(
    messages=[
        {
            "role": "system",
            "content": "The assistant has to solve the task given by the user. The assistant calls functions with appropriate input when necessary to solve the task. Assistant uses JSON to choose tools.",
        },
        {
            "role": "user",
            "content": "Complete the simulation by taking 1 to 2 with the given tools. You can use get_state to get the state of the simulation and move to move inside the simulation.",
        },
    ],
    response_format=tool_format,
)

In [None]:
import json

def run_conversation(messages, simulation, client=None, response_format=None):
    """Run a single agent turn against the simulation.

    The model is asked for its next tool call as JSON, the call is executed on
    ``simulation`` and the outcome is appended to ``messages`` as a user turn so
    that the next request sees it.  A reply that cannot be used (invalid JSON,
    not a JSON object, unknown function name) is answered with a short
    corrective user message instead of raising.

    Args:
        messages: Chat transcript (list of ``{"role": ..., "content": ...}``
            dicts).  It is extended in place and also returned.
        simulation: Object exposing ``get_state()`` and ``move(direction=...)``.
        client: Object with a ``create_chat_completion`` method.  Defaults to
            the notebook-level ``llm``.
        response_format: Value forwarded as ``response_format``.  Defaults to
            the notebook-level ``tool_format``.

    Returns:
        A tuple ``(messages, runs, complete, ideas)``: ``runs`` holds the result
        of the executed simulation call (empty if none was executed),
        ``complete`` is True once the simulation reports completion, and
        ``ideas`` holds the model message whenever it parsed to a JSON object.
    """
    # Fall back to the notebook-level model and schema when none are injected.
    if client is None:
        client = llm
    if response_format is None:
        response_format = tool_format

    runs = []
    ideas = []
    complete = False

    response = client.create_chat_completion(
        messages=messages,
        response_format=response_format,
    )
    response_message = response.get("choices")[0].get("message")
    response_message_content = response_message.get("content")

    # The model's reply always goes into the transcript, usable or not.
    messages.append(response_message)

    try:
        tool_call = json.loads(response_message_content)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers a missing (None) content.
        tool_call = None

    # Valid JSON that is not an object (null, a list, a number, ...) is just as unusable
    # as invalid JSON; without feedback the same prompt would simply be sent again.
    if not isinstance(tool_call, dict):
        messages.append({
            "role": "user",
            "content": "Wrong JSON format."
        })
        return messages, runs, complete, ideas

    ideas.append(response_message)

    available_functions = {
        "get_state": simulation.get_state,
        "move": simulation.move
    }
    function_name = tool_call.get("function")
    function_to_call = (
        available_functions.get(function_name) if isinstance(function_name, str) else None
    )
    if function_to_call is None:
        messages.append({
            "role": "user",
            "content": "Unknown function. Please choose get_state or move."
        })
        return messages, runs, complete, ideas

    if function_name == "move":
        function_args = tool_call.get("parameters")
        # A missing or malformed parameters object yields direction=None, which the
        # simulation reports back as an invalid direction.
        direction = function_args.get("direction") if isinstance(function_args, dict) else None
        function_response = function_to_call(direction=direction)
    else:
        function_response = function_to_call()

    runs.append(function_response)

    if function_response in ("Simulation completed.", "Simulation already completed."):
        complete = True

    messages.append(
        {
            "role": "user",
            "content": "Here is the result of the function call:\n" + function_response,
        }
    )

    return messages, runs, complete, ideas

## Run simulation until complete

In [None]:
# Initial transcript for the agent: a system prompt describing the JSON tool-calling
# protocol, followed by the task itself.
messages = [
    {
        "role": "system",
        "content": (
            "The assistant has to solve the task given by the user. "
            "The assistant calls functions with appropriate input when necessary to solve the task. "
            "Assistant uses JSON to choose tools."
        ),
    },
    {
        "role": "user",
        "content": (
            "Complete the simulation by taking user to goal location with the given tools. "
            "You can use `get_state` to get the locations of the user and goal and `move` to move inside the simulation.\n"
            "The grid is 4x4, so you should move bottom or right to reach goal."
        ),
    },
]

In [None]:
# Start from a fresh grid so the run below begins with the agent at its starting cell.
simulation = GridSimulation()

In [None]:
# Upper bound on agent turns so a model that never reaches the goal cannot loop forever.
MAX_STEPS = 30

for i in range(MAX_STEPS):
    print("## Thinking for step", i, "\n")
    messages, runs, complete, ideas = run_conversation(messages, simulation)
    for run in runs:
        print("## Simulation step\n")
        print(run)
    if complete:
        break
else:
    # Reached only when the loop was never broken out of, i.e. the goal was not reached.
    print("## Stopped after", MAX_STEPS, "steps without completing the simulation\n")

In [None]:
# Display the transcript accumulated in `messages`: prompts, model replies and tool results.
messages