In [None]:
# Create docker container for agent environment
import docker
import os 
import dotenv

# Load variables from a local .env file (if one exists) into the process environment.
dotenv.load_dotenv()

# Create a Docker client from the environment
client = docker.from_env()

# Get GH token for authorization
gh_token = os.getenv("GH_TOKEN")
if not gh_token:
    raise Exception("GH_TOKEN is not set")

work_dir = "/git-agent"

# Start from the plain `ubuntu` base image. Git is NOT pre-installed in it, so it is
# installed by the first setup step below.
container = client.containers.run(
    image="ubuntu:latest",              # Base image
    command="sleep infinity",           # Keep container running
    detach=True,
    tty=True,                           # Enables command execution like a shell
    stdin_open=True,
    working_dir=work_dir
)


def _run_setup_step(cmd: str, failure_message: str, **exec_kwargs) -> None:
    """Run one provisioning command in the container, tearing it down on failure.

    Args:
        cmd: Command line handed to ``container.exec_run``.
        failure_message: Prefix of the exception message raised on a non-zero exit.
        **exec_kwargs: Extra keyword arguments forwarded to ``exec_run``
            (e.g. ``environment``).

    Raises:
        Exception: If the command exits non-zero. The container is stopped and
            removed first so a failed setup does not leave it running.
    """
    exit_code, output = container.exec_run(cmd, **exec_kwargs)
    if exit_code != 0:
        # exec_run is called without demux, so `output` is a single bytes blob
        # (stdout and stderr combined) - it must not be unpacked as a pair.
        details = (output or b"").decode("utf-8", errors="replace")
        container.stop()
        container.remove()
        raise Exception(f"{failure_message}: {details}")


# Install git inside the container
_run_setup_step(
    "bash -c 'apt-get update && apt-get install -y git'",
    "Failed to install git",
)
print(f"Successfully installed git on container: {container.id}")

# Configure git
# Write credentials. The token is passed through the exec environment so that it is
# expanded by bash inside the container rather than interpolated into the command text.
# NOTE(review): the stored entry carries the token in the user position and no
# password - confirm git's `store` helper accepts that form.
_run_setup_step(
    "bash -c 'echo \"https://${GITHUB_TOKEN}@github.com\" > ~/.git-credentials'",
    "Failed to write credentials",
    environment={"GITHUB_TOKEN": gh_token},
)

# Store credentials
_run_setup_step(
    "git config --global credential.helper store",
    "Failed to tell Git to use stored credentials",
)

print(f"Successfully configured git on container: {container.id}")


Successfully installed git on container: 7b14b3cab456fbec4f07b8b9fce617aa2f7c0bc6843eb3b7b0eaaa681ad696ce
Successfully configured git on container: 7b14b3cab456fbec4f07b8b9fce617aa2f7c0bc6843eb3b7b0eaaa681ad696ce


In [22]:
# Define tools
# Prefix used to run a command string through bash inside the container.
BASH_CMD = "bash -c"
# Git executable name; prepended to the sub-command given to execute_git_command.
GIT_CMD = "git"

def parse_command(cmd: str) -> bool:
    """
    Checks whether the first word of a command is on the allow-list.

    Only the first whitespace-separated word is inspected; the rest of the
    command is not validated.

    Args:
        cmd (str): The command line (or git sub-command line) to check.

    Returns:
        bool: True if the first word is an allowed unix command or git
        sub-command. False otherwise, including for empty or whitespace-only input.
    """
    allowed_unix = ["ls", "pwd", "cat", "mkdir"]
    allowed_git = ["add", "commit", "push", "pull", "status"]
    allowed_commands = set(allowed_unix + allowed_git)

    tokens = cmd.split()
    # An empty / whitespace-only command has no first word; reject it instead of
    # raising IndexError on tokens[0].
    if not tokens:
        return False

    return tokens[0] in allowed_commands

def execute_command(cmd: str) -> str:
    """
    Runs a command in the container's working directory and returns its output as text.

    Args:
        cmd (str): The full command line passed to `container.exec_run`.

    Returns:
        str: The decoded command output. If the command exits with a non-zero
        status, the output is returned prefixed with 'Error: '.
    """
    print(f"Executing command: {cmd} in work dir: {work_dir}")
    exit_code, output = container.exec_run(cmd, workdir=work_dir)
    # exec_run is called without demux, so `output` is one bytes blob holding
    # everything the command wrote. Decode leniently: a command may emit
    # non-UTF-8 bytes (e.g. `cat` on a binary file) and that must not raise.
    text = (output or b"").decode("utf-8", errors="replace")
    if exit_code != 0:
        return f"Error: {text}"
    return text



def execute_unix_command(cmd: str) -> str:
    """
    Executes a Unix shell command inside the Docker container using bash.

    The command is shell-quoted before being appended to the bash prefix, so a
    command that itself contains quotes (e.g. "echo 'hi' > note.txt") reaches
    bash as one intact script instead of being split apart.

    Args:
        cmd (str): The Unix command to execute (e.g., 'ls', 'pwd', 'mkdir test').

    Returns:
        str: The output of the command, or an error message if the command fails.

    Examples:
        >>> execute_unix_command('ls')
        'file1\\nfile2\\n'
        >>> execute_unix_command('pwd')
        '/git-agent\\n'
    """
    import shlex  # local import so this definition does not depend on another cell

    return execute_command(f"{BASH_CMD} {shlex.quote(cmd)}")

def execute_git_command(cmd: str) -> str:
    """
    Executes a Git command inside the Docker container using bash and git.

    The git command line is shell-quoted as a whole, so arguments containing
    quotes (e.g. "commit -m 'two words'") are passed to git intact. A redundant
    leading 'git' word is dropped, so 'git status' behaves like 'status'
    instead of running 'git git status'.

    Args:
        cmd (str): The Git sub-command and its arguments
            (e.g., 'status', 'add .', 'commit -m \"msg\"').

    Returns:
        str: The output of the command, or an error message if the command fails.

    Examples:
        >>> execute_git_command('status')
        'On branch master\\nnothing to commit, working tree clean\\n'
        >>> execute_git_command('add .')
        ''
    """
    import shlex  # local import so this definition does not depend on another cell

    git_args = cmd
    words = cmd.split(None, 1)
    # Callers sometimes include the executable name themselves; strip it so the
    # prefix is not doubled.
    if words and words[0] == GIT_CMD:
        git_args = words[1] if len(words) > 1 else ""

    script = f"{GIT_CMD} {git_args}"
    return execute_command(f"{BASH_CMD} {shlex.quote(script)}")

def call_tool(tool: str, cmd: str) -> str:
    """
    Dispatches a tool request to the matching executor.

    Args:
        tool (str): Name of the tool, either 'execute_unix_command' or
            'execute_git_command'.
        cmd (str): The command string forwarded to the selected tool.

    Returns:
        str: Whatever the selected tool returns, or an 'Error: Invalid tool'
        message when the name matches neither tool.
    """
    handlers = {
        "execute_unix_command": execute_unix_command,
        "execute_git_command": execute_git_command,
    }
    handler = handlers.get(tool)
    if handler is None:
        return f"Error: Invalid tool: {tool}"
    return handler(cmd)


In [None]:
# Test commands
# Smoke test: create a directory, create a file inside it, then list the directory.
for sanity_cmd in (
    "mkdir -p sanity-check",
    "touch sanity-check/sanity-check-file",
    "ls sanity-check",
):
    print(execute_unix_command(sanity_cmd))

Executing command: bash -c 'mkdir -p sanity-check-3' in work dir: /git-agent

Executing command: bash -c 'touch sanity-check-3/sanity-check-file' in work dir: /git-agent

Executing command: bash -c 'ls sanity-check-3' in work dir: /git-agent
sanity-check-file



In [35]:
# Define agent
from openai import OpenAI
from pydantic import BaseModel
import instructor


# System message for the agent. The tool descriptions must match the real tool
# functions: each takes a single `cmd` string, and execute_git_command adds the
# `git` prefix itself.
system_prompt = """
You are a git agent. You will be given tasks to complete git-based operations like creating a new local repository and pushing it to a remote repository.

You have access to two tools:
1. execute_unix_command(cmd: str) -> str: This tool is for executing unix commands (e.g. ls, pwd, mkdir, etc.)
2. execute_git_command(cmd: str) -> str: This tool is for executing git commands. Pass only the subcommand and its arguments WITHOUT the leading `git` (e.g. `init`, `add .`, `commit -m "msg"`, `push`); the tool adds the `git` prefix itself.

Beware of the following invariant: You cannot cd, as you are in a container environment and interactions are done via `container.exec_run(cmd, workdir=work_dir)`.
Where `work_dir` will always be equal to `/git-agent`. This means `cd`'s are not persisted between `container.exec_run` calls.
You must respect this working directory in your tool calls, meaning if you wanted to create a new directory, you would not prefix the directory with an absolute path.

You may call one tool per turn, for up to 15 turns before giving your final answer.

In each turn you should respond in the following format:

<think>
[your thoughts here]
</think>
<tool>
JSON with the following fields:
- tool: The name of the tool to call
- args: The command string to pass to the tool as `cmd`
</tool>

Use the tools to verify the completion of the task. E.g. to create a new directory you could use `ls` to verify the directory was created.

Only when you have completed the final goal, e.g. in the case of the task of creating a new directory, when the directory is created - say the task is finished in the answer tags below.
This is the signal that the task is complete.

<answer>
[your final answer here]
</answer>
"""

# One tool invocation requested by the model.
class ToolCall(BaseModel):
    # Name of the tool; the agent loop hands it to call_tool as `tool`.
    tool: str
    # Command string; the agent loop hands it to call_tool as `cmd`.
    args: str

# Structured reply for a single agent turn; used as `response_model` in the agent loop.
# All three fields are required (no defaults), so the loop treats an empty string
# as "not provided".
class AgentResponse(BaseModel):
    # The model's reasoning for this turn.
    think: str
    # Tool to run this turn; an empty `tool` name means no tool call.
    tool_call: ToolCall
    # Final answer; non-empty only once the task is finished.
    answer: str


# OpenAI client built with default settings
# (presumably picks up the API key from the environment - confirm).
oai = OpenAI()
# instructor wrapper around the client; the agent loop calls it with `response_model`.
oai_instructor = instructor.from_openai(oai)

In [None]:
# Agent loop
import json  # stdlib; used to record each assistant turn in the conversation history

repo_name = "test-repo"

MAX_TURNS = 15     # turn budget; matches the limit stated in the system prompt
MAX_RETRIES = 5    # attempts per turn before giving up on the model

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": f"Create a new local repository called {repo_name}"}
]


def _request_agent_response(history, max_attempts=MAX_RETRIES):
    """Ask the model for the next turn, retrying on errors and on empty replies.

    Every attempt counts against `max_attempts`, whether it raised or came back
    with neither a tool call nor an answer, so this can never loop forever.

    Returns:
        The first AgentResponse carrying a tool name or an answer, or None if
        no attempt produced one.
    """
    for _ in range(max_attempts):
        try:
            candidate = oai_instructor.chat.completions.create(
                model="gpt-4.1",
                messages=history, # type: ignore
                response_model=AgentResponse
            )
        except Exception as e:
            print(f"Error: {e}")
            continue
        if len(candidate.tool_call.tool) > 0 or len(candidate.answer) > 0:
            return candidate
    return None


final_answer = ""

turns = 0
while turns < MAX_TURNS:
    turns += 1
    response = _request_agent_response(messages)
    print("=== Turn", turns, "===")
    if response is None:
        # Never fall through with undefined or stale values from a previous turn.
        print("Error: No tool call or answer found")
        break

    thinking = response.think.strip()
    tool_name = response.tool_call.tool
    tool_args = response.tool_call.args

    if len(thinking) > 0:
        # This is not ideal, and is hacky. Struggling to get the answer tags filled.
        lowered = thinking.lower()
        if any(marker in lowered for marker in ("complete", "done", "finished")):
            final_answer = thinking
            break
        print(f"Thinking: {thinking}")

    if len(tool_name) > 0:
        tool_result = call_tool(tool_name, tool_args)
        print(f"Tool call: {tool_name}")
        print(f"Args: {tool_args}")
        print(f"Tool result: {tool_result}")
        # Record the assistant's own turn before the tool result. Without it the
        # model cannot see what it already did and repeats earlier commands.
        messages.append({
            "role": "assistant",
            "content": (
                f"<think>\n{thinking}\n</think>\n"
                f"<tool>\n{json.dumps({'tool': tool_name, 'args': tool_args})}\n</tool>"
            ),
        })
        # Successful commands are often silent; say so explicitly rather than
        # sending an empty message.
        messages.append({
            "role": "user",
            "content": f"Tool result:\n{tool_result if tool_result else '(no output)'}",
        })
    elif len(response.answer) > 0:
        final_answer = response.answer
        break
    else:
        print("Error: No tool call or answer found")
        break

if len(final_answer) > 0:
    print(f"Final answer: {final_answer}")

# Verify the result by checking that the repository directory contains a `.git` entry.
# Compare whole entries so that e.g. `.gitignore` alone does not count as a match.
res = execute_unix_command(f"ls -a {repo_name}")
if ".git" in res.split():
    print("Git repository created successfully")
else:
    print(f"Git repository not created: {res}")



=== Turn 1 ===
Thinking: To create a new local repository called 'test-repo', first create the directory, then run 'git init' inside it.
Executing command: bash -c 'mkdir test-repo' in work dir: /git-agent
Tool call: execute_unix_command
Args: mkdir test-repo
Tool result: 
=== Turn 2 ===
Thinking: To create a new local Git repository called test-repo, I need to first create a directory named 'test-repo' and then initialize a Git repository in that directory.
Executing command: bash -c 'mkdir test-repo' in work dir: /git-agent
Tool call: execute_unix_command
Args: mkdir test-repo
Tool result: Error: mkdir: cannot create directory 'test-repo': File exists

=== Turn 3 ===
Thinking: The directory 'test-repo' already exists, so I will initialize a git repository inside it if one is not already present. First, I will check if it already contains a git repo by listing its contents, then proceed accordingly.
Executing command: bash -c 'ls test-repo' in work dir: /git-agent
Tool call: execute_u