# MLX Server Function Calling Example

This is a detailed, text-based walkthrough of function calling with MLX Server through its OpenAI-compatible API.

## Setup

In [4]:
from openai import OpenAI

## Initialize the client

Connect to your local MLX server:

In [5]:
# OpenAI client pointed at the server endpoint on localhost port 8000
client = OpenAI(base_url="http://localhost:8000/v1", api_key="mlx-server-api-key")

## Function calling example

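This example demonstrates how to use function calling with the MLX server. The request sets `enable_thinking` to `True` through `extra_body`, so the response shown below carries the model's `<think>` reasoning in `content` alongside the `get_weather` tool call: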

In [10]:
# Conversation sent to the model: a single user turn asking about the weather
messages = [{"role": "user", "content": "What is the weather in Tokyo?"}]

# Define the available tools/functions.
# Each entry gives the function's name, a natural-language description, and a
# JSON Schema describing the arguments the model should produce.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather in a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city to get the weather for",
                    }
                },
                # `city` is the function's only argument; marking it as
                # required keeps the schema from allowing a call without it.
                "required": ["city"],
            },
        },
    }
]

# Send a non-streaming chat completion request with the tool definitions attached
completion = client.chat.completions.create(
    model="mlx-server-model",
    messages=messages,
    tools=tools,
    tool_choice="auto",
    max_tokens=512,
    # enable_thinking is passed through extra_body as an additional request field
    extra_body={"enable_thinking": True},
)

# Show the complete response object
print(completion)

ChatCompletion(id='chatcmpl-1746341897231150', choices=[Choice(finish_reason='function_call', index=0, logprobs=None, message=ChatCompletionMessage(content='<think>\nOkay, the user is asking for the weather in Tokyo. Let me check the tools provided. There\'s a function called get_weather that takes a city parameter. So I need to call that function with the city set to Tokyo. I\'ll make sure the JSON is correctly formatted with the city name as a string. Let me double-check the parameters. The function requires "city" as a string, so the arguments should be {"city": "Tokyo"}. Alright, that\'s all I need for the tool call.\n</think>', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call-1746341897436269', function=Function(arguments='{"city": "Tokyo"}', name='get_weather'), type='function')]))], created=1746341897, model='mlx-server-model', object='chat.completion', service_tier=None, system_fingerprint=None,

## Streaming version

In [9]:
# Request the completion as a stream (stream=True), this time with thinking disabled
completion = client.chat.completions.create(
    model="mlx-server-model",
    messages=messages,
    tools=tools,
    tool_choice="auto",
    stream=True,
    extra_body={"enable_thinking": False},
)

# Iterate over the response and print each chunk in turn
for chunk in completion:
    print(chunk)

ChatCompletionChunk(id='chatcmpl-1746341853477872', choices=[Choice(delta=ChoiceDelta(content=None, function_call=ChoiceDeltaFunctionCall(arguments='', name='get_weather'), refusal=None, role='assistant', tool_calls=[ChoiceDeltaToolCall(index=0, id='call-1746341853501513', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]), finish_reason=None, index=0, logprobs=None)], created=1746341853, model='mlx-server-model', object='chat.completion.chunk', service_tier=None, system_fingerprint=None, usage=None)
ChatCompletionChunk(id='chatcmpl-1746341853345993', choices=[Choice(delta=ChoiceDelta(content=None, function_call=ChoiceDeltaFunctionCall(arguments='{"city": "Tokyo"}', name=None), refusal=None, role='assistant', tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"city": "Tokyo"}', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1746341853, model='mlx-server-model', objec