In [31]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [56]:
import instructor
from pydantic import BaseModel, Field
from groq import Groq

from fastmcp.client import Client
from mcp.types import Tool


def transform_schema_to_parameters(schema: dict) -> dict:
    """Flatten an MCP tool ``inputSchema`` into a function-calling ``parameters`` object.

    Each entry of ``schema['properties']`` is reduced to ``type``,
    ``description`` and, when the source field declares one, ``default``.

    Args:
        schema: JSON-Schema-like dict such as ``Tool.inputSchema``.

    Returns:
        A dict of the form
        ``{"type": "object", "properties": {...}, "required": [...]}``.
        ``required`` is always a list (empty when the schema declares none).
    """
    properties = {}

    # `properties` may be missing or empty for tools that take no arguments.
    for field_name, field_info in (schema.get('properties') or {}).items():
        prop = {}
        # Fields expressed through `$ref` / `anyOf` have no top-level `type`;
        # omit the key instead of raising KeyError.
        if 'type' in field_info:
            prop["type"] = field_info['type']
        # The title is used as the description; fall back when it is absent.
        prop["description"] = (
            field_info.get('title') or field_info.get('description') or field_name
        )
        # Add default value if it exists
        if 'default' in field_info:
            prop["default"] = field_info['default']
        properties[field_name] = prop

    return {
        "type": "object",
        "properties": properties,
        # Never emit `"required": None`; JSON Schema expects an array here.
        "required": list(schema.get('required') or []),
    }
    
def transform_tool_definition(tool: Tool) -> dict:
    """Wrap an MCP tool in a ``{"type": "function", "function": {...}}`` envelope.

    The inner spec carries the tool's name, its description and the
    parameters produced by ``transform_schema_to_parameters`` from the
    tool's ``inputSchema``.
    """
    function_spec = {
        "name": tool.name,
        "description": tool.description,
        "parameters": transform_schema_to_parameters(tool.inputSchema),
    }
    return {"type": "function", "function": function_spec}

In [57]:
# MCP client pointed at the server's HTTP endpoint on localhost:8000.
mcp_client = Client("http://127.0.0.1:8000/mcp")
# Groq client; the key is looked up in the GROQ_API_KEY environment variable
# (`os.environ.get` yields None when the variable is not set).
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


In [58]:
# Open the MCP client for the duration of the block and print every tool it lists.
# NOTE: `tool` stays bound to the last listed tool after the loop; the
# inspection cells that follow read that leftover variable.
async with mcp_client:
    tools = await mcp_client.list_tools()
    for tool in tools:
        print(tool)

name='process_video' description='Process a video file and prepare it for searching.' inputSchema={'properties': {'video_path': {'title': 'Video Path', 'type': 'string'}}, 'required': ['video_path'], 'type': 'object'} annotations=None
name='list_tables' description='List all processed videos in the database.' inputSchema={'properties': {}, 'type': 'object'} annotations=None
name='get_clip_by_speech_sim' description='Get a video clip based on a user query using the transcripts index.' inputSchema={'properties': {'video_name': {'title': 'Video Name', 'type': 'string'}, 'user_query': {'title': 'User Query', 'type': 'string'}, 'top_k': {'default': 3, 'title': 'Top K', 'type': 'integer'}}, 'required': ['video_name', 'user_query'], 'type': 'object'} annotations=None
name='get_clip_by_image_sim' description='Get a video clip based on a user query using the image index.' inputSchema={'$defs': {'Base64ToPILImageModel': {'properties': {'image': {'title': 'Image', 'type': 'string'}}, 'required': 

In [59]:
# Name of the last tool from the listing loop (`tool` is the leftover loop variable).
tool.name

'get_clip_by_caption_sim'

In [60]:
# Description of the same leftover `tool` object.
tool.description

'Get a video clip based on a user query using the caption index.'

In [61]:
# Raw input schema of the leftover `tool`, before any transformation.
print(tool.inputSchema)

{'properties': {'video_name': {'title': 'Video Name', 'type': 'string'}, 'user_query': {'title': 'User Query', 'type': 'string'}, 'top_k': {'default': 3, 'title': 'Top K', 'type': 'integer'}}, 'required': ['video_name', 'user_query'], 'type': 'object'}


In [62]:
# Sanity check: run the schema converter on the leftover `tool`'s input schema.
transform_schema_to_parameters(tool.inputSchema)

{'type': 'object',
 'properties': {'video_name': {'type': 'string', 'description': 'Video Name'},
  'user_query': {'type': 'string', 'description': 'User Query'},
  'top_k': {'type': 'integer', 'description': 'Top K', 'default': 3}},
 'required': ['video_name', 'user_query']}

In [63]:
# Fetch the tool list again, this time keeping it under the dedicated name
# `mcp_tools` for the cells that build the LLM tool definitions.
async with mcp_client:
    mcp_tools = await mcp_client.list_tools()

In [64]:
# Display the fetched Tool objects.
mcp_tools

[Tool(name='process_video', description='Process a video file and prepare it for searching.', inputSchema={'properties': {'video_path': {'title': 'Video Path', 'type': 'string'}}, 'required': ['video_path'], 'type': 'object'}, annotations=None),
 Tool(name='list_tables', description='List all processed videos in the database.', inputSchema={'properties': {}, 'type': 'object'}, annotations=None),
 Tool(name='get_clip_by_speech_sim', description='Get a video clip based on a user query using the transcripts index.', inputSchema={'properties': {'video_name': {'title': 'Video Name', 'type': 'string'}, 'user_query': {'title': 'User Query', 'type': 'string'}, 'top_k': {'default': 3, 'title': 'Top K', 'type': 'integer'}}, 'required': ['video_name', 'user_query'], 'type': 'object'}, annotations=None),
 Tool(name='get_clip_by_image_sim', description='Get a video clip based on a user query using the image index.', inputSchema={'$defs': {'Base64ToPILImageModel': {'properties': {'image': {'title': 

In [66]:
# Expose only the transcript-search tool to the LLM. Look it up by name rather
# than by position so this cell does not depend on the order in which the
# server lists its tools.
SPEECH_TOOL_NAME = "get_clip_by_speech_sim"
tools = [
    transform_tool_definition(mcp_tool)
    for mcp_tool in mcp_tools
    if mcp_tool.name == SPEECH_TOOL_NAME
]
assert tools, f"MCP server did not list a tool named {SPEECH_TOOL_NAME!r}"

In [80]:

MODEL = 'llama-3.3-70b-versatile'
USER_PROMPT = ""  # NOTE(review): not referenced in this cell; the user message below is hard-coded.

# Derive the tool names from `tools` so the system prompt can never mention a
# tool that is not actually sent with the request.
registered_tool_names = ", ".join(
    f"`{tool_def['function']['name']}`" for tool_def in tools
)

messages = [
    {
        "role": "system",
        "content": (
            "You are a video processing assistant. "
            f"You can find video clips using the following tool(s): {registered_tool_names}."
        ),
    },
    {
        "role": "user",
        "content": "Hey! What's your name?",
    }
]

# Single, non-streaming completion; the model decides on its own
# (`tool_choice="auto"`) whether to call one of the registered tools.
response = groq_client.chat.completions.create(
    model=MODEL,
    messages=messages,
    stream=False,
    tools=tools,
    tool_choice="auto",
    max_completion_tokens=4096
)

In [81]:
# Take the first choice's message and pull out its tool calls for inspection.
response_message = response.choices[0].message
tool_calls = response_message.tool_calls

In [83]:
# Text content of the assistant's reply.
response_message.content

"I don't have a personal name, but I'm here to help you with video processing tasks. How can I assist you today? Do you need help finding a specific video clip or would you like to know more about a particular video?"

In [82]:
# Tool calls requested by the model, if any.
tool_calls

In [76]:
# Model identifier for the structured-output experiment in this cell.
MODEL = 'llama-3.3-70b-versatile'

# Hand-written description of a single demo tool. It is interpolated into the
# system prompt by `run_conversation`; it is not passed through a `tools=` argument.
tool_schema = {
    "name": "get_weather_info",
    "description": "Get the weather information for any location.",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The location for which we want to get the weather information (e.g., New York)"
            }
        },
        "required": ["location"]
    }
}

# Pydantic model for one tool invocation chosen by the LLM.
# `tool_parameters` is kept as a JSON-encoded string, not a parsed dict.
class ToolCall(BaseModel):
    input_text: str = Field(description="The user's input text")
    tool_name: str = Field(description="The name of the tool to call")
    tool_parameters: str = Field(description="JSON string of tool parameters")

# Top-level structured response: the list of tool calls the model wants to make.
# Used as `response_model` in `run_conversation`.
class ResponseModel(BaseModel):
    tool_calls: list[ToolCall]

# Wrap a Groq client with instructor in JSON mode; `run_conversation` passes
# `response_model=` through this wrapper.
# NOTE: this builds a new `Groq()` without an explicit api_key, unlike
# `groq_client`, which is created with `api_key=os.environ.get("GROQ_API_KEY")`.
client = instructor.from_groq(Groq(), mode=instructor.Mode.JSON)

def run_conversation(user_prompt: str) -> "list[ToolCall]":
    """Ask the model which tool call(s) it would make for ``user_prompt``.

    The tool description in ``tool_schema`` is embedded in the system prompt,
    and the reply is parsed into ``ResponseModel`` by the instructor-wrapped
    ``client``.

    Args:
        user_prompt: The user's message, sent verbatim as the ``user`` turn.

    Returns:
        The ``tool_calls`` list of the parsed ``ResponseModel``.
    """
    # Prepare the messages
    messages = [
        {
            "role": "system",
            "content": f"You are an assistant that can use tools. You have access to the following tool: {tool_schema}"
        },
        {
            "role": "user",
            "content": user_prompt,
        }
    ]

    # Make the Groq API call. Use the notebook-level MODEL constant so the
    # model id is configured in exactly one place.
    response = client.chat.completions.create(
        model=MODEL,
        response_model=ResponseModel,
        messages=messages,
        temperature=0.5,
        max_completion_tokens=1000,
    )

    return response.tool_calls

# Example usage: run one prompt through `run_conversation` and print each
# proposed tool call.
# NOTE: this rebinds the notebook-level name `tool_calls`, which another cell
# also assigns from `response_message.tool_calls`.
user_prompt = "What's the weather like in San Francisco?"
tool_calls = run_conversation(user_prompt)

for call in tool_calls:
    print(f"Input: {call.input_text}")
    print(f"Tool: {call.tool_name}")
    print(f"Parameters: {call.tool_parameters}")
    print()

Input: What's the weather like in San Francisco?
Tool: get_weather_info
Parameters: {"location": "San Francisco"}

