# Debug Gorilla OpenFunctions v2

https://github.com/ShishirPatil/gorilla/tree/main/openfunctions

In [10]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


In [21]:
from speechless.agents.openfunctionsv2.utils.python_parser import parse_python_function_call
# from speechless.agents.openfunctionsv2.utils.java_parser import parse_java_function_call
# from speechless.agents.openfunctionsv2.utils.js_parser import parse_javascript_function_call

FN_CALL_DELIMITER = "<<function>>"

def strip_function_calls(content: str) -> list[str]:
    """
    Split the content by the function call delimiter and remove empty strings
    """
    # return [element.strip() for element in content.split(FN_CALL_DELIMITER)[2:] if element.strip()]
    return [element.strip() for element in content.split(FN_CALL_DELIMITER)[1:] if element.strip()]

def parse_function_call(call: str) -> dict[str, any]:
    """
    This is temporary. The long term solution is to union all the 
    types of the parameters from the user's input function definition,
    and check which language is a proper super set of the union type.
    """
    try:
        return parse_python_function_call(call)
    except:
        return None
    # except Exception as e:
    #     # If Python parsing fails, try Java parsing
    #     try:
    #         java_result = parse_java_function_call(call)
    #         if not java_result:
    #             raise Exception("Java parsing failed")
    #         return java_result
    #     except Exception as e:
    #         # If Java parsing also fails, try JavaScript parsing
    #         try:   
    #             javascript_result = parse_javascript_function_call(call)
    #             if not javascript_result:
    #                 raise Exception("JavaScript parsing failed")
    #             return javascript_result
    #         except:
    #             return None

In [22]:

def get_prompt(user_query: str, functions: list = []) -> str:
    """
    Generates a conversation prompt based on the user's query and a list of functions.

    Parameters:
    - user_query (str): The user's query.
    - functions (list): A list of functions to include in the prompt.

    Returns:
    - str: The formatted conversation prompt.
    """
    system = "You are an AI programming assistant, utilizing the Gorilla LLM model, developed by Gorilla LLM, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer."
    if len(functions) == 0:
        return f"{system}\n### Instruction: <<question>> {user_query}\n### Response: "
    functions_string = json.dumps(functions)
    return f"{system}\n### Instruction: <<function>>{functions_string}\n<<question>>{user_query}\n### Response: "


def format_response(response: str):
    """
    Formats the response from the OpenFunctions model.

    Parameters:
    - response (str): The response generated by the LLM.

    Returns:
    - str: The formatted response.
    - dict: The function call(s) extracted from the response.

    """
    function_call_dicts = None
    try:
        response = strip_function_calls(response)
        print(f"{response=}")
        # Parallel function calls returned as a str, list[dict]
        if len(response) > 1: 
            function_call_dicts = []
            for function_call in response:
                function_call_dicts.append(parse_function_call(function_call))
            response = ", ".join(response)
        # Single function call returned as a str, dict
        else:
            function_call_dicts = parse_function_call(response[0])
            response = response[0]
    except Exception as e:
        # Just faithfully return the generated response str to the user
        pass
    return response, function_call_dicts

In [23]:

def test_demo():
    # # Device setup
    # device : str = "cuda:0" if torch.cuda.is_available() else "cpu"
    # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # # Model and tokenizer setup
    # model_id : str = "gorilla-llm/gorilla-openfunctions-v2"
    # tokenizer = AutoTokenizer.from_pretrained(model_id)
    # model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True)

    # # Move model to device
    # model.to(device)

    # # Pipeline setup
    # pipe = pipeline(
    #     "text-generation",
    #     model=model,
    #     tokenizer=tokenizer,
    #     max_new_tokens=128,
    #     batch_size=16,
    #     torch_dtype=torch_dtype,
    #     device=device,
    # )

    from llama_cpp import Llama
    from llama_cpp.llama_tokenizer import LlamaHFTokenizer
    FUNCTION_CALLING_MODELS_DIR = "/opt/local/llm_models/GGUF/function-calling"
    # ---------- Gorilla OpenFunctions v2 ----------
    model_path = f"{FUNCTION_CALLING_MODELS_DIR}/gorilla-openfunctions-v2.Q4_K_M.gguf"
    # ---------- functionary-small-v2.4-GGUF ----------
    # FUNCTIONARY_MODELS_DIR=f"{FUNCTION_CALLING_MODELS_DIR}/meetkai/functionary-small-v2.4-GGUF"
    # model_path=f"{FUNCTIONARY_MODELS_DIR}/functionary-small-v2.4.Q8_0.gguf"
    # ---------- functionary-medium-v2.4-GGUF ----------
    # FUNCTIONARY_MODELS_DIR=f"{FUNCTION_CALLING_MODELS_DIR}/meetkai/functionary-medium-v2.4-GGUF"
    # model_path=f"{FUNCTIONARY_MODELS_DIR}/functionary-medium-v2.4.Q4_0.gguf"

    llm = Llama(
        model_path=model_path,
        # chat_format="functionary-v2",
        # tokenizer=LlamaHFTokenizer.from_pretrained(FUNCTIONARY_MODELS_DIR),
        n_gpu_layers=-1
    )

    # Example usage 1
    #  This should return 2 functions with the right argument
    query_1: str = "What's the weather like in the two cities of Boston and San Francisco?"
    functions_1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]

    # Example usage 2
    #  This should return an error since the function cann't help with the prompt
    query_2: str = "What is the freezing point of water at a pressure of 10 kPa?"
    functions_2 = [{"name": "thermodynamics.calculate_boiling_point", "description": "Calculate the boiling point of a given substance at a specific pressure.", "parameters": {"type": "object", "properties": {"substance": {"type": "string", "description": "The substance for which to calculate the boiling point."}, "pressure": {"type": "number", "description": "The pressure at which to calculate the boiling point."}, "unit": {"type": "string", "description": "The unit of the pressure. Default is 'kPa'."}}, "required": ["substance", "pressure"]}}]

    # Generate prompt and obtain model output
    prompt_1 = get_prompt(query_1, functions=functions_1)

    # output_1 = pipe(prompt_1)
    output_1 = llm(prompt_1)
    print(f"{output_1=}")
    print(f"{output_1['choices'][0]['text']}")

    # fn_call_string, function_call_dict = format_response(output_1[0]['generated_text'])
    fn_call_string, function_call_dict = format_response(output_1['choices'][0]['text'])
    print("--------------------")
    print(f"Function call strings 1(s): {fn_call_string}")
    print("--------------------")
    print(f"OpenAI compatible `function_call`: {function_call_dict}")
    print("--------------------")

test_demo()

llama_model_loader: loaded meta data with 22 key-value pairs and 273 tensors from /opt/local/llm_models/GGUF/function-calling/gorilla-openfunctions-v2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 30
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7: 

llama_model_loader: - kv  13:                      tokenizer.ggml.tokens arr[str,102400]  = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,102400]  = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,102400]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv  16:                      tokenizer.ggml.merges arr[str,99757]   = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "h e...
llama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 100000
llama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 100015
llama_model_loader: - kv  19:            tokenizer.ggml.padding_token_id u32              = 100001
llama_model_loader: - kv  20:                    tokenizer.chat_template str              = {% if not add_generation_prompt is de...
llama_model_loader: - kv  21:             

output_1={'id': 'cmpl-db812936-6cde-4e2b-80c1-5ead6ca5697b', 'object': 'text_completion', 'created': 1713494467, 'model': '/opt/local/llm_models/GGUF/function-calling/gorilla-openfunctions-v2.Q4_K_M.gguf', 'choices': [{'text': " <<function>>get_current_weather(location='San Francisco, CA')", 'index': 0, 'logprobs': None, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 181, 'completion_tokens': 16, 'total_tokens': 197}}
 <<function>>get_current_weather(location='San Francisco, CA')
response=["get_current_weather(location='San Francisco, CA')"]
--------------------
Function call strings 1(s): get_current_weather(location='San Francisco, CA')
--------------------
OpenAI compatible `function_call`: {'name': 'get_current_weather', 'arguments': {'location': 'San Francisco, CA'}}
--------------------
