# Introduction

This notebook demonstrates an example of using an LLM with the MPS backend (Apple silicon GPU) in PyTorch 2.x.

Reference:
* torch 2.x MPS Backend: https://pytorch.org/docs/stable/notes/mps.html

In [1]:
import os
import torch
import applyllm as apl

# Show which version of the applyllm package was imported.
print(apl.__version__)

0.0.6


In [2]:
# Report whether the Metal Performance Shaders (MPS) backend is available
# and, when it is, create and show a handle to the "mps" device.
if torch.backends.mps.is_available():
    print("MPS is available")
    mps_device = torch.device("mps")
    print(mps_device)
else:
    print("MPS is not available")



MPS is available
mps


## Define global variables

In [3]:
# Choose which local OpenAI-compatible server the notebook talks to.
# backend = "ollama"
backend = "lmstudio"

# Model identifier and base URL used for each supported backend.
BACKEND_SETTINGS = {
    "ollama": {
        "model": "gorilla-openfunctions-v2-q4_K_M",
        "endpoint": "http://localhost:11434/v1",
    },
    "lmstudio": {
        "model": "TheBloke/gorilla-openfunctions-v1-GGUF",
        # alternative model name: "gorilla-openfunctions-v1.Q8_0"
        "endpoint": "http://localhost:1234/v1",
    },
}

if backend not in BACKEND_SETTINGS:
    # Fail here with a clear message instead of leaving `model`/`endpoint`
    # undefined and hitting a NameError in a later cell.
    raise ValueError(
        f"Unknown backend {backend!r}; expected one of {sorted(BACKEND_SETTINGS)}"
    )

model = BACKEND_SETTINGS[backend]["model"]
endpoint = BACKEND_SETTINGS[backend]["endpoint"]

In [4]:
#sdk openai==0.28.1
import openai
import json

def get_gorilla_response(
    prompt="Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes",
    model="gorilla-openfunctions-v2-q4_K_M",
    endpoint="http://localhost:11434/v1",
    functions=None,
):
    """Send a function-calling prompt to an OpenAI-compatible chat endpoint.

    Uses the module-level API of the legacy ``openai`` SDK
    (``openai.ChatCompletion.create``).

    Args:
        prompt: Natural-language request from the user.
        model: Model identifier forwarded to the server.
        endpoint: Base URL of the server; assigned to ``openai.api_base``.
        functions: List of JSON-serialisable function specifications. They are
            embedded in the prompt text and also forwarded in the ``functions``
            field of the request. ``None`` (the default) means an empty list.

    Returns:
        The first entry of ``completion.choices``, or ``None`` when building or
        sending the request raised an exception (the exception, the model and
        the prompt are printed in that case).

    Side effects:
        Overwrites the module-level ``openai.api_key`` and ``openai.api_base``.
    """
    # Avoid a shared mutable default: create a fresh empty list per call.
    if functions is None:
        functions = []

    # Placeholder key; presumably the local server does not validate it (TODO confirm).
    openai.api_key = "EMPTY"
    openai.api_base = endpoint

    # Kept separate from `prompt` so the argument is not rebound; falls back to
    # the raw prompt in the error message if serialising `functions` fails.
    full_prompt = prompt
    try:
        # Wrap the question and the function specs in the <<question>>/<<function>> template.
        full_prompt = f'<<question>> {prompt} <<function>> {json.dumps(functions)}'
        completion = openai.ChatCompletion.create(
            # model="adrienbrault/gorilla-openfunctions-v2:Q4_K_M",
            model=model,
            temperature=0.01,
            top_p=1,
            messages=[{"role": "user", "content": full_prompt}],
            functions=functions,
        )
        # return completion.choices[0].message.content
        return completion.choices[0]
    except Exception as e:
        # Best effort: report the failure and hand back None instead of raising.
        print(e, model, full_prompt)
        return None

In [5]:
#sdk openai==1.17.0
# Unexecuted sketch of the same helper for the client-object API of openai>=1.0.
# Every line in this cell is commented out, so running it defines nothing.
# NOTE(review): as sketched, the model name and base_url are hard-coded and the
# `model` argument is only used in the error message; parameterise before enabling.
# from openai import OpenAI

# def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes", model="gorilla-openfunctions-v2-q4_K_M", functions=[]):
  
#   client = OpenAI(
#     api_key="EMPTY",
#     base_url="http://localhost:11434/v1"
#   )
#   try:
#     chat_completion = client.chat.completions.create(
#       # model="adrienbrault/gorilla-openfunctions-v2:Q4_K_M",
#       model="gorilla-openfunctions-v2-q4_K_M", # model from the ollama list
#       temperature=0.0,
#       messages=[{"role": "user", "content": prompt}],
#       functions=functions,
#       max_tokens=80
#     )
#     return chat_completion.choices[0]
#   except Exception as e:
#     print(e, model, prompt)

In [6]:
# Natural-language request that the model should turn into a function call.
query = "What's the weather like in the two cities of Boston and San Francisco?"

# Specification of the single function offered to the model: a name, a
# description and a JSON-Schema-style description of its parameters.
functions = [
    dict(
        name="get_current_weather",
        description="Get the current weather in a given location",
        parameters=dict(
            type="object",
            properties=dict(
                location=dict(
                    type="string",
                    description="The city and state, e.g. San Francisco, CA",
                ),
                unit=dict(type="string", enum=["celsius", "fahrenheit"]),
            ),
            required=["location"],
        ),
    )
]

# Query the backend selected earlier in the notebook; the bare `response`
# expression on the last line makes the cell display the returned object.
response = get_gorilla_response(
    prompt=query,
    model=model,
    endpoint=endpoint,
    functions=functions,
)
response

<OpenAIObject at 0x175c7cf50> JSON: {
  "index": 0,
  "message": {
    "role": "assistant",
    "content": "get_current_weather(location=\"Boston, MA\")"
  },
  "finish_reason": "stop"
}

In [7]:
# Print the same choice object as plain text.
print(response)

{
  "index": 0,
  "message": {
    "role": "assistant",
    "content": "get_current_weather(location=\"Boston, MA\")"
  },
  "finish_reason": "stop"
}
