# [Litellm](https://github.com/BerriAI/litellm)
Proxy for accessing other models through the same OpenAI methods and responses.

In [2]:
import os
import litellm
from litellm import completion
litellm.set_verbose = True

In [3]:
os.environ["HUGGINGFACE_API_KEY"] = ""

In [4]:
prompt = "What is the weather in Gainesville?"

# General Usage

In [46]:
response = completion(
  model="huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1", 
  messages=[{ "content": prompt,"role": "user"}], 
)

print("\n\n", response.choices[0].message.content)



[92mRequest to litellm:[0m
[92mlitellm.completion(model='huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1', messages=[{'content': 'What is the weather in Gainesville?', 'role': 'user'}])[0m


self.optional_params: {}
kwargs[caching]: False; litellm.cache: None
self.optional_params: {}
mistralai/Mixtral-8x7B-Instruct-v0.1, text-generation-inference
response: [{'generated_text': " I don't have real-time data or location tracking capabilities, so I can't provide you with the current weather in Gainesville. However, I can tell you that Gainesville is located in the state of Florida, USA, and its climate is characterized as humid subtropical, with hot, humid summers and mild, dry winters. The city's weather can be quite variable, with occasional cold fronts bringing cooler temperatures in the winter and", 'details': {'finish_reason': 'length', 'generated_tokens': 100, 'seed': None, 'prefill': [], 'tokens': [{'id': 315, 'text': ' I', 'logprob': -0.006515503, 'special': False}, {'id': 9

In [47]:
import numpy as np
from litellm import embedding
import os
response = embedding(
    model='huggingface/microsoft/codebert-base',
    input=[prompt]
)
# show the first 10 values of the embedding
print("\n", response.data[0]["embedding"][:10])
print("\nshape", np.array(response.data[0]["embedding"]).shape)



[92mRequest to litellm:[0m
[92mlitellm.embedding(model='huggingface/microsoft/codebert-base', input=['What is the weather in Gainesville?'])[0m


self.optional_params: {}
kwargs[caching]: False; litellm.cache: None
self.optional_params: {}
Looking up model=microsoft/codebert-base in model_cost_map
 [-0.08081008493900299, 0.38493189215660095, 0.0964692234992981, -0.007553956937044859, 0.1546626091003418, -0.15101437270641327, -0.11523221433162689, 0.026286721229553223, 0.09007897228002548, -0.16575567424297333]

shape (768,)



In [52]:
litellm.set_verbose = False

response = completion(
  model="huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1", 
  messages=[{ "content": prompt, "role": "user"}], 
  api_base="https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1",
  stream=True,
)

for chunk in response:
  print(chunk.choices[0].delta.content, end="")

litellm.set_verbose = True

I don't have real-time data or the ability to provide current weather updates. I recommend checking a reliable weather forecasting website or app for the most accurate and up-to-date information on the weather in Gainesville. Gainesville is located in the state of Florida, USA, and is known for its subtropical climate with hot, humid summers and mild, dry winters. However, the weather can vary throughout the year, so it's alwaysGoes into checking if chunk has hiddden created at param
Chunks have a created at hidden param
Chunks sorted
token_counter messages received: [{'content': 'What is the weather in Gainesville?', 'role': 'user'}]
Token Counter - using generic token counter, for model=mistralai/Mixtral-8x7B-Instruct-v0.1
LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
Token Counter - using generic token counter, for model=mistralai/Mixtral-8x7B-Instruct-v0.1
LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
Looking up model=mistralai/Mixtral-8x7B-Instr

### How was it instant?
### What is contained in the chunks of the stream (full generatio or tokens)?

By superimposing to the prompt the model is able to exhibit functionalities of other LLMs
https://github.com/BerriAI/litellm/blob/d69edac11ba4acdb03116cde253cc0d7caadcf68/litellm/llms/prompt_templates/factory.py#L531-L545

In [36]:
litellm.add_function_to_prompt = True 
response = completion(
  model="huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1", 
  messages=[
    { "content": "Answer in spanish", "role": "system"},
    { "content": "Get the weather in SF?","role": "user"}
  ],
  functions = [
    {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA"
          },
          "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"]
          }
        },
        "required": ["location"]
      }
    }
  ],
)
content = response.choices[0].message.content
print("\n\ncontent:", content)
import json
content = response.choices[0].message.content


modified_content = content.replace('\_', '_')
json_content = json.loads(modified_content)
print("json_content:", json.dumps(json_content, indent=2))



[92mRequest to litellm:[0m
[92mlitellm.completion(model='huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1', messages=[{'content': 'Answer in spanish', 'role': 'system'}, {'content': 'Get the weather in SF?', 'role': 'user'}], functions=[{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}])[0m


self.optional_params: {}
kwargs[caching]: False; litellm.cache: None
self.optional_params: {}
mistralai/Mixtral-8x7B-Instruct-v0.1, text-generation-inference
response: [{'generated_text': ' {\n"function\\_name": "get\\_current\\_weather",\n"parameters": {\n"location": "San Francisco, CA",\n"unit": "fahrenheit"\n}\n}', 'details': {'finish_reason': 'eos_token', 'generated_tokens': 44, 'seed': None, 'prefill': [], 'tokens':

In [45]:
litellm.add_function_to_prompt = True 
response = completion(
  model="huggingface/tiiuae/falcon-7b-instruct", 
  messages=[
    { "content": "Answer in spanish", "role": "system"},
    { "content": "Get the weather in SF?","role": "user"}
  ],
  functions = [
    {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA"
          },
          "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"]
          }
        },
        "required": ["location"]
      }
    }
  ],
)
content = response.choices[0].message.content
print("\n\ncontent:", content)
import json
content = response.choices[0].message.content


modified_content = content.replace('\_', '_')
json_content = json.loads(modified_content)
print("json_content:", json.dumps(json_content, indent=2))



[92mRequest to litellm:[0m
[92mlitellm.completion(model='huggingface/tiiuae/falcon-7b-instruct', messages=[{'content': 'Answer in spanish', 'role': 'system'}, {'content': 'Get the weather in SF?', 'role': 'user'}], functions=[{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}])[0m


self.optional_params: {}
kwargs[caching]: False; litellm.cache: None
self.optional_params: {}
tiiuae/falcon-7b-instruct, text-generation-inference
response: [{'generated_text': "<p>The following is an example of how to get the current weather in a given location using the <code>get_current_weather</code> function:</p>\n\n<pre><code>import requests\n\nlocation = 'San Francisco, CA'\nweather = requests.get('https://openweathermap.org

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [14]:
arr = np.array([[[1],[2],[3]],[[4],[5],[6]]])
print("\n", arr.shape)
# reshape it to flatten the innermost dimension so as to make a array of shape (2, 3)
arr = arr.reshape(arr.shape[0], -1)
print(arr)


 (2, 3, 1)
[[1 2 3]
 [4 5 6]]
