# Introduction

To interact with the self-hosted LLM running on Kubernetes, we can use the Python `openai` module.

Let's start by installing the required modules:

In [None]:
!python3 -m pip install openai

Please restart the kernel (`Kernel` -> `Restart Kernel`) so that the newly installed module can be imported.

You may customize the `base_url` and model values to match your deployment.

The `llm_api_key` is required only for the models deployed using vLLM.

In [None]:
from getpass import getpass

# Base URL of the model endpoint; this example points at the service name.
# Adjust it to your deployment; the services can be listed with:
#   $ kubectl get service
base_url = "http://llm/v1"

# NIM does not use the API key, so an empty answer at the prompt is replaced
# by a dummy placeholder value.
llm_api_key = getpass('Input the model API KEY: ') or "dummy"

In [None]:
import sys
from openai import OpenAI

# Client for the OpenAI-compatible API exposed at `base_url`.
client = OpenAI(base_url=base_url, api_key=llm_api_key)

# Discover the available models and use the first one.
available_models = client.models.list()
if not available_models.data:
    # Raise a descriptive error instead of calling sys.exit(): exiting from
    # inside a notebook kernel shows up as a SystemExit warning rather than
    # a clear failure message.
    raise RuntimeError("No model discovered")
model = available_models.data[0].id
print(f"Discovered model is: {model}")

# Request a streamed completion so the answer can be printed piece by piece.
completion = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "What is the height of the Eiffel tower?"}],
    temperature=0,
    top_p=1,
    max_tokens=1024,
    stream=True,
)

for chunk in completion:
    # OpenAI-compatible servers can emit chunks whose `choices` list is empty
    # (e.g. a usage-only chunk); skip them instead of failing with IndexError.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    if delta_text is not None:
        # Flush so each piece is displayed as soon as it is received.
        print(delta_text, end="", flush=True)

# Terminate the streamed line so any later output starts on a fresh line.
print()

In [None]:
# Fetch the model list from the endpoint again and print the raw response
# object returned by the client.
available_models = client.models.list()
print(available_models)