In [None]:
!pip install ollama

# Ollama Vision and Language API

Ollama is an LLM/LVLM server that optimizes resource usage and lets you run multiple models on the same GPU.
This notebook walks you through a few use cases with a LLaVA model. You can give the model an instruction and an image, and ask it to generate an answer. Note that LLaVA supports both language-only requests and language-and-vision requests.

The complete documentation is available here:
 - https://github.com/ollama/ollama-python

In [None]:
from ollama import Client

# Remote Ollama endpoint used by the examples in this notebook.
ollama_host = 'https://twiz.novasearch.org/ollama'

# NOTE(review): this header looks like a placeholder value — confirm whether
# the server actually requires any custom header.
extra_headers = {'x-some-header': 'some-value'}

client = Client(host=ollama_host, headers=extra_headers)

# Model tag passed to every request, for text-only and image prompts alike.
model_multimodal = 'llava-phi3:latest'

### Example 1: Generative answers

In [None]:
# Text-only completion: a single prompt, no chat history and no image.
sky_prompt = 'Why is the sky blue?'
response = client.generate(prompt=sky_prompt, model=model_multimodal)

In [None]:
# The generated text is exposed on the `response` field of the generate result.
generated_text = response.response
print(generated_text)

### Example 2: Chat template

In [None]:
# Same question as a one-turn conversation: the chat endpoint takes a list of
# role/content messages instead of a bare prompt string.
chat_history = [{'role': 'user', 'content': 'Why is the sky blue?'}]
response = client.chat(messages=chat_history, model=model_multimodal)

In [None]:
# The model's reply text is the content of the returned message.
chat_reply = response.message
print(chat_reply.content)

### Example 3: Image captioning

In [None]:
from PIL import Image

# Path of the example picture; the captioning request sends this same path.
img = './images/dog.jpg'
image = Image.open(img)

# Leave the image as the cell's last expression so Jupyter renders it inline.
# Image.show() hands the picture to a platform-dependent viewer (often an
# external program), which may display nothing on a remote or headless kernel.
image

In [None]:
# A single user turn carrying both the instruction and the picture; the image
# is referenced by the file path stored in `img`.
caption_turn = dict(role='user', content='Describe this image.', images=[img])
response = client.chat(messages=[caption_turn], model=model_multimodal)

In [None]:
# Print the caption text returned for the image.
caption_message = response.message
print(caption_message.content)

### Example 4: Visual question answering

In [None]:
# Visual question answering: the user turn pairs a question about the picture
# with the image file the model should look at.
response = client.chat(model=model_multimodal, messages=[
  {
    'role': 'user',
    # Keep the prompt a clean question ending in '?' with no trailing '.'.
    'content': 'Is the dog running or jumping?',
    'images': ['./images/dog.jpg']
  },
])

In [None]:
# Print the model's answer to the question about the image.
vqa_message = response.message
print(vqa_message.content)