In [None]:
# environment setup

from io import BytesIO
from pprint import pprint
import ollama
import PIL.Image
from IPython.display import Markdown, display

# llama3 model tags; each is passed as the `model=` argument of an ollama client call
LLAMA_32_VISION: str = 'llama3.2-vision:11b'
LLAMA_32_3B: str = 'llama3.2:3b'
LLAMA_31_8B: str = 'llama3.1:8b'

# convert a PIL Image to JPEG-encoded bytes for use with llama 3.2
def image_to_bytes(image: PIL.Image.Image) -> bytes:
    """Encode a PIL image as JPEG and return the encoded bytes.

    Args:
        image: the image to encode. Images whose mode the JPEG writer cannot
            store (for example RGBA or palette images) are converted to RGB
            first, which discards any transparency.

    Returns:
        The JPEG-encoded data as a ``bytes`` object (the contents of the
        in-memory buffer, not the buffer itself).
    """
    # modes Pillow's JPEG plugin can write directly; saving any other mode
    # raises, so only those are converted and working inputs stay untouched
    jpeg_modes = {'1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'}
    if image.mode not in jpeg_modes:
        image = image.convert('RGB')

    image_bytes = BytesIO()
    image.save(image_bytes, format='JPEG')
    return image_bytes.getvalue()

# render a model response in the notebook as formatted markdown
def print_response(response_string):
    """Display ``response_string`` as rendered Markdown in the cell output."""
    markdown_view = Markdown(response_string)
    display(markdown_view)

# ollama client: the example cells below send all of their requests through
# `llm`, which is pointed at the server address in OLLAMA_HOST
OLLAMA_HOST = 'http://localhost:11434'
llm = ollama.Client(host=OLLAMA_HOST)


In [None]:
# example: plain text generation from a single prompt

response = llm.generate(
    prompt="Tell me about Chicago, Illinois",
    model=LLAMA_31_8B,
)

# the generated text is stored under the 'response' key of the result
print_response(response['response'])


In [None]:
# example: ask the vision model about an image

input_image = PIL.Image.open('./example.jpg')
display(input_image)

# a single user turn carrying both the question and the JPEG-encoded image
messages = [
    dict(
        role='user',
        content='tell me about this image',
        images=[image_to_bytes(input_image)],
    ),
]

response = llm.chat(
    messages=messages,
    model=LLAMA_32_VISION,
)

# show the raw response object first, then the reply rendered as markdown
pprint(response)
print_response(response.message.content)


In [None]:
# example conversation

# Build the follow-up history as a NEW list instead of appending to `messages`,
# so re-running this cell does not keep stacking duplicate turns onto the
# history created by the image cell. `messages` and `response` must still be
# the ones produced by that cell.
followup_messages = [
    *messages,
    # the model's previous reply, so it has the context of its own answer
    response.message.model_dump(),
    {
        'role': 'user',
        'content': 'describe the clothing of the woman in the image'
    },
]

# store the reply under its own name so the earlier `response` stays intact
followup_response = llm.chat(model=LLAMA_32_VISION,
    messages=followup_messages,
)

print_response(followup_response.message.content)


In [None]:
# example embeddings

embed_inputs = [
    'the quick brown fox jumped over the lazy dog',
    'all work and no play makes jack a dull boy',
]

# keep the result under its own name so the chat `response` from the earlier
# cells is not replaced by an object of a different kind
embed_response = llm.embed(model=LLAMA_32_3B,
    input=embed_inputs,
)

# number of embedding vectors returned
print(len(embed_response.embeddings))

# show the vector length and a short preview instead of dumping every value
first_embedding = embed_response.embeddings[0]
print(len(first_embedding))
pprint(first_embedding[:8])
