In [13]:
import ollama
from ollama import chat

stream = chat(
    model='deepseek-r1:8b',
    messages=[{'role': 'user', 'content': 'What animals are llamas related to?'}],
    stream=True,
)

for chunk in stream:
  print(chunk['message']['content'], end='', flush=True)

Llamas are part of the **Camelidae** family, which is a family of **even-toed ungulates**.

The animals most closely related to llamas are:

1.  **Alpacas:** Llamas and alpacas are both species of the genus *Lama*. They are native to South America and are very similar in appearance, but distinct species with different uses (llamas as pack animals, alpacas for fiber).
2.  **Vicuñas:** These are the wild ancestors of the domestic llama and alpaca. They are the closest living wild relative. Also a species of *Lama* (*Lama vicugna*).
3.  **Guanacos:** Another wild camelid native to South America, related to llamas and alpacas. They are smaller than llamas and belong to the genus *Llama* (*Llama guanicoe*).
4.  **Camels:** While geographically separated (llamas are New World camelids, camels are Old World), they are all members of the same family, *Camelidae*. This includes dromedary camels (*Camelus dromedarius*), Bactrian camels (*Camelus bactrianus*), and the now-extinct wild Bactrian ca

In [3]:
#ollama pull mxbai-embed-large
#https://ollama.com/blog/embedding-models
import ollama
r = ollama.embed(
  model='mxbai-embed-large',
  input='Llamas are members of the camelid family',
)
print(r)

model='mxbai-embed-large' created_at=None done=None done_reason=None total_duration=1205344000 load_duration=1187994400 prompt_eval_count=10 prompt_eval_duration=None eval_count=None eval_duration=None embeddings=[[0.0328727, 0.066107795, 0.03607437, 0.045058895, -0.007502289, 0.03650291, -0.024996953, 0.050031144, 0.036556356, -0.013849664, 0.035647586, 0.019333433, -0.035478283, 0.007762028, -0.029359553, 0.03942745, 0.012218423, -0.009392667, -0.008075294, -0.016050396, -0.046640176, 0.042093594, -0.0466896, 0.009737081, 0.068642244, 0.05631361, 0.06936915, 0.008090932, 0.056825504, 0.003536289, 0.0008181776, 0.009593568, 0.10507871, -0.065222636, -0.012071177, -0.0128118275, 0.017264301, 0.00033136673, -0.0108409645, -0.0849873, 0.05510164, -0.0018853414, 0.02819077, 0.00052796974, -0.032138083, -0.023198094, -0.0026137363, -0.030292384, 0.0052892333, -0.05766199, -0.021923209, -0.020098476, 0.0036403034, -0.014109983, -0.041632432, -0.036545567, 0.004224667, -0.016364953, -0.01967

In [4]:
import chromadb

documents = [
  "Llamas are members of the camelid family meaning they're pretty closely related to vicuñas and camels",
  "Llamas were first domesticated and used as pack animals 4,000 to 5,000 years ago in the Peruvian highlands",
  "Llamas can grow as much as 6 feet tall though the average llama between 5 feet 6 inches and 5 feet 9 inches tall",
  "Llamas weigh between 280 and 450 pounds and can carry 25 to 30 percent of their body weight",
  "Llamas are vegetarians and have very efficient digestive systems",
  "Llamas live to be about 20 years old, though some only live for 15 years and others live to be 30 years old",
]

client = chromadb.Client()
collection = client.create_collection(name="docs")

# store each document in a vector embedding database
for i, d in enumerate(documents):
  response = ollama.embed(model="mxbai-embed-large", input=d)
  embeddings = response["embeddings"]
  collection.add(
    ids=[str(i)],
    embeddings=embeddings,
    documents=[d]
  )

In [5]:
# an example input
input_text = "What animals are llamas related to?"
# generate an embedding for the input and retrieve the most relevant doc
response = ollama.embed(
  model="mxbai-embed-large",
  input=input_text
)
# Extract the vector correctly
embedding = response["embeddings"][0]  # <-- unpack the first (and only) vector
print(embedding)

results = collection.query(
  query_embeddings=[embedding],
  n_results=1
)
data = results['documents'][0][0]
print(data)

[0.01786377, 0.06339345, 0.0024996048, 0.02502161, 0.014855389, 0.0334003, -0.018956073, 0.02171464, 0.022911033, 0.0057279426, 0.011778289, 0.024637656, -0.036008663, -0.012204374, -0.038985595, 0.028628545, 0.04275913, 0.00411677, -0.0074988576, -0.0046399487, -0.0405683, 0.06581174, -0.040517457, 0.047029454, 0.077567965, 0.04061988, 0.08638656, 0.008873674, 0.058472518, 0.020817805, -0.0028468114, 0.029205505, 0.09079524, -0.057086937, -0.024544934, -0.016138304, 0.0126863085, -0.0020917833, -0.009116702, -0.08302273, 0.030908996, 0.0042737224, -0.004041226, 0.003311486, -0.019074434, 0.0013556668, -0.0049487595, -0.002646903, -0.0055790264, -0.06045811, 0.00038250262, -0.00054901646, -0.028457485, -0.006062815, -0.0050662034, -0.051830936, 0.041494623, -0.030039823, -0.009940314, 0.012273749, 0.03610637, -0.0011506302, 0.017290493, -0.021381255, -0.009727899, 0.018859142, -0.0413918, -0.03178829, 0.016009763, 0.02661536, 0.042465277, 0.05851053, -0.028771654, -0.008860419, -0.0221

In [12]:
# generate a response combining the prompt and data we retrieved in step 2
output = ollama.generate(
  model="deepseek-r1:8b",
  prompt=f"Using this data: {data}. Respond to this prompt: {input}"
)

print(output['response'])

Based on the data provided, llamas are members of the camelid family. Therefore, they are closely related to:

1.  **Vicuñas**
2.  **Alpacas**
3.  **Guanacos**
4.  **Bactrian Camels** (two-humped camel)
5.  **Dromedary Camels** (one-humped camel)

So, llamas are primarily related to other camelids, including wild South American camelids like vicuñas and guanacos, as well as the Old World camels.


In [None]:
"""

Prompt compression is a technique for reducing the length of prompts sent to large language models (LLMs) 
while retaining their essential information. It improves efficiency by decreasing response latency, lowering costs, and 
reducing the computational load. Methods for prompt compression include removing redundant tokens, extracting keywords, using abbreviations, and semantic rephrasing. 


"""