# Fine tune an LLM for Bible Assistant AI agent

In [21]:
%load_ext autoreload
%autoreload 2

In [22]:
import sys
from pathlib import Path
current_dir = Path().resolve()
sys.path.append(str(current_dir.parent))

import os
from pathlib import Path
import numpy as np

import subprocess

from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
import huggingface_hub
import torch
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
import ollama

import bibleAssistant.agent as bagent

In [11]:
# For debugging pytorch environment
x = torch.tensor([1,2,3])
print(x.numpy())   # should work now

[1 2 3]


In [12]:
import transformers, accelerate, torch
print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)
print("Accelerate:", accelerate.__version__)


Torch: 2.4.1+cpu
Transformers: 4.57.3
Accelerate: 1.12.0


In [13]:
huggingface_hub.login(os.environ["HUGGING_FACE_TOKEN"])

## Parameters

In [14]:
dev_folder = os.path.abspath("../data/dev")
print(f"What are the possible data files (in {dev_folder}):")
print(os.listdir(dev_folder))

print("-"*40)
print("What output model folders we already have:")
print(os.listdir("../models"))

print("-"*40)
print("Which models are already locally served in ollama:")
print([item.model for item in ollama.list().models])

What are the possible data files (in c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\data\dev):
['lookup_verse.3.test.jsonl', 'lookup_verse.3.train.jsonl', 'two_tools.1.test.jsonl', 'two_tools.1.train.jsonl']
----------------------------------------
What output model folders we already have:
['gemma3-lora-8', 'gemma3-lora-9']
----------------------------------------
Which models are already locally served in ollama:
['gemma3:1b-ft9-650', 'gemma3:1b-ft9-350', 'gemma3:1b-ft3-50', 'gemma3:1b-ft3-200', 'gemma3:1b-ft3', 'gemma3:1b-ft2', 'gemma3:1b-ft1', 'google/gemma-3-1b-it-ft1:latest', 'gemma3:4b', 'llama3.2:3b', 'llama3.2:1b', 'gemma3:1b', 'mistral:latest']


In [15]:
trainset_file = os.path.join(dev_folder, "two_tools.1.train.jsonl")
testset_file = os.path.join(dev_folder, "two_tools.1.test.jsonl")
print(f"Input data:")
print(f"Train set from {trainset_file}")
print(f"Test  set from {testset_file}")

# Huggingface, base and adaptation:
model_name = "google/gemma-3-1b-it" # Huggingface base model
output_dir = os.path.abspath("../models/gemma3-2tools1")

# For serving the model in ollama:
ollama_base_model_name = "gemma3:1b"
model_nickname = "2t1"

print(f"Model:")
print(f"  Start from {model_name}")
print(f"  Save adaptation checkpoints into {output_dir}")
print(f"  Serve in ollama, based on model {ollama_base_model_name} with version of {model_nickname} (and checkpoint)")

Input data:
Train set from c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\data\dev\two_tools.1.train.jsonl
Test  set from c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\data\dev\two_tools.1.test.jsonl
Model:
  Start from google/gemma-3-1b-it
  Save adaptation checkpoints into c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\models\gemma3-2tools1
  Serve in ollama, based on model gemma3:1b with version of 2t1 (and checkpoint)


## Helper functions

In [16]:
def do_generate(use_tokenizer, use_model, prompt, max_new_tokens=100, temperature=0.7):
    inputs = use_tokenizer(prompt, return_tensors="pt")
    input_len = inputs["input_ids"].shape[1]
    if temperature > 0:
        outputs = use_model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature)
    else:
        outputs = use_model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
    output = outputs[0][input_len:] # Remove the repeated input token sequence. Keep only the newly generated tokens
    outtext = use_tokenizer.decode(output, skip_special_tokens=True)
    return outtext

def do_generate_response(use_tokenizer, use_model, messages, max_new_tokens=100, temperature=0.7):
    prompt = use_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return do_generate(use_tokenizer, use_model, prompt, max_new_tokens=max_new_tokens, temperature=temperature)


## Load train/test data

In [18]:
train_ds = load_dataset("json", data_files=trainset_file, split="train")
test_ds = load_dataset("json", data_files=testset_file, split="train")

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

## Base model

In [19]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name)

In [23]:
bagent.AgentUI().display_convo(train_ds[10]['messages'])

In [41]:
np.argmax([len(ex["messages"]) for ex in train_ds])

np.int64(179)

In [42]:
input_messages = train_ds[179]['messages'][:-1]
for msg in input_messages:
    print(msg)
print("-"*40)
resp = do_generate_response(tokenizer, base_model, input_messages)
print(resp)

{'role': 'system', 'content': 'You are a research assistant for biblical texts that always responds using a JSON object with fields "tool" and "arguments".\n\nTo respond normally to the user, use:\n{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}\n\nTo call a tool, you need to indicate which tool to use and what arguments to send to it - use the structure:\n{"tool": "<tool_name>", "arguments":{ ... }}\n\nAfter you call a tool, you will receive a tool-response message from role "user" containing a JSON object.\nThe tool-response object always includes fields "tool_name" and "status".\n\nIf the tool call succeeded, the tool response object will have the structure:\n{"tool_name": "<tool_name>", "status": "ok", "result": { ... }}\nDifferent tools have different structures of the returned data inside the dictionary "result".\nIf you know what to do next, go ahead (e.g., if the user already told you what to do with the results, or if you have a plan and you want t

## Prepare data for training

In [43]:
def tokenize_for_gemma_token_level(example):
    # Convert the list of messages into a single text sequence
    text = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False
    )
    print(text)
    tokens = tokenizer(
        text,
        truncation=True,
        max_length=2048,
        padding=False,
        return_tensors=None
    )
    input_ids = tokens["input_ids"]
    labels = [-100] * len(input_ids)

    assistant_marker = tokenizer("<start_of_turn>model")["input_ids"][1:] # Assume the first is a <bos> token that will not appear in the middle of a chat
    endturn_marker = tokenizer("<end_of_turn>")["input_ids"][1:]
    print(f"assistant marker: {assistant_marker}")
    print(f"end-turn marker: {endturn_marker}")
    i = 0
    while i < len(input_ids):
        # Look for assistant start:
        if input_ids[i:i+len(assistant_marker)] == assistant_marker:
            # Move index to the first token *after* the start marker
            i += len(assistant_marker)
            # Copy the target tokens until reaching end of assistant message:
            while i < len(input_ids) and input_ids[i:i+len(endturn_marker)] != endturn_marker:
                labels[i] = input_ids[i]
                i += 1
            i += len(endturn_marker)
        else:
            i += 1

    tokens["labels"] = labels
    return tokens

def tokenize_for_gemma(example, max_length=20480): # I increased the max length. Let's see if training gemma can handle it
    """
    Tokenize a multi-turn chat example for Gemma using text-level markers.
    Only assistant spans (<start_of_turn>model ... <end_of_turn>) are labeled.
    Everything else is masked with -100.
    """

    # 1. Render the conversation using Gemma's chat template (text only)
    text = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False
    )

    # 2. Find all assistant spans in the raw text
    assistant_spans = []
    start_tag = "<start_of_turn>model"
    end_tag = "<end_of_turn>"

    search_pos = 0
    while True:
        start_idx = text.find(start_tag, search_pos)
        if start_idx == -1:
            break

        # content begins *after* the start tag
        content_start = start_idx + len(start_tag)

        end_idx = text.find(end_tag, content_start)
        if end_idx == -1:
            # malformed example; ignore
            break

        assistant_spans.append((content_start, end_idx))
        search_pos = end_idx + len(end_tag)

    # 3. Tokenize the entire text with offsets
    encoded = tokenizer(
        text,
        return_offsets_mapping=True,
        truncation=True,
        max_length=max_length,
        padding=False,
    )

    input_ids = encoded["input_ids"]
    offsets = encoded["offset_mapping"]

    # 4. Initialize all labels as masked
    labels = [-100] * len(input_ids)

    # 5. For each assistant span, unmask tokens whose offsets fall inside it
    for (span_start, span_end) in assistant_spans:
        for i, (tok_start, tok_end) in enumerate(offsets):
            # token overlaps the assistant span
            if tok_end > span_start and tok_start < span_end:
                labels[i] = input_ids[i]

    encoded["labels"] = labels
    encoded.pop("offset_mapping", None)

    return encoded

In [44]:
train_tok = train_ds.map(tokenize_for_gemma, remove_columns=train_ds.column_names)
test_tok = test_ds.map(tokenize_for_gemma, remove_columns=train_ds.column_names)

Map:   0%|          | 0/850 [00:00<?, ? examples/s]

In [45]:
# For debugging, use small data:
# train_tok = train_tok.select(range(20))
# test_tok = test_tok.select(range(10))

test_tok = test_tok.select(list(range(10)) + list(range(100,130)))

In [46]:
# #Select specific examples by metadata:
# train_inds = [i for i in range(len(train_ds)) if train_ds[i]['metadata']['scenario']=='lookup_verse_ok_then_another_version']
# print(len(train_inds))
# train_tok = train_tok.select(train_inds)

In [47]:
print(f"Train {len(train_tok)}")
print(f"Test {len(test_tok)}")

Train 850
Test 40


In [48]:
train_tok[0].keys()

dict_keys(['input_ids', 'attention_mask', 'labels'])

In [49]:
def validate_tokenized_dataset(tok_ds):
    for i, tok_ex in enumerate(tok_ds):
        if np.isnan(tok_ex["input_ids"]).any():
            print("NaN in input_ids at batch", i)
        if np.isnan(tok_ex["labels"]).any():
            print("NaN in labels at batch", i)
        if (np.array(tok_ex["labels"]) >= tokenizer.vocab_size).any():
            print("Out-of-range label at batch", i)
        if len(tok_ex["labels"]) <= 0:
            print("Empty labels at batch", i)
        if i > 100:
            break
    
    max_len = max(len(ex["input_ids"]) for ex in tok_ds)
    max_len_label = max(len(ex["labels"]) for ex in tok_ds)
    print(f"Maximum example length is input: {max_len} and label: {max_len_label}")


validate_tokenized_dataset(train_tok)
validate_tokenized_dataset(test_tok)

Maximum example length is input: 5544 and label: 5544
Maximum example length is input: 3947 and label: 3947


In [50]:
for i in range(len(train_ds)):
    if 'another' in train_ds[i]['metadata']['scenario']:
        print(i)
        break

3


In [52]:
## Debugging code:
import re

def reverse_tokenized_data(tok_example):
    labels = [107 if id<0 else id for id in tok_example['labels']]
    text = tokenizer.decode(labels)
    text = re.sub("\n+","\n", text)

    whole_text = tokenizer.decode(tok_example['input_ids'])
    return text, whole_text
(label_text, whole_text) = reverse_tokenized_data(train_tok[81])
print(whole_text)
print("="*40)
print(label_text)

<bos><bos><start_of_turn>user
You are a research assistant for biblical texts that always responds using a JSON object with fields "tool" and "arguments".

To respond normally to the user, use:
{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}

To call a tool, you need to indicate which tool to use and what arguments to send to it - use the structure:
{"tool": "<tool_name>", "arguments":{ ... }}

After you call a tool, you will receive a tool-response message from role "user" containing a JSON object.
The tool-response object always includes fields "tool_name" and "status".

If the tool call succeeded, the tool response object will have the structure:
{"tool_name": "<tool_name>", "status": "ok", "result": { ... }}
Different tools have different structures of the returned data inside the dictionary "result".
If you know what to do next, go ahead (e.g., if the user already told you what to do with the results, or if you have a plan and you want to use informati

## Train with LoRA

In [54]:
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
print(output_dir)
os.listdir(output_dir)

c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\models\gemma3-2tools1


[]

In [55]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
)

model = get_peft_model(base_model, lora_config)
model

PeftModel(
  (base_model): LoraModel(
    (model): Gemma3ForCausalLM(
      (model): Gemma3TextModel(
        (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 1152, padding_idx=0)
        (layers): ModuleList(
          (0-25): 26 x Gemma3DecoderLayer(
            (self_attn): Gemma3Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=1152, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1152, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
   

In [None]:
args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
#    gradient_accumulation_steps=1,### Debugging
    num_train_epochs=2,
    learning_rate=5e-5,
    logging_steps=50,
#    logging_steps=1, ## Debugging
    eval_strategy="steps",
    eval_steps=50,
#    eval_steps=1,
    save_strategy="steps",
    save_steps=50,
    save_only_model=True,
    save_safetensors=False,
    remove_unused_columns=False,
)

use_train_set = train_tok
use_eval_set = test_tok

collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=use_train_set,
    eval_dataset=use_eval_set,
    data_collator=collator
)

trainer.train()

Step,Training Loss,Validation Loss
50,0.3971,0.246724
100,0.1947,0.159505
150,0.1406,0.129712
200,0.1252,0.107252
250,0.0967,0.093277
300,0.0957,0.084157
350,0.0801,0.078716


HTTP Error 502 thrown while requesting HEAD https://huggingface.co/google/gemma-3-1b-it/resolve/main/config.json
Retrying in 1s [Retry 1/5].
HTTP Error 502 thrown while requesting HEAD https://huggingface.co/google/gemma-3-1b-it/resolve/main/config.json
Retrying in 2s [Retry 2/5].
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 2649f288-a161-4b75-9d6b-927197893428)')' thrown while requesting HEAD https://huggingface.co/google/gemma-3-1b-it/resolve/main/config.json
Retrying in 4s [Retry 3/5].
HTTP Error 502 thrown while requesting HEAD https://huggingface.co/google/gemma-3-1b-it/resolve/main/config.json
Retrying in 8s [Retry 4/5].
HTTP Error 502 thrown while requesting HEAD https://huggingface.co/google/gemma-3-1b-it/resolve/main/config.json
Retrying in 8s [Retry 5/5].
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 1b7ba479-99a3-4def-

In [24]:
input_messages = test_ds[11]['messages'][:4]
for msg in input_messages:
    print(msg)
print("-"*40)
resp = do_generate_response(tokenizer, model, input_messages)
print(resp)

{'role': 'system', 'content': 'You are a research assistant that always responds using a JSON object with fields "tool" and "arguments".\n\nTo respond normally to the user, use:\n{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}\n\nTo call a tool, use:\n{"tool": "<tool_name>", "arguments":{ ... }}\n\nAfter you call a tool, you will receive a message with role "user" containing a JSON object.\nThe tool message always includes "tool_name" and "status".\n\nIf "status" is "ok":\n- The message will include a "result" object.\n- Read "result.text".\n- Respond using "respond_to_user" and copy "result.text" exactly as-is.\n\nIf "status" is "error":\n- The message will include an "error_message".\n- If the error message is clear enough (e.g., if the user spelled a book name wrong and it is clear which book the user intended), you can call the tool again with the corrected arguments.\n- Otherwise, respond using "respond_to_user" and copy "error_message" exactly as-is (

## Merge, save, and register with ollama

In [31]:
def merge_lora_checkpoint(base_model_name, training_dir, checkpoint):
    adapter_dir = f"{training_dir}/checkpoint-{checkpoint}"
    merged_dir = f"{training_dir}/merged-checkpoint-{checkpoint}"

    # Keep in mind you need to get a fresh base every time before attaching to the adaptation (otherwise you may mutate the same model with different adaptations)
    base_tmp = AutoModelForCausalLM.from_pretrained(base_model_name)
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    adapted = PeftModel.from_pretrained(base_tmp, adapter_dir)
    merged = adapted.merge_and_unload()
    merged.save_pretrained(merged_dir)
    tokenizer.save_pretrained(merged_dir)
    return merged_dir

def write_gemma_modelfile(merged_dir, ollama_base_model_name):
    """
    Create a correct Modelfile for a fine‑tuned Gemma model by copying the
    base model's Modelfile and replacing only the FROM line.

    Parameters
    ----------
    merged_dir : str or Path
        Path to the folder containing the merged model files.
    ollama_base_model_name : str
        Name of the base model as registered in Ollama (e.g., "gemma3:1b").

    Returns
    -------
    Path
        Path to the newly written Modelfile.
    """

    merged_dir = Path(merged_dir)
    modelfile_path = merged_dir / "Modelfile"

    # 1. Get the base model's Modelfile text
    result = subprocess.run(
        ["ollama", "show", "--modelfile", ollama_base_model_name],
        capture_output=True,
        text=True,
        check=True
    )
    base_modelfile = result.stdout

    # 2. Replace the FROM line with a local directory reference
    new_from = f"FROM {merged_dir.as_posix()}"
    modelfile_lines = base_modelfile.splitlines()

    for i, line in enumerate(modelfile_lines):
        if line.startswith("FROM "):
            modelfile_lines[i] = new_from
            break

    updated_modelfile = "\n".join(modelfile_lines)

    # 3. Write the updated Modelfile into merged_dir
    with open(modelfile_path, "w", encoding="utf-8") as f:
        f.write(updated_modelfile)

    return modelfile_path

def build_ollama_model_cli(model_name: str, merged_dir: str):
    merged_dir = Path(merged_dir)
    modelfile = merged_dir / "Modelfile"

    subprocess.run(
        ["ollama", "create", model_name, "-f", str(modelfile)],
        check=True
    )

def merge_save_ollama(base_model_name, training_dir, checkpoint, ollama_base_model_name, model_nickname):
    merged_dir = merge_lora_checkpoint(base_model_name, training_dir, checkpoint)
    print(f"Merged model folder: {merged_dir}")
    print(os.listdir(merged_dir))

    modelfile_path = write_gemma_modelfile(merged_dir, ollama_base_model_name)
    print(f"==> Wrote Modelfile: {modelfile_path}")

    new_model_name = f"{ollama_base_model_name}-{model_nickname}-{checkpoint}"
    build_ollama_model_cli(new_model_name, merged_dir)
    print(f"Registered {new_model_name} in ollama. Now ollama has:")
    print(ollama.list())
    return (merged_dir, new_model_name)

In [None]:
(merged_dir, new_model_name) = merge_save_ollama(model_name, output_dir, 350, ollama_base_model_name, model_nickname)

Merged model folder: c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\models\gemma3-lora-9/merged-checkpoint-350
['added_tokens.json', 'chat_template.jinja', 'config.json', 'generation_config.json', 'model.safetensors', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer.model', 'tokenizer_config.json']
==> Wrote Modelfile: c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\models\gemma3-lora-9\merged-checkpoint-350\Modelfile
Registered gemma3:1b-ft9-350 in ollama. Now ollama has:
models=[Model(model='gemma3:1b-ft9-350', modified_at=datetime.datetime(2026, 1, 28, 9, 16, 5, 457480, tzinfo=TzInfo(-18000)), digest='8d25ff0f607b75234689b0a8a1d8104ab6895a444845c699068b889785fdf89c', size=2015833891, details=ModelDetails(parent_model='', format='gguf', family='gemma3', families=['gemma3'], parameter_size='999.89M', quantization_level='F16')), Model(model='gemma3:1b-ft3-50', modified_at=datetime.datetime(2026, 1, 23, 13, 54, 9, 433956, tzinfo=TzInfo(-18000)), dig

In [34]:
(merged_dir, new_model_name) = merge_save_ollama(model_name, output_dir, 650, ollama_base_model_name, model_nickname)

Merged model folder: c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\models\gemma3-lora-9/merged-checkpoint-650
['added_tokens.json', 'chat_template.jinja', 'config.json', 'generation_config.json', 'model.safetensors', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer.model', 'tokenizer_config.json']
==> Wrote Modelfile: c:\Users\Yonatan\Documents\coding_projects\bible-text-analysis\models\gemma3-lora-9\merged-checkpoint-650\Modelfile
Registered gemma3:1b-ft9-650 in ollama. Now ollama has:
models=[Model(model='gemma3:1b-ft9-650', modified_at=datetime.datetime(2026, 1, 28, 9, 17, 20, 621636, tzinfo=TzInfo(-18000)), digest='c5a1be2c32546bb87caedb2bf083dc2cf574f767233460d3cb0dd6d4f6e9e866', size=2015833891, details=ModelDetails(parent_model='', format='gguf', family='gemma3', families=['gemma3'], parameter_size='999.89M', quantization_level='F16')), Model(model='gemma3:1b-ft9-350', modified_at=datetime.datetime(2026, 1, 28, 9, 16, 5, 457480, tzinfo=TzInfo(-18000)), di

## Test the merged model using huggingface (not ollama)

In [35]:
loaded_hf_model = AutoModelForCausalLM.from_pretrained(merged_dir)

In [36]:
input_messages = test_ds[0]['messages'][:4]
for msg in input_messages:
    print(msg)
print("-"*40)
resp = do_generate_response(tokenizer, loaded_hf_model, input_messages)
print(resp)

{'role': 'system', 'content': 'You are a research assistant that always responds using a JSON object with fields "tool" and "arguments".\n\nTo respond normally to the user, use:\n{"tool": "respond_to_user", "arguments":{"text": "<text to show the user>"}}\n\nTo call a tool, use:\n{"tool": "<tool_name>", "arguments":{ ... }}\n\nAfter you call a tool, you will receive a message with role "user" containing a JSON object.\nThe tool message always includes "tool_name" and "status".\n\nIf "status" is "ok":\n- The message will include a "result" object.\n- Read "result.text".\n- Respond using "respond_to_user" and copy "result.text" exactly as-is.\n\nIf "status" is "error":\n- The message will include an "error_message".\n- If the error message is clear enough (e.g., if the user spelled a book name wrong and it is clear which book the user intended), you can call the tool again with the corrected arguments.\n- Otherwise, respond using "respond_to_user" and copy "error_message" exactly as-is (

In [37]:
ollama.list().models

[Model(model='gemma3:1b-ft9-650', modified_at=datetime.datetime(2026, 1, 28, 9, 17, 20, 621636, tzinfo=TzInfo(-18000)), digest='c5a1be2c32546bb87caedb2bf083dc2cf574f767233460d3cb0dd6d4f6e9e866', size=2015833891, details=ModelDetails(parent_model='', format='gguf', family='gemma3', families=['gemma3'], parameter_size='999.89M', quantization_level='F16')),
 Model(model='gemma3:1b-ft9-350', modified_at=datetime.datetime(2026, 1, 28, 9, 16, 5, 457480, tzinfo=TzInfo(-18000)), digest='8d25ff0f607b75234689b0a8a1d8104ab6895a444845c699068b889785fdf89c', size=2015833891, details=ModelDetails(parent_model='', format='gguf', family='gemma3', families=['gemma3'], parameter_size='999.89M', quantization_level='F16')),
 Model(model='gemma3:1b-ft3-50', modified_at=datetime.datetime(2026, 1, 23, 13, 54, 9, 433956, tzinfo=TzInfo(-18000)), digest='9daf2271960acc2503ed4f895bbefdf85ea4b0d77e18348fedf6be476d8bfbab', size=2015833891, details=ModelDetails(parent_model='', format='gguf', family='gemma3', famili