In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
with open('../data/keys/hf_key.txt') as f:
    hf_key = f.read()

In [3]:
tokenizer = AutoTokenizer.from_pretrained("google/gemma-1.1-2b-it")

In [None]:
llm = AutoModelForCausalLM.from_pretrained(
    "google/gemma-1.1-2b-it"
)

In [8]:
o = tokenizer.encode("<start_of_turn>model")
print(o)
tokenizer.decode(o)

[2, 106, 2516]


'<bos><start_of_turn>model'

In [14]:
o = tokenizer.encode("""<bos><start_of_turn>user
Write a hello world program<end_of_turn>
<start_of_turn>model""")
print(o)
tokenizer.decode(o)

[2, 2, 106, 1645, 108, 5559, 476, 25612, 2134, 2733, 107, 108, 106, 2516]


'<bos><bos><start_of_turn>user\nWrite a hello world program<end_of_turn>\n<start_of_turn>model'

In [13]:
response_template_with_context = "<start_of_turn>model"
response_template_ids = tokenizer.encode(response_template_with_context, add_special_tokens=False)
response_template_ids

[106, 2516]

In [3]:
tokenizer = AutoTokenizer.from_pretrained("../models")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    "/home/vpo/batchelor_thesis/models",
    device_map="auto",
    quantization_config=bnb_config,
)

chat = [
    {"role": "user", "content": "Write me a poem about Machine Learning as a valid JSON."}
]
token_inputs = tokenizer.apply_chat_template(chat, tokenize=True, return_tensors="pt", add_generation_prompt=True).to("cuda")

outputs = model.generate(input_ids=token_inputs, do_sample=True, max_new_tokens=500, temperature=.5)


print(tokenizer.decode(outputs[0]))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

<bos><start_of_turn>user
Write me a poem about Machine Learning as a valid JSON.<end_of_turn>
<start_of_turn>model
```json
{
  "title": "Machine Learning",
  "author": "AI Poet",
  "version": "1.0",
  "content": [
    {
      "line": "Algorithms learn from data's sway",
      "meta": {
        "sentiment": "positive",
        "tone": "informative"
      }
    },
    {
      "line": "Patterns emerge, a hidden display",
      "meta": {
        "sentiment": "curious",
        "tone": "analytical"
      }
    },
    {
      "line": "Predictions crafted, future's in sight",
      "meta": {
        "sentiment": "hopeful",
        "tone": "optimistic"
      }
    },
    {
      "line": "Learning evolves, a constant refrain",
      "meta": {
        "sentiment": "inspiring",
        "tone": "reflective"
      }
    }
  ]
}
```<eos>
