In [1]:
%pip install accelerate peft bitsandbytes datasets trl

Collecting peft
  Downloading peft-0.13.1-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting trl
  Downloading trl-0.11.2-py3-none-any.whl.metadata (12 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.11-py3-none-any.whl.metadata (8.4 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatib

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

In [3]:
# 4-bit NF4 quantization settings: float16 is passed as the compute dtype and
# double quantization is switched off.
compute_dtype = torch.float16

quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

In [4]:
# Checkpoint identifier passed to `from_pretrained` for both the model and the tokenizer.
base_model = "openchat/openchat-3.5-0106"

In [5]:
# Load the causal LM with the 4-bit quantization config; device placement is
# delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    base_model, device_map="auto", quantization_config=quant_config
)

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]



In [6]:
# AutoTokenizer comes from the notebook's import cell; it is not re-imported here.
# The tokenizer is configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(base_model, padding_side="left")

# Send the encoded prompt to whichever device the model was placed on by
# device_map="auto" rather than assuming a bare "cuda" device.
model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(model.device)

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/491 [00:00<?, ?B/s]

In [7]:
# Continue the single prompt for at most 50 new tokens, then show the decoded
# text of the only sequence in the batch.
generated_ids = model.generate(max_new_tokens=50, **model_inputs)
tokenizer.decode(generated_ids[0], skip_special_tokens=True)

'A list of colors: red, blue, green, yellow, orange, purple, pink, brown, black, white, gray, and silver.\n\nA list of animals: dog, cat, bird, fish, snake, spider, frog, lizard, turt'

In [8]:
# Reuse EOS as the padding token so prompts of different lengths can be batched
# (most LLMs don't have a pad token by default).
tokenizer.pad_token = tokenizer.eos_token
model_inputs = tokenizer(
    ["A list of colors: red, blue", "Write a 5 line sentence on Portugal is"],
    padding=True,
    return_tensors="pt",
).to(model.device)  # follow the model's device instead of hard-coding "cuda"
generated_ids = model.generate(**model_inputs, max_new_tokens=200)
print(generated_ids)
print("completed generated ids")
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

tensor([[32000,     1,   330,  1274,   302,  9304, 28747,  2760, 28725,  5045,
         28725,  5344, 28725,  9684, 28725, 14545, 28725, 19435, 28725, 12937,
         28725,  9060, 28725,  2687, 28725,  3075, 28725, 11870, 28725,   304,
          9746, 28723,    13,    13, 28741,  1274,   302,  8222, 28747,  3914,
         28725,  5255, 28725,  7727, 28725,  8006, 28725, 24342, 28725,   668,
          1184, 28725,  8169, 28721, 28725,   305, 14640, 28725,   261,  3525,
           291, 28725, 16479,  2581, 28725,  4524,   361,  3045, 28725,   304,
         12733, 28723,    13,    13, 28741,  1274,   302, 21566, 28747, 19767,
         28725,  8743,  2238, 28725,   484, 13327, 28725,   847,  1748, 28725,
         14545, 28725,   757,   595, 28725, 26847, 28725,   549,   383, 28725,
         18393, 14233, 28725,   304,  2130, 12625,   266, 28723,    13,    13,
         28741,  1274,   302,  5780, 28747,  2969,  3543, 28725,  6082, 28725,
         10242, 28725, 13250, 28725, 16540, 28725, 1

['A list of colors: red, blue, green, yellow, orange, purple, pink, brown, black, white, gray, and silver.\n\nA list of animals: dog, cat, bird, fish, snake, spider, frog, lizard, turtle, rabbit, squirrel, and mouse.\n\nA list of fruits: apple, banana, cherry, grape, orange, peach, pear, plum, strawberry, and watermelon.\n\nA list of countries: United States, Canada, Mexico, Brazil, Argentina, Chile, Peru, Bolivia, Ecuador, Colombia, and Venezuela.\n\nA list of cities: New York, Los Angeles, Chicago, Houston, Philadelphia, Phoenix, San Antonio, San Diego, Dallas, and San Jose.\n\nA list of mountains: Mount Everest, Mount Kilimanjaro, Mount Fuji, Mount Denali, Mount Vinson, Mount Logan, Mount Elbr',
 "Write a 5 line sentence on Portugal is a beautiful country with a rich history and culture.\n\nPortugal is a beautiful country with a rich history and culture, boasting stunning landscapes, vibrant cities, and a warm, welcoming atmosphere.\n\nFrom its picturesque coastline to its charming 