In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git
!pip install -q wandb
!pip install -q ctranslate2

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
import os

# Point the W&B client at the staging deployment used by this walkthrough.
os.environ['WANDB_BASE_URL'] = "https://staging-aws.wandb.io/"

# Never paste an API key into the notebook. A key that is already exported in
# the environment is left untouched; an empty placeholder is removed so it
# cannot shadow the normal login flow.
# NOTE(review): with no key set, wandb is expected to prompt for login on
# wandb.init() — confirm for the deployment in use.
if not os.environ.get('WANDB_API_KEY'):
    os.environ.pop('WANDB_API_KEY', None)

In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

def convert_qlora2ct2(adapter_path='model-registry/OPT-125M:latest',
                      full_model_path="opt125m-finetuned",
                      offload_path="opt125m-offload",
                      ct2_path="opt125m-finetuned-ct2",
                      quantization="int8",
                      base_model_name="facebook/opt-125m"):
    """Merge a PEFT adapter into its base model and convert it to CTranslate2.

    Steps: load the base causal-LM and tokenizer, attach the adapter found at
    ``adapter_path``, call ``merge_and_unload()``, save the merged model and
    tokenizer to ``full_model_path``, then run ``ct2-transformers-converter``
    on that directory.

    Args:
        adapter_path: Directory (or id) of the PEFT adapter to load.
        full_model_path: Directory the merged model and tokenizer are saved to.
        offload_path: Passed as ``offload_folder`` when loading the base model.
        ct2_path: Output directory handed to the converter (``--force`` is
            always passed, so an existing directory is overwritten).
        quantization: Value for the converter's ``--quantization`` flag. Any
            falsy value (``False``, ``None``, ``""``) omits the flag.
        base_model_name: Model id or path of the base model and tokenizer.
            Defaults to the previously hard-coded ``"facebook/opt-125m"``.

    Returns:
        Tuple ``(merged_model, tokenizer)``.

    Raises:
        subprocess.CalledProcessError: If the converter exits with a non-zero
            status.
        FileNotFoundError: If ``ct2-transformers-converter`` is not on PATH.
    """
    # Imported locally so the function does not rely on another cell's imports.
    import subprocess

    # The adapter config records which base model it was trained against; use
    # it to flag an obvious mismatch before the weights are merged.
    peftconfig = PeftConfig.from_pretrained(adapter_path)
    recorded_base = getattr(peftconfig, "base_model_name_or_path", None)
    if recorded_base and recorded_base != base_model_name:
        print(f"Warning: adapter config names base model '{recorded_base}', "
              f"but '{base_model_name}' is being loaded")

    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        offload_folder=offload_path,
        device_map='auto',
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    model = PeftModel.from_pretrained(model, adapter_path)
    print("Peft model loaded")

    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(full_model_path)
    tokenizer.save_pretrained(full_model_path)

    # Argument list (no shell) so paths containing spaces or shell
    # metacharacters are passed through verbatim.
    command = [
        "ct2-transformers-converter",
        "--model", str(full_model_path),
        "--output_dir", str(ct2_path),
    ]
    if quantization:
        command += ["--quantization", str(quantization)]
    command.append("--force")

    # check=True: a failed conversion raises instead of being reported as success.
    subprocess.run(command, check=True)
    print("Convert successfully")
    return merged_model, tokenizer

  warn("The installed version of bitsandbytes was compiled without GPU support. "


/usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


# Consume a Registered Model
- Names and aliases offer a simple handle to retrieve Registered Model versions
- Facilitate easy hand-off between teams and processes

In [None]:
import ctranslate2
import wandb

# Open a run; the artifact declared below is used through this active run.
wandb.init(
    entity="smle-machine",
    project="model-registry-walkthrough",
    job_type="ctranslate2",
)

# Look up the registered model by its alias, then download its files.
# The download call stays last so the cell displays its return value.
staging_model_ref = 'smle-machine/model-registry/Review Summarization:staging'
staging_model_dir = 'model-registry/Review-Summarization:staging'
best_model = wandb.use_artifact(staging_model_ref)
best_model.download(root=staging_model_dir)


[34m[1mwandb[0m: Currently logged in as: [33mkenleewb[0m ([33msmle-machine[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: \ 1 of 8 files downloaded...[34m[1mwandb[0m:   8 of 8 files downloaded.  


'model-registry/Review-Summarization:staging'

## Process the model
- Quantize, convert formats, etc.

In [None]:
# Merge the downloaded adapter and convert it to CTranslate2 format.
# No `quantization` argument is passed, so convert_qlora2ct2 uses its "int8" default.
quantized_dir = 'model-registry/Review-Summarization-quantized'
merged_model, tokenizer = convert_qlora2ct2(
    adapter_path='model-registry/Review-Summarization:staging',
    ct2_path=quantized_dir,
)

# Package the converted directory as a model artifact and link it to the
# registered model under the 'quantized' alias.
model_art = wandb.Artifact('review-summary-ct2-quantized', type="model")
model_art.add_dir(quantized_dir)
wandb.run.link_artifact(
    model_art,
    'smle-machine/model-registry/Review Summarization',
    aliases=['quantized'],
)


Peft model loaded


[34m[1mwandb[0m: Adding directory to artifact (./model-registry/Review-Summarization-quantized)... 

Convert successfully


Done. 0.5s


## Run Inference on a Test Dataset
- Log the results in a W&B Table

In [None]:
# Run inference on a test set and log results to W&B
generator = ctranslate2.Generator("model-registry/Review-Summarization-quantized")

reviews = [
    "BlastMaster 3000 Vacuum Cleaner: I never knew cleaning could be this easy until I got the BlastMaster 3000! It glides effortlessly across all surfaces and picks up even the tiniest of dust particles. The only downside is that it's a bit noisy, but the power it packs more than makes up for it.",
    "Sunrise Organic Facial Cream: I've been using Sunrise Organic Facial Cream for a month now, and the results are astonishing. My skin feels softer, smoother, and looks radiant. However, I wish the fragrance was a bit milder; it's a tad overpowering for my liking.",
    "MellowTunes Wireless Earbuds: The sound quality of the MellowTunes earbuds is surprisingly good for its price range. They fit comfortably in my ears and the battery life lasts an entire day of listening. Just wish they came with a case that was a bit more durable."
]

prompts = [f"Summarize this review {review}" for review in reviews]

# One row per review: the original text and the model's generated output.
test_table = wandb.Table(columns=["review", "summary"])

for review, prompt in zip(reviews, prompts):
    # The generator takes token strings, not ids, so map the encoded ids back to tokens.
    start_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt))
    results = generator.generate_batch([start_tokens], max_length=100)
    output = tokenizer.decode(results[0].sequences_ids[0])
    # Log the generated text in the "summary" column (previously the prompt
    # was logged here and `output` was discarded).
    # NOTE(review): generate_batch may echo the prompt tokens at the start of
    # the result by default — confirm, and consider include_prompt_in_result=False.
    test_table.add_data(review, output)

wandb.log({"test_table": test_table})
wandb.finish()

# Query a Registered Model's upstream and downstream run data
- Walk the pipeline DAG with the API to retrieve upstream training run data or downstream testing data

In [None]:
import wandb

api = wandb.Api()

registered_model_quantized = api.artifact('smle-machine/model-registry/Review Summarization:quantized')

# Get info about the quantization run
quantizing_run = registered_model_quantized.logged_by()
print(quantizing_run.summary)

# Walk upstream: pick the first artifact consumed by the quantization run
# whose name contains "checkpoint".
registered_model_checkpoint = next(
    (artifact for artifact in quantizing_run.used_artifacts()
     if "checkpoint" in artifact.name),
    None,
)
if registered_model_checkpoint is None:
    # Same exception type as the previous `[...][0]` lookup, but with a
    # message that says what was actually missing.
    raise IndexError(
        "The quantization run did not use any artifact with 'checkpoint' in its name"
    )

# The run that logged the checkpoint is the training run; show its logged history.
training_run = registered_model_checkpoint.logged_by()
print(training_run.history())

{'_runtime': 77.95275163650513, '_timestamp': 1694108604.4251437, 'test_table': {'_latest_artifact_path': 'wandb-client-artifact://74yev5ehwetftnu17hgwwupgg2dr4x5g776zzem23nmh2ywvy2p9xeqonnkxwmmrwhrsh3ue95wwnown81cp150k4keqh2dlfvvs24aynurpjs6itmi1d70mqfhrcafm:latest/test_table.table.json', 'path': 'media/table/test_table_0_fbe33428917ff54897bb.table.json', 'size': 1778, '_type': 'table-file', 'ncols': 2, 'nrows': 3, 'sha256': 'fbe33428917ff54897bb274488b7bb0f6fc680dbfe7969f66949ffa4208914c8', 'artifact_path': 'wandb-client-artifact://74yev5ehwetftnu17hgwwupgg2dr4x5g776zzem23nmh2ywvy2p9xeqonnkxwmmrwhrsh3ue95wwnown81cp150k4keqh2dlfvvs24aynurpjs6itmi1d70mqfhrcafm:latest/test_table.table.json'}, '_step': 0, '_wandb': {'runtime': 46}}
    _step   _runtime    _timestamp  train/loss  train/epoch  \
0       0  10.073223  1.694105e+09      3.1699         0.01   
1       1  11.019853  1.694106e+09      3.1092         0.01   
2       2  11.995674  1.694106e+09      3.3207         0.02   
3       