In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the local package under
# development take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the causal LM and its tokenizer once; the generation cells below reuse
# the resulting `model` and `tokenizer` objects.
print("Loading model and tokenizer...")
model_name = "databricks/dolly-v2-3b"

# Prefer the first GPU, but fall back to CPU so the notebook still runs on a
# machine without CUDA instead of failing inside `.to(...)`.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Keep half precision on the GPU (as before); use float32 on CPU, where
# float16 kernels are often slow or unavailable.
model_dtype = torch.float16 if device.startswith("cuda") else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    use_cache=True,
    torch_dtype=model_dtype,
    attn_implementation='eager',
).to(device)
# `use_cache` is a model/generation option and has no meaning for a tokenizer,
# so it is intentionally not passed here.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
print("Loaded model and tokenizer")

  from .autonotebook import tqdm as notebook_tqdm


Loading model and tokenizer...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded model and tokenizer


# Examples: schema-guided JSON generation with Jsonformer

In [4]:
from prob_jsonformer.format import highlight_values
from prob_jsonformer.main import Jsonformer

# Schema for one product entry in the store's inventory.
product_schema = {
    "type": "object",
    "properties": {
        "productId": {"type": "string"},
        "name": {"type": "string"},
        "description": {"type": "string"},
        "category": {"type": "string"},
        "price": {"type": "number"},
        "inStock": {"type": "boolean"},
        "rating": {"type": "number"},
        "images": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
}

# Schema for the store itself: free-text fields, one "choice_probs" field
# (the run's output lists a probability for each enum option), and the
# inventory array of products.
store_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "location": {"type": "string"},
        "choice_probs": {
            "type": "choice_probs",
            "enum": ["ski", "snowboard", "walk", "pretend"],
        },
        "inventory": {
            "type": "array",
            "items": product_schema,
        },
    },
}

# Top-level e-commerce schema: a single "store" object.
ecomm = {
    "type": "object",
    "properties": {"store": store_schema},
}

# Build the schema-guided generator; string values are capped at 20 tokens.
builder = Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=ecomm,
    prompt="write a description about mike's ski shop which sells premium skis and snowboards",
    max_string_token_length=20,
)

print("Generating...")
output = builder()

# Pretty-print the generated structure with the generated values highlighted.
highlight_values(output)

Generating...
{
  store: {
    name: [32m"Mike's Ski Shop"[0m,
    location: [32m"Somewhere"[0m,
    choice_probs: [
      {
        prob: [32m0.01739501953125[0m,
        choice: [32m"pretend"[0m
      },
      {
        prob: [32m0.002094268798828125[0m,
        choice: [32m"snowboard"[0m
      },
      {
        prob: [32m0.0007467269897460938[0m,
        choice: [32m"walk"[0m
      },
      {
        prob: [32m0.97998046875[0m,
        choice: [32m"ski"[0m
      }
    ],
    inventory: [
      {
        productId: [32m"1"[0m,
        name: [32m"Snowboard X-15"[0m,
        description: [32m"Snowboard for all levels"[0m,
        category: [32m"Snowboards"[0m,
        price: [32m20.0375[0m,
        inStock: [32mTrue[0m,
        rating: [32m5.0[0m,
        images: [
          [32m"https://s3.amazonaws.com/mikesskisport/images/Snow"[0m
        ]
      }
    ]
  }
}


In [5]:
# Properties of the flat car schema. "model", "new" and "rating" use the
# "choice_probs" type, so the run's output reports a probability for every
# listed option instead of a single generated value.
car_properties = {
    "make": {"type": "string"},
    "model": {
        "type": "choice_probs",
        # NOTE(review): "Kea" may be a typo for "Kia" — confirm before
        # changing, since it alters the generated output.
        "enum": ["Mazda", "Kea"],
    },
    "new": {
        "type": "choice_probs",
        "enum": ["true", "false"],
    },
    "rating": {
        "type": "choice_probs",
        "enum": ["1", "2", "3", "4"],
    },
    "year": {"type": "number"},
    "colors_available": {
        "type": "array",
        "items": {"type": "string"},
    },
}

car = {"type": "object", "properties": car_properties}

# Build the schema-guided generator for the car example.
builder = Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=car,
    prompt="generate an example car",
)

print("Generating...")
output = builder()

# Pretty-print the generated structure with the generated values highlighted.
highlight_values(output)

Generating...
{
  make: [32m"Mazda"[0m,
  model: [
    {
      prob: [32m0.8154296875[0m,
      choice: [32m"Kea"[0m
    },
    {
      prob: [32m0.184814453125[0m,
      choice: [32m"Mazda"[0m
    }
  ],
  new: [
    {
      prob: [32m0.90185546875[0m,
      choice: [32m"true"[0m
    },
    {
      prob: [32m0.09808349609375[0m,
      choice: [32m"false"[0m
    }
  ],
  rating: [
    {
      prob: [32m0.221435546875[0m,
      choice: [32m"1"[0m
    },
    {
      prob: [32m0.394775390625[0m,
      choice: [32m"2"[0m
    },
    {
      prob: [32m0.382568359375[0m,
      choice: [32m"3"[0m
    },
    {
      prob: [32m0.0013370513916015625[0m,
      choice: [32m"4"[0m
    }
  ],
  year: [32m2016.0[0m,
  colors_available: [
    [32m"red"[0m
  ]
}


In [6]:
# Feature sub-schemas, built bottom-up so each nesting level stays readable.
audio_schema = {
    "type": "object",
    "properties": {
        "brand": {"type": "string"},
        "speakers": {"type": "number"},
        "hasBluetooth": {"type": "boolean"},
    },
}

safety_schema = {
    "type": "object",
    "properties": {
        "airbags": {"type": "number"},
        "parkingSensors": {"type": "boolean"},
        "laneAssist": {"type": "boolean"},
    },
}

performance_schema = {
    "type": "object",
    "properties": {
        "engine": {"type": "string"},
        "horsepower": {"type": "number"},
        "topSpeed": {"type": "number"},
    },
}

features_schema = {
    "type": "object",
    "properties": {
        "audio": audio_schema,
        "safety": safety_schema,
        "performance": performance_schema,
    },
}

# The vehicle: plain fields, three "choice_probs" fields (the run's output
# lists a probability per enum option), and the nested feature groups.
vehicle_schema = {
    "type": "object",
    "properties": {
        "make": {"type": "string"},
        "model": {"type": "string"},
        "year": {"type": "number"},
        "colors": {
            "type": "choice_probs",
            "enum": ["red", "green", "blue", "black", "white"],
        },
        "as_new": {
            "type": "choice_probs",
            "enum": ["true", "false"],
        },
        "rating": {
            "type": "choice_probs",
            "enum": ["1", "2", "3", "4"],
        },
        "features": features_schema,
    },
}

owner_schema = {
    "type": "object",
    "properties": {
        "firstName": {"type": "string"},
        "lastName": {"type": "string"},
        "age": {"type": "number"},
    },
}

# Top-level schema: a car and its owner, in that order.
complex_car = {
    "type": "object",
    "properties": {
        "car": vehicle_schema,
        "owner": owner_schema,
    },
}

# Build the schema-guided generator for the nested example.
builder = Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=complex_car,
    prompt="generate an example Rolls Royce Phantom",
)

print("Generating...")
output = builder()

# Pretty-print the generated structure with the generated values highlighted.
highlight_values(output)

Generating...
{
  car: {
    make: [32m"Rolls Royce"[0m,
    model: [32m"Phantom"[0m,
    year: [32m2014.0[0m,
    colors: [
      {
        prob: [32m0.001560211181640625[0m,
        choice: [32m"white"[0m
      },
      {
        prob: [32m0.833984375[0m,
        choice: [32m"red"[0m
      },
      {
        prob: [32m0.0865478515625[0m,
        choice: [32m"black"[0m
      },
      {
        prob: [32m0.048553466796875[0m,
        choice: [32m"blue"[0m
      },
      {
        prob: [32m0.0294342041015625[0m,
        choice: [32m"green"[0m
      }
    ],
    as_new: [
      {
        prob: [32m0.96533203125[0m,
        choice: [32m"true"[0m
      },
      {
        prob: [32m0.03460693359375[0m,
        choice: [32m"false"[0m
      }
    ],
    rating: [
      {
        prob: [32m0.05462646484375[0m,
        choice: [32m"1"[0m
      },
      {
        prob: [32m0.233642578125[0m,
        choice: [32m"2"[0m
      },
      {
        prob: [32