In [1]:
import json
import outlines
import torch
from transformers import AutoTokenizer
from textwrap import dedent

In [2]:
model_name = "Qwen/Qwen2-0.5B-Instruct"
model = outlines.models.transformers(
    model_name,
    device='mps',
    model_kwargs={
        'torch_dtype': torch.bfloat16,
        'trust_remote_code': True
    })
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [3]:
departments = ["clothing","electronics","kitchen","automotive"]

In [4]:
with open("../examples.json",'r') as fin:
    complaint_data = json.loads(fin.read())

In [5]:
complaint_data

[{'message': 'Hi, my name is Olivia Brown.I recently ordered a knife set from your wellness range, and it arrived earlier this week. Unfortunately, my satisfaction with the product has been less than ideal.My order was A123456',
  'order_number': 'A12-3456',
  'department': 'kitchen'},
 {'message': 'Hi, my name is John Smith.I recently ordered a dress for an upcoming event, which was alleged to meet my expectations both in fit and style. However, upon arrival, it became apparent that the fabric was of subpar quality, leading to a less than satisfactory appearance.The order number is A12-3456',
  'order_number': 'A12-3456',
  'department': 'clothing'},
 {'message': 'Hi, my name is Sarah Johnson.I recently ordered the ultimate ChefMaster 8 Drawer Cooktop. However, upon delivery, I discovered that one of the burners is malfunctioning.My order was A458739',
  'order_number': 'A45-8739',
  'department': 'kitchen'},
 {'message': 'Hi, my name is Jane Doeandcommn.I recently ordered a stylish b

In [6]:
def create_prompt(complaint):
    prompt_messages = [
        {
            "role": "system",
            "content": "You are as agent designed to help label complaints."
        },
        {
        "role": "user",
        "content": dedent("""
        I'm going to provide you with a consumer complaint to analyze.
        The complaint is going to be regarding a product from one of our
        departments. Here is the list of departments:
            - "clothing"
            - "electronics"
            - "kitchen"
            - "automotive"
        Please reply with *only* the name of the department.
        """)
    },{
        "role": "assistant",
        "content": "I understand and will only answer with the department name"
    },{
        "role": "user",
        "content": f"Great! Here is the complaint: {complaint['message']}"
    }
                       
                      ]
    prompt = tokenizer.apply_chat_template(prompt_messages, tokenize=False)
    return prompt

In [7]:
create_prompt(complaint_data[0])

'<|im_start|>system\nYou are as agent designed to help label complaints.<|im_end|>\n<|im_start|>user\n\nI\'m going to provide you with a consumer complaint to analyze.\nThe complaint is going to be regarding a product from one of our\ndepartments. Here is the list of departments:\n    - "clothing"\n    - "electronics"\n    - "kitchen"\n    - "automotive"\nPlease reply with *only* the name of the department.\n<|im_end|>\n<|im_start|>assistant\nI understand and will only answer with the department name<|im_end|>\n<|im_start|>user\nGreat! Here is the complaint: Hi, my name is Olivia Brown.I recently ordered a knife set from your wellness range, and it arrived earlier this week. Unfortunately, my satisfaction with the product has been less than ideal.My order was A123456<|im_end|>\n'

## Unstructured Generation

In [8]:
generator = outlines.generate.text(model)

In [9]:
for complaint in complaint_data[0:4]:
    prompt = create_prompt(complaint)
    result = generator(prompt, max_tokens=12)
    print(f"LLM: {result} Actual: {complaint['department']}")

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


LLM: assistant
My apologies, my system does not recognize this Actual: kitchen
LLM: assistant
Department: "clothing" Actual: clothing
LLM: assistant
The department with which the complaint related is: Actual: kitchen
LLM: assistant
electronics Actual: electronics


## Structured Generation

In [10]:
correct = 0
generator_struct = outlines.generate.choice(model,departments)
for complaint in complaint_data:

    prompt = create_prompt(complaint)
    result = generator_struct(prompt)
    if result == complaint['department']:
        correct += 1
    print(f"LLM: {result} Actual: {complaint['department']}")
print(correct/len(complaint_data))

LLM: electronics Actual: kitchen
LLM: electronics Actual: clothing
LLM: electronics Actual: kitchen
LLM: electronics Actual: electronics
LLM: electronics Actual: electronics
LLM: clothing Actual: kitchen
LLM: electronics Actual: electronics
LLM: electronics Actual: automotive
LLM: electronics Actual: electronics
LLM: electronics Actual: kitchen
LLM: electronics Actual: automotive
LLM: electronics Actual: automotive
LLM: electronics Actual: kitchen
LLM: electronics Actual: kitchen
LLM: electronics Actual: automotive
LLM: electronics Actual: kitchen
LLM: electronics Actual: automotive
LLM: electronics Actual: kitchen
LLM: electronics Actual: kitchen
LLM: electronics Actual: clothing
LLM: electronics Actual: electronics
LLM: electronics Actual: automotive
LLM: electronics Actual: electronics
LLM: electronics Actual: kitchen
LLM: electronics Actual: kitchen
LLM: electronics Actual: clothing
LLM: clothing Actual: kitchen
LLM: electronics Actual: clothing
LLM: clothing Actual: kitchen
LLM: e