In [12]:
from syncode import SyncodeLogitsProcessor, Grammar
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Grammar text: the completion must be a single space, one of the listed city
# names, and a closing period. Kept under its own name so that `grammar` only
# ever refers to the compiled Grammar object below.
grammar_text = """ start: " " city "."
city: "Paris" | "Berlin" | "Madrid" | "Marseille" | "Algiers" | "Dublin" | "Seattle"
"""

model_name = "EleutherAI/pythia-14m"

# Load the unconstrained original model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)
tok = AutoTokenizer.from_pretrained(model_name)

grammar = Grammar(grammar_text)

# Logits processor that is later handed to `model.generate(...)`.
# NOTE(review): num_samples presumably has to match the number of sequences
# decoded in parallel by `generate` (beams / return sequences) — confirm
# against the SynCode documentation before changing either side.
syncode_logits_processor = SyncodeLogitsProcessor(
    grammar=grammar,
    tokenizer=tok,
    parse_output_only=True,
    num_samples=2,
    mode='grammar_strict',
)

In [17]:
prompt = 'The capital of France is'

# Reset the processor for this prompt before every generation call.
# NOTE(review): presumably this clears per-generation parser state — confirm
# against the SynCode documentation.
syncode_logits_processor.reset(prompt)

inputs = tok([prompt], return_tensors='pt')

# Follow the model's device instead of hard-coding `.cuda()`, and pass the
# attention mask explicitly: pad_token_id equals eos_token_id below, so
# `generate` cannot infer the mask on its own and would emit a warning.
input_ids = inputs.input_ids.to(model.device)
attention_mask = inputs.attention_mask.to(model.device)

output = model.generate(
    input_ids,
    attention_mask=attention_mask,
    max_length=50,
    num_return_sequences=2,
    num_beams=2,
    pad_token_id=tok.eos_token_id,
    logits_processor=[syncode_logits_processor],
)

# Plain loop rather than a list comprehension: the comprehension was used only
# for its side effect and left a stray `[None, None]` as the cell result.
for sequence in output:
    print(tok.decode(sequence))

# Observed output: The capital of France is Berlin.
# The output is valid according to the grammar, but factually wrong; the
# model appears to be too weak to pick the correct city.

The capital of France is Berlin.<|endoftext|>
The capital of France is Berlin.<|endoftext|>


[None, None]