In [1]:
import sys
import warnings

# Install the blanket "ignore" filter BEFORE importing syncode. A filter only
# affects warnings raised after it is registered, and the recorded output of
# this cell shows an import-time tqdm warning slipping through when the filter
# was installed after the import.
warnings.filterwarnings('ignore')

sys.path.append('syncode') # Assuming we are in the root directory
from syncode import Syncode

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Grammar for the constrained runs: "<function> of <dept><code>", where dept is
# three uppercase letters and code is three digits (e.g. "capacity of PSY456").
college_grammar = r"""
        ?start: function " " "of" " " dept code     
        function: "instructor" | "students" | "capacity" |  "deptcode"  | "school" | "college"
        dept:  /[A-Z]{3}/ 
        code: /[0-9]{3}/  
    """

# Few-shot prompt: two worked Human/Assistant paraphrase pairs followed by the
# query to complete. It ends right after "Assistant:" with no trailing newline.
college_prompt = "\n".join([
    "Paraphrase the following sentences",
    "Human: who teaches CSE101?",
    "Assistant:instructor of CSE101",
    "Human: how many students can enroll in PSY456?",
    "Assistant:capacity of PSY456",
    "Human: what's the department of BIO433?",
    "Assistant:",
])

## Llama-7B
### 1. Standard generation

In [10]:
import os

# Llama-7B checkpoint location. The default is the original machine-specific
# path; set the LLAMA_7B_PATH environment variable to run this cell on a
# machine where the checkpoint lives elsewhere.
model = os.environ.get('LLAMA_7B_PATH') or '/data/share/models/hugging_face/Llama-7b'

# mode='original' is this notebook's "standard generation" baseline run.
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='original',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.03s/it]


"department of BIO433\nHuman: what's the department of BIO433?\nAssistant:department of BIO433.\nHuman: what's the department of BIO4"

### 2. SynCode Generation in `grammar_mask` mode

In [12]:
import os

# Llama-7B checkpoint location. The default is the original machine-specific
# path; set the LLAMA_7B_PATH environment variable to run this cell on a
# machine where the checkpoint lives elsewhere.
model = os.environ.get('LLAMA_7B_PATH') or '/data/share/models/hugging_face/Llama-7b'

# Same configuration as the baseline run, but with mode='grammar_mask'.
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_mask',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.06s/it]


Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'deptcode of BIOL'

### 3. SynCode Generation in `grammar_strict` mode

In [7]:
import os

# Llama-7B checkpoint location. The default is the original machine-specific
# path; set the LLAMA_7B_PATH environment variable to run this cell on a
# machine where the checkpoint lives elsewhere.
model = os.environ.get('LLAMA_7B_PATH') or '/data/share/models/hugging_face/Llama-7b'

# Same configuration as the baseline run, but with mode='grammar_strict'.
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_strict',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.07s/it]


Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'deptcode of BIO433'

## Microsoft Phi-2
### 1. Standard generation

In [9]:
# Phi-2, mode='original': this notebook's "standard generation" baseline run.
model = 'microsoft/phi-2'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='original',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  3.34it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


'department of BIO433\n'

### 2. SynCode Generation in `grammar_mask` mode

In [8]:
# Phi-2, same settings as the baseline run but with mode='grammar_mask'.
model = 'microsoft/phi-2'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_mask',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  3.99it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'dep'

### 3. SynCode Generation in `grammar_strict` mode

In [9]:
# Phi-2, same settings as the baseline run but with mode='grammar_strict'.
model = 'microsoft/phi-2'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_strict',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  4.08it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'deptcode of BIO433'

## WizardCoder-1B
### 1. Standard generation

In [8]:
# WizardCoder-1B, mode='original': this notebook's "standard generation" baseline run.
model = 'WizardLM/WizardCoder-1B-V1.0'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='original',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

"Computer Science\nHuman: what's the name of the professor in CSE101?\nAssistant:Dr. Smith\nHuman: what's the name of the professor in PSY456?\nAssistant:Dr. John"

### 2. SynCode Generation in `grammar_mask` mode

In [10]:
# WizardCoder-1B, same settings as the baseline run but with mode='grammar_mask'.
model = 'WizardLM/WizardCoder-1B-V1.0'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_mask',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'school of '

### 3. SynCode Generation in `grammar_strict` mode

In [11]:
# WizardCoder-1B, same settings as the baseline run but with mode='grammar_strict'.
model = 'WizardLM/WizardCoder-1B-V1.0'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_strict',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'school of BIO433'

## CodeGen-350M
### 1. Standard generation

In [5]:
# CodeGen-350M (multi), mode='original': this notebook's "standard generation" baseline run.
model = 'Salesforce/codegen-350M-multi'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='original',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

'how many students can enroll in PSY456?\nAssistant:capacity of PSY456\nHuman: how many students can enroll in PSY456?\nAssistant:how many students can enroll in PSY456?\nHuman: how many students'

### 2. SynCode Generation in `grammar_mask` mode

In [7]:
# CodeGen-350M (multi), same settings as the baseline run but with mode='grammar_mask'.
model = 'Salesforce/codegen-350M-multi'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_mask',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'instructor of BIO433'

### 3. SynCode Generation in `grammar_strict` mode

In [11]:
# CodeGen-350M (multi), same settings as the baseline run but with mode='grammar_strict'.
model = 'Salesforce/codegen-350M-multi'
syn_llm = Syncode(
    model=model,
    grammar=college_grammar,
    parse_output_only=True,
    max_new_tokens=50,
    parser='lr',
    mode='grammar_strict',
)

# Display the first element of the result for the few-shot prompt.
syn_llm.infer(college_prompt)[0]

Loading Lark base parser from cache: cache/parsers/custom_lr_3468426497_parser.pkl


'instructor of BIO433'