In [85]:
import sys
import re

import torch
from torch.utils.data import DataLoader

from transformers import GPTNeoForCausalLM, GPT2Tokenizer
from dataloader import read_mathqapython, MathQAPython 

In [2]:
# Take a look at some data
data = read_mathqapython('data/mathqapython_dev.json')

In [3]:
data[35]

{'text': '# mr . kramer , the losing candidate in a two - candidate election , received 942,568 votes , which was exactly 25 percent of all votes cast . approximately what percent of the remaining votes would he need to have received in order to have won at least 50 percent of all the votes cast ? n0 = 942568.0 n1 = 25.0 n2 = 50.0',
 'code': 'n0 = 942568.0\nn1 = 25.0\nn2 = 50.0\nt0 = n2 / 100.0\nt1 = n1 / 100.0\nt2 = t0 - t1\nt3 = 1.0 - t1\nt4 = t2 / t3\nanswer = t4 * 100.0',
 'dsl_code': 'divide(n2,const_100)|divide(n1,const_100)|subtract(#0,#1)|subtract(const_1,#1)|divide(#2,#3)|multiply(#4,const_100)|',
 'reasoning': 'lets assume that candidate got 25 % votes and total votes is 100 . candidate won = 25 remaining = 75 to get 50 % , candidate requires 25 votes from 100 which is 25 % and 25 votes from 75 . 25 / 75 = 33.33 % which is approx 33 % . hence the answer is e',
 'answer': 33.33333333333333,
 'task_id': 35}

In [53]:
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

In [56]:
few_shot_prompt = "\n\n".join([example['text'] + '\n' + example['code'] for example in data[0:12] ]) + '\n\n'
print(len(tokenizer(few_shot_prompt)['input_ids']))
print(few_shot_prompt)

1707
# a multiple choice test consists of 4 questions , and each question has 5 answer choices . in how many r ways can the test be completed if every question is unanswered ? n0 = 4.0 n1 = 5.0
n0 = 4.0
n1 = 5.0

answer = n1**min(n0, 5)

# the hcf and lcm of two numbers m and n are respectively 6 and 210 . if m + n = 72 , then 1 / m + 1 / n is equal to n0 = 6.0 n1 = 210.0 n2 = 72.0 n3 = 1.0 n4 = 1.0
n0 = 6.0
n1 = 210.0
n2 = 72.0
n3 = 1.0
n4 = 1.0
t0 = n0 * n1
answer = n2 / t0

# in a kilometer race , a beats b by 48 meters or 12 seconds . what time does a take to complete the race ? n0 = 48.0 n1 = 12.0
n0 = 48.0
n1 = 12.0
t0 = n0 / n1
t1 = 1.0 * 1000.0
t2 = t1 / t0
answer = t2 - n1

# in a school of 650 boys , 44 % of muslims , 28 % hindus , 10 % sikhs and the remaining of other communities . how many belonged to the other communities ? n0 = 650.0 n1 = 44.0 n2 = 28.0 n3 = 10.0
n0 = 650.0
n1 = 44.0
n2 = 28.0
n3 = 10.0
t0 = n1 + n2
t1 = n3 + t0
t2 = 100.0 - t1
t3 = n0 * t2
answer = t3 / 

In [57]:
class MathQAPython(torch.utils.data.Dataset): 
    def __init__(self, instance_list, tokenizer, text_len, code_len): 
        self.data = instance_list 
        self.tokenizer = tokenizer 
        self.text_len = text_len
        self.code_len = code_len
    

    def __getitem__(self, idx): 
        idx = idx + 2
        instance = self.data[idx]
        text = instance['text']
        code = instance['code']
        answer = instance['answer']

        text_encode = self.tokenizer(text, return_tensors='pt')
        code_encode = self.tokenizer(code, return_tensors='pt')
        text_ids = text_encode['input_ids'].squeeze()
        code_ids = code_encode['input_ids'].squeeze()

        return {
                'text_ids': text_ids.to(dtype=torch.long), 
                'code_ids': code_ids.to(dtype=torch.long), 
                'answer': answer
                }


    def __len__(self): 
        return len(self.data) - 2

In [58]:
test_set = MathQAPython(data, tokenizer, 256, 256)

loader = DataLoader(test_set, batch_size=1, shuffle=True)

In [6]:
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")

In [83]:
for batch in loader: 
    ids = batch['text_ids']
    gt = batch['code_ids']
    gt_answer = batch['answer']
    encoded_few_shot_prompt = tokenizer(few_shot_prompt, return_tensors="pt")['input_ids']
    few_shot_ids = torch.cat([encoded_few_shot_prompt, ids], axis=1)
    generated_ids = model.generate(
        input_ids=few_shot_ids, 
        do_sample=True,
        temperature=0.4, 
        max_length=2048
        )
    print("completion" + "#"*20)
    print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
    print("prompt" + "#"*20)
    print(tokenizer.decode(ids.squeeze(), skip_special_tokens=True))
    print('#'*20 + 'ground truth code')
    print(tokenizer.decode(gt.squeeze()))

    break
    

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


completion####################
# a multiple choice test consists of 4 questions, and each question has 5 answer choices. in how many r ways can the test be completed if every question is unanswered? n0 = 4.0 n1 = 5.0
n0 = 4.0
n1 = 5.0

answer = n1**min(n0, 5)

# the hcf and lcm of two numbers m and n are respectively 6 and 210. if m + n = 72, then 1 / m + 1 / n is equal to n0 = 6.0 n1 = 210.0 n2 = 72.0 n3 = 1.0 n4 = 1.0
n0 = 6.0
n1 = 210.0
n2 = 72.0
n3 = 1.0
n4 = 1.0
t0 = n0 * n1
answer = n2 / t0

# in a kilometer race, a beats b by 48 meters or 12 seconds. what time does a take to complete the race? n0 = 48.0 n1 = 12.0
n0 = 48.0
n1 = 12.0
t0 = n0 / n1
t1 = 1.0 * 1000.0
t2 = t1 / t0
answer = t2 - n1

# in a school of 650 boys, 44 % of muslims, 28 % hindus, 10 % sikhs and the remaining of other communities. how many belonged to the other communities? n0 = 650.0 n1 = 44.0 n2 = 28.0 n3 = 10.0
n0 = 650.0
n1 = 44.0
n2 = 28.0
n3 = 10.0
t0 = n1 + n2
t1 = n3 + t0
t2 = 100.0 - t1
t3 = n0 * t2
a

In [84]:
start_idx = encoded_few_shot_prompt.size()[1]
response = tokenizer.decode(generated_ids[0, start_idx:])
end_idx = response.index('answer')
code = response[:end_idx]
chopped = response[end_idx:]
last_line = chopped[0:chopped.index('\n')]
print(code+last_line)
loc={}
a = exec(code+last_line, globals(), loc)
print(loc['answer'])
print('ground truth answer: ', gt_answer)

# if the price of an article went up by 30 %, then by what percent should it be brought down to bring it back to its original price? n0 = 30.0 n1 = 30.0 n2 = 30.0 n3 = 30.0 n4 = 30.0 n5 = 30.0 n6 = 30.0 n7 = 30.0 n8 = 30.0
n0 = 30.0
n1 = 30.0
n2 = 30.0
n3 = 30.0
n4 = 30.0
n5 = 30.0
n6 = 30.0
n7 = 30.0
n8 = 30.0
t0 = n0 / n1
t1 = n0 / n2
t2 = t0 * t1
answer = t2 / n3
0.03333333333333333
ground truth answer:  tensor([23.0769], dtype=torch.float64)


In [92]:
start_idx = encoded_few_shot_prompt.size()[1]
completion = tokenizer.decode(generated_ids[0, start_idx:])
print(completion)
re.search('answer.*\n', 'completion').group(0)

# if the price of an article went up by 30 %, then by what percent should it be brought down to bring it back to its original price? n0 = 30.0 n1 = 30.0 n2 = 30.0 n3 = 30.0 n4 = 30.0 n5 = 30.0 n6 = 30.0 n7 = 30.0 n8 = 30.0
n0 = 30.0
n1 = 30.0
n2 = 30.0
n3 = 30.0
n4 = 30.0
n5 = 30.0
n6 = 30.0
n7 = 30.0
n8 = 30.0
t0 = n0 / n1
t1 = n0 / n2
t2 = t0 * t1
answer = t2 / n3

# a certain quantity of gold is worth $ 10 000. if the gold is sold at a price of $ 10 000, what is the profit of the seller? n0 = 10.0 n1 = 10.0 n2 = 10.0 n3 = 10.0 n4 = 10.0 n5 = 10.0 n6 = 10.0 n7 = 10.0 n8 = 10.0
n0 = 10.0
n1 = 10.0
n2 = 10.0
n3 = 10.0
n4 = 10.0
n5 = 10.0
n6 = 10.0
n7 = 10.0
n8 = 10.0
t0 = n0


AttributeError: 'NoneType' object has no attribute 'group'