## Loading Data

In [1]:
import json

data_path = '/home/norrman/GitHub/multi-morph-checklist/M2C.json'
out_path = '/home/norrman/GitHub/multi-morph-checklist/M2C_response.json'

data = json.load(open(data_path))
len(list(data.keys()))

31500

## Running Model

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

XGLM_tokenizer = AutoTokenizer.from_pretrained("facebook/xglm-564M", padding_side='left')
XGLM_model = AutoModelForCausalLM.from_pretrained("facebook/xglm-564M")
XGLM_model = XGLM_model.to(device='cuda')

XGLM_model

  from .autonotebook import tqdm as notebook_tqdm


XGLMForCausalLM(
  (model): XGLMModel(
    (embed_tokens): Embedding(256008, 1024, padding_idx=1)
    (embed_positions): XGLMSinusoidalPositionalEmbedding()
    (layers): ModuleList(
      (0-23): 24 x XGLMDecoderLayer(
        (self_attn): XGLMAttention(
          (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
        )
        (activation_fn): GELUActivation()
        (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      )
    )
    (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine

In [3]:
from collections import Counter

counter = Counter()
for item in data.values():
    counter[(item['task'], item['lang'])] += 1

counter

len(data)

31500

In [4]:
item = data["0"]

prompt = f"{item['prefix']} {item['altered_example_prompt']} {item['prompt']}"

example_data = [data[str(i)] for i in range(0, len(data), 500)]

example_data

[{'task': 'negation1',
  'lang': 'en',
  'prefix': 'Please answer the question in English.',
  'context': 'Susan is not an architect, but Lola is.',
  'question': 'Who is an architect?',
  'answer': 'Lola.',
  'prompt': 'Context: Susan is not an architect, but Lola is. Question: Who is an architect? Answer:',
  'targets': {'N1': 'susan', 'P2': 'architect', 'P1': 'lola'},
  'example_prompt': 'Context: Dennis is not a programmer, but Lisa is. Question: Who is a programmer? Answer: Lisa.',
  'altered_example_prompt': 'Context: Dennis is not a programmer, but Lisa is. Question: Who is not a programmer? Answer: Dennis.',
  'raw_response': {},
  'response': {}},
 {'task': 'negation2',
  'lang': 'en',
  'prefix': 'Please answer the question in English.',
  'context': 'Ross and Joey are accountants, Christopher and Charles are architects.',
  'question': 'Who are not accountants?',
  'answer': 'Christopher and Charles.',
  'prompt': 'Context: Ross and Joey are accountants, Christopher and Char

In [5]:
targets = []

for id, item in data.items():
    for v in item['targets'].values():
        targets.append(v)

set(targets)

{'aatto',
 'accountant',
 'accountants',
 'acht',
 'achtzehn',
 'actor',
 'actors',
 'actress',
 'actresses',
 'alice',
 'amelie',
 'ananas',
 'ananasse',
 'ananasta',
 'andrew',
 'anna',
 'annukka',
 'anwalt',
 'anwälte',
 'anwältin',
 'anwältinnen',
 'apartment',
 'apfel',
 'appelsiini',
 'appelsiinia',
 'apple',
 'apples',
 'architect',
 'architects',
 'architekt',
 'architekten',
 'architektin',
 'architektinnen',
 'arkkitehtejä',
 'arkkitehti',
 'arto',
 'arzt',
 'asunnon',
 'asunto',
 'asuntoa',
 'audrey',
 'auf dem fenster',
 'auf dem regal',
 'auf dem sofa',
 'auf dem stuhl',
 'auf dem tisch',
 'auf den boden',
 'auto',
 'autoa',
 'auton',
 'banaani',
 'banaania',
 'banana',
 'bananas',
 'banane',
 'bananen',
 'bauer',
 'bauern',
 'behind the chair',
 'behind the shelf',
 'behind the sofa',
 'behind the table',
 'behind the window',
 'benutzen',
 'bigger',
 'bike',
 'birgit',
 'birne',
 'birnen',
 'boat',
 'bob',
 'book',
 'books',
 'boot',
 'buch',
 'buchhalter',
 'buchhalteri

In [6]:
import json, tqdm, re

settings = [{'shot': 'zero', 'beam': 1},
            {'shot': 'zero', 'beam': 2},
            {'shot': 'one', 'beam': 1},
            {'shot': 'one', 'beam': 2},
            {'shot': 'alt_one', 'beam': 1},
            {'shot': 'alt_one', 'beam': 2},]


for setting in settings: 
    for id, item in tqdm.tqdm(data.items(), total=len(data)):
        example = ''
        if setting['shot'] != 'zero':
            example = item['example_prompt'] if setting['shot'] == 'one' else item['altered_example_prompt']
            example = example + ' '
        
        prompt = f"{item['prefix']} {example}{item['prompt']}"
        inputs = XGLM_tokenizer(prompt, return_tensors='pt', padding=True)
        inputs = inputs.to(device='cuda')
        outputs = XGLM_model.generate(**inputs, 
                                    max_new_tokens=15,
                                    num_beams=setting['beam'])

        outputs = XGLM_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        prompt = prompt.replace('？', '?')

        if prompt in outputs:
            trimmed_outputs = outputs[len(prompt):].strip()
        elif 'ja' in item['lang']:
            zero_pattern = r"(?P<prompt>.+\s文脈:\s.+\s質問:\s.+\s回答:)(?P<response>.+)"
            one_pattern = r"(?P<prompt>.+\s文脈:\s.+\s質問:\s.+\s回答:\s.+\s文脈:\s.+\s質問:\s.+\s回答:)(?P<response>.+)"
            if setting['shot'] == 'zero':
                trimmed_outputs = re.match(zero_pattern, outputs).groupdict()['response'].strip()
            else:
                trimmed_outputs = re.match(one_pattern, outputs).groupdict()['response'].strip()
        
        data[id]['raw_response'][f"xglm_{setting['shot']}_{setting['beam']}beam"] = outputs
        data[id]['response'][f"xglm_{setting['shot']}_{setting['beam']}beam"] = trimmed_outputs
    
    with open(f"/home/norrman/GitHub/multi-morph-checklist/M2C_checkpoint_{setting['shot']}_beam{setting['beam']}.json", 'w') as f:
        json.dump(data, f, indent=4)

with open(f"/home/norrman/GitHub/multi-morph-checklist/M2C_xglm_responses.json", 'w') as f:
    json.dump(data, f, indent=4)


100%|██████████| 31500/31500 [1:25:45<00:00,  6.12it/s]
100%|██████████| 31500/31500 [1:45:31<00:00,  4.98it/s]
100%|██████████| 31500/31500 [1:45:15<00:00,  4.99it/s]
100%|██████████| 31500/31500 [1:55:28<00:00,  4.55it/s]
100%|██████████| 31500/31500 [1:45:42<00:00,  4.97it/s]
100%|██████████| 31500/31500 [1:55:26<00:00,  4.55it/s]


In [7]:
data

{'0': {'task': 'negation1',
  'lang': 'en',
  'prefix': 'Please answer the question in English.',
  'context': 'Susan is not an architect, but Lola is.',
  'question': 'Who is an architect?',
  'answer': 'Lola.',
  'prompt': 'Context: Susan is not an architect, but Lola is. Question: Who is an architect? Answer:',
  'targets': {'N1': 'susan', 'P2': 'architect', 'P1': 'lola'},
  'example_prompt': 'Context: Dennis is not a programmer, but Lisa is. Question: Who is a programmer? Answer: Lisa.',
  'altered_example_prompt': 'Context: Dennis is not a programmer, but Lisa is. Question: Who is not a programmer? Answer: Dennis.',
  'raw_response': {'xglm_zero_1beam': 'Please answer the question in English. Context: Susan is not an architect, but Lola is. Question: Who is an architect? Answer: Susan is an architect.',
   'xglm_zero_2beam': 'Please answer the question in English. Context: Susan is not an architect, but Lola is. Question: Who is an architect? Answer: Susan is an architect, Lola is

In [22]:
untrimmed_responses = []
for id, item in data.items():
    for model, response in item['response'].items():
        if not response and 'ja' not in item['lang']:
            untrimmed_responses.append((id, model, item['raw_response'][model]))

In [23]:
len(untrimmed_responses)


474

In [24]:
set([i[1] for i in untrimmed_responses])

{'xglm_zero_1beam'}

In [25]:
for id, item in tqdm.tqdm(data.items(), total=len(data)):
    if not item['response']['xglm_zero_1beam'] and 'ja' not in item['lang']:  
        example = ''
        
        prompt = f"{item['prefix']} {item['prompt']}"
        inputs = XGLM_tokenizer(prompt, return_tensors='pt', padding=True)
        inputs = inputs.to(device='cuda')
        outputs = XGLM_model.generate(**inputs, 
                                    max_new_tokens=30,
                                    num_beams=1)

        outputs = XGLM_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        prompt = prompt.replace('？', '?')

        if prompt in outputs:
            trimmed_outputs = outputs[len(prompt):].strip()
        else:
            trimmed_outputs = ""
        
        data[id]['raw_response'][f"xglm_zero_1beam"] = outputs
        data[id]['response'][f"xglm_zero_1beam"] = trimmed_outputs
        
with open(f"/home/norrman/GitHub/multi-morph-checklist/M2C_zero_1beam_rerun.json", 'w') as f:
    json.dump(data, f, indent=4)

 35%|███▌      | 11031/31500 [00:05<00:07, 2718.20it/s]