## Loading Data

In [3]:
import json

data_path = '/home/norrman/GitHub/multi-morph-checklist/M2C.json'

# Open through a context manager so the file handle is closed as soon as the
# JSON has been parsed (the bare open() previously leaked it), and read it as
# UTF-8 explicitly rather than relying on the platform's default encoding.
with open(data_path, encoding='utf-8') as f:
    data = json.load(f)

# Number of top-level entries in the loaded mapping.
len(data)

31500

## Running Model

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Tokenizer for the XGLM 1.7B checkpoint, configured with left-side padding.
tokenizer = AutoTokenizer.from_pretrained("facebook/xglm-1.7B", padding_side='left')

# Load the causal-LM weights and move the whole module onto the CUDA device.
model = AutoModelForCausalLM.from_pretrained("facebook/xglm-1.7B").to(device='cuda')

# Trailing expression: display the module structure as the cell output.
model

  from .autonotebook import tqdm as notebook_tqdm


XGLMForCausalLM(
  (model): XGLMModel(
    (embed_tokens): Embedding(256008, 2048, padding_idx=1)
    (embed_positions): XGLMSinusoidalPositionalEmbedding()
    (layers): ModuleList(
      (0-23): 24 x XGLMDecoderLayer(
        (self_attn): XGLMAttention(
          (k_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (q_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (out_proj): Linear(in_features=2048, out_features=2048, bias=True)
        )
        (activation_fn): GELUActivation()
        (self_attn_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (fc1): Linear(in_features=2048, out_features=8192, bias=True)
        (fc2): Linear(in_features=8192, out_features=2048, bias=True)
        (final_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      )
    )
    (layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine

In [4]:
import json, tqdm, re

# Each setting selects which in-context example is prepended ('one' uses
# item['example_prompt'], anything else except 'zero' uses
# item['altered_example_prompt']) and the beam width passed to generate().
settings = [
    {'shot': 'one', 'beam': 1},
    {'shot': 'alt', 'beam': 1},
]

# Fallback patterns used for Japanese items when the prompt cannot be found
# verbatim in the decoded text. The first covers a prompt with a single
# context/question/answer group, the second a prompt with two such groups
# (i.e. one in-context example followed by the actual question).
_JA_ZERO_PATTERN = re.compile(r"(?P<prompt>.+\s文脈:\s.+\s質問:\s.+\s回答:)(?P<response>.+)")
_JA_ONE_PATTERN = re.compile(r"(?P<prompt>.+\s文脈:\s.+\s質問:\s.+\s回答:\s.+\s文脈:\s.+\s質問:\s.+\s回答:)(?P<response>.+)")


def _trim_response(prompt, outputs, lang, shot):
    """Return the part of ``outputs`` that follows the prompt.

    Args:
        prompt: The prompt text that was fed to the model.
        outputs: The decoded generation (prompt echo plus continuation).
        lang: The item's ``lang`` field; the regex fallback is only tried
            when it contains ``'ja'``.
        shot: The shot setting; ``'zero'`` selects the single-group pattern,
            anything else the two-group pattern.

    Returns:
        The stripped continuation, or ``''`` when neither the literal prompt
        nor the Japanese fallback pattern can be located.
    """
    start = outputs.find(prompt)
    if start != -1:
        # Slice relative to where the prompt actually occurs; slicing from
        # len(prompt) is only correct when the prompt starts at index 0.
        return outputs[start + len(prompt):].strip()

    if 'ja' in lang:
        pattern = _JA_ZERO_PATTERN if shot == 'zero' else _JA_ONE_PATTERN
        match = pattern.match(outputs)
        # re.match returns None on failure; leave the response empty instead
        # of crashing the whole run with an AttributeError.
        if match is not None:
            return match.group('response').strip()

    return ''


for setting in settings:
    # Key under which this configuration's results are stored on every item.
    run_key = f"xglm1.7B_{setting['shot']}_{setting['beam']}beam"

    for item_id, item in tqdm.tqdm(data.items(), total=len(data)):
        example = ''
        if setting['shot'] != 'zero':
            example = item['example_prompt'] if setting['shot'] == 'one' else item['altered_example_prompt']
            example = example + ' '

        prompt = f"{item['prefix']} {example}{item['prompt']}"
        inputs = tokenizer(prompt, return_tensors='pt', padding=True)
        inputs = inputs.to(device='cuda')
        generated = model.generate(**inputs,
                                   max_new_tokens=15,
                                   num_beams=setting['beam'])

        outputs = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
        # Replace the full-width question mark in the prompt before searching
        # for it in the decoded text.
        prompt = prompt.replace('？', '?')

        item['raw_response'][run_key] = outputs
        item['response'][run_key] = _trim_response(prompt, outputs, item['lang'], setting['shot'])

    # Checkpoint the full data dict after each completed setting.
    with open(f"/home/norrman/GitHub/multi-morph-checklist/M2C_xglm1.7B_one_shot_checkpoint_{setting['shot']}_beam{setting['beam']}.json", 'w') as f:
        json.dump(data, f, indent=4)

# Final dump once every setting has been processed.
with open("/home/norrman/GitHub/multi-morph-checklist/M2C_xglm1.7B_one_shot_responses.json", 'w') as f:
    json.dump(data, f, indent=4)


 64%|██████▎   | 20011/31500 [1:50:15<1:03:18,  3.02it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 126.00 MiB. GPU 0 has a total capacity of 7.78 GiB of which 89.06 MiB is free. Including non-PyTorch memory, this process has 6.84 GiB memory in use. Of the allocated memory 6.53 GiB is allocated by PyTorch, and 131.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [8]:
# Rename the zero-shot result key from 'xglm_zero_1beam' to
# 'xglm1.7B_zero_1beam' in both result dicts of every item.
# The membership guard makes the cell idempotent: re-running it after the
# old key has already been moved is a no-op instead of a KeyError.
for item in data.values():
    for field in ('raw_response', 'response'):
        results = item[field]
        if 'xglm_zero_1beam' in results:
            results['xglm1.7B_zero_1beam'] = results.pop('xglm_zero_1beam')

In [22]:
# Collect (item id, result key, raw response) for every stored response that
# is empty on an item whose 'lang' does not contain 'ja'.
# Built as a comprehension so its loop variables stay local: the previous
# for-loop bound the name `model` at notebook scope, replacing the loaded
# network with a string, and also shadowed the builtin `id`.
untrimmed_responses = [
    (item_id, model_name, item['raw_response'][model_name])
    for item_id, item in data.items()
    for model_name, response in item['response'].items()
    if not response and 'ja' not in item['lang']
]

In [23]:
# Number of entries collected in untrimmed_responses.
len(untrimmed_responses)


474

In [24]:
# Distinct result keys (second tuple field) among the collected entries.
{entry[1] for entry in untrimmed_responses}

{'xglm_zero_1beam'}

In [25]:
# Re-generate the zero-shot response for every item whose stored
# 'xglm_zero_1beam' response is empty and whose 'lang' does not contain 'ja',
# this time allowing up to 30 new tokens.
# NOTE(review): XGLM_tokenizer / XGLM_model are not defined in the cells
# visible here — presumably aliases of the tokenizer/model loaded earlier;
# confirm they exist on a fresh kernel before running.
for item_id, item in tqdm.tqdm(data.items(), total=len(data)):
    if item['response']['xglm_zero_1beam'] or 'ja' in item['lang']:
        continue

    prompt = f"{item['prefix']} {item['prompt']}"
    inputs = XGLM_tokenizer(prompt, return_tensors='pt', padding=True)
    inputs = inputs.to(device='cuda')
    generated = XGLM_model.generate(**inputs,
                                    max_new_tokens=30,
                                    num_beams=1)

    outputs = XGLM_tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
    # Replace the full-width question mark in the prompt before searching
    # for it in the decoded text.
    prompt = prompt.replace('？', '?')

    # Slice relative to where the prompt actually occurs; slicing from
    # len(prompt) is only correct when the prompt starts at index 0.
    start = outputs.find(prompt)
    trimmed_outputs = outputs[start + len(prompt):].strip() if start != -1 else ""

    item['raw_response']["xglm_zero_1beam"] = outputs
    item['response']["xglm_zero_1beam"] = trimmed_outputs

with open("/home/norrman/GitHub/multi-morph-checklist/M2C_zero_1beam_rerun.json", 'w') as f:
    json.dump(data, f, indent=4)

 35%|███▌      | 11031/31500 [00:05<00:07, 2718.20it/s]