In [22]:
#!pip install rouge-score

In [1]:
import torch
import json
import time
import numpy as np

from transformers import AutoTokenizer
from transformers.models.bloom.configuration_bloom import BloomConfig
from pruning.pruned_bloom import PrunedBloomForCausalLM
from node_attribution.utils import count_params
from rouge_score import rouge_scorer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained(f"bigscience/bloom-560m")

In [3]:
def score(model, tokenizer, sentence):
    tensor_input = tokenizer.encode(sentence, return_tensors='pt')
    repeat_input = tensor_input.repeat(tensor_input.size(-1)-2, 1)
    mask = torch.ones(tensor_input.size(-1) - 1).diag(1)[:-2]
    masked_input = repeat_input.masked_fill(mask == 1, tokenizer.pad_token_id)
    labels = repeat_input.masked_fill( masked_input != tokenizer.pad_token_id, -100)
    with torch.inference_mode():
        loss = model(masked_input, labels=labels).loss
    return np.exp(loss.item())

In [4]:
weights_path = "pruned_560m_bloom.pt"
state_dict_shapes_path = "state_dict_shapes.pkl"
config_path = "bloom_560m_config.json"
config_json = json.load(open(config_path, "rb"))
bloom_config = BloomConfig(
    vocab_size=250880,
    hidden_size=1024,
    n_layer=24,
    n_head=16,
    layer_norm_epsilon=1e-5,
    initializer_range=0.02,
    use_cache=True,
    bos_token_id=1,
    eos_token_id=2,
    apply_residual_connection_post_layernorm=False,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    pretraining_tp=1,  # TP rank used when training with megatron
    slow_but_exact=False,
    attention_softmax_in_fp32=True,
    bias_dropout_fusion=True,
    masked_softmax_fusion=True,
    offset_alibi=100,
    pad_token_id=3,
    seq_length=2048,
    skip_bias_add=True,
    skip_bias_add_qkv=False,
    unk_token_id=0,
    
)

In [14]:
pruned_model = PrunedBloomForCausalLM(bloom_config, state_dict_shapes_path)

In [15]:
pruned_model.load_state_dict(torch.load(weights_path))

<All keys matched successfully>

In [16]:
pruned_percent = 1.0 - (count_params(pruned_model)[-1] / 559214592)
print(pruned_percent)

0.10005102656548703


In [17]:
num_trials = 1

In [18]:
print(score(sentence="Oh, interesting, I am not familiar with that movie! Can you tell me more about it?", model=pruned_model, tokenizer=tokenizer)) 

631.8655196564496


In [19]:
line = "Hello, I am a social bot! "
inputs = tokenizer(line, return_tensors="pt")
pruned_times = []

for i in range(num_trials):
    start = time.time()
    outputs = pruned_model.generate(
        input_ids=inputs["input_ids"], 
        max_new_tokens=100, 
        do_sample=True, 
        top_k=50, 
        top_p=0.95,
    )
    end = time.time()
    pruned_times.append(end - start)
    print(f"inference time: {end - start}")
    
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

inference time: 7.487040042877197
['Hello, I am a social bot!  e. t a, cal c h e t e r s. the e z, a b l the a y o n m b y t e r s. o c e r e z ( 2009 s the e a l c e a z y ). i m e a、 a、 v i m c al y o r b e y, k i y y r e r v e c k i o, y e c l r a b r e r a ( c']


In [20]:
pruned_times

[7.487040042877197]

In [21]:
mean_pruned = np.mean(pruned_times)
std_pruned = np.std(pruned_times)
print(mean_pruned)
print(std_pruned)

7.487040042877197
0.0


In [22]:
while True:
    line = input("You:")
    inputs = tokenizer(line, return_tensors="pt")
    outputs = pruned_model.generate(
        input_ids=inputs["input_ids"], 
        max_new_tokens=20, 
        do_sample=True, 
        top_k=50, 
        top_p=0.95,
    )
    print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

You: Oh, interesting, I am not familiar with that movie! Can you


['Oh, interesting, I am not familiar with that movie! Can you m de de de re t a!s to de de en a! t a!s!']


KeyboardInterrupt: Interrupted by user

In [None]:
# full_line = "Person: My favorite movie is the The Day After Tomorrow\nSocialBot: Oh, interesting, I am not familiar with that movie! Can you tell me more about it?"
# prompt_line = "Person: My favorite movie is The Day After Tomorrow\nSocialBot: "
# completion = full_line.split(prompt_line)[-1]
# inputs = tokenizer(prompt_line, return_tensors="pt")

# #for i in range(num_trials):
# outputs = pruned_model.generate(
#     input_ids=inputs["input_ids"], 
#     max_new_tokens=25, 
#     do_sample=True, 
#     top_k=50, 
#     top_p=0.95,
# )
# out_seq = tokenizer.batch_decode(outputs, skip_special_tokens=True)
# out_seq = out_seq[0].split("Person: My favorite movie is The Day After Tomorrow\nSocialBot: ")[-1]
# r_scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
# rouge_scores = r_scorer.score(completion, out_seq)

In [15]:
# from transformers import AutoTokenizer, BloomForCausalLM

In [16]:
# model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")

In [17]:
# inputs = tokenizer("Person: My favorite movie is The Day After Tomorrow\nSocialBot: ", return_tensors="pt")
# start = time.time()
# outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=25, do_sample=True, top_k=50, top_p=0.95)
# end = time.time()
# print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
# print(f"inference time: {end - start}")