In [3]:
import sys
sys.path.append('../src/')
from transformers import XLMRobertaTokenizer, XLMRobertaConfig, XLMRobertaForSequenceClassification
from textpruner import EmbeddingPruner, TransformerPruner
from textpruner import GeneralConfig, EmbeddingPruningConfig, TransformerPruningConfig
from modeling import XLMRForGLUESimple
import torch,os,json,tqdm
import logging
logger = logging.getLogger(__name__)

In [4]:
# Run on the GPU when one is visible; otherwise fall back to the CPU so the
# notebook still executes on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Directory that measure_performance receives for its evaluation results.
output_dir='./pruned_models'
# Batch size used for both the evaluation DataLoader and prediction.
batch_size=32
# Languages to evaluate on.
eval_langs = ['zh']
# Machine-specific location of the XNLI data.
data_dir='/yrfs1/rc/zqyang5/Xtreme/datasets/XNLI'
split = 'test'
max_seq_length=128
taskname='xnli'

#### Init Model

In [5]:
# Local XLM-R base assets: the SentencePiece vocabulary and the model config.
xlmr_vocab_file='./pretrained-models/xlm-r-base/sentencepiece.bpe.model'
xlmr_config_file='./pretrained-models/xlm-r-base/config.json'
# Checkpoint loaded into the classifier below (absolute, machine-specific path;
# presumably a model fine-tuned on XNLI, judging by the directory name).
xlmr_classification_ckpt_file ='/work/rc/zqyang5/cross-lingual/xnli/xlmr/xnli_XbTrainEn_lr3e4_s4_bs32/gs42948.pkl'

def init_xlmrForSentenceClassification(config_file, vocab_file, ckpt_file, num_labels=3):
    """Create an XLM-R tokenizer and a ``XLMRobertaForSequenceClassification`` model.

    Args:
        config_file: JSON config read with ``XLMRobertaConfig.from_json_file``.
        vocab_file: vocabulary file handed to ``XLMRobertaTokenizer``.
        ckpt_file: checkpoint passed to ``from_pretrained``.
        num_labels: value written to ``config.num_labels`` before loading.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = XLMRobertaTokenizer(vocab_file=vocab_file)

    model_config = XLMRobertaConfig.from_json_file(config_file)
    # The label count is overridden on the config before the weights are loaded.
    model_config.num_labels = num_labels

    classifier = XLMRobertaForSequenceClassification.from_pretrained(
        ckpt_file,
        config=model_config,
    )
    return tokenizer, classifier


def init_xlmr_class_model(config_file, vocab_file, ckpt_file, num_labels=3):
    """Build the tokenizer and an ``XLMRForGLUESimple`` classifier from local files.

    Args:
        config_file: path handed to ``XLMRobertaConfig.from_pretrained``.
        vocab_file: vocabulary file handed to ``XLMRobertaTokenizer``.
        ckpt_file: checkpoint read with ``torch.load`` and applied as a state dict.
        num_labels: number of output classes forwarded to the model constructor.

    Returns:
        A ``(tokenizer, model)`` tuple. The checkpoint tensors are mapped to the
        CPU; moving the model to another device is left to the caller.
    """
    tokenizer = XLMRobertaTokenizer(vocab_file=vocab_file)
    config = XLMRobertaConfig.from_pretrained(config_file)
    # Honour the caller's num_labels (it used to be hard-coded to 3 here).
    model = XLMRForGLUESimple(config=config, num_labels=num_labels)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(ckpt_file, map_location='cpu')
    # strict=False tolerates key mismatches, so print what did not line up:
    # first the missing keys, then the unexpected ones.
    missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
    print(missing_keys)
    print(unexpected_keys)
    return tokenizer, model


#model.to(device)
#print("Current Vocab size:",tokenizer.vocab_size)
#print("Current Embedding size:", model.get_input_embeddings().weight.shape)

In [4]:
# Load the unpruned tokenizer and model. The two lists printed below are the
# missing and unexpected state-dict keys reported by load_state_dict.
tokenizer, model = init_xlmr_class_model(xlmr_config_file, xlmr_vocab_file, xlmr_classification_ckpt_file)

[]
[]


In [5]:
# from transformers import BertTokenizer 
# bert_tokenizer = BertTokenizer(vocab_file='/yrfs1/rc/zqyang5/pretrained-models/bert/base_uncased/vocab.txt')
# print(tokenizer([("Hello world","Goodbye!")],max_length=10,truncation=True,padding='max_length',return_token_type_ids=True))
# print(bert_tokenizer([("Hello world","Goodbye!")],max_length=10,truncation=True,padding='max_length',return_token_type_ids=True))

# Embedding Pruning

#### Init data: extract sentences from the datasets

In [6]:
def extract_sentences_from_xnli(data_files):
    """Flatten tab-separated files into one list of field strings.

    Each line is stripped of surrounding whitespace and split on tabs; every
    resulting field (whatever column it came from) is kept, in file order.

    Args:
        data_files: iterable of paths to UTF-8 encoded text files.

    Returns:
        A list with all fields of all lines of all files.
    """
    sentences = []
    for path in data_files:
        with open(path, 'r', encoding='utf-8') as handle:
            rows = handle.readlines()
        # The rows are read up front, so the progress bar is given a list.
        for row in tqdm.tqdm(rows):
            sentences.extend(row.strip().split('\t'))
    return sentences

# Collect every tab-separated field of the listed file(s); the resulting
# strings are the text handed to the embedding pruner further down.
data_files = ['./datasets/multinli.train.zh.tsv']
lines = extract_sentences_from_xnli(data_files)

100%|██████████| 392703/392703 [00:00<00:00, 639307.28it/s]


#### Init Embedding Pruner

In [None]:
# Pruner output goes under 'pruned_models-mysimple'.
general_config = GeneralConfig(output_dir='pruned_models-mysimple')
# base_model_prefix presumably names the attribute holding the backbone
# encoder on `model` -- TODO confirm against XLMRForGLUESimple.
embedding_pruner = EmbeddingPruner(model, tokenizer, general_config=general_config, base_model_prefix='roberta')
# Prune the embeddings using the text extracted into `lines`.
embedding_pruner.prune_embeddings(dataiter=lines)

In [12]:
# Report the shape of the input-embedding matrix after pruning, then persist
# the pruned model (the message printed below shows where it was written).
print("New embedding size:", embedding_pruner.base_model.get_input_embeddings().weight.shape)
embedding_pruner.save_model()

New embedding size: torch.Size([23553, 768])
New embedding size 23553 pruned vocab file has been saved to pruned_models-mysimple/pruned_23553V/sentencepiece.bpe.model. Reintialize the tokenizer!


#### Reload the embedding-pruned model

In [6]:
# Rebuild tokenizer and model from the files saved by the embedding pruner
# (directory name encodes the pruned vocabulary size, 23553).
pruned_tokenizer, pruned_model = init_xlmr_class_model(
    config_file='./pruned_models-mysimple/pruned_23553V/config.json',
    vocab_file='./pruned_models-mysimple/pruned_23553V/sentencepiece.bpe.model',
    ckpt_file='./pruned_models-mysimple/pruned_23553V/model.pkl')

# The loader maps the checkpoint to the CPU, so move the model explicitly.
pruned_model.to(device)
print("New vocab size:", pruned_tokenizer.vocab_size)

[]
[]
New vocab size: 23553


# Measure Performance

In [7]:
from predict_function import predict
def measure_performance(model, eval_datasets,eval_langs,output_dir, device, predict_batch_size, head_mask=None, ffn_mask=None, base_model_prefix=None):
    """Evaluate ``model`` with ``predict`` and append the result to a log file.

    When ``ffn_mask`` is given, the feed-forward weights of every encoder layer
    are multiplied by the mask for the duration of the evaluation and restored
    afterwards, even if the evaluation raises.

    Args:
        model: model to evaluate; its backbone is looked up by attribute name.
        eval_datasets: datasets forwarded to ``predict``.
        eval_langs: languages forwarded to ``predict``.
        output_dir: directory forwarded to ``predict`` and used for
            ``eval_results.txt`` (created if missing).
        device: device forwarded to ``predict``.
        predict_batch_size: batch size forwarded to ``predict``.
        head_mask: optional attention-head mask forwarded to ``predict``; it is
            also summarised in the printed report and the log line.
        ffn_mask: optional mask indexed by layer; ``ffn_mask[i]`` is applied to
            the intermediate dimension of layer ``i``.
        base_model_prefix: attribute name of the backbone on ``model``. When
            omitted, ``model.base_model_prefix`` is used.

    Raises:
        ValueError: if no prefix is given and the model does not define one.
    """
    if base_model_prefix is not None:
        base_model = getattr(model, base_model_prefix, model)
    elif hasattr(model, 'base_model_prefix'):
        base_model = getattr(model, model.base_model_prefix, model)
    else:
        raise ValueError("Cannot infer/get model_type! Maybe you should provide base_model_prefix")
    n_layers = base_model.config.num_hidden_layers
    layers = base_model.encoder.layer

    # One (intermediate weight, intermediate bias, output weight) snapshot per
    # layer that has been masked so far.
    snapshots = []
    try:
        if ffn_mask is not None:
            print("Masking intermediate FFN")
            for layer_num in range(n_layers):
                inter_dense = layers[layer_num].intermediate.dense
                output_dense = layers[layer_num].output.dense
                # Snapshot before mutating so a failure half-way through this
                # layer is still undone in the finally block.
                snapshots.append((
                    inter_dense.weight.data.clone(),
                    inter_dense.bias.data.clone(),
                    output_dense.weight.data.clone(),
                ))
                # Keep the mask on the same device as the weights it scales.
                layer_mask = ffn_mask[layer_num].to(inter_dense.weight.device)
                inter_dense.weight.data *= layer_mask.unsqueeze(1)
                inter_dense.bias.data *= layer_mask
                output_dense.weight.data *= layer_mask.unsqueeze(0)

        res = predict(model,eval_datasets,step=0,eval_langs=eval_langs,output_dir=output_dir,
                       device=device, predict_batch_size=predict_batch_size, head_mask = head_mask, in_lang = None)
    finally:
        # Always restore the original FFN weights; otherwise an exception in
        # predict would leave the in-memory model silently masked.
        for layer_num, (inter_weight, inter_bias, output_weight) in enumerate(snapshots):
            layers[layer_num].intermediate.dense.weight.data.copy_(inter_weight)
            layers[layer_num].intermediate.dense.bias.data.copy_(inter_bias)
            layers[layer_num].output.dense.weight.data.copy_(output_weight)

    if head_mask is not None:
        print (head_mask)
        print (f"number of heads: {head_mask.sum()}/{head_mask.view(-1).size(0)}")
        num_heads_per_layer = head_mask.sum(dim=-1)
        print (f"Min/Max number of heads per layer: {num_heads_per_layer.min()}, {num_heads_per_layer.max()}")
    if ffn_mask is not None:
        print (f"ffn size: {ffn_mask.sum()}/{ffn_mask.view(-1).size(0)}")
        ffn_size_per_layer = ffn_mask.sum(dim=-1)
        print (f"Min/Max number of ffn per layer: {ffn_size_per_layer.min()}, {ffn_size_per_layer.max()}")
    print (f"Performance: {res}")

    # Make sure the log directory exists so the result of a long evaluation is
    # not lost to a FileNotFoundError. An empty output_dir means the cwd.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    metric_filename = os.path.join(output_dir, 'eval_results.txt')
    with open(metric_filename,'a') as f:
        if head_mask is not None:
            line = f"{head_mask.sum()}/{head_mask.view(-1).size(0)} {res}\n"
        else:
            line = f"Full-Head {res}\n"
        f.write(line)

In [8]:
from utils import MultilingualNLIDataset
from torch.utils.data import DataLoader, SequentialSampler

# Build the evaluation data with the pruned tokenizer so token ids match the
# pruned embedding matrix.
# NOTE(review): `split` is 'test', and this same data feeds both the
# transformer pruning step (via eval_dataloader) and the reported accuracy
# (via eval_datasets) -- confirm that pruning on the test split is intended.
eval_dataset = MultilingualNLIDataset(
    task=taskname, data_dir=data_dir, split=split, prefix='xlmrbase_pruned',
    max_seq_length=max_seq_length, langs=eval_langs, local_rank=-1, tokenizer=pruned_tokenizer)
eval_sampler = SequentialSampler(eval_dataset)
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=batch_size)
# One dataset per language, in the order of eval_langs.
eval_datasets = [eval_dataset.lang_datasets[lang] for lang in eval_langs]

Init NLIDataset


In [9]:
# Accuracy of the embedding-pruned model (no head or FFN masks applied).
measure_performance(pruned_model, eval_datasets, eval_langs,output_dir, device, batch_size, base_model_prefix='roberta')

Evaluating: 100%|██████████| 157/157 [00:32<00:00,  4.88it/s]


Performance: {'zh': {'acc': 0.7379241516966067}}


# Transformer Pruning

In [14]:
# from utils import MultilingualNLIDataset
# from torch.utils.data import DataLoader, SequentialSampler

# eval_dataset = MultilingualNLIDataset(
#     task=taskname, data_dir=data_dir, split=split, prefix='xlmrbase',
#     max_seq_length=max_seq_length, langs=eval_langs, local_rank=-1, tokenizer=tokenizer)
# eval_sampler = SequentialSampler(eval_dataset)
# eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=batch_size)

#### Init Transformer Pruner

In [10]:
# Same output directory as the embedding pruning step.
general_config = GeneralConfig(output_dir='pruned_models-mysimple')
# Pruning targets; presumably per-layer values (12 layers x 9 heads would give
# the 108 heads in the 'pruned_108H2048FFN' directory loaded further down) --
# TODO confirm against the TextPruner documentation.
transformer_pruning_config = TransformerPruningConfig(
    ffn_size=2048, num_of_heads=9)
# Operates on the embedding-pruned model loaded earlier.
transformer_pruner = TransformerPruner(
    pruned_model,general_config,
    transformer_pruning_config=transformer_pruning_config, base_model_prefix='roberta')

#### Define aux functions

In [11]:
def adaptor(model_outputs, batch):
    """Pick the loss out of the model outputs for the pruner.

    The loss is taken to be the last element of ``model_outputs``; ``batch``
    is accepted but not used.
    """
    loss = model_outputs[-1]
    return dict(loss=loss)
def batch_postprocessor(batch):
    """Convert a 4-item batch into a dict of named model inputs.

    The items are expected in the order input ids, attention mask, token type
    ids, labels; any other length makes the unpacking raise ``ValueError``.
    """
    field_names = ('input_ids', 'attention_mask', 'token_type_ids', 'labels')
    input_ids, attention_mask, token_type_ids, labels = batch
    return dict(zip(field_names, (input_ids, attention_mask, token_type_ids, labels)))

In [12]:
# Alternative kept for reference: prune with masks already held by the pruner.
#transformer_pruner.prune_transformer_with_masks(head_mask=transformer_pruner.head_mask,ffn_mask=transformer_pruner.ffn_mask)
# Prune using the evaluation dataloader; `adaptor` supplies the loss and
# `batch_postprocessor` names the batch fields.
transformer_pruner.prune_transformer(eval_dataloader,adaptor,batch_postprocessor)

Evaluating: 100%|██████████| 157/157 [02:33<00:00,  1.02it/s]


Num_layers:12
{0: [1, 9, 10], 1: [2, 5, 6], 2: [4, 8, 11], 3: [2, 4, 6], 4: [5, 7, 10], 5: [5, 6, 7], 6: [1, 4, 11], 7: [1, 3, 11], 8: [0, 2, 11], 9: [0, 2, 6], 10: [4, 10, 11], 11: [1, 4, 9]}


In [13]:
# Persist the pruned model; saving the masks is left disabled.
transformer_pruner.save_model()
#transformer_pruner.save_masks()

In [14]:
#new_model=XLMRobertaForSequenceClassification.from_pretrained('./pruned_models/pruned_108H2048FFN/model.pkl',config='./pruned_models/pruned_108H2048FFN/config.json')
# Reload the transformer-pruned model. The vocabulary still comes from the
# embedding-pruning output directory. Note that this rebinds pruned_tokenizer
# and pruned_model, and the new model is not moved to `device` in this cell.
pruned_tokenizer, pruned_model = init_xlmr_class_model(
    config_file='./pruned_models-mysimple/pruned_108H2048FFN/config.json',
    vocab_file='./pruned_models-mysimple/pruned_23553V/sentencepiece.bpe.model',
    ckpt_file='./pruned_models-mysimple/pruned_108H2048FFN/model.pkl')

#pruned_model.to(device)

#XLMRobertaForSequenceClassification.from_pretrained('./pruned_models/pruned_108H2048FFN/model.pkl',config='./pruned_models/pruned_108H2048FFN/config.json')

[]
[]


In [34]:
from transformers import XLMRobertaModel
# Load the same config file through two different loaders to compare them.
# NOTE(review): these paths point at './pruned_models/', whereas the models
# saved above live under './pruned_models-mysimple/' -- confirm which
# directory is intended.
config1 = XLMRobertaConfig.from_pretrained('./pruned_models/pruned_108H2048FFN/config.json')
config2 = XLMRobertaConfig.from_json_file('./pruned_models/pruned_108H2048FFN/config.json')

#for key in config1.__dict__.keys():
#    if not (config1.__dict__[key]==config2.__dict__[key]):
#        print (config1.__dict__[key],config2.__dict__[key])

{0: [], 1: [11, 5, 6], 2: [8, 11, 4], 3: [2, 4, 5, 6], 4: [], 5: [], 6: [11], 7: [11, 1, 3], 8: [0, 9, 11], 9: [9, 2, 6, 7], 10: [0, 1, 4, 5, 10, 11], 11: [0, 1, 2, 3, 4, 5, 8, 9, 10]} {'0': [], '1': [11, 5, 6], '2': [8, 11, 4], '3': [2, 4, 5, 6], '4': [], '5': [], '6': [11], '7': [11, 1, 3], '8': [0, 9, 11], '9': [9, 2, 6, 7], '10': [0, 1, 4, 5, 10, 11], '11': [0, 1, 2, 3, 4, 5, 8, 9, 10]}


## Measure performance

In [15]:
# The reloaded model was left on the CPU, so move it before evaluating.
pruned_model.to(device)
# Accuracy of the fully pruned model (embeddings + heads + FFN).
measure_performance(pruned_model, eval_datasets, eval_langs,output_dir, device, batch_size,base_model_prefix='roberta') #, ffn_mask=ffn_mask,head_mask =head_mask)

Evaluating: 100%|██████████| 157/157 [00:40<00:00,  3.83it/s]

Performance: {'zh': {'acc': 0.729940119760479}}



