In [8]:
import logging
import os
import random
import sys
from dataclasses import dataclass, field
from typing import Optional


import datasets
import numpy as np
from datasets import load_dataset, load_metric
from torch import embedding, embedding_bag

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
from transformers.utils.versions import require_version

from nncf.torch.nncf_network import NNCFNetwork
from nncf.torch.dynamic_graph.graph_tracer import create_input_infos, create_dummy_forward_fn
import torch.nn as nn

import pandas as pd
import numpy as np
import functools

In [2]:
# Notebook-wide pandas display settings: lift the row/column/column-width
# limits, widen the console and print floats with thousands separators and
# two decimals in a 20-character field.
for _option, _value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 2000),
    ('display.float_format', '{:20,.2f}'.format),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

In [3]:
def show_full_df(x):
    """Print ``x`` with pandas display limits lifted for the duration of the call.

    The options are applied through ``pd.option_context`` so that whatever
    values were active before the call (including notebook-wide settings made
    with ``pd.set_option``) are restored afterwards, even if printing raises.
    Resetting to the pandas defaults instead would silently discard them.

    Args:
        x: object to print, typically a ``pd.DataFrame`` or ``pd.Series``.

    Returns:
        None. The rendering is written to stdout via ``print``.
    """
    with pd.option_context(
        'display.max_rows', None,
        'display.max_columns', None,
        'display.width', 2000,
        'display.float_format', '{:20,.2f}'.format,
        'display.max_colwidth', None,
    ):
        print(x)

In [4]:
def create_multiindex(list_of_string, level_delimiter="/"):
    """Build a ``pd.MultiIndex`` from delimiter-separated path strings.

    Each string is split on ``level_delimiter``. Paths with fewer components
    than the deepest one are right-padded with ``None`` so that every entry
    has the same number of levels.

    Args:
        list_of_string: iterable of path-like strings, e.g. ``"a/b/c"``.
        level_delimiter: separator between levels (default ``"/"``).

    Returns:
        pd.MultiIndex with one level per path component, in input order.

    Raises:
        ValueError: if ``list_of_string`` is empty (``max`` of an empty sequence).
    """
    # Honour the caller-supplied delimiter instead of a hard-coded "/".
    list_of_tokenlist = [s.split(sep=level_delimiter) for s in list_of_string]
    depth = max(map(len, list_of_tokenlist))
    # Build new padded lists rather than extending the token lists in place.
    equal_depth_tokenlist = [
        tokens + [None] * (depth - len(tokens)) for tokens in list_of_tokenlist
    ]
    # Transpose from (n_entries, depth) to (depth, n_entries): one array per level.
    midx = pd.MultiIndex.from_arrays(np.array(equal_depth_tokenlist).transpose())
    return midx

In [5]:
def analyze_module(module, input_, output, dict_to_save, module_node_name):
    """Record MAC count, parameter sizes and tensor sizes for one module call.

    The first three parameters match the PyTorch forward-hook signature; the
    last two are bound by the caller (e.g. with ``functools.partial``).

    Only convolution (1d/2d/3d, regular and transposed), ``nn.Linear`` and
    ``nn.Embedding``/``nn.EmbeddingBag`` modules (including subclasses) are
    recorded; for any other module the function returns without touching
    ``dict_to_save``.

    Args:
        module: the module whose forward pass just ran.
        input_: sequence of forward inputs; only ``input_[0]`` is inspected.
        output: the forward output tensor.
        dict_to_save: dict that receives one entry keyed by ``module_node_name``.
        module_node_name: key under which the statistics are stored.

    Returns:
        None. ``dict_to_save[module_node_name]`` is set to a flat dict with the
        keys ``optype``, ``MAC``, ``weight_numel``, ``weight_shape``,
        ``bias_numel``, ``bias_shape``, ``input_shape``, ``input_numel``,
        ``output_shape`` and ``output_numel``.
    """
    def t2l(t):
        # tuple (or torch.Size) to list
        return list(t)

    def get_param(module):
        # Keys are pre-seeded in a fixed order so the resulting DataFrame
        # columns keep the same layout whether or not a parameter exists.
        stats = {'weight_numel': None,
                 'weight_shape': None,
                 'bias_numel': None,
                 'bias_shape': None}
        for kind in ('weight', 'bias'):
            # A layer built with bias=False still has the attribute, set to
            # None, so test for None rather than relying on hasattr.
            param = getattr(module, kind, None)
            if param is not None:
                shape = t2l(param.data.shape)
                stats['{}_shape'.format(kind)] = shape
                stats['{}_numel'.format(kind)] = np.prod(shape)
        return stats

    def get_tensor_dim_size(t, prefix):
        return {
            '{}_shape'.format(prefix): t2l(t.shape),
            '{}_numel'.format(prefix): t.numel()
        }

    if isinstance(module,
                    (nn.Conv1d, nn.ConvTranspose1d,
                     nn.Conv2d, nn.ConvTranspose2d,
                     nn.Conv3d, nn.ConvTranspose3d)):
        # Weight element count times the output's spatial size (dims 2+);
        # the batch dimension is not included.
        # NOTE(review): the same formula is applied to transposed convs --
        # confirm that output (rather than input) spatial size is intended there.
        ks = module.weight.data.shape
        mac = np.prod(ks) * np.prod(output.shape[2:])

    elif isinstance(module, nn.Linear):
        if len(input_[0].shape) == 1:
            # In some test cases input tensor could have dimension [N]
            mac = input_[0].shape[0] * output.shape[-1]
        else:
            # All input dims except the first, times the output feature count.
            mac = np.prod(input_[0].shape[1:]) * output.shape[-1]

    elif isinstance(module, (nn.Embedding, nn.EmbeddingBag)):
        # Embedding lookups are counted as zero MACs.
        mac = 0

    else:
        return

    record = {'optype': module.__class__.__name__, 'MAC': mac}
    record.update(get_param(module))
    record.update(get_tensor_dim_size(input_[0], 'input'))
    record.update(get_tensor_dim_size(output, 'output'))
    dict_to_save[module_node_name] = record

def model_per_module_analysis(nncfnetwork):
    """Collect per-module MAC and size statistics with one dummy forward pass.

    For every node returned by ``_original_graph.get_all_nodes()`` the module
    that contains it is looked up, and ``analyze_module`` is registered on it
    as a forward hook bound to that node's name. A dummy forward pass then
    fires the hooks. Nodes with no containing module are reported on stdout
    and skipped.

    Args:
        nncfnetwork: model exposing ``_original_graph``,
            ``get_containing_module`` and ``do_dummy_forward`` (an NNCF-wrapped
            network in this notebook).

    Returns:
        dict mapping node name to the statistics dict written by
        ``analyze_module``.
    """
    model = nncfnetwork
    flops_count_dict = {}

    def get_hook(name):
        return functools.partial(analyze_module,
                                 dict_to_save=flops_count_dict,
                                 module_node_name=name)

    hook_list = []
    try:
        for nncf_node in model._original_graph.get_all_nodes():
            node_module = model.get_containing_module(nncf_node.node_name)
            if node_module is not None:
                hook_list.append(node_module.register_forward_hook(get_hook(nncf_node.node_name)))
            else:
                print("module is none for nncf node: {}".format(nncf_node.node_name))

        model.do_dummy_forward(force_eval=True)
    finally:
        # Always detach the hooks, even if registration or the forward pass
        # raises, so the model is not left permanently instrumented.
        for h in hook_list:
            h.remove()
    return flops_count_dict

In [9]:
# Hugging Face checkpoint to analyse.
model_label = 'bert-large-uncased-whole-word-masking-finetuned-squad'

# NOTE: GLUE_TASKS / task are not used by the question-answering model loaded
# below; they only matter for the commented-out sequence-classification setup.
GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
task = "mnli"

# num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2
model = AutoModelForQuestionAnswering.from_pretrained(model_label)
# , num_labels=num_labels)

In [11]:
# Shape of the dummy inputs used to trace the model.
batchsize = 1
seqlen = 384
# Three long-typed inputs of shape [batchsize, seqlen]
# (presumably input_ids, attention_mask and token_type_ids -- TODO confirm).
# Each entry is its own dict rather than three references to one shared dict.
input_cfg = {
    "input_info": [
        dict(sample_size=[batchsize, seqlen], type="long") for _ in range(3)
    ]
}
input_info_list = create_input_infos(input_cfg)
nncf_model = NNCFNetwork(model, input_info_list)

INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/Embedding[word_embeddings] by BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/Embedding[position_embeddings] by BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/Embedding[token_type_embeddings] by BertForQuestionAnswering/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]
INFO:nncf:Wrapping module BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Linear[query] by BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelf

In [12]:
# Gather the per-node statistics (one dict entry per recorded graph node) ...
d = model_per_module_analysis(nncf_model)
# ... turn them into a table with one row per node name ...
df = pd.DataFrame.from_dict(d, orient='index')
# ... and replace the flat "a/b/c" node names with a hierarchical index.
midx = create_multiindex(list(df.index))
df = df.set_index(midx)

module is none for nncf node: BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
module is none for nncf node: BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
module is none for nncf node: BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
module is none for nncf node: BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
module is none for nncf node: BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/Softmax/softmax_0
module is none for nncf node: BertForQuestionAnswe

In [13]:
# Display the full per-module statistics table (one row per recorded node).
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,optype,MAC,weight_numel,weight_shape,bias_numel,bias_shape,input_shape,input_numel,output_shape,output_numel
BertForQuestionAnswering,BertModel[bert],BertEmbeddings[embeddings],NNCFEmbedding[word_embeddings],embedding_0,,,,,NNCFEmbedding,0,31254528,"[30522, 1024]",,,"[1, 384]",384,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEmbeddings[embeddings],NNCFEmbedding[token_type_embeddings],embedding_0,,,,,NNCFEmbedding,0,2048,"[2, 1024]",,,"[1, 384]",384,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEmbeddings[embeddings],NNCFEmbedding[position_embeddings],embedding_0,,,,,NNCFEmbedding,0,524288,"[512, 1024]",,,"[1, 384]",384,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[0],BertAttention[attention],BertSelfAttention[self],NNCFLinear[query],linear_0,NNCFLinear,402653184,1048576,"[1024, 1024]",1024.0,[1024],"[1, 384, 1024]",393216,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[0],BertAttention[attention],BertSelfAttention[self],NNCFLinear[key],linear_0,NNCFLinear,402653184,1048576,"[1024, 1024]",1024.0,[1024],"[1, 384, 1024]",393216,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[0],BertAttention[attention],BertSelfAttention[self],NNCFLinear[value],linear_0,NNCFLinear,402653184,1048576,"[1024, 1024]",1024.0,[1024],"[1, 384, 1024]",393216,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[0],BertAttention[attention],BertSelfOutput[output],NNCFLinear[dense],linear_0,NNCFLinear,402653184,1048576,"[1024, 1024]",1024.0,[1024],"[1, 384, 1024]",393216,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[0],BertIntermediate[intermediate],NNCFLinear[dense],linear_0,,NNCFLinear,1610612736,4194304,"[4096, 1024]",4096.0,[4096],"[1, 384, 1024]",393216,"[1, 384, 4096]",1572864
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[0],BertOutput[output],NNCFLinear[dense],linear_0,,NNCFLinear,1610612736,4194304,"[1024, 4096]",1024.0,[1024],"[1, 384, 4096]",1572864,"[1, 384, 1024]",393216
BertForQuestionAnswering,BertModel[bert],BertEncoder[encoder],ModuleList[layer],BertLayer[1],BertAttention[attention],BertSelfAttention[self],NNCFLinear[query],linear_0,NNCFLinear,402653184,1048576,"[1024, 1024]",1024.0,[1024],"[1, 384, 1024]",393216,"[1, 384, 1024]",393216


In [None]:
# df['flop']=df.MAC*2
# df['total_mem']=df.input_numel+df.weight_numel+df.output_numel
# df['ai'] = df['flop']/df['total_mem']
# df.loc[df.index[6:8]].set_index(pd.Index(['mhsa', 'ffnn']))

In [18]:
# Keep only the NNCFLinear rows and drop the last one
# (presumably the question-answering output head -- TODO confirm).
df_fc = df.loc[df['optype'] == 'NNCFLinear'].iloc[:-1]

In [21]:
# Sanity check: number of selected Linear rows, and that count divided by 6
# (the table lists six Linear layers per BertLayer: query, key, value,
# attention-output dense, intermediate dense and output dense).
len(df_fc), len(df_fc)/6

(144, 24.0)

In [28]:
# Total number of weight elements across the selected Linear layers.
df_fc['weight_numel'].sum()

301989888

In [38]:
# Multiples of 64 from 64 up to 1024 (the candidate widths swept in the attention-head cell).
(np.arange(16)+1) * 64

array([  64,  128,  192,  256,  320,  384,  448,  512,  576,  640,  704,
        768,  832,  896,  960, 1024])

In [53]:
# Attention head
# Sweep sequence lengths 64..384 and kept widths 64..1024 (both in steps of 64).
# For each pair, print the sparsity relative to a width of 1024 and the width
# expressed in units of 64 ("nhead"), and flag pairs whose product is not a
# multiple of 4096.
# NOTE: the loop variable reuses the notebook-level name `seqlen`.
for seqlen in np.arange(1, 7) * 64:
    for headgroup in np.arange(1, 17) * 64:
        print(
            "sparsity: {:.3f} | nhead: {:5} | (seqlen:{:4}, {:4} :ncol/nrow)".format(
                1 - headgroup / 1024, headgroup / 64, seqlen, headgroup
            )
        )
        if (seqlen * headgroup) % 4096 != 0:
            print("invalid (seqlen, nhead): ({}, {})".format(seqlen, headgroup / 64))

sparsity: 0.938 | nhead:   1.0 | (seqlen:  64,   64 :ncol/nrow)
sparsity: 0.875 | nhead:   2.0 | (seqlen:  64,  128 :ncol/nrow)
sparsity: 0.812 | nhead:   3.0 | (seqlen:  64,  192 :ncol/nrow)
sparsity: 0.750 | nhead:   4.0 | (seqlen:  64,  256 :ncol/nrow)
sparsity: 0.688 | nhead:   5.0 | (seqlen:  64,  320 :ncol/nrow)
sparsity: 0.625 | nhead:   6.0 | (seqlen:  64,  384 :ncol/nrow)
sparsity: 0.562 | nhead:   7.0 | (seqlen:  64,  448 :ncol/nrow)
sparsity: 0.500 | nhead:   8.0 | (seqlen:  64,  512 :ncol/nrow)
sparsity: 0.438 | nhead:   9.0 | (seqlen:  64,  576 :ncol/nrow)
sparsity: 0.375 | nhead:  10.0 | (seqlen:  64,  640 :ncol/nrow)
sparsity: 0.312 | nhead:  11.0 | (seqlen:  64,  704 :ncol/nrow)
sparsity: 0.250 | nhead:  12.0 | (seqlen:  64,  768 :ncol/nrow)
sparsity: 0.188 | nhead:  13.0 | (seqlen:  64,  832 :ncol/nrow)
sparsity: 0.125 | nhead:  14.0 | (seqlen:  64,  896 :ncol/nrow)
sparsity: 0.062 | nhead:  15.0 | (seqlen:  64,  960 :ncol/nrow)
sparsity: 0.000 | nhead:  16.0 | (seqlen

In [58]:
# FFNN
# Sweep sequence lengths 64..384 (step 64) against every width 1..4096 and
# print only the pairs whose product is a multiple of 4096, together with the
# sparsity relative to a width of 4096.
# NOTE: the loop variable reuses the notebook-level name `seqlen`.
for seqlen in np.arange(1, 7) * 64:
    for ffnn_dim in np.arange(1, 4097):
        if (seqlen * ffnn_dim) % 4096 != 0:
            continue
        print(
            "sparsity: {:.3f} | ffnn: {:5} | (seqlen:{:4}, {:4} :ncol/nrow)".format(
                1 - ffnn_dim / 4096, ffnn_dim, seqlen, ffnn_dim
            )
        )
            

sparsity: 0.984 | ffnn:    64 | (seqlen:  64,   64 :ncol/nrow)
sparsity: 0.969 | ffnn:   128 | (seqlen:  64,  128 :ncol/nrow)
sparsity: 0.953 | ffnn:   192 | (seqlen:  64,  192 :ncol/nrow)
sparsity: 0.938 | ffnn:   256 | (seqlen:  64,  256 :ncol/nrow)
sparsity: 0.922 | ffnn:   320 | (seqlen:  64,  320 :ncol/nrow)
sparsity: 0.906 | ffnn:   384 | (seqlen:  64,  384 :ncol/nrow)
sparsity: 0.891 | ffnn:   448 | (seqlen:  64,  448 :ncol/nrow)
sparsity: 0.875 | ffnn:   512 | (seqlen:  64,  512 :ncol/nrow)
sparsity: 0.859 | ffnn:   576 | (seqlen:  64,  576 :ncol/nrow)
sparsity: 0.844 | ffnn:   640 | (seqlen:  64,  640 :ncol/nrow)
sparsity: 0.828 | ffnn:   704 | (seqlen:  64,  704 :ncol/nrow)
sparsity: 0.812 | ffnn:   768 | (seqlen:  64,  768 :ncol/nrow)
sparsity: 0.797 | ffnn:   832 | (seqlen:  64,  832 :ncol/nrow)
sparsity: 0.781 | ffnn:   896 | (seqlen:  64,  896 :ncol/nrow)
sparsity: 0.766 | ffnn:   960 | (seqlen:  64,  960 :ncol/nrow)
sparsity: 0.750 | ffnn:  1024 | (seqlen:  64, 1024 :nco