In [1]:
import data_parser as dp
import os

In [2]:
# Layer kinds that were profiled; each one exists in a 1d and a 2d variant.
names = ['conv', 'avg', 'max', 'dense', 'norm', 'drop', 'relu', 'tanh']

# File names of the per-layer profiling dumps, one list per dimensionality.
names1d = [layer + '1d.torch' for layer in names]
names2d = [layer + '2d.torch' for layer in names]

# Directories are kept as lists of path components and expanded with
# os.path.join(*...) wherever a path is needed.
source_dir = ['profiling_results', 'vms_1']       # raw profiler output
the_dir = ['intermediate_results', 'ptorch']      # renamed / merged dumps

In [3]:
# Preprocess 1
# Copy every raw dump from source_dir into the_dir under its final name.
# Nothing is transformed here: each file is loaded and saved back unchanged.

# Per-layer dumps only lose their leading '__' prefix.
for name in names1d + names2d:
    print(name)
    dp._save(
        dp.load(os.path.join(*source_dir, '__' + name)),
        os.path.join(*the_dir, name),
    )

# The '__alone*' dumps are stored under the name 'flatten*'.
for raw_name, final_name in (
    ('__alone1d.torch', 'flatten1d.torch'),
    ('__alone2d.torch', 'flatten2d.torch'),
):
    dct = dp.load(os.path.join(*source_dir, raw_name))
    dp._save(dct, os.path.join(*the_dir, final_name))

conv1d.torch
avg1d.torch
max1d.torch
dense1d.torch
norm1d.torch
drop1d.torch
relu1d.torch
tanh1d.torch
conv2d.torch
avg2d.torch
max2d.torch
dense2d.torch
norm2d.torch
drop2d.torch
relu2d.torch
tanh2d.torch


In [4]:
# Preprocess 2
# Dense layers don't depend on 'channels' or 'dim', only on the number of
# features, so the 1d and 2d dense dumps are merged into a single file.
# NOTE(review): 'channels' is still carried over into the merged key - confirm
# that this is intended given the statement above.

# dense1d, dense2d -> dense
dictionary_ls = [
    dp.load(os.path.join(*the_dir, dense_file))
    for dense_file in ('dense1d.torch', 'dense2d.torch')
]

updated_dictionary = {}

# dim is 1 for the dense1d dump and 2 for the dense2d dump.
for dim, dictionary in enumerate(dictionary_ls, start=1):
    for fz_key, value in dictionary.items():
        dct_key = dp.from_key(fz_key)

        # Fields copied over unchanged from the original key.
        merged_fields = {
            field: dct_key[field]
            for field in ('batch', 'nodes', 'units', 'channels')
        }
        # 'numf' becomes numf**dim; the original value and the dimensionality
        # are kept alongside so entries from the two dumps stay distinct.
        merged_fields['numf'] = dct_key['numf'] ** dim
        merged_fields['old_numf'] = dct_key['numf']
        merged_fields['old_dim'] = dim

        new_fz_key = dp.my_key(merged_fields)
        updated_dictionary[new_fz_key] = value

dp._save(updated_dictionary, os.path.join(*the_dir, 'dense.torch'))

In [5]:
# Maps each intermediate dump (file stem in the_dir) to the profiler op names
# whose CPU time is summed for that layer. The lists are handed to
# dp.check_just() and applied to the 'Name' column of the profile.
#
# Every entry must be followed by a comma: Python silently concatenates
# adjacent string literals, so a missing comma merges two op names into one
# name that can never match (this previously turned 'mm' and 'AddmmBackward'
# into the single entry 'mmAddmmBackward').
pt_map_just = {

    'dense': [
        # Forward
        'addmm',
        'mm',
        # Backward
        'AddmmBackward',
    ],

    'conv1d': [
        # Forward
        'conv1d',
        # Backward
        'MkldnnConvolutionBackward',
    ],

    'conv2d': [
        # Forward
        'conv2d',
        # Backward
        'MkldnnConvolutionBackward',
    ],

    'avg1d': [
        'avg_pool1d',
    ],

    'avg2d': [
        'avg_pool2d',
    ],

    'max1d': [
        'max_pool1d',
    ],

    'max2d': [
        'max_pool2d',
    ],

    'norm1d': [
        # Forward
        'batch_norm',
        # Backward
        'NativeBatchNormBackward',
    ],

    'norm2d': [
        # Forward
        'batch_norm',
        # Backward
        'NativeBatchNormBackward',
    ],

    'drop1d': [
        'dropout',
    ],

    'drop2d': [
        'feature_dropout',
    ],

    'tanh1d': [
        'tanh',
    ],

    'tanh2d': [
        'tanh',
    ],

    'relu1d': [
        'relu',
    ],

    'relu2d': [
        'relu',
    ],

    'flatten1d': [
        'flatten',
    ],

    'flatten2d': [
        'flatten',
    ],
}

def total_on_just(df, words):
    """Sum the 'CPU total (us)' column over the rows selected by ``words``.

    Args:
        df: DataFrame with at least a 'Name' column and a 'CPU total (us)'
            column.
        words: Op names passed to ``dp.check_just`` to build the row
            predicate; the exact matching rule is defined by that helper.

    Returns:
        The sum of 'CPU total (us)' over the rows whose 'Name' satisfies the
        predicate.
    """
    is_wanted = dp.check_just(words)
    wanted_rows = df[df['Name'].apply(is_wanted)]
    return wanted_rows['CPU total (us)'].sum()

In [6]:
# Output directory for the final '*.torch_db' files, kept as path components
# and expanded with os.path.join(*target_dir, ...).
target_dir = [
    'database',
    'ptorch',
]

In [7]:
# Part 1
# For every layer in pt_map_just, reduce each profile to a single number:
# the summed 'CPU total (us)' of the layer's ops divided by the step count.

# Used to derive the number of steps behind each profile.
# NOTE(review): presumably these mirror the profiling run's settings - confirm.
dataset_size = 1024
epochs = 5

for name, ops in pt_map_just.items():
    dct = dp.load(os.path.join(*the_dir, name + '.torch'))
    print(name)

    pt_db = {}
    for key, value in dct.items():
        batch = dp.from_key(key)['batch']
        # Floor division: a trailing partial batch is not counted as a step,
        # and a batch larger than dataset_size gives zero steps.
        steps = epochs * (dataset_size // batch)

        # Compute time per step
        pt_db[key] = total_on_just(value.reset_index(), ops) / steps

    dp._save(pt_db, os.path.join(*target_dir, name + '.torch_db'))

dense
conv1d
conv2d
avg1d
avg2d
max1d
max2d
norm1d
norm2d
drop1d
drop2d
tanh1d
tanh2d
relu1d
relu2d
flatten1d
flatten2d


In [None]:
# Part 2 - Dataset DB
def create_db(files):
    """Build a per-step cost table for the 'stack' and 'select' ops.

    Each file in ``files`` is loaded from ``the_dir``. For every profile in
    it, the 'CPU total (us)' of the 'stack' and 'select' ops is summed via
    ``total_on_just`` and divided by the step count derived from the
    profile's batch size.

    Args:
        files: File names (relative to ``the_dir``) of the dumps to read.

    Returns:
        dict mapping ``dp.my_key(fields)`` to the per-step value, where
        ``fields`` is the decoded original key plus a 'file' entry holding
        the source file name.
    """
    # NOTE(review): presumably these mirror the profiling run's settings - confirm.
    dataset_size = 1024
    epochs = 5
    wanted_ops = ['stack', 'select']

    db = {}
    for file in files:
        profiles = dp.load(os.path.join(*the_dir, file))
        print(file)

        for frozen_key, frame in profiles.items():
            fields = dp.from_key(frozen_key)
            # Floor division: a trailing partial batch is not counted.
            steps = epochs * (dataset_size // fields['batch'])

            # Tag the key with its source file so entries from different
            # files do not overwrite each other.
            fields['file'] = file
            db[dp.my_key(fields)] = total_on_just(frame.reset_index(), wanted_ops) / steps

    return db

# Build both dataset tables first, then write each to target_dir.
ds_1d, ds_2d = create_db(names1d), create_db(names2d)

for dataset_db, db_file in (
    (ds_1d, 'dataset1d.torch_db'),
    (ds_2d, 'dataset2d.torch_db'),
):
    dp._save(dataset_db, os.path.join(*target_dir, db_file))