In [1]:
import data_parser as dp
import numpy as np
import os

In [2]:
# Layer-type stems; every stem has a 1-D and a 2-D profile file.
names = ['conv', 'avg', 'max', 'dense', 'norm', 'drop', 'relu', 'tanh']

# Profile file names derived from the stems, e.g. 'conv1d.tflow' / 'conv2d.tflow'.
names1d = [stem + '1d.tflow' for stem in names]
names2d = [stem + '2d.tflow' for stem in names]

# Directories are stored as lists of path components and expanded with
# os.path.join(*...) wherever a path is needed.
source_dir = ['profiling_results', 'vms_1']
the_dir = ['intermediate_results', 'tflow']

In [3]:
# Preprocess 1
# Remove the IDLE rows from every single file and save each one without the '__' filename prefix
def no_idle(df):
    """Return the rows of ``df`` whose 'Operation' value is not 'IDLE'.

    Parameters
    ----------
    df : DataFrame with an 'Operation' column.

    Returns
    -------
    The filtered frame; ``df`` itself is not modified.
    """
    is_active = df['Operation'].ne('IDLE')
    return df[is_active]

# Each raw profile is stored as '__<name>' in source_dir; the IDLE-free copy
# is written under the plain '<name>' in the_dir.
for name in names1d + names2d:
    raw_path = os.path.join(*source_dir, '__' + name)
    dct = dp.load(raw_path)
    print(name)

    # Every value of the loaded mapping is passed through no_idle().
    new_dct = {key: no_idle(value) for key, value in dct.items()}

    clean_path = os.path.join(*the_dir, name)
    dp._save(new_dct, clean_path)

conv1d.tflow
avg1d.tflow
max1d.tflow
dense1d.tflow
norm1d.tflow
drop1d.tflow
relu1d.tflow
tanh1d.tflow
conv2d.tflow
avg2d.tflow
max2d.tflow
dense2d.tflow
norm2d.tflow
drop2d.tflow
relu2d.tflow
tanh2d.tflow


In [4]:
# Preprocess 2
# Dense layers don't depend on 'channels' or 'dim', only on the number of
# features, so the 1-D and 2-D dense profiles are merged into one file.
# NOTE(review): 'channels' is still carried into the merged key below even
# though the statement above says dense layers don't depend on it - confirm.

# dense1d, dense2d -> dense
dictionary_ls = [
    dp.load(os.path.join(*the_dir, fname))
    for fname in ('dense1d.tflow', 'dense2d.tflow')
]

updated_dictionary = {}

# enumerate(..., 1) pairs the dense1d profile with dim=1 and dense2d with dim=2.
for dim, dictionary in enumerate(dictionary_ls, 1):
    for fz_key, value in dictionary.items():
        params = dp.from_key(fz_key)

        # Fields copied unchanged from the original key.
        merged_params = {
            field: params[field]
            for field in ('batch', 'nodes', 'units', 'channels')
        }
        # numf is raised to the power of dim (numf for 1-D, numf squared for
        # 2-D); the original numf and dim are kept alongside it.
        merged_params['numf'] = params['numf'] ** dim
        merged_params['old_numf'] = params['numf']
        merged_params['old_dim'] = dim

        updated_dictionary[dp.my_key(merged_params)] = value

dense_path = os.path.join(*the_dir, 'dense.tflow')
dp._save(updated_dictionary, dense_path)

In [5]:
# Maps the stem of each intermediate-results file ('<stem>.tflow') to the list
# of words handed to total_on() when summing that file's operations.
# 'dense' is the merged dense1d/dense2d file, so it lists both words.
tf_map = dict(
    conv1d=['CONV1D'],
    conv2d=['CONV2D'],
    avg1d=['AVG1D'],
    avg2d=['AVG2D'],
    max1d=['MAX1D'],
    max2d=['MAX2D'],
    dense=['DENSE1D', 'DENSE2D'],
    norm1d=['NORM1D'],
    norm2d=['NORM2D'],
    drop1d=['DROP1D'],
    drop2d=['DROP2D'],
    relu1d=['RELU1D'],
    relu2d=['RELU2D'],
    tanh1d=['TANH1D'],
    tanh2d=['TANH2D'],
)

def total_on(df, words, column='Operation'):
    """Sum 'Total self-time (us)' over the rows of ``df`` selected by ``words``.

    Parameters
    ----------
    df : DataFrame containing ``column`` and 'Total self-time (us)'.
    words : passed to ``dp.check`` to build the per-value predicate.
    column : name of the column the predicate is applied to.

    Returns
    -------
    The sum of 'Total self-time (us)' over the selected rows.
    """
    values = df[column]
    # The predicate's per-row results are used as the row mask.
    mask = values.apply(dp.check(words))
    selected = df[mask]
    return selected['Total self-time (us)'].sum()

In [6]:
# Path components of the directory that receives the '*.tflow_db' files;
# expanded with os.path.join(*target_dir, ...) when saving.
target_dir = ['database', 'tflow']

In [7]:
# Part 1 - Database
# For every entry of tf_map, load '<name>.tflow', sum the self-time of the
# matching operations per configuration, divide by the number of steps and
# save the result as '<name>.tflow_db'.

# Loop-invariant settings used to turn a total into a per-step value.
# NOTE(review): presumably the dataset size and epoch count of the profiled
# runs - confirm against the profiling script.
dataset_size = 1024
epochs = 5

for name, ops in tf_map.items():
    dct = dp.load(os.path.join(*the_dir, name + '.tflow'))
    print(name)

    tf_db = {}
    for key, value in dct.items():
        batch = dp.from_key(key)['batch']
        steps_per_epoch = dataset_size // batch
        if steps_per_epoch < 1:
            # With batch > dataset_size the step count is 0; dividing a NumPy
            # scalar by it would store inf/nan in the database with only a
            # warning, so fail loudly instead.
            raise ValueError(
                'batch {!r} in {!r} is larger than dataset_size {}; '
                'cannot compute a per-step time'.format(batch, name, dataset_size)
            )
        steps = epochs * steps_per_epoch

        tf_db[key] = total_on(value, ops) / steps

    dp._save(tf_db, os.path.join(*target_dir, name + '.tflow_db'))

conv1d
conv2d
avg1d
avg2d
max1d
max2d
dense
norm1d
norm2d
drop1d
drop2d
relu1d
relu2d
tanh1d
tanh2d


In [None]:
# Part 2 - Dataset DB
def create_db(files, dataset_size=1024, epochs=5):
    """Build a per-step 'Dataset' self-time table for the given profile files.

    Each file is loaded from ``the_dir``. For every configuration in it, the
    'Total self-time (us)' of rows whose 'Type' matches 'Dataset' is summed
    and divided by ``epochs * (dataset_size // batch)``.

    Parameters
    ----------
    files : iterable of file names inside ``the_dir``.
    dataset_size : value divided by the batch size to get steps per epoch.
    epochs : multiplier applied to the steps per epoch.

    Returns
    -------
    dict mapping ``dp.my_key(key)`` to the per-step time, where ``key`` is the
    decoded configuration with an extra 'file' entry.

    Raises
    ------
    ValueError
        If a configuration's batch is larger than ``dataset_size``. The step
        count would be 0 and the division would yield inf/nan or raise.
    """
    db = {}
    for file in files:
        dct = dp.load(os.path.join(*the_dir, file))
        print(file)

        for fz_key, value in dct.items():
            key = dp.from_key(fz_key)

            batch = key['batch']
            steps_per_epoch = dataset_size // batch
            if steps_per_epoch < 1:
                raise ValueError(
                    'batch {!r} in {!r} is larger than dataset_size {}; '
                    'cannot compute a per-step time'.format(batch, file, dataset_size)
                )
            steps = epochs * steps_per_epoch

            # The originating file becomes part of the key so entries from
            # different files are kept apart.
            key['file'] = file
            db[dp.my_key(key)] = total_on(value, ['Dataset'], 'Type') / steps

    return db

# Output locations of the two dataset tables.
ds_1d_path = os.path.join(*target_dir, 'dataset1d.tflow_db')
ds_2d_path = os.path.join(*target_dir, 'dataset2d.tflow_db')

# Build both tables first (1-D files, then 2-D files), then write them out.
ds_1d = create_db(names1d)
ds_2d = create_db(names2d)

dp._save(ds_1d, ds_1d_path)
dp._save(ds_2d, ds_2d_path)

# No flatten ops were recorded, so no predictor is created for them