In [None]:
import data_parser as dp
import numpy as np
import os

In [None]:
# Base names of the profiling result files handled by this notebook.
names = ['alone', 'conv', 'avg', 'max', 'dense', 'norm', 'drop', 'relu', 'tanh']

# Every base name exists in a 1d and a 2d variant, e.g. 'conv1d.tflow' and 'conv2d.tflow'.
names1d = [base + '1d.tflow' for base in names]
names2d = [base + '2d.tflow' for base in names]

# Directories are stored as lists of path components and expanded at the
# point of use with os.path.join(*components, file_name).
source_dir = ['profiling_results', 'vms_1']
the_dir = ['intermediate_results/tflow']

In [None]:
# PART 1
# Strip the 'IDLE' rows out of every single file.
def no_idle(df):
    """Return the rows of ``df`` whose 'Operation' value is not 'IDLE'.

    Parameters
    ----------
    df : DataFrame with an 'Operation' column.

    Returns
    -------
    The filtered frame; the original row index labels are kept.
    """
    is_not_idle = df['Operation'] != 'IDLE'
    return df[is_not_idle]

def _without_idle(dct):
    """Run no_idle on every value of ``dct``; return a new dict with the same keys."""
    return {key: no_idle(value) for key, value in dct.items()}


# Strip the IDLE rows from every raw profile ('__<name>' under source_dir) and
# store the result under the_dir, with the leading '__' dropped from the name.
for name in names1d + names2d:
    dct = dp.load(os.path.join(*source_dir, '__' + name))
    print(name)
    dp._save(_without_idle(dct), os.path.join(*the_dir, name))

# The flatten intermediates are copies of the 'alone' profiles. They go through
# the same IDLE filter as every other file written to the_dir, so that no
# intermediate file still contains IDLE rows.
for suffix in ('1d', '2d'):
    dct = dp.load(os.path.join(*source_dir, '__alone' + suffix + '.tflow'))
    dp._save(_without_idle(dct), os.path.join(*the_dir, 'flatten' + suffix + '.tflow'))

In [None]:
# PART 2
# Dense layers don't depend on 'channels' or 'dim' individually, only on the
# resulting number of features.

def _dense_key(dct_key, dim):
    """Build the key of the merged dense table from one decoded 1d/2d key.

    ``dct_key`` is the mapping returned by dp.from_key; ``dim`` is 1 for the
    dense1d file and 2 for the dense2d file. The new 'numf' is
    (numf ** dim) * channels; the original numf, channels and dim are kept
    under 'old_*' entries.
    """
    numf = dct_key['numf']
    channels = dct_key['channels']
    return dp.my_key({
        'batch': dct_key['batch'],
        'nodes': dct_key['nodes'],
        'units': dct_key['units'],
        'numf': (numf ** dim) * channels,
        'old_numf': numf,
        'old_channels': channels,
        'old_dim': dim
    })


# dense1d, dense2d -> dense
dictionary_ls = [
    dp.load(os.path.join(*the_dir, 'dense1d.tflow')),
    dp.load(os.path.join(*the_dir, 'dense2d.tflow')),
]

updated_dictionary = {}

# Position in dictionary_ls doubles as the dimensionality (1 -> 1d, 2 -> 2d).
for dim, dictionary in enumerate(dictionary_ls, start=1):
    for fz_key, value in dictionary.items():
        dct_key = dp.from_key(fz_key)
        new_fz_key = _dense_key(dct_key, dim)
        updated_dictionary[new_fz_key] = value

In [None]:
# Persist the merged dense table next to the other intermediate files.
dense_path = os.path.join(*the_dir, 'dense.tflow')
dp._save(updated_dictionary, dense_path)

In [None]:
# Maps the base name of each intermediate '.tflow' file to the operation words
# passed to total_on for that file. An empty list makes total_on sum every row.
tf_map = {
    # convolution and pooling
    'conv1d': ['CONV1D'],
    'conv2d': ['CONV2D'],
    'avg1d': ['AVG1D'],
    'avg2d': ['AVG2D'],
    'max1d': ['MAX1D'],
    'max2d': ['MAX2D'],
    # the merged dense file covers both the 1d and the 2d words
    'dense': ['DENSE1D', 'DENSE2D'],
    # normalisation, dropout and activations
    'norm1d': ['NORM1D'],
    'norm2d': ['NORM2D'],
    'drop1d': ['DROP1D'],
    'drop2d': ['DROP2D'],
    'relu1d': ['RELU1D'],
    'relu2d': ['RELU2D'],
    'tanh1d': ['TANH1D'],
    'tanh2d': ['TANH2D'],
    'flatten1d': ['FLATTEN1D'],
    'flatten2d': ['FLATTEN2D'],
    # no filter: total over all operations
    'alone1d': [],
    'alone2d': [],
}

def total_on(df, words):
    """Sum the 'Total self-time (us)' column of ``df``.

    Parameters
    ----------
    df : DataFrame with 'Operation' and 'Total self-time (us)' columns.
    words : sequence of operation words. When empty, every row is counted;
        otherwise only the rows whose 'Operation' is accepted by the
        predicate dp.check(words) are counted.

    Returns
    -------
    The sum of the selected 'Total self-time (us)' values.
    """
    time_col = 'Total self-time (us)'

    if len(words) > 0:
        wanted = df['Operation'].apply(dp.check(words))
        selected = df[wanted]
        return selected[time_col].sum()

    return df[time_col].sum()

In [None]:
# Output directory for the '.tflow_db' files, kept as a list of path
# components that is expanded with os.path.join(*target_dir, file_name).
target_dir = ['database/tflow']

In [None]:
# PART 3
# For every entry of tf_map: load the intermediate file, divide the summed
# self-time of the mapped operations by the number of steps, and save the
# per-key result as '<name>.tflow_db' under target_dir.

# Constants used to derive the number of steps for a given batch size
# (presumably the settings of the profiling runs -- TODO confirm).
dataset_size = 1024
epochs = 5

for name, ops in tf_map.items():
    dct = dp.load(os.path.join(*the_dir, name + '.tflow'))
    print(name)

    tf_db = {}
    for key, value in dct.items():
        batch = dp.from_key(key)['batch']
        # NOTE(review): a batch larger than dataset_size gives 0 steps and the
        # division below raises ZeroDivisionError -- confirm such keys cannot occur.
        steps = epochs * (dataset_size // batch)
        tf_db[key] = total_on(value, ops) / steps

    dp._save(tf_db, os.path.join(*target_dir, name + '.tflow_db'))

# No flatten ops were recorded, so don't create a predictor for them

In [None]:
# Load back the flatten2d result written by the PART 3 loop for inspection.
# NOTE(review): PART 3 saves a plain dict under this path, so despite its name
# `df` is presumably a dict rather than a DataFrame -- confirm.
flatten2d_db_path = os.path.join(*target_dir, 'flatten2d.tflow_db')
df = dp.load(flatten2d_db_path)

In [None]:
# Show the loaded flatten2d database as the cell output.
df

In [None]:
# PART 4