In [None]:
def conv_size_out(size_in, kern, stride, pad=0, dilation=1):
    """Return the output length of a convolution along one spatial dimension.

    Computes ``(size_in + 2*pad - dilation*(kern - 1) - 1) // stride + 1``,
    the output-shape formula given in the ``torch.nn.Conv2d`` documentation.

    Args:
        size_in: Input length along the dimension.
        kern: Kernel length along the dimension.
        stride: Step between successive kernel applications.
        pad: Zero padding added to both ends (default 0).
        dilation: Spacing between kernel elements (default 1).

    Returns:
        The output length as an int (floor division).
    """
    return (size_in + 2 * pad - dilation * (kern - 1) - 1) // stride + 1

def avg_size_out(size_in, kern, stride, pad=0):
    """Return the output length of an average-pooling layer along one dimension.

    Computes ``(size_in + 2*pad - kern) // stride + 1``.

    Args:
        size_in: Input length along the dimension.
        kern: Pooling window length.
        stride: Step between successive windows.
        pad: Zero padding added to both ends (default 0).

    Returns:
        The output length as an int (floor division).
    """
    return (size_in + 2 * pad - kern) // stride + 1

def max_size_out(size_in, kern, stride, pad=0, dilation=1):
    """Return the output length of a max-pooling layer along one dimension.

    Computes ``(size_in + 2*pad - dilation*(kern - 1) - 1) // stride + 1``.

    Args:
        size_in: Input length along the dimension.
        kern: Pooling window length.
        stride: Step between successive windows.
        pad: Padding added to both ends (default 0).
        dilation: Spacing between window elements (default 1).

    Returns:
        The output length as an int (floor division).
    """
    return (size_in + 2 * pad - dilation * (kern - 1) - 1) // stride + 1

In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class Model(nn.Module):
    """Small pooling + linear classifier used as a profiling target.

    The wrapped ``self.model`` is an ``nn.Sequential`` of
    ``MaxPool2d(4, 2) -> Flatten -> Linear(225, 10)``. The linear layer's
    225 inputs correspond to a single-channel 32x32 image: max pooling with
    kernel 4 and stride 2 yields 15x15 = 225 features.

    With only ``Flatten`` in the stack the module has no trainable
    parameters, and constructing ``torch.optim.SGD(model.parameters())``
    raises ``ValueError`` (empty parameter list), so the pooling and linear
    layers are kept enabled.
    """

    def __init__(self):
        super().__init__()

        self.model = nn.Sequential(
            nn.MaxPool2d(4, 2),
            nn.Flatten(),
            nn.Linear(in_features=225, out_features=10),
        )

    def forward(self, x):
        """Return ``(logits, probs)`` where ``probs`` is softmax over dim 1."""
        logits = self.model(x)
        probs = F.softmax(logits, dim=1)
        return logits, probs

In [None]:
# Synthetic inputs: 100 single-channel 32x32 tensors with uniform random values.
x = torch.rand(100, 1, 32, 32)
# torch.randint treats `high` as exclusive, so high=10 yields labels 0..9,
# matching the 10-way output used elsewhere in this notebook (high=9 never
# produced class 9).
y = torch.randint(low=0, high=10, size=(100,))

In [None]:
# Pair the input tensor `x` with the label tensor `y` as an indexable dataset.
train_dataset = torch.utils.data.TensorDataset(x, y)

# Serve the dataset in reshuffled mini-batches of 32 samples.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
def profile(model, train_loader, epochs=1):
    """Train ``model`` under the autograd profiler and return averaged op stats.

    Runs plain SGD (lr=0.01) with cross-entropy loss over ``train_loader``
    while ``torch.autograd.profiler.profile`` records every operator.

    Args:
        model: Module to train. Its forward may return either the logits
            tensor or a tuple/list whose first element is the logits
            (e.g. ``(logits, probs)``).
        train_loader: Iterable of ``(images, labels)`` batches that also
            supports ``len()``.
        epochs: Number of passes over ``train_loader`` (default 1).

    Returns:
        The result of ``prof.key_averages()`` for the profiled run.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    def train(epochs):
        total_step = len(train_loader)
        print(total_step)

        for epoch in range(epochs):
            for i, (images, labels) in enumerate(train_loader):
                # Forward pass
                outputs = model(images)
                # Accept modules whose forward returns (logits, probs):
                # the criterion needs the raw logits only.
                if isinstance(outputs, (tuple, list)):
                    outputs = outputs[0]
                loss = criterion(outputs, labels)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (i + 1) % 10 == 0:
                    # loss.item() prints the scalar instead of a tensor repr.
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {}'
                          .format(epoch + 1, epochs, i + 1, total_step, loss.item()))

    with torch.autograd.profiler.profile() as prof:
        train(epochs)

    return prof.key_averages()

In [None]:
# Profile training of the inner nn.Sequential (Model().model), not the wrapper
# module whose forward returns a (logits, probs) tuple.
ls = profile(Model().model,train_loader)

In [None]:
# Print the value returned by profile() in the previous cell.
print(ls)

In [None]:
# List the key of every averaged profiler entry, one per paragraph.
# The loop variable is deliberately not `x`: that name holds the input tensor
# used to build train_dataset, and rebinding it here would break a re-run of
# the dataset cell.
for event in ls:
    print(event.key)
    print()

In [None]:
# Fresh instance; keep only the inner nn.Sequential for the manual profiling below.
model = Model().model

In [None]:
# Same loss and optimizer settings that profile() builds internally
# (cross-entropy, SGD with lr=0.01), here bound at notebook level.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

In [None]:
# Profile only the forward pass, on the first batch from the loader.
# The previous loop broke out right after the forward pass, so its
# loss/backward/optimizer statements were unreachable and have been dropped.
images, labels = next(iter(train_loader))
with torch.autograd.profiler.profile() as prof:
    outputs = model(images)



In [None]:
# Print the averaged per-operator statistics of the profiled forward pass.
print(prof.key_averages())

In [None]:
# Print just the key of each averaged profiler entry.
for k in prof.key_averages():
    print(k.key)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers  
from tensorflow.keras.layers import Dense, Flatten

from numpy.random import RandomState as R

seed = 42

def give(dim, n, channels, ds_size=1024, out_size=10):
    """Build a deterministic random dataset with one-hot labels.

    Inputs and labels are each drawn from a freshly constructed
    ``RandomState(seed)``, so repeated calls with the same arguments return
    identical arrays.

    Args:
        dim: 1 for samples shaped ``(n, channels)``; any other value gives
            samples shaped ``(n, n, channels)``.
        n: Spatial length of each sample.
        channels: Size of the trailing channel axis.
        ds_size: Number of samples (default 1024).
        out_size: Number of classes (default 10).

    Returns:
        Tuple ``(x, y)``: ``x`` has shape ``(ds_size, n, channels)`` or
        ``(ds_size, n, n, channels)`` with values from ``RandomState.random``;
        ``y`` is ``tf.keras.utils.to_categorical`` applied to integer labels
        drawn from ``[0, out_size)``.
    """
    if dim == 1:
        shape = (ds_size, n, channels)
    else:
        shape = (ds_size, n, n, channels)
    # The array is generated directly in its final shape; no reshape is needed.
    x = R(seed).random(shape)

    y = R(seed).randint(0, out_size, ds_size)
    y = tf.keras.utils.to_categorical(y, out_size)

    return x, y

In [None]:
# Optimizer and loss objects passed to model.compile() in Model.create().
opt = tf.keras.optimizers.SGD(learning_rate=0.01)
# from_logits=True: the loss is told the model outputs are unnormalized logits.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

def profile(model, x, y, batch, epochs):
    """Fit ``model`` on ``(x, y)`` while the TensorFlow profiler is recording.

    The data is wrapped in a ``tf.data.Dataset`` batched by ``batch`` and the
    fit runs for ``epochs`` epochs inside
    ``tf.profiler.experimental.Profile('logs')``.

    Note: this definition reuses the name of the PyTorch ``profile`` helper
    defined earlier in the notebook and replaces it once this cell runs.

    Returns:
        The constant string ``'out_tflow.csv'``. Nothing in this function
        creates or writes a file of that name.
    """
    trace_dir = 'logs'
    result_name = 'out_tflow.csv'

    batched = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch)

    with tf.profiler.experimental.Profile(trace_dir):
        model.fit(batched, epochs=epochs)

    return result_name
    
class Model:
    """Builder for a small Keras Conv2D -> Flatten -> Dense(10) classifier."""

    def create(self):
        """Build and compile the network, storing it on ``self.model``.

        The model is compiled with the notebook-level ``loss`` and ``opt``
        objects and tracks accuracy.
        """
        model = Sequential()
        model.add(
            # Conv2D is not imported by name in this notebook; reach it
            # through the imported `layers` module to avoid a NameError.
            layers.Conv2D(filters=3, kernel_size=4, strides=2)
        )
        model.add(Flatten(name='FLATTEN'))
        model.add(Dense(units=10, name='FINAL_DENSE'))
        model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])
        self.model = model

# NOTE: this rebinds the name `Model` from the class to an instance; the
# profiling cell below reads `Model.model`, so the name is kept as-is.
Model = Model()
Model.create()

# give(dim=2, n=32, channels=1). This also rebinds the notebook-level
# names `x` and `y`.
x,y = give(2, 32, 1)

In [None]:
# Fit the compiled Keras model with batch size 32 for 1 epoch under the profiler.
# Assumes the most recently executed `profile` definition is the TensorFlow one,
# in which case `prof` is the string it returns — confirm execution order.
prof = profile(Model.model, x, y, 32, 1)

In [None]:
import data_parser as dp

In [None]:
# Path components of the results directory, joined with os.path.join below.
the_dir = ['intermediate_results', 'ptorch']

In [None]:
import os

In [None]:
# Load intermediate_results/ptorch/dense.torch via the project's data_parser.
# The result is iterated with .items() below, so it is presumably a mapping —
# confirm against data_parser.load.
dct = dp.load(os.path.join(*the_dir, 'dense.torch'))

In [None]:
# Take the first (key, value) pair of the loaded results.
# next(iter(...)) raises StopIteration on an empty mapping, whereas a
# for/break loop would silently leave stale `x`, `y` from earlier cells.
x, y = next(iter(dct.items()))

In [None]:
# `y` is the value taken from dct in the previous cell; it exposes sort_values,
# so it is presumably a pandas DataFrame — confirm against data_parser.load.
y.sort_values(by='CPU total (us)')

In [1]:
import data_parser as dp

In [2]:
import pandas as pd

# Remove pandas' display limits (rows, columns, total width, column width)
# by setting each option to None.
for _display_key in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(_display_key, None)

In [3]:
# Load a TensorFlow profiling result through the project's data_parser.
# The result is iterated with .values() below, so it is presumably a mapping —
# confirm against data_parser.load.
dct = dp.load('profiling_results/vms_1/__avg1d.tflow')

In [7]:
# Take the first value of the loaded results.
# next(iter(...)) raises StopIteration on an empty mapping, whereas a
# for/break loop would silently leave `k` unset or stale.
k = next(iter(dct.values()))

In [9]:
# Show the table ordered by the 'Operation' column, descending.
k.sort_values(by='Operation', ascending=False)

Unnamed: 0,Type,Operation,#Occurrences,Avg. self-time (us),Total self-time (us),Avg. time (us),Total time (us)
59,StridedSlice,strided_slice,20,24.314,486.28,24.314,486.28
55,ReadVariableOp,sequential/FINAL_DENSE/MatMul/ReadVariableOp,20,5.14555,102.911,5.14555,102.911
54,ReadVariableOp,sequential/FINAL_DENSE/BiasAdd/ReadVariableOp,20,2.92005,58.401,2.92005,58.401
70,_FusedMatMul,sequential/FINAL_DENSE/BiasAdd,20,2143.5195,42870.39,2143.5195,42870.39
19,Cast,sequential/Cast,20,79.07415,1581.483,79.07415,1581.483
12,AvgPool,sequential/AVG1D/AvgPool,20,374.6834,7493.668,374.6834,7493.668
37,MatMul,gradient_tape/sequential/FINAL_DENSE/MatMul,20,49.992,999.84,49.992,999.84
14,BiasAddGrad,gradient_tape/sequential/FINAL_DENSE/BiasAdd/BiasAddGrad,20,11.0096,220.192,11.0096,220.192
28,DivNoNan,gradient_tape/categorical_crossentropy/weighted_loss/value/div_no_nan,20,2.11085,42.217,2.11085,42.217
67,Tile,gradient_tape/categorical_crossentropy/weighted_loss/Tile_1,20,6.5807,131.614,6.5807,131.614


In [5]:
# Sum 'Total self-time (us)' per operation 'Type'; the result is indexed by Type.
t = k.groupby('Type')[['Total self-time (us)']].sum()

In [6]:
# Rank operation types by their summed self-time, largest first.
t.sort_values(by='Total self-time (us)', ascending=False)

Unnamed: 0_level_0,Total self-time (us)
Type,Unnamed: 1_level_1
IDLE,1134280.877
_FusedMatMul,42870.39
Dataset,13923.1
OptimizeDataset,12698.664
AvgPool,7493.668
SoftmaxCrossEntropyWithLogits,2523.884
Cast,1778.266
ArgMax,1121.439
DeleteIterator,1079.745
MatMul,999.84
