## Data Analysis

After running the eda script and saving the cleaned data to disk, the data is ready to be analyzed. This notebook contains a variety of visualizations and tables to help build a better understanding of the data.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import os
import re

### Restore the Saved Data from Disk

In [9]:
# Directory that eda_notebook writes its output to by default
data_location = "../../data/"

# Both CSVs are indexed by their "op_name" column
conv2d_kernels = pd.read_csv(f"{data_location}conv2d_kernels.csv", index_col="op_name")
conv2d_layers = pd.read_csv(f"{data_location}conv2d_layers.csv", index_col="op_name")

# Print the shapes as a quick sanity check that both frames loaded
print("Kernels DF shape: ", conv2d_kernels.shape)
print("Layers DF shape: ", conv2d_layers.shape)

Kernels DF shape:  (4337, 9)
Layers DF shape:  (1380, 5)


### Visualizing the Data 

In [None]:
# NOTE(review): `conv2D_kinfo` is not defined in any visible cell of this
# notebook (the load cell creates `conv2d_kernels` and `conv2d_layers`).
# Confirm which frame is intended before running on a fresh kernel.

# Keep only the runs with stride_length == 1 and kernel_size == 1.
# Boolean indexing drops the non-matching rows; `.where` would instead keep
# them as all-NaN rows and upcast the integer columns to float.
s1k1_mask = (conv2D_kinfo["stride_length"] == 1) & (conv2D_kinfo["kernel_size"] == 1)
conv2D_kinfo_s1k1 = conv2D_kinfo[s1k1_mask]

# x = filter count, y = batch size, point colour = total_duration_us
ax = conv2D_kinfo_s1k1.plot.scatter(x="filter_count",
                                    y="batch_size",
                                    c="total_duration_us",
                                    colormap="viridis")
# The title names the quantities that are actually on the axes; execution
# time is shown through the colour scale, not an axis.
ax.set_title("Filter Count vs. Batch Size, Colored by Convolution Execution Time, "
             "for 1-layer ConvNet")
plt.show()

In [None]:
# Subset of runs with kernel_size == 3, fewer than 6 filters and stride_length below 4
conv2D_k3 = conv2D_kinfo.loc[
    (conv2D_kinfo["kernel_size"] == 3)
    & (conv2D_kinfo["filter_count"] < 6)
    & (conv2D_kinfo["stride_length"] < 4)
]
conv2D_k3

In [None]:
# 3-D scatter of the conv2D_k3 subset: position encodes stride length,
# filter count and batch size; colour encodes total_duration_us.
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")

# Keep the returned collection so a colorbar can be attached to it
points = ax.scatter(xs=conv2D_k3["stride_length"],
                    ys=conv2D_k3["filter_count"],
                    zs=conv2D_k3["batch_size"],
                    c=conv2D_k3["total_duration_us"],
                    cmap='viridis')

ax.set_xlabel('Stride Length')
ax.set_xticks(np.arange(1, 4, 1))
ax.set_ylabel('Filter Count')
ax.set_yticks(np.arange(1, 11, 1))
ax.set_zlabel('Batch Size')
ax.set_title('Run Duration by Stride Length, Filter Count and Batch Size')

# Without a colorbar the colour dimension cannot be read off the figure
fig.colorbar(points, ax=ax, label='total_duration_us', pad=0.1)

plt.show()

In [None]:
# Restrict the conv2D_k3 subset to a single batch size.
# Boolean indexing removes the non-matching rows; `.where` would keep them as
# all-NaN rows, so `.head()` could show nothing but NaN.
# NOTE(review): the variable name says "b64" but the filter selects
# batch_size == 310 — confirm which batch size is intended.
conv2D_k3_b64 = conv2D_k3[conv2D_k3["batch_size"] == 310]
conv2D_k3_b64.head()

In [None]:
# Wireframe of total run duration over (stride length, filter count).
#
# Axes3D.plot_wireframe requires 2-D X/Y/Z grids and raises ValueError when
# given 1-D columns, so the long-format frame is first pivoted into a
# filter_count x stride_length grid. Rows with missing values are dropped and
# repeated (filter_count, stride_length) pairs are averaged.
duration_grid = (
    conv2D_k3_b64
    .dropna(subset=["stride_length", "filter_count", "total_duration_us"])
    .pivot_table(index="filter_count",
                 columns="stride_length",
                 values="total_duration_us",
                 aggfunc="mean")
)
if duration_grid.empty:
    raise ValueError("conv2D_k3_b64 has no rows to plot")

# meshgrid(columns, index) yields grids shaped (n_filter_counts, n_strides),
# which matches the shape of duration_grid.
stride_grid, filter_grid = np.meshgrid(duration_grid.columns.to_numpy(),
                                       duration_grid.index.to_numpy())

fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")

# No cmap here: a wireframe is drawn in a single colour unless a data array
# is attached, so the colormap argument had no visible effect.
ax.plot_wireframe(stride_grid, filter_grid, duration_grid.to_numpy())

ax.set_xlabel('Stride Length')
ax.set_xticks(np.arange(1, 4, 1))
ax.set_ylabel('Filter Count')
ax.set_yticks(np.arange(1, 11, 1))
ax.set_zlabel('Total Run Duration')

plt.show()