# Machine Learning Benchmarking

Back propagation benchmarking 

Raw implantations

In [38]:
import numpy as np
import time
def sigmoid(inputVector):
    return np.exp(inputVector)/(1+np.exp(inputVector))
def relu(inputVector):
    inputVector[inputVector <= 0] = 0
    return inputVector
def sigmoid_derivative(inputVector):
    return inputVector*(1 - inputVector)
def relu_derivative(inputVector):
    inputVector[inputVector > 0] = 1
    inputVector[inputVector <= 0] = 0
    return inputVector
    
def training_func_relu(inputSample, label, startTime, LEARNING_RATE = 0.0001, epoches = 1000, print_loss = False):
    # Raw Implentation: Relu & Mean-squared
    #32 to 32 --> relu --> 32 to 8 --> relu --> 8 to 1 --> 1-dimension label
    errorLog = []
    firstLayerWeight = np.random.rand(32,32)
    firstLayerBias = np.random.rand(1,1)
    secondLayerWeight = np.random.rand(32,8)
    secondLayerBias = np.random.rand(1,1)
    thirdLayerWeight = np.random.rand(8,1)
    thirdLayerBias = np.random.rand(1,1)
    while epoches > 0:
        #Forward Propagation
        firstLayerOutput_raw = np.dot(inputSample, firstLayerWeight) + firstLayerBias
        firstLayerOutput_activation = relu(firstLayerOutput_raw)
        secondLayerOutput_raw = np.dot(firstLayerOutput_activation, secondLayerWeight) + secondLayerBias
        secondLayerOutput_activation = relu(secondLayerOutput_raw)
        thirdLayerOutput = np.dot(secondLayerOutput_activation, thirdLayerWeight) + thirdLayerBias
        # Error calculation
        error = 0.5*(thirdLayerOutput - label)**2
        # Back Propagation
        deriativeThirdLayer = (thirdLayerOutput - label)*secondLayerOutput_activation.transpose()
        deriativeSecondLayer = (thirdLayerOutput - label)*thirdLayerWeight.transpose()*relu_derivative(secondLayerOutput_raw)*firstLayerOutput_activation.transpose()
        deriativeFirstLayer = (thirdLayerOutput - label)*thirdLayerWeight.transpose()*relu_derivative(secondLayerOutput_raw)*secondLayerWeight*(relu_derivative(firstLayerOutput_raw)).transpose()
        deriativeFirstLayer = deriativeFirstLayer.sum(axis = 1)*inputSample.transpose()
        #Weight updates
        firstLayerWeight = firstLayerWeight - deriativeFirstLayer*LEARNING_RATE
        secondLayerWeight = secondLayerWeight - deriativeSecondLayer*LEARNING_RATE
        thirdLayerWeight = thirdLayerWeight - deriativeThirdLayer*LEARNING_RATE
        #Run epoch = 100
        epoches -= 1
        errorLog.append(error)
        if print_loss == False:
            continue
        else:
            print('Current Error:', error)
    print(time.time() - startTime)

In [45]:
from multiprocessing import Process
batchNum = 4
np.random.seed(20)
inputSamples = np.random.rand(batchNum,1,32)
labels = np.random.rand(batchNum,1,1)
if __name__ == '__main__':
    startTime = time.time()
    for i in range(batchNum):
        p = Process(target = training_func_relu, args = (inputSamples[i], labels[i], startTime))
        p.start()

0.07369589805603027
0.07944393157958984
0.08042073249816895
0.08733081817626953


In [None]:
if __name__ == '__main__':
    startTime = time.time()
    for i in range(batchNum):
        training_func_relu(inputSamples[i], labels[i], startTime)

In [None]:
np.random.seed(20)
inputSample = np.random.rand(32,1,32)
label = np.random.rand(32,1,1)

In [None]:
inputSamples[i].shape

Implantation with frameworks 

Pytorch using relu

In [3]:
import torch
import time
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class testNet1(nn.Module):

    def __init__(self):
        super(testNet1, self).__init__()
        self.fc1 = nn.Linear(32, 32)
        self.fc2 = nn.Linear(32, 8)
        self.fc3 = nn.Linear(8, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = testNet1()

Non-Batch Processing

In [4]:
batchNum = 128
#MSE loss function
criterion = nn.MSELoss()
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.0001)
inputVector = torch.randn(batchNum, 1, 1, 1, 32)
target = torch.randn(batchNum, 1, 1, 1, 1)
startTime = time.time()
for i in range(1000):
    for k in range(batchNum):
        optimizer.zero_grad()   # zero the gradient buffers
        output = net(inputVector[k])
        loss = criterion(output, target[k])
        loss.backward()
        optimizer.step()
    endTime = time.time()
    print(endTime - startTime)

0.044406890869140625
0.08062386512756348
0.1151738166809082
0.14807987213134766
0.17965102195739746
0.21108078956604004
0.24225473403930664
0.27547287940979004
0.30689573287963867
0.33834290504455566
0.3717339038848877
0.40358591079711914
0.43546581268310547
0.46640777587890625
0.50152587890625
0.5372879505157471
0.5722579956054688
0.6064238548278809
0.6392266750335693
0.6744449138641357
0.7099578380584717
0.7449278831481934
0.7829890251159668
0.8148939609527588
0.84686279296875
0.8780348300933838
0.909550666809082
0.9424219131469727
0.9737768173217773
1.0045297145843506
1.0357780456542969
1.0670788288116455
1.0983529090881348
1.129401683807373
1.1613869667053223
1.193342924118042
1.2251167297363281
1.2568156719207764
1.288132667541504
1.3239428997039795
1.3593189716339111
1.3945956230163574
1.4267308712005615
1.4595658779144287
1.49131178855896
1.5225508213043213
1.553699016571045
1.5854136943817139
1.6204087734222412
1.6538989543914795
1.685492992401123
1.7165207862854004
1.747493743

In [18]:
batchNum = 1024+64
#MSE loss function
criterion = nn.MSELoss()
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.0001)
inputVector = torch.randn(batchNum, 1,1, 1, 32)
target = torch.randn(batchNum, 1,1, 1, 1)
startTime = time.time()
for i in range(1000):
    optimizer.zero_grad()   # zero the gradient buffers
    output = net(inputVector)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
endTime = time.time()
print(endTime - startTime)

0.853369951248169


Keras

In [10]:
import keras
import numpy as np
import time
from keras.models import Sequential
from keras.layers import Dense
np.random.seed(20)
inputSample = np.random.rand(1,32)
label = np.random.rand(1,1)

Keras using relu

In [11]:
model = Sequential()
model.add(Dense(units=32, activation='relu', input_dim=32))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=1))
model.compile(loss=keras.losses.mean_squared_error,
              optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True))
startTime = time.time()
model.fit(inputSample, label, epochs=1000, batch_size=1, verbose=0)
endTime = time.time()
print(endTime - startTime)

0.7702760696411133
