In [1]:
import numpy as np 

In [2]:
def relu_func(x):
    return np.maximum(0, x)

def relu_deriv(x):
    return (x > 0).astype(float)

def tanh_func(x):
    return np.tanh(x)

def tanh_deriv(x):
    return 1 - np.tanh(x)**2

def sigmoid_func(x):
    x = np.clip(x, -250, 250)
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    s = sigmoid_func(x)
    return s * (1 - s)

activations = {
    'relu': (relu_func, relu_deriv),
    'tanh': (tanh_func, tanh_deriv), 
    'sigmoid': (sigmoid_func, sigmoid_deriv)
}

In [3]:
def run_NN(input,W0,W1,b0,b1,func):
    input_array=np.array(list(input)).reshape(-1, 1)
    
    return W1.dot(activations[func][0](W0.dot(input_array) + b0)) + b1

In [4]:
def calculate_MSE(W0,W1,b0,b1,inputs,func):
    results=[]
    for i in inputs:
        r=run_NN(i[0],W0,W1,b0,b1,func)-i[1]
  
        results.append(float(r**2))
     
    return sum(results)/len(results)

In [5]:
class derivatives():

    def __init__(self,inpt,W0,W1,b0,b1,func):
        self.W0=W0
        self.W1=W1
        self.b0=b0
        self.b1=b1
        self.inp=np.array(inpt[0]).reshape(2,1)
        self.activ_f= activations[func][0]
        self.d_activ_f= activations[func][1]

        self.yhat=W1.dot(self.activ_f(W0.dot(self.inp) + b0)) + b1
        self.y=inpt[1]
        self.loss=(self.yhat-self.y)**2


    #
    def calc_dlb1(self):
        self.dlb1=[float(2*(self.yhat-self.y))]
        return  self.dlb1
    #
    def calc_dlW1(self):
        self.dlW1=(self.dlb1[0]*self.activ_f(self.W0.dot(self.inp)+self.b0)).flatten()
        
        return list(self.dlW1)


    def calc_dlb0(self):
        gradients = np.multiply(self.d_activ_f(self.W0.dot(self.inp)+self.b0),self.W1.T*self.dlb1[0])
      
        self.dlb0=gradients
        
        gradients=gradients.flatten()
     
        return list(gradients)

    def calc_dlW0(self):
        return list((self.dlb0.dot(self.inp.T)).flatten())

    def construct_gradients(self):

        return np.array(self.calc_dlb1() + self.calc_dlW1() + self.calc_dlb0() +self.calc_dlW0())
    
        


In [37]:
#param init
inputs=[((0,0),0),((0,1),1),((1,0),1),((1,1),0)]

def train(a,func):
    W0 = np.random.randn(2, 2) * 0.1
    b0 = np.zeros((2, 1))
    W1 = np.random.randn(1, 2) * 0.1  
    b1 = np.zeros((1, 1))

    inputs=[((0,0),0),((0,1),1),((1,0),1),((1,1),0)]

    while calculate_MSE(W0,W1,b0,b1,inputs,func) > 0.001: 
        print(calculate_MSE(W0,W1,b0,b1,inputs,func))    
        sums=[]
        for i in inputs:
            backprop=derivatives(i,W0,W1,b0,b1,func)  
            grads=backprop.construct_gradients()
            sums.append(np.array(grads))
        
        sums=np.array(sums)
        average=np.mean(sums,axis=0)
        
        b1 = b1 - a * average[0].reshape(b1.shape)
        W1 = W1 - a * average[1:3].reshape(W1.shape) 
        b0 = b0 - a * average[3:5].reshape(b0.shape)
        W0 = W0 - a * average[5:9].reshape(W0.shape)
    print(calculate_MSE(W0,W1,b0,b1,inputs,func))
    return(b1,W1,b0,W0)

relu_weights=train(0.01,"relu")
print(relu_weights)

  results.append(float(r**2))
  self.dlb1=[float(2*(self.yhat-self.y))]


0.4988646754705267
0.48874081845766276
0.47902718058325017
0.4697070128322024
0.4607642628785286
0.45218354473031575
0.4439501098330347
0.4360498195482199
0.4284691189304589
0.4211950117310017
0.41421503656120934
0.4075172441535464
0.40109017566194294
0.39492284194712574
0.3890047037959931
0.38332565302730426
0.37787599443890335
0.37264642855441943
0.36762803512990394
0.36281225738319395
0.35819088691095563
0.35375604926036436
0.34950019012424705
0.34541606213024634
0.34149671219618327
0.3377354694253077
0.3341259335165295
0.33066196366604916
0.3273376679380327
0.32414739308313584
0.32108571478476616
0.3181474283139861
0.315327539574923
0.3126212565234424
0.3100239809426957
0.30753130055994593
0.30513898148982904
0.3028429609899173
0.3006393405151215
0.2985243790581025
0.2964944867634596
0.29454621880403187
0.2926762695081836
0.29088146672745463
0.28915876643443594
0.2875052475411924
0.2859181069289815
0.2843946546804376
0.2829323095057741
0.28152877287172234
0.28018181945033616
0.2788

In [39]:
for i in inputs:
    b1,W1,b0,W0=relu_weights
    prediction=run_NN(i[0],W0,W1,b0,b1,"relu")
    print(f'Predicted score for input {i[0]} is {int(np.round(prediction))}')

Predicted score for input (0, 0) is 0
Predicted score for input (0, 1) is 1
Predicted score for input (1, 0) is 1
Predicted score for input (1, 1) is 0


  print(f'Predicted score for input {i[0]} is {int(np.round(prediction))}')


In [42]:
sig_weights=train(0.1,"sigmoid")
print(sig_weights)

  results.append(float(r**2))
  self.dlb1=[float(2*(self.yhat-self.y))]


0.5633439430068695
0.40463374542480524
0.32636993202727876
0.28771058913780356
0.2686123632136958
0.25918196383836445
0.25452813736649915
0.25223276765835245
0.2511011494636204
0.2505434552232865
0.25026867877519154
0.25013332187624404
0.2500666531353572
0.2500338191921567
0.25001764967069773
0.2500096870187916
0.25000576581246553
0.2500038347109555
0.25000288355271727
0.2500024149149949
0.25000218386468026
0.25000206979823236
0.25000201333212907
0.2500019852270677
0.25000197108629624
0.2500019638212324
0.25000195994166063
0.2500019577291045
0.2500019563374408
0.25000195535006636
0.2500019545618596
0.2500019538718261
0.25000195323023966
0.2500019526126167
0.25000195200690256
0.25000195140716197
0.25000195081047244
0.250001950215395
0.25000194962122124
0.2500019490276023
0.25000194843436635
0.25000194784142865
0.2500019472487475
0.2500019466563024
0.250001946064083
0.2500019454720844
0.2500019448803039
0.25000194428874034
0.2500019436973929
0.2500019431062614
0.2500019425153454
0.250001

In [43]:
for i in inputs:
    b1,W1,b0,W0=sig_weights
    prediction=run_NN(i[0],W0,W1,b0,b1,"sigmoid")
    print(f'Predicted score for input {i[0]} is {int(np.round(prediction))}')

Predicted score for input (0, 0) is 0
Predicted score for input (0, 1) is 1
Predicted score for input (1, 0) is 1
Predicted score for input (1, 1) is 0


  print(f'Predicted score for input {i[0]} is {int(np.round(prediction))}')


In [44]:
tahn_weights=train(0.1,"tanh")
print(tahn_weights)


  results.append(float(r**2))
  self.dlb1=[float(2*(self.yhat-self.y))]


0.49726989111762504
0.4068669528907594
0.34942062034752974
0.3129427278694623
0.289805574928849
0.2751489302591967
0.26587637802032577
0.2600171946011573
0.2563189058001042
0.25398678122038265
0.2525173390348834
0.25159208018762425
0.25100978912920413
0.25064349032659583
0.2504131342594578
0.2502682958011673
0.250177232069164
0.25011997160181765
0.2500839546133954
0.2500612852202497
0.2500470009739999
0.25003798375350017
0.2500322746603754
0.2500286432302162
0.25002631664452696
0.250024809561482
0.2500238171999628
0.25002314817614657
0.2500226823176602
0.2500223441690296
0.250022086353271
0.25002187912372875
0.25002170380342326
0.25002154866561516
0.2500214063472868
0.25002127222515064
0.25002114339601605
0.2500210180366158
0.2500208950016797
0.2500207735715922
0.2500206532939649
0.25002053388417333
0.2500204151629122
0.2500202970169929
0.25002017937473225
0.2500200621905025
0.2500199454350318
0.25001982908931664
0.25001971314079907
0.2500195975809691
0.2500194824038583
0.2500193676050

In [45]:
for i in inputs:
    b1,W1,b0,W0=tahn_weights
    prediction=run_NN(i[0],W0,W1,b0,b1,"tanh")
    print(f'Predicted score for input {i[0]} is {int(np.round(prediction))}')

Predicted score for input (0, 0) is 0
Predicted score for input (0, 1) is 1
Predicted score for input (1, 0) is 1
Predicted score for input (1, 1) is 0


  print(f'Predicted score for input {i[0]} is {int(np.round(prediction))}')
