In [19]:
import torch
from torch import nn

In [20]:
import sys
import os

In [21]:
# Number of input nodes.
d = 10
# Total number of layers the network builds (hidden layers plus the output layer).
K = 10

In [22]:
# Network dimensions handed to Q1NN:
#   N     - batch size
#   D_in  - input dimension
#   H     - [width of every hidden layer, total number of layers]
#   D_out - output dimension
N = 1
D_in = d
H = [2 * d + 1, K]
D_out = 1

In [23]:
# Optimisation settings: number of passes over the sample and the
# gradient-descent step size.
epoches = 10
learning_rate = 0.0001

In [24]:
# Fix the global PyTorch RNG seed so the random sample and weights are reproducible.
torch.manual_seed(seed=2020)

<torch._C.Generator at 0x16e48db5a90>

In [25]:
# A single random training sample of D_in features, and its target:
# the mean of the squared features.
x_train = torch.rand((1, D_in))
y_train = x_train.mul(x_train).mean()

In [26]:
# Show the sampled training input.
print(x_train)

tensor([[0.4869, 0.1052, 0.5883, 0.1161, 0.4949, 0.2824, 0.5899, 0.8105, 0.2512,
         0.6307]])


In [27]:
# Show the training target (mean of the squared inputs).
print(y_train)

tensor(0.2398)


In [28]:
class Q1NN:
    """Fully connected ReLU network with hand-written forward and backward passes.

    Every layer, including the last one, computes ``relu(input @ weight + bias)``.
    Each entry of ``self.layers`` is a dict with the keys:

    * ``"weight"`` / ``"bias"`` -- parameters, drawn from ``torch.rand``;
    * ``"ll"`` -- pre-activation stored by the latest ``forward`` call;
    * ``"output"`` -- activation stored by the latest ``forward`` call;
    * ``"dw"`` / ``"db"`` -- gradients stored by the latest ``backward`` call.

    NOTE(review): ``torch.rand`` draws from [0, 1), so every parameter starts
    non-negative and activations can grow quickly with depth and width. The
    initialisation is intentionally left unchanged here; confirm whether a
    scaled initialisation is wanted.
    """

    def __init__(self, N, D_in, H, D_out):
        """Allocate parameters and gradient buffers for every layer.

        Args:
            N: batch size (stored only; no tensor shape depends on it).
            D_in: number of input features.
            H: pair ``[width, depth]``; ``H[0]`` is the width of every hidden
                layer and ``H[1]`` the total number of layers (hidden layers
                plus the output layer).
            D_out: number of output features.
        """
        # Architecture parameters, kept for reference.
        self.batchSize = N
        self.inputSize = D_in
        self.hiddenLayer = H
        self.outputSize = D_out

        width, depth = H[0], H[1]

        # One dict of parameters, cached activations and gradients per layer.
        self.layers = []
        for ii in range(depth):
            # The first layer consumes the input, the last one emits the output.
            tmp_in = D_in if ii == 0 else width
            tmp_out = D_out if ii == depth - 1 else width

            # Weight is drawn before bias so the RNG stream order stays the same.
            weight = torch.rand((tmp_in, tmp_out), dtype=torch.float)
            bias = torch.rand(tmp_out, dtype=torch.float)

            self.layers.append({
                "weight": weight,
                "bias": bias,
                "ll": torch.zeros(tmp_out, dtype=torch.float),
                "output": torch.zeros(tmp_out, dtype=torch.float),
                "dw": torch.zeros((tmp_in, tmp_out), dtype=torch.float),
                "db": torch.zeros(tmp_out, dtype=torch.float),
            })

    def relu(self, s):
        """Return ``max(s, 0)`` element-wise, keeping the dtype and device of ``s``."""
        return torch.clamp(s, min=0)

    def reluPrime(self, s):
        """Return the ReLU derivative: 1 where ``s > 0`` and 0 elsewhere."""
        return (s > 0).to(s.dtype)

    def forward(self, X):
        """Run ``X`` through every layer and return the last activation.

        The pre-activation and activation of each layer are stored under
        ``"ll"`` and ``"output"`` so that ``backward`` can reuse them.

        Args:
            X: 2-D tensor with ``D_in`` columns (one row per sample).

        Returns:
            The activation of the last layer; ``X`` itself if the network
            has no layers.
        """
        temp = X
        # Iterate over the instance's own layers rather than a module-level
        # architecture variable, so the object is self-contained.
        for layer in self.layers:
            a = torch.matmul(temp, layer["weight"]) + layer["bias"]
            layer["ll"] = a
            temp = self.relu(a)
            layer["output"] = temp
        return temp

    def backward(self, yhat, y, x):
        """Back-propagate the mean squared error and store ``dw`` / ``db``.

        Must be called after ``forward(x)``, whose cached ``"ll"`` and
        ``"output"`` tensors are read here.

        Args:
            yhat: network output returned by ``forward(x)`` (2-D tensor).
            y: target, broadcastable against ``yhat``.
            x: the input that produced ``yhat`` (2-D tensor).
        """
        # Gradient of mean((yhat - y)^2) with respect to yhat, kept per sample
        # so that batches larger than one are handled correctly.
        dz = 2 * (yhat - y) / yhat.numel()

        last = len(self.layers) - 1
        for ii in range(last, -1, -1):
            layer = self.layers[ii]
            if ii < last:
                # Pull the gradient back through the following layer's weights.
                dz = torch.mm(dz, self.layers[ii + 1]["weight"].T)
            dz = dz * self.reluPrime(layer["ll"])

            # What fed this layer: the raw input for layer 0, otherwise the
            # previous layer's activation. This also covers one-layer networks.
            upstream = x if ii == 0 else self.layers[ii - 1]["output"]
            layer["dw"] = torch.mm(upstream.T, dz)
            # Sum over the batch dimension so db always has the bias shape.
            layer["db"] = dz.sum(dim=0)

    def training(self, x, y, epoches, learning_rate, dump_gradients=True):
        """Train with plain gradient descent on the given sample(s).

        Args:
            x: input tensor passed to ``forward``.
            y: target tensor.
            epoches: number of update steps.
            learning_rate: gradient-descent step size.
            dump_gradients: when true (default), write the gradients of every
                epoch to ``my_autograd_epoch_<e>.dat`` in the working directory.

        Returns:
            List with one element-wise squared-error tensor per epoch,
            computed before that epoch's parameter update.
        """
        lost_list = []
        for e in range(epoches):
            yhat = self.forward(x)
            lost_list.append(torch.square(yhat - y))
            self.backward(yhat, y, x)

            for layer in self.layers:
                layer["weight"] -= learning_rate * layer["dw"]
                layer["bias"] -= learning_rate * layer["db"]

            if dump_gradients:
                self._dump_gradients('my_autograd_epoch_' + str(e) + '.dat')

        return lost_list

    def _dump_gradients(self, file_name):
        """Write every layer's current ``dw`` and ``db`` to ``file_name``."""
        # Mode 'w' truncates an existing file; printing straight to the handle
        # avoids swapping sys.stdout, which would stay redirected on an error.
        with open(file_name, 'w') as f:
            for index, layer in enumerate(self.layers):
                print("[Layer %d]" % index, file=f)
                print(layer["dw"], file=f)
                print(layer["db"], file=f)

    def printLayers(self):
        """Print the weight-matrix shape of every layer."""
        for index, layer in enumerate(self.layers):
            print("# layer {}: {} ".format(index, list(layer["weight"].size())))

# Build the network from the dimensions configured above.
myNN = Q1NN(N=N, D_in=D_in, H=H, D_out=D_out)

In [29]:
# List the weight-matrix shape of every layer.
myNN.printLayers()

# layer 0: [10, 21] 
# layer 1: [21, 21] 
# layer 2: [21, 21] 
# layer 3: [21, 21] 
# layer 4: [21, 21] 
# layer 5: [21, 21] 
# layer 6: [21, 21] 
# layer 7: [21, 21] 
# layer 8: [21, 21] 
# layer 9: [21, 1] 


In [30]:
# Train on the single sample; `costs` receives one loss tensor per epoch.
costs = myNN.training(
    x=x_train,
    y=y_train,
    epoches=epoches,
    learning_rate=learning_rate,
)

In [31]:
# Report the loss recorded at each epoch as a plain float.
for ii in range(epoches):
    epoch_loss = costs[ii].item()
    print("epoch # {} lost: {:.5f}".format(ii, epoch_loss))

epoch # 0 lost: 19422226391825907712.00000
epoch # 1 lost: 0.05752
epoch # 2 lost: 0.05752
epoch # 3 lost: 0.05752
epoch # 4 lost: 0.05752
epoch # 5 lost: 0.05752
epoch # 6 lost: 0.05752
epoch # 7 lost: 0.05752
epoch # 8 lost: 0.05752
epoch # 9 lost: 0.05752


In [None]:
# Display the raw per-epoch loss tensors returned by training.
costs