In [None]:
# Goal: implement _very_ basic functionalities of pytorch from scratch (but I'm going to cheat and use numpy)

# large inspiration from:
# - https://github.com/karpathy/micrograd
# - https://github.com/geohot/tinygrad

In [2]:
import torch 
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

In [54]:
# pytorch
# Reference run: the gradients printed here are the ground truth that the
# from-scratch Tensor implementation further down is compared against.

# Create tensors.
x = torch.tensor(15., requires_grad=True)
print(x)
w = torch.tensor(10., requires_grad=True)
b = torch.tensor(900., requires_grad=True)

# Build a computational graph.
y = w * x + b * 100    # y = w * x + 100 * b = 10 * 15 + 100 * 900 = 90150
print(y)

# Compute gradients.
# Computes the sum of gradients of given tensors with respect to graph leaves.
y.backward()

# Print out the gradients.
# After backward(), each leaf's .grad holds dy/d(leaf).
print(x.grad)    # x.grad = dy/dx = w = 10
print(w.grad)    # w.grad = dy/dw = x = 15
print(b.grad)    # b.grad = dy/db = 100 (b is scaled by 100 in y)

tensor(15., requires_grad=True)
tensor(90150., grad_fn=<AddBackward0>)
tensor(10.)
tensor(15.)
tensor(100.)


In [78]:
# not pytorch

class Tensor:
    """Minimal autograd tensor: a float32 numpy array plus the links needed for backprop.

    Attributes:
        data: the value, stored as a float32 ``np.ndarray``.
        children: tuple of the Tensors this one was computed from (empty for leaves).
        grad: gradient of the most recent ``backward()`` root with respect to this
            tensor. It is all zeros until ``backward()`` has been run on a graph
            that contains this tensor.
        op: the operation class that produced this tensor, or ``None`` for leaves.
            It must expose ``backward(parent, *children)`` returning one gradient
            per child.
    """

    def __init__(self, data, children=()):
        self.data = np.array(data, dtype=np.float32)
        self.children = children
        # Zeros (not the scalar 1) so an untouched tensor never looks like it
        # already carries a gradient; backward() seeds the root explicitly.
        self.grad = np.zeros_like(self.data)
        self.op = None

    def _apply(self, op, other):
        """Run ``op.forward(self, other)`` and remember ``op`` on the result."""
        output = op.forward(self, other)
        output.op = op
        return output

    def __mul__(self, other):
        """``self * other`` via the ``Multiply`` op (a matrix product, see ``Multiply``)."""
        return self._apply(Multiply, other)

    def __add__(self, other):
        """``self + other`` via the ``Add`` op."""
        return self._apply(Add, other)

    def _topological_order(self):
        """Return every tensor reachable from ``self``, each one before its children.

        Uses an iterative depth-first search so deep graphs cannot hit the
        recursion limit and shared sub-graphs are visited only once.
        """
        post_order = []
        visited = {id(self)}
        stack = [(self, iter(self.children))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if id(child) not in visited:
                    visited.add(id(child))
                    stack.append((child, iter(child.children)))
                    break
            else:
                # All children handled: node is finished.
                post_order.append(node)
                stack.pop()
        # Reverse post-order of a DAG is a topological order (parents first).
        post_order.reverse()
        return post_order

    def backward(self, gradient=None):
        """Back-propagate from this tensor, filling ``grad`` on every tensor in its graph.

        Args:
            gradient: optional upstream gradient used to seed this tensor's
                ``grad``. Defaults to ones with the shape of ``self.data``.

        Gradients arriving at the same tensor along different paths are summed
        (e.g. ``x + x`` gives ``x.grad == 2``). Each call recomputes the
        gradients from scratch, so calling it twice yields the same values.
        """
        if gradient is None:
            self.grad = np.ones_like(self.data)
        else:
            self.grad = np.array(gradient, dtype=np.float32)

        # ids of tensors whose grad has been (re)written during this pass.
        received = {id(self)}
        for node in self._topological_order():
            if id(node) not in received:
                # Reachable, but no parent op produced a gradient for it.
                node.grad = np.zeros_like(node.data)
            if node.op is None:
                continue
            # Parents come first in the order, so node.grad is complete here.
            child_grads = node.op.backward(node, *node.children)
            for child, child_grad in zip(node.children, child_grads):
                # Copy so sibling tensors never alias the same gradient buffer.
                child_grad = np.array(child_grad, dtype=np.float32)
                if id(child) in received:
                    child.grad = child.grad + child_grad
                else:
                    child.grad = child_grad
                    received.add(id(child))
            

class Multiply:
    """Matrix-product operation used by ``Tensor.__mul__``.

    ``forward`` is ``np.matmul``, so for two 1-D operands it is their inner
    product and for 2-D operands it is the usual matrix product.
    """

    @staticmethod
    def forward(a, b):
        """Return a new Tensor holding ``a.data @ b.data`` with ``(a, b)`` as children."""
        return Tensor(np.matmul(a.data, b.data), (a, b))

    @staticmethod
    def backward(parent, a, b):
        """Return ``(grad_a, grad_b)`` for ``parent = a @ b``.

        Args:
            parent: the output tensor; only ``parent.grad`` is read. A
                single-element ``parent.grad`` (e.g. the scalar ``1``) is
                treated as a constant upstream gradient for every output entry.
            a, b: the operands; only their ``data`` arrays (1-D or 2-D) are read.

        Returns:
            Tuple of arrays shaped like ``a.data`` and ``b.data``:
            ``grad_a = grad @ b.T`` and ``grad_b = a.T @ grad``.

        Raises:
            NotImplementedError: if an operand is not 1-D or 2-D.
        """
        a_data, b_data = a.data, b.data
        if a_data.ndim not in (1, 2) or b_data.ndim not in (1, 2):
            raise NotImplementedError(
                "Multiply.backward only supports 1-D and 2-D operands"
            )

        # Promote vectors the way np.matmul does: a -> row matrix, b -> column matrix.
        a_mat = a_data.reshape(1, -1) if a_data.ndim == 1 else a_data
        b_mat = b_data.reshape(-1, 1) if b_data.ndim == 1 else b_data
        out_shape = (a_mat.shape[0], b_mat.shape[1])

        upstream = np.asarray(parent.grad, dtype=np.float32)
        if upstream.size == 1:
            # Scalar seed: same upstream gradient for every output entry.
            upstream = np.broadcast_to(upstream.reshape(()), out_shape)
        else:
            upstream = upstream.reshape(out_shape)

        # The elementwise product rule is only right for scalars; for a matrix
        # product the gradients are themselves matrix products.
        grad_a = np.matmul(upstream, b_mat.T).reshape(a_data.shape)
        grad_b = np.matmul(a_mat.T, upstream).reshape(b_data.shape)
        return grad_a, grad_b
    
class Add:
    """Elementwise (broadcasting) addition used by ``Tensor.__add__``."""

    @staticmethod
    def forward(a, b):
        """Return a new Tensor holding ``a.data + b.data`` with ``(a, b)`` as children."""
        return Tensor(np.add(a.data, b.data), (a, b))

    @staticmethod
    def _reduce_to(grad, shape):
        """Sum ``grad`` over the axes that broadcasting added or stretched.

        The result has exactly ``shape`` and is a fresh float32 array.
        """
        extra_dims = grad.ndim - len(shape)
        if extra_dims > 0:
            # Leading axes that the operand did not have at all.
            grad = grad.sum(axis=tuple(range(extra_dims)))
        stretched = tuple(
            axis for axis, dim in enumerate(shape)
            if dim == 1 and grad.shape[axis] != 1
        )
        if stretched:
            # Axes where the operand had length 1 and was repeated.
            grad = grad.sum(axis=stretched, keepdims=True)
        return np.array(grad, dtype=np.float32)

    @staticmethod
    def backward(parent, a, b):
        """Return ``(grad_a, grad_b)`` for ``parent = a + b``.

        Args:
            parent: the output tensor; only ``parent.grad`` is read. A
                single-element ``parent.grad`` (e.g. the scalar ``1``) is
                treated as a constant upstream gradient for every output entry.
            a, b: the operands; only their ``data`` arrays are read.

        Returns:
            Tuple of arrays shaped like ``a.data`` and ``b.data``. When an
            operand was broadcast in the forward pass (e.g. a bias vector added
            to every row), its gradient is the upstream gradient summed over
            the broadcast axes.
        """
        out_shape = np.broadcast(a.data, b.data).shape
        upstream = np.asarray(parent.grad, dtype=np.float32)
        if upstream.size == 1:
            upstream = upstream.reshape(())
        upstream = np.broadcast_to(upstream, out_shape)
        return (
            Add._reduce_to(upstream, a.data.shape),
            Add._reduce_to(upstream, b.data.shape),
        )

        

# Same x, w, b values as the PyTorch cell above, wrapped in the home-made Tensor.
x = Tensor([15])
w = Tensor([10])
b = Tensor([900])
print(f'x: {x.data}')
print(f'w: {w.data}')
print(f'b: {b.data}')
print(f'')

# NOTE: unlike the PyTorch cell, b is not scaled by 100 here, so y = w * x + b.
# `*` is np.matmul under the hood; for these 1-element vectors that is just w * x.
y = w * x + b
print(f'y: {y.data}')

# Back-propagate from y down to the leaves.
y.backward()

# Expected gradients: dy/dx = w, dy/dw = x, dy/db = 1.
print(x.grad)
print(w.grad)
print(b.grad)

x: [15.]
w: [10.]
b: [900.]

y: [1050.]
[10.]
[15.]
1


In [91]:
# TODO: make random
# Fixed 10x3 input matrix; the 3 columns match the input size of Linear(3, 2) below.
x = Tensor([[-0.6541, -0.4835,  1.2342],
            [-0.3676,  1.2874, -0.3080],
            [-1.1146,  0.1940,  1.0058],
            [ 0.4186, -1.3168,  0.3667],
            [ 0.6491, -1.0791,  0.4444],
            [ 1.8933,  0.0439, -0.2866],
            [-0.3996,  0.9862, -0.7550],
            [ 1.5314,  0.2774,  0.5163],
            [-1.4798, -0.4294, -0.6591],
            [-1.5002, -0.2480, -1.0131]])

# Fixed 10x2 target matrix; compared against the 10x2 predictions by the loss below.
y = Tensor([[-1.2484, -0.2801],
            [-0.6393, -0.3241],
            [ 0.6526,  0.2269],
            [ 2.0617, -0.3693],
            [ 0.9553,  0.3633],
            [-0.0317,  0.8028],
            [-1.5480,  0.5453],
            [-0.8418,  0.4886],
            [-2.5214,  1.6596],
            [ 0.6438,  0.6399]])

# nn building blocks
# start with linear layer

class Linear:
    """Affine layer computing ``x * weight + bias`` with the notebook's Tensor ops.

    NOTE: ``in_dim`` and ``out_dim`` are only stored. The parameters are
    currently fixed constants (a 3x2 weight and a length-2 bias), so the layer
    only makes sense as ``Linear(3, 2)`` until the TODO below is done.
    """

    def __init__(self, in_dim, out_dim):
        self.in_dim, self.out_dim = in_dim, out_dim
        # TODO: make random and transpose weight
        # Written as (out_dim, in_dim) rows, then transposed to (in_dim, out_dim)
        # so that `x * weight` is a valid matrix product.
        fixed_weight = np.array([[0.2566, -0.2468, -0.1282], [0.2287, -0.2199,  0.1502]])
        self.weight = Tensor(fixed_weight.T)
        self.bias = Tensor([0.0570, 0.4628])

    def __call__(self, x):
        """Apply the layer to ``x`` and return the resulting Tensor."""
        projected = x * self.weight
        return projected + self.bias
    

# Build the layer; the (3, 2) arguments are stored, but the parameters are the
# fixed constants defined inside Linear.
linear = Linear(3, 2)
print ('w: ', linear.weight.data)
print ('b: ', linear.bias.data)

# Forward pass: x * weight + bias for the 10x3 input defined above.
pred = linear(x)
print(pred.data)

# loss function

class MSELoss:
    """Squared-error criterion.

    NOTE: the result is the *elementwise* squared difference (same shape as the
    inputs); no mean or sum is taken. The returned Tensor is built without
    children, so it is not linked to ``pred`` or ``true`` in the autograd graph.
    """

    def __init__(self):
        """Nothing to configure."""

    def __call__(self, pred, true):
        """Return a Tensor holding ``(true - pred) ** 2`` computed entry by entry."""
        residual = true.data - pred.data
        return Tensor(np.square(residual))

criterion = MSELoss()
    
# Compare the predictions with the targets y; the result has one squared error
# per prediction entry (it is not reduced to a single scalar).
loss = criterion(pred, y)

print(loss.data)


# optimizer

w:  [[ 0.2566  0.2287]
 [-0.2468 -0.2199]
 [-0.1282  0.1502]]
b:  [0.057  0.4628]
[[-0.1497387   0.60490584]
 [-0.31557086  0.04936901]
 [-0.40582907  0.31630155]
 [ 0.4423881   0.9031764 ]
 [ 0.43290886  0.91529214]
 [ 0.5687283   0.8430968 ]
 [-0.19214053  0.04114512]
 [ 0.31530526  0.8295792 ]
 [-0.13224413  0.11979797]
 [-0.13686551  0.02207184]]
[[1.20705664e+00 7.83235312e-01]
 [1.04800545e-01 1.39479086e-01]
 [1.12027192e+00 7.99263828e-03]
 [2.62217140e+00 1.61919630e+00]
 [2.72892445e-01 3.04695368e-01]
 [3.60514194e-01 1.62383157e-03]
 [1.83835471e+00 2.54172176e-01]
 [1.33889246e+00 1.16266809e-01]
 [5.70806551e+00 2.37099051e+00]
 [6.09438658e-01 3.81711662e-01]]
