In [None]:
#hide
from fastai.gen_doc.nbdoc import *

### Mnożenie macierzy od podstaw

In [None]:
import torch
from torch import tensor

In [None]:
def matmul(a,b):
    """Multiply two 2-D tensors using three nested Python loops.

    Each output entry is accumulated in place, one scalar product at a time,
    over the dimension shared by `a` and `b`.

    Args:
        a: 2-D tensor of shape (rows, inner).
        b: 2-D tensor of shape (inner, cols).

    Returns:
        A tensor of shape (rows, cols) allocated with `torch.zeros`.

    Raises:
        AssertionError: if the column count of `a` differs from the row count of `b`.
    """
    n_rows, inner = a.shape   # rows and columns of the left operand
    inner_b, n_cols = b.shape
    assert inner == inner_b
    result = torch.zeros(n_rows, n_cols)
    for row in range(n_rows):
        for col in range(n_cols):
            for idx in range(inner):
                result[row, col] += a[row, idx] * b[idx, col]
    return result

In [None]:
# Two random matrices whose inner dimensions agree (28*28 == 784).
m1 = torch.randn(5,28*28)
m2 = torch.randn(784,10)

In [None]:
# Time a single call of the loop-based matmul on these matrices.
%time t1=matmul(m1, m2)

In [None]:
# Time the built-in `@` operator on the same operands for comparison.
%timeit -n 20 t2=m1@m2

### Arytmetyka składowych

In [None]:
# Elementwise addition of two 1-D tensors of equal length.
a = tensor([10., 6, -4])
b = tensor([2., 8, 7])
a + b

In [None]:
# Elementwise comparison of the two tensors.
a < b

In [None]:
# Reduce each elementwise comparison with .all().
(a < b).all(), (a==b).all()

In [None]:
# .item() extracts the mean as a plain Python number.
(a + b).mean().item()

In [None]:
# `*` between two same-shaped matrices multiplies elementwise (it is not `@`).
m = tensor([[1., 2, 3], [4,5,6], [7,8,9]])
m*m

In [None]:
# NOTE(review): m is 3x3 and n is 2x3, so these shapes cannot be broadcast and
# this cell raises a RuntimeError -- presumably an intentional demonstration.
n = tensor([[1., 2, 3], [4,5,6]])
m*n

In [None]:
def matmul(a,b):
    """Multiply two 2-D tensors, replacing the innermost loop by elementwise ops.

    Every output entry is the sum of the elementwise product of one row of `a`
    with one column of `b`.

    Args:
        a: 2-D tensor of shape (rows, inner).
        b: 2-D tensor of shape (inner, cols).

    Returns:
        A tensor of shape (rows, cols) allocated with `torch.zeros`.

    Raises:
        AssertionError: if the column count of `a` differs from the row count of `b`.
    """
    n_rows, inner = a.shape
    inner_b, n_cols = b.shape
    assert inner == inner_b
    result = torch.zeros(n_rows, n_cols)
    for row in range(n_rows):
        for col in range(n_cols):
            row_times_col = a[row] * b[:, col]
            result[row, col] = row_times_col.sum()
    return result

In [None]:
# Time the matmul version that uses elementwise row-by-column products.
%timeit -n 20 t3 = matmul(m1,m2)

#### Rozgłaszanie wartości skalarnej

In [None]:
# Comparing a tensor with a scalar applies the comparison to every element.
a = tensor([10., 6, -4])
a > 0

In [None]:
# Scalars broadcast in arithmetic too: subtract 5 from every entry, then divide by 2.73.
m = tensor([[1., 2, 3], [4,5,6], [7,8,9]])
(m - 5) / 2.73

#### Rozgłaszanie wektora na macierz

In [None]:
# A length-3 vector and a 3x3 matrix to broadcast against each other.
c = tensor([10.,20,30])
m = tensor([[1., 2, 3], [4,5,6], [7,8,9]])
m.shape,c.shape

In [None]:
# Adding the vector to the matrix relies on broadcasting.
m + c

In [None]:
# expand_as shows c expanded to the shape of m.
c.expand_as(m)

In [None]:
# Inspect the storage that backs the expanded tensor.
t = c.expand_as(m)
t.storage()

In [None]:
# Stride and shape of the expanded tensor.
t.stride(), t.shape

In [None]:
# Same addition with the operands swapped.
c + m

In [None]:
# A length-3 vector against a 2x3 matrix.
c = tensor([10.,20,30])
m = tensor([[1., 2, 3], [4,5,6]])
c+m

In [None]:
# NOTE(review): c has length 2 while the last dimension of m is 3, so this
# addition raises a RuntimeError -- presumably an intentional demonstration.
c = tensor([10.,20])
m = tensor([[1., 2, 3], [4,5,6]])
c+m

In [None]:
# unsqueeze(1) inserts a new axis at position 1, turning the length-3 vector into shape (3, 1).
c = tensor([10.,20,30])
m = tensor([[1., 2, 3], [4,5,6], [7,8,9]])
c = c.unsqueeze(1)
m.shape,c.shape

In [None]:
# Broadcast the (3, 1) tensor against the 3x3 matrix.
c+m

In [None]:
# Inspect the storage that backs the expanded (3, 1) tensor.
t = c.expand_as(m)
t.storage()

In [None]:
# Stride and shape of the expanded tensor.
t.stride(), t.shape

In [None]:
# Compare the shapes produced by unsqueezing at axis 0 and at axis 1.
c = tensor([10.,20,30])
c.shape, c.unsqueeze(0).shape,c.unsqueeze(1).shape

In [None]:
# Indexing with None is an alternative spelling for inserting an axis.
c.shape, c[None,:].shape,c[:,None].shape

In [None]:
# The trailing `:` may be dropped, and `...` can stand in for the leading axes.
c[None].shape,c[...,None].shape

In [None]:
def matmul(a,b):
    """Multiply two 2-D tensors one output row at a time via broadcasting.

    Each row of `a` is reshaped into a column, broadcast against all of `b`,
    and summed over the shared dimension to produce a full output row.

    Args:
        a: 2-D tensor of shape (rows, inner).
        b: 2-D tensor of shape (inner, cols).

    Returns:
        A tensor of shape (rows, cols) allocated with `torch.zeros`.

    Raises:
        AssertionError: if the column count of `a` differs from the row count of `b`.
    """
    n_rows, inner = a.shape
    inner_b, n_cols = b.shape
    assert inner == inner_b
    result = torch.zeros(n_rows, n_cols)
    for row in range(n_rows):
        # previous version: c[i,j] = (a[i,:] * b[:,j]).sum()
        as_column = a[row].unsqueeze(-1)   # shape (inner, 1), broadcasts over b's columns
        result[row] = (as_column * b).sum(dim=0)
    return result

In [None]:
# Time the matmul version that broadcasts one row of `a` against all of `b`.
%timeit -n 20 t4 = matmul(m1,m2)

### Konwencja sumacyjna Einsteina

In [None]:
def matmul(a,b):
    """Matrix product of `a` and `b` expressed in Einstein summation notation.

    The subscript string 'ik,kj->ij' sums over the shared index k.
    """
    product = torch.einsum('ik,kj->ij', a, b)
    return product

In [None]:
# Time the einsum-based matmul.
%timeit -n 20 t5 = matmul(m1,m2)

### Definiowanie i inicjalizowanie warstwy

In [None]:
def lin(x, w, b):
    """Affine (linear) layer: multiply the input by `w`, then add `b`.

    Args:
        x: input tensor.
        w: weight tensor, right-multiplied onto `x` with `@`.
        b: bias tensor, added to the product (broadcast if needed).

    Returns:
        The tensor `x @ w + b`.
    """
    projected = x @ w
    return projected + b

In [None]:
# Random inputs of shape (200, 100) and 200 random targets.
x = torch.randn(200, 100)
y = torch.randn(200)

In [None]:
# Parameters for two linear layers (100 -> 50 -> 1): standard-normal weights, zero biases.
w1 = torch.randn(100,50)
b1 = torch.zeros(50)
w2 = torch.randn(50,1)
b2 = torch.zeros(1)

In [None]:
# Output of the first linear layer.
l1 = lin(x, w1, b1)
l1.shape

In [None]:
# Mean and standard deviation of the first layer's outputs under this initialisation.
l1.mean(), l1.std()

In [None]:
# Multiply by 50 successive standard-normal 100x100 matrices and show a 5x5 corner.
# Presumably demonstrates how the magnitude of activations evolves with unscaled weights.
x = torch.randn(200, 100)
for i in range(50): x = x @ torch.randn(100,100)
x[0:5,0:5]

In [None]:
# Same experiment with the random weights scaled by 0.01.
x = torch.randn(200, 100)
for i in range(50): x = x @ (torch.randn(100,100) * 0.01)
x[0:5,0:5]

In [None]:
# Same experiment with the random weights scaled by 0.1.
x = torch.randn(200, 100)
for i in range(50): x = x @ (torch.randn(100,100) * 0.1)
x[0:5,0:5]

In [None]:
# Standard deviation of the activations left over from the 0.1-scaled experiment.
x.std()

In [None]:
# Fresh random inputs and targets.
x = torch.randn(200, 100)
y = torch.randn(200)

In [None]:
# Re-initialise the parameters, dividing each weight matrix by the square root
# of its number of input features (100 and 50 respectively).
from math import sqrt
w1 = torch.randn(100,50) / sqrt(100)
b1 = torch.zeros(50)
w2 = torch.randn(50,1) / sqrt(50)
b2 = torch.zeros(1)

In [None]:
# Mean and standard deviation of the first layer's outputs with the scaled weights.
l1 = lin(x, w1, b1)
l1.mean(),l1.std()

In [None]:
def relu(x):
    """Rectified linear unit: clamp every element of `x` to be at least 0."""
    floor = 0.
    return x.clamp_min(floor)

In [None]:
# Mean and standard deviation after applying relu to the first layer's outputs.
l2 = relu(l1)
l2.mean(),l2.std()

In [None]:
# Repeat the 50-layer experiment with a relu after every multiplication (weights scaled by 0.1).
x = torch.randn(200, 100)
for i in range(50): x = relu(x @ (torch.randn(100,100) * 0.1))
x[0:5,0:5]

In [None]:
# Same experiment with weights scaled by sqrt(2 / n_in), where n_in = 100.
x = torch.randn(200, 100)
for i in range(50): x = relu(x @ (torch.randn(100,100) * sqrt(2/100)))
x[0:5,0:5]

In [None]:
# Fresh random inputs and targets.
x = torch.randn(200, 100)
y = torch.randn(200)

In [None]:
# Re-initialise both layers with the sqrt(2 / n_in) scaling (n_in = 100 and 50).
w1 = torch.randn(100,50) * sqrt(2 / 100)
b1 = torch.zeros(50)
w2 = torch.randn(50,1) * sqrt(2 / 50)
b2 = torch.zeros(1)

In [None]:
# Mean and standard deviation of the relu activations under this initialisation.
l1 = lin(x, w1, b1)
l2 = relu(l1)
l2.mean(), l2.std()

In [None]:
def model(x):
    """Forward pass of the two-layer network: linear -> relu -> linear.

    Reads the module-level parameters `w1`, `b1`, `w2` and `b2`.

    Args:
        x: input tensor passed to the first linear layer.

    Returns:
        The output of the second linear layer.
    """
    hidden = relu(lin(x, w1, b1))
    return lin(hidden, w2, b2)

In [None]:
# Run the forward pass and check the shape of the predictions.
out = model(x)
out.shape

In [None]:
def mse(output, targ):
    """Mean squared error between predictions and targets.

    Args:
        output: predictions; a trailing dimension of size 1 is squeezed away
            before subtracting the targets.
        targ: target values.

    Returns:
        The mean of the squared differences.
    """
    residual = output.squeeze(-1) - targ
    return residual.pow(2).mean()

In [None]:
# Mean squared error of the predictions against the targets.
loss = mse(out, y)

### Gradienty i przejście wstecz

In [None]:
def mse_grad(inp, targ):
    """Store the gradient of the MSE loss on `inp.g`.

    Args:
        inp: predictions that were fed to the loss; receives a `.g` attribute.
        targ: target values.
    """
    # gradient of the loss with respect to the output of the previous layer
    n_samples = inp.shape[0]
    residual = inp.squeeze() - targ
    inp.g = 2. * residual.unsqueeze(-1) / n_samples

In [None]:
def relu_grad(inp, out):
    """Store the gradient of relu on `inp.g`.

    Args:
        inp: the tensor relu was applied to; receives a `.g` attribute.
        out: the relu output, whose `.g` attribute holds the incoming gradient.
    """
    # gradient of relu with respect to the input activations
    positive_mask = (inp>0).float()
    inp.g = positive_mask * out.g

In [None]:
def lin_grad(inp, out, w, b):
    """Store the gradients of a linear layer on `inp.g`, `w.g` and `b.g`.

    Args:
        inp: input that was fed to the layer.
        out: output of the layer, whose `.g` attribute holds the incoming gradient.
        w: weight tensor of the layer.
        b: bias tensor of the layer.
    """
    # gradient of the matmul with respect to the input
    upstream = out.g
    inp.g = upstream @ w.t()
    w.g = inp.t() @ upstream
    b.g = upstream.sum(0)

### Biblioteka SymPy

In [None]:
# Differentiate sx**2 symbolically with respect to sx.
from sympy import symbols,diff
sx,sy = symbols('sx sy')
diff(sx**2, sx)

In [None]:
def forward_and_backward(inp, targ):
    """Run one forward pass and one backward pass of the two-layer network.

    Reads the module-level parameters `w1`, `b1`, `w2`, `b2`. Returns nothing;
    the gradient helpers store their results on `.g` attributes.

    Args:
        inp: input tensor for the first layer.
        targ: target values for the loss.
    """
    # forward pass:
    hidden_pre = inp @ w1 + b1
    hidden_act = relu(hidden_pre)
    out = hidden_act @ w2 + b2
    # the loss value itself is not actually needed for the backward pass!
    loss = mse(out, targ)

    # backward pass: walk the layers in reverse order
    mse_grad(out, targ)
    lin_grad(hidden_act, out, w2, b2)
    relu_grad(hidden_pre, hidden_act)
    lin_grad(inp, hidden_pre, w1, b1)

### Modyfikowanie modelu

In [None]:
class Relu():
    """ReLU layer that remembers its input and output for the backward pass."""

    def __call__(self, inp):
        """Apply relu to `inp`, caching both the input and the result."""
        self.inp, self.out = inp, inp.clamp_min(0.)
        return self.out

    def backward(self):
        """Store the gradient on `self.inp.g`, using the gradient in `self.out.g`."""
        positive_mask = (self.inp>0).float()
        self.inp.g = positive_mask * self.out.g

In [None]:
class Lin():
    """Linear layer that remembers its input and output for the backward pass."""

    def __init__(self, w, b):
        """Keep references to the weight tensor `w` and the bias tensor `b`."""
        self.w = w
        self.b = b

    def __call__(self, inp):
        """Compute `inp @ w + b`, caching the input and the result."""
        self.inp = inp
        self.out = self.inp @ self.w + self.b
        return self.out

    def backward(self):
        """Store gradients on `self.inp.g`, `self.w.g` and `self.b.g`."""
        upstream = self.out.g
        self.inp.g = upstream @ self.w.t()
        self.w.g = self.inp.t() @ upstream
        self.b.g = upstream.sum(0)

In [None]:
class Mse():
    """Mean-squared-error loss that remembers its arguments for the backward pass."""

    def __call__(self, inp, targ):
        """Return the mean squared difference between `inp` (squeezed) and `targ`."""
        self.inp, self.targ = inp, targ
        residual = inp.squeeze() - targ
        self.out = residual.pow(2).mean()
        return self.out

    def backward(self):
        """Store the gradient of the loss on `self.inp.g`."""
        residual = self.inp.squeeze() - self.targ
        self.inp.g = 2. * residual.unsqueeze(-1) / self.targ.shape[0]

In [None]:
class Model():
    """Two-layer network (Lin -> Relu -> Lin) with an Mse loss attached."""

    def __init__(self, w1, b1, w2, b2):
        """Build the layer stack from the given weights and biases."""
        first, second = Lin(w1,b1), Lin(w2,b2)
        self.layers = [first, Relu(), second]
        self.loss = Mse()

    def __call__(self, x, targ):
        """Feed `x` through every layer in order and return the loss against `targ`."""
        activations = x
        for layer in self.layers:
            activations = layer(activations)
        return self.loss(activations, targ)

    def backward(self):
        """Back-propagate: the loss first, then the layers from last to first."""
        self.loss.backward()
        for layer in self.layers[::-1]:
            layer.backward()

In [None]:
# Build the class-based model from the current parameters (rebinds the name `model`).
model = Model(w1, b1, w2, b2)

In [None]:
# Forward pass: calling the model returns the loss.
loss = model(x, y)

In [None]:
# Backward pass: stores gradients on the `.g` attributes of the tensors involved.
model.backward()

### Implementacja przy użyciu biblioteki PyTorch

In [None]:
class LayerFunction():
    """Base class for layers: caches the call arguments and the output.

    Subclasses implement `forward(*args)` and `bwd(out, *args)`; `__call__`
    and `backward` take care of storing the values and passing them back in.
    """

    def __call__(self, *args):
        """Run `forward` on `args`, remembering both the arguments and the result."""
        self.args = args
        self.out = self.forward(*args)
        return self.out

    # The placeholders accept *args so that calling an unimplemented method with
    # the arguments supplied by __call__/backward raises the intended error
    # instead of a TypeError about the argument count.
    def forward(self, *args):
        """Compute the layer output; must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError('nie zaimplementowano')

    def bwd(self, *args):
        """Store gradients given `(out, *args)`; must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError('nie zaimplementowano')

    def backward(self):
        """Call `bwd` with the cached output followed by the cached arguments."""
        self.bwd(self.out, *self.args)

In [None]:
class Relu(LayerFunction):
    """ReLU layer expressed through the LayerFunction forward/bwd hooks."""

    def forward(self, inp):
        """Clamp every element of `inp` to be at least 0."""
        return inp.clamp_min(0.)

    def bwd(self, out, inp):
        """Store the gradient on `inp.g`, using the gradient in `out.g`."""
        positive_mask = (inp>0).float()
        inp.g = positive_mask * out.g

In [None]:
class Lin(LayerFunction):
    """Linear layer expressed through the LayerFunction forward/bwd hooks."""

    def __init__(self, w, b):
        """Keep references to the weight tensor `w` and the bias tensor `b`."""
        self.w,self.b = w,b

    def forward(self, inp):
        """Return `inp @ w + b`."""
        return inp@self.w + self.b

    def bwd(self, out, inp):
        """Store gradients on `inp.g`, `self.w.g` and `self.b.g`.

        Args:
            out: the layer output, whose `.g` attribute holds the incoming gradient.
            inp: the input that was passed to `forward`.
        """
        inp.g = out.g @ self.w.t()
        # Use the `inp` argument here: LayerFunction.__call__ stores only
        # `self.args` and `self.out`, so `self.inp` does not exist on this class.
        self.w.g = inp.t() @ out.g
        self.b.g = out.g.sum(0)

In [None]:
class Mse(LayerFunction):
    """Mean-squared-error loss expressed through the LayerFunction hooks."""

    def forward(self, inp, targ):
        """Return the mean squared difference between `inp` (squeezed) and `targ`."""
        residual = inp.squeeze() - targ
        return residual.pow(2).mean()

    def bwd(self, out, inp, targ):
        """Store the gradient of the loss on `inp.g`."""
        residual = inp.squeeze() - targ
        inp.g = 2 * residual.unsqueeze(-1) / targ.shape[0]

In [None]:
from torch.autograd import Function

class MyRelu(Function):
    """ReLU written as a custom `torch.autograd.Function`."""

    @staticmethod
    def forward(ctx, i):
        """Clamp `i` at 0 and save `i` on the context for the backward pass."""
        ctx.save_for_backward(i)
        return i.clamp_min(0.)

    @staticmethod
    def backward(ctx, grad_output):
        """Pass `grad_output` through wherever the saved input was positive."""
        (saved_input,) = ctx.saved_tensors
        positive_mask = (saved_input>0).float()
        return grad_output * positive_mask

In [None]:
import torch.nn as nn

class LinearLayer(nn.Module):
    """Linear layer as an `nn.Module` with registered weight and bias parameters.

    The weight has shape (n_out, n_in) and is drawn from a standard normal
    scaled by sqrt(2 / n_in); the bias has shape (n_out,) and starts at zero.
    """

    def __init__(self, n_in, n_out):
        super().__init__()
        init_scale = sqrt(2/n_in)
        initial_weight = torch.randn(n_out, n_in) * init_scale
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(torch.zeros(n_out))

    def forward(self, x):
        """Return `x @ weight.T + bias`."""
        projected = x @ self.weight.t()
        return projected + self.bias

In [None]:
# NOTE(review): this rebinds `lin`, which earlier in the notebook names the
# plain linear function -- re-running earlier cells afterwards would pick up
# this module instead; confirm that is acceptable.
lin = LinearLayer(10,2)
# The module exposes two parameters, in the order they were assigned in __init__.
p1,p2 = lin.parameters()
p1.shape,p2.shape

In [None]:
class Model(nn.Module):
    """Two-layer network built from PyTorch modules, returning its loss directly."""

    def __init__(self, n_in, nh, n_out):
        super().__init__()
        hidden = nn.Linear(n_in,nh)
        head = nn.Linear(nh,n_out)
        self.layers = nn.Sequential(hidden, nn.ReLU(), head)
        self.loss = mse

    def forward(self, x, targ):
        """Run `x` through the layers, squeeze the predictions and return the loss."""
        predictions = self.layers(x).squeeze()
        return self.loss(predictions, targ)

In [None]:
class Model(Module):
    """Same two-layer network, derived from `Module` instead of `nn.Module`.

    NOTE(review): `Module` is not defined in the visible part of this notebook;
    presumably it is a base class that removes the need to call
    `super().__init__()` -- confirm where it is imported from.
    """

    def __init__(self, n_in, nh, n_out):
        hidden = nn.Linear(n_in,nh)
        head = nn.Linear(nh,n_out)
        self.layers = nn.Sequential(hidden, nn.ReLU(), head)
        self.loss = mse

    def forward(self, x, targ):
        """Run `x` through the layers, squeeze the predictions and return the loss."""
        predictions = self.layers(x).squeeze()
        return self.loss(predictions, targ)