In [64]:
import torch 
from tnn import DecisionUnit
from torch.distributions import Categorical

# Demo: send every row of a batch through exactly ONE branch, where the branch
# for each row is drawn from that row's categorical distribution.
x = torch.tensor([[1, -1], [1, -1], [1, -1], [1, -1]], dtype=torch.float)
# Row i of `probs` is the distribution over the three branches for row i of `x`.
# Rows 0-2 are one-hot (the choice is forced); only row 3 is actually random.
probs = torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0.3, 0.3, 0.4]], dtype=torch.float)
print("probs:\n", probs)
dist = Categorical(probs)
# Draw a few batches of branch indices; sampling is unseeded, so the last
# entry differs between runs.
for i in range(5):
    print(dist.sample(), end=";   ")
print()

sample = dist.sample()
print("sample of dist of batch:\n", sample)

# Toy branches that are easy to tell apart in the result: 0*x, x and 2*x.
b1 = lambda x : 0*x
b2 = lambda x : x
b3 = lambda x : 2 * x
branches = [b1, b2, b3]

# Allocated lazily from the first branch result, so the buffer takes its batch
# size from `x` and its trailing shape/dtype/device from the branch output
# instead of relying on a hard-coded batch size.
outputs = None

for index, branch in enumerate(branches):
    # Rows whose sampled branch index equals `index`.
    mask = (sample == index)
    if mask.any():
        out_subset = branch(x[mask])  # one batched call per selected branch
        if outputs is None:
            outputs = out_subset.new_empty((x.shape[0], *out_subset.shape[1:]))
        # Boolean-mask assignment writes all selected rows back in one step,
        # keeping them at their original positions in the batch.
        outputs[mask] = out_subset
outputs


probs:
 tensor([[1.0000, 0.0000, 0.0000],
        [0.0000, 1.0000, 0.0000],
        [0.0000, 0.0000, 1.0000],
        [0.3000, 0.3000, 0.4000]])
tensor([0, 1, 2, 1]);   tensor([0, 1, 2, 2]);   tensor([0, 1, 2, 0]);   tensor([0, 1, 2, 0]);   tensor([0, 1, 2, 0]);   
sample of dist of batch:
 tensor([0, 1, 2, 0])


tensor([[ 0., -0.],
        [ 1., -1.],
        [ 2., -2.],
        [ 0., -0.]])

In [None]:
import torch 
from tnn import DecisionUnit
from torch.distributions import Categorical

# Demo: send every row of a batch through exactly ONE branch, this time picking
# the most probable branch per row (argmax) instead of sampling.
x = torch.tensor([[1, -1], [1, -1], [1, -1], [1, -1]], dtype=torch.float)
# Row i of `probs` is the distribution over the three branches for row i of `x`.
probs = torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0.3, 0.3, 0.4]], dtype=torch.float)

# Deterministic choice: index of the largest probability in each row.
sample = torch.argmax(probs, dim=1)
print("sample of dist of batch:\n", sample)

# Toy branches that are easy to tell apart in the result: 0*x, x and 2*x.
b1 = lambda x : 0*x
b2 = lambda x : x
b3 = lambda x : 2 * x
branches = [b1, b2, b3]

# Allocated lazily from the first branch result, so the buffer takes its batch
# size from `x` and its trailing shape/dtype/device from the branch output
# instead of relying on a hard-coded batch size.
outputs = None

for index, branch in enumerate(branches):
    # Rows whose chosen branch index equals `index`.
    mask = (sample == index)
    if mask.any():
        out_subset = branch(x[mask])  # one batched call per selected branch
        if outputs is None:
            outputs = out_subset.new_empty((x.shape[0], *out_subset.shape[1:]))
        # Boolean-mask assignment writes all selected rows back in one step,
        # keeping them at their original positions in the batch.
        outputs[mask] = out_subset
outputs


In [35]:
import torch 
from tnn import DecisionUnit

# Demo: combine the outputs of several branches into one weighted sum per row.
batch = torch.tensor([[i, 2*i, 3*i] for i in range(4)], dtype= torch.float)
print("batch:\n", batch)

# Run the real DecisionUnit on the batch. Its result is kept under its own name
# so it is no longer silently overwritten by the hand-written weights below.
# NOTE(review): presumably 3 = features per row and 2 = number of branches -- confirm against tnn.
d = DecisionUnit(3, 2)
unit_probs = d(batch)
# Hand-picked weights, one row per batch row and one column per branch. They are
# not normalised; in every row the first weight exceeds the second by exactly 1.
probs = torch.tensor([[1 + i, 0 + i] for i in range(4)], dtype= torch.float)
print("probs for two different branches (brach 1 has probability 1):\n", probs)

# Constant stand-in branches so the result can be checked by hand:
# b1 maps every row to +1, b2 maps every row to -1; each returns shape (rows, 1).
b1 = lambda x : torch.ones((x.shape[0], 1))
b2 = lambda x : -torch.ones((x.shape[0], 1))
branches = torch.stack([b1(batch), b2(batch)])  # shape (branch, row, 1)
print("outputs of different branches\n" , branches)


# Put the branch axis first and append a size-1 axis so the weights line up
# with `branches` for an element-wise product.
probs_t = probs.transpose(0,1)
E = branches * probs_t.unsqueeze(2)

print("branches * probs:\n", E, "\nof shape", E.shape )

# Summing over the branch axis gives the weighted combination per row:
# (1 + i) * 1 + i * (-1) = 1 for every row i.
E.sum(dim=0)


batch:
 tensor([[0., 0., 0.],
        [1., 2., 3.],
        [2., 4., 6.],
        [3., 6., 9.]])
probs for two different branches (brach 1 has probability 1):
 tensor([[1., 0.],
        [2., 1.],
        [3., 2.],
        [4., 3.]])
outputs of different branches
 tensor([[[ 1.],
         [ 1.],
         [ 1.],
         [ 1.]],

        [[-1.],
         [-1.],
         [-1.],
         [-1.]]])
branches * probs:
 tensor([[[ 1.],
         [ 2.],
         [ 3.],
         [ 4.]],

        [[-0.],
         [-1.],
         [-2.],
         [-3.]]]) 
of shape torch.Size([2, 4, 1])


tensor([[1.],
        [1.],
        [1.],
        [1.]])

Kako dela autograd: 

In [1]:
import torch
import torch.nn as nn
from models import FeedForward


# Build the small network used in the autograd experiments and display its
# learnable tensors.
# NOTE(review): the constructor arguments look like layer sizes -- confirm against models.FeedForward.
model = FeedForward(2, 2, 1)
[*model.parameters()]

[Parameter containing:
 tensor([[ 0.5200,  0.0397],
         [ 0.4938, -0.6666]], requires_grad=True),
 Parameter containing:
 tensor([0.6722, 0.1079], requires_grad=True),
 Parameter containing:
 tensor([[-0.1349,  0.3691]], requires_grad=True),
 Parameter containing:
 tensor([0.4685], requires_grad=True)]

In [11]:
# Forward pass on an all-ones input of length 2; with training=True the model's
# full return value is printed.
x = torch.ones(2)
print(model(x, training=True))

# We multiply from the right by the transposed matrix, which is the same as
# multiplying a row by a row: dot the first row of the first parameter with
# the input and add the first entry of the second parameter.
params = list(model.parameters())
torch.dot(params[0][0], x) + params[1][0]

[tensor([-0.9274, -0.4060], grad_fn=<ViewBackward0>), tensor([0., 0.], grad_fn=<ReluBackward0>), tensor([0., 0.], grad_fn=<MulBackward0>), tensor([-0.0010], grad_fn=<ViewBackward0>)]


tensor(-0.9274, grad_fn=<AddBackward0>)

Let's use `torch.autograd.grad`:

In [12]:

# Differentiate the model output with respect to every parameter that
# requires a gradient; the result has one entry per such parameter.
prediction = model(x)
trainable = [param for param in model.parameters() if param.requires_grad]
grads = torch.autograd.grad(
    outputs=prediction,
    inputs=trainable,
    create_graph=True,  # keep the graph so these gradients can be differentiated again
)
grads

(tensor([[0., 0.],
         [0., 0.]], grad_fn=<TBackward0>),
 tensor([0., 0.], grad_fn=<ViewBackward0>),
 tensor([[0., 0.]], grad_fn=<TBackward0>),
 tensor([1.]))

In [15]:
# Gradient with respect to the first parameter returned by model.parameters().
g = grads[0]
# Norm taken along dim 0, then a hinge: the result is 0 wherever that norm is
# at least 1 and 1 - norm otherwise.
torch.relu(1 - g.norm(dim=0))


tensor([1., 1.], grad_fn=<ReluBackward0>)