In [1]:
import torch

### Unsqueeze: add a new dim to the tensor.  
* For example, torch.unsqueeze(x, 0) can change the shape of a tensor from [3,4] to [1,3,4].
* torch.unsqueeze(x, 1) can change the shape of a tensor from [2,3] to [2,1,3].
* torch.unsqueeze(x, 2) can change the shape of a tensor from [2,3] to [2,3,1].

In [2]:
# A 3x4 integer matrix used to show where unsqueeze inserts the new size-1 axis.
x = torch.tensor([
    [ 1,  2,  3,  4],
    [11, 12, 13, 14],
    [21, 22, 23, 24],
])
# Original shape, then the shape after inserting an axis at position 0, 1 and 2.
x.shape, x.unsqueeze(0).shape, x.unsqueeze(1).shape, x.unsqueeze(2).shape

(torch.Size([3, 4]),
 torch.Size([1, 3, 4]),
 torch.Size([3, 1, 4]),
 torch.Size([3, 4, 1]))

### Broadcasting: a1 broadcasts to a2 and b1 broadcasts to b2, so a1 + b1 == a2 + b2

In [3]:
# a1 has shape (2, 1, 3): its single row is repeated along axis 1 when broadcast.
a1 = torch.tensor([[[1, 2, 3]],
                   [[3, 4, 5]]])
# a2 has shape (2, 3, 3): a1 with each row written out three times.
a2 = torch.tensor([[[1, 2, 3], [1, 2, 3], [1, 2, 3]],
                   [[3, 4, 5], [3, 4, 5], [3, 4, 5]]])
# b1 has shape (1, 3, 3): the single matrix is repeated along axis 0 when broadcast.
b1 = torch.tensor([[[1, 2, 3],
                    [4, 5, 6],
                    [7, 8, 9]]])
# b2 has shape (2, 3, 3): b1 with the matrix written out twice.
b2 = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                   [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])

# The broadcast sum must match the sum of the explicitly expanded tensors.
broadcast_sum = a1 + b1
print(torch.equal(broadcast_sum, a2 + b2))
broadcast_sum

True


tensor([[[ 2,  4,  6],
         [ 5,  7,  9],
         [ 8, 10, 12]],

        [[ 4,  6,  8],
         [ 7,  9, 11],
         [10, 12, 14]]])

### Construction of a custom Dataset 

In [4]:
from torch.utils.data import IterableDataset, DataLoader
class MyIterableDataset(IterableDataset):
    """Toy iterable dataset yielding (letter, position-as-string) pairs for 'a'..'j'."""

    def __iter__(self):
        # Each sample is a 2-tuple such as ('a', '0').
        return ((letter, str(position)) for position, letter in enumerate('abcdefghij'))

# The 10 samples are grouped into batches of 4, 4 and 2, collated field by field.
print(list(DataLoader(MyIterableDataset(), batch_size=4)))

[[('a', 'b', 'c', 'd'), ('0', '1', '2', '3')], [('e', 'f', 'g', 'h'), ('4', '5', '6', '7')], [('i', 'j'), ('8', '9')]]


### Reshape 

In [5]:
# NOTE: despite its name, this tensor has shape (2, 6); the name is kept because a later cell refers to it.
shape_2x4 = torch.tensor([[1., 2., 3., 4., 5., 6.],
                          [7., 8., 9., 0., 11., 12.]])
# A -1 asks view() to infer that dimension from the element count (12 = 2 * 2 * 3).
explicit_view = shape_2x4.view(2, 2, 3)
inferred_middle = shape_2x4.view(2, -1, 3)
inferred_last = shape_2x4.view(2, 2, -1)
print(torch.equal(explicit_view, inferred_middle),
      torch.equal(explicit_view, inferred_last))
# Swap the last two axes: (2, 2, 3) -> (2, 3, 2).
inferred_middle.transpose(1, 2)

True True


tensor([[[ 1.,  4.],
         [ 2.,  5.],
         [ 3.,  6.]],

        [[ 7.,  0.],
         [ 8., 11.],
         [ 9., 12.]]])

In [6]:
# Display the source tensor again: it still shows its original 2-row layout after view()/transpose().
shape_2x4

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.],
        [ 7.,  8.,  9.,  0., 11., 12.]])

### Contiguity and Memory Continuity

In [7]:
# 1-D tensor of the integers 0..7 — the starting point for the contiguity examples.
continous_memory = torch.arange(8)
continous_memory

tensor([0, 1, 2, 3, 4, 5, 6, 7])

In [8]:
# Reinterpret the same 8 elements as 2 rows of 4; the row-by-row order is unchanged.
still_continous_memory = continous_memory.reshape(2,4)
still_continous_memory

tensor([[0, 1, 2, 3],
        [4, 5, 6, 7]])

In [9]:
# .T swaps the two axes, giving a 4x2 tensor whose rows are the columns of the 2x4 tensor.
no_continous_as_shown = still_continous_memory.T
no_continous_as_shown

tensor([[0, 4],
        [1, 5],
        [2, 6],
        [3, 7]])

* still_continous_memory and no_continous_as_shown index the same elements in a different order.
* But they share the same underlying memory block, so a write through one is visible through the other.

In [10]:
no_continous_as_shown[0,1]=8 # overwrite the element 4 (row 0, column 1 of the transposed tensor) with 8
still_continous_memory[1,0]  # the same element read at [1,0] of the 2x4 tensor is now 8 as well

tensor(8)

* `view` can reinterpret the elements under a new shape when the tensor is stored contiguously in memory.

In [11]:
# Both tensors show the 8 that was written through the transposed tensor in the previous cell.
print(continous_memory)
print(still_continous_memory)
# Viewing the 2x4 tensor as a single row of 8 works here.
still_continous_memory.view(1,8)

tensor([0, 1, 2, 3, 8, 5, 6, 7])
tensor([[0, 1, 2, 3],
        [8, 5, 6, 7]])


tensor([[0, 1, 2, 3, 8, 5, 6, 7]])

* But this does not work for no_continous_as_shown, whose elements are no longer laid out contiguously in memory.
* We might expect to get tensor([[0, 8, 1, 5], [2, 6, 3, 7]]) from no_continous_as_shown.view(2,4), but it raises an error instead.

In [12]:
print(no_continous_as_shown)
# The result one might expect from reading the 4x2 tensor row by row.
print('expect:', torch.tensor([[0, 8, 1, 5], [2, 6, 3, 7]]))
# view() raises RuntimeError for this tensor; catch it so the message is shown instead of a traceback.
try:
    no_continous_as_shown.view(2,4)
except RuntimeError as e:
    print('[[ Error ]]', e)

tensor([[0, 8],
        [1, 5],
        [2, 6],
        [3, 7]])
expect: tensor([[0, 8, 1, 5],
        [2, 6, 3, 7]])
[[ Error ]] view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.


* Now we need tensor.contiguous(), which returns a tensor with the same values laid out contiguously in memory, so view works again.

In [13]:
# After contiguous(), view(2,4) succeeds and yields the expected row-by-row result.
no_continous_as_shown.contiguous().view(2,4)

tensor([[0, 8, 1, 5],
        [2, 6, 3, 7]])

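* reshape is not like view: it returns a view when the memory layout allows it and otherwise copies the data into a new tensor, so it also works on non-contiguous tensors.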

In [14]:
# reshape() handles the transposed tensor directly and gives the same result as contiguous().view(2,4).
no_continous_as_shown.reshape(2,4)

tensor([[0, 8, 1, 5],
        [2, 6, 3, 7]])

### Softmax

torch.nn.Softmax(dim=None) 
$$
\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
$$ 

In [15]:
# Softmax over the last axis: every row is normalised independently.
sm = torch.nn.Softmax(dim=-1)
# The second row is the first row shifted by +2; softmax gives the same result for both.
logits = torch.tensor([[0., 4., 1., 5.],
                       [2., 6., 3., 7.]])
sm(logits)

tensor([[0.0048, 0.2641, 0.0131, 0.7179],
        [0.0048, 0.2641, 0.0131, 0.7179]])

$$
0.0048 \approx \frac{e^{0}}{e^{0}+e^{4}+e^{1}+e^{5}} = \frac{e^{2}}{e^{2}+e^{6}+e^{3}+e^{7}}
$$ 

### High dimension Matrix Multiplication

* Calculate the three multiplications separately

In [16]:
# Three independent (1x4) @ (4x1) products, i.e. three dot products.
row_pairs = [
    ([1., 2., 3., 4.], [1., 2., 3., 4.]),
    ([4., 3., 2., 1.], [1., 1., 1., 1.]),
    ([1., 1., 1., 1.], [1., 1., 1., 1.]),
]
for pair_index, (left_row, right_row) in enumerate(row_pairs):
    a = torch.tensor([left_row])
    b = torch.tensor([right_row])
    # Transposing the last two axes turns the 1x4 row into a 4x1 column.
    c = torch.matmul(a, b.transpose(-2, -1))
    if pair_index == 0:
        # The shapes are only reported for the first pair.
        print(a.shape, b.shape, c.shape)
    print(c)

torch.Size([1, 4]) torch.Size([1, 4]) torch.Size([1, 1])
tensor([[30.]])
tensor([[10.]])
tensor([[4.]])


* All at once

In [17]:
# Three 1x4 row vectors stacked under a leading batch axis: shape (1, 3, 1, 4).
a = torch.tensor([[1., 2., 3., 4.],
                  [4., 3., 2., 1.],
                  [1., 1., 1., 1.]]).view(1, 3, 1, 4)
b = torch.tensor([[1., 2., 3., 4.],
                  [1., 1., 1., 1.],
                  [1., 1., 1., 1.]]).view(1, 3, 1, 4)
# matmul treats the leading axes as batch dimensions and multiplies the trailing
# (1x4) @ (4x1) matrices pairwise, giving one 1x1 result per pair.
c = a @ b.transpose(-2, -1)
print(a.shape, b.shape, c.shape)
c

torch.Size([1, 3, 1, 4]) torch.Size([1, 3, 1, 4]) torch.Size([1, 3, 1, 1])


tensor([[[[30.]],

         [[10.]],

         [[ 4.]]]])

### Only want upper triangle

In [18]:
# diagonal=-2: keep everything on or above the second sub-diagonal — for a 3x3 matrix, every element.
torch.triu(torch.ones(3, 3, dtype=int), diagonal=-2)

tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]])

In [19]:
# diagonal=-1: keep everything on or above the first sub-diagonal; only the bottom-left corner is zeroed.
torch.triu(torch.ones(3, 3, dtype=int), diagonal=-1)

tensor([[1, 1, 1],
        [1, 1, 1],
        [0, 1, 1]])

In [20]:
# diagonal=0: keep the main diagonal and everything above it.
torch.triu(torch.ones(3, 3, dtype=int), diagonal=0)

tensor([[1, 1, 1],
        [0, 1, 1],
        [0, 0, 1]])

In [21]:
# diagonal=1: keep only the elements strictly above the main diagonal.
torch.triu(torch.ones(3, 3, dtype=int), diagonal=1)

tensor([[0, 1, 1],
        [0, 0, 1],
        [0, 0, 0]])

In [22]:
# diagonal=2: keep only the elements on or above the second super-diagonal — here just the top-right corner.
torch.triu(torch.ones(3, 3, dtype=int), diagonal=2)

tensor([[0, 0, 1],
        [0, 0, 0],
        [0, 0, 0]])

### Mask

In [24]:
# Integer mask with 1s strictly above the main diagonal: the positions to blank out.
mask = torch.triu(torch.ones(3, 3, dtype=int), diagonal=1)
print(mask)
a = torch.tensor(
    [[11, 12, 13],
     [11, 12, 13],
     [11, 12, 13]])
# masked_fill requires a boolean mask; passing the int64 mask directly raises a
# RuntimeError on current PyTorch releases, so convert it explicitly. The printed
# integer mask above is left unchanged.
masked = a.masked_fill(mask.bool(), 0)
masked

tensor([[0, 1, 1],
        [0, 0, 1],
        [0, 0, 0]])


tensor([[11,  0,  0],
        [11, 12,  0],
        [11, 12, 13]])