In [2]:
# Print the installed PyTorch version so the recorded outputs can be tied to a specific build.
import torch
print(torch.__version__)

1.12.0+cu113


In [3]:
# understand matrix multiply in pytorch

# torch.matmul(tensor1, tensor2, out=None) → Tensor
##Matrix product of two tensors. see: https://pytorch.org/docs/stable/torch.html?highlight=matmul#torch.matmul
#The behavior depends on the dimensionality of the tensors as follows:

#If both tensors are 1-dimensional, the dot product (scalar) is returned.
#If both arguments are 2-dimensional, the matrix-matrix product is returned.
#If the first argument is 1-dimensional and the second argument is 2-dimensional, a 1 is prepended to its dimension for the purpose of the matrix multiply. After the matrix multiply, the prepended dimension is removed.
#If the first argument is 2-dimensional and the second argument is 1-dimensional, the matrix-vector product is returned.
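#If both arguments are at least 1-dimensional and at least one argument is N-dimensional (where N > 2), then a batched matrix multiply is returned.
#The non-matrix (i.e. batch) dimensions are broadcast, so a 3-D tensor can be multiplied by a single 2-D matrix.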

## 1D vector x 1D vector

In [16]:
# A rank-1 tensor (plain vector) holding two float32 values.
tensor1 = torch.tensor([1, 2], dtype=torch.float32)
# .shape and .size() report the same torch.Size, so the last two printed lines match.
print(tensor1, tensor1.shape, tensor1.size(), sep="\n")

tensor([1., 2.])
torch.Size([2])
torch.Size([2])


In [17]:
# Second rank-1 operand; tensor1 is the vector [1., 2.] built in the previous cell.
tensor2 = torch.tensor([1, 3], dtype=torch.float32)
# Two 1-D inputs give the dot product: 1*1 + 2*3 = 7, returned as a 0-dim (scalar) tensor.
output = torch.matmul(tensor1, tensor2)
print(output, output.size(), sep="\n")

tensor(7.)
torch.Size([])


## 1D vector x 2D matrix

In [19]:
# Vector x matrix viewed as a feature transformation: the 2 input attributes are
# combined through a weight matrix into 3 new attributes.
tensor1 = torch.tensor([1, 2], dtype=torch.float32)  # 1-D, shape (2,): one sample with 2 attributes
tensor2 = torch.tensor(
    [
        [1, 0, 1],
        [0, 1, 1],
    ],
    dtype=torch.float32,
)  # weight matrix, shape (2, 3): attribute_size x attribute_size_new
# matmul treats the 1-D left operand as a 1 x 2 row and removes that extra axis
# afterwards, so the result is again 1-D, now with 3 elements.
output = torch.matmul(tensor1, tensor2)
print(output, output.size(), sep="\n")

tensor([1., 2., 3.])
torch.Size([3])


## 2D matrix x 2D matrix

In [22]:
# Two samples with two attributes each: sample_size x attribute_size = 2 x 2.
tensor1 = torch.tensor(
    [
        [1, 2],
        [2, 2],
    ],
    dtype=torch.float32,
)
# Attribute transformation matrix (2 x 3); its shape does not depend on how many
# samples tensor1 holds.
tensor2 = torch.tensor(
    [
        [1, 0, 1],
        [0, 1, 1],
    ],
    dtype=torch.float32,
)
# Every row (sample) of tensor1 is transformed by the same weights.
output = torch.matmul(tensor1, tensor2)

# Show both operands, then the product and its shape.
print(tensor1, tensor2, output, output.size(), sep="\n")

tensor([[1., 2.],
        [2., 2.]])
tensor([[1., 0., 1.],
        [0., 1., 1.]])
tensor([[1., 2., 3.],
        [2., 2., 4.]])
torch.Size([2, 3])


## 3D tensor x 2D matrix

In [23]:
# One weight matrix applied independently to every sample at every time step.
# tensor1 layout: T x sample_size x attribute_size = 3 x 2 x 2.
tensor1 = torch.tensor(
    [
        [[1, 2], [2, 2]],
        [[1, 1], [1, 0]],
        [[2, 2], [2, 1]],
    ],
    dtype=torch.float32,
)
# Attribute transformation matrix (2 x 3); unrelated to both the sample count and T.
tensor2 = torch.tensor(
    [
        [1, 0, 1],
        [0, 1, 1],
    ],
    dtype=torch.float32,
)
# The 2-D right operand is reused for each of the 3 leading slices of tensor1.
output = torch.matmul(tensor1, tensor2)

# Show both operands, then the product and its shape.
print(tensor1, tensor2, output, output.size(), sep="\n")

tensor([[[1., 2.],
         [2., 2.]],

        [[1., 1.],
         [1., 0.]],

        [[2., 2.],
         [2., 1.]]])
tensor([[1., 0., 1.],
        [0., 1., 1.]])
tensor([[[1., 2., 3.],
         [2., 2., 4.]],

        [[1., 1., 2.],
         [1., 0., 1.]],

        [[2., 2., 4.],
         [2., 1., 3.]]])
torch.Size([3, 2, 3])


## 3D tensor x 3D tensor

In [25]:
# Left operand: T x sample_size x attribute_size = 3 x 2 x 2.
tensor1 = torch.tensor(
    [
        [[1, 2], [2, 2]],
        [[2, 1], [1, 1]],
        [[2, 2], [2, 1]],
    ],
    dtype=torch.float32,
)
# Right operand: T x attribute_size x attribute_size_new = 3 x 2 x 3,
# i.e. a separate weight matrix for each leading index.
tensor2 = torch.tensor(
    [
        [[1, 0, 1], [0, 1, 1]],
        [[0, 1, 0], [0, 0, 1]],
        [[1, 1, 1], [0, 1, 0]],
    ],
    dtype=torch.float32,
)
# Slice i of tensor1 is multiplied by slice i of tensor2.
output = torch.matmul(tensor1, tensor2)

# Show both operands, then the product and its shape.
print(tensor1, tensor2, output, output.size(), sep="\n")

tensor([[[1., 2.],
         [2., 2.]],

        [[2., 1.],
         [1., 1.]],

        [[2., 2.],
         [2., 1.]]])
tensor([[[1., 0., 1.],
         [0., 1., 1.]],

        [[0., 1., 0.],
         [0., 0., 1.]],

        [[1., 1., 1.],
         [0., 1., 0.]]])
tensor([[[1., 2., 3.],
         [2., 2., 4.]],

        [[0., 2., 1.],
         [0., 1., 1.]],

        [[2., 4., 2.],
         [2., 3., 2.]]])
torch.Size([3, 2, 3])


In [26]:
# torch.bmm on the 3 x 2 x 2 tensor1 and 3 x 2 x 3 tensor2 from the previous cell;
# the printed result matches the torch.matmul output shown above.
output = torch.bmm(tensor1, tensor2)
print(output, output.size(), sep="\n")

tensor([[[1., 2., 3.],
         [2., 2., 4.]],

        [[0., 2., 1.],
         [0., 1., 1.]],

        [[2., 4., 2.],
         [2., 3., 2.]]])
torch.Size([3, 2, 3])
