In [1]:
# Scalar Arithmetic and Matrix Multiplication

import torch

In [2]:
# Scalars are tensors that hold a single element; the ordinary arithmetic
# operators work on them directly.

x, y = torch.tensor(3.0), torch.tensor(2.0)

# Sum, difference, product, quotient and power, shown together as one tuple.
x + y, x - y, x * y, x / y, x ** y


(tensor(5.), tensor(1.), tensor(6.), tensor(1.5000), tensor(9.))

In [3]:
# Vectors are 1st order tensors

x = torch.arange(3)
x

tensor([0, 1, 2])

In [4]:
x[2]

tensor(2)

In [5]:
len(x)

3

In [7]:
x.shape

torch.Size([3])

# Matrices 

In [8]:
# Scalar = 0th order tensor
# Vector = 1st order tensor
# Matrix = 2nd order tensor

A = torch.arange(6).reshape(3, 2)
A



tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [9]:
# Transpose of a Tensor Matrix
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

In [10]:
# Symmetric matrices: a matrix is symmetric when it equals its own transpose

A = torch.tensor([[1,2,3], [2,0,4], [3,4,5]])
A == A.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

# Tensors

In [11]:
# Higher order arrays (nth where n > 2)

torch.arange(24).reshape(2,3,4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

# Basic Properties of Tensor Arithmetic

In [12]:
# Binary element-wise operations act entry by entry on two tensors of the
# same shape, so the result has that shape as well.

A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
B = A.clone()  # a copy of A's values, not a second reference to A
A, A + B

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [13]:
A*B

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

In [14]:
# Element-wise division. A[0, 0] and B[0, 0] are both 0, so that entry is 0/0 and shows up as nan.
A/B

tensor([[nan, 1., 1.],
        [1., 1., 1.]])

In [15]:
# Adding a scalar to a tensor, or multiplying a tensor by a scalar, applies
# the operation to every element, so the result keeps the tensor's shape.

a = 2
X = torch.arange(24).reshape(2, 3, 4)
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

# Reduction

In [16]:
# Sum of Tensors

x = torch.arange(3, dtype=torch.float32)
x, x.sum()

(tensor([0., 1., 2.]), tensor(3.))

In [17]:
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [18]:
A.shape, A.sum()

(torch.Size([2, 3]), tensor(15.))

In [19]:
# Sum along axis 0: the rows are added together, leaving one sum per column.

A.shape, A.sum(axis = 0),  A.sum(axis = 0).shape

(torch.Size([2, 3]), tensor([3., 5., 7.]), torch.Size([3]))

In [20]:
# Sum along axis 1: the columns are added together, leaving one sum per row.


A.shape, A.sum(axis = 1),  A.sum(axis = 1).shape

(torch.Size([2, 3]), tensor([ 3., 12.]), torch.Size([2]))

In [21]:
# Mean of Tensors

A.mean(), A.sum() / A.numel()

(tensor(2.5000), tensor(2.5000))

In [22]:
# Mean along an axis:

A.mean(axis = 0), A.sum(axis = 0) / A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

# Non Reduction Sum

In [23]:
# Non-reduction sum: keepdims=True keeps the summed axis as a size-1 dimension
# instead of dropping it, so the result can later be broadcast against A.

sum_A = A.sum(axis = 1, keepdims = True) # axis=1 is the dimension being summed; keepdims=True gives shape [2, 1] instead of [2]
sum_A, sum_A.shape

(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

In [24]:
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [25]:
A/ sum_A

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [26]:
# Cumulative sum across axis

A.cumsum(axis = 0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

# Dot Products

In [27]:
# Dot Product = ∑ (xi * yi)

y = torch.ones(3, dtype = torch.float32)

x, y, torch.dot(x, y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

In [28]:
# Alternatively Dot product = SUM( X*Y)

torch.sum(x*y)

tensor(3.)

In [29]:
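# When the weights y are non-negative and sum to 1, the dot product x·y is a weighted average of the elements of x
# (equal weights of 1/n give the plain mean).
# For two vectors of unit length, the dot product equals the cosine of the angle between them.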


# Matrix - Vector Products

In [30]:
# torch.mv is used to get the matrix-vector product.
# Python's @ operator (M @ v) also computes the matrix-vector or matrix-matrix product.
# The product is defined for an m*n matrix and an n*p operand (p = 1 for a vector, p > 1 for a matrix).

A.shape, x.shape, torch.mv(A,x), A@x

(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

# Matrix - Matrix Multiplication

In [31]:
# Matrices need to be of the order m*n, n*p

B = torch.ones(3, 4)
torch.mm(A, B), A@B

(tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

In [32]:
A

tensor([[0., 1., 2.],
        [3., 4., 5.]])

In [33]:
B

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

# NORMS (Operator)

In [34]:
# Norm of a vector tells us how big it is.
# For example l2 norm measures (Euclidean) length of a vector.
# Size here concerns magnitude and not the dimensionality.

# Norm is given by || . || - It maps a vector to a scalar & satisfies 3 properties.

#     1. Given a vector (x), if we scale all elements of x by a scalar (a), the norm scales by the absolute value of a:
#                        || ax || = |a| ||x||

#     2. For any vectors (x) and (y), norms satisfy the triangle inequality:
#                        ||x + y || <= ||x|| + ||y||
#     3. The norm of a vector is non-negative and vanishes only when the vector is 0.
#                        || x || > 0 for all x != 0
# L2 Norm:
#            ||x||2 = (∑(xi^2))^0.5

In [35]:
# L2 (Euclidean) norm of u: sqrt(3^2 + (-4)^2) = 5.
# torch.linalg.norm is used because the PyTorch docs mark torch.norm as deprecated.
u = torch.tensor([3.0, -4.0])
torch.linalg.norm(u)

tensor(5.)

In [36]:
# The L1 norm (also known as the Manhattan norm) sums the absolute values of the elements.
#            || x ||1 = ∑|xi|

# Less sensitive to outliers

torch.abs(u).sum()


tensor(7.)

In [37]:
# General case: the Lp norm
# (the norm underlying the Minkowski distance)

#        || x ||p = (∑|xi|^p)^(1/p)

In [38]:
# Frobenius Norm - for matrices

#        || X || = sqrt(∑∑xij^2)

# It behaves like Euclidean Norm

In [39]:
# Frobenius norm of a 4x9 matrix of ones: sqrt(36 * 1^2) = 6.
# For a 2-D input with the default ord, torch.linalg.norm computes the Frobenius norm;
# it is used here because the PyTorch docs mark torch.norm as deprecated.
torch.linalg.norm(torch.ones((4, 9)))

tensor(6.)

In [40]:
# Usage:

# Maximize the probability assigned to observed data.
# Maximize the revenue associated with a recommendation model.
# Minimize the distance between representations of photos of the same person.
# Maximize the distance between representations of photos of different people.

# Objectives like these are often expressed in terms of norms (for example, a distance is the norm of a difference).