In [None]:
# Tensor Fundamentals
# also known as arrays
# PyTorch tensors are the same idea as arrays in R (and numpy arrays), but are better suited to
# machine learning, because they can also hold a gradient

# 1 dimensional tensors are also vectors
# 2 dimensional tensors are also matrices
# 3 dimensional tensors are also cubes, etc

# preliminary
import torch
# math is a module in Python's standard library; it exposes the C math functions
import math

In [None]:
# Allocate an uninitialized tensor with 3 rows and 4 columns.
# Notes:
# - by default, torch.Tensor is an alias for torch.FloatTensor, i.e. 32-bit floats
# - torch.empty() only allocates memory for the tensor and never writes to it,
#   so the printed values are just whatever was in that memory at the time

x = torch.empty((3, 4))
print(type(x), x, sep="\n")

<class 'torch.Tensor'>
tensor([[1.2810e-31, 8.4078e-45, 0.0000e+00, 0.0000e+00],
        [1.6928e+22, 1.7592e+22, 2.7664e+29, 2.5763e+20],
        [4.7465e+27, 7.1440e+31, 1.4153e-43, 0.0000e+00]])


In [None]:
# Tensors pre-filled with a constant value
zeros = torch.zeros((2, 3))
print(zeros)

ones = torch.ones((2, 3))
print(ones)

# Seed the global RNG so the random tensor below is the same on every run
torch.manual_seed(27935248)
# torch.rand draws from a uniform distribution on the half-open interval [0, 1)
random = torch.rand((2, 3))
print(random)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.5240, 0.5863, 0.1338],
        [0.2952, 0.7607, 0.2487]])


In [6]:
# Tensor shapes
# Each section prints the tensor's shape first, then its contents.

x = torch.empty((2, 2, 3))
print(x.shape, x, sep="\n")

# The *_like factories build a new tensor with the same shape/dims as their input.
# empty_like: uninitialized memory, only the shape is taken from x
empty_like_x = torch.empty_like(x)
print(empty_like_x.shape, empty_like_x, sep="\n")

# zeros_like: same shape, filled with 0
zeros_like_x = torch.zeros_like(x)
print(zeros_like_x.shape, zeros_like_x, sep="\n")

# ones_like: same shape, filled with 1
ones_like_x = torch.ones_like(x)
print(ones_like_x.shape, ones_like_x, sep="\n")

# rand_like: same shape, filled with uniform random numbers
rand_like_x = torch.rand_like(x)
print(rand_like_x.shape, rand_like_x, sep="\n")



torch.Size([2, 2, 3])
tensor([[[3.0343e+24, 3.4713e-41, 0.0000e+00],
         [0.0000e+00, 1.4020e+18, 3.4710e-41]],

        [[1.0842e-19, 0.0000e+00, 1.0000e+00],
         [1.0000e+00, 1.0000e+00, 1.0000e+00]]])
torch.Size([2, 2, 3])
tensor([[[3.0343e+24, 3.4713e-41, 0.0000e+00],
         [0.0000e+00, 1.4020e+18, 3.4710e-41]],

        [[1.0842e-19, 0.0000e+00, 2.7096e-09],
         [4.2195e-08, 1.0606e-08, 1.2470e+16]]])
torch.Size([2, 2, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
torch.Size([2, 2, 3])
tensor([[[1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.]]])
torch.Size([2, 2, 3])
tensor([[[0.3260, 0.7191, 0.1391],
         [0.8715, 0.0733, 0.9410]],

        [[0.9588, 0.9555, 0.6474],
         [0.5305, 0.0480, 0.3496]]])


In [None]:
# Maths and Logic for Tensors
# Arithmetic between a tensor and a Python scalar is applied to every element.

# scalar addition
ones = 1 + torch.zeros((2, 2))
# scalar multiplication
twos = 2 * torch.ones((2, 2))
# operators chain like ordinary arithmetic: (1 * 7 - 1) / 2 == 3
threes = (7 * torch.ones((2, 2)) - 1) / 2
# element-by-element powers
fours = twos ** 2
sqrt2s = twos ** 0.5

print(ones, twos, threes, fours, sqrt2s, sep="\n")



tensor([[1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.]])
tensor([[3., 3.],
        [3., 3.]])
tensor([[4., 4.],
        [4., 4.]])
tensor([[1.4142, 1.4142],
        [1.4142, 1.4142]])
tensor([[0.2145, 0.3257, 0.0873, 0.9181],
        [0.6234, 0.2417, 0.8330, 0.7578]])
tensor([[0.4290, 0.6514, 0.1747, 1.8362],
        [1.2468, 0.4835, 1.6660, 1.5157]])


In [None]:
# Tensor Broadcasting
# Broadcasting is how element-wise operations run on tensors of different shapes:
# the smaller tensor is broadcast to the larger tensor's shape.
# Rules, comparing the dimensions from last to first -- for each pair, either
#   * the two sizes are equal (the ordinary element-wise case, boring), OR
#   * one of the two sizes is 1, OR
#   * the dimension does not exist in one of the tensors

rand = torch.rand((2, 4))
# element-wise product of a 2 x 4 tensor with a 1 x 4 tensor of twos
doubled = rand * (2 * torch.ones((1, 4)))

print(rand)
print(doubled)

# a has shape (4, 3, 2): 4 layers, 3 rows, 2 columns
a = torch.ones((4, 3, 2))
print(a)

# other shape (3, 2): 3rd & 2nd dims identical to a, dim 1 absent
# the same 3 x 2 block is reused for each of the 4 layers
b = a * torch.rand((3, 2))
print(b)

# other shape (3, 1): 3rd dim = 1, 2nd dim identical to a
# the single value in each row is reused for both columns, in every layer,
# so the two columns of c are identical
c = a * torch.rand((3, 1))
print(c)

# other shape (1, 2): 3rd dim identical to a, 2nd dim = 1
# the single 2-element row is reused for all 3 rows, in every layer,
# so every row of d is identical
d = a * torch.rand((1, 2))
print(d)

# These operations will fail
# a = torch.ones(4, 3, 2)
#
# b = a * torch.rand(4, 3)    # dimensions must match last-to-first
#
# c = a * torch.rand(2, 3)    # both 3rd & 2nd dims different
#
# d = a * torch.rand((0, ))   # can't broadcast with an empty tensor

tensor([[0.1272, 0.3320, 0.4989, 0.2165],
        [0.0294, 0.8858, 0.8213, 0.1649]])
tensor([[0.2545, 0.6640, 0.9979, 0.4330],
        [0.0589, 1.7717, 1.6426, 0.3298]])
tensor([[[1., 1.],
         [1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.]]])
tensor([[[0.5772, 0.7683],
         [0.9139, 0.2316],
         [0.4643, 0.9097]],

        [[0.5772, 0.7683],
         [0.9139, 0.2316],
         [0.4643, 0.9097]],

        [[0.5772, 0.7683],
         [0.9139, 0.2316],
         [0.4643, 0.9097]],

        [[0.5772, 0.7683],
         [0.9139, 0.2316],
         [0.4643, 0.9097]]])
tensor([[[0.1157, 0.1157],
         [0.8757, 0.8757],
         [0.0276, 0.0276]],

        [[0.1157, 0.1157],
         [0.8757, 0.8757],
         [0.0276, 0.0276]],

        [[0.1157, 0.1157],
         [0.8757, 0.8757],
         [0.0276, 0.0276]],

        [[0.1

In [None]:
# There are many more math operations available in PyTorch
# https://pytorch.org/docs/stable/torch.html#math-operations

# Modifying a tensor in place
# A trailing underscore in an op's name marks its in-place variant: the result is
# written back into the input tensor instead of into a newly created one,
# which is more efficient
# angles 0, pi/4, pi/2, 3*pi/4
a = torch.tensor([k * math.pi / 4 for k in range(4)])
print(f"a: {a}")
# method form of torch.sin_(a) -- note the underscore
a.sin_()
# a itself now holds the sines of the original angles
print(f"a: {a}")

a: tensor([0.0000, 0.7854, 1.5708, 2.3562])
a: tensor([0.0000, 0.7071, 1.0000, 0.7071])


In [None]:
# Shallow vs Deep Copying
# Plain assignment in Python never copies: it binds a second name to the same
# object (which avoids needless memory use), so both names see every change
a = torch.ones((2, 2))
b = a

a[0, 1] = 999

print(a)
# b shows the 999 as well, because a and b are the same tensor
print(b)

# to get an independent (deep) copy, clone the tensor instead
a = torch.ones((2, 2))
b = torch.clone(a)
a[0, 1] = 999
print(a)
# this time b is untouched
print(b)

# IMPORTANT
# clone() is itself tracked by autograd: if the source tensor requires gradients,
# the clone stays connected to it and gradients flow back through the copy.
# This is typically the desired behaviour, but if not, call .detach() first
# (a.detach().clone()) to get a copy with no autograd relationship to the source

tensor([[  1., 999.],
        [  1.,   1.]])
tensor([[  1., 999.],
        [  1.,   1.]])
tensor([[  1., 999.],
        [  1.,   1.]])
tensor([[1., 1.],
        [1., 1.]])


In [None]:
# Managing CPU/GPU resources
# PyTorch tensors can be moved to the GPU for faster computation

# Pick the accelerator (e.g. CUDA) when one is available, otherwise the CPU.
# torch.accelerator only exists in newer PyTorch releases, so fall back to the
# plain CUDA check when the module is missing.
if hasattr(torch, "accelerator"):
    if torch.accelerator.is_available():
        device = torch.accelerator.current_accelerator()
    else:
        device = torch.device("cpu")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# passing device= creates the tensor directly on the chosen device
x = torch.rand(2, 2, device=device)
print(x)

# By default ALL tensors are created on the CPU unless device= is passed.
# To avoid repeating device= everywhere, either change the default once with
#     torch.set_default_device(device)
# or scope it with the context manager
#     with torch.device(device):
#         ...
# Tensors that already exist are not affected by either; move those with .to(device)

Device: cuda
tensor([[0.5767, 0.0558],
        [0.8559, 0.9901]], device='cuda:0')
