In [None]:
import torch
import math

**PyTorch Tensors**: Tensors are the central data abstraction in PyTorch.


**Creating Tensors**: The simplest way to create a tensor is with the torch.empty() call:

In [None]:
# torch.empty() only allocates memory for the tensor; it does not initialize
# the contents, so the values printed below are arbitrary.
x = torch.empty(3, 4)
print(type(x), x, sep='\n')

<class 'torch.Tensor'>
tensor([[3.4578e-06, 4.3198e-41, 3.4578e-06, 4.3198e-41],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]])


In [None]:
# Most of the time a tensor should start out with known contents.
# The usual choices are all zeros, all ones, or random values.
zeros = torch.zeros(2, 3)
ones = torch.ones(2, 3)

# Seed the generator first so the random values are reproducible.
torch.manual_seed(1729)
random = torch.rand(2, 3)

print(zeros, ones, random, sep='\n')

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.3126, 0.3791, 0.3087],
        [0.0736, 0.4216, 0.0691]])


In [None]:
# The torch.empty_like(), torch.zeros_like(), torch.ones_like() and
# torch.rand_like() functions each build a new tensor modelled on an existing
# one. Printing .shape next to every result confirms that they all share the
# dimensionality and extent of the template tensor x.
x = torch.empty(2, 2, 3)
print(x.shape, x, sep='\n')

empty_like_x = torch.empty_like(x)   # uninitialized, same shape as x
print(empty_like_x.shape, empty_like_x, sep='\n')

zeros_like_x = torch.zeros_like(x)   # all zeros, same shape as x
print(zeros_like_x.shape, zeros_like_x, sep='\n')

ones_like_x = torch.ones_like(x)     # all ones, same shape as x
print(ones_like_x.shape, ones_like_x, sep='\n')

rand_like_x = torch.rand_like(x)     # random values, same shape as x
print(rand_like_x.shape, rand_like_x, sep='\n')

torch.Size([2, 2, 3])
tensor([[[-4.4341e+09,  3.3448e-41, -4.4163e+09],
         [ 3.3448e-41,  1.3563e-19,  1.1632e+33]],

        [[ 1.6529e+19,  1.6676e+19,  7.1450e+31],
         [ 3.9173e-02,  1.0658e-32,  1.3563e-19]]])
torch.Size([2, 2, 3])
tensor([[[-4.4427e+09,  3.3448e-41, -4.4366e+09],
         [ 3.3448e-41,  7.0065e-45,  7.0065e-45]],

        [[ 1.4013e-45,  0.0000e+00, -4.4426e+09],
         [ 3.3448e-41, -4.4426e+09,  3.3448e-41]]])
torch.Size([2, 2, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
torch.Size([2, 2, 3])
tensor([[[1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.]]])
torch.Size([2, 2, 3])
tensor([[[0.2332, 0.4047, 0.2162],
         [0.9927, 0.4128, 0.5938]],

        [[0.6128, 0.1519, 0.0453],
         [0.5035, 0.9978, 0.3884]]])


**Tensor Data Types**:
Available data types include:

torch.bool

torch.int8

torch.uint8

torch.int16

torch.int32

torch.int64

torch.half

torch.float

torch.double

torch.bfloat16

Setting the datatype of a tensor is possible in a couple of ways:

In [None]:
# 1. The most direct way to choose a tensor's underlying data type is the
#    optional dtype argument accepted at creation time.
a = torch.ones((2, 3), dtype=torch.int16)
b = torch.rand((2, 3), dtype=torch.float64) * 20.
print(a, b, sep='\n')

# 2. Alternatively, convert an existing tensor with the .to() method.
#    Converting b to int32 drops the fractional part of each value.
c = b.to(torch.int32)
print(c)

tensor([[1, 1, 1],
        [1, 1, 1]], dtype=torch.int16)
tensor([[10.8626,  2.1505, 19.6913],
        [ 0.9956,  1.4148,  5.8364]], dtype=torch.float64)
tensor([[10,  2, 19],
        [ 0,  1,  5]], dtype=torch.int32)


**Math & Logic with PyTorch Tensors**

In [None]:
# 1. Arithmetic with scalars: the scalar is applied to every element.
ones = torch.zeros(2, 2) + 1
twos = torch.ones(2, 2) * 2
threes = (torch.ones(2, 2) * 7 - 1) / 2
fours = twos.pow(2)       # element-wise square
sqrt2s = twos.pow(0.5)    # element-wise square root

print(ones, twos, threes, fours, sqrt2s, sep='\n')

tensor([[1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.]])
tensor([[3., 3.],
        [3., 3.]])
tensor([[4., 4.],
        [4., 4.]])
tensor([[1.4142, 1.4142],
        [1.4142, 1.4142]])


In [None]:
# 2. The same operators work element-wise between two tensors of matching
#    shape (ones, twos, threes and fours come from the previous cell).
powers2 = twos ** torch.tensor([[1, 2], [3, 4]])
fives = ones + fours
dozens = threes * fours

print(powers2, fives, dozens, sep='\n')

tensor([[ 2.,  4.],
        [ 8., 16.]])
tensor([[5., 5.],
        [5., 5.]])
tensor([[12., 12.],
        [12., 12.]])


In [None]:
# 3. Tensor broadcasting
# Tensors of different shapes can be combined when their shapes are
# compatible: here the (1, 4) factor is applied to both rows of the (2, 4)
# tensor.
rand = torch.rand(2, 4)
doubled = rand * (torch.ones(1, 4) * 2)

print(rand, doubled, sep='\n')


tensor([[0.0703, 0.5105, 0.9451, 0.2359],
        [0.1979, 0.3327, 0.6146, 0.5999]])
tensor([[0.1405, 1.0210, 1.8901, 0.4717],
        [0.3959, 0.6655, 1.2291, 1.1998]])


The rules for broadcasting are:

- Each tensor must have at least one dimension - no empty tensors.

- Comparing the dimension sizes of the two tensors, going from last to first:

  - Each dimension must be equal, or

  - One of the dimensions must be of size 1, or

  - The dimension does not exist in one of the tensors

In [None]:
# Three shapes that broadcast cleanly against a (4, 3, 2) tensor.
a = torch.ones(4, 3, 2)

# (3, 2): the last two dims match a, and the leading dim is simply absent.
b = a * torch.rand(3, 2)
# (3, 1): the last dim is 1, the middle dim matches a.
c = a * torch.rand(3, 1)
# (1, 2): the last dim matches a, the middle dim is 1.
d = a * torch.rand(1, 2)

print(b, c, d, sep='\n')

tensor([[[0.5013, 0.9397],
         [0.8656, 0.5207],
         [0.6865, 0.3614]],

        [[0.5013, 0.9397],
         [0.8656, 0.5207],
         [0.6865, 0.3614]],

        [[0.5013, 0.9397],
         [0.8656, 0.5207],
         [0.6865, 0.3614]],

        [[0.5013, 0.9397],
         [0.8656, 0.5207],
         [0.6865, 0.3614]]])
tensor([[[0.6493, 0.6493],
         [0.2633, 0.2633],
         [0.4762, 0.4762]],

        [[0.6493, 0.6493],
         [0.2633, 0.2633],
         [0.4762, 0.4762]],

        [[0.6493, 0.6493],
         [0.2633, 0.2633],
         [0.4762, 0.4762]],

        [[0.6493, 0.6493],
         [0.2633, 0.2633],
         [0.4762, 0.4762]]])
tensor([[[0.0548, 0.2024],
         [0.0548, 0.2024],
         [0.0548, 0.2024]],

        [[0.0548, 0.2024],
         [0.0548, 0.2024],
         [0.0548, 0.2024]],

        [[0.0548, 0.2024],
         [0.0548, 0.2024],
         [0.0548, 0.2024]],

        [[0.0548, 0.2024],
         [0.0548, 0.2024],
         [0.0548, 0.2024]]])


In [None]:
# Shapes that can NOT be broadcast against a (4, 3, 2) tensor.
# Every attempt raises a RuntimeError, so each one is caught and displayed
# individually; otherwise the first failure would stop the cell and the
# remaining cases would never run.
a = torch.ones(4, 3, 2)

incompatible_shapes = [
    (4, 3),  # dimensions must match last-to-first
    (2, 3),  # both 3rd & 2nd dims different
    (0,),    # can't broadcast with an empty tensor
]

broadcast_errors = []  # collected messages, one per failing shape
for shape in incompatible_shapes:
    try:
        a * torch.rand(shape)
    except RuntimeError as err:
        broadcast_errors.append(str(err))
        print('RuntimeError:', err)

RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 2

**More Mathematical Functions**

In [None]:
# Common element-wise functions, applied to random values drawn from [-1, 1).
a = torch.rand(2, 4) * 2 - 1
print('Common functions:')
print(
    a.abs(),              # absolute value
    a.ceil(),             # round up to the nearest integer
    a.floor(),            # round down to the nearest integer
    a.clamp(-0.5, 0.5),   # limit every value to the range [-0.5, 0.5]
    sep='\n',
)

Common functions:
tensor([[0.9755, 0.9295, 0.8190, 0.1029],
        [0.7480, 0.4949, 0.3846, 0.5091]])
tensor([[1., -0., -0., -0.],
        [1., -0., 1., 1.]])
tensor([[ 0., -1., -1., -1.],
        [ 0., -1.,  0.,  0.]])
tensor([[ 0.5000, -0.5000, -0.5000, -0.1029],
        [ 0.5000, -0.4949,  0.3846,  0.5000]])


In [None]:
# Trigonometric functions and their inverses.
# The angles are 0, pi/4, pi/2 and 3*pi/4.
angles = torch.tensor([k * math.pi / 4 for k in range(4)])
sines = angles.sin()
# asin returns values no larger than pi/2, so the sine of 3*pi/4 maps back
# to pi/4 rather than to the original angle.
inverses = sines.asin()

print('\nSine and arcsine:')
print(angles, sines, inverses, sep='\n')


Sine and arcsine:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7854, 1.5708, 0.7854])


In [None]:
# Bitwise operations on integer tensors; ^ is the operator form of
# torch.bitwise_xor().
print('\nBitwise XOR:')
b = torch.tensor([1, 5, 11])
c = torch.tensor([2, 7, 10])
print(b ^ c)

# Comparisons; == is the operator form of torch.eq().
print('\nBroadcasted, element-wise equality comparison:')
d = torch.tensor([[1., 2.], [3., 4.]])
e = torch.ones(1, 2)  # comparison ops broadcast too: e is matched against each row of d
print(d == e)         # the result is a tensor of type bool



Bitwise XOR:
tensor([3, 2, 1])

Broadcasted, element-wise equality comparison:
tensor([[ True, False],
        [False, False]])


In [None]:
# Reductions collapse a tensor to a summary value (d comes from the previous cell).
print('\nReduction ops:')
print(
    d.max(),          # maximum, returned as a single-element tensor
    d.max().item(),   # the same maximum extracted as a plain Python number
    d.mean(),         # average
    d.std(),          # standard deviation
    d.prod(),         # product of all numbers
    torch.tensor([1, 2, 1, 2, 1, 2]).unique(),  # filter unique elements
    sep='\n',
)


Reduction ops:
tensor(4.)
4.0
tensor(2.5000)
tensor(1.2910)
tensor(24.)
tensor([1, 2])


In [None]:
# vector and linear algebra operations
v1 = torch.tensor([1., 0., 0.])         # x unit vector
v2 = torch.tensor([0., 1., 0.])         # y unit vector
m1 = torch.rand(2, 2)                   # random matrix
m2 = torch.tensor([[3., 0.], [0., 3.]]) # three times identity matrix

print('\nVectors & Matrices:')
print(torch.cross(v2, v1)) # negative of z unit vector (v1 x v2 == -v2 x v1)
print(m1)
m3 = torch.matmul(m1, m2)
print(m3)                  # 3 times m1
print(torch.svd(m3))       # singular value decomposition


Vectors & Matrices:
tensor([ 0.,  0., -1.])
tensor([[0.7746, 0.2330],
        [0.8441, 0.9004]])
tensor([[2.3237, 0.6990],
        [2.5322, 2.7011]])
torch.return_types.svd(
U=tensor([[-0.5247, -0.8513],
        [-0.8513,  0.5247]]),
S=tensor([4.3010, 1.0478]),
V=tensor([[-0.7847, -0.6199],
        [-0.6199,  0.7847]]))


**Altering Tensors in Place**: Most binary operations on tensors will return a third, new tensor. When we say c = a * b (where a and b are tensors), the new tensor c will occupy a region of memory distinct from the other tensors.

There are times, though, that you may wish to alter a tensor in place - for example, if you’re doing an element-wise computation where you can discard intermediate values. For this, most of the math functions have a version with an appended underscore (_) that will alter a tensor in place.

In [None]:
# Out-of-place: torch.sin() creates a new tensor in memory and leaves its input alone.
a = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])
print('a:', a, torch.sin(a), a, sep='\n')   # the last line shows a has not changed

# In-place: the trailing underscore means b itself is overwritten.
# The prints stay separate so the first one shows b before it is modified.
b = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])
print('\nb:')
print(b)
print(b.sin_())  # note the underscore
print(b)         # b has changed

a:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7854, 1.5708, 2.3562])

b:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7071, 1.0000, 0.7071])


In [None]:
#For arithmetic operations, there are functions that behave similarly:
a = torch.ones(2, 2)
b = torch.rand(2, 2)

print('Before:')
print(a)
print(b)
print('\nAfter adding:')
print(a.add_(b))
print(a)
print(b)
print('\nAfter multiplying')
print(b.mul_(b))
print(b)

Before:
tensor([[1., 1.],
        [1., 1.]])
tensor([[0.3995, 0.6324],
        [0.9464, 0.0113]])

After adding:
tensor([[1.3995, 1.6324],
        [1.9464, 1.0113]])
tensor([[1.3995, 1.6324],
        [1.9464, 1.0113]])
tensor([[0.3995, 0.6324],
        [0.9464, 0.0113]])

After multiplying
tensor([[1.5959e-01, 3.9991e-01],
        [8.9568e-01, 1.2838e-04]])
tensor([[1.5959e-01, 3.9991e-01],
        [8.9568e-01, 1.2838e-04]])



**Moving to GPU**: One of the major advantages of PyTorch is its robust acceleration on CUDA-compatible Nvidia GPUs. (“CUDA” stands for Compute Unified Device Architecture, which is Nvidia’s platform for parallel computing.) So far, everything we’ve done has been on CPU. How do we move to the faster hardware?



In [None]:
# Start by asking torch.cuda.is_available() whether a GPU can be used.
print('We have a GPU!' if torch.cuda.is_available() else 'Sorry, CPU only.')

Sorry, CPU only.


Once we’ve determined that one or more GPUs are available, we need to put our data someplace where the GPU can see it. Your CPU does computation on data in your computer’s RAM. Your GPU has dedicated memory attached to it. Whenever you want to perform a computation on a device, you must move all the data needed for that computation to memory accessible by that device. (Colloquially, “moving the data to memory accessible by the GPU” is shortened to “moving the data to the GPU”.)

In [None]:
# There are several ways to get data onto the target device. One is to
# request the device when the tensor is created.
# torch.cuda.device_count() reports how many GPUs are present; with more than
# one, select a specific GPU by index: device='cuda:0', device='cuda:1', etc.
if not torch.cuda.is_available():
    print('Sorry, CPU only.')
else:
    gpu_rand = torch.rand(2, 2, device='cuda')
    print(gpu_rand)

Sorry, CPU only.


As a coding practice, specifying our devices everywhere with string constants is pretty fragile. In an ideal world, your code would perform robustly whether you’re on CPU or GPU hardware. You can do this by creating a device handle that can be passed to your tensors instead of a string:

In [None]:
# Choose the device once and keep it in a handle, so the rest of the code can
# pass my_device around instead of repeating a device string.
my_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device: {}'.format(my_device))

# The handle is accepted anywhere a device string would be.
x = torch.rand(2, 2, device=my_device)
print(x)

Device: cpu
tensor([[0.3449, 0.1340],
        [0.4216, 0.1073]])


In [None]:
# A tensor that already lives on one device can be moved to another with the
# to() method (my_device comes from the previous cell).
y = torch.rand(2, 2).to(my_device)

In [None]:
# Any computation involving two or more tensors requires all of them to be on
# the same device. This cell shows the error raised when a CPU tensor is
# combined with a GPU tensor.
# The device type for Nvidia GPUs is spelled 'cuda'. The demonstration needs a
# real GPU, so it is skipped on CPU-only machines, and the expected error is
# caught and printed so that the notebook keeps running.
x = torch.rand(2, 2)  # lives on the CPU
if torch.cuda.is_available():
    y = torch.rand(2, 2, device='cuda')
    try:
        z = x + y  # exception will be thrown: x and y are on different devices
    except RuntimeError as err:
        print('RuntimeError:', err)
else:
    print('Sorry, CPU only.')

RuntimeError: Expected one of cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone device type at start of device string: gpu