In [None]:
'''
3 Core functions:

1. Tensor library  
- implements an efficient array/tensor data structure on which all its other functionality is based.

2. Automatic differentiation engine
- one of the cornerstones behind training deep neural networks. 
- PyTorch also implements standalone functions for automatic differentiation.

3. Deep learning library
- PyTorch offers various abstractions and utilities to facilitate the implementation and training of deep neural networks.
'''

'\n3 Core functions:\n\n1. Tensor library  \n- implements an efficient array/tensor data structure on which all its other functionality is based.\n\n2. Automatic differentiation engine\n- one of the cornerstones behind training deep neural networks. \n- PyTorch also implements standalone functions for automatic differentiation.\n\n3. Deep learning library\n- PyTorch offers various abstractions and utilities to facilitate the implementation and training of deep neural networks.\n'

In [None]:
import torch
import numpy as np
import random

# Seed every random number generator this notebook draws from so that
# "Restart & Run All" regenerates the same inputs.
torch.manual_seed(123)  # PyTorch's generator (torch.rand is used further down)
random.seed(123)        # Python's `random` module (list-based benchmark data); kept last so the cell displays no output

## Tensors

Tensors - generalization of vectors and matrices.   
Tensors - data containers for storing multi-dimensional arrays.

scalar (rank-0 tensor) - in Python a (float) number

In [None]:
a = torch.tensor(1.)
print(a)

tensor(1.)


In [None]:
print(a.shape)

torch.Size([])


Vector (rank-1 tensor) ~ in Python list()

In [None]:
a = torch.tensor([1., 2., 3.])
print(a)

tensor([1., 2., 3.])


In [None]:
a.shape

torch.Size([3])

Matrix (rank-2 tensor)

In [None]:
a = torch.tensor([[1., 2., 3.],
                  [4., 5., 6.]])

print(a.shape)

torch.Size([2, 3])


3D Tensor (rank-3 tensor)  

A color image can be viewed as a stack of matrices: each color channel (R=red, G=green, B=blue) can be represented as a matrix. A 3D tensor is essentially a stack of multiple matrices of the same size.

In [None]:
a = torch.tensor([[[1., 2., 3.],
                   [2., 3., 4.]],
                  [[5., 6., 7.],
                   [8., 9., 10.]]])
a.shape

torch.Size([2, 2, 3])

4D tensor (rank-4 tensor)

In [None]:
b = torch.stack((a, a))
b.shape

torch.Size([2, 2, 2, 3])

In [None]:
b

tensor([[[[ 1.,  2.,  3.],
          [ 2.,  3.,  4.]],

         [[ 5.,  6.,  7.],
          [ 8.,  9., 10.]]],


        [[[ 1.,  2.,  3.],
          [ 2.,  3.,  4.]],

         [[ 5.,  6.,  7.],
          [ 8.,  9., 10.]]]])

Tensor Library vs Array Library  

torch.tensor ~ numpy.array (very similar)  
'+' GPU support for computation  
'+' automatic differentiation support  


Python Lists vs Arrays/Tensors
- can store heterogeneous types (mix str, float etc) **vs** all elements have to have the same type (e.g. int, float)  
- elements can be easily added or removed **vs** fixed size, cannot add/remove elements
- numerical computations are slow **vs** fast

## Top 10 Tensor Functions and Methods

In [None]:
# 1. Creating Tensors
m = torch.tensor([[1., 2., 3.],
                  [4., 5., 6.]])
print(m)

tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [None]:
# 2. Checking the Shape
m.shape

torch.Size([2, 3])

In [None]:
# 3. Checking the Rank / Number of Dimensions
m.ndim

2

In [None]:
# 4. Checking the Data Type
m.dtype

# 32-bit precision

torch.float32

In [None]:
k = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
k.dtype

torch.int64

In [None]:
# 5. Creating a Tensor From NumPy Arrays

np_arr = np.array([1., 2., 3.])
m2 = torch.from_numpy(np_arr)   # shares memory with np_arr (no copy), so it is more memory-efficient
print(m2)

# or
m3 = torch.tensor(np_arr)   # copies the contents of the NumPy array into a new tensor
print(m3)

# Both tensors print as float64, which is NumPy's default floating-point precision.

'''
torch.from_numpy(a) creates a tensor that shares the same memory as the NumPy array. 
In terms of memory usage, this is more efficient than torch.tensor(a)
'''

tensor([1., 2., 3.], dtype=torch.float64)
tensor([1., 2., 3.], dtype=torch.float64)


In [None]:
# 6. Changing Data Types
print(m2.dtype)

m2 = m2.to(torch.float32)
print(m2.dtype)

torch.float64
torch.float32


In [None]:
# 7. Checking the Device Type - where the tensor is located
m2.device

device(type='cpu')

In [None]:
# 8. Changing the Tensor Shape
print(m)

print(m.view(3, 2))  # reshape

tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])


In [None]:
print(m.view(-1, 2))   # -1 - dimension determined automatically

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])


In [None]:
print(m.view(3, -1))   # -1 - dimension determined automatically

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])


In [None]:
# 9. Transposing a Tensor
print(m)
print(m.T)

tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])


In [None]:
# 10. Multiplying Matrices
m.matmul(m.T)

tensor([[14., 32.],
        [32., 77.]])

## Linear Algebra

### Weighted Sum Calculation

In [None]:
# From For-Loops to Dot Products
b = 0.
x = [1.2, 2.2]
w = [3.3, 4.3]

output = b
for x_j, w_j in zip(x, w):
    output += x_j * w_j
    
print(output)

13.42


In [None]:
# PyTorch approach
b = torch.tensor([0.])
x = torch.tensor([1.2, 2.2]) # vector
w = torch.tensor([3.3, 4.3])

x.dot(w) + b

# more compact + much faster

tensor([13.4200])

In [None]:
def plain_python(x, w, b):
    """Compute the weighted sum ``b + x[0]*w[0] + x[1]*w[1] + ...`` with a plain Python loop.

    Args:
        x: Iterable of input values.
        w: Iterable of weights, paired element-wise with ``x`` via ``zip``
            (iteration therefore stops at the shorter of the two).
        b: Bias; the starting value of the running total.

    Returns:
        The bias plus the sum of the pairwise products. When ``x`` or ``w`` is
        empty, ``b`` itself is returned.
    """
    output = b
    for x_j, w_j in zip(x, w):
        # Rebind instead of using `+=`: with a mutable bias (e.g. a tensor or
        # NumPy array) `+=` would modify the caller's `b` in place.
        output = output + x_j * w_j
    return output

In [None]:
def pytorch_dot(x, w, b):
    """Return the dot product of ``x`` and ``w`` plus the bias ``b``.

    Vectorized counterpart of the plain Python weighted-sum loop: the
    element-wise multiply-and-add is delegated to ``x.dot``.

    Args:
        x: Object exposing a ``dot`` method (the notebook passes a 1-D tensor).
        w: Weights handed to ``x.dot``.
        b: Bias added to the dot product.

    Returns:
        The result of ``x.dot(w) + b``.
    """
    weighted_sum = x.dot(w)
    return weighted_sum + b

In [None]:
b = 0.
x = [random.random() for _ in range(1000)]
w = [random.random() for _ in range(1000)]

In [None]:
%timeit plain_python(x, w, b)

69.9 µs ± 12.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
# Convert to tensors from lists
t_b = torch.tensor(b)
t_x = torch.tensor(x)
t_w = torch.tensor(w)

In [None]:
%timeit pytorch_dot(t_x, t_w, t_b)

6.12 µs ± 967 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Dealing with Multiple Training Examples via Matrix Multiplication

In [None]:
# Weighted sum for several inputs at once, written with nested Python loops:
# one output value is produced per row of X.
b = 0.
X = [[1.2, 2.2], 
     [4.4, 5.5]]
w = [3.3, 4.3]


outputs = []
for x in X:   # iterate through rows
    output = b  # every row starts again from the bias
    for x_j, w_j in zip(x, w):  # iterate through the columns of this row
        output += x_j * w_j    
    outputs.append(output)

outputs

[13.42, 38.17]

In [None]:
b = torch.tensor([0.])
X = torch.tensor(
   [[1.2, 2.2], 
    [4.4, 5.5]]
)
w = torch.tensor([3.3, 4.3])

X.matmul(w) + b

tensor([13.4200, 38.1700])

In [None]:
# Larger random inputs for timing: X has 500 rows of 1000 values, w has one weight per column.
b = 0.
X = [[random.random() for _ in range(1000)] # 1000 values per row
     for i in range(500)]  # 500 rows
w = [random.random() for _ in range(1000)]

In [None]:
def plain_python(X, w, b):
    """Compute one weighted sum per row of ``X`` using nested Python loops.

    Note: this reuses the name of the single-vector ``plain_python`` defined
    earlier in the notebook and replaces it once this cell has run.

    Args:
        X: Iterable of rows, each row an iterable of input values.
        w: Iterable of weights, paired element-wise with each row via ``zip``.
        b: Bias; the starting value of every row's running total.

    Returns:
        A list with one entry per row, each equal to
        ``b + row[0]*w[0] + row[1]*w[1] + ...``. An empty ``X`` yields ``[]``.
    """
    outputs = []
    for x in X:
        output = b
        for x_j, w_j in zip(x, w):
            # Rebind instead of using `+=`: with a mutable bias (e.g. a tensor)
            # `+=` would modify `b` in place, so every row would alias and
            # keep accumulating into the same object.
            output = output + x_j * w_j
        outputs.append(output)
    return outputs

In [None]:
t_b = torch.tensor(b)
t_X = torch.tensor(X)
t_w = torch.tensor(w)

In [None]:
def pytorch_implementation(X, w, b):
    """Return ``X.matmul(w) + b``: all per-row weighted sums in one vectorized call.

    Args:
        X: Object exposing a ``matmul`` method (the notebook passes a 2-D tensor).
        w: Weights handed to ``X.matmul``.
        b: Bias added to the product.

    Returns:
        The result of ``X.matmul(w) + b``.
    """
    projected = X.matmul(w)
    return projected + b

In [None]:
%timeit plain_python(X, w, b)

67.4 ms ± 14.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%timeit pytorch_implementation(t_X, t_w, t_b)

73.4 µs ± 4.62 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Multiplying Two Matrices

In [None]:
X = torch.rand(100, 10)
W = torch.rand(50, 10)

R = torch.matmul(X, W.T)

In [None]:
R.shape

torch.Size([100, 50])

### Broadcasting — Computations with Unequal Tensor Shapes

Another means of vectorizing operations is to use broadcasting functionality.  

Broadcasting handles arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes. Broadcasting provides a means of vectorizing array operations so that looping occurs in C instead of Python. 

In [None]:
a = torch.tensor([1.1, 2.1, 3.1, 4.1])

b = torch.tensor(5.4)

a + b

tensor([6.5000, 7.5000, 8.5000, 9.5000])

In [None]:
A = torch.tensor([[1.1, 2.1, 3.1, 4.1],
                  [1.2, 2.2, 3.2, 4.2]])

b = torch.tensor([5.4, 5.5, 5.6, 5.7])

A + b

tensor([[6.5000, 7.6000, 8.7000, 9.8000],
        [6.6000, 7.7000, 8.8000, 9.9000]])

## Debugging with PDB


In [None]:
# Rebuild random inputs (500 rows of 1000 values), then plant one bad value for the debugging demo.
b = 0.
X = [[random.random() for _ in range(1000)] # 1000 values per row
     for i in range(500)]  # 500 rows
w = [random.random() for _ in range(1000)]


# Deliberately replace one number with a string so that multiplying it by a
# float weight fails inside my_func and triggers the debugger.
X[10][10] = 'a'

In [None]:
def my_func(X, w, b):
    """Compute one weighted sum per row of ``X``, opening pdb post-mortem on errors.

    Same nested-loop computation as the plain Python implementation, but each
    multiply-add is guarded so that a failing element opens the debugger at
    the point of failure.

    Args:
        X: Iterable of rows, each row an iterable of input values.
        w: Iterable of weights, paired element-wise with each row via ``zip``.
        b: Bias; the starting value of every row's running total.

    Returns:
        A list with one entry per row. The exception is not re-raised: once
        the debugger session ends, the loop carries on with the next element,
        so a row that hit an error is missing the failing term(s).
    """
    import pdb  # local import: only this debugging demo needs it

    outputs = []
    # `j` (row index) and `i` (column index) are not used by the arithmetic;
    # they are kept so the failing position can be inspected from the debugger.
    for j, x in enumerate(X):
        output = b
        for i, (x_j, w_j) in enumerate(zip(x, w)):
            try:
                # Rebind instead of `+=` so a mutable `b` is never modified in place.
                output = output + x_j * w_j
            except Exception:
                # `Exception` rather than a bare `except`, so KeyboardInterrupt
                # and SystemExit still propagate instead of opening the debugger.
                pdb.post_mortem()
        outputs.append(output)
    return outputs

In [None]:
r = my_func(X, w, b)

> [0;32m<ipython-input-4-bf7575058cf7>[0m(8)[0;36mmy_func[0;34m()[0m
[0;32m      6 [0;31m            [0;31m#import pdb; pdb.set_trace()[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      7 [0;31m            [0;32mtry[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 8 [0;31m                [0moutput[0m [0;34m+=[0m [0mx_j[0m [0;34m*[0m [0mw_j[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      9 [0;31m            [0;32mexcept[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m                [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mpost_mortem[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> p w_j
0.10190626841302075
ipdb> p x_j
'a'
ipdb> i
10
ipdb> j
*** The 'jump' command requires a line number
ipdb> p j
10
ipdb> exit



sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.10/bdb.py", line 361, in set_quit
    sys.settrace(None)

