In [2]:
import torch
from torch.nn import functional as F
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### create a tensor

In [3]:
tensor = torch.tensor([[0.1, 0.2], [0.2, 0.4], [0.1, 0.5]])
tensor

tensor([[0.1000, 0.2000],
        [0.2000, 0.4000],
        [0.1000, 0.5000]])

### randint

In [4]:
tensor = torch.randint(-100, 100, (5,))
tensor

tensor([ 74,  25, -80, -66, -90])

### zeros

In [5]:
zeros = torch.zeros(2,3)
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

### ones

In [6]:
ones = torch.ones(3,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

### empty

In [7]:
input = torch.empty(2,3)
input

tensor([[0.0000e+00, 3.0787e-41, 7.7052e+31],
        [7.2148e+22, 1.5766e-19, 1.0256e-08]])

### arange

In [8]:
arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

### linespace

In [9]:
linspace = torch.linspace(3, 10, steps=5)
linspace

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

### logspace

In [10]:
logspace = torch.logspace(-10, 10, 5)
logspace

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

### eye

In [11]:
eye = torch.eye(5)
eye

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

### empty_like

In [12]:
a = torch.empty((2,3), dtype=torch.int64)
empty_like = torch.empty_like(a)
empty_like

tensor([[              0,               0,  94361062353792],
        [ 94361032476320, 140402811780880,               1]])

### multinomial

In [13]:
probabilities = torch.tensor([0.1, 0.9])
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
samples

tensor([1, 1, 1, 1, 1, 0, 1, 1, 1, 1])

### concetenate

In [14]:
tensor = torch.tensor([0.1, 0.2, 0.3, 0.4])
out = torch.cat((tensor, torch.tensor([0.5])), dim=0)
out

tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000])

### tril

In [15]:
out = torch.tril(torch.ones(5,5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

### triu

In [16]:
out = torch.triu(torch.ones(5,5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

### masked_fill

In [17]:
out = torch.zeros(5,5).masked_fill(torch.tril(torch.ones(5,5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

### exponentiate

In [18]:
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

### transpose

In [19]:
input = torch.zeros(2,3,4)
out = input.transpose(0,2)
out.shape

torch.Size([4, 3, 2])

### stack

In [20]:
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([2,3,4])
tensor3 = torch.tensor([3,4,5])

stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [2, 3, 4],
        [3, 4, 5]])

## linear

In [21]:
import torch.nn as nn

sample = torch.tensor([0.1, 0.2, 0.3])
linear = nn.Linear(3,3,bias=False)
linear(sample)

tensor([ 0.0286,  0.1155, -0.0533], grad_fn=<SqueezeBackward4>)

### softmax (exponentiate / sum) (normalization!)

In [22]:
import torch.nn.functional as F

tensor1 = torch.tensor([1.0,2.0,3.0])
softmax_output = F.softmax(tensor1, dim=0)
softmax_output

tensor([0.0900, 0.2447, 0.6652])

### embeddings

In [23]:
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

input_indices = torch.LongTensor([1,5,3,2])
embedded_output = embedding(input_indices)
embedded_output.shape

torch.Size([4, 100])

### matmul ( x @ y )

In [24]:
a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])

torch.matmul(a,b) # a @ b

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [25]:
# int_64 = torch.randint(1,(3,2)) # int64
# float_32 = torch.rand(2,3) # float32

# result = torch.matmul(int_64, float_32)
# result # Error

int_64_casted = torch.randint(1,(3,2)).float()
float_32 = torch.rand(2,3)

int_64_casted @ float_32


tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [26]:
a = torch.rand(2,3,5)
x,y,z = a.shape
a = a.view(x,y,z)
a.shape

torch.Size([2, 3, 5])

### ReLU
f(x) = max(0, x)
Any negative input will be set to zero, positice inputs remain unchanged. Introduces non-linearity to the model, useful for learning complex relationships.

In [34]:
m = nn.ReLU()
input = torch.randn(2)
output = m(input)
output

tensor([0.1122, 0.4796])

### Sigmoid
f(x) = 1 / (1 + e^(-x))
Maps any input value to a value between 0 and 1. Useful for modeling probabilities.

In [33]:
x = torch.tensor([-0.05], dtype=torch.float32)
y = F.sigmoid(x)
y

tensor([0.4875])

### Tanh
f(x) = (e^x - e^(-x)) / (e^x + e^(-x))
Hyperbolic tangent function. Maps any input value to a value between -1 and 1. Useful for modeling non-linear relationships.

In [35]:
m = nn.Tanh()
input = torch.randn(2)
output = m(input)
output

tensor([ 0.3187, -0.2604])