In [38]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import time

# important to node cuda is not M1 compatible, instead we use mps
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
print(device)

mps


In [39]:
%%time
start_time = time.time()
# matrix operations here
zeros = torch.zeros(1, 1)
end_time = time.time()

elapsed_time = end_time - start_time
# number following the colon denotes # of decimal places our result will show
print(f"{elapsed_time:.8f}")

0.00133705
CPU times: user 1.13 ms, sys: 1.51 ms, total: 2.64 ms
Wall time: 1.56 ms


In [40]:
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

# torch using GPU
start_time = time.time()

rand = (torch_rand1 @ torch_rand2) # use @ to multiply matrices in pytorch

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

# numpy using CPU
start_time = time.time()

rand = np.multiply(np_rand1, np_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.01102209
0.24082708


  rand = np.multiply(np_rand1, np_rand2)


In [41]:
# torch.stack, torch.multinomial, torch.tril, torch.triu, input.T / input.transpose, nn.linear, torch.cat, F.softmax
# show all the examples of functions/methods with pytorch docs

# define a probability tensor
probabilities = torch.tensor([0.1, 0.9])
# 10% 0r 0.1 => 0, 90%or 0.9 => 1. each probability points to the index of the probability in the tensor
# draw 5 examples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
samples

# we will use this for text probability and prediction

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [42]:
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

# we will use this when we are generating text given a context

tensor([1, 2, 3, 4, 5])

In [43]:
# tril ~ triangle lower
out = torch.tril(torch.ones(5, 5))
out

# we will use this to represent history/context (1s) and future predictions (0s)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [44]:
# triu ~ triangle upper
out = torch.triu(torch.ones(5, 5))
out

# same concept as above but reversed

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [45]:
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out

# exponentiating these elements will get us to the point above, 0 becomes 1 and -inf becomes 0

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [46]:
torch.exp(out)

# as described in the above cell

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [47]:
input = torch.zeros(2, 3, 4)
out = input.transpose(0, 2)
out.shape

# this swaps the elements of the 0th index and the 2nd index

torch.Size([4, 3, 2])

In [48]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

# we will use this with our blocks to make batches

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [49]:
import torch.nn as nn

sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3, 3, bias=False) # linear transformation
print(linear(sample))

# we will use these for training and parameter selection

tensor([-0.6283, -3.3214, -1.7670], grad_fn=<SqueezeBackward4>)


In [50]:
import torch.nn.functional as F

# create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# apply softmax using torch.nn.function.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [51]:
# initialize an embedding layer
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

# create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# apply the embedding layer
embedded_output = embedding(input_indices)

# the output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the dimensionality of the embedding vectors
print(embedded_output.shape)

torch.Size([4, 100])


In [52]:
a = torch.tensor([[1, 2], [3, 4], [5, 6]])
b = torch.tensor([[7, 8, 9], [10, 11, 12]])
print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [53]:
# type int64
int64 = torch.randint(1, (3, 2)).float()
print(int64)

# type float32
float32 = torch.rand(2, 3)
print(float32)

result = torch.matmul(int64, float32)
print(result)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.5060, 0.2227, 0.6172],
        [0.8846, 0.8201, 0.8661]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [54]:
a = torch.rand(2, 3, 5)
x, y, z = a.shape
a = a.view(x, y, z)
print(a.shape)

torch.Size([2, 3, 5])


In [57]:
input = torch.rand((4, 8, 10))
B, T, C = input.shape
output = input.view(B*T, C)
print(output)

tensor([[0.7619, 0.5314, 0.3582, 0.0539, 0.8084, 0.6856, 0.3212, 0.9076, 0.5600,
         0.2982],
        [0.2797, 0.8773, 0.8572, 0.0230, 0.0947, 0.3313, 0.8494, 0.4594, 0.6813,
         0.4347],
        [0.5512, 0.3078, 0.7862, 0.2602, 0.4782, 0.0165, 0.5352, 0.8503, 0.9971,
         0.5553],
        [0.0164, 0.8575, 0.8180, 0.9545, 0.8254, 0.0084, 0.0653, 0.3390, 0.7389,
         0.3482],
        [0.5612, 0.8002, 0.5717, 0.8086, 0.3394, 0.3796, 0.2710, 0.6163, 0.8348,
         0.6676],
        [0.4080, 0.0220, 0.5716, 0.4639, 0.1927, 0.8059, 0.8064, 0.0735, 0.8600,
         0.3534],
        [0.2942, 0.6435, 0.3932, 0.2838, 0.8932, 0.4533, 0.2948, 0.1759, 0.5860,
         0.7337],
        [0.4874, 0.4482, 0.6619, 0.4689, 0.7296, 0.9578, 0.9322, 0.9309, 0.1110,
         0.4954],
        [0.9930, 0.8780, 0.0415, 0.0237, 0.7410, 0.1854, 0.5681, 0.4599, 0.8085,
         0.9338],
        [0.1778, 0.8424, 0.7371, 0.6263, 0.4157, 0.9547, 0.6770, 0.1393, 0.7189,
         0.8767],
        [0