In [22]:
import torch
import time
import numpy as np
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

### Using CPU

In [4]:
randint = torch.randint(-100, 100, (6,))
randint

tensor([  69,   38,   94, -100,   -8,  -77])

In [5]:
tensor = torch.tensor([[0.1, 0.2], [2.2, 3.1], [3.4, 5.6]])
tensor

tensor([[0.1000, 0.2000],
        [2.2000, 3.1000],
        [3.4000, 5.6000]])

In [7]:
zeros = torch.zeros(2,4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [8]:
ones = torch.ones(5,6)
ones

tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]])

In [12]:
input = torch.empty(3,4)
input

tensor([[-2.1837e-11,  1.3719e-42,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]])

In [14]:
arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

In [15]:
linspace = torch.linspace(3, 10, steps=4)
linspace

tensor([ 3.0000,  5.3333,  7.6667, 10.0000])

In [17]:
logspace = torch.logspace(start=-10, end=10, steps=5)
logspace

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

In [18]:
eye = torch.eye(5)
eye

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [19]:
a = torch.empty((2,3), dtype=torch.int64)
empty = torch.empty_like(a)
empty

tensor([[0, 0, 0],
        [0, 0, 0]])

### Using GPU

In [32]:
%%time

start_time = time.time()

zeros = torch.zeros(1,1)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00000000
CPU times: total: 0 ns
Wall time: 0 ns


In [53]:
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)

np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()
rand = (torch_rand1 @ torch_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

start_time = time.time()
rand = (np_rand1 @ np_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00000000
0.07679486


In [51]:
# embeddings, torch.stack, torch.multinomial, torch.tril, torch.triu, input.T / input.transpose, nn.Linear, torch.cat, F.softmax (show all the examples of functions/methods with pytorch docs)


# Define a probability tensor
probabilities = torch.tensor([0.1, 0.9])
# 10% or 0.1 => 0, 90% or 0.9 => 1. each probability points to the index of the probability in the tensor
# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


In [34]:
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

In [35]:
out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [36]:
out = torch.triu(torch.ones(5, 5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [37]:
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [38]:
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [39]:
input = torch.zeros(2, 3, 4)
out1 = input.transpose(0, 1)
out2 = input.transpose(-2,-1)
print(out1.shape)
print(out2.shape)
# torch.permute works the same but you provide the new order of dimensions instead of the dimensions you'd like to swap.

torch.Size([3, 2, 4])
torch.Size([2, 4, 3])


In [40]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [41]:
import torch.nn as nn
sample = torch.tensor([10.,10.,10.])
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([5.8387, 6.0255, 0.6137], grad_fn=<SqueezeBackward4>)


In [42]:
import torch.nn.functional as F

# Create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Apply softmax using torch.nn.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [43]:
# Initialize an embedding layer
vocab_size = 80
embedding_dim = 6
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedded_output = embedding(input_indices)

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the dimensionality of the embedding vectors
print(embedded_output.shape)
print(embedded_output)

torch.Size([4, 6])
tensor([[-0.0342, -0.5610, -0.1342, -1.0413,  0.4674, -0.3756],
        [ 1.3363,  0.9289,  1.7737,  0.4407, -0.6064,  0.8697],
        [ 1.2897, -0.0320,  0.4434, -1.1364,  1.4955, -0.9158],
        [ 0.6246,  0.8570,  0.9902, -0.4887, -1.1813, -0.3814]],
       grad_fn=<EmbeddingBackward0>)


In [44]:
a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])
# print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [45]:
int_64 = torch.randint(1, (3, 2)).float()
#type int64
float_32 = torch.rand(2,3)
#type float32
# print(int_64.dtype, float_32.dtype)
result = torch.matmul(int_64, float_32)
print(result)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [46]:
a = torch.rand(2, 3, 5)
print(a.shape)
x, y, z = a.shape
a = a.view(x,y,z)
# print(x, y, z)
print(a.shape)

torch.Size([2, 3, 5])
torch.Size([2, 3, 5])


In [47]:
input = torch.rand((4, 8, 10))
B, T, C = input.shape
output = input.view(B*T, C)
print(output)
# print(input)
print(output[:, -1, :])

tensor([[0.0380, 0.6446, 0.3043, 0.1192, 0.0034, 0.7705, 0.4017, 0.9776, 0.8991,
         0.7471],
        [0.3170, 0.6393, 0.9079, 0.5486, 0.6373, 0.3036, 0.1951, 0.9936, 0.4114,
         0.0867],
        [0.0610, 0.6969, 0.9511, 0.3646, 0.0396, 0.6634, 0.4220, 0.7266, 0.2512,
         0.4025],
        [0.8012, 0.2910, 0.5102, 0.5836, 0.1949, 0.9349, 0.8421, 0.5095, 0.1934,
         0.3128],
        [0.4647, 0.8768, 0.5804, 0.2791, 0.5018, 0.7237, 0.7678, 0.1322, 0.7692,
         0.5916],
        [0.4112, 0.2615, 0.7308, 0.1337, 0.5127, 0.4604, 0.3065, 0.2358, 0.4960,
         0.2089],
        [0.6438, 0.0485, 0.8246, 0.0017, 0.1391, 0.6700, 0.9416, 0.6678, 0.8761,
         0.6230],
        [0.1801, 0.1604, 0.4700, 0.1760, 0.0309, 0.7841, 0.2895, 0.2754, 0.5312,
         0.1033],
        [0.4730, 0.1699, 0.9658, 0.8737, 0.3037, 0.4603, 0.4809, 0.8128, 0.8902,
         0.4238],
        [0.5983, 0.8379, 0.4862, 0.4985, 0.6754, 0.8206, 0.7204, 0.4520, 0.9384,
         0.7762],
        [0

IndexError: too many indices for tensor of dimension 2

In [48]:
x = torch.tensor([10], dtype=torch.float32)
y = F.tanh(x)
print(y)

tensor([1.])
