<a href="https://colab.research.google.com/github/tteodorescu0/M4ML/blob/main/PyTorch_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn

# Import pprint, module we use for making our print statements prettier
import pprint
pp = pprint.PrettyPrinter()

In [None]:
# Initialize a tensor from a Python List
data = [
        [0, 1], 
        [2, 3],
        [4, 5]
       ]
x_python = torch.tensor(data)

# Print the tensor
x_python

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [None]:
# We are using the dtype to create a tensor of particular type
x_float = torch.tensor(data, dtype=torch.float)
x_float

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])

In [None]:
# We are using the dtype to create a tensor of particular type
x_bool = torch.tensor(data, dtype=torch.bool)
x_bool

tensor([[False,  True],
        [ True,  True],
        [ True,  True]])

In [None]:
x_python.float()

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])

In [None]:
# `torch.Tensor` defaults to float
# Same as torch.FloatTensor(data)
x = torch.Tensor(data) 
x

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])

In [None]:
import numpy as np

# Initialize a tensor from a NumPy array
ndarray = np.array(data)
x_numpy = torch.from_numpy(ndarray)

# Print the tensor
x_numpy

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [None]:
# Initialize a base tensor
x = torch.tensor([[1., 2.], [3., 4.]])
x

tensor([[1., 2.],
        [3., 4.]])

In [None]:
# Initialize a tensor of 0s
x_zeros = torch.zeros_like(x)
x_zeros

tensor([[0., 0.],
        [0., 0.]])

In [None]:
# Initialize a tensor of 1s
x_ones = torch.ones_like(x)
x_ones

tensor([[1., 1.],
        [1., 1.]])

In [None]:
# Initialize a tensor where each element is sampled from a uniform distribution
# between 0 and 1
x_rand = torch.rand_like(x)
x_rand

tensor([[0.5652, 0.8113],
        [0.4618, 0.7516]])

In [None]:
# Initialize a tensor where each element is sampled from a normal distribution
x_randn = torch.randn_like(x)
x_randn

tensor([[ 0.9831, -1.2836],
        [ 0.1175, -1.6673]])

In [None]:
# Initialize a 4x2x2 tensor of 0s
shape = (4, 2, 2)
x_zeros = torch.zeros(shape) # x_zeros = torch.zeros(4, 2, 2) is an alternative
x_zeros

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [None]:
# Create a tensor with values 0-9
x = torch.arange(10)
x

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Initialize a 3x2 tensor, with 3 rows and 2 columns
x = torch.ones(3, 2)
x.dtype

torch.float32

In [None]:
# Initialize a 3x2 tensor, with 3 rows and 2 columns
x = torch.Tensor([[1, 2], [3, 4], [5, 6]])
x

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

In [None]:
# Print out its shape
# Same as x.size()
x.shape 

torch.Size([3, 2])

In [None]:
# Print out the number of elements in a particular dimension
# 0th dimension corresponds to the rows
x.shape[0] 

3

In [None]:
# Get the size of the 0th dimension
x.size(0)

3

In [None]:
# Example use of view()
# x_view shares the same memory as x, so changing one changes the other
x_view = x.view(3, 2)
x_view

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

In [None]:
# We can ask PyTorch to infer the size of a dimension with -1
x_view = x.view(-1, 3)
x_view

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [None]:
# Change the shape of x to be 2x3
# x_reshaped could be a reference to or copy of x
x_reshaped = torch.reshape(x, (2, 3))
x_reshaped

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [None]:
# Initialize a 5x2 tensor, with 5 rows and 2 columns
x = torch.arange(10).reshape(5, 2)
x

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])

In [None]:
# Add a new dimension of size 1 at the 1st dimension
x = x.unsqueeze(1)
x.shape

torch.Size([5, 1, 2])

In [None]:
# Squeeze the dimensions of x by getting rid of all the dimensions with 1 element
x = x.squeeze()
x.shape

torch.Size([5, 2])

In [None]:
x

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])

In [None]:
# Get the number of elements in tensor.
x.numel()

10

In [None]:
# Initialize an example tensor
x = torch.Tensor([[1, 2], [3, 4]])
x

tensor([[1., 2.],
        [3., 4.]])

In [None]:
# Get the device of the tensor
x.device

device(type='cpu')

In [None]:
# Check if a GPU is available, if so, move the tensor to the GPU
# Tensor.to() does not move x in place; it returns the tensor on the target
# device, so the result has to be assigned back to x
if torch.cuda.is_available():
  x = x.to('cuda')

In [None]:
x.device

device(type='cpu')

In [None]:
# Initialize an example tensor
x = torch.Tensor([
                  [[1, 2], [3, 4]],
                  [[5, 6], [7, 8]], 
                  [[9, 10], [11, 12]] 
                 ])
x

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])

In [None]:
x.shape

torch.Size([3, 2, 2])

In [None]:
# Access the 0th element, which is the first row
x[0] # Equivalent to x[0, :]

tensor([[1., 2.],
        [3., 4.]])

In [None]:
# Get the top left element of each element in our tensor
x[:, 0, 0]

tensor([1., 5., 9.])

In [None]:
# Print x again to see our tensor
x

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])

In [None]:
# Let's access the 0th and 1st elements, each twice
i = torch.tensor([0, 0, 1, 1])
x[i]

tensor([[[1., 2.],
         [3., 4.]],

        [[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]],

        [[5., 6.],
         [7., 8.]]])

In [None]:
# Let's access the 0th elements of the 1st and 2nd elements
i = torch.tensor([1, 2])
j = torch.tensor([0])
x[i, j]

tensor([[ 5.,  6.],
        [ 9., 10.]])

In [None]:
x[0, 0, 0]

tensor(1.)

In [None]:
x[0, 0, 0].item()

1.0

In [None]:
# Create an example tensor
x = torch.ones((3,2,2))
x

tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])

In [None]:
# Perform elementwise addition
# Use - for subtraction
x + 2

tensor([[[3., 3.],
         [3., 3.]],

        [[3., 3.],
         [3., 3.]],

        [[3., 3.],
         [3., 3.]]])

In [None]:
# Perform elementwise multiplication
# Use / for division
x * 2

tensor([[[2., 2.],
         [2., 2.]],

        [[2., 2.],
         [2., 2.]],

        [[2., 2.],
         [2., 2.]]])

In [None]:
# Create a 4x3 tensor of 6s
a = torch.ones((4,3)) * 6
a

tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])

In [None]:
# Create a 1D tensor of 2s
b = torch.ones(3) * 2
b

tensor([2., 2., 2.])

In [None]:
# Divide a by b
a / b

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])

In [None]:
# Alternative to a.matmul(b)
# a @ b.T returns the same result since b is 1D tensor and the 2nd dimension
# is inferred
a @ b 

tensor([36., 36., 36., 36.])

In [None]:
a @ b.T

tensor([36., 36., 36., 36.])

In [None]:
pp.pprint(a.shape)
pp.pprint(a.T.shape)

torch.Size([4, 3])
torch.Size([3, 4])


In [None]:
# Create an example tensor
m = torch.tensor(
    [
     [1., 1.],
     [2., 2.],
     [3., 3.],
     [4., 4.]
    ]
)

pp.pprint(m.shape)
pp.pprint("Mean: {}".format(m.mean()))
pp.pprint("Mean in the 0th dimension: {}".format(m.mean(0)))
pp.pprint("Mean in the 1st dimension: {}".format(m.mean(1)))


torch.Size([4, 2])
'Mean: 2.5'
'Mean in the 0th dimension: tensor([2.5000, 2.5000])'
'Mean in the 1st dimension: tensor([1., 2., 3., 4.])'


In [None]:
# Concatenate in dimension 0 and 1
a_cat0 = torch.cat([a, a, a], dim=0)
a_cat1 = torch.cat([a, a, a], dim=1)

print("Initial shape: {}".format(a.shape))
print("Shape after concatenation in dimension 0: {}".format(a_cat0.shape))
print("Shape after concatenation in dimension 1: {}".format(a_cat1.shape))

Initial shape: torch.Size([4, 3])
Shape after concatenation in dimension 0: torch.Size([12, 3])
Shape after concatenation in dimension 1: torch.Size([4, 9])


In [None]:
# Print our tensor
a

tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])

In [None]:
# add() is not in place
a.add(a)
a

tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])

In [None]:
# add_() is in place
a.add_(a)
a

tensor([[12., 12., 12.],
        [12., 12., 12.],
        [12., 12., 12.],
        [12., 12., 12.]])

In [None]:
# Create an example tensor
# requires_grad parameter tells PyTorch to store gradients
x = torch.tensor([2.], requires_grad=True)

# Print the gradient if it is calculated
# Currently None since no gradient has been computed yet (backward() has not been called)
pp.pprint(x.grad)

None


In [None]:
# Calculating the gradient of y with respect to x
y = x * x * 3 # 3x^2
y.backward()
pp.pprint(x.grad) # d(y)/d(x) = d(3x^2)/d(x) = 6x = 12

tensor([12.])


In [None]:
z = x * x * 3 # 3x^2
z.backward()
pp.pprint(x.grad)


tensor([24.])


In [None]:
import torch.nn as nn

In [None]:
# Create the inputs
input = torch.ones(2,3,4)

# Make a linear layer transforming N,*,H_in dimensional inputs to N,*,H_out
# dimensional outputs
linear = nn.Linear(4, 2)
linear_output = linear(input)
linear_output

tensor([[[-0.3505,  0.2580],
         [-0.3505,  0.2580],
         [-0.3505,  0.2580]],

        [[-0.3505,  0.2580],
         [-0.3505,  0.2580],
         [-0.3505,  0.2580]]], grad_fn=<AddBackward0>)

In [None]:
sigmoid = nn.Sigmoid()
output = sigmoid(linear_output)
output

tensor([[[0.4133, 0.5641],
         [0.4133, 0.5641],
         [0.4133, 0.5641]],

        [[0.4133, 0.5641],
         [0.4133, 0.5641],
         [0.4133, 0.5641]]], grad_fn=<SigmoidBackward0>)

In [None]:
block = nn.Sequential(
    nn.Linear(4, 2),
    nn.Sigmoid()
)

input = torch.ones(2,3,4)
output = block(input)
output

tensor([[[0.5001, 0.6976],
         [0.5001, 0.6976],
         [0.5001, 0.6976]],

        [[0.5001, 0.6976],
         [0.5001, 0.6976],
         [0.5001, 0.6976]]], grad_fn=<SigmoidBackward0>)

In [None]:
class MultilayerPerceptron(nn.Module):
  """Two-layer perceptron whose layers are bundled in one `nn.Sequential`.

  The input is mapped from `input_size` features to `hidden_size` units,
  passed through a ReLU, projected back to `input_size` features and finally
  squashed by a sigmoid.

  Args:
    input_size: Size of the last dimension of the input (and of the output).
    hidden_size: Number of units in the hidden layer.
  """

  def __init__(self, input_size, hidden_size):
    # Initialize nn.Module first so that sub-modules can be registered
    super().__init__()

    # Keep the constructor arguments around for later inspection
    self.input_size = input_size
    self.hidden_size = hidden_size

    # The attribute name `model` is an arbitrary choice; any name would work
    layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, input_size),
        nn.Sigmoid(),
    ]
    self.model = nn.Sequential(*layers)

  def forward(self, x):
    """Run `x` through the sequential stack and return the result."""
    return self.model(x)

In [None]:
class MultilayerPerceptron(nn.Module):
  """Two-layer perceptron with every layer stored as its own attribute.

  Computes sigmoid(linear2(relu(linear(x)))), where `linear` maps
  `input_size` features to `hidden_size` units and `linear2` maps them back
  to `input_size` features.

  Args:
    input_size: Size of the last dimension of the input (and of the output).
    hidden_size: Number of units in the hidden layer.
  """

  def __init__(self, input_size, hidden_size):
    # Initialize nn.Module first so that sub-modules can be registered
    super().__init__()

    # Keep the constructor arguments around for later inspection
    self.input_size = input_size
    self.hidden_size = hidden_size

    # Individual layers, applied in this order by forward()
    self.linear = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, input_size)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Apply linear -> ReLU -> linear2 -> sigmoid to `x`."""
    hidden = self.relu(self.linear(x))
    return self.sigmoid(self.linear2(hidden))

In [None]:
# Make a sample input
input = torch.randn(2, 5)

# Create our model
model = MultilayerPerceptron(5, 3)

# Pass our input through our model
model(input)

tensor([[0.4971, 0.4546, 0.3648, 0.6287, 0.5184],
        [0.4956, 0.5256, 0.3396, 0.5785, 0.4762]], grad_fn=<SigmoidBackward0>)

In [None]:
list(model.named_parameters())

[('linear.weight', Parameter containing:
  tensor([[ 0.0185, -0.2973, -0.2101, -0.0728, -0.2612],
          [ 0.0867,  0.0981, -0.3456,  0.3157,  0.2642],
          [ 0.1166,  0.3799,  0.0456,  0.3035, -0.1279]], requires_grad=True)),
 ('linear.bias', Parameter containing:
  tensor([-0.3900, -0.2565, -0.2979], requires_grad=True)),
 ('linear2.weight', Parameter containing:
  tensor([[ 0.3720, -0.3646,  0.4718],
          [ 0.1675,  0.5422, -0.0155],
          [ 0.1991, -0.0720, -0.1789],
          [ 0.1478,  0.0124, -0.5396],
          [ 0.1791, -0.4878,  0.2302]], requires_grad=True)),
 ('linear2.bias', Parameter containing:
  tensor([ 0.0770, -0.3138, -0.5373,  0.5237,  0.1923], requires_grad=True))]

In [None]:
import torch.optim as optim

In [None]:
# Create the y data
y = torch.ones(10, 5)

# Add some noise to our goal y to generate our x
# We want our model to predict our original data, despite the noise
x = y + torch.randn_like(y)
x

tensor([[-0.5201,  1.1245,  2.8198,  0.3053,  0.7961],
        [ 3.3225,  1.2779,  2.3740,  2.2591,  1.4387],
        [ 2.0315,  2.4435,  1.5156,  0.2438,  2.8969],
        [ 1.6863,  2.5134,  1.2443,  1.8507,  0.8603],
        [ 1.7786,  1.0041,  1.2477,  1.0688,  1.8487],
        [ 2.0911, -0.0059,  0.3691, -0.0545,  1.8223],
        [ 0.2103,  1.1538,  2.8098,  2.0906,  1.2300],
        [ 1.9023,  0.8938,  2.6727,  3.3851,  0.0622],
        [-0.6441,  1.8047,  0.9976,  1.0014,  0.1746],
        [ 1.1468,  2.2376,  1.5109,  0.7485,  4.2413]])

In [None]:
# Instantiate the model
model = MultilayerPerceptron(5, 3)

# Define the optimizer
adam = optim.Adam(model.parameters(), lr=1e-1)

# Define loss using a predefined loss function
loss_function = nn.BCELoss()

# Calculate how our model is doing now
y_pred = model(x)
loss_function(y_pred, y).item()

0.6481157541275024

In [None]:
# Number of epochs, i.e. how many training iterations the loop below runs
n_epoch = 10

for epoch in range(n_epoch):
  # Reset the gradients accumulated during the previous iteration
  adam.zero_grad()

  # Forward pass: compute the predictions for the training inputs
  y_pred = model(x)

  # Measure how far the predictions are from the targets
  loss = loss_function(y_pred, y)

  # Report the loss of this epoch (taken before the weights are updated)
  print(f"Epoch {epoch}: traing loss: {loss.item()}")

  # Backward pass: compute the gradients of the loss
  loss.backward()

  # Let the optimizer update the weights using those gradients
  adam.step()

Epoch 0: traing loss: 0.6481157541275024
Epoch 1: traing loss: 0.5528567433357239
Epoch 2: traing loss: 0.4403615891933441
Epoch 3: traing loss: 0.31936633586883545
Epoch 4: traing loss: 0.20793749392032623
Epoch 5: traing loss: 0.12089211493730545
Epoch 6: traing loss: 0.06400315463542938
Epoch 7: traing loss: 0.032195284962654114
Epoch 8: traing loss: 0.016033204272389412
Epoch 9: traing loss: 0.008101556450128555


In [None]:
# See how our model performs on the training data
y_pred = model(x)
y_pred

tensor([[0.9911, 0.9985, 0.9980, 0.9823, 0.9768],
        [0.9999, 1.0000, 1.0000, 0.9999, 0.9997],
        [0.9996, 1.0000, 1.0000, 0.9994, 0.9986],
        [0.9996, 1.0000, 1.0000, 0.9993, 0.9985],
        [0.9989, 1.0000, 0.9999, 0.9981, 0.9963],
        [0.9915, 0.9986, 0.9982, 0.9833, 0.9778],
        [0.9993, 1.0000, 1.0000, 0.9989, 0.9976],
        [0.9998, 1.0000, 1.0000, 0.9998, 0.9994],
        [0.9858, 0.9968, 0.9958, 0.9712, 0.9653],
        [0.9998, 1.0000, 1.0000, 0.9997, 0.9992]], grad_fn=<SigmoidBackward0>)

In [None]:
# Create test data and check how our model performs on it
x2 = y + torch.randn_like(y)
y_pred = model(x2)
y_pred

tensor([[0.9706, 0.9891, 0.9865, 0.9379, 0.9349],
        [0.9984, 0.9999, 0.9999, 0.9972, 0.9949],
        [0.9792, 0.9939, 0.9923, 0.9569, 0.9517],
        [0.9993, 1.0000, 1.0000, 0.9989, 0.9976],
        [0.8037, 0.7410, 0.7308, 0.6168, 0.6923],
        [0.9945, 0.9994, 0.9991, 0.9895, 0.9850],
        [0.9948, 0.9994, 0.9992, 0.9901, 0.9857],
        [0.9937, 0.9992, 0.9988, 0.9878, 0.9829],
        [0.9966, 0.9997, 0.9996, 0.9936, 0.9900],
        [0.9993, 1.0000, 1.0000, 0.9988, 0.9974]], grad_fn=<SigmoidBackward0>)

In [None]:
# Our raw data, which consists of sentences
corpus = [
          "We always come to Paris",
          "The professor is from Australia",
          "I live in Stanford",
          "He comes from Taiwan",
          "The capital of Turkey is Ankara"
         ]

In [None]:
# The preprocessing function we will use to generate our training examples
# Our function is a simple one, we lowercase the letters
# and then tokenize the words.
def preprocess_sentence(sentence):
  """Lowercase `sentence` and split it on whitespace into a list of tokens."""
  lowercased = sentence.lower()
  tokens = lowercased.split()
  return tokens

# Create our training set
# Reuse preprocess_sentence so the tokenization logic lives in one place
train_sentences = [preprocess_sentence(sent) for sent in corpus]
train_sentences

[['we', 'always', 'come', 'to', 'paris'],
 ['the', 'professor', 'is', 'from', 'australia'],
 ['i', 'live', 'in', 'stanford'],
 ['he', 'comes', 'from', 'taiwan'],
 ['the', 'capital', 'of', 'turkey', 'is', 'ankara']]

In [None]:
# Location words that appear in our corpus
locations = {"australia", "ankara", "paris", "stanford", "taiwan", "turkey"}

# Our train labels: 1 for every token that is a location, 0 otherwise
train_labels = [[int(word in locations) for word in sent] for sent in train_sentences]
train_labels

[[0, 0, 0, 0, 1],
 [0, 0, 0, 0, 1],
 [0, 0, 0, 1],
 [0, 0, 0, 1],
 [0, 0, 0, 1, 0, 1]]

In [None]:
# Find all the unique words in our corpus 
vocabulary = set(w for s in train_sentences for w in s)
vocabulary

{'always',
 'ankara',
 'australia',
 'capital',
 'come',
 'comes',
 'from',
 'he',
 'i',
 'in',
 'is',
 'live',
 'of',
 'paris',
 'professor',
 'stanford',
 'taiwan',
 'the',
 'to',
 'turkey',
 'we'}

In [None]:
# Add the unknown token to our vocabulary
vocabulary.add("<unk>")

In [None]:
# Add the <pad> token to our vocabulary
vocabulary.add("<pad>")

# Function that pads the given sentence
# We are introducing this function here as an example
# We will be utilizing it later in the tutorial
def pad_window(sentence, window_size, pad_token="<pad>"):
  """Return a new list with `window_size` pad tokens on each side of `sentence`.

  Args:
    sentence: List of tokens to pad.
    window_size: Number of pad tokens added before and after the sentence.
    pad_token: Token used as padding.
  """
  left_pad = [pad_token] * window_size
  right_pad = [pad_token] * window_size
  return left_pad + sentence + right_pad

# Show padding example
window_size = 2
pad_window(train_sentences[0], window_size=window_size)

['<pad>', '<pad>', 'we', 'always', 'come', 'to', 'paris', '<pad>', '<pad>']

In [None]:
# We are just converting our vocabulary to a list to be able to index into it
# Sorting is not necessary, we sort to show an ordered word_to_ind dictionary
# That being said, we will see that having the index for the padding token
# be 0 is convenient as some PyTorch functions use it as a default value
# such as nn.utils.rnn.pad_sequence, which we will cover in a bit
ix_to_word = sorted(list(vocabulary))

# Creating a dictionary to find the index of a given word
word_to_ix = {word: ind for ind, word in enumerate(ix_to_word)}
word_to_ix

{'<pad>': 0,
 '<unk>': 1,
 'always': 2,
 'ankara': 3,
 'australia': 4,
 'capital': 5,
 'come': 6,
 'comes': 7,
 'from': 8,
 'he': 9,
 'i': 10,
 'in': 11,
 'is': 12,
 'live': 13,
 'of': 14,
 'paris': 15,
 'professor': 16,
 'stanford': 17,
 'taiwan': 18,
 'the': 19,
 'to': 20,
 'turkey': 21,
 'we': 22}

In [None]:
# Given a sentence of tokens, return the corresponding indices
def convert_token_to_indices(sentence, word_to_ix):
  """Map every token of `sentence` to its index in `word_to_ix`.

  Args:
    sentence: Iterable of tokens.
    word_to_ix: Dictionary from token to index. It must contain "<unk>" if
      any token of `sentence` is missing from it.

  Returns:
    List of indices, one per token, in the same order as the tokens.
  """
  # Tokens that are in our vocabulary get their own index; all other tokens
  # get the index of the unknown token, which is only looked up when needed.
  return [
      word_to_ix[token] if token in word_to_ix else word_to_ix["<unk>"]
      for token in sentence
  ]

# More compact version of the same function
def _convert_token_to_indices(sentence, word_to_ix):
  return [word_to_ind.get(token, word_to_ix["<unk>"]) for token in sentence]

# Show an example
example_sentence = ["we", "always", "come", "to", "kuwait"]
example_indices = convert_token_to_indices(example_sentence, word_to_ix)
restored_example = [ix_to_word[ind] for ind in example_indices]

print(f"Original sentence is: {example_sentence}")
print(f"Going from words to indices: {example_indices}")
print(f"Going from indices to words: {restored_example}")

Original sentence is: ['we', 'always', 'come', 'to', 'kuwait']
Going from words to indices: [22, 2, 6, 20, 1]
Going from indices to words: ['we', 'always', 'come', 'to', '<unk>']


In [None]:
# Converting our sentences to indices
example_padded_indices = [convert_token_to_indices(s, word_to_ix) for s in train_sentences]
example_padded_indices

[[22, 2, 6, 20, 15],
 [19, 16, 12, 8, 4],
 [10, 13, 11, 17],
 [9, 7, 8, 18],
 [19, 5, 14, 21, 12, 3]]

In [None]:
# Creating an embedding table for our words
embedding_dim = 5
embeds = nn.Embedding(len(vocabulary), embedding_dim)

# Printing the parameters in our embedding table
list(embeds.parameters())

[Parameter containing:
 tensor([[-1.0207, -0.4155, -0.4455,  0.5530, -0.0827],
         [ 2.3797,  0.9986, -0.5204,  0.8646, -0.9387],
         [-1.3858,  2.8090,  0.0605,  1.7894,  1.0193],
         [-0.3486,  0.9282,  0.8121, -0.1171, -0.5078],
         [-0.0412, -0.5681,  0.7853, -0.2875,  1.7119],
         [ 1.1045, -0.4355, -0.6396, -2.3790,  0.8405],
         [ 0.4996,  1.1708,  0.6860,  1.0173,  0.9535],
         [ 0.6595,  0.6539, -1.0657,  2.1262, -1.5407],
         [ 0.9136, -1.8704, -0.4744,  0.8512,  0.1283],
         [ 0.0556,  0.0262, -0.5204,  1.5210, -0.5573],
         [-0.3280, -0.5257,  1.8148, -0.3171, -1.3584],
         [ 0.7348,  0.8987, -0.1398, -0.6335, -0.1783],
         [ 0.5672, -0.7722,  0.0240, -0.0764, -0.8612],
         [-1.1372, -0.8732,  0.1117, -1.5149,  0.0826],
         [-0.0223,  1.4856, -0.6319,  0.1937,  1.0040],
         [-0.0343, -0.8882,  0.0959,  0.3277, -0.5461],
         [ 1.1448, -0.5967, -2.2082, -2.3471,  0.2281],
         [ 0.3072, -0.669

In [None]:
# Get the embedding for the word Paris
index = word_to_ix["paris"]
index_tensor = torch.tensor(index, dtype=torch.long)
paris_embed = embeds(index_tensor)
paris_embed

tensor([-0.0343, -0.8882,  0.0959,  0.3277, -0.5461],
       grad_fn=<EmbeddingBackward0>)

In [None]:
# We can also get multiple embeddings at once
index_paris = word_to_ix["paris"]
index_ankara = word_to_ix["ankara"]
indices = [index_paris, index_ankara]
indices_tensor = torch.tensor(indices, dtype=torch.long)
embeddings = embeds(indices_tensor)
embeddings

tensor([[-0.0343, -0.8882,  0.0959,  0.3277, -0.5461],
        [-0.3486,  0.9282,  0.8121, -0.1171, -0.5078]],
       grad_fn=<EmbeddingBackward0>)