In [2]:
import numpy as np
import torch
from torch import nn
from torch import optim

sentence = "The quick brown fox jumped over the lazy dog"

# Vocabulary: the distinct lower-cased words of the sentence, in alphabetical order.
unique_words = sorted({token.lower() for token in sentence.split()})
length = len(unique_words)

# Lookup tables in both directions between a word and its vocabulary index.
index_to_word = dict(enumerate(unique_words))
word_to_index = {token: position for position, token in index_to_word.items()}

# One-hot encoding: row i is all zeros except for a 1 in column i,
# which is exactly the (length x length) identity matrix.
vectors = np.eye(length)
print(f"Vocabulary size: {length}")
print(vectors)


Vocabulary size: 8
[[1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]]


In [3]:
# Training data: slide a three-word window over the sentence. The first two
# words of each window form the input phrase ("x") and the third word is the
# expected completion ("y").
#
# Every example is derived from `words`, so the training set always matches
# `sentence`; nothing is typed out by hand that could drift out of sync.
words = [word.lower() for word in sentence.split()]
completions = [
    {"x": f"{words[i]} {words[i + 1]}", "y": words[i + 2]}
    for i in range(len(words) - 2)
]

def get_input_tensor(text):
    """Encode a phrase as the concatenation of its words' one-hot vectors.

    Args:
        text: Space-separated words. Each word is lower-cased and looked up
            in ``word_to_index``; an unknown word raises ``KeyError``.

    Returns:
        A float32 tensor with a leading batch dimension of 1. For a two-word
        phrase and the 8-word vocabulary this is shape ``(1, 16)``, which is
        what the network's first layer expects.
    """
    flat = [
        component
        for token in text.lower().split()
        for component in vectors[word_to_index[token]]
    ]
    # unsqueeze(0) adds the batch dimension in front of the feature axis.
    return torch.tensor(flat, dtype=torch.float32).unsqueeze(0)

def get_target_index_tensor(target_word):
    """Return the vocabulary index of ``target_word`` as a one-element long tensor.

    The loss used for training (``nn.CrossEntropyLoss``) takes the target as a
    class index rather than a one-hot vector, hence the integer dtype.
    An unknown word raises ``KeyError``.
    """
    class_index = word_to_index[target_word.lower()]
    return torch.tensor([class_index], dtype=torch.long)

# Feed-forward classifier: 16 inputs (two concatenated 8-dim one-hot words)
# -> four 8-unit ReLU layers -> 8 output logits (one per vocabulary word).
# NOTE: no RNG seed is set in the visible cells, so the initial weights (and
# therefore the exact losses printed during training) vary between runs.
layer_sizes = [(16, 8), (8, 8), (8, 8), (8, 8), (8, 8)]

layers = []
for in_features, out_features in layer_sizes:
    layers.append(nn.Linear(in_features, out_features))
    layers.append(nn.ReLU())
layers.pop()  # no activation after the last layer: it emits raw logits

network = nn.Sequential(*layers)

# CrossEntropyLoss consumes raw logits plus a class-index target.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=network.parameters(), lr=0.01)

# Encode every training pair once up front. The tensors depend only on the
# text, so rebuilding them inside each of the epochs would be wasted work.
training_pairs = [
    (get_input_tensor(completion["x"]), get_target_index_tensor(completion["y"]))
    for completion in completions
]

epochs = 500
# predict() in a later cell switches the model to eval mode; make sure a
# re-run of this cell trains in training mode.
network.train()
for epoch in range(epochs):
    # Sum of the per-example losses over one pass through the training pairs.
    total_loss = 0
    for input_tensor, target_tensor in training_pairs:
        optimizer.zero_grad()
        output = network(input_tensor)

        loss = criterion(output, target_tensor)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Report progress every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 100, Loss: 0.0933
Epoch 200, Loss: 0.0108
Epoch 300, Loss: 0.0030
Epoch 400, Loss: 0.0013
Epoch 500, Loss: 0.0007


In [8]:
# Show which class the model object is an instance of.
print(type(network))

<class 'torch.nn.modules.container.Sequential'>


In [4]:
def predict(text):
    """Return the vocabulary word the network ranks highest as the next word.

    Args:
        text: A phrase accepted by ``get_input_tensor`` (the network was
            built for two-word phrases).

    Returns:
        The word from ``index_to_word`` with the highest softmax probability.

    Side effect: puts ``network`` into eval mode.
    """
    network.eval()
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = network(get_input_tensor(text))
        best_index = torch.softmax(logits, dim=1).argmax().item()
    return index_to_word[best_index]

# Spot-check a few two-word phrases taken from the training sentence.
print("Predictions:")
for phrase in ("jumped over", "the quick", "fox jumped", "over the"):
    print(f"{phrase} -> {predict(phrase)}")



# Full sentence prediction: start from a two-word seed and repeatedly feed the
# last two generated words back into the model to obtain the next word.
print("\n============ full sentence prediction ============")
generated = "the quick".split()

for step in range(7):
    context = " ".join(generated[-2:])
    print(step, context)
    generated.append(predict(context))

full_sentence = " ".join(generated)
print(full_sentence)


Predictions:
jumped over -> the
the quick -> brown
fox jumped -> over
over the -> lazy

0 the quick
1 quick brown
2 brown fox
3 fox jumped
4 jumped over
5 over the
6 the lazy
the quick brown fox jumped over the lazy dog


In [5]:
# Number of modules held by the Sequential container (Linear and ReLU entries both count).
print(len(network))

9


In [6]:
# Visualizing the Embedding Vectors
# Since we use One-Hot encoding feeding into a Linear layer,
# the weights of the input layer ACT as the embeddings: multiplying a one-hot
# vector by the weight matrix simply selects one column of that matrix.
# Input size is 16 (8 dimensions for first word, 8 for second word).

input_layer = network[0]
weights = input_layer.weight.detach().numpy() # Shape: (8 hidden_units, 16 inputs)

# Transpose to see inputs as rows: Shape (16, 8).
# Row k is the embedding selected by input feature k.
weights_T = weights.T

# The input is two concatenated one-hot vectors, so the second word's
# features start halfway through the input dimension.
position_offset = weights.shape[1] // 2

print("Embedding Weights shape:", weights.shape)
print("Columns 0-7 correspond to the first word's embedding.")
print("Columns 8-15 correspond to the second word's embedding.")

# Look the index up instead of assuming it: the vocabulary is sorted
# alphabetically, so 'fox' is not at the position it has in the sentence.
fox_idx = word_to_index['fox']
print(f"\nExample: Embedding for 'fox' (index {fox_idx})")
print(f"Fox index: {fox_idx}")

print("Fox embedding when in position 1:")
print(weights_T[fox_idx])

print("Fox embedding when in position 2:")
print(weights_T[fox_idx + position_offset])


Embedding Weights shape: (8, 16)
Columns 0-7 correspond to the first word's embedding.
Columns 8-15 correspond to the second word's embedding.

Example: Embedding for 'fox' (index 3)
Fox index: 2
Fox embedding when in position 1:
[-0.3145869  -0.04705364 -0.19518547  1.2083002  -0.08225289  1.8026392
 -0.5869664   1.8209193 ]
Fox embedding when in position 2:
[ 0.6153387   0.22792171 -0.04185193  0.06262119 -0.09889055 -0.03212455
  0.44432968 -0.07397515]


In [20]:
# Export the trained network to ONNX. The exporter is given a random tensor
# with the same shape as a real input, (1, 16), as its example argument;
# the object it returns is then written to disk.
example_input = torch.randn(1, 16)
onnx_program = torch.onnx.export(network, example_input)
onnx_program.save("fox_word_predictor.onnx")

[torch.onnx] Obtain model graph for `Sequential([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Sequential([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅


In [21]:
import onnx
# Reload the exported file and run the ONNX checker over it as a sanity check.
onnx_model = onnx.load("fox_word_predictor.onnx")
onnx.checker.check_model(onnx_model)

In [41]:
import onnxruntime

# Open the exported model with ONNX Runtime, requesting only the CPU execution provider.
ort_session = onnxruntime.InferenceSession(
    "./fox_word_predictor.onnx", providers=["CPUExecutionProvider"]
)

def predict(phrase_input):
    """Predict the next word for ``phrase_input`` using the exported ONNX model.

    NOTE: this rebinds the name ``predict``, which an earlier cell defined for
    the PyTorch network; after this cell runs, ``predict`` goes through
    ONNX Runtime instead.

    Args:
        phrase_input: A phrase accepted by ``get_input_tensor``.

    Returns:
        The word from ``index_to_word`` whose output score is highest.
    """
    tensor_input = get_input_tensor(phrase_input)

    # Ask the session for its input name instead of hard-coding it: the name
    # is chosen at export time and is not guaranteed to be "input".
    input_name = ort_session.get_inputs()[0].name
    outputs = ort_session.run(None, {input_name: tensor_input.numpy()})

    # outputs[0] is the first model output; [0] selects the single batch row.
    # int() turns the numpy integer into a plain Python int for the dict lookup.
    predicted_idx = int(np.argmax(outputs[0][0]))
    return index_to_word[predicted_idx]

# Run a few phrases through the ONNX-backed predict().
for phrase in ("the quick", "quick brown", "brown fox"):
    print(predict(phrase))


brown
fox
jumped
