## Setup

Load the pretrained GloVe word embeddings (`glove.6B.100d.txt`) into memory.

In [8]:
import numpy as np

# Location of the GloVe vectors file (glove.6B.100d.txt) that gets loaded below.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\markus\.keras\datasets\glove.6B\glove.6B.100d.txt"

def load_glove_embeddings(file_path):
    """Load word vectors from a GloVe-style text file.

    Every non-blank line is split on whitespace; the first field is taken
    as the word and the remaining fields are parsed as the components of
    its vector.

    Args:
        file_path: Path of the embeddings text file, read as UTF-8.

    Returns:
        A dict mapping each word to a 1-D ``float32`` NumPy array. If a word
        occurs on several lines, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a vector component cannot be parsed as a float.
    """
    embeddings_index = {}
    with open(file_path, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # A blank line (e.g. an extra newline at the end of the file)
            # has no word field; skip it instead of raising IndexError.
            if not values:
                continue
            word, *coefs = values
            embeddings_index[word] = np.asarray(coefs, dtype='float32')
    return embeddings_index

# Read the whole embeddings file into a word -> vector dictionary.
glove_embeddings = load_glove_embeddings(file_path=path)

In [9]:
from sklearn.metrics.pairwise import cosine_similarity

# Look up the three word vectors that take part in the analogy.
vec_man, vec_woman, vec_king = (
    glove_embeddings[token] for token in ('man', 'woman', 'king')
)

# Analogy arithmetic: vec("woman") - vec("man") + vec("king")
result_vector = (vec_woman - vec_man) + vec_king

# Function to find the most similar word
def find_most_similar(vector, embeddings, top_n=1, exclude=None):
    """Return the words whose embeddings are most cosine-similar to *vector*.

    All similarities are computed in a single vectorised NumPy operation
    rather than one library call per vocabulary word.

    Args:
        vector: 1-D query vector with the same length as the embeddings.
        embeddings: Mapping of word -> 1-D embedding vector.
        top_n: Number of best matches to return.
        exclude: Optional iterable of words to leave out of the candidates
            (for analogy queries, typically the input words themselves).
            ``None`` (the default) considers every word in ``embeddings``.

    Returns:
        A list of ``(word, similarity)`` tuples sorted by descending
        similarity, at most ``top_n`` long. Ties keep the iteration order
        of ``embeddings``. An empty candidate set yields ``[]``.
    """
    skipped = set(exclude) if exclude is not None else set()
    words = [word for word in embeddings if word not in skipped]
    if not words:
        return []

    query = np.asarray(vector)
    matrix = np.stack([embeddings[word] for word in words])

    # A zero-length vector has no direction; use a norm of 1 for it so its
    # similarity comes out as 0 instead of NaN from a division by zero.
    query_norm = np.linalg.norm(query) or 1.0
    row_norms = np.linalg.norm(matrix, axis=1)
    row_norms[row_norms == 0] = 1.0

    similarities = (matrix @ query) / (row_norms * query_norm)

    # A stable sort on the negated scores gives descending order while
    # keeping equal scores in their original order.
    order = np.argsort(-similarities, kind='stable')[:top_n]
    return [(words[i], similarities[i]) for i in order]

# Rank the vocabulary against result_vector and keep the five best matches.
nearest_words = find_most_similar(result_vector, glove_embeddings, top_n=5)

# Print the matches as a 1-based ranking.
for rank, (word, similarity) in enumerate(nearest_words, start=1):
    print(f"{rank}: {word} - {similarity}")

1: king - 0.8551837205886841
2: queen - 0.7834413647651672
3: monarch - 0.6933802366256714
4: throne - 0.6833109855651855
5: daughter - 0.6809083223342896


## Result

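The result of the expression `vec("woman") - vec("man") + vec("king")` is expected to be close to the vector for the word "queen". This is because vector arithmetic captures the relationship between these words in the embedding space: the difference between "man" and "woman" is similar to the difference between "king" and "queen". In the output above, the nearest neighbour is actually the input word "king" itself (cosine similarity ≈ 0.855), with "queen" second (≈ 0.783). Once the input words are excluded from the candidates, as is conventional for analogy queries, "queen" is the closest match.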