In [1]:
from llama_index.embeddings.openai import OpenAIEmbedding
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Embedding model shared by every call in this cell
embedding_model = OpenAIEmbedding(model="text-embedding-3-small")

# Trivia questions ...
questions = [
    "Who was the first president of the United States?",
    "What is the capital city of France?",
    "In what year did humans first land on the moon?",
    "Which element on the periodic table has the chemical symbol O?",
    "What is the largest planet in the solar system?",
]

# ... and their answers, listed in the same order as the questions
answers = [
    "The first president of the United States was George Washington.",
    "The capital city of France is Paris.",
    "Humans first landed on the moon in the year 1969.",
    "The chemical symbol O represents the element Oxygen.",
    "The largest planet in the solar system is Jupiter.",
]

# Questions occupy the first half of the list, answers the second half,
# so question k and its answer sit len(questions) positions apart.
phrases = questions + answers

# Embed all phrases in a single batch request, then stack them into an array
embeddings = embedding_model.get_text_embedding_batch(phrases)
embeddings_array = np.array(embeddings)

# Quick look at the first phrase and the head of its embedding vector
print(f"Phrase: {phrases[0]}")
print(f"First 5 elements of its embedding: {embeddings_array[0][:5]}\n")

# Pairwise cosine similarity between every pair of phrase embeddings
similarity_matrix = cosine_similarity(embeddings_array)

# Compact view: the full matrix rounded to two decimals
print("Cosine Similarity Matrix:")
print(np.round(similarity_matrix, 2))
print("\nDetailed Similarity Results:\n")

# Verbose view: each unordered pair once (upper triangle, diagonal excluded)
for row, first_phrase in enumerate(phrases):
    for col in range(row + 1, len(phrases)):
        second_phrase = phrases[col]
        score = similarity_matrix[row, col]
        print(
            f"Cosine similarity between:\n  '{first_phrase}'\n  and\n"
            f"  '{second_phrase}'\n  => {score:.4f}\n"
        )

Phrase: Who was the first president of the United States?
First 5 elements of its embedding: [ 0.00629776 -0.07437885  0.01580183  0.06622069  0.00270101]

Cosine Similarity Matrix:
[[1.   0.19 0.28 0.08 0.09 0.75 0.13 0.23 0.06 0.06]
 [0.19 1.   0.1  0.08 0.16 0.17 0.88 0.09 0.1  0.14]
 [0.28 0.1  1.   0.14 0.19 0.19 0.08 0.74 0.08 0.15]
 [0.08 0.08 0.14 1.   0.25 0.07 0.1  0.11 0.74 0.23]
 [0.09 0.16 0.19 0.25 1.   0.08 0.13 0.13 0.15 0.88]
 [0.75 0.17 0.19 0.07 0.08 1.   0.21 0.21 0.09 0.13]
 [0.13 0.88 0.08 0.1  0.13 0.21 1.   0.12 0.15 0.21]
 [0.23 0.09 0.74 0.11 0.13 0.21 0.12 1.   0.1  0.16]
 [0.06 0.1  0.08 0.74 0.15 0.09 0.15 0.1  1.   0.17]
 [0.06 0.14 0.15 0.23 0.88 0.13 0.21 0.16 0.17 1.  ]]

Detailed Similarity Results:

Cosine similarity between:
  'Who was the first president of the United States?'
  and
  'What is the capital city of France?'
  => 0.1913

Cosine similarity between:
  'Who was the first president of the United States?'
  and
  'In what year did humans fi

In [3]:
from llama_index.embeddings.openai import OpenAIEmbedding
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import openai

# Initialize the OpenAI embedding model
embedding_model = OpenAIEmbedding(model="text-embedding-3-small")

# Space-related questions (first five entries) followed by their answers
# (last five entries), listed in matching order.
phrases = [
    "What year did the first human land on the moon?",
    "Which planet is known as the Red Planet?",
    "What is the largest moon of Saturn?",
    "Who was the first person to travel into space?",
    "What is the name of NASA's rover that landed on Mars in 2021?",
    "The first human landed on the moon in 1969.",
    "The planet known as the Red Planet is Mars.",
    "The largest moon of Saturn is Titan.",
    "Yuri Gagarin was the first person to travel into space.",
    "NASA's rover that landed on Mars in 2021 is named Perseverance."
]

# Generate embeddings for every phrase in one batch call
embeddings = embedding_model.get_text_embedding_batch(phrases)

# Convert embeddings to a numpy array (one row per phrase)
embeddings_array = np.array(embeddings)

# Print the first phrase, the head of its embedding, and the vector length
print(f"Phrase: {phrases[0]}")
print(f"First 5 elements of its embedding: {embeddings_array[0][:5]}\n")
# Label typo fixed: previously printed "Embdding length"
print(f"Embedding length: {len(embeddings_array[0])}")

# Compute pairwise cosine similarity between all phrase embeddings
similarity_matrix = cosine_similarity(embeddings_array)

# Print the cosine similarity matrix, rounded for readability
print("Cosine Similarity Matrix:")
print(np.round(similarity_matrix, 2))
print("\nDetailed Similarity Results:\n")

# Report each unordered pair exactly once: j starts at i + 1, which skips the
# diagonal (self-similarity) and the mirrored lower triangle.
for i in range(len(phrases)):
    for j in range(i + 1, len(phrases)):
        print(f"Cosine similarity between:\n  '{phrases[i]}'\n  and\n  '{phrases[j]}'\n  => {similarity_matrix[i, j]:.4f}\n")

Phrase: What year did the first human land on the moon?
First 5 elements of its embedding: [ 0.02788309 -0.00738564  0.01544735  0.04675181 -0.00116649]

Embdding length: 1536
Cosine Similarity Matrix:
[[1.   0.26 0.26 0.46 0.41 0.81 0.23 0.19 0.35 0.34]
 [0.26 1.   0.33 0.22 0.42 0.2  0.86 0.3  0.14 0.37]
 [0.26 0.33 1.   0.17 0.23 0.2  0.25 0.84 0.09 0.2 ]
 [0.46 0.22 0.17 1.   0.26 0.46 0.17 0.12 0.73 0.26]
 [0.41 0.42 0.23 0.26 1.   0.33 0.43 0.2  0.18 0.78]
 [0.81 0.2  0.2  0.46 0.33 1.   0.2  0.2  0.38 0.32]
 [0.23 0.86 0.25 0.17 0.43 0.2  1.   0.31 0.14 0.4 ]
 [0.19 0.3  0.84 0.12 0.2  0.2  0.31 1.   0.08 0.2 ]
 [0.35 0.14 0.09 0.73 0.18 0.38 0.14 0.08 1.   0.2 ]
 [0.34 0.37 0.2  0.26 0.78 0.32 0.4  0.2  0.2  1.  ]]

Detailed Similarity Results:

Cosine similarity between:
  'What year did the first human land on the moon?'
  and
  'Which planet is known as the Red Planet?'
  => 0.2648

Cosine similarity between:
  'What year did the first human land on the moon?'
  and
  'What 

In [4]:
from llama_index.embeddings.openai import OpenAIEmbedding
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import openai

import os

# Read the OpenAI API key from the environment instead of hard-coding it in
# the notebook, so a real key never ends up in the saved file or its history.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this cell."
    )
openai.api_key = api_key

# Initialize the OpenAI embedding model, passing the key explicitly so the
# credential in use does not depend on implicit lookup order.
embedding_model = OpenAIEmbedding(model="text-embedding-ada-002", api_key=api_key)

# Define the question and answers (1 correct, 4 closely related wrong ones).
# Index 0 is the question; indices 1..5 are the candidate answers.
phrases = [
    "What spacecraft was used in the mission to carry the first humans to the moon?",  # Question
    "Apollo 11 was the spacecraft used to carry the first humans to the moon.",       # Correct Answer
    "Apollo 12 was the spacecraft used to carry the first humans to the moon.",         # Wrong Answer
    "Apollo 14 was the spacecraft used to carry astronauts on the third successful moon landing mission.", # Wrong Answer
    "Apollo 10 was the spacecraft used to carry the first humans to the moon.", # Wrong Answer
    "Apollo 16 was the spacecraft that carried astronauts to explore the lunar highlands."   # Wrong Answer
]

# Generate embeddings for the question and answers in one batch call
embeddings = embedding_model.get_text_embedding_batch(phrases)

# Convert embeddings to a numpy array (one row per phrase)
embeddings_array = np.array(embeddings)

# Print the question and the first several elements of its embedding
print(f"Phrase: {phrases[0]}")
print(f"First 5 elements of its embedding: {embeddings_array[0][:5]}\n")

# Compute pairwise cosine similarity between all embeddings
similarity_matrix = cosine_similarity(embeddings_array)

print("\nDetailed Similarity Results:\n")

# Only row 0 is reported: the similarity of the question to each candidate
# answer. Answer-to-answer similarities are computed but not printed.
for i in range(1, len(phrases)):
    print(f"Cosine similarity between the question and:\n  '{phrases[i]}'\n  => {similarity_matrix[0, i]:.4f}\n")

Phrase: What spacecraft was used in the mission to carry the first humans to the moon?
First 5 elements of its embedding: [ 0.02214183 -0.01236451  0.02112453 -0.0116612  -0.00801903]


Detailed Similarity Results:

Cosine similarity between the question and:
  'Apollo 11 was the spacecraft used to carry the first humans to the moon.'
  => 0.9344

Cosine similarity between the question and:
  'Apollo 12 was the spacecraft used to carry the first humans to the moon.'
  => 0.9308

Cosine similarity between the question and:
  'Apollo 14 was the spacecraft used to carry astronauts on the third successful moon landing mission.'
  => 0.8903

Cosine similarity between the question and:
  'Apollo 10 was the spacecraft used to carry the first humans to the moon.'
  => 0.9283

Cosine similarity between the question and:
  'Apollo 16 was the spacecraft that carried astronauts to explore the lunar highlands.'
  => 0.8864

