# Step 1. Install required libraries



In [None]:
!pip install hnswlib transformers sentence-transformers  -q

# Step 2. Generate text embeddings

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Pretrained MiniLM sentence encoder; the same model object is reused later
# to embed the query.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Toy corpus. A document's position in this list is also the label it is
# stored under in the index, which is how results are mapped back to text.
texts = [
    "IBM AI is revolutionizing enterprise search.",
    "HNSW accelerates nearest neighbor search.",
]

# Embed every document, asking the encoder for normalized vectors.
embeddings = model.encode(texts, normalize_embeddings=True)

print(embeddings)

# Step 3. Build an HNSW index

In [None]:
import hnswlib

# Inspect what the encoder returned before sizing the index:
# one row per document, one column per embedding dimension.
print(type(embeddings))
num_elements, dim = embeddings.shape
print(f"Dimension: {dim}")
print(f"Number of elements: {num_elements}")

# Create a cosine-space HNSW index with room for exactly the corpus,
# then insert each embedding under its row number as the label.
p = hnswlib.Index(space="cosine", dim=dim)
p.init_index(M=16, ef_construction=200, max_elements=num_elements)
p.add_items(embeddings, np.arange(num_elements))

# Persist the built index to disk so it can be reloaded later.
p.save_index("rag_hnsw_index.bin")

# Step 4. Perform fast retrieval

In [None]:
# Load the index from disk into a fresh, uninitialized Index object.
# Calling load_index() on the `p` that was just built makes hnswlib warn about
# an already-initialized index and discard the in-memory structure, and it
# does not demonstrate that the saved file is usable on its own.
p = hnswlib.Index(space="cosine", dim=dim)
p.load_index("rag_hnsw_index.bin")

# Querying the system: embed the question with the same model and the same
# normalization that were used for the documents.
query_text = "How does IBM use AI?"
query_embedding = model.encode([query_text], normalize_embeddings=True)
print(query_embedding)

# Retrieve nearest neighbors. k is capped at the corpus size because
# knn_query raises when asked for more neighbors than the index contains.
labels, distances = p.knn_query(query_embedding, k=min(2, len(texts)))

print(f"Nearest neighbors' labels: {labels}")
print(f"\nDistance: {distances}")

# Print retrieved results: labels are positions in `texts`, and row 0 holds
# the neighbors of the single query.
print(f"Retrieved documents: {[texts[i] for i in labels[0]]}")

# Misc. NumPy arrays

In [None]:
import numpy as np

# One-dimensional array holding 0, 1, 2.
a_1d = np.arange(3)
print(a_1d)

# Each attribute, followed by the Python type it is reported as.
print(a_1d.ndim, type(a_1d.ndim), sep="\n")
print(a_1d.shape, type(a_1d.shape), sep="\n")
print(a_1d.size, type(a_1d.size), sep="\n")

# Side-by-side comparison of len(), shape[0] and size for the 1-D case.
print(len(a_1d), a_1d.shape[0], a_1d.size, sep="\n")

In [None]:
# Two-dimensional array: the integers 0..11 laid out as 3 rows by 4 columns.
a_2d = np.arange(12).reshape(3, 4)
print(a_2d)

# Number of dimensions, then the full shape tuple.
print(a_2d.ndim, a_2d.shape, sep="\n")

# The shape entries one per line: shape[0] and then shape[1].
print(*a_2d.shape, sep="\n")

# The same two numbers obtained by unpacking the shape tuple.
row, col = a_2d.shape
print(row, col, sep="\n")

# size counts every element, while len() and shape[0] report the first axis.
print(a_2d.size, len(a_2d), a_2d.shape[0], sep="\n")

In [None]:
# Three-dimensional array: the integers 0..23 arranged as 2 blocks of 3 x 4.
a_3d = np.arange(24).reshape(2, 3, 4)
print(a_3d)

# Number of dimensions, shape tuple and total element count.
print(a_3d.ndim, a_3d.shape, a_3d.size, sep="\n")

# len() and shape[0] both report the length of the first axis.
print(len(a_3d), a_3d.shape[0], sep="\n")