In [1]:
import json
import numpy as np
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Every input/output file lives under one project root; the three paths below
# are derived from it so the location only has to be edited in one place.
PROJECT_ROOT = r"E:\book_recommender"

CATALOG_PATH = rf"{PROJECT_ROOT}\data\catalog.json"             # input: book catalog (JSON)
VECTOR_MATRIX_PATH = rf"{PROJECT_ROOT}\vectors\embeddings.npy"  # output: embedding matrix
ID_MAP_PATH = rf"{PROJECT_ROOT}\vectors\id_map.json"            # output: row index -> book id


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the book catalog. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on open()'s locale-dependent default, which can
# raise UnicodeDecodeError or silently mis-decode non-ASCII text.
with open(CATALOG_PATH, "r", encoding="utf-8") as f:
    catalog = json.load(f)

# The catalog is keyed by book id; later cells iterate book_ids in this order,
# so it determines the row order of the texts and of the id map.
book_ids = list(catalog.keys())

print("Total books:", len(book_ids))


Total books: 161


In [3]:
# Pretrained sentence-embedding checkpoint used to encode the book texts.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)


In [4]:
def build_text(book):
    """Flatten one catalog entry into the text that gets embedded.

    The result is a newline-separated string made of, in this order and only
    when the field is present and non-empty: the title, the description, and
    one labelled line each for genres, tags and moods (items joined by ", ").

    Args:
        book: Mapping for a single catalog entry. "title" and "description"
            are used as-is; "genres", "tags" and "moods" are joined as
            iterables of strings.

    Returns:
        The combined text, or an empty string when no field has content.
    """
    plain_fields = ("title", "description")
    labelled_fields = (
        ("genres", "Genres: "),
        ("tags", "Tags: "),
        ("moods", "Moods: "),
    )

    # Free-text fields are copied verbatim.
    lines = [book[key] for key in plain_fields if book.get(key)]

    # List-valued fields become a single "Label: a, b, c" line each.
    lines.extend(
        label + ", ".join(book[key])
        for key, label in labelled_fields
        if book.get(key)
    )

    return "\n".join(lines)


In [5]:
# Both structures follow the order of book_ids, so position i in `texts`
# belongs to the book stored under id_map[i].
texts = [build_text(catalog[book_id]) for book_id in book_ids]
id_map = dict(enumerate(book_ids))  # FAISS index → book_id


In [6]:
# Encode every book text in a single call. normalize_embeddings=True requests
# unit-length vectors; the progress bar is shown while batches are processed.
embeddings = model.encode(
    texts,
    normalize_embeddings=True,
    show_progress_bar=True,
)


Batches: 100%|██████████| 6/6 [00:04<00:00,  1.32it/s]


In [7]:
# Persist the embedding matrix first, then the row-index -> book_id lookup.
np.save(VECTOR_MATRIX_PATH, embeddings)

# NOTE: JSON object keys are always strings, so the integer keys of id_map are
# stored as "0", "1", ...; anything that reads this file back has to convert.
with open(ID_MAP_PATH, "w") as id_map_file:
    id_map_file.write(json.dumps(id_map, indent=4))

print("Saved embeddings:", embeddings.shape)
print("Saved id_map:", len(id_map))


Saved embeddings: (161, 384)
Saved id_map: 161


In [3]:
import faiss
import numpy as np
import json

# Paths
EMBED_PATH = r"E:\book_recommender\vectors\embeddings.npy"
ID_MAP_PATH = r"E:\book_recommender\vectors\id_map.json"
FAISS_PATH = r"E:\book_recommender\vectors\embeddings.faiss"

# Load embeddings. FAISS works on C-contiguous float32 matrices;
# ascontiguousarray guarantees both and, unlike astype, does not copy when the
# stored array already has that dtype and layout.
embeddings = np.ascontiguousarray(np.load(EMBED_PATH), dtype=np.float32)
if embeddings.ndim != 2:
    # Fail with a clear message instead of an IndexError on shape[1] below.
    raise ValueError(
        f"Expected a 2-D (n_vectors, dim) matrix in {EMBED_PATH}, "
        f"got shape {embeddings.shape}"
    )
print("Loaded embeddings:", embeddings.shape)

# Build FAISS index (L2 distance).
# If the stored vectors are unit-normalised (the encoding cell passes
# normalize_embeddings=True), squared L2 distance equals 2 - 2 * cosine, so
# this exact L2 search ranks neighbours the same way cosine similarity would.
d = embeddings.shape[1]      # dimension of vectors
index = faiss.IndexFlatL2(d) # exact L2 search

# Add vectors to index; row i of the matrix gets FAISS id i.
index.add(embeddings)
print("FAISS index size:", index.ntotal)

# Save index
faiss.write_index(index, FAISS_PATH)
print("Saved FAISS index to:", FAISS_PATH)

Loaded embeddings: (161, 384)
FAISS index size: 161
Saved FAISS index to: E:\book_recommender\vectors\embeddings.faiss
