# Generate Embeddings for Mobile Reviews using Voyage AI

This notebook reads `mobile_reviews.json`, generates a vector embedding for each record's `review_text` using a Voyage AI model, and saves the augmented records to `mobile_reviews_with_embeddings.json`.

In [None]:
# Install the Voyage AI Python library if not already installed.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install voyageai

In [None]:
import json
import voyageai
import os

# Configuration: every value a reader might want to tune lives here.
INPUT_FILE = "mobile_reviews.json"  # dataset read by the load step
OUTPUT_FILE = "mobile_reviews_with_embeddings.json"  # where the augmented records are written
EMBEDDING_MODEL = "voyage-4-large"  # Voyage AI model

In [None]:
# Load the dataset
# Later cells iterate over `reviews` and call .get() on each item, so the file
# is expected to contain a JSON array of objects.
try:
    # Explicit UTF-8 so non-ASCII review text is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        reviews = json.load(f)
except FileNotFoundError:
    print(f"Error: {INPUT_FILE} not found. Make sure you are in the correct directory.")
    # Stop here: continuing would leave `reviews` undefined (or stale from an
    # earlier run of this cell), and the following cells would fail or process
    # the wrong data.
    raise
else:
    print(f"Loaded {len(reviews)} reviews from {INPUT_FILE}")

In [None]:
# Generate embeddings
# Adds a 'review_embedding' entry to each review dict in place; reviews with no
# text, or whose request fails, are left without that key.
print(f"Generating embeddings using model: {EMBEDDING_MODEL}...")

# Initialize Voyage AI Client
# Take the key from the VOYAGE_API_KEY environment variable instead of passing a
# blank literal. If the variable is unset, None is passed and the client is left
# to apply its own default key lookup (see the Voyage AI client documentation).
vo = voyageai.Client(api_key=os.environ.get("VOYAGE_API_KEY"))

total = len(reviews)
embedded = failed = skipped = 0

for i, review in enumerate(reviews):
    text_to_embed = review.get('review_text', '')

    if text_to_embed:
        try:
            # One request per review; the single input yields a single embedding.
            response = vo.embed([text_to_embed], model=EMBEDDING_MODEL, input_type="document")
            review['review_embedding'] = response.embeddings[0]
            embedded += 1
            print(f"[ {i+1}/{total} ] Embedded review {review.get('review_id', 'unknown')}")
        except Exception as e:
            # Best effort: report the failure and keep going so one bad request
            # does not abort the whole run.
            failed += 1
            print(f"Error embedding review {i}: {e}")
    else:
        skipped += 1
        print(f"Skipping review {i} (no text found)")

print("Embedding generation complete.")
# Summarize outcomes so a run where requests failed is not mistaken for success.
print(f"Embedded: {embedded}, failed: {failed}, skipped: {skipped}")

In [None]:
# Preview one record to verify structure
if reviews:
    # Work on a shallow copy so the stored record keeps its full embedding.
    preview = dict(reviews[0])
    embedding = preview.get('review_embedding')
    if embedding is not None:
        # Printing the whole vector would emit one line per float; show its
        # length and the first few values instead.
        preview['review_embedding'] = f"<{len(embedding)} values> {embedding[:5]} ..."
    print(json.dumps(preview, indent=2))

In [None]:
# Write the reviews (including any embeddings added above) to OUTPUT_FILE as
# indented JSON, replacing any existing file of that name.
with open(OUTPUT_FILE, mode='w') as out_file:
    json.dump(reviews, fp=out_file, indent=2)

print(f"Saved data with embeddings to {OUTPUT_FILE}")