# Step 1: Set up prerequisites

In [None]:
import os
from pymongo import MongoClient
from utils import track_progress

In [None]:
# Read the connection string from the MONGODB_URI environment variable.
# If you are using your own MongoDB Atlas cluster, set MONGODB_URI to the connection
# string for your cluster, e.g.
#   mongodb+srv://<db_user>:<db_password>@<cluster-host>/?retryWrites=true&w=majority
# Do not paste the connection string (or its credentials) into the notebook itself:
# os.environ.get() takes the *name* of a variable, not its value.
MONGODB_URI = os.environ.get("MONGODB_URI")

# Initialize a MongoDB Python Client
# NOTE: if MONGODB_URI is unset this receives None and PyMongo falls back to its
# default host, so make sure the variable is exported before running this cell.
mongodb_client = MongoClient(MONGODB_URI)

# Check the connection to the server
mongodb_client.admin.command("ping")

In [None]:
# Track progress of key steps-- DO NOT CHANGE
# Calls the `track_progress` helper imported from the local `utils` module with the
# step name "cluster_creation" and the lab name "ai_agents_lab".
# NOTE(review): presumably this reports that the step was reached -- confirm in utils.
track_progress("cluster_creation", "ai_agents_lab")

### **Pick an LLM provider**

In [None]:
# Value of the SERVERLESS_URL environment variable (None when it is not set)
SERVERLESS_URL = os.getenv("SERVERLESS_URL")

# Selected LLM provider -- one of "aws", "google", "microsoft"
LLM_PROVIDER = "microsoft"

# Step 2: Import data into MongoDB

In [None]:
import json

In [None]:
""" DO NOT CHANGE THE VALUES ASSIGNED TO THE VARIABLES BELOW """
# Name of the database
DB_NAME: str = "mongodb_genai_devday_agents"

# Collection holding the full documents (summarization use case)
FULL_COLLECTION_NAME: str = "mongodb_docs"

# Collection queried with vector search (Q&A use case)
VS_COLLECTION_NAME: str = "mongodb_docs_embeddings"

# Name given to the vector search index
VS_INDEX_NAME: str = "vector_index"

In [None]:
# Handle to the DB_NAME database, shared by both collection lookups below
lab_db = mongodb_client[DB_NAME]

# Handle to the collection named by VS_COLLECTION_NAME
vs_collection = lab_db[VS_COLLECTION_NAME]

# Handle to the collection named by FULL_COLLECTION_NAME
full_collection = lab_db[FULL_COLLECTION_NAME]

In [None]:
# Insert a dataset of MongoDB docs with embeddings into the "VS_COLLECTION_NAME" collection.
# The file is opened explicitly as UTF-8 so parsing does not depend on the platform's
# default encoding, and it is parsed before any existing documents are deleted.
with open(f"../data/{VS_COLLECTION_NAME}.json", "r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# Replace the collection contents: clear it, then bulk-insert the loaded documents
print(f"Deleting existing documents from the '{VS_COLLECTION_NAME}' collection...")
vs_collection.delete_many({})
vs_collection.insert_many(data)
print(f"{vs_collection.count_documents({})} documents inserted into the '{VS_COLLECTION_NAME}' collection.")

In [None]:
# Insert the dataset of full MongoDB docs into the "FULL_COLLECTION_NAME" collection.
# The file is opened explicitly as UTF-8 so parsing does not depend on the platform's
# default encoding, and it is parsed before any existing documents are deleted.
with open(f"../data/{FULL_COLLECTION_NAME}.json", "r", encoding="utf-8") as data_file:
    data = json.load(data_file)

# Replace the collection contents: clear it, then bulk-insert the loaded documents
print(f"Deleting existing documents from the '{FULL_COLLECTION_NAME}' collection...")
full_collection.delete_many({})
full_collection.insert_many(data)
print(f"{full_collection.count_documents({})} documents inserted into the '{FULL_COLLECTION_NAME}' collection.")

# Step 3: Create a vector search index

In [None]:
from utils import create_index, check_index_ready

In [None]:
""" Create vector index definition specifying:
path: Path to the embeddings
numDimensions: Number of embedding dimensions- depends on the embedding model used
similarity: Similarity metric. One of cosine, euclidean, dotProduct"""

# The one vector field covered by the index: where the embeddings live, how many
# dimensions they have, and which similarity metric is used
embedding_field = {
    "type": "vector",
    "path": "embedding",
    "numDimensions": 384,
    "similarity": "cosine",
}

# Index model: index name, index type, and the definition wrapping the field list
model = {
    "name": VS_INDEX_NAME,
    "type": "vectorSearch",
    "definition": {"fields": [embedding_field]},
}