In [13]:
import chromadb
import json
import csv
import uuid
import os

In [None]:
# One HTTP client per Chroma server: dbvs1 talks to port 8000, dbvs2 to port 8001
# (both on localhost). Clients are created in that order.
dbvs1, dbvs2 = (
    chromadb.HttpClient(host="localhost", port=port) for port in (8000, 8001)
)


In [None]:
# Source folders for the CSV files. The "*_COPY" folders are the ones passed
# to the importer for the second Chroma instance (dbvs2).
INTERNAL_DATA, EXTERNAL_DATA = "./internal", "./external"
INTERNAL_DATA_COPY, EXTERNAL_DATA_COPY = "./internal_copy", "./external_copy"

# Each entry is used both as the collection name and as the CSV basename
# ("<name>.csv") inside the matching folder.
internal = ["courses", "documents", "exam", "programs", "students"]

# NOTE(review): "students" also appears in `internal`; collections are keyed
# by name only, so the two lists share that collection.
external = ["professor_reviews", "students", "support_tickets", "support_responses"]

In [None]:
def import_csv_to_chroma(client, base_folder, collection_name, filename, label):
    """Replace a ChromaDB collection with the rows of a CSV file.

    The CSV is read with ``csv.DictReader`` and must provide a ``document``
    column (text, stripped of surrounding whitespace) and a ``metadata``
    column (a JSON string, decoded with ``json.loads``). Every valid row is
    stored under a freshly generated UUID4 id.

    Args:
        client: Object exposing ``delete_collection(name)`` and
            ``get_or_create_collection(name)`` (e.g. a chromadb client).
        base_folder: Directory that contains ``filename``.
        collection_name: Name of the collection to drop and re-create.
        filename: CSV file name inside ``base_folder``.
        label: Free-form tag that is only used in the progress messages.

    Returns:
        None. If the file does not exist the function prints a warning and
        returns before touching the collection.

    Rows that cannot be parsed are reported and skipped as a whole, so the
    ids, documents and metadatas handed to ``collection.add`` always have the
    same length.
    """
    filepath = os.path.join(base_folder, filename)
    if not os.path.exists(filepath):
        print(f"⚠️ Skipping {collection_name}, file {filepath} not found.")
        return

    # Drop any previous collection so a re-run does not accumulate duplicates
    # (ids are random, so old rows would never be overwritten).
    try:
        client.delete_collection(collection_name)
        print(f"🗑️ Old '{collection_name}' collection deleted ({label}).")
    except Exception:
        # NOTE(review): every failure is reported as "not found", including
        # e.g. connection errors; the exception type raised for a missing
        # collection is not visible here, so the catch is left broad.
        print(f"ℹ️ No existing '{collection_name}' collection found ({label}), creating new one...")

    # Create or get collection
    collection = client.get_or_create_collection(collection_name)

    # Read and import data
    ids, documents, metadatas = [], [], []
    # newline="" is what the csv module expects, so that line breaks embedded
    # in quoted fields are parsed correctly.
    with open(filepath, "r", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            try:
                # Parse the whole row before appending anything: a failure
                # here must not leave the three lists with different lengths.
                document = row["document"].strip()
                metadata = json.loads(row["metadata"])
            except Exception as e:
                print(f"❌ Error reading row in {filename}: {e}")
                continue
            ids.append(str(uuid.uuid4()))
            documents.append(document)
            metadatas.append(metadata)

    if documents:
        collection.add(ids=ids, documents=documents, metadatas=metadatas)
        print(f"✅ Imported {len(documents)} records into '{collection_name}' ({label}).")
    else:
        print(f"⚠️ No valid rows found in {filename}.")


In [24]:
print("📦 Importing FRAGMENT 1 (dbvs1:8000)...")

# Load every internal collection, then every external one, into dbvs1.
# NOTE(review): "students" is listed in both `internal` and `external`, and
# import_csv_to_chroma deletes the collection by name before importing, so the
# external pass replaces the "students" collection written by the internal pass.
for data_dir, collection_names in ((INTERNAL_DATA, internal), (EXTERNAL_DATA, external)):
    for name in collection_names:
        filename = f"{name}.csv"
        import_csv_to_chroma(dbvs1, data_dir, name, filename, "Fragment 1")

📦 Importing FRAGMENT 1 (dbvs1:8000)...
🗑️ Old 'courses' collection deleted (Fragment 1).
✅ Imported 10 records into 'courses' (Fragment 1).
🗑️ Old 'documents' collection deleted (Fragment 1).
✅ Imported 10 records into 'documents' (Fragment 1).
🗑️ Old 'exam' collection deleted (Fragment 1).
✅ Imported 10 records into 'exam' (Fragment 1).
🗑️ Old 'programs' collection deleted (Fragment 1).
✅ Imported 10 records into 'programs' (Fragment 1).
🗑️ Old 'students' collection deleted (Fragment 1).
✅ Imported 1 records into 'students' (Fragment 1).
🗑️ Old 'professor_reviews' collection deleted (Fragment 1).
✅ Imported 10 records into 'professor_reviews' (Fragment 1).
🗑️ Old 'students' collection deleted (Fragment 1).
✅ Imported 1 records into 'students' (Fragment 1).
🗑️ Old 'support_tickets' collection deleted (Fragment 1).
✅ Imported 10 records into 'support_tickets' (Fragment 1).
🗑️ Old 'support_responses' collection deleted (Fragment 1).
✅ Imported 10 records into 'support_responses' (Fragmen

In [22]:
print("\n📦 Importing FRAGMENT 2 (dbvs2:8001)...")

# Load every internal collection, then every external one, into dbvs2,
# reading from the "*_COPY" folders.
# NOTE(review): "students" is listed in both `internal` and `external`, and
# import_csv_to_chroma deletes the collection by name before importing, so the
# external pass replaces the "students" collection written by the internal pass.
for data_dir, collection_names in ((INTERNAL_DATA_COPY, internal), (EXTERNAL_DATA_COPY, external)):
    for name in collection_names:
        filename = f"{name}.csv"
        import_csv_to_chroma(dbvs2, data_dir, name, filename, "Fragment 2")


📦 Importing FRAGMENT 2 (dbvs2:8001)...
ℹ️ No existing 'courses' collection found (Fragment 2), creating new one...
✅ Imported 10 records into 'courses' (Fragment 2).
ℹ️ No existing 'documents' collection found (Fragment 2), creating new one...
✅ Imported 10 records into 'documents' (Fragment 2).
ℹ️ No existing 'exam' collection found (Fragment 2), creating new one...
✅ Imported 10 records into 'exam' (Fragment 2).
ℹ️ No existing 'programs' collection found (Fragment 2), creating new one...
✅ Imported 10 records into 'programs' (Fragment 2).
ℹ️ No existing 'students' collection found (Fragment 2), creating new one...
✅ Imported 1 records into 'students' (Fragment 2).
ℹ️ No existing 'professor_reviews' collection found (Fragment 2), creating new one...
✅ Imported 10 records into 'professor_reviews' (Fragment 2).
🗑️ Old 'students' collection deleted (Fragment 2).
✅ Imported 1 records into 'students' (Fragment 2).
ℹ️ No existing 'support_tickets' collection found (Fragment 2), creating ne