In [2]:
import chromadb
import json
import csv
import uuid
import os

In [3]:
# Address of the Chroma server this notebook talks to over HTTP.
CHROMA_HOST = "localhost"
CHROMA_PORT = 8000

# Shared client handle used by every later cell.
client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)

In [4]:
# Directory the import cell looks in for "<collection>.csv" files.
DATA_FOLDER = "./data"

# Names of the Chroma collections to (re)build, one per line, in import order.
# NOTE(review): this name shadows the stdlib `collections` module inside the
# notebook; it is kept because later cells iterate over it.
collections = """
support_responses
support_tickets
documents
courses
exam
programs
successors
professor_reviews
""".split()

print("📦 Collections to import:", collections)

📦 Collections to import: ['support_responses', 'support_tickets', 'documents', 'courses', 'exam', 'programs', 'successors', 'professor_reviews']


In [5]:
def import_csv_to_chroma(collection_name, filename):
    """Rebuild a Chroma collection from a CSV file located in ``DATA_FOLDER``.

    The CSV is read with ``csv.DictReader`` and must provide a ``document``
    column (stored as the document text, whitespace-stripped) and a
    ``metadata`` column (parsed with ``json.loads``). Any existing collection
    with the same name is deleted first, so a re-run replaces the contents
    instead of appending to them. Every imported row gets a fresh random
    UUID4 string as its id.

    Rows that cannot be parsed (missing column, empty cell, invalid JSON) are
    reported and skipped as a whole, so ids, documents and metadatas always
    stay aligned.

    Args:
        collection_name: Name of the Chroma collection to (re)create.
        filename: CSV file name, resolved relative to ``DATA_FOLDER``.

    Returns:
        The collection object, or ``None`` when the CSV file does not exist
        (in that case nothing is deleted or created).
    """
    filepath = os.path.join(DATA_FOLDER, filename)
    if not os.path.exists(filepath):
        print(f"⚠️ Skipping {collection_name}, file {filename} not found.")
        return None

    # Best-effort drop: a failed delete is treated as "nothing to delete".
    try:
        client.delete_collection(collection_name)
        print(f"🗑️ Old '{collection_name}' collection deleted.")
    except Exception:
        print(f"ℹ️ No existing '{collection_name}' collection found, creating new one...")

    collection = client.get_or_create_collection(collection_name)

    ids, documents, metadatas = [], [], []
    # newline="" lets the csv module handle quoted fields with embedded newlines.
    with open(filepath, "r", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            # Parse the whole row before touching the output lists; appending
            # piecemeal would leave the three lists with different lengths
            # whenever a later field of the same row fails to parse.
            try:
                document = row["document"].strip()
                metadata = json.loads(row["metadata"])
            except Exception as e:
                print(f"❌ Error reading row: {e}")
                continue

            ids.append(str(uuid.uuid4()))
            documents.append(document)
            metadatas.append(metadata)

    if documents:
        collection.add(ids=ids, documents=documents, metadatas=metadatas)
        print(f"✅ Imported {len(documents)} records into '{collection_name}'.")
    else:
        print(f"⚠️ No valid rows found in {filename}.")

    return collection

In [6]:
# Each collection is loaded from the CSV file that shares its name.
for name in collections:
    filename = name + ".csv"
    import_csv_to_chroma(collection_name=name, filename=filename)

print("\n🎉 All CSV files imported into Chroma collections!")

🗑️ Old 'support_responses' collection deleted.
✅ Imported 10 records into 'support_responses'.
🗑️ Old 'support_tickets' collection deleted.
✅ Imported 10 records into 'support_tickets'.
🗑️ Old 'documents' collection deleted.
✅ Imported 10 records into 'documents'.
🗑️ Old 'courses' collection deleted.
✅ Imported 10 records into 'courses'.
🗑️ Old 'exam' collection deleted.
✅ Imported 10 records into 'exam'.
🗑️ Old 'programs' collection deleted.
✅ Imported 10 records into 'programs'.
🗑️ Old 'successors' collection deleted.
✅ Imported 10 records into 'successors'.
🗑️ Old 'professor_reviews' collection deleted.
✅ Imported 10 records into 'professor_reviews'.

🎉 All CSV files imported into Chroma collections!
