Imports and MongoDB Connection

In [1]:
from pymongo import MongoClient
from datetime import datetime
import pandas as pd
import json

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["eduhub_db"]

print("Connected to MongoDB:", db)


Connected to MongoDB: Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'eduhub_db')


Create Collections with Validation

In [2]:
# Example schema validation for users collection
user_validator = {
    "$jsonSchema": {
        "bsonType": "object",
        "required": ["userId", "email", "firstName", "lastName", "role"],
        "properties": {
            "userId": {"bsonType": "string"},
            "email": {"bsonType": "string"},
            "firstName": {"bsonType": "string"},
            "lastName": {"bsonType": "string"},
            "role": {"enum": ["student", "instructor"]},
            "dateJoined": {"bsonType": "date"},
            "isActive": {"bsonType": "bool"}
        }
    }
}

db.create_collection("users", validator=user_validator)
db.create_collection("courses")
db.create_collection("enrollments")
db.create_collection("lessons")
db.create_collection("assignments")
db.create_collection("submissions")

print("Collections created with schema validation!")


ServerSelectionTimeoutError: localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 68e03f3ba8c0be8b348c225d, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>

Load and Insert Sample Data

In [None]:
with open("../data/sample_data.json") as f:
    sample_data = json.load(f)

db.users.insert_many(sample_data["users"])
db.courses.insert_many(sample_data["courses"])
db.enrollments.insert_many(sample_data["enrollments"])
db.lessons.insert_many(sample_data["lessons"])
db.assignments.insert_many(sample_data["assignments"])
db.submissions.insert_many(sample_data["submissions"])

print("Sample data inserted!")


CRUD: Create Operations

In [None]:
# Add a new student
new_student = {
    "userId": "stu_21",
    "email": "student21@eduhub.com",
    "firstName": "John",
    "lastName": "Doe",
    "role": "student",
    "dateJoined": datetime.now(),
    "isActive": True
}
db.users.insert_one(new_student)

# Create a new course
new_course = {
    "courseId": "c_09",
    "title": "Data Science Basics",
    "instructorId": "inst_02",
    "category": "Data Science",
    "level": "beginner",
    "duration": 12,
    "price": 99,
    "tags": ["python", "ml"],
    "createdAt": datetime.now(),
    "isPublished": False
}
db.courses.insert_one(new_course)


CRUD: Read Operations

In [None]:
# Find all active students
active_students = list(db.users.find({"role": "student", "isActive": True}))
pd.DataFrame(active_students)

# Retrieve course details with instructor info
course_details = list(db.courses.aggregate([
    {"$lookup": {
        "from": "users",
        "localField": "instructorId",
        "foreignField": "userId",
        "as": "instructor_info"
    }}
]))
pd.DataFrame(course_details)


CRUD: Update Operations

In [None]:
# Update user profile
db.users.update_one(
    {"userId": "stu_21"},
    {"$set": {"profile": {"bio": "Enthusiastic learner", "skills": ["Python", "SQL"]}}}
)

# Publish a course
db.courses.update_one({"courseId": "c_09"}, {"$set": {"isPublished": True}})


CRUD: Delete Operations

In [None]:
# Soft delete user
db.users.update_one({"userId": "stu_21"}, {"$set": {"isActive": False}})

# Delete an enrollment
db.enrollments.delete_one({"studentId": "stu_05", "courseId": "c_01"})


Aggregation Example

In [None]:
# Total enrollments per course
enrollment_stats = list(db.enrollments.aggregate([
    {"$group": {"_id": "$courseId", "total_students": {"$sum": 1}}}
]))
pd.DataFrame(enrollment_stats)


Indexing

In [None]:
db.users.create_index("email", unique=True)
db.courses.create_index([("title", "text"), ("category", 1)])
print("Indexes created!")
