In [22]:
import json
import pandas as pd
from pymongo import MongoClient
from datetime import datetime, timezone

### MongoDB connection

In [23]:
db_name = 'fleximart'
collection = 'products'
mongo_url = 'mongodb://localhost:27017/'

client = MongoClient(mongo_url)
db = client[db_name]
products_col = db[collection]

### Operation 1: Load Data (1 mark)
Import the provided JSON file into collection 'products'

In [24]:
with open(file='products_catalog.json', mode='r',encoding='utf-8') as f:
    data = json.load(f)

In [25]:
products_col.delete_many({})  # optional: clean reload
products_col.insert_many(data)
print(f"{len(data)} products inserted successfully.")

12 products inserted successfully.


### Operation 2: Basic Query (2 marks)
- Find all products in "Electronics" category with price less than 50000
- Return only: name, price, stock

In [26]:
q2 = products_col.find(
    {
        "category": "Electronics",
        "price": {"$lt": 50000}
    },
    {
        "_id": 0,
        "name": 1,
        "price": 1,
        "stock": 1
    }
)

print("Electronics products under 50000:")
# for doc in q2:
#     print(doc)
pd.DataFrame(list(q2))

Electronics products under 50000:


Unnamed: 0,name,price,stock
0,Sony WH-1000XM5 Headphones,29990.0,200
1,Dell 27-inch 4K Monitor,32999.0,60
2,OnePlus Nord CE 3,26999.0,180


### Operation 3: Review Analysis (2 marks)
- Find all products that have average rating >= 4.0
- Use aggregation to calculate average from reviews array

In [27]:
pipeline = [
    {"$unwind": "$reviews"},
    {
        "$group": {
            "_id": "$name",
            "avg_rating": {"$avg": "$reviews.rating"}
        }
    },
    {
        "$match": {
            "avg_rating": {"$gte": 4.0}
        }
    }
]

q3 = products_col.aggregate(pipeline)

print("Products with average rating >= 4.0:")
# for doc in q3:
#     print(doc)
pd.DataFrame(list(q3))

Products with average rating >= 4.0:


Unnamed: 0,_id,avg_rating
0,Nike Air Max 270 Sneakers,4.5
1,Levi's 511 Slim Fit Jeans,4.666667
2,Apple MacBook Pro 14-inch,5.0
3,Samsung Galaxy S21 Ultra,4.666667
4,Sony WH-1000XM5 Headphones,4.666667
5,OnePlus Nord CE 3,4.0
6,Samsung 55-inch QLED TV,4.5
7,Dell 27-inch 4K Monitor,4.0
8,Adidas Originals T-Shirt,4.333333
9,Puma RS-X Sneakers,4.5


### Operation 4: Update Operation (2 marks)
- Add a new review to product "ELEC001"
- Review: {user: "U999", rating: 4, comment: "Good value", date: ISODate()}

In [28]:
products_col.update_one(
    {"product_id": "ELEC001"},
    {
        "$push": {
            "reviews": {
                "user_id": "U999",
                "username": "ValueSeeker",
                "rating": 4,
                "comment": "Good value",
                "date": datetime.now()
            }
        }
    }
)
print("New review added to product ELEC001.")

New review added to product ELEC001.


In [29]:
updated = products_col.find_one(
    {"product_id": "ELEC001"},
    {
        "_id": 0,
        "product_id": 1,
        "name": 1,
        "reviews": 1
    }
)
updated

{'product_id': 'ELEC001',
 'name': 'Samsung Galaxy S21 Ultra',
 'reviews': [{'user_id': 'U001',
   'username': 'TechGuru',
   'rating': 5,
   'comment': 'Excellent phone with amazing camera quality!',
   'date': '2024-01-15'},
  {'user_id': 'U012',
   'username': 'MobileUser',
   'rating': 4,
   'comment': 'Great performance but a bit pricey.',
   'date': '2024-02-10'},
  {'user_id': 'U023',
   'username': 'PhotoEnthusiast',
   'rating': 5,
   'comment': "Best camera phone I've ever used!",
   'date': '2024-03-05'},
  {'user_id': 'U999',
   'username': 'ValueSeeker',
   'rating': 4,
   'comment': 'Good value',
   'date': datetime.datetime(2026, 1, 8, 8, 23, 59, 929000)}]}

### Operation 5: Complex Aggregation (3 marks)
- Calculate average price by category
- Return: category, avg_price, product_count
- Sort by avg_price descending

In [33]:
pipeline = [
    {
        "$group": {
            "_id": "$category",
            "avg_price": {"$avg": "$price"},
            "product_count": {"$sum": 1}
        }
    },
    {
        "$project": {
            "_id": 0,
            "category": "$_id",
            "avg_price": {"$round": ["$avg_price", 2]},
            "product_count": 1
        }
    },
    {"$sort": {"avg_price": -1}}
]

q5 = products_col.aggregate(pipeline)

print("Average price by category:")
# for doc in q5:
#     print(doc)
pd.DataFrame(list(q5))

Average price by category:


Unnamed: 0,product_count,category,avg_price
0,6,Electronics,70830.83
1,6,Fashion,5215.0
