In [None]:
import pandas as pd
import json
import csv
from datetime import datetime

# Load MongoDB JSON data and read each line as separate JSON objects
def load_mongo_json(file_path):
    """Load a line-delimited JSON file into a list of documents.

    Every non-blank line of the file is parsed as one independent JSON
    document. Blank or whitespace-only lines (for example a trailing empty
    line at the end of the file) are skipped instead of aborting the load.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also strips a leading
    # byte-order mark, which json.loads would otherwise reject.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return [json.loads(line) for line in file if line.strip()]

# Write data to CSV
def write_to_csv(file_name, data, fieldnames):
    """Write an iterable of dictionaries to a CSV file with a header row.

    Args:
        file_name: Path of the CSV file to create (an existing file is
            overwritten).
        data: Iterable of dicts; each dict becomes one CSV row.
        fieldnames: Column names, in output order, used for the header and
            to pick values out of each row dict.
    """
    # newline='' lets the csv module control line endings itself.
    with open(file_name, mode='w', newline='', encoding='utf-8') as out:
        dict_writer = csv.DictWriter(out, fieldnames=fieldnames)
        dict_writer.writeheader()
        for row in data:
            dict_writer.writerow(row)

# Convert MongoDB data to a structured format
def segregate_data(users_file, brands_file, receipts_file):
    """Flatten the users, brands and receipts exports into five CSV files.

    The three input files are read with ``load_mongo_json`` and written, via
    ``write_to_csv``, to the current working directory as:

    * ``users_data.csv``         - one row per user document
    * ``brands_data.csv``        - one row per brand document
    * ``cpgs_data.csv``          - one row per distinct CPG reference found
      on the brands (``name`` holds the reference's ``$ref`` value)
    * ``receipts_data.csv``      - one row per receipt document
    * ``receipt_items_data.csv`` - one row per entry of a receipt's
      ``rewardsReceiptItemList``, linked back through ``receipt_id``

    Missing optional fields are emitted as empty cells. Date fields are
    converted with ``convert_mongo_date``.

    Args:
        users_file: Path to the users JSON file.
        brands_file: Path to the brands JSON file.
        receipts_file: Path to the receipts JSON file.

    Raises:
        KeyError: If a document has no ``_id.$oid``.
    """
    users_data = load_mongo_json(users_file)
    brands_data = load_mongo_json(brands_file)
    receipts_data = load_mongo_json(receipts_file)

    # Extract Users data (Flatten MongoDB document)
    usersnew = [
        {
            # MongoDB ObjectID ($oid) converted to string
            "user_id": user["_id"]["$oid"],
            "active": user.get("active"),
            "created_date": convert_mongo_date(user.get("createdDate")),
            "last_login": convert_mongo_date(user.get("lastLogin")),
            "role": user.get("role"),
            "signup_source": user.get("signUpSource"),
            "state": user.get("state"),
        }
        for user in users_data
    ]

    # Define the fieldnames for Users CSV
    users_fieldnames = ["user_id", "active", "created_date", "last_login", "role", "signup_source", "state"]
    write_to_csv("users_data.csv", usersnew, users_fieldnames)

    # Extract Brands and CPGs data
    brandsnew = []
    cpgs = {}  # keyed by cpg_id so each CPG is written only once
    for brand in brands_data:
        # Resolve the CPG reference once; `or {}` also tolerates an explicit
        # null for "cpg" or "$id" instead of raising AttributeError.
        cpg_ref = brand.get("cpg") or {}
        cpg_id = (cpg_ref.get("$id") or {}).get("$oid")

        brandsnew.append({
            "brand_id": brand["_id"]["$oid"],
            "barcode": brand.get("barcode"),
            "brand_code": brand.get("brandCode"),
            "category": brand.get("category"),
            "category_code": brand.get("categoryCode"),
            "name": brand.get("name"),
            "top_brand": brand.get("topBrand"),
            "cpg_id": cpg_id,
        })

        # A brand without a CPG reference must not create an id-less CPG row.
        if cpg_id is not None:
            cpgs[cpg_id] = {
                "cpg_id": cpg_id,
                "name": cpg_ref.get("$ref"),
            }

    # Define the fieldnames for Brands CSV
    brands_fieldnames = ["brand_id", "barcode", "brand_code", "category", "category_code", "name", "top_brand", "cpg_id"]
    write_to_csv("brands_data.csv", brandsnew, brands_fieldnames)

    # Write CPGs data to CSV
    cpgs_list = list(cpgs.values())
    cpgs_fieldnames = ["cpg_id", "name"]
    write_to_csv("cpgs_data.csv", cpgs_list, cpgs_fieldnames)

    # Extract Receipts and ReceiptItems data
    receiptsnew = []
    receipt_items = []
    for receipt in receipts_data:
        # MongoDB ObjectID ($oid) converted to string; shared by the receipt
        # row and all of its item rows.
        receipt_id = receipt["_id"]["$oid"]

        receiptsnew.append({
            "r_id": receipt_id,
            "bonus_points_earned": receipt.get("bonusPointsEarned"),
            "bonus_points_earned_reason": receipt.get("bonusPointsEarnedReason"),
            "create_date": convert_mongo_date(receipt.get("createDate")),
            "date_scanned": convert_mongo_date(receipt.get("dateScanned")),
            "finished_date": convert_mongo_date(receipt.get("finishedDate")),
            "modify_date": convert_mongo_date(receipt.get("modifyDate")),
            "points_awarded_date": convert_mongo_date(receipt.get("pointsAwardedDate")),
            "points_earned": receipt.get("pointsEarned"),
            "purchase_date": convert_mongo_date(receipt.get("purchaseDate")),
            "purchased_item_count": receipt.get("purchasedItemCount"),
            "rewards_receipt_status": receipt.get("rewardsReceiptStatus"),
            "total_spent": receipt.get("totalSpent"),
            "user_id": receipt.get("userId"),
        })

        # `or []` treats both a missing and an explicit null item list as empty.
        for item in receipt.get("rewardsReceiptItemList") or []:
            receipt_items.append({
                "receipt_id": receipt_id,
                "barcode": item.get("barcode"),
                "description": item.get("description"),
                "final_price": item.get("finalPrice"),
                "item_price": item.get("itemPrice"),
                "quantity_purchased": item.get("quantityPurchased"),
                "needs_fetch_review": item.get("needsFetchReview"),
                "needs_fetch_review_reason": item.get("needsFetchReviewReason"),
                "partner_item_id": item.get("partnerItemId"),
                "prevent_target_gap_points": item.get("preventTargetGapPoints"),
                "user_flagged_barcode": item.get("userFlaggedBarcode"),
                "user_flagged_description": item.get("userFlaggedDescription"),
                "user_flagged_new_item": item.get("userFlaggedNewItem"),
                "user_flagged_price": item.get("userFlaggedPrice"),
                "user_flagged_quantity": item.get("userFlaggedQuantity"),
                "points_not_awarded_reason": item.get("pointsNotAwardedReason"),
                "points_payer_id": item.get("pointsPayerId"),
                "rewards_group": item.get("rewardsGroup"),
                "rewards_product_partner_id": item.get("rewardsProductPartnerId"),
            })

    # Define the fieldnames for Receipts CSV
    receipts_fieldnames = ["r_id", "bonus_points_earned", "bonus_points_earned_reason", "create_date", "date_scanned",
                           "finished_date", "modify_date", "points_awarded_date", "points_earned", "purchase_date",
                           "purchased_item_count", "rewards_receipt_status", "total_spent", "user_id"]
    write_to_csv("receipts_data.csv", receiptsnew, receipts_fieldnames)

    # Define the fieldnames for ReceiptItems CSV
    receipt_items_fieldnames = ["receipt_id", "barcode", "description", "final_price", "item_price",
                                "quantity_purchased", "needs_fetch_review", "needs_fetch_review_reason", "partner_item_id", "prevent_target_gap_points",
                                "user_flagged_barcode", "user_flagged_description", "user_flagged_new_item",
                                "user_flagged_price", "user_flagged_quantity", "points_not_awarded_reason", "points_payer_id",
                                "rewards_group", "rewards_product_partner_id"]
    write_to_csv("receipt_items_data.csv", receipt_items, receipt_items_fieldnames)

    print("json to csv completed and Files are saved as per below:")
    print("users_data.csv")
    print("brands_data.csv")
    print("cpgs_data.csv")
    print("receipts_data.csv")
    print("receipt_items_data.csv")

# Function to convert MongoDB date format (timestamps) to SQL-friendly format
def convert_mongo_date(mongo_date):
    """Convert a MongoDB ``{"$date": ...}`` value to ``'YYYY-MM-DD HH:MM:SS'`` (UTC).

    The ``$date`` payload may be:

    * a number of milliseconds since the Unix epoch,
    * a ``{"$numberLong": "<milliseconds>"}`` wrapper, or
    * an ISO-8601 string (a trailing ``Z`` is accepted; a string without an
      offset is taken to be UTC).

    Args:
        mongo_date: The raw field value, or ``None`` when the field is absent.

    Returns:
        The formatted UTC timestamp string, or ``None`` when ``mongo_date``
        is not a dict containing ``"$date"`` or the ``$date`` value is null.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    if not isinstance(mongo_date, dict) or "$date" not in mongo_date:
        return None

    raw = mongo_date["$date"]
    if raw is None:
        return None

    # Unwrap the {"$numberLong": "..."} encoding of the millisecond count.
    if isinstance(raw, dict) and "$numberLong" in raw:
        raw = int(raw["$numberLong"])

    if isinstance(raw, str):
        # datetime.fromisoformat only understands a "Z" suffix on
        # Python >= 3.11, so spell the UTC offset out explicitly.
        iso_text = raw[:-1] + "+00:00" if raw.endswith("Z") else raw
        moment = datetime.fromisoformat(iso_text)
        if moment.tzinfo is None:
            moment = moment.replace(tzinfo=timezone.utc)
        else:
            moment = moment.astimezone(timezone.utc)
    else:
        # Milliseconds -> seconds. Timezone-aware replacement for the
        # deprecated datetime.utcfromtimestamp; the formatted text is the same.
        moment = datetime.fromtimestamp(raw / 1000, tz=timezone.utc)

    return moment.strftime('%Y-%m-%d %H:%M:%S')

# Paths to the original MongoDB JSON files (relative to the working directory)
users_file, brands_file, receipts_file = (
    '../data_folder/users.json',
    '../data_folder/brands.json',
    '../data_folder/receipts.json',
)

# Segregate the data: writes the five CSV files into the current directory
segregate_data(
    users_file=users_file,
    brands_file=brands_file,
    receipts_file=receipts_file,
)

: 