In [8]:
import os
import uuid
import json
import random
import time
from faker import Faker
from datetime import datetime
from kafka import KafkaProducer

fake = Faker()



In [9]:
# ----- Output folder for NDJSON -----
OUTPUT_DIR = "./stream_data/json_orders"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ----- Kafka setup -----
producer = KafkaProducer(
    bootstrap_servers="ed-kafka:29092",
    value_serializer=lambda v: json.dumps(v).encode("utf-8")
)
KAFKA_TOPIC = "custords"

# ----- Product catalog -----
PRODUCTS = [
    {"id": "P1001", "name": "Wireless Mouse", "category": "Electronics", "price": 799},
    {"id": "P1002", "name": "Mechanical Keyboard", "category": "Electronics", "price": 1999},
    {"id": "P1003", "name": "Running Shoes", "category": "Footwear", "price": 2499},
    {"id": "P1004", "name": "Coffee Mug", "category": "Kitchen", "price": 299},
    {"id": "P1005", "name": "Office Chair", "category": "Furniture", "price": 5999},
]

# ----- Generate a realistic order event -----
def generate_order():
    product = random.choice(PRODUCTS)
    quantity = random.randint(1, 3)

    return {
        "event_id": str(uuid.uuid4()),
        "event_type": "ORDER_PLACED",
        "event_timestamp": datetime.utcnow().isoformat(),

        "order": {
            "order_id": f"ORD-{uuid.uuid4().hex[:8]}",
            "customer_id": f"CUST-{random.randint(1000, 9999)}",
            "currency": "INR",
            "product_id": product["id"],
            "product_name": product["name"],
            "category": product["category"],
            "quantity": quantity,
            "unit_price": product["price"],
            "total_price": quantity * product["price"],
        },

        "payment": {
            "payment_method": random.choice(["UPI", "Card", "NetBanking"]),
            "status": "PAID",
            "transaction_id": f"TXN-{uuid.uuid4().hex[:10]}",
        },

        "customer_context": {
            "device": random.choice(["Android", "iOS", "Web"]),
            "location": fake.city(),
            "ip_address": fake.ipv4_public(),
            "session_id": uuid.uuid4().hex
        }
    }

# ----- Write NDJSON file & send each JSON line to Kafka -----
def write_json_and_stream(batch_size=5):
    timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
    json_filename = f"orders_{timestamp}.json"
    json_path = os.path.join(OUTPUT_DIR, json_filename)

    rows = [generate_order() for _ in range(batch_size)]

    # ---- Write NDJSON file (each line = 1 JSON object) ----
    with open(json_path, "w") as f:
        for row in rows:
            pass
            #f.write(json.dumps(row) + "\n")

    print(f"[FILE] Generated NDJSON: {json_path}")

    # ---- Stream to Kafka (each row is a JSON message) ----
    for row in rows:
        producer.send(KAFKA_TOPIC, row)
        print("[KAFKA] Sent:", row["order"]["order_id"])


In [10]:

# ----- Main streaming loop -----
def start_stream(file_interval=3, batch_size=10):
    print("Streaming single-line JSON files + Kafka messages...")
    while True:
        write_json_and_stream(batch_size)
        time.sleep(file_interval)


if __name__ == "__main__":
    start_stream(file_interval=3, batch_size=10)  # Create file every 3 seconds

Streaming single-line JSON files + Kafka messages...
[FILE] Generated NDJSON: ./stream_data/json_orders/orders_20251214_044259_670849.json
[KAFKA] Sent: ORD-e815f7d7
[KAFKA] Sent: ORD-385400ce
[KAFKA] Sent: ORD-a537937e
[KAFKA] Sent: ORD-16739179
[KAFKA] Sent: ORD-f21e9842
[KAFKA] Sent: ORD-9d252ca3
[KAFKA] Sent: ORD-1d4c37fd
[KAFKA] Sent: ORD-4665d706
[KAFKA] Sent: ORD-3feda164
[KAFKA] Sent: ORD-7fbc0e3d
[FILE] Generated NDJSON: ./stream_data/json_orders/orders_20251214_044302_697776.json
[KAFKA] Sent: ORD-c89a7a84
[KAFKA] Sent: ORD-4c43fd51
[KAFKA] Sent: ORD-ea60adbd
[KAFKA] Sent: ORD-d534b052
[KAFKA] Sent: ORD-ee2afe1a
[KAFKA] Sent: ORD-d56f74eb
[KAFKA] Sent: ORD-ddd6271d
[KAFKA] Sent: ORD-7473dc2b
[KAFKA] Sent: ORD-cd4dd0d6
[KAFKA] Sent: ORD-1dbea346
[FILE] Generated NDJSON: ./stream_data/json_orders/orders_20251214_044305_703966.json
[KAFKA] Sent: ORD-9c0a25cb
[KAFKA] Sent: ORD-dadce72d
[KAFKA] Sent: ORD-156de17b
[KAFKA] Sent: ORD-5539dbad
[KAFKA] Sent: ORD-6e98ba65
[KAFKA] Sent: 

KeyboardInterrupt: 