In [1]:
import os
import uuid
import json
import random
import time
from faker import Faker
from datetime import datetime
from kafka import KafkaProducer

fake = Faker()



In [2]:
# ----- Output folder for NDJSON -----
OUTPUT_DIR = "./stream_data/json_orders"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ----- Kafka setup -----
producer = KafkaProducer(
    bootstrap_servers="ed-kafka:29092",
    value_serializer=lambda v: json.dumps(v).encode("utf-8")
)
KAFKA_TOPIC = "custords"



In [3]:
# ----- Product catalog -----
PRODUCTS = [
    {"id": "P1001", "name": "Wireless Mouse", "category": "Electronics", "price": 799},
    {"id": "P1002", "name": "Mechanical Keyboard", "category": "Electronics", "price": 1999},
    {"id": "P1003", "name": "Running Shoes", "category": "Footwear", "price": 2499},
    {"id": "P1004", "name": "Coffee Mug", "category": "Kitchen", "price": 299},
    {"id": "P1005", "name": "Office Chair", "category": "Furniture", "price": 5999},
]



In [4]:
# ----- Generate a realistic order event -----
def generate_order():
    product = random.choice(PRODUCTS)
    quantity = random.randint(1, 3)

    return {
        "event_id": str(uuid.uuid4()),
        "event_type": "ORDER_PLACED",
        "event_timestamp": datetime.utcnow().isoformat(),

        "order": {
            "order_id": f"ORD-{uuid.uuid4().hex[:8]}",
            "customer_id": f"CUST-{random.randint(1000, 9999)}",
            "currency": "INR",
            "product_id": product["id"],
            "product_name": product["name"],
            "category": product["category"],
            "quantity": quantity,
            "unit_price": product["price"],
            "total_price": quantity * product["price"],
        },

        "payment": {
            "payment_method": random.choice(["UPI", "Card", "NetBanking"]),
            "status": "PAID",
            "transaction_id": f"TXN-{uuid.uuid4().hex[:10]}",
        },

        "customer_context": {
            "device": random.choice(["Android", "iOS", "Web"]),
            "location": fake.city(),
            "ip_address": fake.ipv4_public(),
            "session_id": uuid.uuid4().hex
        }
    }



In [5]:
# Write JSON file & send each JSON data to Kafka
def write_json_and_stream(batch_size=5):
    timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
    json_filename = f"orders_{timestamp}.json"
    json_path = os.path.join(OUTPUT_DIR, json_filename)

    rows = [generate_order() for _ in range(batch_size)]

    # Write single line JSON file
    with open(json_path, "w") as f:
        for row in rows:
            pass
            #f.write(json.dumps(row) + "\n")

    print(f"[FILE] Generated NDJSON: {json_path}")

    # Stream to Kafka
    for row in rows:
        producer.send(KAFKA_TOPIC, row)
        print("[KAFKA] Sent:", row["order"]["order_id"])


In [8]:

# Main streaming loop
def start_stream(file_interval=3, batch_size=10):
    print(f"Streaming single-line JSON files to Kafka topic {KAFKA_TOPIC}")
    while True:
        write_json_and_stream(batch_size)
        time.sleep(file_interval)


if __name__ == "__main__":
    start_stream(file_interval=3, batch_size=10)  # Create file every 3 seconds

Streaming single-line JSON files to Kafka topic custords
[FILE] Generated NDJSON: ./stream_data/json_orders/orders_20251215_063037_042439.json
[KAFKA] Sent: ORD-2f99cd79
[KAFKA] Sent: ORD-4262fdd5
[KAFKA] Sent: ORD-b8c046f0
[KAFKA] Sent: ORD-884e1ad5
[KAFKA] Sent: ORD-b0ce7f69
[KAFKA] Sent: ORD-3b50a94c
[KAFKA] Sent: ORD-340c6ffa
[KAFKA] Sent: ORD-d3bfe6a7
[KAFKA] Sent: ORD-9aea3826
[KAFKA] Sent: ORD-718df504
[FILE] Generated NDJSON: ./stream_data/json_orders/orders_20251215_062937_864919.json
[KAFKA] Sent: ORD-eed73eba
[KAFKA] Sent: ORD-cc78a024
[KAFKA] Sent: ORD-5e73ce0c
[KAFKA] Sent: ORD-9fe2cc82
[KAFKA] Sent: ORD-667191ad
[KAFKA] Sent: ORD-768fa218
[KAFKA] Sent: ORD-2295cc46
[KAFKA] Sent: ORD-c4830bd7
[KAFKA] Sent: ORD-461ebd25
[KAFKA] Sent: ORD-d38485c4
[FILE] Generated NDJSON: ./stream_data/json_orders/orders_20251215_062940_872954.json
[KAFKA] Sent: ORD-ecb388d8
[KAFKA] Sent: ORD-09df632f
[KAFKA] Sent: ORD-3102cf39
[KAFKA] Sent: ORD-abe3307b
[KAFKA] Sent: ORD-d422d245
[KAFKA] Se

KeyboardInterrupt: 