In [0]:
%pip install faker

In [0]:
%restart_python

In [0]:
# Create the landing directory that the generator cell further down writes its
# JSON files into.
# NOTE(review): the writer uses "/tmp/streaming_landing_store/" without the
# "dbfs:" scheme — presumably both resolve to the same DBFS location; confirm.
dbutils.fs.mkdirs("dbfs:/tmp/streaming_landing_store/")

In [0]:
from dataclasses import dataclass, asdict
from faker import Faker
import random
import json
import time
import time
from datetime import datetime

In [0]:
# Shared Faker instance; FakeRecord.generate() reads this module-level name to
# produce its synthetic customer, store and product values.
fake = Faker()

In [0]:
@dataclass
class FakeRecord:
    """One synthetic, denormalised sales transaction.

    The attributes are grouped by the dimension they describe (customer, time,
    location, product) followed by the fact measures. Every attribute is
    declared as ``str``; numeric values are stringified by ``generate()``.
    """

    transaction_id: str

    # --- customer dimension ---
    customer_id: str
    customer_name: str
    customer_email: str
    customer_phone: str
    customer_birthdate: str
    customer_gender: str

    # --- time dimension (all derived from one transaction timestamp) ---
    transaction_timestamp: str
    transaction_date: str
    transaction_hour: str
    transaction_day_of_week: str
    transaction_month: str
    transaction_year: str

    # --- location dimension ---
    store_id: str
    store_name: str
    store_city: str
    store_state: str
    store_country: str
    store_postcode: str

    # --- product dimension ---
    product_id: str
    product_name: str
    product_category: str
    product_price: str

    # --- fact measures ---
    quantity: str
    total_amount: str
    payment_method: str

    @staticmethod
    def generate():
        """Return a new ``FakeRecord`` populated with random values.

        Relies on the module-level ``fake`` Faker instance and on the
        ``random`` module. The time fields are all derived from a single
        timestamp, and ``total_amount`` is the rounded product of the drawn
        price and quantity.
        """
        # Base draws that several fields below are derived from.
        moment = fake.date_time_this_year()
        units = random.randint(1, 5)
        unit_price = round(random.uniform(10, 500), 2)

        # The groups are built in field order so the sequence of Faker/random
        # calls is the same as passing everything as keyword arguments.
        identity = {"transaction_id": fake.uuid4()}

        customer = {
            "customer_id": fake.uuid4(),
            "customer_name": fake.name(),
            "customer_email": fake.email(),
            "customer_phone": fake.phone_number(),
            "customer_birthdate": str(fake.date_of_birth()),
            "customer_gender": random.choice(["Male", "Female", "Other"]),
        }

        when = {
            "transaction_timestamp": str(moment),
            "transaction_date": moment.strftime("%Y-%m-%d"),
            "transaction_hour": str(moment.hour),
            "transaction_day_of_week": moment.strftime("%A"),
            "transaction_month": moment.strftime("%B"),
            "transaction_year": str(moment.year),
        }

        store = {
            "store_id": fake.uuid4(),
            "store_name": fake.company(),
            "store_city": fake.city(),
            "store_state": fake.state(),
            "store_country": fake.country(),
            "store_postcode": fake.postcode(),
        }

        product = {
            "product_id": fake.uuid4(),
            "product_name": fake.word().capitalize(),
            "product_category": random.choice(
                ["Electronics", "Clothing", "Home", "Books", "Toys"]
            ),
            "product_price": str(unit_price),
        }

        measures = {
            "quantity": str(units),
            "total_amount": str(round(unit_price * units, 2)),
            "payment_method": random.choice(
                ["Credit Card", "Cash", "PayPal", "Gift Card"]
            ),
        }

        return FakeRecord(
            **identity, **customer, **when, **store, **product, **measures
        )

In [0]:
import uuid

def generate_streaming_data(num_files=1, records_per_file=5, delay=1,
                            landing_dir="/tmp/streaming_landing_store"):
    """Write batches of fake transaction records as JSON files.

    Each file holds a JSON array of ``records_per_file`` records produced by
    ``FakeRecord.generate()`` and is written with ``dbutils.fs.put``. File
    names combine the current timestamp with a random UUID so that files
    written within the same second do not collide.

    Args:
        num_files: Number of files to write.
        records_per_file: Number of records serialised into each file.
        delay: Seconds to pause between consecutive files. No pause follows
            the last file, so a single-file call returns immediately.
        landing_dir: Directory the files are written under. Defaults to the
            landing path used elsewhere in this notebook; a trailing slash is
            tolerated.

    Returns:
        None. The path of each written file is printed.
    """
    # Normalise so the default and a caller-supplied "…/" give the same paths.
    target_dir = landing_dir.rstrip("/")

    for file_index in range(num_files):
        records = [asdict(FakeRecord.generate()) for _ in range(records_per_file)]
        timestamp_suffix = datetime.now().strftime("%Y%m%d_%H%M%S")
        unique_id = uuid.uuid4().hex
        file_path = f"{target_dir}/data_{timestamp_suffix}_{unique_id}.json"

        # Third positional argument is kept as in the original call
        # (presumably the overwrite flag — confirm against the dbutils docs).
        dbutils.fs.put(file_path, json.dumps(records), True)
        print(f"Generated: {file_path}")

        # Pace the stream between files only; sleeping after the final file
        # would just delay the caller.
        if file_index < num_files - 1:
            time.sleep(delay)
# Write a single batch using the defaults declared in the signature above.
generate_streaming_data()

In [0]:
%fs ls dbfs:/tmp/streaming_landing_store/
