In [16]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [17]:
# Configuration

# Fixed seed for both RNGs used below, so that Restart Kernel -> Run All
# regenerates the same random draws (only the date anchor still follows the clock).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Number of rows to generate
n_rows = 10000

# Data with prefix, suitable for client identity, for example.
# IDs are drawn with replacement from 1..1500, so the same client appears on several rows.
client_ids = [f"PREF_{str(random.randint(1, 1500)).zfill(7)}" for _ in range(n_rows)]

# Categorical data
product_choices = ['product a', 'product b', 'product c', 'product d']

# Product distribution: 'product c' should be the dominant category (50–65%; 60% here).
# This rule can also be applied when generating other data distributions.
product_probs = [0.1, 0.1, 0.6, 0.2]  # product a, product b, product c, product d
products = np.random.choice(product_choices, size=n_rows, p=product_probs)

# Generate random dates within the last 4 years (approximated as 4 * 365 days).
# Every date keeps the time of day at which this cell was executed.
date_window_days = 4 * 365
start_date = datetime.today() - timedelta(days=date_window_days)
dates = [start_date + timedelta(days=random.randint(0, date_window_days)) for _ in range(n_rows)]

# Define base price range (inclusive low, high) for each product
base_price_map = {
    'product a': (100, 200),
    'product b': (150, 250),
    'product c': (200, 300),
    'product d': (100, 220)
}

# Assigning base prices to products: one integer per row, drawn uniformly
# from the price range of that row's product.
amounts = [random.randint(*base_price_map[product]) for product in products]

In [18]:
# Assemble the generated columns into one transactions table.
# Transaction IDs are sequential, starting at 1 and zero-padded to five digits.
tx_ids = [f"TX{row_number:05d}" for row_number in range(1, n_rows + 1)]

column_data = {
    'transaction_id': tx_ids,
    'client_id': client_ids,
    'product_name': products,
    'order_date': dates,
    'amount': amounts,
}
df = pd.DataFrame(column_data)

# Preview the first rows
df.head()

Unnamed: 0,transaction_id,client_id,product_name,order_date,amount
0,TX00001,PREF_0000582,product c,2022-09-17 19:22:45.393098,202
1,TX00002,PREF_0000196,product c,2024-02-01 19:22:45.393098,255
2,TX00003,PREF_0000008,product b,2023-07-21 19:22:45.393098,205
3,TX00004,PREF_0001281,product c,2023-02-23 19:22:45.393098,212
4,TX00005,PREF_0000557,product a,2022-07-29 19:22:45.393098,177


In [None]:
# Save to CSV (without the DataFrame index column).
# The path is held in one variable so the confirmation message always names
# the file that was actually written.
output_path = "transaction_data.csv"
df.to_csv(output_path, index=False)

print(f"✅ Dummy transaction data generated and saved as '{output_path}'")