In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Seed both random sources used below (NumPy's global generator and the
# stdlib `random` module) so re-running this cell regenerates the same data.
np.random.seed(42)
random.seed(42)

# --- Data Generation Parameters ---
num_rows = 1000
product_categories = ['Electronics', 'Books', 'Clothing', 'Home Goods', 'Food']
regions = ['North', 'South', 'East', 'West']
payment_methods = ['Credit Card', 'Debit Card', 'PayPal', 'Bank Transfer']
customer_ages = range(18, 70)  # 18..69 inclusive; the range upper bound is exclusive
product_names = [
    'Laptop Pro', 'Smartphone X', 'E-Reader Basic', 'Mystery Novel', 'Sci-Fi Classic',
    'T-Shirt V-Neck', 'Jeans Slim Fit', 'Blender Deluxe', 'Coffee Maker', 'Organic Apples',
    'Gaming Mouse', 'Headphones ANC', 'Cookbook Veggie', 'Sneakers Urban', 'Dining Table Set'
]

# Generate Dates: every order gets start_date plus a random offset of 0..364
# days (random.randint is inclusive on both ends).
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(days=random.randint(0, 364)) for _ in range(num_rows)]

# Generate Customer IDs (some repetition for grouping): IDs are drawn from
# CUST100..CUST300, far fewer distinct values than rows.
customer_ids = [f'CUST{random.randint(100, 300)}' for _ in range(num_rows)]

# Generate Product IDs (some repetition)
# NOTE(review): product_ids is generated here but never placed in the
# DataFrame built below. It is kept so any code that references the name
# keeps working -- confirm whether a 'ProductID' column was intended.
product_ids = [f'PROD{random.randint(1, 50)}' for _ in range(num_rows)]


# Column values are drawn in dict order from NumPy's global generator, so
# reordering these entries would change the generated values for a given seed.
# NOTE(review): ProductCategory and ProductName are sampled independently of
# each other, so a row's product name need not belong to its category.
data = {
    'OrderID': range(1, num_rows + 1),
    'CustomerID': customer_ids,
    'OrderDate': dates,
    'Region': np.random.choice(regions, num_rows),
    'ProductCategory': np.random.choice(product_categories, num_rows),
    'ProductName': np.random.choice(product_names, num_rows),
    'Quantity': np.random.randint(1, 10, num_rows),  # 1..9; upper bound is exclusive
    'PricePerUnit': np.round(np.random.uniform(5, 500, num_rows), 2),
    'PaymentMethod': np.random.choice(payment_methods, num_rows),
    'CustomerAge': np.random.choice(customer_ages, num_rows)
}

df = pd.DataFrame(data)

# Calculate TotalPrice. The product of an integer and a 2-decimal float can
# carry binary floating-point noise (e.g. 184.95000000000002), which would be
# written verbatim to the CSV; round to 2 decimals like PricePerUnit.
df['TotalPrice'] = (df['Quantity'] * df['PricePerUnit']).round(2)

# Save to CSV (index=False: the positional index is not written as a column)
csv_file_path = 'orders.csv'
df.to_csv(csv_file_path, index=False)

print(f"Dataset with {num_rows} rows generated and saved to '{csv_file_path}'")
print("\nFirst 5 rows of the dataset:")
print(df.head())

Dataset with 1000 rows generated and saved to 'orders.csv'

First 5 rows of the dataset:
   OrderID CustomerID  OrderDate Region ProductCategory       ProductName  \
0        1    CUST239 2023-11-24   East     Electronics      Smartphone X   
1        2    CUST157 2023-02-27   West     Electronics  Dining Table Set   
2        3    CUST265 2023-01-13  North     Electronics      Smartphone X   
3        4    CUST138 2023-05-21   East            Food    Organic Apples   
4        5    CUST168 2023-05-06   East      Home Goods    Jeans Slim Fit   

   Quantity  PricePerUnit  PaymentMethod  CustomerAge  TotalPrice  
0         4        356.14  Bank Transfer           52     1424.56  
1         9        474.68         PayPal           22     4272.12  
2         9         20.55  Bank Transfer           44      184.95  
3         9        369.42     Debit Card           61     3324.78  
4         7        168.47     Debit Card           29     1179.29  
