In [12]:
# Generate a sample sales database for a Tableau showcase.

# Install the requirements with the line below:
# pip install -r requirements.txt

import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import datetime, timedelta
from google.colab import files #for google colab

# Shared Faker instance, used below to produce order dates, customer names and cities.
# NOTE(review): no seed is set in this cell, so the generated values presumably
# differ between runs — confirm whether reproducible output is wanted.
fake = Faker()

# Configuration for the generated data set

# Number of order rows to generate.
num_orders = 1000

# Region -> countries that may be picked for an order placed in that region.
countries_by_region = {
    "North America": ["USA", "Canada", "Mexico"],
    "Europe": ["Germany", "France", "UK", "Italy"],
    "Asia": ["China", "India", "Japan", "South Korea"],
    "Australia": ["Australia", "New Zealand"],
}

# The selectable regions are exactly the keys of the mapping above
# (dicts preserve insertion order, so the list order is unchanged).
regions = list(countries_by_region)

# Category -> sub-categories available within it.
categories = {
    "Furniture": ["Chair", "Desk", "Table", "Bookshelf"],
    "Technology": ["Laptop", "Monitor", "Phone", "Tablet"],
    "Office Supplies": ["Pen", "Notebook", "Stapler", "Paper"],
}

# Customer segments and shipping options an order can be assigned.
segments = ["Consumer", "Corporate", "Home Office"]
shipping_modes = ["Standard Class", "Second Class", "First Class", "Same Day"]

# Build the order records
def _make_order(order_number):
    """Return one randomly generated order row as a dict keyed by column name.

    ``order_number`` is formatted into the "Order ID" value (zero-padded to
    four digits). Every other field is drawn from ``random`` or ``fake``.
    The draws happen in a fixed sequence so the stream of random values is
    consumed in the same order for every row.
    """
    # Location: pick a region first, then a country belonging to it.
    region = random.choice(regions)
    country = random.choice(countries_by_region[region])

    # Order date falls within the last two years; shipping follows 2-10 days later.
    ordered_on = fake.date_between(start_date="-2y", end_date="today")
    shipped_on = ordered_on + timedelta(days=random.randint(2, 10))

    # Product: a category, then one of its sub-categories.
    category = random.choice(list(categories))
    sub_category = random.choice(categories[category])

    # Pricing.
    unit_price = round(random.uniform(10, 1000), 2)
    quantity = random.randint(1, 10)
    discount = round(random.choice([0, 0.1, 0.2]), 2)
    gross = unit_price * quantity
    sales = round(gross * (1 - discount), 2)
    # Simulated cost is 60-90% of the undiscounted amount; profit is sales minus cost.
    cost = gross * random.uniform(0.6, 0.9)
    profit = round(sales - cost, 2)

    # Remaining random fields, drawn in output-column order.
    customer_id = f"C-{random.randint(100, 999)}"
    customer_name = fake.name()
    segment = random.choice(segments)
    city = fake.city()
    product_id = f"P-{random.randint(1000, 9999)}"
    shipping_mode = random.choice(shipping_modes)

    return {
        "Order ID": f"O-{order_number:04d}",
        "Order Date": ordered_on,
        "Ship Date": shipped_on,
        "Customer ID": customer_id,
        "Customer Name": customer_name,
        "Segment": segment,
        "Region": region,
        "Country": country,
        "City": city,
        "Product ID": product_id,
        "Product Name": sub_category,  # the sub-category doubles as the product name
        "Category": category,
        "Sub-Category": sub_category,
        "Quantity": quantity,
        "Unit Price": unit_price,
        "Sales": sales,
        "Discount": discount,
        "Profit": profit,
        "Shipping Mode": shipping_mode,
    }


# One record per order number, numbered from 1 to num_orders inclusive.
orders = [_make_order(order_number) for order_number in range(1, num_orders + 1)]

# Turn the collected order records into a DataFrame (one row per order).
df_sales = pd.DataFrame(data=orders)

# Destination workbook; this path is the one used when running on Google Colab.
file_path = "/content/sales_database_tableau_showcase.xlsx"

# Write the table to Excel without the DataFrame index column.
df_sales.to_excel(excel_writer=file_path, index=False)

# Google Colab helper: offer the written workbook as a download.
files.download(file_path)

df_sales
file_path  # last expression of the cell; its value is what the notebook displays


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

'/content/sales_database_tableau_showcase.xlsx'