In [None]:
import pandas as pd
from faker import Faker
import random

# Synthetic medical-product order data for the United Kingdom.
#
# Each row's 'County' is drawn from the counties of that row's 'State', so the
# two columns always agree. All choice pools are built once at module level
# instead of being rebuilt for every generated row.

# Number of synthetic order rows written to the CSV file.
NUM_ROWS = 5000

# Choice pools for the categorical columns.
ORDER_PRIORITIES = ['High', 'Medium', 'Low']
SHIP_MODES = ['Standard', 'Express']
SEGMENTS = ['Consumer', 'Corporate', 'Home Office']
CATEGORIES = ['Medical', 'Pharmaceutical', 'Healthcare']
SUB_CATEGORIES = ['Syringes', 'Bandages', 'Medications', 'Medical Devices']
PRODUCT_CONTAINERS = ['Box', 'Pack', 'Case', 'Bag']

# Counties grouped by the UK nation stored in the 'State' column.
COUNTIES_BY_STATE = {
    'England': [
        'Buckinghamshire', 'Cambridgeshire', 'Cheshire', 'Cornwall', 'Cumbria', 'Derbyshire', 'Devon', 'Dorset',
        'Durham', 'Essex', 'Gloucestershire', 'Greater London', 'Greater Manchester', 'Hampshire',
        'Herefordshire', 'Hertfordshire', 'Kent', 'Lancashire', 'Leicestershire', 'Lincolnshire', 'Merseyside',
        'Norfolk', 'North Yorkshire', 'Northamptonshire', 'Northumberland', 'Nottinghamshire', 'Oxfordshire',
        'Rutland', 'Shropshire', 'Somerset', 'South Yorkshire', 'Staffordshire', 'Suffolk', 'Surrey',
        'Tyne and Wear', 'Warwickshire', 'West Midlands', 'West Sussex', 'West Yorkshire', 'Wiltshire',
        'Worcestershire', 'Yorkshire',
    ],
    'Scotland': [
        'Aberdeenshire', 'Angus', 'Argyll', 'Ayrshire', 'Banffshire', 'Berwickshire', 'Caithness',
        'Clackmannanshire', 'Dumfriesshire', 'Dunbartonshire', 'East Lothian', 'Fife', 'Inverness-shire',
        'Kincardineshire', 'Kinross-shire', 'Kirkcudbrightshire', 'Lanarkshire', 'Midlothian', 'Moray',
        'Nairnshire', 'Orkney', 'Peeblesshire', 'Perthshire', 'Renfrewshire', 'Ross-shire', 'Roxburghshire',
        'Selkirkshire', 'Shetland', 'Stirlingshire', 'Sutherland', 'West Lothian', 'Wigtownshire',
    ],
    'Wales': [
        'Carmarthenshire', 'Denbighshire', 'Flintshire', 'Gwynedd', 'Isle of Anglesey', 'Monmouthshire',
        'Pembrokeshire', 'Powys', 'Wrexham',
    ],
    'Northern Ireland': [
        'Antrim', 'Armagh', 'Down', 'Fermanagh', 'Londonderry', 'Tyrone',
    ],
}

# Column order of the output CSV; generate_row() returns values in this order.
columns = ['Row ID', 'Order Priority', 'Discount', 'Unit Price', 'Shipping Cost', 'Customer ID', 'Customer Name',
           'Ship Mode', 'Segment', 'Category', 'Sub-Category', 'Product Container', 'Product Name', 'Product ID',
           'Product Base Margin', 'Region', 'State', 'County', 'Country', 'Order Date-Old', 'Order Date', 'Ship Date',
           'Profit', 'Quantity', 'Sales-Old', 'Sales', 'Order ID']


def pick_location():
    """Return a random ``(state, county)`` pair in which the county lies in the state.

    The state is drawn uniformly from the keys of ``COUNTIES_BY_STATE`` and the
    county uniformly from that state's list, using the global ``random`` state.
    """
    state = random.choice(list(COUNTIES_BY_STATE))
    county = random.choice(COUNTIES_BY_STATE[state])
    return state, county


def generate_row(fake):
    """Build one synthetic order record, ordered to match ``columns``.

    Args:
        fake: A ``Faker`` instance; supplies the UUIDs, customer ID and name,
            product name, order date and order ID.

    Returns:
        A list holding one value per entry of ``columns``.
    """
    row_id = fake.uuid4()
    order_priority = random.choice(ORDER_PRIORITIES)
    discount = random.uniform(0, 0.5)
    unit_price = random.uniform(50, 500)
    shipping_cost = random.uniform(5, 30)
    customer_id = fake.random_int(min=1000, max=9999)
    customer_name = fake.name()
    ship_mode = random.choice(SHIP_MODES)
    segment = random.choice(SEGMENTS)
    category = random.choice(CATEGORIES)
    sub_category = random.choice(SUB_CATEGORIES)
    product_container = random.choice(PRODUCT_CONTAINERS)
    product_name = fake.word()
    product_id = fake.uuid4()
    product_base_margin = random.uniform(0.1, 0.5)
    region = 'UK'
    state, county = pick_location()
    country = 'United Kingdom'
    order_date_old = fake.date_between(start_date='-5y', end_date='today')
    order_date = order_date_old.strftime('%d-%m-%Y')
    # Ship between 1 and 14 days after the order was placed.
    ship_date = (order_date_old + pd.Timedelta(days=random.randint(1, 14))).strftime('%d-%m-%Y')
    profit = random.uniform(-100, 200)
    quantity = random.randint(1, 10)
    # 'Sales-Old' is the pre-discount amount; 'Sales' applies the discount
    # and is rounded to two decimal places.
    sales_old = unit_price * quantity + profit
    sales = round(sales_old * (1 - discount), 2)
    order_id = fake.random_int(min=10000, max=99999)

    return [row_id, order_priority, discount, unit_price, shipping_cost, customer_id, customer_name, ship_mode,
            segment, category, sub_category, product_container, product_name, product_id, product_base_margin,
            region, state, county, country, order_date_old, order_date, ship_date, profit, quantity, sales_old,
            sales, order_id]


if __name__ == "__main__":
    # Initialize Faker and seed both random sources for consistent data generation
    fake = Faker()
    Faker.seed(42)
    random.seed(42)

    # Generate medical product data for UK counties
    data = [generate_row(fake) for _ in range(NUM_ROWS)]

    # Create the DataFrame
    df = pd.DataFrame(data, columns=columns)

    # Save the DataFrame to a CSV file
    df.to_csv('medical_products_data_uk.csv', index=False)

    print("CSV file saved successfully.")


In [None]:
import pandas as pd
from faker import Faker
import random

# Expenses table derived from the previously generated medical products data:
# one expense row per distinct order date.

# Stores an expense row can be attributed to (immutable, safe as a default).
STORES = ('PharmaCare', 'MediPlus', 'HealthMart', 'QuickHealth', 'MedLife')

# Column order of the output CSV; build_expense_rows() returns values in this order.
expense_columns = ['Date', 'Amount', 'Budget', 'Subcategory', 'Store']


def build_expense_rows(dates, subcategories, stores=STORES):
    """Build one random expense row per entry of ``dates``.

    Values are drawn from the global ``random`` state in a fixed order per
    row (amount, budget, subcategory, store), so seeding ``random`` beforehand
    makes the result reproducible.

    Args:
        dates: Iterable of date values; each is copied unchanged into its row.
        subcategories: Iterable of sub-category values to choose from.
        stores: Iterable of store names to choose from.

    Returns:
        A list of ``[date, amount, budget, subcategory, store]`` lists, where
        ``amount`` is in [10, 100] and ``budget`` in [500, 5000], both rounded
        to two decimal places.

    Raises:
        IndexError: If ``dates`` is non-empty while ``subcategories`` or
            ``stores`` is empty.
    """
    # Materialise the pools once so random.choice indexes plain sequences.
    subcategory_pool = list(subcategories)
    store_pool = list(stores)

    rows = []
    for date in dates:
        amount = round(random.uniform(10, 100), 2)
        budget = round(random.uniform(500, 5000), 2)
        subcategory = random.choice(subcategory_pool)
        store = random.choice(store_pool)
        rows.append([date, amount, budget, subcategory, store])
    return rows


if __name__ == "__main__":
    # Initialize Faker and seed for consistent data generation.
    # `fake` is not referenced below; only the `random` seed affects the output.
    fake = Faker()
    Faker.seed(42)
    random.seed(42)

    # Load the previously generated medical products data
    df_medical_products = pd.read_csv('medical_products_data_uk.csv')

    # Extract unique dates from the medical products data
    unique_dates = df_medical_products['Order Date'].unique()

    # Distinct sub-categories, computed once rather than once per date
    unique_subcategories = df_medical_products['Sub-Category'].unique().tolist()

    # Generate expenses data based on the unique dates
    expense_data = build_expense_rows(unique_dates, unique_subcategories)

    # Create the DataFrame
    df_expenses = pd.DataFrame(expense_data, columns=expense_columns)

    # Save the DataFrame to a CSV file
    df_expenses.to_csv('expenses_table.csv', index=False)

    print("Expenses table CSV file saved successfully.")
