In [1]:
import pandas as pd
import numpy as np

In [2]:
# Define parameters
np.random.seed(42)  # for reproducibility
num_regions = 3000
start_date = pd.to_datetime("2021-01-01")
end_date = pd.to_datetime("2023-12-31")
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Helper: Normalize day of year to [0, 1] for seasonal multiplier
def seasonal_multiplier(date, peak_months, amplitude=0.2):
    month = date.month
    if month in peak_months:
        return 1 + amplitude
    elif (month - 1) % 12 in peak_months or (month + 1) % 12 in peak_months:
        return 1 + amplitude / 2
    return 1

# Helper: Weekend effect
def is_weekend(date):
    return date.weekday() >= 5

# Generate fresh dummy marketing data with trends and seasonality
data_daily = []
for region in range(1, num_regions + 1):
    for idx, date in enumerate(all_dates):
        days_since_start = (date - start_date).days
        growth_factor = 1 + 0.0005 * days_since_start  # slow growth over time

        # Sales: boosted by holidays, mid-year
        season_sales = seasonal_multiplier(date, peak_months=[6, 11, 12], amplitude=0.3)
        base_sales = np.random.randint(70, 800)
        sales = int(base_sales * season_sales * growth_factor)

        # HCP Calls
        calls_base = np.random.randint(5, 70)
        if is_weekend(date):
            calls_base *= 0.5
        hcp_calls = int(calls_base)
        hcp_calls_spend = hcp_calls * np.random.randint(90, 200)

        # Samples
        sample_season = seasonal_multiplier(date, peak_months=[1, 9], amplitude=0.15)
        hcp_samples = int(np.random.randint(2, 25) * sample_season)
        hcp_samples_spend = hcp_samples * np.random.randint(120, 250)

        # Print
        print_season = seasonal_multiplier(date, peak_months=[1, 9], amplitude=0.1)
        hcp_print = int(np.random.randint(0, 10) * print_season)
        hcp_print_spend = hcp_print * np.random.randint(400, 800)

        # TV and Display
        digital_season = seasonal_multiplier(date, peak_months=[4, 5, 10, 11], amplitude=0.25)
        tv_growth = growth_factor * 1.1  # slightly stronger trend for TV
        dtc_tv = int(np.random.randint(50, 1200) * digital_season)
        dtc_tv_spend = int(dtc_tv * np.random.randint(150, 300) * tv_growth)

        display_growth = growth_factor * 1.15
        dtc_display = int(np.random.randint(30, 800) * digital_season)
        dtc_display_spend = int(dtc_display * np.random.randint(10, 40) * display_growth)

        data_daily.append([
            f"HCP_{region}", date, sales, hcp_calls, hcp_calls_spend,
            hcp_samples, hcp_samples_spend, hcp_print, hcp_print_spend,
            dtc_tv, dtc_tv_spend, dtc_display, dtc_display_spend
        ])

# Create DataFrame
columns = [
    "HCP ID", "Date", "Sales", "HCP Calls", "HCP Calls Spend",
    "HCP Samples", "HCP Samples Spend", "HCP Print", "HCP Print Spend",
    "DTC TV", "DTC TV Spend", "DTC Display", "DTC Display Spend"
]
df_daily = pd.DataFrame(data_daily, columns=columns)

# Add 'Month' and 'Year' columns
df_daily['Month'] = df_daily['Date'].dt.month
df_daily['Year'] = df_daily['Date'].dt.year

# Reorder columns
cols = df_daily.columns.tolist()
date_index = cols.index("Date")
cols.insert(date_index + 1, cols.pop(cols.index("Month")))
cols.insert(date_index + 2, cols.pop(cols.index("Year")))
df_daily = df_daily[cols]

# Sample preview
df_daily.head()

Unnamed: 0,HCP ID,Date,Month,Year,Sales,HCP Calls,HCP Calls Spend,HCP Samples,HCP Samples Spend,HCP Print,HCP Print Spend,DTC TV,DTC TV Spend,DTC Display,DTC Display Spend
0,HCP_1,2021-01-01,1,2021,172,56,10192,18,4068,7,4116,1094,303256,151,4862
1,HCP_1,2021-01-02,1,2021,284,14,1288,26,4472,1,743,855,262530,221,9408
2,HCP_1,2021-01-03,1,2021,346,18,2970,26,5408,0,0,560,101125,219,9831
3,HCP_1,2021-01-04,1,2021,757,66,9240,14,2436,3,2157,696,130347,196,6094
4,HCP_1,2021-01-05,1,2021,457,64,6592,21,2688,9,4068,947,243201,121,3346


In [7]:
df_daily['Date'].nunique()

1095

In [3]:
df_daily.to_csv(r'C:\Users\UtsyoChakraborty\OneDrive - ProcDNA Analytics Pvt. Ltd\General - Data Science\17_MMix_tool_dev\database\hcp_daily_marketing_dummy_data.csv', index=False)