# insert_dws_campaign_cost_sale_prd_d_inc

- Name: dws_campaign_cost_sale_prd_d_inc
- Description: Combine cost and sales for each product and campaign result
- Data modeling reference: https://docs.google.com/spreadsheets/d/1FlbiZEBue2SCAUo1WOkdtX5WbEDyDrZ9B2uiZUSiRio/edit?gid=286240572#gid=286240572
- Target Table: dws_campaign_cost_sale_prd_d_inc
- Source Table: dws_sale_customer_prd_d_inc, dws_campaign_cost_prd_d_inc, dim_product_full
- Created by: alvinxyzhang
- Created Date: 2025-04-29
- Version: v1.0

In [12]:
from google.cloud import bigquery
import datetime
import pandas as pd

## Scheduled Task & Partition Table

In [13]:
# Construct a BigQuery client object.
client = bigquery.Client()

# (column name, BigQuery type) pairs, listed in destination-table column order.
_schema_spec = (
    # Sales attributes and metrics
    ("date", "DATE"),
    ("product_id", "INTEGER"),
    ("purchase_type", "STRING"),
    ("product_name", "STRING"),
    ("category", "STRING"),
    ("brand", "STRING"),
    ("number_of_items", "INTEGER"),
    ("retail_price", "FLOAT"),
    ("retail_cost", "FLOAT"),
    ("total_price", "FLOAT"),
    ("total_cost", "FLOAT"),
    # Campaign attributes
    ("campaign_id", "INTEGER"),
    ("campaign_name", "STRING"),
    ("campaign_type", "STRING"),
    ("start_date", "DATE"),
    ("end_date", "DATE"),
    ("estimated_budget", "INTEGER"),
    ("approved_budget", "INTEGER"),
    ("real_spent_budget", "INTEGER"),
    ("discount", "FLOAT"),
    ("estimated_sales_increase", "FLOAT"),
    ("count_product", "INTEGER"),
    # Campaign cost metrics
    ("cost_in_store", "FLOAT"),
    ("cost_flyer", "FLOAT"),
    ("cost_community", "FLOAT"),
    ("cost_loyalty", "FLOAT"),
    ("cost_push", "FLOAT"),
    ("cost_ad_total", "FLOAT"),
    ("cost_ad_search", "FLOAT"),
    ("cost_ad_display", "FLOAT"),
    ("cost_ad_event", "FLOAT"),
    ("cost_ad_retargeting", "FLOAT"),
    ("cost_ad_email", "FLOAT"),
    # Ad impressions
    ("impressions_ad_total", "FLOAT"),
    ("impressions_ad_search", "FLOAT"),
    ("impressions_ad_display", "FLOAT"),
    ("impressions_ad_event", "FLOAT"),
    ("impressions_ad_retargeting", "FLOAT"),
    ("impressions_ad_email", "FLOAT"),
    # Ad clicks
    ("clicks_ad_total", "FLOAT"),
    ("clicks_ad_search", "FLOAT"),
    ("clicks_ad_display", "FLOAT"),
    ("clicks_ad_event", "FLOAT"),
    ("clicks_ad_retargeting", "FLOAT"),
    ("clicks_ad_email", "FLOAT"),
    # Ad conversions
    ("conversions_ad_total", "FLOAT"),
    ("conversions_ad_search", "FLOAT"),
    ("conversions_ad_display", "FLOAT"),
    ("conversions_ad_event", "FLOAT"),
    ("conversions_ad_retargeting", "FLOAT"),
    ("conversions_ad_email", "FLOAT"),
    # Discounted prices derived in the insert query
    ("retail_discount_price", "FLOAT"),
    ("total_discount_price", "FLOAT"),
)

# Schema passed to check_table() when the destination table has to be created.
table_schema = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in _schema_spec
]

# SQL that builds one day's rows for the target table; `@date_` is a DATE query
# parameter supplied through the job config.
#   - sale_tbl:     that day's rows of dws_sale_customer_prd_d_inc aggregated per
#                   product / purchase_type (SUM of item counts and totals, AVG of
#                   unit price and unit cost).
#   - campaign_tbl: that day's rows of dws_campaign_cost_prd_d_inc aggregated per
#                   product / campaign (SUM of every cost, impression, click and
#                   conversion metric).
#   - final SELECT: starts from dim_product_full and LEFT JOINs both CTEs on
#                   product_id, so every product yields at least one row; sales
#                   counts/totals fall back to 0 and unit price/cost fall back to
#                   the product dimension when the product had no sales that day.
# NOTE(review): both CTEs are joined on product_id only, so a product with several
#   purchase types and several campaigns gets one row per combination, repeating
#   the sales and campaign metrics on each -- confirm downstream aggregations
#   account for this.
# NOTE(review): `discount` is NULL for products without a campaign row, which
#   makes retail_discount_price / total_discount_price NULL for them -- confirm
#   this is intended rather than falling back to the undiscounted price.
# NOTE(review): the literal is an f-string but contains no placeholders.
insert_query = f"""
    WITH sale_tbl AS (
      SELECT `date`
          , product_id
          , purchase_type
          , product_name
          , category
          , brand
          , SUM(number_of_items) AS number_of_items
          , AVG(retail_price) AS retail_price
          , AVG(retail_cost) AS retail_cost
          , SUM(total_price) AS total_price
          , SUM(total_cost) AS total_cost
      FROM `positive-karma-457703-i3.retail_dashboard.dws_sale_customer_prd_d_inc`
      WHERE `date` = @date_
      GROUP BY `date`
          , product_id
          , purchase_type
          , product_name
          , category
          , brand
    ), campaign_tbl AS (
      SELECT `date`
          , product_id
          , campaign_id
          , campaign_name
          , campaign_type
          , start_date
          , end_date
          , estimated_budget
          , approved_budget
          , real_spent_budget
          , discount
          , estimated_sales_increase
          , count_product
          , SUM(cost_in_store) AS cost_in_store
          , SUM(cost_flyer) AS cost_flyer
          , SUM(cost_community) AS cost_community
          , SUM(cost_loyalty) AS cost_loyalty
          , SUM(cost_push) AS cost_push
          , SUM(cost_ad_total) AS cost_ad_total
          , SUM(cost_ad_search) AS cost_ad_search
          , SUM(cost_ad_display) AS cost_ad_display
          , SUM(cost_ad_event) AS cost_ad_event
          , SUM(cost_ad_retargeting) AS cost_ad_retargeting
          , SUM(cost_ad_email) AS cost_ad_email
          , SUM(impressions_ad_total) AS impressions_ad_total
          , SUM(impressions_ad_search) AS impressions_ad_search
          , SUM(impressions_ad_display) AS impressions_ad_display
          , SUM(impressions_ad_event) AS impressions_ad_event
          , SUM(impressions_ad_retargeting) AS impressions_ad_retargeting
          , SUM(impressions_ad_email) AS impressions_ad_email
          , SUM(clicks_ad_total) AS clicks_ad_total
          , SUM(clicks_ad_search) AS clicks_ad_search
          , SUM(clicks_ad_display) AS clicks_ad_display
          , SUM(clicks_ad_event) AS clicks_ad_event
          , SUM(clicks_ad_retargeting) AS clicks_ad_retargeting
          , SUM(clicks_ad_email) AS clicks_ad_email
          , SUM(conversions_ad_total) AS conversions_ad_total
          , SUM(conversions_ad_search) AS conversions_ad_search
          , SUM(conversions_ad_display) AS conversions_ad_display
          , SUM(conversions_ad_event) AS conversions_ad_event
          , SUM(conversions_ad_retargeting) AS conversions_ad_retargeting
          , SUM(conversions_ad_email) AS conversions_ad_email
      FROM `positive-karma-457703-i3.retail_dashboard.dws_campaign_cost_prd_d_inc`
      WHERE `date` = @date_
      GROUP BY `date`
            , product_id
            , campaign_id
            , campaign_name
            , campaign_type
            , start_date
            , end_date
            , estimated_budget
            , approved_budget
            , real_spent_budget
            , discount
            , estimated_sales_increase
            , count_product
      ORDER BY product_id
    )

    SELECT @date_ AS `date`
          , prd_tbl.product_id AS product_id
          , purchase_type
          , prd_tbl.product_name AS product_name
          , prd_tbl.category AS category
          , prd_tbl.brand AS brand
          , coalesce(number_of_items, 0) AS number_of_items
          , coalesce(sale_tbl.retail_price, prd_tbl.retail_price) AS retail_price
          , coalesce(sale_tbl.retail_cost, prd_tbl.cost) AS retail_cost
          , coalesce(total_price, 0) AS total_price
          , coalesce(total_cost, 0) AS total_cost
          , campaign_id
          , campaign_name
          , campaign_type
          , start_date
          , end_date
          , estimated_budget
          , approved_budget
          , real_spent_budget
          , discount
          , estimated_sales_increase
          , count_product
          , cost_in_store
          , cost_flyer
          , cost_community
          , cost_loyalty
          , cost_push
          , cost_ad_total
          , cost_ad_search
          , cost_ad_display
          , cost_ad_event
          , cost_ad_retargeting
          , cost_ad_email
          , impressions_ad_total
          , impressions_ad_search
          , impressions_ad_display
          , impressions_ad_event
          , impressions_ad_retargeting
          , impressions_ad_email
          , clicks_ad_total
          , clicks_ad_search
          , clicks_ad_display
          , clicks_ad_event
          , clicks_ad_retargeting
          , clicks_ad_email
          , conversions_ad_total
          , conversions_ad_search
          , conversions_ad_display
          , conversions_ad_event
          , conversions_ad_retargeting
          , conversions_ad_email
          , coalesce(sale_tbl.retail_price, prd_tbl.retail_price) * (1 - discount) AS retail_discount_price
          , coalesce(total_price, 0) * (1 - discount) AS total_discount_price
    FROM `positive-karma-457703-i3.retail_dashboard.dim_product_full` AS prd_tbl
    LEFT JOIN sale_tbl
      ON sale_tbl.product_id = prd_tbl.product_id
    LEFT JOIN campaign_tbl
      ON campaign_tbl.product_id = prd_tbl.product_id
    ORDER BY prd_tbl.product_id ASC
"""

# Step 1: Check if table exists
def check_table(table_id_, full_table_id_, schema_, partition_column_):
  """Ensure the day-partitioned destination table exists, creating it if missing.

  Args:
    table_id_: Short table name, used only in the log messages.
    full_table_id_: Fully qualified ``project.dataset.table`` ID.
    schema_: List of ``bigquery.SchemaField`` applied when the table is created.
    partition_column_: Name of the column used for daily time partitioning.

  Returns:
    The existing or newly created ``bigquery.Table``.

  Raises:
    Any client error other than "table not found" (permissions, network, ...)
    is propagated instead of being mistaken for a missing table.
  """
  # Local import keeps this function self-contained; google.cloud.exceptions
  # is installed alongside google-cloud-bigquery.
  from google.cloud.exceptions import NotFound

  try:
      table = client.get_table(full_table_id_)
  except NotFound:
      # Only a genuine 404 means the table has to be created.
      print(f"    Creating partitioned table {table_id_}...")

      table = bigquery.Table(full_table_id_, schema=schema_)
      table.time_partitioning = bigquery.TimePartitioning(
          type_=bigquery.TimePartitioningType.DAY,
          # Use the argument, not the module-level `partition_col` global.
          field=partition_column_
      )

      table = client.create_table(table)
      print(f"Created partitioned table {full_table_id_}")
  else:
      print(f"    Table {table_id_} already exists.")

  return table

# Step 2: Insert Query
def insert_table(insert_query_, full_table_id_partition_, query_parameters_):
  """Run the insert query and write its result to the given destination.

  The job uses WRITE_TRUNCATE, so the destination's existing contents are
  replaced by the query result (callers pass a ``table$YYYYMMDD`` ID).

  Args:
    insert_query_: Standard-SQL text of the query to run.
    full_table_id_partition_: Destination table ID for the query result.
    query_parameters_: List of BigQuery query parameters bound to the query.

  Returns:
    The finished ``QueryJob``.
  """
  truncate_config = bigquery.QueryJobConfig(
      query_parameters=query_parameters_,
      use_legacy_sql=False,
      write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
      destination=full_table_id_partition_,
  )

  # Submitting the query makes an API request.
  submitted_job = client.query(insert_query_, job_config=truncate_config)

  # result() blocks until the job completes and raises if it failed.
  finished_rows = submitted_job.result()
  print('   Result:', finished_rows)

  print('   Errors: ', submitted_job.errors)

  return submitted_job

# Step 3: Check Insert Rows
def check_insert_rows(partition_col_, full_table_id_, job_date_):
  """Count the rows stored in the table for a single partition date.

  Args:
    partition_col_: Name of the partition column to filter and group on.
    full_table_id_: Fully qualified ID of the table to inspect.
    job_date_: Date bound to the ``@date_`` query parameter.

  Returns:
    A DataFrame with the partition column and ``row_cnt``; it has no rows
    when nothing matches ``job_date_``.
  """
  date_param = bigquery.ScalarQueryParameter("date_", "DATE", job_date_)
  count_sql = f"""
      SELECT {partition_col_}, count(1) row_cnt
      FROM {full_table_id_}
      WHERE {partition_col_} = @date_
      GROUP BY {partition_col_}
  """
  count_rows = client.query_and_wait(
      count_sql,
      job_config=bigquery.QueryJobConfig(query_parameters=[date_param]),
  )
  return count_rows.to_dataframe()

# Main execution: rebuild the partition for a single job date.
dataset_id = 'retail_dashboard'
target_table = 'dws_campaign_cost_sale_prd_d_inc'
partition_col = 'date'
job_date = datetime.date(2024, 1, 1)

# Full table IDs; the `$YYYYMMDD` suffix addresses one daily partition.
partition = job_date.strftime("%Y%m%d")
full_table_id = f"{client.project}.{dataset_id}.{target_table}"
full_table_id_partition = f"{client.project}.{dataset_id}.{target_table}${partition}"
query_parameters = [
        bigquery.ScalarQueryParameter("date_", "DATE", job_date)
    ]

print('- Execution: ', job_date, ' Partition: ', partition)
check_table(table_id_=target_table, full_table_id_=full_table_id, schema_ = table_schema, partition_column_ = partition_col)
query_job = insert_table(insert_query_ = insert_query, full_table_id_partition_ = full_table_id_partition, query_parameters_ = query_parameters)
print('- Finish writing to Partition ', partition)
row_cnt_df = check_insert_rows(partition_col_ = partition_col, full_table_id_ = full_table_id, job_date_ = job_date)
# The count query returns no rows when the partition is empty; indexing
# `.values[0]` on that would raise IndexError, so report zero explicitly.
if row_cnt_df.shape[0] == 0:
  print('- Check: insert 0 rows.')
else:
  print('- Check: insert', str(row_cnt_df['row_cnt'].values[0]), 'rows.')

- Execution:  2024-01-01  Partition:  20240101
    Table dws_campaign_cost_sale_prd_d_inc already exists.
   Result: <google.cloud.bigquery.table.RowIterator object at 0x794739e67850>
   Errors:  None
- Finish writing to Partition  20240101
- Check: insert 2570 rows.


## Iterate over Date


In [14]:
# Backfill: rebuild one partition per day for every date in [start_date, end_date].
start_date = datetime.date(2024, 1, 5)
end_date = datetime.date(2025, 4, 25)
dataset_id = 'retail_dashboard'
target_table = 'dws_campaign_cost_sale_prd_d_inc'
partition_col = 'date'
full_table_id = f"{client.project}.{dataset_id}.{target_table}"

# The destination table is the same for every date, so verify/create it once
# instead of issuing the same API call on every iteration.
check_table(table_id_=target_table, full_table_id_=full_table_id, schema_ = table_schema, partition_column_ = partition_col)

# Per-date row-count frames; kept in a list so the summary is built with a
# single concat (and partial results stay inspectable if the loop is interrupted).
row_cnt_frames = []

# Iterate over day offsets so start_date/end_date keep their configured values.
# A negative span yields an empty range, i.e. no iterations.
for day_offset in range((end_date - start_date).days + 1):
  job_date = start_date + datetime.timedelta(days=day_offset)

  # Full table IDs; the `$YYYYMMDD` suffix addresses one daily partition.
  partition = job_date.strftime("%Y%m%d")
  full_table_id_partition = f"{client.project}.{dataset_id}.{target_table}${partition}"
  query_parameters = [
        bigquery.ScalarQueryParameter("date_", "DATE", job_date)
    ]

  print('- Execution: ', job_date, ' Partition: ', partition)
  query_job = insert_table(insert_query_ = insert_query, full_table_id_partition_ = full_table_id_partition, query_parameters_ = query_parameters)
  print('- Finish writing to Partition ', partition)
  row_cnt_df = check_insert_rows(partition_col_ = partition_col, full_table_id_ = full_table_id, job_date_ = job_date)
  if row_cnt_df.shape[0] == 0:
    print('- Check: insert 0 rows.')
  else:
    print('- Check: insert', str(row_cnt_df['row_cnt'].values[0]), 'rows.')

  row_cnt_frames.append(row_cnt_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
if row_cnt_frames:
  total_row_cnt_df = pd.concat(row_cnt_frames, axis=0, join='outer', ignore_index=True)
else:
  total_row_cnt_df = pd.DataFrame()


- Execution:  2024-01-05  Partition:  20240105
    Table dws_campaign_cost_sale_prd_d_inc already exists.
   Result: <google.cloud.bigquery.table.RowIterator object at 0x794739fed7b0>
   Errors:  None
- Finish writing to Partition  20240105
- Check: insert 2742 rows.
- Execution:  2024-01-06  Partition:  20240106
    Table dws_campaign_cost_sale_prd_d_inc already exists.
   Result: <google.cloud.bigquery.table.RowIterator object at 0x794739cae0e0>
   Errors:  None
- Finish writing to Partition  20240106
- Check: insert 3622 rows.
- Execution:  2024-01-07  Partition:  20240107
    Table dws_campaign_cost_sale_prd_d_inc already exists.
   Result: <google.cloud.bigquery.table.RowIterator object at 0x794739e0d660>
   Errors:  None
- Finish writing to Partition  20240107
- Check: insert 3891 rows.
- Execution:  2024-01-08  Partition:  20240108
    Table dws_campaign_cost_sale_prd_d_inc already exists.
   Result: <google.cloud.bigquery.table.RowIterator object at 0x794739cacf70>
   Errors:  

### Insert Summary

In [None]:
total_row_cnt_df

Unnamed: 0,date,row_cnt
0,2024-01-01,2570
1,2024-01-02,2556
2,2024-01-03,2240
3,2024-01-04,2700
4,2024-01-05,2742
5,2024-01-06,3622
6,2024-01-07,3891
7,2024-01-08,2581
8,2024-01-09,2906
9,2024-01-10,2602
