In [1]:
import pandas as pd
import glob
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd

In [2]:
# 파이썬 구글 빅쿼리 연동 코드
# json 파일

key_path = glob.glob("./*.json")[0]
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials = credentials, 
                         project = credentials.project_id)

In [3]:
def sql_to_dataframe(sql:str) -> pd.DataFrame:
    """
    Args:
        sql (str): sql for extraction

    Returns:
        pd.DataFrame: extract data with sql
    """
    query_job = client.query(sql)
    df = query_job.to_dataframe()
    return df

### 시간별 조회된 프로모션 상품

In [4]:
sql = """
SELECT
  event_date,
  items.promotion_name,
  COUNT(*) AS item_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "view_promotion"
GROUP BY event_date, items.promotion_name
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,event_date,promotion_name,item_count
0,20201126,Reach New Heights,1545
1,20201126,Complete Your Collection,16
2,20201126,Google Mural Collection,12
3,20201128,Reach New Heights,1192
4,20201128,Complete Your Collection,16
...,...,...,...
108,20201201,Google Mural Collection,7
109,20201201,Complete Your Collection,31
110,20201110,Act Responsible,17
111,20201110,Reach New Heights,1616


### 프로모션에서 조회된 상품

In [5]:
sql = """
SELECT
  items.promotion_name,
  COUNT(*) AS view_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "view_promotion"
GROUP BY items.promotion_name
ORDER BY view_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,view_count
0,Reach New Heights,40593
1,Act Responsible,1046
2,Complete Your Collection,522
3,Google Mural Collection,340
4,Not available in demo dataset,29
5,,1


### 프로모션에서 클릭된 상품

In [6]:
sql = """
SELECT
  items.promotion_name,
  COUNT(*) AS click_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "select_promotion"
GROUP BY items.promotion_name
ORDER BY click_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,click_count
0,Act Responsible,927
1,Reach New Heights,874
2,Complete Your Collection,512
3,Google Mural Collection,343
4,Not available in demo dataset,5
5,,2


### 상품 프로모션 클릭률

In [7]:
sql = """
WITH view_user AS(
  SELECT
    items.promotion_name,
    COUNT(DISTINCT user_pseudo_id) AS view_user_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "view_promotion"
  GROUP BY items.promotion_name
),
select_user AS(
  SELECT
    items.promotion_name,
    COUNT(DISTINCT user_pseudo_id) AS select_user_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "select_promotion"
  GROUP BY items.promotion_name
)

SELECT
  select_user.promotion_name,
  select_user.select_user_count / view_user.view_user_count AS click_ratio
FROM view_user
LEFT JOIN select_user
ON view_user.promotion_name = select_user.promotion_name
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,click_ratio
0,Reach New Heights,0.032657
1,Act Responsible,0.882118
2,Complete Your Collection,0.982318
3,Google Mural Collection,1.00303
4,Not available in demo dataset,0.153846
5,,2.0


### 장바구니에 추가된 상품

In [8]:
sql = """
SELECT
items.promotion_name,
COUNT(*) AS cart_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "add_to_cart"
GROUP BY items.promotion_name
ORDER BY cart_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,cart_count
0,(not set),167459
1,Reach New Heights,3115
2,Act Responsible,1665
3,Google Mural Collection,1549
4,Complete Your Collection,1284
5,,566


### 결제된 상품

In [9]:
sql = """
SELECT
items.promotion_name,
COUNT(*) AS begin_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "begin_checkout"
GROUP BY items.promotion_name
ORDER BY begin_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,begin_count
0,(not set),5835
1,Not available in demo dataset,5224
2,,4305
3,Reach New Heights,531
4,Act Responsible,359
5,Google Mural Collection,254
6,Complete Your Collection,199


### 구매한 상품

In [10]:
sql = """
SELECT
items.promotion_name,
COUNT(*) AS purchase_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "purchase"
GROUP BY items.promotion_name
ORDER BY purchase_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,purchase_count
0,,3970
1,Not available in demo dataset,1301
2,Reach New Heights,541
3,(not set),269
4,Act Responsible,205
5,Google Mural Collection,183
6,Complete Your Collection,152


### 상품 수익

In [11]:
sql = """
SELECT
items.promotion_name,
TRUNC(CAST(SUM(COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"),0)) AS NUMERIC),2) AS value
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "purchase"
GROUP BY items.promotion_name
ORDER BY value DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,value
0,,442827.49
1,Not available in demo dataset,184327.51
2,Reach New Heights,37301.98
3,(not set),16875.82
4,Act Responsible,15329.51
5,Google Mural Collection,12401.59
6,Complete Your Collection,9866.26


### 보고서

In [12]:
sql = """
WITH view_item AS(
  SELECT
    items.promotion_name,
    COUNT(*) AS view_count,
    COUNT(DISTINCT user_pseudo_id) AS view_user_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "view_promotion"
  GROUP BY items.promotion_name
),
click_item AS(
  SELECT
    items.promotion_name,
    COUNT(*) AS click_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "select_promotion"
  GROUP BY items.promotion_name
),
select_user AS(
  SELECT
    items.promotion_name,
    COUNT(DISTINCT user_pseudo_id) AS select_user_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "select_promotion"
  GROUP BY items.promotion_name
),
cart_item AS(
  SELECT
    items.promotion_name,
    COUNT(*) AS cart_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "add_to_cart"
  GROUP BY items.promotion_name
),
begin_checkout_item AS(
  SELECT
    items.promotion_name,
    COUNT(*) AS begin_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "begin_checkout"
  GROUP BY items.promotion_name
),
purchase_item AS(
  SELECT
    items.promotion_name,
    COUNT(*) AS purchase_count,
    TRUNC(CAST(SUM(COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"),0)) AS NUMERIC),2) AS value
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "purchase"
  GROUP BY items.promotion_name
)

SELECT
  click_item.promotion_name,
  view_item.view_count,
  click_item.click_count,
  select_user.select_user_count / view_item.view_user_count AS click_ratio,
  cart_item.cart_count,
  begin_checkout_item.begin_count,
  purchase_item.purchase_count,
  purchase_item.value
FROM view_item
LEFT JOIN click_item
ON view_item.promotion_name = click_item.promotion_name
LEFT JOIN select_user
ON click_item.promotion_name = select_user.promotion_name
LEFT JOIN cart_item
ON click_item.promotion_name = cart_item.promotion_name
LEFT JOIN begin_checkout_item
ON click_item.promotion_name = begin_checkout_item.promotion_name
LEFT JOIN purchase_item
ON click_item.promotion_name = purchase_item.promotion_name
ORDER BY view_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,promotion_name,view_count,click_count,click_ratio,cart_count,begin_count,purchase_count,value
0,Reach New Heights,40593,874,0.032657,3115.0,531,541,37301.98
1,Act Responsible,1046,927,0.882118,1665.0,359,205,15329.51
2,Complete Your Collection,522,512,0.982318,1284.0,199,152,9866.26
3,Google Mural Collection,340,343,1.00303,1549.0,254,183,12401.59
4,Not available in demo dataset,29,5,0.153846,,5224,1301,184327.51
5,,1,2,2.0,566.0,4305,3970,442827.49
