In [1]:
import pandas as pd
import glob
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd

In [2]:
# 파이썬 구글 빅쿼리 연동 코드
# json 파일

key_path = glob.glob("./*.json")[0]
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials = credentials, 
                         project = credentials.project_id)

In [3]:
def sql_to_dataframe(sql:str) -> pd.DataFrame:
    """
    Args:
        sql (str): sql for extraction

    Returns:
        pd.DataFrame: extract data with sql
    """
    query_job = client.query(sql)
    df = query_job.to_dataframe()
    return df

### 조회된 상품

In [9]:
sql = """
SELECT
  items.item_name,
  COUNT(*) AS item_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "view_item"
GROUP BY items.item_name
ORDER BY item_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,item_name,item_count
0,Google Navy Speckled Tee,21140
1,Super G Unisex Joggers,20393
2,Google Campus Bike Eco Tee Navy,19725
3,Google Zip Hoodie F/C,18195
4,Google Heather Green Speckled Tee,18167
...,...,...
393,Google Separating Keyring,4
394,Gift Card - $10.00,3
395,Gift Card - $250.00,3
396,Gift Card- $100.00,2


### 장바구니에 추가된 상품

In [10]:
sql = """
SELECT
  items.item_name,
  COUNT(*) AS cart_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "add_to_cart"
GROUP BY items.item_name
ORDER BY cart_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,item_name,cart_count
0,Google Campus Bike Eco Tee Navy,4036
1,Google Navy Speckled Tee,3897
2,Super G Unisex Joggers,3690
3,Google Zip Hoodie F/C,3288
4,Google F/C Long Sleeve Tee Charcoal,3070
...,...,...
379,Google Large Pet Collar (Blue/Green),4
380,Google Large Pet Collar (Red/Yellow),4
381,Google PNW Campus Ladies Tee,3
382,Google Large Pet Leash (Red/Yellow),2


### 구매한 상품

In [11]:
sql = """
SELECT
  items.item_name,
  COUNT(*) AS buy_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "purchase"
GROUP BY items.item_name
ORDER BY buy_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,item_name,buy_count
0,Google F/C Longsleeve Charcoal,137
1,Google Badge Heavyweight Pullover Black,127
2,Google Crewneck Sweatshirt Navy,126
3,Super G Unisex Joggers,109
4,Google Leather Strap Hat Blue,108
...,...,...
350,Android Iconic Backpack,1
351,Gift Card - $50.00,1
352,Google Tudes Thermal Bottle,1
353,Google PNW Campus Lapel Pin,1


### 상품 수익

In [13]:
sql = """
SELECT
  items.item_name,
  TRUNC(CAST(SUM(COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"),0)) AS NUMERIC),2) AS value
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "purchase"
GROUP BY items.item_name
ORDER BY value DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,item_name,value
0,Google Badge Heavyweight Pullover Black,17644.200000000
1,Google Crewneck Sweatshirt Navy,16709.670000000
2,Google F/C Longsleeve Charcoal,13744.780000000
3,Google Zip Hoodie F/C,12710.370000000
4,Google Navy Speckled Tee,12699.580000000
...,...,...
350,Google PNW Campus Lapel Pin,54.160000000
351,Gift Card - $50.00,50.000000000
352,#IamRemarkable Pen,34.000000000
353,Google Cambridge Campus Bottle,17.920000000


### 보고서

In [14]:
sql = """
WITH view_item AS(
  SELECT
    items.item_name,
    COUNT(*) AS item_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "view_item"
  GROUP BY items.item_name
),
cart_item AS(
  SELECT
    items.item_name,
    COUNT(*) AS cart_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "add_to_cart"
  GROUP BY items.item_name
),
buy_item AS(
  SELECT
    items.item_name,
    COUNT(*) AS buy_count,
    TRUNC(CAST(SUM(COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"),0)) AS NUMERIC),2) AS value
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items) AS items
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "purchase"
  GROUP BY items.item_name
)

SELECT
  cart_item.item_name,
  view_item.item_count,
  cart_item.cart_count,
  buy_item.buy_count,
  buy_item.value
FROM view_item
LEFT JOIN cart_item
ON view_item.item_name = cart_item.item_name
LEFT JOIN buy_item
ON cart_item.item_name = buy_item.item_name
WHERE buy_count IS NOT NULL
ORDER BY item_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,item_name,item_count,cart_count,buy_count,value
0,Google Navy Speckled Tee,21140,3897,107,12699.580000000
1,Super G Unisex Joggers,20393,3690,109,9644.630000000
2,Google Campus Bike Eco Tee Navy,19725,4036,32,2948.700000000
3,Google Zip Hoodie F/C,18195,3288,105,12710.370000000
4,Google Heather Green Speckled Tee,18167,3035,36,3672.560000000
...,...,...,...,...,...
340,Google Large Pet Leash (Red/Yellow),32,2,3,434.800000000
341,Google Chicago Campus Lapel Pin,26,10,1,70.560000000
342,Google Boulder Campus Tote,23,8,3,147.280000000
343,Google PNW Campus Ladies Tee,21,3,2,84.800000000
