In [1]:
import pandas as pd
import glob
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd

In [2]:
# Python <-> Google BigQuery connection setup.
# The service-account key is a *.json file expected in the working directory.

# Sort the matches so the chosen key is deterministic when several JSON files
# are present (glob itself makes no ordering promise), and fail with a clear
# message instead of a bare IndexError when none is found.
key_candidates = sorted(glob.glob("./*.json"))
if not key_candidates:
    raise FileNotFoundError(
        "No service-account key (*.json) found in the current working directory."
    )
key_path = key_candidates[0]
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials=credentials,
                         project=credentials.project_id)

In [3]:
def sql_to_dataframe(sql: str) -> pd.DataFrame:
    """Run a SQL statement through the module-level BigQuery ``client``.

    Args:
        sql (str): Query text passed unchanged to ``client.query``.

    Returns:
        pd.DataFrame: Whatever ``to_dataframe()`` yields for the submitted
        query job.
    """
    # Submit the query and materialise its result in one chained expression.
    return client.query(sql).to_dataframe()

### 이벤트 수

In [4]:
# Event count per event name: counts every row in the events_* wildcard
# tables whose suffix falls between 20201110 and 20201206 (inclusive), grouped
# by event_name and ordered from the most to the least frequent.
# NOTE(review): the count column is aliased `conversion` although COUNT(*)
# counts all rows for the event name — confirm the intended naming.
sql = """SELECT
  event_name,
  COUNT(*) AS conversion
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY event_name
ORDER BY conversion DESC"""

# Run the query; the bare `df` on the last line displays the result as the cell output.
df = sql_to_dataframe(sql)
df

Unnamed: 0,event_name,conversion
0,page_view,440286
1,user_engagement,392661
2,scroll,169102
3,view_item,144134
4,session_start,102342
5,first_visit,69380
6,view_promotion,62199
7,add_to_cart,15488
8,begin_checkout,11825
9,view_search_results,8744


### 총 사용자

In [5]:
# Distinct users per event name: counts distinct user_pseudo_id values in the
# events_* wildcard tables whose suffix falls between 20201110 and 20201206
# (inclusive), grouped by event_name and ordered by that count, descending.
sql = """
SELECT
  event_name,
  COUNT(DISTINCT user_pseudo_id) AS user
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY event_name
ORDER BY user DESC"""

# Run the query; the bare `df` on the last line displays the result as the cell output.
df = sql_to_dataframe(sql)
df

Unnamed: 0,event_name,user
0,page_view,76272
1,session_start,75515
2,first_visit,69344
3,user_engagement,63672
4,scroll,46303
5,view_promotion,32591
6,view_item,21197
7,view_search_results,4568
8,begin_checkout,4231
9,add_shipping_info,4230


### 사용자당 이벤트 수

In [6]:
# Events per user, by event name: total rows, distinct user_pseudo_id values,
# and the ratio of the two, ordered by that ratio (descending).
#
# SAFE_DIVIDE replaces the plain `/` so that an event name whose rows all have
# a NULL user_pseudo_id (distinct count 0) yields NULL for the ratio instead of
# failing the whole query with a division-by-zero error. For every non-zero
# denominator the result is the same as `/`.
#
# NOTE(review): the alias `user_per_conversion` reads inverted — the value is
# rows per distinct user. It is kept unchanged so the output column name stays
# the same for anything that reads it.
sql = """
SELECT
  event_name,
  COUNT(*) AS conversion,
  COUNT(DISTINCT user_pseudo_id) AS user,
  SAFE_DIVIDE(COUNT(*), COUNT(DISTINCT user_pseudo_id)) AS user_per_conversion
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY event_name
ORDER BY user_per_conversion DESC"""

# Run the query; the bare `df` on the last line displays the result as the cell output.
df = sql_to_dataframe(sql)
df

Unnamed: 0,event_name,conversion,user,user_per_conversion
0,view_item,144134,21197,6.799736
1,user_engagement,392661,63672,6.166934
2,page_view,440286,76272,5.772577
3,add_to_cart,15488,3942,3.92897
4,scroll,169102,46303,3.652074
5,begin_checkout,11825,4231,2.794848
6,add_payment_info,5350,2267,2.359947
7,select_item,6363,2898,2.195652
8,view_search_results,8744,4568,1.914186
9,view_promotion,62199,32591,1.908472


### 총 수익

In [7]:
# Summed "value" event parameter per event name, ordered from largest to
# smallest.
#
# For each event row, the "value" entry of event_params is looked up with a
# correlated scalar subquery, once per typed field (int_value, float_value,
# double_value). A missing entry or a NULL field is treated as 0 via IFNULL,
# the three are added, and the per-row totals are summed per event_name.
#
# This is a single-level form of the earlier three-level query: the
# hand-written `CASE WHEN x IS NULL THEN 0 ELSE x END` blocks are replaced by
# IFNULL(x, 0) and the two wrapper subqueries are removed. Output columns
# (event_name, value) and per-row arithmetic are unchanged.
sql = """
SELECT
  event_name,
  SUM(
    IFNULL((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"), 0)
    + IFNULL((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"), 0)
    + IFNULL((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"), 0)
  ) AS value
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY event_name
ORDER BY value DESC
"""

# Run the query; the bare `df` on the last line displays the result as the cell output.
df = sql_to_dataframe(sql)
df

Unnamed: 0,event_name,value
0,purchase,150484.93
1,first_visit,0.0
2,user_engagement,0.0
3,scroll,0.0
4,view_promotion,0.0
5,begin_checkout,0.0
6,add_payment_info,0.0
7,click,0.0
8,select_promotion,0.0
9,view_item_list,0.0
