In [1]:
!pip install google-cloud-bigquery
!pip install db-dtypes



In [3]:
import pandas as pd
import glob
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd

In [4]:
# Connect Python to Google BigQuery.
# Authentication uses a service-account JSON key file found in the current
# working directory.

# sorted() makes the choice deterministic when several *.json files match.
key_candidates = sorted(glob.glob("./*.json"))
if not key_candidates:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(
        "No service-account key file (*.json) found in the current directory."
    )
key_path = key_candidates[0]

credentials = service_account.Credentials.from_service_account_file(key_path)
# The project is taken from the key file itself, so it is not hard-coded here.
client = bigquery.Client(credentials=credentials,
                         project=credentials.project_id)

In [5]:
# Run a SQL query and convert the result to a DataFrame

def sql_to_dataframe(sql: str, bq_client=None) -> pd.DataFrame:
    """Run a SQL query and return its result as a DataFrame.

    Args:
        sql (str): sql for extraction
        bq_client: Optional client object exposing ``query(sql)``. When it is
            omitted, the notebook-level ``client`` is used, so existing
            one-argument calls behave exactly as before.

    Returns:
        pd.DataFrame: extract data with sql
    """
    # Look the global up only when no client was passed in, so the function
    # can be used (and tested) without the notebook's connection cell.
    active_client = client if bq_client is None else bq_client
    query_job = active_client.query(sql)
    return query_job.to_dataframe()

### 조회수

In [8]:
# Page views: for every page_location (read from event_params), count the
# rows whose event_name is "page_view" in the events_* tables with suffixes
# 20201110 through 20201206, sorted by that count in descending order.
sql = """
SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
      , COUNT(user_pseudo_id) AS page_view
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
    AND event_name = "page_view"
  GROUP BY 1
  ORDER BY 2 DESC
 """
df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,page_view
0,https://shop.googlemerchandisestore.com/,52355
1,https://shop.googlemerchandisestore.com/basket...,29921
2,https://shop.googlemerchandisestore.com/store....,21778
3,https://googlemerchandisestore.com/,20024
4,https://shop.googlemerchandisestore.com/signin...,15820
...,...,...
1274,https://shop.googlemerchandisestore.com/ case ...,1
1275,https://shop.googlemerchandisestore.com/Google...,1
1276,https://shop.googlemerchandisestore.com/google...,1
1277,https://shop.googlemerchandisestore.com/google...,1


### 사용자 수

In [9]:
# Users: for every page_location, count the distinct user_pseudo_id values.
# There is no event_name filter here, so every event type contributes.
# Sorted by the user count in descending order.
sql = """
SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
     , COUNT(DISTINCT user_pseudo_id) AS user
   FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  GROUP BY 1
  ORDER BY 2 DESC
"""
df = sql_to_dataframe(sql)
df


Unnamed: 0,page_location,user
0,https://shop.googlemerchandisestore.com/,30887
1,https://googlemerchandisestore.com/,14048
2,https://shop.googlemerchandisestore.com/Google...,10510
3,https://shop.googlemerchandisestore.com/store....,10034
4,https://shop.googlemerchandisestore.com/signin...,8811
...,...,...
1274,https://shop.googlemerchandisestore.com/shop.a...,1
1275,https://shop.googlemerchandisestore.com/Google...,1
1276,https://shop.googlemerchandisestore.com/345782...,1
1277,https://shop.googlemerchandisestore.com/Google...,1


### 사용자당 조회수

In [11]:
# Views per user: page_view count divided by distinct users, per page_location.
#
# Both aggregates are computed in a single pass over the events instead of
# scanning the tables twice and self-joining on page_location:
#   * page_view counts only "page_view" rows (with a non-NULL user_pseudo_id,
#     matching COUNT(user_pseudo_id)),
#   * user counts distinct user_pseudo_id over ALL event types, so the ratio
#     can be below 1 for pages whose visitors mostly fired other events.
# The WHERE / HAVING clauses keep the row set of the former INNER JOIN: rows
# with a NULL page_location never matched the join, and only pages with at
# least one page_view row were present on its left side.
sql = """
SELECT page_location
     , COUNTIF(event_name = "page_view" AND user_pseudo_id IS NOT NULL) AS page_view
     , COUNT(DISTINCT user_pseudo_id) AS user
     , ROUND(COUNTIF(event_name = "page_view" AND user_pseudo_id IS NOT NULL)
             / COUNT(DISTINCT user_pseudo_id), 2) AS page_view_per_user
  FROM (
         SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
              , event_name
              , user_pseudo_id
           FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
          WHERE _table_suffix BETWEEN "20201110" AND "20201206"
       )
 WHERE page_location IS NOT NULL
 GROUP BY page_location
HAVING COUNTIF(event_name = "page_view") > 0
 ORDER BY page_view_per_user DESC
"""
df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,page_view,user,page_view_per_user
0,https://shop.googlemerchandisestore.com/storei...,327,38,8.61
1,https://shop.googlemerchandisestore.com/ColoCo...,6,1,6.00
2,https://shop.googlemerchandisestore.com/Google...,5,1,5.00
3,https://shop.googlemerchandisestore.com/Google...,4,1,4.00
4,https://shop.googlemerchandisestore.com/Google...,4,1,4.00
...,...,...,...,...
1274,https://shop.googlemerchandisestore.com/google...,1,1,1.00
1275,https://shop.googlemerchandisestore.com/google...,1,1,1.00
1276,https://shop.googlemerchandisestore.com/Google...,4,5,0.80
1277,https://shop.googlemerchandisestore.com/Google...,3,4,0.75


### 평균 참여 시간

In [13]:
# Average engagement time: for every page_location, compute
#   * user                     - distinct user_pseudo_id count,
#   * engagement_time          - sum of the engagement_time_msec parameter
#                                divided by 1000,
#   * engagement_time_per_user - that sum divided by the distinct user count.
# Sorted by the per-user value (column 4) in descending order.
sql = """
SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
     , COUNT(DISTINCT user_pseudo_id) AS user
     , SUM((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec")) / 1000 AS engagement_time
     , SUM((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec")) / 1000 / COUNT(DISTINCT user_pseudo_id) AS engagement_time_per_user
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
 WHERE _table_suffix BETWEEN "20201110" AND "20201206"
 GROUP BY 1
 ORDER BY 4 DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,user,engagement_time,engagement_time_per_user
0,https://shop.googlemerchandisestore.com/payment,1,140.807,140.807000
1,https://shop.googlemerchandisestore.com/Google...,2,224.771,112.385500
2,https://shop.googlemerchandisestore.com/Google...,96,10489.848,109.269250
3,https://shop.googlemerchandisestore.com/Google...,12,1223.448,101.954000
4,https://shop.googlemerchandisestore.com/paymen...,2269,215651.420,95.042494
...,...,...,...,...
1274,https://shop.googlemerchandisestore.com/google...,1,,
1275,https://shop.googlemerchandisestore.com/google...,1,,
1276,https://shop.googlemerchandisestore.com/google...,1,,
1277,https://shop.googlemerchandisestore.com/google...,1,,


### 이벤트 수

In [14]:
# Event count: number of events (any event_name) per page_location,
# sorted by that count in descending order.
sql = """
SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
     , COUNT(event_name) AS event
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  GROUP BY 1
  ORDER BY 2 DESC
"""
df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,event
0,https://shop.googlemerchandisestore.com/,215012
1,https://googlemerchandisestore.com/,82834
2,https://shop.googlemerchandisestore.com/basket...,74480
3,https://shop.googlemerchandisestore.com/store....,73791
4,https://shop.googlemerchandisestore.com/Google...,54667
...,...,...
1274,https://shop.googlemerchandisestore.com/google...,1
1275,https://shop.googlemerchandisestore.com/google...,1
1276,https://shop.googlemerchandisestore.com/google...,1
1277,https://shop.googlemerchandisestore.com/items_all,1


### 전환 이벤트

In [15]:
# Conversions: per page_location, count only the events whose event_name is
# in the list inside the IN (...) clause below, which this notebook treats as
# conversion events. Sorted by that count in descending order.
sql = """
SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
     , COUNT(event_name) AS conversion
 FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
 WHERE _table_suffix BETWEEN "20201110" AND "20201206"
   AND event_name IN ("page_view","view_item","first_visit","predict_top_spenders","view_cart","add_to_cart","begin_checkout","purchase")
 GROUP BY 1
 ORDER BY 2 DESC
"""
df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,conversion
0,https://shop.googlemerchandisestore.com/,71502
1,https://shop.googlemerchandisestore.com/store....,34360
2,https://googlemerchandisestore.com/,31723
3,https://shop.googlemerchandisestore.com/Google...,30324
4,https://shop.googlemerchandisestore.com/basket...,30201
...,...,...
1274,https://shop.googlemerchandisestore.com/cart,1
1275,https://shop.googlemerchandisestore.com/google...,1
1276,https://shop.googlemerchandisestore.com/google...,1
1277,https://shop.googlemerchandisestore.com/Google...,1


### 총수익

In [16]:
# Total revenue: per page_location, sum the "value" event parameter.
# The parameter may be stored in int_value, float_value or double_value, so
# each of the three is read and a missing one is treated as 0 before summing.
# IFNULL(x, 0) replaces the former CASE WHEN x IS NULL THEN 0 ELSE x END,
# which spelled out every scalar subquery twice.
sql = """
SELECT page_location
     , SUM(int_value + float_value + double_value) AS value
  FROM (
         SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
              , IFNULL((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"), 0) AS int_value
              , IFNULL((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"), 0) AS float_value
              , IFNULL((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"), 0) AS double_value
           FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
          WHERE _table_suffix BETWEEN "20201110" AND "20201206"
       )
 GROUP BY 1
 ORDER BY 2 DESC
"""
df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,value
0,https://shop.googlemerchandisestore.com/orderc...,150484.93
1,https://shop.googlemerchandisestore.com/signin...,0.00
2,https://shop.googlemerchandisestore.com/Google...,0.00
3,https://shop.googlemerchandisestore.com/Google...,0.00
4,https://shop.googlemerchandisestore.com/specia...,0.00
...,...,...
1274,https://shop.googlemerchandisestore.com/eco/re...,0.00
1275,https://shop.googlemerchandisestore.com/google...,0.00
1276,https://shop.googlemerchandisestore.com/google...,0.00
1277,https://shop.googlemerchandisestore.com/google...,0.00


### 구글 애널리틱스 페이지 및 화면 보고서

In [17]:
# Google Analytics "pages and screens"-style report: one row per page_location
# with views, users, views per user, engagement time, engagement time per
# user, event count, conversion count and revenue.
#
# All metrics are computed in ONE pass over the events. The former version
# scanned the events tables four times and inner-joined the partial results on
# page_location. It also sorted inside a joined subquery, which has no effect
# on the final result. Output columns, their order and their names are
# unchanged.
#
# Row set: the inner joins kept only pages that (a) have a non-NULL
# page_location, since NULL never matches a join condition, and (b) have at
# least one "page_view" row. "page_view" is itself in the conversion list and
# the revenue subquery covered every page, so those two joins dropped nothing.
# The WHERE / HAVING clauses below reproduce exactly that filter.
#
# Note: user_per_page_view and user_per_engagement_time are per-USER ratios
# (views / users and engagement time / users); the names are kept as they were.
sql = """
WITH page_events AS (
    SELECT (SELECT value.string_value FROM UNNEST(event_params) WHERE key = "page_location") AS page_location
         , event_name
         , user_pseudo_id
         , (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec") AS engagement_time_msec
         -- the "value" parameter may sit in any of the three numeric fields
         , IFNULL((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"), 0)
           + IFNULL((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"), 0)
           + IFNULL((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"), 0) AS event_value
      FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
     WHERE _table_suffix BETWEEN "20201110" AND "20201206"
)
SELECT page_location
     , COUNTIF(event_name = "page_view" AND user_pseudo_id IS NOT NULL) AS page_view
     , COUNT(DISTINCT user_pseudo_id) AS user
     , ROUND(COUNTIF(event_name = "page_view" AND user_pseudo_id IS NOT NULL)
             / COUNT(DISTINCT user_pseudo_id), 2) AS user_per_page_view
     , SUM(engagement_time_msec) / 1000 AS engagement_time
     , FLOOR(SUM(engagement_time_msec) / 1000 / COUNT(DISTINCT user_pseudo_id)) AS user_per_engagement_time
     , COUNT(event_name) AS event
     , COUNTIF(event_name IN ("page_view","view_item","first_visit","predict_top_spenders","view_cart","add_to_cart","begin_checkout","purchase")) AS conversion
     , SUM(event_value) AS value
  FROM page_events
 WHERE page_location IS NOT NULL
 GROUP BY page_location
HAVING COUNTIF(event_name = "page_view") > 0
 ORDER BY page_view DESC
"""
df = sql_to_dataframe(sql)
df

Unnamed: 0,page_location,page_view,user,user_per_page_view,engagement_time,user_per_engagement_time,event,conversion,value
0,https://shop.googlemerchandisestore.com/,52355,30887,1.70,787966.232,25.0,215012,71502,0.0
1,https://shop.googlemerchandisestore.com/basket...,29921,8797,3.40,572247.833,65.0,74480,30201,0.0
2,https://shop.googlemerchandisestore.com/store....,21778,10034,2.17,494329.116,49.0,73791,34360,0.0
3,https://googlemerchandisestore.com/,20024,14048,1.43,133386.360,9.0,82834,31723,0.0
4,https://shop.googlemerchandisestore.com/signin...,15820,8811,1.80,155346.302,17.0,40013,16328,0.0
...,...,...,...,...,...,...,...,...,...
1274,https://shop.googlemerchandisestore.com/Google...,1,1,1.00,10.438,10.0,2,1,0.0
1275,https://shop.googlemerchandisestore.com/Google...,1,1,1.00,8.365,8.0,2,1,0.0
1276,https://shop.googlemerchandisestore.com/Google...,1,1,1.00,,,3,2,0.0
1277,https://shop.googlemerchandisestore.com/Google...,1,1,1.00,13.744,13.0,5,2,0.0
