In [1]:
import pandas as pd
import glob
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas as pd

In [2]:
# 파이썬 구글 빅쿼리 연동 코드
# json 파일

key_path = glob.glob("./*.json")[0]
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials = credentials, 
                         project = credentials.project_id)

In [3]:
def sql_to_dataframe(sql:str) -> pd.DataFrame:
    """
    Args:
        sql (str): sql for extraction

    Returns:
        pd.DataFrame: extract data with sql
    """
    query_job = client.query(sql)
    df = query_job.to_dataframe()
    return df

### 시간별 브라우저 사용자

In [4]:
sql = """
SELECT
  event_date,
  device.web_info.browser,
  COUNT(DISTINCT user_pseudo_id) AS user_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY event_date, device.web_info.browser
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,event_date,browser,user_count
0,20201129,Chrome,1987
1,20201129,Safari,681
2,20201129,Firefox,58
3,20201129,Android Webview,23
4,20201129,<Other>,70
...,...,...,...
157,20201121,Android Webview,35
158,20201121,Safari,641
159,20201121,Firefox,55
160,20201121,<Other>,70


### 사용자

In [6]:
sql = """
SELECT
  device.web_info.browser,
  COUNT(DISTINCT user_pseudo_id) AS user_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY device.web_info.browser
ORDER BY user_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,user_count
0,Chrome,52256
1,Safari,18664
2,<Other>,1956
3,Edge,1744
4,Firefox,1421
5,Android Webview,1022


### 새사용자

In [7]:
sql = """
SELECT
  device.web_info.browser,
  COUNT(DISTINCT user_pseudo_id) AS first_user_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name = "first_visit"
GROUP BY device.web_info.browser
ORDER BY first_user_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,first_user_count
0,Chrome,47131
1,Safari,16737
2,<Other>,1745
3,Edge,1561
4,Firefox,1259
5,Android Webview,911


### 참여 세션수

In [8]:
sql = """
SELECT
  device.web_info.browser,
  COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS engaged_session_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "session_engaged"),0) + CAST(COALESCE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = "session_engaged"),"0") AS INT64) = 1
GROUP BY device.web_info.browser
ORDER BY engaged_session_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,engaged_session_count
0,Chrome,60042
1,Safari,21144
2,<Other>,2269
3,Edge,1999
4,Firefox,1648
5,Android Webview,1170


### 세션수

In [9]:
sql = """
SELECT
  device.web_info.browser,
  COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS session_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY device.web_info.browser
ORDER BY session_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,session_count
0,Chrome,70819
1,Safari,24836
2,<Other>,2688
3,Edge,2353
4,Firefox,1959
5,Android Webview,1375


### 참여율

In [11]:
sql = """
WITH session AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS session_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  GROUP BY device.web_info.browser
),
engaged_session AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS engaged_session_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "session_engaged"),0) + CAST(COALESCE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = "session_engaged"),"0") AS INT64) = 1
  GROUP BY device.web_info.browser
)
SELECT
  session.browser,
  ROUND(engaged_session.engaged_session_count / session.session_count,2) AS engagement_ratio
FROM session
LEFT JOIN engaged_session
ON session.browser = engaged_session.browser
ORDER BY engagement_ratio DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,engagement_ratio
0,Safari,0.85
1,Chrome,0.85
2,Android Webview,0.85
3,Edge,0.85
4,Firefox,0.84
5,<Other>,0.84


### 사용자당 참여 세션수

In [12]:
sql = """
WITH session AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS session_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  GROUP BY device.web_info.browser
),
engaged_session AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS engaged_session_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "session_engaged"),0) + CAST(COALESCE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = "session_engaged"),"0") AS INT64) = 1
  GROUP BY device.web_info.browser
),
user AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT user_pseudo_id) AS user_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  GROUP BY device.web_info.browser
)
SELECT
  user.browser,
  ROUND(engaged_session.engaged_session_count / user.user_count,2) AS user_per_engaged_session_count
FROM user
LEFT JOIN engaged_session
ON user.browser = engaged_session.browser
ORDER BY user_per_engaged_session_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,user_per_engaged_session_count
0,<Other>,1.16
1,Firefox,1.16
2,Chrome,1.15
3,Edge,1.15
4,Android Webview,1.14
5,Safari,1.13


### 평균 참여 시간

In [13]:
sql = """
SELECT
  device.web_info.browser,
  SUM(FLOOR((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec") / 1000)) AS engagement_time,
  COUNT(DISTINCT user_pseudo_id) AS user_count,
  FLOOR(SUM(FLOOR((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec") / 1000)) / COUNT(DISTINCT user_pseudo_id)) AS user_engagement_time
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY device.web_info.browser
ORDER BY user_engagement_time DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,engagement_time,user_count,user_engagement_time
0,<Other>,328287.0,1956,167.0
1,Android Webview,123125.0,1022,120.0
2,Chrome,6133852.0,52256,117.0
3,Safari,2137902.0,18664,114.0
4,Firefox,156440.0,1421,110.0
5,Edge,185896.0,1744,106.0


### 이벤트 수

In [14]:
sql = """
SELECT
  device.web_info.browser,
  COUNT(event_name) AS event_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY device.web_info.browser
ORDER BY event_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,event_count
0,Chrome,978846
1,Safari,342685
2,<Other>,41895
3,Edge,31370
4,Firefox,26866
5,Android Webview,19768


### 전환

In [15]:
sql = """
SELECT
  device.web_info.browser,
  COUNT(event_name) AS conversion_count
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
AND event_name IN ("purchase","begin_checkout","first_visit","predict_ltv_payer","add_to_cart","view_item","view_cart","qualified_visit","Membership","predicted_top_spenders")
GROUP BY device.web_info.browser
ORDER BY conversion_count DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,conversion_count
0,Chrome,164792
1,Safari,57939
2,<Other>,7165
3,Edge,5330
4,Firefox,4432
5,Android Webview,3370


### 총 수익

In [16]:
sql = """
SELECT
  device.web_info.browser,
  TRUNC(CAST(SUM(COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"),0)) AS NUMERIC),2) AS value
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _table_suffix BETWEEN "20201110" AND "20201206"
GROUP BY device.web_info.browser
ORDER BY value DESC
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,value
0,Chrome,104984.61
1,Safari,35605.67
2,Android Webview,3353.96
3,<Other>,3315.43
4,Edge,1727.02
5,Firefox,1498.24


### 보고서

In [26]:
sql = """
WITH user AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT user_pseudo_id) AS user_count,
    COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS session_count,
    SUM(FLOOR((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec") / 1000)) AS engagement_time,
    SUM(FLOOR((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "engagement_time_msec") / 1000)) / COUNT(DISTINCT user_pseudo_id) AS user_engagement_time,
    COUNT(event_name) AS event_count,
    TRUNC(CAST(SUM(COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.float_value FROM UNNEST(event_params) WHERE key = "value"),0) + COALESCE((SELECT value.double_value FROM UNNEST(event_params) WHERE key = "value"),0)) AS NUMERIC),2) AS value
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  GROUP BY device.web_info.browser
),
new_user AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT user_pseudo_id) AS new_user_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name = "first_visit"
  GROUP BY device.web_info.browser
),
engaged_session AS(
  SELECT
    device.web_info.browser,
    COUNT(DISTINCT CONCAT(user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params) WHERE key = "ga_session_id"))) AS engaged_session_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND COALESCE((SELECT value.int_value FROM UNNEST(event_params) WHERE key = "session_engaged"),0) + CAST(COALESCE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = "session_engaged"),"0") AS INT64) = 1
  GROUP BY device.web_info.browser
),
conversion AS(
  SELECT
    device.web_info.browser,
    COUNT(event_name) AS conversion_count
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _table_suffix BETWEEN "20201110" AND "20201206"
  AND event_name IN ("purchase","begin_checkout","first_visit","predict_ltv_payer","add_to_cart","view_item","view_cart","qualified_visit","Membership","predicted_top_spenders")
  GROUP BY device.web_info.browser
)

SELECT
  user.browser,
  user.user_count,
  new_user.new_user_count,
  user.session_count,
  engaged_session.engaged_session_count,
  ROUND(engaged_session.engaged_session_count / user.session_count,2) AS engagement_ratio,
  ROUND(engaged_session.engaged_session_count / user.user_count,2) AS user_per_engagement_session_count,
  user.user_engagement_time,
  user.event_count,
  conversion.conversion_count,
  user.value
FROM user
LEFT JOIN new_user
ON user.browser = new_user.browser
LEFT JOIN engaged_session
ON user.browser = engaged_session.browser
LEFT JOIN conversion
ON user.browser = conversion.browser
"""

df = sql_to_dataframe(sql)
df

Unnamed: 0,browser,user_count,new_user_count,session_count,engaged_session_count,engagement_ratio,user_per_engagement_session_count,user_engagement_time,event_count,conversion_count,value
0,Chrome,52256,47131,70819,60042,0.85,1.15,117.380818,978846,164792,104984.61
1,Safari,18664,16737,24836,21144,0.85,1.13,114.546828,342685,57939,35605.67
2,<Other>,1956,1745,2688,2269,0.84,1.16,167.83589,41895,7165,3315.43
3,Firefox,1421,1259,1959,1648,0.84,1.16,110.091485,26866,4432,1498.24
4,Edge,1744,1561,2353,1999,0.85,1.15,106.591743,31370,5330,1727.02
5,Android Webview,1022,911,1375,1170,0.85,1.14,120.47456,19768,3370,3353.96
