In [0]:
%sql
-- Gold table: one coordinate pair per (state, city), taken as the mean of the
-- latitude/longitude values of the silver enriched address lines for that city,
-- rounded to 10 decimal places.
CREATE OR REPLACE TABLE hive_metastore.zhastay_yeltay_03_gold.city_geodata
USING DELTA
AS
WITH located_addresses AS (
  -- Keep only address lines that carry both coordinates, so the two averages
  -- below are computed over the same set of rows.
  SELECT
    ae.state,
    ae.city,
    ae.lat,
    ae.lng
  FROM zhastay_yeltay_02_silver.addressline_enriched AS ae
  WHERE ae.lat IS NOT NULL
    AND ae.lng IS NOT NULL
)
SELECT
  la.state,
  la.city,
  ROUND(AVG(la.lat), 10) AS lat,
  ROUND(AVG(la.lng), 10) AS lng
FROM located_addresses AS la
GROUP BY
  la.state,
  la.city

num_affected_rows,num_inserted_rows


In [0]:
%sql


-- Gold table: number of distinct VIP customers per city, with the city's
-- coordinates from city_geodata attached.
-- Output columns: lat, lng, city, total_vip_customers.
--
-- city_geodata holds one row per (state, city), so VIP customers are counted
-- per (state, city) and the join uses both columns. Matching on the city name
-- alone would merge same-named cities from different states into one count and
-- then pair that count with every same-named row in city_geodata, producing
-- duplicated rows with coordinates from the wrong state.
-- Same-named cities in different states therefore appear as separate rows,
-- distinguishable by their coordinates.
CREATE OR REPLACE TABLE hive_metastore.zhastay_yeltay_03_gold.01v2_cities_by_vip_customer_count
USING DELTA
AS
WITH cities_by_vip AS (
  SELECT
    a.state,
    a.city,
    -- DISTINCT: a customer with several orders to the same city counts once.
    COUNT(DISTINCT c.id) AS total_vip_customers
  FROM
    hive_metastore.zhastay_yeltay_02_silver.addresses AS a
    INNER JOIN hive_metastore.zhastay_yeltay_02_silver.orders AS o ON a.id = o.address_id
    INNER JOIN hive_metastore.zhastay_yeltay_02_silver.customers AS c ON o.customer_id = c.id
  WHERE
    c.status = 'VIP'
  GROUP BY
    a.state,
    a.city
)
SELECT
  cg.lat,
  cg.lng,
  q1.city,
  q1.total_vip_customers
FROM cities_by_vip AS q1
INNER JOIN zhastay_yeltay_03_gold.city_geodata AS cg
  -- Null-safe comparison on state so that cities whose state is NULL on both
  -- sides still match, as they did when the join used the city name only.
  ON cg.state <=> q1.state
  AND cg.city = q1.city

num_affected_rows,num_inserted_rows


In [0]:
%sql

-- Gold table: order count and revenue broken down by state, customer status,
-- customer type, metropolitan flag and state-capital flag.
-- Output columns: state, state_code, status, type, is_metropolitan,
-- is_state_capital_city, cnt, total_revenue.
CREATE OR REPLACE TABLE hive_metastore.zhastay_yeltay_03_gold.total_revenue_by_all_categories
USING DELTA
AS
WITH all_joined AS (
  -- Orders joined to their address, customer, detail lines and item prices.
  -- Because order_details is joined in, an order appears once per detail line
  -- (more often if one of the later joins matches several rows), which is why
  -- the outer query counts orders with DISTINCT.
  SELECT
    o.id AS order_id,
    o.created_on AS o_created_on,
    o.delivered_on,
    o.delivery_date,
    od.quantity,
    i.price,
    c.id AS customer_id,
    c.status,
    c.type,
    a.id AS address_id,
    a.created_on AS a_created_on,
    ae.id AS addressline_id,
    a.addressline,
    ae.completed_address,
    ae.postal_code,
    ae.neighborhood,
    ae.county,
    a.city,
    a.state,
    scc.abbreviation AS state_code,
    a.country,
    ae.lat,
    ae.lng,
    mc.metropolitan,
    -- TRUE when the address city has a row in metropolitan_cities with a
    -- non-NULL metropolitan value, FALSE otherwise.
    mc.metropolitan IS NOT NULL AS is_metropolitan,
    -- NULL (not FALSE) when state_capital_cities has no row for the state,
    -- because the LEFT JOIN then leaves scc.city NULL.
    scc.city = a.city AS is_state_capital_city
  FROM zhastay_yeltay_02_silver.orders AS o
  INNER JOIN zhastay_yeltay_02_silver.addresses AS a
    ON o.address_id = a.id
  INNER JOIN zhastay_yeltay_02_silver.customers AS c
    ON o.customer_id = c.id
  INNER JOIN zhastay_yeltay_02_silver.order_details AS od
    ON o.id = od.order_id
  INNER JOIN zhastay_yeltay_02_silver.items AS i
    ON od.item_id = i.id
  -- INNER JOIN: orders whose address has no matching enriched address line
  -- are excluded from the result.
  INNER JOIN zhastay_yeltay_02_silver.addressline_enriched AS ae
    ON a.country = ae.country
    AND a.state = ae.state
    AND a.city = ae.city
    AND a.addressline = ae.addressline
  LEFT JOIN zhastay_yeltay_02_silver.metropolitan_cities AS mc
    ON mc.state = a.state
    AND mc.city = a.city
  -- NOTE(review): joined on state only; presumably state_capital_cities has a
  -- single row per state. More than one would multiply rows and inflate
  -- total_revenue. Confirm.
  LEFT JOIN zhastay_yeltay_02_silver.state_capital_cities AS scc
    ON scc.state = a.state
)
SELECT
  state,
  state_code,
  status,
  type,
  is_metropolitan,
  is_state_capital_city,
  COUNT(DISTINCT order_id) AS cnt,
  -- Revenue is the sum of line amounts. COUNT here would only return the
  -- number of detail lines with a non-NULL amount.
  SUM(quantity * price) AS total_revenue
  -- Disabled draft metrics, kept for reference:
  -- COUNT(DISTINCT order_id) FILTER(WHERE status = 'VIP')::NUMERIC / COUNT(DISTINCT order_id) AS vip_orders_percentage,
  -- COUNT(DISTINCT order_id) FILTER(WHERE status = 'Regular')::NUMERIC / COUNT(DISTINCT order_id) AS regular_orders_percentage,

  -- COUNT(DISTINCT order_id) FILTER(WHERE type = 'Individual')::NUMERIC / COUNT(DISTINCT order_id) AS individual_percentage,
  -- COUNT(DISTINCT order_id) FILTER(WHERE type = 'Affiliate')::NUMERIC / COUNT(DISTINCT order_id) AS affiliate_orders_percentage,

  -- COUNT(DISTINCT order_id) FILTER(WHERE is_metropolitan)::NUMERIC / COUNT(DISTINCT order_id) AS metropolitan_percentage,
  -- COUNT(DISTINCT order_id) FILTER(WHERE NOT is_metropolitan)::NUMERIC / COUNT(DISTINCT order_id) AS not_metropolitan_percentage,

  -- COUNT(DISTINCT order_id) FILTER(WHERE is_state_capital_city)::NUMERIC / COUNT(DISTINCT order_id) AS state_capital_city_percentage,
  -- COUNT(DISTINCT order_id) FILTER(WHERE NOT is_state_capital_city)::NUMERIC / COUNT(DISTINCT order_id) AS not_state_capital_city_percentage
FROM all_joined
GROUP BY
  state,
  state_code,
  status,
  type,
  is_metropolitan,
  is_state_capital_city

num_affected_rows,num_inserted_rows


In [0]:
%sql

-- Gold table: number of orders and revenue per delivery date.
-- Output columns: delivered_on, total_orders, revenue.
-- Orders with a NULL delivered_on are grouped together in a single NULL row.
CREATE OR REPLACE TABLE hive_metastore.zhastay_yeltay_03_gold.daily_total_revenue
USING DELTA
AS
SELECT
  o.delivered_on,
  -- The join to order_details yields one row per detail line, so COUNT(*)
  -- would count lines; DISTINCT on the order id counts each order once.
  COUNT(DISTINCT o.id) AS total_orders,
  SUM(od.quantity * i.price) AS revenue
FROM zhastay_yeltay_02_silver.orders AS o
INNER JOIN zhastay_yeltay_02_silver.order_details AS od
  ON o.id = od.order_id
INNER JOIN zhastay_yeltay_02_silver.items AS i
  ON od.item_id = i.id
GROUP BY
  o.delivered_on
ORDER BY
  o.delivered_on


num_affected_rows,num_inserted_rows


In [0]:
%sql
-- Gold table: per (city, state, customer), a flag telling whether the customer
-- placed at least 5 orders in every week in which they ordered to that city.
-- Output columns: city, state, customer_id, reached_at_least_5_orders_all_weeks.
-- Customers who have NOT reached the threshold are the rows where the flag is
-- FALSE.
--
-- Only weeks that contain at least one order produce a row in the CTE, so
-- weeks with zero orders are not taken into account by the flag.
CREATE OR REPLACE TABLE hive_metastore.zhastay_yeltay_03_gold.customers_havent_reached_at_least_5_orders_all_weeks
USING DELTA
AS 
WITH weekly_orders_by_customer AS (
  -- Order count per customer, delivery city/state and calendar week.
  -- NOTE(review): no filter on customer type or status is applied here; all
  -- customers are included. Confirm that is intended.
  SELECT
    c.id AS customer_id,
    c.status AS customer_status,
    a.city,
    a.state,
    -- Monday of the order's week. dayofweek() numbers Sunday as 1 through
    -- Saturday as 7, so (dayofweek + 5) % 7 is the number of days elapsed
    -- since Monday (0 for Monday, 6 for Sunday).
    (CAST(o.created_on AS DATE) - (dayofweek(o.created_on) + 5) % 7) AS monday_of_week,
    COUNT(*) AS weekly_total_orders
  FROM hive_metastore.zhastay_yeltay_02_silver.addresses AS a
  INNER JOIN hive_metastore.zhastay_yeltay_02_silver.orders AS o
    ON a.id = o.address_id
  INNER JOIN hive_metastore.zhastay_yeltay_02_silver.customers AS c
    ON o.customer_id = c.id
  GROUP BY
    c.id,
    c.status,
    a.city,
    a.state,
    monday_of_week
)
SELECT
  city,
  state,
  customer_id,
  -- TRUE only when the weakest week still has 5 or more orders, i.e. every
  -- observed week reached the threshold. The earlier expression
  -- (COUNT(*) FILTER(WHERE weekly_total_orders >= 5) = 0) was TRUE when no
  -- week reached it, which is the opposite of what the column name says.
  MIN(weekly_total_orders) >= 5 AS reached_at_least_5_orders_all_weeks
FROM weekly_orders_by_customer
GROUP BY
  city,
  state,
  customer_id

num_affected_rows,num_inserted_rows
