## SNOWBALL Spark SQL version
#### **Notebook to create TESTS layer**
##### **Create the TESTS schema that holds the TESTS tables**


In [None]:
%%sql
-- Create the schema that the test tables in this notebook (tests.*) are written to.
-- IF NOT EXISTS lets the cell be re-run without failing when the schema is already there.
CREATE SCHEMA IF NOT EXISTS tests;

##### **customer_level_granularity**

In [None]:
%%sql
DROP TABLE IF EXISTS tests.customer_level_granularity;
CREATE TABLE tests.customer_level_granularity AS
/*
 Customer-level reconciliation test.
 Compares SUM(mrr) per customer_key in core.revenue with SUM(mrr) per
 customer_key in analysis.monthly_revenue (all products combined) and keeps
 one row for every customer whose two totals differ.

 NOTE(review): the notebook is labelled Spark SQL, where object names are
 normally quoted with backticks; the double-quoted names are kept exactly as
 they were -- confirm the session treats them as identifiers.
*/

-- Total MRR per customer in the source revenue table
WITH revenue_summary AS (
    SELECT
        customer_key
        , SUM(mrr) AS revenue
    FROM "arr_sandbox"."core"."revenue"
    GROUP BY customer_key
)

-- Total MRR per customer in the derived monthly table
, monthly_summary AS (
    SELECT
        customer_key
        , SUM(mrr) AS monthly_revenue
    FROM "arr_sandbox"."analysis"."monthly_revenue"
    GROUP BY customer_key
)

SELECT
    r.customer_key
    , r.revenue
    , m.monthly_revenue
FROM revenue_summary AS r
LEFT JOIN monthly_summary AS m
    ON r.customer_key = m.customer_key
WHERE
    -- Null-safe inequality. A plain != evaluates to NULL when the customer has
    -- no row in monthly_revenue (m.monthly_revenue IS NULL after the LEFT JOIN),
    -- which would silently drop exactly the customers that are missing
    -- altogether. <=> treats NULL as a comparable value, so a row is kept when
    -- the totals differ or when only one side is NULL.
    NOT (r.revenue <=> m.monthly_revenue)


##### **missing_month_check**

In [None]:
%%sql
DROP TABLE IF EXISTS tests.missing_month_check;
CREATE TABLE tests.missing_month_check AS
/*
 Coverage test: checks that analysis.monthly_revenue holds the expected number
 of distinct months for each customer/product pair found in core.revenue.
 One row is kept for every pair whose actual month count differs from the
 expected count, including pairs that have no rows in monthly_revenue at all.
*/

-- Expected number of months per customer/product pair.
-- The span runs from the first revenue month to 12 months after the last
-- revenue month, inclusive (hence the + 1).
-- NOTE(review): the 12-month extension presumably mirrors how monthly_revenue
-- is rolled forward after the last revenue month -- confirm against the job
-- that builds that table.
WITH revenue_month AS (
    SELECT
        customer_key
        , product_key
        , MIN(month) AS min_month
        , MAX(month) AS max_month
        -- The aggregates are repeated here instead of referencing the
        -- min_month / max_month aliases, so the expression does not depend on
        -- same-SELECT (lateral) alias resolution.
        , DATEDIFF(MONTH, MIN(month), DATEADD(MONTH, 12, MAX(month))) + 1 AS expected_months
    FROM
        "arr_sandbox"."core"."revenue"
    WHERE revenue <> 0 AND revenue_type = 1
    GROUP BY
        customer_key
        , product_key
)

-- Number of distinct months actually present in monthly_revenue
, month_counts AS (
    SELECT
        customer_key
        , product_key
        , COUNT(DISTINCT month_roll) AS actual_month
    FROM
        "arr_sandbox"."analysis"."monthly_revenue"
    GROUP BY
        customer_key
        , product_key
)

SELECT
    r.customer_key
    , r.product_key
    , r.min_month
    , r.max_month
    , r.expected_months
    , m.actual_month
FROM revenue_month AS r
LEFT JOIN month_counts AS m
    ON
        r.customer_key = m.customer_key
        AND r.product_key = m.product_key
WHERE
    -- Null-safe inequality. With a plain <>, a pair that is absent from
    -- monthly_revenue (m.actual_month IS NULL after the LEFT JOIN) compares to
    -- NULL and is filtered out, hiding the pairs with the least coverage.
    NOT (m.actual_month <=> r.expected_months)


##### **product_level_granularity**

In [None]:
%%sql
DROP TABLE IF EXISTS tests.product_level_granularity;
CREATE TABLE tests.product_level_granularity AS
/*
 Product-level reconciliation test.
 Compares SUM(mrr) per customer_key/product_key pair in core.revenue with
 SUM(mrr) for the same pair in analysis.monthly_revenue and keeps one row for
 every pair whose two totals are not consistent across both tables.
*/

-- Total MRR per customer/product pair in the source revenue table
WITH revenue_summary AS (
    SELECT
        customer_key
        , product_key
        , SUM(mrr) AS revenue
    FROM
        "arr_sandbox"."core"."revenue"
    GROUP BY
        customer_key
        , product_key
)

-- Total MRR per customer/product pair in the derived monthly table
, monthly_summary AS (
    SELECT
        customer_key
        , product_key
        , SUM(mrr) AS monthly_revenue
    FROM
        "arr_sandbox"."analysis"."monthly_revenue"
    GROUP BY
        customer_key
        , product_key
)

SELECT
    r.customer_key
    , r.product_key
    , r.revenue
    , m.monthly_revenue
FROM
    revenue_summary AS r
LEFT JOIN
    monthly_summary AS m
    ON
        r.customer_key = m.customer_key
        AND r.product_key = m.product_key
WHERE
    -- Null-safe inequality. A plain != evaluates to NULL when the pair has no
    -- row in monthly_revenue (m.monthly_revenue IS NULL after the LEFT JOIN),
    -- which would silently drop the pairs that are missing altogether.
    -- <=> treats NULL as a comparable value, so a row is kept when the totals
    -- differ or when only one side is NULL.
    NOT (r.revenue <=> m.monthly_revenue)


##### **reconciliation_bridge_balance_test**

In [None]:
%%sql
DROP TABLE IF EXISTS tests.reconciliation_bridge_balance_test;
CREATE TABLE tests.reconciliation_bridge_balance_test AS
-- Bridge balance test on reporting.rpt_revenue_bridge.
-- Every bridge row is joined to its customer and product dimension rows and
-- annotated with the totals of each bridge measure over its
-- (period_type, customer_level_1, product_level_1, month_roll) group.
-- Rows are kept when, after rounding to whole units, the group's
-- beginning balance plus its movements does not equal its ending balance.

WITH bridge_totals AS (
    SELECT
        snowball_key
        , s.customer_key
        , c.customer_level_1
        , p.product_level_1
        , month_roll
        , period_type
        , bop_arr
        , customer_churn
        , product_churn
        , downsell
        , grr
        , upsell
        , cross_sell
        , nrr
        , new_customer
        , eop_arr
        -- Group totals; every window below uses the same partition.
        , SUM(bop_arr) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_bop
        , SUM(downsell) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_downsell
        , SUM(upsell) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_upsell
        , SUM(cross_sell) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_cross_sell
        -- net_nrr is reported in the output but is not part of the balance check.
        , SUM(nrr) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_nrr
        , SUM(new_customer) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_new_customer
        , SUM(customer_churn) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_customer_churn
        , SUM(product_churn) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_product_churn
        , SUM(eop_arr) OVER (
            PARTITION BY period_type, c.customer_level_1, p.product_level_1, month_roll
        ) AS net_eop
    FROM "arr_sandbox"."reporting"."rpt_revenue_bridge" AS s
    INNER JOIN "arr_sandbox"."datamart"."dim_customer" AS c
        ON c.customer_key = s.customer_key
    INNER JOIN "arr_sandbox"."datamart"."dim_product" AS p
        ON p.product_key = s.product_key
)

SELECT bridge_totals.*
FROM bridge_totals
WHERE
    -- Ending balance minus (beginning balance + movements), each side rounded
    -- to 0 decimals first; any non-zero difference is an imbalance.
    ROUND(net_eop, 0)
    - ROUND(
        net_bop
        + net_customer_churn
        + net_product_churn
        + net_downsell
        + net_upsell
        + net_cross_sell
        + net_new_customer, 0
    ) <> 0


##### **unique_record**

In [None]:
%%sql
DROP TABLE IF EXISTS tests.unique_record;
CREATE TABLE tests.unique_record AS
-- Duplicate-key test on core.revenue.
-- Rows sharing the same (customer_key, product_key, month, revenue_type) are
-- numbered within their group; every row numbered beyond the first is written
-- to the result table together with its row number (rn).

WITH numbered_revenue AS (
    SELECT
        rev.*
        , ROW_NUMBER() OVER (
            PARTITION BY rev.customer_key, rev.product_key, rev.month, rev.revenue_type
            ORDER BY rev.month
        ) AS rn
    FROM "arr_sandbox"."core"."revenue" AS rev
)

SELECT numbered_revenue.*
FROM numbered_revenue
WHERE numbered_revenue.rn >= 2
