# Transform customer Data
1. Remove records with NULL customer_id
2. Remove exact duplicate records
3. Remove duplicate records based on created_timestamp
4. CAST the columns to the correct Data Types
5. Write transformed data to the Silver schema

1. Remove records with NULL customer_id

-- Step 1: drop rows that have no customer_id.
SELECT c.*
FROM udemyttstorage_catalog.bronze.customers_view AS c
WHERE c.customer_id IS NOT NULL;

2. Remove exact duplicate records

In [0]:
-- Step 2: collapse rows that are identical in every column,
-- still excluding rows without a customer_id.
SELECT DISTINCT c.*
FROM udemyttstorage_catalog.bronze.customers_view AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id;

3. Remove duplicate records based on created_timestamp

In [0]:
-- Temp view holding the de-duplicated, non-NULL-customer_id rows so the
-- following steps can reference them by name.
CREATE OR REPLACE TEMP VIEW distinct_customers_view AS
    SELECT DISTINCT c.*
    FROM udemyttstorage_catalog.bronze.customers_view AS c
    WHERE c.customer_id IS NOT NULL
    ORDER BY c.customer_id;

In [0]:
-- Latest created_timestamp for each customer_id. The selected key must be
-- the same column that is grouped on (customer_id).
SELECT
    customer_id,
    MAX(created_timestamp) AS max_created_timestamp
FROM distinct_customers_view
GROUP BY customer_id;

In [0]:
-- Step 3: keep only the row carrying the latest created_timestamp for each
-- customer_id.
WITH cte_max AS
(
    -- Latest created_timestamp per customer_id.
    SELECT
        customer_id,
        MAX(created_timestamp) AS max_created_timestamp
    FROM distinct_customers_view
    GROUP BY customer_id
)
SELECT t.*
FROM distinct_customers_view AS t
INNER JOIN cte_max AS m
    ON t.customer_id = m.customer_id
    -- Match against the CTE's aggregated column, not the raw column name.
    AND t.created_timestamp = m.max_created_timestamp;

4. CAST the columns to the correct Data Types

In [0]:
-- Step 4: latest row per customer_id, with the timestamp and date columns
-- cast to TIMESTAMP / DATE.
WITH cte_max AS
(
    -- Latest created_timestamp per customer_id.
    SELECT
        customer_id,
        MAX(created_timestamp) AS max_created_timestamp
    FROM distinct_customers_view
    GROUP BY customer_id
)
SELECT
    -- By the join condition below, this value equals the per-customer maximum.
    CAST(t.created_timestamp AS TIMESTAMP) AS max_created_timestamp,
    t.customer_id,
    t.customer_name,
    CAST(t.date_of_birth AS DATE) AS date_of_birth,
    t.email,
    CAST(t.member_since AS DATE) AS member_since,
    t.telephone
FROM distinct_customers_view AS t
INNER JOIN cte_max AS m
    ON t.customer_id = m.customer_id
    -- Match against the CTE's aggregated column, not the raw column name.
    AND t.created_timestamp = m.max_created_timestamp;

5. Write transformed data to the Silver schema

In [0]:
-- Step 5: materialize the cleaned customer rows into the silver schema.
-- Reads from the temp view distinct_customers_view, which must already
-- exist in the current session.
CREATE OR REPLACE TABLE udemyttstorage_catalog.silver.customers
AS
WITH cte_max AS
    (
        -- Latest created_timestamp per customer_id.
        SELECT
            customer_id,
            MAX(created_timestamp) AS max_created_timestamp
        FROM distinct_customers_view
        GROUP BY customer_id
    )
    SELECT
        -- By the join condition below, this value equals the per-customer maximum.
        CAST(t.created_timestamp AS TIMESTAMP) AS max_created_timestamp,
        t.customer_id,
        t.customer_name,
        CAST(t.date_of_birth AS DATE) AS date_of_birth,
        t.email,
        CAST(t.member_since AS DATE) AS member_since,
        t.telephone
    FROM distinct_customers_view AS t
    INNER JOIN cte_max AS m
        ON t.customer_id = m.customer_id
        -- Match against the CTE's aggregated column, not the raw column name.
        AND t.created_timestamp = m.max_created_timestamp;