-- In [0]: (notebook cell marker left over from export)
-- =====================================================
-- DAY 4 COMPLETE POC - SET OPERATIONS & SUBQUERIES
-- All topics with simple examples
-- =====================================================

USE bank_management;

-- =====================================================
-- TOPIC 1: UNION vs UNION ALL
-- =====================================================

-- Create archive table for demo
-- This fixture feeds the UNION / INTERSECT / EXCEPT examples and is dropped
-- again by the clean-up step at the end of the script, so rebuilding it from
-- scratch is safe. Dropping first keeps the script re-runnable: if an earlier
-- run stopped before the clean-up, CREATE TABLE IF NOT EXISTS would keep the
-- old table and the INSERT would add the three rows a second time.
DROP TABLE IF EXISTS customers_archive;

CREATE TABLE IF NOT EXISTS customers_archive (
    customer_id INT,
    first_name STRING,
    last_name STRING
);

-- Explicit column list so the values do not depend on the table's column order.
INSERT INTO customers_archive (customer_id, first_name, last_name) VALUES
(1001, 'John', 'Anderson'),
(1002, 'Sarah', 'Mitchell'),
(2001, 'Tom', 'Wilson');

-- Example 1A: UNION (removes duplicates)
-- No per-branch label column here: a constant such as 'Active' / 'Archive'
-- makes every row differ between the two branches, so UNION could never
-- collapse a customer that appears in both tables.
SELECT '=== UNION (Removes Duplicates) ===' AS example;
SELECT first_name, last_name FROM customers
UNION
SELECT first_name, last_name FROM customers_archive
ORDER BY last_name, first_name;

-- Example 1B: UNION ALL (keeps duplicates - FASTER)
-- The source label shows which table each retained row came from.
-- The extra sort keys give rows with the same last name a stable order.
SELECT '=== UNION ALL (Keeps All Rows) ===' AS example;
SELECT first_name, last_name, 'Active' AS source FROM customers
UNION ALL
SELECT first_name, last_name, 'Archive' AS source FROM customers_archive
ORDER BY last_name, first_name, source;

-- Real-world example: label every transaction as current-month or older,
-- stack both sets with UNION ALL, then show the ten most recent rows.
SELECT '=== Real Use Case: Combine Current + Archive ===' AS example;
WITH current_month AS (
    -- Transactions dated on or after the first day of the current month
    SELECT
        t.transaction_id,
        t.amount,
        t.transaction_date,
        'Current Month' AS period
    FROM transactions AS t
    WHERE t.transaction_date >= DATE_TRUNC('month', CURRENT_DATE())
),
previous_months AS (
    -- Transactions dated before the first day of the current month
    SELECT
        t.transaction_id,
        t.amount,
        t.transaction_date,
        'Previous Months' AS period
    FROM transactions AS t
    WHERE t.transaction_date < DATE_TRUNC('month', CURRENT_DATE())
)
SELECT transaction_id, amount, transaction_date, period
FROM current_month
UNION ALL
SELECT transaction_id, amount, transaction_date, period
FROM previous_months
ORDER BY transaction_date DESC
LIMIT 10;

-- =====================================================
-- TOPIC 2: SUBQUERY IN WHERE CLAUSE
-- =====================================================

-- Example 2A: accounts whose balance is above the overall average.
-- The filter compares against the unrounded average; the rounded value is
-- only displayed next to each row for reference.
SELECT '=== Accounts Above Average Balance ===' AS example;
SELECT
    acct.account_id,
    acct.account_number,
    acct.balance,
    (
        SELECT ROUND(AVG(every_acct.balance), 2)
        FROM accounts AS every_acct
    ) AS avg_balance
FROM accounts AS acct
WHERE acct.balance > (
    SELECT AVG(every_acct.balance)
    FROM accounts AS every_acct
)
ORDER BY acct.balance DESC;

-- Example 2B: IN operator with subquery
-- The subquery collects the account ids owned by VIP customers; the outer
-- query keeps only transactions posted to those accounts.
SELECT '=== Transactions for VIP Customers ===' AS example;
SELECT 
    t.transaction_id,
    t.amount,
    t.transaction_type,
    t.transaction_date
FROM transactions t
WHERE t.account_id IN (
    SELECT a.account_id
    FROM accounts a
    INNER JOIN customers c ON a.customer_id = c.customer_id
    WHERE c.customer_type = 'VIP'
)
-- Without an ORDER BY, LIMIT returns an arbitrary five rows that can change
-- between runs. Sorting by transaction_id (presumably unique - confirm against
-- the schema) makes the sample repeatable.
ORDER BY t.transaction_id
LIMIT 5;

-- Example 2C: NOT IN - Find customers without transactions
-- NULL trap: if the subquery returns even one NULL customer_id, the NOT IN
-- predicate is never TRUE and the query returns no rows at all. Filtering
-- NULLs out of the subquery keeps the NOT IN form safe.
-- (NOT EXISTS, shown in Topic 4, avoids the problem entirely.)
SELECT '=== Customers Without Transactions ===' AS example;
SELECT 
    c.customer_id,
    c.first_name,
    c.last_name
FROM customers c
WHERE c.customer_id NOT IN (
    -- DISTINCT is not needed: IN / NOT IN only test membership.
    SELECT a.customer_id
    FROM accounts a
    INNER JOIN transactions t ON a.account_id = t.account_id
    WHERE a.customer_id IS NOT NULL
);

-- =====================================================
-- TOPIC 3: SUBQUERY IN FROM CLAUSE (DERIVED TABLE)
-- =====================================================

-- Example 3: aggregate per customer inside a derived table, then tier and
-- filter the aggregated rows in the outer query.
SELECT '=== Customer Summary (Derived Table) ===' AS example;
SELECT
    per_customer.customer_name,
    per_customer.total_balance,
    per_customer.account_count,
    -- Because of the outer filter, 'Bronze' here only covers totals above
    -- 10000 and below 20000.
    CASE
        WHEN per_customer.total_balance >= 50000 THEN 'Gold'
        WHEN per_customer.total_balance >= 20000 THEN 'Silver'
        ELSE 'Bronze'
    END AS tier
FROM (
    -- One row per customer. The LEFT JOIN keeps customers without accounts;
    -- for them the account count is 0 and the balance falls back to 0.
    SELECT
        cust.customer_id,
        cust.first_name || ' ' || cust.last_name AS customer_name,
        COUNT(acct.account_id) AS account_count,
        COALESCE(SUM(acct.balance), 0) AS total_balance
    FROM customers AS cust
    LEFT JOIN accounts AS acct
        ON acct.customer_id = cust.customer_id
    GROUP BY
        cust.customer_id,
        cust.first_name,
        cust.last_name
) AS per_customer
WHERE per_customer.total_balance > 10000
ORDER BY per_customer.total_balance DESC;

-- =====================================================
-- TOPIC 4: EXISTS vs IN
-- =====================================================

-- Example 4A: EXISTS (faster for checking existence)
-- Keeps a customer as soon as one matching account row is found.
SELECT '=== Customers with Accounts (EXISTS) ===' AS example;
SELECT 
    c.customer_id,
    c.first_name,
    c.last_name
FROM customers c
WHERE EXISTS (
    SELECT 1 FROM accounts a WHERE a.customer_id = c.customer_id
)
-- Sorted so that LIMIT 5 picks the same five customers as the IN version
-- below; without ORDER BY each query could show a different arbitrary sample.
ORDER BY c.customer_id
LIMIT 5;

-- Example 4B: NOT EXISTS (find missing relationships)
-- Keeps only customers for whom no account row exists.
SELECT '=== Customers WITHOUT Accounts (NOT EXISTS) ===' AS example;
SELECT 
    c.customer_id,
    c.first_name,
    c.last_name,
    c.email
FROM customers c
WHERE NOT EXISTS (
    SELECT 1 FROM accounts a WHERE a.customer_id = c.customer_id
)
ORDER BY c.customer_id;

-- Comparison: IN vs EXISTS (same result, different performance)
-- Same filter and same sort key as Example 4A, so the two outputs can be
-- compared row by row.
SELECT '=== Same Query Using IN ===' AS example;
SELECT 
    c.customer_id,
    c.first_name,
    c.last_name
FROM customers c
WHERE c.customer_id IN (
    SELECT a.customer_id FROM accounts a
)
ORDER BY c.customer_id
LIMIT 5;

-- =====================================================
-- TOPIC 5: CORRELATED SUBQUERY
-- =====================================================

-- Example 5A: for every customer that owns at least one account, look up the
-- largest transaction amount and the number of transactions across all of
-- that customer's accounts. Both figures come from correlated subqueries
-- that refer back to the outer customer row.
SELECT '=== Largest Transaction Per Customer ===' AS example;
SELECT
    cust.customer_id,
    cust.first_name || ' ' || cust.last_name AS customer_name,
    (
        SELECT MAX(txn.amount)
        FROM transactions AS txn
        INNER JOIN accounts AS acct
            ON txn.account_id = acct.account_id
        WHERE acct.customer_id = cust.customer_id
    ) AS largest_transaction,
    (
        SELECT COUNT(txn.transaction_id)
        FROM transactions AS txn
        INNER JOIN accounts AS acct
            ON txn.account_id = acct.account_id
        WHERE acct.customer_id = cust.customer_id
    ) AS total_transactions
FROM customers AS cust
WHERE EXISTS (
    SELECT 1
    FROM accounts AS owned
    WHERE owned.customer_id = cust.customer_id
)
ORDER BY largest_transaction DESC;

-- Example 5B: accounts whose balance exceeds the average balance of all
-- accounts held by the same customer. The filter uses the unrounded average;
-- the rounded value is shown for reference only.
SELECT '=== Accounts Above Customer Average ===' AS example;
SELECT
    acct.account_id,
    acct.account_number,
    acct.balance,
    cust.first_name || ' ' || cust.last_name AS customer_name,
    (
        SELECT ROUND(AVG(sibling.balance), 2)
        FROM accounts AS sibling
        WHERE sibling.customer_id = acct.customer_id
    ) AS customer_avg_balance
FROM accounts AS acct
INNER JOIN customers AS cust
    ON acct.customer_id = cust.customer_id
WHERE acct.balance > (
    SELECT AVG(sibling.balance)
    FROM accounts AS sibling
    WHERE sibling.customer_id = acct.customer_id
);

-- =====================================================
-- TOPIC 6: INTERSECT & EXCEPT
-- =====================================================

-- Example 6A: INTERSECT (customers in both systems)
-- A row only counts as "in both" when id, first name and last name all match.
SELECT '=== Customers in Both Active and Archive ===' AS example;
SELECT customer_id, first_name, last_name FROM customers
INTERSECT
SELECT customer_id, first_name, last_name FROM customers_archive
ORDER BY customer_id;

-- Example 6B: EXCEPT (customers only in active, not in archive)
-- ORDER BY applies to the result of the whole set operation; without it the
-- LIMIT would return an arbitrary five rows that can change between runs.
SELECT '=== Customers Only in Active System ===' AS example;
SELECT customer_id, first_name, last_name FROM customers
EXCEPT
SELECT customer_id, first_name, last_name FROM customers_archive
ORDER BY customer_id
LIMIT 5;

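-- =====================================================
-- INTERVIEW QUESTIONS
-- =====================================================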

-- Interview Q1: second highest balance.
-- Take the maximum of all balances that are strictly below the overall
-- maximum; the result is NULL when no such balance exists.
SELECT '=== Second Highest Balance ===' AS example;
SELECT
    MAX(acct.balance) AS second_highest_balance
FROM accounts AS acct
WHERE acct.balance < (
    SELECT MAX(top_acct.balance)
    FROM accounts AS top_acct
);

-- Interview Q2: Find duplicates (if any existed)
-- GROUP BY puts all NULL emails into one group, so several customers with no
-- email on file would be reported as a "duplicate". A missing value is not a
-- duplicate, so those rows are excluded before grouping.
SELECT '=== Check for Duplicate Emails ===' AS example;
SELECT email, COUNT(*) AS duplicate_count
FROM customers
WHERE email IS NOT NULL
GROUP BY email
HAVING COUNT(*) > 1
ORDER BY duplicate_count DESC, email;

-- Interview Q3: Customers with above-average transactions
-- The comparison must use matching units: each customer's transaction count
-- is compared with the average transaction count per customer. Dividing all
-- transactions by the number of distinct accounts would give an average per
-- account, which flags any customer who simply owns several accounts.
SELECT '=== Customers with More Than Avg Transactions ===' AS example;
WITH customer_txns AS (
    -- One row per customer that has at least one transaction
    SELECT 
        c.customer_id,
        c.first_name || ' ' || c.last_name AS customer_name,
        COUNT(t.transaction_id) AS transaction_count
    FROM customers c
    INNER JOIN accounts a ON c.customer_id = a.customer_id
    INNER JOIN transactions t ON a.account_id = t.account_id
    GROUP BY c.customer_id, c.first_name, c.last_name
),
txn_stats AS (
    -- Single-row average over the per-customer counts above
    SELECT AVG(transaction_count) AS avg_transactions
    FROM customer_txns
)
SELECT 
    ct.customer_name,
    ct.transaction_count,
    ts.avg_transactions
FROM customer_txns AS ct
CROSS JOIN txn_stats AS ts
WHERE ct.transaction_count > ts.avg_transactions
ORDER BY ct.transaction_count DESC;

-- Interview Q4: rank customers by the combined balance of their accounts and
-- show the five largest totals.
SELECT '=== Top 5 Customers by Total Balance ===' AS example;
WITH customer_totals AS (
    -- One row per customer that owns at least one account
    SELECT
        cust.customer_id,
        cust.first_name || ' ' || cust.last_name AS customer_name,
        SUM(acct.balance) AS total_balance
    FROM customers AS cust
    INNER JOIN accounts AS acct
        ON cust.customer_id = acct.customer_id
    GROUP BY
        cust.customer_id,
        cust.first_name,
        cust.last_name
)
SELECT
    ct.customer_id,
    ct.customer_name,
    ct.total_balance,
    RANK() OVER (ORDER BY ct.total_balance DESC) AS balance_rank
FROM customer_totals AS ct
ORDER BY ct.total_balance DESC
LIMIT 5;

-- Interview Q5: Month-over-month comparison
-- Step 1 aggregates transactions per calendar month. Step 2 places the
-- previous row's total next to each month so the change is visible. The
-- original four columns are unchanged; the two comparison columns are added
-- at the end.
SELECT '=== Monthly Transaction Trends ===' AS example;
WITH monthly AS (
    SELECT 
        DATE_TRUNC('month', transaction_date) AS txn_month,
        COUNT(*) AS transaction_count,
        SUM(amount) AS total_amount,
        ROUND(AVG(amount), 2) AS avg_amount
    FROM transactions
    GROUP BY DATE_TRUNC('month', transaction_date)
)
SELECT 
    m.txn_month AS month,
    m.transaction_count,
    m.total_amount,
    m.avg_amount,
    -- LAG reads the preceding row in month order. It is NULL for the earliest
    -- month, and when a month has no transactions the "previous" row is the
    -- latest earlier month that does have some.
    LAG(m.total_amount) OVER (ORDER BY m.txn_month) AS prev_month_amount,
    m.total_amount - LAG(m.total_amount) OVER (ORDER BY m.txn_month) AS amount_change
FROM monthly AS m
ORDER BY m.txn_month DESC;

-- =====================================================
-- SUMMARY REPORT
-- =====================================================

SELECT '=== SUMMARY: All Concepts Demonstrated ===' AS summary;
-- One literal row per topic covered in this script, stacked with UNION ALL.
-- The first branch supplies the column names for the whole result.
WITH concept_summary AS (
    SELECT
        'UNION/UNION ALL' AS topic,
        'Combines results from multiple queries' AS description,
        'Use UNION ALL when no duplicates expected (faster)' AS best_practice
    UNION ALL
    SELECT
        'Subquery in WHERE',
        'Filter based on calculated values',
        'Most common pattern: WHERE col > (SELECT AVG...)'
    UNION ALL
    SELECT
        'Subquery in FROM',
        'Derived tables for multi-step analysis',
        'Always give alias: FROM (...) AS alias'
    UNION ALL
    SELECT
        'EXISTS vs IN',
        'Test existence vs membership',
        'EXISTS faster for large datasets, IN for small lists'
    UNION ALL
    SELECT
        'Correlated Subquery',
        'Row-specific calculations',
        'Runs per row, use when need per-group logic'
    UNION ALL
    SELECT
        'INTERSECT/EXCEPT',
        'Set operations for data reconciliation',
        'Find common records or differences'
)
SELECT
    topic,
    description,
    best_practice
FROM concept_summary;

-- Clean up
-- Remove the demo archive table created for the set-operation examples.
-- IF EXISTS keeps this step from failing when the table is already gone.
DROP TABLE IF EXISTS customers_archive;

-- =====================================================
-- END OF DAY 4 POC
-- =====================================================