-- In [0]:
-- =====================================================
-- DAY 7 POC - SUBQUERIES & CTEs
-- Simple Problem-Solving Approach
-- =====================================================

-- Select the schema that holds the customers, accounts and transactions tables
-- referenced (unqualified) by the scenarios in this script.
USE bank_management;

-- =====================================================
-- SCENARIO 1: SIMPLE SUBQUERY - Above Average
-- WHEN: Need one calculated value for comparison
-- =====================================================

SELECT '=== SCENARIO 1: Customers Above Average Balance ===' AS scenario;

-- Problem: Find customers with above-average balance
-- Think: Average is ONE number → Use subquery
-- Solution: Total each customer's accounts, average those totals once,
--           then compare each customer against that single number
--
-- The comparison is customer total vs. average customer total. Comparing a
-- customer's summed balance against the average of individual ACCOUNT
-- balances would mix units and flag anyone holding several small accounts.

WITH customer_totals AS (
    -- One row per customer that owns at least one account
    SELECT
        c.customer_id,
        c.first_name || ' ' || c.last_name AS customer_name,
        SUM(a.balance) AS total_balance
    FROM customers AS c
    INNER JOIN accounts AS a
        ON c.customer_id = a.customer_id
    GROUP BY c.customer_id, c.first_name, c.last_name
)
SELECT
    customer_name,
    total_balance,
    -- Uncorrelated scalar subquery: the same single value on every row
    (SELECT ROUND(AVG(ct.total_balance), 2) FROM customer_totals AS ct) AS avg_balance
FROM customer_totals
WHERE total_balance > (SELECT AVG(ct.total_balance) FROM customer_totals AS ct)
ORDER BY total_balance DESC;

-- How it works:
-- 1. customer_totals sums the balances of each customer's accounts
-- 2. Subquery calculates the average of those totals: e.g., 20000
-- 3. WHERE compares each customer's total to 20000
-- 4. Only shows customers above that average

-- =====================================================
-- SCENARIO 2: SUBQUERY WITH IN - Filter by List
-- WHEN: Need to filter by IDs from another table
-- =====================================================

SELECT '=== SCENARIO 2: Transactions for Premium Customers ===' AS scenario;

-- Problem: Show transactions only for Premium customers
-- Think: Need list of Premium account IDs → Use IN
-- Solution: Get Premium account IDs, filter transactions

SELECT
    t.transaction_id,
    a.account_number,
    t.transaction_type,
    t.amount,
    t.transaction_date
FROM transactions AS t
INNER JOIN accounts AS a
    ON t.account_id = a.account_id
WHERE a.account_id IN (
    -- Subquery returns LIST: [2001, 2008, ...]
    -- Distinct aliases are used here so the inner accounts reference does
    -- not shadow the outer alias "a".
    SELECT premium_acct.account_id
    FROM accounts AS premium_acct
    INNER JOIN customers AS premium_cust
        ON premium_acct.customer_id = premium_cust.customer_id
    WHERE premium_cust.customer_type = 'Premium'
)
ORDER BY
    t.transaction_date DESC,
    -- Tie-breaker so LIMIT returns the same 5 rows on every run
    -- (assumes transaction_id is unique per transaction — TODO confirm)
    t.transaction_id DESC
LIMIT 5;

-- How it works:
-- 1. Inner query finds all Premium account IDs
-- 2. WHERE IN checks if transaction's account is in that list
-- 3. Only shows matching transactions

-- =====================================================
-- SCENARIO 3: EXISTS vs IN - Performance Pattern
-- WHEN: Check if related record exists
-- =====================================================

SELECT '=== SCENARIO 3A: Customers WITH Accounts (EXISTS) ===' AS scenario;

-- Problem: Find customers who have accounts
-- Think: Just checking existence → Use EXISTS
-- Solution: Check if ANY account exists for customer

SELECT
    c.customer_id,
    c.first_name || ' ' || c.last_name AS customer_name,
    c.customer_type
FROM customers AS c
WHERE EXISTS (
    -- Just checks: "Does this customer have ANY account?"
    -- Only existence matters, so the selected value (1) is irrelevant
    SELECT 1
    FROM accounts AS a
    WHERE a.customer_id = c.customer_id
)
-- LIMIT without ORDER BY returns an arbitrary 5 rows; sort first so the
-- sample is repeatable.
ORDER BY c.customer_id
LIMIT 5;

SELECT '=== SCENARIO 3B: Customers WITHOUT Accounts (NOT EXISTS) ===' AS scenario;

-- Problem: Find customers who don't have accounts
-- Think: "No related row" is an existence test → Use NOT EXISTS
-- Solution: Keep a customer only when the account probe comes back empty

SELECT
    cust.customer_id,
    cust.first_name || ' ' || cust.last_name AS customer_name,
    cust.email
FROM customers AS cust
WHERE NOT EXISTS (
    -- Probe: is there at least one account owned by this customer?
    -- No row found → NOT EXISTS is true → the customer is returned
    SELECT 1
    FROM accounts AS acct
    WHERE acct.customer_id = cust.customer_id
);

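-- How EXISTS works:
-- 1. Conceptually, the subquery is checked once for EACH customer
-- 2. If subquery returns ANY row → EXISTS is true
-- 3. Can stop at the first match (doesn't need the full list)
-- 4. Often as fast as or faster than IN on large datasets (many optimizers
--    plan both as a semi-join); NOT EXISTS is also NULL-safe, unlike NOT IN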

-- =====================================================
-- SCENARIO 4: CTE - Multi-Step Analysis
-- WHEN: Multiple calculations needed, step by step
-- =====================================================

SELECT '=== SCENARIO 4: Customer Summary (Multi-Step) ===' AS scenario;

-- Problem: Show customer name, account count, total balance, transaction count
-- Think: 3 different calculations → Break into steps with CTE
-- Solution: Calculate each metric separately, then combine
--
-- Balances and transactions are aggregated in separate CTEs on purpose:
-- joining accounts to transactions before SUM(balance) would repeat each
-- account's balance once per transaction.

WITH
-- STEP 1: Get account info per customer
account_info AS (
    SELECT
        c.customer_id,
        c.first_name || ' ' || c.last_name AS customer_name,
        c.customer_type,
        COUNT(a.account_id) AS account_count,
        -- SUM over zero matched rows is NULL; report 0 instead
        COALESCE(SUM(a.balance), 0) AS total_balance
    FROM customers AS c
    LEFT JOIN accounts AS a
        ON c.customer_id = a.customer_id
    GROUP BY c.customer_id, c.first_name, c.last_name, c.customer_type
),
-- STEP 2: Get transaction count per customer
transaction_info AS (
    SELECT
        c.customer_id,
        -- COUNT(column) skips the NULLs produced by the LEFT JOINs, so
        -- customers without transactions get 0
        COUNT(t.transaction_id) AS transaction_count
    FROM customers AS c
    LEFT JOIN accounts AS a
        ON c.customer_id = a.customer_id
    LEFT JOIN transactions AS t
        ON a.account_id = t.account_id
    GROUP BY c.customer_id
)
-- STEP 3: Combine everything
SELECT
    ai.customer_name,
    ai.customer_type,
    ai.account_count,
    ROUND(ai.total_balance, 2) AS balance,
    ti.transaction_count,
    -- Tier thresholds apply to the unrounded total balance
    CASE
        WHEN ai.total_balance >= 50000 THEN 'Gold'
        WHEN ai.total_balance >= 10000 THEN 'Silver'
        ELSE 'Bronze'
    END AS tier
FROM account_info AS ai
INNER JOIN transaction_info AS ti
    ON ai.customer_id = ti.customer_id
-- Only customers that actually hold an account
WHERE ai.account_count > 0
ORDER BY
    ai.total_balance DESC,
    -- Tie-breaker so LIMIT picks the same rows on every run
    ai.customer_id
LIMIT 5;

-- Why CTE?
-- 1. Breaks complex problem into logical steps
-- 2. Each CTE is easy to understand and test
-- 3. More readable than nested subqueries
-- 4. Can reference earlier CTEs in later ones

-- =====================================================
-- SCENARIO 5: RECURSIVE CTE - Hierarchy
-- WHEN: Tree structure, unknown depth
-- =====================================================

-- Setup: Create simple employee hierarchy
-- manager_id points at the emp_id of the employee's manager; the top of the
-- tree has manager_id = NULL.
CREATE TABLE IF NOT EXISTS emp_tree (
    emp_id INT,
    emp_name STRING,
    manager_id INT
);

-- Intentionally unfiltered: empty the scratch table completely so a rerun
-- starts from a clean slate (a filter on emp_id would leave any NULL-id rows
-- behind).
DELETE FROM emp_tree;

-- Explicit column list keeps the INSERT correct even if the table's column
-- order differs from the literal order below.
INSERT INTO emp_tree (emp_id, emp_name, manager_id) VALUES
(1, 'CEO Alice', NULL),
(2, 'VP Bob', 1),
(3, 'VP Carol', 1),
(4, 'Manager Dan', 2),
(5, 'Manager Eve', 2),
(6, 'Rep Frank', 4),
(7, 'Rep Grace', 4);

SELECT '=== SCENARIO 5: Organization Chart (Recursive) ===' AS scenario;

-- Problem: Show all employees in hierarchy order
-- Think: Don't know how many levels → Use recursive CTE
-- Solution: Start at CEO, recursively find reports

WITH RECURSIVE org_chart AS (
    -- ANCHOR: Start at the top (CEO has no manager)
    SELECT
        emp_id,
        emp_name,
        manager_id,
        1 AS level,
        CAST(emp_name AS STRING) AS path
    FROM emp_tree
    WHERE manager_id IS NULL

    UNION ALL

    -- RECURSIVE: Find employees who report to people we already have
    SELECT
        e.emp_id,
        e.emp_name,
        e.manager_id,
        oc.level + 1 AS level,
        -- Extend the manager's path with this employee's name
        CAST(oc.path || ' → ' || e.emp_name AS STRING) AS path
    FROM emp_tree AS e
    INNER JOIN org_chart AS oc
        ON e.manager_id = oc.emp_id
)
SELECT
    -- Two spaces of indentation per level below the root
    REPEAT('  ', level - 1) || emp_name AS org_structure,
    level,
    path
FROM org_chart
-- Sort by the root-to-employee path so every employee is listed directly
-- under their own manager. Sorting by level would group all employees of
-- the same depth together and make the indentation suggest the wrong parent.
ORDER BY path;

-- How recursive works:
-- Iteration 1: Finds CEO Alice (anchor)
-- Iteration 2: Finds Bob, Carol (report to Alice)
-- Iteration 3: Finds Dan, Eve (both report to Bob; Carol has no reports)
-- Iteration 4: Finds Frank, Grace (both report to Dan)
-- Iteration 5: No more employees → STOP

-- =====================================================
-- SCENARIO 6: CORRELATED SUBQUERY - Per-Row Calc
-- WHEN: Each row needs different calculation
-- =====================================================

SELECT '=== SCENARIO 6: Compare to Type Average ===' AS scenario;

-- Problem: Compare each customer to THEIR customer type's average
-- Think: VIP vs Regular have different averages → Per-row calculation
-- Solution: For each customer, calculate their type's average

WITH customer_balances AS (
    SELECT 
        c.customer_id,
        c.first_name || ' ' || c.last_name AS customer_name,
        c.customer_type,
        COALESCE(SUM(a.balance), 0) AS total_balance
    FROM customers c
    LEFT JOIN accounts a ON c.customer_id = a.customer_id
    GROUP BY c.customer_id, c.first_name, c.last_name, c.customer_type
)
SELECT 
    customer_name,
    customer_type,
    ROUND(total_balance, 2) AS balance,
    -- This calculates average for THIS customer's type
    ROUND((
        SELECT AVG(cb2.total_balance)
        FROM customer_balances cb2
        WHERE cb2.customer_type = cb.customer_type
    ), 2) AS type_avg,
    CASE 
        WHEN total_balance > (
            SELECT AVG(cb2.total_balance)
            FROM customer_balances cb2
            WHERE cb2.customer_type = cb.customer_type
        ) THEN 'Above Avg'
        ELSE 'Below Avg'
    END AS status
FROM customer_balances cb
WHERE total_balance > 0
ORDER BY customer_type, total_balance DESC
LIMIT 6;

-- How correlated subquery works:
-- For VIP customer: Calculates VIP average, compares to it
-- For Premium customer: Calculates Premium average, compares to it
-- For Regular customer: Calculates Regular average, compares to it
-- Each customer gets their own group's average

-- =====================================================
-- COMPARISON: All Techniques Side-by-Side
-- =====================================================

SELECT '=== SUMMARY: When to Use What ===' AS summary;

-- Quick-reference table: one row per technique demonstrated in this script,
-- built from an inline VALUES list (no base tables are read).
SELECT
    technique,
    when_use,
    example,
    performance
FROM VALUES
    ('Simple Subquery', 'One value needed (avg, max)', 'WHERE balance > (SELECT AVG...)', 'Fast - runs once'),
    ('IN with Subquery', 'Filter by list of IDs', 'WHERE id IN (SELECT...)', 'Good for small lists'),
    ('EXISTS', 'Check if related exists', 'WHERE EXISTS (SELECT 1...)', 'Faster - stops at first match'),
    ('CTE', 'Multi-step logic', 'WITH step1 AS (...)', 'Readable, maintainable'),
    ('Recursive CTE', 'Hierarchy, tree structure', 'WITH RECURSIVE... UNION ALL', 'Unknown depth handling'),
    ('Correlated Subquery', 'Per-row calculation', 'SELECT (SELECT ... WHERE outer.id)', 'Slower - runs per row')
AS technique_guide (technique, when_use, example, performance);

-- =====================================================
-- DECISION TREE
-- =====================================================

SELECT '=== Decision Framework ===' AS decision;

-- Question → technique lookup, built from an inline VALUES list
-- (no base tables are read).
SELECT
    question,
    answer,
    example
FROM VALUES
    ('Need one value?', 'Simple Subquery', 'SELECT AVG(balance) FROM accounts'),
    ('Need list of IDs?', 'IN + Subquery', 'WHERE id IN (SELECT id...)'),
    ('Check existence?', 'EXISTS', 'WHERE EXISTS (SELECT 1...)'),
    ('Multiple steps?', 'CTE', 'WITH step1 AS (...), step2 AS (...)'),
    ('Hierarchy/Tree?', 'Recursive CTE', 'WITH RECURSIVE... anchor UNION ALL recursive'),
    ('Per-row different?', 'Correlated Subquery', 'Calculate based on each row''s group')
AS decision_guide (question, answer, example);

-- =====================================================
-- REAL INTERVIEW QUESTIONS
-- =====================================================

SELECT '=== Interview Q1: Top 2 Transactions Per Account ===' AS interview;

-- Number each account's transactions from largest to smallest amount, then
-- keep the first two per account.
WITH ranked AS (
    SELECT
        a.account_number,
        t.amount,
        t.transaction_date,
        -- Named txn_rank rather than "rank" to avoid colliding with the RANK
        -- window function keyword. transaction_id breaks ties between equal
        -- amounts so the numbering is repeatable
        -- (assumes transaction_id is unique — TODO confirm).
        ROW_NUMBER() OVER (
            PARTITION BY a.account_id
            ORDER BY t.amount DESC, t.transaction_id
        ) AS txn_rank
    FROM transactions AS t
    INNER JOIN accounts AS a
        ON t.account_id = a.account_id
)
SELECT
    account_number,
    amount,
    transaction_date
FROM ranked
WHERE txn_rank <= 2
ORDER BY account_number, txn_rank
LIMIT 6;

SELECT '=== Interview Q2: Customers With No Recent Activity ===' AS interview;

-- Account-holding customers whose newest transaction is more than 90 days
-- old, or who have no transactions at all.
WITH last_activity AS (
    -- Latest transaction date per customer; NULL when none of the customer's
    -- accounts has any transaction (LEFT JOIN keeps those accounts).
    SELECT
        cust.customer_id,
        cust.first_name || ' ' || cust.last_name AS customer_name,
        MAX(txn.transaction_date) AS last_transaction
    FROM customers AS cust
    INNER JOIN accounts AS acct
        ON cust.customer_id = acct.customer_id
    LEFT JOIN transactions AS txn
        ON acct.account_id = txn.account_id
    GROUP BY cust.customer_id, cust.first_name, cust.last_name
)
SELECT
    customer_name,
    last_transaction
FROM last_activity
WHERE last_transaction IS NULL
    OR last_transaction < DATE_SUB(CURRENT_DATE(), 90);

SELECT '=== Interview Q3: Month-over-Month Growth ===' AS interview;

-- Transactions per calendar month and the change versus the preceding row.
WITH monthly_counts AS (
    -- Bucket every transaction into the first day of its month
    SELECT
        DATE_TRUNC('month', transaction_date) AS month,
        COUNT(*) AS txn_count
    FROM transactions
    GROUP BY DATE_TRUNC('month', transaction_date)
),
with_previous AS (
    -- Attach the previous row's count (NULL for the earliest month).
    -- "Previous" means the preceding month present in the data; months with
    -- no transactions produce no row.
    SELECT
        month,
        txn_count,
        LAG(txn_count) OVER (ORDER BY month) AS prev_month
    FROM monthly_counts
)
SELECT
    month,
    txn_count,
    prev_month,
    -- Absolute change in transaction count; NULL when there is no prior row
    txn_count - prev_month AS growth
FROM with_previous
ORDER BY month DESC;

-- Clean up
-- Remove the scratch emp_tree table created for Scenario 5 so the POC leaves
-- nothing behind; IF EXISTS keeps this safe when the table is already gone.
DROP TABLE IF EXISTS emp_tree;

-- =====================================================
-- END OF DAY 7 POC
-- All scenarios demonstrated with problem-solving approach
-- =====================================================

-- Final banner row marking the end of the script's output.
SELECT '=== ✅ POC COMPLETE - All Day 7 Concepts Covered ===' AS done;