# Advanced SQL Queries

Master advanced SQL techniques for data analysis.

## Topics Covered
- Subqueries
- CASE statements
- String functions
- Date/time functions
- HAVING clause
- NULL handling

In [None]:
import sqlite3
import pandas as pd

# Open a throwaway SQLite database that is held entirely in memory;
# every later cell in this notebook runs its queries through `conn`.
conn = sqlite3.connect(database=':memory:')

## Setup: Create Sample Data

In [None]:
# Create the employees table and load the sample rows.
cursor = conn.cursor()

# Drop any table left over from a previous run so this cell can be
# re-executed without failing on "table employees already exists".
cursor.execute('DROP TABLE IF EXISTS employees')

cursor.execute('''
CREATE TABLE employees (
    id INTEGER PRIMARY KEY,
    name TEXT,
    department TEXT,
    salary INTEGER,
    hire_date TEXT,
    manager_id INTEGER
)
''')

# Sample data, one tuple per employee:
#   (id, name, department, salary, hire_date, manager_id)
# hire_date is stored as 'YYYY-MM-DD' text. A manager_id of None marks an
# employee with no manager; Jack's salary is None on purpose so that the
# NULL-handling examples have something to work with.
employees = [
    (1, 'Alice', 'Engineering', 95000, '2020-01-15', None),
    (2, 'Bob', 'Engineering', 85000, '2020-03-20', 1),
    (3, 'Charlie', 'Engineering', 80000, '2021-06-10', 1),
    (4, 'David', 'Marketing', 70000, '2019-11-05', None),
    (5, 'Eve', 'Marketing', 65000, '2021-02-14', 4),
    (6, 'Frank', 'HR', 60000, '2020-08-22', None),
    (7, 'Grace', 'HR', 55000, '2022-01-10', 6),
    (8, 'Henry', 'Sales', 75000, '2020-05-18', None),
    (9, 'Ivy', 'Sales', 72000, '2021-09-30', 8),
    (10, 'Jack', 'Sales', None, '2023-03-01', 8),
]

# Name the target columns explicitly so the insert does not silently depend
# on the column order of the CREATE TABLE statement.
cursor.executemany(
    '''
    INSERT INTO employees
        (id, name, department, salary, hire_date, manager_id)
    VALUES (?, ?, ?, ?, ?, ?)
    ''',
    employees,
)
conn.commit()

# View the data
pd.read_sql_query(
    'SELECT * FROM employees',
    conn
)

## 1. Subqueries

Subqueries are queries nested inside another query.

### Scalar Subquery
Returns a single value.

In [None]:
# Find employees earning above average salary.
# The same scalar subquery appears twice: once in the SELECT list, so the
# overall average is displayed next to every returned row (avg_salary), and
# once in the WHERE clause, so only employees whose salary exceeds that
# average are kept. Both copies exclude NULL salaries before averaging.
# Rows are sorted from highest to lowest salary.
query = '''
SELECT 
    name, 
    salary,
    (SELECT AVG(salary) 
     FROM employees 
     WHERE salary IS NOT NULL) AS avg_salary
FROM employees
WHERE salary > (
    SELECT AVG(salary) 
    FROM employees
    WHERE salary IS NOT NULL
)
ORDER BY salary DESC
'''

pd.read_sql_query(query, conn)

### Column Subquery
Returns a list of values.

In [None]:
# Find employees in departments with more than 2 people.
# The inner query groups employees by department and keeps only the
# departments whose row count exceeds 2 (HAVING COUNT(*) > 2). The outer
# query then lists every employee whose department is in that set,
# sorted by department and then by name.
query = '''
SELECT name, department
FROM employees
WHERE department IN (
    SELECT department
    FROM employees
    GROUP BY department
    HAVING COUNT(*) > 2
)
ORDER BY department, name
'''

pd.read_sql_query(query, conn)

### Correlated Subquery
References columns from the outer query, so it is evaluated once for each outer row.

In [None]:
# Find highest paid employee per department.
# For each outer row (alias e1) the subquery computes the maximum non-NULL
# salary among the rows (alias e2) that share e1's department. The outer row
# is kept only when its own salary equals that maximum, so employees tied
# for the top salary in a department would all be returned.
# Output is sorted by department.
query = '''
SELECT 
    e1.name,
    e1.department,
    e1.salary
FROM employees e1
WHERE e1.salary = (
    SELECT MAX(e2.salary)
    FROM employees e2
    WHERE e2.department = e1.department
    AND e2.salary IS NOT NULL
)
ORDER BY e1.department
'''

pd.read_sql_query(query, conn)

## 2. CASE Statements

Conditional logic in SQL (like if-else).

In [None]:
# Categorize salaries into 'Unknown', 'High', 'Medium' or 'Low'.
# The branches are listed from most to least specific: the NULL check comes
# first, then the >= 90000 threshold, then the >= 70000 threshold; any
# remaining salary falls through to ELSE and is labelled 'Low'.
# The label is returned as salary_category; rows are sorted by salary,
# highest first.
query = '''
SELECT 
    name,
    salary,
    CASE 
        WHEN salary IS NULL THEN 'Unknown'
        WHEN salary >= 90000 THEN 'High'
        WHEN salary >= 70000 THEN 'Medium'
        ELSE 'Low'
    END AS salary_category
FROM employees
ORDER BY salary DESC
'''

pd.read_sql_query(query, conn)

In [None]:
# Count employees by salary category.
# Each row is bucketed with a CASE expression ('Unknown' for NULL salaries,
# 'High' for >= 90000, 'Medium' for >= 70000, otherwise 'Low'). The query
# groups on the resulting `category` alias and counts the rows per bucket,
# listing the largest bucket first.
query = '''
SELECT 
    CASE 
        WHEN salary IS NULL THEN 'Unknown'
        WHEN salary >= 90000 THEN 'High'
        WHEN salary >= 70000 THEN 'Medium'
        ELSE 'Low'
    END AS category,
    COUNT(*) AS count
FROM employees
GROUP BY category
ORDER BY count DESC
'''

pd.read_sql_query(query, conn)

## 3. NULL Handling

### COALESCE
Returns the first non-NULL value from its list of arguments.

In [None]:
# Replace NULL salaries with 0.
# COALESCE(column, 0) is applied to both salary and manager_id, so a NULL
# in either column is reported as 0 while non-NULL values pass through
# unchanged. The AS aliases keep the original column names in the output.
query = '''
SELECT 
    name,
    COALESCE(salary, 0) AS salary,
    COALESCE(manager_id, 0) AS manager_id
FROM employees
'''

pd.read_sql_query(query, conn)

### NULLIF
Returns NULL if the two arguments are equal; otherwise returns the first argument.

In [None]:
# Example: Convert 0 to NULL.
# No manager_id in the sample data is actually 0, so NULLIF(manager_id, 0)
# on its own would return the column unchanged and demonstrate nothing.
# To make the effect visible, the query first builds a column that uses 0
# as a "no manager" sentinel (manager_id_or_zero) and then shows NULLIF
# turning that sentinel back into NULL (manager_id).
query = '''
SELECT
    name,
    COALESCE(manager_id, 0) AS manager_id_or_zero,
    NULLIF(COALESCE(manager_id, 0), 0) AS manager_id
FROM employees
'''

pd.read_sql_query(query, conn)

## 4. String Functions

In [None]:
# String manipulation examples.
# For each employee: upper/lower-cased name, name length, the first three
# characters (SUBSTR positions are 1-based), and a "name - department"
# label built with the || concatenation operator.
# ORDER BY id makes the LIMIT deterministic: without an ORDER BY, which
# five rows come back is not guaranteed.
query = '''
SELECT
    name,
    UPPER(name) AS uppercase,
    LOWER(name) AS lowercase,
    LENGTH(name) AS name_length,
    SUBSTR(name, 1, 3) AS first_3_chars,
    name || ' - ' || department AS full_info
FROM employees
ORDER BY id
LIMIT 5
'''

pd.read_sql_query(query, conn)

In [None]:
# Filter using string functions.
# Keeps employees whose name matches the pattern '%e%' (an 'e' anywhere in
# the name) and whose name is longer than 4 characters, sorted by name.
# NOTE(review): SQLite's LIKE is presumably case-insensitive for ASCII by
# default, so an upper-case 'E' would match as well — confirm if it matters.
query = '''
SELECT name, department
FROM employees
WHERE name LIKE '%e%'
AND LENGTH(name) > 4
ORDER BY name
'''

pd.read_sql_query(query, conn)

## 5. Date/Time Functions

In [None]:
# Extract date parts with SQLite's date/time functions.
# hire_date is stored as 'YYYY-MM-DD' text, which strftime() understands:
#   strftime('%Y', ...) -> four-digit year, e.g. '2020'
#   strftime('%m', ...) -> two-digit month, e.g. '03'
# The quarter is derived arithmetically: months 1-3 -> 1, 4-6 -> 2,
# 7-9 -> 3, 10-12 -> 4 via integer division (month + 2) / 3.
# A NULL or unparseable hire_date yields NULL for every derived column
# instead of being silently labelled 'Q4'.
query = '''
SELECT
    name,
    hire_date,
    strftime('%Y', hire_date) AS hire_year,
    strftime('%m', hire_date) AS hire_month,
    'Q' || ((CAST(strftime('%m', hire_date) AS INTEGER) + 2) / 3)
        AS hire_quarter
FROM employees
ORDER BY hire_date
'''

pd.read_sql_query(query, conn)

## 6. HAVING Clause

Filters groups after aggregation (written after GROUP BY), whereas WHERE filters individual rows before grouping.

In [None]:
# Departments with average salary > 70000.
# WHERE removes rows with a NULL salary before grouping, so employee_count
# only counts employees that have a salary recorded. HAVING then filters
# the grouped result, keeping departments whose average salary exceeds
# 70000. Each surviving department is reported with its count, average,
# minimum and maximum salary, ordered by average salary (highest first).
query = '''
SELECT 
    department,
    COUNT(*) AS employee_count,
    AVG(salary) AS avg_salary,
    MIN(salary) AS min_salary,
    MAX(salary) AS max_salary
FROM employees
WHERE salary IS NOT NULL
GROUP BY department
HAVING AVG(salary) > 70000
ORDER BY avg_salary DESC
'''

pd.read_sql_query(query, conn)

## 7. DISTINCT and Aggregation

In [None]:
# Count distinct departments.
# Produces a single summary row with three figures: the number of distinct
# department values (dept_count), the total number of employee rows
# (total_employees), and the number of distinct manager_id values that
# employees refer to (manager_count).
query = '''
SELECT 
    COUNT(DISTINCT department) AS dept_count,
    COUNT(*) AS total_employees,
    COUNT(DISTINCT manager_id) AS manager_count
FROM employees
'''

pd.read_sql_query(query, conn)

## 8. Complex Example

Combine multiple techniques.

In [None]:
# Department analysis with multiple metrics.
# Per department (only those with at least 2 employees):
#   total_employees - number of rows in the department
#   managers        - employees with no manager_id, i.e. nobody above
#                     them in this table; the CASE yields 1 for those rows
#                     and NULL otherwise, and COUNT skips the NULLs
#   avg_salary      - average of the known salaries, rounded to 2 decimals
#   pay_category    - label derived from that same average
# avg_salary and pay_category both use AVG(salary), which ignores NULL
# salaries. Averaging COALESCE(salary, 0) instead would count a missing
# salary as 0 and make the displayed average disagree with the category
# (e.g. Sales would show 49000 next to 'Medium Pay').
query = '''
SELECT
    department,
    COUNT(*) AS total_employees,
    COUNT(CASE
        WHEN manager_id IS NULL
        THEN 1
    END) AS managers,
    ROUND(AVG(salary), 2)
        AS avg_salary,
    CASE
        WHEN AVG(salary) >= 80000
        THEN 'High Pay'
        WHEN AVG(salary) >= 65000
        THEN 'Medium Pay'
        ELSE 'Low Pay'
    END AS pay_category
FROM employees
GROUP BY department
HAVING COUNT(*) >= 2
ORDER BY avg_salary DESC
'''

pd.read_sql_query(query, conn)

## Practice Exercises

Try these on your own!

### Exercise 1
Find employees hired in 2020 or 2021.

In [None]:
# Your code here


### Exercise 2
Calculate salary difference from department average.

In [None]:
# Your code here


### Exercise 3
Find departments where all employees earn > 60000.

In [None]:
# Your code here


In [None]:
# Close the SQLite connection that was opened at the top of the notebook.
# NOTE: the database was opened as ':memory:', so its tables presumably do
# not survive the close — re-run the connection and setup cells before
# running any further queries.
conn.close()

## Key Takeaways

✅ **Subqueries** - Nest queries for complex logic  
✅ **CASE** - Conditional logic in SQL  
✅ **COALESCE** - Handle NULL values  
✅ **String functions** - Manipulate text  
✅ **HAVING** - Filter grouped results  

**Next:** Window Functions →