In [1]:
import duckdb as db
import pandas as pd

#### Question 1: Load and View Data
Use DuckDB to select all data from the CSV file.

In [2]:
employees = db.sql("select * from 'employees.csv'")

#### Question 2: Select Specific Columns
Select only the `employee_id` column.

In [3]:
db.sql("select employee_id from 'employees.csv'")

┌─────────────┐
│ employee_id │
│    int64    │
├─────────────┤
│           1 │
│           2 │
│           3 │
│           4 │
│           5 │
│           6 │
│           7 │
│           8 │
│           9 │
│          10 │
│          11 │
│          12 │
│          13 │
│          14 │
│          15 │
│          16 │
│          17 │
│          18 │
│          19 │
│          20 │
├─────────────┤
│   20 rows   │
└─────────────┘

#### Question 3: Filter with WHERE Clause
Select all employees with a salary greater than 60,000.

In [4]:
db.sql("select * from 'employees.csv' where salary > 60000")

┌─────────────┬─────────┐
│ employee_id │ salary  │
│    int64    │ double  │
├─────────────┼─────────┤
│           4 │ 72000.0 │
│           6 │ 68000.0 │
│           8 │ 61000.0 │
│           9 │ 70000.0 │
│          12 │ 75000.0 │
│          13 │ 64000.0 │
│          17 │ 66000.0 │
│          19 │ 61000.0 │
└─────────────┴─────────┘

Combine two conditions: salary greater than 50,000 and less than 65,000.

In [5]:
db.sql("select * from 'employees.csv' where salary > 50000 and salary < 65000")

┌─────────────┬─────────┐
│ employee_id │ salary  │
│    int64    │ double  │
├─────────────┼─────────┤
│           1 │ 55000.0 │
│           2 │ 60000.0 │
│           5 │ 51000.0 │
│           8 │ 61000.0 │
│          10 │ 53000.0 │
│          13 │ 64000.0 │
│          15 │ 59000.0 │
│          16 │ 52000.0 │
│          19 │ 61000.0 │
│          20 │ 57000.0 │
├─────────────┴─────────┤
│ 10 rows     2 columns │
└───────────────────────┘

#### Question 4: Order the Results
Show all employees ordered by salary descending.

In [6]:
db.sql("select * from 'employees.csv' order by salary desc")

┌─────────────┬─────────┐
│ employee_id │ salary  │
│    int64    │ double  │
├─────────────┼─────────┤
│          12 │ 75000.0 │
│           4 │ 72000.0 │
│           9 │ 70000.0 │
│           6 │ 68000.0 │
│          17 │ 66000.0 │
│          13 │ 64000.0 │
│           8 │ 61000.0 │
│          19 │ 61000.0 │
│           2 │ 60000.0 │
│          15 │ 59000.0 │
│          20 │ 57000.0 │
│           1 │ 55000.0 │
│          10 │ 53000.0 │
│          16 │ 52000.0 │
│           5 │ 51000.0 │
│          11 │ 49000.0 │
│          18 │ 48000.0 │
│           3 │ 48000.0 │
│          14 │ 47000.0 │
│           7 │ 45000.0 │
├─────────────┴─────────┤
│ 20 rows     2 columns │
└───────────────────────┘

#### Question 5: Aggregate — Find Max Salary
Find the highest salary in the dataset.

In [7]:
db.sql("select max(salary) from 'employees.csv'")

┌─────────────┐
│ max(salary) │
│   double    │
├─────────────┤
│     75000.0 │
└─────────────┘

Find the minimum salary.

In [25]:
db.sql("select min(salary) from 'employees.csv'")

┌─────────────┐
│ min(salary) │
│   double    │
├─────────────┤
│     45000.0 │
└─────────────┘

#### Question 6: Create a New Column 
Add a new column called `annual_bonus` which is 10% of salary, and select only those employees who have an annual bonus greater than 5500

In [8]:
db.sql("create table employees as select * from 'employees.csv'")
db.sql("alter table employees add column annual_bonus double")
db.sql("update employees set annual_bonus = salary * 0.1")
db.sql("select * from employees where annual_bonus>5500")

┌─────────────┬─────────┬──────────────┐
│ employee_id │ salary  │ annual_bonus │
│    int64    │ double  │    double    │
├─────────────┼─────────┼──────────────┤
│           2 │ 60000.0 │       6000.0 │
│           4 │ 72000.0 │       7200.0 │
│           6 │ 68000.0 │       6800.0 │
│           8 │ 61000.0 │       6100.0 │
│           9 │ 70000.0 │       7000.0 │
│          12 │ 75000.0 │       7500.0 │
│          13 │ 64000.0 │       6400.0 │
│          15 │ 59000.0 │       5900.0 │
│          17 │ 66000.0 │       6600.0 │
│          19 │ 61000.0 │       6100.0 │
│          20 │ 57000.0 │       5700.0 │
├─────────────┴─────────┴──────────────┤
│ 11 rows                    3 columns │
└──────────────────────────────────────┘