# SQL Workshop Exercises

In [None]:
import sqlalchemy
import pandas as pd

# pd.read_sql needs a database connection to run each query against, so we
# build a SQLAlchemy engine from a SQLite URI that points at the file
# candies.db (a relative path). Every cell below reuses sqlite_engine.
sqlite_uri = "sqlite:///candies.db"
sqlite_engine = sqlalchemy.create_engine(sqlite_uri)

## SELECT/FROM Demo

In [None]:
# List all columns in the candytypes table
sql_expr = """

"""                                         # add a number inside .head() to see that many elements
pd.read_sql(sql_expr, sqlite_engine).head() # or just remove it entirely to see all the data

In [None]:
# List all columns in the candy table
sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine).head()

In [None]:
# Select columns name and chocolate from candy table
sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine).head()

In [None]:
# Select all distinct values from the "chocolate" column in candy.
sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine)

In [None]:
# Select the name AS bars and type AS category from candytypes.
# The query must be assigned to sql_expr: a bare string literal is discarded,
# and read_sql would then re-run whatever query the previous cell left behind.
sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine).head()

## SELECT Exercises

##### Question 1:

List all the *distinct* types of candies from the **candytypes** relation.

In [None]:
# Exercise 1

sql_expr = """
SELECT ...
FROM ...
"""
pd.read_sql(sql_expr, sqlite_engine).head()

##### Question 2:

How much sugar is in each candy, and is each one chocolate?
Select the *name*, *chocolate*, and *sugarpercent* from the **candy** relation.

In [None]:
# Exercise 2

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine).head()

##### Question 3:

Select all distinct values from the *type* column in candytypes.

In [None]:
# Exercise 3

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine).head()

---
## WHERE Demo

In [None]:
# Select all the candies that are not chocolate and have sugar less than 30%

sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine).head()

## WHERE Exercises

##### Question 1:

Find all different candies that have **both chocolate and caramel**. Select columns *name*, *chocolate*, and *caramel* to check.

In [None]:
# Exercise 1

sql_expr = """
SELECT ...
FROM ...
WHERE ...
"""
pd.read_sql(sql_expr, sqlite_engine).head()

##### Question 2:

Find all different candies that have **either chocolate or caramel, but not both**. Select columns *name*, *chocolate*, and *caramel* to check.  
*Logic hint: "either A or B, but not both" = (A AND NOT B) OR (B AND NOT A)*

In [None]:
#Exercise 2

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine).head()

##### Question 3:

Which **hard candies** can you buy that have **at least 40% sugar content but less than 70% sugar content**?

In [None]:
#Exercise 3

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine)

## ORDER BY and LIMIT Exercises

##### Problem 1:

List the **10 most sugary fruit candies**, displaying both *name* and *sugarpercent*.

In [None]:
#Exercise 1

sql_expr = """
SELECT ...
FROM ...
WHERE ...
ORDER ...
LIMIT ...
"""
pd.read_sql(sql_expr, sqlite_engine)

##### Problem 2:

List the **5 cheapest candies and their prices** among candies that have **over 50% sugar content** and **are not nougat**.

In [None]:
#Exercise 2

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine)

## Aggregation Demo

In [None]:
#Let’s find how many candies there are.

sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine)

In [None]:
#Let’s find how many categories (chocolate, nougat, caramel, etc) there are.

sql_expr = """

"""

pd.read_sql(sql_expr, sqlite_engine)


In [None]:
#Let’s find the highest sugar item on the list.
sql_expr = """

"""

pd.read_sql(sql_expr, sqlite_engine)

## GROUP BY Demo

In [None]:
# Find the number of candies repeated in the table candytypes.

sql_expr = """

"""
pd.read_sql(sql_expr, sqlite_engine).head()

## Aggregation, GROUP BY, HAVING Exercises

##### Question 1:

Find the minimum, maximum, and average sugar percentage. Name the columns something better using aliases (**AS**).

In [None]:
# Exercise 1

sql_expr = """
SELECT ...
FROM ...
"""
pd.read_sql(sql_expr, sqlite_engine)

##### Question 2:

Find the average sugar percentage of chocolate candies versus non-chocolate candies.

In [None]:
# Exercise 2

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine)

##### Question 3:
Select the **types** and their count, including only types that have a count more than 10. *(HINT: use HAVING)*

In [None]:
# Exercise 3

sql_expr = """
...
"""
pd.read_sql(sql_expr, sqlite_engine)

## JOIN Exercises

If we are joining one table with itself, we need to make an **alias** of itself. 

We do this by giving each copy of the table its own unique alias. An alias is just a short name of your choosing.

In our example, we use the statement:

FROM candy AS a INNER JOIN candy AS b

where the two aliases of candy are a and b.

Through this, we can refer to each alias separately through the syntax "alias_name.column_name".

##### Question 1: 
Return the names, price and sugar percentages of all unique pairs of candies with the same price and sugar percentages.

In [None]:
# Exercise 1

sql_expr = """ 
SELECT a.name, b.name, a.pricepercent, a.sugarpercent
FROM ... AS a INNER JOIN ... AS b
WHERE ... = ... AND ... = ... 
  AND a.name != b.name AND a.name < b.name
"""
pd.read_sql(sql_expr, sqlite_engine)

##### Question 2: 
Return the names and winpercents of all unique pairs of candies containing both chocolate and caramel, and having winpercents of greater than 50% (the winpercents do not have to be equal).

In [None]:
# Exercise 2
# The question asks for each candy's name and winpercent, so the SELECT list
# shows both winpercents rather than the price/sugar columns from Exercise 1.
# NOTE(review): column name `winpercent` is taken from the question text;
# confirm it against the candy table's schema.

sql_expr = """
SELECT a.name, b.name, a.winpercent, b.winpercent
FROM ... AS a INNER JOIN ... AS b
WHERE ... = ... AND ... = ... AND ... = ... AND ... = ... 
  AND ... > ... AND ... > ... 
  AND a.name != b.name AND a.name < b.name
"""
pd.read_sql(sql_expr, sqlite_engine).head()

##### Question 3:
    
Find the **3 highest** average *sugarpercent* values among the *types* of candy that have more than 10 items in their group.

In [None]:
# Exercise 3

sql_expr = """ 
SELECT ...
FROM ...
GROUP BY ...
HAVING ...
ORDER BY ...
LIMIT ...
"""
pd.read_sql(sql_expr, sqlite_engine)