<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Chapter 1 - Pivot with Case Statements

The first chapter is an EDA-style exploration of the Contoso database, using CASE statements inside aggregations to pivot the table.  
I went a bit beyond Luke's instructions by covering the whole timeline from 2015 to 2024.

Load SQL Module and Database

In [None]:
# Notebook setup: imports, optional Google Colab provisioning of a local
# PostgreSQL instance, and configuration of the SQL magic and pandas display.
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
if 'google.colab' in sys.modules:
    # Update the apt package index (output suppressed)
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    # NOTE(review): the password is hard-coded here and again in the connection
    # string below; acceptable only for a throwaway Colab/local instance.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Swap ipython-sql for jupysql (both provide the `sql` extension loaded below)
    # NOTE(review): every command in this branch discards stdout and stderr, so a
    # failed step is silent; drop the redirection when debugging.
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database (expects a server on localhost:5432)
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [None]:
%%sql
-- Names of all tables in the public schema, in alphabetical order.
SELECT
  t.table_name
FROM
  information_schema.tables AS t
WHERE
  t.table_schema = 'public'
ORDER BY
  t.table_name;

Unnamed: 0,table_name
0,currencyexchange
1,customer
2,date
3,product
4,sales
5,store


In [None]:
%%sql
-- Column names and data types of the customer table, in declaration order.
-- The schema filter keeps a same-named table in another schema from adding
-- extra rows to the listing.
SELECT
  column_name,
  data_type
FROM
  information_schema.columns
WHERE
  table_schema = 'public'
  AND table_name = 'customer'
ORDER BY
  ordinal_position;

Unnamed: 0,column_name,data_type
0,customerkey,integer
1,geoareakey,integer
2,startdt,date
3,enddt,date
4,continent,character varying
5,gender,character varying
6,title,character varying
7,givenname,character varying
8,middleinitial,character varying
9,surname,character varying


In [None]:
%%sql
-- Column names and data types of the product table, in declaration order.
-- The schema filter keeps a same-named table in another schema from adding
-- extra rows to the listing.
SELECT
  column_name,
  data_type
FROM
  information_schema.columns
WHERE
  table_schema = 'public'
  AND table_name = 'product'
ORDER BY
  ordinal_position;

Unnamed: 0,column_name,data_type
0,productkey,integer
1,productcode,integer
2,productname,character varying
3,manufacturer,character varying
4,brand,character varying
5,color,character varying
6,weightunit,character varying
7,weight,double precision
8,cost,double precision
9,price,double precision


In [None]:
%%sql
-- Column names and data types of the sales table, in declaration order.
-- The schema filter keeps a same-named table in another schema from adding
-- extra rows to the listing.
SELECT
  column_name,
  data_type
FROM
  information_schema.columns
WHERE
  table_schema = 'public'
  AND table_name = 'sales'
ORDER BY
  ordinal_position;

Unnamed: 0,column_name,data_type
0,orderkey,integer
1,linenumber,integer
2,orderdate,date
3,deliverydate,date
4,customerkey,integer
5,storekey,integer
6,productkey,integer
7,quantity,integer
8,unitprice,double precision
9,netprice,double precision


In [None]:
%%sql

-- Every continent value that occurs in the customer table, one row each.
SELECT c.continent
FROM customer AS c
GROUP BY c.continent;

Unnamed: 0,continent
0,Europe
1,North America
2,Australia


In [None]:
%%sql

-- One row per sales line with customer and product details, plus the line
-- revenue converted to USD and rendered as a thousands-separated string.
SELECT
  s.orderkey,
  s.orderdate,
  CONCAT(c.givenname, ' ', c.surname) AS customer_name,
  c.countryfull,
  c.age,
  p.manufacturer,
  p.productname,
  -- The 0 in the units position keeps a leading zero for amounts below 1;
  -- with a 9 there, FM would render 0.75 as ".75".
  TO_CHAR(
    ROUND((s.netprice * s.quantity * s.exchangerate)::numeric, 2),
    'FM9,999,999,990.00'
  ) AS revenue_usd
FROM
  sales AS s
LEFT JOIN
  customer AS c
ON
  s.customerkey = c.customerkey
LEFT JOIN
  product AS p
ON
  s.productkey = p.productkey;

Unnamed: 0,orderkey,orderdate,customer_name,countryfull,age,manufacturer,productname,revenue_usd
0,1001,2015-01-01,Beverly Tejeda,United States,50,Tailspin Toys,MGS Dungeon Siege: Legends of Aranna M330,108.75
1,1004,2015-01-01,Jack Gabor,Canada,76,Adventure Works,"Adventure Works 19"" Color Digital TV E35 Brown",263.11
2,1004,2015-01-01,Jack Gabor,Canada,76,"Contoso, Ltd",Contoso Microwave 1.5CuFt X0110 Black,578.52
3,1004,2015-01-01,Jack Gabor,Canada,76,"Contoso, Ltd",Contoso DVD 58 DVD Storage Binder M55 Silver,9.65
4,1004,2015-01-01,Jack Gabor,Canada,76,Wide World Importers,WWI Desktop PC2.33 X2330 Brown,2395.10
...,...,...,...,...,...,...,...,...
199868,3398034,2024-04-20,Karlotta Rivière,France,39,"Contoso, Ltd",Contoso DVD 9-Inch Player Portable M300 Silver,914.61
199869,3398034,2024-04-20,Karlotta Rivière,France,39,"Contoso, Ltd",Contoso DVD 9-Inch Player Portable M300 Black,150.18
199870,3398035,2024-04-20,Michael Wilson,Canada,83,Southridge Video,SV DVD Player M140 Gold,147.78
199871,3398035,2024-04-20,Michael Wilson,Canada,83,"Proseware, Inc.",Proseware Laptop8.9 E089 White,2019.62


In [None]:
%%sql

-- One row per sales line with customer and product details; revenue in USD
-- is kept numeric (rounded to two decimals) rather than formatted as text.
SELECT
  s.orderkey,
  s.orderdate,
  CONCAT(c.givenname, ' ', c.surname) AS customer_name,
  c.countryfull,
  c.age,
  p.manufacturer,
  p.productname,
  ROUND((s.netprice * s.quantity * s.exchangerate)::numeric, 2) AS revenue_usd
FROM sales AS s
LEFT JOIN customer AS c ON c.customerkey = s.customerkey
LEFT JOIN product AS p ON p.productkey = s.productkey;

Unnamed: 0,orderkey,orderdate,customer_name,countryfull,age,manufacturer,productname,revenue_usd
0,1001,2015-01-01,Beverly Tejeda,United States,50,Tailspin Toys,MGS Dungeon Siege: Legends of Aranna M330,108.75
1,1004,2015-01-01,Jack Gabor,Canada,76,Adventure Works,"Adventure Works 19"" Color Digital TV E35 Brown",263.11
2,1004,2015-01-01,Jack Gabor,Canada,76,"Contoso, Ltd",Contoso Microwave 1.5CuFt X0110 Black,578.52
3,1004,2015-01-01,Jack Gabor,Canada,76,"Contoso, Ltd",Contoso DVD 58 DVD Storage Binder M55 Silver,9.65
4,1004,2015-01-01,Jack Gabor,Canada,76,Wide World Importers,WWI Desktop PC2.33 X2330 Brown,2395.10
...,...,...,...,...,...,...,...,...
199868,3398034,2024-04-20,Karlotta Rivière,France,39,"Contoso, Ltd",Contoso DVD 9-Inch Player Portable M300 Silver,914.61
199869,3398034,2024-04-20,Karlotta Rivière,France,39,"Contoso, Ltd",Contoso DVD 9-Inch Player Portable M300 Black,150.18
199870,3398035,2024-04-20,Michael Wilson,Canada,83,Southridge Video,SV DVD Player M140 Gold,147.78
199871,3398035,2024-04-20,Michael Wilson,Canada,83,"Proseware, Inc.",Proseware Laptop8.9 E089 White,2019.62


In [None]:
%%sql

-- Distinct customers per order date in 2015, in total and pivoted into one
-- column per continent. A CASE with no matching branch yields NULL, which
-- COUNT(DISTINCT ...) does not count.
SELECT
  s.orderdate,
  COUNT(DISTINCT s.customerkey) AS number_of_customers,
  COUNT(DISTINCT CASE c.continent WHEN 'Europe' THEN s.customerkey END) AS eu_customers,
  COUNT(DISTINCT CASE c.continent WHEN 'North America' THEN s.customerkey END) AS na_customers,
  COUNT(DISTINCT CASE c.continent WHEN 'Australia' THEN s.customerkey END) AS au_customers
FROM sales AS s
LEFT JOIN customer AS c ON c.customerkey = s.customerkey
WHERE s.orderdate >= DATE '2015-01-01'
  AND s.orderdate < DATE '2016-01-01'
GROUP BY s.orderdate
ORDER BY s.orderdate;

Unnamed: 0,orderdate,number_of_customers,eu_customers,na_customers,au_customers
0,2015-01-01,9,3,6,0
1,2015-01-02,6,1,5,0
2,2015-01-03,11,6,5,0
3,2015-01-05,4,0,4,0
4,2015-01-06,5,2,3,0
...,...,...,...,...,...
320,2015-12-27,1,0,1,0
321,2015-12-28,10,5,4,1
322,2015-12-29,13,5,8,0
323,2015-12-30,18,5,10,3


In [None]:
%%sql

-- Revenue (USD) per product category: the all-time total plus one pivoted
-- column for each of 2021, 2022 and 2023, largest total first.
WITH line_revenue AS (
  -- Compute the per-line revenue once instead of repeating the expression.
  SELECT
    p.categoryname,
    s.orderdate,
    s.netprice * s.quantity * s.exchangerate AS revenue
  FROM sales AS s
  LEFT JOIN product AS p ON p.productkey = s.productkey
)
SELECT
  categoryname,
  SUM(revenue) AS revenue_total,
  SUM(CASE WHEN EXTRACT(YEAR FROM orderdate) = 2021 THEN revenue END) AS revenue_2021,
  SUM(CASE WHEN EXTRACT(YEAR FROM orderdate) = 2022 THEN revenue END) AS revenue_2022,
  SUM(CASE WHEN EXTRACT(YEAR FROM orderdate) = 2023 THEN revenue END) AS revenue_2023
FROM line_revenue
GROUP BY categoryname
ORDER BY revenue_total DESC;




Unnamed: 0,categoryname,revenue_total,revenue_2021,revenue_2022,revenue_2023
0,Computers,90619022.05,9900174.58,17862213.49,11650867.21
1,Cell phones,32624265.72,3871630.45,8119665.07,6002147.63
2,Home Appliances,26607245.54,2101224.8,6612446.68,5919992.87
3,TV and Video,20466861.38,2250754.86,5815336.61,4412178.23
4,Cameras and camcorders,18520360.66,1449672.87,2382532.56,1983546.29
5,"Music, Movies and Audio Books",10588311.0,1236253.2,2989297.28,2180768.13
6,Audio,5312898.1,393160.16,766938.21,688690.18
7,Games and Toys,1668574.13,155105.75,316127.3,270374.96


In [None]:
%%sql

-- Average, smallest and largest order-line amount (USD) per product category
-- for 2021, 2022 and 2023, pivoted into one column per statistic and year.
-- All nine columns follow the same <statistic>_order_<year> naming pattern.
WITH line_amounts AS (
  SELECT
    p.categoryname,
    s.orderdate,
    s.netprice * s.quantity * s.exchangerate AS order_amount
  FROM
    sales AS s
  LEFT JOIN
    product AS p
  ON
    s.productkey = p.productkey
  -- Lines where any factor is zero are excluded; they would otherwise
  -- contribute an amount of 0 to AVG and MIN.
  WHERE
    s.netprice <> 0
    AND s.quantity <> 0
    AND s.exchangerate <> 0
)
SELECT
  categoryname,
  AVG(CASE WHEN orderdate BETWEEN '2021-01-01' AND '2021-12-31' THEN order_amount END) AS average_order_2021,
  MIN(CASE WHEN orderdate BETWEEN '2021-01-01' AND '2021-12-31' THEN order_amount END) AS min_order_2021,
  MAX(CASE WHEN orderdate BETWEEN '2021-01-01' AND '2021-12-31' THEN order_amount END) AS max_order_2021,
  AVG(CASE WHEN orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN order_amount END) AS average_order_2022,
  MIN(CASE WHEN orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN order_amount END) AS min_order_2022,
  MAX(CASE WHEN orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN order_amount END) AS max_order_2022,
  AVG(CASE WHEN orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN order_amount END) AS average_order_2023,
  MIN(CASE WHEN orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN order_amount END) AS min_order_2023,
  MAX(CASE WHEN orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN order_amount END) AS max_order_2023
FROM
  line_amounts
GROUP BY
  categoryname
ORDER BY
  categoryname DESC;

Unnamed: 0,categoryname,average_order_2021,min_order_2021,max_order_2021,order_size_2022,min_order_2022,max_order_2022,order_size_2023,min_order_2023,max_order_2023
0,TV and Video,1510.57,43.65,26304.07,1535.61,41.3,30259.41,1687.9,42.3,27503.12
1,"Music, Movies and Audio Books",409.63,8.12,4370.0,386.61,7.29,5415.19,334.58,6.91,3804.91
2,Home Appliances,1480.78,7.11,28799.91,1755.36,4.04,31654.55,1886.55,4.54,32915.59
3,Games and Toys,79.83,2.84,4850.28,81.29,2.83,5202.01,80.83,3.49,3357.3
4,Computers,1956.56,1.24,37485.0,1565.62,0.83,38082.66,1292.39,0.75,27611.6
5,Cell phones,849.04,2.96,8332.3,722.2,2.53,7692.37,623.28,2.28,8912.22
6,Cameras and camcorders,1200.06,4.64,20007.34,1210.02,6.74,15008.39,1210.96,5.98,13572.0
7,Audio,361.69,8.86,2758.8,392.3,9.31,3473.36,425.38,10.85,2730.87


In [None]:
%%sql

-- Median and mean order-line amount (USD) for each calendar year of sales.
SELECT
  yearly.sales_year,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY yearly.order_amount) AS median_order_size,
  AVG(yearly.order_amount) AS average_order_size
FROM (
  -- Derive the year and the line amount once per sales row.
  SELECT
    EXTRACT(YEAR FROM s.orderdate) AS sales_year,
    s.netprice * s.quantity * s.exchangerate AS order_amount
  FROM sales AS s
) AS yearly
GROUP BY yearly.sales_year
ORDER BY yearly.sales_year;

Unnamed: 0,sales_year,median_order_size,average_order_size
0,2015,329.41,1076.37
1,2016,406.16,1207.12
2,2017,412.06,1218.44
3,2018,396.77,1116.02
4,2019,409.67,1177.66
5,2020,394.67,995.69
6,2021,444.57,1079.5
7,2022,421.34,980.62
8,2023,367.06,882.5
9,2024,340.56,828.8


In [None]:
%%sql

-- Median and mean order-line amount (USD) per product category, pivoted into
-- a pair of columns for every year from 2015 to 2024. Rows outside a given
-- year produce NULL from the CASE, and both aggregates skip NULL inputs.
WITH line_amounts AS (
  SELECT
    p.categoryname,
    EXTRACT(YEAR FROM s.orderdate) AS order_year,
    s.netprice * s.quantity * s.exchangerate AS order_amount
  FROM sales AS s
  LEFT JOIN product AS p ON p.productkey = s.productkey
)
SELECT
  categoryname,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2015 THEN order_amount END) AS median_order_size_2015,
  AVG(CASE WHEN order_year = 2015 THEN order_amount END) AS average_order_size_2015,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2016 THEN order_amount END) AS median_order_size_2016,
  AVG(CASE WHEN order_year = 2016 THEN order_amount END) AS average_order_size_2016,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2017 THEN order_amount END) AS median_order_size_2017,
  AVG(CASE WHEN order_year = 2017 THEN order_amount END) AS average_order_size_2017,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2018 THEN order_amount END) AS median_order_size_2018,
  AVG(CASE WHEN order_year = 2018 THEN order_amount END) AS average_order_size_2018,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2019 THEN order_amount END) AS median_order_size_2019,
  AVG(CASE WHEN order_year = 2019 THEN order_amount END) AS average_order_size_2019,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2020 THEN order_amount END) AS median_order_size_2020,
  AVG(CASE WHEN order_year = 2020 THEN order_amount END) AS average_order_size_2020,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2021 THEN order_amount END) AS median_order_size_2021,
  AVG(CASE WHEN order_year = 2021 THEN order_amount END) AS average_order_size_2021,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2022 THEN order_amount END) AS median_order_size_2022,
  AVG(CASE WHEN order_year = 2022 THEN order_amount END) AS average_order_size_2022,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2023 THEN order_amount END) AS median_order_size_2023,
  AVG(CASE WHEN order_year = 2023 THEN order_amount END) AS average_order_size_2023,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE WHEN order_year = 2024 THEN order_amount END) AS median_order_size_2024,
  AVG(CASE WHEN order_year = 2024 THEN order_amount END) AS average_order_size_2024
FROM line_amounts
GROUP BY categoryname
ORDER BY categoryname;

Unnamed: 0,categoryname,median_order_size_2015,average_order_size_2015,median_order_size_2016,average_order_size_2016,median_order_size_2017,average_order_size_2017,median_order_size_2018,average_order_size_2018,median_order_size_2019,...,median_order_size_2020,average_order_size_2020,median_order_size_2021,average_order_size_2021,median_order_size_2022,average_order_size_2022,median_order_size_2023,average_order_size_2023,median_order_size_2024,average_order_size_2024
0,Audio,173.04,272.09,203.66,317.93,191.25,297.2,210.26,324.83,208.05,...,238.94,369.63,228.9,361.69,257.21,392.3,266.59,425.38,263.32,422.68
1,Cameras and camcorders,852.24,1700.57,844.49,1613.93,843.5,1463.84,685.38,1372.75,754.34,...,691.62,1363.5,656.68,1200.06,651.46,1210.02,672.6,1210.96,676.0,1202.81
2,Cell phones,564.52,1048.78,634.53,1116.33,575.41,976.57,512.26,927.99,563.13,...,492.8,843.42,502.42,849.04,418.6,722.2,375.88,623.28,393.85,661.6
3,Computers,1570.43,3048.31,1694.17,3029.54,1497.5,2894.05,1338.82,2505.46,1436.4,...,1048.5,1892.62,1030.5,1956.56,809.7,1565.62,657.18,1292.39,609.58,1266.4
4,Games and Toys,24.46,57.77,27.31,69.25,34.19,75.84,37.75,87.07,38.14,...,37.15,87.43,31.44,79.83,33.78,81.29,32.62,80.83,32.88,82.88
5,Home Appliances,593.25,1326.49,599.96,1377.64,562.81,1293.85,595.6,1350.67,624.64,...,629.54,1361.73,656.73,1480.78,791.0,1755.36,825.25,1886.55,772.5,1718.96
6,"Music, Movies and Audio Books",94.64,188.33,89.7,189.95,107.99,242.98,134.25,277.94,144.21,...,200.28,411.6,199.47,409.63,186.58,386.61,159.63,334.58,154.74,316.59
7,TV and Video,586.92,1244.23,533.4,1231.69,591.3,1348.06,592.2,1329.57,612.83,...,719.16,1679.94,669.37,1510.57,730.46,1535.61,790.79,1687.9,798.65,1658.9


In [None]:
%%sql

-- Median and mean order-line amount (USD) per calendar year, pivoted into a
-- pair of columns for each product category. Category names are trimmed
-- before comparison; non-matching rows become NULL and are skipped by both
-- aggregates.
WITH line_amounts AS (
  SELECT
    EXTRACT(YEAR FROM s.orderdate) AS order_year,
    TRIM(p.categoryname) AS category,
    s.netprice * s.quantity * s.exchangerate AS order_amount
  FROM sales AS s
  LEFT JOIN product AS p ON p.productkey = s.productkey
)
SELECT
  order_year,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Audio' THEN order_amount END) AS median_order_audio,
  AVG(CASE category WHEN 'Audio' THEN order_amount END) AS average_order_audio,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Cameras and camcorders' THEN order_amount END) AS median_order_cameras,
  AVG(CASE category WHEN 'Cameras and camcorders' THEN order_amount END) AS average_order_cameras,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Cell phones' THEN order_amount END) AS median_order_cell_phones,
  AVG(CASE category WHEN 'Cell phones' THEN order_amount END) AS average_order_cell_phones,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Computers' THEN order_amount END) AS median_order_computers,
  AVG(CASE category WHEN 'Computers' THEN order_amount END) AS average_order_computers,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Games and Toys' THEN order_amount END) AS median_order_games,
  AVG(CASE category WHEN 'Games and Toys' THEN order_amount END) AS average_order_games,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Home Appliances' THEN order_amount END) AS median_order_appliances,
  AVG(CASE category WHEN 'Home Appliances' THEN order_amount END) AS average_order_appliances,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'Music, Movies and Audio Books' THEN order_amount END) AS median_order_music_movies,
  AVG(CASE category WHEN 'Music, Movies and Audio Books' THEN order_amount END) AS average_order_music_movies,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE category WHEN 'TV and Video' THEN order_amount END) AS median_order_tv_video,
  AVG(CASE category WHEN 'TV and Video' THEN order_amount END) AS average_order_tv_video
FROM line_amounts
GROUP BY order_year
ORDER BY order_year;

Unnamed: 0,order_year,median_order_audio,average_order_audio,median_order_cameras,average_order_cameras,median_order_cell_phones,average_order_cell_phones,median_order_computers,average_order_computers,median_order_games,average_order_games,median_order_appliances,average_order_appliances,median_order_music_movies,average_order_music_movies,median_order_tv_video,average_order_tv_video
0,2015,173.04,272.09,852.24,1700.57,564.52,1048.78,1570.43,3048.31,24.46,57.77,593.25,1326.49,94.64,188.33,586.92,1244.23
1,2016,203.66,317.93,844.49,1613.93,634.53,1116.33,1694.17,3029.54,27.31,69.25,599.96,1377.64,89.7,189.95,533.4,1231.69
2,2017,191.25,297.2,843.5,1463.84,575.41,976.57,1497.5,2894.05,34.19,75.84,562.81,1293.85,107.99,242.98,591.3,1348.06
3,2018,210.26,324.83,685.38,1372.75,512.26,927.99,1338.82,2505.46,37.75,87.07,595.6,1350.67,134.25,277.94,592.2,1329.57
4,2019,208.05,326.76,754.34,1477.84,563.13,941.95,1436.4,2669.64,38.14,89.57,624.64,1418.38,144.21,298.67,612.83,1365.92
5,2020,238.94,369.63,691.62,1363.5,492.8,843.42,1048.5,1892.62,37.15,87.43,629.54,1361.73,200.28,411.6,719.16,1679.94
6,2021,228.9,361.69,656.68,1200.06,502.42,849.04,1030.5,1956.56,31.44,79.83,656.73,1480.78,199.47,409.63,669.37,1510.57
7,2022,257.21,392.3,651.46,1210.02,418.6,722.2,809.7,1565.62,33.78,81.29,791.0,1755.36,186.58,386.61,730.46,1535.61
8,2023,266.59,425.38,672.6,1210.96,375.88,623.28,657.18,1292.39,32.62,80.83,825.25,1886.55,159.63,334.58,790.79,1687.9
9,2024,263.32,422.68,676.0,1202.81,393.85,661.6,609.58,1266.4,32.88,82.88,772.5,1718.96,154.74,316.59,798.65,1658.9


In [None]:
%%sql

-- First 100 sales lines by date, with the line revenue (netprice * quantity)
-- and a label combining quantity (single vs. multiple) with unit net price
-- (below 100 = standard, 100 or more = high).
SELECT
  orderdate,
  netprice,
  quantity,
  -- A NULL factor already makes the product NULL, so no explicit guard is needed.
  netprice * quantity AS revenue,
  CASE
    WHEN quantity >= 2 AND netprice >= 100 THEN 'multiple_high_order'
    WHEN quantity >= 2 AND netprice < 100 THEN 'multiple_standard_order'
    WHEN quantity = 1 AND netprice >= 100 THEN 'single_high_order'
    WHEN quantity = 1 AND netprice < 100 THEN 'single_standard_order'
    -- Reached when quantity is below 1 or either input is NULL.
    ELSE 'other'
  END AS order_type
FROM
  sales
-- orderdate alone is not unique, so LIMIT could return a different set of
-- same-day rows on each run; orderkey and linenumber act as tie-breakers
-- (assuming together they identify a sales line).
ORDER BY
  orderdate,
  orderkey,
  linenumber
LIMIT 100;


Unnamed: 0,orderdate,netprice,quantity,revenue,order_type
0,2015-01-01,58.73,1,58.73,single_standard_order
1,2015-01-01,659.78,1,659.78,single_high_order
2,2015-01-01,74.99,3,224.98,multiple_standard_order
3,2015-01-01,286.69,4,1146.75,multiple_high_order
4,2015-01-01,222.00,1,222.00,single_high_order
...,...,...,...,...,...
95,2015-01-08,11.82,8,94.58,multiple_standard_order
96,2015-01-08,139.49,2,278.99,multiple_high_order
97,2015-01-08,903.17,1,903.17,single_high_order
98,2015-01-08,88.77,3,266.31,multiple_standard_order


In [None]:
%%sql

-- Per category: the median order-line amount (USD) over 2022-2023, and the
-- revenue of 2022 and of 2023 split into lines below that median versus
-- lines at or above it.
WITH order_amounts AS (
  -- Every sales line with its category and USD amount, computed once.
  SELECT
    p.categoryname,
    s.orderdate,
    s.netprice * s.quantity * s.exchangerate AS order_amount
  FROM sales AS s
  LEFT JOIN product AS p ON p.productkey = s.productkey
),
median_value AS (
  SELECT
    categoryname,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY order_amount) AS median_order_amount
  FROM order_amounts
  WHERE orderdate BETWEEN '2022-01-01' AND '2023-12-31'
  GROUP BY categoryname
)
SELECT
  o.categoryname,
  -- The median is constant within a category; AVG only carries it through the GROUP BY.
  AVG(m.median_order_amount) AS median_order_amount,
  SUM(CASE WHEN EXTRACT(YEAR FROM o.orderdate) = 2022 AND o.order_amount < m.median_order_amount
           THEN o.order_amount END) AS lower_order_amount_2022,
  SUM(CASE WHEN EXTRACT(YEAR FROM o.orderdate) = 2022 AND o.order_amount >= m.median_order_amount
           THEN o.order_amount END) AS higher_order_amount_2022,
  SUM(CASE WHEN EXTRACT(YEAR FROM o.orderdate) = 2023 AND o.order_amount < m.median_order_amount
           THEN o.order_amount END) AS lower_order_amount_2023,
  SUM(CASE WHEN EXTRACT(YEAR FROM o.orderdate) = 2023 AND o.order_amount >= m.median_order_amount
           THEN o.order_amount END) AS higher_order_amount_2023
FROM order_amounts AS o
LEFT JOIN median_value AS m ON m.categoryname = o.categoryname
GROUP BY o.categoryname
ORDER BY o.categoryname;


Unnamed: 0,categoryname,median_order_amount,lower_order_amount_2022,higher_order_amount_2022,lower_order_amount_2023,higher_order_amount_2023
0,Audio,262.25,126838.74,640099.47,104149.73,584540.45
1,Cameras and camcorders,659.87,293581.87,2088950.69,233563.52,1749982.77
2,Cell phones,399.18,823209.9,7296455.17,738857.59,5263290.05
3,Computers,738.0,1786099.84,16076113.65,1477250.73,10173616.48
4,Games and Toys,33.12,29987.45,286139.85,26589.38,243785.58
5,Home Appliances,806.53,678256.92,5934189.76,553520.9,5366471.98
6,"Music, Movies and Audio Books",171.31,244819.89,2744477.39,218088.71,1962679.42
7,TV and Video,758.32,683842.56,5131494.05,460417.94,3951760.28


In [None]:
%%sql
-- Every category name that occurs in the product table, one row each.
SELECT p.categoryname
FROM product AS p
GROUP BY p.categoryname;

Unnamed: 0,categoryname
0,Audio
1,Cell phones
2,Cameras and camcorders
3,TV and Video
4,Home Appliances
5,Games and Toys
6,"Music, Movies and Audio Books"
7,Computers


In [None]:
%%sql

-- For 2022-2023 sales: per category, the 25th and 75th percentile of the
-- order-line amount (USD), and total revenue in each of three buckets
-- (below the 25th, above the 75th, or in between).
WITH order_amounts AS (
  -- Sales lines in the analysis window with their category and USD amount;
  -- shared by the percentile computation and the bucketed totals.
  SELECT
    p.categoryname,
    s.netprice * s.quantity * s.exchangerate AS order_amount
  FROM
    sales s
  LEFT JOIN
    product p
  ON
    s.productkey = p.productkey
  WHERE
    s.orderdate BETWEEN '2022-01-01' AND '2023-12-31'
),
percentile AS (
  SELECT
    categoryname,
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_amount) AS percentile_25,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_amount) AS percentile_75
  FROM
    order_amounts
  GROUP BY
    categoryname
)
SELECT
  o.categoryname,
  -- Both percentiles are GROUP BY keys, so they are selected directly
  -- instead of being wrapped in an aggregate.
  pc.percentile_25,
  pc.percentile_75,
  CASE
    WHEN o.order_amount < pc.percentile_25 THEN 'below_25'
    WHEN o.order_amount > pc.percentile_75 THEN 'over_75'
    ELSE 'between_25_75'
  END AS order_amount_percentile,
  -- Explicit alias; without one the output column is just called "sum".
  SUM(o.order_amount) AS total_order_amount
FROM
  order_amounts o
LEFT JOIN
  percentile pc
ON
  o.categoryname = pc.categoryname
GROUP BY
  o.categoryname,
  pc.percentile_25,
  pc.percentile_75,
  order_amount_percentile
ORDER BY
  o.categoryname,
  order_amount_percentile;

Unnamed: 0,categoryname,percentile_25,percentile_75,order_amount_percentile,sum
0,Audio,121.81,526.37,below_25,62761.6
1,Audio,121.81,526.37,between_25_75,502767.57
2,Audio,121.81,526.37,over_75,890099.23
3,Cameras and camcorders,276.02,1536.86,below_25,117397.97
4,Cameras and camcorders,276.02,1536.86,between_25_75,1355028.83
5,Cameras and camcorders,276.02,1536.86,over_75,2893652.05
6,Cell phones,116.96,949.41,below_25,219556.85
7,Cell phones,116.96,949.41,between_25_75,4714464.23
8,Cell phones,116.96,949.41,over_75,9187791.63
9,Computers,294.0,1722.22,below_25,723419.28
