# Chapter 2 - Date Time

Load SQL Module and Database

In [None]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database.
# Outside Colab this whole branch is skipped, so a PostgreSQL server that
# already hosts contoso_100k is assumed to be reachable on localhost:5432.
# Every shell step redirects stdout and stderr to /dev/null, so a failing step
# is silent and only surfaces later when the connection or a query fails.
if 'google.colab' in sys.modules:
    # Update package installer
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    # The password must match the one used in the connection string below.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Replace ipython-sql with jupysql before the sql extension is loaded
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database (local server, user 'postgres', database contoso_100k)
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

## 2.1 DATE_TRUNC() Function
Summarize net revenue by month: Use precise date truncation to aggregate sales data by month.  
:spiral_notepad:	 [postgresql_documentation_functions_datetime](https://www.postgresql.org/docs/9.5/functions-datetime.html)


In [None]:
%%sql

-- Net revenue and distinct customer count per calendar month.
-- DATE_TRUNC snaps each order date to the first day of its month and the
-- cast to DATE keeps only the date part.
SELECT
    CAST(DATE_TRUNC('MONTH', orderdate) AS DATE) AS order_month,
    SUM(netprice * quantity * exchangerate)      AS revenue,
    COUNT(DISTINCT customerkey)                  AS customers
FROM sales
GROUP BY order_month
ORDER BY order_month;

Unnamed: 0,order_month,revenue,customers
0,2015-01-01,384092.66,200
1,2015-02-01,706374.12,291
2,2015-03-01,332961.59,139
3,2015-04-01,160767.00,78
4,2015-05-01,548632.63,236
...,...,...,...
107,2023-12-01,2928550.93,1484
108,2024-01-01,2677498.55,1340
109,2024-02-01,3542322.55,1718
110,2024-03-01,1692854.89,877


## 2.2 TO_CHAR() Function
Create human-readable monthly sales summaries: Use TO_CHAR() to format dates for reporting purposes.
https://www.postgresql.org/docs/18/functions-formatting.html

In [None]:
%%sql

-- Monthly revenue and distinct customer count keyed by a text label.
-- TO_CHAR formats each order date as a two-digit year and a two-digit month.
SELECT
    TO_CHAR(orderdate, 'YY-MM')             AS order_month,
    SUM(netprice * quantity * exchangerate) AS revenue,
    COUNT(DISTINCT customerkey)             AS customers
FROM sales
GROUP BY order_month
ORDER BY order_month;

Unnamed: 0,order_month,revenue,customers
0,15-01,384092.66,200
1,15-02,706374.12,291
2,15-03,332961.59,139
3,15-04,160767.00,78
4,15-05,548632.63,236
...,...,...,...
107,23-12,2928550.93,1484
108,24-01,2677498.55,1340
109,24-02,3542322.55,1718
110,24-03,1692854.89,877


## 2.3 DATE_PART(), EXTRACT(), CURRENT_DATE, NOW()
DATE_PART() and EXTRACT() are similar functions: both return a numeric field (such as the year or the month) from a date or timestamp.

CURRENT_DATE returns the current date in the database session's time zone.
NOW() is similar to CURRENT_DATE but returns the current date and time.

**Task**: Get sales data aggregated by month and category for the past five years.

In [None]:
%%sql

-- Revenue per order year, order month and product category.
-- The filter compares calendar years, so it keeps every order placed from
-- January of the year that lies five years before the current one.
SELECT
    EXTRACT(YEAR FROM orderdate)            AS order_year,
    EXTRACT(MONTH FROM orderdate)           AS order_month,
    p.categoryname,
    SUM(netprice * quantity * exchangerate) AS revenue
FROM sales AS s
LEFT JOIN product AS p
    ON s.productkey = p.productkey
WHERE EXTRACT(YEAR FROM orderdate) >= EXTRACT(YEAR FROM CURRENT_DATE) - 5
GROUP BY order_year, order_month, p.categoryname
ORDER BY order_year, order_month, p.categoryname;

Unnamed: 0,order_year,order_month,categoryname,revenue
0,2020,1,Audio,83677.75
1,2020,1,Cameras and camcorders,279266.24
2,2020,1,Cell phones,339875.77
3,2020,1,Computers,890105.59
4,2020,1,Games and Toys,30417.31
...,...,...,...,...
411,2024,4,Computers,206212.81
412,2024,4,Games and Toys,4728.20
413,2024,4,Home Appliances,42736.88
414,2024,4,"Music, Movies and Audio Books",27540.51


## 2.4 Interval
`INTERVAL 'value unit'` describes a time span; the unit can be anything from millennium down to second.

In [None]:
%%sql

-- An interval literal is the keyword INTERVAL followed by a quoted value and unit.
-- NOTE(review): autopandas is enabled, so the result is returned as a DataFrame,
-- which is presumably why one year is displayed as 365 days in the output.
SELECT INTERVAL '1 YEAR'

Unnamed: 0,interval
0,365 days


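Rewrite the query in 2.3 with INTERVAL. Note that `CURRENT_DATE - INTERVAL '5 YEARS'` is a rolling window counted back from today, whereas 2.3 compares whole calendar years, so this version starts later and returns fewer rows.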

In [None]:
%%sql

-- Revenue per order year, order month and product category, limited to
-- orders placed within a rolling five-year window ending today.
SELECT
    EXTRACT(YEAR FROM orderdate)            AS order_year,
    EXTRACT(MONTH FROM orderdate)           AS order_month,
    p.categoryname,
    SUM(netprice * quantity * exchangerate) AS revenue
FROM sales AS s
LEFT JOIN product AS p
    ON s.productkey = p.productkey
WHERE orderdate >= CURRENT_DATE - INTERVAL '5 YEARS'
GROUP BY order_year, order_month, p.categoryname
ORDER BY order_year, order_month, p.categoryname;

Unnamed: 0,order_year,order_month,categoryname,revenue
0,2020,11,Audio,4519.24
1,2020,11,Cameras and camcorders,30271.75
2,2020,11,Cell phones,23946.87
3,2020,11,Computers,71211.72
4,2020,11,Games and Toys,1914.10
...,...,...,...,...
331,2024,4,Computers,206212.81
332,2024,4,Games and Toys,4728.20
333,2024,4,Home Appliances,42736.88
334,2024,4,"Music, Movies and Audio Books",27540.51


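## 2.5 AGE() Function
- AGE calculates the difference between two dates and returns the result as an interval of years, months, and days.
- Syntax: AGE(end_date, start_date)
- Note: EXTRACT(DAY FROM AGE(...)) returns only the day field of that interval, not the total number of days elapsed.

Show the average order processing time and net revenue per year for the last five years.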

In [None]:
%%sql

-- Yearly net revenue and average order processing time in days for orders
-- placed within the last five years.
-- Processing time is computed as the date difference deliverydate - orderdate,
-- which yields the total number of whole days. Taking EXTRACT(DAY FROM AGE(...))
-- instead would only read the day field of a years/months/days interval and so
-- undercount any delivery that takes a month or longer.
-- Rows without a delivery date produce NULL and are ignored by AVG.
SELECT
  EXTRACT(YEAR FROM orderdate) AS order_year,
  ROUND(SUM(netprice * quantity * exchangerate)::NUMERIC, 0) AS revenue,
  ROUND(AVG(deliverydate::DATE - orderdate::DATE), 2) AS processing_time
FROM
  sales
WHERE
  orderdate >= CURRENT_DATE - INTERVAL '5 YEARS'
GROUP BY
  order_year
ORDER BY
  order_year;

Unnamed: 0,order_year,revenue,processing_time
0,2020,696572,0.97
1,2021,21357977,1.36
2,2022,44864557,1.62
3,2023,33108566,1.75
4,2024,8396527,1.67
