# Load SQLite DB

In [3]:
import pandas as pd
from paths import PF_DB_PATH
db_path = f'sqlite:///{PF_DB_PATH}'

In [4]:
%%capture
%load_ext sql
%env DATABASE_URL= $db_path

# Querys

#### How is property type distributed in the dataset?

In [5]:
%%sql 
SELECT
    property_type AS propertyType,
    COUNT(property_type) AS total,
    -- format function of SQLite
    PRINTF("%.1f%",
        100*CAST(COUNT(property_type) AS real) / (
            SELECT
                COUNT(*)
            FROM
                record
        )
    ) AS proportion
FROM
    record
GROUP BY
    property_type
ORDER BY
    total DESC

Done.


propertyType,total,proportion
House,349058,47.7%
Unit,340298,46.5%
Other,22206,3.0%
Vacant Land,12576,1.7%
Commercial,6310,0.9%
Industrial,1104,0.2%


#### How much have average house prices changes in the last 5 years?

In [6]:
%%sql
-- window for average house prices
WITH houseAvg AS (
    SELECT
        CAST(strftime('%Y', record_date) AS decimal) AS year,
        AVG(price) AS price
    FROM
        record
    WHERE
        property_type = 'House'
        AND
        -- only include sales price in reasonable range
        price between 100000 and 3000000
    GROUP BY
        year
)
SELECT
    year,
    PRINTF("$%,d", price) AS avgPrice,
    PRINTF("%.1f%",
        100*(price / lag(price, 1) over(ORDER BY year)-1)
    ) AS growth
FROM
    houseAvg
WHERE
    year < 2022
ORDER BY
    year DESC
LIMIT
    5

 * sqlite:///D:\_CORE\static\dataset\Australia\pricefinder\pricefinder_db.sqlite
Done.


year,avgPrice,growth
2021,"$1,431,570",20.0%
2020,"$1,193,125",5.5%
2019,"$1,131,452",-2.1%
2018,"$1,155,428",-3.6%
2017,"$1,199,063",8.1%


#### Which suburbs have the highest growth rate in house prices in 2021?

In [7]:
%%sql
WITH houseAvg AS (
    SELECT
        p.locality AS suburb,
        CAST(strftime('%Y', r.record_date) AS decimal) AS year,
        AVG(r.price) AS price
    FROM
        record AS r
        LEFT JOIN
            property AS p
        ON
            r.prop_id = p.address
    WHERE
        r.property_type = 'House'
        AND
        r.price BETWEEN 100000 AND 3000000
        AND
        -- include 2020 and 2021 prices to calculate growth in 2021
        year BETWEEN 2020 AND 2021
    GROUP BY
        suburb,
        year
)
SELECT
    suburb,
    year,
    PRINTF(
        '%.1f%',
        -- growth rate = (this-year-price / prev-year-price) -1
        100 *(
            price / lag(price, 1) OVER(
                PARTITION by suburb
                ORDER BY
                    year
            ) -1
        )
    ) AS growth
FROM
    houseAvg
ORDER BY
    growth DESC
LIMIT
    10

 * sqlite:///D:\_CORE\static\dataset\Australia\pricefinder\pricefinder_db.sqlite
Done.


suburb,year,growth
MAIANBAR,2021,98.9%
MILSONS PASSAGE,2021,90.4%
COLLAROY,2021,9.6%
RANKIN PARK,2021,9.6%
PENDLE HILL,2021,9.5%
HUNTERS HILL,2021,9.4%
THE ENTRANCE NORTH,2021,9.4%
WOOLLAHRA,2021,9.3%
SEAFORTH,2021,9.2%
BEACONSFIELD,2021,9.1%


#### What is 5-year moving avearge house price for suburb Chatswood?

In [8]:
%%sql

WITH 
    houseAvg AS(
        SELECT
            CAST(STRFTIME('%Y', r.record_date) AS integer) AS year,
            AVG(r.price) AS price
        FROM
            record AS r
            LEFT JOIN 
                property AS p 
            ON 
                r.prop_id = p.address
        WHERE
            p.locality = 'CHATSWOOD'
        GROUP BY
            year
    )

SELECT
    year,
    PRINTF(
        '$%,d',
        price
    ) as price, 
    PRINTF(
        '$%,d',
        AVG(price) OVER (
        ORDER BY
            year ROWS BETWEEN 
                    4 PRECEDING 
                    AND
                    CURRENT ROW
         )
    ) as MA_price
FROM
   houseAvg
ORDER BY
    year DESC
LIMIT 10

 * sqlite:///D:\_CORE\static\dataset\Australia\pricefinder\pricefinder_db.sqlite
Done.


year,price,MA_price
2021,"$1,431,015","$1,358,968"
2020,"$1,286,734","$1,329,015"
2019,"$1,440,307","$1,404,810"
2018,"$1,220,792","$1,342,923"
2017,"$1,415,993","$1,311,506"
2016,"$1,281,250","$1,186,230"
2015,"$1,665,709","$1,103,184"
2014,"$1,130,871","$908,982"
2013,"$1,063,707","$843,971"
2012,"$789,614","$749,801"


#### What are the yearly median prices for Maianbar?

In [16]:
%%sql

WITH prices AS(
    SELECT
        CAST(STRFTIME('%Y', r.record_date) AS integer) AS year,
        price
    FROM
        record AS r
        LEFT JOIN property AS p ON r.prop_id = p.address
    WHERE
        p.locality = 'MAIANBAR' 
        AND
        r.price BETWEEN 100000 AND 3000000
    ORDER BY
        year
)

SELECT 
    DISTINCT year,
    AVG(
    CASE WHEN n_record % 2 = 0
        THEN CASE WHEN row_num IN (n_record/2, n_record/2+1) THEN price END
        ELSE CASE WHEN row_num = n_record/2+1 THEN price END
    END) OVER(PARTITION BY year) as median
FROM (
    SELECT 
        *,
        ROW_NUMBER() OVER(PARTITION BY year ORDER BY price) as row_num,
        COUNT(*) OVER(PARTITION BY year) as n_record
    FROM 
        prices
)

 * sqlite:///D:\_CORE\static\dataset\Australia\pricefinder\pricefinder_db.sqlite
Done.


year,median
2007,310000.0
2012,552500.0
2013,890000.0
2014,1005000.0
2016,845000.0
2018,670000.0
2020,789750.0
2021,1435000.0


#### List the years from 2010 to 2021 when there was no sale record of house in Maianbar.

In [95]:
%%sql
WITH 
    houseAvg AS(
        SELECT
            CAST(STRFTIME('%Y', r.record_date) AS integer) AS year,
            AVG(r.price) AS price
        FROM
            record AS r
            LEFT JOIN 
                property AS p 
            ON 
                r.prop_id = p.address
        WHERE
            p.locality = 'MAIANBAR'
            AND
            year BETWEEN 2010 AND 2021
        GROUP BY
            year
    ),
    
    year_series(value) AS (
        SELECT 2010
        UNION ALL
        SELECT value+1 FROM year_series
        WHERE value+1<=2021
)

SELECT 
    y.value as year
FROM
    year_series as y
    LEFT JOIN
        houseAvg AS h
        ON
        h.year = y.value
WHERE
    h.price IS NULL
    

 * sqlite:///D:\_CORE\static\dataset\Australia\pricefinder\pricefinder_db.sqlite
Done.


year
2010
2011
2015
2017
2019
