In [1]:
import numpy as np
import pandas as pd
import altair as alt
from datetime import datetime
import fidap
import config

# instantiate connection
# NOTE(review): this rebinds the name `fidap` from the imported module to the
# object returned by `fidap.fidap_client(...)`. Every later `fidap.sql(...)`
# call in this notebook goes through that client object, not the module.
# The API key is read from the local `config` module rather than hardcoded here.
fidap = fidap.fidap_client(api_key = config.api_key)

### FRED  
  
The Federal Reserve Bank of St. Louis uploads a ton of useful economic data that we can access using `fidap`.   

In [None]:
# List every table registered in the `fred` dataset.
fred_meta = fidap.sql("""
SELECT *
FROM fidap-301014.fred.INFORMATION_SCHEMA.TABLES
""")

# get all table names
print(fred_meta.table_name)


def fred_column_meta(table_name):
    """Fetch column-level metadata for one table of the `fred` dataset.

    Runs a query against ``INFORMATION_SCHEMA.COLUMNS`` filtered to
    ``table_name`` and leaves out the ``is_generated``,
    ``generation_expression``, ``is_stored`` and ``is_updatable`` columns.

    Parameters
    ----------
    table_name : str
        Name of the table to describe. It is interpolated directly into the
        SQL text, so only pass trusted literal names (as done below).

    Returns
    -------
    Whatever ``fidap.sql`` returns for the query (used as a DataFrame
    elsewhere in this notebook).
    """
    return fidap.sql(f"""
SELECT * EXCEPT(is_generated, generation_expression, is_stored, is_updatable)
FROM fidap-301014.fred.INFORMATION_SCHEMA.COLUMNS
WHERE table_name = '{table_name}'
""")


# meta for each of the tables to figure out which might be useful
categories_meta = fred_column_meta('categories')
series_info_meta = fred_column_meta('series_info')
series_observations_meta = fred_column_meta('series_observations')

**Housing Data**  
  
Suppose I want information on the following series, which can tell us more about the housing market in the US:  
  
1) Monthly Supply of Houses in the United States  
2) New Privately-Owned Housing Units Started: Total Units  
3) New Houses Sold by Stage of Construction, Total  
4) New One Family Houses for Sale in the United States  
5) Producer Price Index by Commodity: Lumber and Wood Products: Hardwood Lumber  
6) Producer Price Index by Commodity: Lumber and Wood Products: Oak and Maple Hardwood Flooring    
7) New Privately-Owned Housing Units Authorized in Permit-Issuing Places: Total Units  
8) Retail Sales of Building Materials, Furnishings and Appliances
  
I can obtain some information from the `series_info` table of the `fred` dataset.  

In [5]:
# Pull the descriptive rows from `series_info` for the candidate housing-related
# series ids, so we can read what each id refers to before querying observations.
housing_series_info_query = """
SELECT *
FROM fidap-301014.fred.series_info
WHERE series_id IN ('HOUST', 'MSACSR', 'NHSDPTS', 'HNFSUSNSA', 'WPU0812', 'WPU08120401', 'COMPUTSA', 'UNDCONTSA', 'RELDCBMO27SBOG', 'H8B1026NCBCMG', 'MRTSSM444USN', 'MRTSSM4423XUSS', 'PRRESCONS', 'MPCV00XXS', 'PERMIT')
"""
housing_data = fidap.sql(housing_series_info_query)

Now that I know what I am looking at, I can begin querying in earnest. What I would really like to find out is whether we can eyeball any relationship between supply of houses and construction of new houses. A quick inspection of the data tells us that these series do not all start at the same time.   

In [None]:
# Each query below pulls the (series_id, date, value) observations for a single
# series from `series_observations`.

# query for housing supply data
monthly_supply_df = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'MSACSR';
""")

# flooring price index
# The id must match the one looked up in `series_info` ('WPU08120401'); the
# previous filter had an extra '0' ('WPU080120401') and so could never match.
flooring_pricing = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'WPU08120401'
""")

# houses sold
new_houses_sold = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'NHSDPTS'
""")

The queries above were duds. Those series were not available in the table for some reason.

In [None]:
# id the series available
# Keep only the `series_info` rows whose series_id actually has observations.
# `SELECT i.*` (rather than `SELECT *`) avoids returning `series_id` twice,
# once from each side of the join.
available_series = fidap.sql("""
WITH all_series AS (
SELECT DISTINCT series_id 
FROM fidap-301014.fred.series_observations
)

SELECT i.* 
FROM fidap-301014.fred.series_info AS i
INNER JOIN all_series as s
ON s.series_id = i.series_id;
""")

A quick manual trawl through the data suggested that some datasets are not imported. But that is okay, we can work with others instead. It is a little weird because we know from the `series_info` table that they exist.  

In [2]:
def fred_series_observations(series_id):
    """Fetch all observations of one series from `series_observations`.

    Parameters
    ----------
    series_id : str
        Series identifier to filter on (e.g. ``'HOUST'``).

    Returns
    -------
    Whatever ``fidap.sql`` returns for the query; the result has the columns
    ``series_id``, ``date`` and ``value``.

    Raises
    ------
    ValueError
        If ``series_id`` contains anything other than letters, digits or
        underscores.
    """
    # The id is interpolated into the SQL text, so refuse anything that is not
    # a plain identifier instead of sending a malformed (or injected) query.
    if not series_id.replace("_", "").isalnum():
        raise ValueError(f"unexpected series id: {series_id!r}")
    return fidap.sql(f"""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = '{series_id}';
""")


# housing construction has started
housing_started = fred_series_observations('HOUST')
housing_started_nsa = fred_series_observations('HOUSTNSA')

# lumber pricing index
hardwood_lumber_pricing = fred_series_observations('WPU0812')

# real estate loans
delta_real_estate_loans = fred_series_observations('H8B1026NCBCMG')

# retail sales: building materials, and home furnishings / appliances
# (these two frames are compared against each other further down)
building_materials_expenditure = fred_series_observations('MRTSSM444USS')
furnishing_expenditure = fred_series_observations('MRTSSM4423XUSS')

# private residential construction expenditure
private_residential_expenditure = fred_series_observations('PRRESCONS')
# plotted further down as a period-over-period % change
residential_construction_expenditure = fred_series_observations('MPCV00XXS')

Let us start by seeing if there is a relationship between retail spending of building materials and home furnishings.

In [70]:
# Align the two retail-sales series on their observation date. After the merge
# the `_x` columns come from the furnishings frame (left) and the `_y` columns
# from the building-materials frame (right).
building_furnishing = pd.merge(
    furnishing_expenditure,
    building_materials_expenditure,
    on = 'date',
    how = 'inner',
)

# Scatter of furnishings sales (y) against building-materials sales (x).
building_materials_axis = alt.X('value_y', title = 'Building Materials (Mil. of $)')
furnishings_axis = alt.Y('value_x', title = 'Home Furnishings and Appliances (Mil. of $)')

furnishing_scatter_base = (
    alt.Chart(building_furnishing, title = "Building Materials and Home Furnishings Retail Sales")
    .mark_circle()
    .encode(x = building_materials_axis, y = furnishings_axis)
)

# Two fitted curves over the same points: a logarithmic fit and a cubic fit.
furnishing_log_fit = (
    furnishing_scatter_base
    .transform_regression('value_y', 'value_x', method = 'log')
    .mark_line(color = '#fdc086')
)
furnishing_poly_fit = [
    furnishing_scatter_base
    .transform_regression('value_y', 'value_x', method = 'poly', order = 3)
    .mark_line(color = '#fb8072')
]

# Points first, then the log curve, then the polynomial curve on top.
alt.layer(furnishing_scatter_base, furnishing_log_fit, *furnishing_poly_fit)

Generally, we can see that the relationship between expenditure on building materials and home furnishings can be roughly modelled as a cubic polynomial:  
  
$\text{Home Furnishings} = a + b\,x + c\,x^{2} + d\,x^{3}$, where $x = \text{Building Materials}$  
  
This is represented by the red line.   
  
The orange line is a logarithmic regression function fitted to the distribution of the data. This can be expressed as:    
  
$\text{Home Furnishings} = a + b \cdot \log(\text{Building Materials})$  

It is often said that peak building season is during the warmer summer months. Can we see this in the data? Basically, does the number of housing units started increase in the summer months?

In [25]:
# Add a `month` column (taken from each row's `date`) to both housing-starts
# frames; the charts below use it as their x axis.
for started_frame in (housing_started, housing_started_nsa):
    started_frame['month'] = started_frame['date'].dt.month

In [28]:
# Scatter of housing starts by calendar month (seasonally adjusted series).
housing_started_chart = alt.Chart(housing_started).mark_circle(
    opacity = 0.4,
    color = '#fdb462'
).encode(
    x = alt.X(
        'month',
        title = 'Month'),
    y = alt.Y(
        'value',
        title = 'Thousands of Units')
).properties(
    title = {
        "text": "New Housing Units Started",
        "subtitle": "Seasonally Adjusted"
    }
)

# LOESS-smoothed trend line over the same points.
housing_started_trend = housing_started_chart.transform_loess('month', 'value').mark_line(color = '#8dd3c7')

# `configure_title` returns a new chart instead of modifying the one it is
# called on, so its result has to be the thing we display. The configuration is
# applied to the layered (top-level) chart rather than to one of its layers.
alt.layer(housing_started_chart, housing_started_trend).configure_title(
    anchor = 'start'
)

Okay, the weird thing is that we do not seem to see any seasonal patterns. This might be because of the fact that it is a seasonally adjusted dataset, thus month-to-month variations may be tamped down. 

In [27]:
# Scatter of housing starts by calendar month (not seasonally adjusted series).
housing_started_nsa_chart = alt.Chart(housing_started_nsa).mark_circle(
    opacity = 0.4,
    color = '#fdb462'
).encode(
    x = alt.X(
        'month',
        title = 'Month'),
    y = alt.Y(
        'value',
        title = 'Thousands of Units')
).properties(
    title = {
        "text": "New Housing Units Started",
        "subtitle": "Not Seasonally Adjusted"
    }
)

# LOESS-smoothed trend line over the same points.
housing_started_nsa_trend = housing_started_nsa_chart.transform_loess('month', 'value').mark_line(color = '#8dd3c7')

# `configure_title` returns a new chart instead of modifying the one it is
# called on, so its result has to be the thing we display. The configuration is
# applied to the layered (top-level) chart rather than to one of its layers.
alt.layer(housing_started_nsa_chart, housing_started_nsa_trend).configure_title(
    anchor = 'start'
)

When the seasonal adjustment is removed, the impact of seasons can be seen a lot more clearly. Do we see this same effect with home furnishing and appliances, and building materials?  

In [29]:
# getting unadjusted data
home_furnishing_building_materials_nsa = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id IN ('MRTSSM4423XUSN', 'MRTSSM444USN'); 
""")

# getting month
home_furnishing_building_materials_nsa['month'] = home_furnishing_building_materials_nsa['date'].dt.month

# descriptive titles
id_title_map = {
    'MRTSSM4423XUSN': 'Home Furnishing and Appliances',
    'MRTSSM444USN': 'Home Building Materials'
}
home_furnishing_building_materials_nsa['series'] = home_furnishing_building_materials_nsa['series_id'].map(id_title_map)

In [44]:
# Scatter of monthly retail sales by calendar month, one colour per series.
home_furnishing_building_nsa_scatter = alt.Chart(home_furnishing_building_materials_nsa).mark_circle(
    opacity = 0.4).encode(
    x = alt.X('month', title = 'Month'),
    y = alt.Y('value', title = 'Retail Sales (Mil of $)'),
    color = 'series'
).properties(
    title = {
        "text": "Retail Sales of Furniture and Building Materials",
        "subtitle": "Not Seasonally Adjusted"
    })

# One LOESS-smoothed trend line per series (grouped by the `series` label).
home_furnishing_building_nsa_trend = home_furnishing_building_nsa_scatter.transform_loess(
    'month', 'value', groupby = ['series']).mark_line()

# `configure_title` returns a new chart instead of modifying the one it is
# called on, so its result has to be the thing we display. The configuration is
# applied to the layered (top-level) chart rather than to one of its layers.
alt.layer(home_furnishing_building_nsa_scatter, home_furnishing_building_nsa_trend).configure_title(anchor = 'start')

Interestingly enough, we do not see the same seasonal spike in the summer months for home furnishing and appliances. Instead, the spike occurs at the end of the year. This is probably due to the Thanksgiving and Christmas holiday season. 

From a more meta perspective, can we look at how much total private expenditure on residential construction has changed over the years? 

In [48]:
# Bar chart of the `residential_construction_expenditure` observations over
# time. Bars with a value above zero are drawn in green, all others in red, and
# hovering a bar shows its date and value.
alt.Chart(
    residential_construction_expenditure,
    title = 'Total Private Residential Construction Expenditure').mark_bar().encode(
    x = alt.X(
        'date',
        title = "Year"),
    y = alt.Y(
        'value',
        # spelling fixed: was "Preceeding"
        title = "% Change From Preceding Period"),
    color = alt.condition(
        alt.datum.value > 0,
        alt.value('#ccebc5'),
        alt.value('#fbb4ae')),
    tooltip = ['date', 'value']
)

What I find interesting is that the expenditure dipped for a very short period of time in 2020 even amidst a pandemic-induced recession. The decline, while sharp, was also very short in comparison to the period right before and during the 2008 recession. Are the changes in total private residential construction expenditure correlated with the gap between construction permits issued and the number of housing units where work has started?   

In [75]:
# By doing the joins in SQL we avoid having to work out when each series starts
# or ends (the inner joins keep only dates present in all three series), and we
# can compute the permit-to-construction-start gap directly.
#
# Result columns:
#   date          observation date
#   ex_delta      value of series 'MPCV00XXS' on that date
#   gap           PERMIT value on that date minus HOUST value on that date
#   gap_lagged_1  PERMIT value one row earlier (by date) minus HOUST value
#   gap_lagged_2  PERMIT value two rows earlier (by date) minus HOUST value
#
# Every operand is cast to FLOAT64 so that all three gap columns come back with
# the same type (previously `gap` was FLOAT64 while the lagged gaps were
# NUMERIC). The lagged columns are NULL for the earliest one / two dates.
construction_permit_gap = fidap.sql("""
WITH permit AS ( 
SELECT date, CAST(value AS FLOAT64) AS permits,
(LAG(CAST(value AS FLOAT64), 1) OVER (ORDER BY date ASC)) AS permits_lagged_1,
(LAG(CAST(value AS FLOAT64), 2) OVER (ORDER BY date ASC)) AS permits_lagged_2
FROM fidap-301014.fred.series_observations
WHERE series_id = 'PERMIT'),

gap AS(
SELECT c.date, 
p.permits - CAST(c.value AS FLOAT64) AS gap,
p.permits_lagged_1 - CAST(c.value AS FLOAT64) AS gap_lagged_1,
p.permits_lagged_2 - CAST(c.value AS FLOAT64) AS gap_lagged_2
FROM fidap-301014.fred.series_observations AS c
INNER JOIN permit AS p
ON p.date = c.date
WHERE c.series_id = 'HOUST')

SELECT e.date, e.value AS ex_delta, g.gap, g.gap_lagged_1, g.gap_lagged_2
FROM fidap-301014.fred.series_observations AS e
INNER JOIN gap AS g
ON g.date = e.date
WHERE e.series_id = 'MPCV00XXS';
""")

In [78]:
# Scatter of the same-month permit/start gap against the expenditure change,
# with a linear regression line layered on top.
gap_ex_title = {
    "text": "Construction, Permitting, and Expenditure",
    "subtitle": "No Time Lag Betw. Permit Issued and Construction Commencement"
}

gap_ex_scatter = (
    alt.Chart(construction_permit_gap)
    .mark_circle(color = '#e5c494')
    .encode(
        x = alt.X('ex_delta', title = "Change in Residential Construction Expenditure (%)"),
        y = alt.Y('gap', title = 'Permits and Construction Gap (Thousand Units)'),
    )
    .properties(title = gap_ex_title)
)

gap_ex_trend = (
    gap_ex_scatter
    .transform_regression('ex_delta', 'gap', method = 'linear')
    .mark_line(color = '#bebada')
)

alt.layer(gap_ex_scatter, gap_ex_trend)

Interestingly enough, this is not what we expected. We would expect periods of increased residential construction expenditure to correlate with months where commencement of construction outstrips permits issued.  
  
What if we introduce a lag? There could be a time lag between the issuance of a permit and the commencement of construction.  

In [79]:
# Same scatter as before, but using the gap computed from the permits value
# lagged by one row (`gap_lagged_1`), again with a linear regression line.
gap_ex_lag1_title = {
    "text": "Construction, Permitting, and Expenditure",
    "subtitle": "One Month's Time Lag Betw. Permit Issued and Construction Commencement"
}

gap_ex_lag1_scatter = (
    alt.Chart(construction_permit_gap)
    .mark_circle(color = '#e5c494')
    .encode(
        x = alt.X('ex_delta', title = "Change in Residential Construction Expenditure (%)"),
        y = alt.Y('gap_lagged_1', title = 'Permits and Construction Gap (Thousand Units)'),
    )
    .properties(title = gap_ex_lag1_title)
)

gap_ex_lag1_trend = (
    gap_ex_lag1_scatter
    .transform_regression('ex_delta', 'gap_lagged_1', method = 'linear')
    .mark_line(color = '#bebada')
)

alt.layer(gap_ex_lag1_scatter, gap_ex_lag1_trend)

In [85]:
# Same scatter again, now using the gap computed from the permits value lagged
# by two rows (`gap_lagged_2`), with a linear regression line.
gap_ex_lag2_title = {
    "text": "Construction, Permitting, and Expenditure",
    "subtitle": "Two Months' Time Lag Betw. Permit Issued and Construction Commencement"
}

gap_ex_lag2_scatter = (
    alt.Chart(construction_permit_gap)
    .mark_circle(color = '#e5c494')
    .encode(
        x = alt.X('ex_delta', title = "Change in Residential Construction Expenditure (%)"),
        y = alt.Y('gap_lagged_2', title = 'Permits and Construction Gap (Thousand Units)'),
    )
    .properties(title = gap_ex_lag2_title)
)

gap_ex_lag2_trend = (
    gap_ex_lag2_scatter
    .transform_regression('ex_delta', 'gap_lagged_2', method = 'linear')
    .mark_line(color = '#bebada')
)

alt.layer(gap_ex_lag2_scatter, gap_ex_lag2_trend)

Predictably enough, the relationship starts to show once a time lag is introduced. Hence, we can say that as residential construction expenditure increases, month on month, the deficit between the number of construction permits issued and construction commencement grows.