In [1]:
import numpy as np
import pandas as pd
import altair as alt
from datetime import datetime
import fidap
import config

# Build the FIDAP client from the API key kept in `config`. This rebinds the
# name `fidap` from the imported module to the client object that the later
# cells call `.sql(...)` on.
fidap = fidap.fidap_client(api_key=config.api_key)

In [None]:
# List the tables of the `fred` dataset through its INFORMATION_SCHEMA.TABLES
# view, to see what is available to query.
fred_meta = fidap.sql("""
SELECT *
FROM fidap-301014.fred.INFORMATION_SCHEMA.TABLES
""")

In [None]:
# Show the name of every table returned by the metadata query.
print(fred_meta['table_name'])

In [None]:
def fetch_column_metadata(table_name):
    """Return the column-level metadata of one table in the `fred` dataset.

    Queries INFORMATION_SCHEMA.COLUMNS for `table_name`, leaving out the
    is_generated / generation_expression / is_stored / is_updatable columns.

    Parameters
    ----------
    table_name : str
        Name of the table to describe. It is interpolated directly into the
        SQL text, so only pass trusted, hard-coded names.

    Returns
    -------
    Whatever `fidap.sql` returns for the query.
    """
    return fidap.sql(f"""
SELECT * EXCEPT(is_generated, generation_expression, is_stored, is_updatable)
FROM fidap-301014.fred.INFORMATION_SCHEMA.COLUMNS
WHERE table_name = '{table_name}'
""")


# Column metadata for each table, to figure out which ones might be useful.
categories_meta = fetch_column_metadata('categories')
series_info_meta = fetch_column_metadata('series_info')
series_observations_meta = fetch_column_metadata('series_observations')

Suppose I want information on the following series:  
1) Monthly Supply of Houses in the United States  
2) New Privately-Owned Housing Units Started: Total Units  
3) New Houses Sold by Stage of Construction, Total  
4) New One Family Houses for Sale in the United States  
5) Producer Price Index by Commodity: Lumber and Wood Products: Hardwood Lumber  
6) Producer Price Index by Commodity: Lumber and Wood Products: Oak and Maple Hardwood Flooring    
  
I can obtain some information about them from the `series_info` table in the `fred` dataset.  

In [5]:
# Look up the `series_info` rows for the housing-related series ids of interest.
housing_data = fidap.sql("""
SELECT *
FROM fidap-301014.fred.series_info
WHERE series_id IN ('HOUST', 'MSACSR', 'NHSDPTS', 'HNFSUSNSA', 'WPU0812', 'WPU08120401', 'COMPUTSA', 'UNDCONTSA', 'RELDCBMO27SBOG', 'H8B1026NCBCMG', 'MRTSSM444USN', 'MRTSSM4423XUSS', 'PRRESCONS', 'MPCV00XXS')
""")

Now that I know what I am looking at, I can begin querying in earnest. What I would really like to find out is whether we can eyeball any relationship between supply of houses and construction of new houses. A quick inspection of the data tells us that we can get most of the data starting from the 1960s except for `Producer Price Index by Commodity: Lumber and Wood Products: Oak and Maple Hardwood Flooring` where the earliest records were from 1984.   

In [None]:
# Pull the raw observations for three of the series of interest.

# monthly housing supply
monthly_supply_df = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'MSACSR';
""")

# hardwood flooring price index
# The id used to read 'WPU080120401' (stray extra '0'), which does not match
# the 'WPU08120401' id used in the `series_info` lookup and so could never
# return rows.
flooring_pricing = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'WPU08120401'
""")

# houses sold
new_houses_sold = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'NHSDPTS'
""")

The queries above were duds. Those series were not available in the table for some reason.

In [None]:
# Identify which series actually have observations: collect the distinct
# series_id values present in `series_observations`, then inner-join them to
# `series_info` so that only series with observations are returned, together
# with their descriptive columns.
# NOTE(review): `SELECT *` over this join returns `series_id` from both sides;
# `SELECT i.*` may be what is wanted -- confirm before changing.
available_series = fidap.sql("""
WITH all_series AS (
SELECT DISTINCT series_id 
FROM fidap-301014.fred.series_observations
)

SELECT * 
FROM fidap-301014.fred.series_info AS i
INNER JOIN all_series as s
ON s.series_id = i.series_id;
""")

A quick manual trawl through the data suggested that some datasets are not imported. But that is okay, we can work with others instead. It is a little weird because we know that they exist from the `series_info` table.  

In [2]:
# Fetch the observation history (series_id, date, value) for each series
# used in the analysis below; one query per series.

# housing units started (charted below as "Seasonally Adjusted")
housing_started = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'HOUST';
""")

# housing units started (charted below as "Not Seasonally Adjusted")
housing_started_nsa = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'HOUSTNSA';
""")

# hardwood lumber pricing index
hardwood_lumber_pricing = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'WPU0812'; 
""")

# real estate loans
# NOTE(review): the variable name suggests a change / growth-rate series -- confirm against series_info
delta_real_estate_loans = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'H8B1026NCBCMG'; 
""")

# retail sales: building materials
building_materials_expenditure = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'MRTSSM444USS'; 
""")

# retail sales: home furnishings and appliances
furnishing_expenditure = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'MRTSSM4423XUSS'; 
""")

# private residential expenditure
private_residential_expenditure = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id = 'PRRESCONS'; 
""")



Let us start by seeing if there is a relationship between retail spending of building materials and home furnishings.

In [3]:
# Pair the two retail-sales series date by date. Both frames carry a `value`
# column, so after the merge the furnishing figures (left frame) are in
# `value_x` and the building-materials figures (right frame) are in `value_y`.
building_furnishing = furnishing_expenditure.merge(
    building_materials_expenditure,
    how='inner',
    on='date',
)

# Scatter: building materials on the x-axis, home furnishings on the y-axis.
furnishing_scatter_base = (
    alt.Chart(
        building_furnishing,
        title="Building Materials and Home Furnishings Retail Sales",
    )
    .mark_circle()
    .encode(
        x=alt.X('value_y', title='Building Materials (Mil. of $)'),
        y=alt.Y('value_x', title='Home Furnishings and Appliances (Mil. of $)'),
    )
)

# Order-3 polynomial regression of furnishings on building materials.
furnishing_poly_fit = [
    furnishing_scatter_base
    .transform_regression('value_y', 'value_x', method='poly', order=3)
    .mark_line(color='#fb8072')
    .encode()
]

# LOESS smoother over the same pair of columns, layered with the scatter and
# the polynomial fit.
alt.layer(
    furnishing_scatter_base,
    furnishing_scatter_base.transform_loess('value_y', 'value_x').mark_line(color='#fdc086'),
    *furnishing_poly_fit,
)

Generally, we can see that the relationship between expenditure on building materials and home furnishings can be roughly modelled as:  
  
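$\text{Home Furnishings} = \beta_0 + \beta_1 x + \beta_2 x^2 + \beta_3 x^3$, where $x$ is the Building Materials retail sales figure (an order-3 polynomial fit).  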
  
This is represented by the red line. The orange line is a locally estimated scatterplot smoothing (LOESS) curve fitted to the data. We can see that the two are very similar to one another.   

It is often said that peak building season is during the warmer summer months. Can we see this in the data? Basically, does the number of housing units started increase in the summer months?

In [25]:
# Add a `month` column, taken from each observation's `date`, to both
# housing-starts frames.
for starts_frame in (housing_started, housing_started_nsa):
    starts_frame['month'] = starts_frame['date'].dt.month

In [28]:
# Scatter of housing units started against calendar month.
housing_started_chart = alt.Chart(housing_started).mark_circle(
    opacity = 0.4,
    color = '#fdb462'
).encode(
    x = alt.X(
        'month',
        title = 'Month'),
    y = alt.Y(
        'value',
        title = 'Thousands of Units')
).properties(
    title = {
        "text": "New Housing Units Started",
        "subtitle": "Seasonally Adjusted"
    }
)

# Layer a LOESS trend line over the scatter.
# `configure_title` returns a new chart instead of modifying the chart in
# place, so calling it as a bare statement had no effect. Chart configuration
# is also a top-level property, so it is applied to the layered chart rather
# than to one of its layers.
alt.layer(
    housing_started_chart,
    housing_started_chart.transform_loess('month', 'value').mark_line(color = '#8dd3c7')
).configure_title(
    anchor = 'start'
)

Okay, the weird thing is that we do not seem to see any seasonal patterns. This might be because of the fact that it is a seasonally adjusted dataset, thus month-to-month variations may be tamped down. 

In [27]:
# Scatter of housing units started (not seasonally adjusted) against calendar month.
housing_started_nsa_chart = alt.Chart(housing_started_nsa).mark_circle(
    opacity = 0.4,
    color = '#fdb462'
).encode(
    x = alt.X(
        'month',
        title = 'Month'),
    y = alt.Y(
        'value',
        title = 'Thousands of Units')
).properties(
    title = {
        "text": "New Housing Units Started",
        "subtitle": "Not Seasonally Adjusted"
    }
)

# Layer a LOESS trend line over the scatter.
# `configure_title` returns a new chart instead of modifying the chart in
# place, so calling it as a bare statement had no effect. Chart configuration
# is also a top-level property, so it is applied to the layered chart rather
# than to one of its layers.
alt.layer(
    housing_started_nsa_chart,
    housing_started_nsa_chart.transform_loess('month', 'value').mark_line(color = '#8dd3c7')
).configure_title(
    anchor = 'start'
)

When the seasonal adjustment is removed, the impact of seasons can be seen a lot more clearly. Do we see this same effect with home furnishing and appliances, and building materials?  

In [29]:
# Fetch the two unadjusted retail-sales series in a single query.
home_furnishing_building_materials_nsa = fidap.sql("""
SELECT series_id, date, value
FROM fidap-301014.fred.series_observations
WHERE series_id IN ('MRTSSM4423XUSN', 'MRTSSM444USN'); 
""")

# Add a `month` column taken from each observation's `date`.
nsa_observation_dates = home_furnishing_building_materials_nsa['date']
home_furnishing_building_materials_nsa['month'] = nsa_observation_dates.dt.month

In [32]:
# Human-readable label for each series id, used as the colour legend in the chart.
id_title_map = dict(
    MRTSSM4423XUSN='Home Furnishing and Appliances',
    MRTSSM444USN='Home Building Materials',
)

# Translate every row's series id into its label.
nsa_series_ids = home_furnishing_building_materials_nsa['series_id']
home_furnishing_building_materials_nsa['series'] = nsa_series_ids.map(id_title_map)

In [44]:
# Scatter of retail sales against calendar month, coloured by series.
home_furnishing_building_nsa_scatter = alt.Chart(home_furnishing_building_materials_nsa).mark_circle(
    opacity = 0.4).encode(
    x = alt.X('month', title = 'Month'),
    y = alt.Y('value', title = 'Retail Sales (Mil of $)'),
    color = 'series'
).properties(
    title = {
        "text": "Retail Sales of Furniture and Building Materials",
        "subtitle": "Not Seasonally Adjusted"
    })

# Layer one LOESS trend line per series over the scatter.
# `configure_title` returns a new chart instead of modifying the chart in
# place, so calling it as a bare statement had no effect. Chart configuration
# is also a top-level property, so it is applied to the layered chart rather
# than to one of its layers.
alt.layer(
    home_furnishing_building_nsa_scatter,
    home_furnishing_building_nsa_scatter.transform_loess('month', 'value', groupby = ['series']).mark_line()
).configure_title(anchor = 'start')

Interestingly enough, we do not see the same seasonal spike in the summer months for home furnishing and appliances. Instead, the spike occurs at the end of the year. This is probably due to the Thanksgiving and Christmas holiday season. 