# Interrogating building age distributions

This notebook explores the distribution of building ages in communities in Western Australia. 

In [None]:
%matplotlib inline

from os.path import join as pjoin
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from ipywidgets import interact, fixed, Dropdown
import ipywidgets as widgets
import re
import seaborn as sns
sns.set_context("poster")
sns.set_style('darkgrid')

The source file `WA_Residential_Wind_Exposure_2018_TCRM.CSV` can be found in HPRM D2018-6256. Download a local version (by using the 'Supercopy' option when right-clicking on the record), and change the path to the appropriate folder.

In [None]:
# Raw string literal: the Windows path contains backslashes followed by
# letters ("\W", "\d", "\e") that are not valid escape sequences. A plain
# string only works by accident and triggers a SyntaxWarning on recent
# Python versions; the raw string has exactly the same value.
inputFile = r"C:\WorkSpace\data\derived\exposure\WA\WA_Residential_Wind_Exposure_2018_TCRM.CSV"
df = pd.read_csv(inputFile)

In [None]:
# Distinct SA2 names in sorted order; these populate the "Locality" dropdowns.
localities = sorted(df['SA2_NAME'].unique())

In [None]:
# Distinct YEAR_BUILT values in sorted order. The plotting functions below
# read this module-level list to fix the category / hue order.
ages = sorted(df['YEAR_BUILT'].unique())

# Widget used to choose the SA2 locality for the first interactive plot.
locdropdown = Dropdown(
    options=localities,
    description="Locality",
)

In [None]:
def plotAgeDist(df, locality):
    """
    Plot the building age distribution for a single SA2 locality.

    Draws a two-panel figure. The left panel counts the buildings in the
    locality by ``YEAR_BUILT``; the right panel shows the same counts
    broken down by ``SUBURB``.

    :param df: `pandas.DataFrame` with (at least) the columns ``SA2_NAME``,
               ``SUBURB`` and ``YEAR_BUILT``.
    :param locality: Value of ``SA2_NAME`` selecting the rows to plot.

    The category order comes from the module-level ``ages`` list. The
    figure is shown with ``plt.show()``; nothing is returned.
    """
    fig, (ax_age, ax_suburb) = plt.subplots(1, 2, figsize=(16, 8))
    locdf = df[df['SA2_NAME'] == locality]
    palette = sns.color_palette("Set2", 8)

    # Left panel: counts per age group for the whole locality.
    sns.countplot(x="YEAR_BUILT", data=locdf, order=ages, ax=ax_age,
                  palette=palette)
    ax_age.set_xlabel("Year built")
    ax_age.set_ylabel("Number")
    plt.setp(ax_age.get_xticklabels(), rotation=90)
    ax_age.set_title("{0} - {1:,} residential buildings".format(locality, len(locdf.index)))

    # Right panel: counts per suburb, split by age group. Rows without a
    # suburb name are left out of the category order.
    suburbs = sorted(pd.unique(locdf.loc[locdf['SUBURB'].notnull(), 'SUBURB']))
    sns.countplot(x='SUBURB', hue='YEAR_BUILT', data=locdf, order=suburbs,
                  hue_order=ages, palette=palette, ax=ax_suburb)
    ax_suburb.set_xlabel("Suburb")
    ax_suburb.set_ylabel("Number")

    # Put each word of a suburb name on its own line.
    wrapped = [tick.get_text().replace(' ', '\n')
               for tick in ax_suburb.get_xticklabels()]
    ax_suburb.set_xticklabels(wrapped)

    # Rotate through the axes object rather than plt.xticks(), which acts on
    # whichever axes pyplot currently considers "current".
    plt.setp(ax_suburb.get_xticklabels(), rotation=90)
    ax_suburb.legend(title="Year built", ncol=2)
    fig.tight_layout()
    plt.show()

In [None]:
# Re-draw plotAgeDist whenever a different locality is picked in the dropdown;
# the data frame itself is held fixed.
interact(
    plotAgeDist,
    df=fixed(df),
    locality=locdropdown,
)

There are two aspects to the age distribution: communities where there has been substantial growth since the last significant cyclone, and communities with a large proportion of older (pre-1980) construction. 

TODO: 
1. Add a chart that ranks the localities by proportion of a selected age group. The list of age groups is already compiled (`ages`), just need to do the calculations to get proportions for the specified age group.
2. Add another figure that plots the predominant age group for each suburb in the locality. If there's a spatial layer of the boundaries for `SUBURB_2015`, then one could plot up a categorised map of the suburbs based on predominant age group.

In [None]:
def plotBySuburb(df, locality):
    """
    Plot building counts per suburb, split by year built, for one SA2 locality.

    :param df: `pandas.DataFrame` with the columns ``SA2_NAME``, ``SUBURB``
               and ``YEAR_BUILT``.
    :param locality: Value of ``SA2_NAME`` selecting the rows to plot.

    The hue order is taken from the module-level ``ages`` list. The figure
    is shown with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(1, 1, figsize=(16,12))

    # Restrict to the chosen locality, then collect its named suburbs.
    in_locality = df['SA2_NAME'] == locality
    locdf = df[in_locality]
    has_suburb = locdf['SUBURB'].notnull()
    suburbs = sorted(pd.unique(locdf[has_suburb]['SUBURB']))

    sns.countplot(
        x='SUBURB',
        hue='YEAR_BUILT',
        data=locdf,
        order=suburbs,
        hue_order=ages,
        palette=sns.color_palette("Set2", 8),
        ax=ax,
    )
    ax.set_xlabel("Suburb")
    ax.set_ylabel("Number")

    # plt.xticks() returns (locations, labels); only the labels are rotated.
    plt.setp(plt.xticks()[1], rotation=90)
    ax.legend(title="Year built", ncol=2)
    plt.show()

In [None]:
# Separate dropdown instance so this plot's selection is independent of the
# widget used for plotAgeDist.
locdropdown2 = Dropdown(
    options=localities,
    description="Locality",
)
interact(
    plotBySuburb,
    df=fixed(df),
    locality=locdropdown2,
)

For the Perth region, we perform the analysis at a larger aggregation, due to the number of suburbs that make up the Greater Perth area.

In [None]:
# Sorted distinct UCL names, with the first entry of the sorted list dropped.
# NOTE(review): presumably that first entry is a blank/placeholder name - confirm.
urbanareas = sorted(df['UCL_NAME'].unique())[1:]

# Sorted distinct SA2 names.
cities = sorted(df['SA2_NAME'].unique())


In [None]:
# Matches a single upper-case letter in parentheses, e.g. "(C)"; used to
# remove such tags from tick labels.
regex = re.compile(r"\([A-Z]\)")

def plotAgeDistCity(df, locality):
    """
    Plot the building age distribution for a single urban area (UCL).

    Draws a two-panel figure. The left panel counts the buildings in the
    urban area by ``YEAR_BUILT``; the right panel shows the same counts
    broken down by ``SA2_NAME``.

    :param df: `pandas.DataFrame` with (at least) the columns ``UCL_NAME``,
               ``SA2_NAME`` and ``YEAR_BUILT``.
    :param locality: Value of ``UCL_NAME`` selecting the rows to plot.

    The category order comes from the module-level ``ages`` list and the
    label clean-up uses the module-level ``regex``. The figure is shown
    with ``plt.show()``; nothing is returned.
    """
    fig, (ax_age, ax_area) = plt.subplots(1, 2, figsize=(16, 8))
    locdf = df[df['UCL_NAME'] == locality]
    palette = sns.color_palette("Set2", 8)

    # Left panel: counts per age group for the whole urban area.
    sns.countplot(x="YEAR_BUILT", data=locdf, order=ages, ax=ax_age,
                  palette=palette)
    ax_age.set_xlabel("Year built")
    ax_age.set_ylabel("Number")
    plt.setp(ax_age.get_xticklabels(), rotation=90)
    ax_age.set_title("{0} - {1:,} residential buildings".format(locality, len(locdf.index)))

    # Right panel: counts per SA2 area, split by age group. Rows without an
    # SA2 name are left out of the category order.
    suburbs = sorted(pd.unique(locdf.loc[locdf['SA2_NAME'].notnull(), 'SA2_NAME']))
    sns.countplot(x='SA2_NAME', hue='YEAR_BUILT', data=locdf, order=suburbs,
                  hue_order=ages, palette=palette, ax=ax_area)
    ax_area.set_xlabel("Suburb")
    ax_area.set_ylabel("Number")

    # Strip parenthesised single-letter tags from the tick labels.
    cleaned = [regex.sub("", tick.get_text())
               for tick in ax_area.get_xticklabels()]
    ax_area.set_xticklabels(cleaned)

    # Rotate through the axes object rather than plt.xticks(), which acts on
    # whichever axes pyplot currently considers "current".
    plt.setp(ax_area.get_xticklabels(), rotation=90)
    ax_area.legend(title="Year built", ncol=2)
    fig.tight_layout()
    plt.show()

In [None]:
# Dropdown over the urban areas, driving the city-level age distribution plot.
locdropdown3 = Dropdown(
    options=urbanareas,
    description="Urban area",
)
interact(
    plotAgeDistCity,
    df=fixed(df),
    locality=locdropdown3,
)

In [None]:
def plotByCity(df, locality):
    """
    Plot building counts per Local Government Area, split by year built,
    for one urban area (UCL).

    :param df: `pandas.DataFrame` with the columns ``UCL_NAME``, ``LGA_NAME``
               and ``YEAR_BUILT``.
    :param locality: Value of ``UCL_NAME`` selecting the rows to plot.

    Uses the module-level ``ages`` (hue order) and ``regex`` (label
    clean-up). The figure is shown with ``plt.show()``; nothing is returned.

    NOTE(review): a later cell defines another function with this same name,
    which replaces this one in the kernel namespace once that cell has run.
    """
    fig, ax = plt.subplots(1, 1, figsize=(16,12))

    # Restrict to the chosen urban area, then collect its named LGAs.
    in_area = df['UCL_NAME'] == locality
    locdf = df[in_area]
    has_lga = locdf['LGA_NAME'].notnull()
    suburbs = sorted(pd.unique(locdf[has_lga]['LGA_NAME']))

    sns.countplot(
        x='LGA_NAME',
        hue='YEAR_BUILT',
        data=locdf,
        order=suburbs,
        hue_order=ages,
        palette=sns.color_palette("Set2", 8),
        ax=ax,
    )
    ax.set_xlabel("Local Government Area")
    ax.set_ylabel("Number")

    # Remove parenthesised single-letter tags from the tick labels.
    cleaned = []
    for tick in ax.get_xticklabels():
        cleaned.append(regex.sub("", tick.get_text()))
    ax.set_xticklabels(cleaned)

    # plt.xticks() returns (locations, labels); only the labels are rotated.
    plt.setp(plt.xticks()[1], rotation=90)
    ax.legend(title="Year built", ncol=2)
    plt.show()

# Dropdown over the urban areas, wired to the LGA breakdown plot.
locdropdown4 = Dropdown(
    options=urbanareas,
    description="Region",
)
interact(
    plotByCity,
    df=fixed(df),
    locality=locdropdown4,
)

In [None]:
# Stacked bar chart of the percentage of buildings in each age group, per
# suburb, for the Geraldton urban area.
locdf = df[df['UCL_NAME'] == 'Geraldton']
suburbs = locdf.groupby(['SUBURB', 'YEAR_BUILT']).size()

# Convert counts to percentages within each suburb. transform('sum') keeps
# the original (SUBURB, YEAR_BUILT) index on every pandas version, whereas
# groupby(...).apply(...) prepends the group key a second time on
# pandas >= 2.0, which turns the unstacked row labels into (SUBURB, SUBURB)
# tuples.
ageprofile = 100 * suburbs / suburbs.groupby(level=0).transform('sum')

f,ax = plt.subplots()
cmap = ListedColormap(sns.color_palette("Set2", 7).as_hex())
ageprofile.unstack().plot(kind='barh', stacked=True, ax=ax, cmap=cmap)
ax.set_xlabel("Percentage")
ax.set_ylabel("Suburb")

# Show suburb names in title case.
labels = [item.get_text() for item in ax.get_yticklabels()]
labels = [l.title() for l in labels]
ax.set_yticklabels(labels)

# Place the legend below the axes.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1),
          fancybox=True, shadow=True, ncol=7)
# End on plt.show() so the cell does not echo the Legend object's repr.
plt.show()

In [None]:
urbanareas = sorted(list(pd.unique(df['UCL_NAME'])))[1:]
cities = sorted(list(pd.unique(df['SA2_NAME'])))
def plotByCity(df, locality):
    """
    Plot, for one urban area (UCL), the percentage of buildings in each
    age group per suburb as a stacked horizontal bar chart.

    :param df: `pandas.DataFrame` with the columns ``UCL_NAME``, ``SUBURB``
               and ``YEAR_BUILT``.
    :param locality: Value of ``UCL_NAME`` selecting the rows to plot.

    Uses the module-level ``regex`` to clean the tick labels. The figure is
    shown with ``plt.show()``; nothing is returned.

    NOTE(review): an earlier cell defines a different function under this
    same name; running this cell replaces it in the kernel namespace.
    """
    locdf = df[df['UCL_NAME'] == locality]
    suburbs = locdf.groupby(['SUBURB', 'YEAR_BUILT']).size()

    # Convert counts to percentages within each suburb. transform('sum')
    # keeps the original (SUBURB, YEAR_BUILT) index on every pandas version,
    # whereas groupby(...).apply(...) prepends the group key a second time
    # on pandas >= 2.0 and breaks the unstacked row labels.
    ageprofile = 100 * suburbs / suburbs.groupby(level=0).transform('sum')

    f, ax = plt.subplots()
    cmap = ListedColormap(sns.color_palette("Set2", 7).as_hex())
    ageprofile.unstack().plot(kind='barh', stacked=True, ax=ax, cmap=cmap)
    ax.set_xlabel("Percentage")
    ax.set_xlim((0,100))
    ax.set_ylabel("Suburb")

    # Strip parenthesised single-letter tags and show names in title case.
    labels = [item.get_text() for item in ax.get_yticklabels()]
    ax.set_yticklabels([regex.sub("", text).title() for text in labels])

    # Place the legend below the axes.
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1),
              fancybox=True, shadow=True, ncol=7)
    plt.show()
    
locdropdown5 = Dropdown(options=urbanareas, description="Region")
interact(plotByCity, df=fixed(df), locality=locdropdown5)

In [None]:
# Count buildings per (SA2_CODE, YEAR_BUILT) pair, then pivot the age groups
# into columns: one row per SA2 code, one column per YEAR_BUILT value.
sa2_counts = df.groupby(['SA2_CODE', 'YEAR_BUILT']).size()
sa2 = sa2_counts.unstack(level='YEAR_BUILT')

In [None]:
# Display the SA2-by-age-group count table built in the previous cell.
sa2

In [None]:
# Proportion of buildings per SA2 that fall in the first four age groups of
# the sorted `ages` list, relative to the total across all age groups.
# NOTE(review): presumably the first four groups are the pre-1980 bands - confirm.
early_groups = ages[:4]
early_count = sa2[early_groups].sum(axis=1)
total_count = sa2[ages].sum(axis=1)
sa2['PROP_1980'] = early_count / total_count

In [None]:
# Write the SA2 table to CSV with missing counts replaced by 0.
# Raw string literal: the Windows path contains backslash sequences
# ("\W", "\d", "\e", "\S") that are not valid escapes; a plain string only
# works by accident and triggers a SyntaxWarning on recent Python versions.
sa2.fillna(0).to_csv(r"C:\WorkSpace\data\derived\exposure\WA\SA2_building_age.csv")

In [None]:
# Number of buildings per (SA1_CODE, YEAR_BUILT) pair.
sa1 = df.groupby(['SA1_CODE', 'YEAR_BUILT']).size()

# Percentage of each SA1's buildings in each age group. transform('sum')
# keeps the original (SA1_CODE, YEAR_BUILT) index on every pandas version,
# whereas groupby(...).apply(...) prepends the group key a second time on
# pandas >= 2.0.
sa1ageprofile = 100 * sa1 / sa1.groupby(level=0).transform('sum')
sa1ageprofile

In [None]:
# One row per SA1 code, one column per YEAR_BUILT value, holding building counts.
sa1 = df.groupby(['SA1_CODE', 'YEAR_BUILT']).size().unstack(level=1)

# Proportion of buildings in the first four age groups of the sorted `ages`
# list, relative to the total across all age groups.
# NOTE(review): presumably the first four groups are the pre-1980 bands - confirm.
sa1['PROP_1980'] = sa1[ages[:4]].sum(axis=1)/sa1[ages].sum(axis=1)

# Raw string literal: the Windows path contains backslash sequences
# ("\W", "\d", "\e", "\S") that are not valid escapes; a plain string only
# works by accident and triggers a SyntaxWarning on recent Python versions.
sa1.fillna(0).to_csv(r"C:\WorkSpace\data\derived\exposure\WA\SA1_building_age.csv")