# Setup

In [0]:
!pip install OWSLib
!pip install geopandas

import os
from pathlib import Path
from getpass import getpass
from urllib.request import urlretrieve
from owslib.wfs import WebFeatureService
import geopandas as gpd
from IPython.display import display

# Current working directory
%cd /content/drive/My Drive/unimelb-cluster-and-cloud-computing-comp90024-2020-sm1/city_analytics/geospatial

# Set environment variables
os.environ["AURIN_USERNAME"] = input("Please enter your AURIN username: ")
os.environ["AURIN_PASSWORD"] = getpass("Please enter your AURIN password: ")

# Global constants
DATADIR = Path("../geodata")
URL = "http://openapi.aurin.org.au/wfs"  # WFS source
LAYERNAMES = ["aurin:datasource-AU_Govt_ABS-UoM_AURIN_DB_3_sa4_aggregated_pop_and_dwelling_counts_census_2016"]  # typename (dataset ID)

# Download basemaps

In [0]:
def retrieve(url, filename, force=False):
  """
  Download url to filename
  url: link to network object
  filename: local file
  force: if True, overwrite existing filename

  Returns the resolved path of the local file. An existing file is reused
  unless force is True.
  """

  filename = Path(filename).resolve()  # to avoid symbolic link
  if not force and filename.exists():
    return filename

  filename.parent.mkdir(parents=True, exist_ok=True)  # make dirs if not exist

  # Download next to the destination and rename only once the transfer has
  # finished. Writing straight to `filename` would leave a truncated file
  # behind on a failed transfer, and the exists() check above would then
  # hand that broken file back as if it were a valid cached download.
  partial = filename.with_name(filename.name + ".part")
  try:
    urlretrieve(url, partial)
    partial.replace(filename)  # atomic when both paths share a filesystem
  finally:
    if partial.exists():  # only true when the download or rename failed
      partial.unlink()

  return filename

# Download WFS data

In [8]:
def download_wfs(url, version="1.1.0", typename="", outputFormat="json",
                 filename="", force=False):
  """
  Fetch one Web Feature Service (WFS) layer and store the response on disk
  url: WFS url
  version: WFS version passed to the client
  typename: layer name to request
  outputFormat: format of response data; also the extension of the default
    output file
  filename: output file. When empty, defaults to
    "<DATADIR>/<typename>.<outputFormat>"
  force: if True, download again even when filename already exists

  Returns the resolved path of the output file. The service credentials are
  read from the AURIN_USERNAME and AURIN_PASSWORD environment variables.
  """

  if filename:
    target = Path(filename)
  else:
    target = Path(f"{DATADIR}/{typename}.{outputFormat}")
  target = target.resolve()  # to avoid symbolic link

  # Reuse an earlier download unless the caller asked for a refresh
  needs_download = force or not target.exists()
  if not needs_download:
    return target

  # Connect service
  username = os.environ["AURIN_USERNAME"]
  password = os.environ["AURIN_PASSWORD"]
  service = WebFeatureService(url, version=version,
                              username=username, password=password)

  # Request data
  response = service.getfeature(typename=typename, outputFormat=outputFormat)

  # Save data
  target.parent.mkdir(parents=True, exist_ok=True)  # make dirs if not exist
  target.write_bytes(response.read())

  return target

# Download every configured layer; files already on disk are reused
wfs_files = list(map(lambda name: download_wfs(URL, typename=name), LAYERNAMES))

[PosixPath('/content/drive/My Drive/unimelb-cluster-and-cloud-computing-comp90024-2020-sm1/city_analytics/geodata/aurin:datasource-AU_Govt_ABS-UoM_AURIN_DB_3_sa4_aggregated_pop_and_dwelling_counts_census_2016.json')]

# Simplify geometry of WFS data (reduce size)

In [0]:
def simplify(filename, tolerance=0.01, preserve_topology=True):
  """
  Shrink a vector file by simplifying its geometries, overwriting it in place
  filename: file readable by geopandas; it is rewritten as GeoJSON
  tolerance: forwarded to GeoSeries.simplify
  preserve_topology: forwarded to GeoSeries.simplify

  Returns filename unchanged.
  """

  frame = gpd.read_file(filename)

  # Clean data: drop rows whose geometry is missing before simplifying
  geometry_column = frame.geometry.name
  frame.dropna(subset=[geometry_column], inplace=True)

  # Simplify geometry
  simplified = frame.geometry.simplify(tolerance, preserve_topology=preserve_topology)
  frame.geometry = simplified

  # Export data over the input file
  frame.to_file(filename, driver="GeoJSON")

  return filename

# Simplify every downloaded layer in place, then inspect the first one
wfs_files_small = list(map(simplify, wfs_files))  # reduced size
data = gpd.read_file(wfs_files_small[0])
data.info()  # info() prints its own report and returns None, so no print()
display(data.head())
data.plot();  # trailing ';' hides the Axes repr, leaving only the figure

# References
https://data.aurin.org.au/dataset/au-govt-abs-sa4-aggregated-pop-and-dwelling-counts-census-2016-sa4-2016  
