In [1]:
import pandas as pd
import folium
import datetime

## Introduction
This notebook presents different techniques for visualizing data with [folium](https://python-visualization.github.io/folium/), a library that lets you manipulate data in Python and then visualize it on a Leaflet map.

The data analysed covers property prices in Ireland and can be downloaded from the [Property Price Register official site](https://www.propertypriceregister.ie/).

The aim was to use a Folium choropleth map with a slider to show how property prices have evolved month over month and which areas were the most expensive.

### Data Loading & Cleanup

Save the data obtained from the Property Price Register as 'PPR-ALL.csv' and load it into a dataframe.

In [28]:
# Load the raw Property Price Register export. `sep` is passed by keyword
# because recent pandas versions only accept the file path positionally.
df = pd.read_csv("PPR-ALL.csv", sep=',', encoding='utf-8')

In [29]:
df.shape

(369860, 9)

In [30]:
df.columns

Index(['Date of Sale (dd/mm/yyyy)', 'Address', 'Postal Code', 'County',
       'Price', 'Not Full Market Price', 'VAT Exclusive',
       'Description of Property', 'Property Size Description'],
      dtype='object')

Format price column

In [31]:
ascii(df['Price'][0][0])

"'\\ufffd'"

In [32]:
# Turn the price text into floats: drop the currency symbol (either a real
# euro sign or the U+FFFD replacement character seen in this file) and the
# thousands separators. Casting to str first keeps the cell re-runnable once
# the column is already numeric and lets missing values pass through as NaN.
df['Price'] = (
    df['Price']
    .astype(str)
    .str.replace('[\ufffd\u20ac,]', '', regex=True)
    .astype(float)
)

In [6]:
#df.dropna(subset = ['Price'], inplace = True)

All properties are categorized via Description and Size. As for geolocation we only know the County of the property

In [33]:
df['Description of Property'].unique()

array(['Second-Hand Dwelling house /Apartment',
       'New Dwelling house /Apartment',
       'Second-Hannd Dwelling house /Apartment',
       'Teach/�ras�n C�naithe Ath�imhe', 'Teach/�ras�n C�naithe Nua',
       'Teach/?ras?n C?naithe Nua'], dtype=object)

In [37]:
def fix_description(desc):
    """Normalise a 'Description of Property' value.

    The register mixes English and Irish-language labels, some of them
    misspelled or mis-encoded. This maps every recognised variant onto one
    of the two English labels already used in the data.

    Parameters
    ----------
    desc : str
        Raw description text. Non-string values (e.g. NaN) are returned
        unchanged instead of raising.

    Returns
    -------
    str
        "second-hand house/apartment" for second-hand properties,
        'New Dwelling house /Apartment' for new ones, 'Teach/' for an
        unrecognised Irish-language label, otherwise `desc` as given.
    """
    if not isinstance(desc, str):
        return desc
    # Covers both 'Second-Hand' and the misspelled 'Second-Hannd'.
    if 'Second-' in desc:
        return "second-hand house/apartment"
    if 'Teach' in desc:
        # Irish-language labels: '... Nua' means new, '... Ath...' means
        # second-hand. Only the ASCII fragments are matched because the
        # accented characters are garbled in the source file.
        if 'Nua' in desc:
            return 'New Dwelling house /Apartment'
        if 'Ath' in desc:
            return "second-hand house/apartment"
        return 'Teach/'
    return desc

Fix formatting of property description

In [38]:
# Normalise every description label in place of the raw text.
description_column = 'Description of Property'
df[description_column] = df[description_column].apply(fix_description)

In [34]:
df['Property Size Description'].unique()

array([nan,
       'greater than or equal to 38 sq metres and less than 125 sq metres',
       'greater than 125 sq metres', 'less than 38 sq metres',
       'greater than or equal to 125 sq metres',
       'n�os m� n� n� cothrom le 38 m�adar cearnach agus n�os l� n� 125 m�adar cearnach',
       'n?os l? n? 38 m?adar cearnach'], dtype=object)

In [35]:
df['County'].unique()

array(['Dublin', 'Laois', 'Meath', 'Kilkenny', 'Limerick', 'Carlow',
       'Cork', 'Clare', 'Sligo', 'Cavan', 'Tipperary', 'Wicklow',
       'Roscommon', 'Wexford', 'Mayo', 'Donegal', 'Longford', 'Galway',
       'Offaly', 'Kildare', 'Waterford', 'Louth', 'Kerry', 'Westmeath',
       'Monaghan', 'Leitrim'], dtype=object)

DataFrame.groupby(by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, **kwargs)[source]¶

Format date column

In [39]:
# Parse the dd/mm/yyyy sale dates in one vectorised call (explicit format, so
# day and month are never swapped) instead of calling strptime row by row.
df['date'] = pd.to_datetime(df['Date of Sale (dd/mm/yyyy)'], format='%d/%m/%Y')

In [40]:
# Calendar year and month of each sale, taken through the datetime accessor
# rather than a per-row lambda.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month

In [41]:
df.head(5)

Unnamed: 0,Date of Sale (dd/mm/yyyy),Address,Postal Code,County,Price,Not Full Market Price,VAT Exclusive,Description of Property,Property Size Description,date,year,month
0,01/01/2010,"5 Braemor Drive, Churchtown, Co.Dublin",,Dublin,343000.0,No,No,second-hand house/apartment,,2010-01-01,2010,1
1,03/01/2010,"134 Ashewood Walk, Summerhill Lane, Portlaoise",,Laois,185000.0,No,Yes,New Dwelling house /Apartment,greater than or equal to 38 sq metres and less...,2010-01-03,2010,1
2,04/01/2010,"1 Meadow Avenue, Dundrum, Dublin 14",,Dublin,438500.0,No,No,second-hand house/apartment,,2010-01-04,2010,1
3,04/01/2010,"1 The Haven, Mornington",,Meath,400000.0,No,No,second-hand house/apartment,,2010-01-04,2010,1
4,04/01/2010,"11 Melville Heights, Kilkenny",,Kilkenny,160000.0,No,No,second-hand house/apartment,,2010-01-04,2010,1


### CHOROPLETH MAP BY COUNTY 

To make a choropleth map with a slider, only price and county information is needed; prices will be aggregated by month.

In [42]:
# Keep only the columns needed for the per-county monthly aggregation.
county_columns = ['County', 'Price', 'year', 'month']
by_county = df.loc[:, county_columns]

In [43]:
# Total sale value per county and calendar month. The price column is
# selected explicitly; `.agg('Price')` only worked because the string was
# resolved as an attribute lookup on the groupby object.
by_county = by_county.groupby(['County', 'year', 'month'])['Price'].sum()

In [44]:
by_county.min(), by_county.max()

(333853.99, 983975061.2300009)

Convert totals to millions (values are truncated to whole units first)

In [30]:
# Truncate to whole units, then express the totals in millions.
whole_units = by_county.astype(int)
by_county = whole_units.div(1000000)

In [31]:
# Move the groupby keys out of the index so they become ordinary columns
# (the .head() output further down shows County, year, month, Price).
by_county = by_county.reset_index()

In [32]:
by_county.max()

County    Wicklow
year         2019
month          12
Price     983.975
dtype: object

In [33]:
### check the quantiles by house price

In [34]:
# Selected quantiles of the monthly county totals, plus the row count.
i = by_county['Price'].quantile([0.1, 0.25, 0.5, 0.75, 1])
j = by_county['Price'].agg(['count'])

# Show both side by side; `axis` must be a keyword argument in current pandas.
pd.concat([i, j], axis=1)

Unnamed: 0,Price,Price.1
0.1,2.54344,
0.25,4.625162,
0.5,9.216138,
0.75,19.15844,
1.0,983.975061,
count,,2964.0


In [24]:
by_county.head()

Unnamed: 0,County,year,month,Price
0,Carlow,2010,1,2.541683
1,Carlow,2010,2,2.126469
2,Carlow,2010,3,2.203867
3,Carlow,2010,4,2.5855
4,Carlow,2010,5,3.496886


In [35]:
# Sample slices for the static map: January and February 2010.
in_2010 = by_county['year'] == 2010
test = by_county[in_2010 & (by_county['month'] == 1)]
test2 = by_county[in_2010 & (by_county['month'] == 2)]

In [36]:
# Only the county key and its total are needed for the choropleth.
map_columns = ['County', 'Price']
test = test.loc[:, map_columns]
test2 = test2.loc[:, map_columns]

In [37]:
# Renumber rows from zero; the previous row labels are kept as an 'index' column.
test, test2 = test.reset_index(), test2.reset_index()

In [38]:
test2.head()

Unnamed: 0,index,County,Price
0,1,Carlow,2.126469
1,115,Cavan,3.297104
2,229,Clare,6.03785
3,343,Cork,42.713664
4,457,Donegal,5.872675


In [39]:
# Quartile boundaries of the January 2010 totals, used as colour-class edges.
quartile_levels = [0, 0.25, 0.5, 0.75, 1]
bins = list(test['Price'].quantile(quartile_levels))

In [40]:
#bins = list(test['Price'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,0.8, 0.9,1]))

In [41]:
bins

[0.601469, 2.42559725, 4.9281310000000005, 7.729165500000001, 137.715197]

In [42]:
# GeoJSON file with the county outlines (plain string: nothing to interpolate).
state_geo = 'ireland.json'

# Base map; the coordinates and zoom are hard-coded for this view.
m = folium.Map(location=[53.305494, -7.737649], zoom_start=6)

# Shade each county by its 'Price' value from `test`. Rows are matched to
# GeoJSON features by comparing the 'County' column with
# feature.properties.id, and `bins` supplies the colour-class boundaries.
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=test,
    columns=['County', 'Price'],
    key_on='feature.properties.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    nan_fill_color='yellow',
    legend_name='Total property price in Mil',
    bins=bins
).add_to(m)

folium.LayerControl().add_to(m)


<folium.map.LayerControl at 0x7efefe1bdb00>

In [43]:
m

## Choropleth with Time Slider

adding time slider Example:
    https://github.com/python-visualization/folium/blob/master/examples/TimeSliderChoropleth.ipynb

In [44]:
from folium import plugins
plugins.TimeSliderChoropleth

folium.plugins.time_slider_choropleth.TimeSliderChoropleth

##### Creating styledictionary

In [45]:
df['date'].min()

Timestamp('2010-01-01 00:00:00')

In [46]:
df['date'].max()

Timestamp('2019-06-14 00:00:00')

In [47]:
# Smallest and largest monthly county total across the whole data set.
price_totals = by_county['Price']
min_color = price_totals.min()
max_color = price_totals.max()

In [48]:
min_color, max_color

(0.333853, 983.975061)

In [49]:
from branca.colormap import linear


# Purple-red colour scale over the fixed range 0..100.
# NOTE(review): the range is hard-coded rather than taken from
# min_color/max_color (max is ~984) — presumably deliberate so the few very
# large totals do not wash out the rest; confirm.
cmap = linear.PuRd_09.scale(vmin=0, vmax=100)


In [50]:
data = by_county.copy()

In [51]:
data.columns

Index(['County', 'year', 'month', 'Price'], dtype='object')

In [53]:
# Colour for every county/month row, looked up from the colour scale.
data['color'] = [cmap(total) for total in data['Price']]

In [54]:
data['County'].unique()

array(['Carlow', 'Cavan', 'Clare', 'Cork', 'Donegal', 'Dublin', 'Galway',
       'Kerry', 'Kildare', 'Kilkenny', 'Laois', 'Leitrim', 'Limerick',
       'Longford', 'Louth', 'Mayo', 'Meath', 'Monaghan', 'Offaly',
       'Roscommon', 'Sligo', 'Tipperary', 'Waterford', 'Westmeath',
       'Wexford', 'Wicklow'], dtype=object)

In [55]:
data[data['County'] == 'Carlow'].head()

Unnamed: 0,County,year,month,Price,color
0,Carlow,2010,1,2.541683,#f4f1f7
1,Carlow,2010,2,2.126469,#f5f1f8
2,Carlow,2010,3,2.203867,#f5f1f8
3,Carlow,2010,4,2.5855,#f4f1f7
4,Carlow,2010,5,3.496886,#f3eff7


In [56]:
# For each county, build a frame indexed by epoch-second strings that holds
# the fill colour and opacity for every month.
styledata = {}
for county in data['County'].unique():
    # Work on an explicit copy: adding columns to a filtered slice of `data`
    # is what raised pandas' SettingWithCopyWarning.
    county_data = data[data['County'] == county].copy()
    # First day of the month as a Unix timestamp, stored as a string key.
    # NOTE(review): the datetime is naive, so .timestamp() interprets it in
    # the machine's local timezone — confirm that is intended.
    county_data['epoch'] = county_data.apply(
        lambda row: int(datetime.datetime(row['year'], row['month'], 1).timestamp()),
        axis=1,
    ).astype('U10')
    county_data['opacity'] = 1
    styledata[county] = county_data[['epoch', 'color', 'opacity']].set_index('epoch')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [57]:
styledata['Carlow'].head()

Unnamed: 0_level_0,color,opacity
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1
1262300400,#f4f1f7,1
1264978800,#f5f1f8,1
1267398000,#f5f1f8,1
1270072800,#f4f1f7,1
1272664800,#f3eff7,1


In [58]:
# Convert each county's style frame into {epoch: {'color': ..., 'opacity': ...}}.
styledict = {
    str(county_name): style_frame.to_dict(orient='index')
    for county_name, style_frame in styledata.items()
}

In [60]:
# Single-county subset of the style dictionary, used for inspection.
styledict2 = {'Carlow': styledict.get('Carlow')}

In [61]:
styledict2

{'Carlow': {'1262300400': {'color': '#f4f1f7', 'opacity': 1},
  '1264978800': {'color': '#f5f1f8', 'opacity': 1},
  '1267398000': {'color': '#f5f1f8', 'opacity': 1},
  '1270072800': {'color': '#f4f1f7', 'opacity': 1},
  '1272664800': {'color': '#f3eff7', 'opacity': 1},
  '1275343200': {'color': '#f5f2f8', 'opacity': 1},
  '1277935200': {'color': '#f3eff7', 'opacity': 1},
  '1280613600': {'color': '#f2eef6', 'opacity': 1},
  '1283292000': {'color': '#f2eef6', 'opacity': 1},
  '1285884000': {'color': '#f3f0f7', 'opacity': 1},
  '1288566000': {'color': '#f0ecf5', 'opacity': 1},
  '1291158000': {'color': '#f1edf5', 'opacity': 1},
  '1293836400': {'color': '#f5f1f8', 'opacity': 1},
  '1296514800': {'color': '#f4f1f8', 'opacity': 1},
  '1298934000': {'color': '#f5f1f8', 'opacity': 1},
  '1301608800': {'color': '#f6f2f8', 'opacity': 1},
  '1304200800': {'color': '#f5f2f8', 'opacity': 1},
  '1306879200': {'color': '#f4f0f7', 'opacity': 1},
  '1309471200': {'color': '#f4f1f8', 'opacity': 1},
  

In [62]:
import json
# Read the county outlines into a dict. The encoding is given explicitly so
# the result does not depend on the platform's default locale encoding.
with open('ireland.json', encoding='utf-8') as json_file:
    geo_data = json.load(json_file)

In [63]:
# Copy each feature's properties.id up to a top-level 'id' key.
for feature in geo_data['features']:
    feature.update(id=feature['properties']['id'])

In [64]:
#geo_data['features'] = [geo_data['features'][0]]

In [65]:
geo_data['features']


[{'geometry': {'coordinates': [[[-6.592415, 52.708085],
     [-6.614788, 52.654218],
     [-6.71789, 52.633128],
     [-6.810019, 52.481218],
     [-6.916185, 52.446304],
     [-6.914688, 52.590519],
     [-6.975609, 52.653791],
     [-6.952951, 52.705377],
     [-7.090468, 52.732311],
     [-7.072441, 52.797934],
     [-7.065957, 52.822872],
     [-6.915828, 52.856717],
     [-6.735489, 52.884007],
     [-6.712332, 52.924335],
     [-6.463234, 52.893982],
     [-6.51247, 52.826791],
     [-6.631176, 52.831494],
     [-6.592415, 52.708085]]],
   'type': 'Polygon'},
  'id': 'Carlow',
  'properties': {'id': 'Carlow'},
  'type': 'Feature'},
 {'geometry': {'coordinates': [[[-7.311848, 54.114105],
     [-7.188226, 54.079548],
     [-7.03596, 54.08653],
     [-6.753231, 53.901916],
     [-6.753302, 53.900348],
     [-6.779095, 53.873842],
     [-6.942976, 53.874982],
     [-6.95573, 53.766893],
     [-7.275153, 53.783851],
     [-7.28235, 53.795394],
     [-7.394715, 53.781713],
     [-7.471

In [66]:
# Set the top-level id of the first feature to 'Carlow'.
# NOTE(review): the geo_data['features'] output above already shows
# 'id': 'Carlow' on this feature, so this assignment looks redundant — confirm.
geo_data['features'][0]['id'] = 'Carlow'

In [67]:
# Serialise the GeoJSON dict to a JSON string.
# NOTE: this rebinds `geo_data` from a dict to a str, so the cells above that
# index into geo_data (and this cell itself) cannot be re-run without
# reloading the file first.
geo_data = json.dumps(geo_data)

# TimestampedWmsTileLayers

In [62]:
plugins.TimestampedWmsTileLayers

folium.plugins.timestamped_wmstilelayer.TimestampedWmsTileLayers

#### Exploring WMS with OWSLib
https://geopython.github.io/OWSLib/

In [88]:
from owslib.wms import WebMapService
# GEBCO web map service endpoint; `url` is reused below when the folium WMS
# layer is built.
url='https://www.gebco.net/data_and_products/gebco_web_services/web_map_service/mapserv'
# Service client, requesting WMS version 1.1.1.
wms = WebMapService(url, version='1.1.1')



In [89]:
wms.identification.type

'OGC:WMS'

In [90]:
name = wms.identification.title

In [91]:
from folium.plugins import TimestampedWmsTileLayers
from folium import WmsTileLayer

In [94]:
dict(wms.contents)

{'GEBCO_08_GRID': <owslib.map.wms111.ContentMetadata at 0x7efed6a45400>,
 'GEBCO_Grid': <owslib.map.wms111.ContentMetadata at 0x7efeda584748>,
 'GEBCO_LATEST': <owslib.map.wms111.ContentMetadata at 0x7efed6a455f8>,
 'GEBCO_LATEST_2': <owslib.map.wms111.ContentMetadata at 0x7efed6a45da0>,
 'GEBCO_LATEST_SID': <owslib.map.wms111.ContentMetadata at 0x7efed6a456a0>}

In [95]:
# Pick one layer from the service. Note that `wms` is rebound from the
# service object to that layer's metadata, so this cell cannot be re-run
# without recreating the service first.
layer = 'GEBCO_Grid'
wms = wms.contents[layer]
name = wms.title

# Midpoint of the layer's bounding box (min x, min y, max x, max y, CRS).
bbox = wms.boundingBox
lon = (bbox[0] + bbox[2]) / 2.
lat = (bbox[1] + bbox[3]) / 2.
center = (lat, lon)



In [99]:
wms.boundingBox

(-180.0, -90.0, 360.0, 90.0, 'EPSG:4326')

In [105]:
df['date'].min().isoformat()

'2010-01-01T00:00:00'

In [106]:
# ISO-8601 "start/end" interval spanning the first and last sale dates.
first_sale = df['date'].min().isoformat()
last_sale = df['date'].max().isoformat()
time_interval = f'{first_sale}/{last_sale}'

# Use the requested style only if the selected layer advertises it.
requested_style = 'boxfill/sst_36'
style = requested_style if requested_style in wms.styles else None

In [107]:
# New base map (this rebinds `m`, replacing the choropleth map built earlier).
m = folium.Map(location=[53.305494, -7.737649], tiles='Stamen Toner',  zoom_start=6)

# WMS overlay for the layer chosen above, served from `url`.
# COLORSCALERANGE is not a named option in this call's other arguments;
# presumably it is forwarded to the WMS request as an extra parameter — verify.
w = folium.raster_layers.WmsTileLayer(
    url=url,
    name=name,
    styles=style,
    fmt='image/png',
    transparent=True,
    layers=layer,
    overlay=True,
    COLORSCALERANGE='1.2,28',
)

w.add_to(m)

# Wrap the WMS layer in the timestamped plugin, stepping in one-hour periods
# ('PT1H') across the sale-date interval computed earlier.
# NOTE(review): `time` is also the name of a standard-library module; it is
# not imported in the visible notebook, so nothing is shadowed here.
time = plugins.TimestampedWmsTileLayers(
    w,
    period='PT1H',
    time_interval=time_interval
)

time.add_to(m)

folium.LayerControl().add_to(m)

# Last expression of the cell: display the map.
m

#### Summary 
This does not really work with the data I have because I would need to create all the tile layers day by day.

# HEAT MAP
inspired by https://towardsdatascience.com/data-101s-spatial-visualizations-and-analysis-in-python-with-folium-39730da2adf


requires lat long data

In [None]:
from folium.plugins import HeatMap
# NOTE(review): this cell has never been executed (no execution count) and
# cannot run as written: `generateBaseMap` is not defined anywhere in the
# visible notebook, and `df` has no 'pickup_latitude' / 'pickup_longitude'
# columns (see the df.columns output). It needs geocoded coordinates first.
# Keep only sales from May onwards and give each row a weight of 1.
df_copy = df[df.month>4].copy()
df_copy['count'] = 1
base_map = generateBaseMap()
# Sum the weights per coordinate pair and pass [lat, lon, weight] rows to the heat map.
HeatMap(data=df_copy[['pickup_latitude', 'pickup_longitude', 'count']].groupby(['pickup_latitude', 'pickup_longitude']).sum().reset_index().values.tolist(), radius=8, max_zoom=13).add_to(base_map)

In [None]:
### fetch geocoordinates and use for heat map
Ideas on what to do next:
- prediction model on property prices ()