In [133]:
import re

import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from graphly.api_client import SparqlClient

# TODO: write more targeted SPARQL queries to limit the data manipulation done in pandas

In [134]:
# SPARQL endpoint that all queries in this notebook are sent to.
ENDPOINT = "https://ld.zazuko.com/query/"

# Client bound to the endpoint; the prefixes registered here are the
# namespace abbreviations used by the query text further down.
sparql = SparqlClient(ENDPOINT)
sparql.add_prefixes(
    dict(
        schema="<http://schema.org/>",
        cube="<https://cube.link/>",
        property="<https://ld.stadt-zuerich.ch/statistics/property/>",
        measure="<https://ld.stadt-zuerich.ch/statistics/measure/>",
        skos="<http://www.w3.org/2004/02/skos/core#>",
        ssz="<https://ld.stadt-zuerich.ch/statistics/>",
    )
)

In [135]:
# SPARQL query against the cube ssz:QMP-EIG-HAA-OBJ-ZIM.
# For every observation it returns:
#   ?time   - value of property:TIME
#   ?place  - schema:name of the property:RAUM resource
#   ?rooms  - schema:name of the property:ZIM resource
#   ?price  - value of measure:QMP (plotted below as "CHF per m2")
# Only places in the ".../scheme/Kreis" concept scheme are kept, and only
# those whose name matches the regex "ab|Stadtgebiet vor"; observations
# with a price <= 0 are dropped. Rows are ordered by ?time.
query = """
    SELECT ?time ?place ?rooms ?price
    FROM <https://lindas.admin.ch/stadtzuerich/stat>
    WHERE {
      ssz:QMP-EIG-HAA-OBJ-ZIM a cube:Cube;
                 cube:observationSet/cube:observation ?observation.   
      ?observation property:TIME ?time ;
                           property:RAUM ?place_uri;
                           property:ZIM/schema:name ?rooms;
                           measure:QMP ?price .
      ?place_uri skos:inScheme <https://ld.stadt-zuerich.ch/statistics/scheme/Kreis> ;
             schema:name ?place .
      FILTER regex(str(?place),"ab|Stadtgebiet vor")
      FILTER (?price > 0)
    }
    ORDER BY ?time
"""

# Run the query; the result is used as a pandas-style DataFrame by the
# cells below (columns: time, place, rooms, price).
df = sparql.send_query(query)
df.head()

Unnamed: 0,time,place,rooms,price
0,2009-12-31,Kreis 3 (ab 1915),"4- und 4,5-Zimmer Wohnung",7958.0
1,2009-12-31,Kreis 4 (ab 1915),"4- und 4,5-Zimmer Wohnung",5647.0
2,2009-12-31,Kreis 5 (ab 1915),"4- und 4,5-Zimmer Wohnung",8993.0
3,2009-12-31,Kreis 9 (ab 1934),"4- und 4,5-Zimmer Wohnung",7653.0
4,2009-12-31,Kreis 8 (ab 1915),"4- und 4,5-Zimmer Wohnung",11195.0


In [136]:
# Inspect the raw district labels before they are normalised.
df["place"].unique()

array(['Kreis 3 (ab 1915)', 'Kreis 4 (ab 1915)', 'Kreis 5 (ab 1915)',
       'Kreis 9 (ab 1934)', 'Kreis 8 (ab 1915)', 'Kreis 7 (ab 1934)',
       'Kreis 6 (ab 1934)', 'Kreis 10 (ab 1934)', 'Kreis 2 (ab 1893)',
       'Kreis 11 (ab 1970)',
       'altes Quartier Schwamendingen (1934-1969); Kreis 12 (ab 1970)',
       'Kreis 1 (Stadtgebiet vor 1893)'], dtype=object)

In [137]:
# Inspect the raw room-count labels before they are normalised.
df["rooms"].unique()

array(['4- und 4,5-Zimmer Wohnung', '6- und 6,5-Zimmer Wohnung',
       '5- und 5,5-Zimmer Wohnung', '1- und 1,5-Zimmer Wohnung',
       '8- und mehr-Zimmer Wohnung', '7- und 7,5-Zimmer Wohnung',
       '2- und 2,5-Zimmer Wohnung', '3- und 3,5-Zimmer Wohnung'],
      dtype=object)

In [138]:
# Normalise the label columns returned by the query:
#   place: "Kreis 3 (ab 1915)"          -> "Kreis 3"
#   rooms: "4- und 4,5-Zimmer Wohnung"  -> 4   (first number in the label)
#
# Raw-string patterns avoid the invalid "\d" escape of a plain string
# literal, and `.str.extract` returns the first match like the original
# `re.findall(...)[0]`. Casting `rooms` to str first keeps this cell
# idempotent: re-running it on already-converted integers is a no-op
# instead of a TypeError.
df["place"] = df["place"].str.extract(r"(Kreis \d+)", expand=False)
df["rooms"] = (
    df["rooms"].astype(str).str.extract(r"(\d+)", expand=False).astype(int)
)
df.head()

Unnamed: 0,time,place,rooms,price
0,2009-12-31,Kreis 3,4,7958.0
1,2009-12-31,Kreis 4,4,5647.0
2,2009-12-31,Kreis 5,4,8993.0
3,2009-12-31,Kreis 9,4,7653.0
4,2009-12-31,Kreis 8,4,11195.0


In [97]:
# PRICE + ROOMS => histogram
# Mean price per room count, restricted to the most recent time stamp.
# Means are truncated to integers; the result has columns rooms, price.
plotting_df = (
    df.loc[df["time"] == df["time"].max(), ["rooms", "price"]]
    .groupby(["rooms"])
    .mean()
    .astype(int)
    .sort_values(by="rooms")
    .reset_index()
)
plotting_df

Unnamed: 0,rooms,price
0,1,12706
1,2,12841
2,3,12618
3,4,12138
4,5,13792
5,6,13361
6,7,12146
7,8,20804


In [102]:
# Bar chart of the mean price for each room count.
fig = px.bar(
    data_frame=plotting_df,
    x="rooms",
    y="price",
    title="Housing prices in Zurich",
    labels={"price": "CHF per m2"},
)
fig.show()

In [103]:
# PRICE + DISTRICTS => barplot
# Mean price per district at the most recent time stamp, truncated to
# integers and ordered from cheapest to most expensive.
plotting_df = (
    df.loc[df["time"] == df["time"].max(), ["place", "price"]]
    .groupby(["place"])
    .mean()
    .astype(int)
    .sort_values(by="price")
    .reset_index()
)
plotting_df

Unnamed: 0,place,price
0,Kreis 12,8748
1,Kreis 9,9995
2,Kreis 4,10620
3,Kreis 11,11051
4,Kreis 10,11281
5,Kreis 3,12380
6,Kreis 1,13466
7,Kreis 6,13818
8,Kreis 5,14176
9,Kreis 2,14624


In [105]:
# Bar chart of the mean price for each district (x axis relabelled).
fig = px.bar(
    data_frame=plotting_df,
    x="place",
    y="price",
    title="Housing prices in Zurich",
    labels={"price": "CHF per m2", "place": "district"},
)
fig.show()

In [225]:
# PRICE + ROOM + PLACE => ridgeline; histogram overlay; heatmap
# One row per (district, room count) at the most recent time stamp,
# limited to flats with at most 4 rooms. `avg_price` is the mean price of
# the district over those rows and is used to order districts from
# cheapest to most expensive (then by room count within a district).
#
# `.loc` + `.assign` build a fresh frame instead of writing a new column
# into a filtered slice of `df`, which would trigger SettingWithCopyWarning.
plot_df = (
    df.loc[
        (df["time"] == df["time"].max()) & (df["rooms"] <= 4),
        ["place", "rooms", "price"],
    ]
    .assign(avg_price=lambda d: d.groupby("place")["price"].transform("mean"))
    .sort_values(by=["avg_price", "rooms"])
)

# Show the frame built in this cell (the original displayed `plotting_df`,
# the per-district table from an earlier cell, by mistake).
plot_df.head()

Unnamed: 0,place,rooms,price,avg_price
550,Kreis 12,1,7826.0,8748.5
566,Kreis 12,2,8404.0,8748.5
531,Kreis 12,3,8987.0,8748.5
534,Kreis 12,4,9777.0,8748.5
558,Kreis 4,1,9955.0,10620.75


In [222]:
# Small-multiples bar chart: one subplot per district, price by room count.
#
# NOTE(review): the original read from `dff`, which is not defined anywhere
# in this notebook. `plot_df` from the preceding cell has the required
# columns (place, rooms, price) and district ordering, so it is used here;
# confirm this matches the intended data.
districts = plot_df["place"].unique().tolist()  # plain list, computed once
n_cols = 4

fig = make_subplots(
    rows=3,
    cols=n_cols,
    subplot_titles=districts,
    shared_yaxes=True,
    y_title='CHF per m2',
    x_title='rooms',
    vertical_spacing=0.1,
)

for i, district in enumerate(districts):
    # Fill the grid row by row; plotly rows/cols are 1-based.
    row, col = divmod(i, n_cols)
    subset = plot_df[plot_df["place"] == district]
    # `add_trace` replaces the deprecated `append_trace`.
    fig.add_trace(
        go.Bar(
            x=subset["rooms"],
            y=subset["price"],
            name=district,
            marker_color=px.colors.qualitative.Dark24[0],
        ),
        row=row + 1,
        col=col + 1,
    )

fig.update_layout(height=800, width=1000, title={"text": "Housing prices in Zurich", "x": 0.5}, showlegend=False)
# Same fixed y range on every subplot so districts are directly comparable.
fig.update_yaxes(range=[0,20000])
fig.show()

In [286]:
# PRICE + TIME => ts
# PRICE + TIME + ROOMS => ts
# PRICE + TIME + PLACE => ts

# Mean price per time stamp across all districts and room counts;
# the result has columns time, price.
plot_df = df.groupby("time")["price"].mean().reset_index()
plot_df.head()

Unnamed: 0,time,price
0,2009-12-31,8702.231884
1,2010-12-31,9206.983871
2,2011-12-31,10061.363636
3,2012-12-31,11442.333333
4,2013-12-31,11096.770492


In [287]:
# Time series of the overall mean price. Title and unit label are set so the
# figure stands alone, consistent with the other charts in this notebook.
fig = px.line(
    plot_df,
    x="time",
    y="price",
    title="Housing prices in Zurich",
    labels={"price": "CHF per m2"},
)
fig.show()

In [288]:
# Wide table: one row per time stamp, one column per room count, cells hold
# the mean price. The string "mean" is passed instead of `np.mean` because
# recent pandas versions emit a FutureWarning for NumPy callables here.
plot_df = pd.pivot_table(
    df, index="time", columns="rooms", values="price", aggfunc="mean"
).reset_index()
plot_df

rooms,time,1,2,3,4,5,6,7,8
0,2009-12-31,8262.818182,8375.75,8570.636364,8296.636364,9629.4,9994.666667,7241.25,9571.75
1,2010-12-31,9363.8,8776.272727,9397.666667,8553.818182,8574.4,9704.2,10356.0,18815.0
2,2011-12-31,8950.727273,12086.166667,8647.166667,9840.833333,9371.1,10297.0,12704.0,14049.0
3,2012-12-31,10207.0,10508.916667,10583.0,11544.909091,11655.4,13159.857143,18831.0,13346.0
4,2013-12-31,10805.571429,10163.181818,11118.0,11710.75,11023.0,12801.8,11085.0,8092.0
5,2014-12-31,11492.3,10387.666667,10257.0,11924.5,11145.777778,12105.0,14548.0,10094.0
6,2015-12-31,9958.666667,10774.25,11555.916667,12144.666667,12305.909091,12760.428571,16236.0,
7,2016-12-31,11721.6,12258.363636,11409.545455,12904.0,11339.125,12756.8,16469.666667,7570.0
8,2017-12-31,12706.363636,12841.166667,12618.0,12138.833333,13792.444444,13361.5,12146.0,20804.0


In [337]:
# Stacked time series of the mean price for 2-, 4- and 6-room flats,
# one subplot row per room count.
fig = make_subplots(rows=3, cols=1, y_title='CHF per m2')

for i, j in enumerate([2,4,6]):
    # `plot_df` is the pivoted frame whose columns are the integer room
    # counts. `add_trace` replaces the deprecated `append_trace`.
    fig.add_trace(
        go.Scatter(
            x=plot_df["time"],
            y=plot_df[j],
            name=f"Rooms: {j}",
            marker_color=px.colors.qualitative.Dark24[i],
        ),
        row=i + 1,
        col=1,
    )

fig.update_layout(title={"text": "Housing prices in Zurich", "x": 0.5}, showlegend=True)
# Same fixed y range on all three rows so the series are comparable.
fig.update_yaxes(range=[8000,14000])
fig.show()