In [1]:
from ast import literal_eval

import folium
import folium.plugins
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Load the bike station table; the first CSV column is used as the index
stations = pd.read_csv('data/bike_stations.csv', index_col=0)

# One shapely Point per station, built from longitude (x) and latitude (y)
station_points = stations.apply(lambda row: Point(row.lon, row.lat), axis=1)

# Wrap the table as a GeoDataFrame whose geometry is the points above.
# The source coordinates are in degrees (EPSG:4326); reproject straight away
# to Web Mercator (EPSG:3857), which is in meters.
stations = gpd.GeoDataFrame(
    stations, geometry=station_points, crs='epsg:4326'
).to_crs(epsg=3857)

# Drop stations in Vantaa (no bike trip data)
in_vantaa = stations.city == 'Vantaa'
stations = stations.drop(index=stations.index[in_vantaa])

In [3]:
# Read neighborhood locations for Helsinki and Espoo (XML/GML)
# neighborhood = kaupunginosa
helsinki = gpd.read_file('https://kartta.hel.fi/ws/geoserver/avoindata/wfs?request=GetFeature&typeName=avoindata:Kaupunginosajako')
espoo = gpd.read_file('https://kartat.espoo.fi/teklaogcweb/wfs.ashx?request=GetFeature&typeName=GIS:Kaupunginosat')
# Both layers are declared to use this EPSG code
neighborhood_epsg = 3879
helsinki.set_crs(epsg=neighborhood_epsg, inplace=True)
espoo.set_crs(epsg=neighborhood_epsg, inplace=True);

# Preprocess Helsinki: keep only the id, the Finnish name and the geometry
hel = helsinki[['tunnus', 'nimi_fi', 'geometry']].copy()
hel.rename(columns={'nimi_fi': 'name', 'tunnus': 'number'}, inplace=True)
hel['name'] = hel['name'].str.lower()
# 'Aluemeri' is a non-numeric id, so it has to go before the int cast below
hel.drop(hel[hel.number == 'Aluemeri'].index, inplace=True)
hel.set_index(hel.number.astype(int), inplace=True)
hel.drop('number', axis=1, inplace=True)
hel.sort_index(inplace=True)

# Preprocess Espoo: same shape as Helsinki (name + geometry, indexed by number)
esp = espoo.drop(['gml_id', 'KAUPUNGINOSANNIMI_SE'], axis=1)
esp.rename(columns={'KAUPUNGINOSANNIMI_FI': 'name', 'KAUPUNGINOSANNUMERO': 'number'}, inplace=True)
esp['name'] = esp['name'].str.lower()
esp.set_index(esp.number.astype(int), inplace=True)
esp.drop('number', axis=1, inplace=True)
esp.sort_index(inplace=True)

# Join neighborhoods as one
nei = pd.concat([hel, esp], ignore_index=True)
# Helsinki neighborhoods will have their actual 'number' as index, Espoo will have wrong numbers.
nei.index += 1

# Remove neighborhoods that don't have bike stations, except for Eira (6) and Suomenlinna (52).
# Reproject the stations once, not once per neighborhood polygon.
stations_in_neighborhood_crs = stations.to_crs(epsg=neighborhood_epsg)
has_station = nei.geometry.apply(
    lambda area: stations_in_neighborhood_crs.within(area).any()
)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
nei = pd.concat([nei[has_station], hel.loc[[6, 52]]])
nei.sort_index(inplace=True)
nei.head(6)

Unnamed: 0,name,geometry
1,kruununhaka,"POLYGON ((25497245.691 6673725.015, 25497247.6..."
2,kluuvi,"POLYGON ((25496230.663 6674007.871, 25496260.2..."
3,kaartinkaupunki,"POLYGON ((25496735.972 6672760.566, 25496754.8..."
4,kamppi,"POLYGON ((25495593.942 6672440.069, 25495597.9..."
5,punavuori,"POLYGON ((25495960.748 6671953.785, 25495956.6..."
6,eira,"POLYGON ((25496320.850 6671637.111, 25496298.1..."


In [4]:
events = pd.read_csv('data/events_with_ratios_2019.csv')
# Parse event timestamps as UTC, then convert them to the same timezone as the bike trips
for time_column in ('start_time', 'end_time'):
    events[time_column] = (
        pd.to_datetime(events[time_column], utc=True).dt.tz_convert('Europe/Helsinki')
    )

# Turn events to GeoDataFrame, with one Point per event built from lon (x) / lat (y)
event_points = events.apply(lambda row: Point(row.lon, row.lat), axis=1)
events = gpd.GeoDataFrame(events, geometry=event_points, crs='epsg:4326')

# The 'stations' column is read from CSV as text; parse it back into Python lists
events['stations'] = events['stations'].apply(literal_eval)
# Remove quotes from event name (folium doesn't render otherwise).
# regex=True is required: since pandas 2.0 the pattern is otherwise treated as a
# literal string and the character class would never match.
events['name'] = events['name'].str.replace("['`\"]", '', regex=True)

In [5]:
# Put both layers into the same coordinate system.
# EPSG:3857 is Web Mercator (meters, projected); EPSG:4326 is degrees (unprojected).
for layer in (nei, stations):
    layer.to_crs(epsg=4326, inplace=True)

In [6]:
def get_station_markers():
    """Build the 'Bike Stations' map layer from the global `stations` frame.

    Every row of `stations` becomes one orange bicycle marker whose tooltip is
    the station's index value and whose popup lists that value together with
    the row's 'name' and 'address'.

    Returns:
        folium.FeatureGroup: the layer, created with show=False.
    """
    layer = folium.FeatureGroup(name='Bike Stations', show=False)
    for station_id, row in stations.iterrows():
        point = row['geometry']
        details = f"""
                ID: {station_id}<br>
                Name: {row['name']}<br>
                Address: {row['address']}
                """
        folium.Marker(
            [point.y, point.x],  # marker location is given as [lat, lon]
            folium.Popup(details, max_width=200),
            icon=folium.Icon(color='orange', icon='bicycle', prefix='fa'),
            tooltip=station_id,
        ).add_to(layer)
    return layer

In [7]:
# Event category columns: everything from 'film' through 'sports' (label slice, inclusive)
category_columns = events.loc[:, 'film':'sports'].columns
category_columns

Index(['film', 'concert', 'cultural event', 'visual arts', 'lecture', 'music',
       'exhibition', 'activism', 'games', 'food', 'dance', 'theatre',
       'sports'],
      dtype='object')

In [8]:
# Marker icon name for each event category (used with prefix='fa' below)
icons = {
    'film': 'film',
    'concert': 'users',
    'cultural event': 'bank',
    'visual arts': 'eye',
    'lecture': 'graduation-cap',
    'music': 'music',
    'exhibition': 'picture-o',
    'activism': 'microphone',
    'games': 'play',
    'food': 'cutlery',
    'dance': 'female',
    'theatre': 'street-view',
    'sports': 'futbol-o',
}

def _ratio_color(start_ratio, end_ratio):
    """Pick a marker color from how the start ratio compares to the end ratio."""
    if start_ratio > end_ratio:
        return 'green'
    if start_ratio < end_ratio:
        return 'red'
    # Equal ratios (or a NaN on either side) end up here
    return 'blue'


def get_event_cluster():
    """Build the 'Events' marker cluster from the global `events` frame.

    Each event becomes one marker:
      * color: green when start_ratio > end_ratio, red when it is lower,
        blue otherwise;
      * icon: looked up in `icons` by the event's first truthy category column
        ('film' .. 'sports'), with a generic info icon as fallback;
      * popup: name (first 70 characters), categories, time span, date,
        both ratios and the event's 'stations' list.

    Returns:
        folium.plugins.MarkerCluster: the cluster, created with show=False.
    """
    # 'info' exists in Font Awesome; the previous fallback 'info-sign' is a
    # Glyphicon name and does not match prefix='fa'.
    fallback_icon = 'info'

    cluster = folium.plugins.MarkerCluster(name='Events', show=False)
    for _, event in events.iterrows():
        point = event['geometry']
        # Series.iteritems() was removed in pandas 2.0; items() is the supported spelling
        categories = [label for label, flag in event['film':'sports'].items() if flag]
        # An event may have no category flag set at all; don't index into an empty list
        icon_name = icons.get(categories[0], fallback_icon) if categories else fallback_icon

        #                {event['price']} €<br>
        icon = folium.Icon(
            color=_ratio_color(event['start_ratio'], event['end_ratio']),
            icon=icon_name,
            prefix='fa',
        )
        info = f"""
                <i>{event['name'][:70]}</i><br><br>
                
                {', '.join(map(str.title, categories))}<br>
                {event['start_time'].strftime('%H:%M')} - 
                {event['end_time'].strftime('%H:%M')}<br>
                {event['start_time'].strftime('%d/%m/%Y')}<br><br>
                
                Start ratio: <b>{event['start_ratio']:.2f}</b><br>
                End ratio: <b>{event['end_ratio']:.2f}</b><br>
                Nearest stations: {', '.join(map(str, event['stations']))}
                """

        popup = folium.Popup(info, max_width=200)
        # marker location is given as [lat, lon]
        folium.Marker([point.y, point.x], popup, icon=icon).add_to(cluster)
    return cluster

In [9]:
def get_neighborhood_choropleth():
    """Build the 'Neighborhoods' choropleth layer from the globals `events` and `nei`.

    Events are grouped by their 'neighborhood' column and summarised per group:
    mean start/end ratio, number of events (count of 'id') and the category
    column ('film' .. 'sports') with the largest sum. The summary is aligned to
    the index of `nei`, and the polygons are colored by
    ``bike_use = mean start_ratio - mean end_ratio``. Neighborhoods without
    events have no value and get the NaN fill color.

    Returns:
        folium.Choropleth: the layer (show=False) with a name tooltip on hover
        and a details popup on click.
    """
    per_neighborhood = events.groupby('neighborhood')
    category_columns = list(events.loc[:, 'film':'sports'].columns)

    # Per-neighborhood statistics
    mean_ratios = per_neighborhood[['start_ratio', 'end_ratio']].mean()
    num_events = per_neighborhood['id'].count().rename('num_events')
    top_category = (
        per_neighborhood[category_columns].sum()
        .idxmax(axis=1)
        .apply(str.title)
        .rename('top_category')
    )

    # Align the statistics to the neighborhoods we have polygons for. A plain
    # outer concat would also create geometry-less rows for event neighborhoods
    # that are missing from `nei`, which breaks the steps below.
    stats = pd.concat([mean_ratios, num_events, top_category], axis=1).reindex(nei.index)
    hoods = pd.concat([nei, stats], axis=1)

    # Value shown on the choropleth
    hoods['bike_use'] = hoods['start_ratio'] - hoods['end_ratio']
    hoods['name'] = hoods['name'].str.title()
    # Column on which the data is joined to the GeoJSON features (feature.id is
    # the index value rendered as a string)
    hoods['number'] = hoods.index.astype(str)
    hoods_geojson = hoods.to_json()

    cho = folium.Choropleth(hoods_geojson, data=hoods, columns=['number', 'bike_use'], key_on="feature.id",
                            bins=7, fill_color='YlGn', nan_fill_color='lightblue', line_color='white',
                            legend_name='Bike usage correlation with events', name='Neighborhoods',
                            line_weight=0.5, highlight=True, show=False)

    # Show name on hover
    cho.geojson.add_child(folium.features.GeoJsonTooltip(['name'], labels=False))
    # Show more info on click
    cho.geojson.add_child(folium.features.GeoJsonPopup(
        ['name', 'num_events', 'top_category', 'start_ratio', 'end_ratio'],
        aliases=['Name', 'Number of events', 'Top category',
                 'Average start ratio', 'Average end ratio']))
    return cho

In [10]:
# Assemble the final visualisation: base map plus the three layers and a layer switcher

canvas = folium.Map(location=[60.18, 24.94], zoom_start=12, control_scale=True)

# Build the layers; only the event cluster is switched on initially
event_cluster = get_event_cluster()
event_cluster.show = True
station_markers = get_station_markers()
choropleth = get_neighborhood_choropleth()

# Attach the layers in display order, then the control that toggles them
event_cluster.add_to(canvas)
station_markers.add_to(canvas)
choropleth.add_to(canvas)
folium.LayerControl().add_to(canvas)

canvas

In [11]:
# Uncomment to export the map as a standalone HTML file:
# canvas.save('docs/map.html')