In [1]:
import geopandas as gpd
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
from pandas import DataFrame

In [2]:
def read_data(file_path: str) -> DataFrame:
    """
    Loads a CSV file into a pandas DataFrame.

    Args:
        file_path (str): Path of the CSV file, handed unchanged to ``pd.read_csv``.

    Returns:
        DataFrame: The parsed file contents, using the default ``pd.read_csv`` settings.

    """
    loaded_frame = pd.read_csv(file_path)
    return loaded_frame


# Create a geocoder instance for defining the cities based on their x and y
geolocator = Nominatim(user_agent="my_geocoder")

# The public Nominatim service allows at most one request per second, and this
# notebook issues one reverse lookup per station row. Throttle the bound method
# in place so every existing `geolocator.reverse(...)` call is rate limited.
# Service errors are retried by RateLimiter; a persistent failure is still
# raised (swallow_exceptions=False) so stations are never silently dropped.
geolocator.reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1, swallow_exceptions=False)


# Define a function to perform reverse geocoding
def reverse_geocode(x, y):
    """
    Performs reverse geocoding to obtain the city name based on coordinates (x, y).

    Args:
        x (float): The longitude coordinate.
        y (float): The latitude coordinate.

    Returns:
        str: The "city" field of the address found for the given coordinates.
            Returns an empty string if a coordinate is missing (None/NaN), if the
            location is not found, or if the result has no address or no "city" field.

    """
    # Missing coordinates (e.g. blank CSV cells read as NaN) cannot be geocoded;
    # skip the network request instead of sending "nan, nan" to the service.
    if pd.isna(x) or pd.isna(y):
        return ""

    # The query is "latitude, longitude", hence y before x.
    location = geolocator.reverse(f"{y}, {x}", exactly_one=True)
    if location is None:
        return ""

    # Tolerate results without an "address" entry instead of raising KeyError.
    address = location.raw.get("address") or {}
    return address.get("city", "")

In [3]:
# Load the data frame for bike station location
station_locations = read_data("data/station_locations.csv")


def _city_of_station(station_row):
    """Looks up the city of one station row from its "x" and "y" columns."""
    return reverse_geocode(station_row["x"], station_row["y"])


# Reverse geocode every station (one lookup per row) and store the result in a new column
station_locations["City"] = station_locations.apply(_city_of_station, axis=1)

# Keep only the stations whose city is 'Helsinki';
# reset_index() renumbers the rows and keeps the old labels in an "index" column
is_helsinki = station_locations["City"] == "Helsinki"
helsinki_stations = station_locations[is_helsinki].reset_index()

In [4]:
# Read the necessary data
raw_data = read_data("data/2021-08.csv")
location_df = gpd.GeoDataFrame(helsinki_stations,
                               geometry=gpd.points_from_xy(helsinki_stations.x, helsinki_stations.y)).set_crs("epsg:4326")

# Convert the columns to datetime objects
raw_data["departure_timestamp"] = pd.to_datetime(raw_data["Departure"])
raw_data["return_timestamp"] = pd.to_datetime(raw_data["Return"])

# Normalize data into events showing activity: every ride yields one departure
# event and one return event. The timestamp, station name and station id of an
# event are taken together from the same ride row. Stacking each column pair
# separately and joining the results by position is unsafe: where stack() drops
# missing values, one NaN shifts that column against the others from then on.
departure_events = raw_data[
    ["departure_timestamp", "Departure station name", "Departure station id"]
].rename(columns={
    "departure_timestamp": "timestamp",
    "Departure station name": "station",
    "Departure station id": "station_id",
})
return_events = raw_data[
    ["return_timestamp", "Return station name", "Return station id"]
].rename(columns={
    "return_timestamp": "timestamp",
    "Return station name": "station",
    "Return station id": "station_id",
})

# Combine both event types; an event with any missing field is dropped as a whole
# (the group-bys below would ignore such rows anyway)
data = pd.concat([departure_events, return_events], ignore_index=True).dropna()

# Add additional date columns
data["date"] = data["timestamp"].dt.date
# day_name() returns English names regardless of the system locale, so the
# three-letter abbreviations always match "Mon".."Sun" (strftime('%a') is locale-dependent)
data["weekday"] = data["timestamp"].dt.day_name().str[:3]
data["hour"] = data["timestamp"].dt.strftime('%H')

# Group the data by diverse aggregators:
# first the number of events per station, calendar date and hour ...
data_by_date = data.groupby(["station_id", "station", "date", "weekday", "hour"]).size().reset_index(name="count")
# ... then the mean of those counts per station, weekday and hour
# (only dates with at least one event in that hour contribute to the mean)
data_by_weekday = data_by_date.groupby(["station_id", "station", "weekday", "hour"])["count"].mean().reset_index(name="mean_activity")

In [5]:
# Attach the station coordinates and geometry to the per-weekday activity figures
station_geometry = location_df[["ID", "x", "y", "geometry"]]
data_with_location = data_by_weekday.merge(station_geometry, how="left", left_on=["station_id"], right_on=["ID"])

# Rows whose station found no match have no geometry after the left join; drop them
has_geometry = data_with_location["geometry"].notna()
data_with_location = gpd.GeoDataFrame(data_with_location[has_geometry])
data_with_location.head()

Unnamed: 0,station_id,station,weekday,hour,mean_activity,ID,x,y,geometry
0,1,Kaivopuisto,Fri,0,3.0,1.0,24.950211,60.15537,POINT (24.95021 60.15537)
1,1,Kaivopuisto,Fri,1,3.5,1.0,24.950211,60.15537,POINT (24.95021 60.15537)
2,1,Kaivopuisto,Fri,2,3.0,1.0,24.950211,60.15537,POINT (24.95021 60.15537)
3,1,Kaivopuisto,Fri,3,2.0,1.0,24.950211,60.15537,POINT (24.95021 60.15537)
4,1,Kaivopuisto,Fri,5,1.5,1.0,24.950211,60.15537,POINT (24.95021 60.15537)


In [6]:
import plotly.express as px
import dash
from dash import dcc
from dash import html

def create_dash_app(data):
    """
    Builds a Dash app that shows mean station activity on a map, animated by hour.

    The page contains a weekday dropdown and a map; the map is redrawn whenever
    the dropdown selection changes.

    Args:
        data (DataFrame): Activity table with the columns "weekday", "hour",
            "mean_activity", "station", "x" (longitude) and "y" (latitude).

    Returns:
        dash.Dash: The configured app. Starting the server is left to the caller.

    """
    # Create a Dash app
    app = dash.Dash(__name__)

    # Define the options for the dropdown menu
    weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    dropdown_options = [{"label": weekday, "value": weekday} for weekday in weekdays]

    # Define the layout of the Dash app
    app.layout = html.Div([
        dcc.Dropdown(
            id="weekday-dropdown",
            options=dropdown_options,
            value=None,
            placeholder="Select a weekday"
        ),
        dcc.Graph(id="map-graph")
    ])

    # Define the callback function to update the graph based on the selected weekday
    @app.callback(
        dash.dependencies.Output("map-graph", "figure"),
        [dash.dependencies.Input("weekday-dropdown", "value")]
    )
    def update_graph(weekday):
        """Redraws the map for the selected weekday (all weekdays when none is selected)."""
        if weekday is None:
            filtered_data = data
        else:
            filtered_data = data[data["weekday"] == weekday]

        # Animation frames and legend entries otherwise follow the order of first
        # appearance in the rows, so an hour missing for the first station would
        # end up out of sequence. Pin both orders explicitly.
        hours = sorted(filtered_data["hour"].unique().tolist())

        fig = px.scatter_mapbox(filtered_data, lat="y", lon="x",
                                color="weekday",
                                size="mean_activity",
                                hover_name="station",
                                animation_frame="hour",
                                # A marker is "the same object" across frames when it is
                                # the same station, not merely the same weekday
                                animation_group="station",
                                category_orders={"hour": hours, "weekday": weekdays},
                                size_max=15, zoom=10)

        fig.update_mapboxes(style="carto-positron")
        fig.update_layout(
            autosize=False,
            width=1400,
            height=600,
        )

        return fig

    return app


app = create_dash_app(data_with_location)

# Newer Dash releases replaced `app.run_server` with `app.run`; prefer `run`
# when it exists and fall back to `run_server` on older installs.
start_server = getattr(app, "run", None) or app.run_server
start_server(debug=True)