This IPython notebook produces several example graphs. It does not include the full selection of graphs available in the visualisation toolkit.

## Imports

In [1]:
import time
from convertbng.util import convert_bng, convert_lonlat
import branca
import branca.colormap

import pandas as pd
import numpy as np
from datetime import datetime
import math
from math import atan2, degrees
import random
import pickle

import re
import os
import requests
import json
from urllib.parse import quote

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import ipywidgets as widgets
from IPython.display import display
from geopy.geocoders import Nominatim
import folium
from ipywidgets import interact, Dropdown
from folium.plugins import MarkerCluster
from folium import LinearColormap


import geopandas as gpd
import contextily as ctx
from ipywidgets import interact, SelectionSlider

import chardet

import branca
import networkx as nx

import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt

from mpl_toolkits.axes_grid1 import make_axes_locatable

## Load Data

Unpickle NUMBAT data to load it

In [80]:
# First set the working directory to the NUMBAT data folder so that the
# relative file name used when unpickling the data resolves.
# NOTE(review): this is an absolute path from the filesystem root, whereas later
# cells read from "~/NUMBAT-main/data/" - confirm which location is intended.
directoryfilepath = "/NUMBAT-main/data"
os.chdir(directoryfilepath)


In [4]:
# Load data (deserialize): 'datadict.pickle' is read from the current working
# directory and stored in data_dict, which later cells query by key.
# NOTE: unpickling can execute arbitrary code - only load a pickle file that
# comes from a trusted source.
with open('datadict.pickle', 'rb') as handle:
    data_dict = pickle.load(handle)

## Map Over London to Visualize magnitudes and changes

Visualise a single period of time over a map of London.
Pick the dataframe, the column of interest, and the direction of travel.

In [114]:

def oneday_with_nodes(dfone, column_name, mode='total'):
    """Draw one column of a NUMBAT link table as weighted lines on a map of London.

    Every distinct link (station pair, coordinates and line colour) becomes a
    ``folium.PolyLine`` whose width is proportional to the absolute summed
    value of ``column_name``; the largest link is drawn 15 px wide. The start
    station of every link also gets a black marker plus a larger transparent
    marker, both carrying a popup with the station name.

    Parameters
    ----------
    dfone : pandas.DataFrame
        Needs the columns 'From Station', 'To Station', 'StartLat',
        'StartLong', 'StopLat', 'StopLong', 'Hex' (colour without the leading
        '#'), 'Dir' and ``column_name``. The frame is copied, not modified.
    column_name : str
        Column whose values set the line widths.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps rows whose Dir is EB, IB, IR or NB; 'SOW' keeps OB, OR, SB
        or WB; 'total' reverses the NIE rows so both directions of a link share
        one key and are summed together.

    Returns
    -------
    folium.Map

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the three accepted values.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    route_keys = ["From Station", "To Station", "StartLat", "StartLong", "StopLat", "StopLong", "Hex"]
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    df1 = dfone.copy()

    # Filter the dataframe based on direction.
    if mode == "total":
        # Swap start and stop of every North/In/East row (vectorised) so that it
        # lands on the same key as its complement in the opposite direction.
        reverse = df1['Dir'].isin(nie_dirs)
        for start_col, stop_col in [('From Station', 'To Station'),
                                    ('StartLat', 'StopLat'),
                                    ('StartLong', 'StopLong')]:
            start_vals = df1[start_col].copy()
            stop_vals = df1[stop_col].copy()
            df1[start_col] = stop_vals.where(reverse, start_vals)
            df1[stop_col] = start_vals.where(reverse, stop_vals)
        df1 = df1.groupby(route_keys).agg({column_name: "sum"}).reset_index()
    elif mode == 'NIE':
        df1 = df1[df1['Dir'].isin(nie_dirs)]
    elif mode == 'SOW':
        df1 = df1[df1['Dir'].isin(sow_dirs)]

    # Missing values count as 0; fillna returns a new frame so a filtered
    # slice is never modified in place.
    df1 = df1.fillna(0)

    # One row per link (already the case for 'total', where this is a no-op).
    plot_df = df1.groupby(route_keys).agg({column_name: "sum"}).reset_index()

    # Create a background map
    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Scale so the largest link is 15 px wide. An empty selection or an
    # all-zero column would otherwise divide by zero and yield NaN widths.
    peak = plot_df[column_name].abs().max()
    lineFactor = 15 / peak if peak > 0 else 0.0

    for _, link in plot_df.iterrows():
        start_point = [link['StartLat'], link['StartLong']]
        stop_point = [link['StopLat'], link['StopLong']]

        # Line from the start coordinates to the stop coordinates
        folium.PolyLine(
            locations=[start_point, stop_point],
            color="#" + link.at["Hex"],
            weight=abs(link[column_name]) * lineFactor
        ).add_to(map_london)

        # Station node with popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

        # Larger transparent circle to make the station easier to click on
        folium.CircleMarker(
            location=start_point,
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

    return map_london


Look at traffic in all directions during the AMPM Peak on Fridays in 2020, without any background layer.

In [None]:
# mode='total' folds both travel directions of each link into a single line.
oneday_with_nodes(data_dict.get("data20FRI"),  "AMPM Peak", mode='total')

Compare two different columns

In [112]:
# Compare two times with colour scheme
# 2nd minus 1st
def two_times_dfs_with_colour(dfone, dftwo, column_one, column_two, mode='total'):
    """Map the change between two NUMBAT columns as weighted, line-coloured links.

    For every link the plotted value is ``dftwo[column_two] - dfone[column_one]``.
    A link that exists in only one frame is treated as 0 in the other. Line
    width is proportional to the absolute difference (largest link is about
    10 px) and decreases are drawn dashed. The start station of every link
    gets a black marker plus a larger transparent marker with a name popup.

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Need the columns 'From Station', 'To Station', 'StartLat', 'StartLong',
        'StopLat', 'StopLong', 'Hex' (colour without the leading '#') and 'Dir',
        plus ``column_one`` / ``column_two`` respectively. Both are copied, not
        modified. The same frame may be passed twice.
    column_one, column_two : str
        Value columns of ``dfone`` and ``dftwo``. They may have the same name.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps rows whose Dir is EB, IB, IR or NB; 'SOW' keeps OB, OR, SB
        or WB; 'total' reverses the NIE rows and sums both directions per link.

    Returns
    -------
    folium.Map

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the three accepted values.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    route_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', "Hex"]
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def _prepare(frame, value_col, out_col):
        # Apply the direction mode to one frame; return the keys plus one value column.
        frame = frame.copy()
        if mode == "total":
            # Swap start and stop of every North/In/East row so that it shares
            # a key with its complement, then sum both directions.
            reverse = frame['Dir'].isin(nie_dirs)
            for start_col, stop_col in [('From Station', 'To Station'),
                                        ('StartLat', 'StopLat'),
                                        ('StartLong', 'StopLong')]:
                start_vals = frame[start_col].copy()
                stop_vals = frame[stop_col].copy()
                frame[start_col] = stop_vals.where(reverse, start_vals)
                frame[stop_col] = start_vals.where(reverse, stop_vals)
            frame = frame.groupby(route_keys).agg({value_col: "sum"}).reset_index()
        elif mode == 'NIE':
            frame = frame[frame['Dir'].isin(nie_dirs)]
        elif mode == 'SOW':
            frame = frame[frame['Dir'].isin(sow_dirs)]
        # Renaming keeps the two value columns apart after the merge even when
        # column_one and column_two have the same name.
        return frame[route_keys + [value_col]].rename(columns={value_col: out_col})

    first = _prepare(dfone, column_one, '__value_one__')
    second = _prepare(dftwo, column_two, '__value_two__')

    # Merge both dataframes and fill missing values with 0, so a route missing
    # from one side contributes df2-0 or 0-df1.
    merged_df = pd.merge(first, second, on=route_keys, how='outer').fillna(0)
    merged_df['Difference'] = merged_df['__value_two__'] - merged_df['__value_one__']

    plot_df = merged_df.groupby(route_keys).agg({"Difference": "sum"}).reset_index()

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Line width multiple; .0001 avoids division by zero when nothing changed.
    # An empty selection has no maximum, so no scaling is needed.
    if plot_df.empty:
        lineFactor = 0.0
    else:
        lineFactor = 10 / (plot_df['Difference'].abs().max() + .0001)

    for _, link in plot_df.iterrows():
        start_point = [link['StartLat'], link['StartLong']]
        stop_point = [link['StopLat'], link['StopLong']]

        # Line from the start coordinates to the end coordinates; dashed for decreases
        folium.PolyLine(
            locations=[start_point, stop_point],
            color="#" + link.at["Hex"],
            dash_array='5, 10' if link['Difference'] < 0 else None,
            weight=abs(link['Difference']) * lineFactor
        ).add_to(map_london)

        # Station node with popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

        # Larger transparent circle to make the station easier to click on
        folium.CircleMarker(
            location=start_point,
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

    return map_london


Compare AM vs PM peak traffic on Saturday 2016 looking at South, Out, and West traffic only with the TfL colour scheme

In [None]:
# The same dataframe is passed twice, so the map shows "PM Peak" minus "AM Peak".
two_times_dfs_with_colour(data_dict.get("data16SAT"), data_dict.get("data16SAT"), "AM Peak", "PM Peak", mode='SOW')

### Add Extra Data for More Advanced Maps


Load Shape Files

In [119]:
# Folder holding the boundary files; each one is read with gpd.read_file.
Directory = "~/NUMBAT-main/data/"
OA_geo = gpd.read_file(f"{Directory}LOAC.shp")
borough_geo = gpd.read_file(f"{Directory}london_boroughs.geojson")

Add OA area data from GLA for additional context

This data comes from the GLA datastore and is called the MyLondon dataset https://data.london.gov.uk/dataset/mylondon

In [120]:
# Folder that holds the MyLondon CSV extracts. The "~" is expanded up front
# because the built-in open() used below does not expand it (pandas does, so
# only the encoding sniff would otherwise fail with FileNotFoundError).
Directory = os.path.expanduser("~/NUMBAT-main/data/")

file_path = Directory + "MyLondon_LOAC_area_description_text_v3.csv"

# Detect the text encoding of the description file before parsing it.
with open(file_path, 'rb') as f:
    result = chardet.detect(f.read())

MyLondon_LOAC_area_description_text_v3 = pd.read_csv(file_path, encoding=result['encoding'])

file_path = Directory + 'Summary.csv'
transSummary = pd.read_csv(file_path)

file_path = Directory + 'modelled_OA_rents.csv'
modelled_OA_rents = pd.read_csv(file_path)

file_path = Directory + 'MyLondon_fare_zone_OA.csv'
MyLondon_fare_zone_OA  = pd.read_csv(file_path)


file_path = Directory +'MyLondon_postcode_OA.csv'
MyLondon_postcode_OA  = pd.read_csv(file_path)

Get OA and Borough Coordinate Data

## Maps with different Background

Add fare zones to the basic comparison map

In [304]:
def fare_map_colour_one(dfone, column_name, fare_data, geoson, mode='total', show_station_names=True):
    """Draw one NUMBAT column over a grey-scale fare-zone background.

    The areas in ``geoson`` are shaded by the fare zone listed for their
    'OA11CD' code in ``fare_data``; areas without a fare zone are left
    transparent. On top, every link is drawn with a width proportional to the
    absolute summed value of ``column_name`` (largest link is about 15 px):
    green and solid for positive values, red otherwise, dashed for negative
    values. The start station of every link gets a black marker plus a larger
    transparent marker with a name popup, and a fare-zone legend is added.

    Parameters
    ----------
    dfone : pandas.DataFrame
        Needs the columns 'From Station', 'To Station', 'StartLat',
        'StartLong', 'StopLat', 'StopLong', 'Hex', 'Dir' and ``column_name``.
        The frame is copied, not modified.
    column_name : str
        Column whose values set line width and colour.
    fare_data : pandas.DataFrame
        Needs the columns 'OA11CD' and 'Fare_Zone'. When a code appears more
        than once, the first row is used.
    geoson
        Geometry passed to ``folium.GeoJson``; each feature must carry an
        'OA11CD' property.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps rows whose Dir is EB, IB, IR or NB; 'SOW' keeps OB, OR, SB
        or WB; 'total' reverses the NIE rows and sums both directions per link.
    show_station_names : bool
        Accepted for interface compatibility; currently not used.

    Returns
    -------
    folium.Map

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the three accepted values.
    """
    # Check if the mode is valid
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    route_keys = ["From Station", "To Station", "StartLat", "StartLong", "StopLat", "StopLong", "Hex"]
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    df1 = dfone.copy()

    # Filter the dataframe based on direction
    if mode == "total":
        # Swap start and stop of every North/In/East row so that it shares a
        # key with its complement, then sum both directions.
        reverse = df1['Dir'].isin(nie_dirs)
        for start_col, stop_col in [('From Station', 'To Station'),
                                    ('StartLat', 'StopLat'),
                                    ('StartLong', 'StopLong')]:
            start_vals = df1[start_col].copy()
            stop_vals = df1[stop_col].copy()
            df1[start_col] = stop_vals.where(reverse, start_vals)
            df1[stop_col] = start_vals.where(reverse, stop_vals)
        df1 = df1.groupby(route_keys).agg({column_name: "sum"}).reset_index()
    elif mode == 'NIE':
        df1 = df1[df1['Dir'].isin(nie_dirs)]
    elif mode == 'SOW':
        df1 = df1[df1['Dir'].isin(sow_dirs)]

    plot_df = df1.groupby(route_keys).agg({column_name: "sum"}).reset_index()

    # Create a color scale for Fare Zones
    color_scale = LinearColormap(
        ['#1a1a1a', '#404040', '#696969', '#a3a3a3', '#cccccc', '#f5f5f5'],
        vmin=1,
        vmax=6,
        caption='Fare Zones'
    )

    # Build the code -> fare zone lookup once; style_function runs for every
    # feature, so scanning the whole table each time would be quadratic.
    zone_by_oa = (
        fare_data.drop_duplicates(subset='OA11CD')
        .set_index('OA11CD')['Fare_Zone']
        .to_dict()
    )

    def style_function(feature):
        OA11CD = feature['properties']['OA11CD']
        if OA11CD not in zone_by_oa:
            # No fare zone known for this area: leave it invisible.
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': color_scale(zone_by_oa[OA11CD]),
            'fillOpacity': 0.5,
            'color': 'none',
            'weight': 0
        }

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Add choropleth layer for Fare Zones
    # NOTE(review): the tooltip labels OA11CD as 'LSOA Code' - confirm the label.
    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(fields=['OA11CD'], aliases=['LSOA Code'])
    ).add_to(map_london)

    # Scale widths by the plotted column. The aggregated frame has no
    # 'Difference' column, so the values are read from column_name.
    # .0001 avoids division by zero; an empty selection needs no scaling.
    if plot_df.empty:
        lineFactor = 0.0
    else:
        lineFactor = 15 / (plot_df[column_name].abs().max() + .0001)

    for _, link in plot_df.iterrows():
        volume = link[column_name]
        start_point = [link['StartLat'], link['StartLong']]
        stop_point = [link['StopLat'], link['StopLong']]

        folium.PolyLine(
            locations=[start_point, stop_point],
            # Green for positive volume, red otherwise
            color='green' if volume > 0 else 'red',
            dash_array='1, 15' if volume < 0 else None,  # Dashed lines for negative volume
            weight=abs(volume) * lineFactor
        ).add_to(map_london)

        # Station node with popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

        # Larger transparent circle to make the station easier to click on
        folium.CircleMarker(
            location=start_point,
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

    legend_html = '''
    <div style="position: fixed; bottom: 10px; right: 20px; width: 150px; background-color: white; z-index:9999; font-size:14px; padding: 10px; border:2px solid grey;">
    <strong>Fare Zones</strong><br>
    '''

    # One legend entry per distinct fare zone, in ascending order
    for zone in fare_data['Fare_Zone'].drop_duplicates().sort_values():
        legend_html += f'<span style="color:{color_scale(zone)}">Zone {zone}</span><br>'

    legend_html += '</div>'

    # Add the legend to the map
    map_london.get_root().html.add_child(folium.Element(legend_html))

    return map_london


Look at Normalised Peak traffic Tuesday-Thursday in 2022 over fare zone

In [None]:
# mode='NIE' keeps only the rows whose Dir is EB, IB, IR or NB.
map_london = fare_map_colour_one(data_dict.get("data22TWT"), "Normalized AMPM Peak", MyLondon_fare_zone_OA, OA_geo, mode='NIE')
map_london

Look at information at the OA level

Select OACol, a column to use as the background. These graphs are highly malleable: they can display a single NUMBAT dataframe and a single time of day, or compare two of either. You can also select the colour scheme for OACol.

In [314]:
def OA_transSummary_multtype_tflcolours(dfone, column_name, OA_data, OACol, geoson, colours = ['lightblue','green'], mode='total', dftwo = False, col2 = False):
    """Draw NUMBAT links in their line colours over an Output-Area background.

    The areas in ``geoson`` are shaded by ``OA_data[OACol]`` using a linear
    colour map over [0, 1]; areas with no value are left transparent. The
    links show one of three quantities, stored as 'Difference':

    * ``dftwo`` given: ``dftwo[column_name] - dfone[column_name]``, with a
      link missing from one frame counted as 0 there;
    * otherwise ``col2`` given: ``dfone[col2] - dfone[column_name]``;
    * otherwise: ``dfone[column_name]`` itself.

    Line width is proportional to the absolute value (largest link is about
    10 px) and negative values are drawn dashed. The start station of every
    link gets a black marker plus a larger transparent marker with a name
    popup.

    Parameters
    ----------
    dfone : pandas.DataFrame
        Needs the columns 'From Station', 'To Station', 'StartLat',
        'StartLong', 'StopLat', 'StopLong', 'Hex' (colour without the leading
        '#'), 'Dir' and ``column_name`` (plus ``col2`` when used). Copied,
        not modified.
    column_name : str
        Value column to plot or compare.
    OA_data : pandas.DataFrame
        Needs the columns 'OA' and ``OACol``. It is not modified. When
        ``OACol`` is "Rent_per_m" the values are divided by their maximum.
    OACol : str
        Background column. Values are mapped with a [0, 1] colour scale.
    geoson
        Geometry passed to ``folium.GeoJson``; each feature must carry an
        'OA11CD' property.
    colours : list
        Two colours for the low and high end of the background scale.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps rows whose Dir is EB, IB, IR or NB; 'SOW' keeps OB, OR, SB
        or WB; 'total' reverses the NIE rows and sums both directions per link.
    dftwo : pandas.DataFrame or False
        Optional second frame to compare against ``dfone``.
    col2 : str or False
        Optional second column of ``dfone``. It does not enter the difference
        when ``dftwo`` is given.

    Returns
    -------
    folium.Map

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the three accepted values.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    route_keys = ["From Station", "To Station", "StartLat", "StartLong", "StopLat", "StopLong", "Hex"]
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    # Work on a copy so the caller's OA_data is not rescaled as a side effect.
    oa_values = OA_data[['OA', OACol]].copy()
    if OACol == "Rent_per_m":
        oa_values[OACol] = oa_values[OACol] / oa_values[OACol].max()

    # Build the OA -> value lookup once (first row wins for a repeated code);
    # style_function is called for every feature.
    value_by_oa = oa_values.drop_duplicates(subset='OA').set_index('OA')[OACol].to_dict()

    cmap = branca.colormap.LinearColormap(colors=colours, index=[0,1],vmin=0,vmax=1)

    def map_to_color(value):
        # Map a background value to a hex colour
        return colors.rgb2hex(cmap(value))

    def _orient(frame):
        # Swap start and stop of every North/In/East row so that it shares a
        # key with its complement in the opposite direction.
        frame = frame.copy()
        reverse = frame['Dir'].isin(nie_dirs)
        for start_col, stop_col in [('From Station', 'To Station'),
                                    ('StartLat', 'StopLat'),
                                    ('StartLong', 'StopLong')]:
            start_vals = frame[start_col].copy()
            stop_vals = frame[stop_col].copy()
            frame[start_col] = stop_vals.where(reverse, start_vals)
            frame[stop_col] = start_vals.where(reverse, stop_vals)
        return frame

    # See how many dfs and columns there are. An explicit type check replaces
    # the former bare except around dftwo.copy().
    twodfs = isinstance(dftwo, pd.DataFrame)
    twocols = bool(col2)

    df1 = dfone.copy()
    df2 = dftwo.copy() if twodfs else None

    # Filter the dataframe(s) based on direction
    if mode == "total":
        first_aggs = {column_name: "sum", col2: "sum"} if twocols else {column_name: "sum"}
        df1 = _orient(df1).groupby(route_keys).agg(first_aggs).reset_index()
        if twodfs:
            df2 = _orient(df2).groupby(route_keys).agg({column_name: "sum"}).reset_index()
    elif mode == 'NIE':
        df1 = df1[df1['Dir'].isin(nie_dirs)]
        if twodfs:
            df2 = df2[df2['Dir'].isin(nie_dirs)]
    elif mode == 'SOW':
        df1 = df1[df1['Dir'].isin(sow_dirs)]
        if twodfs:
            df2 = df2[df2['Dir'].isin(sow_dirs)]

    if twodfs:
        merged_df = pd.merge(df1, df2, on=route_keys, how='outer', suffixes=('_df1', '_df2'))
        # Fill before subtracting so a route missing from one frame yields
        # df2-0 or 0-df1 rather than NaN (which the sum would turn into 0).
        merged_df['Difference'] = (
            merged_df[column_name + '_df2'].fillna(0) - merged_df[column_name + '_df1'].fillna(0)
        )
        merged_df = merged_df.groupby(route_keys).agg({"Difference": "sum"}).reset_index()
    elif twocols:
        merged_df = df1.copy()
        merged_df['Difference'] = merged_df[col2] - merged_df[column_name]
    else:
        merged_df = df1.copy()
        merged_df['Difference'] = merged_df[column_name]

    merged_df = merged_df.fillna(0)

    plot_df = merged_df[['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Difference', "Hex"]]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Define the style function
    def style_function(feature):
        OA11CD = feature['properties']['OA11CD']
        trans_value = value_by_oa.get(OA11CD)
        if trans_value is None or pd.isna(trans_value):
            # No background value for this area: leave it invisible instead
            # of failing on an empty lookup.
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': map_to_color(trans_value),
            'fillOpacity': np.interp(trans_value, [0, 1], [0.1, .9]),
            'color': None,
            'weight': 1
        }

    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function
        ).add_to(map_london)

    # Scale by the largest ABSOLUTE difference: with a signed maximum the
    # widths turn negative when every link decreased. .0001 avoids division by
    # zero; an empty selection needs no scaling.
    if plot_df.empty:
        lineFactor = 0.0
    else:
        lineFactor = 10 / (plot_df['Difference'].abs().max() + .0001)

    # Add PolyLines and CircleMarkers for each row in plot_df
    for _, link in plot_df.iterrows():
        start_point = [link['StartLat'], link['StartLong']]
        stop_point = [link['StopLat'], link['StopLong']]

        folium.PolyLine(
            locations=[start_point, stop_point],
            color="#" + link.at["Hex"],
            dash_array='1, 15' if link['Difference'] < 0 else None,
            weight=abs(link['Difference']) * lineFactor
        ).add_to(map_london)

        # Station node with popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

        # Larger transparent circle to make the station easier to click on
        folium.CircleMarker(
            location=start_point,
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(link['From Station'])).add_to(map_london)

    # Attach the background colour scale to the map
    map_london.add_child(cmap)

    return map_london



Use green space as a background using gray for areas that lack green space and green for OAs with lots of green. Compare AM Peak traffic Monday-Thursday in 2019 with 2020.

In [None]:
# Inputs for the comparison: the two dataframes and the column compared in both.
dfone= data_dict.get("data19MTT")
dftwo= data_dict.get("data20MTT")
column_name = "AM Peak"

# Because dftwo is supplied, the lines show dftwo minus dfone for column_name.
OA_transSummary_multtype_tflcolours(dfone, column_name, transSummary, "green_spac", OA_geo, ["gray", "green"], mode='total', dftwo = dftwo, col2 = False)
