This file can be used to visualise the NUMBAT dataset

## Imports

In [1]:
import time
from convertbng.util import convert_bng, convert_lonlat
import branca
import branca.colormap

import pandas as pd
import numpy as np
from datetime import datetime
import math
from math import atan2, degrees
import random
import pickle

import re
import os
import requests
import json
from urllib.parse import quote

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import ipywidgets as widgets
from IPython.display import display
from geopy.geocoders import Nominatim
import folium
from ipywidgets import interact, Dropdown
from folium.plugins import MarkerCluster
from folium import LinearColormap


import geopandas as gpd
import contextily as ctx
from ipywidgets import interact, SelectionSlider

import chardet

import branca
import networkx as nx

import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt

from mpl_toolkits.axes_grid1 import make_axes_locatable

### Unpickle data_dict

Unpickle data to load it

In [80]:
#First set the directory
# The data folder can be overridden with the NUMBAT_DATA_DIR environment variable,
# so the notebook is not tied to one machine's layout; the original hard-coded
# location is kept as the default.
directoryfilepath = os.environ.get("NUMBAT_DATA_DIR", "/NUMBAT-main/data")
os.chdir(directoryfilepath)


In [4]:
#Load data (deserialize)
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so only
# open a datadict.pickle that comes from a trusted source.
# The relative file name is resolved against the current working directory.
with open('datadict.pickle', 'rb') as handle:
    data_dict = pickle.load(handle)

## Create Aggregated Dataframes to Look at Trends

Aggregate all data into columns by year

In [130]:
#Create the combined NUMBAT dataframe (one row per year)
yr = range(16,23)


def _total_load(key, required=True):
    """Return the summed 'Total' column of data_dict[key].

    When the dataframe is absent, return None if ``required`` is False,
    otherwise raise a KeyError that names the missing key.
    """
    frame = data_dict.get(key)
    if frame is None:
        if required:
            raise KeyError(f"{key} is missing from data_dict")
        return None
    return sum(frame["Total"])


# Rows are collected in a list and turned into a dataframe once at the end:
# DataFrame.append was removed in pandas 2.0 and re-allocated the frame on every call.
numcomb_rows = []

#For each year NUMBAT covers add the required data
for y in yr:

    #Convert year into a string to use as part of the key to access data_dict
    y = str(y)

    #How many weekdays there are without Friday
    wkdayfactor = 4

    #Use the MTT dataframe when it exists, otherwise rebuild it from MON and TWT
    #weighted 1:3 (same weighting as before)
    MTT = _total_load("data" + y + "MTT", required=False)
    if MTT is None:
        MTT = _total_load("data" + y + "MON")
        MTT += _total_load("data" + y + "TWT") * 3
        MTT = MTT/4

    SUN = _total_load("data" + y + "SUN")
    SAT = _total_load("data" + y + "SAT")

    #FRI does not exist prior to 2018, so a missing Friday dataframe counts as 0
    FRI = _total_load("data" + y + "FRI", required=False)
    if FRI is None:
        FRI = 0
    else:
        #If Friday is part of the dataset add 1 to the weekday factor in order to properly
        #compare years before and after Friday data was separated
        wkdayfactor += 1

    #Add a column for summed Weekday and Total load
    WkDay = (FRI + 4*MTT)/wkdayfactor
    Total = WkDay + SUN + SAT

    # Create a new row as a dictionary
    new_row = {'year': int(y), 'Total': Total, 'MTT': MTT, 'SUN': SUN, "SAT": SAT, "FRI": FRI, "WkDay": WkDay}
    numcomb_rows.append(new_row)

# Build the dataframe in one go; 'year' now keeps its integer dtype
numcomb = pd.DataFrame(numcomb_rows, columns=['year', 'Total', 'MTT', 'SUN', "SAT", "FRI", "WkDay"])

#Add a weekend column
numcomb["WkEnd"] = numcomb["SAT"] + numcomb['SUN']


In [131]:
# Preview the first row of the per-year aggregate
numcomb.head(1)

Unnamed: 0,year,Total,MTT,SUN,SAT,FRI,WkDay,WkEnd
0,16.0,97343910.0,42939500.0,22327650.0,32076760.0,0.0,42939500.0,54404410.0


Aggregate the NUMBAT dataset with a row for each dataframe (instead of for each year)

In [140]:
#Go over each year and day of the week type
yr = range(16,23)
days = ["MTT", "SUN", "SAT", "FRI"]

#Use one NUMBAT dataframe as a template for the load column names
#(positional columns 10 to 112) and add the two identifying columns
df = data_dict.get("data21SUN")
cols = list(df.columns[10:113]) + ["year", "type"]

# Rows are collected in a list and converted once at the end:
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
fullnum_rows = []

for y in yr:
    y = str(y)

    for d in days:

        #Skip year/day combinations that are not in NUMBAT. Only a missing dataframe
        #is skipped; any other problem now raises instead of being silently swallowed.
        df = data_dict.get("data" + y + d)
        if df is None:
            continue

        #Sum every load value
        newRow = [sum(df.iloc[:, r]) for r in range(10, 113)]

        #Add the year and day of the week type
        newRow.append(y)
        newRow.append(d)
        fullnum_rows.append(newRow)

#Build the aggregated NUMBAT dataframe (one row per source dataframe)
FullNum = pd.DataFrame(fullnum_rows, columns=cols)

In [141]:
# Preview the first row of the per-dataframe aggregate
FullNum.head(1)

Unnamed: 0,Total,Early,AM Peak,Midday,PM Peak,Evening,Late,0500-0515,0515-0530,0530-0545,...,0245-0300,0300-0315,0315-0330,0330-0345,0345-0400,0400-0415,0415-0430,0430-0445,year,type
0,42939500.0,1911959.0,11471550.0,9950113.0,11685930.0,5681167.0,2238779.0,18158.529793,44802.078277,87907.021045,...,257.99757,245.332993,212.06942,163.125683,96.914683,53.156679,178.403471,166.12486,16,MTT


## Find links with biggest change

These create basic bar graphs to show which links have the biggest absolute or relative change

In [58]:
#Find the rows whose value changed by more than a threshold
def find_most_changed_rows(df1, df2, cola, colb, threshold):
    """Return the links whose change between two dataframes exceeds ``threshold``.

    Args:
    - df1: First dataframe; also supplies the station names (positional columns 6 and 9)
    - df2: Second dataframe
    - cola: Column of df1 to compare
    - colb: Column of df2 to compare
    - threshold: Minimum absolute change for a row to be kept

    Returns a dataframe with columns 'diff' (df2[colb] - df1[cola]), 'From' and 'To'.
    """
    delta = df2[colb] - df1[cola]

    # Pair every difference with the station names taken from df1
    candidates = pd.DataFrame({
        "diff": delta,
        "From": df1.iloc[:, 6],
        "To": df1.iloc[:, 9],
    })

    # Keep only the rows that moved by more than the threshold in either direction
    exceeds = candidates["diff"].abs() > threshold
    return candidates[exceeds]

#Return a DF of all the changes between two dfs
def changed_rows(df1, df2, col = ['Total', 'Normalized Total', 'Normalized AMPM Peak','Normalized Not Peak' ]):
    """Return the per-link differences (df2 - df1), largest absolute change first.

    Args:
    - df1: First dataframe; also supplies the station names (positional columns 6 and 9)
    - df2: Second dataframe
    - col: Column(s) to difference. Either a list of column names, or a single
      column name, in which case the default columns are differenced as well so
      callers that also read 'Total' keep working.

    Returns a dataframe with 'From Station', 'To Station' and one difference column
    per selected column, sorted by the absolute 'Normalized Total' difference
    (or by the first selected column when 'Normalized Total' was not selected).

    Raises ValueError when no column is selected.
    """
    default_cols = ['Total', 'Normalized Total', 'Normalized AMPM Peak', 'Normalized Not Peak']

    # The previous body ignored ``col`` and read an undefined global instead;
    # resolve the requested columns from the argument.
    if isinstance(col, str):
        selected = default_cols + ([col] if col not in default_cols else [])
    else:
        selected = list(col)
    if not selected:
        raise ValueError("col must name at least one column")

    # Subtract the columns of interest between the two dataframes
    diff_df = df2[selected] - df1[selected]

    # Add the station name columns from df1 to diff_df
    diff_df.insert(0, 'From Station', df1.iloc[:, 6])
    diff_df.insert(1, 'To Station', df1.iloc[:, 9])

    # Sort by absolute change using a temporary helper column
    sort_col = 'Normalized Total' if 'Normalized Total' in selected else selected[0]
    diff_df = diff_df.assign(abs_col3=diff_df[sort_col].abs())
    diff_df = diff_df.sort_values(by='abs_col3', ascending=False).drop(columns=['abs_col3'])

    return diff_df


In [71]:
#Make a bar chart of the n most changed rows

def compare_df(df1, df2, n=10, col='Normalized Total'):
    """Plot a bar chart of the links with the largest difference between two dataframes.

    Args:
    - df1: First dataframe
    - df2: Second dataframe
    - n: Number of rows to display in the bar chart (default: 10)
    - col: Column to compare and display in the bar chart (default: 'Normalized Total')

    The chart is shown with plt.show(); nothing is returned.

    NOTE(review): rows are picked with nlargest, i.e. the largest signed differences,
    so large decreases are not selected - confirm this is intended.
    """

    # Get the dataframe of changed rows based on the specified column
    diff_df = changed_rows(df1, df2, col=col)

    # Get the top n rows with the largest difference in the specified column
    top_diff = diff_df.nlargest(n, col).drop_duplicates(subset=['From Station', 'To Station'])

    # Mean absolute differences of the compared column and of 'Total' over all links
    mean_abs_diff = diff_df[col].abs().mean()
    mean_abs_diff_total = diff_df['Total'].abs().mean()

    # Create a figure and axis for the bar chart
    fig, ax = plt.subplots(figsize=(12, 6))

    # Create one bar per link in the top_diff dataframe
    link_labels = top_diff['From Station'] + ' - ' + top_diff['To Station']
    bars = ax.bar(link_labels, top_diff[col])

    # Add a dashed red line representing the mean absolute difference
    ax.axhline(mean_abs_diff, color='red', linestyle='dashed', linewidth=2, label='Mean Absolute Difference')

    # Set labels and title for the bar chart
    ax.set_ylabel(f'{col} Difference')
    ax.set_xlabel('From - To Station')
    ax.set_title(f'Top {n} Differences in {col} between DataFrames')

    # Annotate each bar: the difference above it, the 'Total' difference inside it
    for bar, diff, total in zip(bars, top_diff[col], top_diff['Total']):
        height = bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.annotate('{:.4f}'.format(diff),
                    xy=(centre_x, height),
                    xytext=(0, 6),
                    textcoords='offset points',
                    ha='center', va='bottom')
        ax.annotate('{:.0f}'.format(total),
                    xy=(centre_x, height/2),
                    xytext=(0, 0),
                    textcoords='offset points',
                    ha='center', va='center', color='white', fontsize=10)

    # Annotate the mean absolute difference
    ax.annotate('Mean Absolute Difference: {:.6f} Total {:.1f}'.format(mean_abs_diff, mean_abs_diff_total),
                xy=(0.5, mean_abs_diff),
                xytext=(20, 5),
                textcoords='offset points',
                arrowprops=dict(facecolor='black', shrink=0.05),
                fontsize=10, color='black')

    # Display legend, rotate x-axis labels, and show the plot
    ax.legend()
    plt.xticks(rotation=45)
    plt.show()




In [None]:
# Collect the link-level differences for every valid pair of NUMBAT dataframes,
# then chart the 30 links with the largest absolute change in 'Normalized Total'.

# Years for which the FRI dataframe is treated as non-existent
no_fri_years = (16, 17)

# Differences are gathered in a list and concatenated once; calling pd.concat
# inside the loops copied the growing dataframe on every iteration.
diff_frames = []

# Same day type, different years
for i, yr1 in enumerate(year):
    for yr2 in year[i:]:
        if yr1 == yr2:
            continue
        for day in days:
            # Skip the non-existent combinations
            if day == 'FRI' and (yr1 in no_fri_years or yr2 in no_fri_years):
                continue

            df1 = data_dict[f'data{yr1}{day}']
            df2 = data_dict[f'data{yr2}{day}']
            temp_diffs = changed_rows(df1, df2, columns_of_interest)
            temp_diffs['Combination'] = f'data{yr1}{day} vs data{yr2}{day}'
            diff_frames.append(temp_diffs)

# Same year, different day types
for i, day1 in enumerate(days):
    for day2 in days[i:]:
        if day1 == day2:
            continue
        # A dedicated loop name is used so the `yr` range from earlier cells is not overwritten
        for single_yr in year:
            # Skip the non-existent combinations
            if single_yr in no_fri_years and (day1 == 'FRI' or day2 == 'FRI'):
                continue

            df1 = data_dict[f'data{single_yr}{day1}']
            df2 = data_dict[f'data{single_yr}{day2}']
            temp_diffs = changed_rows(df1, df2, columns_of_interest)
            temp_diffs['Combination'] = f'data{single_yr}{day1} vs data{single_yr}{day2}'
            diff_frames.append(temp_diffs)

all_diffs = pd.concat(diff_frames) if diff_frames else pd.DataFrame()

# Sort all_diffs based on the absolute value of the difference and select the top 30 links
all_diffs['abs_diff'] = all_diffs['Normalized Total'].abs()
all_diffs_sorted = all_diffs.sort_values(by='abs_diff', ascending=False).drop_duplicates(subset=['From Station', 'To Station']).head(30)

# Visualize each dataframe pair that contributed at least one of the top 30 links
# NOTE(review): compare_df re-selects its own top rows for the pair, which need not
# be the same links as `subset` - confirm this is intended.
for combination in all_diffs_sorted['Combination'].unique():
    subset = all_diffs_sorted[all_diffs_sorted['Combination'] == combination]
    df1_key, df2_key = combination.split(' vs ')
    df1 = data_dict[df1_key]
    df2 = data_dict[df2_key]
    print(f'Comparing {combination}:')
    compare_df(df1, df2, n=len(subset), col='Normalized Total')


## Map Over London to Visualise Magnitudes and Changes

Visualise a single period of time over a map of London.
Pick the dataframe, column of interest, and direction of travel.

For this and all graphs, mode should be one of NIE (North, In, East), SOW (South, Out, West), or total

In [114]:

def oneday_with_nodes(dfone, column_name, mode='total'):
    """Draw the load of one dataframe column on a map of London.

    Args:
    - dfone: Dataframe with 'From Station', 'To Station', 'StartLat', 'StartLong',
      'StopLat', 'StopLong', 'Hex', 'Dir' and the column to plot
    - column_name: Column whose summed value sets the line width of each link
    - mode: 'NIE' keeps Dir codes EB/IB/IR/NB, 'SOW' keeps OB/OR/SB/WB, and 'total'
      reverses the EB/IB/IR/NB links so both directions of a link are summed together

    Returns the folium.Map with one line per link (coloured by the 'Hex' column,
    widest line 15) and a clickable marker at each link's start station.

    Rows with a missing station, coordinate or Hex value are left out in every mode.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']
    link_keys = ["From Station", "To Station", "StartLat", "StartLong", "StopLat", "StopLong", "Hex"]

    df1 = dfone.copy()

    #Filter the dataframe based on direction
    if mode == "total":
        #Swap start and stop of every North/In/East link (column-wise instead of a
        #row-by-row apply) so it lines up with its opposite direction
        flip = df1['Dir'].isin(nie_dirs)
        for start_col, stop_col in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
            start_vals = df1[start_col].copy()
            stop_vals = df1[stop_col].copy()
            df1[start_col] = stop_vals.where(flip, start_vals)
            df1[stop_col] = start_vals.where(flip, stop_vals)
    elif mode == 'NIE':
        df1 = df1[df1['Dir'].isin(nie_dirs)]
    elif mode == 'SOW':
        df1 = df1[df1['Dir'].isin(sow_dirs)]

    #One row per link. groupby drops rows with a missing key; filling them with 0
    #used to place links at (0, 0) and turned a missing Hex into the integer 0.
    plot_df = df1.groupby(link_keys).agg({column_name: "sum"}).reset_index()

    #Create a background map
    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    #Cap maximum line width at 15. Change this for thicker lines
    #An empty or all-zero column gives a factor of 0 instead of dividing by zero
    peak = plot_df[column_name].abs().max()
    lineFactor = 15 / peak if peak > 0 else 0

    for _, row in plot_df.iterrows():

        #Use the Hex row which contains TfL colour scheme colour
        color = "#" + row.at["Hex"]

        #Scale the line width relative to the busiest link
        linewidth = abs(row[column_name]) * lineFactor

        start_point = [row['StartLat'], row['StartLong']]
        stop_point = [row['StopLat'], row['StopLong']]

        #add line from start lat to stop lat
        folium.PolyLine(
            locations=[start_point, stop_point],
            color=color,
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=start_point,
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    return map_london


Compare the loads in two periods of time. Pick the dataframes, column of interest, and direction of travel.

In all cases, difference is 2nd minus 1st. ie 2nd dataframe - 1st dataframe

In [1]:
def plain_map_with_nodes(dfone, dftwo, column_name, mode='total'):
    """Draw the change in one column between two dataframes on a map of London.

    Args:
    - dfone: First dataframe ('From Station', 'To Station', 'StartLat', 'StartLong',
      'StopLat', 'StopLong', 'Dir' and the column to compare)
    - dftwo: Second dataframe with the same columns
    - column_name: Column to compare; the difference is dftwo - dfone
    - mode: 'NIE' keeps Dir codes EB/IB/IR/NB, 'SOW' keeps OB/OR/SB/WB, and 'total'
      reverses the EB/IB/IR/NB links so both directions of a link are summed together

    Returns the folium.Map. Increases are solid green lines, decreases dashed red
    lines (an unchanged link is drawn red with zero width); widest line 15.
    A link present in only one dataframe counts as 0 in the other.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']
    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']

    def _link_loads(frame, label):
        # Filter/flip one dataframe by direction and sum column_name per link
        frame = frame.copy()
        if mode == "total":
            #Swap start and stop of every North/In/East link (column-wise instead of
            #a row-by-row apply) so it lines up with its opposite direction
            flip = frame['Dir'].isin(nie_dirs)
            for start_col, stop_col in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
                start_vals = frame[start_col].copy()
                stop_vals = frame[stop_col].copy()
                frame[start_col] = stop_vals.where(flip, start_vals)
                frame[stop_col] = start_vals.where(flip, stop_vals)
        elif mode == 'NIE':
            frame = frame[frame['Dir'].isin(nie_dirs)]
        elif mode == 'SOW':
            frame = frame[frame['Dir'].isin(sow_dirs)]
        return frame.groupby(link_keys)[column_name].sum().rename(label).reset_index()

    # Aggregate to one row per link BEFORE merging, in every mode. Merging the raw
    # rows let links that occur more than once (for example a link served by several
    # lines) pair up many-to-many, which multiplied their differences.
    loads_one = _link_loads(dfone, column_name + '_df1')
    loads_two = _link_loads(dftwo, column_name + '_df2')

    # Merge both dataframes; a link missing from one side has load 0 there
    merged_df = pd.merge(loads_one, loads_two, on=link_keys, how='outer')
    load_cols = [column_name + '_df1', column_name + '_df2']
    merged_df[load_cols] = merged_df[load_cols].fillna(0)

    # Calculate the difference (df2 - df1)
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']

    plot_df = merged_df[link_keys + ['Difference']]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    #Cap maximum line width
    lineFactor = 15 / (plot_df['Difference'].abs().max() + .0001) #add .0001 for 0 division problem

    for _, row in plot_df.iterrows():

        #Colour of lines: green for an increase, red otherwise
        color = 'green' if row['Difference'] > 0 else 'red'

        #Scale the line width relative to the largest change
        linewidth = abs(row['Difference']) * lineFactor

        start_point = [row['StartLat'], row['StartLong']]
        stop_point = [row['StopLat'], row['StopLong']]

        #Add lines from starting coordinates to end coordinates
        folium.PolyLine(
            locations=[start_point, stop_point],
            color=color,
            dash_array='5, 10' if row['Difference'] < 0 else None, #Add dashes for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=start_point,
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    return map_london


Compare two periods of time using the TfL colour scheme instead of red-green

In [111]:
def colourscheme_map_with_nodes(dfone, dftwo, column_name, mode='total'):
    """Draw the change in one column between two dataframes, coloured by the 'Hex' column.

    Args:
    - dfone: First dataframe ('From Station', 'To Station', 'StartLat', 'StartLong',
      'StopLat', 'StopLong', 'Hex', 'Dir' and the column to compare)
    - dftwo: Second dataframe with the same columns
    - column_name: Column to compare; the difference is dftwo - dfone
    - mode: 'NIE' keeps Dir codes EB/IB/IR/NB, 'SOW' keeps OB/OR/SB/WB, and 'total'
      reverses the EB/IB/IR/NB links so both directions of a link are summed together

    Returns the folium.Map. Each link is drawn in its 'Hex' colour, dashed when the
    difference is negative; widest line 10. A link present in only one dataframe
    counts as 0 in the other.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']
    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'Hex', 'StopLong']

    def _link_loads(frame, label):
        # Filter/flip one dataframe by direction and sum column_name per link and colour
        frame = frame.copy()
        if mode == "total":
            #Swap start and stop of every North/In/East link (column-wise instead of
            #a row-by-row apply) so it lines up with its opposite direction
            flip = frame['Dir'].isin(nie_dirs)
            for start_col, stop_col in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
                start_vals = frame[start_col].copy()
                stop_vals = frame[stop_col].copy()
                frame[start_col] = stop_vals.where(flip, start_vals)
                frame[stop_col] = start_vals.where(flip, stop_vals)
        elif mode == 'NIE':
            frame = frame[frame['Dir'].isin(nie_dirs)]
        elif mode == 'SOW':
            frame = frame[frame['Dir'].isin(sow_dirs)]
        return frame.groupby(link_keys)[column_name].sum().rename(label).reset_index()

    # Aggregate to one row per link BEFORE merging, in every mode, so repeated link
    # keys cannot pair up many-to-many in the merge and inflate the differences.
    loads_one = _link_loads(dfone, column_name + '_df1')
    loads_two = _link_loads(dftwo, column_name + '_df2')

    # Merge both dataframes; a link missing from one side has load 0 there
    merged_df = pd.merge(loads_one, loads_two, on=link_keys, how='outer')
    load_cols = [column_name + '_df1', column_name + '_df2']
    merged_df[load_cols] = merged_df[load_cols].fillna(0)

    # Calculate the difference (df2 - df1)
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']

    plot_df = merged_df[['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', "Hex", 'Difference']]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    #Cap maximum line width
    lineFactor = 10 / (plot_df['Difference'].abs().max() + .0001) #add .0001 for 0 division problem

    for _, row in plot_df.iterrows():

        #Use the Hex row which contains TfL colour scheme colour
        color = "#" + row.at["Hex"]

        #Scale the line width relative to the largest change
        linewidth = abs(row['Difference']) * lineFactor

        start_point = [row['StartLat'], row['StartLong']]
        stop_point = [row['StopLat'], row['StopLong']]

        #Add lines from starting coordinates to end coordinates
        folium.PolyLine(
            locations=[start_point, stop_point],
            color=color,
            dash_array='5, 10' if row['Difference'] < 0 else None, #Add dashes to represent negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=start_point,
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    return map_london


This is similar to the above graph but for comparing two columns in the same period of time. As such, instead of one column, you need to select both of the columns you are comparing.

This version uses the TfL colour scheme

In [112]:
def two_times_dfs_with_colour(dfone, dftwo, column_one, column_two, mode='total'):
    """Draw the difference between two columns on a map of London, coloured by 'Hex'.

    Args:
    - dfone: Dataframe supplying column_one ('From Station', 'To Station', 'StartLat',
      'StartLong', 'StopLat', 'StopLong', 'Hex' and 'Dir' are also required)
    - dftwo: Dataframe supplying column_two, with the same key columns
    - column_one: Column taken from dfone
    - column_two: Column taken from dftwo; the difference is column_two - column_one
    - mode: 'NIE' keeps Dir codes EB/IB/IR/NB, 'SOW' keeps OB/OR/SB/WB, and 'total'
      reverses the EB/IB/IR/NB links so both directions of a link are summed together

    Returns the folium.Map. Each link is drawn in its 'Hex' colour, dashed when the
    difference is negative; widest line 10. A link present in only one dataframe
    counts as 0 in the other. column_one and column_two may be the same name.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']
    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', "Hex"]

    def _link_loads(frame, value_col, label):
        # Filter/flip one dataframe by direction and sum value_col per link and colour
        frame = frame.copy()
        if mode == "total":
            #Swap start and stop of every North/In/East link (column-wise instead of
            #a row-by-row apply) so it lines up with its opposite direction
            flip = frame['Dir'].isin(nie_dirs)
            for start_col, stop_col in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
                start_vals = frame[start_col].copy()
                stop_vals = frame[stop_col].copy()
                frame[start_col] = stop_vals.where(flip, start_vals)
                frame[stop_col] = start_vals.where(flip, stop_vals)
        elif mode == 'NIE':
            frame = frame[frame['Dir'].isin(nie_dirs)]
        elif mode == 'SOW':
            frame = frame[frame['Dir'].isin(sow_dirs)]
        return frame.groupby(link_keys)[value_col].sum().rename(label).reset_index()

    # Aggregate to one row per link BEFORE merging, in every mode, so repeated link
    # keys cannot pair up many-to-many in the merge. The two value columns get fixed
    # internal names; merging them under their own names failed with a KeyError
    # whenever column_one and column_two were the same.
    loads_one = _link_loads(dfone, column_one, '_load_one')
    loads_two = _link_loads(dftwo, column_two, '_load_two')

    # Merge both dataframes; a link missing from one side has load 0 there
    merged_df = pd.merge(loads_one, loads_two, on=link_keys, how='outer')
    merged_df[['_load_one', '_load_two']] = merged_df[['_load_one', '_load_two']].fillna(0)

    # Calculate the difference (column_two - column_one)
    merged_df['Difference'] = merged_df['_load_two'] - merged_df['_load_one']

    plot_df = merged_df[['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Difference', "Hex"]]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    #Cap maximum line width
    lineFactor = 10 / (plot_df['Difference'].abs().max() + .0001) #add .0001 for 0 division problem

    for _, row in plot_df.iterrows():

        #Set colours of lines
        color = "#" + row.at["Hex"]

        #Scale the line width relative to the largest change
        linewidth = abs(row['Difference']) * lineFactor

        start_point = [row['StartLat'], row['StartLong']]
        stop_point = [row['StopLat'], row['StopLong']]

        #Add lines from starting coordinates to end coordinates
        folium.PolyLine(
            locations=[start_point, stop_point],
            color=color,
            dash_array='5, 10' if row['Difference'] < 0 else None, #Add dashes for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=start_point,
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=start_point,
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    return map_london


In [113]:
def two_times_dfs(dfone, dftwo, column_one, column_two, mode='total'):
    """Draw a folium map of the change in link volume between two dataframes.

    For every station-to-station link the plotted value is the summed
    ``dftwo[column_two]`` minus the summed ``dfone[column_one]``; a link that is
    missing from one frame counts as 0 there. Increases are drawn green, all
    other links red, and decreases are additionally dashed. Line width is
    proportional to the absolute change, scaled so the largest is about 15 px.

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Link-level data with the columns 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Dir' and the relevant
        volume column. Neither frame is modified.
    column_one, column_two : str
        Volume column read from ``dfone`` / ``dftwo``. May be the same name.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps only rows whose 'Dir' is EB/IB/IR/NB and 'SOW' only rows
        whose 'Dir' is OB/OR/SB/WB. 'total' keeps every row but reverses the
        EB/IB/IR/NB rows so both directions of a link are summed together.

    Returns
    -------
    folium.Map
        Map centred on London with one line per link and one marker per station.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(frame, value_col, out_col):
        """Return one row per link with `value_col` summed into `out_col`."""
        links = frame.copy()
        going_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap the two ends of every North/In/East row (vectorised) so that
            # opposite directions of the same link share one key.
            flip = going_nie.to_numpy()
            for start, stop in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
                new_start = np.where(flip, links[stop], links[start])
                new_stop = np.where(flip, links[start], links[stop])
                links[start], links[stop] = new_start, new_stop
        elif mode == 'NIE':
            links = links[going_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: merging un-aggregated frames would cross-join
        # links that occur on several rows and inflate their difference.
        totals = links.groupby(link_keys)[value_col].sum().reset_index()
        return totals.rename(columns={value_col: out_col})

    # Fixed internal names keep the merge unambiguous even when
    # column_one == column_two.
    merged_df = link_totals(dfone, column_one, 'Volume_df1').merge(
        link_totals(dftwo, column_two, 'Volume_df2'), on=link_keys, how='outer'
    ).fillna(0)

    # Links missing from one frame were filled with 0, giving df2-0 or 0-df1
    merged_df['Difference'] = merged_df['Volume_df2'] - merged_df['Volume_df1']
    plot_df = merged_df[link_keys + ['Difference']]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Cap line width; + .0001 avoids a 0 division. Series.max() (unlike the
    # builtin max) also tolerates an empty selection.
    lineFactor = 15 / (plot_df['Difference'].abs().max() + .0001)

    for link in plot_df.to_dict('records'):
        change = link['Difference']
        folium.PolyLine(
            locations=[[link['StartLat'], link['StartLong']], [link['StopLat'], link['StopLong']]],
            color='green' if change > 0 else 'red',  # red for negative volume
            dash_array='5, 10' if change < 0 else None,  # dashes for negative volume
            weight=abs(change) * lineFactor
        ).add_to(map_london)

    # One marker pair per distinct station (both link ends), instead of a new
    # pair for every row's start station only.
    stations = dict.fromkeys(
        list(zip(plot_df['From Station'], plot_df['StartLat'], plot_df['StartLong']))
        + list(zip(plot_df['To Station'], plot_df['StopLat'], plot_df['StopLong']))
    )
    for name, lat, lon in stations:
        # Visible station node with popup text
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(name)).add_to(map_london)

        # Larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[lat, lon],
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(name)).add_to(map_london)

    return map_london


### Add Extra Data for More Advanced Maps

Download Shape Files

In [119]:
# Folder holding the shape files; read the OA-level and borough-level geometries from it
Directory = "~/NUMBAT-main/data/"
OA_geo = gpd.read_file(f"{Directory}LOAC.shp")
borough_geo = gpd.read_file(f"{Directory}london_boroughs.geojson")

Add OA area data from GLA for additional context

This data comes from the GLA datastore and is called the MyLondon dataset 
https://data.london.gov.uk/dataset/mylondon

In [120]:
Directory = "~/NUMBAT-main/data/"

file_path = Directory + "MyLondon_LOAC_area_description_text_v3.csv"

# Detect the file's encoding before parsing it. The built-in open() does not
# expand "~" (unlike the pandas readers below), so expand it explicitly here.
with open(os.path.expanduser(file_path), 'rb') as f:
    result = chardet.detect(f.read())

MyLondon_LOAC_area_description_text_v3 = pd.read_csv(file_path, encoding=result['encoding'])

# Remaining context tables are read with pandas' default encoding
file_path = Directory + 'Summary.csv'
transSummary = pd.read_csv(file_path)

file_path = Directory + 'modelled_OA_rents.csv'
modelled_OA_rents = pd.read_csv(file_path)

file_path = Directory + 'MyLondon_fare_zone_OA.csv'
MyLondon_fare_zone_OA  = pd.read_csv(file_path)


file_path = Directory +'MyLondon_postcode_OA.csv'
MyLondon_postcode_OA  = pd.read_csv(file_path)




Add LOAC data for context
https://data.london.gov.uk/dataset/london-area-classification

In [121]:
# Read the 'LOAC classification' sheet of the LOAC workbook
file_path = f"{Directory}LOAC classification.xls"
LOAC = pd.read_excel(io=file_path, sheet_name='LOAC classification')


Get OA and Borough Coordinate Data

Download a dataset with Longitude, Latitude, OA, Easting and Northing to convert OA codes to Long/Lat
https://data.london.gov.uk/dataset/public-transport-accessibility-levels?resource=ca17d14f-379e-469e-917c-ff1f21c5e3d4


In [122]:
# Attach the coordinate table to the LOAC rows (bbox 'oa' matches LOAC 'OA')
file_path = f"{Directory}bbox.csv"
bbox = pd.read_csv(file_path)
LOAC = pd.merge(bbox, LOAC, left_on='oa', right_on='OA')

Use geopy to get Longitude and Latitude

In [123]:
# Create a geolocator object to get borough long/lat coords
locations = {}
geolocator = Nominatim(user_agent="explorer")

# Function to get the location of a London borough
def get_london_location(name):
    """Return ``(latitude, longitude)`` for a London borough via Nominatim.

    Tries "Borough of <name>, London, UK" first and falls back to
    "<name>, London, UK". Both queries use the same timeout.

    Raises
    ------
    ValueError
        If neither query returns a result (previously this surfaced as an
        AttributeError on ``None``).
    """
    for query in (f"Borough of {name}, London, UK", f"{name}, London, UK"):
        loc = geolocator.geocode(query, timeout=500)
        if loc is not None:
            return (loc.latitude, loc.longitude)

    raise ValueError(f"Could not geocode London borough {name!r}")

# Extract the lat and long for each location (one lookup per distinct borough)
for b in np.unique(LOAC["Local Authority"]):
    locations[b] = get_london_location(b)
LOAC["Lat"] = LOAC["Local Authority"].map(lambda name: locations[name][0])
LOAC["Long"] = LOAC["Local Authority"].map(lambda name: locations[name][1])

Make Dictionary to map OA level groups to colours

In [124]:
# Colour dictionary used to shade the map by LOAC group for added context

# Candidate colours per supergroup letter. The first character of a group code selects
# the list and its second character (a digit) selects the position within that list.
# If a new dataset has more groups, extend the lists appropriately.

group_colors = {
    'A': ['black', 'blue', 'navy', 'royalblue', 'slateblue'],
    'B': ['black', 'darkgreen', 'lime', 'seagreen', 'green'],
    'C': ['black', 'red', 'darkred', 'firebrick', 'crimson'],
    'D': ['black', 'darkgray', 'slategray', 'gray', 'dimgrey'],
    'E': ['black', 'lightgray', 'silver', 'gainsboro', 'whitesmoke'],
    'F': ['black', 'brown', 'sienna', 'saddlebrown', 'chocolate'],
    'G': ['black', 'yellow', 'gold', 'khaki', 'lightgoldenrodyellow'],
    'H': ['black', 'purple', 'plum', 'orchid', 'lavender']
}


# Unique Group codes present in the LOAC DataFrame
unique_groups = np.unique(LOAC['Group'])

# Map every Group code to its colour
colordict = {}
for group in unique_groups:
    supergroup, shade = group[0], int(group[1])
    colordict[group] = group_colors[supergroup][shade]


# Alias kept so both spellings are available
color_dict = colordict


Add Borough Level Data

Income data from GLA datastore https://data.london.gov.uk/download/average-income-tax-payers-borough


In [125]:
# Second sheet of the GLA workbook; its header spans two rows
income_df = pd.read_excel(
    "https://data.london.gov.uk/download/average-income-tax-payers-borough/392e86d4-f1d3-4f06-a6a5-7fcd0fd65948/income-of-tax-payers.xls",
    sheet_name=1,
    header=[0, 1],
)

# Clean data for analysis: drop the first row and flatten the two header levels into one name
income_df = income_df.drop(index=income_df.index[0])
income_df.columns = [' '.join(levels) for levels in income_df.columns]

income_df = (
    income_df
    .rename(columns={'Unnamed: 0_level_0 Code': 'lsoa', 'Unnamed: 1_level_0 Area': "Borough"})
    .dropna(subset=["Borough"])
    # Borough names are written with spaces instead of hyphens
    .assign(Borough=lambda frame: frame["Borough"].map(lambda name: name.replace("-", " ")))
    # Remove rows that aren't boroughs
    .loc[lambda frame: ~frame["Borough"].isin([
        'East Midlands', 'Yorkshire and The Humber', 'East of England', 'England', 'London',
        'North East', 'North West', 'Northern Ireland', 'Scotland', 'South East',
        'South West', 'United Kingdom', 'Wales', 'West Midlands',
    ])]
)


## Maps with Different Backgrounds for Context

### Borough Level Maps

These maps take borough_geo, the GeoJSON at the borough level. They also take income_data, the borough-level data used for context (for example income_df), and BoroughCol, the column of income_data to display. To choose BoroughCol you may run income_df.columns.

This compares two dataframes from data_dict and uses the TfL colour scheme

In [308]:
def borough_map_colour_scheme(dfone, dftwo, column_name, income_data, borough_geo, BoroughCol, mode='total'):
    """Map the change in link volume between two dataframes over a borough choropleth.

    For every link and line colour ('Hex') the plotted value is the summed
    ``dftwo[column_name]`` minus the summed ``dfone[column_name]``; a link
    missing from one frame counts as 0 there. Lines use the TfL colour
    ``"#" + Hex``, decreases are dashed, and width is proportional to the
    absolute change (largest is about 15 px).

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Link-level data with the columns 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex', 'Dir' and
        ``column_name``. Neither frame is modified.
    column_name : str
        Volume column compared between the two frames.
    income_data : pandas.DataFrame
        Borough-level data with a 'Borough' column and ``BoroughCol``.
    borough_geo
        Borough geometries passed to ``folium.Choropleth``; matched to
        'Borough' through ``feature.properties.name``.
    BoroughCol : str
        Column of ``income_data`` shown in the choropleth.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps only rows whose 'Dir' is EB/IB/IR/NB and 'SOW' only rows
        whose 'Dir' is OB/OR/SB/WB. 'total' keeps every row but reverses the
        EB/IB/IR/NB rows so both directions of a link are summed together.

    Returns
    -------
    folium.Map
    """
    # Check if the mode is valid
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(frame, out_col):
        """Return one row per link and line with `column_name` summed into `out_col`."""
        links = frame.copy()
        going_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap the two ends of every North/In/East row (vectorised) so that
            # opposite directions of the same link share one key.
            flip = going_nie.to_numpy()
            for start, stop in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
                new_start = np.where(flip, links[stop], links[start])
                new_stop = np.where(flip, links[start], links[stop])
                links[start], links[stop] = new_start, new_stop
        elif mode == 'NIE':
            links = links[going_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode so repeated keys cannot be cross-joined by the merge
        totals = links.groupby(link_keys)[column_name].sum().reset_index()
        return totals.rename(columns={column_name: out_col})

    # Merge both dataframes and fill missing values with 0
    merged_df = link_totals(dfone, column_name + '_df1').merge(
        link_totals(dftwo, column_name + '_df2'), on=link_keys, how='outer'
    ).fillna(0)

    # Missing routes become df2-0 or 0-df1
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']
    plot_df = merged_df[link_keys + ['Difference']]

    london_coords = [51.5074, -0.1278]

    # Create a map of London
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Add Borough Data
    # Use 'YlOrRd' or another colour scheme of your choice
    folium.Choropleth(
        geo_data=borough_geo,
        data=income_data,
        columns=['Borough', BoroughCol],
        key_on='feature.properties.name',
        fill_color='YlOrRd',
        fill_opacity=0.5,
        line_opacity=0.4,
        legend_name=BoroughCol,
        name=BoroughCol,
        overlay=False,
        show=False
    ).add_to(map_london)

    # Set a cap on line width; .0001 avoids a 0 division and Series.max()
    # (unlike the builtin max) tolerates an empty selection.
    lineFactor = 15 / (plot_df['Difference'].abs().max() + .0001)

    for link in plot_df.to_dict('records'):
        change = link['Difference']
        folium.PolyLine(
            locations=[[link['StartLat'], link['StartLong']], [link['StopLat'], link['StopLong']]],
            color="#" + link['Hex'],  # TfL colour scheme
            dash_array='5, 10' if change < 0 else None,  # dashes for negative volume
            weight=abs(change) * lineFactor
        ).add_to(map_london)

    # One marker pair per distinct station (both link ends), instead of a new
    # pair for every row's start station only.
    stations = dict.fromkeys(
        list(zip(plot_df['From Station'], plot_df['StartLat'], plot_df['StartLong']))
        + list(zip(plot_df['To Station'], plot_df['StopLat'], plot_df['StopLong']))
    )
    for name, lat, lon in stations:
        # Visible station node with popup text
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(name)).add_to(map_london)

        # Larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[lat, lon],
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(name)).add_to(map_london)

    return map_london


This map does not use the TfL colour scheme and instead uses red for negative volume and green for positive.

In [307]:
def borough_map_with_nodes(dfone, dftwo, column_name, income_data, borough_geo, BoroughCol, mode='total'):
    """Map the change in link volume (green/red) over a borough choropleth.

    For every station-to-station link the plotted value is the summed
    ``dftwo[column_name]`` minus the summed ``dfone[column_name]``; a link
    missing from one frame counts as 0 there. Increases are green, all other
    links red, decreases are dashed, and width is proportional to the
    absolute change (largest is about 10 px).

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Link-level data with the columns 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Dir' and
        ``column_name``. Neither frame is modified.
    column_name : str
        Volume column compared between the two frames.
    income_data : pandas.DataFrame
        Borough-level data with a 'Borough' column and ``BoroughCol``.
    borough_geo
        Borough geometries passed to ``folium.Choropleth``; matched to
        'Borough' through ``feature.properties.name``.
    BoroughCol : str
        Column of ``income_data`` shown in the choropleth.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps only rows whose 'Dir' is EB/IB/IR/NB and 'SOW' only rows
        whose 'Dir' is OB/OR/SB/WB. 'total' keeps every row but reverses the
        EB/IB/IR/NB rows so both directions of a link are summed together.

    Returns
    -------
    folium.Map
    """
    # Check if the mode is valid
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(frame, out_col):
        """Return one row per link with `column_name` summed into `out_col`."""
        links = frame.copy()
        going_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap the two ends of every North/In/East row (vectorised) so that
            # opposite directions of the same link share one key.
            flip = going_nie.to_numpy()
            for start, stop in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
                new_start = np.where(flip, links[stop], links[start])
                new_stop = np.where(flip, links[start], links[stop])
                links[start], links[stop] = new_start, new_stop
        elif mode == 'NIE':
            links = links[going_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: links served by several lines occur on several
        # rows, and merging them un-aggregated would cross-join and inflate them.
        totals = links.groupby(link_keys)[column_name].sum().reset_index()
        return totals.rename(columns={column_name: out_col})

    # Merge both dataframes and fill missing values with 0
    merged_df = link_totals(dfone, column_name + '_df1').merge(
        link_totals(dftwo, column_name + '_df2'), on=link_keys, how='outer'
    ).fillna(0)

    # Calculate the difference
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']
    plot_df = merged_df[link_keys + ['Difference']]

    london_coords = [51.5074, -0.1278]

    # Create a map of London
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Add Borough Data
    # Use 'YlOrRd' or another color scheme of your choice
    folium.Choropleth(
        geo_data=borough_geo,
        data=income_data,
        columns=['Borough', BoroughCol],
        key_on='feature.properties.name',
        fill_color='YlOrRd',
        fill_opacity=0.5,
        line_opacity=0.4,
        legend_name=BoroughCol,
        name=BoroughCol,
        overlay=False,
        show=False
    ).add_to(map_london)

    # Set a cap on line width; .0001 avoids division by zero and Series.max()
    # (unlike the builtin max) tolerates an empty selection.
    lineFactor = 10 / (plot_df['Difference'].abs().max() + .0001)

    for link in plot_df.to_dict('records'):
        change = link['Difference']
        folium.PolyLine(
            locations=[[link['StartLat'], link['StartLong']], [link['StopLat'], link['StopLong']]],
            color='green' if change > 0 else 'red',  # green for positive values, red otherwise
            dash_array='5, 10' if change < 0 else None,  # dashes for negative volume
            weight=abs(change) * lineFactor
        ).add_to(map_london)

    # One marker pair per distinct station (both link ends), instead of a new
    # pair for every row's start station only.
    stations = dict.fromkeys(
        list(zip(plot_df['From Station'], plot_df['StartLat'], plot_df['StartLong']))
        + list(zip(plot_df['To Station'], plot_df['StopLat'], plot_df['StopLong']))
    )
    for name, lat, lon in stations:
        # Visible station node with popup text
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(name)).add_to(map_london)

        # Larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[lat, lon],
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(name)).add_to(map_london)

    return map_london


This map displays a single time and column with background context

In [306]:
def borough_map_one_period_colour_scheme(dfone, col, income_data, borough_geo, BoroughCol, mode='total'):
    """Map the link volume of one dataframe column over a borough choropleth.

    ``col`` is summed per link and line colour ('Hex'). Lines use the TfL colour
    ``"#" + Hex`` and their width is proportional to the absolute volume
    (largest is about 10 px).

    Parameters
    ----------
    dfone : pandas.DataFrame
        Link-level data with the columns 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex', 'Dir' and
        ``col``. The frame is not modified.
    col : str
        Volume column to display; missing values count as 0.
    income_data : pandas.DataFrame
        Borough-level data with a 'Borough' column and ``BoroughCol``.
    borough_geo
        Borough geometries passed to ``folium.Choropleth``; matched to
        'Borough' through ``feature.properties.name``.
    BoroughCol : str
        Column of ``income_data`` shown in the choropleth.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps only rows whose 'Dir' is EB/IB/IR/NB and 'SOW' only rows
        whose 'Dir' is OB/OR/SB/WB. 'total' keeps every row but reverses the
        EB/IB/IR/NB rows so both directions of a link are summed together.

    Returns
    -------
    folium.Map
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    links = dfone.copy()
    going_nie = links['Dir'].isin(nie_dirs)

    # Filter the dataframe based on direction
    if mode == "total":
        # Swap the two ends of every North/In/East row (vectorised) so that
        # opposite directions of the same link share one key.
        flip = going_nie.to_numpy()
        for start, stop in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
            new_start = np.where(flip, links[stop], links[start])
            new_stop = np.where(flip, links[start], links[stop])
            links[start], links[stop] = new_start, new_stop
    elif mode == 'NIE':
        links = links[going_nie]
    else:
        links = links[links['Dir'].isin(sow_dirs)]

    # One aggregation for every mode. sum() already treats missing volumes as 0,
    # and rows with a missing key are dropped rather than plotted at 0/0.
    plot_df = links.groupby(link_keys)[col].sum().reset_index()

    london_coords = [51.5074, -0.1278]

    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Add Borough Data
    # Use 'YlOrRd' or another colour scheme of your choice
    folium.Choropleth(
        geo_data=borough_geo,
        data=income_data,
        columns=['Borough', BoroughCol],
        key_on='feature.properties.name',
        fill_color='YlOrRd',
        fill_opacity=0.5,
        line_opacity=0.4,
        legend_name=BoroughCol,
        name=BoroughCol,
        overlay=False,
        show=False
    ).add_to(map_london)

    # Cap line width; .0001 avoids a 0 division and Series.max() (unlike the
    # builtin max) tolerates an empty selection.
    lineFactor = 10 / (plot_df[col].abs().max() + .0001)

    for link in plot_df.to_dict('records'):
        folium.PolyLine(
            locations=[[link['StartLat'], link['StartLong']], [link['StopLat'], link['StopLong']]],
            color="#" + link['Hex'],  # TfL colour scheme
            weight=abs(link[col]) * lineFactor
        ).add_to(map_london)

    # One marker pair per distinct station (both link ends), instead of a new
    # pair for every row's start station only.
    stations = dict.fromkeys(
        list(zip(plot_df['From Station'], plot_df['StartLat'], plot_df['StartLong']))
        + list(zip(plot_df['To Station'], plot_df['StopLat'], plot_df['StopLong']))
    )
    for name, lat, lon in stations:
        # Visible station node with popup text
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(name)).add_to(map_london)

        # Larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[lat, lon],
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(name)).add_to(map_london)

    return map_london


Instead of comparing two different days of the week or years, compare two different times of day

In [305]:
def borough_map__same_day_colour_scheme(dfone, col1, col2, income_data, borough_geo, BoroughCol, mode='total'):
    """Map the change between two columns of one dataframe over a borough choropleth.

    For every link and line colour ('Hex') the plotted value is the summed
    ``col2`` minus the summed ``col1`` (missing volumes count as 0). Lines use
    the TfL colour ``"#" + Hex``, decreases are dashed, and width is
    proportional to the absolute change (largest is about 15 px).

    Parameters
    ----------
    dfone : pandas.DataFrame
        Link-level data with the columns 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex', 'Dir', ``col1``
        and ``col2``. The frame is not modified.
    col1, col2 : str
        Volume columns to compare, e.g. two times of day.
    income_data : pandas.DataFrame
        Borough-level data with a 'Borough' column and ``BoroughCol``.
    borough_geo
        Borough geometries passed to ``folium.Choropleth``; matched to
        'Borough' through ``feature.properties.name``.
    BoroughCol : str
        Column of ``income_data`` shown in the choropleth.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps only rows whose 'Dir' is EB/IB/IR/NB and 'SOW' only rows
        whose 'Dir' is OB/OR/SB/WB. 'total' keeps every row but reverses the
        EB/IB/IR/NB rows so both directions of a link are summed together.

    Returns
    -------
    folium.Map
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    links = dfone.copy()
    going_nie = links['Dir'].isin(nie_dirs)

    # Filter the dataframe based on direction
    if mode == "total":
        # Swap the two ends of every North/In/East row (vectorised) so that
        # opposite directions of the same link share one key.
        flip = going_nie.to_numpy()
        for start, stop in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
            new_start = np.where(flip, links[stop], links[start])
            new_stop = np.where(flip, links[start], links[stop])
            links[start], links[stop] = new_start, new_stop
    elif mode == 'NIE':
        links = links[going_nie]
    else:
        links = links[links['Dir'].isin(sow_dirs)]

    # Sum both columns per link first, then subtract. The groupby result is a
    # fresh frame, so adding 'Difference' never writes into a filtered slice.
    value_cols = list(dict.fromkeys([col1, col2]))  # tolerate col1 == col2
    merged_df = links.groupby(link_keys)[value_cols].sum().reset_index()

    # Calculate the difference
    merged_df['Difference'] = merged_df[col2] - merged_df[col1]
    plot_df = merged_df[link_keys + ['Difference']]

    london_coords = [51.5074, -0.1278]

    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron')

    # Add Borough Data
    # Use 'YlOrRd' or another colour scheme of your choice
    folium.Choropleth(
        geo_data=borough_geo,
        data=income_data,
        columns=['Borough', BoroughCol],
        key_on='feature.properties.name',
        fill_color='YlOrRd',
        fill_opacity=0.5,
        line_opacity=0.4,
        legend_name=BoroughCol,
        name=BoroughCol,
        overlay=False,
        show=False
    ).add_to(map_london)

    # Cap line width; .0001 avoids a 0 division and Series.max() (unlike the
    # builtin max) tolerates an empty selection.
    lineFactor = 15 / (plot_df['Difference'].abs().max() + .0001)

    for link in plot_df.to_dict('records'):
        change = link['Difference']
        folium.PolyLine(
            locations=[[link['StartLat'], link['StartLong']], [link['StopLat'], link['StopLong']]],
            color="#" + link['Hex'],  # TfL colour scheme
            dash_array='1, 15' if change < 0 else None,  # dashed lines for negative volume
            weight=abs(change) * lineFactor
        ).add_to(map_london)

    # One marker pair per distinct station (both link ends), instead of a new
    # pair for every row's start station only.
    stations = dict.fromkeys(
        list(zip(plot_df['From Station'], plot_df['StartLat'], plot_df['StartLong']))
        + list(zip(plot_df['To Station'], plot_df['StopLat'], plot_df['StopLong']))
    )
    for name, lat, lon in stations:
        # Visible station node with popup text
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(name)).add_to(map_london)

        # Larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[lat, lon],
            radius=10,  # Increase this value to make the clickable area larger
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(name)).add_to(map_london)

    return map_london


Add fare zones to the basic comparison map, both for looking at a single time and for comparing two

In [303]:
# Map with Fare Zones
def fare_map_colour_one(dfone, column_name, fare_data, geoson, mode='total', show_station_names=True):
    """Map the link volume of one dataframe column over a fare-zone background.

    ``column_name`` is summed per link and line colour ('Hex'). Lines use the
    colour ``"#" + Hex`` and their width is proportional to the absolute
    volume (largest is about 15 px). Areas of ``geoson`` are shaded by the
    fare zone looked up in ``fare_data``; areas without a fare zone are left
    unstyled. A fare-zone legend is added in the bottom-right corner.

    Parameters
    ----------
    dfone : pandas.DataFrame
        Link-level data with the columns 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex', 'Dir' and
        ``column_name``. The frame is not modified.
    column_name : str
        Volume column to display.
    fare_data : pandas.DataFrame
        Table with the columns 'OA11CD' and 'Fare_Zone'; when an 'OA11CD'
        occurs more than once its first row is used.
    geoson
        GeoJSON passed to ``folium.GeoJson``; each feature needs an 'OA11CD'
        property.
    mode : {'total', 'NIE', 'SOW'}
        'NIE' keeps only rows whose 'Dir' is EB/IB/IR/NB and 'SOW' only rows
        whose 'Dir' is OB/OR/SB/WB. 'total' keeps every row but reverses the
        EB/IB/IR/NB rows so both directions of a link are summed together.
    show_station_names : bool
        Accepted for interface compatibility; currently not used.

    Returns
    -------
    folium.Map
    """
    # Check if the mode is valid
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    link_keys = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    links = dfone.copy()
    going_nie = links['Dir'].isin(nie_dirs)

    # Filter the dataframe based on direction
    if mode == "total":
        # Swap the two ends of every North/In/East row (vectorised) so that
        # opposite directions of the same link share one key.
        flip = going_nie.to_numpy()
        for start, stop in [('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')]:
            new_start = np.where(flip, links[stop], links[start])
            new_stop = np.where(flip, links[start], links[stop])
            links[start], links[stop] = new_start, new_stop
    elif mode == 'NIE':
        links = links[going_nie]
    else:
        links = links[links['Dir'].isin(sow_dirs)]

    plot_df = links.groupby(link_keys)[column_name].sum().reset_index()

    # Create a color scale for Fare Zones
    color_scale = LinearColormap(
        ['#1a1a1a', '#404040', '#696969', '#a3a3a3', '#cccccc', '#f5f5f5'],
        vmin=1,
        vmax=6,
        caption='Fare Zones'
    )

    # Build the OA -> fare zone lookup once; scanning fare_data for every
    # feature is needlessly slow. keep-first matches the old `.values[0]`.
    zone_by_oa = fare_data.drop_duplicates('OA11CD').set_index('OA11CD')['Fare_Zone'].to_dict()

    def style_function(feature):
        """Shade a feature by its fare zone, or leave it invisible if unknown."""
        OA11CD = feature['properties']['OA11CD']
        if OA11CD not in zone_by_oa:
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': color_scale(zone_by_oa[OA11CD]),
            'fillOpacity': 0.5,
            'color': 'none',
            'weight': 0
        }

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Add choropleth layer for Fare Zones
    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(fields=['OA11CD'], aliases=['LSOA Code'])
    ).add_to(map_london)

    # Cap line width. This single-period map has no 'Difference' column (reading
    # it raised KeyError), so widths come from `column_name` itself. .0001 avoids
    # division by zero and Series.max() tolerates an empty selection.
    lineFactor = 15 / (plot_df[column_name].abs().max() + .0001)

    for link in plot_df.to_dict('records'):
        # Add PolyLine for each link in its line colour
        folium.PolyLine(
            locations=[[link['StartLat'], link['StartLong']], [link['StopLat'], link['StopLong']]],
            color="#" + link['Hex'],
            weight=abs(link[column_name]) * lineFactor
        ).add_to(map_london)

    # One marker pair per distinct station (both link ends), instead of a new
    # pair for every row's start station only.
    stations = dict.fromkeys(
        list(zip(plot_df['From Station'], plot_df['StartLat'], plot_df['StartLong']))
        + list(zip(plot_df['To Station'], plot_df['StopLat'], plot_df['StopLong']))
    )
    for name, lat, lon in stations:
        # Visible station node with popup text
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(name)).add_to(map_london)

        # Larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[lat, lon],
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(name)).add_to(map_london)

    legend_html = '''
    <div style="position: fixed; bottom: 10px; right: 20px; width: 150px; background-color: white; z-index:9999; font-size:14px; padding: 10px; border:2px solid grey;">
    <strong>Fare Zones</strong><br>
    '''

    # One legend entry per distinct fare zone, in ascending order
    for zone in fare_data['Fare_Zone'].drop_duplicates().sort_values():
        legend_html += f'<span style="color:{color_scale(zone)}">Zone {zone}</span><br>'

    legend_html += '</div>'

    # Add the legend to the map
    map_london.get_root().html.add_child(folium.Element(legend_html))

    return map_london


In [304]:
# Map with Fare Zones
def fare_map(dfone, dftwo, column_name, fare_data, geoson, mode='total', show_station_names=True):
    """Draw the change in link volume between two datasets over a fare-zone background.

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Link-level data. Each must contain 'Dir', 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong' and ``column_name``.
        Neither frame is modified. The plotted value is ``dftwo - dfone``.
    column_name : str
        Column holding the volume to compare.
    fare_data : pandas.DataFrame
        Must contain 'OA11CD' and 'Fare_Zone'.
    geoson : GeoJSON-like
        Passed unchanged to ``folium.GeoJson``; every feature needs an
        'OA11CD' property.
    mode : {'total', 'NIE', 'SOW'}
        'total' reverses the links whose 'Dir' is EB/IB/IR/NB and adds them to
        the opposite direction; 'NIE' keeps only EB/IB/IR/NB links; 'SOW'
        keeps only OB/OR/SB/WB links.
    show_station_names : bool
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    folium.Map
        Green solid lines mark increases, red dashed lines mark decreases and
        line width is proportional to the absolute difference (widest = 15).

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the accepted values.
    """
    # Check if the mode is valid
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    key_cols = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(df):
        """Return one row per link with ``column_name`` summed, honouring ``mode``."""
        links = df.copy()
        is_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap start/stop on the NIE rows column by column (keeps dtypes and
            # avoids a slow row-wise apply). The right-hand side is evaluated in
            # full before either column is overwritten.
            for start, stop in (('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')):
                links[start], links[stop] = (
                    links[start].where(~is_nie, links[stop]),
                    links[stop].where(~is_nie, links[start]),
                )
        elif mode == 'NIE':
            links = links[is_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: merging un-aggregated frames is many-to-many
        # when several rows share a station pair, which multiplies the difference.
        return links.groupby(key_cols).agg({column_name: 'sum'}).reset_index()

    # Merge both dataframes and fill missing values with 0 so a link present in
    # only one dataset contributes its full volume to the difference.
    merged_df = pd.merge(
        link_totals(dfone), link_totals(dftwo),
        on=key_cols, how='outer', suffixes=('_df1', '_df2')
    ).fillna(0)

    # Calculate the difference
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']
    plot_df = merged_df[key_cols + ['Difference']]

    # Create a color scale for Fare Zones
    color_scale = LinearColormap(
        ['#1a1a1a', '#404040', '#696969', '#a3a3a3', '#cccccc', '#f5f5f5'],
        vmin=1,
        vmax=6,
        caption='Fare Zones'
    )

    # Build the OA -> fare zone lookup once; the first row wins for duplicated codes.
    zone_by_oa = fare_data.drop_duplicates('OA11CD').set_index('OA11CD')['Fare_Zone'].to_dict()

    def style_function(feature):
        """Shade a feature by its fare zone; features without a zone are invisible."""
        fare_zone = zone_by_oa.get(feature['properties']['OA11CD'])
        if fare_zone is None or pd.isna(fare_zone):
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': color_scale(fare_zone),
            'fillOpacity': 0.5,
            'color': 'none',
            'weight': 0
        }

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Add choropleth layer for Fare Zones
    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(fields=['OA11CD'], aliases=['LSOA Code'])
    ).add_to(map_london)

    # Cap line width: the largest absolute difference maps to a width of 15.
    peak = plot_df['Difference'].abs().max()
    if pd.isna(peak):  # nothing to draw
        peak = 0
    lineFactor = 15 / (peak + .0001)  # Add .0001 to avoid division by zero

    for _, row in plot_df.iterrows():
        # Use red lines for negative volume and green for positive
        color = 'green' if row['Difference'] > 0 else 'red'
        linewidth = abs(row['Difference']) * lineFactor

        # Add PolyLine for each row
        folium.PolyLine(
            locations=[[row['StartLat'], row['StartLong']], [row['StopLat'], row['StopLong']]],
            color=color,
            dash_array='1, 15' if row['Difference'] < 0 else None,  # Use dashed lines for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    # Generate the legend HTML
    legend_html = '''
    <div style="position: fixed; bottom: 10px; right: 20px; width: 150px; background-color: white; z-index:9999; font-size:14px; padding: 10px; border:2px solid grey;">
    <strong>Fare Zones</strong><br>
    '''

    for zone in fare_data['Fare_Zone'].drop_duplicates().sort_values():
        legend_html += f'<span style="color:{color_scale(zone)}">Zone {zone}</span><br>'

    legend_html += '</div>'

    # Add the legend to the map
    map_london.get_root().html.add_child(folium.Element(legend_html))

    return map_london


### OA Level Maps

Add rent information at the OA level

In [204]:

def rent_map(dfone, dftwo, column_name, rent_data, geoson, rents, AvgRent='Ave_rent_2_bedroom', mode='total'):
    """Draw the change in link volume between two datasets over an average-rent background.

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Link-level data. Each must contain 'Dir', 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong' and ``column_name``.
        Neither frame is modified. The plotted value is ``dftwo - dfone``.
    column_name : str
        Column holding the volume to compare.
    rent_data : any
        Ignored. The rent table is always rebuilt from ``rents``; the
        parameter is kept so existing calls keep working.
    geoson : GeoJSON-like
        Passed unchanged to ``folium.GeoJson``; every feature needs an
        'OA11CD' property.
    rents : pandas.DataFrame
        Must contain 'OA11CD' and the ``AvgRent`` column.
    AvgRent : str
        One of 'Ave_rent_1_bedroom' ... 'Ave_rent_4_bedroom'. Any other value
        silently falls back to 'Ave_rent_3_bedroom'.
    mode : {'total', 'NIE', 'SOW'}
        'total' reverses the links whose 'Dir' is EB/IB/IR/NB and adds them to
        the opposite direction; 'NIE' keeps only EB/IB/IR/NB links; 'SOW'
        keeps only OB/OR/SB/WB links. Any other value is treated as 'total'.

    Returns
    -------
    folium.Map
        Green solid lines mark increases, red dashed lines mark decreases and
        line width is proportional to the absolute difference (widest = 15).
    """
    if mode not in ['NIE', 'SOW', 'total']:
        mode = 'total'

    # These are the options for average rent
    # NOTE(review): the fallback is 3-bedroom while the signature default is
    # 2-bedroom; left as-is because callers may rely on it - confirm intent.
    if AvgRent not in ['Ave_rent_1_bedroom', 'Ave_rent_2_bedroom', 'Ave_rent_3_bedroom', 'Ave_rent_4_bedroom']:
        AvgRent = 'Ave_rent_3_bedroom'

    # Rent per output area, indexed by OA code
    rent_by_oa = rents.set_index('OA11CD')[AvgRent]

    # Update the color scale to represent rent prices
    max_rent = rent_by_oa.max()
    min_rent = rent_by_oa.min()
    interval_1 = min_rent + (max_rent - min_rent) * 1 / 3
    interval_2 = min_rent + (max_rent - min_rent) * 2 / 3

    # Set the colour scheme for the rent data.
    # NOTE(review): the last index stop is the 2/3 point rather than max_rent -
    # confirm that this is the intended placement of the darkest colour.
    color_scale = LinearColormap(
        ['#808080', '#32cd32', '#006400'],
        vmin=min_rent,
        vmax=max_rent,
        index=[min_rent, interval_1, interval_2],
        caption='Average Rent'
    )

    key_cols = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(df):
        """Return one row per link with ``column_name`` summed, honouring ``mode``."""
        links = df.copy()
        is_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap start/stop on the NIE rows column by column (keeps dtypes and
            # avoids a slow row-wise apply). The right-hand side is evaluated in
            # full before either column is overwritten.
            for start, stop in (('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')):
                links[start], links[stop] = (
                    links[start].where(~is_nie, links[stop]),
                    links[stop].where(~is_nie, links[start]),
                )
        elif mode == 'NIE':
            links = links[is_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: merging un-aggregated frames is many-to-many
        # when several rows share a station pair, which multiplies the difference.
        return links.groupby(key_cols).agg({column_name: 'sum'}).reset_index()

    # Merge both dataframes and fill missing values with 0 so a link present in
    # only one dataset contributes its full volume to the difference.
    merged_df = pd.merge(
        link_totals(dfone), link_totals(dftwo),
        on=key_cols, how='outer', suffixes=('_df1', '_df2')
    ).fillna(0)

    # Calculate the difference
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']
    plot_df = merged_df[key_cols + ['Difference']]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Build the OA -> rent lookup once; the first row wins for duplicated codes.
    rent_lookup = rent_by_oa[~rent_by_oa.index.duplicated()].to_dict()

    def style_function(feature):
        """Shade a feature by its rent; features without a rent are invisible."""
        rent_value = rent_lookup.get(feature['properties']['OA11CD'])
        if rent_value is None or pd.isna(rent_value):
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': color_scale(rent_value),
            # Higher rents are drawn more opaque
            'fillOpacity': np.interp(rent_value, [min_rent, max_rent], [0.1, 1]),
            'color': 'none',
            'weight': 0
        }

    # NOTE(review): the layer is still named "Fare Zones" although it shows rent.
    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function
    ).add_to(map_london)

    # Cap line width: the largest absolute difference maps to a width of 15.
    peak = plot_df['Difference'].abs().max()
    if pd.isna(peak):  # nothing to draw
        peak = 0
    lineFactor = 15 / (peak + .0001)  # Add .0001 for 0 division problem

    for _, row in plot_df.iterrows():
        # Set colour based on if volume is positive or not
        color = 'green' if row['Difference'] > 0 else 'red'
        linewidth = abs(row['Difference']) * lineFactor

        folium.PolyLine(
            locations=[[row['StartLat'], row['StartLong']], [row['StopLat'], row['StopLong']]],
            color=color,
            dash_array='1, 15' if row['Difference'] < 0 else None,  # Use dashed lines for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    # Show the rent colour bar on the map
    color_scale.add_to(map_london)

    return map_london


Draw the super groups as the map background

In [313]:
def OA_supergroup_map(dfone, dftwo, column_name, OA_data, geoson, mode='total', group_colours=None):
    """Draw the change in link volume between two datasets over a super-group background.

    Parameters
    ----------
    dfone, dftwo : pandas.DataFrame
        Link-level data. Each must contain 'Dir', 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong' and ``column_name``.
        Neither frame is modified. The plotted value is ``dftwo - dfone``.
    column_name : str
        Column holding the volume to compare.
    OA_data : pandas.DataFrame
        Must contain 'lsoa', 'Group' and 'Group Name'.
    geoson : GeoJSON-like
        Passed unchanged to ``folium.GeoJson``; every feature needs an
        'LSOA11CD' property.
    mode : {'total', 'NIE', 'SOW'}
        'total' reverses the links whose 'Dir' is EB/IB/IR/NB and adds them to
        the opposite direction; 'NIE' keeps only EB/IB/IR/NB links; 'SOW'
        keeps only OB/OR/SB/WB links.
    group_colours : dict, optional
        Maps a 'Group' value to a colour. When omitted, the notebook-level
        ``color_dict`` is used, as before.

    Returns
    -------
    folium.Map
        Green solid lines mark increases, red dashed lines mark decreases and
        line width is proportional to the absolute difference (widest = 10).

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the accepted values.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    # Fall back to the notebook global only when no palette is supplied
    palette = color_dict if group_colours is None else group_colours

    key_cols = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(df):
        """Return one row per link with ``column_name`` summed, honouring ``mode``."""
        links = df.copy()
        is_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap start/stop on the NIE rows column by column (keeps dtypes and
            # avoids a slow row-wise apply). The right-hand side is evaluated in
            # full before either column is overwritten.
            for start, stop in (('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')):
                links[start], links[stop] = (
                    links[start].where(~is_nie, links[stop]),
                    links[stop].where(~is_nie, links[start]),
                )
        elif mode == 'NIE':
            links = links[is_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: merging un-aggregated frames is many-to-many
        # when several rows share a station pair, which multiplies the difference.
        return links.groupby(key_cols).agg({column_name: 'sum'}).reset_index()

    # Merge both dataframes and fill missing values with 0 so a link present in
    # only one dataset contributes its full volume to the difference.
    merged_df = pd.merge(
        link_totals(dfone), link_totals(dftwo),
        on=key_cols, how='outer', suffixes=('_df1', '_df2')
    ).fillna(0)

    # Calculate the difference
    merged_df['Difference'] = merged_df[column_name + '_df2'] - merged_df[column_name + '_df1']
    plot_df = merged_df[key_cols + ['Difference']]

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Build the area -> group lookup once; the first row wins for duplicated codes.
    group_by_area = OA_data.drop_duplicates('lsoa').set_index('lsoa')['Group'].to_dict()

    def style_function(feature):
        """Shade a feature by its group colour; unmatched features are invisible."""
        area_code = feature['properties']['LSOA11CD']
        if area_code not in group_by_area:
            # Previously an unmatched feature raised IndexError; hide it instead.
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': palette.get(group_by_area[area_code]),
            'fillOpacity': 0.2,
            'color': None,
            'weight': 1
        }

    # Add choropleth layer for Super Group Names
    folium.GeoJson(
        geoson,
        name="Super Group Name",
        style_function=style_function
    ).add_to(map_london)

    # Cap line width: the largest absolute difference maps to a width of 10.
    peak = plot_df['Difference'].abs().max()
    if pd.isna(peak):  # nothing to draw
        peak = 0
    lineFactor = 10 / (peak + .0001)  # 0 division problem

    for _, row in plot_df.iterrows():
        # Use red lines for negative volume and green for positive
        color = 'green' if row['Difference'] > 0 else 'red'
        linewidth = abs(row['Difference']) * lineFactor

        folium.PolyLine(
            locations=[[row['StartLat'], row['StartLong']], [row['StopLat'], row['StopLong']]],
            color=color,
            dash_array='1, 15' if row['Difference'] < 0 else None,  # Use dashed lines for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    # Add a custom legend
    legend_html = '''
    <div style="position: fixed; bottom: 10px; right: 20px; width: 150px; background-color: white; z-index:9999; font-size:14px; padding: 10px; border:2px solid grey;">
    <strong>Super Group</strong><br>
    '''

    super_group_names = OA_data[['Group', 'Group Name']].drop_duplicates().sort_values('Group')

    for group, group_name in zip(super_group_names['Group'], super_group_names['Group Name']):
        legend_html += f"<span style='color:{(palette.get(group))}; font-weight: bold;'>{group}</span> - {group_name}<br>"

    legend_html += '</div>'

    # Add the legend to the map
    map_london.get_root().html.add_child(folium.Element(legend_html))

    return map_london


Use the summary section of the MyLondon data to make maps

Select `OACol`, the column to use as the map background

In [249]:
def OA_transSummary_multtype(dfone, column_name, OA_data, OACol, geoson, colours = ['lightblue','green'], mode='total', dftwo = False, col2 = False):
    """Draw link volumes, or a difference in volumes, over an OA-level background column.

    What is plotted depends on the optional arguments:

    * ``dftwo`` is a DataFrame: ``dftwo[column_name] - dfone[column_name]``
      (``col2`` is then ignored for the difference);
    * otherwise, ``col2`` given: ``dfone[col2] - dfone[column_name]``;
    * otherwise: ``dfone[column_name]``.

    Parameters
    ----------
    dfone : pandas.DataFrame
        Link-level data containing 'Dir', 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', ``column_name`` and,
        if used, ``col2``. Not modified.
    column_name : str
        Volume column.
    OA_data : pandas.DataFrame
        Must contain 'OA' and ``OACol``. It is no longer modified: when
        ``OACol`` is "Rent_per_m" the values are divided by their maximum on a
        private copy so they lie on the 0-1 scale the colour map expects.
    OACol : str
        Background column; the colour map spans 0 to 1.
    geoson : GeoJSON-like
        Passed unchanged to ``folium.GeoJson``; every feature needs an
        'OA11CD' property.
    colours : list
        Two colours for the low and high end of the background scale.
    mode : {'total', 'NIE', 'SOW'}
        'total' reverses the links whose 'Dir' is EB/IB/IR/NB and adds them to
        the opposite direction; 'NIE' keeps only EB/IB/IR/NB links; 'SOW'
        keeps only OB/OR/SB/WB links. Links are summed per station pair in
        every mode.
    dftwo : pandas.DataFrame or False
        Optional second dataset.
    col2 : str or False
        Optional second column of ``dfone``.

    Returns
    -------
    folium.Map
        Green solid lines mark positive values, red dashed lines mark negative
        ones and line width is proportional to the absolute value (widest = 15).

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the accepted values.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    background = OA_data.set_index('OA')[OACol]

    # Rent_per_m is in pounds not a scale between 0-1, so for consistency convert this column
    if OACol == "Rent_per_m":
        background = background / background.max()

    # Build the OA -> value lookup once; the first row wins for duplicated codes.
    value_by_oa = background[~background.index.duplicated()].to_dict()

    # Define the color map
    cmap = branca.colormap.LinearColormap(colors=colours, index=[0,1],vmin=0,vmax=1)

    def map_to_color(value):
        """Map a background value to a hex colour."""
        return colors.rgb2hex(cmap(value))

    # See how many dfs / cols there are (explicit check instead of a bare except)
    twodfs = isinstance(dftwo, pd.DataFrame)
    twocols = bool(col2)

    key_cols = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(df, value_cols):
        """Return one row per link with ``value_cols`` summed, honouring ``mode``."""
        links = df.copy()
        is_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap start/stop on the NIE rows column by column (keeps dtypes and
            # avoids a slow row-wise apply). The right-hand side is evaluated in
            # full before either column is overwritten.
            for start, stop in (('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')):
                links[start], links[stop] = (
                    links[start].where(~is_nie, links[stop]),
                    links[stop].where(~is_nie, links[start]),
                )
        elif mode == 'NIE':
            links = links[is_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: merging un-aggregated frames is many-to-many
        # when several rows share a station pair, which multiplies the difference.
        return links.groupby(key_cols).agg({col: 'sum' for col in value_cols}).reset_index()

    merged_df = link_totals(dfone, [column_name, col2] if twocols else [column_name])

    if twodfs:
        merged_df = pd.merge(
            merged_df, link_totals(dftwo, [column_name]),
            on=key_cols, how='outer', suffixes=('_df1', '_df2')
        )
        # Fill BEFORE subtracting so a link missing from one dataset gives
        # df2 - 0 or 0 - df1 instead of a NaN that later collapses to 0.
        merged_df['Difference'] = merged_df[column_name + '_df2'].fillna(0) - merged_df[column_name + '_df1'].fillna(0)
    elif twocols:
        merged_df['Difference'] = merged_df[col2] - merged_df[column_name]
    else:
        merged_df['Difference'] = merged_df[column_name]

    plot_df = merged_df[key_cols + ['Difference']].fillna(0)

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Define the style function
    def style_function(feature):
        """Shade a feature by its background value; features without one are invisible."""
        trans_value = value_by_oa.get(feature['properties']['OA11CD'])
        if trans_value is None or pd.isna(trans_value):
            # Previously an unmatched feature raised IndexError; hide it instead.
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': map_to_color(trans_value),
            'fillOpacity': np.interp(trans_value, [0, 1], [0.1, .9]),
            'color': 'black',
            'weight': 1
        }

    # NOTE(review): the layer is still named "Fare Zones" although it shows OACol.
    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function
    ).add_to(map_london)

    # Cap line width on the largest ABSOLUTE value; without abs() a negative
    # maximum produced negative widths and large negatives were not capped.
    peak = plot_df['Difference'].abs().max()
    if pd.isna(peak):  # nothing to draw
        peak = 0
    lineFactor = 15 / (peak + .0001)  # Add .0001 for 0 division problem

    # Add PolyLines and CircleMarkers for each row in plot_df
    for _, row in plot_df.iterrows():
        # Use red for negative, green for positive
        color = 'green' if row['Difference'] > 0 else 'red'
        linewidth = abs(row['Difference']) * lineFactor

        folium.PolyLine(
            locations=[[row['StartLat'], row['StartLong']], [row['StopLat'], row['StopLong']]],
            color=color,
            dash_array='1, 15' if row['Difference'] < 0 else None,  # Use dashed lines for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    # Show the background colour bar on the map
    map_london.add_child(cmap)

    return map_london


In [314]:
#OA level LOAC
def OA_transSummary_multtype_tflcolours(dfone, column_name, OA_data, OACol, geoson, colours = ['lightblue','green'], mode='total', dftwo = False, col2 = False):
    """Draw link volumes, coloured by each row's 'Hex' value, over an OA-level background column.

    What is plotted depends on the optional arguments:

    * ``dftwo`` is a DataFrame: ``dftwo[column_name] - dfone[column_name]``
      (``col2`` is then ignored for the difference);
    * otherwise, ``col2`` given: ``dfone[col2] - dfone[column_name]``;
    * otherwise: ``dfone[column_name]``.

    Parameters
    ----------
    dfone : pandas.DataFrame
        Link-level data containing 'Dir', 'From Station', 'To Station',
        'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex',
        ``column_name`` and, if used, ``col2``. 'Hex' holds a colour code
        without the leading '#'. Not modified.
    column_name : str
        Volume column.
    OA_data : pandas.DataFrame
        Must contain 'OA' and ``OACol``. It is no longer modified: when
        ``OACol`` is "Rent_per_m" the values are divided by their maximum on a
        private copy so they lie on the 0-1 scale the colour map expects.
    OACol : str
        Background column; the colour map spans 0 to 1.
    geoson : GeoJSON-like
        Passed unchanged to ``folium.GeoJson``; every feature needs an
        'OA11CD' property.
    colours : list
        Two colours for the low and high end of the background scale.
    mode : {'total', 'NIE', 'SOW'}
        'total' reverses the links whose 'Dir' is EB/IB/IR/NB and adds them to
        the opposite direction; 'NIE' keeps only EB/IB/IR/NB links; 'SOW'
        keeps only OB/OR/SB/WB links. Links are summed per station pair and
        'Hex' value in every mode.
    dftwo : pandas.DataFrame or False
        Optional second dataset (must also contain 'Hex').
    col2 : str or False
        Optional second column of ``dfone``.

    Returns
    -------
    folium.Map
        Lines use the row's 'Hex' colour, are dashed when the value is
        negative, and have a width proportional to the absolute value
        (widest = 10).

    Raises
    ------
    AssertionError
        If ``mode`` is not one of the accepted values.
    """
    assert mode in ['NIE', 'SOW', 'total'], "Invalid mode. Choose from 'NIE', 'SOW', or 'total'."

    background = OA_data.set_index('OA')[OACol]

    # Rent_per_m is in pounds not a scale between 0-1, so for consistency convert this column
    if OACol == "Rent_per_m":
        background = background / background.max()

    # Build the OA -> value lookup once; the first row wins for duplicated codes.
    value_by_oa = background[~background.index.duplicated()].to_dict()

    cmap = branca.colormap.LinearColormap(colors=colours, index=[0,1],vmin=0,vmax=1)

    def map_to_color(value):
        """Map a background value to a hex colour."""
        return colors.rgb2hex(cmap(value))

    # See how many dfs / cols there are (explicit check instead of a bare except)
    twodfs = isinstance(dftwo, pd.DataFrame)
    twocols = bool(col2)

    # 'Hex' is part of the key so each colour keeps its own row per station pair
    key_cols = ['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Hex']
    nie_dirs = ['EB', 'IB', 'IR', 'NB']
    sow_dirs = ['OB', 'OR', 'SB', 'WB']

    def link_totals(df, value_cols):
        """Return one row per link and colour with ``value_cols`` summed, honouring ``mode``."""
        links = df.copy()
        is_nie = links['Dir'].isin(nie_dirs)
        if mode == 'total':
            # Swap start/stop on the NIE rows column by column (keeps dtypes and
            # avoids a slow row-wise apply). The right-hand side is evaluated in
            # full before either column is overwritten.
            for start, stop in (('From Station', 'To Station'), ('StartLat', 'StopLat'), ('StartLong', 'StopLong')):
                links[start], links[stop] = (
                    links[start].where(~is_nie, links[stop]),
                    links[stop].where(~is_nie, links[start]),
                )
        elif mode == 'NIE':
            links = links[is_nie]
        else:
            links = links[links['Dir'].isin(sow_dirs)]
        # Aggregate in every mode: merging un-aggregated frames is many-to-many
        # when several rows share a key, which multiplies the difference.
        return links.groupby(key_cols).agg({col: 'sum' for col in value_cols}).reset_index()

    merged_df = link_totals(dfone, [column_name, col2] if twocols else [column_name])

    if twodfs:
        merged_df = pd.merge(
            merged_df, link_totals(dftwo, [column_name]),
            on=key_cols, how='outer', suffixes=('_df1', '_df2')
        )
        # Fill BEFORE subtracting so a link missing from one dataset gives
        # df2 - 0 or 0 - df1 instead of a NaN that later collapses to 0.
        merged_df['Difference'] = merged_df[column_name + '_df2'].fillna(0) - merged_df[column_name + '_df1'].fillna(0)
    elif twocols:
        merged_df['Difference'] = merged_df[col2] - merged_df[column_name]
    else:
        merged_df['Difference'] = merged_df[column_name]

    plot_df = merged_df[['From Station', 'To Station', 'StartLat', 'StartLong', 'StopLat', 'StopLong', 'Difference', "Hex"]].fillna(0)

    london_coords = [51.5074, -0.1278]
    map_london = folium.Map(location=london_coords, zoom_start=12, tiles='cartodb positron', zoom_control='topright')

    # Define the style function
    def style_function(feature):
        """Shade a feature by its background value; features without one are invisible."""
        trans_value = value_by_oa.get(feature['properties']['OA11CD'])
        if trans_value is None or pd.isna(trans_value):
            # Previously an unmatched feature raised IndexError; hide it instead.
            return {
                'fillColor': 'none',
                'fillOpacity': 0,
                'color': 'none',
                'weight': 0
            }
        return {
            'fillColor': map_to_color(trans_value),
            'fillOpacity': np.interp(trans_value, [0, 1], [0.1, .9]),
            'color': None,
            'weight': 1
        }

    # NOTE(review): the layer is still named "Fare Zones" although it shows OACol.
    folium.GeoJson(
        geoson,
        name="Fare Zones",
        style_function=style_function
    ).add_to(map_london)

    # Cap line width on the largest ABSOLUTE value; without abs() a negative
    # maximum produced negative widths and large negatives were not capped.
    peak = plot_df['Difference'].abs().max()
    if pd.isna(peak):  # nothing to draw
        peak = 0
    lineFactor = 10 / (peak + .0001)  # Add .0001 for 0 division problem

    # Add PolyLines and CircleMarkers for each row in plot_df
    for _, row in plot_df.iterrows():
        # Use TfL colour scheme
        color = "#" + row["Hex"]
        linewidth = abs(row['Difference']) * lineFactor

        folium.PolyLine(
            locations=[[row['StartLat'], row['StartLong']], [row['StopLat'], row['StopLong']]],
            color=color,
            dash_array='1, 15' if row['Difference'] < 0 else None,  # Add dashes for negative volume
            weight=linewidth
        ).add_to(map_london)

        # Add station nodes and popup text
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=5,
            color='black',
            fill=True,
            fill_color='black'
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

        # Add a larger transparent circle to make it easier to click on the station
        folium.CircleMarker(
            location=[row['StartLat'], row['StartLong']],
            radius=10,
            color='transparent',
            fill=True,
            fill_opacity=0.0
        ).add_child(folium.Popup(row['From Station'])).add_to(map_london)

    # Show the background colour bar on the map
    map_london.add_child(cmap)

    return map_london

