In [48]:
import re
import json
import folium
import fileinput
import numpy as np
import pandas as pd
import seaborn as sns
import branca.colormap as cm
from datetime import datetime
from selenium import webdriver
from BindColorMap import BindColormap
from IPython.core.display import display
from folium.plugins import TimestampedGeoJson

In [282]:
def merge_transport_df(file_lines, file_systems, file_stations, file_station_lines, file_cities):
    """Load the five transport CSV tables and outer-join them into one dataframe.

    Parameters
    ----------
    file_lines, file_systems, file_stations, file_station_lines, file_cities
        Anything ``pd.read_csv`` accepts (path or file-like object) for the
        respective table.

    Returns
    -------
    pandas.DataFrame
        One row per (line, station) association. Because every join is an
        outer join, lines, systems, cities and stations without a match are
        kept, with NaN in the columns coming from the other tables.

    Notes
    -----
    Cities sharing the same ``url_name`` are disambiguated by appending
    ``'_' + country_state``. A duplicated city whose ``country_state`` is
    missing keeps its bare ``url_name`` (the previous row-by-row string
    concatenation raised ``TypeError`` on such rows).
    """

    # 1) load dataframes; the generic 'id'/'name' columns are renamed so they
    #    stay distinguishable after the merges.
    cities = pd.read_csv(file_cities)
    lines = pd.read_csv(file_lines).rename(columns = {'id':'line_id', 'name':'line_name', 'url_name':'line_url_name', 'color':'line_color'})
    systems = pd.read_csv(file_systems).rename(columns = {'id':'system_id', 'name':'system_name'}).drop('city_id', axis = 1)
    stations = pd.read_csv(file_stations).rename(columns = {'id':'station_id', 'name':'station_name', 'geometry':'station_geometry',
                                                   'buildstart':'station_buildstart', 'opening':'station_opening',
                                                   'closure':'station_closure'}).drop('city_id', axis = 1)
    station_lines = pd.read_csv(file_station_lines).rename(columns = {'id':'station_lines_id'}).drop(['city_id',
                                                   'created_at', 'updated_at'], axis = 1)

    # 2) Fix duplicated city names by adding the state to the city name.
    #    Only rows that actually have a state are suffixed.
    needs_suffix = cities.duplicated(['url_name'], keep = False) & cities['country_state'].notna()
    if needs_suffix.any():
        cities.loc[needs_suffix, 'url_name'] = (cities.loc[needs_suffix, 'url_name'] + '_'
                                                + cities.loc[needs_suffix, 'country_state'].astype(str))
    cities = cities[['id', 'url_name']].rename(columns = {'id':'city_id', 'url_name':'city_name'})

    # 3) Merge dataframes
    lines = pd.merge(lines, cities, on = ['city_id'], how = 'outer')
    lines_with_systems = pd.merge(lines, systems, on = ['system_id'], how = 'outer')
    stations_with_lines = pd.merge(station_lines, stations, on = ['station_id'], how = 'outer')
    transports_df = pd.merge(lines_with_systems, stations_with_lines, on = 'line_id', how = 'outer')

    return transports_df

In [368]:
class GeoMap():
    """Wrapper around a ``folium.Map`` that assembles colour-coded overlay layers.

    The ``*_layer`` methods each return a two-element list ``[layer, colormap]``
    where ``colormap`` is ``None`` when the layer is not colour-coded. A list of
    such pairs is what ``add_layer_list`` attaches to the map.
    """

    def __init__(self, coord_start = [46.519164, 6.566719]):
        """Create the underlying folium map centred on ``coord_start``."""
        self.geo_map = folium.Map(location = coord_start, control_scale = True, zoom_start = 11)

    def map_to_color(self, color_palette, list_of_values, min_range = None, max_range = None, nan_color = '#808080'):
        """Assign one palette colour to every value.

        ``[min_range, max_range]`` is cut into ``len(color_palette)`` equal-width
        bins; a value gets the colour of the bin it falls in. Values below
        ``min_range`` get the first colour, values at or above ``max_range``
        the last one.

        Parameters
        ----------
        color_palette : sequence
            Colours, one per bin.
        list_of_values : array-like of numbers
            Values to colour.
        min_range, max_range : float, optional
            Bounds of the binned range. Default to the smallest / largest
            non-NaN value.
        nan_color : str, optional
            Colour returned for NaN values, so that the result always stays
            aligned with the input (previously NaN values produced no entry at
            all, shifting every later colour).

        Returns
        -------
        numpy.ndarray
            One colour per input value, in input order.
        """
        values = np.asarray(list_of_values, dtype = float)
        if values.size == 0:
            return np.array([])

        missing = np.isnan(values)
        if missing.all():
            return np.full(values.shape, nan_color)

        if min_range is None:
            min_range = np.nanmin(values)
        if max_range is None:
            max_range = np.nanmax(values)

        n_colors = len(color_palette)
        intervals = np.linspace(min_range, max_range, n_colors + 1)

        # Index of the last interval edge that is <= value, clamped so that
        # out-of-range values land in the first / last bin.
        bins = np.clip(np.searchsorted(intervals, values, side = 'right') - 1, 0, n_colors - 1)

        mapped_colors = [nan_color if is_missing else color_palette[bin_index]
                         for bin_index, is_missing in zip(bins.tolist(), missing.tolist())]
        return np.array(mapped_colors)

    def generate_gradient_dict(self, color_palette):
        """Return ``{position: colour}`` with positions at the centres of equal slices of [0, 1]."""
        intervals = np.linspace(0, 1, len(color_palette)+1)

        # compute center of each interval:
        centers = np.round((intervals[1:] + intervals[:-1]) / 2, 3)

        return {float(center): color for center, color in zip(centers, color_palette)}

    def geojson_layer(self, layer_name, jsonFile, displayColorData = None):
        """Build a layer with one polygon per feature of a parsed GeoJSON dict.

        Parameters
        ----------
        layer_name : str
            Name of the layer (also used as the colormap caption).
        jsonFile : dict
            Parsed GeoJSON; its ``'features'`` list is iterated and every
            feature's ``properties['neighbourhood']`` is used as popup text.
        displayColorData : array-like of numbers, optional
            One value per feature, in feature order. When given, polygons are
            filled according to the value, using the 5%-95% quantile range.

        Returns
        -------
        list
            ``[layer, colormap]``; ``colormap`` is ``None`` without
            ``displayColorData``.

        Raises
        ------
        ValueError
            If ``displayColorData`` holds fewer values than there are features.
        """
        layer_gjson = folium.FeatureGroup(name = layer_name)
        features = jsonFile['features']
        colors = None
        colormap = None

        if displayColorData is not None:
            color_values = np.asarray(displayColorData, dtype = float)
            if len(color_values) < len(features):
                raise ValueError('displayColorData has ' + str(len(color_values)) + ' values for '
                                 + str(len(features)) + ' features')
            max_value_color = np.nanquantile(color_values, 0.95)
            min_value_color = np.nanquantile(color_values, 0.05)
            color_palette = sns.color_palette('RdYlGn').as_hex()
            colors = self.map_to_color(color_palette, color_values, min_range = min_value_color, max_range = max_value_color)
            colormap = cm.StepColormap(colors = color_palette, vmin=min_value_color, vmax=max_value_color,
                                                      caption = layer_name)

        print('\n')
        n_features = len(features)
        for i, geo_json in enumerate(features):
            print('geojson_layer construction running... ' + str(int(np.ceil(((i + 1)/n_features)*100))) + '%', end='\r')

            if colors is not None:
                fill_col = str(colors[i])
                edges_color = 'white'
            else:
                fill_col = 'yellow'
                edges_color = 'blue'
            lj = folium.GeoJson(
                geo_json,
                name='geojson',
                # The colours are bound as default arguments: a plain closure
                # would look the variables up when the lambda is called and
                # could see the values of a later loop iteration.
                style_function=lambda feature, fill_col = fill_col, edges_color = edges_color: {
                    'fillColor': fill_col,
                    'color' : edges_color,
                    'weight' : 1,
                    'fillOpacity' : 0.2,
                    }
                )
            popup = folium.Popup(geo_json['properties']['neighbourhood'])
            lj.add_child(popup)
            lj.add_to(layer_gjson)

        return [layer_gjson, colormap]

    def pandas_layer(self, df, layer_name, displayType = 'Density', densityDataName = None):
        """Build a layer from the rows of a dataframe.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain ``latitude`` and ``longitude`` columns, plus
            ``densityDataName`` when that is given.
        layer_name : str
            Name of the layer (also used as the colormap caption).
        displayType : {'Density', 'Points', 'Circles'}
            * ``'Points'``: fixed-size markers, red without
              ``densityDataName``, otherwise coloured by value.
            * ``'Circles'``: markers coloured by value with radius
              ``20 * value / max(value)``; needs ``densityDataName``.
            * ``'Density'``: heat map of the rows whose value lies inside the
              5%-95% quantile range; needs ``densityDataName``.
            A combination that needs ``densityDataName`` but does not get one
            yields an empty layer.
        densityDataName : str, optional
            Column holding the value used for colouring / sizing / weighting.

        Returns
        -------
        list
            ``[layer, colormap]``; ``colormap`` is ``None`` without
            ``densityDataName``.
        """
        layer_pandas = folium.FeatureGroup(name = layer_name)
        colors = None
        colormap = None

        if densityDataName is not None:
            density_values = df[densityDataName].tolist()
            max_value = np.max(df[densityDataName])
            max_value_color = np.nanquantile(density_values, 0.95)
            min_value_color = np.nanquantile(density_values, 0.05)
            color_palette = sns.color_palette('RdYlGn').as_hex()
            colors = self.map_to_color(color_palette, density_values, min_range = min_value_color, max_range = max_value_color)
            colormap = cm.StepColormap(colors = color_palette, vmin=min_value_color, vmax=max_value_color,
                                                      caption = layer_name)

        print('\n')
        if displayType in ['Points', 'Circles']:
            n_rows = df.shape[0]
            # `position` (not the dataframe index label) addresses `colors`, so
            # the lookup is correct whatever index `df` carries.
            for position, (_, row) in enumerate(df.iterrows()):
                print('pandas_layer construction  running... ' + str(int(np.ceil(((position + 1)/n_rows)*100))) + '%', end='\r')

                if colors is None:
                    if displayType == 'Points':
                        lp = folium.CircleMarker([row['latitude'], row['longitude']],
                                                radius = 2,
                                                color = 'red',
                                                fill_color = 'red')
                        lp.add_to(layer_pandas)
                    continue

                marker_color = str(colors[position])
                if displayType == 'Points':
                    radius = 2
                else:
                    radius = 20 * row[densityDataName]/max_value
                lp = folium.CircleMarker([row['latitude'], row['longitude']],
                                        radius = radius,
                                        color = marker_color,
                                        fill_color = marker_color)
                lp.add_to(layer_pandas)

        elif (displayType == 'Density') and (densityDataName is not None):
            print('pandas_layer construction  running... ')

            points = df[['latitude', 'longitude', densityDataName]]
            points = points.loc[(df[densityDataName] >= min_value_color) & (df[densityDataName] <= max_value_color), :].values
            lp = folium.plugins.HeatMap(points, gradient = self.generate_gradient_dict(color_palette))
            lp.add_to(layer_pandas)

        return [layer_pandas, colormap]

    # df columns must contain: [latitude, longitude, icon_type]
    def icon_layer(self, df, layer_name):
        """Return ``[layer, None]`` for an icon layer.

        Placeholder: the feature group is created but no icon is added to it yet.
        """
        layer_icons = folium.FeatureGroup(name = layer_name)

        return [layer_icons, None]

    def add_icons(self, df):
        """Placeholder; not implemented."""
        pass

    # layer_list is a list of [layer, color_map] or [layer, None] if the colormap is not present
    def add_layer_list(self, layer_list):
        """Attach every ``[layer, colormap]`` pair to the map, then add a layer control.

        A layer that comes with a colormap is bound to it through
        ``BindColormap``. Each call adds one ``LayerControl`` to the map.
        """
        for layer, cmap in layer_list:
            self.geo_map.add_child(layer)
            if cmap is not None:
                self.geo_map.add_child(cmap)
                self.geo_map.add_child(BindColormap(layer, cmap))
        folium.LayerControl('topleft').add_to(self.geo_map)

    def add_tiles(self, tiles_list = ['OpenStreetMap', 'cartodbdark_matter'], tiles_names = ['Street Map', 'Dark Map']):
        """Replace the default tile layer by the given, named tile layers."""
        # remove the default tile (tolerating maps where it is not registered under this key):
        self.geo_map._children.pop('openstreetmap', None)
        for position, (tile, name_) in enumerate(zip(tiles_list, tiles_names)):
            # show first added tile when open the map
            if position == 0:
                folium.TileLayer(tiles = tile, name = name_, show = True).add_to(self.geo_map)
            else:
                folium.TileLayer(tiles = tile, name = name_).add_to(self.geo_map)

    def add_mini_map(self):
        """Add a ``folium.plugins.MiniMap`` to the map."""
        minimap = folium.plugins.MiniMap()
        self.geo_map.add_child(minimap)

    def get_map(self):
        """Return the underlying folium map."""
        return self.geo_map

    def save_map(self, saving_destination_path, file_name):
        """Save the map as ``<saving_destination_path>/<file_name>.html``."""
        self.geo_map.save(saving_destination_path + '/' + file_name + '.html')

    def get_non_null_rows(self, df, column_check):
        """Return the rows whose ``column_check`` value is finite, re-indexed from 0.

        The previous index is kept as a column, since ``reset_index`` is called
        without ``drop``.
        """
        return df[np.isfinite(df[column_check])].reset_index()
            

In [None]:
# TODO:
# - Check the circle sizing system.
# - Check the density layer: the data should be sampled first; otherwise execution is too slow
#   and the output files are too large, which makes the map slow to display.

In [372]:
# Locations of the Airbnb listings table, the neighbourhood polygons and the output folder.
input_csv_file = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/DataSet/2019-09-14_Amsterdam_listings_detailed.csv'
geojson_file = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/DataSet/NaT_Amsterdam_neighbourhoods.geojson'
saving_path = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/Outputs'

# Listings; the map start position is the mean listing coordinate.
df = pd.read_csv(input_csv_file, low_memory = False)
coord_start = [df['latitude'].mean(), df['longitude'].mean()]

# Neighbourhood polygons, parsed into a dict.
with open(geojson_file) as f:
    jsonFile = json.load(f)

# Review-score columns to display, and their human-readable names
# (underscores replaced by spaces).
columns = [
    'review_scores_rating',
    'review_scores_cleanliness',
    'review_scores_checkin',
    'review_scores_communication',
    'review_scores_location',
    'review_scores_value',
]
columns_names = [column.replace('_', ' ') for column in columns]

In [396]:
# Number of listings drawn per review-score layer, and the seed that makes
# the draw (and therefore the saved map) reproducible across runs.
N_SAMPLES = 1000
SAMPLE_SEED = 0

cart = GeoMap(coord_start)
cart.add_tiles()

# First layer: the neighbourhood polygons, without colour coding.
layers = [cart.geojson_layer('Neighborhood', jsonFile, displayColorData = None)]

# One point layer per review-score column, built from listings that have a finite score.
for col, col_name in zip(columns, columns_names):
    scored = df[np.isfinite(df[col])]
    # Cap the sample size so columns with fewer than N_SAMPLES scored listings do not raise.
    non_null = scored.sample(n = min(N_SAMPLES, len(scored)), random_state = SAMPLE_SEED).reset_index()
    layers.append(cart.pandas_layer(non_null, col_name, displayType = 'Points', densityDataName = col))

cart.add_mini_map()
cart.add_layer_list(layers)

output = cart.get_map()
cart.save_map(saving_path, 'test')
output




geojson_layer construction running... 96%

pandas_layer construction  running... 100%

pandas_layer construction  running... 100%

pandas_layer construction  running... 100%

pandas_layer construction  running... 100%

pandas_layer construction  running... 100%

pandas_layer construction  running... 100%

In [307]:
# Locations of the five transport tables.
file_lines = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/Transports/lines.csv'
file_systems = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/Transports/systems.csv'
file_stations = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/Transports/stations.csv'
file_station_lines = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/Transports/station_lines.csv'
file_cities = '/Volumes/Disk2/Courses MA3/MA3 - ADA/AIRBNB data/Transports/cities.csv'

transports_df = merge_transport_df(file_lines, file_systems, file_stations, file_station_lines, file_cities)

# Keep only the rows whose station closure is after 2019 or is not recorded.
closure_after_2019 = transports_df['station_closure'] > 2019
closure_unknown = transports_df['station_closure'].isna()
transports_df = transports_df.loc[closure_after_2019 | closure_unknown]

# NOTE(review): the variable is named after Amsterdam but the filter selects 'london' - confirm which city is intended.
transports_amsterdam = transports_df[transports_df['city_name'] == 'london']
transports_amsterdam.head()






Unnamed: 0,line_id,city_id,line_name,line_url_name,line_color,system_id,transport_mode_id,city_name,system_name,station_lines_id,station_id,station_name,station_geometry,station_buildstart,station_opening,station_closure
5178,118.0,69.0,Waterloo & City Line,118-waterloo-&-city-line,#50e3c2,259.0,4.0,london,London Underground,3712.0,782.0,\n\nBank,POINT(-0.0898129050862336 51.5132922457233),1894.0,1898.0,999999.0
5179,118.0,69.0,Waterloo & City Line,118-waterloo-&-city-line,#50e3c2,259.0,4.0,london,London Underground,3739.0,783.0,,POINT(-0.113046029341803 51.502667678402),1894.0,1898.0,999999.0
5180,115.0,69.0,Northern Line,115-norther-line,#000,259.0,4.0,london,London Underground,3705.0,784.0,,POINT(-0.113300125722532 51.5025623884227),1923.0,1926.0,999999.0
5181,115.0,69.0,Northern Line,115-norther-line,#000,259.0,4.0,london,London Underground,3826.0,842.0,,POINT(-0.147704295554433 51.4528010475682),1923.0,1926.0,999999.0
5182,115.0,69.0,Northern Line,115-norther-line,#000,259.0,4.0,london,London Underground,3831.0,843.0,,POINT(-0.151918266139774 51.4447032610585),1923.0,1926.0,999999.0


In [None]:
# Preview the first rows of each raw transport table.
# The tables are read here from their CSV paths: the dataframes of the same
# names inside merge_transport_df are local to that function and do not exist
# at notebook level.
for csv_path in (file_cities, file_lines, file_systems, file_stations, file_station_lines):
    display(pd.read_csv(csv_path).head())