In [1]:
import os
import os.path
from os import path
import time
import datetime
import pytz

import pandas as pd
import numpy as np
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point, LineString
# import geoplot as gplt

import plotnine
from plotnine import *
# set the plotnine figure size
plotnine.options.figure_size = (8, 6)
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
%matplotlib widget

from sodapy import Socrata
import json
import matplotlib.pyplot as plt
import folium
from IPython.core.display import display, HTML
import ipywidgets as wg

import warnings
warnings.filterwarnings('ignore')

# predictive model libraries:

import statsmodels.formula.api as smf
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn.ensemble import RandomForestRegressor as rf
from sklearn.model_selection import train_test_split, cross_val_score

# PCA - Dimension Reduction
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import DistanceMetric
from sklearn.neighbors import BallTree
# All valid metrics:
# BallTree.valid_metrics

# Nominatim requests
import requests
import urllib.parse

DATASET_ID = {
"CarCrashes": "h9gi-nx95",
"LiveTraffic": "i4gi-tjb9"
}

FIG_WIDTH = 16
FIG_HEIGHT = 9

# sets timezone 
os.environ['TZ'] = "America/New_York"
time.tzset()

# import my own project files 
from Open_Data import * # class definition
from all_functions import * # helper functions
from reg_func import * # regressor optimization functions

In [2]:
def get_pop_estimate(usr_str):
    
    '''
    
    This function uses the Population Estimation Service (PES) [Gridded 
    Population of the World (GPW), v4] to derive the estimated population
    within the boundary coordinates of the address entered by the user
    in string format. 
    
    --- >>> References and Data Sources <<< ---
    
    OpenStreetMap (OSM) - Nominatim
    (c) OpenStreetMap contributors
    
    OpenStreetMap® is open data, licensed under the Open Data Commons Open Database License (ODbL) 
    by the OpenStreetMap Foundation (OSMF). The data is available under the Open Database License.
    https://www.openstreetmap.org/copyright
    
    ---
    
    Center for International Earth Science Information Network - CIESIN - Columbia University. 
    2018. Population Estimation Service, Version 3 (PES-v3). Palisades, NY: NASA Socioeconomic 
    Data and Applications Center (SEDAC). https://doi.org/10.7927/H4DR2SK5. Accessed DAY MONTH YEAR.
    
    https://sedac.ciesin.columbia.edu/data/collection/gpw-v4/population-estimation-service
    
    '''
    
    # URL request to Nominatim (Open Street Map)
    url = 'https://nominatim.openstreetmap.org/search/' +\
    urllib.parse.quote(usr_str) +\
    '?format=json'

    # get the first (most relevant) returned search result
    response = requests.get(url).json(strict = False)[0]
    osm_obj = response
    
    # -----------------------------------------------------------
    # min latitude, max latitude, min longitude, max longitude
    bbox = [float(item) for item in response['boundingbox']]
    
    # drawing the polygon around the input address
    add_polygon = [[bbox[2],bbox[0]], [bbox[3],bbox[0]],
    [bbox[3],bbox[1]], [bbox[2],bbox[1]], [bbox[2],bbox[0]]]
    
    # verify area size of boundary around input address
    temp_loc_x = [item[0] for item in add_polygon]
    temp_loc_y = [item[1] for item in add_polygon]

    temp_loc_pts = gpd.points_from_xy(temp_loc_x, 
                                      temp_loc_y, 
                                      crs = "EPSG:4326")

    temp_loc_geometry = LineString(temp_loc_pts)
    temp_loc_tr_dist = temp_loc_geometry.length
    
    
    # -----------------------------------------------------------
    # a longer periphery, so that data is obtainable
    lbox = bbox
    lbox[0] -= 0.05
    lbox[2] -= 0.05
    lbox[1] += 0.05
    lbox[3] += 0.05
    
    # make the `longer` polygon
    long_polygon = [[lbox[2],lbox[0]], [lbox[3],lbox[0]],
    [lbox[3],lbox[1]], [lbox[2],lbox[1]], [lbox[2],lbox[0]]]
    # print(long_polygon)
    
    # LineString obj for the `longer` area
    
    temp_long_x = [item[0] for item in long_polygon]
    temp_long_y = [item[1] for item in long_polygon]

    temp_long_pts = gpd.points_from_xy(temp_long_x, 
                                       temp_long_y, 
                                       crs = "EPSG:4326")

    temp_long_geometry = LineString(temp_long_pts)
    temp_long_tr_dist = temp_long_geometry.length
    
    # calculate the short-to-long ratio
    # ratio of length, so square it
    loc_long_ratio = (temp_loc_tr_dist / temp_long_tr_dist) ** 1.5
    # print(loc_long_ratio)

    # the server to query the population estimation data(base):
    quest_server = "https://sedac.ciesin.columbia.edu/arcgis/rest/services/sedac/pesv3Broker/GPServer/pesv3Broker/execute?"

    # temporary python dictionary,
    # functioning as form (query) input fields/parameters
    temp = {"Input_Data": {
        "polygon": long_polygon,
        "variables": ["gpw-v4-population-count-rev10_2020",
                     "gpw-v4-land-water-area-rev10_landareakm"],
        "statistics": ["COUNT", "SUM", "MEAN"],
        "requestID": "123456789"}, "f": "pjson"}

    url = quest_server + urllib.parse.urlencode(temp)
    # not strict json parsing, to allow for control characters
    response = requests.get(url).json(strict = False)
    
    
    # -----------------------------------------------------------
    # obtain estimates of `longer` polygon, 
    # extrapolate to derive estimates of the shorter (actual) region
    
    long_pop = int(response['results'][0]['value']['estimates']\
    ['gpw-v4-population-count-rev10_2020']['SUM'])
    long_area = int(response['results'][0]['value']['estimates']\
    ['gpw-v4-land-water-area-rev10_landareakm']['SUM'])
    
    short_pop = loc_long_ratio * long_pop
    short_area = loc_long_ratio * long_area
    
    return {"population(persons)": short_pop,
           "area(km^2)": short_area,
           "OSM_info": osm_obj}


In [3]:
temp_long_geometry = LineString([[-74.0482334, 40.6783249], [-73.9481334, 40.6783249], [-73.9481334, 40.7784249], [-74.0482334, 40.7784249], [-74.0482334, 40.6783249]])
temp_long_tr_dist = temp_long_geometry.length

In [4]:
list(temp_long_geometry.coords)



[(-74.0482334, 40.6783249),
 (-73.9481334, 40.6783249),
 (-73.9481334, 40.7784249),
 (-74.0482334, 40.7784249),
 (-74.0482334, 40.6783249)]

In [5]:
get_pop_estimate("(40.7171769, -74.0020572529242)"[1:-1])

{'population(persons)': 610.184834302248,
 'area(km^2)': 0.03137386561153701,
 'OSM_info': {'place_id': 163093236,
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'way',
  'osm_id': 249842305,
  'boundingbox': ['40.7168923', '40.7174582', '-74.0024489', '-74.0017503'],
  'lat': '40.7171769',
  'lon': '-74.0020572529242',
  'display_name': 'Lafayette Hall, 80, Lafayette Street, Lower Manhattan, Chinatown, Manhattan, New York County, New York, 10013, United States',
  'class': 'building',
  'type': 'dormitory',
  'importance': 0.001}}

In [6]:
folium.Icon?

In [7]:
def comp_model2(all_links, make_pred, predictor_obj_1, predictor_obj_2,
Borough, Times, Modification, the_obj,
Link_Name, start_str, end_str, all_address):

    '''
    Traffic Light Color Scheme (hex code):
    Red: #BB1E10
    Green: #33A532
    Yellow: #F7B500
    '''

    # Modification:
    # options = ["None", "Construction", "New Road", "New Multi-Road"]

    # kv for focus of map (depends on borough)
    focus_of_map = {"Bronx": [40.8448, -73.8648],
                    "Brooklyn": [40.6782, -73.9442],
                    "Manhattan": [40.7831, -73.9712],
                    "Queens": [40.7282, -73.7949],
                    "Staten Island": [40.5795, -74.1502]
                   }

    # define temporary df to operate on
    temp_exp_df = the_obj.pol_df.copy()
    temp_exp_df = temp_exp_df[["names", 'names_code', 'boro', 'boro_code', 'geometry', 'tr_dist']].drop_duplicates()


    # ------>>>>>> Function Based on User's Choice of Modification <<<<<<------

    if Modification == "New Road":

        # try to get coordinates from user inputs
        try:
            start_pt = get_coor(start_str)
            end_pt = get_coor(end_str)
        except:
            print("Invalid starting/ending address!")
            return None

        # if can be found:
        # WE NEED:
        # - 'names_code' (based on proximity)
        # - 'boro_code' (based on address)
        # - 'tr_dist' (can be calculated based on start & end pts)
        # - 'time_float' (user input)

        # determine borough:

        st_borough = det_boro(start_pt[2])
        end_borough = det_boro(end_pt[2])

        # for now: start and end should be in the same borough

        if st_borough != end_borough:
            # print(st_borough, end_borough, st_borough == end_borough, sep = "\n")
            print("The starting and ending points must be in the same borough!")
            return None

        # name of corresponding borough
        correspond_boro = st_borough

        # determine travel distance
        # and construct Linestring object

        temp_pts = gpd.points_from_xy([start_pt[1], end_pt[1]],
                                      [start_pt[0], end_pt[0]],
                                      crs = "EPSG:4326")

        this_geometry = LineString(temp_pts)
        this_tr_dist = this_geometry.length

        # approximate - to which existing streets this new street
        # behaves most similarly to (KNN)

        # use 'geometry' ?
        this_appr_name_code = knn().fit(X = temp_exp_df[['boro_code', 'tr_dist']],
                                        y = temp_exp_df['names_code']).\
        predict(X = [[the_obj.df_boro_kv[correspond_boro], this_tr_dist]])


        # add entry of this new road into dataframe
        temp_exp_df = temp_exp_df.append({"names": "***Proposed New Road***",
                                          'names_code': this_appr_name_code,
                                          'boro': correspond_boro,
                                          'boro_code': the_obj.df_boro_kv[correspond_boro],
                                          'geometry': this_geometry,
                                          'tr_dist': this_tr_dist
                                         }
                                         , ignore_index = True)

    elif Modification == "New Multi-Road":

        add_ls = []

        for address_line in all_address.split("\n"):

            # get rid of extra white spaces/blank characters
            address_line = address_line.strip()

            # try to get coordinates from user inputs
            try:
                pt_coord = get_coor(address_line)
            except:
                print("Invalid point address!")
                return None

            add_ls.append(pt_coord)

        # if can be found:
        # WE NEED:
        # - 'names_code' (based on proximity)
        # - 'boro_code' (based on address)
        # - 'tr_dist' (can be calculated based on start & end pts)
        # - 'time_float' (user input)

        # determine borough:

        boro_ls = [det_boro(pt[2]) for pt in add_ls]

        # for now: all points should be in the same borough

        if not all(item == boro_ls[0] for item in boro_ls):
            print("All point addresses must be in the same borough!")
            print(boro_ls)
            return None

        # name of corresponding borough
        correspond_boro = boro_ls[0]

        # determine travel distance
        # and construct Linestring object

        temp_pts = gpd.points_from_xy([item[1] for item in add_ls],
        [item[0] for item in add_ls],
        crs = "EPSG:4326")

        this_geometry = LineString(temp_pts)
        this_tr_dist = this_geometry.length

        # approximate - to which existing streets this new street
        # behaves most similarly to (KNN)

        # use 'geometry' ?
        this_appr_name_code = knn().fit(X = temp_exp_df[['boro_code', 'tr_dist']],
                                        y = temp_exp_df['names_code']).\
        predict(X = [[the_obj.df_boro_kv[correspond_boro], this_tr_dist]])


        # add entry of this new road into dataframe
        temp_exp_df = temp_exp_df.append({"names": "***Proposed New Road***",
                                          'names_code': this_appr_name_code,
                                          'boro': correspond_boro,
                                          'boro_code': the_obj.df_boro_kv[correspond_boro],
                                          'geometry': this_geometry,
                                          'tr_dist': this_tr_dist
                                         }
                                         , ignore_index = True)


    elif Modification == "Construction":

        # verifies that link name is available
        if Link_Name not in all_links:
            print("Please input your selections...")
            return None

        # if road blocked: drop the cat_code correponding to the name
        road_blocked = Link_Name # TB user input

        correspond_boro = temp_exp_df[temp_exp_df['names_code'] ==\
        the_obj.df_names_kv[road_blocked]]['boro'].unique().to_list()[0]

        # make a temporary df for analysis
        # temp_exp_df = temp_exp_df[temp_exp_df['names_code'] != the_obj.df_names_kv[road_blocked]]

    else:
        # Modification == "None"

        # verifies that link name is available
        if Link_Name not in all_links:
            print("Please input your selections...")
            return None

        correspond_boro = temp_exp_df[temp_exp_df['names_code'] ==\
        the_obj.df_names_kv[Link_Name]]['boro'].unique().to_list()[0]

    # ------>>>>>> Operations Below to Display DF on Map <<<<<<------

    # make the relevant predictions
    make_pred(Times, temp_exp_df, predictor_obj_1, predictor_obj_2)

    # extract sub-df with relevant borough name
    this_df = temp_exp_df[temp_exp_df['boro_code'] == the_obj.df_boro_kv[correspond_boro]]

    this_f_map = folium.Map(focus_of_map[correspond_boro],
                            tiles = "Stamen Toner",
                            zoom_start = 12)

    # firstly, add (display) all the linestrings for the corresponding borough

    l_idx = 0

    for l_str in this_df["geometry"]:

        ave_speed = this_df['pred_rou_speed'].apply(float).mean()

        this_speed = float(this_df["pred_rou_speed"].tolist()[l_idx])

        # the modified road here
        if Modification == "Construction":
            if (this_df["names_code"].tolist()[l_idx] == the_obj.df_names_kv[road_blocked]):
                this_color = '#F7B500'
            # for other unimpacted roads
            elif this_speed > ave_speed:
                # faster than **average** speed
                this_color = '#33A532'
            else:
                this_color = '#BB1E10'

        elif Modification in ["New Road", "New Multi-Road"]:
            if (this_df["names_code"].tolist()[l_idx] == this_appr_name_code):
                
                this_color = '#F7B500'
                
                # ethical info about this new road proposal
                
                max_count = len(list(l_str.coords))

                f = wg.IntProgress(min = 0, 
                                   max = max_count) # instantiate the bar
                display(f) # display the bar
                f.description = 'Loading:'
                count = 0
                
                for rd_pt in list(l_str.coords):
                    
                    # print([rd_pt[1], rd_pt[0]])
                    
                    # use coordinates to get population estimate
                    temp_pt_res = get_pop_estimate(str(rd_pt[::-1])[1:-1])
                    
                    # progress bar
                    
                    f.description = temp_pt_res['OSM_info']['display_name'].\
                    split(correspond_boro)[0].strip()[:-1]
                    
                    ethics = '''
                    <center>
                    <p style="color:{html_c};">
                    <b>NEW ROAD</b>
                    </p>
                    </center>
                    <hr>
                    <center>
                    <p><b>Date:</b> {date_rec} <b>|</b> <b>Time:</b> {t_rec}
                    </p>
                    </center>
                    <hr>
                    <p>This address point of the <b>new road</b> that you have proposed is:
                    </p>
                    <ul>
                        <li>Of the full address name: <b>{addr}</b></li>
                        <li>With <b>latitude {lat}</b> and <b>longitude {long}</b></li>
                        <li>Of the OSM class <b>{which_class}</b></li>
                        <li>Of the OSM type <b>{which_type}</b></li>
                        <li>Of the OSM importance <b>{which_imp}</b></li>
                    </ul>
                    According to OpenStreetMap, 
                    Data (c) OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright
                    <hr>
                    <p>
                    Also, a new road construction will negatively impact: 
                    </p>
                    <ul>
                        <li><b>The lives of {ppl:.0f} people</b></li>
                        <li><b>An area of {area:.4f} acres</b></li>
                    </ul>
                    <hr>
                    <p><b>Predicted Speed at This Time:</b> {speed:.2f} mph
                    </p>
                    <p><b>Predicted Travel Time:</b> {tr_t:.2f} minutes.
                    </p>
                    <hr>
                    <p><b>TrafficLink Name:</b> {lk_nm}
                    </p>
                    <p><b>Borough:</b> {bor}
                    </p>
                    '''.format(lk_nm = this_df["names"].tolist()[l_idx],
                       bor = correspond_boro,
                       date_rec = this_df["time_s"].tolist()[l_idx].split("T")[0],
                       t_rec = this_df["time_s"].tolist()[l_idx].split("T")[1],
                       tr_t = float(this_df["pred_tr_time"].tolist()[l_idx]) / 60,
                       speed = this_speed,
                       html_c = this_color,
                        which_class = temp_pt_res['OSM_info']['class'],
                        which_type = temp_pt_res['OSM_info']['type'],
                        which_imp = temp_pt_res['OSM_info']['importance'],
                        ppl = temp_pt_res['population(persons)'],
                        addr = temp_pt_res['OSM_info']['display_name'].\
                               split(correspond_boro)[0].strip()[:-1],
                        lat = temp_pt_res['OSM_info']['lat'],
                        long = temp_pt_res['OSM_info']['lon'],
                        area = temp_pt_res['area(km^2)'])
                    
                    temp_iframe = folium.IFrame(ethics)
                    msg_popup = folium.Popup(temp_iframe,
                                             min_width=300,
                                             max_width=300,
                                             min_height=500,
                                             max_height=500)

                    folium.Marker(
                        location = [rd_pt[1], rd_pt[0]],
                        popup = msg_popup,
                        icon = folium.Icon(color="red", prefix='fa', icon="exclamation-triangle"),
                        tooltip = "About this proposed new road location!"
                    ).add_to(this_f_map)
                    
                    # signal to increment the progress bar
                    time.sleep(.1)
                    count += 1
                    f.value += 1 
                

            # for other unimpacted roads
            elif this_speed > ave_speed:
                # faster than **average** speed
                this_color = '#33A532'
            else:
                this_color = '#BB1E10'

        else:
            if this_speed > ave_speed:
                # faster than **average** speed
                this_color = '#33A532'
            else:
                this_color = '#BB1E10'

        folium.Choropleth(
            l_str,
            line_weight=8,
            line_color=this_color,
            key_on='names'
        ).add_to(this_f_map)

        l_idx += 1

    # then, add the meta-info, in the form of pop-up markers,
    # at the starting point of each recorded road

    row_idx = 0

    # for the starting point in every geometry (recorded road)
    for start_pt in this_df["geometry"].apply(lambda x: list(list(x.coords)[0])[::-1]):

        ave_speed = this_df['pred_rou_speed'].apply(float).mean()

        this_speed = float(this_df["pred_rou_speed"].tolist()[row_idx])

        constr_rd = False
        new_rd = False
        travel_color = ""
        # the modified road here
        if Modification == "Construction":
            if (this_df["names_code"].tolist()[row_idx] == the_obj.df_names_kv[road_blocked]):
                this_color = '#F7B500'
                constr_rd = True
                travel_color = "Yellow"

            elif this_speed > ave_speed:
                # faster than **average** speed
                this_color = '#33A532'
                travel_color = "Green"

            else:
                this_color = '#BB1E10'
                travel_color = "Red"

        elif Modification in ["New Road", "New Multi-Road"]:
            if (this_df["names_code"].tolist()[row_idx] == this_appr_name_code):
                this_color = '#F7B500'
                new_rd = True
                travel_color = "Yellow"

            elif this_speed > ave_speed:
                # faster than **average** speed
                this_color = '#33A532'
                travel_color = "Green"

            else:
                this_color = '#BB1E10'
                travel_color = "Red"

        else:
            if this_speed > ave_speed:
                # faster than **average** speed
                this_color = '#33A532'
                travel_color = "Green"

            else:
                this_color = '#BB1E10'
                travel_color = "Red"

        html_color = this_color

        if constr_rd:
            text_message_short = '''
            <center>
            <p style="color:{html_c};">
            <b>UNDER CONSTRUCTION</b>
            </p>
            </center>
            <hr>
            <center>
            <p><b>Date:</b> {date_rec} <b>|</b> <b>Time:</b> {t_rec}
            </p>
            </center>
            <hr>
            <p>You have chosen this road to be <b>under construction</b>,
            so it is not in operation.
            </p>
            <p><b>Historical Average Speed:</b> {ave_s:.2f} mph
            </p>
            <hr>
            <p><b>TrafficLink Name:</b> {lk_nm}
            </p>
            <p><b>Borough:</b> {bor}
            </p>
            '''.format(lk_nm = this_df["names"].tolist()[row_idx],
                       bor = correspond_boro,
                       ave_s = ave_speed,
                       date_rec = this_df["time_s"].tolist()[row_idx].split("T")[0],
                       t_rec = this_df["time_s"].tolist()[row_idx].split("T")[1],
                       html_c = html_color)

        elif new_rd:

            text_message_short = '''
            <hr>
            <center>
            <p style="color:{html_c};">
            <b>NEW ROAD</b>
            </p>
            </center>
            <hr>
            <center>
            <p><b>Date:</b> {date_rec} <b>|</b> <b>Time:</b> {t_rec}
            </p>
            </center>
            <hr>
            <p>This is a <b>new road</b> that you have proposed,
            thus yet to be constructed.
            </p>
            <hr>
            <p><b>Predicted Speed at This Time:</b> {speed:.2f} mph
            </p>
            <p><b>Predicted Travel Time:</b> {tr_t:.2f} minutes.
            </p>
            <hr>
            <p><b>TrafficLink Name:</b> {lk_nm}
            </p>
            <p><b>Borough:</b> {bor}
            </p>
            <hr>
            '''.format(lk_nm = this_df["names"].tolist()[row_idx],
                       bor = correspond_boro,
                       date_rec = this_df["time_s"].tolist()[row_idx].split("T")[0],
                       t_rec = this_df["time_s"].tolist()[row_idx].split("T")[1],
                       tr_t = float(this_df["pred_tr_time"].tolist()[row_idx]) / 60,
                       speed = this_speed,
                       html_c = html_color)
            
            display(HTML(text_message_short))
            break

        else:
            text_message_short = '''
            <center>
            <p style="color:{html_c};">
            <b>TRAVEL COLOR: {clr}</b>
            </p>
            </center>
            <hr>
            <center>
            <p><b>Date:</b> {date_rec} <b>|</b> <b>Time:</b> {t_rec}
            </p>
            </center>
            <hr>
            <p><b>Speed at This Time:</b> {speed:.2f} mph
            </p>
            <p><b>Travel Time:</b> {tr_t:.2f} minutes.
            </p>
            <p><b>Average Speed:</b> {ave_s:.2f} mph
            </p>
            <hr>
            <p><b>TrafficLink Name:</b> {lk_nm}
            </p>
            <p><b>Borough:</b> {bor}
            </p>
            '''.format(lk_nm = this_df["names"].tolist()[row_idx],
                       bor = correspond_boro,
                       date_rec = this_df["time_s"].tolist()[row_idx].split("T")[0],
                       t_rec = this_df["time_s"].tolist()[row_idx].split("T")[1],
                       tr_t = float(this_df["pred_tr_time"].tolist()[row_idx]) / 60,
                       speed = this_speed,
                       ave_s = ave_speed,
                       html_c = html_color,
                       clr = travel_color.upper())

        # decide which message to use here:
        temp_iframe = folium.IFrame(text_message_short)
        msg_popup = folium.Popup(temp_iframe,
                                 min_width=300,
                                 max_width=300,
                                 min_height=500,
                                 max_height=500)

        folium.Marker(
            location = start_pt,
            popup = msg_popup,
            icon = folium.Icon(color="blue", icon="info-sign"),
            tooltip = "Click here to see more meta-info about this starting location!"
        ).add_to(this_f_map)

        row_idx += 1

    print("Proposed times is:", Times.strftime("Date: %Y-%m-%d, Time: %H:%M:%S"))
    print("Proposed modification is:", Modification)

    display(this_f_map)
    # return this_f_map

    #this_f_map.save("{}_map.html".format(Link_Name))

In [8]:
# MyCrashes = Crashes("CarCrashes", 10000)
# MyCrashes.static_map()

MyTraffic = RTraffic("LiveTraffic", 10000)
MyTraffic.get_polylines()
# MyTraffic.display_folium()

all_links = list(MyTraffic.pol_df["names"].unique())
all_boros = ["Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island"]

### Descriptive Visualization Model

In [9]:
link_widget = wg.Dropdown()
times_widget = wg.Dropdown()
boros_widget = wg.Dropdown(options = all_boros)

# observe necessary updates (selection set contingent upon choice of another field)
# Define a function that updates the content of link based on what we select for boros
def update1(*args):
    link_widget.options = sorted(list(MyTraffic.pol_df[MyTraffic.pol_df["boro"].apply(str.lower) \
                                                       == boros_widget.value.lower()]['names'].unique()))
boros_widget.observe(update1)

# Define a function that updates the content of times based on what we select for link
def update(*args):
    times_widget.options = sorted(list(MyTraffic.pol_df[MyTraffic.pol_df["names"] \
                                                        == link_widget.value]['time_s']))

link_widget.observe(update)

# NOTE: use wg.fixed to make an argument not subject to user input
wg.interact(show_map,
            all_links = wg.fixed(all_links),
            Borough = boros_widget,
            Link_Name = link_widget,
            Times = times_widget,
            Modification = ["Construction", "One-Way", "Block", "New Road", "..."],
            dataset = wg.fixed(MyTraffic.pol_df));

interactive(children=(Dropdown(description='Borough', options=('Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'St…

### Predictive (User-Interactive) Visualization Model

In [10]:
# make street names and boroughs categorical data
# and extract categorical code (for predictions)

MyTraffic.pol_df.names = pd.Categorical(MyTraffic.pol_df.names)
MyTraffic.pol_df['names_code'] = MyTraffic.pol_df.names.cat.codes

MyTraffic.pol_df.boro = pd.Categorical(MyTraffic.pol_df.boro)
MyTraffic.pol_df['boro_code'] = MyTraffic.pol_df.boro.cat.codes


# make the speed and time col vars numeric (apply float)

MyTraffic.pol_df["rou_speed"] = MyTraffic.pol_df["rou_speed"].apply(float)
MyTraffic.pol_df["tr_time"] = MyTraffic.pol_df["tr_time"].apply(float)


# convert time (timestamp) to a np.float64 object

MyTraffic.pol_df["time_float"] = MyTraffic.pol_df["time_s"].apply(lambda x: np.datetime64(x).astype("float"))

# NOTE: this_time = np.datetime64('2018-04-01T15:30:00').astype("float")
# revert_date = np.datetime64(datetime.datetime.utcfromtimestamp(this_time))

# Alt: convert time (timestamp) to a Pandas Datetime object
# MyTraffic.pol_df["time_s"] = pd.to_datetime(MyTraffic.pol_df["time_s"], format = "%Y-%m-%dT%H:%M:%S")

# how to parse string into datetime object:
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes


# calculate the distance (travel distance) for each recorded street (one end point to another)

MyTraffic.pol_df["tr_dist"] = MyTraffic.pol_df["geometry"].apply(lambda x: float(x.length))


# drop all recorded speed of 0 mile per hour or a travel time of 0 seconds
# (invalid observation - based on realistic life intuition)
# WHAT IS MISSING (EXCLUDED DUE TO INVALID OBS)

MyTraffic.pol_df = MyTraffic.pol_df[MyTraffic.pol_df["rou_speed"] != 0]
MyTraffic.pol_df = MyTraffic.pol_df[MyTraffic.pol_df["tr_time"] != 0]

In [11]:
# MyTraffic.pol_df

In [12]:
# KNN Second model (trying to predict travel time)

In [13]:
opt_knn_fit_model_2 = knn(n_neighbors = 7).fit(X = MyTraffic.pol_df[['names_code', 'boro_code', 
                                                                     'time_float', 'tr_dist']],
y = MyTraffic.pol_df['tr_time'])

# create the column of predicted KNN values
MyTraffic.pol_df['opt_pred_knn_2'] = opt_knn_fit_model_2.predict(MyTraffic.pol_df[['names_code', 'boro_code',
                                                                                   'time_float', 'tr_dist']])

# obtain score of fit
opt_knn_fit_model_2.score(X = MyTraffic.pol_df[['names_code', 'boro_code', 'time_float', 'tr_dist']], 
                  y = MyTraffic.pol_df['tr_time'])

0.2080880195280952

In [14]:
# KNN First model (trying to predict travel speed)

In [15]:
# if actual data is used (df = 1)
opt_knn_fit_model_1 = knn(n_neighbors = 7).fit(X = MyTraffic.pol_df[['names_code', 'boro_code', 
                                                                   'time_float', 'tr_time', 'tr_dist']],
y = MyTraffic.pol_df['rou_speed'])

# create the column of predicted KNN values
MyTraffic.pol_df['opt_pred_knn_1'] = opt_knn_fit_model_1.predict(MyTraffic.pol_df[['names_code', 'boro_code', 
                                                                   'time_float', 'tr_time', 'tr_dist']])

# obtain score of fit
opt_knn_fit_model_1.score(X = MyTraffic.pol_df[['names_code', 'boro_code', 'time_float', 'tr_time', 'tr_dist']], 
                  y = MyTraffic.pol_df['rou_speed'])

0.6230941642206309

In [16]:
# if predicted data is used (df = 2)
opt_knn_fit_model_1 = knn(n_neighbors = 7).fit(X = MyTraffic.pol_df[['names_code', 'boro_code', 
                                                                   'time_float', 'opt_pred_knn_2', 'tr_dist']],
y = MyTraffic.pol_df['rou_speed'])

# create the column of predicted KNN values
MyTraffic.pol_df['opt_pred_knn_1_df2'] = opt_knn_fit_model_1.predict(MyTraffic.pol_df[['names_code', 'boro_code', 
                                                                   'time_float', 'opt_pred_knn_2', 'tr_dist']])

# obtain score of fit
opt_knn_fit_model_1.score(X = MyTraffic.pol_df[['names_code', 'boro_code', 'time_float', 'opt_pred_knn_2', 'tr_dist']], 
                  y = MyTraffic.pol_df['rou_speed'])

0.3236902505432734

### The First Full-Scale Simulation Based on KNN
Based on the ten-thousand data point sample. Not going with the full fifty million observations... yet... 

In [17]:
# kv maps to correspond categorical codes with labels 

MyTraffic.df_names_kv = dict(zip(MyTraffic.pol_df.names.unique().tolist(),
                                 MyTraffic.pol_df['names_code'].unique().tolist()))

MyTraffic.df_names_kv_rev = dict(zip(MyTraffic.pol_df['names_code'].unique().tolist(),
                                     MyTraffic.pol_df.names.unique().tolist()))

MyTraffic.df_boro_kv = dict(zip(MyTraffic.pol_df.boro.unique().tolist(),
                                MyTraffic.pol_df['boro_code'].unique().tolist()))


In [18]:
def make_pred(usr_time, some_df, predictor_obj_1, predictor_obj_2):
    # convert datetime obj to str and float
    time_float = np.datetime64(usr_time).astype("float")
    time_str = usr_time.strftime("%Y-%m-%dT%H:%M:%S")
    
    # add value to df cols 
    some_df['time_float'] = time_float // 1000
    some_df['time_s'] = time_str
    
    # first predictor object, to derive travel time
    some_df['pred_tr_time'] = predictor_obj_1.predict(some_df[['names_code', 'boro_code',
                                                                   'time_float', 'tr_dist']])
    
    # second predictor object, to derive travel speed
    some_df['pred_rou_speed'] = predictor_obj_2.predict(some_df[['names_code', 'boro_code',
                                                                     'time_float', 'pred_tr_time', 'tr_dist']])
    

In [19]:
link_widget_2 = wg.Dropdown()
boros_widget_2 = wg.Dropdown(options = all_boros)

min_time = pytz.timezone('America/New_York').localize(sorted([pd.to_datetime(item) for item in list(MyTraffic.pol_df['time_s'].unique())])[0])
max_time = pytz.timezone('America/New_York').localize(sorted([pd.to_datetime(item) for item in list(MyTraffic.pol_df['time_s'].unique())])[-1])

# Define a function that updates the content of link based on what we select for boros
def update2(*args):
    link_widget_2.options = sorted(list(MyTraffic.pol_df[MyTraffic.pol_df["boro"].apply(str.lower) == boros_widget_2.value.lower()]['names'].unique()))
boros_widget_2.observe(update2)

wg.interact(show_modify_map,
            all_links = wg.fixed(all_links),
            make_pred = wg.fixed(make_pred),
            predictor_obj_1 = wg.fixed(opt_knn_fit_model_2),
            # 2nd optimal KNN model prediction (to derive travel time)
            predictor_obj_2 = wg.fixed(opt_knn_fit_model_1), 
            # 1st optimal KNN model prediction (to derive travel speed)
            Borough = boros_widget_2,
            Link_Name = link_widget_2,
            Times = wg.DatetimePicker(
                value = min_time,
                min = min_time,
                max = max_time,
                description='Pick a Time',
                disabled=False),
            Modification = ["Construction"],
            the_obj = wg.fixed(MyTraffic)
           );

AttributeError: module 'ipywidgets' has no attribute 'DatetimePicker'

In [None]:
MyTraffic.pol_df

In [None]:
MyTraffic.pol_df['pred_rou_speed'] = opt_knn_fit_model_1.predict(MyTraffic.pol_df[['names_code', 'boro_code',
                                                                     'time_float', 'opt_pred_knn_2', 'tr_dist']])
    

In [None]:
(ggplot(MyTraffic.pol_df[['rou_speed', 'pred_rou_speed']], 
        aes(x = 'rou_speed', y = 'pred_rou_speed'))
+ geom_point(color = 'green', alpha = 0.5)
+ geom_smooth(aes(x = 'rou_speed', y = 'pred_rou_speed'), color = 'darkred', size = 1.5)
+ geom_abline(linetype = 'dashed', color = 'blue', size = 1.5)
+ xlab("Actual Travel Speed")
+ ylab("Predicted Travel Speed (Optimal K = 7)")
+ ggtitle("KNN: Actual Travel Speed vs. Optimal Prediction (K = 7)")
+ theme_bw())

### New Road Construction (Proposal)

Traffic Light Color Scheme (hex code):
- Red: `#BB1E10`
- Green: `#33A532`
- Yellow: `#F7B500`

In [None]:
get_coor("80 Lafayette Street, New York, NY")

In [None]:
get_coor("Washington Square Park, New York, NY")

In [None]:
# wg.ToggleButtons?

In [None]:
link_widget_3 = wg.Dropdown(description = 'Traffic Link name:')

boros_widget_3 = wg.ToggleButtons(
    options = all_boros,
    description = 'Choose a borough:',
    value = None,
    disabled = False,
    button_style = '', # 'success', 'info', 'warning', 'danger' or ''
    icons = ['road'] * 5
)

modif = wg.ToggleButtons(
    options = ["None", "Construction", "New Road", "New Multi-Road"],
    description = 'Choose a modification proposal:',
    value = None,
    disabled = False,
    button_style = '', # 'success', 'info', 'warning', 'danger' or ''
    icons = ['cogs'] * 5
)

st_input = wg.Text(
    value = '80 Lafayette Street, New York, NY',
    placeholder = 'Enter starting point address',
    description = 'Starting Point:',
    disabled = False
)

end_input = wg.Text(
    value = 'Washington Square Park, New York, NY',
    placeholder = 'Enter ending point address',
    description = 'Ending Point:',
    disabled = False
)

addresses = wg.Textarea(
    placeholder = 'Type all point addresses, one per line (separate by enter/return)',
    description = 'Multi-Road:',
    disabled = False
)

min_time = pytz.timezone('America/New_York').localize(sorted([pd.to_datetime(item) for item in list(MyTraffic.pol_df['time_s'].unique())])[0])
max_time = pytz.timezone('America/New_York').localize(sorted([pd.to_datetime(item) for item in list(MyTraffic.pol_df['time_s'].unique())])[-1])

# Define a function that updates the content of link based on what we select for boros
def update3(*args):
    if boros_widget_3.value != None:
        link_widget_3.options = sorted(list(MyTraffic.pol_df[MyTraffic.pol_df["boro"].apply(str.lower) == boros_widget_3.value.lower()]['names'].unique()))

boros_widget_3.observe(update3)
link_widget_3.observe(update3)

def irrelavance(*args):
    if modif.value in ["None", "Construction"]:
        boros_widget_3.disabled = False # can choose boro
        link_widget_3.disabled = False # and choose street link based on boro
        st_input.disabled = True
        end_input.disabled = True
        addresses.disabled = True 
        
    elif modif.value == "New Road":
        boros_widget_3.disabled = True
        link_widget_3.disabled = True
        # only start-end bi-point input allowed
        st_input.disabled = False
        end_input.disabled = False
        addresses.disabled = True 
        
    elif modif.value == "New Multi-Road":
        boros_widget_3.disabled = True
        link_widget_3.disabled = True
        st_input.disabled = True
        end_input.disabled = True
        # only multi-line address input allowed
        addresses.disabled = False 
        
modif.observe(irrelavance)
boros_widget_3.observe(irrelavance)
link_widget_3.observe(irrelavance)
st_input.observe(irrelavance)
end_input.observe(irrelavance)
addresses.observe(irrelavance)
        
# new_road(all_links, make_pred, predictor_obj_1, predictor_obj_2, 
#             Borough, Times, Modification, the_obj, 
#             Link_Name = "", start_str = "", end_str = ""):

In [None]:
this_interact_manual = wg.interact_manual.options(manual_name = "Run Simulation!")

this_interact_manual(comp_model2,
            all_links = wg.fixed(all_links),
            make_pred = wg.fixed(make_pred),
            predictor_obj_1 = wg.fixed(opt_knn_fit_model_2),
            # 2nd optimal KNN model prediction (to derive travel time)
            predictor_obj_2 = wg.fixed(opt_knn_fit_model_1), 
            # 1st optimal KNN model prediction (to derive travel speed)
            Borough = boros_widget_3,
            Link_Name = link_widget_3,
            Times = wg.DatetimePicker(
                value = min_time,
                min = min_time,
                max = max_time,
                description='Pick a Time',
                disabled=False),
            Modification = modif,
            the_obj = wg.fixed(MyTraffic),
            start_str = st_input,
            end_str = end_input,
            all_address = addresses
           );

<br>

- Put/add drop-pin of meta-info about the neighborhood/address point of a proposed new road/multi-road