# Construct Speed Data

In [6]:
import numpy as np

In [7]:
import pandas as pd

links = pd.read_csv("data/links.csv")
nodes = pd.read_csv("data/nodes.csv")

### This is the full links data from last year, and I use the street length, latitude, and longtitude.

In [8]:
links

Unnamed: 0,link_id,begin_node_id,end_node_id,begin_angle,end_angle,street_length,osm_name,osm_class,osm_way_id,startX,startY,endX,endY,osm_changeset,birth_timestamp,death_timestamp
0,1,103235840,103225947,-161.51,16.99,84.295,PalisadeAvenue,tertiary,223694294,-74.021807,40.884127,-74.021994,40.883384,16353009,1405267167641,40000000000000
1,2,42516422,42516427,175.46,-4.68,260.392,SchenectadyAvenue,residential,5679907,-73.932264,40.641950,-73.932013,40.639616,19443546,1405267167641,40000000000000
2,3,42516422,42516418,-4.60,175.40,256.804,SchenectadyAvenue,residential,5679907,-73.932264,40.641950,-73.932509,40.644252,19443546,1405267167641,40000000000000
3,4,103235530,103235525,-103.43,82.40,76.107,HarristownRoad,tertiary,11580026,-74.142573,40.948839,-74.143464,40.948718,14802365,1405267167641,40000000000000
4,5,42762376,42756156,-23.92,156.25,197.829,KingslandAvenue,residential,5698286,-73.844794,40.877882,-73.845749,40.879508,8219326,1405267167641,40000000000000
5,6,43007594,43018713,115.13,-64.87,82.482,AnnStreet,residential,5717250,-74.130497,40.637941,-74.129612,40.637626,8732371,1405267167641,40000000000000
6,7,254214079,254214081,49.24,-137.03,61.160,CentralParkAvenue,secondary,248399491,-73.861801,40.921879,-73.861286,40.922267,19119747,1405267167641,40000000000000
7,8,105156161,105037609,63.50,-64.98,465.913,MountainParkRoad,tertiary,11744255,-74.178142,40.892406,-74.173820,40.894911,14808369,1405267167641,40000000000000
8,9,42445950,596775946,119.18,-60.82,147.286,East59thStreet,residential,198924552,-73.971404,40.763598,-73.969877,40.762952,15789857,1405267167641,40000000000000
9,10,42811333,42811336,54.08,-126.20,75.782,27thAvenue,residential,91600913,-73.867784,40.763677,-73.867057,40.764079,6761976,1405267167641,40000000000000


### subset the required data

In [None]:
full_links = pd.read_csv("full_link_ids.csv")
full_links = [int(link) for link in full_links]

In [10]:
import json
with open('data/full_link_travel_times.json') as json_data:
    d = json.load(json_data)
d = np.array(d)

In [115]:
d.shape

(8760, 2302)

In [None]:
good_links = links[links['link_id'].isin(full_links)]

## Maka a graph class. 
#### 1. For Edges class, we add function to get time and average speed;
#### 2. for node class, we can get the coordinates. 
#### 3. Here are also objective function for Edges class, which can retruen the connected components and strongly connected components.

In [101]:
class Edge:
    def __init__(self, idx, startNodeId, endNodeId, length):
        self.idx = idx
        self.start = startNodeId
        self.end = endNodeId
        self.length = length
        
    def setTime(self, times):
        self.times = times
        
    def getSpeed(self):
        return self.length / self.times
    
    def getSpeedAt(self, day, hour):
        return self.length / self.times[24*day + hour]
    
    def __str__(self):
        return "{},{},{},{}".format(self.idx, self.start, self.end, self.length)
    
    def __repr__(self):
        return self.__str__()
    
class Node:
    def __init__(self, idx, x, y):
        self.idx = idx
        self.x = x
        self.y = y    
        
    def __str__(self):
        return "{},{}".format(self.x, self.y)
    
    def __repr__(self):
        return self.__str__()

class Graph:
    
    def __init__(self):
        self.nodes = {}
        self.edges = {}
        self.out = {}
        
    def add_edge(self, link):
        new_edge = Edge(link['link_id'], link['begin_node_id'],
                        link['end_node_id'], link['street_length'])
        self.nodes[link['begin_node_id']] = Node(link['begin_node_id'],
                                            link['startX'], link['startY'])
        self.nodes[link['end_node_id']] = Node(link['end_node_id'],
                                            link['endX'], link['endY'])
        if link['begin_node_id'] not in self.out:
            self.out[link['begin_node_id']] = []
        self.out[link['begin_node_id']].append(new_edge)
        self.edges[link['link_id']] = new_edge
        
    def filterEdges(self, func):
        return [self.edges[k] for k in self.edges if func(self.edges[k])]
    
    def filterNodes(self, func):
        return [self.nodes[k] for k in self.nodes if func(self.nodes[k])]
    
    def getEdgeCoords(self, edges):
        return [[[self.nodes[e.start].y,self.nodes[e.start].x],
          [self.nodes[e.end].y,self.nodes[e.end].x]] for e in edges]
    
    def drawEdges(self, m, edges):
        for loc in self.getEdgeCoords(edges):
            m.add_children(PolyLine(locations=loc,color='red'))
            
    def subgraph(self, edges):
        newg = Graph()
        newg.edges = edges
        #for k, e in edges.items():
        for k, e in enumerate(edges):
            newg.nodes[e.start] = self.nodes[e.start]
            newg.nodes[e.end] = self.nodes[e.end]
            if e.start not in newg.out:
                newg.out[e.start] = []
            newg.out[e.start].append(e)
        return newg
    
    def _getEdgesFromScc(self, scc):
        cset = {}
        for i, c in enumerate(scc):
            for n in c:
                cset[n] = i
        eset = {}
        for e in self.edges:
            if cset[e.start] == cset[e.end]:
                if cset[e.start] not in eset:
                    eset[cset[e.start]] = []
                eset[cset[e.start]].append(e)
            
        edges_to_draw = list(eset.values())
        return edges_to_draw
    
    def scc(self):
        scs = tarjan({k:[e.end for e in g.out[k]] for k in g.out})
        return self._getEdgesFromScc(scs)
    
    def cc(self):
        d = {}
        for e in self.edges:
            if e.start not in d:
                d[e.start] = set()
            if e.end not in d:
                d[e.end] = set()
            d[e.start].add(e.end)
            d[e.end].add(e.start)
        cs = tarjan({k: list(d[k]) for k in d})
        return (cs,self._getEdgesFromScc(cs))
        
        

In [102]:
g = Graph()
for i, l in good_links.iterrows():
    g.add_edge(l)

In [103]:
idxToId = list(good_links['link_id'])
for i, idx in enumerate(idxToId):
    g.edges[idx].setTime(d[:,i])

In [None]:
speeds = np.array([g.edges[k].getSpeed() for k in g.edges])

### Speed matrix. To get the speed for  specific e, just call the function e.getSpeed().

In [105]:
speeds

array([[  8.55887934,   8.60734215,   5.88653532, ...,   7.34898027,
          6.6646842 ,   3.00252465],
       [  7.13242724,  10.65293031,   8.47659521, ...,   7.04273335,
          9.30516214,   7.32096804],
       [  7.13240157,   8.60735642,   5.17875279, ...,   7.76169771,
          4.48743749,   5.08403183],
       ..., 
       [  7.13241128,   5.97733197,   9.05768352, ...,   8.45129557,
          9.30516232,  10.54223049],
       [  5.94367545,   5.9773156 ,   5.8865243 , ...,   7.76169515,
          9.30516649,   7.32096663],
       [  1.33723222,   2.47752885,   6.01441409, ...,   2.37188483,
          1.17997175,   3.89851609]])

In [37]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Visualize Speed in Map

In [163]:
#get specific time speed
[(g.edges[k].getSpeedAt(0,1), g.edges[k].times[1]) for k in g.edges]

[(8.6073421451768901, 9.2436199999999999),
 (10.652930307055769, 7.3546899999999997),
 (8.6073564241131599, 15.284599999999999),
 (3.2765374033824619, 54.557899999999997),
 (8.6073455921595254, 10.3055),
 (3.1386907206988188, 23.857399999999998),
 (7.397843927825912, 10.491300000000001),
 (3.4513056264744852, 24.7544),
 (2.4021471120905975, 22.9145),
 (2.0017945533817669, 40.344299999999997),
 (7.3978622217689978, 10.786899999999999),
 (13.659847843960517, 6.7259900000000004),
 (3.8122053740714996, 21.149699999999999),
 (3.8482421245027414, 37.204000000000001),
 (5.9773146456861292, 11.223100000000001),
 (5.9773154513060804, 14.256399999999999),
 (4.7182070796606119, 12.127700000000001),
 (4.1509211811455566, 37.3705),
 (5.2490092431952551, 17.234300000000001),
 (5.4896043219272705, 11.3653),
 (2.4775306808850495, 62.229300000000002),
 (5.9773298223311846, 13.2156),
 (8.607342970730393, 9.0930499999999999),
 (2.7948765151558734, 34.7654),
 (0.98815024934803497, 54.642499999999998),
 (5

In [33]:
#Exist nan values,ignore them
cur_speeds = [g.edges[k].getSpeedAt(0,1) for k in g.edges]
valid_cSpeeds = [cur_speed for cur_speed in cur_speeds if not np.isnan(cur_speed)]


In [34]:
max(valid_cSpeeds)

34.663739857781017

In [68]:
qualify_links = g.filterEdges(lambda e: e.getSpeedAt(0, 3) < 3)

In [116]:
#g.getEdgeCoords(qualify_links)

In [19]:
import folium
from folium.features import (WmsTileLayer, RegularPolygonMarker, Vega, GeoJson,
                       CircleMarker, LatLngPopup,
                       ClickForMarker, TopoJson, PolyLine, MultiPolyLine,
                       )
NY_COORDINATES = (40.7472,-73.98)

m =folium.Map(location=NY_COORDINATES, tiles='cartodbpositron',zoom_start=14)

#coords=links[['startX','endX','startY','endY']][links['link_id']==260854].values[0]
#loc=[[coords[2],coords[0]],[coords[3],coords[1]]]
#m.add_children(PolyLine(locations=loc,color='red'))

In [20]:
g.drawEdges(m,qualify_links)

### Visulization for qualified road under specific speed criteria. Here is the example that average speed under 3 m/s.

In [21]:
 m

In [52]:
import tarjan
tarjan = tarjan.tarjan

## Connected Components

In [98]:
thresholds = list(range(1,35))
linkslists = []
comLists = []
for i in range(len(thresholds)):
    threshold = thresholds[i]
    qualify_links = g.filterEdges(lambda e: e.getSpeedAt(0, 1) < threshold)
    sub_g = g.subgraph(qualify_links)
    cur_cc, cur_c= sub_g.cc()[1],sub_g.cc()[0]
    linkslists.append(cur_cc)
    comLists.append(cur_c)

In [117]:
len(linkslists[10])

2163

In [118]:
len(comLists)

34

In [58]:
g.drawEdges(m,linkslists[10])

### Connected Components Visulization. Here is the example of the connected components under avergae speed 11. The above length of Linkslists show the number of links that qualified. The above length of ComLists shows the number of connected components.

In [59]:
m

## Strongly Connected Components

In [100]:
from functools import reduce

In [112]:
Clinkslists = []
CompLists = []

for i in range(len(thresholds)):
    threshold = thresholds[i]
    qualify_links = g.filterEdges(lambda e: e.getSpeedAt(0, 1) < threshold)
    sub_g = g.subgraph(qualify_links)
    cur_scc = sub_g.scc()
    Clinkslists.append(cur_scc)
    CompLists.append(reduce(lambda x, y: x+y, cur_scc, []))

### Connected Components Visulization. Here is the example of the connected components under avergae speed 11. The following length of  ComLists show the number of links that qualified. The following length of Linkslists shows the number of Strongly connected components.

In [74]:
g.drawEdges(m,Clinkslists[10])
m

In [None]:
orderDic = {}
orderDicList = []

count = 0
for perThres in comLists:
    print(perThres)
    for perComp in perThres:
        print("perComp" ,perComp)
        #get the minimum order in this component if exist, get the order for this component
        orders = [orderDic[link] for link in perComp if link in orderDic]
        if len(orders) == 0:
            count = count + 1
            minOrder = count
        else:
            minOrder = min(orders)
        #sign the link with minimum order
        for link in perComp:
            orderDic[link] = minOrder
    #print(len(orderDic))
    #temp = copy.deepcopy(orderDic)
    #deep copy
    temp = {}
    for key in orderDic:
        temp[key] = orderDic[key]
    orderDicList.append(temp)

In [119]:
len(CompLists[10])

1269

In [120]:
len(Clinkslists[10])

30