#### This notebook uses the Overpass API from OpenStreetMap to find all the traffic signals within a given bounding box
#### The overpy library is used to send the request to the API and this call returns the latitude and longitude of all traffic signals
#### Next, the distance between each traffic intersection and each point in the monitoring data is measured
#### A traffic score is calculated as the 'Number of traffic intersections within a fixed buffer distance' of each point in the monitoring data (the final cells use a 0 - 2,000 ft buffer and write Traffic_score_2000.csv)

In [164]:
import overpy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import descartes
import geopandas as gpd
from shapely.geometry import Point, Polygon
from shapely.ops import nearest_points

import seaborn as sns

from mpl_toolkits.axes_grid1 import make_axes_locatable

import math

import time


from scipy.stats import boxcox


from matplotlib import cm

import matplotlib.lines as mlines

sns.set(style = 'whitegrid')
sns.set_palette('bright')
%matplotlib inline

####  Most of the code below is from <a href = "https://python-overpy.readthedocs.io/en/latest/introduction.html"> here </a>. Fetch all nodes using API Query. Here the node is specified as 'Highway - Traffic_signals'. 

       

In [None]:
# Query Overpass for every node tagged highway=traffic_signals inside the
# bounding box, then collect the coordinates of each returned node.
overpass_query = """
    node(37.68,-122.36,37.8712,-122.03) ["highway"="traffic_signals"];
    (._;>;);
    out body;
    """
api = overpy.Overpass()
result = api.query(overpass_query)
traffic_lat = [signal.lat for signal in result.nodes]
traffic_lon = [signal.lon for signal in result.nodes]


In [None]:
# One row per traffic signal: latitude and longitude side by side
traffic_df = pd.DataFrame({'Latitude': traffic_lat, 'Longitude': traffic_lon})

In [None]:
traffic_df.to_csv("Data/all_traffic_intersections.csv")

### Reading traffic intersection data

In [74]:
traffic_df = pd.read_csv("Data/all_traffic_intersections.csv")

In [75]:
# Discard the leading 'Unnamed: 0' column that came back from the CSV
traffic_df = traffic_df.drop(columns='Unnamed: 0')

In [76]:
## Give every intersection an integer id in a column named 'Intersection'.
## Once 'Unnamed: 0' has been dropped the frame may have no 'index' column, in
## which case a bare rename is a silent no-op and the later
## traffic_df[['Intersection', ...]] lookups fail. Materialise the row index
## first when needed. The outer guard keeps the cell safe to re-run.
if 'Intersection' not in traffic_df.columns:
    if 'index' not in traffic_df.columns:
        traffic_df = traffic_df.reset_index()
    traffic_df = traffic_df.rename(columns={'index': 'Intersection'})

In [77]:
### Add a zero-filled float column for distance.
### astype() returns a new Series rather than converting in place, so the
### float value is assigned directly; the bare expression on the last line
### only displays the column.
traffic_df['dist'] = 0.0
traffic_df['dist']

0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
1631    0.0
1632    0.0
1633    0.0
1634    0.0
1635    0.0
Name: dist, Length: 1636, dtype: float64

### Reading Air Pollution Monitoring Data

In [78]:
# header=1: column names are taken from the second line of the file
df = pd.read_csv('EDF_Data.csv', header = 1)
# Display the last rows as a quick check of what was loaded
df.tail()

Unnamed: 0,Longitude,Latitude,NO Value,NO2 Value,BC Value
21483,-122.034943,37.560076,129.999995,44.77822,3.923761
21484,-122.034724,37.560164,60.799998,39.027545,1.408693
21485,-122.034681,37.55983,34.622951,28.816797,2.659885
21486,-122.034504,37.559958,74.764705,35.735434,1.776353
21487,-122.034503,37.559957,78.754782,41.062757,2.014664


In [91]:
# Coordinates plus the black-carbon reading only
BC_df = df.loc[:, ['Longitude', 'Latitude', 'BC Value']]

In [92]:
# Coordinates plus the NO2 reading only
NO2_df = df.loc[:, ['Longitude', 'Latitude', 'NO2 Value']]

### Convert traffic dataset into a column format to calculate distance

In [79]:
# Split the intersection table into three two-column frames, each pairing the
# intersection id with one value column
traffic_lat, traffic_long, traffic_dist = (
    traffic_df[['Intersection', value_column]]
    for value_column in ('Latitude', 'Longitude', 'dist')
)

In [80]:
# Flip each frame so the intersections run along the columns
traffic_lat, traffic_long, traffic_dist = (
    frame.transpose() for frame in (traffic_lat, traffic_long, traffic_dist)
)

In [81]:
## Use the first row of each transposed frame (the intersection ids) as integer
## column labels, then remove that row from the data
def _promote_first_row_to_header(frame):
    """Relabel the columns with the integer-cast first row and drop that row."""
    labels = frame.iloc[0].astype(int)
    return frame.rename(columns=labels).drop(index=frame.index[0])

traffic_lat = _promote_first_row_to_header(traffic_lat)
traffic_long = _promote_first_row_to_header(traffic_long)
traffic_dist = _promote_first_row_to_header(traffic_dist)

In [82]:
## Tag every column label with the kind of value its frame holds
traffic_lat.columns = ['{}_latitude'.format(label) for label in traffic_lat.columns]
traffic_long.columns = ['{}_longitude'.format(label) for label in traffic_long.columns]
traffic_dist.columns = ['{}_distance'.format(label) for label in traffic_dist.columns]

In [83]:
## Replace each frame's row label with a plain 0-based index so the frames line up
traffic_lat, traffic_long, traffic_dist = (
    frame.reset_index(drop=True) for frame in (traffic_lat, traffic_long, traffic_dist)
)

In [84]:
### Stitch the latitude, longitude and distance frames together side by side
lat_long = traffic_lat.join(traffic_long)
traffic_combined = lat_long.join(traffic_dist)

In [85]:
### Order the columns by name. String sorting places each intersection's
### '<id>_distance', '<id>_latitude', '<id>_longitude' columns next to each other,
### which the positional lookups (idx+1, idx+2) in the distance loop rely on.
ordered_columns = sorted(traffic_combined.columns)
traffic_combined = traffic_combined[ordered_columns]

In [86]:
# Repeat the row of intersection coordinates once per monitoring point so it can
# be joined row-for-row with the pollution data. The repeat count is taken from
# df rather than a hard-coded 21488, so the cell still works if the input changes.
# NOTE: re-running this cell without rebuilding traffic_combined repeats the rows again.
traffic_combined = traffic_combined.loc[traffic_combined.index.repeat(len(df))].reset_index(drop=True)

In [93]:
# Attach the repeated intersection columns to the BC measurements; join() matches rows on the index
combined_BC_traffic = BC_df.join(traffic_combined)

In [94]:
# Attach the repeated intersection columns to the NO2 measurements; join() matches rows on the index
combined_NO2_traffic = NO2_df.join(traffic_combined)

In [98]:
combined_BC_traffic.head()

Unnamed: 0,Longitude,Latitude,BC Value,0_distance,0_latitude,0_longitude,1000_distance,1000_latitude,1000_longitude,1001_distance,...,998_longitude,999_distance,999_latitude,999_longitude,99_distance,99_latitude,99_longitude,9_distance,9_latitude,9_longitude
0,-122.322594,37.806781,0.818032,0.0,37.828247,-122.280475,0.0,37.820811,-122.205609,0.0,...,-122.23735,0.0,37.817592,-122.305863,0.0,37.808595,-122.267023,0.0,37.693106,-122.066624
1,-122.32231,37.80615,0.551475,0.0,37.828247,-122.280475,0.0,37.820811,-122.205609,0.0,...,-122.23735,0.0,37.817592,-122.305863,0.0,37.808595,-122.267023,0.0,37.693106,-122.066624
2,-122.322301,37.80642,0.593712,0.0,37.828247,-122.280475,0.0,37.820811,-122.205609,0.0,...,-122.23735,0.0,37.817592,-122.305863,0.0,37.808595,-122.267023,0.0,37.693106,-122.066624
3,-122.322299,37.80588,0.489898,0.0,37.828247,-122.280475,0.0,37.820811,-122.205609,0.0,...,-122.23735,0.0,37.817592,-122.305863,0.0,37.808595,-122.267023,0.0,37.693106,-122.066624
4,-122.322267,37.806689,0.739341,0.0,37.828247,-122.280475,0.0,37.820811,-122.205609,0.0,...,-122.23735,0.0,37.817592,-122.305863,0.0,37.808595,-122.267023,0.0,37.693106,-122.066624


In [95]:
# Make every per-intersection distance column numeric, downcast to a compact float dtype
distance_columns = [name for name in combined_BC_traffic.columns if "_dist" in name]
for name in distance_columns:
    combined_BC_traffic[name] = pd.to_numeric(combined_BC_traffic[name], downcast="float")



In [88]:
### Defining a function to calculate the distance between two GPS coordinates (latitude and longitude)
def distance(origin, destination, radius=6371):
    """Return the great-circle (haversine) distance between two points.

    Parameters
    ----------
    origin, destination : tuple
        ``(latitude, longitude)`` pairs in decimal degrees.
    radius : float, optional
        Radius of the sphere. The result is expressed in the same unit;
        the default of 6371 gives kilometres.

    Returns
    -------
    float
        Distance along the surface of the sphere.
    """
    lat1, lon1 = origin
    lat2, lon2 = destination

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) * math.sin(dlat / 2) + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * math.sin(dlon / 2) * math.sin(dlon / 2)
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c


In [99]:
# distance() returns kilometres; the stored distances are in feet.
KM_TO_FEET = 3280.84

time1 = time.time()
# The column layout is the same for every row, so find the distance columns once
# instead of re-scanning all the column names for each monitoring point.
distance_slots = [(pos, name) for pos, name in enumerate(combined_BC_traffic.columns)
                  if "_dist" in name]
for index, row in combined_BC_traffic.iterrows():
    # Columns 0 and 1 are the monitoring point's Longitude and Latitude.
    origin = (row.iloc[1], row.iloc[0])
    for idx, col in distance_slots:
        # The two columns after each '<id>_distance' column hold that
        # intersection's latitude and longitude (columns were sorted by name).
        destination = (row.iloc[idx+1], row.iloc[idx+2])
        combined_BC_traffic.at[index, col] = float(distance(origin, destination)) * KM_TO_FEET
time2 = time.time()

# Elapsed wall-clock seconds for the full pass
print(time2 - time1)

1296.3486068248749


In [100]:
combined_BC_traffic.head()

Unnamed: 0,Longitude,Latitude,BC Value,0_distance,0_latitude,0_longitude,1000_distance,1000_latitude,1000_longitude,1001_distance,...,998_longitude,999_distance,999_latitude,999_longitude,99_distance,99_latitude,99_longitude,9_distance,9_latitude,9_longitude
0,-122.322594,37.806781,0.818032,14445.248047,37.828247,-122.280475,34101.859375,37.820811,-122.205609,34133.441406,...,-122.23735,6229.799805,37.817592,-122.305863,16030.678711,37.808595,-122.267023,84684.304688,37.693106,-122.066624
1,-122.32231,37.80615,0.551475,14503.258789,37.828247,-122.280475,34056.515625,37.820811,-122.205609,34088.199219,...,-122.23735,6316.371582,37.817592,-122.305863,15960.22168,37.808595,-122.267023,84500.679688,37.693106,-122.066624
2,-122.322301,37.80642,0.593712,14446.459961,37.828247,-122.280475,34038.492188,37.820811,-122.205609,34070.132812,...,-122.23735,6249.635254,37.817592,-122.305863,15952.337891,37.808595,-122.267023,84546.40625,37.693106,-122.066624
3,-122.322299,37.80588,0.489898,14555.602539,37.828247,-122.280475,34069.003906,37.820811,-122.205609,34100.730469,...,-122.23735,6379.527832,37.817592,-122.305863,15962.838867,37.808595,-122.267023,84449.929688,37.693106,-122.066624
4,-122.322267,37.806689,0.739341,14384.481445,37.828247,-122.280475,34013.839844,37.820811,-122.205609,34045.441406,...,-122.23735,6178.677734,37.817592,-122.305863,15938.02832,37.808595,-122.267023,84585.8125,37.693106,-122.066624


In [102]:
## Write the entire dataset to a csv file
combined_BC_traffic.to_csv("Data/Unused-data/BC_traffic_full.csv")

### Read Traffic Distance Data

In [108]:
combined_BC_traffic = pd.read_csv("Data/Unused-data/BC_traffic_full.csv")

In [109]:
# Remove every per-intersection '<id>_latitude' column
latitude_columns = combined_BC_traffic.filter(regex='_latitude').columns
combined_BC_traffic = combined_BC_traffic.drop(columns=latitude_columns)

In [110]:
# Remove every per-intersection '<id>_longitude' column
longitude_columns = combined_BC_traffic.filter(regex='_longitude').columns
combined_BC_traffic = combined_BC_traffic.drop(columns=longitude_columns)

In [111]:
# Remove the pollutant reading; only coordinates and distances are kept
bc_value_columns = combined_BC_traffic.filter(regex='BC Value').columns
combined_BC_traffic = combined_BC_traffic.drop(columns=bc_value_columns)

In [113]:
# Discard the 'Unnamed: 0' column that came back from the CSV
combined_BC_traffic = combined_BC_traffic.drop(columns='Unnamed: 0')

In [115]:
combined_BC_traffic.to_csv("Data/Unused-data/BC_traffic_distance.csv")

### For each row, count the number of intersections within the buffer distance (0 - 2,000 feet, inclusive, as set by `range_min` / `range_max` below)

In [166]:
combined_BC_traffic = pd.read_csv("Data/Unused-data/BC_traffic_distance.csv")

In [167]:
combined_BC_traffic['number_intersections'] = 0

### Define a function that counts how many values in a row fall within a given range (here 0 - 2,000 feet, bounds inclusive).

In [168]:
def count_values_in_range(series, range_min, range_max):
    """Count the entries of ``series`` that lie in ``[range_min, range_max]``.

    Both bounds are inclusive. Missing values never satisfy either
    comparison, so they are not counted.
    """
    at_least_min = series >= range_min
    at_most_max = series <= range_max
    return (at_least_min & at_most_max).sum()

# Buffer limits in feet, both inclusive
range_min, range_max = 0, 2000

# Only the per-intersection '<id>_distance' columns may be counted. Running the
# range test over the whole row also counts 'Latitude' (about 37.x), the
# zero-initialised 'number_intersections' column and any index column re-read
# from the CSV, which inflates every score.
distance_columns = [col for col in combined_BC_traffic.columns
                    if str(col).endswith('_distance')]

combined_BC_traffic['number_intersections'] = combined_BC_traffic[distance_columns].apply(
    func=lambda row: count_values_in_range(row, range_min, range_max), axis=1)


In [169]:
# Keep only the monitoring-point coordinates and their traffic score
score_columns = ['Latitude', 'Longitude', 'number_intersections']
BC_traffic_score = combined_BC_traffic[score_columns]

In [170]:
BC_traffic_score.to_csv("Data/Traffic_score_2000.csv")