In [1]:
import pandas as pd
import geopandas as gpd

# Do not truncate columns when displaying wide DataFrames.
pd.set_option('display.max_columns', None)

In [2]:
# Load permits_clean.json into a DataFrame and intersections.geojson into a GeoDataFrame.
# Paths are relative to the notebook's working directory.
permits = pd.read_json('../data/permits_clean.json')
nyc = gpd.read_file('../data/intersections.geojson')

In [3]:
# Preview the first three permit rows.
permits.head(3)

Unnamed: 0,eventid,startdate,enddate,entered,boro,category,subcategory,country,zipcode,parkingheld,m_street,c1_street,c2_street,m_front_card,m_middle_card,m_back_card,c1_front_card,c1_middle_card,c1_back_card,c2_front_card,c2_middle_card,c2_back_card,m_st_num,m_way_type,m_way_name,c1_st_num,c1_way_type,c1_way_name,c2_st_num,c2_way_type,c2_way_name,bronx,brooklyn,manhattan,queens,staten_island
0,623629,2022-02-02,2022-02-03,2022-01-28,manhattan,Television,Episodic series,United States of America,"[10010, 10011, 10012, 10013]",prince street between bowery and lafayette street,prince street,bowery,lafayette street,,,,,,,,,,,street,prince,,,bowery,,street,lafayette,False,False,True,False,False
1,623629,2022-02-02,2022-02-03,2022-01-28,manhattan,Television,Episodic series,United States of America,"[10010, 10011, 10012, 10013]",mott street between east houston street and pr...,mott street,east houston street,prince street,,,,east,,,,,,,street,mott,,street,houston,,street,prince,False,False,True,False,False
2,623629,2022-02-02,2022-02-03,2022-01-28,manhattan,Television,Episodic series,United States of America,"[10010, 10011, 10012, 10013]",mulberry street between east houston street an...,mulberry street,east houston street,prince street,,,,east,,,,,,,street,mulberry,,street,houston,,street,prince,False,False,True,False,False


In [4]:
# Preview the first three intersection rows (street pair, boroughs, point geometry).
nyc.head(3)

Unnamed: 0,street_1,street_2,boro_1,boro_2,geometry
0,100 avenue,100 drive,queens,queens,POINT (-73.73408 40.71508)
1,100 drive,100 avenue,queens,queens,POINT (-73.73408 40.71508)
2,100 avenue,193 street,queens,queens,POINT (-73.76575 40.70904)


In [5]:
# Minor cleaning of a special case: the permits spell this street
# 'ave of the americas', which is rewritten to '6 avenue' in every street column.
for street_col in ('m_street', 'c1_street', 'c2_street'):
    permits[street_col] = permits[street_col].replace({'ave of the americas': '6 avenue'})

In [6]:
# Sanity check: do both streets of every intersection lie in the same borough?
# Flag each row, then display the rows where the boroughs differ (none expected).
nyc['temp'] = nyc['boro_1'] == nyc['boro_2']
nyc.loc[~nyc['temp']]

Unnamed: 0,street_1,street_2,boro_1,boro_2,geometry,temp


In [7]:
# Both borough columns always agree (checked in the previous cell), so keep a single
# 'boro' column and discard the scratch flag together with the two originals.
nyc['boro'] = nyc['boro_1']
nyc = nyc.drop(columns=['temp', 'boro_1', 'boro_2'])

In [8]:
# Build a nested lookup:
#   '<street>, <boro>' -> {'<cross street>, <boro>': intersection geometry}
# The columns are iterated in parallel instead of calling nyc.iloc[i] several times per
# row, which materialises a full row Series on every access and is very slow on large frames.
intersections = {}
for main_name, cross_name, inter_boro, inter_geom in zip(
    nyc['street_1'], nyc['street_2'], nyc['boro'], nyc['geometry']
):
    street_key = main_name + ', ' + inter_boro
    cross_key = cross_name + ', ' + inter_boro
    # setdefault creates the inner dict the first time a street is seen.
    intersections.setdefault(street_key, {})[cross_key] = inter_geom

In [9]:
# Example: the first five cross-street keys stored for '100 avenue, queens'.
list(intersections['100 avenue, queens'].keys())[:5]

['100 drive, queens',
 '193 street, queens',
 '194 street, queens',
 '195 street, queens',
 '196 street, queens']

In [10]:
def _find_inter_pair(intersections: dict, m_st: str, c1_st: str, c2_st: str, boro: str):
    """Return (inter1, inter2) for a main street and two cross streets in one borough.

    Returns None unless the main street is present in ``intersections`` AND both
    cross streets are recorded as intersecting it within ``boro``.
    """
    # Keys follow the '<street>, <boro>' convention used when the lookup was built.
    street = m_st + ', ' + boro
    cross1 = c1_st + ', ' + boro
    cross2 = c2_st + ', ' + boro

    crossings = intersections.get(street)
    if crossings is None:
        return None
    if cross1 in crossings and cross2 in crossings:
        return crossings[cross1], crossings[cross2]
    return None


def has_inter_match(row: pd.Series, intersections: dict) -> dict:
    """Find the two intersections that bound a permit's held street segment.

    The lookup is first attempted in the row's primary borough (``row['boro']``).
    If that fails, every other borough flagged truthy on the row (columns 'bronx',
    'brooklyn', 'manhattan', 'queens', 'staten_island') is tried in that order and
    the first borough in which both cross streets are found wins.

    Parameters
    ----------
    row : pd.Series
        Must provide 'm_street', 'c1_street', 'c2_street', 'boro' and the five
        borough flag columns.
    intersections : dict
        Nested mapping '<street>, <boro>' -> {'<cross street>, <boro>': geometry}.

    Returns
    -------
    dict
        ``{'boro': ..., 'inter1': ..., 'inter2': ...}``. 'inter1'/'inter2' are None
        when no borough yields both intersections; 'boro' is then the primary
        borough, otherwise the borough in which the match was found.
    """
    m_st = row['m_street']
    c1_st = row['c1_street']
    c2_st = row['c2_street']
    boro = row['boro']

    pair = _find_inter_pair(intersections, m_st, c1_st, c2_st, boro)
    inter1, inter2 = pair if pair is not None else (None, None)

    # Check if streets for parking held are outside of the primary production borough
    if inter1 is None:
        check_cols = ['bronx', 'brooklyn', 'manhattan', 'queens', 'staten_island']
        # The flag column is 'staten_island' but borough values use 'staten island'.
        flagged = [
            'staten island' if col == 'staten_island' else col
            for col in check_cols
            if row[col]
        ]
        for candidate in flagged:
            if candidate == boro:
                # Primary borough was already tried above.
                continue
            pair = _find_inter_pair(intersections, m_st, c1_st, c2_st, candidate)
            if pair is not None:
                inter1, inter2 = pair
                boro = candidate
                break

    # Create intersection and streets dictionary
    return {
        'boro': boro,
        'inter1': inter1,
        'inter2': inter2
    }

In [11]:
# Run the intersection lookup on every permit row; each result is a dict with
# 'boro', 'inter1' and 'inter2'.
permits['matches'] = permits.apply(has_inter_match, axis=1, args=(intersections,))

In [12]:
# Unpack the per-row match dictionaries into columns. 'boro' is overwritten with the
# borough returned by the lookup, after which the helper column is no longer needed.
for match_key in ('inter1', 'inter2', 'boro'):
    permits[match_key] = permits['matches'].map(lambda match, key=match_key: match[key])

permits = permits.drop(columns='matches')

In [13]:
# Report how many rows received an intersection pair.
# Note: `found` holds the total row count; `missing` counts rows without a match.
found = len(permits)
missing = int(permits['inter1'].isna().sum())
print(f'{found - missing} of {found} intersection pairs found.')

211785 of 237863 intersection pairs found.


In [14]:
from shapely.geometry import MultiLineString, LineString
from shapely.ops import nearest_points

In [15]:
# Load nyc_clean.geojson: one row per named street and borough with its line geometry.
streets = gpd.read_file('../data/nyc_clean.geojson')

In [16]:
# Preview the first three street rows.
streets.head(3)

Unnamed: 0,name,boro,osm_name,front_card,middle_card,back_card,st_num,way_type,way_name,geometry
0,100 avenue,queens,100th avenue,,,,100,avenue,,"MULTILINESTRING ((-73.74175 40.71573, -73.7413..."
1,100 drive,queens,100th drive,,,,100,drive,,"MULTILINESTRING ((-73.73203 40.71458, -73.7319..."
2,100 place,queens,100th place,,,,100,place,,"LINESTRING (-73.81879 40.60576, -73.81889 40.6..."


In [17]:
# Map '<street name>, <boro>' -> street geometry.
# Columns are zipped instead of indexing streets.iloc[i] three times per row, which
# builds a row Series on every access. If a key repeats, the last row wins.
streets_dict = {
    street_name + ', ' + street_boro: street_geom
    for street_name, street_boro, street_geom in zip(
        streets['name'], streets['boro'], streets['geometry']
    )
}

In [18]:
def get_parkingheld(row: pd.Series, streets_dict: dict) -> MultiLineString | LineString | None:
    """Cut out the part of the main street lying between its two cross-street intersections.

    Parameters
    ----------
    row : pd.Series
        Needs 'inter1', 'inter2' (intersection geometries, or None when unmatched),
        'm_street' and 'boro'.
    streets_dict : dict
        Mapping '<street name>, <boro>' -> street geometry. A missing key raises KeyError.

    Returns
    -------
    The intersection of the main street with a circle whose diameter spans the two
    intersection points, or None when the row has no matched intersection.
    """
    first = row['inter1']
    if first is None:
        return None
    second = row['inter2']
    boro = row['boro']

    # Intersections may be multi-point geometries; reduce each to the single point
    # that gives the smallest distance between the two intersections.
    start_pt, end_pt = nearest_points(first, second)

    street_geom = streets_dict[row['m_street'] + ', ' + boro]

    # Circle centred midway between the two points, with both points on its boundary;
    # whatever part of the street falls inside the circle is returned.
    midpoint = start_pt.union(end_pt).centroid
    radius = start_pt.distance(end_pt) / 2
    return midpoint.buffer(radius).intersection(street_geom)

In [19]:
# List the columns currently on permits before selecting the ones to export.
permits.columns

Index(['eventid', 'startdate', 'enddate', 'entered', 'boro', 'category',
       'subcategory', 'country', 'zipcode', 'parkingheld', 'm_street',
       'c1_street', 'c2_street', 'm_front_card', 'm_middle_card',
       'm_back_card', 'c1_front_card', 'c1_middle_card', 'c1_back_card',
       'c2_front_card', 'c2_middle_card', 'c2_back_card', 'm_st_num',
       'm_way_type', 'm_way_name', 'c1_st_num', 'c1_way_type', 'c1_way_name',
       'c2_st_num', 'c2_way_type', 'c2_way_name', 'bronx', 'brooklyn',
       'manhattan', 'queens', 'staten_island', 'inter1', 'inter2'],
      dtype='object')

In [20]:
# Columns kept for the exported dataset; everything else (parsed street parts,
# borough flags, intersection points) is dropped.
output_columns = [
    'eventid',
    'startdate',
    'enddate',
    'boro',
    'category',
    'subcategory',
    'country',
    'parkingheld',
    'm_street',
    'c1_street',
    'c2_street',
    'geometry',
]

# Compute the held street segment for every row (None where no intersections matched).
permits['geometry'] = permits.apply(get_parkingheld, axis=1, args=(streets_dict,))
permits = permits[output_columns]

  arr = construct_1d_object_array_from_listlike(values)
  result[:] = values


In [21]:
# Wrap the permits table as a GeoDataFrame so the 'geometry' column is treated as geometry.
# NOTE(review): no crs is passed here, so the frame carries no CRS metadata — confirm
# whether it should inherit the CRS of the streets layer.
geo_permits = gpd.GeoDataFrame(permits)

In [22]:
# Summarise dtypes and non-null counts; null geometry rows are permits without a matched intersection pair.
geo_permits.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 237863 entries, 0 to 237878
Data columns (total 12 columns):
 #   Column       Non-Null Count   Dtype   
---  ------       --------------   -----   
 0   eventid      237863 non-null  int64   
 1   startdate    237863 non-null  object  
 2   enddate      237863 non-null  object  
 3   boro         237863 non-null  object  
 4   category     237863 non-null  object  
 5   subcategory  237863 non-null  object  
 6   country      237863 non-null  object  
 7   parkingheld  237863 non-null  object  
 8   m_street     237863 non-null  object  
 9   c1_street    237863 non-null  object  
 10  c2_street    237863 non-null  object  
 11  geometry     211785 non-null  geometry
dtypes: geometry(1), int64(1), object(10)
memory usage: 23.6+ MB


In [23]:
# Write the permits and their street-segment geometries to disk as GeoJSON.
geo_permits.to_file('../data/geo_permits.geojson', driver='GeoJSON')