In [1]:
from sodapy import Socrata
from configparser import ConfigParser

In [2]:
# Read the Socrata app token from the [socrata] section of the local INI file
# and build the API client for the NYC open-data domain.
CONFIG_PATH = './config.ini'

config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Without this check a wrong path only shows up
# later as an unhelpful KeyError: 'socrata'.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Could not read config file: {CONFIG_PATH}")

api_key = config['socrata']['APP_TOKEN']
client = Socrata("data.cityofnewyork.us", api_key)

In [3]:
# Query text: keep only the two permit event types of interest, newest start
# date first. LIMIT caps the number of rows returned, so a result whose length
# equals the cap may have been truncated.
dataset_id = "tg4x-b46p"
query = """
    SELECT *
    WHERE eventtype = "Shooting Permit" OR eventtype = "DCAS Prep/Shoot/Wrap Permit"
    ORDER BY startdatetime DESC
    LIMIT 100000
    """
results = client.get(dataset_id, query=query)

In [4]:
# Number of rows returned by the query (one dict per permit).
print('Number of permits:', len(results))

Number of permits: 66480


In [5]:
# Keep a reference to the first permit so the same record can be inspected
# again after later processing steps.
sample = results[0]
print(sample)

{'eventid': '623629', 'eventtype': 'Shooting Permit', 'startdatetime': '2022-02-02T09:00:00.000', 'enddatetime': '2022-02-03T00:00:00.000', 'enteredon': '2022-01-28T17:02:55.000', 'eventagency': "Mayor's Office of Film, Theatre & Broadcasting", 'parkingheld': 'PRINCE STREET between BOWERY and LAFAYETTE STREET,  MOTT STREET between EAST HOUSTON STREET and PRINCE STREET,  MULBERRY STREET between EAST HOUSTON STREET and PRINCE STREET,  LAFAYETTE STREET between EAST HOUSTON STREET and PRINCE STREET,  MOTT STREET between PRINCE STREET and SPRING STREET,  MULBERRY STREET between PRINCE STREET and SPRING STREET,  BROADWAY between BROOME STREET and GRAND STREET,  WOOSTER STREET between PRINCE STREET and BROOME STREET,  SPRING STREET between WOOSTER STREET and GREENE STREET,  BROOME STREET between WOOSTER STREET and GREENE STREET,  WEST   21 STREET between 5 AVENUE and 6 AVENUE,  WEST   21 STREET between 6 AVENUE and 7 AVENUE,  6 AVENUE between WEST   22 STREET and WEST   21 STREET,  6 AVENUE b

In [6]:
# Normalize every permit in place; later cells read the cleaned values straight
# from `results`.
#
# The isinstance checks make this cell safe to re-run: once 'parkingheld' and
# 'zipcode_s' have been converted to lists, calling .split() on them again
# would raise AttributeError.
for r in results:
    held = r['parkingheld']
    if isinstance(held, str):
        # Entries are separated by ', '. Collapse runs of whitespace inside each
        # entry (this also strips the leading/trailing blanks left by the split)
        # and lower-case it.
        r['parkingheld'] = [' '.join(street.split()).lower() for street in held.split(', ')]

    zipcodes = r['zipcode_s']
    if isinstance(zipcodes, str):
        r['zipcode_s'] = zipcodes.split(', ')

    # Keep only the date part of each timestamp
    # ('2022-02-02T09:00:00.000' -> '2022-02-02'). Re-running is a no-op because
    # a bare date contains no 'T'.
    for date_field in ('startdatetime', 'enddatetime', 'enteredon'):
        r[date_field] = r[date_field].split('T')[0]

In [7]:
# `sample` is the same dict object as results[0], so this shows that record
# after the in-place cleanup of `results`.
print(sample)

{'eventid': '623629', 'eventtype': 'Shooting Permit', 'startdatetime': '2022-02-02', 'enddatetime': '2022-02-03', 'enteredon': '2022-01-28', 'eventagency': "Mayor's Office of Film, Theatre & Broadcasting", 'parkingheld': ['prince street between bowery and lafayette street', 'mott street between east houston street and prince street', 'mulberry street between east houston street and prince street', 'lafayette street between east houston street and prince street', 'mott street between prince street and spring street', 'mulberry street between prince street and spring street', 'broadway between broome street and grand street', 'wooster street between prince street and broome street', 'spring street between wooster street and greene street', 'broome street between wooster street and greene street', 'west 21 street between 5 avenue and 6 avenue', 'west 21 street between 6 avenue and 7 avenue', '6 avenue between west 22 street and west 21 street', '6 avenue between west 21 street and west 20

In [8]:
def get_streets(permit: dict) -> list[dict]:
    """Expand one permit into a list of flat records, one per held street.

    Args:
        permit: A permit dict whose 'parkingheld' value is iterable; each item
            becomes the 'parkingheld' value of one output record.

    Returns:
        A list with one dict per item of ``permit['parkingheld']``. Every dict
        carries the permit-level fields (event id, dates, lower-cased borough,
        category, subcategory, country, zip codes) followed by 'parkingheld'.
        The 'zipcode' value is the permit's own 'zipcode_s' object, shared by
        all records of that permit (not copied).
    """
    def as_row(street):
        # Permit fields are looked up per row, so a permit with an empty
        # 'parkingheld' never touches the other keys.
        return {
            'eventid': permit['eventid'],
            'startdate': permit['startdatetime'],
            'enddate': permit['enddatetime'],
            'entered': permit['enteredon'],
            'boro': permit['borough'].lower(),
            'category': permit['category'],
            'subcategory': permit['subcategoryname'],
            'country': permit['country'],
            'zipcode': permit['zipcode_s'],
            'parkingheld': street,
        }

    return [as_row(street) for street in permit['parkingheld']]

In [9]:
# Flatten all permits into a single list of per-street records.
data = []
for permit in results:
    data.extend(get_streets(permit))

In [10]:
# Inspect the first per-street record.
print(data[0])

{'eventid': '623629', 'startdate': '2022-02-02', 'enddate': '2022-02-03', 'entered': '2022-01-28', 'boro': 'manhattan', 'category': 'Television', 'subcategory': 'Episodic series', 'country': 'United States of America', 'zipcode': ['10010', '10011', '10012', '10013'], 'parkingheld': 'prince street between bowery and lafayette street'}


In [11]:
# Total number of per-street records.
len(data)

237879

In [12]:
def split_streets(parkingheld: str) -> "tuple[str, str, str] | None":
    """Split '<main> between <cross1> and <cross2>' into its three street names.

    Args:
        parkingheld: A street description such as
            'prince street between bowery and lafayette street'.

    Returns:
        ``(main, cross1, cross2)`` when the text contains ' between ' and the
        part after it contains ' and '; otherwise ``None``. ``None`` is also
        returned for non-string input. Only the text between the first and
        second ' between ' is searched for cross streets, and only the first
        two ' and '-separated names in it are returned.
    """
    # Explicit checks replace a bare `except:`, which also swallowed
    # KeyboardInterrupt, SystemExit and genuine programming errors.
    if not isinstance(parkingheld, str):
        return None

    segments = parkingheld.split(' between ')
    if len(segments) < 2:
        return None

    cross_streets = segments[1].split(' and ')
    if len(cross_streets) < 2:
        return None

    return (segments[0], cross_streets[0], cross_streets[1])

In [13]:
# Add the main street and both cross streets to every record; all three are
# None when the description could not be split.
for datum in data:
    parts = split_streets(datum['parkingheld']) or (None, None, None)
    datum['m_street'], datum['c1_street'], datum['c2_street'] = parts

In [14]:
# First record again, now including the m_street / c1_street / c2_street fields.
print(data[0])

{'eventid': '623629', 'startdate': '2022-02-02', 'enddate': '2022-02-03', 'entered': '2022-01-28', 'boro': 'manhattan', 'category': 'Television', 'subcategory': 'Episodic series', 'country': 'United States of America', 'zipcode': ['10010', '10011', '10012', '10013'], 'parkingheld': 'prince street between bowery and lafayette street', 'm_street': 'prince street', 'c1_street': 'bowery', 'c2_street': 'lafayette street'}


In [15]:
import json

In [16]:
# Serialize all per-street records and write them to disk as one JSON array.
serialized = json.dumps(data)
with open('../data/permits.json', 'w') as out_file:
    out_file.write(serialized)