In [2]:
import requests
import math
import json
import pandas as pd
import re
from sodapy import Socrata
import multiprocessing

In [3]:
def cd():
    data = requests.get('https://data.cityofnewyork.us/api/views/cc5c-sm6z/rows.json')
    raw_data = data.json()['data']
    cleaner_data = []
    for lane in raw_data:
        coords = lane[8].split(',')
        if lane[-1] != None:
            cleaner_data.append({'first_coord': (float(coords[0][18:].split(' ')[1]), float(coords[0][18:].split(' ')[0])), 'second_coord': (float(coords[1][1:-2].split(' ')[1]), float(coords[1][1:-2].split(' ')[0])), 'type': lane[-1]})
        else:
            cleaner_data.append({'first_coord': (float(coords[0][18:].split(' ')[1]), float(coords[0][18:].split(' ')[0])), 'second_coord': (float(coords[1][1:-2].split(' ')[1]), float(coords[1][1:-2].split(' ')[0])), 'type': 'Unknown'})
    return cleaner_data

In [4]:
lanes_from_api = cd()

In [5]:
def quick_dist(coord1, coord2):
    return math.sqrt(math.pow(12430*((coord1[1]-coord2[1])/180),2)+math.pow(24901*((coord1[0]-coord2[0])/360)*0.16133111759,2))

In [6]:
def cds(data):
    actionable_data = []
    assembly_array = []
    count = 0
    for lane in data:
        newlane = lane
        count += 1
        for option in data:
            if quick_dist(lane['first_coord'], option['first_coord']) < 0.5 and lane['first_coord'] != option['first_coord'] and lane['second_coord'] != option['second_coord']:
                assembly_array.append({'first_coord': option['first_coord'], 'second_coord': option['second_coord'], 'type': option['type']})
        newlane['options'] = assembly_array
        assembly_array = []
        if count % 5000 == 0:
            print(str(int(count/len(data)*100))+'%')
        actionable_data.append(newlane)
    return actionable_data

In [7]:
lane_data = cds(lanes_from_api)

26%
53%
80%


In [8]:
def retrieve_crashes():
    client = Socrata("data.cityofnewyork.us", None)
    results = client.get("h9gi-nx95", limit="100000000")
    return results

In [9]:
crash_data = retrieve_crashes()



In [10]:
def parse_crashes(data):
    regex = r"vehicle_type_code\d+"
    bike_accidents = []
    errors = 0
    for crash in data:
        for item in crash.keys():
            if re.match(regex, item) and crash[item] == 'Bike' and int(crash['number_of_cyclist_injured'] + crash['number_of_cyclist_killed']) > 0:
                try:
                    bike_accidents.append({
                        'date': crash['crash_date'],
                        'time': crash['crash_time'],
                        'latitude': float(crash['latitude']),
                        'longitude': float(crash['longitude']),
                        'number_of_cyclist_injured': crash['number_of_cyclist_injured'],
                        'number_of_cyclist_killed': crash['number_of_cyclist_killed'],
                    })
                except:
                    errors += 1
                break
    print("The percentage of bike crashes which are invalid is:", errors/len(bike_accidents))
    return bike_accidents

In [11]:
bike_crashes = parse_crashes(crash_data)

The percentage of bike crashes which are invalid is: 0.08198757763975155


In [29]:
def score_lane(crash_data, lane):
    lane_dist = quick_dist(lane['first_coord'], lane['second_coord']) / 2
    for crash in crash_data:
        first_dist = quick_dist(lane['first_coord'], (crash['latitude'], crash['longitude']))
        second_dist = quick_dist(lane['second_coord'], (crash['latitude'], crash['longitude']))
        if lane_dist > first_dist or lane_dist > second_dist:
            try:
                lane['injuries'] += int(crash['number_of_cyclist_injured'])
                lane['deaths'] += int(crash['number_of_cyclist_killed'])
            except:
                lane['injuries'] = int(crash['number_of_cyclist_injured'])
                lane['deaths'] = int(crash['number_of_cyclist_killed'])
    return lane

In [30]:
def call_scoring_func(lane):
    return score_lane(bike_crashes, lane)

In [31]:
def pool(lanes):
    with multiprocessing.Pool() as pool:
        multiproc = pool.map(call_scoring_func, lanes)
    return multiproc

In [32]:
if __name__ == '__main__':
    threaded = pool(lane_data)

In [34]:
count = 0
for x in threaded:
    if 'injuries' in x.keys() or 'deaths' in x.keys():
        print(x)
        break
count / len(threaded)

{'first_coord': (40.78648646037843, -73.9721741500592), 'second_coord': (40.78716324678382, -73.97168048331152), 'type': 'Protected Path', 'options': [{'first_coord': (40.7778646920849, -73.97475424100871), 'second_coord': (40.77816778473558, -73.9745323145838), 'type': 'Unknown'}, {'first_coord': (40.77879717015083, -73.97290217117619), 'second_coord': (40.7786908839273, -73.97290238027503), 'type': 'Greenway'}, {'first_coord': (40.801403636691894, -73.96874146768884), 'second_coord': (40.80163546957009, -73.96858360849811), 'type': 'Bike-Friendly Parking'}, {'first_coord': (40.7788054165548, -73.97406668320338), 'second_coord': (40.779477862000086, -73.97357117526421), 'type': 'Unknown'}, {'first_coord': (40.789347524074394, -73.96907668154942), 'second_coord': (40.78937735683118, -73.96914749772212), 'type': 'Unknown'}, {'first_coord': (40.79315730856958, -73.97473796399309), 'second_coord': (40.79332480443144, -73.97461559640033), 'type': 'Bike-Friendly Parking'}, {'first_coord': (

0.0