## Finding the closest stations to open spaces, food trucks, farmers markets, and murals

In [1]:
import pandas as pd
import requests
import json
import numpy as np
import time as tm
import geopy.distance

In [2]:
# constants
# Number of closest stations kept per location (k-nearest neighbors).
# NOTE(review): the result tables further down hard-code five station columns
# and index ranks 0-4 directly, so changing k away from 5 also requires
# updating those cells.
k = 5
# Key for the geocoding service (turning addresses into lat/long coordinates, geocode.maps).
# NOTE(review): this looks like a real credential committed in plain text —
# consider loading it from an environment variable or an untracked config file
# and rotating the key. None of the cells visible in this section reference it.
API_KEY = "134fa47e7emshb3f020ce14798ccp10976djsn5ae387c1da73"
# Raw input files, given as paths relative to the working directory.
open_space_source = 'data/raw/Open_Space.geojson'
stations_source = 'data/raw/current_bluebikes_stations.csv'
food_truck_source = 'data/raw/food_truck_schedule.geojson'
farmers_market_source = 'data/raw/winter_farmers_markets.csv'

In [3]:
# Load the station list. skiprows=1 discards the file's first line, so the
# second line is parsed as the header row (presumably a title/banner line sits
# above the real column names — confirm against the CSV).
# Later cells read the 'Latitude', 'Longitude' and 'Number' columns.
stations = pd.read_csv(stations_source, skiprows=1)

In [4]:
# Bounding-box centre of one open space: the midpoint between the smallest and
# largest value on each coordinate axis.
def average_coord(i):
    """Return the bounding-box midpoint of feature ``i`` in the global ``data``.

    Only ``data['features'][i]['geometry']['coordinates'][0][0]`` is examined
    (NOTE(review): presumably the outer ring of the first polygon of a
    MultiPolygon, so any further parts of the feature are ignored — confirm).

    The result is ``[mid of element 0, mid of element 1]`` of the vertices.
    For the open-space file used in this notebook element 0 is the longitude
    (about -71) and element 1 the latitude (about 42), which is why the caller
    swaps the two values when filling 'Avg. Lat' / 'Avg. Long'.

    The extremes are seeded with 1000 / -1000, so a ring without vertices
    yields ``[0.0, 0.0]`` rather than raising.
    """
    ring = data['features'][i]['geometry']["coordinates"][0][0]

    first_axis = [vertex[0] for vertex in ring]
    second_axis = [vertex[1] for vertex in ring]

    # The seed goes first so that it wins unless a vertex is strictly beyond it.
    low_first = min([1000, *first_axis])
    high_first = max([-1000, *first_axis])
    low_second = min([1000, *second_axis])
    high_second = max([-1000, *second_axis])

    return [(low_first + high_first) / 2, (low_second + high_second) / 2]

## Open spaces

In [5]:
# Parse the open-space GeoJSON into `data` (a dict with a 'features' list).
# The context manager closes the file handle as soon as parsing is done, and
# the encoding is pinned to UTF-8 (the encoding GeoJSON is specified to use)
# instead of depending on the platform default.
with open(open_space_source, encoding='utf-8') as f:
    data = json.load(f)

In [6]:
# Empty table that the next cell fills with one row per open space.
open_spaces = pd.DataFrame(
    columns=[
        'OBJECTID',
        'OS_ID',
        'PARK_ID',
        'Name',
        'Avg. Lat',
        'Avg. Long',
    ],
)


In [7]:
# Append one row per GeoJSON feature: its identifying properties followed by
# the bounding-box centre computed by average_coord().
for position, feature in enumerate(data['features']):
    props = feature['properties']
    identity = [props['OBJECTID'], props['OS_ID'], props['PARK_ID'], props['SITE_NAME']]
    centre = average_coord(position)
    # average_coord() returns the longitude-like value at index 0 and the
    # latitude-like value at index 1, so they are swapped to match the
    # 'Avg. Lat', 'Avg. Long' column order.
    open_spaces.loc[len(open_spaces.index)] = identity + [centre[1], centre[0]]


In [8]:
# Display the assembled open-space table as the cell output.
open_spaces

Unnamed: 0,OBJECTID,OS_ID,PARK_ID,Name,Avg. Lat,Avg. Long
0,1,2000,,Wilson Park,42.340991,-71.145929
1,2,156,156,James H. Roberts Playground,42.359526,-71.125332
2,3,284,284,Smith Playground,42.365072,-71.131046
3,4,115,115,Fern Square,42.359534,-71.133291
4,5,259,259,Ringer Playground,42.350456,-71.138129
...,...,...,...,...,...,...
529,530,7273,7273,Mahoney Park,42.342008,-71.051382
530,531,2106,2106,Boundary I,42.269348,-71.137990
531,1340,7275,7275,Sprague Pond,42.233786,-71.138006
532,2541,7276,7276,Codman Square,42.290418,-71.071082


In [9]:
# Write the open-space centre points to the working directory;
# index=False leaves the DataFrame index out of the file.
open_spaces.to_csv('open_space_coords.csv', index=False)

In [10]:
# For every open space, measure the geodesic distance (km) to every station
# and keep the k closest as (distance, station number) pairs, nearest first.

# Station coordinates are the same for every open space, so they are read out
# of the DataFrame once instead of being looked up again for each location.
# Iterating the column values (rather than indexing by 0..n-1 labels) also
# keeps this correct if either frame's index is not a default RangeIndex.
station_points = [
    ((station_lat, station_long), number)
    for station_lat, station_long, number in zip(
        stations['Latitude'], stations['Longitude'], stations['Number']
    )
]

meta_dists = []

# geodesic() takes (latitude, longitude) points.
for space in zip(open_spaces["Avg. Lat"], open_spaces["Avg. Long"]):
    dists = [
        (geopy.distance.geodesic(space, stat).km, number)
        for stat, number in station_points
    ]
    # Tuples sort by distance first; equal distances fall back to station number.
    meta_dists.append(sorted(dists)[:k])

In [11]:
# Empty result table: the open-space columns followed by a
# 'Station N' / 'Station N Dist' pair for each of the five nearest stations.
open_spaces_top_stations = pd.DataFrame(
    columns=[
        'OBJECTID', 'OS_ID', 'PARK_ID', 'Name', 'Avg. Lat', 'Avg. Long',
        *(
            label
            for rank in range(1, 6)
            for label in (f'Station {rank}', f'Station {rank} Dist')
        ),
    ],
)

In [12]:
# Flatten each open space's ranked (distance, station) pairs into one wide
# row: the six open-space columns, then station / distance for ranks 1-5.
for position, ranked in enumerate(meta_dists):
    row = [
        open_spaces[column][position]
        for column in ('OBJECTID', 'OS_ID', 'PARK_ID', 'Name', 'Avg. Lat', 'Avg. Long')
    ]

    for rank in range(5):
        # Each entry is stored as (distance, station); the table lists the
        # station first and its distance second.
        row.append(ranked[rank][1])
        row.append(ranked[rank][0])

    open_spaces_top_stations.loc[len(open_spaces_top_stations.index)] = row


In [13]:
# Display the open spaces together with their five nearest stations.
open_spaces_top_stations

Unnamed: 0,OBJECTID,OS_ID,PARK_ID,Name,Avg. Lat,Avg. Long,Station 1,Station 1 Dist,Station 2,Station 2 Dist,Station 3,Station 3 Dist,Station 4,Station 4 Dist,Station 5,Station 5 Dist
0,1,2000,,Wilson Park,42.340991,-71.145929,D32033,0.481750,D32054,0.645125,K32002,0.650113,B32035,0.717898,D32028,0.953610
1,2,156,156,James H. Roberts Playground,42.359526,-71.125332,A32040,0.275789,A32011,0.469040,A32019,0.605326,A32006,0.784174,A32012,0.816372
2,3,284,284,Smith Playground,42.365072,-71.131046,A32019,0.227202,A32011,0.552233,A32040,0.572042,A32052,0.652070,A32005,0.677867
3,4,115,115,Fern Square,42.359534,-71.133291,A32040,0.401155,A32005,0.430834,A32019,0.680403,A32001,0.764282,A32045,0.765656
4,5,259,259,Ringer Playground,42.350456,-71.138129,D32034,0.278138,A32001,0.326645,A32017,0.463595,D32035,0.659100,D32054,0.731125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
529,530,7273,7273,Mahoney Park,42.342008,-71.051382,C32024,0.257203,C32029,0.322083,C32009,0.543137,C32053,0.560262,C32028,0.680634
530,531,2106,2106,Boundary I,42.269348,-71.137990,C32106,1.553058,E32013,1.887437,C32059,1.891039,C32104,1.892559,E32014,2.005260
531,1340,7275,7275,Sprague Pond,42.233786,-71.138006,C32104,2.669196,E32013,2.862156,E32014,3.928874,C32106,4.114102,Z32999,5.231180
532,2541,7276,7276,Codman Square,42.290418,-71.071082,C32038,0.060420,C32055,0.339595,E32009,0.520038,C32110,0.603584,C32066,0.716651


In [31]:
# Write the open-space / nearest-station table to the working directory;
# index=False leaves the DataFrame index out of the file.
open_spaces_top_stations.to_csv('open_spaces_nearest_stations.csv', index=False)

## Food trucks

In [16]:
# Parse the food-truck GeoJSON into `data`, replacing the previously loaded
# open-space document. The context manager closes the file handle as soon as
# parsing is done, and the encoding is pinned to UTF-8 (the encoding GeoJSON
# is specified to use) instead of depending on the platform default.
with open(food_truck_source, encoding='utf-8') as f:
    data = json.load(f)

In [17]:
# Empty table that a later cell fills with one row per food-truck feature.
food_trucks = pd.DataFrame(
    columns=[
        'OBJECTID',
        'Name',
        'Lat',
        'Long',
    ],
)

In [18]:
# Inspect the first feature's id property. This file spells the key
# 'ObjectId', unlike the 'OBJECTID' key read from the open-space file.
data['features'][0]['properties']['ObjectId']

1

In [19]:
# Append one row per food-truck feature. The position is taken from the
# feature's properties: 'y' fills the 'Lat' column and 'x' fills 'Long'.
for feature in data['features']:
    props = feature['properties']
    food_trucks.loc[len(food_trucks.index)] = [
        props['ObjectId'],
        props['Truck'],
        props['y'],
        props['x'],
    ]

In [20]:
# Display the food-truck table as the cell output.
food_trucks

Unnamed: 0,OBJECTID,Name,Lat,Long
0,1,Indian Street Cravings,42.350534,-71.075124
1,2,Moyzilla,42.350534,-71.075124
2,3,Hungry Nomads,42.350534,-71.075124
3,4,Chicken and Rice Guys,42.350534,-71.075124
4,5,Tacos Calleteco,42.350534,-71.075124
5,6,Hungry Nomads,42.350534,-71.075124
6,7,Indian Street Cravings,42.360581,-71.057741
7,8,Chick-fil-A,42.377834,-71.051037
8,9,Chick-fil-A,42.377834,-71.051037
9,10,Indian Street Cravings,42.340478,-71.088974


In [21]:
# For every food truck, measure the geodesic distance (km) to every station
# and keep the k closest as (distance, station number) pairs, nearest first.

# Station coordinates are the same for every truck, so they are read out of
# the DataFrame once instead of being looked up again for each location.
# Iterating the column values (rather than indexing by 0..n-1 labels) also
# keeps this correct if either frame's index is not a default RangeIndex.
station_points = [
    ((station_lat, station_long), number)
    for station_lat, station_long, number in zip(
        stations['Latitude'], stations['Longitude'], stations['Number']
    )
]

meta_dists = []

# geodesic() takes (latitude, longitude) points.
for truck in zip(food_trucks["Lat"], food_trucks["Long"]):
    dists = [
        (geopy.distance.geodesic(truck, stat).km, number)
        for stat, number in station_points
    ]
    # Tuples sort by distance first; equal distances fall back to station number.
    meta_dists.append(sorted(dists)[:k])

In [22]:
# Empty result table: the food-truck columns followed by a
# 'Station N' / 'Station N Dist' pair for each of the five nearest stations.
food_trucks_top_stations = pd.DataFrame(
    columns=[
        'OBJECTID', 'Name', 'Lat', 'Long',
        *(
            label
            for rank in range(1, 6)
            for label in (f'Station {rank}', f'Station {rank} Dist')
        ),
    ],
)

In [23]:
# Flatten each food truck's ranked (distance, station) pairs into one wide
# row: the four truck columns, then station / distance for ranks 1-5.
for position, ranked in enumerate(meta_dists):
    row = [
        food_trucks[column][position]
        for column in ('OBJECTID', 'Name', 'Lat', 'Long')
    ]

    for rank in range(5):
        # Each entry is stored as (distance, station); the table lists the
        # station first and its distance second.
        row.append(ranked[rank][1])
        row.append(ranked[rank][0])

    food_trucks_top_stations.loc[len(food_trucks_top_stations.index)] = row


In [24]:
# Write the food-truck / nearest-station table to the working directory;
# index=False leaves the DataFrame index out of the file.
food_trucks_top_stations.to_csv('food_trucks_top_stations.csv', index=False)

## Farmers markets (winter only)

In [25]:
# Load the winter farmers market table; it already carries 'Latitude' and
# 'Longitude' columns, which the following cells read directly.
# NOTE(review): this rebinds the global `data` from the parsed GeoJSON dict to
# a DataFrame. average_coord() reads data['features'], so it can no longer be
# called after this cell runs unless a GeoJSON file is loaded into `data` again.
data = pd.read_csv(farmers_market_source)
# Display the loaded table as the cell output.
data

Unnamed: 0,Market Name,Type,Address,Zip code,Latitude,Longitude,Neighborhood,Market days,Market Hours,First day of Market,Last day of Market,WIC and Senior Farmers Market Nutrition Program Vouchers,Accepts City of Boston Coupons
0,Brighton Farmers Market,In-Person,"640 Washington St, Brighton MA 02135",2135,42.347872,-71.153686,Brighton,"Every other Saturday: 01/27, 02/10, 02/24, 03/...",11am-2pm,01/27/24,03/30/24,WIC,
1,Dorchester Winter Farmers Market,In-Person,"6 Norfolk St., Dorchester MA, 02124",2124,42.28996,-71.07184,Dorchester,Saturday,11am-2pm,01/27/23,03/30/24,"WIC, SFMNPV",
2,East Boston Farmers Market,In-Person,"250 Sumner St, East Boston MA 02128",2128,42.375231,-71.040673,East Boston,Thursday,2pm-6pm,11/16/23,06/20/24,"WIC, SFMNPV",
3,WeGrowMicrogreens,In-Person,"21 Norton St, Hyde Park MA 02136",2136,42.242854,-71.136693,Hyde Park,Friday,1pm-4:30pm,10/27/23,03/08/24,"WIC, SFMNPV",
4,JP Centre St Farmers Market,In-Person,677 Centre St. Bank of America Parking Lot,2130,42.312173,-71.11437,Jamaica Plain,Saturday,12pm-3pm,11/11/24,03/30/24,"WIC, SFMNPV",
5,Roslindale Farmers Market,In-Person,"Adams Park, Roslindale 02131",2131,42.286545,-71.128441,Roslindale,Sunday,11am-2pm,01/7/24,03/31/24,Neither,
6,Dudley Town Common Farmers Market,In-Person,"11 Brook Ave, Roxbury MA 02119",2119,42.325115,-71.074876,Roxbury,Thursday,2:30pm-6:00pm,11/3/23,02/01/24,"WIC, SFMNPV",


In [26]:
# For every farmers market, measure the geodesic distance (km) to every
# station and keep the k closest as (distance, station number) pairs,
# nearest first.

# Station coordinates are the same for every market, so they are read out of
# the DataFrame once instead of being looked up again for each location.
# Iterating the column values (rather than indexing by 0..n-1 labels) also
# keeps this correct if either frame's index is not a default RangeIndex.
station_points = [
    ((station_lat, station_long), number)
    for station_lat, station_long, number in zip(
        stations['Latitude'], stations['Longitude'], stations['Number']
    )
]

meta_dists = []

# geodesic() takes (latitude, longitude) points.
for market in zip(data["Latitude"], data["Longitude"]):
    dists = [
        (geopy.distance.geodesic(market, stat).km, number)
        for stat, number in station_points
    ]
    # Tuples sort by distance first; equal distances fall back to station number.
    meta_dists.append(sorted(dists)[:k])

In [27]:
# Empty result table: the market columns followed by a
# 'Station N' / 'Station N Dist' pair for each of the five nearest stations.
farmers_markets_top_stations = pd.DataFrame(
    columns=[
        'id', 'Name', 'Lat', 'Long',
        *(
            label
            for rank in range(1, 6)
            for label in (f'Station {rank}', f'Station {rank} Dist')
        ),
    ],
)

In [28]:
# Flatten each market's ranked (distance, station) pairs into one wide row.
# The source table has no id column, so the row position is used as 'id'.
for position, ranked in enumerate(meta_dists):
    row = [
        position,
        data['Market Name'][position],
        data['Latitude'][position],
        data['Longitude'][position],
    ]

    for rank in range(5):
        # Each entry is stored as (distance, station); the table lists the
        # station first and its distance second.
        row.append(ranked[rank][1])
        row.append(ranked[rank][0])

    farmers_markets_top_stations.loc[len(farmers_markets_top_stations.index)] = row


In [29]:
# Display the farmers markets together with their five nearest stations.
farmers_markets_top_stations

Unnamed: 0,id,Name,Lat,Long,Station 1,Station 1 Dist,Station 2,Station 2 Dist,Station 3,Station 3 Dist,Station 4,Station 4 Dist,Station 5,Station 5 Dist
0,0,Brighton Farmers Market,42.347872,-71.153686,D32028,0.305716,D32029,0.559374,D32054,0.685311,D32033,0.862867,D32031,1.096804
1,1,Dorchester Winter Farmers Market,42.28996,-71.07184,C32038,0.041527,C32055,0.294878,C32110,0.566166,E32009,0.60012,C32066,0.788985
2,2,East Boston Farmers Market,42.375231,-71.040673,A32041,0.124877,A32036,0.215055,A32033,0.576942,A32031,0.640816,A32032,0.66339
3,3,WeGrowMicrogreens,42.242854,-71.136693,C32104,1.739842,E32013,1.944959,E32014,3.025722,C32106,3.152501,C32079,4.315839
4,4,JP Centre St Farmers Market,42.312173,-71.11437,E32001,0.35736,E32005,0.605974,D32052,0.68957,D32040,0.834842,D32041,0.98108
5,5,Roslindale Farmers Market,42.286545,-71.128441,C32046,0.032757,C32047,0.0815,C32059,0.683307,C32048,0.905221,C32079,1.283593
6,6,Dudley Town Common Farmers Market,42.325115,-71.074876,C32015,0.046263,C32022,0.70996,B32017,0.908234,B32026,1.001743,B32022,1.017302


In [30]:
# Write the farmers-market / nearest-station table to the working directory;
# index=False leaves the DataFrame index out of the file.
farmers_markets_top_stations.to_csv('farmers_markets_top_stations.csv', index=False)