In [18]:
import pandas as pd
import json

# Open Spaces

In [19]:
# GeoJSON is specified as UTF-8 (RFC 7946), so decode it explicitly instead of
# relying on the platform's default text encoding.
with open('data/locations/raw/Open_Space.geojson', encoding='utf-8') as f:
    data = json.load(f)

In [20]:
# Column layout of the cleaned open-space table; the frame starts out empty.
open_space_columns = ['SPACEID', 'Name', 'Type', 'Acres', 'Avg. Lat', 'Avg. Long']
open_spaces = pd.DataFrame(columns=open_space_columns)

In [21]:
# getting average coordinate of each open space using max/min long/lat
def average_coord (i):
    """Return the bounding-box centre of feature ``i`` of the global ``data``.

    Only the outer ring of the feature's first polygon is inspected. For a
    geometry whose ``type`` is ``'Polygon'`` that ring is ``coordinates[0]``;
    for anything else (the MultiPolygon layout) it is ``coordinates[0][0]``.

    Parameters
    ----------
    i : int
        Position of the feature inside ``data['features']``.

    Returns
    -------
    list
        ``[mid_x, mid_y]``: the midpoint between the smallest and largest
        value of each axis. GeoJSON positions are ordered longitude first,
        so this is ``[longitude, latitude]``.

    Raises
    ------
    ValueError
        If the inspected ring contains no positions.
    """
    geometry = data['features'][i]['geometry']
    coordinates = geometry["coordinates"]
    # A Polygon is nested one level shallower than a MultiPolygon.
    if geometry.get('type') == 'Polygon':
        ring = coordinates[0]
    else:
        ring = coordinates[0][0]
    if not ring:
        # Without this check an empty ring would produce a meaningless centre.
        raise ValueError(f"feature {i} has an empty outer ring")
    longs = [position[0] for position in ring]
    lats = [position[1] for position in ring]
    return [(min(longs) + max(longs)) / 2, (min(lats) + max(lats)) / 2]

In [22]:
# Collect one record per feature and build the frame in a single step. Growing
# a DataFrame one row at a time copies it on every insert, and re-running such
# a cell would append every open space a second time.
records = []
for i, feature in enumerate(data['features']):
    props = feature['properties']
    sid = props['OBJECTID']
    name = props['SITE_NAME']
    space_type = props['TypeLong']
    # Split the combined category into three, based on keywords in the site name.
    if space_type == 'Parks, Playgrounds & Athletic Fields':
        if 'Playground' in name:
            space_type = 'Playgrounds'
        elif 'Field' in name:
            space_type = 'Athletic Fields'
        else:
            space_type = 'Parks'
    acres = props['ACRES']
    # average_coord returns the x (longitude) midpoint first; the table stores
    # latitude before longitude, hence the swapped order below.
    ll = average_coord(i)
    records.append([sid, name, space_type, acres, ll[1], ll[0]])
# Rebuild from the existing column layout so the cell stays idempotent.
open_spaces = pd.DataFrame(records, columns=list(open_spaces.columns))


In [23]:
# Show the assembled open-space table as this cell's output.
open_spaces

Unnamed: 0,SPACEID,Name,Type,Acres,Avg. Lat,Avg. Long
0,1,Wilson Park,"Malls, Squares & Plazas",0.097184,42.340991,-71.145929
1,2,James H. Roberts Playground,Playgrounds,1.004136,42.359526,-71.125332
2,3,Smith Playground,Playgrounds,15.025247,42.365072,-71.131046
3,4,Fern Square,"Malls, Squares & Plazas",0.044509,42.359534,-71.133291
4,5,Ringer Playground,Playgrounds,10.263093,42.350456,-71.138129
...,...,...,...,...,...,...
529,530,Mahoney Park,Parks,0.381102,42.342008,-71.051382
530,531,Boundary I,Urban Wilds,6.925776,42.269348,-71.137990
531,1340,Sprague Pond,Urban Wilds,1.196202,42.233786,-71.138006
532,2541,Codman Square,"Malls, Squares & Plazas",0.350710,42.290418,-71.071082


In [24]:
# fixing duplicate names
# Every name that occurs more than once gets a running number appended
# ("X 1", "X 2", ...). Numbering is done per name group, so it does not depend
# on the duplicates being adjacent, keeps working past nine repeats, and is a
# no-op (instead of an IndexError) when no name is repeated.
counts = open_spaces['Name'].value_counts()
is_duplicate = open_spaces['Name'].isin(counts.index[counts > 1])
duplicate_ids = list(open_spaces.index[is_duplicate])
# cumcount() is 0-based within each name group; the suffixes start at 1.
occurrence = open_spaces.loc[is_duplicate].groupby('Name').cumcount() + 1
open_spaces.loc[is_duplicate, 'Name'] = (
    open_spaces.loc[is_duplicate, 'Name'] + ' ' + occurrence.astype(str)
)
open_spaces.loc[duplicate_ids]

Unnamed: 0,SPACEID,Name,Type,Acres,Avg. Lat,Avg. Long
382,383,Lincoln Square 1,"Malls, Squares & Plazas",0.054628,42.351612,-71.067861
389,390,Lincoln Square 2,"Malls, Squares & Plazas",0.216593,42.334658,-71.033808
393,394,Rink Grounds 1,Parks,0.734344,42.374113,-71.066424
516,517,Rink Grounds 2,Parks,2.525996,42.288451,-71.04482


In [25]:
# Write the cleaned table to disk; the positional index is not exported.
open_spaces.to_csv(path_or_buf='data/locations/open_spaces.csv', index=False)

# Bluebike Stations

In [26]:
# Bluebike stations only need their 'Number' column renamed to 'ID'
# before being written back out.
bluebike_raw = pd.read_csv('data/locations/raw/bluebike_stations.csv')
bluebike_stations = bluebike_raw.rename(columns={'Number': 'ID'})
bluebike_stations.to_csv(path_or_buf='data/locations/bluebike_stations.csv', index=False)

# Restaurants

In [27]:
# Rename the restaurant columns to 'Name'/'Latitude'/'Longitude' and use the
# row index as the ID.
restaurant_renames = {
    'businessname': 'Name',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
}
restaurants = pd.read_csv('data/locations/raw/restaurants.csv')
restaurants = restaurants.rename(columns=restaurant_renames)
restaurants = restaurants.assign(ID=restaurants.index)
restaurants.to_csv(path_or_buf='data/locations/restaurants.csv', index=False)

# Food Trucks

In [28]:
# GeoJSON is specified as UTF-8 (RFC 7946), so decode it explicitly instead of
# relying on the platform's default text encoding.
# NOTE: this rebinds `data`, which until here held the open-space features.
with open('data/locations/raw/food_truck_schedule.geojson', encoding='utf-8') as f:
    data = json.load(f)

In [29]:
# Column layout of the food-truck table; the frame starts out empty.
food_truck_columns = ['ID', 'Name', 'Latitude', 'Longitude']
food_trucks = pd.DataFrame(columns=food_truck_columns)

In [30]:
# Collect one record per schedule feature and build the frame in a single
# step. Growing a DataFrame one row at a time copies it on every insert, and
# re-running such a cell would append every entry a second time.
records = []
for feature in data['features']:
    props = feature['properties']
    oid = props['ObjectId']
    name = props['Truck']
    # The 'y' property is stored as Latitude and 'x' as Longitude.
    lat = props['y']
    long = props['x']
    records.append([oid, name, lat, long])
# Rebuild from the existing column layout so the cell stays idempotent.
food_trucks = pd.DataFrame(records, columns=list(food_trucks.columns))

In [31]:
# Show the assembled food-truck table as this cell's output.
food_trucks

Unnamed: 0,ID,Name,Latitude,Longitude
0,1,Indian Street Cravings,42.350534,-71.075124
1,2,Moyzilla,42.350534,-71.075124
2,3,Hungry Nomads,42.350534,-71.075124
3,4,Chicken and Rice Guys,42.350534,-71.075124
4,5,Tacos Calleteco,42.350534,-71.075124
5,6,Hungry Nomads,42.350534,-71.075124
6,7,Indian Street Cravings,42.360581,-71.057741
7,8,Chick-fil-A,42.377834,-71.051037
8,9,Chick-fil-A,42.377834,-71.051037
9,10,Indian Street Cravings,42.340478,-71.088974


In [32]:
# Write the food-truck table to disk; the positional index is not exported.
food_trucks.to_csv(path_or_buf='data/locations/food_trucks.csv', index=False)

# Winter Farmers Markets

In [33]:
# Winter farmers markets: rename 'Market Name' to 'Name' and use the row
# index as the ID.
farmers_markets = pd.read_csv('data/locations/raw/winter_farmers_markets.csv')
farmers_markets = farmers_markets.rename(columns={'Market Name': 'Name'})
farmers_markets = farmers_markets.assign(ID=farmers_markets.index)
farmers_markets.to_csv(path_or_buf='data/locations/winter_farmers_markets.csv', index=False)

# Murals

In [35]:
murals = pd.read_csv('data/locations/raw/murals.csv').rename(columns={'Title': 'Name'})
# Split the 'Coordinates' text at its first comma: the part before it is stored
# as Latitude and the part after it as Longitude. The vectorised string methods
# pass missing values through as NaN (a row-wise .split() would raise on them)
# and tolerate any amount of whitespace around the comma.
# The values are kept as text, so they are written to the CSV unchanged.
coord_parts = murals['Coordinates'].str.partition(',')
murals['Latitude'] = coord_parts[0].str.strip()
murals['Longitude'] = coord_parts[2].str.strip()
murals['ID'] = murals.index
murals.to_csv('data/locations/murals.csv', index=False)