#### This Jupyter notebook was written by Ziyu. I reran it to update the best sites used for map-making.
#### See Ziyu's branch for more details about this notebook.

In [39]:
# import standard-library modules
import ast
import json
import os
import time #will use this to pause execution for a few seconds
import warnings

# import necessary libraries
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# import API-related libraries
import requests

# suppress warnings
warnings.filterwarnings("ignore")

# Load & Process Dataframes

In [40]:
# Build one table that holds the centre point and the buffer polygon of every cluster
## read the two GeoJSON layers
galv_centers = gpd.read_file('galveston_parcel_cluster_center_125.geojson')
galv_buffers = gpd.read_file('galveston_parcel_cluster_buffer_125.geojson')

# keep the centre under the name "center" and attach the buffer polygon next to it
# (rows are matched on the index, so both files are assumed to list the clusters
#  in the same order -- TODO confirm)
galv_centers = galv_centers.rename(columns={'geometry': 'center'})
galv_clusters = galv_centers.assign(buffer=galv_buffers['geometry'])

# empty placeholder columns; they are filled in once a site is chosen for each cluster
for placeholder in ('site_name', 'site_location', 'site_dist'):
    galv_clusters[placeholder] = np.nan
galv_clusters.head()

Unnamed: 0,lat,long,center,buffer,site_name,site_location,site_dist
0,29.30116,-94.819354,POINT (-94.81935 29.30116),"POLYGON ((-94.81735 29.30116, -94.81736 29.300...",,,
1,29.15064,-95.037411,POINT (-95.03741 29.15064),"POLYGON ((-95.03541 29.15064, -95.03542 29.150...",,,
2,29.21525,-94.928393,POINT (-94.92839 29.21525),"POLYGON ((-94.92639 29.21525, -94.92640 29.215...",,,
3,29.261063,-94.87239,POINT (-94.87239 29.26106),"POLYGON ((-94.87039 29.26106, -94.87040 29.260...",,,
4,29.304434,-94.777173,POINT (-94.77717 29.30443),"POLYGON ((-94.77517 29.30443, -94.77518 29.304...",,,


In [41]:
# Distribution points: read the CSV and preview its first three rows
dist = pd.read_csv('complete_galveston.csv')
dist.head(3)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Address,Coordinates,Name + Address,second_best_match,second_best_score
0,0,0,"Moody Gardens Hotel, Spa and Convention Center","7 Hope Boulevard, Galveston","{'lat': 29.2734603, 'lng': -94.85032249999999}","Moody Gardens Hotel, Spa and Convention Center...","Rainforest Cafe5310 Seawall Boulevard, Galveston",86
1,1,1,Rainforest Cafe,"5310 Seawall Boulevard, Galveston","{'lat': 29.2707531, 'lng': -94.8202072}","Rainforest Cafe5310 Seawall Boulevard, Galveston","Moody Gardens Hotel, Spa and Convention Center...",86
2,2,2,Landry's Prime Seafood & Steaks,"5310 Seawall Boulevard, Galveston","{'lat': 29.2707352, 'lng': -94.8202317}",Landry's Prime Seafood & Steaks5310 Seawall Bo...,Landry's Prime Seafood & Steaks5310 Seawall Bl...,88


In [42]:
# Collection points: read the CSV
clct = pd.read_csv('complete_galveston2.csv')

# Give clct the same column layout as dist so the two frames can be stacked later:
# add a placeholder 'Unnamed: 0.1' column filled with '-' and make it the first column
clct = clct.assign(**{'Unnamed: 0.1': ['-'] * len(clct)})
leading = ['Unnamed: 0.1']
clct = clct[leading + [name for name in clct.columns if name not in leading]]

# preview the result
clct.head(3)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Address,Coordinates,Name + Address,second_best_match,second_best_score
0,-,2,3018 Texas Clipper Rd Parking,"3018 Texas Clipper Road, Galveston","{'lat': 29.3157226, 'lng': -94.81716879999999}",3018 Texas Clipper Rd Parking3018 Texas Clippe...,"Parking utmb365-379 11th Street, Galveston",86
1,-,3,Parking lot,Galveston,"{'lat': 29.3353876, 'lng': -94.77805839999999}",Parking lotGalveston,"Parking utmb365-379 11th Street, Galveston",86
2,-,4,East Beach Free Parking,"Apffel Park Road, Galveston","{'lat': 29.325156, 'lng': -94.7386875}","East Beach Free ParkingApffel Park Road, Galve...","UTMB - Public Parking, Galveston Hospitals200 ...",86


In [43]:
# Stack distribution and collection points into one table of all businesses,
# renumbering the rows from 0
complete = pd.concat((dist, clct), ignore_index=True)
complete.head(3)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Address,Coordinates,Name + Address,second_best_match,second_best_score
0,0,0,"Moody Gardens Hotel, Spa and Convention Center","7 Hope Boulevard, Galveston","{'lat': 29.2734603, 'lng': -94.85032249999999}","Moody Gardens Hotel, Spa and Convention Center...","Rainforest Cafe5310 Seawall Boulevard, Galveston",86
1,1,1,Rainforest Cafe,"5310 Seawall Boulevard, Galveston","{'lat': 29.2707531, 'lng': -94.8202072}","Rainforest Cafe5310 Seawall Boulevard, Galveston","Moody Gardens Hotel, Spa and Convention Center...",86
2,2,2,Landry's Prime Seafood & Steaks,"5310 Seawall Boulevard, Galveston","{'lat': 29.2707352, 'lng': -94.8202317}",Landry's Prime Seafood & Steaks5310 Seawall Bo...,Landry's Prime Seafood & Steaks5310 Seawall Bl...,88


In [44]:
# Some rows have empty coordinates and must go before we continue.
# NOTE: how='any' without a subset removes every row that has a missing value
# in ANY column, not only in 'Coordinates'.
complete = complete.dropna(axis=0, how='any')

# renumber the surviving rows 0..n-1 and use that numbering (named "order") as the index
complete = complete.assign(order=range(len(complete))).set_index('order')

In [45]:
# view the shape of the cleaned dataframe: (rows kept after dropna, number of columns)
complete.shape

(2961, 8)

In [46]:
# Create numeric latitude / longitude columns in the complete dataframe for the ease of computation

# key names seen for each coordinate (some rows use "lat", others "latitude", ...)
LAT_KEYS = ("lat", "latitude")
LONG_KEYS = ("lng", "lon", "long", "longitude")


def parse_lat_long(raw):
    """Turn one stringified coordinate dictionary into a (latitude, longitude) pair.

    Parameters
    ----------
    raw : str
        Text of a Python dict literal, e.g. "{'lat': 29.27, 'lng': -94.85}".

    Returns
    -------
    tuple
        (latitude, longitude) as stored in the dictionary.

    Raises
    ------
    ValueError, SyntaxError
        If `raw` is not a valid Python literal.
    """
    # literal_eval only accepts plain literals, so text coming from the CSV
    # can never execute code (unlike eval)
    coordinate = ast.literal_eval(raw)

    lat = next((coordinate[key] for key in LAT_KEYS if key in coordinate), None)
    long = next((coordinate[key] for key in LONG_KEYS if key in coordinate), None)

    if lat is None or long is None:
        # unknown key names: fall back to position (first value = latitude,
        # second value = longitude)
        values = list(coordinate.values())
        lat, long = values[0], values[1]
    return lat, long


# parse every row once, in row order
parsed = [parse_lat_long(raw) for raw in complete['Coordinates']]
lats = [lat for lat, _ in parsed]
longs = [long for _, long in parsed]

# create columns for lat and long
complete['lats'] = lats
complete['longs'] = longs

In [47]:
# Turn the table into a GeoDataFrame with one point per business (x = longitude, y = latitude),
# keep only the columns needed later, and store the point geometry under the name 'Coordinates'
business_points = gpd.points_from_xy(complete['longs'], complete['lats'])
complete = (
    gpd.GeoDataFrame(complete, geometry=business_points)
    [['Name', 'Address', 'geometry', 'lats', 'longs']]
    .rename(columns={'geometry': 'Coordinates'})
)
complete.head(3)

Unnamed: 0_level_0,Name,Address,Coordinates,lats,longs
order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,"Moody Gardens Hotel, Spa and Convention Center","7 Hope Boulevard, Galveston",POINT (-94.85032 29.27346),29.27346,-94.850322
1,Rainforest Cafe,"5310 Seawall Boulevard, Galveston",POINT (-94.82021 29.27075),29.270753,-94.820207
2,Landry's Prime Seafood & Steaks,"5310 Seawall Boulevard, Galveston",POINT (-94.82023 29.27074),29.270735,-94.820232


# ---------------------------------------------
# Using Mapbox Matrix API

In [48]:
# Mapbox Matrix API access token.
# It is read from the environment so that no personal credential is stored in the notebook:
# set MAPBOX_ACCESS_TOKEN to your own token before starting Jupyter.
token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not token:
    # fail here, with a clear message, rather than deep inside the request loops below
    raise RuntimeError(
        "MAPBOX_ACCESS_TOKEN is not set. Export your own Mapbox access token in the "
        "environment before running this notebook."
    )

In [49]:
import time #will use this to pause execution for a few seconds

In [52]:
# Find, for each cluster, the business inside the cluster buffer that is the shortest walk
# from the cluster centroid. That business becomes the actual collection site, because the
# centroids themselves are merely theoretical locations.

MATRIX_URL = "https://api.mapbox.com/directions-matrix/v1/mapbox/walking/"
MAX_DESTINATIONS = 24   # the Matrix API takes at most 25 coordinates; one of them is the centroid
REQUEST_PAUSE = 2       # seconds to wait after each request (limit: 60 requests per minute)


def walking_durations(origin_long, origin_lat, dest_longs, dest_lats):
    """Return the walking duration from one origin to every destination.

    The destinations are sent to the Mapbox Matrix API in batches of
    MAX_DESTINATIONS, pausing REQUEST_PAUSE seconds after every request.

    Parameters
    ----------
    origin_long, origin_lat : float
        Coordinates of the origin (the cluster centroid).
    dest_longs, dest_lats : iterable of float
        Coordinates of the destinations, in matching order.

    Returns
    -------
    list of float
        One value per destination, in input order, taken from the 'durations'
        field of the response (seconds, per the Mapbox documentation).
        Destinations for which the API reports no route are returned as NaN.

    Raises
    ------
    RuntimeError
        If the API does not answer with HTTP 200 and code "Ok".
    """
    dest_longs = list(dest_longs)
    dest_lats = list(dest_lats)
    durations = []

    for start in range(0, len(dest_longs), MAX_DESTINATIONS):
        stop = start + MAX_DESTINATIONS
        # "long,lat" pairs separated by ";", origin first
        coord = str(origin_long) + "," + str(origin_lat)
        for dest_long, dest_lat in zip(dest_longs[start:stop], dest_lats[start:stop]):
            coord = coord + ";" + str(dest_long) + "," + str(dest_lat)
        url = MATRIX_URL + coord + "?sources=0&annotations=distance,duration"

        response = requests.get(url, params={'access_token': token}, timeout=30)
        payload = response.json()
        if response.status_code != 200 or payload.get('code') != 'Ok':
            # the URL is deliberately left out of the message: it carries the access token
            raise RuntimeError(
                "Mapbox Matrix request failed (HTTP %s): %s"
                % (response.status_code, payload.get('message', payload.get('code')))
            )

        # row 0 is the origin; its first entry is the origin-to-origin duration
        durations.extend(np.nan if d is None else d for d in payload['durations'][0][1:])
        time.sleep(REQUEST_PAUSE)

    return durations


# these two columns receive strings / shapely points, so they must not stay float typed
for col in ('site_name', 'site_location'):
    galv_clusters[col] = galv_clusters[col].astype(object)

for idx in galv_clusters.index:
    boundary = galv_clusters.loc[idx, 'buffer']
    # businesses located within the cluster boundary
    within_boundary = complete[complete.Coordinates.within(boundary)].reset_index(drop=True)

    closest = None
    if len(within_boundary) > 0:
        # note: despite its name, the 'Dist' column holds walking durations
        within_boundary = within_boundary.assign(
            Dist=walking_durations(
                galv_clusters.loc[idx, 'long'],
                galv_clusters.loc[idx, 'lat'],
                within_boundary['longs'],
                within_boundary['lats'],
            )
        )
        # idxmin ignores NaN and returns the first row holding the minimum
        if within_boundary['Dist'].notna().any():
            closest = within_boundary.loc[within_boundary['Dist'].idxmin()]

    if closest is not None:
        # attach the chosen business to the cluster table
        galv_clusters.at[idx, 'site_name'] = closest['Name']
        galv_clusters.at[idx, 'site_location'] = closest['Coordinates']
        galv_clusters.at[idx, 'site_dist'] = closest['Dist']
    else:
        # no (reachable) business inside the buffer: leave the cluster for the fallback search
        galv_clusters.at[idx, 'site_name'] = np.nan
        galv_clusters.at[idx, 'site_location'] = np.nan
        galv_clusters.at[idx, 'site_dist'] = np.nan

In [53]:
# Split the clusters by whether a site was found inside their buffer
site_missing = galv_clusters['site_name'].isnull()

# clusters still without a site; the old row labels are kept in an 'index' column
galv_clusters_nan = galv_clusters[site_missing].reset_index()
# clusters that already have a site
galv_clusters_found = galv_clusters[galv_clusters['site_name'].notnull()]

In [54]:
# Fallback search for the clusters that have no business inside their buffer:
# shortlist the 20 businesses closest to the centroid in a straight line, then keep
# the one with the shortest walk from the centroid.

# a real copy of the business table, so the helper column added below never ends up in `complete`
complete_copy = complete.copy()

# these two columns receive strings / shapely points, so they must not stay float typed
for col in ('site_name', 'site_location'):
    galv_clusters_nan[col] = galv_clusters_nan[col].astype(object)

for idx in galv_clusters_nan.index:
    print(idx)

    lat = galv_clusters_nan.loc[idx, 'lat']
    long = galv_clusters_nan.loc[idx, 'long']

    # straight-line distance in coordinate units; only used to shortlist candidates
    complete_copy['euc_diff'] = ((complete_copy['lats']-lat)**2 + (complete_copy['longs']-long)**2)**0.5
    nearby = complete_copy.nsmallest(20, ['euc_diff']).reset_index()

    # "long,lat" pairs separated by ";", centroid first (21 coordinates, below the API limit of 25)
    coord = str(long) + "," + str(lat)
    for i in range(len(nearby)):
        coord = coord + ";" + str(nearby['longs'][i]) +  "," + str(nearby['lats'][i])
    url = "https://api.mapbox.com/directions-matrix/v1/mapbox/walking/" +\
            coord + "?sources=0&annotations=distance,duration"

    r = requests.get(url, params={'access_token': token}, timeout=30)
    payload = r.json()
    if r.status_code != 200 or payload.get('code') != 'Ok':
        # the URL is deliberately left out of the message: it carries the access token
        raise RuntimeError(
            "Mapbox Matrix request failed (HTTP %s): %s"
            % (r.status_code, payload.get('message', payload.get('code')))
        )
    time.sleep(2) # no more than 60 requests per minute, so we need to put it to sleep

    # walking durations from the centroid to each candidate (the first entry is the
    # centroid itself); candidates without a route come back as null -> NaN
    distances = [np.nan if d is None else d for d in payload['durations'][0][1:]]

    # add the durations as a column of the shortlist
    # (note: despite its name, 'Dist' holds walking durations)
    nearby = nearby.assign(Dist = distances)

    if nearby['Dist'].notna().any():
        # idxmin ignores NaN and returns the first row holding the minimum
        closest = nearby.loc[nearby['Dist'].idxmin()]

        # add info from that closest row to the cluster table
        galv_clusters_nan.at[idx, 'site_name'] = closest['Name']
        galv_clusters_nan.at[idx, 'site_location'] = closest['Coordinates']
        galv_clusters_nan.at[idx, 'site_dist'] = closest['Dist']
    else:
        # none of the 20 candidates can be reached on foot: leave this cluster without a site
        print("cluster", idx, "has no walkable site among the 20 nearest businesses")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37


In [55]:
# Prepare the cluster table for export: keep the site columns and use the chosen
# site's location as the geometry of each row (the file itself is written further below)
site_columns = ['lat', 'long', 'site_name', 'site_location', 'site_dist']
galv_clusters = galv_clusters[site_columns]
galv_clusters = gpd.GeoDataFrame(galv_clusters, geometry=galv_clusters['site_location'])
# drop 'site_location', which now duplicates the new 'geometry' column
galv_clusters = galv_clusters[['lat', 'long', 'site_name', 'site_dist', 'geometry']]

In [56]:
# keep only the clusters that received a site (rows whose geometry is not missing);
# notna() states this directly instead of relying on an elementwise `!= None` comparison
galv_clusters = galv_clusters[galv_clusters["geometry"].notna()]
galv_clusters.head()

Unnamed: 0,lat,long,site_name,site_dist,geometry
2,29.21525,-94.928393,Ibis Mechanical,113.8,POINT (-94.92950 29.21631)
3,29.261063,-94.87239,Evia,138.5,POINT (-94.87065 29.26108)
4,29.304434,-94.777173,Palm House,97.0,POINT (-94.77783 29.30542)
5,29.114196,-95.088045,Laguna San Luis,198.4,POINT (-95.08820 29.11323)
6,29.275006,-94.843709,Gulf Coast Electric,127.9,POINT (-94.84343 29.27590)


In [59]:
# keep only the site columns of the fallback clusters; selecting by name (rather than by
# position) keeps this cell correct if the column order changes or the cell is run again
galv_clusters_nan = galv_clusters_nan[['lat', 'long', 'site_name', 'site_location', 'site_dist']]
galv_clusters_nan

Unnamed: 0,lat,long,site_name,site_location,site_dist
0,29.30116,-94.819354,Savage Sulfur Services Ltd,POINT (-94.8193817 29.3036899),50.8
1,29.15064,-95.037411,Parijatha (Coral Jasmine),POINT (-95.0351914 29.1435052),678.8
2,29.16933,-95.001003,Next Residence LLC,POINT (-95.00166899999999 29.17157689999999),237.0
3,29.227835,-94.921667,The Kislyuk Retreat.,POINT (-94.9174117 29.22441),760.8
4,29.235289,-94.889919,Beachside Village Information Center-Beachside...,POINT (-94.8914415 29.2330479),276.5
5,29.332274,-94.736772,Galveston Island Horse and Pony Rides,POINT (-94.7344875 29.3336225),53.5
6,29.256168,-94.888279,Sweetwater Cove,POINT (-94.897785 29.262011),703.9
7,29.268351,-94.843049,Island Bay Resort Apartment Homes,POINT (-94.8446427 29.2706019),238.9
8,29.233335,-94.911624,3rd Coast Company,POINT (-94.9049271 29.2366563),656.6
9,29.2845,-94.875346,Hedgepeth Photography,POINT (-94.8747786 29.2817133),277.7


In [60]:
# make the fallback clusters a GeoDataFrame whose geometry is the chosen site's location
fallback_points = galv_clusters_nan['site_location']
galv_clusters_nan = gpd.GeoDataFrame(galv_clusters_nan, geometry=fallback_points)
galv_clusters_nan

Unnamed: 0,lat,long,site_name,site_location,site_dist,geometry
0,29.30116,-94.819354,Savage Sulfur Services Ltd,POINT (-94.8193817 29.3036899),50.8,POINT (-94.81938 29.30369)
1,29.15064,-95.037411,Parijatha (Coral Jasmine),POINT (-95.0351914 29.1435052),678.8,POINT (-95.03519 29.14351)
2,29.16933,-95.001003,Next Residence LLC,POINT (-95.00166899999999 29.17157689999999),237.0,POINT (-95.00167 29.17158)
3,29.227835,-94.921667,The Kislyuk Retreat.,POINT (-94.9174117 29.22441),760.8,POINT (-94.91741 29.22441)
4,29.235289,-94.889919,Beachside Village Information Center-Beachside...,POINT (-94.8914415 29.2330479),276.5,POINT (-94.89144 29.23305)
5,29.332274,-94.736772,Galveston Island Horse and Pony Rides,POINT (-94.7344875 29.3336225),53.5,POINT (-94.73449 29.33362)
6,29.256168,-94.888279,Sweetwater Cove,POINT (-94.897785 29.262011),703.9,POINT (-94.89778 29.26201)
7,29.268351,-94.843049,Island Bay Resort Apartment Homes,POINT (-94.8446427 29.2706019),238.9,POINT (-94.84464 29.27060)
8,29.233335,-94.911624,3rd Coast Company,POINT (-94.9049271 29.2366563),656.6,POINT (-94.90493 29.23666)
9,29.2845,-94.875346,Hedgepeth Photography,POINT (-94.8747786 29.2817133),277.7,POINT (-94.87478 29.28171)


In [61]:
# drop 'site_location', which now duplicates 'geometry'; selecting by name (rather than by
# position) keeps this cell correct if the column order changes or the cell is run again
galv_clusters_nan = galv_clusters_nan[['lat', 'long', 'site_name', 'site_dist', 'geometry']]
galv_clusters_nan.head(5)

Unnamed: 0,lat,long,site_name,site_dist,geometry
0,29.30116,-94.819354,Savage Sulfur Services Ltd,50.8,POINT (-94.81938 29.30369)
1,29.15064,-95.037411,Parijatha (Coral Jasmine),678.8,POINT (-95.03519 29.14351)
2,29.16933,-95.001003,Next Residence LLC,237.0,POINT (-95.00167 29.17158)
3,29.227835,-94.921667,The Kislyuk Retreat.,760.8,POINT (-94.91741 29.22441)
4,29.235289,-94.889919,Beachside Village Information Center-Beachside...,276.5,POINT (-94.89144 29.23305)


In [62]:
# Stack the fallback sites on top of the sites found inside a buffer.
# reset_index() keeps the previous row labels in an 'index' column.
galv_clusters_complete = pd.concat([galv_clusters_nan, galv_clusters])
galv_clusters_complete = galv_clusters_complete.reset_index()
galv_clusters_complete = gpd.GeoDataFrame(
    galv_clusters_complete,
    geometry=galv_clusters_complete['geometry'],
)
galv_clusters_complete

Unnamed: 0,index,lat,long,site_name,site_dist,geometry
0,0,29.301160,-94.819354,Savage Sulfur Services Ltd,50.8,POINT (-94.81938 29.30369)
1,1,29.150640,-95.037411,Parijatha (Coral Jasmine),678.8,POINT (-95.03519 29.14351)
2,2,29.169330,-95.001003,Next Residence LLC,237.0,POINT (-95.00167 29.17158)
3,3,29.227835,-94.921667,The Kislyuk Retreat.,760.8,POINT (-94.91741 29.22441)
4,4,29.235289,-94.889919,Beachside Village Information Center-Beachside...,276.5,POINT (-94.89144 29.23305)
...,...,...,...,...,...,...
120,117,29.284563,-94.828017,Maid 2 Clean Galveston,32.3,POINT (-94.82766 29.28456)
121,118,29.286174,-94.811209,Maricela Beauty Salon,107.5,POINT (-94.80977 29.28665)
122,119,29.291136,-94.861522,J & J Telecommunications Inc,26.7,POINT (-94.86177 29.29091)
123,123,29.287899,-94.839207,Rodeway Inn,102.7,POINT (-94.83857 29.28789)


In [63]:
# drop the leftover 'index' column; selecting by name (rather than by position)
# keeps this cell correct if the column order changes or the cell is run again
galv_clusters_complete = galv_clusters_complete[['lat', 'long', 'site_name', 'site_dist', 'geometry']]

In [64]:
# write the chosen collection sites (one point per cluster) to a GeoJSON file for mapping
sites_path = 'galv_cluster_collection_sites_125.geojson'
galv_clusters_complete.to_file(sites_path, driver='GeoJSON')

In [65]:
# Draw a buffer around every chosen site and export the polygons as their own layer
BUFFER_RADIUS = 0.002  # in coordinate units (degrees of lat/long here; no projection is applied in this notebook)
buffer = galv_clusters_complete.buffer(BUFFER_RADIUS)

# work on a real copy so the point layer in galv_clusters_complete stays untouched
# (a `[:]` slice is not an independent copy)
galv_copy = galv_clusters_complete.copy()
galv_copy["geometry"] = buffer
galv_copy.to_file("galv_cluster_collection_sites_buffer_125.geojson",driver="GeoJSON")
galv_copy.head(5)

Unnamed: 0,lat,long,site_name,site_dist,geometry
0,29.30116,-94.819354,Savage Sulfur Services Ltd,50.8,"POLYGON ((-94.81738 29.30369, -94.81739 29.303..."
1,29.15064,-95.037411,Parijatha (Coral Jasmine),678.8,"POLYGON ((-95.03319 29.14351, -95.03320 29.143..."
2,29.16933,-95.001003,Next Residence LLC,237.0,"POLYGON ((-94.99967 29.17158, -94.99968 29.171..."
3,29.227835,-94.921667,The Kislyuk Retreat.,760.8,"POLYGON ((-94.91541 29.22441, -94.91542 29.224..."
4,29.235289,-94.889919,Beachside Village Information Center-Beachside...,276.5,"POLYGON ((-94.88944 29.23305, -94.88945 29.232..."
