In [2]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import googlemaps

%matplotlib inline

# Load Dataset

In [3]:
address = pd.read_csv('Employee_Addresses.csv')
address.head()

Unnamed: 0,address,employee_id
0,"98 Edinburgh St, San Francisco, CA 94112, USA",206
1,"237 Accacia St, Daly City, CA 94014, USA",2081
2,"1835 Folsom St, San Francisco, CA 94103, USA",178
3,"170 Cambridge St, San Francisco, CA 94134, USA",50
4,"16 Roanoke St, San Francisco, CA 94131, USA",1863


In [4]:
address.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2191 entries, 0 to 2190
Data columns (total 2 columns):
address        2191 non-null object
employee_id    2191 non-null int64
dtypes: int64(1), object(1)
memory usage: 34.3+ KB


In [5]:
stops = pd.read_csv('Potentail_Bust_Stops.csv')
stops.head()

Unnamed: 0,Street_One,Street_Two
0,MISSION ST,ITALY AVE
1,MISSION ST,NEW MONTGOMERY ST
2,MISSION ST,01ST ST
3,MISSION ST,20TH ST
4,MISSION ST,FREMONT ST


In [6]:
stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 2 columns):
Street_One    119 non-null object
Street_Two    119 non-null object
dtypes: object(2)
memory usage: 1.9+ KB


# Data Processing

In [7]:
# get unique address and stop information
unique_address = list(set(address['address']))

def merge_stop(x):
    return x[0] + ' & ' + x[1] + ' CA'

unique_stop = list(set(stops.apply(merge_stop, axis=1)))

In [8]:
unique_address[:10]

['21 Lyell St, San Francisco, CA 94112, USA',
 '128 Lippard Ave, San Francisco, CA 94131, USA',
 '327 Arleta Ave, San Francisco, CA 94134, USA',
 '551 Athens St, San Francisco, CA 94112, USA',
 '3120 Jackson St, San Francisco, CA 94115, USA',
 '2760 Vallejo St, San Francisco, CA 94123, USA',
 '1851 15th St, San Francisco, CA 94103, USA',
 '254 Oriente St, Daly City, CA 94014, USA',
 '98 Erie St, San Francisco, CA 94103, USA',
 '3172 Sacramento St, San Francisco, CA 94115, USA']

In [9]:
unique_stop[:10]

['MISSION ST & PARK ST CA',
 'MISSION ST & ITALY AVE CA',
 'MISSION ST & 26TH ST CA',
 'MISSION ST & SPEAR ST CA',
 'MISSION ST & 25TH ST CA',
 'MISSION ST & RUTH ST CA',
 'MISSION ST & AVALON AVE CA',
 'MISSION ST & OTTAWA AVE CA',
 'MISSION ST & THERESA ST CA',
 'MISSION ST & 17TH ST CA']

# Distance Measure

Here, I am using the Google Distance Matrix API for origin and destination walking distance measure. The GitHub link for google-maps-services-python can be found at: [https://github.com/googlemaps/google-maps-services-python](https://github.com/googlemaps/google-maps-services-python).

Since this is a charged service, I only show one query for demo purpose. After query the distance, I believe the next step is pretty clear.

In [11]:
# start google API with your KEY (you can register at Google.com)
gmaps = googlemaps.Client(key='AIzaSyDu9wM71_vOiZBT48dDt-zy2Y8ftRpe_Qg')

In [12]:
# defind origin and destination
origin = unique_address[0]
destination = unique_stop[0]

print('Origin:\t\t', origin)
print('Destination:\t', destination)

Origin:		 21 Lyell St, San Francisco, CA 94112, USA
Destination:	 MISSION ST & PARK ST CA


In [13]:
# query walking distance between origins and destinations
result = gmaps.distance_matrix(origins=origin, destinations=destination, 
                               mode='walking')
result

{'destination_addresses': ['Mission St, California, USA'],
 'origin_addresses': ['21 Lyell St, San Francisco, CA 94112, USA'],
 'rows': [{'elements': [{'distance': {'text': '1.1 km', 'value': 1129},
     'duration': {'text': '15 mins', 'value': 881},
     'status': 'OK'}]}],
 'status': 'OK'}

# Reference Solution

There is another solution online, you can refer to [Link](https://github.com/stasi009/TakeHomeDataChallenges/blob/master/14.ShuttleStops/shuttle_stops.py).