In [None]:
!pip install geopandas haversine requests 

In [None]:
import itertools
import io
import zipfile

import geopandas
import haversine
import requests
import shapely

from matplotlib import pyplot as plt


In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Make every figure 10 x 10 inches by default.
plt.rcParams["figure.figsize"] = (10,10)

In [None]:
# URL of the COTA data archive hosted on ArcGIS; it is downloaded and opened as a zip file below.
# NOTE(review): this is plain http — consider https if the host supports it (confirm before changing).
cota_data_url = "http://cota1974.maps.arcgis.com/sharing/rest/content/items/43e38988fab145e4934d6c6d7b3c2385/data"

In [None]:
# Download the archive. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() turns an HTTP error into an immediate, clear
# exception instead of a confusing BadZipFile when the error page is parsed below.
cota_data = requests.get(cota_data_url, timeout=60)
cota_data.raise_for_status()

# Wrap the downloaded bytes in an in-memory stream so the zip can be opened
# without first writing the archive itself to disk.
cota_data_stream = io.BytesIO(cota_data.content)
cota_data_zip = zipfile.ZipFile(cota_data_stream)

In [None]:
# List the names of the files contained in the downloaded archive.
cota_data_zip.namelist()

In [None]:
# Unpack every archive member into the current working directory.
cota_data_zip.extractall()

In [None]:
# Load the stops shapefile into a GeoDataFrame.
# Presumably this file is one of the members unpacked from the downloaded archive.
cota_stops = geopandas.read_file('Stops_201709.shp')

In [None]:
# Preview the first five stop records.
cota_stops.head(5)

In [None]:
# Show every field of the first stop record.
cota_stops.iloc[0]

In [None]:
def point_lon_lat(lon, lat):
    """Build a shapely Point from coordinates stored as millionths of a unit.

    Both inputs are divided by 1,000,000 before the point is created. The rest
    of the notebook treats the resulting x/y values as longitude/latitude in
    degrees.

    Args:
        lon: Longitude value scaled up by 1,000,000.
        lat: Latitude value scaled up by 1,000,000.

    Returns:
        A shapely Point whose x is the scaled-down ``lon`` and whose y is the
        scaled-down ``lat``.
    """
    scale = 1000000
    x_coordinate = lon / scale
    y_coordinate = lat / scale
    return shapely.geometry.point.Point(x_coordinate, y_coordinate)


In [None]:
# Replace each stop's geometry with a point built from that row's Lon and Lat columns.
stop_geometries = cota_stops.apply(
    lambda stop: point_lon_lat(stop["Lon"], stop["Lat"]),
    axis=1,
)
cota_stops.geometry = stop_geometries

In [None]:
# Plot every stop, with a title and axis labels so the figure stands on its own.
stops_ax = cota_stops.plot(color='b')
stops_ax.set_title("COTA stops")
stops_ax.set_xlabel("Longitude")
stops_ax.set_ylabel("Latitude");

In [None]:
# Load the address records from a GeoJSON file.
# NOTE(review): absolute, machine-specific path — the file must already exist at this
# location; consider a path relative to the notebook.
addresses = geopandas.read_file("/home/nbuser/43222.geojson")

In [None]:
# Preview the first five address records.
addresses.head(5)

In [None]:
# Show every field of the first address record.
addresses.iloc[0]

In [None]:
# Number of address records loaded.
len(addresses)

In [None]:
# Overlay all stops and the addresses on one set of axes.
fig, ax = plt.subplots(1, 1)
cota_stops.plot(ax=ax, color='b')
addresses.plot(ax=ax, color='r')
# Name the layers in the title: without it the two colors are unexplained.
ax.set_title("All COTA stops (blue) and 43222 addresses (red)")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude");


In [None]:
# Keep only the stops whose coordinates fall inside a fixed bounding box
# (both edges inclusive on each axis).
# NOTE(review): the lower y bound of 39.25 is much looser than the other three
# bounds — confirm it is intentional.
stop_x = cota_stops.geometry.x
stop_y = cota_stops.geometry.y
inside_box = (
    (stop_x >= -83.1) & (stop_x <= -83.0)
    & (stop_y >= 39.25) & (stop_y <= 40)
)
cota_stops_filtered = cota_stops[inside_box]

In [None]:
# Number of stops that remain after the bounding-box filter.
len(cota_stops_filtered)

In [None]:
# Overlay the addresses and the filtered stops on one set of axes.
fig, ax = plt.subplots(1, 1)
addresses.plot(ax=ax, color='r')
cota_stops_filtered.plot(ax=ax, color='b')
# Name the layers in the title: without it the two colors are unexplained.
ax.set_title("Filtered COTA stops (blue) and 43222 addresses (red)")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude");


In [None]:
# Try the distance calculation on one stop and one address.
a_stop = cota_stops_filtered.iloc[0].geometry
an_address = addresses.iloc[0].geometry
# haversine takes (latitude, longitude) pairs, i.e. (y, x), and returns kilometers
# by default. Current haversine releases no longer accept the `miles=` keyword,
# so the result is converted to miles here instead.
distance_km = haversine.haversine((a_stop.y, a_stop.x), (an_address.y, an_address.x))
print(distance_km * 0.621371)

In [None]:
def distance(point_1, point_2):
    """Return the great-circle distance in miles between two points.

    Each point must expose ``x`` (longitude) and ``y`` (latitude) attributes,
    as shapely points do.

    The haversine call uses its default unit (kilometers) and the result is
    converted here. Current haversine releases no longer accept the ``miles=``
    keyword, so passing it raises ``TypeError``; converting from the default
    unit works regardless of which release is installed.

    Args:
        point_1: First point, with ``x``/``y`` in degrees.
        point_2: Second point, with ``x``/``y`` in degrees.

    Returns:
        The haversine distance between the two points, in miles.
    """
    miles_per_kilometer = 0.621371
    # haversine expects (latitude, longitude), i.e. (y, x).
    lat_lon_1 = (point_1.y, point_1.x)
    lat_lon_2 = (point_2.y, point_2.x)
    return haversine.haversine(lat_lon_1, lat_lon_2) * miles_per_kilometer

In [None]:
# For every address, find the distance to its nearest stop, and keep the largest
# of those nearest-stop distances.
number_of_addresses = len(addresses)
# Materialize the stop points once rather than re-fetching the geometry column
# for every address.
stop_points = list(cota_stops_filtered.geometry)
greatest_minimum_distance = 0
for index, address_point in enumerate(addresses.geometry):

    # display progress
    if index % 100 == 0:
        print("Percent complete: ", index/number_of_addresses * 100)

    # min() replaces the hand-rolled search and its arbitrary 1000-mile sentinel,
    # which silently capped the answer. With no stops at all, the nearest stop is
    # reported as infinitely far away.
    minimum_stop_distance = min(
        (distance(address_point, stop_point) for stop_point in stop_points),
        default=float("inf"),
    )

    greatest_minimum_distance = max(greatest_minimum_distance, minimum_stop_distance)
print("Greatest minimum distance:", greatest_minimum_distance)

## Exercise
Calculate the mean distance to the nearest stop for all addresses in 43222.

**Hint**: Most of the code will be the same.  Here's an updated form of the code above with some new variable names.

```python
number_of_addresses = len(addresses)
total_minimum_distance = 0
for index, address_point in enumerate(addresses.geometry):
    
    # display progress
    if index % 100 == 0:
        print("Percent complete: ", index/number_of_addresses * 100)
    
    minimum_stop_distance = 1000
    for stop_point in cota_stops_filtered.geometry:
        stop_distance = distance(address_point, stop_point)
        if stop_distance < minimum_stop_distance:
            minimum_stop_distance = stop_distance
    total_minimum_distance = total_minimum_distance + minimum_stop_distance

XXXX
print("Mean minimum distance:", mean_minimum_distance)
```

Replace `XXXX` with the code needed to calculate `mean_minimum_distance` from `total_minimum_distance` and `number_of_addresses`.

Recall that the mean of a set of values is the sum of the values divided by the number of values.