## Core Workflow: Geocode given addresses
Purpose: Geocode the training-data street addresses with the Google Directions API to get an encoded polyline string for each street. The polyline string is then decoded into a list of lat/lon pairs, which are connected to form the shape of the street.

<br>
*Date: 11/9/2019*

### Import statements

In [None]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
import geojson
import fiona
import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler 
import ogr, gdal
from glob import glob

import requests
import logging
import time
import json
import pandas as pd
from shapely.geometry import LineString, mapping
import pandas as pd

import collections

from glob import glob

import descarteslabs as dl
from descarteslabs.vectors import FeatureCollection

print (sys.path)



### Helper functions

In [None]:
# Helpers to decode the encoded polylines contained in the API's JSON response
# adapted from https://github.com/geodav-tech/decode-google-maps-polyline/blob/master/polyline_decoder.py

def decode_polyline(polyline_str):
    """Decode a Google Maps encoded polyline string.

    Parameters
    ----------
    polyline_str : str
        Encoded polyline text.

    Returns
    -------
    list of tuple
        One ``(longitude, latitude)`` tuple per encoded point, in that
        order, each value being the accumulated integer delta divided by
        100000.0. An empty string yields an empty list.

    Raises
    ------
    IndexError
        If the string ends in the middle of an encoded value.
    """
    points = []
    pos = 0
    total = len(polyline_str)
    # Running totals in 1e-5 units: slot 0 is latitude, slot 1 is longitude.
    running = [0, 0]

    # Each pass of the outer loop consumes exactly one point (two values).
    while pos < total:
        for axis in (0, 1):
            accumulator = 0
            bits = 0
            more = True
            # A value is a run of 5-bit groups; bit 0x20 flags "more follows".
            while more:
                chunk = ord(polyline_str[pos]) - 63
                pos += 1
                accumulator |= (chunk & 0x1f) << bits
                bits += 5
                more = chunk >= 0x20

            # The lowest bit carries the sign of the delta.
            if accumulator & 1:
                delta = ~(accumulator >> 1)
            else:
                delta = accumulator >> 1
            running[axis] += delta

        points.append((running[1] / 100000.0, running[0] / 100000.0))

    return points

def get_linestring(geocoded_json):
    """Build a LineString from a saved Directions API response.

    Parameters
    ----------
    geocoded_json : str
        Path to a JSON file with a ``routes`` list, as written by the
        geocoding step.

    Returns
    -------
    LineString
        The decoded points of every step of the first leg of the first
        route, concatenated in step order.

    Raises
    ------
    KeyError, IndexError
        If the file has no route, leg, steps or polyline entries.
    """
    # json.load() takes no ``encoding`` argument: it was ignored on Python 3
    # and removed in 3.9, where passing it raises TypeError.
    with open(geocoded_json, 'r') as data_file:
        data = json.load(data_file)

    the_points = []
    for step in data['routes'][0]['legs'][0]['steps']:
        the_points.extend(decode_polyline(step['polyline']['points']))

    # NOTE(review): consecutive steps presumably share an end/start point, so
    # the line may contain repeated vertices - confirm if that matters.
    return LineString(the_points)


### Set key variables

In [None]:
# CSV holding one street segment per row (NAME, FROM and TO columns are used below)
input_filename = "street_addresses.csv"

# Load the table into a pandas DataFrame
data = pd.read_csv(input_filename, encoding='utf8')

# Build the origin/destination query fragment expected by the Directions API:
# both ends are "<street>+<cross street>" followed by the city/state suffix.
city_suffix = '+ Los Angeles + CA'
origin_part = 'origin=' + data['NAME'] + '+' + data['FROM'] + city_suffix
destination_part = '&destination=' + data['NAME'] + '+' + data['TO'] + city_suffix
data['Address'] = origin_part + destination_part

# Name of the column that now holds the request fragment
address_column_name = "Address"


### Supply the street addresses and geocode them using the Google Directions API

In [None]:
# Pack the request fragment and the attributes carried forward into one tuple per street
street_columns = [
    'Address', 'NAME', 'FROM', 'TO',
    'start_date', 'end_date', 'expected_albedo',
]
streets = data[street_columns].apply(tuple, axis=1)
streets

In [None]:
# Request directions for every street, save each clean (non-partial) response
# as a JSON file and record the file path next to the street's attributes.
st_adds = []
st_names = []
st_frms = []
st_tos = []
start_dates = []
end_dates = []
albedos = []
geocoded_jsons = []

# Output files are numbered in the order streets are successfully geocoded.
ids = -1
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for Y, X in streets.items():

    st_add = str(X[0])
    st_name = str(X[1])
    st_frm = str(X[2])
    st_to = str(X[3])
    start_date = str(X[4])
    end_date = str(X[5])
    albedo = X[6]

    # Set up your Geocoding url (replace the key placeholder with your own key)
    geocode_url = 'https://maps.googleapis.com/maps/api/directions/json?'+st_add+'&mode=driving&key=putyourAPIkeyhere'
    try:
        # The timeout keeps one stalled request from hanging the whole run.
        response = requests.get(geocode_url, timeout=30)
        results = response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        print('request failed for ' + st_name + ': ' + str(err))
        continue

    if results.get('status') != 'OK':
        # Report the status instead of dropping the street silently.
        print('no result for ' + st_name + ': ' + str(results.get('status')))
        continue
    print('found result')

    # Only the first waypoint (the origin) is inspected.
    # NOTE(review): the destination waypoint is not checked - confirm intended.
    answer = results['geocoded_waypoints'][0]
    if answer.get('partial_match') is not None:
        print('Partial match')
        continue

    ids = ids + 1
    geocoded_json = '/data/phase_i/pavement/geocoded_jsons/la/'+st_name+'_id_'+str(ids)+'.json'
    with open(geocoded_json, "w") as jsonFile:
        json.dump(results, jsonFile, indent=4, sort_keys=True)

    st_adds.append(st_add)
    st_names.append(st_name)
    st_frms.append(st_frm)
    st_tos.append(st_to)
    start_dates.append(start_date)
    end_dates.append(end_date)
    albedos.append(albedo)
    geocoded_jsons.append(geocoded_json)
    print('successfully geocoded')

# Collect the results in a pandas DataFrame.
df_path = pd.DataFrame({'geocoded_jsons': geocoded_jsons,'street_address':st_adds,
                        'street_names':st_names, 'street_from': st_frms,
                        'street_to': st_tos,'start_date': start_dates, 'end_date': end_dates, 'albedos': albedos})

# Write the full results to csv using the pandas library.
df_path.to_csv('street_addresses_geocoded.csv',encoding='utf8')

In [None]:
# CSV produced by the geocoding loop
input_filename = 'street_addresses_geocoded.csv'

# Load it and pack the columns needed downstream into one tuple per row
data = pd.read_csv(input_filename, encoding='utf8')
geocoded_columns = [
    'geocoded_jsons', 'street_address', 'street_names', 'street_from',
    'street_to', 'start_date', 'end_date', 'albedos',
]
geocoded_streets = data[geocoded_columns].apply(tuple, axis=1)
geocoded_streets

In [None]:
# Turn every saved Directions response into a one-feature line shapefile in
# EPSG:4326 and record the shapefile path next to the street's attributes.
st_adds = []
st_names = []
st_frms = []
st_tos = []
start_dates = []
end_dates = []
albedos = []
unprj_shps = []
geocoded_jsons = []

# The CRS and schema are the same for every file, so build them once.
from fiona.crs import from_epsg
crs = from_epsg(4326)
schema = {'geometry': 'LineString', 'properties': {'route': 'str'}}

ids = -1
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for Y, X in geocoded_streets.items():

    geocoded_json = str(X[0])
    st_add = str(X[1])
    # Spaces become underscores so the name can be used in file names.
    st_name = str(X[2]).replace(' ', '_')
    st_frm = str(X[3])
    st_to = str(X[4])
    start_date = str(X[5])
    end_date = str(X[6])
    albedo = X[7]

    ids = ids + 1
    unprj_shp = '/data/phase_i/pavement/la_city/unprj_shp/'+st_name+'_id_'+str(ids)+'.shp'
    try:
        # Build the geometry before opening the output, so a response that
        # cannot be decoded does not leave an empty shapefile behind.
        line = get_linestring(geocoded_json)
        with fiona.open(unprj_shp, 'w', driver='ESRI Shapefile', crs=crs, schema=schema) as layer:
            layer.write({'geometry': mapping(line),
                         'properties': {'route': 'result'}
                         })
    except Exception as err:
        # Report the real cause; the loop continues with the next street.
        print('could not write ' + unprj_shp + ': ' + str(err))

    # The row is recorded even when writing failed, so row order and the
    # id numbering stay aligned with the input table.
    st_adds.append(st_add)
    st_names.append(st_name)
    st_frms.append(st_frm)
    st_tos.append(st_to)
    start_dates.append(start_date)
    end_dates.append(end_date)
    albedos.append(albedo)
    unprj_shps.append(unprj_shp)
    geocoded_jsons.append(geocoded_json)


# Collect the results in a pandas DataFrame.
df_path2 = pd.DataFrame({'geocoded_jsons': geocoded_jsons,'unprj_shps': unprj_shps,
                        'street_address':st_adds, 'street_names':st_names, 'street_from': st_frms,
                        'street_to': st_tos,'start_date': start_dates, 'end_date': end_dates, 'albedos': albedos})

# Write the full results to csv using the pandas library.
df_path2.to_csv('street_addresses_geocoded_unprj.csv',encoding='utf8')

### The geocoded polylines need to be buffered to get polygons for the streets. Convert the initial line shapefiles from EPSG:4326 to EPSG:32611 so that their units are metres

In [None]:
# CSV produced by the shapefile-writing loop
input_filename = 'street_addresses_geocoded_unprj.csv'

# Load it and pack the columns needed downstream into one tuple per row
data = pd.read_csv(input_filename, encoding='utf8')
unprj_columns = [
    'geocoded_jsons', 'unprj_shps', 'street_address', 'street_names',
    'street_from', 'street_to', 'start_date', 'end_date', 'albedos',
]
geocoded_streets = data[unprj_columns].apply(tuple, axis=1)
geocoded_streets

In [None]:
st_adds = []
st_names = []
st_frms = []
st_tos=[]
start_dates = []
end_dates = []
albedos = []
prj_shps = []
geocoded_jsons = []

ids = -1
for Y, X in geocoded_streets.iteritems():
    ids = ids + 1    
    
    geocoded_json = str(X[0])
    unprj_shp = str(X[1])
    st_add = str(X[2])
    st_name = str(X[3])
    st_frm=str(X[4])
    st_to = str(X[5])
    start_date = str(X[6])
    end_date = str(X[7])
    albedo = X[8]
    
#     print(unprj_shp)
    dest = str(unprj_shp[41:])
    dest_path = '/data/phase_i/pavement/la_city/prj_shp/' + dest
#     print(dest_path)

    !ogr2ogr -t_srs EPSG:32611 -s_srs EPSG:4326 {dest_path} {unprj_shp}

    st_adds.append(st_add)
    st_names.append(st_name)
    st_frms.append(st_frm)
    st_tos.append(st_to)
    start_dates.append(start_date)
    end_dates.append(end_date)
    albedos.append(albedo)
    prj_shps.append(dest_path)
    geocoded_jsons.append(geocoded_json)

# store the results to a pandas library.
df_path2 = pd.DataFrame({'geocoded_jsons': geocoded_jsons,'prj_shps': prj_shps,
                        'street_address':st_adds, 'street_names':st_names, 'street_from': st_frms, 
                        'street_to': st_tos,'start_date': start_dates, 'end_date': end_dates, 'albedos': albedos})

# Write the full results to csv using the pandas library. 
df_path2.to_csv('street_addresses_geocoded_prj.csv',encoding='utf8')

### Buffer the line shapefiles by 2 m on each side

In [None]:
# CSV produced by the reprojection loop
input_filename = 'street_addresses_geocoded_prj.csv'

# Load it and pack the columns needed downstream into one tuple per row
data = pd.read_csv(input_filename, encoding='utf8')
prj_columns = [
    'geocoded_jsons', 'prj_shps', 'street_address', 'street_names',
    'street_from', 'street_to', 'start_date', 'end_date', 'albedos',
]
geocoded_streets = data[prj_columns].apply(tuple, axis=1)
geocoded_streets

In [None]:
# Buffer every projected line shapefile into a polygon shapefile and record
# the output path and the buffered geometry next to the street's attributes.
# Streets whose input cannot be read or buffered are reported and left out.
st_adds = []
st_names = []
st_frms = []
st_tos = []
start_dates = []
end_dates = []
albedos = []
prj_shps = []
geocoded_jsons = []
pavement_shapes = []
buf_paths = []

# Buffer distance in the units of the input layers.
# NOTE(review): metres if the inputs are in EPSG:32611 as the reprojection
# step intends - confirm.
bufferDist = 2
shpdriver = ogr.GetDriverByName('ESRI Shapefile')

ids = -1
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for Y, X in geocoded_streets.items():
    ids = ids + 1

    geocoded_json = str(X[0])
    prj_shp = str(X[1])
    st_add = str(X[2])
    st_name = str(X[3])
    st_frm = str(X[4])
    st_to = str(X[5])
    start_date = str(X[6])
    end_date = str(X[7])
    albedo = X[8]

    buf_path = '/data/phase_i/pavement/la_city/buf_shp/'+st_name+'_id_'+str(ids)+'.shp'

    print(buf_path)

    inputfn = prj_shp
    outputBufferfn = buf_path

    # Reset per street so a layer without features cannot silently reuse the
    # geometry left over from the previous street.
    geomBuffer = None
    inputds = None
    outputBufferds = None
    try:
        inputds = ogr.Open(inputfn)
        if inputds is None:
            raise IOError('cannot open ' + inputfn)
        inputlyr = inputds.GetLayer()

        if os.path.exists(outputBufferfn):
            shpdriver.DeleteDataSource(outputBufferfn)
        outputBufferds = shpdriver.CreateDataSource(outputBufferfn)
        bufferlyr = outputBufferds.CreateLayer(outputBufferfn, geom_type=ogr.wkbPolygon)
        featureDefn = bufferlyr.GetLayerDefn()

        for feature in inputlyr:
            ingeom = feature.GetGeometryRef()
            geomBuffer = ingeom.Buffer(bufferDist)
            outFeature = ogr.Feature(featureDefn)
            outFeature.SetGeometry(geomBuffer)
            bufferlyr.CreateFeature(outFeature)

        if geomBuffer is None:
            raise ValueError('no features in ' + inputfn)
    except Exception as err:
        # Report the real cause and move on to the next street.
        print('skipping ' + inputfn + ': ' + str(err))
        continue
    finally:
        # OGR writes a datasource to disk when its last reference is dropped.
        # Release features and layers first, then the datasources, so the
        # shapefile is complete before anything else reads it.
        ingeom = None
        feature = None
        outFeature = None
        featureDefn = None
        bufferlyr = None
        inputlyr = None
        outputBufferds = None
        inputds = None

    st_adds.append(st_add)
    st_names.append(st_name)
    st_frms.append(st_frm)
    st_tos.append(st_to)
    start_dates.append(start_date)
    end_dates.append(end_date)
    albedos.append(albedo)
    prj_shps.append(prj_shp)
    geocoded_jsons.append(geocoded_json)
    buf_paths.append(buf_path)
    # If a layer holds several features, this is the buffer of the last one.
    pavement_shapes.append(geomBuffer)


# Collect the results in a pandas DataFrame.
df_path3 = pd.DataFrame({'geocoded_jsons': geocoded_jsons,'prj_shps': prj_shps,'buffered_path': buf_paths,
                         'pavement_shapes':pavement_shapes,
                        'street_address':st_adds, 'street_names':st_names, 'street_from': st_frms,
                        'street_to': st_tos,'start_date': start_dates, 'end_date': end_dates, 'albedos': albedos})

# Write the full results to csv using the pandas library.
df_path3.to_csv('street_addresses_geocoded_prj_buffered.csv',encoding='utf8')

### Convert the buffered shapefiles (with no initial CRS) from EPSG:32611 back to EPSG:4326 so that they can be used to retrieve imagery from the Descartes Labs Platform

In [None]:
# Set your input file here. This is the CSV written by the buffering loop;
# the file name previously used here is not produced by any step of this
# notebook.
input_filename = 'street_addresses_geocoded_prj_buffered.csv'

# Read the data to a Pandas Dataframe
data = pd.read_csv(input_filename, encoding='utf8')

# Pack the columns needed downstream into one tuple per row
geocoded_streets= data[['geocoded_jsons','prj_shps','buffered_path','pavement_shapes','street_address','street_names','street_from','street_to','start_date','end_date', 'albedos']].apply(tuple, axis=1)
geocoded_streets

In [None]:
st_adds = []
st_names = []
st_frms = []
st_tos=[]
start_dates = []
end_dates = []
albedos = []
prj_shps = []
geocoded_jsons = []
pavement_shapes = []
buf_paths = []
re_buf_paths = []

ids = -1
for Y, X in geocoded_streets.iteritems():
    ids = ids + 1    
    
    geocoded_json = str(X[0])
    prj_shp = str(X[1])
    buf_path = str(X[2])
    pavement_shape = str(X[3])
    st_add = str(X[4])
    st_name = str(X[5])
    st_frm=str(X[6])
    st_to = str(X[7])
    start_date = str(X[8])
    end_date = str(X[9])
    albedo = X[10]
    
#     print(buf_path)
    dest = str(buf_path[39:])
#     print(dest)
    dest_path = '/data/phase_i/pavement/la_city/re_buf/' + dest
    print(dest_path)

    !ogr2ogr -t_srs EPSG:4326 -s_srs EPSG:32611 {dest_path} {buf_path}

    st_adds.append(st_add)
    st_names.append(st_name)
    st_frms.append(st_frm)
    st_tos.append(st_to)
    start_dates.append(start_date)
    end_dates.append(end_date)
    albedos.append(albedo)
    prj_shps.append(prj_shp)
    geocoded_jsons.append(geocoded_json)
    buf_paths.append(buf_path)
    pavement_shapes.append(pavement_shape)
    re_buf_paths.append(dest_path)

# store the results to a pandas library.
df_path3 = pd.DataFrame({'geocoded_jsons': geocoded_jsons,'prj_shps': prj_shps,'buffered_path': buf_paths,
                        're_buffered_path': re_buf_paths,'pavement_shapes':pavement_shapes,
                        'street_address':st_adds, 'street_names':st_names, 'street_from': st_frms, 
                        'street_to': st_tos,'start_date': start_dates, 'end_date': end_dates, 'albedos': albedos})

# Write the full results to csv using the pandas library. 
df_path3.to_csv('street_addresses_geocoded_prj_re_buffered.csv',encoding='utf8')
df_path3