## Core Workflow: Get NAIP imagery from street shapefiles
Purpose: Specify the desired NAIP aerial imagery (which location, which date range, and which spectral bands) and store it locally as multi-band geospatial raster files.
<br>
*Date: 2019-02-08*
<br>
*Author: Taufiq Rashid*


### Import statements

In [None]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler 
# import ogr, gdal
from glob import glob

import requests
import logging
import time
import datetime
import pandas as pd

import collections

import descarteslabs as dl
from descarteslabs.vectors import FeatureCollection

print (sys.path)

### Set key variables

In [None]:
# Directory prefix under which the downloaded NAIP rasters are written
# (file names are appended directly, so the trailing slash matters).
data_path = '/data/phase_i/pavement/la_city/naip_imagery/'

# Spectral bands requested for every scene.
# NOTE(review): the original note here read "S2, Lx" - presumably the same
# band names also apply to Sentinel-2 / Landsat products; confirm.
bands = ['red', 'green', 'blue', 'nir']
suffix = 'RGBNA'

# Resolution tag embedded in output file names as "<resolution>m".
resolution = 1

### Search and acquire NAIP imagery 

In [None]:
def load_shape(place_shapefile):
    """Load the first feature of a shapefile as a single-Polygon feature dict.

    Only the first record of the file is read. For a ``Polygon`` the exterior
    ring is kept (holes are dropped). For a ``MultiPolygon`` the member polygon
    whose exterior ring has the most vertices is kept (first one wins on a
    tie) and the others are discarded.

    Args:
        place_shapefile: Path (or anything ``fiona.open`` accepts) of the
            vector file to read.

    Returns:
        dict with keys:
            ``type``: copied from the source feature.
            ``properties``: copied from the source feature.
            ``geometry``: ``{'type': 'Polygon', 'coordinates': [[[x, y], ...]]}``
                holding only the selected exterior ring (2-D; any extra
                ordinates such as z are dropped).
            ``bbox``: ``[xmin, ymin, xmax, ymax]`` of that ring.

    Raises:
        ValueError: If the file has no features, the first feature has no
            geometry, the geometry is neither Polygon nor MultiPolygon, or
            the selected ring has no vertices.
    """
    # The context manager closes the underlying dataset; next(iter(...))
    # works on both Python 2 and 3 (the old ``c.next()`` is Python 2 only).
    with fiona.open(place_shapefile) as collection:
        pol = next(iter(collection), None)
    if pol is None:
        raise ValueError('no features found in %r' % (place_shapefile,))

    geometry = pol['geometry']
    if geometry is None:
        raise ValueError('first feature of %r has no geometry' % (place_shapefile,))

    geom_type = geometry['type']
    coordinates = geometry['coordinates']

    # Dispatch on the declared geometry type instead of guessing from
    # len(coordinates): a Polygon with holes also has more than one entry,
    # and a MultiPolygon may have just one.
    if geom_type == 'Polygon':
        ring = coordinates[0]
    elif geom_type == 'MultiPolygon':
        # Largest polygon == the one with the most exterior-ring vertices.
        largest = max(coordinates, key=lambda poly: len(poly[0]))
        ring = largest[0]
    else:
        raise ValueError('unsupported geometry type %r in %r'
                         % (geom_type, place_shapefile))

    # Keep only x/y so 3-D coordinates do not break the output structure.
    points = [[pt[0], pt[1]] for pt in ring]
    if not points:
        raise ValueError('selected polygon ring in %r is empty' % (place_shapefile,))

    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]

    # Built in the same key order as before: type, properties, geometry, bbox.
    feature = {}
    feature['type'] = pol['type']
    feature['properties'] = pol['properties']
    feature['geometry'] = {'type': 'Polygon', 'coordinates': [points]}
    # True extent of the ring; not seeded with +/-180 / +/-90, so it is also
    # correct for coordinates that are not in degrees.
    feature['bbox'] = [min(xs), min(ys), max(xs), max(ys)]
    return feature

### Download the imagery for the above shapes

### First, modify the footprint CSV to add the 3-year albedos

In [None]:
# Path of the CSV to read. Despite the variable name, this file is the INPUT
# of this step (one row per street record).
output_filename = 'LAcity_pavement_re_buffered_5-2.csv'

# Read the data into a pandas DataFrame
df_2 = pd.read_csv(output_filename, encoding='utf8')
# Bare expression: displays the table when run as a notebook cell.
df_2

In [None]:
# Columns carried into the download step. The order matters: the resulting
# tuples are read by position.
address_columns = [
    're_buffered_path',
    'pavement_shapes',
    'street_address',
    'street_names',
    'street_from',
    'street_to',
    'start_date',
    'end_date',
    'albedos',
]

# One tuple per row of df_2, holding the columns above in that order.
addresses = df_2[address_columns].apply(tuple, axis=1)
# Bare expression: displays the Series when run as a notebook cell.
addresses

In [None]:
# Parallel result columns: one entry is appended to each list for every
# image that is downloaded, so all lists always have the same length.
buf_paths = []
pavement_shapes = []
st_adds = []
st_names = []
st_frms = []
st_tos = []
start_dates = []
end_dates = []
albedos = []
img_path = []
scene_ids = []

# Running count of street records for which at least one scene was found;
# embedded (zero-padded) in the output file names.
roof_id = 0

format_str = '%m/%d/%Y'  # date format of the CSV columns (currently unused)

# Descartes Labs product to query; the same for every row.
product = u'usda:naip:rgbn:v1'

# Each element of `addresses` is a tuple:
# (re_buffered_path, pavement_shapes, street_address, street_names,
#  street_from, street_to, start_date, end_date, albedos)
for row in addresses:
    try:
        (buf_path, pavement_shape, st_add, st_name,
         st_frm, st_to, start_date, end_date) = [str(value) for value in row[:8]]
        albedo = row[8]

        shape = load_shape(buf_path)

        # Search metadata given a spatio-temporal query
        feature_collection = dl.metadata.search(
            products=[product],
            start_datetime=start_date,
            end_datetime=end_date,
            fields=['acquired'],
            sort_field='acquired',
            sort_order='asc',
            geom=shape['geometry'])
        features = feature_collection['features']
        if not features:
            # No imagery for this street in the requested window.
            continue

        naip_ids = sorted(f['id'] for f in features)
        roof_id = roof_id + 1

        # Properties of the first returned (earliest-acquired) scene; its
        # date is used in the file name of every image of this street.
        naip_dates = features[0]['properties']
        # NOTE(review): the original took characters 71-81 of a throwaway
        # path string containing str(naip_dates), which only worked for one
        # exact len(data_path). That slice presumably targeted the first 10
        # characters (YYYY-MM-DD) of the 'acquired' timestamp - confirm.
        date = str(naip_dates['acquired'])[:10]

        for img_id, imageries in enumerate(naip_ids):
            # Fixed-position slice of the scene id, recorded in the output CSV.
            ids = imageries[23:49]
            naip_band_file = (data_path + st_name + '_naipV1_' + date
                              + '_st_' + str(roof_id).zfill(5)
                              + '_img_' + str(img_id).zfill(2)
                              + '_' + str(resolution) + 'm')
            # Download the scene clipped to the street polygon and save it
            # next to data_path.
            naip = dl.raster.raster(
                imageries,
                bands=bands,
                data_type='UInt16',
                cutline=shape['geometry'],
                save=True,
                outfile_basename=naip_band_file)
            pt = str(naip_band_file) + '.tif'
            print(pt)
            img_path.append(pt)
            st_adds.append(st_add)
            st_names.append(st_name)
            st_frms.append(st_frm)
            st_tos.append(st_to)
            start_dates.append(start_date)
            end_dates.append(end_date)
            albedos.append(albedo)
            buf_paths.append(buf_path)
            pavement_shapes.append(pavement_shape)
            scene_ids.append(ids)
    except Exception as err:
        # Best effort: a failing row is skipped so the remaining rows are
        # still processed, but the failure is reported instead of silently
        # dropped. KeyboardInterrupt / SystemExit are no longer swallowed,
        # so a long run can be interrupted.
        logging.warning('Skipping row %r: %s: %s', row, type(err).__name__, err)

# Collect the results in a pandas DataFrame.
df_path = pd.DataFrame({'img_path': img_path, 'buffered_path': buf_paths,
                        'pavement_shapes': pavement_shapes, 'scene_ids': scene_ids,
                        'street_address': st_adds, 'street_names': st_names,
                        'street_from': st_frms, 'street_to': st_tos,
                        'start_date': start_dates, 'end_date': end_dates,
                        'albedos': albedos})

# Write the full results to csv using the pandas library.
df_path.to_csv('LAcity_pavement_naip_5-2.csv', encoding='utf8')

----------------------------------------