## Core Workflow: Download NAIP imagery for training data
Purpose: Specify the desired satellite imagery—from where, from when, including what spectral bands—and store it locally as multi-band, geospatial raster files. 
<br>
*Date: 10-31-2019*


### Import statements

In [1]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib as mpl
import matplotlib.pyplot as plt

import gdal
from glob import glob

import jenkspy

import rasterio as rio
from rasterio.plot import show

import pandas as pd

import collections
from numpy import mean

import random
import statistics

import time

import descarteslabs as dl
# Print the module search path of the running kernel.
print (sys.path)

['', '/opt/caffe/python', '/opt/caffe2/build', '/data/home/peter/notebooks/urban_heat', '/anaconda/envs/py36/lib/python36.zip', '/anaconda/envs/py36/lib/python3.6', '/anaconda/envs/py36/lib/python3.6/lib-dynload', '/anaconda/envs/py36/lib/python3.6/site-packages', '/anaconda/envs/py36/lib/python3.6/site-packages/IPython/extensions', '/data/home/peter/.ipython']


### Helper functions

In [None]:
def load_shape(place_shapefile):
    """Load the first record of a vector file as a single-ring polygon feature.

    Only the first record of ``place_shapefile`` is read. If its geometry is a
    MultiPolygon, the member polygon whose exterior ring has the most vertices
    is kept (the first one wins on ties). Interior rings (holes) are dropped,
    and any coordinate beyond x and y (e.g. z) is discarded.

    Args:
        place_shapefile: Path of a dataset that ``fiona.open`` can read.

    Returns:
        A dict with the keys ``'type'`` and ``'properties'`` (copied from the
        record), ``'geometry'`` (``{'type': 'Polygon', 'coordinates': [ring]}``
        where ``ring`` is a list of ``[x, y]`` pairs) and ``'bbox'``
        (``[xmin, ymin, xmax, ymax]`` of that ring).
    """
    # The context manager closes the dataset handle once the record is read.
    with fiona.open(place_shapefile) as collection:
        pol = next(iter(collection))

    source_geometry = pol['geometry']
    coordinates = source_geometry['coordinates']

    if source_geometry['type'] == 'MultiPolygon':
        # coordinates = [polygon, ...] and polygon = [exterior_ring, hole, ...]:
        # keep the exterior ring of the polygon with the most vertices.
        largest_polygon = max(coordinates, key=lambda rings: len(rings[0]))
        exterior = largest_polygon[0]
    else:
        # Polygon: coordinates = [exterior_ring, hole, ...].
        exterior = coordinates[0]

    # Index instead of unpacking so that (x, y, z) points are accepted too.
    ring = [[point[0], point[1]] for point in exterior]

    if ring:
        xs = [point[0] for point in ring]
        ys = [point[1] for point in ring]
        bbox = [min(xs), min(ys), max(xs), max(ys)]
    else:
        # Same inverted sentinel box the previous implementation produced
        # for a ring without points.
        bbox = [180, 90, -180, -90]

    feature = {}
    feature['type'] = pol['type']
    feature['properties'] = pol['properties']
    feature['geometry'] = {'type': 'Polygon', 'coordinates': [ring]}
    feature['bbox'] = bbox
    return feature

### Set key variables

In [2]:
# Base data directory used by this workflow.
data_root = '/data/phase_i/'

# Names of the spectral bands to request for every image.
bands = [
    'red',
    'green',
    'blue',
    'nir',
]
# Short tag associated with this band selection.
suffix = 'RGBNA'  # S2, Lx


### Download imagery for the saved footprints from Descartes Labs (DL)

In [None]:
# Set your input file here
# Path of the CSV file that lists the saved footprints; set your input file here.
input_filename = "footprints_data.csv"

# Read the CSV into a pandas DataFrame, decoding the file as UTF-8.
df_2 = pd.read_csv(input_filename, encoding='utf8')
# A bare name as the last expression of a notebook cell displays the DataFrame.
df_2

In [None]:
# Collapse each row of df_2 into one tuple holding the columns below, in this
# exact order; the download loop reads the tuple fields by position.
addresses = df_2[
    [
        'roof_add',
        'footprint_path',
        'footprint_shapes',
        'longitude',
        'latitude',
        'start_date',
        'end_date',
        'Solar-Initial',
    ]
].apply(tuple, axis=1)
# Display the resulting Series in the notebook.
addresses

In [None]:
# Directory that receives the downloaded images. File names are built by plain
# string concatenation, so the trailing '/' is required.
data_path='/data/phase_i/roof_img/george/naip_v1/'

# NOTE(review): `resolution` is only used to label the output file names below,
# but no cell visible in this notebook assigns it -- confirm it is defined
# (and what value it should have) before running this cell.

# Descartes Labs product to search.
product = u'usda:naip:rgbn:v1'

# Index of the last roof for which at least one image was found (-1 = none yet).
roof_id = -1

# One entry per downloaded image; these lists become the columns of df_path.
img_path = []
footprint_shapes = []
lats = []
lons=[]
exp_alb = []
tile_id = []
roof_add = []

# Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
for _, row in addresses.items():
    # Field order matches the column list used to build `addresses`.
    rf_ad, shapefiles, ft_shp, lon, lati, start_date, end_date, albedo = row
    shapefiles = str(shapefiles)
    lati = str(lati)
    lat = lati[0:7]  # shortened latitude string used in the file name
    lon = str(lon)
    start_date = str(start_date)
    end_date = str(end_date)

    shape = load_shape(shapefiles)

    print ('searching imageries for'+shapefiles)

    #  Search metadata given a spatio-temporal query
    feature_collection = dl.metadata.search(products=[product], start_datetime=start_date, end_datetime=end_date,
                                             fields=['acquired'], sort_field='acquired',sort_order='asc',geom=shape['geometry'])
    features = feature_collection['features']

    naip_ids = sorted(f['id'] for f in features)
    print (len(naip_ids), naip_ids)

    if not naip_ids:
        # Nothing to download for this footprint; roof_id is not advanced.
        continue
    roof_id = roof_id+1

    # Acquisition date of each scene, read directly from the metadata rather
    # than sliced out of a formatted string at fixed character offsets.
    # Assumes 'acquired' starts with a YYYY-MM-DD date -- TODO confirm.
    acquired_date = {f['id']: str(f['properties']['acquired'])[:10] for f in features}

    for img_id, imageries in enumerate(naip_ids):
        print ('downloading '+imageries)
        # Characters 18-48 of the scene id; 18 == len(product) + 1, so this
        # presumably drops the '<product>:' prefix -- verify against real ids.
        ids = imageries[18:49]
        date = acquired_date[imageries]
        naip_band_file = (data_path+'naipV1_'+date
                          +'_rf_'+str(roof_id).zfill(5)
                          +'_'+'img_'+str(img_id).zfill(2)
                          +'_lat_'+lat
                          +'_'+str(resolution)+'m')
        # Download the requested bands, clipped to the footprint polygon.
        naip = dl.raster.raster(
                imageries,
                bands=bands,
                data_type='UInt16',
                cutline=shape['geometry'],
                save=True,
                outfile_basename=naip_band_file)
        pt = str(naip_band_file)+'.tif'
        print(pt)
        img_path.append(pt)
        lats.append(lati)
        lons.append(lon)
        exp_alb.append(albedo)
        tile_id.append(ids)
        roof_add.append(rf_ad)
        footprint_shapes.append(ft_shp)

# Collect the per-image records in a pandas DataFrame.
df_path = pd.DataFrame({'roof_address':roof_add, 'img_path': img_path, 'footprint_shapes':footprint_shapes, 'tile_id': tile_id, 
                        'latitude': lats,'longitude': lons, 'expected_albedo': exp_alb})

# Write the full results to csv using the pandas library. 
df_path.to_csv('path_imagery.csv',encoding='utf8')
        
print('largest roof id: ',roof_id)

------------------------------------