## Core Workflow: Get NAIP imagery for given addresses
Purpose: Find matching polygons in the Microsoft Building Footprint data using geocoded addresses. Use the shapes of the polygons to download NAIP imagery for the rooftops.
<br>
*Author: Taufiq Rashid*


### Import statements

In [1]:
# Silence every warning for the rest of the session.
import warnings
warnings.filterwarnings('ignore')
# --- standard library ---
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
# --- numeric / geospatial stack ---
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
# NOTE(review): recent GDAL releases are normally imported as
# `from osgeo import gdal` -- confirm this bare import works in the target env.
import gdal

# Enable the notebook's inline matplotlib backend.
# NOTE(review): `.magic()` is the legacy IPython call; newer IPython exposes
# `run_line_magic('matplotlib', 'inline')` -- confirm before upgrading IPython.
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt 

from glob import glob

import requests
import logging
import time

import pandas as pd
import geopandas as gpd

import collections

# Descartes Labs client and its vector FeatureCollection class.
import descarteslabs as dl
from descarteslabs.vectors import FeatureCollection

# Print the module search path of the running interpreter.
print (sys.path)

['', '/opt/caffe/python', '/opt/caffe2/build', '/data/home/peter/notebooks/urban_heat/dev', '/anaconda/envs/py36/lib/python36.zip', '/anaconda/envs/py36/lib/python3.6', '/anaconda/envs/py36/lib/python3.6/lib-dynload', '/anaconda/envs/py36/lib/python3.6/site-packages', '/anaconda/envs/py36/lib/python3.6/site-packages/IPython/extensions', '/data/home/peter/.ipython']


### Set key variables

In [2]:
# Root directory; footprint shapefile paths are built underneath it.
data_root = '/data/phase_i/'

# Band names requested when rasters are downloaded.
bands = ['red', 'green', 'blue', 'nir']
# Short tag for the band set above.
suffix = 'RGBNA'  # S2, Lx
# Number embedded in output image names as "<resolution>m".
resolution = 1

### Load the DL vector product containing all the MS footprints

In [3]:
# Handle on the vector product that holds the Microsoft US building footprints.
microsoft_footprints = FeatureCollection(
    'a35126a241bd022c026e96ab9fe5e0ea23967d08:USBuildingFootprints'
)

# CSV with one geocoded address per row.
# NOTE(review): "geocded" looks like a typo for "geocoded" -- confirm the
# actual file name on disk before changing this string.
input_filename = "geocded_addresses.csv"

# Load the CSV into a DataFrame.
df = pd.read_csv(input_filename, encoding='utf8')

# Columns carried forward for every address, in the order the matching loop
# indexes them (0 = longitude, 1 = latitude, 2 = address string, ...).
address_columns = [
    'longitude',
    'latitude',
    'input_string',
    'Warranty Start Date',
    'Twelve Months',
    'Solar-Initial',
]

# One tuple per row.
addresses = df[address_columns].apply(tuple, axis=1)
addresses

0     (-118.5306867, 34.2420096, Juniper HallPlummer...
1     (-118.53086259999999, 34.240396200000006, Bayr...
2     (-118.4452261, 34.071596899999996, John Wooden...
3     (-118.44078049999999, 34.0689305, 603 Charles ...
4     (-118.44078049999999, 34.0689305, 603 Charles ...
5     (-118.30806640000002, 34.0562404, 849 S Oxford...
6     (-118.2836324, 34.158845299999996, 1014 Grand ...
7     (-118.0942533, 34.0343508, 1445 N Montebello B...
8     (-118.28819979999999, 34.1589824, 601 Circle 7...
9     (-118.15209399999999, 33.76687020000001, 264 R...
10    (-121.49992959999999, 38.5777742, 650 Capitol ...
11    (-121.49992959999999, 38.5777742, 650 Capitol ...
12    (-121.74964469999999, 38.535877500000005, 400 ...
13    (-121.74964469999999, 38.535877500000005, 400 ...
14    (-121.5119534, 38.6074449, 2275 Gateway Oaks D...
15    (-117.0911054, 33.0154547, 16535 Via Esprillo,...
16    (-117.12633740000001, 32.8274448, 5055 Ruffin ...
17    (-117.151307, 32.887166, 7696 Miramar Rd, 

### Batch processing for finding matching footprints

In [4]:
# For every geocoded address: query the Microsoft building-footprint product
# with a tiny buffer around the point, keep the first returned footprint,
# write it to its own shapefile, and record it with the address metadata.
# Addresses with no footprint are reported and skipped.
roof_add=[]
foot_path = []
start_dt=[]
end_dt=[]
in_solar = []
# thr_solar = []
lat=[]
lon=[]
bld_shp = []

# Id of the footprint vector product that is searched.
footprint_product = 'a35126a241bd022c026e96ab9fe5e0ea23967d08:USBuildingFootprints'
# Directory that receives one shapefile per matched address.
footprint_dir = data_root+'footprint_shapes/george/'

# Each shapefile holds a single Polygon feature with one integer attribute.
schema = {
    'geometry': 'Polygon',
    'properties': {'id': 'int'},
    }

# One client instance is enough for all queries.
vector_client = dl.Vector()

# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for cnt, (Y, X) in enumerate(addresses.items(), start=1):
    print(cnt)
    print ('Searching matching polygon for:', X[0],X[1])
    # construct point based on lat/long returned by geocoder
    point = Point(X[0],X[1])
    buf = mapping(point.buffer(0.00001))

    s = vector_client.search_features(footprint_product, geometry=buf, query_expr=None, query_limit=None)

    # Only the first returned footprint is used.
    feature = next(iter(s), None)
    if feature is None:
        # Make dropped addresses visible instead of skipping them silently.
        print ('No matching polygon found for:', X[0],X[1])
        continue

    att = feature.attributes
    # Fully qualified so the call still works if the bare name `shape` has
    # been rebound elsewhere in the notebook session.
    polygon = shapely.geometry.shape(att['geometry'])

    # File name is "<latitude>,<longitude>_msfootprint.shp".
    shp_path = footprint_dir+str(X[1])+','+str(X[0])+'_msfootprint.shp'

    # Write a new Shapefile
    with fiona.open(shp_path, 'w', 'ESRI Shapefile', schema) as c:
        c.write({
            'geometry': mapping(polygon),
            'properties': {'id': 123},
        })

    roof_add.append(X[2])
    foot_path.append(shp_path)
    start_dt.append(X[3])
    end_dt.append(X[4])
    in_solar.append(X[5])
#     thr_solar.append(X[6])
    lat.append(X[1])
    lon.append(X[0])
    bld_shp.append(polygon)

# Collect the results in a DataFrame.
df = pd.DataFrame({'roof_add': roof_add, 'footprint_path': foot_path, 'footprint_shapes': bld_shp, 'longitude': lon,'latitude': lat, 
                   'start_date': start_dt,'end_date': end_dt,'Solar-Initial': in_solar})

# Write the full results to csv.
# NOTE(review): the index is written too, so it comes back as an "Unnamed: 0"
# column when this file is re-read -- consider index=False.
df.to_csv('footprints_gg_4-6.csv',encoding='utf8')

1
Searching matching polygon for: -118.5306867 34.2420096
2
Searching matching polygon for: -118.53086259999999 34.240396200000006
3
Searching matching polygon for: -118.4452261 34.071596899999996
4
Searching matching polygon for: -118.44078049999999 34.0689305
5
Searching matching polygon for: -118.44078049999999 34.0689305
6
Searching matching polygon for: -118.30806640000002 34.0562404
7
Searching matching polygon for: -118.2836324 34.158845299999996
8
Searching matching polygon for: -118.0942533 34.0343508
9
Searching matching polygon for: -118.28819979999999 34.1589824
10
Searching matching polygon for: -118.15209399999999 33.76687020000001
11
Searching matching polygon for: -121.49992959999999 38.5777742
12
Searching matching polygon for: -121.49992959999999 38.5777742
13
Searching matching polygon for: -121.74964469999999 38.535877500000005
14
Searching matching polygon for: -121.74964469999999 38.535877500000005
15
Searching matching polygon for: -121.5119534 38.6074449
16
Sear

### Search and acquire NAIP imagery 

In [3]:
def load_shape(place_shapefile):
    """Load the first feature of a shapefile as a single-ring Polygon feature.

    Only the first feature in the file is read. Its geometry is reduced to
    one exterior ring: for a MultiPolygon the member polygon whose exterior
    ring has the most vertices is kept (the first one on ties); for any
    other geometry the first ring is used. Interior rings are dropped and
    only the first two ordinates of each vertex are kept.

    Parameters
    ----------
    place_shapefile : str
        Path to a vector file that ``fiona.open`` can read.

    Returns
    -------
    dict
        ``'type'`` and ``'properties'`` copied from the feature,
        ``'geometry'`` as ``{'type': 'Polygon', 'coordinates': [[[x, y], ...]]}``
        and ``'bbox'`` as ``[xmin, ymin, xmax, ymax]`` of that ring.

    Raises
    ------
    ValueError
        If the file contains no feature or the selected ring has no vertices.
    """
    # The context manager closes the dataset as soon as the feature is read.
    with fiona.open(place_shapefile) as collection:
        pol = next(iter(collection), None)
    if pol is None:
        raise ValueError('no features found in ' + str(place_shapefile))

    geometry = pol['geometry']
    coordinates = geometry['coordinates']
    if not coordinates:
        raise ValueError('empty geometry in ' + str(place_shapefile))

    if geometry['type'] == 'MultiPolygon':
        # coordinates[p][0] is the exterior ring of member polygon p.
        ring = max(coordinates, key=lambda rings: len(rings[0]))[0]
    else:
        # Polygon: coordinates[0] is the exterior ring, the rest are holes.
        ring = coordinates[0]

    # Index instead of unpacking so vertices carrying a z value also work.
    points = [[vertex[0], vertex[1]] for vertex in ring]
    if not points:
        raise ValueError('geometry has no vertices in ' + str(place_shapefile))

    # Take the bounding box from the data itself rather than from lon/lat
    # sentinels, so projected coordinates are handled as well.
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]

    feature = {}
    feature['type'] = pol['type']
    feature['properties'] = pol['properties']
    feature['geometry'] = {'type': 'Polygon', 'coordinates': [points]}
    feature['bbox'] = [min(xs), min(ys), max(xs), max(ys)]
    return feature

### Download the imagery for the saved footprints

In [4]:
# CSV of matched footprints written by the footprint-matching cell.
input_filename = "footprints_gg_4-6.csv"

# Load it into a DataFrame and display it.
df_2 = pd.read_csv(filepath_or_buffer=input_filename, encoding='utf8')
df_2

Unnamed: 0.1,Unnamed: 0,roof_add,footprint_path,footprint_shapes,longitude,latitude,start_date,end_date,Solar-Initial
0,0,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/footprint_shapes/george/34.24200...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",-118.530687,34.24201,1/1/2009,12/31/2009,0.09
1,1,"Bayramian HallVincennes St, Northridge, Los An...",/data/phase_i/footprint_shapes/george/34.24039...,"POLYGON ((-118.530137 34.240627, -118.530717 3...",-118.530863,34.240396,1/1/2009,12/31/2009,0.2
2,2,"John Wooden Center, 221 Westwood Plaza, Los An...",/data/phase_i/footprint_shapes/george/34.07159...,"POLYGON ((-118.446067 34.071999, -118.446022 3...",-118.445226,34.071597,1/1/2009,12/31/2009,0.12
3,3,"603 Charles E Young Dr E, Los Angeles, CA 90024",/data/phase_i/footprint_shapes/george/34.06893...,"POLYGON ((-118.4419 34.069312, -118.441917 34....",-118.440781,34.068931,1/1/2009,12/31/2009,0.19
4,4,"603 Charles E Young Dr E, Los Angeles, CA 90024",/data/phase_i/footprint_shapes/george/34.06893...,"POLYGON ((-118.4419 34.069312, -118.441917 34....",-118.440781,34.068931,1/1/2009,12/31/2009,0.19
5,5,"849 S Oxford Ave, Los Angeles, CA 90005, USA",/data/phase_i/footprint_shapes/george/34.05624...,"POLYGON ((-118.307967 34.056151, -118.307966 3...",-118.308066,34.05624,1/1/2009,12/31/2009,0.27
6,6,"1014 Grand Central Ave, Glendale, CA 91201, USA",/data/phase_i/footprint_shapes/george/34.15884...,"POLYGON ((-118.283708 34.158517, -118.283253 3...",-118.283632,34.158845,1/1/2009,12/31/2009,0.83
7,7,"1445 N Montebello Blvd, Montebello, CA 90640, USA",/data/phase_i/footprint_shapes/george/34.03435...,"POLYGON ((-118.095207 34.032554, -118.095208 3...",-118.094253,34.034351,1/1/2009,12/31/2009,0.83
8,8,"601 Circle 7 Dr, Glendale, CA 91201, USA",/data/phase_i/footprint_shapes/george/34.15898...,"POLYGON ((-118.287987 34.158608, -118.287891 3...",-118.2882,34.158982,1/1/2009,12/31/2009,0.83
9,9,"264 Redondo AveLong Beach, Long Beach, CA 90803",/data/phase_i/footprint_shapes/george/33.76687...,"POLYGON ((-118.15187 33.766894, -118.152254 33...",-118.152094,33.76687,1/1/2009,12/31/2009,0.83


In [5]:
# Columns needed by the download loop, in the order it indexes them
# (0 = address, 1 = shapefile path, 2 = footprint polygon text, ...).
download_columns = [
    'roof_add',
    'footprint_path',
    'footprint_shapes',
    'longitude',
    'latitude',
    'start_date',
    'end_date',
    'Solar-Initial',
]

# One tuple per footprint row.
addresses = df_2[download_columns].apply(tuple, axis=1)
addresses

0     (Juniper HallPlummer St, Northridge, Los Angel...
1     (Bayramian HallVincennes St, Northridge, Los A...
2     (John Wooden Center, 221 Westwood Plaza, Los A...
3     (603 Charles E Young Dr E, Los Angeles, CA 900...
4     (603 Charles E Young Dr E, Los Angeles, CA 900...
5     (849 S Oxford Ave, Los Angeles, CA 90005, USA,...
6     (1014 Grand Central Ave, Glendale, CA 91201, U...
7     (1445 N Montebello Blvd, Montebello, CA 90640,...
8     (601 Circle 7 Dr, Glendale, CA 91201, USA, /da...
9     (264 Redondo AveLong Beach, Long Beach, CA 908...
10    (650 Capitol Mall, Sacramento, CA 95814, /data...
11    (650 Capitol Mall, Sacramento, CA 95814, /data...
12    (400 Mrak Hall Dr, Davis Sacramento CA 95616, ...
13    (400 Mrak Hall Dr, Davis Sacramento CA 95616, ...
14    (2275 Gateway Oaks Dr, Sacramento, CA 95833, /...
15    (16535 Via Esprillo, San Diego, CA 92127, /dat...
16    (5055 Ruffin Rd, San Diego, CA 92123, USA, /da...
17    (7696 Miramar Rd, San Diego, CA 92126, USA

In [6]:
# set the path for download imageries
data_path='/data/phase_i/roof_img/george/naip_v1/'

roof_id = 22

img_path = []
footprint_shapes = []
lats = []
lons=[]
exp_alb = []
tile_id = []
roof_add = []

# for shapefiles in paths:
for Y, X in addresses.iteritems():
    shapefiles = str(X[1])
    lati = str(X[4])
    lat=lati[0:7]
    lon = str(X[3])
    start_date = str(X[5])
    end_date = str(X[6])
    albedo = X[7]
    rf_ad = X[0]
    ft_shp = X[2]
    
    
    shape = load_shape(shapefiles)

    print ('searching imageries for'+shapefiles)
    
    product = u'usda:naip:rgbn:v1'

    #  Search metadata given a spatio-temporal query
    feature_collection = dl.metadata.search(products=[product], start_datetime='01/01/2009', end_datetime='01/01/2010',
#                                            geom=shape['geometry'])
                                             fields=['acquired'], sort_field='acquired',sort_order='asc',geom=shape['geometry'])
    #                                         limit=10, )
#     print feature_collection
    
    naip_ids = [f['id'] for f in feature_collection['features']]
    naip_ids.sort()
    print (len(naip_ids), naip_ids)
    
    
    if len(naip_ids) != 0:
        roof_id = roof_id+1
        naip_dates = [f['properties'] for f in feature_collection['features']]
#     print (naip_dates)
        naip_dates = naip_dates[0]
    
    continue_index = 0
    
    img_id = - 1
    
    for imageries in naip_ids:        
        print ('downloading '+imageries)
        ids = imageries[18:49]
#         print(ids)
        continue_index = 0
        img_id = img_id + 1
        naip_band_file =  data_path+str(imageries[-6:-4])+'_naipV1_'+str(naip_dates)+'_roof_'+str(roof_id).zfill(5)+'_'+'img_'+str(img_id).zfill(2)+'_'+str(resolution)+'m'
#         print (naip_band_file)
        date = naip_band_file[65:75]
#         print (date)
        naip_band_file =  data_path+'naipV1_'+date+'_rf_'+str(roof_id).zfill(5)+'_'+'img_'+str(img_id).zfill(2)+'_lat_'+lat+'_'+str(resolution)+'m'
#         print (naip_band_file)
        naip = dl.raster.raster(
                imageries,
                bands=bands,
                data_type='UInt16',
                cutline=shape['geometry'],
                save=True,
                outfile_basename=naip_band_file)
        pt = str(naip_band_file)+'.tif'
        print(pt)
        img_path.append(pt)
        lats.append(lati)
        lons.append(lon)
        exp_alb.append(albedo)
        tile_id.append(ids)
        roof_add.append(rf_ad)
        footprint_shapes.append(ft_shp)

# store the results to a pandas library.
df_path = pd.DataFrame({'roof_address':roof_add, 'img_path': img_path, 'footprint_shapes':footprint_shapes, 'tile_id': tile_id, 
                        'latitude': lats,'longitude': lons, 'expected_albedo': exp_alb})

# Write the full results to csv using the pandas library. 
df_path.to_csv('path_gg_img_4-6.csv',encoding='utf8')
        
print('largest roof id: ',roof_id)

searching imageries for/data/phase_i/footprint_shapes/george/34.2420096,-118.5306867_msfootprint.shp
1 ['usda:naip:rgbn:v1:m_3411852_ne_11_1_20090626']
downloading usda:naip:rgbn:v1:m_3411852_ne_11_1_20090626
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00023_img_00_lat_34.2420_1m.tif
searching imageries for/data/phase_i/footprint_shapes/george/34.240396200000006,-118.53086259999999_msfootprint.shp
1 ['usda:naip:rgbn:v1:m_3411852_ne_11_1_20090626']
downloading usda:naip:rgbn:v1:m_3411852_ne_11_1_20090626
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00024_img_00_lat_34.2403_1m.tif
searching imageries for/data/phase_i/footprint_shapes/george/34.071596899999996,-118.4452261_msfootprint.shp
1 ['usda:naip:rgbn:v1:m_3411861_nw_11_1_20090626']
downloading usda:naip:rgbn:v1:m_3411861_nw_11_1_20090626
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00025_img_00_lat_34.0715_1m.tif
searching imageries for/data/phase_i/footprint_shapes/george/34.0689305,-118.

1 ['usda:naip:rgbn:v1:m_3712213_se_10_1_20090619']
downloading usda:naip:rgbn:v1:m_3712213_se_10_1_20090619
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-19_rf_00044_img_00_lat_37.7809_1m.tif
searching imageries for/data/phase_i/footprint_shapes/george/37.7667021,-122.4110658_msfootprint.shp
1 ['usda:naip:rgbn:v1:m_3712213_se_10_1_20090619']
downloading usda:naip:rgbn:v1:m_3712213_se_10_1_20090619
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-19_rf_00045_img_00_lat_37.7667_1m.tif
searching imageries for/data/phase_i/footprint_shapes/george/37.7742771,-122.4218489_msfootprint.shp
1 ['usda:naip:rgbn:v1:m_3712213_se_10_1_20090619']
downloading usda:naip:rgbn:v1:m_3712213_se_10_1_20090619
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-19_rf_00046_img_00_lat_37.7742_1m.tif
searching imageries for/data/phase_i/footprint_shapes/george/37.430077000000004,-121.9416144_msfootprint.shp
1 ['usda:naip:rgbn:v1:m_3712133_sw_10_1_20090608']
downloading usda:naip:rgbn:v1:m_3712133_s

In [26]:
# Display the per-image table assembled by the download cell.
df_path


Unnamed: 0,roof_address,img_path,footprint_shapes,tile_id,latitude,longitude,expected_albedo
0,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",m_3411852_ne_11_1_20090626,34.2420096,-118.5306867,0.09
1,"Bayramian HallVincennes St, Northridge, Los An...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.530137 34.240627, -118.530717 3...",m_3411852_ne_11_1_20090626,34.240396200000006,-118.5308626,0.2
2,"John Wooden Center, 221 Westwood Plaza, Los An...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.446067 34.071999, -118.446022 3...",m_3411861_nw_11_1_20090626,34.0715969,-118.4452261,0.12
3,"603 Charles E Young Dr E, Los Angeles, CA 90024",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.4419 34.069312, -118.441917 34....",m_3411861_ne_11_1_20090626,34.0689305,-118.4407805,0.19
4,"603 Charles E Young Dr E, Los Angeles, CA 90024",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.4419 34.069312, -118.441917 34....",m_3411861_nw_11_1_20090626,34.0689305,-118.4407805,0.19
5,"603 Charles E Young Dr E, Los Angeles, CA 90024",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.4419 34.069312, -118.441917 34....",m_3411861_ne_11_1_20090626,34.0689305,-118.4407805,0.19
6,"603 Charles E Young Dr E, Los Angeles, CA 90024",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.4419 34.069312, -118.441917 34....",m_3411861_nw_11_1_20090626,34.0689305,-118.4407805,0.19
7,"849 S Oxford Ave, Los Angeles, CA 90005, USA",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.307967 34.056151, -118.307966 3...",m_3411862_se_11_1_20090622,34.0562404,-118.30806640000002,0.27
8,"1014 Grand Central Ave, Glendale, CA 91201, USA",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.283708 34.158517, -118.283253 3...",m_3411854_se_11_1_20090622,34.1588453,-118.2836324,0.83
9,"1445 N Montebello Blvd, Montebello, CA 90640, USA",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.095207 34.032554, -118.095208 3...",m_3411864_sw_11_1_20090622,34.0343508,-118.0942533,0.83


----------------------------------------