## Core Workflow: Get NAIP imagery from given addresses
Purpose: The street geometries are used to acquire NAIP imagery for every street within the AOI. The mean band values (R, G, B, NIR) for each street are saved and used in subsequent operations.
<br>
*Date: 11-09-2019*
<br>
*Author: Taufiq Rashid*


### Import statements

In [None]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler 
# import ogr, gdal
from glob import glob

import requests
import logging
import time

import pandas as pd

import collections

import rasterio as rio
from rasterio.plot import show
from numpy import mean

import random
import statistics

import descarteslabs as dl
from descarteslabs.vectors import FeatureCollection

# Print the interpreter's module search path.
print (sys.path)

### Load helper functions

In [None]:
def load_shape(place_shapefile):
    """Load the first feature of a vector file as a single-ring Polygon dict.

    Only the first (exterior) ring of the chosen polygon is kept. For a
    MultiPolygon, the member whose first ring has the most vertices is used
    and the remaining members are dropped.

    Parameters
    ----------
    place_shapefile : str
        Path of a dataset readable by ``fiona.open``.

    Returns
    -------
    dict
        ``{'type', 'properties', 'geometry', 'bbox'}`` where ``geometry`` is a
        ``Polygon`` holding one ring of ``[x, y]`` pairs and ``bbox`` is
        ``[xmin, ymin, xmax, ymax]`` of that ring.

    Raises
    ------
    ValueError
        If the dataset contains no features.
    """
    # The context manager closes the dataset; next(iter(...)) replaces the
    # legacy ``collection.next()`` call.
    with fiona.open(place_shapefile) as collection:
        pol = next(iter(collection), None)
    if pol is None:
        raise ValueError('no features found in {}'.format(place_shapefile))

    geom = pol['geometry']
    coordinates = geom['coordinates']
    # Decide by geometry type, not by len(coordinates): a Polygon with holes
    # has several rings, and a MultiPolygon may have a single member.
    if geom['type'] == 'MultiPolygon':
        # max() keeps the first member on ties, like a strict ">" scan would.
        polygon = max(coordinates, key=lambda member: len(member[0]))
    else:
        polygon = coordinates

    # Index instead of unpacking so points carrying a z value are accepted.
    ring = [[pt[0], pt[1]] for pt in polygon[0]]
    if ring:
        xs = [pt[0] for pt in ring]
        ys = [pt[1] for pt in ring]
        # Computed from the data itself, so it is also correct for
        # coordinates that are not lon/lat degrees.
        bbox = [min(xs), min(ys), max(xs), max(ys)]
    else:
        # Inverted lon/lat extent for an empty ring.
        bbox = [180, 90, -180, -90]

    return {
        'type': pol['type'],
        'properties': pol['properties'],
        'geometry': {'type': 'Polygon', 'coordinates': [ring]},
        'bbox': bbox,
    }


import itertools
from multiprocessing import Process, cpu_count
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool
from datetime import datetime


#
# CONFIG
#
# Leave one core free for the parent process, but never go below one worker:
# Pool(processes=0) raises ValueError, which is what a single-core host
# would otherwise produce.
MAX_POOL_PROCESSES=max(1,cpu_count()-1)
# Default upper bound on the number of threads used by map_with_threadpool.
MAX_THREADPOOL_PROCESSES=16

def map_with_pool(map_function,args_list,max_processes=MAX_POOL_PROCESSES):
    """Map ``map_function`` over ``args_list`` using a process pool.

    Parameters
    ----------
    map_function : callable
        Called once per item of ``args_list``.
    args_list : sized iterable
        Items to process; its length caps the number of workers.
    max_processes : int
        Upper bound on the number of worker processes.

    Returns
    -------
    list
        One result per item, in input order. Empty input returns ``[]``.
    """
    # An empty input would request a pool of size 0, which Pool rejects.
    if len(args_list)==0:
        return []
    pool=Pool(processes=min(len(args_list),max_processes))
    return _run_pool(pool,map_function,args_list)


def map_with_threadpool(map_function,args_list,max_processes=MAX_THREADPOOL_PROCESSES):
    """Map ``map_function`` over ``args_list`` using a thread pool.

    Parameters
    ----------
    map_function : callable
        Called once per item of ``args_list``.
    args_list : sized iterable
        Items to process; its length caps the number of workers.
    max_processes : int
        Upper bound on the number of worker threads.

    Returns
    -------
    list
        One result per item, in input order. Empty input returns ``[]``.
    """
    # An empty input would request a pool of size 0, which ThreadPool rejects.
    if len(args_list)==0:
        return []
    pool=ThreadPool(processes=min(len(args_list),max_processes))
    return _run_pool(pool,map_function,args_list)


def map_sequential(map_function,args_list,print_args=False,noisy=False,**dummy_kwargs):
    """Apply ``map_function`` to every item of ``args_list`` in the caller's thread.

    Takes the same call shape as the pool-based mappers; any extra keyword
    arguments (such as ``max_processes``) are accepted and ignored.

    Parameters
    ----------
    map_function : callable
        Called once per item.
    args_list : iterable
        Items to process, in order.
    print_args : bool
        Print each item before it is processed.
    noisy : bool
        Print a header, each item's index, and a closing rule.

    Returns
    -------
    list
        The results in input order.
    """
    def announce(position, item):
        # Progress output is emitted before the item is processed.
        if noisy:
            print('\t{}...'.format(position))
        if print_args:
            print('\t{}'.format(item))

    if noisy:
        print('multiprocessing(test):')

    results = []
    position = 0
    for item in args_list:
        announce(position, item)
        results.append(map_function(item))
        position += 1

    if noisy:
        print('-' * 25)
    return results

def simple(function,args_list,join=True):
    """Start one ``Process`` per argument tuple and optionally wait for all of them.

    Parameters
    ----------
    function : callable
        Target run in each child process.
    args_list : iterable of tuple
        Positional arguments for each process; one process per entry.
    join : bool
        When true, block until every started process has finished.

    Returns
    -------
    list of multiprocessing.Process
        The started processes, in start order.
    """
    started = []
    for call_args in args_list:
        worker = Process(target=function, args=call_args)
        started.append(worker)
        worker.start()

    if not join:
        return started

    for worker in started:
        worker.join()
    return started

class MPList:
    """A list of ``(target, args, kwargs)`` jobs that can be run as a batch.

    ``pool_type`` selects how ``run`` executes the jobs: ``MPList.THREAD``
    uses ``map_with_threadpool``, ``MPList.SEQUENTIAL`` uses
    ``map_sequential``, and any other value (``MPList.POOL`` by default)
    uses ``map_with_pool``.
    """
    #
    # POOL TYPES
    #
    POOL = 'pool'
    THREAD = 'threading'
    SEQUENTIAL = 'sequential'

    #
    # PUBLIC
    #
    def __init__(self, pool_type=None, max_processes=None, jobs=None):
        """Store the execution strategy, worker limit and any initial jobs."""
        self.pool_type = pool_type if pool_type else self.POOL
        self.max_processes = max_processes
        self.jobs = jobs if jobs else []

    def append(self, target, *args, **kwargs):
        """Queue a call of ``target(*args, **kwargs)``."""
        self.jobs.append((target, args, kwargs))

    def run(self):
        """Execute every queued job and return the list of their results.

        Also records ``start_time``, ``end_time`` and ``duration`` (the
        elapsed time as a string), and overwrites ``max_processes`` with the
        limit that was actually used.
        """
        self.start_time = datetime.now()
        runner, limit = self._map_func_max_processes()
        self.max_processes = limit
        results = runner(self._target, self.jobs, max_processes=limit)
        self.end_time = datetime.now()
        self.duration = str(self.end_time - self.start_time)
        return results

    def __len__(self):
        """Number of queued jobs."""
        return len(self.jobs)

    #
    # INTERNAL
    #
    def _map_func_max_processes(self):
        """Return the ``(mapper, worker_limit)`` pair for the chosen pool type."""
        if self.pool_type == MPList.THREAD:
            return map_with_threadpool, (self.max_processes or MAX_THREADPOOL_PROCESSES)
        if self.pool_type == MPList.SEQUENTIAL:
            # The sequential mapper ignores the limit; False marks "not used".
            return map_sequential, False
        return map_with_pool, (self.max_processes or MAX_POOL_PROCESSES)

    def _target(self, args):
        """Unpack one queued job and invoke it."""
        func, call_args, call_kwargs = args
        return func(*call_args, **call_kwargs)
        
    

#
# INTERNAL METHODS
#
def _stop_pool(pool,success=True):
  pool.close()
  pool.join()
  return success


def _map_async(pool,map_func,objects):
  try:
    return pool.map_async(map_func,objects)
  except KeyboardInterrupt:
    print("Caught KeyboardInterrupt, terminating workers")
    pool.terminate()
    return False
  else:
    print("Failure")
    return _stop_pool(pool,False)


def _run_pool(pool,map_function,args_list):
    """Run ``map_function`` over ``args_list`` on ``pool`` and return the results.

    The pool is closed and joined before the results are collected, so it
    cannot be reused afterwards.

    Raises
    ------
    KeyboardInterrupt
        If submission was interrupted (``_map_async`` returned ``False``).
    """
    out=_map_async(pool,map_function,args_list)
    if out is False:
        # _map_async has already terminated the pool and there is no result
        # handle; reap the workers and surface the interrupt rather than
        # failing with AttributeError on ``False.get()``.
        pool.join()
        raise KeyboardInterrupt
    _stop_pool(pool)
    return out.get()

In [None]:
def arg_dict_decorator(func):
    """Adapt ``func`` so it is called with one dict of keyword arguments.

    The returned wrapper takes a single mapping and invokes
    ``func(**mapping)``, which lets a keyword-argument function be used with
    mappers that pass exactly one item per call.

    Parameters
    ----------
    func : callable
        Function accepting the mapping's keys as keyword arguments.

    Returns
    -------
    callable
        Wrapper with signature ``wrapper(arg_dict)``. It carries ``func``'s
        name and docstring.
    """
    from functools import wraps

    # wraps() copies func's name and docstring onto the wrapper, so the
    # decorated function still identifies itself in tracebacks and help().
    @wraps(func)
    def decorator(arg_dict):
        return func(**arg_dict)
    return decorator


import threading

# calc_bands runs on many threads at once and every result row is spread over
# several module-level lists; this lock makes appending a row atomic so that
# index i refers to the same scene in every list.
_RESULTS_LOCK = threading.Lock()


@arg_dict_decorator
def calc_bands(type,properties,geometry):
    """Compute per-scene mean R, G, B and NIR values for one GeoJSON feature.

    Called with a single feature dict, which ``arg_dict_decorator`` unpacks
    into the three parameters. Scenes of the global ``product`` that
    intersect ``geometry`` during 2009 are fetched at resolution 1, and for
    each scene the four bands are averaged over the pixels that are non-zero
    in at least one band.

    One row per scene is appended to the module-level lists ``imgs``,
    ``roofs``, ``footprint_shapes``, ``total_pixels``, ``raw_reds``,
    ``raw_greens``, ``raw_blues`` and ``raw_nirs``. The feature is attempted
    at most twice, two seconds apart; if both attempts fail its ``cnt`` is
    appended to ``unsuc_rfs`` and no rows are recorded.

    Parameters
    ----------
    type : str
        The feature's ``type`` member. Unused; accepted so the feature dict
        unpacks cleanly (the name is fixed by the dict key).
    properties : dict
        Must contain ``'cnt'``, the feature's index.
    geometry : dict
        GeoJSON geometry used for the scene search and the stored shape.

    Returns
    -------
    None
    """
    max_attempts = 2
    retry_delay = 2  # seconds between attempts

    cnt = properties['cnt']
    rf_no = cnt

    for attempt in range(1, max_attempts + 1):
        # Rows are staged locally and published only after the whole feature
        # succeeded, so a failure part-way through the scenes cannot leave
        # partial rows behind that the retry would then duplicate.
        rows = []
        try:
            polygon = shape(geometry)

            scenes, ctx = dl.scenes.search(geometry, products=product, start_datetime='2009-01-01',
                                           end_datetime='2009-12-31', limit=None)

            for img_id, scene in enumerate(scenes):
                naip_data = scene.ndarray(bands="red green blue nir", ctx=ctx.assign(resolution=1),mask_alpha=False)
                bands = [naip_data[i].astype(float) for i in range(4)]

                # Walk the image as (red, green, blue, nir) tuples and drop
                # blank pixels, i.e. those that are zero in all four bands.
                # NOTE(review): a vectorised numpy version would be much
                # faster, but its handling of masked arrays would need to be
                # confirmed against this per-pixel logic first.
                selected_pixels = [
                    px for px in zip(*(band.flatten() for band in bands))
                    if px[0] != 0 or px[1] != 0 or px[2] != 0 or px[3] != 0
                ]

                rows.append((
                    img_id,
                    len(selected_pixels),  # number of non-blank pixels
                    mean([px[0] for px in selected_pixels]),
                    mean([px[1] for px in selected_pixels]),
                    mean([px[2] for px in selected_pixels]),
                    mean([px[3] for px in selected_pixels]),
                ))
        except Exception as e:
            if attempt == max_attempts:
                # Report why the feature failed instead of discarding the error.
                print('unsuccessfull at count', cnt, '-', repr(e))
                with _RESULTS_LOCK:
                    unsuc_rfs.append(rf_no)
            else:
                time.sleep(retry_delay)
        else:
            with _RESULTS_LOCK:
                for img_id, total_pixel, red_mean, green_mean, blue_mean, nir_mean in rows:
                    imgs.append(img_id)
                    roofs.append(rf_no)
                    footprint_shapes.append(polygon)
                    total_pixels.append(total_pixel)
                    raw_reds.append(red_mean)
                    raw_greens.append(green_mean)
                    raw_blues.append(blue_mean)
                    raw_nirs.append(nir_mean)
            break


 

### Calculate band values for all roofs in LA

In [None]:
# GeoJSON is UTF-8 by specification (RFC 7946), so decode it explicitly
# instead of relying on the platform's default encoding.
with open('LA_official_city_streets_2016.geojson', encoding='utf-8') as f:
    js = json.load(f)

In [None]:
# Each GeoJSON feature is one work item.
arg_list = js['features']
print(len(arg_list))

# Stamp every feature with its position in the list. cnt is pre-set so that
# it is still -1 afterwards when there are no features.
cnt = -1
for cnt, feat in enumerate(arg_list):
    feat['properties']['cnt'] = cnt

# arg_list = arg_list[0:20000]

In [None]:
# Product id that calc_bands passes to dl.scenes.search (read as a global).
product = u'usda:naip:rgbn:v1'

# Result accumulators filled by calc_bands; one entry per processed scene.
# roofs holds the feature index ('cnt'), imgs the scene index within a feature.
roofs = []
imgs = []
footprint_shapes=[]
total_pixels = []

# Mean band values per scene, computed over the non-blank pixels.
raw_reds = []
raw_greens = []
raw_blues = []
raw_nirs = []

# Feature indices for which both attempts in calc_bands failed.
unsuc_rfs = []

# IPython %time magic: run calc_bands over every feature on a thread pool of
# up to 64 workers and report the elapsed time. calc_bands returns nothing,
# so `out` is a list of None; the results are in the lists above.
%time out = map_with_threadpool(calc_bands,arg_list,max_processes=64)
    

In [None]:
# Completion marker for the run above (which used the thread-pool mapper).
print('finished multiprocessing') 

In [None]:
# Assemble the accumulator lists into a pandas DataFrame, one row per scene.
# unsuc_rfs (the failed features) is not included.
df = pd.DataFrame({ 'roof_no': roofs, 'img_id':imgs, 'footprint_shapes':footprint_shapes,'total_pixels': total_pixels,
                  'raw_red_mean':raw_reds,'raw_green_mean': raw_greens,'raw_blue_mean': raw_blues,'raw_nir_mean': raw_nirs})

# Write the full table to CSV in the working directory.
df.to_csv('band_values_NAIP_LA_city_rd_2009_10-28.csv',encoding='utf8')

In [None]:
# Display the DataFrame as the cell output.
df

In [None]:
# Initialise a failure counter; nothing visible in this section reads or updates it.
fail_count = 0 

----------------------------------------