## Core Workflow: Calculating mean band values from the rooftop imageries
Purpose: Calculate mean band values from the rooftop imageries using pixels with high band values
<br>
*Date: 2019-03-11*
<br>
*Author: Taufiq Rashid*


### Import statements

In [34]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
from numpy import mean
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
# import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler 
# import ogr, gdal
from glob import glob

import requests
import logging
import time

import rasterio as rio
from rasterio.plot import show

import pandas as pd
import collections
import jenkspy
import random
import csv
import datetime
import descarteslabs as dl
print (sys.path)

['', '/opt/caffe/python', '/opt/caffe2/build', '/data/home/peter/notebooks/urban_heat', '/anaconda/envs/py36/lib/python36.zip', '/anaconda/envs/py36/lib/python3.6', '/anaconda/envs/py36/lib/python3.6/lib-dynload', '/anaconda/envs/py36/lib/python3.6/site-packages', '/anaconda/envs/py36/lib/python3.6/site-packages/IPython/extensions', '/data/home/peter/.ipython']


### Batch processing for calculating mean band values for the addresses with known dates

In [35]:
# set the path for imageries
data_path='/data/phase_i/roof_img/known_date/'

selector=data_path+'*m.tif'

paths=glob(selector)

print(len(paths))
paths

20


['/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00004_img_00_lat_34.0343_1m.tif',
 '/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00013_img_00_lat_37.7809_1m.tif',
 '/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00001_img_00_lat_37.7667_1m.tif',
 '/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00010_img_00_lat_34.1589_1m.tif',
 '/data/phase_i/roof_img/known_date/SA_naipCA_2009-06-21_rf_00011_img_00_lat_38.6074_1m.tif',
 '/data/phase_i/roof_img/known_date/SD_naipCA_2009-06-23_rf_00018_img_00_lat_33.2061_1m.tif',
 '/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00008_img_00_lat_37.7742_1m.tif',
 '/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00021_img_00_lat_33.7668_1m.tif',
 '/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00013_img_01_lat_37.7809_1m.tif',
 '/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00004_img_01_lat_34.0343_1m.tif',
 '/data/phase_i/roof_img/known_date/SD_naipCA_2009-06-23_rf_

## Set the values of the coefficients

In [36]:
cb = 0.17
cg = -0.13
cr = 0.33
ci = 0.54

### Calculate band values for each imageries in the path
### Normalize the band values
### Calculate solar reflectance using equation from Ban-Weiss et al.
### Use solar reflectance values to group the pixels
### Pick the group of pixels with high albedos 
### Calculate mean band values from ten random pixels from that group

In [37]:
r_m = []
g_m = []
b_m = []
n_m = []
lat=[]

for imageries in paths:
    print(imageries)
    with rio.open(imageries) as src:
        naip_data = src.read()
        naip_meta = src.profile
    
    red = naip_data[0]
    red = red.astype(float)
    red_max = red.max()
    red = red/red_max
    red = np.clip(red,0.0,1.0)

    green = naip_data[1]
    green = green.astype(float)
    green_max = green.max()
    green = green/green_max
    green = np.clip(green,0.0,1.0)

    blue = naip_data[2]
    blue = blue.astype(float)
    blue_max = blue.max()
    blue = blue/blue_max
    blue = np.clip(blue,0.0,1.0)

    nir = naip_data[3]
    nir = nir.astype(float)
    nir_max = nir.max()
    nir = nir/nir_max
    nir = np.clip(nir,0.0,1.0)
    
    arr = [red,green,blue,nir]
    
    flat_arr = []
    # flattened array of tuples
    flat_list = zip(*map(lambda x:x.flatten(),arr))
    for i in flat_list:
        flat_arr.append(i)   
    
    albedos=[]
    selected_pixels=[]
    # Calculate albedo values for all pixels
    for pixels in flat_arr:
        solar_reflectance = ((cb*pixels[2])+(cg*pixels[1])+(cr*pixels[0])+(ci*pixels[3]))
        if solar_reflectance != 0:
            albedos.append(solar_reflectance)
            selected_pixels.append(pixels)
    
    # Create cluster of pixels using jenks breaks
    breaks = jenkspy.jenks_breaks(albedos, nb_class=3)
    low_break = breaks[2]
    high_break = breaks[3]
    
    pixel_num = []
    pixel_no = -1
    top_albedos = []
    # store the top albedos and the pixel number that belong to those albedos
    for pixels in albedos:
        pixel_no = pixel_no + 1
        if breaks[2] < pixels < breaks[3]:
            top_albedos.append(pixels)
            pixel_num.append(pixel_no)
    
    # searching and storing the top pixels using the pixel numbers
    inter_pixels = []
    for pix_num in pixel_num:
        inter_pixels.append(selected_pixels[pix_num])
    
    # round the albedo values to 2 sig fig so that the highly frequent pixles can be grouped        
    rounded_albedos = []

    for pixels in top_albedos:
        r_p = round(pixels, 2)
        rounded_albedos.append(r_p)
        
    # remove values with frequency of less than 5%
    rm_percentage = 0.05*len(rounded_albedos)
    rm_percentage = int(rm_percentage)
    y = np.array(rounded_albedos)

    items, count = np.unique(y, return_counts=True)
    to_remove = items[count < rm_percentage]
    rem_albedos=[]
    item_num = []
    item_index = -1
    for items in y:
        item_index = item_index + 1
        if items not in to_remove:
            rem_albedos.append(items)
            item_num.append(item_index)
    
    # searching and storing the remaining pixels using the pixel numbers
    final_pixels = []
    for pix_num in item_num:        
        final_pixels.append(inter_pixels[pix_num])
            
    # shuffle the remaining pixels randomly from the top group and pick ten pixels
    rand_arr = random.sample(final_pixels, len(final_pixels))
    ten_pixels = rand_arr[0:10]
    
    # store the band values for all the bands in ten_pixels into separate list
    red_b = []
    green_b = []
    blue_b = []
    nir_b = []
    for pixels in ten_pixels:
        red_b.append(pixels[0]) 
        green_b.append(pixels[1])
        blue_b.append(pixels[2])
        nir_b.append(pixels[3])
    
    # calculate the mean values for all the bands from this list
    red_mean=mean(red_b)
    green_mean=mean(green_b)
    blue_mean=mean(blue_b)
    nir_mean=mean(nir_b)

    latitude = imageries[-14:-7]
    lat.append(latitude)
    r_m.append(red_mean)
    g_m.append(green_mean)
    b_m.append(blue_mean)
    n_m.append(nir_mean)

    
# store the results to a pandas library.
df = pd.DataFrame({'imageries': paths, 'latitude': lat, 'red_mean': r_m,'green_mean': g_m,'blue_mean': b_m,'nir_mean': n_m})


/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00004_img_00_lat_34.0343_1m.tif
/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00013_img_00_lat_37.7809_1m.tif
/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00001_img_00_lat_37.7667_1m.tif
/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00010_img_00_lat_34.1589_1m.tif
/data/phase_i/roof_img/known_date/SA_naipCA_2009-06-21_rf_00011_img_00_lat_38.6074_1m.tif
/data/phase_i/roof_img/known_date/SD_naipCA_2009-06-23_rf_00018_img_00_lat_33.2061_1m.tif
/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00008_img_00_lat_37.7742_1m.tif
/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00021_img_00_lat_33.7668_1m.tif
/data/phase_i/roof_img/known_date/SF_naipCA_2009-06-19_rf_00013_img_01_lat_37.7809_1m.tif
/data/phase_i/roof_img/known_date/LA_naipCA_2009-06-22_rf_00004_img_01_lat_34.0343_1m.tif
/data/phase_i/roof_img/known_date/SD_naipCA_2009-06-23_rf_00009_img_00_lat_33.0154_1m.tif
/data/phas

In [38]:
df

Unnamed: 0,imageries,latitude,red_mean,green_mean,blue_mean,nir_mean
0,/data/phase_i/roof_img/known_date/LA_naipCA_20...,34.0343,0.735976,0.745739,0.883817,0.735976
1,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7809,0.503325,0.511075,0.572303,0.503325
2,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7667,0.84174,0.838448,0.865329,0.84174
3,/data/phase_i/roof_img/known_date/LA_naipCA_20...,34.1589,0.86221,0.868145,0.878015,0.86221
4,/data/phase_i/roof_img/known_date/SA_naipCA_20...,38.6074,0.856006,0.893356,0.912277,0.856006
5,/data/phase_i/roof_img/known_date/SD_naipCA_20...,33.2061,0.695879,0.780718,0.905079,0.695879
6,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7742,0.509684,0.528174,0.576447,0.509684
7,/data/phase_i/roof_img/known_date/LA_naipCA_20...,33.7668,0.954264,0.954128,0.935197,0.954264
8,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7809,0.481323,0.308162,0.693603,0.481323
9,/data/phase_i/roof_img/known_date/LA_naipCA_20...,34.0343,0.761203,0.777678,0.932343,0.761203


-------

### Batch processing for calculating mean band values for the addresses with field measured

In [39]:
# set the path for imageries
data_path='/data/phase_i/roof_img/field_measured/'

selector=data_path+'*m.tif'

paths=glob(selector)

print(len(paths))
paths

13


['/data/phase_i/roof_img/field_measured/BA_naipCA_2009-06-21_rf_00019_img_00_lat_35.2987_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00015_img_01_lat_34.2420_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-26_rf_00005_img_01_lat_34.0715_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00015_img_00_lat_34.2420_1m.tif',
 '/data/phase_i/roof_img/field_measured/SA_naipCA_2009-06-21_rf_00000_img_00_lat_38.5777_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00003_img_00_lat_34.2403_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-26_rf_00005_img_00_lat_34.0715_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-26_rf_00007_img_00_lat_34.0689_1m.tif',
 '/data/phase_i/roof_img/field_measured/SA_naipCA_2009-06-07_rf_00014_img_00_lat_38.5358_1m.tif',
 '/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00003_img_01_lat_34.2403_1m.tif',
 '/data/phase_i/roof

## Set the values of the coefficients

In [40]:
cb = 0.17
cg = -0.13
cr = 0.33
ci = 0.54

### Calculate band values for each imageries in the path
### Normalize the band values
### Calculate solar reflectance using equation from Ban-Weiss et al.

In [41]:
r_m = []
g_m = []
b_m = []
n_m = []
lat=[]

for imageries in paths:
    print(imageries)
    with rio.open(imageries) as src:
        naip_data = src.read()
        naip_meta = src.profile
    
    red = naip_data[0]
    red = red.astype(float)
    red_max = red.max()
    red = red/red_max
    red = np.clip(red,0.0,1.0)
    read_mean = red.mean()

    green = naip_data[1]
    green = green.astype(float)
    green_max = green.max()
    green = green/green_max
    green = np.clip(green,0.0,1.0)
    green_mean = green.mean()

    blue = naip_data[2]
    blue = blue.astype(float)
    blue_max = blue.max()
    blue = blue/blue_max
    blue = np.clip(blue,0.0,1.0)
    blue_mean = blue.mean()    

    nir = naip_data[3]
    nir = nir.astype(float)
    nir_max = nir.max()
    nir = nir/nir_max
    nir = np.clip(nir,0.0,1.0)
    nir_mean = nir.mean()
    
    latitude = imageries[-14:-7]
    lat.append(latitude)
    r_m.append(red_mean)
    g_m.append(green_mean)
    b_m.append(blue_mean)
    n_m.append(nir_mean)

df2 = pd.DataFrame({'imageries': paths, 'latitude': lat, 'red_mean': r_m,'green_mean': g_m,'blue_mean': b_m,'nir_mean': n_m})


/data/phase_i/roof_img/field_measured/BA_naipCA_2009-06-21_rf_00019_img_00_lat_35.2987_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00015_img_01_lat_34.2420_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-26_rf_00005_img_01_lat_34.0715_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00015_img_00_lat_34.2420_1m.tif
/data/phase_i/roof_img/field_measured/SA_naipCA_2009-06-21_rf_00000_img_00_lat_38.5777_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00003_img_00_lat_34.2403_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-26_rf_00005_img_00_lat_34.0715_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-26_rf_00007_img_00_lat_34.0689_1m.tif
/data/phase_i/roof_img/field_measured/SA_naipCA_2009-06-07_rf_00014_img_00_lat_38.5358_1m.tif
/data/phase_i/roof_img/field_measured/LA_naipCA_2009-06-18_rf_00003_img_01_lat_34.2403_1m.tif
/data/phase_i/roof_img/field_measured/SA_naipCA_2009-06-07_r

In [42]:
df2

Unnamed: 0,imageries,latitude,red_mean,green_mean,blue_mean,nir_mean
0,/data/phase_i/roof_img/field_measured/BA_naipC...,35.2987,0.550728,0.639004,0.694746,0.676906
1,/data/phase_i/roof_img/field_measured/LA_naipC...,34.242,0.550728,0.11586,0.174757,0.11901
2,/data/phase_i/roof_img/field_measured/LA_naipC...,34.0715,0.550728,0.169758,0.351311,0.208196
3,/data/phase_i/roof_img/field_measured/LA_naipC...,34.242,0.550728,0.091225,0.156596,0.088027
4,/data/phase_i/roof_img/field_measured/SA_naipC...,38.5777,0.550728,0.17971,0.233506,0.153828
5,/data/phase_i/roof_img/field_measured/LA_naipC...,34.2403,0.550728,0.171663,0.240571,0.179452
6,/data/phase_i/roof_img/field_measured/LA_naipC...,34.0715,0.550728,0.21065,0.358756,0.237838
7,/data/phase_i/roof_img/field_measured/LA_naipC...,34.0689,0.550728,0.152851,0.216928,0.148456
8,/data/phase_i/roof_img/field_measured/SA_naipC...,38.5358,0.550728,0.39972,0.446435,0.407563
9,/data/phase_i/roof_img/field_measured/LA_naipC...,34.2403,0.550728,0.223862,0.302994,0.229165


In [43]:
# Write the full results to csv using the pandas library.
final_df = df.append(df2, ignore_index=True)  
final_df.to_csv('mean_band_values_3-11.csv',encoding='utf8')

In [44]:
final_df

Unnamed: 0,imageries,latitude,red_mean,green_mean,blue_mean,nir_mean
0,/data/phase_i/roof_img/known_date/LA_naipCA_20...,34.0343,0.735976,0.745739,0.883817,0.735976
1,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7809,0.503325,0.511075,0.572303,0.503325
2,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7667,0.84174,0.838448,0.865329,0.84174
3,/data/phase_i/roof_img/known_date/LA_naipCA_20...,34.1589,0.86221,0.868145,0.878015,0.86221
4,/data/phase_i/roof_img/known_date/SA_naipCA_20...,38.6074,0.856006,0.893356,0.912277,0.856006
5,/data/phase_i/roof_img/known_date/SD_naipCA_20...,33.2061,0.695879,0.780718,0.905079,0.695879
6,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7742,0.509684,0.528174,0.576447,0.509684
7,/data/phase_i/roof_img/known_date/LA_naipCA_20...,33.7668,0.954264,0.954128,0.935197,0.954264
8,/data/phase_i/roof_img/known_date/SF_naipCA_20...,37.7809,0.481323,0.308162,0.693603,0.481323
9,/data/phase_i/roof_img/known_date/LA_naipCA_20...,34.0343,0.761203,0.777678,0.932343,0.761203
