## Core Workflow: Prepare training data
Purpose: The albedo of every pixel within a roof is first calculated from the pixel's band values using an equation from Ban-Weiss et al. These calculated albedo values give the albedo histogram of all pixels within the roof. The pixels are then grouped by their calculated albedo using natural (Jenks) breaks. The expected albedo value for the particular roof is then used to find the optimum group of pixels: the algorithm searches for the group of pixels that contains the expected albedo value and then checks whether that group holds more than 20% of the roof's pixels. If both conditions are satisfied, that group of pixels is selected for further analysis. If not, the algorithm searches for the closest group of pixels that does hold more than 20% of the pixels, so that both conditions are met as nearly as possible. From the final selection of pixels, 20 random pixels are selected for the ultimate analysis. Because little data is available for low-albedo roofs, ten samples of 20 pixels each are taken from every roof whose expected albedo is below 0.50. The band values from the selected pixels are used as input for the model.
<br>
<br>
*Date: 10-31-2019*


### Import statements

In [1]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib as mpl
import matplotlib.pyplot as plt

import gdal
from glob import glob

import jenkspy

import rasterio as rio
from rasterio.plot import show

import pandas as pd

import collections
from numpy import mean

import random
import statistics

import time

import descarteslabs as dl
print (sys.path)

['', '/opt/caffe/python', '/opt/caffe2/build', '/data/home/peter/notebooks/urban_heat', '/anaconda/envs/py36/lib/python36.zip', '/anaconda/envs/py36/lib/python3.6', '/anaconda/envs/py36/lib/python3.6/lib-dynload', '/anaconda/envs/py36/lib/python3.6/site-packages', '/anaconda/envs/py36/lib/python3.6/site-packages/IPython/extensions', '/data/home/peter/.ipython']


### Helper functions

In [None]:
def load_shape(place_shapefile):
    """Load the first feature of a shapefile as a single-ring Polygon dict.

    Only the first feature of the file is read.  For a ``MultiPolygon`` the
    member polygon whose exterior ring has the most vertices is kept; for a
    ``Polygon`` the exterior ring is kept.  Interior rings (holes) and the
    other member polygons are discarded.

    Parameters
    ----------
    place_shapefile : str
        Path of a vector file that ``fiona.open`` can read.

    Returns
    -------
    dict
        GeoJSON-like feature with the keys ``type``, ``properties``,
        ``geometry`` (always of type ``'Polygon'`` with exactly one ring of
        ``[x, y]`` pairs) and ``bbox`` (``[xmin, ymin, xmax, ymax]``).

    Raises
    ------
    StopIteration
        If the file contains no features.
    """
    # The context manager closes the dataset again; next(iter(...)) replaces
    # the deprecated Collection.next().
    with fiona.open(place_shapefile) as collection:
        pol = next(iter(collection))

    geometry = pol['geometry']
    coordinates = geometry['coordinates']
    if geometry['type'] == 'MultiPolygon':
        # coordinates -> polygons -> rings -> points.  Keep the polygon with
        # the longest exterior ring (the first one wins a tie).
        ring = max(coordinates, key=lambda polygon: len(polygon[0]))[0]
    else:
        # coordinates -> rings -> points.  Ring 0 is the exterior ring.
        ring = coordinates[0]

    # Only x and y are kept, so 3-D points (x, y, z) are accepted as well.
    outline = [[point[0], point[1]] for point in ring]

    if outline:
        xs, ys = zip(*outline)
        bbox = [min(xs), min(ys), max(xs), max(ys)]
    else:
        # Inverted box that the previous implementation produced for an
        # empty ring; kept so that callers see the same value.
        bbox = [180, 90, -180, -90]

    feature = {}
    feature['type'] = pol['type']
    feature['properties'] = pol['properties']
    feature['geometry'] = {'type': 'Polygon', 'coordinates': [outline]}
    feature['bbox'] = bbox
    return feature

### Set key variables

In [2]:
# Data directory used by this notebook.
data_root = '/data/phase_i/'

# Band names and the matching suffix  # S2, Lx
suffix = 'RGBNA'
bands = ['red', 'green', 'blue', 'nir']

# Band weights for the solar reflectance equation from the Ban-Weiss paper:
# cb weights blue, cg green, cr red and ci near-infrared.
cb, cg, cr, ci = 0.17, -0.13, 0.33, 0.54

### Save the band values from twenty random pixels from the optimum group of pixels

In [None]:
# Set your input file here
path_data = 'path_imagery.csv'

# Load the input table into a pandas DataFrame and echo it for inspection
path_df = pd.read_csv(path_data, encoding='utf8')
print(path_df)

# Build one tuple per table row.  The column order matters: the processing
# loop reads the fields by position.
img_info_columns = [
    'img_path',
    'tile_id',
    'expected_albedo',
    'latitude',
    'longitude',
    'roof_address',
    'footprint_shapes',
]
img_info = path_df[img_info_columns].apply(tuple, axis=1)



In [None]:
# Parameters of the pixel-selection procedure.
PIXELS_PER_SAMPLE = 20     # pixels drawn for every training sample
LOW_ALBEDO_LIMIT = 0.50    # roofs with a lower expected albedo are oversampled
LOW_ALBEDO_SAMPLES = 10    # number of samples drawn from each low-albedo roof
MIN_GROUP_FRACTION = 0.20  # a group must hold more than this share of the pixels
GVF_TARGET = 0.90          # goodness of variance fit at which the class search stops


def _valid_pixels_and_albedos(band_stack):
    """Scale the first four bands to [0, 1] and compute the albedo per pixel.

    Parameters
    ----------
    band_stack : numpy.ndarray
        Raster indexed as (band, row, col), as returned by rasterio's
        ``read()``, with red, green, blue and NIR in bands 0-3.  Values are
        divided by 255 and clipped to [0, 1] (8-bit imagery is assumed).

    Returns
    -------
    pixels : numpy.ndarray, shape (n, 4)
        One (red, green, blue, nir) row per pixel in row-major image order.
        Pixels whose four bands are all zero are dropped.
    albedo : numpy.ndarray, shape (n,)
        Solar reflectance of every kept pixel, computed with the
        module-level weights ``cb``, ``cg``, ``cr`` and ``ci``.

    Raises
    ------
    ValueError
        If the raster has fewer than four bands.
    """
    if band_stack.shape[0] < 4:
        raise ValueError('expected at least 4 bands, got {}'.format(band_stack.shape[0]))
    scaled = np.clip(band_stack[:4].astype(float) / 255, 0.0, 1.0)
    pixels = scaled.reshape(4, -1).T
    pixels = pixels[np.any(pixels != 0, axis=1)]
    albedo = ((cb * pixels[:, 2]) + (cg * pixels[:, 1])
              + (cr * pixels[:, 0]) + (ci * pixels[:, 3]))
    return pixels, albedo


def _pick_albedo_group(albedo, breaks, expected, min_fraction=MIN_GROUP_FRACTION):
    """Choose the natural-breaks class whose pixels represent the roof.

    Class ``k`` covers ``breaks[k] <= albedo < breaks[k + 1]``; the first and
    last class are open-ended so that every pixel is counted exactly once.
    A class is eligible when it holds more than ``min_fraction`` of all
    pixels.  The class that strictly contains ``expected`` is chosen when it
    is eligible; otherwise the eligible class with the boundary nearest to
    ``expected`` is chosen (the lower class wins a tie).

    Parameters
    ----------
    albedo : numpy.ndarray
        Non-empty 1-D array of per-pixel albedo values.
    breaks : sequence of float
        Ascending class boundaries, one more than the number of classes.
    expected : float
        Expected albedo of the roof.
    min_fraction : float, optional
        Share of the pixels a class has to exceed to be eligible.

    Returns
    -------
    tuple of float or None
        ``(low, high)`` boundaries of the chosen class, or ``None`` when no
        class is eligible.
    """
    edges = np.asarray(breaks, dtype=float)
    lows, highs = edges[:-1], edges[1:]

    # Number of inner boundaries that are <= the pixel value == class index.
    class_of_pixel = np.searchsorted(edges[1:-1], albedo, side='right')
    share = np.bincount(class_of_pixel, minlength=len(lows)) / albedo.size

    eligible = np.flatnonzero(share > min_fraction)
    if eligible.size == 0:
        return None

    # An expected value that sits exactly on a boundary belongs to no class
    # and is resolved by the nearest-boundary rule below.
    inside = (expected > lows) & (expected < highs)
    inside[0] = expected < highs[0]
    inside[-1] = expected > lows[-1]

    wanted = np.flatnonzero(inside & (share > min_fraction))
    if wanted.size:
        chosen = wanted[0]
    else:
        gap = np.minimum(np.abs(lows[eligible] - expected),
                         np.abs(highs[eligible] - expected))
        chosen = eligible[np.argmin(gap)]
    return edges[chosen], edges[chosen + 1]


# Output columns; every list receives exactly one entry per training sample.
r_m = []
g_m = []
b_m = []
n_m = []
lat = []
lon = []
roofs = []
all_bands = []
red_std = []
green_std = []
blue_std = []
nir_std = []
mean_albedos = []
exp_values = []
img_path = []
tile_id = []
roof_add = []
footprint_shapes = []
scn_red_means = []
scn_green_means = []
scn_blue_means = []
scn_nir_means = []

### Calculate band values for each image in the input table
for imageries, tile, exp_val, latitude, longitude, rf_ad, ft_shp in img_info:
    print(imageries)

    with rio.open(imageries) as src:
        naip_data = src.read()
        naip_meta = src.profile

    ### Normalize the band values and calculate solar reflectance
    ### using the equation from Ban-Weiss et al.
    selected_pixels, albedos_arr = _valid_pixels_and_albedos(naip_data)
    if albedos_arr.size == 0:
        print('Skipping {}: the image has no non-zero pixels'.format(imageries))
        continue

    # Scene-wide band means are taken on the raw values of the whole image.
    scn_red_mean = naip_data[0].astype(float).mean()
    scn_green_mean = naip_data[1].astype(float).mean()
    scn_blue_mean = naip_data[2].astype(float).mean()
    scn_nir_mean = naip_data[3].astype(float).mean()

    ### Use solar reflectance values to group the pixels
    # Use goodness of variance fit to find the number of classes.
    # goodness_of_variance_fit is not defined in this cell and has to be
    # available from elsewhere in the notebook.
    # NOTE(review): nclasses is incremented after the fit that reaches the
    # target, so the breaks below use one class more than that fit.  Kept
    # as is to leave the selected training data unchanged - confirm intent.
    gvf = 0.0
    nclasses = 2
    while gvf < GVF_TARGET:
        gvf = goodness_of_variance_fit(albedos_arr, nclasses)
        nclasses += 1

    # Create clusters of pixels using Jenks natural breaks
    breaks = jenkspy.jenks_breaks(albedos_arr.tolist(), nclasses)

    group_bounds = _pick_albedo_group(albedos_arr, breaks, exp_val)
    if group_bounds is None:
        print('Skipping {}: no pixel group holds more than {:.0%} of the pixels'
              .format(imageries, MIN_GROUP_FRACTION))
        continue
    group_low, group_high = group_bounds

    # Pixels lying exactly on a class boundary are left out.
    final_pixels = selected_pixels[(albedos_arr > group_low) & (albedos_arr < group_high)]

    # Shuffle the pixels of the chosen group; samples are consecutive slices.
    shuffled_order = random.sample(range(len(final_pixels)), len(final_pixels))
    rand_arr = final_pixels[shuffled_order]

    # Low-albedo roofs are rare, so several samples are taken from each of
    # them to balance the final training data.
    n_samples = LOW_ALBEDO_SAMPLES if exp_val < LOW_ALBEDO_LIMIT else 1
    for sample_no in range(n_samples):
        first = sample_no * PIXELS_PER_SAMPLE
        sample = rand_arr[first:first + PIXELS_PER_SAMPLE]
        if len(sample) < 2:
            # A standard deviation needs at least two pixels.
            print('{}: only {} of {} samples could be drawn'
                  .format(imageries, sample_no, n_samples))
            break

        # Band values of the sample, one list per band
        red_b, green_b, blue_b, nir_b = sample.T.tolist()

        r_m.append(mean(red_b))
        g_m.append(mean(green_b))
        b_m.append(mean(blue_b))
        n_m.append(mean(nir_b))

        red_std.append(statistics.stdev(red_b))
        green_std.append(statistics.stdev(green_b))
        blue_std.append(statistics.stdev(blue_b))
        nir_std.append(statistics.stdev(nir_b))

        # Store the sampled pixels as (red, green, blue, nir) tuples
        all_bands.append([tuple(pixel) for pixel in sample.tolist()])

        # Mean albedo of this sample
        sample_albedos = ((cb * sample[:, 2]) + (cg * sample[:, 1])
                          + (cr * sample[:, 0]) + (ci * sample[:, 3]))
        mean_albedos.append(mean(sample_albedos))

        img_path.append(imageries)
        tile_id.append(tile)
        exp_values.append(exp_val)
        lat.append(latitude)
        lon.append(longitude)
        # This fixed slice of the image path is stored as the roof number.
        roofs.append(imageries[-31:-26])
        roof_add.append(rf_ad)
        footprint_shapes.append(ft_shp)

        scn_red_means.append(scn_red_mean)
        scn_green_means.append(scn_green_mean)
        scn_blue_means.append(scn_blue_mean)
        scn_nir_means.append(scn_nir_mean)

# Store the results in a pandas DataFrame.
df = pd.DataFrame({'roof_address': roof_add, 'img_path': img_path, 'footprint_shapes': footprint_shapes, 'tile_id': tile_id,
                   'scn_red_m': scn_red_means, 'scn_green_m': scn_green_means, 'scn_blue_m': scn_blue_means, 'scn_nir_m': scn_nir_means,
                   'roof_no': roofs, 'latitude': lat, 'longitude': lon,
                   'red_mean': r_m, 'green_mean': g_m, 'blue_mean': b_m, 'nir_mean': n_m,
                   'red_std': red_std, 'green_std': green_std, 'blue_std': blue_std, 'nir_std': nir_std,
                   'all_bands': all_bands, 'expected_albedo': exp_values})

# Write the full results to csv using the pandas library.
df.to_csv('band_values_train_data.csv', encoding='utf8')


----------------------------------------------------