# Overview

This Jupyter notebook predicts large-area tiles downloaded in `4a-download-large-area` with a trained model from `3-model-master`. The notebook is broken down into the following sections:

   * **Model loading**
   * **Coordinate identification**
   * **Tiling**
   * **Loading and predicting**
   * **Mosaicing**
   * **Writing TIF**
   * **Writing COG**

# 1.0 Package imports

In [1]:
import tensorflow as tf
sess = tf.Session()
from keras import backend as K
K.set_session(sess)
from osgeo import ogr, osr
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import os
import rasterio
from rasterio.transform import from_origin
from tqdm import tnrange, tqdm_notebook
from scipy.ndimage import median_filter
from skimage.transform import resize
import hickle as hkl
from time import sleep

%run ../src/downloading/utils.py
%run ../src/models/utils.py

Using TensorFlow backend.


# 1.1 Parameter definitions

In [2]:
# Landscape key: selects the matching row of the project database and is
# embedded in tile paths.
LANDSCAPE = 'elsalvador-country'
# Year of imagery; inserted into the input/output paths built from the database row.
YEAR = 2018


In [3]:
# Look up the landscape's reference coordinates and storage path in the
# project database.
database = pd.read_csv("../project-monitoring/database.csv")
landscape_rows = database[database['landscape'] == LANDSCAPE]
if landscape_rows.empty:
    # Fail here with a clear message instead of an opaque IndexError on `[0]`.
    raise ValueError(f"No row for landscape '{LANDSCAPE}' in database.csv")

# Select the first matching row explicitly: calling float() on a Series is
# deprecated in recent pandas and raises when more than one row matches.
landscape_row = landscape_rows.iloc[0]
path = landscape_row['path']
coords = (float(landscape_row['longitude']), float(landscape_row['latitude']))

IO_PARAMS = {'prefix': '../',
             'bucket': 'restoration-monitoring',
             'coords': coords,
             'bucket-prefix': '',
             'path': path}

# Per-year locations: predicted tiles, the final mosaic GeoTIFF, and the
# processed input tiles.
OUTPUT = IO_PARAMS['prefix'] + IO_PARAMS['path'] + str(YEAR) + '/output/'
TIF_OUTPUT = IO_PARAMS['prefix'] + IO_PARAMS['path'] + str(YEAR) + ".tif"
INPUT = IO_PARAMS['prefix'] + IO_PARAMS['path'] + str(YEAR) + '/processed/'

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(OUTPUT, exist_ok = True)

print(coords, OUTPUT)

(-90.08, 13.145) ../project-monitoring/el-salvador/sonsonate/acajutla/2018/output/


# 2.0 Model loading

In [4]:
# Directory holding the saved TensorFlow checkpoint of the trained model.
# NOTE(review): this rebinds `path`, which the parameter cell used for the
# landscape path; IO_PARAMS captured that earlier value before this point.
path = '../models/master-2021-s1/'
# Rebuild the graph from the .meta file, then restore the latest checkpoint
# found in `path` into the session `sess`.
new_saver = tf.train.import_meta_graph(path + 'model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint(path))

In [5]:
# Locate the model's output tensor by name in the restored graph. The index of
# the final sigmoid layer is not fixed, so probe conv2d_0 ... conv2d_49 and
# keep the highest-numbered match.
logits = None
for i in range(50):
    try:
        logits = tf.get_default_graph().get_tensor_by_name("conv2d_{}/Sigmoid:0".format(i))
    except (KeyError, ValueError):
        # No tensor with this name in the graph; try the next index.
        continue

if logits is None:
    # Fail at load time rather than with a NameError inside the prediction loop.
    raise RuntimeError("No 'conv2d_<i>/Sigmoid:0' tensor found in the restored graph")

# Input placeholders: the model input and the per-sample length.
inp = tf.get_default_graph().get_tensor_by_name("Placeholder:0")
length = tf.get_default_graph().get_tensor_by_name("Placeholder_1:0")

# Scalar placeholders fed with fixed values (5, 0, 3) at prediction time.
rmax = tf.get_default_graph().get_tensor_by_name("Placeholder_4:0")
rmin = tf.get_default_graph().get_tensor_by_name("Placeholder_5:0")
dmax = tf.get_default_graph().get_tensor_by_name("Placeholder_6:0")

## 2.2 Tiling functions

In [6]:
def fspecial_gauss(size, sigma):
    """Return an unnormalised 2-D Gaussian kernel, after MATLAB's 'fspecial'.

    Args:
        size: side length of the square kernel, in pixels.
        sigma: standard deviation of the Gaussian.

    Returns:
        A (size, size) float array whose value is 1.0 at offset (0, 0); the
        kernel is not rescaled to sum to one.
    """
    # Integer offsets along one axis; the same offsets are used for both axes.
    offsets = np.arange(-size//2 + 1, size//2 + 1)
    squared_radius = offsets[:, np.newaxis]**2 + offsets[np.newaxis, :]**2
    return np.exp(-(squared_radius/(2.0*sigma**2)))

# Build the weight maps used to blend the regular and offset grids of 14x14
# sub-tile predictions into a single mosaic.

# Top-left 7x7 quadrant of a 14x14 Gaussian (sigma = 2); its largest value is
# in the bottom-right corner of the quadrant.
arr = fspecial_gauss(14, 2)
arr = arr[:7, :7]

# Sub-tile counts: SIZE x SIZE regular grid, a (SIZE-1) x (SIZE-1) grid offset
# on both axes, and two grids of (SIZE-1) * SIZE sub-tiles offset on one axis.
SIZE = 10
SIZE_N = SIZE*SIZE
SIZE_UR = (SIZE - 1) * (SIZE - 1)
SIZE_R = (SIZE - 1) * SIZE
SIZE_U = SIZE_R
TOTAL = SIZE_N + SIZE_UR + SIZE_R + SIZE_U
print(SIZE_N, SIZE_UR, SIZE_R, SIZE_U, TOTAL)

# Mirror the quadrant along both axes to get a 14x14 kernel that peaks at its centre.
arr = np.concatenate([arr, np.flip(arr, 0)], 0)
base_filter = np.concatenate([arr, np.flip(arr, 1)], 1)
# Regular grid: tile the kernel SIZE x SIZE, then set the outer 7-pixel border
# of the weights to 1.
normal = np.tile(base_filter, (SIZE, SIZE))
normal[:, 0:7] = 1.
normal[:, -7:] = 1.
normal[0:7, :] = 1.
normal[-7:, :] = 1.
# Offset grids: one fewer kernel along each shifted axis, zero-padded by 7
# pixels on that axis so every weight map has the same shape as `normal`.
upright = np.tile(base_filter, (SIZE - 1, SIZE - 1))
upright = np.pad(upright, (7, 7), 'constant', constant_values = 0)
right_filter = np.tile(base_filter, (SIZE, SIZE - 1))
right_filter = np.pad(right_filter, ((0, 0), (7, 7)), 'constant', constant_values = 0)
up_filter = np.tile(base_filter, (SIZE - 1, SIZE))
up_filter = np.pad(up_filter, ((7, 7), (0, 0)), 'constant', constant_values = 0)

# Normalise in place so the four weight maps sum to 1 at every pixel.
sums = (up_filter + right_filter + upright + normal)
up_filter /= sums
right_filter /= sums
upright /= sums
normal /= sums

100 81 90 90 361


# 2.3 Prediction functions

In [7]:
# Per-band lower bounds (17 values, indexed by band). load_and_predict_folder
# clips each input band to [min_all[band], max_all[band]] before rescaling it.
min_all = [0.01588921781629324, 0.03045072354376316,
 0.017705744933336973, 0.08037136927247047,
 0.04978184312582016, 0.07456922113895416,
 0.081697703525424, 0.08504692040383816,
 0.06000244345515966, 0.0359250520914793,
 0.0, 0.0031033563450910146,
 -0.37605552971363065, 0.0027289406443014733,
 0.003591871485114094, 0.0057775299064815044,
 0.0]

# Per-band upper bounds (17 values, indexed by band), paired with min_all.
max_all = [0.188370236158371, 0.28401015907526017,
 0.41655176877975464, 0.5010248422622681,
 0.45965318948030487, 0.47227429449558267,
 0.49787560522556307, 0.5122129917144775,
 0.6436399221420288, 0.5832849562168123,
 0.36779049038887024, 0.717898428440094,
 0.3190168184041977, 0.6600269079208374,
 0.8889312487840653, 0.6703135967254639,
 0.14510338470339812]

In [8]:
def load_and_predict_folder(y_col, folder, overlap_filter = upright,
                            normal_filter = normal, histogram_match = False):
    """Predict one processed tile and blend its overlapping sub-tile predictions.

    Loads ``INPUT/<y_col>/<folder>.hkl``, divides non-float data by 65535,
    replaces band 10 at every index of axis 0 with a 5x5 median-filtered copy
    of band 10 at index 0, maps bands 11-14 through ``x * 2 - 1``, splits the
    array into sub-tiles with ``tile_images``, clips and rescales every band
    with ``min_all`` / ``max_all``, runs the model in batches, and combines the
    regular and offset sub-tile grids into one weighted mosaic.

    Args:
        y_col: row index of the tile (name of the sub-directory under INPUT).
        folder: column index of the tile (file stem of the .hkl file).
        overlap_filter: weight map applied to the grid offset on both axes.
        normal_filter: weight map applied to the regular grid.
        histogram_match: currently unused; kept so existing calls still work.

    Returns:
        2-D array (SIZE * 14 pixels per side): the sum of the four weighted
        mosaics.
    """
    pred_files = INPUT + str(y_col) + "/" + str(folder) + ".hkl"

    x = hkl.load(pred_files)
    if not isinstance(x.flat[0], np.floating):
        # Integer-encoded tiles are expected to hold values above 1.
        assert np.max(x) > 1
        x = x / 65535.

    # Band 10: smooth the first slice and broadcast it to every index of axis 0.
    filtered = median_filter(x[0, :, :, 10], size = 5)
    x[:, :, :, 10] = np.stack([filtered] * x.shape[0])

    x[..., 11:15] = (x[..., 11:15] * 2) - 1

    x = tile_images(x)
    pred_x = np.stack(x)
    # Clip each band to its bounds, then rescale so the bounds map to [-1, 1].
    for band in range(0, pred_x.shape[-1]):
        mins = min_all[band]
        maxs = max_all[band]
        pred_x[..., band] = np.clip(pred_x[..., band], mins, maxs)
        midrange = (maxs + mins) / 2
        rng = maxs - mins
        pred_x[..., band] = (pred_x[..., band] - midrange) / (rng / 2)

    # Batch boundaries are hard-coded for 361 sub-tiles (TOTAL when SIZE == 10):
    # seventeen batches of 20 followed by one batch of 21.
    preds = []
    batches = list(range(0, 341, 20)) + [361]
    for i in range(len(batches) - 1):
        batch_x = pred_x[batches[i]:batches[i+1]]
        lengths = np.full((batch_x.shape[0], 1), 12)
        batch_pred = sess.run(logits,
                              feed_dict={inp: batch_x,
                                         rmax: 5,
                                         rmin: 0,
                                         dmax: 3,
                                         length: lengths}).reshape(batch_x.shape[0], 14, 14)
        # Iterating the batch yields one 14x14 prediction per sub-tile.
        preds.extend(batch_pred)

    def _mosaic(start, stop, per_row):
        # Lay out preds[start:stop] row by row, `per_row` sub-tiles per row.
        rows = [np.concatenate(preds[i:i + per_row], axis = 1)
                for i in range(start, stop, per_row)]
        return np.concatenate(rows, axis = 0)

    # Regular SIZE x SIZE grid.
    stacked = _mosaic(0, SIZE_N, SIZE) * normal_filter

    # Grid offset on both axes: pad 7 pixels on every side.
    overlapped = _mosaic(SIZE_N, SIZE_N + SIZE_UR, SIZE - 1)
    overlapped = np.pad(overlapped, (7, 7), 'constant', constant_values = 0)
    overlapped = overlapped * overlap_filter

    # Grid offset along axis 0 only: pad rows.
    up = _mosaic(SIZE_N + SIZE_UR, SIZE_N + SIZE_UR + SIZE_R, SIZE)
    up = np.pad(up, ((7, 7), (0, 0)), 'constant', constant_values = 0)
    up = up * up_filter

    # Grid offset along axis 1 only: pad columns.
    right = _mosaic(SIZE_N + SIZE_UR + SIZE_R, TOTAL, SIZE - 1)
    right = np.pad(right, ((0, 0), (7, 7)), 'constant', constant_values = 0)
    right = right * right_filter

    return stacked + overlapped + right + up

# Development Code

# 2.4 Run predictions

In [15]:
# Predict every tile that has a processed input file but no saved prediction,
# so the cell can be interrupted and resumed.
all_preds = []
for row in tnrange(0, 14 * 5):
    for column in range(0, 12 * 5):
        input_file = f"{INPUT}{str(row)}/{str(column)}.hkl"
        output_file = f"{OUTPUT}{str(row)}/{str(column)}.npy"
        # Skip tiles without input, and tiles already predicted.
        if not os.path.exists(input_file) or os.path.exists(output_file):
            continue
        prediction = load_and_predict_folder(row, column, histogram_match = False)
        if not os.path.exists(OUTPUT + str(row) + "/"):
            os.makedirs(OUTPUT + str(row) + "/")
        # Drop the 7-pixel border on every side before saving.
        np.save(output_file, prediction[7:-7, 7:-7])
        prediction = prediction[7:-7, 7:-7]

HBox(children=(IntProgress(value=0, max=70), HTML(value='')))

E0204 12:54:03.245737 4683167168 ultratb.py:155] Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/john.brandt/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-967cb0cd9b21>", line 7, in <module>
    prediction = load_and_predict_folder(row, column, histogram_match = False)
  File "<ipython-input-8-9f67080a577d>", line 15, in load_and_predict_folder
    x = hkl.load(pred_files)
  File "/Users/john.brandt/anaconda3/envs/remote_sensing/lib/python3.6/site-packages/hickle/hickle.py", line 550, in load
    py_container = _load(py_container, h_root_group['data'])
  File "/Users/john.brandt/anaconda3/envs/remote_sensing/lib/python3.6/site-packages/hickle/hickle.py", line 634, in _load
    subdata = load_dataset(h_group)
  File "/Users/john.brandt/anaconda3/envs/remote_sensing/lib/python3.6/site-packages/hickle/hickle.py", line 581, in load_dataset
    data = load_fn(h_node)
  File "/Users/john.brandt/anaconda3/envs/rem

KeyboardInterrupt: 


# 2.5 Mosaic predictions

In [10]:
# Extent of the mosaic in tiles: x counts columns, y counts rows.
start_x, start_y = 0*5, 0*5
max_x, max_y = 12*5, 14*5

# Each tile occupies a 126 x 126 pixel window of the uint8 mosaic.
predictions = np.zeros(((max_y - start_y) * 126, (max_x - start_x) * 126),
                       dtype = np.uint8)

max_y_out, max_x_out = predictions.shape

numb = 0
for row in tnrange(start_y, max_y):
    for column in range(start_x, max_x):
        input_file = f"{OUTPUT}{str(row)}/{str(column)}.npy"
        if not os.path.exists(input_file):
            continue
        prediction = np.load(input_file)
        x_value = 126 * (column - start_x)
        # Rows are laid out bottom-up: row `start_y` lands at the bottom edge.
        y_value = max_y_out - 126 * (row - start_y + 1)
        if row % 5 == 0 and column % 5 == 0:
            numb += 1
        # Scale to 0-255 and store as uint8.
        predictions[y_value:y_value + 126, x_value:x_value + 126] = (prediction * 255).astype(np.uint8)

# Zero out values below 41 (on the 0-255 scale).
predictions[predictions < 41] = 0.
print(f"There are {numb*4000} hectares processed")


HBox(children=(IntProgress(value=0, max=70), HTML(value='')))


There are 308000 hectares processed


#### 2.6 Sharpen predictions

In [11]:
def recover_new(arr, thresh):
    """Not currently used. Identifies small trees that may be below the
       threshold for binary map creation.

    Slides a window over every pixel at least 2 pixels from the edge and sets
    the centre pixel to 1 when all of the following hold:

      * at least 24 of the 25 pixels in the 5x5 window are below ``thresh``;
      * between 2 and 5 pixels of the 3x3 window are above 0.15;
      * at most 4 of the 16 pixels in the outer ring (5x5 minus 3x3) are
        above 0.15;
      * the centre pixel is the first maximum of the 3x3 window.

    Recovered pixels are marked with -1 while scanning, so later windows see
    them as "below threshold", and are converted to 1 at the end.

    NOTE(review): the -1 marker needs a dtype that can hold negative values
    (float or signed int); an unsigned array such as uint8 cannot store it.

    Args:
        arr: 2-D array of predictions; it is copied, not modified.
        thresh: value below which a pixel counts as negative.

    Returns:
        A copy of ``arr`` with recovered pixels set to 1.
    """
    stacked = np.copy(arr)
    for window_x in tnrange(2, stacked.shape[0]-2, 1):
        for window_y in range(2, stacked.shape[1]-2, 1):
            five_w = stacked[window_x-2:window_x+3, window_y-2:window_y+3]
            three_w = stacked[window_x-1:window_x+2, window_y-1:window_y+2]

            # count_nonzero on the mask gives the same count as the previous
            # len(window[np.argwhere(mask)]) without building index arrays.
            n_five_below = np.count_nonzero(five_w < thresh)
            if n_five_below < 24:
                continue

            n_three_above = np.count_nonzero(three_w > 0.15)
            if not (2 <= n_three_above < 6):
                continue

            # Outer ring count is n_five_above - n_three_above. The previous
            # test, n_three_above <= n_five_above + 4, was always true because
            # the 3x3 window is a subset of the 5x5 window.
            n_five_above = np.count_nonzero(five_w > 0.15)
            if n_five_above > n_three_above + 4:
                continue

            # Flat index 4 is the centre of the 3x3 window.
            if np.argmax(three_w) == 4:
                stacked[window_x, window_y] = -1.
    stacked[np.where(stacked == -1)] = 1.
    return stacked

In [12]:
# Alias, not a copy: later in-place edits to `stacked` also modify `predictions`.
stacked = predictions

# Optional binarisation step, disabled by default.
threshold = False
if threshold:
    stacked = recover_new(predictions, 0.3)
    # NOTE(review): `thresh_p` is not defined in any cell visible in this
    # notebook; confirm it is provided elsewhere (e.g. by the %run utility
    # scripts) before enabling this branch.
    stacked[np.where(stacked > thresh_p)] = 0.71
    stacked[np.where(stacked < thresh_p)] = -1
    # No-op as written: pixels equal to 1.0 are assigned 1. again.
    stacked[np.where(stacked == 1.0)] = 1.

In [13]:
# Optional visual check of the mosaic as a heatmap; disabled by default.
plot = False
if plot:
    plt.figure(figsize=(15, 15))
    sns.heatmap(stacked, cbar = False, cmap = "Greens")

# 3.0 Write GeoTiff

In [14]:
# Geographic bounds of the mosaic: 1260 units per tile along each axis,
# measured from `coords` (presumably metres, i.e. 126 px at 10 m — TODO confirm
# against bounding_box in the downloading utilities).
point = bounding_box(coords, (max_x*1260)-0, ((max_y)*1260)-0, expansion = 0)
west = point[1][0]
east = point[0][0]
north = point[0][1]
south = point[1][1]

# Clamp negative values (e.g. the -1 marker of the optional threshold step)
# before casting to uint8.
stacked[stacked < 0] = 0.
stacked = stacked.astype(np.uint8)
transform = rasterio.transform.from_bounds(west = west, south = south,
                                           east = east, north = north,
                                           width = stacked.shape[1],
                                           height = stacked.shape[0])

print("Writing", TIF_OUTPUT)
# The context manager closes the dataset even if the write raises.
with rasterio.open(TIF_OUTPUT, 'w', driver = 'GTiff',
                   height = stacked.shape[0], width = stacked.shape[1], count = 1,
                   dtype = 'uint8',
                   crs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs',
                   transform = transform) as new_dataset:
    new_dataset.write(stacked, 1)

Writing ../project-monitoring/el-salvador/sonsonate/acajutla/2018.tif


# 3.1 Cloud optimized Geotiff

In [24]:
# Convert a GeoTIFF to a tiled, LZW-compressed copy, then build averaged
# overviews at factors 2, 4, 8 and 16.
# NOTE(review): both paths are hard-coded to `drc-kafubu`, not to TIF_OUTPUT or
# the LANDSCAPE set in the parameter cell — confirm before re-running.
!gdal_translate ../../ce-hosting/includes/drc-kafubu.tif ../tile_data/cog/drc-kafubu.tif \
               -co TILED=YES -co COMPRESS=LZW
!gdaladdo -r average -ro ../tile_data/cog/drc-kafubu.tif 2 4 8 16

Input file size is 630, 630
0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.


# 3.2 One-hectare tree cover Geotiff

In [None]:
# Aggregate the mosaic into 10 x 10 pixel blocks and take each block's mean.
# Both dimensions of `stacked` must be divisible by 10 for the reshape.
summed = np.reshape(stacked, (stacked.shape[0] // 10, 10, stacked.shape[1] // 10, 10))
summed = np.mean(summed, (1, 3))

summed = summed.astype(np.float32)
# Height is the number of rows (shape[0]); using shape[1] for both dimensions
# only worked for square mosaics.
transform = rasterio.transform.from_bounds(west = west, south = south,
                                           east = east, north = north,
                                           width = summed.shape[1],
                                           height = summed.shape[0])

# The context manager closes the dataset even if the write raises.
with rasterio.open('../../ce-hosting/includes/bonanza1.tif', 'w', driver = 'GTiff',
                   height = summed.shape[0], width = summed.shape[1], count = 1,
                   dtype = 'float32',
                   crs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs',
                   transform = transform) as new_dataset:
    new_dataset.write(summed, 1)