# Development: Applying 3-Category Classifier with New Architecture
Use an already-trained model to create comprehensive LULC classifications from imagery, either downloaded or pulled in on the fly.  

This notebook is an interim product for troubleshooting as we adapt the workflows to Python 3 and, more importantly, transition to executing these functions as a script. An additional change is that imagery is now pulled in on the fly.

Using chips and scoring models is not included.
  
Date: 2019-06-10  
Author: DC Team  

### Import statements
(may be over-inclusive)

In [None]:
# typical, comprehensive imports
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
import math
#
import numpy as np
import shapely
import cartopy
import geojson
import fiona
import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
import ogr, gdal
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K

import descarteslabs as dl
# Print the result of a Descartes Labs place lookup
# (presumably a quick check that the client works -- confirm).
print(dl.places.find('illinois'))  ## TEST

# Make the project's utility modules importable: the repository root and its
# 'utils' sub-directory are appended to the module search path.
ULU_REPO = os.environ["ULU_REPO"]
sys.path.extend([ULU_REPO + '/utils', ULU_REPO])
print(sys.path)

import util_rasters
import util_vectors
import util_training
# from image_sample_generator import ImageSampleGenerator
import util_imagery
import util_workflow
import util_chips
# from batch_generator import BatchGenerator
import util_scoring
import util_mapping

In [None]:
# --- core locations -------------------------------------------------------
data_root = '/data/phase_iv/'
place = 'kozhikode'
data_path = '{}{}/'.format(data_root, place)  # per-place data directory

# --- imagery resolution (original note: Lx:15 S2:10) -----------------------
resolution = 5

# --- tiling ----------------------------------------------------------------
tile_resolution = resolution  # tiles use the imagery resolution
tile_size, tile_pad = 256, 32

# --- misc: band list and the matching file-name suffix (S2, Lx) ------------
s2_bands = 'blue green red nir swir1 swir2 alpha'.split()
suffix = 'BGRNS1S2A'

# --- ground truth source: one of aue, aue+osm, aue+osm2 --------------------
label_suffix = 'aue'

### Importing shapefile

In [None]:
# Locate the study-area shapefile; its name uses the capitalized place name.
print (place, place.title()) # capitalized version of place name
place_title = place.title()
place_shapefile = data_path+place_title+"_studyAreaEPSG4326.shp"

util_vectors.info_studyareas(data_path, place)

# Load the study-area feature and keep its geometry coordinates and bounding box.
shape = util_vectors.load_shape(place_shapefile)
polygon = shape['geometry']['coordinates']
place_bbox = shape['bbox']

# using Albers projection
lonlat_crs = cartopy.crs.PlateCarree()
# The bounding box is ordered (min x, min y, max x, max y), i.e. longitudes at
# indices 0 and 2 and latitudes at indices 1 and 3 -- the same ordering the
# set_extent() call below relies on.  The centre longitude therefore comes
# from indices 0/2 and the centre latitude from indices 1/3 (these were
# previously swapped, which centred the projection on the wrong point).
clon = (place_bbox[0]+place_bbox[2])/2.0
clat = (place_bbox[1]+place_bbox[3])/2.0
print ("center co-ordinates", clat, clon)
albers = cartopy.crs.AlbersEqualArea(central_latitude=clat, central_longitude=clon)

# visualize Study Region
fig = plt.figure(figsize=(6,6))
ax = plt.subplot(projection=albers) # Specify projection of the map here
shp = shapely.geometry.shape(shape['geometry'])
ax.add_geometries([shp], lonlat_crs)
# set_extent expects (x0, x1, y0, y1), hence the reordering of the bbox entries
ax.set_extent((place_bbox[0], place_bbox[2], place_bbox[1], place_bbox[3]), crs=lonlat_crs)
ax.gridlines(crs=lonlat_crs)
plt.show()

### Looping through each tile for all the available scenes, implementing weighted composite per tile

In [None]:
cloud_threshold = 0.3
test_type = 2

for tiles in range(2117):
    tile_no = str(tiles).zfill(5)
    lulc_list = !find "/data/phase_iv/scenes/kozhikode/post18monsoon" -iname {'*_tile*'+tile_no+'_lulc.tif'} -type f
    score_list = !find "/data/phase_iv/scenes/kozhikode/post18monsoon" -iname {'*_tile*'+str(tile_no)+'_cloudscore.tif'} -type f
    pred_list = !find "/data/phase_iv/scenes/kozhikode/post18monsoon" -iname {'*_tile*'+str(tile_no)+'_pred.tif'} -type f
    
    lulcs, scores, preds = util_mapping.prep_lulc_derivation_arrays(lulc_list, score_list, pred_list,3)
    
    if test_type == 1: 
#         simple weighted
        lulc_derived = util_mapping.derive_lulc_map_predweighted_simple(lulcs, scores, preds, threshold=cloud_threshold)
        type = "predW"
    elif test_type == 2:
#         scaled weighted 
        lulc_derived = util_mapping.derive_lulc_map_predweighted_scaled(lulcs, scores, preds, threshold=cloud_threshold)
        type = "predScl"

    img, geo, prj, cols, rows = util_rasters.load_geotiff(lulc_list[0],dtype='uint8')

    lulc_derived_path = '/data/phase_iv/maps/kozhikode/post18monsoon/kozhikode_'+type+'_cld_'+str(cloud_threshold)+'_tile_'+tile_no+'.tif'

    util_rasters.write_1band_geotiff(lulc_derived_path, lulc_derived, geo, prj)
    
    if tiles%100==0:
        print(tiles)
        print(lulc_derived_path)

### Creating and cropping mosaic

In [None]:
# Number of digits in the zero-padded tile number used in the tile file names.
zfill = 5
# One '?' wildcard per digit of the tile number.
# NOTE(review): the slice of a five-character string can never yield more
# than five wildcards, whatever value zfill has.
qmarks = '?????'[0:zfill]

# Wildcard pattern for the per-tile maps and the path of the merged mosaic.
# Both paths are built from `type` and `cloud_threshold`, which this cell
# does not define itself; the tile compositing cell must have been run first.
path_template = '/data/phase_iv/maps/kozhikode/post18monsoon/kozhikode_'+type+'_cld_'+str(cloud_threshold)+'_tile_'+qmarks+'.tif'
path_destination = '/data/phase_iv/maps/kozhikode/final_composites/post18monsoon/kozhikode_'+type+'_cld_'+str(cloud_threshold)+'_complete_post18.tif'
# Merge the tiles into one GeoTIFF via the gdal_merge.py command-line tool.
# Per the GDAL documentation, -n 255 ignores input pixels with value 255 and
# -a_nodata 255 records 255 as the output's nodata value.  The pattern is
# presumably expanded to the individual tile files by the shell.
!gdal_merge.py -n 255 -a_nodata 255 -o {path_destination} {path_template}

In [None]:
# Pass the study-area shapefile and the merged mosaic (as a one-element list)
# to util_rasters.crop_maps.
# NOTE(review): crop_maps is defined outside this notebook; presumably it
# crops each listed raster to the shapefile outline -- confirm where the
# cropped output is written.
util_rasters.crop_maps(place_shapefile, [path_destination])

### *Moved to code base* - Checking uniformity between lulc, score and pred tiffs

In [None]:
def prep_lulc_derivation_arrays(lulc_paths, score_paths, pred_paths, num_cats):
    """Load per-scene rasters into stacked arrays and check that they line up.

    The i-th entries of the three path lists are treated as belonging to the
    same scene.  The first LULC raster is the reference: every other raster
    must share its geotransform, projection, column count and row count, and
    the LULC and score rasters must also share its array shape.

    Args:
        lulc_paths: paths of the class-map GeoTIFFs (read as uint8, 2-D).
        score_paths: paths of the cloud-score GeoTIFFs (read as float32).
        pred_paths: paths of the prediction GeoTIFFs (read as float32); each
            is stored into a (num_cats, rows, cols) slot.
        num_cats: number of category layers in each prediction raster.

    Returns:
        (lulcs, scores, preds) with shapes (n, rows, cols), (n, rows, cols)
        and (n, num_cats, rows, cols), where n == len(lulc_paths).

    Raises:
        AssertionError: if the lists differ in length or a raster does not
            match the reference grid.
        IndexError: if the path lists are empty.
    """
    assert len(lulc_paths)==len(score_paths)
    assert len(lulc_paths)==len(pred_paths)
    n_scenes = len(lulc_paths)

    # The first LULC raster defines the reference grid.
    img, geo, prj, cols, rows = util_rasters.load_geotiff(lulc_paths[0],dtype='uint8')
    assert img.ndim==2

    lulcs = np.zeros((n_scenes,)+img.shape, dtype='uint8')
    scores = np.zeros((n_scenes,)+img.shape, dtype='float32')
    preds = np.zeros((n_scenes,num_cats)+img.shape, dtype='float32')

    for i in range(n_scenes):
        lulc_img, lulc_geo, lulc_prj, lulc_cols, lulc_rows = util_rasters.load_geotiff(lulc_paths[i],dtype='uint8')
        assert img.shape==lulc_img.shape
        assert geo==lulc_geo
        assert prj==lulc_prj
        assert cols==lulc_cols
        assert rows==lulc_rows

        # Load the score raster of THIS scene (index i).  Index 0 was used
        # here before, which gave every scene the first scene's cloud scores.
        scores_img, scores_geo, scores_prj, scores_cols, scores_rows = util_rasters.load_geotiff(score_paths[i],dtype='float32')
        assert img.shape==scores_img.shape
        assert geo==scores_geo
        assert prj==scores_prj
        assert cols==scores_cols
        assert rows==scores_rows

        # The prediction raster carries an extra category axis, so only its
        # georeferencing is compared, not its array shape.
        preds_img, preds_geo, preds_prj, preds_cols, preds_rows = util_rasters.load_geotiff(pred_paths[i],dtype='float32')
        assert geo==preds_geo
        assert prj==preds_prj
        assert cols==preds_cols
        assert rows==preds_rows

        lulcs[i]=lulc_img
        scores[i]=scores_img
        preds[i]=preds_img
    return lulcs, scores, preds

### *Moved to code base* - Create Prediction-Weighted Composite 
#### (simple sum, includes cloudscore)

In [None]:
def derive_lulc_map_predweighted_simple(lulcs, scores, preds, categories=[0,1,2], threshold=0.3, stretch=False):
    """Combine per-scene class maps into one map by weighted voting.

    For each pixel, every scene whose cloud score is <= ``threshold`` votes
    for the category it assigned there.  A vote is worth
    ``prediction * cloud_weight`` where ``prediction`` is the scene's
    prediction value for that category and ``cloud_weight`` is
    ``1 - score`` (or ``1 - score / threshold`` when ``stretch`` is true).
    The category with the largest vote total wins; pixels with a zero vote
    total are set to 255.

    Args:
        lulcs: array (n_scenes, rows, cols) of category values.
        scores: array (n_scenes, rows, cols) of cloud scores.
        preds: array (n_scenes, n_categories, rows, cols); layer ``i`` along
            axis 1 holds the prediction for ``categories[i]``.
        categories: category values, in the order of the prediction layers.
            The default list is only read, never mutated.
        threshold: largest cloud score for which a scene may vote.
        stretch: rescale the cloud weight so that it reaches 0 at
            ``threshold`` instead of at a score of 1.

    Returns:
        uint8 array (rows, cols) of winning category values, 255 = no data.
    """
    array_shape = lulcs[0].shape
    cats = list(categories)
    valid_masks = (scores<=threshold)

    if stretch:
        reverse_scores = 1.0 - scores / threshold
    else:
        reverse_scores = 1.0 - scores

    # One vote layer per real category.  Scenes labelled 255 never vote, so
    # no layer is kept for the no-data value.
    votes = np.zeros((len(cats),)+array_shape, dtype='float32')
    for i, c in enumerate(cats):
        full_masks = (lulcs==c) & valid_masks
        # Explicit three-way product.  np.multiply(a, b, c) treats its third
        # positional argument as the *output* array, so the earlier call
        # never applied the cloud weight and overwrote reverse_scores.
        votes[i] = np.sum(full_masks * preds[:,i] * reverse_scores, axis=0)

    nodata_mask = (np.sum(votes, axis=0)==0)
    winner_indices = np.argmax(votes, axis=0)

    # Translate winning layer indices back into category values.
    lulc_derived = np.asarray(cats, dtype='uint8')[winner_indices]
    lulc_derived[nodata_mask]=255
    return lulc_derived

### *Moved to code base* - Create Prediction-Weighted Composite 
#### (brookie method)

In [None]:
def linear_scale(val,in_a,in_b,out_a,out_b,scale=1.0):
    """Linearly map ``val`` from the input range onto the output range.

    ``val`` is first clipped to the interval spanned by ``in_a`` and ``in_b``
    (given in either order).  ``in_a`` maps to ``out_a`` and ``in_b`` maps to
    ``out_b``; the mapped value is then multiplied by ``scale``.
    """
    lower, upper = sorted((in_a, in_b))
    clipped = np.clip(val, lower, upper)
    gradient = (out_b - out_a) / (in_b - in_a)
    offset = clipped - in_a
    return scale * (offset * gradient + out_a)

In [None]:
def cloud_scale(
        score,
        min_score=0.05,
        max_score=0.9,
        min_value=0.1,
        max_value=1.1,
        scale=1.0):
    """Turn a cloud score into a weight that falls as the score rises.

    Delegates to ``linear_scale`` with the input range given high-to-low:
    ``max_score`` maps to ``min_value`` and ``min_score`` maps to
    ``max_value``.  The result is multiplied by ``scale``.
    """
    # Input bounds are passed in descending order on purpose, which inverts
    # the mapping.
    return linear_scale(score, max_score, min_score, min_value, max_value, scale)

def pred_scale(
        pred,
        min_pred=0.6,
        max_pred=0.99,
        min_value=0.7,
        max_value=1.0,
        scale=1.0):
    """Turn a prediction value into a weight that rises with the prediction.

    Delegates to ``linear_scale``: ``min_pred`` maps to ``min_value`` and
    ``max_pred`` maps to ``max_value``.  The result is multiplied by
    ``scale``.
    """
    return linear_scale(pred, min_pred, max_pred, min_value, max_value, scale)

In [None]:
def weight(pred,cloud_score):
    """Return the vote weight: scaled prediction plus scaled cloud score."""
    pred_term = pred_scale(pred)
    cloud_term = cloud_scale(cloud_score)
    return pred_term + cloud_term

In [None]:
def get_scores(cpc,classes=range(4)):
    """Sum the vote weights per class.

    ``cpc`` is indexed as a 2-D array: column 0 is compared against each
    class value and the remaining columns of a matching row are unpacked
    into ``weight(pred, cloud_score)``.

    Returns a dict mapping every entry of ``classes`` to its summed weight
    (0 when no row matches).
    """
    totals = {}
    for label in classes:
        matching_rows = cpc[cpc[:, 0] == label]
        totals[label] = sum((weight(*row[1:]) for row in matching_rows), 0)
    return totals

In [None]:
def derive_lulc_map_predweighted_scaled(lulcs, scores, preds, categories=[0,1,2], threshold=0.3, stretch=False):
    """Combine per-scene class maps into one map using scaled vote weights.

    For each pixel, every scene whose cloud score is <= ``threshold`` votes
    for the category it assigned there.  A vote is worth
    ``weight(prediction, score)``, where ``prediction`` is the scene's
    prediction value for that category.  The category with the largest vote
    total wins; pixels with a zero vote total are set to 255.

    Args:
        lulcs: array (n_scenes, rows, cols) of category values.
        scores: array (n_scenes, rows, cols) of cloud scores.
        preds: array (n_scenes, n_categories, rows, cols); layer ``i`` along
            axis 1 holds the prediction for ``categories[i]``.
        categories: category values, in the order of the prediction layers.
            The default list is only read, never mutated.
        threshold: largest cloud score for which a scene may vote.
        stretch: accepted for signature compatibility with the simple
            variant.  It has no effect here: the value it used to control
            was computed but never used.

    Returns:
        uint8 array (rows, cols) of winning category values, 255 = no data.
    """
    array_shape = lulcs[0].shape
    cats = list(categories)
    valid_masks = (scores<=threshold)

    # One vote layer per real category.  Scenes labelled 255 never vote, so
    # no layer is kept for the no-data value.
    votes = np.zeros((len(cats),)+array_shape, dtype='float32')
    for i, c in enumerate(cats):
        full_masks = (lulcs==c) & valid_masks
        scaled_weight = weight(preds[:,i], scores)
        votes[i] = np.sum(full_masks * scaled_weight, axis=0)

    nodata_mask = (np.sum(votes, axis=0)==0)
    winner_indices = np.argmax(votes, axis=0)

    # Translate winning layer indices back into category values.
    lulc_derived = np.asarray(cats, dtype='uint8')[winner_indices]
    lulc_derived[nodata_mask]=255
    return lulc_derived