# Development: Applying 3-Category Classifier with New Architecture
Utilize already-trained model to classify input data, within the new chips & catalog paradigm.  

Two main modes of application:  
(1) Apply to arbitrary set of catalog data to generate scores;  
(2) Apply to imagery tiles in order to generate comprehensive LULC maps  

Both of these modes will be captured in a single notebook, with the intention that each can be executed independently.  
For starters, notebook will cover (2).
  
Date: 2019-02-11  
Author: Peter Kerins  

### Import statements
(may be over-inclusive)

In [5]:
from importlib import reload

# typical, comprehensive imports
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
import cartopy
import geojson
import fiona
import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
import ogr, gdal
import pandas as pd

from tensorflow.keras.models import load_model
import math
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.layers import Dropout
from tensorflow.keras.utils import to_categorical

import tensorflow as tf

import tensorflow.keras as keras
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Input, Add, Lambda
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, History

import pandas as pd

import descarteslabs as dl

ULU_REPO = os.environ["ULU_REPO"]
sys.path.append(ULU_REPO)
print (sys.path)

import utils.util_rasters
import utils.util_vectors
import utils.util_training
from utils.image_generator import ImageGenerator
import utils.util_imagery
import utils.util_workflow
import utils.util_chips
from utils.catalog_generator import CatalogGenerator
import utils.util_scoring
import utils.util_network

from importlib import reload

['/home/Taufiq.Rashid/anaconda3/envs/geoml/lib/python36.zip', '/home/Taufiq.Rashid/anaconda3/envs/geoml/lib/python3.6', '/home/Taufiq.Rashid/anaconda3/envs/geoml/lib/python3.6/lib-dynload', '', '/home/Taufiq.Rashid/anaconda3/envs/geoml/lib/python3.6/site-packages', '/home/Taufiq.Rashid/anaconda3/envs/geoml/lib/python3.6/site-packages/IPython/extensions', '/home/Taufiq.Rashid/.ipython', '/home/Taufiq.Rashid/UrbanLandUse', '/home/Taufiq.Rashid/UrbanLandUse', '/home/Taufiq.Rashid/UrbanLandUse', '/home/Taufiq.Rashid/UrbanLandUse', '/home/Taufiq.Rashid/UrbanLandUse']


## Preparation

### Set key variables

In [6]:
# core
data_root='/data/phase_iv/'

resolution = 5  # Lx:15 S2:10

# tiling
tile_resolution = resolution
tile_size = 256
tile_pad = 32


# misc
s2_bands=['blue','green','red','nir','swir1','swir2','alpha']; suffix='BGRNS1S2A'  # S2, Lx

# ground truth source: aue, aue+osm, aue+osm2
label_suffix = 'aue'

In [7]:
category_label = {0:'Open Space',1:'Non-Residential',\
                   2:'Residential Atomistic',3:'Residential Informal Subdivision',\
                   4:'Residential Formal Subdivision',5:'Residential Housing Project',\
                   6:'Roads',7:'Study Area',8:'Labeled Study Area',254:'No Data',255:'No Label'}

cats_map = {}
cats_map[0] = 0
cats_map[1] = 1
cats_map[2] = 2
cats_map[3] = 2
cats_map[4] = 2
cats_map[5] = 2

### Set input stack and model parameters

In [15]:
window = 17

# bands stuff outdated! needs to be reconciled with catalog filtering
# will ignore for the moment since this is a bigger fix...
# haven't done any examples yet incorporating additional chips beyond s2
# into construction of a training sample
bands_vir=s2_bands[:-1]
bands_sar=None
bands_ndvi=None
bands_ndbi=None
bands_osm=None

# this can get updated when cloudmasking is added
haze_removal = False

batch_size = 128
balancing = None

# move as appropriate

model_id = '3cat_14ct_green_2017_2-img-bl'
unflatten_input = True # is the model a cnn?
n_cats = 3 # number of categories

water_overwrite = False
water_mask = False

In [9]:
stack_label, feature_count = utils.util_workflow.build_stack_label(
        bands_vir=bands_vir,
        bands_sar=bands_sar,
        bands_ndvi=bands_ndvi,
        bands_ndbi=bands_ndbi,
        bands_osm=bands_osm,)
print(stack_label, feature_count)

vir 6


***

## Apply model: score results
Apply model to some set of chips and compare its predictions to the actual LULC values

In [24]:
place_images = {}
place_images['hindupur']=['U', 'V', 'W', 'X', 'Y', 'Z']
# place_images['singrauli']=['O','P','Q','R','S','T','U']
# place_images['vijayawada']=['H','I']
# place_images['jaipur']=['T','U','W','X','Y','Z']
# place_images['hyderabad']=['P','Q','R','S','T','U']
# place_images['sitapur']=['Q','R','T','U','V']
# place_images['kanpur']=['AH', 'AK', 'AL', 'AM', 'AN']
# place_images['belgaum']=['P','Q','R','S','T']
# place_images['parbhani']=['T','V','W','X','Y','Z']
# place_images['pune']=['P', 'Q', 'T', 'U', 'S']
# place_images['ahmedabad']= ['Z', 'V', 'W', 'X', 'Y', 'AA']
# place_images['malegaon']=  ['V', 'W', 'X', 'Y', 'Z']
# place_images['kolkata'] =  ['M','N','O','P','Q','R']
# place_images['mumbai']=['P','Q','R','S','U','V']

    
for place,image_list in place_images.items():

    network_filename = data_root+'models/'+model_id+'.hd5'
    network = load_model(network_filename, custom_objects={'loss': 'categorical_crossentropy'})
    network.summary()
    
    # compile the model if needed
#     utils.util_network.compile_network(network, loss, LR=0.0001)
    
    for image in image_list:

        label_suffix = 'aue'
        label_lot = '0'
        source = 's2'
        resolution = int(tile_resolution)
        resampling = 'bilinear'
        processing_level = None

        look_window = 17
        batch_size = 128

#         notes = 'application of ' + model_id + ' to 2017 green imagery from ' + place + '(' + image + ')'
        notes = 'testing py 3 migration'
        print(notes)
        
        # Set your input validation dataset file here
        input_filename = data_root+'models/'+'multi_city_2img-valid.csv'

        # Read the validation dataset to a Pandas Dataframe
        df = pd.read_csv(input_filename, encoding='utf8')
        print(len(df))
        
        mask = pd.Series(data=np.zeros(len(df.index),dtype='uint8'), index=range(len(df)), dtype='uint8')

        mask |= (df['city']==place) & (df['image']==image)

        # straight away remove road samples
        mask &= (df['lulc']!=6)

        # filter others according to specifications
        mask &= (df['gt_type']==label_suffix)
        mask &= (df['gt_lot']==int(label_lot))
        mask &= (df['source']==source)
        mask &= (df['resolution']==int(resolution))
        mask &= (df['resampling']==resampling)
        mask &= (df['processing']==str(processing_level).lower())

        print(np.sum(mask))
        

        # applying the mask to exclude all unnecessary samples
        df = df[mask]
        df.reset_index(drop=True,inplace=True)
        print(len(df))
        
        generator = CatalogGenerator(df,remapping='3cat',look_window=window,batch_size=batch_size,one_hot=3)
        
        generator.reset()

        #predict_generator(generator, steps=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0)
        predictions = network.predict_generator(generator, steps=generator.steps, verbose=1,
                          use_multiprocessing=True, max_queue_size=40, workers=64,)

        print(predictions.shape)
        
        Yhat = predictions.argmax(axis=-1)
        print(Yhat.shape)
        
        Y = generator.get_label_series().values
        print(Y.shape)
        
        print("evaluate validation")
        # hardcoded categories
        categories=[0,1,2]
        confusion = utils.util_scoring.calc_confusion(Yhat,Y,categories)
        recalls, precisions, accuracy = utils.util_scoring.calc_confusion_details(confusion)

        # Calculate f-score
        beta = 2
        f_score = (beta**2 + 1) * precisions * recalls / ( (beta**2 * precisions) + recalls )
        f_score_open = f_score[0] 
        f_score_nonres = f_score[1]  
        f_score_res = f_score[2]  
        f_score_roads = None#f_score[3]  
        f_score_average = np.mean(f_score)
        
        # expanding lists to match expected model_record stuff
        recalls_expanded = [recalls[0],recalls[1],recalls[2],None]
        precisions_expanded = [precisions[0],precisions[1],precisions[2],None]
        
        utils.util_scoring.record_model_application(
            model_id, notes, place + '(' + image + ')', label_suffix, resolution, stack_label, feature_count, 
            generator.look_window,cats_map, 
            confusion, recalls[0], recalls[1], recalls[2], recalls[3], precisions[0], precisions[1], precisions[2], precisions[3], accuracy, 
            f_score_open, f_score_nonres, f_score_res, f_score_roads, f_score_average)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 17, 17, 6)    0                                            
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 17, 17, 32)   1760        input_2[0][0]                    
__________________________________________________________________________________________________
activation_7 (Activation)       (None, 17, 17, 32)   0           conv2d_4[0][0]                   
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 17, 17, 32)   9248        activation_7[0][0]               
__________________________________________________________________________________________________
activation

AssertionError: 