# Core: Applying Classifier for Scoring
Use an already-trained model to classify input data within the new chips & catalog paradigm. This notebook can in principle be used for any type of model (e.g. 3-category, 6-category), but any custom category consolidation would require additional code.  

There are two main modes of application for a trained model:  
(1) Apply to arbitrary set of catalog data to generate scores;  
(2) Apply to imagery tiles in order to generate comprehensive LULC maps  

This notebook deals with (1).

Importantly, this notebook permits the sequential application of a model to an arbitrary number of "images" from any number of cities. (Here "images" actually refers to the chips drawn from training images.) This also permits the scoring of a single image.
  
Date: 2019-01-14  
Author: Peter Kerins  

### Import statements
(may be over-inclusive)

In [None]:
# typical, comprehensive imports
import warnings
warnings.filterwarnings('ignore')

import os
import sys
import json
import itertools
import pickle
import collections
from pprint import pprint
import numpy as np

import pandas as pd

import sklearn
from sklearn.preprocessing import StandardScaler 
import ogr, gdal
from tensorflow.keras.models import load_model
import math

import descarteslabs as dl

# Make the ULU repository and its utils/ folder importable; the repository
# location is read from the ULU_REPO environment variable.
ULU_REPO = os.environ["ULU_REPO"]
if ULU_REPO not in sys.path:
    sys.path.extend([ULU_REPO+'/utils', ULU_REPO])
print(sys.path)

import util_chips
import util_training
import util_scoring
import util_workflow
from catalog_generator import CatalogGenerator

## Preparation

### Set key variables

In [None]:
# --- core ---
# root directory under which the models and per-place chip catalogs are read
data_root = '/data/phase_iv/'

# resolution value later matched against the catalog's 'resolution' column
# (original note: Lx:15 S2:10)
resolution = 5

# --- tiling ---
tile_size, tile_pad = 256, 32
tile_resolution = resolution

# --- misc ---
# band names and the matching suffix (S2, Lx)
s2_bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'alpha']
suffix = 'BGRNS1S2A'

# ground truth source: aue, aue+osm, aue+osm2
label_suffix = 'aue'

In [None]:
# Values used to filter the chip catalog; each one is compared against a
# catalog column when the scoring subset is selected.
source = 's2'                      # compared with the 'source' column
label_suffix = 'aue'               # compared with the 'gt_type' column
label_lot = '0'                    # compared (as int) with the 'gt_lot' column
resolution = int(tile_resolution)  # compared with the 'resolution' column
resampling = 'bilinear'            # compared with the 'resampling' column
processing_level = None            # compared as the string 'none' with 'processing'

In [None]:
# n_cats = 3
# remapping = '3cat'
# categories = [0,1,2,]

# category_label = {0:'Open Space',1:'Non-Residential',\
#                    2:'Residential Atomistic',3:'Residential Informal Subdivision',\
#                    4:'Residential Formal Subdivision',5:'Residential Housing Project',\
#                    6:'Roads',7:'Study Area',8:'Labeled Study Area',254:'No Data',255:'No Label'}

# cats_map = {}
# cats_map[0] = 0
# cats_map[1] = 1
# cats_map[2] = 2
# cats_map[3] = 2
# cats_map[4] = 2
# cats_map[5] = 2

In [None]:
# Six-category configuration: no remapping, every category maps to itself.
categories = list(range(6))
n_cats = len(categories)
remapping = None

# display names for the LULC codes
category_label = {
    0: 'Open Space',
    1: 'Non-Residential',
    2: 'Residential Atomistic',
    3: 'Residential Informal Subdivision',
    4: 'Residential Formal Subdivision',
    5: 'Residential Housing Project',
    6: 'Roads',
    7: 'Study Area',
    8: 'Labeled Study Area',
    254: 'No Data',
    255: 'No Label',
}

# identity mapping over the six scored categories
cats_map = {category: category for category in categories}

In [None]:
# Switches controlling which chips are dropped before scoring.
exclude_roads = True             # drop chips whose 'lulc' value is 6
exclude_locales_bad = True       # drop the locales listed next to each place's images
exclude_locales_training = True  # keep only locales in entry [1] of the place's locale assignment

### Set input stack and model parameters

In [None]:
# look window handed to the catalog generator, and the prediction batch size
window = 17
batch_size = 128

# bands stuff outdated! needs to be reconciled with catalog filtering
# will ignore for the moment since this is a bigger fix...
# haven't done any examples yet incorporating additional chips beyond s2
# into construction of a training sample
bands_vir = s2_bands[:-1]  # every S2 band except the last entry of the list
bands_sar = bands_ndvi = bands_ndbi = bands_osm = None

# Name of the model to use for prediction

# model_id = '3cat_s-asia_all_exc' # South Asia Model
# model_id = '3cat_s+c-asia_all_exc'  # South & Central Asia Model
# model_id = '3cat_14ct_green_2017_2-img-bl'  # 2 img, all locales
# model_id = '3cat_14ct_green_2017_man_exLocales'  # 2 img, locales excluded
model_id = '6cat_7city_EastAfrica'

In [None]:
# Derive the stack label and feature count from the configured band groups.
band_groups = {
    'bands_vir': bands_vir,
    'bands_sar': bands_sar,
    'bands_ndvi': bands_ndvi,
    'bands_ndbi': bands_ndbi,
    'bands_osm': bands_osm,
}
stack_label, feature_count = util_workflow.build_stack_label(**band_groups)
print(stack_label, feature_count)

***

## Apply model: score results
Apply model to some set of chips and compare its predictions to the actual LULC values

### Select places and images

In [None]:
# place name -> (list of image identifiers, list of locale ids treated as "bad");
# filled in by the following cells and iterated by the scoring loop
place_images = {}

In [None]:
# Each entry: place -> (image identifiers to score, locale ids to exclude as bad).
place_images.update({
    'hindupur': (['U', 'V', 'W', 'X', 'Y', 'Z'], [13]),
    'singrauli': (['O', 'P', 'Q', 'R', 'S', 'T', 'U'], [38]),
    'vijayawada': (['H', 'I'], [68]),
    'jaipur': (['T', 'U', 'W', 'X', 'Y', 'Z'], [27, 72]),
    # NOTE(review): locale 60 is listed twice for hyderabad; harmless for a
    # membership test but possibly a typo for another locale - confirm
    'hyderabad': (['P', 'Q', 'R', 'S', 'T', 'U'], [10, 44, 46, 60, 79, 55, 60]),
    'sitapur': (['Q', 'R', 'T', 'U', 'V'], [2, 27, 43]),
    'kanpur': (['AH', 'AK', 'AL', 'AM', 'AN'], [6, 19, 57, 67]),
    'belgaum': (['P', 'Q', 'R', 'S', 'T'], [13]),
    'parbhani': (['T', 'V', 'W', 'X', 'Y', 'Z'], [10, 42, 54]),
    'pune': (['P', 'Q', 'T', 'U', 'S'], [9, 54, 73]),
    'ahmedabad': (['Z', 'V', 'W', 'X', 'Y', 'AA'], [22, 25, 45, 65, 70]),
    'malegaon': (['V', 'W', 'X', 'Y', 'Z'], [6]),
    'kolkata': (['M', 'N', 'O', 'P', 'Q', 'R'], [16, 90, 105, 195, 218]),
    'mumbai': (['P', 'Q', 'R', 'S', 'U', 'V'],
               [24, 42, 73, 98, 99, 103, 123, 131, 133, 152, 160, 172]),
    'coimbatore': (['Q', 'R', 'S'], [15, 21, 68, 74]),
    'jalna': (['AV', 'AW', 'AX'], [12, 20, 31, 34, 42, 44, 65, 69, 73]),
    'kozhikode': (['J', 'K', 'L'], [41]),
})

In [None]:
# place_images['dhaka']=['A','B','C'],[72]
# place_images['saidpur']=['A','B','C'],[2, 21, 32, 39, 43, 47, 52]
# place_images['rajshahi']=['A','B','C'],[17]
# place_images['lahore']=['A','B','C'],[33, 70]
# place_images['karachi']=['A','B','C'],[20, 29, 62]
# place_images['sialkot']=['A','B','C'],[32, 53]

In [None]:
# place_images['gorgan']=['A','B','C'],[36, 59, 69]
# place_images['qom']=['A','B','C'],[1]
# place_images['tehran']=['A','B','C'],[28, 56, 76]
# place_images['shymkent']=['A','B','C'],[62]
# place_images['pokhara']=['A','B','C'],[25, 28, 31, 49, 51]
# place_images['bukhara']=['A','B','C'],[61]
# place_images['tashkent']=['A','B','C'],[42]

In [None]:
# place_images['culiacan']=['A', 'B'],[1, 9, 29, 51, 61, 66, 71]
# place_images['guadalajara']=['A', 'B'],[22, 47, 65]
# place_images['leon']=['A', 'B'],[3, 16, 27, 36, 38, 46, 67]
# place_images['mexico-city']=['A', 'B'],[0, 13, 57, 75, 112, 183, 198]
# place_images['reynosa']=['A', 'B'],[25, 31, 40, 52, 61]
# place_images['tijuana']=['A', 'B'],[9, 45, 49, 53]
# place_images['merida']=['A', 'B'],[25, 55, 57]
# place_images['monterrey']=['A', 'B'],[1]
# place_images['tuxtla']=['A', 'B'],[3, 20, 58, 64]

### Load model

In [None]:
# Load the per-category weights stored next to the model; they parameterize
# the custom loss function the saved network was compiled with.
category_weights_filename = data_root+'models/'+model_id+'_category_weights.pkl'
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
# The context manager closes the file handle (the bare open() call leaked it).
with open(category_weights_filename, "rb") as weights_file:
    category_weights = pickle.load(weights_file)
# weights in the mapping's iteration order
weights = tuple(weight for _, weight in category_weights.items())
print(weights)

# Load the model
network_filename = data_root+'models/'+model_id+'.hd5'
network = load_model(
    network_filename,
    custom_objects={'loss': util_training.make_loss_function_wcc(weights)},
)
network.summary()

### Load chips

In [None]:
# Start from the main chip catalog, then add the per-place catalogs of any
# selected place that is stored in its own CSV file.
df = util_chips.load_catalog()
print(len(df.index))

# places whose chips live in separate 'chip_catalog_<place>.csv' files
new_places = [
    'dhaka',
    'saidpur',
    'rajshahi',
    'lahore',
    'karachi',
    'sialkot',
    'coimbatore',
    'jalna',
    'kozhikode',
    'bukhara',
    'gorgan',
    'pokhara',
    'qom',
    'shymkent',
    'tashkent',
    'tehran',
    'culiacan',
    'guadalajara',
    'leon',
    'reynosa',
    'tijuana',
    'merida',
    'monterrey',
    'tuxtla',
]

# Filter in list order so catalogs are always loaded in the same order
# (a set intersection yields an arbitrary order).
included_places = [place for place in new_places if place in place_images]

place_frames = []
for place in included_places:
    print(place)
    place_catalog_path = data_root+'chip_catalog_'+place+'.csv'
    print(place_catalog_path)

    df_place = pd.read_csv(place_catalog_path)
    print('no of chips:', len(df_place))

    place_frames.append(df_place)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and copied the
# whole catalog on every iteration. ignore_index gives a fresh 0..n-1 index.
df = pd.concat([df] + place_frames, ignore_index=True)
print(len(df.index))

### Load training/validation locale assignments

In [None]:
# Merge the locale-assignment pickle of every selected place into one dict.
combined_place_locales = {}
for place in place_images:
    locales_path = '{}models/locales_{}.pkl'.format(data_root, place)
    # latin1 decoding is requested explicitly; presumably the pickles were
    # written by Python 2 - confirm
    with open(locales_path, "rb") as locales_file:
        loaded_locales = pickle.load(locales_file, encoding='latin1')
    combined_place_locales.update(loaded_locales)
pprint(combined_place_locales)

### Sequentially apply model to every included "image" (ie set of chips)

In [None]:
# Score the model separately on every selected image of every selected place
# and record one model-application entry per (place, image) pair.

def _pad_scores(values, slots=7):
    """Return ``values`` as a list right-padded with ``None`` to ``slots`` entries.

    The model record is filled with seven per-category slots regardless of how
    many categories were scored. Inputs longer than ``slots`` are returned
    unpadded.
    """
    padded = list(values)
    padded.extend([None] * max(0, slots - len(padded)))
    return padded


# F-beta weighting: beta = 2 weights recall more heavily than precision
beta = 2

# each value is (image identifiers, locale ids to exclude as bad)
for place, (images, bad_locales) in place_images.items():

    for image in images:

        image_tag = place + '(' + image + ')'
        notes = 'application of ' + model_id + ' to 2017 green imagery from ' + image_tag
        print(notes)

        # Build the selection as a boolean mask from the start, so that
        # df.loc[mask] is unambiguously boolean row selection.
        mask = (df['city']==place) & (df['image']==image)

        if exclude_locales_training:
            # keep only locales in entry [1] of the place's assignment
            # (presumably the validation locales - confirm)
            mask &= df['locale'].isin(combined_place_locales[place][1])

        if exclude_roads:
            mask &= (df['lulc']!=6)

        mask &= (df['gt_type']==label_suffix)
        mask &= (df['gt_lot']==int(label_lot))
        mask &= (df['source']==source)
        mask &= (df['resolution']==int(resolution))
        mask &= (df['resampling']==resampling)
        mask &= (df['processing']==str(processing_level).lower())

        if exclude_locales_bad:
            mask &= ~df['locale'].isin(bad_locales)

        print(np.sum(mask))

        # non-inplace reset: avoids mutating a slice of the full catalog
        df_sub = df.loc[mask].reset_index(drop=True)
        print(len(df_sub))

        if df_sub.empty:
            # nothing to score for this image; keep going with the others
            print('no matching chips for ' + image_tag + '; skipping')
            continue

        generator = CatalogGenerator(df_sub,remapping=remapping,look_window=window,batch_size=batch_size,one_hot=n_cats)

        generator.reset()
        # NOTE(review): predict_generator is deprecated in newer TensorFlow
        # releases in favour of Model.predict - revisit when upgrading
        predictions = network.predict_generator(generator, steps=generator.steps, verbose=1,
                          use_multiprocessing=False, max_queue_size=40, workers=64,)
        print(predictions.shape)

        # predicted category = index of the highest score along the last axis
        Yhat = predictions.argmax(axis=-1)
        print(Yhat.shape)

        Y = generator.get_label_series().values
        print(Y.shape)

        print("evaluate validation")

        confusion = util_scoring.calc_confusion(Yhat,Y,categories)
        recalls, precisions, accuracy = util_scoring.calc_confusion_details(confusion)

        # Calculate f-score
        f_scores = (beta**2 + 1) * precisions * recalls / ( (beta**2 * precisions) + recalls )
        f_score_average = np.mean(f_scores)

        # pad the per-category lists to the length the model record is given
        util_scoring.record_model_application(
                model_id, notes, image_tag, label_suffix, resolution, stack_label, feature_count,
                generator.look_window, cats_map,
                confusion, _pad_scores(recalls), _pad_scores(precisions), accuracy,
                _pad_scores(f_scores), f_score_average)

-------------------------