# Core: Assign locales to training & validation tranches
Creates a pickle file that assigns each locale to either the training or the validation tranche, and stores that assignment permanently on disk. The resulting file is a required input for model training. This step only needs to be performed once per city.

Date: 2019-09-19  
Author: Eric Pietraszkiewicz, Peter Kerins  

## Preparation

### Import statements
(may be over-inclusive)

In [None]:
# typical, comprehensive imports
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np

import pandas as pd
import ogr, gdal

import collections
from pprint import pprint

import descarteslabs as dl
# print dl.places.find('illinois') ## TEST

# The repository location comes from the environment; a missing ULU_REPO
# variable raises KeyError here.
ULU_REPO = os.environ["ULU_REPO"]
# Make both the utils folder and the repository root importable, so the
# util_* modules imported below can be found.
for repo_path in (ULU_REPO+'/utils', ULU_REPO):
    sys.path.append(repo_path)
print(sys.path)

import util_descartes
import util_ml
import util_rasters
import util_vectors
import util_workflow
import util_chips
import util_training
import util_network
import util_scoring

### Set key variables

In [None]:
# Root directory under which the catalog is read and the models are written
data_root = '/data/phase_iv/'

# Tile settings
tile_resolution = 5
tile_size = 256
tile_pad = 32
# the catalog is filtered on this resolution, which follows the tile resolution
resolution = tile_resolution  # Lx:15 S2:10

# Imagery source settings
processing_level = None
source = 's2'

# Band names and the matching file-name suffix for each sensor
s2_bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'alpha']
s2_suffix = 'BGRNS1S2A'  # S2, Lx

s1_bands = ['vv', 'vh']
s1_suffix = 'VVVH'

resampling = 'bilinear'
processing = None

# Ground-truth label settings
label_suffix = 'aue'
label_lot = '0'

In [None]:
# City to process; used to locate the chip catalog and to filter its rows
place = "addis-ababa"

#### Create new dataset for training

In [None]:
# Build the path of this place's chip catalog under the data root, then load it
catalog_path = ''.join([data_root, 'chip_catalog_', place, '.csv'])
df = util_chips.load_catalog(catalog_path)
# number of rows in the loaded catalog
print(len(df.index))

In [None]:
# Per-city count of non-null entries in every other column (displayed as cell output)
df.groupby(by='city').count()

#### Masking without excluding locales

In [None]:
# Build a row filter for the catalog.
#
# The mask starts as an all-True *boolean* Series that shares the catalog's own
# index. Seeding it as bool (rather than uint8) keeps every `&=` below a logical
# AND with a boolean result regardless of how the installed pandas treats
# integer-with-boolean operands, and using df.index (rather than a fresh
# range) keeps the comparisons aligned row-for-row even if the catalog was
# loaded with a non-default index.
mask = pd.Series(True, index=df.index, dtype=bool)

# keep only the chips belonging to the selected city
mask &= (df['city'] == place)
print(np.sum(mask))

# filter others according to specifications
mask &= (df['gt_type'] == label_suffix)
mask &= (df['gt_lot'] == int(label_lot))
mask &= (df['source'] == source)
mask &= (df['resolution'] == int(resolution))
mask &= (df['resampling'] == resampling)
# the value compared against is the lowercased string form of `processing`
# (so None is matched as 'none')
mask &= (df['processing'] == str(processing).lower())

# number of rows that satisfy every criterion
print(np.sum(mask))

In [None]:
# Keep only the rows selected by the mask and renumber them from zero,
# discarding the old index
df = df[mask].reset_index(drop=True)
# rows remaining after filtering (displayed as cell output)
len(df)

In [None]:
# Apportion the locales of the filtered catalog.
# Only needed when the pickle files do not already exist; the storing cell
# below leaves any existing file untouched.
place_locales = util_chips.apportion_locales(df)
print(place_locales)

#### Store object(s)

In [None]:
# Persist the locale assignment: one pickle file per place under
# <data_root>/models/, never overwriting a file that is already there.
# NOTE(review): the object written to each per-place file is the complete
# `place_locales` mapping, not just that place's `locales` entry -- confirm
# this matches what the code that loads these files expects.
for place, locales in place_locales.items():
    place_locales_filename = data_root+'models/'+'locales'+'_'+place+'.pkl'
    if not os.path.exists(place_locales_filename):
        # the context manager guarantees the file is flushed and closed even
        # if pickling fails part-way
        with open(place_locales_filename, 'wb') as locales_file:
            pickle.dump(place_locales, locales_file)
    else:
        print('File already exists with name: '+ place_locales_filename)

---