In [16]:
import pandas as pd
import numpy as np
from random import shuffle
from osgeo import ogr, osr
from sentinelhub import WmsRequest, WcsRequest, MimeType, CRS, BBox, constants
import logging
from collections import Counter
import datetime
import os
import yaml
from sentinelhub import DataSource
import scipy.sparse as sparse
import scipy
from scipy.sparse.linalg import splu
from skimage.transform import resize
from sentinelhub import CustomUrlParam
from time import time as timer
from time import sleep as sleep
import multiprocessing
import math
import reverse_geocoder as rg
import pycountry
import pycountry_convert as pc
import hickle as hkl
from shapely.geometry import Point, Polygon
import geopandas
from tqdm import tnrange, tqdm_notebook
import math
import boto3
from pyproj import Proj, transform
from timeit import default_timer as timer

In [17]:
# Read the service credentials from the project-level YAML config file.
with open("../config.yaml", 'r') as stream:
    key = yaml.safe_load(stream)

# Sentinel Hub instance id and AWS credentials used by the cells below.
API_KEY = key['key']
AWSKEY = key['awskey']
AWSSECRET = key['awssecret']

In [67]:
# Execute the project helper scripts inside this kernel so that their
# top-level names become notebook globals (IPython `%run` semantics).
# NOTE(review): names used below but not defined in this notebook (offset_x,
# offset_y, calculate_epsg, calcSlope, intialize_smoother, smooth,
# parallel_apply_along_axis, calculate_and_save_best_images, s2model) are
# presumably provided by these scripts -- confirm against the files.
%run ../src/utils/slope.py
%run ../src/utils/utils.py
%run ../src/utils/whittaker_smoother.py
%run ../src/utils/download_utils.py
%run ../src/dsen2/utils/DSen2Net.py

In [107]:
# (start, end) date strings used as the default time window of every request.
dates = ('2018-12-15', '2020-01-15')

# IMSIZE evaluates to 632, the image side length that the download and
# masking functions below also hard-code.
SIZE = 5 * 9
IMSIZE = 14 * SIZE + 2

In [108]:
# Named study landscapes, stored as (latitude, longitude) pairs.
landscapes = {
    'ethiopia-tigray': (13.540810, 38.177220),
    'kenya-makueni-2': (-1.817109, 37.44563),
    'ghana': (9.259359, -0.83375),
    'niger-koure': (13.18158, 2.478),
    'cameroon-farnorth': (10.596, 14.2722),
    'mexico-campeche': (18.232495, -92.1234215),
    'malawi-rumphi': (-11.044, 33.818),
    'ghana-sisala-east': (10.385, -1.765),
    'ghana-west-mamprusi': (10.390084, -0.846330),
    'ghana-kwahu': (6.518909, -0.826008),
    'senegal-16b': (15.82585, -15.34166),
    'india-kochi': (9.909, 76.254),
    'india-sidhi': (24.0705, 81.607),
    'brazil-esperito-santo': (-20.147, -40.837),
    'brazil-paraiba': (-22.559943, -44.186629),
    'brazil-goias': (-14.905595, -48.907399),
    'colombia-talima': (4.179529, -74.889171),
    'drc-kafubu': (-11.749636, 27.586622),
    'thailand-khon-kaen': (15.709725, 102.546518),
    'indonesia-west-java': (-6.721101, 108.280949),
    'madagascar': (-18.960152, 47.469587),
    'tanzania': (-6.272258, 36.679824),
    'chile': (-36.431237, -71.872030),
    'indonesia-jakarta': (-6.352580, 106.677072),
    'caf-baboua': (5.765917, 14.791618),
    'honduras': (14.096664, -88.720304),
    'nicaragua': (12.398014, -86.963042),
    'china': (26.673679, 107.464231),
    'australia-west': (-32.666762, 117.411197),
    'mexico-sonora': (29.244288, -111.243230),
    'south-africa': (-30.981698, 28.727301),
    'maldonado-uraguay': (-34.629250, -55.004331),
    'dominican-rep-la-salvia': (18.872589, -70.462961),
    'guatemala-coban': (15.3, -90.8),
    'senegal-tucker-a': (15.350595, -15.459789),
    'elsalvador-imposible': (13.727334, -90.015579),
}

# Landscape processed by this run; it also names the output folder.
landscape = 'senegal-tucker-a'

# Ad-hoc coordinates that are not part of the lookup table:
#coords = (7.702058, -0.709011) # brong ahafo, bono east
#coords = (7.398111, -1.269223) # cocoa
#coords = (16.032170, -90.144511) # Guatemala
#coords = (13.757749, -90.004949) # elsalvador imposible
#coords = (13.933745, -84.690842) # Bonanza, Nicaragua

OUTPUT_FOLDER = f'../tile_data/{landscape}/'
# The table stores (latitude, longitude); the rest of the notebook expects
# the pair reversed, i.e. (longitude, latitude).
coords = tuple(reversed(landscapes[landscape]))
print(OUTPUT_FOLDER, coords)

../tile_data/senegal-tucker-a/ (-15.459789, 15.350595)


In [109]:
# Export the landscape lookup table as a CSV with one row per landscape.
landscape_df = pd.DataFrame({
    'landscape': list(landscapes.keys()),
    'latitude': [lat for lat, _ in landscapes.values()],
    'longitude': [lon for _, lon in landscapes.values()],
})

landscape_df.to_csv("../data/latlongs/landscapes.csv", index=False)
print(len(landscape_df))

36


# Helper functions (to be moved to a utils file)

In [110]:
# Grid dimensions; both axes use the same value.
GRID_SIZE_X = GRID_SIZE_Y = 1

# Image extent: 14 units per grid cell along each axis.
IMAGE_X, IMAGE_Y = 14 * GRID_SIZE_X, 14 * GRID_SIZE_Y

# Test extent along each axis.
TEST_X = TEST_Y = 5

In [111]:
# Blending weights used to overlap the cloud and shadow interpolation smoothly:
# c_arr weights the interpolated candidate (largest at the window centre),
# o_arr = 1 - c_arr weights the original pixels.
c_arr = np.array([[1, 1, 1, 1, 1],
                  [1, 2, 2, 2, 1],
                  [1, 2, 3, 2, 1],
                  [1, 2, 2, 2, 1],
                  [1, 1, 1, 1, 1]]) / 3
o_arr = 1 - c_arr

# Replicate both 5x5 weight maps along a new trailing axis of length 10.
c_arr = np.repeat(c_arr[..., np.newaxis], 10, axis=-1)
o_arr = np.repeat(o_arr[..., np.newaxis], 10, axis=-1)

In [112]:
def calculate_bbx(coord, step_x, step_y, expansion, multiplier = 1.):
    ''' Calculates two opposite corners of the bounding box that lies
        step_x * step_y tiles (6300 units each) away from coord

        Parameters:
         coord: origin coordinate, copied before being handed to offset_x / offset_y
         step_x, step_y: tile offsets along x and y
         expansion: padding subtracted from the lower and added to the upper corner
         multiplier: extent of the box in tile units

        Returns:
         bbx (tuple): (upper corner, lower corner), in that order
    '''
    tile = 6300

    lower = offset_x(np.copy(coord), tile*step_x - expansion)
    lower = offset_y(lower, tile*step_y - expansion)

    upper = offset_x(np.copy(coord), tile*(step_x + multiplier) + expansion)
    upper = offset_y(upper, tile*(step_y + multiplier) + expansion)
    return (upper, lower)


def calculate_bbx_pyproj(coord, step_x, step_y, expansion, multiplier = 1.):
    ''' Calculates two opposite corners of a tile bounding box in UTM
        coordinates using pyproj instead of OGR. It seems sentinelhub uses
        pyproj, so this may be more pixel accurate (?)

        Parameters:
         coord: origin in x, y format; coord[1] and coord[0] are passed to
                the EPSG:4326 -> UTM transform in that order
         step_x, step_y: tile offsets (6300 m per tile) from the origin
         expansion: padding in metres subtracted from the bottom-left and
                    added to the top-right corner
         multiplier: extent of the box in tile units

        Returns:
         ((bottom_left, top_right), crs): corner tuples in UTM metres and the
         matching sentinelhub CRS member
    '''
    inproj = Proj('epsg:4326')
    outproj_code = calculate_epsg(coord)
    outproj = Proj('epsg:' + str(outproj_code))

    coord_utm = transform(inproj, outproj, coord[1], coord[0])
    coord_utm_bottom_left = (coord_utm[0] + step_x*6300 - expansion,
                             coord_utm[1] + step_y*6300 - expansion)

    coord_utm_top_right = (coord_utm[0] + (step_x+multiplier) * 6300 + expansion,
                           coord_utm[1] + (step_y+multiplier) * 6300 + expansion)

    # The last two digits of the EPSG code are the UTM zone. sentinelhub names
    # its CRS members without zero padding (UTM_5N, not UTM_05N), so the zone
    # is converted to an int before the name is built.
    zone = int(str(outproj_code)[3:])
    direction = 'N' if coord[1] >= 0 else 'S'
    utm_epsg = "UTM_{}{}".format(zone, direction)
    return (coord_utm_bottom_left, coord_utm_top_right), CRS[utm_epsg]

def pts_in_geojson(lats, longs, geojson):
    """ Tests whether any of the given points falls inside the first geometry
        of a geojson file

        Parameters:
         lats (iterable): first coordinate of each point (passed as Point x)
         longs (iterable): second coordinate of each point (passed as Point y)
         geojson (str): path to the geojson file

        Returns:
         bool: True if at least one point is contained, else False
    """
    polys = geopandas.GeoSeries(geopandas.read_file(geojson)['geometry'])
    pnts = [Point(x, y) for x, y in zip(list(lats), list(longs))]

    # NOTE(review): only element [0] of the containment result is inspected,
    # so geometries after the first are ignored -- confirm this is intended.
    hits = [polys.contains(pt)[0] for pt in pnts]
    return True if any(hits) else False

# Data download

In [158]:
def identify_clouds_new(bbox, epsg, dates = dates):
    """ Downloads the cloud-probability and shadow-input layers for a tile,
        drops the cloudiest time steps and computes the shadow mask

        Parameters:
         bbox (tuple): corner coordinates handed to sentinelhub BBox
         epsg (CRS): coordinate reference system of bbox
         dates (tuple): (start, end) date strings bounding the request

        Returns:
         cloud_img (arr): cloud probabilities of the retained time steps
         cloud_img (arr): the same array again (callers unpack four values)
         shadows (arr): output of mcm_shadow_mask for the retained steps
         clean_steps (list): indices of the retained steps in the full request
    """
    size = (5*9*14)+2
    box = BBox(bbox, crs = epsg)

    def _mask_request(layer, image_format):
        # Both layers share every request setting except layer and bit depth.
        return WmsRequest(
            layer=layer,
            bbox=box,
            time=dates,
            width=size,
            height=size,
            image_format=image_format,
            maxcc=0.7,
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
            time_difference=datetime.timedelta(hours=72),
        )

    cloud_request = _mask_request('CLOUD_NEW', MimeType.TIFF_d8)
    shadow_request = _mask_request('SHADOW', MimeType.TIFF_d16)

    cloud_img = np.array(cloud_request.get_data())
    print(f"The max clouds is {np.max(cloud_img)}")
    # Rescale 8-bit values to [0, 1] when the service returned raw integers.
    if np.max(cloud_img) > 10:
        cloud_img = cloud_img / 255

    assert np.max(cloud_img) <= 1.
    print("Cloud_probs shape: {}".format(cloud_img.shape))

    # A time step is discarded when more than a fifth of its pixels have a
    # cloud probability above 0.33.
    n_steps = cloud_img.shape[0]
    n_cloud_px = np.count_nonzero(cloud_img.reshape(n_steps, -1) > 0.33, axis=1)
    cloud_steps = np.argwhere(n_cloud_px > size**2 / 5).flatten()
    cloudy = set(cloud_steps.tolist())
    clean_steps = [x for x in range(n_steps) if x not in cloudy]
    print(f"Removing {len(cloud_steps)} from S2 download, saving {7.32 * len(cloud_steps)} PU")

    # Only the retained steps are downloaded for the shadow layer.
    shadow_img = np.array(shadow_request.get_data(data_filter = clean_steps))
    print("Shadows_shape: {}".format(shadow_img.shape))
    print(f"The max shadows is {np.max(shadow_img)}")
    # Rescale 16-bit values to [0, 1] when the service returned raw integers.
    if np.max(shadow_img) > 10:
        shadow_img = shadow_img / 65535
    print(np.max(shadow_img))

    cloud_img = np.delete(cloud_img, cloud_steps, 0)
    shadows = mcm_shadow_mask(np.array(shadow_img), cloud_img)

    print(f"Cloud probs: {cloud_img.shape}")
    print(f"Shadow shape {shadows.shape}")
    return cloud_img, cloud_img, shadows, clean_steps

    
    
def download_dem(bbox, epsg):
    """ Downloads the DEM layer for a bounding box and converts it to slope

        Parameters:
         bbox (tuple): corner coordinates handed to sentinelhub BBox
         epsg (CRS): coordinate reference system of bbox

        Returns:
         slope (arr): (632, 632, 1) output of calcSlope with a one pixel
                      border cropped from the 634 x 634 request
    """
    dem_s = 630 + 4
    dem_request = WmsRequest(
        data_source=DataSource.DEM,
        layer='DEM',
        bbox=BBox(bbox, crs = epsg),
        width=dem_s,
        height=dem_s,
        instance_id=API_KEY,
        image_format=MimeType.TIFF_d32f,
        custom_url_params={CustomUrlParam.SHOWLOGO: False},
    )
    elevation = dem_request.get_data()[0].reshape((1, dem_s, dem_s))

    # calcSlope receives two separate constant grids filled with 10.
    grid_a = np.full((dem_s, dem_s), 10)
    grid_b = np.full((dem_s, dem_s), 10)
    slope = calcSlope(elevation, grid_a, grid_b, zScale = 1, minSlope = 0.02)

    slope = slope.reshape((dem_s, dem_s, 1))
    # Drop the outermost pixel on every side.
    return slope[1:-1, 1:-1, :] #/ np.max(dem_image)
 

def download_layer(bbox,  clean_steps, epsg, dates = dates, year = 2019):
    """ Downloads the L2A sentinel layer with 10 and 20 meter bands

        Parameters:
         bbox (list): output of calculate_bbx_pyproj
         clean_steps (list): indices of the time steps to download
         epsg (CRS): coordinate reference system associated with bbox
         dates (tuple): (start, end) date strings bounding the request
         year (int): reference year; image dates are expressed as day of
                     this year (previous / next year shifted by -365 / +365)

        Returns:
         img (arr): (time, 632, 632, bands) array, 10 m bands first and
                    20 m bands after them
         image_dates (arr): day-of-year value for each retained time step
    """
    box = BBox(bbox, crs = epsg)

    def _fetch(layer, res_m, resampling, n_bands):
        # Request one L2A layer, scale it to reflectance and resample it to
        # the 632 x 632 grid. Returns (image, processing units, request).
        request = WcsRequest(
            layer=layer,
            bbox=box,
            time=dates,
            image_format = MimeType.TIFF_d16,
            maxcc=0.7,
            resx='{}m'.format(res_m), resy='{}m'.format(res_m),
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: resampling,
                                 constants.CustomUrlParam.UPSAMPLING: resampling},
            time_difference=datetime.timedelta(hours=72),
        )
        print("Downloading L2A {}m layer".format(res_m))
        bands = np.stack(request.get_data(data_filter = clean_steps))
        print(f"The max {res_m}m is {np.max(bands)}")
        # Raw 16-bit integers are rescaled to [0, 1].
        if np.max(bands) >= 10:
            bands = bands / 65535
        assert np.max(bands) <= 2.

        usage = (bands.shape[1]*bands.shape[2])/(512*512) * (n_bands/3) * bands.shape[0]
        print("Original {} meter bands size: {}, using {} PU".format(res_m, bands.shape, usage))
        bands = resize(bands, (bands.shape[0], 632, 632, bands.shape[-1]), order = 0)
        return bands, usage, request

    img_20, s2_20_usage, _ = _fetch('L2A20', 20, 'NEAREST', 6)
    img_10, s2_10_usage, image_request = _fetch('L2A10', 10, 'BICUBIC', 4)

    img = np.concatenate([img_10, img_20], axis = -1)
    print(f"Sentinel 2 used {s2_20_usage + s2_10_usage} PU")

    # Convert acquisition dates to day-of-year relative to `year`.
    # NOTE(review): dates outside year-1 .. year+1 are skipped, which would
    # shift the indices compared against clean_steps below -- confirm the
    # request window never exceeds that range.
    image_dates = []
    for date in image_request.get_dates():
        day_of_year = starting_days[(date.month-1)] + date.day
        if date.year == year - 1:
            image_dates.append(-365 + day_of_year)
        elif date.year == year:
            image_dates.append(day_of_year)
        elif date.year == year + 1:
            image_dates.append(365 + day_of_year)
    image_dates = [val for idx, val in enumerate(image_dates) if idx in clean_steps]
    image_dates = np.array(image_dates)

    return img, image_dates
        
        
def download_sentinel_1(bbox, epsg, imsize = 632, 
                        dates = dates, layer = "SENT", year = 2019):
    """ Downloads the Sentinel-1 layer for a bounding box

        Parameters:
         bbox (list): output of calculate_bbx_pyproj
         epsg (CRS): coordinate reference system associated with bbox
         imsize (int): side length in pixels of the returned images
         dates (tuple): (start, end) date strings bounding the request
         layer (str): Sentinel Hub layer name to request
         year (int): reference year; image dates are expressed as day of
                     this year (previous / next year shifted by -365 / +365)

        Returns:
         s1 (arr): (time, imsize, imsize, bands) array
         image_dates (arr): day-of-year value for each returned image

        Request failures are not retried; exceptions propagate to the caller.
    """
    box = BBox(bbox, crs = epsg)
    image_request = WcsRequest(
            layer=layer,
            bbox=box,
            time=dates,
            image_format = MimeType.TIFF_d16,
            data_source=DataSource.SENTINEL1_IW,
            maxcc=1.0,
            resx='10m', resy='5m',
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'NEAREST',
                                constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
            time_difference=datetime.timedelta(hours=96),
        )
    # With more than 50 scenes available only every other one is downloaded.
    n_available = len(image_request.download_list)
    data_filter = None
    if n_available > 50:
        data_filter = [x for x in range(n_available) if x % 2 == 0]
    img_bands = image_request.get_data(data_filter = data_filter)
    s1 = np.stack(img_bands)
    # Raw 16-bit integers are rescaled to [0, 1].
    if np.max(s1) >= 1000:
        s1 = s1 / 65535.

    print(f"The max s1 is {np.max(s1)}")
    print(f"Sentinel 1 used {(2/3)*s1.shape[0] * (s1.shape[1]*s1.shape[2])/(512*512)} PU for \
          {s1.shape[0]} out of {len(image_request.download_list)} images")
    # Resample to twice the target size, then average 2 x 2 pixel blocks.
    s1 = resize(s1, (s1.shape[0], imsize*2, imsize*2, s1.shape[-1]), order = 0)
    s1 = np.reshape(s1, (s1.shape[0], s1.shape[1]//2, 2, s1.shape[2] // 2, 2, s1.shape[-1]))
    s1 = np.mean(s1, (2, 4))

    # Keep only the dates of the images that were actually downloaded, so
    # that s1 and image_dates stay aligned when data_filter is active.
    request_dates = image_request.get_dates()
    if data_filter is not None:
        kept = set(data_filter)
        request_dates = [d for i, d in enumerate(request_dates) if i in kept]

    image_dates = []
    for date in request_dates:
        day_of_year = starting_days[(date.month-1)] + date.day
        if date.year == year - 1:
            image_dates.append(-365 + day_of_year)
        elif date.year == year:
            image_dates.append(day_of_year)
        elif date.year == year + 1:
            image_dates.append(365 + day_of_year)
    image_dates = np.array(image_dates)

    # Drop images in which the summed values >= 1 exceed the threshold.
    s1c = np.copy(s1)
    s1c[np.where(s1c < 1.)] = 0
    n_pix_oob = np.sum(s1c, axis = (1, 2, 3))
    to_remove = np.argwhere(n_pix_oob > (imsize*2*imsize*2)/50)
    s1 = np.delete(s1, to_remove, 0)
    image_dates = np.delete(image_dates, to_remove)
    return s1, image_dates

def identify_s1_layer(coords):
    """ Chooses the Sentinel-1 layer ("SENT" or "SENT_DESC") for a location
        from its continent and, for South America and Asia, its latitude

        Parameters:
         coords (tuple): location handed to reverse_geocoder; coords[0] is
                         compared against the latitude thresholds

        Returns:
         layer (str): "SENT" or "SENT_DESC"; continents without an explicit
                      rule (e.g. 'EU') fall back to "SENT", the default
                      layer used elsewhere in this notebook
    """
    results = rg.search(coords)
    country = results[-1]['cc']
    continent_name = pc.country_alpha2_to_continent_code(country)
    latitude = coords[0]

    # Default for continents without an explicit rule; previously `layer`
    # was left unbound for them and the print below raised.
    layer = "SENT"
    if continent_name in ['AF', 'OC']:
        layer = "SENT"
    elif continent_name in ['SA']:
        layer = "SENT" if latitude > -7.11 else "SENT_DESC"
    elif continent_name in ['AS']:
        layer = "SENT" if latitude > 23.3 else "SENT_DESC"
    elif continent_name in ['NA']:
        layer = "SENT_DESC"
    print(continent_name)
    print(layer)
    return layer

# Cloud and shadow removal

In [159]:
def remove_cloud_and_shadows(tiles, c_probs, shadows, image_dates):
    """ Interpolates clouds and shadows for each time step with 
        linear combination of proximal clean time steps for each
        5 x 5 pixel window (the window size is fixed inside the function)
        
        Parameters:
         tiles (arr): image stack indexed (time, row, col, band); modified
                      in place and also returned
         c_probs (arr): (time, 632, 632) cloud probabilities
         shadows (arr): shadow mask added to the binarised cloud mask
         image_dates (list): date value per time step, used to weight the
                             two neighbouring clean steps
    
        Returns:
         tiles (arr): the input stack with flagged windows blended
         c_probs (arr): combined cloud + shadow mask, clipped to 1
         secondary_c_probs (arr): cloud-only mask taken before shadows are added
    """
    wsize = 5
    # Remove the per-pixel minimum over time, then binarise at 0.33
    # (values exactly equal to 0.33 are left unchanged).
    c_probs = c_probs - np.min(c_probs, axis = 0)
    c_probs[np.where(c_probs > 0.33)] = 1.
    c_probs[np.where(c_probs < 0.33)] = 0.
    # Count cloudy pixels per 8 x 8 block, expand the counts back to 632 x 632
    # (third positional argument of resize is order=0), and keep blocks with
    # at least 16 cloudy pixels.
    c_probs = np.reshape(c_probs, (c_probs.shape[0], 632//8, 8, 632//8, 8))
    c_probs = np.sum(c_probs, (2, 4))
    c_probs = resize(c_probs, (c_probs.shape[0], 632, 632), 0)
    c_probs[np.where(c_probs < 16)] = 0
    c_probs[np.where(c_probs >= 16)] = 1
    # Cloud-only mask, preserved before the shadow mask is merged in.
    secondary_c_probs = np.copy(c_probs)
    c_probs += shadows
    c_probs[np.where(c_probs >= 1.)] = 1.
    # Counts interpolated (window, time step) pairs, not individual pixels.
    number_interpolated = 0
    # Slide a 5 x 5 window with stride 2, so neighbouring windows overlap.
    for cval in tnrange(0, IMSIZE - 4, 2):
        for rval in range(0, IMSIZE - 4, 2):
            subs = c_probs[:, cval:cval + wsize, rval:rval+wsize]
            sums = np.sum(subs, axis = (1, 2))
            # Time steps with fewer than 8 flagged pixels in this window can
            # serve as interpolation sources; if none qualifies, use them all.
            satisfactory = [x for x in range(c_probs.shape[0]) if sums[x] < 8]
            if len(satisfactory) == 0:
                satisfactory = [x for x in range(c_probs.shape[0])]
            satisfactory = np.array(satisfactory)
            for date in range(0, tiles.shape[0]):
                # Windows with more than 8 flagged pixels are interpolated.
                if np.sum(subs[date, :, :]) > 8:
                    number_interpolated += 1
                    # calculate_proximal_steps returns offsets relative to
                    # `date`; adding `date` turns them into time indices.
                    before, after = calculate_proximal_steps(date, satisfactory)
                    before = date + before
                    after = date + after
                    bef = tiles[before, cval:cval+wsize, rval:rval+wsize, : ]
                    aft = tiles[after, cval:cval+wsize, rval:rval+wsize, : ]
                    # From here on `before` / `after` hold dates, not indices.
                    before = image_dates[before]
                    after = image_dates[after]
                    before_diff = abs(image_dates[date] - before)
                    after_diff = abs(image_dates[date] - after)
                    # The source closer in time receives the larger weight.
                    # NOTE(review): if both sources equal `date` the sum of
                    # the differences is 0 and this divides by zero -- confirm
                    # that case cannot occur.
                    bef_wt = 1 - before_diff / (before_diff + after_diff)
                    aft_wt = 1 - bef_wt
                    candidate = bef_wt*bef + aft_wt*aft
                    # Blend candidate and original with the global c_arr / o_arr
                    # weight maps (5 x 5 x 10).
                    candidate = candidate*c_arr + tiles[date, cval:cval+wsize, rval:rval+wsize, : ]*o_arr
                    tiles[date, cval:cval+wsize, rval:rval+wsize, : ] = candidate 
    print("A total of {} pixels were interpolated".format(number_interpolated))
    return tiles, c_probs, secondary_c_probs

def remove_missed_clouds(img):
    """ Flags time steps in which too many band-3 pixels are outliers

        A pixel is an outlier when its band-3 value lies more than two
        interquartile ranges above the 75th or below the 25th percentile
        computed over the whole stack. A step is flagged when more than
        20 percent of its pixels are outliers.

        Parameters:
         img (arr): (time, rows, cols, bands) image stack with at least 4 bands

        Returns:
         to_remove (arr): (n, 1) array of flagged time indices, as produced
                          by np.argwhere
    """
    band = img[:, :, :, 3]
    q25, q75 = np.percentile(band.flatten(), [25, 75])
    iqr = q75 - q25
    thresh_t = q75 + iqr*2
    thresh_b = q25 - iqr*2

    # Share of outlier pixels per time step, relative to the image's own size.
    n_pixels = band.shape[1] * band.shape[2]
    is_outlier = (band > thresh_t) | (band < thresh_b)
    n_outliers = is_outlier.reshape(band.shape[0], -1).sum(axis=1)
    outlier_percs = 100 * (n_outliers / n_pixels)

    to_remove = np.argwhere(outlier_percs > 20)
    return to_remove

# Data interpolation

Because the `smooth` function is called 5.6 million times on each 4000 hectare array, most of the computations are done outside of the function with the predefined `lmbd` and `d`, and then passed in as the `coefmat`, to avoid needless recomputation. This saves 141 CPU minutes per 4000 hectare array.

In [160]:
# Build the smoother's coefficient matrix once as a notebook global.
# NOTE(review): presumably `smooth` reads the global `coefmat` -- confirm in
# whittaker_smoother.py.
coefmat = intialize_smoother()
# Benchmark input: an all-ones array of shape (72, 10000).
test = np.ones((72, 10000))
# `timer` is timeit.default_timer (the later import shadows time.time).
s = timer()
x = parallel_apply_along_axis(smooth, 0, test)
e = timer()
# Elapsed seconds for the parallel variant.
print(e - s)

0.14889642199705122


In [161]:
# Same benchmark with numpy's serial apply_along_axis: `smooth` is applied
# to each of the 10000 length-72 slices along axis 0.
test = np.ones((72, 10000))
s = timer()
x = np.apply_along_axis(smooth, 0, test)
e = timer()
# Elapsed seconds for the serial variant.
print(e - s)

0.17638493900449248


# Tiling and coordinate selection functions

In [162]:
# Build the DSen2 network and load its weights from the models folder.
MDL_PATH = "../src/dsen2/models/"

# Two inputs with 4 and 6 channels; the spatial dimensions are left open.
input_shape = ((4, None, None), (6, None, None))
model = s2model(input_shape, num_layers=6, feature_size=128)
predict_file = f"{MDL_PATH}s2_032_lr_1e-04.hdf5"
print('Symbolic Model Created.')

model.load_weights(predict_file)

def DSen2(d10, d20):
    """Super resolves the 20 meter bands with the DSen2 convolutional
       neural network, as specified in Lanaras et al. 2018
       https://github.com/lanha/DSen2

        Parameters:
         d10 (arr): (4, X, Y) shape array with 10 meter resolution
         d20 (arr): (6, X, Y) shape array with 20 meter resolution

        Returns:
         prediction (arr): (6, X, Y) shape array with 10 meter superresolved
                          output of DSen2 on d20 array
    """
    # The pair of arrays is forwarded unchanged to the prediction helper.
    return _predict([d10, d20], ((4, None, None), (6, None, None)), deep=False)

def _predict(test, input_shape, model = model, deep=False, run_60=False):
    """ Runs model.predict on the inputs with verbose output.

        input_shape, deep and run_60 are accepted but not used. The default
        model is the notebook-level `model` at the time this cell is run.
    """
    return model.predict(test, verbose=1)

Symbolic Model Created.


In [163]:
# Leading 0 followed by the lengths of January..November (non-leap year), so
# that the running total gives the number of days preceding each month.
days_per_month = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30]
starting_days = np.array(days_per_month).cumsum()

In [164]:
def calculate_proximal_steps(date, satisfactory):
    """ Finds the nearest usable time steps before and after a given step

        Parameters:
         date (int): index of the time step being interpolated
         satisfactory (arr): indices of the time steps usable as sources

        Returns:
         arg_before, arg_after: offsets relative to `date` (the caller adds
         `date` to obtain indices). arg_before is the closest negative offset
         and arg_after the closest positive one. When one side has no usable
         step the other side's offset is used for both; when neither side has
         one, both offsets are 0.
    """
    offsets = np.asarray(satisfactory) - date
    earlier = offsets[offsets < 0]
    later = offsets[offsets > 0]

    arg_before = earlier.max() if earlier.size else None
    arg_after = later.min() if later.size else None

    # Explicit None checks: an offset is never 0 here, and the fallback for
    # "no neighbour at all" must be the zero offset, not the index itself.
    if arg_before is None and arg_after is None:
        return 0, 0
    if arg_after is None:
        arg_after = arg_before
    if arg_before is None:
        arg_before = arg_after
    return arg_before, arg_after

In [165]:
def mcm_shadow_mask(arr, c_probs):
    """ Calculates the multitemporal shadow mask for Sentinel-2 using
        the methods from Candra et al. 2020 on L1C images and matching
        outputs to the s2cloudless cloud probabilities

        Parameters:
         arr (arr): (Time, 632, 632, Band) array of L1C data scaled from [0, 1];
                    band indices 0, 1, 2 are read as B2, B8A, B11
         c_probs (arr): (Time, 632, 632) array of S2cloudless cloud probabilities
    
        Returns:
         shadows_new (arr): (Time, 632, 632) shadow mask after Candra et al.
                            2020, cloud matching, 80 m aggregation and a one
                            cell dilation
    """
    def _rank_array(arr):
        order = arr.argsort()
        ranks = order.argsort()
        return ranks

    # Reference image: the time step with the best combined rank of low mean
    # cloud probability and low deviation from the mean of the clean steps.
    mean_c_probs = np.mean(c_probs, axis = (1, 2))
    cloudy_steps = np.argwhere(mean_c_probs > 0.25)
    images_clean = np.delete(arr, cloudy_steps, 0)
    cloud_ranks = _rank_array(mean_c_probs)
    diffs = abs(np.sum(arr - np.mean(images_clean, axis = 0), axis = (1, 2, 3)))
    diff_ranks = _rank_array(diffs)
    overall_rank = diff_ranks + cloud_ranks
    reference_idx = np.argmin(overall_rank)
    ri = arr[reference_idx]

    # Candra et al. 2020 per-pixel test, evaluated for all pixels at once.
    deltab2 = arr[..., 0] - ri[..., 0]
    deltab8a = arr[..., 1] - ri[..., 1]
    deltab11 = arr[..., 2] - ri[..., 2]
    is_shadow = ((deltab2 < 0.10) &        #(1000/65535)
                 (deltab8a < -0.04) &      #(-400/65535)
                 (deltab11 < -0.04) &      #(-400/65535)
                 (arr[..., 0] < 0.095))    #(950/65535)
    shadows = np.zeros((arr.shape[0], 632, 632))
    shadows[is_shadow] = 1.

    # Remove shadows if cannot coreference a cloud: work on 8 x 8 pixel blocks.
    shadow_large = np.reshape(shadows, (shadows.shape[0], 79, 8, 79, 8))
    shadow_large = np.sum(shadow_large, axis = (2, 4))
    # A block counts as shadow when at least 8 of its pixels were flagged.
    shadow_large = np.where(shadow_large >= 8, 1., 0.)

    cloud_large = np.copy(c_probs)
    cloud_large[np.where(c_probs > 0.33)] = 1.
    cloud_large[np.where(c_probs < 0.33)] = 0.
    cloud_large = np.reshape(cloud_large, (shadows.shape[0], 79, 8, 79, 8))
    cloud_large = np.sum(cloud_large, axis = (2, 4))
    for time in tnrange(shadow_large.shape[0]):
        for x in range(shadow_large.shape[1]):
            x_low = np.max([x - 8, 0])
            x_high = np.min([x + 8, shadow_large.shape[1] - 1])
            for y in range(shadow_large.shape[2]):
                y_low = np.max([y - 8, 0])
                y_high = np.min([y + 8, shadow_large.shape[2] - 1])
                # Keep the block only if a nearby block holds >= 16 cloudy pixels.
                # NOTE(review): the slice end is exclusive and capped at
                # shape - 1, so the last block row/column never enters a
                # window -- left unchanged, confirm whether that is intended.
                c_prob_window = cloud_large[time, x_low:x_high, y_low:y_high]
                if np.max(c_prob_window) < 16:
                    shadow_large[time, x, y] = 0.

    shadow_large = resize(shadow_large, (shadow_large.shape[0], 632, 632), order = 0)
    shadows *= shadow_large

    # Go through and aggregate the shadow map to an 80m grid, and extend it one grid size around
    # any positive ID
    shadows = np.reshape(shadows, (shadows.shape[0], 79, 8, 79, 8))
    shadows = np.sum(shadows, axis = (2, 4))
    shadows[np.where(shadows < 12)] = 0.
    shadows[np.where(shadows >= 12)] = 1.

    shadows = resize(shadows, (shadows.shape[0], 632, 632), order = 0)
    shadows = np.reshape(shadows, (shadows.shape[0], 632//4, 4, 632//4, 4))
    shadows = np.max(shadows, (2, 4))

    # Dilate every positive cell by one cell in each direction. The upper
    # bound is the grid size itself so that the last row and column can be
    # set (a cap of 157 on the exclusive end excluded them).
    shadows_new = np.zeros_like(shadows)
    n_x, n_y = shadows.shape[1], shadows.shape[2]
    for time, x, y in np.argwhere(shadows == 1):
        shadows_new[time,
                    max(x - 1, 0):min(x + 2, n_x),
                    max(y - 1, 0):min(y + 2, n_y)] = 1.
    shadows_new = resize(shadows_new, (shadows.shape[0], 632, 632), order = 0)
    return shadows_new

In [166]:
def make_output_and_temp_folders(idx, output_folder = OUTPUT_FOLDER):
    """ Creates the raw/ and processed/ folder tree below output_folder

        Parameters:
         idx: accepted but not used
         output_folder (str): base folder; sub-folder names are appended
                              by plain string concatenation
    """
    subfolders = ("raw/", "raw/clouds/", "raw/s1/", "raw/s2/", "raw/misc/", "processed/")
    for sub in subfolders:
        target = os.path.realpath(output_folder + sub)
        # Only create folders that are not there yet.
        if not os.path.exists(target):
            os.makedirs(target)
    

def check_contains(coord, step_x, step_y, folder = OUTPUT_FOLDER, year = 2019, s1_layer = "SENT"):
    """ Decides whether the tile at (step_x, step_y) should be processed

        Parameters:
         coord: origin coordinate handed to offset_x / offset_y
         step_x, step_y: tile offsets (6300 units per tile) from the origin
         folder (str): landscape folder searched for a .geojson file
         year, s1_layer: accepted but not used

        Returns:
         bool: False when folder does not exist; True when it holds no
               .geojson file; otherwise True only if the tile's bottom-left
               corner or one of the sampled diagonal points lies inside the
               first .geojson found (see pts_in_geojson)
    """
    bottomleft = offset_x(coord, step_x*6300)
    bottomleft = offset_y(bottomleft , step_y*6300)

    if not os.path.exists(folder):
        return False
    geojsons = [x for x in os.listdir(folder) if x.endswith(".geojson")]
    if not geojsons:
        return True
    geojson_path = folder + geojsons[0]

    for subtile in range(1, 5, 1):
        # Sample a point 1/5 .. 4/5 of a tile away from the bottom-left
        # corner along both axes. The corner is copied first, mirroring
        # calculate_bbx, in case the offset helpers modify their input.
        bottomright = offset_x(np.copy(bottomleft), (subtile*6300) / 5)
        bottomright = offset_y(bottomright, (subtile*6300) / 5)
        if pts_in_geojson(lats = [bottomleft[0], bottomright[0]],
                          longs = [bottomleft[1], bottomright[1]],
                          geojson = geojson_path):
            return True
    return False

def download_large_tile(coord, step_x, step_y, folder = OUTPUT_FOLDER, year = 2019, s1_layer = "SENT"):
    """ Downloads and caches the raw inputs (clouds, shadows, Sentinel-1,
        Sentinel-2, DEM) of one large tile as hickle files below folder

        Nothing is downloaded when the tile's output (.npy) or processed
        (.hkl) file already exists, and each raw file is only downloaded
        when it is missing.

        Parameters:
         coord: origin coordinate handed to calculate_bbx_pyproj
         step_x, step_y: tile offsets from the origin
         folder (str): landscape folder that receives the raw/ files
         year (int): reference year forwarded to the download functions
         s1_layer (str): ignored; the layer is chosen by identify_s1_layer
    """
    bbx, epsg = calculate_bbx_pyproj(coord, step_x, step_y, expansion = 10)
    dem_bbx, _ = calculate_bbx_pyproj(coord, step_x, step_y, expansion = 20)
    idx = str(step_y) + "_" + str(step_x)
    # Create the folder tree below the folder that is actually written to.
    make_output_and_temp_folders(idx, output_folder = folder)

    print("Calculating cloud cover")
    tile_y = str(((step_y+1)*5)-1)
    tile_x = str(((step_x+1)*5)-1)
    if os.path.exists(folder + "output/" + tile_y + "/" + tile_x + ".npy"):
        return
    if os.path.exists(folder + "processed/" + tile_y + "/" + tile_x + ".hkl"):
        return

    clean_steps = None
    if not os.path.exists(folder + "raw/clouds/clouds_{}.hkl".format(idx)):
        _, cloud_probs, shadows, clean_steps = identify_clouds_new(bbx, epsg = epsg)
        hkl.dump(cloud_probs, folder + "raw/clouds/clouds_{}.hkl".format(idx), mode='w', compression='gzip')
        hkl.dump(shadows, folder + "raw/clouds/shadows_{}.hkl".format(idx), mode='w', compression='gzip')
        hkl.dump(clean_steps, folder + "raw/clouds/clean_steps_{}.hkl".format(idx), mode='w', compression='gzip')

    if not os.path.exists(folder + "raw/s1/{}.hkl".format(idx)):
        print("Downloading S1")
        s1_layer = identify_s1_layer((coord[1], coord[0]))
        s1, s1_dates = download_sentinel_1(bbx, layer = s1_layer, epsg = epsg, year = year)
        s1 = process_sentinel_1_tile(s1, s1_dates)
        hkl.dump(s1, folder + "raw/s1/{}.hkl".format(idx), mode='w', compression='gzip')
        hkl.dump(s1_dates, folder + "raw/misc/s1_dates_{}.hkl".format(idx), mode='w', compression='gzip')

    if not os.path.exists(folder + "raw/s2/{}.hkl".format(idx)):
        print("Downloading S2")
        # Reuse the steps computed above; fall back to the cached file when
        # the cloud step was skipped in this call.
        if clean_steps is None:
            clean_steps = hkl.load(folder + "raw/clouds/clean_steps_{}.hkl".format(idx))
        s2, s2_dates = download_layer(bbx, clean_steps = clean_steps, epsg = epsg, year = year)
        hkl.dump(s2, folder + "raw/s2/{}.hkl".format(idx), mode='w', compression='gzip')
        hkl.dump(s2_dates, folder + "raw/misc/s2_dates_{}.hkl".format(idx), mode='w', compression='gzip')

    if not os.path.exists(folder + "raw/misc/dem_{}.hkl".format(idx)):
        print("Downloading DEM")
        dem = download_dem(dem_bbx, epsg = epsg) # get the DEM BBOX
        hkl.dump(dem, folder + "raw/misc/dem_{}.hkl".format(idx), mode='w', compression='gzip')

In [167]:
def calculate_bad_steps(sentinel2, clouds):
    """ Lists the time steps that are too cloudy or hold too many invalid values

        Parameters:
         sentinel2 (arr): image stack indexed (time, row, col, band)
         clouds (arr): (time, 632, 632) cloud probabilities

        Returns:
         to_remove (arr): sorted unique indices of the steps to discard
    """
    n_steps = clouds.shape[0]

    # Cloudy steps: more than a fifth of the pixels exceed probability 0.33.
    cloudy_px = (clouds.reshape(n_steps, 632*632) > 0.33).sum(axis=1)
    cloud_steps = np.argwhere(cloudy_px > 632**2 / 5)

    # Invalid values (exactly 0 or >= 1) counted on every 10th column.
    # NOTE(review): the original index `: :10` is a stride over axis 2, not a
    # selection of the first 10 bands -- confirm which was intended.
    sampled = sentinel2[:, :, ::10]
    n_invalid = np.array([np.count_nonzero(step == 0.0) + np.count_nonzero(step >= 1)
                          for step in sampled])
    invalid_steps = np.argwhere(n_invalid >= (632**2) / 50)

    merged = np.concatenate([cloud_steps.flatten(), invalid_steps.flatten()])
    return np.unique(merged)

def superresolve(sentinel2):
    """ Runs DSen2 on the band-last Sentinel-2 stack

        Parameters:
         sentinel2 (arr): (time, X, Y, band) array; bands 0:4 are passed as
                          the 10 m input and bands 4:10 as the 20 m input

        Returns:
         superresolved (arr): DSen2 output moved back to band-last layout
    """
    # DSen2 takes channel-first input: (time, X, Y, band) -> (time, band, X, Y).
    d10 = np.transpose(sentinel2[:, :, :, 0:4], (0, 3, 1, 2))
    d20 = np.transpose(sentinel2[:, :, :, 4:10], (0, 3, 1, 2))

    prediction = DSen2(d10, d20)
    # Back to band-last: (time, band, X, Y) -> (time, X, Y, band).
    return np.transpose(prediction, (0, 2, 3, 1)) # returns band IDXs 3, 4, 5, 7, 8, 9

def process_sentinel_1_tile(sentinel1, dates):
    """ Selects the best Sentinel-1 images and thins them to one per 15 days

        Parameters:
         sentinel1 (arr): Sentinel-1 image stack
         dates (arr): date value per image

        Returns:
         s1 (arr): output of calculate_and_save_best_images with every
                   position whose 5-day grid value is not a multiple of 15
                   removed along axis 0
    """
    s1 = calculate_and_save_best_images(sentinel1, dates)
    # Retain only imagery every 15 days on the 5-day grid 0, 5, ..., 355.
    step_days = np.arange(0, 360, 5)
    to_remove = np.argwhere(step_days % 15 != 0)
    return np.delete(s1, to_remove, 0)


def process_large_tile(coord, step_x, step_y, folder = OUTPUT_FOLDER):
    """Turn the raw downloads for one large tile into 25 processed sub-tiles on disk.

    The raw arrays for tile ``"{step_y}_{step_x}"`` are loaded from
    ``folder + "raw/..."``, time steps flagged by ``calculate_bad_steps`` and
    ``remove_missed_clouds`` are dropped, and ``remove_cloud_and_shadows`` is
    applied. The result is cut into a 5 x 5 grid of windows (128 wide, stride
    126) and each window is superresolved, extended with the DEM and the index
    functions, composited, interpolated, concatenated with the matching
    Sentinel-1 window and written to ``folder + "processed/{y}/{x}.hkl"``.

    If any of the 25 outputs already exists under ``processed/`` or
    ``output/``, the function does nothing.

    Parameters
    ----------
    coord :
        Not used inside this function.
    step_x, step_y : int
        Position of the large tile; used for the raw file name and to offset
        the sub-tile indices by ``5 * step``.
    folder : str
        Root folder, used by plain string concatenation (so it is expected to
        end with a path separator). The default is bound to ``OUTPUT_FOLDER``
        when the function is defined.

    Returns
    -------
    None
    """
    idx = str(step_y) + "_" + str(step_x)
    x_vals = []
    y_vals = []
    # Build the output indices of the 25 sub-tiles: for loop position i this yields
    # x = i % 5 and y = 4 - i // 5 (x cycles 0..4 while y counts down from 4).
    for i in range(25):
        y_val = (24 - i) // 5
        x_val = 5 - ((25 - i) % 5)
        x_val = 0 if x_val == 5 else x_val
        x_vals.append(x_val)
        y_vals.append(y_val)

    # Shift the local 0..4 indices by this large tile's position.
    y_vals = [i + (5*step_y) for i in y_vals]
    x_vals = [i + (5*step_x) for i in x_vals]

    # A single existing output (either format) marks the whole large tile as done.
    processed = False
    for x, y in zip(x_vals, y_vals):
        if (os.path.exists(folder + "processed/{}/{}.hkl".format(str(y), str(x))) or
            (os.path.exists(folder + "output/{}/{}.npy".format(str(y), str(x))))):
            processed = True
    if not processed:

        clouds = hkl.load(folder + "raw/clouds/clouds_{}.hkl".format(idx))
        sentinel1 = hkl.load(folder + "raw/s1/{}.hkl".format(idx))
        radar_dates = hkl.load(folder + "raw/misc/s1_dates_{}.hkl".format(idx))
        sentinel2 = hkl.load(folder + "raw/s2/{}.hkl".format(idx))
        dem = hkl.load(folder + "raw/misc/dem_{}.hkl".format(idx))
        image_dates = hkl.load(folder + "raw/misc/s2_dates_{}.hkl".format(idx))
        # Shadows are optional on disk; fall back to computing them from the imagery.
        if os.path.exists(folder + "raw/clouds/shadows_{}.hkl".format(idx)):
            shadows = hkl.load(folder + "raw/clouds/shadows_{}.hkl".format(idx))
        else:
            print("No shadows file, so calculating shadows with L2A")
            shadows = mcm_shadow_mask(sentinel2, clouds)
        print("The files have been loaded")

        # NOTE(review): Sentinel-1 processing is disabled here, so `sentinel1` is used
        # exactly as loaded and `radar_dates` is unused, although the log message
        # below still says "radar processed" -- confirm this is intended.
        #sentinel1 = process_sentinel_1_tile(sentinel1, radar_dates)
        # First pass: drop the flagged steps from every per-date array so they stay aligned.
        to_remove = calculate_bad_steps(sentinel2, clouds)
        sentinel2 = np.delete(sentinel2, to_remove, axis = 0)
        clouds = np.delete(clouds, to_remove, axis = 0)
        shadows = np.delete(shadows, to_remove, axis = 0)
        image_dates = np.delete(image_dates, to_remove)
        print("Cloudy and missing images removed, radar processed")

        # Second pass: drop steps flagged by remove_missed_clouds, again from all arrays.
        to_remove = remove_missed_clouds(sentinel2)
        sentinel2 = np.delete(sentinel2, to_remove, axis = 0)
        clouds = np.delete(clouds, to_remove, axis = 0)
        image_dates = np.delete(image_dates, to_remove)
        shadows = np.delete(shadows, to_remove, axis = 0)
        print("Missed cloudy images removed")

        # `x` is rebound here from the loop variable above to the cleaned image stack.
        x, _, _ = remove_cloud_and_shadows(sentinel2, clouds, shadows, image_dates)
        print("Clouds and shadows interpolated")

        # Windows are (0,128), (126,254), ..., (504,632) on both axes: 5 x 5 windows
        # that overlap their neighbours by 2. `index` walks x_vals / y_vals in step.
        index = 0
        for start_x, end_x in zip(range(0, 633, 126), range(128, 633, 126)):
            for start_y, end_y in zip(range(0, 633, 126), range(128, 633, 126)):
                print(index)
                if not os.path.exists(folder + "processed/{}/{}.hkl".format(str(y_vals[index]), str(x_vals[index]))):
                    # NOTE(review): this is a basic slice of `x`; if `x` is a NumPy array
                    # it is a view, so the band assignment two lines down also writes
                    # into `x` and the overlap seen by later windows -- confirm intended.
                    subtile = x[:, start_x:end_x, start_y:end_y, :]
                    resolved = superresolve(subtile)
                    subtile[:, :, :, 4:10] = resolved
                    
                    # Repeat the DEM window once per time step and append it (divided by 90)
                    # as extra channel(s) on the last axis.
                    dem_i = np.tile(dem[np.newaxis, start_x:end_x, start_y:end_y, :], (x.shape[0], 1, 1, 1))
                    subtile = np.concatenate([subtile, dem_i / 90], axis = -1)
                    t2 = timer()
                    subtile = evi(subtile, verbose = True)
                    subtile = bi(subtile, verbose = True)
                    subtile = msavi2(subtile, verbose = True)
                    subtile = si(subtile, verbose = True)
                    t3 = timer()
                    print("Indices: {}".format(t3 - t2))

                    subtile = calculate_and_save_best_images(subtile, image_dates)
                    subtile = interpolate_array(subtile)
                    t5 = timer()
                    print("Interpolate: {}".format(t5 - t3))
                    # Append the matching Sentinel-1 window on the last axis; this requires
                    # both arrays to agree on every other axis, including time.
                    subtile = np.concatenate([subtile, sentinel1[:, start_x:end_x,
                                                                start_y:end_y, :]], axis = -1)
                    

                    # Make sure the per-row output folder exists before writing.
                    out_y_folder = folder + "processed/{}/".format(str(y_vals[index]))
                    if not os.path.exists(os.path.realpath(out_y_folder)):
                        os.makedirs(os.path.realpath(out_y_folder))
                    # NOTE(review): the reason for this 2 s pause is not visible here.
                    sleep(2)
                    hkl.dump(subtile,
                             folder + "processed/{}/{}.hkl".format(str(y_vals[index]), str(x_vals[index])),
                             mode='w', compression='gzip')
                    #np.save(folder + "processed/{}/{}.npy".format(str(y_vals[index]), str(x_vals[index])), subtile)
                # Advance even when the window was skipped so indices stay aligned.
                index += 1
            
def clean_up_folders():
    """Placeholder for folder clean-up; currently does nothing and returns None."""
    return None

In [168]:
# Parameters for the extent check below.
expansion = -10
multiplier = 1
step_x = 1

# First corner: the starting coordinates with zero offset applied.
coords_init = offset_x(coords, 0)
coords_init[1] += 0

# Second corner: the same offset is applied through offset_x and to element 1.
# Presumably 6300 is the side length of one large tile -- TODO confirm.
corner_offset = 6300*(step_x + multiplier) + expansion
coord1 = offset_x(coords, corner_offset)
coord1[1] += corner_offset


calculate_area([coords_init, coord1])

-15.459789 15.350595
-15.459789 15.350595
15850


In [None]:
# Start at 695
downloaded = 0

# Create the output root on first use.
output_root = os.path.realpath(OUTPUT_FOLDER)
if not os.path.exists(output_root):
    os.makedirs(output_root)

# Download and then process each large tile of the 2 x 2 grid.
for x_tile in range(2):
    for y_tile in range(2):
        # The containment check is disabled, so every tile is handled.
        contains = True
        #contains = check_contains(coords, x_tile, y_tile)
        print(contains)
        if not contains:
            continue

        print("X: {} Y:{}".format(x_tile, y_tile))
        # The counter is incremented and printed before the download starts.
        downloaded += 1
        print(f"Downloaded {downloaded}")
        download_large_tile(coord = coords, step_x = x_tile, step_y = y_tile)
        process_large_tile(coords, x_tile, y_tile)
        print("\n")
        #clean_up_folders(x_tile, y_tile)
            