# Training data download pipeline

Downloads 16x16 training data plots from Sentinel Hub, with the following steps:

*  Convert coordinates to UTM, identify bounding boxes of 160 and 180 meter borders
*  Download all L1C steps, correct missing bands, and calculate cloud cover
*  Select L2A imagery corresponding to the best imagery per 15 days, with missing imagery calculated as the weighted average of the nearest time steps

In [None]:
import pandas as pd
import numpy as np
from random import shuffle
from osgeo import ogr, osr
from sentinelhub import WmsRequest, WcsRequest, MimeType, CRS, BBox, constants, DataSource, CustomUrlParam
from s2cloudless import S2PixelCloudDetector, CloudMaskRequest
import logging
from collections import Counter
import datetime
import os
import yaml

import scipy.sparse as sparse
from scipy.sparse.linalg import splu


# Load the key that is passed as `instance_id` to every Sentinel Hub request below.
with open("../config.yaml", 'r') as stream:
    key = yaml.safe_load(stream)
API_KEY = key['key']

In [None]:
# CSV with the plot coordinates, and the folder the per-plot arrays are saved to.
DATA_LOCATION = '../data/train-csv/ghana-train.csv'
OUTPUT_FOLDER = '../data/train-new-shadow/'
EPSG = CRS.WGS84
# Edge length, in pixels, of the tile requested for every plot.
IMSIZE = 48
# Plot ids that already have a saved array.  Outputs are written with np.save,
# which appends ".npy", so only those files are considered; this also skips
# stray entries such as .DS_Store.  Listing OUTPUT_FOLDER (rather than a second
# hard-coded copy of the path) keeps the two from drifting apart.
existing = [int(name[:-4]) for name in os.listdir(OUTPUT_FOLDER) if name.endswith(".npy")]

In [None]:
# Execute the helper scripts inside this notebook's namespace so that their
# definitions can be called from the cells below.
# NOTE(review): presumably slope.py provides `calcSlope`, utils-bilinear.py the
# index helpers (`evi`, `bi`, `msavi2`, `si`) and DSen2Net.py `s2model` — confirm.
%run ../src/slope.py
%run ../src/utils-bilinear.py
%run ../src/dsen2/utils/DSen2Net.py

In [None]:
# setup function to reproject coordinates
def convertCoords(xy, src='', targ=''):
    """Reproject a single point from one coordinate reference system to another.

    Parameters
    ----------
    xy : sequence
        The point as ``(x, y)``; for geographic systems that is ``(lon, lat)``.
    src : int
        EPSG code of the source CRS.
    targ : int or str
        EPSG code of the target CRS, or a PROJ.4 definition string.

    Returns
    -------
    list
        ``[x, y]`` of the point in the target CRS.
    """
    srcproj = osr.SpatialReference()
    srcproj.ImportFromEPSG(src)
    targproj = osr.SpatialReference()
    if isinstance(targ, str):
        targproj.ImportFromProj4(targ)
    else:
        targproj.ImportFromEPSG(targ)

    # GDAL >= 3 honours the authority-defined axis order, which for EPSG:4326
    # is (lat, lon).  Callers pass (lon, lat), so pin the traditional x/y order
    # where the API exists; older GDAL versions already behave this way.
    for proj in (srcproj, targproj):
        if hasattr(proj, 'SetAxisMappingStrategy'):
            proj.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    transform = osr.CoordinateTransformation(srcproj, targproj)

    pt = ogr.Geometry(ogr.wkbPoint)
    pt.AddPoint(xy[0], xy[1])
    pt.Transform(transform)
    return [pt.GetX(), pt.GetY()]

def bounding_box(points, expansion = 160):
    """Grow a plot's lon/lat bounding box to a square of ``expansion`` metres.

    Both corners are projected into the UTM zone that contains the bottom-left
    corner, padded symmetrically so that each side measures ``expansion``
    metres, and projected back to EPSG:4326.  Diagnostics are printed when the
    input plot or the padded box has an unexpected size.

    Parameters
    ----------
    points : sequence
        ``[(min_lon, min_lat), (max_lon, max_lat)]`` in EPSG:4326.
    expansion : number
        Target side length of the returned box, in metres.

    Returns
    -------
    tuple
        ``(bl, tr)``: bottom-left and top-right corners as ``[lon, lat]`` lists.
    """
    # LONG, LAT FOR SOME REASON
    bl = list(points[0])
    tr = list(points[1])

    # UTM zones are 6 degrees wide and numbered 1..60 eastwards from 180W.
    # The EPSG code is 326xx on the northern hemisphere and 327xx on the
    # southern one.  A longitude exactly on a zone boundary is assigned to the
    # zone to its west.  Computing the zone (instead of enumerating a handful
    # of them) keeps `epsg` defined for every longitude.
    zone = min(60, max(1, int(np.ceil((bl[0] + 180) / 6))))
    epsg = (32600 if bl[1] > 0 else 32700) + zone

    bl = convertCoords(bl, 4326, epsg)
    tr = convertCoords(tr, 4326, epsg)
    init = [b - a for a, b in zip(bl, tr)]

    # Pad each axis separately so that both sides end up `expansion` long.
    distance1 = tr[0] - bl[0]
    distance2 = tr[1] - bl[1]
    expansion1 = (expansion - distance1)/2
    expansion2 = (expansion - distance2)/2
    bl = [bl[0] - expansion1, bl[1] - expansion2]
    tr = [tr[0] + expansion1, tr[1] + expansion2]

    after = [b - a for a, b in zip(bl, tr)]
    print(after)
    if max(init) > 130:
        print("ERROR: Initial field greater than 130m")
    if min(init) < 120:
        print("ERROR: Initial field less than 120m")

    if min(after) < (expansion - 4.5):
        print("ERROR")
    if max(after) > (expansion + 5):
        print("ERROR")

    bl = convertCoords(bl, epsg, 4326)
    tr = convertCoords(tr, epsg, 4326)
    return bl, tr

In [None]:
def calc_bbox(plot_id):
    """Return the lon/lat extent of one plot from the global ``df`` table.

    The result is ``[(min_lon, min_lat), (max_lon, max_lat)]``, computed over
    all rows whose ``PLOT_ID`` equals ``plot_id``.
    """
    rows = df.loc[df['PLOT_ID'] == plot_id]
    lons, lats = rows['LON'], rows['LAT']
    lower_left = (min(lons), min(lats))
    upper_right = (max(lons), max(lats))
    return [lower_left, upper_right]


# Load the plot table, drop the IMAGERY_TITLE column and every row that has a
# missing value, then collect the distinct plot ids in ascending order.
df = pd.read_csv(DATA_LOCATION).drop('IMAGERY_TITLE', axis = 1).dropna(axis = 0)
plot_ids = sorted(df['PLOT_ID'].unique())

In [None]:
# Sanity check: compute (and display) the expanded bounding box of the second
# plot id, using the default expansion of bounding_box.
bounding_box(calc_bbox(plot_ids[1]))

In [None]:
# Shared s2cloudless detector; identify_clouds() uses it to turn the downloaded
# imagery into per-pixel cloud probability maps.
cloud_detector = S2PixelCloudDetector(threshold=0.4, average_over=4, dilation_size=2)

def threshold_shadows(arr):
    """Build a coarse per-step shadow mask from one band of the time series.

    Pixels below ``Q1 - 1.5 * IQR`` (computed over the whole array) are flagged,
    the flags are counted inside each 8x8 cell of a 6x6 grid, and the counts are
    blown back up to 48x48.  Cells whose count differs by at most 16 from the
    same cell in the previous step are treated as "fake" shadows and cleared.

    ``arr`` is copied, not modified; every step must hold 48*48 values.
    Returns the (steps, 48, 48) mask and prints how many entries were removed.
    NOTE(review): counts of 1 and 2 are left as-is (only counts > 2 are set
    to 1) — confirm that this is intended.
    """
    arr = np.copy(arr)
    q25 = np.percentile(arr.flatten(), 25)
    q75 = np.percentile(arr.flatten(), 75)
    thresh_low = q25 - 1.5*(q75 - q25)

    # Flag low outliers with 1, then zero everything that is still below 1.
    arr[arr < thresh_low] = 1.
    arr[arr < 1] = 0.

    # Count flags per 8x8 cell, then upsample with order 0 (nearest neighbour).
    n_steps = arr.shape[0]
    arr = arr.reshape((n_steps, 6, 8, 6, 8)).sum(axis = (2, 4))
    arr = resize(arr, (n_steps, 48, 48), 0)

    # A flagged cell that looks (almost) the same as in the previous step is
    # considered persistent, i.e. not a real shadow.  Step 0 has no predecessor.
    fake_shadows = np.zeros(arr.shape)
    persistent = (arr[1:] > 0) & (np.abs(arr[:-1] - arr[1:]) <= 16)
    fake_shadows[1:][persistent] = 1

    arr[arr > 2] = 1.
    total_before = np.sum(arr)
    arr[fake_shadows == 1] = 0.
    total_after = np.sum(arr)
    print("Removed {} fake shadows".format(total_before - total_after))
    print("There are now {} shadows to fix".format(np.sum(arr)))
    return arr

def calculate_proximal_steps(uniques, date, clean_steps):
    """Find the nearest usable steps before and after ``date``.

    A step is usable when its entry in ``uniques`` is greater than 2 and its
    index is listed in ``clean_steps``.

    Parameters
    ----------
    uniques : sequence of numbers
        One value per time step.
    date : int
        Index of the step that needs neighbours.
    clean_steps : sequence of int
        Indices of the steps considered clean.

    Returns
    -------
    tuple of int
        ``(before, after)`` as offsets relative to ``date`` (``before <= 0 <=
        after`` when both sides exist).  When only one side has a usable step
        its offset is returned twice; when ``date`` itself is the only usable
        step, ``(0, 0)`` is returned.

    Raises
    ------
    ValueError
        If no step is usable.
    """
    uniques = np.array(uniques)
    candidates = np.flatnonzero(uniques > 2)
    satisfactory = candidates[np.isin(candidates, clean_steps)]
    if satisfactory.size == 0:
        raise ValueError("no satisfactory steps to interpolate from")

    offsets = satisfactory - date
    earlier = offsets[offsets < 0]
    later = offsets[offsets > 0]
    # Use explicit None checks: an offset of 0 is a valid value, not "missing".
    arg_before = int(earlier.max()) if earlier.size else None
    arg_after = int(later.min()) if later.size else None

    if arg_before is None and arg_after is None:
        # Only `date` itself is usable: zero offsets point back at it.
        arg_before = arg_after = 0
    elif arg_after is None:
        arg_after = arg_before
    elif arg_before is None:
        arg_before = arg_after
    print(arg_before, date, arg_after)
    return arg_before, arg_after

def calculate_proximal_steps_index(date, satisfactory):
    """Find the nearest satisfactory steps before and after ``date``.

    Parameters
    ----------
    date : int or 1-element array
        Index of the step that needs neighbours.
    satisfactory : sequence of int
        Indices of the steps that may be used for interpolation.

    Returns
    -------
    tuple of int
        ``(before, after)`` as offsets relative to ``date``; add them to
        ``date`` to obtain step indices.  When only one side has a satisfactory
        step its offset is returned twice; when ``date`` itself is the only
        satisfactory step, ``(0, 0)`` is returned.

    Raises
    ------
    ValueError
        If ``satisfactory`` is empty.
    """
    steps = np.asarray(satisfactory).ravel()
    if steps.size == 0:
        raise ValueError("no satisfactory steps to interpolate from")

    offsets = steps - int(np.squeeze(date))
    earlier = offsets[offsets < 0]
    later = offsets[offsets > 0]
    # Use explicit None checks: an offset of 0 is a valid value, not "missing".
    arg_before = int(earlier.max()) if earlier.size else None
    arg_after = int(later.min()) if later.size else None

    if arg_before is None and arg_after is None:
        # Only `date` itself is satisfactory: zero offsets point back at it.
        return 0, 0
    if arg_after is None:
        arg_after = arg_before
    if arg_before is None:
        arg_before = arg_after
    return arg_before, arg_after


def identify_clouds(bbox, epsg = EPSG, time = ('2018-12-15', '2019-12-15')):
    """Download the CLOUD_DETECTION layer and score every time step for clouds.

    Returns ``(clean_steps, means, cloud_probs)``: the probability maps from
    ``cloud_detector``, their mean over axes 1 and 2, and the indices of the
    steps whose mean is below 0.20.  Any exception is logged and the function
    then returns ``None``.
    """
    try:
        cloud_request = WmsRequest(
            layer='CLOUD_DETECTION',
            bbox=BBox(bbox, crs = epsg),
            time=time,
            width=IMSIZE,
            height=IMSIZE,
            image_format = MimeType.TIFF_d32f,
            maxcc=1.,
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
            time_difference=datetime.timedelta(hours=24),
        )

        raw_steps = np.array(cloud_request.get_data())
        cloud_probs = cloud_detector.get_cloud_probability_maps(raw_steps)
        means = np.mean(cloud_probs, (1, 2))
        clean_steps = [step for step, cloudiness in enumerate(means) if cloudiness < 0.20]
        return clean_steps, means, cloud_probs
    except Exception as e:
        logging.fatal(e, exc_info=True)
    
    
def download_dem(val, epsg = EPSG):
    """Download the DEM around plot ``val`` and pass it through ``calcSlope``.

    The DEM is requested with a one-pixel margin on every side
    ((IMSIZE+2) x (IMSIZE+2)); the margin is cropped again after the slope
    calculation, so the result has shape (IMSIZE, IMSIZE, 1).
    NOTE(review): `calcSlope` comes from outside this cell; presumably it
    returns slope values — confirm units against slope.py.
    """
    padded = IMSIZE + 2
    bbox = bounding_box(calc_bbox(val), expansion = padded*10)
    dem_request = WmsRequest(data_source=DataSource.DEM,
                             layer='DEM',
                             bbox=BBox(bbox, crs = epsg),
                             width=padded,
                             height=padded,
                             instance_id=API_KEY,
                             image_format=MimeType.TIFF_d32f,
                             custom_url_params={CustomUrlParam.SHOWLOGO: False})
    elevation = dem_request.get_data()[0]
    slope = calcSlope(elevation.reshape((1, padded, padded)),
                      np.full((padded, padded), 10),
                      np.full((padded, padded), 10),
                      zScale = 1, minSlope = 0.02)
    slope = slope.reshape((padded, padded, 1))
    # Drop the one-pixel margin that was only needed for the slope calculation.
    return slope[1:IMSIZE+1, 1:IMSIZE+1, :]

def calculate_shadows(bbox, epsg = EPSG, time = ('2018-12-15', '2019-12-15')):
    """Download the SCENE layer and locate time steps dominated by value 3.

    Every pixel whose value is not 3 is zeroed.

    Returns
    -------
    tuple
        ``(img, shadow_steps)``: the masked stack (values 0 or 3) and the
        ``np.argwhere`` indices of the steps whose sum over axes 1 and 2
        exceeds ``IMSIZE*IMSIZE / 4``.
    """
    box = BBox(bbox, crs = epsg)
    image_request = WcsRequest(
            layer='SCENE',
            bbox=box,
            time=time,
            image_format = MimeType.TIFF_d32f,
            maxcc=1.,
            resx='10m', resy='10m',
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'NEAREST',
                                constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
            time_difference=datetime.timedelta(hours=24),
        )
    img_bands = image_request.get_data()
    img = np.stack(img_bands)
    img[np.where(img != 3)] = 0
    print("Shadows: {}".format(img.shape))
    # NOTE(review): the retained pixels keep the value 3, so this sum is three
    # times the pixel count, whereas download_layer() binarises to 1 before
    # summing — confirm which threshold is intended.
    shadow_sum = np.sum(img, axis = (1, 2))
    # The comparison belongs inside argwhere; outside it compared the *indices*
    # of the non-zero sums with the threshold and returned a boolean array.
    shadow_steps = np.argwhere(shadow_sum > (IMSIZE*IMSIZE) / 4)
    return img, shadow_steps


def check_zenith(bbox, epsg = EPSG, time = ('2018-12-15', '2019-12-15')):
    """Download the ZENITH layer for ``bbox`` over ``time``.

    Returns whatever the request's ``get_data()`` yields (one entry per
    returned time step).  Any exception is logged and the function then
    returns ``None``.
    """
    try:
        zenith_request = WmsRequest(
            layer='ZENITH',
            bbox=BBox(bbox, crs = epsg),
            time=time,
            width=IMSIZE,
            height=IMSIZE,
            image_format = MimeType.TIFF_d32f,
            maxcc=0.5,
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
            time_difference=datetime.timedelta(hours=24),
        )
        return zenith_request.get_data()
    except Exception as e:
        logging.fatal(e, exc_info=True)
    
        
        
def calculate_and_save_best_images(img_bands, image_dates):
    """Resample an irregular image time series onto a regular 5-day grid.

    For every target day in ``range(0, 360, 5)``:

    * if an image lies less than 5 days away, that image is used as is;
    * otherwise the nearest image before and the nearest image after the
      target day are blended linearly, weighted by their distance in days.
      A neighbour that does not exist, or that is more than 100 days away, is
      ignored and the other neighbour is used alone.

    Parameters
    ----------
    img_bands : numpy.ndarray
        Image stack with time as the first axis.
    image_dates : sequence of numbers
        Day number of every image in ``img_bands``.

    Returns
    -------
    tuple
        ``(keep_steps, max_distance)``: the stacked 72-step series, and the
        largest gap in days between two images that were blended.

    Raises
    ------
    ValueError
        If there are no images.
    """
    max_neighbour_gap = 100  # days; farther neighbours are treated as missing
    # Clouds have been removed at this step, so all steps are satisfactory
    dates = np.asarray(image_dates)[:img_bands.shape[0]]
    if dates.size == 0:
        raise ValueError("no imagery dates to interpolate from")

    def first_index(day):
        # Index of the first image taken on `day`.
        return int(np.flatnonzero(dates == day)[0])

    keep_steps = []
    max_distance = 0
    for target in range(0, 360, 5):
        offsets = dates - target
        nearest = int(np.argmin(np.abs(offsets)))
        # If there is imagery within 5 days, select it
        if abs(offsets[nearest]) < 5:
            keep_steps.append(img_bands[nearest])
            continue

        # Otherwise look for the closest imagery on either side of the target.
        earlier = offsets[offsets < 0]
        later = offsets[offsets > 0]
        prev_off = earlier.max() if earlier.size else None
        next_off = later.min() if later.size else None
        prev_ok = prev_off is not None and abs(prev_off) <= max_neighbour_gap
        next_ok = next_off is not None and abs(next_off) <= max_neighbour_gap
        if prev_ok and next_ok:
            pass
        elif prev_ok:
            next_off = prev_off
        elif next_ok:
            prev_off = next_off
        elif next_off is not None:
            # Both neighbours are far away (or only a later one exists).
            prev_off = next_off
        else:
            next_off = prev_off

        if prev_off != next_off:
            # Linear interpolation: the closer image gets the larger weight.
            next_ratio = prev_off / (prev_off - next_off)
            prev_ratio = 1 - next_ratio
        else:
            prev_ratio = next_ratio = 0.5

        prev_date = target + prev_off
        next_date = target + next_off
        step = (img_bands[first_index(prev_date)] * prev_ratio
                + img_bands[first_index(next_date)] * next_ratio)
        keep_steps.append(step)
        max_distance = max(max_distance, next_date - prev_date)

    print("Maximum time distance: {}".format(max_distance))

    keep_steps = np.stack(keep_steps)
    return keep_steps, max_distance



In [None]:
def speyediff(N, d, format = 'csc'):
    """Return the sparse ``(N-d) x N`` difference matrix of order ``d``.

    The ``d + 1`` stencil coefficients are obtained by differencing a unit
    impulse ``d`` times and are placed on the diagonals ``0 .. d``.
    """
    stencil = np.zeros(2*d + 1)
    stencil[d] = 1.
    for _ in range(d):
        stencil = stencil[:-1] - stencil[1:]
    return sparse.diags(stencil, np.arange(d+1), (N-d, N), format = format)

def smooth(y, lmbd, d = 2):
    """Smooth the 1-D series ``y`` by solving ``(I + lmbd * D'D) z = y``.

    ``D`` is the order-``d`` difference matrix from ``speyediff``; a larger
    ``lmbd`` weights the difference penalty more heavily.  Returns ``z``.
    """
    n = len(y)
    identity = sparse.eye(n, format = 'csc')
    diff_op = speyediff(n, d, format = 'csc')
    system = identity + lmbd * diff_op.conj().T.dot(diff_op)
    return splu(system).solve(y)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Directory that holds the pretrained DSen2 weight files.
MDL_PATH = "../src/dsen2/models/"

# Build the network for a 4-band and a 6-band input of arbitrary spatial size,
# then load the pretrained weights into it.
# NOTE(review): `s2model` is presumably defined by the %run of DSen2Net.py — confirm.
input_shape = ((4, None, None), (6, None, None))
model = s2model(input_shape, num_layers=6, feature_size=128)
predict_file = MDL_PATH+'s2_032_lr_1e-04.hdf5'
print('Symbolic Model Created.')

model.load_weights(predict_file)

def DSen2(d10, d20):
    """Run the loaded DSen2 model on a 4-band (``d10``) and a 6-band (``d20``) stack.

    Returns the model's prediction unchanged.
    """
    return _predict([d10, d20], ((4, None, None), (6, None, None)), deep=False)

def _predict(test, input_shape, model = model, deep=False, run_60=False):
    """Return ``model.predict(test)`` after announcing the weight file in use.

    ``input_shape``, ``deep`` and ``run_60`` are accepted but not used here.
    The default ``model`` is bound when this function is defined.
    """
    print("Predicting using file: {}".format(predict_file))
    return model.predict(test, verbose=1)

In [None]:
from skimage.transform import resize

# 5x5 blending kernels used when patching cloudy windows: `c_arr` weights the
# interpolated candidate and `o_arr = 1 - c_arr` the original pixels.  Before
# the division by 3 the candidate weights form concentric rings,
#     1 1 1 1 1
#     1 2 2 2 1
#     1 2 3 2 1
#     1 2 2 2 1
#     1 1 1 1 1
# i.e. 3 minus the Chebyshev distance to the centre.  Both kernels are repeated
# along a third axis of length 11.
c_arr = (3 - np.maximum(*np.abs(np.indices((5, 5)) - 2))) / 3
o_arr = 1 - c_arr
c_arr = np.repeat(c_arr[:, :, np.newaxis], 11, axis = 2)
o_arr = np.repeat(o_arr[:, :, np.newaxis], 11, axis = 2)

def download_layer(bbox, epsg = EPSG, time = ('2018-12-15', '2019-12-15')):
    """Download the L2A20 and L2A10 layers and derive a per-step shadow mask.

    Both stacks are resized to IMSIZE x IMSIZE.  The last band of the L2A10
    stack is split off and binarised (value 3 -> 1, everything else -> 0) to
    form ``shadows``; the remaining L2A10 bands are concatenated with the
    L2A20 bands along the last axis.

    Returns ``(img, image_request, shadows, shadow_steps)`` where
    ``image_request`` is the L2A10 request and ``shadow_steps`` holds the
    ``np.argwhere`` indices of steps with more than ``IMSIZE*IMSIZE / 5``
    shadow pixels.  Any exception is logged and the function then returns
    ``None``.
    """
    try:
        box = BBox(bbox, crs = epsg)

        def fetch(layer, sampling):
            # Request one layer at 10 m and resize every step to the tile size.
            request = WcsRequest(
                    layer=layer,
                    bbox=box,
                    time=time,
                    image_format = MimeType.TIFF_d32f,
                    maxcc=1.,
                    resx='10m', resy='10m',
                    instance_id=API_KEY,
                    custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: sampling,
                                        constants.CustomUrlParam.UPSAMPLING: sampling},
                    time_difference=datetime.timedelta(hours=24),
                )
            stack = np.stack(request.get_data())
            stack = resize(stack, (stack.shape[0], IMSIZE, IMSIZE, stack.shape[-1]), order = 0)
            return stack, request

        img_20, _ = fetch('L2A20', 'NEAREST')
        img_10, image_request = fetch('L2A10', 'BICUBIC')

        # The last L2A10 band carries the mask source; split it off.
        shadows = img_10[:, :, :, -1]
        img_10 = img_10[:, :, :, :-1]

        shadows[shadows != 3] = 0
        shadows[shadows == 3] = 1
        before = np.sum(shadows)
        print("Difference: {}".format(np.sum(shadows) - before))
        print("Shadows: {}".format(shadows.shape))
        per_step = np.sum(shadows, axis = (1, 2))
        shadow_steps = np.argwhere(per_step > (IMSIZE*IMSIZE) / 5)

        img = np.concatenate([img_10, img_20], axis = -1)
        return img, image_request, shadows, shadow_steps

    except Exception as e:
        logging.fatal(e, exc_info=True)
        
def remove_cloud_and_shadows(tiles, probs, shadows, wsize = 5):
    """Patch cloudy or shadowed windows with imagery from neighbouring steps.

    A combined mask is built from the cloud probabilities (per-pixel minimum
    over time subtracted, maximum taken per 8x8 cell, thresholded at 0.3) plus
    ``shadows``.  A ``wsize`` x ``wsize`` window is then slid over the tile.
    For every step whose window mask sums to more than 10, the window is
    replaced by a blend of the nearest steps whose window sum is below 5,
    weighted by temporal distance and feathered with ``c_arr`` / ``o_arr``.

    Parameters
    ----------
    tiles : numpy.ndarray
        (steps, IMSIZE, IMSIZE, bands) stack; modified in place.
    probs : numpy.ndarray
        (steps, IMSIZE, IMSIZE) cloud probabilities.
    shadows : numpy.ndarray
        Shadow mask that is added to the thresholded cloud mask.
    wsize : int
        Window edge length; the blending kernels are 5x5, so keep the default.

    Returns
    -------
    numpy.ndarray
        ``tiles`` (the same array that was passed in).
    """
    c_probs = np.copy(probs)
    c_probs = c_probs - np.min(c_probs, axis = 0)
    c_probs = np.reshape(c_probs, [c_probs.shape[0], int(IMSIZE/8), 8, int(IMSIZE/8), 8])
    c_probs = np.max(c_probs, axis = (2, 4))
    c_probs = resize(c_probs, (c_probs.shape[0], IMSIZE, IMSIZE), 0)
    c_probs[np.where(c_probs > 0.3)] = 1.
    c_probs += shadows

    for cval in range(0, IMSIZE - wsize + 1):
        for rval in range(0, IMSIZE - wsize + 1):
            subs = c_probs[:, cval:cval + wsize, rval:rval+wsize]
            # The mask does not change inside the loops, so sum each step once.
            window_sums = [np.sum(subs[x, :, :]) for x in range(c_probs.shape[0])]
            satisfactory = np.array([x for x, total in enumerate(window_sums) if total < 5])
            for date in range(0, tiles.shape[0]):
                if window_sums[date] > 10:
                    off_before, off_after = calculate_proximal_steps_index(date, satisfactory)
                    before = date + off_before
                    after = date + off_after
                    bef = tiles[before, cval:cval+wsize, rval:rval+wsize, : ]
                    aft = tiles[after, cval:cval+wsize, rval:rval+wsize, : ]
                    # Weight by distance in steps (the offsets), not by the
                    # absolute step indices: the nearer step counts for more,
                    # and equal offsets give an even 0.5 / 0.5 split.
                    span = abs(off_before) + abs(off_after)
                    bef_wt = 1 - (abs(off_before) / span) if span else 0.5
                    aft_wt = 1 - bef_wt
                    candidate = bef_wt*bef + aft_wt*aft
                    candidate = candidate*c_arr + tiles[date, cval:cval+wsize, rval:rval+wsize, : ]*o_arr
                    tiles[date, cval:cval+wsize, rval:rval+wsize, : ] = candidate
    return tiles

def remove_missed_clouds(img):
    """Flag time steps in which band 3 has too many outlier pixels.

    Thresholds are ``Q3 + 2*IQR`` and ``Q1 - 2*IQR`` of band 3 over the whole
    series.  A step is flagged when more than 15% of its pixels fall outside
    them.  The integer percentage of every step is printed.

    Parameters
    ----------
    img : numpy.ndarray
        (steps, height, width, bands) stack with at least 4 bands.

    Returns
    -------
    numpy.ndarray
        ``np.argwhere`` indices (shape ``(k, 1)``) of the flagged steps.
    """
    band = img[:, :, :, 3]
    q25 = np.percentile(band.flatten(), 25)
    q75 = np.percentile(band.flatten(), 75)
    iqr = q75 - q25
    thresh_t = q75 + iqr*2
    thresh_b = q25 - iqr*2

    # Use the array's own pixel count so the percentage is right for any tile size.
    n_pixels = band.shape[1] * band.shape[2]
    outlier_counts = np.sum((band > thresh_t) | (band < thresh_b), axis = (1, 2))
    outlier_percs = [100 * (count / n_pixels) for count in outlier_counts]

    to_remove = np.argwhere(np.array(outlier_percs) > 15)
    print([int(x) for x in outlier_percs])
    return to_remove

In [None]:
# Main download loop: for every plot that has no saved array yet, download the
# imagery, drop unusable time steps, patch clouds and shadows, add the indices,
# resample to a regular time grid and save the result.
super_resolve = False
already_done = set(existing)
to_download = [x for x in plot_ids if x not in already_done]
#to_download = [135542409]
errors = []  # plot ids whose processing raised an exception
year = 2019
print("STARTING DOWNLOAD OF {} plots from {} to {}".format(len(to_download), DATA_LOCATION, OUTPUT_FOLDER))
for i, val in enumerate(to_download):
    print("Downloading {}/{}, {}".format(i+1, len(to_download), val))
    try:
        # Inside the try so that one bad plot is logged instead of aborting the run
        location = calc_bbox(val)
        location = bounding_box(location, expansion = IMSIZE*10)

        # Identify cloud steps, download DEM, and download L2A series
        clean_steps, means, probs = identify_clouds(location)
        dem = download_dem(val)
        img, image_request, shadows, shadow_steps = download_layer(location)

        # Subset zenith < 70
        zenith = check_zenith(location)
        zenith = np.mean(np.stack(zenith), axis = (1, 2))
        zenith_outliers = np.argwhere(zenith > 70)
        if len(zenith_outliers) > 0:
            print("Zenith outlier: {}".format(zenith_outliers))

        # Calculate imagery dates as day numbers relative to 1 January of `year`,
        # using 30-day months
        # NOTE(review): the previous year is offset by -360 but the next year by
        # +365 — confirm which year length is intended.
        image_dates = []
        for date in image_request.get_dates():
            if date.year == year - 1:
                image_dates.append(-360 + (date.month-1)*30 + date.day)
            if date.year == year:
                image_dates.append((date.month-1)*30 + date.day)
            if date.year == year + 1:
                image_dates.append(365 + (date.month-1)*30+date.day)
        image_dates = np.array(image_dates)
        print(image_dates)

        # Remove imagery where >10% of pixels are cloudy (prob > 0.3), and where there is null data
        args = np.array([len(np.argwhere(probs[x].flatten() > 0.3)) for x in range(probs.shape[0])])
        dirty_steps = np.argwhere(args > (IMSIZE)*(IMSIZE) / 10)
        missing_images = [np.argwhere(img[x, :, : :].flatten() == 0.0) for x in range(img.shape[0])]
        missing_images = np.array([len(x) for x in missing_images])
        missing_images_p = [np.argwhere(img[x, :, : :].flatten() >= 1) for x in range(img.shape[0])]
        missing_images_p = np.array([len(x) for x in missing_images_p])
        missing_images += missing_images_p
        missing_images = list(np.argwhere(missing_images >= 25))
        to_remove = np.unique(np.array(list(dirty_steps) + list(missing_images) + list(zenith_outliers) + list(shadow_steps)))

        # Remove null steps
        print("There are {}/{} dirty steps: {} cloud, {} missing, {} zenith, {} shadows".format(len(to_remove),
                                                                                    len(img), len(dirty_steps),
                                                                                    len(missing_images),
                                                                                    len(zenith_outliers),
                                                                                    len(shadow_steps)))

        img = np.delete(img, to_remove, 0)
        probs = np.delete(probs, to_remove, 0)
        shadows = np.delete(shadows, to_remove, 0)
        image_dates = np.delete(image_dates, to_remove)

        # Second pass: drop steps with too many band-3 outliers
        to_remove = remove_missed_clouds(img)
        img = np.delete(img, to_remove, 0)
        shadows = np.delete(shadows, to_remove, 0)
        probs = np.delete(probs, to_remove, 0)
        image_dates = np.delete(image_dates, to_remove)
        print("Removing {} steps based on ratio".format(len(to_remove)))


        # Concatenate DEM
        dem = np.tile(dem.reshape((1, IMSIZE, IMSIZE, 1)), (img.shape[0], 1, 1, 1))
        tiles = np.concatenate([img, dem], axis = -1)
        tiles[:, :, :, -1] /= 90

        new_shadows = threshold_shadows(tiles[:, :, :, 3])
        x = remove_cloud_and_shadows(tiles, probs, new_shadows)
        if super_resolve:
            x = x[:, 8:40, 8:40, :]
            print("Before super: {}".format(x.shape))

            d10 = x[:, :, :, 0:4]
            d20 = x[:, :, :, 4:10]

            d10 = np.swapaxes(d10, 1, -1)
            d10 = np.swapaxes(d10, 2, 3)
            d20 = np.swapaxes(d20, 1, -1)
            d20 = np.swapaxes(d20, 2, 3)
            superresolved = DSen2(d10, d20)
            print(superresolved.shape)
            superresolved = np.swapaxes(superresolved, 1, -1)
            print(superresolved.shape)
            superresolved = np.swapaxes(superresolved, 1, 2)
            print(superresolved.shape)
            print(x.shape)

            # returns band IDXs 3, 4, 5, 7, 8, 9
            x[:, :, :, 4:10] = superresolved
            x = x[:, 8:24, 8:24, :]
            print("After super shape: {}".format(x.shape))
        else:
            # Keep the central 16x16 pixels
            bottom = int(IMSIZE/2 - 8)
            top = int(IMSIZE/2 + 8)
            x = x[:, bottom:top, bottom:top, :]

        # Calculate indices
        # NOTE(review): evi / bi / msavi2 / si are defined outside this notebook
        # cell; `amin` is presumably the list of steps with out-of-bounds EVI.
        tiles, amin = evi(x, True)
        # Where evi is OOB, remove (likely cloud cover missed)
        if len(amin) > 0:
            bad_steps = [int(step) for step in np.asarray(amin).ravel()]
            satisfactory = np.array([s for s in range(tiles.shape[0]) if s not in bad_steps])
            for bad in bad_steps:
                # The helper returns offsets relative to `bad`; convert them to
                # absolute step indices before indexing into `tiles`.
                before, after = calculate_proximal_steps_index(bad, satisfactory)
                before = bad + before
                after = bad + after
                print("Interpolating {} with {} and {}".format(bad, before, after))
                bef = tiles[before, :, :, :]
                aft = tiles[after, :, :, :]
                tiles[bad, :, :, :] = (bef + aft) / 2

        tiles = bi(tiles, True)
        tiles = msavi2(tiles, True)
        x = si(tiles, True)

        print("Clean: {}".format(x.shape))

        # Interpolate linearly to 5 day frequency
        tiles, max_distance = calculate_and_save_best_images(x, image_dates) # 22, 16, 16, 10

        # Smooth linear interpolation
        for row in range(0, 16):
            for column in range(0, 16):
                for band in [x for x in range(0, 15) if x != 10]:
                    sm = smooth(tiles[:, row, column, band], 1.5, d = 3)
                    tiles[:, row, column, band] = sm

        # Retain only imagery every 15 days
        biweekly_dates = np.array([day for day in range(0, 360, 5)])
        to_remove = np.argwhere(biweekly_dates % 15 != 0)
        tiles = np.delete(tiles, to_remove, 0)
        print(tiles.shape)

        if max_distance <= 135:
            np.save(OUTPUT_FOLDER + str(val), tiles)
            print("\n")
        else:
            print("Skipping {} because there is a {} distance".format(val, max_distance))
            print("\n")

    except Exception as e:
        print(e)
        logging.fatal(e, exc_info=True)
        # Record the plot id: `img` may not exist yet (or may belong to the
        # previous plot) when the failure happens early in the iteration.
        errors.append(val)
        #continue