# Download and process Sentinel-2 data

## John Brandt
## April 1, 2020

## Package imports, API import, source scripts

In [65]:
import datetime
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import os
import scipy.sparse as sparse
import seaborn as sns
import yaml

from collections import Counter
from osgeo import ogr, osr
from random import shuffle
from scipy.sparse.linalg import splu
from sentinelhub import WmsRequest, WcsRequest, MimeType
from sentinelhub import CRS, BBox, constants, DataSource, CustomUrlParam
from skimage.transform import resize

# Read the Sentinel Hub credentials from the local YAML config. The value under
# 'key' is passed as `instance_id` to every WMS/WCS request in this notebook.
with open("../config.yaml", 'r') as stream:
        # safe_load builds only plain Python objects from the YAML document
        key = (yaml.safe_load(stream))
        API_KEY = key['key'] 
        
%matplotlib inline
%run ../src/utils/slope.py
%run ../src/utils/utils.py
%run ../src/utils/download_utils.py
%run ../src/utils/whittaker_smoother.py
%run ../src/dsen2/utils/DSen2Net.py

## Parameters

In [66]:
# Parameters
# Whether download_plots runs the DSen2 network on the 20 meter bands
SUPER_RESOLVE = True
YEAR = 2019
# Download window: December 15 of the prior year through January 15 of the next
TIME = ('{}-12-15'.format(str(YEAR - 1)), '{}-01-15'.format(str(YEAR + 1)))
EPSG = CRS.WGS84
# Width and height, in pixels, that downloaded imagery is resized to
IMSIZE = 48
# NOTE(review): S2PixelCloudDetector is not imported in the visible cells and
# CLOUD_DETECTOR is not referenced by the functions below - confirm it is needed
CLOUD_DETECTOR = S2PixelCloudDetector(threshold=0.4, average_over=4, dilation_size=2)
DATA_LOCATION = '../data/ghana-test.csv'
OUTPUT_FOLDER = '../data/test-smooth-200/'

# For DSen2 superresolve
MDL_PATH = "../src/dsen2/models/"
# Channel-first input shapes: 4 bands at 10 meters and 6 bands at 20 meters
INPUT_SHAPE = ((4, None, None), (6, None, None))
MODEL = s2model(INPUT_SHAPE, num_layers=6, feature_size=128)
PREDICT_FILE = MDL_PATH+'s2_032_lr_1e-04.hdf5'
MODEL.load_weights(PREDICT_FILE)

# Constants
# Cumulative day count at the start of each month of a non-leap year, so that
# starting_days[month - 1] + day gives the day of year
starting_days = np.cumsum([0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30])
# 5 x 5 blending weights for the interpolated candidate in
# remove_cloud_and_shadows: 1 at the window center, 1/3 at its border
c_arr = np.array([[1, 1, 1, 1, 1,],
                  [1, 2, 2, 2, 1,],
                  [1, 2, 3, 2, 1,],
                  [1, 2, 2, 2, 1,],
                  [1, 1, 1, 1, 1,],])
                  
c_arr = c_arr / 3
# Complementary weights applied to the original (uninterpolated) pixels
o_arr = 1 - c_arr
# Repeat both weight windows along a trailing axis of length 11 so they
# broadcast against (5, 5, 11) tile windows
c_arr = np.tile(c_arr[:, :, np.newaxis], (1, 1, 11))
o_arr = np.tile(o_arr[:, :, np.newaxis], (1, 1, 11))

# Helper functions

In [67]:
def calculate_proximal_steps_index(date, satisfactory):
    """Returns the offsets from date to the nearest cloud and shadow free steps

         Parameters:
          date (int): index of the current time step
          satisfactory (arr): indices of time steps with no clouds or shadows

         Returns:
          arg_before (int): offset from date to the closest prior clean step
          arg_after (int): offset from date to the closest following clean step

         When a clean step exists on only one side of date, the offset of
         that step is returned for both values. When the only clean step is
         date itself, (0, 0) is returned.

         Raises:
          ValueError: if satisfactory is empty
    """
    offsets = np.asarray(satisfactory) - date
    if offsets.size == 0:
        raise ValueError("satisfactory must contain at least one clean time step")

    # Split explicitly by sign so that a missing side is detected as such,
    # rather than falling back to whichever element happens to be first
    earlier = offsets[offsets < 0]
    later = offsets[offsets > 0]
    arg_before = earlier.max() if earlier.size else None
    arg_after = later.min() if later.size else None

    # Compare against None: 0 is a legitimate offset and must not be
    # mistaken for "missing"
    if arg_before is None and arg_after is None:
        # The only clean step is date itself; offsets are relative to date
        return 0, 0
    if arg_after is None:
        arg_after = arg_before
    if arg_before is None:
        arg_before = arg_after
    return arg_before, arg_after


def DSen2(d10, d20):
    """Super resolves 20 meter bands using the DSen2 convolutional
       neural network, as specified in Lanaras et al. 2018
       https://github.com/lanha/DSen2

        Subcalls:
         _predict

        Parameters:
         d10 (arr): channel-first array of the four 10 meter bands
         d20 (arr): channel-first array of the six 20 meter bands

        Returns:
         prediction (arr): output of the DSen2 model for the d20 bands
    """
    network_inputs = [d10, d20]
    band_layout = ((4, None, None), (6, None, None))
    return _predict(network_inputs, band_layout, deep=False)

def _predict(test, input_shape, model = MODEL, deep=False, run_60=False):
    """Runs model.predict on the [10 meter, 20 meter] input pair

        Parameters:
         test (list): inputs handed to model.predict
         input_shape (tuple): accepted but not used here
         model (obj): model exposing predict(); defaults to the module MODEL
         deep (bool): accepted but not used here
         run_60 (bool): accepted but not used here

        Returns:
         prediction (arr): output of model.predict(test, verbose=1)
    """
    return model.predict(test, verbose=1)

# Bounding boxes

In [68]:
def calc_bbox(plot_id, df):
    """ Finds the longitude / latitude extent of one plot in a
        Collect Earth Online survey dataframe

        Parameters:
         plot_id (int): value of PLOT_ID identifying the plot
         df (pandas.DataFrame): CEO survey with PLOT_ID, LON and LAT columns

        Returns:
         bounding_box (list): [(min(x), min(y)),
                               (max(x), max(y))]
    """
    plot_rows = df.loc[df['PLOT_ID'] == plot_id]
    lons = plot_rows['LON']
    lats = plot_rows['LAT']
    # Lower-left corner first, then upper-right corner, both as (x, y)
    lower_left = (min(lons), min(lats))
    upper_right = (max(lons), max(lats))
    return [lower_left, upper_right]

def bounding_box(points, expansion = 160):
    """ Calculates the corners of a bounding box with an
        input expansion in meters from a given bounding_box
        
        Subcalls:
         calculate_epsg, transform

        Parameters:
         points (list): output of calc_bbox, [(min lon, min lat),
                        (max lon, max lat)]
         expansion (float): number of meters to expand or shrink the
                            points edges to be
    
        Returns:
         (bl_utm, tr_utm) (tuple): projected [x, y] of the bottom left and
                                   top right corners, expansion meters apart
                                   along each axis
         crs (CRS): sentinelhub CRS member of the UTM zone of the corners
    """
    bl = list(points[0])
    tr = list(points[1])
    inproj = Proj('epsg:4326')
    outproj_code = calculate_epsg(bl)
    outproj = Proj('epsg:' + str(outproj_code))
    
    # points are (lon, lat); transform is called with latitude first
    bl_utm =  transform(inproj, outproj, bl[1], bl[0])
    tr_utm =  transform(inproj, outproj, tr[1], tr[0])
    
    # Pad (or shrink) each axis symmetrically so the box is centered on the
    # original extent and exactly expansion meters wide
    distance1 = tr_utm[0] - bl_utm[0]
    distance2 = tr_utm[1] - bl_utm[1]
    expansion1 = (expansion - distance1)/2
    expansion2 = (expansion - distance2)/2
        
    bl_utm = [bl_utm[0] - expansion1, bl_utm[1] - expansion2]
    tr_utm = [tr_utm[0] + expansion1, tr_utm[1] + expansion2]

    # The edges are the result of floating point additions on large projected
    # coordinates, so compare with a tolerance instead of exact equality
    assert math.isclose(tr_utm[0] - bl_utm[0], expansion, abs_tol = 1e-6)
    assert math.isclose(tr_utm[1] - bl_utm[1], expansion, abs_tol = 1e-6)

    # Drop the first three characters of the EPSG code to get the zone number
    # NOTE(review): assumes calculate_epsg returns a 326xx / 327xx code - confirm
    zone = str(outproj_code)[3:]
    zone = zone[1:] if zone[0] == "0" else zone
    # Hemisphere is taken from the latitude of the top right corner
    direction = 'N' if tr[1] >= 0 else 'S'
    utm_epsg = "UTM_" + zone + direction
    return (bl_utm, tr_utm), CRS[utm_epsg]

# Data download

In [69]:
def mcm_shadow_mask(arr, c_probs):
    """ Calculates the multitemporal shadow mask for Sentinel-2 using
        the methods from Candra et al. 2020 on L1C images and matching
        outputs to the s2cloudless cloud probabilities

        Parameters:
         arr (arr): (Time, X, Y, Band) array of L1C data scaled from [0, 1].
                    Band indices 0, 1 and 2 are compared as B2, B8A and B11
                    (per the local variable names)
         c_probs (arr): (Time, 96, 96) array of S2cloudless cloud
                        probabilities; the 96 x 96 size is required by the
                        hard-coded reshapes below
    
        Returns:
         shadows_new (arr): (Time, 96, 96) shadow mask of 0. / 1. after
                            Candra et al. 2020, cloud matching and dilation
    """
    
    def _rank_array(arr):
        # Double argsort: position of each element in the sorted order
        order = arr.argsort()
        ranks = order.argsort()
        return ranks
    
    # Pick the reference image: the time step with the lowest combined rank of
    # mean cloud probability and absolute difference from the mean of the
    # steps whose mean cloud probability is at most 0.25
    mean_c_probs = np.mean(c_probs, axis = (1, 2))
    cloudy_steps = np.argwhere(mean_c_probs > 0.25)
    images_clean = np.delete(arr, cloudy_steps, 0)
    cloud_ranks = _rank_array(mean_c_probs)
    diffs = abs(np.sum(arr - np.mean(images_clean, axis = 0), axis = (1, 2, 3)))
    diff_ranks = _rank_array(diffs)
    overall_rank = diff_ranks + cloud_ranks
    reference_idx = np.argmin(overall_rank)
    ri = arr[reference_idx]
    print("The shadow reference index is: {}".format(reference_idx))
    
    shadows = np.zeros((arr.shape[0], 96, 96))    
    # Candra et al. 2020
    # A pixel is flagged as shadow when, relative to the reference image, band
    # 0 rose by less than 0.1, bands 1 and 2 both dropped by more than 0.04,
    # and band 0 itself is below 0.095
    
    for time in range(arr.shape[0]):
        for x in range(arr.shape[1]):
            for y in range(arr.shape[2]):
                ti_slice = arr[time, x, y]
                ri_slice = ri[x, y]
                deltab2 = ti_slice[0] - ri_slice[0]
                #deltab3 = ti_slice[2] - ri_slice[2]
                #deltab4 = ti_slice[3] - ri_slice[3]
                deltab8a = ti_slice[1] - ri_slice[1]
                deltab11 = ti_slice[2] - ri_slice[2]

                if deltab2 <0.1: #(1000/65535):
                    #if deltab3 < (800/65535)
                        #if deltab4 < (800/65535)
                    if deltab8a < -0.04: # (-400/65535):
                        if deltab11 < -0.04: ##(-400/65535):
                            if ti_slice[0] < 0.095: #(950/65535):
                                shadows[time, x, y] = 1.
                                        
    # NOTE(review): shadows_original is computed but never used or returned
    shadows_original = np.copy(shadows)
    # Remove shadows if cannot coreference a cloud
    print(shadows.shape)
    # Count shadow pixels in each 8 x 8 block, giving a 12 x 12 grid per step
    shadow_large = np.reshape(shadows, (shadows.shape[0], 96//8, 8, 96//8, 8))
    shadow_large = np.sum(shadow_large, axis = (2, 4))
    
    # Binarize the cloud probabilities at 0.33 (values of exactly 0.33 are
    # left unchanged) and count cloudy pixels in the same 8 x 8 blocks
    cloud_large = np.copy(c_probs)
    cloud_large[np.where(c_probs > 0.33)] = 1.
    cloud_large[np.where(c_probs < 0.33)] = 0.
    cloud_large = np.reshape(cloud_large, (shadows.shape[0], 96//8, 8, 96//8, 8))
    cloud_large = np.sum(cloud_large, axis = (2, 4))
    for time in range(shadow_large.shape[0]):
        for x in range(shadow_large.shape[1]):
            x_low = np.max([x - 8, 0])
            x_high = np.min([x + 8, shadow_large.shape[1] - 1])
            for y in range(shadow_large.shape[2]):
                y_low = np.max([y - 8, 0])
                # NOTE(review): uses shape[1] for the y axis; equivalent only
                # because the grid is square
                y_high = np.min([y + 8, shadow_large.shape[1] - 1])
                # A block counts as shadow when it has at least 8 shadow pixels
                if shadow_large[time, x, y] < 8:
                    shadow_large[time, x, y] = 0.
                if shadow_large[time, x, y] >= 8:
                    shadow_large[time, x, y] = 1.
                # NOTE(review): the slice end is exclusive, so the last row and
                # column of blocks are never part of the window - confirm intent
                c_prob_window = cloud_large[time, x_low:x_high, y_low:y_high]
                # Drop the shadow block unless some block in the window holds
                # at least 24 cloudy pixels
                if np.max(c_prob_window) < 24:
                    shadow_large[time, x, y] = 0.
                    
    
    # Back to pixel resolution (order = 0) and mask the per-pixel shadows
    shadow_large = resize(shadow_large, (shadow_large.shape[0], 96, 96), order = 0)
    shadows *= shadow_large
    
    # Go through and aggregate the shadow map to an 80m grid, and extend it one grid size around
    # any positive ID
    
    
    # Keep only 8 x 8 blocks with at least 16 remaining shadow pixels
    shadows = np.reshape(shadows, (shadows.shape[0], 96//8, 8, 96//8, 8))
    shadows = np.sum(shadows, axis = (2, 4))
    shadows[np.where(shadows < 16)] = 0.
    shadows[np.where(shadows >= 16)] = 1.
    shadows = resize(shadows, (shadows.shape[0], 96, 96), order = 0)
    # Reduce to a 24 x 24 grid of 4 x 4 pixel cells, taking the max of each cell
    shadows = np.reshape(shadows, (shadows.shape[0], 96//4, 4, 96//4, 4))
    shadows = np.max(shadows, (2, 4))
    
    # Dilate every shadow cell into its neighbouring cells
    shadows_new = np.zeros_like(shadows)
    for time in range(shadows.shape[0]):
        for x in range(shadows.shape[1]):
            for y in range(shadows.shape[2]):
                if shadows[time, x, y] == 1:
                    min_x = np.max([x - 1, 0])
                    # NOTE(review): max_x / max_y are clipped to shape - 1 and
                    # then used as exclusive range ends, so the last row and
                    # column are never set (including a shadow cell located
                    # there) - confirm intent
                    max_x = np.min([x + 2, shadows.shape[1] - 1])
                    min_y = np.max([y - 1, 0])
                    max_y = np.min([y + 2, shadows.shape[1] - 1])
                    for x_idx in range(min_x, max_x):
                        for y_idx in range(min_y, max_y):
                            shadows_new[time, x_idx, y_idx] = 1.
    shadows_new = resize(shadows_new, (shadows.shape[0], 96, 96), order = 0)
    # Printed value is the percentage of all pixels flagged as shadow
    print("The shadow probability is: {}".format(100*np.sum(shadows_new)/(96*96*shadows_new.shape[0])))
    return np.array(shadows_new)


def identify_clouds(bbox, epsg, time = TIME):
    """ Downloads the cloud probability and shadow layers for a bounding
        box and derives the multitemporal shadow mask

        Subcalls:
         mcm_shadow_mask

        Parameters:
         bbox (tuple): corners of the bounding box, as output by bounding_box
         epsg (CRS): coordinate reference system of bbox
         time (tuple): start and end dates of the download window

        Returns:
         cloud_img (arr): cloud probabilities cropped to the central
                          48 x 48 pixels of the 96 x 96 request
         shadows (arr): output of mcm_shadow_mask with the same crop

        Any exception is logged and the download is attempted again, up to
        five times in total; None is returned if every attempt fails.
    """
    for _attempt in range(5):
        try:
            box = BBox(bbox, crs = epsg)
            # Settings shared by the cloud and the shadow request
            shared_settings = dict(
                bbox=box,
                time=time,
                width=96,
                height=96,
                maxcc=0.75,
                instance_id=API_KEY,
                time_difference=datetime.timedelta(hours=48))

            cloud_request = WmsRequest(
                layer='CLOUD_NEW',
                image_format=MimeType.TIFF_d8,
                custom_url_params={constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
                **shared_settings)

            shadow_request = WmsRequest(
                layer='SHADOW',
                image_format=MimeType.TIFF_d16,
                custom_url_params={constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
                **shared_settings)

            # 8 bit cloud probabilities are rescaled to [0, 1]
            cloud_img = np.array(cloud_request.get_data())
            if np.any(cloud_img > 10):
                cloud_img = cloud_img / 255
            assert np.max(cloud_img) <= 1.
            print("Cloud_probs shape: {}".format(cloud_img.shape))

            # 16 bit shadow bands are rescaled to [0, 1]
            shadow_img = np.array(shadow_request.get_data())
            print("Shadows_shape: {}".format(shadow_img.shape))
            if np.any(shadow_img > 10):
                shadow_img = shadow_img / 65535
            print(np.max(shadow_img))

            shadow_mask = mcm_shadow_mask(np.array(shadow_img), cloud_img)
            # Keep the central 48 x 48 pixels of both 96 x 96 outputs
            central = (slice(None), slice(24, -24), slice(24, -24))
            return cloud_img[central], shadow_mask[central]
        except Exception as e:
            logging.fatal(e, exc_info=True)
    return None
    
    
def download_dem(plot_id, df, epsg, image_format = MimeType.TIFF_d32f):
    #! TODO: ensure that centroid vs. bbox is correctly distinguished
    """ Downloads the digital elevation model for a plot and returns
        its per-pixel slope

        Subcalls:
         calc_bbox, bounding_box, calcSlope

        Parameters:
         plot_id (int): plot id from collect earth online (CEO)
         df (pandas.DataFrame): data associated with plot_id from CEO
         epsg: accepted, but replaced by the CRS that bounding_box returns
         image_format (MimeType): format requested for the DEM

        Returns:
         slope (arr): (IMSIZE, IMSIZE, 1) array of per-pixel slope
    """
    # Request one extra pixel on every side; the border is cropped after the
    # slope has been calculated
    padded = IMSIZE + 2
    corners = calc_bbox(plot_id, df = df)
    utm_corners, epsg = bounding_box(corners, expansion = padded*10)
    dem_request = WmsRequest(data_source=DataSource.DEM,
                             layer='DEM',
                             bbox=BBox(utm_corners, crs = epsg),
                             width=padded,
                             height=padded,
                             instance_id=API_KEY,
                             image_format=image_format,
                             custom_url_params={CustomUrlParam.SHOWLOGO: False})
    elevation = dem_request.get_data()[0]
    slope = calcSlope(elevation.reshape((1, padded, padded)),
                      np.full((padded, padded), 10),
                      np.full((padded, padded), 10),
                      zScale = 1, minSlope = 0.02)
    slope = slope.reshape((padded, padded, 1))
    return slope[1:IMSIZE+1, 1:IMSIZE+1, :]

        
def download_layer(bbox, epsg, time = TIME, image_format = MimeType.TIFF_d16):
    """ Downloads the L2A sentinel layer with 10 and 20 meter bands
        
        Parameters:
         bbox (list): corners of the bounding box, as output by bounding_box
         epsg (CRS): coordinate reference system associated with bbox 
         time (tuple): YY-MM-DD - YY-MM-DD bounds for downloading 
         image_format (MimeType): format requested for both layers
    
        Returns:
         img (arr): (Time, IMSIZE, IMSIZE, Band) array with the 10 meter
                    bands first, followed by the 20 meter bands
         img_request (obj): the WcsRequest of the 10 meter layer, used by
                            the caller to read the acquisition dates

        Any exception is logged and None is returned instead of the tuple.
    """
    try:
        box = BBox(bbox, crs = epsg)
        image_request = WcsRequest(
                layer='L2A20',
                bbox=box,
                time=time,
                image_format = image_format,
                maxcc=0.75,
                resx='20m', resy='20m',
                instance_id=API_KEY,
                custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'NEAREST',
                                    constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
                time_difference=datetime.timedelta(hours=48),
            )
        img_bands = image_request.get_data()
        img_20 = np.stack(img_bands)

        # Raw 16 bit values are rescaled to [0, 1]
        if np.max(img_20) >= 10:
            img_20 = img_20 / 65535
        assert np.max(img_20) <= 2.

        s2_20_usage = (img_20.shape[1]*img_20.shape[2])/(512*512) * (6/3) * img_20.shape[0]
        print("Original 20 meter bands size: {}, using {} PU".format(img_20.shape, s2_20_usage))
        # Nearest-neighbour (order = 0) resize onto the IMSIZE grid
        img_20 = resize(img_20, (img_20.shape[0], IMSIZE, IMSIZE, img_20.shape[-1]), order = 0)
        
        image_request = WcsRequest(
                layer='L2A10',
                bbox=box,
                time=time,
                image_format = image_format,
                maxcc=0.75,
                resx='10m', resy='10m',
                instance_id=API_KEY,
                custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'BICUBIC',
                                    constants.CustomUrlParam.UPSAMPLING: 'BICUBIC'},
                time_difference=datetime.timedelta(hours=48),
        )
        
        img_bands = image_request.get_data()
        img_10 = np.stack(img_bands)
        print("The original L2A image size is: {}".format(img_10.shape))
        img_10 = resize(img_10, (img_10.shape[0], IMSIZE, IMSIZE, img_10.shape[-1]), order = 0)

        # Rescale the 10 meter bands BEFORE they are concatenated, so that the
        # returned array is on the same scale as the 20 meter bands
        if np.max(img_10) >= 10:
            img_10 = img_10 / 65535
        assert np.max(img_10) <= 2.

        img = np.concatenate([img_10, img_20], axis = -1)
        return img, image_request

    except Exception as e:
        logging.fatal(e, exc_info=True)
        return None

# Cloud and shadow removal

In [70]:
def remove_cloud_and_shadows(tiles, probs, shadows, image_dates, wsize = 5):
    """ Interpolates clouds and shadows for each time step with 
        linear combination of proximal clean time steps for each
        region of specified window size
        
        Subcalls:
         calculate_proximal_steps_index

        Parameters:
         tiles (arr): (Time, IMSIZE, IMSIZE, Band) imagery; modified in place.
                      The blend multiplies each window by c_arr / o_arr, so
                      Band must broadcast against their trailing axis of 11
         probs (arr): (Time, IMSIZE, IMSIZE) cloud probabilities
         shadows (arr): (Time, IMSIZE, IMSIZE) shadow mask added to the
                        binarized cloud mask
         image_dates (arr): date of each time step; only differences between
                            entries are used
         wsize (int): edge length in pixels of the sliding window. The blend
                      weights c_arr / o_arr are 5 x 5, so only the default
                      matches them
    
        Returns:
         tiles (arr): the input tiles array after in-place interpolation
    """
    # Subtract the per-pixel minimum over time, then binarize at 0.33 (values
    # of exactly 0.33 are left unchanged)
    c_probs = np.copy(probs)
    c_probs = c_probs - np.min(c_probs, axis = 0)
    c_probs[np.where(c_probs > 0.33)] = 1.
    c_probs[np.where(c_probs < 0.33)] = 0.
    # Count cloudy pixels per 8 x 8 block and flag whole blocks that hold at
    # least 12 of them
    c_probs = np.reshape(c_probs, [c_probs.shape[0], int(IMSIZE/8), 8, int(IMSIZE/8), 8])
    c_probs = np.sum(c_probs, (2, 4))
    # NOTE(review): the third positional argument of resize is presumably the
    # interpolation order (0 = nearest neighbour) - confirm
    c_probs = resize(c_probs, (c_probs.shape[0], IMSIZE, IMSIZE), 0)
    c_probs[np.where(c_probs < 12)] = 0.
    c_probs[np.where(c_probs >= 12)] = 1.
    # Merge in the shadow mask and clip the combined mask to 1
    c_probs += shadows
    c_probs[np.where(c_probs >= 1.)] = 1.
    n_interp = 0
    # NOTE(review): the window start range uses a hard-coded 5 instead of wsize
    # and excludes IMSIZE - 5, so the last pixel row / column is never part of
    # a window - confirm intent
    for cval in range(0, IMSIZE - 5, 1):
        for rval in range(0, IMSIZE - 5, 1):
            subs = c_probs[:, cval:cval + wsize, rval:rval+wsize]
            # Time steps whose window holds fewer than 10 flagged pixels
            satisfactory = [x for x in range(c_probs.shape[0]) if np.sum(subs[x, :, :]) < 10]
            satisfactory = np.array(satisfactory)
            for date in range(0, tiles.shape[0]):
                # Windows with more than 10 flagged pixels are interpolated; a
                # window with exactly 10 is neither clean nor interpolated
                if np.sum(subs[date, :, :]) > 10:
                    n_interp += 1
                    # Offsets to the nearest clean steps, converted to indices
                    before, after = calculate_proximal_steps_index(date, satisfactory)
                    before = date + before
                    after = date + after
                    if after >= tiles.shape[0]:
                        after = before
                    if before < 0:
                        before = after
                    bef = tiles[before, cval:cval+wsize, rval:rval+wsize, : ]
                    aft = tiles[after, cval:cval+wsize, rval:rval+wsize, : ]
                    # From here on before / after hold dates, not indices
                    before = image_dates[before]
                    after = image_dates[after]
                    before_diff = abs(image_dates[date] - before)
                    after_diff = abs(image_dates[date] - after)
                    # The temporally closer clean step gets the larger weight
                    # NOTE(review): divides by zero if both differences are 0
                    bef_wt = 1 - before_diff / (before_diff + after_diff)
                    aft_wt = 1 - bef_wt
                    candidate = bef_wt*bef + aft_wt*aft
                    # Blend the candidate with the original window: c_arr
                    # favours the candidate at the window center, o_arr the
                    # original towards the border
                    candidate = candidate*c_arr + tiles[date, cval:cval+wsize, rval:rval+wsize, : ]*o_arr
                    tiles[date, cval:cval+wsize, rval:rval+wsize, : ] = candidate  
    # n_interp counts interpolated windows (one per window and time step)
    print("Interpolated {} px".format(n_interp))
    return tiles

def remove_missed_clouds(img):
    """ Removes steps that are likely to be missed cloud or shadows
        based on two interquartile ranges for the near infrared band
        
        Parameters:
         img (arr): (Time, X, Y, Band) array; band index 3 is used as the
                    near infrared band

        Returns:
         to_remove (arr): (N, 1) array with the indices of the time steps
                          where more than 15% of the pixels are outliers
    """
    nir = img[:, :, :, 3]
    # Outlier fences: two interquartile ranges beyond the quartiles of the
    # band over all time steps
    q25 = np.percentile(nir, 25)
    q75 = np.percentile(nir, 75)
    iqr = q75 - q25
    thresh_t = q75 + iqr*2
    thresh_b = q25 - iqr*2

    # Percentage of pixels outside the fences for every time step, relative
    # to the number of pixels that a step actually has
    outliers = (nir > thresh_t) | (nir < thresh_b)
    pixels_per_step = nir.shape[1] * nir.shape[2]
    outlier_percs = 100 * (np.sum(outliers, axis = (1, 2)) / pixels_per_step)
    to_remove = np.argwhere(outlier_percs > 15)
    return to_remove

# Download function

In [71]:
def download_plots(data_location = DATA_LOCATION, output_folder = OUTPUT_FOLDER, image_format = MimeType.TIFF_d16):
    """ Downloads slope and sentinel-2 data for all plots associated
        with an input CSV from a collect earth online survey
        
        Parameters:
         data_location (os.path): path of the CEO survey CSV, which must
                                  have PLOT_ID, LON and LAT columns
         output_folder (os.path): existing folder that receives the output;
                                  plots already saved there are skipped
         image_format (MimeType): format requested for the L2A imagery
        
        Subcalls:
         calc_bbox, bounding_box
         identify_clouds, download_layer, download_dem
         remove_cloud_and_shadows, remove_missed_clouds
         DSen2
         evi, bi, msavi2, si
         calculate_and_save_best_images, intialize_smoother, interpolate_array
         
        Creates:
         output_folder/{plot_id}.npy
    
        Returns:
         None

        An exception raised while processing a plot is printed and logged,
        and processing continues with the next plot.
    """
    df = pd.read_csv(data_location)
    for column in ['IMAGERY_TITLE', 'STACKINGPROFILEDG', 'PL_PLOTID', 'IMAGERYYEARDG']:
        if column in df.columns:
            df = df.drop(column, axis = 1)
    df = df.dropna(axis = 0)
    plot_ids = sorted(df['PLOT_ID'].unique())
    # Saved files are named {plot_id}.npy; strip the four character extension
    existing = [int(x[:-4]) for x in os.listdir(output_folder) if ".DS" not in x]
    to_download = [x for x in plot_ids if x not in existing]
    print("STARTING DOWNLOAD OF {} plots from {} to {}".format(len(to_download), data_location, output_folder))
    for i, val in enumerate(to_download):
        print("Downloading {}/{}, {}".format(i+1, len(to_download), val))
        initial_bbx = calc_bbox(val, df = df)
        sentinel2_bbx, epsg = bounding_box(initial_bbx, expansion = IMSIZE*10)
        # Clouds and shadows are identified on a larger, 96 pixel wide box
        cloud_bbx, _ = bounding_box(initial_bbx, expansion = 96*10)
        try:
            # Identify cloud steps, download DEM, and download L2A series
            probs, shadows = identify_clouds(cloud_bbx, epsg = epsg)
            shadow_sums = np.sum(shadows, axis = (1, 2))
            # Steps where more than a third of the 48 x 48 crop is shadow
            shadow_steps = np.argwhere(shadow_sums > (48*48/3))
            dem = download_dem(val, epsg = epsg, df = df)
            img, image_request = download_layer(sentinel2_bbx, epsg = epsg, image_format = image_format)

            # Calculate imagery dates as day of year relative to YEAR, with
            # the prior year negative and the following year above 365
            image_dates = []
            for date in image_request.get_dates():
                if date.year == YEAR - 1:
                    image_dates.append(-365 + starting_days[(date.month-1)] + date.day)
                if date.year == YEAR:
                    image_dates.append(starting_days[(date.month-1)] + date.day)
                if date.year == YEAR + 1:
                    image_dates.append(365 + starting_days[(date.month-1)]+date.day)
            image_dates = np.array(image_dates)

            # Remove imagery where more than 20% of the pixels have a cloud
            # probability above 0.3, and where there is null data (at least
            # 25 values that are exactly 0 or at least 1)
            args = np.array([len(np.argwhere(probs[x].flatten() > 0.3)) for x in range(probs.shape[0])])
            dirty_steps = np.argwhere(args > (IMSIZE)*(IMSIZE) / 5)
            missing_images = [np.argwhere(img[x, :, : :].flatten() == 0.0) for x in range(img.shape[0])]
            missing_images = np.array([len(x) for x in missing_images])
            missing_images_p = [np.argwhere(img[x, :, : :].flatten() >= 1) for x in range(img.shape[0])]
            missing_images_p = np.array([len(x) for x in missing_images_p])
            missing_images += missing_images_p
            missing_images = list(np.argwhere(missing_images >= 25))
            to_remove = np.unique(np.array(list(dirty_steps) + list(missing_images) + list(shadow_steps)))

            # Remove null steps
            print("There are {}/{} dirty steps: {}"
                  " cloud, {} missing, {} shadow".format(len(to_remove),
                                                         len(img), len(dirty_steps),
                                                         len(missing_images),
                                                         len(shadow_steps)))

            img = np.delete(img, to_remove, 0)
            probs = np.delete(probs, to_remove, 0)
            image_dates = np.delete(image_dates, to_remove)
            shadows = np.delete(shadows, to_remove, 0)

            to_remove = remove_missed_clouds(img)
            img = np.delete(img, to_remove, 0)
            probs = np.delete(probs, to_remove, 0)
            image_dates = np.delete(image_dates, to_remove)
            shadows = np.delete(shadows, to_remove, 0)
            print("Removing {} steps based on ratio".format(len(to_remove)))


            # Concatenate DEM as the last band of every time step
            dem = np.tile(dem.reshape((1, IMSIZE, IMSIZE, 1)), (img.shape[0], 1, 1, 1))
            tiles = np.concatenate([img, dem], axis = -1)
            tiles[:, :, :, -1] /= 90

            x = remove_cloud_and_shadows(tiles, probs, shadows, image_dates)
            if SUPER_RESOLVE:
                # Super resolve the central 32 x 32 pixels, then keep the
                # central 16 x 16 pixels of the result
                x = x[:, 8:40, 8:40, :]
                print("Shape before super: {}".format(x.shape))

                d10 = x[:, :, :, 0:4]
                d20 = x[:, :, :, 4:10]

                # (Time, X, Y, Band) -> (Time, Band, X, Y) for the network
                d10 = np.swapaxes(d10, 1, -1)
                d10 = np.swapaxes(d10, 2, 3)
                d20 = np.swapaxes(d20, 1, -1)
                d20 = np.swapaxes(d20, 2, 3)
                superresolved = DSen2(d10, d20)
                # (Time, Band, X, Y) -> (Time, X, Y, Band)
                superresolved = np.swapaxes(superresolved, 1, -1)
                superresolved = np.swapaxes(superresolved, 1, 2)
                print(superresolved.shape)
                print(x.shape)

                # returns band IDXs 3, 4, 5, 7, 8, 9
                x[:, :, :, 4:10] = superresolved
                x = x[:, 8:24, 8:24, :]
                print("Shape after super: {}".format(x.shape))
            else:
                bottom = int(IMSIZE/2 - 8)
                top = int(IMSIZE/2 + 8)
                x = x[:, bottom:top, bottom:top, :]

            # Calculate indices
            tiles = evi(x, True)
            tiles = bi(tiles, True)
            tiles = msavi2(tiles, True)
            x = si(tiles, True)

            print("Shape after vegetation indexes: {}".format(x.shape))
            
            # Replace NaN pixels with the mean of the valid pixels of the same
            # band and time step. A boolean mask selects exactly the NaN
            # pixels, and nanmean ignores them when computing the fill value.
            missing_pixels = 0
            for band in range(0, 15):
                for time in range(0, x.shape[0]):
                    x_i = x[time, :, :, band]
                    nan_mask = np.isnan(x_i)
                    n_missing = int(np.sum(nan_mask))
                    if n_missing > 0:
                        missing_pixels += n_missing
                        x_i[nan_mask] = np.nanmean(x_i)
                        x[time, :, :, band] = x_i
            print("There are {} missing pixels".format(missing_pixels))

            # Interpolate linearly to 5 day frequency
            tiles, max_distance = calculate_and_save_best_images(x, image_dates)

            # Smooth linear interpolation
            # NOTE(review): coefmat is not used below; the call is kept in
            # case intialize_smoother sets up state for interpolate_array
            coefmat = intialize_smoother()
            tiles = interpolate_array(tiles, dim = tiles.shape[1])
            
            # Only save plots whose max_distance is at most 240
            if max_distance <= 240:
                np.save(output_folder + str(val), tiles)
                print("Saved array of {} shape to {}".format(tiles.shape, val))
                print("\n")
            else:
                print("Skipping {} because there is a {} distance".format(val, max_distance))
                print("\n")

        except Exception as e:
            print(e)
            logging.fatal(e, exc_info=True)

In [None]:
# Download every survey CSV found in ../data/test-csv/ into ../data/test-s2/
for i in os.listdir("../data/test-csv/"):
    if ".csv" in i:
    #if ".csv" in i:
        #if any(x in i for x in ["africa-west", "cameroon", "koure", "niger"]):
        # download_plots has no return statement, so tile is always None; the
        # results are the .npy files written to the output folder
        tile = download_plots("../data/test-csv/" + i, "../data/test-s2/", image_format = MimeType.TIFF_d16)

STARTING DOWNLOAD OF 44 plots from ../data/test-csv/africaoceana-test.csv to ../data/test-s2/
Downloading 1/44, 136752954
13.485773900035722 -1.6591198644713157
13.485773900035722 -1.6591198644713157
Cloud_probs shape: (36, 96, 96)
Shadows_shape: (36, 96, 96, 3)
1.0
The shadow reference index is: 4
(36, 96, 96)
The shadow probability is: 0.7185570987654321
13.485773900035722 -1.6591198644713157
Original 20 meter bands size: (36, 24, 24, 6), using 0.158203125 PU
The original L2A image size is: (36, 48, 48, 4)
There are 31/36 dirty steps: 31 cloud, 4 missing, 0 shadow
Removing 0 steps based on ratio
Interpolated 272 px
Shape before super: (5, 32, 32, 11)
(5, 32, 32, 6)
(5, 32, 32, 11)
Shape after super: (5, 16, 16, 11)
Shape after vegetation indexes: (5, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 270
Skipping 136752954 because there is a 270 distance


Downloading 2/44, 136752976
22.84186361835352 -7.914100617866899
22.84186361835352 -7.914100617866899
Cloud_probs shap

Interpolated 1061 px
Shape before super: (50, 32, 32, 11)
(50, 32, 32, 6)
(50, 32, 32, 11)
Shape after super: (50, 16, 16, 11)
Shape after vegetation indexes: (50, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 35
Saved array of (24, 16, 16, 15) shape to 136752984


Downloading 11/44, 136752985
33.60337388066582 -13.252282405690938
33.60337388066582 -13.252282405690938
Cloud_probs shape: (57, 96, 96)
Shadows_shape: (57, 96, 96, 3)
1.0
The shadow reference index is: 11
(57, 96, 96)
The shadow probability is: 3.3717105263157894
33.60337388066582 -13.252282405690938
Original 20 meter bands size: (57, 24, 24, 6), using 0.25048828125 PU
The original L2A image size is: (57, 48, 48, 4)
There are 24/57 dirty steps: 22 cloud, 7 missing, 2 shadow
Removing 4 steps based on ratio
Interpolated 1005 px
Shape before super: (29, 32, 32, 11)
(29, 32, 32, 6)
(29, 32, 32, 11)
Shape after super: (29, 16, 16, 11)
Shape after vegetation indexes: (29, 16, 16, 15)
There are 0 missing pixels
Max

Original 20 meter bands size: (59, 24, 24, 6), using 0.25927734375 PU
The original L2A image size is: (59, 48, 48, 4)
There are 35/59 dirty steps: 35 cloud, 6 missing, 0 shadow
Removing 6 steps based on ratio
Interpolated 737 px
Shape before super: (18, 32, 32, 11)
(18, 32, 32, 6)
(18, 32, 32, 11)
Shape after super: (18, 16, 16, 11)
Shape after vegetation indexes: (18, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 110
Saved array of (24, 16, 16, 15) shape to 136752999


Downloading 21/44, 136753000
15.439100449869725 14.703126097794076
15.439100449869725 14.703126097794076
Cloud_probs shape: (70, 96, 96)
Shadows_shape: (70, 96, 96, 3)
1.0
The shadow reference index is: 1
(70, 96, 96)
The shadow probability is: 0.0
15.439100449869725 14.703126097794076
Original 20 meter bands size: (70, 24, 24, 6), using 0.3076171875 PU
The original L2A image size is: (70, 48, 48, 4)
There are 11/70 dirty steps: 11 cloud, 1 missing, 0 shadow
Removing 1 steps based on ratio
Interpolated 0

Cloud_probs shape: (52, 96, 96)
Shadows_shape: (52, 96, 96, 3)
1.0
The shadow reference index is: 36
(52, 96, 96)
The shadow probability is: 0.484107905982906
172.24343757481532 -43.466939339830105
Original 20 meter bands size: (52, 24, 24, 6), using 0.228515625 PU
The original L2A image size is: (52, 48, 48, 4)
There are 26/52 dirty steps: 26 cloud, 1 missing, 0 shadow
Removing 0 steps based on ratio
Interpolated 940 px
Shape before super: (26, 32, 32, 11)
(26, 32, 32, 6)
(26, 32, 32, 11)
Shape after super: (26, 16, 16, 11)
Shape after vegetation indexes: (26, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 60
Saved array of (24, 16, 16, 15) shape to 136753129


Downloading 31/44, 136753130
175.66697476108934 -39.792533062341214
175.66697476108934 -39.792533062341214
Cloud_probs shape: (57, 96, 96)
Shadows_shape: (57, 96, 96, 3)
1.0
The shadow reference index is: 11
(57, 96, 96)
The shadow probability is: 5.254020467836257
175.66697476108934 -39.792533062341214
Original 

Saved array of (24, 16, 16, 15) shape to 136753140


Downloading 40/44, 136753141
172.2821592206133 -42.80563832618447
172.2821592206133 -42.80563832618447
Cloud_probs shape: (51, 96, 96)
Shadows_shape: (51, 96, 96, 3)
1.0
The shadow reference index is: 11
(51, 96, 96)
The shadow probability is: 4.9223856209150325
172.2821592206133 -42.80563832618447
Original 20 meter bands size: (51, 24, 24, 6), using 0.22412109375 PU
The original L2A image size is: (51, 48, 48, 4)
There are 29/51 dirty steps: 21 cloud, 9 missing, 4 shadow
Removing 0 steps based on ratio
Interpolated 916 px
Shape before super: (22, 32, 32, 11)
(22, 32, 32, 6)
(22, 32, 32, 11)
Shape after super: (22, 16, 16, 11)
Shape after vegetation indexes: (22, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 85
Saved array of (24, 16, 16, 15) shape to 136753141


Downloading 41/44, 136753143
175.3605339460013 -39.5962528014897
175.3605339460013 -39.5962528014897
Cloud_probs shape: (74, 96, 96)
Shadows_shape: (74, 96, 

(37, 32, 32, 6)
(37, 32, 32, 11)
Shape after super: (37, 16, 16, 11)
Shape after vegetation indexes: (37, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 140
Saved array of (24, 16, 16, 15) shape to 135804031


Downloading 6/51, 135804032
77.24170290182931 23.99244558906391
77.24170290182931 23.99244558906391
Cloud_probs shape: (72, 96, 96)
Shadows_shape: (72, 96, 96, 3)
1.0
The shadow reference index is: 53
(72, 96, 96)
The shadow probability is: 0.0
77.24170290182931 23.99244558906391
Original 20 meter bands size: (71, 24, 24, 6), using 0.31201171875 PU
The original L2A image size is: (71, 48, 48, 4)
There are 31/71 dirty steps: 30 cloud, 6 missing, 0 shadow
Removing 0 steps based on ratio
Interpolated 29 px
Shape before super: (40, 32, 32, 11)
(40, 32, 32, 6)
(40, 32, 32, 11)
Shape after super: (40, 16, 16, 11)
Shape after vegetation indexes: (40, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 110
Saved array of (24, 16, 16, 15) shape to 135804032


Down

The original L2A image size is: (71, 48, 48, 4)
There are 24/71 dirty steps: 23 cloud, 8 missing, 1 shadow
Removing 0 steps based on ratio
Interpolated 408 px
Shape before super: (47, 32, 32, 11)
(47, 32, 32, 6)
(47, 32, 32, 11)
Shape after super: (47, 16, 16, 11)
Shape after vegetation indexes: (47, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 70
Saved array of (24, 16, 16, 15) shape to 135804055


Downloading 16/51, 135804056
81.30095565091828 25.820013771554123
81.30095565091828 25.820013771554123
Cloud_probs shape: (62, 96, 96)
Shadows_shape: (62, 96, 96, 3)
0.9573052567330435
The shadow reference index is: 22
(62, 96, 96)
The shadow probability is: 0.0
81.30095565091828 25.820013771554123
Original 20 meter bands size: (62, 24, 24, 6), using 0.2724609375 PU
The original L2A image size is: (62, 48, 48, 4)
There are 37/62 dirty steps: 37 cloud, 1 missing, 0 shadow
Removing 0 steps based on ratio
Interpolated 94 px
Shape before super: (25, 32, 32, 11)
(25, 32, 32, 6)


(59, 96, 96)
The shadow probability is: 0.7003295668549906
81.20082957852716 18.77409194446543
Original 20 meter bands size: (59, 24, 24, 6), using 0.25927734375 PU
The original L2A image size is: (59, 48, 48, 4)
There are 22/59 dirty steps: 20 cloud, 9 missing, 0 shadow
Removing 1 steps based on ratio
Interpolated 35 px
Shape before super: (36, 32, 32, 11)
(36, 32, 32, 6)
(36, 32, 32, 11)
Shape after super: (36, 16, 16, 11)
Shape after vegetation indexes: (36, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 150
Saved array of (24, 16, 16, 15) shape to 135804071


Downloading 26/51, 135804072
73.3096995361436 20.139212622442038
73.3096995361436 20.139212622442038
Cloud_probs shape: (55, 96, 96)
Shadows_shape: (55, 96, 96, 3)
1.0
The shadow reference index is: 11
(55, 96, 96)
The shadow probability is: 0.9059343434343434
73.3096995361436 20.139212622442038
Original 20 meter bands size: (55, 24, 24, 6), using 0.24169921875 PU
The original L2A image size is: (55, 48, 48, 4)


77.7957746788239 24.602383583285693
77.7957746788239 24.602383583285693
Cloud_probs shape: (65, 96, 96)
Shadows_shape: (65, 96, 96, 3)
1.0
The shadow reference index is: 18
(65, 96, 96)
The shadow probability is: 0.0
77.7957746788239 24.602383583285693
Original 20 meter bands size: (65, 24, 24, 6), using 0.28564453125 PU
The original L2A image size is: (65, 48, 48, 4)
There are 23/65 dirty steps: 23 cloud, 1 missing, 0 shadow
Removing 0 steps based on ratio
Interpolated 260 px
Shape before super: (42, 32, 32, 11)
(42, 32, 32, 6)
(42, 32, 32, 11)
Shape after super: (42, 16, 16, 11)
Shape after vegetation indexes: (42, 16, 16, 15)
There are 0 missing pixels
Maximum time distance: 75
Saved array of (24, 16, 16, 15) shape to 135804082


Downloading 36/51, 135804084
76.35248556800641 23.922089838854625
76.35248556800641 23.922089838854625
Cloud_probs shape: (61, 96, 96)
Shadows_shape: (61, 96, 96, 3)
0.9584954604409858
The shadow reference index is: 15
