# Download and process sentinel 2 data

This notebook downloads and processes one year of the training and validation plots as labelled on Collect Earth Online. 

## John Brandt
## Last edit: Sept 20, 2021

## Package imports, API import, source scripts

In [1]:
import datetime
import logging
import pandas as pd
import numpy as np

import math
import os
import hickle as hkl
import scipy.sparse as sparse

import seaborn as sns
import matplotlib.pyplot as plt
import yaml

from collections import Counter
from random import shuffle
from scipy.sparse.linalg import splu
from sentinelhub import WmsRequest, WcsRequest, MimeType
from sentinelhub import CRS, BBox, constants, DataSource, CustomUrlParam
from skimage.transform import resize
from typing import Tuple, List
from scipy.ndimage import median_filter
from sentinelhub.config import SHConfig
import sys

with open("../../config.yaml", 'r') as stream:
        key = (yaml.safe_load(stream))
        API_KEY = key['key'] 
        

module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
        
%matplotlib inline
%run ../../src/preprocessing/slope.py
%run ../../src/preprocessing/indices.py
%run ../../src/downloading/utils.py
%run ../../src/preprocessing/cloud_removal.py
%run ../../src/preprocessing/whittaker_smoother.py
%run ../../src/downloading/io.py

In [2]:
with open("../../config.yaml", 'r') as stream:
    key = (yaml.safe_load(stream))
    API_KEY = key['key']
    SHUB_SECRET = key['shub_secret']
    SHUB_KEY = key['shub_id']
    AWSKEY = key['awskey']
    AWSSECRET = key['awssecret']
            
shconfig = SHConfig()
shconfig.instance_id = API_KEY
shconfig.sh_client_id = SHUB_KEY
shconfig.sh_client_secret = SHUB_SECRET
    

uploader = FileUploader(awskey = AWSKEY, awssecret = AWSSECRET)

## Parameters

In [3]:
# Parameters
YEAR = 2020
TIME = ('{}-11-15'.format(str(YEAR - 1)), '{}-02-15'.format(str(YEAR + 1)))
EPSG = CRS.WGS84
IMSIZE = 32

# Constants
starting_days = np.cumsum([0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30])

# Geographic utility functions

These cell blocks calculate the min_x, min_y, max_x, max_y of the area of interest (AOI)

In [4]:
def calc_bbox(plot_id: int, df: pd.DataFrame) -> List:
    """ Calculates the corners of a bounding box from an input
        pandas dataframe as output by Collect Earth Online

        Parameters:
         plot_id (int): plot_id of associated plot
         df (pandas.DataFrame): dataframe of associated CEO survey
    
        Returns:
         bounding_box (list): [(min(x), min(y)),
                              (max(x), max_y))]
    """
    subs = df[df['PLOT_FNAME'] == plot_id]
    # (min x, min y), (max x, max y)
    return [(min(subs['LON']), min(subs['LAT'])),
            (max(subs['LON']), max(subs['LAT']))]


def bounding_box(points: List[Tuple[float, float]], 
                 expansion: int = 160) -> ((Tuple, Tuple), str):
    """ Calculates the corners of a bounding box with an
        input expansion in meters from a given bounding_box
        
        Subcalls:
         calculate_epsg, convertCoords

        Parameters:
         points (list): output of calc_bbox
         expansion (float): number of meters to expand or shrink the
                            points edges to be
    
        Returns:
         bl (tuple): x, y of bottom left corner with edges of expansion meters
         tr (tuple): x, y of top right corner with edges of expansion meters
    """
    bl = list(points[0])
    tr = list(points[1])
    inproj = Proj('epsg:4326')
    outproj_code = calculate_epsg(bl)
    outproj = Proj('epsg:' + str(outproj_code))
    bl_utm =  transform(inproj, outproj, bl[1], bl[0])
    tr_utm =  transform(inproj, outproj, tr[1], tr[0])

    distance1 = tr_utm[0] - bl_utm[0]
    distance2 = tr_utm[1] - bl_utm[1]
    expansion1 = (expansion - distance1)/2
    expansion2 = (expansion - distance2)/2
        
    bl_utm = [bl_utm[0] - expansion1, bl_utm[1] - expansion2]
    tr_utm = [tr_utm[0] + expansion1, tr_utm[1] + expansion2]

    zone = str(outproj_code)[3:]
    zone = zone[1:] if zone[0] == "0" else zone
    direction = 'N' if tr[1] >= 0 else 'S'
    utm_epsg = "UTM_" + zone + direction
    return (bl_utm, tr_utm), CRS[utm_epsg]

# Data download

In [5]:
def extract_dates(date_dict: dict, year: int) -> List:
    """ Transforms a SentinelHub date dictionary to a
         list of integer calendar dates
    """
    dates = []
    days_per_month = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30]
    starting_days = np.cumsum(days_per_month)
    for date in date_dict:
        if date.year == year - 1:
            dates.append(-365 + starting_days[(date.month-1)] + date.day)
        if date.year == year:
            dates.append(starting_days[(date.month-1)] + date.day)
        if date.year == year + 1:
            dates.append(365 + starting_days[(date.month-1)]+date.day)
    return dates

def to_float32(array: np.array) -> np.array:
    """Converts an int_x array to float32"""
    print(f'The original max value is {np.max(array)}')
    if not isinstance(array.flat[0], np.floating):
        assert np.max(array) > 1
        array = np.float32(array) / 65535.
    assert np.max(array) <= 1
    return array

## Cloud and cloud shadow calculation functions

This cell block identifies clouds using s2Cloudless, and identifies clouds (and shadows) using Candra et al. 2019.

The output is per-px cloud/shadow masks, and a list of sentinel 2 dates to download.

In [6]:
def mcm_shadow_mask(arr: np.ndarray,
                    c_probs: np.ndarray) -> np.ndarray:
    """ Calculates the multitemporal shadow mask for Sentinel-2 using
        the methods from Candra et al. 2020 on L1C images and matching
        outputs to the s2cloudless cloud probabilities
        Parameters:
         arr (arr): (Time, X, Y, Band) array of L1C data scaled from [0, 1]
         c_probs (arr): (Time, X, Y) array of S2cloudless cloud probabilities
        Returns:
         shadows_new (arr): cloud mask after Candra et al. 2020 and cloud matching
         shadows_original (arr): cloud mask after Candra et al. 2020
    """
    warnings.warn("mcm_shadow_mask is deprecated; use remove_missed_clouds", category=DeprecationWarning)
    import time
    imsize = arr.shape[1]

    # Create empty arrays for shadows, clouds
    shadows = np.empty_like(arr)[..., 0]
    clouds = np.empty_like(shadows)
    # Iterate through time steps, develop local reference images
    # and calculate cloud/shadow based on Candra et al. 2020
    for time in range(arr.shape[0]):
        lower = np.max([0, time - 3])
        upper = np.min([arr.shape[0], time + 4])
        ri = np.median(arr[lower:upper], axis = 0).astype(np.float32)

        deltab2 = (arr[time, ..., 0] - ri[..., 0]) > int(0.10 * 65535)
        deltab8a = (arr[time, ..., 3] - ri[..., 3]) < int(-0.04 * 65535)
        deltab11 = (arr[time, ..., 5] - ri[..., 5]) < int(-0.04 * 65535)
        deltab3 = (arr[time, ..., 1] - ri[..., 1]) > int(0.08 * 65535)
        deltab4 = (arr[time, ..., 2] - ri[..., 2]) > int(0.08 * 65535)
        ti0 = arr[time, ..., 0] < int(0.10 * 65535)
        ti10 = arr[time, ..., 4] > int(0.005 * 65535)
        clouds_i = (deltab2 * deltab3 * deltab4)
        clouds_i = clouds_i * 1
        clouds_i[clouds_i > 1] = 1.

        shadows_i = ((1 - clouds_i) * deltab11 * deltab8a * ti0)
        shadows_i = shadows_i * 1

        clouds[time] = clouds_i
        shadows[time] = shadows_i

    shadows = np.maximum(shadows, clouds)
    return shadows

def identify_clouds(bbox: List[Tuple[float, float]], epsg: 'CRS', time: dict = TIME):
    """ Downloads and calculates cloud cover and shadow
        
        Parameters:
         bbox (list): output of calc_bbox
         epsg (float): EPSG associated with bbox 
         time (tuple): YY-MM-DD - YY-MM-DD bounds for downloading 
    
        Returns:
         cloud_img (np.array): (X, 96, 96) array of cloud probs
         shadows (np.array):  (X, 96, 96) array of shadow binary
         clean_steps (np.array): (N,) array of clean idx
         cloud_dates (np.array): (N,) array of clean cloud datets
    """
    box = BBox(bbox, crs = epsg)
    cloud_request = WcsRequest(
        layer='CLOUD_NEW',
        bbox=box, time=time,
        resx='160m', resy='160m',
        image_format = MimeType.TIFF,
        maxcc=0.75, config=shconfig,
        custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
        time_difference=datetime.timedelta(hours=96))

    shadow_request = WcsRequest(
        layer='SHADOW',
        bbox=box, time=time,
        resx='60m', resy='60m',
        image_format =  MimeType.TIFF,
        maxcc=0.75, config=shconfig,
        custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
        time_difference=datetime.timedelta(hours=96))

    cloud_img = np.array(cloud_request.get_data())
    if not isinstance(cloud_img.flat[0], np.floating):
        assert np.max(cloud_img) > 1
        cloud_img = cloud_img / 255.
    assert np.max(cloud_img) <= 1

    cloud_img = resize(cloud_img, (cloud_img.shape[0], 96, 96), order = 0)
    n_cloud_px = np.sum(cloud_img > 0.33, axis = (1, 2))
    cloud_steps = np.argwhere(n_cloud_px > (96**2 * 0.15))
    clean_steps = [x for x in range(cloud_img.shape[0]) if x not in cloud_steps]
    
    
    cloud_dates_dict = [x for x in cloud_request.get_dates()]
    cloud_dates = extract_dates(cloud_dates_dict, YEAR)
    cloud_dates = [val for idx, val in enumerate(cloud_dates) if idx in clean_steps]
    
    shadow_dates_dict = [x for x in shadow_request.get_dates()]
    shadow_dates = extract_dates(shadow_dates_dict, YEAR)
    shadow_steps = [idx for idx, val in enumerate(shadow_dates) if val in cloud_dates]    
    
    shadow_img = np.array(shadow_request.get_data(data_filter = shadow_steps))
    shadow_pus = (shadow_img.shape[1]*shadow_img.shape[2])/(512*512) * shadow_img.shape[0]
    shadow_img = resize(shadow_img, (shadow_img.shape[0], 96, 96, shadow_img.shape[-1]), order = 0,
                        anti_aliasing = False, preserve_range = True).astype(np.uint16)

    cloud_img = np.delete(cloud_img, cloud_steps, 0)
    assert shadow_img.shape[0] == cloud_img.shape[0], (shadow_img.shape, cloud_img.shape)
    #shadows, _ = 
    shadows = mcm_shadow_mask(shadow_img, cloud_img) # Make usre this makes sense??
    print(f"Shadows ({shadows.shape}) used {round(shadow_pus, 1)} processing units")
    return cloud_img, shadows, clean_steps, np.array(cloud_dates)

# DEM and slope

In [7]:
def download_dem(plot_id: int, df: 'DataFrame', epsg: 'CRS') -> (np.ndarray, np.ndarray):
    """ Downloads MapZen digital elevation model and return slope

        Parameters:
         plot_id (tuple): plot id from collect earth online (CEO)
         df (pandas.DataFrame): data associated with plot_id from CEO
         epsg (int): UTM EPSG associated with plot_id
    
        Returns:
         slope (arr): (X, Y, 1) array of per-pixel slope from [0, 1]
    """
    location = calc_bbox(plot_id, df = df)
    bbox, epsg = bounding_box(location, expansion = (32+2)*10)
    box = BBox(bbox, crs = epsg)
    dem_request = WcsRequest(
                         layer='DEM', bbox=box,
                         resx = "10m", resy = "10m",
                         config=shconfig,
                         image_format= MimeType.TIFF,
                         custom_url_params={CustomUrlParam.SHOWLOGO: False})
    dem_image_init = dem_request.get_data()[0]
    dem_image_init = dem_image_init - 12000
    dem_image_init = dem_image_init.astype(np.float32)
    dem_image = np.copy(dem_image_init)
    dem_image = median_filter(dem_image_init, size = 5)
    slope = calcSlope(dem_image.reshape((1, 32+2, 32+2)),
                      np.full((32+2, 32+2), 10),
                      np.full((32+2, 32+2), 10), 
                      zScale = 1, minSlope = 0.02)
    slope = slope / 90
    slope = slope.reshape((32+2, 32+2, 1))
    slope = slope[1:32+1, 1:32+1, :]
    return slope, dem_image_init

## 10 and 20 meter L2A bands

In [8]:
def download_layer(bbox: List[Tuple[float, float]],
                   clean_steps: np.ndarray, epsg: 'CRS',
                   dates: dict = TIME, year: int = YEAR) -> (np.ndarray, np.ndarray):
    """ Downloads the L2A sentinel layer with 10 and 20 meter bands
        
        Parameters:
         bbox (list): output of calc_bbox
         epsg (float): EPSG associated with bbox 
         time (tuple): YY-MM-DD - YY-MM-DD bounds for downloading 
    
        Returns:
         img (arr):
         img_request (obj): 
    """
    try:
        box = BBox(bbox, crs = epsg)
        image_request = WcsRequest(
                layer='L2A20',
                bbox=box, time=dates,
                image_format = MimeType.TIFF,
                data_source = DataSource.SENTINEL2_L2A,
                maxcc=0.75,
                resx='20m', resy='20m',
                config=shconfig,
                custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'NEAREST',
                                    constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
                time_difference=datetime.timedelta(hours=96),
            )
        
        image_dates = []
        for date in image_request.get_dates():
            if date.year == YEAR - 1:
                image_dates.append(-365 + starting_days[(date.month-1)] + date.day)
            if date.year == YEAR:
                image_dates.append(starting_days[(date.month-1)] + date.day)
            if date.year == YEAR + 1:
                image_dates.append(365 + starting_days[(date.month-1)]+date.day)
        
        steps_to_download = [i for i, val in enumerate(image_dates) if val in clean_steps]
        dates_to_download = [val for i, val in enumerate(image_dates) if val in clean_steps]
              
        img_bands = image_request.get_data(data_filter = steps_to_download)
        img_20 = np.stack(img_bands)
        img_20 = to_float32(img_20)

        s2_20_usage = (img_20.shape[1]*img_20.shape[2])/(512*512) * (6/3) * img_20.shape[0]
        if (img_20.shape[1] * img_20.shape[2]) != 14*14:
            print(f"Original 20 meter bands size: {img_20.shape}, using {s2_20_usage} PU")
        img_20 = resize(img_20, (img_20.shape[0], IMSIZE, IMSIZE, img_20.shape[-1]), order = 0)
        
        image_request = WcsRequest(
                layer='L2A10',
                bbox=box, time=dates,
                image_format = MimeType.TIFF,
                data_source = DataSource.SENTINEL2_L2A,
                maxcc=0.75,
                resx='10m', resy='10m',
                config=shconfig,
                custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'BICUBIC',
                                    constants.CustomUrlParam.UPSAMPLING: 'BICUBIC'},
                time_difference=datetime.timedelta(hours=96),
        )
        
        img_bands = image_request.get_data(data_filter = steps_to_download)
        img_10 = np.stack(img_bands)
        if (img_10.shape[1] * img_10.shape[2]) != 28*28:
            print(f"The original L2A image size is: {img_10.shape}")
        img_10 = to_float32(img_10)
            
        img_10 = resize(img_10, (img_10.shape[0], IMSIZE, IMSIZE, img_10.shape[-1]), order = 0)
        img = np.concatenate([img_10, img_20], axis = -1)

        
        return img, np.array(dates_to_download)

    except Exception as e:
        logging.fatal(e, exc_info=True)

# Super resolution

Super-resolve the 20 meter bands to 10 meters using DSen2.

In [9]:
import tensorflow as tf
sess = tf.Session()
from keras import backend as K
K.set_session(sess)

MDL_PATH = "../../models/supres/nov-40k-swir/"

model = tf.train.import_meta_graph(MDL_PATH + 'model.meta')
model.restore(sess, tf.train.latest_checkpoint(MDL_PATH))

logits = tf.get_default_graph().get_tensor_by_name("Add_2:0")
inp = tf.get_default_graph().get_tensor_by_name("Placeholder:0")
inp_bilinear = tf.get_default_graph().get_tensor_by_name("Placeholder_1:0")

def superresolve(input_data):
    bilinear_upsample = input_data[..., 4:]
    x = sess.run([logits], 
                 feed_dict={inp: input_data,
                            inp_bilinear: bilinear_upsample})
    return x[0]

def superresolve_tile(x):
    twentym = x[..., 4:]
    imsize = x.shape[1]
    twentym = np.reshape(twentym, (x.shape[0], imsize // 2, 2, imsize // 2, 2, 6))
    twentym = np.mean(twentym, (2, 4))
    twentym = resize(twentym, (x.shape[0], imsize, imsize, 6), 1)
    x[..., 4:] = twentym
    x[..., 4:] = superresolve(x)
    if imsize > 28:
        crop_amt = (imsize - 28) // 2
        x = x[:, crop_amt:-crop_amt, crop_amt:-crop_amt, :]
    return x

INFO:tensorflow:Restoring parameters from ../../models/supres/nov-40k-swir/model


Using TensorFlow backend.


# Download function

In [15]:
def download_new_dem(data_location: 'os.Path',
                     output_folder: 'os.Path',
                     image_format: 'MimeType' = MimeType.TIFF):
    """ Downloads and saves DEM and slope files
        
        Parameters:
         data_location (os.path): 
         output_folder (os.path): 
         image_format (MimeType): 
    
        Returns:
         None
    """
    
    df = pd.read_csv(data_location)
    df.columns = [x.upper() for x in df.columns]
    
    if 'PLOT_FNAME' not in df.columns:
        df['PLOT_FNAME'] = df['PLOT_ID']
    
    print(df.columns)
    columns = ['PLOT_ID', 'SAMPLEID', 'LON', 'LAT', 'PLANTATION', 'PLOT_FNAME']
    for column in df.columns:
        if column not in columns:
            df = df.drop(column, axis = 1)
    print(df.columns)
    df = df.dropna(axis = 0)
    plot_ids = sorted(df['PLOT_FNAME'].unique())
    existing = [int(x[:-4]) for x in os.listdir(output_folder) if ".DS" not in x]
    to_download = [x for x in plot_ids if x not in existing]
    print(f"Starting download of {len(to_download)}"
          f" plots from {data_location} to {output_folder}")
    errors = []
    for i, val in enumerate(to_download):
        print(f"Downloading {i + 1}/{len(to_download)}, {val}")
        initial_bbx = calc_bbox(val, df = df)
        dem_bbx, epsg = bounding_box(initial_bbx, expansion = 32*10)
        slope, dem = download_dem(val, epsg = epsg, df = df)
        val = str(val).zfill(5)
        np.save(output_folder + str(val), dem)
        np.save("train-slope/" + str(val), slope)
        


def concatenate_dem(x, dem):
    dem = np.tile(dem.reshape((1, 32, 32, 1)), (x.shape[0], 1, 1, 1))
    dem = dem[:, 2:-2, 2:-2, :]
    x = np.concatenate([x, dem], axis = -1)
    assert x.shape[1] == x.shape[2] == 28
    return x

In [11]:
def id_missing_px(sentinel2: np.ndarray, thresh: int = 100) -> np.ndarray:
    missing_images_0 = np.sum(sentinel2[..., :10] == 0.0, axis = (1, 2, 3))
    missing_images_p = np.sum(sentinel2[..., :10] >= 1., axis = (1, 2, 3))
    missing_images = missing_images_0 + missing_images_p
    
    missing_images = np.argwhere(missing_images >= (sentinel2.shape[1]**2) / thresh).flatten()
    return missing_images


def download_raw_data(data_location, output_folder, fmt = "train", image_format = MimeType.TIFF):
    """ Downloads slope and sentinel-2 data for all plots associated
        with an input CSV from a collect earth online survey
        
        Parameters:
         data_location (os.path)
         output_folder (os.path)
        
        Creates:
         output_folder/{plot_id}.npy
    
        Returns:
         None
    """
    df = pd.read_csv(data_location)
    df.columns = [x.upper() for x in df.columns]
    if 'PLOT_FNAME' not in df.columns:
        df['PLOT_FNAME'] = df['PLOT_ID']
    columns = ['PLOT_ID', 'SAMAPLEID', 'LON', 'LAT', 'PLANTATION', 'PLOT_FNAME']
    for column in df.columns:
        if column not in columns:
            df = df.drop(column, axis = 1)
    print(df.columns)

    df = df.dropna(axis = 0)
    plot_ids = sorted(df['PLOT_FNAME'].unique())
    existing = [int(x[:-4]) for x in os.listdir(f"{fmt}-dates/") if ".DS" not in x]
    to_download = [x for x in plot_ids if x not in existing]
    print(to_download)
    print(f"Starting download of {len(to_download)}"
          f" plots from {data_location} to {output_folder}")
    for i, val in enumerate(reversed(to_download)):
        print(f"Downloading {i + 1}/{len(to_download)}, {val}")
        initial_bbx = calc_bbox(val, df = df)
        sentinel2_bbx, epsg = bounding_box(initial_bbx, expansion = IMSIZE*10)
        cloud_bbx, _ = bounding_box(initial_bbx, expansion = 96*10)
        try:
            # Identify cloud steps, download DEM, and download L2A series
            cloud_probs, shadows, _, clean_dates = identify_clouds(cloud_bbx, epsg = epsg)
            dem, _ = download_dem(val, epsg = epsg, df = df)
            #to_remove, _ = calculate_cloud_steps(cloud_probs, clean_dates)
            
            #if len(to_remove) > 0:
            #    cloud_probs = np.delete(cloud_probs, to_remove, 0)
            #    clean_dates = np.delete(clean_dates, to_remove)
            #    shadows = np.delete(shadows, to_remove, 0)
                
            to_remove = subset_contiguous_sunny_dates(clean_dates, cloud_probs)
            if len(to_remove) > 0:
                cloud_probs = np.delete(cloud_probs, to_remove, 0)
                clean_dates = np.delete(clean_dates, to_remove)
                shadows = np.delete(shadows, to_remove, 0)
                
            _ = print_dates(clean_dates, np.mean(cloud_probs, axis = (1, 2)))
        
            s2, s2_dates = download_layer(sentinel2_bbx, clean_steps = clean_dates, epsg = epsg)    
            
            # Step to ensure that shadows, clouds, sentinel l2a have aligned dates
            to_remove_clouds = [i for i, val in enumerate(clean_dates) if val not in s2_dates]
            to_remove_dates = [val for i, val in enumerate(clean_dates) if val not in s2_dates]
            if len(to_remove_clouds) > 0:
                print(f"Removing {to_remove_dates} from clouds because not in S2")
                cloud_probs = np.delete(cloud_probs, to_remove_clouds, 0)
                shadows = np.delete(shadows, to_remove_clouds, 0)
            print(f"Shadows {shadows.shape}, clouds {cloud_probs.shape},"
                  f" S2, {s2.shape}, S2d, {s2_dates.shape}")
            
            print(s2.shape)
            
            cloud_probs = cloud_probs[:, 32:-32, 32:-32]
            shadows = shadows[:, 32:-32, 32:-32]
            pfcps = np.zeros_like(shadows)
            print(s2.shape, cloud_probs.shape, shadows.shape)
            x, interp, _ = remove_cloud_and_shadows(s2, cloud_probs, shadows, s2_dates, pfcps)
            to_remove = np.argwhere(np.mean(interp, axis = (1, 2)) > 0.7)
            if len(to_remove) > 0:
                print(f"Removing {len(to_remove)} steps with >50% interpolation: {to_remove}")
                x = np.delete(x, to_remove, 0)
                cloud_probs = np.delete(cloud_probs, to_remove, 0)
                s2_dates = np.delete(s2_dates, to_remove)
                shadows = np.delete(shadows, to_remove, 0)
                print(np.sum(shadows, axis = (1, 2)))
            
            x_to_save = np.copy(x)
            x_to_save = np.clip(x_to_save, 0, 1)
            x_to_save = np.trunc(x_to_save * 65535).astype(np.uint16)
            val = str(val).zfill(5)
            print(f"Saving: {fmt}-raw/{str(val)}")
            np.save(f"{fmt}-dates/{str(val)}", s2_dates)
            np.save(f"{fmt}-raw/{str(val)}", x_to_save)
            #file = f"/data/{fmt}-raw/{str(val)}.npy"
            #key = f'restoration-mapper/model-data/{fmt}/raw/{str(val)}.npy'
            #uploader.upload(bucket = 'restoration-monitoring', key = key, file = file)
            print("\n")

        except Exception as e:
            print(e)
            logging.fatal(e, exc_info=True)

In [12]:
def select_dates(dates):
    """For imagery that was downloaded prior to capping the number 
       of monthly images to be 3, it is necessary to enforce that cap
       on the training / testing data.
       
       This function identifies the indices of the imagery to deletet
       such that there is a maximum of three images per month.
    
    """
    
    
    before = len(dates)
    selected_indices = np.arange(len(dates))
    begin = [-60, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
    end = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 390]
    indices_to_remove = []
    for x, y in zip(begin, end):
        indices_month = np.argwhere(np.logical_and(dates >= x, dates < y)).flatten()
        if len(indices_month) > 3:
            to_delete = np.empty((0,))
            if begin == -60:
                to_delete = indices_month[:-3]
            elif begin == 334:
                to_delete = indices_month[3:]
            elif len(indices_month) == 4:
                to_delete = indices_month[1]
            elif len(indices_month) == 5:
                to_delete = np.array([indices_month[1],
                                      indices_month[3]])
            elif len(indices_month) == 6:
                to_delete = np.array([indices_month[1],
                                      indices_month[3],
                                      indices_month[4]])
                
            to_delete = np.array(to_delete)
            if to_delete.size > 0:
                indices_to_remove.append(to_delete.flatten())
                
    if len(indices_to_remove) > 0:
        indices_to_remove = np.concatenate(indices_to_remove)
        after = before - len(indices_to_remove)
        print(f"Keeping {after}/{before}")
        return indices_to_remove
    
    else:
        return []
    

def subset_contiguous_sunny_dates(dates, probs):
    """
    The general imagery subsetting strategy is as below:
        - Select all images with < 30% cloud cover
        - For each month, select up to 2 images that are <30% CC and are the closest to
          the beginning and the midde of the month
        - Select only one image per month for each month if the following criteria are met
              - Within Q1 and Q4, apply if at least 3 images in quarter
              - Otherwise, apply if at least 8 total images for year
              - Select the second image if max CC < 15%, otherwise select least-cloudy image
        - If more than 10 images remain, remove any images for April and September

    """
    
    probs = np.mean(probs, axis = (1, 2))
    begin = [-60, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
    end = [31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 410]
    n_per_month = []
    months_to_adjust = []
    months_to_adjust_again = []
    indices_to_rm = []
    indices = [x for x in range(len(dates))]
    
    def _indices_month(dates, x, y):
        indices_month = np.argwhere(np.logical_and(
                    dates >= x, dates < y)).flatten()
        return indices_month

    
    _ = print_dates(dates, probs)
    # Select the best 2 images per month to start with
    best_two_per_month = []
    for x, y in zip(begin, end):
        indices_month = np.argwhere(np.logical_and(
            dates >= x, dates < y)).flatten()

        month_dates = dates[indices_month]
        month_clouds = probs[indices_month]
        month_good_dates = month_dates[month_clouds < 0.20]
        indices_month = indices_month[month_clouds < 0.20]

        if len(month_good_dates) >= 2:
            if x > 0:
                ideal_dates = [x, x + 15]
            else:
                ideal_dates = [0, 15]

            # We first pick the 2 images with <30% cloud cover that are the closest
            # to the 1st and 15th of the month
            # todo: if both these images are above 15%, and one below 15% is available, include it
            closest_to_first_img = np.argmin(abs(month_good_dates - ideal_dates[0]))
            closest_to_second_img = np.argmin(abs(month_good_dates - ideal_dates[1]))
            if closest_to_second_img == closest_to_first_img:
                distances = abs(month_good_dates - ideal_dates[1])
                closest_to_second_img = np.argsort(distances)[1]

            first_image = indices_month[closest_to_first_img]
            second_image = indices_month[closest_to_second_img]
            best_two_per_month.append(first_image)
            best_two_per_month.append(second_image)
                    
        elif len(month_good_dates) >= 1:
            if x > 0:
                ideal_dates = [x, x + 15]
            else:
                ideal_dates = [0, 15]

            closest_to_second_img = np.argmin(abs(month_good_dates - ideal_dates[1]))
            second_image = indices_month[closest_to_second_img]
            best_two_per_month.append(second_image)
                
    dates_round_2 = dates[best_two_per_month]
    probs_round_2 = probs[best_two_per_month]
    
    # We then select between those two images to keep a max of one per month
    # We select the least cloudy image if the most cloudy has >15% cloud cover
    # Otherwise we select the second image

    # If there are more than 8 images, subset so only 1 image per month,
    # To bring down to a min of 8 images
    if len(dates_round_2) >= 8:
        n_to_rm = len(dates_round_2) - 8
        monthly_dates = []
        monthly_probs = []
        monthly_dates_date = []
        removed = 0
        for x, y in zip(begin, end):
            indices_month = np.argwhere(np.logical_and(
                dates >= x, dates < y)).flatten()
            dates_month = dates[indices_month]
            indices_month = [val for i, val in enumerate(indices_month) if dates_month[i] in dates_round_2]
            if len(indices_month) > 1:
                month_dates = dates[indices_month]
                month_clouds = probs[indices_month]

                subset_month = True
                if x == -60:
                    feb_mar = np.argwhere(np.logical_and(
                        dates >= 31, dates < 90)).flatten()
                    subset_month = False if len(feb_mar) < 2 else True
                if x == 334:
                    oct_nov = np.argwhere(np.logical_and(
                        dates >= 273, dates < 334)).flatten()
                    subset_month = False if len(oct_nov) < 2 else True

                if subset_month:
                    subset_month = True if removed <= n_to_rm else False
                if subset_month:
                    if np.max(month_clouds) >= 0.10:
                        month_best_date = [indices_month[np.argmin(month_clouds)]]
                    else:
                        month_best_date = [indices_month[1]]
                else:
                    month_best_date = indices_month
                monthly_dates.extend(month_best_date)
                monthly_probs.extend(probs[month_best_date])
                monthly_dates_date.extend(dates[month_best_date])
                removed += 1
            elif len(indices_month) == 1:
                monthly_dates.append(indices_month[0])
                monthly_probs.append(probs[indices_month[0]])
                monthly_dates_date.append(dates[indices_month[0]])
    else:
        monthly_dates = best_two_per_month
        
    indices_to_rm = [x for x in indices if x not in monthly_dates]


    dates_round_3 = dates[monthly_dates]
    probs_round_3 = probs[monthly_dates]

    if len(dates_round_3) >= 10:
        delete_max = False
        if np.max(probs_round_3) >= 0.15:
            delete_max = True
            indices_to_rm.append(monthly_dates[np.argmax(probs_round_3)])
        for x, y in zip(begin, end):
            indices_month = np.argwhere(np.logical_and(
                dates >= x, dates < y)).flatten()
            dates_month = dates[indices_month]
            indices_month = [x for x in indices_month if x in monthly_dates]

            n_removed = 0
            if len(indices_month) >= 1:
                if len(monthly_dates) == 11 and delete_max:
                    continue
                elif len(monthly_dates) >= 11:
                    if x in [90, 243]:
                        indices_to_rm.append(indices_month[0])

    return indices_to_rm


def to_int16(array: np.array) -> np.array:
    '''Converts a float32 array to uint16, reducing storage costs by three-fold'''
    array = np.clip(array, 0, 1)
    array = np.trunc(array * 65535)
    assert np.min(array >= 0)
    assert np.max(array <= 65535)
    
    return array.astype(np.uint16)


def process_raw(plot_id, path = 'train'):
    """ Downloads slope and sentinel-2 data for all plots associated
        with an input CSV from a collect earth online survey
        
        Parameters:
         data_location (os.path)
         output_folder (os.path)
        
        Creates:
         output_folder/{plot_id}.npy
    
        Returns:
         None
    """         

    x = np.load(f"{path}-raw/{plot_id}.npy")
    x = np.float32(x) / 65535
    if x.shape[-1] == 10:
        s2_dates = np.load(f"{path}-dates/{plot_id}.npy")
        dem = np.load(f"{path}-slope/{plot_id}.npy")

        assert x.shape[0] == s2_dates.shape[0]

        missing_px = id_missing_px(x)
        if len(missing_px) > 0:
            print(f"Deleting {missing_px} because of missing data")
            x = np.delete(x, missing_px, 0)
            s2_dates = np.delete(s2_dates, missing_px)


        n_images = x.shape[0]

        to_remove = select_dates(s2_dates)
        if len(to_remove) > 0:
            x = np.delete(x, to_remove, 0)
            s2_dates = np.delete(s2_dates, to_remove)

        print(x.shape)
        #to_remove = subset_contiguous_sunny_dates(s2_dates)
        #if len(to_remove) > 0:
        #    x = np.delete(x, to_remove, 0)
        #    s2_dates = np.delete(s2_dates, to_remove)

        for band in range(0, 10):
            for time in range(0, x.shape[0]):
                x_i = x[time, :, :, band]
                x_i[np.argwhere(np.isnan(x_i))] = np.mean(x_i)
                x[time, :, :, band] = x_i

        # Interpolate linearly to 5 day frequency
        tiles, max_distance = calculate_and_save_best_images(x, s2_dates)
        sm = Smoother(lmbd = 150, size = tiles.shape[0],
                      nbands = 10, dimx = tiles.shape[1], dimy = tiles.shape[2])
        x = sm.interpolate_array(tiles)
        x = superresolve_tile(x)
        tiles = concatenate_dem(x, dem)
        dates = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
        dates = np.array(dates) + 15
        closest_date = []
        for date in dates:
            date_diff = s2_dates[np.argmin(abs(s2_dates - date))]
            closest_date.append(date_diff)

        closest_date = np.array(closest_date)
        closest_date = closest_date[:, np.newaxis, np.newaxis, np.newaxis]
        closest_date = np.broadcast_to(closest_date, (12, 28, 28, 1))
        closest_date = (closest_date + 45)  / 411

        #tiles = np.concatenate([tiles, closest_date], axis = -1)
        #tiles = np.median(tiles, axis = 0)
        #tiles = tiles[7:-7, 7:-7, :]
        print(tiles.shape)

        if np.sum(np.isnan(tiles)) == 0:
            print(f"There are {np.sum(np.isnan(tiles))} NA values")
            if max_distance <= 340 and n_images >= 3:
                tiles = to_int16(tiles)
                #np.save(f"../data/{path}-s2-new/{plot_id}", tiles)
                tile_path = f"{path}-s2/{plot_id}"
                tile_path = tile_path + ".hkl"
                hkl.dump(tiles, tile_path, mode='w', compression='gzip')
                print(f"Saved {tiles.shape} shape, {n_images} img,"
                      f" to {tile_path} \n")
            else:
                print(f"Skipping {plot_id} because {max_distance} distance, and {n_images} img \n")

        return tiles

# Function execution
## 1. Download DEM and Slope

In [16]:
def make_plot_ids(csv_path):
    df = pd.read_csv(csv_path)
    df.columns = [x.upper() for x in df.columns]
    print(df['PLOTID'][0])
    #if abs(df['PLOTID'][0]) == 0:
    print(df['PLOTID'][0])
    print(f"No unique ID for {csv_path}")
    for index, row in df.iterrows():
        row['PLOTID'] = abs(row['PLOTID'])
        df['PLOTID'][index] = int(str(csv_path[-6:-4]) + '00' + str(row['PLOTID']))
    df.to_csv(csv_path, index = False)
    return df
            
df = make_plot_ids('train-csv/ceo-plantations-train-v17.csv')

0
0
No unique ID for train-csv/ceo-plantations-train-v17.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [18]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

for i in (os.listdir("train-csv/")):
    if ".csv" in i:
        tile = download_new_dem("train-csv/" + i,
                                "train-dem/",
                                image_format = MimeType.TIFF)     


Index(['INDEX', 'PLOT_ID', 'SAMPLE_ID', 'LON', 'LAT', 'EMAIL', 'FLAGGED',
       'COLLECTION_TIME', 'ANALYSIS_DURATION', 'IMAGERY_TITLE',
       'IMAGERY_ATTRIBUTIONS', 'SAMPLE_GEOM', 'PLANTATION', 'PLOT_FNAME'],
      dtype='object')
Index(['PLOT_ID', 'LON', 'LAT', 'PLANTATION', 'PLOT_FNAME'], dtype='object')
Starting download of 0 plots from train-csv/ceo-plantations-train-v04.csv to train-dem/
Index(['INDEX', 'PLOT_ID', 'SAMPLE_ID', 'LON', 'LAT', 'EMAIL', 'FLAGGED',
       'COLLECTION_TIME', 'ANALYSIS_DURATION', 'IMAGERY_TITLE',
       'IMAGERY_ATTRIBUTIONS', 'SAMPLE_GEOM', 'PL_CID', 'PL_OID_',
       'PL_MERGE_SRC', 'PLANTATION', 'PLOT_FNAME'],
      dtype='object')
Index(['PLOT_ID', 'LON', 'LAT', 'PLANTATION', 'PLOT_FNAME'], dtype='object')
Starting download of 0 plots from train-csv/ceo-plantations-train-v10.csv to train-dem/
Index(['INDEX', 'PLOT_ID', 'SAMPLE_ID', 'LON', 'LAT', 'EMAIL', 'FLAGGED',
       'COLLECTION_TIME', 'ANALYSIS_DURATION', 'IMAGERY_TITLE',
       'IMAGERY_AT

Index(['PLOT_ID', 'LON', 'LAT', 'PLOT_FNAME'], dtype='object')
Starting download of 0 plots from train-csv/ceo-plantations-train-v14.csv to train-dem/
Index(['INDEX', 'PLOT_ID', 'SAMPLE_ID', 'LON', 'LAT', 'EMAIL', 'FLAGGED',
       'COLLECTION_TIME', 'ANALYSIS_DURATION', 'IMAGERY_TITLE',
       'IMAGERY_ATTRIBUTIONS', 'SAMPLE_GEOM', 'PL_GROW_SOURC', 'PL_SPECIES_SI',
       'PL_SHAPE_LENG', 'PL_METHOD', 'PL_ISO', 'PL_SPECIES', 'PL_SOURCE',
       'PL_COMMON_NAM', 'PL_SIZE', 'PL_PLANT_AG', 'PL_CONIFER_BR',
       'PL_TIMBER_AG', 'PL_SD_ERROR', 'PL_ORG_CODE', 'PL_FINAL_CODE',
       'PL_SHAPE_AREA', 'PL_OWNERSHIP', 'PL_EVER_DEC', 'PL_FINAL_ID',
       'PL_CREATION_Y', 'PL_OBJECTID', 'PL_GROWTH', 'PL_HARD_SOFT',
       'PL_COUNTRY', 'PL_ORG_NAME', 'PLANTATION', 'PLOT_FNAME'],
      dtype='object')
Index(['PLOT_ID', 'LON', 'LAT', 'PLANTATION', 'PLOT_FNAME'], dtype='object')
Starting download of 0 plots from train-csv/ceo-plantations-train-v00.csv to train-dem/
Index(['INDEX', 'PLOT_ID', 'SA

## 2. Download Raw data files

In [19]:
for i in (os.listdir("train-csv/")):
    if "17" in i:
        download_raw_data("train-csv/" + i,
                          "train-raw/", 
                          fmt = 'train',
                          image_format = MimeType.TIFF)

Index(['PLOT_ID', 'LON', 'LAT', 'PLANTATION', 'PLOT_FNAME'], dtype='object')
[17000, 17001, 17002, 17003, 17004, 17005, 17006, 17007, 17008, 17009, 170010, 170011, 170012, 170013, 170014, 170015, 170016, 170017, 170018, 170019, 170020, 170021, 170022, 170023, 170024, 170025, 170026, 170027, 170028, 170029, 170030, 170031, 170032, 170033, 170034, 170035, 170036, 170037, 170038, 170039, 170040, 170041, 170042, 170043, 170044, 170045, 170046, 170047, 170048, 170049, 170050, 170051, 170052, 170053, 170054, 170055, 170056, 170057, 170058, 170059, 170060, 170061, 170062, 170063, 170064, 170065, 170066, 170067, 170068, 170069, 170070, 170071, 170072, 170073, 170074, 170075, 170076, 170077, 170078, 170079, 170080, 170081, 170082, 170083, 170084, 170085, 170086, 170087, 170088, 170089, 170090, 170091, 170092, 170093, 170094, 170095, 170096, 170097, 170098, 170099, 1700100, 1700101, 1700102, 1700103, 1700104, 1700105, 1700106, 1700107, 1700108, 1700109, 1700110, 1700111, 1700112, 1700113, 170011



The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
Saving: train-raw/1700149


Downloading 2/150, 1700148
Shadows ((19, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [ 15]
 [ 20]
 [ 30]
 [379]], Probs: [array([0.06]), array([0.08]), array([0.1]), array([0.06]), array([0.04])]
2, Dates: [[ 35]
 [ 40]
 [399]
 [409]], Probs: [array([0.05]), array([0.14]), array([0.14]), array([0.07])]
3, Dates: [[74]], Probs: [array([0.04])]
4, Dates: [[ 99]
 [104]
 [119]], Probs: [array([0.01]), array([0.11]), array([0.1])]
5, Dates: [[149]], Probs: [array([0.01])]
6, Dates: [], Probs: [

The original max value is 33646
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 32794
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/1700144


Downloading 7/150, 1700143
Shadows ((15, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [ 15]
 [ 20]
 [369]], Probs: [array([0.05]), array([0.13]), array([0.11]), array([0.07])]
2, Dates: [[ 

1, Dates: [[ 12]
 [ 27]
 [376]], Probs: [array([0.06]), array([0.06]), array([0.09])]
2, Dates: [[ 32]
 [ 37]
 [ 57]
 [401]], Probs: [array([0.04]), array([0.05]), array([0.1]), array([0.11])]
3, Dates: [[66]
 [71]], Probs: [array([0.04]), array([0.06])]
4, Dates: [], Probs: []
5, Dates: [[144]], Probs: [array([0.15])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[314]], Probs: [array([0.12])]
12, Dates: [], Probs: []
1, Dates: [[ 27]
 [376]], Probs: [array([0.06]), array([0.09])]
2, Dates: [[ 37]
 [401]], Probs: [array([0.05]), array([0.11])]
3, Dates: [[71]], Probs: [array([0.06])]
4, Dates: [], Probs: []
5, Dates: [[144]], Probs: [array([0.15])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[314]], Probs: [array([0.12])]
12, Dates: [], Probs: []
The original max value is 33292
Original 20 meter bands size: (7, 

The original max value is 29727
Original 20 meter bands size: (9, 16, 16, 6), using 0.017578125 PU
The original L2A image size is: (9, 32, 32, 4)
The original max value is 31699
Shadows (9, 96, 96), clouds (9, 96, 96), S2, (9, 32, 32, 10), S2d, (9,)
(9, 32, 32, 10)
(9, 32, 32, 10) (9, 32, 32) (9, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
The multiplier is 0.005688888888888889 and the clean is 1024 for 8
Saving: train-raw/1700135


Downloading 16/150, 1700134
Shadows ((12, 96, 96)) used 0.0 pro

Shadows ((13, 96, 96)) used 0.0 processing units
1, Dates: [[ 20]
 [ 30]
 [369]], Probs: [array([0.12]), array([0.06]), array([0.12])]
2, Dates: [[35]
 [40]], Probs: [array([0.05]), array([0.11])]
3, Dates: [[60]
 [74]], Probs: [array([0.07]), array([0.02])]
4, Dates: [[99]], Probs: [array([0.01])]
5, Dates: [[144]
 [149]], Probs: [array([0.11]), array([0.03])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [], Probs: []
1, Dates: [[ 20]
 [369]], Probs: [array([0.12]), array([0.12])]
2, Dates: [[35]], Probs: [array([0.05])]
3, Dates: [[74]], Probs: [array([0.02])]
4, Dates: [[99]], Probs: [array([0.01])]
5, Dates: [[144]
 [149]], Probs: [array([0.11]), array([0.03])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [], Probs: []
The original max value is 31345
Original 20

The original L2A image size is: (8, 32, 32, 4)
The original max value is 30965
Shadows (8, 96, 96), clouds (8, 96, 96), S2, (8, 32, 32, 10), S2d, (8,)
(8, 32, 32, 10)
(8, 32, 32, 10) (8, 32, 32) (8, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
Saving: train-raw/1700126


Downloading 25/150, 1700125
Shadows ((15, 96, 96)) used 0.0 processing units
1, Dates: [[ 15]
 [ 30]
 [369]], Probs: [array([0.06]), array([0.07]), array([0.13])]
2, Dates: [[35]
 [40]], Probs: [array([0.04]), array([0.11])]
3, D

Shadows ((27, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [ 15]
 [ 30]
 [389]], Probs: [array([0.08]), array([0.06]), array([0.11]), array([0.1])]
2, Dates: [[ 35]
 [404]
 [409]], Probs: [array([0.13]), array([0.13]), array([0.09])]
3, Dates: [[69]
 [74]], Probs: [array([0.11]), array([0.04])]
4, Dates: [[119]], Probs: [array([0.05])]
5, Dates: [[134]], Probs: [array([0.09])]
6, Dates: [[159]], Probs: [array([0.06])]
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[-40]
 [314]
 [319]], Probs: [array([0.06]), array([0.05]), array([0.1])]
12, Dates: [[-25]
 [-10]
 [349]
 [359]], Probs: [array([0.07]), array([0.11]), array([0.07]), array([0.07])]
1, Dates: [[15]], Probs: [array([0.06])]
2, Dates: [[35]], Probs: [array([0.13])]
3, Dates: [[74]], Probs: [array([0.04])]
4, Dates: [[119]], Probs: [array([0.05])]
5, Dates: [[134]], Probs: [array([0.09])]
6, Dates: [[159]], Probs: [array([0.06])]
7, Dates: [], Probs: []
8, Da



Shadows ((17, 96, 96)) used 0.0 processing units
1, Dates: [[ 15]
 [ 25]
 [ 30]
 [376]
 [386]], Probs: [array([0.07]), array([0.07]), array([0.05]), array([0.12]), array([0.13])]
2, Dates: [[ 35]
 [ 40]
 [396]], Probs: [array([0.04]), array([0.1]), array([0.13])]
3, Dates: [[74]], Probs: [array([0.03])]
4, Dates: [[ 99]
 [119]], Probs: [array([0.07]), array([0.05])]
5, Dates: [], Probs: []
6, Dates: [[159]], Probs: [array([0.06])]
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [[361]], Probs: [array([0.08])]
1, Dates: [[ 15]
 [376]], Probs: [array([0.07]), array([0.12])]
2, Dates: [[35]], Probs: [array([0.04])]
3, Dates: [[74]], Probs: [array([0.03])]
4, Dates: [[119]], Probs: [array([0.05])]
5, Dates: [], Probs: []
6, Dates: [[159]], Probs: [array([0.06])]
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [[361]], Prob

The original max value is 32709
Original 20 meter bands size: (8, 16, 16, 6), using 0.015625 PU
The original L2A image size is: (8, 32, 32, 4)
The original max value is 33803
Shadows (8, 96, 96), clouds (8, 96, 96), S2, (8, 32, 32, 10), S2d, (8,)
(8, 32, 32, 10)
(8, 32, 32, 10) (8, 32, 32) (8, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
Saving: train-raw/1700115


Downloading 36/150, 1700114
Shadows ((17, 96, 96)) used 0.0 processing units
1, Dates: [[ 2]
 [17]], Probs: [array([0.08]), array([0.

The original max value is 32394
Original 20 meter bands size: (9, 16, 16, 6), using 0.017578125 PU
The original L2A image size is: (9, 32, 32, 4)
The original max value is 30690
Shadows (9, 96, 96), clouds (9, 96, 96), S2, (9, 32, 32, 10), S2d, (9,)
(9, 32, 32, 10)
(9, 32, 32, 10) (9, 32, 32) (9, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
The multiplier is 0.005688888888888889 and the clean is 1024 for 8
Saving: train-raw/1700110


Downloading 41/150, 1700109
Shadows ((15, 96, 96)) used 0.0 pro

The original max value is 36208
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 35120
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/1700105


Downloading 46/150, 1700104
Shadows ((8, 96, 96)) used 0.0 processing units
1, Dates: [[12]], Probs: [array([0.08])]
2, Dates: [[32]], Probs: [array([0.04])]
3, Dates: [[74]], Probs: [array([0.13])

The original max value is 33239
Original 20 meter bands size: (1, 16, 16, 6), using 0.001953125 PU


CRITICAL:root:cannot do a non-empty take from an empty axes.
Traceback (most recent call last):
  File "<ipython-input-11-872f99f4fd4d>", line 83, in download_raw_data
    x, interp, _ = remove_cloud_and_shadows(s2, cloud_probs, shadows, s2_dates, pfcps)
  File "/Users/jbrandt.terminal/Documents/GitHub/sentinel-tree-cover/src/preprocessing/cloud_removal.py", line 430, in remove_cloud_and_shadows
    pfcps)
  File "/Users/jbrandt.terminal/Documents/GitHub/sentinel-tree-cover/src/preprocessing/cloud_removal.py", line 286, in calculate_clouds_in_mosaic
    reference_blue = np.percentile(mosaic[..., 0][~only_1_img], 99)
  File "<__array_function__ internals>", line 6, in percentile
  File "/Users/jbrandt.terminal/opt/anaconda3/envs/tf/lib/python3.7/site-packages/numpy/lib/function_base.py", line 3697, in percentile
    a, q, axis, out, overwrite_input, interpolation, keepdims)
  File "/Users/jbrandt.terminal/opt/anaconda3/envs/tf/lib/python3.7/site-packages/numpy/lib/function_base.py", lin

The original L2A image size is: (1, 32, 32, 4)
The original max value is 32020
Removing [12, 17, 32] from clouds because not in S2
Shadows (1, 96, 96), clouds (1, 96, 96), S2, (1, 32, 32, 10), S2d, (1,)
(1, 32, 32, 10)
(1, 32, 32, 10) (1, 32, 32) (1, 32, 32)
cannot do a non-empty take from an empty axes.
Downloading 51/150, 170099
Shadows ((12, 96, 96)) used 0.0 processing units
1, Dates: [[ 20]
 [374]], Probs: [array([0.12]), array([0.15])]
2, Dates: [[35]], Probs: [array([0.05])]
3, Dates: [[74]], Probs: [array([0.02])]
4, Dates: [[119]], Probs: [array([0.06])]
5, Dates: [[144]], Probs: [array([0.06])]
6, Dates: [[164]], Probs: [array([0.1])]
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[309]], Probs: [array([0.2])]
12, Dates: [[-30]], Probs: [array([0.15])]
1, Dates: [[ 20]
 [374]], Probs: [array([0.12]), array([0.15])]
2, Dates: [[35]], Probs: [array([0.05])]
3, Dates: [[74]], Probs: [array([0.02])]
4, Dates: [[119]], 



Shadows ((11, 96, 96)) used 0.0 processing units
1, Dates: [[17]
 [27]], Probs: [array([0.13]), array([0.13])]
2, Dates: [[32]
 [37]], Probs: [array([0.05]), array([0.11])]
3, Dates: [], Probs: []
4, Dates: [], Probs: []
5, Dates: [], Probs: []
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [[-23]
 [359]], Probs: [array([0.09]), array([0.12])]
1, Dates: [[17]], Probs: [array([0.13])]
2, Dates: [[32]], Probs: [array([0.05])]
3, Dates: [], Probs: []
4, Dates: [], Probs: []
5, Dates: [], Probs: []
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [[359]], Probs: [array([0.12])]
The original max value is 28960
Original 20 meter bands size: (5, 16, 16, 6), using 0.009765625 PU
The original L2A image size is: (5, 32, 32, 4)
The original max value is 28626
Shadows (5, 96, 96), cl

The original max value is 34131
Original 20 meter bands size: (6, 16, 16, 6), using 0.01171875 PU
The original L2A image size is: (6, 32, 32, 4)
The original max value is 32951
Shadows (6, 96, 96), clouds (6, 96, 96), S2, (6, 32, 32, 10), S2d, (6,)
(6, 32, 32, 10)
(6, 32, 32, 10) (6, 32, 32) (6, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
Saving: train-raw/170089


Downloading 62/150, 170088
Shadows ((13, 96, 96)) used 0.0 processing units
1, Dates: [[  5]
 [389]], Probs: [array([0.22]), array([0.25])]
2, Dates: [[35]], Probs: [array([0.06])]
3, Dates: [], Probs: []
4, Dates: [[119]], Probs: [array([0.08])]
5, Dates: [[134]], 

The original max value is 27616
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 28180
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/170084


Downloading 67/150, 170083
Shadows ((41, 96, 96)) used 0.0 processing units
1, Dates: [[  2]
 [ 17]
 [371]
 [376]
 [381]
 [386]], Probs: [array([0.04]), array([0.05]), array([0.11]), array([0.04]), 

The original max value is 37787
Original 20 meter bands size: (9, 16, 16, 6), using 0.017578125 PU
The original L2A image size is: (9, 32, 32, 4)
The original max value is 35389
Shadows (9, 96, 96), clouds (9, 96, 96), S2, (9, 32, 32, 10), S2d, (9,)
(9, 32, 32, 10)
(9, 32, 32, 10) (9, 32, 32) (9, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
The multiplier is 0.005688888888888889 and the clean is 1024 for 8
Saving: train-raw/170080


Downloading 71/150, 170079
Shadows ((16, 96, 96)) used 0.0 proce

CRITICAL:root:cannot do a non-empty take from an empty axes.
Traceback (most recent call last):
  File "<ipython-input-11-872f99f4fd4d>", line 83, in download_raw_data
    x, interp, _ = remove_cloud_and_shadows(s2, cloud_probs, shadows, s2_dates, pfcps)
  File "/Users/jbrandt.terminal/Documents/GitHub/sentinel-tree-cover/src/preprocessing/cloud_removal.py", line 430, in remove_cloud_and_shadows
    pfcps)
  File "/Users/jbrandt.terminal/Documents/GitHub/sentinel-tree-cover/src/preprocessing/cloud_removal.py", line 286, in calculate_clouds_in_mosaic
    reference_blue = np.percentile(mosaic[..., 0][~only_1_img], 99)
  File "<__array_function__ internals>", line 6, in percentile
  File "/Users/jbrandt.terminal/opt/anaconda3/envs/tf/lib/python3.7/site-packages/numpy/lib/function_base.py", line 3697, in percentile
    a, q, axis, out, overwrite_input, interpolation, keepdims)
  File "/Users/jbrandt.terminal/opt/anaconda3/envs/tf/lib/python3.7/site-packages/numpy/lib/function_base.py", lin

The original L2A image size is: (8, 32, 32, 4)
The original max value is 33370
Shadows (8, 96, 96), clouds (8, 96, 96), S2, (8, 32, 32, 10), S2d, (8,)
(8, 32, 32, 10)
(8, 32, 32, 10) (8, 32, 32) (8, 32, 32)
cannot do a non-empty take from an empty axes.
Downloading 73/150, 170077
Shadows ((11, 96, 96)) used 0.0 processing units
1, Dates: [[ 2]
 [17]], Probs: [array([0.08]), array([0.12])]
2, Dates: [], Probs: []
3, Dates: [], Probs: []
4, Dates: [], Probs: []
5, Dates: [[136]], Probs: [array([0.04])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.03])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[-38]
 [316]
 [321]], Probs: [array([0.13]), array([0.1]), array([0.13])]
12, Dates: [[ -8]
 [351]], Probs: [array([0.07]), array([0.06])]
1, Dates: [[ 2]
 [17]], Probs: [array([0.08]), array([0.12])]
2, Dates: [], Probs: []
3, Dates: [], Probs: []
4, Dates: [], Probs: []
5, Dates: [[136]], Probs: [array([0.04])]
6, Dates: [], Probs: []
7, D

The original max value is 35153
Original 20 meter bands size: (8, 16, 16, 6), using 0.015625 PU
The original L2A image size is: (8, 32, 32, 4)
The original max value is 33646
Shadows (8, 96, 96), clouds (8, 96, 96), S2, (8, 32, 32, 10), S2d, (8,)
(8, 32, 32, 10)
(8, 32, 32, 10) (8, 32, 32) (8, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
Saving: train-raw/170073


Downloading 78/150, 170072
Shadows ((18, 96, 96)) used 0.0 processing units
1, Dates: [[ 2]
 [12]
 [17]], Probs: [array([0.06]), array

Shadows ((21, 96, 96)) used 0.0 processing units
1, Dates: [[  2]
 [ 12]
 [ 17]
 [381]], Probs: [array([0.06]), array([0.1]), array([0.05]), array([0.05])]
2, Dates: [[47]], Probs: [array([0.15])]
3, Dates: [], Probs: []
4, Dates: [], Probs: []
5, Dates: [[126]], Probs: [array([0.21])]
6, Dates: [[161]], Probs: [array([0.06])]
7, Dates: [], Probs: []
8, Dates: [[216]
 [221]], Probs: [array([0.]), array([0.12])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[-43]
 [-38]
 [321]
 [326]], Probs: [array([0.07]), array([0.12]), array([0.08]), array([0.06])]
12, Dates: [[-23]
 [-13]
 [ -8]
 [ -3]], Probs: [array([0.09]), array([0.12]), array([0.06]), array([0.06])]
1, Dates: [[ 17]
 [381]], Probs: [array([0.05]), array([0.05])]
2, Dates: [[47]], Probs: [array([0.15])]
3, Dates: [], Probs: []
4, Dates: [], Probs: []
5, Dates: [], Probs: []
6, Dates: [[161]], Probs: [array([0.06])]
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.])]
9, Dates: [], Probs: []
10, Dates: 

The original max value is 28724
Original 20 meter bands size: (8, 16, 16, 6), using 0.015625 PU
The original L2A image size is: (8, 32, 32, 4)
The original max value is 29222
Shadows (8, 96, 96), clouds (8, 96, 96), S2, (8, 32, 32, 10), S2d, (8,)
(8, 32, 32, 10)
(8, 32, 32, 10) (8, 32, 32) (8, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
The multiplier is 0.005688888888888889 and the clean is 1024 for 7
Saving: train-raw/170064


Downloading 87/150, 170063
Shadows ((17, 96, 96)) used 0.0 processing units
1, Dates: [[ 2]
 [12]
 [17]], Probs: [array([0.07]), array

Shadows ((18, 96, 96)) used 0.0 processing units
1, Dates: [[ 15]
 [ 30]
 [379]], Probs: [array([0.07]), array([0.06]), array([0.15])]
2, Dates: [[35]
 [40]], Probs: [array([0.04]), array([0.12])]
3, Dates: [[60]
 [74]], Probs: [array([0.11]), array([0.02])]
4, Dates: [[ 99]
 [104]], Probs: [array([0.02]), array([0.08])]
5, Dates: [[144]
 [149]], Probs: [array([0.06]), array([0.01])]
6, Dates: [[159]], Probs: [array([0.1])]
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[-45]], Probs: [array([0.04])]
12, Dates: [[344]], Probs: [array([0.14])]
1, Dates: [[ 30]
 [379]], Probs: [array([0.06]), array([0.15])]
2, Dates: [[35]], Probs: [array([0.04])]
3, Dates: [[74]], Probs: [array([0.02])]
4, Dates: [[104]], Probs: [array([0.08])]
5, Dates: [[149]], Probs: [array([0.01])]
6, Dates: [[159]], Probs: [array([0.1])]
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs:

The original max value is 26155
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 25729
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/170055


Downloading 96/150, 170054
Shadows ((15, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [369]], Probs: [array([0.1]), array([0.12])]
2, Dates: [[ 35]
 [404]], Probs: [array([0.04]), array([0.1

The original max value is 31824
Original 20 meter bands size: (6, 16, 16, 6), using 0.01171875 PU
The original L2A image size is: (6, 32, 32, 4)
The original max value is 31116
Shadows (6, 96, 96), clouds (6, 96, 96), S2, (6, 32, 32, 10), S2d, (6,)
(6, 32, 32, 10)
(6, 32, 32, 10) (6, 32, 32) (6, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
Saving: train-raw/170050


Downloading 101/150, 170049
Shadows ((14, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [ 15]
 [ 30]
 [379]], Probs: [array([0.06]), array([0.08]), array([0.14]), array([0.09])]
2, Dates: [[35]], Probs: [array([0.04])]
3, Dates: [[74]
 [79]], Probs: [array([0

The original max value is 34098
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 33508
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/170045


Downloading 106/150, 170044
Shadows ((17, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [ 15]
 [ 20]
 [ 30]
 [369]], Probs: [array([0.1]), array([0.14]), array([0.09]), array([0.11]), array([

Shadows ((18, 96, 96)) used 0.0 processing units
1, Dates: [[  0]
 [ 30]
 [369]
 [379]], Probs: [array([0.06]), array([0.13]), array([0.15]), array([0.11])]
2, Dates: [[35]], Probs: [array([0.04])]
3, Dates: [[69]
 [74]
 [79]], Probs: [array([0.05]), array([0.03]), array([0.12])]
4, Dates: [[ 99]
 [104]], Probs: [array([0.03]), array([0.14])]
5, Dates: [[134]], Probs: [array([0.05])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[319]], Probs: [array([0.2])]
12, Dates: [[-5]], Probs: [array([0.2])]
1, Dates: [[  0]
 [369]
 [379]], Probs: [array([0.06]), array([0.15]), array([0.11])]
2, Dates: [[35]], Probs: [array([0.04])]
3, Dates: [[74]], Probs: [array([0.03])]
4, Dates: [[99]], Probs: [array([0.03])]
5, Dates: [[134]], Probs: [array([0.05])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [], Probs: []
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [

Shadows ((8, 96, 96)) used 0.0 processing units
1, Dates: [[12]], Probs: [array([0.18])]
2, Dates: [[411]], Probs: [array([0.16])]
3, Dates: [], Probs: []
4, Dates: [[111]], Probs: [array([0.04])]
5, Dates: [[146]], Probs: [array([0.09])]
6, Dates: [], Probs: []
7, Dates: [[206]], Probs: [array([0.2])]
8, Dates: [[216]], Probs: [array([0.14])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[-33]], Probs: [array([0.11])]
12, Dates: [[-3]], Probs: [array([0.06])]
1, Dates: [[12]], Probs: [array([0.18])]
2, Dates: [], Probs: []
3, Dates: [], Probs: []
4, Dates: [[111]], Probs: [array([0.04])]
5, Dates: [[146]], Probs: [array([0.09])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.14])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [[-3]], Probs: [array([0.06])]
The original max value is 30441
Original 20 meter bands size: (5, 16, 16, 6), using 0.009765625 PU
The original L2A image size is: (5, 32, 32

The original max value is 31345
Original 20 meter bands size: (5, 16, 16, 6), using 0.009765625 PU
The original L2A image size is: (5, 32, 32, 4)
The original max value is 31332
Shadows (5, 96, 96), clouds (5, 96, 96), S2, (5, 32, 32, 10), S2d, (5,)
(5, 32, 32, 10)
(5, 32, 32, 10) (5, 32, 32) (5, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
Saving: train-raw/170030


Downloading 121/150, 170029
Shadows ((8, 96, 96)) used 0.0 processing units
1, Dates: [[12]], Probs: [array([0.14])]
2, Dates: [[396]], Probs: [array([0.25])]
3, Dates: [], Probs: []
4, Dates: [[111]], Probs: [array([0.04])]
5, Dates: [[121]], Probs: [array([0.03])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]]

Shadows ((10, 96, 96)) used 0.0 processing units
1, Dates: [[12]
 [17]], Probs: [array([0.1]), array([0.14])]
2, Dates: [], Probs: []
3, Dates: [[86]], Probs: [array([0.25])]
4, Dates: [[111]], Probs: [array([0.05])]
5, Dates: [[121]], Probs: [array([0.03])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.08])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[-43]], Probs: [array([0.14])]
12, Dates: [[-23]
 [341]], Probs: [array([0.21]), array([0.15])]
1, Dates: [[12]
 [17]], Probs: [array([0.1]), array([0.14])]
2, Dates: [], Probs: []
3, Dates: [], Probs: []
4, Dates: [[111]], Probs: [array([0.05])]
5, Dates: [[121]], Probs: [array([0.03])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.08])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [], Probs: []
12, Dates: [[341]], Probs: [array([0.15])]
The original max value is 29858
Original 20 meter bands size: (6, 16, 16, 6), using 0.01171875

Shadows ((10, 96, 96)) used 0.0 processing units
1, Dates: [[ 2]
 [12]], Probs: [array([0.05]), array([0.1])]
2, Dates: [[406]], Probs: [array([0.08])]
3, Dates: [], Probs: []
4, Dates: [[111]], Probs: [array([0.09])]
5, Dates: [[121]], Probs: [array([0.02])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[316]], Probs: [array([0.14])]
12, Dates: [[336]], Probs: [array([0.11])]
1, Dates: [[ 2]
 [12]], Probs: [array([0.05]), array([0.1])]
2, Dates: [], Probs: []
3, Dates: [], Probs: []
4, Dates: [[111]], Probs: [array([0.09])]
5, Dates: [[121]], Probs: [array([0.02])]
6, Dates: [], Probs: []
7, Dates: [], Probs: []
8, Dates: [[216]], Probs: [array([0.])]
9, Dates: [], Probs: []
10, Dates: [], Probs: []
11, Dates: [[316]], Probs: [array([0.14])]
12, Dates: [[336]], Probs: [array([0.11])]
The original max value is 29923
Original 20 meter bands size: (8, 16, 16, 6), using 0.015625 PU
The o

The original max value is 34871
Original 20 meter bands size: (6, 16, 16, 6), using 0.01171875 PU
The original L2A image size is: (6, 32, 32, 4)
The original max value is 32191
Shadows (6, 96, 96), clouds (6, 96, 96), S2, (6, 32, 32, 10), S2d, (6,)
(6, 32, 32, 10)
(6, 32, 32, 10) (6, 32, 32) (6, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
Saving: train-raw/170015


Downloading 136/150, 170014
Shadows ((6, 96, 96)) used 0.0 processing units
1, Dates: [[12]], Probs: [array([0.08])]
2, Dates: [[57]], Probs: [array([0.18])]
3, Dates: [[71]], Probs: [array([0.13])]
4, Dates: [], Probs: []
5, Dates: [[121]], Probs: [array([0.01])]
6

The original max value is 27872
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 27505
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/170010


Downloading 141/150, 17009
Shadows ((14, 96, 96)) used 0.0 processing units
1, Dates: [[12]], Probs: [array([0.08])]
2, Dates: [], Probs: []
3, Dates: [[81]], Probs: [array([0.1])]
4, Dates: [[111]]

The original max value is 33246
Original 20 meter bands size: (7, 16, 16, 6), using 0.013671875 PU
The original L2A image size is: (7, 32, 32, 4)
The original max value is 31254
Shadows (7, 96, 96), clouds (7, 96, 96), S2, (7, 32, 32, 10), S2d, (7,)
(7, 32, 32, 10)
(7, 32, 32, 10) (7, 32, 32) (7, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
The multiplier is 0.005688888888888889 and the clean is 1024 for 6
Saving: train-raw/17005


Downloading 146/150, 17004
Shadows ((10, 96, 96)) used 0.0 processing units
1, Dates: [[12]], Probs: [array([0.08])]
2, Dates: [], Probs: []
3, Dates: [], Probs: []
4, Dates: [[101]
 [111]], Probs: [

The original max value is 30428
Original 20 meter bands size: (6, 16, 16, 6), using 0.01171875 PU
The original L2A image size is: (6, 32, 32, 4)
The original max value is 28986
Shadows (6, 96, 96), clouds (6, 96, 96), S2, (6, 32, 32, 10), S2d, (6,)
(6, 32, 32, 10)
(6, 32, 32, 10) (6, 32, 32) (6, 32, 32)
The multiplier is 0.005688888888888889 and the clean is 1024 for 0
The multiplier is 0.005688888888888889 and the clean is 1024 for 1
The multiplier is 0.005688888888888889 and the clean is 1024 for 2
The multiplier is 0.005688888888888889 and the clean is 1024 for 3
The multiplier is 0.005688888888888889 and the clean is 1024 for 4
The multiplier is 0.005688888888888889 and the clean is 1024 for 5
Saving: train-raw/17000




## 3. Process train / test data

In [21]:
i = 0
plots = [str(x[:-4]) for x in os.listdir("train-raw/") if ".npy" in x]
for plot in plots:
    i += 1
    if not os.path.exists("train-s2/" + plot + ".hkl"):
        try:
            tiles = process_raw(plot, path = 'train')
            print(i, plot)
        except Exception as e:
            print(e)
            continue

(2, 32, 32, 10)
(12, 28, 28, 11)
There are 0 NA values
Skipping 06013 because 45 distance, and 2 img 

414 06013
Deleting [0] because of missing data
(4, 32, 32, 10)
(12, 28, 28, 11)
There are 0 NA values
Skipping 170098 because 364 distance, and 4 img 

739 170098
Deleting [1] because of missing data
(2, 32, 32, 10)
(12, 28, 28, 11)
There are 0 NA values
Skipping 14027 because 214 distance, and 2 img 

777 14027
(2, 32, 32, 10)
(12, 28, 28, 11)
There are 0 NA values
Skipping 08124 because 104 distance, and 2 img 

1433 08124
