# Package import, API keys

In [1]:
import pandas as pd
import numpy as np
from random import shuffle
from osgeo import ogr, osr
from sentinelhub import WmsRequest, WcsRequest, MimeType, CRS, BBox, constants
import logging
from collections import Counter
import datetime
import os
import yaml
from sentinelhub import DataSource
import scipy.sparse as sparse
from scipy.sparse.linalg import splu
from skimage.transform import resize
from sentinelhub import CustomUrlParam
from time import time as timer
import multiprocessing
import math
import reverse_geocoder as rg
import pycountry
import pycountry_convert as pc
import hickle as hkl
from shapely.geometry import Point, Polygon
import geopandas
from tqdm import tnrange, tqdm_notebook
import math
import boto3
from pyproj import Proj, transform
from timeit import default_timer as timer
from typing import Tuple, List
import warnings
from scipy.ndimage import median_filter

In [2]:
%run ../src/preprocessing/slope.py
%run ../src/preprocessing/indices.py
%run ../src/downloading/utils.py
%run ../src/preprocessing/cloud_removal.py
%run ../src/preprocessing/whittaker_smoother.py
%run ../src/io/upload.py
%run ../src/tof/tof_downloading.py

In [3]:
# Credentials are read from an optional, untracked ../config.yaml.
# Every credential gets a placeholder first so that later cells can reference
# AWSKEY / AWSSECRET even when the config file is absent (previously only
# API_KEY had a fallback and the AWS names were left undefined).
API_KEY = "none"
AWSKEY = "none"
AWSSECRET = "none"

if os.path.exists("../config.yaml"):
    with open("../config.yaml", 'r') as stream:
        key = yaml.safe_load(stream)
    # A config file that exists but lacks one of these keys is still an error.
    API_KEY = key['key']
    AWSKEY = key['awskey']
    AWSSECRET = key['awssecret']

In [4]:
# Year being processed; the acquisition windows below are derived from it.
year = 2020

# Sentinel-2 window: for years after 2017 it opens on 15 Nov of the previous
# year, otherwise on 1 Jan; it always closes on 15 Feb of the following year.
dates = (
    f'{year - 1}-11-15' if year > 2017 else f'{year}-01-01',
    f'{year + 1}-02-15',
)

# Sentinel-1 window: the calendar year itself.
dates_sentinel_1 = (f'{year}-01-01', f'{year}-12-31')

SIZE = 9 * 5
IMSIZE = (7 * 2) + (SIZE * 14) + 2  # process 6320 x 6320 m blocks

# Cumulative day-of-year offset at which each month starts (non-leap year).
days_per_month = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30]
starting_days = np.cumsum(days_per_month)

# (Optional) Make the tiling ID structure

In [51]:
# Table of tile centroids. 'X' / 'Y' hold the coordinates; once tiled,
# 'X_tile' / 'Y_tile' hold each row's rank among the sorted unique X / Y values.
data = pd.read_csv("final_processing_area_noclip.csv")

# Flip to True to (re)build the tile-ID columns and overwrite the CSV in place.
make_tile = False

if make_tile:
    def id_tile_number(idx, col):
        """Return, as a string, the rank of `idx` among the sorted unique
        values of data[col]. Kept for single-value lookups."""
        vals_set = np.unique(data[col])  # np.unique already returns sorted values
        no = np.argwhere(vals_set == idx).flatten()
        return str(no[0])

    def _tile_ids(col):
        """Rank of every row of data[col] among that column's sorted unique values.

        One np.unique call replaces the per-row unique + sort of the original
        loop. Values are formatted as float strings ('12.0') because that is
        what the original loop produced by round-tripping through a float array.
        """
        _, ranks = np.unique(data[col].to_numpy(), return_inverse=True)
        return [str(float(rank)) for rank in np.ravel(ranks)]

    # drop=True: a bare reset_index() inserts an 'index' / 'level_0' column on
    # every run and raises ValueError once 'level_0' already exists in the CSV.
    data = data.reset_index(drop=True)

    y_tiles = _tile_ids('Y')
    data['Y_tile'] = y_tiles
    data.to_csv("final_processing_area_noclip.csv")

    x_tiles = _tile_ids('X')
    data['X_tile'] = x_tiles
    data.to_csv("final_processing_area_noclip.csv")
    
    
# Build a tracker of every tile that already has a folder on disk
# (../project-monitoring/tof/<X_tile>/<Y_tile>/), joined to its row in `data`.
# Rows are collected in a list and turned into a frame once: DataFrame.append
# copied the whole frame on each call and no longer exists in pandas >= 2.0.
_tracked_rows = []
x_tiles = [x for x in os.listdir("../project-monitoring/tof/")
           if '.DS' not in x and '.csv' not in x]
for x_tile in x_tiles:
    y_tiles = [y for y in os.listdir("../project-monitoring/tof/" + x_tile)
               if '.DS' not in y and '.tif' not in y]
    for y_tile in y_tiles:
        _tracked_rows.append({'X_tile': int(x_tile), 'Y_tile': int(y_tile)})

# Cast to float so the key columns keep the dtype the append-based version
# produced (and match the float tile columns shown by data.head()).
tracker = pd.DataFrame(_tracked_rows, columns=['X_tile', 'Y_tile']).astype(float)

# Inner join on the shared columns (X_tile, Y_tile).
tracker = pd.merge(tracker, data)
tracker.to_csv("../project-monitoring/tof/tracker.csv")

In [6]:
#data = pd.read_csv("processing_area_1_percent.csv")

# Helper functions

In [7]:
# Leading 0 followed by the lengths of January..November (non-leap year), so the
# running total gives the day-of-year offset at which each month starts.
days_per_month = [0] + [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30]
starting_days = np.cumsum(days_per_month)

def to_int16(array: np.array) -> np.array:
    '''Quantises an array of values in [0, 1] to uint16.

    Despite the name the result is unsigned: values are scaled by 65535 and
    truncated. For float32 input this halves the storage size.

        Parameters:
         array (np.array): values expected to lie in [0, 1]

        Returns:
         array (np.array): uint16 array of the same shape

        Raises:
         AssertionError: if any input value lies outside [0, 1]
    '''
    assert np.min(array) >= 0, np.min(array)
    assert np.max(array) <= 1, np.max(array)

    scaled = np.trunc(np.clip(array, 0, 1) * 65535)
    # Sanity-check the scaled values themselves (the original compared inside
    # np.min / np.max, i.e. took the min / max of a boolean mask).
    assert np.min(scaled) >= 0
    assert np.max(scaled) <= 65535

    return scaled.astype(np.uint16)

def to_float32(array: np.array) -> np.array:
    """Converts an array to float32 values in [0, 1]

    Non-floating input (e.g. the uint16 arrays written by to_int16) is divided
    by 65535; floating input is only cast to float32.

    All-zero integer arrays are accepted: the previous `max > 1` assertion
    rejected them although blank tiles are legitimately written as zeros.

        Parameters:
         array (np.array): integer-encoded or floating point array

        Returns:
         array (np.array): float32 array of the same shape

        Raises:
         AssertionError: if the converted maximum exceeds 1
    """
    array = np.asarray(array)
    print(f'The original max value is {np.max(array)}')
    if np.issubdtype(array.dtype, np.floating):
        # Already real-valued: normalise the dtype only (float64 used to fail
        # the dtype assertion below).
        array = array.astype(np.float32, copy=False)
    else:
        array = np.float32(array) / 65535.
    assert np.max(array) <= 1
    assert array.dtype == np.float32
    return array

def process_sentinel_1_tile(sentinel1: np.ndarray, dates: np.ndarray) -> np.ndarray:
    """Reduces a (?, X, Y, 2) Sentinel 1 stack to 12 composites of shape (12, X, Y, 2)

    The stack is first passed through calculate_and_save_best_images; the result
    is then split into 12 consecutive groups of 6 time steps and each group is
    collapsed with a per-pixel median.
    (Presumably that helper returns 72 regularly spaced steps -- TODO confirm.)

        Parameters:
         sentinel1 (np.array): (?, X, Y, 2) Sentinel 1 array
         dates (np.array): acquisition dates, forwarded to the helper

        Returns:
         monthly (np.array): (12, X, Y, 2) array of medians
    """
    composites, _ = calculate_and_save_best_images(sentinel1, dates)

    n_groups = 12
    steps_per_group = 72 // 12
    monthly = np.empty((n_groups, sentinel1.shape[1], sentinel1.shape[2], 2))
    for group in range(n_groups):
        first = group * steps_per_group
        last = first + steps_per_group
        monthly[group] = np.median(composites[first:last], axis = 0)

    return monthly

# Super resolution

In [8]:
# Session-based TensorFlow setup shared by the super-resolution helpers below.
import tensorflow as tf
sess = tf.Session()
from keras import backend as K
K.set_session(sess)  # hand the same session to Keras

# Folder holding the exported super-resolution model (model.meta + checkpoints).
MDL_PATH = "../models/supres/"

# Rebuild the graph from the .meta file, then restore the latest checkpoint
# found in MDL_PATH into `sess`.
model = tf.train.import_meta_graph(MDL_PATH + 'model.meta')
model.restore(sess, tf.train.latest_checkpoint(MDL_PATH))

# Graph endpoints are looked up by tensor name, so these names must match the
# exported graph exactly: the output tensor and the two input placeholders
# fed by superresolve().
logits = tf.get_default_graph().get_tensor_by_name("Add_6:0")
inp = tf.get_default_graph().get_tensor_by_name("Placeholder:0")
inp_bilinear = tf.get_default_graph().get_tensor_by_name("Placeholder_1:0")

def superresolve(input_data, bilinear_upsample):
    """Run the restored super-resolution graph on one batch.

    Feeds `input_data` to the `inp` placeholder and `bilinear_upsample` to the
    `inp_bilinear` placeholder, and returns the evaluated `logits` tensor.
    """
    feed = {inp: input_data, inp_bilinear: bilinear_upsample}
    outputs = sess.run([logits], feed_dict=feed)
    prediction = outputs[0]
    return prediction

def superresolve_tile(arr: np.ndarray) -> np.ndarray:
    """Superresolves bands 4 onwards of a tile, in place

    The tile is reflect-padded by 4 pixels on both spatial axes before the
    prediction and the pad is cropped off afterwards, so the network does not
    see a hard edge at the tile boundary.

        Parameters:
         arr (arr): (?, X, Y, bands) array; bands 4+ are overwritten

        Returns:
         arr (arr): the same array object, with bands 4+ replaced by the prediction
    """
    print(f"The input array to superresolve is {arr.shape}")

    pad = 4
    padded = np.pad(arr, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'reflect')

    # The network takes the full band stack plus the bands it has to sharpen.
    prediction = superresolve(padded, padded[..., 4:])

    arr[..., 4:] = prediction[:, pad:-pad, pad:-pad, :]
    return arr

Using TensorFlow backend.


# Tiling functions

In [9]:
def make_bbox2(initial_bbx, expansion = 10):
    """Return a copy of a (min_x, min_y, max_x, max_y) box grown on every side.

    `expansion` is converted to degrees with separate factors for the
    horizontal (radius 6378.137) and vertical (radius 6356.76) directions.
    The input box is left untouched.
    """
    radians_per_degree = 2 * math.pi / 360
    earth_vertical = 6356.76
    earth_horizontal = 6378.137

    # Degrees covered by one unit of `expansion` along each axis.
    m_vertical = 1 / (radians_per_degree * earth_vertical) / 1000
    m_horizontal = 1 / (radians_per_degree * earth_horizontal) / 1000

    exp_horizontal = expansion * m_horizontal
    exp_vertical = expansion * m_vertical

    bbx = copy.deepcopy(initial_bbx)
    shifts = (-exp_horizontal, -exp_vertical, exp_horizontal, exp_vertical)
    for position, shift in enumerate(shifts):
        bbx[position] += shift
    return bbx

In [10]:
import rasterio
from rasterio.transform import from_origin
import copy

def write_tif(file, point, arr, dtype):
    """Write a 2-D array to a single-band GeoTIFF in WGS84 lon/lat.

        Parameters:
         file (str): output path
         point (sequence): bounds as (min_x, min_y, max_x, max_y)
         arr (np.ndarray): 2-D raster; negative values are written as 0
         dtype (str): output dtype; 'uint8' and 'uint16' also cast the data

        Returns:
         None
    """
    print(point)
    # (min_x, min_y, max_x, max_y)
    west, south, east, north = point[0], point[1], point[2], point[3]

    # Clamp negatives on a copy so the caller's array is not modified.
    arr = np.clip(arr, 0, None)
    if dtype == 'uint8':
        arr = arr.astype(np.uint8)
    if dtype == 'uint16':
        arr = arr.astype(np.uint16)

    print(west, east)
    # Named `affine` so the local does not shadow pyproj's `transform` import.
    affine = rasterio.transform.from_bounds(west = west, south = south,
                                            east = east, north = north,
                                            width = arr.shape[1],
                                            height = arr.shape[0])

    print("Writing", file)
    # The context manager closes the dataset even if write() raises.
    with rasterio.open(file, 'w', driver = 'GTiff',
                       height = arr.shape[0], width = arr.shape[1], count = 1,
                       dtype = dtype,
                       crs = '+proj=longlat +datum=WGS84 +no_defs',
                       transform = affine) as new_dataset:
        new_dataset.write(arr, 1)

def make_output_and_temp_folders(output_folder: str) -> None:
    """Makes necessary folder structures for IO of raw and processed data

    Creates the raw/ tree (clouds, misc, s1, s2_10, s2_20) below
    `output_folder`. Folder names are appended by string concatenation, so
    `output_folder` must end with a path separator. Safe to call repeatedly.

        Parameters:
         output_folder (path): root folder of the tile, with trailing separator

        Returns:
         None
    """
    folders = ['raw/', 'raw/clouds/', 'raw/misc/', 'raw/s1/',
               'raw/s2_10/', 'raw/s2_20/']

    for folder in folders:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(os.path.realpath(output_folder + folder), exist_ok=True)
        
def make_bbox(initial_bbx, expansion = 10):
    """Return a copy of a (min_x, min_y, max_x, max_y) box grown on every side.

    Each side moves outwards by `expansion` steps of 1/360 degree; the input
    box is left untouched.
    """
    #multiplier = 0.002777777777777999928 # ESA LULC pixel size
    multiplier = 1/360
    margin = expansion * multiplier

    bbx = copy.deepcopy(initial_bbx)
    for position, direction in enumerate((-1, -1, 1, 1)):
        if direction < 0:
            bbx[position] -= margin
        else:
            bbx[position] += margin
    return bbx
    

def download_tile(x, y, data):
    """Download and cache every raw input needed for one tile.

    Each product is skipped when its output file already exists under
    ../project-monitoring/tof/<x>/<y>/raw/:
      1. cloud / shadow masks and the list of clean acquisition dates,
      2. Sentinel-2 10 m and 20 m bands for those dates,
      3. Sentinel-1 composites, retrying with the other layer when the
         first one returns no images,
      4. the DEM, on a slightly larger bounding box.

    Relies on the module-level `dates`, `dates_sentinel_1` and `API_KEY`.

        Parameters:
         x: X_tile ID (anything int() accepts)
         y: Y_tile ID (anything int() accepts)
         data (pd.DataFrame): tile table with X, Y, X_tile and Y_tile columns

        Returns:
         None

        Raises:
         KeyError: if `data` has no row for the requested tile
    """
    data = data[data['Y_tile'] == int(y)]
    data = data[data['X_tile'] == int(x)]
    print(data)
    data = data.reset_index(drop = True)
    # str(int(...)) can never contain ".0", so no further clean-up is needed.
    x = str(int(x))
    y = str(int(y))
    print(x)
    print(x, y)

    if len(data) == 0:
        # Previously surfaced as a bare `KeyError: 0` from data['X'][0].
        raise KeyError(f"No row in data for X_tile={x}, Y_tile={y}")

    # The tile is described by a single point; the boxes are grown around it.
    center_x, center_y = data['X'][0], data['Y'][0]
    initial_bbx = [center_x, center_y, center_x, center_y]

    bbx = make_bbox(initial_bbx, expansion = 300/30)
    dem_bbx = make_bbox(initial_bbx, expansion = 301/30)
    print(bbx)

    folder = f"../project-monitoring/tof/{x}/{y}/"
    tile_idx = f'{x}X{y}Y'

    make_output_and_temp_folders(folder)
    clouds_file = f'{folder}raw/clouds/clouds_{tile_idx}.hkl'
    shadows_file = f'{folder}raw/clouds/shadows_{tile_idx}.hkl'
    s1_file = f'{folder}raw/s1/{tile_idx}.hkl'
    s1_dates_file = f'{folder}raw/misc/s1_dates_{tile_idx}.hkl'
    s2_10_file = f'{folder}raw/s2_10/{tile_idx}.hkl'
    s2_20_file = f'{folder}raw/s2_20/{tile_idx}.hkl'
    s2_dates_file = f'{folder}raw/misc/s2_dates_{tile_idx}.hkl'
    clean_steps_file = f'{folder}raw/clouds/clean_steps_{tile_idx}.hkl'
    dem_file = f'{folder}raw/misc/dem_{tile_idx}.hkl'

    # NOTE(review): `year = 2020` is hard-coded in the download calls below
    # although the date windows come from the module-level `year` -- confirm
    # whether the global should be passed instead.

    # --- 1. Cloud and shadow masks -------------------------------------
    if not (os.path.exists(clouds_file)):# or processed):
        print(f"Downloading {clouds_file}")

        cloud_probs, shadows, _, image_dates, _ = identify_clouds(bbox = bbx,
                                                                  dates = dates,
                                                                  imsize = 600,
                                                                  api_key = API_KEY,
                                                                  year = 2020)

        to_remove, _ = calculate_cloud_steps(cloud_probs, image_dates)

        if len(to_remove) > 0:
            clean_dates = np.delete(image_dates, to_remove)
            cloud_probs = np.delete(cloud_probs, to_remove, 0)
            shadows = np.delete(shadows, to_remove, 0)
        else:
            clean_dates = image_dates

        to_remove = subset_contiguous_sunny_dates(clean_dates)
        if len(to_remove) > 0:
            clean_dates = np.delete(clean_dates, to_remove)
            cloud_probs = np.delete(cloud_probs, to_remove, 0)
            shadows = np.delete(shadows, to_remove, 0)

        hkl.dump(cloud_probs, clouds_file, mode='w', compression='gzip')
        hkl.dump(shadows, shadows_file, mode='w', compression='gzip')
        hkl.dump(clean_dates, clean_steps_file, mode='w', compression='gzip')

    # --- 2. Sentinel-2 -------------------------------------------------
    if not (os.path.exists(s2_10_file)):
        print(f"Downloading {s2_10_file}")
        clean_steps = list(hkl.load(clean_steps_file))
        cloud_probs = hkl.load(clouds_file)
        shadows = hkl.load(shadows_file)
        s2_10, s2_20, s2_dates = download_sentinel_2(bbx, clean_steps = clean_steps,
                                                     api_key = API_KEY,
                                                     dates = dates,
                                                     year = 2020
                                                    )

        # Steps to ensure that L2A, L1C derived products have exact matching dates
        print(f"Shadows {shadows.shape}, clouds {cloud_probs.shape},"
              f" S2, {s2_10.shape}, S2d, {s2_dates.shape}")
        to_remove_clouds = [i for i, val in enumerate(clean_steps) if val not in s2_dates]
        to_remove_dates = [val for i, val in enumerate(clean_steps) if val not in s2_dates]
        if len(to_remove_clouds) >= 1:
            print(f"Removing {to_remove_dates} from clouds because not in S2")
            cloud_probs = np.delete(cloud_probs, to_remove_clouds, 0)
            shadows = np.delete(shadows, to_remove_clouds, 0)
            print(f"Shadows {shadows.shape}, clouds {cloud_probs.shape}"
                  f" S2, {s2_10.shape}, S2d, {s2_dates.shape}")
            # Re-save the masks so they stay aligned with the S2 dates on disk.
            hkl.dump(cloud_probs, clouds_file, mode='w', compression='gzip')
            hkl.dump(shadows, shadows_file, mode='w', compression='gzip')

        assert cloud_probs.shape[0] == s2_10.shape[0], "There is a date mismatch"
        hkl.dump(to_int16(s2_10), s2_10_file, mode='w', compression='gzip')
        hkl.dump(to_int16(s2_20), s2_20_file, mode='w', compression='gzip')
        hkl.dump(s2_dates, s2_dates_file, mode='w', compression='gzip')

    # --- 3. Sentinel-1 -------------------------------------------------
    if not (os.path.exists(s1_file)):
        print(f"Downloading {s1_file}")
        s1_layer = identify_s1_layer((center_x, center_y))
        s1, s1_dates = download_sentinel_1(bbx,
                                           layer = s1_layer,
                                           api_key = API_KEY,
                                           year = 2020,
                                           dates = dates_sentinel_1)
        if s1.shape[0] == 0:
            # Nothing returned for this layer: retry with the other one.
            s1_layer = "SENT_DESC" if s1_layer == "SENT" else "SENT"
            print(f'Switching to {s1_layer}')
            s1, s1_dates = download_sentinel_1(bbx,
                                               layer = s1_layer,
                                               api_key = API_KEY,
                                               year = 2020,
                                               dates = dates_sentinel_1)
        s1 = process_sentinel_1_tile(s1, s1_dates)
        hkl.dump(to_int16(s1), s1_file, mode='w', compression='gzip')
        hkl.dump(s1_dates, s1_dates_file, mode='w', compression='gzip')

    # --- 4. DEM --------------------------------------------------------
    if not os.path.exists(dem_file):
        print(f'Downloading {dem_file}')
        dem = download_dem(dem_bbx, api_key = API_KEY)
        hkl.dump(dem, dem_file, mode='w', compression='gzip')
    

In [11]:
def id_missing_px(sentinel2: np.ndarray, thresh: int = 11) -> np.ndarray:
    """Returns the indices of time steps with too many invalid values

    Within the first 10 bands a value counts as invalid when it is exactly 0
    or at least 1. A time step is flagged once its invalid count reaches
    (sentinel2.shape[1] ** 2) / thresh.
    """
    leading_bands = sentinel2[..., :10]
    per_image_axes = (1, 2, 3)
    missing_images_0 = np.sum(leading_bands == 0.0, axis = per_image_axes)
    missing_images_p = np.sum(leading_bands >= 1., axis = per_image_axes)

    invalid_count = missing_images_0 + missing_images_p
    cutoff = (sentinel2.shape[1]**2) / thresh
    missing_images = np.argwhere(invalid_count >= cutoff).flatten()

    if len(missing_images) > 0:
        print(f"The missing image bands (0) are: {missing_images_0}")
        print(f"The missing image bands (1.0) are: {missing_images_p}")
    return missing_images

def process_tile(x, y, data):
    """Load the cached raw inputs of one tile and assemble the model input.

    Reads the clouds, shadows, Sentinel-1, Sentinel-2 (10 m and 20 m), DEM and
    S2-date files written under ../project-monitoring/tof/<x>/<y>/raw/, brings
    them to a common (width, height) grid, stacks the 4 + 6 Sentinel-2 bands,
    drops time steps flagged by id_missing_px, runs remove_cloud_and_shadows
    and appends the DEM divided by 90 as an extra band.

        Parameters:
         x: X_tile ID (anything int() accepts)
         y: Y_tile ID (anything int() accepts)
         data: not used inside this function

        Returns:
         x (np.ndarray): cleaned stack with the DEM band appended, clipped to [0, 1]
         image_dates: dates of the time steps that were kept
         interp: second output of remove_cloud_and_shadows
         s1: Sentinel-1 array exactly as loaded from disk
    """
    
    x = str(int(x))
    y = str(int(y))
    print(x)
    # NOTE(review): str(int(...)) never contains ".0", so these two branches
    # cannot run.
    if ".0" in x:
        x = x[:-2]
    if ".0" in y:
        y = y[:-2]
        
    print(x, y)
    
    folder = f"../project-monitoring/tof/{str(x)}/{str(y)}/"
    tile_idx = f'{str(x)}X{str(y)}Y'
    
    clouds_file = f'{folder}raw/clouds/clouds_{tile_idx}.hkl'
    shadows_file = f'{folder}raw/clouds/shadows_{tile_idx}.hkl'
    s1_file = f'{folder}raw/s1/{tile_idx}.hkl'
    s1_dates_file = f'{folder}raw/misc/s1_dates_{tile_idx}.hkl'
    s2_10_file = f'{folder}raw/s2_10/{tile_idx}.hkl'
    s2_20_file = f'{folder}raw/s2_20/{tile_idx}.hkl'
    s2_dates_file = f'{folder}raw/misc/s2_dates_{tile_idx}.hkl'
    s2_file = f'{folder}raw/s2/{tile_idx}.hkl'
    clean_steps_file = f'{folder}raw/clouds/clean_steps_{tile_idx}.hkl'
    dem_file = f'{folder}raw/misc/dem_{tile_idx}.hkl'
    
    
    # Only the Sentinel-2 arrays are rescaled to float here; s1 is returned
    # exactly as stored.
    clouds = hkl.load(clouds_file)
    shadows = hkl.load(shadows_file)
    s1 = hkl.load(s1_file)
    s2_10 = to_float32(hkl.load(s2_10_file))
    s2_20 = to_float32(hkl.load(s2_20_file))
    dem = hkl.load(dem_file)
    image_dates = hkl.load(s2_dates_file)
    
    print(f'Clouds: {clouds.shape}, \nShadows: {shadows.shape} \n'
          f'S1: {s1.shape} \nS2: {s2_10.shape}, {s2_20.shape} \nDEM: {dem.shape}')
    
    # Target grid: axis 1 of the 10 m bands, and twice axis 2 of the 20 m bands.
    width = s2_10.shape[1]
    height = s2_20.shape[2] * 2
    
    # Axis 1 of clouds / shadows / DEM is edge-padded symmetrically up to `width`.
    # The floor division means an odd difference ends up one pixel short.
    if clouds.shape[1] < width:
        pad_amt =  (width - clouds.shape[1]) // 2
        clouds = np.pad(clouds, ((0, 0), (pad_amt, pad_amt), (0,0)), 'edge')
        print(clouds.shape)
        
    if shadows.shape[1] < width:
        pad_amt =  (width - shadows.shape[1]) // 2
        shadows = np.pad(shadows, ((0, 0), (pad_amt, pad_amt), (0,0)), 'edge')
        print(shadows.shape)
        
    if dem.shape[0] < width:
        pad_amt =  (width - dem.shape[0]) // 2
        dem = np.pad(dem, ((pad_amt, pad_amt), (0, 0)), 'edge')
        print(dem.shape)
        
    # Axis 2 of the 10 m bands is padded or cropped to `height`.
    # NOTE(review): the symmetric branch runs when *half* the difference is
    # even; otherwise the whole difference is added at the far edge -- confirm
    # this is the intended parity test.
    if s2_10.shape[2] < height:
        pad_amt =  (height - s2_10.shape[2]) / 2
        if pad_amt % 2 == 0:
            pad_amt = int(pad_amt)
            s2_10 = np.pad(s2_10, ((0, 0), (0, 0), (pad_amt, pad_amt), (0,0)), 'edge')
        else:
            s2_10 = np.pad(s2_10, ((0, 0), (0, 0), (0, int(pad_amt * 2)), (0,0)), 'edge')
    
    if s2_10.shape[2] > height:
        pad_amt =  abs(height - s2_10.shape[2])
        print(pad_amt)
        s2_10 = s2_10[:, :, :-pad_amt, :]
        print(s2_10.shape)
       
    # Same pad / crop rule for axis 1 of the DEM.
    if dem.shape[1] < height:
        pad_amt =  (height - dem.shape[1]) / 2
        if pad_amt % 2 == 0:
            pad_amt = int(pad_amt)
            dem = np.pad(dem, ((0, 0), (pad_amt, pad_amt)), 'edge')
        else:
            dem = np.pad(dem, ( (0, 0), (0, int(pad_amt * 2))), 'edge')
            
    if dem.shape[1] > height:
        pad_amt =  abs(height - dem.shape[1])
        dem = dem[:, :-pad_amt]
        
        
    print(f'Clouds: {clouds.shape}, \nShadows: {shadows.shape} \n'
          f'S1: {s1.shape} \nS2: {s2_10.shape}, {s2_20.shape} \nDEM: {dem.shape}')
            
  
    # Stack to 10 bands: bands 0-3 come from the 10 m product, bands 4-9 are the
    # six 20 m bands resized to the 10 m grid.
    sentinel2 = np.empty((s2_10.shape[0], width, height, 10))
    sentinel2[..., :4] = s2_10
    for band in range(6):
        # `time` is only a local loop index here.
        for time in range(sentinel2.shape[0]):
            # third positional argument of resize is presumably order=1 -- TODO confirm
            sentinel2[time, ..., band + 4] = resize(s2_20[time,..., band], (width, height), 1)

    # Drop time steps with too many invalid values (thresh = 3) from every
    # per-date array so they stay aligned.
    missing_px = id_missing_px(sentinel2, 3)
    if len(missing_px) > 0:
        print(f"Removing {missing_px} dates due to missing data")
        clouds = np.delete(clouds, missing_px, axis = 0)
        shadows = np.delete(shadows, missing_px, axis = 0)
        image_dates = np.delete(image_dates, missing_px)
        sentinel2 = np.delete(sentinel2, missing_px, axis = 0)

    # From here on `x` is the cleaned image stack, no longer the tile ID.
    x, interp = remove_cloud_and_shadows(sentinel2, clouds, shadows, image_dates) 

    # Repeat the DEM for every time step, scale it by 1/90 and append it as the
    # last band.
    dem_i = np.tile(dem[np.newaxis, :, :, np.newaxis], (x.shape[0], 1, 1, 1))
    dem_i = dem_i / 90
    x = np.concatenate([x, dem_i], axis = -1)
    x = np.clip(x, 0, 1)
    return x, image_dates, interp, s1

In [12]:
def make_overlapping_windows(tiles):
    """Grow each (x, y, width, height) window so that neighbours overlap.

    Windows gain 5 pixels on every side that has a neighbour: the first and
    last `n_x` rows of `tiles` grow by 5 in width and the rows in between by
    10, the height grows by 5 or 10 in the same manner, and the origins are
    moved back by 5 and floored at 0. The input is not modified; the result is
    an int64 array.
    """
    windows = np.copy(tiles)
    n_x = np.sum(windows[:, 0] == 0)  # number of rows whose x origin is 0
    n_y = np.sum(windows[:, 1] == 0)  # number of rows whose y origin is 0

    # Width of the first and last block of n_x rows: one-sided overlap.
    windows[:n_x, 2] += 5
    windows[-n_x:, 2] += 5

    # Height: 5 for rows at either end of a run of n_y rows, 10 otherwise.
    height_growth = np.array(
        [5 if (i % n_y == 0) or ((i + 1) % n_y == 0) else 10
         for i in range(tiles.shape[0])]
    ).astype(np.uint16)

    # Signed integers are needed before origins can be shifted below zero.
    windows = windows.astype(np.int64)
    windows[:, 3] += height_growth
    windows[n_x:-n_x, 2] += 10  # remaining rows overlap on both sides
    windows[n_x:, 0] -= 5
    windows[:, 1] -= 5

    windows[windows < 0] = 0.

    return windows

In [77]:
def make_subtiles(folder, tiles):
    """Create one output sub-folder per distinct y origin found in `tiles`.

        Parameters:
         folder (str): parent folder, with trailing separator (names are
                       appended by string concatenation)
         tiles (np.ndarray): windows whose column 1 holds the y origin

        Returns:
         None
    """
    for y_tile in np.unique(tiles[:, 1]):
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(os.path.realpath(folder + str(y_tile) + '/'), exist_ok=True)
            
def interpolate_na_vals(s2):
    '''Fills time steps that contain NaN, pixel by pixel and in place

       For every pixel, each time step with at least one NaN band is replaced
       by the mean of two other time steps of the same pixel, located at the
       offsets returned by calculate_proximal_steps. Prints the per-date NaN
       counts if any NaN is left afterwards.'''
    n_steps = s2.shape[0]
    for x_loc, y_loc in np.ndindex(s2.shape[1], s2.shape[2]):
        # NaN bands per time step, counted once before this pixel is modified
        n_na = np.sum(np.isnan(s2[:, x_loc, y_loc, :]), axis = 1)
        for date in range(n_steps):
            if not n_na.flatten()[date] > 0:
                continue
            before, after = calculate_proximal_steps(date, np.argwhere(n_na == 0))
            earlier = s2[date + before, x_loc, y_loc]
            later = s2[date + after, x_loc, y_loc]
            s2[date, x_loc, y_loc, :] = (earlier + later) / 2

    numb_na = np.sum(np.isnan(s2), axis = (1, 2, 3))
    if np.sum(numb_na) > 0:
        print(f"There are {numb_na} NA values")
    return s2
    

def process_subtiles(x, y, s2: np.ndarray = None, 
                       dates: np.ndarray = None,
                       interp: np.ndarray = None, s1 = None) -> None:
    '''Wrapper function to interpolate clouds and temporal gaps, superresolve tiles,
       calculate relevant indices, and save analysis-ready data to the output folder

       The tile is cut into overlapping windows. For each window, time steps
       that are mostly interpolated, mostly missing or flagged by
       remove_missed_clouds are dropped, the rest is composited, smoothed and
       superresolved, DEM and Sentinel-1 bands are appended, and the result is
       written as uint16 to
       ../project-monitoring/tof/<x>/<y>/processed/<folder_y>/<folder_x>.hkl.
       Windows left with fewer than 5 usable dates are written as all zeros.

       Parameters:
        x: X_tile ID (anything int() accepts)
        y: Y_tile ID (anything int() accepts)
        s2 (np.ndarray): Sentinel-2 stack with the DEM as its last band
        dates (np.ndarray): date of each time step in s2
        interp (np.ndarray): per time step and pixel interpolation flags
        s1 (np.ndarray): Sentinel-1 composites covering the same tile

       Returns:
        None
    '''
    # str(int(...)) can never contain ".0", so no further clean-up is needed.
    x = str(int(x))
    y = str(int(y))

    print(x, y)

    s2 = interpolate_na_vals(s2)

    # tiles_folder names the output files; tiles_array holds the enlarged,
    # overlapping windows actually cut out of the arrays.
    tiles_folder = tile_window(s1.shape[2], s1.shape[1], window_size = 140)
    tiles_array = make_overlapping_windows(tiles_folder)

    path = f'../project-monitoring/tof/{x}/{y}/processed/'
    make_subtiles(path, tiles_folder)

    for tile_folder, tile_array in zip(tiles_folder, tiles_array):
        start_x, start_y = tile_array[0], tile_array[1]
        folder_x, folder_y = tile_folder[0], tile_folder[1]
        end_x = start_x + tile_array[2]
        end_y = start_y + tile_array[3]
        subset = s2[:, start_x:end_x, start_y:end_y, :]
        interp_tile = interp[:, start_x:end_x, start_y:end_y]
        interp_tile = np.sum(interp_tile, axis = (1, 2))

        # Drop dates where more than 1 / 6.67 of a 150 x 150 window was interpolated.
        dates_tile = np.copy(dates)
        to_remove = np.argwhere(interp_tile > ((150*150) / 6.67)).flatten()
        if len(to_remove) > 0:
            dates_tile = np.delete(dates_tile, to_remove)
            subset = np.delete(subset, to_remove, 0)
            print(f"Removing {to_remove} interp, leaving {len(dates_tile)} / {len(dates)}")

        missing_px = id_missing_px(subset)
        if len(missing_px) > 0:
            dates_tile = np.delete(dates_tile, missing_px)
            subset = np.delete(subset, missing_px, 0)
            print(f"Removing {missing_px} missing, leaving {len(dates_tile)}")

        to_remove = remove_missed_clouds(subset)
        if len(to_remove) > 0:
            subset = np.delete(subset, to_remove, axis = 0)
            dates_tile = np.delete(dates_tile, to_remove)
            print(f"{len(to_remove)} missed clouds, leaving {len(dates_tile)}")

        try:
            subtile, _ = calculate_and_save_best_images(subset, dates_tile)
        except Exception as err:
            # Best effort: a window that cannot be composited is written as
            # zeros (the short dates list triggers that below). Only Exception
            # is caught, so Ctrl-C still stops the run.
            print(f"Compositing failed for subtile {folder_y}/{folder_x}: {err!r}")
            subtile = np.zeros((72, end_x-start_x, end_y - start_y, 11))
            dates_tile = [0,]
        output = f"{path}{str(folder_y)}/{str(folder_x)}.hkl"
        s1_subtile = s1[:, start_x:end_x, start_y:end_y, :]
        print(subtile.shape)

        # Border windows are only 145 wide; reflect-pad the outer side to 150.
        if subtile.shape[2] == 145: 
            pad_u, pad_d = 0, 0
            if start_y == 0:
                pad_u = 5
            else:
                pad_d = 5
            subtile = np.pad(subtile, ((0, 0,), (0, 0), (pad_u, pad_d), (0, 0)), 'reflect')
            s1_subtile = np.pad(s1_subtile, ((0, 0,), (0, 0), (pad_u, pad_d), (0, 0)), 'reflect')
        if subtile.shape[1] == 145:
            pad_l, pad_r = 0, 0
            if start_x == 0:
                pad_l = 5
            else:
                pad_r = 5
            subtile = np.pad(subtile, ((0, 0,), (pad_l, pad_r), (0, 0), (0, 0)), 'reflect')
            s1_subtile = np.pad(s1_subtile, ((0, 0,), (pad_l, pad_r), (0, 0), (0, 0)), 'reflect')

        print(subtile.shape)
        # The DEM (last band) is set aside while the 10 spectral bands are
        # smoothed and superresolved, then re-attached.
        dem = subtile[..., -1]
        sm = Smoother(lmbd = 800, size = subtile.shape[0], nbands = 10, dim = subtile.shape[1])
        subtile = sm.interpolate_array(subtile[..., :-1])
        subtile = superresolve_tile(subtile)

        subtile = np.concatenate([subtile, dem[:12, :, :, np.newaxis]], axis = -1)
        subtile = np.concatenate([subtile,  s1_subtile], axis = -1)
        # The two Sentinel-1 bands are still uint16-encoded; bring them to [0, 1].
        subtile[..., -2:] = subtile[..., -2:] / 65535
        print(subtile.shape)

        os.makedirs(os.path.realpath(os.path.dirname(output)), exist_ok=True)

        subtile = np.clip(subtile, 0, 1)
        subtile = to_int16(subtile)
        print(f"Writing {output}")
        print(len(dates_tile))
        assert subtile.shape[1] >= 145, f"subtile shape is {subtile.shape}"
        assert subtile.shape[0] == 12, f"subtile shape is {subtile.shape}"
        if len(dates_tile) < 5:
            subtile = np.zeros_like(subtile)
        hkl.dump(subtile, output, mode='w', compression='gzip')
        


In [78]:
# Preview the first rows of the tile table; the tile functions read its
# X, Y, X_tile and Y_tile columns.
data.head(5)

Unnamed: 0.2,Unnamed: 0,level_0,Unnamed: 0.1,index,Y,X,VALUE,Unnamed: 3,Y_tile,X_tile
0,0,0,0,0,37.472222,-105.027778,1.0,,1622.0,96.0
1,1,1,1,1,37.472222,-92.583333,1.0,,1622.0,313.0
2,2,2,2,2,37.472222,-92.416667,1.0,,1622.0,316.0
3,3,3,3,3,37.472222,-89.916667,1.0,,1622.0,361.0
4,4,4,4,4,37.472222,-89.861111,1.0,,1622.0,362.0


In [79]:
from functools import wraps
from time import time

def timing(f):
    """Decorator that prints the wrapped function's name and its wall-clock
    run time in seconds (rounded to 2 decimals) after every call, and passes
    the return value through unchanged."""
    @wraps(f)
    def wrap(*args, **kw):
        started = time()
        result = f(*args, **kw)
        elapsed = time() - started
        print(f'{f.__name__}, {np.around(elapsed, 2)}')
        return result

    return wrap

In [81]:
import datetime
%run ../src/tof/tof_downloading.py
from time import time

# End-to-end run for a single tile: download -> assemble -> cut and write subtiles.
time1 = time()
data = data.reset_index(drop=True)

# Re-declare the run configuration: download_tile reads `dates` and
# `dates_sentinel_1` from module scope.
year = 2020
dates = (f'{str(year)}-01-01' , f'{str(year + 1)}-02-15')
    
dates_sentinel_1 = (f'{str(year)}-01-01' , f'{str(year)}-12-31')

days_per_month = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30]
starting_days = np.cumsum(days_per_month)

#y = data['Y_tile'][13]
#x = data['X_tile'][13] # 1607, 1166, 

# Tile to process, given as its (X_tile, Y_tile) IDs.
x = 499
y = 1377
download_tile(x = x, y = y, data = data)
# NOTE(review): this rebinds the module-level `dates` from the (start, end)
# tuple above to the image dates returned by process_tile; calling
# download_tile again without re-running the assignments above would pass it
# the wrong value.
s2, dates, interp, s1 = process_tile(x = x, y = y, data = data)
process_subtiles(x, y, s2, dates, interp, s1)
time2 = time()
print(f"Finished in {np.around(time2 - time1, 1)} seconds")

       Unnamed: 0  level_0  Unnamed: 0.1  index          Y      X  VALUE  \
10567       10567    10567         10567  10567  23.083333 -82.25    1.0   

       Unnamed: 3  Y_tile  X_tile  
10567         NaN  1377.0   499.0  
499
499 1377
[-82.27777777777777, 23.055555555555113, -82.22222222222223, 23.11111111111067]
Downloading ../project-monitoring/tof/499/1377/raw/clouds/clouds_499X1377Y.hkl
The original cloud size is (69, 624, 576)
255
100.98488789793137
There are 22 clean steps
Shadows ((22, 616, 568, 6)) used 0.9 processing units
Removing 576, time 4
Removing 384, time 5
Removing 64, time 12
Removing 2432, time 13
Removing 2688, time 14
Removing 960, time 15
Removing 384, time 16
249472 212544
1, Dates: [19 24], Dist: 10, Thresh: 0.02
2, Dates: [34], Dist: 29, Thresh: 0.01
3, Dates: [63], Dist: 30, Thresh: 0.03
4, Dates: [93 98], Dist: 85, Thresh: 0.06
5, Dates: [], Dist: 365, Thresh: 0.2
6, Dates: [], Dist: 365, Thresh: 0.2
7, Dates: [183], Dist: 75, Thresh: 0.1
8, Dates: [228], 

Maximum time distance: 150
(72, 145, 150, 11)
(72, 150, 150, 11)
The input array to superresolve is (12, 150, 150, 10)
(12, 150, 150, 13)
Writing ../project-monitoring/tof/499/1377/processed/321/478.hkl
9
Removing [1 3 6 7 8 9] interp, leaving 8 / 14
Maximum time distance: 235
(72, 145, 145, 11)
(72, 150, 150, 11)
The input array to superresolve is (12, 150, 150, 10)
(12, 150, 150, 13)
Writing ../project-monitoring/tof/499/1377/processed/428/478.hkl
8
Finished in 206.0 seconds


In [None]:
import os
from osgeo import gdal
from glob import glob
# Mosaic every GeoTIFF found inside the per-tile folders
# (../project-monitoring/tof/<x>/<y>/) into one virtual raster.
li_dirs = glob("../project-monitoring/tof/*/*")


print(li_dirs)
li_all_files = list()
for folder in li_dirs:
    # The glob also matches plain files sitting directly in tof/<x>/ (the
    # tracker cell filters '.tif' entries out of those listings, so such files
    # presumably exist); os.listdir() would raise on them.
    if not os.path.isdir(folder):
        continue
    li_all_files.extend(
        os.path.join(folder, file)
        for file in os.listdir(folder)
        if os.path.splitext(file)[-1] == '.tif'
    )

gdal.BuildVRT('out.vrt', li_all_files)

#!gdal_translate -of GTiff out.vrt out.tif

In [None]:
# Reproject in2.tif from EPSG:4326 to EPSG:32663 into a virtual raster, then
# materialise that VRT as an LZW-compressed GeoTIFF (out.tif).
# NOTE(review): this overwrites the out.vrt produced by gdal.BuildVRT in the
# previous cell -- confirm that is intended.
!gdalwarp -s_srs "EPSG:4326" -t_srs "EPSG:32663" -of vrt in2.tif out.vrt
!gdal_translate -co compress=LZW out.vrt out.tif