In [2]:
import pandas as pd
import numpy as np
from random import shuffle
from sentinelhub import WmsRequest, WcsRequest, MimeType, CRS, BBox, constants
from s2cloudless import S2PixelCloudDetector, CloudMaskRequest
import logging
from collections import Counter
import datetime



In [4]:
# parameters

DATA_LOCATION = '../data/subplot.csv'
OUTPUT_FOLDER = 'data/processed/'
EPSG = CRS.WGS84
IMAGE_SIZE = 14
API_KEY = 'a14b8591-2127-4d2a-b5df-b47a9f2895e1'

In [None]:
convertCoords((38.18096474931849, 14.042505273551859), '4326', '')

In [8]:
# setup function to reproject coordinates
def convertCoords(xy, src='', targ=''):

    srcproj = osr.SpatialReference()
    srcproj.ImportFromEPSG(src)
    targproj = osr.SpatialReference()
    if isinstance(targ, str):
        targproj.ImportFromProj4(targ)
    else:
        targproj.ImportFromEPSG(targ)
    transform = osr.CoordinateTransformation(srcproj, targproj)

    pt = ogr.Geometry(ogr.wkbPoint)
    pt.AddPoint(xy[0], xy[1])
    pt.Transform(transform)
    return([pt.GetX(), pt.GetY()])

# Build bounding box by adding/subtracting meters
def bounding_box(points):
    # LONG, LAT FOR SOME REASON
    if 48 <= points[0] <= 54: ## 38
        albers = convertCoords(points, 4326, 32639) if points[1] > 0 else convertCoords(points, 4326, 32739)
        epsg = CRS.UTM_39N if points[1] > 0 else CRS.UTM_39S
    if 36 <= points[0] <= 42: ## 38
        albers = convertCoords(points, 4326, 32637) if points[1] > 0 else convertCoords(points, 4326, 32737)
        epsg = CRS.UTM_37N if points[1] > 0 else CRS.UTM_37S
    if 42 <= points[0] <= 48: ## 37
        albers = convertCoords(points, 4326, 32638) if points[1] > 0 else convertCoords(points, 4326, 32738)
        epsg = CRS.UTM_38N if points[1] > 0 else CRS.UTM_38S
    if 30 <= points[0] <= 36: ### 36
        albers = convertCoords(points, 4326, 32636) if points[1] > 0 else convertCoords(points, 4326, 32736)
        epsg = CRS.UTM_36N if points[1] > 0 else CRS.UTM_36S
    if 24 <= points[0] <= 30: ### 36
        albers = convertCoords(points, 4326, 32635) if points[1] > 0 else convertCoords(points, 4326, 32735)
        epsg = CRS.UTM_35N if points[1] > 0 else CRS.UTM_35S
    if 18 <= points[0] <= 24:
        albers = convertCoords(points, 4326, 32634) if points[1] > 0 else convertCoords(points, 4326, 32734)
        epsg = CRS.UTM_34N if points[1] > 0 else CRS.UTM_34S

    top = albers[0] + 35
    left = albers[1] - 35
    bottom = albers[0] - 35
    right = albers[1] + 35
    if left < 0:
        print("Potential OOB")
    return epsg, [top, left, bottom, right]

In [5]:
def calc_bbox(plot_id):
    subs = df[df['PLOT_ID'] == plot_id]
    # TOP, LEFT, BOTTOM, RIGHT
    # (min x, min y), (max x, max y)
    return [(min(subs['LON']), min(subs['LAT'])),
            (max(subs['LON']), max(subs['LAT']))]


df = pd.read_csv('../data/subplot.csv')
df = df.drop('IMAGERY_TITLE', axis = 1)
df = df.dropna(axis = 0)
plot_ids = set(df['PLOT_ID'])

locations = [calc_bbox(x) for x in plot_ids]

In [7]:
locations[1]

[(38.18096474931849, 14.042505273551859),
 (38.18213255918785, 14.043638181775378)]

In [11]:
cloud_detector = S2PixelCloudDetector(threshold=0.4, average_over=4, dilation_size=2)

def identify_clouds(bbox, epsg = EPSG, time = ('2017-01-01', '2017-12-31')):
    try:
        box = BBox(bbox, crs = epsg)
        cloud_request = WmsRequest(
            layer='CLOUD_DETECTION',
            bbox=box,
            time=time,
            width=IMAGE_SIZE,
            height=IMAGE_SIZE,
            image_format = MimeType.TIFF_d32f,
            maxcc=1,
            instance_id=API_KEY,
            custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'BICUBIC'},
            time_difference=datetime.timedelta(hours=24),
        )
        
        cloud_img = cloud_request.get_data()
        cloud_probs = cloud_detector.get_cloud_probability_maps(np.array(cloud_img))
        means = np.mean(cloud_probs, (1, 2))
        cloud_steps = [i for i, val in enumerate(means) if val > 0.25]
        return cloud_steps, means
    except Exception as e:
        logging.fatal(e, exc_info=True)
    
        
    
def download_tiles(bbox, epsg = EPSG, time = ('2017-01-01', '2017-12-31')):
    try:
        box = BBox(bbox, crs = epsg)
        image_request = WmsRequest(
                layer='ALL_BANDS_NDVI',
                bbox=box,
                time=time,
                width=IMAGE_SIZE,
                height=IMAGE_SIZE,
                image_format = MimeType.TIFF_d32f,
                maxcc=1,
                instance_id=API_KEY,
                custom_url_params = {constants.CustomUrlParam.UPSAMPLING: 'BICUBIC'},
                time_difference=datetime.timedelta(hours=24),
            )
        img_bands = image_request.get_data()
        return img_bands, image_request

    except Exception as e:
        logging.fatal(e, exc_info=True)
    

def calculate_and_save_best_images(cloud_steps, img_bands, image_request, means):
    begining_length = len(img_bands)
    clean_steps = np.array([x for x in range(len(img_bands)) if x not in cloud_steps])
    keep_steps = []
    month_steps = []
    month_hash = []
    for date in image_request.get_dates():
         month_steps.append(date.month)
            
    # Identify two images per month with the least cloud cover
    best_two_per_month = []
    for i in range(1, 13):
        month_i = []
        month_i_clouds = []
        for position, item in enumerate(month_steps):
            if item == i:
                month_i.append(position)
        clouds = [val for x, val in enumerate(means) if x in month_i]
        if len(clouds) > 2:
            clouds = sorted(clouds)[:2]
            ids = [x for x, val in enumerate(means) if val in clouds]
        else:
            ids = month_i_clouds
        for x in ids:
            best_two_per_month.append(x)
                    
        # Append the best two per month
    for i in best_two_per_month:
        counts = Counter([x for x in month_hash])
        current_month_count = counts.get(month_steps[i])
        if current_month_count == None:
            current_month_count = 0
        if i not in cloud_steps and current_month_count <= 1: 
            keep_steps.append(img_bands[i])
            month_hash.append(month_steps[i])
        if i in cloud_steps and min(clean_steps) < i < max(clean_steps):
            if current_month_count <= 1:
                nearest_lower = clean_steps[clean_steps > i].min()
                nearest_upper = clean_steps[clean_steps < i].max()
                img_bands[i] = (img_bands[nearest_lower] + img_bands[nearest_upper])/2
                month_hash.append(month_steps[i])
                keep_steps.append(img_bands[i])
    npify = np.stack(keep_steps)
    print("{}; removed {} steps of {} initial".format(npify.shape, len(cloud_steps), begining_length))
    return(npify)

In [13]:
errors = []
for i in range(0, len(locations)):
    try:
        # Initiate hash tables
        cloud, means = identify_clouds(locations[i])
        img, image_request = download_tiles(locations[i])
        tiles = calculate_and_save_best_images(cloud, img, image_request, means)
        np.save("../data/processed/" + str(i), tiles)

    except Exception as e:
        logging.fatal(e, exc_info=True)
        errors.append(img)
        #continue

(24, 14, 14, 10); removed 22 steps of 103 initial
(22, 14, 14, 10); removed 10 steps of 50 initial
(22, 14, 14, 10); removed 12 steps of 50 initial
(22, 14, 14, 10); removed 14 steps of 50 initial
(24, 14, 14, 10); removed 22 steps of 103 initial
(22, 14, 14, 10); removed 9 steps of 50 initial
(22, 14, 14, 10); removed 15 steps of 50 initial
(24, 14, 14, 10); removed 17 steps of 75 initial
(22, 14, 14, 10); removed 15 steps of 50 initial
(22, 14, 14, 10); removed 4 steps of 50 initial
(24, 14, 14, 10); removed 28 steps of 103 initial
(24, 14, 14, 10); removed 22 steps of 102 initial
(22, 14, 14, 10); removed 14 steps of 50 initial
(22, 14, 14, 10); removed 13 steps of 52 initial
(22, 14, 14, 10); removed 9 steps of 51 initial
(22, 14, 14, 10); removed 9 steps of 52 initial
(22, 14, 14, 10); removed 10 steps of 50 initial
(22, 14, 14, 10); removed 12 steps of 50 initial
(22, 14, 14, 10); removed 13 steps of 50 initial
(22, 14, 14, 10); removed 12 steps of 50 initial
(22, 14, 14, 10); re

(22, 14, 14, 10); removed 14 steps of 50 initial
(22, 14, 14, 10); removed 14 steps of 50 initial
(22, 14, 14, 10); removed 13 steps of 50 initial
(22, 14, 14, 10); removed 12 steps of 50 initial
