# Download and process sentinel 1 data

## John Brandt
## April 1, 2020

## Package imports, API import, source scripts

In [1]:
import datetime
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import os
import scipy.sparse as sparse
import seaborn as sns
import yaml

from collections import Counter
from osgeo import ogr, osr
from random import shuffle
from scipy.sparse.linalg import splu
from sentinelhub import WmsRequest, WcsRequest, MimeType
from sentinelhub import CRS, BBox, constants, DataSource, CustomUrlParam
from skimage.transform import resize

import reverse_geocoder as rg
import pycountry
import pycountry_convert as pc
import hickle as hkl
from shapely.geometry import Point, Polygon

with open("../config.yaml", 'r') as stream:
        key = (yaml.safe_load(stream))
        API_KEY = key['key'] 
        
%matplotlib inline
%run ../src/downloading/utils.py

In [2]:
time = ('2018-12-15', '2020-01-15')
YEAR = 2019
IMSIZE = 32

# Bounding boxes

In [3]:
def identify_s1_layer(coords):
    coords = (coords[1], coords[0])
    results = rg.search(coords)
    admin1 = (results[-1]['admin1'])
    admin2 = results[-1]['admin2']
    country = results[-1]['cc']
    continent_name = pc.country_alpha2_to_continent_code(country)
    print(admin1, admin2, country, continent_name)
    if continent_name in ['AF', 'OC', 'EU']:
        layer = "SENT"
    if continent_name in ['SA']:
        if coords[0] > -7.11:
            layer = "SENT"
        else:
            layer = "SENT_DESC"
    if continent_name in ['AS']:
        if coords[0] > 23.3:
            layer = "SENT"
        else:
            layer = "SENT_DESC"
    if continent_name in ['NA']:
        layer = "SENT_DESC"
    return layer


def calc_bbox(plot_id, df):
    """ Calculates the corners of a bounding box from an input
        pandas dataframe as output by Collect Earth Online

        Parameters:
         plot_id (int): plot_id of associated plot
         df (pandas.DataFrame): dataframe of associated CEO survey
    
        Returns:
         bounding_box (list): [(min(x), min(y)),
                              (max(x), max_y))]
    """
    subs = df[df['PLOT_ID'] == plot_id]
    return [(min(subs['LON']), min(subs['LAT'])),
            (max(subs['LON']), max(subs['LAT']))]


def bounding_box(points, expansion = 160):
    """ Calculates the corners of a bounding box with an
        input expansion in meters from a given bounding_box
        
        Subcalls:
         calculate_epsg, convertCoords

        Parameters:
         points (list): output of calc_bbox
         expansion (float): number of meters to expand or shrink the
                            points edges to be
    
        Returns:
         bl (tuple): x, y of bottom left corner with edges of expansion meters
         tr (tuple): x, y of top right corner with edges of expansion meters
    """
    bl = list(points[0])
    tr = list(points[1])
    inproj = Proj('epsg:4326')
    outproj_code = calculate_epsg(bl)
    outproj = Proj('epsg:' + str(outproj_code))
    
    bl_utm =  transform(inproj, outproj, bl[1], bl[0])
    tr_utm =  transform(inproj, outproj, tr[1], tr[0])

    distance1 = tr_utm[0] - bl_utm[0]
    distance2 = tr_utm[1] - bl_utm[1]
    expansion1 = (expansion - distance1)/2
    expansion2 = (expansion - distance2)/2
    
    bl_utm = [bl_utm[0] - expansion1, bl_utm[1] - expansion2]
    tr_utm = [tr_utm[0] + expansion1, tr_utm[1] + expansion2]
    
    #assert int((tr_utm[0] - bl_utm[0]) == 320), f'size is {(tr_utm[0] - bl_utm[0])}'
    #assert int((tr_utm[1] - bl_utm[1]) == 320), f'size is {(tr_utm[1] - bl_utm[1])}'

    
    zone = str(outproj_code)[3:]
    zone = zone[1:] if zone[0] == "0" else zone
    direction = 'N' if tr[1] >= 0 else 'S'
    utm_epsg = "UTM_" + zone + direction
    return (bl_utm, tr_utm), CRS[utm_epsg]

# Data download

In [4]:
def download_sentinel_1(bbox, epsg, time = time, 
                        layer = "SENT", year = 2019, image_format = MimeType.TIFF_d16, data = DataSource.SENTINEL1_IW_ASC):
    """ Downloads all 10 and 20 meter L2A bands from sentinel-hub
        for input bbox and epsg, within time range
        
        Parameters:
         bbox (list): output of calc_bbox
         epsg (float): UTM EPSG associated with bbox 
         time (tuple): YY-MM-DD - YY-MM-DD bounds for downloading 
    
        Returns:
         s1 (arr): (Time, X, Y, 2) array of sentinel 1 data
         image_dates (list): number of days since time[0] for each
                              image in s1.shape[0]
    """
    try:
        print(f"The data is {data}")
        box = BBox(bbox, crs = epsg)
        image_request = WcsRequest(
                layer=layer,
                bbox=box,
                time=time,
                image_format = image_format,
                data_source= data,
                maxcc=1.0,
                resx='10m', resy='5m',
                instance_id=API_KEY,
                custom_url_params = {constants.CustomUrlParam.DOWNSAMPLING: 'NEAREST',
                                    constants.CustomUrlParam.UPSAMPLING: 'NEAREST'},
                time_difference=datetime.timedelta(hours=72),
            )
        data_filter = None
        if len(image_request.download_list) > 50:
            data_filter = [x for x in range(len(image_request.download_list)) if x % 2 == 0]
        s1 = image_request.get_data()#data_filter = data_filter)
        #print(s1)
        #print(image_request.get_download_list())
        s1 = np.stack(s1)
        print(f'The original max value is {np.max(s1)}')
        if np.max(s1) >= 1000:
            s1 = s1 / 65535.
            
        
        assert np.max(s1) <= 1.
        assert s1.shape[1] == 64.
        assert s1.shape[2] == 32.
        
        print(f"Sentinel 1 used {(2/3)*s1.shape[0] * (s1.shape[1]*s1.shape[2])/(512*512)} PU for \
          {s1.shape[0]} out of {len(image_request.download_list)} images")

        s1 = resize(s1, (s1.shape[0], IMSIZE*2, IMSIZE*2, s1.shape[-1]), order = 0)
        #print(f"Sentinel 1 intermediate shape is {s1.shape}")
        s1 = np.reshape(s1, (s1.shape[0], s1.shape[1]//2, 2, s1.shape[2] // 2, 2, s1.shape[-1]))
        s1 = np.mean(s1, (2, 4))
        s1 = s1[:, 8:24, 8:24, :]
        
        assert s1.shape[1] == 16.
        assert s1.shape[2] == 16.
        
        image_dates = []
        for date in image_request.get_dates():
            if date.year == year - 1:
                image_dates.append(-365 + starting_days[(date.month-1)] + date.day)
            if date.year == year:
                image_dates.append(starting_days[(date.month-1)] + date.day)
            if date.year == year + 1:
                image_dates.append(365 + starting_days[(date.month-1)]+date.day)
        image_dates = np.array(image_dates)
        print(np.max(s1))
        s1c = np.copy(s1)
        s1c[np.where(s1c < 1.)] = 0
        n_pix_oob = np.sum(s1c, axis = (1, 2, 3))
        print(n_pix_oob)
        to_remove = np.argwhere(n_pix_oob > (16*16)/5)
        print(f'A total of {len(to_remove)} steps of {s1.shape[0]} were removed.')
        s1 = np.delete(s1, to_remove, 0)
        image_dates = np.delete(image_dates, to_remove)
        return s1, image_dates

    except Exception as e:
        logging.fatal(e, exc_info=True)

# Download function

In [5]:
starting_days = np.cumsum([0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30])
print(starting_days)

[  0  31  59  90 120 151 181 212 243 273 304 334]


In [6]:
super_resolve = True
year = 2019

DATA_LOCATION = '../data/ghana-test.csv'
OUTPUT_FOLDER = '../data/test-smooth-200/'

def download_plots(data_location = DATA_LOCATION, output_folder = OUTPUT_FOLDER, image_format = MimeType.TIFF_d16):
    """ Downloads sentinel-1 data for the plot IDs associated
        with an input CSV from a collect earth online survey
        
        Parameters:
         data_location (os.path)
         output_folder (os.path)
        
        Subcalls:
         calc_bbox, bounding_box
         download_sentinel_1,
         calculate_and_save_best_images
         
        Creates:
         output_folder/{plot_id}.npy
    
        Returns:
         None
    """
    df = pd.read_csv(data_location, encoding = "ISO-8859-1")
    for column in ['IMAGERY_TITLE', 'STACKINGPROFILEDG', 'PL_PLOTID', 'IMAGERYYEARDG']:
        if column in df.columns:
            df = df.drop(column, axis = 1)
    df = df.dropna(axis = 0)
    plot_ids = sorted(df['PLOT_ID'].unique())
    existing = [int(x[:-4]) for x in os.listdir(output_folder) if ".DS" not in x]
    existing = existing + [136397663, 136792033, 136792071, 136397414, 136792213,
                          136792216, 136792229]
    to_download = [x for x in plot_ids if x not in existing]
    print("STARTING DOWNLOAD OF {} plots from {} to {}".format(len(to_download), data_location, output_folder))
    errors = []
    for i, val in enumerate(to_download):
        print("Downloading {}/{}, {}".format(i+1, len(to_download), val))
        location_wgs = calc_bbox(val, df = df)
        location, epsg = bounding_box(location_wgs, expansion = IMSIZE*10)
        try:
            # Identify cloud steps, download DEM, and download L2A series
            s1_layer = identify_s1_layer(location_wgs[0])
            data_source = DataSource.SENTINEL1_IW_DES if s1_layer == "SENT_DESC" else DataSource.SENTINEL1_IW_ASC
            print(s1_layer, epsg)
            s1, s1_dates = download_sentinel_1(location, layer = s1_layer, epsg = epsg, data = data_source)

            if s1.shape[0] == 0:
                s1_layer = "SENT_DESC" if s1_layer == "SENT" else "SENT"
                data_source = DataSource.SENTINEL1_IW_DES if s1_layer == "SENT_DESC" else DataSource.SENTINEL1_IW_ASC
                print(f'Switching to {s1_layer}')
                s1, s1_dates = download_sentinel_1(location, layer = s1_layer, epsg = epsg, data = data_source)
            
            s1, _ = calculate_and_save_best_images(s1, s1_dates)

            # Retain only iamgery every 15 days
            biweekly_dates = np.array([day for day in range(0, 360, 5)])
            to_remove = np.argwhere(biweekly_dates % 15 != 0)
            s1 = np.delete(s1, to_remove, 0)

            np.save(output_folder + str(val), s1)
            #return s1, s1_dates
            print('\n')

        except Exception as e:
            print(e)
            logging.fatal(e, exc_info=True)
            errors.append(i)
            #continue

In [7]:
for i in (os.listdir("../data/train-csv/new-data/")):
    #if "australia" in i:
    if ".csv" in i:
        #if any(x in i for x in ["africa-west", "cameroon", "koure", "niger"]):
        download_plots("../data/train-csv/new-data/" + i, "../data/train-s1/", image_format = MimeType.TIFF_d16)

STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-finetune-sample-data-2020-09-14.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-mid-coast-sample-data-2020-07-24.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/lac-south-train.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-paraiba-train-sample-data-2020-07-22.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/mexico-campeche-train.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/sa-train.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-lac-random-points-sample-data-2020-07-29.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/honduras-2-train.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-els



The original max value is 65535
Sentinel 1 used 0.34375 PU for           66 out of 66 images
1.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
A total of 0 steps of 66 were removed.
Maximum time distance: 0


Downloading 2/18, 137966722
Sao Paulo Joanopolis BR SA
SENT_DESC EPSG:32723
The data is DataSource.SENTINEL1_IW_DES
The original max value is 65535
Sentinel 1 used 0.34375 PU for           66 out of 66 images
0.7002975509269855
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
A total of 0 steps of 66 were removed.
Maximum time distance: 0


Downloading 3/18, 137966723
Sao Paulo Joanopolis BR SA
SENT_DESC EPSG:32723
The data is DataSource.SENTINEL1_IW_DES
The original max value 

Minas Gerais Extrema BR SA
SENT_DESC EPSG:32723
The data is DataSource.SENTINEL1_IW_DES
The original max value is 49509
Sentinel 1 used 0.34375 PU for           66 out of 66 images
0.5838559548332952
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
A total of 0 steps of 66 were removed.
Maximum time distance: 0


STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-south-small-sample-data-2020-07-23.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-finetune2-sample-data-2020-09-14.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-sao-paulo-sample-data-2020-07-29.csv to ../data/train-s1/
STARTING DOWNLOAD OF 0 plots from ../data/train-csv/new-data/ceo-brazil-south-sample-data-2020-07-23.csv to ../data/train-s1/
STARTING DOWNL

CRITICAL:root:need at least one array to stack
Traceback (most recent call last):
  File "<ipython-input-4-c15e178e86f4>", line 38, in download_sentinel_1
    s1 = np.stack(s1)
  File "/Users/john.brandt/anaconda3/envs/remote_sensing/lib/python3.6/site-packages/numpy/core/shape_base.py", line 412, in stack
    raise ValueError('need at least one array to stack')
ValueError: need at least one array to stack
CRITICAL:root:'NoneType' object is not iterable
Traceback (most recent call last):
  File "<ipython-input-6-c5d5e3575d6a>", line 47, in download_plots
    s1, s1_dates = download_sentinel_1(location, layer = s1_layer, epsg = epsg, data = data_source)
TypeError: 'NoneType' object is not iterable


'NoneType' object is not iterable
Downloading 2/2, 137588036
Piaui Oeiras BR SA
SENT EPSG:32723
The data is DataSource.SENTINEL1_IW_ASC


CRITICAL:root:need at least one array to stack
Traceback (most recent call last):
  File "<ipython-input-4-c15e178e86f4>", line 38, in download_sentinel_1
    s1 = np.stack(s1)
  File "/Users/john.brandt/anaconda3/envs/remote_sensing/lib/python3.6/site-packages/numpy/core/shape_base.py", line 412, in stack
    raise ValueError('need at least one array to stack')
ValueError: need at least one array to stack
CRITICAL:root:'NoneType' object is not iterable
Traceback (most recent call last):
  File "<ipython-input-6-c5d5e3575d6a>", line 47, in download_plots
    s1, s1_dates = download_sentinel_1(location, layer = s1_layer, epsg = epsg, data = data_source)
TypeError: 'NoneType' object is not iterable


'NoneType' object is not iterable
STARTING DOWNLOAD OF 1 plots from ../data/train-csv/new-data/ceo-brazil-gain-sample-data-2020-09-18.csv to ../data/train-s1/
Downloading 1/1, 137891972
Minas Gerais Extrema BR SA
SENT_DESC EPSG:32723
The data is DataSource.SENTINEL1_IW_DES


KeyboardInterrupt: 