# Create Annual Calibrated Composite Images for Each RTS Polygon
## TODO:
- use `ee.data.listOperations()` to monitor and retry failed exports?

## Set Up Environment

In [None]:
import ee
# Initialize the Earth Engine client library for this notebook session;
# the ee.* calls in the cells below are made after this call.
ee.Initialize()

In [None]:
# Import Libraries
import geemap
import os
from pprint import pprint
import math
import statistics
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely as shp
import xarray as xr
import rioxarray as rxr
from datetime import datetime, timezone, timedelta
import time
from collections import Counter
import re
from google.cloud import storage

In [None]:
# Set up access to the 'abrupt_thaw' Google Cloud Storage bucket.
# The client is created under the "AbruptThawMapping" project and the bucket
# handle is kept in `abrupt_thaw`.
storage_client = storage.Client(project="AbruptThawMapping")
abrupt_thaw = storage_client.get_bucket('abrupt_thaw')

## Define Functions

In [None]:
# Set Properties to allow filtering
def setID(image):
    """Copy an image's Earth Engine ID into a property named 'ID'.

    Intended to be mapped over an image collection so that the collection can
    later be filtered on the 'ID' property.

    Args:
        image: the image to tag (an element of the mapped collection).

    Returns:
        The same image, wrapped as an ee.Image, with the 'ID' property set.
    """
    tagged = image.setMulti({'ID': image.id()})
    # Re-wrap so the result is explicitly an ee.Image.
    return ee.Image(tagged)

In [None]:
# function to get UTM zone from WGS84 lat and lon
def utm_from_wgs84(lon, lat):
    """Return the EPSG code of the WGS 84 / UTM zone containing a point.

    Every branch returns a full EPSG code (32600 + zone number), so the
    result can always be used as 'EPSG:<code>'. Previously the Norway and
    Svalbard branches returned the bare zone number (e.g. 32), which is not
    a valid EPSG code and also skewed any median taken over mixed results.

    Args:
        lon: longitude in decimal degrees (WGS84), expected in [-180, 180].
        lat: latitude in decimal degrees (WGS84).

    Returns:
        int: 32600 + UTM zone number. The northern-hemisphere series is used
        regardless of the sign of `lat`, as in the original implementation.

    Raises:
        ValueError: if `lon` is outside [-180, 180] and no special case
            applies.
    """
    # Special cases for Norway and Svalbard, where the zone boundaries do not
    # follow the regular 6-degree grid.
    # NOTE(review): the latitude/longitude bounds are kept exactly as they
    # were (lat > 55, lon > 2, lat > 71); the published grid exceptions are
    # usually quoted as starting at 56N / 3E / 72N -- confirm if points near
    # those edges matter.
    if 55 < lat < 64 and 2 < lon < 6:
        zone = 32
    elif lat > 71 and 6 <= lon < 9:
        zone = 31
    elif lat > 71 and (9 <= lon < 12 or 18 <= lon < 21):
        zone = 33
    elif lat > 71 and (21 <= lon < 24 or 30 <= lon < 33):
        zone = 35
    # Rest of the world: regular 6-degree zones; the modulo maps lon == 180
    # back to zone 1.
    elif -180 <= lon <= 180:
        zone = (math.floor((lon + 180) / 6) % 60) + 1
    else:
        raise ValueError('Cannot figure out UTM zone from given Lat: {0}, Lon: {1}.'.format(lat, lon))

    return 32600 + zone  # 32600 for northern hemisphere

## Import Data and Prepare Visualization Parameters

In [None]:
# Import Planet Data: load the calibrated imagery collection from its
# Earth Engine asset path, then tag every image with an 'ID' property
# (via setID) so the collection can be filtered by ID further down.
planet = ee.ImageCollection('projects/abruptthawmapping/assets/yg_train_regions_imagery_calibrated')
planet = planet.map(setID)

In [None]:
# Fetch the description of the first image in the collection to inspect it
# (shown as the cell output).
planet.first().getInfo()

In [None]:
# Prep Map: create an interactive geemap map and center it on the Planet
# image collection.
Map = geemap.Map()
Map.centerObject(planet)

In [None]:
# Visualization parameters used when adding the imagery to the map:
# an RGB rendering of the 'red', 'green' and 'blue' bands, with one min/max
# stretch value per band (listed in the same order as 'bands') and a gamma
# of 0.9.
vis_params_imagery = {
    'min': [470, 415, 280],'max': [1180, 930, 750],
    'bands': ['red', 'green', 'blue'],
    'gamma': 0.9
}

## Prepare Data

In [None]:
# Nodata is imported into GEE as 0 by default, so keep only values greater
# than 0 and mask out everything else.
def mask_0(image):
    """Return `image` with every value that is not greater than 0 masked out."""
    return image.updateMask(image.gt(0))

# Apply the nodata mask to every image in the collection.
planet = planet.map(mask_0)

In [None]:
# Years covered by the imagery.
# NOTE(review): this list is not referenced by the other visible cells, which
# spell out each year explicitly -- confirm whether it is still needed.
years = [2017, 2018, 2019, 2020, 2021]

## Create Annual Composites

In [None]:
# Build one median composite per year (across all regions), plus one median
# composite over every image regardless of year.
# Images are assigned to a year by checking whether their 'ID' property
# contains the year as a substring.
# NOTE(review): a substring match also hits IDs that contain the same four
# digits somewhere other than the acquisition year -- confirm the ID format.
(
    planet_composite_2017,
    planet_composite_2018,
    planet_composite_2019,
    planet_composite_2020,
    planet_composite_2021,
) = [
    planet.filter(ee.Filter.stringContains('ID', str(year))).median()
    for year in (2017, 2018, 2019, 2020, 2021)
]

planet_composite_all = planet.median()

## Map Composites

In [None]:
# Add each annual composite, followed by the all-years composite, to the map
# as its own named layer (all rendered with the same visualization settings).
composite_layers = [
    (planet_composite_2017, '2017 Composites'),
    (planet_composite_2018, '2018 Composites'),
    (planet_composite_2019, '2019 Composites'),
    (planet_composite_2020, '2020 Composites'),
    (planet_composite_2021, '2021 Composites'),
    (planet_composite_all, 'All'),
]
for composite, layer_name in composite_layers:
    Map.addLayer(composite, vis_params_imagery, layer_name)

In [None]:
# Display the interactive map with the layers added above (cell output).
Map

## Export Annual Composites

In [None]:
# Import shapefile with AOI (multipolygon)
# `json` is needed for json.loads below and is not among the notebook's
# visible imports, so import it here to keep this cell runnable on its own.
import json

aoi = gpd.read_file("/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/yg_train_regions/bboxes/RTS_buffer_separate.shp")
# Use the row index as the polygon ID ('pid'); the export cells use it to name
# outputs and to look up each feature/zone by position.
aoi['pid'] = aoi.index
# convert to json: serialize the GeoDataFrame to a GeoJSON string and parse it
# back into a dict so that individual features can be indexed
sites = json.loads(aoi.to_json()) # if multiple sites

In [None]:
# Assign one UTM EPSG code to each AOI polygon: compute the code at every
# vertex of the polygon's exterior ring and keep the (rounded) median.
# Rows are collected in a list and the DataFrame is built once, instead of
# concatenating onto an empty frame on every iteration (which is quadratic,
# leaves object-dtype columns, and is deprecated for empty frames in pandas).
zone_rows = []
for idx, polygon in enumerate(aoi.geometry):
    vertex_zones = [utm_from_wgs84(x, y)
                    for x, y in zip(*polygon.exterior.coords.xy)]
    zone_rows.append({'pid': idx,
                      'utm_zone': round(statistics.median(vertex_zones))})

# Index by pid so the table lines up with the positions in `aoi`.
zones = pd.DataFrame(zone_rows, columns=['pid', 'utm_zone']).set_index('pid')
zones

In [None]:
# # Export Composites to GCS (2017)
# for pid in aoi.pid:
#     name = 'yg_train_regions_' + str(pid) + '_2017_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_2017,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()

In [None]:
# # Export Composites to GCS (2018)
# for pid in aoi.pid:
#     name = 'yg_train_regions_' + str(pid) + '_2018_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_2018,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()

In [None]:
# # Export Composites to GCS (2019)
# for pid in aoi.pid:
#     name = 'yg_train_regions_' + str(pid) + '_2019_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_2019,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()

In [None]:
# # Export Composites to GCS (2020)
# for pid in aoi.pid:
#     name = 'yg_train_regions_' + str(pid) + '_2020_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_2020,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()

In [None]:
# # Export Composites to GCS (2021)
# for pid in aoi.pid:
#     name = 'yg_train_regions_' + str(pid) + '_2021_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_2021,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()

In [None]:
# # Export Composites to GCS (all)
# for pid in aoi.pid:
#     name = 'yg_train_regions_' + str(pid) + '_all_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_all,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()

In [None]:
# # Monitor tasks and restart as needed
# # currently, this keeps failed tasks in current_info and then 
# # continues to restart that task indefinitely...
# # need to figure out how to remove failed tasks from current_info
# # after restarting
# start_time_all = datetime.now(tz = timezone(timedelta(hours = 0))).isoformat()
# for pid in [47]:
#     name = 'yg_train_regions_' + str(pid) + '_all_composite'
#     geometry = sites['features'][pid]['geometry']['coordinates']
#     scale = 3
#     crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#     task = ee.batch.Export.image.toCloudStorage(
#         image = planet_composite_all,
#         description = name,
#         bucket = 'abrupt_thaw',
#         fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#         crs = crs,
#         region = geometry,
#         scale = scale,
#         maxPixels = 1e13,
#         fileFormat = 'GeoTIFF',
#         formatOptions = {'cloudOptimized': True}
#     )
#     task.start()
# tasks = ee.data.listOperations()
# current_tasks = [task['metadata']['description'] for task in tasks 
#           if task['metadata']['createTime'] >= start_time_all]
# current_info = [{'name': task['metadata']['description'], 
#                  'state': task['metadata']['state']}
#                 for task in tasks 
#                 if task['metadata']['description'] in current_tasks
#                 and task['metadata']['createTime'] >= start_time_all]
    
# repeated_tasks = []
# while np.any([task['state'] in ['PENDING', 'RUNNING']
#               for task in current_info]):
#     # Check if any tasks have failed
#     any_failed = np.any([task['state'] == 'FAILED' for task in current_info])
#     if any_failed:
#         # Restart tasks that failed
#         names = [task['name'] for task in current_info 
#                   if task['state'] == 'FAILED']
#         pids = [int(name.split('_')[3]) for name in names]
#         print('That sucks,', names, 'failed. Restarting this(these) task(s).')
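#         # list.append returns None, so assigning a list of append() results
#         # would replace repeated_tasks with [None, ...]; extend in place instead
#         repeated_tasks.extend(names)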
        
#         for pid, name in zip(pids, names):
#             geometry = sites['features'][pid]['geometry']['coordinates']
#             scale = 3
#             crs = 'EPSG:' + str(zones.iloc[pid].utm_zone)
#             task = ee.batch.Export.image.toCloudStorage(
#                 image = planet_composite_all,
#                 description = name,
#                 bucket = 'abrupt_thaw',
#                 fileNamePrefix = 'planet_processing/data/yg_train_regions/calibrated_composites/' + name,
#                 crs = crs,
#                 region = geometry,
#                 scale = scale,
#                 maxPixels = 1e13,
#                 fileFormat = 'GeoTIFF',
#                 formatOptions = {'cloudOptimized': True}
#             )
#             task.start()
    
#     # Check if any tasks have succeeded
#     any_succeeded = np.any([task['state'] == 'SUCCESS' for task in current_info])
#     if any_succeeded:
#         # Remove finished tasks from list of tasks
#         succeeded = [task['name'] for task in current_info if task['state'] == 'SUCCESS']
#         print(succeeded, 'succeeded.')
#         current_tasks = [task for task in current_tasks if task not in succeeded]
#         print('Still need to export', current_tasks)
    
#     # Wait ten seconds, then check task status again
#     time.sleep(10)
#     tasks = ee.data.listOperations()
#     # Make sure old failed tasks are not included
#     current_info = [{'name': task['metadata']['description'], 
#                      'state': task['metadata']['state']}
#                     for task in tasks
#                     if task['metadata']['description'] in current_tasks
#                     and task['metadata']['createTime'] >= start_time_all
#                     and (
#                         task['metadata']['description'] not in repeated_tasks 
#                         or (task['metadata']['description'] in repeated_tasks 
#                             and task['metadata']['state'] != 'FAILED')
#                     )]
    
    