# Planet Image Download

## API and Package Set-Up

In [None]:
# Import Libraries
import sys
import os
import json
import requests
from requests.auth import HTTPBasicAuth
from google.cloud import storage
import time
from datetime import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely as shp
from pprint import pprint
import ast
import random
import re

In [None]:
# Get Planet API Key
%load_ext dotenv
%dotenv

api_key = os.getenv('PL_API_KEY')
gcs_key = os.getenv('GCS_PL_ORDERS_KEY')

In [None]:
# Set up access to abrupt_thaw
# Create a Google Cloud Storage client for the "AbruptThawMapping" project and
# look up the 'abrupt_thaw' bucket. The download cell later lists blobs in this
# bucket through storage_client to find images that were already delivered.
# NOTE(review): authentication is presumably picked up from the environment
# (application default credentials) -- confirm before running on a new machine.
storage_client = storage.Client(project="AbruptThawMapping")
abrupt_thaw = storage_client.get_bucket('abrupt_thaw')

## Data Import

In [None]:
# Read the AOI shapefile (one multipolygon feature per validation region)
aoi = gpd.read_file("/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/yg_val_regions/bboxes/yg_validation_bboxes.shp")
# Tag each feature with its region number
aoi['region'] = [0, 1, 2, 3]

# Split every multipolygon into its individual polygons (one row each)
aoi = aoi.explode(column='geometry', ignore_index=True)

# Keep only the outer boundary of each polygon, which drops any inner holes
aoi.geometry = aoi.geometry.exterior
pprint(aoi.geometry)

# The exteriors are rings, so rebuild a filled polygon from each ring's vertices
rebuilt_polygons = []
for ring in aoi.geometry:
    vertices = [shp.geometry.Point(x, y) for x, y in list(ring.coords)]
    rebuilt_polygons.append(shp.geometry.Polygon(vertices))
aoi.geometry = rebuilt_polygons
pprint(aoi.geometry)

# GeoJSON-style dict (one feature per site) used when building Planet orders
sites = json.loads(aoi.to_json())

In [None]:
# Import information on images to download
# The download cell filters this table on its `region` column and orders the
# scene ids found in its `id` column, so both columns must be present.
images = pd.read_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/yg_val_regions/planet_images_filtered_manual_cloud_removal.csv')
# Show the table as the cell output for a quick visual check
images

## Data Download

In [None]:
# Place Planet orders for every region, a few images at a time, and have the
# clipped + harmonized results delivered to the abrupt_thaw bucket.
#
# Resume controls: both values should be 0 the first time through. If the code
# stops running (e.g. due to a loss of connectivity), change them to the first
# region/chunk combination you still want to download before re-running.
start_region = 0
start_chunk = 0

# Target number of images per order. Orders with fewer images (<10) work
# better than a few orders with many images each.
n_items = 5
# Seconds to wait between two order status checks
poll_interval = 1
# Surface-reflectance GeoTIFFs that have already been delivered to the bucket
sr_tif_pattern = re.compile(r'.*SR.*tif$')

# Successful orders are logged here so that they are not placed twice
order_info_path = '/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/yg_val_regions/planet_image_orders.csv'
try:
    prior_orders = pd.read_csv(order_info_path,
                               converters={'order_info': ast.literal_eval})
    prior_orders = list(prior_orders['order_info'])
    prior_items = [order['products'][0]['item_ids'] for order in prior_orders]
except FileNotFoundError:
    # No log yet: nothing has been ordered so far
    prior_items = []
pprint(prior_items)

# Set when an order fails so that the region loop stops as well. Using a flag
# (instead of re-reading order_status after the chunk loop) avoids a NameError
# when every chunk of a region had already been ordered.
order_failed = False

for region_id, site in enumerate(sites['features'][start_region:], start=start_region):

    pprint(site)

    # start_chunk only applies to the region the run resumes on
    if region_id > start_region:
        start_chunk = 0

    # Set the delivery path
    dir_path = f"planet_processing/data/yg_val_regions/planet_data/region_{region_id}/"

    # Get ids for images that we want to request
    all_item_ids = list(images[images.region == region_id].id)

    # Get image ids for images that have already been downloaded
    # (the id is the file name up to the '_3B_Analytic' suffix)
    current_files = {
        blob.name.split('/')[-1].split('_3B_Analytic')[0]
        for blob in storage_client.list_blobs("abrupt_thaw", prefix=dir_path)
        if sr_tif_pattern.match(blob.name)
    }

    # Filter images to download to avoid repeat downloads
    items_to_download = [i for i in all_item_ids if i not in current_files]

    pprint(items_to_download)

    print("-------------------------------------")
    print("\n")
    print(len(items_to_download), "items to download for region " + str(region_id))
    print("\n")
    print("-------------------------------------")

    if not items_to_download:
        continue

    # Split into roughly n_items-sized chunks (a single chunk for short lists).
    # .tolist() turns the numpy strings back into plain Python str.
    n_chunks = max(1, len(items_to_download) // n_items)
    img_chunks = [chunk.tolist() for chunk in np.array_split(items_to_download, n_chunks)]

    for chunk_idx, item_ids in enumerate(img_chunks, start=start_chunk):
        print('Chunk', chunk_idx)
        print('Item IDs:', item_ids, '\n')

        # check if an order has already been placed
        if item_ids in prior_items:
            print('Order already placed.')
            continue

        now = datetime.now().strftime("%Y%m%d_%H%M%S")
        chunk_id = f'YG_validation_region{region_id}_chunk{chunk_idx}_{now}'

        # create the order info
        order_info = {
            "name": chunk_id,
            "source_type": "scenes",
            "products": [{
                "item_ids": item_ids,
                "item_type": "PSScene",
                "product_bundle": "analytic_sr_udm2,analytic_8b_sr_udm2"
            }],
            "tools": [
                {
                    "clip": {
                        "aoi": {
                            "type": "Polygon",
                            "coordinates": site['geometry']['coordinates']
                        }
                    }
                },
                {
                    "harmonize": {
                        "target_sensor": "Sentinel-2"
                    }
                }
            ],
            "delivery": {
                "google_cloud_storage": {
                    "bucket": "abrupt_thaw",
                    "credentials": gcs_key,
                    "path_prefix": dir_path
                }
            }
        }

        # send request to Planet
        response = requests.post('https://api.planet.com/compute/ops/orders/v2',
                                 auth=(api_key, ''),
                                 json=order_info)
        print(response)
        if not response.ok:
            # Show Planet's explanation before raising, otherwise the failure
            # would only surface later as a confusing KeyError on 'state'
            print(response.text)
        response.raise_for_status()
        placed_order = response.json()
        pprint(placed_order)

        # wait while the order is queued and runs
        order_url = placed_order['_links']['_self']
        order_status = placed_order
        while order_status['state'] in ('queued', 'running'):
            time.sleep(poll_interval)
            status_response = requests.get(order_url, auth=(api_key, ''))
            status_response.raise_for_status()
            order_status = status_response.json()

        # If the order succeeded, create and append order info into file
        if order_status['state'] == 'success':
            order_df = pd.DataFrame({
                'region': region_id,
                'chunk_idx': chunk_idx,
                'order_name': chunk_id,
                'order_info': [placed_order]
            })

            # Write the header only when the log file is created
            order_df.to_csv(order_info_path,
                            index=False,
                            mode='a',
                            header=not os.path.exists(order_info_path))

        # If the order failed, stop the code from running any farther
        elif order_status['state'] == 'failed':
            print('last_message:', order_status['last_message'], '\nerror_hints:', order_status['error_hints'])
            order_failed = True
            break

        # Any other final state (e.g. partial, cancelled) is not logged, so the
        # chunk will be considered again on the next run
        else:
            print('Order finished with state:', order_status['state'])

    if order_failed:
        break
    
    

In [None]:
# # In case I need to cancel orders quickly
# requests.post('https://api.planet.com/compute/ops/bulk/orders/v2/cancel',
#               auth=(api_key, ''))