# Planet Image Download

## API and Package Set-Up

In [None]:
# Import Libraries
import sys
import os
import json
import requests
from requests.auth import HTTPBasicAuth
import time
from datetime import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely as shp
from pprint import pprint
import ast
import seaborn as sns
import glob
import random
import urllib.request
import zipfile

In [None]:
# Get Planet API Key
# Load the IPython `dotenv` extension and run its `%dotenv` magic before the
# environment lookup below (presumably this loads variables from a local .env
# file -- confirm where that file is expected to live).
%load_ext dotenv
%dotenv

# Read the key from the PL_API_KEY environment variable. os.getenv returns
# None when the variable is not set. The value is passed as the basic-auth
# username, `auth=(api_key, '')`, by the Planet requests in this notebook.
api_key = os.getenv('PL_API_KEY')

## Data Import

In [None]:
# Load the AOI shapefile; its features are multipolygons
aoi = gpd.read_file("/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/rts_buffer/RTS_buffer.shp")
# Break every multipolygon apart so that each row holds a single polygon
aoi = aoi.explode(column='geometry', ignore_index=True)
# Reduce each polygon to its outer ring, which discards any interior holes
aoi.geometry = aoi.geometry.exterior
# Rebuild a solid polygon from the vertices of each outer ring
aoi.geometry = [
    shp.geometry.Polygon(
        [shp.geometry.Point(x, y) for x, y in ring.coords]
    )
    for ring in aoi.geometry
]
# Serialise to GeoJSON text and parse it into a dict for the Planet data
# search; each entry of sites['features'] is one site
sites = json.loads(aoi.to_json())

In [None]:
# Import information on images to download
# The download loop in this notebook reads the `id`, `polygon_id` and `year`
# columns of this table.
images = pd.read_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_images_filtered.csv')
# Bare expression: shows the table as the cell output when run in a notebook
images

## Define Functions

In [None]:
def makemydir(dir_path):
    """Create ``dir_path`` (with any missing parents) and switch into it.

    Args:
        dir_path: Directory to create and make the current working directory.

    Raises:
        OSError: If the directory cannot be created (for example permission
            denied, or a path component exists but is not a directory) or
            cannot be entered.
    """
    # exist_ok=True tolerates an already-existing directory while still
    # reporting genuine creation failures, which a blanket
    # ``except OSError: pass`` would silently hide.
    os.makedirs(dir_path, exist_ok=True)
    # NOTE(review): this changes the process-wide working directory; it is
    # kept for backward compatibility, but confirm whether any caller needs it.
    os.chdir(dir_path)

In [None]:
def recu_down(url, filename, max_retries=None):
    """Download ``url`` to ``filename``, retrying when the transfer is cut short.

    Retries happen in a loop rather than by recursion, so a long run of
    truncated downloads cannot exhaust the call stack.

    Args:
        url: Address of the resource to fetch.
        filename: Local path the downloaded bytes are written to.
        max_retries: Maximum number of additional attempts after a truncated
            download. ``None`` (the default) keeps retrying indefinitely.

    Raises:
        urllib.error.ContentTooShortError: If the download is still truncated
            once ``max_retries`` retries have been used.
    """
    # Imported explicitly so the except clause does not rely on
    # ``urllib.request`` having loaded ``urllib.error`` as a side effect.
    from urllib.error import ContentTooShortError

    retries = 0
    while True:
        try:
            urllib.request.urlretrieve(url, filename)
            return
        except ContentTooShortError:
            if max_retries is not None and retries >= max_retries:
                raise
            retries += 1
            print('Download failed. Trying again...')

In [None]:
def download_requested(request_info, dir_path, order_download_path):
    """Wait for a Planet order to finish, then download and unpack its results.

    Args:
        request_info: Two-item sequence ``[order_info, req]``. ``order_info``
            is the order payload dict (its ``'name'`` names the output files)
            and ``req`` is the ``requests`` response whose JSON body holds the
            order's ``['_links']['_self']`` status URL.
        dir_path: Directory that receives the extracted scene folder
            (``<name>/``) and the manifest (``<name>.json``).
        order_download_path: CSV file to which one row is appended once the
            manifest has been downloaded.

    Any failure (including an order that ends in a non-success state) is
    reported with a printed message and ends the run through ``sys.exit()``.
    """
    order_info = request_info[0]
    req = request_info[1]
    # Bound before the try block so the failure message can always refer to it.
    order_name = None

    try:
        order_name = order_info['name']
        print(order_name)

        # The status URL does not change between polls, so resolve it once.
        order_info_url = req.json()['_links']['_self']

        # Poll the API until the order reaches a terminal state
        # (this can take some time to complete).
        while True:
            check_state_request = requests.get(order_info_url,
                                               auth=(api_key, ''))
            if check_state_request.status_code == 429:
                # Rate limited: wait briefly and poll again.
                time.sleep(1)
                continue

            order_status = check_state_request.json()
            state = order_status['state']
            if state == 'success':
                break
            if state in ('failed', 'cancelled', 'partial'):
                # These states never turn into 'success'; polling on would
                # loop forever.
                raise RuntimeError(
                    'order ended in state {!r}'.format(state))
            time.sleep(1)

        # Locate the manifest and the zip archive among the delivered results.
        manifest_url = None
        zip_url = None
        for result in order_status['_links']['results']:
            result_name = result['name']
            if manifest_url is None and result_name.endswith('.json'):
                manifest_url = result['location']
            elif zip_url is None and result_name.endswith('.zip'):
                zip_url = result['location']
        print("scene is ready")

        # The archive is saved as <dir_path>/<name>.zip and extracted into
        # the folder <dir_path>/<name>.
        extract_dir = os.path.join(dir_path, order_name)
        outfile = extract_dir + ".zip"

        if not os.path.exists(extract_dir):
            if zip_url is None:
                raise RuntimeError('order results contain no .zip archive')

            print("downloading scene")
            start_time = time.time()
            recu_down(zip_url, outfile)  # the actual download
            elapsed_time = time.time() - start_time
            print("downloading time =", np.round(elapsed_time / 60, 2), "minutes")

            print("extracting scene")
            with zipfile.ZipFile(outfile, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

            # The archive is no longer needed once it has been extracted.
            os.remove(outfile)
        else:
            print('order already downloaded')

        # Fetch the metadata json unless an earlier run already did.
        outfile_manifest = os.path.join(dir_path, order_name + ".json")

        if not os.path.exists(outfile_manifest):
            if manifest_url is None:
                raise RuntimeError('order results contain no .json manifest')

            recu_down(manifest_url, outfile_manifest)
            print("scene download and extraction complete")

            # Record the completed download; the header is written only when
            # the CSV file is being created.
            download_df = pd.DataFrame({
                'order_info': [order_info],
                'request': [req],
                'downloaded': True
            })
            download_df.to_csv(order_download_path,
                               index=False,
                               mode='a',
                               header=not os.path.exists(order_download_path))

        print("Images and manifest download completed for {}".format(order_name))

    except Exception as err:
        print(str(order_name) + ' did not download.')
        print('Reason: {!r}'.format(err))
        sys.exit()


## Data Download

In [None]:
order_info_path = '/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_images_orders.csv'
order_download_path = '/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_images_downloads.csv'


def _build_order_info(name, item_ids, site):
    """Return the Planet order payload that clips ``item_ids`` to ``site``."""
    return {
        "name": name,
        "source_type": "scenes",
        "products": [{
            "item_ids": item_ids,
            "item_type": "PSScene",
            "product_bundle": "analytic_sr_udm2,analytic_8b_sr_udm2"
        }],
        "tools": [{
            "clip": {
                "aoi": {
                    "type": "Polygon",  # change from Polygon to Multipolygon
                    "coordinates": site['geometry']['coordinates']
                }
            }
        }],
        "delivery": {
            "archive_type": "zip",
            "single_archive": True,
            "archive_filename": "{{name}}_{{order_id}}.zip"
        }
    }


# Orders placed by earlier runs. On a first run the file does not exist yet,
# so start from an empty history instead of failing in read_csv.
if os.path.exists(order_info_path):
    prior_orders = pd.read_csv(order_info_path,
                               converters={'order_info': ast.literal_eval})
    prior_orders = list(prior_orders['order_info'])
else:
    prior_orders = []
prior_items = [item['products'][0]['item_ids'] for item in prior_orders]

for polygon_id, site in enumerate(sites['features']):

    # NOTE: the [1:5] slice selects the 2nd through 5th distinct years present
    # in the images table.
    for year in list(np.unique(images.year)[1:5]):

        # Make sure there is a directory into which data should be downloaded
        dir_path = (
            '/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/planet_data/yamal_gydan_polygons/polygon_id_'
            + str(polygon_id)
            + '/'
            + str(year)
        )
        makemydir(dir_path)

        # Get ids for images that we want to request
        all_item_ids = list(images[(images.polygon_id == polygon_id) & (images.year == year)]['id'])

        # Get image ids for images that have already been downloaded
        current_files = glob.glob(os.path.join(dir_path, "**/*SR_harmonized_clip.tif"), recursive=True)
        current_files = [i.split('/')[-1].split('_3B_Analytic')[0] for i in current_files]

        # Filter images to download to avoid repeat downloads
        items_to_download = [i for i in all_item_ids if i not in current_files]

        print("-------------------------------------")
        print("\n")
        print(len(items_to_download), "items to download for polygon id " + str(polygon_id) + ', year ' + str(year))
        print("\n")
        print("-------------------------------------")

        if not items_to_download:
            continue

        # Download items in chunks: chunks with fewer images (<10) work better
        # than chunks with many images.
        n_items = 5
        # At least one chunk is required: np.array_split raises ValueError
        # when asked for 0 sections, which happened for fewer than n_items items.
        n_chunks = max(1, len(items_to_download) // n_items)
        img_chunks = np.array_split(items_to_download, n_chunks)
        img_chunks = [list(feature) for feature in img_chunks]
        requested_list = []

        for chunk_idx, item_ids in enumerate(img_chunks):
            print(item_ids)

            if item_ids not in prior_items:  # no order has been placed yet
                now = datetime.now().strftime("%Y%m%d_%H%M%S")
                chunk_id = now + '_' + str(polygon_id) + '_' + str(year) + '_' + str(chunk_idx)

                # create the order info
                order_info = _build_order_info(chunk_id, item_ids, site)

                request_succeeded = False
                print("Requesting items...")
                while not request_succeeded:
                    request = requests.post('https://api.planet.com/compute/ops/orders/v2',
                                            auth=(api_key, ''),
                                            json=order_info)
                    print(request)
                    print(request.json())

                    # 202 means the order was accepted.
                    # NOTE(review): any other status is retried indefinitely,
                    # including permanent errors such as a rejected payload.
                    if request.status_code == 202:
                        requested_list.append([order_info, request])
                        request_succeeded = True

                        # create and append order info into file
                        order_df = pd.DataFrame({
                            'polygon_id': polygon_id,
                            'year': year,
                            'chunk_idx': chunk_idx,
                            'order_name': chunk_id,
                            'order_info': [request.json()]
                        })

                        order_df.to_csv(order_info_path,
                                        index=False,
                                        mode='a',
                                        header=not os.path.exists(order_info_path))

                    else:
                        time.sleep(1)  # wait before trying the request again
            else:
                print('Order already placed.')
                # Reuse the first earlier order made for exactly these item ids.
                order = prior_orders[prior_items.index(item_ids)]
                order_info = _build_order_info(order['name'], item_ids, site)
                requested_list.append([order_info,
                                       requests.get(order['_links']['_self'],
                                                    auth=(api_key, ''))])

                order_df = pd.DataFrame({
                    'polygon_id': polygon_id,
                    'year': year,
                    'chunk_idx': chunk_idx,
                    'order_name': order['name'],
                    'order_info': [order]
                })

                if not os.path.exists(order_info_path):
                    print('order info file does not exist')
                    # New file: write the header row along with the data.
                    order_df.to_csv(order_info_path,
                                    index=False,
                                    mode='a',
                                    header=True)
                else:
                    print('order info file already exists')
                    downloads = pd.read_csv(order_info_path)

                    if order['name'] not in list(downloads.order_name):
                        print('adding order info to order info file')
                        # Existing file: append the row without a header.
                        order_df.to_csv(order_info_path,
                                        index=False,
                                        mode='a',
                                        header=False)

        if requested_list:
            print('items to download')

            for request_info in requested_list:
                download_requested(request_info, dir_path, order_download_path)

