# Planet Image Search and MODIS Cloud Cover Filter
### This script searches Planet PSScene imagery and filters the search results by cloud cover using MODIS- and Planet-derived cloud cover metrics.
#### TODO:
- use clear_percent instead of cloud_percent to ensure that all haze and shadows are accounted for
- figure out how to ensure coverage in all cells
- Don't just remove all images <1.5 km^2, check for neighbors first. If it has a neighbor, the images can be merged after download and prior to processing.

## API and Package Set-Up

In [None]:
# Start Earth Engine API
# ee.Initialize() has to run before any ee.* object is built in the cells below.
# NOTE(review): presumably relies on credentials stored earlier by `earthengine authenticate` - confirm
import ee
ee.Initialize()

In [None]:
# Import Libraries
import geemap
import os
import json
import requests
from requests.auth import HTTPBasicAuth
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely as shp
from pprint import pprint
import ast
import seaborn as sns
import re

In [None]:
# Get Planet API Key
# The dotenv IPython extension loads variables from a .env file into the process environment.
# NOTE(review): presumably the .env file sits next to the notebook - confirm
%load_ext dotenv
%dotenv

# os.getenv returns None when PL_API_KEY is not set; the key is used for HTTP basic auth further down
api_key = os.getenv('PL_API_KEY')

## Data Import

In [None]:
# Earth Engine inputs: a rectangular AOI and the two MODIS image collections
# AOI as a closed GeoJSON ring (the first vertex is repeated at the end)
yg = ee.Geometry({
    'type': 'Polygon',
    'coordinates': [[[65, 65], [65, 74], [85, 74], [85, 65], [65, 65]]],
})

# MODIS daily collection whose state_1km QC band is used for the cloud cover estimate
modis = ee.ImageCollection('MODIS/061/MOD09GA')

# MODIS snow collection providing the NDSI_Snow_Cover band
# NOTE(review): this is collection version 006 while the reflectance collection above is 061 - confirm intended
modis_snow = ee.ImageCollection('MODIS/006/MOD10A1')

In [None]:
# Read the AOI shapefile and split its multipolygon features into one row per polygon
aoi = (
    gpd.read_file("/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/rts_buffer/RTS_buffer.shp")
    .explode(column = 'geometry', ignore_index = True)
)
# keep only the outer ring of every polygon, which discards inner holes
aoi.geometry = aoi.geometry.exterior
pprint(aoi.geometry)
# rebuild a (hole-free) polygon from the vertices of each outer ring
aoi.geometry = [
    shp.geometry.Polygon([shp.geometry.Point(x, y) for x, y in ring.coords])
    for ring in aoi.geometry
]
pprint(aoi.geometry)
# GeoJSON-style dict of all polygons, used for the Planet data search (one feature per site)
sites = json.loads(aoi.to_json())

In [None]:
# Write every AOI polygon to its own shapefile (RTS_buffer_separate_<index>.shp).
# Only the index is needed, so iterate over it directly instead of building each row with iterrows().
for index in aoi.index:
    # an integer slice on a DataFrame selects rows by position and keeps a one-row GeoDataFrame
    data = aoi[index:index+1]
    name = f'/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/rts_buffer/RTS_buffer_separate_{index}.shp'
    data.to_file(name)
    

In [None]:
# Years to test (2017 through 2021, inclusive)
years = list(range(2017, 2022))

## Define Functions

In [None]:
def bitwiseExtract(input, fromBit, toBit):
    """Extract the bits ``fromBit`` through ``toBit`` (inclusive) from a bitmask.

    ``input`` is shifted right by ``fromBit`` and masked with
    ``(1 << (toBit - fromBit + 1)) - 1``, so the selected bits end up in the
    lowest positions of the result.

    ``input`` must offer ``rightShift`` and ``bitwiseAnd`` (it is called with
    an Earth Engine image in this notebook).
    """
    n_bits = ee.Number(1).add(toBit).subtract(fromBit)
    low_bits = ee.Number(1).leftShift(n_bits).subtract(1)
    shifted = input.rightShift(fromBit)
    return shifted.bitwiseAnd(low_bits)

In [None]:
def modisSnow(modis_snow_imagery, date, aoi):
    """Return the maximum NDSI snow cover inside ``aoi`` for one date.

    The collection is filtered with ``ee.Filter.date(date)``, the
    ``NDSI_Snow_Cover`` band of the first matching image is clipped to
    ``aoi`` and reduced over ``aoi`` with a max reducer.

    Returns the client-side result of ``reduceRegion(...).getInfo()``.
    """
    matching = modis_snow_imagery.filter(ee.Filter.date(date))
    ndsi = ee.Image(matching.select(['NDSI_Snow_Cover']).first()).clip(aoi)

    # the reducer is a maximum, not a mean: the highest NDSI value in the AOI is reported
    region_max = ndsi.reduceRegion(ee.Reducer.max(), aoi)

    return region_max.getInfo()

In [None]:
def getMetadata(feature, aoi, polygon_id, crs = 32642):
    """Extract the metadata needed for the cloud calculation from one Planet search result.

    Parameters
    ----------
    feature : dict
        One entry of the search result's ``features`` list. The keys read here
        are ``id``, ``geometry.coordinates`` and, under ``properties``,
        ``acquired``, ``instrument`` and the cloud fields.
    aoi : GeoDataFrame
        AOI polygons; the row at position ``polygon_id`` is used.
    polygon_id : int
        Position of the AOI polygon the image was found for.
    crs : int, optional
        EPSG code of the projected CRS used for area calculations. The default
        (32642) is the value that was previously hard-coded.

    Returns
    -------
    list
        ``[img_id, img_date, instrument_type, img_coverage, img_area,
        img_cloud_cover, img_geometry]`` where ``img_coverage`` is the rounded
        percentage of the AOI polygon covered by the image, ``img_area`` is the
        intersection area divided by 1e6 (km^2 when the CRS unit is metres) and
        ``img_geometry`` is the one-element GeoSeries holding the intersection.
    """
    properties = feature['properties']

    img_id = feature['id']

    # keep only the date part of the acquisition timestamp
    img_date = properties['acquired'].split('T')[0]

    instrument_type = properties['instrument']

    # PS2 scenes: cloud_cover is multiplied by 100 to get a percentage;
    # all other instruments: cloud and cloud-shadow percentages are added
    if instrument_type == 'PS2':
        img_cloud_cover = float(properties['cloud_cover']*100)
    else:
        img_cloud_cover = float(properties['cloud_percent'] + properties['shadow_percent'])

    # intersect the AOI polygon with the outer ring of the image footprint to get
    # the part of the AOI that actually has data
    aoi_polygon = aoi[polygon_id:polygon_id+1]['geometry']
    footprint = shp.geometry.Polygon(
        [(vertex[0], vertex[1]) for vertex in feature['geometry']['coordinates'][0]]
    )
    img_geometry = aoi_polygon.intersection(footprint)

    # reproject once and pull the single value out with .iloc[0]
    # (calling float() on a one-element Series is deprecated in pandas)
    intersection_area = img_geometry.to_crs(crs = crs).area.iloc[0]
    aoi_area = aoi_polygon.to_crs(crs = crs).area.iloc[0]

    img_area = float(intersection_area/1e6)
    img_coverage = round(float(intersection_area/aoi_area*100))

    return [img_id, img_date, instrument_type, img_coverage, img_area, img_cloud_cover, img_geometry]

In [None]:
def geometryToEE(img_geometry, polygon_id):
    """Convert the polygon labelled ``polygon_id`` in ``img_geometry`` to an ``ee.Geometry``.

    Only the exterior ring of the polygon is passed on, as a GeoJSON-style
    ``Polygon`` with a single ring.
    """
    frame = gpd.GeoDataFrame(geometry = img_geometry)
    polygon = frame.geometry[polygon_id]
    exterior_ring = [[x, y] for x, y in polygon.exterior.coords]

    return ee.Geometry({
        'type': 'Polygon',
        'coordinates': [exterior_ring]
    })

In [None]:
def modisCloudCover(modis_imagery, date, aoi):
    """Return the MODIS cloud cover of ``aoi`` on ``date`` as a rounded percentage.

    Bits 0-1 of the ``state_1km`` band of the first image matching ``date``
    are extracted; value 0 counts as clear and values 1, 2 and 3 all count as
    cloud. The percentage is the cloudy pixel area divided by the total pixel
    area inside ``aoi`` (both summed at a 1000 m scale), times 100.
    """
    first_image = ee.Image(modis_imagery.filter(ee.Filter.date(date)).first())
    qc = first_image.select(['state_1km']).clip(aoi)

    # 1 where the cloud state is anything but 0, and an all-ones mask over the same pixels
    cloud_mask = bitwiseExtract(qc, 0, 1).remap([0, 1, 2, 3], [0, 1, 1, 1])
    area_mask = cloud_mask.remap([0, 1], [1, 1])

    def summed_area(mask):
        # total area of the pixels where mask == 1; remap() names its output band 'remapped'
        return (
            mask.multiply(ee.Image.pixelArea())
            .reduceRegion(
                reducer = ee.Reducer.sum(),
                geometry = aoi,
                scale = 1000,
                maxPixels = 1e10
            )
            .getNumber('remapped')
        )

    cloud_percent = (
        summed_area(cloud_mask)
        .divide(summed_area(area_mask))
        .multiply(ee.Number(100))
        .round()
    )

    return cloud_percent.getInfo()

## Determine Snow Free Dates

In [None]:
# modis_snow_df = pd.DataFrame(columns = ['polygon_id', 'year', 'date', 'snow_cover'])
# for index, row in aoi.iterrows():
#     print(index)
#     geometry = [[[x, y] for x, y in list(row.geometry.exterior.coords)]]
    
#     # convert geometry to ee.Geometry
#     geometry_ee = ee.Geometry({
#         'type': 'Polygon',
#         'coordinates': geometry
#     })
#     for year in years:
#         print(year)
#         dates = pd.date_range(str(year) + '-06-01', str(year) + '-08-31', freq = 'D')
        
#         for date in dates:
            
#             snow = modisSnow(modis_snow, date, geometry_ee)
            
#             # add to output
#             modis_snow_df = pd.concat([modis_snow_df,
#                                        pd.DataFrame({'polygon_id': index,
#                                                      'year': year,
#                                                      'date': date,
#                                                      'snow_cover': snow})])

# modis_snow_df = modis_snow_df.fillna(value = np.NAN)
# modis_snow_df = modis_snow_df.reset_index(drop = True)
# modis_snow_df

In [None]:
# modis_snow_df.to_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/modis_snow_data.csv',
#                     index = False)
# Load the saved MODIS snow table, parse the dates and drop every row with a missing value.
# reset_index() keeps the pre-dropna row labels in an extra 'index' column.
modis_snow_df = (
    pd.read_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/modis_snow_data.csv')
    .assign(date = lambda df: pd.to_datetime(df['date']))
    .dropna(how = 'any')
    .reset_index()
)
modis_snow_df

In [None]:
# Define the first snow-free date as the first of three consecutive rows with NDSI == 0
# (rows with NaN values are not part of the table, so "consecutive" refers to the remaining rows)
modis_snow_df['snow_free'] = np.where(modis_snow_df.snow_cover == 0, 1, 0)
# For every row: number of snow-free rows among this row and the next two of the same polygon and year.
# transform() returns the result on the original index, so the assignment stays correct
# regardless of how the rows are ordered.
modis_snow_df['rolling_snow_free'] = (
    modis_snow_df
    .groupby(['polygon_id', 'year'])['snow_free']
    .transform(lambda snow_free: snow_free.rolling(3).sum().shift(-2))
)
# first row per polygon and year that starts a run of three snow-free rows
snow_free_date = (
    modis_snow_df[modis_snow_df.rolling_snow_free == 3]
    .groupby(['polygon_id', 'year'])
    .first()
    .rename(columns = {'date': 'snow_free_date'})
    .snow_free_date
)
snow_free_date

In [None]:
# Spot check: snow data of polygon 27 between 2019-06-10 and 2019-07-20
modis_snow_df.loc[
    modis_snow_df['date'].between('2019-06-10', '2019-07-20')
    & (modis_snow_df['polygon_id'] == 27)
]

In [None]:
# # import warnings
# with warnings.catch_warnings(): # there is a warning getting triggered inside of sns, I think
#     warnings.simplefilter("ignore")
#     g = sns.FacetGrid(data = modis_snow_df,
#                           col = 'year',
#                           row = 'polygon_id',
#                           sharex = False)
#     g.map(sns.lineplot, 'date', 'snow_cover')
    
#     for ax, pos in zip(g.axes.flat, snow_free_date):
#         ax.axvline(x=pos, color='black', linestyle=':')


## Search Planet Imagery

In [None]:
# Example lookup: is polygon 0 snow free before July 1 in 2017?
polygon_id, year = 0, 2017
start_date = snow_free_date.loc[(polygon_id, year)]
start_date < pd.Timestamp('2017-07-01')

In [None]:
# all_metadata = []
# metadata_df = pd.DataFrame(columns = ['polygon_id', 'year', 'metadata'])

# # Data type
# item_type = "PSScene"

# # asset filter
# asset_filter = {
#     'type': 'OrFilter',
#     'config': [
#        {
#            "type": "AndFilter",
#             "config": [
#                 {
#                     "type": "AssetFilter",
#                     "config": [
#                         "ortho_analytic_4b_sr"
#                     ]
#                 },
#                 {
#                     "type": "AssetFilter",
#                     "config": [
#                         "ortho_udm2"
#                     ]
#                 }
#             ]
#         },
#         {
#             "type": "AndFilter",
#             "config": [
#                 {
#                     "type": "AssetFilter",
#                     "config": [
#                         "ortho_analytic_8b_sr"
#                     ]
#                 },
#                 {
#                     "type": "AssetFilter",
#                     "config": [
#                         "ortho_udm2"
#                     ]
#                 }
#             ]
#         } 
#     ]
    
# }

# for polygon_id, site in enumerate(sites['features']):
    
#     print(site['id'])
#     site_name = site['id']

#     session = requests.Session()
#     session.auth = (api_key, '')
#     site_coords = site['geometry']['coordinates']

#     site_dict = {
#         "type": "Polygon",
#         "coordinates": site_coords}

#     # get images that overlap with our aoi
#     geometry_filter = {
#         "type": "GeometryFilter",
#         "field_name": "geometry",
#         "config": site_dict
#     }

#     for year in years:
        
#         if snow_free_date.loc[(polygon_id, year)] < pd.to_datetime('{}-07-01'.format(year)):
#             start_date = str(snow_free_date.loc[(polygon_id, year)])[0:10]
#         else:
#             start_date = '{}-07-01'.format(year)

#         # i only want images between these two dates of each year...easier to search within a year to avoid massive search queries
#         start_date = "{}T00:00:00.000Z".format(start_date)
#         end_date = "{}-08-31T00:00:00.000Z".format(year)

#         # get images acquired within a date range
#         date_range_filter = {
#             "type": "DateRangeFilter",
#             "field_name": "acquired",
#             "config": {
#                 "gte": start_date,
#                 "lte": end_date
#             }
#         }

#         cloud_cover_filter = {
#             "type": "RangeFilter",
#             "field_name": "cloud_cover",
#             "config": {
#                 "lte": 1 # cloud cover threshold - none currently
#             }
#         }

#         combined_filter = {
#             "type": "AndFilter",
#             "config": [
#                 asset_filter,
# #                 instrument_filter,
#                 geometry_filter,
#                 date_range_filter,
#                 cloud_cover_filter
#             ]
#         }

#         # API request object
#         search_request = {
#             "item_types": [item_type],
#             "filter": combined_filter
#         }

#         # fire off the POST request
#         search_result = \
#           requests.post(
#             'https://api.planet.com/data/v1/quick-search',
#             auth=HTTPBasicAuth(api_key, ''),
#             json=search_request)

#         all_metadata.append(search_result.json())
        
#         # format metadata for dataframe
#         temp_df = pd.DataFrame({'polygon_id': site_name,
#                                 'year': year,
#                                 'metadata': [search_result.json()]})
        
#         metadata_df = pd.concat([metadata_df, temp_df], axis = 0)
# pprint('# of searches: ' + str(len(all_metadata)) + ' (should be ' + str(len(sites['features'])*len(years)) + ')')
# metadata_df = metadata_df.reset_index(drop = True)
# metadata_df

In [None]:
# # save output
# metadata_df.to_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_image_search.csv')
# Load the saved search results; the 'metadata' column holds the text form of a Python
# object and is parsed back with ast.literal_eval while reading.
metadata_df = pd.read_csv(
    '/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_image_search.csv',
    converters = {'metadata': ast.literal_eval},
    index_col = 0,
)
metadata_df

In [None]:
# Look up one scene by id in the search result stored under index label 1
[item for item in metadata_df.metadata[1]['features'] if item['id'] == '20180722_064609_1032']

## Calculate Cloud Cover

In [None]:
# cloud_data = pd.DataFrame(
#     columns = [
#         'polygon_id', 
#         'year', 
#         'date', 
#         'id',
#         'instrument',
#         'coverage', 
#         'area', 
#         'modis_cloud_cover', 
#         'planet_cloud_cover', 
#         'cloud_cover', 
#         'modis_planet_diff',
#         'geometry']
# )
# for index,row in metadata_df.iterrows():
    
#     print(index);
    
#     # get metadata
#     polygon_id = int(row['polygon_id']);
#     img_year = int(row['year']);
#     metadata = row['metadata']['features'];
    
#     if len(metadata) > 0: 
#         for feature in metadata:
            
#             # extract metadata needed for cloud cover calculations
#             img_id, img_date, instrument_type, img_coverage, img_area, img_cloud_cover, img_geometry = getMetadata(feature, aoi, polygon_id);
# #             print(img_id)
            
#             # calculate modis cloud cover if the geometry is a polygon
#             if str(type(img_geometry[polygon_id])) == "<class 'shapely.geometry.polygon.Polygon'>":
#                 # convert geometry to ee.Geometry
#                 img_geometry_ee = geometryToEE(img_geometry, polygon_id);

#                 # calc modis cloud cover
#                 modis_cloud_cover = modisCloudCover(modis, img_date, img_geometry_ee);
#                 if modis_cloud_cover > 100:
#                     modis_cloud_cover = 100;
                
#             # calculate modis cloud cover if the geometry is a multipolygon
#             elif str(type(img_geometry[polygon_id])) == "<class 'shapely.geometry.multipolygon.MultiPolygon'>":
                
#                 polygon_geometries = gpd.GeoDataFrame(
#                     geometry = img_geometry
#                 ).explode(column = 'geometry', ignore_index = True)
#                 modis_cloud_cover = []
#                 polygon_area = []
#                 for index, row in polygon_geometries.iterrows():
                    
#                     # calculate area of sub polygon
#                     temp_area = gpd.GeoDataFrame(
#                         geometry = row, 
#                         crs = polygon_geometries.crs
#                     ).reset_index().to_crs(crs = 32642).geometry.area/1e6
#                     polygon_area.append(temp_area)
                    
#                     # convert geometry to ee.Geometry
#                     polygon_geometry = [[[x, y] for x, y in list(row.geometry.exterior.coords)]]
    
#                     # convert geometry to ee.Geometry
#                     polygon_geometry_ee = ee.Geometry({
#                         'type': 'Polygon',
#                         'coordinates': polygon_geometry
#                     })

#                     # calc modis cloud cover of sub polygon
#                     temp_cloud_cover = modisCloudCover(modis, img_date, polygon_geometry_ee);
#                     modis_cloud_cover.append(temp_cloud_cover)
                    
#                 # calculate average cloud cover of multipolygon (img_geometry)
#                 modis_cloud_cover = int(round(sum(
#                     [cloud * area for cloud, area in zip(modis_cloud_cover, polygon_area)]
#                 )/img_area))
#                 if modis_cloud_cover > 100:
#                     modis_cloud_cover = 100;
                    
#             # get higher cloud cover estimate
#             cloud_cover = np.maximum(modis_cloud_cover, img_cloud_cover)
            
#             # calculate difference between cloud cover estimates
#             modis_planet_diff = abs(
#                 modis_cloud_cover - img_cloud_cover
#             )
            
#             # organize data into a dataframe
#             temp_df = pd.DataFrame({
#                 'polygon_id': polygon_id,
#                 'year': img_year,
#                 'date': img_date,
#                 'id': img_id,
#                 'instrument': instrument_type,
#                 'coverage': img_coverage,
#                 'area': img_area,
#                 'modis_cloud_cover': modis_cloud_cover,
#                 'planet_cloud_cover': img_cloud_cover,
#                 'cloud_cover': cloud_cover,
#                 'modis_planet_diff': modis_planet_diff,
#                 'geometry': img_geometry
#             });

#             # append new data to old
#             cloud_data = pd.concat([cloud_data, temp_df], axis = 0);

# cloud_data = gpd.GeoDataFrame(cloud_data);
# cloud_data


In [None]:
# # save output
# cloud_data.to_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_images_modis_cloud.csv')
# Load the saved cloud cover table and parse the acquisition dates
cloud_data = pd.read_csv(
    '/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_images_modis_cloud.csv',
    index_col = 0,
)
cloud_data['date'] = pd.to_datetime(cloud_data['date'])
# the geometry column is stored as WKT text; parse it and declare the CRS as EPSG:4326
cloud_data = gpd.GeoDataFrame(
    cloud_data,
    geometry = gpd.GeoSeries.from_wkt(cloud_data['geometry']),
    crs = 'EPSG:4326',
)
cloud_data

In [None]:
# Distribution of the image areas (optional extras: hue = 'year', multiple = 'stack', alpha = 0.5)
sns.displot(data = cloud_data, x = 'area')

## Filter Images on Cloud Cover and Date

In [None]:
# Number of images per polygon and year before any filtering, ordered by polygon and year
image_counts = (
    pd.DataFrame(cloud_data[['polygon_id', 'year']].value_counts(), columns = ['count'])
    .sort_index()
)
# image_counts.to_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/image_counts_pre_filter.csv')
image_counts

In [None]:
# Keep only images with an area of at least 1.5 km^2 so that AROSICS gets at least a few tie points.
# Cloud cover is deliberately not filtered at this stage.
potential_images = cloud_data.loc[cloud_data['area'] >= 1.5]
potential_images

In [None]:
# Preview the images whose MODIS and Planet cloud estimates differ by more than 80.
# Usually a high value from either method means the image really is cloudy, but a few images
# that MODIS flags as cloudy are mostly clear. If there are not enough images, this is a place
# where cloud_cover could be corrected by hand.
potential_images.loc[potential_images['modis_planet_diff'] > 80]

In [None]:
# Scratch check of the date handling: daily dates from the earliest snow-free date through
# 2017-08-31 as 'YYYY-MM-DD' strings, then the position of the first date containing '07-31'
dates = pd.DataFrame({
    'date': pd.date_range(start = min(snow_free_date),
                          end = '2017-08-31',
                          freq = 'D').strftime('%Y-%m-%d')
})
np.flatnonzero([bool(re.search('07-31', day)) for day in dates.date])[0]


In [None]:
# Create, for every polygon and year, the list of dates in order of preference. The resulting
# 'idx' column (0 = most preferred) is used to sort the images before slicing them.
# The order starts at August 1 and alternates outwards: Aug 1, Jul 31, Aug 2, Jul 30, ...
# Once August is used up, the remaining earlier dates follow from latest to earliest.
date_frames = []
for polygon_id in aoi.index:
    for year in potential_images['year'].unique():
        # start at the snow-free date when it is on or before July 1, otherwise at July 1
        july_first = pd.Timestamp(str(year) + '-07-01')
        snow_free = snow_free_date[polygon_id, year]
        start_date = snow_free if snow_free <= july_first else july_first

        dates = pd.DataFrame({'date': pd.date_range(start = start_date,
                                                    end = str(year) + '-08-31',
                                                    freq = 'D').strftime('%Y-%m-%d')})

        # position of August 1; everything before it is reversed so that July 31 comes first
        split_loc = int(np.flatnonzero(dates['date'] == str(year) + '-08-01')[0])
        dates_1 = np.flip(np.arange(0, split_loc))
        dates_2 = np.arange(split_loc, len(dates))
        # insert the k-th August date in front of the k-th (reversed) earlier date
        idx = list(np.insert(dates_1, np.arange(0, len(dates_2)), dates_2))

        dates = dates.iloc[idx].reset_index(drop = True)
        dates['polygon_id'] = polygon_id
        dates['year'] = year
        dates['idx'] = dates.index.astype('Int32')
        dates['date'] = pd.to_datetime(dates['date'])
        date_frames.append(dates.set_index(['polygon_id', 'year', 'date']))

# concatenate once at the end instead of growing a frame inside the loop
date_order = pd.concat(date_frames) if date_frames else pd.DataFrame()
date_order

In [None]:
# filter images based on clouds and dates
# For every polygon and year, images are collected in cloud cover bins (cloud free first, then
# (0, 10], (10, 20], ... up to (30, 40]), each bin sorted by date preference (idx from date_order),
# until the summed coverage reaches 1000%. Surplus images are then removed from the end.
images_ordered = pd.DataFrame(columns = potential_images.columns)
for polygon_id in aoi.index:
    print('###################################################')
    print(polygon_id)
    print('###################################################')
    
    # only needed by the commented-out per-location image count further down
    polygon_geometry = (
        aoi[polygon_id:polygon_id+1].reset_index()
        .rename(columns = {'index': 'polygon_id'})
        .loc[:, ['polygon_id','geometry']]
    )
    
    for year in potential_images['year'].unique():
        print(year)
        
        # get data
        temp_data = potential_images[(potential_images['polygon_id'] == polygon_id) &
                                     (potential_images['year'] == year)]
        
        # first get all images with no clouds
        temp_output = (
            temp_data[(temp_data['cloud_cover'] == 0)]
            # arrange in correct date order
            .join(date_order, on = ['polygon_id', 'year', 'date'])
            .sort_values(by = ['idx'])
            .reset_index(drop = True)
        )
        
        ### It would be nice to get an image count across the entire image and make sure each location
        ### has at least 10, but I haven't been able to figure out how
#         # check number of images
#         polygon_geometry['n_images'] = 0
#         n_images = temp_output.loc[:, ['polygon_id', 'geometry']]
#         n_images['n_images'] = 1
#         n_images = pd.concat([n_images, polygon_geometry])
#         img_union = n_images.overlay(n_images, how = 'union')
        
        # add the next cloudier bin while coverage is short, bins remain and images are left
        if sum(temp_output['coverage']) < 1000:
            cloud_lwr = 0
            cloud_upr = 10
            while sum(temp_output['coverage']) < 1000 and cloud_upr < 50 and len(temp_data) > len(temp_output):
                temp_output = (
                    pd.concat([temp_output,
                               (temp_data[(temp_data['cloud_cover'] > cloud_lwr) &
                                          (temp_data['cloud_cover'] <= cloud_upr)]
                                # arrange in correct date order
                                .join(date_order, on = ['polygon_id', 'year', 'date'])
                                .sort_values(by = ['idx'])
                                .reset_index(drop = True))])
                    .reset_index(drop = True)
                )
                
                cloud_lwr = cloud_lwr + 10
                cloud_upr = cloud_upr + 10
                
        print('coverage: ' + str(sum(temp_output['coverage'])))
        # temp_output is empty when no image passed the filters; there is nothing to trim then
        # (looking up the last row of an empty frame would raise an IndexError)
        if len(temp_output) > 0:
            print('coverage - 1: ' + str(sum(temp_output['coverage'].iloc[:-1])))
        # check coverage - look for 1000% coverage over all images:
        # drop the last (least preferred) image while the remaining ones still exceed 1000%
        while len(temp_output) > 0 and sum(temp_output['coverage'].iloc[:-1]) > 1000:
            print('Removing index ' + str(temp_output.index[-1]))
            temp_output = temp_output.iloc[:-1]
            print('coverage - 1: ' + str(sum(temp_output['coverage'].iloc[:-1])))


        # add images to output
        images_ordered = pd.concat([images_ordered, 
                                    temp_output])
        print('\n')

# give the combined table a unique running index (reset_index returns a new frame, so assign it)
images_ordered = images_ordered.reset_index(drop = True)
# images_ordered.to_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/planet_images_filtered.csv',
#                       index = False)
images_ordered


In [None]:
# a bit of clean-up
# make sure 'coverage' is numeric before it is compared and summed in the cells below
# NOTE(review): presumably needed because the concatenation above can leave an object dtype - confirm
images_ordered['coverage'] = pd.to_numeric(images_ordered['coverage'])

In [None]:
# Number of selected images that have no cloud cover, by polygon and year.
# The count column is named explicitly instead of renaming a column labelled 0, because the
# name of a value_counts() result differs between pandas versions.
image_counts_cloud_free = (
    images_ordered[images_ordered['cloud_cover'] == 0]
    .groupby(['polygon_id', 'year'])
    .size()
    .rename('cloud_free_image_count')
    .to_frame()
)
image_counts_cloud_free

In [None]:
# 1 RTS polygon has only 3 completely cloud-free images in 2018
# First: the polygon/year combinations with the fewest cloud-free images
pprint(image_counts_cloud_free[image_counts_cloud_free['cloud_free_image_count'] == min(image_counts_cloud_free['cloud_free_image_count'])])
# Then: per year, how many polygons have each cloud-free image count. The result column is
# named explicitly so that it does not depend on the default name of a value_counts() result.
(
    image_counts_cloud_free[['cloud_free_image_count']]
    .groupby('year')
    .value_counts()
    .sort_index()
    .rename('polygon_count')
    .to_frame()
)

In [None]:
# Distribution of cloud-free image counts, one panel per year
# (optional extras: hue = 'year', multiple = 'stack', alpha = 0.5)
sns.displot(data = image_counts_cloud_free, x = 'cloud_free_image_count', row = 'year')

In [None]:
# Summary of the selected images per polygon and year: number of images, summed coverage,
# summed area and the highest cloud cover among them
image_counts_f1 = (
    images_ordered[['polygon_id', 'year']]
    .value_counts()
    .rename('img_count')
    .to_frame()
    .sort_index()
    .join(
        images_ordered
        .groupby(['polygon_id', 'year'])[['coverage', 'area', 'cloud_cover']]
        .agg({'coverage': 'sum',
              'area': 'sum',
              'cloud_cover': 'max'})
        .rename(columns = {'coverage': 'cumulative_coverage',
                           'area': 'cumulative_area',
                           'cloud_cover': 'max_cloud_cover'})
    )
)
# image_counts_f1.to_csv('/home/hrodenhizer/Documents/permafrost_pathways/rts_mapping/planet_processing_test/data/automated_download/image_counts_filtered.csv')
image_counts_f1

In [None]:
# Rows with the smallest image count (11 images is the least for any polygon in any year)
image_counts_f1.loc[image_counts_f1['img_count'].eq(min(image_counts_f1['img_count']))]

In [None]:
# Rows with the largest image count (50 images is the most for any polygon in any year)
image_counts_f1.loc[image_counts_f1['img_count'].eq(max(image_counts_f1['img_count']))]

In [None]:
# Rows with the smallest cumulative coverage (1001% is the least for any polygon in any year)
image_counts_f1.loc[image_counts_f1['cumulative_coverage'].eq(min(image_counts_f1['cumulative_coverage']))]

In [None]:
# Rows with the largest cumulative coverage (1099% is the most for any polygon in any year)
image_counts_f1.loc[image_counts_f1['cumulative_coverage'].eq(max(image_counts_f1['cumulative_coverage']))]

In [None]:
# Rows with the highest cloud cover among the selected images (18% is the max in any image)
image_counts_f1.loc[image_counts_f1['max_cloud_cover'].eq(max(image_counts_f1['max_cloud_cover']))]

In [None]:
# Total area if all selected images are downloaded, followed by the same value divided by 50000000
# NOTE(review): 50000000 is presumably the download quota - confirm value and unit
total_area = sum(images_ordered['area'])
print(total_area, total_area/50000000, sep = '\n')

In [None]:
# Distribution of the number of selected images per polygon, one panel per year
# (optional extras: hue = 'year', multiple = 'stack', alpha = 0.5)
sns.displot(data = image_counts_f1, x = 'img_count', row = 'year')

In [None]:
# Per year: number of RTS polygons for each value of the max cloud cover in the selected images
image_counts_f1.groupby('year')['max_cloud_cover'].value_counts().sort_index()

In [None]:
# Distribution of the max cloud cover among the selected images, one panel per year
# (optional extras: hue = 'year', multiple = 'stack', alpha = 0.5)
sns.displot(data = image_counts_f1, x = 'max_cloud_cover', row = 'year')

In [None]:
# Get info for all orders.
# The order list is paginated: each page is fetched in turn, following the 'next' link
# for as long as a page provides one.
all_orders = []
next_url = 'https://api.planet.com/compute/ops/orders/v2/'
with requests.Session() as session:
    session.auth = HTTPBasicAuth(api_key, '')
    while next_url:
        response = session.get(next_url)
        # fail with the HTTP error itself rather than a KeyError on an error payload
        response.raise_for_status()
        orders = response.json()
        all_orders.extend(orders['orders'])
        # look for the 'next' link explicitly instead of counting the entries in '_links'
        next_url = orders.get('_links', {}).get('next')
all_orders

In [None]:
# number of orders collected over all pages
len(all_orders)

In [None]:
# One row per ordered item: the polygon id and year parsed from the order name
# (third-to-last and second-to-last '_'-separated parts), the item id and the order name.
# Only the first product of every order is read.
# The order name is kept as text: converting it with int() either fails or, because int()
# accepts underscores between digits, produces a number without meaning.
order_records = []
for order in all_orders:
    name_parts = order['name'].split('_')
    for item_id in order['products'][0]['item_ids']:
        order_records.append({'polygon_id': int(name_parts[-3]),
                              'year': int(name_parts[-2]),
                              'id': item_id,
                              'order': order['name']})
ordered_items = pd.DataFrame(order_records,
                             columns = ['polygon_id', 'year', 'id', 'order']).drop_duplicates()
ordered_items

In [None]:
# names of all orders, in the order they were retrieved
order_names = [entry['name'] for entry in all_orders]
order_names

In [None]:
# Attach the order information to every selected image (matched on polygon, year and image id).
# Images without a matching ordered item keep NaN in the joined columns.
orders_alignment = (
    images_ordered
    .join(ordered_items.set_index(['polygon_id', 'year', 'id']),
          on = ['polygon_id', 'year', 'id'])
    .reset_index()
)

In [None]:
# idx: labels of the rows without any missing value; display all the other rows
idx = orders_alignment.dropna().index
orders_alignment.loc[orders_alignment.index.difference(idx)]

In [None]:
# Planet charges a minimum of 100 km^2 per image, even if you only downloaded a fraction of that!
# The 'area' column is selected with brackets on purpose: on a GeoDataFrame the attribute
# `.area` is the geometric area of the geometry column, not the column called 'area'.
orders_alignment['quota_usage'] = [area if area >= 100 else 100 for area in orders_alignment['area']]
# sum the quota usage over the complete rows only (labels collected in idx)
total_area = sum(orders_alignment[orders_alignment.index.isin(idx)]['quota_usage'])
total_area

In [None]:
### for some reason planet thinks I have downloaded 447,487 km^2
# Compare the calculated quota usage with what it would be if every ordered item were charged
# at the full (unclipped) image area of a single instrument type.
dc = 25*11.5 # average area of entire PS2 image
dr = 25*23.0 # average area of entire PS2.SD image
sd = 32.5*19.6 # average area of entire PSB.SD image
# printed values: the area calculated previously (confirmed to equal the downloaded image size),
# then the full-image totals assuming all items are PS2, PS2.SD and PSB.SD, in that order
print(round(total_area),
      *(len(ordered_items)*full_image_area for full_image_area in (dc, dr, sd)))
# none of those values are close to what planet says I have downloaded