### This script gives better control over tasks in Earth Engine

* Purpose of script: allows the user to list tasks and cancel all pending tasks
* Author: Rutger Hofste
* Kernel used: python27
* Date created: 20170913

In [1]:
# Identifiers from which the versioned output locations are built.
SCRIPT_NAME = "Y2017M09D13_RH_EE_task_control_V01"
OUTPUT_VERSION = 1
OUTPUT_FILE_NAME = "detailed_tasks"

# Local working directory and the matching S3 prefix. The version number is
# zero-padded to two digits (1 -> "output_V01").
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}/".format(SCRIPT_NAME, OUTPUT_VERSION)
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION)

print("Output s3: " + s3_output_path + "\nOutput ec2: " + ec2_output_path)


Output s3: s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/
Output ec2: /volumes/data/Y2017M09D13_RH_EE_task_control_V01/output_V01/


In [2]:
# Start from an empty output directory: remove whatever a previous run left
# behind, then recreate the directory (mkdir -p also creates missing parents).
# NOTE(review): on a first run the directory does not exist yet, so rm will
# presumably report an error before mkdir creates it - harmless, but confirm.
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

In [3]:
import time

# Run stamp for the notebook log. time.strftime() formats *local* time unless
# it is given a time tuple, so gmtime() is passed explicitly to make the "UTC"
# label true on any machine. One gmtime() value feeds both strings so the date
# and the time can never straddle midnight.
now_utc = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", now_utc)
timeString = time.strftime("UTC %H:%M", now_utc)
print(dateString,timeString)

Y2018M05D08 UTC 14:34


# Settings

In [4]:
# Maximum number of tasks, counted from the start of the task list, for which
# get_details fetches a status record.
MAXTASKS = 10
# Safety switch: the final cell cancels tasks only when this is set to 1.
# Leave at 0 to list tasks without cancelling anything.
CANCELTASKS = 0 # Cancels all pending tasks

In [5]:
import pandas as pd
import ee
from retrying import retry
import datetime
import random

In [6]:
# Initialise the Earth Engine client before the task API is used below.
ee.Initialize()

# Functions

In [7]:
def get_tasks():
    """Return the task list reported by ``ee.batch.Task.list()``."""
    tasks = ee.batch.Task.list()
    return tasks

def cancel_task(task):
    """Cancel ``task`` if its recorded state shows it has not finished.

    The task is printed, then the call pauses for a randomised 0.5-1.0 s
    before looking at ``task.config['state']``. Only tasks whose state is
    RUNNING, UNSUBMITTED or READY are cancelled; any other state is left
    untouched.

    Args:
        task: object exposing a ``config`` mapping with a ``'state'`` entry
            and a ``cancel()`` method.

    Returns:
        None.
    """
    print(task)
    # Randomised pause so consecutive calls are spaced out unevenly.
    jitter = random.random()
    time.sleep(0.5+jitter*0.5)
    cancellable_states = (u'RUNNING',u'UNSUBMITTED',u'READY')
    if task.config['state'] not in cancellable_states:
        return
    print('canceling %s' % task)
    task.cancel()
        
        
@retry(wait_exponential_multiplier=10000, wait_exponential_max=100000)
def checkStatus(task):
    """Fetch the status record of one task via ``ee.batch.Task.status``.

    The call is wrapped in ``retry`` with exponential back-off settings
    (multiplier 10000, maximum 100000).
    """
    # NOTE(review): no stop condition is passed to retry, so a call that keeps
    # failing is presumably retried indefinitely - confirm this is intended.
    status = ee.batch.Task.status(task)
    return status
           
def get_details(taskList,MAXTASKS):
    """Collect the status of the first ``MAXTASKS`` tasks in one DataFrame.

    For every task the status record from ``checkStatus`` becomes one row,
    indexed by the task's position in ``taskList``. When the record carries
    the needed timestamps, four derived columns are added:

    * ``calctime(min)``  - update minus start, in minutes
    * ``queuetime(min)`` - start minus creation, in minutes
    * ``runtime(min)``   - queue time plus calculation time
    * ``start_timestamp_UTC`` - start time of day as ``HH:MM:SS`` in UTC

    A record that lacks a timestamp (or holds an unusable one) keeps whatever
    derived columns were computed before the problem and is otherwise stored
    as-is; the missing cells show up as NaN in the combined frame.

    Args:
        taskList: sequence of tasks accepted by ``checkStatus``.
        MAXTASKS: upper bound on the number of tasks to query.

    Returns:
        pandas.DataFrame with one row per queried task; an empty DataFrame
        when there is nothing to query.
    """
    epoch = datetime.datetime(1970, 1, 1)
    # Float divisor so plain Python 2 integers are not floor-divided.
    ms_per_minute = 1000.0*60
    frames = []
    for i in range(0,min(len(taskList),MAXTASKS)):
        # Copy so the derived keys never leak into the caller's status dict.
        record = dict(checkStatus(taskList[i]))
        try:
            record["calctime(min)"] = (record["update_timestamp_ms"]-record["start_timestamp_ms"])/ms_per_minute
            record["queuetime(min)"] = (record["start_timestamp_ms"]-record["creation_timestamp_ms"])/ms_per_minute
            record["runtime(min)"] = record["queuetime(min)"]+record["calctime(min)"]
            # Epoch arithmetic gives UTC regardless of the machine's timezone
            # (datetime.fromtimestamp would silently use local time).
            started = epoch + datetime.timedelta(milliseconds=record["start_timestamp_ms"])
            record["start_timestamp_UTC"] = started.strftime('%H:%M:%S')
        except (KeyError, TypeError, ValueError, OverflowError):
            # Best effort: tasks without usable timestamps are still listed,
            # just without the derived columns. Unlike a bare except this does
            # not swallow KeyboardInterrupt.
            pass
        frames.append(pd.DataFrame(record, index=[i]))
        print(i)
    if not frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # One concat at the end instead of growing the frame row by row
    # (DataFrame.append is quadratic and no longer exists in pandas 2.x).
    return pd.concat(frames)
    


In [8]:
# Snapshot of the task list; the cells below work from this list, so re-run
# this cell to refresh it.
taskList = get_tasks()


In [9]:
# Check which container type get_tasks() returned.
type(taskList)

list

In [10]:
# Total number of tasks in the snapshot (get_details only queries the first MAXTASKS).
len(taskList)

803

In [11]:
# Fetch status details for at most MAXTASKS tasks; each processed index is
# printed as progress.
detailedTasks = get_details(taskList,MAXTASKS)

0
1
2
3
4
5
6
7
8
9


In [12]:
# Write the task table twice under the same base name: once as CSV and once
# as a pickle.
output_base = ec2_output_path + OUTPUT_FILE_NAME
detailedTasks.to_csv(output_base + ".csv")
detailedTasks.to_pickle(output_base + ".pkl")

In [13]:
# Copy everything in the local output directory to the S3 output prefix.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.csv to s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.csv
upload: ../../../../data/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.pkl to s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.pkl


# DANGER ZONE

In [14]:
# Display the collected task details for review before the cancellation cell below.
detailedTasks

Unnamed: 0,calctime(min),creation_timestamp_ms,description,error_message,id,output_url,progress,queuetime(min),runtime(min),source_url,start_timestamp_UTC,start_timestamp_ms,state,task_type,update_timestamp_ms
0,0.003067,1525789946875,test3,,CMWBQOJLZP5DB2WZMC6QQJW2,,0.0,0.0667,0.069767,https://code.earthengine.google.com/6c9909ae9b...,14:32:30,1525789950877,RUNNING,EXPORT_FEATURES,1525789951061
1,2.502667,1525788909830,,,IX4UCIRZBMGT7VPH3FNREJ6F,,0.0,15.616133,18.1188,,14:30:46,1525789846798,RUNNING,EXPORT_FEATURES,1525789996958
2,10.035067,1525787507798,,,AYT6V6PIFG2ET45HZNRDSAYW,,,19.9157,29.950767,,14:11:42,1525788702740,CANCELLED,EXPORT_FEATURES,1525789304844
3,14.17795,1525781368639,validmaxfa_hybas_lev06_v1c_merged_fiona_30s_V04,,5QBXF3EKT4D2ZHTNMJJWJ4PU,https://code.earthengine.google.com/?asset=pro...,,0.039717,14.217667,,12:09:31,1525781371022,COMPLETED,EXPORT_IMAGE,1525782221699
4,1.837533,1525780382944,validmaxfa_hybas_lev06_v1c_merged_fiona_30s_V04,,S446CDAYNTIOULLY2Y3GBB6B,https://code.earthengine.google.com/?asset=pro...,,0.069683,1.907217,,11:53:07,1525780387125,COMPLETED,EXPORT_IMAGE,1525780497377
5,8.2333,1525445260026,test_test,,PA543Y2T3NNJKPN5FDOX5ZL5,https://code.earthengine.google.com/?asset=use...,,0.07965,8.31295,https://code.earthengine.google.com/196e55dbb8...,14:47:44,1525445264805,COMPLETED,EXPORT_FEATURES,1525445758803
6,5.48185,1525441407605,Asset ingestion: users/rutgerhofste/kilns,Cannot overwrite asset 'users/rutgerhofste/kil...,HQUCFTUZMOVMQ5C7WTOLH5UL,,,0.035767,5.517617,,13:43:29,1525441409751,FAILED,INGEST,1525441738662
7,6.592267,1525441260294,Asset ingestion: users/rutgerhofste/kilns,,6WXSTROGKPI6X3R65UYF42XG,https://code.earthengine.google.com/?asset=use...,,0.0351,6.627367,,13:41:02,1525441262400,COMPLETED,INGEST,1525441657936
8,12.671533,1525439061778,global_max_maskedaccumulateddrainagearea_km2_3...,,MYQXD2C4RWY3GEVRQRECYTAQ,https://code.earthengine.google.com/?asset=pro...,,0.0925,12.764033,,13:04:27,1525439067328,COMPLETED,EXPORT_IMAGE,1525439827620
9,10.950933,1525439060078,global_count_maskedaccumulateddrainagearea_km2...,,4PMAUCQQHGZADAOOBXWLGFFY,https://code.earthengine.google.com/?asset=pro...,,0.054317,11.00525,,13:04:23,1525439063337,COMPLETED,EXPORT_IMAGE,1525439720393


In [15]:
# Does nothing unless the CANCELTASKS switch is set to 1.
if CANCELTASKS == 1:
    cancellable_states = (u'RUNNING',u'UNSUBMITTED',u'READY')
    # First collect every task whose recorded state is still cancellable ...
    pendingTasks = []
    for task in taskList:
        if task.config['state'] in cancellable_states:
            pendingTasks.append(task)
    # ... then cancel them one at a time.
    for task in pendingTasks:
        cancel_task(task)