### This script gives better control over tasks in Earth Engine

* Purpose of script: allows the user to list tasks and cancel all pending tasks
* Author: Rutger Hofste
* Kernel used: python27
* Date created: 20170913

In [1]:
# Identity of this script and of the output it produces.
SCRIPT_NAME = "Y2017M09D13_RH_EE_task_control_V01"
OUTPUT_VERSION = 1
OUTPUT_FILE_NAME = "detailed_tasks"

# Local (EC2) and S3 output locations; both end in
# "<SCRIPT_NAME>/output_V<two-digit version>/".
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION)
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION)

# Echo both locations so they are recorded in the notebook output.
print("".join(["Output s3: ", s3_output_path,
               "\nOutput ec2: ", ec2_output_path]))


Output s3: s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/
Output ec2: /volumes/data/Y2017M09D13_RH_EE_task_control_V01/output_V01/


In [2]:
# Start from a clean local output directory: delete whatever is at
# ec2_output_path, then recreate it (mkdir -p also creates missing parents).
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

In [3]:
import time

# Take one UTC snapshot and format both strings from it. time.strftime()
# without an explicit time tuple formats *local* time, which made the "UTC"
# label wrong on any machine whose time zone is not UTC; using a single
# snapshot also keeps the date and the time consistent with each other.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
print(dateString,timeString)

Y2018M04D25 UTC 06:46


# Settings

In [4]:
# Upper bound on the number of tasks (from the start of the task list)
# whose detailed status is requested by get_details().
MAXTASKS = 20
# Cancellation switch: the final cell cancels all pending tasks only when
# this equals 1; leave at 0 to list tasks without cancelling anything.
CANCELTASKS = 0

In [5]:
import pandas as pd
import ee
from retrying import retry
import datetime
import random

In [6]:
# Initialize the Earth Engine client library; every ee.* call in the cells
# below runs after this.
ee.Initialize()

# Functions

In [7]:
def get_tasks():
    """Return the task list reported by ``ee.batch.Task.list()``."""
    tasks = ee.batch.Task.list()
    return tasks

def cancel_task(task):
    """Cancel one task if its recorded state shows it has not finished.

    The task is printed, then the function sleeps for a random 0.5-1.0
    seconds before looking at the state, which spaces out successive calls.
    Only tasks whose ``task.config['state']`` is RUNNING, UNSUBMITTED or
    READY are cancelled; any other state is left alone.

    Args:
        task: object exposing a ``config`` mapping with a ``'state'`` entry
            and a ``cancel()`` method.

    Returns:
        None.
    """
    print(task)
    jitter = random.random()
    time.sleep(0.5 + jitter * 0.5)

    cancellable_states = (u'RUNNING', u'UNSUBMITTED', u'READY')
    if task.config['state'] not in cancellable_states:
        return

    print('canceling %s' % task)
    task.cancel()
        
        
@retry(wait_exponential_multiplier=10000, wait_exponential_max=100000)
def checkStatus(task):
    """Return the status of one task via ``ee.batch.Task.status``.

    The call is wrapped in ``retrying.retry`` with exponential wait settings
    (multiplier 10000, maximum 100000; the retrying library documents these
    as milliseconds).

    NOTE(review): no stop condition is passed to ``retry``, so a call that
    keeps raising is presumably retried indefinitely - confirm this is
    intended.

    Args:
        task: the task whose status is requested.

    Returns:
        Whatever ``ee.batch.Task.status`` returns; ``get_details`` feeds it
        to ``pd.DataFrame`` as a dict of fields.
    """
    status = ee.batch.Task.status(task)
    return status
           
def get_details(taskList, MAXTASKS, status_fn=None):
    """Collect the status of the first tasks of a list into one DataFrame.

    For each of the first ``min(len(taskList), MAXTASKS)`` tasks the status
    dict is fetched and turned into one row. Timing columns are then derived
    from the millisecond timestamps for all rows at once:

    * ``calctime(min)``  - update minus start, in minutes
    * ``queuetime(min)`` - start minus creation, in minutes
    * ``runtime(min)``   - queue time plus calculation time
    * ``start_timestamp_UTC`` - start time of day formatted ``HH:MM:SS``,
      in UTC (epoch milliseconds are converted directly, so the result does
      not depend on the machine's local time zone)

    A derived column is only added when the timestamp columns it needs are
    present; rows lacking a value get NaN instead of aborting the whole
    computation silently.

    Args:
        taskList: sequence of tasks, e.g. the result of ``get_tasks()``.
        MAXTASKS: maximum number of tasks, counted from the start of
            ``taskList``, to inspect.
        status_fn: optional callable mapping a task to its status dict.
            Defaults to ``checkStatus``; mainly useful for testing or for
            supplying cached statuses.

    Returns:
        pandas.DataFrame with one row per inspected task, indexed by the
        task's position in ``taskList``. Empty if no task was inspected.
    """
    if status_fn is None:
        status_fn = checkStatus

    frames = []
    for i in range(0, min(len(taskList), MAXTASKS)):
        frames.append(pd.DataFrame(status_fn(taskList[i]), index=[i]))
        print(i)  # progress indicator for the per-task status lookups

    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()

    # Concatenate once instead of growing the frame row by row.
    df = pd.concat(frames)

    def _ms_column(name):
        # None when the column is absent; unparsable values become NaN so a
        # single odd record cannot break the remaining rows.
        if name not in df.columns:
            return None
        return pd.to_numeric(df[name], errors="coerce")

    created = _ms_column("creation_timestamp_ms")
    started = _ms_column("start_timestamp_ms")
    updated = _ms_column("update_timestamp_ms")

    ms_per_minute = 1000 * 60
    if started is not None and updated is not None:
        df["calctime(min)"] = (updated - started) / ms_per_minute
    if started is not None and created is not None:
        df["queuetime(min)"] = (started - created) / ms_per_minute
    if started is not None and updated is not None and created is not None:
        df["runtime(min)"] = df["queuetime(min)"] + df["calctime(min)"]
    if started is not None:
        start_utc = pd.to_datetime(started, unit="ms", errors="coerce")
        df["start_timestamp_UTC"] = start_utc.dt.strftime('%H:%M:%S')
    return df
    


In [8]:
# Fetch the task list once; the cells below all work on this snapshot.
taskList = get_tasks()


In [9]:
# Sanity check: show the container type returned by get_tasks().
type(taskList)

list

In [10]:
# Number of tasks in the fetched list.
len(taskList)

17853

In [11]:
# Look up the detailed status of at most MAXTASKS tasks from the start of
# the list; get_details() prints the index of each task as it is processed.
detailedTasks = get_details(taskList,MAXTASKS)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [12]:
# Save the task details under one base name in two formats: CSV and pickle.
output_stem = ec2_output_path + OUTPUT_FILE_NAME
detailedTasks.to_csv(output_stem + ".csv")
detailedTasks.to_pickle(output_stem + ".pkl")

In [13]:
# Copy the whole local output directory to the matching S3 location.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.pkl to s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.pkl
upload: ../../../../data/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.csv to s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.csv


# DANGER ZONE

In [14]:
# Display the collected task details for review before deciding to cancel.
detailedTasks

Unnamed: 0,creation_timestamp_ms,description,id,output_url,start_timestamp_ms,state,task_type,update_timestamp_ms,calctime(min),queuetime(min),runtime(min),start_timestamp_UTC
0,1524577909915,global_historical_riverdischarge_month_million...,OGNUAR4ZBDPKC2DYUAW44TWG,https://code.earthengine.google.com/?asset=pro...,1524600405136,COMPLETED,EXPORT_IMAGE,1524600595121,3.166417,374.92035,378.086767,20:06:45
1,1524577906512,global_historical_riverdischarge_month_million...,3YMP4UZWNBUVCCZAYK6KZAYY,https://code.earthengine.google.com/?asset=pro...,1524600404277,COMPLETED,EXPORT_IMAGE,1524600599427,3.2525,374.96275,378.21525,20:06:44
2,1524577902508,global_historical_riverdischarge_month_million...,ZR3LHGWIUZFVUEKOS7MWS3ZC,https://code.earthengine.google.com/?asset=pro...,1524600296815,COMPLETED,EXPORT_IMAGE,1524600476611,2.9966,373.23845,376.23505,20:04:56
3,1524577898590,global_historical_riverdischarge_month_million...,UDHOOEIIQFIBCYFGS3Z4P3L5,https://code.earthengine.google.com/?asset=pro...,1524600234429,COMPLETED,EXPORT_IMAGE,1524600408689,2.904333,372.263983,375.168317,20:03:54
4,1524577894652,global_historical_riverdischarge_month_million...,OBCZFCSRMI64HL3CBKQC6O46,https://code.earthengine.google.com/?asset=pro...,1524600224892,COMPLETED,EXPORT_IMAGE,1524600402184,2.954867,372.170667,375.125533,20:03:44
5,1524577890754,global_historical_riverdischarge_month_million...,QD4232MMZDDW6BLWXA5NNFAK,https://code.earthengine.google.com/?asset=pro...,1524600221133,COMPLETED,EXPORT_IMAGE,1524600400887,2.9959,372.172983,375.168883,20:03:41
6,1524577886817,global_historical_riverdischarge_month_million...,YCVMMHPKSC7EOV22K3JRIEU2,https://code.earthengine.google.com/?asset=pro...,1524600217084,COMPLETED,EXPORT_IMAGE,1524600400309,3.05375,372.171117,375.224867,20:03:37
7,1524577882881,global_historical_riverdischarge_month_million...,O76WAOS2WQZHXCNNWJXNKDMW,https://code.earthengine.google.com/?asset=pro...,1524600107607,COMPLETED,EXPORT_IMAGE,1524600292951,3.089067,370.4121,373.501167,20:01:47
8,1524577879054,global_historical_riverdischarge_month_million...,NNTJVY5EGIKFXX5Y7ZKA6ZA2,https://code.earthengine.google.com/?asset=pro...,1524600056432,COMPLETED,EXPORT_IMAGE,1524600228909,2.874617,369.622967,372.497583,20:00:56
9,1524577875171,global_historical_riverdischarge_month_million...,DH2M37QSH4V7RK3YJPVEJKQL,https://code.earthengine.google.com/?asset=pro...,1524600038496,COMPLETED,EXPORT_IMAGE,1524600220305,3.03015,369.38875,372.4189,20:00:38


In [15]:
# Runs only when the CANCELTASKS switch equals 1. The states examined here
# are the ones stored in the taskList snapshot fetched earlier.
if CANCELTASKS == 1:
    pendingTasks = list(filter(
        lambda candidate: candidate.config['state'] in (u'RUNNING',u'UNSUBMITTED',u'READY'),
        taskList))
    for task in pendingTasks:
        cancel_task(task)