### This script gives better control over tasks in Earth Engine

* Purpose of script: allows the user to list tasks and cancel all pending tasks
* Author: Rutger Hofste
* Kernel used: python27
* Date created: 20170913

In [1]:
# Identity of this script and of the output it produces; both output
# locations below are derived from these values.
SCRIPT_NAME = "Y2017M09D13_RH_EE_task_control_V01"
OUTPUT_VERSION = 1
OUTPUT_FILE_NAME = "detailed_tasks"

# Local (EC2) directory and S3 prefix, keyed by the script name and a
# zero-padded two-digit output version.
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION)
s3_output_path = ("s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/").format(
    SCRIPT_NAME, OUTPUT_VERSION)

# Echo both destinations so they are visible in the notebook output.
print("Output s3: " + s3_output_path + "\nOutput ec2: " + ec2_output_path)


Output s3: s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/
Output ec2: /volumes/data/Y2017M09D13_RH_EE_task_control_V01/output_V01/


In [2]:
# Start from a clean local output directory: delete any previous results,
# then recreate the (now empty) directory.
# NOTE(review): `rm -r` reports an error when the directory does not exist yet
# (e.g. on a first run); the `mkdir -p` below still creates it.
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

In [3]:
import time

# Take a single UTC snapshot so the date and time strings always agree and
# really are UTC: time.strftime() falls back to *local* time when no time
# tuple is passed, which would contradict the "UTC" label on any machine
# whose clock is not set to UTC.
now_utc = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", now_utc)
timeString = time.strftime("UTC %H:%M", now_utc)
print(dateString,timeString)

Y2018M04D24 UTC 12:54


# Settings

In [4]:
# Upper bound on the number of tasks (taken from the start of the task list)
# for which get_details() fetches a status.
MAXTASKS = 20
# Safety switch for the last cell of the notebook: only the value 1 makes it
# cancel every task whose state is RUNNING, UNSUBMITTED or READY; 0 leaves
# all tasks untouched.
CANCELTASKS = 0

In [5]:
import pandas as pd
import ee
from retrying import retry
import datetime
import random

In [6]:
# Initialize the Earth Engine client before any ee.batch call is made below.
# NOTE(review): presumably relies on credentials already stored on this machine -- confirm.
ee.Initialize()

# Functions

In [7]:
def get_tasks():
    """Return the task list reported by Earth Engine.

    Thin wrapper around ``ee.batch.Task.list()``: takes no arguments and
    hands the result back unchanged.
    """
    tasks = ee.batch.Task.list()
    return tasks

def cancel_task(task):
    """Cancel ``task`` if its recorded state is RUNNING, UNSUBMITTED or READY.

    The task is always printed first, followed by a randomised pause of
    between 0.5 and 1.0 seconds. Tasks in any other state are left alone.
    The state is read from ``task.config['state']``.
    """
    print(task)

    # Randomised pause; the jitter is drawn before the state is inspected,
    # so every call sleeps regardless of the outcome.
    jitter = random.random()
    time.sleep(0.5+jitter*0.5)

    cancellable_states = (u'RUNNING',u'UNSUBMITTED',u'READY')
    if task.config['state'] not in cancellable_states:
        return

    print('canceling %s' % task)
    task.cancel()
        
        
@retry(wait_exponential_multiplier=10000, wait_exponential_max=100000)
def checkStatus(task):
    """Return the status of ``task`` as reported by ``ee.batch.Task.status``.

    The call is wrapped in ``retry`` with exponential wait settings
    (multiplier 10000, maximum 100000). No stop condition is passed, so a
    failing call is presumably retried indefinitely -- TODO confirm against
    the ``retrying`` documentation.
    """
    status = ee.batch.Task.status(task)
    return status
           
def _timestamp_ms(frame, column):
    """Return ``frame[column]`` as a numeric Series, or None if the column is absent."""
    if column not in frame.columns:
        return None
    # Coerce unusable values to NaN so one odd entry cannot abort the whole run.
    return pd.to_numeric(frame[column], errors="coerce")


def get_details(taskList,MAXTASKS):
    """Build a DataFrame with the status of the first ``MAXTASKS`` tasks.

    The status dictionary returned by ``checkStatus`` for each task becomes
    one row, indexed by the task's position in ``taskList``. Timing columns
    are derived from the millisecond timestamps that are present:

    * ``calctime(min)``       -- update time minus start time
    * ``queuetime(min)``      -- start time minus creation time
    * ``runtime(min)``        -- queuetime + calctime
    * ``start_timestamp_UTC`` -- start time formatted as HH:MM:SS in UTC

    A column is only added when the timestamps it needs exist, so tasks
    that lack some timestamps still get the columns that can be computed.

    Args:
        taskList: sequence of tasks accepted by ``checkStatus``.
        MAXTASKS: maximum number of tasks, counted from the start of
            ``taskList``, to query.

    Returns:
        pandas.DataFrame with one row per queried task; an empty DataFrame
        when no task is queried.
    """
    ms_per_minute = 1000.0 * 60
    frames = []
    for i in range(0, min(len(taskList), MAXTASKS)):
        dfNew = pd.DataFrame(checkStatus(taskList[i]), index=[i])

        created = _timestamp_ms(dfNew, "creation_timestamp_ms")
        started = _timestamp_ms(dfNew, "start_timestamp_ms")
        updated = _timestamp_ms(dfNew, "update_timestamp_ms")

        # Explicit presence checks replace the former bare ``except: pass``,
        # which silently hid every error, not just missing timestamps.
        if started is not None and updated is not None:
            dfNew["calctime(min)"] = (updated - started) / ms_per_minute
        if created is not None and started is not None:
            dfNew["queuetime(min)"] = (started - created) / ms_per_minute
        if "calctime(min)" in dfNew.columns and "queuetime(min)" in dfNew.columns:
            dfNew["runtime(min)"] = dfNew["queuetime(min)"] + dfNew["calctime(min)"]
        if started is not None:
            # Epoch milliseconds are interpreted as UTC here, independent of
            # the machine's local time zone (the column name promises UTC).
            dfNew["start_timestamp_UTC"] = pd.to_datetime(started, unit="ms").dt.strftime('%H:%M:%S')

        frames.append(dfNew)
        print(i)

    # Concatenate once at the end instead of growing a DataFrame row by row
    # (DataFrame.append is quadratic in a loop and absent from pandas >= 2.0).
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
    


In [8]:
# Fetch the task list once; the later cells (details and cancellation) reuse this snapshot.
taskList = get_tasks()


In [9]:
# Sanity check: show the container type returned by get_tasks().
type(taskList)

list

In [10]:
# Sanity check: number of tasks in the fetched list (get_details only queries the first MAXTASKS).
len(taskList)

17138

In [11]:
# Collect status details for at most MAXTASKS tasks from the start of taskList;
# get_details prints the index of each task as it is processed.
detailedTasks = get_details(taskList,MAXTASKS)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [12]:
# Write the task details to the local output directory twice: as CSV for
# inspection and as a pickle of the DataFrame.
detailedTasks.to_csv("".join([ec2_output_path, OUTPUT_FILE_NAME, ".csv"]))
detailedTasks.to_pickle("".join([ec2_output_path, OUTPUT_FILE_NAME, ".pkl"]))

In [13]:
# Copy everything in the local output directory to the matching S3 prefix.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.csv to s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.csv
upload: ../../../../data/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.pkl to s3://wri-projects/Aqueduct30/processData/Y2017M09D13_RH_EE_task_control_V01/output_V01/detailed_tasks.pkl


# DANGER ZONE

In [14]:
# Display the collected task details for a visual check before any cancellation is attempted.
detailedTasks

Unnamed: 0,creation_timestamp_ms,description,id,output_url,start_timestamp_ms,state,task_type,update_timestamp_ms,calctime(min),queuetime(min),runtime(min),start_timestamp_UTC
0,1524438818738,global_historical_PLivWN_month_m_5min_1960_201...,27BT6NEK45ZWZ6XYFLCLPWDL,https://code.earthengine.google.com/?asset=pro...,1524527615744,COMPLETED,EXPORT_IMAGE,1524527794060,2.971933,1479.9501,1482.922033,23:53:35
1,1524438815003,global_historical_PLivWN_month_m_5min_1960_201...,6DUEXXEB5TTYOBDI7QLG3KYX,https://code.earthengine.google.com/?asset=pro...,1524527606007,COMPLETED,EXPORT_IMAGE,1524527786572,3.009417,1479.850067,1482.859483,23:53:26
2,1524438811261,global_historical_PLivWN_month_m_5min_1960_201...,Z7OI6P3LDZSAXQCBYOEN3XGP,https://code.earthengine.google.com/?asset=pro...,1524527568333,COMPLETED,EXPORT_IMAGE,1524527744301,2.9328,1479.284533,1482.217333,23:52:48
3,1524438807554,global_historical_PLivWN_month_m_5min_1960_201...,HP6EOKA3YSBWJGPYL7TWNXSC,https://code.earthengine.google.com/?asset=pro...,1524527522163,COMPLETED,EXPORT_IMAGE,1524527731189,3.483767,1478.576817,1482.060583,23:52:02
4,1524438659599,global_historical_PLivWN_month_m_5min_1960_201...,L2IBCWUB3D27IJDICMMU2KNE,https://code.earthengine.google.com/?asset=pro...,1524527520278,COMPLETED,EXPORT_IMAGE,1524527699035,2.979283,1481.011317,1483.9906,23:52:00
5,1524438655899,global_historical_PLivWN_month_m_5min_1960_201...,EDFL6IO7VS3AO42N4T6THD2K,https://code.earthengine.google.com/?asset=pro...,1524527508922,COMPLETED,EXPORT_IMAGE,1524527762489,4.226117,1480.883717,1485.109833,23:51:48
6,1524438652216,global_historical_PLivWN_month_m_5min_1960_201...,EFVG4MBINVNIBGWKGQ2WE2RU,https://code.earthengine.google.com/?asset=pro...,1524527427349,COMPLETED,EXPORT_IMAGE,1524527600935,2.8931,1479.58555,1482.47865,23:50:27
7,1524438627579,global_historical_PLivWN_month_m_5min_1960_201...,7AHSZV7M3MO2V7RY45GSIRQG,https://code.earthengine.google.com/?asset=pro...,1524527422383,COMPLETED,EXPORT_IMAGE,1524527598116,2.928883,1479.9134,1482.842283,23:50:22
8,1524438624388,global_historical_PLivWN_month_m_5min_1960_201...,F237ECOLH6BQCUUQQM3WLTZC,https://code.earthengine.google.com/?asset=pro...,1524527340213,COMPLETED,EXPORT_IMAGE,1524527513875,2.894367,1478.597083,1481.49145,23:49:00
9,1524438621225,global_historical_PLivWN_month_m_5min_1960_201...,TSEKDNPJ3YGOWXFKNHIU4WLW,https://code.earthengine.google.com/?asset=pro...,1524527286012,COMPLETED,EXPORT_IMAGE,1524527562580,4.609467,1477.74645,1482.355917,23:48:06


In [15]:
# Only acts when the CANCELTASKS setting is 1. The selection uses the states
# recorded in taskList when it was fetched; the full selection is built
# before the first cancellation is attempted.
if CANCELTASKS == 1:
    pendingTasks = list(filter(
        lambda candidate: candidate.config['state'] in (u'RUNNING',u'UNSUBMITTED',u'READY'),
        taskList))
    for task in pendingTasks:
        cancel_task(task)