# Y2017M11D24_RH_Prepare_Image_Collections_EE_V01

* Purpose of script: put all Earth Engine ImageCollections in the same format (million m^3 and dimensionless)
* Kernel used: python27
* Date created: 20171124  

The ImageCollections that need a unit conversion are: discharge and runoff.



In [1]:
import time, datetime, sys
# Record when this notebook run started. The two strings are printed for the
# run log; `start` is used by the last cell to report the total elapsed time.
dateString = time.strftime("Y%YM%mD%d")
# NOTE(review): time.strftime() without a time tuple formats local time; the
# "UTC" label assumes the host clock is set to UTC — confirm.
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
# Last expression of the cell: displays the interpreter version in the output.
sys.version

('Y2017M12D04', 'UTC 13:41')


'2.7.13 |Continuum Analytics, Inc.| (default, Dec 20 2016, 23:09:15) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [2]:
# Earth Engine asset folder that holds the area image used by this script.
EE_PATH = "projects/WRI-Aquaduct/PCRGlobWB20V07"

# Name of the area image inside EE_PATH.
AREA_IMAGE_FILE_NAME = "area_5min_m2V11"

# Stored in the "script_used" property of every exported image.
SCRIPT_NAME = "Y2017M11D24_RH_Prepare_Image_Collections_EE_V01"

# Stored in the "version" property and appended to each export description.
OUTPUT_VERSION = 2

# Author's note: specifying the dimensions caused the script to crash
# (internal error on Google's side); specify scale instead.
# NOTE(review): toVolumeAndExport still passes `dimensions` together with
# `crsTransform` to the export call — confirm which setting is intended.
DIMENSION5MIN = {"x": 4320, "y": 2160}
CRS = "EPSG:4326"

MAXPIXELS = 1e10

# Source ImageCollections whose units have to be converted.
icIds = [
    "projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_month_m3second_5min_1960_2014",
    "projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_year_m3second_5min_1960_2014",
    "projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_runoff_month_mmonth_5min_1958_2014",
    "projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_runoff_year_myear_5min_1958_2014",
]


Temporary override (remove later): the leap-year settings were incorrect, so the yearly runoff and discharge scripts are being rerun. The cell below overrides `icIds` for this rerun.

In [3]:
# Temporary override of the full list defined in the configuration cell: only
# the yearly river-discharge collection is processed in this run.
icIds = ["projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_year_m3second_5min_1960_2014"]

In [4]:
import ee
import re
import subprocess
import pandas as pd
from calendar import monthrange, isleap

In [5]:
# Initialise the Earth Engine client library before any ee.* objects are used.
ee.Initialize()

ImageCollections that are not yet in the target unit: discharge (m^3/s) and runoff (m/month or m/year).

In [6]:
# Seconds in one day (24 * 60 * 60).
# NOTE(review): toVolumeAndExport uses the literal 86400 rather than this name.
sPerD = 86400 #seconds per day

In [7]:
# Area image that the runoff images are multiplied with in toVolumeAndExport
# (presumably cell area in m^2, going by the asset name — confirm).
areaImage = ee.Image("%s/%s"%(EE_PATH,AREA_IMAGE_FILE_NAME))

In [8]:
# "<x>x<y>" string ("4320x2160") passed as `dimensions` to the export call.
dimensions = "%sx%s" %(DIMENSION5MIN["x"],DIMENSION5MIN["y"])

In [9]:
# Affine transform passed as `crsTransform` to the export call.
# NOTE(review): element meanings below follow the Earth Engine ordering
# [xScale, xShearing, xTranslation, yShearing, yScale, yTranslation] — confirm.
crsTransform = [
                0.0833333309780367,  # x pixel size (about 5 arc minutes in degrees)
                0,  # x shearing
                -179.99999491255934,  # x origin (western edge)
                0,  # y shearing
                -0.0833333309780367,  # y pixel size (negative: rows run north to south)
                90.00000254430942  # y origin (northern edge)
              ]

In [10]:
def newImageId(imageId):
    """Return imageId with its unit token replaced by "millionm3".

    Every occurrence of "m3second", "mmonth" or "myear" is substituted. An id
    that contains none of these tokens is returned unchanged.
    """
    unitPattern = re.compile('m3second|mmonth|myear')
    return unitPattern.sub("millionm3", imageId)

def findUnit(imageId):
    """Return the unit token found in imageId, or "error" if there is none.

    The tokens are tried in the order "m3second", "mmonth", "myear"; the first
    one that occurs anywhere in imageId is returned.
    """
    for candidate in ("m3second", "mmonth", "myear"):
        if re.search(candidate, imageId):
            return candidate
    return "error"


def toVolumeAndExport(row):
    """Convert one image to million m^3 and start its export to an asset.

    Discharge images (unit "m3second") are multiplied by the number of seconds
    in their month or year. Runoff images (unit "mmonth" or "myear") are
    multiplied by the module-level areaImage. Both are then divided by 1e6.

    Args:
        row: a pandas Series as yielded by DataFrame.iterrows(), with the
            entries "unit" (see findUnit), "image" (the ee.Image to convert)
            and "newImageId" (asset id to export to). The image must carry the
            properties "temporal_resolution", "year", "month" and
            "exportdescription".

    Raises:
        ValueError: if the unit, or the temporal resolution of a discharge
            image, is not recognised. These cases used to fall through and
            fail later with an UnboundLocalError on newImage.

    Side effects:
        Prints the task description, starts an Earth Engine export task and
        stores the converted image in the module-level dfOut.
    """
    image = row["image"]
    unit = row["unit"]

    # Each getInfo() is a blocking request to the Earth Engine servers.
    temporalResolution = image.get("temporal_resolution").getInfo()
    year = int(image.get("year").getInfo())
    month = int(image.get("month").getInfo())

    if unit == "m3second":
        if temporalResolution == "month":
            daysInPeriod = monthrange(year, month)[1]
        elif temporalResolution == "year":
            daysInPeriod = 366 if isleap(year) else 365
        else:
            print("error",row)
            raise ValueError(
                "unsupported temporal_resolution %r" % (temporalResolution,))
        # m^3/s * seconds in the period -> m^3, then / 1e6 -> million m^3.
        newImage = image.multiply(daysInPeriod * 86400).divide(1e6)
    elif unit == "mmonth" or unit == "myear":
        # Runoff image * area image, then / 1e6 -> million m^3.
        newImage = image.multiply(areaImage).divide(1e6)
    else:
        print("error",row)
        raise ValueError("unsupported unit %r" % (unit,))

    # Carry the source metadata over, then record what this script changed.
    newImage = newImage.copyProperties(image)
    newImage = newImage.set("units","millionm3")
    newImage = newImage.set("script_used",SCRIPT_NAME)
    newImage = newImage.set("version",OUTPUT_VERSION)

    description = "%sV%0.2d" %(image.get("exportdescription").getInfo(),OUTPUT_VERSION)
    print(description)

    task = ee.batch.Export.image.toAsset(
        image = ee.Image(newImage),
        description = description,
        assetId = row["newImageId"],
        dimensions = dimensions,
        crs = CRS,
        crsTransform = crsTransform,
        maxPixels = MAXPIXELS
    )
    task.start()
    # row.name is the row's own index label, so the function no longer relies
    # on the caller's global loop variable `index`.
    dfOut.at[row.name,"newImage"] = newImage
    

In [11]:
# One row per source ImageCollection; further columns are added in later cells.
dfIcs = pd.DataFrame({"icId": icIds})

### Creating new ImageCollections

In [12]:
# Target collection id: the source id with its unit token replaced by "millionm3".
dfIcs["newIcId"] = dfIcs["icId"].apply(newImageId)

In [13]:
# Create the target ImageCollection for every source collection through the
# `earthengine` command line tool.
# check_output raises CalledProcessError when the command exits non-zero.
# NOTE(review): this probably includes the case where the collection already
# exists on a rerun — confirm.
for index, row in dfIcs.iterrows():
    command = "earthengine create collection %s" %row["newIcId"]
    result = subprocess.check_output(command,shell=True)
    print(command,result)

('earthengine create collection projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_year_millionm3_5min_1960_2014', '')


In [14]:
# Build one table with every image of every source collection
# (columns "imageId" and "icId").
# The per-collection frames are collected in a list and concatenated once,
# instead of calling DataFrame.append inside the loop, which copies the
# accumulated frame on every iteration.
imageFrames = []

for index, row in dfIcs.iterrows():
    command = "earthengine ls %s" %row["icId"]
    assetList = subprocess.check_output(command,shell=True).splitlines()
    # Naming the column in the constructor also works for an empty listing;
    # assigning .columns afterwards raises a length mismatch in that case.
    dfImages = pd.DataFrame({"imageId": assetList})
    dfImages["icId"] = row["icId"]
    imageFrames.append(dfImages)

if imageFrames:
    # The original per-frame index is kept, as it was with append.
    dfImages2 = pd.concat(imageFrames)
else:
    # pd.concat raises on an empty list; keep the expected columns available.
    dfImages2 = pd.DataFrame(columns=["imageId", "icId"])

In [15]:
# Derive, per source image: the target asset id, the target collection id,
# the unit token found in the id, and an ee.Image handle for the source asset.
dfImages2["newImageId"] = dfImages2["imageId"].apply(newImageId)
dfImages2["newIcId"] = dfImages2["icId"].apply(newImageId)
dfImages2["unit"] = dfImages2["imageId"].apply(findUnit)
dfImages2["image"] = dfImages2["imageId"].apply(lambda x: ee.Image(x))
# Index by imageId (kept as a column too) so that rows can be addressed by id.
dfImages2 = dfImages2.set_index("imageId",drop=False)

In [16]:
# Copy of the image table; toVolumeAndExport writes each converted image
# into its "newImage" column.
dfOut = dfImages2.copy()

In [17]:
# Convert and export every image. A failure is recorded and reported, and the
# loop continues with the next image.
i = 0
errorlog = []  # index labels (imageIds) of the rows that raised
startLoop = datetime.datetime.now()
for index, row in dfImages2.iterrows():
    i += 1
    try:
        toVolumeAndExport(row)
    except Exception as error:
        # Exception rather than a bare except, so an interrupt still stops the
        # loop. The message is printed because errorlog keeps only the index.
        errorlog.append(index)
        print("error",index,error)
    elapsed = datetime.datetime.now() - startLoop
    print(i,dfImages2.shape[0])
    print(elapsed)
    
    

riverdischarge_yearY1960M01V02
(1, 55)
0:00:04.709889
riverdischarge_yearY1961M01V02
(2, 55)
0:00:09.428285
riverdischarge_yearY1962M01V02
(3, 55)
0:00:14.000334
riverdischarge_yearY1963M01V02
(4, 55)
0:00:18.665316
riverdischarge_yearY1964M01V02
(5, 55)
0:00:23.343728
riverdischarge_yearY1965M01V02
(6, 55)
0:00:27.992584
riverdischarge_yearY1966M01V02
(7, 55)
0:00:32.661838
riverdischarge_yearY1967M01V02
(8, 55)
0:00:37.536257
riverdischarge_yearY1968M01V02
(9, 55)
0:00:42.136799
riverdischarge_yearY1969M01V02
(10, 55)
0:00:46.754185
riverdischarge_yearY1970M01V02
(11, 55)
0:00:51.405756
riverdischarge_yearY1971M01V02
(12, 55)
0:00:56.046111
riverdischarge_yearY1972M01V02
(13, 55)
0:01:00.698804
riverdischarge_yearY1973M01V02
(14, 55)
0:01:05.167702
riverdischarge_yearY1974M01V02
(15, 55)
0:01:09.372780
riverdischarge_yearY1975M01V02
(16, 55)
0:01:13.573196
riverdischarge_yearY1976M01V02
(17, 55)
0:01:17.877229
riverdischarge_yearY1977M01V02
(18, 55)
0:01:22.117611
riverdischarge_year

In [18]:
# Report the total run time, measured from `start` set in the first cell.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:04:04.804005
