# Y2017M11D24_RH_Prepare_Image_Collections_EE_V01

* Purpose of script: put all Earth Engine ImageCollections in the same format (million m^3 and dimensionless)
* Kernel used: python27
* Date created: 20171124  

The ImageCollections that need a unit conversion are discharge and runoff.



In [1]:
import time, datetime, sys
# Timestamp the run. gmtime() is passed explicitly so that the values really
# are UTC, as the "UTC" label says; strftime() on its own formats local time.
utcNow = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utcNow)
timeString = time.strftime("UTC %H:%M", utcNow)
# Wall-clock start; the final cell subtracts it to report the total run time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

('Y2017M11D28', 'UTC 12:05')


'2.7.13 |Continuum Analytics, Inc.| (default, Dec 20 2016, 23:09:15) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [2]:
# Earth Engine asset folder that holds the source and converted collections.
EE_PATH = "projects/WRI-Aquaduct/PCRGlobWB20V07"
# Image the runoff layers are multiplied with (presumably cell area in m^2).
AREA_IMAGE_FILE_NAME = "area_5min_m2V11"

# Provenance values written as properties on every exported image.
SCRIPT_NAME = "Y2017M11D24_RH_Prepare_Image_Collections_EE_V01"
OUTPUT_VERSION = 1

# Export grid settings.
# Note from the original author: specifying the dimensions caused the script
# to crash (internal error on Google's side); specify scale instead.
DIMENSION5MIN = {"x": 4320, "y": 2160}
CRS = "EPSG:4326"
MAXPIXELS = 1e10

# Source ImageCollections whose unit still has to be converted.
icIds = [
    "%s/%s" % (EE_PATH, icName)
    for icName in (
        "global_historical_riverdischarge_month_m3second_5min_1960_2014",
        "global_historical_riverdischarge_year_m3second_5min_1960_2014",
        "global_historical_runoff_month_mmonth_5min_1958_2014",
        "global_historical_runoff_year_myear_5min_1958_2014",
    )
]


In [3]:
import ee
import re
import subprocess
import pandas as pd
from calendar import monthrange, isleap

In [4]:
# Initialise the Earth Engine Python client before any ee.* object is used.
ee.Initialize()

ImageCollections not yet in the right format: discharge (m^3/s) and runoff (m/month or m/year).

In [5]:
# Seconds per day, spelled out as hours * minutes * seconds.
sPerD = 24 * 60 * 60

In [6]:
# Image that runoff layers are multiplied with to turn depths into volumes.
areaImage = ee.Image("/".join([EE_PATH, AREA_IMAGE_FILE_NAME]))

In [7]:
# Export size as a "WIDTHxHEIGHT" string, e.g. "4320x2160".
dimensions = "{x}x{y}".format(**DIMENSION5MIN)

In [8]:
# Affine transform of the export grid, presumably in Earth Engine's
# [xScale, xShearing, xTranslation, yShearing, yScale, yTranslation] order:
# cells of ~1/12 degree with the origin at (-180, 90).
crsTransform = [
    0.0833333309780367, 0, -179.99999491255934,
    0, -0.0833333309780367, 90.00000254430942,
]

In [9]:
def newImageId(imageId):
    """Return imageId with every unit token replaced by "millionm3".

    The tokens "m3second", "mmonth" and "myear" are substituted wherever
    they occur; an id without any of them is returned unchanged.
    """
    unitPattern = re.compile('m3second|mmonth|myear')
    return unitPattern.sub("millionm3", imageId)

def findUnit(imageId):
    """Return the unit token contained in imageId, or "error" if none is.

    Tokens are tried in the order "m3second", "mmonth", "myear"; the first
    one found anywhere in the id wins.
    """
    for candidate in ("m3second", "mmonth", "myear"):
        if re.search(candidate, imageId):
            return candidate
    return "error"


def toVolumeAndExport(row):
    """Convert one image to million m^3 and start its export to an EE asset.

    Discharge images (unit "m3second") are multiplied by the number of
    seconds in their month or year. Runoff images (unit "mmonth" or
    "myear") are multiplied by areaImage. Both are then divided by 1e6.

    Args:
        row: row of the image DataFrame (pandas Series) with the entries
            "image" (ee.Image), "unit" (a value returned by findUnit) and
            "newImageId" (asset id to export to). Its label, row.name, is
            the key under which the result is stored in dfOut.

    Raises:
        ValueError: if the unit, or the temporal_resolution of a discharge
            image, is not one this function knows how to convert.

    Side effects:
        Starts an Earth Engine export task and stores the converted image
        in the module-level dfOut under the column "newImage".
    """
    secondsPerDay = 86400
    image = row["image"]
    unit = row["unit"]

    if unit == "m3second":
        # Each getInfo() is a blocking request to the server, so a property
        # is only fetched on the branch that actually needs it.
        temporalResolution = image.get("temporal_resolution").getInfo()
        year = int(image.get("year").getInfo())
        if temporalResolution == "month":
            month = int(image.get("month").getInfo())
            daysInPeriod = monthrange(year, month)[1]
        elif temporalResolution == "year":
            # Leap years have 366 days; test the image's own year.
            daysInPeriod = 366 if isleap(year) else 365
        else:
            raise ValueError(
                "unsupported temporal_resolution %r for %s"
                % (temporalResolution, row.name))
        newImage = image.multiply(daysInPeriod * secondsPerDay).divide(1e6)
    elif unit in ("mmonth", "myear"):
        newImage = image.multiply(areaImage).divide(1e6)
    else:
        raise ValueError("unsupported unit %r for %s" % (unit, row.name))

    # Arithmetic drops the metadata, so copy it back and record provenance.
    newImage = newImage.copyProperties(image)
    newImage = newImage.set("units","millionm3")
    newImage = newImage.set("script_used",SCRIPT_NAME)
    newImage = newImage.set("version",OUTPUT_VERSION)

    description = "%sV%0.2d" %(image.get("exportdescription").getInfo(),OUTPUT_VERSION)
    print(description)

    task = ee.batch.Export.image.toAsset(
        image =  ee.Image(newImage),
        description = description,
        assetId = row["newImageId"],
        dimensions = dimensions,
        crs = CRS,
        crsTransform = crsTransform,
        maxPixels = MAXPIXELS
    )
    task.start()
    # row.name is the row's own label, so no module-level loop variable is
    # needed to find the matching row in dfOut.
    dfOut.at[row.name,"newImage"] = newImage
    

In [10]:
# One row per source ImageCollection.
dfIcs = pd.DataFrame({"icId": icIds})

### Creating new ImageCollections

In [11]:
# Destination collection id: the source id with its unit token replaced.
dfIcs["newIcId"] = dfIcs["icId"].map(newImageId)

In [12]:
# Create each destination ImageCollection with the earthengine command line
# tool, echoing the command together with the tool's output.
for newIcId in dfIcs["newIcId"]:
    command = "earthengine create collection %s" % newIcId
    result = subprocess.check_output(command, shell=True)
    print(command,result)

('earthengine create collection projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_month_millionm3_5min_1960_2014', 'Asset projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_month_millionm3_5min_1960_2014 already exists\n')
('earthengine create collection projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_year_millionm3_5min_1960_2014', 'Asset projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_riverdischarge_year_millionm3_5min_1960_2014 already exists\n')
('earthengine create collection projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_runoff_month_millionm3_5min_1958_2014', 'Asset projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_runoff_month_millionm3_5min_1958_2014 already exists\n')
('earthengine create collection projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_runoff_year_millionm3_5min_1958_2014', 'Asset projects/WRI-Aquaduct/PCRGlobWB20V07/global_historical_runoff_year_millionm3_5min_1958_2014 already

In [13]:
# List the images of every source ImageCollection with `earthengine ls` and
# gather them in one DataFrame with the columns "imageId" and "icId".
frames = []
for icId in dfIcs["icId"]:
    command = "earthengine ls %s" % icId
    # universal_newlines=True returns text rather than bytes on Python 3 too.
    listing = subprocess.check_output(command, shell=True, universal_newlines=True)
    # Skip blank lines so they do not end up as (invalid) image ids.
    assetList = [line.strip() for line in listing.splitlines() if line.strip()]
    # Building the frame from a dict also works for an empty collection.
    dfImages = pd.DataFrame({"imageId": assetList})
    dfImages["icId"] = icId
    frames.append(dfImages)

# Concatenate once instead of calling DataFrame.append() per collection.
if frames:
    dfImages2 = pd.concat(frames)
else:
    dfImages2 = pd.DataFrame(columns=["imageId", "icId"])

In [14]:
# Derive the per-image columns: (target column, source column, function).
derivedColumns = [
    ("newImageId", "imageId", newImageId),
    ("newIcId", "icId", newImageId),
    ("unit", "imageId", findUnit),
]
for target, source, transform in derivedColumns:
    dfImages2[target] = dfImages2[source].apply(transform)

# Wrap every asset id in an ee.Image and index the table by image id, while
# keeping "imageId" available as a regular column as well.
dfImages2["image"] = dfImages2["imageId"].apply(lambda assetId: ee.Image(assetId))
dfImages2 = dfImages2.set_index("imageId", drop=False)

In [None]:
# Independent copy of the image table; toVolumeAndExport writes results here.
dfOut = dfImages2.copy(deep=True)

In [None]:
# Convert and export every image. A failure on one image is recorded and the
# loop carries on with the next one.
i = 0
errorlog = []  # index labels (image ids) of the rows that failed
startLoop = datetime.datetime.now()
# The loop variable stays named `index`: code called from this loop may look
# it up as a module-level name.
for index, row in dfImages2.iterrows():
    i += 1
    try:
        toVolumeAndExport(row)
    except Exception as error:
        # Exception instead of a bare except, so that a keyboard/kernel
        # interrupt still stops the run; the reason is printed, not dropped.
        errorlog.append(index)
        print("error",index,repr(error))
    elapsed = datetime.datetime.now() - startLoop
    print(i,dfImages2.shape[0])
    print(elapsed)
    
    

riverdischarge_monthY1960M01V01
(1, 1456)
0:00:04.235106
riverdischarge_monthY1960M02V01
(2, 1456)
0:00:08.001637
riverdischarge_monthY1960M03V01
(3, 1456)
0:00:12.252586
riverdischarge_monthY1960M04V01
(4, 1456)
0:00:16.135188
riverdischarge_monthY1960M05V01
(5, 1456)
0:00:20.400431
riverdischarge_monthY1960M06V01
(6, 1456)
0:00:24.688635
riverdischarge_monthY1960M07V01
(7, 1456)
0:00:28.962729
riverdischarge_monthY1960M08V01
(8, 1456)
0:00:33.278462
riverdischarge_monthY1960M09V01
(9, 1456)
0:00:37.976822
riverdischarge_monthY1960M10V01
(10, 1456)
0:00:42.710155
riverdischarge_monthY1960M11V01
(11, 1456)
0:00:47.445628
riverdischarge_monthY1960M12V01
(12, 1456)
0:00:52.098980
riverdischarge_monthY1961M01V01
(13, 1456)
0:00:56.816575
riverdischarge_monthY1961M02V01
(14, 1456)
0:01:01.631052
riverdischarge_monthY1961M03V01
(15, 1456)
0:01:06.296174
riverdischarge_monthY1961M04V01
(16, 1456)
0:01:10.981944
riverdischarge_monthY1961M05V01
(17, 1456)
0:01:15.612521
riverdischarge_monthY19

riverdischarge_monthY1971M10V01
(142, 1456)
0:10:14.229211
riverdischarge_monthY1971M11V01
(143, 1456)
0:10:18.474419
riverdischarge_monthY1971M12V01
(144, 1456)
0:10:22.225315
riverdischarge_monthY1972M01V01
(145, 1456)
0:10:26.511654
riverdischarge_monthY1972M02V01
(146, 1456)
0:10:30.452335
riverdischarge_monthY1972M03V01
(147, 1456)
0:10:34.724086
riverdischarge_monthY1972M04V01
(148, 1456)
0:10:39.008632
riverdischarge_monthY1972M05V01
(149, 1456)
0:10:42.855273
riverdischarge_monthY1972M06V01
(150, 1456)
0:10:46.554346
riverdischarge_monthY1972M07V01
(151, 1456)
0:10:50.786587
riverdischarge_monthY1972M08V01
(152, 1456)
0:10:55.052270
riverdischarge_monthY1972M09V01
(153, 1456)
0:10:59.327573
riverdischarge_monthY1972M10V01
(154, 1456)
0:11:03.065576
riverdischarge_monthY1972M11V01
(155, 1456)
0:11:07.318358
riverdischarge_monthY1972M12V01
(156, 1456)
0:11:11.574393
riverdischarge_monthY1973M01V01
(157, 1456)
0:11:15.354130
riverdischarge_monthY1973M02V01
(158, 1456)
0:11:19.6502

riverdischarge_monthY1983M05V01
(281, 1456)
0:19:59.577975
riverdischarge_monthY1983M06V01
(282, 1456)
0:20:04.319968
riverdischarge_monthY1983M07V01
(283, 1456)
0:20:09.037469
riverdischarge_monthY1983M08V01
(284, 1456)
0:20:13.676411
riverdischarge_monthY1983M09V01


In [None]:
# Report the total wall-clock time since `start` was set in the first cell.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)