In [None]:
""" Convert Indicators from ASCII to Geotiff
-------------------------------------------------------------------------------
Notebooks in the Aqueduct Project are used in production. This means that the   
notebooks should execute completely by pressing restart and run in the menu.
Commands should not exceed the 80 character limit which is the length of this  |

Code follows the Google for Python Styleguide. Exception are the scripts that 
use earth engine since this is camelCase instead of underscore.


Author: Rutger Hofste
Date: 20180327
Kernel: python36
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:

    SCRIPT_NAME (string) : Script name


Returns:


"""

# Input Parameters

SCRIPT_NAME = "Y0000M00D00_XX_Script_Template_V01"

# Output Parameters


# Convert Indicators from ASCII to Geotiff

* Purpose of script: Some Utrecht Indicators are shared in Ascii format. This script converts them to geotiff and uploads to GCS
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20170808

## Settings

In [1]:
EC2_INPUT_PATH = "/volumes/data/Y2017M07D31_RH_download_PCRGlobWB_data_V01/output"
EC2_OUTPUT_PATH = "/volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output"
EC2_INPUT_PATH_ADDITIONAL = "/volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/input"
S3_INPUT_PATH_ADDITIONAL = "s3://wri-projects/Aqueduct30/rawData/WRI/samplegeotiff/"
GCS_OUTPUT = "gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/"
S3_OUTPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/"
EE_OUTPUT_PATH = "projects/WRI-Aquaduct/PCRGlobWB20V05/"


In [2]:
!mkdir -p {EC2_OUTPUT_PATH}
!mkdir -p {EC2_INPUT_PATH_ADDITIONAL}
!aws s3 cp {S3_INPUT_PATH_ADDITIONAL} {EC2_INPUT_PATH_ADDITIONAL} --recursive

download: s3://wri-projects/Aqueduct30/rawData/WRI/samplegeotiff/readme.txt to ../../../../data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/input/readme.txt
download: s3://wri-projects/Aqueduct30/rawData/WRI/samplegeotiff/sampleGeotiff.tiff to ../../../../data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/input/sampleGeotiff.tiff


In [3]:
try:
    from osgeo import ogr, osr, gdal
except:
    sys.exit('ERROR: cannot find GDAL/OGR modules')
    
from netCDF4 import Dataset
import os
import datetime
import subprocess
import pandas as pd
import re
import time
from datetime import timedelta

## Functions

In [4]:
def readFile(filename):
    filehandle = gdal.Open(filename)
    band1 = filehandle.GetRasterBand(1)
    geotransform = filehandle.GetGeoTransform()
    geoproj = filehandle.GetProjection()
    Z = band1.ReadAsArray()
    xsize = filehandle.RasterXSize
    ysize = filehandle.RasterYSize
    filehandle = None
    return xsize,ysize,geotransform,geoproj,Z

def writeFile(filename,geotransform,geoprojection,data):
    (x,y) = data.shape
    format = "GTiff"
    driver = gdal.GetDriverByName(format)
    # you can change the dataformat but be sure to be able to store negative values including -9999
    dst_datatype = gdal.GDT_Float32
    dst_ds = driver.Create(filename,y,x,1,dst_datatype, [ 'COMPRESS=LZW' ])
    dst_ds.GetRasterBand(1).SetNoDataValue(-9999)
    dst_ds.GetRasterBand(1).WriteArray(data)
    dst_ds.SetGeoTransform(geotransform)
    dst_ds.SetProjection(geoprojection)
    dst_ds = None
    return 1

def splitKey(key):
    # will yield the root file code and extension of a set of keys
    prefix, extension = key.split(".")
    fileName = prefix.split("/")[-1]
    values = re.split("_|-", fileName)
    keyz = ["geographic_range","indicator","spatial_resolution","temporal_range_min","temporal_range_max"]
    outDict = dict(zip(keyz, values))
    outDict["fileName"]=fileName
    outDict["extension"]=extension
    return outDict


def uploadEE(index,row):
    target = EE_OUTPUT_PATH + row.fileName
    source = GCS_OUTPUT + row.fileName + "." + row.extension
    metadata = "--nodata_value=%s -p extension=%s -p filename=%s -p geographic_range=%s -p indicator=%s -p spatial_resolution=%s -p temporal_range_max=%s -p temporal_range_min=%s -p units=%s -p ingested_by=%s -p exportdescription=%s" %(row.nodata,row.extension,row.fileName,row.geographic_range,row.indicator,row.spatial_resolution,row.temporal_range_max,row.temporal_range_min, row.units, row.ingested_by, row.exportdescription)
    command = "/opt/anaconda3/bin/earthengine upload image --asset_id %s %s %s" % (target, source,metadata)
    try:
        response = subprocess.check_output(command, shell=True)
        outDict = {"command":command,"response":response,"error":0}
        df_errors2 = pd.DataFrame(outDict,index=[index])
        pass
    except:
        try:
            outDict = {"command":command,"response":response,"error":1}
        except:
            outDict = {"command":command,"response":-9999,"error":2}
        df_errors2 = pd.DataFrame(outDict,index=[index])
        print("error")
    return df_errors2

    

In [5]:
inputLocationSampleGeotiff = os.path.join(EC2_INPUT_PATH_ADDITIONAL,"sampleGeotiff.tiff")

In [6]:
print(inputLocationSampleGeotiff)

/volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/input/sampleGeotiff.tiff


In [7]:
[xsizeSample,ysizeSample,geotransformSample,geoprojSample,ZSample] = readFile(inputLocationSampleGeotiff)

In [8]:
files = os.listdir(EC2_INPUT_PATH)
newExtension =".tif"
for oneFile in files:
    if oneFile.endswith(".asc"):
        base , extension = oneFile.split(".")
        xsize,ysize,geotransform,geoproj,Z = readFile(os.path.join(EC2_INPUT_PATH,oneFile))
        Z[Z<-9990]= -9999
        Z[Z>1e19] = -9999
        outputFileName = base + newExtension
        writeFile(os.path.join(EC2_OUTPUT_PATH,outputFileName),geotransformSample,geoprojSample,Z)

Upload to GCS

In [9]:
!gsutil -m cp \
{EC2_OUTPUT_PATH}/*.tif \
{GCS_OUTPUT}

Copying file:///volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_droughtseveritystandardisedsoilmoisture_5min_1960-2014.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_q2seasonalvariabilitywatersupply_5min_1960-2014.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_droughtseveritystandardisedstreamflow_5min_1960-2014.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_environmentalflows_5min_1960-2014.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_q1seasonalvariabilitywatersupply_5min_1960-2014.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_interannualvariabilitywatersupply_5min_

The next step is to ingest these rasters to earthengine with appropriate metadata

In [10]:
command = ("/opt/google-cloud-sdk/bin/gsutil ls %s") %(GCS_OUTPUT)

In [11]:
print(command)

/opt/google-cloud-sdk/bin/gsutil ls gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/


In [12]:
keys = subprocess.check_output(command,shell=True)

In [13]:
keys = keys.decode('UTF-8').splitlines()

In [14]:
print(keys)

['gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_droughtseveritystandardisedsoilmoisture_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_droughtseveritystandardisedstreamflow_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_environmentalflows_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_interannualvariabilitywatersupply_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_q1seasonalvariabilitywatersupply_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_q2seasonalvariabilitywatersupply_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/global_q3seasonalvariabilitywatersupply_5min_1960-2014.tif', 'gs://aqueduct30_v01/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/globa

In [15]:
df = pd.DataFrame()
i = 0
for key in keys:
    i = i+1
    outDict = splitKey(key)
    df2 = pd.DataFrame(outDict,index=[i])
    df = df.append(df2)   

In [16]:
df.head()

Unnamed: 0,extension,fileName,geographic_range,indicator,spatial_resolution,temporal_range_max,temporal_range_min
1,tif,global_droughtseveritystandardisedsoilmoisture...,global,droughtseveritystandardisedsoilmoisture,5min,2014,1960
2,tif,global_droughtseveritystandardisedstreamflow_5...,global,droughtseveritystandardisedstreamflow,5min,2014,1960
3,tif,global_environmentalflows_5min_1960-2014,global,environmentalflows,5min,2014,1960
4,tif,global_interannualvariabilitywatersupply_5min_...,global,interannualvariabilitywatersupply,5min,2014,1960
5,tif,global_q1seasonalvariabilitywatersupply_5min_1...,global,q1seasonalvariabilitywatersupply,5min,2014,1960


In [17]:
df["nodata"] = -9999
df["ingested_by"] ="RutgerHofste"
df["exportdescription"] = df["indicator"]
df["units"] = "dimensionless"

In [18]:
df

Unnamed: 0,extension,fileName,geographic_range,indicator,spatial_resolution,temporal_range_max,temporal_range_min,nodata,ingested_by,exportdescription,units
1,tif,global_droughtseveritystandardisedsoilmoisture...,global,droughtseveritystandardisedsoilmoisture,5min,2014,1960,-9999,RutgerHofste,droughtseveritystandardisedsoilmoisture,dimensionless
2,tif,global_droughtseveritystandardisedstreamflow_5...,global,droughtseveritystandardisedstreamflow,5min,2014,1960,-9999,RutgerHofste,droughtseveritystandardisedstreamflow,dimensionless
3,tif,global_environmentalflows_5min_1960-2014,global,environmentalflows,5min,2014,1960,-9999,RutgerHofste,environmentalflows,dimensionless
4,tif,global_interannualvariabilitywatersupply_5min_...,global,interannualvariabilitywatersupply,5min,2014,1960,-9999,RutgerHofste,interannualvariabilitywatersupply,dimensionless
5,tif,global_q1seasonalvariabilitywatersupply_5min_1...,global,q1seasonalvariabilitywatersupply,5min,2014,1960,-9999,RutgerHofste,q1seasonalvariabilitywatersupply,dimensionless
6,tif,global_q2seasonalvariabilitywatersupply_5min_1...,global,q2seasonalvariabilitywatersupply,5min,2014,1960,-9999,RutgerHofste,q2seasonalvariabilitywatersupply,dimensionless
7,tif,global_q3seasonalvariabilitywatersupply_5min_1...,global,q3seasonalvariabilitywatersupply,5min,2014,1960,-9999,RutgerHofste,q3seasonalvariabilitywatersupply,dimensionless
8,tif,global_q4seasonalvariabilitywatersupply_5min_1...,global,q4seasonalvariabilitywatersupply,5min,2014,1960,-9999,RutgerHofste,q4seasonalvariabilitywatersupply,dimensionless


In [19]:
df_errors = pd.DataFrame()
start_time = time.time()
for index, row in df.iterrows():
    elapsed_time = time.time() - start_time 
    print(index,"%.2f" %((index/9289.0)*100), "elapsed: ", str(timedelta(seconds=elapsed_time)))
    df_errors2 = uploadEE(index,row)
    df_errors = df_errors.append(df_errors2)

1 0.01 elapsed:  0:00:00.000821
2 0.02 elapsed:  0:00:01.404694
3 0.03 elapsed:  0:00:02.679707
4 0.04 elapsed:  0:00:04.023834
5 0.05 elapsed:  0:00:05.207816
6 0.06 elapsed:  0:00:06.453202
7 0.08 elapsed:  0:00:07.808105
8 0.09 elapsed:  0:00:09.021175


For the Threshold setting, copying these rasters to S3. 

In [20]:
!aws s3 cp {EC2_OUTPUT_PATH} {S3_OUTPUT_PATH} --recursive

upload: ../../../../data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_droughtseveritystandardisedsoilmoisture_5min_1960-2014.tif to s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_droughtseveritystandardisedsoilmoisture_5min_1960-2014.tif
upload: ../../../../data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_droughtseveritystandardisedstreamflow_5min_1960-2014.tif to s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_droughtseveritystandardisedstreamflow_5min_1960-2014.tif
upload: ../../../../data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_environmentalflows_5min_1960-2014.tif to s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_environmentalflows_5min_1960-2014.tif
upload: ../../../../data/Y2017M08D08_RH_Convert_Indicators_ASC_Geotiff_V01/output/global_q1seasonalva

In [21]:
df_errors

Unnamed: 0,command,error,response
1,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: WPNPYHO7LKDC4ZI...
2,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: PKGV7ZT472DC4CQ...
3,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: I6SXMZIOY27TJDK...
4,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: MIW2QZZ7IDPPDE3...
5,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: YFN6UNC7P2CKUZM...
6,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: 35D3UUHJXCMZVCK...
7,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: 52AA2G3T7O2SIDH...
8,/opt/anaconda3/bin/earthengine upload image --...,0,b'Started upload task with ID: F724U75TRKRQEHH...
