### Y2018M02D13_RH_GDB_To_Tiff_V01

* Purpose of script: Unzip GeoTIFFs exported from a geodatabase, upload them to GCS, and ingest them into Earth Engine
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20180213

Data was shared by Tianyi Luo in GDB format. Rasters have been exported to Geotiff format and zipped. 

In [68]:
import os
if 'GDAL_DATA' not in os.environ:
    os.environ['GDAL_DATA'] = r'/usr/share/gdal/2.1'
from osgeo import gdal,ogr,osr
'GDAL_DATA' in os.environ
# Sanity check: if this is False, GDAL_DATA is not set. GDAL needs this variable to obtain the spatial reference.
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import time
import subprocess
import json
%matplotlib notebook

In [62]:
# Notebook configuration. SCRIPT_NAME is substituted into every path below and
# is also stored as the "script_used" property on the uploaded assets.
SCRIPT_NAME = "Y2018M02D13_RH_GDB_To_Tiff_V01"
# Stored as the "version" property on the uploaded assets.
OUTPUT_VERSION = 1



# S3 locations for this script. The input prefix is copied recursively to
# EC2_INPUT_PATH further down.
S3_INPUT_PATH = ("s3://wri-projects/Aqueduct30/processData/{}/input/").format(SCRIPT_NAME)
S3_OUTPUT_PATH = ("s3://wri-projects/Aqueduct30/processData/{}/output/").format(SCRIPT_NAME)

# Local working directories; both are removed and recreated before the download.
# The trailing "/" matters: later cells append file names and globs directly.
EC2_INPUT_PATH  = ("/volumes/data/{}/input/").format(SCRIPT_NAME)
EC2_OUTPUT_PATH = ("/volumes/data/{}/output/").format(SCRIPT_NAME)

# Name of the zip archive expected inside the input folder.
INPUT_FILE_NAME = "aq21demand.zip"


# Google Cloud Storage destination for the extracted .tif files (no trailing "/").
GCS_OUTPUT_PATH = "gs://aqueduct30_v01/{}".format(SCRIPT_NAME)

# Earth Engine folder that is created below; each image is uploaded as
# EE_BASE/<fileName>.
EE_BASE = "projects/WRI-Aquaduct/aqueduct21V01"



In [19]:
!rm -r {EC2_INPUT_PATH}
!rm -r {EC2_OUTPUT_PATH}

!mkdir -p {EC2_INPUT_PATH}
!mkdir -p {EC2_OUTPUT_PATH}

In [20]:
# Download everything under the S3 input prefix into the local input folder.
!aws s3 cp {S3_INPUT_PATH} {EC2_INPUT_PATH} --recursive

download: s3://wri-projects/Aqueduct30/processData/Y2018M02D13_RH_GDB_To_Tiff_V01/input/aq21demand.zip to ../../../../data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/aq21demand.zip


Functions

In [53]:
# Module-level placeholder; the name is rebound later with the result of splitKey.
outDict = {}
def splitKey(key):
    """Split a storage key into its file name and extension.

    Only the last path component (text after the final "/") is inspected, and
    it is split on its LAST dot. Keys with dots in a folder name or several
    dots in the file name therefore work, and a key without any dot yields an
    empty extension instead of raising ValueError.

    Args:
        key (str): Object key or path, e.g. "gs://bucket/folder/Ca.tif".

    Returns:
        dict: {"fileName": <name without extension>, "extension": <extension
        without the dot, or "" if there is none>}.
    """
    baseName = key.split("/")[-1]
    fileName, separator, extension = baseName.rpartition(".")
    if not separator:
        # rpartition puts the whole string in the last slot when no dot exists.
        fileName, extension = baseName, ""
    return {"fileName": fileName, "extension": extension}

In [22]:
# Full local path of the downloaded archive (EC2_INPUT_PATH already ends with "/").
file_location = EC2_INPUT_PATH + INPUT_FILE_NAME

In [23]:
# The archive is extracted into the same folder it was downloaded to.
destination_folder = EC2_INPUT_PATH

In [24]:
print(file_location)

/volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/aq21demand.zip


In [25]:
! ls /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/

aq21demand.zip


In [26]:
# Unpack the downloaded zip archive into the input folder.
! unzip {file_location} -d {destination_folder}

Archive:  /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/aq21demand.zip
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ca.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Cd.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ci.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ct.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ua.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ud.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ui.tif  
  inflating: /volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ut.tif  


In [27]:
!gsutil version



Updates are available for some Cloud SDK components.  To install them,
please run:
  $ gcloud components update

gsutil version: 4.27


In [28]:
# Set the active gcloud project (core/project) to aqueduct30.
!gcloud config set project aqueduct30

Updated property [core/project].


In [30]:
# Copy every extracted .tif from the local input folder to the GCS output
# location; -m lets gsutil run the copies in parallel.
!gsutil -m cp {EC2_INPUT_PATH}*.tif {GCS_OUTPUT_PATH}

Copying file:///volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ua.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ca.tif [Content-Type=image/tiff]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying file:///volumes/data/Y2018M02D13_RH_GDB_To_Tiff_V01/input/Ud.tif [Content-Type=image/tiff]...
Copying file:///vo

In [31]:
print(GCS_OUTPUT_PATH)

gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01


In [35]:
# Shell command that creates the Earth Engine folder holding the uploaded images.
command = "earthengine create folder %s" % (EE_BASE,)

In [36]:
print(command)

earthengine create folder projects/WRI-Aquaduct/aqueduct21V01


In [37]:
# Run the command through the shell and show its output; check_output raises
# CalledProcessError if the command exits with a non-zero status.
subprocess.check_output(command,shell=True)

b'Asset projects/WRI-Aquaduct/aqueduct21V01 already exists\n'

In [39]:
# Shell command that lists the objects under the GCS output location.
command = "/opt/google-cloud-sdk/bin/gsutil ls %s" % (GCS_OUTPUT_PATH,)

In [40]:
# Capture the raw listing of the GCS output location (bytes, decoded in the next cell).
keys = subprocess.check_output(command,shell=True)

In [41]:
# Decode the listing and split it into one gs:// URI per line.
# NOTE(review): this rebinds `keys` from bytes to list, so re-running this cell
# without re-running the previous one fails (a list has no .decode).
keys = keys.decode('UTF-8').splitlines()

In [42]:
print(keys)

['gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ca.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Cd.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ci.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ct.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ua.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ud.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ui.tif', 'gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ut.tif']


In [97]:
# Build one record per listed GeoTIFF and create the DataFrame in a single call.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop copies the whole frame on every iteration.
records = []
for key in keys:
    record = splitKey(key)      # {"fileName": ..., "extension": ...}
    record["source"] = key      # full gs:// URI of the file
    records.append(record)

# Keep the original 1-based index (first key -> 1).
df = pd.DataFrame(records, index=range(1, len(records) + 1))

In [104]:
# Metadata attached to every uploaded asset as "-p key=value" flags.
# Some values carry literal single quotes; they end up verbatim in the shell
# command that is built from this dict.
properties = {
    "script_used": SCRIPT_NAME,
    "ingested_by": "'Rutger Hofste'",
    "aqueduct_version": "'2.1'",
    "version": OUTPUT_VERSION,
    "units": "m3",
}

In [105]:
# Turn the metadata into " -p key=value" flags, one per property, and glue
# them into a single string that is appended to each upload command.
propertyFlags = []
for key, value in properties.items():
    propertyFlags.append(" -p " + str(key) + "=" + str(value))
propertyString = "".join(propertyFlags)

In [106]:
df.head()

Unnamed: 0,extension,fileName,source
1,tif,Ca,gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff...
2,tif,Cd,gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff...
3,tif,Ci,gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff...
4,tif,Ct,gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff...
5,tif,Ua,gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff...


In [108]:
# Issue one `earthengine upload image` command per row of df. The asset is
# named after the file (EE_BASE/<fileName>) and every asset gets the same
# property flags. The command is printed after it has run successfully.
for index, row in df.iterrows():
    asset_id = "/".join([EE_BASE, row["fileName"]])
    command = "earthengine upload image --asset_id {} {} {}".format(
        asset_id, row["source"], propertyString
    )
    subprocess.check_output(command, shell=True)
    print(command)

earthengine upload image --asset_id projects/WRI-Aquaduct/aqueduct21V01/Ca gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ca.tif  -p aqueduct_version='2.1' -p units=m3 -p script_used=Y2018M02D13_RH_GDB_To_Tiff_V01 -p version=1 -p ingested_by='Rutger Hofste'
earthengine upload image --asset_id projects/WRI-Aquaduct/aqueduct21V01/Cd gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Cd.tif  -p aqueduct_version='2.1' -p units=m3 -p script_used=Y2018M02D13_RH_GDB_To_Tiff_V01 -p version=1 -p ingested_by='Rutger Hofste'
earthengine upload image --asset_id projects/WRI-Aquaduct/aqueduct21V01/Ci gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ci.tif  -p aqueduct_version='2.1' -p units=m3 -p script_used=Y2018M02D13_RH_GDB_To_Tiff_V01 -p version=1 -p ingested_by='Rutger Hofste'
earthengine upload image --asset_id projects/WRI-Aquaduct/aqueduct21V01/Ct gs://aqueduct30_v01/Y2018M02D13_RH_GDB_To_Tiff_V01/Ct.tif  -p aqueduct_version='2.1' -p units=m3 -p script_used=Y2018M02D13_RH_GDB_To_Tiff_