In [1]:
""" convert netCDF4 to Geotiff.
-------------------------------------------------------------------------------

Convert individual images from a netCDF on EC2 to geotiffs. Output is stored in 
Amazon S3 folder and on EC2 / GCS. 


Author: Rutger Hofste
Date: 20180731
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    PRINT_METADATA (boolean) : Print out metadata in Jupyter Notebook.
    SCRIPT_NAME (string) : Script name.
    PREVIOUS_SCRIPT_NAME (string) : Previous script name used to identify input files.    
    INPUT_VERSION (integer) : Input version.
    OUTPUT_VERSION (integer) : Output version.     
    X_DIMENSION_5MIN (integer) : horizontal or longitudinal dimension of 
                                 raster.
    Y_DIMENSION_5MIN (integer) : vertical or latitudinal dimension of 
                                 raster.
    
    
Returns:

"""


# Input Parameters
PRINT_METADATA = False
SCRIPT_NAME = "Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02"
PREVIOUS_SCRIPT_NAME = "Y2017M07D31_RH_download_PCRGlobWB_data_V02"
INPUT_VERSION = 1
OUTPUT_VERSION = 2
X_DIMENSION_5MIN = 4320
Y_DIMENSION_5MIN = 2160


# ETL
ec2_input_path = "/volumes/data/{}/output_V{:02.0f}/".format(PREVIOUS_SCRIPT_NAME,INPUT_VERSION)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)
gcs_output_path = "gs://aqueduct30_v01/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)

print("Input ec2: " + ec2_input_path +
      "\nOutput ec2: " + ec2_output_path +
      "\nOutput S3: " + s3_output_path +
      "\nOutput GCS: " +  gcs_output_path)


Input ec2: /volumes/data/Y2017M07D31_RH_download_PCRGlobWB_data_V02/output_V01/
Output ec2: /volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output_V02/
Output S3: s3://wri-projects/Aqueduct30/processData/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output_V02/
Output GCS: gs://aqueduct30_v01/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output_V02/


In [2]:
import time, datetime, sys
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M04D16 UTC 12:51


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# Imports
import aqueduct3
import os
import subprocess
import numpy as np
import warnings
import logging

In [4]:
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

rm: cannot remove '/volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output_V02/': No such file or directory


In [5]:
"""

This cell loops over the images in a netCDF. There are a couple of PCRGlobWB specific properties so
be careful when using with other netCDFs. PCRGLOBWB specific properties include datatype (float32), 
nodata value, time format, minmax value etc. 

"""

default_geotransform, default_geoprojection = aqueduct3.get_global_georeference(np.ones([Y_DIMENSION_5MIN,X_DIMENSION_5MIN]))

for root, dirs, file_names in os.walk(ec2_input_path):
    for file_name in file_names:
        if file_name.endswith(".nc4") or file_name.endswith(".nc"):
            print(file_name)
            input_path = os.path.join(root, file_name) 
            output_path = aqueduct3.netCDF4_to_geotiff(file_name,input_path,ec2_output_path, default_geotransform, default_geoprojection)

In [6]:
files = os.listdir(ec2_output_path)
print("Number of files: " + str(len(files)))

Number of files: 0


Some files from Utrecht contain double years, removing the erroneous ones (used Panoply/Qgis to inspect those files):

global_historical_PDomWN_year_millionm3_5min_1960_2014I055Y1960M01.tif
global_historical_PDomWN_month_millionm3_5min_1960_2014I660Y1960M01.tif
global_historical_PDomWN_month_millionm3_5min_1960_2014I661Y1960M01.tif




In [7]:
!mkdir /volumes/data/trash

In [8]:
!mv /volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output/global_historical_PDomWN_year_millionm3_5min_1960_2014I055Y1960M01.tif /volumes/data/trash/global_historical_PDomWN_year_millionm3_5min_1960_2014I055Y1960M01.tif
!mv /volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output/global_historical_PDomWN_month_millionm3_5min_1960_2014I660Y1960M01.tif /volumes/data/trash/global_historical_PDomWN_month_millionm3_5min_1960_2014I660Y1960M01.tif
!mv /volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output/global_historical_PDomWN_month_millionm3_5min_1960_2014I661Y1960M01.tif /volumes/data/trash/global_historical_PDomWN_month_millionm3_5min_1960_2014I661Y1960M01.tif

mv: cannot stat '/volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output/global_historical_PDomWN_year_millionm3_5min_1960_2014I055Y1960M01.tif': No such file or directory
mv: cannot stat '/volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output/global_historical_PDomWN_month_millionm3_5min_1960_2014I660Y1960M01.tif': No such file or directory
mv: cannot stat '/volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output/global_historical_PDomWN_month_millionm3_5min_1960_2014I661Y1960M01.tif': No such file or directory


In [9]:
files = os.listdir(ec2_output_path)
print("Number of files: " + str(len(files)))

Number of files: 0


In [10]:
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

In [11]:
!gsutil -m cp {ec2_output_path}*.tif {gcs_output_path}

CommandException: No URLs matched: /volumes/data/Y2017M07D31_RH_Convert_NetCDF_Geotiff_V02/output_V02/*.tif
CommandException: 1 file/object could not be transferred.


In [12]:
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:00:07.564181


In [13]:
Previous runs:    

SyntaxError: invalid syntax (<ipython-input-13-8583a6063f40>, line 1)