In [1]:
""" Merge the difference geotiffs into one.
-------------------------------------------------------------------------------

When using 30 arc seconds in earthengine, the results are exported into 
multiple chunks. This script will merge then back.

When the previous script exports to 5 arc minute (for printing), 
you can ignore this script.

Author: Rutger Hofste
Date: 20190530
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

TESTING = 0

SCRIPT_NAME = "Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01"
OUTPUT_VERSION = 1

GCS_INPUT_PATH = "gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06"

ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
s3_output_path = "s3://wri-projects/Aqueduct30/Aq30vs21/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION)

target_filenames = ["owr_score_minus_DEFAULT",
                    "bws_score_minus_BWS_s",
                    "sev_score_minus_SV_s",
                    "iav_score_minus_WSV_s"]


print("Input GCS : " + GCS_INPUT_PATH +
      "\nInput ec2: " + ec2_input_path + 
      "\nOutput ec2: " + ec2_output_path +
      "\nOutput s3: " + ec2_output_path)


Input GCS : gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06
Input ec2: /volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/input_V01
Output ec2: /volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01
Output s3: /volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01


In [2]:
import time, datetime, sys, logging
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2019M05D30 UTC 14:48


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_input_path}
!rm -r {ec2_output_path}
!mkdir -p {ec2_input_path}
!mkdir -p {ec2_output_path}

In [4]:
!gsutil -m cp {GCS_INPUT_PATH}/* {ec2_input_path}

Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/bws_score_minus_BWS_s_30s0000000000-0000000000.tif...
Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/bws_score_minus_BWS_s_30s0000000000-0000023296.tif...
Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/iav_score_minus_WSV_s_30s0000000000-0000000000.tif...
Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/iav_score_minus_WSV_s_30s0000000000-0000023296.tif...
Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/owr_score_minus_DEFAULT_30s0000000000-0000000000.tif...
Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/owr_score_minus_DEFAULT_30s0000000000-0000023296.tif...
Copying gs://aqueduct30_v01/Y2019M05D28_RH_AQ30VS21_Export_Dif_Geotiff_EE_V01/output_V06/sev_score_minus_SV_s_30s0000000000-0000023296.tif

In [5]:
import os
import re
import rasterio
import numpy as np
from rasterio import merge


In [6]:
files = os.listdir(ec2_input_path)

In [7]:
files

['owr_score_minus_DEFAULT_30s0000000000-0000000000.tif',
 'bws_score_minus_BWS_s_30s0000000000-0000023296.tif',
 'iav_score_minus_WSV_s_30s0000000000-0000023296.tif',
 'bws_score_minus_BWS_s_30s0000000000-0000000000.tif',
 'sev_score_minus_SV_s_30s0000000000-0000000000.tif',
 'iav_score_minus_WSV_s_30s0000000000-0000000000.tif',
 'sev_score_minus_SV_s_30s0000000000-0000023296.tif',
 'owr_score_minus_DEFAULT_30s0000000000-0000023296.tif']

In [8]:
def merge_selected_files(selected_files, output_filename):
    """
    Merges the files into one geotiff
    
    Args:
        selected_files(list): List of paths
        output_filename(string): Output path including extension (.tif)
    Returns:
        None
    
    """
    
    datasets = []
    for selected_file in selected_files:
        datasets.append(rasterio.open("{}/{}".format(ec2_input_path,selected_file)))
        Z, out_transform = rasterio.merge.merge(datasets=datasets)
        Z = Z[0,:,:]
        Z = np.float32(Z)

        # Write geotiff
        with rasterio.open(
            output_filename,
            'w',
            driver='GTiff',
            height=Z.shape[0],
            width=Z.shape[1],
            count=1,
            dtype=Z.dtype,
            crs='+proj=latlong',
            transform=out_transform,
            compress='lzw'
        ) as dst:
            dst.write(Z, 1)   

In [9]:
for target_filename in target_filenames:
    print("merging: " , target_filename)
    regex = re.compile(r"{}".format(target_filename))
    selected_files = list(filter(regex.search, files))
    print(selected_files)
    output_filename = "{}/{}.tif".format(ec2_output_path,target_filename)
    merge_selected_files(selected_files, output_filename)
    print(output_filename)

merging:  owr_score_minus_DEFAULT
['owr_score_minus_DEFAULT_30s0000000000-0000000000.tif', 'owr_score_minus_DEFAULT_30s0000000000-0000023296.tif']
/volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/owr_score_minus_DEFAULT.tif
merging:  bws_score_minus_BWS_s
['bws_score_minus_BWS_s_30s0000000000-0000023296.tif', 'bws_score_minus_BWS_s_30s0000000000-0000000000.tif']
/volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/bws_score_minus_BWS_s.tif
merging:  sev_score_minus_SV_s
['sev_score_minus_SV_s_30s0000000000-0000000000.tif', 'sev_score_minus_SV_s_30s0000000000-0000023296.tif']
/volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/sev_score_minus_SV_s.tif
merging:  iav_score_minus_WSV_s
['iav_score_minus_WSV_s_30s0000000000-0000023296.tif', 'iav_score_minus_WSV_s_30s0000000000-0000000000.tif']
/volumes/data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/iav_score_minus_WSV_s.tif


In [10]:
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../../data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/sev_score_minus_SV_s.tif to s3://wri-projects/Aqueduct30/Aq30vs21/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/sev_score_minus_SV_s.tif
upload: ../../../../../data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/bws_score_minus_BWS_s.tif to s3://wri-projects/Aqueduct30/Aq30vs21/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/bws_score_minus_BWS_s.tif
upload: ../../../../../data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/owr_score_minus_DEFAULT.tif to s3://wri-projects/Aqueduct30/Aq30vs21/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/owr_score_minus_DEFAULT.tif
upload: ../../../../../data/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/iav_score_minus_WSV_s.tif to s3://wri-projects/Aqueduct30/Aq30vs21/Y2019M05D30_RH_AQ30VS21_Merge_Dif_Geotiff_V01/output_V01/iav_score_minus_WSV_s.tif


In [11]:
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:07:49.738804


previous run:  
0:07:49.738804
