# Convert layer data to geotiff

Script to convert the layer surfaces into a common EPSG coordinate reference system that can be used in the Underworld model.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import netCDF4
import rasterio
import os
from scipy.interpolate import RegularGridInterpolator
import geopandas as gpd

%matplotlib inline

### Directories where data is stored and where to output data
The input files can be in any format that contains georeferenced data and can be read by rasterio ([more info from rasterio](https://rasterio.readthedocs.io/en/latest/topics/reading.html)).

In [None]:
# folder that holds the input surface files
file_directory = "../Zipped zmap surfaces/Zipped zmap surfaces/"

# one output folder per export format
output_geotiff_directory = "../data/GAB_surfaces/GeoTiff/"
output_numpy_directory = "../data/GAB_surfaces/NumPy/"
output_png_directory = "../data/GAB_surfaces/png/"

# every entry of the input folder, in alphabetical order
filenames = os.listdir(file_directory)
filenames.sort()
n_files = len(filenames)

# display each filename next to its index in the sorted listing
list(enumerate(filenames))

In [None]:
### create any output directory that does not exist yet
for directory in (output_geotiff_directory, output_numpy_directory, output_png_directory):
    if os.path.exists(directory):
        # already present, nothing to do
        continue
    os.makedirs(directory)
    print('created directory: ' + directory)

### Define EPSG code for data to be converted to
EPSG codes can be found [here](https://epsg.io/)

The chosen CRS should use metres as its unit of measurement so that the model mesh can be created in the same coordinate system.

In [None]:
# define CRS as GDA94 MGA55
epsg = 28355

# build the CRS straight from the integer EPSG code; this avoids the
# deprecated PROJ "init=epsg:XXXX" dictionary syntax
crs = rasterio.crs.CRS.from_epsg(epsg)


Check whether each grid has the same shape and extent by listing them for every input file.

In [None]:
# one row per input file: array shape and (left, right, bottom, top) bounds
all_shapes  = np.zeros((n_files, 2), dtype=int)
all_extents = np.zeros((n_files, 4))


for i, zfilename in enumerate(filenames):
    # the context manager closes the dataset even if reading its metadata raises
    with rasterio.open(file_directory+zfilename) as z_file:
        all_extents[i] = z_file.bounds.left, z_file.bounds.right, z_file.bounds.bottom, z_file.bounds.top
        all_shapes[i]  = z_file.shape

    # index, shape, extent and the first four characters of the filename
    print("{:02d} {} {} {}".format(i, tuple(all_shapes[i]), all_extents[i], zfilename[:4]))

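Pick a reference grid: every surface is resampled to the shape and extent of the file selected by `idx` (an index into the sorted file listing above).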

In [None]:
# index (into the sorted `filenames` list) of the file whose grid is used as
# the common target; it is chosen by hand from the listing printed above
idx = 5

# the context manager closes the dataset even if reading it raises
with rasterio.open(file_directory+filenames[idx]) as z_file:
    z_data = z_file.read(1, masked=True)
    # the copied mask is later applied to every resampled surface
    global_mask = z_data.mask.copy()
    global_extent = z_file.bounds.left, z_file.bounds.right, z_file.bounds.bottom, z_file.bounds.top
    global_shape  = tuple(z_file.shape)
    global_transform = z_file.transform

print("Resample zmaps to {:s}\n  shape: {}, \n extent: {}".format(filenames[idx],
                                                            global_shape,
                                                            global_extent))


Create grid for resampled data

In [None]:
# unpack the reference grid: extent is (left, right, bottom, top), shape is (rows, cols)
x_min, x_max, y_min, y_max = global_extent
n_rows, n_cols = global_shape

# evenly spaced coordinates starting at the minimum bound and excluding the
# maximum bound (endpoint=False); dx and dy are the resulting spacings
xcoords, dx = np.linspace(x_min, x_max, num=n_cols, endpoint=False, retstep=True)
ycoords, dy = np.linspace(y_min, y_max, num=n_rows, endpoint=False, retstep=True)

# 2D query coordinates, each of shape (n_rows, n_cols)
xq, yq = np.meshgrid(xcoords, ycoords)

In [None]:
def fill_ndimage(data,invalid=None):
    """
    Replace the value of invalid 'data' cells (indicated by 'invalid')
    by the value of the nearest valid data cell.

    Parameters
    ----------
    data : numpy.ndarray or numpy.ma.MaskedArray
        Input values. The input is never modified; a copy is filled.
        For a masked array a cell counts as missing when it is flagged in
        the mask or when its stored value equals the array's fill_value.
    invalid : array of bool, optional
        True where a cell must be replaced. When omitted, NaN cells and
        (for masked arrays) missing cells are replaced.

    Returns
    -------
    numpy.ndarray
        Plain (unmasked) array of the same shape as 'data' in which every
        invalid cell holds the value of its nearest valid cell.
    """
    from scipy.ndimage import distance_transform_edt

    masked_array = hasattr(data, "fill_value")
    if masked_array:
        # honour the mask itself, not only cells that happen to store the
        # fill value: masked cells can hold arbitrary underlying data
        mask_fill_value = np.ma.getmaskarray(data) | (data.data == data.fill_value)
        data = data.data.copy()
        if np.issubdtype(data.dtype, np.floating):
            # NaN cannot be stored in integer arrays, so only mark float data
            data[mask_fill_value] = np.nan
    else:
        data = data.copy()

    if invalid is None:
        invalid = np.isnan(data)
        if masked_array:
            invalid = invalid | mask_fill_value

    # for every invalid cell, look up the index of the nearest valid cell
    ind = distance_transform_edt(invalid, return_distances=False, return_indices=True)
    return data[tuple(ind)]

Save the reprojected data in the following formats:
- geotiff
- png
- npz

Geotiffs are used in the model to create the layers

In [None]:
# Convert every input surface: bring it onto the reference grid if needed,
# then export it as PNG preview, GeoTIFF and compressed NumPy archive.
for zfilename in filenames:
    name = zfilename[:-9] # remove '_zmap.dat'

    # import surface; the context manager closes the zmap even if reading fails
    with rasterio.open(file_directory+zfilename) as z_file:
        z_data = z_file.read(1, masked=True)
        extent = z_file.bounds.left, z_file.bounds.right, z_file.bounds.bottom, z_file.bounds.top
        nodata = z_file.nodata

    # check the shape and bounding box
    if z_data.shape != global_shape or extent != global_extent:
        print(" - Resampling", name)
        # coordinates of the source grid, built the same way as the target grid
        # NOTE(review): yc ascends from the bottom bound, while rasterio arrays
        # are normally stored top row first. The row mapping looks consistent
        # only when source and target extents coincide - confirm for surfaces
        # whose extent differs from the reference grid.
        n_rows, n_cols = z_data.shape
        xc = np.linspace(extent[0], extent[1], n_cols, endpoint=False)
        yc = np.linspace(extent[2], extent[3], n_rows, endpoint=False)

        # remove mask and fill NaNs so the interpolator sees no gaps
        z_array = fill_ndimage(z_data)

        # interpolate to global grid; fill_value=None extrapolates outside the source grid
        rgi = RegularGridInterpolator((yc, xc), z_array, bounds_error=False, fill_value=None)
        z_data_interp = rgi((yq, xq), method='linear')

        # resampled surfaces all share the mask of the reference grid
        z_data = np.ma.array(data=z_data_interp, mask=global_mask)


    # create a plot
    fig = plt.figure(figsize=(10,10))
    ax1 = fig.add_subplot(111, title=name)
    im1 = ax1.imshow(z_data, extent=global_extent)
    fig.colorbar(im1)
    fig.savefig(output_png_directory+name+'.png', bbox_inches='tight', dpi=300)
    plt.close(fig)  # release the figure so memory does not grow over the loop


    # save geotiff
    z_data_gtif = z_data.copy()
    z_data_gtif.data[global_mask] = nodata
    # GDAL_TIFF_INTERNAL_MASK is a GDAL configuration option, not a GTiff
    # creation option, so it is set through rasterio.Env instead of open()
    with rasterio.Env(GDAL_TIFF_INTERNAL_MASK=True):
        with rasterio.open(output_geotiff_directory+name+'.tiff', 'w', driver='GTiff',
                           height=global_shape[0], width=global_shape[1],
                           count=1, dtype=z_data.dtype, nodata=nodata,
                           crs=crs, transform=global_transform) as gtif:
            gtif.write(z_data_gtif, 1)


    # save numpy
    np.savez_compressed(output_numpy_directory+name+'.npz',
                       data=z_data.data, mask=z_data.mask, extent=global_extent, x=xcoords, y=ycoords)

    print("Finished {}".format(name))
