## Viewing and interacting with a large InSAR dataset on CARC

The InSAR dataset "geo_timeseries_ERA5_ramp_demErr_msk.h5" contains the complete time series of deformation between 2014 and 2023 over the city of Jakarta, Indonesia. The file "geo_velocity_msk.h5" contains the average rate of deformation at each pixel. Together, these datasets are about 17 GB in size, so it's inconvenient to download them and work with them on a laptop. Thankfully, we can use CARC to make life easier!


In [None]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

# You might need to install the simplekml package first. Install it with conda the way you normally install packages, e.g.:
#!conda install -c conda-forge simplekml

from simplekml import (Kml, OverlayXY, ScreenXY, Units, RotationXY, AltitudeMode, Camera)

# our own functions:
import kml_functions

# set high dpi for our images
# NOTE: this is a global matplotlib default, so it applies to every figure
# created after this cell runs. The KML cell further down resets it to 100
# before drawing the colorbar legend.
plt.rcParams['figure.dpi'] = 500

In [None]:
# Read the georeferencing metadata and build the longitude/latitude axes.

fname = 'geo_timeseries_ERA5_ramp_demErr_msk.h5'
with h5py.File(fname, 'r') as f:
    # All metadata lives in the file-level attributes.
    # To list every attribute in the file (a lot!): print(f.attrs.keys())
    x0, dx, y0, dy, rlon, rlat = (
        float(f.attrs[key])
        for key in ('X_FIRST', 'X_STEP', 'Y_FIRST', 'Y_STEP', 'REF_LON', 'REF_LAT')
    )
    nx, ny = (int(f.attrs[key]) for key in ('WIDTH', 'LENGTH'))

# Expand (first value, step, count) into explicit coordinate vectors
# (x0, x0+dx, x0+2*dx, ...) for longitude and likewise for latitude.
lonvec = [x0 + col*dx for col in range(nx)]
latvec = [y0 + row*dy for row in range(ny)]


In [None]:
# now, read some of the timeseries images and plot them in a grid (3x3)
# Each panel shows one acquisition relative to the first one (timestep0).
# Relies on lonvec/latvec from the coordinate cell above.

fname = 'geo_timeseries_ERA5_ramp_demErr_msk.h5'
with h5py.File(fname, 'r') as f:
    # Read the dates from the file (stored as bytes, decoded to str)
    dates = [date.decode('utf-8') for date in f['date'][:]]
    timestep0 = f['timeseries'][0, :, :]

    # Calculate nearly-evenly spaced timesteps.
    # max(1, ...) keeps the step non-zero when the file holds fewer than
    # 9 dates; a zero step would make np.arange raise.
    total_timesteps = len(dates)
    spacing = max(1, total_timesteps // 9)  # 9 is 3x3
    selected_t_indices = np.arange(0, total_timesteps, spacing)[:9]  # Get the first 9 indices

    # These do not change from panel to panel, so compute them once.
    map_extent = (lonvec[0], lonvec[-1], latvec[-1], latvec[0])
    # Stretch the y axis by 1/cos(mid latitude) so that degrees of longitude
    # and latitude are drawn at a comparable ground scale.
    map_aspect = 1 / np.cos((np.pi/180) * latvec[int(len(latvec)/2)])

    # Setup the figure
    fig, axes = plt.subplots(3, 3, figsize=(8,8))
    panels = axes.ravel()
    im = None
    for ax, t_index in zip(panels, selected_t_indices):
        timestep_full = f['timeseries'][t_index, :, :] - timestep0
        # downsample at 9x9 (project helper) so the high-dpi figure stays light
        timestep = kml_functions.downsample_image(timestep_full, (9,9))

        im = ax.imshow(timestep, extent=map_extent, cmap='RdBu_r', vmin=-0.5, vmax=0.5)
        ax.set_aspect(map_aspect)
        ax.set_title('Time: %s' % dates[t_index], fontsize=8)
        ax.axis('off')  # Hide axis for better visualization

    # Blank out any panels that did not receive an image
    for ax in panels[len(selected_t_indices):]:
        ax.axis('off')

    # Adding a colorbar to the figure (only if at least one image was drawn)
    if im is not None:
        cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
        fig.colorbar(im, cax=cbar_ax)

    # Adjust space between plots
    plt.subplots_adjust(right=0.9)

    plt.show()


In [None]:
# let's extract timeseries at a few points and plot them.

def extract_timeseries(filename, lon, lat):
    """
    Extract the time series at the grid pixel nearest to a longitude/latitude.

    The values are returned relative to the first acquisition, i.e. the
    first element of the returned series is always zero.

    Args:
    - filename (str): The name of the HDF5 file.
    - lon (float): The longitude of the desired point.
    - lat (float): The latitude of the desired point.

    Returns:
    - list of datetime: The acquisition dates, parsed from the 'date'
      dataset (stored as YYYYMMDD byte strings).
    - np.array: The timeseries data at the specified point, minus its
      value at the first date.

    Raises:
    - IndexError: If the point falls outside the grid stored in the file.
    """

    with h5py.File(filename, 'r') as f:
        # Extracting attributes
        x0 = float(f.attrs['X_FIRST'])
        dx = float(f.attrs['X_STEP'])
        y0 = float(f.attrs['Y_FIRST'])
        dy = float(f.attrs['Y_STEP'])

        timeseries = f['timeseries']
        # The dataset is indexed as (time, y, x) below.
        _, ny, nx = timeseries.shape

        # Calculating indices of the nearest pixel
        x_index = round((lon - x0) / dx)
        y_index = round((lat - y0) / dy)

        # Reject points outside the grid explicitly: a negative index would
        # otherwise wrap around and silently return a pixel from the
        # opposite edge of the image.
        if not (0 <= x_index < nx and 0 <= y_index < ny):
            raise IndexError(
                'Point (lon=%s, lat=%s) maps to pixel (x=%d, y=%d), which is '
                'outside the %d x %d grid in %s'
                % (lon, lat, x_index, y_index, nx, ny, filename)
            )

        # Extracting timeseries at the point (x_index, y_index): read the
        # pixel once, then reference it to the first acquisition.
        ts_at_point = timeseries[:, y_index, x_index]
        ts_at_point = ts_at_point - ts_at_point[0]

        # Extract and convert dates
        byte_dates = f['date'][:]
        dates = [datetime.strptime(date.decode('utf-8'), "%Y%m%d") for date in byte_dates]

    return dates, ts_at_point


fname = 'geo_timeseries_ERA5_ramp_demErr_msk.h5'

# Three sample locations, given as (longitude, latitude) in degrees.
lon1, lat1 = 107.16, -6.85
lon2, lat2 = 106.8, -6.1
lon3, lat3 = 107.16, -6.25

# Pull the time series at each location. The date vector is read from the
# same file on every call, so keeping only the last `dates` is fine.
dates, ts1 = extract_timeseries(fname, lon1, lat1)
dates, ts2 = extract_timeseries(fname, lon2, lat2)
dates, ts3 = extract_timeseries(fname, lon3, lat3)



# Load the estimated velocity map (and its standard deviation), then show the
# map with the three sample points next to their time series.

fname = 'geo_velocity_msk.h5'
with h5py.File(fname, 'r') as f:
    vel = f['velocity'][:]
    velStd = f['velocityStd'][:]


fig, axes = plt.subplots(1, 2, figsize=(10,8))
map_ax, ts_ax = axes

# Left panel: velocity map, with each sample point marked in its own colour.
map_extent = (lonvec[0], lonvec[-1], latvec[-1], latvec[0])
mid_lat = latvec[int(len(latvec)/2)]
map_ax.imshow(vel, extent=map_extent, cmap='RdBu_r', vmin=-0.05, vmax=0.05)
map_ax.set_aspect(1/np.cos((np.pi/180)*mid_lat))
map_ax.set_title('Velocity: m/yr', fontsize=8)
for pt_lon, pt_lat, marker in ((lon1, lat1, 'kx'), (lon2, lat2, 'rx'), (lon3, lat3, 'mx')):
    map_ax.plot(pt_lon, pt_lat, marker)

# Right panel: the three time series, coloured to match the markers.
for series, line_style in ((ts1, 'k-'), (ts2, 'r-'), (ts3, 'm-')):
    ts_ax.plot(dates, series, line_style)

# Make the right panel square: the aspect that does this is the ratio of
# the current x-range to the current y-range.
x_min, x_max = ts_ax.get_xlim()
y_min, y_max = ts_ax.get_ylim()
ts_ax.set_aspect((x_max - x_min) / (y_max - y_min))

plt.show()


In [None]:
# plot the estimated velocities
# Relies on lonvec/latvec from the coordinate cell above.

fname = 'geo_velocity_msk.h5'
with h5py.File(fname,'r') as f:
    # read the data from the file:
    vel    = f['velocity'][:]
    velStd = f['velocityStd'][:]

fig = plt.figure(figsize=(12,6))
ax = fig.add_subplot(111)
# Pass the colormap and colour limits straight to imshow. plt.set_cmap would
# also change matplotlib's global default colormap for every later figure.
im = ax.imshow(
    vel,
    extent=(lonvec[0],lonvec[-1],latvec[-1],latvec[0]),
    cmap='RdBu_r',
    vmin=-0.05,
    vmax=0.05,
)
# Stretch the y axis by 1/cos(mid latitude) so that degrees of longitude and
# latitude are drawn at a comparable ground scale.
ax.set_aspect( 1/np.cos((np.pi/180)*latvec[int(len(latvec)/2)]) )
fig.colorbar(im, ax=ax)
ax.set_title('Estimated velocities, m/yr')
plt.show()

In [None]:
# Optional: create a .png image and kml for the long-term rate
# Relies on lonvec/latvec from the coordinate cell and on vel from the
# velocity cell above.

# get the number of pixels and make a meshgrid
num_pixels = len(lonvec)
lon,lat = np.meshgrid(lonvec, latvec)

# create the figure, setting some extra figure-size options related to google earth
# (gearth_fig is a project helper; presumably `pixels` sets the output image width - confirm in kml_functions)
fig, ax = kml_functions.gearth_fig(llcrnrlon=lon.min(),llcrnrlat=lat.min(),urcrnrlon=lon.max(),urcrnrlat=lat.max(),pixels=num_pixels)

# create our image here. We can use any matplotlib or cartopy commands.
# vel is plotted elsewhere in m/yr; *1000 converts it to the mm/yr used by the legend label.
cs = ax.pcolormesh(lon, lat, vel*1000, cmap='RdBu_r')
cs.set_clim([-50,50])
ax.set_axis_off()
fig.savefig('rate.png', transparent=False, format='png')
plt.show()

# create an image of just the colorbar, to plot separately.
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.facecolor']='white'
# Keep the figure background patch and make it white, so the black label
# stays readable when the legend is overlaid on imagery.
fig = plt.figure(figsize=(1.0, 4.0), facecolor='white')
ax = fig.add_axes([0.0, 0.05, 0.2, 0.9])
cb = fig.colorbar(cs, cax=ax)
cb.set_label('Long-term LOS velocities [mm/yr]', rotation=-90, color='k', labelpad=20)
# Save as a real PNG so the file contents match the .png name. The original
# used format='jpg', which wrote JPEG data into 'rate_legend.png' and cannot
# store transparency anyway.
fig.savefig('rate_legend.png', transparent=False, facecolor='white', format='png')

# create the KML file (make_kml is a project helper that bundles the image and legend into a .kmz)
kml_functions.make_kml(llcrnrlon=lon.min(), llcrnrlat=lat.min(),urcrnrlon=lon.max(), urcrnrlat=lat.max(),
         figs=['rate.png'], colorbar='rate_legend.png',
         kmzfile='rate.kmz', name='Long-term rate from Sentinel-1 P056, 2014-2022')