## Create NetCDF from .csv file

Experiment using MetSim (a Python package) to convert HADS gage time-series data into NetCDF format.

In [1]:
!conda install -c conda-forge cartopy --y

Solving environment: done


  current version: 4.5.4
  latest version: 4.6.8

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



In [2]:
!conda install -c conda-forge geoviews --y

Solving environment: done


  current version: 4.5.4
  latest version: 4.6.8

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



In [None]:
!conda install -c conda-forge metsim --y

Begin with some standard imports.

In [None]:
%pylab inline
import cartopy
import geoviews as gv
import geopandas as gpd
import holoviews as hv
import pandas as pd
import xarray as xr
from metsim import MetSim

# Notebook-wide plotting defaults: figure size in inches and resolution in DPI.
# (`plt` and `pylab` are both provided by the `%pylab inline` magic above and
# expose the same matplotlib rcParams mapping.)
plt.rcParams['figure.figsize'] = (10,6)
plt.rcParams['figure.dpi'] = 96
# Enable the bokeh plotting backend for holoviews/geoviews output
hv.notebook_extension('bokeh')

## Put together the required meteorological data
We've gathered some data from the HADS site that we will use as input. There are separate CSV files of precipitation data for 2009 and 2010; we will generate MetSim input for 2010. To do this, we must first convert the data into an xarray dataset.

## create the dataset with the relevant dimensions

In [None]:
# Time axis of the forcing data. It covers calendar year 2010 so that it lines
# up with the precip_2010.csv readings loaded into this dataset and with the
# 2010 run period passed to MetSim.
# NOTE(review): change this range if the input is swapped for other gage data.
dates = pd.date_range('1/1/2010', '12/31/2010')
dims = ('time', 'lat', 'lon', )

# Grid coordinates. Every (lat, lon) combination is a grid cell, so two values
# on each axis describe a 2 x 2 grid.
lats = [19.8372, 21.4319]
lons = [-155.613, -157.837]
elev = 1706.90 # meters

# Size every axis from its coordinate list so that arrays allocated with
# `shape` always agree with `coords`
shape = (len(dates), len(lats), len(lons), )
coords = {'time': dates, 'lat': lats, 'lon': lons}

# Create the initial met data input data structure (coordinates only, no variables yet)
met_data = xr.Dataset(coords=coords)
met_data

## create the actual data arrays to put data into

In [None]:
# Allocate one NaN-filled (time, lat, lon) array per forcing variable.
# The array shape is derived from the coordinate lists themselves, so the data
# always matches the length of every axis in `coords`.
grid_shape = tuple(len(coords[dim]) for dim in dims)
for varname in ['prec']:
    met_data[varname] = xr.DataArray(data=np.full(grid_shape, np.nan),
                                     coords=coords, dims=dims,
                                     name=varname)

## Read in the data and put it into the dataset

In [None]:
# Every grid cell receives the same gage record, so read and convert it once
# instead of re-reading the CSV for each cell.
df = pd.read_csv("./reynolds_creek_data/daily/precip_2010.csv", skiprows=[0,1])
# Difference consecutive readings (the first difference is NaN and is dropped)
# and scale by 25.4 -- presumably inches to millimetres, per the column name.
prec_vals = df['PREC.I-1 (in) '].diff().values[1:]*25.4

# Fail early with a readable message if the record does not fit the time axis
n_times = len(met_data['time'])
if len(prec_vals) != n_times:
    raise ValueError(
        "precipitation record has {} values but the dataset has {} time steps"
        .format(len(prec_vals), n_times))

for i in range(len(lats)):
    for j in range(len(lons)):
        # Add the precipitation data
        met_data['prec'].values[:, i, j] = prec_vals

# This is what we have now
met_data.to_netcdf('./input/rc_forcing.nc')
met_data

## Put together the required domain

In [None]:
# We form the domain in a similar fashion
# First, by creating the data structure: one NaN-filled (lat, lon) array per
# domain variable, sized from the coordinate lists so it matches `coords`
coords = {'lat': lats, 'lon': lons}
domain = xr.Dataset(coords=coords)
grid_shape = (len(lats), len(lons), )
for varname in ['elev', 'mask']:
    domain[varname] = xr.DataArray(data=np.full(grid_shape, np.nan),
                                   coords=coords,
                                   dims=('lat', 'lon', ))

# Add the data. Only the first grid cell is populated; every other cell keeps
# NaN for both elevation and mask.
# NOTE(review): confirm that leaving the remaining cells unmasked is intended.
domain['elev'][0, 0] = elev
domain['mask'][0, 0] = 1
domain.to_netcdf('./input/rc_domain.nc')
domain

## Put together the required state

In [None]:
# Finally, we create the state file - the dates are the 90 days prior to
# the MetSim run dates - as usual, create an empty data structure to
# read the data into.
# The 2009 CSVs supply the values, so the time axis is the last 90 days of 2009.
dates = pd.date_range('10/3/2009', '12/31/2009')
n_state_days = len(dates)
# Size the spatial axes from the coordinate lists so the arrays match `coords`
shape = (n_state_days, len(lats), len(lons), )
dims = ('time', 'lat', 'lon', )

coords = {'time': dates, 'lat': lats, 'lon': lons}
state = xr.Dataset(coords=coords)
for varname in ['prec', 't_min', 't_max']:
    state[varname] = xr.DataArray(data=np.full(shape, np.nan),
                               coords=coords, dims=dims,
                               name=varname)

# Do precip data: difference consecutive readings, keep the final
# `n_state_days` values and scale by 25.4 (presumably inches to millimetres,
# per the column name)
df = pd.read_csv("./reynolds_creek_data/daily/precip_2009.csv", skiprows=[0,1])
prec_vals = df['PREC.I-1 (in) '].diff().values[-n_state_days:] * 25.4
state['prec'].values[:, 0, 0] = prec_vals

# And now temp data: the final `n_state_days` daily minima and maxima
df = pd.read_csv("./reynolds_creek_data/daily/temp_2009.csv", skiprows=[0,1])
tmin_vals = df['TMIN.D-1 (degC) '].values[-n_state_days:]
tmax_vals = df['TMAX.D-1 (degC) '].values[-n_state_days:]
state['t_min'].values[:, 0, 0] = tmin_vals
state['t_max'].values[:, 0, 0] = tmax_vals
state.to_netcdf('./input/rc_state.nc')
state

## Registering parameters and building the driver

In [None]:
# Period to simulate; its first and last days become the run's start and stop
dates = pd.date_range('1/1/2010', '12/31/2010')

# Driver configuration: input/output locations, scheduling options, and the
# name mappings for the variables in each input file.
# NOTE(review): 'forcing_vars' lists t_max and t_min, but the forcing dataset
# built earlier in this notebook only receives 'prec' -- confirm before running.
params = dict(
    time_step="60",
    start=dates[0],
    stop=dates[-1],
    forcing='./input/rc_forcing.nc',
    domain='./input/rc_domain.nc',
    state='./input/rc_state.nc',
    forcing_fmt='netcdf',
    out_dir='./output',
    output_prefix='reynolds',
    scheduler='threading',
    chunks=dict(lat=1, lon=1),
    forcing_vars=dict(prec='prec', t_max='t_max', t_min='t_min'),
    state_vars=dict(prec='prec', t_max='t_max', t_min='t_min'),
    domain_vars=dict(elev='elev', lat='lat', lon='lon', mask='mask'),
)

# Build the driver, run it, then load its output into memory
ms = MetSim(params)
ms.run()
output = ms.open_output().load()