# Water Balance Project
1. Display soil moisture data. 
We have 135 soil moisture files. Each file has a time dimension equal to 1, so I presume one per month. Each file has 6 depths for soil moisture readings. And then each file has 4 lat and 6 long locations.

In [20]:
# Import requiried packages
import numpy as np
import netCDF4 as nc
from netCDF4 import MFDataset
from pathlib import Path
import xarray as xr
import dask

# for working with geospatial data
import geopandas as gpd
from pyproj import CRS
from shapely import geometry

# for plotting, import matplotlib.pyplot
import matplotlib.pyplot as plt
# tell jupyter to display plots "inline" in the notebook
%matplotlib inline

In [3]:
# Open 1 file to see what it entails
fn0 = 'data/SoilMoistureData/0.nc4'
ds0 = nc.Dataset(fn0)
print(ds0)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    CDI: Climate Data Interface version 1.9.8 (https://mpimet.mpg.de/cdi)
    Conventions: CF-1.6
    history_L34RS: 'Created by L34RS v1.4.1 @ NASA GES DISC on September 02 2020 23:42:26. Spatial: -121.029 -119.938 46.29 46.805. Variables: SOILM'
    CDO: Climate Data Operators version 1.9.8 (https://mpimet.mpg.de/cdo)
    dimensions(sizes): time(1), lon(9), lat(4), depth(6), bnds(2)
    variables(dimensions): float64 time(time), float64 lon(lon), float64 lat(lat), float64 depth(depth), float64 depth_bnds(depth, bnds), float32 SOILM(time, depth, lat, lon)
    groups: 


In [4]:
# Print the metadata of the file
print(ds0.__dict__)

{'CDI': 'Climate Data Interface version 1.9.8 (https://mpimet.mpg.de/cdi)', 'Conventions': 'CF-1.6', 'history_L34RS': "'Created by L34RS v1.4.1 @ NASA GES DISC on September 02 2020 23:42:26. Spatial: -121.029 -119.938 46.29 46.805. Variables: SOILM'", 'CDO': 'Climate Data Operators version 1.9.8 (https://mpimet.mpg.de/cdo)'}


In [5]:
# Print the meta data for the soil moisture variable
print(ds0['SOILM'])

<class 'netCDF4._netCDF4.Variable'>
float32 SOILM(time, depth, lat, lon)
    long_name: Soil moisture content
    units: kg/m^2
    code: 86
    table: 130
    _FillValue: 1e+20
    missing_value: 1e+20
    original_name: var86
unlimited dimensions: time
current shape = (1, 6, 4, 9)
filling off


In [6]:
# Print the data of the soil moisture variable
soilm0 = ds0['SOILM'][:]

In [7]:
# Save all the file names to a list called soilM
soilM = []
pathlist = Path("data").rglob('**/*.nc4')
for path in pathlist:
     # because path is object not string
     path_in_str = str(path)
     soilM.append(path_in_str)
print(soilM[1])

data/SoilMoistureData/46.nc4


In [17]:
# Open the dataset that is distrubted across a large number of files
# http://xarray.pydata.org/en/stable/io.html#netcdf
ds = xr.open_mfdataset('data/SoilMoistureData/*.nc4',concat_dim="time",
                  data_vars='minimal', coords='minimal', compat='override')

#Check out the dataset
ds

Unnamed: 0,Array,Chunk
Bytes,96 B,96 B
Shape,"(6, 2)","(6, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 96 B 96 B Shape (6, 2) (6, 2) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2  6,

Unnamed: 0,Array,Chunk
Bytes,96 B,96 B
Shape,"(6, 2)","(6, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,116.64 kB,864 B
Shape,"(135, 6, 4, 9)","(1, 6, 4, 9)"
Count,405 Tasks,135 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 116.64 kB 864 B Shape (135, 6, 4, 9) (1, 6, 4, 9) Count 405 Tasks 135 Chunks Type float32 numpy.ndarray",135  1  9  4  6,

Unnamed: 0,Array,Chunk
Bytes,116.64 kB,864 B
Shape,"(135, 6, 4, 9)","(1, 6, 4, 9)"
Count,405 Tasks,135 Chunks
Type,float32,numpy.ndarray


In [16]:
#Check out the soil moisture variable
ds.SOILM

Unnamed: 0,Array,Chunk
Bytes,116.64 kB,864 B
Shape,"(135, 6, 4, 9)","(1, 6, 4, 9)"
Count,405 Tasks,135 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 116.64 kB 864 B Shape (135, 6, 4, 9) (1, 6, 4, 9) Count 405 Tasks 135 Chunks Type float32 numpy.ndarray",135  1  9  4  6,

Unnamed: 0,Array,Chunk
Bytes,116.64 kB,864 B
Shape,"(135, 6, 4, 9)","(1, 6, 4, 9)"
Count,405 Tasks,135 Chunks
Type,float32,numpy.ndarray


In [26]:
#Create bounding box
bounding_box = gpd.GeoSeries(geometry.box(-121.029510,46.290020,-119.937744,46.804760))
bounding_box.bounds

Unnamed: 0,minx,miny,maxx,maxy
0,-121.02951,46.29002,-119.937744,46.80476
