<a href="https://colab.research.google.com/github/wherediddavidgo/multitemporal_RSSA/blob/main/discharge_model_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dataretrieval

Collecting dataretrieval
  Downloading dataretrieval-1.0.12-py3-none-any.whl.metadata (9.2 kB)
Downloading dataretrieval-1.0.12-py3-none-any.whl (38 kB)
Installing collected packages: dataretrieval
Successfully installed dataretrieval-1.0.12


In [2]:
# !pip install netCDF4
import xarray as xr
# from netCDF4 import Dataset
import numpy as np
import pandas as pd
import geopandas as gpd
from google.colab import drive
from matplotlib import pyplot as plt
import datetime
import glob
from tqdm import tqdm
from scipy.stats import lognorm, linregress
from dataretrieval import nwis

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
merit_centerlines = gpd.read_file('/content/drive/MyDrive/MERIT/riv_pfaf_7_MERIT_Hydro_v07_Basins_v01_bugfix1.shp').to_crs(3857)

In [6]:
gage_list = ['06764880', '06775900', '06775500', '06770200', '06767970', '06768000',
             '06784000', '06781900', '06785000', '06790500', '06800500', '06793000',
             '06800000', '06777495', '06786000', '06799350', '06799315', '06799000',
             '06797500', '06785500', '06710247', '06711565', '06714215', '06721000',
             '06759500', '06620000', '06680500', '06630000', '06674500', '06635000',
             '06670500', '06657000', '06652000']


gages = nwis.get_info(sites=gage_list)[0][['site_no', 'geometry']]

gage_COMID = gpd.sjoin_nearest(gages.to_crs(3857), merit_centerlines, max_distance=1000)[['site_no', 'COMID']]
gage_Q = nwis.get_dv(sites=gage_list, start='2018-01-01', end='2024-12-31', parameterCd='00060')[0]
gage_Q['00060_Mean'] = gage_Q['00060_Mean'] * (3.28 ** -3)
gage_Q = gage_Q.rename(columns={'00060_Mean': 'Q_cms', '00060_Mean_cd': 'Q_cd'}).reset_index()

gage_Q['date'] = pd.to_datetime(gage_Q.datetime).dt.date
gage_Q = gage_Q.loc[gage_Q.Q_cd == 'A']
gage_Q = gage_Q[['site_no', 'Q_cms', 'date']].set_index(['site_no', 'date'])

gage_Q = pd.merge(gage_Q, gage_COMID, how='left', on='site_no')

gage_Q

Unnamed: 0,site_no,Q_cms,COMID
0,06620000,9.323401,74030717
1,06620000,12.128923,74030717
2,06620000,12.355632,74030717
3,06620000,11.023717,74030717
4,06620000,9.748480,74030717
...,...,...,...
67799,06800500,30.038921,74029691
67800,06800500,32.306010,74029691
67801,06800500,33.439554,74029691
67802,06800500,35.990028,74029691


In [None]:
Q2024 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2024.nc')
Q2023 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2023.nc')
Q2022 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2022.nc')
Q2021 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2021.nc')
Q2020 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2020.nc')
Q2019 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2019.nc')
Q2018 = xr.open_dataset('/content/drive/MyDrive/grades_Q/GRADES-hydroDL_V2.0_pfaf_07_2018.nc')

In [None]:
COMID_arr = widths_filtered.COMID.unique()

Q2024 = Q2024.sel(rivid=COMID_arr)
Q2023 = Q2023.sel(rivid=COMID_arr)
Q2022 = Q2022.sel(rivid=COMID_arr)
Q2021 = Q2021.sel(rivid=COMID_arr)
Q2020 = Q2020.sel(rivid=COMID_arr)
Q2019 = Q2019.sel(rivid=COMID_arr)
Q2018 = Q2018.sel(rivid=COMID_arr)

all_Q = xr.concat([Q2018, Q2019, Q2020, Q2021, Q2022, Q2023, Q2024], dim='time')

  all_Q = xr.concat([Q2018, Q2019, Q2020, Q2021, Q2022, Q2023, Q2024], dim='time')


In [None]:
Qdf = all_Q.to_dataframe()\
    .reset_index()\
    .drop('nerr', axis=1)\
    .drop_duplicates(['time', 'rivid'])\
    .rename(columns={'time': 'date', 'rivid': 'COMID'})

Qdf.COMID = np.uint32(Qdf.COMID)

Qdf = Qdf.set_index(['date', 'COMID'])

Qdf['Q_percentile'] = Qdf.groupby('COMID')['Qout'].rank(pct=True)

decile_bins = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
quintile_bins = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
quartile_bins = [0, 0.25, 0.5, 0.75, 1.0]

Qdf['decile'] = Qdf.groupby('COMID')['Q_percentile'].transform(
    lambda x: pd.cut(x, bins=decile_bins, labels=False)
)
Qdf['quintile'] = Qdf.groupby('COMID')['Q_percentile'].transform(
    lambda x: pd.cut(x, bins=quintile_bins, labels=False)
)
Qdf['quartile'] = Qdf.groupby('COMID')['Q_percentile'].transform(
    lambda x: pd.cut(x, bins=quartile_bins, labels=False)
)


Qdf

Unnamed: 0_level_0,Unnamed: 1_level_0,Qout,Qout_err,crs,lon,lat,Q_percentile,decile,quintile,quartile
date,COMID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-01,74023944,0.003375,,,9.969210e+36,9.969210e+36,0.267892,2,1,1
2018-01-01,74024231,0.002171,,,9.969210e+36,9.969210e+36,0.146265,1,0,0
2018-01-01,74023808,0.015069,,,9.969210e+36,9.969210e+36,0.152522,1,0,0
2018-01-01,74024019,0.001689,,,9.969210e+36,9.969210e+36,0.176379,1,0,0
2018-01-01,74023676,0.049701,,,9.969210e+36,9.969210e+36,0.115761,1,0,0
...,...,...,...,...,...,...,...,...,...,...
2024-12-31,74029276,0.101944,9.969210e+36,-2.147484e+09,9.969210e+36,9.969210e+36,0.214314,2,1,0
2024-12-31,74029287,0.022546,9.969210e+36,-2.147484e+09,9.969210e+36,9.969210e+36,0.349628,3,1,1
2024-12-31,74029355,0.081350,9.969210e+36,-2.147484e+09,9.969210e+36,9.969210e+36,0.386781,3,1,1
2024-12-31,74029454,100.371552,9.969210e+36,-2.147484e+09,9.969210e+36,9.969210e+36,0.837309,8,4,3
