# Data Processing script for the NSM/SWEML v2.0
This .ipynb script uses python module for retrieving NASA ASO observations, locating nearest SNOTEL sites, connecting SNOTEL obs with ASO obs, and add geospatial features to the ML training/testing/hindcast dataframes.

In [4]:
pip install ipywidgets==8.1.3

Collecting ipywidgets==8.1.3
  Downloading ipywidgets-8.1.3-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.11 (from ipywidgets==8.1.3)
  Using cached widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.11 (from ipywidgets==8.1.3)
  Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)
Downloading ipywidgets-8.1.3-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.4/139.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached jupyterlab_widgets-3.0.13-py3-none-any.whl (214 kB)
Using cached widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)
Installing collected packages: widgetsnbextension, jupyterlab-widgets, ipywidgets
  Attempting uninstall: widgetsnbextension
    Found existing installation: widgetsnbextension 3.6.10
    Uninstalling widgetsnbextension-3.6.10:
      Successfully uninstalled widgetsnbextension-3.6.10
  Attempting 

In [1]:
import os
HOME = os.getcwd()
import model_Domain
HOME = os.getcwd()


#If you get a proj.db error below, run the following and put the following into the terminal
import pyproj
# Get the PROJ data directory
proj_data_dir = pyproj.datadir.get_data_dir()
proj_db_path = proj_data_dir + "/proj.db"
os.environ['PROJ_LIB'] =pyproj.datadir.get_data_dir()
os.environ['PROJ_LIB']
print(proj_db_path)

#make SWEMLv2.0 modeling domain for western USA
region_list = model_Domain.modeldomain()
region_list.remove('NorthernRockies') # There is no ASO data for this region
region_list = ['Northwest', 'SouthernRockies', 'Southwest']
region_list

no AWS credentials present, skipping
/uufs/chpc.utah.edu/common/home/u1154915/.conda/envs/SWEML_310/lib/python3.10/site-packages/pyproj/proj_dir/share/proj/proj.db
Checking for required files
ground_measures_metadata.parquet is local


['Northwest', 'SouthernRockies', 'Southwest']

In [3]:
ModuleDir = os.getcwd()
ModuleDir = os.chdir('Dataprocessing')
ModuleDir = os.getcwd()
from ASOget import ASODownload, ASODataProcessing

# # import earthaccess https://earthaccess.readthedocs.io/en/latest/howto/authenticate/
# # earthaccess.login(persist=True)

# Inputs for fetching ASO data for a region
short_name = 'ASO_50M_SWE'
version = '1'
time_start = '2013-04-02T00:00:00Z'
time_end = '2019-07-19T23:59:59Z'
output_res = 1000 #desired spatial resulution in meters (m)
directory = "Raw_ASO_Data"

#Get ASO data, sometime sites will give error and break code, most times you can just rerun it using the data_processor sections below (e.g., comment out other parts
for region in region_list:
    print(region)
    folder_name = f"{region}/{directory}"
    # data_tool = ASODownload(short_name, version)
    # b_box = data_tool.BoundingBox(region)  
    # url_list = data_tool.cmr_search(time_start, time_end, region, b_box)
    # data_tool.cmr_download(directory, region)

    #Convert ASO tifs to parquet
    data_processor = ASODataProcessing() #note, 2019-5-1, 2019-06-11 seems to be bad, manually removed from SW region
    data_processor.convert_tiff_to_parquet_multiprocess(folder_name, output_res, region) 

no AWS credentials present, skipping
Northwest
Converting .tif to parquet
Converting 2 ASO tif files to parquet


  0%|          | 0/2 [00:00<?, ?it/s]

Checking to make sure all files successfully converted...


  0%|          | 0/2 [00:00<?, ?it/s]

SouthernRockies
Converting .tif to parquet
Converting 19 ASO tif files to parquet


  0%|          | 0/19 [00:00<?, ?it/s]

Checking to make sure all files successfully converted...


  0%|          | 0/14 [00:00<?, ?it/s]

## Get Snotel and CDEC in situ observations
- clean in situ observations, specifically the CDEC sites, need a data processing method to remove outtliers and nan/0 obs
- Ideas - add nearest sites elevation, distance from cell, then can bypass sites with bad data. 

In [2]:
ModuleDir = os.getcwd()
ModuleDir = os.chdir('Dataprocessing')
ModuleDir = os.getcwd()
#Get in situ observations
import get_InSitu_obs
import numpy as np

#make a list of dates to align with the ASO observations (they go as early as Jan-29 and as far out as the July-17)
years = np.arange(2013,2020,1)
start_month_day = '10-01'
end_month_day = '08-31'
#datelist = get_InSitu_obs.make_dates(years, start_month_day, end_month_day, WY = True)

# observations 
get_InSitu_obs.Get_Monitoring_Data_Threaded_Updated(years, start_month_day, end_month_day, WY = True)

#combine years
get_InSitu_obs.combine_dfs(years)

no AWS credentials present, skipping
getting in situ snow obs metadata
Getting SNOTEL and CDEC observations for 2013
Getting California Data Exchange Center SWE data from 130 sites...


  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2012-10-01,2013-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2012-10-01,2013-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2012-10-01,2013-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2012-10-01,2013-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2012-10-01,2013-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2012-10-01,2013-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2012-10-01,2013-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2012-10-01,2013-08-

  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2013-10-01,2014-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2013-10-01,2014-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2013-10-01,2014-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2013-10-01,2014-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2013-10-01,2014-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2013-10-01,2014-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2013-10-01,2014-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2013-10-01,2014-08-

  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2014-10-01,2015-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2014-10-01,2015-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2014-10-01,2015-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2014-10-01,2015-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2014-10-01,2015-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2014-10-01,2015-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2014-10-01,2015-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2014-10-01,2015-08-

  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2015-10-01,2016-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2015-10-01,2016-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2015-10-01,2016-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2015-10-01,2016-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2015-10-01,2016-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2015-10-01,2016-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2015-10-01,2016-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2015-10-01,2016-08-

  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2016-10-01,2017-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2016-10-01,2017-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2016-10-01,2017-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2016-10-01,2017-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2016-10-01,2017-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2016-10-01,2017-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2016-10-01,2017-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2016-10-01,2017-08-

  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2017-10-01,2018-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2017-10-01,2018-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2017-10-01,2018-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2017-10-01,2018-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2017-10-01,2018-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2017-10-01,2018-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2017-10-01,2018-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2017-10-01,2018-08-

  0%|          | 0/130 [00:00<?, ?it/s]

Start retrieving data for Farewell Gap, FRW
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/FRW:CA:MSNT%257Cid=%2522%2522%257Cname/2018-10-01,2019-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Bonanza King, BNK
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/BNK:CA:MSNT%257Cid=%2522%2522%257Cname/2018-10-01,2019-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Cedar Pass, CDP
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CDP:CA:MSNT%257Cid=%2522%2522%257Cname/2018-10-01,2019-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Charlotte Lake, CRL
https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/CRL:CA:MSNT%257Cid=%2522%2522%257Cname/2018-10-01,2019-08-31/WTEQ::value?fitToS

  0%|          | 0/839 [00:00<?, ?it/s]

Start retrieving data for Adin Mtn, 301_CA_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/301:CA:SNTL%7Cid=%22%22%7Cname/2018-10-01,2019-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Agua Canyon, 907_UT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/907:UT:SNTL%7Cid=%22%22%7Cname/2018-10-01,2019-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Albro Lake, 916_MT_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/916:MT:SNTL%7Cid=%22%22%7Cname/2018-10-01,2019-08-31/WTEQ::value?fitToScreen=false
Start retrieving data for Alexander Lake, 1267_AK_SNTL using https://wcc.sc.egov.usda.gov/reportGenerator/view_csv/customMultiTimeSeriesGroupByStationReport/daily/start_of_period/1267:AK:SNTL%7Cid=%22%22%7Cname/2018-10-01,2019-08-

# Code for generating ML dataframe using nearest in situ monitoring sites

In [2]:
ModuleDir = os.getcwd()
ModuleDir = os.chdir('Dataprocessing')
ModuleDir = os.getcwd()

#had to change jupyter widgets (https://stackoverflow.com/questions/73484988/tqdm-notebook-bar-outputs-text-in-jupyter-lab), might have to reaload again and again, went from 7.7 to 8.1.3

import GeoDF
output_res = 1000

region_list = ['Northwest']

# GeoDF used to create a dataframe for ML model development. Its function is to connect in situ observations to gridded locations
for region in region_list:
    # path = f"{HOME}/SWEMLv2.0/data/ASO/{region}/{output_res}M_SWE_parquet"
    path = f"{HOME}/data/ASO/{region}/{output_res}M_SWE_parquet"

    if os.path.isdir(path) == True:
        print(region)
        #load snotel meta location data, use haversive function
        GeoDF.fetch_snotel_sites_for_cellids(region, output_res) # Using known up to date sites

        # Get geophysical attributes for each site, need to see how to add output resolution
        gdf = GeoDF.GeoSpatial(region, output_res)

        #use geodataframe with lat/long meta of all sites to determine slope, aspect, and elevation
        metadf = GeoDF.extract_terrain_data_threaded(gdf, region, output_res)
    else:
        print(f"No ASO data for {region}")




no AWS credentials present, skipping
Northwest
Loading all Geospatial prediction/observation files and concatenating into one dataframe


  0%|          | 0/2 [00:00<?, ?it/s]

Identifying unique sites to create geophysical information dataframe
converting to geodataframe
Processing snotel geometry
Calculating haversine distance for 7661 locations to in situ OBS, and saving cell-obs relationships in dictionary


  0%|          | 0/7661 [00:00<?, ?it/s]

Saving nearest SNOTEL in Northwest for each cell id in a pkl file
Loading geospatial data for Northwest
Converting to geodataframe
Calculating dataframe bounding box
-126 46 -122 50
Retrieving Copernicus 90m DEM tiles


  0%|          | 0/22 [00:00<?, ?it/s]

There are 22 tiles in the region
Determining Grid Cell Spatial Features


  0%|          | 0/7661 [00:00<?, ?it/s]

Saving Northwest dataframe in /uufs/chpc.utah.edu/common/home/civil-group1/Johnson/SWEMLv2.0/data/TrainingDFs/Northwest/1000M_Resolution


In [4]:
region_list = ['Northwest', 'SouthernRockies', 'Southwest']

## Connect Snotel to each ASO obs


In [None]:
ModuleDir = os.getcwd()
ModuleDir = os.chdir('Dataprocessing')
ModuleDir = os.getcwd()

import Obs_to_DF

output_res = 1000

#Connect nearest snotel observations with ASO data, makes a parquet file for each date  -  test to see if this works - need to just load the SNOTEL file, not collect them as in the function
for region in region_list:
    # path = f"{HOME}/SWEMLv2.0/data/ASO/{region}/{output_res}M_SWE_parquet"
    path = f"{HOME}/data/ASO/{region}/{output_res}M_SWE_parquet"

    if os.path.isdir(path) == True:
        print(region)
        dates = []
        manual = False
        Obs_to_DF.Nearest_Snotel_2_obs_MultiProcess(region, output_res, manual, dates) 
    else:
        print(f"No ASO data for {region}")


Northwest
Connecting site observations with nearest monitoring network obs
Loading observations from 2013-2019
Loading 1000M resolution grids for Northwest region
Processing datetime component of SNOTEL observation dataframe
Loading 2 processed ASO observations for the Northwest at 1000M resolution
There are 2 aso dates in snotel obs
There are 0 missing snotel obs
Connecting 2 timesteps of observations for Northwest


100%|██████████| 2/2 [00:08<00:00,  4.10s/it]

Site processing complete, adding observtional data to 20160208 df...


  0%|          | 0/7661 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160329 df...





  0%|          | 0/7661 [00:00<?, ?it/s]

Job complete for connecting SNOTEL obs to sites/dates
SouthernRockies
Connecting site observations with nearest monitoring network obs
Loading observations from 2013-2019
Loading 1000M resolution grids for SouthernRockies region
Processing datetime component of SNOTEL observation dataframe
Loading 14 processed ASO observations for the SouthernRockies at 1000M resolution
There are 14 aso dates in snotel obs
There are 0 missing snotel obs
Connecting 14 timesteps of observations for SouthernRockies


100%|██████████| 14/14 [00:08<00:00,  1.73it/s]

Site processing complete, adding observtional data to 20150406 df...


  0%|          | 0/2580 [00:00<?, ?it/s]




Site processing complete, adding observtional data to 20150407 df...


  0%|          | 0/4261 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150602 df...


  0%|          | 0/2580 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160403 df...


  0%|          | 0/2126 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160404 df...


  0%|          | 0/230 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180330 df...


  0%|          | 0/230 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180331 df...


  0%|          | 0/2134 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180524 df...


  0%|          | 0/2134 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190407 df...


  0%|          | 0/3060 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190408 df...


  0%|          | 0/847 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190419 df...


  0%|          | 0/1100 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190609 df...


  0%|          | 0/847 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190610 df...
Site processing complete, adding observtional data to 20190624 df...


  0%|          | 0/1459 [00:00<?, ?it/s]

  0%|          | 0/1110 [00:00<?, ?it/s]

Job complete for connecting SNOTEL obs to sites/dates
Southwest
Connecting site observations with nearest monitoring network obs
Loading observations from 2013-2019
Loading 1000M resolution grids for Southwest region
Processing datetime component of SNOTEL observation dataframe
Loading 99 processed ASO observations for the Southwest at 1000M resolution
There are 99 aso dates in snotel obs
There are 0 missing snotel obs
Connecting 99 timesteps of observations for Southwest


100%|██████████| 99/99 [00:08<00:00, 12.24it/s]

Site processing complete, adding observtional data to 20130403 df...


  0%|          | 0/1488 [00:00<?, ?it/s]




Site processing complete, adding observtional data to 20130429 df...


  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20130503 df...


  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20130525 df...


  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20130601 df...


  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20130608 df...


  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140320 df...


  0%|          | 0/1656 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140323 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140324 df...


  0%|          | 0/1908 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140406 df...


  0%|          | 0/1908 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140407 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140414 df...


  0%|          | 0/1908 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140420 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140423 df...


  0%|          | 0/1908 [00:00<?, ?it/s]


Site processing complete, adding observtional data to 20140428 df...

  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140429 df...


  0%|          | 0/1908 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140502 df...
Site processing complete, adding observtional data to 20140503 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140511 df...


  0%|          | 0/1908 [00:00<?, ?it/s]




  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140512 df...Site processing complete, adding observtional data to 20140517 df...


  0%|          | 0/1908 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140527 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140531 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20140605 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150217 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150305 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150325 df...
Site processing complete, adding observtional data to 20150326 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150403 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

  0%|          | 0/338 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150409 df...
Site processing complete, adding observtional data to 20150412 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150415 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150426 df...
Site processing complete, adding observtional data to 20150427 df...


  0%|          | 0/3588 [00:00<?, ?it/s]


Site processing complete, adding observtional data to 20150428 df...

  0%|          | 0/4340 [00:00<?, ?it/s]

  0%|          | 0/4340 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150503 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150527 df...


  0%|          | 0/1908 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150528 df...
Site processing complete, adding observtional data to 20150531 df...


  0%|          | 0/338 [00:00<?, ?it/s]

  0%|          | 0/338 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150608 df...


  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20150609 df...


  0%|          | 0/2750 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160326 df...


  0%|          | 0/338 [00:00<?, ?it/s]

  0%|          | 0/3588 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160401 df...
Site processing complete, adding observtional data to 20160407 df...


  0%|          | 0/1487 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160416 df...
Site processing complete, adding observtional data to 20160426 df...


  0%|          | 0/623 [00:00<?, ?it/s]

  0%|          | 0/1279 [00:00<?, ?it/s]

  0%|          | 0/1279 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160509 df...
Site processing complete, adding observtional data to 20160527 df...
Site processing complete, adding observtional data to 20160607 df...


  0%|          | 0/1281 [00:00<?, ?it/s]




  0%|          | 0/623 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160614 df...

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160621 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20160626 df...
Site processing complete, adding observtional data to 20160708 df...
Site processing complete, adding observtional data to 20170128 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170129 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170717 df...


  0%|          | 0/1488 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170718 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170719 df...


  0%|          | 0/1487 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170727 df...


  0%|          | 0/623 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170815 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20170816 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180304 df...


  0%|          | 0/1487 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180422 df...


  0%|          | 0/1719 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180423 df...
Site processing complete, adding observtional data to 20180425 df...


  0%|          | 0/1487 [00:00<?, ?it/s]

  0%|          | 0/1719 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180426 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180528 df...


  0%|          | 0/2374 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180601 df...


  0%|          | 0/1109 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20180602 df...


  0%|          | 0/3598 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190309 df...


  0%|          | 0/1415 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190315 df...


  0%|          | 0/35 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190316 df...


  0%|          | 0/2374 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190317 df...
Site processing complete, adding observtional data to 20190324 df...


  0%|          | 0/5306 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190325 df...


  0%|          | 0/1804 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/4453 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190326 df...
Site processing complete, adding observtional data to 20190329 df...
Site processing complete, adding observtional data to 20190417 df...

  0%|          | 0/2038 [00:00<?, ?it/s]

  0%|          | 0/1804 [00:00<?, ?it/s]


Site processing complete, adding observtional data to 20190418 df...
Site processing complete, adding observtional data to 20190421 df...


  0%|          | 0/1690 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190427 df...


  0%|          | 0/1055 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190428 df...


  0%|          | 0/2121 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190501 df...


  0%|          | 0/1533 [00:00<?, ?it/s]

  0%|          | 0/4453 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190502 df...


  0%|          | 0/1804 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190503 df...
Site processing complete, adding observtional data to 20190604 df...
Site processing complete, adding observtional data to 20190605 df...


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/1220 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190608 df...


  0%|          | 0/4453 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190609 df...


  0%|          | 0/2121 [00:00<?, ?it/s]

  0%|          | 0/2570 [00:00<?, ?it/s]

  0%|          | 0/2022 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190611 df...
Site processing complete, adding observtional data to 20190703 df...


  0%|          | 0/1296 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190613 df...


  0%|          | 0/1220 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190704 df...
Site processing complete, adding observtional data to 20190614 df...
Site processing complete, adding observtional data to 20190705 df...
Site processing complete, adding observtional data to 20190713 df...
Site processing complete, adding observtional data to 20190714 df...


  0%|          | 0/34 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190715 df...


  0%|          | 0/34 [00:00<?, ?it/s]

Site processing complete, adding observtional data to 20190716 df...


  0%|          | 0/2121 [00:00<?, ?it/s]

  0%|          | 0/1220 [00:00<?, ?it/s]

  0%|          | 0/2121 [00:00<?, ?it/s]

  0%|          | 0/2038 [00:00<?, ?it/s]

  0%|          | 0/1296 [00:00<?, ?it/s]

  0%|          | 0/2038 [00:00<?, ?it/s]

  0%|          | 0/1220 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

In [2]:
HOME

'/uufs/chpc.utah.edu/common/home/civil-group1/Johnson/SWEMLv2.0'

In [4]:
import GeoDF

output_res = 300

#Connect cell ids with ASO obs and snotel obs to geospatial features
for region in region_list:
    path = f"{HOME}/SWEMLv2.0/data/ASO/{region}/{output_res}M_SWE_parquet"
    if os.path.isdir(path) == True:
        print(region)
        GeoDF.add_geospatial_threaded(region, output_res)
    else:
        print(f"No ASO data for {region}")

No ASO data for Northwest
No ASO data for SouthernRockies
No ASO data for Southwest


# Get NASA VIIRS fraction snow covered area for each location 

* Make sure the code grabs all dates for each region


In [None]:
import get_VIIRS_SCA
output_res = 300
threshold = 20

#check to see if the VIIRS data is available locally, if not, get from CIROH AWS - I think all of this data is for the incorrect year...
#get_VIIRS_SCA.get_VIIRS_from_AWS()

#Connect VIIRS data to dataframes
for region in region_list:
    path = f"{HOME}/SWEMLv2.0/data/ASO/{region}/{output_res}M_SWE_parquet"
    if os.path.isdir(path) == True:
        print(region)
        get_VIIRS_SCA.augment_SCA_mutliprocessing(region, output_res, threshold)
    else:
        print(f"No ASO data for {region}")


In [None]:
import get_Precip

'''
note*, if using python > 3.9, you will likely need to change the ee package to from io import StringIO
'''

import os
HOME = os.path.expanduser('~')

#gets precipitation for each location, accumulates it through the water year

#set start/end date for a water year
years = [2013, 2014, 2015, 2016, 2017, 2018, 2019]
output_res = 300
threshold = 20

for region in region_list:
    path = f"{HOME}/SWEMLv2.0/data/ASO/{region}/{output_res}M_SWE_parquet"
    if os.path.isdir(path) == True:
        print(region)
        get_Precip.get_precip_threaded(region, output_res, years)
    else:
        print(f"No ASO data for {region}")

    #Connect precipitation to processed DFs
    get_Precip.Make_Precip_DF(region, output_res, threshold)


In [3]:
import get_Seasonality

output_res = 300
threshold = 20
for region in region_list:
    #process snotel sites to make "snow hydrograph features" to determine above/below average WY conditions
    get_Seasonality.seasonal_snotel()


    #get the Day of season metric for each dataframe
    get_Seasonality.add_Seasonality(region, output_res, threshold)

FileNotFoundError: [Errno 2] No such file or directory: '/uufs/chpc.utah.edu/common/home/u1154915/SWEMLv2.0/data/SNOTEL_Data/ground_measures_dp.parquet'

# Use Sturm's snow classification as features within model framework

In [None]:
import sturm_processer as stpro


for region in region_list:
    input_directory = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/300M_Resolution/Seasonality_PrecipVIIRSGeoObsDFs/20_fSCA_Thresh"
    sturm_file = f"{HOME}/SWEMLv2.0/data/SnowClassification/SnowClass_NA_300m_10.0arcsec_2021_v01.0.tif" #https://nsidc.org/data/nsidc-0768/versions/1
    output_directory = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/300M_Resolution/Sturm_Seasonality_PrecipVIIRSGeoObsDFs/20_fSCA_Thresh"
    
    # Create output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)
    
    stpro.process_sturm_data_for_files(input_directory, sturm_file, output_directory)

In [1]:
import vegetation_processer as vegpro
import os

HOME = os.path.expanduser('~')
region_list = ['Northwest', 'SouthernRockies', 'Southwest']
for region in region_list:
    input_directory = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/300M_Resolution/Sturm_Seasonality_PrecipVIIRSGeoObsDFs/20_fSCA_Thresh"
    vegetation_file = f"{HOME}/SWEMLv2.0/data/LandCover/USA_NALCMS_landcover_2020v2_30m.tif"
    output_directory = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/300M_Resolution/Vegetation_Sturm_Seasonality_PrecipVIIRSGeoObsDFs/20_fSCA_Thresh"
    
    # Create output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)
    
    vegpro.process_vegetation_data_for_files(input_directory, vegetation_file, output_directory)

Vegetation file bounds: BoundingBox(left=-2043060.0, bottom=-2113150.0, right=2529600.0, top=732440.0)
Vegetation CRS: PROJCS["WGS_1984_Lambert_Azimuthal_Equal_Area",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Lambert_Azimuthal_Equal_Area"],PARAMETER["latitude_of_center",45],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Sampling Vegetation Data: 100%|██████████| 85185/85185 [00:03<00:00, 26538.25it/s]
Sampling Vegetation Data: 100%|██████████| 85185/85185 [00:03<00:00, 23469.18it/s]
Processing Parquet Files: 100%|██████████| 2/2 [01:01<00:00, 30.83s/it]


Vegetation file bounds: BoundingBox(left=-2043060.0, bottom=-2113150.0, right=2529600.0, top=732440.0)
Vegetation CRS: PROJCS["WGS_1984_Lambert_Azimuthal_Equal_Area",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Lambert_Azimuthal_Equal_Area"],PARAMETER["latitude_of_center",45],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Sampling Vegetation Data: 100%|██████████| 47402/47402 [00:02<00:00, 20751.53it/s]
Sampling Vegetation Data: 100%|██████████| 16241/16241 [00:00<00:00, 23256.92it/s]
Sampling Vegetation Data: 100%|██████████| 12458/12458 [00:00<00:00, 17874.43it/s]
Sampling Vegetation Data: 100%|██████████| 23791/23791 [00:00<00:00, 29297.85it/s]
Sampling Vegetation Data: 100%|██████████| 23791/23791 [00:00<00:00, 23804.06it/s]
Sampling Vegetation Data: 100%|██████████| 29145/29145 [00:01<00:00, 26038.44it/s]
Sampling Vegetation Data: 100%|██████████| 33987/33987 [00:01<00:00, 24757.26it/s]
Sampling Vegetation Data: 100%|██████████| 9378/9378 [00:00<00:00, 28095.87it/s]
Sampling Vegetation Data: 100%|██████████| 2545/2545 [00:00<00:00, 27943.73it/s]
Sampling Vegetation Data: 100%|██████████| 47402/47402 [00:02<00:00, 22029.96it/s]
Sampling Vegetation Data: 100%|██████████| 9378/9378 [00:00<00:00, 30168.67it/s]
Sampling Vegetation Data: 100%|██████████| 9378/9378 [00:00<00:00, 21750.30it/s]
Sampling Veg

Vegetation file bounds: BoundingBox(left=-2043060.0, bottom=-2113150.0, right=2529600.0, top=732440.0)
Vegetation CRS: PROJCS["WGS_1984_Lambert_Azimuthal_Equal_Area",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Lambert_Azimuthal_Equal_Area"],PARAMETER["latitude_of_center",45],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Sampling Vegetation Data: 100%|██████████| 21472/21472 [00:00<00:00, 25235.93it/s]
Sampling Vegetation Data: 100%|██████████| 18780/18780 [00:00<00:00, 29962.09it/s]
Sampling Vegetation Data: 100%|██████████| 16523/16523 [00:00<00:00, 30785.22it/s]
Sampling Vegetation Data: 100%|██████████| 3741/3741 [00:00<00:00, 30897.74it/s]
Sampling Vegetation Data: 100%|██████████| 20022/20022 [00:00<00:00, 27120.29it/s]
Sampling Vegetation Data: 100%|██████████| 49436/49436 [00:02<00:00, 24185.04it/s]
Sampling Vegetation Data: 100%|██████████| 19140/19140 [00:00<00:00, 27153.63it/s]
Sampling Vegetation Data: 100%|██████████| 16508/16508 [00:00<00:00, 29336.66it/s]
Sampling Vegetation Data: 100%|██████████| 16523/16523 [00:00<00:00, 31083.24it/s]
Sampling Vegetation Data: 100%|██████████| 9169/9169 [00:00<00:00, 29712.80it/s]
Sampling Vegetation Data: 100%|██████████| 9056/9056 [00:00<00:00, 26920.77it/s]
Sampling Vegetation Data: 100%|██████████| 39617/39617 [00:01<00:00, 30220.62it/s]
Sampling V

## Next steps
* Explore why errors in precip sites above
* add in situ obs - seasonality based on the historical neareste x monitoring stations - like a historical average to-date swe value unit hydrograph based on the day of year? This will include a historical time of year of normal swe value and a swe value of year compared to normal
* albedo metric


In [None]:
import pandas as pd

HOME = os.path.expanduser('~')
region = 'Southwest'
output_res = '300'

dfpath = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/{output_res}M_Resolution"

SWmeta = pd.read_parquet(f"{dfpath}/{region}_metadata.parquet")

import UpdateDataFrame

#need to update the topographic features for every dataframe
output_res = '300'
training_cats = ['Obsdf']
fSCA = '' #'20_fSCA_Thresh'


for training_cat in training_cats:
    print(training_cat)

    for region in region_list:
        print(region)
        dfpath = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/{output_res}M_Resolution"
        #file to be used to updated training DF
        updatefile = pd.read_parquet(f"{dfpath}/{region}_metadata.parquet")


        #Update Dataframe
        UpdateDataFrame.updateTrainingDF(region, output_res, training_cat, fSCA, updatefile)

trainfile = pd.read_parquet(f"{dfpath}/{training_cat}/{fSCA}/Sturm_Season_Precip_VIIRS_GeoObsDF_20150406.parquet")

import matplotlib.pyplot as plt
import geopandas as gpd

from mpl_toolkits.axes_grid1 import make_axes_locatable

def SpatialAnalysis(EvalDF):
    #Convert to a geopandas DF
    Pred_Geo = gpd.GeoDataFrame(EvalDF, geometry = gpd.points_from_xy(EvalDF.cen_lon, EvalDF.cen_lat))

    Pred_Geo.plot(column='Elevation_m',
                  legend=False,
                )
    
SpatialAnalysis(trainfile)