# Next steps 
- connect precip to DF,
- add new sites (e.g., regionval) to training DF with all the respective spatial resolution information
- connect regional data together to train model
- connect different regions
- add precipitation phase features (seasonal accumulated rain precip, seasonal accumulated snow precip as a function of temperature)
- explore adding other features stemming from SNOTEL, remote sensing (LULC), Snow Classifications (Sturms), energy balance

Put all units in SI, while it should not matter for model training since they are being normalized, they will be more interpretable.

In [1]:
import os
import pandas as pd
from tqdm import tqdm
import numpy as np

HOME = os.path.expanduser('~')


def average_duplicates(cell_id, aso_file, siteave_dic):
    sitex = aso_file[aso_file['cell_id'] == cell_id]
    mean_lat = np.round(np.mean(sitex['cen_lat']),3)
    mean_lon = np.round(np.mean(sitex['cen_lon']),3)
    mean_swe = np.round(np.mean(sitex['swe_m']),2)

    tempdic = {'cell_id': cell_id,
            'cen_lat': mean_lat,
            'cen_lon': mean_lon,
            'swe_m': mean_swe
    }

    sitedf = pd.DataFrame(tempdic, index = [cell_id])
    siteave_dic[cell_id] = sitedf

region = 'S_Sierras'
output_res = 300
aso_swe_files_folder_path = f"{HOME}/SWEMLv2.0/data/ASO/{region}/{output_res}M_SWE_parquet"
aso_swe_file= f"ASO_300M_SWE_20130403.parquet"

aso_file = pd.read_parquet(os.path.join(aso_swe_files_folder_path, aso_swe_file), engine='fastparquet')

len(aso_file)


1084383

In [3]:
cell_ids = aso_file.cell_id.unique()
cell_ids[:5]

array(['S_Sierras_300M_38.186_-119.59', 'S_Sierras_300M_38.186_-119.589',
       'S_Sierras_300M_38.186_-119.591', 'S_Sierras_300M_38.186_-119.588',
       'S_Sierras_300M_38.185_-119.592'], dtype=object)

In [3]:
cell_ids = aso_file.cell_id.unique()
siteave_dic = {}
from tqdm import tqdm

print(f"Getting unique cell ids and taking the spatial average to get {output_res} m resolution")
[average_duplicates(cell_id, aso_file, siteave_dic) for cell_id in tqdm(cell_ids[:5])]

Getting unique cell ids and taking the spatial average to get 300 m resolution




100%|██████████| 5/5 [00:00<00:00, 15.71it/s]


[None, None, None, None, None]

In [5]:
pd.concat(siteave_dic)

Unnamed: 0,Unnamed: 1,cell_id,cen_lat,cen_lon,swe_m
S_Sierras_300M_38.186_-119.59,S_Sierras_300M_38.186_-119.59,S_Sierras_300M_38.186_-119.59,38.186,-119.59,0.8
S_Sierras_300M_38.186_-119.589,S_Sierras_300M_38.186_-119.589,S_Sierras_300M_38.186_-119.589,38.186,-119.589,0.42
S_Sierras_300M_38.186_-119.591,S_Sierras_300M_38.186_-119.591,S_Sierras_300M_38.186_-119.591,38.186,-119.591,1.21
S_Sierras_300M_38.186_-119.588,S_Sierras_300M_38.186_-119.588,S_Sierras_300M_38.186_-119.588,38.186,-119.588,0.31
S_Sierras_300M_38.185_-119.592,S_Sierras_300M_38.185_-119.592,S_Sierras_300M_38.185_-119.592,38.185,-119.592,0.56


In [None]:
aso_file.drop_duplicates(subset=['cell_id'], inplace=True)
aso_file

In [None]:
cols = ['cen_lat', 'cen_lon']
ASO_meta_loc_DF = ASO_meta_loc_DF[cols]
ASO_meta_loc_DF[~ASO_meta_loc_DF.index.duplicated(keep='first')]


In [None]:
round(37.080205, -3)

In [None]:
Precippath = f"{HOME}/SWEMLv2.0/data/Precipitation/{region}/{output_res}M_NLDAS_Precip"
ppt = pd.read_hdf(f"{Precippath}/WY{year}.h5", key = '11N_cell_-119.59073383567106_38.18624284828164')
ppt.head()

In [None]:
aso_gdf

In [None]:

import os
import pandas as pd

HOME = os.path.expanduser('~')

region = 'S_Sierras'

#ASO observations
aso_swe_files_folder_path = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/Obsdf"

obs1 = pd.read_csv(f"{aso_swe_files_folder_path}/20130403_ObsDF.parquet")

obs1

In [None]:
cols = [
    'cell_id', 'Date',  'cen_lat', 'cen_lon', 'geometry', 'Elevation_m', 'Slope_Deg',
    'Aspect_Deg', 'swe', 'nearest_site_1', 'nearest_site_2', 'nearest_site_3', 'nearest_site_4', 
    'nearest_site_5', 'nearest_site_6'
    ]