In [1]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import optimize
from datetime import datetime, date
import ee #pip install earthengine-api
import EE_funcs
import os
from tqdm import tqdm
import concurrent.futures as cf
ee.Authenticate()
ee.Initialize()

%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

HOME = os.path.expanduser('~')



In [6]:
def GetSeasonalAccumulatedPrecipSingleSite(args):
    #get function inputs
    precip, output_res, lat, lon, Precippath, cell_id = args

    #unit conversions and temporal frequency
    temporal_resample = 'D'
    kgm2_to_cm = 0.1

    # Gett lat/long from meta file
    poi = ee.Geometry.Point(lon, lat)

    #Get precipitation
    precip_poi = precip.getRegion(poi, output_res).getInfo()
    site_precip = EE_funcs.ee_array_to_df(precip_poi,['total_precipitation'])

    #Precipitation
    site_precip.set_index('datetime', inplace = True)
    site_precip = site_precip.resample(temporal_resample).sum()
    site_precip.reset_index(inplace = True)

    #make columns for inches
    site_precip['total_precipitation'] = site_precip['total_precipitation']*kgm2_to_cm
    site_precip.rename(columns={'total_precipitation':'daily_precipitation_cm'}, inplace = True)

    #get seasonal accumulated precipitation for site
    site_precip['season_precip'] = site_precip['daily_precipitation_cm'].cumsum()

    with pd.HDFStore( f"{Precippath}/WY{str(year)}.h5", complevel=9, complib='zlib') as store:
        store[cell_id] = site_precip

    return site_precip

def get_precip_threaded(year, region, output_res):
    #  #ASO file path
    aso_swe_files_folder_path = f"{HOME}/SWEMLv2.0/data/ASO/{output_res}M_SWE_csv/{region}"
    #make directory for data 
    Precippath = f"{HOME}/SWEMLv2.0/data/Precipitation/{region}/{output_res}M_NLDAS_Precip"
    if not os.path.exists(Precippath):
        os.makedirs(Precippath, exist_ok=True)
    
    #load metadata and get site info
    path = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/{region}_metadata.parquet"
    meta = pd.read_csv(path)
    #lat, lon, cell_id = meta['cen_lat'][0], meta['cen_lon'][0], meta['cell_id'][0]

    #set water year start/end dates based on ASO flights for the end date
    print(f"Getting date information from observations for WY{year}")
    aso_swe_files = []
    for aso_swe_file in os.listdir(aso_swe_files_folder_path):  #add file names to aso_swe_files
        aso_swe_files.append(aso_swe_file)

    startdate = f"{year-1}-10-01"
    #search for files for water year and get last date, this works because no ASO obs in sep, oct, nov, dec
    end = [x for x in aso_swe_files if str(year) in x][-1]
    month = end[-8:-6]
    day = end[-6:-4]
    enddate = f"{str(year)}-{month}-{day}"

    #NLDAS precipitation
    precip = ee.ImageCollection('NASA/NLDAS/FORA0125_H002').select('total_precipitation').filterDate(startdate, enddate)

    #args = (precip, output_res, lat, lon, Precippath, cell_id)
    nsites = len(meta)
    print(f"Getting daily precipitation data for {nsites} sites")
    with cf.ThreadPoolExecutor(max_workers=None) as executor:
        jobs = {executor.submit(GetSeasonalAccumulatedPrecipSingleSite, (precip, output_res, meta.iloc[i]['cen_lat'], meta.iloc[i]['cen_lon'],Precippath,  meta.iloc[i]['cell_id'])):
                i for i in tqdm(range(nsites))}
        
    print(f"Job complete for getting precipiation datdata")
        # for job in tqdm(cf.as_completed(jobs)):
        #     results.append(job.result())

In [7]:
#set start/end date for a water year
year = 2014
region = 'S_Sierras'
output_res = 100
get_precip_threaded(year, region, output_res)



Getting date information from observations for WY2014
Getting daily precipitation data for 2 sites


100%|██████████| 2/2 [00:00<00:00, 140.41it/s]


Job complete for getting precipiation datdata


In [9]:
path = f"{HOME}/SWEMLv2.0/data/TrainingDFs/{region}/{region}_metadata.parquet"
meta = pd.read_csv(path)
meta.head()

Unnamed: 0,cell_id,cen_lon,cen_lat,geometry,Elevation_m,Slope_Deg,Aspect_Deg
0,11N_cell_-119.59073383567106_38.18624284828164,-119.590255,38.185854,POINT (-119.5902546710551 38.18585423359679),3167,0,180
1,11N_cell_-119.58959364631137_38.186209698720205,-119.589255,38.185854,POINT (-119.58925467105512 38.18585423359679),3168,2,18
2,11N_cell_-119.59309813700962_38.184509449472536,-119.593255,38.184854,POINT (-119.5932546710551 38.1848542335968),3156,5,342
3,11N_cell_-119.59195797185825_38.18447632413384,-119.592255,38.184854,POINT (-119.59225467105512 38.1848542335968),3158,5,34
4,11N_cell_-119.59081780857791_38.184443187748315,-119.591255,38.184854,POINT (-119.59125467105513 38.1848542335968),3165,8,32


In [10]:
#read in h5 file
Precippath = f"{HOME}/SWEMLv2.0/data/Precipitation/{region}/{output_res}M_NLDAS_Precip"
ppt = pd.read_hdf(f"{Precippath}/WY{year}.h5", key = '11N_cell_-119.59073383567106_38.18624284828164')

In [11]:
ppt

Unnamed: 0,datetime,daily_precipitation_cm,season_precip
0,2013-10-01,0.0,0.00000
1,2013-10-02,0.0,0.00000
2,2013-10-03,0.0,0.00000
3,2013-10-04,0.0,0.00000
4,2013-10-05,0.0,0.00000
...,...,...,...
242,2014-05-31,0.0,42.78172
243,2014-06-01,0.0,42.78172
244,2014-06-02,0.0,42.78172
245,2014-06-03,0.0,42.78172
