In [13]:
#Bring in elevation data
import pandas as pd
import json 
import datetime
import matplotlib.pyplot as plt
import numpy as np
from pystac_client import Client
import planetary_computer
import xarray
import rioxarray
from pyproj import Transformer
import h5py
from mpl_toolkits.basemap import Basemap
import math
from tqdm import tqdm
import geopandas
import richdem as rd
import elevation
import hdfdict
import pickle
import random


import io
import os
from datetime import date, timedelta

import xarray as xr
import requests
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cmocean

# Not used directly, but used via xarray
import cfgrib
import netCDF4
import tempfile
import copy

from matplotlib.axes import Axes
from cartopy.mpl.geoaxes import GeoAxes
GeoAxes._pcolormesh_patched = Axes.pcolormesh
from platform import python_version

# Ignore some matplotlib deprecation warnings
import warnings; warnings.simplefilter("ignore")


print(pd.__version__) # should be 1.3.0
print(python_version()) #should be 3.8.2

1.3.0
3.8.2


In [4]:
#added Long,Lat to get polygon points
def GeoStat_func(Geospatial_df, regions, elev_L, slope_L, aspect_L, Long, Lat, tile):

    #loop through
    
    for i in tqdm(range(0, len(Geospatial_df))):
    
        # convert coordinate to raster value
        lon = Geospatial_df.iloc[i][Long]
        lat = Geospatial_df.iloc[i][Lat]

        #set up tile to speed up data processing
        prev_tileid = 'NA'


        #connect point location to geotile
        tileid = 'Copernicus_DSM_COG_30_N' + str(math.floor(lat)) + '_00_W'+str(math.ceil(abs(lon))) +'_00_DEM'
        
        if tileid != prev_tileid:

            indexid = regions.loc[tileid]['sliceID']


           #Assing region
            signed_asset = planetary_computer.sign(tiles[indexid].assets["data"])
            #get elevation data in xarray object
            elevation = rioxarray.open_rasterio(signed_asset.href)
            
            prev_tileid = copy.copy(tileid)

        #create copies to extract other geopysical information
        #Create Duplicate DF's
        slope = elevation.copy()
        aspect = elevation.copy()


        #transform projection
        transformer = Transformer.from_crs("EPSG:4326", elevation.rio.crs, always_xy=True)
        xx, yy = transformer.transform(lon, lat)

        #extract elevation values into numpy array
        tilearray = np.around(elevation.values[0]).astype(int)

        #set tile geo to get slope and set at rdarray
        geo = (math.floor(float(lon)), 90, 0.0, math.ceil(float(lat)), 0.0, -90)
        tilearray = rd.rdarray(tilearray, no_data = -9999)
        tilearray.projection = 'EPSG:4326'
        tilearray.geotransform = geo

        #get slope, note that slope needs to be fixed, way too high
        #get aspect value
        slope_arr = rd.TerrainAttribute(tilearray, attrib='slope_degrees')
        aspect_arr = rd.TerrainAttribute(tilearray, attrib='aspect')

        #save slope and aspect information 
        slope.values[0] = slope_arr
        aspect.values[0] = aspect_arr

        # get point values from grid
        elev = round(elevation.sel(x=(xx,), y=yy, method="nearest").values[0][0])
        slop = round(slope.sel(x=(xx,), y=yy, method="nearest").values[0][0])
        asp = round(aspect.sel(x=(xx,), y=yy, method="nearest").values[0][0])


        #add point values to list
        elev_L.append(elev)
        slope_L.append(slop)
        aspect_L.append(asp)

In [5]:
#get mean Geospatial data
def mean_Geo(df, geo):
    BL = 'BL'+geo
    UL = 'UL'+geo
    UR = 'UR'+geo
    BR = 'BR'+geo
    
    df[geo] = (df[BL] + df[UL]+ df[UR] + df[BR]) /4

In [6]:
def week_num(df):
        #week of water year
    weeklist = []

    for i in tqdm(range(0,len(df))):
        if df['Date'][i].month<11:
            y = df['Date'][i].year-1
        else:
            y = df['Date'][i].year
            
        WY_start = pd.to_datetime(str(y)+'-10-01')
        deltaday = df['Date'][i]-WY_start
        deltaweek = round(deltaday.days/7)
        weeklist.append(deltaweek)


    df['WYWeek'] = weeklist


In [7]:
#make Region identifier. The data already includes Region, but too many 'other' labels

def Region_id(df):
    
    for i in tqdm(range(0, len(df))):

        #Sierras
        #Northern Sierras
        if -122.5 <= df['Long'][i] <=-119 and 39 <=df['Lat'][i] <= 42:
            loc = 'N_Sierras'
            df['Region'].iloc[i] = loc

        #Southern Sierras
        if -121.2 <= df['Long'][i] <=-117 and 35 <=df['Lat'][i] <= 39:
            loc = 'S_Sierras'
            df['Region'].iloc[i] = loc




        #West Coast    
        #CACoastal (Ca-Or boarder)
        if df['Long'][i] <=-122.5 and df['Lat'][i] <= 42:
            loc = 'Ca_Coast'
            df['Region'].iloc[i] = loc



        #Oregon Coastal (Or)?
        if df['Long'][i] <=-122.7 and 42<= df['Lat'][i] <= 46:
            loc = 'Or_Coast'
            df['Region'].iloc[i] = loc



        #Olympis Coastal (Wa)
        if df['Long'][i] <=-122.5 and 46<= df['Lat'][i]:
            loc = 'Wa_Coast'
            df['Region'].iloc[i] = loc    



        #Cascades    
         #Northern Cascades
        if -122.5 <= df['Long'][i] <=-119.4 and 46 <=df['Lat'][i]:
            loc = 'N_Cascade'
            df['Region'].iloc[i] = loc



        #Southern Cascades
        if -122.7 <= df['Long'][i] <=-121 and 42 <=df['Lat'][i] <= 46:
            loc = 'S_Cascade'
            df['Region'].iloc[i] = loc



        #Eastern Cascades and Northern Idaho and Western Montana
        if -119.4 <= df['Long'][i] <=-116.4 and 46 <=df['Lat'][i]:
            loc = 'E_WA_N_Id_W_Mont'
            df['Region'].iloc[i] = loc
        #Eastern Cascades and Northern Idaho and Western Montana
        if -116.4 <= df['Long'][i] <=-114.1 and 46.6 <=df['Lat'][i]:
            loc = 'E_WA_N_Id_W_Mont'
            df['Region'].iloc[i] = loc






        #Eastern Oregon
        if -121 <= df['Long'][i] <=-116.4 and 43.5 <=df['Lat'][i] <= 46:
            loc = 'E_Or'
            df['Region'].iloc[i] = loc



        #Great Basin
        if -121 <= df['Long'][i] <=-112 and 42 <=df['Lat'][i] <= 43.5:
            loc = 'GBasin'
            df['Region'].iloc[i] = loc

        if -119 <= df['Long'][i] <=-112 and 39 <=df['Lat'][i] <= 42:
            loc = 'GBasin'
            df['Region'].iloc[i] = loc
            #note this section includes mojave too
        if -117 <= df['Long'][i] <=-113.2 and df['Lat'][i] <= 39:
            loc = 'GBasin'
            df['Region'].iloc[i] = loc


        #SW Mtns (Az and Nm)
        if -113.2 <= df['Long'][i] <=-107 and df['Lat'][i] <= 37:
            loc = 'SW_Mtns'
            df['Region'].iloc[i] = loc




        #Southern Wasatch + Utah Desert Peaks
        if -113.2 <= df['Long'][i] <=-109 and 37 <= df['Lat'][i] <= 39:
            loc = 'S_Wasatch'
            df['Region'].iloc[i] = loc
        #Southern Wasatch + Utah Desert Peaks
        if -112 <= df['Long'][i] <=-109 and 39 <= df['Lat'][i] <= 40:
            loc = 'S_Wasatch'
            df['Region'].iloc[i] = loc



        #Northern Wasatch + Bear River Drainage
        if -112 <= df['Long'][i] <=-109 and 40 <= df['Lat'][i] <= 42.5:
            loc = 'N_Wasatch'
            df['Region'].iloc[i] = loc




        #YellowStone, Winds, Big horns
        if -111 <= df['Long'][i] <=-106.5 and 42.5 <= df['Lat'][i] <= 45.8:
            loc = 'Greater_Yellowstone'
            df['Region'].iloc[i] = loc



        #North of YellowStone to Boarder
        if -112.5 <= df['Long'][i] <=-106.5 and 45.8 <= df['Lat'][i]:
            loc = 'N_Yellowstone'
            df['Region'].iloc[i] = loc



         #SW Montana and nearby Idaho
        if -112 <= df['Long'][i] <=-111 and 42.5 <= df['Lat'][i] <=45.8:
            loc = 'SW_Mont'
            df['Region'].iloc[i] = loc 
         #SW Montana and nearby Idaho
        if -113 <= df['Long'][i] <=-112 and 43.5 <= df['Lat'][i] <=45.8:
            loc = 'SW_Mont'
            df['Region'].iloc[i] = loc
        #SW Montana and nearby Idaho
        if -113 <= df['Long'][i] <=-112.5 and 45.8 <= df['Lat'][i] <=46.6:
            loc = 'SW_Mont'
            df['Region'].iloc[i] = loc



         #Sawtooths, Idaho
        if -116.4 <= df['Long'][i] <=-113 and 43.5 <= df['Lat'][i] <=46.6:
            loc = 'Sawtooth'
            df['Region'].iloc[i] = loc




        #Greater Glacier
        if -114.1 <= df['Long'][i] <=-112.5 and 46.6 <= df['Lat'][i]:
            loc = 'Greater_Glacier'
            df['Region'].iloc[i] = loc 



         #Southern Wyoming 
        if -109 <= df['Long'][i] <=-104.5 and 40.99 <= df['Lat'][i] <= 42.5 :
            loc = 'S_Wyoming'
            df['Region'].iloc[i] = loc 
        #Southern Wyoming
        if -106.5 <= df['Long'][i] <=-104.5 and 42.5 <= df['Lat'][i] <= 43.2:
            loc = 'S_Wyoming'
            df['Region'].iloc[i] = loc 




         #Northern Colorado Rockies
        if -109 <= df['Long'][i] <=-104.5 and 38.3 <= df['Lat'][i] <= 40.99:
            loc = 'N_Co_Rockies'
            df['Region'].iloc[i] = loc 



         #SW Colorado Rockies
        if -109 <= df['Long'][i] <=-106 and 36.99 <= df['Lat'][i] <= 38.3:
            loc = 'SW_Co_Rockies'
            df['Region'].iloc[i] = loc 




        #SE Colorado Rockies + Northern New Mexico
        if -106 <= df['Long'][i] <=-104.5 and 34 <= df['Lat'][i] <= 38.3:
            loc = 'SE_Co_Rockies'
            df['Region'].iloc[i] = loc  
        #SE Colorado Rockies + Northern New Mexico
        if -107 <= df['Long'][i] <=-106 and 34 <= df['Lat'][i] <= 36.99:
            loc = 'SE_Co_Rockies'
            df['Region'].iloc[i] = loc 
    

In [8]:
#This plots the location of all df data points

def GeoPlot(df):
    fig = plt.gcf()
    fig.set_size_inches(12, 10)

    #merc also works for projection # Cylindrical Equal Area. https://matplotlib.org/basemap/api/basemap_api.html#module-mpl_toolkits.basemap

    m = Basemap(projection='cea', \
                llcrnrlat=29, urcrnrlat=50, \
                llcrnrlon=-125, urcrnrlon=-104, \
                lat_ts=20, \
                resolution='c')

    m.bluemarble(scale=2)   # full scale will be overkill
    m.drawcoastlines(color='white', linewidth=0.2)  # add coastlines


    # draw coastlines, meridians and parallels.
    #m.drawcoastlines()
    m.drawcountries()
    m.drawstates()
    #m.drawmapboundary(fill_color='#99ffff')
    #m.fillcontinents(color='#cc9966',lake_color='#99ffff')
    m.drawparallels(np.arange(20,60,10),labels=[1,1,0,0])
    m.drawmeridians(np.arange(-120,-90,10),labels=[0,0,0,1])


    #Make unique color for each regions
    number_of_colors = len(df.keys())
    color = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)])
                 for i in range(number_of_colors)]

    Location = list(df.keys())
    colordict = {k: v for k, v in zip(Location, color)}


    for i in df.keys():
            x, y = m(np.array(df[i]['Long']), np.array(df[i]['Lat'])) 
            m.scatter(x, y, 10, marker='o', color=colordict[i], label = str(i)) 


    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')

    plt.title('Training Locations')
    plt.tight_layout()
    plt.show()

In [9]:
#This function connects stationary geospatial information to observations
def Geo_to_Data(geodf, SWE, id):
    dfcols = ['Long','Lat','elevation_m','slope_deg','aspect','Date','SWE','Region']
    datadf = geodf.merge(SWE, how='inner', on=id)
    datadf = datadf.set_index(id)
    datadf=datadf[dfcols]
    return datadf

In [10]:

#Attached new data (snotel) to regional training data
def Region_Obs_Snotel(traindf, Snoteldf):
    for i in traindf.keys():
        traindf[i] = pd.concat([traindf[i], Snoteldf[i]])

In [11]:

#This function defines northness: :  sine(Slope) * cosine(Aspect). this gives you a northness range of -1 to 1.
#Note you'll need to first convert to radians. 
#Some additional if else statements to get around sites with low obervations
def northness(df):    
    
    if len(df) == 8: #This removes single value observations, need to go over and remove these locations from training too
        #Determine northness for site
        #convert to radians
        #df = pd.DataFrame(df).T
        
        df['aspect_rad'] = df['aspect']*0.0174533
        df['slope_rad'] = df['slope_deg']*0.0174533
        
        df['northness'] = -9999
        for i in range(0, len(df)):
            df['northness'].iloc[i] = math.sin(df['slope_rad'].iloc[i])*math.cos(df['aspect_rad'].iloc[i])

        #remove slope and aspects to clean df up
        df = df.drop(columns = ['aspect', 'slope_deg', 'aspect_rad', 'slope_rad', 'Region'])
        
        return df
        
    else:
         #convert to radians
        df['aspect_rad'] = df['aspect']*0.0174533
        df['slope_rad'] = df['slope_deg']*0.0174533
        
        df['northness'] = -9999
        for i in range(0, len(df)):
            df['northness'].iloc[i] = math.sin(df['slope_rad'].iloc[i])*math.cos(df['aspect_rad'].iloc[i])

        
         #remove slope and aspects to clean df up
        df = df.drop(columns = ['aspect', 'slope_deg', 'aspect_rad', 'slope_rad', 'Region'])
        
        return df

In [14]:
os.getcwd()
os.chdir('..')
os.getcwd()
os.chdir('..')
os.getcwd()

'/uufs/chpc.utah.edu/common/home/civil-group1/ItalianAlps/SnowCast/Final_Model'

In [15]:
#Set up training DF with key metadata per site
#All coordinates of 1 km polygon used to develop ave elevation, ave slope, ave aspect

colnames = ['cell_id', 'Region', 'BR_Coord', 'UR_Coord', 'UL_Coord', 'BL_Coord']
SWEdata = pd.DataFrame(columns = colnames)

#May or may not need to melt data
#Load Testing SWE locations
TestSWE = pd.read_csv('Data/Pre_Processed/submission_format_eval.csv')

#change first column to cell id
TestSWE = TestSWE.rename(columns = {'Unnamed: 0':'cell_id'})

#drop na and put into modeling df format
TestSWE = TestSWE.melt(id_vars=["cell_id"]) #.dropna()

#change variable to Date and value to SWE
TestSWE = TestSWE.rename(columns ={'variable': 'Date', 'value':'SWE'})


#Load  SWE location data
with open("Data/Pre_Processed/grid_cells_eval.geojson") as f:
    data = json.load(f)
    
#load ground truth values (SNOTEL): Testing
date = '01_27_2022'
obs_path = 'Data/Pre_Processed/ground_measures_features_' + date + '.csv'
GM_Test = pd.read_csv(obs_path)

#change first column to station id
GM_Test = GM_Test.rename(columns = {'Unnamed: 0':'station_id'})

#drop na and put into modeling df format
GM_Test = GM_Test.melt(id_vars=["station_id"]).dropna()

#change variable to Date and value to SWE
GM_Test = GM_Test.rename(columns ={'variable': 'Date', 'value':'SWE'})

#load ground truth meta
GM_Meta = pd.read_csv('Data/Pre_Processed/ground_measures_metadata.csv')

#merge testing ground truth location metadata with snotel data
GM_Test = GM_Meta.merge(GM_Test, how='inner', on='station_id')
GM_Test = GM_Test.set_index('station_id')
GM_Test.rename(columns={'name': 'location', 'latitude': 'Lat', 'longitude': 'Long', 'value': 'SWE'}, inplace=True)




#load ground truth values (SNOTEL): previous week
date = '01_20_2022'
obs_path = 'Data/Pre_Processed/ground_measures_features_' + date + '.csv'
GM_Prev = pd.read_csv(obs_path)

#change first column to station id
GM_Prev = GM_Prev.rename(columns = {'Unnamed: 0':'station_id'})

#drop na and put into modeling df format
GM_Prev = GM_Prev.melt(id_vars=["station_id"]).dropna()

#change variable to Date and value to SWE
GM_Prev = GM_Prev.rename(columns ={'variable': 'Date', 'value':'SWE'})

#merge testing ground truth location metadata with snotel data
GM_Prev = GM_Meta.merge(GM_Prev, how='inner', on='station_id')
GM_Prev = GM_Prev.set_index('station_id')
GM_Prev.rename(columns={'name': 'location', 'latitude': 'Lat', 'longitude': 'Long', 'value': 'SWE'}, inplace=True)

#Add previous week's SWE to DF
GM_Test['Prev_SWE'] = GM_Prev['SWE']

In [16]:
#Make a SWE Grid location DF
for i in tqdm(range(len(data["features"]))):
    properties = data["features"][i]["properties"]
    location = data["features"][i]["geometry"]
    DFdata = [properties ["cell_id"],  properties ["region"],location ["coordinates"][0][0] ,
             location ["coordinates"][0][1], location ["coordinates"][0][2], location ["coordinates"][0][3] ]
    df_length = len(SWEdata)
    SWEdata.loc[df_length] = DFdata

100%|██████████| 20759/20759 [01:14<00:00, 277.64it/s]


In [17]:
#Make SWE location and observation DF
#Testing
#merge site location metadata with observations
TestSWE = TestSWE.merge(SWEdata, how='inner', on='cell_id')
TestSWE = TestSWE.set_index('cell_id')
TestSWE.rename(columns={'variable': 'Date', 'value': 'SWE'}, inplace=True)

#Get Lat Long information
#Bottom right coord
TestSWE[['BR_Coord_Long','BR_Coord_Lat']] = pd.DataFrame(TestSWE.BR_Coord.tolist(), index= TestSWE.index)

#Upper right coord
TestSWE[['UR_Coord_Long','UR_Coord_Lat']] = pd.DataFrame(TestSWE.UR_Coord.tolist(), index= TestSWE.index)

#Upper left coord
TestSWE[['UL_Coord_Long','UL_Coord_Lat']] = pd.DataFrame(TestSWE.UL_Coord.tolist(), index= TestSWE.index)

#Bottom Left coord
TestSWE[['BL_Coord_Long','BL_Coord_Lat']] = pd.DataFrame(TestSWE.BL_Coord.tolist(), index= TestSWE.index)

#Get Lat Long information
#TestSWE[['Long','Lat']] = pd.DataFrame(TestSWE.Lat_Long.tolist(), index= TestSWE.index)

#recolumn df to increase interpretability
testcols = TestSWE.columns.to_list()
testcols = testcols[-13:] + testcols[:-13]
TestSWE = TestSWE[testcols]

In [18]:
#Set up a framework to retrieve geospatial information for each site (elevation, weather, slope, aspect, etc)

#Develop a DF to get each site's geospatial information 
geocols = [ 'BR_Coord_Long', 'BR_Coord_Lat', 'UR_Coord_Long', 'UR_Coord_Lat',
       'UL_Coord_Long', 'UL_Coord_Lat', 'BL_Coord_Long', 'BL_Coord_Lat']


Geospatial_df = TestSWE.copy()
Geospatial_df['rowid'] = Geospatial_df.index
Geospatial_df = Geospatial_df.drop_duplicates(subset = 'rowid')
Geospatial_df = pd.DataFrame(Geospatial_df[geocols])

#Define the AOI around the cell locations from clockwise

area_of_interest = {
    "type": "Polygon",
    "coordinates": [
        [
            #lower left
            [Geospatial_df['BL_Coord_Long'].min(), Geospatial_df['BL_Coord_Lat'].min()],
            #upper left
            [Geospatial_df['UL_Coord_Long'].min(), Geospatial_df['UL_Coord_Lat'].max()],
            #upper right
            [Geospatial_df['UR_Coord_Long'].max(), Geospatial_df['UR_Coord_Lat'].max()],
            #lower right
            [Geospatial_df['UR_Coord_Long'].max(), Geospatial_df['BR_Coord_Lat'].min()],
            #lower left
            [Geospatial_df['BL_Coord_Long'].min(), Geospatial_df['BL_Coord_Lat'].min()],
        ]
    ],
}


#Make a connection to get 90m Copernicus Digital Elevation Model (DEM) data with the Planetary Computer STAC API

client = Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    ignore_conformance=True,
)


search = client.search(
    collections=["cop-dem-glo-90"],
    intersects=area_of_interest
)

tiles = list(search.get_items())

#Make a DF to connect locations with the larger data tile, and then extract elevations
regions = []

for i in range(0, len(tiles)):
    row = [i, tiles[i].id]
    regions.append(row)
regions = pd.DataFrame(columns = ['sliceID', 'tileID'], data = regions)
regions = regions.set_index(regions['tileID'])
del regions['tileID']

In [20]:
#Get geospatial information for each evaluation cell corner

BLelev_L = []
BLslope_L = []
BLaspect_L = []

#run the elevation function, added tqdm to show progress
GeoStat_func(Geospatial_df, regions, BLelev_L, BLslope_L, BLaspect_L, 'BL_Coord_Long', 'BL_Coord_Lat', tiles)


#Save each points elevation in DF
Geospatial_df['BL_Elevation_m'] = BLelev_L
Geospatial_df['BL_slope_Deg'] = BLslope_L
Geospatial_df['BLaspect_L'] = BLaspect_L

100%|██████████| 20759/20759 [3:10:39<00:00,  1.81it/s]  


In [21]:
ULelev_L = []
ULslope_L = []
ULaspect_L = []

#run the elevation function, added tqdm to show progress
GeoStat_func(Geospatial_df, regions, ULelev_L, ULslope_L, ULaspect_L,'UL_Coord_Long', 'UL_Coord_Lat', tiles) 


#Save each points elevation in DF
Geospatial_df['UL_Elevation_m'] = ULelev_L
Geospatial_df['UL_slope_Deg'] = ULslope_L
Geospatial_df['ULaspect_L'] = ULaspect_L

100%|██████████| 20759/20759 [2:56:23<00:00,  1.96it/s]  


In [None]:
URelev_L = []
URslope_L = []
URaspect_L = []

#run the elevation function, added tqdm to show progress
GeoStat_func(Geospatial_df, regions, URelev_L, URslope_L, URaspect_L,'UR_Coord_Long', 'UR_Coord_Lat', tiles)


#Save each points elevation in DF
Geospatial_df['UR_Elevation_m'] = URelev_L
Geospatial_df['UR_slope_Deg'] = URslope_L
Geospatial_df['URaspect_L'] = URaspect_L

 75%|███████▍  | 15555/20759 [2:09:53<41:32,  2.09it/s]  

In [None]:
BRelev_L = []
BRslope_L = []
BRaspect_L = []

#run the elevation function, added tqdm to show progress
GeoStat_func(Geospatial_df, regions, BRelev_L, BRslope_L, BRaspect_L,'BR_Coord_Long', 'BR_Coord_Lat', tiles)


#Save each points elevation in DF
Geospatial_df['BR_Elevation_m'] = BRelev_L
Geospatial_df['BR_slope_Deg'] = BRslope_L
Geospatial_df['BRaspect_L'] = BRaspect_L

In [None]:
#Save Geospatial data into SWE.h5 file
Geospatial_df.to_hdf('Data/Pre_Processed/SWE.h5', key = 'Geospatial_Eval_df')

In [None]:
#reset index to make cell id a column name
Geospatial_df = Geospatial_df.reset_index()

#Get geaspatial means
geospatialcols = ['_Coord_Long', '_Coord_Lat', '_Elevation_m', '_slope_Deg' , 'aspect_L']

#Training data
[mean_Geo(Geospatial_df, i) for i in geospatialcols]

#list of key geospatial component means
geocol = ['cell_id', '_Coord_Long','_Coord_Lat','_Elevation_m','_slope_Deg','aspect_L']
TestGeo_df = Geospatial_df[geocol].copy()

#adjust column names to be consistent with snotel

TestGeo_df = TestGeo_df.rename( columns = {'_Coord_Long':'Long', '_Coord_Lat':'Lat', '_Elevation_m': 'elevation_m',
                               '_slope_Deg':'slope_deg' , 'aspect_L': 'aspect'})

In [None]:
#Attach a region id for each location
TestGeo_df['Region'] = 'other'
Region_id(TestGeo_df)


In [None]:
'''
Slice into regional DF's
This is currently to ensure different regions are correctly classified. Will have to perform slice again

'''
#subset data by each region into dictionary
RegionTest = {name: TestGeo_df.loc[TestGeo_df['Region'] == name] for name in TestGeo_df.Region.unique()}

In [None]:
#check to make sure no test locations classified as other
print('Testing') 
for i in RegionTest.keys():
    print('There are', len(RegionTest[i]), ' test locations in ', i)
print('         ') 


In [None]:
#RegionTest should be the RegionVal. make sure.

In [35]:
#save dictionaries as pkl
# create a binary pickle file 
path = 'Data/Processed/RegionVal.pkl'

RVal = open(path,"wb")


# write the python object (dict) to pickle file
pickle.dump(RegionTest,RVal)


# close file
RVal.close()
