# Compare CW3E and NLDAS2


Runs a comparison between CW3E and NLDAS2 in the following steps:
1) Reads in CONUS1 NLDAS2 forcing data
2) Reads in CONUS2 CW3E forcing data
3) Reads in metadata for CONUS1 SNOTEL sites
4) Performs the comparison by computing the daily CW3E - NLDAS2 temperature difference at each site and then averaging over the water year (this gives the same output as the second method, which first averages each dataset over the year and then takes the difference)
5) Plots results on CONUS1 domain

In [None]:
import sys
from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd

##### Change paths to directories, run information...

In [None]:
### Directories holding the organized CSV files that the cells below read
### (C1 = CONUS1 tables and site metadata, C2 = CONUS2 tables)
C1_organized_dir = '/home/ad4430/ashley_intern_research_2023/precip_temp_CONUS1/Organized_csv/temperature' # verde
C2_organized_dir = '/home/ad4430/ashley_intern_research_2023/precip_temp/Organized_csv/temperature' # verde

### Where you want to save plots, etc. to
### NOTE: plot_dir is assigned again in the plotting section, so the path printed here
### is not the one the final figure is written to
plot_dir = '/scratch/wh3248/bill-play/forcing_test/figures'
print(f'Figures saving to: {plot_dir}')

### need to change water year and number of days
### water_year and variable are substituted into the input CSV names and the figure name
### NOTE(review): no_days is not referenced by any other visible cell
water_year = 2006
no_days = 365
variable = 'Temp_C'

### CONUS1 domain
### nx and ny set the image extent of the map plot (x from 0 to nx, y from 0 to ny)
ny = 1888
nx = 3342

# 1. Read C1 NLDAS2 forcing

### Note: the C1 file names say CW3E, but for C1 these files contain NLDAS2 forcing and should be labeled NLDAS2

In [None]:
# File name of the CONUS1 daily matched table for the selected variable and water year.
# The name starts with "CW3E", but for CONUS1 the notebook treats its contents as NLDAS2 forcing.
C1_pf_match_csv = f'CW3E_Daily_matched_CONUS1_{variable}_{water_year}.csv'

In [None]:
# Load the CONUS1 daily matched table (treated as NLDAS2 forcing) from the C1 directory
C1_csv_path = f'{C1_organized_dir}/{C1_pf_match_csv}'
NLDAS2_df = pd.read_csv(C1_csv_path)
NLDAS2_df

In [None]:
# Use the site identifier as the row label so later cells can look sites up by id
NLDAS2_df = NLDAS2_df.set_index(keys='site_id')
NLDAS2_df

# 2. Read in CW3E Forcing and organize

In [None]:
# File name of the CONUS2 (CW3E) daily matched table. It is built from the same
# `variable` setting as the CONUS1 file name, so changing `variable` cannot
# silently pair tables of two different forcing variables.
C2_pf_match_csv = f'CW3E_Daily_matched_CONUS2_{variable}_{water_year}.csv'

In [None]:
# Load the CONUS2 daily matched CW3E table from the C2 directory
C2_csv_path = f'{C2_organized_dir}/{C2_pf_match_csv}'
CW3E_df = pd.read_csv(C2_csv_path)
CW3E_df

In [None]:
# Use the site identifier as the row label, matching how NLDAS2_df is indexed
CW3E_df = CW3E_df.set_index(keys='site_id')
CW3E_df

In [None]:
# Keep only the CW3E rows whose site id also appears in the CONUS1 (NLDAS2) table
in_conus1 = CW3E_df.index.isin(NLDAS2_df.index)
CW3E_df = CW3E_df.loc[in_conus1]
CW3E_df

# 3. Metadata for coords

In [None]:
# Load the site metadata, drop the leftover 'Unnamed: 0' column, label rows by
# site id, and keep only the sites present in the CONUS1 (NLDAS2) table
metadata_csv_path = f'{C1_organized_dir}/Hydrodata_temperature_metadata_daily_average_WY{water_year}.csv'
metadata_df = (
    pd.read_csv(metadata_csv_path)
    .drop(columns=['Unnamed: 0'])
    .set_index('site_id')
)
site_in_conus1 = metadata_df.index.isin(NLDAS2_df.index)
metadata_df = metadata_df.loc[site_in_conus1]
metadata_df

# 4. Performing comparison

In [None]:

# method 1: subtract CW3E - NLDAS2 for each day at each gage (the average is taken later)
# NLDAS2 is selected by label in CW3E's gage (row) and day (column) order, so the two
# tables line up gage-for-gage and day-for-day and can be subtracted in one step.
# A gage or day missing from NLDAS2_df raises a KeyError, as the per-element lookup did.
NLDAS2_matched = NLDAS2_df.loc[CW3E_df.index, CW3E_df.columns]
difference_df = CW3E_df - NLDAS2_matched  # keeps the gage ids as index and the dates as columns

# method 2: taking yearly averages, then finding difference
# Each gage's daily values are summed and divided by the number of day columns.
# NLDAS2 rows are taken in CW3E's gage order, so avg_CW3E and avg_NLDAS2 always have
# one entry per CW3E gage and can be subtracted element by element, even when
# NLDAS2_df contains gages that CW3E_df does not.
avg_CW3E = (CW3E_df.sum(axis=1) / len(CW3E_df.columns)).to_numpy()
avg_NLDAS2 = (NLDAS2_df.loc[CW3E_df.index].sum(axis=1) / len(NLDAS2_df.columns)).to_numpy()

# Side-by-side table of the two annual averages, labeled by gage id
avg_df = pd.DataFrame(
    {'CW3E temp': avg_CW3E, 'NLDAS2 temp': avg_NLDAS2},
    index=CW3E_df.index,
)

avg_df

In [None]:
# mean difference
# Divide by the number of day columns actually present instead of a fixed 365, so
# 366-day water years are averaged correctly and method 1 uses the same divisor as method 2.
n_day_columns = difference_df.shape[1]

comparison_df = pd.DataFrame()
comparison_df['mean_diff'] = difference_df.sum(axis=1) / n_day_columns  # method 1 - taking daily difference first
comparison_df['annual_avg_diff'] = avg_CW3E - avg_NLDAS2  # method 2 - taking avg temp first

# adding metadata: CONUS1 grid coordinates of each site, matched on site id
comparison_df = comparison_df.join(metadata_df[['conus1_x', 'conus1_y']])
#comparison_df['num_obs'] = metadata_df['num_obs']
comparison_df

# 5. Plotting Results

In [None]:
#from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
#import geopandas as gpd
from PIL import Image
from mpl_toolkits.basemap import Basemap as Basemap

In [None]:
# Overrides the plot_dir assigned in the configuration cell; the figure saved by the
# plotting cell below is written to this directory
plot_dir = '/home/ad4430/for_ashley/precip_temp_CONUS1/Figures/summary_plots_temp'

In [None]:
# Open the CONUS2 domain mask (also kept as a numpy array) and the HUC map PNG
# that the map plot draws as its outline layer
mask_tif_path = '/hydrodata/PFCLM/CONUS2_baseline/inputs/domain_files/CONUS2.0.Final1km.Mask.tif'
huc_png_path = '/home/ad4430/for_ashley/precip_temp_CONUS1/Figures/CONUS1_HUCmap.png'

CONUS2_mask = Image.open(mask_tif_path)
mask_array = np.array(CONUS2_mask)
HUC2_outline = Image.open(huc_png_path)

In [None]:
# # Read in CONUS2 mask for map plot border
# CONUS2_mask = Image.open('/hydrodata/PFCLM/CONUS2_baseline/inputs/domain_files/CONUS2.0.Final1km.Mask.tif')
# #CONUS2_mask = Image.open('/home/dtt2/CONUS2/shape_files/CONUS2_HUC2_boundaries_clipped.tif')
# mask_array = np.array(CONUS2_mask)

In [None]:
# Map of the per-site mean CW3E - NLDAS2 difference, drawn together with the HUC
# outline image and saved as a PNG in plot_dir
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
im = ax.imshow(HUC2_outline, extent=[0, nx, 0, ny], zorder=2)

# One colored point per site; the color scale is fixed to [-10, 10]
site_points = ax.scatter(
    comparison_df.conus1_x,
    comparison_df.conus1_y,
    c=comparison_df.mean_diff,
    cmap='coolwarm',
    s=50,
    vmin=-10,
    vmax=10,
)
fig.colorbar(site_points, ax=ax, label='mean difference')
ax.axis('off')
ax.set_title(f'CW3E - NLDAS2, Avg Temperature Difference, WY{water_year}', fontsize=20)

# Summary statistics written underneath the map
site_mean_diff = comparison_df.mean_diff
ax.text(0, -50, "Minimum Difference = " + str(site_mean_diff.min()))
ax.text(0, -100, "Maximum Difference = " + str(site_mean_diff.max()))
ax.text(0, -150, "Avg Difference across all sites = " + str(site_mean_diff.mean()))

fig.savefig(
    f'{plot_dir}/WY{water_year}_CW3E_minus_NLDAS2_{variable}_RelBias_Map.png',
    dpi=200,
    facecolor='white',
    transparent=False,
    bbox_inches="tight",
)