In [1]:
import ee, h5py, warnings
import numpy as np
import pandas as pd

# Install an 'ignore' filter so that no warnings are shown for the rest of the session.
warnings.filterwarnings('ignore')

###
# ------- Set up a local proxy to access GEE; may not be required for everyone ------- #
###
    
import socket
import socks

# Default proxy: SOCKS5 at 127.0.0.1, port 10795 (machine-specific; adjust or skip as needed).
socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 10795)
# Replace the socket class module-wide, so code that instantiates socket.socket
# afterwards gets socks.socksocket instead.
socket.socket = socks.socksocket

In [2]:
def ee_array_to_df(arr, list_of_bands):
    """Turn a client-side ``ee.Image.getRegion`` array into a DataFrame.

    Parameters
    ----------
    arr : list of lists
        Row-wise table whose first row holds the column names; it must
        include ``longitude``, ``latitude``, ``time`` and every requested band.
    list_of_bands : list of str
        Band columns to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime`` followed by the requested bands. Rows that have
        a missing value in any coordinate, time or band column are dropped.
    """
    raw = pd.DataFrame(arr)

    # The first record is the header; everything after it is data.
    header_row = raw.iloc[0]
    table = pd.DataFrame(raw.values[1:], columns=header_row)

    # Restrict to the columns that matter and discard incomplete rows.
    wanted = ['longitude', 'latitude', 'time'] + list(list_of_bands)
    table = table[wanted].dropna()

    # Band values become numeric; anything unparseable becomes NaN.
    for band_name in list_of_bands:
        table[band_name] = pd.to_numeric(table[band_name], errors='coerce')

    # 'time' is interpreted as milliseconds since the epoch.
    table['datetime'] = pd.to_datetime(table['time'], unit='ms')

    output_columns = ['datetime'] + list(list_of_bands)
    return table[output_columns]

def format_forz(era5df):
    """Derive the daily forcing columns from ERA5 / ERA5-Land variables.

    Parameters
    ----------
    era5df : pandas.DataFrame
        Daily records indexed by a ``DatetimeIndex``. The columns read are
        ``minimum_2m_air_temperature``, ``maximum_2m_air_temperature``,
        ``mean_2m_air_temperature``, ``dewpoint_2m_temperature``,
        ``surface_solar_radiation_downwards_sum``,
        ``u_component_of_wind_10m``, ``v_component_of_wind_10m`` and
        ``total_precipitation``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Same index as the input, with the columns, in order:

        * ``Year``  - calendar year of the index.
        * ``Day``   - day of year of the index.
        * ``Hour``  - constant 9.
        * ``TMX`` / ``TMN`` - max / min 2 m temperature minus 273.15
          (Kelvin to degrees Celsius).
        * ``RSDS``  - daily solar radiation sum divided by 86400 s,
          with negative values set to 0.
        * ``WSPD``  - magnitude of the 10 m wind vector.
        * ``RH``    - relative humidity in percent from dewpoint and mean
          temperature, limited to the range [0, 100].
        * ``PRECC`` - total precipitation times 1000 (m/day to mm/day per
          the original note), with negative values set to 0.

    Raises
    ------
    KeyError
        If one of the columns listed above is missing.
    """
    tidy_df = era5df.copy()

    tidy_df['TMN'] = tidy_df['minimum_2m_air_temperature'] - 273.15
    tidy_df['TMX'] = tidy_df['maximum_2m_air_temperature'] - 273.15

    # Shortwave: daily sum -> mean flux over the day. Negative noise is
    # removed with clip() rather than chained assignment, which does not
    # write back to the frame when pandas copy-on-write is active.
    tidy_df['RSDS'] = (
        tidy_df['surface_solar_radiation_downwards_sum'] / 3600 / 24
    ).clip(lower=0)

    # Relative humidity: ratio of the saturation-vapour-pressure terms at
    # dewpoint and at mean air temperature (both in degrees Celsius).
    TD = tidy_df['dewpoint_2m_temperature'] - 273.15
    T = tidy_df['mean_2m_air_temperature'] - 273.15
    rel_hum = 100 * (np.exp((17.67 * TD) / (243.5 + TD)) /
                     np.exp((17.67 * T) / (243.5 + T)))
    # Force physical bounds.
    tidy_df['RH'] = rel_hum.clip(lower=0, upper=100)

    # Wind speed from the two horizontal components.
    U = tidy_df['u_component_of_wind_10m']
    V = tidy_df['v_component_of_wind_10m']
    tidy_df['WSPD'] = np.sqrt(U**2 + V**2)

    # Precipitation [m/day > mm/day], negative noise removed.
    tidy_df['PRECC'] = (tidy_df['total_precipitation'] * 1000).clip(lower=0)

    tidy_df['Year'] = tidy_df.index.year
    tidy_df['Day'] = tidy_df.index.day_of_year
    tidy_df['Hour'] = 9

    # Only these columns are handed back, so the raw ERA5 columns need no
    # explicit deletion.
    return tidy_df[['Year', 'Day', 'Hour', 'TMX', 'TMN', 'RSDS', 'WSPD', 'RH', 'PRECC']]


In [3]:
# Years to process; np.arange excludes the stop value, so this is 1984 only.
year_list = np.arange(1984, 1985)

# Key used to look up the coordinates in the two tables below.
sitename = 'Ebo'

# Site latitudes, in degrees.
site_lat = {
    'Altay': 47.733,
    'Kabahe': 47.883,
    'Fuyun': 47.200,
    'Ebo': 38.00,
    'Barrow': 71.309033,
    'DrewPoint': 70.904,
    'Yakutsk': 62.02,
    'Wudaoliang': 35.218,
    'PT8': 38.67,
    'PT1': 38.7822,
    'Fairbanks': 64.80,
    'OldMan': 66.45,
    'HappyValley': 69.1466,
}

# Site longitudes, in degrees (negative values are west).
site_lon = {
    'Altay': 88.083,
    'Kabahe': 86.200,
    'Fuyun': 89.783,
    'Ebo': 100.92,
    'Barrow': -156.661517,
    'DrewPoint': -153.634,
    'Yakutsk': 129.72,
    'Wudaoliang': 93.0833,
    'PT8': 98.96,
    'PT1': 98.7452,
    'Fairbanks': -147.77,
    'OldMan': -150.6167,
    'HappyValley': -148.85,
}

# Bands requested from the `era5` collection. The two radiation sums are
# deliberately absent here; they are listed in `var_list_land` instead.
var_list = [
    'minimum_2m_air_temperature',
    'maximum_2m_air_temperature',
    'mean_2m_air_temperature',
    'total_precipitation',
    'dewpoint_2m_temperature',
    'surface_pressure',
    'u_component_of_wind_10m',
    'v_component_of_wind_10m',
]

# Bands requested from the `era5_land` collection.
var_list_land = [
    'surface_solar_radiation_downwards_sum',
    'surface_thermal_radiation_downwards_sum',
]

# forcing name (currently disabled)
# h5_name = sitename+'_'+str(year_list.min())+'-'+str(year_list.max())+".h5"
# print('Producing...', h5_name)

# Location of interest, in degrees.
era_lat = site_lat[sitename]
era_lon = site_lon[sitename]

print('lat:', era_lat, 'lon:', era_lon)

lat: 38.0 lon: 100.92


In [4]:
# Initialize the library.
ee.Initialize()

# Import the ERA5 daily collection (source of the bands in `var_list`).
era5 = ee.ImageCollection("ECMWF/ERA5/DAILY")
# Import the ERA5-Land daily (raw) collection (source of the bands in `var_list_land`).
era5_land = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_RAW")

# Point geometry for the selected site, passed as [longitude, latitude].
era_poi = ee.Geometry.Point([era_lon, era_lat])

# era5_df = pd.DataFrame(columns=var_list)

In [5]:
# Fetch one calendar year per iteration, convert it to forcing columns and
# print the table together with the summed PRECC for that year.
for i0, year0 in enumerate(year_list):

    # Date window: start is inclusive, end is exclusive.
    i_date = '{}-01-01'.format(year0)
    f_date = '{}-01-01'.format(year0 + 1)

    # Restrict each collection to its own band list and to the date window.
    geeid1 = era5.select(var_list).filterDate(i_date, f_date)
    geeid2 = era5_land.select(var_list_land).filterDate(i_date, f_date)

    # Pull the values at the point of interest to the client (scale 1000).
    era5_data = geeid1.getRegion(era_poi, 1000).getInfo()
    era5_land_data = geeid2.getRegion(era_poi, 1000).getInfo()

    # Index both tables by date, then attach the ERA5-Land columns to the
    # ERA5 rows (left join on the datetime index).
    era5_land_dftmp = ee_array_to_df(era5_land_data, var_list_land).set_index('datetime')
    era5_dftmp = ee_array_to_df(era5_data, var_list).set_index('datetime')
    era5_dftmp = era5_dftmp.join(era5_land_dftmp)

    era5_df_forz = format_forz(era5_dftmp)

    print(i0, year0, era5_df_forz, sum(era5_df_forz.PRECC.values))


0 1984 0           Year  Day  Hour        TMX        TMN        RSDS      WSPD  \
datetime                                                                  
1984-01-01  1984    1     9  -4.823676 -22.599814  132.138889  0.673588   
1984-01-02  1984    2     9  -7.316351 -22.422766  132.434769  1.103308   
1984-01-03  1984    3     9  -7.081915 -23.299719  114.086296  1.470893   
1984-01-04  1984    4     9  -5.335577 -22.892218  133.065463  0.704954   
1984-01-05  1984    5     9  -5.342749 -22.331183  135.647593  0.736357   
...          ...  ...   ...        ...        ...         ...       ...   
1984-12-27  1984  362     9 -13.638892 -24.701727  109.806898  0.187674   
1984-12-28  1984  363     9 -12.856207 -24.484915  126.148380  0.540662   
1984-12-29  1984  364     9  -8.804419 -22.977408  130.362407  0.790101   
1984-12-30  1984  365     9  -6.590277 -22.044684  128.845463  1.643694   
1984-12-31  1984  366     9  -2.835730 -21.116812  125.455648  0.809766   

0                

In [6]:
# Display the forcing table left in `era5_df_forz` by the last iteration of the loop above.
era5_df_forz

Unnamed: 0_level_0,Year,Day,Hour,TMX,TMN,RSDS,WSPD,RH,PRECC
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1984-01-01,1984,1,9,-4.823676,-22.599814,132.138889,0.673588,40.886409,0.000000
1984-01-02,1984,2,9,-7.316351,-22.422766,132.434769,1.103308,39.159426,0.000628
1984-01-03,1984,3,9,-7.081915,-23.299719,114.086296,1.470893,44.882711,0.016088
1984-01-04,1984,4,9,-5.335577,-22.892218,133.065463,0.704954,42.462668,0.000000
1984-01-05,1984,5,9,-5.342749,-22.331183,135.647593,0.736357,38.157244,0.000000
...,...,...,...,...,...,...,...,...,...
1984-12-27,1984,362,9,-13.638892,-24.701727,109.806898,0.187674,58.691724,0.020385
1984-12-28,1984,363,9,-12.856207,-24.484915,126.148380,0.540662,57.540085,0.001125
1984-12-29,1984,364,9,-8.804419,-22.977408,130.362407,0.790101,43.598958,0.000000
1984-12-30,1984,365,9,-6.590277,-22.044684,128.845463,1.643694,37.362053,0.000002


In [7]:
# Write the spin-up climate file: four header lines followed by one
# comma-separated record per row of `era5_df_forz`.
# The file is opened in a `with` block so the handle is closed even if a
# write or a format call raises part-way through.
with open('../examples/test_daily/clmt_spinup.txt', 'wt') as fid:
    fid.write('DJ0306XDHMNRWHP'+'\n')
    fid.write('CCWSRM'+'\n')
    fid.write('{:0.2f},{:0.2f},{:0.2f}'.format(10,1,13)+'\n')
    fid.write('{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f}'.format(7,0,0,0,0,0,0,0,0,0,0,0,0)+'\n')

    # Iterate rows positionally instead of looking each cell up by index
    # label; this also works when the index contains repeated labels.
    for row in era5_df_forz.itertuples(index=False):
        fid.write('{:0.1f},{:0.1f},{:0.1f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.8f}'.format(
            row.Year,
            row.Day,
            row.Hour,
            row.TMX,
            row.TMN,
            row.RSDS,
            # `* 0 + c` swaps the value for the constant c while keeping NaN as NaN.
            row.WSPD * 0 + 1.75,
            row.RH * 0 + 85,
            row.PRECC) + '\n')

In [8]:
# Redefine the year range for the loop below: 1979 through 2014 (np.arange excludes the stop value).
year_list = np.arange(1979, 2015)

In [9]:
# For every year: download the ERA5 / ERA5-Land values at the site, convert
# them to forcing columns, print them, and write one climate file per year.
for i0, year0 in enumerate(year_list):

    # Initial date of interest (inclusive).
    i_date = str(year0)+"-01-01"

    # Final date of interest (exclusive).
    f_date = str(year0+1)+"-01-01"

    # Restrict each collection to its own band list and to the date window.
    geeid1 = era5.select(var_list).filterDate(i_date, f_date)
    geeid2 = era5_land.select(var_list_land).filterDate(i_date, f_date)

    # Pull the values at the point of interest to the client (scale 1000).
    era5_data = geeid1.getRegion(era_poi, 1000).getInfo()
    era5_land_data = geeid2.getRegion(era_poi, 1000).getInfo()

    # Index both tables by date and attach the ERA5-Land columns to the ERA5 rows.
    era5_land_dftmp = ee_array_to_df(era5_land_data, var_list_land).set_index('datetime')
    era5_dftmp = ee_array_to_df(era5_data, var_list).set_index('datetime').join(era5_land_dftmp)

    era5_df_forz = format_forz(era5_dftmp)

    print(i0, year0, era5_df_forz, sum(era5_df_forz.PRECC.values))

    # The file is opened in a `with` block so each yearly handle is closed
    # even if a write or a format call raises part-way through.
    with open('../examples/test_daily/clmt_'+str(year0)+'.txt', 'wt') as fid:
        fid.write('DJ0306XDHMNRWHP'+'\n')
        fid.write('CCWSRM'+'\n')
        fid.write('{:0.2f},{:0.2f},{:0.2f}'.format(10,1,13)+'\n')
        fid.write('{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f}'.format(7,0,0,0,0,0,0,0,0,0,0,0,0)+'\n')

        # Iterate rows positionally instead of looking each cell up by
        # index label; this also works when the index has repeated labels.
        for row in era5_df_forz.itertuples(index=False):
            fid.write('{:0.1f},{:0.1f},{:0.1f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.8f}'.format(
                row.Year,
                row.Day,
                row.Hour,
                row.TMX,
                row.TMN,
                row.RSDS,
                # `* 0 + c` swaps the value for the constant c while keeping NaN as NaN.
                row.WSPD * 0 + 1.75,
                row.RH * 0 + 85,
                row.PRECC) + '\n')

0 1979 0           Year  Day  Hour       TMX        TMN        RSDS      WSPD  \
datetime                                                                 
1979-01-02  1979    2     9 -0.984106 -20.180243  134.169583  1.295374   
1979-01-03  1979    3     9 -2.776312 -19.611578  133.974120  0.695303   
1979-01-04  1979    4     9 -3.284827 -20.975226  134.443889  1.237428   
1979-01-05  1979    5     9 -3.512640 -21.146872  135.742546  1.160850   
1979-01-06  1979    6     9 -1.497626 -17.961417  137.862407  4.097727   
...          ...  ...   ...       ...        ...         ...       ...   
1979-12-27  1979  361     9 -8.266241 -24.275565  130.187917  1.180757   
1979-12-28  1979  362     9 -3.599554 -22.899512  130.353750  1.473280   
1979-12-29  1979  363     9 -5.023474 -21.099722  123.812593  0.604046   
1979-12-30  1979  364     9 -5.826178 -20.867361  130.016944  0.652530   
1979-12-31  1979  365     9 -3.671820 -19.479819  125.072222  1.021525   

0                  RH     PREC