In [249]:
import ee, h5py, warnings
import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

###
# ------- Set up local proxy to access GEE, may not required for all people ------- #
###
    
import socket
import socks

socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 10795)
socket.socket = socks.socksocket

In [250]:
def ee_array_to_df(arr, list_of_bands):

    # Transforms client-side ee.Image.getRegion array to pandas.DataFrame
    df = pd.DataFrame(arr)

    # Rearrange the header.
    headers = df.iloc[0]
    df = pd.DataFrame(df.values[1:], columns=headers)

    # Remove rows without data inside.
    df = df[['longitude', 'latitude', 'time', *list_of_bands]].dropna()

    # Convert the data to numeric values.
    for band in list_of_bands:
        df[band] = pd.to_numeric(df[band], errors='coerce')

    # Convert the time field into a datetime.
    df['datetime'] = pd.to_datetime(df['time'], unit='ms')

    # Keep the columns of interest.
    df = df[['datetime',  *list_of_bands]]

    return df

def format_forz(era5df):

    tidy_df = era5df.copy()

    # Silence copy warning
    pd.set_option('mode.chained_assignment', None)

    tidy_df['TMN'] = tidy_df['minimum_2m_air_temperature'] - 273.15
    tidy_df['TMX'] = tidy_df['maximum_2m_air_temperature'] - 273.15

    # Shortwave
    # To W
    tidy_df['RSDS'] = \
        tidy_df['surface_solar_radiation_downwards_sum']/3600/24
    # Remove negative noise
    tidy_df['RSDS'][tidy_df['RSDS'] < 0] = 0
    
    # RH
    TD = tidy_df['dewpoint_2m_temperature'] - 273.15
    # T = tidy_df['temperature_2m'] - 273.15
    # tidy_df['RH'] = 100 * (np.exp((17.67 * TD) / (243.5 + TD)) /
    #                        np.exp((17.67 * T) / (243.5 + T)))
    # Force bounds
    # tidy_df['RH'][tidy_df['RH'] > 100] = 100
    # tidy_df['RH'][tidy_df['RH'] < 0] = 0

    e_VPa = 611.2 * np.exp((17.67 * TD) / (243.5 + TD))
    tidy_df['SH'] = 0.622 * e_VPa / (tidy_df['surface_pressure']*0.01 - 0.378 * e_VPa)

    tidy_df['TD'] = tidy_df['dewpoint_2m_temperature'] - 273.15

    tidy_df['VPA'] = e_VPa * 0.1

    # Wind
    U = tidy_df['u_component_of_wind_10m']
    V = tidy_df['v_component_of_wind_10m']
    tidy_df['WSPD'] = np.sqrt(U**2 + V**2)

    # Precipitation [m/day > mm/day]
    tidy_df['PRECC'] = \
        tidy_df['total_precipitation']*1000
    tidy_df['PRECC'][tidy_df['PRECC'] < 0] = 0

    tidy_df['Year'] = tidy_df.index.year
    tidy_df['Day'] = tidy_df.index.day_of_year
    tidy_df['Hour'] = 9
        
    # remove old columns
    del tidy_df['surface_solar_radiation_downwards_sum']
    del tidy_df['surface_thermal_radiation_downwards_sum']
    del tidy_df['dewpoint_2m_temperature']
    del tidy_df['u_component_of_wind_10m']
    del tidy_df['v_component_of_wind_10m']
    del tidy_df['total_precipitation']
    del tidy_df['surface_pressure']
    del tidy_df['minimum_2m_air_temperature']
    del tidy_df['maximum_2m_air_temperature']

    return tidy_df[['Year','Day','TMX','TMN','RSDS','WSPD','VPA','PRECC']]


In [251]:
year_list = np.arange(2018, 2020)

sitename = 'Ebo'

site_lat = {'Altay':47.733, 'Kabahe':47.883, 'Fuyun':47.200, 'Ebo': 38.00, 
            'Barrow':71.309033, 'DrewPoint':70.904, 'Yakutsk':62.02, 
            'Wudaoliang':35.218, 'PT8':38.67, 'PT1': 38.7822,
            'Fairbanks':64.80, 'OldMan': 66.45, 'HappyValley':69.1466}
site_lon = {'Altay':88.083, 'Kabahe':86.200, 'Fuyun':89.783, 'Ebo': 100.92, 
            'Barrow':-156.661517, 'DrewPoint':-153.634, 'Yakutsk': 129.72, 
            'Wudaoliang':93.0833, 'PT8': 98.96, 'PT1':98.7452,
            'Fairbanks':-147.77, 'OldMan': -150.6167, 'HappyValley':-148.85}
            
var_list = ['minimum_2m_air_temperature',
            'maximum_2m_air_temperature',
            'total_precipitation', 
            # 'surface_solar_radiation_downwards_sum', 
            # 'surface_thermal_radiation_downwards_sum', 
            'dewpoint_2m_temperature', 
            'surface_pressure',
            'u_component_of_wind_10m',
            'v_component_of_wind_10m']

var_list_land = ['surface_solar_radiation_downwards_sum', 
            'surface_thermal_radiation_downwards_sum', ]

# forcing name
h5_name = sitename+'_'+str(year_list.min())+'-'+str(year_list.max())+".h5"

print('Producing...', h5_name)

# Define the locations of interest in degrees.
era_lon = site_lon[sitename]
era_lat = site_lat[sitename]

print('lat:',era_lat, 'lon:', era_lon)

Producing... Ebo_2018-2019.h5
lat: 38.0 lon: 100.92


In [252]:
# Initialize the library.
ee.Initialize()

# Import the ERA5_LAND collection.
era5 = ee.ImageCollection("ECMWF/ERA5/DAILY")
# Import the ERA5_LAND collection.
era5_land = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_RAW")

era_poi = ee.Geometry.Point([era_lon, era_lat])

# era5_df = pd.DataFrame(columns=var_list)

In [253]:
for i0, year0 in enumerate(year_list):

	# Initial date of interest (inclusive).
	i_date = str(year0)+"-01-01"

	# Final date of interest (exclusive).
	f_date = str(year0+1)+"-01-01"

	geeid1 = era5.select(var_list).filterDate(i_date, f_date)
	geeid2 = era5_land.select(var_list_land).filterDate(i_date, f_date)

	era5_data = geeid1.getRegion(era_poi, 1000).getInfo()

	era5_land_data = geeid2.getRegion(era_poi, 1000).getInfo()

	# print(len(era5_data))

	era5_land_dftmp = ee_array_to_df(era5_land_data, var_list_land).set_index('datetime')

	era5_dftmp = ee_array_to_df(era5_data, var_list).set_index('datetime').join(era5_land_dftmp)

	# if i0 == 0:
	# 	era5_df = era5_dftmp.copy()
	# else:
	# 	era5_df = pd.concat([era5_df, era5_dftmp], ignore_index=False)

	# era5_dftmp.set_index('datetime')
	
# 	era5_df = era5_df.append(era5_dftmp, ignore_index=True)

	# era5_df = pd.concat([era5_df, era5_dftmp], ignore_index=False)

	# # print(era5_df)
	
	era5_df_forz = format_forz(era5_dftmp)
	
	print(i0, year0, era5_df_forz)


0 2018 0           Year  Day        TMX        TMN        RSDS      WSPD        VPA  \
datetime                                                                       
2018-01-01  2018    1  -6.878058 -20.483252  134.473472  0.184233   7.580433   
2018-01-02  2018    2  -8.450537 -16.876807  105.497824  0.131297  11.505445   
2018-01-03  2018    3  -7.677649 -20.152792  120.206296  0.795835  13.248439   
2018-01-04  2018    4  -2.911475 -20.639899  136.641620  1.857591  10.184364   
2018-01-05  2018    5  -4.513098 -17.029150  132.309583  0.535139  12.788488   
...          ...  ...        ...        ...         ...       ...        ...   
2018-12-27  2018  361 -14.149664 -25.877997   95.001019  0.055747  10.173589   
2018-12-28  2018  362 -12.835150 -25.076270  101.836111  0.511152   9.727484   
2018-12-29  2018  363 -10.227057 -29.556097  126.921991  2.059547   8.909611   
2018-12-30  2018  364 -10.797980 -30.169348  132.186065  1.114620   8.411029   
2018-12-31  2018  365  -8.949103 

In [254]:
era5_df_forz

Unnamed: 0_level_0,Year,Day,TMX,TMN,RSDS,WSPD,VPA,PRECC
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-01-01,2019,1,-5.548651,-25.614706,132.757269,1.969843,11.094482,0.000000
2019-01-02,2019,2,-3.355139,-27.974158,133.896296,1.413478,13.804980,0.028433
2019-01-03,2019,3,-7.810217,-22.615942,93.843889,0.121541,15.689018,0.651268
2019-01-04,2019,4,-9.294043,-24.103125,99.269583,1.113624,14.234474,1.446445
2019-01-05,2019,5,-4.813208,-23.942770,138.918889,2.015325,11.248634,0.001146
...,...,...,...,...,...,...,...,...
2019-12-27,2019,361,-8.142767,-30.049750,134.227824,2.911659,7.629350,0.000000
2019-12-28,2019,362,-4.897070,-24.940192,132.954213,2.013793,9.980468,0.000000
2019-12-29,2019,363,-9.037512,-26.021643,130.213333,0.859722,9.752628,0.005554
2019-12-30,2019,364,-4.853461,-26.678641,133.933704,1.936571,9.919470,0.000000


In [255]:
fid = open('test.txt', 'wt')
fid.write('DJ0206XDMNRWHP'+'\n')
fid.write('CCWSZD'+'\n')
fid.write('{:0.2f},{:0.2f},{:0.2f}'.format(10,1,13)+'\n')
fid.write('{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f}'.format(7,0,0,0,0,0,0,0,0,0,0,0,0)+'\n')
for i in era5_df_forz.index:
    fid.write('{:0.1f},{:0.1f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.2f},{:0.4f}'.format(era5_df_forz['Year'][i],
                                       era5_df_forz['Day'][i],
                                       era5_df_forz['TMX'][i],
                                       era5_df_forz['TMN'][i],
                                       era5_df_forz['RSDS'][i],
                                       era5_df_forz['WSPD'][i],
                                       era5_df_forz['VPA'][i],
                                       era5_df_forz['PRECC'][i]) + '\n')
fid.close()