In [1]:
import ee, h5py, warnings
import numpy as np
import pandas as pd

# Install a global filter that ignores every warning raised after this point.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below, so remove it temporarily when debugging unexpected results.
warnings.filterwarnings('ignore')

###
# ------- Set up a local proxy to access GEE; this may not be required for everyone ------- #
###
    
import socket
import socks

# Address of the local SOCKS5 proxy; adjust to match your own proxy.
_proxy_host = "127.0.0.1"
_proxy_port = 10793

# Register the proxy as the default, then replace the standard socket class
# so sockets created from here on are socks.socksocket instances.
socks.set_default_proxy(socks.SOCKS5, _proxy_host, _proxy_port)
socket.socket = socks.socksocket

In [2]:
def ee_array_to_df(arr, list_of_bands):
    """Convert a client-side ``ee.Image.getRegion`` array to a DataFrame.

    Parameters
    ----------
    arr : list of lists
        Table whose first row holds the column names and whose remaining
        rows hold the records. It must provide 'longitude', 'latitude',
        'time' and every name in ``list_of_bands``.
    list_of_bands : list of str
        Names of the band columns to keep.

    Returns
    -------
    pandas.DataFrame
        A 'datetime' column followed by the requested bands converted to
        numeric values. Records with a missing coordinate, time or band
        value are removed; the surviving rows keep their original index.
    """
    raw = pd.DataFrame(arr)

    # Promote the first row to column labels; everything below it is data.
    header_row = raw.iloc[0]
    table = pd.DataFrame(raw.values[1:], columns=header_row)

    # Restrict to the fields that are needed and discard incomplete records.
    needed = ['longitude', 'latitude', 'time'] + list(list_of_bands)
    table = table[needed].dropna()

    # Cast each band to numbers; values that cannot be parsed become NaN.
    for name in list_of_bands:
        table[name] = pd.to_numeric(table[name], errors='coerce')

    # 'time' is interpreted as milliseconds since the epoch.
    table['datetime'] = pd.to_datetime(table['time'], unit='ms')

    # Only the timestamp and the bands are returned.
    return table[['datetime'] + list(list_of_bands)]

def format_forz(era5df):
    """Derive forcing variables from a table of ERA5-Land daily columns.

    Bounds are enforced with ``Series.clip`` rather than chained assignment
    (``df[col][mask] = value``). Chained assignment silently has no effect
    when pandas Copy-on-Write is active, which would let negative radiation
    or precipitation and RH above 100 pass through. Because no chained
    assignment remains, the function no longer changes the global
    ``mode.chained_assignment`` pandas option.

    Parameters
    ----------
    era5df : pandas.DataFrame
        Must contain the columns 'temperature_2m',
        'dewpoint_temperature_2m', 'total_precipitation_sum',
        'surface_solar_radiation_downwards_sum',
        'surface_thermal_radiation_downwards_sum',
        'u_component_of_wind_10m' and 'v_component_of_wind_10m'.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``era5df`` with these columns appended, in this order:

        * ``RSDS``  - solar radiation sum divided by 86400 s, floored at 0
          (W m-2 if the input is a daily total in J m-2 - TODO confirm).
        * ``RH``    - relative humidity in percent, limited to [0, 100].
        * ``WSPD``  - magnitude of the (u, v) wind vector.
        * ``PRECC`` - precipitation sum divided by 86400 s, floored at 0
          (m s-1 if the input is a daily total in m - TODO confirm).

        The radiation, dewpoint, wind-component and precipitation source
        columns are removed; 'temperature_2m' and any other columns are kept.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    seconds_per_day = 3600 * 24
    kelvin_offset = 273.15

    tidy_df = era5df.copy()

    # Shortwave: daily sum -> mean rate; clip removes negative noise.
    tidy_df['RSDS'] = (
        tidy_df['surface_solar_radiation_downwards_sum'] / seconds_per_day
    ).clip(lower=0)

    # Relative humidity: ratio of exp(17.67 * t / (243.5 + t)) evaluated at
    # the dewpoint and at the air temperature, both in degrees Celsius.
    TD = tidy_df['dewpoint_temperature_2m'] - kelvin_offset
    T = tidy_df['temperature_2m'] - kelvin_offset
    rh = 100 * (np.exp((17.67 * TD) / (243.5 + TD)) /
                np.exp((17.67 * T) / (243.5 + T)))
    # Force physical bounds (dewpoint slightly above temperature gives >100).
    tidy_df['RH'] = rh.clip(lower=0, upper=100)

    # Wind speed from the two horizontal components.
    U = tidy_df['u_component_of_wind_10m']
    V = tidy_df['v_component_of_wind_10m']
    tidy_df['WSPD'] = np.sqrt(U**2 + V**2)

    # Precipitation: daily sum -> mean rate; clip removes negative noise.
    tidy_df['PRECC'] = (
        tidy_df['total_precipitation_sum'] / seconds_per_day
    ).clip(lower=0)

    # Remove the source columns that have been converted.
    # NOTE(review): 'surface_thermal_radiation_downwards_sum' is dropped
    # without deriving a longwave column - confirm that this is intended.
    return tidy_df.drop(columns=[
        'surface_solar_radiation_downwards_sum',
        'surface_thermal_radiation_downwards_sum',
        'dewpoint_temperature_2m',
        'u_component_of_wind_10m',
        'v_component_of_wind_10m',
        'total_precipitation_sum',
    ])


In [3]:
# Years to download; np.arange excludes the stop value, so this is 2022 only.
year_list = np.arange(2022, 2023)

# Site to process; must be a key of the coordinate tables below.
sitename = 'Ebo'

# Site coordinates in degrees, stored as (latitude, longitude).
site_coords = {
    'Altay':       (47.733,     88.083),
    'Kabahe':      (47.883,     86.200),
    'Fuyun':       (47.200,     89.783),
    'Ebo':         (38.00,      100.92),
    'Barrow':      (71.309033, -156.661517),
    'DrewPoint':   (70.904,    -153.634),
    'Yakutsk':     (62.02,      129.72),
    'Wudaoliang':  (35.218,     93.0833),
    'PT8':         (38.67,      98.96),
    'PT1':         (38.7822,    98.7452),
    'Fairbanks':   (64.80,     -147.77),
    'OldMan':      (66.45,     -150.6167),
    'HappyValley': (69.1466,   -148.85),
}
site_lat = {site: lat for site, (lat, _) in site_coords.items()}
site_lon = {site: lon for site, (_, lon) in site_coords.items()}

# ERA5-Land daily bands to request.
var_list = [
    'temperature_2m',
    'total_precipitation_sum',
    'surface_solar_radiation_downwards_sum',
    'surface_thermal_radiation_downwards_sum',
    'dewpoint_temperature_2m',
    'surface_pressure',
    'u_component_of_wind_10m',
    'v_component_of_wind_10m',
]

# Name of the forcing file: <site>_<first year>-<last year>.h5
h5_name = f"{sitename}_{year_list.min()}-{year_list.max()}.h5"

print('Producing...', h5_name)

# Location of interest in degrees.
era_lon = site_lon[sitename]
era_lat = site_lat[sitename]

print('lat:', era_lat, 'lon:', era_lon)

Producing... Ebo_2022-2022.h5
lat: 38.0 lon: 100.92


In [4]:
# Initialize the Earth Engine client library before any other ee call.
ee.Initialize()

# ERA5-Land daily image collection, referenced by its asset id.
era5_collection_id = "ECMWF/ERA5_LAND/DAILY_RAW"
era5_land = ee.ImageCollection(era5_collection_id)

# Point of interest; the coordinates are passed as [longitude, latitude].
poi_lon_lat = [era_lon, era_lat]
era_poi = ee.Geometry.Point(poi_lon_lat)

# Empty table with one column per requested variable.
era5_df = pd.DataFrame(columns=var_list)

In [5]:
# Download one calendar year per request and keep the per-year tables in a
# list. Concatenating once after the loop avoids re-copying the accumulated
# frame on every iteration, avoids concatenating onto an empty object-dtype
# frame (a behaviour pandas has deprecated), and makes the cell idempotent:
# re-running it rebuilds era5_df instead of appending duplicate rows.
yearly_frames = []

for year0 in year_list:

    # Initial date of interest (inclusive).
    i_date = str(year0) + "-01-01"

    # Final date of interest (exclusive).
    f_date = str(year0 + 1) + "-01-01"

    geeid2 = era5_land.select(var_list).filterDate(i_date, f_date)

    # Fetch the values at the point of interest; 1000 is the scale argument
    # passed to getRegion.
    era5_data = geeid2.getRegion(era_poi, 1000).getInfo()

    era5_dftmp = ee_array_to_df(era5_data, var_list)
    yearly_frames.append(era5_dftmp)

if yearly_frames:
    # Keep the column order the table had before: variables first, then
    # the timestamp.
    era5_df = pd.concat(yearly_frames, ignore_index=True)[[*var_list, 'datetime']]
else:
    # No years requested: leave an empty table with the variable columns.
    era5_df = pd.DataFrame(columns=var_list)

# The unit conversion in format_forz is not applied in this cell.
print(era5_df)


     temperature_2m  total_precipitation_sum  \
0        258.315958                 0.000002   
1        259.601651                 0.000012   
2        259.921521                 0.000044   
3        260.320203                 0.000005   
4        260.817244                 0.000000   
..              ...                      ...   
360      255.881104                 0.000001   
361      255.569118                 0.000002   
362      256.744056                 0.000000   
363      258.306477                 0.000002   
364      260.009851                 0.000216   

     surface_solar_radiation_downwards_sum  \
0                               11649800.0   
1                               11537836.0   
2                               10140924.0   
3                               11123332.0   
4                               11674412.0   
..                                     ...   
360                             11232484.0   
361                             11271884.0   
362      