In [1]:
import ee
import geemap
import pandas as pd
from datetime import datetime, timedelta

In [None]:
# Session setup, in order: register the proxy port with geemap, run the
# Earth Engine authentication flow, then initialise the session through geemap.
# NOTE(review): port '7897' is specific to the author's local proxy - confirm
# before running on another machine.
geemap.set_proxy(port='7897')
ee.Authenticate()
geemap.ee_initialize()

In [3]:
# Query window handed to extract_data_for_site, which forwards it to
# ImageCollection.filterDate (the end bound is exclusive there).
start_date, end_date = '2019-01-01', '2022-12-31'

# ERA5-Land bands selected for every site.
variables = [
    'dewpoint_temperature_2m',
    'temperature_2m',
    'surface_pressure',
    'total_precipitation_sum',
]

In [4]:
# Note: Earth Engine raises 'Collection query aborted after accumulating over 5000 elements.'
# when a single getInfo() request returns more than 5000 elements.
def extract_data_for_site(site_name, lon, lat, start_date, end_date, bands=None, scale=1000):
    """Fetch the ERA5-Land monthly-aggregate time series at one point.

    Parameters
    ----------
    site_name
        Identifier written to the ``site`` column of every returned row.
    lon, lat
        Point coordinates, passed to ``ee.Geometry.Point`` as ``[lon, lat]``.
    start_date, end_date
        Bounds forwarded to ``ImageCollection.filterDate`` (the end bound is
        exclusive there).
    bands
        Band names to select. Defaults to the notebook-level ``variables``
        list, so existing calls behave exactly as before.
    scale
        Nominal scale in metres forwarded to ``reduceRegion`` (default 1000,
        the value that was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        One row per image in the filtered collection, holding the reduced
        band values, a ``date`` column ('YYYY-MM-dd' of the image start time)
        and a ``site`` column.
    """
    if bands is None:
        # Fall back to the notebook-level configuration.
        bands = variables

    # Location of the site.
    point = ee.Geometry.Point([lon, lat])

    # ECMWF/ERA5_LAND/MONTHLY_AGGR collection, restricted to the period, the
    # point and the bands of interest.
    # https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_LAND_MONTHLY_AGGR#bands
    collection = (
        ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')
        .filterDate(start_date, end_date)
        .filterBounds(point)
        .select(bands)
    )

    def extract_single_date(image):
        """Reduce one image at the point and tag the result with its date."""
        date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
        reduced = image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=point,
            scale=scale,
            maxPixels=1e13,
        )
        # Geometry-less feature: only the property dictionary is needed.
        return ee.Feature(None, reduced.set('date', date))

    # One server round trip per site: map over the images, then download.
    time_series = collection.map(extract_single_date).getInfo()

    # Flatten the returned features into records, tagging each with the site.
    records = [
        {**feature['properties'], 'site': site_name}
        for feature in time_series['features']
    ]
    return pd.DataFrame(records)

In [None]:
# Site table; the extraction loop reads its 'ID', 'lng' and 'lat' columns.
file_path = '../metadata/sequenced_sample_location_data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [7]:
# Extract the time series for every site listed in the table.
# The three needed columns are iterated directly: DataFrame.iterrows() builds
# one Series per row and can upcast values (e.g. an integer ID turns into a
# float when the remaining columns are floats), whereas column-wise iteration
# keeps each column's own dtype.
all_data = []
for site_id, site_lon, site_lat in zip(df['ID'], df['lng'], df['lat']):
    site_data = extract_data_for_site(
        site_name=site_id,
        lon=site_lon,
        lat=site_lat,
        start_date=start_date,
        end_date=end_date,
    )
    all_data.append(site_data)

# Stack the per-site frames into a single long table.
final_df = pd.concat(all_data, ignore_index=True)

In [8]:
# Write the combined per-site table next to the input metadata, without the
# DataFrame index column.
final_df.to_csv(
    '../metadata/sequenced_sample_location_data_with_ecmwf_era5.csv',
    index=False,
)