# Make FLUXNET-CH4 NetCDF (.nc) data

## Make daily data

In [None]:
import pandas as pd
import xarray as xr
import os
import numpy as np
import copy
from joblib import Parallel, delayed  # 假设环境有joblib；如果没有，可替换为multiprocessing

# Root directory of the FLUXNET-CH4 data set
dir_path = '/share/home/dq076/data/ME/FLUXNET-CH4/'

# Load the site metadata table that lives directly under the root directory
metadata_file = os.path.join(dir_path, 'FLX_AA-Flx_CH4-META_20201112135337801132.csv')
metadata = pd.read_csv(metadata_file)

# Keep only the sites whose FLUXNET-CH4_DATA_POLICY is 'CCBY4.0'
sites = metadata.loc[metadata['FLUXNET-CH4_DATA_POLICY'].eq('CCBY4.0')]
print('start')

# Columns to export, in output order ('TIMESTAMP' first, data variables after)
selected_columns = (
    ['TIMESTAMP']
    + ['NEE', 'H', 'LE', 'FCH4', 'USTAR']
    + ['SW_IN', 'SW_OUT', 'LW_IN', 'LW_OUT', 'NETRAD', 'PPFD_IN']
    + ['VPD', 'TA', 'P']
    + [f'TS_{probe}' for probe in range(1, 6)]
    + ['G', 'WTD', 'GPP_NT', 'RECO_NT', 'GPP_DT', 'RECO_DT', 'WD', 'WS']
    + ['NEE_F', 'H_F', 'LE_F', 'FCH4_F']
    + ['SW_IN_F', 'SW_OUT_F', 'LW_IN_F', 'LW_OUT_F', 'NETRAD_F', 'PPFD_IN_F']
    + ['VPD_F', 'PA_F', 'TA_F', 'P_F', 'G_F', 'WTD_F', 'WS_F']
    + ['LE_F_ANNOPTLM', 'NEE_F_ANNOPTLM', 'FCH4_F_ANNOPTLM', 'FCH4_F_ANNOPTLM_QC']
)

# Reshape a 1-D series into the [time, y, x] layout
def expand_to_3d(data_1d, dtype=np.float32):
    """Return ``data_1d`` as a ``[time, 1, 1]`` array of the requested dtype."""
    column = np.array(data_1d, dtype=dtype)
    # The two trailing singleton axes stand in for the y and x dimensions.
    return column.reshape(-1, 1, 1)

# Attribute table for the exported variables (complete list, including default
# names for the TS_* probes).  Each row is (name, long_name, units, description);
# a description of None means the entry gets no 'description' attribute.
var_metadata = {
    name: (
        {'long_name': long_name, 'units': units}
        if note is None
        else {'long_name': long_name, 'units': units, 'description': note}
    )
    for name, long_name, units, note in (
        ('NEE', 'Net ecosystem exchange', 'umol CO2 m-2 s-1', None),
        ('H', 'Sensible heat turbulent flux', 'W m-2', None),
        ('LE', 'Latent heat turbulent flux', 'W m-2', None),
        ('FCH4', 'Methane (CH4) turbulent flux', 'nmol CH4 m-2 s-1', None),
        ('USTAR', 'Friction velocity', 'm s-1', None),
        ('SW_IN', 'Shortwave radiation, incoming', 'W m-2', None),
        ('SW_OUT', 'Shortwave radiation, outgoing', 'W m-2', None),
        ('LW_IN', 'Longwave radiation, incoming', 'W m-2', None),
        ('LW_OUT', 'Longwave radiation, outgoing', 'W m-2', None),
        ('NETRAD', 'Net radiation', 'W m-2', None),
        ('PPFD_IN', 'Photosynthetic photon flux density, incoming', 'umol photon m-2 s-1', None),
        ('VPD', 'Vapor pressure deficit', 'hPa', None),
        ('TA', 'Air temperature', 'degC', None),
        ('P', 'Precipitation', 'mm', None),
        ('TS_1', 'Soil temperature at probe 1', 'degC', None),
        ('TS_2', 'Soil temperature at probe 2', 'degC', None),
        ('TS_3', 'Soil temperature at probe 3', 'degC', None),
        ('TS_4', 'Soil temperature at probe 4', 'degC', None),
        ('TS_5', 'Soil temperature at probe 5', 'degC', None),
        ('G', 'Soil heat flux', 'W m-2', None),
        ('WTD', 'Water table depth', 'm', None),
        ('GPP_NT', 'Gross primary productivity (nighttime method)', 'umol CO2 m-2 s-1',
         'Estimated using Reichstein et al. (2005) nighttime flux partitioning'),
        ('RECO_NT', 'Ecosystem respiration (nighttime method)', 'umol CO2 m-2 s-1',
         'Estimated using Reichstein et al. (2005) nighttime flux partitioning'),
        ('GPP_DT', 'Gross primary productivity (daytime method)', 'umol CO2 m-2 s-1',
         'Estimated using Lasslop et al. (2010) daytime flux partitioning'),
        ('RECO_DT', 'Ecosystem respiration (daytime method)', 'umol CO2 m-2 s-1',
         'Estimated using Lasslop et al. (2010) daytime flux partitioning'),
        ('WD', 'Wind direction', 'Decimal degrees', None),
        ('WS', 'Wind speed', 'm s-1', None),
        ('NEE_F', 'Gap-filled net ecosystem exchange', 'umol CO2 m-2 s-1',
         'Gap-filled using MDS approach (REddyProc)'),
        ('H_F', 'Gap-filled sensible heat turbulent flux', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('LE_F', 'Gap-filled latent heat turbulent flux', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('FCH4_F', 'Gap-filled methane (CH4) turbulent flux', 'nmol CH4 m-2 s-1',
         'Gap-filled using MDS approach (REddyProc)'),
        ('SW_IN_F', 'Gap-filled shortwave radiation, incoming', 'W m-2',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('SW_OUT_F', 'Gap-filled shortwave radiation, outgoing', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('LW_IN_F', 'Gap-filled longwave radiation, incoming', 'W m-2',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('LW_OUT_F', 'Gap-filled longwave radiation, outgoing', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('NETRAD_F', 'Gap-filled net radiation', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('PPFD_IN_F', 'Gap-filled photosynthetic photon flux density, incoming', 'umol photon m-2 s-1',
         'Gap-filled using MDS approach (REddyProc)'),
        ('VPD_F', 'Gap-filled vapor pressure deficit', 'hPa',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('PA_F', 'Gap-filled atmospheric pressure', 'kPa',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('TA_F', 'Gap-filled air temperature', 'degC',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('P_F', 'Gap-filled precipitation', 'mm',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('G_F', 'Gap-filled soil heat flux', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('WTD_F', 'Gap-filled water table depth', 'm',
         'Gap-filled using MDS approach (REddyProc)'),
        ('WS_F', 'Gap-filled wind speed', 'm s-1',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('LE_F_ANNOPTLM', 'Gap-filled latent heat turbulent flux (ANNOPTLM)', 'W m-2',
         'Gap-filled using ANNOPTLM neural network routine (Knox et al., 2016, 2019)'),
        ('NEE_F_ANNOPTLM', 'Gap-filled net ecosystem exchange (ANNOPTLM)', 'umol CO2 m-2 s-1',
         'Gap-filled using ANNOPTLM neural network routine (Knox et al., 2016, 2019)'),
        ('FCH4_F_ANNOPTLM', 'Gap-filled methane (CH4) turbulent flux (ANNOPTLM)', 'nmol CH4 m-2 s-1',
         'Gap-filled using ANNOPTLM neural network routine (Knox et al., 2016, 2019)'),
        ('FCH4_F_ANNOPTLM_QC', 'Quality check for gap-filled methane flux (ANNOPTLM)', 'dimensionless',
         'Quality flag: 1 for gaps < 2 months, 3 for gaps > 2 months'),
    )
}

def process_site(row, dir_path, selected_columns, var_metadata):
    """Convert one site's daily (DD) FLUXNET-CH4 CSV file into a NetCDF file.

    Parameters
    ----------
    row : mapping
        One record of the site metadata table (e.g. a ``pandas.Series``).
        The keys read are SITE_ID, YEAR_START, YEAR_END,
        SOIL_TEMP_PROBE_DEPTHS, LAT, LON, SITE_NAME, COUNTRY,
        SITE_CLASSIFICATION, IGBP, KOPPEN, UTC_OFFSET and DOM_VEG.
    dir_path : str
        Directory containing the per-site ``FLX_<site>_FLUXNET-CH4_...``
        folders.
    selected_columns : list of str
        Columns to export.  The first entry is expected to be
        ``'TIMESTAMP'``; every later entry becomes a data variable.
        Columns absent from the CSV are created and filled with -9999.
    var_metadata : dict
        Maps each data-variable name to its attribute dict.  The mapping is
        copied before the TS_* names are adjusted, so the caller's object is
        never modified.

    Returns
    -------
    str or None
        A status message naming the written file, or ``None`` when the site
        folder or the CSV file does not exist (a notice is printed instead).

    Side effects
    ------------
    Writes ``<site_id>_FLUXNET-CH4_DD.nc`` into the site folder.
    """
    site_id = row['SITE_ID']
    start_year = int(row['YEAR_START'])
    end_year = int(row['YEAR_END'])
    soil_probe_depths = row['SOIL_TEMP_PROBE_DEPTHS']

    # os.path.join keeps the path valid whether or not dir_path ends with a separator.
    folder_name = os.path.join(dir_path, f"FLX_{site_id}_FLUXNET-CH4_{start_year}-{end_year}_1-1")
    if not os.path.exists(folder_name):
        print(f"Folder {folder_name} does not exist. Skipping site {site_id}.")
        return None

    # Daily CSV file inside the site folder
    csv_file = os.path.join(folder_name, f"FLX_{site_id}_FLUXNET-CH4_DD_{start_year}-{end_year}_1-1.csv")
    if not os.path.exists(csv_file):
        print(f"CSV file {csv_file} does not exist. Skipping site {site_id}.")
        return None

    # Work on a private copy so the caller's attribute table is left untouched.
    site_metadata = {name: dict(attrs) for name, attrs in var_metadata.items()}
    # Append the probe depths to the TS_* names; when the depths are missing
    # (NaN) the default names are kept instead of writing "(nan)".
    if pd.notna(soil_probe_depths):
        for i in range(1, 6):
            ts_key = f'TS_{i}'
            if ts_key in site_metadata:
                site_metadata[ts_key]['long_name'] = f'Soil temperature at probe {i} ({soil_probe_depths})'

    df = pd.read_csv(csv_file)

    # Daily stamps are parsed as YYYYMMDD
    df['TIMESTAMP'] = pd.to_datetime(df['TIMESTAMP'].astype(str), format='%Y%m%d')

    # Keep the selected columns only; columns missing from the CSV are filled with -9999
    df = df.reindex(columns=selected_columns, fill_value=np.float32(-9999.0))

    # Time axis as float64 days since 1900-01-01
    time_coords = df['TIMESTAMP']
    reference_date = np.datetime64('1900-01-01')
    time_numeric = ((time_coords - reference_date) / np.timedelta64(1, 'D')).astype(np.float64)

    # A single grid cell: x and y are float64 coordinates, lat/lon are float32 [y, x] fields
    x_coords = np.array([1.0], dtype=np.float64)
    y_coords = np.array([1.0], dtype=np.float64)
    lat_array = np.array([[np.float32(row['LAT'])]], dtype=np.float32)
    lon_array = np.array([[np.float32(row['LON'])]], dtype=np.float32)

    # Every column after TIMESTAMP becomes a float32 [time, y, x] variable
    data_vars_dict = {
        var: (['time', 'y', 'x'], expand_to_3d(df[var], dtype=np.float32), site_metadata[var])
        for var in selected_columns[1:]
    }
    data_vars_dict.update({
        'latitude': (['y', 'x'], lat_array, {
            "standard_name": "latitude", "long_name": "Latitude", "units": "degrees_north"
        }),
        'longitude': (['y', 'x'], lon_array, {
            "standard_name": "longitude", "long_name": "Longitude", "units": "degrees_east"
        })
    })

    # Create Dataset (time as numeric double)
    ds = xr.Dataset(
        data_vars=data_vars_dict,
        coords={
            'time': ('time', time_numeric, {'standard_name': 'time', 'long_name': 'time', 'units': 'days since 1900-01-01', 'calendar': 'gregorian'}),
            'x': ('x', x_coords),
            'y': ('y', y_coords),
            'site': [site_id]
        }
    )

    # Site metadata as global attributes
    ds.attrs['site_id'] = site_id
    ds.attrs['site_name'] = row['SITE_NAME']
    ds.attrs['country'] = row['COUNTRY']
    ds.attrs['latitude'] = row['LAT']
    ds.attrs['longitude'] = row['LON']
    ds.attrs['pft'] = row['SITE_CLASSIFICATION']
    ds.attrs['igbp'] = row['IGBP']
    ds.attrs['koppen'] = row['KOPPEN']
    ds.attrs['year_start'] = start_year
    ds.attrs['year_end'] = end_year
    ds.attrs['utc_offset'] = row['UTC_OFFSET']
    ds.attrs['dom_veg'] = row['DOM_VEG']
    ds.attrs['missing_value'] = np.float32(-9999.0)

    ds['site'].attrs['long_name'] = 'Site Identifier'

    # Output file sits next to the source CSV
    nc_file = os.path.join(folder_name, f"{site_id}_FLUXNET-CH4_DD.nc")

    # Per-variable encoding: light zlib compression, float32 storage, -9999 as fill value
    encoding = {}
    for var in ds.data_vars:
        enc = {'zlib': True, 'complevel': 1, '_FillValue': np.float32(-9999.0), 'dtype': 'float32'}
        if var in ['latitude', 'longitude']:
            enc['_FillValue'] = None  # no fill value for the position fields
        encoding[var] = enc
    # Coordinates (time, x, y) are stored as float64
    encoding['time'] = {'zlib': True, 'complevel': 1, 'dtype': 'float64'}
    encoding['x'] = {'zlib': True, 'complevel': 1, 'dtype': 'float64'}
    encoding['y'] = {'zlib': True, 'complevel': 1, 'dtype': 'float64'}
    ds.to_netcdf(nc_file, encoding=encoding)
    return f"Generated NetCDF file for site {site_id}: {nc_file}"

# Run all sites in parallel (n_jobs=-1 uses every CPU core); each task gets its
# own deep copy of var_metadata so no state is shared between tasks.
results = Parallel(n_jobs=-1)(
    delayed(process_site)(site_row, dir_path, selected_columns, copy.deepcopy(var_metadata))
    for _, site_row in sites.iterrows()
)
for res in results:
    # Skipped sites return None and have nothing to report.
    if not res:
        continue
    print(res)

start


  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)


Generated NetCDF file for site AT-Neu: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_AT-Neu_FLUXNET-CH4_2010-2012_1-1/AT-Neu_FLUXNET-CH4_DD.nc
Generated NetCDF file for site BR-Npw: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_BR-Npw_FLUXNET-CH4_2013-2016_1-1/BR-Npw_FLUXNET-CH4_DD.nc
Generated NetCDF file for site BW-Gum: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_BW-Gum_FLUXNET-CH4_2018-2018_1-1/BW-Gum_FLUXNET-CH4_DD.nc
Generated NetCDF file for site BW-Nxr: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_BW-Nxr_FLUXNET-CH4_2018-2018_1-1/BW-Nxr_FLUXNET-CH4_DD.nc
Generated NetCDF file for site CA-SCB: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_CA-SCB_FLUXNET-CH4_2014-2017_1-1/CA-SCB_FLUXNET-CH4_DD.nc
Generated NetCDF file for site CA-SCC: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_CA-SCC_FLUXNET-CH4_2013-2016_1-1/CA-SCC_FLUXNET-CH4_DD.nc
Generated NetCDF file for site CH-Cha: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_CH-Cha_FLUXNET-CH4_2012-2016_1-1/CH-Cha_FLUXNET-CH4_DD.nc
Generated NetCDF file for s

## Make half-hourly data

In [None]:
import pandas as pd
import xarray as xr
import os
import numpy as np
import copy
from joblib import Parallel, delayed  # 假设环境有joblib；如果没有，可替换为multiprocessing

# Root directory of the FLUXNET-CH4 data set
dir_path = '/share/home/dq076/data/ME/FLUXNET-CH4/'

# Load the site metadata table that lives directly under the root directory
metadata_file = os.path.join(dir_path, 'FLX_AA-Flx_CH4-META_20201112135337801132.csv')
metadata = pd.read_csv(metadata_file)

# Keep only the sites whose FLUXNET-CH4_DATA_POLICY is 'CCBY4.0'
sites = metadata.loc[metadata['FLUXNET-CH4_DATA_POLICY'].eq('CCBY4.0')]
print('start')

# Columns to export, in output order ('TIMESTAMP' first, data variables after)
selected_columns = (
    ['TIMESTAMP']
    + ['NEE', 'H', 'LE', 'FCH4', 'USTAR']
    + ['SW_IN', 'SW_OUT', 'LW_IN', 'LW_OUT', 'NETRAD', 'PPFD_IN']
    + ['VPD', 'TA', 'P']
    + [f'TS_{probe}' for probe in range(1, 6)]
    + ['G', 'WTD', 'GPP_NT', 'RECO_NT', 'GPP_DT', 'RECO_DT', 'WD', 'WS']
    + ['NEE_F', 'H_F', 'LE_F', 'FCH4_F']
    + ['SW_IN_F', 'SW_OUT_F', 'LW_IN_F', 'LW_OUT_F', 'NETRAD_F', 'PPFD_IN_F']
    + ['VPD_F', 'PA_F', 'TA_F', 'P_F', 'G_F', 'WTD_F', 'WS_F']
    + ['LE_F_ANNOPTLM', 'NEE_F_ANNOPTLM', 'FCH4_F_ANNOPTLM', 'FCH4_F_ANNOPTLM_QC']
)

# Reshape a 1-D series into the [time, y, x] layout
def expand_to_3d(data_1d, dtype=np.float32):
    """Return ``data_1d`` as a ``[time, 1, 1]`` array of the requested dtype."""
    column = np.array(data_1d, dtype=dtype)
    # The two trailing singleton axes stand in for the y and x dimensions.
    return column.reshape(-1, 1, 1)

# Attribute table for the exported variables (complete list, including default
# names for the TS_* probes).  Each row is (name, long_name, units, description);
# a description of None means the entry gets no 'description' attribute.
var_metadata = {
    name: (
        {'long_name': long_name, 'units': units}
        if note is None
        else {'long_name': long_name, 'units': units, 'description': note}
    )
    for name, long_name, units, note in (
        ('NEE', 'Net ecosystem exchange', 'umol CO2 m-2 s-1', None),
        ('H', 'Sensible heat turbulent flux', 'W m-2', None),
        ('LE', 'Latent heat turbulent flux', 'W m-2', None),
        ('FCH4', 'Methane (CH4) turbulent flux', 'nmol CH4 m-2 s-1', None),
        ('USTAR', 'Friction velocity', 'm s-1', None),
        ('SW_IN', 'Shortwave radiation, incoming', 'W m-2', None),
        ('SW_OUT', 'Shortwave radiation, outgoing', 'W m-2', None),
        ('LW_IN', 'Longwave radiation, incoming', 'W m-2', None),
        ('LW_OUT', 'Longwave radiation, outgoing', 'W m-2', None),
        ('NETRAD', 'Net radiation', 'W m-2', None),
        ('PPFD_IN', 'Photosynthetic photon flux density, incoming', 'umol photon m-2 s-1', None),
        ('VPD', 'Vapor pressure deficit', 'hPa', None),
        ('TA', 'Air temperature', 'degC', None),
        ('P', 'Precipitation', 'mm', None),
        ('TS_1', 'Soil temperature at probe 1', 'degC', None),
        ('TS_2', 'Soil temperature at probe 2', 'degC', None),
        ('TS_3', 'Soil temperature at probe 3', 'degC', None),
        ('TS_4', 'Soil temperature at probe 4', 'degC', None),
        ('TS_5', 'Soil temperature at probe 5', 'degC', None),
        ('G', 'Soil heat flux', 'W m-2', None),
        ('WTD', 'Water table depth', 'm', None),
        ('GPP_NT', 'Gross primary productivity (nighttime method)', 'umol CO2 m-2 s-1',
         'Estimated using Reichstein et al. (2005) nighttime flux partitioning'),
        ('RECO_NT', 'Ecosystem respiration (nighttime method)', 'umol CO2 m-2 s-1',
         'Estimated using Reichstein et al. (2005) nighttime flux partitioning'),
        ('GPP_DT', 'Gross primary productivity (daytime method)', 'umol CO2 m-2 s-1',
         'Estimated using Lasslop et al. (2010) daytime flux partitioning'),
        ('RECO_DT', 'Ecosystem respiration (daytime method)', 'umol CO2 m-2 s-1',
         'Estimated using Lasslop et al. (2010) daytime flux partitioning'),
        ('WD', 'Wind direction', 'Decimal degrees', None),
        ('WS', 'Wind speed', 'm s-1', None),
        ('NEE_F', 'Gap-filled net ecosystem exchange', 'umol CO2 m-2 s-1',
         'Gap-filled using MDS approach (REddyProc)'),
        ('H_F', 'Gap-filled sensible heat turbulent flux', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('LE_F', 'Gap-filled latent heat turbulent flux', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('FCH4_F', 'Gap-filled methane (CH4) turbulent flux', 'nmol CH4 m-2 s-1',
         'Gap-filled using MDS approach (REddyProc)'),
        ('SW_IN_F', 'Gap-filled shortwave radiation, incoming', 'W m-2',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('SW_OUT_F', 'Gap-filled shortwave radiation, outgoing', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('LW_IN_F', 'Gap-filled longwave radiation, incoming', 'W m-2',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('LW_OUT_F', 'Gap-filled longwave radiation, outgoing', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('NETRAD_F', 'Gap-filled net radiation', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('PPFD_IN_F', 'Gap-filled photosynthetic photon flux density, incoming', 'umol photon m-2 s-1',
         'Gap-filled using MDS approach (REddyProc)'),
        ('VPD_F', 'Gap-filled vapor pressure deficit', 'hPa',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('PA_F', 'Gap-filled atmospheric pressure', 'kPa',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('TA_F', 'Gap-filled air temperature', 'degC',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('P_F', 'Gap-filled precipitation', 'mm',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('G_F', 'Gap-filled soil heat flux', 'W m-2',
         'Gap-filled using MDS approach (REddyProc)'),
        ('WTD_F', 'Gap-filled water table depth', 'm',
         'Gap-filled using MDS approach (REddyProc)'),
        ('WS_F', 'Gap-filled wind speed', 'm s-1',
         'Gap-filled using ERA-Interim reanalysis data'),
        ('LE_F_ANNOPTLM', 'Gap-filled latent heat turbulent flux (ANNOPTLM)', 'W m-2',
         'Gap-filled using ANNOPTLM neural network routine (Knox et al., 2016, 2019)'),
        ('NEE_F_ANNOPTLM', 'Gap-filled net ecosystem exchange (ANNOPTLM)', 'umol CO2 m-2 s-1',
         'Gap-filled using ANNOPTLM neural network routine (Knox et al., 2016, 2019)'),
        ('FCH4_F_ANNOPTLM', 'Gap-filled methane (CH4) turbulent flux (ANNOPTLM)', 'nmol CH4 m-2 s-1',
         'Gap-filled using ANNOPTLM neural network routine (Knox et al., 2016, 2019)'),
        ('FCH4_F_ANNOPTLM_QC', 'Quality check for gap-filled methane flux (ANNOPTLM)', 'dimensionless',
         'Quality flag: 1 for gaps < 2 months, 3 for gaps > 2 months'),
    )
}

def process_site(row, dir_path, selected_columns, var_metadata):
    """Convert one site's half-hourly (HH) FLUXNET-CH4 CSV file into a NetCDF file.

    Parameters
    ----------
    row : mapping
        One record of the site metadata table (e.g. a ``pandas.Series``).
        The keys read are SITE_ID, YEAR_START, YEAR_END,
        SOIL_TEMP_PROBE_DEPTHS, LAT, LON, SITE_NAME, COUNTRY,
        SITE_CLASSIFICATION, IGBP, KOPPEN, UTC_OFFSET and DOM_VEG.
    dir_path : str
        Directory containing the per-site ``FLX_<site>_FLUXNET-CH4_...``
        folders.
    selected_columns : list of str
        Columns to export.  The first entry is expected to be
        ``'TIMESTAMP'``; every later entry becomes a data variable.
        Columns absent from the CSV are created and filled with -9999.
    var_metadata : dict
        Maps each data-variable name to its attribute dict.  The mapping is
        copied before the TS_* names are adjusted, so the caller's object is
        never modified.

    Returns
    -------
    str or None
        A status message naming the written file, or ``None`` when the site
        folder or the CSV file does not exist (a notice is printed instead).

    Side effects
    ------------
    Writes ``<site_id>_FLUXNET-CH4_HH.nc`` into the site folder.  The time
    axis is stored as seconds since ``<start_year>-01-01 00:00:00``.
    """
    site_id = row['SITE_ID']
    start_year = int(row['YEAR_START'])
    end_year = int(row['YEAR_END'])
    soil_probe_depths = row['SOIL_TEMP_PROBE_DEPTHS']

    # os.path.join keeps the path valid whether or not dir_path ends with a separator.
    folder_name = os.path.join(dir_path, f"FLX_{site_id}_FLUXNET-CH4_{start_year}-{end_year}_1-1")
    if not os.path.exists(folder_name):
        print(f"Folder {folder_name} does not exist. Skipping site {site_id}.")
        return None

    # Half-hourly CSV file inside the site folder
    csv_file = os.path.join(folder_name, f"FLX_{site_id}_FLUXNET-CH4_HH_{start_year}-{end_year}_1-1.csv")
    if not os.path.exists(csv_file):
        print(f"CSV file {csv_file} does not exist. Skipping site {site_id}.")
        return None

    # Work on a private copy so the caller's attribute table is left untouched.
    site_metadata = {name: dict(attrs) for name, attrs in var_metadata.items()}
    # Append the probe depths to the TS_* names; when the depths are missing
    # (NaN) the default names are kept instead of writing "(nan)".
    if pd.notna(soil_probe_depths):
        for i in range(1, 6):
            ts_key = f'TS_{i}'
            if ts_key in site_metadata:
                site_metadata[ts_key]['long_name'] = f'Soil temperature at probe {i} ({soil_probe_depths})'

    df = pd.read_csv(csv_file)

    # Use the interval start as the time stamp when the first record starts
    # in start_year; otherwise fall back to the interval end.
    first_start = str(df['TIMESTAMP_START'].iloc[0])
    stamp_column = 'TIMESTAMP_START' if first_start[:4] == str(start_year) else 'TIMESTAMP_END'
    stamps = df[stamp_column].astype(str)
    # Pick the format from the stamp width.  Parsing a 12-digit YYYYMMDDHHMM
    # stamp with a trailing %S is expected to succeed but with the minutes
    # split across %M and %S (e.g. '...0030' read as 00:03:00), because both
    # directives also accept a single digit.
    stamp_format = '%Y%m%d%H%M%S' if stamps.str.len().max() > 12 else '%Y%m%d%H%M'
    df['TIMESTAMP'] = pd.to_datetime(stamps, format=stamp_format)

    # Keep the selected columns only; columns missing from the CSV are filled with -9999
    df = df.reindex(columns=selected_columns, fill_value=np.float32(-9999.0))

    # Time axis as float64 seconds since start_year-01-01 00:00:00
    time_coords = df['TIMESTAMP']
    start_time = pd.Timestamp(f"{start_year}-01-01 00:00:00")
    time_numeric = ((time_coords - start_time).dt.total_seconds()).astype(np.float64)

    # A single grid cell: x and y are float64 coordinates, lat/lon are float32 [y, x] fields
    x_coords = np.array([1.0], dtype=np.float64)
    y_coords = np.array([1.0], dtype=np.float64)
    lat_array = np.array([[np.float32(row['LAT'])]], dtype=np.float32)
    lon_array = np.array([[np.float32(row['LON'])]], dtype=np.float32)

    # Every column after TIMESTAMP becomes a float32 [time, y, x] variable
    data_vars_dict = {
        var: (['time', 'y', 'x'], expand_to_3d(df[var], dtype=np.float32), site_metadata[var])
        for var in selected_columns[1:]
    }
    # NOTE(review): an unused specific-humidity computation (es/ea/p/Qair) was
    # removed here.  It was never stored in the dataset and took its pressure
    # from P_F (precipitation); if it is reinstated, PA_F looks like the
    # intended input - confirm.
    data_vars_dict.update({
        'latitude': (['y', 'x'], lat_array, {
            "standard_name": "latitude", "long_name": "Latitude", "units": "degrees_north"
        }),
        'longitude': (['y', 'x'], lon_array, {
            "standard_name": "longitude", "long_name": "Longitude", "units": "degrees_east"
        })
    })

    # Create Dataset (time as numeric double)
    ds = xr.Dataset(
        data_vars=data_vars_dict,
        coords={
            'time': ('time', time_numeric, {'standard_name': 'time', 'long_name': 'time', 'units': f'seconds since {start_year}-01-01 00:00:00', 'calendar': 'standard'}),
            'x': ('x', x_coords),
            'y': ('y', y_coords),
            'site': [site_id]
        }
    )

    # Site metadata as global attributes
    ds.attrs['site_id'] = site_id
    ds.attrs['site_name'] = row['SITE_NAME']
    ds.attrs['country'] = row['COUNTRY']
    ds.attrs['latitude'] = row['LAT']
    ds.attrs['longitude'] = row['LON']
    ds.attrs['pft'] = row['SITE_CLASSIFICATION']
    ds.attrs['igbp'] = row['IGBP']
    ds.attrs['koppen'] = row['KOPPEN']
    ds.attrs['year_start'] = start_year
    ds.attrs['year_end'] = end_year
    ds.attrs['utc_offset'] = row['UTC_OFFSET']
    ds.attrs['dom_veg'] = row['DOM_VEG']
    ds.attrs['missing_value'] = np.float32(-9999.0)

    ds['site'].attrs['long_name'] = 'Site Identifier'

    # Output file sits next to the source CSV
    nc_file = os.path.join(folder_name, f"{site_id}_FLUXNET-CH4_HH.nc")

    # Per-variable encoding: light zlib compression, float32 storage, -9999 as fill value
    encoding = {}
    for var in ds.data_vars:
        enc = {'zlib': True, 'complevel': 1, '_FillValue': np.float32(-9999.0), 'dtype': 'float32'}
        if var in ['latitude', 'longitude']:
            enc['_FillValue'] = None  # no fill value for the position fields
        encoding[var] = enc
    # Coordinates (time, x, y) are stored as float64
    encoding['time'] = {'zlib': True, 'complevel': 1, 'dtype': 'float64'}
    encoding['x'] = {'zlib': True, 'complevel': 1, 'dtype': 'float64'}
    encoding['y'] = {'zlib': True, 'complevel': 1, 'dtype': 'float64'}
    ds.to_netcdf(nc_file, encoding=encoding)
    return f"Generated NetCDF file for site {site_id}: {nc_file}"

# Run all sites in parallel (n_jobs=-1 uses every CPU core); each task gets its
# own deep copy of var_metadata so no state is shared between tasks.
results = Parallel(n_jobs=-1)(
    delayed(process_site)(site_row, dir_path, selected_columns, copy.deepcopy(var_metadata))
    for _, site_row in sites.iterrows()
)
for res in results:
    # Skipped sites return None and have nothing to report.
    if not res:
        continue
    print(res)

start


  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)
  arr = np.array(values, dtype=dtype, copy=copy)


Generated NetCDF file for site AT-Neu: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_AT-Neu_FLUXNET-CH4_2010-2012_1-1/AT-Neu_FLUXNET-CH4_HH.nc
Generated NetCDF file for site BR-Npw: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_BR-Npw_FLUXNET-CH4_2013-2016_1-1/BR-Npw_FLUXNET-CH4_HH.nc
Generated NetCDF file for site BW-Gum: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_BW-Gum_FLUXNET-CH4_2018-2018_1-1/BW-Gum_FLUXNET-CH4_HH.nc
Generated NetCDF file for site BW-Nxr: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_BW-Nxr_FLUXNET-CH4_2018-2018_1-1/BW-Nxr_FLUXNET-CH4_HH.nc
Generated NetCDF file for site CA-SCB: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_CA-SCB_FLUXNET-CH4_2014-2017_1-1/CA-SCB_FLUXNET-CH4_HH.nc
Generated NetCDF file for site CA-SCC: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_CA-SCC_FLUXNET-CH4_2013-2016_1-1/CA-SCC_FLUXNET-CH4_HH.nc
Generated NetCDF file for site CH-Cha: /share/home/dq076/data/ME/FLUXNET-CH4/FLX_CH-Cha_FLUXNET-CH4_2012-2016_1-1/CH-Cha_FLUXNET-CH4_HH.nc
Generated NetCDF file for s