In [None]:
import os
import h5py
import pandas as pd
from tqdm import tqdm, trange
import time
import geopandas as gpd
from shapely.strtree import STRtree
from shapely.geometry import Point
from PackageDeepLearn.utils import file_search_wash as fsw

# All six ICESat-2 beam groups looked up in every granule.
sub_file_list = ['gt1l/', 'gt1r/', 'gt2l/', 'gt2r/', 'gt3l/', 'gt3r/']
start_time = time.time()
# Offset added to the file index when numbering output files.
addnum = 0
# Only the first 65 entries of the search result are used for ATL03
# (presumably a list of file paths; verify against fsw.search_files).
file_list = fsw.search_files(r'E:\SETP_ICESat-2数据\ATL_03', '.h5')[0:65]

shapefile_path = r"D:\BaiduSyncdisk\02_论文相关\在写\SAM冰湖\数据\2023_05_31_to_2023_09_15_样本修正.shp"
gdf_polygons = gpd.read_file(shapefile_path)

# CRS of the shapefile as read; the buffered polygons are converted back to it
# and the point layers built later are tagged with it.
# NOTE(review): the point layers are built from lon/lat, so this is presumably
# a geographic CRS such as EPSG:4326 -- confirm.
original_crs = gdf_polygons.crs

# Buffer distance in metres.
# NOTE(review): the original comment said 100 m while the code used 50; the
# code value is kept -- confirm which one is intended.
buffer_distance_m = 50

# Buffer in a metric CRS picked from the data extent. A hard-coded UTM zone
# (previously EPSG:32633) only yields true metres for data inside that zone.
projected_gdf = gdf_polygons.to_crs(gdf_polygons.estimate_utm_crs())
projected_gdf['geometry'] = projected_gdf.geometry.buffer(buffer_distance_m)

# Convert the buffered polygons back to the original CRS.
gdf_polygons_buffered = projected_gdf.to_crs(original_crs)

def process_spatial_join(gdf_batch, gdf_polygons_buffered):
    """Join a batch of points to the buffered polygons and keep only the hits.

    A left spatial join (predicate ``intersects``) attaches the polygon
    attributes to every point; rows whose ``index_right`` is missing, i.e.
    points that intersect no polygon, are then dropped.

    Args:
        gdf_batch: GeoDataFrame of points to test.
        gdf_polygons_buffered: GeoDataFrame of polygons to join against.

    Returns:
        The joined GeoDataFrame restricted to points that intersect a polygon.
    """
    matched = gpd.sjoin(gdf_batch, gdf_polygons_buffered, how='left', predicate='intersects')

    # Without an 'index_right' column there is nothing to filter on.
    if 'index_right' not in matched.columns:
        return matched

    return matched.dropna(subset=['index_right'])


# ATL_03 Old
节省内存，但是运算效率低

In [None]:
# Build a spatial index (STR-tree) over the buffered polygon geometries.
polygon_tree = STRtree(gdf_polygons_buffered.geometry)

def filter_data_by_polygon(df, polygon_tree, batch_size=10000):
    """Return the rows of ``df`` whose (lon, lat) point lies within a tree polygon.

    The frame is processed in batches of ``batch_size`` rows. For each batch
    the points are queried against ``polygon_tree`` with the ``within``
    predicate, which is the same test as ``polygon.contains(point)``.

    Args:
        df: DataFrame with ``lon`` and ``lat`` columns.
        polygon_tree: shapely ``STRtree`` built over the polygons of interest.
        batch_size: Number of rows tested per query.

    Returns:
        A DataFrame with the matching rows in their original order and a fresh
        index, or an empty ``pd.DataFrame()`` when nothing matches.
    """
    filtered_data_list = []
    for start in trange(0, len(df), batch_size):
        batch_df = df.iloc[start:start + batch_size]
        points = gpd.GeoSeries(gpd.points_from_xy(batch_df['lon'], batch_df['lat']))

        # For an array of input geometries, query() returns a (2, n) array:
        # row 0 holds positions of the input points, row 1 positions of the
        # tree geometries. Only the point positions are needed; flattening both
        # rows together would mix the two index spaces.
        point_idx, _polygon_idx = polygon_tree.query(points, predicate='within')
        if len(point_idx) == 0:
            continue

        # A point inside overlapping polygons is reported once per polygon;
        # de-duplicate and sort to keep each row once, in input order.
        hit_rows = sorted(set(point_idx.tolist()))
        filtered_data_list.append(batch_df.iloc[hit_rows])

    if filtered_data_list:
        return pd.concat(filtered_data_list, ignore_index=True)
    return pd.DataFrame()

# Extract the required photon attributes from each ATL03 granule, keep the
# photons that fall inside the buffered polygons and write one file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2数据\ATL_03\ATL03_SETPGL_ALL_{}.h5'.format(idx + addnum)
    # Check for an existing result before opening the granule, so a skipped
    # file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    joined_parts = []
    # The context manager closes the granule even if reading a beam fails.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            # Beam names end with '/', so plain concatenation yields the HDF5 path.
            heights = subgroup + 'heights/'
            time_data = data.get(heights + 'delta_time')
            lat = data.get(heights + 'lat_ph')
            lon = data.get(heights + 'lon_ph')
            dist_ph_along = data.get(heights + 'dist_ph_along')
            height = data.get(heights + 'h_ph')
            signal_conf_ph = data.get(heights + 'signal_conf_ph')
            quality_ph = data.get(heights + 'quality_ph')

            # Skip beams that lack any of the required datasets.
            if any(x is None for x in [lat, lon, height, time_data, dist_ph_along, quality_ph, signal_conf_ph]):
                continue

            df = pd.DataFrame(data={
                'time': time_data[:],
                'lat': lat[:],
                'lon': lon[:],
                'dist_ph_along': dist_ph_along[:],
                'height': height[:],
                'quality_ph': quality_ph[:],
                'signal_conf_ph_1': signal_conf_ph[:, 0],
                'signal_conf_ph_2': signal_conf_ph[:, 1],
                'signal_conf_ph_3': signal_conf_ph[:, 2],
                'signal_conf_ph_4': signal_conf_ph[:, 3],
                'signal_conf_ph_5': signal_conf_ph[:, 4]
            })
            df['subgroup'] = subgroup

            # Keep photons where at least one of the five confidence columns
            # is >= 0; rows where all five are negative are dropped.
            df = df[(df['signal_conf_ph_1'] >= 0) |
                    (df['signal_conf_ph_2'] >= 0) |
                    (df['signal_conf_ph_3'] >= 0) |
                    (df['signal_conf_ph_4'] >= 0) |
                    (df['signal_conf_ph_5'] >= 0)]

            # Keep only the photons located inside the buffered polygons.
            filtered_df = filter_data_by_polygon(df, polygon_tree)
            if filtered_df.empty:
                continue

            # Attach the polygon attributes to the remaining photons.
            gdf_filtered = gpd.GeoDataFrame(filtered_df, geometry=gpd.points_from_xy(filtered_df['lon'], filtered_df['lat']), crs=original_crs)
            joined_df = gpd.sjoin(gdf_filtered, gdf_polygons_buffered[['Sort', '关系', 'Area_pre', 'geometry']], how='left', predicate='intersects')

            if not joined_df.empty:
                # The geometry column is dropped before the tables are merged.
                joined_parts.append(joined_df.drop(columns='geometry'))

    # Concatenate once per granule instead of growing a frame inside the loop.
    combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')

end_time = time.time()
print(f"Processing completed in {end_time - start_time:.2f} seconds")


# ATL_03 
属性表直接连接筛选，快速

In [None]:
# Extract the required photon attributes from each ATL03 granule, join them to
# the buffered polygons in batches and write one file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2数据\ATL_03\ATL03_SETPGL_ALL_{}.h5'.format(idx + addnum)
    # Check for an existing result before opening the granule, so a skipped
    # file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    joined_parts = []
    # The context manager closes the granule even if reading a beam fails.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            # Beam names end with '/', so plain concatenation yields the HDF5 path.
            heights = subgroup + 'heights/'
            time_data = data.get(heights + 'delta_time')
            lat = data.get(heights + 'lat_ph')
            lon = data.get(heights + 'lon_ph')
            dist_ph_along = data.get(heights + 'dist_ph_along')
            height = data.get(heights + 'h_ph')
            signal_conf_ph = data.get(heights + 'signal_conf_ph')
            quality_ph = data.get(heights + 'quality_ph')

            # Skip beams that lack any of the required datasets.
            if any(x is None for x in [lat, lon, height, time_data, dist_ph_along, quality_ph, signal_conf_ph]):
                continue

            df = pd.DataFrame(data={
                'time': time_data[:],
                'lat': lat[:],
                'lon': lon[:],
                'dist_ph_along': dist_ph_along[:],
                'height': height[:],
                'quality_ph': quality_ph[:],
                'signal_conf_ph_1': signal_conf_ph[:, 0],
                'signal_conf_ph_2': signal_conf_ph[:, 1],
                'signal_conf_ph_3': signal_conf_ph[:, 2],
                'signal_conf_ph_4': signal_conf_ph[:, 3],
                'signal_conf_ph_5': signal_conf_ph[:, 4]
            })
            df['subgroup'] = subgroup

            # Keep photons where at least one of the five confidence columns
            # is >= 0; rows where all five are negative are dropped.
            df = df[(df['signal_conf_ph_1'] >= 0) |
                    (df['signal_conf_ph_2'] >= 0) |
                    (df['signal_conf_ph_3'] >= 0) |
                    (df['signal_conf_ph_4'] >= 0) |
                    (df['signal_conf_ph_5'] >= 0)]

            if df.empty:
                continue

            # Point layer used for the attribute join.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            # Join in fixed-size positional batches.
            batch_size = 10000
            for start in tqdm(range(0, len(gdf_filtered), batch_size), desc="Processing batches"):
                gdf_batch = gdf_filtered.iloc[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # The geometry column is dropped before the tables are merged.
                    joined_parts.append(joined_df.drop(columns='geometry'))

    # Concatenate once per granule instead of growing a frame inside the loop.
    combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')

end_time = time.time()
print(f"Processing completed in {end_time - start_time:.2f} seconds")


# ATL_06

In [None]:
file_list = fsw.search_files(r'E:\SETP_ICESat-2数据\ATL_06_Landice', '.h5')
# Granules that produced no rows inside the buffered polygons.
ATL06_NoData = []
# Extract the required land-ice segment attributes from each ATL06 granule and
# write one file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2数据\ATL_06_Landice\ATL06_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx + addnum)
    # Check for an existing result before opening the granule, so a skipped
    # file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    # Errors are handled per granule: the failing file and the reason are
    # reported and processing moves on, instead of one bare `except` silently
    # ending the whole run.
    try:
        joined_parts = []
        # The context manager closes the granule even if reading a beam fails.
        with h5py.File(file_path, 'r') as data:
            for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
                if subgroup not in data:
                    continue

                # Beam names end with '/', so plain concatenation yields the HDF5 path.
                segments = subgroup + 'land_ice_segments/'
                time_data = data.get(segments + 'delta_time')
                lat = data.get(segments + 'latitude')
                lon = data.get(segments + 'longitude')
                height = data.get(segments + 'h_li')
                dem = data.get(segments + 'dem/dem_h')
                # atl06_quality_summary -- meanings [0 1]: ['best_quality', 'potential_problem']
                signal_conf_ph = data.get(segments + 'atl06_quality_summary')
                segment_id = data.get(segments + 'segment_id')

                # Skip beams that lack any of the required datasets.
                if any(x is None for x in [time_data, lat, lon, height, dem, signal_conf_ph, segment_id]):
                    continue

                df = pd.DataFrame(data={
                    'time': time_data[:],
                    'lat': lat[:],
                    'lon': lon[:],
                    'height': height[:],
                    # Read the values like every other column; the raw h5py
                    # dataset object was passed here before.
                    'dem': dem[:],
                    'signal_conf_ph': signal_conf_ph[:],
                    'id': segment_id[:],
                })
                df['subgroup'] = subgroup

                if df.empty:
                    continue

                # Point layer used for the attribute join.
                gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

                # Join in fixed-size positional batches.
                batch_size = 10000
                for start in tqdm(range(0, len(gdf_filtered), batch_size), desc="Processing batches"):
                    gdf_batch = gdf_filtered.iloc[start:start + batch_size]
                    joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                    if not joined_df.empty:
                        # The geometry column is dropped before the tables are merged.
                        joined_parts.append(joined_df.drop(columns='geometry'))

        # Concatenate once per granule instead of growing a frame inside the loop.
        combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

        if not combined_data.empty:
            combined_data.to_hdf(output_file_path, key='df', mode='w')
        else:
            print(f"No data found in {file_path}")
            ATL06_NoData.append(file_path)
    except Exception as exc:
        print(f"Failed to process {file_path}: {exc!r}")

# ATL_07

In [None]:
file_list = fsw.search_files(r'E:\SETP_ICESat-2数据\ATL_07_Seaice', '.h5')
# Granules that produced no rows inside the buffered polygons.
ATL07_NoData = []
# Extract the required sea-ice segment attributes from each ATL07 granule and
# write one file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2数据\ATL_07_Seaice\ATL07_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx + addnum)
    # Check for an existing result before opening the granule, so a skipped
    # file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    joined_parts = []
    # The context manager closes the granule even if reading a beam fails.
    with h5py.File(file_path, 'r') as data:
        for subgroup in sub_file_list:
            if subgroup not in data:
                continue

            # Beam names end with '/', so plain concatenation yields the HDF5 path.
            segments = subgroup + 'sea_ice_segments/'
            time_data = data.get(segments + 'delta_time')
            lat = data.get(segments + 'latitude')
            lon = data.get(segments + 'longitude')
            height = data.get(segments + 'heights/height_segment_height')
            # NOTE(review): read from '<beam>/dem/dem_h', not from below
            # sea_ice_segments -- confirm this dataset exists in ATL07.
            dem = data.get(subgroup + 'dem/dem_h')
            segment_id = data.get(segments + 'height_segment_id')
            # height_segment_quality -- meanings [0 1]: ['bad_quality', 'good_quality']
            signal_conf_ph = data.get(segments + 'heights/height_segment_quality')

            # Skip beams that lack any of the required datasets.
            if any(x is None for x in [time_data, lat, lon, height, dem, signal_conf_ph, segment_id]):
                continue

            # Dataset lengths, printed to help spot columns of unequal length.
            print([len(i) for i in [time_data, lat, lon, height, dem, signal_conf_ph, segment_id]])
            df = pd.DataFrame(data={
                'time': time_data[:],
                'lat': lat[:],
                'lon': lon[:],
                'height': height[:],
                'dem': dem[:],
                'signal_conf_ph': signal_conf_ph[:],
                'id': segment_id[:],
            })
            df['subgroup'] = subgroup

            if df.empty:
                continue

            # Point layer used for the attribute join.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            # Join in fixed-size positional batches.
            batch_size = 10000
            for start in range(0, len(gdf_filtered), batch_size):
                gdf_batch = gdf_filtered.iloc[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # The geometry column is dropped before the tables are merged.
                    joined_parts.append(joined_df.drop(columns='geometry'))

    # Concatenate once per granule instead of growing a frame inside the loop.
    combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL07_NoData.append(file_path)


# ATL_08

In [None]:
file_list = fsw.search_files(r'E:\SETP_ICESat-2数据\ATL_08_LandVegetation', '.h5')
# Granules that produced no rows inside the buffered polygons.
ATL08_NoData = []
# Extract the required land/vegetation segment attributes from each ATL08
# granule and write one file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2数据\ATL_08_LandVegetation\ATL08_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx + addnum)
    # Check for an existing result before opening the granule, so a skipped
    # file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    joined_parts = []
    # The context manager closes the granule even if reading a beam fails.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            # Beam names end with '/', so plain concatenation yields the HDF5 path.
            segments = subgroup + 'land_segments/'
            time_data = data.get(segments + 'delta_time')
            lat = data.get(segments + 'latitude')
            lon = data.get(segments + 'longitude')
            height_centroid = data.get(segments + 'canopy/centroid_height')
            height_canopy = data.get(segments + 'canopy/h_canopy')
            dem = data.get(segments + 'dem_h')
            segment_id = data.get(segments + 'segment_id_beg')
            # cloud_flag_atm: a value above 0 means aerosols or clouds may be
            # present; valid range is 0-10.
            cloud = data.get(segments + 'cloud_flag_atm')

            # Skip beams that lack any of the required datasets.
            if any(x is None for x in [time_data, lat, lon, height_centroid, height_canopy, dem, segment_id, cloud]):
                continue

            df = pd.DataFrame(data={
                'time': time_data[:],
                'lat': lat[:],
                'lon': lon[:],
                'height_centroid': height_centroid[:],
                'height_canopy': height_canopy[:],
                'dem': dem[:],
                'id': segment_id[:],
                'cloud': cloud[:],
            })
            df['subgroup'] = subgroup

            if df.empty:
                continue

            # Point layer used for the attribute join.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            # Join in fixed-size positional batches.
            batch_size = 10000
            for start in tqdm(range(0, len(gdf_filtered), batch_size), desc="Processing batches"):
                gdf_batch = gdf_filtered.iloc[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # The geometry column is dropped before the tables are merged.
                    joined_parts.append(joined_df.drop(columns='geometry'))

    # Concatenate once per granule instead of growing a frame inside the loop.
    combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL08_NoData.append(file_path)


# ATL_13

In [42]:
# Open the first ATL13 file found, read-only, for interactive inspection.
# The handle is not closed here; the following cell reads from `data`.
file_list = fsw.search_files(r'E:\SETP_ICESat-2数据\ATL_13_InlandSurfaceWaterData', '.h5')
file_path = file_list[0]
data = h5py.File(file_path, 'r')

In [46]:
# Show the names stored under the 'gt1l' beam group of the opened file.
data['gt1l'].keys()

<KeysViewHDF5 ['atl13refid', 'bottom_lat', 'bottom_lon', 'cloud_flag_asr_atl09', 'cloud_flag_atm_atl09', 'cycle_number', 'delta_time', 'err_ht_water_surf', 'err_slope_trk', 'ht_ortho', 'ht_water_surf', 'ice_flag', 'inland_water_body_id', 'inland_water_body_region', 'inland_water_body_size', 'inland_water_body_source', 'inland_water_body_type', 'layer_flag_atl09', 'met_ts_atl09', 'met_wind10_atl09', 'met_wind10_atl13', 'qf_bckgrd', 'qf_bias_em', 'qf_bias_fit', 'qf_cloud', 'qf_ht_adj', 'qf_ice', 'qf_iwp', 'qf_lseg_length', 'qf_spec_width', 'qf_sseg_length', 'qf_stdev_lseg', 'qf_stdev_vlseg', 'qf_subsurf_anomaly', 'qf_subsurface_attenuation', 'qf_subsurface_backscat_ampltd', 'rgt', 'segment_apparent_ht', 'segment_azimuth', 'segment_bias_em', 'segment_bias_fit', 'segment_dac', 'segment_dem_ht', 'segment_dem_source', 'segment_fpb_correction', 'segment_full_sat_fract', 'segment_geoid', 'segment_geoid_free2mean', 'segment_id_beg', 'segment_id_end', 'segment_lat', 'segment_lon', 'segment_near_

In [None]:
file_list = fsw.search_files(r'E:\SETP_ICESat-2数据\ATL_13_InlandSurfaceWaterData', '.h5')
# Granules that produced no rows inside the buffered polygons. This cell has
# its own list; it previously reused (and reset) the ATL08 list.
ATL13_NoData = []
# Extract the required inland-water segment attributes from each ATL13 granule
# and write one file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2数据\ATL_13_InlandSurfaceWaterData\ATL13_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx + addnum)
    # Check for an existing result before opening the granule, so a skipped
    # file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    joined_parts = []
    # The context manager closes the granule even if reading a beam fails.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            # Beam names end with '/', so plain concatenation yields the HDF5 path.
            time_data = data.get(subgroup + 'delta_time')
            lat = data.get(subgroup + 'bottom_lat')
            segment_lat = data.get(subgroup + 'segment_lat')
            lon = data.get(subgroup + 'bottom_lon')
            segment_lon = data.get(subgroup + 'segment_lon')
            height_surface = data.get(subgroup + 'ht_water_surf')
            water_depth = data.get(subgroup + 'water_depth')
            dem = data.get(subgroup + 'segment_dem_ht')
            segment_id = data.get(subgroup + 'segment_id_beg')
            # cloud_flag_asr_atl09 -- cloud probability from ASR; meanings [0..5]:
            # ['clear_with_high_confidence', 'clear_with_medium_confidence',
            #  'clear_with_low_confidence', 'cloudy_with_low_confidence',
            #  'cloudy_with_medium_confidence', 'cloudy_with_high_confidence']
            cloud = data.get(subgroup + 'cloud_flag_asr_atl09')
            ice_flag = data.get(subgroup + 'ice_flag')
            inland_water_body_type = data.get(subgroup + 'inland_water_body_type')

            # Skip beams that lack any of the required datasets.
            if any(x is None for x in [time_data, lat, segment_lat, lon, segment_lon, height_surface,
                                       water_depth, dem, segment_id, cloud, ice_flag, inland_water_body_type]):
                continue

            df = pd.DataFrame(data={
                'time': time_data[:],
                'lat': lat[:],
                'segment_lat': segment_lat[:],
                'lon': lon[:],
                'segment_lon': segment_lon[:],
                'height_surface': height_surface[:],
                'water_depth': water_depth[:],
                'dem': dem[:],
                'id': segment_id[:],
                'cloud': cloud[:],
                'ice_flag': ice_flag[:],
                'inland_water_body_type': inland_water_body_type[:]
            })
            df['subgroup'] = subgroup

            if df.empty:
                continue

            # Point layer used for the attribute join; the points are built
            # from the bottom_lon / bottom_lat columns.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            # Join in fixed-size positional batches.
            batch_size = 10000
            for start in tqdm(range(0, len(gdf_filtered), batch_size), desc="Processing batches"):
                gdf_batch = gdf_filtered.iloc[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # The geometry column is dropped before the tables are merged.
                    joined_parts.append(joined_df.drop(columns='geometry'))

    # Concatenate once per granule instead of growing a frame inside the loop.
    combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL13_NoData.append(file_path)
