# 导入包

In [1]:
import os
import h5py
import pandas as pd
from tqdm.notebook import tqdm
import time
import geopandas as gpd
from shapely.strtree import STRtree
from shapely.geometry import Point
from PackageDeepLearn.utils import file_search_wash as fsw

# All beam ("strip") groups to look for in each HDF5 file.
sub_file_list = ['gt1l/', 'gt1r/', 'gt2l/', 'gt2r/', 'gt3l/', 'gt3r/']
start_time = time.time()

shapefile_path = r"D:\BaiduSyncdisk\02_论文相关\在写\SAM冰湖\数据\2023_05_31_to_2023_09_15_样本修正.shp"
gdf_polygons = gpd.read_file(shapefile_path)

# CRS the polygons are stored in; the buffered result is converted back to it.
# NOTE(review): the original author assumed this is EPSG:4326 -- confirm.
original_crs = gdf_polygons.crs

# Projected CRS used only for buffering (EPSG:8859, "eq earth asia" per the
# original comment), so the buffer distance is expressed in projected units
# rather than degrees.
BUFFER_CRS_EPSG = 8859

# Buffer distance in units of the projected CRS (presumably metres).
# NOTE(review): the original comment said "50 m" while the code has always
# buffered by 100; the value is kept at 100 -- confirm which was intended.
BUFFER_DISTANCE = 100

# Reproject, grow every polygon by the buffer distance, then reproject back.
projected_gdf = gdf_polygons.to_crs(epsg=BUFFER_CRS_EPSG)
projected_gdf['geometry'] = projected_gdf.geometry.buffer(BUFFER_DISTANCE)
gdf_polygons_buffered = projected_gdf.to_crs(original_crs)

def process_spatial_join(gdf_batch, gdf_polygons_buffered):
    """Attach polygon attributes to the points of ``gdf_batch`` that hit a polygon.

    Runs a left spatial join (``intersects`` predicate) of the batch against
    ``gdf_polygons_buffered`` and then discards every row whose
    ``index_right`` is missing, i.e. rows that matched no polygon.

    Args:
        gdf_batch: GeoDataFrame of geometries to test.
        gdf_polygons_buffered: GeoDataFrame of polygons to join against.

    Returns:
        The joined GeoDataFrame restricted to matching rows. If the join
        result has no ``index_right`` column it is returned unfiltered.
    """
    matches = gpd.sjoin(
        gdf_batch,
        gdf_polygons_buffered,
        how='left',
        predicate='intersects',
    )

    # Without the join-index column there is nothing to filter on.
    if 'index_right' not in matches.columns:
        return matches

    # A missing right-hand index means the row intersected no polygon.
    return matches.dropna(subset=['index_right'])

# ATL_03 
属性表直接连接筛选，快速

In [10]:
file_list = fsw.search_files(r'G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton', '.h5')

# Output column name -> dataset path relative to a beam group.
# Insertion order defines the column order of the resulting table.
HEIGHT_DATASETS = {
    'time': 'heights/delta_time',
    'lat': 'heights/lat_ph',
    'lon': 'heights/lon_ph',
    'dist_ph_along': 'heights/dist_ph_along',
    'height': 'heights/h_ph',
    'quality_ph': 'heights/quality_ph',
}
SIGNAL_CONF_DATASET = 'heights/signal_conf_ph'
N_SIGNAL_CONF_COLUMNS = 5
BATCH_SIZE = 10000


def _read_beam_photons(h5_file, subgroup):
    """Read one beam group's photon datasets into a DataFrame.

    Args:
        h5_file: Open ``h5py.File``.
        subgroup: Beam group prefix including the trailing slash, e.g. ``'gt1l/'``.

    Returns:
        A DataFrame with one row per photon and a ``subgroup`` column, or
        ``None`` if any of the required datasets is missing from the group.
    """
    # HDF5 paths always use '/', so build them by plain concatenation rather
    # than os.path.join, whose separator depends on the operating system.
    datasets = {col: h5_file.get(subgroup + path) for col, path in HEIGHT_DATASETS.items()}
    signal_conf = h5_file.get(subgroup + SIGNAL_CONF_DATASET)
    if signal_conf is None or any(ds is None for ds in datasets.values()):
        return None

    columns = {col: ds[:] for col, ds in datasets.items()}
    conf_values = signal_conf[:]  # read once, then split into one column each
    for i in range(N_SIGNAL_CONF_COLUMNS):
        columns[f'signal_conf_ph_{i + 1}'] = conf_values[:, i]

    df = pd.DataFrame(data=columns)
    df['subgroup'] = subgroup
    return df


def _keep_noise_photons(df):
    """Return only the rows treated as pure noise by the confidence columns.

    NOTE(review): column 3 is compared against 5 while every other column is
    compared against 1. If the confidence values never reach 5 this condition
    is always true and column 3 is effectively ignored -- confirm whether
    that is intentional. The thresholds are kept exactly as they were.
    """
    return df[(df['signal_conf_ph_1'] < 1) &
              (df['signal_conf_ph_2'] < 1) &
              (df['signal_conf_ph_3'] < 5) &
              (df['signal_conf_ph_4'] < 1) &
              (df['signal_conf_ph_5'] < 1)]


def _join_in_batches(gdf_points, polygons, batch_size=BATCH_SIZE):
    """Spatially join ``gdf_points`` to ``polygons`` batch by batch.

    Returns:
        A list of non-empty DataFrames (geometry column removed), one per
        batch that had at least one matching point.
    """
    n_batches = (len(gdf_points) + batch_size - 1) // batch_size  # ceil division
    parts = []
    for n in tqdm(range(n_batches), desc="Processing batches", leave=False):
        gdf_batch = gdf_points.iloc[n * batch_size:(n + 1) * batch_size]
        joined_df = process_spatial_join(gdf_batch, polygons)
        if not joined_df.empty:
            # Drop 'geometry' so the result can be stored as a plain table.
            parts.append(joined_df.drop(columns='geometry'))
    return parts


# Time only this cell's work, not whatever happened since an earlier cell ran.
run_start = time.time()

# Extract the required fields from every input file and write one output per file.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    tqdm.write(f'Processing h5 file: {file_path}')
    # NOTE: the output name depends on the file's position in file_list, so
    # the skip-if-exists check below is only valid while that order is stable.
    output_file_path = r'G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_Noise\ATL03_SETPGL_ALL_{}.h5'.format(idx)
    # Check before opening the input so a skipped file never leaves a handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    joined_parts = []
    # The context manager closes the file even if processing raises.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            tqdm.write(f'  Processing subgroup: {subgroup}')
            if subgroup not in data:
                continue

            df = _read_beam_photons(data, subgroup)
            if df is None:
                continue

            df = _keep_noise_photons(df)
            if df.empty:
                continue

            # Attach polygon attributes to the photons that fall inside a buffered polygon.
            gdf_filtered = gpd.GeoDataFrame(
                df,
                geometry=gpd.points_from_xy(df['lon'], df['lat']),
                crs=original_crs,
            )
            joined_parts.extend(_join_in_batches(gdf_filtered, gdf_polygons_buffered))

    # Concatenate once per file instead of growing a frame inside the batch loop.
    combined_data = pd.concat(joined_parts, ignore_index=True) if joined_parts else pd.DataFrame()

    if not combined_data.empty:
        # Write under a temporary name and move it into place afterwards, so an
        # interrupted write cannot leave a partial file that a later run skips.
        tmp_output_path = output_file_path + '.tmp'
        combined_data.to_hdf(tmp_output_path, key='df', mode='w')
        os.replace(tmp_output_path, output_file_path)

end_time = time.time()
print(f"Processing completed in {end_time - run_start:.2f} seconds")


Processing Files:   0%|          | 0/237 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20221231195214_01721806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/38 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/21 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/62 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/15 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/53 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230101192635_01871806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/52 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/15 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/45 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/61 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230102073623_01951802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/3777 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4694 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/3859 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4239 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/3516 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/4919 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230105191810_02481806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/13 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/13 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230106072759_02561802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/2877 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4029 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/2880 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/3592 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/2576 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/4096 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230109190952_03091806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/4 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/16 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230110071941_03171802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/3822 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4877 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/3968 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4362 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/3877 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5205 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230111065402_03321802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/3350 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4892 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/3339 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4612 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/3421 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5572 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230113190134_03701806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/57 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/18 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/60 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/15 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/67 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230114071125_03781802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/5276 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6402 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5143 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5637 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/4970 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6689 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230115064547_03931802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/3236 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6754 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/3263 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/6321 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/3213 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/7223 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230117185311_04311806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/18 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/70 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/60 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/15 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/78 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230118070300_04391802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4957 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6406 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5117 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5758 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/5013 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6942 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230119063718_04541802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4002 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5475 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/3931 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4795 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/3641 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5422 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230121184503_04921806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/19 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/74 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/66 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/21 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/96 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230123062906_05151802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/3588 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5612 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/3743 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5075 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/3580 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5810 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230126181106_05681806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/20 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/31 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230127062053_05761802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4121 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6097 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/4237 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5543 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/4077 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6308 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230129182828_06141806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/20 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/7 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/33 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230130180238_06291806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/8 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/32 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/30 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/39 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230131061228_06371802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4150 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4985 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/4354 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4556 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/4281 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5357 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230202182005_06751806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/4 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/20 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230203175418_06901806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/19 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/19 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/8 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/28 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230204060409_06981802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/5084 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6312 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/4993 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5633 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/4601 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6481 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230207174558_07511806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/31 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/27 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/41 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230208055549_07591802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/5694 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/8256 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5715 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/7139 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/5582 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/8565 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230209053010_07741802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/4454 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6030 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/4281 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5159 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/4131 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6170 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230211173737_08121806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/40 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/32 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/33 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230212054730_08201802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/10158 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6352 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/8534 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/6304 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/9388 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6104 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230213052153_08351802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/12358 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4765 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/11015 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5074 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/11731 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/4933 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230215172922_08731806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/58 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/56 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/67 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/18 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230216053911_08811802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/10732 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/7308 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/8939 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/7602 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/9863 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/7566 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230217051330_08961802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/11042 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4750 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/9281 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5125 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/10084 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5055 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230219172107_09341806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/45 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/36 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/37 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230221050514_09571802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/8726 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5010 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/7327 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5019 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/7914 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/4901 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230223171244_09951806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/55 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/13 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/38 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/43 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/12 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230225045706_10181802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/6886 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4581 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5893 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4851 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/6714 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5018 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230227170420_10561806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/73 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/63 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/16 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/44 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230228163841_10711806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/110 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/24 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/78 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/22 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/77 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/20 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230301044829_10791802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/9750 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5550 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/8173 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5983 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/8959 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5862 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230303165605_11171806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/41 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/7 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/28 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/7 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/27 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/7 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230304163025_11321806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/63 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/13 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/42 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/38 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230305044019_11401802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/8255 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5008 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/6916 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5298 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/7807 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5241 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230308162205_11931806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/47 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/12 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/36 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/39 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/11 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230309043155_12011802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/10170 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6021 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/8595 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/6454 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/9324 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6212 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230312161339_12541806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/24 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/7 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/26 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/9 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230313042332_12621802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/7054 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5500 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/5607 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5469 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/6119 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5303 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230314035748_12771802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/8967 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4622 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/7087 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4737 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/8010 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/4712 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230316160521_13151806_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/40 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/12 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/30 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/34 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230317041517_13231802_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/9098 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/6672 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/7936 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/7060 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/8780 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/6908 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230318034937_13381802_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/7749 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4401 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/6473 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/4679 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/7043 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/4482 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230320155706_13761806_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/49 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/40 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/46 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/10 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230322034119_00121902_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/9606 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/4861 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/8249 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5246 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/8754 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5146 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230324154842_00501906_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/63 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/16 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/66 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/21 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/161 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/39 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230326033257_00731902_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/9559 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5130 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/8052 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/5683 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/8678 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/5259 [00:00<?, ?it/s]

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230328154015_01111906_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/61 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/16 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/43 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/14 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/60 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/16 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230329151436_01261906_006_02.h5


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['subgroup', 'Times', '关系', '冰川10'], dtype='object')]

  combined_data.to_hdf(output_file_path, key='df', mode='w')


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/64 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/17 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/52 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


Processing batches:   0%|          | 0/16 [00:00<?, ?it/s]

  Processing subgroup: gt3l/


Processing batches:   0%|          | 0/70 [00:00<?, ?it/s]

  Processing subgroup: gt3r/


Processing batches:   0%|          | 0/18 [00:00<?, ?it/s]

Processing h5 file: G:\SETP_ICESat-2\ATL_03_GlobalGeolocatedPhoton\ATL03_20230330032434_01341902_006_02.h5


Processing Subgroups:   0%|          | 0/6 [00:00<?, ?it/s]

  Processing subgroup: gt1l/


Processing batches:   0%|          | 0/8635 [00:00<?, ?it/s]

  Processing subgroup: gt1r/


Processing batches:   0%|          | 0/5330 [00:00<?, ?it/s]

  Processing subgroup: gt2l/


Processing batches:   0%|          | 0/7327 [00:00<?, ?it/s]

  Processing subgroup: gt2r/


# ATL_06

In [None]:
file_list = fsw.search_files(r'G:\SETP_ICESat-2\ATL_06_Landice', '.h5')
ATL06_NoData = []  # granules that produced no rows inside the buffered polygons
batch_size = 10000  # number of segments handed to each spatial join
# Extract the required fields from every ATL06 granule, keep only the segments
# that intersect the buffered polygons, and write one output file per granule.
for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'G:\SETP_ICESat-2\ATL_06_Landice\ATL06_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0], idx)
    # Check for an existing result BEFORE opening the granule, so that a
    # skipped file never leaves an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    # Per-batch join results; concatenated once after the granule is processed
    # instead of growing a DataFrame inside the loop.
    joined_parts = []

    # The context manager closes the granule even if a read or join raises.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            time_data = data.get(os.path.join(subgroup, 'land_ice_segments/delta_time'))
            lat = data.get(os.path.join(subgroup, 'land_ice_segments/latitude'))
            lon = data.get(os.path.join(subgroup, 'land_ice_segments/longitude'))
            height = data.get(os.path.join(subgroup, 'land_ice_segments/h_li'))
            dem = data.get(os.path.join(subgroup, 'land_ice_segments/dem/dem_h'))
            # atl06_quality_summary: 0 = 'best_quality', 1 = 'potential_problem'
            signal_conf_ph = data.get(os.path.join(subgroup, 'land_ice_segments/atl06_quality_summary'))
            segment_id = data.get(os.path.join(subgroup, 'land_ice_segments/segment_id'))

            # data.get returns None for a missing dataset; skip beams that
            # lack any of the required fields.
            required = [time_data, lat, lon, height, dem, signal_conf_ph, segment_id]
            if any(x is None for x in required):
                continue

            # Read every dataset into memory with [:] (including dem, which
            # was previously passed as an unread Dataset handle).
            df = pd.DataFrame(data={
                'time': time_data[:],
                'lat': lat[:],
                'lon': lon[:],
                'height': height[:],
                'dem': dem[:],
                'signal_conf_ph': signal_conf_ph[:],
                'id': segment_id[:],
            })
            df['subgroup'] = subgroup

            if df.empty:
                continue

            # Attach the polygon attribute table via a spatial join.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            n_batches = (len(gdf_filtered) + batch_size - 1) // batch_size  # ceil division

            for n in tqdm(range(n_batches), desc="Processing batches"):
                gdf_batch = gdf_filtered.iloc[n * batch_size:(n + 1) * batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)

                if not joined_df.empty:
                    # Drop 'geometry' so the result can be stored as a plain table.
                    joined_parts.append(joined_df.drop(columns='geometry'))

    if joined_parts:
        combined_data = pd.concat(joined_parts, ignore_index=True)
    else:
        combined_data = pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL06_NoData.append(file_path)

# ATL_07

In [None]:
# ATL07 (sea ice) extraction: for every granule, read the per-beam segment
# datasets, keep only the points that intersect the buffered polygons, and
# write the matches of each granule to its own HDF5 file.
file_list = fsw.search_files(r'E:\SETP_ICESat-2\ATL_07_Seaice', '.h5')
ATL07_NoData = []  # granules that produced no intersecting point

# Output column -> dataset path relative to the beam group.
# The insertion order defines the column order of the resulting table.
# height_segment_quality: (Meanings: [0 1]) (Values: ['bad_quality', 'good_quality'])
atl07_fields = {
    'time': 'sea_ice_segments/delta_time',
    'lat': 'sea_ice_segments/latitude',
    'lon': 'sea_ice_segments/longitude',
    'height': 'sea_ice_segments/heights/height_segment_height',
    'dem': 'dem/dem_h',
    'signal_conf_ph': 'sea_ice_segments/heights/height_segment_quality',
    'id': 'sea_ice_segments/height_segment_id',
}
batch_size = 10000  # number of points handed to each spatial join

for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'E:\SETP_ICESat-2\ATL_07_Seaice\ATL07_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx)
    # Decide whether to skip BEFORE opening the granule, so that skipped
    # files never leave an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    matched_parts = []  # per-batch join results, concatenated once at the end
    # The context manager closes the granule even if processing raises.
    with h5py.File(file_path, 'r') as data:
        for subgroup in sub_file_list:
            if subgroup not in data:
                continue

            # HDF5 paths always use '/', independent of the host OS.
            beam_root = subgroup.rstrip('/')
            datasets = {
                column: data.get(f'{beam_root}/{rel_path}')
                for column, rel_path in atl07_fields.items()
            }
            # Skip the beam unless every required dataset is present.
            if any(ds is None for ds in datasets.values()):
                continue

            # Dataset lengths, printed as a quick consistency check.
            print([len(ds) for ds in datasets.values()])
            df = pd.DataFrame(data={column: ds[:] for column, ds in datasets.items()})
            df['subgroup'] = subgroup
            if df.empty:
                continue

            # Attach point geometries so the rows can be joined to the polygons.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            for start in range(0, len(gdf_filtered), batch_size):
                gdf_batch = gdf_filtered[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # Drop the 'geometry' column so the result is a plain table.
                    matched_parts.append(joined_df.drop(columns='geometry'))

    # A single concat avoids re-copying the accumulated rows on every batch
    # and avoids concatenating onto an empty frame.
    if matched_parts:
        combined_data = pd.concat(matched_parts, ignore_index=True)
    else:
        combined_data = pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL07_NoData.append(file_path)


# ATL_08

In [None]:
# ATL08 (land / vegetation) extraction: for every granule, read the per-beam
# land-segment datasets, keep only the points that intersect the buffered
# polygons, and write the matches of each granule to its own HDF5 file.
file_list = fsw.search_files(r'G:\SETP_ICESat-2\ATL_08_LandVegetation', '.h5')
ATL08_NoData = []  # granules that produced no intersecting point

# Output column -> dataset path relative to the beam group.
# The insertion order defines the column order of the resulting table.
# NOTE: the photon-level datasets 'signal_photons/ph_h' and
# 'signal_photons/d_flag' (dragann flag, 0 = noise, 1 = signal) are
# deliberately not extracted here.
# cloud_flag_atm: a value greater than 0 means aerosols or clouds may be
# present; the valid range is 0-10.
atl08_fields = {
    'time': 'land_segments/delta_time',
    'lat': 'land_segments/latitude',
    'lon': 'land_segments/longitude',
    'height_centroid': 'land_segments/canopy/centroid_height',
    'height_canopy': 'land_segments/canopy/h_canopy',
    'dem': 'land_segments/dem_h',
    'id': 'land_segments/segment_id_beg',
    'cloud': 'land_segments/cloud_flag_atm',
}
batch_size = 10000  # number of points handed to each spatial join

for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'G:\SETP_ICESat-2\ATL_08_LandVegetation\ATL08_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx)
    # Decide whether to skip BEFORE opening the granule, so that skipped
    # files never leave an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    matched_parts = []  # per-batch join results, concatenated once at the end
    # The context manager closes the granule even if processing raises.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            # HDF5 paths always use '/', independent of the host OS.
            beam_root = subgroup.rstrip('/')
            datasets = {
                column: data.get(f'{beam_root}/{rel_path}')
                for column, rel_path in atl08_fields.items()
            }
            # Skip the beam unless every required dataset is present.
            if any(ds is None for ds in datasets.values()):
                continue

            df = pd.DataFrame(data={column: ds[:] for column, ds in datasets.items()})
            df['subgroup'] = subgroup
            if df.empty:
                continue

            # Attach point geometries so the rows can be joined to the polygons.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            batch_starts = range(0, len(gdf_filtered), batch_size)
            for start in tqdm(batch_starts, desc="Processing batches"):
                gdf_batch = gdf_filtered[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # Drop the 'geometry' column so the result is a plain table.
                    matched_parts.append(joined_df.drop(columns='geometry'))

    # A single concat avoids re-copying the accumulated rows on every batch
    # and avoids concatenating onto an empty frame.
    if matched_parts:
        combined_data = pd.concat(matched_parts, ignore_index=True)
    else:
        combined_data = pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL08_NoData.append(file_path)


# ATL_13

In [None]:
# Inspect one ATL13 granule: print the member names found under the 'gt1l'
# beam group, to see which datasets are available for extraction.
file_list = fsw.search_files(r'G:\SETP_ICESat-2\ATL_13_InlandSurfaceWaterData', '.h5')
file_path = file_list[0]
# Open through a context manager so the HDF5 handle is released once the
# listing is done instead of staying open for the rest of the session.
with h5py.File(file_path, 'r') as data:
    for key in data['gt1l'].keys():
        print(key)

In [None]:
# ATL13 (inland surface water) extraction: for every granule, read the
# per-beam datasets, keep only the points that intersect the buffered
# polygons, and write the matches of each granule to its own HDF5 file.
file_list = fsw.search_files(r'G:\SETP_ICESat-2\ATL_13_InlandSurfaceWaterData', '.h5')
ATL13_NoData = []  # granules that produced no intersecting point

# Output column -> dataset path relative to the beam group.
# The insertion order defines the column order of the resulting table.
# cloud_flag_asr_atl09: cloud probability from ASR; (Meanings: [0 1 2 3 4 5])
# (Values: ['clear_with_high_confidence', 'clear_with_medium_confidence',
# 'clear_with_low_confidence', 'cloudy_with_low_confidence',
# 'cloudy_with_medium_confidence', 'cloudy_with_high_confidence'])
atl13_fields = {
    'time': 'delta_time',
    'lat': 'bottom_lat',
    'segment_lat': 'segment_lat',
    'lon': 'bottom_lon',
    'segment_lon': 'segment_lon',
    'height_surface': 'ht_water_surf',
    'water_depth': 'water_depth',
    'dem': 'segment_dem_ht',
    'id': 'segment_id_beg',
    'cloud': 'cloud_flag_asr_atl09',
    'ice_flag': 'ice_flag',
    'inland_water_body_type': 'inland_water_body_type',
}
batch_size = 10000  # number of points handed to each spatial join

for idx, file_path in enumerate(tqdm(file_list, desc="Processing Files")):
    output_file_path = r'G:\SETP_ICESat-2\ATL_13_InlandSurfaceWaterData\ATL13_ALL\{}_{}.h5'.format(os.path.basename(file_path).split('.')[0],idx)
    # Decide whether to skip BEFORE opening the granule, so that skipped
    # files never leave an HDF5 handle open.
    if os.path.exists(output_file_path):
        print('{} 存在，跳过'.format(output_file_path))
        continue

    matched_parts = []  # per-batch join results, concatenated once at the end
    # The context manager closes the granule even if processing raises.
    with h5py.File(file_path, 'r') as data:
        for subgroup in tqdm(sub_file_list, desc="Processing Subgroups", leave=False):
            if subgroup not in data:
                continue

            # HDF5 paths always use '/', independent of the host OS.
            beam_root = subgroup.rstrip('/')
            datasets = {
                column: data.get(f'{beam_root}/{rel_path}')
                for column, rel_path in atl13_fields.items()
            }
            # Skip the beam unless every required dataset is present.
            if any(ds is None for ds in datasets.values()):
                continue

            df = pd.DataFrame(data={column: ds[:] for column, ds in datasets.items()})
            df['subgroup'] = subgroup
            if df.empty:
                continue

            # Point geometries are built from the 'lon'/'lat' columns
            # (bottom_lon / bottom_lat), not from the segment_* coordinates.
            gdf_filtered = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs=original_crs)

            batch_starts = range(0, len(gdf_filtered), batch_size)
            for start in tqdm(batch_starts, desc="Processing batches"):
                gdf_batch = gdf_filtered[start:start + batch_size]
                joined_df = process_spatial_join(gdf_batch, gdf_polygons_buffered)
                if not joined_df.empty:
                    # Drop the 'geometry' column so the result is a plain table.
                    matched_parts.append(joined_df.drop(columns='geometry'))

    # A single concat avoids re-copying the accumulated rows on every batch
    # and avoids concatenating onto an empty frame.
    if matched_parts:
        combined_data = pd.concat(matched_parts, ignore_index=True)
    else:
        combined_data = pd.DataFrame()

    if not combined_data.empty:
        combined_data.to_hdf(output_file_path, key='df', mode='w')
    else:
        print(f"No data found in {file_path}")
        ATL13_NoData.append(file_path)
