In [4]:
import os
import h5py
import pandas as pd

# ==============================
# 0) Path settings (adjust these to your own environment)
# ==============================
# Folder that is listed for *.h5 files when building the strong-beam mapping.
h5_folder = "/Users/a1-6/Self_program/underground_water/thesis/ICESat/2022_fullYearData"

# Input CSV; the code below reads its "source_file" and "beam" columns.
csv_path = "/Users/a1-6/Self_program/underground_water/thesis/ICESat/merged_by_rule_withoutWater/rule_merge_utm/UTM_merged_Rule8_full.csv"

# Output CSV that receives only the rows flagged as strong-beam.
out_csv = "/Users/a1-6/Self_program/underground_water/thesis/ICESat/merged_by_rule_withoutWater/rule_merge_utm/UTM_merged_Rule8_full_STRONG.csv"


# ==============================
# Small helper functions: normalise file names to a common key
# ==============================
def norm_h5_name(fn: str) -> str:
    """
    Reduce an H5 file name (or path) to its bare stem.

    Example:
        ATL08_20220314200953_12611402_007_01.h5
        -> ATL08_20220314200953_12611402_007_01

    Only the last extension is removed and any leading directory is dropped.
    """
    filename = os.path.basename(fn)
    stem, _extension = os.path.splitext(filename)
    return stem


def norm_src_name(src: str, suffix: str = "_Rule8_full") -> str:
    """
    Reduce a CSV ``source_file`` value to the bare granule name.

    Example (default suffix):
        ATL08_20220314200953_12611402_007_01_Rule8_full.csv
        -> ATL08_20220314200953_12611402_007_01

    Args:
        src: File name or path as stored in the CSV.
        suffix: Trailing tag to strip from the stem once the extension is
            removed. Defaults to "_Rule8_full", so existing single-argument
            calls behave exactly as before; pass another tag to reuse this
            helper for files produced with a different suffix, or "" to
            strip nothing.

    Returns:
        The base name without directory, extension, or the trailing suffix.
    """
    base = os.path.splitext(os.path.basename(src))[0]  # drop directory and extension
    # An empty suffix must be skipped explicitly: base[:-0] would be "".
    if suffix and base.endswith(suffix):
        base = base[:-len(suffix)]
    return base


# ==============================
# 1) Build the "file -> strong beams" mapping from sc_orient
# ==============================
def get_strong_beams(sc_orient: int):
    """
    Return the strong-beam group names for a spacecraft orientation flag.

    sc_orient == 0 -> the left ("l") beams are strong (backward)
    sc_orient == 1 -> the right ("r") beams are strong (forward)
    anything else  -> empty list (e.g. 2 = transition, not used)

    A new list is returned on every call.
    """
    if sc_orient == 0:
        side = "l"
    elif sc_orient == 1:
        side = "r"
    else:
        # Transition (or any unrecognised value): no beam is treated as strong.
        return []

    return [f"gt{track}{side}" for track in (1, 2, 3)]


# Normalised H5 name -> list of strong beam names,
# e.g. {"ATL08_20220314200953_12611402_007_01": ["gt1l", "gt2l", ...]}
strong_dict = {}

# Visit the .h5 files of the folder in sorted name order.
for fn in (name for name in sorted(os.listdir(h5_folder)) if name.endswith(".h5")):
    key = norm_h5_name(fn)
    fpath = os.path.join(h5_folder, fn)

    # Only the first element of /orbit_info/sc_orient is read from each file.
    with h5py.File(fpath, "r") as f:
        sc_orient = int(f["/orbit_info/sc_orient"][0])

    strong_beams = get_strong_beams(sc_orient)
    strong_dict[key] = strong_beams

    # Per-file report, one item per line, closed by a 40-dash separator.
    print(
        f"{fn}",
        f"  norm key   = {key}",
        f"  sc_orient  = {sc_orient}",
        f"  strong_beams = {strong_beams}",
        "-" * 40,
        sep="\n",
    )

print("\n共建立强光束映射条数:", len(strong_dict))


# ==============================
# 2) Load the CSV; source_file + beam decide whether a row is strong-beam
# ==============================
df = pd.read_csv(csv_path)

# Quick look at the loaded table: column names, up to ten distinct beam
# values, and the first five source_file entries.
print("\nCSV 列：", df.columns.to_list())
print("beam 示例:", df["beam"].unique()[:10])
print("source_file 示例:", df["source_file"].head(5))

def is_strong_row(row):
    """
    Tell whether one CSV row was measured by a strong beam.

    The row's "source_file" is normalised with norm_src_name so that it uses
    the same key format as strong_dict; the row's "beam" (as a string) is then
    looked up in that file's strong-beam list. A source file with no entry in
    strong_dict yields False.
    """
    lookup_key = norm_src_name(str(row["source_file"]))
    return str(row["beam"]) in strong_dict.get(lookup_key, [])

# Flag every row, then report the totals.
df["is_strong"] = df.apply(is_strong_row, axis=1)

n_total = len(df)
n_strong = df["is_strong"].sum()
print("\n总点数:", n_total)
print("强光束点数:", n_strong)

# Keep only the flagged rows; the copy is independent of df.
df_strong = df.loc[df["is_strong"]].copy()
print("筛选后 DataFrame 行数:", len(df_strong))

beam_counts = df_strong["beam"].value_counts()
print("\n强光束 beam 分布：")
print(beam_counts)

# ==============================
# 3) Save the result
# ==============================
df_strong.to_csv(out_csv, index=False)
print("\n✅ 已保存只含强光束的 CSV：", out_csv)

ATL08_20220102113835_01711406_007_01.h5
  norm key   = ATL08_20220102113835_01711406_007_01
  sc_orient  = 0
  strong_beams = ['gt1l', 'gt2l', 'gt3l']
----------------------------------------
ATL08_20220111230607_03161402_007_01.h5
  norm key   = ATL08_20220111230607_03161402_007_01
  sc_orient  = 0
  strong_beams = ['gt1l', 'gt2l', 'gt3l']
----------------------------------------
ATL08_20220115225748_03771402_007_01.h5
  norm key   = ATL08_20220115225748_03771402_007_01
  sc_orient  = 0
  strong_beams = ['gt1l', 'gt2l', 'gt3l']
----------------------------------------
ATL08_20220119224927_04381402_007_01.h5
  norm key   = ATL08_20220119224927_04381402_007_01
  sc_orient  = 0
  strong_beams = ['gt1l', 'gt2l', 'gt3l']
----------------------------------------
ATL08_20220209214206_07581402_007_01.h5
  norm key   = ATL08_20220209214206_07581402_007_01
  sc_orient  = 0
  strong_beams = ['gt1l', 'gt2l', 'gt3l']
----------------------------------------
ATL08_20220314200953_12611402_007_01.h5
