In [3]:
import os
import pandas as pd

# Root folder of the basin data. Every directory below it whose name is
# exactly "流域属性" is treated as one batch of per-attribute CSV files.
root_folder = r"E:\10矢量\流域属性+第一层结果\流域"


def merge_attribute_folder(folder, csv_names):
    """Join the attribute CSVs of one folder into one wide table per year.

    Each ``<attribute>.csv`` is read with ``pd.read_csv``. Every column whose
    name consists only of digits is treated as a year column and becomes a
    column named ``<attribute>_<year>``. For each year, the columns coming
    from the different files are outer-joined on ``Hylak_id``.

    Args:
        folder: Directory that contains the CSV files.
        csv_names: File names inside ``folder``; entries that do not end in
            ``.csv`` are ignored.

    Returns:
        dict mapping the year (as the column-name string, e.g. ``"1984"``) to
        a DataFrame with ``Hylak_id`` plus one column per attribute. Column
        order follows the order of ``csv_names``.

    Raises:
        KeyError: a CSV has a year column but no ``Hylak_id`` column.
    """
    per_year = {}
    for csv_name in csv_names:
        if not csv_name.endswith(".csv"):
            continue
        table = pd.read_csv(os.path.join(folder, csv_name))
        attribute = os.path.splitext(csv_name)[0]

        for col in table.columns:
            if not col.isdigit():  # only all-digit headers are year columns
                continue
            new_col = f"{attribute}_{col}"
            year_slice = table[['Hylak_id', col]].rename(columns={col: new_col})

            if col not in per_year:
                per_year[col] = year_slice
            elif new_col in per_year[col].columns:
                # Same attribute/year seen twice in this folder: keep the first.
                print(f"列 {new_col} 在本地已存在，跳过")
            else:
                per_year[col] = pd.merge(per_year[col], year_slice, on='Hylak_id', how='outer')
    return per_year


def collect_yearly_tables(base_folder):
    """Build one table per year from every "流域属性" folder under ``base_folder``.

    The per-folder tables produced by ``merge_attribute_folder`` are stacked
    row-wise. All frames of a year are collected first and concatenated in a
    single ``pd.concat`` call, so earlier rows are not copied again for every
    additional folder.

    Args:
        base_folder: Directory tree to walk with ``os.walk``.

    Returns:
        dict mapping year string to the stacked DataFrame. Years appear in the
        order they were first encountered. The column order of each table is
        the order in which columns were first seen, i.e. it follows the
        directory listing order; code that selects columns by position depends
        on that order.
    """
    frames_by_year = {}
    for folder, _subdirs, csv_names in os.walk(base_folder):
        if os.path.basename(folder) != "流域属性":
            continue
        for year, table in merge_attribute_folder(folder, csv_names).items():
            frames_by_year.setdefault(year, []).append(table)

    return {
        year: frames[0] if len(frames) == 1
        else pd.concat(frames, axis=0, ignore_index=True)
        for year, frames in frames_by_year.items()
    }


# Year string -> table stacked over all "流域属性" folders.
yearly_concat = collect_yearly_tables(root_folder)

# Write one "<year>_final.csv" per year into the result folder.
output_folder = os.path.join(root_folder, "最终合并结果")
os.makedirs(output_folder, exist_ok=True)

for year, yearly_table in yearly_concat.items():
    output_path = os.path.join(output_folder, f"{year}_final.csv")
    yearly_table.to_csv(output_path, index=False)

print("全部合并完成！")



全部合并完成！


In [7]:

# Load the merged table explicitly so this cell also runs on a fresh kernel;
# it previously relied on a `data` frame left over from another cell.
# NOTE(review): the recorded output shows *_1984 columns, so the 1984 table is
# assumed here -- confirm this is the year that was meant to be previewed.
data = pd.read_csv(r"E:\10矢量\流域属性+第一层结果\流域\最终合并结果\1984_final.csv")

# Preview the candidate feature columns: the last eleven columns of the table
# minus the final one (ten columns, selected by position).
X = data.iloc[:, -11:-1]
print(X)

         evap_1984  LAIH_1984  LAIL_1984  pop_1984  PRECI_1984  PRESSURE_1984  \
0        -0.007754   0.000000   0.000000  0.000007    0.019052    98095.10020   
1        -0.005711   0.000000   0.000000  0.000011    0.018900    97666.71458   
2        -0.003883   0.000000   0.000000  0.000013    0.020397    95220.46796   
3        -0.025618   1.281645   1.061935  0.000011    0.049260    98157.82121   
4        -0.003439   0.000000   0.000000  0.000013    0.018699    90802.16633   
...            ...        ...        ...       ...         ...            ...   
1427685  -0.067645   4.654712   1.683716  2.158818    0.132902    93050.75436   
1427686  -0.063035   2.822539   2.057408  0.001225    0.141497    95522.91447   
1427687  -0.059390   1.633977   0.412808  0.003605    0.158998    96243.90372   
1427688  -0.045278   3.794333   1.514303  1.705730    0.177976    89920.95973   
1427689  -0.054782   5.343370   1.878729  0.598898    0.224273    93261.53628   

         RUNOFF_1984    SOL

In [8]:
# 导入必要库
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Folder that holds the per-year "<year>_final.csv" tables; the importance
# table is written to the same folder.
RESULT_DIR = r"E:\10矢量\流域属性+第一层结果\流域\最终合并结果"


def select_features_and_target(data, year):
    """Pick the feature columns and the target column for one year.

    Features are selected by position: the last eleven columns of ``data``
    minus the final one. If the target column happens to fall inside that
    window it is removed, so the model is never trained on its own target.

    Args:
        data: Table of one year, as read from ``<year>_final.csv``.
        year: Year used to build the target column name ``DIC_<year>``.

    Returns:
        ``(X, y)``: the feature DataFrame and the target Series, rows not yet
        filtered for missing values.

    Raises:
        KeyError: ``data`` has no ``DIC_<year>`` column.
    """
    target = f"DIC_{year}"
    features = data.iloc[:, -11:-1]
    if target in features.columns:
        features = features.drop(columns=[target])
    return features, data[target]


# One record per (year, feature) with the fitted importance.
all_feature_importances = []

for i in range(1984, 2025):  # years 1984-2024 inclusive
    try:
        # 1. Load the merged table of this year.
        data = pd.read_csv(f"{RESULT_DIR}\\{i}_final.csv")
        X, y = select_features_and_target(data, i)

        # 2. Put features and target side by side and drop rows with any
        #    missing value; the target is the last column of the result.
        df = pd.concat([X, y], axis=1).dropna()
        X_clean = df.iloc[:, :-1]
        y_clean = df.iloc[:, -1]

        # 3. Fit the model (fixed seed so repeated runs give the same result).
        model = RandomForestRegressor(
            n_estimators=100,
            random_state=42,
            max_depth=8,
            n_jobs=2
        )
        model.fit(X_clean, y_clean)

        # 4. Record the importance of every feature for this year.
        feature_names = X_clean.columns
        importances = model.feature_importances_
        for name, imp in zip(feature_names, importances):
            all_feature_importances.append({
                'Year': i,
                'Feature': name,
                'Importance': imp
            })

    except Exception as e:
        # Deliberate best effort: report the failing year and continue with
        # the remaining years.
        print(f"处理{i}年出错: {e}")

# 5. Save all importances to CSV. The columns are fixed so the file keeps its
#    header even when no year could be processed.
importance_df = pd.DataFrame(all_feature_importances, columns=['Year', 'Feature', 'Importance'])
importance_df.to_csv(f"{RESULT_DIR}\\all_feature_importance.csv", index=False)
