### Validation of lake levels derived from SWOT data.
#### 1. Comparison of lake levels from SWOT PIXC data, SWOT LakeSP data, and DAHITI data.
#### 2. Comparison of lake levels computed with the simple-average and area-weighted-average methods.

In [1]:
import pickle
import xarray as xr
import pandas as pd  
from utils.date_transform import decimal_to_date


### 1. swot pixc vs swot lakesp vs dahiti 

In [2]:
## Dianchi lake: input files and output table path
# DAHITI water-level NetCDF file (opened with xarray in read2df_dahiti)
path_DAHITI_dianchi= 'data/dahiti/Dan Chi.nc'  
# pickled SWOT pixc water surface elevations (loaded in read2df_pixc)
path_pixc_wse_dianchi = 'data/swot_l2/pixc/dianchi-lake/dianchi_heights_lake.pkl'  
# pickled SWOT LakeSP water surface elevations (loaded in read2df_lakesp)
path_LakeSP_wse_dianchi = 'data/swot_l2/lakesp/dianchi-lake/dianchi_lakesp_wse.pkl'
path_dahiti_pix_lakesp_dianchi = 'data/eva_data/dahiti_pix_lakesp_dianchi.csv' ## output path of the merged table

## Erhai lake: same set of files as for Dianchi
path_DAHITI_erhai = 'data/dahiti/ErHai.nc'
path_pix_wse_erhai = 'data/swot_l2/pixc/erhai-lake/erhai_heights_lake.pkl'
path_LakeSP_wse_erhai = 'data/swot_l2/lakesp/erhai-lake/erhai_lakesp_wse.pkl'
path_dahiti_pix_lakesp_erhai = 'data/eva_data/dahiti_pix_lakesp_erhai.csv'



#### 1.1 DAHITI水位数据处理（转换基准为EGM2008）

In [3]:
def read2df_dahiti(path_dahiti, geoid_height_eigen6c4=-32.2242, geoid_height_egm2008=-32.1375):
    '''
    Read a DAHITI NetCDF file and return monthly mean water levels.

    (1) read dahiti data and convert to dataframe
    (2) average 'water_level' per calendar month of the 'datetime' column
    (3) convert elevation reference of eigen-6c4 to egm2008

    Args:
        path_dahiti: path of the DAHITI NetCDF file.
        geoid_height_eigen6c4: value added to the water level for the
            EIGEN-6C4 -> WGS84 step (default is the constant used originally).
        geoid_height_egm2008: value subtracted afterwards for the
            WGS84 -> EGM2008 step (default is the constant used originally).

    Returns:
        DataFrame indexed by the monthly period 'year_month' with a single
        column 'dahiti_wse'.

    NOTE(review): the defaults are presumably geoid heights at one lake
    location, yet they are applied unchanged to every lake passed in.
    Confirm, and pass lake-specific values if they differ.
    '''
    # The context manager releases the NetCDF file handle as soon as the
    # contents have been copied into the dataframe.
    with xr.open_dataset(path_dahiti) as dahiti_xr:
        dahiti_df = dahiti_xr.to_dataframe()

    dahiti_df['datetime'] = pd.to_datetime(dahiti_df['datetime'])
    dahiti_df['year_month'] = dahiti_df['datetime'].dt.to_period('M')

    monthly_mean = dahiti_df.groupby('year_month')['water_level'].mean()
    dahiti_wse_month = monthly_mean.rename('dahiti_wse').to_frame()

    # Datum shift: EIGEN-6C4 -> WGS84 (add), then WGS84 -> EGM2008 (subtract).
    # Kept as two sequential operations so rounding matches the original.
    dahiti_wse_month['dahiti_wse'] = (
        dahiti_wse_month['dahiti_wse'] + geoid_height_eigen6c4 - geoid_height_egm2008
    )
    return dahiti_wse_month

# Monthly DAHITI water levels for both lakes (datum-shifted inside read2df_dahiti).
dahiti_wse_month_dianchi = read2df_dahiti(path_DAHITI_dianchi)
dahiti_wse_month_erhai = read2df_dahiti(path_DAHITI_erhai)
# Bare expression: displays the Erhai table as the notebook cell output.
dahiti_wse_month_erhai

Unnamed: 0_level_0,dahiti_wse
year_month,Unnamed: 1_level_1
2016-05,1965.288208
2016-06,1965.083252
2016-07,1965.069214
2016-08,1965.178711
2016-09,1965.405151
...,...
2024-08,1965.119263
2024-09,1965.403198
2024-10,1965.537231
2024-11,1965.607178


#### 1.2 基于SWOT Pixc数据的水位信息

In [6]:
### 
def read2df_pixc(path_pixc_wse):
    '''
    Load pickled SWOT pixc water levels and return their monthly means.

    (1) unpickle a mapping of decimal date -> water surface elevation
    (2) convert each decimal date with decimal_to_date and bin it by month
    (3) average 'SWOT_pixc_wse' per month

    Returns a DataFrame indexed by the monthly period 'year_month' with a
    single column 'SWOT_pixc_wse'.

    NOTE(review): pickle.load executes arbitrary code from the file; only
    use it on trusted, locally produced pickles.
    '''
    with open(path_pixc_wse, 'rb') as fh:
        wse_by_decimal_date = pickle.load(fh)

    records = pd.DataFrame.from_dict(wse_by_decimal_date, orient='index').reset_index()
    records.columns = ['decimal_date', 'SWOT_pixc_wse']

    # Decimal date -> calendar date -> monthly period used as grouping key.
    calendar_dates = records['decimal_date'].apply(decimal_to_date)
    records['year_month'] = pd.to_datetime(calendar_dates).dt.to_period('M')

    monthly_mean = records.groupby('year_month')['SWOT_pixc_wse'].mean()
    return monthly_mean.to_frame()

# Monthly SWOT pixc water levels for both lakes.
pixc_wse_month_dianchi = read2df_pixc(path_pixc_wse_dianchi)
pixc_wse_month_erhai = read2df_pixc(path_pix_wse_erhai)
# Bare expression: displays the Erhai table as the notebook cell output.
pixc_wse_month_erhai


Unnamed: 0_level_0,SWOT_pixc_wse
year_month,Unnamed: 1_level_1
2023-09,1966.01709
2023-11,1966.293091
2023-12,1966.167236
2024-01,1966.077637
2024-02,1966.097778
2024-03,1965.846924
2024-04,1965.611694
2024-05,1965.489014
2024-06,1965.452393
2024-07,1965.417236


#### 1.3 基于LakeSP数据的SWOT水位信息

In [8]:
def read2df_lakesp(path_LakeSP_wse):
    '''
    Load pickled SWOT LakeSP water levels and return their monthly means.

    (1) unpickle a mapping of decimal date -> water surface elevation
    (2) convert each decimal date with decimal_to_date and bin it by month
    (3) average 'SWOT_LakeSP_wse' per month

    Returns a DataFrame indexed by the monthly period 'year_month' with a
    single column 'SWOT_LakeSP_wse'.

    NOTE(review): pickle.load executes arbitrary code from the file; only
    use it on trusted, locally produced pickles.
    '''
    with open(path_LakeSP_wse, 'rb') as fh:
        wse_by_decimal_date = pickle.load(fh)

    records = pd.DataFrame.from_dict(wse_by_decimal_date, orient='index').reset_index()
    records.columns = ['decimal_date', 'SWOT_LakeSP_wse']

    # Decimal date -> calendar date -> monthly period used as grouping key.
    calendar_dates = records['decimal_date'].apply(decimal_to_date)
    records['year_month'] = pd.to_datetime(calendar_dates).dt.to_period('M')

    monthly_mean = records.groupby('year_month')['SWOT_LakeSP_wse'].mean()
    return monthly_mean.to_frame()

# Monthly SWOT LakeSP water levels for both lakes.
lakesP_wse_month_dianchi = read2df_lakesp(path_LakeSP_wse_dianchi)
lakesP_wse_month_erhai = read2df_lakesp(path_LakeSP_wse_erhai)
# Bare expression: displays the Erhai table as the notebook cell output.
lakesP_wse_month_erhai


Unnamed: 0_level_0,SWOT_LakeSP_wse
year_month,Unnamed: 1_level_1
2023-11,1966.4545
2023-12,1966.371
2024-01,1966.4045
2024-02,1966.234
2024-03,1965.936
2024-04,1965.78
2024-05,1965.599
2024-06,1965.743
2024-07,1965.557
2024-08,1965.931


#### 1.4 数据合并及对比分析   
合并数据--均方根计算--可视化

In [None]:
## merge_dahiti_pix_lakesp
def merge_dahiti_pix_lakesp(dahiti_wse_month, pixc_wse_month, LakeSP_wse_month):
    '''
    (1) merge dahiti, pixc, lakesp data into one dataframe
    (2) interpolate missing values
    (3) system bias correction for pixc and lakesp data by use dahiti data as reference
    '''
    ## 合并三个水位产品
    dahiti_pixc_lakesp_df = pixc_wse_month.merge(LakeSP_wse_month, on='year_month', how='outer').\
                                            merge(dahiti_wse_month, on='year_month', how='inner')
    dahiti_pixc_lakesp_df = dahiti_pixc_lakesp_df.interpolate(method='linear', limit_area='inside')  
    dahiti_pixc_lakesp_df = dahiti_pixc_lakesp_df.reindex(columns=['dahiti_wse', 'SWOT_pixc_wse', 'SWOT_LakeSP_wse']) 

    #计算均方根误差和相关性
    dif_lakesp_dahiti = dahiti_pixc_lakesp_df['SWOT_LakeSP_wse'] - dahiti_pixc_lakesp_df['dahiti_wse']
    dif_pix_dahiti = dahiti_pixc_lakesp_df['SWOT_pixc_wse'] - dahiti_pixc_lakesp_df['dahiti_wse']

    ## 计算差值的均值(系统偏差，以dahiti为参考)
    mean_dif_lakesp_dahiti = dif_lakesp_dahiti.mean()
    mean_dif_pix_dahiti = dif_pix_dahiti.mean()

    # 将均值加到wse_month列（以DAHITI为参考, 消除系统偏差）
    dahiti_pixc_lakesp_df['SWOT_pixc_wse_cor'] = dahiti_pixc_lakesp_df['SWOT_pixc_wse'] - mean_dif_pix_dahiti
    dahiti_pixc_lakesp_df['SWOT_LakeSP_wse_cor'] = dahiti_pixc_lakesp_df['SWOT_LakeSP_wse'] - mean_dif_lakesp_dahiti
    ### write out
    return dahiti_pixc_lakesp_df

# Merged and bias-corrected monthly tables for both lakes.
dahiti_pixc_lakesp_dianchi = merge_dahiti_pix_lakesp(dahiti_wse_month_dianchi, pixc_wse_month_dianchi, lakesP_wse_month_dianchi)
dahiti_pixc_lakesp_erhai = merge_dahiti_pix_lakesp(dahiti_wse_month_erhai, pixc_wse_month_erhai, lakesP_wse_month_erhai)
# Bare expression: displays the Dianchi table as the notebook cell output.
dahiti_pixc_lakesp_dianchi
# NOTE(review): the CSV export is disabled, but a later cell reads these same
# CSV paths back with pd.read_csv; re-enable it to regenerate those files.
# dahiti_pixc_lakesp_dianchi.to_csv(path_dahiti_pix_lakesp_dianchi, index=True)
# dahiti_pixc_lakesp_erhai.to_csv(path_dahiti_pix_lakesp_erhai, index=True)


### 2. 像素云面积加权平均 vs 普通平均   
备注：基于pixc数据水位提取方法分析

In [13]:
## Dianchi lake
# pickled pixc water levels from the simple (unweighted) average
path_unweighted_dianchi = 'data/swot_l2/pixc/dianchi-lake/dianchi_heights_lake_mean_sim.pkl'  
path_dahiti_pix_lakesp_dianchi = 'data/eva_data/dahiti_pix_lakesp_dianchi.csv' ## merged table from section 1 (read back below)
path_dahiti_unweighted_weighted_dianchi = 'data/eva_data/dahiti_unweighted_weighted_dianchi.csv' ## output path
## Erhai lake
path_unweighted_erhai = 'data/swot_l2/pixc/erhai-lake/erhai_heights_lake_mean_sim.pkl'
path_dahiti_pix_lakesp_erhai = 'data/eva_data/dahiti_pix_lakesp_erhai.csv' ## merged table from section 1 (read back below)
path_dahiti_unweighted_weighted_erhai = 'data/eva_data/dahiti_unweighted_weighted_erhai.csv' ## output path


In [14]:
# Reload the merged tables from CSV; index_col=0 restores the first CSV column
# ('year_month') as the index, which pandas reads back as plain strings.
dahiti_pixc_lakesp_dianchi = pd.read_csv(path_dahiti_pix_lakesp_dianchi, index_col=0)
dahiti_pixc_lakesp_erhai = pd.read_csv(path_dahiti_pix_lakesp_erhai, index_col=0)
# Bare expression: displays the Erhai table as the notebook cell output.
dahiti_pixc_lakesp_erhai


Unnamed: 0_level_0,dahiti_wse,SWOT_pixc_wse,SWOT_LakeSP_wse,SWOT_pixc_wse_cor,SWOT_LakeSP_wse_cor
year_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-09,1965.4342,1966.0171,,1965.4246,
2023-11,1965.5702,1966.2931,1966.4545,1965.7004,1965.704113
2023-12,1965.6812,1966.1672,1966.371,1965.5747,1965.620613
2024-01,1965.5062,1966.0776,1966.4045,1965.4851,1965.654113
2024-03,1965.3011,1965.8469,1965.936,1965.2544,1965.185613
2024-04,1965.0922,1965.6117,1965.78,1965.019,1965.029613
2024-05,1964.8522,1965.489,1965.599,1964.8965,1964.848613
2024-06,1964.8743,1965.4524,1965.743,1964.8599,1964.992613
2024-07,1964.8757,1965.4172,1965.557,1964.8247,1964.806613
2024-08,1965.1193,1965.8175,1965.931,1965.2249,1965.180613


#### 2.1 普通平均方法（非加权）所得结果

In [15]:
def read2df_unweighted(path_unweighted):
    '''
    Load pickled unweighted (simple-average) pixc water levels and return
    their monthly means.

    (1) unpickle a mapping of decimal date -> water surface elevation
    (2) convert each decimal date with decimal_to_date and bin it by month;
        the month key is stored as a string
    (3) average 'pixc_wse_unweighted' per month

    Returns a DataFrame indexed by the string 'year_month' with a single
    column 'pixc_wse_unweighted'.

    NOTE(review): pickle.load executes arbitrary code from the file; only
    use it on trusted, locally produced pickles.
    '''
    with open(path_unweighted, 'rb') as fh:
        wse_by_decimal_date = pickle.load(fh)

    records = pd.DataFrame.from_dict(wse_by_decimal_date, orient='index').reset_index()
    records.columns = ['decimal_date', 'pixc_wse_unweighted']

    # Decimal date -> calendar date -> monthly period -> string key.
    calendar_dates = records['decimal_date'].apply(decimal_to_date)
    month_period = pd.to_datetime(calendar_dates).dt.to_period('M')
    records['year_month'] = month_period.astype(str)

    monthly_mean = records.groupby('year_month')['pixc_wse_unweighted'].mean()
    return monthly_mean.to_frame()

# Monthly unweighted (simple-average) pixc water levels for both lakes.
unweighted_wse_month_dianchi = read2df_unweighted(path_unweighted_dianchi)
unweighted_wse_month_erhai = read2df_unweighted(path_unweighted_erhai)
# Bare expression: displays the Erhai table as the notebook cell output.
unweighted_wse_month_erhai


Unnamed: 0_level_0,pixc_wse_unweighted
year_month,Unnamed: 1_level_1
2023-09,1966.018234
2023-11,1966.294204
2023-12,1966.168222
2024-01,1966.078692
2024-02,1966.100168
2024-03,1965.848985
2024-04,1965.612582
2024-05,1965.489268
2024-06,1965.455832
2024-07,1965.418032


#### 2.2 数据合并及处理

In [17]:
def merge_dahiti_weighted_unweighted(dahiti_pixc_lakesp_df, pixc_unweighted_df):
    '''
    (1) merge dahiti_pixc_lakesp_df and pixc_unweight_df into one dataframe
    (2) calculate weighted and unweighted water level
    '''
    # 提取加权水位
    pixc_weighted_df = dahiti_pixc_lakesp_df[['dahiti_wse', 'SWOT_pixc_wse', 'SWOT_pixc_wse_cor']]
    pixc_weighted_df = pixc_weighted_df.rename(columns={'SWOT_pixc_wse': 'pixc_wse_weighted', 
                                                'SWOT_pixc_wse_cor': 'pixc_wse_weighted_cor'})
    # 合并加权和非加权数据
    pixc_weighted_unweighted_df = pixc_weighted_df.merge(pixc_unweighted_df, how='outer', on='year_month')
    ## interpolate missing values
    pixc_weighted_unweighted_df = pixc_weighted_unweighted_df.interpolate(method='linear', limit_area='inside')  
    # 非加权方法系统偏差改正（以dahiti为参考）
    dif_unweighted_dahiti = pixc_weighted_unweighted_df['pixc_wse_unweighted'] - pixc_weighted_unweighted_df['dahiti_wse']
    pixc_weighted_unweighted_df['pixc_wse_unweighted_cor'] = pixc_weighted_unweighted_df['pixc_wse_unweighted'] - dif_unweighted_dahiti.mean()
    return pixc_weighted_unweighted_df

# Weighted vs. unweighted comparison tables for both lakes.
dahiti_weighted_unweighted_dianchi = merge_dahiti_weighted_unweighted(dahiti_pixc_lakesp_dianchi, unweighted_wse_month_dianchi)
dahiti_weighted_unweighted_erhai = merge_dahiti_weighted_unweighted(dahiti_pixc_lakesp_erhai, unweighted_wse_month_erhai)
# Bare expression: displays the Erhai table as the notebook cell output.
dahiti_weighted_unweighted_erhai


Unnamed: 0_level_0,dahiti_wse,pixc_wse_weighted,pixc_wse_weighted_cor,pixc_wse_unweighted,pixc_wse_unweighted_cor
year_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-09,1965.4342,1966.0171,1965.4246,1966.018234,1965.417407
2023-11,1965.5702,1966.2931,1965.7004,1966.294204,1965.693377
2023-12,1965.6812,1966.1672,1965.5747,1966.168222,1965.567395
2024-01,1965.5062,1966.0776,1965.4851,1966.078692,1965.477866
2024-02,1965.40365,1965.96225,1965.36975,1966.100168,1965.499341
2024-03,1965.3011,1965.8469,1965.2544,1965.848985,1965.248159
2024-04,1965.0922,1965.6117,1965.019,1965.612582,1965.011755
2024-05,1964.8522,1965.489,1964.8965,1965.489268,1964.888441
2024-06,1964.8743,1965.4524,1964.8599,1965.455832,1964.855005
2024-07,1964.8757,1965.4172,1964.8247,1965.418032,1964.817205


In [18]:
# Write the comparison tables to CSV; index=True keeps 'year_month' as the first column.
dahiti_weighted_unweighted_dianchi.to_csv(path_dahiti_unweighted_weighted_dianchi, index=True)
dahiti_weighted_unweighted_erhai.to_csv(path_dahiti_unweighted_weighted_erhai, index=True)
