In [1]:
from meta_info import *
from tools import *
from plot import *
from ntpath import join
import xarray as xr
import rioxarray as rxr
import numpy as np
import pandas as pd
from scipy import stats
import os
import math
from preprocessing import *

In [None]:
def run(data_root):
    """Run the GIMMS3g NDVI preprocessing steps on ``<data_root>/GIMMS3g_NDVI``.

    Every step below receives the same dataset directory and the steps are
    executed strictly in the order written. None of the step functions are
    defined in this notebook (presumably they come from the star-imported
    project modules -- TODO confirm), so their individual effects are not
    documented here.

    Parameters
    ----------
    data_root : str
        Root folder that contains the ``GIMMS3g_NDVI`` directory.
    """
    ndvi_dir = join(data_root, 'GIMMS3g_NDVI')

    clean(ndvi_dir)
    resample_merge(ndvi_dir)
    monthly_compose(ndvi_dir, method='max')
    deseason_detrend(ndvi_dir)
    Month_to_daily(ndvi_dir)
    HANTS().run(ndvi_dir)
    growing_season_mask_monthly(ndvi_dir)

def calculate_growing_season(growing_season_mask):
    """
    Compute growing-season start month, end month and length for every pixel.

    For each (lat, lon) pixel the time series of ``growing_season_mask`` is
    split into runs of consecutive time steps whose value equals 1. For every
    run three numbers are stored at the run's FIRST time step:

    * start array  -> calendar month of the first step of the run
    * end array    -> calendar month of the last step of the run
    * length array -> inclusive number of calendar months between the two

    Every other cell of the three outputs stays NaN. The outputs are also
    written to the hard-coded NetCDF paths used in the ``to_netcdf`` calls
    below before being returned.

    Returns
    -------
    (growing_season_start, growing_season_end, growing_season_lenth)
        Float arrays with the same shape and coordinates as the input mask.
    """
    float_template = growing_season_mask.astype(float)
    growing_season_start = xr.full_like(float_template, np.nan)   # all-NaN output
    growing_season_end = xr.full_like(float_template, np.nan)     # all-NaN output
    growing_season_lenth = xr.full_like(float_template, np.nan)   # all-NaN output

    # Visit every grid cell.
    for lat in tqdm(growing_season_mask.lat.values):
        for lon in growing_season_mask.lon.values:
            pixel_mask = growing_season_mask.sel(lat=lat, lon=lon)

            # Positions along time where the mask equals 1.
            active = np.where(pixel_mask == 1)[0]
            if len(active) == 0:
                continue

            # A gap (> 1 step) between neighbouring active positions closes
            # one run and opens the next one.
            gaps = np.flatnonzero(np.diff(active) != 1)
            run_firsts = active[np.r_[0, gaps + 1]]
            run_lasts = active[np.r_[gaps, len(active) - 1]]

            for first_idx, last_idx in zip(run_firsts, run_lasts):
                first_ts = pd.to_datetime(pixel_mask.time[first_idx].values)
                last_ts = pd.to_datetime(pixel_mask.time[last_idx].values)
                # Inclusive month count between first and last time stamp.
                n_months = (last_ts.year - first_ts.year) * 12 + last_ts.month - first_ts.month + 1

                # All three values are keyed by the run's first time step.
                cell = {'lat': lat, 'lon': lon, 'time': pixel_mask.time[first_idx]}
                growing_season_start.loc[cell] = first_ts.month
                growing_season_end.loc[cell] = last_ts.month
                growing_season_lenth.loc[cell] = n_months

    growing_season_start.to_netcdf(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_statistic\growing_season_start.nc4')
    growing_season_end.to_netcdf(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_statistic\growing_season_end.nc4')
    growing_season_lenth.to_netcdf(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_statistic\growing_season_lenth.nc4')

    return growing_season_start, growing_season_end, growing_season_lenth

def drop_consecutive_drought(drought_mask):
    """
    Blank out droughts that are followed by another drought within 24 steps.

    Per pixel, runs of consecutive time steps equal to 1 are located in
    ``drought_mask``. Whenever two neighbouring runs START at most 24 time
    steps apart (two years if the series is monthly -- TODO confirm), every
    time step from the start of the first run up to the end of the second
    run is set to NaN in a copy of the mask. Runs are always detected on the
    untouched input, never on the partially edited copy.

    Returns
    -------
    A copy of ``drought_mask`` with the affected spans replaced by NaN.
    """
    drop_nyear_drought = drought_mask.copy()

    # Visit every grid cell.
    for lat in tqdm(drop_nyear_drought.lat.values):
        for lon in drop_nyear_drought.lon.values:
            pixel_series = drought_mask.sel(lat=lat, lon=lon)

            # Positions along time where the mask equals 1.
            flagged = np.where(pixel_series == 1)[0]
            if len(flagged) == 0:
                continue

            # First position of every run of consecutive flagged steps.
            run_starts = [flagged[0]]
            run_starts.extend(
                cur for prev, cur in zip(flagged[:-1], flagged[1:]) if cur != prev + 1
            )

            n_steps = len(pixel_series.time)
            for this_start, following_start in zip(run_starts[:-1], run_starts[1:]):
                if following_start - this_start > 24:
                    continue
                # Walk to the (exclusive) end of the following run.
                stop = following_start + 1
                while stop < n_steps and pixel_series[stop] == 1:
                    stop += 1
                doomed_times = pixel_series.isel(time=slice(int(this_start), int(stop))).time
                drop_nyear_drought.loc[{'lat': lat, 'lon': lon, 'time': doomed_times}] = np.nan

    return drop_nyear_drought

def drop_consecutive_all_drought(drought_mask, another_drought_mask):
    """
    Blank out droughts that are closely followed by a drought of either type.

    Two rules are applied per pixel; both read the untouched inputs and write
    NaN into a copy of ``drought_mask``:

    1. Same type: if two neighbouring runs of 1s in ``drought_mask`` start at
       most 24 time steps apart, the span from the start of the first run to
       the end of the second run is set to NaN.
    2. Other type: for every run start, the window of
       ``another_drought_mask`` from that start time to two calendar years
       later (clipped to the last available time) is inspected; if it
       contains any 1, the FIRST time step of that window is set to NaN.

    NOTE(review): rule 2 clears a single time step only, not the whole run --
    confirm that this is intended.

    Returns
    -------
    A copy of ``drought_mask`` with the affected cells replaced by NaN.
    """
    drop_nyear_drought = drought_mask.copy()

    # Visit every grid cell.
    for lat in tqdm(drop_nyear_drought.lat.values):
        for lon in drop_nyear_drought.lon.values:
            pixel_series = drought_mask.sel(lat=lat, lon=lon)
            pixel_other = another_drought_mask.sel(lat=lat, lon=lon)

            # Positions along time where the mask equals 1.
            flagged = np.where(pixel_series == 1)[0]
            if len(flagged) == 0:
                continue

            # First position of every run of consecutive flagged steps.
            run_starts = [flagged[0]]
            run_starts.extend(
                cur for prev, cur in zip(flagged[:-1], flagged[1:]) if cur != prev + 1
            )

            n_steps = len(pixel_series.time)
            n_runs = len(run_starts)
            for i, start_idx in enumerate(run_starts):
                # Rule 1: next run of the same type starts within 24 steps.
                if i + 1 < n_runs and run_starts[i + 1] - start_idx <= 24:
                    stop = run_starts[i + 1] + 1
                    while stop < n_steps and pixel_series[stop] == 1:
                        stop += 1
                    doomed_times = pixel_series.isel(time=slice(int(start_idx), int(stop))).time
                    drop_nyear_drought.loc[{'lat': lat, 'lon': lon, 'time': doomed_times}] = np.nan

                # Rule 2: any event of the other type within two years.
                run_start_time = pixel_series.time[start_idx].values
                two_years_later = pd.to_datetime(run_start_time) + pd.DateOffset(years=2)
                last_other_time = pixel_other.time[-1].values
                if two_years_later <= last_other_time:
                    window_end = two_years_later
                else:
                    window_end = last_other_time
                window = pixel_other.sel(time=slice(run_start_time, window_end))
                if (window == 1).any():
                    drop_nyear_drought.loc[{'lat': lat, 'lon': lon, 'time': window.time[0]}] = np.nan
    return drop_nyear_drought

def statistic_drought_times(drough_mask):
    """
    Mark the first time step of every drought run with 1.

    Per pixel, runs of consecutive time steps equal to 1 are located in
    ``drough_mask``; the output holds 1 at the first time step of each run
    and NaN everywhere else, so summing the output over time yields the
    number of drought events per pixel.

    Only the start of a run is needed, so the end of the run is not searched
    for (the previous implementation scanned to the end of each run and then
    discarded everything but the first step).

    The output is created with ``xr.full_like(drough_mask, np.nan)``, i.e. it
    follows the input's dtype -- presumably the mask is floating point
    (TODO confirm).

    Returns
    -------
    Array shaped like ``drough_mask`` with 1 at run starts and NaN elsewhere.
    """
    drought_times = xr.full_like(drough_mask, np.nan)

    # Visit every grid cell.
    for lat in tqdm(drough_mask.lat.values):
        for lon in drough_mask.lon.values:
            pixel_drought_mask = drough_mask.sel(lat=lat, lon=lon)

            # Positions along time where the mask equals 1.
            region_indices = np.where(pixel_drought_mask == 1)[0]
            if len(region_indices) == 0:
                continue

            # A position opens a run when it is the first flagged position or
            # when it does not directly follow the previous flagged position.
            opens_run = np.r_[True, np.diff(region_indices) != 1]
            for start_idx in region_indices[opens_run]:
                drought_times.loc[{'lat': lat, 'lon': lon, 'time': pixel_drought_mask.time[start_idx]}] = 1

    return drought_times




In [None]:
# ds=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Deseason_detrend\deseason_detrend.nc4')
# ds=xr.open_dataset(r'E:\PHD_Project\Data\SPEI\Result\drought_frequency.nc')
# ds1=xr.open_dataset(r'E:\PHD_Project\Data\SPEI\Source\spei03.nc')
# ds = xr.open_dataset('E:/PHD_Project/Data/GIMMS3g_NDVI/Resample_merge/ndvi3g_geo_v1_2_2021_0712.nc4')
# ds=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Month_to_daily\1988.nc4')
# ds=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Hants_daily\2021.nc4')
# ds1=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Month_to_daily\2021.nc4')
# xr.open_mfdataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Hants_daily\*.nc4', concat_dim='time', combine='nested' )
# ds=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_mask_monthly\growing_season_mask_monthly.nc4')
# growing_season_mask=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_mask_monthly\growing_season_mask_monthly.nc4').ndvi
# growing_season_mask.isel(time=12).plot()
# growing_season_mask.sel(time=slice('2010-01-01', '2012-01-01')).sel(lon=100, lat=60 , method='nearest').plot()
# growing_season_mask.sel(time=slice('2010-01-01', '2013-01-01')).sel(lon=-50, lat=0 , method='nearest').plot()
# growing_season_mask.sel(time=slice('1982-01-01', '1986-01-01')).sel(lon=-50, lat=-20 , method='nearest').plot()

In [None]:
# Dataset directories below `data_root`. `data_root` is not defined in this
# notebook (presumably it comes from one of the star imports -- TODO confirm).
datadir_GIMMS = join(data_root, 'GIMMS3g_NDVI')
datadir_SPEI = join(data_root, 'SPEI')

# 生长季开始，结束，长度

In [None]:
# Load the monthly growing-season mask and compute start / end / length.
# calculate_growing_season also writes its three results to disk.
growing_season_mask=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_mask_monthly\Growing_season_mask_monthly_leftrightclear.nc4').ndvi
growing_season_start, growing_season_end, growing_season_lenth=calculate_growing_season(growing_season_mask)
# Re-open the saved results. This rebinds the three names to Datasets; the
# `ndvi` variable is selected later when the means are taken.
growing_season_start=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_statistic\growing_season_start.nc4')
growing_season_end=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_statistic\growing_season_end.nc4')
growing_season_lenth=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_statistic\growing_season_lenth.nc4')
def filter_multigrowing_season(ds):
    """Keep ``ds`` only where at most one positive value occurs along ``time``.

    The number of entries greater than 0 is counted along the ``time``
    dimension; positions where that count exceeds 1 are masked by
    ``ds.where`` (NaN for xarray objects). The function is applied per
    calendar-year group elsewhere in this notebook, so the count is then the
    number of positive entries within one year.
    """
    positive_count = (ds > 0).sum(dim='time', skipna=True)
    return ds.where(positive_count <= 1)

# Per calendar year, mask pixels that have more than one positive entry
# (filter_multigrowing_season), i.e. set them to NaN in the data.
growing_season_start_new = growing_season_start.groupby('time.year').apply(filter_multigrowing_season)
growing_season_end_new = growing_season_end.groupby('time.year').apply(filter_multigrowing_season)
growing_season_lenth_new = growing_season_lenth.groupby('time.year').apply(filter_multigrowing_season)
# Temporal mean of the `ndvi` variable of each filtered Dataset.
growing_season_start_mean =growing_season_start_new.ndvi.mean(dim='time')
growing_season_end_mean =growing_season_end_new.ndvi.mean(dim='time')
growing_season_lenth_mean =growing_season_lenth_new.ndvi.mean(dim='time')
# statistics_2d and plot_spei2d are project helpers not defined in this notebook.
statistics_2d(growing_season_start_mean)
statistics_2d(growing_season_end_mean)
statistics_2d(growing_season_lenth_mean)
plot_spei2d(growing_season_start_mean, cmap= 'viridis')
plot_spei2d(growing_season_end_mean, cmap= 'viridis')
plot_spei2d(growing_season_lenth_mean, cmap= 'YlGn')

# 计算并统计四种干旱

In [None]:
# Load the saved event-start markers (`ndvi` variable) of the four drought types.
normal_drought_times=xr.open_dataset(r'E:\PHD_Project\Results\Drought_types\normal_drought_times.nc').ndvi
extreme_drought_times=xr.open_dataset(r'E:\PHD_Project\Results\Drought_types\extreme_drought_times.nc').ndvi
normal_hot_drought_times=xr.open_dataset(r'E:\PHD_Project\Results\Drought_types\normal_hot_drought_times.nc').ndvi
extreme_hot_drought_times=xr.open_dataset(r'E:\PHD_Project\Results\Drought_types\extreme_hot_drought_times.nc').ndvi

In [None]:
# Sum over time: per-pixel total for each drought type.
normal_drought_times_sum = normal_drought_times.sum(dim='time')
extreme_drought_times_sum = extreme_drought_times.sum(dim='time')
normal_hot_drought_times_sum = normal_hot_drought_times.sum(dim='time')
extreme_hot_drought_times_sum = extreme_hot_drought_times.sum(dim='time')

In [None]:
# Statistics of the normal-drought totals; pixels whose total is not > 0 are masked first.
statistics_2d(normal_drought_times_sum.where(normal_drought_times_sum>0))

In [None]:
# Statistics of the extreme-drought totals; pixels whose total is not > 0 are masked first.
statistics_2d(extreme_drought_times_sum.where(extreme_drought_times_sum>0))

In [None]:
# Statistics of the normal-hot-drought totals; pixels whose total is not > 0 are masked first.
statistics_2d(normal_hot_drought_times_sum.where(normal_hot_drought_times_sum>0))

In [None]:
# Statistics of the extreme-hot-drought totals; pixels whose total is not > 0 are masked first.
statistics_2d(extreme_hot_drought_times_sum.where(extreme_hot_drought_times_sum>0))

In [None]:
# Map of the normal-drought totals (pixels without a positive total masked), colour scale capped at 4.
plot_spei2d(normal_drought_times_sum.where(normal_drought_times_sum>0), cmap='hot', vmax=4)

In [None]:
# Map of the extreme-drought totals (pixels without a positive total masked), colour scale capped at 7.
plot_spei2d(extreme_drought_times_sum.where(extreme_drought_times_sum>0), cmap='hot', vmax=7)

In [None]:
# Map of the normal-hot-drought totals (pixels without a positive total masked), colour scale capped at 4.
plot_spei2d(normal_hot_drought_times_sum.where(normal_hot_drought_times_sum>0), cmap='hot', vmax=4)

In [None]:
# Map of the extreme-hot-drought totals (pixels without a positive total masked), colour scale capped at 4.
plot_spei2d(extreme_hot_drought_times_sum.where(extreme_hot_drought_times_sum>0), cmap='hot', vmax =4)

In [None]:
# Inputs: SPEI, monthly growing-season mask, CRU temperature and the hot-month mask.
spei=xr.open_dataset(r'E:\PHD_Project\Data\SPEI\Source\spei03.nc').spei
growing_season_mask=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_mask_monthly\Growing_season_mask_monthly_leftrightclear.nc4').ndvi
tmp=xr.open_dataset(r'E:\PHD_Project\Data\CRU\Source\cru_ts4.07.1901.2022.tmp.dat.nc').tmp
hot_mask=xr.open_dataset(r'E:\PHD_Project\Data\CRU\Hot_drought_tmp\hot_mask.nc').tmp
# Resample to month-start time stamps so the time axes line up.
spei = spei.resample(time='MS').mean()  # 'MS' = month start; mean() is the aggregation applied within each month
growing_season_mask = growing_season_mask.resample(time='MS').mean()
# Restrict temperature and SPEI to the time steps of the NDVI-based mask.
tmp=tmp.resample(time='MS').mean().sel(time=growing_season_mask.time)
spei = spei.sel(time=growing_season_mask.time)
# Select droughts inside the growing season.
# "normal": -1.5 < SPEI < -0.5; "extreme": SPEI <= -1.5; "hot": hot_mask == 1.
# The non-hot masks exclude cells already flagged (== 1) in the matching hot mask.
normal_hot_drought_mask = growing_season_mask.where((spei < -0.5) & (spei > -1.5) & (hot_mask == 1))
extreme_hot_drought_mask = growing_season_mask.where((spei <= -1.5) & (hot_mask == 1))
normal_drought_mask = growing_season_mask.where((spei < -0.5) & (spei > -1.5) & (normal_hot_drought_mask != 1))
extreme_drought_mask = growing_season_mask.where((spei <= -1.5) & (extreme_hot_drought_mask != 1))

In [None]:
def caculate_drought(drought_types, out_fname_list):
    """Filter and count droughts for every mask in ``drought_types``.

    For each mask, closely spaced droughts are removed with
    ``drop_consecutive_drought`` and the remaining event starts are marked
    with ``statistic_drought_times``. Both results are saved as
    ``<name>_droped.nc`` and ``<name>_times.nc`` in the hard-coded results
    folder, where ``<name>`` is the entry of ``out_fname_list`` at the same
    position.

    Returns
    -------
    (list of filtered masks, list of event-start arrays), in input order.
    """
    results_droped_drought = []
    results_drought_times = []

    for idx, mask in enumerate(tqdm(drought_types)):
        filtered = drop_consecutive_drought(mask)
        event_starts = statistic_drought_times(filtered)

        results_droped_drought.append(filtered)
        results_drought_times.append(event_starts)

        out_stem = out_fname_list[idx]
        filtered.to_netcdf(rf'E:\PHD_Project\Results\Drought_types\{out_stem}_droped.nc')
        event_starts.to_netcdf(rf'E:\PHD_Project\Results\Drought_types\{out_stem}_times.nc')

    return results_droped_drought, results_drought_times

In [None]:
# Masks and output file stems for the four drought types (same order in both lists).
drought_types=[normal_hot_drought_mask, extreme_hot_drought_mask, normal_drought_mask, extreme_drought_mask]
out_fname_list=['normal_hot_drought', 'extreme_hot_drought', 'normal_drought', 'extreme_drought']

In [None]:
# Filter and count the four drought types; results are also written to disk by caculate_drought.
droped_drought, drought_times=caculate_drought(drought_types, out_fname_list)

# 根据干旱强度提取、划分高温干旱

In [1]:
def drop_one_month_slight_drought(spei):
    """Set isolated single-step "slight drought" values of ``spei`` to NaN.

    A time step counts as slight drought when -0.55 <= SPEI <= -0.5. Per
    pixel, runs of consecutive slight-drought steps are located; every run
    that is exactly one step long has its SPEI value replaced by NaN.

    The slight-drought flags are computed once from the incoming values, and
    ``spei`` itself is modified IN PLACE and then returned (no copy is made).

    NOTE(review): runs are runs of the slight-drought condition only, so a
    single slight step surrounded by stronger drought steps is also blanked
    -- confirm that this is intended.
    """
    in_slight_band = (spei<= -0.5) & (spei>= -0.55)

    # Visit every grid cell.
    for lat in tqdm(spei.lat, desc='Processing Latitude'):
        for lon in spei.lon:
            pixel_flags = in_slight_band.sel(lat=lat, lon=lon)

            # Positions along time where the slight-drought flag is set.
            hits = np.where(pixel_flags == 1)[0]
            if len(hits) == 0:
                continue

            # First position of every run of consecutive flagged steps.
            run_starts = [hits[0]]
            run_starts.extend(
                cur for prev, cur in zip(hits[:-1], hits[1:]) if cur != prev + 1
            )

            n_steps = len(pixel_flags.time)
            for first in run_starts:
                # Walk to the (exclusive) end of the run.
                stop = first + 1
                while stop < n_steps and pixel_flags[stop] == 1:
                    stop += 1
                if stop - first > 1:
                    continue  # longer than one step: keep it
                lone_step = pixel_flags.isel(time=slice(int(first), int(stop)))
                spei.loc[{'lat': lat, 'lon': lon, 'time': lone_step.time[0]}] = np.nan

    return spei

def caculate_new_drought(drought_types, out_fname_list):
    """Filter and count droughts for a PAIR of drought masks.

    Each of the two masks is filtered with ``drop_consecutive_all_drought``
    using the other mask of the pair as the competing drought type, event
    starts are marked with ``statistic_drought_times``, and both results are
    saved as ``<name>_droped.nc`` / ``<name>_times.nc`` in the hard-coded
    results folder (``<name>`` taken from ``out_fname_list`` at the same
    position).

    Parameters
    ----------
    drought_types : sequence of exactly two masks
    out_fname_list : sequence with at least two output file stems

    Returns
    -------
    (list of filtered masks, list of event-start arrays), in input order.

    Raises
    ------
    ValueError
        If ``drought_types`` does not contain exactly two masks or fewer than
        two output names are given. Previously other lengths either failed
        with an IndexError or silently re-saved the second result under the
        later file names.
    """
    if len(drought_types) != 2:
        raise ValueError(
            f'caculate_new_drought expects exactly 2 drought masks, got {len(drought_types)}'
        )
    if len(out_fname_list) < len(drought_types):
        raise ValueError(
            f'caculate_new_drought needs one output name per mask, got {len(out_fname_list)}'
        )

    results_droped_drought=[]
    results_drought_times=[]

    for i in tqdm(range(len(drought_types))):
        # With two entries, index 1 - i is "the other" mask of the pair.
        droped_drought=drop_consecutive_all_drought(drought_types[i], drought_types[1 - i])
        drought_times=statistic_drought_times(droped_drought)
        results_droped_drought.append(droped_drought)
        results_drought_times.append(drought_times)

        droped_drought.to_netcdf(rf'E:\PHD_Project\New_results\Drought_types\{out_fname_list[i]}_droped.nc')
        drought_times.to_netcdf(rf'E:\PHD_Project\New_results\Drought_types\{out_fname_list[i]}_times.nc')

    return results_droped_drought, results_drought_times

def drought_intensity_duration(drought_mask, spei ):
    """Compute intensity and duration of every drought run, per pixel.

    Per pixel, runs of consecutive time steps equal to 1 are located in
    ``drought_mask``. For each run two values are stored at the run's first
    time step (all other cells stay NaN):

    * intensity -- the negated sum of ``spei`` over the run's positions
    * duration  -- the number of time steps in the run

    The SPEI values are taken by POSITION along time, so ``spei`` is assumed
    to share the time axis of ``drought_mask`` -- TODO confirm at call sites.

    Fix: the duration used to be ``end_idx - start_idx + 1`` although
    ``end_idx`` is already exclusive, which over-counted every event by one
    step (a one-step drought was reported as 2).

    Returns
    -------
    (drought_intensity, drought_duration)
        Arrays shaped like ``drought_mask``.
    """
    drought_intensity = xr.full_like(drought_mask, np.nan)
    drought_duration = xr.full_like(drought_mask, np.nan)

    # Visit every grid cell.
    for lat in tqdm(drought_mask.lat, desc='Processing Latitude'):
        for lon in drought_mask.lon:
            pixel_drought_mask = drought_mask.sel(lat=lat, lon=lon)
            pixel_spei = spei.sel(lat=lat, lon=lon)

            # Positions along time where the mask equals 1.
            region_indices = np.where(pixel_drought_mask == 1)[0]
            if len(region_indices) == 0:
                continue

            # A gap (> 1 step) between neighbouring flagged positions closes
            # one run and opens the next; run_stops is exclusive.
            gaps = np.flatnonzero(np.diff(region_indices) != 1)
            run_starts = region_indices[np.r_[0, gaps + 1]]
            run_stops = region_indices[np.r_[gaps, len(region_indices) - 1]] + 1

            for start_idx, end_idx in zip(run_starts, run_stops):
                region_spei = pixel_spei.isel(time=slice(int(start_idx), int(end_idx)))
                event = {'lat': lat, 'lon': lon, 'time': region_spei.time[0]}
                drought_intensity.loc[event] = -(region_spei.sum())
                # end_idx is exclusive, so the difference is the step count.
                drought_duration.loc[event] = end_idx - start_idx

    return drought_intensity, drought_duration

def caculate_intensity_durantion(drought_types, out_fname_list ,spei):
    """Compute and save drought intensity and duration for every mask.

    Each mask in ``drought_types`` is passed to ``drought_intensity_duration``
    together with ``spei``; the two results are saved as
    ``<name>_intensity.nc`` and ``<name>_duration.nc`` in the hard-coded
    results folder, where ``<name>`` is the entry of ``out_fname_list`` at
    the same position.

    Returns
    -------
    (list of intensity arrays, list of duration arrays), in input order.
    """
    results_intensity, results_duration = [], []

    for idx, mask in enumerate(tqdm(drought_types)):
        intensity, duration = drought_intensity_duration(mask, spei)

        results_intensity.append(intensity)
        results_duration.append(duration)

        out_stem = out_fname_list[idx]
        intensity.to_netcdf(rf'E:\PHD_Project\New_results\Drought_intensity_duration\{out_stem}_intensity.nc')
        duration.to_netcdf(rf'E:\PHD_Project\New_results\Drought_intensity_duration\{out_stem}_duration.nc')

    return results_intensity, results_duration

In [None]:
# Blank single-step slight droughts and save the result.
# Note: drop_one_month_slight_drought edits `spei` in place and returns the same object.
droped_spei= drop_one_month_slight_drought(spei)
droped_spei.to_netcdf(rf'E:\PHD_Project\Data\SPEI\Result\droped_spei.nc')

In [None]:
# Inputs: raw SPEI, the filtered SPEI saved earlier, growing-season mask,
# CRU temperature and the hot-month mask.
spei=xr.open_dataset(r'E:\PHD_Project\Data\SPEI\Source\spei03.nc').spei
droped_spei=xr.open_dataset(r'E:\PHD_Project\Data\SPEI\Result\droped_spei.nc').spei
growing_season_mask=xr.open_dataset(r'E:\PHD_Project\Data\GIMMS3g_NDVI\Growing_season_mask_monthly\Growing_season_mask_monthly_leftrightclear.nc4').ndvi
tmp=xr.open_dataset(r'E:\PHD_Project\Data\CRU\Source\cru_ts4.07.1901.2022.tmp.dat.nc').tmp
hot_mask=xr.open_dataset(r'E:\PHD_Project\Data\CRU\Hot_drought_tmp\hot_mask.nc').tmp
# Resample to month-start time stamps so the time axes line up.
spei = spei.resample(time='MS').mean()  # 'MS' = month start; mean() is the aggregation applied within each month
growing_season_mask = growing_season_mask.resample(time='MS').mean()
# Restrict temperature and SPEI to the time steps of the NDVI-based mask.
tmp=tmp.resample(time='MS').mean().sel(time=growing_season_mask.time)
spei = spei.sel(time=growing_season_mask.time)
# Select droughts inside the growing season (filtered SPEI < -0.5), split into
# hot (hot_mask == 1) and normal (not flagged in the hot mask).
# NOTE(review): droped_spei is used as loaded (it is not resampled here), so the
# masks rely on its saved time stamps matching the resampled mask -- confirm.
hot_drought_mask = growing_season_mask.where((droped_spei < -0.5) & (hot_mask == 1))
normal_drought_mask = growing_season_mask.where((droped_spei < -0.5) & (hot_drought_mask != 1))

In [None]:
# Masks and output file stems for the two-type scheme (same order in both lists).
drought_types=[normal_drought_mask, hot_drought_mask]
out_fname_list=['normal_drought', 'hot_drought']

In [None]:
# Filter each type against the other and count events; results are also written to disk.
droped_drought, drought_times=caculate_new_drought(drought_types, out_fname_list)

In [None]:
# Reload the filtered masks (`ndvi` variable) from the saved files.
normal_drought_droped=xr.open_dataset(r'E:\PHD_Project\New_results\Drought_types\normal_drought_droped.nc').ndvi
hot_drought_droped=xr.open_dataset(r'E:\PHD_Project\New_results\Drought_types\hot_drought_droped.nc').ndvi

In [None]:
# Map of the time-summed first entry of drought_times, with totals that are not > 0 masked.
plot_spei2d((drought_times[0].sum(dim= 'time').where(drought_times[0].sum(dim= 'time')>0)), cmap= global_cmap_r)

In [None]:
# Map of the time-summed second entry of drought_times (zeros are not masked here).
plot_spei2d(drought_times[1].sum(dim= 'time'))

In [None]:
# Inputs for the intensity/duration step: the filtered masks reloaded from disk.
drought_types=[normal_drought_droped, hot_drought_droped]
out_fname_list=['normal_drought', 'hot_drought']

In [None]:
# Intensity and duration of every event for both types; results are also written to disk.
results_intensity, results_duration=caculate_intensity_durantion(drought_types, out_fname_list, spei)

In [None]:
# Reload the hot-drought intensity and duration results (`ndvi` variable).
hot_drought_intensity=xr.open_dataset(r'E:\PHD_Project\New_results\Drought_intensity_duration\hot_drought_intensity.nc').ndvi
hot_drought_duration=xr.open_dataset(r'E:\PHD_Project\New_results\Drought_intensity_duration\hot_drought_duration.nc').ndvi

In [None]:
# Quick look: temporal mean of hot-drought intensity.
hot_drought_intensity.mean(dim= 'time').plot()

In [None]:
# Quick look: temporal mean of hot-drought duration.
hot_drought_duration.mean(dim= 'time').plot()

In [None]:
# Spot check: distinct intensity values (1982-2022) at the grid cell nearest to lon=-50, lat=-20.
np.unique((hot_drought_intensity.sel(time=slice('1982-01-01', '2022-01-01')).sel(lon=-50, lat=-20 , method='nearest')))

# 提取相应像元的值

In [None]:
# Put the normal-drought duration/intensity results on the grid of a reference
# resilience file. The hot-drought equivalents are kept below as disabled code.
# hot_drought_duration=xr.open_dataset(r'G:\PHD_Project\2_types_results\Drought_intensity_duration\hot_drought_duration.nc').ndvi
# hot_drought_intensity=xr.open_dataset(r'G:\PHD_Project\2_types_results\Drought_intensity_duration\hot_drought_intensity.nc').ndvi
normal_drought_duration=xr.open_dataset(r'G:\PHD_Project\2_types_results\Drought_intensity_duration\normal_drought_duration.nc').ndvi
normal_drought_intensity=xr.open_dataset(r'G:\PHD_Project\2_types_results\Drought_intensity_duration\normal_drought_intensity.nc').ndvi
standard_ds = xr.open_dataset(r'G:\PHD_Project\2_types_results\Resistance_resilience_0.08\hot_drought_resilience.nc').ndvi
# Coordinates of the reference data (without its `month` variable).
target_coords = standard_ds.drop_vars('month').coords
# Resample to 0.08 degrees: nearest-neighbour interpolation onto the reference coordinates.
# hot_drought_duration = hot_drought_duration.interp(coords=target_coords, method='nearest')
# hot_drought_intensity = hot_drought_intensity.interp(coords=target_coords, method='nearest')
normal_drought_duration = normal_drought_duration.interp(coords=target_coords, method='nearest')
normal_drought_intensity = normal_drought_intensity.interp(coords=target_coords, method='nearest')
# Store as float32.
# hot_drought_duration = hot_drought_duration.astype('float32')
# hot_drought_intensity = hot_drought_intensity.astype('float32')
normal_drought_duration = normal_drought_duration.astype('float32')
normal_drought_intensity = normal_drought_intensity.astype('float32')
# hot_drought_duration.to_netcdf(r'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\hot_drought_duration_0.08.nc')
# hot_drought_intensity.to_netcdf(r'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\hot_drought_intensity_0.08.nc')
normal_drought_duration.to_netcdf(r'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\normal_drought_duration_0.08.nc')
normal_drought_intensity.to_netcdf(r'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\normal_drought_intensity_0.08.nc')

## 重采样为0.08度

In [2]:
def resample_008(resampled_dataset, outdir):
    """Average a raster onto the 0.08-degree reference grid and write it out.

    The reference grid is read from a hard-coded resilience NetCDF file. Bin
    edges are built half a cell (0.04) around the reference ``lon`` / ``lat``
    values, the input is averaged (NaNs skipped) first into the ``x`` bins and
    then into the ``y`` bins, and the result is written with
    ``rio.to_raster``.

    Parameters
    ----------
    resampled_dataset : raster with ``x`` and ``y`` coordinates
        The data to aggregate (despite the name, this is the INPUT).
    outdir : str
        Output FILE path passed to ``rio.to_raster`` (not a directory).

    Notes
    -----
    The latitude edges are reversed before binning, so the reference
    latitudes are presumably stored in descending order -- TODO confirm.
    """
    reference = xr.open_dataset(r'G:\PHD_Project\2_types_results\Resistance_resilience_0.08\normal_drought_resilience.nc').ndvi
    ref_lon = reference.lon.values
    ref_lat = reference.lat.values

    # Cell edges: one extra value past the last centre, shifted by half a cell.
    lon_edges = np.append(ref_lon, ref_lon[-1]+0.08)-0.04
    lat_edges = np.append(ref_lat, ref_lat[-1]-0.08)+0.04

    # Aggregate along x first, then along y.
    lon_binned = resampled_dataset.groupby_bins("x", bins=lon_edges, labels=ref_lon).mean(skipna = True)
    coarse = lon_binned.groupby_bins("y", bins=lat_edges[::-1], labels=ref_lat[::-1]).mean(skipna = True)

    # Tell rioxarray which (binned) dimensions are spatial, then export.
    coarse.rio.set_spatial_dims(x_dim='x_bins', y_dim='y_bins', inplace=True)
    coarse.rio.write_nodata(np.nan, inplace=True)
    coarse.rio.to_raster(outdir)

## 重采样WorldClim数据

In [3]:
# Aggregate every file in the WorldClim bioclim folder onto the 0.08-degree grid,
# writing each result under the same file name in the *_0.08 folder.
file_names= os.listdir(r'G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio')
for filename in tqdm(file_names):
    # masked=True: presumably nodata cells are read as NaN so the bin means skip them -- TODO confirm
    ds = rxr.open_rasterio(fr'G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio\{filename}', masked=True)
    outdir = fr'G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio_0.08\{filename}'
    resample_008(ds, outdir)

## 重采样soilgrid数据

In [5]:
# For every SoilGrid property folder: merge the six depth layers into
# 0-30 cm, 30-100 cm and 100-200 cm layers and aggregate them to 0.08 degrees.
file_dirs = os.listdir(r'G:\PHD_Project\Data\SoilGrid_v2.0')
for file_dir in tqdm(file_dirs):
    file_path = os.path.join(r'G:\PHD_Project\Data\SoilGrid_v2.0', file_dir)
    file_names = os.listdir(file_path)
    # More than two files: the folder is treated as holding the six per-depth rasters.
    if len(file_names)>2:
        # Property rasters per depth interval, reprojected to EPSG:4326.
        ds1=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_0-5cm_mean_1000.tif', masked=True)
        ds1=ds1.rio.reproject("EPSG:4326")
        ds2=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_5-15cm_mean_1000.tif', masked=True)
        ds2=ds2.rio.reproject("EPSG:4326")
        ds3=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_15-30cm_mean_1000.tif', masked=True)
        ds3=ds3.rio.reproject("EPSG:4326")
        ds4=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_30-60cm_mean_1000.tif', masked=True)
        ds4=ds4.rio.reproject("EPSG:4326")
        ds5=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_60-100cm_mean_1000.tif', masked=True)
        ds5=ds5.rio.reproject("EPSG:4326")
        ds6=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_100-200cm_mean_1000.tif', masked=True)
        ds6=ds6.rio.reproject("EPSG:4326")
        # Rasters from the `bdod` folder (presumably bulk density) for the same depth intervals.
        bulk_densitie1=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\bdod\bdod_0-5cm_mean_1000.tif', masked=True)
        bulk_densitie1=bulk_densitie1.rio.reproject("EPSG:4326")
        bulk_densitie2=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\bdod\bdod_5-15cm_mean_1000.tif', masked=True)
        bulk_densitie2=bulk_densitie2.rio.reproject("EPSG:4326")
        bulk_densitie3=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\bdod\bdod_15-30cm_mean_1000.tif', masked=True)
        bulk_densitie3=bulk_densitie3.rio.reproject("EPSG:4326")
        bulk_densitie4=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\bdod\bdod_30-60cm_mean_1000.tif', masked=True)
        bulk_densitie4=bulk_densitie4.rio.reproject("EPSG:4326")
        bulk_densitie5=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\bdod\bdod_60-100cm_mean_1000.tif', masked=True)
        bulk_densitie5=bulk_densitie5.rio.reproject("EPSG:4326")
        bulk_densitie6=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\bdod\bdod_100-200cm_mean_1000.tif', masked=True)
        bulk_densitie6=bulk_densitie6.rio.reproject("EPSG:4326")
        # Layer weight = bdod value x layer thickness in cm (5, 10, 15, 30, 40, 100).
        weight1 = bulk_densitie1 * 5
        weight2 = bulk_densitie2 * 10
        weight3 = bulk_densitie3 * 15
        weight4 = bulk_densitie4 * 30
        weight5 = bulk_densitie5 * 40
        weight6 = bulk_densitie6 * 100  # computed but not used below
        # Weighted means of the sub-layers; the deepest layer is taken as is.
        layer_30 = (ds1*weight1 + ds2*weight2 + ds3*weight3)/(weight1 + weight2 + weight3)
        layer_100 = (ds4*weight4 + ds5*weight5)/(weight4 + weight5)
        layer_200 = ds6
        outdir1 = fr'G:\PHD_Project\Data\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_0-30cm_mean.tif'
        outdir2 = fr'G:\PHD_Project\Data\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_30-100cm_mean.tif'
        outdir3 = fr'G:\PHD_Project\Data\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_100-200cm_mean.tif'
        ouput_path = fr'G:\PHD_Project\Data\SoilGrid_merge_weight_0.08\{file_dir}'
        # mk_dir is a project helper; presumably it creates the output folder -- TODO confirm.
        mk_dir(ouput_path)
        resample_008(layer_30, outdir1)
        resample_008(layer_100, outdir2)
        resample_008(layer_200, outdir3)
    else:
        # Otherwise a single ready-made 0-30 cm raster is expected in the folder.
        ds = rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid_v2.0\{file_dir}\{file_dir}_0-30cm_mean_1000.tif', masked=True)
        ds=ds.rio.reproject("EPSG:4326")
        ouput_path = fr'G:\PHD_Project\Data\SoilGrid_merge_weight_0.08\{file_dir}'
        mk_dir(ouput_path)
        outdir = fr'G:\PHD_Project\Data\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_0-30cm_mean.tif'
        resample_008(ds, outdir)
        

In [None]:
# Same layer merge as the all-properties cell, run for `nitrogen` only, with
# the data under a different root folder and with the weights first
# interpolated onto the grid of the 0-5 cm property raster.
file_dirs=['nitrogen']
for file_dir in tqdm(file_dirs):
    file_path = os.path.join(r'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0', file_dir)
    file_names = os.listdir(file_path)
    # Only folders with more than two files are processed; there is no else branch here.
    if len(file_names)>2:
        # Property rasters per depth interval, reprojected to EPSG:4326.
        ds1=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\{file_dir}\{file_dir}_0-5cm_mean_1000.tif', masked=True)
        ds1=ds1.rio.reproject("EPSG:4326")
        ds2=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\{file_dir}\{file_dir}_5-15cm_mean_1000.tif', masked=True)
        ds2=ds2.rio.reproject("EPSG:4326")
        ds3=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\{file_dir}\{file_dir}_15-30cm_mean_1000.tif', masked=True)
        ds3=ds3.rio.reproject("EPSG:4326")
        ds4=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\{file_dir}\{file_dir}_30-60cm_mean_1000.tif', masked=True)
        ds4=ds4.rio.reproject("EPSG:4326")
        ds5=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\{file_dir}\{file_dir}_60-100cm_mean_1000.tif', masked=True)
        ds5=ds5.rio.reproject("EPSG:4326")
        ds6=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\{file_dir}\{file_dir}_100-200cm_mean_1000.tif', masked=True)
        ds6=ds6.rio.reproject("EPSG:4326")
        # Rasters from the `bdod` folder (presumably bulk density) for the same depth intervals.
        bulk_densitie1=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\bdod\bdod_0-5cm_mean_1000.tif', masked=True)
        bulk_densitie1=bulk_densitie1.rio.reproject("EPSG:4326")
        bulk_densitie2=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\bdod\bdod_5-15cm_mean_1000.tif', masked=True)
        bulk_densitie2=bulk_densitie2.rio.reproject("EPSG:4326")
        bulk_densitie3=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\bdod\bdod_15-30cm_mean_1000.tif', masked=True)
        bulk_densitie3=bulk_densitie3.rio.reproject("EPSG:4326")
        bulk_densitie4=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\bdod\bdod_30-60cm_mean_1000.tif', masked=True)
        bulk_densitie4=bulk_densitie4.rio.reproject("EPSG:4326")
        bulk_densitie5=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\bdod\bdod_60-100cm_mean_1000.tif', masked=True)
        bulk_densitie5=bulk_densitie5.rio.reproject("EPSG:4326")
        bulk_densitie6=rxr.open_rasterio(fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_v2.0\bdod\bdod_100-200cm_mean_1000.tif', masked=True)
        bulk_densitie6=bulk_densitie6.rio.reproject("EPSG:4326")
        # Layer weight = bdod value x layer thickness in cm (5, 10, 15, 30, 40, 100).
        weight1 = bulk_densitie1 * 5
        weight2 = bulk_densitie2 * 10
        weight3 = bulk_densitie3 * 15
        weight4 = bulk_densitie4 * 30
        weight5 = bulk_densitie5 * 40
        weight6 = bulk_densitie6 * 100
        # Put every weight raster on the x/y coordinates of ds1 (nearest neighbour)
        # so that the arithmetic below combines matching cells.
        weight1 = weight1.interp(y=ds1.y, x=ds1.x, method='nearest')
        weight2 = weight2.interp(y=ds1.y, x=ds1.x, method='nearest')
        weight3 = weight3.interp(y=ds1.y, x=ds1.x, method='nearest')
        weight4 = weight4.interp(y=ds1.y, x=ds1.x, method='nearest')
        weight5 = weight5.interp(y=ds1.y, x=ds1.x, method='nearest')
        weight6 = weight6.interp(y=ds1.y, x=ds1.x, method='nearest')  # not used below
        # Weighted means of the sub-layers; the deepest layer is taken as is.
        layer_30 = (ds1*weight1 + ds2*weight2 + ds3*weight3)/(weight1 + weight2 + weight3)
        layer_100 = (ds4*weight4 + ds5*weight5)/(weight4 + weight5)
        layer_200 = ds6
        outdir1 = fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_0-30cm_mean.tif'
        outdir2 = fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_30-100cm_mean.tif'
        outdir3 = fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_0.08\{file_dir}\{file_dir}_100-200cm_mean.tif'
        ouput_path = fr'G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_0.08\{file_dir}'
        # mk_dir is a project helper; presumably it creates the output folder -- TODO confirm.
        mk_dir(ouput_path)
        resample_008(layer_30, outdir1)
        resample_008(layer_100, outdir2)
        resample_008(layer_200, outdir3)

## 重采样ForestAge数据

In [3]:
# Aggregate the ForestAge_TC030 variable to 0.08 degrees, then plot the result and the original.
# NOTE(review): resample_008 bins on coordinates named "x"/"y" and writes with
# rio.to_raster -- confirm that this NetCDF variable has those coordinates and
# that the `.nc` output can be read back with a `ForestAge_TC030` variable.
ds= xr.open_dataset(r'G:\PHD_Project\Data\BGIForestAge\202498163741222_BGIForestAgeMPIBGC1.0.0.nc').ForestAge_TC030
resample_008(ds, 'G:/PHD_Project/Data/BGIForestAge/BGIForestAge_0.08.nc')
xr.open_dataset(r'G:/PHD_Project/Data/BGIForestAge/BGIForestAge_0.08.nc').ForestAge_TC030.plot()
xr.open_dataset(r'G:\PHD_Project\Data\BGIForestAge\202498163741222_BGIForestAgeMPIBGC1.0.0.nc').ForestAge_TC030.plot()

## 将0.08度的species数据导出为csv

In [57]:
# Flatten the binned raster `bb` into a (Lon, Lat, S) table and save it as CSV.
# NOTE(review): `bb` is not assigned at notebook level in the visible cells (it is
# only a local name inside resample_008), so this cell depends on leftover
# kernel state -- confirm where `bb` comes from.
# np.meshgrid(x, y) returns arrays of shape (len(y), len(x)).
lon_grid, lat_grid = np.meshgrid(bb.x_bins.values, bb.y_bins.values)
df = pd.DataFrame({
    'Lon': lon_grid.ravel(),
    'Lat': lat_grid.ravel(),
    'S': bb.sel(band=1).values.ravel()
})
# Drop cells without a value.
df = df.dropna(subset=['S'])
df.to_csv(r'F:\BaiduSyncdisk\Data\Global Map of Local Tree Species Richness per hectare (geoTiff, ~3km resolution)\global_0.08.csv', index= False)

## 提取每个像元各种指标

## KDTree 提取法

In [None]:
def extract_data(row, drought_type, drought_intensity, drought_duration,variable_name):
    """Expand one species record into one record per non-NaN time step of its nearest grid cell.

    Uses the notebook-level globals ``tree`` and ``lon_grid``: ``tree`` is queried with
    ``[lon, lat]`` and the returned flat index is unravelled with ``lon_grid.shape`` into
    ``(lon_idx, lat_idx)``.

    Parameters
    ----------
    row : pandas.Series
        One species record. Coordinates are taken from ``'x'``/``'y'`` when both are
        present (the keys this function originally read); otherwise from
        ``'Lon'``/``'Lat'``, which are the columns of the exported species CSV.
    drought_type, drought_intensity, drought_duration
        Objects whose ``'ndvi'`` variable can be indexed with ``isel(lat=..., lon=...)``.
        ``drought_type`` must also expose ``time``.
    variable_name : str
        Key under which the ``drought_type`` value is stored in each output record.

    Returns
    -------
    list of pandas.Series
        One copy of ``row`` per time step whose ``drought_type`` value is not NaN, extended
        with ``'drought_time'``, ``variable_name``, ``'drought_intensity'`` and
        ``'drought_duration'``.
    """
    # Keep the legacy keys working, but fall back to the column names the species CSV really has
    if 'x' in row and 'y' in row:
        lon_value, lat_value = row['x'], row['y']
    else:
        lon_value, lat_value = row['Lon'], row['Lat']

    # Nearest grid point, then flat index -> (lon, lat) index pair
    _, flat_idx = tree.query([lon_value, lat_value])
    lon_idx, lat_idx = np.unravel_index(flat_idx, lon_grid.shape)

    # Series along the remaining dimension for that grid cell
    extracted_values = drought_type['ndvi'].isel(lat=lat_idx, lon=lon_idx).values
    drought_intensity_values = drought_intensity['ndvi'].isel(lat=lat_idx, lon=lon_idx).values
    drought_duration_values = drought_duration['ndvi'].isel(lat=lat_idx, lon=lon_idx).values
    # Looked up once instead of once per time step
    drought_times = drought_type.time.values

    results = []
    for t_idx, value in enumerate(extracted_values):
        if np.isnan(value):
            continue  # no value recorded for this time step
        result_row = row.copy()
        result_row['drought_time'] = drought_times[t_idx]
        result_row[variable_name] = value
        result_row['drought_intensity'] = drought_intensity_values[t_idx]
        result_row['drought_duration'] = drought_duration_values[t_idx]
        results.append(result_row)

    return results

In [None]:
from scipy.spatial import KDTree

# For every drought metric: build a KD-tree over the metric's (lon, lat) grid, run
# `extract_data` on every species record and write the collected records to one CSV.
species_data = pd.read_csv(r'F:\BaiduSyncdisk\Data\Global Map of Local Tree Species Richness per hectare (geoTiff, ~3km resolution)\global_0.08.csv')
# drought_types=['hot_drought_resistance','hot_drought_resilience','normal_drought_resistance', 'normal_drought_resilience']
drought_types=['normal_drought_resistance', 'normal_drought_resilience']
# A separate loop variable keeps the `drought_types` list intact (the loop used to overwrite it)
for drought_name in drought_types:
    # The hot-drought metrics use the hot intensity/duration files, everything else the normal ones
    if drought_name == 'hot_drought_resistance' or drought_name == 'hot_drought_resilience':
        regime = 'hot'
    else:
        regime = 'normal'

    # Context managers close the three files once this metric has been processed
    with xr.open_dataset(rf'G:\PHD_Project\2_types_results\Resistance_resilience_0.08\{drought_name}.nc') as drought_type, \
            xr.open_dataset(rf'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\{regime}_drought_duration_0.08.nc') as drought_duration, \
            xr.open_dataset(rf'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\{regime}_drought_intensity_0.08.nc') as drought_intensity:

        lons = drought_type['lon'].values
        lats = drought_type['lat'].values
        # `tree` and `lon_grid` are read as globals by `extract_data`;
        # indexing='ij' gives grids of shape (n_lon, n_lat)
        lon_grid, lat_grid = np.meshgrid(lons, lats, indexing='ij')
        coordinates = np.column_stack((lon_grid.ravel(), lat_grid.ravel()))
        tree = KDTree(coordinates)

        results = []
        for j, row in tqdm(species_data.iterrows(), total=len(species_data)):
            results.extend(extract_data(row, drought_type, drought_intensity, drought_duration, drought_name))

    # NOTE(review): the inputs are the 0.08 products but the output folder is `Global_0.025` — confirm the target.
    final_df = pd.DataFrame(results)
    final_df.to_csv(rf'G:\PHD_Project\Plot_Drought\Global_0.025\plot_{drought_name}.csv', index=False)

### 提取气候土壤林龄为csv

In [2]:
from natsort import natsorted
from functools import lru_cache


@lru_cache(maxsize=None)
def _load_forest_age(path):
    """Open the forest-age NetCDF once and reuse its ``ForestAge_TC030`` variable for every row."""
    return xr.open_dataset(path).ForestAge_TC030


@lru_cache(maxsize=None)
def _list_dir(directory):
    """List ``directory`` once; a tuple is returned so the cached listing cannot be mutated."""
    return tuple(os.listdir(directory))


@lru_cache(maxsize=None)
def _open_rasters(directory, natural_sort=False):
    """Open every raster in ``directory`` once and return ``(file_name, DataArray)`` pairs.

    With ``natural_sort=True`` the files are ordered with ``natsorted``, otherwise in
    ``os.listdir`` order.
    """
    names = os.listdir(directory)
    if natural_sort:
        names = natsorted(names)
    return tuple(
        (name, rxr.open_rasterio(os.path.join(directory, name), masked=True))
        for name in names
    )


def extract_data(row, bio_climate_name):
    """Attach forest age, bioclimate and soil values of the nearest grid cell to one record.

    Uses the notebook-level globals ``tree`` (queried with ``[Lat, Lon]``) and
    ``lon_grid`` (its shape unravels the flat index into ``(lat_idx, lon_idx)``).
    The same index pair is applied to every layer.
    NOTE(review): this presumes all rasters share the forest-age grid — confirm.

    All input files are opened on first use and cached; re-running this cell resets
    the caches. Previously every file was re-opened for every row and never closed.

    Parameters
    ----------
    row : pandas.Series
        Record with ``'Lat'`` and ``'Lon'`` entries.
    bio_climate_name : sequence of str
        Output key for the i-th bioclimate raster (rasters are taken in natural
        file-name order).

    Returns
    -------
    list of pandas.Series
        A single-element list holding the extended copy of ``row``.
    """
    # Nearest grid point and its (lat, lon) index pair
    _, flat_idx = tree.query([row['Lat'], row['Lon']])
    lat_idx, lon_idx = np.unravel_index(flat_idx, lon_grid.shape)
    results = []
    result_row = row.copy()

    # Forest age
    forest_age_data = _load_forest_age(r'G:\PHD_Project\Data\BGIForestAge\BGIForestAge_0.08.nc')
    result_row['forest_age'] = forest_age_data.isel(latitude_bins= lat_idx, longitude_bins=lon_idx).values

    # Bioclimate rasters
    world_clim_dir = r"G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio_extract_0.08"
    for i, (_, world_clim_data) in enumerate(_open_rasters(world_clim_dir, True)):
        world_clim_values = world_clim_data.isel(band=0, y=lat_idx, x=lon_idx).values
        result_row[bio_climate_name[i]] = world_clim_values

    # Soil rasters: one sub-directory per soil property, column named after the file stem
    soil_grid_path = r"G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_extract_0.08"
    for soil_grid_dir in _list_dir(soil_grid_path):
        soil_grid_file_path = os.path.join(soil_grid_path, soil_grid_dir)
        for soil_grid_file_name, soil_grid_data in _open_rasters(soil_grid_file_path):
            soil_grid_values = soil_grid_data.isel(band=0, y=lat_idx, x=lon_idx).values
            vars_name = soil_grid_file_name.split('.tif')[0]
            result_row[vars_name] = soil_grid_values

    results.append(result_row)

    return results

In [None]:
from scipy.spatial import KDTree

# KD-tree over the (lat, lon) coordinates of the resampled forest-age grid; `tree` and
# `lon_grid` are read as globals by `extract_data`.
model_ds = xr.open_dataset(r'G:\PHD_Project\Data\BGIForestAge\BGIForestAge_0.08.nc').ForestAge_TC030
lats = model_ds['latitude_bins'].values
lons = model_ds['longitude_bins'].values
lat_grid, lon_grid = np.meshgrid(lats, lons, indexing='ij')
coordinates = np.array([*zip(lat_grid.ravel(), lon_grid.ravel())])
tree = KDTree(coordinates)

# Output column names, one per bioclimate raster
bio_climate = [
    'Annual Mean Temperature',
    'Isothermality',
    'Temperature Seasonality',
    'Annual Precipitation',
    'Precipitation Seasonality',
    'Precipitation of Warmest Quarter',
    'Precipitation of Coldest Quarter',
]

# Enrich every per-metric table and write it under the same file name to the KDTree folder
source_dir = r"G:\PHD_Project\Plot_Drought\Global_0.08"
source_files = os.listdir(source_dir)
for source_file in tqdm(source_files):
    file_name = os.path.join(source_dir, source_file)
    species_data = pd.read_csv(file_name)
    results = []
    for j, row in tqdm(species_data.iterrows(), total=len(species_data)):
        results += extract_data(row, bio_climate)
    # Collected records -> DataFrame -> CSV
    final_df = pd.DataFrame(results)
    final_df.to_csv(rf'G:\PHD_Project\Plot_Drought\Global_0.08_KDTree\{source_file}', index=False)

## 直接经纬度查找法提取

### 提取抵抗力、恢复力、干旱强度等为csv文件

In [None]:
def extract_data(row, drought_type, drought_intensity, drought_duration,variable_name):
    """Expand one species record into one record per non-NaN time step at its nearest grid cell.

    Parameters
    ----------
    row : pandas.Series
        Record with ``'Lat'`` and ``'Lon'`` entries.
    drought_type
        Either a dataset holding an ``'ndvi'`` variable or that variable itself; it is
        indexed with ``sel(lat=..., lon=..., method='nearest')`` and must expose ``time``.
    drought_intensity, drought_duration
        Datasets whose ``'ndvi'`` variable is indexed the same way.
    variable_name : str
        Key under which the ``drought_type`` value is stored in each output record.

    Returns
    -------
    list of pandas.Series
        One copy of ``row`` per time step whose ``drought_type`` value is not NaN, extended
        with ``'drought_time'``, ``variable_name``, ``'drought_intensity'`` and
        ``'drought_duration'``.
    """
    # An xr.Dataset is a Mapping, so `Dataset.values` is the mapping method rather than an
    # array and cannot be enumerated. Select the 'ndvi' variable first, exactly as is done
    # for intensity and duration; an object without that data variable is used as-is.
    if 'ndvi' in getattr(drought_type, 'data_vars', ()):
        drought_values = drought_type['ndvi']
    else:
        drought_values = drought_type

    lat, lon = row['Lat'], row['Lon']
    extracted_values = drought_values.sel(lat=lat, lon=lon, method='nearest').values
    drought_intensity_values = drought_intensity['ndvi'].sel(lat=lat, lon=lon, method='nearest').values
    drought_duration_values = drought_duration['ndvi'].sel(lat=lat, lon=lon, method='nearest').values
    # Looked up once instead of once per time step
    drought_times = drought_type.time.values

    results = []
    for t_idx, value in enumerate(extracted_values):
        if np.isnan(value):
            continue  # no value recorded for this time step
        result_row = row.copy()
        result_row['drought_time'] = drought_times[t_idx]
        result_row[variable_name] = value
        result_row['drought_intensity'] = drought_intensity_values[t_idx]
        result_row['drought_duration'] = drought_duration_values[t_idx]
        results.append(result_row)

    return results

In [None]:
# For every drought metric: run `extract_data` on every species record (nearest-neighbour
# lat/lon lookup) and write the collected records to one CSV per metric.
species_data = pd.read_csv(r'F:\BaiduSyncdisk\Data\Global Map of Local Tree Species Richness per hectare (geoTiff, ~3km resolution)\global_0.08.csv')
drought_types=['hot_drought_resistance','hot_drought_resilience','normal_drought_resistance', 'normal_drought_resilience']
# A separate loop variable keeps the `drought_types` list intact (the loop used to overwrite it)
for drought_name in drought_types:
    # The hot-drought metrics use the hot intensity/duration files, everything else the normal ones
    if drought_name == 'hot_drought_resistance' or drought_name == 'hot_drought_resilience':
        regime = 'hot'
    else:
        regime = 'normal'

    # Context managers close the three files once this metric has been processed
    with xr.open_dataset(rf'G:\PHD_Project\2_types_results\Resistance_resilience_0.08\{drought_name}.nc') as drought_type, \
            xr.open_dataset(rf'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\{regime}_drought_duration_0.08.nc') as drought_duration, \
            xr.open_dataset(rf'G:\PHD_Project\2_types_results\Drought_intensity_duration_0.08\{regime}_drought_intensity_0.08.nc') as drought_intensity:
        results = []
        for j, row in tqdm(species_data.iterrows(), total=len(species_data)):
            results.extend(extract_data(row, drought_type, drought_intensity, drought_duration, drought_name))

    # Collected records -> DataFrame -> CSV
    final_df = pd.DataFrame(results)
    final_df.to_csv(rf'G:\PHD_Project\Plot_Drought\Global_0.08\plot_{drought_name}.csv', index=False)

### 提取气候土壤林龄数据为csv

In [2]:
from natsort import natsorted
from functools import lru_cache


@lru_cache(maxsize=None)
def _load_forest_age(path):
    """Open the forest-age NetCDF once and reuse its ``ForestAge_TC030`` variable for every row."""
    return xr.open_dataset(path).ForestAge_TC030


@lru_cache(maxsize=None)
def _list_dir(directory):
    """List ``directory`` once; a tuple is returned so the cached listing cannot be mutated."""
    return tuple(os.listdir(directory))


@lru_cache(maxsize=None)
def _open_rasters(directory, natural_sort=False):
    """Open every raster in ``directory`` once and return ``(file_name, DataArray)`` pairs.

    With ``natural_sort=True`` the files are ordered with ``natsorted``, otherwise in
    ``os.listdir`` order.
    """
    names = os.listdir(directory)
    if natural_sort:
        names = natsorted(names)
    return tuple(
        (name, rxr.open_rasterio(os.path.join(directory, name), masked=True))
        for name in names
    )


def extract_data(row, bio_climate_name):
    """Attach forest age, bioclimate and soil values at the record's location to one record.

    Every layer is sampled with a nearest-neighbour ``sel`` on ``row['Lat']`` /
    ``row['Lon']``. No band is selected on the rasters, so each raster value keeps its
    band dimension.

    All input files are opened on first use and cached; re-running this cell resets
    the caches. Previously every file was re-opened for every row and never closed.

    Parameters
    ----------
    row : pandas.Series
        Record with ``'Lat'`` and ``'Lon'`` entries.
    bio_climate_name : sequence of str
        Output key for the i-th bioclimate raster (rasters are taken in natural
        file-name order).

    Returns
    -------
    list of pandas.Series
        A single-element list holding the extended copy of ``row``.
    """
    results = []
    result_row = row.copy()

    # Forest age
    forest_age_data = _load_forest_age(r'G:\PHD_Project\Data\BGIForestAge\BGIForestAge_0.08.nc')
    result_row['forest_age'] = forest_age_data.sel(latitude_bins=row['Lat'], longitude_bins=row['Lon'], method='nearest').values

    # Bioclimate rasters
    world_clim_dir = r"G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio_0.08"
    for i, (_, world_clim_data) in enumerate(_open_rasters(world_clim_dir, True)):
        world_clim_values = world_clim_data.sel(y=row['Lat'], x=row['Lon'], method='nearest').values
        result_row[bio_climate_name[i]] = world_clim_values

    # Soil rasters: one sub-directory per soil property, column named after the file stem
    soil_grid_path = r"G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_0.08"
    for soil_grid_dir in _list_dir(soil_grid_path):
        soil_grid_file_path = os.path.join(soil_grid_path, soil_grid_dir)
        for soil_grid_file_name, soil_grid_data in _open_rasters(soil_grid_file_path):
            soil_grid_values = soil_grid_data.sel(y=row['Lat'], x=row['Lon'], method='nearest').values
            vars_name = soil_grid_file_name.split('.tif')[0]
            result_row[vars_name] = soil_grid_values

    results.append(result_row)

    return results

In [2]:
from natsort import natsorted
from functools import lru_cache


@lru_cache(maxsize=None)
def _load_forest_age(path):
    """Open the forest-age NetCDF once and reuse its ``ForestAge_TC030`` variable for every row."""
    return xr.open_dataset(path).ForestAge_TC030


@lru_cache(maxsize=None)
def _list_dir(directory):
    """List ``directory`` once; a tuple is returned so the cached listing cannot be mutated."""
    return tuple(os.listdir(directory))


@lru_cache(maxsize=None)
def _open_rasters(directory, natural_sort=False):
    """Open every raster in ``directory`` once and return ``(file_name, DataArray)`` pairs.

    With ``natural_sort=True`` the files are ordered with ``natsorted``, otherwise in
    ``os.listdir`` order.
    """
    names = os.listdir(directory)
    if natural_sort:
        names = natsorted(names)
    return tuple(
        (name, rxr.open_rasterio(os.path.join(directory, name), masked=True))
        for name in names
    )


def extract_data(row, bio_climate_name):
    """Attach forest age, bioclimate and soil values at the record's location to one record.

    Every layer is sampled with a nearest-neighbour ``sel`` on ``row['Lat']`` /
    ``row['Lon']``; rasters are read from ``band=1``.

    All input files are opened on first use and cached; re-running this cell resets
    the caches. Previously every file was re-opened for every row and never closed.

    Parameters
    ----------
    row : pandas.Series
        Record with ``'Lat'`` and ``'Lon'`` entries.
    bio_climate_name : sequence of str
        Output key for the i-th bioclimate raster (rasters are taken in natural
        file-name order).

    Returns
    -------
    list of pandas.Series
        A single-element list holding the extended copy of ``row``.
    """
    results = []
    result_row = row.copy()

    # Forest age
    forest_age_data = _load_forest_age(r'G:\PHD_Project\Data\BGIForestAge\BGIForestAge_0.08.nc')
    result_row['forest_age'] = forest_age_data.sel(latitude_bins=row['Lat'], longitude_bins=row['Lon'], method='nearest').values

    # Bioclimate rasters
    world_clim_dir = r"G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio_extract_0.08"
    for i, (_, world_clim_data) in enumerate(_open_rasters(world_clim_dir, True)):
        world_clim_values = world_clim_data.sel(band=1, y=row['Lat'], x=row['Lon'], method='nearest').values
        result_row[bio_climate_name[i]] = world_clim_values

    # Soil rasters: one sub-directory per soil property, column named after the file stem
    soil_grid_path = r"G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_extract_0.08"
    for soil_grid_dir in _list_dir(soil_grid_path):
        soil_grid_file_path = os.path.join(soil_grid_path, soil_grid_dir)
        for soil_grid_file_name, soil_grid_data in _open_rasters(soil_grid_file_path):
            soil_grid_values = soil_grid_data.sel(band=1, y=row['Lat'], x=row['Lon'], method='nearest').values
            vars_name = soil_grid_file_name.split('.tif')[0]
            result_row[vars_name] = soil_grid_values

    results.append(result_row)

    return results

### 高效率提取气候土壤林龄数据为csv

In [2]:
from natsort import natsorted

def extract_data(row, bio_climate_name, forest_age_data, world_clim_data_list, soil_grid_data_dict):
    """Attach forest age, bioclimate and soil values to one record using pre-opened layers.

    Parameters
    ----------
    row : pandas.Series
        Record with ``'Lat'`` and ``'Lon'`` entries.
    bio_climate_name : sequence of str
        Output key for the i-th entry of ``world_clim_data_list``.
    forest_age_data
        Layer sampled with ``sel(latitude_bins=..., longitude_bins=..., method='nearest')``.
    world_clim_data_list : sequence
        Layers sampled with ``sel(band=1, y=..., x=..., method='nearest')``.
    soil_grid_data_dict : dict
        Maps a file name to a layer sampled like the bioclimate layers; the output key is
        the file name up to its first ``'.tif'``.

    Returns
    -------
    list of pandas.Series
        A single-element list holding the extended copy of ``row``.
    """
    lat, lon = row['Lat'], row['Lon']
    enriched = row.copy()

    # Forest age
    enriched['forest_age'] = forest_age_data.sel(
        latitude_bins=lat, longitude_bins=lon, method='nearest'
    ).values

    # Bioclimate layers, named by position
    for position, climate_layer in enumerate(world_clim_data_list):
        climate_value = climate_layer.sel(band=1, y=lat, x=lon, method='nearest').values
        enriched[bio_climate_name[position]] = climate_value

    # Soil layers, named after the file stem
    for soil_file, soil_layer in soil_grid_data_dict.items():
        soil_value = soil_layer.sel(band=1, y=lat, x=lon, method='nearest').values
        enriched[soil_file.split('.tif')[0]] = soil_value

    return [enriched]


In [3]:
# bio_climate_name = ['Annual Mean Temperature', 'Mean Diurnal Range', 'Isothermality', 'Temperature Seasonality', 'Max Temperature of Warmest Month',
#             'Min Temperature of Coldest Month', 'Temperature Annual Range', 'Mean Temperature of Wettest Quarter', 'Mean Temperature of Driest Quarter',
#             'Mean Temperature of Warmest Quarter', 'Mean Temperature of Coldest Quarter', 'Annual Precipitation', 'Precipitation of Wettest Month',
#             'Precipitation of Driest Month', 'Precipitation Seasonality ', 'Precipitation of Wettest Quarter', 'Precipitation of Driest Quarter',
#             'Precipitation of Warmest Quarter', 'Precipitation of Coldest Quarter'
#                ]

In [None]:
# Output column names, one per bioclimate raster (rasters are taken in natural file-name order)
bio_climate_name = ['Annual Mean Temperature', 'Isothermality', 'Temperature Seasonality', 'Annual Precipitation', 'Precipitation Seasonality',
               'Precipitation of Warmest Quarter', 'Precipitation of Coldest Quarter'
]
source_dir = r"G:\PHD_Project\Plot_Drought\Global_0.08"
source_files = os.listdir(source_dir)

# The layers below are the same for every source file, so they are opened once here
# instead of once per loop iteration.

# Forest age
forest_age_data = xr.open_dataset(r'G:\PHD_Project\Data\BGIForestAge\BGIForestAge_0.08.nc').ForestAge_TC030

# Bioclimate rasters
world_clim_dir = r"G:\PHD_Project\Data\WorldClim_v2.1\wc2.1_30s_bio_extract_0.08"
world_clim_files = natsorted(os.listdir(world_clim_dir))
world_clim_data_list = [rxr.open_rasterio(os.path.join(world_clim_dir, world_clim_file), masked=True)
                        for world_clim_file in world_clim_files]

# Soil rasters: one sub-directory per soil property, keyed by file name
soil_grid_path = r"G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_extract_0.08"
soil_grid_dirs = os.listdir(soil_grid_path)
soil_grid_data_dict = {}
for soil_grid_dir in soil_grid_dirs:
    soil_grid_file_path = os.path.join(soil_grid_path, soil_grid_dir)
    soil_grid_file_names = os.listdir(soil_grid_file_path)
    for soil_grid_file_name in soil_grid_file_names:
        soil_grid_data = rxr.open_rasterio(os.path.join(soil_grid_file_path, soil_grid_file_name), masked=True)
        soil_grid_data_dict[soil_grid_file_name] = soil_grid_data

# Enrich every per-metric table and write it under the same file name to the "quick" folder
for source_file in tqdm(source_files):
    file_name = os.path.join(source_dir, source_file)
    species_data = pd.read_csv(file_name)
    results = []
    for j, row in tqdm(species_data.iterrows(), total=len(species_data)):
        results.extend(extract_data(row, bio_climate_name, forest_age_data, world_clim_data_list, soil_grid_data_dict))
    # Collected records -> DataFrame -> CSV
    final_df = pd.DataFrame(results)
    final_df.to_csv(rf'G:\PHD_Project\Plot_Drought\Global_0.08_quick\{source_file}', index=False)

In [2]:
from natsort import natsorted

def extract_data(row, soil_grid_files, soil_grid_data_list):
    """Attach one soil value per pre-opened soil layer to a record.

    Parameters
    ----------
    row : pandas.Series
        Record with ``'Lat'`` and ``'Lon'`` entries.
    soil_grid_files : sequence of str
        File name of the i-th layer; the output key is the name up to its first ``'.tif'``.
    soil_grid_data_list : sequence
        Layers sampled with ``sel(band=1, y=..., x=..., method='nearest')``.

    Returns
    -------
    list of pandas.Series
        A single-element list holding the extended copy of ``row``.
    """
    lat, lon = row['Lat'], row['Lon']
    enriched = row.copy()

    for position, soil_layer in enumerate(soil_grid_data_list):
        soil_value = soil_layer.sel(band=1, y=lat, x=lon, method='nearest').values
        column = soil_grid_files[position].split('.tif')[0]
        enriched[column] = soil_value

    return [enriched]


In [3]:
source_dir = r"G:\PHD_Project\Plot_Drought\Global_0.08"
source_files = os.listdir(source_dir)

# The nitrogen rasters are the same for every source file, so they are opened once here
# instead of once per loop iteration.
soil_grid_dir = r'G:\PHD_Project\Data\SoilGrid\SoilGrid_merge_weight_extract_0.08\nitrogen'
soil_grid_files = os.listdir(soil_grid_dir)
soil_grid_data_list = [rxr.open_rasterio(os.path.join(soil_grid_dir, soil_grid_file), masked=True)
                       for soil_grid_file in soil_grid_files]

# NOTE(review): input tables come from `Global_0.08`, while the result is written to
# `Global_0.08_quick\{source_file}`; a file of that name already present there is replaced
# by a table holding only the input columns plus the nitrogen columns — confirm this is intended.
for source_file in tqdm(source_files):
    file_name = os.path.join(source_dir, source_file)
    species_data = pd.read_csv(file_name)
    results = []
    for j, row in tqdm(species_data.iterrows(), total=len(species_data)):
        results.extend(extract_data(row, soil_grid_files, soil_grid_data_list))
    # Collected records -> DataFrame -> CSV
    final_df = pd.DataFrame(results)
    final_df.to_csv(rf'G:\PHD_Project\Plot_Drought\Global_0.08_quick\{source_file}', index=False)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1375750 [00:00<?, ?it/s]

  0%|          | 0/531406 [00:00<?, ?it/s]

  0%|          | 0/602779 [00:00<?, ?it/s]

  0%|          | 0/241616 [00:00<?, ?it/s]

### 删除某列

In [None]:
# Remove two nitrogen columns from every CSV in the "quick" folder, rewriting each file in place.
source_dir = r"G:\PHD_Project\Plot_Drought\Global_0.08_quick"
source_files = os.listdir(source_dir)
for source_file in tqdm(source_files):
    file_name = os.path.join(source_dir, source_file)
    # errors='ignore' keeps the cell re-runnable: once the columns are gone, a second run
    # used to stop with a KeyError on the first file.
    species_data = pd.read_csv(file_name).drop(
        columns=['nitrogen_0-30cm_mean', 'nitrogen_30-100cm_mean'],
        errors='ignore',
    )
    # index=False keeps the row index out of the file
    species_data.to_csv(file_name, index=False)
