In [4]:
import geopandas as gpd
import rasterio
import rasterio.mask
import numpy as np

# Path to your shapefile and raster
shapefile_path = r'E:\wenqu\segmentationn\segmentation_shapfile\site7_TP_segments.shp'
raster_path = r'E:\wenqu\segmentationn\masked_aviris_trait\site7_la_ymax.tif'

# 读取shapefile
polygons = gpd.read_file(shapefile_path)

# 确保 'FID' 在您的 GeoDataFrame 中，如果不是，替换为实际的标识符列名
assert 'FID' in polygons.columns, "FID column not found in shapefile"

# 初始化两个新列：一个用于存储像素数量，另一个用于存储平均值
polygons['la_count'] = 0
polygons['la_ymax'] = np.nan  # 使用np.nan初始化平均值列，代表默认值

# 打开栅格文件
with rasterio.open(raster_path) as src:
    for index, row in polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算非nodata像素的数量
        pixel_count = (out_image != src.nodata).sum()
        polygons.at[index, 'la_count'] = pixel_count
        
        # 如果像素数量大于2000，则计算平均值
        if pixel_count > 1:
            # 将nodata值转换为NaN，以便计算平均值
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            mean_val = np.nanmean(out_image)
            polygons.at[index, 'la_ymax'] = mean_val

# 过滤掉像素数量小于2000的多边形
# filtered_polygons = polygons[polygons['la_count'] > 2000]

# 打印过滤后的结果
print(polygons[['FID', 'la_count', 'la_ymax']])


       FID  la_count   la_ymax
0        0      1562  1.786029
1        1         0       NaN
2        2         2  1.341860
3        3         0       NaN
4        4         0       NaN
...    ...       ...       ...
3206  3206       420  1.996219
3207  3207      2111  2.040077
3208  3208      1309  2.059386
3209  3209      2160  1.920060
3210  3210      1586  2.810955

[3211 rows x 3 columns]


In [5]:
polygons

Unnamed: 0,FID,geometry,la_count,la_ymax
0,0,"POLYGON ((446207.272 7789160.971, 446461.743 7...",1562,1.786029
1,1,"POLYGON ((447552.335 7788475.627, 447602.321 7...",0,
2,2,"POLYGON ((445885.569 7789147.447, 445894.657 7...",2,1.341860
3,3,"POLYGON ((447143.363 7788684.008, 447247.878 7...",0,
4,4,"POLYGON ((447053.238 7788529.594, 447062.326 7...",0,
...,...,...,...,...
3206,3206,"POLYGON ((431986.121 7760014.683, 431995.210 7...",420,1.996219
3207,3207,"POLYGON ((432394.596 7760041.233, 432399.140 7...",2111,2.040077
3208,3208,"POLYGON ((430871.099 7760657.225, 430875.643 7...",1309,2.059386
3209,3209,"POLYGON ((431244.670 7760592.806, 431253.759 7...",2160,1.920060


In [2]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\segmentationn\masked_aviris_trait\site7_sla_ymax.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['sla_count'] = 0
filtered_polygons['sla_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'sla_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'sla_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [3]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\segmentationn\masked_aviris_trait\site7_d13c_ymax.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['d13c_count'] = 0
filtered_polygons['d13c_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'd13c_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'd13c_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [4]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\segmentationn\masked_aviris_trait\site7_d15n_ymax.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['d15n_count'] = 0
filtered_polygons['d15n_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'd15n_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'd15n_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [5]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\segmentationn\masked_aviris_trait\site7_ldmc_ymax.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['ldmc_count'] = 0
filtered_polygons['ldmc_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'ldmc_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'ldmc_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [6]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\segmentationn\masked_aviris_trait\site7_pc_ymax.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['pc_count'] = 0
filtered_polygons['pc_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'pc_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'pc_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [7]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\environmental_factors\snow_free_data\site7_snow.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['snow_count'] = 0
filtered_polygons['snow_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'snow_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'snow_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [8]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\environmental_factors\RAD\Direct_Insolation_site7_proj.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['RAD_count'] = 0
filtered_polygons['RAD_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'RAD_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'RAD_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [9]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\environmental_factors\SWI\site7_swi_proj.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['swi_count'] = 0
filtered_polygons['swi_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'swi_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'swi_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [10]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\environmental_factors\TWI\site7_twi_proj.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['twi_count'] = 0
filtered_polygons['twi_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'twi_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'twi_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [11]:
# 新的TIFF栅格文件路径
new_raster_path = r'E:\wenqu\environmental_factors\dem_masked\projectionn\site7_proj.tif'

# 新的 TIFF 栅格文件路径
# new_raster_path = 'path_to_your_new_raster.tif'

# 初始化用于存储新计算值的列
filtered_polygons['dem_count'] = 0
filtered_polygons['dem_mean_value'] = np.nan

# 打开新的栅格文件
with rasterio.open(new_raster_path) as src:
    for index, row in filtered_polygons.iterrows():
        # 对于每个多边形，使用栅格进行遮罩，以获取仅在多边形内的值
        out_image, out_transform = rasterio.mask.mask(src, [row['geometry']], crop=True, nodata=src.nodata)
        
        # 计算新的非nodata像素数量
        new_pixel_count = (out_image != src.nodata).sum()
        filtered_polygons.at[index, 'dem_count'] = new_pixel_count
        
        # 如果新的像素数量大于0，则计算新的平均值
        if new_pixel_count > 0:
            out_image = out_image.astype(float)
            out_image[out_image == src.nodata] = np.nan
            new_mean_val = np.nanmean(out_image)
            filtered_polygons.at[index, 'dem_mean_value'] = new_mean_val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [12]:
filtered_polygons

Unnamed: 0,FID,geometry,la_count,la_ymax,sla_count,sla_mean_value,d13c_count,d13c_mean_value,d15n_count,d15n_mean_value,...,snow_count,snow_mean_value,RAD_count,RAD_mean_value,swi_count,swi_mean_value,twi_count,twi_mean_value,dem_count,dem_mean_value
13,13,"POLYGON ((446459.428 7789026.767, 446504.869 7...",3254,1.824276,3254,4.621369,3254,-29.232131,3254,-3.931438,...,54,144.074074,25032,0.959393,307,16.403366,25032,2.299500,25032,10.196408
31,31,"POLYGON ((445816.043 7789303.073, 445852.397 7...",3251,1.188939,3251,4.578506,3251,-28.973617,3251,-3.567295,...,51,143.058824,23903,0.965562,289,16.285467,23903,2.043305,23903,11.753713
34,34,"POLYGON ((446616.914 7788740.464, 446635.091 7...",2375,1.640811,2375,4.626823,2375,-29.126785,2375,-2.906615,...,44,143.909091,19711,0.957978,247,16.273954,19711,2.266876,19711,10.634101
44,44,"POLYGON ((445901.430 7789133.641, 445919.606 7...",2479,1.456353,2479,4.602234,2479,-29.421653,2479,-4.349399,...,34,143.441176,16265,0.961293,199,16.128141,16265,2.339992,16265,10.988452
65,65,"POLYGON ((445623.263 7788846.084, 445632.352 7...",2157,1.610726,2157,4.591054,2157,-29.205428,2157,-4.036001,...,32,145.718750,14213,0.973973,169,16.209073,14213,3.102873,14213,11.227879
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3197,3197,"POLYGON ((431052.627 7760833.754, 431057.171 7...",4064,2.036691,4064,4.592824,4064,-29.161771,4064,-5.242683,...,60,152.933333,26422,0.967312,325,14.599487,26422,2.074737,26422,58.778932
3203,3203,"POLYGON ((432483.769 7760070.207, 432492.857 7...",3610,1.941558,3610,4.613905,3610,-29.393465,3610,-4.710607,...,61,146.721311,36373,0.962227,441,14.499622,36373,1.784836,36373,58.596079
3204,3204,"POLYGON ((433216.997 7759610.751, 433248.806 7...",4459,2.347136,4459,4.623554,4459,-29.607767,4459,-5.776081,...,66,150.515152,33734,0.960658,409,14.823553,33734,1.666912,33734,59.598671
3207,3207,"POLYGON ((432394.596 7760041.233, 432399.140 7...",2111,2.040077,2111,4.603599,2111,-29.472922,2111,-5.023373,...,30,150.233333,18877,0.965371,235,14.317730,18877,1.623679,18877,58.704780


In [13]:
csv_output_path = 'E:\wenqu\zonal_statistics\site7.csv'
filtered_polygons.drop(columns=['geometry']).to_csv(csv_output_path, index=False)