In [1]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import os
import numpy as np
import pandas as pd
from shapely.geometry import box

# Load the polygon shapefile that is used to split the rasters further down.
# Its 'ECOREGION' and 'Eco_Landsc' attribute columns are the grouping keys of
# the clipping loop, so both columns must be present in this file.
shapefile_path = r'D:\wenqu\chapter1_2\figures\ecoregion\noak_landscapepermafrost_12122014\landscape_6n.shp'
# Read all features (geometry plus attribute columns) into a GeoDataFrame.
shapefile = gpd.read_file(shapefile_path)

# Helper: rectangular footprint of a raster file as a shapely geometry
def get_tif_bbox(tif_file):
    """Return the extent of the raster at *tif_file* as a shapely rectangle.

    The file is opened only long enough to read its bounds; the rectangle is
    expressed in the raster's own coordinate system.
    """
    with rasterio.open(tif_file) as dataset:
        # ``bounds`` unpacks in the order left, bottom, right, top
        left, bottom, right, top = dataset.bounds
    return box(left, bottom, right, top)

# Input rasters that will be clipped by polygon group
tif_files = [
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site4a_pn_std_masked.tif',
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site4b_pn_std_masked.tif',
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site6ab_pn_std_masked.tif',
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site6c_pn_std_masked.tif',
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site7_pn_std_masked.tif',
]

# Folder that receives the clipped rasters and the summary CSV
output_directory = r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\split\pn\std'
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_directory, exist_ok=True)

# One dict per clipped raster that gets written; these become the CSV rows
summary_data = []

# Running total of counted pixels per cleaned ecoregion name, summed over all
# input rasters and all landscape classes
ecoregion_total_pixel_count = {}

def _count_valid_pixels(image, nodata):
    """Return how many cells of *image* are not the NoData marker.

    Args:
        image: NumPy array holding the clipped raster values.
        nodata: The raster's declared NoData value; may be NaN or None.

    Returns:
        The number of cells that differ from *nodata*. When *nodata* is None
        there is no marker to compare against, so every cell is counted.
    """
    if nodata is None:
        return int(image.size)
    if np.isnan(nodata):
        # NaN never compares equal to anything (itself included), so
        # ``image != nodata`` would report every cell as valid.
        return int(np.count_nonzero(~np.isnan(image)))
    return int(np.count_nonzero(image != nodata))


# Loop through each .tif file
for tif_file in tif_files:
    # Keep only the polygons that intersect the bounding box of this raster.
    # NOTE(review): this test and mask() below assume the shapefile and the
    # raster share one CRS - nothing here reprojects; confirm for new inputs.
    tif_bbox = get_tif_bbox(tif_file)
    shapefile_filtered = shapefile[shapefile.intersects(tif_bbox)]

    if shapefile_filtered.empty:
        print(f"No overlapping geometries for {tif_file}. Skipping...")
        continue  # Nothing to clip for this raster

    tif_name = os.path.basename(tif_file)

    # Open the raster once per file instead of once per polygon group
    with rasterio.open(tif_file) as src:
        # Original NoData value, reused for masking, metadata and counting
        nodata_value = src.nodata

        # One clipped output per (ecoregion, landscape class) combination
        for (ecoregion, physiograp), group in shapefile_filtered.groupby(['ECOREGION', 'Eco_Landsc']):
            try:
                # Clip the raster to this group's polygons; cells outside the
                # polygons are filled with the NoData value
                out_image, out_transform = mask(src, group.geometry, crop=True, nodata=nodata_value)
            except ValueError as e:
                # Only the clip itself is guarded: a group can pass the
                # bounding-box filter and still not overlap the raster grid.
                # Just this group is skipped, not the whole raster.
                print(f"Skipping {tif_file} due to no overlap with the geometry: {e}")
                continue

            # Metadata for the cropped window (new size and geotransform)
            out_meta = src.meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": nodata_value
            })

            # Count the data-bearing cells (summed over all bands)
            non_nan_pixel_count = _count_valid_pixels(out_image, nodata_value)

            # Make the names safe to embed in a file name
            ecoregion_clean = ecoregion.replace(' ', '_').replace(',', '')
            physiograp_clean = physiograp.replace(' ', '_').replace(',', '')

            # Output name: <ecoregion>_<landscape class>_<input file name>
            output_filename = f"{ecoregion_clean}_{physiograp_clean}_{tif_name}"
            output_path = os.path.join(output_directory, output_filename)

            # Write the clipped raster, keeping the original NoData value
            with rasterio.open(output_path, "w", **out_meta) as dest:
                dest.write(out_image)

            # One summary row per written file
            summary_data.append({
                'ecoregion': ecoregion_clean,
                'PHYSIOGRAP': physiograp_clean,
                'site': output_filename,  # Use the output file name as the "site"
                'pixel_number': non_nan_pixel_count
            })

            # Accumulate the total pixel count per ecoregion
            ecoregion_total_pixel_count[ecoregion_clean] = (
                ecoregion_total_pixel_count.get(ecoregion_clean, 0) + non_nan_pixel_count
            )

# Attach to every row the total pixel count of its ecoregion. Despite the
# column name 'ecoregion_area', the value is a pixel count, not an area.
for entry in summary_data:
    entry['ecoregion_area'] = ecoregion_total_pixel_count.get(entry['ecoregion'], 0)

# Pin the column list so the CSV still gets its header row when no raster was
# clipped (an empty list would otherwise yield a frame with no columns).
# The order matches the key order of the row dicts.
summary_columns = ['ecoregion', 'PHYSIOGRAP', 'site', 'pixel_number', 'ecoregion_area']
df_summary = pd.DataFrame(summary_data, columns=summary_columns)

# Save the summary next to the clipped rasters
output_csv_path = os.path.join(output_directory, 'ecoregion_pixel_summary1.csv')
df_summary.to_csv(output_csv_path, index=False)

print(f"CSV file saved to {output_csv_path}")


Skipping D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site4b_pn_std_masked.tif due to no overlap with the geometry: Input shapes do not overlap raster.
Skipping D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site4b_pn_std_masked.tif due to no overlap with the geometry: Input shapes do not overlap raster.
Skipping D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site6ab_pn_std_masked.tif due to no overlap with the geometry: Input shapes do not overlap raster.
Skipping D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site7_pn_std_masked.tif due to no overlap with the geometry: Input shapes do not overlap raster.
CSV file saved to D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\split\pn\std\ecoregion_pixel_summary1.csv


In [2]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import os
import numpy as np
import pandas as pd
from shapely.geometry import box

# Load the polygon shapefile that is used to split the rasters further down.
# Its 'ECOREGION' and 'Eco_Landsc' attribute columns are the grouping keys of
# the clipping loop, so both columns must be present in this file.
shapefile_path = r'D:\wenqu\chapter1_2\figures\ecoregion\noak_landscapepermafrost_12122014\landscape_5n.shp'
# Read all features (geometry plus attribute columns) into a GeoDataFrame.
shapefile = gpd.read_file(shapefile_path)

# Helper: rectangular footprint of a raster file as a shapely geometry
def get_tif_bbox(tif_file):
    """Return the extent of the raster at *tif_file* as a shapely rectangle.

    The file is opened only long enough to read its bounds; the rectangle is
    expressed in the raster's own coordinate system.
    """
    with rasterio.open(tif_file) as dataset:
        # ``bounds`` unpacks in the order left, bottom, right, top
        left, bottom, right, top = dataset.bounds
    return box(left, bottom, right, top)

# Input rasters that will be clipped by polygon group
tif_files = [
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site2a_pn_std_masked.tif',
    r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\remove_outliers\pn\site3b_pn_std_masked.tif',
]

# Folder that receives the clipped rasters and the summary CSV
output_directory = r'D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\split\pn\std'
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_directory, exist_ok=True)

# One dict per clipped raster that gets written; these become the CSV rows
# (initialised once - the original repeated this assignment)
summary_data = []

# Running total of counted pixels per cleaned ecoregion name, summed over all
# input rasters and all landscape classes
ecoregion_total_pixel_count = {}

def _count_valid_pixels(image, nodata):
    """Return how many cells of *image* are not the NoData marker.

    Args:
        image: NumPy array holding the clipped raster values.
        nodata: The raster's declared NoData value; may be NaN or None.

    Returns:
        The number of cells that differ from *nodata*. When *nodata* is None
        there is no marker to compare against, so every cell is counted.
    """
    if nodata is None:
        return int(image.size)
    if np.isnan(nodata):
        # NaN never compares equal to anything (itself included), so
        # ``image != nodata`` would report every cell as valid.
        return int(np.count_nonzero(~np.isnan(image)))
    return int(np.count_nonzero(image != nodata))


# Loop through each .tif file
for tif_file in tif_files:
    # Keep only the polygons that intersect the bounding box of this raster.
    # NOTE(review): this test and mask() below assume the shapefile and the
    # raster share one CRS - nothing here reprojects; confirm for new inputs.
    tif_bbox = get_tif_bbox(tif_file)
    shapefile_filtered = shapefile[shapefile.intersects(tif_bbox)]

    if shapefile_filtered.empty:
        print(f"No overlapping geometries for {tif_file}. Skipping...")
        continue  # Nothing to clip for this raster

    tif_name = os.path.basename(tif_file)

    # Open the raster once per file instead of once per polygon group
    with rasterio.open(tif_file) as src:
        # Original NoData value, reused for masking, metadata and counting
        nodata_value = src.nodata

        # One clipped output per (ecoregion, landscape class) combination
        for (ecoregion, physiograp), group in shapefile_filtered.groupby(['ECOREGION', 'Eco_Landsc']):
            try:
                # Clip the raster to this group's polygons; cells outside the
                # polygons are filled with the NoData value
                out_image, out_transform = mask(src, group.geometry, crop=True, nodata=nodata_value)
            except ValueError as e:
                # Only the clip itself is guarded: a group can pass the
                # bounding-box filter and still not overlap the raster grid.
                # Just this group is skipped, not the whole raster.
                print(f"Skipping {tif_file} due to no overlap with the geometry: {e}")
                continue

            # Metadata for the cropped window (new size and geotransform)
            out_meta = src.meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": nodata_value
            })

            # Count the data-bearing cells (summed over all bands)
            non_nan_pixel_count = _count_valid_pixels(out_image, nodata_value)

            # Make the names safe to embed in a file name
            ecoregion_clean = ecoregion.replace(' ', '_').replace(',', '')
            physiograp_clean = physiograp.replace(' ', '_').replace(',', '')

            # Output name: <ecoregion>_<landscape class>_<input file name>
            output_filename = f"{ecoregion_clean}_{physiograp_clean}_{tif_name}"
            output_path = os.path.join(output_directory, output_filename)

            # Write the clipped raster, keeping the original NoData value
            with rasterio.open(output_path, "w", **out_meta) as dest:
                dest.write(out_image)

            # One summary row per written file
            summary_data.append({
                'ecoregion': ecoregion_clean,
                'PHYSIOGRAP': physiograp_clean,
                'site': output_filename,  # Use the output file name as the "site"
                'pixel_number': non_nan_pixel_count
            })

            # Accumulate the total pixel count per ecoregion
            ecoregion_total_pixel_count[ecoregion_clean] = (
                ecoregion_total_pixel_count.get(ecoregion_clean, 0) + non_nan_pixel_count
            )

# Attach to every row the total pixel count of its ecoregion. Despite the
# column name 'ecoregion_area', the value is a pixel count, not an area.
for entry in summary_data:
    entry['ecoregion_area'] = ecoregion_total_pixel_count.get(entry['ecoregion'], 0)

# Pin the column list so the CSV still gets its header row when no raster was
# clipped (an empty list would otherwise yield a frame with no columns).
# The order matches the key order of the row dicts.
summary_columns = ['ecoregion', 'PHYSIOGRAP', 'site', 'pixel_number', 'ecoregion_area']
df_summary = pd.DataFrame(summary_data, columns=summary_columns)

# Save the summary next to the clipped rasters
output_csv_path = os.path.join(output_directory, 'ecoregion_pixel_summary2.csv')
df_summary.to_csv(output_csv_path, index=False)

print(f"CSV file saved to {output_csv_path}")


CSV file saved to D:\wenqu\chapter1_2\updated_aviris\AVIRIS_trait_map3\split\pn\std\ecoregion_pixel_summary2.csv
