In [1]:
import os
import rasterio
import geopandas as gpd
import pandas as pd
from rasterio.features import geometry_mask
import numpy as np
from tqdm import tqdm
import gc

# Input folders: one raster per tile for edge and area, for 2000 and 2020.
forest_2000_dir = 'G:/Hangkai/Global forest edge/2000Edge'
forest_area_2000_dir = 'G:/Hangkai/Global forest edge/2000Area'
forest_2020_dir = 'G:/Hangkai/Global forest edge/2020Edge'
forest_area_2020_dir = 'G:/Hangkai/Global forest edge/2020Area'

forest_2000_files = os.listdir(forest_2000_dir)
forest_area_2000_files = os.listdir(forest_area_2000_dir)
forest_2020_files = os.listdir(forest_2020_dir)
forest_area_2020_files = os.listdir(forest_area_2020_dir)

# Every iteration opens the same file name in all four folders, so only
# tiles present in all of them can be processed.
common_files = (
    set(forest_2000_files)
    & set(forest_area_2000_files)
    & set(forest_2020_files)
    & set(forest_area_2020_files)
)

output_dir = 'G:/Hangkai/Global forest edge/2000Edge_classified/'
os.makedirs(output_dir, exist_ok=True)

# Set aggregation factor: number of source pixels per output pixel along
# each axis. round() instead of int() so a float quotient that lands just
# below the integer is not truncated to factor - 1.
factor = round(0.1 / 0.00025)

# Divisor applied to the summed edge values before differencing.
# NOTE(review): presumably a unit conversion of the stored edge values -- confirm.
EDGE_SCALE = 1000 * 10

# Change classes 0..5 are assigned below; classes >= 2 denote an actual change.
N_CLASSES = 6

# Sorted so the processing order is reproducible between runs.
for file in tqdm(sorted(common_files)):
    with rasterio.open(os.path.join(forest_2000_dir, file)) as src_2000, \
         rasterio.open(os.path.join(forest_area_2000_dir, file)) as src_area_2000, \
         rasterio.open(os.path.join(forest_area_2020_dir, file)) as src_area_2020, \
         rasterio.open(os.path.join(forest_2020_dir, file)) as src_2020:

        edge_2000 = src_2000.read(1)
        area_2000 = src_area_2000.read(1)
        area_2020 = src_area_2020.read(1)
        edge_2020 = src_2020.read(1)

        # Initialize the result raster with zeros
        EDGE_class = np.zeros_like(edge_2000, dtype=np.int8)

        # Assign each class (later assignments overwrite earlier ones):
        #   0: no edge in either year      1: edge present and unchanged
        #   2: edge up,   area up          3: edge up,   area not up
        #   4: edge down, area not down    5: edge down, area down
        EDGE_class[(edge_2000 == 0) & (edge_2020 == 0)] = 0
        EDGE_class[(edge_2000 != 0) & (edge_2020 == edge_2000)] = 1
        EDGE_class[(edge_2020 > edge_2000) & (area_2020 > area_2000)] = 2
        EDGE_class[(edge_2020 > edge_2000) & (area_2020 <= area_2000)] = 3
        EDGE_class[(edge_2000 > edge_2020) & (area_2020 >= area_2000)] = 4
        EDGE_class[(edge_2000 > edge_2020) & (area_2020 < area_2000)] = 5

        # The area rasters are only needed for the classification above.
        del area_2020
        del area_2000
        gc.collect()

        # Create arrays for new aggregated raster. Trailing rows/columns that
        # do not fill a complete factor x factor window are dropped.
        new_shape = (EDGE_class.shape[0] // factor, EDGE_class.shape[1] // factor)
        if 0 in new_shape:
            # A zero-sized raster cannot be written; skip such a tile.
            print("Skipping tile smaller than one aggregation window:", file)
            continue

        aggregated_class = np.zeros(new_shape, dtype=np.int8)
        aggregated_diff = np.zeros(new_shape, dtype=np.float32)

        # Loop through the new aggregated raster's coordinates
        for i in range(new_shape[0]):
            rows = slice(i * factor, (i + 1) * factor)
            for j in range(new_shape[1]):
                cols = slice(j * factor, (j + 1) * factor)

                # First layer - Dominant change class.
                # If any change class (>= 2) occurs, the most frequent one
                # wins (ties go to the lowest class id); otherwise the cell
                # is 1 if any unchanged edge is present, else 0.
                counts = np.bincount(EDGE_class[rows, cols].ravel(), minlength=N_CLASSES)
                if counts[2:].any():
                    aggregated_class[i, j] = 2 + int(np.argmax(counts[2:]))
                elif counts[1] > 0:
                    aggregated_class[i, j] = 1
                else:
                    aggregated_class[i, j] = 0

                # Second layer - Difference in edge lengths.
                # Sum in float64 first, then scale once, instead of building
                # a scaled float copy of every window.
                sum_2000 = edge_2000[rows, cols].sum(dtype=np.float64) / EDGE_SCALE
                sum_2020 = edge_2020[rows, cols].sum(dtype=np.float64) / EDGE_SCALE
                diff_values = sum_2020 - sum_2000

                # Check and handle NaN or inf values
                if np.isnan(diff_values) or np.isinf(diff_values):
                    print("Potential overflow with values:", sum_2020, sum_2000)
                    diff_values = 0

                aggregated_diff[i, j] = diff_values

        # Build the output profile while the source dataset is still open.
        output_path = os.path.join(output_dir, file)
        profile = src_2000.profile.copy()
        profile.update(
            # One dtype applies to all bands of the file; float32 keeps the
            # fractional part of the difference band and still represents
            # the class ids 0..5 exactly.
            dtype=rasterio.float32,
            count=2,
            compress='lzw',
            nodata=None,
            # The output grid is the aggregated one: fewer, larger pixels
            # anchored at the same upper-left corner.
            height=new_shape[0],
            width=new_shape[1],
            transform=src_2000.transform * rasterio.Affine.scale(factor, factor),
        )

    # Write the results to a new raster (band 1: class, band 2: difference)
    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(aggregated_class.astype(rasterio.float32), 1)
        dst.write(aggregated_diff.astype(rasterio.float32), 2)
    del edge_2000
    del edge_2020

100%|████████████████████████████████████████████████████████████████████████████| 261/261 [32:41:42<00:00, 450.97s/it]


In [None]:
import os
import rasterio
import geopandas as gpd
import pandas as pd
from rasterio.features import geometry_mask
import numpy as np
from tqdm import tqdm
import gc

# Input folders: one raster per tile for edge and area, for 2000 and 2020.
forest_2000_dir = 'G:/Hangkai/Global forest edge/2000Edge'
forest_area_2000_dir = 'G:/Hangkai/Global forest edge/2000Area'
forest_2020_dir = 'G:/Hangkai/Global forest edge/2020Edge'
forest_area_2020_dir = 'G:/Hangkai/Global forest edge/2020Area'

forest_2000_files = os.listdir(forest_2000_dir)
forest_area_2000_files = os.listdir(forest_area_2000_dir)
forest_2020_files = os.listdir(forest_2020_dir)
forest_area_2020_files = os.listdir(forest_area_2020_dir)

# Every iteration opens the same file name in all four folders, so only
# tiles present in all of them can be processed.
common_files = (
    set(forest_2000_files)
    & set(forest_area_2000_files)
    & set(forest_2020_files)
    & set(forest_area_2020_files)
)

output_dir_class = 'G:/Hangkai/Global forest edge/2000Edge_classified_01_class/'
output_dir_diff = 'G:/Hangkai/Global forest edge/2000Edge_classified_01_diff/'

os.makedirs(output_dir_class, exist_ok=True)
os.makedirs(output_dir_diff, exist_ok=True)

# Set aggregation factor: number of source pixels per output pixel along
# each axis. round() instead of int() so a float quotient that lands just
# below the integer is not truncated to factor - 1.
factor = round(0.1 / 0.00025)

# Divisor applied to the summed edge values before differencing.
# NOTE(review): presumably a unit conversion of the stored edge values -- confirm.
EDGE_SCALE = 1000 * 10

# Change classes 0..5 are assigned below; classes >= 2 denote an actual change.
N_CLASSES = 6

# Sorted so the processing order is reproducible between runs.
for file in tqdm(sorted(common_files)):
    with rasterio.open(os.path.join(forest_2000_dir, file)) as src_2000, \
         rasterio.open(os.path.join(forest_area_2000_dir, file)) as src_area_2000, \
         rasterio.open(os.path.join(forest_area_2020_dir, file)) as src_area_2020, \
         rasterio.open(os.path.join(forest_2020_dir, file)) as src_2020:

        edge_2000 = src_2000.read(1)
        area_2000 = src_area_2000.read(1)
        area_2020 = src_area_2020.read(1)
        edge_2020 = src_2020.read(1)

        # Initialize the result raster with zeros
        EDGE_class = np.zeros_like(edge_2000, dtype=np.int8)

        # Assign each class (later assignments overwrite earlier ones):
        #   0: no edge in either year      1: edge present and unchanged
        #   2: edge up,   area up          3: edge up,   area not up
        #   4: edge down, area not down    5: edge down, area down
        EDGE_class[(edge_2000 == 0) & (edge_2020 == 0)] = 0
        EDGE_class[(edge_2000 != 0) & (edge_2020 == edge_2000)] = 1
        EDGE_class[(edge_2020 > edge_2000) & (area_2020 > area_2000)] = 2
        EDGE_class[(edge_2020 > edge_2000) & (area_2020 <= area_2000)] = 3
        EDGE_class[(edge_2000 > edge_2020) & (area_2020 >= area_2000)] = 4
        EDGE_class[(edge_2000 > edge_2020) & (area_2020 < area_2000)] = 5

        # The area rasters are only needed for the classification above.
        del area_2020
        del area_2000
        gc.collect()

        # Create arrays for new aggregated raster. Trailing rows/columns that
        # do not fill a complete factor x factor window are dropped.
        new_shape = (EDGE_class.shape[0] // factor, EDGE_class.shape[1] // factor)
        if 0 in new_shape:
            # A zero-sized raster cannot be written; skip such a tile.
            print("Skipping tile smaller than one aggregation window:", file)
            continue

        aggregated_class = np.zeros(new_shape, dtype=np.int8)
        aggregated_diff = np.zeros(new_shape, dtype=np.float32)

        # Loop through the new aggregated raster's coordinates
        for i in range(new_shape[0]):
            rows = slice(i * factor, (i + 1) * factor)
            for j in range(new_shape[1]):
                cols = slice(j * factor, (j + 1) * factor)

                # First layer - Dominant change class.
                # If any change class (>= 2) occurs, the most frequent one
                # wins (ties go to the lowest class id); otherwise the cell
                # is 1 if any unchanged edge is present, else 0.
                counts = np.bincount(EDGE_class[rows, cols].ravel(), minlength=N_CLASSES)
                if counts[2:].any():
                    aggregated_class[i, j] = 2 + int(np.argmax(counts[2:]))
                elif counts[1] > 0:
                    aggregated_class[i, j] = 1
                else:
                    aggregated_class[i, j] = 0

                # Second layer - Difference in edge lengths.
                # Sum in float64 first, then scale once, instead of building
                # a scaled float copy of every window.
                sum_2000 = edge_2000[rows, cols].sum(dtype=np.float64) / EDGE_SCALE
                sum_2020 = edge_2020[rows, cols].sum(dtype=np.float64) / EDGE_SCALE
                diff_values = sum_2020 - sum_2000

                # Check and handle NaN or inf values
                if np.isnan(diff_values) or np.isinf(diff_values):
                    print("Potential overflow with values:", sum_2020, sum_2000)
                    diff_values = 0

                aggregated_diff[i, j] = diff_values

        # Write the results to two separate folders
        output_path_class = os.path.join(output_dir_class, file)
        output_path_diff = os.path.join(output_dir_diff, file)

        # The output grid is the aggregated one: fewer, larger pixels
        # anchored at the same upper-left corner as the source tile.
        aggregated_grid = dict(
            height=new_shape[0],
            width=new_shape[1],
            transform=src_2000.transform * rasterio.Affine.scale(factor, factor),
        )

        profile_class = src_2000.profile.copy()
        profile_diff = src_2000.profile.copy()

        profile_class.update(dtype=rasterio.int8, count=1, compress='lzw', nodata=None,
                             **aggregated_grid)
        profile_diff.update(dtype=rasterio.float32, count=1, compress='lzw', nodata=None,
                            **aggregated_grid)

        with rasterio.open(output_path_class, 'w', **profile_class) as dst_class:
            dst_class.write(aggregated_class.astype(rasterio.int8), 1)

        with rasterio.open(output_path_diff, 'w', **profile_diff) as dst_diff:
            dst_diff.write(aggregated_diff.astype(rasterio.float32), 1)

    del edge_2000
    del edge_2020

  2%|█▏                                                                            | 4/261 [27:59<29:04:17, 407.23s/it]

In [None]:
import os
import rasterio
import geopandas as gpd
import pandas as pd
from rasterio.features import geometry_mask
import numpy as np
import gc

# Input folders: one raster per tile for edge and area, for 2000 and 2020.
forest_2000_dir = '/mnt/cephfs/scratch/groups/chen_group/hangkai/2000Edge'
forest_area_2000_dir = '/mnt/cephfs/scratch/groups/chen_group/hangkai/2000Area'
forest_2020_dir = '/mnt/cephfs/scratch/groups/chen_group/hangkai/2020Edge'
forest_area_2020_dir = '/mnt/cephfs/scratch/groups/chen_group/hangkai/2020Area'

forest_2000_files = os.listdir(forest_2000_dir)
forest_area_2000_files = os.listdir(forest_area_2000_dir)
forest_2020_files = os.listdir(forest_2020_dir)
forest_area_2020_files = os.listdir(forest_area_2020_dir)

# Only the two edge folders are read below, so only they need to share a tile.
common_files = set(forest_2000_files).intersection(set(forest_2020_files))

output_dir_class = '/mnt/cephfs/scratch/groups/chen_group/hangkai/2000Edge_classified_01_class/'
output_dir_diff = '/mnt/cephfs/scratch/groups/chen_group/hangkai/2000Edge_classified_01_diff/'

os.makedirs(output_dir_class, exist_ok=True)
os.makedirs(output_dir_diff, exist_ok=True)

# Set aggregation factor: number of source pixels per output pixel along
# each axis. round() instead of int() so a float quotient that lands just
# below the integer is not truncated to factor - 1.
factor = round(0.1 / 0.00025)

# Divisor applied to the summed edge values before differencing.
# NOTE(review): presumably a unit conversion of the stored edge values -- confirm.
EDGE_SCALE = 1000 * 10

# Sorted so the processing order is reproducible between runs.
for file in sorted(common_files):
    with rasterio.open(os.path.join(forest_2000_dir, file)) as src_2000, \
         rasterio.open(os.path.join(forest_2020_dir, file)) as src_2020:

        edge_2000 = src_2000.read(1)
        edge_2020 = src_2020.read(1)

        # Create arrays for new aggregated raster. The shape is derived from
        # the tile that was just read, so the cell does not depend on a
        # `new_shape` left behind in the kernel by another cell. Trailing
        # rows/columns that do not fill a complete window are dropped.
        new_shape = (edge_2000.shape[0] // factor, edge_2000.shape[1] // factor)
        if 0 in new_shape:
            # A zero-sized raster cannot be written; skip such a tile.
            print("Skipping tile smaller than one aggregation window:", file)
            continue

        aggregated_diff = np.zeros(new_shape, dtype=np.float32)

        # Loop through the new aggregated raster's coordinates
        for i in range(new_shape[0]):
            rows = slice(i * factor, (i + 1) * factor)
            for j in range(new_shape[1]):
                cols = slice(j * factor, (j + 1) * factor)

                # Sum in float64 first, then scale once, instead of building
                # a scaled float copy of every window.
                sum_2000 = edge_2000[rows, cols].sum(dtype=np.float64) / EDGE_SCALE
                sum_2020 = edge_2020[rows, cols].sum(dtype=np.float64) / EDGE_SCALE
                diff_values = sum_2020 - sum_2000

                # Check and handle NaN or inf values
                if np.isnan(diff_values) or np.isinf(diff_values):
                    print("Potential overflow with values:", sum_2020, sum_2000)
                    diff_values = 0

                aggregated_diff[i, j] = diff_values

        # Write the difference raster to its output folder
        output_path_diff = os.path.join(output_dir_diff, file)

        profile_diff = src_2000.profile.copy()

        profile_diff.update(
            dtype=rasterio.float32,
            count=1,
            compress='lzw',
            nodata=None,
            # The output grid is the aggregated one: fewer, larger pixels
            # anchored at the same upper-left corner as the source tile.
            height=new_shape[0],
            width=new_shape[1],
            transform=src_2000.transform * rasterio.Affine.scale(factor, factor),
        )

        with rasterio.open(output_path_diff, 'w', **profile_diff) as dst_diff:
            dst_diff.write(aggregated_diff.astype(rasterio.float32), 1)

    del edge_2000
    del edge_2020