In [2]:
"""
Forest Edge Image Processing - Complete Pipeline
==================================================

This script:
1. Cleans up all old backup files (*_original.png)
2. Finds ALL valid year-named images (2000, 2005, 2010, 2015, 2020 ±2 years)
3. Creates backups before cropping
4. Crops all images to keep red border, remove excess

Usage:
    python forest_edge_complete_processing.py
"""

import os
import cv2
import numpy as np
import shutil
import re

# ===== CONFIGURATION =====
# Root folder to process; every immediate subdirectory is treated as one
# sample folder and scanned for year-named PNG images.
FOLDER_PATH = r"G:\Hangkai\Global_Forest_edge_mapping_data\validation_samples_0p01deg_per_sample_folder\samples\output"
# Nominal years for which an image is looked up in each sample folder.
TARGET_YEARS = [2000, 2005, 2010, 2015, 2020]
# Intended year tolerance (± years) around each target year; printed in the
# run banners.
# NOTE(review): verify that the callers of find_matching_image actually pass
# this value rather than relying on that function's own default.
TOLERANCE = 2
# =========================


def find_matching_image(subfolder, target_year, tolerance=2):
    """
    Find the PNG image whose filename year is closest to a target year.

    Filenames are scanned for the first year in the range 1990-2025. Files
    that do not end in ".png" (any letter case) and backup files ending in
    "_original.png" (any letter case) are ignored. Among the remaining files
    whose year lies within ``tolerance`` of ``target_year``, the one with the
    smallest year difference is chosen; ties are broken by filename order so
    the result does not depend on the directory listing order.

    Args:
        subfolder: Path to subfolder
        target_year: Target year (e.g., 2000, 2005)
        tolerance: Year tolerance (default ±2 years)

    Returns:
        Tuple ``(path, year)`` with the path to the matching PNG image and
        the year found in its name, or ``(None, None)`` when the folder does
        not exist, cannot be listed, or contains no matching image.
    """
    year_pattern = r'(199[0-9]|20[0-1][0-9]|202[0-5])'

    if not os.path.exists(subfolder):
        return None, None

    # Only the directory listing can fail here, so keep the try body minimal
    # and catch the specific error family instead of every exception.
    try:
        filenames = os.listdir(subfolder)
    except OSError as e:
        print(f"Warning: Error reading folder {subfolder}: {e}")
        return None, None

    best = None  # (year distance, filename, year)
    # os.listdir order is arbitrary; sorting makes tie-breaking reproducible.
    for file in sorted(filenames):
        lowered = file.lower()
        if not lowered.endswith('.png'):
            continue
        # Skip backup files regardless of extension case.
        if lowered.endswith('_original.png'):
            continue

        match = re.search(year_pattern, file)
        if match is None:
            continue

        img_year = int(match.group(1))
        distance = abs(img_year - target_year)
        if distance > tolerance:
            continue
        # Strict "<" keeps the alphabetically first file on equal distance.
        if best is None or distance < best[0]:
            best = (distance, file, img_year)

    if best is None:
        return None, None
    return os.path.join(subfolder, best[1]), best[2]


def crop_with_border(image_path):
    """
    Crop an image in place to the bounding box of its red border.

    A backup copy named "<stem>_original.png" is created next to the image
    before anything is overwritten (an existing backup is left untouched).
    Red pixels are detected in HSV space, the largest external red contour is
    taken as the border, and the image is cropped to that contour's bounding
    box plus a 1-pixel margin, clamped to the image bounds. The cropped image
    is written back to ``image_path``.

    Args:
        image_path: Path of the PNG image to crop.

    Returns:
        True if successful, False otherwise (image unreadable, no red border
        detected, or the cropped image could not be written).
    """
    # Create backup first.
    # The backup name is derived from the path stem instead of a textual
    # replace of '.png': a replace leaves upper-case extensions such as
    # "2015.PNG" unchanged (so the "backup" path would be the image itself and
    # no backup would ever be made) and would also rewrite any '.png' that
    # happens to occur in a directory name. The suffix is always the
    # lower-case "_original.png" that the rest of the script looks for.
    stem, _extension = os.path.splitext(image_path)
    backup_path = stem + '_original.png'

    if os.path.exists(backup_path):
        print(f"    Warning: Backup already exists, skipping backup creation")
    else:
        shutil.copy2(image_path, backup_path)

    # Read image
    img = cv2.imread(image_path)
    if img is None:
        print(f"    Error: Cannot read image")
        return False

    original_size = f"{img.shape[1]}x{img.shape[0]}"

    # Convert to HSV for red detection
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Red is matched with two hue bands (0-10 and 160-180), both requiring
    # saturation and value of at least 100.
    lower_red1 = np.array([0, 100, 100])
    upper_red1 = np.array([10, 255, 255])
    lower_red2 = np.array([160, 100, 100])
    upper_red2 = np.array([180, 255, 255])

    mask1 = cv2.inRange(hsv, lower_red1, upper_red1)
    mask2 = cv2.inRange(hsv, lower_red2, upper_red2)
    mask = cv2.bitwise_or(mask1, mask2)

    # Find the outer contours of the red regions
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        print(f"    Warning: No red border detected (size: {original_size})")
        return False

    # Get bounding box of largest contour
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    # Keep the border, remove excess outside; clamp so the box stays inside
    # the image after the margin is added.
    margin = 1
    x = max(0, x - margin)
    y = max(0, y - margin)
    w = min(img.shape[1] - x, w + 2 * margin)
    h = min(img.shape[0] - y, h + 2 * margin)

    # Crop and save; a failed write must not be reported as a success.
    cropped_img = img[y:y+h, x:x+w]
    if not cv2.imwrite(image_path, cropped_img):
        print(f"    Error: Cannot write cropped image")
        return False

    new_size = f"{w}x{h}"
    print(f"    Cropped: {original_size} → {new_size}")

    return True


def cleanup_backup_files(folder_path):
    """
    Delete every backup file (name ending in "_original.png") under a folder.

    The folder is walked recursively; each deleted file name is printed and
    a summary line is printed at the end.

    Args:
        folder_path: Root folder to scan.

    Returns:
        Number of backup files deleted.
    """
    rule = "=" * 80

    print(rule)
    print("STEP 1: CLEANING UP BACKUP FILES")
    print(rule)
    print(f"Scanning folder: {folder_path}\n")

    removed = 0
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            # Anything that is not a backup file is left alone.
            if not name.endswith('_original.png'):
                continue
            target = os.path.join(current_dir, name)
            print(f"Deleting: {name}")
            os.remove(target)
            removed += 1

    print(f"\n✓ Deleted {removed} backup files")
    print(rule)

    return removed


def check_remaining_images(folder_path):
    """
    Count and report the year-matched images found in each sample subfolder.

    Every immediate subdirectory of ``folder_path`` counts as one sample. For
    each target year in TARGET_YEARS the sample is searched with
    find_matching_image using the configured TOLERANCE, so the report uses
    the same tolerance that the banners print.

    Args:
        folder_path: Root folder containing the sample subfolders.

    Returns:
        dict with the keys 'total_samples', 'samples_with_images',
        'total_images' and 'images_by_year' (a dict mapping each target year
        to the number of samples that have a matching image).
    """
    stats = {
        'total_samples': 0,
        'samples_with_images': 0,
        'total_images': 0,
        'images_by_year': {year: 0 for year in TARGET_YEARS}
    }

    print("\n" + "=" * 80)
    print("STEP 2: CHECKING REMAINING IMAGES")
    print("=" * 80)

    for subfolder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subfolder)

        # Only directories are sample folders; loose files are ignored.
        if not os.path.isdir(subfolder_path):
            continue

        stats['total_samples'] += 1

        # Count one hit per target year that has a matching image.
        sample_has_image = False
        for target_year in TARGET_YEARS:
            img_path, _img_year = find_matching_image(
                subfolder_path, target_year, tolerance=TOLERANCE
            )
            if img_path is None:
                continue
            sample_has_image = True
            stats['images_by_year'][target_year] += 1
            stats['total_images'] += 1

        if sample_has_image:
            stats['samples_with_images'] += 1

    print(f"\nTotal samples: {stats['total_samples']}")
    print(f"Samples with images: {stats['samples_with_images']}")
    print(f"Total valid images: {stats['total_images']}")
    print(f"\nImages by target year:")
    for year in TARGET_YEARS:
        count = stats['images_by_year'][year]
        print(f"  {year}: {count} images")

    print("=" * 80)

    return stats


def crop_all_images_fresh(folder_path):
    """
    Crop every year-matched image in each sample subfolder.

    Every immediate subdirectory of ``folder_path`` counts as one sample. For
    each target year in TARGET_YEARS the sample is searched with
    find_matching_image using the configured TOLERANCE, and each image found
    is passed to crop_with_border. An image that matches more than one target
    year is processed only once.

    Args:
        folder_path: Root folder containing the sample subfolders.

    Returns:
        dict with the counters 'total_samples', 'samples_processed',
        'total_images', 'successfully_cropped', 'failed' and 'no_border'.
        crop_with_border only reports success or failure, so 'no_border' is
        kept for compatibility and stays 0; such images count as 'failed'.
    """
    stats = {
        'total_samples': 0,
        'samples_processed': 0,
        'total_images': 0,
        'successfully_cropped': 0,
        'failed': 0,
        'no_border': 0
    }

    print("\n" + "=" * 80)
    print("STEP 3: PROCESSING ALL IMAGES FROM SCRATCH")
    print("=" * 80)
    print(f"Target years: {TARGET_YEARS}")
    print(f"Tolerance: ±{TOLERANCE} years")
    print("=" * 80)
    print()

    # Process each sample folder
    for subfolder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subfolder)

        # Only directories are sample folders; loose files are ignored.
        if not os.path.isdir(subfolder_path):
            continue

        stats['total_samples'] += 1

        # Find all valid images for this sample. The tolerance printed above
        # is the one actually used, and a file is queued only once even if a
        # wide tolerance lets it match several target years.
        valid_images = []  # (path, year) pairs
        seen_paths = set()
        for target_year in TARGET_YEARS:
            img_path, img_year = find_matching_image(
                subfolder_path, target_year, tolerance=TOLERANCE
            )
            if img_path is None or img_path in seen_paths:
                continue
            seen_paths.add(img_path)
            valid_images.append((img_path, img_year))

        if not valid_images:
            continue

        stats['samples_processed'] += 1
        print(f"Sample: {subfolder} ({len(valid_images)} images)")

        for img_path, img_year in valid_images:
            stats['total_images'] += 1
            print(f"  Processing: {os.path.basename(img_path)} (year: {img_year})")

            if crop_with_border(img_path):
                stats['successfully_cropped'] += 1
            else:
                stats['failed'] += 1

        print()  # Blank line between samples

    return stats


def main():
    """
    Run the whole pipeline interactively.

    Prints an overview, checks that FOLDER_PATH exists, asks the user for
    confirmation, then runs the cleanup, check and crop steps in order and
    prints a final summary.
    """
    rule = "=" * 80

    print(rule)
    print("FOREST EDGE IMAGE PROCESSING - COMPLETE PIPELINE")
    print(rule)
    print("\nThis script will:")
    for step_line in (
        "  1. Delete all old backup files (*_original.png)",
        "  2. Find all valid year-named images",
        "  3. Create fresh backups",
        "  4. Crop all images to keep red border",
    ):
        print(step_line)
    print()
    print(f"Working folder: {FOLDER_PATH}")
    print(f"Target years: {TARGET_YEARS} (±{TOLERANCE} years)")
    print(rule)

    # Nothing to do without the working folder.
    if not os.path.exists(FOLDER_PATH):
        print(f"\nError: Folder does not exist: {FOLDER_PATH}")
        return

    # Only an explicit "yes"/"y" lets the run proceed.
    answer = input("\nProceed? (yes/no): ").strip().lower()
    if answer not in ('yes', 'y'):
        print("\nOperation cancelled by user.")
        return

    # Step 1: Cleanup
    deleted = cleanup_backup_files(FOLDER_PATH)

    # Step 2: Check what's left (the function prints its own report)
    check_remaining_images(FOLDER_PATH)

    # Step 3: Process all images
    crop_stats = crop_all_images_fresh(FOLDER_PATH)

    # Final summary
    print(rule)
    print("COMPLETE PROCESSING SUMMARY")
    print(rule)
    print(f"\nCleanup:")
    print(f"  Backup files deleted: {deleted}")

    print(f"\nProcessing:")
    for label, key in (
        ("Total samples scanned", 'total_samples'),
        ("Samples with images", 'samples_processed'),
        ("Total images found", 'total_images'),
        ("Successfully cropped", 'successfully_cropped'),
        ("Failed to crop", 'failed'),
    ):
        print(f"  {label}: {crop_stats[key]}")

    # The rate is only meaningful when at least one image was processed.
    if crop_stats['total_images'] > 0:
        success_rate = (crop_stats['successfully_cropped'] / crop_stats['total_images']) * 100
        print(f"\n✓ Success rate: {success_rate:.1f}%")
        print(f"  Backups saved as *_original.png")
        print(f"  All images cropped to keep red border")

    print("\n" + rule)
    print("PROCESSING COMPLETE!")
    print(rule)


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

FOREST EDGE IMAGE PROCESSING - COMPLETE PIPELINE

This script will:
  1. Delete all old backup files (*_original.png)
  2. Find all valid year-named images
  3. Create fresh backups
  4. Crop all images to keep red border

Working folder: G:\Hangkai\Global_Forest_edge_mapping_data\validation_samples_0p01deg_per_sample_folder\samples\output
Target years: [2000, 2005, 2010, 2015, 2020] (±2 years)

Proceed? (yes/no): yes
STEP 1: CLEANING UP BACKUP FILES
Scanning folder: G:\Hangkai\Global_Forest_edge_mapping_data\validation_samples_0p01deg_per_sample_folder\samples\output

Deleting: year 1999_original.png
Deleting: year 2005_original.png
Deleting: year 2010_original.png
Deleting: year 2015_original.png
Deleting: year 2019_original.png
Deleting: year 2005_original.png
Deleting: year 2010_original.png
Deleting: year 2021_original.png
Deleting: year 1999_original.png
Deleting: year 2005_original.png
Deleting: year 2010_original.png
Deleting: year 2015_original.png
Deleting: year 2020_original

    Cropped: 772x890 → 772x890
  Processing: year 2005.png (year: 2005)
    Cropped: 770x889 → 770x889
  Processing: year 2010.png (year: 2010)
    Cropped: 771x890 → 771x890
  Processing: year 2015.png (year: 2015)
    Cropped: 770x890 → 770x890
  Processing: year 2019.png (year: 2019)
    Cropped: 787x916 → 771x890

Sample: sample_00113 (3 images)
  Processing: year 2005.png (year: 2005)
    Cropped: 595x865 → 595x865
  Processing: year 2010.png (year: 2010)
    Cropped: 766x1113 → 766x1113
  Processing: year 2021.png (year: 2021)
    Cropped: 673x977 → 673x977

Sample: sample_00114 (5 images)
  Processing: year 1999.png (year: 1999)
    Cropped: 700x810 → 700x810
  Processing: year 2005.png (year: 2005)
    Cropped: 776x898 → 776x898
  Processing: year 2010.png (year: 2010)
    Cropped: 775x897 → 775x897
  Processing: year 2015.png (year: 2015)
    Cropped: 776x898 → 776x898
  Processing: year 2020.png (year: 2020)
    Cropped: 797x921 → 776x898

Sample: sample_00120 (4 images)
  Pr

    Cropped: 1023x1022 → 1023x1022
  Processing: 2015.PNG (year: 2015)
    Cropped: 1023x1022 → 1023x1022
  Processing: 2021.PNG (year: 2021)
    Cropped: 1028x1025 → 1028x1025

Sample: sample_00476 (2 images)
  Processing: 2003.PNG (year: 2003)
    Cropped: 636x1017 → 636x1017
  Processing: 2011.PNG (year: 2011)
    Cropped: 636x1017 → 636x1017

Sample: sample_00502 (3 images)
  Processing: year 2003.png (year: 2003)
    Cropped: 374x665 → 374x665
  Processing: year 2016.png (year: 2016)
    Cropped: 374x664 → 374x664
  Processing: year 2019.png (year: 2019)
    Cropped: 449x702 → 374x665

Sample: sample_00507 (1 images)
  Processing: year 2007.png (year: 2007)
    Cropped: 783x838 → 783x838

Sample: sample_00508 (4 images)
  Processing: year 2003.png (year: 2003)
    Cropped: 673x876 → 673x876
  Processing: year 2010.png (year: 2010)
    Cropped: 673x876 → 673x876
  Processing: year 2015.png (year: 2015)
    Cropped: 673x876 → 673x876
  Processing: year 2020.png (year: 2020)
    Crop

    Cropped: 735x771 → 735x771
  Processing: year 2013.png (year: 2013)
    Cropped: 736x785 → 736x785
  Processing: year 2018.png (year: 2018)
    Cropped: 735x771 → 735x771

Sample: sample_00701 (5 images)
  Processing: year 2002.png (year: 2002)
    Cropped: 706x762 → 706x762
  Processing: year 2004.png (year: 2004)
    Cropped: 717x782 → 706x761
  Processing: year 2010.png (year: 2010)
    Cropped: 706x762 → 706x762
  Processing: year 2015.png (year: 2015)
    Cropped: 706x762 → 706x762
  Processing: year 2020.png (year: 2020)
    Cropped: 734x788 → 706x762

Sample: sample_00706 (3 images)
  Processing: year 2009.png (year: 2009)
    Cropped: 671x1067 → 671x1067
  Processing: year 2015.png (year: 2015)
    Cropped: 671x1067 → 671x1067
  Processing: year 2019.png (year: 2019)
    Cropped: 693x1110 → 671x1067

Sample: sample_01010_A (4 images)
  Processing: year2003.png (year: 2003)
    Cropped: 982x1098 → 982x1098
  Processing: year2011.png (year: 2011)
    Cropped: 981x1090 → 981x1

    Cropped: 1070x1074 → 1070x1074
  Processing: 2007.png (year: 2007)
    Cropped: 1070x1074 → 1070x1074
  Processing: 2010.PNG (year: 2010)
    Cropped: 1070x1074 → 1070x1074
  Processing: 2016.png (year: 2016)
    Cropped: 1070x1074 → 1070x1074
  Processing: 2020.png (year: 2020)
    Cropped: 1104x1104 → 1071x1074

Sample: sample_01489_A (4 images)
  Processing: 2004.png (year: 2004)
    Cropped: 1040x1114 → 1040x1114
  Processing: 2010.png (year: 2010)
    Cropped: 1040x1114 → 1040x1114
  Processing: 2016.png (year: 2016)
    Cropped: 1041x1117 → 1041x1117
  Processing: 2019.png (year: 2019)
    Cropped: 1067x1138 → 1040x1115

Sample: sample_01502 (3 images)
  Processing: 2005.PNG (year: 2005)
    Cropped: 408x867 → 408x867
  Processing: 2015.PNG (year: 2015)
    Cropped: 408x867 → 408x867
  Processing: 2019.PNG (year: 2019)
    Cropped: 408x867 → 408x867

Sample: sample_01505 (3 images)
  Processing: 2012.PNG (year: 2012)
    Cropped: 810x899 → 810x899
  Processing: 2015.PNG (year

    Cropped: 802x1108 → 802x1108

Sample: sample_01607 (3 images)
  Processing: 2010.PNG (year: 2010)
    Cropped: 681x732 → 681x732
  Processing: 2015.PNG (year: 2015)
    Cropped: 681x732 → 681x732
  Processing: 2020.PNG (year: 2020)
    Cropped: 681x732 → 681x732

Sample: sample_01608 (4 images)
  Processing: 2006.PNG (year: 2006)
    Cropped: 808x938 → 808x938
  Processing: 2011.PNG (year: 2011)
    Cropped: 808x938 → 808x938
  Processing: 2015.PNG (year: 2015)
    Cropped: 808x939 → 808x939
  Processing: 2020.PNG (year: 2020)
    Cropped: 808x939 → 808x939

Sample: sample_01615 (3 images)
  Processing: 2006.PNG (year: 2006)
    Cropped: 911x914 → 911x914
  Processing: 2016.PNG (year: 2016)
    Cropped: 911x914 → 911x914
  Processing: 2021.PNG (year: 2021)
    Cropped: 910x913 → 910x913

Sample: sample_01620 (3 images)
  Processing: 2003.PNG (year: 2003)
    Cropped: 1032x1051 → 1032x1051
  Processing: 2013.PNG (year: 2013)
    Cropped: 1032x1051 → 1032x1051
  Processing: 2022.PNG 