In [1]:
import cv2
import os
import glob

In [None]:
def process_satellite_images(source_dir, output_dir, max_size=1024):
    """
    Convert every TCI JPEG-2000 tile found under ``source_dir`` into a resized
    grayscale JPEG stored under ``output_dir``.

    Images are discovered with the layout
    ``<source_dir>/<folder>/*.SAFE/GRANULE/<granule>/IMG_DATA/*_TCI.jp2``.
    Each result is written to ``<output_dir>/<granule>/<original name>.jpg``
    with JPEG quality 90.

    Args:
    source_dir (str): Path to the source directory containing satellite image data.
    output_dir (str): Path to the output directory where processed images will be saved.
    max_size (int): Maximum dimension (width or height) of the output images.

    Raises:
    OSError: If OpenCV reports that an output image could not be written.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Relative part of the search pattern, built with the platform separator.
    pattern_tail = os.path.join("*.SAFE", "GRANULE", "*", "IMG_DATA", "*_TCI.jp2")

    # Iterate through folders in the source directory
    for folder in os.listdir(source_dir):
        # Escape the concrete directory part so characters such as '[' or '*'
        # in real folder names are matched literally instead of as wildcards.
        folder_path = glob.escape(os.path.join(source_dir, folder))

        for image_file in glob.glob(os.path.join(folder_path, pattern_tail)):
            # The granule directory is two levels above the file
            # (<granule>/IMG_DATA/<file>); os.path keeps this separator-agnostic.
            img_data_dir = os.path.dirname(image_file)
            subdirectory_name = os.path.basename(os.path.dirname(img_data_dir))
            dest_subdirectory = os.path.join(output_dir, subdirectory_name)
            os.makedirs(dest_subdirectory, exist_ok=True)

            # Preprocess the image
            processed_image = resize_and_convert_to_grayscale(image_file, max_size)

            # Swap only the extension; str.replace would also rewrite any
            # earlier '.jp2' occurrence inside the file name.
            stem, _ext = os.path.splitext(os.path.basename(image_file))
            output_image_path = os.path.join(dest_subdirectory, stem + ".jpg")

            # Save the processed image as JPEG. cv2.imwrite signals failure by
            # returning False rather than raising, so check it explicitly.
            written = cv2.imwrite(
                output_image_path,
                processed_image,
                [int(cv2.IMWRITE_JPEG_QUALITY), 90],
            )
            if not written:
                raise OSError("Failed to write image: {}".format(output_image_path))

In [None]:
def resize_and_convert_to_grayscale(image_path, max_size=1024):
    """
    Load an image as grayscale and resize it while maintaining aspect ratio.

    The image is scaled so that its longer side becomes ``max_size`` pixels.
    The scale factor is ``max_size / max(height, width)`` with no upper clamp,
    so images smaller than ``max_size`` are enlarged.

    Args:
    image_path (str): Path to the input image file.
    max_size (int): Maximum dimension (width or height) of the output image.

    Returns:
    numpy.ndarray: Processed grayscale image.

    Raises:
    ValueError: If ``max_size`` is not positive.
    OSError: If the image cannot be read or decoded.
    """
    if max_size <= 0:
        raise ValueError("max_size must be positive, got {}".format(max_size))

    # Read the image in grayscale mode
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail here with a message naming the file.
        raise OSError("Could not read image: {}".format(image_path))

    height, width = img.shape[:2]
    scale = max_size / max(height, width)

    # int() truncates, so clamp to 1 pixel to keep very elongated images from
    # collapsing to a zero-sized dimension, which cv2.resize rejects.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))

    # Resize the image (cv2.resize expects the size as (width, height)).
    return cv2.resize(img, (new_width, new_height))

In [None]:
# Run the pipeline on the Kaggle input dataset with the default max_size (1024).
# The output directory 'processed_dataset' is relative to the current working directory.
process_satellite_images('/kaggle/input/deforestation-in-ukraine/', 'processed_dataset')

In [None]:
import zipfile

def create_zip_from_directory(directory_path, zip_file_path):
    """
    Create a DEFLATE-compressed ZIP file from the specified directory.

    Every file found by walking ``directory_path`` is stored under its path
    relative to ``directory_path``. Directories that contain no files are not
    recorded. If ``zip_file_path`` itself lies inside ``directory_path``, the
    archive is skipped so it is never added to itself.

    Parameters:
    - directory_path: Path to the directory to be zipped.
    - zip_file_path: Path for the output ZIP file.
    """
    # Resolve once so the self-inclusion check also works through symlinks and
    # relative paths.
    archive_realpath = os.path.realpath(zip_file_path)

    with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        # Walk through the directory and add files to the zip file
        for foldername, _subfolders, filenames in os.walk(directory_path):
            for filename in filenames:
                file_path = os.path.join(foldername, filename)
                # The archive already exists on disk while we are walking;
                # adding it to itself would store a partial, growing file.
                if os.path.realpath(file_path) == archive_realpath:
                    continue
                zip_file.write(file_path, os.path.relpath(file_path, directory_path))

In [None]:
# Bundle the processed images into a single ZIP archive.
# NOTE(review): the processing call above writes to the relative path
# 'processed_dataset', while this reads '/kaggle/working/processed_dataset';
# the two only match when the working directory is /kaggle/working — confirm.
zip_file_path = '/kaggle/working/processed_dataset.zip'
create_zip_from_directory('/kaggle/working/processed_dataset', zip_file_path)

In [2]:
# Record the installed package versions in requirements.txt.
# The bare `pip` here is not Python syntax; it relies on IPython automagic
# handling it as the pip magic (hence the "restart the kernel" note in the output).
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.
