# Notebook to segment GeoTIFF Sentinel-2 Satellite Images into 256*256 chunks 
## Image segmentation is performed so that the file type and its quality remain unchanged
### First we will pair the before and after images so that only images belonging to a complete pair are segmented
#### Authenticate to Google Drive to access the files


In [1]:
import io
import rasterio
import os
import numpy as np
import tempfile
from rasterio.windows import Window
import rasterio.windows
from rasterio.enums import Resampling
from tifffile import imwrite
from google.oauth2 import service_account
from googleapiclient.discovery import build
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload

In [2]:
# OAuth 2.0 client secrets file. The path is relative, so the file named below
# ('client_secrets.json') must be in the kernel's working directory.
CLIENT_SECRET_FILE = 'client_secrets.json'
API_NAME = 'drive'
API_VERSION = 'v3'

# Create the OAuth flow, requesting the full Google Drive scope.
# NOTE(review): the visible cells only list and download files; a narrower
# read-only scope may be sufficient — confirm before tightening.
flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, ['https://www.googleapis.com/auth/drive'])

# Authenticate and authorize the user through a browser redirect to a local
# server; the authorization URL is printed in the cell output.
credentials = flow.run_local_server(port=0)

# Create a Drive API client; every later cell talks to Drive through `service`.
service = build(API_NAME, API_VERSION, credentials=credentials)

# List files in your Google Drive (a single, unfiltered page).
# NOTE(review): `results` and `files` are not read again in the visible cells;
# this looks like a connectivity check — confirm whether it is still needed.
results = service.files().list().execute()
files = results.get('files', [])

# Google Drive folder IDs that hold the source images and their masks
beforeRGB_folder_id = "1k4rzxlY5lh0NbxBdidO50vf8cpjLlJXf"
afterRGB_folder_id = '1zd9Wx7GXqw-w9F4hDZyViF9Rq04K5veI'
beforeMask_folder_id ="1ketMQmPQfPWh0X45fwvmee1DvzfHPSXd"
afterMask_folder_id = "1ky2Jj7s7jnyeVWVwPHEyrYE-mOlsu1oO"
# Local path on an external volume.
# NOTE(review): not referenced by any of the visible cells.
NDVI_Mask_path = "/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/NDVI_Masks"

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=1003083568572-78avuh89fu558ciatmapqhgpka9p4c3l.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A51161%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&state=MhsyF7zxnk8tgC8RJSjZcC3ZL6RYHG&access_type=offline


## Pair Before and After images
### List the paired files

In [3]:
# Function to list all files in a folder with their IDs and names
def list_files_in_folder(service, folder_id):
    """Return every file whose parent is the given Drive folder.

    Pages through ``files().list`` until the response no longer carries a
    ``nextPageToken``, so folders holding more entries than one page are
    listed completely.

    Args:
        service: Drive API client exposing ``files().list(...).execute()``.
        folder_id: ID of the Drive folder to list.

    Returns:
        A list of ``{'id': ..., 'name': ...}`` dicts, in the order the API
        returned them.
    """
    parent_query = f"'{folder_id}' in parents"
    collected = []
    next_token = None  # None requests the first page

    while True:
        page = service.files().list(
            q=parent_query,
            pageSize=1000,
            pageToken=next_token,
            fields="nextPageToken, files(id, name)"
        ).execute()

        collected.extend(page.get('files', []))

        # A missing/empty token means this was the last page.
        next_token = page.get('nextPageToken')
        if not next_token:
            return collected

# Fetch the {id, name} records of every file in the "before" and "after" RGB folders
before_files = list_files_in_folder(service, beforeRGB_folder_id)
after_files = list_files_in_folder(service, afterRGB_folder_id )


### Find matching files based on the common numbers using file IDs

In [4]:
# Pair each "before" RGB image with the "after" RGB image that carries the same
# number in its filename. Keys are "before" file IDs, values are "after" file IDs.
matchingRGB_files = {}

# Index the "after" files by name once so each lookup is a dict access instead
# of a scan over the whole list. setdefault keeps the FIRST file for a name,
# matching the first-match behaviour of a linear search that stops on a hit.
after_id_by_name = {}
for after_file in after_files:
    after_id_by_name.setdefault(after_file['name'], after_file['id'])

for before_file in before_files:
    before_filename = before_file['name']
    if not before_filename.startswith("RGB_BeforeFire"):
        continue

    # Extract the number from the filename, e.g. "RGB_BeforeFire12.tif" -> "12"
    num = before_filename.split("RGB_BeforeFire")[1].split(".tif")[0]

    # Construct the corresponding "after" filename and look it up
    after_filename = f"RGB_AfterFire{num}.tif"
    after_id = after_id_by_name.get(after_filename)
    if after_id is not None:
        matchingRGB_files[before_file['id']] = after_id
print(len(matchingRGB_files))

382


## Pair the remaining matched Before RGB files with their two-dimensional labels.

In [5]:
# Re-list the "before" RGB folder and list the "before" mask folder
before_files = list_files_in_folder(service, beforeRGB_folder_id)
beforeMask_files = list_files_in_folder(service, beforeMask_folder_id)

# Plain filename lists for both folders.
# NOTE(review): neither list is read again in the visible cells — confirm
# whether they are still needed.
before_filenames = [file['name'] for file in before_files]
beforeMask_filenames = [file['name'] for file in beforeMask_files]

In [6]:
# Pair each matched "before" RGB image with its two-dimensional label (mask).
# Keys are "before" RGB file IDs, values are mask file IDs.
matchingMask_files = {}

# Names of the "before" files are already known from the folder listing, so
# look them up locally instead of issuing one Drive API request per image.
before_name_by_id = {f['id']: f['name'] for f in before_files}

# Index masks by name. A plain dict build keeps the LAST file for a duplicated
# name, which is what a full scan without an early exit would also select.
mask_id_by_name = {f['name']: f['id'] for f in beforeMask_files}

for before_file_id, after_file_id in matchingRGB_files.items():
    before_filename = before_name_by_id.get(before_file_id)
    if before_filename is None:
        # Fallback for an ID missing from the current listing: ask Drive directly.
        before_filename = service.files().get(fileId=before_file_id, fields="name").execute()['name']

    # Extract the number from the BeforeRGB filename
    num = before_filename.split("RGB_BeforeFire")[1].split(".tif")[0]

    # Construct the corresponding mask filename and look it up
    before_mask_filename = f"RGB_BeforeFire{num}_with_mask.tif"
    mask_id = mask_id_by_name.get(before_mask_filename)
    if mask_id is not None:
        matchingMask_files[before_file_id] = mask_id

print("Matching Mask files:", len(matchingMask_files))


Matching Mask files: 382


## Image Segmentation

In [7]:
# Output folder for the BEFORE RGB tiles (absolute path on an external volume)
outputRGB_folder = '/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/BeforeRGBSegments'

# Output folder for the BEFORE two-dimensional label tiles
outputMask_folder = '/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/BeforeMaskSegments'

# Edge length, in pixels, of each square tile
tile_size =256

### Segment the Before GeoTIFF files 
#### Temporarily download the images to segment them

In [8]:
# Function to download a file from Google Drive
def download_file_from_drive(service, file_id, local_filename):
    """Download the content of a Google Drive file to a local path.

    Args:
        service: Drive API client exposing ``files().get_media``.
        file_id: ID of the Drive file to download.
        local_filename: Destination path; it is created or overwritten.

    Returns:
        ``local_filename``, unchanged, so the call can be used inline.

    Raises:
        Whatever the Drive client raises while fetching a chunk. The local
        file handle is closed in that case as well.
    """
    request = service.files().get_media(fileId=file_id)

    # The context manager guarantees the handle is closed even when a chunk
    # request fails part-way through the download.
    with io.FileIO(local_filename, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)

        done = False
        while not done:
            # next_chunk() returns (status, done); only the flag is needed here.
            _, done = downloader.next_chunk()

    return local_filename

## Save the segments without changing the data
### To do this rasterio and tifffile packages need to be used together

In [9]:
# Function to save image tiles without changing the data type
def save_rgb_tiles(image_path, output_folder, tile_size, parent_name):
    """Cut a raster into square georeferenced tiles of its first three bands.

    Tiles are written as ``{parent_name}_tile_{k}.tif`` into ``output_folder``,
    numbered from 1 in row-major order. Only complete tiles are produced: any
    remainder on the right or bottom edge smaller than ``tile_size`` is skipped.

    Each tile is written once, through rasterio, using the source metadata with
    the tile's own size, band count and window transform. The data type comes
    from the source metadata, so pixel values are stored unchanged.

    Args:
        image_path: Path of the source raster.
        output_folder: Existing directory that receives the tiles.
        tile_size: Edge length of each tile, in pixels.
        parent_name: Filename prefix for the tiles.
    """
    with rasterio.open(image_path) as src:
        num_rows = src.height // tile_size
        num_cols = src.width // tile_size

        tile_counter = 1  # Tiles are numbered starting at 1

        for i in range(num_rows):
            for j in range(num_cols):
                window = Window(j * tile_size, i * tile_size, tile_size, tile_size)

                # Read the original data without modifications
                tile = src.read((1, 2, 3), window=window)  # Assuming band order B4, B3, B2

                # Start from the source metadata and override what differs per
                # tile. 'count' must match the number of bands actually read;
                # otherwise a source with more than three bands cannot be written.
                tile_meta = src.meta.copy()
                tile_meta.update({
                    'width': tile_size,
                    'height': tile_size,
                    'count': tile.shape[0],
                    'transform': src.window_transform(window)
                })

                tile_name = f"{parent_name}_tile_{tile_counter}.tif"
                tile_path = os.path.join(output_folder, tile_name)

                with rasterio.open(tile_path, 'w', **tile_meta) as dst:
                    dst.write(tile)

                tile_counter += 1

#### Segment the Before RGB images

In [None]:
# Tile every "before" RGB image that has a matching mask
for rgb_file_id, mask_file_id in matchingMask_files.items():
    # Find the matching RGB file record
    rgb_file = next(file for file in before_files if file['id'] == rgb_file_id)

    # Extract the parent file name (e.g., "RGB_BeforeFire{num}")
    parent_name = os.path.splitext(rgb_file['name'])[0]

    print(f"Processing parent image: {parent_name}")

    # Reserve a temporary path. The descriptor is closed right away because
    # the download helper reopens the file by name.
    temp_fd, rgb_temp_file = tempfile.mkstemp()
    os.close(temp_fd)
    try:
        # Download the RGB image, then segment it into tiles
        download_file_from_drive(service, rgb_file_id, rgb_temp_file)
        save_rgb_tiles(rgb_temp_file, outputRGB_folder, tile_size, parent_name)
    finally:
        # Always remove the download, even when the download or tiling fails,
        # so failed runs do not pile up large files in the temp directory.
        os.remove(rgb_temp_file)

    # Count the tiles on disk that belong to this parent image
    final_num_tiles = len([f for f in os.listdir(outputRGB_folder) if f.startswith(f"{parent_name}_tile_")])
    print(f"Final number of tiles for {parent_name}: {final_num_tiles}")

    print(f' Tiling of {parent_name} completed succesfully')

#### Segment the Before two-dimensional labels

In [None]:
def save_label_mask_tiles(mask_path, output_folder, tile_size, parent_name):
    """Cut the first band of a mask raster into square tiles.

    Tiles are written with tifffile as ``{parent_name}_tile_{k}.tif`` into
    ``output_folder``, numbered from 1 in row-major order. Only complete
    tiles are produced: any remainder on the right or bottom edge smaller
    than ``tile_size`` is skipped.

    Args:
        mask_path: Path of the source mask raster.
        output_folder: Directory that receives the tiles.
        tile_size: Edge length of each tile, in pixels.
        parent_name: Filename prefix for the tiles.
    """
    with rasterio.open(mask_path) as mask_src:
        rows_of_tiles = mask_src.height // tile_size
        cols_of_tiles = mask_src.width // tile_size

        for row_idx in range(rows_of_tiles):
            row_offset = row_idx * tile_size

            for col_idx in range(cols_of_tiles):
                # 1-based, row-major tile number
                tile_number = row_idx * cols_of_tiles + col_idx + 1

                # Read band 1 for this window exactly as stored
                tile_window = Window(col_idx * tile_size, row_offset, tile_size, tile_size)
                label_tile = mask_src.read(1, window=tile_window)

                # Write the two-dimensional label with tifffile
                destination = os.path.join(output_folder, f"{parent_name}_tile_{tile_number}.tif")
                imwrite(destination, label_tile)


In [None]:
# Output folder for the BEFORE two-dimensional label tiles (same for every mask,
# so it is set once rather than on each iteration)
outputMask_folder = '/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/BeforeMaskSegments'

# Tile the mask of every matched "before" image
for rgb_file_id, mask_file_id in matchingMask_files.items():
    # Find the matching two-dimensional label record
    mask_file = next(file for file in beforeMask_files if file['id'] == mask_file_id)

    # Extract the parent file name (e.g., "RGB_BeforeFire{num}_with_mask")
    parent_name = os.path.splitext(mask_file['name'])[0]

    print(f"Processing parent image: {parent_name}")

    # Download the two-dimensional label to a temporary file
    mask_temp_file_path = os.path.join(tempfile.gettempdir(), f"{parent_name}_temp_mask.tif")
    try:
        mask_temp_file = download_file_from_drive(service, mask_file_id, mask_temp_file_path)

        print(f"Downloaded mask file to: {mask_temp_file}")

        # Segment the label into tiles
        save_label_mask_tiles(mask_temp_file, outputMask_folder, tile_size, parent_name)
    finally:
        # Remove the download even when the download or tiling fails. The
        # existence check covers a download that failed before creating the file.
        if os.path.exists(mask_temp_file_path):
            os.remove(mask_temp_file_path)

    # Count the tiles on disk that belong to this parent image
    final_num_tiles = len([f for f in os.listdir(outputMask_folder) if f.startswith(f"{parent_name}_tile_")])
    print(f"Final number of tiles for {parent_name}: {final_num_tiles}")

print('Tiling completed successfully')


## Pair after images

In [11]:
# Output folder for the AFTER RGB tiles (absolute path on an external volume)
outputAfterRGB_folder = '/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/AfterRGBSegments'

# Output folder for the AFTER two-dimensional label tiles
outputAfterMask_folder = '/Volumes/HD710PRO/Fire_and_Hurricane_Images/Fire/AfterMaskSegments'

# Edge length, in pixels, of each square tile
tile_size =256

In [12]:
# List all files in the "after" masks folder
afterMask_files = list_files_in_folder(service, afterMask_folder_id)
after_files_dict = {file['id']: file for file in after_files}

# Index the "after" masks by name once so each lookup is a dict access instead
# of a scan over the whole list. setdefault keeps the FIRST file for a name,
# matching the first-match behaviour of a linear search that stops on a hit.
after_mask_id_by_name = {}
for afterMask_file in afterMask_files:
    after_mask_id_by_name.setdefault(afterMask_file['name'], afterMask_file['id'])

# Pairs of "after" RGB file IDs and their corresponding mask file IDs
matchingAfterMask_files = {}

# Iterate through matchingRGB_files and find the corresponding two-dimensional labels
for before_id, after_id in matchingRGB_files.items():
    # Extract the number from the "after" filename
    num = after_files_dict[after_id]['name'].split("RGB_AfterFire")[1].split(".tif")[0]

    # Construct the "after" two-dimensional label filename and look it up
    afterMask_filename = f"RGB_AfterFire{num}_with_mask.tif"
    after_mask_id = after_mask_id_by_name.get(afterMask_filename)
    if after_mask_id is not None:
        matchingAfterMask_files[after_id] = after_mask_id  # Store file ID instead of file name

# Print the number of matching "after" RGB and mask files
print(len(matchingAfterMask_files))



382


#### Segment the After RGB images

In [36]:
after_files = list_files_in_folder(service, afterRGB_folder_id)

# Sort the file records by name. Note that this only orders `after_files`; the
# processing order below follows the insertion order of matchingAfterMask_files.
after_files.sort(key=lambda x: x['name'])

# Resume from index 320 because an earlier segmentation run was interrupted.
# Set start_index to 0 to process every pair on a fresh run.
start_index = 320
matching_files_to_process = list(matchingAfterMask_files.items())[start_index:]

for AfterRgb_file_id, AfterMask_file_id in matching_files_to_process:
    # Find the matching RGB file record
    AfterRGB_file = next(file for file in after_files if file['id'] == AfterRgb_file_id)

    # Extract the parent file name (e.g., "RGB_AfterFire{num}")
    parent_name = os.path.splitext(AfterRGB_file['name'])[0]

    print(f"Processing parent image: {parent_name}")

    # Reserve a temporary path. The descriptor is closed right away because
    # the download helper reopens the file by name.
    temp_fd, rgb_temp_file = tempfile.mkstemp()
    os.close(temp_fd)
    try:
        # Download the RGB image, then segment it into tiles
        download_file_from_drive(service, AfterRgb_file_id, rgb_temp_file)
        save_rgb_tiles(rgb_temp_file, outputAfterRGB_folder, tile_size, parent_name)
    finally:
        # Always remove the download, even when the download or tiling fails.
        os.remove(rgb_temp_file)

    # Count the tiles on disk that belong to this parent image
    final_num_tiles = len([f for f in os.listdir(outputAfterRGB_folder) if f.startswith(f"{parent_name}_tile_")])
    print(f"Final number of tiles for {parent_name}: {final_num_tiles}")

    print(f'Tiling of {parent_name} completed successfully')


Processing parent image: RGB_AfterFire179
Final number of tiles for RGB_AfterFire179: 208
Tiling of RGB_AfterFire179 completed successfully
Processing parent image: RGB_AfterFire178
Final number of tiles for RGB_AfterFire178: 208
Tiling of RGB_AfterFire178 completed successfully
Processing parent image: RGB_AfterFire164
Final number of tiles for RGB_AfterFire164: 221
Tiling of RGB_AfterFire164 completed successfully
Processing parent image: RGB_AfterFire161
Final number of tiles for RGB_AfterFire161: 208
Tiling of RGB_AfterFire161 completed successfully
Processing parent image: RGB_AfterFire160
Final number of tiles for RGB_AfterFire160: 221
Tiling of RGB_AfterFire160 completed successfully
Processing parent image: RGB_AfterFire152
Final number of tiles for RGB_AfterFire152: 208
Tiling of RGB_AfterFire152 completed successfully
Processing parent image: RGB_AfterFire151
Final number of tiles for RGB_AfterFire151: 221
Tiling of RGB_AfterFire151 completed successfully
Processing parent im

#### Segment the After Two-dimensional labels

In [37]:
# Tile the mask of every matched "after" image
for rgb_file_id, mask_file_id in matchingAfterMask_files.items():
    # Find the matching two-dimensional label record
    mask_file = next(file for file in afterMask_files if file['id'] == mask_file_id)

    # Extract the parent file name (e.g., "RGB_AfterFire{num}_with_mask")
    parent_name = os.path.splitext(mask_file['name'])[0]

    print(f"Processing parent image: {parent_name}")

    # Reserve a temporary path. The descriptor is closed right away because
    # the download helper reopens the file by name.
    temp_fd, mask_temp_file = tempfile.mkstemp()
    os.close(temp_fd)
    try:
        # Download the label, then segment it into tiles
        download_file_from_drive(service, mask_file_id, mask_temp_file)
        save_label_mask_tiles(mask_temp_file, outputAfterMask_folder, tile_size, parent_name)
    finally:
        # Always remove the download, even when the download or tiling fails.
        os.remove(mask_temp_file)

    # Count the tiles on disk that belong to this parent image
    final_num_tiles = len([f for f in os.listdir(outputAfterMask_folder) if f.startswith(f"{parent_name}_tile_")])
    print(f"Final number of tiles for {parent_name}: {final_num_tiles}")

    print(f' Tiling of {parent_name} completed succesfully')

Processing parent image: RGB_AfterFire713_with_mask
Final number of tiles for RGB_AfterFire713_with_mask: 208
 Tiling of RGB_AfterFire713_with_mask completed succesfully
Processing parent image: RGB_AfterFire712_with_mask
Final number of tiles for RGB_AfterFire712_with_mask: 221
 Tiling of RGB_AfterFire712_with_mask completed succesfully
Processing parent image: RGB_AfterFire711_with_mask
Final number of tiles for RGB_AfterFire711_with_mask: 221
 Tiling of RGB_AfterFire711_with_mask completed succesfully
Processing parent image: RGB_AfterFire710_with_mask
Final number of tiles for RGB_AfterFire710_with_mask: 221
 Tiling of RGB_AfterFire710_with_mask completed succesfully
Processing parent image: RGB_AfterFire708_with_mask
Final number of tiles for RGB_AfterFire708_with_mask: 208
 Tiling of RGB_AfterFire708_with_mask completed succesfully
Processing parent image: RGB_AfterFire706_with_mask
Final number of tiles for RGB_AfterFire706_with_mask: 221
 Tiling of RGB_AfterFire706_with_mask co