# Fusion Strategies for Multisource Geospatial Data: Comparing MKL, Tabular, and Deep Learning Models​

- Tabular ML
- CNN
- MKL

# 1. Tabular ML

## 1.1 Resampling the nicfi to match the resolution of sentinel-2

### 1.1.1 DownScaling and helper function with the average method

In [7]:
# 

import os
import random
from pathlib import Path
import matplotlib.pyplot as plt
import rasterio
from rasterio.enums import Resampling

def downscaling(input_tif, output_tif, matching_tif):
    """
    Downscale an input GeoTIFF to match the resolution and dimensions of another GeoTIFF, 
    preserving geospatial alignment, and save the result to an output GeoTIFF.
    
    Args:
        input_tif (str): Path to the input GeoTIFF file to be downscaled.
        output_tif (str): Path where the downscaled GeoTIFF will be saved.
        matching_tif (str): Path to the reference GeoTIFF file whose resolution and size will be matched.
    """
    # Open the reference (matching) GeoTIFF
    with rasterio.open(matching_tif) as match_src:
        # Extract target height, width, transform, and CRS from the reference
        target_height = match_src.height
        target_width = match_src.width
        target_transform = match_src.transform
        target_crs = match_src.crs

    # Open the input GeoTIFF
    with rasterio.open(input_tif) as input_src:
        # Ensure the CRS and geospatial extent match
        if input_src.crs != target_crs:
            raise ValueError("CRS mismatch between input_tif and matching_tif.")
        if input_src.bounds != rasterio.transform.array_bounds(
            target_height, target_width, target_transform
        ):
            raise ValueError("Bounds mismatch between input_tif and matching_tif.")
        
        # Perform the resampling to match dimensions
        data = input_src.read(
            out_shape=(input_src.count, target_height, target_width),
            resampling=Resampling.average
        )
        
        # Update the output profile to match the reference
        output_profile = input_src.profile.copy()
        output_profile.update({
            'height': target_height,
            'width': target_width,
            'transform': target_transform,
            'crs': target_crs
        })
        
        # Write the resampled data to the output GeoTIFF
        with rasterio.open(output_tif, 'w', **output_profile) as dst:
            dst.write(data)


def procese_all_tifs(input_folder, output_folder, matching_folder):
    # the input folder are the nicfi tif folder
    # the output folder are the downscaled nicfi tif folder
    # the matching folder are the sentinel tif folder which used to be schema for the nicfi
    
    # get the list of tif files in the input folder
    input_tifs = [f for f in os.listdir(input_folder) if f.endswith('.tif')]
    macthed_tifs = [f for f in os.listdir(matching_folder) if f.endswith('.tif')]



    # process each input tif file
    for input_tif in input_tifs:
        input_path = os.path.join(input_folder, input_tif)
        output_path = os.path.join(output_folder, input_tif.replace('.tif', '_rs.tif'))# change the name of file with _rs

        # get the file name of the matching tif file(sentinel [index]-YYYYMMDD-sentinel.tif),but nicfi index-YYYY-MM-nicfi.tif
        match_tif= input_tif.split('-')[0]+"-"+input_tif.split('-')[1]+input_tif.split('-')[2]+'01-sentinel.tif'

        matching_path = os.path.join(matching_folder, match_tif)
        downscaling(input_path, output_path, matching_path)
        print(f"Processing: {input_tif}, Saved target raster to: {output_path},mached target raster to: {matching_path}")
    


def visualize_random_samples(dev_nicfi_folder, dev_sentinel_folder, upscaled_sentinel_folder, num_samples=20):
    """
    Visualize random samples from each folder in a grid and display image information.
    
    Args:
        dev_nicfi_folder (str): Path to the NICFI development folder, format: xxxx-YYYY-MM-nicfi.tif
        dev_sentinel_folder (str): Path to the Sentinel development folder, format: xxxx-YYYYMMDD-sentinel.tif
        upscaled_sentinel_folder (str): Path to upscaled Sentinel images, format: xxxx-YYYYMMDD-sentinel-rs.tif
        num_samples (int): Number of random samples to display
    """
    # Get list of files
    sentinel_files = sorted([f for f in os.listdir(dev_sentinel_folder) if f.endswith('-sentinel.tif')])
    
    # Randomly select sentinel files
    selected_indices = random.sample(range(len(sentinel_files)), min(num_samples, len(sentinel_files)))
    
    # Create figure
    fig, axes = plt.subplots(6, 5, figsize=(20, 24))  # Increased height for text info
    fig.suptitle('Comparison of NICFI, Original Sentinel, and Upscaled Sentinel Images', fontsize=16)
    
    # Iterate over selected indices
    for idx, file_idx in enumerate(selected_indices):
        # Calculate row and column for subplot placement
        row = idx // 5  # Integer division for row number
        col = idx % 5   # Modulo operation for column number
        
        # Get sentinel file and extract index
        sentinel_file = sentinel_files[file_idx]
        file_index = sentinel_file.split('-')[0]
        
        # Find corresponding NICFI and upscaled files
        nicfi_file = next((f for f in os.listdir(dev_nicfi_folder) 
                          if f.startswith(file_index) and f.endswith('-nicfi.tif')), None)
        upscaled_file = f"{os.path.splitext(sentinel_file)[0]}-rs.tif"
        
        if nicfi_file is None:
            print(f"No matching NICFI file found for Sentinel file: {sentinel_file}")
            continue
            
        nicfi_path = os.path.join(dev_nicfi_folder, nicfi_file)
        sentinel_path = os.path.join(dev_sentinel_folder, sentinel_file)
        upscaled_path = os.path.join(upscaled_sentinel_folder, upscaled_file)
        
        # Plot and get info for NICFI
        with rasterio.open(nicfi_path) as src:
            data = src.read(1)  # Read first band for visualization
            nicfi_info = f"NICFI: Bands={src.count}, Height={src.height}, Width={src.width}"
            axes[row*3, col].imshow(data, cmap='viridis')
            axes[row*3, col].set_title(f'NICFI\n{Path(nicfi_file).name}', fontsize=8)
            axes[row*3, col].text(0.5, 0.95, nicfi_info, 
                                  ha='center', va='top', 
                                  transform=axes[row*3, col].transAxes,
                                  fontsize=6, wrap=True, 
                                  bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
            axes[row*3, col].axis('off')
        
        # Plot and get info for Original Sentinel
        with rasterio.open(sentinel_path) as src:
            # Read the full image data
            data = src.read(2)  # Read first band
            
            # Get the actual bounds
            bounds = src.bounds
            
            sentinel_info = f"Bands={src.count}, Height={src.height}, Width={src.width}"
            
            # Plot with correct bounds and no extent modification
            axes[row*3+1, col].imshow(data, 
                                    cmap='viridis',
                                    interpolation='none')  # Use 'none' to show actual pixels
            
            axes[row*3+1, col].set_title(f'Original Sentinel\n{Path(sentinel_file).name}', fontsize=8)
            axes[row*3+1, col].text(0.5, 0.95, sentinel_info,
                                    ha='center', va='top',
                                    transform=axes[row*3+1, col].transAxes,
                                    fontsize=6, color='white', bbox=dict(facecolor='black', alpha=0.5))
            axes[row*3+1, col].axis('off')
            
            # Print actual dimensions for debugging
            print(f"Sentinel image shape: {data.shape}")
        
        # Plot and get info for Upscaled Sentinel
        with rasterio.open(upscaled_path) as src:
            data = src.read(2)  # Read first band for visualization
            upscaled_info = f"Upscaled: Bands={src.count}, Height={src.height}, Width={src.width}"
            axes[row*3+2, col].imshow(data, cmap='viridis')
            axes[row*3+2, col].set_title(f'Upscaled Sentinel\n{Path(upscaled_file).name}', fontsize=8)
            axes[row*3+2, col].text(0.5, 0.95, upscaled_info,
                                    ha='center', va='top',
                                    transform=axes[row*3+2, col].transAxes,
                                    fontsize=6, color='white', bbox=dict(facecolor='black', alpha=0.5))
            axes[row*3+2, col].axis('off')
            
            # Print detailed information to console
            print(f"\nImage Set {idx + 1}:")
            print(f"Sentinel File: {sentinel_file}")
            print(f"NICFI File: {nicfi_file}")
            print(f"Upscaled File: {upscaled_file}")
            print(nicfi_info)
            print(sentinel_info)
            print(upscaled_info)
            print("-" * 50)
    
    plt.tight_layout()


In [8]:

nicif_folder_path= r'G:\GithubProject\GoogleEarthEngineTask\Tif_ML\dev_nicfi'
output_folder_path= r'G:\GithubProject\GoogleEarthEngineTask\Tif_ML\dev_downscale_nicfi'
target_folder_path= r'G:\GithubProject\GoogleEarthEngineTask\Tif_ML\dev_sentinel'
procese_all_tifs(nicif_folder_path,output_folder_path,target_folder_path)

ValueError: Bounds mismatch between input_tif and matching_tif.