In [None]:
from osgeo import gdal, osr
import os
from tqdm import tqdm
import rasterio
from rasterio.merge import merge
import re
from osgeo import gdal
from dask import compute, delayed
from dask.distributed import Client, LocalCluster
import dask

In [None]:
# Start a local Dask cluster (8 workers, 2 threads each) and attach a client to it.
# NOTE(review): per the Dask docs `memory_limit` applies to each worker, so
# 8 workers x '70GB' permits roughly 560GB in total — confirm this fits the host.
daskCluster = LocalCluster(
    n_workers=8,
    threads_per_worker=2,
    memory_limit='70GB',
)
client = Client(daskCluster)

# Bare expression on the last line so the notebook renders the client summary.
client


In [None]:

# Define the input and output directories
input_directory = "../../input/gitignore/ghsl"
output_directory = "../../input/gitignore/ghsl/reprojected"

# Create the output directory (and any missing parents) if it doesn't exist.
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate existence check and the creation call.
os.makedirs(output_directory, exist_ok=True)

# Define a delayed function for warping
@dask.delayed
def warp_tif(input_file, output_file):
    """Reproject one raster to EPSG:4326 and write it to ``output_file``.

    The function is wrapped with ``dask.delayed``, so calling it only builds
    a lazy task; the warp itself runs when that task is computed.

    Parameters
    ----------
    input_file : str
        Path of the source raster, opened with ``gdal.Open``.
    output_file : str
        Path the warped raster is written to.

    Returns
    -------
    str
        ``output_file``, returned only after the warp produced a dataset.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``input_file`` or the warp yields no dataset.
        GDAL returns ``None`` rather than raising when its Python exceptions
        are not enabled, so both results are checked explicitly.
    """
    # Open the input dataset and fail loudly if GDAL could not read it
    dataset = gdal.Open(input_file)
    if dataset is None:
        raise RuntimeError(f"GDAL could not open {input_file}")

    try:
        # Define the warp options, including the target EPSG
        warp_options = gdal.WarpOptions(dstSRS='EPSG:4326')

        # Perform the warp operation and save the output
        warped = gdal.Warp(destNameOrDestDS=output_file, srcDSOrSrcDSTab=dataset, options=warp_options)
        if warped is None:
            raise RuntimeError(f"GDAL failed to warp {input_file} to {output_file}")

        # Drop the reference so GDAL flushes and closes the output file
        warped = None
    finally:
        # Release the source dataset even when the warp fails
        dataset = None
    return output_file

# Queue one lazy warp task for every ".tif" entry found directly in the input
# directory; each output keeps the source name with a "warped_" prefix.
tasks = [
    warp_tif(
        os.path.join(input_directory, filename),
        os.path.join(output_directory, "warped_" + filename),
    )
    for filename in os.listdir(input_directory)
    if filename.endswith(".tif")
]

# Execute the queued tasks through Dask and collect what each one returns
results = compute(*tasks)

print("Warping complete. Warped files are saved in:", output_directory)

In [None]:
import os
import rasterio
from rasterio.merge import merge
from tqdm import tqdm
from osgeo import gdal


# Define the directories
output_dir = "../../input/gitignore/ghsl/reprojected"
reference_file = "../../input/gitignore/ghsl/reprojected/warped_GHS_BUILT_H_100.tif"  # Adjust path as necessary

# Ensure the reference file exists
if not os.path.exists(reference_file):
    raise FileNotFoundError(f"Reference file {reference_file} does not exist.")

# Retrieve the properties of the reference file.
# gdal.Open can return None instead of raising (when GDAL's Python exceptions
# are not enabled), so check the handle explicitly rather than relying on a
# `with` block, which would fail with an unrelated error on None.
ref = gdal.Open(reference_file)
if ref is None:
    raise RuntimeError(f"GDAL could not open reference file {reference_file}")
try:
    ref_proj = ref.GetProjection()
    ref_geotrans = ref.GetGeoTransform()
finally:
    # Release the dataset handle once the metadata has been read
    ref = None

# Pixel size taken from the geotransform as (x, y); the two values are read
# independently, so pixels need not be square.
# NOTE(review): element 5 is usually negative for north-up rasters — consumers
# that need a positive resolution should take its absolute value.
ref_resolution = (ref_geotrans[1], ref_geotrans[5])

# Set up the gdalwarp options to align to the reference file. They are the
# same for every input, so build them once outside the loop.
# The y pixel size read from a geotransform is usually negative, while the
# target resolution is a magnitude, hence abs().
# NOTE(review): targetAlignedPixels snaps the output extent to multiples of
# the resolution; that matches the reference grid only if the reference origin
# lies on such a multiple — confirm, or pass explicit output bounds instead.
options = gdal.WarpOptions(
    format='GTiff',
    dstSRS=ref_proj,
    xRes=ref_resolution[0],
    yRes=abs(ref_resolution[1]),
    targetAlignedPixels=True,
    resampleAlg='near'  # or another resampling method as needed
)

reference_name = os.path.basename(reference_file)

# Iterate over each file in the output directory
for file_name in os.listdir(output_dir):
    # Only GeoTIFFs other than the reference itself are aligned
    if not file_name.endswith('.tif') or file_name == reference_name:
        continue
    # Outputs are written back into this same directory; skip them so that
    # re-running the cell does not produce "aligned_aligned_..." files.
    if file_name.startswith('aligned_'):
        continue

    file_path = os.path.join(output_dir, file_name)

    # Define the destination file path
    aligned_file_path = os.path.join(output_dir, f"aligned_{file_name}")

    # Perform the warp operation to align to the reference grid
    aligned = gdal.Warp(destNameOrDestDS=aligned_file_path, srcDSOrSrcDSTab=file_path, options=options)
    if aligned is None:
        # GDAL returns None on failure when its Python exceptions are disabled
        raise RuntimeError(f"GDAL failed to align {file_path}")
    # Drop the reference so GDAL flushes and closes the output file
    aligned = None
    print(f"Alignment complete. Output saved to {aligned_file_path}")

In [None]:
import shutil

# Copy the height raster to a second file whose name carries the "aligned_" prefix.
# NOTE(review): both paths begin with "...", which is a literal directory name
# rather than a parent-directory reference, and they do not match the
# directories used elsewhere in this notebook — confirm the intended location.
source_file = ".../output/reprojected/ghs-built-h.tif"
destination_file = ".../output/reprojected/aligned_ghs-built-h.tif"

# File contents only are copied; the call stays last so the cell shows its result.
shutil.copyfile(src=source_file, dst=destination_file)