### Removing 0 Values from DEM

In [9]:
import rasterio
import numpy as np

def process_geotiff(input_file, output_file):
    # Open the input TIFF file
    with rasterio.open(input_file) as src:
        # Read the data from the first band
        data = src.read(1)
        
        # Set negative and zero values to NaN
        data[data == 0] = np.nan
        
        # Update the metadata to handle NaN values correctly
        profile = src.profile
        profile.update(
            dtype=rasterio.float32,  # Ensure data type can handle NaN
            nodata=np.nan            # Define nodata value
        )
        
        # Write the modified data to a new TIFF file
        with rasterio.open(output_file, 'w', **profile) as dst:
            dst.write(data, 1)

# Define input and output file paths
input_file = 'UK_DEM_merged_larger_ext.tif'
output_file = 'UK_DEM2.tif'

# Process the TIFF file
process_geotiff(input_file, output_file)

## Using Resmapling Libraries to do it 

In [10]:
import os
import rasterio
from rasterio.enums import Resampling
from rasterio.warp import reproject, Resampling as WarpResampling
import numpy as np
from tqdm import tqdm

# Define the resampling methods
resampling_methods = [
    WarpResampling.nearest,
    WarpResampling.bilinear,
    WarpResampling.cubic,
    WarpResampling.cubic_spline,
    WarpResampling.lanczos,
    WarpResampling.average,
    WarpResampling.mode,
    WarpResampling.max,
    WarpResampling.min,
    WarpResampling.med,
    WarpResampling.q1,
    WarpResampling.q3,
    WarpResampling.sum,
    WarpResampling.rms
]

# Paths to the input GeoTIFF files
raster1_path = 'ppt_1980-2010.tif'  # This is the reference raster for resolution
raster2_path = 'UK_DEM2.tif'

# Main directory for storing the processed files
main_dir = 'manual_method_resampling'
os.makedirs(main_dir, exist_ok=True)

# List to store paths of all processed files
processed_file_paths = []

# Function to process the GeoTIFF to set zero values to NaN
def process_geotiff(data, profile, output_file):
    data[data == 0] = np.nan
    profile.update(dtype=rasterio.float32, nodata=np.nan)
    with rasterio.open(output_file, 'w', **profile) as dst:
        dst.write(data, 1)

# Loop through each resampling method with a progress bar
for method in tqdm(resampling_methods, desc="Resampling Methods"):
    method_name = method.name.lower()
    method_dir = os.path.join(main_dir, method_name)
    os.makedirs(method_dir, exist_ok=True)

    final_output_path = os.path.join(method_dir, f'UK_imputation_resampled_12km_{method_name}_processed.tif')

    # Step 1: Resample raster2 to match the resolution of raster1 using the current method
    with rasterio.open(raster1_path) as src_ref:
        profile_ref = src_ref.profile
        transform_ref = src_ref.transform

    with rasterio.open(raster2_path) as src:
        data = src.read(1)
        resampled_data = np.empty(src_ref.shape, dtype=np.float32)
        reproject(
            source=rasterio.band(src, 1),
            destination=resampled_data,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=transform_ref,
            dst_crs=profile_ref['crs'],
            resampling=method
        )

    # Step 2: Process the resampled data to set zero values to NaN and save the final file
    process_geotiff(resampled_data, profile_ref, final_output_path)
    print(f"The final raster with NaN values for zero data using {method_name} has been created and saved.")

    # Store the processed file path
    processed_file_paths.append(final_output_path)

# Print or use the list of processed file paths as needed
print("List of all processed file paths:")
for path in processed_file_paths:
    print(path)


Resampling Methods:  14%|█▍        | 2/14 [00:00<00:01,  7.41it/s]

The final raster with NaN values for zero data using nearest has been created and saved.
The final raster with NaN values for zero data using bilinear has been created and saved.


Resampling Methods:  29%|██▊       | 4/14 [00:00<00:01,  5.58it/s]

The final raster with NaN values for zero data using cubic has been created and saved.
The final raster with NaN values for zero data using cubic_spline has been created and saved.


Resampling Methods:  43%|████▎     | 6/14 [00:01<00:01,  5.75it/s]

The final raster with NaN values for zero data using lanczos has been created and saved.
The final raster with NaN values for zero data using average has been created and saved.


Resampling Methods:  57%|█████▋    | 8/14 [00:01<00:01,  5.04it/s]

The final raster with NaN values for zero data using mode has been created and saved.
The final raster with NaN values for zero data using max has been created and saved.


Resampling Methods:  71%|███████▏  | 10/14 [00:01<00:00,  5.55it/s]

The final raster with NaN values for zero data using min has been created and saved.
The final raster with NaN values for zero data using med has been created and saved.


Resampling Methods:  86%|████████▌ | 12/14 [00:02<00:00,  5.79it/s]

The final raster with NaN values for zero data using q1 has been created and saved.
The final raster with NaN values for zero data using q3 has been created and saved.


Resampling Methods: 100%|██████████| 14/14 [00:03<00:00,  4.32it/s]

The final raster with NaN values for zero data using sum has been created and saved.
The final raster with NaN values for zero data using rms has been created and saved.
List of all processed file paths:
manual_method_resampling\nearest\UK_imputation_resampled_12km_nearest_processed.tif
manual_method_resampling\bilinear\UK_imputation_resampled_12km_bilinear_processed.tif
manual_method_resampling\cubic\UK_imputation_resampled_12km_cubic_processed.tif
manual_method_resampling\cubic_spline\UK_imputation_resampled_12km_cubic_spline_processed.tif
manual_method_resampling\lanczos\UK_imputation_resampled_12km_lanczos_processed.tif
manual_method_resampling\average\UK_imputation_resampled_12km_average_processed.tif
manual_method_resampling\mode\UK_imputation_resampled_12km_mode_processed.tif
manual_method_resampling\max\UK_imputation_resampled_12km_max_processed.tif
manual_method_resampling\min\UK_imputation_resampled_12km_min_processed.tif
manual_method_resampling\med\UK_imputation_resampled_1




In [2]:
import rasterio
from shapely.geometry import Point
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

# List of file locations
file_locations = [
    'manual_method_resampling/nearest/UK_imputation_resampled_12km_nearest_processed.tif',
    'manual_method_resampling/bilinear/UK_imputation_resampled_12km_bilinear_processed.tif',
    'manual_method_resampling/cubic/UK_imputation_resampled_12km_cubic_processed.tif',
    'manual_method_resampling/cubic_spline/UK_imputation_resampled_12km_cubic_spline_processed.tif',
    'manual_method_resampling/lanczos/UK_imputation_resampled_12km_lanczos_processed.tif',
    'manual_method_resampling/average/UK_imputation_resampled_12km_average_processed.tif',
    'manual_method_resampling/mode/UK_imputation_resampled_12km_mode_processed.tif',
    'manual_method_resampling/max/UK_imputation_resampled_12km_max_processed.tif',
    'manual_method_resampling/min/UK_imputation_resampled_12km_min_processed.tif',
    'manual_method_resampling/med/UK_imputation_resampled_12km_med_processed.tif',
    'manual_method_resampling/q1/UK_imputation_resampled_12km_q1_processed.tif',
    'manual_method_resampling/q3/UK_imputation_resampled_12km_q3_processed.tif',
    'manual_method_resampling/sum/UK_imputation_resampled_12km_sum_processed.tif',
    'manual_method_resampling/rms/UK_imputation_resampled_12km_rms_processed.tif'
]

# List to store new shapefile locations
new_shapefile_locations = []

for input_file in file_locations:
    # Open the GeoTIFF file
    with rasterio.open(input_file) as src:
        # Read the entire raster data into memory
        data = src.read(1)
        
        # Get the nodata value
        nodata_value = src.nodata

        # Create a mask for valid data points (including zero values)
        mask = (data != nodata_value) & ~np.isnan(data)
        
        # Get the row and column indices of valid data points
        row_indices, col_indices = np.where(mask)
        
        # Get the pixel values of the valid data points
        values = data[mask]
        
        # Initialize lists to store coordinates and values
        lons, lats = [], []
        
        # Iterate with progress tracking
        for row, col in tqdm(zip(row_indices, col_indices), total=len(row_indices), desc=f"Processing {input_file}"):
            lon, lat = src.xy(row, col)
            lons.append(lon)
            lats.append(lat)

    # Create a DataFrame from the valid points
    df = pd.DataFrame({
        'longitude': lons,
        'latitude': lats,
        'value': values
    })

    # Create a GeoDataFrame from the DataFrame
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs=src.crs)

    # Define the output shapefile path
    output_shapefile = input_file.replace('.tif', '.shp')

    # Save the GeoDataFrame as a shapefile
    gdf.to_file(output_shapefile)
    
    # Append the new shapefile location to the list
    new_shapefile_locations.append(output_shapefile)

    print(f"Shapefile saved successfully: {output_shapefile}")

# Print the list of new shapefile locations
print("New shapefile locations:")
for shapefile in new_shapefile_locations:
    print(shapefile)


Processing manual_method_resampling/nearest/UK_imputation_resampled_12km_nearest_processed.tif: 100%|██████████| 2446/2446 [00:00<00:00, 34843.20it/s]


Shapefile saved successfully: manual_method_resampling/nearest/UK_imputation_resampled_12km_nearest_processed.shp


Processing manual_method_resampling/bilinear/UK_imputation_resampled_12km_bilinear_processed.tif: 100%|██████████| 2446/2446 [00:00<00:00, 42772.26it/s]


Shapefile saved successfully: manual_method_resampling/bilinear/UK_imputation_resampled_12km_bilinear_processed.shp


Processing manual_method_resampling/cubic/UK_imputation_resampled_12km_cubic_processed.tif: 100%|██████████| 2446/2446 [00:00<00:00, 35226.65it/s]


Shapefile saved successfully: manual_method_resampling/cubic/UK_imputation_resampled_12km_cubic_processed.shp


Processing manual_method_resampling/cubic_spline/UK_imputation_resampled_12km_cubic_spline_processed.tif: 100%|██████████| 2446/2446 [00:00<00:00, 38845.85it/s]


Shapefile saved successfully: manual_method_resampling/cubic_spline/UK_imputation_resampled_12km_cubic_spline_processed.shp


Processing manual_method_resampling/lanczos/UK_imputation_resampled_12km_lanczos_processed.tif: 100%|██████████| 2204/2204 [00:00<00:00, 40065.73it/s]


Shapefile saved successfully: manual_method_resampling/lanczos/UK_imputation_resampled_12km_lanczos_processed.shp


Processing manual_method_resampling/average/UK_imputation_resampled_12km_average_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 31009.51it/s]


Shapefile saved successfully: manual_method_resampling/average/UK_imputation_resampled_12km_average_processed.shp


Processing manual_method_resampling/mode/UK_imputation_resampled_12km_mode_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 35694.15it/s]


Shapefile saved successfully: manual_method_resampling/mode/UK_imputation_resampled_12km_mode_processed.shp


Processing manual_method_resampling/max/UK_imputation_resampled_12km_max_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 34235.39it/s]

Shapefile saved successfully: manual_method_resampling/max/UK_imputation_resampled_12km_max_processed.shp







Processing manual_method_resampling/min/UK_imputation_resampled_12km_min_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 40588.44it/s]

Shapefile saved successfully: manual_method_resampling/min/UK_imputation_resampled_12km_min_processed.shp



Processing manual_method_resampling/med/UK_imputation_resampled_12km_med_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 40450.31it/s]

Shapefile saved successfully: manual_method_resampling/med/UK_imputation_resampled_12km_med_processed.shp



Processing manual_method_resampling/q1/UK_imputation_resampled_12km_q1_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 32682.44it/s]

Shapefile saved successfully: manual_method_resampling/q1/UK_imputation_resampled_12km_q1_processed.shp



Processing manual_method_resampling/q3/UK_imputation_resampled_12km_q3_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 37355.29it/s]


Shapefile saved successfully: manual_method_resampling/q3/UK_imputation_resampled_12km_q3_processed.shp


Processing manual_method_resampling/sum/UK_imputation_resampled_12km_sum_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 33213.61it/s]

Shapefile saved successfully: manual_method_resampling/sum/UK_imputation_resampled_12km_sum_processed.shp



Processing manual_method_resampling/rms/UK_imputation_resampled_12km_rms_processed.tif: 100%|██████████| 3035/3035 [00:00<00:00, 37669.12it/s]


Shapefile saved successfully: manual_method_resampling/rms/UK_imputation_resampled_12km_rms_processed.shp
New shapefile locations:
manual_method_resampling/nearest/UK_imputation_resampled_12km_nearest_processed.shp
manual_method_resampling/bilinear/UK_imputation_resampled_12km_bilinear_processed.shp
manual_method_resampling/cubic/UK_imputation_resampled_12km_cubic_processed.shp
manual_method_resampling/cubic_spline/UK_imputation_resampled_12km_cubic_spline_processed.shp
manual_method_resampling/lanczos/UK_imputation_resampled_12km_lanczos_processed.shp
manual_method_resampling/average/UK_imputation_resampled_12km_average_processed.shp
manual_method_resampling/mode/UK_imputation_resampled_12km_mode_processed.shp
manual_method_resampling/max/UK_imputation_resampled_12km_max_processed.shp
manual_method_resampling/min/UK_imputation_resampled_12km_min_processed.shp
manual_method_resampling/med/UK_imputation_resampled_12km_med_processed.shp
manual_method_resampling/q1/UK_imputation_resampled

## Idea 2 

Convert whole data into points take the precipitation file within each big square store values and average by number of values there in that region.

Convert DEM to Points 

In [13]:
import rasterio
from shapely.geometry import Point
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm

# Path to your GeoTIFF file
input_file = 'UK_DEM2.tif'

# Open the GeoTIFF file
with rasterio.open(input_file) as src:
    # Read the entire raster data into memory
    data = src.read(1)
    
    # Get the nodata value
    nodata_value = src.nodata

    # Create a mask for valid data points (including zero values)
    mask = (data != nodata_value) & ~np.isnan(data)
    
    # Get the row and column indices of valid data points
    row_indices, col_indices = np.where(mask)
    
    # Get the pixel values of the valid data points
    values = data[mask]
    
    # Initialize lists to store coordinates and values
    lons, lats = [], []
    
    # Iterate with progress tracking
    for row, col in tqdm(zip(row_indices, col_indices), total=len(row_indices), desc="Processing"):
        lon, lat = src.xy(row, col)
        lons.append(lon)
        lats.append(lat)

# Create a DataFrame from the valid points
df = pd.DataFrame({
    'longitude': lons,
    'latitude': lats,
    'value': values
})

# Create a GeoDataFrame from the DataFrame
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs=src.crs)

# Save the GeoDataFrame as a shapefile
output_shapefile = 'UK_DEM.shp'
gdf.to_file(output_shapefile)

print(f"Shapefile saved successfully: {output_shapefile}")


Processing: 100%|██████████| 1618746/1618746 [00:42<00:00, 38432.15it/s]


Shapefile saved successfully: UK_DEM.shp


Converting PPT data (raster) to Polygons (Vector)

In [2]:
import rasterio
from shapely.geometry import Point, box
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm

# Path to your GeoTIFF file
input_file = 'ppt_1980-2010.tif'

# Open the GeoTIFF file
with rasterio.open(input_file) as src:
    # Read the entire raster data into memory
    data = src.read(1)
    
    # Get the nodata value
    nodata_value = src.nodata

    # Create a mask for valid data points (including zero values)
    mask = (data != nodata_value) & ~np.isnan(data)
    
    # Get the row and column indices of valid data points
    row_indices, col_indices = np.where(mask)
    
    # Get the pixel values of the valid data points
    values = data[mask]
    
    # Initialize lists to store coordinates, values, and bounds
    lons, lats, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
    
    # Get pixel size (resolution)
    pixel_size_x, pixel_size_y = src.res
    
    # Iterate with progress tracking
    for row, col in tqdm(zip(row_indices, col_indices), total=len(row_indices), desc="Processing"):
        lon, lat = src.xy(row, col)
        lons.append(lon)
        lats.append(lat)
        x_min, y_min = lon - pixel_size_x / 2, lat - pixel_size_y / 2
        x_max, y_max = lon + pixel_size_x / 2, lat + pixel_size_y / 2
        x_mins.append(x_min)
        y_mins.append(y_min)
        x_maxs.append(x_max)
        y_maxs.append(y_max)

# Create a DataFrame from the valid points
df = pd.DataFrame({
    'longitude': lons,
    'latitude': lats,
    'value': values,
    'x_min': x_mins,
    'y_min': y_mins,
    'x_max': x_maxs,
    'y_max': y_maxs
})

# Create a GeoDataFrame from the DataFrame
gdf = gpd.GeoDataFrame(df, geometry=[box(x_min, y_min, x_max, y_max) for x_min, y_min, x_max, y_max in zip(df.x_min, df.y_min, df.x_max, df.y_max)], crs=src.crs)

# Save the GeoDataFrame as a shapefile
output_shapefile = 'UK_PPT_1980-2010.shp'
gdf.to_file(output_shapefile)

print(f"Shapefile saved successfully: {output_shapefile}")


Processing: 100%|██████████| 2502/2502 [00:00<00:00, 32559.78it/s]


Shapefile saved successfully: UK_PPT_1980-2010.shp


In [14]:
import geopandas as gpd
from shapely.geometry import Point
import rtree
import pandas as pd
from tqdm import tqdm

# Load the shapefile with polygons (bounding boxes)
polygons_gdf = gpd.read_file('UK_PPT_1980-2010.shp')

# Load the shapefile with points
points_gdf = gpd.read_file('UK_DEM.shp')

# Check the coordinate reference systems (CRS)
print(f"Polygons CRS: {polygons_gdf.crs}")
print(f"Points CRS: {points_gdf.crs}")

# Ensure both GeoDataFrames have the same CRS
if polygons_gdf.crs != points_gdf.crs:
    points_gdf = points_gdf.to_crs(polygons_gdf.crs)
    print(f"Points reprojected to CRS: {points_gdf.crs}")

# Initialize fields in polygons GeoDataFrame
polygons_gdf['val_dem'] = 0.0
polygons_gdf['counter'] = 0

# Create an R-tree index for the bounding boxes
idx = rtree.index.Index()
for poly_id, geometry in enumerate(polygons_gdf.geometry):
    idx.insert(poly_id, geometry.bounds)

# Iterate through each point with a progress bar
for i, point in enumerate(tqdm(points_gdf.geometry, desc="Processing points")):
    # Get possible matching bounding boxes using the R-tree index
    possible_matches_index = list(idx.intersection(point.bounds))
    possible_matches = polygons_gdf.iloc[possible_matches_index]
    
    # Check if the point is actually within any of these polygons
    for poly_id in possible_matches.index:
        if possible_matches.loc[poly_id].geometry.contains(point):
            # Update the val_dem and counter fields for the polygon
            polygons_gdf.at[poly_id, 'val_dem'] += points_gdf.loc[i, 'value']  
            polygons_gdf.at[poly_id, 'counter'] += 1




gdf = polygons_gdf



# Replace 'column1' and 'column2' with the names of your existing columns
column1 = 'val_dem'
column2 = 'counter'

# Check if the columns exist
if column1 not in gdf.columns or column2 not in gdf.columns:
    raise ValueError(f"One or both columns '{column1}' and '{column2}' do not exist in the shapefile.")

# Calculate the average of the two columns
gdf['average_dem'] = (gdf[column1] / gdf[column2])

# Print the first few rows to check the new column


# Save the updated GeoDataFrame to a new shapefile
output_shapefile_path = 'UK_DEM_12km.shp'
gdf.to_file(output_shapefile_path)

print(f"Updated shapefile saved successfully: {output_shapefile_path}")


Polygons CRS: PROJCS["unnamed",GEOGCS["unknown",DATUM["D_unnamed",SPHEROID["Spheroid",6377563.396,299.324961266495]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",49],PARAMETER["central_meridian",-2],PARAMETER["scale_factor",0.9996012717],PARAMETER["false_easting",400000],PARAMETER["false_northing",-100000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]
Points CRS: EPSG:4326
Points reprojected to CRS: PROJCS["unnamed",GEOGCS["unknown",DATUM["D_unnamed",SPHEROID["Spheroid",6377563.396,299.324961266495]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",49],PARAMETER["central_meridian",-2],PARAMETER["scale_factor",0.9996012717],PARAMETER["false_easting",400000],PARAMETER["false_northing",-100000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Processing points: 100%|██████████| 1618746/1618746 [10:40<00:00, 2526.58it/s]
  gdf.to_file(output_shapefile_path)
  ogr_write(


Updated shapefile saved successfully: UK_DEM_12km.shp


### Comparing MSE of resampled and Average method


In [18]:
import geopandas as gpd
import pandas as pd
import os

# List of file locations
shapefile_locations = [
    'manual_method_resampling/nearest/UK_imputation_resampled_12km_nearest_processed.shp',
    'manual_method_resampling/bilinear/UK_imputation_resampled_12km_bilinear_processed.shp',
    'manual_method_resampling/cubic/UK_imputation_resampled_12km_cubic_processed.shp',
    'manual_method_resampling/cubic_spline/UK_imputation_resampled_12km_cubic_spline_processed.shp',
    'manual_method_resampling/lanczos/UK_imputation_resampled_12km_lanczos_processed.shp',
    'manual_method_resampling/average/UK_imputation_resampled_12km_average_processed.shp',
    'manual_method_resampling/mode/UK_imputation_resampled_12km_mode_processed.shp',
    'manual_method_resampling/max/UK_imputation_resampled_12km_max_processed.shp',
    'manual_method_resampling/min/UK_imputation_resampled_12km_min_processed.shp',
    'manual_method_resampling/med/UK_imputation_resampled_12km_med_processed.shp',
    'manual_method_resampling/q1/UK_imputation_resampled_12km_q1_processed.shp',
    'manual_method_resampling/q3/UK_imputation_resampled_12km_q3_processed.shp',
    'manual_method_resampling/sum/UK_imputation_resampled_12km_sum_processed.shp',
    'manual_method_resampling/rms/UK_imputation_resampled_12km_rms_processed.shp'
]

# Read the reference shapefile
reference_gdf = gpd.read_file('UK_DEM_12km.shp')
reference_gdf = reference_gdf[['latitude', 'longitude', 'value']].rename(columns={'value': 'average_de'})

# Initialize a list to hold the average percentage differences
average_percentage_differences = []

# Process each shapefile
for shapefile in shapefile_locations:
    # Read the shapefile
    gdf = gpd.read_file(shapefile)
    gdf = gdf[['latitude', 'longitude', 'value']].rename(columns={'value': 'value'})
    
    # Merge with the reference GeoDataFrame
    merged_gdf = reference_gdf.merge(gdf, on=['latitude', 'longitude'], how='inner', suffixes=('_ref', '_comp'))
    
    # Calculate the percentage difference
    merged_gdf['percentage_difference'] = (merged_gdf['average_de'] - merged_gdf['value']).abs() / merged_gdf['average_de'] 
    
    # Calculate the average percentage difference
    avg_percentage_difference = merged_gdf['percentage_difference'].mean()
    
    # Append the result to the list
    average_percentage_differences.append({
        'file': os.path.basename(shapefile),
        'average_percentage_difference': avg_percentage_difference
    })

# Convert the results to a DataFrame
avg_percentage_diff_df = pd.DataFrame(average_percentage_differences)

# Save the results to a CSV file
avg_percentage_diff_df.to_csv('average_percentage_differences.csv', index=False)

print("Average percentage differences saved successfully to average_percentage_differences.csv")


Average percentage differences saved successfully to average_percentage_differences.csv


In [19]:
print(avg_percentage_diff_df)

                                                 file  \
0   UK_imputation_resampled_12km_nearest_processed...   
1   UK_imputation_resampled_12km_bilinear_processe...   
2    UK_imputation_resampled_12km_cubic_processed.shp   
3   UK_imputation_resampled_12km_cubic_spline_proc...   
4   UK_imputation_resampled_12km_lanczos_processed...   
5   UK_imputation_resampled_12km_average_processed...   
6     UK_imputation_resampled_12km_mode_processed.shp   
7      UK_imputation_resampled_12km_max_processed.shp   
8      UK_imputation_resampled_12km_min_processed.shp   
9      UK_imputation_resampled_12km_med_processed.shp   
10      UK_imputation_resampled_12km_q1_processed.shp   
11      UK_imputation_resampled_12km_q3_processed.shp   
12     UK_imputation_resampled_12km_sum_processed.shp   
13     UK_imputation_resampled_12km_rms_processed.shp   

    average_percentage_difference  
0                       41.327182  
1                       41.065341  
2                       40.934714  


In [20]:
import geopandas as gpd
import rasterio
from rasterio.transform import from_origin
import numpy as np

def vector_to_raster(vector_shapefile, reference_raster_file, output_raster_file, no_data_value=-9999):
    """
    Converts vector data from a shapefile to raster format using a reference raster.

    Parameters:
    - vector_shapefile: path to the vector shapefile (with columns: longitude, latitude, value)
    - reference_raster_file: path to the reference raster file
    - output_raster_file: path for the output raster file
    - no_data_value: value to represent no data in the raster
    """

    # Load the vector data
    vector_data = gpd.read_file(vector_shapefile)
    if 'longitude' not in vector_data.columns or 'latitude' not in vector_data.columns or 'value' not in vector_data.columns:
        raise ValueError("Shapefile must contain 'longitude', 'latitude', and 'value' columns.")

    # Open the reference raster
    with rasterio.open(reference_raster_file) as ref_raster:
        ref_transform = ref_raster.transform
        ref_crs = ref_raster.crs
        ref_width = ref_raster.width
        ref_height = ref_raster.height
        ref_pixel_size_x = ref_transform[0]
        ref_pixel_size_y = -ref_transform[4]  # Pixel size in y-direction is negative in geotransform
        ref_min_lon = ref_transform[2]
        ref_max_lat = ref_transform[5]

    # Create an empty raster array
    raster_data = np.full((ref_height, ref_width), no_data_value, dtype=np.float32)

    # Fill the raster data with values from vector data
    for _, row in vector_data.iterrows():
        lon = row['longitude']
        lat = row['latitude']
        value = row['average_de']
        x_idx = int((lon - ref_min_lon) / ref_pixel_size_x)
        y_idx = int((ref_max_lat - lat) / ref_pixel_size_y)
        if 0 <= x_idx < ref_width and 0 <= y_idx < ref_height:
            raster_data[y_idx, x_idx] = value

    # Create the output raster file
    with rasterio.open(
        output_raster_file, 'w', driver='GTiff',
        height=raster_data.shape[0], width=raster_data.shape[1],
        count=1, dtype=raster_data.dtype,
        crs=ref_crs, transform=ref_transform
    ) as dst:
        dst.write(raster_data, 1)
        dst.write_mask(raster_data != no_data_value)  # Set no-data mask

# Example usage
vector_shapefile = 'UK_DEM_12km.shp'
reference_raster_file = 'ppt_1980-2010.tif'
output_raster_file = 'DEM_raster_12km.tif'

vector_to_raster(vector_shapefile, reference_raster_file, output_raster_file)
