In [47]:
# IMPORTS
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from scipy import stats

import geopandas as gpd
import numpy as np
from shapely.geometry import box

from typing import Tuple, Dict, Any, List


import panel as pn
from matplotlib.figure import Figure

In [21]:
def geojson_to_numpy_grid(
    geojson_file: str,
    grid_size: Tuple[int, int],
    feature_column: str,
    target_crs: str = "EPSG:3857"  # Web Mercator projection
) -> Tuple[np.ndarray, np.ndarray, Dict[Any, int]]:
    """Rasterize one attribute column of a GeoJSON layer onto a regular grid.

    The layer is reprojected to ``target_crs`` and its total bounding box is
    divided into ``grid_size[0]`` rows by ``grid_size[1]`` columns. Each cell
    receives the integer code of the feature whose intersection with the cell
    has the largest area. Cells that no feature geometry actually intersects
    keep the value -1.

    Row 0 lies along the minimum-y edge of the bounding box and column 0 along
    the minimum-x edge.

    Args:
        geojson_file: Path of the file handed to ``gpd.read_file``.
        grid_size: ``(rows, columns)`` of the output array.
        feature_column: Name of the attribute column to rasterize.
        target_crs: CRS the layer is reprojected to before gridding.

    Returns:
        A tuple ``(grid, categories, category_to_int)`` where ``grid`` is the
        ``(rows, columns)`` integer array, ``categories`` is the result of
        ``unique()`` on the column and ``category_to_int`` maps each category
        to the code stored in ``grid``.
    """
    n_rows, n_cols = grid_size

    # Read the GeoJSON file and reproject to the target CRS
    gdf = gpd.read_file(geojson_file)
    gdf = gdf.to_crs(target_crs)

    # Codes follow the order in which categories first appear in the column
    categories = gdf[feature_column].unique()
    category_to_int = {cat: i for i, cat in enumerate(categories)}

    # -1 marks cells without data
    grid = np.full((n_rows, n_cols), -1, dtype=int)
    if gdf.empty:
        # No geometries: bounds are undefined, so there is nothing to rasterize
        return grid, categories, category_to_int

    def category_code(value):
        """Look up the integer code of a value read back from the frame."""
        try:
            return category_to_int[value]
        except KeyError:
            # NaN never compares equal to itself, so a NaN read from the frame
            # misses the NaN key that unique() put in the mapping.
            if isinstance(value, (float, np.floating)) and value != value:
                for key, code in category_to_int.items():
                    if isinstance(key, (float, np.floating)) and key != key:
                        return code
            raise

    # Cell edges of the fixed-size grid spanning the layer's bounding box
    minx, miny, maxx, maxy = gdf.total_bounds
    x_edges = np.linspace(minx, maxx, n_cols + 1)
    y_edges = np.linspace(miny, maxy, n_rows + 1)

    geometries = gdf.geometry
    values = gdf[feature_column]
    sindex = gdf.sindex

    for i in range(n_rows):
        for j in range(n_cols):
            cell = box(x_edges[j], y_edges[i], x_edges[j + 1], y_edges[i + 1])

            # The spatial index only compares bounding boxes ...
            candidate_rows = np.asarray(list(sindex.intersection(cell.bounds)), dtype=int)
            if candidate_rows.size == 0:
                continue

            # ... so drop candidates whose geometry does not really reach the
            # cell; otherwise an all-zero area vector would still pick one.
            touching = geometries.iloc[candidate_rows].intersects(cell).to_numpy(dtype=bool)
            if not touching.any():
                continue
            candidate_rows = candidate_rows[touching]

            # The feature covering the largest part of the cell wins
            overlap_areas = geometries.iloc[candidate_rows].intersection(cell).area.to_numpy()
            winner_row = candidate_rows[int(np.argmax(overlap_areas))]
            grid[i, j] = category_code(values.iloc[winner_row])

    return grid, categories, category_to_int


def geojson_to_numpy_grid_3d(
    geojson_file: str,
    grid_size: Tuple[int, int],  # Grid size for the output array
    target_crs: str = "EPSG:3857"  # Web Mercator projection
) -> Tuple[np.ndarray, Dict[str, np.ndarray], Dict[str, Dict[Any, int]]]:
    """Rasterize every attribute column of a GeoJSON layer onto one regular grid.

    The layer is reprojected to ``target_crs`` and its total bounding box is
    divided into ``grid_size[0]`` rows by ``grid_size[1]`` columns. For each
    cell the feature with the largest intersection area is selected once, and
    that same feature supplies the cell value for every attribute column.
    Cells that no feature geometry actually intersects hold -1 in all layers.

    Row 0 lies along the minimum-y edge of the bounding box and column 0 along
    the minimum-x edge.

    Args:
        geojson_file: Path of the file handed to ``gpd.read_file``.
        grid_size: ``(rows, columns)`` of each 2D layer.
        target_crs: CRS the layer is reprojected to before gridding.

    Returns:
        A tuple ``(grid_3d, feature_grids, feature_mappings)``:
        ``grid_3d`` stacks the per-column grids along axis 0 (in column order),
        ``feature_grids`` maps column name to its 2D grid and
        ``feature_mappings`` maps column name to its category-to-code dict.
    """
    n_rows, n_cols = grid_size

    # Read the GeoJSON file and reproject to the target CRS
    gdf = gpd.read_file(geojson_file)
    gdf = gdf.to_crs(target_crs)

    # Every column except the geometry column is rasterized
    feature_columns = [col for col in gdf.columns if col != gdf.geometry.name]

    def category_code(mapping, value):
        """Look up the integer code of a value read back from the frame."""
        try:
            return mapping[value]
        except KeyError:
            # NaN never compares equal to itself, so a NaN read from the frame
            # misses the NaN key that unique() put in the mapping.
            if isinstance(value, (float, np.floating)) and value != value:
                for key, code in mapping.items():
                    if isinstance(key, (float, np.floating)) and key != key:
                        return code
            raise

    # Positional row of the winning feature for each cell (-1 = no feature).
    # This depends only on geometry, so it is computed once for all columns.
    winner_rows = np.full((n_rows, n_cols), -1, dtype=int)
    if not gdf.empty:
        minx, miny, maxx, maxy = gdf.total_bounds
        x_edges = np.linspace(minx, maxx, n_cols + 1)
        y_edges = np.linspace(miny, maxy, n_rows + 1)

        geometries = gdf.geometry
        sindex = gdf.sindex

        for i in range(n_rows):
            for j in range(n_cols):
                cell = box(x_edges[j], y_edges[i], x_edges[j + 1], y_edges[i + 1])

                # The spatial index only compares bounding boxes ...
                candidate_rows = np.asarray(list(sindex.intersection(cell.bounds)), dtype=int)
                if candidate_rows.size == 0:
                    continue

                # ... so drop candidates whose geometry does not really reach
                # the cell; otherwise an all-zero area vector would still pick one.
                touching = geometries.iloc[candidate_rows].intersects(cell).to_numpy(dtype=bool)
                if not touching.any():
                    continue
                candidate_rows = candidate_rows[touching]

                # The feature covering the largest part of the cell wins
                overlap_areas = geometries.iloc[candidate_rows].intersection(cell).area.to_numpy()
                winner_rows[i, j] = candidate_rows[int(np.argmax(overlap_areas))]

    # Distinct winning rows, plus for every covered cell the index of its row
    # in that list, so each column needs one lookup per distinct winner only.
    covered = winner_rows >= 0
    chosen_rows, inverse = np.unique(winner_rows[covered], return_inverse=True)

    # Dictionaries holding the grid and category mapping of each feature column
    feature_grids = {}
    feature_mappings = {}

    for feature_column in feature_columns:
        values = gdf[feature_column]

        # Codes follow the order in which categories first appear in the column
        unique_categories = values.unique()
        category_to_int = {cat: i for i, cat in enumerate(unique_categories)}

        chosen_codes = np.array(
            [category_code(category_to_int, values.iloc[row]) for row in chosen_rows],
            dtype=int,
        )

        # -1 marks cells without data
        grid = np.full((n_rows, n_cols), -1, dtype=int)
        grid[covered] = chosen_codes[inverse]

        feature_grids[feature_column] = grid
        feature_mappings[feature_column] = category_to_int

    # Stack all grids into a 3D array; np.stack rejects an empty list, so a
    # layer without attribute columns yields an empty (0, rows, cols) stack.
    if feature_grids:
        grid_3d = np.stack(list(feature_grids.values()), axis=0)
    else:
        grid_3d = np.empty((0, n_rows, n_cols), dtype=int)

    return grid_3d, feature_grids, feature_mappings


def inspect_geojson(file_path):
    """Print a quick overview of a GeoJSON file.

    Reads the file with ``gpd.read_file`` and prints, in order: the column
    names (one per line), the first five rows, and the output of
    ``GeoDataFrame.info()``. Nothing is returned.
    """
    frame = gpd.read_file(file_path)

    # Column listing, one bullet per column
    print("Columns in the GeoJSON file:")
    for col in frame.columns.tolist():
        print(f"- {col}")

    # First rows of the table
    preview = frame.head()
    print("\nSample data (first 5 rows):")
    print(preview)

    # info() writes its summary to stdout itself
    print("\nDataFrame Info:")
    frame.info()

In [49]:
def geojson_to_numpy_grid_3d_2(
    geojson_file: str,
    grid_size: Tuple[int, int],  # Grid size for the output array
    target_crs: str = "EPSG:3857"  # Web Mercator projection
) -> Tuple[np.ndarray, Dict[str, np.ndarray], Dict[str, Dict[Any, int]], Dict[str, Any]]:
    """Rasterize every attribute column of a GeoJSON layer, with file-prefixed names.

    The layer is reprojected to ``target_crs`` and its total bounding box is
    divided into ``grid_size[0]`` rows by ``grid_size[1]`` columns. For each
    cell the feature with the largest intersection area is selected once, and
    that same feature supplies the cell value for every attribute column.
    Cells that no feature geometry actually intersects hold -1 in all layers.

    Column names and category labels are prefixed with the file's base name
    (without extension) followed by ``_``. Category labels are therefore
    strings built from the original values.

    Row 0 lies along the minimum-y edge of the bounding box and column 0 along
    the minimum-x edge.

    Args:
        geojson_file: Path of the file handed to ``gpd.read_file``.
        grid_size: ``(rows, columns)`` of each 2D layer.
        target_crs: CRS the layer is reprojected to before gridding.

    Returns:
        A tuple ``(grid_3d, feature_grids, feature_mappings, geospatial_info)``:
        ``grid_3d`` stacks the per-column grids along axis 0 (in column order),
        ``feature_grids`` maps prefixed column name to its 2D grid,
        ``feature_mappings`` maps prefixed column name to its label-to-code
        dict, and ``geospatial_info`` holds the bounds under ``'transform'``
        as ``(minx, miny, maxx, maxy)`` and the CRS under ``'crs'``.
    """
    n_rows, n_cols = grid_size

    # Read the GeoJSON file and reproject to the target CRS
    gdf = gpd.read_file(geojson_file)
    gdf = gdf.to_crs(target_crs)

    # Total bounds of all geometries
    minx, miny, maxx, maxy = gdf.total_bounds

    # Every column except the geometry column is rasterized
    feature_columns = [col for col in gdf.columns if col != gdf.geometry.name]

    # File name without extension, used as prefix for columns and labels
    filename_prefix = os.path.splitext(os.path.basename(geojson_file))[0]

    # Geospatial information needed to convert the grid back to GeoJSON
    geospatial_info = {
        'transform': (minx, miny, maxx, maxy),
        'crs': target_crs
    }

    # Positional row of the winning feature for each cell (-1 = no feature).
    # This depends only on geometry, so it is computed once for all columns.
    winner_rows = np.full((n_rows, n_cols), -1, dtype=int)
    if not gdf.empty:
        x_edges = np.linspace(minx, maxx, n_cols + 1)
        y_edges = np.linspace(miny, maxy, n_rows + 1)

        geometries = gdf.geometry
        sindex = gdf.sindex

        for i in range(n_rows):
            for j in range(n_cols):
                cell = box(x_edges[j], y_edges[i], x_edges[j + 1], y_edges[i + 1])

                # The spatial index only compares bounding boxes ...
                candidate_rows = np.asarray(list(sindex.intersection(cell.bounds)), dtype=int)
                if candidate_rows.size == 0:
                    continue

                # ... so drop candidates whose geometry does not really reach
                # the cell; otherwise an all-zero area vector would still pick one.
                touching = geometries.iloc[candidate_rows].intersects(cell).to_numpy(dtype=bool)
                if not touching.any():
                    continue
                candidate_rows = candidate_rows[touching]

                # The feature covering the largest part of the cell wins
                overlap_areas = geometries.iloc[candidate_rows].intersection(cell).area.to_numpy()
                winner_rows[i, j] = candidate_rows[int(np.argmax(overlap_areas))]

    # Distinct winning rows, plus for every covered cell the index of its row
    # in that list, so each column needs one lookup per distinct winner only.
    covered = winner_rows >= 0
    chosen_rows, inverse = np.unique(winner_rows[covered], return_inverse=True)

    # Dictionaries holding the grid and category mapping of each feature column
    feature_grids = {}
    feature_mappings = {}

    for feature_column in feature_columns:
        values = gdf[feature_column]

        # Codes follow the order in which categories first appear in the column
        unique_categories = values.unique()

        # Prefix each feature class with the filename
        category_to_int = {f"{filename_prefix}_{cat}": i for i, cat in enumerate(unique_categories)}

        chosen_codes = np.array(
            [category_to_int[f"{filename_prefix}_{values.iloc[row]}"] for row in chosen_rows],
            dtype=int,
        )

        # -1 marks cells without data
        grid = np.full((n_rows, n_cols), -1, dtype=int)
        grid[covered] = chosen_codes[inverse]

        feature_grids[f"{filename_prefix}_{feature_column}"] = grid
        feature_mappings[f"{filename_prefix}_{feature_column}"] = category_to_int

    # Stack all grids into a 3D array; np.stack rejects an empty list, so a
    # layer without attribute columns yields an empty (0, rows, cols) stack.
    if feature_grids:
        grid_3d = np.stack(list(feature_grids.values()), axis=0)
    else:
        grid_3d = np.empty((0, n_rows, n_cols), dtype=int)

    return grid_3d, feature_grids, feature_mappings, geospatial_info

In [5]:
# Path of the clipped geology layer. Set the GEOLOGY_GEOJSON environment
# variable to point the notebook at the file on another machine; without it
# the original local path is used.
data = os.environ.get(
    "GEOLOGY_GEOJSON",
    r"C:\Users\TyHow\Documents\3. Work\GIS Stuff\ML_pilot_data\geology_clipped.geojson",
)

In [51]:

# Output resolution: (rows, columns)
grid_size = (10, 10)

# Rasterize every attribute column of the layer
grid_3d, feature_grids, feature_mappings, geospatial_info = geojson_to_numpy_grid_3d_2(data, grid_size)

# Summary of the stacked array
print("Shape of the 3D grid array:", grid_3d.shape)

# One line per rasterized column
print("Feature grids:")
for key in feature_grids:
    value = feature_grids[key]
    print(f"Feature column: {key}, Grid shape: {value.shape}")

# One line per label-to-code mapping
print("Feature mappings:")
for key in feature_mappings:
    mapping = feature_mappings[key]
    print(f"Mapping for {key}: {mapping}")

Shape of the 3D grid array: (15, 10, 10)
Feature grids:
Feature column: geology_clipped_fid, Grid shape: (10, 10)
Feature column: geology_clipped_CD_CORRELA, Grid shape: (10, 10)
Feature column: geology_clipped_ESCALA, Grid shape: (10, 10)
Feature column: geology_clipped_SUBTIPO_DE, Grid shape: (10, 10)
Feature column: geology_clipped_CODIGO, Grid shape: (10, 10)
Feature column: geology_clipped_DEFINICION, Grid shape: (10, 10)
Feature column: geology_clipped_GEOCHRON_A, Grid shape: (10, 10)
Feature column: geology_clipped_GEOCHRON_1, Grid shape: (10, 10)
Feature column: geology_clipped_NOMBRE, Grid shape: (10, 10)
Feature column: geology_clipped_NOTA, Grid shape: (10, 10)
Feature column: geology_clipped_UNIDAD_GEN, Grid shape: (10, 10)
Feature column: geology_clipped_ERROR_MAX, Grid shape: (10, 10)
Feature column: geology_clipped_ERROR_MIN, Grid shape: (10, 10)
Feature column: geology_clipped_SHAPE_STAr, Grid shape: (10, 10)
Feature column: geology_clipped_SHAPE_STLe, Grid shape: (10, 

In [52]:
# Initialize the Panel extension; this has to run before the Panel objects
# built in this cell are displayed in the notebook
pn.extension()

# Function to plot a specific layer using Matplotlib
def plot_layer_bokeh(layer_index):
    """Render one layer of ``grid_3d`` as a Panel Matplotlib pane.

    Reads the notebook-level ``grid_3d`` and ``feature_grids``: the layer at
    ``layer_index`` is drawn as an image and titled with the 1-based layer
    number plus the matching key of ``feature_grids``.

    Args:
        layer_index: Index along axis 0 of ``grid_3d``.

    Returns:
        A ``pn.pane.Matplotlib`` wrapping the figure.
    """
    fig = Figure(figsize=(4, 3))
    ax = fig.add_subplot(111)
    # The grid functions put row 0 on the minimum-y band of the layer, so the
    # image origin must be the lower-left corner; imshow's default
    # (origin='upper') would draw the layer flipped vertically.
    im = ax.imshow(
        grid_3d[layer_index],
        cmap='tab20',
        interpolation='nearest',
        aspect='auto',
        origin='lower',
    )
    ax.set_title(f"Layer {layer_index + 1}: {list(feature_grids.keys())[layer_index]}")
    fig.colorbar(im, ax=ax, label='Classes')
    ax.set_xlabel('X Coordinate')
    ax.set_ylabel('Y Coordinate')
    return pn.pane.Matplotlib(fig, tight=True)

# Slider choosing which layer of the stacked grid is shown
layer_slider = pn.widgets.IntSlider(
    name='Layer Index',
    start=0,
    end=grid_3d.shape[0] - 1,
    step=1,
    value=0,
)

# Tie the plotting function to the slider's current value
panel = pn.bind(plot_layer_bokeh, layer_index=layer_slider)

# Slider on top, plot below; the trailing expression is the cell's output
pn.Column(layer_slider, panel).servable()



BokehModel(combine_events=True, render_bundle={'docs_json': {'43aaf4dc-ef3b-4ec9-b60f-96870b424c2c': {'version…