# Notebook for SOM Training

By: Ty Janoski

Updated 1/7/2026

## Setup

### Imports

In [1]:
# Import Statements
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cmweather  # noqa: F401
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scienceplots  # noqa: F401
import xarray as xr
from minisom import MiniSom
from sklearn.manifold import MDS
from sklearn.metrics import pairwise_distances

# Select the Agg backend. Every figure in this notebook is written to disk
# with savefig() and then closed rather than shown.
matplotlib.use("Agg")

# Global plot styling. The style names are presumably registered by the
# `scienceplots` import above — confirm if that import is ever removed.
plt.style.use(["science", "nature", "grid"])
# Text is rendered through LaTeX, so special characters in labels/titles
# (e.g. "&") have to be escaped in the strings passed to matplotlib.
plt.rcParams["text.usetex"] = True
# IPython magic: inline figure format for the notebook front end.
%config InlineBackend.figure_format = 'png'


### Data Loading

In [3]:
# Directory holding the intermediate SOM files
path = "/mnt/drive2/SOM_intermediate_files/"

# Z500 at flash-flood event times, read fully into memory.
# NOTE(review): later cells also use Z500_ffe, Z500_norm_ffe, tp_ffe, mslp_ffe
# and tcwv_ffe, none of which are loaded in the cells visible here — confirm
# where they are read in so the notebook survives Restart & Run All.
Z500_norm_weighted = xr.load_dataarray(path + "era5_Z500_norm_weighted.nc")


### Reshape Data

In [4]:
# Merge the two spatial dimensions into a single "features" axis, then pull
# out the raw array used for SOM training.
Z500_flat = Z500_norm_weighted.stack(features=["lat", "lon"])
X = Z500_flat.values  # presumably (time, lat * lon) — TODO confirm dim order

## SOM Training

We are going to train our SOM with random initialization and online training. We will also use two phases: a "coarse" phase with a larger sigma and learning rate, then a "fine" phase with a smaller learning rate and sigma.

### Set SOM parameters

In [5]:
# SOM lattice dimensions
xdim = 5
ydim = 4

# Iteration counts: phase 1 (coarse) and phase 2 (fine)
n1 = 2000
n2 = 8000

# Starting neighborhood widths; phase 1 starts at the lattice diagonal
sig1 = np.sqrt(xdim**2 + ydim**2)
sig2 = 1.5

# Starting learning rates for the two phases
lr1 = 0.1
lr2 = 0.01

# Seed passed to the SOM for reproducibility
random_seed = 42


### Train SOM

In [6]:
# Settings for the coarse phase; the fine phase below reuses the same instance
som_settings = {
    "sigma": sig1,
    "learning_rate": lr1,
    "decay_function": "linear_decay_to_zero",
    "sigma_decay_function": "linear_decay_to_one",
    "neighborhood_function": "gaussian",
    "random_seed": random_seed,
}
som = MiniSom(xdim, ydim, input_len=X.shape[1], **som_settings)

# Initialize the weights from X, then run phase 1 (n1 random-order iterations)
som.random_weights_init(X)
som.train_random(X, n1, verbose=True)

# Phase 2: overwrite the starting sigma / learning rate on the instance's
# private attributes, then continue training the same weights for n2 iterations
som._sigma = sig2  # type: ignore
som._learning_rate = lr2
som.train_random(X, n2, verbose=True)


 [ 2000 / 2000 ] 100% - 0:00:00 left 
 quantization error: 87.01323562117507
 [ 8000 / 8000 ] 100% - 0:00:00 left 
 quantization error: 81.58734227901603


### Grab important fields

In [7]:
# Number of SOM nodes and the codebook flattened to (n_nodes, n_features);
# row index of a node (i, j) is i * ydim + j
n_nodes = xdim * ydim
weights = som.get_weights().reshape(n_nodes, -1)

# U-matrix, transposed so it is indexed [y, x] for plotting
u_matrix = som.distance_map().T

# Best-matching unit (x, y) of every training sample
bmus = np.array([som.winner(sample) for sample in X])

# Hit map: number of samples won by each node, also transposed to [y, x]
hit_map = np.zeros((xdim, ydim))
np.add.at(hit_map, (bmus[:, 0], bmus[:, 1]), 1)
hit_map = hit_map.T

# 2-D embedding of the node weights from their pairwise distances.
# The notebook labels this a "Sammon" map; the coordinates come from
# scikit-learn's MDS on the precomputed distance matrix.
D = pairwise_distances(weights)
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=42, n_init=4)
coords = mds.fit_transform(D)

# Spatial coordinates and grid size of the input field
lat = Z500_norm_weighted.lat
lon = Z500_norm_weighted.lon
n_lat, n_lon = lat.size, lon.size

# Node weights back on the map grid: (xdim, ydim, lat, lon)
nodes = weights.reshape(xdim, ydim, n_lat, n_lon)




## Plots

### U-Matrix, Hit Map, and Sammon Map

In [8]:
# Two-panel view of the SOM lattice: U-matrix (left) and hit map (right).
fig, axes = plt.subplots(1, 2, layout="constrained", figsize=(6, 3), dpi=600)

panels = (
    (u_matrix, "viridis", "U-Matrix (Mean Inter-Node Distance)"),
    (hit_map, "plasma", "Hit Map (Samples per Node)"),
)

for ax, (grid, cmap, title) in zip(axes, panels):
    # Both arrays are stored as [y, x], so the SOM x-index runs along columns
    im = ax.imshow(grid, cmap=cmap, origin="lower")
    ax.set_title(title, fontsize=7)
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04, shrink=0.7)

    # axis styling
    ax.set_xticks(np.arange(xdim))
    ax.set_yticks(np.arange(ydim))
    ax.set_xlabel("X-index", fontsize=6)
    ax.set_ylabel("Y-index", fontsize=6)

# The path previously contained a doubled slash ("Z500-big-SOM//...")
fig.savefig("figs/Z500-big-SOM/Z500_som_u_matrix_hit_map.png")
plt.close(fig)

In [9]:
# Undo the plotting transpose and flatten to node order (index = i * ydim + j),
# which is the row order of `coords`
U_flat = u_matrix.T.ravel()
hits_flat = hit_map.T.ravel()

# Marker area grows with the node's share of the largest hit count
hits_scaled = 30 + 250 * (hits_flat / hits_flat.max())

fig, ax = plt.subplots(figsize=(7, 7))

# Nodes: color = U-matrix value, size = scaled hit count
sc = ax.scatter(
    coords[:, 0],
    coords[:, 1],
    c=U_flat,
    s=hits_scaled,
    cmap="balance",
    edgecolor="k",
    linewidth=0.5,
    zorder=3,
)

# Lattice edges: connect each node to its (i, j + 1) and (i + 1, j) neighbors
edges = []
for i in range(xdim):
    for j in range(ydim):
        here = i * ydim + j
        if j + 1 < ydim:
            edges.append((here, here + 1))
        if i + 1 < xdim:
            edges.append((here, here + ydim))

for start, end in edges:
    ax.plot(
        coords[[start, end], 0],
        coords[[start, end], 1],
        "k-",
        lw=0.6,
        alpha=0.4,
    )

# Label every node with its lattice index (i, j)
for node, (px, py) in enumerate(coords):
    ix, iy = divmod(node, ydim)
    ax.text(px, py, f"({ix},{iy})", fontsize=8, ha="center", va="center", zorder=5)

ax.set_title("Sammon / MDS Distortion Grid\nU-Matrix (Color) \\& Node Frequency (Size)")
ax.axis("off")
fig.colorbar(sc, ax=ax, label="U-Matrix (Avg. Neighbor Distance)")
fig.savefig("figs/Z500-big-SOM/Z500_som_sammon_mds.png", bbox_inches="tight")
plt.close(fig)

### Node Weights Map

In [14]:
# One map per SOM node, arranged so that column = x-index and row = y-index
plate_carree = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 4),
    subplot_kw={"projection": plate_carree},
    constrained_layout=True,
    dpi=600,
)

# Shared contour levels for every panel
levels = np.arange(-1.4, 1.41, 0.2)

for i, j in np.ndindex(xdim, ydim):
    ax = axes[j, i]

    # Weight vector of node (i, j) on the lat/lon grid
    im = ax.contourf(
        lon,
        lat,
        nodes[i, j],
        cmap="balance",
        levels=levels,
        transform=plate_carree,
        extend="both",
    )

    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)
    ax.set_title(f"Node ({i},{j})", fontsize=6)
    ax.set_xticks([])
    ax.set_yticks([])

# Single colorbar shared by all panels (taken from the last panel drawn)
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("Standardized 500-hPa Anomaly", fontsize=6)

fig.suptitle("Node Weight Patterns", fontsize=8)
fig.savefig("figs/Z500-big-SOM/Z500_som_node_weights.png", bbox_inches="tight")
plt.close(fig)

### Anomaly Composite Map

In [16]:
# Per-node composite of the standardized Z500 anomalies (NaN where a node has
# no members) and the number of members in each node.
# NOTE(review): Z500_norm_ffe is not defined in the visible cells — confirm
# where it is loaded.
patterns = np.full((xdim, ydim, n_lat, n_lon), np.nan)
counts = np.zeros((xdim, ydim), dtype=int)

for i, j in np.ndindex(xdim, ydim):
    # Time indices whose best-matching unit is node (i, j)
    members = np.flatnonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))
    counts[i, j] = members.size

    if members.size == 0:
        continue

    patterns[i, j] = Z500_norm_ffe.isel(time=members).mean("time").values

In [None]:
# Composite standardized-anomaly map for every SOM node
# (column = x-index, row = y-index).
proj = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": proj},
    constrained_layout=True,
    dpi=600,
)

# Shared contour levels, built once instead of once per panel
anomaly_levels = np.arange(-2.0, 2.1, 0.25)

for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]
        field = patterns[i, j, :, :]

        im = ax.contourf(
            lon,
            lat,
            field,
            cmap="balance",
            levels=anomaly_levels,
            transform=proj,
            # Without this, anomalies beyond the level range are left
            # unfilled and look like near-zero values on a diverging colormap
            extend="both",
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)
        ax.set_title(f"({i},{j})  N={counts[i, j]}", fontsize=6)

        ax.set_xticks([])
        ax.set_yticks([])

# one colorbar
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("Standardized Anomaly", fontsize=6)

fig.suptitle("SOM Composite 500-hPa Standardized Anomalies", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_composite_anomalies.png", bbox_inches="tight")
plt.close(fig)


### Composite Mean Map

In [None]:
# Time index of the event left out of the height composites ("Lili")
excluded_event_idx = 5

# Start from fresh buffers. Reusing a previously filled `patterns` array would
# leave an earlier composite in any node that becomes empty once the excluded
# event is removed.
patterns = np.full((xdim, ydim, n_lat, n_lon), np.nan)
counts = np.zeros((xdim, ydim), dtype=int)

for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]

        # exclude Lili
        idx = idx[idx != excluded_event_idx]

        counts[i, j] = len(idx)

        if len(idx) > 0:
            patterns[i, j] = Z500_ffe.isel(time=idx).mean("time").values

In [None]:
# Composite 500-hPa height for every SOM node: black contours with inline
# labels over semi-transparent filled contours of the same field.
proj = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": proj},
    constrained_layout=True,
    dpi=600,
)

# One set of levels shared by the line and filled contours
height_levels = np.arange(552, 595, 3)

for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]

        # Convert once so both layers draw exactly the same values (previously
        # one layer used `/ 98.1` and the other `/ 10 / 9.81`).
        # Presumably geopotential (m^2 s^-2) -> decameters — TODO confirm units.
        height_dam = patterns[i, j, :, :] / 98.1

        c = ax.contour(
            lon,
            lat,
            height_dam,
            levels=height_levels,
            colors="black",
            transform=proj,
            linewidths=0.6,
        )
        cf = ax.contourf(
            lon,
            lat,
            height_dam,
            cmap="HomeyerRainbow",
            levels=height_levels,
            transform=proj,
            alpha=0.7,
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)
        ax.set_title(f"({i},{j})  N={counts[i, j]}", fontsize=5)

        ax.set_xticks([])
        ax.set_yticks([])

        # Add inline labels
        ax.clabel(c, c.levels, fontsize=5)

# one colorbar
cbar = fig.colorbar(cf, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("500-hPa Height (dam)", fontsize=6)

fig.suptitle("SOM Composite 500-hPa Heights", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_composite_heights.png", bbox_inches="tight")
plt.close(fig)


### Composite SD Map

In [19]:
# Per-node mean and standard deviation (over members) of Z500;
# nodes with no members stay NaN
node_shape = (xdim, ydim, n_lat, n_lon)
stdevs = np.full(node_shape, np.nan)
means = np.full(node_shape, np.nan)

for i, j in np.ndindex(xdim, ydim):
    # Time indices whose best-matching unit is node (i, j)
    members = np.flatnonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))
    if members.size == 0:
        continue

    node_fields = Z500_ffe.isel(time=members)
    stdevs[i, j] = node_fields.std("time").values
    means[i, j] = node_fields.mean("time").values


In [None]:
# Member spread for every SOM node: filled contours show the standard
# deviation of 500-hPa height, black contours show the node-mean height.
proj = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": proj},
    constrained_layout=True,
    dpi=600,
)

height_levels = np.arange(552, 595, 3)
spread_levels = np.arange(0, 16, 2)

for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]

        # Presumably geopotential (m^2 s^-2) -> decameters — TODO confirm units
        mean_dam = means[i, j, :, :] / 98.1
        stdev_dam = stdevs[i, j, :, :] / 98.1

        # Count members straight from the BMUs so N matches the samples that
        # went into `stdevs` / `means` (the shared `counts` array is recomputed
        # by other cells with a different member selection).
        n_members = np.count_nonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))

        c = ax.contour(
            lon,
            lat,
            mean_dam,
            levels=height_levels,
            colors="black",
            transform=proj,
            linewidths=0.6,
        )

        cf = ax.contourf(
            lon,
            lat,
            stdev_dam,
            cmap="Purples",
            levels=spread_levels,
            transform=proj,
            alpha=0.7,
            extend="max",
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)
        ax.set_title(f"({i},{j})  N={n_members}", fontsize=5)

        ax.set_xticks([])
        ax.set_yticks([])

        # Add inline labels
        ax.clabel(c, c.levels, fontsize=5)

# one colorbar — it belongs to the filled (standard deviation) field, so the
# label says so instead of repeating the mean-height label
cbar = fig.colorbar(cf, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("500-hPa Height Std. Dev. (dam)", fontsize=6)

fig.suptitle("500-hPa Heights Std Dev Across Members", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_std_heights.png", bbox_inches="tight")
plt.close(fig)


### Maps of Individual Nodes

In [None]:
# One figure per SOM node showing the 500-hPa height map of every member.

# Set number of columns
cols = 5
proj = ccrs.PlateCarree()
height_levels = np.arange(540, 603, 3)

# Iterate through each node
for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]
        n = len(idx)

        # An empty node would give rows == 0, which plt.subplots() rejects;
        # there is nothing to draw, so skip it.
        if n == 0:
            continue

        # Set number of rows
        rows = int(np.ceil(n / cols))

        # squeeze=False keeps `axes` 2-D even when there is a single row
        fig, axes = plt.subplots(
            rows,
            cols,
            figsize=(3 * cols, 2.5 * rows),
            subplot_kw={"projection": proj},
            layout="constrained",
            squeeze=False,
        )

        for k, ax in enumerate(axes.flat):
            # Panels beyond the last member are blanked out
            if k >= n:
                ax.axis("off")
                continue

            data = Z500_ffe.isel(time=idx[k])
            ax.contourf(
                data.lon,
                data.lat,
                data / 10 / 9.81,
                levels=height_levels,
                cmap="balance",
                transform=proj,
                extend="both",
            )
            ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
            ax.add_feature(cfeature.BORDERS, linewidth=0.3)
            ax.add_feature(cfeature.STATES, linewidth=0.2)
            ax.set_title(str(pd.to_datetime(data.time.values))[:16])

        fig.suptitle(f"Node ({i},{j})  N={n}", fontsize=8, y=1.02)
        fig.savefig(f"figs/Z500-big-SOM/indiv-nodes/node_{i}_{j}.png")
        plt.close(fig)


In [None]:
# One figure per SOM node showing the precipitation map of every member.

# Set number of columns
cols = 5
proj = ccrs.PlateCarree()
precip_levels = np.arange(0, 28, 3)

# Iterate through each node
for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]
        n = len(idx)

        # An empty node would give rows == 0, which plt.subplots() rejects,
        # and would leave no mappable for the colorbar; skip it.
        if n == 0:
            continue

        # Set number of rows
        rows = int(np.ceil(n / cols))

        # squeeze=False keeps `axes` 2-D even when there is a single row
        fig, axes = plt.subplots(
            rows,
            cols,
            figsize=(3 * cols, 2.5 * rows),
            subplot_kw={"projection": proj},
            layout="constrained",
            dpi=300,
            squeeze=False,
        )

        for k, ax in enumerate(axes.flat):
            # Panels beyond the last member are blanked out
            if k >= n:
                ax.axis("off")
                continue

            data = tp_ffe.isel(valid_time=idx[k])
            im = ax.contourf(
                data.longitude,
                data.latitude,
                data * 1000,  # presumably m -> mm, per the colorbar label
                levels=precip_levels,
                cmap="HomeyerRainbow",
                transform=proj,
                extend="both",
            )
            ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
            ax.add_feature(cfeature.BORDERS, linewidth=0.3)
            ax.add_feature(cfeature.STATES, linewidth=0.2)
            ax.set_title(str(pd.to_datetime(data.valid_time.values))[:16])

        # Add a colorbar (all panels share the same levels and colormap)
        cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
        cbar.set_label("Total Precipitation (mm)", fontsize=6)

        fig.suptitle(f"Node ({i},{j})  N={n}", fontsize=8, y=1.02)
        fig.savefig(f"figs/Z500-big-SOM/indiv-nodes/node_{i}_{j}_precip.png")
        plt.close(fig)


In [None]:
# One figure per SOM node showing the MSLP contours of every member.

# Set number of columns
cols = 5
proj = ccrs.PlateCarree()
mslp_levels = np.arange(976, 1041, 4)

# Iterate through each node
for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]
        n = len(idx)

        # An empty node would give rows == 0, which plt.subplots() rejects;
        # there is nothing to draw, so skip it.
        if n == 0:
            continue

        # Set number of rows
        rows = int(np.ceil(n / cols))

        # squeeze=False keeps `axes` 2-D even when there is a single row
        fig, axes = plt.subplots(
            rows,
            cols,
            figsize=(3 * cols, 2.5 * rows),
            subplot_kw={"projection": proj},
            layout="constrained",
            dpi=300,
            squeeze=False,
        )

        for k, ax in enumerate(axes.flat):
            # Panels beyond the last member are blanked out
            if k >= n:
                ax.axis("off")
                continue

            data = mslp_ffe.isel(valid_time=idx[k])
            im = ax.contour(
                data.longitude,
                data.latitude,
                data / 100,  # presumably Pa -> hPa — TODO confirm units
                levels=mslp_levels,
                colors="black",
                transform=proj,
                linewidths=0.6,
            )
            ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
            ax.add_feature(cfeature.BORDERS, linewidth=0.3)
            ax.add_feature(cfeature.STATES, linewidth=0.2)
            ax.set_title(str(pd.to_datetime(data.valid_time.values))[:16])

            # Inline labels
            ax.clabel(im, im.levels, fontsize=5)

        fig.suptitle(f"Node ({i},{j})  N={n}", fontsize=8, y=1.02)
        fig.savefig(f"figs/Z500-big-SOM/indiv-nodes/node_{i}_{j}_mslp.png")
        plt.close(fig)


### Maps of composite TCWV for each SOM Node

In [17]:
# Per-node composite mean of TCWV.
# Allocate a fresh NaN-filled buffer rather than writing into whatever
# `patterns` currently holds, so this cell does not depend on (or keep stale
# values from) a composite computed elsewhere in the notebook.
patterns = np.full((xdim, ydim, n_lat, n_lon), np.nan)

for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]

        if len(idx) > 0:
            patterns[i, j] = tcwv_ffe.isel(time=idx).mean("time").values

In [None]:
# Composite TCWV map for every SOM node (column = x-index, row = y-index).
proj = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": proj},
    constrained_layout=True,
    dpi=600,
)

levels = np.arange(15, 46, 5)

# Loop through nodes and plot
for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]
        field = patterns[i, j, :, :]

        # Count members straight from the BMUs: the shared `counts` array is
        # recomputed by other cells (once with an event excluded), so it may
        # not match the members that went into this composite.
        n_members = np.count_nonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))

        im = ax.contourf(
            lon,
            lat,
            field,
            cmap="cividis",
            levels=levels,
            extend="both",
            transform=proj,
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)

        ax.set_title(f"({i},{j})  N={n_members}", fontsize=6)
        ax.set_xticks([])
        ax.set_yticks([])

# Single colorbar shared by all panels
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("TCWV (kg m$^{-2}$)", fontsize=6)

fig.suptitle("SOM Composite TCWV", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_composite_tcwv.png", bbox_inches="tight")
plt.close(fig)

### Maps of Composite MSLP for each SOM node

In [23]:
# Per-node composite means of MSLP and Z500; nodes with no members stay NaN
node_shape = (xdim, ydim, n_lat, n_lon)
mslp = np.full(node_shape, np.nan)
z500 = np.full(node_shape, np.nan)

for i, j in np.ndindex(xdim, ydim):
    # Time indices whose best-matching unit is node (i, j)
    members = np.flatnonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))
    if members.size == 0:
        continue

    # The two datasets name their time dimension differently
    mslp[i, j] = mslp_ffe.isel(valid_time=members).mean("valid_time").values
    z500[i, j] = Z500_ffe.isel(time=members).mean("time").values


In [None]:
# Composite MSLP (filled) with composite 500-hPa height contours (black) for
# every SOM node (column = x-index, row = y-index).
proj = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": proj},
    constrained_layout=True,
    dpi=600,
)

levels = np.arange(1006, 1025, 2)
height_levels = np.arange(552, 595, 3)

# Loop through nodes and plot
for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]
        field1 = mslp[i, j, :, :] / 100  # presumably Pa -> hPa, per the label
        field2 = z500[i, j, :, :] / 10 / 9.81  # presumably geopotential -> dam

        # Count members straight from the BMUs: the shared `counts` array is
        # recomputed by other cells (once with an event excluded), so it may
        # not match the members that went into these composites.
        n_members = np.count_nonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))

        im = ax.contourf(
            lon,
            lat,
            field1,
            cmap="HomeyerRainbow",
            levels=levels,
            extend="neither",
            transform=proj,
        )

        cn = ax.contour(
            lon,
            lat,
            field2,
            levels=height_levels,
            colors="black",
            transform=proj,
            linewidths=0.4,
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)

        ax.set_title(f"({i},{j})  N={n_members}", fontsize=6)
        ax.set_xticks([])
        ax.set_yticks([])

# Single colorbar shared by all panels (for the filled MSLP field)
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("MSLP (hPa)", fontsize=6)

fig.suptitle("SOM Composite MSLP", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_composite_mslp.png", bbox_inches="tight")
plt.close(fig)

### Maps of PMM precipitation for each SOM node

In [19]:
def process_dim(
    da: xr.DataArray,
    ens_dim: str = "valid_time",
    spatial_dims: tuple = ("lat", "lon"),
) -> xr.DataArray:
    """Build a probability-matched mean (PMM) field from an ensemble.

    The mean over ``ens_dim`` is flattened and ordered by value; its values
    are then replaced by an evenly strided sample of the sorted pool of all
    member values, and the result is unstacked back onto ``spatial_dims``.

    Parameters
    ----------
    da : xr.DataArray
        Ensemble of fields. Assumes its only dimensions are ``ens_dim`` and
        ``spatial_dims`` — TODO confirm against callers.
    ens_dim : str
        Dimension that indexes the ensemble members.
    spatial_dims : tuple
        Names of the spatial dimensions to stack.

    Returns
    -------
    xr.DataArray
        Field over ``spatial_dims`` holding the resampled values.
    """
    # Pool every value from every member and sort the pool
    pooled = da.stack(z=(ens_dim, *spatial_dims)).values
    pooled_sorted = np.sort(pooled)

    # Flatten the ensemble-mean field and order its points by value
    mean_flat = da.mean(dim=ens_dim).stack(i=spatial_dims)
    ranked_mean = mean_flat.sortby(mean_flat)

    # Keep one pooled value per grid point by striding through the sorted pool
    n_points = ranked_mean.size
    stride = len(pooled_sorted) // n_points
    ranked_mean.values = pooled_sorted[::stride][:n_points]

    return ranked_mean.unstack("i")


def pmm(
    da: xr.DataArray,
    ens_dim: str = "valid_time",
    spatial_dims: tuple = ("lat", "lon"),
) -> xr.DataArray:
    """Return the probability-matched mean of ``da`` over ``ens_dim``.

    Thin wrapper that forwards all arguments unchanged to ``process_dim``.
    """
    matched = process_dim(da, ens_dim=ens_dim, spatial_dims=spatial_dims)
    return matched


In [44]:
# Per-node probability-matched mean (PMM) of precipitation.
# Allocate a fresh NaN-filled buffer rather than writing into whatever
# `patterns` currently holds, so this cell does not depend on (or keep stale
# values from) a composite computed elsewhere in the notebook.
# NOTE(review): assumes tp_ffe is on the same (n_lat, n_lon) grid — confirm.
patterns = np.full((xdim, ydim, n_lat, n_lon), np.nan)

for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]

        if len(idx) > 0:
            node_da = tp_ffe.isel(valid_time=idx)

            patterns[i, j] = pmm(
                node_da,
                ens_dim="valid_time",
                spatial_dims=("latitude", "longitude"),
            ).values


In [None]:
# PMM precipitation map for every SOM node (column = x-index, row = y-index).
proj = ccrs.PlateCarree()

fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": proj},
    constrained_layout=True,
    dpi=600,
)

levels = np.arange(0, 16, 2)

# Loop through nodes
for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]
        field = patterns[i, j, :, :] * 1000  # presumably m -> mm, per the label

        # Count members straight from the BMUs: the shared `counts` array is
        # recomputed by other cells (once with an event excluded), so it may
        # not match the members that went into this composite.
        n_members = np.count_nonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))

        im = ax.contourf(
            lon,
            lat,
            field,
            cmap="viridis",
            levels=levels,
            extend="max",
            transform=proj,
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)

        ax.set_title(f"({i},{j})  N={n_members}", fontsize=6)
        ax.set_xticks([])
        ax.set_yticks([])

# Single colorbar shared by all panels
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("Total Prec. (mm)", fontsize=6)

fig.suptitle("SOM Composite PMM Prec.", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_composite_tp.png", bbox_inches="tight")
plt.close(fig)

### Month Histograms

In [23]:
# Calendar month (1-12) of every event
months = pd.to_datetime(Z500_ffe.time).month.to_numpy()

# (i, j) -> length-12 array of event counts per calendar month (Jan..Dec)
month_counts = {}

for i in range(xdim):
    for j in range(ydim):
        # find time indices belonging to this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]

        # grab their months
        node_months = months[idx]

        # minlength=13 yields bins 0..12; the unused 0-bin is dropped.
        # Stored directly instead of through a variable named `counts`, which
        # would overwrite the per-node `counts` array used in figure titles.
        month_counts[(i, j)] = np.bincount(node_months, minlength=13)[1:]

In [None]:
# Bar chart of warm-season event counts for every SOM node
# (column = x-index, row = y-index).
fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    constrained_layout=True,
    dpi=600,
)

# Warm-season labels
month_labels = ["May", "Jun", "Jul", "Aug", "Sep", "Oct"]

for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]

        # Extract May-Oct counts (months 5-10 -> indices 4:10). Named
        # `warm_counts` so the per-node `counts` array used by the composite
        # figure titles is not overwritten.
        warm_counts = month_counts[(i, j)][4:10]

        ax.bar(
            month_labels,
            warm_counts,
            color="teal",
            alpha=0.9,
            width=0.8,
        )

        # N here is the number of warm-season events in the node
        ax.set_title(f"({i},{j})  N={warm_counts.sum()}", fontsize=6)

        # Hide the x tick marks; keep the small month labels
        ax.tick_params(axis="x", bottom=False, labelsize=5)

        # Shared, fixed y-axis across all panels.
        # NOTE(review): a bar taller than 14 would be clipped — confirm the
        # largest monthly count stays within this range.
        ax.set_ylim(0, 14)
        ax.set_yticks(np.arange(0, 15, 2))

        # Light grid for readability
        ax.grid(True, linewidth=0.3, alpha=0.5, axis="y")

# Overall title
fig.suptitle("Warm-Season (May–Oct) Event Distribution per SOM Node", fontsize=8, y=1.04)
fig.savefig("figs/Z500-big-SOM/Z500_som_monthly_counts.png", bbox_inches="tight")
plt.close(fig)

In [None]:
# Relative likelihood of each SOM node in each warm-season month:
#   relative_heatmap[k, m] = P(node k | month m) / P(node k)

# Nodes in flattened order (k = i * ydim + j) and their labels
node_keys = [(i, j) for i in range(xdim) for j in range(ydim)]
node_labels = [f"({i},{j})" for i, j in node_keys]
n_nodes = xdim * ydim

# (n_nodes, 12) table of event counts per node and calendar month
node_month_table = np.array([month_counts[key] for key in node_keys])

# P(node): share of all events that fall in each node.
# NOTE(review): this uses all twelve months while the heatmap covers only
# May-Oct — confirm every event lies in the warm season.
node_totals = node_month_table.sum(axis=1)
total_events = node_totals.sum()
P_node = node_totals / total_events

# Total events per month (all nodes combined)
all_month_counts = np.bincount(months, minlength=13)[1:]  # 1-12

# Only warm season (May-Oct)
month_idx = np.arange(4, 10)  # indices for May-Oct

warm_counts = node_month_table[:, month_idx]
month_totals = all_month_counts[month_idx]

# P(node | month). Months with no events stay NaN, the same value the plain
# division produced, but without the divide-by-zero RuntimeWarning.
heatmap = np.divide(
    warm_counts,
    month_totals,
    out=np.full(warm_counts.shape, np.nan),
    where=month_totals > 0,
)

# Normalize each row by P(node); nodes with no events stay NaN
node_prob = P_node[:, None]
relative_heatmap = np.divide(
    heatmap,
    node_prob,
    out=np.full_like(heatmap, np.nan),
    where=node_prob > 0,
)



In [None]:
# Heatmap of node-vs-month relative likelihood (rows = nodes, columns = months)
fig, ax = plt.subplots(figsize=(6, 4), dpi=600)

# Color scale spans 0 to 2, so a value of 1 sits at the colormap midpoint
im = ax.imshow(relative_heatmap, aspect="auto", cmap="RdBu_r", vmin=0, vmax=2)

# Tick positions and labels
warm_month_names = ["May", "Jun", "Jul", "Aug", "Sep", "Oct"]
ax.set_xticks(np.arange(len(month_idx)))
ax.set_xticklabels(warm_month_names, fontsize=7)
ax.set_yticks(np.arange(n_nodes))
ax.set_yticklabels(node_labels, fontsize=6)

ax.set_xlabel("Month")
ax.set_ylabel("SOM Node")

cbar = fig.colorbar(im, ax=ax)
cbar.set_label("Relative Likelihood", fontsize=7)

ax.set_title(
    "Monthly Relative Likelihood of SOM Nodes\n"
    "(Normalized by Seasonal Event Frequency)",
    fontsize=8,
)

fig.tight_layout()
fig.savefig(
    "figs/Z500-big-SOM/Z500_som_monthly_relative_heatmap.png",
    bbox_inches="tight",
)
plt.close(fig)
