# Notebook for SOM Training

By: Ty Janoski

Updated 12/9/2025

## Setup

### Imports

In [1]:
# Import Statements
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cmweather  # noqa: F401
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scienceplots  # noqa: F401
import xarray as xr
from minisom import MiniSom
from sklearn.manifold import MDS
from sklearn.metrics import pairwise_distances

# Use the non-interactive Agg backend: every figure in this notebook is written
# to disk with savefig() and then closed rather than displayed inline.
matplotlib.use("Agg")

# Style sheets named "science", "nature" and "grid" (presumably registered by
# the `scienceplots` import above — confirm).
plt.style.use(["science", "nature", "grid"])
# Render all figure text through LaTeX, so special characters in labels and
# titles must be escaped (e.g. "\\&").
# NOTE(review): presumably requires a working LaTeX installation — confirm.
plt.rcParams["text.usetex"] = True
# NOTE(review): inline-backend option; presumably has no visible effect while
# the Agg backend is selected — confirm.
%config InlineBackend.figure_format = 'png'


### Data Loading

In [3]:
# Directory holding the intermediate ERA5 files sampled at flash-flood event times
path = "/mnt/drive2/SOM_intermediate_files/"


def _load_era5(stem):
    """Eagerly load the DataArray stored at ``{path}era5_{stem}.nc``."""
    return xr.load_dataarray(f"{path}era5_{stem}.nc")


# 500-hPa geopotential: the "norm_weighted" version feeds the SOM, the "norm"
# version feeds the anomaly composites, and the plain version the raw composites.
Z500_norm_weighted_ffe = _load_era5("Z500_norm_weighted_ffe")
Z500_norm_ffe = _load_era5("Z500_norm_ffe")
Z500_ffe = _load_era5("Z500_ffe")

# Total column water vapour, same three flavours
tcwv_norm_weighted_ffe = _load_era5("tcwv_norm_weighted_ffe")
tcwv_norm_ffe = _load_era5("tcwv_norm_ffe")
tcwv_ffe = _load_era5("tcwv_ffe")

# If you want to make tp composites, uncomment the following lines:
# tp_ffe = xr.load_dataarray(f"{path}era5_tp_ffe.nc")


### Reshape Data

In [4]:
# Collapse each (lat, lon) grid into a single "features" axis, which xarray
# appends as the last dimension.
# NOTE(review): assumes the only other dimension is the event/time axis, so
# each array becomes (n_events, n_lat * n_lon) — confirm.
Z_flat, tcwv_flat = (
    da.stack(features=("lat", "lon")).values
    for da in (Z500_norm_weighted_ffe, tcwv_norm_weighted_ffe)
)

# Training matrix: Z500 features first, TCWV features second, side by side
X = np.concatenate((Z_flat, tcwv_flat), axis=1)

## SOM Training

We are going to train our SOM with random initialization and online training. We will also use two phases: a "coarse" phase with a larger sigma and learning rate, then a "fine" phase with a smaller learning rate and sigma.

### Set SOM parameters

In [5]:
# SOM lattice size (xdim nodes along x, ydim nodes along y)
xdim = 3
ydim = 2

# Iteration budget: n1 for the first training pass, n2 for the second
n1 = 2000
n2 = 8000

# Starting neighbourhood radius per pass: the lattice diagonal, then 1.5
sig1 = np.sqrt(xdim**2 + ydim**2)
sig2 = 1.5

# Starting learning rate per pass
lr1 = 0.1
lr2 = 0.01

# Seed handed to MiniSom so training is reproducible
random_seed = 42


### Train SOM

In [6]:
# Settings for the first ("coarse") pass; the decay schedules are selected by name
som_settings = dict(
    input_len=X.shape[1],
    sigma=sig1,
    learning_rate=lr1,
    neighborhood_function="gaussian",
    decay_function="linear_decay_to_zero",
    sigma_decay_function="linear_decay_to_one",
    random_seed=random_seed,
)
som = MiniSom(xdim, ydim, **som_settings)

# Seed the node weights from the training data before any updates
som.random_weights_init(X)

# Pass 1: n1 iterations starting from sig1 / lr1
som.train_random(X, n1, verbose=True)

# Pass 2: restart the schedules from the smaller sig2 / lr2 and run n2 more
# iterations. NOTE(review): this overwrites MiniSom's private attributes, so
# re-check it whenever the library is upgraded.
som._sigma, som._learning_rate = sig2, lr2  # type: ignore
som.train_random(X, n2, verbose=True)


 [ 2000 / 2000 ] 100% - 0:00:00 left 
 quantization error: 130.20806856589203
 [ 8000 / 8000 ] 100% - 0:00:00 left 
 quantization error: 127.85658094622683


### Grab important fields

In [15]:
# Number of nodes in the lattice
n_nodes = xdim * ydim

# Codebook vectors flattened to one row per node (row = i * ydim + j)
weights = som.get_weights().reshape(n_nodes, -1)

# U-matrix, transposed to (ydim, xdim) so imshow puts the x-index on the
# horizontal axis
u_matrix = som.distance_map().T

# Best-matching unit (i, j) of every training sample
bmus = np.array([som.winner(sample) for sample in X])

# Samples won by each node, accumulated in (xdim, ydim) and then transposed to
# match the U-matrix orientation
hit_map = np.zeros((xdim, ydim))
np.add.at(hit_map, (bmus[:, 0], bmus[:, 1]), 1)
hit_map = hit_map.T

# 2-D embedding of the nodes from their pairwise weight distances.
# This is metric MDS, used here in place of a true Sammon mapping.
D = pairwise_distances(weights)
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=42, n_init=4)
coords = mds.fit_transform(D)

# Grid coordinates and sizes, taken from the Z500 field
# NOTE(review): assumes TCWV is on the identical lat/lon grid — confirm.
lat, lon = Z500_norm_ffe.lat, Z500_norm_ffe.lon
n_lat, n_lon = lat.size, lon.size
n_features = n_lat * n_lon

# The first n_features columns are Z500 and the remainder TCWV, matching the
# order in which X was concatenated
Z500_weights, tcwv_weights = weights[:, :n_features], weights[:, n_features:]

# Back to (xdim, ydim, n_lat, n_lon) maps, one per node
node_map_shape = (xdim, ydim, n_lat, n_lon)
Z500_nodes = Z500_weights.reshape(node_map_shape)
tcwv_nodes = tcwv_weights.reshape(node_map_shape)

## Plots

### U-Matrix, Hit Map, and Sammon Map

In [18]:
fig, axes = plt.subplots(1, 2, layout="constrained", figsize=(6, 3), dpi=600)

# Left panel: U-matrix; right panel: hit map. Each gets its own colorbar.
panels = (
    (u_matrix, "viridis", "U-Matrix (Mean Inter-Node Distance)"),
    (hit_map, "plasma", "Hit Map (Samples per Node)"),
)
for ax, (grid, cmap_name, panel_title) in zip(axes, panels):
    image = ax.imshow(grid, cmap=cmap_name, origin="lower")
    ax.set_title(panel_title, fontsize=7)
    fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04, shrink=0.7)

# One tick per lattice index on both panels
x_ticks, y_ticks = np.arange(xdim), np.arange(ydim)
for ax in axes:
    ax.set_xticks(x_ticks)
    ax.set_yticks(y_ticks)
    ax.set_xlabel("X-index", fontsize=6)
    ax.set_ylabel("Y-index", fontsize=6)

fig.savefig("figs/Z500_and_tcwv_SOM/Z500_and_tcwv_SOM_u_matrix_hit_map.png")
plt.close(fig)

In [17]:
# Undo the display transpose so both arrays are ordered by node (i * ydim + j),
# the same order as the rows of `coords`
U_flat = u_matrix.T.ravel()
hits_flat = hit_map.T.ravel()

# Marker area grows with the node's share of the busiest node's hits
hits_scaled = 30 + 250 * (hits_flat / hits_flat.max())

plt.figure(figsize=(7, 7))

# One bubble per node: colour = U-matrix value, size = hit count
sc = plt.scatter(
    coords[:, 0],
    coords[:, 1],
    s=hits_scaled,
    c=U_flat,
    cmap="balance",
    edgecolor="k",
    linewidth=0.5,
    zorder=3,
)


def _draw_edge(a, b):
    """Draw a faint line between embedded nodes `a` and `b`."""
    plt.plot(
        [coords[a, 0], coords[b, 0]],
        [coords[a, 1], coords[b, 1]],
        "k-",
        lw=0.6,
        alpha=0.4,
    )


# Connect each node to its +1 neighbour along j and along i, so every lattice
# edge is drawn exactly once
for node in range(xdim * ydim):
    row_i, col_j = divmod(node, ydim)
    if col_j + 1 < ydim:
        _draw_edge(node, node + 1)
    if row_i + 1 < xdim:
        _draw_edge(node, node + ydim)

# Label every bubble with its lattice index (i,j)
for node, (px, py) in enumerate(coords):
    ix, iy = divmod(node, ydim)
    plt.text(px, py, f"({ix},{iy})", fontsize=8, ha="center", va="center", zorder=5)

plt.title("Sammon / MDS Distortion Grid\nU-Matrix (Color) \\& Node Frequency (Size)")
plt.axis("off")
plt.colorbar(sc, label="U-Matrix (Avg. Neighbor Distance)")
plt.savefig(
    "figs/Z500_and_tcwv_SOM/Z500_and_tcwv_SOM_sammon_mds.png", bbox_inches="tight"
)
plt.close()


### Node Weights Map

In [16]:
# Panel grid laid out like the lattice: ydim rows by xdim columns
fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": ccrs.PlateCarree()},
    constrained_layout=True,
    dpi=600,
)

# Filled levels for Z500 and line levels for TCWV
levels_Z = np.arange(-1.2, 1.21, 0.2)
levels_tcwv = np.arange(-1.0, 1.01, 0.2)

# NOTE(review): the node weights were trained on the *_norm_weighted inputs, so
# the plotted values presumably include whatever weighting was applied upstream
# — confirm that the "Standardized" colorbar label is accurate.
for i, j in np.ndindex(xdim, ydim):
    ax = axes[j, i]

    # Z500 weights as colour fill
    im = ax.contourf(
        lon,
        lat,
        Z500_nodes[i, j],
        levels=levels_Z,
        cmap="balance",
        transform=ccrs.PlateCarree(),
    )

    # TCWV weights as black contour lines on top
    ax.contour(
        lon,
        lat,
        tcwv_nodes[i, j],
        levels=levels_tcwv,
        colors="black",
        linewidths=0.6,
        alpha=0.9,
        transform=ccrs.PlateCarree(),
    )

    # Geography, panel title, and no tick marks
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)
    ax.set_title(f"Node ({i},{j})", fontsize=6)
    ax.set_xticks([])
    ax.set_yticks([])

# A single colorbar (built from the last panel's fill) serves every panel
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("Standardized Z500 Anomaly (Shaded)", fontsize=6)

plt.suptitle("Node Weight Patterns\nZ500 (shaded) + TCWV (contoured)", fontsize=8)
plt.savefig("figs/Z500_and_tcwv_SOM/combined_node_weights.png", bbox_inches="tight")
plt.close()

### Anomaly Composite Map

In [19]:
# Per-node mean of the normalized fields; nodes that win no samples stay NaN
composite_shape = (xdim, ydim, n_lat, n_lon)
Z500_patterns = np.full(composite_shape, np.nan)
tcwv_patterns = np.full(composite_shape, np.nan)

# Number of samples assigned to each node
counts = np.zeros((xdim, ydim), dtype=int)

for i, j in np.ndindex(xdim, ydim):
    # Positions along the time axis whose best-matching unit is node (i, j)
    idx = np.flatnonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))
    counts[i, j] = idx.size

    if idx.size == 0:
        continue

    Z500_patterns[i, j] = Z500_norm_ffe.isel(time=idx).mean("time").values
    tcwv_patterns[i, j] = tcwv_norm_ffe.isel(time=idx).mean("time").values

In [21]:
# Panel grid laid out like the lattice: ydim rows by xdim columns
fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": ccrs.PlateCarree()},
    constrained_layout=True,
    dpi=600,
)

# Filled levels for Z500; a coarser set of line levels keeps TCWV readable
levels_Z = np.arange(-2.0, 2.1, 0.25)
levels_tcwv = np.arange(-2, 2.1, 0.5)

for i, j in np.ndindex(xdim, ydim):
    ax = axes[j, i]

    # Composite Z500 anomaly as colour fill
    im = ax.contourf(
        lon,
        lat,
        Z500_patterns[i, j],
        levels=levels_Z,
        cmap="balance",
        transform=ccrs.PlateCarree(),
    )

    # Composite TCWV anomaly as black contour lines
    ax.contour(
        lon,
        lat,
        tcwv_patterns[i, j],
        levels=levels_tcwv,
        colors="black",
        linewidths=0.5,
        transform=ccrs.PlateCarree(),
    )

    ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
    ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)

    # Panel title carries the node index and its sample count
    ax.set_title(f"({i},{j})  N={counts[i, j]}", fontsize=6)
    ax.set_xticks([])
    ax.set_yticks([])

# A single colorbar (built from the last panel's fill) serves every panel
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("Standardized Anomaly", fontsize=6)

plt.suptitle(
    "SOM Composite Anomalies: Z500 (shaded) + TCWV (contoured)",
    fontsize=8,
    y=1.04,
)
plt.savefig(
    "figs/Z500_and_tcwv_SOM/Z500_and_tcwv_SOM_composite_anomalies.png",
    bbox_inches="tight",
)
plt.close()


### Composite Map

In [22]:
# Per-node mean of the un-normalized fields; nodes with no samples stay NaN
raw_shape = (xdim, ydim, n_lat, n_lon)
Z500_patterns_raw = np.full(raw_shape, np.nan)
tcwv_patterns_raw = np.full(raw_shape, np.nan)

for i, j in np.ndindex(xdim, ydim):
    # Positions along the time axis whose best-matching unit is node (i, j)
    idx = np.flatnonzero((bmus[:, 0] == i) & (bmus[:, 1] == j))
    if idx.size == 0:
        continue

    Z500_patterns_raw[i, j] = Z500_ffe.isel(time=idx).mean("time").values
    tcwv_patterns_raw[i, j] = tcwv_ffe.isel(time=idx).mean("time").values

In [41]:
# Panel grid laid out like the lattice: ydim rows by xdim columns
fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": ccrs.PlateCarree()},
    constrained_layout=True,
    dpi=600,
)

# Shared contour levels, in the same units as the colorbar label (dam)
height_levels = np.arange(552, 595, 3)

for i in range(xdim):
    for j in range(ydim):
        ax = axes[j, i]

        # Raw Z500 composite for this node, taken from Z500_patterns_raw
        # (built in the raw-composite cell), divided by 10 and by 9.81.
        # NOTE(review): assumes Z500_ffe holds geopotential in m^2 s^-2, so the
        # result is geopotential height in decametres — confirm.
        height_dam = Z500_patterns_raw[i, j, :, :] / 10 / 9.81

        # Labelled black isolines
        c = ax.contour(
            lon,
            lat,
            height_dam,
            levels=height_levels,
            colors="black",
            transform=ccrs.PlateCarree(),
            linewidths=0.6,
        )
        # Semi-transparent colour fill on the same levels
        cf = ax.contourf(
            lon,
            lat,
            height_dam,
            cmap="HomeyerRainbow",
            levels=height_levels,
            transform=ccrs.PlateCarree(),
            alpha=0.7,
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)
        ax.set_title(f"({i},{j})  N={counts[i, j]}", fontsize=5)

        ax.set_xticks([])
        ax.set_yticks([])

        # Add inline labels
        ax.clabel(c, c.levels, fontsize=5)

# One colorbar (built from the last panel's fill) serves every panel
cbar = fig.colorbar(cf, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("500-hPa Height (dam)", fontsize=6)

plt.suptitle("SOM Composite 500-hPa Heights", fontsize=8, y=1.04)
# NOTE(review): this writes to figs/Z500-SOM/ while the earlier figures go to
# figs/Z500_and_tcwv_SOM/ — confirm the output directory is intended.
plt.savefig("figs/Z500-SOM/Z500_som_composite_heights.png", bbox_inches="tight")
plt.close()


### Maps of Individual Nodes

In [43]:
# Panels per row in each node's figure
cols = 5
proj = ccrs.PlateCarree()

# Shared fill levels for every event map
event_levels = np.arange(540, 603, 3)

# One figure per node, with one panel per event assigned to that node
for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]
        n = len(idx)

        # A node that won no events would need a zero-row grid, which
        # plt.subplots rejects, so skip it.
        if n == 0:
            continue

        # Enough rows to hold n panels at `cols` per row
        rows = int(np.ceil(n / cols))

        # squeeze=False keeps `axes` 2-D for any rows/cols combination
        fig, axes = plt.subplots(
            rows,
            cols,
            figsize=(3 * cols, 2.5 * rows),
            subplot_kw={"projection": proj},
            layout="constrained",
            squeeze=False,
        )

        for k, ax in enumerate(axes.flat):
            # Blank out the unused panels at the end of the last row
            if k >= n:
                ax.axis("off")
                continue

            data = Z500_ffe.isel(time=idx[k])
            # NOTE(review): dividing by 10 and 9.81 presumably converts
            # geopotential (m^2 s^-2) to height in dam — confirm units.
            ax.contourf(
                data.lon,
                data.lat,
                data / 10 / 9.81,
                levels=event_levels,
                cmap="balance",
                transform=proj,
                extend="both",
            )
            ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
            ax.add_feature(cfeature.BORDERS, linewidth=0.3)
            ax.add_feature(cfeature.STATES, linewidth=0.2)
            # Event timestamp truncated to 16 characters
            ax.set_title(str(pd.to_datetime(data.time.values))[:16])

        fig.suptitle(f"Node ({i},{j})  N={n}", fontsize=8, y=1.02)
        fig.savefig(f"figs/Z500-SOM/indiv-nodes/node_{i}_{j}.png")
        # Close each figure so they do not accumulate across the loop
        plt.close(fig)


### Maps of composite TCWV for each SOM Node

In [45]:
# Scratch array for the per-node mean of raw TCWV. It is allocated here,
# NaN-filled, so the cell runs on a fresh kernel and nodes with no samples
# stay NaN instead of keeping values from an earlier use of `patterns`.
patterns = np.full((xdim, ydim, n_lat, n_lon), np.nan)

for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]

        if len(idx) > 0:
            patterns[i, j] = tcwv_ffe.isel(time=idx).mean("time").values

In [None]:
# Panel grid laid out like the lattice: ydim rows by xdim columns
fig, axes = plt.subplots(
    ydim, xdim,
    figsize=(6, 2.7),
    subplot_kw={'projection': ccrs.PlateCarree()},
    constrained_layout=True,
    dpi=600
)

levels = np.arange(15,46,5)

# ----------------------------------------------
# Loop through nodes and plot
# ----------------------------------------------
for i in range(xdim):
    for j in range(ydim):

        ax = axes[j, i]
        # Read the dedicated raw-TCWV composite (built in the raw-composite
        # cell) rather than the shared scratch array `patterns`, which is
        # reused for other variables and may hold something else at plot time.
        field = tcwv_patterns_raw[i, j, :, :]

        im = ax.contourf(
            lon, lat, field,
            cmap="cividis",
            levels=levels,
            extend="both",
            transform=ccrs.PlateCarree()
        )

        ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
        ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)

        # Panel title carries the node index and its sample count
        ax.set_title(f"({i},{j})  N={counts[i,j]}", fontsize=6)
        ax.set_xticks([])
        ax.set_yticks([])

# A single colorbar (built from the last panel's fill) serves every panel
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("TCWV (kg m$^{-2}$)", fontsize=6)

plt.suptitle("SOM Composite TCWV", fontsize=8, y=1.04)
plt.savefig("figs/Z500-SOM/Z500_som_composite_tcwv.png", bbox_inches="tight")
plt.close()

### Maps of composite precipitation for each SOM node

In [51]:
# The precipitation load is commented out in the data-loading cell, so load it
# here on demand (same file) to let the cell run on a fresh kernel.
if "tp_ffe" not in globals():
    tp_ffe = xr.load_dataarray(f"{path}era5_tp_ffe.nc")

# Fresh NaN-filled scratch array, so nodes with no samples cannot keep values
# from a previous variable stored in `patterns`.
# NOTE(review): assumes tp_ffe is on the same n_lat x n_lon grid and that its
# "valid_time" axis is ordered like the rows used to compute `bmus` — confirm.
patterns = np.full((xdim, ydim, n_lat, n_lon), np.nan)

for i in range(xdim):
    for j in range(ydim):
        # indices in this node
        idx = np.where((bmus[:, 0] == i) & (bmus[:, 1] == j))[0]

        if len(idx) > 0:
            patterns[i, j] = tp_ffe.isel(valid_time=idx).mean("valid_time").values

In [62]:
# Panel grid laid out like the lattice: ydim rows by xdim columns
fig, axes = plt.subplots(
    ydim,
    xdim,
    figsize=(6, 2.7),
    subplot_kw={"projection": ccrs.PlateCarree()},
    constrained_layout=True,
    dpi=600,
)

levels = np.arange(0, 4.5, 0.25)

for i, j in np.ndindex(xdim, ydim):
    ax = axes[j, i]

    # Scale by 1000 before plotting.
    # NOTE(review): presumably converts metres to the millimetres named in the
    # colorbar label — confirm the units of tp_ffe.
    field = patterns[i, j, :, :] * 1000

    im = ax.contourf(
        lon,
        lat,
        field,
        levels=levels,
        cmap="viridis",
        extend="max",
        transform=ccrs.PlateCarree(),
    )

    ax.add_feature(cfeature.COASTLINE, linewidth=0.6)
    ax.add_feature(cfeature.STATES.with_scale("50m"), linewidth=0.4)

    # Panel title carries the node index and its sample count
    ax.set_title(f"({i},{j})  N={counts[i,j]}", fontsize=6)
    ax.set_xticks([])
    ax.set_yticks([])

# A single colorbar (built from the last panel's fill) serves every panel
cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.6, pad=0.02)
cbar.set_label("Total Prec. (mm)", fontsize=6)

plt.suptitle("SOM Composite Total Prec.", fontsize=8, y=1.04)
plt.savefig("figs/Z500-SOM/Z500_som_composite_tp.png", bbox_inches="tight")
plt.close()

### Month Histograms

In [69]:
# Calendar month (1-12) of every event, in the same order as the rows of `bmus`
months = pd.to_datetime(Z500_ffe.time).month.to_numpy()

# Maps (i, j) -> length-12 array of event counts for January..December
month_counts = {}

for i in range(xdim):
    for j in range(ydim):

        # find time indices belonging to this node
        idx = np.where((bmus[:,0] == i) & (bmus[:,1] == j))[0]

        # grab their months
        node_months = months[idx]

        # Histogram over months 1-12; bin 0 is never a valid month, so drop it.
        # The result gets its own name so it does not overwrite the global
        # (xdim, ydim) `counts` array used in the composite-map titles.
        node_month_hist = np.bincount(node_months, minlength=13)[1:]

        month_counts[(i,j)] = node_month_hist

In [68]:
# Panel grid laid out like the lattice: ydim rows by xdim columns
fig, axes = plt.subplots(
    ydim, xdim,
    figsize=(6, 2.7),
    constrained_layout=True,
    dpi=600
)

# Warm-season labels
month_labels = ["May", "Jun", "Jul", "Aug", "Sep", "Oct"]

for i in range(xdim):
    for j in range(ydim):

        ax = axes[j, i]

        # Extract May-Oct counts (months 5-10 -> indices 4:10). The slice gets
        # its own name so it does not overwrite the global (xdim, ydim)
        # `counts` array used in the composite-map titles.
        warm_counts = month_counts[(i, j)][4:10]

        ax.bar(
            month_labels,
            warm_counts,
            color="teal",
            alpha=0.9,
            width=0.8
        )

        # Title matches the SOM composite style.
        # NOTE(review): N here sums May-Oct only, so it will differ from the
        # node's total sample count if any events fall outside those months.
        ax.set_title(f"({i},{j})  N={warm_counts.sum()}", fontsize=6)

        # Hide the x tick marks (categorical labels do not need them)
        ax.tick_params(axis="x", bottom=False, labelsize=5)

        # Shared, fixed y-axis across all panels
        # NOTE(review): bars taller than 14 would be clipped — confirm the cap.
        ax.set_ylim(0, 14)
        ax.set_yticks(np.arange(0,15,2))

        # Light grid for readability
        ax.grid(True, linewidth=0.3, alpha=0.5, axis="y")

# Overall title
plt.suptitle("Warm-Season (May–Oct) Event Distribution per SOM Node", fontsize=8, y=1.04)
plt.savefig("figs/Z500-SOM/Z500_som_monthly_counts.png", bbox_inches="tight")
plt.close()