In [None]:
%matplotlib widget

import gzip
import os
from dataclasses import dataclass
from functools import partial
from os import listdir, path
from typing import Any, Callable, List, Optional, Tuple, Union

import msgpack
import numpy as np
from dotenv import load_dotenv
from matplotlib import gridspec
from matplotlib import pyplot as plt
from matplotlib.axes import Axes
from matplotlib.collections import PathCollection
from matplotlib.figure import Figure
from mpl_toolkits.mplot3d.axes3d import Axes3D
from scipy import interpolate
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d

_ = load_dotenv()


In [None]:
# Sticking probabilities that span the second axis of the data tensors created
# further down in this notebook (ordered from 1.0 down to 0.0).
STICKING_PROBABILITIES = [1.0, 0.8, 0.6, 0.4, 0.2, 0.1, 0.0]
# Directory containing the data files; taken from the environment. The lookup
# raises a KeyError when DATA_DIR is not set.
DATA_DIR = os.environ["DATA_DIR"]


# Function Setup

## Data Loading Routines

In [None]:
# Some handy type definitions
# A data loader maps a file name to the nodal data dictionary of that file
DataLoadingFunction = Callable[[str], dict]
# Either a single float or an array of values
NumpyOrFloat = Union[float, np.ndarray]


@dataclass(frozen=True, slots=True)
class ThicknessData:
    """Container class for storing extracted layer thickness information"""

    # Node positions, taken from the "pos" field of the first loaded file
    pos: np.ndarray
    # One entry per loaded sticking probability; each entry is the list of all
    # extracted field arrays of that file (one array per field)
    thickness: list[list[np.ndarray]]
    # Sticking probability of every entry in `thickness`, parsed from the file tag
    sticking_probabilities: list[float]
    # Number of extracted fields of every entry in `thickness`
    max_times: list[int]
    # Number of nodes per field (checked to be identical for all fields)
    num_points: int


def map_configs(fnames: list[str]) -> dict[str, list[str]]:
    """Groups file names by geometry.

    The first five characters of every file name are taken as the sticking
    probability tag and everything from the seventh character onwards as the
    geometry part (the sixth character, the separator, is dropped).

    Returns a dictionary that maps each geometry part to the list of sticking
    probability tags found for it, in the order the file names were given."""
    by_geometry: dict[str, list[str]] = {}
    for file_name in fnames:
        tag = file_name[:5]
        geometry = file_name[6:]
        # Create the list on first sight of a geometry, then collect the tag
        by_geometry.setdefault(geometry, []).append(tag)
    return by_geometry


def load_data(data_dir: str, filename: str) -> dict:
    """Reads a gzip-compressed msgpack file and returns its "nodes" entry.

    Args:
        data_dir: Directory that contains the file.
        filename: Name of the .msgpack.gz file inside `data_dir`.

    Returns:
        The value stored under the key "nodes" of the unpacked file."""
    archive = path.join(data_dir, filename)
    with gzip.open(archive) as stream:
        # use_list=False unpacks msgpack arrays as tuples instead of lists
        graph = msgpack.unpack(stream, use_list=False, raw=False)
    return graph["nodes"]


def extract_data(
    loader: DataLoadingFunction,
    name: str,
    sticking: list[str],
    prefix: str = "extracted_",
) -> ThicknessData:
    """Collects all nodal fields starting with `prefix` for one geometry.

    For every tag in `sticking` the file `"<tag>_<name>"` is requested from
    `loader`, and every field whose key starts with `prefix` is converted to a
    numpy array. The sticking probability is derived from the tag by dropping its
    first character and dividing the remaining number by 1000.

    Args:
        loader: Function returning the nodal data of a file name.
        name: Geometry part of the file name.
        sticking: Sticking probability tags (file name prefixes) to load.
        prefix: Prefix of the field names to extract.

    Returns:
        A `ThicknessData` instance bundling the extracted fields. The node
        positions are taken from the "pos" field of the first loaded file."""
    fields_per_tag: list[list[np.ndarray]] = []
    probabilities: list[float] = []
    field_counts: list[int] = []
    node_counts: list[int] = []
    pos = np.array([])

    for tag in sticking:
        nodes = loader(f"{tag}_{name}")
        fields = [np.array(nodes[key]) for key in nodes if key.startswith(prefix)]
        fields_per_tag.append(fields)
        probabilities.append(float(tag[1:]) / 1000)
        field_counts.append(len(fields))
        node_counts.extend(field.shape[0] for field in fields)
        # The positions are only read once, from the first file
        if not len(pos):
            pos = np.array(nodes["pos"])

    # Ensure that the number of nodes is the same for all data
    reference_count = node_counts[0]
    assert np.all(np.array(node_counts) == reference_count)

    # Construct the data container instance
    return ThicknessData(
        pos=pos,
        thickness=fields_per_tag,
        sticking_probabilities=probabilities,
        max_times=field_counts,
        num_points=reference_count,
    )


## Data Transformation and Augmentation Routines

In [None]:
def to_data_tensor(
    thickness_data: ThicknessData,
    sticking_probabilities: list[float],
    max_time: int = 50,
) -> np.ndarray:
    """Converts the `ThicknessData` instance to a numpy tensor with missing data
    indicated by np.nan. First axis `point_index`, second axis `sticking_probability` and
    third axis `time`.

    Time slot 0 of every available sticking probability is set to 0; the extracted
    fields follow from slot 1 onwards. Series that are longer than the tensor are
    truncated to the first `max_time - 1` fields.

    Args:
        thickness_data: Extracted thickness fields.
        sticking_probabilities: Sticking probabilities spanning the second axis.
            Values without matching data are reported via `print` and stay NaN.
        max_time: Length of the time axis (must be at least 1).

    Returns:
        Array of shape `(num_points, len(sticking_probabilities), max_time)`.

    Raises:
        ValueError: If `max_time` is smaller than 1."""
    if max_time < 1:
        raise ValueError(f"max_time must be at least 1, got {max_time}")

    data_tensor = np.full(
        (thickness_data.num_points, len(sticking_probabilities), max_time),
        np.nan,
    )
    for i, s in enumerate(sticking_probabilities):
        # First, determine the index of the matching data set (first match wins)
        data_index = next(
            (
                k
                for k, sp in enumerate(thickness_data.sticking_probabilities)
                if sp == s
            ),
            None,
        )
        if data_index is None:
            print(f"Couldn't find data for sticking probability {s}")
            continue

        data_tensor[:, i, 0] = 0

        # Slot 0 is reserved for the initial state, hence at most max_time - 1
        # fields fit into the tensor. Both sides of the assignment have to be
        # truncated to the same length, otherwise long series fail to broadcast.
        n_steps = min(thickness_data.max_times[data_index], max_time - 1)
        if n_steps > 0:
            series = np.array(thickness_data.thickness[data_index]).T
            data_tensor[:, i, 1 : n_steps + 1] = series[:, :n_steps]
    return data_tensor


def fill_conformal(
    data_tensor: np.ndarray, pos: np.ndarray, trench_diameter: float
) -> np.ndarray:
    """Returns a copy of `data_tensor` whose last entry along the second axis holds
    the expected deposition radius for p_s=0 (conformal deposition).

    For every point the value at time index `t` is `t - y`, limited to the range
    from 0 to `trench_diameter / 2`, where `y` is the second coordinate of the point
    (negative coordinates are treated as 0). The input tensor is not modified."""
    half_diameter = trench_diameter / 2

    # Offset per point: the second position coordinate, with negative values zeroed
    offset = np.where(pos[:, 1] > 0, pos[:, 1], 0)
    steps = np.arange(data_tensor.shape[2])

    # Rows: points, columns: time steps
    growth = steps[np.newaxis, :] - offset[:, np.newaxis]

    # Limit from above first, then from below
    capped = np.where(growth <= half_diameter, growth, half_diameter)
    conformal = np.where(capped < 0, 0, capped)

    filled = data_tensor.copy()
    filled[:, -1, :] = conformal
    return filled


def mask_monotonic(
    x: np.ndarray,
    axis: int = 0,
    eps: float = 0.1,
    increasing: bool = True,
    iterations: int = 1,
) -> np.ndarray:
    """Checks element-wise if the provided array is monotonically increasing or
    decreasing along the provided axis.

    For every order `i` from 1 to `iterations` the `i`-th discrete difference along
    `axis` is computed; the array is padded with `i` zeros (in front when checking
    for increasing values, at the end when checking for decreasing values) so that
    the result keeps the shape of `x`. An element only counts as monotonic if all
    differences pass the check.

    Args:
        x: Array of any dimensionality.
        axis: Axis along which the differences are taken.
        eps: Tolerance; differences may violate monotonicity by less than `eps`.
        increasing: Check for increasing (True) or decreasing (False) values.
        iterations: Highest order of the differences that are checked.

    Returns:
        Boolean array with the shape of `x`, True where the check passes."""
    assert -x.ndim <= axis < x.ndim
    monotonic = np.ones(x.shape, dtype=bool)
    for order in range(1, iterations + 1):
        # The padding needs `order` entries along `axis` and has to match `x` in
        # all other dimensions, regardless of which axis is selected.
        pad_shape = list(x.shape)
        pad_shape[axis] = order
        padding = np.zeros(pad_shape)
        if increasing:
            steps = np.diff(x, order, axis=axis, prepend=padding)
            monotonic &= steps > -eps
        else:
            # Decreasing values have negative differences; tolerate small increases
            steps = np.diff(x, order, axis=axis, append=padding)
            monotonic &= steps < eps
    return monotonic


### Grid data operations (pipeline building blocks)

In [None]:
@dataclass(frozen=True, slots=True)
class GridData:
    """Immutable container for the grids of one surface: the two coordinate grids
    and the values defined on them (see `to_grid_data` for how they are built)."""

    # Time coordinate of every grid node
    time: np.ndarray
    # Sticking probability coordinate of every grid node
    sp: np.ndarray
    # Radius value of every grid node
    radius: np.ndarray


def to_grid_data(
    z: np.ndarray,
    sticking_probabilities: List[float],
) -> GridData:
    """Builds the coordinate grids belonging to the value array `z`.

    The time coordinate runs over the indices of the second axis of `z`, the
    sticking probability coordinate over `sticking_probabilities`. Negative values
    of `z` are replaced by NaN. The result can be passed directly to matplotlib's
    surface plots."""
    time_axis = np.arange(z.shape[1])
    time_grid, sp_grid = np.meshgrid(time_axis, sticking_probabilities)
    # Ensure that all values are >= 0 (everything else becomes NaN)
    radius = np.where(z >= 0, z, np.nan)
    return GridData(time=time_grid, sp=sp_grid, radius=radius)


def fill_along_axis(
    grid_data: GridData,
    masking_function: Callable[[np.ndarray], np.ndarray],
    axis: int = 0,
) -> GridData:
    """Replaces the radius values flagged by `masking_function` with linearly
    interpolated values.

    `axis` selects the axis that is iterated over: with `axis=0` every row of the
    radius array is treated as one sequence, with `axis=1` every column. Within a
    sequence the flagged entries are interpolated from the remaining ones based on
    their index; sequences that are flagged completely or not at all are left
    untouched. The input `grid_data` is not modified."""
    filled = grid_data.radius.copy()
    flagged = masking_function(filled)

    # Both are views, so writing into a sequence updates `filled`
    sequences = np.moveaxis(filled, 0, axis)
    sequence_flags = np.moveaxis(flagged, 0, axis)

    for values, bad in zip(sequences, sequence_flags):
        good = ~bad
        if not bad.any() or not good.any():
            continue
        values[bad] = np.interp(
            np.flatnonzero(bad), np.flatnonzero(good), values[good]
        )
    return GridData(grid_data.time, grid_data.sp, filled)


def interpolate_data(
    grid_data: GridData,
    resolution: int = 100,
) -> GridData:
    """Resamples the grid data on a regular `resolution` x `resolution` grid using
    linear interpolation.

    The new grid spans the range of the first row of the time grid and the range of
    the first column of the sticking probability grid."""
    time_axis = grid_data.time[0, :]
    sp_axis = grid_data.sp[:, 0]

    fine_time = np.linspace(np.min(time_axis), np.max(time_axis), resolution)
    fine_sp = np.linspace(np.min(sp_axis), np.max(sp_axis), resolution)
    time_grid, sp_grid = np.meshgrid(fine_time, fine_sp)

    known_points = (grid_data.time.ravel(), grid_data.sp.ravel())
    query_points = (time_grid.ravel(), sp_grid.ravel())
    values = interpolate.griddata(
        known_points,
        grid_data.radius.ravel(),
        query_points,
        method="linear",
    )

    return GridData(time_grid, sp_grid, values.reshape(time_grid.shape))


def mask_closed(
    grid_data: GridData,
    trench_diameter: float,
) -> GridData:
    """Replaces the radius by NaN at all grid nodes that are considered closed.

    A node counts as closed when `2 * time / (1 + sp)` exceeds `trench_diameter`.
    The coordinate grids are passed on unchanged."""
    still_open = np.logical_not(
        2 * grid_data.time / (1 + grid_data.sp) > trench_diameter
    )
    masked_radius = np.where(still_open, grid_data.radius, np.nan)
    return GridData(grid_data.time, grid_data.sp, masked_radius)


## Data Visualization Routines

In [None]:
def draw_2d_plot(ax: list[Axes], grid_data: GridData, _: int) -> None:
    """Redraws two 2D plots of the grid data.

    `ax[0]` shows the radius over the sticking probability with one line per time
    step, `ax[1]` shows the radius over time with one line per sticking
    probability. Both axes are cleared first. The third argument (the index of the
    selected point) is not used."""
    assert len(ax) >= 2
    sp_axes, time_axes = ax[0], ax[1]

    # Plot 1: radius vs sticking probability, coloured by time step
    sp_axes.clear()
    n_timesteps = grid_data.radius.shape[1]
    palette = plt.get_cmap("viridis", n_timesteps)
    for step in range(n_timesteps):
        sp_axes.plot(
            grid_data.sp[:, step],
            grid_data.radius[:, step],
            marker="x",
            color=palette(step),
        )
    sp_axes.invert_xaxis()
    sp_axes.grid(which="both")
    sp_axes.set_xlabel("sticking probability")
    sp_axes.set_ylabel("radius")

    # Plot 2: radius vs time, one labelled line per sticking probability
    time_axes.clear()
    for row in range(grid_data.radius.shape[0]):
        time_axes.plot(
            grid_data.time[row, :],
            grid_data.radius[row, :],
            marker="x",
            label=str(grid_data.sp[row, 0]),
        )
    time_axes.grid(which="both")
    time_axes.set_xlabel("time")
    time_axes.set_ylabel("radius")
    time_axes.legend()


def draw_3d_plot(ax: list[Axes], grid_data: GridData, _: int) -> None:
    """Clears `ax[0]` and draws the grid data as a 3D surface into it.

    The third argument (the index of the selected point) is not used."""
    assert len(ax) > 0
    surface_axes = ax[0]

    surface_axes.clear()
    surface_axes.plot_surface(
        grid_data.time,
        grid_data.sp,
        grid_data.radius,
        linewidth=1,
        cmap=plt.get_cmap("coolwarm"),
        antialiased=True,
    )

    surface_axes.set_xlabel("time")
    surface_axes.set_ylabel("sticking probability")
    surface_axes.set_zlabel("radius")


def apply_pipeline(
    pipeline: list[Tuple[Callable[[GridData, Any], GridData], Any]],
    grid_data: GridData,
) -> GridData:
    """Runs the grid data through all steps of the pipeline, in order.

    Every step is a pair of an operation and a sequence of extra arguments. The
    operation is called with the current grid data followed by the unpacked extra
    arguments, and its return value is handed to the next step."""
    result = grid_data
    for step in pipeline:
        operation, extra_args = step
        result = operation(result, *extra_args)
    return result


def selector_plot(
    pos: np.ndarray,
    data_tensor: np.ndarray,
    sticking_probabilities: List[float],
    pipeline: Optional[List[Tuple[Callable[[GridData, Any], GridData], Any]]] = None,
    selection_callback: Optional[Callable[[List[Axes], GridData, int], None]] = None,
    figsize: Tuple[float, float] = (10, 5),
    projection: Optional[str] = None,
    num_subplots: int = 1,
    initial_point_index: int = 0,
) -> Tuple[Figure, List[Axes]]:
    """Plots the geometry as a point picker next to `num_subplots` data plots.

    Whenever a point of the geometry is picked (and once initially for
    `initial_point_index`), the data of that point is converted to grid data, sent
    through `pipeline` and handed to `selection_callback` for drawing.

    Note that all previously opened matplotlib figures are closed.

    Args:
        pos: Point coordinates; the first two columns are plotted.
        data_tensor: Data with the point index as first axis.
        sticking_probabilities: Sticking probabilities of the second tensor axis.
        pipeline: Optional operations applied to the grid data of the selection.
        selection_callback: Optional drawing function called with the data axes,
            the grid data and the index of the selected point.
        figsize: Size of the figure.
        projection: Projection of the data axes (e.g. "3d").
        num_subplots: Number of data axes created next to the picker.
        initial_point_index: Index of the point that is selected initially.

    Returns:
        The figure and the list of data axes."""
    plt.close("all")

    # Create a figure
    fig = plt.figure(figsize=figsize)

    # Create the subplots: one for the selector, the others for the data plot(s)
    gs = gridspec.GridSpec(1, num_subplots + 1, figure=fig)
    ax_selector = fig.add_subplot(gs[0, 0])
    ax_plot = [
        fig.add_subplot(gs[0, i + 1], projection=projection)
        for i in range(num_subplots)
    ]
    fig.subplots_adjust(hspace=0.4)

    def show_point(point_index: int) -> None:
        """Updates the title and the data plots for the given point."""
        ax_selector.set_title(f"Geometry\n(selected: {point_index})")

        # Convert to grid data and apply the pipeline, if it exists
        grid_data = to_grid_data(data_tensor[point_index, ...], sticking_probabilities)
        if pipeline:
            grid_data = apply_pipeline(pipeline, grid_data)

        # Call the selection callback, if it exists
        if selection_callback:
            selection_callback(ax_plot, grid_data, point_index)

    # Draw the initial selection; the title reflects the actual initial index
    show_point(initial_point_index)

    # Plot the geometry in the picker subplot
    ax_selector.axhline(0, linestyle="--", color="black", linewidth=1)
    ax_selector.axvline(0, linestyle="--", color="black", linewidth=1)
    ax_selector.scatter(
        pos[:, 0],
        pos[:, 1],
        color="black",
        linewidths=0,
        marker=".",
        picker=True,
        pickradius=0.2,
    )

    # Plot the currently selected point in the picker subplot
    scat = ax_selector.scatter(
        pos[initial_point_index, 0],
        pos[initial_point_index, 1],
        color="red",
        marker="o",
    )

    def onpick(event):
        # Only react to clicks on a scatter plot (the geometry points)
        if isinstance(event.artist, PathCollection):
            # Several points may lie within the pick radius; use the first one
            point_index = event.ind[0]
            scat.set_offsets((pos[point_index, 0], pos[point_index, 1]))
            show_point(point_index)
            fig.canvas.draw_idle()

    ax_selector.axis("scaled")
    fig.canvas.mpl_connect("pick_event", onpick)
    return fig, ax_plot


# Data Exploration

## Loading and Transforming

In [None]:
# Get all available configurations
# NOTE(review): os.listdir returns the entries in arbitrary order, so the geometry
# behind a fixed `geometry_index` may differ between machines -- consider sorting.
fnames = listdir(DATA_DIR)
configs = list(map_configs(fnames).items())

# Select a particular geometry
# (the trailing comment lists alternative selections that were tried)
geometry_index = 27 # 34 #np.random.randint(0, len(configs)) #57 #13
print(geometry_index)
name, sticking = configs[geometry_index]

print(name)
# Characters 1-3 of the first underscore-separated part of the name are parsed as
# the trench diameter
trench_diameter = float(name.split("_")[0][1:4])

# Load the data of this geometry
data_loader = partial(load_data, DATA_DIR)

# Load the spherical distribution data (extracted from physical simulations)
thickness_data = extract_data(data_loader, name, sticking)
data = to_data_tensor(
    thickness_data,
    STICKING_PROBABILITIES,
)

# Load the viewfactor data
# Only the "s1000" file is loaded, so only the first slot along the sticking
# probability axis (p_s=1.0) is filled; all other slots stay NaN and
# `to_data_tensor` prints a message for each of them.
viewfactor_data = to_data_tensor(
    extract_data(data_loader, name, ["s1000"], prefix="viewfactor_"),
    STICKING_PROBABILITIES,
)

# Deviation of the simulation from the viewfactor model
# (the p_s=1.0 viewfactor result is subtracted from every sticking probability)
deviation_data = np.zeros_like(data)
for i in range(data.shape[1]):
    deviation_data[:, i, :] = data[:, i, :] - viewfactor_data[:, 0, :]

# Use the radius calculated with the viewfactor method instead of the extracted one
# (overwrites `data` in place, after the deviation has been computed)
data[:, 0, :] = viewfactor_data[:, 0, :]

# Extend data with expected conformal thickness at p_s=0
extended_data = fill_conformal(data, thickness_data.pos, trench_diameter)



### Data transformation pipeline

In [None]:
# Each step is a pair of an operation and the extra arguments that
# `apply_pipeline` passes to it after the grid data.
pipeline = [
    (  # Fill NaN
        # axis=0: every row of the radius grid (one sticking probability) is
        # interpolated on its own
        fill_along_axis,
        (np.isnan, 0),
    ),
    (  # Ensure that data is monotonic
        # Values flagged as not monotonic along axis 0 are re-interpolated within
        # their column (axis=1: one time step at a time)
        fill_along_axis,
        (lambda x: ~mask_monotonic(x, axis=0), 1),
    ),
    # Disabled steps:
    # (  # Interpolation
    #     interpolate_data,
    #     (100,),
    # ),
    # (  # mask off closed configurations
    #     mask_closed,
    #     (trench_diameter,),
    # ),
]


## 2D Plots

In [None]:
# Interactive 2D plots of the extended data set: radius over sticking probability
# and radius over time for the point picked in the geometry plot
fig, ax = selector_plot(
    thickness_data.pos,
    extended_data,
    STICKING_PROBABILITIES,
    pipeline=pipeline,
    selection_callback=draw_2d_plot,
    num_subplots=2,
)

## 3D Plots

In [None]:
# Interactive 3D surface plot of `data`, i.e. without the conformal values that
# `extended_data` contains for p_s=0
fig, ax = selector_plot(
    thickness_data.pos,
    data,
    STICKING_PROBABILITIES,
    pipeline=pipeline,
    selection_callback=draw_3d_plot,
    projection="3d",
    num_subplots=1,
)


### Extended dataset (+conformal deposition thickness)

In [None]:
# Interactive 3D surface plot of the extended data set, starting with point 777
# selected (the index has to exist in the loaded geometry)
fig, ax = selector_plot(
    thickness_data.pos,
    extended_data,
    STICKING_PROBABILITIES,
    pipeline=pipeline,
    selection_callback=draw_3d_plot,
    projection="3d",
    initial_point_index=777,
)


## Curve Fitting

In [None]:
def testing(data_tensor, sticking_probabilities, point_index):
    """Fits `a * exp(-b * ps) - c * ps + d` to the radius of one point at the last
    time step and plots the data together with the fitted curve.

    The data of the point is first cleaned the same way as in the transformation
    pipeline (NaN filling, then re-interpolation of non-monotonic values). The
    fitted parameters and their covariance are printed."""

    # Alternative model that was tried: a * b ** (-c * x) - d * x + e
    def fit_fn(ps, a, b, c, d):
        return a * np.exp(-b * ps) - c * ps + d

    grid = to_grid_data(data_tensor[point_index, ...], sticking_probabilities)
    grid = fill_along_axis(grid, np.isnan, 0)
    grid = fill_along_axis(
        grid, lambda values: ~mask_monotonic(values, axis=0), axis=1
    )
    # Radius over the sticking probability at the last time step
    final_radius = grid.radius[:, -1]

    # Bounds that were tried: ([1e-2, 1e-2, 1e-2, 0], [10, 10, 10, 10])
    popt, pcov = curve_fit(fit_fn, sticking_probabilities, final_radius)
    print(popt, pcov)

    fig, ax = plt.subplots(1, 1)
    ax.plot(sticking_probabilities, final_radius, marker="x")
    dense_ps = np.linspace(0, 1, 100)
    ax.plot(dense_ps, fit_fn(dense_ps, *popt))
    ax.grid(which="both")
    plt.show()


# Run the fit for point 681 of the extended data set (the index has to exist in
# the loaded geometry)
testing(extended_data, STICKING_PROBABILITIES, 681)


In [None]:
def optimize(
    data: np.ndarray,
    pos: np.ndarray,
    sticking_probabilities: list[float],
    tf: Callable,
    diff: bool = False,
) -> None:
    """Opens a selector plot that fits `tf` to the surface of the selected point.

    For every selection the two parameters of `tf` are fitted to the grid data of
    the point with `curve_fit`, and the parameters and the mean squared error are
    printed. The grid data is prepared with the notebook-level `pipeline`.

    Args:
        data: Data tensor with the point index as first axis.
        pos: Point coordinates of the geometry.
        sticking_probabilities: Sticking probabilities of the second tensor axis.
        tf: Model called as `tf(point_index, X, a, b)`, where `X` holds the time
            values in its first and the sticking probabilities in its second entry.
        diff: If True, the difference between data and fit is drawn; otherwise the
            data surface and the fitted surface are drawn on top of each other."""

    def callback(ax: List[Axes3D], grid_data: GridData, point_index: int) -> None:
        ax[0].clear()
        X = np.vstack([grid_data.time.ravel(), grid_data.sp.ravel()])
        Y = grid_data.radius.ravel()
        # curve_fit rejects NaN values, so only the defined samples are fitted
        valid = ~np.isnan(Y)

        def test_function(x, a, b):
            return tf(point_index, x, a, b)

        popt = None
        if not valid.any():
            print(f"No valid data to fit for point {point_index}")
        else:
            try:
                popt, _ = curve_fit(
                    test_function,
                    X[:, valid],
                    Y[valid],
                    maxfev=2000,
                    bounds=([1e-3, 0.5], [30, 1.5]),
                )
            except (RuntimeError, ValueError) as e:
                # RuntimeError: the minimization failed; ValueError: invalid input
                # or model output. Report it and keep the plot interactive.
                print(e)

        if not diff:
            # First, draw the actual surface
            draw_3d_plot(ax, grid_data, point_index)

        if popt is None:
            return

        zznew = test_function(X, *popt).reshape(grid_data.radius.shape)
        # Average over the defined entries only, so that NaN values in the data do
        # not turn the whole error into NaN
        squared_error = (grid_data.radius - zznew) ** 2
        defined = ~np.isnan(squared_error)
        mse = np.sum(squared_error[defined]) / np.count_nonzero(defined)
        print(f"Parameters: {popt}, MSE: {mse}")

        if diff:
            ax[0].plot_surface(
                grid_data.time,
                grid_data.sp,
                grid_data.radius - zznew,
                linewidth=1,
                antialiased=True,
            )
        else:
            # Then draw the fitted surface on a finer grid
            xnew = np.linspace(0, grid_data.time.shape[1], 100)
            ynew = np.linspace(
                np.min(sticking_probabilities), np.max(sticking_probabilities), 100
            )
            xxnew, yynew = np.meshgrid(xnew, ynew)
            zznew = test_function((xxnew.ravel(), yynew.ravel()), *popt).reshape(
                xxnew.shape
            )
            ax[0].plot_surface(
                xxnew,
                yynew,
                zznew,
                linewidth=1,
                antialiased=True,
            )

    selector_plot(
        pos,
        data,
        sticking_probabilities,
        pipeline=pipeline,
        selection_callback=callback,
        projection="3d",
    )


def fit_function(X, a, b, c, d, e, f):
    """Model surface: an amplitude depending on the sticking probability `s`,
    multiplied by a tanh saturation in the time `t`.

    `X` holds the time values in its first and the sticking probabilities in its
    second entry."""
    t, s = X
    amplitude = a * np.exp(-b * s) - c * s + d
    rate = e * (1 - s) + f
    return amplitude * np.tanh(rate * t)


def fit_function_2(point_index, X, a, b, c, d, e, f):
    """Model surface of `fit_function`, limited from above by the conformal
    thickness of the selected point.

    The conformal thickness is `t` minus the second position coordinate of the
    point, limited to the range from 0 to half the trench diameter. Reads the
    notebook-level `thickness_data` and `trench_diameter`."""
    t, s = X
    half_diameter = trench_diameter / 2

    # Conformal limit (an alternative with a fitted offset, t + g, was tried)
    ypos = thickness_data.pos[point_index, 1]
    growth = t - ypos
    capped = np.where(growth > half_diameter, half_diameter, growth)
    conformal = np.where(capped < 0, 0, capped)

    amplitude = a * np.exp(-b * s) - c * s + d
    model = amplitude * np.tanh((e * (1 - s) + f) * t)
    return np.min([conformal, model], axis=0)


def fit_function_3(point_index, X, a, b):
    """Blends the viewfactor thickness with the conformal thickness of a point.

    The weight of the viewfactor part is `s ** (1 / a)`, the conformal part gets
    the remainder. The parameter `b` is accepted but not used. Reads the
    notebook-level `extended_data`, `thickness_data` and `trench_diameter`."""
    t, s = X
    half_diameter = trench_diameter / 2

    # Calculated viewfactor thickness, linearly interpolated (and extrapolated)
    # over the time index
    vf_samples = extended_data[point_index, 0, :]
    vf_curve = interp1d(
        np.arange(len(vf_samples)), vf_samples, fill_value="extrapolate"
    )
    vf = vf_curve(t)

    # Expected conformal layer thickness
    ypos = thickness_data.pos[point_index, 1]
    growth = t - np.max([0, ypos])
    capped = np.where(growth > half_diameter, half_diameter, growth)
    co = np.where(capped < 0, 0, capped)

    weight = s ** (1 / a)
    return weight * vf + (1 - weight) * co


# Fit the blend model `fit_function_3` to the extended data set and draw the
# fitted surface on top of the data (diff=False)
optimize(
    extended_data,
    thickness_data.pos,
    STICKING_PROBABILITIES,
    fit_function_3,
    False,
)


In [None]:
def testplot(diff: bool = False) -> None:
    """Opens a selector plot comparing the extended data with a fixed blend of the
    viewfactor thickness and the conformal thickness.

    With `diff=True` the difference between data and blend is drawn, otherwise the
    data surface and the blend surface are drawn on top of each other. Reads the
    notebook-level `extended_data`, `thickness_data`, `trench_diameter` and
    `pipeline`."""

    def ftest2(X, point_index):
        t, s = X
        half_diameter = trench_diameter / 2

        # Calculated viewfactor thickness
        vf = extended_data[point_index, 0, :]

        # Expected conformal layer thickness
        ypos = thickness_data.pos[point_index, 1]
        growth = t - np.max([0, ypos])
        capped = np.where(growth > half_diameter, half_diameter, growth)
        co = np.where(capped < 0, 0, capped)

        # Fixed blend weight of the viewfactor part
        weight = s ** (1 / 8)
        return weight * vf + (1 - weight) * co

    def callback(ax: List[Axes3D], grid_data: GridData, point_index: int) -> None:
        ax[0].clear()

        z = ftest2((grid_data.time, grid_data.sp), point_index)
        if diff:
            surface = grid_data.radius - z
        else:
            # Draw the data first, the blend surface is added on top
            draw_3d_plot(ax, grid_data, point_index)
            surface = z

        ax[0].plot_surface(
            grid_data.time,
            grid_data.sp,
            surface,
            linewidth=1,
            antialiased=True,
        )

    selector_plot(
        thickness_data.pos,
        extended_data,
        STICKING_PROBABILITIES,
        pipeline=pipeline,
        selection_callback=callback,
        projection="3d",
        initial_point_index=279,
    )


# Show the data surface together with the fixed blend surface
testplot(diff=False)
