### Plot function for 2D mesh

In [10]:
import plotly.graph_objects as go

def plot_simplices_2d(X, simplices_idx):
    """
    Plot 2D points together with the edges of their simplices (triangles).

    Parameters:
    X: NumPy array of point coordinates; column 0 is plotted on the x axis
        and column 1 on the y axis.
    simplices_idx: Iterable of vertex-index rows into X, one row per simplex.
        May be empty, in which case only the points are drawn.

    The figure is displayed with fig.show(); nothing is returned.
    """
    fig = go.Figure()

    # Add the points
    fig.add_trace(go.Scatter(
        x=X[:, 0],
        y=X[:, 1],
        mode='markers',
        marker=dict(color='blue', size=7),
        name='Points'
    ))

    # Add vertical and horizontal lines to split the unit square into 4 equal squares
    guide_lines = (
        dict(x0=0.5, y0=0, x1=0.5, y1=1),
        dict(x0=0, y0=0.5, x1=1, y1=0.5),
    )
    for coords in guide_lines:
        fig.add_shape(
            type="line",
            opacity=0.5,
            line=dict(color="grey", width=1),
            **coords
        )

    # Add the triangulation. Every simplex is closed by repeating its first
    # vertex (otherwise the last edge of each triangle is never drawn), and
    # all simplices share a single trace, separated by None gaps, instead of
    # creating one trace per simplex.
    edge_x, edge_y = [], []
    for simplex in simplices_idx:
        closed = [*simplex, simplex[0]]
        edge_x.extend(X[closed, 0].tolist())
        edge_x.append(None)
        edge_y.extend(X[closed, 1].tolist())
        edge_y.append(None)

    if edge_x:
        fig.add_trace(go.Scatter(
            x=edge_x,
            y=edge_y,
            mode='lines',
            line=dict(color='orange'),
            name='Simplex'
        ))

    # Update the layout
    fig.update_layout(
        title='2D Delaunay Triangulation',
        xaxis_title='X',
        yaxis_title='Y',
        showlegend=False,
        width=800,
        height=800
    )

    # Show the plot
    fig.show()

### A 2D -> 1D function

In [11]:
import numpy as np

def func_2d(points):
    """
    Example target function: the sum of squared coordinates of each point.

    For 2D input this is f(x, y) = x^2 + y^2. A single point is promoted to
    a one-row array, and the result always has one column (one value per row).
    """
    rows = np.atleast_2d(points)
    squared = np.square(rows)
    return squared.sum(axis=1, keepdims=True)

# Column names shared by the 2D examples: two feature columns and one target column.
features_2d = ['x1', 'x2']
targets_1d = ['y1']

# Main class to compute simplices and augmented simplices

In [12]:
from typing import Union, List, Dict, Tuple, Callable, Optional
import numpy as np
import pandas as pd
from scipy.spatial import Delaunay
from sklearn.cluster import DBSCAN
from kneed import KneeLocator
from sklearn.neighbors import NearestNeighbors

class ASVD:
    """
    Augmented (Space) Simplex Volume Distribution (ASVD) class.

    This class is meant to calculate and analyze fractional vertex star
    volumes in both original and augmented spaces. These volumes are also
    known as Voronoi volumes or Donald volumes in certain contexts.

    The methods defined here cover the geometric setup: the input vertices are
    grouped into density clusters with DBSCAN, each cluster is triangulated
    separately with Delaunay, and the resulting simplices are stored both in
    the original feature space and in the augmented space (features + targets).
    No volume computation is implemented in this class body.

    Attributes:
        features (List[str]): Names of feature columns.
        targets (List[str]): Names of target columns.
        vertices_x (np.ndarray): Vertex coordinates in the original feature space.
        vertices_xy (np.ndarray): Vertex coordinates in the augmented space (features + targets).
        cluster_labels (np.ndarray): DBSCAN label of every vertex (-1 marks noise).
        n_clusters (int): Number of clusters found, noise excluded.
        cluster_sizes (np.ndarray): Number of vertices in each cluster.
        nnn_distances (np.ndarray): Sorted distances to the Nth nearest neighbor.
        elbow_point (Optional[int]): Elbow index in nnn_distances, or None if not found.
        simplices_idx (np.ndarray): Indices of simplices from Delaunay triangulation.
        simplices_x (np.ndarray): Simplex coordinates in the original feature space.
        simplices_xy (np.ndarray): Simplex coordinates in the augmented space.

    Note:
        - Suffix '_x' denotes attributes in the original feature space.
        - Suffix '_xy' denotes attributes in the augmented space (features + targets).
    """

    def __init__(
        self, data: pd.DataFrame, features: List[str], targets: List[str],
        use_func: bool = False, func: Optional[Callable] = None
    ):
        """
        Initialize the ASVD object.

        Parameters:
        data: Dataframe (n, p+k) (or (n, p) if use_func is True) of samples that will
            be future (augmented) vertices.
            vertices = data[features], augmented_vertices = data[features + targets]
        features: Names of the feature columns in data.
        targets: Names of the target columns (read from data unless use_func is True).
        use_func: Boolean indicating whether to use a custom function
        func: Custom function to compute targets (if use_func is True)

        Raises:
        TypeError: If use_func is True and func is not callable.
        """
        self.features = features
        self.targets = targets
        self.set_vertices(data, use_func, func)
        self.set_clusters()
        self.set_simplices()

    def set_vertices(self, data, use_func, func):
        """
        Build the vertex arrays in the original and augmented spaces.

        Targets are read from data[self.targets] unless use_func is True, in
        which case func is called once per feature row and its flattened
        output is used as that row's target values.

        Sets self.vertices_x and self.vertices_xy.
        """
        vertices_x = data[self.features].values
        if not use_func:
            vertices_y = data[self.targets].values
        else:
            # Fail early with a clear message instead of "NoneType is not callable".
            if not callable(func):
                raise TypeError("func must be callable when use_func is True")
            vertices_y = np.array([func(vertex).ravel() for vertex in vertices_x])
        vertices_xy = np.column_stack((vertices_x, vertices_y))
        # New attributes
        self.vertices_x = vertices_x
        self.vertices_xy = vertices_xy

    def set_clusters(self):
        """
        Cluster the vertices with DBSCAN, choosing epsilon automatically.

        Epsilon is the distance at the elbow of the sorted Nth-nearest-neighbor
        distance curve; when no elbow is detected, the median distance is used.

        Sets self.cluster_labels, self.n_clusters, self.cluster_sizes,
        self.nnn_distances and self.elbow_point.
        """
        # Automatically determine DBSCAN parameters
        n_neighbors = min(len(self.vertices_x) - 1, 10)  # Use 10 neighbors or less if fewer points
        nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(self.vertices_x)
        # NOTE(review): the query points are the fitted points themselves, so each
        # point is presumably returned as its own nearest neighbor - confirm.
        distances, _ = nbrs.kneighbors(self.vertices_x)

        # Sort distances to the nth neighbor (farthest neighbor)
        distances = np.sort(distances[:, -1])

        # Find the elbow point for epsilon
        kneedle = KneeLocator(range(len(distances)), distances, curve='convex', direction='increasing')
        elbow = kneedle.elbow
        # Compare against None explicitly: an elbow at index 0 is falsy but valid,
        # and the stored elbow_point must match the epsilon actually used.
        epsilon = distances[elbow] if elbow is not None else np.median(distances)

        # Perform DBSCAN clustering
        clustering = DBSCAN(eps=epsilon, min_samples=3).fit(self.vertices_x)
        labels = clustering.labels_

        # Store clustering information as attributes
        self.cluster_labels = labels
        self.n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        self.cluster_sizes = np.bincount(labels[labels >= 0])

        # Store key variables in epsilon definition
        self.nnn_distances = distances  # Nth Nearest Neighbor distance
        self.elbow_point = elbow

    def set_simplices(self):
        """
        Triangulate every cluster separately and collect the simplices.

        Clusters with fewer points than a simplex needs (number of features + 1)
        are skipped. Simplex vertex indices refer to rows of self.vertices_x.

        Sets self.simplices_idx (m, p+1), self.simplices_x (m, p+1, p) and
        self.simplices_xy (m, p+1, p+k). When no cluster can be triangulated
        the arrays are empty but keep these trailing dimensions.
        """
        all_simplices_idx = []
        all_simplices_x = []
        all_simplices_xy = []

        n_simplex_vertices = len(self.features) + 1

        # Process each cluster
        for cluster_id in range(self.n_clusters):
            # Get indices of points in this cluster
            cluster_indices = np.where(self.cluster_labels == cluster_id)[0]

            # Skip clusters with too few points for triangulation
            if len(cluster_indices) < n_simplex_vertices:
                continue

            # Create Delaunay triangulation for this cluster
            tri = Delaunay(self.vertices_x[cluster_indices])

            # Map local (per-cluster) indices to global vertex indices
            global_simplices_idx = cluster_indices[tri.simplices]

            all_simplices_idx.append(global_simplices_idx)
            all_simplices_x.append(self.vertices_x[global_simplices_idx])
            all_simplices_xy.append(self.vertices_xy[global_simplices_idx])

        if all_simplices_idx:
            self.simplices_idx = np.concatenate(all_simplices_idx, axis=0)
            self.simplices_x = np.concatenate(all_simplices_x, axis=0)
            self.simplices_xy = np.concatenate(all_simplices_xy, axis=0)
        else:
            # Keep the trailing dimensions so that downstream indexing such as
            # simplices_xy[:, :, 2] still works on an empty result.
            dim_x = self.vertices_x.shape[1]
            dim_xy = self.vertices_xy.shape[1]
            self.simplices_idx = np.empty((0, n_simplex_vertices), dtype=np.intp)
            self.simplices_x = np.empty((0, n_simplex_vertices, dim_x))
            self.simplices_xy = np.empty((0, n_simplex_vertices, dim_xy))

    def analyze_clusters(self):
        """Print the number of clusters, their sizes and their centroids."""
        cluster_centroids = np.array([
            np.mean(self.vertices_x[self.cluster_labels == i], axis=0)
            for i in range(self.n_clusters)
        ])

        print(f"Number of clusters: {self.n_clusters}")
        print(f"Cluster sizes: {self.cluster_sizes}")
        print(f"Cluster centroids:\n{cluster_centroids}")

# First example of blob clusters

In [13]:
from sklearn.datasets import make_blobs

def generate_clustered_blobs_2d(n_samples=1000, n_clusters=4, cluster_std=0.05, random_state=None):
    """
    Generate 2D clustered data, min-max scaled per axis to the range [0, 1].

    Parameters:
    - n_samples: Total number of points to generate
    - n_clusters: Number of clusters to generate
    - cluster_std: Standard deviation of the clusters (before scaling)
    - random_state: Seed for random number generator

    Returns:
    - X: numpy array of shape (n_samples, 2) containing the generated points
    """
    # Generate raw clustered data
    X, _ = make_blobs(n_samples=n_samples, n_features=2, centers=n_clusters,
                      cluster_std=cluster_std, random_state=random_state)

    # Scale the data to [0, 1] range
    X_min = X.min(axis=0)
    X_range = X.max(axis=0) - X_min
    # An axis on which all points coincide has zero range; divide by 1 there
    # so the column becomes 0 instead of NaN.
    X_range[X_range == 0] = 1.0

    return (X - X_min) / X_range

# Generate data
X_blobs = generate_clustered_blobs_2d(n_samples=1000, n_clusters=4, cluster_std=0.05, random_state=42)

# Wrap the generated points in a DataFrame whose columns are the feature names
data_blobs = pd.DataFrame(X_blobs, columns=features_2d)

# Initialize ASVD object; targets are computed with func_2d instead of being read from the DataFrame
asvd_blobs= ASVD(data_blobs, features_2d, targets_1d, use_func=True, func=func_2d)

# Get the simplices (triangles)
simplices_blobs = asvd_blobs.simplices_idx

# Show the first two simplices, the shape of the simplex index matrix, and the cluster summary
print(f"Matrix of simplices by vertex index: \n{asvd_blobs.simplices_idx[:2]} \n{asvd_blobs.simplices_idx.shape}\n")
asvd_blobs.analyze_clusters()

plot_simplices_2d(X_blobs, simplices_blobs)


Matrix of simplices by vertex index: 
[[714 102 854]
 [586 815 499]] 
(1934, 3)

Number of clusters: 4
Cluster sizes: [247 249 250 248]
Cluster centroids:
[[0.00974786 0.88419958]
 [0.15289587 0.00909913]
 [0.9907014  0.55451377]
 [0.47042245 0.98814182]]


# Second example: quadrant clusters

In [14]:
def generate_clustered_quadrants_2d(n_total, random_state=None):
    """
    Generate 2D points in the unit square with a different density per quadrant.

    Parameters:
    - n_total: Total number of points to generate (non-negative integer)
    - random_state: Optional seed. When None, the global NumPy random state
      is used; otherwise a dedicated generator seeded with this value is used,
      which makes the output reproducible.

    Returns:
    - X: numpy array of shape (n_total, 2), stacked in the order
      upper left, upper right, lower left, lower right.

    Raises:
    - ValueError: If n_total is negative.
    """
    if n_total < 0:
        raise ValueError("n_total must be non-negative")

    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Split the budget: two regular quadrants, one sparse quadrant, and a dense
    # quadrant that absorbs the points removed from the sparse one plus the
    # remainder of the division, so the sizes always add up to n_total.
    n_per_quadrant, remainder = divmod(n_total, 4)
    n_sparse = n_per_quadrant // 4
    n_dense = 2 * n_per_quadrant - n_sparse + remainder

    # Upper left quadrant: uniformly random
    upper_left_x = rng.uniform(low=0, high=0.5, size=n_per_quadrant)
    upper_left_y = rng.uniform(low=0.5, high=1, size=n_per_quadrant)
    upper_left = np.column_stack((upper_left_x, upper_left_y))

    # Upper right quadrant: uniformly random but only extreme upper right
    upper_right = rng.uniform(low=0.8, high=1, size=(n_per_quadrant, 2))

    # Lower left quadrant: very dense uniformly random
    lower_left = rng.uniform(low=0, high=0.5, size=(n_dense, 2))

    # Lower right quadrant: very sparse uniformly random
    lower_right_x = rng.uniform(low=0.5, high=1, size=n_sparse)
    lower_right_y = rng.uniform(low=0, high=0.5, size=n_sparse)
    lower_right = np.column_stack((lower_right_x, lower_right_y))

    # Combine all quadrants
    return np.vstack((upper_left, upper_right, lower_left, lower_right))

# Generate 2D points
X_quadrants = generate_clustered_quadrants_2d(100)

# Wrap the generated points in a DataFrame whose columns are the feature names
data_quadrants = pd.DataFrame(X_quadrants, columns=features_2d)

# Initialize ASVD object; targets are computed with func_2d instead of being read from the DataFrame
asvd_quadrants= ASVD(data_quadrants, features_2d, targets_1d, use_func=True, func=func_2d)

# Get the simplices (triangles)
simplices_quadrants = asvd_quadrants.simplices_idx

# Show the first two simplices, the shape of the simplex index matrix, and the cluster summary
print(f"Matrix of simplices by vertex index: \n{asvd_quadrants.simplices_idx[:2]} \n{asvd_quadrants.simplices_idx.shape}\n")
asvd_quadrants.analyze_clusters()

plot_simplices_2d(X_quadrants, simplices_quadrants)


Matrix of simplices by vertex index: 
[[53 52 22]
 [52 75 22]] 
(169, 3)

Number of clusters: 2
Cluster sizes: [71 25]
Cluster centroids:
[[0.31219902 0.44484435]
 [0.91740528 0.90417078]]


# Analyze the elbow curve for the quadrant clusters

In [15]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_elbow_curve(distances, elbow_point: Optional[int] = None):
    """
    Plot the sorted neighbor-distance curve and, optionally, its elbow.

    Parameters:
    distances (array-like): Sorted distances to the nth neighbor
    elbow_point (int, optional): Index of the elbow point. When given, the
        distance at that index is highlighted and labelled as epsilon.

    The figure is displayed with fig.show(); nothing is returned.
    """
    fig = make_subplots(rows=1, cols=1)

    # Main curve: distance against position in the sorted array
    curve = go.Scatter(
        x=np.arange(len(distances)),
        y=distances,
        mode='lines+markers',
        name='Elbow Curve',
    )
    fig.add_trace(curve, row=1, col=1)

    # Highlight the elbow only when one was supplied (index 0 is valid)
    if elbow_point is not None:
        epsilon = distances[elbow_point]

        star_marker = dict(color='red', size=10, symbol='star')
        elbow_trace = go.Scatter(
            x=[elbow_point],
            y=[epsilon],
            mode='markers',
            name='Elbow Point',
            marker=star_marker,
        )
        fig.add_trace(elbow_trace, row=1, col=1)

        # Cross-hair through the elbow: vertical at its index, horizontal at epsilon
        fig.add_vline(x=elbow_point, line_dash="dash", line_color="red")
        fig.add_hline(y=epsilon, line_dash="dash", line_color="green")

        # Label placed up and to the left of the elbow marker
        fig.add_annotation(
            x=elbow_point,
            y=epsilon,
            text=f"Epsilon: {epsilon:.4f}",
            showarrow=False,
            xanchor='right',
            yanchor='bottom',
            xshift=-10,
            yshift=10,
        )

    layout_options = dict(
        title='Selection of Epsilon distance for DBSCAN',
        xaxis_title='Point Index',
        yaxis_title='Distance to Farthest Neighbor',
        showlegend=True,
        hovermode='closest',
    )
    fig.update_layout(**layout_options)

    fig.show()

In [16]:
# Plot the elbow curve of the quadrant dataset; the commented-out call does the same for the blob dataset.
# plot_elbow_curve(asvd_blobs.nnn_distances, asvd_blobs.elbow_point)
plot_elbow_curve(asvd_quadrants.nnn_distances, asvd_quadrants.elbow_point)

# Example of augmentation for quadrant clusters

In [17]:
def add_curve_trace(fig, func, func_name, colorscale):
    """
    Add the surface z = func(x, y), sampled on a 50 x 50 grid over the unit
    square, to fig and return fig.

    func receives all grid points at once as rows of a two-column array, and
    its output is reshaped back to the grid shape.
    """
    axis = np.linspace(0, 1, 50)
    grid_x, grid_y = np.meshgrid(axis, axis)

    # Evaluate the function on the flattened grid, then restore the grid shape
    flat_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])
    grid_z = func(flat_points).reshape(grid_x.shape)

    surface = go.Surface(
        x=grid_x,
        y=grid_y,
        z=grid_z,
        showscale=False,
        colorscale=colorscale,
        opacity=1,
        name=f'{func_name}',
        showlegend=True,
    )
    fig.add_trace(surface)
    return fig

def add_mesh_trace(fig, simplices_xy, data_name, color='red', reduce=False):
    """
    Add a triangle mesh and its edges to a 3D figure and return the figure.

    Parameters:
    fig: Plotly figure to add the traces to.
    simplices_xy: Array of triangles of shape (m, 3, >=3); the first three
        coordinates of each vertex are used as x, y and z. May be empty.
    data_name: Label used in trace names and as the legend group.
    color: Fill color of the triangles.
    reduce: If True, the triangles are flattened onto the z = 0 plane (the
        input array itself is left unmodified) and the label loses its
        ' augmented' suffix.

    All triangles are drawn by a single Mesh3d trace with explicit i/j/k
    connectivity rather than one trace per triangle, which keeps the figure
    light when there are hundreds of simplices.
    """
    label = f'{data_name} augmented' if not reduce else f'{data_name}'

    # Work on a copy so that flattening never touches the caller's data.
    triangles = np.array(simplices_xy, dtype=float)
    if triangles.size == 0:
        # An empty input may arrive without its trailing dimensions.
        triangles = triangles.reshape(0, 3, 3)
    if reduce:
        triangles[:, :, 2] = 0

    # Add the triangles as one mesh: vertices are listed triangle by triangle,
    # so triangle t uses vertex rows 3t, 3t + 1 and 3t + 2.
    n_triangles = len(triangles)
    if n_triangles:
        corners = triangles.reshape(-1, triangles.shape[-1])
        first_corner = np.arange(0, 3 * n_triangles, 3)
        fig.add_trace(go.Mesh3d(
            x=corners[:, 0],
            y=corners[:, 1],
            z=corners[:, 2],
            i=first_corner,
            j=first_corner + 1,
            k=first_corner + 2,
            opacity=0.5,
            color=color,
            name=f'{label} simplices',
            legendgroup=f'{label}',
            showlegend=True
        ))

    # Add edges as lines: each triangle is a closed loop v0 -> v1 -> v2 -> v0,
    # followed by None so consecutive triangles are not joined.
    Xe, Ye, Ze = [], [], []
    for triangle in triangles:
        loop = triangle[[0, 1, 2, 0]]
        Xe.extend(loop[:, 0].tolist())
        Xe.append(None)
        Ye.extend(loop[:, 1].tolist())
        Ye.append(None)
        Ze.extend(loop[:, 2].tolist())
        Ze.append(None)
    fig.add_trace(go.Scatter3d(
        x=Xe,
        y=Ye,
        z=Ze,
        mode='lines',
        line=dict(color='black', width=1),
        name=f'{label} edges',
        legendgroup=f'{label}',
        showlegend=False
    ))

    return fig

def plot_augmented_2d_meshes(func, simplices_xy, name, color='red'):
    """
    Build a 3D figure showing the surface of func together with the given
    simplices, drawn once as passed in and once flattened (reduce=True).

    Returns the figure without displaying it.
    """
    # Start from the function surface
    fig = add_curve_trace(go.Figure(), func, 'f(x,y)', 'Viridis')

    # Same simplices twice: first as given, then with reduce=True
    for flatten in (False, True):
        fig = add_mesh_trace(fig, simplices_xy, name, color, reduce=flatten)

    scene_options = dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        aspectmode='cube',
    )
    fig.update_layout(
        title='2D Meshes on Function Curve Plane',
        scene=scene_options,
        width=800,
        height=800,
    )

    return fig

In [18]:
# Create the 3D plot for the quadrant dataset (func_2d surface plus the quadrant simplices) and display it
fig = plot_augmented_2d_meshes(func_2d, asvd_quadrants.simplices_xy, 'Quadrant', 'red')
fig.show()