In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

import gudhi

from TopoMapCut import TopoMapCut

In [15]:
def plot_data_projection(df, proj, color_col='ClusterId'):
    """Plot the original 3-D points next to their 2-D projection.

    Builds a one-row, two-column Plotly figure: a 3-D scatter of the
    ``'x'``, ``'y'``, ``'z'`` columns of ``df`` on the left and a 2-D scatter
    of the first two columns of ``proj`` on the right. Both panels colour
    their markers by the same column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'x'``, ``'y'``, ``'z'`` and ``color_col``.
    proj : array-like
        Indexed as ``proj[:, 0]`` and ``proj[:, 1]`` for the 2-D coordinates.
        The marker colours are taken from ``df`` row by row, so ``proj`` is
        assumed to have one row per row of ``df``, in the same order.
    color_col : str, default 'ClusterId'
        Name of the ``df`` column used to colour the markers in both panels.
        The default keeps the original behaviour; pass e.g. ``'component'``
        to colour by another label column.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not shown; the caller decides when to display).

    Raises
    ------
    KeyError
        If ``df`` lacks ``'x'``, ``'y'``, ``'z'`` or ``color_col``.
    """
    # Look the colour values and palette up once so both panels share them.
    marker_colors = df[color_col]
    palette = px.colors.qualitative.D3

    fig = make_subplots(rows=1, cols=2,
                        specs=[[{'type': 'scene'},
                                {'type': 'xy'}]],
                        subplot_titles=('Original clusters',
                                        'TopoMapCut Projection'))

    # Left panel: the input point cloud in its original 3-D coordinates.
    fig.add_trace(
        go.Scatter3d(x=df['x'], y=df['y'], z=df['z'],
                     mode='markers',
                     marker=dict(size=3,
                                 color=marker_colors,
                                 colorscale=palette),
                     ),
        row=1, col=1
    )

    # Right panel: the 2-D projection, coloured with the same labels.
    fig.add_trace(
        go.Scatter(x=proj[:, 0],
                   y=proj[:, 1],
                   mode='markers',
                   marker=dict(color=marker_colors,
                               colorscale=palette),
                   ),
        row=1, col=2
    )

    fig.update_layout(showlegend=False,
                      height=500,
                      width=1200)

    return fig

## 3 Blobs Dataset

In [17]:
# Load the 3-blobs table and extract its 'x', 'y', 'z' columns as a NumPy array;
# that array is what the TopoMapCut cell below consumes.
df_blobs = pd.read_csv('data/3blobs.csv')
data_blobs = df_blobs[['x','y','z']].values
# NOTE(review): presumably a reference projection of the same points; no cell
# visible in this notebook reads it — confirm it is still needed.
proj_original_blobs = np.genfromtxt('data/3blobs_output.csv', delimiter=',')

In [18]:
# Run the TopoMapCut pipeline on the blobs points. The three calls are made in
# this order: get_components -> project_components -> join_components.
# NOTE(review): if proj_method='tsne' selects a t-SNE projection the result is
# presumably stochastic, and no seed is set in this notebook — confirm.
topocut_blobs = TopoMapCut(data_blobs)
components_blobs = topocut_blobs.get_components(max_components=5)
proj_subsets = topocut_blobs.project_components(proj_method='tsne')
# `projections` is passed to plot_data_projection as the 2-D coordinates. The
# same name is reassigned by the other datasets' cells, so the plot cell must
# run right after this one.
projections = topocut_blobs.join_components()
print(f'Number of components: {len(topocut_blobs.subsets)}')

Number of components: 5


In [19]:
# Label every row with the index of the TopoMapCut subset that contains it.
# Rows found in no subset keep the default label 0. If a row is listed in
# several subsets, the highest subset index wins because subsets are visited
# in ascending order.
row_ids_blobs = np.arange(len(data_blobs))
component_blobs = np.zeros(len(data_blobs), dtype=np.int64)

for j in range(len(topocut_blobs.subsets)):
    # One vectorised membership test per subset replaces a Python-level
    # `i in subset` check and a scalar .loc write for every (row, subset) pair.
    members = np.asarray(list(topocut_blobs.subsets[j]))
    component_blobs[np.isin(row_ids_blobs, members)] = j

# Single column assignment; rows align by position with the default index
# produced by read_csv.
df_blobs['component'] = component_blobs

In [20]:
# Draw the blobs figure. `projections` is whatever the most recently executed
# join_components() cell assigned, so the blobs TopoMapCut cell must have run
# immediately before this one.
fig = plot_data_projection(df_blobs, projections)
fig.update_layout(title_text='3 Blobs Dataset')
fig.show()

## 3 Rings Dataset

In [21]:
# Load the 3-rings table and extract its 'x', 'y', 'z' columns as a NumPy array;
# that array is what the TopoMapCut cell below consumes.
df_rings = pd.read_csv('data/3rings.csv')
data_rings = df_rings[['x','y','z']].values
# NOTE(review): presumably a reference projection of the same points; no cell
# visible in this notebook reads it — confirm it is still needed.
proj_original_rings = np.genfromtxt('data/3rings_output.csv', delimiter=',')

In [41]:
# Run the TopoMapCut pipeline on the rings points, allowing up to 10 components
# (the blobs and cavities cells use max_components=5).
# NOTE(review): if proj_method='tsne' selects a t-SNE projection the result is
# presumably stochastic, and no seed is set in this notebook — confirm.
topocut_rings = TopoMapCut(data_rings)
components_rings = topocut_rings.get_components(max_components=10)
# Reuses the name `proj_subsets` from the blobs cell, overwriting that result.
proj_subsets = topocut_rings.project_components(proj_method='tsne')
# `projections` is shared by name with the other datasets' cells; the rings
# plot cell must run right after this one.
projections = topocut_rings.join_components()
print(f'Number of components: {len(topocut_rings.subsets)}')

Number of components: 10


In [42]:
# Label every row with the index of the TopoMapCut subset that contains it.
# Rows found in no subset keep the default label 0. If a row is listed in
# several subsets, the highest subset index wins because subsets are visited
# in ascending order.
row_ids_rings = np.arange(len(df_rings))
component_rings = np.zeros(len(df_rings), dtype=np.int64)

for j in range(len(topocut_rings.subsets)):
    # One vectorised membership test per subset replaces a Python-level
    # `i in subset` check and a scalar .loc write for every (row, subset) pair.
    members = np.asarray(list(topocut_rings.subsets[j]))
    component_rings[np.isin(row_ids_rings, members)] = j

# Single column assignment; rows align by position with the default index
# produced by read_csv.
df_rings['component'] = component_rings

In [43]:
# Draw the rings figure. `projections` is whatever the most recently executed
# join_components() cell assigned, so the rings TopoMapCut cell must have run
# immediately before this one.
fig = plot_data_projection(df_rings, projections)
fig.update_layout(title_text='3 Rings Dataset')
fig.show()

## 2 Cavities Dataset

In [25]:
# Load the 2-cavities table. Its coordinate columns are upper-case in the file,
# so they are renamed to the lower-case 'x', 'y', 'z' that
# plot_data_projection and the column selection below expect.
df_cavities = pd.read_csv('data/2cavities.csv')
df_cavities = df_cavities.rename(columns={'X':'x', 'Y':'y', 'Z':'z'})
data_cavities = df_cavities[['x','y','z']].values
# NOTE(review): presumably a reference projection of the same points; no cell
# visible in this notebook reads it — confirm it is still needed.
proj_original_cavities = np.genfromtxt('data/2cavities_output.csv', delimiter=',')

In [26]:
# Run the TopoMapCut pipeline on the cavities points. The three calls are made
# in this order: get_components -> project_components -> join_components.
# NOTE(review): if proj_method='tsne' selects a t-SNE projection the result is
# presumably stochastic, and no seed is set in this notebook — confirm.
topocut_cavities = TopoMapCut(data_cavities)
components_cavities = topocut_cavities.get_components(max_components=5)
proj_cavities = topocut_cavities.project_components(proj_method='tsne')
# `projections` is shared by name with the other datasets' cells; the cavities
# plot cell must run right after this one.
projections = topocut_cavities.join_components()
print(f'Number of components: {len(topocut_cavities.subsets)}')

Number of components: 5


In [27]:
# Label every row with the index of the TopoMapCut subset that contains it.
# Rows found in no subset keep the default label 0. If a row is listed in
# several subsets, the highest subset index wins because subsets are visited
# in ascending order.
row_ids_cavities = np.arange(len(df_cavities))
component_cavities = np.zeros(len(df_cavities), dtype=np.int64)

for j in range(len(topocut_cavities.subsets)):
    # One vectorised membership test per subset replaces a Python-level
    # `i in subset` check and a scalar .loc write for every (row, subset) pair.
    members = np.asarray(list(topocut_cavities.subsets[j]))
    component_cavities[np.isin(row_ids_cavities, members)] = j

# Single column assignment; rows align by position with the default index
# produced by read_csv.
df_cavities['component'] = component_cavities

In [28]:
# Draw the cavities figure. `projections` is whatever the most recently executed
# join_components() cell assigned, so the cavities TopoMapCut cell must have run
# immediately before this one.
fig = plot_data_projection(df_cavities, projections)
fig.update_layout(title_text='2 Cavities Dataset')
fig.show()