In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

import gudhi

from TopoMap import TopoMap
from TopoMapCut import TopoMapCut

In [2]:
def plot_data_projection(df, proj_topomap, proj_cut, color_col='ClusterId'):
    """Build a 1x3 Plotly figure: the original 3D points next to two 2D projections.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'x', 'y', 'z' (3D coordinates of the left
        panel) and the column named by ``color_col``.
    proj_topomap : 2D array
        Indexed as ``[:, 0]`` / ``[:, 1]``; drawn in the middle panel
        ('TopoMap Projection').
    proj_cut : 2D array
        Indexed as ``[:, 0]`` / ``[:, 1]``; drawn in the right panel
        ('New Projection').
    color_col : str, default 'ClusterId'
        Column of ``df`` whose values color the markers in all three panels.
        The default reproduces the previous hard-coded behavior; pass e.g.
        'component' to color by another per-point label stored in ``df``.

    Returns
    -------
    The figure created by ``make_subplots`` (500 px high, 1200 px wide,
    legend hidden) with the three traces added.

    Notes
    -----
    The same color values are attached to every trace, so the rows of both
    projections are expected to be in the same order as the rows of ``df``.
    """
    fig = make_subplots(
        rows=1,
        cols=3,
        specs=[[{'type': 'scene'}, {'type': 'xy'}, {'type': 'xy'}]],
        subplot_titles=('Original clusters',
                        'TopoMap Projection',
                        'New Projection'),
    )

    marker_values = df[color_col]
    # NOTE(review): D3 is a qualitative palette but is passed as a `colorscale`;
    # confirm that spreading it over the numeric value range is the intended look.
    palette = px.colors.qualitative.D3

    # Left panel: the raw 3D point cloud.
    fig.add_trace(
        go.Scatter3d(x=df['x'], y=df['y'], z=df['z'],
                     mode='markers',
                     marker=dict(size=3,
                                 color=marker_values,
                                 colorscale=palette)),
        row=1, col=1,
    )

    # Middle and right panels: the two 2D projections share identical styling,
    # so they are generated from one template instead of duplicated code.
    for panel_col, projection in enumerate((proj_topomap, proj_cut), start=2):
        fig.add_trace(
            go.Scatter(x=projection[:, 0],
                       y=projection[:, 1],
                       mode='markers',
                       marker=dict(color=marker_values,
                                   colorscale=palette)),
            row=1, col=panel_col,
        )

    fig.update_layout(showlegend=False,
                      height=500,
                      width=1200)

    return fig

## 3 Blobs Dataset

In [3]:
# Load the 3-blobs table; its x/y/z columns become the point matrix fed to the projections.
df_blobs = pd.read_csv('data/3blobs.csv')
data_blobs = df_blobs.loc[:, ['x', 'y', 'z']].to_numpy()
# Comma-separated numeric file stored next to the dataset.
# NOTE(review): presumably a previously exported projection; it is not used in the cells shown here.
proj_original_blobs = np.genfromtxt('data/3blobs_output.csv',
                                    delimiter=',')

In [4]:
# Run TopoMapCut on the blobs points; `projections` is later drawn as the
# 'New Projection' panel. The recorded output shows the run stopped at the
# max_components cap, so the printed count equals 20.
topocut_blobs = TopoMapCut(data_blobs, max_components=20, ignore_outliers=False)
projections = topocut_blobs.run()

# `subsets` is read only after run(); one entry per component.
print(f'Number of components: {len(topocut_blobs.subsets)}')

[INFO] Max components hit. # components: 20 | Max_components: 20
Number of components: 20


In [5]:
# Plain TopoMap run on the same blobs points; `proj_blobs` is later drawn as
# the 'TopoMap Projection' panel for comparison with the TopoMapCut result.
topomap_blobs = TopoMap(data_blobs)
proj_blobs = topomap_blobs.run()

In [6]:
# Label every row of df_blobs with the index of the TopoMapCut subset that
# contains it (0 when no subset contains the row).
#
# One vectorized assignment per subset replaces the previous nested Python loop,
# which did a membership scan per (point, subset) pair and a scalar `.loc`
# write per hit. Subsets are applied in increasing order, so a row listed in
# several subsets keeps the highest subset index, exactly as before.
#
# Assumes each entry of `topocut_blobs.subsets` is a collection of integer row
# positions into data_blobs -- TODO confirm against TopoMapCut.
n_points_blobs = len(data_blobs)
component_blobs = np.zeros(n_points_blobs, dtype=np.int64)

for comp_id, members in enumerate(topocut_blobs.subsets):
    member_idx = np.asarray(list(members), dtype=np.int64)
    # Drop anything outside 0..n-1: the old loop never matched such values.
    member_idx = member_idx[(member_idx >= 0) & (member_idx < n_points_blobs)]
    component_blobs[member_idx] = comp_id

df_blobs['component'] = component_blobs

In [7]:
# Three-panel comparison for the blobs data: 3D points, TopoMap, TopoMapCut.
# update_layout returns the figure it was called on, so the call can be chained.
fig = plot_data_projection(
    df_blobs, proj_blobs, projections
).update_layout(title_text='3 Blobs Dataset')
fig.show()

## 3 Rings Dataset

In [8]:
# Load the 3-rings table; its x/y/z columns become the point matrix fed to the projections.
df_rings = pd.read_csv('data/3rings.csv')
data_rings = df_rings.loc[:, ['x', 'y', 'z']].to_numpy()
# Comma-separated numeric file stored next to the dataset.
# NOTE(review): presumably a previously exported projection; it is not used in the cells shown here.
proj_original_rings = np.genfromtxt('data/3rings_output.csv',
                                    delimiter=',')

In [9]:
# Run TopoMapCut on the rings points; `projections` is later drawn as the
# 'New Projection' panel. The recorded output shows the run stopped at the
# max_components cap, so the printed count equals 30.
topocut_rings = TopoMapCut(data_rings, max_components=30, ignore_outliers=False)
projections = topocut_rings.run()

# `subsets` is read only after run(); one entry per component.
print(f'Number of components: {len(topocut_rings.subsets)}')

[INFO] Max components hit. # components: 30 | Max_components: 30
Number of components: 30


In [10]:
# Plain TopoMap run on the same rings points; `proj_rings` is later drawn as
# the 'TopoMap Projection' panel for comparison with the TopoMapCut result.
topomap_rings = TopoMap(data_rings)
proj_rings = topomap_rings.run()

In [11]:
# Label every row of df_rings with the index of the TopoMapCut subset that
# contains it (0 when no subset contains the row).
#
# One vectorized assignment per subset replaces the previous nested Python loop,
# which did a membership scan per (point, subset) pair and a scalar `.loc`
# write per hit. Subsets are applied in increasing order, so a row listed in
# several subsets keeps the highest subset index, exactly as before.
#
# Assumes each entry of `topocut_rings.subsets` is a collection of integer row
# positions into df_rings -- TODO confirm against TopoMapCut.
n_points_rings = len(df_rings)
component_rings = np.zeros(n_points_rings, dtype=np.int64)

for comp_id, members in enumerate(topocut_rings.subsets):
    member_idx = np.asarray(list(members), dtype=np.int64)
    # Drop anything outside 0..n-1: the old loop never matched such values.
    member_idx = member_idx[(member_idx >= 0) & (member_idx < n_points_rings)]
    component_rings[member_idx] = comp_id

df_rings['component'] = component_rings

In [12]:
# Three-panel comparison for the rings data: 3D points, TopoMap, TopoMapCut.
# update_layout returns the figure it was called on, so the call can be chained.
fig = plot_data_projection(
    df_rings, proj_rings, projections
).update_layout(title_text='3 Rings Dataset')
fig.show()

## 2 Cavities Dataset

In [13]:
# Load the 2-cavities table and lower-case its coordinate headers (X/Y/Z -> x/y/z)
# so the frame exposes the same column names as the other datasets.
df_cavities = (
    pd.read_csv('data/2cavities.csv')
    .rename(columns={'X': 'x', 'Y': 'y', 'Z': 'z'})
)
data_cavities = df_cavities.loc[:, ['x', 'y', 'z']].to_numpy()
# Comma-separated numeric file stored next to the dataset.
# NOTE(review): presumably a previously exported projection; it is not used in the cells shown here.
proj_original_cavities = np.genfromtxt('data/2cavities_output.csv',
                                       delimiter=',')

In [14]:
# Run TopoMapCut on the cavities points; `projections` is later drawn as the
# 'New Projection' panel. The recorded output shows the run stopped at the
# max_components cap, so the printed count equals 20.
topocut_cavities = TopoMapCut(data_cavities, max_components=20, ignore_outliers=False)
projections = topocut_cavities.run()

# `subsets` is read only after run(); one entry per component.
print(f'Number of components: {len(topocut_cavities.subsets)}')

[INFO] Max components hit. # components: 20 | Max_components: 20
Number of components: 20


In [15]:
# Plain TopoMap run on the same cavities points; `proj_cavities` is later drawn
# as the 'TopoMap Projection' panel for comparison with the TopoMapCut result.
topomap_cavities = TopoMap(data_cavities)
proj_cavities = topomap_cavities.run()

In [16]:
# Label every row of df_cavities with the index of the TopoMapCut subset that
# contains it (0 when no subset contains the row).
#
# One vectorized assignment per subset replaces the previous nested Python loop,
# which did a membership scan per (point, subset) pair and a scalar `.loc`
# write per hit. Subsets are applied in increasing order, so a row listed in
# several subsets keeps the highest subset index, exactly as before.
#
# Assumes each entry of `topocut_cavities.subsets` is a collection of integer
# row positions into df_cavities -- TODO confirm against TopoMapCut.
n_points_cavities = len(df_cavities)
component_cavities = np.zeros(n_points_cavities, dtype=np.int64)

for comp_id, members in enumerate(topocut_cavities.subsets):
    member_idx = np.asarray(list(members), dtype=np.int64)
    # Drop anything outside 0..n-1: the old loop never matched such values.
    member_idx = member_idx[(member_idx >= 0) & (member_idx < n_points_cavities)]
    component_cavities[member_idx] = comp_id

df_cavities['component'] = component_cavities

In [17]:
# Three-panel comparison for the cavities data: 3D points, TopoMap, TopoMapCut.
# update_layout returns the figure it was called on, so the call can be chained.
fig = plot_data_projection(
    df_cavities, proj_cavities, projections
).update_layout(title_text='2 Cavities Dataset')
fig.show()