In [None]:
# Install the libraries
pip install -U giotto-tda kaleido



In [None]:
# Data wrangling and saving Plotly figures as pdf files
import numpy as np
import pandas as pd
import kaleido

# Data viz using the native giotto wrapper
from gtda.plotting import plot_point_cloud

# TDA magic
from gtda.mapper import (
    CubicalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter,
)

# ML tools for data generation, clustering and projection
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA

In [None]:
# Sample 5000 points from two nested circles, perturbed by a small amount of noise
data, _ = datasets.make_circles(
    n_samples=5000,
    noise=0.05,
    factor=0.3,
    random_state=42,
)

plot_point_cloud(data)

In [None]:
# Number of parallel jobs used by the clustering step
n_jobs = 1

# Clustering algorithm (DBSCAN is also the library default)
clusterer = DBSCAN()
# Cubical cover with 10 intervals and an overlap fraction of 0.3
cover = CubicalCover(n_intervals=10, overlap_frac=0.3)
# Filter function (any scikit-learn transformer works): project onto columns 0 and 1
filter_func = Projection(columns=[0, 1])

# Assemble the Mapper pipeline from the pieces above
pipe = make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    n_jobs=n_jobs,
    verbose=False,
)

In [None]:
# Draw the static Mapper graph; scroll-wheel zoom is enabled in the viewer
fig = plot_static_mapper_graph(pipe, data)
fig.show(config=dict(scrollZoom=True))

In [None]:
# Color the nodes by the input data, using the "Blues" colorscale for the node markers
plotly_params = dict(node_trace=dict(marker_colorscale="Blues"))
fig = plot_static_mapper_graph(
    pipe,
    data,
    color_data=data,
    plotly_params=plotly_params,
)
fig.show(config=dict(scrollZoom=True))

In [None]:
# One-component PCA, passed as the feature extractor for the node coloring
pca = PCA(n_components=1)

fig = plot_static_mapper_graph(
    pipe,
    data,
    color_data=data,
    color_features=pca,
)
fig.show(config=dict(scrollZoom=True))

In [None]:
# Wrap the point cloud in a DataFrame with named coordinate columns
df = pd.DataFrame(data=data, columns=list("xy"))
df.head()

Unnamed: 0,x,y
0,-0.711917,-0.546609
1,0.306951,-0.007028
2,0.288193,0.123284
3,-0.892223,0.502352
4,-0.143615,0.938935


In [None]:
# Switch the filter to select the DataFrame columns by name, then refit on the DataFrame
named_projection = Projection(columns=["x", "y"])
pipe.set_params(filter_func=named_projection)
pipe.fit(df)
pipe

In [None]:
# Plot the Mapper graph of the DataFrame, colored by its own columns
fig = plot_static_mapper_graph(pipe, df, color_data=df)
fig.show(config=dict(scrollZoom=True))


In [None]:
# Create two categories
df["Circle"] = df["x"] ** 2 + df["y"] ** 2 < 0.25
df["Circle"] = df["Circle"].replace([False, True], ["A", "B"])

In [None]:
# One-hot encode the categorical "Circle" column so it can be used for coloring.
# The dtype is requested explicitly: pandas >= 2.0 returns boolean dummy columns
# by default (older versions returned uint8), and a float dtype keeps the color
# data numeric regardless of the installed pandas version.
color_data = pd.get_dummies(df["Circle"], prefix="Circle", dtype=float)

# Only the coordinate columns are fed to the pipeline; "Circle" is used for coloring only
fig = plot_static_mapper_graph(pipe, df[["x", "y"]], color_data=color_data)
fig.show(config={'scrollZoom': True})

In [None]:
# Use the sum as the filter function
filter_func = np.sum

pipe = make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    verbose=True,
    n_jobs=n_jobs,
)

In [None]:
# Fit the sum-filter pipeline on the raw point cloud and show the resulting graph
fig = plot_static_mapper_graph(pipe, data)
fig.show(config=dict(scrollZoom=True))

[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.0s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.1s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.0s
