In [7]:
import pandas as pd
from ucimlrepo import fetch_ucirepo 

import plotly.graph_objects as go

from TopoTree import TopoTree

In [2]:
def plot_hierarchical_treemap(df_comp):
    """Draw a component hierarchy as a Plotly treemap.

    Parameters
    ----------
    df_comp : column-indexable table (e.g. a pandas DataFrame)
        Read through ``df_comp[<name>]`` for the columns ``'id'`` (tile
        labels), ``'parent'`` (label of each tile's parent), ``'size'``
        (tile values) and ``'persistence'`` (tile colours).

    Returns
    -------
    The object built by ``go.Figure``: a single treemap trace with a
    1000x500 layout and the default title ``'TopoTree'``. Callers can
    override the title afterwards with ``update_layout``.
    """
    treemap_kwargs = {
        'labels': df_comp['id'],
        'parents': df_comp['parent'],
        'values': df_comp['size'],
        # 'total': each value is taken to already include its descendants.
        'branchvalues': 'total',
        # Colour every tile by persistence and show a labelled colour bar.
        'marker': {
            'colors': df_comp['persistence'],
            'colorscale': 'bluyl',
            'showscale': True,
            'colorbar': {'title': "Persistence"},
        },
        'hovertemplate': '<b>Component #%{label} </b> <br> Points: %{value}<br> Persistence: %{color:.2f}<br> Parent: #%{parent}',
        # Empty trace name — presumably to keep it out of the hover box.
        'name': '',
        # -1 asks Plotly to render every level of the hierarchy.
        'maxdepth': -1,
    }
    treemap = go.Treemap(**treemap_kwargs)

    fig = go.Figure(treemap)
    fig.update_layout(
        margin={'t': 50, 'l': 25, 'r': 25, 'b': 25},
        title='TopoTree',
        height=500,
        width=1000,
    )
    return fig

## MFeat dataset

In [3]:
# Load the MFeat 'mfeat-kar' feature file: whitespace-separated values,
# no header row, 64 feature columns named x1..x64.
data_mfeat = pd.read_csv('data/UCI/multiple+features/mfeat-kar',
                         # Raw string: '\s' in a plain literal is an invalid
                         # escape sequence (SyntaxWarning on Python 3.12+).
                         sep=r'\s+',
                         header=None,
                         names=[f'x{i}' for i in range(1, 65)])

# The file has no label column. Labels are derived from row position:
# each consecutive run of 200 rows gets the next class id (this relies on
# the file keeping the UCI ordering — verify if the file is regenerated).
# Vectorised equivalent of assigning i // 200 row by row.
data_mfeat['class'] = data_mfeat.index // 200

# Feature matrix only (labels removed).
X_mfeat = data_mfeat.drop(['class'], axis=1)

In [4]:
# Build a TopoTree over the MFeat feature matrix, passed as a NumPy array.
# min_box_size is forwarded to TopoTree as-is; its exact meaning is defined
# by the TopoTree module, not in this notebook.
topotree = TopoTree(X_mfeat.to_numpy(), min_box_size=10)
# Retrieve the component records computed by the tree.
comp_info_mfeat = topotree.get_components()

In [5]:
# Tabulate the component records as a DataFrame.
df_comp_mfeat = pd.DataFrame.from_dict(comp_info_mfeat)
# Bare last expression: rendered as the cell's output.
df_comp_mfeat

Unnamed: 0,id,points,size,persistence,created_at,children,parent
0,0,"[1495, 1475, 1574, 1479, 1582, 1488, 1554, 155...",32,7.523886,60,8,2.0
1,1,"[1505, 1473, 1507, 1571, 1451, 1483, 1561, 155...",25,7.615253,71,12,2.0
2,2,"[1536, 1409, 1408, 1539, 1415, 1543, 1547, 155...",68,8.094536,124,8,4.0
3,3,"[1501, 1569, 1410, 1570, 1541, 1480, 1553, 152...",13,7.873719,136,1,4.0
4,4,"[1536, 1539, 1541, 1543, 1547, 1551, 1552, 155...",178,10.324984,179,59,27.0
...,...,...,...,...,...,...,...
64,64,"[200, 201, 202, 203, 205, 206, 207, 208, 209, ...",1130,13.778539,1815,5,65.0
65,65,"[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14...",1481,13.988324,1824,14,66.0
66,66,"[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14...",1878,14.515242,1842,50,68.0
67,67,"[1761, 1606, 1736, 1705, 107, 172, 1750, 1624,...",10,14.252678,1872,0,68.0


In [6]:
# Draw the MFeat component hierarchy. update_layout returns the figure it
# was called on, so the dataset-specific title is applied in the same chain.
fig = plot_hierarchical_treemap(df_comp_mfeat).update_layout(
    title='TopoTree - MFeat dataset'
)
fig.show()

## Iris dataset

In [8]:
# Fetch the Iris dataset from the UCI repository (dataset id 53).
iris = fetch_ucirepo(id=53) 
  
# Keep only the feature columns (a pandas DataFrame); targets are not used here.
X = iris.data.features 

# Build a TopoTree over the Iris feature matrix, passed as a NumPy array.
# min_box_size is forwarded to TopoTree as-is; its exact meaning is defined
# by the TopoTree module, not in this notebook.
topotree = TopoTree(X.to_numpy(), min_box_size=3)
# Retrieve the component records computed by the tree.
comp_info_iris = topotree.get_components()

# Tabulate the component records as a DataFrame for plotting.
df_comp_iris = pd.DataFrame.from_dict(comp_info_iris)

In [9]:
# Draw the Iris component hierarchy. update_layout returns the figure it
# was called on, so the dataset-specific title is applied in the same chain.
fig = plot_hierarchical_treemap(df_comp_iris).update_layout(
    title='TopoTree - Iris dataset'
)
fig.show()