# Urban Datasets

In [1]:
import numpy as np
import pandas as pd
import time

# Seed NumPy's legacy global RNG so anything drawing from np.random is repeatable
# across Restart & Run All. NOTE(review): whether the TopoMap/TopoTree classes
# actually use np.random is not visible from this notebook — confirm.
np.random.seed(2024)

from visualizations import plot_topomap_comparison_highlight, plot_hierarchical_treemap

from TopoTree import TopoTree
from TopoMap import TopoMap
from HierarchicalTopoMap import HierarchicalTopoMap

## Times Square

In [2]:
# Dataset key; it selects which comma-separated text file under
# data/topomap-data/ is read.
place = 'TimesSquare'

# Parse the whole file into a single numeric array.
X_times = np.loadtxt(f'data/topomap-data/{place}.txt', delimiter=",")

# Cell output: dimensions of the loaded array.
X_times.shape

(17520, 6)

In [3]:
# Baseline TopoMap projection of the Times Square data.
# Later cells read `topomap_times.mst` and `topomap_times.sorted_edges` from this
# object to avoid recomputing them (presumably both are populated by run() —
# TODO confirm against the TopoMap implementation).
topomap_times = TopoMap(X_times)
# `proj_topomap_times` is later passed to plot_topomap_comparison_highlight as the
# reference projection; its exact shape is not visible here.
proj_topomap_times = topomap_times.run()

In [4]:
# TopoTree over the same points, with min_box_size set to 1% of the number of rows.
topotree_times = TopoTree(
    X_times,
    min_box_size=0.01*len(X_times),
)

# Hand over the MST and sorted edge list already held by the TopoMap object
# before calling run(), instead of letting TopoTree start from scratch.
topotree_times.mst = topomap_times.mst
topotree_times.sorted_edges = topomap_times.sorted_edges

# Per-component information returned by the tree construction.
comp_info_times = topotree_times.run()

# Tabular view of the components; reused by the plotting cells.
df_comp_times = pd.DataFrame.from_dict(comp_info_times)

# Cell output: first rows of the component table.
df_comp_times.head()

Unnamed: 0,id,points,size,persistence,created_at,children,parent,died_at,persistence_density
0,0,"[5632, 9729, 4609, 14855, 10760, 9743, 10768, ...",228,0.018351,0.493816,15,11.0,0.512167,445.166968
1,1,"[14851, 10247, 14856, 5129, 6667, 4622, 3598, ...",247,0.007803,0.496059,10,5.0,0.503862,490.213315
2,2,"[6145, 4100, 12297, 3594, 4621, 11278, 14351, ...",203,0.005125,0.498737,7,5.0,0.503862,402.887866
3,3,"[13318, 2056, 5641, 11274, 14347, 15886, 10255...",334,0.01043,0.501738,13,11.0,0.512167,652.130558
4,4,"[13824, 5633, 2566, 4102, 11279, 10256, 6162, ...",303,0.033288,0.502811,24,17.0,0.536098,565.194788


In [5]:
# Layout options for the treemap figure, collected in one place.
treemap_layout_times = {
    'title': 'TopoTree - Times Square Dataset',
    'width': 1000,
}

# Treemap of the component hierarchy, colored by each component's `died_at` value.
fig = plot_hierarchical_treemap(df_comp_times, color='died_at')
fig.update_layout(**treemap_layout_times)
fig.show()

In [6]:
# Component ids (the `id` column of df_comp_times) passed as `components_to_scale`.
components_to_highligth = [13,5,17,18,20,23,30,29]

# Third field of every edge tuple, in the order given by sorted_edges.
# NOTE(review): assumes that field is the edge length and that the list is in
# ascending order, which would make the value picked below roughly the 99th
# percentile of edge lengths — confirm against TopoMap.
edge_lentghs = [edge[2] for edge in topomap_times.sorted_edges]

hiertopomap_times = HierarchicalTopoMap(
    X_times,
    components_to_scale=components_to_highligth,
    max_edge_length=edge_lentghs[int(0.99*len(edge_lentghs))],
)

# Keep the component-size threshold in sync with the TopoTree run, and reuse the
# MST / sorted edges from the TopoMap object before running.
hiertopomap_times.min_points_component = topotree_times.min_box_size
hiertopomap_times.mst = topomap_times.mst
hiertopomap_times.sorted_edges = topomap_times.sorted_edges

proj_hier_times = hiertopomap_times.run()

Scalling component 13 - Scale: 20 scaling - initial area: 2018.641... final area: 807456.312...
Scalling component 5 - Scale: 20 scaling - initial area: 1177.106... final area: 470842.375...
Scalling component 17 - Scale: 20 scaling - initial area: 1273.614... final area: 509445.500...
Scalling component 20 - Scale: 17.939334252639973 scaling - initial area: 309.084... final area: 99469.320...
Scalling component 18 - Scale: 20 scaling - initial area: 326.181... final area: 130472.570...
Scalling component 23 - Scale: 19.40699390791974 scaling - initial area: 164.737... final area: 62045.086...
Scalling component 30 - Scale: 16.910724317771283 scaling - initial area: 2139.795... final area: 611922.812...
Scalling component 29 - Scale: 14.309159394446356 scaling - initial area: 5851.799... final area: 1198167.750...
[INFO] Max edge length hit. Distance: 2.729558038979701 | max_edge_length: 2.7246005919733873


In [7]:
# Comparison figure of the plain TopoMap projection and the hierarchical one,
# with the selected components highlighted.
fig = plot_topomap_comparison_highlight(
    proj_topomap_times,
    proj_hier_times,
    components_to_highligth,
    df_comp_times,
)

# The first axis pair (xaxis/yaxis) gets a fixed viewing window; the second pair
# (xaxis2/yaxis2) only has its tick labels hidden.
comparison_layout_times = {
    'height': 600,
    'width': 1200,
    'title': 'Times Square Dataset',
    'legend': {'itemsizing': 'constant'},
    'xaxis': {'range': [-400, 800], 'showticklabels': False},
    'yaxis': {'range': [20400, 21600], 'showticklabels': False},
    'xaxis2': {'showticklabels': False},
    'yaxis2': {'showticklabels': False},
}
fig.update_layout(**comparison_layout_times)
fig.show()