In [1]:

from commutazzio.filtration import CLFiltration
from commutazzio.compute import CLInvariants
from commutazzio.filtration import CLFiltrationDB
# from commutazzio.plot import ComplementaryTrianglesPlot as Visualizer1
import random
import numpy as np
# from echoflare import send_finish_message
from commutazzio.utils import filepath_generator
import csv
import pandas as pd
pd.options.display.max_rows = None
pd.set_option('display.max_columns', None)
from read_database import sqlite_to_df_cl4_pc, sqlite_to_df_cln_pc
import os
import glob 
import random
import os

def get_one_sample(dirname, fn_pattern, required_space_dim=[2], max_attempts=100):
    attempts = 0
    # Loop through all files in the directory matching the pattern
    while attempts < max_attempts:
        for filename in glob.glob(os.path.join(dirname, f"{fn_pattern}*.db")):
            print(f"Attempting to process file: {filename}")
            new_db = CLFiltrationDB(filename, create_new_db=False, table_name='clf_filtration')
            
            # Randomize the id to be an integer between 1 and 100
            sample = new_db.get_filtration_by_id(random.randint(1, 100))

            # Check if the sample has the required space dimension
            if sample is not None and hasattr(sample, 'info') and sample.info['space_dim'] in required_space_dim:
                return sample

        attempts += 1
        print(f"Attempt {attempts}/{max_attempts}: No matching sample found. Retrying...")

    # If no files found or no matching sample found after max_attempts, return None
    print("No matching sample found.")
    return None



# dirname_topoblaze = './database/TopoBlaze'
# dirname_nebulacompute = './database/NebulaCompute'
# dirname_mac= './database'
dirname= './database'
fn_pattern_cl4 = 'pc_EC2_4'
fn_pattern_cln = 'pc_EC2_n'
sample_cl4 = get_one_sample(dirname, fn_pattern_cl4)
sample_cln = get_one_sample(dirname, fn_pattern_cln)
# Now, 'sample' contains the data from the first matching file, or None if no files were found.

''
Attempting to process file: ./database/pc_EC2_4_000010.db
Connected to ./database/pc_EC2_4_000010.db database.
CLFiltration:assuming ordinal number filtration values.
CLFiltration:assuming ordinal number filtration values.
Attempt 1/100: No matching sample found. Retrying...
Attempting to process file: ./database/pc_EC2_4_000010.db
Connected to ./database/pc_EC2_4_000010.db database.
CLFiltration:assuming ordinal number filtration values.
CLFiltration:assuming ordinal number filtration values.
Attempt 2/100: No matching sample found. Retrying...
Attempting to process file: ./database/pc_EC2_4_000010.db
Connected to ./database/pc_EC2_4_000010.db database.
CLFiltration:assuming ordinal number filtration values.
CLFiltration:assuming ordinal number filtration values.
Attempting to process file: ./database/pc_EC2_n_000001.db
Connected to ./database/pc_EC2_n_000001.db database.
CLFiltration:assuming ordinal number filtration values.
CLFiltration:assuming ordinal number filtration values.

In [2]:
sample_cl4.info['space_dim']

2

In [3]:
sample_cl4.upper

a simplex tree with 2488 simplices and 4 filtration values @ 0x30c3b7380

In [4]:
# create a CL(4) filtration object from two A4 filtrations of simplicial complexes
from commutazzio.filtration import CLFiltration
clf=CLFiltration(upper=sample_cl4.upper, lower=sample_cl4.lower)
# compute its decomposition
from commutazzio.compute import CLInvariants
inv=CLInvariants(clf,enable_multi_processing=True)
inv.total_decomposition_computation(dim=0)
inv.total_decomposition_computation(dim=1)
inv.decompositions_all[0].nonzero_components
inv.decompositions_all[1].nonzero_components

CLFiltration:assuming ordinal number filtration values.
Filtration updated.
Number of cores being used: 12
Computing multiplicity vector @ dim=0 with prime=2...


Progress:  49%|████▊     | 37/76 [00:01<00:01, 35.14it/s]

''
''
''
''
''
''
''
''
''
''
''
''


Progress: 100%|██████████| 76/76 [00:01<00:00, 48.22it/s]


Multiplicity vector computed (dim=0, prime=2).
Total decomposition of the homology module at dimension 0 and finite field F2 is computed.
Number of cores being used: 12
Computing multiplicity vector @ dim=1 with prime=2...


Progress: 100%|██████████| 76/76 [00:00<00:00, 356.24it/s]

Multiplicity vector computed (dim=1, prime=2).
Total decomposition of the homology module at dimension 1 and finite field F2 is computed.





{'I21': 2, 'I41': 1, 'I51': 1, 'I55': 1}

In [5]:
import chromatic_tda as chro

def configure_chromatic_simplicial_complex(clf):
    # Step 1: Extract simplices and their filtration values from clf.upper
    upper_simplices = {tuple(s): fv for s,fv in clf.upper}

    # Step 2: Adjust the filtration values to start from 0 and be integers
    min_filtration = min(upper_simplices.values())
    adjusted_simplices = {simplex: int(value - min_filtration) for simplex, value in upper_simplices.items()}

    # Step 3: Define a dictionary mapping each simplex to its adjusted filtration value
    complex_simplices = {simplex: adjusted_simplices.get(simplex, 0) for simplex in adjusted_simplices}
    # Step 4: Extract the maximum simplices from clf.lower
    sub_complex_max = set(clf.lower.maximum_simplices().simplices)
    print(sub_complex_max)
    # Step 5: Create the chromatic simplicial complex and set its simplex weights
    simplicial_complex = chro.SimplicialComplex(complex_simplices)
    simplicial_complex.set_simplex_weights(complex_simplices)

    # Step 6: Set the sub-complex using the maximum simplices from clf.lower
    sub_complex = chro.SimplicialComplex(sub_complex_max).simplices()  # compute the subsimplices
    simplicial_complex.set_sub_complex(sub_complex)

    # Step 7: Compute persistence for the simplicial complex
    simplicial_complex.compute_persistence()

    return simplicial_complex

# Usage example
# simplicial_complex = configure_chromatic_simplicial_complex(sample_cl4)
def print_non_trivial_bars(simplicial_complex):
    """
    Prints all non-trivial bars of a simplicial complex.

    Parameters:
    simplicial_complex: The simplicial complex object with GROUPS and bars method.
    """
    print("Bars:")
    for grp in simplicial_complex.GROUPS:
        print()
        print(f"  {grp}:")
        bars_all = simplicial_complex.bars(grp, return_as='dict')
        for dim, bars in sorted(bars_all.items()):
            print(f"    dim {dim} ... ", end="")
            print(", ".join(str(bar) for bar in sorted(bars)))



In [12]:
from commutazzio.filtration import SimplexTree
u=SimplexTree()
for simplex in [(0,),(1,),(2,),(0,1),(0,2)]:
    u.insert(simplex,0)
u.insert((1,2),1)
for simplex in [(3,),(1,3),(2,3)]:
    u.insert(simplex,2)
u.insert((1,2,3),3)
u.insert((0,1,2),4)
#---------------------------------------
l=SimplexTree()
for simplex in [(0,),(1,),(2,),(0,1),(0,2)]:
    l.insert(simplex,0)
for simplex in [(3,),(1,3),(2,3)]:
    l.insert(simplex,2)
manual_example = CLFiltration(upper=u, lower=l,ladder_length=5,h_params=[0,1,2,3,4])
example=configure_chromatic_simplicial_complex(manual_example)

CLFiltration:rescaling filtration values to ordinal numbers.
{(0, 1), (1, 3), (2,), (2, 3), (1,), (0, 2), (0,), (3,)}


In [18]:
manual_example.lower.maximum_simplicial_complex.simplices

[(0,), (1,), (2,), (3,), (0, 1), (0, 2), (1, 3), (2, 3)]

In [13]:
# Write a dictionary of simplices and filtration values.
# All subsimplices of the given simplices will be added with filtration value 0.
complex_simplices = {
    (0,)    : 0, # would be still added with weight 0 if not explicitly stated
    (1,)    : 0, # would be still added with weight 0 if not explicitly stated
    (2,)    : 0, # would be still added with weight 0 if not explicitly stated
    (0,1)   : 0, # would be still added with weight 0 if not explicitly stated
    (0,2)   : 0, # would be still added with weight 0 if not explicitly stated
    (1,2)   : 1,
    (3,)    : 2,
    (1,3)   : 2,
    (2,3)   : 2,
    (1,2,3) : 3,
    (0,1,2) : 4
}

# Define subcomplex by the maximal simplices 
sub_complex_max = {(0,1), (0,2), (1,3), (2,3)}
sub_complex = chro.SimplicialComplex(sub_complex_max).simplices() # compute the subsimplices

simplicial_complex = chro.SimplicialComplex(complex_simplices)
simplicial_complex.set_simplex_weights(complex_simplices)
simplicial_complex.set_sub_complex(sub_complex)
simplicial_complex.compute_persistence()

In [16]:
print_non_trivial_bars(example)

Bars:

  kernel:
    dim 1 ... (4, inf)

  sub_complex:
    dim 0 ... (0, inf)
    dim 1 ... (2, inf)

  image:
    dim 0 ... (0, inf)
    dim 1 ... (2, 4)

  complex:
    dim 0 ... (0, inf)
    dim 1 ... (1, 4), (2, 3)

  cokernel:
    dim 1 ... (1, 3)

  relative:
    dim 1 ... (1, 3)
    dim 2 ... (4, inf)


In [14]:
print_non_trivial_bars(simplicial_complex)

Bars:

  kernel:
    dim 1 ... (4, inf)

  sub_complex:
    dim 0 ... (0, inf)
    dim 1 ... (2, inf)

  image:
    dim 0 ... (0, inf)
    dim 1 ... (2, 4)

  complex:
    dim 0 ... (0, inf)
    dim 1 ... (1, 4), (2, 3)

  cokernel:
    dim 1 ... (1, 3)

  relative:
    dim 1 ... (1, 3)
    dim 2 ... (4, inf)


In [None]:


def collect_data_from_db_files(dirnames, fn_pattern, db_2_df):
    """
    Loop through all files in the directory of form "{fn_pattern}_*.db". For each file, 
    read the database by applying the provided function (db_2_df), which returns a DataFrame.
    All DataFrames are then combined into one DataFrame.

    Args:
        dirname (str): The directory containing the db files.
        fn_pattern (str): The filename pattern to match.
        db_2_df (function): Function to convert db data into DataFrame.

    Returns:
        DataFrame: The combined data from all matched db files.
    """
    all_data = pd.DataFrame()
    for dirname in dirnames:
        # Loop through all files in the directory matching the pattern
        for filename in sorted(glob.glob(os.path.join(dirname, f"{fn_pattern}*.db"))):
            print(f"Processing file: {filename}")
            new_db = CLFiltrationDB(filename, create_new=False)
            df_new = db_2_df(new_db)
            all_data = pd.concat([all_data, df_new], ignore_index=True)
            print(f"Finished processing file: {filename}")

    return all_data


In [None]:
dirnames = ['./databases','./databases/new_topoblaze/']
fn_pattern = 'pc_EC2_4'

all_data_4 = collect_data_from_db_files(dirnames, fn_pattern, sqlite_to_df_cl4_pc)
all_data_4.to_pickle('cl4_df_pc_large.pkl')


In [None]:
# dirname = './databases'
dirnames = ['./databases','./databases/new_topoblaze/']
fn_pattern = 'pc_EC2_n'

all_data_n = collect_data_from_db_files(dirnames, fn_pattern, sqlite_to_df_cln_pc)
all_data_n.to_pickle('cln_df_pc_large.pkl')


In [None]:
len(all_data_n)