In [1]:
# Code by Melinda Kleczynski 
# Data from Christina Bergonzo 

# Finalized February 27, 2025 

# Determine when we can end the filtration / which rows to include in the GCCD matrices 

In [2]:
# standard library
import random
from pathlib import Path

# data processing 
import numpy as np 
import pandas as pd 

# TDA
import oat_python as oat

In [3]:
# Estimate the range of dimension-1 persistence intervals over a random sample
# of frames: for each sampled frame, record the smallest birth and the largest
# death among its dimension-1 homology classes.

# Root folder of the per-frame CSV files, relative to the notebook's working
# directory. Built with pathlib so the same code runs on Windows and POSIX
# (hard-coded '\\' separators only resolve on Windows).
DATA_DIR = Path('formatted_data')

datasets = ['Fc_glycans', 'Fc_noglycans']
trajectories = [0, 1, 2, 3]
frames = list(range(200, 1000))

# every (dataset, trajectory, frame) combination that can be sampled
all_choices = [[d, t, f] for d in datasets for t in trajectories for f in frames]

# random subset of (dataset, trajectory, frame) combinations;
# the fixed seed makes the sample reproducible
k = 1000
random.seed(0)
subsamples = random.sample(all_choices, k)

# per-sample extremes of the dimension-1 intervals, filled in by the loop below
min_births = np.zeros(k)
max_deaths = np.zeros(k)

for i, (dataset, trajectory, frame) in enumerate(subsamples):

    # <DATA_DIR>/<dataset>/traj<t>/<dataset>_traj<t>_frame<f>.csv
    frame_path = (DATA_DIR / dataset / ('traj' + str(trajectory)) /
                  (dataset + '_traj' + str(trajectory) + '_frame' + str(frame) + '.csv'))
    frame_data = pd.read_csv(frame_path)
    atom_coords = np.array(frame_data[['x', 'y', 'z']])

    # perform TDA
    # Cap the dissimilarity matrix at the value returned by enclosing_from_cloud
    # plus a small margin (presumably the enclosing radius of the point cloud --
    # confirm against the OAT documentation).
    maxrad = oat.dissimilarity.enclosing_from_cloud(atom_coords) + 0.001
    dissimilarity_matrix = oat.dissimilarity.matrix_from_cloud(cloud = atom_coords, dissimilarity_max = maxrad)
    boundary = oat.rust.FactoredBoundaryMatrixVr(dissimilarity_matrix = dissimilarity_matrix, homology_dimension_max = 1)
    ph = boundary.homology(return_cycle_representatives = False, return_bounding_chains = False)

    # keep only the dimension-1 classes
    ph1 = ph[ph.dimension == 1]
    if len(ph1) == 0:
        # np.min / np.max on an empty selection would raise an opaque
        # "zero-size array" ValueError; report which frame is responsible.
        raise ValueError('no dimension-1 homology classes found for ' + str(frame_path))

    min_births[i] = np.min(ph1.birth)
    max_deaths[i] = np.max(ph1.death)

In [4]:
# Extremes of the dimension-1 intervals across all sampled frames
overall_min_birth = np.min(min_births)
overall_max_death = np.max(max_deaths)

# Suggested GCCD distance range: round outward to a whole number, then pad by 2
suggested_smallest = np.floor(overall_min_birth) - 2
suggested_largest = np.ceil(overall_max_death) + 2

print('smallest birth %.2f' % overall_min_birth)
print('suggested smallest GCCD spatial distance', suggested_smallest)

print('largest death %.1f' % overall_max_death)
print('suggested largest GCCD spatial distance', suggested_largest)

smallest birth 3.87
suggested smallest GCCD spatial distance 1.0
largest death 27.5
suggested largest GCCD spatial distance 30.0
