In [33]:
import re, os
import pandas as pd
import numpy as np
os.chdir('/Users/walu/mdwarfs/')

In [11]:

# Set of accepted variability classes
valid_classes = {
    'ACYG', 'BCEP', 'SPB', 'BCEPH', 'MAIA', 'MAIAH', 'MAIAU', 'LPB',
    'GDOR', 'DSCT', 'roAP', 'SXPHE', 'SOLR', 'CEP', 'RR', 'RRAB',
    'RRC', 'CW', 'CWA', 'CWB', 'RV', 'RVA', 'RVB', 'M', 'RPHS',
    'SPHS', 'ZZ', 'ZZA', 'ZZB'
}

# Split on allowed delimiters: + / . , whitespace
def split_classes(raw_class):
    return re.split(r'[\s+/.,]+', raw_class.strip().upper())

# Check if variability class is fully valid
def is_valid_class(variability):
    if variability == '-' or variability.strip() == '':
        return False
    parts = split_classes(variability)
    return all(part in valid_classes for part in parts)

# Read and parse lines
rows = []

with open('table5.dat', 'r') as f:
    for line in f:
        if 'TIC' not in line:
            continue

        parts = line.strip().split()
        try:
            ra = float(parts[0])
            dec = float(parts[1])
            tic_index = parts.index('TIC')
            tic_id = parts[tic_index + 1]
            variability = parts[tic_index + 2] if tic_index + 2 < len(parts) else '-'

            if not is_valid_class(variability):
                continue

            # Extract next fields if present
            teff = parts[tic_index + 3] if tic_index + 3 < len(parts) else ''
            radius = parts[tic_index + 4] if tic_index + 4 < len(parts) else ''
            priority = parts[tic_index + 5] if tic_index + 5 < len(parts) else ''
            period = ''
            amp = ''
            spec_type = ''

            # Look ahead to identify numbers and text
            remaining = parts[tic_index + 6:]
            floats = [p for p in remaining if re.match(r'^-?\d+\.?\d*$', p)]
            strings = [p for p in remaining if not re.match(r'^-?\d+\.?\d*$', p)]

            if len(floats) == 1:
                period = floats[0]
            elif len(floats) >= 2:
                period, amp = floats[:2]

            if strings:
                spec_type = ' '.join(strings)

            rows.append({
                'RA': ra,
                'Dec': dec,
                'TIC_ID': tic_id,
                'VarClass': variability,
                'Teff': teff,
                'Radius': radius,
                'Priority': priority,
                'Period': period,
                'Amplitude': amp,
                'SpectralType': spec_type
            })

        except Exception:
            continue  # skip any malformed line silently

# Convert to DataFrame and save
df = pd.DataFrame(rows)
df.to_csv('filtered_pulsating_stars.csv', index=False)
print("Saved filtered catalog to 'filtered_stars.csv'")


Saved filtered catalog to 'filtered_stars.csv'


In [19]:
pd.unique(df['VarClass'])

array(['DSCT', 'GDOR', 'BCEP+SPB', 'ZZA', 'SPB', 'RRAB', 'SPHS', 'RRC',
       'MAIA', 'RPHS', 'BCEP', 'MAIAU', 'ACYG', 'SPHS+RPHS', 'ACYG+GDOR',
       'MAIAH', 'DSCT+SOLR', 'M', 'MAIA+SPB', 'SOLR', 'CW', 'RR',
       'ACYG+SPB', 'ACYG+BCEP', 'MAIAH+SPB', 'SXPHE', 'RVA', 'DSCT/RRC',
       'DSCT/SOLR'], dtype=object)

In [35]:
balona_puls_catalog = df['TIC_ID'].rename('TIC', inplace=True)
balona_puls_catalog = np.int64(balona_puls_catalog)

16324

In [29]:
current_puls_cat = pd.read_csv('current_pulsator_catalog.csv')
current_tics = current_puls_cat['TIC']

TypeError: cannot convert the series to <class 'int'>

In [38]:
#filtering the current tics that are in balona_puls_catalog
fake_pulsator_in_current = current_tics[~current_tics.isin(balona_puls_catalog)]
print(f"Fake pulsators in current catalog: {len(fake_pulsator_in_current)}")
fake_pulsator_in_current.to_csv('fake_pulsators_in_current.csv', index=False)

Fake pulsators in current catalog: 2142


In [41]:
new_pure_pulsator = balona_puls_catalog[~np.isin(balona_puls_catalog, current_tics)]
print(f"New pure pulsators: {len(new_pure_pulsator)}")
new_pure_pulsator = pd.DataFrame(new_pure_pulsator, columns=['TIC'])
new_pure_pulsator.to_csv('new_pure_pulsators.csv', index=False)

New pure pulsators: 3266


In [24]:
balona_unique = balona_puls_catalog[~balona_puls_catalog.isin(current_tics)]
print(f"Unique Balona pulsators not in current catalog: {len(balona_unique)}")

Unique Balona pulsators not in current catalog: 16324


In [25]:
common_elements = balona_puls_catalog[balona_puls_catalog.isin(current_tics)]
print(f"Common elements between Balona catalog and current catalog: {len(common_elements)}")

Common elements between Balona catalog and current catalog: 0


In [30]:
current_tics[0]

np.int64(100102390)