In [19]:
import glob
from find_source import make_catalog, combine_catalogs
import pandas as pd
import os
import astropy.units as u
from astropy.coordinates import Angle, SkyCoord
import json
import math

In [None]:
def low_level_csv(folder, csv_path = './low_level.csv'):
    """Rebuild the low-level catalogue CSV with the sources found in ``folder``.

    Steps:
      1. Load the existing CSV at ``csv_path`` if there is one.
      2. Read ``<folder>/polaris.json`` to obtain ``obsID`` and drop rows of the
         existing CSV whose ``'Obs ID'`` equals ``f'id{obsID}'`` so they can be
         replaced by freshly extracted entries.
      3. Run ``make_catalog`` on every ``*.fits`` file in ``folder``, tag each
         entry with ``'Obs ID'`` and ``'Source ID' = 'Unknown'``, and fold the
         result into the running catalogue with ``combine_catalogs``.
      4. Overwrite ``csv_path`` with the combined catalogue.

    Parameters
    ----------
    folder : str
        Directory holding ``polaris.json`` and the ``*.fits`` files.
    csv_path : str, optional
        Location of the low-level CSV to read and overwrite.

    Returns
    -------
    None
        The result is written to ``csv_path``. Nothing is written when there is
        neither a readable existing CSV nor any new catalogue.

    Notes
    -----
    Failures while reading the JSON or processing an individual FITS file are
    reported with ``print`` and do not abort the run (best-effort behaviour).
    """
    master_catalog = None
    old_df = None
    str_obs_id = 'Unknown'

    try:
        old_df = pd.read_csv(csv_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No previous catalogue (first run, or an empty placeholder file).
        pass
    except Exception as e:
        print(f'Error reading {csv_path}: {e}. WARNING: Existing entries will not be carried over.')

    try:
        json_file = os.path.join(folder, 'polaris.json')
        with open(json_file, 'r') as file:
            obs_dict = json.load(file)

        # Collapse list values into a single comma-joined string. Elements are
        # passed through str() so numeric lists do not raise and thereby
        # prevent the obsID from being read below.
        for key, value in obs_dict.items():
            if isinstance(value, list):
                obs_dict[key] = [', '.join(map(str, value))]

        obs_id = obs_dict.pop('obsID')
        str_obs_id = f'id{obs_id}'
        if old_df is not None:
            # Remove old or outdated entries belonging to this observation.
            old_df = old_df[old_df['Obs ID'] != str_obs_id]
    except Exception as e:
        print(f'Error with obsID: {e}. WARNING: Old/outdated data may not be deleted.')

    if old_df is not None:
        # One dict per row, keyed by the row's index label.
        master_catalog = (old_df.T).to_dict()

    # glob.escape keeps characters such as '[' in the folder name literal;
    # sorting makes the processing order reproducible.
    pattern = os.path.join(glob.escape(folder), '*.fits')
    for file in sorted(glob.glob(pattern)):
        try:
            catalog = make_catalog(file)
            if catalog is None:
                continue
            for value in catalog.values():
                value['Obs ID'] = str_obs_id
                value['Source ID'] = 'Unknown'
            if master_catalog is None:
                master_catalog = catalog
            else:
                master_catalog = combine_catalogs(master_catalog, catalog)
        except Exception as e:
            print(f'Error for {file}: {e}')

    if master_catalog is None:
        # Nothing old and nothing new: do not create (or clobber) the CSV with
        # a column-less file that pd.read_csv could not load again.
        print(f'No catalog entries found for {folder}; {csv_path} was not written.')
        return

    df = pd.DataFrame.from_dict(master_catalog).T
    df.to_csv(csv_path, mode='w', header=True, index=False)

In [None]:
# Columns of the high-level (unique source) catalogue.
_HIGH_LEVEL_COLUMNS = ('Source ID', 'RA', 'Dec', 'FWHM', 'Ambiguous Ties')


def _fwhm_arcsec(text):
    """Return the numeric part of a '<number> arcsec' string as a float."""
    return float(str(text).replace(' arcsec', ''))


def _next_source_id(source_ids):
    """Return the 'idNNNN' label one past the largest number in source_ids ('id0001' if empty)."""
    largest = max((int(str(sid).replace('id', '')) for sid in source_ids), default=0)
    return f'id{largest + 1:04d}'


def _average_rows(rows):
    """Return (RA, Dec, FWHM) strings averaged over the given low-level rows.

    RA and Dec are arithmetic means in degrees, written as '<value>d'; FWHM is
    the geometric mean of 'Beam Maj Axis', written as '<value> arcsec'.
    ``rows`` must contain at least one row.
    """
    count = len(rows)
    ra_deg = [Angle(ra, u.deg).degree for ra in rows['Coord RA']]
    dec_deg = [Angle(dec, u.deg).degree for dec in rows['Coord Dec']]
    fwhm = [_fwhm_arcsec(value) for value in rows['Beam Maj Axis']]
    mean_ra = sum(ra_deg) / count
    mean_dec = sum(dec_deg) / count
    geo_fwhm = math.prod(fwhm) ** (1 / count)
    # Fixed-point formatting avoids exponent notation in the coordinate strings
    # that are parsed again by SkyCoord later on.
    return f'{mean_ra:.10f}d', f'{mean_dec:.10f}d', f'{geo_fwhm} arcsec'


def high_level_csv(low_level_path = './low_level.csv', high_level_path = './high_level.csv', merge_sep_arcsec = 1.0):
    """Assign a 'Source ID' to every low-level row and rebuild the unique-source CSV.

    Steps:
      1. Coarse matching: each low-level row is compared with the known unique
         sources; it matches the first source whose separation is at most
         ``sqrt(row FWHM * source FWHM)`` arcsec. Unmatched rows create a new
         source with the next free ``idNNNN`` label.
      2. Averaging: RA/Dec (arithmetic mean) and FWHM (geometric mean) of every
         source are recomputed from its low-level rows. Sources without any
         low-level row keep their stored values.
      3. Merging: averaged sources closer than ``merge_sep_arcsec`` are merged
         into the earlier one; the low-level rows are re-labelled, the averages
         recomputed and the absorbed source is removed.
      4. Both CSV files are overwritten.

    Parameters
    ----------
    low_level_path : str, optional
        CSV with the columns 'Coord RA', 'Coord Dec' and 'Beam Maj Axis'
        (the latter formatted as '<number> arcsec'). Must exist.
    high_level_path : str, optional
        CSV of unique sources; created when missing.
    merge_sep_arcsec : float, optional
        Separation in arcseconds below which two averaged sources are merged.

    Returns
    -------
    None
        Results are written to ``high_level_path`` and ``low_level_path``.

    Notes
    -----
    RA/Dec are assumed to be strings carrying their own units, since they are
    handed to ``SkyCoord`` without a ``unit`` argument -- TODO confirm against
    the output of ``make_catalog``.
    """
    low_df = pd.read_csv(low_level_path)

    try:
        unique_sources = pd.read_csv(high_level_path).to_dict(orient='list')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No catalogue of unique sources yet.
        unique_sources = {column: [] for column in _HIGH_LEVEL_COLUMNS}
    except Exception as e:
        print(f'Error reading {high_level_path}: {e}. WARNING: Starting from an empty source list.')
        unique_sources = {column: [] for column in _HIGH_LEVEL_COLUMNS}

    # --- coarse matching ---------------------------------------------------
    assigned_ids = []
    for row in range(len(low_df)):
        ra = low_df['Coord RA'].iloc[row]
        dec = low_df['Coord Dec'].iloc[row]
        fwhm = low_df['Beam Maj Axis'].iloc[row]
        row_coord = SkyCoord(ra, dec)
        row_fwhm = _fwhm_arcsec(fwhm)

        match_id = None
        for i, source_id in enumerate(unique_sources['Source ID']):
            source_coord = SkyCoord(unique_sources['RA'][i], unique_sources['Dec'][i])
            source_fwhm = _fwhm_arcsec(unique_sources['FWHM'][i])
            # Matching radius: geometric mean of the two beam sizes.
            max_sep = (row_fwhm * source_fwhm) ** (1 / 2) * u.arcsec
            if row_coord.separation(source_coord) <= max_sep:
                match_id = source_id
                break

        if match_id is None:
            # New source: every column grows by exactly one entry so the
            # lists stay the same length.
            match_id = _next_source_id(unique_sources['Source ID'])
            unique_sources['Source ID'].append(match_id)
            unique_sources['RA'].append(ra)
            unique_sources['Dec'].append(dec)
            unique_sources['FWHM'].append(fwhm)
            unique_sources['Ambiguous Ties'].append('Unknown')
        assigned_ids.append(match_id)

    low_df['Source ID'] = pd.Series(assigned_ids, index=low_df.index, dtype=object)

    # --- refine: average the members of every source -----------------------
    for i, source_id in enumerate(unique_sources['Source ID']):
        members = low_df[low_df['Source ID'] == source_id]
        if members.empty:
            # Source known from an earlier run but without rows this time.
            continue
        averaged = _average_rows(members)
        unique_sources['RA'][i], unique_sources['Dec'][i], unique_sources['FWHM'][i] = averaged

    # --- refine: merge averaged sources that coincide ----------------------
    source_ids = unique_sources['Source ID']
    merged = set()
    if len(source_ids) > 1:
        threshold = Angle(merge_sep_arcsec, u.arcsec)
        coords = [SkyCoord(ra, dec) for ra, dec in zip(unique_sources['RA'], unique_sources['Dec'])]
        for i in range(len(source_ids)):
            if i in merged:
                continue
            for j in range(i + 1, len(source_ids)):
                if j in merged:
                    continue
                if coords[i].separation(coords[j]) < threshold:
                    # Source j is absorbed by source i: re-label its rows and
                    # recompute the averages of i from the combined rows.
                    merged.add(j)
                    low_df.loc[low_df['Source ID'] == source_ids[j], 'Source ID'] = source_ids[i]
                    members = low_df[low_df['Source ID'] == source_ids[i]]
                    if not members.empty:
                        averaged = _average_rows(members)
                        unique_sources['RA'][i], unique_sources['Dec'][i], unique_sources['FWHM'][i] = averaged

    if merged:
        unique_sources = {
            column: [value for k, value in enumerate(values) if k not in merged]
            for column, values in unique_sources.items()
        }

    df = pd.DataFrame.from_dict(unique_sources)
    df.to_csv(high_level_path, mode='w', header=True, index=False)
    low_df.to_csv(low_level_path, mode='w', header=True, index=False)

In [None]:
#%load_ext line_profiler

In [None]:
#%%time
#low_level_csv('../data/250611_03:56:34')
#low_level_csv('../data/multi_track')

In [None]:
#high_level_csv()

In [None]:
#%lprun -f make_catalog low_level_csv('../data/multi_track')

In [None]:
# Line-by-line profile of low_level_csv on the '../data/multi_track' folder.
# NOTE: %lprun comes from the line_profiler extension; the `%load_ext line_profiler`
# cell earlier in this notebook is commented out and must be run first on a fresh kernel.
%lprun -f low_level_csv low_level_csv('../data/multi_track')

In [None]:
# Line-by-line profile of high_level_csv using its default CSV paths.
# NOTE: %lprun comes from the line_profiler extension; the `%load_ext line_profiler`
# cell earlier in this notebook is commented out and must be run first on a fresh kernel.
%lprun -f high_level_csv high_level_csv()