In [1]:
from astropy.io import fits
import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import norm, median_abs_deviation
from scipy.io import loadmat
from astropy.coordinates import Angle, SkyCoord
from astropy.time import Time
import astropy.units as u
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.ticker as ticker
from datetime import datetime, timedelta
import glob
import pandas as pd
import json
import os
import math
import sqlite3
import io

In [2]:
def fits_data_index(fits_file: str):
    '''
    Finds the location of a FITS file's image data array.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file to be searched.

    Returns
    -------
    int
        The index of the image data array in the FITS file.
    '''

    file_index = 0

    #open FITS file
    try:
        file = fits.open(fits_file)
    except:
        print(f'Unable to open {fits_file}')

    info = file[file_index]
    data = info.data
    while data is None:
        #going through the indices of file to find the array
        try:
            file_index += 1
            info = file[file_index]
            data = info.data
        except:
            print(f'Error in locating data index of {fits_file}')

    return file_index

In [3]:
def gaussian_theta(coord, amp, sigma, theta, mu_x, mu_y):
    '''
    Finds the value at a point on a 2D Gaussian.

    Parameters
    ----------
    coord : tuple
        The coordinate(s) of the point(s) where the first entry is the x-coordinate or a list of x-coordinates
        and the second entry is the y-coordinate or a list of y-coordinates.
    amp : float
        The factor in front of the 2D Gaussian's exponent.
    sigma : float
        The standard deviation of the 2D Gaussian.
    theta : float
        The angle of rotation of the 2D Gaussian.
    mu_x : float
        The x-value of the peak of the 2D Gaussian.
    mu_y : float
        The y-value of the peak of the 2D Gaussian.

    Returns
    -------
    float
        The value of the 2D Gaussian evaluated at the given point.
    '''

    x, y = coord
    return amp * np.exp(-(((x-mu_x)*math.cos(theta)+(y-mu_y)*math.sin(theta))**2+(-(x-mu_x)*math.sin(theta)+(y-mu_y)*math.cos(theta))**2)/(2*sigma**2))

In [4]:
def region_stats(fits_file: str, center: list = [], radius: list = [], invert: bool = False, Gaussian: bool = True, internal: bool = True,\
                 outer_radius: float = None):
    '''
    Finds the statistics of a region of an image.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file that contains the image.
    center : list (optional)
        A list of center coordinates in units of pixels.
        If no center coordinates are given, eventually defaults to [((length of x-axis)/2, (length of y-axis)/2)], rounded up.
    radius : list (optional)
        A list of search radii in units of arcsec.
        If no radius list is given, defaults to an empty list.
    invert : bool (optional)
        Whether to swap the inclusion and exclusion regions.
        If no value is given, defaults to False.
    Gaussian : bool (optional)
        Whether to use a 2D Gaussian fit to estimate the true maximum flux and its corresponding coordinates.
        If no value is given, defaults to True.
    internal : bool (optional)
        Whether the peak to search for is internal (in which case to use a 5x5 pixel region if using a Gaussian fit)
        or external (in which case to use a 3x3 pixel region if using a Gaussian fit).
        If no value is given, defaults to True.
    outer radius : float (optional)
        The radius outside of which everything will be excluded. This is not affected by value invert.
        If no value is given, defaults to None and will not be used to exclude data.

    Returns
    -------
    dict
        A dictionary with:
            float
                The region's maximum flux in Jy.
            tuple (int, int)
                The coordinates in pixels of the image's center.
            tuple (int, int)
                The coordinates in pixels of the region's maximum flux.
            float
                The region's rms in Jy.
            float
                The image's beam size in arcseconds squared.
            float
                The image's x-axis length in arcsec.
            float
                The image's y-axis length in arcsec.
            float
                The area included in the mask in arcseconds squared.
            float
                The area excluded by the mask in arcseconds squared.
            float
                The number of measurements included in the mask.
            float
                The number of measurements excluded by the mask.
            float
                The median absolute deviation of the flux of the image.
            float
                The standard deviation of the flux of the image, as estimated by the MAD.
            float
                The most negative flux in the image, if such a flux exists. If not, this is None.

    Raises
    ------
    IndexError
        If center list and radius list are of different lengths.
    '''

    if center != [] and len(center) != len(radius):
        raise IndexError ('Center list and radius list are of different lengths')

    i = fits_data_index(fits_file)

    #open FITS file
    try:
        file = fits.open(fits_file)
    except:
        print(f'Unable to open {fits_file}')

    #extract data array
    info = file[i]
    data = info.data

    neg_peak = float(np.min(data[0]))
    if neg_peak >= 0:
        neg_peak = None

    mad = float(median_abs_deviation(data[0].flatten()))
    sd_mad = float(norm.ppf(0.84) / norm.ppf(0.75) * mad) #estimate standard deviation from MAD

    #getting dimensions for array
    x_dim = info.header['NAXIS1']
    y_dim = info.header['NAXIS2']

    x_dist_array = np.tile(np.arange(x_dim),(y_dim, 1)) #array of each pixel's horizontal distance (in pixels) from y-axis
    y_dist_array = x_dist_array.T #array of each pixel's vertical distance (in pixels) from x-axis

    #keep center pixel coordinates if specified, set to default if unspecified
    center_pix = center
    field_center = (round(x_dim/2), round(y_dim/2))
    if center == []:
        center_pix = [field_center]
        if len(radius) > 1:
            center_pix = center_pix * len(radius)

    #find units of axes
    x_unit = info.header['CUNIT1']
    y_unit = info.header['CUNIT2']

    #find cell size (units of arcsec)
    x_cell_size = (Angle(info.header['CDELT1'], x_unit)).to(u.arcsec)
    y_cell_size = (Angle(info.header['CDELT2'], y_unit)).to(u.arcsec)

    #find beam size (unitless but in arcsec^2)
    beam_size = float(((np.pi/4) * info.header['BMAJ'] * info.header['BMIN'] * Angle(1, x_unit) * Angle(1, y_unit) / np.log(2)).to(u.arcsec**2)\
                / (u.arcsec**2))

    #find axis sizes
    x_axis_size = x_dim * x_cell_size
    y_axis_size = y_dim * y_cell_size

    #distance from center array
    dist_from_center =((((x_dist_array - center_pix[0][0])*x_cell_size)**2 + ((y_dist_array - center_pix[0][1])*y_cell_size)**2)**0.5)

    #boolean mask and apply
    mask = (dist_from_center <= radius[0] * u.arcsec)
    if len(center) > 1:
        for j in range(1, len(center)):
            dist_from_center = ((((x_dist_array - center_pix[j][0])*x_cell_size)**2 + ((y_dist_array - center_pix[j][1])*y_cell_size)**2)**0.5)
            mask = np.logical_or(mask, (dist_from_center <= radius[j] * u.arcsec))

    if invert:
        mask = np.logical_not(mask)

    if outer_radius is not None:
        dist_from_field_center = ((((x_dist_array - field_center[0])*x_cell_size)**2 + ((y_dist_array - field_center[1])*y_cell_size)**2)**0.5)
        outer_mask = (dist_from_field_center <= outer_radius * u.arcsec)
        mask = np.logical_and(mask, outer_mask)

    incl_area = float(mask.sum() * x_cell_size * y_cell_size / (u.arcsec)**2)
    excl_area = float(np.logical_not(mask).sum() * x_cell_size * y_cell_size / (u.arcsec)**2)

    masked_data = data[0][mask]

    #get peak
    try:
        peak = float(max(masked_data))
    except ValueError:
        print('No values after mask applied. Check inclusion and exclusion radii.')

    #find coordinates of peak
    peak_pix = np.where(data[0] == peak)
    peak_x = int(peak_pix[1][0])
    peak_y = int(peak_pix[0][0])
    peak_coord = (peak_x, peak_y)

    #fit for peak and coordinates assuming Gaussian
    #use data from 5x5 region if internal peak
    if Gaussian and internal and (peak_x - 2) >= 0 and (peak_x + 2) <= x_dim and (peak_y - 2) >= 0 and (peak_y + 2) <= y_dim:
        neg2_2 = data[0][peak_x - 2][peak_y + 2]
        neg2_1 = data[0][peak_x - 2][peak_y + 1]
        neg2_0 = data[0][peak_x - 2][peak_y]
        neg2_neg1 = data[0][peak_x - 2][peak_y - 1]
        neg2_neg2 = data[0][peak_x - 2][peak_y - 2]
        neg1_2 = data[0][peak_x - 1][peak_y + 2]
        neg1_1 = data[0][peak_x - 1][peak_y + 1]
        neg1_0 = data[0][peak_x - 1][peak_y]
        neg1_neg1 = data[0][peak_x - 1][peak_y - 1]
        neg1_neg2 = data[0][peak_x - 1][peak_y - 2]
        zero_2 = data[0][peak_x][peak_y + 2]
        zero_1 = data[0][peak_x][peak_y + 1]
        zero_neg1 = data[0][peak_x][peak_y - 1]
        zero_neg2 = data[0][peak_x][peak_y - 2]
        pos1_2 = data[0][peak_x + 1][peak_y + 2]
        pos1_1 = data[0][peak_x + 1][peak_y + 1]
        pos1_0 = data[0][peak_x + 1][peak_y]
        pos1_neg1 = data[0][peak_x + 1][peak_y - 1]
        pos1_neg2 = data[0][peak_x + 1][peak_y - 2]
        pos2_2 = data[0][peak_x + 2][peak_y + 2]
        pos2_1 = data[0][peak_x + 2][peak_y + 1]
        pos2_0 = data[0][peak_x + 2][peak_y]
        pos2_neg1 = data[0][peak_x + 2][peak_y - 1]
        pos2_neg2 = data[0][peak_x + 2][peak_y - 2]

        z_data = [neg2_2, neg2_1, neg2_0, neg2_neg1, neg2_neg2,\
                neg1_2, neg1_1, neg1_0, neg1_neg1, neg1_neg2,\
                zero_2, zero_1, peak, zero_neg1, zero_neg2,\
                pos1_2, pos1_1, pos1_0, pos1_neg1, pos1_neg2,\
                pos2_2, pos2_1, pos2_0, pos2_neg1, pos2_neg2]
        x_data = [-2]*5 + [-1]*5 + [0]*5 + [1]*5 + [2]*5
        y_data = [2, 1, 0, -1, -2]*5

        try:
            popt, pcov = curve_fit(gaussian_theta, (x_data, y_data), z_data, bounds=([peak,0,0,-1,-1],[float('inf'),float('inf'),2*np.pi,1,1]))
            amp, sigma, theta, mu_x, mu_y = popt
            peak = float(amp)
            peak_coord = (float(peak_x + mu_x), float(peak_y + mu_y))
        except RuntimeError:
            pass

    #use data from 3x3 region if external peak
    elif Gaussian and (not internal) and (peak_x - 1) >= 0 and (peak_x + 1) <= x_dim and (peak_y - 1) >= 0 and (peak_y + 1) <= y_dim:
        left_top = data[0][peak_x - 1][peak_y + 1]
        left_middle = data[0][peak_x - 1][peak_y]
        left_bottom = data[0][peak_x - 1][peak_y - 1]
        middle_top = data[0][peak_x][peak_y + 1]
        middle_bottom = data[0][peak_x][peak_y - 1]
        right_top = data[0][peak_x + 1][peak_y + 1]
        right_middle = data[0][peak_x + 1][peak_y]
        right_bottom = data[0][peak_x + 1][peak_y - 1]

        z_data = [left_top, left_middle, left_bottom, middle_top, peak, middle_bottom, right_top, right_middle, right_bottom]
        x_data = [-1]*3 + [0]*3 + [1]*3
        y_data = [1, 0, -1] * 3

        try:
            popt, pcov = curve_fit(gaussian_theta, (x_data, y_data), z_data, bounds=([peak,0,0,-1,-1],[float('inf'),float('inf'),2*np.pi,1,1]))
            amp, sigma, theta, mu_x, mu_y = popt
            peak = float(amp)
            peak_coord = (float(peak_x + mu_x), float(peak_y + mu_y))
        except RuntimeError:
            pass

    rms = float((np.var(masked_data))**0.5)

    stats = {'peak': peak, 'field_center': field_center, 'peak_coord': peak_coord, 'rms': rms, 'beam_size': beam_size,\
             'x_axis': float(x_axis_size / u.arcsec), 'y_axis': float(y_axis_size / u.arcsec), 'incl_area': incl_area, 'excl_area': excl_area,\
             'n_incl_meas': float(incl_area / beam_size), 'n_excl_meas': float(excl_area / beam_size), 'mad': mad, 'sd_mad': sd_mad,\
             'neg_peak': neg_peak}

    return stats

In [5]:
def calc_prob_from_rms_uncert(peak: float, rms: float, n_excl: float, n_incl: float = None):
    '''
    Estimates the probability of a value or greater occurring in some number of measurements
    of a Gaussian distribution with an imprecisely known RMS.

    Parameters
    ----------
    peak : float
        The smallest value in the range of values whose probability of occurring will be estimated.
    rms : float
        The imprecisely known RMS value.
    n_excl : float
        The number of measurements in the region from which the RMS is measured.
        If no value is given for n_incl, this is also the number of measurements
        for which the probability will be estimated.
    n_incl : float (optional)
        The number of measurements for which the probability will be estimated.

    Returns
    -------
    float
        The estimated probability.
    '''

    #calculate error for rms
    rms_err = rms * (n_excl)**(-1/2)

    #create normal distributions from rms and error for rms
    uncert = np.linspace(-5 * rms_err, 5 * rms_err, 100)
    uncert_pdf = norm.pdf(uncert, loc = 0, scale = rms_err)

    #sum and normalize to find probabilities
    if n_incl == None:
        return float(sum((norm.cdf((-1 * peak)/(rms + uncert)) * n_excl) * uncert_pdf) / sum(uncert_pdf))
    else:
        return float(sum((norm.cdf((-1 * peak)/(rms + uncert)) * n_incl) * uncert_pdf) / sum(uncert_pdf))

In [6]:
def prob_dict_from_rms_uncert(fits_file: str, center: list = [], threshold: float = 0.01, radius_buffer: float = 5.0,\
                              ext_threshold: float = None):
    '''
    Finds the probabilities of the internal and external peaks, as well as other relevant statistics of an image.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file that contains the image.
    center : list (optional)
        A list of center coordinates in units of pixels.
        If no center coordinates are given, first defaults to [((length of x-axis)/2, (length of y-axis)/2)], rounded up.
    threshold : float (optional)
        The maximum probability, assuming no source in the image, for a significant internal detection.
        If no value is given, defaults to 0.01.
    radius_buffer : float (optional)
        The amount of buffer, in arcsec, to add to the beam FWHM to get the initial search radius.
        If no value is given, defaults to 5 arcsec.
    ext_threshold : float (optional)
        The probability that an external peak must be below for it to be considered an external source.
        If no value is given, defaults to 1e-3, 1e-6, or 1e-12, depending on the SNR of the internal peak.

    Returns
    -------
    dict
        A dictionary with:
            tuple (int, int)
                The coordinates in pixels of the image's center.
            float
                The image's rms in Jy.
            float
                The median absolute deviation of the flux of the image.
            float
                The standard deviation of the flux of the image, as estimated by the MAD.
            float
                The number of measurements included in the mask.
            float
                The number of measurements excluded by the mask.
            float
                The length of the beam major axis in arcsec.
            float
                The radius of the initial inclusion region in arcsec.
            float
                The most negative flux in the image, if such a flux exists. If not, this is None.
            list
                A list with:
                    float(s)
                        The flux of the brightest internal peak and the fluxes of the remaining significant internal peaks,
                        if these exist.
            list
                A list with:
                    tuple(s) (int, int)
                        The coordinates in pixels of the brightest internal peak and the remaining significant internal peaks,
                        if these exist.
            list
                A list with:
                    float(s)
                        The probability/probabilities of the brightest internal peak and the remaining significant internal peaks,
                        if these exist.
            list
                A list with:
                    float(s)
                        The signal to noise ratios of the brightest internal peak and the remaining significant internal peaks,
                        if these exist.
            list
                A list with:
                    float(s)
                        The flux(es) the significant external peak(s), if these exist.

ext_peak_coord: []
ext_prob: []
ext_snr: []
next_ext_peak: 0.11062327027320862
    '''

    i = fits_data_index(fits_file)

    #open FITS file
    try:
        file = fits.open(fits_file)
    except:
        print(f'Unable to open {fits_file}')

    #extract data array
    info = file[i]

    beam_fwhm = float((info.header['BMAJ'] * (Angle(1, info.header['CUNIT1'])).to(u.arcsec) / u.arcsec)) #unitless but in arcsec
    search_radius = beam_fwhm + radius_buffer #unitless but in arcsec

    #search for brightest internal peak
    int_stats1 = region_stats(fits_file=fits_file, center=center, radius=[search_radius], invert=False, Gaussian=False, internal=True)
    int_coord1 = int_stats1['peak_coord']
    int_peak1 = int_stats1['peak']
    n_incl = int_stats1['n_incl_meas'] #should be the same for all internal peaks
    field_center = int_stats1['field_center'] #in pixels
    mad = int_stats1['mad'] #should be the same for all peaks
    sd_mad = int_stats1['sd_mad'] #should be the same for all peaks
    neg_peak = int_stats1['neg_peak']

    #find external peaks and get their info
    center = [field_center]
    radius = [search_radius]

    ext_stats1 = region_stats(fits_file=fits_file, center=center, radius=radius, invert=True, Gaussian=False, internal=False)
    n_excl = ext_stats1['n_incl_meas'] #should be the same for all external peaks
    ext_peak1 = ext_stats1['peak']
    rms = ext_stats1['rms'] #can be changed later as we exclude more peaks
    ext_prob1 = calc_prob_from_rms_uncert(peak=ext_peak1, rms=rms, n_excl=n_excl)

    prob_dict = {'field_center': field_center, 'rms_val': None, 'mad': mad, 'sd_mad': sd_mad, 'n_incl_meas': n_incl, 'n_excl_meas': n_excl,\
                 'fwhm': beam_fwhm, 'incl_radius': search_radius, 'neg_peak': neg_peak,\
                 'int_peak_val': [], 'int_peak_coord': [], 'int_prob': [], 'int_snr': [],\
                 'ext_peak_val': [], 'ext_peak_coord': [], 'ext_prob': [], 'ext_snr': [], 'next_ext_peak': None}

    #update ext_threshold if needed
    int_snr1 = int_peak1 / rms
    if ext_threshold == None:
        if int_snr1 < 20:
            ext_threshold = 1e-3
        elif int_snr1 < 100:
            ext_threshold = 1e-6
        else:
            ext_threshold = 1e-12

    if ext_prob1 < ext_threshold:
        ext_significant = True
    else:
        prob_dict['next_ext_peak'] = ext_peak1
        ext_significant = False

    while ext_significant:
        ext_stats = region_stats(fits_file=fits_file, center=center, radius=radius, invert=True, Gaussian=False, internal=False)
        peak = ext_stats['peak']
        rms = ext_stats['rms']

        ext_prob = calc_prob_from_rms_uncert(peak=peak, rms=rms, n_excl=n_excl)
        if ext_prob < ext_threshold:
            ext_stats = region_stats(fits_file=fits_file, center=center, radius=radius, invert=True, Gaussian=True, internal=False)
            coord = ext_stats['peak_coord']
            peak = ext_stats['peak']
            ext_prob = calc_prob_from_rms_uncert(peak=peak, rms=rms, n_excl=n_excl)
            prob_dict['ext_peak_val'].append(peak)
            prob_dict['ext_peak_coord'].append(coord)
            prob_dict['ext_prob'].append(ext_prob)
            prob_dict['ext_snr'].append(peak / rms)
            center.append(coord)
            radius.append(beam_fwhm)
        else:
            prob_dict['next_ext_peak'] = peak
            ext_significant = False

    prob_dict['rms_val'] = rms

    #find prob for 1st internal peak using updated rms
    prob_dict['int_peak_val'].append(int_peak1)
    prob_dict['int_peak_coord'].append(int_coord1)
    int_prob1 = calc_prob_from_rms_uncert(peak=int_peak1, rms=rms, n_excl=n_excl, n_incl=n_incl)
    prob_dict['int_prob'].append(int_prob1)
    prob_dict['int_snr'].append(int_peak1 / rms)

    if threshold == None:
        threshold = 0.01
    int_significant = (int_prob1 < threshold)

    #treat 1st internal peak kind of like an external peak and get rid of search radius so we can look inside
    center = [int_coord1]
    radius = [beam_fwhm]

    #find internal peaks in addition to 1st internal peak
    while int_significant:
        int_stats = region_stats(fits_file=fits_file, center=center, radius=radius, invert=True, Gaussian=False, internal=True,\
                                 outer_radius=search_radius)
        int_peak = int_stats['peak']
        int_prob = calc_prob_from_rms_uncert(peak=int_peak, rms=rms, n_excl=n_excl, n_incl=n_incl)
        if int_prob < threshold and (int_peak > int_snr1 / 100):
            int_stats = region_stats(fits_file=fits_file, center=center, radius=radius, invert=True, Gaussian=True, internal=True,\
                                     outer_radius=search_radius)
            int_coord = int_stats['peak_coord']
            int_peak = int_stats['peak']
            int_prob = calc_prob_from_rms_uncert(peak=int_peak, rms=rms, n_excl=n_excl, n_incl=n_incl)
            prob_dict['int_peak_val'].append(int_peak)
            prob_dict['int_peak_coord'].append(int_coord)
            prob_dict['int_prob'].append(int_prob)
            prob_dict['int_snr'].append(int_peak / rms)
            center.append(int_coord)
            radius.append(beam_fwhm)
        else:
            int_significant = False

    return prob_dict

In [7]:
def get_prob_rms_est_from_ext(prob_dict: dict):
    '''
    Using the rms estimated from the value of the exclusion region's maximum flux,
    finds the probability of detecting the inclusion region's maximum flux if there were no source in the inclusion region,
    the probability of detecting the exclusion region's maximum flux if there were no source in the exclusion region, and other statistics.

    The estimated rms is that the probability of finding such an external peak,
    assuming no source in the exclusion region, is 1.
    Note: this implies that the external probability will always be 1.

    The other statistics include the following as calculated using the rms estimated as described above:
    the exclusion region's rms in Jy, the inclusion region's signal to noise ratio,
    and the external region's signal to noise ratio.

    The remaining statisitcs include the following as calculated using the rms taken directly from the image:
    the inclusion region's maximum flux in Jy and its coordinates in pixels,
    the exclusion region's maximum flux in Jy and its coordinates in pixels, the exclusion region's rms in Jy,
    the number of measurements in the inclusion region, the number of measurements in the exclusion region,
    the coordinates in pixels of the image's center, and the radii in pixels of the inclusion zones,
    the inclusion region's signal to noise ratio, and the external region's signal to noise ratio.

    Parameters
    ----------
    prob_list : list
        The list of statistics, as outputted by get_prob_image_rms(), for an image.

    Returns
    -------
    list
        A list with:
            dict(s)
                A dictionary with the following, found using the rms taken directly from the image:
                    float
                        The probability of detecting the inclusion region's maximum flux if there were no source in the inclusion region.
                    float
                        The probability of detecting the exclusion region's maximum flux if there were no source in the exclusion region.
                    float
                        The inclusion region's maximum flux in Jy.
                    tuple (int, int)
                        The coordinates in pixels of the inclusion region's maximum flux.
                    float
                        The exclusion region's maximum flux in Jy.
                    tuple (int, int)
                        The coordinates in pixels of the exclusion region's maximum flux.
                    float
                        The exclusion region's rms in Jy.
                    float
                        The number of measurements in the inclusion region.
                    float
                        The number of measurements in the exclusion region.
                    tuple (int, int)
                        The coordinates in pixels of the image's center.
                    list
                        A list with:
                            float(s)
                                The radii in pixels of inclusion zones.
                    float
                        The inclusion region's signal to noise ratio.
                    float
                        The exclusion region's signal to noise ratio.
            dict
                A dictionary with the following, found using the rms estimated as described above:
                    float
                        The probability of detecting the inclusion region's maximum flux if there were no source in the inclusion region.
                    float
                        The probability of detecting the exclusion region's maximum flux if there were no source in the exclusion region.
                    float
                        The exclusion region's rms in Jy.
                    float
                        The inclusion region's signal to noise ratio.
                    float
                        The exclusion region's signal to noise ratio.
    '''
    int_peak_val = prob_dict['int_peak_val']
    ext_peak_val = prob_dict['next_ext_peak']
    n_incl_meas = prob_dict['n_incl_meas']
    n_excl_meas = prob_dict['n_excl_meas']

    excl_sigma = -1 * norm.ppf(1/n_excl_meas)
    old_rms_val = ext_peak_val / excl_sigma
    prob_dict['calc_rms_val'] = float(old_rms_val)

    sigma = norm.ppf(1/(n_incl_meas + n_excl_meas))
    neg_peak = prob_dict['neg_peak']

    if neg_peak is not None:
        rms_val = neg_peak / sigma
        prob_dict['neg_peak_rms_val'] = float(rms_val)
    else:
        prob_dict['neg_peak_rms_val'] = None
        rms_val = old_rms_val

    prob_dict['calc_ext_prob'] = float(norm.cdf((-1 * ext_peak_val)/(rms_val))) * n_excl_meas
    prob_dict['calc_ext_snr'] = float(excl_sigma)
    for i in range(len(int_peak_val)):
        if i == 0:
            prob_dict['calc_int_prob'] = [float(norm.cdf((-1 * int_peak_val[i])/(rms_val))) * n_incl_meas]
            prob_dict['calc_int_snr'] = [float(int_peak_val[i] / rms_val)]
        else:
            prob_dict['calc_int_prob'].append(float(norm.cdf((-1 * int_peak_val[i])/(rms_val))) * n_incl_meas)
            prob_dict['calc_int_snr'].append(float(int_peak_val[i] / rms_val))

    return prob_dict

In [8]:
def summary(fits_file: str, threshold: float = 0.01, radius_buffer: float = 5.0, ext_threshold: float = None,\
            short_dict: bool = True, plot: bool = True, save_path: str = ''):
    '''
    Summarizes an image's statistics into a shorter dictionary, a more detailed dictionary, and/or a plot,
    with an option to save the plot as a png.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file that contains the image.
    radius_buffer : float (optional)
        The amount of buffer, in arcsec, to add to the beam FWHM to get the initial search radius.
        If no value is given, defaults to 5 arcsec.
    ext_threshold : float (optional)
        The probability that an external peak must be below for it to be considered an external source.
        If no value is given, defaults to 0.001.
    short_dict : bool (optional)
        Whether to return the short dictionary of statistics.
        If no value is given, defaults to True.
    full_list : bool (optional)
        Whether to return the more detailed list of statistics.
        If no value is given, defaults to False.
    plot : bool (optional)
        Whether to plot the image and statistics.
        If no value is given, defaults to True.
    save_path : str (optional)
        The path to which the plot will be saved.
        If no value is given, defaults to '' and no image is saved.

    Returns
    -------
    dict (if requested)
        A shorter dictionary with:
            float
                The probability, found using the rms taken directly from the image,
                of detecting the inclusion region's maximum flux if there were no source in the inclusion region.
            list
                A list with:
                    float(s)
                        The probabilities, found using the rms taken directly from the image,
                        of detecting the exclusion regions' maximum flux if there were no source in the exclusion regions.
                        If there are multiple entries in this list,
                        they are the probabilities as the exclusion region becomes increasingly small
                        as external peaks deemed significant are added to the inclusion region.
            float
                The inclusion region's maximum flux in Jy.
            tuple (float, float)
                The coordinates in relative arcsec of the inclusion region's maximum flux.
            list
                A list of with:
                    float(s)
                        The exclusion regions' maximum fluxes in Jy.
                        If there are multiple entries in this list,
                        they are the maxmimum fluxes as the exclusion region becomes increasingly small
                        as external peaks deemed significant are added to the inclusion region.
            list
                A list with:
                    tuple(s) (float, float)
                        The coordinates in relative arcsec of the exclusion regions' maximum fluxes.
                        If there are multiple entires in this list,
                        they are the coordinates as the exclusion region becomes increasingly small
                        as external peaks deemed significant are added to the inclusion region.
            float
                The exclusion region's rms in Jy. This uses the final (smallest) exclusion region.
            float
                The number of measurements in the inclusion region.
            float
                The number of measurements in the exclusion region.
            tuple (int, int)
                The coordinates in relative arcsec of the image's center. Should be (0, 0).
            list
                A list with:
                    float(s):
                        The radii in arcsec of inclusion zones.
            float
                The inclusion region's signal to noise ratio.
            list
                A list with:
                    float(s)
                        The exclusion regions' signal to noise ratios.
            float
                The probability, found using the rms estimated from the value of the exclusion region's maximum flux,
                of detecting the inclusion region's maximum flux if there were no source in the inclusion region.
            float
                The probability, found using the rms estimated from the value of the exclusion region's maximum flux,
                of detecting the exclusion region's maximum flux if there were no source in the exclusion region.
            float
                The rms in Jy estimated from the value of the exclusion region's maximum flux.
            float
                The inclusion region's signal to noise ratio,
                found using the rms estimated from the value of the exclusion region's maximum flux.
            float
                The exclusion region's signal to noise ratio,
                found using the rms estimated from the value of the exclusion region's maximum flux.
    list (if requested)
        A more detailed list with:
            dict(s)
                A dictionary with the following, found using the rms taken directly from the image:
                    float
                        The probability of detecting the inclusion region's maximum flux if there were no source in the inclusion region.
                    float
                        The probability of detecting the exclusion region's maximum flux if there were no source in the exclusion region.
                    float
                        The inclusion region's maximum flux in Jy.
                    tuple (float, float)
                        The coordinates in relative arcsec of the inclusion region's maximum flux.
                    float
                        The exclusion region's maximum flux in Jy.
                    tuple (float, float)
                        The coordinates in relative arcsec of the exclusion region's maximum flux.
                    float
                        The exclusion region's rms in Jy.
                    float
                        The number of measurements in the inclusion region.
                    float
                        The number of measurements in the exclusion region.
                    tuple (float, float)
                        The coordinates in relative arcsec of the image's center. Should be (0.0, 0.0).
                    list
                        A list with:
                            float(s)
                                The radii in arcsec of inclusion zones.
                    float
                        The inclusion region's signal to noise ratio.
                    float
                        The exclusion region's signal to noise ratio.
            dict
                A dictionary with the following, found using the rms estimated as described above:
                    float
                        The probability of detecting the inclusion region's maximum flux if there were no source in the inclusion region.
                    float
                        The probability of detecting the exclusion region's maximum flux if there were no source in the exclusion region.
                    float
                        The exclusion region's rms in Jy.
                    float
                        The inclusion region's signal to noise ratio.
                    float
                        The exclusion region's signal to noise ratio.
    '''
    info = (get_prob_rms_est_from_ext(prob_dict_from_rms_uncert(fits_file=fits_file, threshold=threshold, radius_buffer=radius_buffer,\
                                                                ext_threshold=ext_threshold)))

    center = info['field_center']

    header_data = fits.getheader(fits_file)
    pixel_scale = Angle(header_data['CDELT1'], header_data['CUNIT1']).to_value('arcsec')

    int_x_coords = []
    int_y_coords = []
    int_peak_coords = info['int_peak_coord']
    n_int_peaks = len(int_peak_coords)
    for i in range(n_int_peaks):
        #normalized internal peak coordinates
        int_x_coords.append((int_peak_coords[i][0] - center[0]) * pixel_scale)
        int_y_coords.append((int_peak_coords[i][1] - center[1]) * pixel_scale)
    int_x_coords = np.array(int_x_coords)
    int_y_coords = np.array(int_y_coords)

    incl_radius = info['incl_radius'] #unitless but in arcsec already

    # get most conservative rms and internal snr
    hdul = fits.open(fits_file)
    noise = None
    try:
        noise_col = hdul[1].columns[2]
        if noise_col.name == 'Noise Est':
            if noise_col.unit == 'mJy':
                noise = float(hdul[1].data[0][2] * 1e3) # into Jy
            elif noise_col.unit == 'Jy':
                noise = float(hdul[1].data[0][2])
    except:
        pass
    rms_list = [info['rms_val'], info['sd_mad'], info['calc_rms_val'], info['neg_peak_rms_val']] # all in Jy
    if info['neg_peak_rms_val'] is not None:
        rms_list.append(info['neg_peak_rms_val'])
    if noise is not None:
        rms_list.append(noise)
    conservative_rms = max(rms_list) # in Jy
    conservative_snr = round(info['int_peak_val'][0] / conservative_rms, 3)

    x_coords = []
    y_coords = []
    ext_peak_coords = info['ext_peak_coord']
    n_ext_peaks = len(ext_peak_coords)
    for i in range(n_ext_peaks):
        #normalized external peak coordinates
        x_coords.append((ext_peak_coords[i][0] - center[0]) * pixel_scale)
        y_coords.append((ext_peak_coords[i][1] - center[1]) * pixel_scale)

    fwhm = info['fwhm']

    if plot:
        #plt.rcParams['font.family'] = 'serif'
        #plt.rcParams['font.serif'] = ['Times New Roman']
        plt.rcParams['font.size'] = 15
        plt.rcParams['hatch.linewidth'] = 0.5
        plt.rcParams['figure.dpi'] = 60

        image_data = fits.getdata(fits_file)
        shape = image_data.shape

        while len(shape) > 2:
            image_data = image_data[0]
            shape = image_data.shape

        plt.set_cmap('inferno')
        fig, ax = plt.subplots(figsize=(6.7,5.1))

        plt.plot(int_x_coords, int_y_coords, 'wo', fillstyle='none', markersize=15)
        plt.plot(int_x_coords, int_y_coords, 'kx', fillstyle='none', markersize=15/np.sqrt(2))

        for i in range(n_int_peaks):
            int_circle = patches.Circle((int_x_coords[i], int_y_coords[i]), fwhm * pixel_scale, edgecolor='lime', fill=False)
            ax.add_artist(int_circle)

        int_circle = patches.Circle((0, 0), incl_radius, edgecolor='c', fill=False)
        ax.add_artist(int_circle)

        if n_ext_peaks > 0:
            x_coords = np.array(x_coords)
            y_coords = np.array(y_coords)
            plt.plot(x_coords, y_coords, 'ko', fillstyle='none', markersize=15)
            plt.plot(x_coords, y_coords, 'wx', fillstyle='none', markersize=15/np.sqrt(2))

            for i in range(n_ext_peaks):
                ext_circle = patches.Circle((x_coords[i], y_coords[i]), fwhm * pixel_scale, edgecolor='lime', fill=False)
                ax.add_artist(ext_circle)

        int_snr = info['int_snr'][0]

        x_min = ((0 - center[0]) - 0.5) * pixel_scale
        y_min = ((0 - center[1]) - 0.5) * pixel_scale
        x_max = ((image_data.shape[0] -  center[0]) - 0.5) * pixel_scale
        y_max = ((image_data.shape[1] -  center[1]) - 0.5) * pixel_scale

        beam = patches.Ellipse((x_min*0.88, y_min*0.92), Angle(header_data['BMIN'], header_data['CUNIT1']).to_value('arcsec'),\
                               Angle(header_data['BMAJ'], header_data['CUNIT1']).to_value('arcsec'), fill=True, facecolor='w',\
                                edgecolor='k', angle=header_data['BPA'], hatch='/////', lw=1)
        ax.add_artist(beam)

        title = fits_file[fits_file.rindex('/')+1:fits_file.index('.fits')]
        ax.text(x_min*0.96, y_max*0.96, f'Source: {title}\nInternal Candidate SNR: {conservative_snr}', horizontalalignment='left', verticalalignment='top',\
                fontsize=10, bbox=dict(facecolor='w'))

        plt.imshow(image_data, extent=[x_min, x_max, y_min, y_max], origin='lower')

        plt.xlabel('Relative RA Offset [arcsec]', fontsize=15)
        plt.ylabel('Relative Dec Offset [arcsec]', fontsize=15)

        jy_to_mjy = lambda x, pos: '{}'.format(round(x*1000, 1))
        fmt = ticker.FuncFormatter(jy_to_mjy)

        cbar = plt.colorbar(shrink=0.8, format=fmt)
        cbar.ax.set_ylabel('Intensity [mJy/beam]', fontsize=15, rotation=270, labelpad=24)

        if save_path != '':
            try:
                file = fits_file
                while '/' in file:
                    file = file[file.index('/')+1:]
                file = file.replace('.fits', '')
                if ext_threshold == None:
                    ext_threshold = 'default'
                file += f'_rb{radius_buffer}_et{ext_threshold}'
                if save_path[-1] != '/':
                    save_path = save_path + '/'
                plt.savefig(f'{save_path}{file}.jpg')
            except:
                print('Error saving figure. Double check path entered.')

    if short_dict:
        short_info = info

        int_peaks = []
        for i in range(n_int_peaks):
            int_peaks.append((float(int_x_coords[i]), float(int_y_coords[i])))

        ext_peaks = []
        for i in range(n_ext_peaks):
            ext_peaks.append((float(x_coords[i]), float(y_coords[i])))

        if n_ext_peaks == 0:
            ext_peaks = 'No significant external peak'
            short_info['ext_peak_val'] = 'No significant external peak'
            short_info['ext_snr'] = 'No significant external peak'
            short_info['ext_prob'] = 'No significant external peak'

        short_info = info
        short_info['int_peak_coord'] = int_peaks
        short_info['ext_peak_coord'] = ext_peaks
        short_info['field_center'] = (0,0)
        short_info['conservative_rms'] = conservative_rms
        short_info['conservative_snr'] = conservative_snr

        del short_info['next_ext_peak']

        return short_info

    else:
        return

In [9]:
def significant(fits_file: str, threshold: float = 0.01, radius_buffer: float = 5.0, ext_threshold: float = None):
    '''
    Finds whether a significant source was detected in a field's center region.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file that contains the image.
    threshold : float (optional)
        The threshold for a significant detection.
        If the probability of detecting the center region's maximum flux assuming no source in the image
        is less than this threshold, then the detection is deemed significant.
        If no value is given, defaults to 0.01.
    radius_buffer : float (optional)
        The amount of buffer, in arcsec, to add to the beam FWHM to get the initial search radius.
        If no value is given, defaults to 5 arcsec.
    ext_threshold : float (optional)
        The probability that an external peak must be below for it to be considered an external source.
        If no value is given, defaults to 0.001.

    Returns
    -------
    bool : Whether a significant source was detected in the field's center region.

    Raises
    ------
    ValueError
        If threshold is not between 0 and 1, inclusive.
    '''

    #make sure reasonable input
    if not (threshold >= 0 and threshold <= 1):
        raise ValueError('Threshold must be between 0 and 1, inclusive.')

    summ = summary(fits_file=fits_file, radius_buffer=radius_buffer, ext_threshold=ext_threshold, short_dict=True, plot=False)
    return (summ['int_prob'][0] < threshold and summ['calc_int_prob'][0] < threshold)

In [10]:
def interpolation_kernel(s):
    dist = abs(s)
    if dist % 1 == 0:
        return 0
    if dist < 1:
        return (3/2 * (dist**3) - 5/2 * (dist**2) + 1)
    if dist < 2:
        return (-1/2 * (dist**3) + 5/2 * (dist**2) - 4 * dist + 2)
    return 0

In [11]:
def interpolation_function(x, node_x, node_val):
    num_nodes = len(node_x)
    N = num_nodes - 1

    # make sure inputs are valid
    if len(node_val) != num_nodes:
        raise ValueError("The number of nodes given does not match the number of node values given")
    h = node_x[1] - node_x[0]
    for i in range(1, N):
        if node_x[i+1] - node_x[i] != h:
            raise ValueError("Nodes are not uniformly spaced")

    # boundary conditions
    node_neg1 = node_x[0] - h
    node_Nplus1 = node_x[N] + h
    c_neg1 = node_val[2] - 3*node_val[1] + 3*node_val[0]
    c_Nplus1 = 3*node_val[N] - 3*node_val[N-1] + 3*node_val[N-2]

    # apply interpolation function
    interpolated_val = c_neg1 * interpolation_kernel((x - node_neg1) / h)
    for k in range(0, num_nodes):
        s = (x - node_x[k]) / h
        interpolated_val += node_val[k] * interpolation_kernel(s)
    interpolated_val += c_Nplus1 * interpolation_kernel((x - node_Nplus1) / h)
    return float(interpolated_val)

In [12]:
def thumbnail(fits_file: str, peak_coord: tuple, pts_bw_nodes: int = 4):

    i = fits_data_index(fits_file)

    # open FITS file
    try:
        file = fits.open(fits_file)
    except:
        print(f'Unable to open {fits_file}')

    # extract data array
    info = file[i]
    data = info.data

    data_array = data[0]
    min_flux = np.min(data_array)
    max_flux = np.max(data_array)

    header_data = fits.getheader(fits_file)

    beam_maj = Angle(header_data['BMAJ'], header_data['CUNIT1']).to_value('arcsec')
    pixel_scale = Angle(header_data['CDELT1'], header_data['CUNIT1']).to_value('arcsec')
    x_dim = header_data['NAXIS1']
    y_dim = header_data['NAXIS2']

    center = (round(x_dim/2), round(y_dim/2))

    # unnormalized the normalized coordinates
    unnorm_x = round((peak_coord[0] / pixel_scale) + center[0])
    unnorm_y = round((peak_coord[1] / pixel_scale) + center[1])

    delta = math.ceil((2.5 + (beam_maj/2))/ pixel_scale) # ~number pixels in search radius

    # make sure the ~5x5 arcsec box is actually inside the original image and handle issues if not
    new_data = data_array
    if unnorm_y - delta >= 0:
        new_data = data_array[unnorm_y - delta:]
    if unnorm_y + delta < y_dim:
        new_data = new_data[:(unnorm_y + delta) - y_dim + 1]
    if unnorm_x - delta >= 0:
        new_data = [row[unnorm_x - delta:] for row in new_data]
    if unnorm_x + delta < x_dim:
        new_data = [row[:(unnorm_x + delta) - x_dim + 1] for row in new_data]

    y_length = len(new_data)
    if y_length == 0:
        raise ValueError("Attempts to obtain a smaller image centered on the source resulted in an empty data array.")
    else:
        x_length = len(new_data[0])

    node_x = np.arange(0, x_length).tolist()
    node_y = np.arange(0, y_length).tolist()

    interpolated_data = []

    # first interpolate in x direction (add entries to rows)
    pts_bw_nodes = 4
    pts_spacing = 1 / (pts_bw_nodes + 1)


    # iterate through rows
    for row_num in range(y_length):
        temp = new_data[row_num].tolist()

        # iterate through nodes in row
        for i in range(x_length - 1):
            temp2 = []
            for j in range(1, pts_bw_nodes+1):
                x = i + j*pts_spacing
                temp2.append(interpolation_function(x, node_x=node_x, node_val=new_data[row_num]))
            temp = temp[:i-x_length+1] + temp2 + temp[i-x_length+1:]

        interpolated_data.append(temp)

    # interpolate in y direction (add rows)
    new_x_length = len(interpolated_data[0])

    temp = []
    for i in range((y_length - 1) * pts_bw_nodes):
        temp.append([])

    for col_num in range(new_x_length):
        temp2 = [row_num[col_num] for row_num in interpolated_data] # list of a single column's nodes

        # iterate through nodes in row so that we get rows to add
        for i in range(y_length - 1):
            for j in range(1, pts_bw_nodes+1):
                y = i + j*pts_spacing
                temp[i*pts_bw_nodes + j - 1].append(interpolation_function(y, node_x=node_y, node_val=temp2))

    for i in range(y_length - 1):
        interpolated_data = interpolated_data[:i-y_length+1] + temp[:pts_bw_nodes] + interpolated_data[i-y_length+1:]
        temp = temp[pts_bw_nodes:]

    fig, ax = plt.subplots()
    plt.axis('off')
    plt.title('normalized offset: {},\n pixel scale: {} arcsec'.format((round(peak_coord[0],2), round(peak_coord[1],2)), round(pixel_scale,2)))
    img = ax.imshow(interpolated_data, vmin=min_flux, vmax=max_flux)
    fig.colorbar(img)

    # saving to database
    buffer = io.BytesIO()
    plt.savefig(buffer, format='png')
    buffer.seek(0)
    plot_data = buffer.read()
    plt.close()

    return plot_data

In [13]:
def make_catalog(fits_file: str, threshold: float = 0.01, radius_buffer: float = 5.0, ext_threshold: float = None):
    '''
    Summarizes information on any significant point sources detected in an image.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file that contains the image.
    threshold : float (optional)
        The threshold for a significant detection.
        If the probability of detecting the center region's maximum flux assuming no source in the image
        is less than this threshold, then the detection is deemed significant.
        If no value is given, defaults to 0.01.
    radius_buffer : float (optional)
        The amount of buffer, in arcsec, to add to the beam FWHM to get the initial search radius.
        If no value is given, defaults to 5 arcsec.
    ext_threshold : float (optional)
        The probability that an external peak must be below for it to be considered an external source.
        If no value is given, defaults to 0.001.

    Returns
    -------
    dict
        A dictionary with:
            dict(s)
                A dictionary with:
                    str
                        The name of the target object of the observation.
                    str
                        The date and time of the observation.
                    str
                        The name of the FITS file with the image.
                    Angle
                        The restoring beam major axis.
                    Angle
                        The restoring beam minor axis.
                    Angle
                        The restoring beam position angle.
                    float
                        The uncertainty in flux density measurements. The rms excluding any significant sources and a small circular region around them.
                    float
                        The flux density of the detected point source.
                    SkyCoord
                        The location of the detected point source.
                    bool
                        Whether the detected point source is in the initial search region.
    '''

    summ = summary(fits_file=fits_file, radius_buffer=radius_buffer, ext_threshold=ext_threshold, short_dict=True, plot=False)

    header_data = fits.getheader(fits_file)
    name = header_data['OBJECT']
    obs_date_time = header_data['DATE-OBS']
    bmaj = header_data['BMAJ']
    bmin = header_data['BMIN']
    bpa = header_data['BPA']
    ctype1 = header_data['CTYPE1']
    crval1 = header_data['CRVAL1']
    cunit1 = header_data['CUNIT1']
    ctype2 = header_data['CTYPE2']
    crval2 = header_data['CRVAL2']
    cunit2 = header_data['CUNIT2']
    ctype3 = header_data['CTYPE3']
    crval3 = header_data['CRVAL3']
    cunit3 = header_data['CUNIT3']

    freq = 'Not found'
    if ctype3 == 'FREQ':
        if cunit3 == 'GHz':
            freq = crval3
        elif cunit2 == 'Hz':
            freq = crval3 / 1e9 # into GHz
        freq = round(freq, 3)
    elif ctype3 == 'CHANNUM':
        hdul = fits.open(fits_file)
        try:
            freq_col = hdul[1].columns[1]
            if freq_col.name == 'Freq':
                if freq_col.unit == 'Hz':
                    freq = hdul[1].data[0][1] / 1e9 # into GHz
                elif freq_col.unit == 'GHz':
                    freq = hdul[1].data[0][1]
            freq = round(freq, 3)
        except:
            pass

    #assume beam axes in same units as CUNIT1 and CUNIT2 and BPA in degrees
    beam_maj_axis = Angle(bmaj, cunit1)
    beam_min_axis = Angle(bmin, cunit1)
    bpa_rad = math.radians(bpa)

    moving_objects = ['venus', 'mars', 'jupiter', 'uranus', 'neptune', 'io', 'europa', 'ganymede', 'callisto', 'titan',\
               'ceres', 'vesta', 'pallas', 'juno']

    stationary = True
    if name.lower() in moving_objects:
        stationary = False
    else:
        for obj in moving_objects:
            if obj in name.lower():
                stationary = False
                break

    interesting_sources = {}
    field_info = {'FieldName': name, 'ObsDateTime': obs_date_time, 'FileName': fits_file[fits_file.rindex('/')+1:],\
                   'Stationary': stationary,\
                   'BeamMajAxis_arcsec': round(float(beam_maj_axis.to(u.arcsec)/u.arcsec), 3),\
                   'BeamMinAxis_arcsec': round(float(beam_min_axis.to(u.arcsec)/u.arcsec), 3),\
                   'BeamPosAngle_deg': round(bpa, 3),\
                   'Freq_GHz': freq}

    field_info['FluxUncert_mJy'] = round(summ['conservative_rms'] * 1e3, 3)

    n_int_sources = len(summ['int_peak_val'])
    if type(summ['ext_peak_val']) == str:
        n_ext_sources = 0
    else:
        n_ext_sources = len(summ['ext_peak_val'])

    ra_index = 0
    dec_index = 1

    if 'RA' in ctype1:
        ra = crval1
    elif 'RA' in ctype2:
        ra = crval2
        ra_index = 1
    else:
        raise ValueError('No RA in image')

    if 'DEC' in ctype1:
        dec = crval1
        dec_index = 0
    elif 'DEC' in ctype2:
        dec = crval2
    else:
        raise ValueError('No dec in image')

    if cunit1 != cunit2:
        raise ValueError('Axes have different units')

    center = SkyCoord(ra, dec, unit=cunit1)

    pt_source_count = 1

    for i in range(n_int_sources):
        if (summ['int_prob'][i] < threshold and summ['calc_int_prob'][i] < threshold):
            info = field_info.copy()
            info['Flux_mJy'] = round(summ['int_peak_val'][i] * 1000, 3)

            snr = summ['int_peak_val'][i] / summ['conservative_rms']
            b_min_uncert = float((beam_maj_axis.to(u.arcsec) / u.arcsec) / snr)
            b_maj_uncert = float((beam_min_axis.to(u.arcsec) / u.arcsec) / snr)
            info['RAUncert_arcsec'] = round(b_min_uncert*abs(math.sin(bpa)) + b_maj_uncert*abs(math.cos(bpa)), 3)
            info['DecUncert_arcsec'] = round(b_maj_uncert*abs(math.sin(bpa)) + b_min_uncert*abs(math.cos(bpa)), 3)

            ra_offset = summ['int_peak_coord'][i][ra_index] * u.arcsec
            dec_offset = summ['int_peak_coord'][i][dec_index] * u.arcsec
            coord = center.spherical_offsets_by(ra_offset, dec_offset)

            ra_tuple = coord.ra.hms
            dec_tuple = coord.dec.dms

            # rounding the arcseconds to 2 past the decimal
            ra_str = f'{int(ra_tuple.h)}h{abs(int(ra_tuple.m))}m{abs(round(float(ra_tuple.s), 2))}s'
            dec_str = f'{int(dec_tuple.d)}d{abs(int(dec_tuple.m))}m{abs(round(float(dec_tuple.s), 2))}s'

            info['RA'] = ra_str
            info['Dec'] = dec_str
            info['Internal'] = True

            info['Image'] = thumbnail(fits_file=fits_file, peak_coord=summ['int_peak_coord'][i], pts_bw_nodes=4)

            key = f'Source{pt_source_count}'
            interesting_sources[key] = info
            pt_source_count +=1

    for i in range(n_ext_sources):
        info = field_info.copy()
        info['Flux_mJy'] = round(summ[f'ext_peak_val'][i] * 1000, 3)

        snr = summ['ext_peak_val'][i] / summ['conservative_rms']
        b_min_uncert = float(bmaj / snr)
        b_maj_uncert = float(bmin / snr)
        info['RAUncert_arcsec'] = round(b_min_uncert*abs(math.sin(bpa)) + b_maj_uncert*abs(math.cos(bpa)), 3)
        info['DecUncert_arcsec'] = round(b_maj_uncert*abs(math.sin(bpa)) + b_min_uncert*abs(math.cos(bpa)), 3)

        ra_offset = summ['ext_peak_coord'][i][ra_index] * u.arcsec
        dec_offset = summ['ext_peak_coord'][i][dec_index] * u.arcsec
        coord = center.spherical_offsets_by(ra_offset, dec_offset)

        ra_tuple = coord.ra.hms
        dec_tuple = coord.dec.dms

        # rounding the arcseconds to 2 past the decimal
        ra_str = f'{int(ra_tuple.h)}h{abs(int(ra_tuple.m))}m{abs(round(float(ra_tuple.s), 2))}s'
        dec_str = f'{int(dec_tuple.d)}d{abs(int(dec_tuple.m))}m{abs(round(float(dec_tuple.s), 2))}s'

        info['RA'] = ra_str
        info['Dec'] = dec_str
        info['Internal'] = False

        info['Image'] = thumbnail(fits_file=fits_file, peak_coord=summ['ext_peak_coord'][i], pts_bw_nodes=4)

        key = f'Source{pt_source_count}'
        interesting_sources[key] = info
        pt_source_count +=1

    if interesting_sources == {}:
        return
    else:
        return interesting_sources

In [14]:
def combine_catalogs(catalog_1: dict, catalog_2: dict):
    '''
    Combines two catalogs in the format returned by make_catalog() into a single catalog of the same format.

    Parameters
    ----------
    catalog_1 : dict
        The catalog to which the other catalog will be "appended."
    catalog_2 : dict
        The catalog to "append" to the other catalog.

    Returns
    -------
    dict
        A dictionary of the combined catalogs in the same catalog format.
    '''

    shift = len(catalog_1)
    for key, value in catalog_2.items():
        new_number = int(key.replace('Source', ''))
        new_key = f'Source{new_number + shift}'
        catalog_1[new_key] = value
    return catalog_1

In [15]:
def start_html(html_path):
    '''
    Starts source_info.html, in which source information can be stored.
    '''

    with open(html_path, 'w') as html_file:
        start = '''
        <!DOCTYPE html>
        <html>
        <style>
        img.field {
        width: 40%;
        height: 40%
        }
        img.bp {
        width: 20%;
        height: 20%
        }
        img.gain {
        width: 45%;
        height: 45%
        }
        .centered-large-text {
        text-align: center;
        font-size: 36px;
        }
        </style>
        <body>
        '''
        html_file.write(start)
        html_file.close()

In [16]:
def obs_info_to_html(json_file: str, html_path: str):
    '''
    Appends observation information table to source_info.html using information from a .json file.

    Parameters
    ----------
    json_file : str
        The path of the .json file that contains the observation information.
    '''

    with open(html_path, 'a') as html_file:
        try:
            with open(json_file, 'r') as file:
                obs_dict = json.load(file)

            #cleaning up obs_dict
            for key, value in obs_dict.items():
                if type(value) == list:
                    string = ', '.join(value)
                    obs_dict[key] = [string]
            obs_id = obs_dict.pop('obsID')
            base_name = obs_dict.pop('basename')

            df = pd.DataFrame(obs_dict)
            df_transposed = df.T

            html_table = df_transposed.to_html()

            html_file.write(f'<p class=\'centered-large-text\'>Source Information for {base_name} (ObsID {obs_id}) </p>')
            html_file.write(html_table)
        except:
            html_file.write('<p> Error generating observation information table. </p>')

In [17]:
def ap_eff_to_html(html_path, matlab: str):

    try:
        data = loadmat(matlab)
        ap_eff_array = data['apEffCorr']

        n_ants = len(ap_eff_array)
        panda_dict = {}

        for ant in range(n_ants):
            ant_eff = {}
            ant_eff['RxA LSB'] = float(ap_eff_array[ant][0])
            ant_eff['RxA USB'] = float(ap_eff_array[ant][1])
            ant_eff['RxB LSB'] = float(ap_eff_array[ant][2])
            ant_eff['RxB USB'] = float(ap_eff_array[ant][3])
            panda_dict[f'Ant {ant+1}'] = ant_eff

        df = pd.DataFrame.from_dict(panda_dict)
        df_transposed = df.T
        html_table = df_transposed.to_html()

        with open(html_path, 'a') as html_file:
            html_file.write(html_table)
    except:
        print('Error with aperture efficiency data.')

In [18]:
def calibration_plots(html_path, matlab: str):

    plt.rcdefaults()
    plt.rcParams['figure.dpi'] = 60
    plt.rcParams['font.size'] = 8


    data = loadmat(matlab)
    gt = data['gainTime']
    gws = data['gainWinSoln']
    gcs = data['gainChanSoln']
    gain_type = data['gainType']

    n_times = len(gt)
    n_ants = len(gws[0])
    n_spws = len(gws[0][0])
    n_chans = len(gcs[0][0][0])

    utc_midpts = []
    for t in range(len(gt)):
        midpt = 0.5 * (gt[t][0].real + gt[t][0].imag)
        utc_midpts.append((midpt%1)*24)

    colors = ['blue','r','y','purple','orange','g','m','c']

    chan_bit = 7
    if all(bit == 0 for bit in (gain_type & (2**chan_bit))):
        chan_bit = 0
    spw_bit = 6
    if all(bit == 0 for bit in (gain_type & (2**spw_bit))):
        spw_bit = 1

    #plotting bandpass gain solutions for amplitude and phase
    fig, ax = plt.subplots(nrows=n_ants, ncols=1, sharex=True, figsize=(3,8))
    fig2, ax2 = plt.subplots(nrows=n_ants, ncols=1, sharex=True, figsize=(3,8))

    max_amp = 0

    for time in range(n_times):
        if (gain_type & (2**chan_bit))[time] != 0:
            for ant in range(n_ants):

                #shifting for cosmetics
                pos = ax[ant].get_position()
                pos.x0 += 0.05
                pos.x1 += 0.05
                ax[ant].set_position(pos)
                pos2 = ax2[ant].get_position()
                pos2.x0 += 0.06
                pos2.x1 += 0.06
                ax2[ant].set_position(pos2)

                #no x axis ticks
                ax[ant].xaxis.set_tick_params(labelbottom=False)
                ax2[ant].xaxis.set_tick_params(labelbottom=False)


                for spw in range(n_spws):
                    amp_to_plot = [abs(a) for a in gcs.copy()[time][ant][spw]]
                    pha_to_plot = [np.angle(p, deg=True) for p in gcs.copy()[time][ant][spw]]
                    if max(amp_to_plot) > max_amp:
                        max_amp = max(amp_to_plot)

                    x_axis = np.arange(spw * n_chans + 1, (1 + spw) * n_chans + 1)

                    ax[ant].scatter(x_axis, amp_to_plot, c=colors[spw], s=20, marker='x', linewidths=1.5)
                    ax2[ant].scatter(x_axis, pha_to_plot, c=colors[spw], s=20, marker='x', linewidths=1.5)

                    ax[ant].yaxis.set_label_position('right')
                    ax2[ant].yaxis.set_label_position('right')
                    ax[ant].set_ylabel(f'Ant{ant+1}')
                    ax2[ant].set_ylabel(f'Ant{ant+1}')

    plt.setp(ax, yticks=np.arange(0, max_amp+1, 0.5))
    plt.setp(ax2, yticks=[-180,-120,-60,0,60,120,180])
    fig.suptitle('Bandpass gain solutions for amplitude', y=0.92)
    fig2.suptitle('Bandpass gain solutions for phase', y=0.92)
    fig.supxlabel('Full antenna bandwidth', y=0.07)
    fig2.supxlabel('Full antenna bandwidth', y=0.07)
    fig.supylabel('Gain amplitude')
    fig2.supylabel('Gain phase')

    html_folder = os.path.dirname(html_path)

    fig.savefig(os.path.join(html_folder, 'bp_amp.jpg'))
    fig2.savefig(os.path.join(html_folder, 'bp_pha.jpg'))

    plt.close()

    #plotting gain solutions for amplitude and phase
    n_rows = math.ceil(n_ants / 2)
    n_cols = 2

    fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, sharex=True, figsize=(5.7,4))
    fig2, ax2 = plt.subplots(nrows=n_rows, ncols=n_cols, sharex=True, figsize=(5.7,4))

    max_amp, min_time, max_time = 0, float('inf'), 0

    for spw in range(n_spws):
        for ant in range(n_ants):
            amp_to_plot, pha_to_plot = [], []
            times = []

            if ant < n_rows:
                row, col = ant, 0

                #shifting for cosmetics
                pos = ax[row, col].get_position()
                pos.x0 -= 0.005
                pos.x1 -= 0.005
                ax[row, col].set_position(pos)
                pos2 = ax2[row, col].get_position()
                pos2.x0 -= 0.005
                pos2.x1 -= 0.005
                ax2[row, col].set_position(pos2)
            else:
                row, col = ant % n_rows, 1

            for time in range(n_times):
                if gain_type[time] & (2**6) != 0:
                    amp_val = abs((gws.copy())[time][ant][spw])
                    pha_val = np.angle((gws.copy())[time][ant][spw], deg=True)
                    amp_to_plot.append(amp_val)
                    pha_to_plot.append(pha_val)

                    if amp_val > max_amp:
                        max_amp = amp_val

                    t = utc_midpts[time]
                    if t < min_time:
                        min_time = t
                    if t > max_time:
                        max_time = t

                    times.append(t)

            ax[row, col].scatter(times, amp_to_plot, c=colors[spw], s=4, marker='D')
            ax2[row, col].scatter(times, pha_to_plot, c=colors[spw], s=4, marker='D')

            ax[row, col].yaxis.set_label_position('right')
            ax2[row, col].yaxis.set_label_position('right')
            ax[row, col].set_ylabel(f'Ant{ant+1}')
            ax2[row, col].set_ylabel(f'Ant{ant+1}')
            amp_to_plot, pha_to_plot = [], []

    plt.setp(ax, xticks=np.arange(min_time//1, math.ceil(max_time), 1), yticks=np.arange(0, max_amp+1, 0.5))
    plt.setp(ax2, xticks=np.arange(min_time//1, math.ceil(max_time), 1), yticks=[-180,-120,-60,0,60,120,180])
    fig.suptitle('Gain solutions for amplitude')
    fig2.suptitle('Gain solutions for phase')
    fig.supxlabel('UT hours')
    fig2.supxlabel('UT hours')
    fig.supylabel('Gain amplitude')
    fig2.supylabel('Gain phase')

    fig.savefig(os.path.join(html_folder, 'g_amp.jpg'))
    fig2.savefig(os.path.join(html_folder, 'g_pha.jpg'))

    plt.close()

In [19]:
def fig_to_html(html_path: str, fits_file: str, radius_buffer: float = 5.0, ext_threshold: float = None):
    '''
    Appends source figures to source_info.html.

    Parameters
    ----------
    fits_file : str
        The path of the FITS file that contains the image.
    radius_buffer : float (optional)
        The amount of buffer, in arcsec, to add to the beam FWHM to get the initial search radius.
        If no value is given, defaults to 5 arcsec.
    ext_threshold : float (optional)
        The probability that an external peak must be below for it to be considered an external source.
        If no value is given, defaults to 0.001.
    '''

    with open(html_path, 'a') as html_file:
        try:
            summary(fits_file=fits_file, radius_buffer=radius_buffer, ext_threshold=ext_threshold,\
                    short_dict=False, plot=True, save_path=os.path.dirname(html_path))

            #getting full path
            file = fits_file
            while '/' in file:
                file = file[file.index('/')+1:]
            file = file.replace('.fits', '')
            if ext_threshold == None:
                ext_threshold = 'default'
            file += f'_rb{radius_buffer}_et{ext_threshold}'
            full_path = f'./{file}.jpg'

            html_figure = f'''
            <img class=\'field\' src=\'{full_path}\'>
            <br>
            '''

            html_file.write(html_figure)
        except:
            html_file.write(f'<p> Error generating figure for {fits_file}. </p>')

In [20]:
def catalog_to_html(catalog: dict, html_path):
    '''
    Appends source information table to source_info.html.

    Parameters
    ----------
    catalog : dict
        A catalog in the format returned by make_catalog().
    '''

    df = pd.DataFrame.from_dict(catalog)
    df_transposed = df.T
    html_table = df_transposed.to_html()

    with open(html_path, 'a') as html_file:
        html_file.write(html_table)

In [21]:
def end_html(html_path: str):
    '''
    Ends source_info.html, in which source information can be stored.
    '''

    with open(html_path, 'a') as html_file:

        end = '''
        </body>
        </html>
        '''

        html_file.write(end)

In [22]:
def full_html_and_txt(folder: str, threshold: float = 0.01, radius_buffer: float = 5.0, ext_threshold: float = None):
    '''
    From a folder of FITS files, creates source_info.html with observation information table, source figures, and source information table
    and creates interesting_field.txt with names of objects with any (possibly) interesting detections.

    Parameters
    ----------
    folder : str
        The path of the folder containing the FITS files to be analyzed.
    threshold : float (optional)
        The threshold for a significant detection.
        If the probability of detecting the center region's maximum flux assuming no source in the image
        is less than this threshold, then the detection is deemed significant.
        If no value is given, defaults to 0.01.
    radius_buffer : float (optional)
        The amount of buffer, in arcsec, to add to the beam FWHM to get the initial search radius.
        If no value is given, defaults to 5 arcsec.
    ext_threshold : float (optional)
        The probability that an external peak must be below for it to be considered an external source.
        If no value is given, defaults to 0.001.
    '''

    html_path = os.path.join(folder, 'index.html')
    matlab_file = os.path.join(folder, 'gains.mat')

    start_html(html_path)

    json_file = os.path.join(folder, 'polaris.json')

    obs_info_to_html(json_file, html_path)

    ap_eff_to_html(html_path, matlab_file)

    try:
        calibration_plots(html_path, matlab_file)

        with open(html_path, 'a') as html_file:
            html_gain_info = f'''
            <img class=\'bp\' src=\'./bp_amp.jpg'\'>
            <img class=\'bp\' src=\'./bp_pha.jpg'\'>
            <br>
            <img class=\'gain\' src=\'./g_amp.jpg'\'>
            <img class=\'gain\' src=\'./g_pha.jpg'\'>
            <br>
            '''
            html_file.write(html_gain_info)
    except:
        print('Error with gain calibration information.')

    final_catalog = {}
    with open(json_file, 'r') as file:
        obs_dict = json.load(file)

    sci_targs = [targ.lower() for targ in obs_dict['sciTargs']]
    pol_cals = [cal.lower() for cal in obs_dict['polCals']]
    with open(os.path.join(folder, 'interesting_fields.txt'), 'w') as txt:
        for file in glob.glob(os.path.join(folder, '*.fits')):
            obj = fits.getheader(file)['OBJECT']
            if obj.lower() not in pol_cals:
                fig_to_html(html_path, file, radius_buffer=radius_buffer, ext_threshold=ext_threshold)
            if obj.lower() in sci_targs:
                catalog = make_catalog(file, threshold=threshold, radius_buffer=radius_buffer, ext_threshold=ext_threshold)

                #add field name to .txt file if it is a science target with a significant detection in the initial inclusion region
                if catalog != None:
                    for key, value in catalog.items():
                        if value['Internal'] == True:
                            txt.write(f'{obj}\n')
                    final_catalog = combine_catalogs(final_catalog, catalog)

    catalog_to_html(final_catalog, html_path)
    end_html(html_path)

    plt.close('all')

In [23]:
def low_level_table(folder: str, db_path: str = '../sources.db'):

    str_obs_id = 'Unknown'
    big_catalog = None

    try:
        str_obs_id = folder.replace('/mnt/COMPASS9/sma/quality/', '')
        obs_id = str_obs_id.replace('/', '')
        obs_id = int(obs_id) #will throw Exception if obs_id isn't just numbers
    except Exception as e:
        obs_id = 'Unknown'
        print(f'Error with obsID: {e}. WARNING: Old/outdated data may not be deleted.')

    if os.path.exists(db_path):
        # get all rows from existing low level table, if it exists
        con1_established = False
        con1_closed = False
        old_data_cleared = False
        try:
            con1 = sqlite3.connect(db_path)
            con1_established = True
            cur1 = con1.cursor()
            cur1.execute("DELETE FROM low_level WHERE ObsID='{}'".format(obs_id))
            con1.commit()
            old_data_cleared = True
            con1.close()
            con1_closed = True
        except Exception as e:
            if con1_established and not con1_closed:
                con1.close()
                if not old_data_cleared and not isinstance(e, sqlite3.OperationalError):
                    print(f'Error removing old/outdated data from table "low_level" at {db_path}: {e}')

    for file in glob.glob(os.path.join(folder, '*.fits')):
        try:
            catalog = make_catalog(file)
            if catalog is not None:
                for value in catalog.values():
                    value['ObsID'] = obs_id
                    value['SourceID'] = 'Unknown'
                if big_catalog is None:
                    big_catalog = catalog
                else:
                    big_catalog = combine_catalogs(big_catalog, catalog)
        except Exception as e:
            print(f'Error for {file}: {e}')

    if big_catalog is not None:
        df = pd.DataFrame.from_dict(big_catalog)
        df = df.T

        # fixing rounding error where 60 appears in the seconds
        date_times = df['ObsDateTime'].tolist()
        df.drop(columns='ObsDateTime', inplace=True)
        for i in range(len(date_times)):
            dt = date_times[i]
            m_end = dt.rindex(':')
            s_start = m_end + 1
            if dt[s_start:] == '60':
                dt = dt[:s_start] + '0'
                fmt = '%m-%d-%y %H:%M'
                date_times[i] = (datetime.strptime(dt[:m_end], fmt) + timedelta(minutes=1)).strftime('%m-%d-%y %H:%M:%S')
        df['ObsDateTime'] = date_times

        # write into low level table
        con2_established = False
        con2_closed = False
        try:
            con2 = sqlite3.connect(db_path)
            con2_established = True
            df.to_sql("low_level", con=con2, if_exists='append', index=False)
            con2.close()
            con2_closed = True
        except Exception as e:
            if con2_established and not con2_closed:
                con2.close()
            print(f'Error adding to table "low_level" at {db_path}: {e}')

In [24]:
def high_level_table(db_path: str = '../sources.db'):

    unique_sources = None

    if os.path.exists(db_path):
        # get all rows from low level and high level tables, if they exist
        con1_established = False
        con1_closed = False
        try:
            con1 = sqlite3.connect(db_path)
            con1_established = True
            low_df = pd.read_sql_query("SELECT * FROM low_level;", con1)
            if low_df.empty:
                raise ValueError('Table "low_level" is empty')
            unique_sources = pd.read_sql_query("SELECT * FROM high_level;", con1).to_dict(orient='list')
            con1.close()
            con1_closed = True
        except Exception as e:
            if con1_established and not con1_closed:
                con1.close()
            if not isinstance(e, pd.errors.DatabaseError):
                print(f'Error reading from database at {db_path}: {e}')
    else:
        raise OSError(f'Path {db_path} not found')

    #coarse matching
    for row in range(len(low_df)):
        if low_df['SourceID'].iloc[row] == 'Unknown': #check to make sure we didn't already do coarse matching
            if low_df['Stationary'].iloc[row]:
                if unique_sources is not None:
                    ra = low_df['RA'].iloc[row]
                    dec = low_df['Dec'].iloc[row]
                    coord1 = SkyCoord(ra, dec)
                    fwhm = low_df['BeamMajAxis_arcsec'].iloc[row]
                    source_ids = unique_sources['SourceID']
                    matched  = False
                    while not matched:
                        for i in range(len(source_ids)): #compare with each unique source
                            coord2 = SkyCoord(unique_sources['RA'][i], unique_sources['Dec'][i])
                            sep = coord1.separation(coord2)
                            fwhm2_val = float(unique_sources['FWHM_arcsec'][i])
                            max_sep = (fwhm * fwhm2_val)**(1/2) * u.arcsec
                            matched = (sep <= max_sep)
                            if matched:
                                low_df.loc[row, 'SourceID'] = source_ids[i]
                                break
                        break
                    if not matched:
                        num = 1
                        id_nums = [int(source_id.replace('id', '')) for source_id in unique_sources['SourceID']]
                        while num in id_nums:
                            num += 1
                        next_number = '0' * (4 - len(str(num))) + str(num)
                        next_id = f'id{next_number}'
                        source_ids.append(next_id)
                        unique_sources['RA'].append(ra)
                        unique_sources['Dec'].append(dec)
                        unique_sources['FWHM_arcsec'].append(fwhm)
                        low_df.loc[row, 'SourceID'] = next_id
                        unique_sources['AmbiguousTies'].append('Unknown')
                else:
                    ra = low_df['RA'].iloc[row]
                    dec = low_df['Dec'].iloc[row]
                    fwhm = low_df['BeamMajAxis_arcsec'].iloc[row]
                    unique_sources = {'SourceID': ['id0001'], 'RA': [ra], 'Dec': [dec], 'FWHM_arcsec': [fwhm], 'AmbiguousTies': ['Unknown']}
                    low_df.loc[row, 'SourceID'] = 'id0001'
            else:
                low_df.loc[row, 'SourceID'] = 'Not Stationary'

    #further refining matches
    new_sources = unique_sources.copy()
    refined = []
    to_skip = []
    for i in range(len(unique_sources['SourceID'])):
        temp_df = low_df[(low_df['SourceID']) == unique_sources['SourceID'][i]]
        ra_list = [Angle(ra, u.deg) for ra in temp_df['RA']]
        dec_list = [Angle(dec, u.deg) for dec in temp_df['Dec']]
        fwhm_list = [Angle(fwhm, u.arcsec) for fwhm in temp_df['BeamMajAxis_arcsec']]
        if len(unique_sources['SourceID']) > 1 and i not in to_skip:
            for j in range(i + 1, len(unique_sources['SourceID'])):
                if j not in to_skip:
                    temp_df2 = low_df[(low_df['SourceID']) == unique_sources['SourceID'][j]]
                    ra_list2 = [Angle(ra, u.deg) for ra in temp_df2['RA']]
                    dec_list2 = [Angle(dec, u.deg) for dec in temp_df2['Dec']]
                    fwhm_list2 = [Angle(fwhm, u.arcsec) for fwhm in temp_df2['BeamMajAxis_arcsec']]
                    new_ra_list = ra_list + ra_list2
                    new_dec_list = dec_list + dec_list2
                    new_fwhm_list = fwhm_list + fwhm_list2
                    num_pts = len(new_ra_list)
                    avg_ra = sum(new_ra_list) / num_pts
                    avg_dec = sum(new_dec_list) / num_pts
                    geo_avg_fwhm = math.prod(new_fwhm_list) ** (1/num_pts)
                    avg_pt = SkyCoord(avg_ra, avg_dec)
                    temp = 0
                    for pt in range(num_pts):
                        sep = avg_pt.separation(SkyCoord(new_ra_list[pt], new_dec_list[pt]))
                        if sep > geo_avg_fwhm / 2:
                            temp += 1
                    proportion = (num_pts - temp) / (num_pts)
                    if proportion == 1: #average point is a good representative for all points, same source
                        refined.append(new_sources['SourceID'][i])
                        #match found, update averages
                        hms_ra = avg_ra.hms
                        dms_dec = avg_dec.dms
                        str_ra = f'{int(hms_ra.h)}h{abs(int(hms_ra.m))}m{round(abs(hms_ra.s), 2)}s'
                        str_dec = f'{int(dms_dec.d)}d{abs(int(dms_dec.m))}m{round(abs(dms_dec.s), 2)}s'
                        new_sources['RA'][i] = str_ra
                        new_sources['Dec'][i] = str_dec
                        new_sources['FWHM_arcsec'][i] = round(geo_avg_fwhm.value, 3)
                        #get rid of "replaced" source in AmbiguousTies
                        for k in range(len(unique_sources['SourceID'])):
                            unique_sources['AmbiguousTies'][k] = unique_sources['AmbiguousTies'][k].replace(unique_sources['SourceID'][j], '')
                            unique_sources['AmbiguousTies'][k] = unique_sources['AmbiguousTies'][k].replace('__', '_')
                            if unique_sources['AmbiguousTies'][k][0] == '_':
                                unique_sources['AmbiguousTies'][k] = unique_sources['AmbiguousTies'][k][1:]
                            if unique_sources['AmbiguousTies'][k][-1] == '_':
                                unique_sources['AmbiguousTies'][k] = unique_sources['AmbiguousTies'][k][:-1]
                        #update low_df
                        indices = low_df.index[low_df['SourceID'] == unique_sources['SourceID'][j]]
                        low_df.loc[indices, 'SourceID'] = unique_sources['SourceID'][i]
                        to_skip.append(j)
                    elif proportion > 0.7: #average point is a good representative for over 70% but less than 100% of points, ambiguous
                        if new_sources['AmbiguousTies'][i] == 'Unknown' or new_sources['AmbiguousTies'][i] == 'None found':
                            new_sources['AmbiguousTies'][i] = unique_sources['SourceID'][j]
                        elif unique_sources['SourceID'][j] not in new_sources['AmbiguousTies'][i]:
                            new_sources['AmbiguousTies'][i] += '_{}'.format(unique_sources['SourceID'][j])
                        if new_sources['AmbiguousTies'][j] == 'Unknown' or new_sources['AmbiguousTies'][j] == 'None found':
                            new_sources['AmbiguousTies'][j] = unique_sources['SourceID'][i]
                        elif unique_sources['SourceID'][i] not in new_sources['AmbiguousTies'][j]:
                            new_sources['AmbiguousTies'][j] += '_{}'.format(unique_sources['SourceID'][i])
                    if new_sources['AmbiguousTies'][i] == 'Unknown':
                        new_sources['AmbiguousTies'][i] = 'None found'
                    if new_sources['AmbiguousTies'][j] == 'Unknown':
                        new_sources['AmbiguousTies'][j] = 'None found'
    to_skip.sort(reverse=True)
    for k in to_skip:
        del new_sources['SourceID'][k]
        del new_sources['RA'][k]
        del new_sources['Dec'][k]
        del new_sources['FWHM_arcsec'][k]
        del new_sources['AmbiguousTies'][k]

    #get averages for sources only matched with coarse matching
    for i in range(len(new_sources['SourceID'])):
        if new_sources['SourceID'][i] not in refined:
            temp_df = low_df[(low_df['SourceID']) == new_sources['SourceID'][i]]
            ra_list = [Angle(ra, u.deg) for ra in temp_df['RA']]
            dec_list = [Angle(dec, u.deg) for dec in temp_df['Dec']]
            fwhm_list = [Angle(fwhm, u.arcsec) for fwhm in temp_df['BeamMajAxis_arcsec']]
            num_pts = len(ra_list)
            avg_ra = sum(ra_list) / num_pts
            hms_ra = avg_ra.hms
            str_ra = f'{int(hms_ra.h)}h{abs(int(hms_ra.m))}m{round(abs(hms_ra.s), 2)}s'
            avg_dec = sum(dec_list) / num_pts
            dms_dec = avg_dec.dms
            str_dec = f'{int(dms_dec.d)}d{abs(int(dms_dec.m))}m{round(abs(dms_dec.s), 2)}s'
            geo_avg_fwhm = math.prod(fwhm_list) ** (1/num_pts)
            new_sources['RA'][i] = str_ra
            new_sources['Dec'][i] = str_dec
            new_sources['FWHM_arcsec'][i] = round(geo_avg_fwhm.value, 3)

    df = pd.DataFrame.from_dict(new_sources)

    # write into low and high level tables
    con2_established = False
    con2_closed = False
    try:
        con2 = sqlite3.connect(db_path)
        con2_established = True
        df.to_sql("high_level", con=con2, if_exists='replace', index=False)
        low_df.to_sql("low_level", con=con2, if_exists='replace', index=False)
        con2.close()
        con2_closed = True
    except Exception as e:
        if con2_established and not con2_closed:
            con2.close()
        print(f'Error adding to table(s) at {db_path}: {e}')

In [25]:
def light_curve(source_id: str, db_path: str = '../sources.db',\
                plot: bool = True, table: bool = True, save_path: str = ''):

    if os.path.exists(db_path):
        # get all rows from low level and high level tables, if they exist
        con1_established = False
        con1_closed = False
        try:
            con1 = sqlite3.connect(db_path)
            con1_established = True
            low_df = pd.read_sql_query("SELECT * FROM low_level;", con1)
            if low_df.empty:
                raise ValueError('Table "low_level" is empty')
            high_df = pd.read_sql_query("SELECT * FROM high_level;", con1)
            if high_df.empty:
                raise ValueError('Table "high_level" is empty')
            con1.close()
            con1_closed = True
        except Exception as e:
            if con1_established and not con1_closed:
                con1.close()
            print(f'Error reading from database at {db_path}: {e}')
    else:
        raise OSError(f'Path {db_path} not found')

    source_df = low_df[low_df['SourceID'] == source_id]

    if plot:
        fluxes = source_df['Flux_mJy'].to_list()
        flux_errs = source_df['FluxUncert_mJy'].to_list()
        flux_unit = 'mJy'
        if max(fluxes) > 1000:
            flux_unit = 'Jy'
            for i in range(len(fluxes)):
                fluxes[i] /= 1000
                flux_errs[i] /= 1000
        date_times = source_df['ObsDateTime'].tolist()
        fmt_str = '%m-%d-%y %H:%M:%S'
        date_times = [Time(datetime.strptime(dt, fmt_str), format='datetime', scale='utc').mjd for dt in date_times]

        freqs = source_df['Freq_GHz'].tolist()
        other = []
        small_milli = [] # 1.1-1.2mm
        large_milli = [] # 1.3-1.4mm
        micro = [] # 870µm
        for i in range(len(freqs)):
            if freqs[i] == 'Not found':
                other.append(i)
                pass
            else:
                try:
                    float_freq = float(freqs[i])
                    if float_freq > 241.77 and float_freq < 282.82: # 1.24-1.06mm
                        small_milli.append(i)
                    elif float_freq > 208.19 and float_freq < 237.93: # 1.44-1.26mm
                        large_milli.append(i)
                    elif float_freq > 333.10 and float_freq < 356.90: # 900-840µm
                        micro.append(i)
                    else:
                        other.append(i)
                except Exception as e:
                    print(f'Error while getting the frequencies for source {source_id}: {e}')
        other_dt = [date_times[a] for a in other]
        other_flx = [fluxes[a] for a in other]
        other_flx_err = [flux_errs[a] for a in other]
        sm_milli_dt = [date_times[b] for b in small_milli]
        sm_milli_flx = [fluxes[b] for b in small_milli]
        sm_milli_flx_err = [flux_errs[b] for b in small_milli]
        lg_milli_dt = [date_times[c] for c in large_milli]
        lg_milli_flx = [fluxes[c] for c in large_milli]
        lg_milli_flx_err = [flux_errs[c] for c in large_milli]
        micro_dt = [date_times[d] for d in micro]
        micro_flx = [fluxes[d] for d in micro]
        micro_flx_err = [flux_errs[d] for d in micro]

        plt.errorbar(sm_milli_dt, sm_milli_flx, yerr=sm_milli_flx_err, color='g', fmt='x', capsize=3, markersize=2,\
                    capthick=0.5, elinewidth=0.5, label='~1.1-1.2mm')
        plt.errorbar(lg_milli_dt, lg_milli_flx, yerr=lg_milli_flx_err, color='r', fmt='x', capsize=3, markersize=2,\
                    capthick=0.5, elinewidth=0.5, label='~1.3-1.4mm')
        plt.errorbar(micro_dt, micro_flx, yerr=micro_flx_err, color='b', fmt='x', capsize=3, markersize=2,\
                    capthick=0.5, elinewidth=0.5, label='~870µm')
        plt.errorbar(other_dt, other_flx, yerr=other_flx_err, color='k', fmt='x', capsize=3, markersize=2,\
                    capthick=0.5, elinewidth=0.5, label='Other/not found')

        plt.title(f'Source {source_id[2:]}')
        plt.xlabel('Modified Julian Date')
        plt.ylabel(f'Flux [{flux_unit}]')
        plt.legend()
        plt.ylim(bottom=0)

        if save_path != '':
            try:
                if save_path[-1] != '/':
                    save_path = save_path + '/'
                plt.savefig(f'{save_path}{source_id}.jpg')
            except:
                print('Error saving figure. Double check path entered.')

    if table:
        cal_df = source_df.copy()
        for col in cal_df.columns:
            if col not in ['ObsDateTime', 'ObsID', 'Flux_mJy', 'FluxUncert_mJy', 'Freq_GHz']:
                cal_df.drop(columns=col, inplace=True)
        snr_list = [round(float(cal_df['Flux_mJy'].to_list()[i] / cal_df['FluxUncert_mJy'].to_list()[i]), 2) for i in range(len(cal_df))]
        cal_df['SNR'] = snr_list
        fmt_str = '%m-%d-%y %H:%M:%S'
        mjd_list = [float(Time(datetime.strptime(dt, fmt_str), format='datetime', scale='utc').mjd) for dt in cal_df['ObsDateTime']]
        cal_df['MJD'] = mjd_list
        return cal_df

In [26]:
def clause_helper(column_name: str, parameter, other_type):
    phrase = ''
    if parameter is not None:
        if type(parameter) == list:
            if parameter:
                for i in range(len(parameter)):
                    if type(parameter[i]) != other_type:
                        raise TypeError(f'In order to write a condition for {column_name}, if input is a list, its elements must be of {other_type}.')
                    if i == 0:
                        phrase += ' ({} = "{}"'.format(column_name, parameter[i])
                    else:
                        phrase += ' OR {} = "{}"'.format(column_name, parameter[i])
            phrase += ')'
        elif type(parameter) == other_type:
            if type(parameter) == str:
                if not parameter.strip():
                    return ''
            phrase += f' ({column_name} = "{parameter}")'
        else:
            raise TypeError(f'In order to write a condition for {column_name}, input must be None, of type list, or of {other_type}.')
        if phrase:
            phrase += ' AND'
    return phrase

In [27]:
def search_low_level(db_path: str = '../sources.db', field_name = None, stationary = True, lower_freq = None, upper_freq = None,\
                    lower_flux = None, upper_flux = None, ra = None, dec = None, sep_lower = None, sep_upper = None,\
                    internal = None, obs_id = None, source_id = None, obs_dt_lower = None, obs_dt_upper = None):

    where_clause = 'WHERE'
    where_clause += clause_helper(column_name='FieldName', parameter=field_name, other_type=str)
    if stationary is not None:
        where_clause += f' (Stationary = {stationary}) AND'
    if not (lower_flux is None or type(lower_flux) == float or type(lower_flux) == int):
        raise TypeError('Inputted lower bound for flux must be None, of type float, or of type int.')
    if not (upper_flux is None or type(upper_flux) == float or type(upper_flux) == int):
        raise TypeError('Inputted upper bound for flux must be None, of type float, or of type int.')
    if lower_flux is not None and upper_flux is not None:
        where_clause += f' (Flux_mJy BETWEEN {lower_flux} AND {upper_flux}) AND'
    elif lower_flux is not None:
        where_clause += f' (Flux_mJy >= {lower_flux})'
    elif upper_flux is not None:
        where_clause += f' (Flux_mJy <= {upper_flux})'
    if internal is not None:
        where_clause += f' (Internal = {internal}) AND'
    where_clause += clause_helper(column_name='ObsID', parameter=obs_id, other_type=str)
    where_clause += clause_helper(column_name='SourceID', parameter=source_id, other_type=str)

    if where_clause[-5:] == 'WHERE':
        where_clause = ''
    if where_clause[-4:] == ' AND':
        where_clause = where_clause[:-4]
    where_clause += ';'

    if os.path.exists(db_path):
        # connect and query
        con_established = False
        con_closed = False
        try:
            con = sqlite3.connect(db_path)
            con_established = True
            result_df = pd.read_sql_query(f'SELECT * FROM low_level {where_clause}', con)
            con.close()
            con_closed = True
        except Exception as e:
            if con_established and not con_closed:
                con.close()
            print(f'Error querying database at {db_path} : {e}')
    else:
        raise OSError(f'Path {db_path} not found')

    to_drop = []
    if not (lower_freq is None or type(lower_freq) == int or type(lower_freq) == float):
        raise TypeError('Inputted frequency lower bound must be None, of type int, or of type float.')
    if not (upper_freq is None or type(upper_freq) == int or type(upper_freq) == float):
        raise TypeError('Inputted frequency upper bound must be None, of type int, or of type float.')
    if lower_freq is not None and upper_freq is not None:
        for row in range(len(result_df)):
            if result_df['Freq_GHz'].iloc[row] == 'Not found':
                to_drop.append(row)
            elif not (float(result_df['Freq_GHz'].iloc[row]) <= upper_freq and float(result_df['Freq_GHz'].iloc[row]) >= lower_freq):
                to_drop.append(row)
        result_df.drop(to_drop, inplace=True)
    elif lower_freq is not None:
        for row in range(len(result_df)):
            if result_df['Freq_GHz'].iloc[row] == 'Not found':
                to_drop.append(row)
            elif not (float(result_df['Freq_GHz'].iloc[row]) >= lower_freq):
                to_drop.append(row)
        result_df.drop(to_drop, inplace=True)
    elif upper_freq is not None:
        for row in range(len(result_df)):
            if result_df['Freq_GHz'].iloc[row] == 'Not found':
                to_drop.append(row)
            elif not (float(result_df['Freq_GHz'].iloc[row]) <= upper_freq):
                to_drop.append(row)
        result_df.drop(to_drop, inplace=True)
    if to_drop and not result_df.empty:
        result_df.reset_index(inplace=True)

    # handling ra, dec stuff
    coord = None
    ra_ang = None
    dec_ang = None
    lower_ang = None
    upper_ang = None
    if sep_lower is not None:
        try:
            lower_ang = Angle(sep_lower)
            if lower_ang == 0:
                lower_ang = None
        except Exception as e:
            print(f'Error converting separation lower bound input to Angle: {e}')
    if sep_upper is not None:
        try:
            upper_ang = Angle(sep_upper)
            if upper_ang == 0:
                upper_ang = None
        except Exception as e:
            print(f'Error converting separation upper bound input to Angle: {e}')
    if lower_ang is not None and upper_ang is not None:
        if lower_ang > upper_ang:
            raise ValueError(f'Inputted separation lower bound {sep_lower} is greater than inputted separation upper bound {sep_upper}.')
    if ra is not None and dec is not None:
        try:
            coord = SkyCoord(ra, dec)
        except Exception as e:
            print(f'Error converting Right Ascension and Declination inputs to SkyCoord object: {e}')
    elif ra is not None:
        try:
            ra_ang = Angle(ra)
        except Exception as e:
            print(f'Error converting Right Ascension input to Angle: {e}')
    elif dec is not None:
        try:
            dec_ang = Angle(dec)
        except Exception as e:
            print(f'Error converting Declination input to Angle: {e}')

    to_drop = []
    if lower_ang is not None and upper_ang is not None:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                sep = coord.separation(temp_coord)
                if not (sep <= upper_ang and sep >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (abs(ra_ang - temp_ang) <= upper_ang and abs(ra_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (abs(dec_ang - temp_ang) <= upper_ang and abs(dec_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    elif lower_ang is not None:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                sep = coord.separation(temp_coord)
                if not (sep >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (abs(ra_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (abs(dec_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    elif upper_ang is not None:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                sep = coord.separation(temp_coord)
                if not (sep <= upper_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (abs(ra_ang - temp_ang) <= upper_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (abs(dec_ang - temp_ang) <= upper_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    else:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                if not (temp_coord == coord):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (temp_ang == ra_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (temp_ang == dec_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    if to_drop and not result_df.empty:
        result_df.reset_index(inplace=True)

    to_drop = []
    # handling observation date time stuff
    lower_dt = None
    upper_dt = None
    fmt = '%m-%d-%y %H:%M:%S'
    if obs_dt_lower is not None:
        try:
            lower_dt = datetime.strptime(obs_dt_lower, fmt)
        except Exception as e:
            print(f'Error converting inputted observation date and time lower bound to datetime object: {e}. Please check the input format and ensure it matches {fmt}.')
    if obs_dt_upper is not None:
        try:
            upper_dt = datetime.strptime(obs_dt_upper, fmt)
        except Exception as e:
            print(f'Error converting inputted observation date and time upper bound input to datetime object: {e}. Please check the input format and ensure it matcheds {fmt}.')

    if lower_dt is not None and upper_dt is not None:
        if lower_dt > upper_dt:
            raise ValueError(f'Inputted observation date and time lower bound {obs_dt_lower} is later than inputted observation date and time upper bound {obs_dt_upper}.')
        for row in range(len(result_df)):
            temp_dt = datetime.strptime(result_df['ObsDateTime'].iloc[row], fmt)
            if not (temp_dt <= upper_dt and temp_dt >= lower_dt):
                to_drop.append(row)
        result_df.drop(to_drop, inplace=True)
    elif lower_dt is not None:
        for row in range(len(result_df)):
            temp_dt = datetime.strptime(result_df['ObsDateTime'].iloc[row], fmt)
            if not (temp_dt >= lower_dt):
                to_drop.append(row)
        result_df.drop(to_drop, inplace=True)
    elif upper_dt is not None:
        for row in range(len(result_df)):
            temp_dt = datetime.strptime(result_df['ObsDateTime'].iloc[row], fmt)
            if not (temp_dt <= upper_dt):
                to_drop.append(row)
        result_df.drop(to_drop, inplace=True)
    if to_drop and not result_df.empty:
        result_df.reset_index(inplace=True)

    if result_df.empty:
        print('Search returned an empty table.')
    else:
        if 'level_0' in result_df:
            result_df.drop(columns='level_0', inplace=True)
        if 'index' in result_df:
            result_df.drop(columns='index', inplace=True)
    return result_df

In [28]:
def search_high_level(db_path: str = '../sources.db', source_id = None, ra = None, dec = None, sep_lower = None, sep_upper = None,\
                      ambiguous_ties = None, ambig_exact: bool = False):
    where_clause = 'WHERE'
    where_clause += clause_helper(column_name='SourceID', parameter=source_id, other_type=str)

    if where_clause[-5:] == 'WHERE':
        where_clause = ''
    if where_clause[-4:] == ' AND':
        where_clause = where_clause[:-4]
    where_clause += ';'

    if os.path.exists(db_path):
        # connect and query
        con_established = False
        con_closed = False
        try:
            con = sqlite3.connect(db_path)
            con_established = True
            result_df = pd.read_sql_query(f'SELECT * FROM high_level {where_clause}', con)
            con.close()
            con_closed = True
        except Exception as e:
            if con_established and not con_closed:
                con.close()
            print(f'Error querying database at {db_path} : {e}')
    else:
        raise OSError(f'Path {db_path} not found')

    # handling ra, dec stuff
    coord = None
    ra_ang = None
    dec_ang = None
    lower_ang = None
    upper_ang = None
    if sep_lower is not None:
        try:
            lower_ang = Angle(sep_lower)
            if lower_ang == 0:
                lower_ang = None
        except Exception as e:
            print(f'Error converting separation lower bound input to Angle: {e}')
    if sep_upper is not None:
        try:
            upper_ang = Angle(sep_upper)
            if upper_ang == 0:
                upper_ang = None
        except Exception as e:
            print(f'Error converting separation upper bound input to Angle: {e}')
    if lower_ang is not None and upper_ang is not None:
        if lower_ang > upper_ang:
            raise ValueError(f'Inputted separation lower bound {sep_lower} is greater than inputted separation upper bound {sep_upper}.')
    if ra is not None and dec is not None:
        try:
            coord = SkyCoord(ra, dec)
        except Exception as e:
            print(f'Error converting Right Ascension and Declination inputs to SkyCoord object: {e}')
    elif ra is not None:
        try:
            ra_ang = Angle(ra)
        except Exception as e:
            print(f'Error converting Right Ascension input to Angle: {e}')
    elif dec is not None:
        try:
            dec_ang = Angle(dec)
        except Exception as e:
            print(f'Error converting Declination input to Angle: {e}')

    to_drop = []
    if lower_ang is not None and upper_ang is not None:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                sep = coord.separation(temp_coord)
                if not (sep <= upper_ang and sep >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (abs(ra_ang - temp_ang) <= upper_ang and abs(ra_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (abs(dec_ang - temp_ang) <= upper_ang and abs(dec_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    elif lower_ang is not None:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                sep = coord.separation(temp_coord)
                if not (sep >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (abs(ra_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (abs(dec_ang - temp_ang) >= lower_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    elif upper_ang is not None:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                sep = coord.separation(temp_coord)
                if not (sep <= upper_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (abs(ra_ang - temp_ang) <= upper_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (abs(dec_ang - temp_ang) <= upper_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    else:
        if coord is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_dec = result_df['Dec'].iloc[row]
                temp_coord = SkyCoord(temp_ra, temp_dec)
                if not (temp_coord == coord):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif ra_ang is not None:
            for row in range(len(result_df)):
                temp_ra = result_df['RA'].iloc[row]
                temp_ang = Angle(temp_ra)
                if not (temp_ang == ra_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
        elif dec_ang is not None:
            for row in range(len(result_df)):
                temp_dec = result_df['Dec'].iloc[row]
                temp_ang = Angle(temp_dec)
                if not (temp_ang == dec_ang):
                    to_drop.append(row)
            result_df.drop(to_drop, inplace=True)
    if to_drop and not result_df.empty:
        result_df.reset_index(inplace=True)

    to_drop = []
    if ambiguous_ties is not None:
        if type(ambiguous_ties) == bool:
            if ambiguous_ties:
                for row in range(len(result_df)):
                    if result_df['AmbiguousTies'].iloc[row] == 'None found':
                        to_drop.append(row)
                result_df.drop(to_drop, inplace=True)
            elif not ambiguous_ties:
                for row in range(len(result_df)):
                    if result_df['AmbiguousTies'].iloc[row] != 'None found':
                        to_drop.append(row)
                result_df.drop(to_drop, inplace=True)
        elif ambig_exact:
            if type(ambiguous_ties) == list:
                if ambiguous_ties:
                    try:
                        ambiguous_ties = [ele.strip() for ele in ambiguous_ties]
                    except AttributeError:
                        if type(ambiguous_ties) == str:
                            raise AttributeError
                        else:
                            raise TypeError('In order to search by ambiguous ties, if input is a list, its elements must be of type str.')
                    for row in range(len(result_df)):
                        temp = result_df['AmbiguousTies'].iloc[row]
                        for ele in ambiguous_ties:
                            if ele not in temp:
                                to_drop.append(row)
                            temp = temp.replace(ele, '')
                        if temp.replace('_', ''): # this means there are source IDs in temp that are not in ambiguous_ties
                            if row not in to_drop:
                                to_drop.append(row)
                    result_df.drop(to_drop, inplace=True)
            elif type(ambiguous_ties) == str:
                ambiguous_ties = ambiguous_ties.strip()
                if ambiguous_ties:
                    for row in range(len(result_df)):
                        temp = result_df['AmbiguousTies'].iloc[row]
                        if ambiguous_ties != temp:
                            to_drop.append(row)
                    result_df.drop(to_drop, inplace=True)
            else:
                raise TypeError('In order to search by ambiguous ties, input must be None, of type list, or of type str.')
        elif not ambig_exact:
            if type(ambiguous_ties) == list:
                if ambiguous_ties:
                    try:
                        ambiguous_ties = [ele.strip() for ele in ambiguous_ties]
                    except AttributeError:
                        if type(ambiguous_ties) == str:
                            raise AttributeError
                        else:
                            raise TypeError('In order to search by ambiguous ties, if input is a list, its elements must be of type str.')
                    for row in range(len(result_df)):
                        temp = result_df['AmbiguousTies'].iloc[row]
                        for ele in ambiguous_ties:
                            if ele in temp:
                                continue
                            to_drop.append(row)
                    result_df.drop(to_drop, inplace=True)
            elif type(ambiguous_ties) == str:
                ambiguous_ties = ambiguous_ties.strip()
                if ambiguous_ties:
                    for row in range(len(result_df)):
                        temp = result_df['AmbiguousTies'].iloc[row]
                        if ambiguous_ties not in temp:
                            to_drop.append(row)
                    result_df.drop(to_drop, inplace=True)
    if to_drop and not result_df.empty:
        result_df.reset_index(inplace=True)

    if result_df.empty:
            print('Search returned an empty table.')
    else:
        if 'level_0' in result_df:
            result_df.drop(columns='level_0', inplace=True)
        if 'index' in result_df:
            result_df.drop(columns='index', inplace=True)
    return result_df

In [31]:
for file in glob.glob('../data/11151/*'):
    try:
        print(make_catalog(file))
    except:
        pass
# low_level_table(folder='../data/11151', db_path='../test.db')

{'Source1': {'FieldName': 'hd283572', 'ObsDateTime': '3-13-25 8:44:15', 'FileName': 'hd283572.fits', 'Stationary': True, 'BeamMajAxis_arcsec': 3.291, 'BeamMinAxis_arcsec': 2.932, 'BeamPosAngle_deg': 126.1, 'Freq_GHz': 225.512, 'FluxUncert_mJy': 0.718, 'Flux_mJy': 3.582, 'RAUncert_arcsec': 0.812, 'DecUncert_arcsec': 0.846, 'RA': '4h21m58.85s', 'Dec': '28d18m5.41s', 'Internal': True, 'Image': b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02\x80\x00\x00\x01\xe0\x08\x06\x00\x00\x005\xd1\xdc\xe4\x00\x00\x009tEXtSoftware\x00Matplotlib version3.9.2, https://matplotlib.org/!N\x03\xd9\x00\x00\x00\tpHYs\x00\x00\x0fa\x00\x00\x0fa\x01\xa8?\xa7i\x00\x00ufIDATx\x9c\xed\xddy|T\xd5\xf9?\xf0\xcfLV\x08$\x88\x02\x01AD\xa0\r\xe0\x82\xb2\xc4 \x15\x95hP\x14\xa9\xa0@\xad,ei-\xb6R\xdc-\x12\xc1\x85"\x8a\x0b\xa8\x14\x17D[\xabP+\xee(\x05\xac\xfd)\x05\xc5\xa5\x15\x05Q\xb1X0,\xc5$\x12\xc86\xf7\xfc\xfe\xe0\x9b\xa9s\xce\x93\xdc\x93;\x93e\xee|\xde\xafW^:\'\xe7\x9e\xbb\xcd\x0c\'\xf7>\xcfs\x03J)\x05""""J\x18\xc1\xa6\x

In [None]:
# conn = sqlite3.connect('../test.db')
# cursor = conn.cursor()
# cursor.execute("SELECT image FROM plots WHERE plot_name = ?", ("My First Plot",))
# retrieved_plot_data = cursor.fetchone()[0]
# conn.close()