In [84]:
from astropy.io import fits
import numpy as np
import scipy.stats
from scipy.stats import norm
from astropy.coordinates import Angle
import astropy.units as u

In [85]:
def fits_data_index(fits_file: str):
    '''Given a FITS file, return the index of the first HDU whose data is a numpy array.

    Parameters
    ----------
    fits_file : str
        Path of the FITS file to inspect.

    Returns
    -------
    int
        Index of the first HDU whose ``data`` attribute is an ``np.ndarray``.

    Raises
    ------
    ValueError
        If no HDU in the file holds array data.

    Any exception raised by ``fits.open`` (for example a missing file) is not
    caught here and propagates to the caller.
    '''

    #open FITS file; the context manager closes the handle even if reading an HDU fails
    with fits.open(fits_file) as hdu_list:
        #going through the HDUs of the file in order to find the first array
        for file_index, hdu in enumerate(hdu_list):
            if isinstance(hdu.data, np.ndarray):
                return file_index

    #every HDU was inspected and none held an array: fail loudly instead of retrying forever
    raise ValueError(f'Error in locating data index of {fits_file}')

In [86]:
def region_stats(fits_file: str, exclusion: float = 0, inclusion: float = float('inf'), center: tuple = (float('inf'), float('inf'))):
    '''Given a FITS file, return statistics of the image inside an annulus around a center pixel.

    Parameters
    ----------
    fits_file : str
        Path of the FITS file.
    exclusion : float
        Exclusion radius in arcsec; pixels closer to the center than this are dropped.
        Defaults to 0.
    inclusion : float
        Inclusion radius in arcsec; pixels farther from the center than this are dropped.
        Defaults to infinity.
    center : tuple
        (x, y) position of the center, expressed as PIXEL indices (column, row) of the image.
        The offsets from it are converted to arcsec with the header cell sizes.
        If left at the default (inf, inf), the center is
        (round(x pixels / 2), round(y pixels / 2)) using Python's round().

    Returns
    -------
    dict
        'peak'      : maximum pixel value in the region (data units; presumably Jy/beam, BUNIT is not checked),
        'rms'       : square root of the variance of the pixel values in the region (same units),
        'beam_size' : beam area in arcsec^2, pi * BMAJ * BMIN / (4 ln 2),
        'x_axis'    : x axis length in arcsec (NAXIS1 * |CDELT1|),
        'y_axis'    : y axis length in arcsec (NAXIS2 * |CDELT2|).
        All values are floats.

    Raises
    ------
    ValueError
        If the data array has fewer than two dimensions, or if no pixel survives the mask.

    Errors from opening the file or from missing header keywords propagate to the caller.
    If the data array has more than two dimensions, only the first plane along the
    leading axes is used.
    '''

    i = fits_data_index(fits_file)

    #open FITS file; everything needed is read inside the block so the handle can be closed
    with fits.open(fits_file) as hdu_list:
        hdu = hdu_list[i]
        data = hdu.data
        header = hdu.header

        if not isinstance(data, np.ndarray) or data.ndim < 2:
            raise ValueError('Data dimension error')

        #numpy axis order is the reverse of the FITS order: the last two axes are (y, x)
        y_dim, x_dim = data.shape[-2:]

        #first plane along any leading axes, copied so it stays valid after the file is closed
        image = np.array(data.reshape(-1, y_dim, x_dim)[0])

        #units of axes, cell sizes, beam axes and axis lengths from the header
        x_unit = header['CUNIT1']
        y_unit = header['CUNIT2']
        x_cdelt = header['CDELT1']
        y_cdelt = header['CDELT2']
        bmaj = header['BMAJ']
        bmin = header['BMIN']
        n_axis1 = header['NAXIS1']
        n_axis2 = header['NAXIS2']

    #row (y) and column (x) index of every pixel, both with the shape of the image
    y_dist_array, x_dist_array = np.indices((y_dim, x_dim))

    #keep center pixel coordinates if specified, set to default if unspecified
    center_pix = center
    if center == (float('inf'), float('inf')):
        center_pix = (round(x_dim/2), round(y_dim/2))

    #find cell size (units of arcsec)
    x_cell_size = (Angle(x_cdelt, x_unit)).to(u.arcsec)
    y_cell_size = (Angle(y_cdelt, y_unit)).to(u.arcsec)

    #beam size (units of arcsec^2)
    #NOTE: BMAJ/BMIN are interpreted in the axis units (CUNIT1/CUNIT2), as in the original code
    beam_size = ((np.pi/4) * bmaj * bmin * Angle(1, x_unit) * Angle(1, y_unit) / np.log(2)).to(u.arcsec**2)

    #find axis sizes; abs() keeps the lengths positive when CDELT is negative
    x_axis_size = abs(n_axis1 * x_cell_size)
    y_axis_size = abs(n_axis2 * y_cell_size)

    #array of each pixel's distance from center_pix (the sign of the cell size drops out when squared)
    dist_from_center = ((((x_dist_array - center_pix[0])*x_cell_size)**2 +
                         ((y_dist_array - center_pix[1])*y_cell_size)**2)**0.5)

    #boolean mask and apply
    mask = (dist_from_center >= exclusion * u.arcsec) & (dist_from_center <= inclusion * u.arcsec)
    masked_data = image[mask]

    if masked_data.size == 0:
        raise ValueError('No values after mask applied. Check inclusion and exclusion radii.')

    #get peak and rms values
    peak = float(np.max(masked_data))
    rms = float((np.var(masked_data))**0.5)

    stats = {'peak': peak, 'rms': rms, 'beam_size': float(beam_size / (u.arcsec**2)),
             'x_axis': float(x_axis_size / u.arcsec), 'y_axis': float(y_axis_size / u.arcsec)}

    return stats

In [87]:
# each entry pairs a FITS file with the radius keywords (in arcsec) handed to region_stats
region_queries = [
    ('3c245.fits', {'exclusion': 5, 'inclusion': 11}),
    ('ngc5044.fits', {'exclusion': 15}),
    ('sdssj152527.48+050029.9.fits', {'inclusion': 20}),
    ('sdssj155636.40+415250.5.fits', {'exclusion': 15, 'inclusion': 20}),
    ('3c270.1.fits', {}),
    ('1407+2827.fits', {'exclusion': 5, 'inclusion': 30}),
]

# print the statistics dictionary of every query, one per line
for fits_name, radii in region_queries:
    print(region_stats(fits_name, **radii))

{'peak': 0.004479635506868362, 'rms': 0.001597219379618764, 'beam_size': 15.743715781426644, 'x_axis': 81.69952006356291, 'y_axis': 81.69952006356291}
{'peak': 0.0032760933972895145, 'rms': 0.0007613645866513252, 'beam_size': 11.183545339265727, 'x_axis': 78.24760544464061, 'y_axis': 78.24760544464061}
{'peak': 0.004618875216692686, 'rms': 0.001178167061880231, 'beam_size': 0.5719882791674973, 'x_axis': 53.04328701380826, 'y_axis': 53.04328701380826}
{'peak': 0.0041266013868153095, 'rms': 0.0011277092853561044, 'beam_size': 0.616271058016661, 'x_axis': 53.03845967450358, 'y_axis': 53.03845967450358}
{'peak': 0.01752321608364582, 'rms': 0.0006712005706503987, 'beam_size': 10.150207645247436, 'x_axis': 78.04997828407494, 'y_axis': 78.04997828407494}
{'peak': 0.0033987266942858696, 'rms': 0.0010311135556548834, 'beam_size': 10.456207744515474, 'x_axis': 64.72768722680007, 'y_axis': 64.72768722680007}


In [88]:
def incl_excl_data(fits_file: str, center: tuple = (float('inf'), float('inf'))):
    '''Given a FITS file and (optional) center, summarize the regions inside and outside a circle.

    The circle radius is the beam major axis (BMAJ, converted to arcsec using CUNIT1)
    plus 5 arcsec. The interior is everything within that radius of the center, the
    exterior is everything beyond it.

    Parameters
    ----------
    fits_file : str
        Path of the FITS file.
    center : tuple
        Forwarded unchanged to region_stats, which uses it as (x, y) pixel indices;
        the default (inf, inf) selects the image center.

    Returns
    -------
    dict
        'int_peak_val' : peak value inside the circle,
        'ext_peak_val' : peak value outside the circle,
        'rms_val'      : rms value outside the circle,
        'n_incl_meas'  : circle area divided by the beam size,
        'n_excl_meas'  : (x axis length * y axis length - circle area) divided by the beam size.

    The areas are purely geometric: the circle is not clipped to the image, so
    'n_excl_meas' can be zero or negative when the circle is larger than the image.
    Errors from opening the file, from missing header keywords and from region_stats
    propagate to the caller.
    '''

    i = fits_data_index(fits_file)

    #open FITS file; only two header values are needed, so the handle is closed right away
    with fits.open(fits_file) as hdu_list:
        header = hdu_list[i].header
        bmaj = header['BMAJ']
        axis_unit = header['CUNIT1']

    radius = float((bmaj * (Angle(1, axis_unit)).to(u.arcsec) / u.arcsec) + 5) #major axis + 5 arcsec

    #get info on inclusion and exclusion regions
    int_info = region_stats(fits_file = fits_file, inclusion = radius, center = center)
    ext_info = region_stats(fits_file = fits_file, exclusion = radius, center = center)

    #axis lengths and beam size describe the whole image, so either result could supply them
    x_axis = int_info['x_axis']
    y_axis = int_info['y_axis']
    beam_size = int_info['beam_size']

    #calculating number of measurements (beam areas) in inclusion and exclusion regions
    incl_area = np.pi * (radius**2)
    excl_area = x_axis * y_axis - incl_area
    n_incl_meas = incl_area / beam_size
    n_excl_meas = excl_area / beam_size

    return {'int_peak_val': int_info['peak'], 'ext_peak_val': ext_info['peak'], 'rms_val': ext_info['rms'],
            'n_incl_meas': n_incl_meas, 'n_excl_meas': n_excl_meas}

In [89]:
def meas_rms_prob(fits_file: str, center: tuple = (float('inf'), float('inf'))):
    '''Given a FITS file and (optional) center, estimate how likely the interior and exterior
    peaks are to be noise, using the rms measured in the exclusion area as the noise.

    For each region the Gaussian tail probability of a value at least peak/rms sigma is
    multiplied by the number of measurements in that region, and the result is averaged
    over a normal distribution of the rms (width rms / sqrt(n_excl_meas), sampled at
    100 points within +/- 5 widths).

    Parameters
    ----------
    fits_file : str
        Path of the FITS file.
    center : tuple
        Forwarded unchanged to incl_excl_data; the default (inf, inf) selects the image center.

    Returns
    -------
    dict
        'int_prob' : value for the peak inside the circle,
        'ext_prob' : value for the peak outside the circle.
        Both are tail probability times number of measurements, so they can exceed 1.

    Raises
    ------
    ValueError
        If the number of exterior measurements or the measured rms is not positive.
    '''

    info = incl_excl_data(fits_file, center)

    int_peak_val = info['int_peak_val']
    ext_peak_val = info['ext_peak_val']
    rms_val = info['rms_val']
    n_incl_meas = info['n_incl_meas']
    n_excl_meas = info['n_excl_meas']

    #a non-positive count would make the power below complex, a non-positive rms gives NaN
    if not (n_excl_meas > 0):
        raise ValueError(f'number of exterior measurements must be positive, got {n_excl_meas}')
    if not (rms_val > 0):
        raise ValueError(f'exterior rms must be positive, got {rms_val}')

    #calculate error for rms
    rms_err = rms_val * (n_excl_meas)**(-1/2)

    #sample the rms uncertainty and weight each sample by its normal pdf
    #NOTE: rms_val + uncert reaches zero or below when n_excl_meas <= 25 (5 * rms_err >= rms_val)
    uncert = np.linspace(-5 * rms_err, 5 * rms_err, 100)
    uncert_pdf = norm.pdf(uncert, loc = 0, scale = rms_err)
    trial_rms = rms_val + uncert

    def _weighted_prob(peak_val, n_meas):
        #pdf-weighted mean over the rms samples of (tail probability * number of measurements)
        return float(np.average(norm.cdf((-1 * peak_val)/trial_rms) * n_meas, weights = uncert_pdf))

    return {'int_prob': _weighted_prob(int_peak_val, n_incl_meas),
            'ext_prob': _weighted_prob(ext_peak_val, n_excl_meas)}

In [90]:
def calc_rms_prob(fits_file: str, center: tuple = (float('inf'), float('inf'))):
    '''Given a FITS file and (optional) center, estimate how likely the interior and exterior
    peaks are to be noise, using an rms calculated from the exterior peak as the noise.

    The rms is chosen so that the exterior peak is exactly the value expected once among
    n_excl_meas Gaussian measurements: rms = ext_peak / sigma, with
    sigma = -norm.ppf(1 / n_excl_meas).

    Parameters
    ----------
    fits_file : str
        Path of the FITS file.
    center : tuple
        Forwarded unchanged to incl_excl_data; the default (inf, inf) selects the image center.

    Returns
    -------
    dict
        'int_prob' : tail probability of the interior peak times the number of interior measurements,
        'ext_prob' : the same for the exterior peak; by construction this is 1 up to rounding.
        The values are tail probability times number of measurements, so they can exceed 1.

    Raises
    ------
    ValueError
        If there are not more than 2 exterior measurements, or the exterior peak is not positive.
    '''

    info = incl_excl_data(fits_file, center)

    int_peak_val = info['int_peak_val']
    ext_peak_val = info['ext_peak_val']
    n_incl_meas = info['n_incl_meas']
    n_excl_meas = info['n_excl_meas']

    #norm.ppf(1/n) is negative only for n > 2; otherwise sigma would be zero, negative or NaN
    if not (n_excl_meas > 2):
        raise ValueError(f'need more than 2 exterior measurements, got {n_excl_meas}')
    #the derived rms has the sign of the exterior peak, so the peak must be positive
    if not (ext_peak_val > 0):
        raise ValueError(f'exterior peak must be positive, got {ext_peak_val}')

    #significance (in sigma) at which one exceedance is expected among n_excl_meas measurements
    excl_sigma = -1 * norm.ppf(1/n_excl_meas)
    rms_val = ext_peak_val / excl_sigma

    prob_dict = {}
    prob_dict['int_prob'] = float(norm.cdf((-1 * int_peak_val)/(rms_val))) * n_incl_meas
    prob_dict['ext_prob'] = float(norm.cdf((-1 * ext_peak_val)/(rms_val))) * n_excl_meas

    return prob_dict

In [91]:
def significance(fits_file: str, version: str, threshold: float):
    '''Decide whether the interior peak of a FITS image counts as significant emission.

    fits_file is the path of the FITS file, version names the estimator to use
    ('meas_rms_prob' or 'calc_rms_prob') and threshold is a probability in [0, 1].
    Returns True when the 'int_prob' value of the chosen estimator is less than or
    equal to threshold, False otherwise.
    Raises ValueError if threshold is outside [0, 1] or version is not one of the two names.
    '''
    #threshold is validated first, then version
    threshold_in_range = 0 <= threshold <= 1
    if not threshold_in_range:
        raise ValueError("threshold must be between 0 and 1, inclusive")

    if version not in ('meas_rms_prob', 'calc_rms_prob'):
        raise ValueError("version must be either 'meas_rms_prob' or 'calc_rms_prob'")

    #pick the estimator named by version; only the selected name is looked up
    estimator = meas_rms_prob if version == 'meas_rms_prob' else calc_rms_prob
    interior_prob = estimator(fits_file)['int_prob']

    return interior_prob <= threshold

In [92]:
# files to report on, in the order they are printed
fits_list = [
    'ngc5044.fits',
    '1407+2827.fits',
    '3c245.fits',
    'sdssj152527.48+050029.9.fits',
    'sdssj155636.40+415250.5.fits',
    '3c270.1.fits',
]

# for every file: name, then both estimators with their significance at the 0.05 threshold
for fits_name in fits_list:
    print(fits_name)
    print('meas')
    print(meas_rms_prob(fits_file = fits_name))
    print(significance(fits_file = fits_name, version = 'meas_rms_prob', threshold = 0.05))
    print('calc')
    print(calc_rms_prob(fits_file = fits_name))
    print(significance(fits_file = fits_name, version = 'calc_rms_prob', threshold = 0.05))
    print()

ngc5044.fits
meas
{'int_prob': 0.0, 'ext_prob': 0.010185324770829738}
True
calc
{'int_prob': 0.0, 'ext_prob': 0.9999999999999972}
True

1407+2827.fits
meas
{'int_prob': 4.601260628642234e-05, 'ext_prob': 0.06673845334065923}
True
calc
{'int_prob': 0.0030807096131315066, 'ext_prob': 1.0000000000000004}
True

3c245.fits
meas
{'int_prob': 0.0, 'ext_prob': 0.00945169784907658}
True
calc
{'int_prob': 0.0, 'ext_prob': 0.9999999999999999}
True

sdssj152527.48+050029.9.fits
meas
{'int_prob': 0.007739206677762738, 'ext_prob': 0.1632742505807966}
True
calc
{'int_prob': 0.04557035247628376, 'ext_prob': 0.9999999999999974}
True

sdssj155636.40+415250.5.fits
meas
{'int_prob': 0.06838942908130662, 'ext_prob': 0.00349148668211047}
False
calc
{'int_prob': 1.2478165490666304, 'ext_prob': 0.9999999999999974}
False

3c270.1.fits
meas
{'int_prob': 1.333784198557712e-290, 'ext_prob': 0.029963089739749336}
True
calc
{'int_prob': 5.964264253020974e-230, 'ext_prob': 0.9999999999999992}
True



In [93]:
# second batch of files to report on, in the order they are printed
fits_list = [
    '0510+180.fits',
    '3c279.fits',
    'hd283323.fits',
    'l1551-51.fits',
    '0854+201.fits',
    'bd+21 584.fits',
    'hd28354.fits',
    'uranus.fits',
    '3c111.fits',
    'bllac.fits',
    'hd283572.fits',
    'v652per.fits',
]

# for every file: name, then both estimators with their significance at the 0.05 threshold
for fits_name in fits_list:
    print(fits_name)
    print('meas')
    print(meas_rms_prob(fits_file = fits_name))
    print(significance(fits_file = fits_name, version = 'meas_rms_prob', threshold = 0.05))
    print('calc')
    print(calc_rms_prob(fits_file = fits_name))
    print(significance(fits_file = fits_name, version = 'calc_rms_prob', threshold = 0.05))
    print()

0510+180.fits
meas
{'int_prob': 0.0, 'ext_prob': 0.020403648882963095}
True
calc
{'int_prob': 0.0, 'ext_prob': 1.0000000000000009}
True

3c279.fits
meas
{'int_prob': 0.0, 'ext_prob': 3.1675440696901414e-12}
True
calc
{'int_prob': 0.0, 'ext_prob': 0.9999999999999999}
True

hd283323.fits
meas
{'int_prob': 0.12792050437476432, 'ext_prob': 0.06977787881698642}
False
calc
{'int_prob': 0.48532563598234196, 'ext_prob': 0.9999999999999972}
False

l1551-51.fits
meas
{'int_prob': 0.17565757949317096, 'ext_prob': 0.00019929925318330907}
False
calc
{'int_prob': 1.668080430471248, 'ext_prob': 1.0}
False

0854+201.fits
meas
{'int_prob': 0.0, 'ext_prob': 0.0005188486510814418}
True
calc
{'int_prob': 0.0, 'ext_prob': 0.9999999999999996}
True

bd+21 584.fits
meas
{'int_prob': 0.0462429774544983, 'ext_prob': 0.02963723625300116}
True
calc
{'int_prob': 0.339310413414468, 'ext_prob': 0.9999999999999981}
False

hd28354.fits
meas
{'int_prob': 0.12444341935302737, 'ext_prob': 0.10859110618387637}
False
calc
