In [1089]:
from astropy.io import fits
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import emcee
import random
from find_source import summary

In [1090]:
def const_fn_red_chi2(y_and_sd, const):
    chi2 = 0
    for tup in y_and_sd:
        y, sd = tup
        chi2 += ((y-const)/sd)**2
    df = len(y_and_sd) - 1
    return chi2/df

In [1091]:
def test_naive_uv_fitting(data):#fits_file: str):
    # file = fits.open(fits_file)
    # data = file[1].data
    vis = np.array(data)
    freq_bin, u, v, Re, Im, w = [], [], [], [], [], []
    for row in vis:
        freq_bin_data, u_data, v_data, Re_data, Im_data, w_data = row
        freq_bin.append(int(freq_bin_data))
        u.append(int(u_data))
        v.append(int(v_data))
        Re.append(float(Re_data/w_data))
        Im.append(float(Im_data/w_data))
        w.append(float(w_data))

    # adding in conjugate half of data
    freq_bin *= 2
    neg_u = [-1 * val for val in u]
    u += neg_u
    neg_v = [-1 * val for val in v]
    v += neg_v
    Re *= 2
    neg_Im = [-1 * val for val in Im]
    Im += neg_Im
    w *= 2
    Rlambda = []
    for i in range(len(u)):
        Rlambda.append((u[i]**2 +v[i]**2)**(1/2))

    sd = [weight**(-1/2) for weight in w]
    amp_dat = []
    re_dat = []
    im_dat = []
    for i in range(len(sd)):
        amp_dat.append(((Re[i]**2 + Im[i]**2)**(1/2), sd[i]))
        re_dat.append((Re[i], sd[i]))
        im_dat.append((Im[i], sd[i]))
    zeroes = [0]*len(amp_dat)

    amp_popt, amp_pcov = curve_fit(const_fn_red_chi2, amp_dat, zeroes)
    amp_red_chi2 = const_fn_red_chi2(amp_dat, float(amp_popt))
    amp_dict = {'val': float(amp_popt[0]), 'sd': float(amp_pcov[0][0]**(1/2)), 'red_chi2': amp_red_chi2}

    re_popt, re_pcov = curve_fit(const_fn_red_chi2, re_dat, zeroes)
    re_red_chi2 = const_fn_red_chi2(re_dat, float(re_popt))
    re_dict = {'val': float(re_popt[0]), 'sd': float(re_pcov[0][0]**(1/2)), 'red_chi2': re_red_chi2}

    im_popt, im_pcov = curve_fit(const_fn_red_chi2, im_dat, zeroes)
    im_red_chi2 = const_fn_red_chi2(im_dat, float(im_popt))
    im_dict = {'val': float(im_popt[0]), 'sd': float(im_pcov[0][0]**(1/2)), 'red_chi2': im_red_chi2}

    return {'amp': amp_dict, 're': re_dict, 'im': im_dict}


In [1092]:
# logarithmic likelihood function (for Gaussian probability)
def log_likelihood(b, y, yerr):
    model = b
    sigma2 = yerr**2
    return -0.5 * np.sum((y - model) ** 2 / sigma2 + np.log(2*np.pi*sigma2))

In [1093]:
def log_prior(b):
    if -20.0 < b < 20.0: # this range is for testing purposes
        return 0.0
    return -np.inf

In [1094]:
def log_probability(b, y, yerr):
    lp = log_prior(b)
    if not np.isfinite(lp): # if outside of range where m, b, log(f) could be
        return -np.inf # essentially 0 probability (e^-inf = 0)
    return lp + log_likelihood(b, y, yerr)

In [1095]:
def test_mcmc_single_pt_source(data):#fits_file: str):
    # TODO: generalize to fitting n point/disk/Gaussian sources

    vis = np.array(data)
    freq_bin, u, v, Re, Im, w = [], [], [], [], [], []
    for row in vis:
        freq_bin_data, u_data, v_data, Re_data, Im_data, w_data = row
        freq_bin.append(int(freq_bin_data))
        u.append(int(u_data))
        v.append(int(v_data))
        Re.append(float(Re_data/w_data))
        Im.append(float(Im_data/w_data))
        w.append(float(w_data))

    # adding in conjugate half of data
    freq_bin *= 2
    neg_u = [-1 * val for val in u]
    u += neg_u
    neg_v = [-1 * val for val in v]
    v += neg_v
    Re *= 2
    neg_Im = [-1 * val for val in Im]
    Im += neg_Im
    w *= 2
    Rlambda = []
    for i in range(len(u)):
        Rlambda.append((u[i]**2 +v[i]**2)**(1/2))

    amp = []
    for i in range(len(Re)):
        amp.append((Re[i]**2 + Im[i]**2)**(1/2))
    sd = [weight**(-1/2) for weight in w]

    # TODO: optional parameters: number of sources (therefore number of dimensions), nsteps, discard number
    nwalkers = 50
    nsteps = 5000
    ndim = 1

    amp_pos = np.random.randn(nwalkers, ndim) * 2 + 10
    # TODO: optional parameters: number of sources (therefore number of dimensions), nsteps, discard number

    amp_sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(np.array(amp), np.array(sd)))
    amp_results = amp_sampler.run_mcmc(amp_pos, nsteps)

    # initial positions for Re and Im until I can figure out initial guesses from image domain
    re_pos = np.random.randn(nwalkers, ndim) * 2 + np.average(Re)
    im_pos = np.random.randn(nwalkers, ndim) * 2 + np.average(Im)

    re_sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(np.array(Re), np.array(sd)))
    re_results = re_sampler.run_mcmc(re_pos, nsteps)

    im_sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(np.array(Im), np.array(sd)))
    im_results = im_sampler.run_mcmc(im_pos, nsteps)

    return amp_results, re_results, im_results

In [1096]:
# # extract visibility data
# file = fits.open('../data/uv_test/3c84.uvfits')
# data = file[1].data
# vis = np.array(data)
# freq_bin, u, v, Re, Im, w = [], [], [], [], [], []
# for row in vis:
#     freq_bin_data, u_data, v_data, Re_data, Im_data, w_data = row
#     freq_bin.append(int(freq_bin_data))
#     u.append(int(u_data))
#     v.append(int(v_data))
#     Re.append(float(Re_data/w_data))
#     Im.append(float(Im_data/w_data))
#     w.append(float(w_data))

# # adding in conjugate half of data
# freq_bin *= 2
# neg_u = [-1 * val for val in u]
# u += neg_u
# neg_v = [-1 * val for val in v]
# v += neg_v
# Re *= 2
# neg_Im = [-1 * val for val in Im]
# Im += neg_Im
# w *= 2
# Rlambda = []
# for i in range(len(u)):
#     Rlambda.append((u[i]**2 +v[i]**2)**(1/2))

In [1097]:
# # plot visibility data
# fig=plt.figure(figsize=(15,8))
# ax=fig.add_subplot(111)
# meters_u = [u_val / 1300 for u_val in u]
# meters_v = [v_val / 1300 for v_val in v]
# ax.scatter(meters_u,meters_v, c=Re, marker='.', cmap='gist_rainbow')
# ax.set_aspect(aspect=1.0)
# ax.set_xlabel('u (lambda)')
# ax.set_ylabel('v (lambda)')

In [1098]:
# plt.scatter(Rlambda, Re, marker='.')
# weights = np.array(w)
# sd = weights**(-1/2)
# plt.errorbar(x=Rlambda, y=Re, yerr=sd, linestyle='None')

In [1099]:
# np.ma.average(Re, axis=(0,40000,80000,100000,120000,140000,160000,180000))

In [1100]:
# plt.scatter(Rlambda, Im, marker='.')
# plt.errorbar(x=Rlambda, y=Im, yerr=sd, linestyle='None')

In [1101]:
# sd = [weight**(-1/2) for weight in w]

# uv_dat = []
# for i in range(len(sd)):
#     uv_dat.append(((Re[i]**2 + Im[i]**2)**(-1/2), sd[i]))
# zeroes = [0]*len(uv_dat)
# popt, pcov = curve_fit(const_fn_red_chi2, uv_dat, zeroes)
# red_chi2_val = const_fn_red_chi2(uv_dat, float(popt))

In [1102]:
##### FAKE ARRAY IN FORM OF KARTO'S DATA #####
fake_arr = np.tile([float(0)]*6, (20, 1))
for i in range(len(fake_arr)):
    fake_arr[i][1] = i
    fake_arr[i][2] = i
    real = 10
    imag = 0
    x = 1e-3
    re_noise = random.random() * x
    im_noise = random.random() * x
    if random.randint(0,1):
        re_noise *= -1
    if random.randint(0,1):
        im_noise *= -1
    fake_arr[i][3] = real + re_noise
    fake_arr[i][4] = imag + im_noise
    weight = (re_noise**2 + im_noise**2)**(-1)
    fake_arr[i][5] = weight
    fake_arr[i][3] *= weight # weighting reals like Karto
    fake_arr[i][4] *= weight # weighting imaginaries like Karto

In [1103]:
test_naive_uv_fitting(fake_arr)

  amp_red_chi2 = const_fn_red_chi2(amp_dat, float(amp_popt))
  re_red_chi2 = const_fn_red_chi2(re_dat, float(re_popt))
  im_red_chi2 = const_fn_red_chi2(im_dat, float(im_popt))


{'amp': {'val': 9.999937775342728,
  'sd': 0.4171457585271395,
  'red_chi2': 0.5425096563266717},
 're': {'val': 9.999937769258377,
  'sd': 0.4163643223510114,
  'red_chi2': 0.542515072644872},
 'im': {'val': -2.9624847598745597e-09,
  'sd': 0.019880584421536427,
  'red_chi2': 0.46836744612806513}}

In [1104]:
##### FAKE ARRAY IN FORM OF KARTO'S DATA #####
fake_arr = np.tile([float(0)]*6, (20, 1))
for i in range(len(fake_arr)):
    fake_arr[i][1] = i
    fake_arr[i][2] = i
    real = 10
    imag = 0
    x = 1e-3
    re_noise = random.random() * x
    im_noise = random.random() * x
    if random.randint(0,1):
        re_noise *= -1
    if random.randint(0,1):
        im_noise *= -1
    fake_arr[i][3] = real + re_noise
    fake_arr[i][4] = imag + im_noise
    weight = (re_noise**2 + im_noise**2)**(-1)
    fake_arr[i][5] = weight
    fake_arr[i][3] *= weight # weighting reals like Karto
    fake_arr[i][4] *= weight # weighting imaginaries like Karto

In [1105]:
test_mcmc_single_pt_source(fake_arr)

(State([[ 9.99996419]
  [10.0000516 ]
  [10.00008685]
  [ 9.99987351]
  [ 9.99993407]
  [ 9.99987395]
  [ 9.99995072]
  [ 9.99976887]
  [ 9.9997533 ]
  [ 9.99996112]
  [10.00000658]
  [ 9.99984264]
  [ 9.99978949]
  [ 9.99983813]
  [ 9.99992366]
  [ 9.99996247]
  [ 9.99982809]
  [ 9.99985483]
  [10.00002136]
  [ 9.99981638]
  [ 9.99986464]
  [10.00004374]
  [ 9.99991077]
  [ 9.99985908]
  [ 9.99978717]
  [ 9.99993326]
  [ 9.99985039]
  [ 9.99976753]
  [ 9.99992307]
  [10.00002775]
  [ 9.99992573]
  [ 9.9998129 ]
  [ 9.99979362]
  [ 9.99981048]
  [ 9.99996245]
  [10.00004327]
  [ 9.99979846]
  [ 9.99990874]
  [ 9.99991477]
  [ 9.99986757]
  [ 9.99975384]
  [ 9.999834  ]
  [ 9.99985232]
  [ 9.99997523]
  [ 9.99994285]
  [ 9.99981794]
  [ 9.99996448]
  [ 9.99988172]
  [ 9.99975337]
  [ 9.99988076]], log_prob=[249.16655145 247.35117251 246.27440648 249.76352136 249.51012598
  249.76378935 249.33812193 248.82429946 248.53541147 249.20813758
  248.43814339 249.66785373 249.14742332 249.64117

In [1106]:
# Rlambda = []
# for i in range(len(u)):
#     Rlambda.append((u[i]**2 +v[i]**2)**(1/2))
# plt.scatter(Rlambda, Re)
# plt.ylim((0,12))

In [1107]:
# plt.scatter(Rlambda, Im)
# plt.ylim((-1,1))

In [1108]:
def initial_guess(fits_file):
    summ = summary(fits_file, plot=False)
    peak_coords = summ['int_peak_val']
    if type(summ['ext_peak_val']) == list:
        peak_coords += summ['ext_peak_val']
    return peak_coords, summ['conservative_rms']

In [1109]:
initial_guess('../data/uv_test/orig.fits')

([0.0006192037835717201], 0.00017520653373480203)

In [1110]:
def naive_uv_fit(fits_file: str):
    file = fits.open(fits_file)
    data = file[1].data
    vis = np.array(data)
    freq_bin, u, v, Re, Im, w = [], [], [], [], [], []
    for row in vis:
        freq_bin_data, u_data, v_data, Re_data, Im_data, w_data = row
        freq_bin.append(int(freq_bin_data))
        u.append(int(u_data))
        v.append(int(v_data))
        Re.append(float(Re_data/w_data))
        Im.append(float(Im_data/w_data))
        w.append(float(w_data))

    # adding in conjugate half of data
    freq_bin *= 2
    neg_u = [-1 * val for val in u]
    u += neg_u
    neg_v = [-1 * val for val in v]
    v += neg_v
    Re *= 2
    neg_Im = [-1 * val for val in Im]
    Im += neg_Im
    w *= 2
    Rlambda = []
    for i in range(len(u)):
        Rlambda.append((u[i]**2 +v[i]**2)**(1/2))

    sd = [weight**(-1/2) for weight in w]
    amp_dat = []
    re_dat = []
    im_dat = []
    for i in range(len(sd)):
        amp_dat.append(((Re[i]**2 + Im[i]**2)**(1/2), sd[i]))
        re_dat.append((Re[i], sd[i]))
        im_dat.append((Im[i], sd[i]))
    zeroes = [0]*len(amp_dat)

    init_info = initial_guess('../data/uv_test/orig.fits')#fits_file)
    # TODO: figure out how to get real and im inital guesses from summary info

    amp_popt, amp_pcov = curve_fit(const_fn_red_chi2, amp_dat, zeroes, p0=init_info[0][0])
    amp_red_chi2 = const_fn_red_chi2(amp_dat, float(amp_popt))
    amp_dict = {'val': float(amp_popt[0]), 'sd': float(amp_pcov[0][0]**(1/2)), 'red_chi2': amp_red_chi2}

    re_popt, re_pcov = curve_fit(const_fn_red_chi2, re_dat, zeroes)
    re_red_chi2 = const_fn_red_chi2(re_dat, float(re_popt))
    re_dict = {'val': float(re_popt[0]), 'sd': float(re_pcov[0][0]**(1/2)), 'red_chi2': re_red_chi2}

    im_popt, im_pcov = curve_fit(const_fn_red_chi2, im_dat, zeroes)
    im_red_chi2 = const_fn_red_chi2(im_dat, float(im_popt))
    im_dict = {'val': float(im_popt[0]), 'sd': float(im_pcov[0][0]**(1/2)), 'red_chi2': im_red_chi2}

    return {'amp': amp_dict, 're': re_dict, 'im': im_dict}


In [1111]:
def mcmc_uv_fit(fits_file: str):
    # TODO: generalize to fitting n point/disk/Gaussian sources

    file = fits.open(fits_file)
    data = file[1].data
    vis = np.array(data)
    freq_bin, u, v, Re, Im, w = [], [], [], [], [], []
    for row in vis:
        freq_bin_data, u_data, v_data, Re_data, Im_data, w_data = row
        freq_bin.append(int(freq_bin_data))
        u.append(int(u_data))
        v.append(int(v_data))
        Re.append(float(Re_data/w_data))
        Im.append(float(Im_data/w_data))
        w.append(float(w_data))

    # adding in conjugate half of data
    freq_bin *= 2
    neg_u = [-1 * val for val in u]
    u += neg_u
    neg_v = [-1 * val for val in v]
    v += neg_v
    Re *= 2
    neg_Im = [-1 * val for val in Im]
    Im += neg_Im
    w *= 2
    Rlambda = []
    for i in range(len(u)):
        Rlambda.append((u[i]**2 +v[i]**2)**(1/2))

    amp = []
    for i in range(len(Re)):
        amp.append((Re[i]**2 + Im[i]**2)**(1/2))
    sd = [weight**(-1/2) for weight in w]

    nwalkers = 50
    ndim = 1
    nsteps = 5000

    init_info = initial_guess('../data/uv_test/orig.fits')#fits_file)

    amp_pos = np.random.randn(nwalkers, ndim) * init_info[1] + init_info[0][0]
    # TODO: optional parameters: number of sources (therefore number of dimensions), nsteps, discard number

    # pos = np.random.randn(50,1) * 2 + 10
    amp_sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(np.array(amp), np.array(sd)))
    amp_results = amp_sampler.run_mcmc(amp_pos, nsteps)

    # initial positions for Re and Im until I can figure out initial guesses from image domain
    re_pos = np.random.randn(nwalkers, ndim) * init_info[1] + np.average(Re)
    im_pos = np.random.randn(nwalkers, ndim) * init_info[1] + np.average(Im)

    re_sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(np.array(Re), np.array(sd)))
    re_results = re_sampler.run_mcmc(re_pos, nsteps)

    im_sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(np.array(Im), np.array(sd)))
    im_results = im_sampler.run_mcmc(im_pos, nsteps)

    return amp_results, re_results, im_results

In [1112]:
orig = '../data/uv_test/orig.fits'
at = '../data/uv_test/at2024tvd.fits'
naive_uv_fit(at)

  amp_red_chi2 = const_fn_red_chi2(amp_dat, float(amp_popt))
  re_red_chi2 = const_fn_red_chi2(re_dat, float(re_popt))
  im_red_chi2 = const_fn_red_chi2(im_dat, float(im_popt))


{'amp': {'val': 0.006974530566237036,
  'sd': 4035.85273676525,
  'red_chi2': 0.48303642995676704},
 're': {'val': 0.0004904882554896553,
  'sd': 171.8119936652506,
  'red_chi2': 0.4873412477494087},
 'im': {'val': -2.7391005396064185e-07,
  'sd': 0.03651166785822786,
  'red_chi2': 0.5192661783285399}}

In [1113]:
mcmc_uv_fit(at)

(State([[0.00716419]
  [0.00678329]
  [0.00656295]
  [0.0069478 ]
  [0.00719697]
  [0.00690764]
  [0.00704523]
  [0.00710736]
  [0.00714001]
  [0.00700007]
  [0.00702715]
  [0.00703983]
  [0.00686306]
  [0.00681768]
  [0.00692527]
  [0.00704138]
  [0.00705811]
  [0.00704265]
  [0.00698384]
  [0.0069905 ]
  [0.00697738]
  [0.00718741]
  [0.00714008]
  [0.00673855]
  [0.00693648]
  [0.00695363]
  [0.00693452]
  [0.00697457]
  [0.00705714]
  [0.00712334]
  [0.00697703]
  [0.00702772]
  [0.00694858]
  [0.0068007 ]
  [0.00698353]
  [0.00703406]
  [0.00708635]
  [0.00689993]
  [0.00688107]
  [0.0069165 ]
  [0.00700878]
  [0.00709396]
  [0.00690831]
  [0.00700396]
  [0.0070748 ]
  [0.00683426]
  [0.00690731]
  [0.00704511]
  [0.00706282]
  [0.00690608]], log_prob=[16742.51948196 16742.50121111 16738.49334985 16743.5832355
  16742.11177314 16743.46979285 16743.453977   16743.07238885
  16742.77849396 16743.58510667 16743.5212424  16743.47611535
  16743.22989717 16742.86247384 16743.53156252 16