In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# np.load on an .npz archive returns a lazily-read NpzFile that keeps the
# underlying file open; the context manager closes it as soon as the 'ids'
# array has been read into memory.
with np.load('../../contrastive_learning/sylt_ids.npz') as sylt_ids_file:
    sylt_ids = sylt_ids_file['ids']

In [3]:
def mag_to_flx(mag):
    """
    Convert AB magnitudes into flux densities in microjansky (muJy).

    Input: an AB magnitude (scalar, np array or pd Series)
    Output: the matching flux in muJy, in the same container type as the input
    """
    # AB zero point (48.60, defined for flux density in erg/s/cm^2/Hz) combined
    # with the factor 10**29 that turns erg/s/cm^2/Hz into microjansky.
    zero_point_scale = 10**(29-(48.60/2.5))
    return zero_point_scale * 10**(-mag/2.5)

def read_lephare_spectrum_file(id):
    """
    Load the LePHARE output spectrum file belonging to the inputted ID.

    The file is looked up as output_spectra/Id<last 9 characters of the ID>.spec.
    Returns a list with one entry per newline-separated line of the file, each
    entry being that line's whitespace-separated tokens (a blank line, including
    the one after a trailing newline, gives an empty list).
    """
    id_suffix = str(id)[-9:]
    with open(f'output_spectra/Id{id_suffix}.spec','r') as spec_file:
        raw_text = spec_file.read()
    return [line.split() for line in raw_text.split('\n')]

def get_lephare_spectrum(id, i): # i from 0 to 2
    """
    Inputs: An ID, and an integer in [0,1,2]
    Outputs: wavelengths, and a spectrum in muJy. The spectrum is the
        LePHARE best fit spectrum selected by the integer:
            0: Galaxy
            1: Quasar
            2: Star
    """
    rows = read_lephare_spectrum_file(id)
    # Skip the first 171 lines (presumably the file's header section -- TODO
    # confirm the count holds for every run) and the final row, which is the
    # empty one left behind by the file's trailing newline.
    table = np.array(rows[171:-1]).astype('float32')
    # The spectra are stacked one after another; a drop in the wavelength
    # column (column 0) marks the first row of the next spectrum.
    wavelength_steps = np.diff(table[:, 0])
    last_rows = np.where(wavelength_steps < 0)[0]
    spec_list = np.split(table, last_rows + 1, axis=0)
    chosen = spec_list[i]
    wavelengths, mags = chosen[:, 0], chosen[:, 1]
    # Clipping ceilinged SED values (magnitudes >= 100) and exact zeros
    keep = (mags < 100) & (mags != 0)
    return wavelengths[keep], mag_to_flx(mags[keep])

The wavelength and flux arrays generated by the above function are not all the same size. This is not only because some values have been clipped: LePHARE itself seems to always output spectra of non-standard lengths (so that's just great). In order to store the spectra in a uniform way, and because we're only interested in a certain wavelength range anyway, we choose a standardised grid of wavelength values between 0.4$\mu$m and 6$\mu$m (the `4e3` to `6e4` range in the code below, i.e. in ångströms) and interpolate the LePHARE spectra onto it.

In [5]:
num_samples = 10000
# Common wavelength grid onto which every LePHARE spectrum is resampled
wavelength_samples = np.linspace(4e3, 6e4, num_samples)
# Axes: ID x Spectrum Type (G/Q/S) x wavelength
spectrum_array = np.zeros((len(sylt_ids), 3, num_samples))

for row, object_id in enumerate(tqdm(sylt_ids, total=len(sylt_ids))):
    for spectrum_type in range(3):
        # np.interp holds the first/last flux value constant for grid points
        # lying outside the wavelength range of the LePHARE spectrum.
        spectrum_array[row, spectrum_type, :] = np.interp(
            wavelength_samples,
            *get_lephare_spectrum(object_id, spectrum_type))

100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 30.60it/s]


In [6]:
# Names given, in order, to the whitespace-separated fields of each catalogue row
lephare_columns = [
    'COADD_OBJECT_ID',
    'Z_BEST', 'Z_BEST68_LOW', 'Z_BEST68_HIGH', 'Z_ML',
    'CHI_BEST', 'MOD_BEST', 'MAG_ABS_BEST', 'PDZ_BEST',
    'SCALE_BEST', 'DIST_MOD_BEST', 'NBAND_USED',
    'Z_SEC', 'CHI_SEC', 'MOD_SEC', 'AGE_SEC',
    'Z_QSO', 'CHI_QSO', 'MOD_QSO', 'MAG_ABS_QSO', 'DIST_MOD_QSO',
    'MOD_STAR', 'CHI_STAR',
    'CONTEXT', 'ZSPEC',
]

with open('lephare_output.out','r') as f:
    lephare_output_text = f.read()

# Skip the first 55 lines (presumably the output file's header -- TODO confirm)
# and tokenise every remaining line on whitespace. Values stay as strings here.
lephare_rows = [line.split() for line in lephare_output_text.split('\n')[55:]]
lephare_df = pd.DataFrame(lephare_rows, columns=lephare_columns)
# Drop the final row (the empty one left by the file's trailing newline)
# before indexing the table by the catalogue's object ID strings.
lephare_df = lephare_df.iloc[:-1].set_index('COADD_OBJECT_ID')

# The catalogue is keyed by the last nine characters of each ID, with leading
# zeros removed by the round trip through int.
lephare_keys = [str(int(str(idd)[-9:])) for idd in sylt_ids]
# Rebuild the table in sylt_ids order, indexed by the full IDs.
lephare_df = pd.DataFrame(
    lephare_df.loc[lephare_keys].to_numpy(),
    columns=lephare_df.columns,
    index=sylt_ids)
lephare_df.to_csv('lephare_output_data.csv')
lephare_df

Unnamed: 0,Z_BEST,Z_BEST68_LOW,Z_BEST68_HIGH,Z_ML,CHI_BEST,MOD_BEST,MAG_ABS_BEST,PDZ_BEST,SCALE_BEST,DIST_MOD_BEST,...,AGE_SEC,Z_QSO,CHI_QSO,MOD_QSO,MAG_ABS_QSO,DIST_MOD_QSO,MOD_STAR,CHI_STAR,CONTEXT,ZSPEC
980453716,1.0327,1.038,1.0409,1.04,1375.78,1,-24.318,100.0,620859.0,44.1881,...,-99.0,6.1,109.059,17,-28.201,48.8499,207,636.932,508.0,-99.0
1143273115,0.9994,0.9994,1.0006,-99.0,1996.84,1,-24.272,100.0,604926.0,44.1,...,-99.0,6.1,377.818,5,-27.693,48.8499,209,1179.4,511.0,-99.0
1197311621,0.9959,0.9955,1.0029,0.9996,521.169,1,-23.425,100.0,274958.0,44.0906,...,-99.0,6.2,23.8606,16,-27.341,48.8909,209,419.063,508.0,-99.0
1228745162,0.955,0.9596,0.9602,-99.0,4309.04,1,-24.41,100.0,679011.0,43.9781,...,-99.0,6.0,77.2263,15,-27.585,48.8082,211,1259.36,510.0,-99.0
1271696125,1.1048,1.116,1.1206,1.1197,1026.73,1,-24.49,100.0,715978.0,44.3695,...,-99.0,6.5,141.36,16,-28.203,49.0099,207,774.716,508.0,-99.0
1456016600,0.869,0.8773,0.8808,-99.0,2480.79,1,-23.439,100.0,272784.0,43.7253,...,-99.0,6.0,34.8033,20,-27.127,48.8082,211,615.816,510.0,-99.0
1470729144,0.9535,0.9585,0.9608,-99.0,1732.61,1,-23.513,100.0,295955.0,43.9738,...,-99.0,6.0,7.62809,22,-26.967,48.8082,210,617.169,510.0,-99.0
1471313744,1.0144,0.9998,1.0013,-99.0,5829.16,1,-24.495,100.0,770518.0,44.1402,...,-99.0,6.1,485.497,22,-27.779,48.8499,210,4893.0,508.0,-99.0
1513805078,0.9632,0.9592,0.961,-99.0,2096.4,1,-23.624,100.0,336211.0,44.0011,...,-99.0,6.0,35.3117,11,-27.358,48.8082,210,696.056,510.0,-99.0
1599741416,0.9704,0.9593,0.9623,0.96,950.755,1,-23.593,100.0,333020.0,44.0212,...,-99.0,5.96,29.5227,16,-27.243,48.7913,185,303.842,510.0,-99.0


In [7]:
# One row per entry of sylt_ids, three columns (IDs x G/Q/S chi-squared values)
chi2_columns = ['CHI_BEST','CHI_QSO','CHI_STAR']
chi2s = lephare_df.loc[sylt_ids, chi2_columns].to_numpy().astype('float32')

In [8]:
# Bundle the IDs, the shared wavelength grid, the interpolated spectra and the
# chi-squared values into a single compressed archive, one named array each.
output_arrays = {
    'ids': sylt_ids,
    'wavelengths': wavelength_samples,
    'spectra': spectrum_array,
    'chi2s': chi2s,
}
np.savez_compressed('lephare_spectra_chi2s.npz', **output_arrays)