In [1]:
from astropy.table import Table
from astropy.io import fits
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde
from minisom import MiniSom
import pickle
import os
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Catalogue information table, converted to pandas and ordered by "cosmos_id".
# NOTE(review): the row filtering further down pairs these rows with the rows
# of the photometry text files, so the text files are presumably in cosmos_id
# order as well -- confirm.
df_L3_info = Table.read('../dataset/L3_COSMOS2020_Richard_RefCat_2023DEC4_info.fits')
df_L3_info = df_L3_info.to_pandas().sort_values(by = "cosmos_id")

# Single place to change when the reference-catalogue photometry lives elsewhere.
SPHX_REFCAT_DIR = 'D:/SPHEREx_SOM/dataset/sphx_refcat'

# For every photometry table the first three columns are dropped ([:, 3:]).
# NOTE(review): presumably identifier/position columns -- confirm against the files.
fname = f'{SPHX_REFCAT_DIR}/Noiseless_phot_cosmos_nolines_refcat30k.txt'
data_noiseless = np.loadtxt(fname)[:, 3:]

fname = f'{SPHX_REFCAT_DIR}/NoisySphx_shallow_nolines_refcat30k.txt'
data_all = np.loadtxt(fname)[:, 3:]

fname = f'{SPHX_REFCAT_DIR}/NoisySphx_deep_nolines_refcat30k.txt'
data_deep = np.loadtxt(fname)[:, 3:]

# Noise table: the first line of the file is skipped, then column 0 is read as
# `wl`, column 1 as `sigma_all` and column 2 as `sigma_deep`.
fname = f'{SPHX_REFCAT_DIR}/SPHEREx_1sigma_noise.txt'
data_1sig  = np.loadtxt(fname, skiprows=1)
wl = data_1sig[:,0]
sigma_all = data_1sig[:,1]
sigma_deep = data_1sig[:,2]

In [3]:
# Build row-aligned (photometry, catalogue info, noiseless photometry, error)
# sets for the shallow, deep and noiseless samples.
#
# Row masks are plain NumPy boolean arrays so that every selection below is
# positional. (A boolean pandas Series would be aligned to df_L3_info by index
# label, and df_L3_info's labels were permuted by sort_values.) A row is kept
# only when *all* of its columns are valid, which is the same rule dropna()
# applies, so the data arrays and the info tables always keep the same rows.
#
# NOTE(review): even columns (0::2) are used as measurements and odd columns
# (1::2) as their uncertainties -- layout inferred from the variable names,
# confirm against the files.

shallow_flux = data_all[:, 0::2]
shallow_ok = ~np.isnan(shallow_flux).any(axis=1)
shallow_data = shallow_flux[shallow_ok]
shallow_info = df_L3_info[shallow_ok]
shallow_noiseless = data_noiseless[shallow_ok]
# The odd columns of the first row are repeated for every kept object.
shallow_err = np.tile(data_all[0, 1::2],  (shallow_data.shape[0], 1))

deep_flux = data_deep[:, 0::2]
deep_ok = ~np.isnan(deep_flux).any(axis=1)
deep_data = deep_flux[deep_ok]
deep_info = df_L3_info[deep_ok]
deep_noiseless = data_noiseless[deep_ok]
deep_err = np.tile(data_deep[0, 1::2],  (deep_data.shape[0], 1))

# Noiseless sample: keep rows that contain no NaN and only strictly positive values.
noiseless_ok = ~np.isnan(data_noiseless).any(axis=1) & (data_noiseless > 0).all(axis=1)
noiseless_info = df_L3_info[noiseless_ok]
noiseless_data = data_noiseless[noiseless_ok]

# Every data array must stay row-aligned with its info table.
assert len(shallow_info) == shallow_data.shape[0] == shallow_noiseless.shape[0]
assert len(deep_info) == deep_data.shape[0] == deep_noiseless.shape[0]
assert len(noiseless_info) == noiseless_data.shape[0]

In [142]:
def label_map(data, err, info_data, file):
    """Label a stored SOM with catalogue properties and save the redshift scatter.

    The record pickled at ``file`` is loaded, ``data`` is pushed through the
    transform ``-arcsinh(data / (2 * b_scale * err))`` (``lupmag`` below) and
    standardised per column. If the SOM's topographic and quantization errors
    on that input match the values stored in the record (within 1e-5), the
    record is updated in place on disk with:

    * ``"prop_map"``   - result of ``som.labels_map(proc_data, proc_err, labels)``
      where each label is the tuple ``(HSC_i_MAG, z_true, row index)``;
    * ``"z_std_gal"``  - for every labelled object, the sample standard
      deviation of ``z_true`` in its cell divided by ``1 + mean z_true`` of
      that cell;
    * ``"mean_z_std"`` - ``np.nanmean`` of ``"z_std_gal"``.

    Labels are always recomputed, even if the record already has a "prop_map".

    Parameters
    ----------
    data, err : array-like
        Measurements and their uncertainties; combined element-wise.
        NOTE(review): assumed to be 2-D (objects x channels) -- confirm.
    info_data : pandas.DataFrame
        Must provide the columns "HSC_i_MAG" and "z_true", one row per row of
        ``data``.
    file : str
        Path of the pickled record. The record must contain "som", "topo_err"
        and "quan_err"; "b_scale" is optional (1.042 is used when absent).
        NOTE(review): ``som.labels_map`` is called with three arguments, which
        is not the stock MiniSom signature -- presumably a project-modified
        SOM class; confirm.

    Returns
    -------
    None
        A file that cannot be loaded is reported and skipped; input that does
        not reproduce the stored errors prints "Wrong data" and nothing is
        written.
    """
    # NOTE(security): pickle.load can execute arbitrary code -- only point this
    # at record files produced by this project.
    try:
        with open(file, "rb") as fh:
            record = pickle.load(fh)
    except Exception as exc:
        # Best effort as before: an unreadable record is reported and skipped.
        # Ctrl-C is no longer swallowed here.
        print(f"{file} (skipped: {exc!r})")
        return

    som = record["som"]
    b_scale = record["b_scale"] if "b_scale" in record else 1.042

    scaled = data / (2 * b_scale * err)
    lupmag = -np.arcsinh(scaled)
    lup_spread = np.std(lupmag, ddof = 1, axis=0)
    proc_data = (lupmag - np.mean(lupmag, axis=0)) / lup_spread

    # The stored errors act as a fingerprint: only label the map with the very
    # data set the SOM's statistics were recorded for.
    tol = 10 ** (-5)
    if not (np.absolute(som.topographic_error(proc_data) - record["topo_err"]) < tol
            and np.absolute(som.quantization_error(proc_data) - record["quan_err"]) < tol):
        print("Wrong data")
        return

    # |d(lupmag)/d(data)| * err, expressed in the same standardised units as proc_data.
    proc_err = np.absolute(-1 / (2 * b_scale * err) / np.sqrt(scaled ** 2 + 1) * err)
    proc_err = proc_err / lup_spread

    # One label per object: (HSC_i_MAG, z_true, row index).
    labels = tuple(map(tuple, np.column_stack((info_data["HSC_i_MAG"].values,
                                               info_data["z_true"].values,
                                               np.arange(proc_data.shape[0])))))
    prop_map = som.labels_map(proc_data, proc_err, labels)
    record["prop_map"] = prop_map

    # Per-cell redshift scatter, repeated once for every object in the cell.
    per_cell = []
    for cell_labels in prop_map.values():
        properties = np.array(list(cell_labels.keys()))
        means = np.mean(properties, axis = 0)
        stds = np.std(properties, axis = 0, ddof = 1)  # NaN for single-object cells
        per_cell.append(np.full(properties.shape[0], stds[1] / (means[1] + 1)))
    z_std_gal = np.concatenate(per_cell) if per_cell else np.array([])

    record["z_std_gal"] = z_std_gal
    record["mean_z_std"] = np.nanmean(z_std_gal)

    # Write to a sibling temp file and swap it in, so that a failure or an
    # interrupt half-way through the dump can never truncate the existing record.
    tmp_path = f"{file}.tmp"

    def _write_record():
        with open(tmp_path, "wb") as fh:
            pickle.dump(record, fh, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_path, file)

    try:
        try:
            _write_record()
        except KeyboardInterrupt:
            # Finish the save before reporting, so the message below is true.
            # As before, an interrupt that lands during the save is absorbed.
            _write_record()
            print('KeyboardInterrupt caught, data saved.')
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [140]:
def label_map_mag(data, err, info_data, file):
    """Label a stored SOM (magnitude input) and save the redshift scatter.

    The record pickled at ``file`` is loaded. Nothing is done when it already
    holds a "prop_map" whose labels are 3-tuples. Otherwise ``data`` is
    converted with ``-2.5 * log10(data / 10**6) + 8.9``, standardised per
    column, and - provided the SOM's topographic and quantization errors on
    that input match the stored values within 1e-5 - the record is updated on
    disk with:

    * ``"prop_map"``   - ``som.labels_map(proc_data, err, labels)`` with labels
      ``(HSC_i_MAG, z_true, row index)``;
    * ``"z_std_gal"``  - for every labelled object, the sample standard
      deviation of ``z_true`` in its cell divided by ``1 + mean z_true`` of
      that cell;
    * ``"mean_z_std"`` - ``np.nanmean`` of ``"z_std_gal"``.

    Parameters
    ----------
    data : array-like
        Measurements; must be positive for the logarithm to be finite.
        NOTE(review): the constants look like a micro-Jansky to AB magnitude
        conversion -- confirm the input units.
    err : array-like
        Passed to ``som.labels_map`` unchanged.
        NOTE(review): unlike ``label_map`` the uncertainty is not transformed
        into the standardised magnitude space -- confirm this is intended.
    info_data : pandas.DataFrame
        Must provide the columns "HSC_i_MAG" and "z_true", one row per row of
        ``data``.
    file : str
        Path of the pickled record; it must contain "som", "topo_err" and
        "quan_err".

    Returns
    -------
    None
        A file that cannot be loaded is reported and skipped; input that does
        not reproduce the stored errors prints "Wrong data" and nothing is
        written.
    """
    # NOTE(security): pickle.load can execute arbitrary code -- only point this
    # at record files produced by this project.
    try:
        with open(file, "rb") as fh:
            record = pickle.load(fh)
    except Exception as exc:
        # Best effort as before: an unreadable record is reported and skipped.
        # Ctrl-C is no longer swallowed here.
        print(f"{file} (skipped: {exc!r})")
        return

    # Already labelled with (HSC_i_MAG, z_true, index) triples -> nothing to do.
    # An empty "prop_map" is treated as missing and gets recomputed.
    if "prop_map" in record:
        first_cell = next(iter(record["prop_map"].values()), None)
        first_label = next(iter(first_cell), None) if first_cell else None
        if first_label is not None and len(first_label) == 3:
            return

    som = record["som"]
    mags = -2.5 * np.log10(data / 10 ** 6) + 8.9
    proc_data = (mags - np.mean(mags, axis=0)) / np.std(mags, ddof = 1, axis=0)

    # The stored errors act as a fingerprint: only label the map with the very
    # data set the SOM's statistics were recorded for.
    tol = 10 ** (-5)
    if not (np.absolute(som.topographic_error(proc_data) - record["topo_err"]) < tol
            and np.absolute(som.quantization_error(proc_data) - record["quan_err"]) < tol):
        print("Wrong data")
        return

    # One label per object: (HSC_i_MAG, z_true, row index).
    labels = tuple(map(tuple, np.column_stack((info_data["HSC_i_MAG"].values,
                                               info_data["z_true"].values,
                                               np.arange(proc_data.shape[0])))))
    prop_map = som.labels_map(proc_data, err, labels)
    record["prop_map"] = prop_map

    # Per-cell redshift scatter, repeated once for every object in the cell.
    per_cell = []
    for cell_labels in prop_map.values():
        properties = np.array(list(cell_labels.keys()))
        means = np.mean(properties, axis = 0)
        stds = np.std(properties, axis = 0, ddof = 1)  # NaN for single-object cells
        per_cell.append(np.full(properties.shape[0], stds[1] / (means[1] + 1)))
    z_std_gal = np.concatenate(per_cell) if per_cell else np.array([])

    record["z_std_gal"] = z_std_gal
    record["mean_z_std"] = np.nanmean(z_std_gal)

    # Write to a sibling temp file and swap it in, so that a failure or an
    # interrupt half-way through the dump can never truncate the existing record.
    tmp_path = f"{file}.tmp"

    def _write_record():
        with open(tmp_path, "wb") as fh:
            pickle.dump(record, fh, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_path, file)

    try:
        try:
            _write_record()
        except KeyboardInterrupt:
            # Finish the save before reporting, so the message below is true.
            # As before, an interrupt that lands during the save is absorbed.
            _write_record()
            print('KeyboardInterrupt caught, data saved.')
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [155]:
def best_std(names, means, n = 5):
    """Return the ``n`` smallest values of ``means`` with their matching names.

    Parameters
    ----------
    names : array-like
        Identifier of every entry; same length as ``means``.
    means : array-like of float
        Score of every entry; lower is better. NaN entries are ignored.
    n : int, optional
        Maximum number of entries to return (default 5).

    Returns
    -------
    (best_means, best_names) : tuple of lists
        Scores in ascending order and the corresponding names. Fewer than
        ``n`` items are returned when fewer non-NaN entries exist. Tied scores
        are all kept, in their original relative order.
    """
    names = np.asarray(names)
    means = np.asarray(means, dtype=float)

    # NaN scores cannot be ranked; drop them together with their names.
    rankable = ~np.isnan(means)
    names, means = names[rankable], means[rankable]

    # A stable sort keeps tied entries in input order instead of discarding them.
    order = np.argsort(means, kind="stable")[:max(n, 0)]
    best_means = list(means[order])
    best_names = list(names[order])

    return best_means, best_names

In [159]:
# Load every "lup_shallow_30" record and drop its "b_scale" entry from the
# in-memory copy. The write-back below is commented out, so as it stands this
# cell does not modify any file on disk.
som_dir = "D:/SPHEREx_SOM/record/4th_exploration/SOM/"
files = os.listdir(som_dir)
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_shallow_30" in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            # Records without the key are simply left alone.
            if "b_scale" in record:
                del record["b_scale"]
            # Re-enable to persist the change:
            # try:
            #     with open("D:/SPHEREx_SOM/record/4th_exploration/SOM/" + i, 'wb') as fh:
            #         pickle.dump(record, fh, protocol=pickle.HIGHEST_PROTOCOL)
            # except KeyboardInterrupt:
            #     print('KeyboardInterrupt caught, data saved.')
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")

100.0%%

In [None]:
# Run label_map on every record whose file name contains "lup_shallow_30",
# showing a percentage progress indicator on a single console line.
som_dir = "D:/SPHEREx_SOM/record/4th_exploration/SOM/"
files = os.listdir(som_dir)
n = 0
for i in files:
    n = n + 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_shallow_30" not in i:
        continue
    label_map(data = shallow_data,
              err = shallow_err,
              info_data = shallow_info,
              file = som_dir + i)

In [176]:
# Gather "mean_z_std" from every record whose file name contains
# "lup_shallow_30"; records that lack the key are ignored.
som_dir = "D:/SPHEREx_SOM/record/4th_exploration/SOM/"
files = os.listdir(som_dir)
means30shallow = []
names30shallow = []
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_shallow_30" in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            if "mean_z_std" in record:
                means30shallow.append(record["mean_z_std"])
                names30shallow.append(i)
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")
means30shallow = np.array(means30shallow)
names30shallow = np.array(names30shallow)

100.0%%

In [177]:
# Gather "mean_z_std" from every record whose file name contains
# "lup_deep_30"; records that lack the key are ignored.
som_dir = "D:/SPHEREx_SOM/record/4th_exploration/SOM/"
files = os.listdir(som_dir)
means30deep = []
names30deep = []
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_deep_30" in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            if "mean_z_std" in record:
                means30deep.append(record["mean_z_std"])
                names30deep.append(i)
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")
# Convert the lists filled above. (The previous version converted
# means50deep / names50deep here, which are not defined yet on a fresh kernel.)
means30deep = np.array(means30deep)
names30deep = np.array(names30deep)

100.0%%

In [178]:
# Gather "mean_z_std" from every record whose file name contains
# "lup_shallow_50" but not "20"; records that lack the key are ignored.
# NOTE(review): the reason for excluding names containing "20" is not visible
# here -- confirm it still selects the intended runs.
som_dir = "D:/SPHEREx_SOM/record/5th_exploration/SOM/"
files = os.listdir(som_dir)
means50shallow = []
names50shallow = []
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_shallow_50" in i and "20" not in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            if "mean_z_std" in record:
                means50shallow.append(record["mean_z_std"])
                names50shallow.append(i)
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")
means50shallow = np.array(means50shallow)
names50shallow = np.array(names50shallow)

100.0%%

In [179]:
# Gather "mean_z_std" from every record whose file name contains
# "lup_deep_50"; records that lack the key are ignored.
som_dir = "D:/SPHEREx_SOM/record/5th_exploration/SOM/"
files = os.listdir(som_dir)
means50deep = []
names50deep = []
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_deep_50" in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            if "mean_z_std" in record:
                means50deep.append(record["mean_z_std"])
                names50deep.append(i)
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")
means50deep = np.array(means50deep)
names50deep = np.array(names50deep)

100.0%%

In [180]:
# Gather "mean_z_std" from every record whose file name contains
# "lup_deep_70"; records that lack the key are ignored.
som_dir = "D:/SPHEREx_SOM/record/6th_exploration/SOM/"
files = os.listdir(som_dir)
means70deep = []
names70deep = []
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_deep_70" in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            if "mean_z_std" in record:
                means70deep.append(record["mean_z_std"])
                names70deep.append(i)
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")

means70deep = np.array(means70deep)
names70deep = np.array(names70deep)

100.0%%

In [181]:
# Gather "mean_z_std" from every record whose file name contains
# "lup_shallow_70"; records that lack the key are ignored.
som_dir = "D:/SPHEREx_SOM/record/6th_exploration/SOM/"
files = os.listdir(som_dir)
means70shallow = []
names70shallow = []
n = 0
for i in files:
    n += 1
    print(f"{round(n / len(files) * 100, 3) }%", end = "\r")
    if "lup_shallow_70" in i:
        try:
            # NOTE(security): pickle.load can execute arbitrary code -- only
            # read record files produced by this project.
            with open(som_dir + i, "rb") as fh:
                record = pickle.load(fh)
            if "mean_z_std" in record:
                means70shallow.append(record["mean_z_std"])
                names70shallow.append(i)
        except Exception as exc:
            # Unreadable record: report it and carry on (Ctrl-C still stops the loop).
            print(f"Skipping {i}: {exc!r}")

means70shallow = np.array(means70shallow)
names70shallow = np.array(names70shallow)
# Keep only runs whose mean scatter exceeds 0.13; NaN means are dropped too,
# because NaN > 0.13 is False.
# NOTE(review): the reason for the 0.13 cut is not visible here -- confirm.
above_cut = means70shallow > 0.13
names70shallow = names70shallow[above_cut]
means70shallow = means70shallow[above_cut]

100.0%%

In [182]:
# Smallest "mean_z_std" among the deep runs, one line per collection.
for deep_means in (means30deep, means50deep, means70deep):
    print(np.min(deep_means))

0.11670291420469942
0.09753727190299434
0.08651051593538707


In [183]:
# File name of the deep run with the smallest "mean_z_std", per collection.
for deep_names, deep_means in ((names30deep, means30deep),
                               (names50deep, means50deep),
                               (names70deep, means70deep)):
    print(deep_names[np.argmin(deep_means)])

chi2_lup_deep_30_0.7_0.112_2.pkl
chi2_lup_deep_50_0.9_0.4_10_8.pkl
chi2_lup_deep_70_1.1_1.1500000000000004_7_10.pkl


In [185]:
# Smallest "mean_z_std" among the shallow runs, one line per collection.
for shallow_means in (means30shallow, means50shallow, means70shallow):
    print(np.min(shallow_means))

0.16333414270143426
0.1557392826995994
0.14714224449843774


In [186]:
# File name of the shallow run with the smallest "mean_z_std", per collection.
for shallow_names, shallow_means in ((names30shallow, means30shallow),
                                     (names50shallow, means50shallow),
                                     (names70shallow, means70shallow)):
    print(shallow_names[np.argmin(shallow_means)])

chi2_lup_shallow_30_1.6_0.02_1000_2.pkl
chi2_lup_shallow_50_1.8_0.15_1000_10.pkl
chi2_lup_shallow_70_3.0_0.2_9_1000.pkl


In [187]:
# Display the ten lowest "mean_z_std" values collected for the
# "lup_shallow_70" records, followed by the matching record file names.
best_std(names = names70shallow, means = means70shallow, n = 10)

([np.float64(0.14714224449843774),
  np.float64(0.1474401795761474),
  np.float64(0.14755486028834508),
  np.float64(0.1476456534258754),
  np.float64(0.14771782321299337),
  np.float64(0.14774044221769944),
  np.float64(0.14829741729057672),
  np.float64(0.1483199716656326),
  np.float64(0.1484160280066544),
  np.float64(0.1485132694997687)],
 [np.str_('chi2_lup_shallow_70_3.0_0.2_9_1000.pkl'),
  np.str_('chi2_lup_shallow_70_2.6_0.25_6_1000.pkl'),
  np.str_('chi2_lup_shallow_70_2.6_0.25_5_1000.pkl'),
  np.str_('chi2_lup_shallow_70_3.0_0.2_8_1000.pkl'),
  np.str_('chi2_lup_shallow_70_2.6_0.25_10_1000.pkl'),
  np.str_('chi2_lup_shallow_70_2.6_0.25_8_1000.pkl'),
  np.str_('chi2_lup_shallow_70_3.0_0.2_7_1000.pkl'),
  np.str_('chi2_lup_shallow_70_1.7_0.45_10_100.pkl'),
  np.str_('chi2_lup_shallow_70_3.0_0.2_5_1000.pkl'),
  np.str_('chi2_lup_shallow_70_2.6_0.25_7_1000.pkl')])

In [188]:
# Display the ten lowest "mean_z_std" values collected for the
# "lup_deep_70" records, followed by the matching record file names.
best_std(names = names70deep, means = means70deep, n = 10)

([np.float64(0.08651051593538707),
  np.float64(0.0865135717925882),
  np.float64(0.08657592544638155),
  np.float64(0.08671415560663555),
  np.float64(0.08705014141707373),
  np.float64(0.0870682024314059),
  np.float64(0.08715527827370809),
  np.float64(0.0871632220112652),
  np.float64(0.08728962897161734),
  np.float64(0.08729257115339313)],
 [np.str_('chi2_lup_deep_70_1.1_1.1500000000000004_7_10.pkl'),
  np.str_('chi2_lup_deep_70_1.3_1_7_10.pkl'),
  np.str_('chi2_lup_deep_70_1.9749999999999999_0.34_6_10.pkl'),
  np.str_('chi2_lup_deep_70_1.5_0.667_6_10.pkl'),
  np.str_('chi2_lup_deep_70_1.525_0.617_6_10.pkl'),
  np.str_('chi2_lup_deep_70_1.925_0.54_6_10.pkl'),
  np.str_('chi2_lup_deep_70_1.3_1_6_10.pkl'),
  np.str_('chi2_lup_deep_70_1.125_0.9000000000000001_7_10.pkl'),
  np.str_('chi2_lup_deep_70_2.1_0.54_9_10.pkl'),
  np.str_('chi2_lup_deep_70_1.5499999999999998_0.7170000000000001_6_10.pkl')])