# Introducing BEACON

**B**ayesian **E**xoplanet **A**I for **C**haracterization and **O**bservatio**N**s.



In [1]:
import pandas as pd

# Name of the catalogue CSV; a relative path, so it is looked up in the kernel's working directory.
file='Ariel_MCS_TPCs_2024-07-09.csv'

# Read the CSV into `df`, the DataFrame inspected by the cells that follow.
df = pd.read_csv(file)

In [3]:
# Collect every column whose name contains "transit" (case-insensitive) and
# print the matches one per line under a short header.
transit_columns = [col for col in df.columns if 'transit' in col.lower()]
print("Columns with 'transit' in their names:")
for col in transit_columns:
    print(col)

Columns with 'transit' in their names:
Transit Mid Time
Transit Mid Time Error Lower [days]
Transit Mid Time Error Upper [days]
Transit Depth [%]
Transit Depth Error Lower [%]
Transit Depth Error Upper [%]
Transit Duration [hr]
Transit Duration Error Lower [hr]
Transit Duration Error Upper [hr]
Transit Duration T14 [s]
Available Transits
Tier 1 Transits
Tier 2 Transits
Tier 3 Transits


In [4]:
# Display the 'Available Transits' column as the cell's output.
df['Available Transits']

0        57
1       268
2       238
3       115
4        35
       ... 
2020    157
2021     29
2022    652
2023     85
2024    168
Name: Available Transits, Length: 2025, dtype: int64

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# --------- inputs ----------
# Ranked tables (one for transit, one for eclipse) that feed every plot below.
transit_csv, eclipse_csv = (
    Path("outputs_ariel_spec_bnn/tpc_transit_ranked.csv"),
    Path("outputs_ariel_spec_bnn/tpc_eclipse_ranked.csv"),
)

# Destination folder for the PNGs; created (with parents) if it does not exist.
outdir = Path("combined_ranking_plots")
outdir.mkdir(exist_ok=True, parents=True)

# --------- load + sanitize ----------
# Read each table and trim leading/trailing whitespace from its header names.
tr = pd.read_csv(transit_csv).rename(columns=str.strip)
ec = pd.read_csv(eclipse_csv).rename(columns=str.strip)

# Column holding the ranking score in each table.
tr_score_col, ec_score_col = "transit_spectroscopy_score", "eclipse_spectroscopy_score"

# Columns plotted as "predicted required Tier-1 events (median)" and
# "P(required ≤ available)" for each method.
tr_pred_col, tr_pfeas_col = "pred_Tier1Transits_med", "p_pred_le_available_transits"
ec_pred_col, ec_pfeas_col = "pred_Tier1Eclipses_med", "p_pred_le_available_eclipses"

# --------- helpers ----------
def as_float(df, col):
    """Return column ``col`` of ``df`` as a float NumPy array.

    Entries that cannot be parsed as numbers become NaN. When ``df`` has no
    column named ``col`` (``df.get`` yields ``None``), an all-NaN array with
    one entry per row is returned, so callers can mask with ``np.isfinite``
    instead of failing on the missing column.
    """
    column = df.get(col)
    if column is None:
        return np.full(len(df), np.nan, dtype=float)
    return pd.to_numeric(column, errors="coerce").to_numpy(float)

def sym_sigma(df, lo_col, hi_col):
    """Return the symmetrised error: the mean of |``lo_col``| and |``hi_col``|.

    Absolute values are taken so the sign convention of the lower error does
    not matter. Missing or non-numeric entries propagate as NaN.
    """
    lo = as_float(df, lo_col)
    hi = as_float(df, hi_col)
    return 0.5 * (np.abs(lo) + np.abs(hi))

def depth_frac(df):
    """Return the "Transit Depth [%]" column divided by 100 (percent → fraction)."""
    return as_float(df, "Transit Depth [%]") / 100.0

def tess_mag(df):
    """Return the "Star TESS Mag" column as a float array (NaN where unusable)."""
    return as_float(df, "Star TESS Mag")

def size_from_score(score, smin=None, smax=None):
    """Map scores linearly onto marker sizes, with ``smin`` → 20 and ``smax`` → ~220.

    Parameters
    ----------
    score : array-like
        Scores to convert; NaN entries yield NaN sizes.
    smin, smax : float, optional
        Ends of the score scale. Each defaults to the NaN-ignoring minimum /
        maximum of ``score``; pass shared values to keep sizes comparable
        across several score arrays.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``score``.
    """
    s = np.asarray(score, float)
    if s.size == 0:
        # np.nanmin / np.nanmax raise ValueError on zero-size input; there is
        # nothing to scale, so return an empty result of matching shape.
        return np.empty(s.shape, dtype=float)
    if smin is None:
        smin = np.nanmin(s)
    if smax is None:
        smax = np.nanmax(s)
    # The 1e-12 keeps the division defined when every score is identical.
    return 20 + 200 * (s - smin) / (smax - smin + 1e-12)

# Re-sort both tables by score, highest first, and renumber rows from 0 so
# that row position equals rank and positional slices pick the best targets.
tr = tr.sort_values(by=tr_score_col, ascending=False, ignore_index=True)
ec = ec.sort_values(by=ec_score_col, ascending=False, ignore_index=True)

# One fixed colour per method, reused in every figure.
C_TR, C_EC = "tab:blue", "tab:orange"

# How many top-ranked rows of each table get highlighted.
topN = 80

# --------- 1) Score drop-off vs rank (combined) ----------
# Score arrays for both methods; later sections of this cell reuse them.
tr_score = as_float(tr, tr_score_col)
ec_score = as_float(ec, ec_score_col)

# Row position + 1 is the rank, since the tables are sorted best-first.
fig, ax = plt.subplots(figsize=(7.5, 4.8))
ax.plot(np.arange(1, len(tr) + 1), tr_score, color=C_TR, label="Transit")
ax.plot(np.arange(1, len(ec) + 1), ec_score, color=C_EC, label="Eclipse")
ax.set_xlabel("Rank (1 = best)")
ax.set_ylabel("spectroscopy score")
ax.set_title("Score drop-off across ranked lists (Transit vs Eclipse)")
ax.legend()
fig.tight_layout()
fig.savefig(outdir / "01_combined_score_dropoff.png", dpi=200)
plt.show()

# --------- 2) Feasibility vs required (Pareto, combined) ----------
# x: predicted required events; y: probability column, for each method.
tr_pred, tr_pfeas = as_float(tr, tr_pred_col), as_float(tr, tr_pfeas_col)
ec_pred, ec_pfeas = as_float(ec, ec_pred_col), as_float(ec, ec_pfeas_col)

fig, ax = plt.subplots(figsize=(7, 6))

# Faint layer: every row of each table (these carry the legend entries).
ax.scatter(tr_pred, tr_pfeas, color=C_TR, alpha=0.20, label="Transit")
ax.scatter(ec_pred, ec_pfeas, color=C_EC, alpha=0.20, label="Eclipse")

# Bold layer: the first topN rows of each table, drawn on top.
ax.scatter(tr_pred[:topN], tr_pfeas[:topN], color=C_TR, alpha=0.90)
ax.scatter(ec_pred[:topN], ec_pfeas[:topN], color=C_EC, alpha=0.90)

ax.set_xlabel("Predicted required Tier-1 events (median)")
ax.set_ylabel("P(required ≤ available)")
ax.set_title(f"Feasibility vs required events (top {topN} highlighted)")
ax.legend()
fig.tight_layout()
fig.savefig(outdir / "02_combined_feasible_vs_required.png", dpi=200)
plt.show()

# --------- 3) Depth vs brightness, marker size ∝ score (combined) ----------
tr_depth, tr_mag = depth_frac(tr), tess_mag(tr)
ec_depth, ec_mag = depth_frac(ec), tess_mag(ec)

# Pool the finite scores of both methods and scale marker sizes against the
# pooled range, so a given size means the same score in either series.
all_scores = np.concatenate(
    [tr_score[np.isfinite(tr_score)], ec_score[np.isfinite(ec_score)]]
)
smin = np.nanmin(all_scores)
smax = np.nanmax(all_scores)

tr_ms = size_from_score(tr_score, smin=smin, smax=smax)
ec_ms = size_from_score(ec_score, smin=smin, smax=smax)

fig, ax = plt.subplots(figsize=(7.5, 5.5))
ax.scatter(tr_depth, tr_mag, s=tr_ms, color=C_TR, alpha=0.22, label="Transit")
ax.scatter(ec_depth, ec_mag, s=ec_ms, color=C_EC, alpha=0.22, label="Eclipse")

# Log-scaled depth axis; the magnitude axis is flipped so smaller magnitudes sit at the top.
ax.set_xscale("log")
ax.invert_yaxis()
ax.set_xlabel("Transit Depth (fraction)")
ax.set_ylabel("Star TESS Mag (brighter = higher)")
ax.set_title("Depth vs brightness (marker size ∝ score)")
ax.legend()
fig.tight_layout()
fig.savefig(outdir / "03_combined_depth_vs_brightness_size_score.png", dpi=200)
plt.show()

# --------- 4) Overlaid score histograms (combined) ----------
# Only finite scores are binned; both series use 40 bins and are semi-transparent.
fig, ax = plt.subplots(figsize=(7.5, 4.8))
ax.hist(tr_score[np.isfinite(tr_score)], bins=40, color=C_TR, alpha=0.55, label="Transit")
ax.hist(ec_score[np.isfinite(ec_score)], bins=40, color=C_EC, alpha=0.55, label="Eclipse")
ax.set_xlabel("spectroscopy score")
ax.set_ylabel("count")
ax.set_title("Score distributions (Transit vs Eclipse)")
ax.legend()
fig.tight_layout()
fig.savefig(outdir / "04_combined_score_hist.png", dpi=200)
plt.show()

# Report the absolute location of the output folder.
print("Saved combined plots to:", outdir.resolve())


FileNotFoundError: [Errno 2] No such file or directory: 'outputs_ariel_spec_bnn/plots/tpc_transit_ranked.csv'