# Tile Rubin tract 5063 (all patches) and fetch Euclid counterparts

This mirrors `01_getdata_patch.ipynb` but loops over every patch in a tract. It tolerates missing bands/patches by skipping what's unavailable.


In [None]:
import os, glob, tarfile
from pathlib import Path
import numpy as np
from lsst.daf.butler import Butler
try:
    from tqdm.auto import tqdm
except Exception:
    tqdm = None
import lsst.geom as geom

from astroquery.ipac.irsa import Irsa
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import Table
from astropy.io import fits
from astropy.nddata import Cutout2D
from astropy.wcs import WCS
import fsspec

# ---- Config ----
# Butler selection: which tract/skymap to tile and where the coadds live.
TRACT = 5063
SKYMAP = "lsst_cells_v1"
REPO = "dp1"
COLLECTION = "LSSTComCam/DP1"
DATASETTYPE = "deep_coadd"

# Rubin bands to try for every patch; bands that fail to load are skipped.
# You can reorder / trim if you only need a subset
bands_rubin = ("u","g","r","i","z","y")
TILE_SIZE = 512    # tile edge length in pixels
STRIDE    = 256    # step between tile origins; smaller than TILE_SIZE, so tiles overlap

# Batch controls (small disk footprint)
BATCH_SIZE = 5          # number of tiles per batch
MAX_TOTAL_TILES = None  # set to an int to stop early
SKIP_TILES = 0         # number of tiles to skip before starting
DELETE_AFTER_ARCHIVE = True  # remove the loose .npz files once they are in a batch archive
VERBOSE_EUCLID_BANDS = True  # print per-band downloads

# Output locations (created eagerly so later writes cannot fail on a missing directory).
# archive_batch() stores members relative to "../data", so all three must live under it.
OUT_RUBIN_ROOT = Path("../data/rubin_tiles_tract5063")
OUT_EUCLID_DIR = Path("../data/euclid_tiles_tract5063")
ARCHIVE_DIR    = Path("../data/batch_archives_tract5063")
OUT_RUBIN_ROOT.mkdir(parents=True, exist_ok=True)
OUT_EUCLID_DIR.mkdir(parents=True, exist_ok=True)
ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)

# Euclid bands to request and the side length of each cutout (arcsec).
bands_euclid = ("VIS", "Y", "J", "H")
EUCLID_SIZE_ARCSEC = 105.0

# Shared Butler handle used by every loader below.
butler = Butler(REPO, collections=COLLECTION)


def get_patches_in_tract(butler, tract, band="r", datasetType=DATASETTYPE, skymap=SKYMAP):
    """List patches that have *at least* the chosen band; safer than assuming all 100 exist.

    Parameters
    ----------
    butler : Butler
        Butler used to run the dataset query.
    tract : int
        Tract to search.
    band : str
        Band a patch must have to be listed.
    datasetType : str
        Dataset type to query.
    skymap : str
        Skymap name the tract/patch ids refer to.

    Returns
    -------
    list
        Sorted, de-duplicated patch ids found by the query.
    """
    # The bind keys must NOT reuse the dimension names: with bind={"tract": ...}
    # the expression "tract = tract" no longer compares the dimension against a
    # value (both sides resolve to the same identifier), so the constraint is
    # either rejected or meaningless. Distinct names keep left = dimension,
    # right = bound value.
    refs = butler.query_datasets(
        datasetType,
        where="tract = tract_id AND band = band_name AND skymap = skymap_name",
        bind={"tract_id": tract, "band_name": band, "skymap_name": skymap},
        with_dimension_records=True,
    )
    return sorted({ref.dataId["patch"] for ref in refs})


def load_patch_exposures_by_id(butler, tract, patch, bands=bands_rubin, datasetType=DATASETTYPE, skymap=SKYMAP):
    """Load the Rubin coadd of one patch in every band that is available.

    Bands whose ``butler.get`` fails are reported and skipped, so the result
    may contain fewer bands than requested.

    Parameters
    ----------
    butler : Butler
        Butler used to fetch the exposures.
    tract, patch : int
        Identify the patch to load.
    bands : sequence of str
        Bands to try, in the order they should appear in the output.
    datasetType : str
        Dataset type to fetch.
    skymap : str
        Skymap name used in the data id.

    Returns
    -------
    exps : dict
        ``{band: exposure}`` for the bands that loaded.
    wcs_full
        WCS of the first band that loaded (``exposure.getWcs()``).
    available : list of str
        Loaded bands, in request order.

    Raises
    ------
    RuntimeError
        If no band could be loaded for this patch.
    """
    exps = {}
    available = []
    for b in bands:
        if VERBOSE_EUCLID_BANDS:
            # This loads Rubin coadds; the message used to claim a Euclid
            # science/noise download (copy-paste from load_euclid_cutouts).
            print(f"    Rubin band {b} (patch {patch})")
        dataId = {"tract": tract, "patch": patch, "band": b, "skymap": skymap}
        try:
            exps[b] = butler.get(datasetType, dataId=dataId)
            available.append(b)
        except Exception as e:
            # Deliberately best-effort: a missing band must not abort the patch.
            print(f"  skipping band {b} for patch {patch}: {e}")
    if not available:
        raise RuntimeError(f"No bands found for patch {patch}")
    wcs_full = exps[available[0]].getWcs()
    return exps, wcs_full, available


def wcs_to_hdr_dict_lsst(wcs_lsst):
    """Flatten the FITS metadata of an LSST WCS into a plain ``{name: value}`` dict.

    Every name reported by ``getFitsMetadata().names()`` is read back with
    ``getScalar`` and stored under the same name.
    """
    metadata = wcs_lsst.getFitsMetadata()
    header = {}
    for name in metadata.names():
        header[name] = metadata.getScalar(name)
    return header


def sanitize_rms(rms, huge=1e10):
    """Return a float32 copy of *rms* with unusable pixels replaced by NaN.

    A pixel is unusable when it is non-finite, not strictly positive, or
    larger than *huge*. The input array is never modified.
    """
    as_f32 = rms.astype(np.float32, copy=False)
    cleaned = as_f32.copy()
    # All masks are computed on the untouched float32 view, then applied in turn.
    for flagged in (~np.isfinite(as_f32), as_f32 <= 0, as_f32 > huge):
        cleaned[flagged] = np.nan
    return cleaned


def load_euclid_cutouts(ra, dec, size_arcsec, bands=("VIS","Y","J","H"), collection="euclid_DpdMerBksMosaic", radius_arcsec=60):
    """Fetch Euclid science and variance cutouts centred on (*ra*, *dec*).

    The IRSA SIA service is queried once around the position; for each band
    the first matching "science" row and the first matching "noise" row are
    opened and cut to a *size_arcsec* square. The noise cutout is sanitised
    and squared to give a variance map.

    Parameters
    ----------
    ra, dec : float
        Cutout centre in degrees (ICRS).
    size_arcsec : float
        Cutout side length in arcseconds.
    bands : sequence of str
        Values matched against the ``energy_bandpassname`` column.
    collection : str
        SIA collection to query.
    radius_arcsec : float
        Search radius of the SIA query in arcseconds.

    Returns
    -------
    out_img : dict
        ``{band: float32 image or None}``.
    out_var : dict
        ``{band: float32 variance or None}``.
    wcs_out : dict
        ``{band: cutout WCS}``, only for bands with a science image.
    """
    center = SkyCoord(ra=ra*u.deg, dec=dec*u.deg, frame="icrs")
    results = Irsa.query_sia(pos=(center, radius_arcsec*u.arcsec), collection=collection)
    if not isinstance(results, Table):
        results = results.to_table()

    def first_match(band, subtype):
        # First table row for this band / product subtype, or None.
        keep = (results["energy_bandpassname"] == band) & (results["dataproduct_subtype"] == subtype)
        matched = results[keep]
        if len(matched):
            return matched[0]
        return None

    def cutout_from(url):
        # Open the remote FITS file and cut the primary HDU around the centre.
        with fsspec.open(url, "rb") as fh:
            with fits.open(fh, memmap=False) as hdul:
                primary = hdul[0]
                full_wcs = WCS(primary.header)
                stamp = Cutout2D(primary.data, center, size_arcsec * u.arcsec, wcs=full_wcs)
                return np.array(stamp.data, dtype=np.float32), stamp.wcs

    out_img = dict.fromkeys(bands)
    out_var = dict.fromkeys(bands)
    wcs_out = {}

    for b in bands:
        if VERBOSE_EUCLID_BANDS:
            print(f"    Euclid band {b} (science/noise)")
        science_row = first_match(b, "science")
        if science_row is None:
            continue
        out_img[b], wcs_out[b] = cutout_from(science_row["access_url"])

        noise_row = first_match(b, "noise")
        if noise_row is None:
            continue
        rms, _ = cutout_from(noise_row["access_url"])
        rms = sanitize_rms(rms, huge=1e10)
        out_var[b] = rms * rms
    return out_img, out_var, wcs_out


def iter_tile_positions(h, w, tile_size, stride):
    """Yield the ``(x0, y0)`` origin of every tile that fits fully in an ``h`` x ``w`` array.

    Origins advance by *stride* along each axis, row by row (``x0`` varies
    fastest). Nothing is yielded when the array is smaller than one tile.
    """
    columns = range(0, w - tile_size + 1, stride)
    rows = range(0, h - tile_size + 1, stride)
    yield from ((col, row) for row in rows for col in columns)


def save_rubin_tile(exps, wcs_full, bands, out_dir, x0, y0, tile_size=TILE_SIZE, stride=STRIDE):
    """Cut one tile out of every band of a patch and write it as a compressed ``.npz``.

    Parameters
    ----------
    exps : dict
        ``{band: exposure}``; each exposure must provide ``image``,
        ``variance`` and ``mask`` arrays and ``getXY0()``.
    wcs_full
        WCS of the full patch (pixel coordinates include the ``getXY0()`` offset).
    bands : sequence of str
        Bands to stack, in output order; all must be keys of *exps*.
    out_dir : path-like
        Directory for the tile file (created if missing).
    x0, y0 : int
        Tile origin in array (patch-local) pixel indices.
    tile_size : int
        Tile edge length in pixels.
    stride : int
        Stored in the file for reference only.

    Returns
    -------
    fn : Path
        Path of the written file, ``<out_dir>/tile_x{x0}_y{y0}.npz``.
    tile_id : str
        ``tile_x{x0:05d}_y{y0:05d}`` (patch-local, so not unique across patches).
    ra_c, dec_c : float
        Sky position of the tile centre in degrees.

    Notes
    -----
    ``wcs_hdr`` is stored as a Python dict, i.e. a pickled object array, so
    readers need ``np.load(..., allow_pickle=True)`` to access that key.
    """
    patch_origin = exps[bands[0]].getXY0()
    x0_patch = patch_origin.getX()
    y0_patch = patch_origin.getY()

    # Array indices are patch-local; adding XY0 gives the WCS pixel frame.
    half = (tile_size - 1) / 2.0
    sp_global = wcs_full.pixelToSky(x0_patch + x0 + half, y0_patch + y0 + half)
    ra_c = sp_global.getRa().asDegrees()
    dec_c = sp_global.getDec().asDegrees()

    # Shift the pixel origin so that tile pixel (0, 0) maps to the same sky
    # position as patch pixel (x0_patch + x0, y0_patch + y0).
    wcs_local = wcs_full.copyAtShiftedPixelOrigin(geom.Extent2D(-(x0_patch + x0), -(y0_patch + y0)))

    # No per-band progress print here: nothing is downloaded, and the old
    # message ("Euclid band ... (science/noise)") was a copy-paste that
    # mislabelled Rubin slices and fired for every band of every tile.
    window = (slice(y0, y0 + tile_size), slice(x0, x0 + tile_size))
    imgs_stacked = np.stack([exps[b].image.array[window].astype(np.float32) for b in bands], axis=0)
    vars_stacked = np.stack([exps[b].variance.array[window].astype(np.float32) for b in bands], axis=0)
    masks_stacked = np.stack([exps[b].mask.array[window].astype(np.int32) for b in bands], axis=0)

    tile_id = f"tile_x{x0:05d}_y{y0:05d}"
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    fn = out_dir / f"{tile_id}.npz"
    np.savez_compressed(
        fn,
        img=imgs_stacked,
        var=vars_stacked,
        mask=masks_stacked,
        wcs_hdr=wcs_to_hdr_dict_lsst(wcs_local),
        x0=np.int32(x0), y0=np.int32(y0),
        tile_id=np.bytes_(tile_id),
        ra_center=np.float64(ra_c),
        dec_center=np.float64(dec_c),
        tile_size=np.int32(tile_size),
        stride=np.int32(stride),
        bands=np.array(list(bands)),
    )
    return fn, tile_id, ra_c, dec_c


def save_euclid_tile(tile_id, ra_c, dec_c, out_dir):
    """Write the Euclid cutouts for one tile to ``<out_dir>/<tile_id>_euclid.npz``.

    If that file already exists nothing is fetched and its path is returned
    unchanged. Otherwise the cutouts for ``bands_euclid`` are downloaded and
    stored as ``img_<band>``, ``wcs_<band>`` (header string) and
    ``var_<band>``, alongside ``ra_center``, ``dec_center`` and ``tile_id``.
    Bands without data are simply left out.

    Returns
    -------
    Path
        Location of the (new or pre-existing) Euclid file.
    """
    target = Path(out_dir) / f"{tile_id}_euclid.npz"
    if not target.exists():
        images, variances, wcs_by_band = load_euclid_cutouts(
            ra_c, dec_c, size_arcsec=EUCLID_SIZE_ARCSEC, bands=bands_euclid
        )
        payload = {"ra_center": ra_c, "dec_center": dec_c, "tile_id": tile_id}
        for band in bands_euclid:
            image = images[band]
            variance = variances[band]
            if image is not None:
                payload[f"img_{band}"] = image
                payload[f"wcs_{band}"] = wcs_by_band[band].to_header_string()
            if variance is not None:
                payload[f"var_{band}"] = variance
        np.savez_compressed(target, **payload)
    return target


def archive_batch(batch_id, rubin_files, euclid_files):
    """Pack one batch of tile files into ``ARCHIVE_DIR/batch_<id>.tar.gz``.

    Files that no longer exist are skipped. Member names are the file paths
    relative to ``../data``, so every input must live under that directory.

    Returns
    -------
    Path
        Path of the archive that was written.
    """
    archive_path = ARCHIVE_DIR / f"batch_{batch_id:05d}.tar.gz"
    data_root = Path("../data")
    with tarfile.open(archive_path, "w:gz") as tar:
        for member in map(Path, rubin_files + euclid_files):
            if member.exists():
                tar.add(member, arcname=str(member.relative_to(data_root)))
    print(f"Archived batch {batch_id}: {archive_path}")
    return archive_path


def delete_files(paths):
    """Remove every file in *paths*, reporting (not raising) any failure."""
    for entry in paths:
        try:
            Path(entry).unlink()
        except Exception as err:
            # Best effort: one undeletable file must not stop the cleanup.
            print(f"  failed to delete {entry}: {err}")


# ---- Run Rubin tiling for the whole tract in small batches ----
def _flush_batch(batch_id, batch):
    """Fetch Euclid counterparts for one batch, archive it, and optionally clean up.

    *batch* is a list of ``(rubin_file, tile_id, ra_center, dec_center)``.
    A failed Euclid fetch is reported and that tile is archived without a
    Euclid file.
    """
    rubin_files = [item[0] for item in batch]
    euclid_files = []
    for _, tid, ra, dec in batch:
        # NOTE(review): tile_id holds only patch-local x0/y0, so Euclid file
        # names repeat across patches. With DELETE_AFTER_ARCHIVE = False,
        # save_euclid_tile() will return the file written for another patch.
        try:
            euclid_files.append(save_euclid_tile(tid, ra, dec, OUT_EUCLID_DIR))
            print(f"Saved Euclid match for {tid}")
        except Exception as e:
            print(f"Failed to fetch Euclid for {tid}: {e}")

    archive_batch(batch_id, rubin_files, euclid_files)
    if DELETE_AFTER_ARCHIVE:
        delete_files(rubin_files + euclid_files)


patch_ids = get_patches_in_tract(butler, TRACT, band="r")
print(f"Tract {TRACT} has {len(patch_ids)} patches with r-band: {patch_ids}")

batch = []
# Start numbering after the batches that SKIP_TILES tiles would have filled
# (ceil division), so a resumed run does not overwrite earlier archives.
# With SKIP_TILES = 0 this is 0, as before.
batch_id = -(-SKIP_TILES // BATCH_SIZE)
total_tiles = 0
skipped_tiles = 0
stop_early = False

for patch in patch_ids:
    try:
        exps, wcs_full, bands_present = load_patch_exposures_by_id(butler, tract=TRACT, patch=patch)
    except Exception as e:
        print(f"Skipping patch {patch}: {e}")
        continue

    out_dir = OUT_RUBIN_ROOT / f"patch{int(patch):02d}"
    h, w = exps[bands_present[0]].image.array.shape

    n_tiles_patch = ((h - TILE_SIZE) // STRIDE + 1) * ((w - TILE_SIZE) // STRIDE + 1)
    tile_iter = iter_tile_positions(h, w, TILE_SIZE, STRIDE)
    if tqdm is not None:
        tile_iter = tqdm(tile_iter, total=n_tiles_patch, desc=f"tract {TRACT} patch {patch}")

    for x0, y0 in tile_iter:
        # Honour SKIP_TILES: pass over the first N tile positions (in
        # patch/tile iteration order) without writing or counting them.
        if skipped_tiles < SKIP_TILES:
            skipped_tiles += 1
            continue

        r_fn, tile_id, ra_c, dec_c = save_rubin_tile(
            exps, wcs_full, bands_present, out_dir, x0, y0,
            tile_size=TILE_SIZE, stride=STRIDE,
        )
        print(f"Tile: tract {TRACT} patch {patch} {tile_id} (RA {ra_c:.5f}, Dec {dec_c:.5f})")
        batch.append((r_fn, tile_id, ra_c, dec_c))
        total_tiles += 1

        if MAX_TOTAL_TILES is not None and total_tiles >= MAX_TOTAL_TILES:
            stop_early = True

        # A batch is flushed when full, or immediately when stopping early so
        # the partial batch is not left behind un-archived.
        if len(batch) >= BATCH_SIZE or stop_early:
            batch_id += 1
            _flush_batch(batch_id, batch)
            batch = []

        if stop_early:
            break

    if stop_early:
        break

# flush remainder
if batch:
    batch_id += 1
    _flush_batch(batch_id, batch)
    batch = []

if skipped_tiles:
    print("Tiles skipped:", skipped_tiles)
print("Total tiles processed:", total_tiles)



In [None]:

import lsst.afw.geom as afwGeom
from lsst.daf.base import PropertySet
import numpy as np
import glob
import matplotlib.pyplot as plt


def wcs_from_hdr_dict(hdr_dict):
    """Build a sky WCS from a ``{name: value}`` header mapping.

    Each entry is copied into a ``PropertySet`` which is then handed to
    ``afwGeom.makeSkyWcs``.
    """
    metadata = PropertySet()
    for keyword in hdr_dict:
        metadata.set(keyword, hdr_dict[keyword])
    return afwGeom.makeSkyWcs(metadata)


def tile_corners_from_npz(npz_path):
    """Return the sky outline of a saved Rubin tile.

    Parameters
    ----------
    npz_path : path-like
        Tile file holding ``wcs_hdr`` (a pickled header dict) and ``tile_size``.

    Returns
    -------
    ra, dec : numpy.ndarray
        Five values each, in degrees: the four corner pixel centres followed
        by the first corner again so the outline plots as a closed polygon.
    """
    # allow_pickle is needed because wcs_hdr is stored as a Python dict.
    # Unpickling can execute arbitrary code -- only open tile files you trust.
    # The context manager closes the archive; without it every call leaks a
    # file handle, which adds up when looping over all tiles.
    with np.load(npz_path, allow_pickle=True) as d:
        hdr_dict = d["wcs_hdr"].item()
        tile_size = int(d["tile_size"])

    wcs = wcs_from_hdr_dict(hdr_dict)
    S = tile_size - 1
    corners_pix = [(0, 0), (S, 0), (S, S), (0, S), (0, 0)]
    sky_points = [wcs.pixelToSky(x, y) for x, y in corners_pix]
    ra = np.array([sp.getRa().asDegrees() for sp in sky_points])
    dec = np.array([sp.getDec().asDegrees() for sp in sky_points])
    return ra, dec

# Collect every saved Rubin tile (any patch sub-directory) and outline it on the sky.
files = sorted(glob.glob(f"{OUT_RUBIN_ROOT}/**/tile_*.npz", recursive=True))
print("Found", len(files), "tiles")

fig, ax = plt.subplots(figsize=(7, 7))
for fn in files:
    ra, dec = tile_corners_from_npz(fn)
    ax.plot(ra, dec, linewidth=0.5, alpha=0.5)
ax.set_xlabel("RA [deg]")
ax.set_ylabel("Dec [deg]")
ax.set_title("Rubin 512×512 tile footprints (tract 5063)")
# RA is drawn increasing to the left; equal aspect keeps the tiles square.
ax.invert_xaxis()
ax.set_aspect("equal", adjustable="box")
plt.show()


# Fetch Euclid VIS + Y/J/H for every Rubin tile in tract 5063

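Unlike the Rubin output, which has one sub-directory per patch, Euclid cutouts are written flat into `OUT_EUCLID_DIR` as `{tile_id}_euclid.npz`, reusing each Rubin tile's `tile_id`. Because `tile_id` encodes only the patch-local `x0`/`y0`, tiles from different patches map to the same Euclid filename.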


In [None]:
from astroquery.ipac.irsa import Irsa
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import Table
from astropy.io import fits
from astropy.nddata import Cutout2D
from astropy.wcs import WCS
import fsspec
import os, glob, numpy as np

from scipy.ndimage import maximum_filter, median_filter, zoom, gaussian_filter
from scipy.optimize import linear_sum_assignment
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Circle
import matplotlib.patches as mpatches
from astropy.coordinates import match_coordinates_sky, search_around_sky


def sanitize_rms(rms, huge=1e10):
    """Copy *rms* to float32 and blank out values that cannot be a valid RMS.

    Non-finite values, values ``<= 0`` and values ``> huge`` become NaN.
    The caller's array is left untouched.
    """
    cleaned = rms.astype(np.float32, copy=False).copy()
    invalid = ~np.isfinite(cleaned)
    invalid = invalid | (cleaned <= 0)
    invalid = invalid | (cleaned > huge)
    cleaned[invalid] = np.nan
    return cleaned

def robust_imshow(ax, img, title="", p=(1, 99)):
    """Show *img* on *ax* with the colour range clipped to the *p* percentiles.

    NaNs are ignored when computing the percentiles. When *img* is ``None``
    the axis is switched off and titled ``"<title> (missing)"`` instead.
    """
    if img is not None:
        vmin, vmax = np.nanpercentile(img, p)
        ax.imshow(img, origin="lower", vmin=vmin, vmax=vmax)
        ax.set_title(title)
        ax.set_xticks([])
        ax.set_yticks([])
        return
    ax.set_title(f"{title} (missing)")
    ax.axis("off")

def save_bundle(path, **kw):
    """Write the keyword arrays to a compressed ``.npz``, dropping ``None`` values."""
    present = {}
    for name, value in kw.items():
        if value is None:
            continue
        present[name] = value
    np.savez_compressed(path, **present)
    print("saved:", path)


def load_euclid_cutouts(ra, dec, size_arcsec, bands=("VIS","Y","J","H"), collection="euclid_DpdMerBksMosaic", radius_arcsec=60):
    """Download per-band Euclid image and variance cutouts around a sky position.

    One SIA query is made around (*ra*, *dec*). For every requested band the
    first "science" product is cut to a *size_arcsec* square; if a "noise"
    product exists as well, its cutout is sanitised and squared to a variance.

    Returns
    -------
    out_img, out_var : dict
        ``{band: float32 array or None}`` for images and variances.
    wcs_out : dict
        ``{band: cutout WCS}``; has an entry only where an image was found.
    """
    target = SkyCoord(ra=ra*u.deg, dec=dec*u.deg, frame="icrs")
    listing = Irsa.query_sia(pos=(target, radius_arcsec*u.arcsec), collection=collection)
    if not isinstance(listing, Table):
        listing = listing.to_table()

    def pick_row(band, subtype):
        # First listing row for the band / subtype pair, else None.
        band_ok = listing["energy_bandpassname"] == band
        type_ok = listing["dataproduct_subtype"] == subtype
        hits = listing[band_ok & type_ok]
        return hits[0] if len(hits) else None

    def read_stamp(row):
        # Cut the primary HDU of the product behind this row.
        with fsspec.open(row["access_url"], "rb") as stream:
            with fits.open(stream, memmap=False) as hdul:
                mosaic_wcs = WCS(hdul[0].header)
                stamp = Cutout2D(hdul[0].data, target, size_arcsec * u.arcsec, wcs=mosaic_wcs)
                pixels = np.array(stamp.data, dtype=np.float32)
                return pixels, stamp.wcs

    out_img = dict.fromkeys(bands)
    out_var = dict.fromkeys(bands)
    wcs_out = {}

    for b in bands:
        science = pick_row(b, "science")
        if science is None:
            continue
        out_img[b], wcs_out[b] = read_stamp(science)

        noise = pick_row(b, "noise")
        if noise is None:
            continue
        rms, _ = read_stamp(noise)
        rms = sanitize_rms(rms, huge=1e10)
        out_var[b] = rms * rms
    return out_img, out_var, wcs_out

# ---- Config ----
# Rubin tiles are read from the directory tree written by the tiling cell.
IN_RUBIN_DIR = OUT_RUBIN_ROOT
# Rebinds OUT_EUCLID_DIR as a plain string (same location as the Path used
# by the tiling cell); callers in this notebook go through os.path / Path().
OUT_EUCLID_DIR = "../data/euclid_tiles_tract5063"
os.makedirs(OUT_EUCLID_DIR, exist_ok=True)

# Euclid bands to request and the cutout side length in arcseconds.
bands_euclid = ("VIS", "Y", "J", "H")
EUCLID_SIZE_ARCSEC = 105.0


def process_euclid_for_rubin_tiles():
    """Fetch and save Euclid cutouts for every Rubin tile found under ``IN_RUBIN_DIR``.

    For each ``tile_*.npz`` the stored centre and ``tile_id`` are read, and
    the cutouts are written to ``OUT_EUCLID_DIR/<tile_id>_euclid.npz``.
    Tiles whose Euclid file already exists are skipped; a failed fetch is
    reported and the loop continues.

    NOTE(review): ``tile_id`` is patch-local, so tiles from different patches
    share an output name and the later ones are skipped as "already done".
    """
    rubin_files = sorted(glob.glob(os.path.join(IN_RUBIN_DIR, "**", "tile_*.npz"), recursive=True))
    print(f"Found {len(rubin_files)} Rubin tiles. Fetching Euclid counterparts...")
    for r_file in rubin_files:
        with np.load(r_file) as data:
            ra_c = float(data['ra_center']); dec_c = float(data['dec_center'])
            raw_id = data['tile_id']
        # np.load always hands back an ndarray, so unwrap it first and only
        # then decide whether the payload is bytes (as written by
        # save_rubin_tile) or already text. The previous isinstance test was
        # always true and called .decode() on str payloads.
        if isinstance(raw_id, np.ndarray):
            raw_id = raw_id.item()
        tile_id = raw_id.decode('utf-8') if isinstance(raw_id, bytes) else str(raw_id)

        out_fn = os.path.join(OUT_EUCLID_DIR, f"{tile_id}_euclid.npz")
        if os.path.exists(out_fn):
            continue
        try:
            eu_imgs, eu_var, eu_wcss = load_euclid_cutouts(ra_c, dec_c, size_arcsec=EUCLID_SIZE_ARCSEC, bands=bands_euclid)
            save_dict = {"ra_center": ra_c, "dec_center": dec_c, "tile_id": tile_id}
            for b in bands_euclid:
                if eu_imgs[b] is not None:
                    save_dict[f"img_{b}"] = eu_imgs[b]
                    save_dict[f"wcs_{b}"] = eu_wcss[b].to_header_string()
                if eu_var[b] is not None:
                    save_dict[f"var_{b}"] = eu_var[b]
            np.savez_compressed(out_fn, **save_dict)
            print(f"Saved Euclid match for {tile_id}")
        except Exception as e:
            # Best effort: network/archive errors for one tile must not stop the run.
            print(f"Failed to fetch Euclid for {tile_id}: {e}")

# Optional: only needed if you want to re-fetch Euclid separately
# process_euclid_for_rubin_tiles()



In [None]:

import os, numpy as np, matplotlib.pyplot as plt
from astropy.wcs import WCS

# pick one tile id for visualization
tile_id_str = "tile_x00000_y00000"
rubin_path  = os.path.join(OUT_RUBIN_ROOT,  "patch00", f"{tile_id_str}.npz")
euclid_path = os.path.join(OUT_EUCLID_DIR, f"{tile_id_str}_euclid.npz")

r_data = np.load(rubin_path)
e_data = np.load(euclid_path)

rubin_bands_full = ["u", "g", "r", "i", "z", "y"]
nb_rubin = r_data['img'].shape[0]

# Label panels with the bands actually stored in the tile. Indexing the full
# band list by position mislabels every panel after a missing band.
if "bands" in r_data.files:
    rubin_labels = [str(b) for b in r_data["bands"]]
else:
    rubin_labels = rubin_bands_full

# Layout: slots 0-5 are reserved for Rubin, slots 6-9 for Euclid.
fig, axes = plt.subplots(2, 5, figsize=(20, 8)); axes = axes.flatten()

# Rubin panels (only bands present)
for i in range(nb_rubin):
    band = rubin_labels[i] if i < len(rubin_labels) else f"b{i}"
    robust_imshow(axes[i], r_data['img'][i], title=f"Rubin {band}")

# Hide Rubin slots left empty because a band is missing.
for ax in axes[nb_rubin:6]:
    ax.axis('off')

# Euclid panels
for i, band in enumerate(["VIS", "Y", "J", "H"]):
    ax = axes[i + 6]
    img_key = f"img_{band}"
    if img_key in e_data:
        img = e_data[img_key]
        robust_imshow(ax, img, title=f"Euclid {band}")
    else:
        ax.set_title(f"Euclid {band} (Missing)"); ax.axis('off')

# The title is two lines; the line break must be an explicit "\n" because a
# raw newline inside a single-quoted f-string is a syntax error.
plt.suptitle(
    f"Multi-band view: {tile_id_str}\n"
    f"RA: {float(r_data['ra_center']):.4f}, Dec: {float(r_data['dec_center']):.4f}",
    fontsize=16,
)
plt.tight_layout(); plt.show()
