# COSMOS2025 High-redshift Quiescent Galaxy Identifier

## Required Libraries

In [1]:
import pandas as pd
import joblib
import warnings
warnings.filterwarnings('ignore')

## Helper Function

In [2]:
def calculate_pairwise_differences(data):
    """
    Calculate all pairwise differences between columns in a DataFrame
    (used for deriving colors from photometric bands).

    For each unique pair of columns (i, j) where i < j, a new column is
    appended, named by concatenating the two column names and containing the
    element-wise difference column i minus column j. New columns are added in
    the order (0,1), (0,2), ..., (0,n-1), (1,2), ..., so n input columns
    yield n*(n-1)/2 difference columns.

    Parameters:
    -----------
    data : pandas.DataFrame
        Input DataFrame containing numerical columns to compare. It is left
        unmodified.

    Returns:
    --------
    pandas.DataFrame
        A copy of the input with one extra column per pairwise difference,
        placed after the original columns.
    """
    # Work on a copy: callers typically pass a column selection of a larger
    # frame (e.g. df[[...]]), and assigning new columns to such a selection
    # is the chained-assignment pattern pandas warns about.
    result = data.copy()

    # Snapshot the original column names so that the columns appended below
    # are never themselves paired up.
    names = list(data.columns)

    # Visit each unordered pair exactly once, earlier column first.
    for i, first in enumerate(names):
        for second in names[i + 1:]:
            result[f"{first}{second}"] = result[first] - result[second]

    return result

## Loading Models

In [3]:
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load .pkl files that come from a trusted source.
model = joblib.load('ML_Model_Classifier.pkl')  # Load the trained classifier
scaler = joblib.load('ML_Model_Scaler.pkl')  # Load the scaler fitted on the training data

## COSMOS2025 Data (2.5 < redshift < 5)

In [12]:
df = pd.read_csv('COSMOS2025_ml_classifications.csv')  # Load COSMOS2025 data

## ML Quiescent Identifier

In [13]:
# The 11 photometric bands used to build the model inputs.
# Warning: order is important — the colors are generated pair by pair in this
# order, so it fixes the column order handed to the scaler and classifier
# (presumably it must match the order used at training time — confirm).
bands = ['hst-f814w', 'uvista-y', 'uvista-j', 'uvista-h', 'uvista-ks',
         'irac-ch1', 'irac-ch2', 'nircam_f115w', 'nircam_f150w',
         'nircam_f277w', 'nircam_f444w']

# Take an explicit copy so that adding color columns never writes into a
# selection of `df` (the chained-assignment pattern pandas warns about).
selected_df = df[bands].copy()

# Construct the 55 colors from the 11 selected bands; the leading len(bands)
# columns are the bands themselves and are dropped, keeping only the colors.
X_new = calculate_pairwise_differences(selected_df).iloc[:, len(bands):]

# Scale the new galaxies using the previously fitted scaler (training data)
X_new_scaled = scaler.transform(X_new.values)

# Predict galaxy types: True for Quiescent, False for Non-quiescent
y_new = model.predict(X_new_scaled)

In [14]:
# Tally the ML predictions: each True entry in y_new marks a quiescent galaxy,
# so summing the labels gives the quiescent count and the rest are non-quiescent.
n_qg = sum(y_new)
n_sfg = len(y_new) - n_qg

print(
    f"The number of quiescent galaxies: {n_qg} \n"
    f"The number of non-quiescent galaxies: {n_sfg}"
)

The number of quiescent galaxies: 1111 
The number of non-quiescent galaxies: 8735


## SED-fitting Classification

In [11]:
# Tally the SED-fitting labels stored in the 'SED-class' column: the sum of
# the column is the quiescent count and the remaining rows are non-quiescent.
n_qg = sum(df['SED-class'])
n_sfg = len(df['SED-class']) - n_qg

print(
    f"The number of quiescent galaxies: {n_qg} \n"
    f"The number of non-quiescent galaxies: {n_sfg}"
)

The number of quiescent galaxies: 427 
The number of non-quiescent galaxies: 9419
