In [163]:
#!pip3 install --user astropy
#!pip3 install --user kaleido

In [164]:
import numpy as np
import astropy as ap
import pandas as pd
from astropy.io import fits
import scipy.linalg as slg
from scipy.stats import norm, pearsonr
#import scipy.stats
from math import ceil
import csv

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import iplot
import kaleido

# import matplotlib.pyplot as plt
# import matplotlib.colors as colors
# from matplotlib.colors import LinearSegmentedColormap
import glob
import os
# These are in Functions
from os.path import join as pj
from os.path import exists # as pj
# from os.path import abspath as absp

from IPython.display import Image
from IPython.display import display

from joblib import Parallel, delayed

import PIL
import pickle

import sys
from collections import namedtuple as nt

In [387]:
os.environ["SPARCFIRE_HOME"] = "/home/portmanm/sparcfire_matt/"

_HOME_DIR = os.path.expanduser("~")
try:
    _SPARCFIRE_DIR = os.environ["SPARCFIRE_HOME"]
    _MODULE_DIR = pj(_SPARCFIRE_DIR, "GalfitModule")
except KeyError:
    if __name__ == "__main__":
        print("SPARCFIRE_HOME is not set. Please run 'setup.bash' inside SpArcFiRe directory if not done so already.")
        print("Checking the current directory for GalfitModule, otherwise quitting.")
            
        _MODULE_DIR = pj(os.getcwd(), "GalfitModule")
        
        if not exists(_MODULE_DIR):
            raise Exception("Could not find GalfitModule!")
    
sys.path.append(_MODULE_DIR)
from Classes.Components import *
from Classes.Containers import *
from Classes.FitsHandlers import *
from Functions.helper_functions import *

all_results_nt = nt("all_results", ["full_df", "success_df", "not_success_df", "by_eye_success_df", "by_eye_not_success_df"])
combined_results_nt = nt("combined_results", ["bool_df", "full_df", "success_df", "by_eye_success_df"])
mini_sep    = "\n" + 40*"=" + "\n"

In [166]:
# # Defunct
# def check_galfit_chi(gal_name, base_path):
#     # An example line
#     # # Chi^2/nu = 4.661,  Chi^2 = 12025.575,  Ndof = 2580
    
#     #galfit_txt_out = "galfit.01" # in the future galfit.01 may change
#     filename = os.path.join(base_path, gal_name, galfit_txt_out)
#     with open(filename, "r") as f:
#         for line in f:
#             if "Chi" in line:
#                 chi_line = line.strip("# ")
    
#     # This also works but it's quite devious...
#     # chi_line.replace("^", "").replace("/", "_").replace(",  ", "\n").lower()
#     # exec(chi_line)
    
#     out_vals = chi_line.split(",")
#     chi2_nu = float(out_vals[0].strip().split("=")[-1])
#     chi2 = float(out_vals[1].strip().split("=")[-1])
#     ndof = int(out_vals[2].strip().split("=")[-1])
    
#     return chi2_nu, chi2, ndof

In [167]:
def get_total_galaxies(in_dir = "sparcfire-in", out_dir = "sparcfire-out"):   
    all_gnames_in  = find_files(in_dir, "123*", "f")
    all_gnames_out = find_files(out_dir, "123*", "d")
    total_galaxies = min(len(all_gnames_in), len(all_gnames_out))
    if not total_galaxies:
        total_galaxies  = max(len(all_gnames_in), len(all_gnames_out))
        
    return total_galaxies

In [168]:
def load_residual_df(
    out_dir, 
    basename,
    **kwargs
):
    
    method              = kwargs.get("method", "nmr_x_1-p")
    verbose             = kwargs.get("verbose", True)
    residual_cutoff_val = kwargs.get("residual_cutoff_val", 0.007)
    
    pickle_filename = pj(out_dir, basename, sorted(find_files(pj(out_dir, basename), f'{basename}_output_results*.pkl', "f"))[-1])
    
    residual_df  = pd.read_pickle(pickle_filename)
    # temp_df = deepcopy(residual_df)
    # Setting residual columns
    #residual_df["KS_P"] = 1 - residual_df["KS_P"]
    if method == "nmr_x_1-p":
        result_of_method = (1 - residual_df["KS_P"])*residual_df["NMR"]
    elif method == "nmr_neg_log":
        result_of_method = residual_df["NMR"]/-np.log(residual_df["KS_P"] + 1e-10)
    elif method == "W_quality":
        result_of_method = residual_df["KS_P"]/residual_df["W_NMR"]
    else:
        raise Exception(f"Method given: {method} is not a valid method (yet).")
    
    residual_df[method] = result_of_method
    
    # Valid meaning NMR was successfully calculated
    #cols_to_drop = [col for col in residual_df.columns if col.endswith("_sky_2")]
    #valid_spiral_df = residual_df.drop(columns = cols_to_drop).dropna()

    # rename sky_2 to sky_3 for non-spirals to be inline with everything else
    # this would be for potential comparison down the line
    cols_to_merge = [col for col in residual_df.columns if col.endswith("_sky_3") or col.endswith("_sky_4")]
    #_ = [residual_df[col].fillna(residual_df[f"{col[:-1]}2"], inplace = True) for col in cols_to_merge]
    cols_to_drop  = [col for col in residual_df.columns if col.endswith("_sky_2") or col.endswith("_sky_3")]#  + ["KS_STAT"]
    residual_df.drop(columns = cols_to_drop, inplace = True)
    
    if verbose:
        print(f"{len(residual_df)} galaxy models generated.")
        residual_cutoff = residual_df[method] <= residual_cutoff_val
        print(f"{sum(residual_cutoff)} models pass score cutoff.")
        
    
    return residual_df.sort_values(by = method)

In [169]:
def load_galaxy_csv(out_dir, basename, pre_post):
    
    field = " pa_alenWtd_avg_domChiralityOnly"
    # {basename}_ uneccessary because different *galfit* runs 
    # should have same sparcfire output
    fname = pj(out_dir, basename, f"{basename}_{pre_post}_galfit_galaxy.csv")
    sparc_output_csv = pd.read_csv(fname, #pj(out_dir, f"pre_galfit_galaxy.csv"),
                                       index_col = "name",
                                       on_bad_lines = "warn",
                                       usecols   = ["name", field], # , " iptSz"],
                                       #na_values = "NaN",
                                       #dtype     = {field : float} #, " iptSz" : str}#, "name" : str}
                                      )#.loc[:, field]
    #sparc_output_csv.index.name = None
    sparc_output_csv[field] = sparc_output_csv[field].astype(float)
    sparc_output_csv.index  = sparc_output_csv.index.map(str)
    #sparc_output_csv[" iptSz"] = sparc_output_csv[" iptSz"].str.extract(r"([0-9]+)").astype(float)

    #sparc_output_csv["pre_sign"] = np.sign(sparc_output_csv[field])
    sparc_output_csv.rename(columns = {field : f"galaxy_{pre_post}_pa"}, inplace = True)
    
    return sparc_output_csv

In [170]:
def load_galaxy_arcs_csv(out_dir, basename, pre_post, **kwargs):
    
    field_pa   = kwargs.get("field_pa"  , "pitch_angle")
    field_alen = kwargs.get("field_alen", "arc_length")
    name_col   = kwargs.get("name_col"  , "gxyName")

    fname = pj(out_dir, basename, f"{basename}_{pre_post}_galfit_galaxy_arcs.csv")
    sparc_output_arcs_csv = pd.read_csv(fname, 
                                       index_col = name_col,
                                       usecols   = [name_col, field_pa, field_alen],
                                       dtype     = {field_pa : float, field_alen : float} #, name_col : str}
                                      )#.loc[:, field]
    #sparc_output_csv.index.name = None
    sparc_output_arcs_csv.index = sparc_output_arcs_csv.index.map(str)

    # Filtering for pure circles and near circles
    sparc_output_arcs_csv = sparc_output_arcs_csv[abs(sparc_output_arcs_csv[field_pa ]) > 1]

    #sparc_output_arcs_csv = pd.concat([sparc_output_arcs_csv, pre_sparc_output_csv], axis = 1)
    #sparc_output_arcs_csv["sign"] = np.sign(sparc_output_arcs_csv[field])

    # Keeps only arms which align with dom chirality only
    # sparc_output_arcs_csv["check"] = [
    #     row["sign"] + pre_sparc_output_csv.loc[i, "pre_sign"] 
    #     if i in pre_sparc_output_csv.index 
    #     else None 
    #     for i, row in sparc_output_arcs_csv.iterrows()
    # ]

    #sparc_output_arcs_csv = sparc_output_arcs_csv[abs(sparc_output_arcs_csv.loc[:, "check"]) == 2].drop(columns = ["sign", "check"])
    sparc_output_arcs_top3 = sparc_output_arcs_csv.groupby(name_col).head(3).reset_index()
    sparc_output_arcs_top3[f"{pre_post}_sign"] = np.sign(sparc_output_arcs_top3.pitch_angle)

    dom_sign = np.sign(sparc_output_arcs_top3.groupby(name_col).sum()[f"{pre_post}_sign"])
    sparc_output_arcs_top3 = sparc_output_arcs_top3.join(dom_sign, rsuffix = "_dom", on = name_col)

    cond = sparc_output_arcs_top3[f"{pre_post}_sign_dom"] == sparc_output_arcs_top3[f"{pre_post}_sign"]
    sparc_output_arcs_top2 = sparc_output_arcs_top3[cond].groupby(name_col).head(2).reset_index().drop(columns = [f"{pre_post}_sign_dom", "index"])

    #pre_sparc_output_top2.rename(columns = {field : "pre_pa"}, inplace = True)
    #pre_sparc_output_csv.dropna(inplace=True)
    return sparc_output_arcs_top2

In [171]:
def prepare_arcs_output(sparc_output_arcs_top2, pre_post, **kwargs):
    
    field_pa   = kwargs.get("field_pa"  , "pitch_angle")
    field_alen = kwargs.get("field_alen", "arc_length")
    name_col   = kwargs.get("name_col"  , "gxyName")
    
    single_arm = sparc_output_arcs_top2[~sparc_output_arcs_top2.duplicated(name_col, keep = False)]
    single_arm.loc[:, field_pa] = 0
    #single_arm.loc[:, "arc_length"]  = 0

    filled_in = pd.concat([sparc_output_arcs_top2, single_arm], ignore_index = True)
    str_fill = [f"{pre_post}_pa1", f"{pre_post}_pa2"] * (len(filled_in) // 2)
    filled_in["temp1"] = str_fill

    str_fill = [f"{pre_post}_alen1", f"{pre_post}_alen2"] * (len(filled_in) // 2)
    filled_in["temp2"] = str_fill

    #filled_in = filled_in.reset_index().drop(columns = ["index"])
    sp_out = filled_in.pivot_table(index = name_col, columns = ["temp1", "temp2"], values = [field_pa, field_alen])

    sp_out = sp_out.droplevel(0, axis = 1).droplevel(0, axis = 1)
    sp_out.columns = [f'{pre_post}_alen1', f'{pre_post}_alen2', f'{pre_post}_pa1', f'{pre_post}_pa2']
    
    return sp_out

In [172]:
def before_after_galfit_comparison(all_sparc_out, pre_sparc_output_csv, post_sparc_output_csv):
    
    before_after_galfit_df = deepcopy(all_sparc_out)#.dropna() #full_df.dropna(subset = ["post_pa"])
    #before_after_galfit_df = before_after_galfit_df[np.sign(before_after_galfit_df.loc[:, "pre_pa"]) != np.sign(before_after_galfit_df.loc[:, "post_pa"])]

    before_after_galfit_df["chiral_agreement"] = np.sign(before_after_galfit_df["pre_pa1"]) == np.sign(before_after_galfit_df["post_pa1"])

    before_after_galfit_df["pre_pa1"]  = abs(before_after_galfit_df["pre_pa1"])
    before_after_galfit_df["pre_pa2"]  = abs(before_after_galfit_df["pre_pa2"])
    before_after_galfit_df["post_pa1"] = abs(before_after_galfit_df["post_pa1"])
    before_after_galfit_df["post_pa2"] = abs(before_after_galfit_df["post_pa2"])


    before_after_galfit_df["1-1"] = abs(before_after_galfit_df["pre_pa1"] - before_after_galfit_df["post_pa1"])
    before_after_galfit_df["2-2"] = abs(before_after_galfit_df["pre_pa2"] - before_after_galfit_df["post_pa2"])
    before_after_galfit_df["1-2"] = abs(before_after_galfit_df["pre_pa1"] - before_after_galfit_df["post_pa2"])
    before_after_galfit_df["2-1"] = abs(before_after_galfit_df["pre_pa2"] - before_after_galfit_df["post_pa1"])

    before_after_galfit_df["mean-1122"]  = before_after_galfit_df[["1-1","2-2"]].mean(axis = "columns")
    before_after_galfit_df["mean-1221"]  = before_after_galfit_df[["1-2","2-1"]].mean(axis = "columns")

    before_after_galfit_df["min_diff"]   = before_after_galfit_df[["mean-1122", "mean-1221"]].min(axis = 1)

    before_after_galfit_df["best_diffs"] = [
        (row["1-1"], row["2-2"]) if np.mean((row["1-1"], row["2-2"])) == row["min_diff"] 
        else (row["1-2"], row["2-1"]) 
        for _, row in before_after_galfit_df.iterrows()
    ]

    before_after_galfit_df["pa_diff1"], before_after_galfit_df["pa_diff2"] = zip(*before_after_galfit_df["best_diffs"])
    #before_after_galfit_df["best_diff2"] = [row["2-2"] if np.mean((row["1-1"], row["2-2"])) == row["min_diff"] else row["2-1"] for _, row in before_after_galfit_df.iterrows()]
    before_after_galfit_df["pa_diff_galaxy"] = abs(abs(post_sparc_output_csv["galaxy_post_pa"]) - abs(pre_sparc_output_csv["galaxy_pre_pa"]))# < 15

    # min(2_arm_length)/max(2_arm_length) > 0.7, verify that this is valid by eye
    #before_after_galfit_df["alen_ratio"] = post_sparc_output_csv[" iptSz"]*before_after_galfit_df[["pre_alen1", "pre_alen2"]].min(axis = 1)/(pre_sparc_output_csv[" iptSz"]*before_after_galfit_df[["post_alen1", "post_alen2"]].max(axis = 1))
    before_after_galfit_df["alen_ratio"] = before_after_galfit_df[["post_alen1", "post_alen2"]].min(axis = 1)/before_after_galfit_df[["post_alen1", "post_alen2"]].max(axis = 1)
    #before_after_galfit_df.drop(columns = ["pre_sign", "post_sign"], inplace = True)

    # before_after_galfit_df.loc[:, "within_15_degrees_pre"]  = before_after_galfit_df.loc[:, "diff_pre"] < 15
    # before_after_galfit_df.loc[:, "within_15_degrees_post"] = before_after_galfit_df.loc[:, "diff_post"] < 15
    #before_after_galfit_df.sort_values(by = ["post_pa"])
    before_after_galfit_df = before_after_galfit_df.drop(columns = before_after_galfit_df.columns[9:-4])
    
    return before_after_galfit_df

In [173]:
def gather_everything(residual_df, before_after_galfit_df, method):
    full_df = residual_df.join(before_after_galfit_df)
    full_df = full_df[full_df.index.notnull()].sort_values(by = method)

    #full_df.dropna(subset = ["pa_diff1", "pa_diff2", "pa_diff_galaxy"], how = "all", inplace = True)
    #full_df.fillna(subset = ["pa_diff1", "pa_diff2", "pa_diff_galaxy"], how = "all", inplace = True)
    full_df["min_pa_diff"] = full_df[["pa_diff1", "pa_diff2", "pa_diff_galaxy"]].min(axis = 1)
    
    return full_df

In [174]:
def determine_success(
    full_df, 
    **kwargs
):
    
    in_dir                = kwargs.get("in_dir", "sparcfire-in") 
    out_dir               = kwargs.get("out_dir","sparcfire-out")
    sparcfire_processed   = kwargs.get("sparcfire_processed", None)
    flip_chiral_agreement = kwargs.get("flip_chiral_agreement", False)
    residual_cutoff_val   = kwargs.get("residual_cutoff_val", 0.007)
    pa_cutoff_val         = kwargs.get("pa_cutoff_val", 10)
    alen_cutoff_val       = kwargs.get("alen_cutoff_val", 0.5)
    verbose               = kwargs.get("verbose", True)
    
    residual_cutoff = full_df["nmr_x_1-p"] <= residual_cutoff_val
    #pa_cutoff = (full_df["pa_diff1"] < 10) | (full_df["pa_diff2"] < 10)
    pa_cutoff   = full_df["min_pa_diff"] < pa_cutoff_val
    alen_cutoff = full_df["alen_ratio"] > alen_cutoff_val #[True]
    sign_cutoff = full_df["chiral_agreement"].astype(bool)
    if flip_chiral_agreement:
        sign_cutoff = ~sign_cutoff

    success_df     = full_df[residual_cutoff & pa_cutoff & alen_cutoff & sign_cutoff]
    not_success_df = full_df[~(residual_cutoff & pa_cutoff & alen_cutoff & sign_cutoff)]
    
    if verbose:
        # print(f"{len(full_df)} processed by sparcfire")
        # print(f"{sum(residual_cutoff)} pass score cutoff")
        print(f"{sum(pa_cutoff)} pass pitch angle cutoff")
        print(f"{sum(alen_cutoff)} pass arm length ratio cutoff")
        print(f"{sum(sign_cutoff)} pass chiral agreement")
        print(f"{len(success_df)} or {100*len(success_df)/len(full_df):.2f}% ({len(success_df)}/{len(full_df)}) succeed by SpArcFiRe+Score")
        if sparcfire_processed is not None:
            sparcfire_processed = full_df.dropna(subset = ["pa_diff1", "pa_diff2", "pa_diff_galaxy"], how = "all")
        
        print(f"{total_galaxies - len(sparcfire_processed)}/{total_galaxies} models failed reprocessing by SpArcFiRe")
        
        #print(f"Total success less 24% false positive -- {len(success_df)*.76:.0f}")
        #print(f"Total success less 24% false positive + 24% false negative -- {len(not_success_df)*0.24+len(success_df)*.76:.0f}")
        #print(f"Estimated total success % -- {100*(len(not_success_df)*0.24+len(success_df)*.76)/len(full_df):.0f}%")
    
    # cutoffs = {
    #     "residual_cutoff" : residual_cutoff, 
    #     "pa_cutoff"       : pa_cutoff, 
    #     "alen_cutoff"     : alen_cutoff, 
    #     "sign_cutoff"     : sign_cutoff
    # }
    return success_df, not_success_df # , cutoffs

In [175]:
def extract_by_eye_data(
    out_dir, 
    basename, 
    residual_df, 
    full_df,
    **kwargs
):
    
    sparcfire_processed = kwargs.get("sparcfire_processed", None)
    subset              = kwargs.get("subset", None)
    verbose             = kwargs.get("verbose", True)
    
    with open(f"{pj(out_dir, basename, basename)}_by-eye_success.txt", "r") as f:
        raw_by_eye_success_galaxies = [i.split("_")[0].strip() for i in f.readlines()]

    with open(f"{pj(out_dir, basename, basename)}_by-eye_not_success.txt", "r") as f:
        raw_by_eye_not_success_galaxies = [i.split("_")[0].strip() for i in f.readlines()]
        
    by_eye_success_galaxies = [i for i in raw_by_eye_success_galaxies if i in full_df.index]
    by_eye_not_success_galaxies = [i for i in raw_by_eye_not_success_galaxies if i in full_df.index]
    if sparcfire_processed is not None:
        sparcfire_processed = full_df.dropna(subset = ["pa_diff1", "pa_diff2", "pa_diff_galaxy"], how = "all")
    
    if verbose:
        total = len(residual_df)
        if subset:
            total = subset
            print(f"Working on a subset of {total} galaxies")
            
        align = len(f"{len(by_eye_success_galaxies)}/{len(raw_by_eye_success_galaxies)}")
        print(f"Number of *total* by eye successful galaxies")
        print(f"{len(raw_by_eye_success_galaxies):<{align}} => {len(raw_by_eye_success_galaxies)/total*100:.2f}%")
        print(f"Number of by eye successful galaxies that SpArcFiRe *could* process")
        by_eye_processed = [i for i in sparcfire_processed.index if i in raw_by_eye_success_galaxies]
        print(f"{len(by_eye_processed)}/{len(raw_by_eye_success_galaxies)} => {len(by_eye_processed)/len(raw_by_eye_success_galaxies)*100:.2f}%")
        
        print()
        
        align = len(f"{len(by_eye_not_success_galaxies)}/{len(raw_by_eye_not_success_galaxies)}")
        print(f"Number of *total* by eye not successful galaxies")
        print(f"{len(raw_by_eye_not_success_galaxies):<{align}} => {len(raw_by_eye_not_success_galaxies)/total*100:.2f}%")
        
        print(f"Number of by eye not successful galaxies that SpArcFiRe *could* process")
        by_eye_processed = [i for i in sparcfire_processed.index if i in raw_by_eye_not_success_galaxies]
        print(f"{len(by_eye_processed)}/{len(raw_by_eye_not_success_galaxies)} => {len(by_eye_processed)/len(raw_by_eye_not_success_galaxies)*100:.2f}%")
    
    return by_eye_success_galaxies, by_eye_not_success_galaxies

In [176]:
def calculate_false_positive_negative(
    by_eye_success_galaxies, 
    by_eye_not_success_galaxies, 
    success_df, 
    not_success_df, 
    full_df,
    method  = "nmr_x_1-p",
    verbose = True
):
    
    false_positive = set(by_eye_not_success_galaxies).intersection(set(success_df.index))
    false_negative = set(by_eye_success_galaxies).intersection(set(not_success_df.index))

    by_eye_success_df     = full_df.loc[by_eye_success_galaxies].sort_values(by = method)
    by_eye_not_success_df = full_df.loc[by_eye_not_success_galaxies].sort_values(by = method)

    FP_rate = f"{len(false_positive)}/({len(false_positive)} + {len(by_eye_not_success_df)})"
    FN_rate = f"{len(false_negative)}/({len(false_negative)} + {len(by_eye_success_df)})"

    if verbose:
        print(f"False positive rate (by eye) -- {FP_rate} = {100*eval(FP_rate):.2f}%")
        print(f"False negative rate (by eye) -- {FN_rate} = {100*eval(FN_rate):.2f}%")

    #print(f"Total # of galaxies sorted by eye -- {len(raw_by_eye_success_galaxies) + len(raw_by_eye_not_success_galaxies)}")
    return by_eye_success_df, by_eye_not_success_df, FP_rate, FN_rate

In [177]:
def vprint(verbosity, *args, **kwargs):
    if verbosity:
        print(*args, **kwargs)

In [453]:
def residual_analysis(
    **kwargs
):
    
    in_dir                = kwargs.get("in_dir", "sparcfire-in")
    out_dir               = kwargs.get("out_dir", "sparcfire-out")
    basename              = kwargs.get("basename", "") 
    method                = kwargs.get("method", "nmr_x_1-p")
    flip_chiral_agreement = kwargs.get("flip_chiral_agreement", False)
    pa_cutoff_val         = kwargs.get("pa_cutoff_val", 10)
    residual_cutoff_val   = kwargs.get("residual_cutoff_val", 0.5)
    alen_cutoff_val       = kwargs.get("alen_cutoff_val", 0.007)
    incl_by_eye           = kwargs.get("incl_by_eye", True)
    by_eye_subset         = kwargs.get("by_eye_subset", None)
    verbose               = kwargs.get("verbose", False)
    
    vprint(verbose, "Load residual.")
    residual_df = load_residual_df(
        out_dir, 
        basename, 
        method = method, 
        residual_cutoff_val = residual_cutoff_val
    )
    
    # field_pa   = "pitch_angle"
    # field_alen = "arc_length"
    # name_col   = "gxyName"

    vprint(verbose, "Load pre galaxy csv.")
    pre_sparc_output_csv        = load_galaxy_csv(out_dir,      basename, pre_post = "pre")
        
    vprint(verbose, "Load pre galaxy arcs csv.")
    pre_sparc_output_arcs_top2  = load_galaxy_arcs_csv(out_dir, basename, pre_post = "pre")

    vprint(verbose, "Load post galaxy csv.")
    post_sparc_output_csv       = load_galaxy_csv(out_dir,      basename, pre_post = "post")
    
    vprint(verbose, "Load post galaxy arcs csv.")
    post_sparc_output_arcs_top2 = load_galaxy_arcs_csv(out_dir, basename, pre_post = "post")

# ====================================================================================================================

    vprint(verbose, "Prep pre galaxy arcs df")
    pre_sp_out    = prepare_arcs_output(pre_sparc_output_arcs_top2,  pre_post = "pre")
    vprint(verbose, "Prep post galaxy arcs df")
    post_sp_out   = prepare_arcs_output(post_sparc_output_arcs_top2, pre_post = "post")

    vprint(verbose, "And combine")
    all_sparc_out = pd.concat([pre_sp_out, post_sp_out], axis = 1)
    
# ====================================================================================================================

    vprint(verbose, "Compare SpArcFiRe analysis before and after")
    before_after_galfit_df     = before_after_galfit_comparison(
        all_sparc_out, 
        pre_sparc_output_csv, 
        post_sparc_output_csv
    )
    
    vprint(verbose, "Bring everything together")
    full_df             = gather_everything(residual_df, before_after_galfit_df, method)
    
    sparcfire_processed = full_df.dropna(subset = ['pa_diff1', 'pa_diff2', 'pa_diff_galaxy'], how = 'all')
    
    vprint(verbose, "Determine success")
    success_df, not_success_df = determine_success(
        full_df, 
        in_dir                 = in_dir, 
        out_dir                = out_dir, 
        flip_chiral_agreement  = flip_chiral_agreement,
        sparcfire_processed    = sparcfire_processed,
        pa_cutoff_val          = pa_cutoff_val, 
        residual_cutoff_val    = residual_cutoff_val,
        alen_cutoff_val        = alen_cutoff_val
    )
    
    full_df["success"] = full_df.index.isin(success_df.index)
    print()
    
# ====================================================================================================================
    
    by_eye_success_df     = None
    by_eye_not_success_df = None
    
    if incl_by_eye:
        vprint(verbose, "Extract by-eye evaluation")
        by_eye_success_galaxies, by_eye_not_success_galaxies = extract_by_eye_data(
            out_dir, 
            basename, 
            residual_df, 
            full_df, 
            subset = by_eye_subset,
            sparcfire_processed = sparcfire_processed
        )
        print()

        # To resolve an occasional processing error...
        by_eye_success_limited     = list(set(by_eye_success_galaxies).intersection(full_df.index))
        by_eye_not_success_limited = list(set(by_eye_not_success_galaxies).intersection(full_df.index))

        vprint(verbose, "Calculate by-eye statistics")
        by_eye_success_df, by_eye_not_success_df, FP_rate, FN_rate = calculate_false_positive_negative(
            by_eye_success_limited, 
            by_eye_not_success_limited, 
            success_df, 
            not_success_df, 
            full_df,
            method = method
        )

        full_df["by_eye_success"] = full_df.index.isin(by_eye_success_df.index)
    
    results_nt = all_results_nt(full_df, success_df, not_success_df, by_eye_success_df, by_eye_not_success_df)
    for df in results_nt:
        df["runname"]  = basename
        
    return results_nt

In [449]:
def combine_multi_run_results(
    method, 
    *args,
    **kwargs
):
    
    df_names      = kwargs.get("df_names", [])
    incl_by_eye   = kwargs.get("incl_by_eye", True)
    by_eye_subset = kwargs.get("by_eye_subset", None)
    verbose       = kwargs.get("verbose", True)
    
    print(f"Joining {len(args)} attempts...")
    primary_full_df = deepcopy(args[0].full_df)
    
    num_dfs = len(args)
    #alt_full_df     = deepcopy(args[1].full_df)
    #alt_full_df.rename(columns = {method : f"1_{method}"}, inplace = True)

    all_full_dfs = [primary_full_df]
    all_methods  = [method]
    all_columns  = []
    
    for i, arg in enumerate(args[1:]):
        alt_method = f"{i}_{method}"
        all_methods.append(alt_method)
        
        all_full_dfs.append(arg.full_df.rename(columns = {method : alt_method}))
        all_columns.append(set(arg.full_df.columns))
        
    shared_columns = list(set(primary_full_df.columns).intersection(*all_columns)) + ["gname"]
    #empty_list = [None]*max([len(df) for df in all_full_dfs])
    #empty_df = pd.DataFrame({col : None for col in shared_columns}) #.set_index("gname")
    
    # BY RESIDUAL
    #temp_bool_df = pd.concat([primary_full_df[method], alt_full_df[f"1_{method}"]], axis = 1)
    combined_bool_df = pd.concat([df[method] for df, method in zip(all_full_dfs, all_methods)], axis = 1)

    #combined_bool_df.drop(index = list(set(primary_full_df.index).difference(set(alt_full_df.index))), inplace = True)
    #temp_bool_df["minima"] = temp_bool_df.idxmin(axis = 1)
    combined_bool_df["minima"] = combined_bool_df.idxmin(axis = 1)
    
    #og_minima  = temp_bool_df.minima == method
    #alt_minima = temp_bool_df.minima == f"1_{method}"
    
    #og_success = temp_bool_df.index.isin(args[0].by_eye_success_df.index)
    #alt_success = temp_bool_df.index.isin(args[1].by_eye_success_df.index)
    #print(sum((og_minima & og_success) | (alt_minima & alt_success)))
        
    # By everything
    eval_str = " | ".join([f"all_full_dfs[{i}].success" for i in range(num_dfs)])
    # success_n | success_m
    combined_bool_df["by_sparcfire_score_success"] = eval(eval_str) #primary_full_df.success | alt_full_df.success # | combined_bool_df.residual_minima_success
    
    minima_conditions  = [combined_bool_df.minima == method for method in all_methods]
    success_conditions = [df.success for df in all_full_dfs]
    all_conditions     = zip(minima_conditions, success_conditions)
    
    list_o_conditions = [cond_set[0] & cond_set[1] for cond_set in all_conditions]
    eval_str = " | ".join([f"list_o_conditions[{i}]" for i in range(num_dfs)])
    # minima -> success_minima
    combined_bool_df["by_sparcfire_and_best_score_success"] = eval(eval_str)
    
    combined_bool_df["best_fit"] = combined_bool_df[combined_bool_df.by_sparcfire_score_success].minima.replace({f"{i}_{method}" : name for i, name in enumerate(df_names)})
    
    if incl_by_eye:
        
        # Use by eye success df to account for by eye subsets (and to shorten the array) rather than info in full_df
        by_eye_success_conditions = [combined_bool_df.index.isin(df.by_eye_success_df.index) for df in args]
        # Flatten
        all_by_eye_success_gnames     = list(set([gname for df in args for gname in df.by_eye_success_df.index]))
        #all_by_eye_not_success_gnames = list(set([gname for df in args for gname in df.by_eye_not_success_df.index]))
        all_by_eye_not_success_gnames = list(set([gname for df in args for gname in df.by_eye_not_success_df.index]))
        #by_sparcfire_success_cond = [combined_bool_df.by_sparcfire_success == method for method in all_methods]

        combined_bool_df["residual_success_by_eye"] = combined_bool_df.index.isin(all_by_eye_success_gnames) & combined_bool_df.by_sparcfire_score_success 
        
        all_conditions = zip(minima_conditions, by_eye_success_conditions)
        list_o_conditions = [cond_set[0] & cond_set[1] for cond_set in all_conditions]
        eval_str = " | ".join([f"list_o_conditions[{i}]" for i in range(num_dfs)])

        # How well does choosing the smallest residual score across all runs work in picking a successful fit
        # when compared with the by eye analysis?
        # minima -> (success_minima & by eye)
        combined_bool_df["residual_minima_success_by_eye"] = eval(eval_str)
        # print(sum((minima_conditions[0] & by_eye_success_conditions[0]) | (minima_conditions[1] & by_eye_success_conditions[1])))
        # print(sum(list_o_conditions[0] | list_o_conditions[1]))
        
        by_sparcfire_success_by_eye = combined_bool_df.index.isin(all_by_eye_success_gnames) & combined_bool_df.by_sparcfire_score_success
            
        # As with and including residual minima, but now include the sparcfire scoring
        # (minima -> [success_minima & by eye]) | ([success_m | success_n] & by eye)
        combined_bool_df["by_minima_or_sparcfire_success_by_eye"]  = by_sparcfire_success_by_eye | combined_bool_df.residual_minima_success_by_eye
        # Comment out this one because it's filtering both individually by eye rather than doing (minima | score) & by eye
        #combined_bool_df["by_minima_and_sparcfire_success_by_eye"] = by_sparcfire_success_by_eye & combined_bool_df.residual_minima_success_by_eye
    
        # TODO: Show % in both
        # by eye success for all labeled by df
        best_fit_str_dict = {m : f"df_{i}" for i, m in enumerate(all_methods)}
        combined_bool_df["best_fit_by_eye"] = None

        for gname, row in combined_bool_df.iterrows():
            best_method = [
                (m, full_df.loc[gname, "by_eye_success"]) 
                for m, full_df in zip(all_methods, all_full_dfs)
                if gname in full_df.index and full_df.loc[gname, "by_eye_success"]
            ]

            if len(best_method) > 1:
                best_method = [(row.minima, None)]

            elif not best_method:
                best_method = [(None, None)]

            if not combined_bool_df.loc[gname, "best_fit_by_eye"]:
                combined_bool_df.loc[gname, "best_fit_by_eye"] = best_fit_str_dict.get(best_method[0][0], None)

        #eval_str = " | ".join([f"all_full_dfs[{i}].by_eye_success" for i, _ in enumerate(all_full_dfs)])
        #combined_bool_df["by_eye_success"] = eval(eval_str) #primary_full_df.by_eye_success | alt_full_df.by_eye_success
        combined_bool_df["by_eye_success"] = False | combined_bool_df.best_fit_by_eye.str.contains("df")
    
    if verbose:
        print(f"Total success by combining SpArcFiRe + score: {sum(combined_bool_df.by_sparcfire_score_success)}/{total_galaxies}")
        print(f"i.e. success_n | success_m | ...")
        print()
        print(f"Total success by combining SpArcFiRe + best score: {sum(combined_bool_df.by_sparcfire_and_best_score_success)}/{total_galaxies}")
        print(f"i.e. minima -> success_minima")
        print(mini_sep)
        if incl_by_eye:
            print("Checking against the by eye determination...")
            _total_galaxies = total_galaxies
            if df_names:
                if len(df_names) != num_dfs: 
                    print("Length of dataframe names supplied should be equal to the number of dataframes supplied.")
                    print("Leaving current convention in the dataframe (df_0, df_1, ..., df_n)")
                else:
                    combined_bool_df["best_fit_by_eye"]   = combined_bool_df.best_fit_by_eye.replace({f"df_{i}" : name for i, name in enumerate(df_names)})
            
            if by_eye_subset:
                _total_galaxies = by_eye_subset
                print("Using a subset of galaxies for the by eye determination...")
                
            print(f"Total success by eye: {sum(combined_bool_df.by_eye_success)}/{_total_galaxies}")
            total_by_eye = sum(combined_bool_df.by_eye_success)
            print()
            print(f"By eye captured by either score: {sum(combined_bool_df.residual_success_by_eye)}/{total_by_eye}")
            print(f"i.e. (success_m | success_n | ...) & by eye")
            print()
            print(f"By eye captured by best score: {sum(combined_bool_df.residual_minima_success_by_eye)}/{total_by_eye}")
            print(f"i.e. minima -> (success_minima & by eye)")
            print()
            print(f"By eye captured by SpArcFiRe or choosing best score between the two runs: {sum(combined_bool_df.by_minima_or_sparcfire_success_by_eye)}/{total_by_eye}")
            print(f"i.e. (minima -> [success_minima & by eye]) | ([success_m | success_n | ...] & by eye)")
            #print(f"By eye captured by SpArcFiRe and choosing best score between the two runs: {sum(combined_bool_df.by_minima_and_sparcfire_success_by_eye)}/{total_by_eye}")
            print(mini_sep)

            bss  = set(combined_bool_df[combined_bool_df.by_sparcfire_score_success].index)
            #bss  = set(combined_bool_df[bss.isin(all_by_eye_success_gnames)].index)
            TP   = all_by_eye_success_gnames
            #TP   = set(combined_bool_df[combined_bool_df["by_eye_success"]].index)
            
            #bsns  = ~combined_bool_df.by_sparcfire_score_success
            bsns = combined_bool_df[~combined_bool_df.by_sparcfire_score_success].index
            #bsns = bsns.index
            #bsns = set(combined_bool_df[bsns.isin(all_by_eye_not_success_gnames)].index)
            
            # Exclude the ones found in the success galaxies because some runs may find success where the others didn't
            TN =  set(all_by_eye_not_success_gnames).difference(set(all_by_eye_success_gnames))
            assert len(TP) + len(TN) == _total_galaxies, f"True positive and true negative don't add up to {_total_galaxies}!"
            #TN   = combined_bool_df[~combined_bool_df["by_eye_success"]].index
            #TN   = set(TN[TN.isin(all_by_eye_not_success_gnames)])

            FP   = bss.intersection(TN)
            FN   = bsns.intersection(TP)

            sparc_positive = bss.intersection(TP)
            sparc_negative = bsns.intersection(TN)
            fraction = len(sparc_positive)/sum(combined_bool_df.by_eye_success)
            
            combined_bool_by_eye_not_success = ~combined_bool_df.by_eye_success
            denom = combined_bool_by_eye_not_success[combined_bool_by_eye_not_success.index.isin(all_by_eye_not_success_gnames)]
            neg_fraction = len(sparc_negative)/sum(denom)
            # FPR = FP/(FP + TN)
            # FNR = FN/(FN + TP)
            # TODO WTF
            print(f"By eye success found by SpArcFiRe + score:  {len(sparc_positive)}/{sum(combined_bool_df.by_eye_success)} = {100*fraction:.2f}%")
            print(f"By eye not success found by SpArcFiRe + score:  {len(sparc_negative)}/{sum(denom)} = {100*neg_fraction:.2f}%")
            
            FP_rate = f"{len(FP)} / ({len(FP)} + {len(TN)})"
            FN_rate = f"{len(FN)} / ({len(FN)} + {len(TP)})"

            print()
            print(f"False positive rate (by eye) -- {FP_rate} = {100*eval(FP_rate):.2f}%")
            print(f"False negative rate (by eye) -- {FN_rate} = {100*eval(FN_rate):.2f}%")
            
            # TODO: GENERATE CONFUSION MATRIX
            #print()
            #print(f"Confusion matrix")
            #print()
            #print()
    
    
    _ = [full_df.rename(columns = {f"{i}_{method}" : method}, inplace = True) for i, full_df in enumerate(all_full_dfs[1:])]
    
    combined_full_df = pd.concat([full_df for full_df in all_full_dfs])
    
    combined_success_df = pd.concat(
                full_df.loc[combined_bool_df[combined_bool_df.best_fit == name].index, :] 
                for name, full_df in zip(df_names, all_full_dfs)
            )
    combined_success_df.groupby(combined_success_df.columns, axis = 1).agg(lambda x: x.dropna)
            
    combined_by_eye_success_df = None
    if incl_by_eye:
        # Get index, i.e. galaxy name from choosing the best fit then feed that into the full_dfs in all_full_dfs via loc
        # to grab the row
        combined_by_eye_success_df = pd.concat(
            full_df.loc[combined_bool_df[combined_bool_df.best_fit_by_eye == name].index, :] 
            for name, full_df in zip(df_names, all_full_dfs)
        )
        # Merge duplicated columns
        combined_by_eye_success_df.groupby(combined_by_eye_success_df.columns, axis = 1).agg(lambda x: x.dropna)
    
    return combined_results_nt(combined_bool_df, combined_full_df, combined_success_df, combined_by_eye_success_df)

In [27]:
# combined_bool_df = combine_multi_run_results(
#             "nmr_x_1-p",
#             galaxy_set_1000_results["1000_NC2"],
#             galaxy_set_1000_results["1000_NC3"],
#             df_names      = ["1000_NC2", "1000_NC3"],
#             incl_by_eye   = True,
#             #by_ebye_subset = 1000
#         )

In [182]:
def create_ecdf(x, runname, df, dict_o_kwargs):
    
    # for key, default in exec_kwargs().items():
    #     exec(f"{key} = kwargs.get({key}, {default})")
    # Yeesh https://stackoverflow.com/a/67367191
    # Use frame(3) since these functions are nested
    sys._getframe(4).f_locals.update(dict_o_kwargs)

    fig = px.ecdf(df,
                  x = x,
                  markers = True, 
                  lines = False, 
                  marginal = "histogram",
                  ecdfnorm = None,
                  log_x    = log_x,
                  log_y    = log_y
                 ) 

    if add_vline:
        fig.add_vline(x = cutoff_val, 
                      row = 1,
                      line_color = "cyan",
                      annotation_text= f"{cutoff_val}", 
                      annotation_position="bottom")

    if add_hline:
        yval = sum(df.loc[:, x] < cutoff_val)
        fig.add_hline(y = yval, 
                      row = 1,
                      col = 1,
                      line_color = "magenta",
                      annotation_text=f"{yval}",
                      annotation_position="bottom left"
                     )
        
    return fig

In [183]:
def create_scatter(x, runname, df, dict_o_kwargs):
    
    sys._getframe(4).f_locals.update(dict_o_kwargs)
        
    fig = px.scatter(df, 
               x = x, 
               y = y, 
               color = color,
               color_continuous_scale = "Agsunset",
                    )
    
    return fig

In [184]:
def create_histogram(x, runname, df, dict_o_kwargs, multi):
    
    sys._getframe(4).f_locals.update(dict_o_kwargs)
    
    fig = px.histogram(
        df,
        x                       = x,
        color                   = color,
        color_discrete_sequence = color_discrete_sequence,
        histnorm                = histnorm,
        facet_col               = facet_col,
        facet_row               = facet_row,
        nbins                   = nbins,
        #hover_data = {'Galaxy ID': (":c", full_df.index)},
        log_x                   = log_x,
        log_y                   = log_y,
    )
    
    if facet_col or facet_row:
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

    if multi:
        fig.update_layout(barmode = "overlay")
        fig.update_traces(opacity = 0.75)
        
    return fig

In [394]:
def create_plot(
    x, 
    runname, 
    plot_type, 
    df, 
    output_image_dir = "for_paper_images", 
    **kwargs
):
        
    dict_o_kwargs = {
        "y"               : None,
        "color"           : None,
        "color_discrete_sequence" : None,
        
        "xaxis_title"     : "",
        "yaxis_title"     : "",
        
        "xaxis_range"     : None,
        "yaxis_range"     : None,
        
        "log_x"           : False,
        "log_y"           : False,
        
        "histnorm"        : "",
        "facet_col"       : None,
        "facet_row"       : None,
        
        "nbins"           : 0,
        
        "cutoff_val"      : 0.007,
        "add_vline"       : True,
        "add_hline"       : True,
        
        "title"           : "",
        "title_x"         : 0.9,
        "title_y"         : 0.5,

        "output_image_dir" : output_image_dir,
        "filetype"        : "png",
        "show"            : True,
        "write"           : True
    }
    
    # Updating with kwargs
    dict_o_kwargs = {key : kwargs.get(key, default) for key, default in dict_o_kwargs.items()}
    
    plt.clf()
    if plot_type == "ecdf":
        fig = create_ecdf(x, runname, df, dict_o_kwargs)
    elif plot_type == "scatter":
        fig = create_scatter(x, runname, df, dict_o_kwargs)
    elif plot_type == "histogram":
        multi = kwargs.get("multi", True)
        fig = create_histogram(x, runname, df, dict_o_kwargs, multi)
    else:
        return
    
    sys._getframe(3).f_locals.update(dict_o_kwargs)
    
    if title:
        fig.update_layout(
            title_text = title, 
            title_x    = title_x, 
            title_y    = title_y
        )
        
    if xaxis_title:
        fig.update_layout(xaxis_title = xaxis_title)
    if yaxis_title:
        fig.update_layout(yaxis_title = yaxis_title)
        
    if xaxis_range:
        fig.update_layout(xaxis_range = xaxis_range)
    if yaxis_range:
        fig.update_layout(yaxis_range = yaxis_range)
    
    if show:
        fig.show()
        
    height           = kwargs.get("height", 800)
    width_multiplier = kwargs.get("width_multiplier", 1.5) #1200
    
    if write:
        fig.write_image(
            f"{output_image_dir}/{plot_type}_{runname}_{x}.{filetype}", 
            height = height, width = height*width_multiplier
        )
        
    fig.data   = []
    fig.layout = {}
    
    #return fig

In [28]:
def create_all_plots(
    df_container,
    method, 
    basename, 
    output_image_dir,
    **kwargs
):
    
    incl_by_eye = kwargs.get("incl_by_eye", True)
    show        = kwargs.get("show", True)
    write       = kwargs.get("write", True)
        
    # Use a cutoff because there tends to be some extremely high values which skew the plot
    plot_df = df_container.full_df[df_container.full_df.loc[:, method] < kwargs.get("score_ecdf_cutoff", 0.015)]

# ============================================================================================================================================================
# FULL ECDF
# ============================================================================================================================================================

    _ = create_plot(
        x                = method,
        runname          = basename,
        plot_type        = "ecdf",
        df               = plot_df,
        output_image_dir = output_image_dir,
        xaxis_title      = method, #"KStest+NMR",
        # title       = f"1000 galaxies: ECDF for KStest+NMR on all models",
        # title_y     = 0.92
        cutoff_val       = kwargs.get("residual_cutoff_val", 0.007),
        show             = show,
        write            = write
    )
    
# ============================================================================================================================================================
# ECDF OF BY EYE SCORE
# ============================================================================================================================================================
    
    if incl_by_eye:
        _ = create_plot(
            x                = method,
            runname          = f"{basename}_by-eye",
            plot_type        = "ecdf",
            df               = df_container.by_eye_success_df,
            output_image_dir = output_image_dir,
            xaxis_title      = method, #"KStest+NMR",
            add_hline        = False,
            # title       = f"1000 galaxies: ECDF for KStest+NMR on by-eye successful model fits",
            # title_y     = 0.92
            show             = show,
            write            = write
        )
    
# ============================================================================================================================================================
# SERSIC INDEX HISTOGRAMS
# ============================================================================================================================================================

    x1   = "sersic_index_sersic_1"
    x2   = "sersic_index_sersic_2"
    x    = "n"
    fcol = "domain"

    plot_df = df_container.success_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
    plot_df = pd.melt(plot_df).rename(columns = {"value" : x, "variable" : "component"})
    plot_df[fcol] = "success"

    plot_df1 = df_container.full_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
    plot_df1 = pd.melt(plot_df1).rename(columns = {"value" : x, "variable" : "component"})
    plot_df1[fcol] = "all models"
    
    to_concat = [plot_df, plot_df1]
    runname   = f"{basename}_success-vs-all"
    
    if incl_by_eye:
        # By eye success
        plot_df2 = df_container.by_eye_success_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
        plot_df2 = pd.melt(plot_df2).rename(columns   = {"value" : x, "variable" : "component"})
        plot_df2[fcol] = "by-eye success"
        to_concat.append(plot_df2)
        runname   = f"{basename}_by-eye-vs-success-vs-all"

    plot_df = pd.concat(to_concat, axis = 0)

    # Bulge -- Redder
    color_1 = px.colors.qualitative.Plotly[1]
    # Disk -- Bluer
    color_2 = px.colors.qualitative.Plotly[0]
    colors = [color_1, color_2]
    
    _ = create_plot(
        x                       = x,
        runname                 = runname,
        plot_type               = "histogram",
        df                      = plot_df,
        output_image_dir        = output_image_dir,
        histnorm                = "probability",
        color                   = "component",
        color_discrete_sequence = colors,
        nbins                   = 40,
        facet_col               = fcol,
        xaxis_range             = kwargs.get("xaxis_range_sersic_hist", None), # [10, 20],
        yaxis_range             = kwargs.get("yaxis_range_sersic_hist", None), # [0, 0.15],
        # title       = f"{runname} galaxies: distribution of magnitudes for by-eye successful models"
        # title_y     = 0.85
        show                    = show,
        write                   = write
    )

    # _ = create_plot(
    #     x           = x,
    #     runname     = "1000_by-eye",
    #     plot_type   = "histogram",
    #     df          = plot_df,
    #     color       = "component",
    #     # title       = f"{runname} galaxies: distribution of Sérsic indices for by-eye successful models"
    #     # title_y     = 0.85
    #     show        = show
    # )

    # # All results
    # plot_df = full_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
    # plot_df = pd.melt(plot_df).rename(columns = {"value" : x, "variable" : "component"})

    # _ = create_plot(
    #     x           = x,
    #     runname     = "1000",
    #     plot_type   = "histogram",
    #     df          = plot_df,
    #     color       = "component",
    #     # title       = f"{runname} galaxies: distribution of Sérsic indices for by-eye successful models"
    #     # title_y     = 0.85
    #     show        = show
    # )
    
# ============================================================================================================================================================
# MAGNITUDE HISTOGRAMS 
# ============================================================================================================================================================
    
    # TODO: Combine these into one plot either using facet or go
    # https://plotly.com/python/subplots/
    # or https://plotly.com/python/facet-plots/

    x1   = "magnitude_sersic_1"
    x2   = "magnitude_sersic_2"
    x    = "m"
    fcol = "domain"
    
    # TODO: BULGE SHOULD BE RED, DISK SHOULD BE BLUE
    plot_df = df_container.success_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
    plot_df = pd.melt(plot_df).rename(columns = {"value" : x, "variable" : "component"})
    plot_df[fcol] = "success"

    plot_df1 = df_container.full_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
    plot_df1 = pd.melt(plot_df1).rename(columns = {"value" : x, "variable" : "component"})
    plot_df1[fcol] = "all models"
    
    to_concat = [plot_df, plot_df1]
    runname   = f"{basename}_success-vs-all"
    
    if incl_by_eye:
        # By eye success
        plot_df2 = df_container.by_eye_success_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
        plot_df2 = pd.melt(plot_df2).rename(columns   = {"value" : x, "variable" : "component"})
        plot_df2[fcol] = "by-eye success"
        to_concat.append(plot_df2)
        runname   = f"{basename}_by-eye-vs-success-vs-all"
        
    plot_df = pd.concat(to_concat, axis = 0)

    # Bulge -- Redder
    color_1 = px.colors.qualitative.Plotly[1]
    # Disk -- Bluer
    color_2 = px.colors.qualitative.Plotly[0]
    colors = [color_1, color_2]
    
    _ = create_plot(
        x                       = x,
        runname                 = runname,
        plot_type               = "histogram",
        df                      = plot_df,
        output_image_dir        = output_image_dir,
        histnorm                = "probability",
        color                   = "component",
        color_discrete_sequence = colors,
        facet_col               = fcol,
        xaxis_range             = kwargs.get("xaxis_range_mag_hist", None), # [10, 20],
        yaxis_range             = kwargs.get("yaxis_range_mag_hist", None), # [0, 0.15],
        # title       = f"{runname} galaxies: distribution of magnitudes for by-eye successful models"
        # title_y     = 0.85
        show                    = show,
        write                   = write
    )

    # By eye
    # figure1 = create_plot(
    #     x           = x,
    #     runname     = "1000_by-eye",
    #     plot_type   = "histogram",
    #     df          = plot_df,
    #     color       = "component",
    #     xaxis_range = [10, 17],
    #     # title       = f"{runname} galaxies: distribution of magnitudes for by-eye successful models"
    #     # title_y     = 0.85
    #     show        = show,
    #     write       = False
    # )

    # # All results
    # plot_df = full_df[[x1,x2]].rename(columns = {x1 : "sersic_1", x2 : "sersic_2"})
    # plot_df = pd.melt(plot_df).rename(columns = {"value" : x, "variable" : "component"})

    # figure2 = create_plot(
    #     x           = x,
    #     runname     = "1000",
    #     plot_type   = "histogram",
    #     df          = plot_df,
    #     color       = "component",
    #     xaxis_range = [10, 18],
    #     # title       = f"{runname} galaxies: distribution of magnitudes for by-eye successful models"
    #     # title_y     = 0.85
    #     show        = show,
    #     write       = False
    # )
    
# ============================================================================================================================================================
# ALEN HISTOGRAM
# ============================================================================================================================================================
    _ = create_plot(
        x                = "alen_ratio",
        runname          = basename,
        plot_type        = "histogram",
        df               = df_container.full_df,
        output_image_dir = output_image_dir,
        xaxis_title      = "alen ratio",
        multi            = False,
        # title       = f"{runname} galaxies: distribution of alen ratios for all models"
        # title_y     = 0.85
        show             = show,
        write            = write
    )
    
# ============================================================================================================================================================
# SCATTER OF PITCH ANGLE DIFFERENCES
# ============================================================================================================================================================
    
    x     = "observation"
    y     = "model"
    color = "difference"

    pre_pa  = df_container.full_df[["pre_pa1" , "pre_pa2"]].mean(axis = 1)
    post_pa = df_container.full_df[["post_pa1", "post_pa2"]].mean(axis = 1)
    plot_df = pd.concat([pre_pa, post_pa], axis = 1).rename(columns = {0 : x, 1 : y})
    plot_df[color] = abs(plot_df[x] - plot_df[y])

    _ = create_plot(
        x                = x,
        y                = y,
        runname          = f"{basename}_pa_diff",
        plot_type        = "scatter",
        df               = plot_df,
        output_image_dir = output_image_dir,
        color            = color,
        width_multiplier = 1,
        # title     = "Pitch angle difference reported by SpArcFiRe, model vs observation"
        # title_y   = 0.85
        show             = show,
        write            = write
    )
    
# ============================================================================================================================================================
# SCATTER OF PITCH ANGLE DIFFERENCES
# ============================================================================================================================================================

    _ = create_plot(
        x                = "pa_diff_galaxy",
        runname          = basename,
        plot_type        = "ecdf",
        df               = df_container.full_df,
        output_image_dir = output_image_dir,
        xaxis_title      = "Pitch Angle Difference (deg)",
        cutoff_val       = kwargs.get("pa_cutoff_val", 10),
        # title       = f"ECDF of pitch angle difference reported by SpArcFiRe, model vs observation"
        # title_y     = 0.85
        show             = show,
        write            = write
    )

In [473]:
def create_quantiles(
    out_dir, 
    df, 
    method,
    **kwargs
):
    print_latex = kwargs.get("print_latex", True)
    copy_png    = kwargs.get("copy_png", False)
    
    # Just in case
    df.sort_values(by = method, inplace = True)

    # Expect that if there exists more than one runname,
    # then we're working with combined ata
    runnames = list(set(df.runname))
    if len(runnames) > 1:
        prefixes = list(set([i.split("_")[0] for i in runnames]))
        if len(prefixes) == 1:
            runname = f"{prefixes[0]}_combined"
        else:
            runname = "combined"
    else:
        runname = runnames[0]
        
    success_dir = pj(out_dir, runname, f'{runname}_galfit_png')
    print_latex_file = pj(out_dir, runname, f"{runname}_for_latex.txt")
    
    if not exists(success_dir):
        os.makedirs(success_dir)
    
    quantile           = ["0", "20", "40", "60", "80"]
    quantiled_galaxies = []
    
    print_latex_all = []
    if print_latex:
        
        if exists(print_latex_file):
            print("Deleting old latex output file...")
            os.remove(print_latex_file)
            
        print(f"Writing latex to file {print_latex_file}")
    
    for q in quantile:
        #vprint(print_latex, f"{q} &")
        print_latex_all.append(f"{q} &")
        
        if copy_png:
            quantile_dir = pj(success_dir, f"{runname}_all_quantile", f"quantile_{q}")
            if exists(quantile_dir):
                shutil.rmtree(quantile_dir)
            os.makedirs(quantile_dir)
              
        interp_df = df[method][df[method] >= df[method].quantile(0.01*float(q), interpolation='lower')]
        for count, (index, value) in enumerate(interp_df.items()):
            #if count < 5:
            #    continue
            if count == 8:
                break

            gname = index
            #print(q, i)
            #vprint(print_latex, f"{initial_str}{gname + '_combined.png'}{end_str}")
            rname = df.loc[index, "runname"]
            
            temp_str    = f"images/{rname}/{rname}_all_quantile/quantile_"
            initial_str = f"    \includegraphics[height=0.18\\textheight]{{{temp_str}{q}/"
            
            end_str = "} &"
            if count == 7 or count == len(interp_df) - 1:
                end_str = "} \\\\"
                
            print_latex_all.append(f"{initial_str}{gname + '_combined.png'}{end_str}")

            if copy_png:
                png_dir = pj(out_dir, rname, f'{rname}_galfit_png')
                shutil.copy(pj(png_dir, f"{gname}_combined.png"), quantile_dir)

            quantiled_galaxies.append(gname)
                
            #sp(f"cp {pj(out_dir, 'by_eye_success', gname + '_combined.png')} {pj(success_dir, 'all_quantile', 'quantile_' + q)}")
            
    if print_latex:           
        with open(print_latex_file, "w") as plf:
            plf.write("\n".join(print_latex_all))
            plf.write("\n")
            
    if copy_png:
        # Tar it all up!
        sp(f"tar -czvf {pj(out_dir, runname, runname)}_all_quantile.tar.gz -C {success_dir} {runname}_all_quantile")
        
    return quantiled_galaxies

In [474]:
def fprint(input_str, fill_char = "*", fill_len = 100):
    input_str = f" {input_str} "
    print()
    print(f"{input_str:{fill_char}^{fill_len}}")
    print()

In [475]:
def main(
    run_path, 
    *basenames, 
    **kwargs
):
    # Set some path variables and things
    run_path = run_path
    
    if in_notebook():
        run_path = run_path.replace("ics-home", "portmanm")

    in_dir  = kwargs.get("in_dir", pj(run_path, "sparcfire-in"))
    out_dir = kwargs.get("tmp_dir", pj(run_path, "sparcfire-out"))
    tmp_dir = kwargs.get("out_dir", pj(run_path, "sparcfire-tmp"))

    output_image_dir = kwargs.get("output_image_dir", pj(run_path, "for_paper_images"))
    if not exists(output_image_dir):
        os.makedirs(output_image_dir)
        
    method          = kwargs.get("method", "nmr_x_1-p")
    nmr             = "norm_masked_residual"
    
    global total_galaxies
    total_galaxies = get_total_galaxies(in_dir = in_dir, out_dir = out_dir)
    
    # FUNCTIONS OPTIONS
    incl_by_eye   = kwargs.get("incl_by_eye", False)
    by_eye_subset = kwargs.get("by_eye_subset", False)
    write         = kwargs.get("write", False)
    show          = kwargs.get("show", False)
    print_latex   = kwargs.get("print_latex", True)
    copy_png      = kwargs.get("copy_png", True)
    
    # Getting ready
    all_results  = {}
    plot_options = kwargs.get("plot_options", {bname : {} for bname in basenames})
    
    # LOOPING THROUGH NAMES GIVEN FOR ANALYSIS
    for basename in basenames:
        # RESIDUAL ANALYSIS
        fprint(f"PERFORMING RESIDUAL ANALYSIS FOR {basename}")
        analysis_results  = residual_analysis(
            in_dir              = in_dir, 
            out_dir             = out_dir, 
            basename            = basename,
            method              = method,
            incl_by_eye         = incl_by_eye,
            by_eye_subset       = by_eye_subset,
            pa_cutoff_val       = kwargs.get("pa_cutoff_val", 10),
            residual_cutoff_val = kwargs.get("residual_cutoff_val", 0.5),
            alen_cutoff_val     = kwargs.get("alen_cutoff_val", 0.007)
        )
        
        # Collating
        all_results[basename] = analysis_results
        
        if write or show:
            # OUTPUTTING PLOTS
            fprint("CREATING PLOTS")
            _ = create_all_plots(
                analysis_results, 
                method, 
                basename, 
                output_image_dir, 
                incl_by_eye = incl_by_eye,
                show        = show,
                pa_cutoff_val       = kwargs.get("pa_cutoff_val", 10),
                residual_cutoff_val = kwargs.get("residual_cutoff_val", 0.5),
                #alen_cutoff_val     = kwargs.get("alen_cutoff_val", 0.007)
                **plot_options[basename]
                #xaxis_range_mag_hist = [10, 20],
                #yaxis_range_mag_hist = [0, 0.15]
            )

        if print_latex or copy_png:
            if incl_by_eye:
                quantile_df = analysis_results.by_eye_success_df
            else:
                quantile_df = analysis_results.success_df

            fprint("QUANTILING IMAGES FROM RESULTS")
            galaxy_set_q = create_quantiles(
                out_dir, 
                #basename, 
                quantile_df,
                method,
                **kwargs
                # print_latex = print_latex, 
                # copy_png = copy_png
            )
        
        # Unfortunately have to do this after and have the user generate the pngs from here
        # in order to rerun create_quantiles
        if kwargs.get("prep_for_quantile", False):
            fprint("JUST KIDDING, EXTRACTING QUANTILED MODELS TO BE CONVERTED TO PNG")
            
            to_untar = ' '.join([f"./{gname}_galfit_out.fits" for gname in galaxy_set_q])
            tar_file = f"{pj(out_dir, basename, basename)}_galfits.tar.gz"
            sp(f"tar -xzvf {tar_file} --occurrence {to_untar}")

            _ = [shutil.move(f"{gname}_galfit_out.fits", f"{pj(out_dir, basename, basename)}_galfits")
                 for gname in galaxy_set_q
                ]
            
            print(f"Please generate the pngs corresponding with the fits in the {pj(out_dir, basename, basename)}_galfits directory.")
            print("You may then proceed to run the 'create_quantiles' function again with copy_png set to True.")
    
    if len(basenames) > 1:
        fprint("COMBINING RESULTS FROM ALL RUNS FED IN")
        combined = combine_multi_run_results(
            method,
            *all_results.values(),
            df_names      = basenames,
            incl_by_eye   = incl_by_eye,
            by_eye_subset = by_eye_subset
        )
        
        prefixes = list(set([i.split("_")[0] for i in basenames]))
        if len(prefixes) == 1:
            new_basename = f"{prefixes[0]}_combined"
        else:
            new_basename = "combined"
        
        all_results[new_basename] = combined
        
        if write or show:
            try:
                # OUTPUTTING PLOTS
                fprint("CREATING PLOTS")
                _ = create_all_plots(
                    combined, 
                    method, 
                    new_basename, 
                    output_image_dir, 
                    incl_by_eye = incl_by_eye,
                    show        = show,
                    pa_cutoff_val       = kwargs.get("pa_cutoff_val", 10),
                    residual_cutoff_val = kwargs.get("residual_cutoff_val", 0.5),
                    **plot_options[new_basename]
                    #xaxis_range_mag_hist = [10, 20],
                    #yaxis_range_mag_hist = [0, 0.15]
                )
            except KeyError as ke:
                print(f"Were plot options specified with the correct combined basename, {new_basename}?")
                print("Proceeding without plot options.")
                _ = create_all_plots(
                    combined, 
                    method, 
                    new_basename, 
                    output_image_dir, 
                    incl_by_eye = incl_by_eye,
                    show        = show,
                    pa_cutoff_val       = kwargs.get("pa_cutoff_val", 10),
                    residual_cutoff_val = kwargs.get("residual_cutoff_val", 0.5),
                    #**plot_options[new_basename]
                    #xaxis_range_mag_hist = [10, 20],
                    #yaxis_range_mag_hist = [0, 0.15]
                )

        if print_latex or copy_png:
            if incl_by_eye:
                quantile_df = combined.by_eye_success_df
            else:
                quantile_df = combined.success_df

            fprint("QUANTILING IMAGES FROM RESULTS")
            galaxy_set_q = create_quantiles(
                out_dir, 
                #basename, 
                quantile_df,
                method,
                **kwargs
                # print_latex = print_latex, 
                # copy_png = copy_png
            )
        
        # Unfortunately have to do this after and have the user generate the pngs from here
        # in order to rerun create_quantiles
        if kwargs.get("prep_for_quantile", False):
            fprint("JUST KIDDING, EXTRACTING QUANTILED MODELS TO BE CONVERTED TO PNG")
            
            to_untar = ' '.join([f"./{gname}_galfit_out.fits" for gname in galaxy_set_q])
            tar_file = f"{pj(out_dir, new_basename, new_basename)}_galfits.tar.gz"
            sp(f"tar -xzvf {tar_file} --occurrence {to_untar}")

            _ = [shutil.move(f"{gname}_galfit_out.fits", f"{pj(out_dir, new_basename, new_basename)}_galfits")
                 for gname in galaxy_set_q
                ]
            
            print(f"Please generate the pngs corresponding with the fits in the {pj(out_dir, basename, basename)}_galfits directory.")
            print("You may then proceed to run the 'create_quantiles' function again with copy_png set to True.")
        
    fprint("DONE!!!")
    
    # combined_bool_df only if applicable
    # {basename : namedtuple (fields below), "combined_bool_df" : combined_bool_df}
    # full_df, success_df, not_success_df, by_eye_success_df, by_eye_not_success_df
    return all_results
    

In [483]:
if __name__ == "__main__":
    galaxy_set_14_results = main(
        "testing_python_control", 
        "14_NC2", 
        "14_NC3",
        incl_by_eye = True,
        write       = True,
        copy_png    = True,
        print_latex = True
    )


***************************** PERFORMING RESIDUAL ANALYSIS FOR 14_NC2 ******************************

15 galaxy models generated.
14 models pass score cutoff.
13 pass pitch angle cutoff
13 pass arm length ratio cutoff
7 pass chiral agreement
7 or 50.00% (7/14) succeed by SpArcFiRe+Score
0/14 models failed reprocessing by SpArcFiRe

Number of *total* by eye successful galaxies
5   => 33.33%
Number of by eye successful galaxies that SpArcFiRe *could* process
5/5 => 100.00%

Number of *total* by eye not successful galaxies
9   => 60.00%
Number of by eye not successful galaxies that SpArcFiRe *could* process
9/9 => 100.00%

False positive rate (by eye) -- 2/(2 + 9) = 18.18%
False negative rate (by eye) -- 0/(0 + 5) = 0.00%

****************************************** CREATING PLOTS ******************************************





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




********************************** QUANTILING IMAGES FROM RESULTS **********************************

Deleting old latex output file...
Writing latex to file testing_python_control/sparcfire-out/14_NC2/14_NC2_for_latex.txt

***************************** PERFORMING RESIDUAL ANALYSIS FOR 14_NC3 ******************************

15 galaxy models generated.
14 models pass score cutoff.
13 pass pitch angle cutoff
14 pass arm length ratio cutoff
12 pass chiral agreement
12 or 85.71% (12/14) succeed by SpArcFiRe+Score
0/14 models failed reprocessing by SpArcFiRe

Number of *total* by eye successful galaxies
11    => 73.33%
Number of by eye successful galaxies that SpArcFiRe *could* process
11/11 => 100.00%

Number of *total* by eye not successful galaxies
3   => 20.00%
Number of by eye not successful galaxies that SpArcFiRe *could* process
3/3 => 100.00%

False positive rate (by eye) -- 3/(3 + 3) = 50.00%
False negative rate (by eye) -- 2/(2 + 11) = 15.38%

************************************



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




********************************** QUANTILING IMAGES FROM RESULTS **********************************

Deleting old latex output file...
Writing latex to file testing_python_control/sparcfire-out/14_NC3/14_NC3_for_latex.txt

****************************** COMBINING RESULTS FROM ALL RUNS FED IN ******************************

Joining 2 attempts...
Total success by combining SpArcFiRe + score: 12/14
i.e. success_n | success_m | ...

Total success by combining SpArcFiRe + best score: 12/14
i.e. minima -> success_minima


Checking against the by eye determination...
Total success by eye: 11/14

By eye captured by either score: 9/11
i.e. (success_m | success_n | ...) & by eye

By eye captured by best score: 10/11
i.e. minima -> (success_minima & by eye)

By eye captured by SpArcFiRe or choosing best score between the two runs: 10/11
i.e. (minima -> [success_minima & by eye]) | ([success_m | success_n | ...] & by eye)


By eye success found by SpArcFiRe + score:  9/11 = 81.82%
By eye not suc

<Figure size 640x480 with 0 Axes>

In [484]:
if __name__ == "__main__":
    
    # NC2
    # xaxis_range_mag_hist = [10, 20],
    # yaxis_range_mag_hist = [0, 0.15]
    
    # NC3
    # xaxis_range_mag_hist = [10, 22],
    # yaxis_range_mag_hist = [0, 0.3]
    
    plot_options = {
        "1000_NC2" : {
            "xaxis_range_mag_hist"    : [10, 20], #, "yaxis_range_mag_hist" : [0, 0.15]
            "xaxis_range_sersic_hist" : [0, 5],
            "yaxis_range_sersic_hist" : [0, 0.5]
        },
        "1000_NC3" : {
            "xaxis_range_mag_hist"    : [10, 22], #, "yaxis_range_mag_hist" : [0, 0.35]
            "xaxis_range_sersic_hist" : [0, 5],
            "yaxis_range_sersic_hist" : [0, 0.5]
        },
    }
    
    galaxy_set_elps_1000_results = main(
        "run13_for_paper", 
        "1000_NC2", 
        "1000_NC3",
        pa_cutoff_val       = 5,
        alen_cutoff_val     = 0.5,
        residual_cutoff_val = 0.007,
        incl_by_eye = True,
        write       = True,
        copy_png    = True,
        print_latex = True,
        plot_options = plot_options
    )
    


**************************** PERFORMING RESIDUAL ANALYSIS FOR 1000_NC2 *****************************

1000 galaxy models generated.
909 models pass score cutoff.
533 pass pitch angle cutoff
896 pass arm length ratio cutoff
708 pass chiral agreement
367 or 36.70% (367/1000) succeed by SpArcFiRe+Score
38/1000 models failed reprocessing by SpArcFiRe

Number of *total* by eye successful galaxies
406     => 40.60%
Number of by eye successful galaxies that SpArcFiRe *could* process
405/406 => 99.75%

Number of *total* by eye not successful galaxies
594     => 59.40%
Number of by eye not successful galaxies that SpArcFiRe *could* process
557/594 => 93.77%

False positive rate (by eye) -- 145/(145 + 594) = 19.62%
False negative rate (by eye) -- 184/(184 + 406) = 31.19%

****************************************** CREATING PLOTS ******************************************





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




********************************** QUANTILING IMAGES FROM RESULTS **********************************

Deleting old latex output file...
Writing latex to file run13_for_paper/sparcfire-out/1000_NC2/1000_NC2_for_latex.txt

**************************** PERFORMING RESIDUAL ANALYSIS FOR 1000_NC3 *****************************

1000 galaxy models generated.
896 models pass score cutoff.
550 pass pitch angle cutoff
861 pass arm length ratio cutoff
693 pass chiral agreement
335 or 33.50% (335/1000) succeed by SpArcFiRe+Score
37/1000 models failed reprocessing by SpArcFiRe

Number of *total* by eye successful galaxies
422     => 42.20%
Number of by eye successful galaxies that SpArcFiRe *could* process
418/422 => 99.05%

Number of *total* by eye not successful galaxies
578     => 57.80%
Number of by eye not successful galaxies that SpArcFiRe *could* process
545/578 => 94.29%

False positive rate (by eye) -- 116/(116 + 578) = 16.71%
False negative rate (by eye) -- 203/(203 + 422) = 32.48%

*****



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




********************************** QUANTILING IMAGES FROM RESULTS **********************************

Deleting old latex output file...
Writing latex to file run13_for_paper/sparcfire-out/1000_NC3/1000_NC3_for_latex.txt

****************************** COMBINING RESULTS FROM ALL RUNS FED IN ******************************

Joining 2 attempts...
Total success by combining SpArcFiRe + score: 520/1000
i.e. success_n | success_m | ...

Total success by combining SpArcFiRe + best score: 372/1000
i.e. minima -> success_minima


Checking against the by eye determination...
Total success by eye: 569/1000

By eye captured by either score: 371/569
i.e. (success_m | success_n | ...) & by eye

By eye captured by best score: 467/569
i.e. minima -> (success_minima & by eye)

By eye captured by SpArcFiRe or choosing best score between the two runs: 522/569
i.e. (minima -> [success_minima & by eye]) | ([success_m | success_n | ...] & by eye)


By eye success found by SpArcFiRe + score:  371/569 = 65.20

<Figure size 640x480 with 0 Axes>

In [None]:
galaxy_set_elps_1000_results["1000_combined"].full_df.run_name

In [129]:
if __name__ == "__main__":
    
    plot_options = {
        "29k_NC2" : {
            "xaxis_range_mag_hist" : [9, 25], #"yaxis_range_mag_hist" : [0, 0.15]
            "xaxis_range_sersic_hist" : [0, 5],
            "yaxis_range_sersic_hist" : [0, 0.5]
        },
        "29k_NC3" : {
            "xaxis_range_mag_hist"    : [9, 25], #"yaxis_range_mag_hist" : [0, 0.3]
            "xaxis_range_sersic_hist" : [0, 5],
            "yaxis_range_sersic_hist" : [0, 0.5]
        },
    }
    
    galaxy_set_29k_results = main(
        "29k_galaxies", 
        "29k_NC2", 
        "29k_NC3",
        plot_options  = plot_options,
        pa_cutoff_val   = 7,
        alen_cutoff_val = 0.7,
        write         = False,
        incl_by_eye   = True,
        by_eye_subset = 1000,
        copy_png      = False,
        print_latex   = False
    )


***************************** PERFORMING RESIDUAL ANALYSIS FOR 29k_NC2 *****************************

28912 galaxy models generated.
28900 models pass score cutoff.



Columns (70) have mixed types. Specify dtype option on import or set low_memory=False.



16836 pass pitch angle cutoff
14663 pass arm length ratio cutoff
17012 pass chiral agreement
7089 or 24.52% (7089/28911) succeed by SpArcFiRe+Score
1685/28912 models failed reprocessing by SpArcFiRe

Working on a subset of 1000 galaxies
Number of *total* by eye successful galaxies
358     => 35.80%
Number of by eye successful galaxies that SpArcFiRe *could* process
356/358 => 99.44%

Number of *total* by eye not successful galaxies
642     => 64.20%
Number of by eye not successful galaxies that SpArcFiRe *could* process
578/642 => 90.03%

False positive rate (by eye) -- 87/(87 + 642) = 11.93%
False negative rate (by eye) -- 202/(202 + 358) = 36.07%

***************************** PERFORMING RESIDUAL ANALYSIS FOR 29k_NC3 *****************************

28912 galaxy models generated.
28878 models pass score cutoff.



Columns (70) have mixed types. Specify dtype option on import or set low_memory=False.



18538 pass pitch angle cutoff
14899 pass arm length ratio cutoff
18358 pass chiral agreement
8149 or 28.19% (8149/28904) succeed by SpArcFiRe+Score
1241/28912 models failed reprocessing by SpArcFiRe

Working on a subset of 1000 galaxies
Number of *total* by eye successful galaxies
461     => 46.10%
Number of by eye successful galaxies that SpArcFiRe *could* process
459/461 => 99.57%

Number of *total* by eye not successful galaxies
539     => 53.90%
Number of by eye not successful galaxies that SpArcFiRe *could* process
498/539 => 92.39%

False positive rate (by eye) -- 79/(79 + 539) = 12.78%
False negative rate (by eye) -- 263/(263 + 461) = 36.33%

****************************** COMBINING RESULTS FROM ALL RUNS FED IN ******************************

Joining 2 attempts...
Total success by combining SpArcFiRe + score: 12521/28912
i.e. success_n | success_m | ... 

Total success by combining SpArcFiRe + best score: 8036/28912
i.e. minima -> success_minima


Checking against the by eye det

In [517]:
galaxy_set_29k_results["29k_NC3"].full_df.columns

Index(['magnitude_sersic_1', 'effective_radius_sersic_1',
       'sersic_index_sersic_1', 'axis_ratio_sersic_1',
       'position_angle_sersic_1', 'position_x_sersic_1', 'position_y_sersic_1',
       'skip_sersic_1', 'magnitude_sersic_2', 'effective_radius_sersic_2',
       'sersic_index_sersic_2', 'axis_ratio_sersic_2',
       'position_angle_sersic_2', 'position_x_sersic_2', 'position_y_sersic_2',
       'skip_sersic_2', 'magnitude_sersic_3', 'effective_radius_sersic_3',
       'sersic_index_sersic_3', 'axis_ratio_sersic_3',
       'position_angle_sersic_3', 'position_x_sersic_3', 'position_y_sersic_3',
       'skip_sersic_3', 'inner_rad_power_3', 'outer_rad_power_3',
       'cumul_rot_power_3', 'powerlaw_index_power_3', 'inclination_power_3',
       'sky_position_angle_power_3', 'F1_amplitude_fourier_3',
       'F1_phase_angle_fourier_3', 'F3_amplitude_fourier_3',
       'F3_phase_angle_fourier_3', 'skip_fourier_3', 'sky_background_sky_4',
       'dsky_dx_sky_4', 'dsky_dy_sky_4', 's

In [518]:
df_to_evaluate = galaxy_set_29k_results["29k_NC2"]
pre_pa  = df_to_evaluate.full_df[["pre_pa1" , "pre_pa2"]].mean(axis = 1).dropna()
post_pa = df_to_evaluate.full_df[["post_pa1", "post_pa2"]].mean(axis = 1).dropna()

In [519]:
combined_index = list(set(pre_pa.index).intersection(set(post_pa.index)))
post_pa = post_pa[combined_index]
pre_pa  = pre_pa[combined_index]

In [498]:
print("29k NC2")
print(pearsonr(pre_pa,post_pa))

29k NC2
PearsonRResult(statistic=0.006045526757101547, pvalue=0.3399548475956398)


In [499]:
df_to_evaluate = galaxy_set_29k_results["29k_NC3"]
pre_pa  = df_to_evaluate.full_df[["pre_pa1" , "pre_pa2"]].mean(axis = 1).dropna()
post_pa = df_to_evaluate.full_df[["post_pa1", "post_pa2"]].mean(axis = 1).dropna()

In [500]:
combined_index = list(set(pre_pa.index).intersection(set(post_pa.index)))
post_pa = post_pa[combined_index]
pre_pa  = pre_pa[combined_index]

In [501]:
print("29k NC3")
print(pearsonr(pre_pa,post_pa))

29k NC3
PearsonRResult(statistic=0.0127772371657911, pvalue=0.04143568397346512)


In [None]:
if __name__ == "__main__":
    galaxy_set_1000_results = main("29k_galaxies_gband", "29k_NC3_g", incl_by_eye = False)

In [None]:
def generate_images_old(input_df, png_dir:str, variable_name:str, custom_range = None):
    images_out = []
    
    if not custom_range:
        custom_range = range(0, len(input_df), 50) 
        
    count = 0
    for index_num in custom_range:
        g_variable = input_df.iloc[index_num]
        gname = g_variable.name
        variable_value = g_variable[variable_name]#.norm_masked_residual

        height = 500
        width = 500
        size = (height, width)
        #out_str = galaxy_info.name.replace("galfit_out.fits", "combined.png").strip()
        out_str = f"{gname}_combined.png"
        #print(out_str)
        
        
        images_out.append(Image(filename = pj(png_dir, out_str), width=width, height=height))
            
        print(f"{gname}, sorted #: {index_num}")
        print(f"{variable_name} = {variable_value:.6f}")
        #print(f"Dim: {galaxy_info['image_size']}x{galaxy_info['image_size']}")
        print()
        
    return images_out

In [None]:
images_to_disp = generate_images_old(full_df, pj(out_dir, "galfit_png"), "diff") #, range(0,len(full_df)))

In [None]:
display(*images_to_disp)

In [None]:
def generate_images(input_df, png_dir:str, cutoff_val = 0.01, variable_name = "norm_masked_residual", custom_range = None):
    images_below_cutoff = []
    images_above_cutoff = []
    
    if not custom_range:
        custom_range = range(0, len(input_df), 50) 
    count = 0
    for index_num in custom_range:
        g_variable = input_df.iloc[index_num]
        gname = g_variable.name
        variable_value = g_variable[variable_name]#.norm_masked_residual

        # iloc returns a series, name returns the name of the row

        
        # print(f"chi^2/nu = {galaxy_info['chi^2_nu']:.2f}")
        # print(f"chi^2 = {galaxy_info['chi^2']:.2f}")
        #print(f"Norm GALFIT residual = {norm_galfit_residual:.4f}")


        # galfit_cmap = grayscale_cmap('RdBu')
        # residual_plot = plt.imshow(np.flipud(masked_residual[:,:])) #, norm=colors.LogNorm())
        # residual_plot.set_cmap('Greys')
        # residual_plot.set_cmap(galfit_cmap)
        # cbar = plt.colorbar()

        #plt.imshow(residual_plot)
        #imgplot = plt.imshow(arr[:, :, 0])
        height = 500
        width = 500
        size = (height, width)
        #out_str = galaxy_info.name.replace("galfit_out.fits", "combined.png").strip()
        out_str = f"{gname}_combined.png"
        #print(out_str)
        
        if variable_value < cutoff_val:
            images_below_cutoff.append(Image(filename = pj(png_dir, out_str), width=width, height=height))
            #images_below_cutoff.append(PIL.Image.open(pj(png_dir, out_str)).resize(size))
        else:
            count += 1
            if count == 1:
                print("="*80)
            images_above_cutoff.append(Image(filename = pj(png_dir, out_str), width=width, height=height))
            #images_above_cutoff.append(PIL.Image.open(pj(png_dir, out_str)).resize(size))

            
        print(f"{gname}, sorted #: {index_num}")
        print(f"{variable_name} = {variable_value:.6f}")
        #print(f"Dim: {galaxy_info['image_size']}x{galaxy_info['image_size']}")
        print()
        
    return images_below_cutoff, images_above_cutoff

In [None]:
png_dir = os.path.join(run_path, out_dir, "galfit_png")
#below, above = generate_images(residual_df, png_dir, cutoff_val = 0.013342, variable_name = analysis_var, custom_range = range(700, len(residual_df), 10) )
below, above = generate_images(residual_df, png_dir, cutoff_val = cutoff_val, variable_name = analysis_var, custom_range = range(800, len(residual_df), 10) )

In [None]:
display(*below)

In [None]:
display(*above)

In [None]:
# good_fit = "1237671262278582530"
# bad_fit = "1237668366388756890"

# good_fit_obj = OutputFits(pj(out_dir, good_fit, f"{good_fit}_galfit_out.fits"))
# bad_fit_obj = OutputFits(pj(out_dir, bad_fit, f"{bad_fit}_galfit_out.fits"))
# good_residual = good_fit_obj.residual.data

# scipy.stats.probplot(good_residual.flatten(), plot = plt)

In [None]:
# bad_residual = bad_fit_obj.residual.data
# scipy.stats.probplot(bad_residual.flatten(), plot = plt)

In [None]:
# Thanks to https://jakevdp.github.io/PythonDataScienceHandbook/04.07-customizing-colorbars.html
def grayscale_cmap(cmap):
    """Return a grayscale version of the given colormap"""
    cmap = plt.cm.get_cmap(cmap)
    colors = cmap(np.arange(cmap.N))
    
    # convert RGBA to perceived grayscale luminance
    # cf. http://alienryderflex.com/hsp.html
    RGB_weight = [0.299, 0.587, 0.114]
    luminance = np.sqrt(np.dot(colors[:, :3] ** 2, RGB_weight))
    colors[:, :3] = luminance[:, np.newaxis]
        
    return LinearSegmentedColormap.from_list(cmap.name + "_gray", colors, cmap.N)