In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.lines import Line2D
import seaborn as sns
from scipy import stats
from astropy.table import Table
import warnings
from astropy.utils.exceptions import AstropyWarning
import itertools
np.seterr(all='ignore')
warnings.simplefilter('ignore', category=AstropyWarning)
plt.style.use('dark_background')
plt.rcParams.update({'figure.facecolor':'#16191C'})
plt.rcParams.update({'axes.facecolor':'#16191C'})
%matplotlib widget
SA_DIR = '/media/data3/wiseman/stats/survival_analysis/'
FIG_DIR = SA_DIR+'figs/'
sns.set_color_codes(palette='colorblind')

First, read in data. Let's take the Pantheon SN Ia dataset (Scolnic et al. 2018), with over 1000 SNe Ia

In [4]:
def rewrite_mass_columns(df,mass_cn,err_cn,pm=False):
    '''A function to '''
    df.rename(columns={mass_cn:'logm',err_cn:'logm_err'},inplace=True)
    if pm:
        df['logm_err'] = np.abs(df['logm']-df['logm_err'])
    df['detection'] = pd.Series((df['logm_err']>0),index=df.index)
    return df

In [5]:
pantheon = pd.read_csv('/media/data3/wiseman/des/hosts/ps1_s18.dat',sep = ' ',skipinitialspace=True)
pantheon = pantheon[pantheon['HOST_LOGMASS']>5.01]
pantheon = rewrite_mass_columns(pantheon,'HOST_LOGMASS','HOST_LOGMASS_ERR')


In [6]:
def make_cdf(arr):
    sorted_arr = np.sort(arr)
    cumsum = np.cumsum(sorted_arr)
    cumsum /=cumsum[-1]
    return sorted_arr,cumsum

def perturb_arr_normal(vals,lowerrs,upperrs):
    peturbed = np.random.normal(vals,np.mean([lowerrs,upperrs]))
    return peturbed

In [10]:
f,ax=plt.subplots(figsize=(8,6.5))
sorted_ms,cdf = make_cdf(pantheon['logm'])
ax.step(sorted_ms,cdf,label='Pantheon SNe')
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
plt.savefig(FIG_DIR+'mass_CDF_raw')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

So this CDF looks fairly standard, with a long low-mass tail -- I bet these aren't real galaxies (No SNe explode in galaxies with log M < 7, there aren't enough stars. Anyway, we'll save that for later. For now, let's do some sample comparisons. Let's split the data up by their stretch parameter, x1, which is known to correlate with mass (e.g. Sullivan et al. 2010)

In [31]:
import itertools 
stretch_cond = pantheon['x1']<0
low_stretch_SNe = pantheon[stretch_cond]
high_stretch_SNe = pantheon[~stretch_cond]
colours = itertools.cycle(['purple','y'])
f,ax=plt.subplots(figsize=(8,6.5))
for SNe,sample_name in zip([low_stretch_SNe,high_stretch_SNe],['$x_1 <0$','$x_1\geq0$']):
    sorted_ms,cdf = make_cdf(SNe['logm'])
    ax.step(sorted_ms,cdf,label=sample_name,c=next(colours))
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)
plt.savefig(FIG_DIR+'mass_CDF_split_x1')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

The low stretch SNe are clearly shifted to higher mass than the high-stretch objects. Usually, people would just do a KS test and be done with it. So let's do that...

In [10]:
stats.ks_2samp(low_stretch_SNe.logm,high_stretch_SNe.logm)

KstestResult(statistic=0.2780124941320911, pvalue=1.6527576345946424e-17)

The p-value is tiny; we'd say that these samples are unambiguously drawn from different parent distributions. But, we've made a lot of assumptions -- How do we know that the objects don't have a mass uncertainty that's larger than their differences? What about the x1 split, how many objects are on the wrong side and could that change things? 

First, let's resample from the mass uncertainties and reestimate the CDF

In [33]:
f,ax=plt.subplots(figsize=(8,6.5))
for SNe,sample_name in zip([low_stretch_SNe,high_stretch_SNe],['$x_1 <0$','$x_1\geq0$']):
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)
    for i in range(100):
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)
plt.savefig(FIG_DIR+'mass_CDF_split_x1_bad_errs')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Uh-oh!
The estimated CDFs from resampled data are being dragged out to unphysical low and high masses due to some poorly constrained uncertainties -- our CDF is being entirely driven by the errors

In [12]:
pantheon['logm_err'].max()

99.0

In [13]:
len(pantheon[pantheon['logm_err']==99])

26

In [14]:
len(pantheon[pantheon['logm_err']>1])

111

Wow. 111 objects have a mass uncertainty greater than 1 order of magnitude. That's basically saying we have no idea -- there could be 10^9 stars, but also 10^11. Resampling from these broad distributions is completely smearing out our CDF. How should we deal with this? Let's get rid of them first, and see what happens.

In [34]:
colours = itertools.cycle(['purple','y'])
f,ax=plt.subplots(figsize=(8,6.5))
for SNe,sample_name in zip([low_stretch_SNe,high_stretch_SNe],['$x_1 <0$','$x_1\geq0$']):
    SNe = SNe[SNe['logm_err']<10]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)
    for i in range(100):
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x2b95833f27f0>

We're still a bit off. What's most interesting is what is happening at low and high masses - the CDF is getting blurred a bit. This is because the resampling will always act to pull a few of the high mass objects even higher, and a few of the low mass objects even lower. So maybe this is giving us a better estimate of the _true_ CDF compared to the noisy one given by the point estimates of the data.

Pantheon is a compilation of various surveys, each with their own host stellar mass estimation techniques. It appears that some defaulted to different logm_err values if the mass was poorly constrained:

In [35]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm_err,bins=np.logspace(-2,2,100))
ax.set_xscale('log')
ax.set_xlabel('Mass uncertainty',size=20)
ax.set_ylabel('Counts',size=20)
plt.savefig(FIG_DIR+'masserr_hist')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Ok; it looks like errors are lognormally distributed with a cut at about 2. Let's assume that anything with an error larger than that is uncontrained, i.e. the data point is a limit!

In [36]:
colours = itertools.cycle(['purple','y'])
f,ax=plt.subplots(figsize=(8,6.5))
for SNe,sample_name in zip([low_stretch_SNe,high_stretch_SNe],['$x_1 <0$','$x_1\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)
    for i in range(100):
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)
plt.savefig(FIG_DIR+'mass_CDF_split_x1_small_errs')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Ok, so we now have 100 resampled CDFs that seem to bound the data for almost the entire range of the data. We're getting there! Let's go and do that horrible KS-testing stuff.

In [20]:
ks_stats = []
for i in range(1000):
    samples =[]
    for SNe,sample_name in zip([low_stretch_SNe,high_stretch_SNe],['$x_1 <0$','$x_1\geq0$']):
        SNe = SNe[SNe['logm_err']<2]
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        samples.append(perturbed)
    stat,pval = stats.ks_2samp(samples[0],samples[1])
    ks_stats.append([stat,pval])
ks_stats = np.array(ks_stats)    

In [21]:
print('KS mean: ',np.mean(ks_stats,axis=0))
print('KS std: ',np.std(ks_stats,axis=0))

KS mean:  [2.86646661e-01 1.96287431e-15]
KS std:  [1.12467591e-02 7.96422153e-15]


In [22]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(ks_stats[:,1],bins=np.logspace(-15,-13,100))
ax.set_xscale('log')
ax.set_xlabel('KS p-value')
ax.set_ylabel('Count')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Count')

Ok, not a good example as those samples are clearly different. How about SN colour?

In [29]:
stretch_cond = pantheon['c']<0
blue_SNe = pantheon[stretch_cond]
red_SNe = pantheon[~stretch_cond]
colours = itertools.cycle(['c','r'])
f,ax=plt.subplots(figsize=(8,6.5))
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    #SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    
    ax.step(sorted_ms,cdf,label=sample_name,color=c)
ax.legend(fontsize=16)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
plt.savefig(FIG_DIR+'mass_CDF_split_c')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
f,ax=plt.subplots(figsize=(8,6.5))

samples = []
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    samples.append(SNe.logm)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)

stat,pval = stats.ks_2samp(samples[0],samples[1])
print('KS stat: ',stat,'KS pval: ',pval)
ks_stats = []
for i in range(1000):
    samples = []
    for SNe in [blue_SNe,red_SNe]:
        c = next(colours)
        SNe = SNe[SNe['logm_err']<2]
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
        samples.append(perturbed)
    stat,pval = stats.ks_2samp(samples[0],samples[1])
    ks_stats.append([stat,pval])
ks_stats = np.array(ks_stats)   
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)
plt.savefig(FIG_DIR+'mass_CDF_split_c_small_errs_resamp')
print('KS mean: ',np.mean(ks_stats,axis=0))
print('KS std: ',np.std(ks_stats,axis=0))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

KS stat:  0.06612608438232039 KS pval:  0.29053979263886043
KS mean:  [0.06987598 0.25631619]
KS std:  [0.00877381 0.12316067]


We can see that the mean KS values are slightly different from the fiducial one, although well within the uncertainty. But look at the variance of that p-value - it's huge. In this case, the samples are fairly close, but what if we choose a different colour to split our sample at

In [25]:
stretch_cond = pantheon['c']<-0.05
blue_SNe = pantheon[stretch_cond]
red_SNe = pantheon[~stretch_cond]
colours = itertools.cycle(['c','r'])

f,ax=plt.subplots(figsize=(8,6.5))

samples = []
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    samples.append(SNe.logm)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)
stat,pval = stats.ks_2samp(samples[0],samples[1])
print('KS stat: ',stat,'KS pval: ',pval)
ks_stats = []
for i in range(1000):
    samples = []
    for SNe in [blue_SNe,red_SNe]:
        c = next(colours)
        SNe = SNe[SNe['logm_err']<2]
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
        samples.append(perturbed)
    stat,pval = stats.ks_2samp(samples[0],samples[1])
    ks_stats.append([stat,pval])
ks_stats = np.array(ks_stats)   
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)
plt.savefig(FIG_DIR+'mass_CDF_split_c_negative_small_errs')
print('KS mean: ',np.mean(ks_stats,axis=0))
print('KS std: ',np.std(ks_stats,axis=0))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

KS stat:  0.08837029773206201 KS pval:  0.06855218953212117
KS mean:  [0.08585638 0.09531291]
KS std:  [0.00830981 0.05370809]


In [26]:
stretch_cond = pantheon['c']<-0.05
blue_SNe = pantheon[stretch_cond]
red_SNe = pantheon[~stretch_cond]
colours = itertools.cycle(['c','r'])

f,ax=plt.subplots(figsize=(8,6.5))

samples = []
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<5]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    samples.append(SNe.logm)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)
stat,pval = stats.ks_2samp(samples[0],samples[1])
print('KS stat: ',stat,'KS pval: ',pval)
ks_stats = []
for i in range(1000):
    samples = []
    for SNe in [blue_SNe,red_SNe]:
        c = next(colours)
        SNe = SNe[SNe['logm_err']<2]
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
        samples.append(perturbed)
    stat,pval = stats.ks_2samp(samples[0],samples[1])
    ks_stats.append([stat,pval])
ks_stats = np.array(ks_stats)   
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)
plt.savefig(FIG_DIR+'mass_CDF_split_c_negative_small_errs')
print('KS mean: ',np.mean(ks_stats,axis=0))
print('KS std: ',np.std(ks_stats,axis=0))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

KS stat:  0.08474691142868208 KS pval:  0.08080865507690549
KS mean:  [0.08564942 0.09742148]
KS std:  [0.00845497 0.05794093]


Our fiducial value is dangerously close to the "magical" 0.05, but our resamples produce a more confusing picture -- the mean is larger, but the spread suggests that there's a non-zero chance that the p-value is actually <0.05! 

## Whatever, cool But I came here for upper limits!

Let's have a look at where our limits lie in the mass plane

In [33]:
f,ax=plt.subplots(figsize=(8,6.5))
colours = itertools.cycle(['purple','y'])
for SNe,sample_name in zip([low_stretch_SNe,high_stretch_SNe],['$x_1 <0$','$x_1\geq0$']):
    limits = SNe[SNe['logm_err']>=2]
    SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,color=c,label=sample_name)
    for l in limits.logm.values:
        ax.vlines(l,0,0.2,color=c,linestyle='--')
ax.legend(fontsize=16)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
plt.savefig(FIG_DIR+'mass_CDF_split_x1_err_vlines')

colours = itertools.cycle(['c','r'])
f,ax=plt.subplots(figsize=(8,6.5))
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    limits = SNe[SNe['logm_err']>=2]
    SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,color=c,label=sample_name)
    for l in limits.logm.values:
        ax.vlines(l,0,0.2,color=c,linestyle='--')
ax.legend(fontsize=16)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
plt.savefig(FIG_DIR+'mass_CDF_split_c_err_vlines')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Hmm, that's not showing us much. What happens if we see how the CDF changes compared to the one with no limits, if we assume that the limits are detections?

In [29]:
f,ax=plt.subplots(figsize=(8,6.5))

colours = itertools.cycle(['c','r'])
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    #SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,label=sample_name,color=c)

for i in range(100):
    for SNe in [blue_SNe,red_SNe]:
        c = next(colours)
        SNe = SNe[SNe['logm_err']<2]
        perturbed = perturb_arr_normal(SNe['logm'],SNe['logm_err'],SNe['logm_err'])
        sorted_ms,cdf = make_cdf(perturbed)
        ax.step(sorted_ms,cdf,linewidth=0.025,color=c)
        
    
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.legend(loc='upper left',fontsize=16)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x2b9592939e50>

In [11]:
detections = pantheon[(pantheon['logm_err']<2)].sort_values('logm')#&(pantheon['logm']>=6.5)]
limits = pantheon[(pantheon['logm_err']>=2)].sort_values('logm')# &(pantheon['logm']>6.5)].sort_values('logm')

In [8]:
from des_sn_hosts.utils import stan_utility

In [57]:


data = dict(
    N_obs = len(detections),
    N_cens = len(limits),
    y_obs = detections.logm.values,
    y_cens = limits.logm.values
    )

from des_sn_hosts.utils import stan_utility
model =stan_utility.compile_model('stan_models/normal_survival.stan')
            
           

fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)

chains=fit.extract()

f,ax=plt.subplots()
ax.scatter(limits.logm,np.median(chains['latent_y_cens_raw'],axis=0))

all_log_ms = np.concatenate([np.mean(chains['latent_y_cens'],axis=0).T,detections.logm.values.T])

In [49]:
len(all_log_ms)

1002

In [50]:
f,ax=plt.subplots(figsize=(8,6.5))
for mass_array,name in zip([pantheon.logm,detections.logm,all_log_ms],['Raw data','Raw data (detections only)','Imputed']):
    sorted_ms,cdf = make_cdf(mass_array)
    ax.step(sorted_ms,cdf,label=name)
ax.legend()

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x2b9598534640>

In [51]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm,density=True,histtype='step',bins=25,label='Data: All values')
ax.hist(detections.logm,density=True,histtype='step',bins=25,label='Data: Detections')
ax.hist(all_log_ms,density=True,histtype='step',bins=25,label='Detections and imputed limits')
ax.legend(loc='upper left')
plt.savefig(FIG_DIR+'mass_hist_imputed_normal')

In [51]:
data = dict(
    N_obs = len(detections),
    y_obs = detections.logm.values,
    )
model =stan_utility.compile_model('stan_models/normal_nolimits.stan')
fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_91c80dea74a6643bc8a20be9337d6b89 NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


In [52]:
chains_nolims = fit.extract()

In [53]:
np.median(chains_nolims['mu'])

10.196623809631754

In [54]:
np.median(chains['mu'])

10.055843206699931

In [55]:
data = dict(
    N_obs = len(pantheon),
    y_obs = pantheon.logm.values,
    )
model =stan_utility.compile_model('stan_models/normal_nolimits.stan')
fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)
chains_all = fit.extract()

Using cached StanModel


In [56]:
np.median(chains_all['mu'])

10.143153809070622

In [68]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm,density=True,histtype='step',bins=25,label='Data: All values')
ax.hist(detections.logm,density=True,histtype='step',bins=25,label='Data: Detections')



f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm,density=True,histtype='step',bins=25)
ax.hist(detections.logm,density=True,histtype='step',bins=25)
xs = np.linspace(5,13,100)
ys = stats.norm(np.median(chains_nolims['mu']),np.median(chains_nolims['sigma'])).pdf(xs)
ax.step(xs,ys,label='Fit: detections only')
xs = np.linspace(5,13,100)
ys = stats.norm(np.median(chains_all['mu']),np.median(chains_all['sigma'])).pdf(xs)
ax.step(xs,ys,label='Fit: detections and limits')
ax.legend(loc='upper left')
plt.savefig(FIG_DIR+'mass_hist_imputed_normal')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [69]:
data = dict(
    N_obs = len(detections),
    y_obs = detections.logm.values,
    sigma_obs = detections.logm_err.values,
    )
model =stan_utility.compile_model('stan_models/normal_nolimits_errs.stan')
fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)

Using cached StanModel


In [70]:
chains_normal = fit.extract()

In [71]:
np.median(chains_normal['mu'])

10.25946903636849

In [72]:
data = dict(
    N_obs = len(pantheon),
    y_obs = pantheon.logm.values,
    sigma_obs = pantheon.logm_err.values,
    )
model =stan_utility.compile_model('stan_models/normal_nolimits_errs.stan')
fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)

Using cached StanModel


In [73]:
chains_all = fit.extract()
np.median(chains_all['mu'])
    

10.25773160770165

In [74]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm,density=True,histtype='step',bins=25)
ax.hist(detections.logm,density=True,histtype='step',bins=25)
xs = np.linspace(5,13,100)
ys = stats.norm(np.median(chains_normal['mu']),np.median(chains_normal['sigma'])).pdf(xs)
xs = np.linspace(5,13,100)
ys = stats.norm(np.median(chains_all['mu']),np.median(chains_all['sigma'])).pdf(xs)
ax.step(xs,ys)

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x2b959b4a1700>]

In [75]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm,density=True,histtype='step',bins=25,cumulative=True)
ax.hist(detections.logm,density=True,histtype='step',bins=25,cumulative=True)
xs = np.linspace(5,13,100)
ys = stats.norm(np.median(chains_normal['mu']),np.median(chains_normal['sigma'])).pdf(xs)
xs = np.linspace(5,13,100)
ys = stats.norm(np.median(chains_all['mu']),np.median(chains_all['sigma'])).pdf(xs)
ax.step(xs,make_cdf(ys)

SyntaxError: unexpected EOF while parsing (<ipython-input-75-346851d52248>, line 8)

In [51]:
data = dict(
    N_obs = len(detections),
    N_cens = len(limits),
    y_obs = detections.logm.values,
    y_cens = limits.logm.values,
    y_err = detections.logm_err.values,
    )
model =stan_utility.compile_model('stan_models/normal_survival_mix.stan')
fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)
chains_all_errs = fit.extract()

Using cached StanModel


In [54]:
f,ax=plt.subplots(figsize=(8,6.5))
ax.hist(pantheon.logm,density=True,histtype='step',bins=25,label='All')
ax.hist(detections.logm,density=True,histtype='step',bins=25,label='Detections only')
xs = np.linspace(5,13,100)
#ys = stats.norm(np.median(chains_all['mu1']),np.median(chains_all['sigma1'])).pdf(xs)+stats.norm(np.median(chains_all['mu2']),np.median(chains_all['sigma2'])).pdf(xs)
#ax.step(xs,ys)

all_log_ms = np.concatenate([np.median(chains_all_errs['latent_y_cens'],axis=0).T,detections.logm.values.T])
ax.hist(all_log_ms,density=True,histtype='step',bins=25,label='Imputed')
xs = np.linspace(5,13,100)
bw = ((np.max(xs)-np.min(xs))/100)
ys = np.median(chains_all_errs['theta'])*stats.norm(np.median(chains_all_errs['mu1']),np.median(chains_all_errs['sigma1'])).pdf(xs)+ (1-np.median(chains_all_errs['theta']))*stats.norm(np.median(chains_all_errs['mu2']),np.median(chains_all_errs['sigma2'])).pdf(xs)
ax.step(xs,ys)
ax.legend()

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x2af9a226dca0>

In [55]:
import arviz as az
az.plot_trace(fit,var_names=['mu1','sigma1','mu2','sigma2','latent_y_cens'],compact=True,)


  fig = plt.figure(**backend_kwargs)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x2af9bb85cf70>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x2af9a2560a00>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x2af9bb945a90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x2af9bb99b610>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x2af9bb59fe80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x2af9a255c700>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x2af9ba63f190>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x2af9bab37790>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x2af9a1617670>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x2af9bb47bd30>]],
      dtype=object)

In [None]:
help(az.plot_trace)

In [9]:
import lifelines

In [75]:
T = pantheon['logm']
E = pantheon['logm_err'].apply(lambda x: x<2).astype(int)
detections = pantheon[pantheon['logm_err']<2]

from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()

kmf.fit_left_censoring(T, E)


In [83]:
f,ax=plt.subplots(figsize=(8,6.5))
kmf.plot_cumulative_density(label='Kaplan-Meier left_censored',color='g')

sorted_ms,cdf = make_cdf(pantheon['logm'])
ax.step(sorted_ms,cdf,label='All data',color='y')
sorted_ms,cdf = make_cdf(detections['logm'])
ax.step(sorted_ms,cdf,label='Detections only',color='r')
sorted_ms,cdf = make_cdf(all_log_ms)
ax.step(sorted_ms,cdf,label='Detections and imputed limits (Single Gaussian)',color='c')
ax.legend()
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.set_xlim(5,12)
plt.tight_layout()
plt.savefig(FIG_DIR+'mass_hist_imputed_KM')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [43]:
from des_sn_hosts.utils import stan_utility

stretch_cond = pantheon['c']<0
blue_SNe = pantheon[stretch_cond]
red_SNe = pantheon[~stretch_cond]
colours = itertools.cycle(['c','r'])
imputed_data = []
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    f,ax=plt.subplots(figsize=(8,6.5))
    detections = SNe[SNe['logm_err']<2]
    limits = SNe[SNe['logm_err']>2]
    
    c = next(colours)
    data = dict(
        N_obs = len(detections),
        N_cens = len(limits),
        y_obs = detections.logm.values,
        y_cens = limits.logm.values
        )
    
    model =stan_utility.compile_model('stan_models/normal_survival.stan')

    fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)

    chains=fit.extract()

    all_log_ms = np.concatenate([np.mean(chains['latent_y_cens'],axis=0).T,detections.logm.values.T])
    sorted_ms,cdf = make_cdf(detections['logm'])
    ax.step(sorted_ms,cdf,label='%s: Detections only'%sample_name,color='grey')
    sorted_ms,cdf = make_cdf(all_log_ms)
    imputed_data.append([sorted_ms,cdf])
    ax.step(sorted_ms,cdf,label='%s: Detections and imputed limits (Single Gaussian)'%sample_name,color=c)
    ax.legend()
    ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
    ax.set_ylabel('$F(M)$',size=20)
    ax.set_xlim(6.5,12)
    plt.tight_layout()

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

{'N_obs': 561, 'N_cens': 47, 'y_obs': array([ 9.517 , 11.58  ,  8.647 ,  8.53  , 10.651 , 10.198 , 10.628 ,
       10.389 ,  9.879 , 10.005 , 10.635 ,  9.034 , 11.338 ,  9.397 ,
       11.382 ,  9.769 , 10.638 ,  9.642 , 10.809 ,  9.517 ,  9.48  ,
       10.485 ,  8.459 , 11.111 ,  9.882 ,  9.356 , 10.032 , 10.785 ,
        9.475 ,  9.325 , 10.855 ,  8.024 , 11.226 ,  8.631 ,  8.42  ,
        9.416 , 11.043 ,  7.466 ,  8.851 , 10.098 ,  9.21  ,  9.144 ,
       10.941 ,  7.738 ,  9.997 ,  9.287 ,  8.876 , 11.222 ,  9.015 ,
        9.004 , 10.471 ,  9.457 , 10.62  , 10.63  , 10.432 , 10.788 ,
        8.986 , 10.23  ,  8.98  , 10.429 ,  8.321 , 10.133 , 10.883 ,
       11.277 ,  9.74  , 10.905 , 10.083 ,  8.709 ,  9.512 , 10.766 ,
        9.3   ,  9.96  ,  8.861 , 10.627 ,  9.381 ,  9.169 , 10.821 ,
       10.391 , 10.695 ,  9.339 ,  8.751 ,  9.705 ,  9.414 ,  7.638 ,
        9.64  ,  8.199 ,  8.086 ,  9.929 ,  9.17  ,  9.888 , 10.623 ,
        7.542 , 10.235 ,  8.834 , 10.991 ,  9.98  , 

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

{'N_obs': 343, 'N_cens': 33, 'y_obs': array([10.831 ,  9.464 , 11.008 , 10.162 ,  8.73  ,  9.645 ,  7.966 ,
       10.215 , 10.496 ,  8.386 ,  9.326 ,  9.097 , 10.189 , 11.34  ,
       10.79  , 10.541 ,  8.706 , 10.057 , 11.199 ,  9.316 , 10.379 ,
        9.854 ,  9.534 , 10.244 , 10.977 ,  8.657 ,  8.557 ,  9.609 ,
       11.164 ,  8.008 ,  8.931 , 10.873 , 10.285 ,  9.265 , 10.461 ,
        8.984 ,  8.617 , 10.87  , 11.326 ,  9.459 , 10.211 , 10.38  ,
       10.418 ,  9.808 ,  9.314 , 10.416 , 10.246 , 10.96  ,  9.548 ,
       10.29  ,  8.895 ,  8.977 ,  8.305 ,  8.528 , 10.931 ,  9.239 ,
       10.111 ,  9.724 ,  8.425 , 10.277 , 10.977 ,  9.761 ,  9.418 ,
       10.011 , 10.4   ,  8.832 ,  9.138 , 10.078 ,  8.381 ,  9.994 ,
       10.855 ,  8.467 ,  9.808 ,  7.908 ,  9.589 ,  9.55  , 10.59  ,
        9.276 , 10.47  , 10.696 ,  9.27  , 11.351 , 10.041 , 10.231 ,
        9.664 , 10.054 ,  9.359 ,  9.225 ,  9.086 ,  9.959 ,  8.722 ,
       10.071 ,  9.323 , 10.696 , 10.216 ,  9.252 , 

In [44]:
f,ax=plt.subplots(figsize=(8,6.5))
for counter,sample_name in enumerate(['$c <0$','$c\geq0$']):
    c = next(colours)
    sorted_ms,cdf = imputed_data[counter]
    ax.step(sorted_ms,cdf,label='%s: Detections and imputed limits (Single Gaussian)'%sample_name,color=c)

for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    sorted_ms,cdf = make_cdf(SNe['logm'])
    c = next(colours)
    ax.step(sorted_ms,cdf,label='%s: Detections Only'%sample_name,color=c,lw=0.5,linestyle='--')
ax.legend()
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.set_xlim(6.5,12)
plt.tight_layout()
plt.savefig(FIG_DIR+'mass_cdf_split_c_imputed_vs_detections')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
f,ax=plt.subplots(figsize=(8,6.5))
for counter,sample_name in enumerate(['$c <0$','$c\geq0$']):
    c = next(colours)
    
    ax.hist(imputed_data[counter][0],label='%s: Detections and imputed limits (Single Gaussian)'%sample_name,color=c,histtype='step',density=True,bins=25)

for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    
    c = next(colours)
    ax.hist(SNe['logm'],label='%s: Detections Only'%sample_name,color=c,lw=0.5,linestyle='--',histtype='step',density=True,bins=25)
ax.legend(loc='upper left')
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$f(M)$',size=20)
ax.set_xlim(5,12.5)
#ax.set_ylim(0,0.7)
plt.tight_layout()
plt.savefig(FIG_DIR+'mass_hist_split_c_imputed_vs_detections')

  f,ax=plt.subplots(figsize=(8,6.5))


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
pantheon = pantheon[pantheon['logm']>6.1]
stretch_cond = pantheon['c']<0
blue_SNe = pantheon[stretch_cond]
red_SNe = pantheon[~stretch_cond]
colours = itertools.cycle(['c','r'])
imputed_data = []
fit_params = []

for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    f,ax=plt.subplots(figsize=(8,6.5))
    detections = SNe[SNe['logm_err']<2]
    limits = SNe[SNe['logm_err']>2]
    
    c = next(colours)
    data = dict(
        N_obs = len(detections),
        N_cens = len(limits),
        y_obs = detections.logm.values,
        y_cens = limits.logm.values,
        y_err = detections.logm_err.values,
        )
    model =stan_utility.compile_model('stan_models/normal_survival_mix.stan')
    fit =model.sampling(data=data, seed=1234, iter=int(2000),warmup=1000,n_jobs=4)
    chains_all_errs = fit.extract()

    all_log_ms = np.concatenate([chains_all_errs['latent_y_cens'].T,chains_all_errs['latent_y'].T])
    sorted_ms,cdf = make_cdf(detections['logm'])
    ax.step(sorted_ms,cdf,label='%s: Detections only'%sample_name,color='grey')
    sorted_ms,cdf = make_cdf(np.mean(all_log_ms,axis=1))
    imputed_data.append([all_log_ms,sorted_ms,cdf])
    fit_params.append([chains_all_errs['mu1'],chains_all_errs['mu2'],chains_all_errs['sigma1'],chains_all_errs['sigma2'],chains_all_errs['theta']])
    ax.step(sorted_ms,cdf,label='%s: Detections and imputed limits (Gaussian Mixture)'%sample_name,color=c)
    
    T = detections['logm']
    E = detections['logm_err'].apply(lambda x: x<2).astype(int)
    

    from lifelines import KaplanMeierFitter
    kmf = KaplanMeierFitter()

    kmf.fit_left_censoring(T, E)
    kmf.plot_cumulative_density(label='Kaplan-Meier left_censored',color='m')
    ax.legend()
    ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
    ax.set_ylabel('$F(M)$',size=20)
    ax.set_xlim(5,12)
    plt.tight_layout()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Using cached StanModel


INFO:numexpr.utils:Note: NumExpr detected 32 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Using cached StanModel


In [20]:
import arviz as az
data = az.from_pystan(fit)


In [27]:
az.plot_trace(data,var_names=['mu1','sigma1','mu2','sigma2','latent_y_cens'],compact=True,)
az.plot_joint(data,var_names=['mu1','mu2',],kind='kde',)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

array([<matplotlib.axes._subplots.AxesSubplot object at 0x2ac88a771340>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x2ac88a797d30>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x2ac88a7ccd30>],
      dtype=object)

In [28]:
colours = itertools.cycle(['c','r'])
f,ax=plt.subplots(figsize=(8,6.5))
for counter,sample_name in enumerate(['$c <0$','$c\geq0$']):
    c = next(colours)
    all_imputed = imputed_data[counter][0]
    for i in range(len(all_imputed)):
        label=None
        if i ==0:
            label='Imputed %s'%sample_name
        sorted_ms,cdf = make_cdf(all_imputed[:,i])
        ax.step(sorted_ms,cdf,linewidth=0.05,color=c,alpha=0.2,label=label)
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    
    c = next(colours)
    sorted_ms,cdf = make_cdf(SNe['logm'])
    ax.step(sorted_ms,cdf,label='%s: Detections only'%sample_name,color=c,linestyle='--')
    
    T = SNe['logm']
    E = SNe['logm_err'].apply(lambda x: x<2).astype(int)
    

    from lifelines import KaplanMeierFitter
    kmf = KaplanMeierFitter()

    kmf.fit_left_censoring(T, E)
    kmf.plot_cumulative_density(label='Kaplan-Meier left_censored',color=c,ci_show=False,linestyle=':')
leg =ax.legend(loc='upper left')
for lh in leg.legendHandles:
    lh.set_alpha(1)
    lh.set_linewidth(1)
ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$F(M)$',size=20)
ax.set_xlim(6.5,12)
#ax.set_ylim(0,0.7)
plt.tight_layout()
plt.savefig(FIG_DIR+'mass_CDF_split_c_imputed_mixture_vs_detections')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [27]:

f,ax=plt.subplots(figsize=(8,6.5))
for counter,sample_name in enumerate(['$c <0$','$c\geq0$']):
    c = next(colours)
    params = fit_params[counter]
    
    mu1,mu2,sigma1,sigma2,theta = [params[i] for i in range(len(params))]
    
    #ax.hist(imputed_data[counter][0],label='%s: Detections and imputed limits (Gaussian Mixture)'%sample_name,color=c*4000,histtype='step',density=True,bins=25)
    xs = np.linspace(5,13,1000)
    bw = ((np.max(xs)-np.min(xs))/100)
    for j in range(len(mu1)):
        label=None
        if j ==0:
            label='Imputed %s'%sample_name
        ys = theta[j]*stats.norm(mu1[j],sigma1[j]).pdf(xs)+ ((1-theta[j])*stats.norm(mu2[j],sigma2[j]).pdf(xs))
        ax.step(xs,ys,linewidth=0.05,c=c,alpha=0.1,label=label)
for SNe,sample_name in zip([blue_SNe,red_SNe],['$c <0$','$c\geq0$']):
    SNe = SNe[SNe['logm_err']<2]
    
    c = next(colours)
    #ax.hist(SNe['logm'],label='%s: Detections Only'%sample_name,color=c,linestyle='--',histtype='step',density=True,bins=25,)
    plot_hist(SNe['logm'],SNe['logm'].min(),SNe['logm'].max(),0.25,axhist=ax,label='%s: Detections Only'%sample_name,color=c,linestyle='--',density=True)

ax.set_xlabel('$\log (M/M_{\odot})$',size=20)
ax.set_ylabel('$f(M)$',size=20)
ax.set_xlim(6.5,12.)
#ax.set_ylim(0,0.7)
plt.tight_layout()
leg =ax.legend(loc='upper left')
for lh in leg.legendHandles:
    lh.set_alpha(1)
    lh.set_linewidth(1)
plt.savefig(FIG_DIR+'mass_hist_split_c_imputed_mixture_vs_detections')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [32]:
mu1[0]

9.809783993096936

In [54]:
imputed_data[0][0].shape

(608, 4000)

In [17]:
def plot_hist(x,lo,hi,step,axhist,weights = [],density=False,**kwargs):
    
    bins = np.linspace(lo,hi,int((hi-lo)/step),endpoint=False)
    bin_centers = (bins [:-1] + bins [1:])/2
    frac_obj = []
    n_obj = []
    frac_errs = []
    bw = bins[-1] - bins[-2]
    for counter,bl in enumerate(bins[:-1]):
        if len(weights)>0:
            n_in_bin = np.sum(weights[(x>bl)&(x<=bl+(bw))])
        else: 
            n_in_bin = len(x[(x>bl)&(x<=bl+(bw))])
        frac_obj.append(n_in_bin/len(x))
        n_obj.append(n_in_bin)
        frac_errs.append(np.sqrt(n_in_bin)/len(x))
    if density:
        axhist.step(bin_centers,frac_obj/bw,linewidth=2,where='mid',**kwargs)
    else:
        axhist.step(bin_centers,n_obj/bw,linewidth=2,where='mid',**kwargs)
