diff --git a/.travis.yml b/.travis.yml index 4cbbc10..a534f47 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,8 @@ language: python matrix: include: - - python: 2.7 - env: PYTHON=2.7 ROOT=6.04 +# - python: 2.7 +# env: PYTHON=2.7 ROOT=6.04 - python: 3.4 env: PYTHON=3.4 ROOT=6.04 install: @@ -19,7 +19,7 @@ install: - conda config --add channels http://conda.anaconda.org/NLeSC - conda config --add channels http://conda.anaconda.org/conda-forge - conda config --set show_channel_urls yes - - conda create -q -n testenv python=${PYTHON} root=${ROOT} numpy pytest pytest-cov=2.4.0 python-coveralls>=2.8.0 coveralls + - conda create -q -n testenv python=${PYTHON} root=${ROOT} numpy scipy pytest pytest-cov=2.4.0 python-coveralls>=2.8.0 coveralls - export CONDA_ENV_PATH=$HOME/miniconda/envs/testenv - source activate testenv diff --git a/countingworkspace/utils.py b/countingworkspace/utils.py index 5182d41..f8acae9 100644 --- a/countingworkspace/utils.py +++ b/countingworkspace/utils.py @@ -1,4 +1,7 @@ import ROOT +import numpy as np +from array import array +from scipy import stats def loop_iterator(iterator): @@ -62,18 +65,124 @@ def generate_and_fit(ws, ws_generate=None, ntoys=100, snapshot='initial', snapsh yield fr -def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen=None, seed=0, save_errors=True): - from array import array +class ToyStudyPlugin: + def initialize(self, tree): + self.tree = tree + + def run(self, fit_result): + pass + + +class ToyStudyError(ToyStudyPlugin): + def __init__(self, save_asym=False): + self.save_asym = save_asym + self.variables = set() # filled during first event + + def initialize(self, tree): + super(ToyStudyError, self).initialize(tree) + self.all_values_error = {} + if self.save_asym: + self.all_values_error_up = {} + self.all_values_error_down = {} + + def run(self, fit_result): + r = iter_collection(fit_result.floatParsFinal()) + + for rr in r: + name = rr.GetName() + if name not in self.variables: + self.all_values_error[name] = array('f', [0]) + self.tree.Branch(name + '_error', self.all_values_error[name], '%s_error/F' % name) + if self.save_asym: + self.all_values_error_up[name] = array('f', [0]) + self.all_values_error_down[name] = array('f', [0]) + self.tree.Branch(name + '_error_up', self.all_values_error[name], '%s_error_up/F' % name) + self.tree.Branch(name + '_error_down', self.all_values_error[name], '%s_error_down/F' % name) + self.variables.add(name) + self.all_values_error[name][0] = rr.getError() + if self.save_asym: + self.all_values_error_up[name][0] = rr.getErrorHi() + self.all_values_error_down[name][0] = rr.getErrorLo() + + +class ToyStudyCoverage(ToyStudyPlugin): + def __init__(self, variables, true_values, pvalue=None, significance=None, output_var=None): + + + if type(variables) is ROOT.RooArgSet: + self.variables = [v.GetName() for v in iter_collection(variables)] + else: + self.variables = variables + self.ndim = len(self.variables) + if len(true_values) != self.ndim: + raise ValueError('true values have different dimensions than variables') + self.true_values = true_values + + if pvalue is None and significance is None: + significance = 1 + if pvalue is None and significance is not None: + self.pvalue =self.get_likelihood_count_levels(self.ndim, significance) + elif pvalue is not None and significance is None: + self.pvalue = pvalue + else: + raise ValueError('cannot specify both significance and pvalue') + + if output_var is None: + output_var = 'isCovered_%s' % '_'.join(self.variables) + self.output_var = output_var + + def get_likelihood_count_levels(self, ndof, z): + p = 1 - stats.norm(0, 1).sf(z) * 2 + return stats.chi2(ndof).isf(1 - p) + + def get_implicit_ellipse(self, values, true, A): + return np.einsum('...i,ij,...j', true - values, A, true - values) + + def compute_is_covered(self, true, value, cov, level): + dim = len(true) + inv_cov = np.linalg.inv(cov) + return (self.get_implicit_ellipse(true, value, inv_cov) < level) + + def initialize(self, tree): + super(ToyStudyCoverage, self).initialize(tree) + self.covered = array('b', [False]) + self.tree.Branch(self.output_var, self.covered, '%s/B' % self.output_var) + + def run(self, fit_result): + cov = np.zeros((self.ndim, self.ndim)) + rr = list(iter_collection(fit_result.floatParsFinal())) + errs = {v.GetName(): v.getError() for v in rr} + vals = {v.GetName(): v.getVal() for v in rr} + for i1, v1 in enumerate(self.variables): + e1 = errs[v1] + for i2, v2 in enumerate(self.variables): + e2 = errs[v2] + corr = fit_result.correlation(v1, v2) + cov[i1, i2] = corr * e1 * e2 + + fitted_values = [vals[n] for n in self.variables] + + is_covered = self.compute_is_covered(self.true_values, fitted_values, cov, self.pvalue) + + self.covered[0] = is_covered + + + + +def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen=None, seed=0, plugins=None): + + plugins = plugins or list() ROOT.RooRandom.randomGenerator().SetSeed(seed) f = ROOT.TFile.Open("result_%s.root" % ROOT.RooRandom.randomGenerator().GetSeed(), 'RECREATE') tree = ROOT.TTree('results', 'results') + + for plugin in plugins: + plugin.initialize(tree) + status = array('i', [0]) nll = array('f', [0]) all_values = {} - all_values_error = {} - all_values_error_up = {} - all_values_error_down = {} tree.Branch('status', status, 'status/I') tree.Branch('nll', nll, 'nll/F') @@ -82,6 +191,9 @@ def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen= status[0] = result.status() nll[0] = result.minNll() + for plugin in plugins: + plugin.run(result) + r = iter_collection(result.floatParsFinal()) for rr in r: @@ -89,17 +201,7 @@ def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen= if name not in all_values: all_values[name] = array('f', [0]) tree.Branch(name, all_values[name], '%s/F' % name) - if save_errors: - all_values_error[name] = array('f', [0]) - all_values_error_up[name] = array('f', [0]) - all_values_error_down[name] = array('f', [0]) - tree.Branch(name + '_error', all_values_error[name], '%s_error/F' % name) - tree.Branch(name + '_error_up', all_values_error[name], '%s_error_up/F' % name) - tree.Branch(name + '_error_down', all_values_error[name], '%s_error_down/F' % name) all_values[name][0] = rr.getVal() - all_values_error[name][0] = rr.getError() - all_values_error_up[name][0] = rr.getErrorHi() - all_values_error_down[name][0] = rr.getErrorLo() tree.Fill() tree.Write() diff --git a/setup.py b/setup.py index d2fcf87..c367548 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ long_description_content_type="text/markdown", url="https://github.com/wiso/countingworkspace", packages=setuptools.find_packages(), - install_requires=['numpy'], + install_requires=['numpy', 'scipy'], classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 2", diff --git a/tests/test_all.py b/tests/test_all.py index ace61b1..e3f35c0 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -566,7 +566,8 @@ def test_generate_and_fit_crossed(): def test_toy_study(): ws = create_workspace(NCATEGORIES, NPROCESS, NTRUE, EFFICIENCIES, EXPECTED_BKG_CAT) ntoys = 10 - countingworkspace.utils.toy_study(ws, ntoys=ntoys, seed=42) + countingworkspace.utils.toy_study(ws, ntoys=ntoys, seed=42, + plugins=[countingworkspace.utils.ToyStudyError(save_asym=True)]) f = ROOT.TFile.Open('result_42.root') tree = f.Get("results") assert tree @@ -581,6 +582,36 @@ def test_toy_study(): assert ("nsignal_gen_proc%s_error_down" % format_index(nproc)) in branches +def test_toy_coverage(): + ws = create_workspace(NCATEGORIES, NPROCESS, NTRUE, EFFICIENCIES, EXPECTED_BKG_CAT) + ntoys = 10 + countingworkspace.utils.toy_study(ws, ntoys=ntoys, seed=42, + plugins=[countingworkspace.utils.ToyStudyError(save_asym=True), + countingworkspace.utils.ToyStudyCoverage(ws.obj('ModelConfig').GetParametersOfInterest(), + NTRUE, significance=1, output_var='isCoveredAll'), + countingworkspace.utils.ToyStudyCoverage(ws.obj('ModelConfig').GetParametersOfInterest(), + NTRUE, significance=2, output_var='isCoveredAll2sigma') + ]) + f = ROOT.TFile.Open('result_42.root') + tree = f.Get("results") + assert tree + assert tree.GetEntries() == ntoys + branches = [k.GetName() for k in tree.GetListOfBranches()] + assert 'nll' in branches + assert 'status' in branches + assert 'isCoveredAll' in branches + for nproc in range(NPROCESS): + assert ("nsignal_gen_proc%s" % format_index(nproc)) in branches + assert ("nsignal_gen_proc%s_error" % format_index(nproc)) in branches + assert ("nsignal_gen_proc%s_error_up" % format_index(nproc)) in branches + assert ("nsignal_gen_proc%s_error_down" % format_index(nproc)) in branches + + h1sigma = ROOT.TH1F("h1sigma", "h1sigma", 2, 0, 2) + h2sigma = ROOT.TH1F("h2sigma", "h2sigma", 2, 0, 2) + tree.Draw("isCoveredAll>>h1sigma") + tree.Draw("isCoveredAll2sigma>>h2sigma") + assert(h1sigma.GetMean() <= h2sigma.GetMean()) + def test_trivial_unfolding(): ncat = 4 nprocess = 4 @@ -646,3 +677,4 @@ def test_trivial_unfolding_gaus_fixed_error(): expected_error = ws.obj('error_cat%s' % format_index(nproc)).getVal() assert ws.obj(var_name).getVal() == pytest.approx(t, rel=0.1) assert ws.obj(var_name).getError() == pytest.approx(expected_error, rel=0.1) +