From 6abc3974da05e65941598eb7ac6e2573765d71fb Mon Sep 17 00:00:00 2001 From: Ruggero Turra Date: Thu, 4 Apr 2019 10:08:48 +0200 Subject: [PATCH 1/5] implement plugings --- countingworkspace/utils.py | 66 +++++++++++++++++++++++++++++--------- tests/test_all.py | 7 ++-- 2 files changed, 55 insertions(+), 18 deletions(-) diff --git a/countingworkspace/utils.py b/countingworkspace/utils.py index 5182d41..38e6162 100644 --- a/countingworkspace/utils.py +++ b/countingworkspace/utils.py @@ -1,4 +1,5 @@ import ROOT +from array import array def loop_iterator(iterator): @@ -62,18 +63,60 @@ def generate_and_fit(ws, ws_generate=None, ntoys=100, snapshot='initial', snapsh yield fr -def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen=None, seed=0, save_errors=True): - from array import array +class ToyStudyPlugin: + def initialize(self, tree): + self.tree = tree + + def run(self, fit_result): + pass + + +class ToyStudyError(ToyStudyPlugin): + def __init__(self, save_asym=False): + self.save_asym = save_asym + self.variables = set() # filled during first event + + def initialize(self, tree): + super().initialize(tree) + self.all_values_error = {} + if self.save_asym: + self.all_values_error_up = {} + self.all_values_error_down = {} + + def run(self, fit_result): + r = iter_collection(fit_result.floatParsFinal()) + + for rr in r: + name = rr.GetName() + if name not in self.variables: + self.all_values_error[name] = array('f', [0]) + self.tree.Branch(name + '_error', self.all_values_error[name], '%s_error/F' % name) + if self.save_asym: + self.all_values_error_up[name] = array('f', [0]) + self.all_values_error_down[name] = array('f', [0]) + self.tree.Branch(name + '_error_up', self.all_values_error[name], '%s_error_up/F' % name) + self.tree.Branch(name + '_error_down', self.all_values_error[name], '%s_error_down/F' % name) + self.variables.add(name) + self.all_values_error[name][0] = rr.getError() + if self.save_asym: + self.all_values_error_up[name][0] = rr.getErrorHi() + self.all_values_error_down[name][0] = rr.getErrorLo() + + +def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen=None, seed=0, plugins=None): + + plugins = plugins or list() ROOT.RooRandom.randomGenerator().SetSeed(seed) f = ROOT.TFile.Open("result_%s.root" % ROOT.RooRandom.randomGenerator().GetSeed(), 'RECREATE') tree = ROOT.TTree('results', 'results') + + for plugin in plugins: + plugin.initialize(tree) + status = array('i', [0]) nll = array('f', [0]) all_values = {} - all_values_error = {} - all_values_error_up = {} - all_values_error_down = {} tree.Branch('status', status, 'status/I') tree.Branch('nll', nll, 'nll/F') @@ -82,6 +125,9 @@ def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen= status[0] = result.status() nll[0] = result.minNll() + for plugin in plugins: + plugin.run(result) + r = iter_collection(result.floatParsFinal()) for rr in r: @@ -89,17 +135,7 @@ def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen= if name not in all_values: all_values[name] = array('f', [0]) tree.Branch(name, all_values[name], '%s/F' % name) - if save_errors: - all_values_error[name] = array('f', [0]) - all_values_error_up[name] = array('f', [0]) - all_values_error_down[name] = array('f', [0]) - tree.Branch(name + '_error', all_values_error[name], '%s_error/F' % name) - tree.Branch(name + '_error_up', all_values_error[name], '%s_error_up/F' % name) - tree.Branch(name + '_error_down', all_values_error[name], '%s_error_down/F' % name) all_values[name][0] = rr.getVal() - all_values_error[name][0] = rr.getError() - all_values_error_up[name][0] = rr.getErrorHi() - all_values_error_down[name][0] = rr.getErrorLo() tree.Fill() tree.Write() diff --git a/tests/test_all.py b/tests/test_all.py index aa9ea6d..39c706e 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -565,12 +565,13 @@ def test_generate_and_fit_crossed(): def test_toy_study(): ws = create_workspace(NCATEGORIES, NPROCESS, NTRUE, EFFICIENCIES, EXPECTED_BKG_CAT) - NTOYS = 10 - countingworkspace.utils.toy_study(ws, ntoys=NTOYS, seed=42) + ntoys = 10 + countingworkspace.utils.toy_study(ws, ntoys=ntoys, seed=42, + plugins=[countingworkspace.utils.ToyStudyError(save_asym=True)]) f = ROOT.TFile.Open('result_42.root') tree = f.Get("results") assert tree - assert tree.GetEntries() == NTOYS + assert tree.GetEntries() == ntoys branches = [k.GetName() for k in tree.GetListOfBranches()] assert 'nll' in branches assert 'status' in branches From 1a48a449be237c8dbc9ded55a7cb8b370438fb56 Mon Sep 17 00:00:00 2001 From: Ruggero Turra Date: Thu, 4 Apr 2019 11:53:06 +0200 Subject: [PATCH 2/5] implement coverage study --- countingworkspace/utils.py | 68 +++++++++++++++++++++++++++++++++++++- tests/test_all.py | 31 +++++++++++++++++ 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/countingworkspace/utils.py b/countingworkspace/utils.py index 38e6162..f8acae9 100644 --- a/countingworkspace/utils.py +++ b/countingworkspace/utils.py @@ -1,5 +1,7 @@ import ROOT +import numpy as np from array import array +from scipy import stats def loop_iterator(iterator): @@ -77,7 +79,7 @@ def __init__(self, save_asym=False): self.variables = set() # filled during first event def initialize(self, tree): - super().initialize(tree) + super(ToyStudyError, self).initialize(tree) self.all_values_error = {} if self.save_asym: self.all_values_error_up = {} @@ -103,6 +105,70 @@ def run(self, fit_result): self.all_values_error_down[name][0] = rr.getErrorLo() +class ToyStudyCoverage(ToyStudyPlugin): + def __init__(self, variables, true_values, pvalue=None, significance=None, output_var=None): + + + if type(variables) is ROOT.RooArgSet: + self.variables = [v.GetName() for v in iter_collection(variables)] + else: + self.variables = variables + self.ndim = len(self.variables) + if len(true_values) != self.ndim: + raise ValueError('true values have different dimensions than variables') + self.true_values = true_values + + if pvalue is None and significance is None: + significance = 1 + if pvalue is None and significance is not None: + self.pvalue =self.get_likelihood_count_levels(self.ndim, significance) + elif pvalue is not None and significance is None: + self.pvalue = pvalue + else: + raise ValueError('cannot specify both significance and pvalue') + + if output_var is None: + output_var = 'isCovered_%s' % '_'.join(self.variables) + self.output_var = output_var + + def get_likelihood_count_levels(self, ndof, z): + p = 1 - stats.norm(0, 1).sf(z) * 2 + return stats.chi2(ndof).isf(1 - p) + + def get_implicit_ellipse(self, values, true, A): + return np.einsum('...i,ij,...j', true - values, A, true - values) + + def compute_is_covered(self, true, value, cov, level): + dim = len(true) + inv_cov = np.linalg.inv(cov) + return (self.get_implicit_ellipse(true, value, inv_cov) < level) + + def initialize(self, tree): + super(ToyStudyCoverage, self).initialize(tree) + self.covered = array('b', [False]) + self.tree.Branch(self.output_var, self.covered, '%s/B' % self.output_var) + + def run(self, fit_result): + cov = np.zeros((self.ndim, self.ndim)) + rr = list(iter_collection(fit_result.floatParsFinal())) + errs = {v.GetName(): v.getError() for v in rr} + vals = {v.GetName(): v.getVal() for v in rr} + for i1, v1 in enumerate(self.variables): + e1 = errs[v1] + for i2, v2 in enumerate(self.variables): + e2 = errs[v2] + corr = fit_result.correlation(v1, v2) + cov[i1, i2] = corr * e1 * e2 + + fitted_values = [vals[n] for n in self.variables] + + is_covered = self.compute_is_covered(self.true_values, fitted_values, cov, self.pvalue) + + self.covered[0] = is_covered + + + + def toy_study(ws, ws_generate=None, ntoys=100, snapshot='initial', snapshot_gen=None, seed=0, plugins=None): plugins = plugins or list() diff --git a/tests/test_all.py b/tests/test_all.py index 39c706e..9c0a0cb 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -580,3 +580,34 @@ def test_toy_study(): assert ("nsignal_gen_proc%s_error" % format_index(nproc)) in branches assert ("nsignal_gen_proc%s_error_up" % format_index(nproc)) in branches assert ("nsignal_gen_proc%s_error_down" % format_index(nproc)) in branches + + +def test_toy_coverage(): + ws = create_workspace(NCATEGORIES, NPROCESS, NTRUE, EFFICIENCIES, EXPECTED_BKG_CAT) + ntoys = 10 + countingworkspace.utils.toy_study(ws, ntoys=ntoys, seed=42, + plugins=[countingworkspace.utils.ToyStudyError(save_asym=True), + countingworkspace.utils.ToyStudyCoverage(ws.obj('ModelConfig').GetParametersOfInterest(), + NTRUE, significance=1, output_var='isCoveredAll'), + countingworkspace.utils.ToyStudyCoverage(ws.obj('ModelConfig').GetParametersOfInterest(), + NTRUE, significance=2, output_var='isCoveredAll2sigma') + ]) + f = ROOT.TFile.Open('result_42.root') + tree = f.Get("results") + assert tree + assert tree.GetEntries() == ntoys + branches = [k.GetName() for k in tree.GetListOfBranches()] + assert 'nll' in branches + assert 'status' in branches + assert 'isCoveredAll' in branches + for nproc in range(NPROCESS): + assert ("nsignal_gen_proc%s" % format_index(nproc)) in branches + assert ("nsignal_gen_proc%s_error" % format_index(nproc)) in branches + assert ("nsignal_gen_proc%s_error_up" % format_index(nproc)) in branches + assert ("nsignal_gen_proc%s_error_down" % format_index(nproc)) in branches + + h1sigma = ROOT.TH1F("h1sigma", "h1sigma", 2, 0, 2) + h2sigma = ROOT.TH1F("h2sigma", "h2sigma", 2, 0, 2) + tree.Draw("isCoveredAll>>h1sigma") + tree.Draw("isCoveredAll2sigma>>h2sigma") + assert(h1sigma.GetMean() <= h2sigma.GetMean()) From 4cce13fc6ab6f0efeffcecbfce7d5b8e936ac998 Mon Sep 17 00:00:00 2001 From: Ruggero Turra Date: Thu, 4 Apr 2019 12:10:11 +0200 Subject: [PATCH 3/5] add dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d2fcf87..c367548 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ long_description_content_type="text/markdown", url="https://github.com/wiso/countingworkspace", packages=setuptools.find_packages(), - install_requires=['numpy'], + install_requires=['numpy', 'scipy'], classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 2", From a7a81056269944e17d14bfef397bbfcd64e6461b Mon Sep 17 00:00:00 2001 From: Ruggero Turra Date: Thu, 4 Apr 2019 12:24:24 +0200 Subject: [PATCH 4/5] add scipy to travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4cbbc10..bf38dd8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ install: - conda config --add channels http://conda.anaconda.org/NLeSC - conda config --add channels http://conda.anaconda.org/conda-forge - conda config --set show_channel_urls yes - - conda create -q -n testenv python=${PYTHON} root=${ROOT} numpy pytest pytest-cov=2.4.0 python-coveralls>=2.8.0 coveralls + - conda create -q -n testenv python=${PYTHON} root=${ROOT} numpy scipy pytest pytest-cov=2.4.0 python-coveralls>=2.8.0 coveralls - export CONDA_ENV_PATH=$HOME/miniconda/envs/testenv - source activate testenv From 1e42fe2f4a4b01b8f0544b33f95806486214ff72 Mon Sep 17 00:00:00 2001 From: Ruggero Turra Date: Thu, 4 Apr 2019 12:50:08 +0200 Subject: [PATCH 5/5] remove python 2.7 --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index bf38dd8..a534f47 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,8 @@ language: python matrix: include: - - python: 2.7 - env: PYTHON=2.7 ROOT=6.04 +# - python: 2.7 +# env: PYTHON=2.7 ROOT=6.04 - python: 3.4 env: PYTHON=3.4 ROOT=6.04 install: