Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Cleanup internal names for hypotest #1247

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 37 additions & 39 deletions src/pyhf/infer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


def hypotest(
poi_test,
alt_mu,
data,
pdf,
init_pars=None,
Expand All @@ -15,6 +15,7 @@ def hypotest(
return_tail_probs=False,
return_expected=False,
return_expected_set=False,
null_mu=None,
**kwargs,
):
r"""
Expand All @@ -40,7 +41,7 @@ def hypotest(
[array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]

Args:
poi_test (Number or Tensor): The value of the parameter of interest (POI)
alt_mu (Number or Tensor): The value of the parameter of interest (POI) for the alternative hypothesis.
data (Number or Tensor): The data considered
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``
init_pars (:obj:`tensor`): The initial parameter values to be used for minimization
Expand All @@ -50,6 +51,7 @@ def hypotest(
return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
null_mu (None or :obj:`float` or :obj:`tensor`): The value for the parameter of interest for the null hypothesis. Default (`None`) is to automatically set based on the test statistic used.

Returns:
Tuple of Floats and lists of Floats:
Expand Down Expand Up @@ -138,61 +140,57 @@ def hypotest(
**kwargs,
)

teststat = calc.teststatistic(poi_test)
sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'
null_mu = null_mu or (1.0 if is_q0 else 0.0)

teststat = calc.teststatistic(alt_mu, null_mu)
alt_distribution, null_distribution = calc.distributions(alt_mu, null_mu)

CLsb = sig_plus_bkg_distribution.pvalue(teststat)
CLb = b_only_distribution.pvalue(teststat)
CLs = CLsb / CLb
pvalue_alt = alt_distribution.pvalue(teststat)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s+b is null

pvalue_null = null_distribution.pvalue(teststat)
pvalue_mod_alt = pvalue_alt / pvalue_null

tensorlib, _ = get_backend()
# Ensure that all CL values are 0-d tensors
CLsb, CLb, CLs = (
tensorlib.astensor(CLsb),
tensorlib.astensor(CLb),
tensorlib.astensor(CLs),
# Ensure that all p-values are 0-d tensors
pvalue_alt, pvalue_null, pvalue_mod_alt = (
tensorlib.astensor(pvalue_alt),
tensorlib.astensor(pvalue_null),
tensorlib.astensor(pvalue_mod_alt),
)

is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'

_returns = [CLsb if is_q0 else CLs]
_returns = [pvalue_alt if is_q0 else pvalue_mod_alt]
if return_tail_probs:
if is_q0:
_returns.append([CLb])
_returns.append([pvalue_null])
else:
_returns.append([CLsb, CLb])
_returns.append([pvalue_alt, pvalue_null])
if return_expected_set:
CLs_exp = []
pvalue_mod_alt_exp = []
for n_sigma in [2, 1, 0, -1, -2]:

expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
expected_null_teststat = null_distribution.expected_value(n_sigma)

if is_q0:
# despite the name in this case this is the discovery p value
CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat)
else:
CLs = sig_plus_bkg_distribution.pvalue(
expected_bonly_teststat
) / b_only_distribution.pvalue(expected_bonly_teststat)
CLs_exp.append(tensorlib.astensor(CLs))
pvalue_mod_alt_exp_value = alt_distribution.pvalue(expected_null_teststat)
if not is_q0:
pvalue_mod_alt_exp_value /= null_distribution.pvalue(
expected_null_teststat
)

pvalue_mod_alt_exp.append(tensorlib.astensor(pvalue_mod_alt_exp_value))
if return_expected:
_returns.append(CLs_exp[2])
_returns.append(CLs_exp)
_returns.append(pvalue_mod_alt_exp[2])
_returns.append(pvalue_mod_alt_exp)
elif return_expected:
n_sigma = 0
expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
expected_null_teststat = null_distribution.expected_value(n_sigma)

if is_q0:
# despite the name in this case this is the discovery p value
CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat)
else:
CLs = sig_plus_bkg_distribution.pvalue(
expected_bonly_teststat
) / b_only_distribution.pvalue(expected_bonly_teststat)
pvalue_mod_alt_exp_value = alt_distribution.pvalue(expected_null_teststat)
if not is_q0:
pvalue_mod_alt_exp_value /= null_distribution.pvalue(expected_null_teststat)

_returns.append(tensorlib.astensor(CLs))
_returns.append(tensorlib.astensor(pvalue_mod_alt_exp_value))

# Enforce a consistent return type of the observed CLs
# Enforce a consistent return type of the observed pvalue_mod_alt
return tuple(_returns) if len(_returns) > 1 else _returns[0]


Expand Down
84 changes: 46 additions & 38 deletions src/pyhf/infer/calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
log = logging.getLogger(__name__)


def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_params):
def generate_asimov_data(mu, data, pdf, init_pars, par_bounds, fixed_params):
"""
Compute Asimov Dataset (expected yields at best-fit values) for a given POI value.

Expand All @@ -35,7 +35,7 @@ def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_para
array([ 60.61229858, 56.52802479, 270.06832542, 48.31545488])

Args:
asimov_mu (:obj:`float`): The value for the parameter of interest to be used.
mu (:obj:`float`): The value for the parameter of interest to be used.
data (:obj:`tensor`): The observed data.
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``.
init_pars (:obj:`tensor`): The initial parameter values to be used for fitting.
Expand All @@ -47,7 +47,7 @@ def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_para

"""
bestfit_nuisance_asimov = fixed_poi_fit(
asimov_mu, data, pdf, init_pars, par_bounds, fixed_params
mu, data, pdf, init_pars, par_bounds, fixed_params
)
return pdf.expected_data(bestfit_nuisance_asimov)

Expand Down Expand Up @@ -193,7 +193,7 @@ def __init__(
self.test_stat = test_stat
self.sqrtqmuA_v = None

def distributions(self, poi_test):
def distributions(self, alt_mu, null_mu):
r"""
Probability distributions of the test statistic, as defined in
:math:`\S` 3 of :xref:`arXiv:1007.1727` under the Wald approximation,
Expand All @@ -209,26 +209,30 @@ def distributions(self, poi_test):
>>> observations = [51, 48]
>>> data = observations + model.config.auxdata
>>> mu_test = 1.0
>>> null_mu = 0.0
>>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(data, model, test_stat="qtilde")
>>> _ = asymptotic_calculator.teststatistic(mu_test)
>>> qmu_sig, qmu_bkg = asymptotic_calculator.distributions(mu_test)
>>> _ = asymptotic_calculator.teststatistic(mu_test, null_mu)
>>> qmu_sig, qmu_bkg = asymptotic_calculator.distributions(mu_test, null_mu)
>>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test)
(0.002192624107163899, 0.15865525393145707)

Args:
poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest.
alt_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the alternative hypothesis.
null_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the null hypothesis.

Returns:
Tuple (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distributions under the hypotheses.

"""
if self.sqrtqmuA_v is None:
raise RuntimeError('need to call .teststatistic(poi_test) first')
sb_dist = AsymptoticTestStatDistribution(-self.sqrtqmuA_v)
b_dist = AsymptoticTestStatDistribution(0.0)
return sb_dist, b_dist

def teststatistic(self, poi_test):
raise RuntimeError('need to call .teststatistic first')
distribution_alt = AsymptoticTestStatDistribution(-self.sqrtqmuA_v)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

signal + background in exclusion fit is the null

distribution_null = AsymptoticTestStatDistribution(
0.0
) # TODO is this asimov_mu / null_mu?
return distribution_alt, distribution_null

def teststatistic(self, alt_mu, null_mu):
"""
Compute the test statistic for the observed data under the studied model.

Expand All @@ -242,12 +246,14 @@ def teststatistic(self, poi_test):
>>> observations = [51, 48]
>>> data = observations + model.config.auxdata
>>> mu_test = 1.0
>>> null_mu = 0.0
>>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(data, model, test_stat="qtilde")
>>> asymptotic_calculator.teststatistic(mu_test)
>>> asymptotic_calculator.teststatistic(mu_test, null_mu)
0.14043184405388176

Args:
poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest.
alt_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the alternative hypothesis.
null_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the null hypothesis.

Returns:
Float: The value of the test statistic.
Expand All @@ -258,7 +264,7 @@ def teststatistic(self, poi_test):
teststat_func = utils.get_test_stat(self.test_stat)

qmu_v = teststat_func(
poi_test,
alt_mu,
self.data,
self.pdf,
self.init_pars,
Expand All @@ -267,18 +273,16 @@ def teststatistic(self, poi_test):
)
sqrtqmu_v = tensorlib.sqrt(qmu_v)

asimov_mu = 1.0 if self.test_stat == 'q0' else 0.0

asimov_data = generate_asimov_data(
asimov_mu,
null_mu,
self.data,
self.pdf,
self.init_pars,
self.par_bounds,
self.fixed_params,
)
qmuA_v = teststat_func(
poi_test,
alt_mu,
asimov_data,
self.pdf,
self.init_pars,
Expand Down Expand Up @@ -489,7 +493,7 @@ def __init__(
self.test_stat = test_stat
self.track_progress = track_progress

def distributions(self, poi_test, track_progress=None):
def distributions(self, alt_mu, null_mu, track_progress=None):
"""
Probability Distributions of the test statistic value under the signal + background and background-only hypothesis.

Expand All @@ -505,15 +509,17 @@ def distributions(self, poi_test, track_progress=None):
>>> observations = [51, 48]
>>> data = observations + model.config.auxdata
>>> mu_test = 1.0
>>> null_mu = 0.0
>>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
... data, model, ntoys=100, track_progress=False
... )
>>> qmu_sig, qmu_bkg = toy_calculator.distributions(mu_test)
>>> qmu_sig, qmu_bkg = toy_calculator.distributions(mu_test, null_mu)
>>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test)
(0.14, 0.76)

Args:
poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest.
alt_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the alternative hypothesis.
null_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the null hypothesis.
track_progress (:obj:`bool`): Whether to display the `tqdm` progress bar or not (outputs to `stderr`)

Returns:
Expand All @@ -524,12 +530,12 @@ def distributions(self, poi_test, track_progress=None):
sample_shape = (self.ntoys,)

signal_pars = self.pdf.config.suggested_init()
signal_pars[self.pdf.config.poi_index] = poi_test
signal_pars[self.pdf.config.poi_index] = alt_mu
signal_pdf = self.pdf.make_pdf(tensorlib.astensor(signal_pars))
signal_sample = signal_pdf.sample(sample_shape)

bkg_pars = self.pdf.config.suggested_init()
bkg_pars[self.pdf.config.poi_index] = 1.0 if self.test_stat == 'q0' else 0.0
bkg_pars[self.pdf.config.poi_index] = null_mu
bkg_pdf = self.pdf.make_pdf(tensorlib.astensor(bkg_pars))
bkg_sample = bkg_pdf.sample(sample_shape)

Expand All @@ -544,11 +550,11 @@ def distributions(self, poi_test, track_progress=None):
unit='toy',
)

signal_teststat = []
teststat_alt = []
for sample in tqdm.tqdm(signal_sample, **tqdm_options, desc='Signal-like'):
signal_teststat.append(
teststat_alt.append(
teststat_func(
poi_test,
alt_mu,
sample,
self.pdf,
signal_pars,
Expand All @@ -557,11 +563,11 @@ def distributions(self, poi_test, track_progress=None):
)
)

bkg_teststat = []
teststat_null = []
for sample in tqdm.tqdm(bkg_sample, **tqdm_options, desc='Background-like'):
bkg_teststat.append(
teststat_null.append(
teststat_func(
poi_test,
alt_mu,
sample,
self.pdf,
bkg_pars,
Expand All @@ -570,11 +576,11 @@ def distributions(self, poi_test, track_progress=None):
)
)

s_plus_b = EmpiricalDistribution(tensorlib.astensor(signal_teststat))
b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat))
return s_plus_b, b_only
distribution_alt = EmpiricalDistribution(tensorlib.astensor(teststat_alt))
distribution_null = EmpiricalDistribution(tensorlib.astensor(teststat_null))
return distribution_alt, distribution_null

def teststatistic(self, poi_test):
def teststatistic(self, alt_mu, null_mu):
"""
Compute the test statistic for the observed data under the studied model.

Expand All @@ -590,22 +596,24 @@ def teststatistic(self, poi_test):
>>> observations = [51, 48]
>>> data = observations + model.config.auxdata
>>> mu_test = 1.0
>>> null_mu = 0.0
>>> toy_calculator = pyhf.infer.calculators.ToyCalculator(
... data, model, ntoys=100, track_progress=False
... )
>>> toy_calculator.teststatistic(mu_test)
>>> toy_calculator.teststatistic(mu_test, null_mu)
array(3.93824492)

Args:
poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest.
alt_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the alternative hypothesis.
null_mu (:obj:`float` or :obj:`tensor`): The value for the parameter of interest for the null hypothesis.

Returns:
Float: The value of the test statistic.

"""
teststat_func = utils.get_test_stat(self.test_stat)
teststat = teststat_func(
poi_test,
alt_mu,
self.data,
self.pdf,
self.init_pars,
Expand Down
5 changes: 3 additions & 2 deletions src/pyhf/infer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ def create_calculator(calctype, *args, **kwargs):
>>> observations = [51, 48]
>>> data = observations + model.config.auxdata
>>> mu_test = 1.0
>>> null_mu = 0.0
>>> toy_calculator = pyhf.infer.utils.create_calculator(
... "toybased", data, model, ntoys=100, test_stat="qtilde", track_progress=False
... "toybased", data, model, ntoys=100, test_stat="qtilde", track_progress=False, null_mu=0.0
... )
>>> qmu_sig, qmu_bkg = toy_calculator.distributions(mu_test)
>>> qmu_sig, qmu_bkg = toy_calculator.distributions(mu_test, null_mu)
>>> qmu_sig.pvalue(mu_test), qmu_bkg.pvalue(mu_test)
(0.14, 0.76)
Expand Down
Loading