diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst index 36b4070a92..1b12e7d6f8 100644 --- a/docs/source/api/distributions/continuous.rst +++ b/docs/source/api/distributions/continuous.rst @@ -41,3 +41,4 @@ Continuous VonMises Wald Weibull + ZeroOneInflatedBeta diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index c578267091..974100b230 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -51,6 +51,7 @@ VonMises, Wald, Weibull, + ZeroOneInflatedBeta, ) from pymc.distributions.custom import CustomDist, DensityDist from pymc.distributions.discrete import ( @@ -204,5 +205,6 @@ "ZeroInflatedBinomial", "ZeroInflatedNegativeBinomial", "ZeroInflatedPoisson", + "ZeroOneInflatedBeta", "ZeroSumNormal", ] diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 9c4382ffb7..7b1a806aca 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -89,7 +89,11 @@ def polyagamma_cdf(*args, **kwargs): normal_lcdf, zvalue, ) -from pymc.distributions.distribution import DIST_PARAMETER_TYPES, Continuous, SymbolicRandomVariable +from pymc.distributions.distribution import ( + DIST_PARAMETER_TYPES, + Continuous, + SymbolicRandomVariable, +) from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none from pymc.distributions.transforms import _default_transform from pymc.math import invlogit, logdiffexp @@ -129,6 +133,7 @@ def polyagamma_cdf(*args, **kwargs): "VonMises", "Wald", "Weibull", + "ZeroOneInflatedBeta", ] @@ -377,7 +382,9 @@ def logp(value): def logcdf(value): return pt.switch( - pt.eq(value, -np.inf), -np.inf, pt.switch(pt.eq(value, np.inf), 0, pt.log(0.5)) + pt.eq(value, -np.inf), + -np.inf, + pt.switch(pt.eq(value, np.inf), 0, pt.log(0.5)), ) @@ -1242,6 +1249,250 @@ def icdf(value, alpha, beta): ) +class ZeroOneInflatedBetaRV(SymbolicRandomVariable): + name = "zero_one_inflated_beta" + extended_signature = "[rng],[size],(),(),(),()->[rng],()" + _print_name = ("ZeroOneInflatedBeta", "\\operatorname{ZeroOneInflatedBeta}") + + @classmethod + def rv_op(cls, zoi, coi, a, b, *, rng=None, size=None): + zoi = pt.as_tensor(zoi) + coi = pt.as_tensor(coi) + a = pt.as_tensor(a) + b = pt.as_tensor(b) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + if rv_size_is_none(size): + size = implicit_size_from_params(zoi, coi, a, b, ndims_params=cls.ndims_params) + + next_rng, u = uniform(size=size, rng=rng, return_next_rng=True) + + next_rng, beta_draws = beta(a, b, size=size, rng=next_rng, return_next_rng=True) + + draws = pt.switch( + pt.lt(u, zoi * (1 - coi)), + pt.zeros_like(beta_draws), + pt.switch( + pt.lt(u, zoi), + pt.ones_like(beta_draws), + beta_draws, + ), + ) + + return cls( + inputs=[rng, size, zoi, coi, a, b], + outputs=[next_rng, draws], + )(rng, size, zoi, coi, a, b) + + +class ZeroOneInflatedBeta(UnitContinuous): + r""" + Zero-One-Inflated Beta Distribution. + + The pdf of this distribution is: + + .. math:: + + f(x \mid \text{zoi}, \text{coi}, \alpha, \beta) = + \begin{cases} + \text{zoi} \cdot (1 - \text{coi}) + & \text{if } x = 0 \\ + \text{zoi} \cdot \text{coi} + & \text{if } x = 1 \\ + (1 - \text{zoi}) \cdot \dfrac{x^{\alpha - 1} (1 - x)^{\beta - 1}}{B(\alpha, \beta)} + & \text{if } x \in (0, 1) + \end{cases} + + where :math:`B` is the Beta function, :math:`\alpha = \mu \kappa`, and :math:`\beta = (1 - \mu) \kappa`. + + + Examples + -------- + Model proportion data with structural zeros and ones: + + .. code-block:: python + + import pymc as pm + import numpy as np + + # Data with 30% boundary values, 40% of which are ones + true_zoi = 0.3 + true_coi = 0.4 + true_mu = 0.4 + true_kappa = 20.0 + n = 1000 + + rng = np.random.default_rng(100) + u = rng.random(n) + alpha = true_mu * true_kappa + beta = (1 - true_mu) * true_kappa + beta_data = rng.beta(alpha, beta, n) + data = np.where(u < true_zoi * (1 - true_coi), 0.0, + np.where(u < true_zoi, 1.0, + beta_data)) + + # Fit model + with pm.Model() as model: + zoi = pm.Beta("zoi", alpha=1, beta=1) + coi = pm.Beta("coi", alpha=1, beta=1) + mu = pm.Beta("mu", alpha=2, beta=2) + kappa = pm.Gamma("kappa", alpha=2, beta=0.1) + y = pm.ZeroOneInflatedBeta( + "y", zoi=zoi, coi=coi, mu=mu, kappa=kappa, + observed=data + ) + trace = pm.sample(1000, tune=1000) + + + ======== =============================================== + Support :math:`y \in [0, 1]` + Mean :math:`(1 - \text{zoi})\mu + \text{zoi} \cdot \text{coi}` + Variance :math:`(1-\text{zoi})\left[\dfrac{\mu(1-\mu)}{\kappa+1} + \text{zoi} \cdot \text{coi} \cdot (1 - \text{coi})\right] + \text{zoi}(1-\text{zoi})\mu^2` + ======== =============================================== + + Setting :math:`coi = 0` recovers the Zero-Inflated Beta (ZIB) distribution. + Setting :math:`coi = 1` recovers the One-Inflated Beta (OIB) distribution. + + + Parameters + ---------- + zoi : tensor_like of float + Probability of a structural value at 0 or 1, :math:`0 \leq zoi \leq 1`. + coi : tensor_like of float + Conditional probability given a structural value that it is a structural 1, :math:`0 \leq coi \leq 1`. + alpha : tensor_like of float, optional + ``alpha`` > 0. If not specified, then calculated using ``mu`` and ``kappa``. + beta : tensor_like of float, optional + ``beta`` > 0. If not specified, then calculated using ``mu`` and ``kappa``. + mu : tensor_like of float, optional + Mean of the Beta component, :math:`0 < \mu < 1`. + kappa : tensor_like of float, optional + Precision of the Beta component, :math:`\kappa > 0`. + + References + ---------- + .. [1] Ospina, R., & Ferrari, S. L. (2010). Inflated beta distributions. + .. [2] Ospina, R., & Ferrari, S. L. (2012). A general class of zero-or-one inflated + beta regression models. + + Notes + ----- + ZeroOneInflatedBeta is appropriate for modeling proportion data when exact zeros + or ones represent distinct processes (structural zeros and ones), separate from + the tails of the Beta component. + """ + + rv_type = ZeroOneInflatedBetaRV + rv_op = ZeroOneInflatedBetaRV.rv_op + + @classmethod + def dist(cls, zoi, coi, mu=None, kappa=None, alpha=None, beta=None, **kwargs): + if (alpha is not None) and (beta is not None) and (mu is not None or kappa is not None): + warnings.warn( + "Both parametrizations provided. Proceeding with alpha and beta.", + UserWarning, + ) + if (alpha is not None) and (beta is not None): + pass + elif (mu is not None) and (kappa is not None): + alpha = mu * kappa + beta = (1.0 - mu) * kappa + else: + raise ValueError( + "Incompatible parametrization. Must specify alpha and beta, or mu and kappa." + ) + zoi = pt.as_tensor_variable(zoi) + coi = pt.as_tensor_variable(coi) + alpha = pt.as_tensor_variable(alpha) + beta = pt.as_tensor_variable(beta) + return super().dist([zoi, coi, alpha, beta], **kwargs) + + def support_point(rv, size, zoi, coi, alpha, beta): + mean = zoi * coi + (1.0 - zoi) * (alpha / (alpha + beta)) + if not rv_size_is_none(size): + mean = pt.full(size, mean) + return mean + + def logp(value, zoi, coi, alpha, beta): + saved_zoi = zoi + saved_coi = coi + + check_zoi = pt.bitwise_and(pt.ge(zoi, 0), pt.le(zoi, 1)) + check_coi = pt.bitwise_and(pt.ge(coi, 0), pt.le(coi, 1)) + zoi = pt.switch(check_zoi, pt.clip(zoi, 1e-12, 1 - 1e-12), zoi) + coi = pt.switch(check_coi, pt.clip(coi, 1e-12, 1 - 1e-12), coi) + + safe_value = pt.clip(value, 1e-12, 1 - 1e-12) + + logp_beta = ( + pt.switch(pt.eq(alpha, 1.0), 0.0, (alpha - 1.0) * pt.log(safe_value)) + + pt.switch(pt.eq(beta, 1.0), 0.0, (beta - 1.0) * pt.log1p(-safe_value)) + - (pt.gammaln(alpha) + pt.gammaln(beta) - pt.gammaln(alpha + beta)) + ) + + res = pt.switch( + pt.eq(value, 0), + pt.log(zoi) + pt.log1p(-coi), + pt.switch( + pt.eq(value, 1), + pt.log(zoi) + pt.log(coi), + pt.log1p(-zoi) + logp_beta, + ), + ) + + res = pt.switch( + pt.bitwise_and(pt.ge(value, 0.0), pt.le(value, 1.0)), + res, + -np.inf, + ) + return check_parameters( + res, + saved_zoi >= 0, + saved_zoi <= 1, + saved_coi >= 0, + saved_coi <= 1, + alpha > 0, + beta > 0, + msg="0 <= zoi <= 1, 0 <= coi <= 1, alpha > 0, beta > 0", + ) + + def logcdf(value, zoi, coi, alpha, beta): + saved_zoi = zoi + saved_coi = coi + + check_zoi = pt.bitwise_and(pt.ge(zoi, 0), pt.le(zoi, 1)) + check_coi = pt.bitwise_and(pt.ge(coi, 0), pt.le(coi, 1)) + zoi = pt.switch(check_zoi, pt.clip(zoi, 1e-12, 1 - 1e-12), zoi) + coi = pt.switch(check_coi, pt.clip(coi, 1e-12, 1 - 1e-12), coi) + + beta_logcdf = pt.log(pt.betainc(alpha, beta, value)) + + res = pt.switch( + pt.lt(value, 0), + -np.inf, + pt.switch( + pt.eq(value, 0), + pt.log(zoi) + pt.log1p(-coi), + pt.switch( + pt.lt(value, 1), + pt.log(zoi * (1 - coi) + (1 - zoi) * pt.exp(beta_logcdf)), + 0.0, + ), + ), + ) + + return check_parameters( + res, + saved_zoi >= 0, + saved_zoi <= 1, + saved_coi >= 0, + saved_coi <= 1, + alpha > 0, + beta > 0, + msg="0 <= zoi <= 1, 0 <= coi <= 1, alpha > 0, beta > 0", + ) + + class KumaraswamyRV(SymbolicRandomVariable): name = "kumaraswamy" extended_signature = "[rng],[size],(),()->[rng],()" @@ -1548,7 +1799,9 @@ def logcdf(value, mu, b): def icdf(value, mu, b): res = pt.switch( - pt.le(value, 0.5), mu + b * np.log(2 * value), mu - b * np.log(2 - 2 * value) + pt.le(value, 0.5), + mu + b * np.log(2 * value), + mu - b * np.log(2 - 2 * value), ) res = check_icdf_value(res, value) return check_icdf_parameters(res, b > 0, msg="b > 0") diff --git a/tests/distributions/test_continuous.py b/tests/distributions/test_continuous.py index 9f0c729e2c..9f584fd4d3 100644 --- a/tests/distributions/test_continuous.py +++ b/tests/distributions/test_continuous.py @@ -420,6 +420,38 @@ def test_beta_icdf(self): lambda q, alpha, beta: st.beta.ppf(q, alpha, beta), ) + def test_zero_one_inflated_beta_logp(self): + def zoib_logp(value, zoi, coi, alpha, beta): + if value == 0: + return np.log(zoi) + np.log1p(-coi) + elif value == 1: + return np.log(zoi) + np.log(coi) + else: + return np.log1p(-zoi) + st.beta.logpdf(value, alpha, beta) + + def zoib_logcdf(value, zoi, coi, alpha, beta): + if value < 0: + return -np.inf + elif value == 0: + return np.log(zoi * (1 - coi)) + elif value < 1: + return np.log(zoi * (1 - coi) + (1 - zoi) * st.beta.cdf(value, alpha, beta)) + else: + return 0.0 + + check_logp( + pm.ZeroOneInflatedBeta, + Unit, + {"zoi": Runif, "coi": Runif, "alpha": Rplus, "beta": Rplus}, + zoib_logp, + ) + check_logcdf( + pm.ZeroOneInflatedBeta, + Unit, + {"zoi": Runif, "coi": Runif, "alpha": Rplus, "beta": Rplus}, + zoib_logcdf, + ) + def test_kumaraswamy(self): # Scipy does not have a built-in Kumaraswamy def scipy_log_pdf(value, a, b): @@ -648,7 +680,9 @@ def test_half_cauchy(self): lambda value, beta: st.halfcauchy.logcdf(value, scale=beta), ) check_icdf( - pm.HalfCauchy, {"beta": Rplusbig}, lambda q, beta: st.halfcauchy.ppf(q, scale=beta) + pm.HalfCauchy, + {"beta": Rplusbig}, + lambda q, beta: st.halfcauchy.ppf(q, scale=beta), ) def test_gamma_logp(self): @@ -817,8 +851,20 @@ def test_skew_normal(self): (50.0, 50.000, 10.000, 10.000, -1.433714), (1000.0, 500.000, 10.000, 20.000, -1.573708e-11), (0.01, 0.01, 100.0, 0.01, -0.69314718), # Fails in scipy version - (-0.43402407, 0.0, 0.1, 0.1, -13.59615423), # Previous 32-bit version failed here - (-0.72402009, 0.0, 0.1, 0.1, -31.26571842), # Previous 64-bit version failed here + ( + -0.43402407, + 0.0, + 0.1, + 0.1, + -13.59615423, + ), # Previous 32-bit version failed here + ( + -0.72402009, + 0.0, + 0.1, + 0.1, + -31.26571842, + ), # Previous 64-bit version failed here ], ) def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf_val): @@ -1430,7 +1476,12 @@ def test_inverse_gamma_support_point(self, alpha, beta, size, expected): [ (2, 1, None, 1 * 2 ** (1 / 2)), (2, 1, 5, np.full(5, 1 * 2 ** (1 / 2))), - (np.arange(2, 7), np.arange(1, 6), None, np.arange(1, 6) * 2 ** (1 / np.arange(2, 7))), + ( + np.arange(2, 7), + np.arange(1, 6), + None, + np.arange(1, 6) * 2 ** (1 / np.arange(2, 7)), + ), ( np.arange(2, 7), np.arange(1, 6), @@ -1465,7 +1516,13 @@ def test_vonmises_support_point(self, mu, kappa, size, expected): (None, 1, 1, 5, np.full(5, 1)), (1, None, np.ones(5), None, np.full(5, 1)), (3, np.full(5, 2), None, None, np.full(5, 3)), - (np.arange(1, 6), None, np.arange(1, 6), (2, 5), np.full((2, 5), np.arange(1, 6))), + ( + np.arange(1, 6), + None, + np.arange(1, 6), + (2, 5), + np.full((2, 5), np.arange(1, 6)), + ), ], ) def test_wald_support_point(self, mu, lam, phi, size, expected): @@ -1577,7 +1634,12 @@ def test_triangular_support_point(self, c, lower, upper, size, expected): (0, np.arange(1, 5), None, sp.expit(np.zeros(4))), (np.arange(4), 1, None, sp.expit(np.arange(4))), (1, 5, 4, sp.expit(np.ones(4))), - (np.arange(4), np.arange(1, 5), (2, 4), np.full((2, 4), sp.expit(np.arange(4)))), + ( + np.arange(4), + np.arange(1, 5), + (2, 4), + np.full((2, 4), sp.expit(np.arange(4))), + ), ], ) def test_logitnormal_support_point(self, mu, sigma, size, expected): @@ -1633,7 +1695,12 @@ def test_interpolated_support_point(self, x_points, pdf_points, size, expected): (4.0, 3.0, None, 7.8110885363844345), (4.0, np.full(5, 3), None, np.full(5, 7.8110885363844345)), (np.arange(5), 1, None, np.arange(5) + 1.2703628454614782), - (np.arange(5), np.ones(5), (2, 5), np.full((2, 5), np.arange(5) + 1.2703628454614782)), + ( + np.arange(5), + np.ones(5), + (2, 5), + np.full((2, 5), np.arange(5) + 1.2703628454614782), + ), ], ) def test_moyal_support_point(self, mu, sigma, size, expected): @@ -1958,7 +2025,7 @@ def halfstudentt_rng_fn(self, df, loc, scale, size, rng): pymc_dist_params = {"nu": 5.0, "sigma": 2.0} expected_rv_op_params = {"nu": 5.0, "sigma": 2.0} reference_dist_params = {"df": 5.0, "loc": 0, "scale": 2.0} - reference_dist = lambda self: ft.partial(self.halfstudentt_rng_fn, rng=self.get_random_state()) # noqa: E731 + reference_dist = lambda self: ft.partial(self.halfstudentt_rng_fn, rng=self.get_random_state()) checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", @@ -2102,7 +2169,9 @@ class TestWald(BaseTestDistributionRandom): def check_pymc_draws_match_reference(self): npt.assert_array_almost_equal( - self.pymc_rv.eval(), self.reference_dist_draws + self.alpha, decimal=self.decimal + self.pymc_rv.eval(), + self.reference_dist_draws + self.alpha, + decimal=self.decimal, ) @@ -2192,7 +2261,7 @@ def logit_normal_rng_fn(self, rng, size, loc, scale): pymc_dist_params = {"mu": 5.0, "sigma": 10.0} expected_rv_op_params = {"mu": 5.0, "sigma": 10.0} reference_dist_params = {"loc": 5.0, "scale": 10.0} - reference_dist = lambda self: ft.partial(self.logit_normal_rng_fn, rng=self.get_random_state()) # noqa: E731 + reference_dist = lambda self: ft.partial(self.logit_normal_rng_fn, rng=self.get_random_state()) checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", @@ -2263,7 +2332,7 @@ class TestBeta(BaseTestDistributionRandom): expected_rv_op_params = {"alpha": 2.0, "beta": 5.0} reference_dist_params = {"a": 2.0, "b": 5.0} size = 15 - reference_dist = lambda self: ft.partial(clipped_beta_rvs, random_state=self.get_random_state()) # noqa: E731 + reference_dist = lambda self: ft.partial(clipped_beta_rvs, random_state=self.get_random_state()) checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", @@ -2342,7 +2411,7 @@ def halfcauchy_rng_fn(self, scale, size, rng): pymc_dist_params = {"beta": 5.0} expected_rv_op_params = {"beta": 5.0} reference_dist_params = {"scale": 5.0} - reference_dist = lambda self: ft.partial(self.halfcauchy_rng_fn, rng=self.get_random_state()) # noqa: E731 + reference_dist = lambda self: ft.partial(self.halfcauchy_rng_fn, rng=self.get_random_state()) checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference_not_numba", @@ -2484,7 +2553,7 @@ def polyagamma_rng_fn(self, size, h, z, rng): pymc_dist_params = {"h": 1.0, "z": 0.0} expected_rv_op_params = {"h": 1.0, "z": 0.0} reference_dist_params = {"h": 1.0, "z": 0.0} - reference_dist = lambda self: ft.partial(self.polyagamma_rng_fn, rng=self.get_random_state()) # noqa: E731 + reference_dist = lambda self: ft.partial(self.polyagamma_rng_fn, rng=self.get_random_state()) checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", @@ -2505,7 +2574,7 @@ def interpolated_rng_fn(self, size, mu, sigma, rng): pymc_dist_params = {"x_points": x_points, "pdf_points": pdf_points} reference_dist_params = {"mu": mu, "sigma": sigma} - reference_dist = lambda self: ft.partial(self.interpolated_rng_fn, rng=self.get_random_state()) # noqa: E731 + reference_dist = lambda self: ft.partial(self.interpolated_rng_fn, rng=self.get_random_state()) checks_to_run = [ "check_rv_size", "check_draws", @@ -2534,3 +2603,181 @@ def dist(cls, **kwargs): extra_args={"rng": pytensor.shared(rng)}, ref_rand=ref_rand, ) + + +class TestZeroOneInflatedBeta(BaseTestDistributionRandom): + pymc_dist = pm.ZeroOneInflatedBeta + pymc_dist_params = {"zoi": 0.3, "coi": 0.4, "mu": 0.5, "kappa": 10.0} + expected_rv_op_params = {"zoi": 0.3, "coi": 0.4, "a": 5.0, "b": 5.0} + size = 15 + checks_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + +class TestZeroOneInflatedBetaLogp: + def test_logp_at_zero(self): + zoi, coi, mu, kappa = 0.3, 0.4, 0.4, 10.0 + expected = np.log(zoi) + np.log1p(-coi) + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, mu=mu, kappa=kappa) + got = pm.logp(dist, 0.0).eval() + np.testing.assert_allclose(got, expected, rtol=1e-6) + + def test_logp_at_one(self): + zoi, coi, mu, kappa = 0.3, 0.4, 0.4, 10.0 + expected = np.log(zoi) + np.log(coi) + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, mu=mu, kappa=kappa) + got = pm.logp(dist, 1.0).eval() + np.testing.assert_allclose(got, expected, rtol=1e-6) + + def test_logp_interior(self): + zoi, coi, mu, kappa = 0.3, 0.4, 0.4, 10.0 + alpha = mu * kappa + beta = (1 - mu) * kappa + for v in [0.1, 0.3, 0.5, 0.7, 0.9]: + expected = np.log1p(-zoi) + st.beta.logpdf(v, alpha, beta) + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, mu=mu, kappa=kappa) + got = pm.logp(dist, v).eval() + np.testing.assert_allclose(got, expected, rtol=1e-6, err_msg=f"Mismatch at value={v}") + + def test_support_point(self): + zoi, coi, mu, kappa = 0.2, 0.3, 0.4, 10.0 + expected = zoi * coi + (1 - zoi) * mu + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, mu=mu, kappa=kappa) + alpha = mu * kappa + beta = (1 - mu) * kappa + sp = pm.distributions.continuous.ZeroOneInflatedBeta.support_point( + dist, None, zoi, coi, alpha, beta + ) + np.testing.assert_allclose(float(sp), expected, rtol=1e-6) + + def test_parameter_constraints(self): + from pymc.logprob.utils import ParameterValueError + + with pytest.raises(ParameterValueError): + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=1.5, coi=0.4, mu=0.5, kappa=10) + pm.logp(dist, 0.5).eval() + with pytest.raises(ParameterValueError): + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=0.3, coi=1.5, mu=0.5, kappa=10) + pm.logp(dist, 0.5).eval() + + with pytest.raises(ParameterValueError): + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=0.3, coi=0.4, alpha=-1.0, beta=5.0) + pm.logp(dist, 0.5).eval() + + def test_zib_special_case(self): + with pm.Model(): + y = pm.ZeroOneInflatedBeta("y", zoi=0.5, coi=0.0, mu=0.5, kappa=10, size=1000) + prior = pm.sample_prior_predictive(draws=1, random_seed=42) + samples = prior.prior["y"].values.flatten() + assert np.sum(samples == 1.0) == 0 + + def test_oib_special_case(self): + with pm.Model(): + y = pm.ZeroOneInflatedBeta("y", zoi=0.5, coi=1.0, mu=0.5, kappa=10, size=1000) + prior = pm.sample_prior_predictive(draws=1, random_seed=42) + samples = prior.prior["y"].values.flatten() + assert np.sum(samples == 0.0) == 0 + + def test_high_zoi_produces_many_boundaries(self): + with pm.Model(): + y = pm.ZeroOneInflatedBeta("y", zoi=0.9, coi=0.5, mu=0.5, kappa=10, size=2000) + prior = pm.sample_prior_predictive(draws=1, random_seed=42) + samples = prior.prior["y"].values.flatten() + prop_boundary = np.mean((samples == 0.0) | (samples == 1.0)) + assert 0.83 < prop_boundary < 0.95 + + def test_zoib_both_boundaries(self): + with pm.Model(): + y = pm.ZeroOneInflatedBeta("y", zoi=0.5, coi=0.5, mu=0.5, kappa=10, size=5000) + prior = pm.sample_prior_predictive(draws=1, random_seed=42) + samples = prior.prior["y"].values.flatten() + prop_zeros = np.mean(samples == 0.0) + prop_ones = np.mean(samples == 1.0) + prop_interior = np.mean((samples > 0.0) & (samples < 1.0)) + assert 0.18 < prop_zeros < 0.32 + assert 0.18 < prop_ones < 0.32 + assert 0.43 < prop_interior < 0.57 + + def test_zoib_reduces_to_beta(self): + mu, kappa = 0.4, 10.0 + alpha = mu * kappa + beta = (1 - mu) * kappa + for v in [0.2, 0.5, 0.8]: + expected = st.beta.logpdf(v, alpha, beta) + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=0.0, coi=0.5, mu=mu, kappa=kappa) + got = pm.logp(dist, v).eval() + np.testing.assert_allclose(got, expected, rtol=1e-6, err_msg=f"Mismatch at value={v}") + + def test_zoib_mu_kappa_matches_alpha_beta(self): + mu, kappa = 0.4, 10.0 + alpha = mu * kappa + beta = (1 - mu) * kappa + zoi, coi = 0.3, 0.4 + for v in [0.0, 0.5, 1.0]: + with pm.Model(): + dist_mk = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, mu=mu, kappa=kappa) + logp_mk = pm.logp(dist_mk, v).eval() + with pm.Model(): + dist_ab = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, alpha=alpha, beta=beta) + logp_ab = pm.logp(dist_ab, v).eval() + np.testing.assert_allclose( + logp_mk, logp_ab, rtol=1e-6, err_msg=f"Mismatch at value={v}" + ) + + def test_zoib_vectorized(self): + zoi = np.array([0.1, 0.2, 0.3]) + coi = np.array([0.3, 0.4, 0.5]) + mu = np.array([0.3, 0.5, 0.7]) + kappa = np.array([10.0, 15.0, 20.0]) + values = np.array([0.0, 0.5, 1.0]) + with pm.Model(): + dist = pm.ZeroOneInflatedBeta.dist(zoi=zoi, coi=coi, mu=mu, kappa=kappa) + logp = pm.logp(dist, values).eval() + assert logp.shape == (3,) + assert np.all(np.isfinite(logp)) + + def test_sampling_recovers_parameters(self): + rng = np.random.default_rng(42) + true_zoi, true_coi = 0.25, 0.40 + true_mu, true_kappa = 0.45, 15.0 + n = 600 + + alpha = true_mu * true_kappa + beta_p = (1 - true_mu) * true_kappa + u = rng.random(n) + data = np.where( + u < true_zoi * (1 - true_coi), + 0.0, + np.where(u < true_zoi, 1.0, rng.beta(alpha, beta_p, n)), + ) + + with pm.Model(): + zoi = pm.Beta("zoi", alpha=1, beta=1) + coi = pm.Beta("coi", alpha=1, beta=1) + mu = pm.Beta("mu", alpha=2, beta=2) + kappa = pm.Gamma("kappa", alpha=2, beta=0.1) + pm.ZeroOneInflatedBeta("y", zoi=zoi, coi=coi, mu=mu, kappa=kappa, observed=data) + trace = pm.sample( + 1000, + tune=1000, + chains=2, + random_seed=42, + progressbar=False, + return_inferencedata=True, + ) + + post = trace.posterior + assert abs(post["zoi"].mean().values - true_zoi) < 0.08 + assert abs(post["coi"].mean().values - true_coi) < 0.08 + assert abs(post["mu"].mean().values - true_mu) < 0.08 + assert abs(post["kappa"].mean().values - true_kappa) < 5.0