Source code for pyhs3.distributions.mathematical

"""
Mathematical Distribution implementations.

Provides classes for handling mathematical distributions defined by
mathematical formulas, polynomials, and custom expressions. These distributions
allow for flexible modeling using symbolic mathematical expressions.
"""

from __future__ import annotations

import logging
from typing import Literal, cast

import pytensor.tensor as pt
import sympy as sp
from pydantic import (
    ConfigDict,
    Field,
    PrivateAttr,
    model_validator,
)

from pyhs3.context import Context
from pyhs3.distributions.core import Distribution
from pyhs3.generic_parse import analyze_sympy_expr, parse_expression, sympy_to_pytensor
from pyhs3.typing.aliases import TensorVar

log = logging.getLogger(__name__)


[docs] class GenericDist(Distribution): """ Generic distribution implementation. Evaluates custom mathematical expressions using SymPy parsing and PyTensor computation graphs. Parameters: name: Name of the distribution expression: Mathematical expression string to be evaluated Supported Functions: - Basic arithmetic: +, -, *, /, ** - Trigonometric: sin, cos, tan - Exponential/Logarithmic: exp, log - Other: sqrt, abs Examples: Create a quadratic distribution: >>> dist = GenericDist(name="quadratic", expression="x**2 + 2*x + 1") Create a custom exponential with oscillation: >>> dist = GenericDist(name="exp_cos", expression="exp(-x**2/2) * cos(y)") Create a complex mathematical function: >>> dist = GenericDist(name="complex", expression="sin(x) + log(abs(y) + 1)") """ model_config = ConfigDict(arbitrary_types_allowed=True, serialize_by_alias=True) type: Literal["generic_dist"] = Field(default="generic_dist", repr=False) expression_str: str = Field(alias="expression", repr=False) _sympy_expr: sp.Expr = PrivateAttr(default=None) _dependent_vars: list[str] = PrivateAttr(default_factory=list) @model_validator(mode="after") def setup_expression(self) -> GenericDist: """Parse and analyze the expression during initialization.""" # Parse and analyze the expression during initialization self._sympy_expr = parse_expression(self.expression_str) # Analyze the expression to determine dependencies analysis = analyze_sympy_expr(self._sympy_expr) independent_vars = [str(symbol) for symbol in analysis["independent_vars"]] self._dependent_vars = [str(symbol) for symbol in analysis["dependent_vars"]] # Set parameters based on the analyzed expression self._parameters = {var: var for var in independent_vars} return self def likelihood(self, context: Context) -> TensorVar: """ Evaluate the generic distribution using expression parsing. Args: context: Mapping of names to pytensor variables Returns: PyTensor expression representing the parsed mathematical expression Raises: ValueError: If the expression cannot be parsed or contains undefined variables """ # Get the required variables using the parameters determined during initialization variables = [context[name] for name in self._parameters.values()] # Convert using the pre-parsed sympy expression result = sympy_to_pytensor(self._sympy_expr, variables) return cast(TensorVar, result)
[docs] class PolynomialDist(Distribution): r""" Polynomial probability distribution. Implements a polynomial probability density function as defined in ROOT's RooPolynomial and the HS3 specification: .. math:: f(x; a_0, a_1, a_2, ...) = \frac{1}{\mathcal{M}} \sum_{i=0}^n a_i x^i = a_0 + a_1 x + a_2 x^2 + ... Parameters: x (str): Input variable name. coefficients (list[str]): Array of coefficient parameter names. Note: The degree of the polynomial is determined by the length of the coefficients array. ROOT uses a lowestOrder parameter to handle default coefficients, but for simplicity we require all coefficients to be explicitly specified. ROOT Reference: :rootref:`RooPolynomial <classRooPolynomial.html>` """ type: Literal["polynomial_dist"] = "polynomial_dist" x: str | float | int coefficients: list[str] def likelihood(self, context: Context) -> TensorVar: """ Builds a symbolic expression for the polynomial PDF. Args: context (dict): Mapping of names to pytensor variables. Returns: pytensor.tensor.variable.TensorVariable: Symbolic representation of polynomial PDF. """ x = context[self._parameters["x"]] # Build polynomial: sum(a_i * x^i) result = pt.constant(0.0) processed_coefficients = self.get_parameter_list(context, "coefficients") for i, coef in enumerate(processed_coefficients): result = result + coef if i == 0 else result + coef * x**i # a_i * x^i return cast(TensorVar, result)
[docs] class BernsteinPolyDist(Distribution): r""" Bernstein polynomial probability distribution. Implements the Bernstein polynomial as defined in ROOT's RooBernstein. Used extensively for non-parametric fits and background modeling. .. math:: f(x; c_0, c_1, ..., c_n) = \frac{1}{\mathcal{M}} \sum_{i=0}^n c_i B_{i,n}(x) where $B_{i,n}(x) = \binom{n}{i} x^i (1-x)^{n-i}$ are the Bernstein basis polynomials. Parameters: x (str): Input variable name (should be normalized to [0,1]). coefficients (list[str]): Array of coefficient parameter names. Note: The input variable is expected to be normalized to the [0,1] interval. The normalization to this interval is typically handled by the domain. ROOT Reference: :rootref:`RooBernstein <classRooBernstein.html>` """ type: Literal["bernstein_poly_dist"] = "bernstein_poly_dist" x: str | float | int coefficients: list[str | float | int] def likelihood(self, context: Context) -> TensorVar: """ Builds a symbolic expression for the Bernstein polynomial PDF. Args: context (dict): Mapping of names to pytensor variables. Returns: pytensor.tensor.variable.TensorVariable: Symbolic representation of Bernstein polynomial PDF. """ x = context[self._parameters["x"]] n = len(self.coefficients) - 1 # polynomial degree result = pt.constant(0.0) processed_coefficients = self.get_parameter_list(context, "coefficients") for i, coef in enumerate(processed_coefficients): # Bernstein basis polynomial: C(n,i) * x^i * (1-x)^(n-i) # Use pt.gammaln for log(C(n,i)) = log(Γ(n+1)) - log(Γ(i+1)) - log(Γ(n-i+1)) log_binomial = pt.gammaln(n + 1) - pt.gammaln(i + 1) - pt.gammaln(n - i + 1) binomial = pt.exp(log_binomial) basis = binomial * (x**i) * ((1 - x) ** (n - i)) result = result + coef * basis return cast(TensorVar, result)
# Registry of mathematical distributions distributions: dict[str, type[Distribution]] = { "generic_dist": GenericDist, "polynomial_dist": PolynomialDist, "bernstein_poly_dist": BernsteinPolyDist, } # Define what should be exported from this module __all__ = [ "BernsteinPolyDist", "GenericDist", "PolynomialDist", "distributions", ]