# Source code for gensbi.diagnostics.marginal_coverage

import numpy as np
import matplotlib.pyplot as plt
from KDEpy import FFTKDE
from tqdm import tqdm

from gensbi.diagnostics.utils import (
    probit,
    z_from_alpha,
    alpha_from_z,
    jefferys_interval,
)


def _compute_marginal_coverage_KDE(theta, posterior_samples, grid_points=2048, bw="ISJ"):
    r"""
    Computes the marginal coverage (credibility) for each observation and
    dimension using Kernel Density Estimation.

    For each observation b and dimension d, estimates the 1D marginal
    posterior density p(x) via KDE, then computes the HPD (Highest Posterior
    Density) credibility:

    .. math::
        \alpha = \int_{p(x) > p(\theta^*)} p(x) dx

    This gives the probability mass in regions denser than the true value.
    If the posterior is well calibrated, alpha should be Uniform(0, 1).

    Parameters
    ----------
    theta : array_like of shape (N_batch, D_dim)
        The ground truth parameters.
    posterior_samples : array_like of shape (N_samples, N_batch, D_dim)
        Samples from the posterior.
    grid_points : int, optional
        Number of grid points for KDE evaluation. Default is 2048.
    bw : str or float, optional
        Bandwidth method or value for KDE. Default is "ISJ" (Improved
        Sheather-Jones, a plug-in bandwidth selector).

    Returns
    -------
    alpha : np.ndarray of shape (D_dim, N_batch)
        The credibility values (0 to 1). If the posterior is well calibrated,
        these should be uniformly distributed. Entries where the KDE fails
        are set to NaN.

    Raises
    ------
    ValueError
        If ``theta`` is not shaped (N_batch, D_dim) consistently with
        ``posterior_samples``.
    """
    theta = np.asarray(theta)
    posterior_samples = np.asarray(posterior_samples)
    N_samples, N_batch, D_dim = posterior_samples.shape
    # Validate with a real exception rather than `assert`, which is silently
    # stripped when Python runs with -O.
    if theta.shape != (N_batch, D_dim):
        raise ValueError(
            f"theta has shape {theta.shape}, expected {(N_batch, D_dim)} "
            "to match posterior_samples"
        )

    alpha_values = np.zeros((D_dim, N_batch))
    estimator = FFTKDE(bw=bw)

    for d in range(D_dim):
        print(f"Computing coverage for dimension {d+1}/{D_dim}")
        for b in tqdm(range(N_batch), leave=False):
            samples = posterior_samples[:, b, d]
            truth = theta[b, d]
            try:
                kde_fit = estimator.fit(samples)
                x_grid, y_grid = kde_fit.evaluate(grid_points)
                # Evaluate p(θ*) by linear interpolation on the KDE grid.
                # If truth falls outside the grid, the posterior assigns
                # negligible density to it, so alpha → 1.
                if truth < x_grid.min() or truth > x_grid.max():
                    pdf_truth = 0.0
                else:
                    pdf_truth = np.interp(truth, x_grid, y_grid)
                # Approximate ∫_{p(x) ≥ p(θ*)} p(x) dx via a Riemann sum
                # on the uniform KDE grid. The grid spacing cancels in the
                # ratio, so we can use the raw density values directly.
                mask = y_grid >= pdf_truth
                alpha = y_grid[mask].sum() / y_grid.sum()
                alpha_values[d, b] = alpha
            except Exception as e:
                # Best-effort sweep: a failed KDE (e.g. degenerate samples)
                # is reported and recorded as NaN instead of aborting.
                print(f"Error in KDE for batch {b}, dim {d}: {e}")
                alpha_values[d, b] = np.nan
    return alpha_values
def _compute_marginal_coverage_histogram(theta, posterior_samples, bins="stone"):
    """
    Computes marginal coverage using empirical histograms.

    This is a discrete approximation of the HPD credibility: for each
    (observation, dimension) pair, it bins the posterior samples and sums the
    mass of all bins at least as dense as the one containing the true
    parameter. Faster than KDE and works well for multimodal distributions,
    but accuracy depends on bin count.

    Parameters
    ----------
    theta : array_like of shape (N_batch, D_dim)
        The ground truth parameters.
    posterior_samples : array_like of shape (N_samples, N_batch, D_dim)
        Samples from the posterior.
    bins : int or str, optional
        Number of bins or binning strategy. See ``numpy.histogram_bin_edges``
        for valid string values ('stone', 'scott', 'fd', 'sturges', etc).
        Default is 'stone'.

    Returns
    -------
    alpha_values : np.ndarray of shape (D_dim, N_batch)
        The credibility values (0 to 1).

    Raises
    ------
    ValueError
        If ``theta`` is not shaped (N_batch, D_dim) consistently with
        ``posterior_samples``.
    """
    theta = np.asarray(theta)
    posterior_samples = np.asarray(posterior_samples)
    N_samples, N_batch, D_dim = posterior_samples.shape
    # Same shape validation as the KDE variant, with a real exception
    # instead of `assert` (stripped under -O).
    if theta.shape != (N_batch, D_dim):
        raise ValueError(
            f"theta has shape {theta.shape}, expected {(N_batch, D_dim)} "
            "to match posterior_samples"
        )

    alpha_values = np.zeros((D_dim, N_batch))

    for d in range(D_dim):
        print(f"Computing coverage for dimension {d+1}/{D_dim}")
        for b in tqdm(range(N_batch), leave=False):
            samples = posterior_samples[:, b, d]
            truth = theta[b, d]
            counts, bin_edges = np.histogram(samples, bins=bins)
            # Locate the bin containing the true parameter.
            # np.digitize returns 1-based indices, so subtract 1.
            bin_idx = np.digitize(truth, bin_edges) - 1
            # Edge fix: np.histogram's last bin is closed on the right,
            # but np.digitize places truth == bin_edges[-1] past the end.
            # Pull it back so a truth equal to the sample maximum is not
            # misclassified as lying outside the support.
            if bin_idx == len(counts) and truth == bin_edges[-1]:
                bin_idx -= 1
            if bin_idx < 0 or bin_idx >= len(counts):
                # Truth lies outside the histogram range, meaning the
                # posterior assigns zero density there → alpha = 1.
                alpha = 1.0
            else:
                truth_count = counts[bin_idx]
                # HPD credibility: sum all bins with count ≥ truth bin count,
                # normalized by total samples. This is a discrete analogue of
                # ∫_{p(x) ≥ p(θ*)} p(x) dx.
                mask = counts >= truth_count
                alpha = counts[mask].sum() / N_samples
            alpha_values[d, b] = alpha
    return alpha_values
def compute_marginal_coverage(theta, posterior_samples, method="histogram", **kwargs):
    """
    Compute marginal coverage for each observation and dimension.

    Thin dispatcher over the histogram- and KDE-based implementations.

    Parameters
    ----------
    theta : array_like of shape (N_batch, D_dim)
        The ground truth parameters.
    posterior_samples : array_like of shape (N_samples, N_batch, D_dim)
        Samples from the posterior.
    method : str, optional
        Either "histogram" (default, empirical histograms) or "KDE"
        (Kernel Density Estimation).
    **kwargs : dict, optional
        Forwarded verbatim to the selected implementation.

    Returns
    -------
    alpha_values : np.ndarray of shape (D_dim, N_batch)
        The credibility values (0 to 1).

    Raises
    ------
    ValueError
        If ``method`` is not one of "histogram" or "KDE".
    """
    # Guard clause: reject unknown methods before touching the backends.
    if method not in ("histogram", "KDE"):
        raise ValueError(f"Unknown method: {method}")
    backend = (
        _compute_marginal_coverage_histogram
        if method == "histogram"
        else _compute_marginal_coverage_KDE
    )
    return backend(theta, posterior_samples, **kwargs)
def estimate_hat_z(alpha_values, nbins=50, zmax=4, z_band=1):
    """
    Compute empirical coverage (hat{z}) vs nominal coverage (z) for one dimension.

    Builds the empirical CDF of the supplied HPD credibility values and maps
    it into z-score space. A well-calibrated posterior yields hat{z} == z
    along the diagonal.

    Parameters
    ----------
    alpha_values : array_like
        1D array of credibility values for a single dimension.
    nbins : int, optional
        Number of bins for z (default is 50).
    zmax : float, optional
        Maximum z value to evaluate (default is 4).
    z_band : float, optional
        Z-score for the Jeffrey's uncertainty band (default is 1, ~68% CI).

    Returns
    -------
    stats : dict
        Keys: 'z' (nominal coverage z-values), 'mean' (empirical coverage
        hat{z}), 'upper' / 'lower' (bounds of the uncertainty band).
    """
    n_obs = len(alpha_values)
    z_nominal = np.linspace(0, zmax, nbins)

    # Each z maps to a coverage threshold t = 1 - alpha(z); an observation
    # "hits" threshold t when its credibility is below t, i.e. the truth
    # lies inside the t-credibility region. Count hits for all thresholds
    # at once via broadcasting.
    thresholds = 1 - alpha_from_z(z_nominal)
    hits = (np.asarray(alpha_values)[:, None] < thresholds[None, :]).sum(axis=0)

    # Empirical coverage fraction plus Jeffrey's uncertainty band.
    frac = hits / n_obs
    band_lo, band_hi = jefferys_interval(hits, n_obs, z=z_band)

    # Map coverage fractions back into z-score space for plotting.
    return dict(
        z=z_nominal,
        mean=z_from_alpha(1 - frac),
        upper=z_from_alpha(1 - band_hi),
        lower=z_from_alpha(1 - band_lo),
    )
def plot_marginal_coverage(alpha_values, zmax=3.5, n_cols=3, figsize=None):
    """
    Plots the marginal coverage for all dimensions.

    Each dimension gets one panel showing empirical coverage hat{z} versus
    nominal coverage z (via ``estimate_hat_z``), with an uncertainty band,
    the calibration diagonal, and annotated integer-sigma levels.

    Parameters
    ----------
    alpha_values : np.ndarray of shape (D_dim, N_batch)
        Credibility values.
    zmax : float, optional
        Maximum z-score for plotting limits (default is 3.5).
    n_cols : int, optional
        Number of columns in the subplot grid (default is 3).
    figsize : tuple, optional
        Figure size (width, height). If None, calculated automatically.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object.
    """
    # Never use more columns than there are dimensions to plot.
    n_cols = min(n_cols, alpha_values.shape[0])
    D_dim, N_batch = alpha_values.shape
    # Ceiling division: enough rows to fit every dimension.
    n_rows = (D_dim + n_cols - 1) // n_cols
    if figsize is None:
        figsize = (5 * n_cols, 4 * n_rows)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
    axes = np.atleast_2d(axes).reshape(n_rows, n_cols)  # handle 1D case
    axes_flat = axes.flatten()
    # Hide the unused trailing panels of the grid.
    for k in range(D_dim, len(axes_flat)):
        axes_flat[k].axis("off")
    for d in range(D_dim):
        ax = axes_flat[d]
        # Empirical-vs-nominal coverage curve for this dimension.
        hat_z = estimate_hat_z(alpha_values[d], zmax=zmax)
        z_grid = hat_z["z"]
        ax.plot(z_grid, hat_z["mean"], "k")
        upper = hat_z["upper"]
        upper = np.where(np.isinf(upper), 100.0, upper)  # clamp infs for plotting
        ax.fill_between(z_grid, hat_z["lower"], upper, color="0.8")
        # Perfect-calibration diagonal.
        ax.plot([0, zmax + 0.5], [0, zmax + 0.5], "--", color="darkgreen")
        # Annotate integer sigma levels with empirical coverage percentages.
        for t in range(1, int(zmax) + 1):
            # Empirical hat{z} at nominal level t, by interpolating the curve.
            l = np.interp(t, z_grid, hat_z["mean"])
            if not np.isinf(l):
                # Dotted guides from the axes to the curve point, plus the
                # empirical coverage percentage at this sigma level.
                ax.plot([0, t], [l, l], ":", color="r")
                c = 1 - alpha_from_z(l)
                ax.text(
                    0.1,
                    l + 0.02,
                    ("%.2f" % (c * 100)) + "%",
                    ha="left",
                    va="bottom",
                )
                ax.plot([t, t], [0, l], ":", color="r")
            else:
                # Empirical curve diverges here: draw only the vertical guide
                # and label it with the nominal coverage instead.
                ax.plot([t, t], [0, 10.0], ":", color="r")
                c = 1 - alpha_from_z(t)
                ax.text(
                    t + 0.02,
                    0.1,
                    ("%.2f" % (c * 100)) + "%",
                    rotation=-90,
                    ha="left",
                    va="bottom",
                )
        ax.set_xlim([0, zmax])
        ax.set_ylim([0, zmax + 0.5])
        ax.set_ylabel(r"Empirical coverage, $\hat{z}$")
        ax.set_xlabel(r"Confidence level, $z$")
        ax.set_title(f"Dim {d+1}")
        # Compute rotation angle for diagonal labels: transform two points of
        # the diagonal to screen space so the text tracks the drawn slope
        # regardless of the axes' aspect ratio.
        x_ = [0, 10]
        y_ = [0, 10]
        p1 = ax.transData.transform((x_[0], y_[0]))
        p2 = ax.transData.transform((x_[1], y_[1]))
        dy_screen = p2[1] - p1[1]
        dx_screen = p2[0] - p1[0]
        phi = np.degrees(np.arctan2(dy_screen, dx_screen))
        # Label the two sides of the diagonal: above = conservative
        # (empirical coverage exceeds nominal), below = overconfident.
        ax.text(
            zmax / 2,
            zmax / 2 + 0.4,
            "Conservative",
            ha="center",
            va="center",
            rotation=phi,
            color="darkgreen",
            alpha=1,
            rotation_mode="anchor",
            fontsize=11,
        )
        ax.text(
            zmax / 2 + 0.4,
            zmax / 2,
            "Overconfident",
            ha="center",
            va="center",
            rotation=phi,
            color="darkgreen",
            alpha=1,
            rotation_mode="anchor",
            fontsize=11,
        )
    plt.tight_layout()
    return fig