Source code for driada.information.gcmi

"""Gaussian Copula Mutual Information (GCMI) implementation.

This module implements mutual information and conditional mutual information
estimators using the Gaussian copula approach.

Credits:
    Based on the original GCMI implementation by Robin Ince:
    https://github.com/robince/gcmi
    
    Reference:
    Ince, R.A.A., Giordano, B.L., Kayser, C., Rousselet, G.A., Gross, J. and 
    Schyns, P.G. (2017). A statistical framework for neuroimaging data analysis 
    based on mutual information estimated via a gaussian copula. Human Brain 
    Mapping, 38(3), 1541-1573.
"""

import numpy as np
from numba import njit
import warnings
from scipy.special import ndtri, psi

from .info_utils import py_fast_digamma_arr
from ..utils.jit import conditional_njit


def _prepare_for_jit(*arrays):
    """Prepare arrays for JIT compilation by ensuring they are contiguous and float type.

    This helper function ensures arrays meet the requirements for efficient JIT compilation:
    - C-contiguous memory layout for cache efficiency
    - Float32 or float64 dtype for numerical operations

    Parameters
    ----------
    *arrays
        Variable number of arrays to prepare. Can be any dtype or memory layout.

    Returns
    -------
    tuple or ndarray
        If multiple arrays: tuple of prepared arrays in the same order as input.
        If single array: the prepared array directly (not wrapped in tuple).
        Each output array is C-contiguous with float32 or float64 dtype.

    Notes
    -----
    Arrays that already meet the requirements are returned unchanged.
    Arrays with non-float dtypes are converted to float64.
    Non-contiguous arrays are made contiguous via copy.

    Examples
    --------
    >>> x = np.array([1, 2, 3], dtype=np.int32)
    >>> x_prep = _prepare_for_jit(x)
    >>> x_prep.dtype == np.float64
    True
    >>> x_prep.flags.c_contiguous
    True

    >>> # Multiple arrays
    >>> x = np.array([[1, 2], [3, 4]], order='F')  # Fortran order
    >>> y = np.array([1.0, 2.0])
    >>> x_prep, y_prep = _prepare_for_jit(x, y)
    >>> x_prep.flags.c_contiguous
    True"""
    prepared = []
    for arr in arrays:
        # Ensure contiguous
        if not arr.flags.c_contiguous:
            arr = np.ascontiguousarray(arr)
        # Ensure float type
        if arr.dtype not in (np.float32, np.float64):
            arr = arr.astype(np.float64)
        prepared.append(arr)

    return tuple(prepared) if len(prepared) > 1 else prepared[0]


# Import JIT versions if available
try:
    from .gcmi_jit_utils import (
        ctransform_jit,
        ctransform_2d_jit,
        copnorm_jit,
        copnorm_2d_jit,
        mi_gg_jit,
        cmi_ggg_jit,
        gcmi_cc_jit,
        gccmi_ccd_jit,
    )

    _JIT_AVAILABLE = True
except ImportError:
    _JIT_AVAILABLE = False



[docs]
def ctransform(x):
    """Copula transformation (empirical CDF).

    Transforms data to uniform marginals on (0,1) using the empirical
    cumulative distribution function. This is the first step in copula-based
    mutual information estimation.

    Parameters
    ----------
    x : ndarray
        Input data, shape (n_samples,) for 1D or (n_features, n_samples)
        for multivariate data. The transformation is applied along the last
        axis (samples).

    Returns
    -------
    ndarray
        Copula-transformed data with same shape as input. Values are in
        the open interval (0, 1), representing empirical CDF values.

    Notes
    -----
    The transformation maps each value to its rank divided by (n+1), where
    n is the number of samples. This ensures values are strictly between
    0 and 1 (never exactly 0 or 1), which is important for subsequent
    inverse normal CDF transformation.

    Automatically uses JIT-compiled version when available for better
    performance on large datasets.

    Examples
    --------
    >>> x = np.array([3.2, 1.1, 4.5, 2.3, 1.1])
    >>> result = ctransform(x)
    >>> np.round(result, 3)
    array([[0.667, 0.167, 0.833, 0.5  , 0.333]])

    >>> # 2D multivariate case
    >>> x = np.array([[1, 3, 2], [4, 2, 3]])
    >>> ctransform(x)  # Each row transformed independently
    array([[0.25, 0.75, 0.5 ],
           [0.75, 0.25, 0.5 ]])

    See Also
    --------
    copnorm : Complete copula normalization to standard normal"""
    x = np.atleast_2d(x)

    # Use JIT version if available
    if _JIT_AVAILABLE:
        x = _prepare_for_jit(x)

        if x.shape[0] == 1:
            # 1D case
            return ctransform_jit(x.ravel()).reshape(1, -1)
        else:
            # 2D case
            return ctransform_2d_jit(x)

    # Fallback to original implementation
    xi = np.argsort(x)
    xr = np.argsort(xi)
    cx = (xr + 1).astype(float) / (xr.shape[-1] + 1)
    return cx




[docs]
def copnorm(x):
    """Copula normalization to standard normal distribution.

    Transforms data to have standard normal marginals while preserving
    the copula (dependence structure). This is the key preprocessing step
    for Gaussian Copula Mutual Information (GCMI) estimation.

    Parameters
    ----------
    x : ndarray
        Input data, shape (n_samples,) for 1D or (n_features, n_samples)
        for multivariate data. The transformation is applied along the last
        axis (samples).

    Returns
    -------
    ndarray
        Copula-normalized data with same shape as input. Each marginal
        distribution is transformed to standard normal N(0,1).

    Notes
    -----
    The two-step process:
    1. Apply empirical CDF transform to get uniform marginals (ctransform)
    2. Apply inverse normal CDF to get standard normal marginals

    This transformation is:
    - Robust to outliers (rank-based)
    - Preserves all dependence relationships
    - Makes data amenable to Gaussian-based MI estimation

    Automatically uses JIT-compiled version when available for better
    performance.

    Examples
    --------
    >>> # Transform non-Gaussian data
    >>> x = np.random.exponential(size=1000)
    >>> x_norm = copnorm(x)
    >>> # x_norm now has standard normal marginal distribution
    >>> np.abs(np.mean(x_norm)) < 0.1  # Close to 0
    True
    >>> np.abs(np.std(x_norm) - 1) < 0.1  # Close to 1
    True

    See Also
    --------
    ctransform : First step of copula normalization
    gcmi_cc : Uses copnorm internally for MI estimation

    References
    ----------
    Ince, R. A., et al. (2017). A statistical framework for neuroimaging data
    analysis based on mutual information estimated via a Gaussian copula.
    Human Brain Mapping, 38(3), 1541-1573."""
    x = np.atleast_2d(x)

    # Use JIT version if available
    if _JIT_AVAILABLE:
        x = _prepare_for_jit(x)

        if x.shape[0] == 1:
            # 1D case
            return copnorm_jit(x.ravel()).reshape(1, -1)
        else:
            # 2D case
            return copnorm_2d_jit(x)

    # Fallback to original implementation
    # cx = sp.stats.norm.ppf(ctransform(x))
    cx = ndtri(ctransform(x))
    return cx



@conditional_njit
def demean(x):
    """Demean each row of a 2D array.

    Subtracts the mean from each row independently, resulting in rows with
    zero mean. This is a common preprocessing step for covariance calculations.

    Parameters
    ----------
    x : ndarray, shape (n_features, n_samples)
        2D array where each row (feature) is demeaned independently.
        Must be 2-dimensional.

    Returns
    -------
    ndarray
        Array with same shape as input where each row has zero mean.
        The operation is performed in-place on a copy of the input.

    Notes
    -----
    This function is JIT-compiled with numba for performance.
    The demeaning is performed row-wise, treating each row as a separate
    signal or feature to be centered.

    Examples
    --------
    >>> x = np.array([[1.0, 2.0, 3.0],
    ...               [4.0, 5.0, 6.0]])
    >>> demean(x)
    array([[-1.,  0.,  1.],
           [-1.,  0.,  1.]])

    >>> # Verify zero mean
    >>> demeaned = demean(x)
    >>> np.allclose(demeaned.mean(axis=1), 0)
    True"""
    # Get the number of rows
    num_rows = x.shape[0]

    # Create an output array with the same shape as input
    demeaned_x = np.empty_like(x)

    # Demean each row
    for i in range(num_rows):
        row_mean = np.mean(x[i])
        demeaned_x[i] = x[i] - row_mean

    return demeaned_x


@conditional_njit
def regularized_cholesky(C, regularization=1e-12):
    """Compute Cholesky decomposition with regularization for numerical stability.

    Adds diagonal regularization to prevent issues with near-singular
    covariance matrices. Uses adaptive regularization for severely ill-conditioned
    matrices based on determinant check.

    Parameters
    ----------
    C : ndarray, shape (n, n)
        Symmetric positive semi-definite matrix (typically a covariance matrix).
        Should be square and symmetric.
    regularization : float, optional
        Base regularization parameter added to diagonal. Default is 1e-12.
        For severely ill-conditioned matrices, adaptive regularization may
        apply a larger value.

    Returns
    -------
    L : ndarray, shape (n, n)
        Lower triangular Cholesky factor such that C ≈ L @ L.T.

    Notes
    -----
    The function detects ill-conditioning by comparing the determinant to an
    expected scale based on the trace. For severely ill-conditioned matrices
    (det < expected_scale * 1e-8), it applies stronger adaptive regularization.

    The regularization modifies the input as: C_reg = C + reg * I, where I is
    the identity matrix and reg is the regularization parameter.

    This function is JIT-compiled with numba for performance.

    Examples
    --------
    >>> # Well-conditioned matrix
    >>> C = np.array([[2.0, 1.0], [1.0, 2.0]])
    >>> L = regularized_cholesky(C)
    >>> np.allclose(L @ L.T, C, rtol=1e-10)
    True

    >>> # Near-singular matrix
    >>> C = np.array([[1.0, 0.99999], [0.99999, 1.0]])
    >>> L = regularized_cholesky(C)  # Applies regularization
    >>> # Result is stable despite near-singularity"""
    # Check matrix conditioning using determinant
    det_C = np.linalg.det(C)
    trace_C = np.trace(C)

    # Adaptive regularization based on determinant relative to trace
    # For near-singular matrices, det << trace^n where n is matrix size
    n = C.shape[0]
    expected_det_scale = (trace_C / n) ** n

    if det_C > 0 and det_C < expected_det_scale * 1e-8:  # Severely ill-conditioned
        # Use stronger regularization proportional to trace
        adaptive_reg = trace_C * 1e-8 / n  # Scale by matrix size
        reg = max(regularization, adaptive_reg)
    else:
        reg = regularization

    # Apply regularization
    C_reg = C + np.eye(C.shape[0]) * reg
    return np.linalg.cholesky(C_reg)


@conditional_njit()
def ent_g(x, biascorrect=True):
    """
    Entropy of a Gaussian variable in bits.

    Computes the differential entropy of a (possibly multidimensional)
    Gaussian variable using the covariance matrix. Supports any number
    of features/dimensions.

    Parameters
    ----------
    x : array_like, shape (n_features, n_samples) or (n_samples,)
        Data array. If 2D, rows are features/dimensions and columns are samples.
        If 1D with shape (n,), it is converted to shape (1, n) representing
        a single feature with n samples.
        Can handle any number of features (no limit on n_features).
    biascorrect : bool, optional
        Whether to apply bias correction for finite samples. Default is True.

    Returns
    -------
    float
        Entropy in bits. For a d-dimensional Gaussian with covariance C:
        H = 0.5 * log(det(2πeC)) / log(2)

    Raises
    ------
    ValueError
        If x is a 3D or higher dimensional array (must be 1D or 2D).

    Notes
    -----
    The bias correction uses digamma functions to account for finite sample
    effects in covariance estimation. Data is demeaned before computation.
    The function supports high-dimensional data with many features.

    This function is JIT-compiled with numba for performance.

    Examples
    --------
    >>> # 1D Gaussian entropy
    >>> rng = np.random.RandomState(42)
    >>> x = rng.randn(1000)  # Standard normal
    >>> h = ent_g(x)
    >>> # Theoretical entropy of N(0,1) is 0.5*log2(2*pi*e) ≈ 2.047
    >>> abs(h - 2.047) < 0.1
    True

    >>> # 2D Gaussian entropy
    >>> x = rng.randn(2, 1000)  # 2D standard normal
    >>> h = ent_g(x)
    >>> # Theoretical entropy is d*0.5*log2(2*pi*e) ≈ 2*2.047
    >>> abs(h - 4.094) < 0.2
    True

    >>> # Effect of correlation on entropy
    >>> x = rng.randn(2, 1000)
    >>> x[1] = 0.9 * x[0] + 0.1 * rng.randn(1000)  # Correlated
    >>> h_corr = ent_g(x)
    >>> h_uncorr = ent_g(rng.randn(2, 1000))
    >>> h_corr < h_uncorr  # Correlation reduces entropy
    True

    See Also
    --------
    ~driada.information.mi_gg : Uses entropy to compute mutual information

    References
    ----------
    Cover, T. M., & Thomas, J. A. (2006). Elements of information theory."""
    x = np.atleast_2d(x)
    if x.ndim > 2:
        raise ValueError("x must be a 1D or 2D array with shape (n_features, n_samples)")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]

    # demean data
    x = demean(x)
    # covariance
    C = np.dot(x, x.T) / float(Ntrl - 1)
    chC = regularized_cholesky(C)

    # entropy in nats
    # Extract diagonal manually for Numba compatibility
    diag_sum = 0.0
    for i in range(chC.shape[0]):
        diag_sum += np.log(chC[i, i])
    HX = diag_sum + 0.5 * Nvarx * (np.log(2 * np.pi) + 1.0)

    ln2 = np.log(2)
    if biascorrect:
        psiterms = (
            py_fast_digamma_arr((Ntrl - np.arange(1, Nvarx + 1, dtype=np.float64)) / 2.0) / 2.0
        )
        dterm = (ln2 - np.log(Ntrl - 1.0)) / 2.0
        HX = HX - Nvarx * dterm - psiterms.sum()

    # convert to bits
    return HX / ln2


@conditional_njit()
def mi_gg(x, y, biascorrect=True, demeaned=False):
    """Mutual information (MI) between two Gaussian variables in bits.

    Computes mutual information between two (possibly multidimensional)
    Gaussian variables using the entropy relation: I(X;Y) = H(X) + H(Y) - H(X,Y).
    Assumes variables follow a multivariate Gaussian distribution.

    Parameters
    ----------
    x : ndarray
        First variable, shape (n_features_x, n_samples) or (n_samples,) for 1D.
        Columns correspond to samples, rows to dimensions/variables.
    y : ndarray
        Second variable, shape (n_features_y, n_samples) or (n_samples,) for 1D.
        Must have same number of samples as x.
    biascorrect : bool, default=True
        Whether to apply bias correction to the MI estimate. Uses psi function
        (digamma) correction for finite sample bias in entropy estimation.
    demeaned : bool, default=False
        Whether input data already has zero mean. Set True if data has been
        copula-normalized or otherwise centered.

    Returns
    -------
    float
        Mutual information in bits. Always non-negative, with 0 indicating
        independence.

    Raises
    ------
    ValueError
        If x and y have different number of samples, or if inputs are not
        1D or 2D arrays.

    Notes
    -----
    This function assumes data follows a multivariate Gaussian distribution.
    For non-Gaussian data, use copula normalization first (via ctransform).

    The bias correction uses the Miller-Madow correction generalized to
    multivariate Gaussians, improving accuracy for small sample sizes.

    Examples
    --------
    >>> # Independent Gaussian variables
    >>> rng = np.random.RandomState(42)
    >>> x = rng.randn(1, 1000)  # 1D Gaussian
    >>> y = rng.randn(1, 1000)  # Independent 1D Gaussian
    >>> mi = mi_gg(x, y)
    >>> mi < 0.05  # Should be near 0 for independent variables
    True

    >>> # Correlated Gaussian variables
    >>> x = rng.randn(1, 1000)
    >>> y = 0.7 * x + 0.3 * rng.randn(1, 1000)  # Correlated
    >>> mi = mi_gg(x, y)
    >>> mi > 0.5  # Significant mutual information
    True

    >>> # Multidimensional case
    >>> x = rng.randn(3, 1000)  # 3D Gaussian
    >>> y = rng.randn(2, 1000)  # 2D Gaussian
    >>> # Create correlation: y depends on x
    >>> y[0] = 0.5 * x[0] + 0.5 * rng.randn(1000)
    >>> mi = mi_gg(x, y)
    >>> mi > 0.2  # Detects the dependency
    True

    See Also
    --------
    gcmi_cc : Gaussian-copula MI for arbitrary continuous distributions
    ~driada.information.gcmi.ent_g : Gaussian entropy used in MI calculation

    References
    ----------
    Ince, R. A., et al. (2017). A statistical framework for neuroimaging data
    analysis based on mutual information estimated via a Gaussian copula.
    Human Brain Mapping, 38(3), 1541-1573."""

    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be 1D or 2D arrays")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]
    Nvarxy = Nvarx + Nvary

    if y.shape[1] != Ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xy = np.vstack((x, y))

    if not demeaned:
        xy = demean(xy)

    Cxy = np.dot(xy, xy.T) / float(Ntrl - 1)
    # submatrices of joint covariance
    Cx = Cxy[:Nvarx, :Nvarx]
    Cy = Cxy[Nvarx:, Nvarx:]

    chCxy = regularized_cholesky(Cxy)
    chCx = regularized_cholesky(Cx)
    chCy = regularized_cholesky(Cy)

    # entropies in nats
    # normalizations cancel for mutual information
    HX = np.sum(np.log(np.diag(chCx)))  # + 0.5*Nvarx*(np.log(2*np.pi)+1.0)
    HY = np.sum(np.log(np.diag(chCy)))  # + 0.5*Nvary*(np.log(2*np.pi)+1.0)
    HXY = np.sum(np.log(np.diag(chCxy)))  # + 0.5*Nvarxy*(np.log(2*np.pi)+1.0)

    ln2 = np.log(2)
    if biascorrect:
        psiterms = py_fast_digamma_arr((Ntrl - np.arange(1, Nvarxy + 1)) / 2.0) / 2.0
        dterm = (ln2 - np.log(Ntrl - 1.0)) / 2.0
        HX = HX - Nvarx * dterm - psiterms[:Nvarx].sum()
        HY = HY - Nvary * dterm - psiterms[:Nvary].sum()
        HXY = HXY - Nvarxy * dterm - psiterms[:Nvarxy].sum()

    # MI in bits
    I = (HX + HY - HXY) / ln2
    return max(0.0, I)  # MI is non-negative



[docs]
def mi_model_gd(x, y, Ym=None, biascorrect=True, demeaned=False):
    """
    Mutual information between a Gaussian and a discrete variable in bits.

    Computes MI between a (possibly multidimensional) Gaussian variable x and
    a discrete variable y using ANOVA-style model comparison. For 1D x this
    provides a lower bound to the mutual information.

    Note: Each discrete class must have at least 2 samples for covariance
    estimation. Classes with fewer samples will be skipped with a warning.

    Parameters
    ----------
    x : array_like, shape (n_features, n_samples) or (n_samples,)
        Gaussian variable data. If 2D, rows are features and columns are samples.
        If 1D with shape (n,), converted to shape (1, n) representing a single
        feature with n samples.
    y : array_like, shape (n_samples,)
        Discrete variable containing integer values in range [0, max(y)].
        Must be 1D array with same number of samples as x.
    Ym : int, optional
        Number of discrete states. If None (default), automatically computed as
        np.max(y) + 1. Useful if y doesn't contain all possible states.
    biascorrect : bool, optional
        Whether to apply bias correction for finite samples. Default is True.
    demeaned : bool, optional
        Whether input data x already has zero mean (e.g., if copula-normalized).
        Default is False.

    Returns
    -------
    float
        Mutual information I(X;Y) in bits.

    Raises
    ------
    ValueError
        If x is 3D or higher dimensional array.
        If y is not a 1D array.
        If number of samples don't match between x and y.
        If Ym is not an integer.

    Warnings
    --------
    RuntimeWarning
        If any class has fewer than 2 samples. These classes will be skipped
        in the MI calculation as covariance estimation requires at least 2
        samples per class.

    Examples
    --------
    >>> import numpy as np
    >>> # Continuous data (2 variables, 100 samples)
    >>> x = np.random.randn(2, 100)
    >>> # Discrete labels (3 classes)
    >>> y = np.random.randint(0, 3, 100)
    >>> mi = mi_model_gd(x, y, Ym=3)
    >>> isinstance(mi, float)
    True
    >>> # Automatic Ym detection
    >>> mi_auto = mi_model_gd(x, y)
    >>> isinstance(mi_auto, float)
    True

    See Also
    --------
    ~driada.information.gcmi.ent_g : Gaussian entropy estimation"""

    x = np.atleast_2d(x)
    # y = np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be a 1D or 2D array with shape (n_features, n_samples)")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")

    # Handle Ym parameter
    if Ym is None:
        # Auto-detect number of states
        Ym = int(np.max(y) + 1)
    else:
        # Validate and convert user-provided value
        if int(Ym) != Ym:
            raise ValueError("Ym should be an integer")
        Ym = int(Ym)

    # Check for classes with too few samples
    import warnings

    for yi in range(Ym):
        n_samples = np.sum(y == yi)
        if n_samples == 0:
            warnings.warn(f"Class {yi} has no samples and will be skipped", RuntimeWarning)
        elif n_samples == 1:
            warnings.warn(
                f"Class {yi} has only 1 sample and will be skipped. At least 2 samples per class are required for covariance estimation.",
                RuntimeWarning,
            )

    # Call JIT-compiled implementation
    return _mi_model_gd_jit(x, y, Ym, biascorrect, demeaned)



@conditional_njit()
def _mi_model_gd_jit(x, y, Ym, biascorrect, demeaned):
    """JIT-compiled implementation of mi_model_gd.

    Parameters
    ----------
    x : ndarray
        Continuous data array of shape (n_features, n_trials)
    y : ndarray
        Discrete labels array of shape (n_trials,)
    Ym : int
        Number of unique discrete values in y
    biascorrect : bool
        Whether to apply bias correction
    demeaned : bool
        Whether the data is already demeaned

    Returns
    -------
    float
        Mutual information in bits
    """
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]

    if y.size != Ntrl:
        raise ValueError("number of trials do not match")

    # Demean x for unconditional entropy (consistency with per-class demeaning)
    if not demeaned:
        x = demean(x)

    # class-conditional entropies
    Ntrl_y = np.zeros(Ym)
    Hcond = np.zeros(Ym)
    c = 0.5 * (np.log(2.0 * np.pi) + 1)

    for yi in range(Ym):
        idx = y == yi
        xm = x[:, idx]
        Ntrl_y[yi] = xm.shape[1]

        # Skip classes with too few samples
        if Ntrl_y[yi] < 2:
            # Set weight to 0 and entropy to 0 for this class
            Hcond[yi] = 0.0
            continue

        xm = demean(xm)
        Cm = np.dot(xm, xm.T) / float(Ntrl_y[yi] - 1)
        chCm = regularized_cholesky(Cm)
        Hcond[yi] = np.sum(np.log(np.diag(chCm)))  # + c*Nvarx

    # class weights
    w = Ntrl_y / float(Ntrl)

    # unconditional entropy from unconditional Gaussian fit
    Cx = np.dot(x, x.T) / float(Ntrl - 1)
    chC = regularized_cholesky(Cx)
    Hunc = np.sum(np.log(np.diag(chC)))  # + c*Nvarx

    ln2 = np.log(2)
    if biascorrect:
        vars = np.arange(1, Nvarx + 1)

        psiterms = py_fast_digamma_arr((Ntrl - vars) / 2.0) / 2.0
        dterm = (ln2 - np.log(float(Ntrl - 1))) / 2.0
        Hunc = Hunc - Nvarx * dterm - psiterms.sum()

        # For bias correction, handle each class separately
        for yi in range(Ym):
            if Ntrl_y[yi] < 2:
                # Skip bias correction for classes with insufficient samples
                continue
            dterm_yi = (ln2 - np.log(float(Ntrl_y[yi] - 1))) / 2.0
            psiterm_yi = 0.0
            for vi in vars:
                if Ntrl_y[yi] > vi:
                    idx = Ntrl_y[yi] - vi
                    psiterm_yi += py_fast_digamma_arr(np.array([idx / 2.0]))[0]
            Hcond[yi] = Hcond[yi] - Nvarx * dterm_yi - (psiterm_yi / 2.0)

    # MI in bits
    I = (Hunc - np.sum(w * Hcond)) / ln2
    return max(0.0, I)  # MI is non-negative



[docs]
def gcmi_cc(x, y):
    """Gaussian-Copula Mutual Information between two continuous variables.

    Main user-facing function for computing mutual information between
    continuous variables using the Gaussian Copula MI (GCMI) method.
    Handles arbitrary continuous distributions by transforming marginals
    to Gaussian via copula normalization.

    Parameters
    ----------
    x : ndarray
        First continuous variable, shape (n_features_x, n_samples) or (n_samples,) for 1D.
        If 2D, rows are features and columns are samples. If 1D, treated as
        single feature with multiple samples.
    y : ndarray
        Second continuous variable, shape (n_features_y, n_samples) or (n_samples,) for 1D.
        Must have same number of samples as x.

    Returns
    -------
    float
        Mutual information in bits. Always non-negative, with 0 indicating
        independence. Provides a lower bound to the true MI.

    Notes
    -----
    The GCMI method:

    1. Transforms each variable to standard normal marginals using the
       empirical CDF (copula transform)
    2. Computes MI under the Gaussian copula assumption
    3. Applies bias correction for finite samples

    This approach is:

    - Robust to outliers due to rank-based transform
    - Computationally efficient (no density estimation)
    - Provides MI lower bound (exact for jointly Gaussian data)
    - Suitable for continuous neural data (firing rates, LFP, etc.)

    For discrete variables, use gcmi_cd or gcmi_ccd instead.

    Examples
    --------
    >>> # Linear relationship with non-Gaussian marginals
    >>> rng = np.random.RandomState(42)
    >>> x = rng.exponential(size=1000)  # Non-Gaussian
    >>> y = 2 * x + rng.normal(0, 0.5, size=1000)
    >>> mi = gcmi_cc(x, y)
    >>> mi > 1.0  # Detects strong dependency
    True

    >>> # Monotonic nonlinear relationship
    >>> x = rng.exponential(size=1000)
    >>> y = np.log(x + 1) + rng.normal(0, 0.1, size=1000)
    >>> mi = gcmi_cc(x, y)
    >>> mi > 0.5  # Detects monotonic dependency
    True

    >>> # Multidimensional example
    >>> x = rng.randn(3, 1000)  # 3D variable
    >>> y = rng.randn(2, 1000)  # 2D variable
    >>> # Create dependency
    >>> y[0] = 0.5 * x[0] + 0.3 * x[1] + 0.2 * rng.randn(1000)
    >>> mi = gcmi_cc(x, y)
    >>> mi > 0.3  # Detects multivariate dependency
    True

    See Also
    --------
    ~driada.information.mi_gg : MI for Gaussian variables (without copula transform)
    ~driada.information.gccmi_ccd : Conditional MI with discrete conditioning variable

    References
    ----------
    Ince, R. A., et al. (2017). A statistical framework for neuroimaging data
    analysis based on mutual information estimated via a Gaussian copula.
    Human Brain Mapping, 38(3), 1541-1573."""

    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]

    if y.shape[1] != Ntrl:
        raise ValueError("number of trials do not match")

    # Use JIT version if available and suitable
    if (
        _JIT_AVAILABLE
        and x.flags.c_contiguous
        and y.flags.c_contiguous
        and x.dtype in (np.float32, np.float64)
        and y.dtype in (np.float32, np.float64)
    ):
        return gcmi_cc_jit(x, y)

    """
    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi,:]).size / float(Ntrl)) < 0.9:
            warnings.warn("Input x has more than 10% repeated values")
            break
    for yi in range(Nvary):
        if (np.unique(y[yi,:]).size / float(Ntrl)) < 0.9:
            warnings.warn("Input y has more than 10% repeated values")
            break
    """

    # copula normalization
    cx = copnorm(x)
    cy = copnorm(y)
    # parametric Gaussian MI
    I = mi_gg(cx, cy, True, True)
    return max(0.0, I)  # MI is non-negative



# Note: All functions below have been integrated with numba JIT compilation

[docs]
def cmi_ggg(x, y, z, biascorrect=True, demeaned=False):
    """
    Conditional mutual information between two Gaussian variables given a third.

    Computes CMI between two (possibly multidimensional) Gaussian variables
    x and y, conditioned on a third variable z. Uses entropy decomposition:
    I(X;Y|Z) = H(X|Z) + H(Y|Z) - H(X,Y|Z).

    Parameters
    ----------
    x : array_like, shape (n_features_x, n_samples) or (n_samples,)
        First Gaussian variable. If 2D, rows are features and columns are samples.
        If 1D with shape (n,), converted to shape (1, n).
    y : array_like, shape (n_features_y, n_samples) or (n_samples,)
        Second Gaussian variable. Must have same number of samples as x.
    z : array_like, shape (n_features_z, n_samples) or (n_samples,)
        Conditioning Gaussian variable. Must have same number of samples as x and y.
    biascorrect : bool, optional
        Whether to apply bias correction for finite samples. Default is True.
    demeaned : bool, optional
        Whether input data already has zero mean (e.g., if copula-normalized).
        Default is False.

    Returns
    -------
    float
        Conditional mutual information I(X;Y|Z) in bits.

    Raises
    ------
    ValueError
        If x, y, or z are 3D or higher dimensional arrays.
        If number of samples don't match between x, y, and z.

    Notes
    -----
    Conditional mutual information measures the dependency between X and Y
    after accounting for the influence of Z. If X and Y are independent
    given Z, then I(X;Y|Z) = 0.

    This function assumes all variables follow a joint Gaussian distribution.
    For non-Gaussian data, apply copula normalization first.

    The entropy decomposition uses:
    I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(X,Y,Z) - H(Z)

    Examples
    --------
    >>> # Independent given Z
    >>> rng = np.random.RandomState(42)
    >>> z = rng.randn(1, 1000)
    >>> x = z + 0.5 * rng.randn(1, 1000)  # X depends on Z
    >>> y = z + 0.5 * rng.randn(1, 1000)  # Y depends on Z
    >>> # X and Y are correlated through Z
    >>> mi_uncond = mi_gg(x, y)
    >>> mi_uncond > 0.3  # Significant MI without conditioning
    True
    >>> # But independent given Z
    >>> cmi = cmi_ggg(x, y, z)
    >>> cmi < 0.05  # Near zero when conditioned on Z
    True

    >>> # Direct dependency not explained by Z
    >>> z = rng.randn(1, 1000)
    >>> x = rng.randn(1, 1000)
    >>> y = 0.5 * x + 0.3 * z + 0.2 * rng.randn(1, 1000)
    >>> cmi = cmi_ggg(x, y, z)
    >>> cmi > 0.3  # Still significant after conditioning
    True

    See Also
    --------
    ~driada.information.mi_gg : Unconditional mutual information
    ~driada.information.gccmi_ccd : CMI with discrete conditioning variable

    References
    ----------
    Cover, T. M., & Thomas, J. A. (2006). Elements of information theory."""

    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    z = np.atleast_2d(z)

    # Use JIT version if available
    if _JIT_AVAILABLE:
        x, y, z = _prepare_for_jit(x, y, z)
        return cmi_ggg_jit(x, y, z, biascorrect, demeaned)

    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]
    Nvarz = z.shape[0]
    Nvaryz = Nvary + Nvarz
    Nvarxy = Nvarx + Nvary
    Nvarxz = Nvarx + Nvarz
    Nvarxyz = Nvarx + Nvaryz

    if y.shape[1] != Ntrl or z.shape[1] != Ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xyz = np.vstack((x, y, z))
    if not demeaned:
        xyz = xyz - xyz.mean(axis=1)[:, np.newaxis]
    Cxyz = np.dot(xyz, xyz.T) / float(Ntrl - 1)
    # submatrices of joint covariance
    Cz = Cxyz[Nvarxy:, Nvarxy:]
    Cyz = Cxyz[Nvarx:, Nvarx:]
    Cxz = np.zeros((Nvarxz, Nvarxz))
    Cxz[:Nvarx, :Nvarx] = Cxyz[:Nvarx, :Nvarx]
    Cxz[:Nvarx, Nvarx:] = Cxyz[:Nvarx, Nvarxy:]
    Cxz[Nvarx:, :Nvarx] = Cxyz[Nvarxy:, :Nvarx]
    Cxz[Nvarx:, Nvarx:] = Cxyz[Nvarxy:, Nvarxy:]

    chCz = regularized_cholesky(Cz)
    chCxz = regularized_cholesky(Cxz)
    chCyz = regularized_cholesky(Cyz)
    chCxyz = regularized_cholesky(Cxyz)

    # entropies in nats
    # normalizations cancel for cmi
    HZ = np.sum(np.log(np.diagonal(chCz)))  # + 0.5*Nvarz*(np.log(2*np.pi)+1.0)
    HXZ = np.sum(np.log(np.diagonal(chCxz)))  # + 0.5*Nvarxz*(np.log(2*np.pi)+1.0)
    HYZ = np.sum(np.log(np.diagonal(chCyz)))  # + 0.5*Nvaryz*(np.log(2*np.pi)+1.0)
    HXYZ = np.sum(np.log(np.diagonal(chCxyz)))  # + 0.5*Nvarxyz*(np.log(2*np.pi)+1.0)

    ln2 = np.log(2)
    if biascorrect:
        psiterms = psi((Ntrl - np.arange(1, Nvarxyz + 1)).astype(float) / 2.0) / 2.0
        dterm = (ln2 - np.log(Ntrl - 1.0)) / 2.0
        HZ = HZ - Nvarz * dterm - psiterms[:Nvarz].sum()
        HXZ = HXZ - Nvarxz * dterm - psiterms[:Nvarxz].sum()
        HYZ = HYZ - Nvaryz * dterm - psiterms[:Nvaryz].sum()
        HXYZ = HXYZ - Nvarxyz * dterm - psiterms[:Nvarxyz].sum()

    # MI in bits
    I = (HXZ + HYZ - HXYZ - HZ) / ln2
    return max(0.0, I)  # CMI is non-negative




[docs]
def gccmi_ccd(x, y, z, Zm=None):
    """Gaussian-Copula CMI between 2 continuous variables conditioned on a discrete variable.

    Calculates the conditional mutual information (CMI) between two continuous
    variables x and y, conditioned on a discrete variable z, using a Gaussian
    copula approach. This method can handle multivariate continuous variables.

    The Gaussian copula transforms the marginal distributions to standard Gaussian
    while preserving the dependence structure, allowing efficient estimation of
    mutual information.

    Parameters
    ----------
    x : array_like, shape (n_features_x, n_samples) or (n_samples,)
        First continuous variable. If multivariate, features are in rows and
        samples in columns.
    y : array_like, shape (n_features_y, n_samples) or (n_samples,)
        Second continuous variable. If multivariate, features are in rows and
        samples in columns.
    z : array_like, shape (n_samples,)
        Discrete conditioning variable. Must contain integer values in the range
        [0, max(z)] (inclusive).
    Zm : int, optional
        Number of unique values in the discrete variable z. If None (default),
        it is automatically computed as len(np.unique(z)). Providing this value
        can be useful if you know z doesn't contain all possible values.

    Returns
    -------
    I : float
        Conditional mutual information I(X;Y|Z) in bits.

    Raises
    ------
    ValueError
        If x or y have more than 2 dimensions.
        If z is not a 1D array.
        If z does not contain integer values.
        If the number of samples doesn't match across inputs.

    Notes
    -----
    The function uses a Gaussian copula transformation to estimate CMI. For each
    value of the discrete conditioning variable z, it transforms the conditional
    distributions of x and y to Gaussian, then computes the CMI using entropy
    calculations on the Gaussian-transformed data.

    This is particularly useful for analyzing neural data where the conditioning
    variable might represent experimental conditions, stimulus types, or
    behavioral states.

    Examples
    --------
    >>> # Continuous variables with dependency modulated by discrete state
    >>> rng = np.random.RandomState(42)
    >>> n_samples = 3000
    >>> z = rng.choice([0, 1, 2], size=n_samples)  # 3 discrete states
    >>> x = np.zeros(n_samples)
    >>> y = np.zeros(n_samples)
    >>>
    >>> # Different relationships for each state
    >>> for state in [0, 1, 2]:
    ...     mask = z == state
    ...     n_state = mask.sum()
    ...     if state == 0:  # Independent in state 0
    ...         x[mask] = rng.randn(n_state)
    ...         y[mask] = rng.randn(n_state)
    ...     elif state == 1:  # Linear relationship in state 1
    ...         x[mask] = rng.randn(n_state)
    ...         y[mask] = 0.8 * x[mask] + 0.2 * rng.randn(n_state)
    ...     else:  # Nonlinear in state 2
    ...         x[mask] = rng.uniform(-2, 2, n_state)
    ...         y[mask] = x[mask]**2 + 0.5 * rng.randn(n_state)
    >>>
    >>> # Reshape for function input
    >>> x = x.reshape(1, -1)
    >>> y = y.reshape(1, -1)
    >>>
    >>> # CMI captures state-dependent relationships
    >>> cmi = gccmi_ccd(x, y, z)
    >>> cmi > 0.2  # Significant CMI due to state-dependent coupling
    True

    >>> # Compare with unconditional MI
    >>> mi_uncond = gcmi_cc(x, y)
    >>> cmi > mi_uncond * 0.8  # CMI captures most of the dependency
    True

    See Also
    --------
    gcmi_cc : Unconditional Gaussian-copula MI
    cmi_ggg : CMI for all-continuous variables

    References
    ----------
    Ince, R. A., et al. (2017). A statistical framework for neuroimaging data
    analysis based on mutual information estimated via a Gaussian copula.
    Human Brain Mapping, 38(3), 1541-1573."""

    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    if z.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(z.dtype, np.integer):
        raise ValueError("z should be an integer array")

    # Compute Zm if not provided
    if Zm is None:
        Zm = len(np.unique(z))
    elif not isinstance(Zm, int):
        raise ValueError("Zm should be an integer")

    Ntrl = x.shape[1]
    Nvarx = x.shape[0]
    Nvary = y.shape[0]

    if y.shape[1] != Ntrl or z.size != Ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values
    for xi in range(Nvarx):
        if (np.unique(x[xi, :]).size / float(Ntrl)) < 0.9:
            warnings.warn("Input x has more than 10% repeated values")
            break
    for yi in range(Nvary):
        if (np.unique(y[yi, :]).size / float(Ntrl)) < 0.9:
            warnings.warn("Input y has more than 10% repeated values")
            break

    # check values of discrete variable
    if z.min() != 0 or z.max() != (Zm - 1):
        raise ValueError("values of discrete variable z are out of bounds")

    # Use JIT version if available
    if _JIT_AVAILABLE:
        x, y = _prepare_for_jit(x, y)
        return gccmi_ccd_jit(x, y, z, Zm)

    # calculate gcmi for each z value
    Icond = np.zeros(Zm)
    Pz = np.zeros(Zm)
    cx = []
    cy = []
    for zi in range(Zm):
        idx = z == zi
        thsx = copnorm(x[:, idx])
        thsy = copnorm(y[:, idx])
        Pz[zi] = idx.sum()
        cx.append(thsx)
        cy.append(thsy)
        Icond[zi] = mi_gg(thsx, thsy, True, True)

    Pz = Pz / float(Ntrl)

    # conditional mutual information
    CMI = np.sum(Pz * Icond)
    # I = mi_gg(np.hstack(cx),np.hstack(cy),True,False)
    return max(0.0, CMI)  # CMI is non-negative