Source code for driada.experiment.synthetic.core

"""
Core utilities for synthetic data generation.

This module contains the fundamental utilities used across all synthetic data
generation functions, including firing rate validation and calcium signal generation.
"""

import numpy as np
import warnings

# =============================================================================
# Default Parameters for Synthetic Data Generation
# =============================================================================

DEFAULT_SYNTHETIC_PARAMS = {
    # Calcium indicator dynamics
    "t_rise": 0.05,  # Rise time in seconds
    "decay_time": 2.0,  # Decay time constant in seconds
    "calcium_noise": 0.02,  # Noise std for calcium signal
    "amplitude_range": (0.5, 2.0),  # Calcium event amplitude range
    # Firing rates
    "baseline_rate": 0.1,  # Baseline firing rate in Hz
    "peak_rate": 2.0,  # Peak firing rate in Hz (realistic for calcium imaging)
    "firing_noise": 0.05,  # Noise std added to firing rates
    # Recording parameters
    "fps": 20.0,  # Sampling rate in Hz
    "duration": 600,  # Duration in seconds
}

# Convenience aliases for direct access
DEFAULT_T_RISE = DEFAULT_SYNTHETIC_PARAMS["t_rise"]
DEFAULT_DECAY_TIME = DEFAULT_SYNTHETIC_PARAMS["decay_time"]
DEFAULT_CALCIUM_NOISE = DEFAULT_SYNTHETIC_PARAMS["calcium_noise"]
DEFAULT_AMPLITUDE_RANGE = DEFAULT_SYNTHETIC_PARAMS["amplitude_range"]
DEFAULT_BASELINE_RATE = DEFAULT_SYNTHETIC_PARAMS["baseline_rate"]
DEFAULT_PEAK_RATE = DEFAULT_SYNTHETIC_PARAMS["peak_rate"]
DEFAULT_FIRING_NOISE = DEFAULT_SYNTHETIC_PARAMS["firing_noise"]
DEFAULT_FPS = DEFAULT_SYNTHETIC_PARAMS["fps"]
DEFAULT_DURATION = DEFAULT_SYNTHETIC_PARAMS["duration"]


def validate_peak_rate(peak_rate, context=""):
    """
    Validate that peak firing rate is within physiologically realistic range.

    Parameters
    ----------
    peak_rate : float
        Peak firing rate in Hz. Must be non-negative and finite.
    context : str, optional
        Context string for more informative warning message.

    Returns
    -------
    None
        This function only validates and warns; it does not return a value.

    Raises
    ------
    ValueError
        If peak_rate is negative, NaN, or infinite.
    TypeError
        If peak_rate is not numeric.

    Notes
    -----
    Typical firing rates for neurons:
    - Cortical pyramidal cells: 0.1-2 Hz (sparse firing)
    - Hippocampal place cells: 0.5-5 Hz (with brief peaks up to 20 Hz)
    - Fast-spiking interneurons: 5-50 Hz

    For calcium imaging with GCaMP indicators:
    - Decay time ~1-2 seconds limits temporal resolution
    - Firing rates >2 Hz can cause signal saturation
    - Realistic modeling should use rates in 0.1-2 Hz range

    Examples
    --------
    >>> validate_peak_rate(1.5)  # No warning, physiologically realistic

    >>> import warnings
    >>> with warnings.catch_warnings(record=True) as w:
    ...     warnings.simplefilter("always")
    ...     validate_peak_rate(5.0, context="Place cell simulation")
    ...     len(w) > 0 and "exceeds recommended maximum" in str(w[0].message)
    True

    >>> validate_peak_rate(-1.0)
    Traceback (most recent call last):
        ...
    ValueError: peak_rate must be non-negative, got -1.0
    """
    # Validate input
    try:
        peak_rate = float(peak_rate)
    except (TypeError, ValueError):
        raise TypeError(f"peak_rate must be numeric, got {type(peak_rate).__name__}")

    if np.isnan(peak_rate):
        raise ValueError("peak_rate cannot be NaN")
    if np.isinf(peak_rate):
        raise ValueError("peak_rate cannot be infinite")
    if peak_rate < 0:
        raise ValueError(f"peak_rate must be non-negative, got {peak_rate}")

    if peak_rate > 2.0:
        warning_msg = (
            f"peak_rate={peak_rate:.1f} Hz exceeds recommended maximum of 2.0 Hz "
            f"for calcium imaging. "
            f"High firing rates can cause calcium signal saturation due to slow "
            f"indicator dynamics (decay time ~2s). Consider using peak_rate <= 2.0 Hz "
            f"for more realistic calcium traces."
        )
        if context:
            warning_msg = f"{context}: {warning_msg}"
        warnings.warn(warning_msg, UserWarning, stacklevel=2)


# =============================================================================
# Calcium Transient Kernels
# =============================================================================


def _double_exponential_kernel(t, amplitude, rise_time, decay_time):
    """Generate double-exponential calcium transient kernel.

    Models realistic calcium indicator dynamics with separate rise and decay phases.
    Form: (1 - exp(-t/rise)) * exp(-t/decay)

    Parameters
    ----------
    t : ndarray
        Time array in samples.
    amplitude : float
        Peak amplitude of the transient.
    rise_time : float
        Rise time constant in samples.
    decay_time : float
        Decay time constant in samples.

    Returns
    -------
    ndarray
        Calcium transient waveform normalized to peak amplitude.
    """
    kernel = (1 - np.exp(-t / rise_time)) * np.exp(-t / decay_time)
    # Normalize to peak = 1, then scale by amplitude
    if np.max(kernel) > 0:
        kernel = kernel / np.max(kernel) * amplitude
    return kernel


def _exponential_kernel(t, amplitude, decay_time):
    """Generate simple exponential decay kernel.

    Models instantaneous rise followed by exponential decay.
    Form: amplitude * exp(-t/decay)

    Parameters
    ----------
    t : ndarray
        Time array in samples.
    amplitude : float
        Peak amplitude of the transient.
    decay_time : float
        Decay time constant in samples.

    Returns
    -------
    ndarray
        Calcium transient waveform.
    """
    return amplitude * np.exp(-t / decay_time)


def _step_kernel(t, amplitude, decay_time, sharpness=5.0):
    """Generate non-physiological step-like kernel for control.

    Models unrealistic calcium dynamics with sharp edges.
    Useful as negative control for testing model assumptions.

    Parameters
    ----------
    t : ndarray
        Time array in samples.
    amplitude : float
        Plateau amplitude.
    decay_time : float
        Duration of plateau in samples.
    sharpness : float, optional
        Controls edge sharpness (higher = sharper). Default 5.0.

    Returns
    -------
    ndarray
        Step-like waveform.
    """
    # Sigmoid rise and fall
    rise = 1 / (1 + np.exp(-sharpness * (t / decay_time - 0.1)))
    fall = 1 / (1 + np.exp(sharpness * (t / decay_time - 0.9)))
    return amplitude * rise * fall


# =============================================================================
# Calcium Signal Generation
# =============================================================================



[docs]
def generate_pseudo_calcium_signal(
    events=None,
    duration=600,
    sampling_rate=20.0,
    event_rate=0.2,
    amplitude_range=(0.5, 2),
    decay_time=2,
    noise_std=0.1,
    seed=None,
    rise_time=None,
    kernel="exponential",
):
    """Generate a pseudo-calcium imaging signal with configurable dynamics.

    Creates a synthetic calcium fluorescence signal that mimics GCaMP-like
    dynamics with configurable kernel shapes, random event amplitudes, and
    Gaussian noise.

    Parameters
    ----------
    events : ndarray or None, optional
        Binary array indicating event occurrences at each time point.
        If None, events are generated randomly using a Poisson process.
        When provided, indices directly correspond to sample indices (not time in seconds).
    duration : float, default=600
        Total duration of the signal in seconds. Only used if events is None.
        Must be positive.
    sampling_rate : float, default=20.0
        Sampling rate in Hz. Must be positive.
    event_rate : float, default=0.2
        Average rate of calcium events per second. Only used if events is None.
        Must be non-negative.
    amplitude_range : tuple of float, default=(0.5, 2)
        (min, max) range for random calcium event amplitudes.
        Must have min <= max.
    decay_time : float, default=2
        Time constant for exponential decay of calcium events in seconds.
        Typical GCaMP indicators have decay times of 1-2 seconds.
        Must be positive.
    noise_std : float, default=0.1
        Standard deviation of additive Gaussian noise. Must be non-negative.
    seed : int, optional
        Random seed for reproducibility. Default is None.
    rise_time : float, optional
        Rise time constant in seconds. Only used for 'double_exponential' kernel.
        Default is None, which uses 0.25s (typical for GCaMP indicators).
    kernel : {'exponential', 'double_exponential', 'step'}, default='exponential'
        Type of calcium transient kernel:

        - 'exponential': Simple exponential decay with instantaneous rise.
          Form: amplitude * exp(-t/decay_time). Fast but less realistic.
        - 'double_exponential': Physiologically realistic kernel with separate
          rise and decay phases. Form: (1 - exp(-t/rise)) * exp(-t/decay).
          Matches real GCaMP dynamics.
        - 'step': Non-physiological step-like waveform with sharp edges.
          Useful as negative control for testing model assumptions.

    Returns
    -------
    ndarray
        1D array representing the pseudo-calcium signal with shape (n_samples,).

    Raises
    ------
    ValueError
        If duration, sampling_rate, or decay_time <= 0.
        If event_rate or noise_std < 0.
        If amplitude_range[0] > amplitude_range[1].
        If kernel is not one of the valid options.

    Notes
    -----
    The calcium signal is modeled as a sum of transients triggered at event times,
    plus additive Gaussian noise. This approximates the dynamics of genetically
    encoded calcium indicators like GCaMP6.

    When events is None: Event times are drawn uniformly in [0, duration) seconds.
    When events is provided: Non-zero indices are treated as event sample indices.

    Multiple overlapping events accumulate additively without saturation modeling.

    Examples
    --------
    >>> # Generate random calcium signal with simple exponential decay
    >>> signal = generate_pseudo_calcium_signal(duration=100, event_rate=0.5)
    >>> signal.shape
    (2000,)

    >>> # Generate with realistic double-exponential dynamics
    >>> signal = generate_pseudo_calcium_signal(
    ...     duration=100, event_rate=0.5,
    ...     kernel='double_exponential', rise_time=0.25
    ... )

    >>> # Generate from specific spike times
    >>> spikes = np.zeros(1000)
    >>> spikes[[100, 200, 300]] = 1  # 3 spike events at sample indices
    >>> signal = generate_pseudo_calcium_signal(events=spikes)
    """
    # Validate kernel type
    valid_kernels = ["exponential", "double_exponential", "step"]
    if kernel not in valid_kernels:
        raise ValueError(f"kernel must be one of {valid_kernels}, got '{kernel}'")

    # Input validation
    if sampling_rate <= 0:
        raise ValueError(f"sampling_rate must be positive, got {sampling_rate}")
    if decay_time <= 0:
        raise ValueError(f"decay_time must be positive, got {decay_time}")
    if noise_std < 0:
        raise ValueError(f"noise_std must be non-negative, got {noise_std}")
    if len(amplitude_range) != 2 or amplitude_range[0] > amplitude_range[1]:
        raise ValueError(
            f"amplitude_range must be (min, max) with min <= max, got {amplitude_range}"
        )

    # Default rise_time for double_exponential
    if rise_time is None:
        rise_time = 0.25  # Typical GCaMP rise time

    # Initialize RNG
    rng = np.random.default_rng(seed)

    if events is None:
        if duration <= 0:
            raise ValueError(f"duration must be positive, got {duration}")
        if event_rate < 0:
            raise ValueError(f"event_rate must be non-negative, got {event_rate}")

        # Calculate number of samples
        num_samples = int(duration * sampling_rate)

        # Generate calcium events
        num_events = rng.poisson(event_rate * duration)
        event_times = rng.uniform(0, duration, num_events)
        event_amplitudes = rng.uniform(amplitude_range[0], amplitude_range[1], num_events)

    else:
        num_samples = len(events)
        event_times = np.where(events > 0)[0]
        # Use amplitude_range to modulate event amplitudes instead of using binary values
        if len(event_times) > 0:
            event_amplitudes = rng.uniform(
                amplitude_range[0], amplitude_range[1], len(event_times)
            )
        else:
            event_amplitudes = np.array([])

    # Convert time constants to samples
    rise_time_samples = rise_time * sampling_rate
    decay_time_samples = decay_time * sampling_rate

    # Convert event times to sample indices
    if events is None:
        event_indices = (event_times * sampling_rate).astype(int)
    else:
        event_indices = event_times.astype(int)

    # Filter out-of-bounds events
    valid_mask = event_indices < num_samples
    event_indices = event_indices[valid_mask]
    event_amplitudes = event_amplitudes[valid_mask]

    # Generate signal using vectorized convolution
    if len(event_indices) > 0:
        # Determine kernel length (until decay to <1% of peak)
        if kernel == "step":
            kernel_length = int(2 * decay_time_samples) + 1
        else:
            kernel_length = int(5 * decay_time_samples) + 1

        # Create unit kernel
        t_kernel = np.arange(kernel_length)
        if kernel == "double_exponential":
            unit_kernel = _double_exponential_kernel(
                t_kernel, 1.0, rise_time_samples, decay_time_samples
            )
        elif kernel == "exponential":
            unit_kernel = _exponential_kernel(t_kernel, 1.0, decay_time_samples)
        elif kernel == "step":
            unit_kernel = _step_kernel(t_kernel, 1.0, decay_time_samples)

        # Create sparse event signal with amplitudes
        event_signal = np.zeros(num_samples)
        np.add.at(event_signal, event_indices, event_amplitudes)

        # Convolve and truncate
        signal = np.convolve(event_signal, unit_kernel, mode="full")[:num_samples]
    else:
        signal = np.zeros(num_samples)

    # Add Gaussian noise
    noise = rng.normal(0, noise_std, num_samples)
    signal += noise

    return signal




[docs]
def generate_pseudo_calcium_multisignal(
    n,
    events=None,
    duration=600,
    sampling_rate=20,
    event_rate=0.2,
    amplitude_range=(0.5, 2),
    decay_time=2,
    noise_std=0.1,
    seed=None,
    rise_time=None,
    kernel="exponential",
):
    """
    Generate multiple pseudo calcium signals.

    Parameters
    ----------
    n : int
        Number of neurons. Must be non-negative.
    events : ndarray, optional
        Event array of shape (n_neurons, n_timepoints). If provided, must have
        n_neurons == n. Each row corresponds to one neuron's event indices.
    duration : float, default=600
        Duration in seconds. Only used if events is None.
    sampling_rate : float, default=20
        Sampling rate in Hz.
    event_rate : float, default=0.2
        Average rate of calcium events per second. Only used if events is None.
    amplitude_range : tuple, default=(0.5, 2)
        (min, max) for the amplitude of calcium events.
    decay_time : float, default=2
        Time constant for the decay of calcium events in seconds.
    noise_std : float, default=0.1
        Standard deviation of the Gaussian noise.
    seed : int, optional
        Random seed for reproducibility. Default is None.
    rise_time : float, optional
        Rise time constant in seconds. Only used for 'double_exponential' kernel.
        Default is None, which uses 0.25s.
    kernel : {'exponential', 'double_exponential', 'step'}, default='exponential'
        Type of calcium transient kernel. See generate_pseudo_calcium_signal
        for details on each kernel type.

    Returns
    -------
    ndarray
        Calcium signals of shape (n_neurons, n_timepoints).

    Raises
    ------
    ValueError
        If n < 0.
        If events is provided with wrong shape.

    Notes
    -----
    This is a convenience wrapper that calls generate_pseudo_calcium_signal
    for each neuron independently. Each neuron gets independent random
    events (if events=None) and independent noise.

    Examples
    --------
    >>> # Generate 5 neurons with random events
    >>> signals = generate_pseudo_calcium_multisignal(5, duration=100)
    >>> signals.shape
    (5, 2000)

    >>> # Generate with specific events per neuron
    >>> events = np.random.binomial(1, 0.01, size=(3, 1000))
    >>> signals = generate_pseudo_calcium_multisignal(3, events=events)"""
    # Input validation
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    if events is not None:
        events = np.asarray(events)
        if events.ndim != 2:
            raise ValueError(f"events must be 2D array, got shape {events.shape}")
        if events.shape[0] != n:
            raise ValueError(f"events first dimension {events.shape[0]} must match n={n}")

    # Initialize RNG for seed generation
    rng = np.random.default_rng(seed)

    sigs = []
    for i in range(n):
        local_events = None
        if events is not None:
            local_events = events[i, :]

        # Generate unique seed for each neuron
        neuron_seed = int(rng.integers(0, 2**31))

        sig = generate_pseudo_calcium_signal(
            events=local_events,
            duration=duration,
            sampling_rate=sampling_rate,
            event_rate=event_rate,
            amplitude_range=amplitude_range,
            decay_time=decay_time,
            noise_std=noise_std,
            seed=neuron_seed,
            rise_time=rise_time,
            kernel=kernel,
        )
        sigs.append(sig)

    return np.vstack(sigs)