Source code for driada.dim_reduction.neural

import sys

# Fix torch reimport issue during coverage testing
if "torch" in sys.modules:
    torch = sys.modules["torch"]
    nn = sys.modules["torch.nn"]
    F = sys.modules["torch.nn.functional"]
    optim = sys.modules["torch.optim"]
    Dataset = sys.modules["torch.utils.data"].Dataset
    DataLoader = sys.modules["torch.utils.data"].DataLoader
else:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import Dataset



[docs]
class Encoder(nn.Module):
    """Neural network encoder for dimensionality reduction.

    Implements a two-layer neural network that encodes high-dimensional data
    into a lower-dimensional latent representation. Used as the encoder
    component in autoencoders.

    Parameters
    ----------
    orig_dim : int
        Original input dimension (number of features).
    inter_dim : int
        Intermediate hidden layer dimension.
    code_dim : int
        Output dimension of the encoded representation (latent space).
    kwargs : dict
        Additional parameters. Supports key 'dropout' (float, optional):
        dropout rate from 0 to 1. Default is no dropout.
    device : torch.device, optional
        Device to run the model on. Defaults to CUDA if available, else CPU.

    Attributes
    ----------
    encoder_hidden_layer : nn.Linear
        First linear transformation layer.
    encoder_output_layer : nn.Linear
        Second linear transformation to latent space.
    dropout : nn.Dropout
        Dropout layer for regularization.

    Raises
    ------
    ValueError
        If dropout rate is not in the range [0, 1).

    Examples
    --------
    >>> import torch
    >>> encoder = Encoder(orig_dim=100, inter_dim=50, code_dim=10,
    ...                   kwargs={'dropout': 0.2})
    >>> data = torch.randn(32, 100)  # batch of 32 samples
    >>> latent = encoder(data)
    >>> print(latent.shape)
    torch.Size([32, 10])

    Notes
    -----
    The encoder uses LeakyReLU activation for the hidden layer. The output
    layer has no activation function, producing unbounded latent codes to
    maximize the representational capacity of the latent space.

    See Also
    --------
    Decoder : The corresponding decoder network.
    VAEEncoder : Variational encoder for probabilistic latent representations.
    """


[docs]
    def __init__(self, orig_dim, inter_dim, code_dim, kwargs, device=None):
        """Initialize the encoder network.

        Sets up the two-layer neural network architecture with optional
        dropout regularization and moves the model to the specified device.

        Parameters
        ----------
        orig_dim : int
            Original input dimension (number of features).
        inter_dim : int
            Intermediate hidden layer dimension.
        code_dim : int
            Output dimension of the encoded representation.
        kwargs : dict
            Additional parameters, supports 'dropout' key.
        device : torch.device, optional
            Target device for computations.

        Raises
        ------
        ValueError
            If dropout rate is not in the range [0, 1)."""
        super().__init__()
        dropout = kwargs.get("dropout", None)

        self.encoder_hidden_layer = nn.Linear(in_features=orig_dim, out_features=inter_dim)
        self.encoder_output_layer = nn.Linear(in_features=inter_dim, out_features=code_dim)

        if dropout is not None:
            if 0 <= dropout < 1:
                self.dropout = nn.Dropout(p=dropout)
            else:
                raise ValueError("Dropout rate should be in the range 0<=dropout<1")
        else:
            self.dropout = nn.Dropout(0.0)

        if device is None:
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self._device = device

        # Move model to the specified device
        self.to(self._device)



[docs]
    def forward(self, features):
        """Forward pass through the encoder.

        Applies the two-layer neural network transformation to encode
        input features into a lower-dimensional latent representation.

        Parameters
        ----------
        features : torch.Tensor
            Input tensor of shape (batch_size, orig_dim).

        Returns
        -------
        torch.Tensor
            Encoded representation of shape (batch_size, code_dim).
            Values are unbounded to maximize representational capacity.

        Notes
        -----
        The forward pass applies the following transformations:
        1. Linear transformation to hidden dimension
        2. Dropout regularization (if enabled)
        3. LeakyReLU activation
        4. Linear transformation to latent dimension

        The output is unbounded to allow full representational capacity
        in the latent space."""
        activation = self.encoder_hidden_layer(features)
        activation = self.dropout(activation)
        activation = F.leaky_relu(activation)
        code = self.encoder_output_layer(activation)
        return code





[docs]
class VAEEncoder(nn.Module):
    """Variational encoder that outputs parameters for latent Gaussian distribution.

    Unlike standard autoencoders, VAE encoders output parameters (mean and log variance)
    for a Gaussian distribution in the latent space, enabling probabilistic sampling
    and regularization via KL divergence.

    Parameters
    ----------
    orig_dim : int
        Original input dimension (number of features).
    inter_dim : int
        Intermediate hidden layer dimension.
    code_dim : int
        Latent space dimension. Note: this should be 2*latent_dim if you want
        latent_dim means and latent_dim log variances. The encoder outputs
        code_dim values total.
    kwargs : dict
        Additional parameters. Supports key 'dropout' (float, optional):
        dropout rate from 0 to 1. Default is no dropout.
    device : torch.device, optional
        Device to run the model on. Defaults to CUDA if available, else CPU.

    Attributes
    ----------
    encoder_hidden_layer : nn.Linear
        First linear transformation layer.
    encoder_output_layer : nn.Linear
        Second linear transformation to latent parameters (outputs code_dim values).
    dropout : nn.Dropout
        Dropout layer for regularization.

    Raises
    ------
    ValueError
        If dropout rate is not in the range [0, 1).

    Examples
    --------
    >>> import torch
    >>> # For 10-dim latent space, need code_dim=20 (10 means + 10 log variances)
    >>> vae_encoder = VAEEncoder(orig_dim=100, inter_dim=50, code_dim=20,
    ...                          kwargs={'dropout': 0.2})
    >>> data = torch.randn(32, 100)  # batch of 32 samples
    >>> params = vae_encoder(data)
    >>> print(params.shape)
    torch.Size([32, 20])

    Notes
    -----
    The output layer does not use sigmoid activation (unlike standard AE)
    because it needs to output unconstrained means and log variances for
    the Gaussian distribution.

    See Also
    --------
    VAE : Complete variational autoencoder that uses this encoder.
    Encoder : Standard encoder with bounded outputs."""


[docs]
    def __init__(self, orig_dim, inter_dim, code_dim, kwargs, device=None):
        """Initialize the variational encoder network.

        Sets up the two-layer neural network architecture that outputs
        parameters for a Gaussian distribution in latent space.

        Parameters
        ----------
        orig_dim : int
            Original input dimension (number of features).
        inter_dim : int
            Intermediate hidden layer dimension.
        code_dim : int
            Total output dimension (should be 2*latent_dim for mean and log variance).
        kwargs : dict
            Additional parameters, supports 'dropout' key.
        device : torch.device, optional
            Target device for computations.

        Raises
        ------
        ValueError
            If dropout rate is not in the range [0, 1)."""
        super().__init__()
        dropout = kwargs.get("dropout", None)

        self.encoder_hidden_layer = nn.Linear(in_features=orig_dim, out_features=inter_dim)
        self.encoder_output_layer = nn.Linear(in_features=inter_dim, out_features=code_dim)

        if dropout is not None:
            if 0 <= dropout < 1:
                self.dropout = nn.Dropout(p=dropout)
            else:
                raise ValueError("Dropout rate should be in the range 0<=dropout<1")
        else:
            self.dropout = nn.Dropout(0.0)

        if device is None:
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self._device = device

        # Move model to the specified device
        self.to(self._device)



[docs]
    def forward(self, features):
        """Forward pass through the VAE encoder.

        Applies the two-layer neural network transformation to encode
        input features into parameters for a Gaussian distribution.

        Parameters
        ----------
        features : torch.Tensor
            Input tensor of shape (batch_size, orig_dim).

        Returns
        -------
        torch.Tensor
            Encoded representation of shape (batch_size, code_dim).
            Contains concatenated parameters for the latent Gaussian:
            typically first half for means, second half for log variances.

        Notes
        -----
        Unlike standard encoders, VAE encoders output unconstrained values
        (no sigmoid activation) since they represent distribution parameters.
        The output should be reshaped to extract means and log variances."""
        activation = self.encoder_hidden_layer(features)
        activation = self.dropout(activation)
        activation = F.leaky_relu(activation)
        code = self.encoder_output_layer(activation)
        return code





[docs]
class Decoder(nn.Module):
    """Neural network decoder for dimensionality reduction.

    Implements a two-layer neural network that decodes low-dimensional latent
    representations back to the original high-dimensional space. Used as the
    decoder component in autoencoders.

    Parameters
    ----------
    code_dim : int
        Input dimension of the latent representation.
    inter_dim : int
        Intermediate hidden layer dimension.
    orig_dim : int
        Output dimension (same as original data dimension).
    kwargs : dict
        Additional parameters. Supports key 'dropout' (float, optional):
        dropout rate from 0 to 1. Default is no dropout.
    device : torch.device, optional
        Device to run the model on. Defaults to CUDA if available, else CPU.

    Attributes
    ----------
    decoder_hidden_layer : nn.Linear
        First linear transformation from latent space.
    decoder_output_layer : nn.Linear
        Second linear transformation to original space.
    dropout : nn.Dropout
        Dropout layer for regularization.

    Raises
    ------
    ValueError
        If dropout rate is not in the range [0, 1).

    Examples
    --------
    >>> import torch
    >>> decoder = Decoder(code_dim=10, inter_dim=50, orig_dim=100,
    ...                   kwargs={'dropout': 0.2})
    >>> latent = torch.randn(32, 10)  # batch of 32 latent codes
    >>> reconstructed = decoder(latent)
    >>> print(reconstructed.shape)
    torch.Size([32, 100])

    Notes
    -----
    The decoder uses LeakyReLU activation for the hidden layer and
    no activation function on the output layer, allowing it to output
    unbounded values for reconstruction.

    See Also
    --------
    Encoder : The corresponding encoder network.
    AE : Complete autoencoder using this decoder."""


[docs]
    def __init__(self, code_dim, inter_dim, orig_dim, kwargs, device=None):
        """Initialize the decoder network.

        Sets up the two-layer neural network architecture with optional
        dropout regularization and moves the model to the specified device.

        Parameters
        ----------
        code_dim : int
            Input dimension of the latent representation.
        inter_dim : int
            Intermediate hidden layer dimension.
        orig_dim : int
            Output dimension (same as original data).
        kwargs : dict
            Additional parameters, supports 'dropout' key.
        device : torch.device, optional
            Target device for computations.

        Raises
        ------
        ValueError
            If dropout rate is not in the range [0, 1)."""
        super().__init__()
        dropout = kwargs.get("dropout", None)

        self.decoder_hidden_layer = nn.Linear(in_features=code_dim, out_features=inter_dim)
        self.decoder_output_layer = nn.Linear(in_features=inter_dim, out_features=orig_dim)

        if dropout is not None:
            if 0 <= dropout < 1:
                self.dropout = nn.Dropout(p=dropout)
            else:
                raise ValueError("Dropout rate should be in the range 0<=dropout<1")
        else:
            self.dropout = nn.Dropout(0.0)

        if device is None:
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self._device = device

        # Move model to the specified device
        self.to(self._device)



[docs]
    def forward(self, features):
        """Forward pass through the decoder.

        Applies the two-layer neural network transformation to decode
        latent representations back to the original data space.

        Parameters
        ----------
        features : torch.Tensor
            Latent representation tensor of shape (batch_size, code_dim).

        Returns
        -------
        torch.Tensor
            Reconstructed data of shape (batch_size, orig_dim).

        Notes
        -----
        The forward pass applies the following transformations:
        1. Linear transformation to hidden dimension
        2. Dropout regularization (if enabled)
        3. LeakyReLU activation
        4. Linear transformation to original dimension
        5. No output activation (unbounded reconstruction)"""
        activation = self.decoder_hidden_layer(features)
        activation = self.dropout(activation)
        activation = F.leaky_relu(activation)
        activation = self.decoder_output_layer(activation)
        reconstructed = activation
        return reconstructed





[docs]
class AE(nn.Module):
    """Standard Autoencoder for non-linear dimensionality reduction.

    Combines an encoder and decoder to learn a compressed representation
    of high-dimensional data through reconstruction. The model is trained
    to minimize reconstruction error.

    Parameters
    ----------
    orig_dim : int
        Original input dimension (number of features).
    inter_dim : int
        Intermediate hidden layer dimension for both encoder and decoder.
    code_dim : int
        Dimension of the latent representation (bottleneck).
    enc_kwargs : dict
        Additional parameters for the encoder (e.g., dropout rate).
    dec_kwargs : dict
        Additional parameters for the decoder (e.g., dropout rate).
    device : torch.device
        Device to run the model on (CPU or CUDA).

    Attributes
    ----------
    encoder : Encoder
        The encoder network.
    decoder : Decoder
        The decoder network.
    orig_dim : int
        Original data dimension.
    inter_dim : int
        Hidden layer dimension.
    code_dim : int
        Latent space dimension.

    Examples
    --------
    >>> import torch
    >>> device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    >>> ae = AE(orig_dim=100, inter_dim=50, code_dim=10,
    ...         enc_kwargs={'dropout': 0.2}, dec_kwargs={'dropout': 0.2},
    ...         device=device)
    >>> data = torch.randn(32, 100).to(device)
    >>> reconstructed = ae(data)
    >>> print(reconstructed.shape)
    torch.Size([32, 100])
    >>> latent = ae.get_code_embedding(data)
    >>> print(latent.shape)  # Note: transposed output
    (10, 32)

    Notes
    -----
    The encoder produces unbounded latent codes, while the decoder
    outputs unbounded reconstructions. This design is suitable for
    general-purpose dimensionality reduction of unbounded data.

    See Also
    --------
    VAE : Variational autoencoder for probabilistic encoding.
    Encoder : The encoder component.
    Decoder : The decoder component."""


[docs]
    def __init__(self, orig_dim, inter_dim, code_dim, enc_kwargs, dec_kwargs, device):
        """Initialize the autoencoder.

        Creates encoder and decoder networks with the specified architecture.

        Parameters
        ----------
        orig_dim : int
            Original input dimension.
        inter_dim : int
            Hidden layer dimension for both networks.
        code_dim : int
            Latent representation dimension.
        enc_kwargs : dict
            Encoder parameters (e.g., {'dropout': 0.2}).
        dec_kwargs : dict
            Decoder parameters (e.g., {'dropout': 0.2}).
        device : torch.device
            Device for computations."""
        super(AE, self).__init__()

        self.encoder = Encoder(
            orig_dim=orig_dim,
            inter_dim=inter_dim,
            code_dim=code_dim,
            kwargs=enc_kwargs,
            device=device,
        )
        self.decoder = Decoder(
            orig_dim=orig_dim,
            inter_dim=inter_dim,
            code_dim=code_dim,
            kwargs=dec_kwargs,
            device=device,
        )
        self.orig_dim = orig_dim
        self.inter_dim = inter_dim
        self.code_dim = code_dim
        self._device = device



[docs]
    def forward(self, features):
        """Forward pass through the autoencoder.

        Encodes input data to latent representation and then decodes it
        back to reconstruct the original data.

        Parameters
        ----------
        features : torch.Tensor
            Input data of shape (batch_size, orig_dim).

        Returns
        -------
        torch.Tensor
            Reconstructed data of shape (batch_size, orig_dim).

        Notes
        -----
        The forward pass performs: input → encoder → latent → decoder → reconstruction.
        Both latent codes and reconstructions are unbounded."""
        code = self.encoder.forward(features)
        reconstructed = self.decoder.forward(code)
        return reconstructed



[docs]
    def get_code_embedding(self, input_):
        """Extract latent representation from input data.

        Parameters
        ----------
        input_ : torch.Tensor
            Input data of shape (batch_size, orig_dim).

        Returns
        -------
        numpy.ndarray
            Latent representation of shape (code_dim, batch_size).
            Note: Output is transposed for compatibility with DRIADA conventions.

        Notes
        -----
        This method only runs the encoder portion and returns the latent
        codes as a numpy array. The transpose operation converts from
        PyTorch's (batch, features) to DRIADA's (features, samples) format."""
        encoder = self.encoder
        embedding = encoder.forward(input_)
        return embedding.detach().cpu().numpy().T





[docs]
class VAE(nn.Module):
    """Variational Autoencoder for probabilistic dimensionality reduction.

    Implements a VAE that learns a probabilistic mapping to a latent space.
    Unlike standard autoencoders, VAEs learn a distribution over the latent
    space, enabling generation of new samples and providing uncertainty estimates.

    The encoder outputs parameters of a Gaussian distribution (mean and log variance),
    and the latent code is sampled from this distribution using the reparameterization trick.

    Parameters
    ----------
    orig_dim : int
        Original input dimension (number of features).
    inter_dim : int
        Intermediate hidden layer dimension for both encoder and decoder.
    code_dim : int
        Dimension of the latent representation (bottleneck).
    enc_kwargs : dict, optional
        Additional parameters for the encoder (e.g., dropout rate).
    dec_kwargs : dict, optional
        Additional parameters for the decoder (e.g., dropout rate).
    device : torch.device, optional
        Device to run the model on. Defaults to CUDA if available, else CPU.

    Attributes
    ----------
    encoder : VAEEncoder
        The encoder network that outputs mean and log variance.
    decoder : Decoder
        The decoder network.
    orig_dim : int
        Original data dimension.
    inter_dim : int
        Hidden layer dimension.
    code_dim : int
        Latent space dimension.

    Examples
    --------
    >>> import torch
    >>> vae = VAE(orig_dim=100, inter_dim=50, code_dim=10,
    ...           enc_kwargs={'dropout': 0.2}, dec_kwargs={'dropout': 0.2})
    >>> data = torch.randn(32, 100)
    >>> reconstructed, mean, log_var = vae(data)
    >>> # Compute VAE loss
    >>> recon_loss = torch.nn.functional.mse_loss(reconstructed, data)
    >>> kl_loss = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    >>> vae_loss = recon_loss + kl_loss
    >>> # Generate new samples
    >>> z = torch.randn(32, 10)
    >>> generated = vae.decoder(z)

    Notes
    -----
    The VAE loss consists of two terms:
    1. Reconstruction loss (e.g., MSE or BCE)
    2. KL divergence between the learned distribution and a standard Gaussian

    The encoder internally outputs 2*code_dim features which are split into
    mean and log variance parameters for the latent Gaussian distribution.

    See Also
    --------
    AE : Standard deterministic autoencoder.
    VAEEncoder : The probabilistic encoder component."""


[docs]
    def __init__(
        self,
        orig_dim,
        inter_dim,
        code_dim,
        enc_kwargs=None,
        dec_kwargs=None,
        device=None,
    ):
        """Initialize the Variational Autoencoder.

        Creates a VAE with encoder outputting distribution parameters
        (mean and log variance) and decoder for reconstruction. The encoder
        output dimension is doubled to accommodate both parameters.

        Parameters
        ----------
        orig_dim : int
            Original input dimension (number of features).
        inter_dim : int
            Hidden layer dimension for both encoder and decoder.
        code_dim : int
            Dimension of the latent representation. The encoder will output
            2 * code_dim features (code_dim for mean, code_dim for log variance).
        enc_kwargs : dict, optional
            Additional encoder parameters (e.g., {'dropout': 0.2}).
            Defaults to empty dict if None.
        dec_kwargs : dict, optional
            Additional decoder parameters (e.g., {'dropout': 0.2}).
            Defaults to empty dict if None.
        device : torch.device, optional
            Device for computations. If None, encoder/decoder handle device selection.

        Notes
        -----
        The encoder output dimension is set to 2 * code_dim to enable the
        VAE to learn both mean and log variance parameters for the latent
        Gaussian distribution. These parameters are later split in the
        get_code method."""
        super(VAE, self).__init__()

        # Use VAEEncoder instead of regular Encoder
        self.encoder = VAEEncoder(
            orig_dim=orig_dim,
            inter_dim=inter_dim,
            code_dim=2 * code_dim,
            kwargs=enc_kwargs or {},
            device=device,
        )
        self.decoder = Decoder(
            orig_dim=orig_dim,
            inter_dim=inter_dim,
            code_dim=code_dim,
            kwargs=dec_kwargs or {},
            device=device,
        )
        self.orig_dim = orig_dim
        self.inter_dim = inter_dim
        self.code_dim = code_dim



[docs]
    def reparameterization(self, mu, log_var):
        """Reparameterization trick for VAE.

        Samples from the latent distribution N(mu, sigma^2) in a way that allows
        backpropagation through the sampling operation by expressing the sample
        as a deterministic function of the parameters and a separate noise variable.

        Parameters
        ----------
        mu : torch.Tensor
            Mean of the latent Gaussian distribution, shape (batch_size, code_dim).
        log_var : torch.Tensor
            Log variance of the latent Gaussian distribution, shape (batch_size, code_dim).

        Returns
        -------
        torch.Tensor
            Sampled latent vector, shape (batch_size, code_dim).

        Examples
        --------
        >>> import torch
        >>> # Create a VAE instance to access the method
        >>> vae = VAE(orig_dim=100, inter_dim=50, code_dim=20,
        ...           enc_kwargs={}, dec_kwargs={}, device=torch.device('cpu'))
        >>> mu = torch.zeros(32, 10)
        >>> log_var = torch.ones(32, 10) * -2  # Small variance
        >>> z = vae.reparameterization(mu, log_var)
        >>> print(z.shape)
        torch.Size([32, 10])

        Notes
        -----
        The reparameterization trick transforms sampling from N(mu, sigma^2) into:
        z = mu + sigma * epsilon, where epsilon ~ N(0, I)

        This allows gradients to flow through mu and log_var during backpropagation
        while maintaining the stochasticity through the random epsilon."""
        std = torch.exp(0.5 * log_var)  # standard deviation
        eps = torch.randn_like(std)  # sample from N(0, I)
        sample = mu + (eps * std)  # reparameterized sample
        return sample



[docs]
    def get_code(self, features):
        """Extract latent code from input features using VAE encoding.

        Encodes input features through the VAE encoder which outputs concatenated
        mean and log variance parameters. These are reshaped and separated, then
        used to sample from the latent distribution via reparameterization.

        Parameters
        ----------
        features : torch.Tensor
            Input data of shape (batch_size, orig_dim).

        Returns
        -------
        tuple of torch.Tensor
            - code : Sampled latent representation, shape (batch_size, code_dim)
            - mu : Mean of latent distribution, shape (batch_size, code_dim)
            - log_var : Log variance of latent distribution, shape (batch_size, code_dim)

        Examples
        --------
        >>> import torch
        >>> # Create a VAE with latent dimension 10 (code_dim=20 for mean+logvar)
        >>> vae = VAE(orig_dim=100, inter_dim=50, code_dim=20,
        ...           enc_kwargs={}, dec_kwargs={},
        ...           device=torch.device('cpu'))
        >>> features = torch.randn(32, 100)
        >>> code, mu, log_var = vae.get_code(features)
        >>> print(code.shape, mu.shape, log_var.shape)
        torch.Size([32, 20]) torch.Size([32, 20]) torch.Size([32, 20])

        Notes
        -----
        The encoder outputs a tensor of shape (batch_size, 2 * code_dim) which
        is reshaped to (batch_size, 2, code_dim) where:
        - [:, 0, :] contains the mean parameters
        - [:, 1, :] contains the log variance parameters"""
        x = self.encoder.forward(features)

        # Reshape to separate mean and log variance
        x = x.view(-1, 2, self.code_dim)

        # Extract distribution parameters
        mu = x[:, 0, :]  # the first feature values as mean
        log_var = x[:, 1, :]  # the other feature values as log variance

        # Sample latent code via reparameterization
        code = self.reparameterization(mu, log_var)

        return code, mu, log_var



[docs]
    def forward(self, features):
        """Forward pass through the VAE.

        Performs a complete forward pass: encoding input to latent distribution
        parameters, sampling from the distribution, and decoding back to
        reconstruction space. Returns both reconstruction and distribution
        parameters needed for VAE loss computation.

        Parameters
        ----------
        features : torch.Tensor
            Input data of shape (batch_size, orig_dim).

        Returns
        -------
        tuple of torch.Tensor
            - reconstructed : Reconstructed data, shape (batch_size, orig_dim)
            - mu : Mean of latent distribution, shape (batch_size, code_dim)
            - log_var : Log variance of latent distribution, shape (batch_size, code_dim)

        Examples
        --------
        >>> import torch
        >>> import torch.nn.functional as F
        >>> # Create a simple VAE instance for demonstration
        >>> vae = VAE(orig_dim=100, inter_dim=50, code_dim=20,
        ...           enc_kwargs={}, dec_kwargs={},
        ...           device=torch.device('cpu'))
        >>> data = torch.randn(32, 100)
        >>> recon, mu, log_var = vae(data)
        >>> # Compute VAE loss
        >>> recon_loss = F.mse_loss(recon, data)
        >>> kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        >>> vae_loss = recon_loss + kl_loss
        >>> print(f"Reconstruction shape: {recon.shape}")
        Reconstruction shape: torch.Size([32, 100])
        >>> print(f"Latent mean shape: {mu.shape}")
        Latent mean shape: torch.Size([32, 20])
        >>> print(f"Latent log variance shape: {log_var.shape}")
        Latent log variance shape: torch.Size([32, 20])

        Notes
        -----
        The mu and log_var are needed to compute the KL divergence loss:
        KL = -0.5 * sum(1 + log_var - mu^2 - exp(log_var))

        The total VAE loss is: L = reconstruction_loss + beta * KL_loss
        where beta is a hyperparameter controlling the regularization strength.

        See Also
        --------
        get_code : For encoding only without reconstruction.
        reparameterization : The sampling mechanism."""
        # encoding
        code, mu, log_var = self.get_code(features)

        # decoding
        reconstructed = self.decoder.forward(code)
        return reconstructed, mu, log_var



[docs]
    def get_code_embedding(self, input_, use_mean=True):
        """Extract latent representation from input data.

        Returns either the mean of the latent distribution (deterministic) or
        a sample from it (stochastic), transposed to match DRIADA conventions.

        Parameters
        ----------
        input_ : torch.Tensor
            Input data of shape (batch_size, orig_dim).
        use_mean : bool, default=True
            If True, returns the mean of the latent distribution (deterministic).
            If False, returns a sample from the distribution (stochastic).

        Returns
        -------
        numpy.ndarray
            Latent representation of shape (code_dim, batch_size).

        Examples
        --------
        >>> import torch
        >>> import numpy as np
        >>> from driada.dim_reduction.neural import VAE
        >>> # Create VAE
        >>> vae = VAE(orig_dim=100, inter_dim=50, code_dim=20,
        ...           enc_kwargs={}, dec_kwargs={},
        ...           device=torch.device('cpu'))
        >>> data = torch.randn(32, 100)
        >>> # Get deterministic embedding (mean)
        >>> embedding = vae.get_code_embedding(data, use_mean=True)
        >>> embedding2 = vae.get_code_embedding(data, use_mean=True)
        >>> print(np.allclose(embedding, embedding2))  # True - deterministic
        True
        >>> # Get stochastic embedding (sampled)
        >>> _ = torch.manual_seed(42)  # For reproducibility in doctest
        >>> embedding3 = vae.get_code_embedding(data, use_mean=False)
        >>> _ = torch.manual_seed(43)  # Different seed
        >>> embedding4 = vae.get_code_embedding(data, use_mean=False)
        >>> print(np.allclose(embedding3, embedding4))  # False - different samples
        False
        >>> print(embedding.shape)  # Note: transposed output
        (20, 32)

        Notes
        -----
        - Output is transposed: (batch, features) → (features, samples)
        - use_mean=True is recommended for visualization, downstream tasks,
          and when you need consistent embeddings
        - use_mean=False captures the uncertainty in the latent representation

        See Also
        --------
        get_code : Returns code, mean, and log variance as tensors.
        AE.get_code_embedding : Always deterministic (standard autoencoder).
        """
        code, mu, log_var = self.get_code(input_)
        if use_mean:
            return mu.detach().cpu().numpy().T
        else:
            return code.detach().cpu().numpy().T





[docs]
class NeuroDataset(Dataset):
    """PyTorch Dataset wrapper for neural activity data.

    Wraps neural data matrices for use with PyTorch DataLoader, enabling
    efficient batching and sampling during neural network training.

    Parameters
    ----------
    data : ndarray
        Input data matrix of shape (n_features, n_samples). Will be transposed
        internally to (n_samples, n_features) for PyTorch compatibility.
    transform : callable, optional
        Optional transform to be applied on each sample.

    Attributes
    ----------
    data : ndarray
        Transposed data matrix of shape (n_samples, n_features).
    transform : callable or None
        Transform function to apply to samples.

    Examples
    --------
    >>> import numpy as np
    >>> from torch.utils.data import DataLoader
    >>> # Create dataset with 100 neurons, 1000 time points
    >>> data = np.random.randn(100, 1000)
    >>> dataset = NeuroDataset(data)
    >>> # Create DataLoader for batching
    >>> loader = DataLoader(dataset, batch_size=32, shuffle=True)
    >>> for batch_data, batch_idx in loader:
    ...     print(batch_data.shape)  # (32, 100) - batch_size x n_features
    ...     break
    torch.Size([32, 100])

    Notes
    -----
    The dataset returns tuples of (sample, index) where the index can be
    used for tracking which samples were selected during training."""


[docs]
    def __init__(self, data, transform=None):
        """Initialize the neural dataset.

        Transposes the input data from (n_features, n_samples) to
        (n_samples, n_features) for PyTorch compatibility.

        Parameters
        ----------
        data : ndarray
            Input data matrix of shape (n_features, n_samples).
        transform : callable, optional
            Optional transform function to apply to each sample."""
        self.data = data.T
        self.transform = transform



[docs]
    def __len__(self):
        """Return the number of samples in the dataset.

        Returns
        -------
        int
            Number of samples (n_samples)."""
        return len(self.data)



[docs]
    def __getitem__(self, idx):
        """Retrieve a sample and its index from the dataset.

        Parameters
        ----------
        idx : int or torch.Tensor
            Index of the sample to retrieve. If tensor, will be converted
            to Python list/int for numpy indexing.

        Returns
        -------
        tuple
            - sample : ndarray
                Data sample of shape (n_features,), optionally transformed.
            - idx : int
                The index of the retrieved sample.

        Notes
        -----
        Returns both the sample and its index to allow tracking of which
        samples were used during training. This can be useful for debugging
        or sample weighting schemes."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data[idx]

        if self.transform:
            sample = self.transform(sample)

        return sample, idx