"""
Analyze functional organization in neural manifolds.
This module provides functions to analyze how single-neuron selectivity
relates to population-level manifold structure.
"""
import logging
from typing import Dict, List, Optional, TYPE_CHECKING
if TYPE_CHECKING:
from ..experiment.exp_base import Experiment
from ..intense.intense_base import IntenseResults
import numpy as np
from collections import defaultdict
[docs]
def get_functional_organization(
experiment: "Experiment",
method_name: str,
data_type: str = "calcium",
intense_results: Optional["IntenseResults"] = None,
) -> Dict:
"""
Analyze functional organization in the manifold.
Examines how neurons contribute to different embedding components
and identifies functional clusters based on selectivity patterns.
Parameters
----------
experiment : Experiment
Experiment object with stored embeddings.
method_name : str
Name of the embedding method to analyze.
data_type : str, optional
Data type used for embedding ('calcium' or 'spikes'). Default is 'calcium'.
intense_results : IntenseResults, optional
IntenseResults object from compute_embedding_selectivity for this
specific embedding method. Must be an instance of
driada.intense.intense_base.IntenseResults. If not provided, only
basic statistics (component importance) will be returned.
Returns
-------
dict
Dictionary containing:
- 'component_importance': Variance explained by each component
- 'n_components': Number of embedding components
- 'n_neurons_used': Number of neurons in the embedding
- 'neuron_indices': Indices of neurons used
If intense_results provided, also includes:
- 'neuron_participation': How many components each neuron contributes to
- 'component_specialization': How selective each component is
- 'functional_clusters': Groups of neurons with similar embedding selectivity
- 'n_participating_neurons': Number of neurons with selectivity
- 'mean_components_per_neuron': Average participation per neuron
Raises
------
KeyError
If the specified embedding method is not found.
ValueError
If component variances sum to zero.
TypeError
If intense_results is not an IntenseResults object when provided.
ValueError
If intense_results lacks the required 'significance' attribute.
Notes
-----
This function reads embedding data from the experiment's stored embeddings.
When intense_results is provided, it performs detailed selectivity analysis
to identify functional clusters and neuron participation patterns.
Examples
--------
Basic usage without selectivity analysis:
>>> # Create a simple synthetic experiment with circular manifold
>>> import numpy as np
>>> from driada.experiment.synthetic import generate_circular_manifold_exp
>>> np.random.seed(42)
>>>
>>> # Generate experiment with head direction cells
>>> exp = generate_circular_manifold_exp(
... n_neurons=20,
... duration=30, # Short duration for example
... fps=10.0,
... kappa=4.0, # Moderate tuning width
... verbose=False
... )
>>>
>>> # Create PCA embedding (automatically stores it)
>>> embedding = exp.create_embedding('pca', n_components=5, verbose=False)
>>>
>>> # Basic analysis without selectivity (just component importance)
>>> org_basic = get_functional_organization(exp, 'pca')
>>> print(f"Number of components: {org_basic['n_components']}")
Number of components: 5
>>> print(f"Component importance shape: {org_basic['component_importance'].shape}")
Component importance shape: (5,)
>>> print(f"Neurons used: {org_basic['n_neurons_used']}")
Neurons used: 20
Advanced usage with selectivity analysis (intensive computation):
>>> # The following example shows how to use with selectivity analysis
>>> # Note: This requires intensive computation and is skipped in doctests
>>> from driada.intense import compute_embedding_selectivity # doctest: +SKIP
>>>
>>> # For real analysis, use longer experiments:
>>> exp_long = generate_circular_manifold_exp( # doctest: +SKIP
... n_neurons=50, duration=300, verbose=False)
>>>
>>> # Compute PCA embedding
>>> exp_long.create_embedding('pca', n_components=5, verbose=False) # doctest: +SKIP
>>>
>>> # Compute selectivity with full parameters
>>> results = compute_embedding_selectivity( # doctest: +SKIP
... exp_long,
... embedding_methods='pca',
... n_shuffles_stage1=100,
... n_shuffles_stage2=500,
... mode='two_stage',
... verbose=False
... )
>>>
>>> # Extract the IntenseResults object
>>> intense_res = results['pca']['intense_results'] # doctest: +SKIP
>>>
>>> # Full functional organization analysis with selectivity
>>> org_full = get_functional_organization( # doctest: +SKIP
... exp_long, 'pca', intense_results=intense_res)
>>>
>>> # Access detailed selectivity information
>>> print(f"Participating neurons: {org_full['n_participating_neurons']}") # doctest: +SKIP
>>> print(f"Mean components per neuron: {org_full['mean_components_per_neuron']:.2f}") # doctest: +SKIP
>>> print(f"Number of functional clusters: {len(org_full['functional_clusters'])}") # doctest: +SKIP
See Also
--------
~driada.integration.manifold_analysis.compare_embeddings : Compare multiple embedding methods
~driada.intense.pipelines.compute_embedding_selectivity : Compute selectivity for embeddings"""
# Get embedding and metadata
embedding_dict = experiment.get_embedding(method_name, data_type)
embedding = embedding_dict["data"]
metadata = embedding_dict.get("metadata", {})
neuron_indices = metadata.get("neuron_indices", list(range(experiment.n_cells)))
# Compute component importance (variance explained)
component_var = np.var(embedding, axis=0)
var_sum = np.sum(component_var)
if var_sum == 0:
raise ValueError("All embedding components have zero variance")
component_importance = component_var / var_sum
organization = {
"component_importance": component_importance,
"n_components": embedding.shape[1],
"n_neurons_used": len(neuron_indices),
"neuron_indices": neuron_indices,
}
# Check if we have selectivity results
if intense_results is not None:
# Validate it's an actual IntenseResults object
from ..intense.intense_base import IntenseResults
if not isinstance(intense_results, IntenseResults):
raise TypeError(
f"intense_results must be an IntenseResults object, got {type(intense_results).__name__}"
)
if not hasattr(intense_results, "significance"):
raise ValueError("intense_results must have 'significance' attribute")
significance_data = intense_results.significance
if significance_data is None:
# No significance data available
return organization
# Analyze neuron participation across components
neuron_participation = {}
component_specialization = {}
for comp_idx in range(embedding.shape[1]):
feat_name = f"{method_name}_comp{comp_idx}"
selective_neurons = []
# Check which neurons are selective to this component
# Significance structure: significance[neuron_id][feat_name]
for neuron_idx in neuron_indices:
if neuron_idx in significance_data:
neuron_sig = significance_data[neuron_idx]
if feat_name in neuron_sig and neuron_sig[feat_name].get("stage2", False):
selective_neurons.append(neuron_idx)
# Track neuron participation
if neuron_idx not in neuron_participation:
neuron_participation[neuron_idx] = []
neuron_participation[neuron_idx].append(comp_idx)
component_specialization[comp_idx] = {
"n_selective_neurons": len(selective_neurons),
"selective_neurons": selective_neurons,
"selectivity_rate": (
len(selective_neurons) / len(neuron_indices) if len(neuron_indices) > 0 else 0
),
}
# Identify functional clusters (neurons selective to same components)
cluster_map = defaultdict(list)
for neuron_idx, components in neuron_participation.items():
cluster_key = tuple(sorted(components))
cluster_map[cluster_key].append(neuron_idx)
functional_clusters = []
for components, neurons in cluster_map.items():
if len(neurons) > 1: # Only keep clusters with multiple neurons
functional_clusters.append(
{
"components": list(components),
"neurons": neurons,
"size": len(neurons),
}
)
# Sort clusters by size
functional_clusters.sort(key=lambda x: x["size"], reverse=True)
organization.update(
{
"neuron_participation": neuron_participation,
"component_specialization": component_specialization,
"functional_clusters": functional_clusters,
"n_participating_neurons": len(neuron_participation),
"mean_components_per_neuron": (
np.mean([len(comps) for comps in neuron_participation.values()])
if neuron_participation
else 0
),
}
)
else:
# No intense_results provided - return only basic statistics
pass
return organization
[docs]
def compare_embeddings(
experiment: "Experiment",
method_names: List[str],
data_type: str = "calcium",
intense_results_dict: Optional[Dict[str, "IntenseResults"]] = None,
) -> Dict:
"""
Compare functional organization across different embedding methods.
Analyzes and compares how different dimensionality reduction methods
organize the neural population, including neuron participation overlap
and clustering patterns.
Parameters
----------
experiment : Experiment
Experiment object with stored embeddings.
method_names : list of str
List of embedding method names to compare.
data_type : str, optional
Data type used for embeddings ('calcium' or 'spikes'). Default is 'calcium'.
intense_results_dict : dict, optional
Dict mapping method names to IntenseResults objects (not the full
compute_embedding_selectivity output). Each value must be an instance
of driada.intense.intense_base.IntenseResults. If not provided, only
basic comparison metrics will be returned.
Returns
-------
dict
Comparison metrics including:
- 'methods': List of valid methods analyzed
- 'n_components': Number of components per method
- 'n_participating_neurons': Neurons with selectivity per method
- 'mean_components_per_neuron': Average participation per method
- 'n_functional_clusters': Number of clusters per method
- 'participation_overlap': Pairwise overlap between methods
Raises
------
ValueError
If no valid embeddings found to compare or method_names is empty.
TypeError
If method_names is not a list or intense_results_dict contains
non-IntenseResults values.
Notes
-----
This function logs warnings when embeddings are not found for requested methods.
When only one valid embedding is found, it returns statistics for that single
embedding without computing overlaps.
Examples
--------
Basic comparison without selectivity:
>>> # Create a simple synthetic experiment for comparison
>>> import numpy as np
>>> from driada.experiment.synthetic import generate_circular_manifold_exp
>>> np.random.seed(42)
>>>
>>> # Generate experiment with 20 neurons, 500 frames
>>> exp = generate_circular_manifold_exp(
... n_neurons=20,
... duration=50, # 50 seconds at 10 fps = 500 frames
... fps=10.0,
... kappa=4.0, # Moderate tuning width
... verbose=False
... )
>>>
>>> # Create PCA embedding
>>> pca_embedding = exp.create_embedding('pca', n_components=3, verbose=False)
>>>
>>> # Create t-SNE embedding
>>> tsne_embedding = exp.create_embedding('tsne', n_components=3,
... perplexity=10, random_state=42)
>>>
>>> # Basic comparison without selectivity
>>> comparison_basic = compare_embeddings(exp, ['pca', 'tsne'])
>>> print(f"Methods compared: {sorted(comparison_basic['methods'])}")
Methods compared: ['pca', 'tsne']
>>> print(f"Components: PCA={comparison_basic['n_components']['pca']}, "
... f"t-SNE={comparison_basic['n_components']['tsne']}")
Components: PCA=3, t-SNE=3
>>> print(f"Mean components per neuron (no selectivity): "
... f"PCA={comparison_basic['mean_components_per_neuron']['pca']}, "
... f"t-SNE={comparison_basic['mean_components_per_neuron']['tsne']}")
Mean components per neuron (no selectivity): PCA=0, t-SNE=0
Advanced comparison with selectivity analysis:
>>> # The following example shows comparison with selectivity analysis
>>> # Note: This requires intensive computation and is skipped in doctests
>>> from driada.intense import compute_embedding_selectivity # doctest: +SKIP
>>>
>>> # Compute selectivity for both methods
>>> results = compute_embedding_selectivity( # doctest: +SKIP
... exp,
... embedding_methods=['pca', 'tsne'],
... n_shuffles_stage1=100,
... n_shuffles_stage2=500,
... mode='two_stage',
... verbose=False
... )
>>>
>>> # Extract IntenseResults objects (not the full dict)
>>> intense_dict = { # doctest: +SKIP
... method: results[method]['intense_results']
... for method in ['pca', 'tsne']
... }
>>>
>>> # Full comparison with selectivity
>>> comparison_full = compare_embeddings( # doctest: +SKIP
... exp, ['pca', 'tsne'],
... intense_results_dict=intense_dict
... )
>>>
>>> # Access detailed comparison metrics
>>> print(f"Participating neurons: PCA={comparison_full['n_participating_neurons']['pca']}, " # doctest: +SKIP
... f"t-SNE={comparison_full['n_participating_neurons']['tsne']}")
>>> print(f"Participation overlap: {comparison_full['participation_overlap']['pca_vs_tsne']:.2f}") # doctest: +SKIP
Error handling:
>>> # Test error handling
>>> try:
... compare_embeddings(exp, []) # Empty list
... except ValueError as e:
... print(f"Error: {str(e)}")
Error: method_names cannot be empty
>>>
>>> # Test with non-existent embedding (requesting a method not computed)
>>> comparison_missing = compare_embeddings(exp, ['pca', 'umap']) # umap not computed
>>> print(f"Valid methods found: {sorted(comparison_missing['methods'])}")
Valid methods found: ['pca']
See Also
--------
~driada.integration.manifold_analysis.get_functional_organization : Analyze individual embeddings
~driada.intense.pipelines.compute_embedding_selectivity : Compute selectivity for embeddings"""
if not isinstance(method_names, list):
raise TypeError("method_names must be a list")
if len(method_names) == 0:
raise ValueError("method_names cannot be empty")
organizations = {}
logger = logging.getLogger(__name__)
# Validate intense_results_dict if provided
if intense_results_dict:
from ..intense.intense_base import IntenseResults
for method, intense_res in intense_results_dict.items():
if not isinstance(intense_res, IntenseResults):
raise TypeError(
f"intense_results_dict['{method}'] must be an IntenseResults object, "
f"got {type(intense_res).__name__}"
)
for method in method_names:
try:
# Get IntenseResults for this method if available
intense_res = None
if intense_results_dict and method in intense_results_dict:
intense_res = intense_results_dict[method]
organizations[method] = get_functional_organization(
experiment, method, data_type, intense_results=intense_res
)
except KeyError:
logger.warning(f"No embedding found for method '{method}'")
if len(organizations) == 0:
raise ValueError("No valid embeddings found to compare")
if len(organizations) == 1:
# Special case: only one embedding exists
logger.info("Only one embedding found, returning individual statistics")
comparison = {
"methods": list(organizations.keys()),
"n_components": {m: org["n_components"] for m, org in organizations.items()},
"n_participating_neurons": {
m: org.get("n_participating_neurons", 0) for m, org in organizations.items()
},
"mean_components_per_neuron": {
m: org.get("mean_components_per_neuron", 0) for m, org in organizations.items()
},
"n_functional_clusters": {
m: len(org.get("functional_clusters", [])) for m, org in organizations.items()
},
}
# Compare neuron participation overlap (only if we have multiple methods)
if len(organizations) > 1 and all(
"neuron_participation" in org for org in organizations.values()
):
from itertools import combinations
method_pairs = list(combinations(organizations.keys(), 2))
participation_overlap = {}
for m1, m2 in method_pairs:
neurons1 = set(organizations[m1]["neuron_participation"].keys())
neurons2 = set(organizations[m2]["neuron_participation"].keys())
if neurons1 or neurons2:
overlap = len(neurons1 & neurons2) / len(neurons1 | neurons2)
else:
overlap = 0
participation_overlap[f"{m1}_vs_{m2}"] = overlap
comparison["participation_overlap"] = participation_overlap
return comparison