Source code for driada.dim_reduction.graph

import pynndescent
import scipy.sparse as sp
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import kneighbors_graph
from sklearn.utils.validation import check_symmetric
from scipy.sparse.csgraph import shortest_path
from umap.umap_ import fuzzy_simplicial_set
from pynndescent.distances import named_distances
from .dr_base import m_param_filter, g_param_filter

from ..network.net_base import Network



[docs]
class ProximityGraph(Network):
    """Proximity graph for manifold learning and dimensionality reduction.

    Constructs a graph where nodes are data points and edges connect nearby points,
    capturing the local geometry of the underlying manifold. Supports multiple
    graph construction methods including k-NN, UMAP fuzzy topology, and epsilon-ball.

    The graph can be used for manifold learning algorithms, intrinsic dimension
    estimation, and as input to spectral dimensionality reduction methods.

    Parameters
    ----------
    d : ndarray
        Data matrix of shape (n_features, n_samples). Each column is a data point.
    m_params : dict
        Metric parameters dictionary. Required key:

        - 'metric_name' : str or callable
          Distance metric name from pynndescent.distances.named_distances
          ('euclidean', 'cosine', 'manhattan', etc.), 'hyperbolic',
          or a callable custom distance function.

        Optional keys (filtered by m_param_filter):

        - 'sigma' : float, bandwidth for heat kernel affinity transformation
        - 'p' : float, parameter for minkowski metric
        - Additional metric-specific parameters passed to distance function
    g_params : dict
        Graph construction parameters. Required key:

        - 'g_method_name' : str, graph construction method.
          Options: 'knn', 'umap', 'auto_knn', 'eps'.

        Method-specific keys (filtered by g_param_filter):

        - 'nn' : int, number of nearest neighbors (for knn/umap/auto_knn)
        - 'eps' : float, epsilon radius (for eps method)
        - 'min_density' : float, minimum graph density threshold (for eps method)

        General optional keys:

        - 'weighted' : bool, whether to create weighted edges
        - 'dist_to_aff' : str, distance to affinity conversion ('hk' for heat kernel)
        - 'max_deleted_nodes' : float, maximum fraction of nodes that can be
          deleted during giant component extraction (raises exception if exceeded)
        - 'graph_preprocessing' : str, preprocessing method (default: 'giant_cc')
        - 'seed' : int, random seed for reproducibility (default: 42)
    create_nx_graph : bool, default=False
        Whether to create NetworkX graph representation (passed to Network parent).
    verbose : bool, default=False
        Whether to print progress messages.

    Attributes
    ----------
    data : ndarray
        The input data matrix (n_features, n_samples).
    metric : str
        The distance metric name extracted from m_params.
    metric_args : dict
        Filtered metric parameters excluding 'metric_name' and 'sigma'.
    all_metric_params : dict
        All filtered metric parameters from m_param_filter.
    adj : sparse matrix
        Weighted adjacency matrix. For weighted graphs with dist_to_aff='hk',
        contains affinities exp(-d²/(sigma*mean_squared_dist)). For unweighted
        graphs, same as bin_adj.
    bin_adj : sparse matrix
        Binary adjacency matrix indicating connections.
    neigh_distmat : sparse matrix
        Sparse matrix of distances between connected neighbors. For unweighted
        graphs or methods without distance computation, contains zeros in sparse format.
    knn_indices : ndarray or None
        k-NN indices array of shape (n_nodes, k+1) including self (only for 'knn' method).
    knn_distances : ndarray or None
        k-NN distances array of shape (n_nodes, k+1) including self (only for 'knn' method).
    lost_nodes : set
        Set of node indices removed during giant component preprocessing. Empty set
        if no preprocessing or no nodes lost.
    intrinsic_dimensions : dict
        Cached intrinsic dimension estimates. Keys are method names with parameters.

    Plus all attributes from g_params set via setattr.

    Methods
    -------
    construct_adjacency()
        Dynamically call graph construction method based on g_method_name.
    distances_to_affinities()
        Convert neigh_distmat to affinity weights using heat kernel (only 'hk' implemented).
    create_knn_graph_()
        Create k-NN graph using pynndescent. Saves knn_indices and knn_distances.
    create_umap_graph_()
        Create graph using UMAP's fuzzy_simplicial_set. Sets knn arrays to None.
    create_auto_knn_graph_()
        Create unweighted k-NN graph using sklearn. Sets knn arrays to None.
    create_eps_graph_()
        Create epsilon-ball graph. Checks density against min_density. Sets knn arrays to None.
    get_int_dim(method='geodesic', force_recompute=False, logger=None, **kwargs)
        Estimate intrinsic dimension. Methods: 'geodesic' (uses neigh_distmat or adj),
        'nn' (requires saved knn data from 'knn' method). Results are cached.
    scaling()
        Compute diagonal sums of adjacency matrix powers for graph scaling analysis.
    _checkpoint()
        Verify graph sparsity and symmetry. Called after construction.

    Raises
    ------
    Exception
        If more than max_deleted_nodes fraction of nodes are lost during preprocessing.
    Exception
        If adjacency matrix is not sparse or not symmetric.
    ValueError
        If unknown metric or graph method specified.

    Notes
    -----
    - Inherits from Network class, gaining spectral analysis capabilities
    - All graphs are enforced to be symmetric/undirected via check_symmetric
    - Giant connected component extraction is default preprocessing
    - For weighted graphs with 'hk', distances are converted to similarities
    - Node indices are remapped after giant component extraction

    Examples
    --------
    >>> import numpy as np
    >>> from driada.dim_reduction.graph import ProximityGraph
    >>> # Generate sample data
    >>> np.random.seed(42)  # For reproducible results
    >>> data = np.random.randn(3, 100)  # 100 points in 3D
    >>> # Define metric parameters
    >>> m_params = {'metric_name': 'euclidean', 'sigma': 1.0}
    >>> # Define graph parameters for k-NN graph
    >>> g_params = {
    ...     'g_method_name': 'knn',
    ...     'nn': 15,
    ...     'weighted': True,
    ...     'dist_to_aff': 'hk',
    ...     'max_deleted_nodes': 0.1
    ... }
    >>> # Create proximity graph
    >>> graph = ProximityGraph(data, m_params, g_params)
    >>> edges = graph.adj.nnz//2
    >>> print(f"Graph has {graph.n} nodes and {edges} edges")  # doctest: +ELLIPSIS
    Graph has 100 nodes and ... edges
    """


[docs]
    def __init__(self, d, m_params, g_params, create_nx_graph=False, verbose=False):
        """Initialize proximity graph from data.

        Parameters
        ----------
        d : ndarray of shape (n_features, n_samples)
            Data matrix where each column is a sample.
        m_params : dict
            Metric parameters. Must contain 'metric_name'.
        g_params : dict
            Graph parameters. Must contain 'g_method_name'.
        create_nx_graph : bool, default=False
            Whether to create NetworkX representation.
        verbose : bool, default=False
            Whether to print progress messages.

        Raises
        ------
        ValueError
            If data is not 2D or is empty.
            If required parameters are missing.

        Notes
        -----
        lost_nodes attribute is always set (even if empty).
        Data shape assumes (features, samples) format."""
        # Validate input data
        d = np.asarray(d)
        if d.ndim != 2:
            raise ValueError(f"Data must be 2D array, got {d.ndim}D")
        if d.size == 0:
            raise ValueError("Data array is empty")

        self.all_metric_params = m_param_filter(m_params)
        self.metric = m_params["metric_name"]
        self.metric_args = {
            key: self.all_metric_params[key]
            for key in self.all_metric_params.keys()
            if key not in ["metric_name", "sigma"]
        }

        all_params = g_param_filter(g_params)

        # Safe attribute setting - only set allowed attributes
        allowed_attrs = {
            "g_method_name",
            "nn",
            "eps",
            "min_density",
            "perplexity",
            "max_deleted_nodes",
            "weighted",
            "dist_to_aff",
            "graph_preprocessing",
            "seed",
            "knn_engine",
            "symmetrization",
        }
        for key in all_params:
            if key in allowed_attrs:
                setattr(self, key, g_params[key])
            else:
                if self.verbose:
                    print(f"Warning: Ignoring unknown parameter '{key}'")

        # Set default for max_deleted_nodes if not provided
        if not hasattr(self, "max_deleted_nodes"):
            self.max_deleted_nodes = 0.5

        self.data = d
        self.verbose = verbose

        self.construct_adjacency()
        # Get graph preprocessing parameter — respect explicit None
        graph_preprocessing = all_params["graph_preprocessing"] if "graph_preprocessing" in all_params else None
        super(ProximityGraph, self).__init__(
            adj=self.adj,
            preprocessing=graph_preprocessing,
            create_nx_graph=create_nx_graph,
            directed=False,
            weighted=all_params["weighted"],
        )

        node_mapping = self._init_to_final_node_mapping
        original_n = self.data.shape[1]  # Data is (features, samples)
        lost_nodes = set(range(original_n)) - set(node_mapping.values())
        self.lost_nodes = lost_nodes  # Always set the attribute
        if len(lost_nodes) > 0:
            if self.verbose:
                print(f"{len(lost_nodes)} nodes lost after giant component creation!")

            if len(lost_nodes) >= self.max_deleted_nodes * original_n:
                raise Exception(
                    f"more than {self.max_deleted_nodes * 100} % of nodes discarded during gc creation!"
                )
            else:
                connected = [i for i in range(original_n) if i not in self.lost_nodes]
                self.bin_adj = self.bin_adj[connected, :].tocsc()[:, connected].tocsr()
                self.neigh_distmat = self.neigh_distmat[connected, :].tocsc()[:, connected].tocsr()

                # Update k-NN arrays if they exist
                if hasattr(self, "knn_indices") and self.knn_indices is not None:
                    self.knn_indices = self.knn_indices[connected]
                if hasattr(self, "knn_distances") and self.knn_distances is not None:
                    self.knn_distances = self.knn_distances[connected]

        self._checkpoint()


        # arr = np.array(range(self.n)).reshape(-1, 1)
        # self.timediff = cdist(arr, arr, 'cityblock')
        # self.norm_timediff = self.timediff / (self.n / 3)


[docs]
    def distances_to_affinities(self):
        """Convert distance matrix to affinity matrix.

        Transforms distances between neighbors into similarity weights.
        The transformation method is specified by self.dist_to_aff parameter.

        Currently implemented methods:

        - 'hk': Heat kernel with adaptive bandwidth:
          ``w_ij = exp(-d_ij^2 / (sigma * mean_squared_distance))``

        Raises
        ------
        RuntimeError
            If no distance matrix is available or graph is not weighted.
        ValueError
            If sigma is not positive.

        Notes
        -----
        Only applies to weighted graphs. The resulting affinity matrix
        is symmetrized to ensure undirected graph structure.
        Mean distance uses only non-zero entries."""
        if self.neigh_distmat is None:
            raise RuntimeError("distances between nearest neighbors not available")

        if not self.weighted:
            raise RuntimeError("no need to construct affinities for binary graph weights")

        if self.dist_to_aff == "hk":
            sigma = self.all_metric_params.get("sigma", 1.0)
            if sigma <= 0:
                raise ValueError(f"Sigma must be positive, got {sigma}")

            self.adj = self.neigh_distmat.copy()
            sqdist_matrix = self.neigh_distmat.multiply(self.neigh_distmat)
            mean_sqdist = sqdist_matrix.sum() / sqdist_matrix.nnz

            if mean_sqdist == 0:
                raise ValueError("All distances are zero - cannot compute meaningful affinities")

            self.adj.data = np.exp(-sqdist_matrix.data / (sigma * mean_sqdist))

            # Ensure symmetry after transformation
            self.adj = (self.adj + self.adj.T) / 2.0



[docs]
    def construct_adjacency(self):
        """Construct the adjacency matrix using the specified graph method.

        Dynamically calls the appropriate graph construction method based on
        self.g_method_name (e.g., 'knn', 'umap', 'auto_knn', 'eps').

        Raises
        ------
        ValueError
            If g_method_name contains invalid characters.
        AttributeError
            If the specified graph construction method doesn't exist."""
        # Validate method name
        if not self.g_method_name.replace("_", "").isalnum():
            raise ValueError(f"Invalid g_method_name: {self.g_method_name}")

        method_name = "create_" + self.g_method_name + "_graph_"
        if not hasattr(self, method_name):
            raise AttributeError(f"Unknown graph construction method: {self.g_method_name}")

        construct_fn = getattr(self, method_name)
        construct_fn()



[docs]
    def _checkpoint(self):
        """Verify graph construction integrity.

        Checks that adjacency matrices are sparse and symmetric.
        Called after graph construction to ensure valid graph structure.

        Raises
        ------
        RuntimeError
            If adjacency is not sparse or not symmetric.

        Notes
        -----
        Validates that matrices are symmetric within tolerance."""
        if self.adj is not None:
            if not sp.issparse(self.adj):
                # check for sparsity violation
                raise RuntimeError("Adjacency matrix is not sparse!")

            # Check symmetry - this will raise if not symmetric
            try:
                check_symmetric(self.adj, raise_exception=True)
            except Exception as e:
                raise RuntimeError(f"Adjacency matrix not symmetric: {e}")

            if hasattr(self, "bin_adj") and self.bin_adj is not None:
                try:
                    check_symmetric(self.bin_adj, raise_exception=True)
                except Exception as e:
                    raise RuntimeError(f"Binary adjacency not symmetric: {e}")

            if hasattr(self, "neigh_distmat") and self.neigh_distmat is not None:
                try:
                    check_symmetric(self.neigh_distmat, raise_exception=True)
                except Exception as e:
                    raise RuntimeError(f"Neighbor distances not symmetric: {e}")

            if self.verbose:
                print("Adjacency symmetry confirmed")

        else:
            raise RuntimeError(
                "Adjacency matrix is not constructed, checkpoint routines are unavailable"
            )



[docs]
    def create_umap_graph_(self):
        """Create graph using UMAP's fuzzy simplicial set construction.

        Uses UMAP's algorithm to build a fuzzy topological representation
        that captures both local and global structure of the data manifold.

        Notes
        -----
        The resulting graph has weighted edges representing fuzzy set membership.
        Sets self.adj (weighted), self.bin_adj (binary), and self.neigh_distmat.
        Uses seed from g_params if provided, otherwise defaults to 42 for reproducibility.
        Data is transposed to match UMAP's expected (n_samples, n_features) format."""
        seed = getattr(self, "seed", 42)
        RAND = np.random.RandomState(seed)
        adj, _, _, dists = fuzzy_simplicial_set(
            self.data.T,
            self.nn,
            metric=self.metric,
            metric_kwds=self.metric_args,
            random_state=RAND,
            return_dists=True,
        )

        self.adj = sp.csr_matrix(adj)
        self.bin_adj = (self.adj > 0).astype(int)
        self.neigh_distmat = (
            sp.csr_matrix(dists) if dists is not None else sp.csr_matrix(self.adj.shape)
        )

        # Initialize k-NN attributes to None (not directly available for UMAP)
        self.knn_indices = None
        self.knn_distances = None



[docs]
    def create_knn_graph_(self):
        """Create k-nearest neighbors graph.

        Constructs a symmetric k-NN graph where each point is connected to its
        k nearest neighbors. Supports two engines:

        - ``'pynndescent'`` (default): approximate NN, supports custom metrics
        - ``'cKDTree'``: exact NN via scipy, faster for moderate dimensions

        Symmetrization can be ``'intersection'`` (default, ``.minimum()``) or
        ``'union'`` (``.maximum()``).

        Raises
        ------
        ValueError
            If nn exceeds number of samples minus 1.
            If metric is unknown or invalid.
            If knn_engine is unknown."""
        N = self.data.shape[1]
        if self.nn >= N:
            raise ValueError(f"nn ({self.nn}) must be less than number of samples ({N})")

        engine = getattr(self, 'knn_engine', 'pynndescent')
        sym = getattr(self, 'symmetrization', 'intersection')

        if engine == 'cKDTree':
            self._create_knn_cKDTree(N)
        elif engine == 'pynndescent':
            self._create_knn_pynndescent(N)
        else:
            raise ValueError(
                f"Unknown knn_engine '{engine}'. Choose 'pynndescent' or 'cKDTree'."
            )

        # Symmetrize
        if sym == 'union':
            self.neigh_distmat = self.neigh_distmat.maximum(self.neigh_distmat.T)
        else:
            # 'intersection' — default, same as sklearn
            self.neigh_distmat = self.neigh_distmat.minimum(self.neigh_distmat.T)

        # Create binary adjacency from distance matrix
        self.bin_adj = (self.neigh_distmat > 0).astype(int)

        if self.weighted:
            self.distances_to_affinities()
        else:
            self.adj = self.bin_adj.copy()


    def _create_knn_pynndescent(self, N):
        """Build k-NN graph using pynndescent approximate nearest neighbors."""
        if callable(self.metric):
            curr_metric = self.metric
        elif self.metric in named_distances:
            curr_metric = self.metric
        else:
            raise ValueError(
                f"Unknown metric '{self.metric}'. Must be one of "
                f"{list(named_distances.keys())} or a callable function."
            )

        index = pynndescent.NNDescent(
            self.data.T,
            metric=curr_metric,
            metric_kwds=self.metric_args,
            n_neighbors=self.nn + 1,
            diversify_prob=1.0,
            pruning_degree_multiplier=1.5,
        )

        neighs, dists = index.neighbor_graph

        self.knn_indices = neighs
        self.knn_distances = dists

        neigh_cols = neighs[:, 1:].flatten()
        dist_vals = dists[:, 1:].flatten()
        neigh_rows = np.repeat(np.arange(N), self.nn)

        self.neigh_distmat = sp.csr_matrix(
            (dist_vals, (neigh_rows, neigh_cols)), shape=(N, N)
        )

    def _create_knn_cKDTree(self, N):
        """Build k-NN graph using scipy cKDTree exact nearest neighbors."""
        from scipy.spatial import cKDTree

        tree = cKDTree(self.data.T)
        dists, neighs = tree.query(self.data.T, k=self.nn + 1, workers=1)

        self.knn_indices = neighs
        self.knn_distances = dists

        neigh_cols = neighs[:, 1:].flatten()
        dist_vals = dists[:, 1:].flatten()
        neigh_rows = np.repeat(np.arange(N), self.nn)

        self.neigh_distmat = sp.csr_matrix(
            (dist_vals, (neigh_rows, neigh_cols)), shape=(N, N)
        )


[docs]
    def create_auto_knn_graph_(self):
        """Create k-NN graph using scikit-learn's implementation.

        A simpler alternative to pynndescent that uses sklearn's
        kneighbors_graph. Creates an unweighted, symmetric graph.

        Raises
        ------
        ValueError
            If nn exceeds number of samples minus 1.

        Notes
        -----
        - Always creates unweighted graphs (connectivity mode)
        - Uses 'auto' algorithm selection in sklearn
        - Sets diagonal to 0 to exclude self-connections
        - Does not store knn_indices or knn_distances
        - Data is transposed to match sklearn expected format
        - Symmetrizes by A = A + A.T"""
        N = self.data.shape[1]
        if self.nn >= N:
            raise ValueError(f"nn ({self.nn}) must be less than number of samples ({N})")
        A = kneighbors_graph(self.data.T, self.nn, mode="connectivity", include_self=False)
        A.setdiag(0)
        A = A.astype(bool, casting="unsafe", copy=True)
        A = A + A.T
        self.adj = A
        self.bin_adj = A.copy()
        self.neigh_distmat = sp.csr_matrix(A.shape)

        # Initialize k-NN attributes to None (not available for this method)
        self.knn_indices = None
        self.knn_distances = None



[docs]
    def create_eps_graph_(self):
        """Create epsilon-ball graph where edges connect points within distance eps.

        Constructs a graph by connecting all pairs of points whose distance is
        less than or equal to the epsilon threshold. Uses sklearn's
        radius_neighbors_graph for efficient computation.

        The resulting graph density is checked against self.min_density to ensure
        sufficient connectivity. Issues a warning if density exceeds 0.5.

        Raises
        ------
        ValueError
            If eps is not positive.
            If min_density is not in (0, 1].
            If graph density is below self.min_density threshold.

        Notes
        -----
        Sets self.adj (weighted or binary), self.bin_adj (binary), and
        self.neigh_distmat (distances for weighted graphs, zero matrix otherwise).
        For weighted graphs, distances are converted to affinities.
        Does not store knn_indices or knn_distances.
        Data is transposed to match sklearn expected format."""
        if self.eps <= 0:
            raise ValueError(f"eps must be positive, got {self.eps}")
        if not 0 < self.min_density <= 1:
            raise ValueError(f"min_density must be in (0, 1], got {self.min_density}")
        from sklearn.neighbors import radius_neighbors_graph

        # Use radius_neighbors_graph to create epsilon-ball graph
        # eps is the maximum distance between points
        # mode='connectivity' gives binary adjacency matrix
        # include_self=False excludes self-loops
        A = radius_neighbors_graph(
            self.data.T,
            self.eps,
            mode="connectivity",
            metric=self.metric,
            metric_params=self.metric_args,
            include_self=False,
        )

        # Ensure symmetry
        A = A + A.T
        A = A.astype(bool, casting="unsafe", copy=True)

        # Check if graph is too sparse or too dense
        nnz_ratio = A.nnz / (self.data.shape[1] * (self.data.shape[1] - 1))
        if nnz_ratio < self.min_density:
            raise ValueError(
                f"Epsilon graph too sparse (density={nnz_ratio:.4f} < min_density={self.min_density}). "
                f"Consider increasing eps parameter."
            )
        if nnz_ratio > 0.5:
            if self.verbose:
                print(
                    f"WARNING: Epsilon graph is dense (density={nnz_ratio:.4f}). "
                    f"Consider decreasing eps parameter."
                )

        self.adj = A
        self.bin_adj = A.copy()

        # For weighted graphs, compute distance matrix
        if self.weighted:
            # Get distance matrix for connected pairs
            D = radius_neighbors_graph(
                self.data.T,
                self.eps,
                mode="distance",
                metric=self.metric,
                metric_params=self.metric_args,
                include_self=False,
            )
            D = D + D.T
            self.neigh_distmat = D
            self.distances_to_affinities()
        else:
            # For unweighted graphs, create sparse zero matrix for distances
            self.neigh_distmat = sp.csr_matrix(A.shape)

        # Initialize k-NN attributes to None (not available for epsilon-ball graphs)
        self.knn_indices = None
        self.knn_distances = None



[docs]
    def scaling(self):
        """Compute normalized diagonal sums of the binary adjacency matrix.

        Analyzes graph structure by computing the average connectivity at different
        node separations. For each diagonal offset i, computes the fraction of
        possible edges that exist between nodes separated by i positions.

        Returns
        -------
        list of float
            List of length n-1 where element i is the average value along the
            i-th super-diagonal of the binary adjacency matrix, normalized by
            the diagonal length (n-i).

        Notes
        -----
        This provides a simple graph structure summary. The first elements
        indicate local connectivity while later elements show longer-range
        connections. Regular lattices show characteristic patterns.
        Uses sparse matrix operations to avoid memory issues with large graphs."""
        # Work with sparse binary adjacency matrix
        mat = self.bin_adj.astype(bool)
        diagsums = []

        # Extract diagonal sums using sparse operations
        for i in range(self.n - 1):
            # Get indices for the i-th superdiagonal
            diag_length = self.n - i
            row_indices = np.arange(diag_length)
            col_indices = row_indices + i

            # Sum the values along this diagonal
            diag_sum = 0
            for j in range(diag_length):
                if mat[row_indices[j], col_indices[j]]:
                    diag_sum += 1

            diagsums.append(diag_sum / diag_length)

        return diagsums



[docs]
    def get_int_dim(self, method="geodesic", force_recompute=False, logger=None, **kwargs):
        """Estimate intrinsic dimension using graph-based methods.

        This method estimates the intrinsic dimensionality using either
        geodesic distances on the graph or k-NN method with precomputed
        k-NN information. Results are cached to avoid recomputation.

        Parameters
        ----------
        method : {'geodesic', 'nn'}, default='geodesic'
            The method to use for intrinsic dimension estimation:
            - 'geodesic': Uses geodesic distances (shortest paths) on the graph
            - 'nn': k-nearest neighbor based estimation using saved k-NN data

        force_recompute : bool, default=False
            If True, recompute the dimension even if it has been computed before.
            If False, return cached result if available.

        logger : logging.Logger, optional
            Logger instance for logging messages. If None, creates a default logger.

        **kwargs
            Additional parameters passed to the dimension estimation method:
            - For 'geodesic': mode ('full'/'fast'), factor (subsampling factor)

        Returns
        -------
        dimension : float
            The estimated intrinsic dimension of the dataset/manifold.

        Raises
        ------
        ValueError
            If an unknown method is specified or required data is not available.

        Examples
        --------
        >>> import numpy as np
        >>> from driada.dim_reduction.graph import ProximityGraph
        >>> # Generate Swiss roll data
        >>> from sklearn.datasets import make_swiss_roll
        >>> np.random.seed(42)  # For reproducible results
        >>> data, _ = make_swiss_roll(n_samples=500, random_state=42)
        >>> # Create proximity graph
        >>> m_params = {'metric_name': 'euclidean', 'sigma': 1.0}
        >>> g_params = {'g_method_name': 'knn', 'nn': 15, 'weighted': True,
        ...             'dist_to_aff': 'hk', 'max_deleted_nodes': 0.5}
        >>> graph = ProximityGraph(data.T, m_params, g_params)
        >>> # Estimate dimension using geodesic method
        >>> dim_geo = graph.get_int_dim(method='geodesic')
        >>> # Get cached result (fast)
        >>> dim_geo_cached = graph.get_int_dim(method='geodesic')
        >>> # Force recomputation
        >>> dim_geo_new = graph.get_int_dim(method='geodesic', force_recompute=True)
        >>> # Or using nn method (only if k-NN graph was used)
        >>> dim_nn = graph.get_int_dim(method='nn')
        >>> # Access all computed dimensions
        >>> dims = sorted(graph.intrinsic_dimensions.keys())
        >>> print(dims)
        ['geodesic_full_f2', 'nn']
        """
        import logging
        from ..dimensionality import geodesic_dimension, nn_dimension

        # Setup logger
        if logger is None:
            logger = logging.getLogger(f"{self.__class__.__name__}.get_int_dim")

        # Initialize cache if not exists
        if not hasattr(self, "intrinsic_dimensions"):
            self.intrinsic_dimensions = {}

        # Validate method
        valid_methods = {"geodesic", "nn"}
        if method not in valid_methods:
            raise ValueError(f"Unknown method: {method}. Choose from {valid_methods}")

        # Create cache key for method with parameters
        cache_key = method
        if method == "geodesic":
            mode = kwargs.get("mode", "full")
            factor = kwargs.get("factor", 2)
            cache_key = f"{method}_{mode}_f{factor}"

        # Check cache unless force_recompute
        if not force_recompute and cache_key in self.intrinsic_dimensions:
            logger.info(
                f"Returning cached intrinsic dimension for {cache_key}: "
                f"{self.intrinsic_dimensions[cache_key]:.3f}"
            )
            return self.intrinsic_dimensions[cache_key]

        logger.info(f"Computing intrinsic dimension using {method} method")

        if method == "geodesic":
            # Extract parameters
            mode = kwargs.get("mode", "full")
            factor = kwargs.get("factor", 2)

            # Use the adjacency matrix with distances if available
            if hasattr(self, "neigh_distmat") and self.neigh_distmat is not None:
                # Use distance matrix if available
                graph_for_geodesic = self.neigh_distmat
            else:
                # Fall back to binary adjacency
                graph_for_geodesic = self.adj

            # Warn about weighted graphs with transformations
            if self.weighted and hasattr(self, "dist_to_aff") and self.dist_to_aff is not None:
                import warnings

                warnings.warn(
                    "Geodesic dimension estimation on weighted graphs with distance-to-affinity "
                    "transformations (e.g., heat kernel) may give different results than expected. "
                    "Consider using an unweighted graph or raw distances for more consistent results.",
                    RuntimeWarning,
                )

            logger.debug(
                f"Using graph with shape {graph_for_geodesic.shape}, "
                f"nnz={graph_for_geodesic.nnz}, mode={mode}, factor={factor}"
            )

            dimension = geodesic_dimension(graph=graph_for_geodesic, mode=mode, factor=factor)

        elif method == "nn":
            # Use k-NN method with saved k-NN data
            # Check if we have saved k-NN data
            if (
                hasattr(self, "knn_indices")
                and self.knn_indices is not None
                and hasattr(self, "knn_distances")
                and self.knn_distances is not None
            ):

                # Use the k value from graph construction
                k = self.nn if hasattr(self, "nn") else self.knn_indices.shape[1] - 1

                logger.debug(f"Using saved k-NN data with k={k}")

                # Use precomputed graph
                dimension = nn_dimension(
                    precomputed_graph=(self.knn_indices, self.knn_distances), k=k
                )
            else:
                raise ValueError(
                    f"nn method requires k-NN graph data which is not available. "
                    f"The graph was created with method '{self.g_method_name}' which "
                    f"does not provide k-NN neighbor information."
                )

        # Cache the result
        self.intrinsic_dimensions[cache_key] = dimension

        logger.info(f"Estimated intrinsic dimension: {dimension:.3f}")

        return dimension