Source code for driada.intense.correction

"""
Multiple comparison correction for INTENSE analysis.

Provides p-value threshold calculation for family-wise error rate (FWER) and
false discovery rate (FDR) control methods.
"""


[docs] def get_multicomp_correction_thr(fwer, mode="holm", **multicomp_kwargs) -> float: """ Calculate p-value threshold for multiple hypothesis correction. Parameters ---------- fwer : float Family-wise error rate or false discovery rate (e.g., 0.05). Must be between 0 and 1. mode : str or None, optional Multiple comparison correction method. Default: 'holm'. - None: No correction, threshold = fwer - 'bonferroni': Bonferroni correction (FWER control) - 'holm': Holm-Bonferroni correction (FWER control, more powerful) - 'fdr_bh': Benjamini-Hochberg FDR correction **multicomp_kwargs Additional arguments for correction method: - For 'bonferroni': nhyp (int) - number of hypotheses, must be > 0 - For 'holm': all_pvals (list) - all p-values to be tested - For 'fdr_bh': all_pvals (list) - all p-values to be tested Returns ------- threshold : float Adjusted p-value threshold for individual hypothesis testing. Returns 0 if no p-values pass the correction criteria (reject all). Raises ------ ValueError If fwer is not between 0 and 1. If required arguments are missing or invalid. If unknown method specified. If nhyp <= 0 for bonferroni method. If all_pvals is empty for holm or fdr_bh methods. Notes ----- - FWER methods (bonferroni, holm) control probability of ANY false positive - FDR methods control expected proportion of false positives among rejections - Holm is uniformly more powerful than Bonferroni - FDR typically allows more discoveries but with controlled false positive rate - If no p-values satisfy the correction criteria, threshold is set to 0 (reject all hypotheses) Examples -------- >>> # Compare different multiple comparison correction methods >>> pvals = [0.001, 0.01, 0.02, 0.03, 0.04] >>> >>> # No correction - uses raw threshold >>> thr_none = get_multicomp_correction_thr(0.05, mode=None) >>> thr_none 0.05 >>> >>> # Bonferroni correction - most conservative >>> thr_bonf = get_multicomp_correction_thr(0.05, mode='bonferroni', nhyp=5) >>> thr_bonf 0.01 >>> >>> # Holm correction - less conservative than Bonferroni >>> thr_holm = get_multicomp_correction_thr(0.05, mode='holm', all_pvals=pvals) >>> round(thr_holm, 4) 0.0125 >>> >>> # FDR correction - controls false discovery rate >>> thr_fdr = get_multicomp_correction_thr(0.05, mode='fdr_bh', all_pvals=pvals) >>> thr_fdr 0.04 >>> >>> # FDR is least conservative: bonf < holm < fdr < none >>> thr_bonf < thr_holm < thr_fdr < thr_none True""" # Validate fwer parameter if not 0 <= fwer <= 1: raise ValueError(f"fwer must be between 0 and 1, got {fwer}") if mode is None: threshold = fwer elif mode == "bonferroni": if "nhyp" not in multicomp_kwargs: raise ValueError("Number of hypotheses for Bonferroni correction not provided") nhyp = multicomp_kwargs["nhyp"] if nhyp <= 0: raise ValueError(f"Number of hypotheses must be positive, got {nhyp}") threshold = fwer / nhyp elif mode == "holm": if "all_pvals" not in multicomp_kwargs: raise ValueError("List of p-values for Holm correction not provided") all_pvals = multicomp_kwargs["all_pvals"] if len(all_pvals) == 0: raise ValueError("Empty p-value list provided for Holm correction") all_pvals = sorted(all_pvals) nhyp = len(all_pvals) threshold = 0 # Default if no discoveries (reject all) for i, pval in enumerate(all_pvals): cthr = fwer / (nhyp - i) if pval > cthr: break threshold = cthr elif mode == "fdr_bh": if "all_pvals" not in multicomp_kwargs: raise ValueError("List of p-values for FDR correction not provided") all_pvals = multicomp_kwargs["all_pvals"] if len(all_pvals) == 0: raise ValueError("Empty p-value list provided for FDR correction") all_pvals = sorted(all_pvals) nhyp = len(all_pvals) threshold = 0.0 # Default if no discoveries (reject all) # Benjamini-Hochberg procedure for i in range(nhyp - 1, -1, -1): if all_pvals[i] <= fwer * (i + 1) / nhyp: threshold = all_pvals[i] break else: raise ValueError("Unknown multiple comparisons correction method") return threshold