AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/stats/tests/test_multi.py
2024-10-02 22:15:59 +04:00

545 lines
24 KiB
Python

'''Tests for multipletests and fdr pvalue corrections
Author : Josef Perktold
['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n', 'fdr_tsbh']
are tested against R:multtest
'hommel' is tested against R stats p_adjust (not available in multtest)
'fdr_gbs', 'fdr_2sbky' were not found in R and are currently tested for
consistency only
'''
import pytest
import numpy as np
from numpy.testing import (assert_almost_equal, assert_equal,
assert_allclose)
from statsmodels.stats.multitest import (multipletests, fdrcorrection,
fdrcorrection_twostage,
NullDistribution,
local_fdr, multitest_methods_names)
from statsmodels.stats.multicomp import tukeyhsd
from scipy.stats.distributions import norm
import scipy
from packaging import version
# Raw p-values in unsorted order.
# NOTE(review): no use of this module-level name is visible in this section;
# the tests bind their own local ``pval0`` -- confirm before removing.
pval0 = np.array([
    0.838541367553,
    0.642193923795,
    0.680845947633,
    0.967833824309,
    0.71626938238,
    0.177096952723,
    5.23656777208e-05,
    0.0202732688798,
    0.00028140506198,
    0.0149877310796,
])
# Reference results for 10 p-values: one row per p-value, 8 columns per row.
# Column 0 holds the raw p-values in ascending order.  CheckMultiTestsMixin
# compares the other columns with the output of multipletests/fdrcorrection,
# selecting columns through the indices stored in ``rmethods`` (columns 6
# and 7 are additionally checked against fdrcorrection methods 'i' and 'n').
# The values are compared to 15 decimals, so do not round or reformat them.
res_multtest1 = np.array([
[5.2365677720800003e-05, 5.2365677720800005e-04,
5.2365677720800005e-04, 5.2365677720800005e-04,
5.2353339704891422e-04, 5.2353339704891422e-04,
5.2365677720800005e-04, 1.5337740764175588e-03],
[2.8140506198000000e-04, 2.8140506197999998e-03,
2.5326455578199999e-03, 2.5326455578199999e-03,
2.8104897961789277e-03, 2.5297966317768816e-03,
1.4070253098999999e-03, 4.1211324652269442e-03],
[1.4987731079600001e-02, 1.4987731079600000e-01,
1.1990184863680001e-01, 1.1990184863680001e-01,
1.4016246580579017e-01, 1.1379719679449507e-01,
4.9959103598666670e-02, 1.4632862843720582e-01],
[2.0273268879800001e-02, 2.0273268879799999e-01,
1.4191288215860001e-01, 1.4191288215860001e-01,
1.8520270949069695e-01, 1.3356756197485375e-01,
5.0683172199499998e-02, 1.4844940238274187e-01],
[1.7709695272300000e-01, 1.0000000000000000e+00,
1.0000000000000000e+00, 9.6783382430900000e-01,
8.5760763426056130e-01, 6.8947825122356643e-01,
3.5419390544599999e-01, 1.0000000000000000e+00],
[6.4219392379499995e-01, 1.0000000000000000e+00,
1.0000000000000000e+00, 9.6783382430900000e-01,
9.9996560644133570e-01, 9.9413539782557070e-01,
8.9533672797500008e-01, 1.0000000000000000e+00],
[6.8084594763299999e-01, 1.0000000000000000e+00,
1.0000000000000000e+00, 9.6783382430900000e-01,
9.9998903512635740e-01, 9.9413539782557070e-01,
8.9533672797500008e-01, 1.0000000000000000e+00],
[7.1626938238000004e-01, 1.0000000000000000e+00,
1.0000000000000000e+00, 9.6783382430900000e-01,
9.9999661886871472e-01, 9.9413539782557070e-01,
8.9533672797500008e-01, 1.0000000000000000e+00],
[8.3854136755300002e-01, 1.0000000000000000e+00,
1.0000000000000000e+00, 9.6783382430900000e-01,
9.9999998796038225e-01, 9.9413539782557070e-01,
9.3171263061444454e-01, 1.0000000000000000e+00],
[9.6783382430900000e-01, 1.0000000000000000e+00,
1.0000000000000000e+00, 9.6783382430900000e-01,
9.9999999999999878e-01, 9.9413539782557070e-01,
9.6783382430900000e-01, 1.0000000000000000e+00]])
# Column labels, in positional order, of the 10-column reference tables.
res_multtest2_columns = [
    'rawp',
    'Bonferroni',
    'Holm',
    'Hochberg',
    'SidakSS',
    'SidakSD',
    'BH',
    'BY',
    'ABH',
    'TSBH_0.05',
]
# Maps a reference-table column label to the pair
# ``(column index, multipletests method name)``.  Index 8 ('ABH') has no
# entry, so that column is never compared.
rmethods = {
    label: (column, method)
    for column, label, method in (
        (0, 'rawp', 'pval'),
        (1, 'Bonferroni', 'b'),
        (2, 'Holm', 'h'),
        (3, 'Hochberg', 'sh'),
        (4, 'SidakSS', 's'),
        (5, 'SidakSD', 'hs'),
        (6, 'BH', 'fdr_i'),
        (7, 'BY', 'fdr_n'),
        (9, 'TSBH_0.05', 'fdr_tsbh'),
    )
}
# Marker for table entries without a reference value.
NA = np.nan

# all rejections, except for Bonferroni and Sidak
# The flat list is written one table column per source line because the
# reshape below fills the (6, 10) array column by column (order='F').
res_multtest2 = np.array([
    # column 0
    0.002, 0.004, 0.006, 0.008, 0.01, 0.012,
    # column 1
    0.012, 0.024, 0.036, 0.048, 0.06, 0.072,
    # column 2
    0.012, 0.02, 0.024, 0.024, 0.024, 0.024,
    # column 3
    0.012, 0.012, 0.012, 0.012, 0.012, 0.012,
    # column 4
    0.01194015976019192, 0.02376127616613988, 0.03546430060660932,
    0.04705017875634587, 0.058519850599, 0.06987425045000606,
    # column 5
    0.01194015976019192, 0.01984063872102404, 0.02378486270400004,
    0.023808512, 0.023808512, 0.023808512,
    # column 6
    0.012, 0.012, 0.012, 0.012, 0.012, 0.012,
    # column 7
    0.0294, 0.0294, 0.0294, 0.0294, 0.0294, 0.0294,
    # column 8 (no reference values)
    NA, NA, NA, NA, NA, NA,
    # column 9
    0, 0, 0, 0, 0, 0,
]).reshape(6, 10, order='F')
# 10 x 10 reference table used as ``res2`` by TestMultiTests4.  The flat list
# is stored column by column: the reshape with order='F' turns every run of
# 10 consecutive values into one column.  Column 0 holds the raw p-values
# (CheckMultiTestsMixin reads them from ``[:, 0]``); the tests pick the other
# columns through the indices in ``rmethods``.  The run of NA values is the
# column with index 8, which ``rmethods`` never references.
res_multtest3 = np.array([
0.001, 0.002, 0.003, 0.004, 0.005, 0.05, 0.06, 0.07, 0.08, 0.09, 0.01,
0.02, 0.03, 0.04, 0.05, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.018, 0.024,
0.028, 0.03, 0.25, 0.25, 0.25, 0.25, 0.25, 0.01, 0.018, 0.024, 0.028,
0.03, 0.09, 0.09, 0.09, 0.09, 0.09, 0.00995511979025177,
0.01982095664805061, 0.02959822305108317, 0.03928762649718986,
0.04888986953422814, 0.4012630607616213, 0.4613848859051006,
0.5160176928207072, 0.5656115457763677, 0.6105838818818925,
0.00995511979025177, 0.0178566699880266, 0.02374950634358763,
0.02766623106147537, 0.02962749064373438, 0.2262190625000001,
0.2262190625000001, 0.2262190625000001, 0.2262190625000001,
0.2262190625000001, 0.01, 0.01, 0.01, 0.01, 0.01, 0.08333333333333334,
0.0857142857142857, 0.0875, 0.0888888888888889, 0.09,
0.02928968253968254, 0.02928968253968254, 0.02928968253968254,
0.02928968253968254, 0.02928968253968254, 0.2440806878306878,
0.2510544217687075, 0.2562847222222222, 0.2603527336860670,
0.2636071428571428, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.005,
0.005, 0.005, 0.005, 0.005, 0.04166666666666667, 0.04285714285714286,
0.04375, 0.04444444444444445, 0.045
]).reshape(10, 10, order='F')
# 30 x 10 reference table used as ``res2`` by TestMultiTests3.  The flat list
# is stored column by column: the reshape with order='F' turns every run of
# 30 consecutive values into one column.  Column 0 holds the raw p-values
# (CheckMultiTestsMixin reads them from ``[:, 0]``); the tests pick the other
# columns through the indices in ``rmethods``.
# NOTE(review): unlike the smaller tables, the column with index 8 is filled
# with numbers here instead of NA; ``rmethods`` has no entry for it, so it is
# not compared by the tests visible in this section.
res0_large = np.array([
0.00031612, 0.0003965, 0.00048442, 0.00051932, 0.00101436, 0.00121506,
0.0014516, 0.00265684, 0.00430043, 0.01743686, 0.02080285, 0.02785414,
0.0327198, 0.03494679, 0.04206808, 0.08067095, 0.23882767, 0.28352304,
0.36140401, 0.43565145, 0.44866768, 0.45368782, 0.48282088,
0.49223781, 0.55451638, 0.6207473, 0.71847853, 0.72424145, 0.85950263,
0.89032747, 0.0094836, 0.011895, 0.0145326, 0.0155796, 0.0304308,
0.0364518, 0.043548, 0.0797052, 0.1290129, 0.5231058, 0.6240855,
0.8356242, 0.981594, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0.0094836, 0.0114985, 0.01356376, 0.01402164, 0.02637336,
0.0303765, 0.0348384, 0.06110732, 0.09460946, 0.36617406, 0.416057,
0.52922866, 0.5889564, 0.59409543, 0.67308928, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0.0094836, 0.0114985, 0.01356376, 0.01402164,
0.02637336, 0.0303765, 0.0348384, 0.06110732, 0.09460946, 0.36617406,
0.416057, 0.52922866, 0.5889564, 0.59409543, 0.67308928, 0.89032747,
0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747,
0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747,
0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.009440257627368331,
0.01182686507401931, 0.01443098172617119, 0.01546285007478554,
0.02998742566629453, 0.03581680249125385, 0.04264369065603335,
0.0767094173291795, 0.1212818694859857, 0.410051586220387,
0.4677640287633493, 0.5715077903157826, 0.631388450393325,
0.656016359012282, 0.724552174001554, 0.919808283456286,
0.999721715014484, 0.9999547032674126, 0.9999985652190126,
0.999999964809746, 0.999999982525548, 0.999999986719131,
0.999999997434160, 0.999999998521536, 0.999999999970829,
0.999999999999767, 1, 1, 1, 1, 0.009440257627368331,
0.01143489901147732, 0.0134754287611275, 0.01392738605848343,
0.0260416568490015, 0.02993768724817902, 0.0342629726119179,
0.0593542206208364, 0.09045742964699988, 0.308853956167216,
0.343245865702423, 0.4153483370083637, 0.4505333180190900,
0.453775200643535, 0.497247406680671, 0.71681858015803,
0.978083969553718, 0.986889206426321, 0.995400461639735,
0.9981506396214986, 0.9981506396214986, 0.9981506396214986,
0.9981506396214986, 0.9981506396214986, 0.9981506396214986,
0.9981506396214986, 0.9981506396214986, 0.9981506396214986,
0.9981506396214986, 0.9981506396214986, 0.0038949, 0.0038949,
0.0038949, 0.0038949, 0.0060753, 0.0060753, 0.006221142857142857,
0.00996315, 0.01433476666666667, 0.05231058, 0.05673504545454545,
0.06963535, 0.07488597857142856, 0.07488597857142856, 0.08413616,
0.15125803125, 0.421460594117647, 0.4725384, 0.570637910526316,
0.6152972625, 0.6152972625, 0.6152972625, 0.6152972625, 0.6152972625,
0.665419656, 0.7162468846153845, 0.775972982142857, 0.775972982142857,
0.889140651724138, 0.89032747, 0.01556007537622183,
0.01556007537622183, 0.01556007537622183, 0.01556007537622183,
0.02427074531648065, 0.02427074531648065, 0.02485338565390302,
0.0398026560334295, 0.0572672083580799, 0.2089800939109816,
0.2266557764630925, 0.2781923271071372, 0.2991685206792373,
0.2991685206792373, 0.336122876445059, 0.6042738882921044, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.00220711, 0.00220711, 0.00220711,
0.00220711, 0.00344267, 0.00344267, 0.003525314285714285, 0.005645785,
0.00812303444444444, 0.029642662, 0.0321498590909091,
0.03946003166666667, 0.04243538785714285, 0.04243538785714285,
0.0476771573333333, 0.085712884375, 0.23882767, 0.26777176,
0.323361482631579, 0.34866844875, 0.34866844875, 0.34866844875,
0.34866844875, 0.34866844875, 0.3770711384, 0.4058732346153846,
0.4397180232142857, 0.4397180232142857, 0.503846369310345,
0.504518899666667, 0.00272643, 0.00272643, 0.00272643, 0.00272643,
0.00425271, 0.00425271, 0.0043548, 0.006974205, 0.01003433666666667,
0.036617406, 0.03971453181818182, 0.048744745, 0.052420185,
0.052420185, 0.058895312, 0.105880621875, 0.295022415882353,
0.33077688, 0.399446537368421, 0.43070808375, 0.43070808375,
0.43070808375, 0.43070808375, 0.43070808375, 0.4657937592,
0.5013728192307692, 0.5431810875, 0.5431810875, 0.622398456206897,
0.623229229
]).reshape(30, 10, order='F')
class CheckMultiTestsMixin:
    """Shared comparisons of corrected p-values against a reference table.

    The tests read three attributes that the concrete test classes assign in
    ``setup_class``: ``res2`` (reference table with the raw p-values in
    column 0), ``methods`` (method names that have reference values) and
    ``alpha`` (level passed to ``multipletests``).
    """

    @pytest.mark.parametrize('key,val', sorted(rmethods.items()))
    def test_multi_pvalcorrection_rmethods(self, key, val):
        """Compare ``multipletests`` with one column of the reference table.

        ``val`` is a ``(column index, method name)`` pair from ``rmethods``;
        nothing is checked when the method is not listed in ``self.methods``.
        """
        # test against R package multtest mt.rawp2adjp
        expected = self.res2
        raw_pvals = expected[:, 0]
        column, method = val
        if method not in self.methods:
            return

        outcome = multipletests(raw_pvals, alpha=self.alpha, method=method)
        reject, pvalscorr = outcome[:2]
        assert_almost_equal(pvalscorr, expected[:, column], 15)
        # the reject flags have to agree with the corrected p-values
        assert_equal(reject, pvalscorr <= self.alpha)

    def test_multi_pvalcorrection(self):
        """Compare ``fdrcorrection`` with columns 7 and 6 of the table."""
        # test against R package multtest mt.rawp2adjp
        expected = self.res2
        raw_pvals = expected[:, 0]
        for fdr_method, column in (('n', 7), ('i', 6)):
            corrected = fdrcorrection(raw_pvals, method=fdr_method)[1]
            assert_almost_equal(np.sort(corrected), expected[:, column], 15)
class TestMultiTests1(CheckMultiTestsMixin):
    """Run the shared checks on ``res_multtest1`` with ``alpha = 0.1``."""

    @classmethod
    def setup_class(cls):
        """Attach the reference table, level and method list to the class."""
        cls.res2 = res_multtest1
        cls.alpha = 0.1
        cls.methods = [
            'b', 's', 'sh', 'hs', 'h',
            'fdr_i', 'fdr_n',
        ]
class TestMultiTests2(CheckMultiTestsMixin):
    """Run the shared checks on ``res_multtest2`` with ``alpha = 0.05``.

    case: all hypotheses rejected (except 'b' and 's')
    """

    @classmethod
    def setup_class(cls):
        """Attach the reference table, level and method list to the class."""
        cls.res2 = res_multtest2
        cls.alpha = 0.05
        cls.methods = [
            'b', 's', 'sh', 'hs', 'h',
            'fdr_i', 'fdr_n',
        ]
class TestMultiTests3(CheckMultiTestsMixin):
    """Run the shared checks on ``res0_large`` with ``alpha = 0.05``."""

    @classmethod
    def setup_class(cls):
        """Attach the reference table, level and method list to the class."""
        cls.res2 = res0_large
        cls.alpha = 0.05
        cls.methods = [
            'b', 's', 'sh', 'hs', 'h',
            'fdr_i', 'fdr_n', 'fdr_tsbh',
        ]
class TestMultiTests4(CheckMultiTestsMixin):
    """Run the shared checks on ``res_multtest3`` with ``alpha = 0.05``."""

    # in simulations, all two stage fdr, fdr_tsbky, fdr_tsbh, fdr_gbs, have in
    # some cases (cases with large Alternative) an FDR that looks too large
    # this is the first case #rejected = 12, DGP : has 10 false

    @classmethod
    def setup_class(cls):
        """Attach the reference table, level and method list to the class."""
        cls.res2 = res_multtest3
        cls.alpha = 0.05
        cls.methods = [
            'b', 's', 'sh', 'hs', 'h',
            'fdr_i', 'fdr_n', 'fdr_tsbh',
        ]
@pytest.mark.parametrize('alpha', [0.01, 0.05, 0.1])
@pytest.mark.parametrize('method', ['b', 's', 'sh', 'hs', 'h', 'hommel',
                                    'fdr_i', 'fdr_n', 'fdr_tsbky',
                                    'fdr_tsbh', 'fdr_gbs'])
@pytest.mark.parametrize('ii', list(range(11)))
def test_pvalcorrection_reject(alpha, method, ii):
    """Check that the reject flags agree with the corrected p-values.

    The input consists of ``ii`` small p-values followed by ``10 - ii``
    larger ones.
    """
    # consistency test for reject boolean and pvalscorr
    small = np.linspace(0.0001, 0.0100, ii)
    # using .05001 instead of 0.05 to avoid edge case issue #768
    large = np.linspace(0.05001, 0.11, 10 - ii)
    raw_pvals = np.concatenate((small, large))

    outcome = multipletests(raw_pvals, alpha=alpha, method=method)
    reject, pvalscorr = outcome[:2]

    details = (method, alpha, reject.sum(), raw_pvals, pvalscorr)
    msg = 'case %s %3.2f rejected:%d\npval_raw=%r\npvalscorr=%r' % details
    assert_equal(reject, pvalscorr <= alpha, err_msg=msg)
def test_hommel():
    """Compare the 'ho' correction with stored reference values."""
    # tested against R stats p_adjust(pval0, method='hommel')
    raw_pvals = np.array([
        0.00116, 0.00924, 0.01075, 0.01437, 0.01784, 0.01918,
        0.02751, 0.02871, 0.03054, 0.03246, 0.04259, 0.06879,
        0.0691, 0.08081, 0.08593, 0.08993, 0.09386, 0.09412,
        0.09718, 0.09758, 0.09781, 0.09788, 0.13282, 0.20191,
        0.21757, 0.24031, 0.26061, 0.26762, 0.29474, 0.32901,
        0.41386, 0.51479, 0.52461, 0.53389, 0.56276, 0.62967,
        0.72178, 0.73403, 0.87182, 0.95384])
    expected = np.array([
        0.0464, 0.25872, 0.29025,
        0.3495714285714286, 0.41032, 0.44114,
        0.57771, 0.60291, 0.618954,
        0.6492, 0.7402725000000001, 0.86749,
        0.86749, 0.8889100000000001, 0.8971477777777778,
        0.8993, 0.9175374999999999, 0.9175374999999999,
        0.9175374999999999, 0.9175374999999999, 0.9175374999999999,
        0.9175374999999999, 0.95384, 0.9538400000000001,
        0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
        0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
        0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
        0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
        0.9538400000000001, 0.9538400000000001, 0.9538400000000001,
        0.9538400000000001])

    outcome = multipletests(raw_pvals, alpha=0.1, method='ho')
    rejected, corrected, _, _ = outcome
    assert_almost_equal(corrected, expected, 15)
    assert_equal(rejected, expected < 0.1)
def test_fdr_bky():
    """Check ``fdrcorrection_twostage`` on the BKY example and issue #8619."""
    # test for fdrcorrection_twostage
    # example from BKY
    bky_pvals = [
        0.0001, 0.0004, 0.0019, 0.0095, 0.0201, 0.0278, 0.0298, 0.0344, 0.0459,
        0.3240, 0.4262, 0.5719, 0.6528, 0.7590, 1.000]

    # no test for corrected p-values, but they are inherited
    # same number of rejection as in BKY paper:
    # single step-up:4, two-stage:8, iterated two-step:9
    # also alpha_star is the same as theirs for TST

    # passing ``iter`` is expected to emit a FutureWarning
    with pytest.warns(FutureWarning, match="iter keyword"):
        res_tst = fdrcorrection_twostage(bky_pvals, alpha=0.05, iter=False)

    # alpha_star for stage 2
    alpha_star = res_tst[-1]
    assert_almost_equal([0.047619, 0.0649], alpha_star[:2], 3)
    assert_equal(8, res_tst[0].sum())

    # reference number from Prism, see #8619
    prism_bky = np.array([
        0.0012, 0.0023, 0.0073, 0.0274, 0.0464, 0.0492, 0.0492, 0.0497,
        0.0589, 0.3742, 0.4475, 0.5505, 0.5800, 0.6262, 0.77
    ])
    assert_allclose(res_tst[1], prism_bky, atol=6e-5)

    # issue #8619, problems if no or all rejected, ordering
    unordered = np.array([0.2, 0.8, 0.3, 0.5, 1])
    direct = fdrcorrection_twostage(unordered, alpha=0.05, method='bky')
    wrapped = multipletests(unordered, alpha=0.05, method='fdr_tsbky')
    assert_equal(direct[0], wrapped[0])
    assert_allclose(direct[1], wrapped[1], atol=6e-5)

    # confirmed with Prism
    prism_unordered = np.array([0.7875, 1., 0.7875, 0.875, 1.])
    assert_allclose(direct[1], prism_unordered, atol=6e-5)
def test_fdr_twostage():
    """Check the ``maxiter`` option of the two-stage fdr corrections.

    For the 'bh' and 'bky' variants, ``maxiter`` 0 and 1 are compared with
    values derived from the one-stage 'fdr_bh' result, and the default
    ``maxiter`` of ``multipletests`` is compared with ``maxiter=1``.
    """
    # test for iteration in fdrcorrection_twostage, new maxiter
    # example from BKY
    pvals = [
        0.0001, 0.0004, 0.0019, 0.0095, 0.0201, 0.0278, 0.0298, 0.0344, 0.0459,
        0.3240, 0.4262, 0.5719, 0.6528, 0.7590, 1.000]
    n_tests = len(pvals)

    # one-stage fdr results, the baseline for every comparison below
    onestage = multipletests(pvals, alpha=0.05, method='fdr_bh')
    onestage_pvals = onestage[1]
    # share of hypotheses that the one-stage procedure does not reject
    not_rejected = 1 - onestage[0].sum() / n_tests

    # bh twostage fdr
    # maxiter=0: same pvalues as one-stage fdr
    direct = fdrcorrection_twostage(pvals, alpha=0.05, method='bh',
                                    maxiter=0, iter=None)
    wrapped = multipletests(pvals, alpha=0.05, method='fdr_tsbh', maxiter=0)
    assert_allclose(direct[1], onestage_pvals)
    assert_allclose(wrapped[1], direct[1])

    # maxiter=1: pvalues corrected by first stage number of rejections
    direct = fdrcorrection_twostage(pvals, alpha=0.05, method='bh',
                                    maxiter=1, iter=None)
    wrapped = multipletests(pvals, alpha=0.05, method='fdr_tsbh', maxiter=1)
    default = multipletests(pvals, alpha=0.05, method='fdr_tsbh')
    assert_allclose(direct[1], onestage_pvals * not_rejected)
    assert_allclose(wrapped[1], direct[1])
    assert_allclose(default[1], direct[1])  # check default maxiter

    # bky has an extra factor 1+alpha in fdr twostage independent of iter
    fact = 1 + 0.05

    # maxiter=0: same pvalues as one-stage fdr
    direct = fdrcorrection_twostage(pvals, alpha=0.05, method='bky',
                                    maxiter=0, iter=None)
    wrapped = multipletests(pvals, alpha=0.05, method='fdr_tsbky', maxiter=0)
    assert_allclose(direct[1], np.clip(onestage_pvals * fact, 0, 1))
    assert_allclose(wrapped[1], direct[1])

    # maxiter=1: pvalues corrected by first stage number of rejections
    direct = fdrcorrection_twostage(pvals, alpha=0.05, method='bky',
                                    maxiter=1, iter=None)
    wrapped = multipletests(pvals, alpha=0.05, method='fdr_tsbky', maxiter=1)
    default = multipletests(pvals, alpha=0.05, method='fdr_tsbky')
    assert_allclose(direct[1], onestage_pvals * not_rejected * fact)
    assert_allclose(wrapped[1], direct[1])
    assert_allclose(default[1], direct[1])  # check default maxiter
@pytest.mark.parametrize('method', sorted(multitest_methods_names))
def test_issorted(method):
    """Check that ``is_sorted=True`` reproduces the default results.

    ``multipletests`` is called once with unsorted p-values and once with
    the same values pre-sorted and ``is_sorted=True``.  After mapping the
    second result back to the original order, both the reject flags and the
    corrected p-values have to agree.
    """
    # test that is_sorted keyword works correctly
    # the fdrcorrection functions are tested indirectly

    # data generated as random numbers np.random.beta(0.2, 0.5, size=10)
    pvals = np.array([31, 9958111, 7430818, 8653643, 9892855, 876, 2651691,
                      145836, 9931, 6174747]) * 1e-7
    sortind = np.argsort(pvals)
    # inverse permutation: maps sorted positions back to the original order
    sortrevind = sortind.argsort()
    pvals_sorted = pvals[sortind]

    res1 = multipletests(pvals, method=method, is_sorted=False)
    res2 = multipletests(pvals_sorted, method=method, is_sorted=True)

    # index 0: reject flags, index 1: corrected p-values
    assert_equal(res2[0][sortrevind], res1[0])
    # Previously this line compared the reject flags a second time, so the
    # corrected p-values were never checked.
    assert_allclose(res2[1][sortrevind], res1[1], rtol=1e-10)
@pytest.mark.parametrize('method', sorted(multitest_methods_names))
def test_floating_precision(method):
    """Check that one tiny p-value is not corrected down to below 1e-60."""
    # issue #7465
    n_tests = 6000
    pvals = np.full(n_tests, 0.99)
    pvals[0] = 1.138569e-56

    corrected = multipletests(pvals, method=method)[1]
    assert corrected[0] > 1e-60
def test_tukeyhsd():
    """Compare ``tukeyhsd`` with stored R results for ten pairwise tests.

    Mean differences, confidence intervals and reject decisions are checked
    for all pairs; p-values are checked only for the pairs with small
    p-values.
    """
    # example multicomp in R p 83
    #
    # R output the reference array below was taken from:
    #
    #   pair      diff        lwr        upr       p adj
    #   P-M   8.150000 -10.037586 26.3375861 0.670063958
    #   S-M  -3.258333 -21.445919 14.9292527 0.982419709
    #   T-M  23.808333   5.620747 41.9959194 0.006783701
    #   V-M   4.791667 -13.395919 22.9792527 0.931020848
    #   S-P -11.408333 -29.595919  6.7792527 0.360680099
    #   T-P  15.658333  -2.529253 33.8459194 0.113221634
    #   V-P  -3.358333 -21.545919 14.8292527 0.980350080
    #   T-S  27.066667   8.879081 45.2542527 0.002027122
    #   V-S   8.050000 -10.137586 26.2375861 0.679824487
    #   V-T -19.016667 -37.204253 -0.8290806 0.037710044

    # columns: diff, lwr, upr, p adj
    res = np.array([
        [8.150000, -10.037586, 26.3375861, 0.670063958],
        [-3.258333, -21.445919, 14.9292527, 0.982419709],
        [23.808333, 5.620747, 41.9959194, 0.006783701],
        [4.791667, -13.395919, 22.9792527, 0.931020848],
        [-11.408333, -29.595919, 6.7792527, 0.360680099],
        [15.658333, -2.529253, 33.8459194, 0.113221634],
        [-3.358333, -21.545919, 14.8292527, 0.980350080],
        [27.066667, 8.879081, 45.2542527, 0.002027122],
        [8.050000, -10.137586, 26.2375861, 0.679824487],
        [-19.016667, -37.204253, -0.8290806, 0.037710044]])

    m_r = [94.39167, 102.54167, 91.13333, 118.20000, 99.18333]
    myres = tukeyhsd(m_r, 6, 110.8254416667, alpha=0.05, df=4)
    # positions 1, 2 and 4 of the result are the reject flags, the mean
    # differences and the confidence intervals; position 8 the p-values
    reject, meandiffs, confint = myres[1], myres[2], myres[4]

    assert_almost_equal(meandiffs, res[:, 0], decimal=5)
    assert_almost_equal(confint, res[:, 1:3], decimal=2)
    assert_equal(reject, res[:, 3] < 0.05)

    # check p-values (divergence of high values is expected)
    small_pvals_idx = [2, 5, 7, 9]

    # Remove this check when minimum SciPy version is 1.7+ (gh-8035)
    scipy_at_least_1_7 = (version.parse(scipy.version.version) >=
                          version.parse('1.7.0'))
    rtol = 1e-5 if scipy_at_least_1_7 else 1e-2
    assert_allclose(myres[8][small_pvals_idx], res[small_pvals_idx, 3],
                    rtol=rtol)
def test_local_fdr():
    """Compare ``local_fdr`` with a closed-form value on the upper tail."""
    # Create a mixed population of Z-scores: 1000 standard normal and
    # 20 uniformly distributed between 3 and 4.
    null_z = norm.ppf(np.linspace(0.001, 0.999, 1000))
    alt_z = np.linspace(3, 4, 20)
    zs = np.concatenate((null_z, alt_z))

    # Exact local FDR for U(3, 4) component.
    density = np.exp(-alt_z**2 / 2) / np.sqrt(2*np.pi)
    weight = len(alt_z) / float(len(null_z) + len(alt_z))
    expected = density / ((1 - weight) * density + weight)

    # ``None`` stands for calling local_fdr without an alpha argument
    for alpha in (None, 0, 1e-8):
        options = {} if alpha is None else {'alpha': alpha}
        fdr = local_fdr(zs, **options)
        # only the entries that belong to the U(3, 4) component are checked
        upper_tail = fdr[len(null_z):]
        assert_allclose(expected, upper_tail, rtol=0.05, atol=0.1)
def test_null_distribution():
    """Check the ``NullDistribution`` estimates on a mixed z-score sample."""
    # Create a mixed population of Z-scores: 1000 standard normal and
    # 20 uniformly distributed between 3 and 4.
    null_z = norm.ppf(np.linspace(0.001, 0.999, 1000))
    alt_z = np.linspace(3, 4, 20)
    zs = np.concatenate((null_z, alt_z))

    emp_null = NullDistribution(zs, estimate_null_proportion=True)

    assert_allclose(emp_null.mean, 0, atol=1e-5, rtol=1e-5)
    assert_allclose(emp_null.sd, 1, atol=1e-5, rtol=1e-2)
    assert_allclose(emp_null.null_proportion, 0.98, atol=1e-5, rtol=1e-2)

    # consistency check
    points = np.r_[-1, 0, 1]
    normal_pdf = norm.pdf(points, loc=emp_null.mean, scale=emp_null.sd)
    assert_allclose(emp_null.pdf(points), normal_pdf, rtol=1e-13)
@pytest.mark.parametrize('estimate_prob', [True, False])
@pytest.mark.parametrize('estimate_scale', [True, False])
@pytest.mark.parametrize('estimate_mean', [True, False])
def test_null_constrained(estimate_mean, estimate_scale, estimate_prob):
    """Check ``NullDistribution`` when some parameters are not estimated.

    A parameter that is not estimated is expected to stay at 0 (mean),
    1 (sd) or 1 (null proportion).
    """
    # Create a mixed population of Z-scores: 1000 standard normal and
    # 20 uniformly distributed between 3 and 4.
    null_z = norm.ppf(np.linspace(0.001, 0.999, 1000))
    alt_z = np.linspace(3, 4, 20)
    zs = np.concatenate((null_z, alt_z))

    emp_null = NullDistribution(
        zs,
        estimate_mean=estimate_mean,
        estimate_scale=estimate_scale,
        estimate_null_proportion=estimate_prob,
    )

    if not estimate_mean:
        assert_allclose(emp_null.mean, 0, atol=1e-5, rtol=1e-5)
    if not estimate_scale:
        assert_allclose(emp_null.sd, 1, atol=1e-5, rtol=1e-2)
    if not estimate_prob:
        assert_allclose(emp_null.null_proportion, 1, atol=1e-5, rtol=1e-2)

    # consistency check
    points = np.r_[-1, 0, 1]
    normal_pdf = norm.pdf(points, loc=emp_null.mean, scale=emp_null.sd)
    assert_allclose(emp_null.pdf(points), normal_pdf, rtol=1e-13)