update
This commit is contained in:
0
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__init__.py
vendored
Normal file
0
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__init__.py
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/common_tests.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/common_tests.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_axis_nan_policy.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_axis_nan_policy.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_binned_statistic.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_binned_statistic.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_censored_data.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_censored_data.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_contingency.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_contingency.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_continuous_basic.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_continuous_basic.cpython-312.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_crosstab.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_crosstab.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_basic.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_basic.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_distns.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_distns.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_distributions.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_distributions.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_entropy.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_entropy.cpython-312.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_fit.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_fit.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_hypotests.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_hypotests.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_kdeoth.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_kdeoth.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mgc.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mgc.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_morestats.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_morestats.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_basic.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_basic.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_extras.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_extras.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_multicomp.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_multicomp.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_multivariate.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_multivariate.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_odds_ratio.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_odds_ratio.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_qmc.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_qmc.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_rank.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_rank.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_relative_risk.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_relative_risk.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_resampling.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_resampling.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_sampling.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_sampling.cpython-312.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_stats.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_stats.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_survival.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_survival.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_tukeylambda_stats.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_tukeylambda_stats.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_variation.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_variation.cpython-312.pyc
vendored
Normal file
Binary file not shown.
354
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/common_tests.py
vendored
Normal file
354
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/common_tests.py
vendored
Normal file
@@ -0,0 +1,354 @@
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import numpy.ma.testutils as ma_npt
|
||||
|
||||
from scipy._lib._util import (
|
||||
getfullargspec_no_self as _getfullargspec, np_long
|
||||
)
|
||||
from scipy._lib._array_api import xp_assert_equal
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def check_named_results(res, attributes, ma=False, xp=None):
    """Check that positional and named access to a result object agree.

    Parameters
    ----------
    res : indexable object with named attributes
        Result whose ``res[i]`` is compared with ``getattr(res, name)``.
    attributes : iterable of str
        Attribute names, listed in the positional order of the fields.
    ma : bool, optional
        If true, compare with ``numpy.ma.testutils.assert_equal``.
    xp : object, optional
        If not None (and `ma` is false), compare with ``xp_assert_equal``.
        Only whether it is None is inspected here.
    """
    for position, name in enumerate(attributes):
        by_index, by_name = res[position], getattr(res, name)
        if ma:
            ma_npt.assert_equal(by_index, by_name)
        elif xp is None:
            npt.assert_equal(by_index, by_name)
        else:
            xp_assert_equal(by_index, by_name)
|
||||
|
||||
|
||||
def check_normalization(distfn, args, distname):
    """Assert that the distribution integrates to one.

    Three checks are made: the zeroth moment equals 1, the expectation of
    the constant function 1 is close to 1, and the CDF evaluated at the
    upper end of the support equals 1.

    Parameters
    ----------
    distfn : distribution object
        Must provide ``moment``, ``expect``, ``support`` and ``cdf``.
    args : tuple
        Shape parameters passed to the distribution methods.
    distname : str
        Used as the failure message and to select the tolerances of the
        ``expect`` check.
    """
    zeroth_moment = distfn.moment(0, *args)
    npt.assert_allclose(zeroth_moment, 1.0)

    # "rv_histogram_instance" is checked with an absolute tolerance only.
    if distname == "rv_histogram_instance":
        tolerances = {'atol': 1e-5, 'rtol': 0}
    else:
        tolerances = {'atol': 1e-7, 'rtol': 1e-7}

    unit_expectation = distfn.expect(lambda x: 1, args=args)
    npt.assert_allclose(unit_expectation, 1.0, err_msg=distname,
                        verbose=True, **tolerances)

    _lower, upper = distfn.support(*args)
    cdf_at_upper = distfn.cdf(upper, *args)
    npt.assert_allclose(cdf_at_upper, 1.0)
|
||||
|
||||
|
||||
def check_moment(distfn, arg, m, v, msg):
    """Compare the first two moments of `distfn` with reference values.

    Parameters
    ----------
    distfn : distribution object
        Must provide ``moment(order, *arg)``.
    arg : tuple
        Shape parameters passed to ``moment``.
    m : float
        Reference mean. If infinite, the first moment must be infinite too.
    v : float
        Reference variance. If infinite, the second raw moment must be
        infinite too.
    msg : str
        Prefix for the failure messages.
    """
    m1 = distfn.moment(1, *arg)
    m2 = distfn.moment(2, *arg)

    if not np.isinf(m):
        npt.assert_almost_equal(m1, m, decimal=10,
                                err_msg=msg + ' - 1st moment')
    else:  # or np.isnan(m1),
        npt.assert_(np.isinf(m1),
                    msg + f' - 1st moment -infinite, m1={m1!s}')

    if not np.isinf(v):
        # Variance is recovered from the raw moments: E[x^2] - E[x]^2.
        npt.assert_almost_equal(m2 - m1 * m1, v, decimal=10,
                                err_msg=msg + ' - 2nd moment')
    else:  # or np.isnan(m2),
        npt.assert_(np.isinf(m2), msg + f' - 2nd moment -infinite, {m2=}')
|
||||
|
||||
|
||||
def check_mean_expect(distfn, arg, m, msg):
    """Check ``expect`` of the identity function against the mean `m`.

    Nothing is checked when `m` is not finite.
    """
    if not np.isfinite(m):
        return
    numeric_mean = distfn.expect(lambda t: t, arg)
    npt.assert_almost_equal(numeric_mean, m, decimal=5,
                            err_msg=msg + ' - 1st moment (expect)')
|
||||
|
||||
|
||||
def check_var_expect(distfn, arg, m, v, msg):
    """Check ``expect`` of ``x*x`` against ``v + m*m``.

    Nothing is checked when `v` is not finite. A relative tolerance of
    5e-6 is used when `msg` names one of the distributions listed below;
    otherwise the ``assert_allclose`` defaults apply.
    """
    looser_tolerance_names = {"rv_histogram_instance", "ksone"}
    if msg in looser_tolerance_names:
        tolerance = {'rtol': 5e-6}
    else:
        tolerance = {}

    if not np.isfinite(v):
        return
    second_raw_moment = distfn.expect(lambda t: t*t, arg)
    npt.assert_allclose(second_raw_moment, v + m*m, **tolerance)
|
||||
|
||||
|
||||
def check_skew_expect(distfn, arg, m, v, s, msg):
    """Check the third central moment from ``expect`` against skewness `s`.

    For finite `s` the expectation of ``(x - m)**3`` must match
    ``s * v**1.5`` to 5 decimals; otherwise `s` is required to be NaN.
    """
    if not np.isfinite(s):
        npt.assert_(np.isnan(s))
        return
    third_central = distfn.expect(lambda t: np.power(t - m, 3), arg)
    npt.assert_almost_equal(third_central, s * np.power(v, 1.5),
                            decimal=5, err_msg=msg + ' - skew')
|
||||
|
||||
|
||||
def check_kurt_expect(distfn, arg, m, v, k, msg):
    """Check the fourth central moment from ``expect`` against kurtosis `k`.

    For finite `k` the expectation of ``(x - m)**4`` must match
    ``(k + 3) * v**2``. A `k` of +inf is accepted without a check; any
    other non-finite `k` is required to be NaN.
    """
    if np.isfinite(k):
        fourth_central = distfn.expect(lambda t: np.power(t - m, 4), arg)
        npt.assert_allclose(fourth_central, (k + 3.) * np.power(v, 2),
                            atol=1e-5, rtol=1e-5,
                            err_msg=msg + ' - kurtosis')
        return
    if not np.isposinf(k):
        npt.assert_(np.isnan(k))
|
||||
|
||||
|
||||
def check_munp_expect(dist, args, msg):
    """Compare the fifth moment with numerical integration.

    The check only runs when `dist` overrides ``_munp``. (Before gh-18634,
    some distributions had issues with moments 5 and higher.)
    """
    if dist._munp.__func__ == stats.rv_continuous._munp:
        # Generic implementation in use; nothing distribution-specific to test.
        return

    res = dist.moment(5, *args)  # shouldn't raise an error
    ref = dist.expect(lambda t: t ** 5, args, lb=-np.inf, ub=np.inf)
    if not np.isfinite(res):  # could be valid; automated test can't know
        return
    # loose tolerance, mostly to see whether _munp returns *something*
    assert_allclose(res, ref, atol=1e-10, rtol=1e-4,
                    err_msg=msg + ' - higher moment / _munp')
|
||||
|
||||
|
||||
def check_entropy(distfn, arg, msg):
    """Assert that ``distfn.entropy(*arg)`` is not NaN."""
    entropy_value = distfn.entropy(*arg)
    is_nan = np.isnan(entropy_value)
    npt.assert_(not is_nan, msg + 'test Entropy is nan')
|
||||
|
||||
|
||||
def check_private_entropy(distfn, args, superclass):
    """Compare the ``_entropy`` of `distfn` with that of `superclass`.

    ``superclass._entropy`` is called unbound with `distfn` as its first
    argument, so the generic implementation runs on the same instance.
    """
    specific = distfn._entropy(*args)
    generic = superclass._entropy(distfn, *args)
    npt.assert_allclose(specific, generic)
|
||||
|
||||
|
||||
def check_entropy_vect_scale(distfn, arg):
    """Check that ``entropy`` broadcasts over the ``scale`` keyword.

    The result for an array-like ``scale`` is compared with the results of
    calling ``entropy`` once per scale value.
    """
    def _compare(scales, individual_scales):
        vectorized = distfn.entropy(*arg, scale=scales)
        one_by_one = [distfn.entropy(*arg, scale=value)
                      for value in individual_scales]
        one_by_one = np.asarray(one_by_one).reshape(vectorized.shape)
        assert_allclose(vectorized, one_by_one, atol=1e-14)

    # check 2-d
    grid = np.asarray([[1, 2], [3, 4]])
    _compare(grid, grid.ravel())

    # check invalid value, check cast
    mixed = [1, 2, -3]
    _compare(mixed, mixed)
|
||||
|
||||
|
||||
def check_edge_support(distfn, args):
    """Check distribution methods at the end points of the support.

    For ``rv_discrete`` instances the lower point is shifted down by one
    before the checks are made.
    """
    lower, upper = distfn.support(*args)
    if isinstance(distfn, stats.rv_discrete):
        lower = lower - 1
    x = (lower, upper)

    npt.assert_equal(distfn.cdf(x, *args), [0.0, 1.0])
    npt.assert_equal(distfn.sf(x, *args), [1.0, 0.0])

    skip_log_checks = distfn.name in ('skellam', 'dlaplace')
    if not skip_log_checks:
        # with a = -inf, log(0) generates warnings
        npt.assert_equal(distfn.logcdf(x, *args), [-np.inf, 0.0])
        npt.assert_equal(distfn.logsf(x, *args), [0.0, -np.inf])

    probabilities = [0.0, 1.0]
    npt.assert_equal(distfn.ppf(probabilities, *args), x)
    npt.assert_equal(distfn.isf(probabilities, *args), x[::-1])

    # out-of-bounds for isf & ppf
    outside = [-1, 2]
    npt.assert_(np.isnan(distfn.isf(outside, *args)).all())
    npt.assert_(np.isnan(distfn.ppf(outside, *args)).all())
|
||||
|
||||
|
||||
def check_named_args(distfn, x, shape_args, defaults, meths):
    """Check that shape parameters can be passed by keyword.

    Parameters
    ----------
    distfn : distribution object
        Must provide ``_parse_args``, ``shapes``, ``numargs``, ``moment``
        and ``cdf``.
    x : array_like
        Evaluation point(s) passed as first argument to each of `meths`.
    shape_args : sequence
        Shape parameter values.
    defaults : sequence
        Expected defaults of the trailing ``_parse_args`` parameters.
    meths : sequence of callables
        Methods called as ``meth(x, *positional, **keywords)``.
    """
    # check consistency of shapes, numargs and _parse signature
    signature = _getfullargspec(distfn._parse_args)
    npt.assert_(signature.varargs is None)
    npt.assert_(signature.varkw is None)
    npt.assert_(not signature.kwonlyargs)
    npt.assert_(list(signature.defaults) == list(defaults))

    shape_argnames = signature.args[:-len(defaults)]  # a, b, loc=0, scale=1
    shapes_ = distfn.shapes.replace(',', ' ').split() if distfn.shapes else ''
    npt.assert_(len(shapes_) == distfn.numargs)
    npt.assert_(len(shapes_) == len(shape_argnames))

    # check calling w/ named arguments
    shape_args = list(shape_args)

    vals = [meth(x, *shape_args) for meth in meths]
    npt.assert_(np.all(np.isfinite(vals)))

    # Move one shape parameter at a time, starting from the last, out of
    # the positional list and into the keyword dict.
    pending_names = list(shape_argnames)
    positional = list(shape_args)
    keywords = {}
    while pending_names:
        keywords[pending_names.pop()] = positional.pop()
        v = [meth(x, *positional, **keywords) for meth in meths]
        npt.assert_array_equal(vals, v)
        if 'n' not in keywords:
            # `n` is first parameter of moment(), so can't be used as named arg
            npt.assert_equal(distfn.moment(1, *positional, **keywords),
                             distfn.moment(1, *shape_args))

    # unknown arguments should not go through:
    keywords['kaboom'] = 42
    assert_raises(TypeError, distfn.cdf, x, **keywords)
|
||||
|
||||
|
||||
def check_random_state_property(distfn, args):
    """Check the ``random_state`` attribute of a distribution *instance*.

    This test fiddles with ``distfn.random_state``, which breaks other
    tests if left modified. The original value is therefore saved and
    restored in a ``finally`` clause, so it is put back even when one of
    the assertions below fails.

    Parameters
    ----------
    distfn : distribution object
        Must provide a settable ``random_state`` and ``rvs``.
    args : tuple
        Shape parameters passed to ``rvs``.
    """
    rndm = distfn.random_state
    try:
        # baseline: this relies on the global state
        np.random.seed(1234)
        distfn.random_state = None
        r0 = distfn.rvs(*args, size=8)

        # use an explicit instance-level random_state
        distfn.random_state = 1234
        r1 = distfn.rvs(*args, size=8)
        npt.assert_equal(r0, r1)

        distfn.random_state = np.random.RandomState(1234)
        r2 = distfn.rvs(*args, size=8)
        npt.assert_equal(r0, r2)

        # check that np.random.Generator can be used (numpy >= 1.17)
        if hasattr(np.random, 'default_rng'):
            # obtain a np.random.Generator object
            rng = np.random.default_rng(1234)
            distfn.rvs(*args, size=1, random_state=rng)

        # can override the instance-level random_state for an individual
        # .rvs call
        distfn.random_state = 2
        orig_state = distfn.random_state.get_state()

        r3 = distfn.rvs(*args, size=8,
                        random_state=np.random.RandomState(1234))
        npt.assert_equal(r0, r3)

        # ... and that does not alter the instance-level random_state!
        npt.assert_equal(distfn.random_state.get_state(), orig_state)
    finally:
        # restore the random_state, whether or not the checks passed
        distfn.random_state = rndm
|
||||
|
||||
|
||||
def check_meth_dtype(distfn, arg, meths):
|
||||
q0 = [0.25, 0.5, 0.75]
|
||||
x0 = distfn.ppf(q0, *arg)
|
||||
x_cast = [x0.astype(tp) for tp in (np_long, np.float16, np.float32,
|
||||
np.float64)]
|
||||
|
||||
for x in x_cast:
|
||||
# casting may have clipped the values, exclude those
|
||||
distfn._argcheck(*arg)
|
||||
x = x[(distfn.a < x) & (x < distfn.b)]
|
||||
for meth in meths:
|
||||
val = meth(x, *arg)
|
||||
npt.assert_(val.dtype == np.float64)
|
||||
|
||||
|
||||
def check_ppf_dtype(distfn, arg):
    """Check that ``ppf`` and ``isf`` return float64 for float inputs.

    The probabilities 0.25, 0.5 and 0.75 are passed as float16, float32
    and float64 arrays.
    """
    probabilities = np.asarray([0.25, 0.5, 0.75])
    inverse_methods = [distfn.ppf, distfn.isf]
    for dtype in (np.float16, np.float32, np.float64):
        q = probabilities.astype(dtype)
        for meth in inverse_methods:
            result = meth(q, *arg)
            npt.assert_(result.dtype == np.float64)
|
||||
|
||||
|
||||
def check_cmplx_deriv(distfn, arg):
|
||||
# Distributions allow complex arguments.
|
||||
def deriv(f, x, *arg):
|
||||
x = np.asarray(x)
|
||||
h = 1e-10
|
||||
return (f(x + h*1j, *arg)/h).imag
|
||||
|
||||
x0 = distfn.ppf([0.25, 0.51, 0.75], *arg)
|
||||
x_cast = [x0.astype(tp) for tp in (np_long, np.float16, np.float32,
|
||||
np.float64)]
|
||||
|
||||
for x in x_cast:
|
||||
# casting may have clipped the values, exclude those
|
||||
distfn._argcheck(*arg)
|
||||
x = x[(distfn.a < x) & (x < distfn.b)]
|
||||
|
||||
pdf, cdf, sf = distfn.pdf(x, *arg), distfn.cdf(x, *arg), distfn.sf(x, *arg)
|
||||
assert_allclose(deriv(distfn.cdf, x, *arg), pdf, rtol=1e-5)
|
||||
assert_allclose(deriv(distfn.logcdf, x, *arg), pdf/cdf, rtol=1e-5)
|
||||
|
||||
assert_allclose(deriv(distfn.sf, x, *arg), -pdf, rtol=1e-5)
|
||||
assert_allclose(deriv(distfn.logsf, x, *arg), -pdf/sf, rtol=1e-5)
|
||||
|
||||
assert_allclose(deriv(distfn.logpdf, x, *arg),
|
||||
deriv(distfn.pdf, x, *arg) / distfn.pdf(x, *arg),
|
||||
rtol=1e-5)
|
||||
|
||||
|
||||
def check_pickling(distfn, args):
    """Check that a distribution instance pickles and unpickles.

    Special attention is paid to the ``random_state`` property: random
    variates drawn after a pickle round trip must match those drawn from
    the original object. The caller's ``random_state`` is saved first and
    restored in a ``finally`` clause, so a failing check does not leave
    `distfn` with a modified random state.

    Parameters
    ----------
    distfn : distribution object
        Instance to round-trip; also called as ``distfn(*args)`` to freeze.
    args : tuple
        Shape parameters.
    """
    # save the random_state (restore later)
    rndm = distfn.random_state
    try:
        # check unfrozen
        distfn.random_state = 1234
        distfn.rvs(*args, size=8)
        s = pickle.dumps(distfn)
        r0 = distfn.rvs(*args, size=8)

        # Only data pickled a few lines above is ever unpickled here.
        unpickled = pickle.loads(s)
        r1 = unpickled.rvs(*args, size=8)
        npt.assert_equal(r0, r1)

        # also smoke test some methods
        medians = [distfn.ppf(0.5, *args), unpickled.ppf(0.5, *args)]
        npt.assert_equal(medians[0], medians[1])
        npt.assert_equal(distfn.cdf(medians[0], *args),
                         unpickled.cdf(medians[1], *args))

        # check frozen pickling/unpickling with rvs
        frozen_dist = distfn(*args)
        pkl = pickle.dumps(frozen_dist)
        unpickled = pickle.loads(pkl)

        r0 = frozen_dist.rvs(size=8)
        r1 = unpickled.rvs(size=8)
        npt.assert_equal(r0, r1)

        # check pickling/unpickling of .fit method
        if hasattr(distfn, "fit"):
            fit_function = distfn.fit
            pickled_fit_function = pickle.dumps(fit_function)
            unpickled_fit_function = pickle.loads(pickled_fit_function)
            assert (fit_function.__name__
                    == unpickled_fit_function.__name__
                    == "fit")
    finally:
        # restore the random_state, whether or not the checks passed
        distfn.random_state = rndm
|
||||
|
||||
|
||||
def check_freezing(distfn, args):
    """Check that freezing with loc and/or scale keeps ``a`` and ``b``.

    Regression test for gh-11089: freezing a distribution fails if loc
    and/or scale are specified. ``scale`` is only passed for
    ``rv_continuous`` instances.
    """
    locscale = {'loc': 1}
    if isinstance(distfn, stats.rv_continuous):
        locscale['scale'] = 2

    rv = distfn(*args, **locscale)
    reference = distfn(*args)
    assert rv.a == reference.a
    assert rv.b == reference.b
|
||||
|
||||
|
||||
def check_rvs_broadcast(distfunc, distname, allargs, shape, shape_only, otype):
    """Check that ``rvs`` broadcasts its arguments.

    The sample drawn with all arguments at once must have shape `shape`.
    Unless `shape_only` is true, it must also match the values obtained by
    drawing element by element through ``np.vectorize`` after re-seeding
    the global NumPy random state with the same seed.
    """
    seed = 123
    np.random.seed(seed)
    sample = distfunc.rvs(*allargs)
    assert_equal(sample.shape, shape, "%s: rvs failed to broadcast" % distname)
    if shape_only:
        return

    def _draw(*params):
        return distfunc.rvs(*params)

    elementwise_rvs = np.vectorize(_draw, otypes=otype)
    np.random.seed(seed)
    expected = elementwise_rvs(*allargs)
    assert_allclose(sample, expected, rtol=1e-13)
|
||||
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/__pycache__/_mvt.cpython-312.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/__pycache__/_mvt.cpython-312.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
171
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/_mvt.py
vendored
Normal file
171
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/_mvt.py
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy import special
|
||||
from scipy.stats._qmc import primes_from_2_to
|
||||
|
||||
|
||||
def _primes(n):
|
||||
# Defined to facilitate comparison between translation and source
|
||||
# In Matlab, primes(10.5) -> first four primes, primes(11.5) -> first five
|
||||
return primes_from_2_to(math.ceil(n))
|
||||
|
||||
|
||||
def _gaminv(a, b):
|
||||
# Defined to facilitate comparison between translation and source
|
||||
# Matlab's `gaminv` is like `special.gammaincinv` but args are reversed
|
||||
return special.gammaincinv(b, a)
|
||||
|
||||
|
||||
def _qsimvtv(m, nu, sigma, a, b, rng):
    """Estimates the multivariate t CDF using randomized QMC

    Parameters
    ----------
    m : int
        The number of points
    nu : float
        Degrees of freedom
    sigma : ndarray
        A 2D positive semidefinite covariance matrix
    a : ndarray
        Lower integration limits
    b : ndarray
        Upper integration limits.
    rng : Generator
        Pseudorandom number generator

    Returns
    -------
    p : float
        The estimated CDF.
    e : float
        An absolute error estimate.

    """
    # _qsimvtv is a Python translation of the Matlab function qsimvtv.
    # Variable names follow the Matlab source; statements are written one
    # per line here.
    #
    #   This function uses an algorithm given in the paper
    #      "Comparison of Methods for the Numerical Computation of
    #       Multivariate t Probabilities", in
    #      J. of Computational and Graphical Stat., 11(2002), pp. 950-971, by
    #          Alan Genz and Frank Bretz
    #
    #   The primary references for the numerical integration are
    #    "On a Number-Theoretical Integration Method"
    #    H. Niederreiter, Aequationes Mathematicae, 8(1972), pp. 304-11.
    #    and
    #    "Randomization of Number Theoretic Methods for Multiple Integration"
    #     R. Cranley & T.N.L. Patterson, SIAM J Numer Anal, 13(1976), pp. 904-14.
    #
    #   Alan Genz is the author of this function and following Matlab functions.
    #          Alan Genz, WSU Math, PO Box 643113, Pullman, WA 99164-3113
    #          Email : alangenz@wsu.edu
    #
    # Copyright (C) 2013, Alan Genz,  All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided the following conditions are met:
    #   1. Redistributions of source code must retain the above copyright
    #      notice, this list of conditions and the following disclaimer.
    #   2. Redistributions in binary form must reproduce the above copyright
    #      notice, this list of conditions and the following disclaimer in
    #      the documentation and/or other materials provided with the
    #      distribution.
    #   3. The contributor name(s) may not be used to endorse or promote
    #      products derived from this software without specific prior
    #      written permission.
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
    # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
    # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
    # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF USE
    # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    # Initialization
    sn = max(1, math.sqrt(nu))
    ch, az, bz = _chlrps(sigma, a/sn, b/sn)
    n = len(sigma)
    N = 10
    P = math.ceil(m/N)
    on = np.ones(P)
    p = 0
    e = 0
    ps = np.sqrt(_primes(5*n*math.log(n+4)/4))
    q = ps[:, np.newaxis]  # Richtmyer gens.

    # Randomization loop for ns samples
    c = None
    dc = None
    for S in range(N):
        vp = on.copy()
        s = np.zeros((n, P))
        for i in range(n):
            # periodizing transform
            x = np.abs(2*np.mod(q[i]*np.arange(1, P+1) + rng.random(), 1)-1)
            if i == 0:
                r = on
                if nu > 0:
                    r = np.sqrt(2*_gaminv(x, nu/2))
            else:
                # c and dc were set at the end of the previous iteration
                y = _Phinv(c + x*dc)
                s[i:] += ch[i:, i-1:i] * y
            si = s[i, :]
            c = on.copy()
            ai = az[i]*r - si
            d = on.copy()
            bi = bz[i]*r - si
            c[ai <= -9] = 0
            tl = abs(ai) < 9
            c[tl] = _Phi(ai[tl])
            d[bi <= -9] = 0
            tl = abs(bi) < 9
            d[tl] = _Phi(bi[tl])
            dc = d - c
            vp = vp * dc
        # running update of the estimate p and the accumulator e
        d = (np.mean(vp) - p)/(S + 1)
        p = p + d
        e = (S - 1)*e/(S + 1) + d**2
    # NOTE(review): the comment carried over from the source says the error
    # estimate is 3 times std error with N samples, but no factor of 3 is
    # applied here - confirm which is intended.
    e = math.sqrt(e)
    return p, e
|
||||
|
||||
|
||||
# Standard statistical normal distribution functions
|
||||
def _Phi(z):
|
||||
return special.ndtr(z)
|
||||
|
||||
|
||||
def _Phinv(p):
|
||||
return special.ndtri(p)
|
||||
|
||||
|
||||
def _chlrps(R, a, b):
    """
    Computes permuted and scaled lower Cholesky factor c for R which may be
    singular, also permuting and scaling integration limit vectors a and b.

    Works on copies of `R`, `a` and `b` and returns the tuple
    ``(c, ap, bp)``.
    """
    ep = 1e-10  # singularity tolerance
    eps = np.finfo(R.dtype).eps

    n = len(R)
    c = R.copy()
    ap = a.copy()
    bp = b.copy()
    d = np.sqrt(np.maximum(np.diag(c), 0))
    # Scale rows/columns of c and the limits by the square-rooted diagonal.
    for i in range(n):
        if d[i] > 0:
            c[:, i] /= d[i]
            c[i, :] /= d[i]
            ap[i] /= d[i]
            bp[i] /= d[i]
    y = np.zeros((n, 1))
    sqtp = math.sqrt(2*math.pi)

    for k in range(n):
        im = k
        ckk = 0
        dem = 1
        s = 0
        # Among the remaining variables, pick the one with the smallest
        # Phi(bi) - Phi(ai).
        for i in range(k, n):
            if c[i, i] > eps:
                cii = math.sqrt(max(c[i, i], 0))
                if i > 0:
                    s = c[i, :k] @ y[:k]
                ai = (ap[i]-s)/cii
                bi = (bp[i]-s)/cii
                de = _Phi(bi)-_Phi(ai)
                if de <= dem:
                    ckk = cii
                    dem = de
                    am = ai
                    bm = bi
                    im = i
        if im > k:
            # Swap variables k and im in the limits and in c.
            ap[[im, k]] = ap[[k, im]]
            bp[[im, k]] = bp[[k, im]]
            c[im, im] = c[k, k]
            t = c[im, :k].copy()
            c[im, :k] = c[k, :k]
            c[k, :k] = t
            t = c[im+1:, im].copy()
            c[im+1:, im] = c[im+1:, k]
            c[im+1:, k] = t
            t = c[k+1:im, k].copy()
            c[k+1:im, k] = c[im, k+1:im].T
            c[im, k+1:im] = t.T
        if ckk > ep*(k+1):
            c[k, k] = ckk
            c[k, k+1:] = 0
            for i in range(k+1, n):
                c[i, k] = c[i, k]/ckk
                c[i, k+1:i+1] = c[i, k+1:i+1] - c[i, k]*c[k+1:i+1, k].T
            if abs(dem) > ep:
                y[k] = (np.exp(-am**2/2) - np.exp(-bm**2/2)) / (sqtp*dem)
            else:
                y[k] = (am + bm) / 2
                if am < -10:
                    y[k] = bm
                elif bm > 10:
                    y[k] = am
            c[k, :k+1] /= ckk
            ap[k] /= ckk
            bp[k] /= ckk
        else:
            # Pivot below the singularity tolerance: zero out the column.
            c[k:, k] = 0
            y[k] = (ap[k] + bp[k])/2
    return c, ap, bp
|
||||
607
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/fisher_exact_results_from_r.py
vendored
Normal file
607
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/fisher_exact_results_from_r.py
vendored
Normal file
@@ -0,0 +1,607 @@
|
||||
# DO NOT EDIT THIS FILE!
|
||||
# This file was generated by the R script
|
||||
# generate_fisher_exact_results_from_r.R
|
||||
# The script was run with R version 3.6.2 (2019-12-12) at 2020-11-09 06:16:09
|
||||
|
||||
|
||||
from collections import namedtuple
|
||||
import numpy as np
|
||||
|
||||
|
||||
Inf = np.inf
|
||||
|
||||
Parameters = namedtuple('Parameters',
|
||||
['table', 'confidence_level', 'alternative'])
|
||||
RResults = namedtuple('RResults',
|
||||
['pvalue', 'conditional_odds_ratio',
|
||||
'conditional_odds_ratio_ci'])
|
||||
data = [
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.04035202926536294,
|
||||
2.662846672960251))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.004668988338943325,
|
||||
0.895792956493601))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.4153910882532168,
|
||||
259.2593661129417))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.08056337526385809,
|
||||
1.22704788545557))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.1176691231650079,
|
||||
1.787463657995973))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.003857141267422399,
|
||||
2.407369893767229))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.451643573543705))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869288))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869287))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(349.2595113327733,
|
||||
3630.382605689872))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(152.4166024390096,
|
||||
1425.700792178893))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8520462587912048,
|
||||
1.340148950273938))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.02502345007115455,
|
||||
6.304424772117853))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.001923034001462487,
|
||||
1.53670836950172))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2397970951413721,
|
||||
1291.342011095509))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.05127576113762925,
|
||||
1.717176678806983))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.07498546954483619,
|
||||
2.506969905199901))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.0007743881879531337,
|
||||
4.170192301163831))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.642491011905582))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(270.0334165523604,
|
||||
5461.333333326708))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(116.7944750275836,
|
||||
1931.995993191814))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.7949398282935892,
|
||||
1.436229679394333))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.797867027270803))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
0.6785254803404526))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
127.8497388102893))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.032332939718425))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.502407513296985))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.820421051562392))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.06224603077045))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3045.460216525746))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1186.440170942579))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.293551891610822))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4.375946050832565))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.235282118191202))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
657.2063583945989))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.498867660683128))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.186159386716762))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3.335351451901569))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.075407697450433))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969122))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969123))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4503.078257659934))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1811.766127544222))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.396522811516685))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.05119649909830196,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.007163749169069961,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.5493234651081089,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.1003538933958604,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.146507416280863,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.007821681994077808,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(397.784359748113,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(174.7148056880929,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8828406663967776,
|
||||
Inf))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.03045407081240429,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.002768053063547901,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2998184792279909,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.06180414342643172,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.09037094010066403,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.001521592095430679,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359722,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359725,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(297.9619252357688,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(130.3213490295859,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8176272148267533,
|
||||
Inf))),
|
||||
]
|
||||
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/jf_skew_t_gamlss_pdf_data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/jf_skew_t_gamlss_pdf_data.npy
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-cdf-sample-data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-cdf-sample-data.npy
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-pdf-sample-data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-pdf-sample-data.npy
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-loc-scale-sample-data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-loc-scale-sample-data.npy
vendored
Normal file
Binary file not shown.
108
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/AtmWtAg.dat
vendored
Normal file
108
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/AtmWtAg.dat
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: AtmWtAg (AtmWtAg.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 108)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Powell, L.J., Murphy, T.J. and Gramlich, J.W. (1982).
|
||||
"The Absolute Isotopic Abundance & Atomic Weight
|
||||
of a Reference Sample of Silver".
|
||||
NBS Journal of Research, 87, pp. 9-19.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
2 Treatments
|
||||
24 Replicates/Cell
|
||||
48 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 3 Parameters (mu, tau_1, tau_2)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
|
||||
Between Instrument 1 3.63834187500000E-09 3.63834187500000E-09 1.59467335677930E+01
|
||||
Within Instrument 46 1.04951729166667E-08 2.28155932971014E-10
|
||||
|
||||
Certified R-Squared 2.57426544538321E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.51048314446410E-05
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument AgWt
|
||||
1 107.8681568
|
||||
1 107.8681465
|
||||
1 107.8681572
|
||||
1 107.8681785
|
||||
1 107.8681446
|
||||
1 107.8681903
|
||||
1 107.8681526
|
||||
1 107.8681494
|
||||
1 107.8681616
|
||||
1 107.8681587
|
||||
1 107.8681519
|
||||
1 107.8681486
|
||||
1 107.8681419
|
||||
1 107.8681569
|
||||
1 107.8681508
|
||||
1 107.8681672
|
||||
1 107.8681385
|
||||
1 107.8681518
|
||||
1 107.8681662
|
||||
1 107.8681424
|
||||
1 107.8681360
|
||||
1 107.8681333
|
||||
1 107.8681610
|
||||
1 107.8681477
|
||||
2 107.8681079
|
||||
2 107.8681344
|
||||
2 107.8681513
|
||||
2 107.8681197
|
||||
2 107.8681604
|
||||
2 107.8681385
|
||||
2 107.8681642
|
||||
2 107.8681365
|
||||
2 107.8681151
|
||||
2 107.8681082
|
||||
2 107.8681517
|
||||
2 107.8681448
|
||||
2 107.8681198
|
||||
2 107.8681482
|
||||
2 107.8681334
|
||||
2 107.8681609
|
||||
2 107.8681101
|
||||
2 107.8681512
|
||||
2 107.8681469
|
||||
2 107.8681360
|
||||
2 107.8681254
|
||||
2 107.8681261
|
||||
2 107.8681450
|
||||
2 107.8681368
|
||||
85
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SiRstv.dat
vendored
Normal file
85
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SiRstv.dat
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SiRstv (SiRstv.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 85)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Ehrstein, James and Croarkin, M. Carroll.
|
||||
Unpublished NIST dataset.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
5 Treatments
|
||||
5 Replicates/Cell
|
||||
25 Observations
|
||||
3 Constant Leading Digits
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 6 Parameters (mu,tau_1, ... , tau_5)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Instrument 4 5.11462616000000E-02 1.27865654000000E-02 1.18046237440255E+00
|
||||
Within Instrument 20 2.16636560000000E-01 1.08318280000000E-02
|
||||
|
||||
Certified R-Squared 1.90999039051129E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.04076068334656E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument Resistance
|
||||
1 196.3052
|
||||
1 196.1240
|
||||
1 196.1890
|
||||
1 196.2569
|
||||
1 196.3403
|
||||
2 196.3042
|
||||
2 196.3825
|
||||
2 196.1669
|
||||
2 196.3257
|
||||
2 196.0422
|
||||
3 196.1303
|
||||
3 196.2005
|
||||
3 196.2889
|
||||
3 196.0343
|
||||
3 196.1811
|
||||
4 196.2795
|
||||
4 196.1748
|
||||
4 196.1494
|
||||
4 196.1485
|
||||
4 195.9885
|
||||
5 196.2119
|
||||
5 196.1051
|
||||
5 196.1850
|
||||
5 196.0052
|
||||
5 196.2090
|
||||
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs01.dat
vendored
Normal file
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs01.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs01 (SmLs01.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
1 Constant Leading Digit
|
||||
Lower Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1.4
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
2 1.3
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
3 1.5
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
4 1.3
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
5 1.5
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
6 1.3
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
7 1.5
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
8 1.3
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
9 1.5
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs02.dat
vendored
Normal file
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs02.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs03.dat
vendored
Normal file
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs03.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs04.dat
vendored
Normal file
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs04.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs04 (SmLs04.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000.4
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
2 1000000.3
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
3 1000000.5
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
4 1000000.3
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
5 1000000.5
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
6 1000000.3
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
7 1000000.5
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
8 1000000.3
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
9 1000000.5
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs05.dat
vendored
Normal file
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs05.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs06.dat
vendored
Normal file
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs06.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs07.dat
vendored
Normal file
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs07.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs07 (SmLs07.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
13 Constant Leading Digits
|
||||
Higher Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000000000.4
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
2 1000000000000.3
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
3 1000000000000.5
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
4 1000000000000.3
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
5 1000000000000.5
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
6 1000000000000.3
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
7 1000000000000.5
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
8 1000000000000.3
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
9 1000000000000.5
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs08.dat
vendored
Normal file
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs08.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs09.dat
vendored
Normal file
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs09.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
97
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_linregress/Norris.dat
vendored
Normal file
97
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_linregress/Norris.dat
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: Norris (Norris.dat)
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 31 to 46)
|
||||
Data (lines 61 to 96)
|
||||
|
||||
Procedure: Linear Least Squares Regression
|
||||
|
||||
Reference: Norris, J., NIST.
|
||||
Calibration of Ozone Monitors.
|
||||
|
||||
Data: 1 Response Variable (y)
|
||||
1 Predictor Variable (x)
|
||||
36 Observations
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
Model: Linear Class
|
||||
2 Parameters (B0,B1)
|
||||
|
||||
y = B0 + B1*x + e
|
||||
|
||||
|
||||
|
||||
Certified Regression Statistics
|
||||
|
||||
Standard Deviation
|
||||
Parameter Estimate of Estimate
|
||||
|
||||
B0 -0.262323073774029 0.232818234301152
|
||||
B1 1.00211681802045 0.429796848199937E-03
|
||||
|
||||
Residual
|
||||
Standard Deviation 0.884796396144373
|
||||
|
||||
R-Squared 0.999993745883712
|
||||
|
||||
|
||||
Certified Analysis of Variance Table
|
||||
|
||||
Source of Degrees of Sums of Mean
|
||||
Variation Freedom Squares Squares F Statistic
|
||||
|
||||
Regression 1 4255954.13232369 4255954.13232369 5436385.54079785
|
||||
Residual 34 26.6173985294224 0.782864662630069
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: y x
|
||||
0.1 0.2
|
||||
338.8 337.4
|
||||
118.1 118.2
|
||||
888.0 884.6
|
||||
9.2 10.1
|
||||
228.1 226.5
|
||||
668.5 666.3
|
||||
998.5 996.3
|
||||
449.1 448.6
|
||||
778.9 777.0
|
||||
559.2 558.2
|
||||
0.3 0.4
|
||||
0.1 0.6
|
||||
778.1 775.5
|
||||
668.8 666.9
|
||||
339.3 338.0
|
||||
448.9 447.5
|
||||
10.8 11.6
|
||||
557.7 556.0
|
||||
228.3 228.1
|
||||
998.0 995.8
|
||||
888.8 887.6
|
||||
119.6 120.2
|
||||
0.3 0.3
|
||||
0.6 0.3
|
||||
557.6 556.8
|
||||
339.3 339.1
|
||||
888.0 887.2
|
||||
998.5 999.0
|
||||
778.9 779.0
|
||||
10.2 11.1
|
||||
117.6 118.3
|
||||
228.9 229.2
|
||||
668.4 669.1
|
||||
449.2 448.9
|
||||
0.2 0.5
|
||||
|
||||
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/rel_breitwigner_pdf_sample_data_ROOT.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/rel_breitwigner_pdf_sample_data_ROOT.npy
vendored
Normal file
Binary file not shown.
1499
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/studentized_range_mpmath_ref.json
vendored
Normal file
1499
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/studentized_range_mpmath_ref.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1290
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py
vendored
Normal file
1290
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
568
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py
vendored
Normal file
568
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py
vendored
Normal file
@@ -0,0 +1,568 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.stats import (binned_statistic, binned_statistic_2d,
|
||||
binned_statistic_dd)
|
||||
from scipy._lib._util import check_random_state
|
||||
|
||||
from .common_tests import check_named_results
|
||||
|
||||
|
||||
class TestBinnedStatistic:
|
||||
|
||||
@classmethod
def setup_class(cls):
    # Shared fixtures for the whole class, all drawn from one seeded
    # generator.  The draw order below must not change: several tests
    # compare against hard-coded bin numbers derived from these samples.
    rng = check_random_state(9865)
    uniform_fixtures = (
        ("x", 100),
        ("y", 100),
        ("v", 100),
        ("X", (100, 3)),
        ("w", 100),
    )
    for attr_name, shape in uniform_fixtures:
        setattr(cls, attr_name, rng.uniform(size=shape))
    # Same kind of sample, shifted by a large constant offset.
    cls.u = rng.uniform(size=100) + 1e6
|
||||
|
||||
def test_1d_count(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
|
||||
count2, edges2 = np.histogram(x, bins=10)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_gh5927(self):
|
||||
# smoke test for gh5927 - binned_statistic was using `is` for string
|
||||
# comparison
|
||||
x = self.x
|
||||
v = self.v
|
||||
statistics = ['mean', 'median', 'count', 'sum']
|
||||
for statistic in statistics:
|
||||
binned_statistic(x, v, statistic, bins=10)
|
||||
|
||||
def test_big_number_std(self):
|
||||
# tests for numerical stability of std calculation
|
||||
# see issue gh-10126 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_empty_bins_std(self):
|
||||
# tests that std returns gives nan for empty bins
|
||||
x = self.x
|
||||
u = self.u
|
||||
print(binned_statistic(x, u, 'count', bins=1000))
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=1000)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=1000)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_non_finite_inputs_and_int_bins(self):
    # If the sample passed to binned_statistic contains np.inf or np.nan,
    # a ValueError must be raised; see issue gh-9010 for more.
    x = self.x
    # Work on a private copy: `self.u` belongs to the class and is shared
    # with the other tests, so it must never be left holding inf/nan,
    # not even when one of the assertions below fails.
    u = self.u.copy()

    u[0] = np.inf
    assert_raises(ValueError, binned_statistic, u, x, 'std', bins=10)
    # need to test for non-python specific ints, e.g. np.int8, np.int64
    assert_raises(ValueError, binned_statistic, u, x, 'std',
                  bins=np.int64(10))

    u[0] = np.nan
    assert_raises(ValueError, binned_statistic, u, x, 'count', bins=10)
|
||||
|
||||
def test_1d_result_attributes(self):
    # The 1-D result must expose its fields under these names.
    expected_fields = ('statistic', 'bin_edges', 'binnumber')
    outcome = binned_statistic(self.x, self.v, 'count', bins=10)
    check_named_results(outcome, expected_fields)
|
||||
|
||||
def test_1d_sum(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
|
||||
sum2, edges2 = np.histogram(x, bins=10, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_mean(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_std(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_min(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'min', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.min, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_max(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_median(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_bincode(self):
|
||||
x = self.x[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
|
||||
bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
|
||||
1, 2, 1])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
assert_allclose(bcount, count1)
|
||||
|
||||
def test_1d_range_keyword(self):
|
||||
# Regression test for gh-3063, range can be (min, max) or [(min, max)]
|
||||
np.random.seed(9865)
|
||||
x = np.arange(30)
|
||||
data = np.random.random(30)
|
||||
|
||||
mean, bins, _ = binned_statistic(x[:15], data[:15])
|
||||
mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
|
||||
mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))
|
||||
|
||||
assert_allclose(mean, mean_range)
|
||||
assert_allclose(bins, bins_range)
|
||||
assert_allclose(mean, mean_range2)
|
||||
assert_allclose(bins, bins_range2)
|
||||
|
||||
def test_1d_multi_values(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
|
||||
stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_count(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=5)
|
||||
count2, binx2, biny2 = np.histogram2d(x, y, bins=5)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_result_attributes(self):
    # The 2-D result must expose its fields under these names.
    expected_fields = ('statistic', 'x_edge', 'y_edge', 'binnumber')
    outcome = binned_statistic_2d(self.x, self.y, self.v, 'count', bins=5)
    check_named_results(outcome, expected_fields)
|
||||
|
||||
def test_2d_sum(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
|
||||
sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean_unicode(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_std(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_min(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'min', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.min, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_max(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'max', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.max, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_median(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'median', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(
|
||||
x, y, v, np.median, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_bincode(self):
|
||||
x = self.x[:20]
|
||||
y = self.y[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=3)
|
||||
bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
|
||||
6, 11, 16, 6, 6, 11, 8])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_2d_multi_values(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, binx1v, biny1v, bc1v = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=8)
|
||||
stat1w, binx1w, biny1w, bc1w = binned_statistic_2d(
|
||||
x, y, w, 'mean', bins=8)
|
||||
stat2, binx2, biny2, bc2 = binned_statistic_2d(
|
||||
x, y, [v, w], 'mean', bins=8)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(binx1v, binx2)
|
||||
assert_allclose(biny1w, biny2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_binnumbers_unraveled(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20)
|
||||
stat, edgesy, bcy = binned_statistic(y, v, 'mean', bins=10)
|
||||
|
||||
stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True)
|
||||
|
||||
bcx3 = np.searchsorted(edgesx, x, side='right')
|
||||
bcy3 = np.searchsorted(edgesy, y, side='right')
|
||||
|
||||
# `numpy.searchsorted` is non-inclusive on right-edge, compensate
|
||||
bcx3[x == x.max()] -= 1
|
||||
bcy3[y == y.max()] -= 1
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcx3, bc2[0])
|
||||
assert_allclose(bcy3, bc2[1])
|
||||
|
||||
def test_dd_count(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
count2, edges2 = np.histogramdd(X, bins=3)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_result_attributes(self):
    # The N-D result must expose its fields under these names.
    expected_fields = ('statistic', 'bin_edges', 'binnumber')
    outcome = binned_statistic_dd(self.X, self.v, 'count', bins=3)
    check_named_results(outcome, expected_fields)
|
||||
|
||||
def test_dd_sum(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
|
||||
sum2, edges2 = np.histogramdd(X, bins=3, weights=v)
|
||||
sum3, edges3, bc = binned_statistic_dd(X, v, np.sum, bins=3)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
assert_allclose(sum1, sum3)
|
||||
assert_allclose(edges1, edges3)
|
||||
|
||||
def test_dd_mean(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_std(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_min(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'min', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.min, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_max(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_median(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_bincode(self):
|
||||
X = self.X[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
|
||||
32, 36, 91, 43, 87, 81, 81])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_dd_multi_values(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
for stat in ["count", "sum", "mean", "std", "min", "max", "median",
|
||||
np.std]:
|
||||
stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8)
|
||||
stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8)
|
||||
stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8)
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(edges1w, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_dd_binnumbers_unraveled(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
|
||||
stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
|
||||
stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)
|
||||
|
||||
stat2, edges2, bc2 = binned_statistic_dd(
|
||||
X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcz, bc2[2])
|
||||
|
||||
def test_dd_binned_statistic_result(self):
|
||||
# NOTE: tests the reuse of bin_edges from previous call
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random(10000)
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = (bins, bins, bins)
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
stat = result.statistic
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean',
|
||||
binned_statistic_result=result)
|
||||
stat2 = result.statistic
|
||||
|
||||
assert_allclose(stat, stat2)
|
||||
|
||||
def test_dd_zero_dedges(self):
    # Bin edges containing a repeated value (a zero-width bin) must be
    # rejected with a ValueError.
    #
    # A private, seeded RandomState keeps the test reproducible and
    # avoids consuming values from the process-wide numpy generator.
    rng = np.random.RandomState(9865)
    x = rng.random_sample((10000, 3))
    v = rng.random_sample(10000)
    bins = np.linspace(0, 1, 10)
    # Duplicate the last edge so the final bin has zero width.
    bins = np.append(bins, 1)
    bins = (bins, bins, bins)
    with assert_raises(ValueError, match='difference is numerically 0'):
        binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
|
||||
def test_dd_range_errors(self):
    """Bad `range` arguments raise descriptive ValueErrors (see gh-12996)."""
    # Each case: (sample, offending range, expected message pattern).
    cases = [
        ([self.y],
         [[1, 0]],
         'In range, start must be <= stop'),
        ([self.x, self.y],
         [[1, 0], [0, 1]],
         'In dimension 1 of range, start must be <= stop'),
        ([self.x, self.y],
         [[0, 1], [1, 0]],
         'In dimension 2 of range, start must be <= stop'),
        ([self.x, self.y],
         [[0, 1]],
         'range given for 1 dimensions; 2 required'),
    ]
    for sample, bad_range, message in cases:
        with assert_raises(ValueError, match=message):
            binned_statistic_dd(sample, self.v, range=bad_range)
|
||||
|
||||
def test_binned_statistic_float32(self):
|
||||
X = np.array([0, 0.42358226], dtype=np.float32)
|
||||
stat, _, _ = binned_statistic(X, None, 'count', bins=5)
|
||||
assert_allclose(stat, np.array([1, 0, 0, 0, 1], dtype=np.float64))
|
||||
|
||||
def test_gh14332(self):
|
||||
# Test the wrong output when the `sample` is close to bin edge
|
||||
x = []
|
||||
size = 20
|
||||
for i in range(size):
|
||||
x += [1-0.1**i]
|
||||
|
||||
bins = np.linspace(0,1,11)
|
||||
sum1, edges1, bc = binned_statistic_dd(x, np.ones(len(x)),
|
||||
bins=[bins], statistic='sum')
|
||||
sum2, edges2 = np.histogram(x, bins=bins)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1[0], edges2)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float64, np.complex128])
@pytest.mark.parametrize("statistic", [np.mean, np.median, np.sum, np.std,
                                       np.min, np.max, 'count',
                                       lambda x: (x**2).sum(),
                                       lambda x: (x**2).sum() * 1j])
def test_dd_all(self, dtype, statistic):
    """Two-bin statistics match a direct computation on each half.

    The sample is split with the mask ``x >= 0.5``; the reference applies
    `statistic` (or `len` for ``'count'``) to the values on each side.
    """
    def reference(values):
        if statistic == 'count':
            return len(values)
        return statistic(values)

    rng = np.random.default_rng(3704743126639371)
    n = 10
    x = rng.random(size=n)
    upper = x >= 0.5
    v = rng.random(size=n)
    if dtype is np.complex128:
        # Add an imaginary part drawn from the same generator.
        v = v + rng.random(size=n)*1j

    result = binned_statistic_dd(x, v, statistic, bins=2)
    stat = result[0]
    ref = np.array([reference(v[~upper]), reference(v[upper])])
    assert_allclose(stat, ref)
    assert stat.dtype == np.result_type(ref.dtype, np.float64)
|
||||
152
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_censored_data.py
vendored
Normal file
152
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_censored_data.py
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
# Tests for the CensoredData class.
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_array_equal
|
||||
from scipy.stats import CensoredData
|
||||
|
||||
|
||||
class TestCensoredData:
    """Construction, validation and counting checks for `CensoredData`."""

    def test_basic(self):
        # One of every kind of observation, passed explicitly.
        exact, below, above, between = [1], [0], [2, 5], [[2, 3]]
        data = CensoredData(exact, left=below, right=above,
                            interval=between)
        stored = [('_uncensored', exact), ('_left', below),
                  ('_right', above), ('_interval', between)]
        for attr, expected in stored:
            assert_equal(getattr(data, attr), expected)

        # `_uncensor` is compared with the uncensored, left and right values
        # followed by the midpoint of each interval.
        flattened = data._uncensor()
        midpoints = np.mean(between, axis=1)
        assert_equal(flattened,
                     np.concatenate((exact, below, above, midpoints)))

    def test_right_censored(self):
        values = np.array([0, 3, 2.5])
        flags = np.array([0, 1, 0], dtype=bool)
        data = CensoredData.right_censored(values, flags)
        assert_equal(data._uncensored, values[~flags])
        assert_equal(data._right, values[flags])
        assert_equal(data._left, [])
        assert_equal(data._interval, np.empty((0, 2)))

    def test_left_censored(self):
        values = np.array([0, 3, 2.5])
        flags = np.array([0, 1, 0], dtype=bool)
        data = CensoredData.left_censored(values, flags)
        assert_equal(data._uncensored, values[~flags])
        assert_equal(data._left, values[flags])
        assert_equal(data._right, [])
        assert_equal(data._interval, np.empty((0, 2)))

    def test_interval_censored_basic(self):
        low = [0.5, 2.0, 3.0, 5.5]
        high = [1.0, 2.5, 3.5, 7.0]
        data = CensoredData.interval_censored(low=low, high=high)
        assert_array_equal(data._interval, np.column_stack((low, high)))
        # Nothing should have been routed to the other three arrays.
        for attr in ('_uncensored', '_left', '_right'):
            assert getattr(data, attr).shape == (0,)

    def test_interval_censored_mixed(self):
        # This is actually a mix of uncensored, left-censored, right-censored
        # and interval-censored data.  Check that when the `interval_censored`
        # class method is used, the data is correctly separated into the
        # appropriate arrays.
        low = [0.5, -np.inf, -13.0, 2.0, 1.0, 10.0, -1.0]
        high = [0.5, 2500.0, np.inf, 3.0, 1.0, 11.0, np.inf]
        data = CensoredData.interval_censored(low=low, high=high)
        assert_array_equal(data._interval, [[2.0, 3.0], [10.0, 11.0]])
        assert_array_equal(data._uncensored, [0.5, 1.0])
        assert_array_equal(data._left, [2500.0])
        assert_array_equal(data._right, [-13.0, -1.0])

    def test_interval_to_other_types(self):
        # The interval parameter can represent uncensored and
        # left- or right-censored data.  Test the conversion of such
        # an example to the canonical form in which the different
        # types have been split into the separate arrays.
        rows = [
            [0, 1],         # interval-censored
            [2, 2],         # not censored
            [3, 3],         # not censored
            [9, np.inf],    # right-censored
            [8, np.inf],    # right-censored
            [-np.inf, 0],   # left-censored
            [1, 2],         # interval-censored
        ]
        data = CensoredData(interval=np.array(rows))
        assert_equal(data._uncensored, [2, 3])
        assert_equal(data._left, [0])
        assert_equal(data._right, [9, 8])
        assert_equal(data._interval, [[0, 1], [1, 2]])

    def test_empty_arrays(self):
        data = CensoredData(uncensored=[], left=[], right=[], interval=[])
        for attr in ('_uncensored', '_left', '_right'):
            assert getattr(data, attr).shape == (0,)
        assert data._interval.shape == (0, 2)
        assert len(data) == 0

    def test_invalid_constructor_args(self):
        one_dim_names = ('uncensored', 'left', 'right')

        # Wrong dimensionality.
        for name in one_dim_names:
            with pytest.raises(ValueError, match='must be a one-dimensional'):
                CensoredData(**{name: [[1, 2, 3]]})
        with pytest.raises(ValueError, match='must be a two-dimensional'):
            CensoredData(interval=[[1, 2, 3]])

        # nan is rejected everywhere.
        for name in one_dim_names:
            with pytest.raises(ValueError, match='must not contain nan'):
                CensoredData(**{name: [1, np.nan, 2]})
        with pytest.raises(ValueError, match='must not contain nan'):
            CensoredData(interval=[[1, np.nan], [2, 3]])

        # Malformed intervals.
        with pytest.raises(ValueError,
                           match='both values must not be infinite'):
            CensoredData(interval=[[1, 3], [2, 9], [np.inf, np.inf]])

        with pytest.raises(ValueError,
                           match='left value must not exceed the right'):
            CensoredData(interval=[[1, 0], [2, 2]])

    @pytest.mark.parametrize('func', [CensoredData.left_censored,
                                      CensoredData.right_censored])
    def test_invalid_left_right_censored_args(self, func):
        # Each case: (x, censored, expected message pattern).
        cases = [
            ([[1, 2, 3]], [0, 1, 1], '`x` must be one-dimensional'),
            ([1, 2, 3], [[0, 1, 1]], '`censored` must be one-dimensional'),
            ([1, 2, np.nan], [0, 1, 1], '`x` must not contain'),
            ([1, 2, 3], [0, 0, 1, 1], 'must have the same length'),
        ]
        for x, censored, message in cases:
            with pytest.raises(ValueError, match=message):
                func(x, censored)

    def test_invalid_censored_args(self):
        make = CensoredData.interval_censored
        with pytest.raises(ValueError,
                           match='`low` must be a one-dimensional'):
            make(low=[[3]], high=[4, 5])
        with pytest.raises(ValueError,
                           match='`high` must be a one-dimensional'):
            make(low=[3], high=[[4, 5]])
        with pytest.raises(ValueError, match='`low` must not contain'):
            make([1, 2, np.nan], [0, 1, 1])
        with pytest.raises(ValueError, match='must have the same length'):
            make([1, 2, 3], [0, 0, 1, 1])

    def test_count_censored(self):
        # No censored values at all.
        plain = CensoredData([1, 2, 3])
        assert plain.num_censored() == 0
        # One left-censored value and one interval.
        mixed = CensoredData(uncensored=[2.5], left=[10], interval=[[0, 1]])
        assert mixed.num_censored() == 2
|
||||
241
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_contingency.py
vendored
Normal file
241
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_contingency.py
vendored
Normal file
@@ -0,0 +1,241 @@
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_array_equal,
|
||||
assert_array_almost_equal, assert_approx_equal,
|
||||
assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.special import xlogy
|
||||
from scipy.stats.contingency import (margins, expected_freq,
|
||||
chi2_contingency, association)
|
||||
|
||||
|
||||
def test_margins():
    """Check `margins` on 1-D to 3-D tables against hand-computed sums."""
    # Each case: (input table, expected marginal arrays in axis order).
    cases = [
        (np.array([1]),
         [np.array([1])]),
        (np.array([[1]]),
         [np.array([[1]]), np.array([[1]])]),
        (np.arange(12).reshape(2, 6),
         [np.array([[15], [51]]),
          np.array([[6, 8, 10, 12, 14, 16]])]),
        (np.arange(24).reshape(2, 3, 4),
         [np.array([[[66]], [[210]]]),
          np.array([[[60], [92], [124]]]),
          np.array([[[60, 66, 72, 78]]])]),
    ]
    for table, expected in cases:
        computed = margins(table)
        assert_equal(len(computed), len(expected))
        for got, want in zip(computed, expected):
            assert_array_equal(got, want)
|
||||
|
||||
|
||||
def test_expected_freq():
    """Check `expected_freq` on a 1-element, a 3-D and a 2-D table."""
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    # 3-D table for which every expected frequency is 1.
    cube = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    assert_array_equal(expected_freq(cube), np.ones_like(cube))

    # 2-D table with hand-computed expected frequencies.
    table = np.array([[10, 10, 20], [20, 20, 20]])
    wanted = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(expected_freq(table), wanted)
|
||||
|
||||
|
||||
def test_chi2_contingency_trivial():
    """Some very simple tests for chi2_contingency."""
    # Each case: (observed table, expected degrees of freedom).  In both the
    # statistic is 0, the p-value is 1 and the expected table equals the
    # observed one.
    cases = [
        (np.array([[1, 2], [1, 2]]), 1),   # a trivial case
        (np.array([1, 2, 3]), 0),          # a *really* trivial case: 1-D data
    ]
    for observed, expected_dof in cases:
        result = chi2_contingency(observed, correction=False)
        statistic, pvalue, dof, expected = result
        assert_equal(statistic, 0.0)
        assert_equal(pvalue, 1.0)
        assert_equal(dof, expected_dof)
        assert_array_equal(observed, expected)
|
||||
|
||||
|
||||
def test_chi2_contingency_R():
    """Compare `chi2_contingency` with values computed independently in R.

    Two tables are checked, a 3-way and a 4-way one.  The R session that
    produced each set of reference values is reproduced in the comments
    next to the corresponding table.
    """
    # ---- 3-way table ----------------------------------------------------
    # R code:
    #     # Data vector.
    #     data <- c(
    #       12, 34, 23,    4,  47,  11,
    #       35, 31, 11,   34,  10,  18,
    #       12, 32,  9,   18,  13,  19,
    #       12, 12, 14,    9,  33,  25
    #       )
    #
    #     # Create factor tags:r=rows, c=columns, t=tiers
    #     r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    #     c <- factor(gl(3, 1,   2*3*4, labels=c("c1", "c2", "c3")))
    #     t <- factor(gl(2, 3,   2*3*4, labels=c("t1", "t2")))
    #
    #     # 3-way Chi squared test of independence
    #     s = summary(xtabs(data~r+c+t))
    #     print(s)
    #
    # R output:
    #     Call: xtabs(formula = data ~ r + c + t)
    #     Number of cases in table: 478
    #     Number of factors: 3
    #     Test for independence of all factors:
    #             Chisq = 102.17, df = 17, p-value = 3.514e-14
    three_way = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    statistic, pvalue, dof, _ = chi2_contingency(three_way)
    assert_approx_equal(statistic, 102.17, significant=5)
    assert_approx_equal(pvalue, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    # ---- 4-way table ----------------------------------------------------
    # R code:
    #     # Data vector.
    #     data <- c(
    #         #
    #         12, 17,
    #         11, 16,
    #         #
    #         11, 12,
    #         15, 16,
    #         #
    #         23, 15,
    #         30, 22,
    #         #
    #         14, 17,
    #         15, 16
    #         )
    #
    #     # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    #     r <- factor(gl(2, 2,  2*2*2*2, labels=c("r1", "r2")))
    #     c <- factor(gl(2, 1,  2*2*2*2, labels=c("c1", "c2")))
    #     d <- factor(gl(2, 4,  2*2*2*2, labels=c("d1", "d2")))
    #     t <- factor(gl(2, 8,  2*2*2*2, labels=c("t1", "t2")))
    #
    #     # 4-way Chi squared test of independence
    #     s = summary(xtabs(data~r+c+d+t))
    #     print(s)
    #
    # R output:
    #     Call: xtabs(formula = data ~ r + c + d + t)
    #     Number of cases in table: 262
    #     Number of factors: 4
    #     Test for independence of all factors:
    #             Chisq = 8.758, df = 11, p-value = 0.6442
    four_way = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    statistic, pvalue, dof, _ = chi2_contingency(four_way)
    assert_approx_equal(statistic, 8.758, significant=4)
    assert_approx_equal(pvalue, 0.6442, significant=4)
    assert_equal(dof, 11)
|
||||
|
||||
|
||||
def test_chi2_contingency_g():
    """The 'log-likelihood' statistic equals 2*sum(obs*log(obs/expected))."""
    def g_statistic(observed, expected):
        return 2*xlogy(observed, observed/expected).sum()

    table = np.array([[15, 60], [15, 90]])

    # Without the continuity correction the raw counts are used.
    g, _, _, expected = chi2_contingency(table, lambda_='log-likelihood',
                                         correction=False)
    assert_allclose(g, g_statistic(table, expected))

    # With the correction each count is compared after a shift of 0.5.
    g, _, _, expected = chi2_contingency(table, lambda_='log-likelihood',
                                         correction=True)
    shifted = table + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, g_statistic(shifted, expected))

    # A 2x3 table with the default `correction`.
    table = np.array([[10, 12, 10], [12, 10, 10]])
    g, _, _, expected = chi2_contingency(table, lambda_='log-likelihood')
    assert_allclose(g, g_statistic(table, expected))
|
||||
|
||||
|
||||
def test_chi2_contingency_bad_args():
    """Test that "bad" inputs raise a ValueError."""
    bad_tables = [
        # Negative value in the array of observed frequencies.
        np.array([[-1, 10], [1, 2]]),
        # The zeros in this will result in zeros in the array
        # of expected frequencies.
        np.array([[0, 1], [0, 1]]),
        # A degenerate case: `observed` has size 0.
        np.empty((0, 8)),
    ]
    for observed in bad_tables:
        with assert_raises(ValueError):
            chi2_contingency(observed)
|
||||
|
||||
|
||||
def test_chi2_contingency_yates_gh13875():
    """Regression check for gh-13875: the p-value for this table is 1."""
    # Magnitude of Yates' continuity correction should not exceed difference
    # between expected and observed value of the statistic; see gh-13875
    table = np.array([[1573, 3], [4, 0]])
    _, pvalue, _, _ = chi2_contingency(table)
    assert_allclose(pvalue, 1, rtol=1e-12)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("correction", [False, True])
def test_result(correction):
    """Named attributes of the result agree with the result as a tuple."""
    table = np.array([[1, 2], [1, 2]])
    result = chi2_contingency(table, correction=correction)
    by_name = (result.statistic, result.pvalue, result.dof,
               result.expected_freq)
    assert_equal(by_name, result)
|
||||
|
||||
|
||||
def test_bad_association_args():
    """Invalid arguments to `association` raise a ValueError."""
    bad_calls = [
        # Invalid Test Statistic
        ([[1, 2], [3, 4]], "X"),
        # Invalid array shape
        ([[[1, 2]], [[3, 4]]], "cramer"),
        # chi2_contingency exception
        ([[-1, 10], [1, 2]], 'cramer'),
        # Invalid Array Item Data Type
        (np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer'),
    ]
    for observed, method in bad_calls:
        with assert_raises(ValueError):
            association(observed, method)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('stat, expected',
                         [('cramer', 0.09222412010290792),
                          ('tschuprow', 0.0775509319944633),
                          ('pearson', 0.12932925727138758)])
def test_assoc(stat, expected):
    """Each association measure reproduces its reference value."""
    # 2d Array
    rows = [[12, 13, 14, 15, 16],
            [17, 16, 18, 19, 11],
            [9, 15, 14, 12, 11]]
    table = np.array(rows)
    measure = association(observed=table, method=stat)
    assert_allclose(measure, expected)
|
||||
1046
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py
vendored
Normal file
1046
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
683
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_continuous_fit_censored.py
vendored
Normal file
683
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_continuous_fit_censored.py
vendored
Normal file
@@ -0,0 +1,683 @@
|
||||
# Tests for fitting specific distributions to censored data.
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from scipy.optimize import fmin
|
||||
from scipy.stats import (CensoredData, beta, cauchy, chi2, expon, gamma,
|
||||
gumbel_l, gumbel_r, invgauss, invweibull, laplace,
|
||||
logistic, lognorm, nct, ncx2, norm, weibull_max,
|
||||
weibull_min)
|
||||
|
||||
|
||||
# In some tests, we'll use this optimizer for improved accuracy.
|
||||
def optimizer(func, x0, args=(), disp=0):
    """Minimize `func` from `x0` with `fmin` using tight tolerances.

    `args` and `disp` are forwarded to `fmin`; both `xtol` and `ftol` are
    set to 1e-12.  The return value is whatever `fmin` returns.
    """
    tight = {'xtol': 1e-12, 'ftol': 1e-12}
    return fmin(func, x0, args=args, disp=disp, **tight)
|
||||
|
||||
|
||||
def test_beta():
    """
    Fit the two beta shape parameters to interval-censored data.

    Reference values were computed in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
    +                    right=c(0.20, 0.55, 0.90, 0.95))
    > result = fitdistcens(data, 'beta', control=list(reltol=1e-14))

    > result
    Fitting of the distribution ' beta ' on censored data by maximum likelihood
    Parameters:
           estimate
    shape1 1.419941
    shape2 1.027066
    > result$sd
       shape1    shape2
    0.9914177 0.6866565
    """
    intervals = [[0.10, 0.20],
                 [0.50, 0.55],
                 [0.75, 0.90],
                 [0.80, 0.95]]
    sample = CensoredData(interval=intervals)

    # For this test, fit only the shape parameters; loc and scale are fixed.
    shape1, shape2, loc, scale = beta.fit(sample, floc=0, fscale=1,
                                          optimizer=optimizer)

    assert_allclose(shape1, 1.419941, rtol=5e-6)
    assert_allclose(shape2, 1.027066, rtol=5e-6)
    assert loc == 0
    assert scale == 1
|
||||
|
||||
|
||||
def test_cauchy_right_censored():
    """
    Test fitting the Cauchy distribution to right-censored data.

    Calculation in R, with two values not censored [1, 10] and
    one right-censored value [30].

    > library(fitdistrplus)
    > data <- data.frame(left=c(1, 10, 30), right=c(1, 10, NA))
    > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' cauchy ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    location 7.100001
    scale    7.455866
    """
    data = CensoredData(uncensored=[1, 10], right=[30])
    loc, scale = cauchy.fit(data, optimizer=optimizer)
    # Expected values are the R estimates quoted in the docstring above
    # (the location was previously mistyped as 7.10001).
    assert_allclose(loc, 7.100001, rtol=5e-6)
    assert_allclose(scale, 7.455866, rtol=5e-6)
|
||||
|
||||
|
||||
def test_cauchy_mixed():
    """
    Fit the Cauchy distribution to data with every kind of censoring.

    The reference values come from R, using:
      * two values not censored [1, 10],
      * one left-censored [1],
      * one right-censored [30], and
      * one interval-censored [[4, 8]].

    > library(fitdistrplus)
    > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
    > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' cauchy ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    location 4.605150
    scale    5.900852
    """
    sample = CensoredData(uncensored=[1, 10], left=[1], right=[30],
                          interval=[[4, 8]])
    fitted_loc, fitted_scale = cauchy.fit(sample, optimizer=optimizer)
    assert_allclose(fitted_loc, 4.605150, rtol=5e-6)
    assert_allclose(fitted_scale, 5.900852, rtol=5e-6)
|
||||
|
||||
|
||||
def test_chi2_mixed():
    """
    Fit only the shape parameter (df) of chi2 to mixed censored data.

    The reference value comes from R, using:
      * two values not censored [1, 10],
      * one left-censored [1],
      * one right-censored [30], and
      * one interval-censored [[4, 8]].

    > library(fitdistrplus)
    > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
    > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' chisq ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    df 5.060329
    """
    sample = CensoredData(uncensored=[1, 10], left=[1], right=[30],
                          interval=[[4, 8]])
    # loc and scale are held fixed, so only df is estimated.
    fitted_df, loc, scale = chi2.fit(sample, floc=0, fscale=1,
                                     optimizer=optimizer)
    assert_allclose(fitted_df, 5.060329, rtol=5e-6)
    assert loc == 0
    assert scale == 1
|
||||
|
||||
|
||||
def test_expon_right_censored():
    """
    Fit the exponential scale to right-censored data; compare with theory.

    For the exponential distribution with loc=0, the exact solution for
    fitting n uncensored points x[0]...x[n-1] and m right-censored points
    x[n]..x[n+m-1] is

        scale = sum(x)/n

    That is, divide the sum of all the values (not censored and
    right-censored) by the number of uncensored values.  (See, for example,
    https://en.wikipedia.org/wiki/Censoring_(statistics)#Likelihood.)

    The second derivative of the log-likelihood function is

        n/scale**2 - 2*sum(x)/scale**3

    from which the estimate of the standard error can be computed.

    -----

    Calculation in R, for reference only. The R results are not
    used in the test.

    > library(fitdistrplus)
    > dexps <- function(x, scale) {
    +     return(dexp(x, 1/scale))
    + }
    > pexps <- function(q, scale) {
    +     return(pexp(q, 1/scale))
    + }
    > left <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
    +                                     16, 16, 20, 20, 21, 22)
    > right <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
    +                                     NA, NA, NA, NA, NA, NA)
    > result = fitdistcens(data, 'exps', start=list(scale=mean(data$left)),
    +                      control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' exps ' on censored data by maximum likelihood
    Parameters:
          estimate
    scale    19.85
    > result$sd
       scale
    6.277119
    """
    # This data has 10 uncensored values and 6 right-censored values.
    observed = [1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
                16, 16, 20, 20, 21, 22]
    is_censored = [False]*10 + [True]*6
    sample = CensoredData.right_censored(observed, is_censored)

    loc, scale = expon.fit(sample, floc=0, optimizer=optimizer)
    assert loc == 0

    # Use the analytical solution to compute the expected value.  This
    # is the sum of the observed values divided by the number of uncensored
    # values.
    num_uncensored = len(sample) - sample.num_censored()
    sum_of_values = sample._uncensored.sum() + sample._right.sum()
    assert_allclose(scale, sum_of_values / num_uncensored, rtol=1e-8)
|
||||
|
||||
|
||||
def test_gamma_right_censored():
    """
    Fit gamma shape and scale to data with one right-censored value.

    Reference values were computed in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, 25.0),
    +                    right=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, NA))
    > result = fitdistcens(data, 'gamma', start=list(shape=1, scale=10),
    +                      control=list(reltol=1e-13))
    > result
    Fitting of the distribution ' gamma ' on censored data by maximum
    likelihood
    Parameters:
          estimate
    shape 1.447623
    scale 8.360197
    > result$sd
        shape     scale
    0.7053086 5.1016531
    """
    # The last value is right-censored.
    values = [2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, 25.0]
    censored = [0]*7 + [1]
    sample = CensoredData.right_censored(values, censored)

    shape, loc, scale = gamma.fit(sample, floc=0, optimizer=optimizer)

    assert_allclose(shape, 1.447623, rtol=5e-6)
    assert loc == 0
    assert_allclose(scale, 8.360197, rtol=5e-6)
|
||||
|
||||
|
||||
def test_gumbel():
    """
    Fit gumbel_l and gumbel_r to censored data.

    This R calculation should match gumbel_r.

    > library(evd)
    > library(fitdistrplus)
    > data = data.frame(left=c(0, 2, 3, 9, 10, 10),
    +                   right=c(1, 2, 3, 9, NA, NA))
    > result = fitdistcens(data, 'gumbel',
    +                      control=list(reltol=1e-14),
    +                      start=list(loc=4, scale=5))
    > result
    Fitting of the distribution ' gumbel ' on censored data by maximum
    likelihood
    Parameters:
          estimate
    loc   4.487853
    scale 4.843640
    """
    r_loc, r_scale = 4.487853, 4.843640

    # First value is interval-censored. Last two are right-censored.
    exact = np.array([2, 3, 9])
    above = np.array([10, 10])
    between = np.array([[0, 1]])
    sample = CensoredData(exact, right=above, interval=between)
    loc, scale = gumbel_r.fit(sample, optimizer=optimizer)
    assert_allclose(loc, r_loc, rtol=5e-6)
    assert_allclose(scale, r_scale, rtol=5e-6)

    # Negate the data and reverse the intervals, and test with gumbel_l.
    mirrored = CensoredData(-exact, left=-above,
                            interval=-between[:, ::-1])
    # Fitting gumbel_l to the mirrored data should give the same result as
    # above, but with loc negated.
    loc_l, scale_l = gumbel_l.fit(mirrored, optimizer=optimizer)
    assert_allclose(loc_l, -r_loc, rtol=5e-6)
    assert_allclose(scale_l, r_scale, rtol=5e-6)
|
||||
|
||||
|
||||
def test_invgauss():
    """
    Fit invgauss to data with one left- and one right-censored value.

    Two fits are checked: the shape parameter alone (scale fixed at 1),
    and shape and scale together.

    Calculation in R; using a fixed dispersion parameter amounts to fixing
    the scale to be 1.

    > library(statmod)
    > library(fitdistrplus)
    > left <- c(NA, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
    +           0.4822340, 0.3478597, 3, 0.7191797, 1.5810902, 0.4442299)
    > right <- c(0.15, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
    +            0.4822340, 0.3478597, NA, 0.7191797, 1.5810902, 0.4442299)
    > data <- data.frame(left=left, right=right)
    > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
    +                      fix.arg=list(dispersion=1), start=list(mean=3))
    > result
    Fitting of the distribution ' invgauss ' on censored data by maximum
    likelihood
    Parameters:
         estimate
    mean 0.853469
    Fixed parameters:
               value
    dispersion     1
    > result$sd
        mean
    0.247636

    Here's the R calculation with the dispersion as a free parameter to
    be fit.

    > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
    +                      start=list(mean=3, dispersion=1))
    > result
    Fitting of the distribution ' invgauss ' on censored data by maximum
    likelihood
    Parameters:
                estimate
    mean       0.8699819
    dispersion 1.2261362

    The parametrization of the inverse Gaussian distribution in the
    `statmod` package is not the same as in SciPy (see
        https://arxiv.org/abs/1603.06687
    for details).  The translation from R to SciPy is

        scale = 1/dispersion
        mu    = mean * dispersion

    > 1/result$estimate['dispersion']  # 1/dispersion
    dispersion
     0.8155701
    > result$estimate['mean'] * result$estimate['dispersion']
        mean
    1.066716

    Those last two values are the SciPy scale and shape parameters.
    """
    # One point is left-censored, and one is right-censored.
    exact = [0.4813096, 0.5571880, 0.5132463, 0.3801414,
             0.5904386, 0.4822340, 0.3478597, 0.7191797,
             1.5810902, 0.4442299]
    sample = CensoredData(uncensored=exact, left=[0.15], right=[3])

    # Fit only the shape parameter.
    shape_only = invgauss.fit(sample, floc=0, fscale=1, optimizer=optimizer)
    mu, loc, scale = shape_only

    assert_allclose(mu, 0.853469, rtol=5e-5)
    assert loc == 0
    assert scale == 1

    # Fit the shape and scale.
    shape_and_scale = invgauss.fit(sample, floc=0, optimizer=optimizer)
    mu, loc, scale = shape_and_scale

    assert_allclose(mu, 1.066716, rtol=5e-5)
    assert loc == 0
    assert_allclose(scale, 0.8155701, rtol=5e-5)
|
||||
|
||||
|
||||
def test_invweibull():
    """
    Fit invweibull shape and scale to censored data.

    Here is the calculation in R.  The 'frechet' distribution from the evd
    package matches SciPy's invweibull distribution.  The `loc` parameter
    is fixed at 0.

    > library(evd)
    > library(fitdistrplus)
    > data = data.frame(left=c(0, 2, 3, 9, 10, 10),
    +                   right=c(1, 2, 3, 9, NA, NA))
    > result = fitdistcens(data, 'frechet',
    +                      control=list(reltol=1e-14),
    +                      start=list(loc=4, scale=5))
    > result
    Fitting of the distribution ' frechet ' on censored data by maximum
    likelihood
    Parameters:
           estimate
    scale 2.7902200
    shape 0.6379845
    Fixed parameters:
        value
    loc     0
    """
    # In the R data, the first value is interval-censored, and the last
    # two are right-censored.  The rest are not censored.
    sample = CensoredData(uncensored=[2, 3, 9], right=[10, 10],
                          interval=[[0, 1]])
    shape, loc, scale = invweibull.fit(sample, floc=0, optimizer=optimizer)
    assert_allclose(shape, 0.6379845, rtol=5e-6)
    assert loc == 0
    assert_allclose(scale, 2.7902200, rtol=5e-6)
|
||||
|
||||
|
||||
def test_laplace():
    """
    Fit the Laplace distribution to left- and right-censored data.

    Reference calculation in R:

        > library(fitdistrplus)
        > dlaplace <- function(x, location=0, scale=1) {
        +     return(0.5*exp(-abs((x - location)/scale))/scale)
        + }
        > plaplace <- function(q, location=0, scale=1) {
        +     z <- (q - location)/scale
        +     s <- sign(z)
        +     f <- -s*0.5*exp(-abs(z)) + (s+1)/2
        +     return(f)
        + }
        > left <- c(NA, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
        +           -19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372,
        +           25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862,
        +           32.8921, 9.0448, -27.4591, NA, 19.5083, -9.7199)
        > right <- c(-50.0, -41.564, NA, 15.7384, NA, 10.0452, -2.0684,
        +            -19.5399, NA, 9.0005, 27.1227, 4.3113, -3.7372,
        +            25.3111, 14.7987, 34.0887, NA, 42.8496, 18.5862,
        +            32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199)
        > data <- data.frame(left=left, right=right)
        > result <- fitdistcens(data, 'laplace', start=list(location=10, scale=10),
        +                       control=list(reltol=1e-13))
        > result
        Fitting of the distribution ' laplace ' on censored data by maximum
        likelihood
        Parameters:
                 estimate
        location 14.79870
        scale    30.93601
        > result$sd
         location     scale
        0.1758864 7.0972125
    """
    # An observation of -50 is left-censored and an observation of 50 is
    # right-censored; everything else is an exact measurement.
    obs = np.array([-50.0, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
                    -19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372,
                    25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862,
                    32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199])
    at_lower_limit = obs == -50.0
    at_upper_limit = obs == 50.0
    exact = obs[~(at_lower_limit | at_upper_limit)]
    data = CensoredData(uncensored=exact,
                        left=obs[at_lower_limit],
                        right=obs[at_upper_limit])
    loc, scale = laplace.fit(data, loc=10, scale=10, optimizer=optimizer)
    assert_allclose(loc, 14.79870, rtol=5e-6)
    assert_allclose(scale, 30.93601, rtol=5e-6)
|
||||
|
||||
|
||||
def test_logistic():
    """
    Fit the logistic distribution to left-censored data.

    Reference calculation in R:

        > library(fitdistrplus)
        > left = c(13.5401, 37.4235, 11.906 , 13.998 , NA , 0.4023, NA ,
        +          10.9044, 21.0629, 9.6985, NA , 12.9016, 39.164 , 34.6396,
        +          NA , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306, 8.4949,
        +          3.4041, NA , 7.2828, 37.1265, 6.5969, 17.6868, 17.4977,
        +          16.3391, 36.0541)
        > right = c(13.5401, 37.4235, 11.906 , 13.998 , 0. , 0.4023, 0. ,
        +           10.9044, 21.0629, 9.6985, 0. , 12.9016, 39.164 , 34.6396,
        +           0. , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306, 8.4949,
        +           3.4041, 0. , 7.2828, 37.1265, 6.5969, 17.6868, 17.4977,
        +           16.3391, 36.0541)
        > data = data.frame(left=left, right=right)
        > result = fitdistcens(data, 'logis', control=list(reltol=1e-14))
        > result
        Fitting of the distribution ' logis ' on censored data by maximum
        likelihood
        Parameters:
                  estimate
        location 14.633459
        scale     9.232736
        > result$sd
        location    scale
        2.931505 1.546879
    """
    # A recorded 0.0 marks a left-censored observation: the true value is
    # known only to be less than 0.
    x = np.array([13.5401, 37.4235, 11.906, 13.998, 0.0, 0.4023, 0.0, 10.9044,
                  21.0629, 9.6985, 0.0, 12.9016, 39.164, 34.6396, 0.0, 20.3665,
                  16.5889, 18.0952, 45.3818, 35.3306, 8.4949, 3.4041, 0.0,
                  7.2828, 37.1265, 6.5969, 17.6868, 17.4977, 16.3391,
                  36.0541])
    is_censored = (x == 0)
    data = CensoredData.left_censored(x, censored=is_censored)
    loc, scale = logistic.fit(data, optimizer=optimizer)
    assert_allclose(loc, 14.633459, rtol=5e-7)
    assert_allclose(scale, 9.232736, rtol=5e-6)
|
||||
|
||||
|
||||
def test_lognorm():
|
||||
"""
|
||||
Ref: https://math.montana.edu/jobo/st528/documents/relc.pdf
|
||||
|
||||
The data is the locomotive control time to failure example that starts
|
||||
on page 8. That's the 8th page in the PDF; the page number shown in
|
||||
the text is 270).
|
||||
The document includes SAS output for the data.
|
||||
"""
|
||||
# These are the uncensored measurements. There are also 59 right-censored
|
||||
# measurements where the lower bound is 135.
|
||||
miles_to_fail = [22.5, 37.5, 46.0, 48.5, 51.5, 53.0, 54.5, 57.5, 66.5,
|
||||
68.0, 69.5, 76.5, 77.0, 78.5, 80.0, 81.5, 82.0, 83.0,
|
||||
84.0, 91.5, 93.5, 102.5, 107.0, 108.5, 112.5, 113.5,
|
||||
116.0, 117.0, 118.5, 119.0, 120.0, 122.5, 123.0, 127.5,
|
||||
131.0, 132.5, 134.0]
|
||||
|
||||
data = CensoredData.right_censored(miles_to_fail + [135]*59,
|
||||
[0]*len(miles_to_fail) + [1]*59)
|
||||
sigma, loc, scale = lognorm.fit(data, floc=0)
|
||||
|
||||
assert loc == 0
|
||||
# Convert the lognorm parameters to the mu and sigma of the underlying
|
||||
# normal distribution.
|
||||
mu = np.log(scale)
|
||||
# The expected results are from the 17th page of the PDF document
|
||||
# (labeled page 279), in the SAS output on the right side of the page.
|
||||
assert_allclose(mu, 5.1169, rtol=5e-4)
|
||||
assert_allclose(sigma, 0.7055, rtol=5e-3)
|
||||
|
||||
|
||||
def test_nct():
    """
    Test fitting the noncentral t distribution to censored data.

    Reference calculation in R:

        > library(fitdistrplus)
        > data <- data.frame(left=c(1, 2, 3, 5, 8, 10, 25, 25),
        +                    right=c(1, 2, 3, 5, 8, 10, NA, NA))
        > result = fitdistcens(data, 't', control=list(reltol=1e-14),
        +                      start=list(df=1, ncp=2))
        > result
        Fitting of the distribution ' t ' on censored data by maximum likelihood
        Parameters:
             estimate
        df  0.5432336
        ncp 2.8893565
    """
    observations = [1, 2, 3, 5, 8, 10, 25, 25]
    censor_flags = [0, 0, 0, 0, 0, 0, 1, 1]
    data = CensoredData.right_censored(observations, censor_flags)
    # Only the shape parameters df and nc are fitted; loc and scale are
    # held fixed.
    with np.errstate(over='ignore'):  # remove context when gh-14901 is closed
        fitted = nct.fit(data, floc=0, fscale=1, optimizer=optimizer)
    df, nc, loc, scale = fitted
    assert_allclose(df, 0.5432336, rtol=5e-6)
    assert_allclose(nc, 2.8893565, rtol=5e-6)
    assert loc == 0
    assert scale == 1
|
||||
|
||||
|
||||
def test_ncx2():
    """
    Test fitting the shape parameters (df, ncp) of ncx2 to mixed data.

    Reference calculation in R, with
    * 5 not censored values [2.7, 0.2, 6.5, 0.4, 0.1],
    * 1 interval-censored value [[0.6, 1.0]], and
    * 2 right-censored values [8, 8].

        > library(fitdistrplus)
        > data <- data.frame(left=c(2.7, 0.2, 6.5, 0.4, 0.1, 0.6, 8, 8),
        +                    right=c(2.7, 0.2, 6.5, 0.4, 0.1, 1.0, NA, NA))
        > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14),
        +                      start=list(df=1, ncp=2))
        > result
        Fitting of the distribution ' chisq ' on censored data by maximum
        likelihood
        Parameters:
            estimate
        df  1.052871
        ncp 2.362934
    """
    data = CensoredData(uncensored=[2.7, 0.2, 6.5, 0.4, 0.1],
                        right=[8, 8],
                        interval=[[0.6, 1.0]])
    with np.errstate(over='ignore'):  # remove context when gh-14901 is closed
        fitted = ncx2.fit(data, floc=0, fscale=1, optimizer=optimizer)
    df, ncp, loc, scale = fitted
    assert_allclose(df, 1.052871, rtol=5e-6)
    assert_allclose(ncp, 2.362934, rtol=5e-6)
    assert loc == 0
    assert scale == 1
|
||||
|
||||
|
||||
def test_norm():
    """
    Test fitting the normal distribution to interval-censored data.

    Reference calculation in R:

        > library(fitdistrplus)
        > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
        +                    right=c(0.20, 0.55, 0.90, 0.95))
        > result = fitdistcens(data, 'norm', control=list(reltol=1e-14))

        > result
        Fitting of the distribution ' norm ' on censored data by maximum likelihood
        Parameters:
              estimate
        mean 0.5919990
        sd   0.2868042
        > result$sd
             mean        sd
        0.1444432 0.1029451
    """
    # Each row is one observation, given as its [lower, upper] bounds.
    intervals = [[0.10, 0.20],
                 [0.50, 0.55],
                 [0.75, 0.90],
                 [0.80, 0.95]]
    data = CensoredData(interval=intervals)

    loc, scale = norm.fit(data, optimizer=optimizer)

    assert_allclose(loc, 0.5919990, rtol=5e-6)
    assert_allclose(scale, 0.2868042, rtol=5e-6)
|
||||
|
||||
|
||||
def test_weibull_censored1():
|
||||
# Ref: http://www.ams.sunysb.edu/~zhu/ams588/Lecture_3_likelihood.pdf
|
||||
|
||||
# Survival times; '*' indicates right-censored.
|
||||
s = "3,5,6*,8,10*,11*,15,20*,22,23,27*,29,32,35,40,26,28,33*,21,24*"
|
||||
|
||||
times, cens = zip(*[(float(t[0]), len(t) == 2)
|
||||
for t in [w.split('*') for w in s.split(',')]])
|
||||
data = CensoredData.right_censored(times, cens)
|
||||
|
||||
c, loc, scale = weibull_min.fit(data, floc=0)
|
||||
|
||||
# Expected values are from the reference.
|
||||
assert_allclose(c, 2.149, rtol=1e-3)
|
||||
assert loc == 0
|
||||
assert_allclose(scale, 28.99, rtol=1e-3)
|
||||
|
||||
# Flip the sign of the data, and make the censored values
|
||||
# left-censored. We should get the same parameters when we fit
|
||||
# weibull_max to the flipped data.
|
||||
data2 = CensoredData.left_censored(-np.array(times), cens)
|
||||
|
||||
c2, loc2, scale2 = weibull_max.fit(data2, floc=0)
|
||||
|
||||
assert_allclose(c2, 2.149, rtol=1e-3)
|
||||
assert loc2 == 0
|
||||
assert_allclose(scale2, 28.99, rtol=1e-3)
|
||||
|
||||
|
||||
def test_weibull_min_sas1():
    """
    Fit weibull_min (loc fixed at 0) to right-censored data and compare the
    shape and scale with SAS results.

    Data and SAS results from
      https://support.sas.com/documentation/cdl/en/qcug/63922/HTML/default/
      viewer.htm#qcug_reliability_sect004.htm
    """
    # Whitespace-separated (value, flag) pairs; the flag column is passed
    # to CensoredData.right_censored as the `censored` indicator.
    text = """
450 0 460 1 1150 0 1150 0 1560 1
1600 0 1660 1 1850 1 1850 1 1850 1
1850 1 1850 1 2030 1 2030 1 2030 1
2070 0 2070 0 2080 0 2200 1 3000 1
3000 1 3000 1 3000 1 3100 0 3200 1
3450 0 3750 1 3750 1 4150 1 4150 1
4150 1 4150 1 4300 1 4300 1 4300 1
4300 1 4600 0 4850 1 4850 1 4850 1
4850 1 5000 1 5000 1 5000 1 6100 1
6100 0 6100 1 6100 1 6300 1 6450 1
6450 1 6700 1 7450 1 7800 1 7800 1
8100 1 8100 1 8200 1 8500 1 8500 1
8500 1 8750 1 8750 0 8750 1 9400 1
9900 1 10100 1 10100 1 10100 1 11500 1
"""

    pairs = np.fromiter(map(int, text.split()), dtype=int).reshape(-1, 2)
    cens = pairs[:, 1]
    # Rescale the values by a factor of 1000 before fitting.
    life = pairs[:, 0] / 1000.0

    data = CensoredData.right_censored(life, cens)

    c, loc, scale = weibull_min.fit(data, floc=0, optimizer=optimizer)
    assert_allclose(c, 1.0584, rtol=1e-4)
    assert_allclose(scale, 26.2968, rtol=1e-5)
    assert loc == 0
|
||||
|
||||
|
||||
def test_weibull_min_sas2():
    """
    Fit all three weibull_min parameters to right-censored data and compare
    with SAS results.

    Ref:
      http://support.sas.com/documentation/cdl/en/ormpug/67517/HTML/default/
      viewer.htm#ormpug_nlpsolver_examples06.htm
    """
    # The last two values are right-censored.
    days = np.array([143, 164, 188, 188, 190, 192, 206, 209, 213, 216, 220,
                     227, 230, 234, 246, 265, 304, 216, 244])
    n_censored = 2
    censor_flags = [0]*(len(days) - n_censored) + [1]*n_censored

    data = CensoredData.right_censored(days, censor_flags)

    # Positional `1` is the starting guess for the shape parameter; loc and
    # scale get explicit starting guesses as well.
    fitted = weibull_min.fit(data, 1, loc=100, scale=100,
                             optimizer=optimizer)
    c, loc, scale = fitted

    assert_allclose(c, 2.7112, rtol=5e-4)
    assert_allclose(loc, 122.03, rtol=5e-4)
    assert_allclose(scale, 108.37, rtol=5e-4)
|
||||
115
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_crosstab.py
vendored
Normal file
115
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_crosstab.py
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, assert_equal
|
||||
from scipy.stats.contingency import crosstab
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_basic(sparse):
    """Cross-tabulate two sequences, with dense and with sparse counts."""
    a = [0, 0, 9, 9, 0, 0, 9]
    b = [2, 1, 3, 1, 2, 3, 3]

    (avals, bvals), count = crosstab(a, b, sparse=sparse)

    assert_array_equal(avals, [0, 9])
    assert_array_equal(bvals, [1, 2, 3])
    # The sparse result is converted to a dense array before comparing.
    dense_count = count.toarray() if sparse else count
    assert_array_equal(dense_count, np.array([[1, 2, 1],
                                              [1, 0, 2]]))
|
||||
|
||||
|
||||
def test_crosstab_basic_1d():
    """Verify that a single input sequence works as expected."""
    x = [1, 2, 3, 1, 2, 3, 3]

    (xvals,), count = crosstab(x)

    assert_array_equal(xvals, [1, 2, 3])
    assert_array_equal(count, np.array([2, 2, 3]))
|
||||
|
||||
|
||||
def test_crosstab_basic_3d():
    """Verify the function for three input sequences."""
    a, b = 'a', 'b'
    x = [0, 0, 9, 9, 0, 0, 9, 9]
    y = [a, a, a, a, b, b, b, a]
    z = [1, 2, 3, 1, 2, 3, 3, 1]

    # expected_count[i, j, k] is the number of positions at which
    # (x, y, z) equals (xvals[i], yvals[j], zvals[k]).
    expected_count = np.array([[[1, 1, 0],
                                [0, 1, 1]],
                               [[2, 0, 1],
                                [0, 0, 1]]])

    (xvals, yvals, zvals), count = crosstab(x, y, z)

    for found, wanted in ((xvals, [0, 9]),
                          (yvals, [a, b]),
                          (zvals, [1, 2, 3]),
                          (count, expected_count)):
        assert_array_equal(found, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_levels(sparse):
    """Explicit `levels` for one input add a zero-count column for a level
    that never occurs in the data."""
    a = [0, 0, 9, 9, 0, 0, 9]
    b = [1, 2, 3, 1, 2, 3, 3]
    b_levels = [0, 1, 2, 3]

    # `None` requests no explicit levels for `a`; level 0 of `b` is absent
    # from the data, so its column is all zeros.
    (avals, bvals), count = crosstab(a, b, levels=[None, b_levels],
                                     sparse=sparse)

    assert_array_equal(avals, [0, 9])
    assert_array_equal(bvals, [0, 1, 2, 3])
    dense_count = count.toarray() if sparse else count
    assert_array_equal(dense_count, np.array([[0, 1, 2, 1],
                                              [0, 1, 0, 2]]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_extra_levels(sparse):
    """Data values outside the requested `levels` are left out of the count."""
    # The pair of values (-1, 3) will be ignored, because we explicitly
    # request the counted `a` values to be [0, 9].
    a = [0, 0, 9, 9, 0, 0, 9, -1]
    b = [1, 2, 3, 1, 2, 3, 3, 3]
    requested_levels = [[0, 9], [0, 1, 2, 3]]

    (avals, bvals), count = crosstab(a, b, levels=requested_levels,
                                     sparse=sparse)

    assert_array_equal(avals, [0, 9])
    assert_array_equal(bvals, [0, 1, 2, 3])
    dense_count = count.toarray() if sparse else count
    assert_array_equal(dense_count, np.array([[0, 1, 2, 1],
                                              [0, 1, 0, 2]]))
|
||||
|
||||
|
||||
def test_validation_at_least_one():
    """Calling crosstab with no input sequences raises TypeError."""
    expect_error = pytest.raises(TypeError, match='At least one')
    with expect_error:
        crosstab()
|
||||
|
||||
|
||||
def test_validation_same_lengths():
    """Input sequences of different lengths raise ValueError."""
    short, longer = [1, 2], [1, 2, 3, 4]
    expect_error = pytest.raises(ValueError, match='must have the same length')
    with expect_error:
        crosstab(short, longer)
|
||||
|
||||
|
||||
def test_validation_sparse_only_two_args():
    """Requesting sparse output with three input sequences raises ValueError."""
    expect_error = pytest.raises(ValueError, match='only two input sequences')
    with expect_error:
        crosstab([0, 1, 1], [8, 8, 9], [1, 3, 3], sparse=True)
|
||||
|
||||
|
||||
def test_validation_len_levels_matches_args():
    """A `levels` argument with one entry for two input sequences raises
    ValueError."""
    one_entry_levels = ([0, 1, 2, 3],)
    expect_error = pytest.raises(ValueError, match='number of input sequences')
    with expect_error:
        crosstab([0, 1, 1], [8, 8, 9], levels=one_entry_levels)
|
||||
|
||||
|
||||
def test_result():
    """The result object compares equal to the tuple of its `elements` and
    `count` attributes."""
    res = crosstab([0, 1], [1, 2])
    by_attribute = (res.elements, res.count)
    assert_equal(by_attribute, res)
|
||||
563
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py
vendored
Normal file
563
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py
vendored
Normal file
@@ -0,0 +1,563 @@
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment,
|
||||
check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_edge_support,
|
||||
check_named_args, check_random_state_property,
|
||||
check_pickling, check_rvs_broadcast,
|
||||
check_freezing,)
|
||||
from scipy.stats._distr_params import distdiscrete, invdistdiscrete
|
||||
from scipy.stats._distn_infrastructure import rv_discrete_frozen
|
||||
|
||||
# Extend the imported list of discrete test cases with an ad-hoc
# distribution built from explicit (values, probabilities).  The list is
# modified in place; unlike the other entries, the first element of this
# one is a distribution object rather than a name.
vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4])
distdiscrete.append([stats.rv_discrete(values=vals), ()])

# For these distributions, test_discrete_basic only runs with test mode full
distslow = {'zipfian', 'nhypergeom'}

# Override number of ULPs adjustment for `check_cdf_ppf`
roundtrip_cdf_ppf_exceptions = {'nbinom': 30}
|
||||
|
||||
def cases_test_discrete_basic():
    """Yield the (distname, arg, first_case) cases for test_discrete_basic.

    Distributions listed in `distslow` are wrapped in `pytest.param` with
    the `slow` mark.  For the others, `first_case` is True only the first
    time a given `distname` is encountered.
    """
    already_seen = set()
    for distname, arg in distdiscrete:
        if distname not in distslow:
            case = (distname, arg, distname not in already_seen)
        else:
            # NOTE(review): the third value here is `distname` itself, not a
            # bool as in the other branch -- confirm this is intentional.
            case = pytest.param(distname, arg, distname,
                                marks=pytest.mark.slow)
        yield case
        already_seen.add(distname)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic())
def test_discrete_basic(distname, arg, first_case):
    """Run the basic consistency checks on one discrete distribution.

    The cdf/ppf, pmf/cdf, sf/isf, edge-support and chi-square checks run
    for every case.  The named-argument, docstring, random-state, pickling,
    freezing and entropy checks run only when `first_case` is truthy.
    """
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # `distname` is not a name to look up; use it as the distribution.
        distfn, distname = distname, 'sample distribution'

    np.random.seed(9765456)
    sample = distfn.rvs(*arg, size=2000)
    observed_support = np.unique(sample)
    # The values are not used; this only exercises `stats` and checks that
    # it returns two values.
    _mean, _var = distfn.stats(*arg)

    check_cdf_ppf(distfn, arg, observed_support, distname + ' cdf_ppf')
    check_pmf_cdf(distfn, arg, distname)
    check_oth(distfn, arg, observed_support, distname + ' oth')
    check_edge_support(distfn, arg)

    alpha = 0.01
    check_discrete_chisquare(distfn, arg, sample, alpha,
                             distname + ' chisquare')

    if not first_case:
        return

    locscale_defaults = (0,)
    meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
             distfn.logsf]
    # make sure arguments are within support
    # for some distributions, this needs to be overridden
    spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0,
              'nchypergeom_wallenius': 6}
    k = spec_k.get(distname, 1)
    check_named_args(distfn, k, arg, locscale_defaults, meths)
    if distname != 'sample distribution':
        check_scale_docstring(distfn)
    check_random_state_property(distfn, arg)
    check_pickling(distfn, arg)
    check_freezing(distfn, arg)

    # Entropy
    check_entropy(distfn, arg, distname)
    # Check the private implementation only when the distribution's class
    # has its own `_entropy`.
    if distfn.__class__._entropy != stats.rv_discrete._entropy:
        check_private_entropy(distfn, arg, stats.rv_discrete)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg', distdiscrete)
def test_moments(distname, arg):
    """Check normalization and the mean/variance/skewness/kurtosis of one
    discrete distribution through the shared checkers."""
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # `distname` is not a name to look up; use it as the distribution.
        distfn, distname = distname, 'sample distribution'

    m, v, s, k = distfn.stats(*arg, moments='mvsk')
    check_normalization(distfn, arg, distname)

    # compare `stats` and `moment` methods
    check_moment(distfn, arg, m, v, distname)
    check_mean_expect(distfn, arg, m, distname)
    check_var_expect(distfn, arg, m, v, distname)
    check_skew_expect(distfn, arg, m, v, s, distname)
    with np.testing.suppress_warnings() as sup:
        # RuntimeWarning is filtered for these two distributions only.
        if distname in ['zipf', 'betanbinom']:
            sup.filter(RuntimeWarning)
        check_kurt_expect(distfn, arg, m, v, k, distname)

    # frozen distr moments: E[X] = m and E[X**2] = v + m**2
    second_raw_moment = v+m*m
    check_moment_frozen(distfn, arg, m, 1)
    check_moment_frozen(distfn, arg, second_raw_moment, 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distdiscrete)
def test_rvs_broadcast(dist, shape_args):
    """Check `rvs` when the shape parameters and `loc` are arrays that
    broadcast against each other."""
    # If shape_only is True, it means the _rvs method of the
    # distribution uses more than one random number to generate a random
    # variate.  That means the result of using rvs with broadcasting or
    # with a nontrivial size will not necessarily be the same as using the
    # numpy.vectorize'd version of rvs(), so we can only compare the shapes
    # of the results, not the values.
    # Whether or not a distribution is in the following list is an
    # implementation detail of the distribution, not a requirement.  If
    # the implementation of the rvs() method of a distribution changes,
    # this test might also have to be changed.
    shape_only = dist in ['betabinom', 'betanbinom', 'skellam', 'yulesimon',
                          'dlaplace', 'nchypergeom_fisher',
                          'nchypergeom_wallenius']

    try:
        distfunc = getattr(stats, dist)
    except TypeError:
        # `dist` is not a name to look up; use it as the distribution and
        # build a descriptive label for it.
        distfunc = dist
        dist = f'rv_discrete(values=({dist.xk!r}, {dist.pk!r}))'

    loc = np.zeros(2)
    nargs = distfunc.numargs
    # Shape parameter k becomes an array of shape (k+3, 1, ..., 1) with
    # k+1 trailing ones, so that every parameter varies along its own axis.
    allargs = [np.full((k + 3,) + (1,)*(k + 1), shape_args[k])
               for k in range(nargs)]
    allargs.append(loc)
    # bshape holds the expected shape when loc, scale, and the shape
    # parameters are all broadcast together: the leading dimensions of the
    # parameters, last parameter first, followed by the size of loc.
    bshape = [k + 3 for k in reversed(range(nargs))]
    bshape.append(loc.size)

    check_rvs_broadcast(
        distfunc, dist, allargs, bshape, shape_only, [np.dtype(int)]
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,args', distdiscrete)
def test_ppf_with_loc(dist, args):
    """ppf(0) and ppf(1) must follow the support when `loc` shifts it."""
    try:
        distfn = getattr(stats, dist)
    except TypeError:
        distfn = dist
    # check with a negative, no and positive relocation.
    np.random.seed(1942349)
    negative_shift = np.random.randint(-10, -1)
    positive_shift = np.random.randint(1, 10)
    lower, upper = distfn.support(*args)
    for loc in (negative_shift, 0, positive_shift):
        # ppf(0) is one below the shifted lower end of the support; ppf(1)
        # is the shifted upper end.
        expected = [lower-1+loc, upper+loc]
        actual = [distfn.ppf(0.0, *args, loc=loc),
                  distfn.ppf(1.0, *args, loc=loc)]
        npt.assert_array_equal(expected, actual)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist, args', distdiscrete)
def test_isf_with_loc(dist, args):
    """isf(0) and isf(1) must follow the support when `loc` shifts it, for
    scalar and for array-valued `loc`."""
    try:
        distfn = getattr(stats, dist)
    except TypeError:
        distfn = dist

    def assert_endpoints_follow(locs):
        # isf(0) is the shifted upper end of the support; isf(1) is one
        # below the shifted lower end.
        lower, upper = distfn.support(*args)
        for loc in locs:
            expected = upper + loc, lower - 1 + loc
            res = (distfn.isf(0., *args, loc=loc),
                   distfn.isf(1., *args, loc=loc))
            npt.assert_array_equal(expected, res)

    # check with a negative, no and positive relocation.
    np.random.seed(1942349)
    assert_endpoints_follow([np.random.randint(-10, -1),
                             0,
                             np.random.randint(1, 10)])
    # test broadcasting behaviour
    assert_endpoints_follow([np.random.randint(-10, -1, size=(5, 3)),
                             np.zeros((5, 3)),
                             np.random.randint(1, 10, size=(5, 3))])
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, supp, msg):
    """Check that ppf inverts cdf on the points `supp`.

    `supp` is assumed to be an array of integers in the support of `distfn`
    (but not necessarily all the integers in the support).  The check
    assumes that the PMF of any value in the support is greater than 1e-8.
    `msg` prefixes the assertion messages.
    """
    # cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer}
    cdf_at_supp = distfn.cdf(supp, *arg)

    # In very rare cases, the finite precision calculation of ppf(cdf(supp))
    # can produce an array in which an element is off by one.  Nudging the
    # CDF values down by a few ULPs helps to avoid this.
    n_ulps = roundtrip_cdf_ppf_exceptions.get(distfn.name, 15)
    nudged = cdf_at_supp - n_ulps*np.spacing(cdf_at_supp)
    npt.assert_array_equal(distfn.ppf(nudged, *arg),
                           supp, msg + '-roundtrip')

    # Repeat the same calculation, but with the CDF values decreased by 1e-8.
    slightly_below = distfn.cdf(supp, *arg) - 1e-8
    npt.assert_array_equal(distfn.ppf(slightly_below, *arg),
                           supp, msg + '-roundtrip')

    if hasattr(distfn, 'xk'):
        # Distributions with an `xk` attribute skip the next-point check.
        return

    # Just above cdf(k), ppf must give the next point, k + distfn.inc.
    # Points at or above the upper end of the support are left out.
    _a, _b = distfn.support(*arg)
    interior = supp[supp < _b]
    slightly_above = distfn.cdf(interior, *arg) + 1e-8
    npt.assert_array_equal(distfn.ppf(slightly_above, *arg),
                           interior + distfn.inc, msg + ' ppf-cdf-next')
|
||||
|
||||
|
||||
def check_pmf_cdf(distfn, arg, distname):
    """Check that the cumulative sum of the pmf agrees with the cdf, and
    that the pmf is zero between the points where it is evaluated.

    `distname` only selects a looser tolerance for 'skellam'.
    """
    if hasattr(distfn, 'xk'):
        index = distfn.xk
    else:
        # Ten consecutive integers starting just below the 1% quantile.
        first = int(distfn.ppf(0.01, *arg) - 1)
        index = list(range(first, first + 10))
    cdfs = distfn.cdf(index, *arg)
    pmfs_cum = distfn.pmf(index, *arg).cumsum()

    if distname == 'skellam':    # ncx2 accuracy
        atol, rtol = 1e-5, 1e-5
    else:
        atol, rtol = 1e-10, 1e-10
    # Subtracting the first element compares increments, so the mass below
    # the first index plays no role.
    npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0],
                        atol=atol, rtol=rtol)

    # also check that pmf at non-integral k is zero
    k = np.asarray(index)
    midpoints = k[:-1] + np.diff(k)/2
    npt.assert_equal(distfn.pmf(midpoints, *arg), 0)

    # better check frozen distributions, and also when loc != 0
    loc = 0.5
    frozen = distfn(*arg, loc=loc)
    npt.assert_allclose(frozen.pmf(k[1:] + loc), np.diff(frozen.cdf(k + loc)))
    npt.assert_equal(frozen.pmf(midpoints + loc), 0)
|
||||
|
||||
|
||||
def check_moment_frozen(distfn, arg, m, k):
    """Assert that the `k`-th moment of the frozen distribution
    `distfn(*arg)` is close to `m`."""
    frozen = distfn(*arg)
    npt.assert_allclose(frozen.moment(k), m, atol=1e-10, rtol=1e-10)
|
||||
|
||||
|
||||
def check_oth(distfn, arg, supp, msg):
    """Check `sf` against `cdf`, `isf` against `ppf`, and the median given
    by `isf(0.5)`.

    `msg` is accepted for symmetry with the other checkers; it is not used.
    """
    # sf is the complement of cdf on the given points
    complement = 1. - distfn.cdf(supp, *arg)
    npt.assert_allclose(distfn.sf(supp, *arg), complement,
                        atol=1e-10, rtol=1e-10)

    # isf(q) agrees with ppf(1 - q) on a grid of probabilities
    q = np.linspace(0.01, 0.99, 20)
    npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg),
                        atol=1e-10, rtol=1e-10)

    # More than half of the mass lies above the point before the median,
    # and more than half lies at or below the point after it.
    median_sf = distfn.isf(0.5, *arg)
    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
|
||||
|
||||
|
||||
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    """Perform chisquare test for random sample of a discrete distribution

    The support is partitioned into intervals that each carry a probability
    mass of at least 0.05, the sample is binned on those intervals, and the
    observed frequencies are compared with the expected ones using
    `stats.chisquare`.

    Parameters
    ----------
    distfn : distribution object
        distribution under test; its `support` and `cdf` methods are used
    arg : sequence
        parameters of distribution
    rvs : array_like
        random sample to be tested against `distfn`
    alpha : float
        significance level, threshold for p-value
    msg : string
        label included in the assertion message

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        if the p-value of the test is not greater than `alpha`

    """
    wsupp = 0.05

    # construct intervals with minimum mass `wsupp`.
    # intervals are left-half-open as in a cdf difference
    _a, _b = distfn.support(*arg)
    # the scan is clipped to [-1000, 1000]
    lo = int(max(_a, -1000))
    high = int(min(_b, 1000)) + 1
    distsupport = range(lo, high)
    last = 0
    distsupp = [lo]
    distmass = []
    for ii in distsupport:
        current = distfn.cdf(ii, *arg)
        # close the current interval once it has accumulated enough mass
        # (the 1e-14 slack absorbs rounding error in the cdf difference)
        if current - last >= wsupp - 1e-14:
            distsupp.append(ii)
            distmass.append(current - last)
            last = current
            # stop once less than `wsupp` remains in the upper tail
            if current > (1 - wsupp):
                break
    # whatever mass is left over forms one final interval ending at `_b`
    if distsupp[-1] < _b:
        distsupp.append(_b)
        distmass.append(1 - last)
    distsupp = np.array(distsupp)
    distmass = np.array(distmass)

    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp + 1e-8
    histsupp[0] = _a

    # find sample frequencies and perform chisquare test
    freq, _ = np.histogram(rvs, histsupp)
    chis, pval = stats.chisquare(freq, len(rvs)*distmass)

    npt.assert_(
        pval > alpha,
        f'chisquare - test for {msg} at arg = {str(arg)} with pval = {str(pval)}'
    )
|
||||
|
||||
|
||||
def check_scale_docstring(distfn):
    """Assert that the docstring of `distfn`, when present, does not
    contain the word 'scale'."""
    doc = distfn.__doc__
    if doc is None:
        # Docstrings can be stripped if interpreter is run with -OO
        return
    npt.assert_('scale' not in doc)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pmf', 'logpmf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
@pytest.mark.parametrize('distname, args', distdiscrete)
def test_methods_with_lists(method, distname, args):
    """Test that the discrete distributions can accept Python lists
    as arguments.

    The method named by `method` is called once with list arguments and
    once per element with scalar arguments; the results must agree.
    """
    try:
        dist = getattr(stats, distname)
    except TypeError:
        # The entry is not a name to look up; nothing to test here.
        return
    # Look up the method under test so that each parametrized case checks
    # its own method.
    dist_method = getattr(dist, method)
    if method in ['ppf', 'isf']:
        # these methods are evaluated at probabilities
        z = [0.1, 0.2]
    else:
        z = [0, 1]
    # Repeat every shape parameter so that it lines up with `z` and `loc`.
    p2 = [[p]*2 for p in args]
    loc = [0, 1]
    result = dist_method(z, *p2, loc=loc)
    npt.assert_allclose(result,
                        [dist_method(*v) for v in zip(z, *p2, loc)],
                        rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, args', invdistdiscrete)
def test_cdf_gh13280_regression(distname, args):
    """Test for nan output when shape parameters are invalid."""
    dist = getattr(stats, distname)
    x = np.arange(-2, 15)
    # Every element of the cdf must be nan.
    npt.assert_equal(dist.cdf(x, *args), np.nan)
|
||||
|
||||
|
||||
def cases_test_discrete_integer_shapes():
    """Yield (distname, shape name, shapes) for every shape parameter that
    must be integral, visiting each distribution name once."""
    # distributions parameters that are only allowed to be integral when
    # fitting, but are allowed to be real as input to PDF, etc.
    integrality_exceptions = {'nbinom': {'n'}, 'betanbinom': {'n'}}

    visited = set()
    for distname, shapes in distdiscrete:
        first_visit = distname not in visited
        visited.add(distname)
        if not first_visit:
            continue

        try:
            dist = getattr(stats, distname)
        except TypeError:
            # The entry is not a name to look up; skip it.
            continue

        exempt_names = integrality_exceptions.get(distname, set())
        for shape in dist._shape_info():
            if shape.integrality and shape.name not in exempt_names:
                yield distname, shape.name, shapes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapename, shapes',
                         cases_test_discrete_integer_shapes())
def test_integer_shapes(distname, shapename, shapes):
    """The pmf must be nan for a non-integral value of an integral shape
    parameter, and not nan for integral values."""
    dist = getattr(stats, distname)
    names = [info.name for info in dist._shape_info()]
    i = names.index(shapename)  # this element of params must be integral

    valid_shape = shapes[i]
    invalid_shape = valid_shape - 0.5  # arbitrary non-integral value
    new_valid_shape = valid_shape - 1
    # One row per candidate value, so the pmf result has rows
    # [valid, invalid, valid].
    stacked_shapes = list(shapes)
    stacked_shapes[i] = [[valid_shape], [invalid_shape], [new_valid_shape]]

    a, b = dist.support(*shapes)
    x = np.round(np.linspace(a, b, 5))

    is_nan = np.isnan(dist.pmf(x, *stacked_shapes))
    assert not np.any(is_nan[0, :])
    assert np.all(is_nan[1, :])
    assert not np.any(is_nan[2, :])
|
||||
|
||||
|
||||
def test_frozen_attributes():
    """Frozen discrete distributions must not expose `pdf` or `logpdf`.

    gh-14827 reported that all frozen distributions had both pmf and pdf
    attributes; continuous should have pdf and discrete should have pmf.
    """
    message = "'rv_discrete_frozen' object has no attribute"
    with pytest.raises(AttributeError, match=message):
        stats.binom(10, 0.5).pdf
    with pytest.raises(AttributeError, match=message):
        stats.binom(10, 0.5).logpdf
    # Even when the distribution object has a `pdf` attribute, freezing it
    # must still produce an `rv_discrete_frozen`.
    stats.binom.pdf = "herring"
    try:
        frozen_binom = stats.binom(10, 0.5)
        assert isinstance(frozen_binom, rv_discrete_frozen)
    finally:
        # Always undo the monkeypatch so that a failure here cannot leak
        # the attribute into other tests.
        delattr(stats.binom, 'pdf')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapes', distdiscrete)
def test_interval(distname, shapes):
    """`ppf`, `isf` and `interval` agree on the endpoints at confidence 1."""
    # gh-11026 reported that `interval` returns incorrect values when
    # `confidence=1`. The values were not incorrect, but it was not intuitive
    # that the left end of the interval should extend beyond the support of the
    # distribution. Confirm that this is the behavior for all distributions.
    dist = getattr(stats, distname) if isinstance(distname, str) else distname
    a, b = dist.support(*shapes)
    # The left endpoint is one below the lower end of the support.
    endpoints = (a-1, b)
    npt.assert_equal(dist.ppf([0, 1], *shapes), endpoints)
    npt.assert_equal(dist.isf([1, 0], *shapes), endpoints)
    npt.assert_equal(dist.interval(1, *shapes), endpoints)
|
||||
|
||||
|
||||
@pytest.mark.xfail_on_32bit("Sensible to machine precision")
def test_rv_sample():
    """Check an `rv_discrete` built from explicit `values` method by method.

    Every quantity is compared against a value computed directly from the
    support points `xk` and probabilities `pk`.
    """
    # Thoroughly test rv_sample and check that gh-3758 is resolved

    # Generate a random discrete distribution
    rng = np.random.default_rng(98430143469)
    xk = np.sort(rng.random(10) * 10)
    pk = rng.random(10)
    pk /= np.sum(pk)
    dist = stats.rv_discrete(values=(xk, pk))

    # Generate points to the left and right of xk
    # (midpoints between neighbouring support points; 0 and xk[-1]+1 are
    # used as the outer neighbours)
    xk_left = (np.array([0] + xk[:-1].tolist()) + xk)/2
    xk_right = (np.array(xk[1:].tolist() + [xk[-1]+1]) + xk)/2

    # Generate points to the left and right of cdf
    cdf2 = np.cumsum(pk)
    cdf2_left = (np.array([0] + cdf2[:-1].tolist()) + cdf2)/2
    cdf2_right = (np.array(cdf2[1:].tolist() + [1]) + cdf2)/2

    # support - leftmost and rightmost xk
    a, b = dist.support()
    assert_allclose(a, xk[0])
    assert_allclose(b, xk[-1])

    # pmf - supported only on the xk
    assert_allclose(dist.pmf(xk), pk)
    assert_allclose(dist.pmf(xk_right), 0)
    assert_allclose(dist.pmf(xk_left), 0)

    # logpmf is log of the pmf; log(0) = -np.inf
    with np.errstate(divide='ignore'):
        assert_allclose(dist.logpmf(xk), np.log(pk))
        assert_allclose(dist.logpmf(xk_right), -np.inf)
        assert_allclose(dist.logpmf(xk_left), -np.inf)

    # cdf - the cumulative sum of the pmf
    assert_allclose(dist.cdf(xk), cdf2)
    assert_allclose(dist.cdf(xk_right), cdf2)
    assert_allclose(dist.cdf(xk_left), [0]+cdf2[:-1].tolist())

    # logcdf is compared with the log of the cdf
    with np.errstate(divide='ignore'):
        assert_allclose(dist.logcdf(xk), np.log(dist.cdf(xk)),
                        atol=1e-15)
        assert_allclose(dist.logcdf(xk_right), np.log(dist.cdf(xk_right)),
                        atol=1e-15)
        assert_allclose(dist.logcdf(xk_left), np.log(dist.cdf(xk_left)),
                        atol=1e-15)

    # sf is 1-cdf
    assert_allclose(dist.sf(xk), 1-dist.cdf(xk))
    assert_allclose(dist.sf(xk_right), 1-dist.cdf(xk_right))
    assert_allclose(dist.sf(xk_left), 1-dist.cdf(xk_left))

    # logsf is compared with the log of the sf
    with np.errstate(divide='ignore'):
        assert_allclose(dist.logsf(xk), np.log(dist.sf(xk)),
                        atol=1e-15)
        assert_allclose(dist.logsf(xk_right), np.log(dist.sf(xk_right)),
                        atol=1e-15)
        assert_allclose(dist.logsf(xk_left), np.log(dist.sf(xk_left)),
                        atol=1e-15)

    # ppf
    assert_allclose(dist.ppf(cdf2), xk)
    assert_allclose(dist.ppf(cdf2_left), xk)
    assert_allclose(dist.ppf(cdf2_right)[:-1], xk[1:])
    assert_allclose(dist.ppf(0), a - 1)
    assert_allclose(dist.ppf(1), b)

    # isf
    sf2 = dist.sf(xk)
    assert_allclose(dist.isf(sf2), xk)
    assert_allclose(dist.isf(1-cdf2_left), dist.ppf(cdf2_left))
    assert_allclose(dist.isf(1-cdf2_right), dist.ppf(cdf2_right))
    assert_allclose(dist.isf(0), b)
    assert_allclose(dist.isf(1), a - 1)

    # interval is (ppf(alpha/2), isf(alpha/2))
    ps = np.linspace(0.01, 0.99, 10)
    int2 = dist.ppf(ps/2), dist.isf(ps/2)
    assert_allclose(dist.interval(1-ps), int2)
    assert_allclose(dist.interval(0), dist.median())
    assert_allclose(dist.interval(1), (a-1, b))

    # median is simply ppf(0.5)
    med2 = dist.ppf(0.5)
    assert_allclose(dist.median(), med2)

    # all four stats (mean, var, skew, and kurtosis) from the definitions
    mean2 = np.sum(xk*pk)
    var2 = np.sum((xk - mean2)**2 * pk)
    skew2 = np.sum((xk - mean2)**3 * pk) / var2**(3/2)
    kurt2 = np.sum((xk - mean2)**4 * pk) / var2**2 - 3
    assert_allclose(dist.mean(), mean2)
    assert_allclose(dist.std(), np.sqrt(var2))
    assert_allclose(dist.var(), var2)
    assert_allclose(dist.stats(moments='mvsk'), (mean2, var2, skew2, kurt2))

    # noncentral moment against definition
    mom3 = np.sum((xk**3) * pk)
    assert_allclose(dist.moment(3), mom3)

    # expect - check against moments
    assert_allclose(dist.expect(lambda x: 1), 1)
    assert_allclose(dist.expect(), mean2)
    assert_allclose(dist.expect(lambda x: x**3), mom3)

    # entropy is the negative of the expected value of log(p)
    with np.errstate(divide='ignore'):
        assert_allclose(-dist.expect(lambda x: dist.logpmf(x)), dist.entropy())

    # RVS is just ppf of uniform random variates
    # (the generator is re-created with the same seed so that both draws
    # start from the same state)
    rng = np.random.default_rng(98430143469)
    rvs = dist.rvs(size=100, random_state=rng)
    rng = np.random.default_rng(98430143469)
    rvs0 = dist.ppf(rng.random(size=100))
    assert_allclose(rvs, rvs0)
|
||||
|
||||
def test__pmf_float_input():
    """`rvs()` can be computed when `_pmf` requires float input (gh-21272)."""

    class rv_exponential(stats.rv_discrete):
        def _pmf(self, i):
            return (2/3)*3**(1 - i)

    dist = rv_exponential(a=0.0, b=float('inf'))
    # should not crash due to integer input to `_pmf`
    sample = dist.rvs(random_state=42)
    assert_allclose(sample, 0)
|
||||
648
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py
vendored
Normal file
648
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py
vendored
Normal file
@@ -0,0 +1,648 @@
|
||||
import pytest
|
||||
import itertools
|
||||
|
||||
from scipy.stats import (betabinom, betanbinom, hypergeom, nhypergeom,
|
||||
bernoulli, boltzmann, skellam, zipf, zipfian, binom,
|
||||
nbinom, nchypergeom_fisher, nchypergeom_wallenius,
|
||||
randint)
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (
|
||||
assert_almost_equal, assert_equal, assert_allclose, suppress_warnings
|
||||
)
|
||||
from scipy.special import binom as special_binom
|
||||
from scipy.optimize import root_scalar
|
||||
from scipy.integrate import quad
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression CDF[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize(
    'k, M, n, N, expected, rtol',
    [
        (3, 10, 4, 5, 0.9761904761904762, 1e-15),
        (107, 10000, 3000, 215, 0.9999999997226765, 1e-15),
        (10, 10000, 3000, 215, 2.681682217692179e-21, 5e-11),
    ],
)
def test_hypergeom_cdf(k, M, n, N, expected, rtol):
    """`hypergeom.cdf` matches the reference value to within `rtol`."""
    assert_allclose(hypergeom.cdf(k, M, n, N), expected, rtol=rtol)
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize(
    'k, M, n, N, expected, rtol',
    [
        (25, 10000, 3000, 215, 0.9999999999052958, 1e-15),
        (125, 10000, 3000, 215, 1.4416781705752128e-18, 5e-11),
    ],
)
def test_hypergeom_sf(k, M, n, N, expected, rtol):
    """`hypergeom.sf` matches the reference value to within `rtol`."""
    assert_allclose(hypergeom.sf(k, M, n, N), expected, rtol=rtol)
|
||||
|
||||
|
||||
def test_hypergeom_logpmf():
    """Check `hypergeom.logpmf` symmetries and its Bernoulli special case."""
    # symmetries test
    # f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
    k, N, K, n = 5, 50, 10, 5
    reference = hypergeom.logpmf(k, N, K, n)
    equivalents = [
        hypergeom.logpmf(n - k, N, N - K, n),
        hypergeom.logpmf(K - k, N, K, N - n),
        hypergeom.logpmf(k, N, n, K),
    ]
    for value in equivalents:
        assert_almost_equal(reference, value, decimal=12)

    # test related distribution
    # Bernoulli distribution if n = 1
    k, N, K, n = 1, 10, 7, 1
    assert_almost_equal(hypergeom.logpmf(k, N, K, n),
                        bernoulli.logpmf(k, K/N), decimal=12)
|
||||
|
||||
|
||||
def test_nhypergeom_pmf():
    """Compare `nhypergeom.pmf` with an expression built from `hypergeom`."""
    M, n, r = 45, 13, 8
    k = 6
    draws = k + r - 1
    from_hypergeom = (hypergeom.pmf(k, M, n, draws)
                      * (M - n - (r - 1)) / (M - draws))
    assert_allclose(from_hypergeom, nhypergeom.pmf(k, M, n, r), rtol=1e-10)
|
||||
|
||||
|
||||
def test_nhypergeom_pmfcdf():
    """Check `nhypergeom` pmf and cdf at k = 0..n for M=8, n=3, r=4."""
    M, n, r = 8, 3, 4
    k = np.arange(n + 1)
    assert_allclose(nhypergeom.pmf(k, M, n, r),
                    [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
    assert_allclose(nhypergeom.cdf(k, M, n, r),
                    [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_r0():
    """With `r = 0` the pmf is 1 at `k = 0` and 0 at the other tested `k`."""
    M, n, r = 10, 3, 0
    k = [[0, 1, 2, 0], [1, 2, 0, 3]]
    expected = [[1, 0, 0, 1], [0, 0, 1, 0]]
    assert_allclose(nhypergeom.pmf(k, M, n, r), expected, rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_rvs_shape():
    """`rvs` returns an array of exactly the requested `size`.

    The requested size has more dimensions than the broadcast parameters.
    """
    size = (5, 1, 2, 3)
    sample = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=size)
    assert sample.shape == size
|
||||
|
||||
|
||||
def test_nhypergeom_accuracy():
    """`nhypergeom.rvs` equals inverse transform sampling (post-gh-13431)."""
    M, n, r = 22, 7, 11
    # Both draws start from the same global seed.
    np.random.seed(0)
    sampled = nhypergeom.rvs(M, n, r, size=100)
    np.random.seed(0)
    uniform = np.random.uniform(size=100)
    assert_equal(sampled, nhypergeom.ppf(uniform, M, n, r))
|
||||
|
||||
|
||||
def test_boltzmann_upper_bound():
    """Check `boltzmann` pmf/cdf at points inside and outside 0..N-1."""
    k = np.arange(-3, 5)

    # N = 1: the pmf is 1 at k == 0 and 0 at every other tested point
    assert_equal(boltzmann.pmf(k, 0.123, 1), k == 0)

    lam, N = np.log(2), 3
    assert_allclose(boltzmann.pmf(k, lam, N),
                    [0, 0, 0, 4/7, 2/7, 1/7, 0, 0], rtol=1e-13)
    assert_allclose(boltzmann.cdf(k, lam, N),
                    [0, 0, 0, 4/7, 6/7, 1, 1, 1], rtol=1e-13)
|
||||
|
||||
|
||||
def test_betabinom_a_and_b_unity():
    """`betabinom(n, 1, 1)` is a discrete uniform distribution on 0..n."""
    n = 20
    count = n + 1
    uniform = np.repeat(1 / count, count)
    assert_almost_equal(betabinom(n, 1, 1).pmf(np.arange(count)), uniform)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtypes', itertools.product((int, float), repeat=3))
def test_betabinom_stats_a_and_b_integers_gh18026(dtypes):
    """`betabinom` kurtosis works for any int/float mix of parameters.

    gh-18026 reported that the kurtosis calculation fails when some
    parameters are integers. Check that this is resolved.
    """
    values = (10, 2, 3)
    n, a, b = (cast(value) for cast, value in zip(dtypes, values))
    assert_allclose(betabinom.stats(n, a, b, moments='k'), -0.6904761904761907)
|
||||
|
||||
|
||||
def test_betabinom_bernoulli():
    """`betabinom(1, a, b)` equals `bernoulli(a / (a + b))`."""
    a, b = 2.3, 0.63
    k = np.arange(2)
    assert_almost_equal(betabinom(1, a, b).pmf(k),
                        bernoulli(a / (a + b)).pmf(k))
|
||||
|
||||
|
||||
def test_issue_10317():
    """`nbinom.interval` with `p = 1` is `(0, 0)`."""
    interval = nbinom.interval(confidence=0.9, n=10, p=1)
    assert_equal(interval, (0, 0))
|
||||
|
||||
|
||||
def test_issue_11134():
    """`binom.interval` with `p = 0` is `(0, 0)`."""
    interval = binom.interval(confidence=0.95, n=10, p=0)
    assert_equal(interval, (0, 0))
|
||||
|
||||
|
||||
def test_issue_7406():
    """`binom.ppf` with `n = 0` is 0 for random q; endpoints are -1 and 0."""
    n, p = 0, 0.5
    np.random.seed(0)
    q = np.random.rand(10)
    assert_equal(binom.ppf(q, n, p), 0)

    # Also check that endpoints (q=0, q=1) are correct
    for endpoint, expected in ((0, -1), (1, 0)):
        assert_equal(binom.ppf(endpoint, n, p), expected)
|
||||
|
||||
|
||||
def test_issue_5122():
    """`binom.ppf` with `p = 0`: -1 at q=0, 0 for interior q, `n` at q=1."""
    p = 0
    n = np.random.randint(100, size=10)

    assert_equal(binom.ppf(0, n, p), -1)
    assert_equal(binom.ppf(np.linspace(0.01, 0.99, 10), n, p), 0)
    assert_equal(binom.ppf(1, n, p), n)
|
||||
|
||||
|
||||
def test_issue_1603():
    """`binom.ppf(0.01)` is 0 for success probabilities from 1e-3 to 1e-100."""
    p = np.logspace(-3, -100)
    frozen = binom(1000, p)
    assert_equal(frozen.ppf(0.01), 0)
|
||||
|
||||
|
||||
def test_issue_5503():
    """`binom.cdf(x, 2*x, 0.5)` stays near 0.5 for x from 1e3 to 1e14."""
    x = np.logspace(3, 14, 12)
    assert_allclose(binom.cdf(x, 2*x, 0.5), 0.5, atol=1e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'x, n, p, cdf_desired',
    [
        (300, 1000, 3/10, 0.51559351981411995636),
        (3000, 10000, 3/10, 0.50493298381929698016),
        (30000, 100000, 3/10, 0.50156000591726422864),
        (300000, 1000000, 3/10, 0.50049331906666960038),
        (3000000, 10000000, 3/10, 0.50015600124585261196),
        (30000000, 100000000, 3/10, 0.50004933192735230102),
        (30010000, 100000000, 3/10, 0.98545384016570790717),
        (29990000, 100000000, 3/10, 0.01455017177985268670),
        (29950000, 100000000, 3/10, 5.02250963487432024943e-28),
    ],
)
def test_issue_5503pt2(x, n, p, cdf_desired):
    """`binom.cdf` matches the reference value for large `n`."""
    computed = binom.cdf(x, n, p)
    assert_allclose(computed, cdf_desired)
|
||||
|
||||
|
||||
def test_issue_5503pt3():
    """`binom.cdf` for n = 1e12, p = 1e-12 matches the Wolfram Alpha value."""
    # From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]
    reference = 0.91969860292869777384
    assert_allclose(binom.cdf(2, 10**12, 10**-12), reference)
|
||||
|
||||
|
||||
def test_issue_6682():
    """`nbinom.sf` far in the tail matches the reference value from R."""
    # Reference value from R:
    # options(digits=16)
    # print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
    reference = 1.460458510976452e-35
    assert_allclose(nbinom.sf(250, 50, 32./63.), reference)
|
||||
|
||||
|
||||
def test_issue_19747():
|
||||
# test that negative k does not raise an error in nbinom.logcdf
|
||||
result = nbinom.logcdf([5, -1, 1], 5, 0.5)
|
||||
reference = [-0.47313352, -np.inf, -2.21297293]
|
||||
assert_allclose(result, reference)
|
||||
|
||||
|
||||
def test_boost_divide_by_zero_issue_15101():
    """`binom.pmf(996, 1000, 0.01)` is (numerically) zero."""
    k, n, p = 996, 1000, 0.01
    assert_allclose(binom.pmf(k, n, p), 0.0)
|
||||
|
||||
|
||||
def test_skellam_gh11474():
    """`skellam.cdf(0, mu, mu)` matches R for a wide range of `mu`.

    Tests the issue reported in gh-11474 caused by `cdfchn`.
    """
    mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
    # generated in R
    # library(skellam)
    # options(digits = 16)
    # mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
    # pskellam(0, mu, mu, TRUE)
    cdf_expected = [
        0.6542541612768356,
        0.5448901559424127,
        0.5141135799745580,
        0.5044605891382528,
        0.5019947363350450,
        0.5019848365953181,
        0.5019750827993392,
        0.5019466621805060,
        0.5018209330219539,
    ]
    assert_allclose(skellam.cdf(0, mu, mu), cdf_expected)
|
||||
|
||||
|
||||
class TestZipfian:
    """Tests for the `zipfian` distribution."""

    def test_zipfian_asymptotic(self):
        """`zipfian(a, n)` approaches `zipf(a)` as `n` grows large."""
        a = 6.5
        N = 10000000
        k = np.arange(1, 21)
        for method in ('pmf', 'cdf', 'sf'):
            assert_allclose(getattr(zipfian, method)(k, a, N),
                            getattr(zipf, method)(k, a))
        assert_allclose(zipfian.stats(a, N, moments='msvk'),
                        zipf.stats(a, moments='msvk'))

    def test_zipfian_continuity(self):
        """Results just below and just above `a = 1` agree.

        (a = 1 switches between methods of calculating harmonic sum)
        """
        below, above = 0.99999999, 1.00000001
        N = 30
        k = np.arange(1, N + 1)
        rtol = 5e-7
        for method in ('pmf', 'cdf', 'sf'):
            evaluate = getattr(zipfian, method)
            assert_allclose(evaluate(k, below, N), evaluate(k, above, N),
                            rtol=rtol)
        assert_allclose(zipfian.stats(below, N, moments='msvk'),
                        zipfian.stats(above, N, moments='msvk'), rtol=rtol)

    def test_zipfian_R(self):
        """Compare pmf and cdf against values from the R VGAM package."""
        # library(VGAM)
        # k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
        # a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
        #        4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
        # n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
        # pmf <- dzipf(k, N = n, shape = a)
        # cdf <- pzipf(k, N = n, shape = a)
        # print(pmf)
        # print(cdf)
        np.random.seed(0)
        k = np.random.randint(1, 20, size=10)
        a = np.random.rand(10)*10 + 1
        n = np.random.randint(1, 100, size=10)
        pmf_ref = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
                   3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
                   5.860662e-06, 1.053948e-04]
        cdf_ref = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
                   0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
        # skip the first point; zipUC is not accurate for low a, n
        tail = slice(1, None)
        assert_allclose(zipfian.pmf(k, a, n)[tail], pmf_ref[tail], rtol=1e-6)
        assert_allclose(zipfian.cdf(k, a, n)[tail], cdf_ref[tail], rtol=5e-5)

    # Parameter pairs (a, n) for `test_zipfian_naive`, built once when the
    # class body is executed.
    np.random.seed(0)
    naive_tests = np.vstack((np.logspace(-2, 1, 10),
                             np.random.randint(2, 40, 10))).T

    @pytest.mark.parametrize("a, n", naive_tests)
    def test_zipfian_naive(self, a, n):
        """Compare pmf, cdf and moments with a bare-bones implementation."""

        @np.vectorize
        def harmonic(n, s):
            """Naive implementation of harmonic sum"""
            return (1/np.arange(1, n+1)**s).sum()

        @np.vectorize
        def naive_pmf(k, a, n):
            """Naive implementation of zipfian pmf"""
            if 1 <= k <= n:
                return 1 / k**a / harmonic(n, a)
            return 0.

        k = np.arange(n+1)
        weights = naive_pmf(k, a, n)
        cumulative = np.cumsum(weights)
        mean = np.average(k, weights=weights)
        var = np.average((k - mean)**2, weights=weights)
        std = var**0.5
        standardized = (k - mean)/std
        skew = np.average(standardized**3, weights=weights)
        kurtosis = np.average(standardized**4, weights=weights) - 3
        assert_allclose(zipfian.pmf(k, a, n), weights)
        assert_allclose(zipfian.cdf(k, a, n), cumulative)
        assert_allclose(zipfian.stats(a, n, moments="mvsk"),
                        [mean, var, skew, kurtosis])

    def test_pmf_integer_k(self):
        """`pmf` gives the same output for default-int and int32 `k`."""
        k = np.arange(0, 1000)
        frozen = zipfian(111, 22)
        assert_equal(frozen.pmf(k), frozen.pmf(k.astype(np.int32)))
|
||||
|
||||
|
||||
class TestNCH:
    """Tests for `nchypergeom_fisher` and `nchypergeom_wallenius`.

    The random parameters below are drawn once, when the class body is
    executed, and are shared by the tests through `self`.
    """
    np.random.seed(2)  # seeds 0 and 1 had some xl = xu; randint failed
    shape = (2, 4, 3)
    max_m = 100
    m1 = np.random.randint(1, max_m, size=shape)    # red balls
    m2 = np.random.randint(1, max_m, size=shape)    # white balls
    N = m1 + m2                                     # total balls
    n = randint.rvs(0, N, size=N.shape)             # number of draws
    xl = np.maximum(0, n-m2)                        # lower bound of support
    xu = np.minimum(n, m1)                          # upper bound of support
    x = randint.rvs(xl, xu, size=xl.shape)
    odds = np.random.rand(*x.shape)*2

    # test output is more readable when function names (strings) are passed
    @pytest.mark.parametrize('dist_name',
                             ['nchypergeom_fisher', 'nchypergeom_wallenius'])
    def test_nch_hypergeom(self, dist_name):
        """With `odds=1` the pmf must equal `hypergeom.pmf`."""
        # Both noncentral hypergeometric distributions reduce to the
        # hypergeometric distribution when odds = 1
        dists = {'nchypergeom_fisher': nchypergeom_fisher,
                 'nchypergeom_wallenius': nchypergeom_wallenius}
        dist = dists[dist_name]
        x, N, m1, n = self.x, self.N, self.m1, self.n
        assert_allclose(dist.pmf(x, N, m1, n, odds=1),
                        hypergeom.pmf(x, N, m1, n))

    def test_nchypergeom_fisher_naive(self):
        """Compare `nchypergeom_fisher` pmf, mean and variance with a direct sum."""
        # test against a very simple implementation
        x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds

        @np.vectorize
        def pmf_mean_var(x, N, m1, n, w):
            # simple implementation of nchypergeom_fisher pmf
            m2 = N - m1
            xl = np.maximum(0, n-m2)
            xu = np.minimum(n, m1)

            def f(x):
                # unnormalized weight of outcome `x`
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                return t1 * t2 * w**x

            def P(k):
                # sum of f(y) * y**k over the support xl..xu
                return sum(f(y)*y**k for y in range(xl, xu + 1))

            P0 = P(0)
            P1 = P(1)
            P2 = P(2)
            pmf = f(x) / P0
            mean = P1 / P0
            var = P2 / P0 - (P1 / P0)**2
            return pmf, mean, var

        pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
        assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
        assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
                        mean)
        assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
                        var)

    def test_nchypergeom_wallenius_naive(self):
        """Compare `nchypergeom_wallenius` with simple reference formulas.

        Mean and variance are checked with loose tolerances. The naive pmf
        is only required to agree for more than half of the parameter sets;
        for the remaining ones the naive pmf is shown not to sum to one
        while the SciPy pmf does.
        """
        # test against a very simple implementation

        # The parameters are re-drawn here with the same seed and the same
        # sequence of calls as in the class body.
        np.random.seed(2)
        shape = (2, 4, 3)
        max_m = 100
        m1 = np.random.randint(1, max_m, size=shape)
        m2 = np.random.randint(1, max_m, size=shape)
        N = m1 + m2
        n = randint.rvs(0, N, size=N.shape)
        xl = np.maximum(0, n-m2)
        xu = np.minimum(n, m1)
        x = randint.rvs(xl, xu, size=xl.shape)
        w = np.random.rand(*x.shape)*2

        def support(N, m1, n, w):
            # bounds of the support; `w` is accepted but not used
            m2 = N - m1
            xl = np.maximum(0, n-m2)
            xu = np.minimum(n, m1)
            return xl, xu

        @np.vectorize
        def mean(N, m1, n, w):
            # reference mean: root of `fun` bracketed by the support bounds
            m2 = N - m1
            xl, xu = support(N, m1, n, w)

            def fun(u):
                return u/m1 + (1 - (n-u)/m2)**w - 1

            return root_scalar(fun, bracket=(xl, xu)).root

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning,
                       message="invalid value encountered in mean")
            assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
                            mean(N, m1, n, w), rtol=2e-2)

        @np.vectorize
        def variance(N, m1, n, w):
            # reference variance computed from the reference mean `u`
            m2 = N - m1
            u = mean(N, m1, n, w)
            a = u * (m1 - u)
            b = (n-u)*(u + m2 - n)
            return N*a*b / ((N-1) * (m1*b + m2*a))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning,
                       message="invalid value encountered in mean")
            assert_allclose(
                nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
                variance(N, m1, n, w),
                rtol=5e-2
            )

        @np.vectorize
        def pmf(x, N, m1, n, w):
            # naive pmf: product of two binomial coefficients and an
            # integral over (0, 1) evaluated with `quad`
            m2 = N - m1
            xl, xu = support(N, m1, n, w)

            def integrand(t):
                D = w*(m1 - x) + (m2 - (n-x))
                res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
                return res

            def f(x):
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                the_integral = quad(integrand, 0, 1,
                                    epsrel=1e-16, epsabs=1e-16)
                return t1 * t2 * the_integral[0]

            return f(x)

        pmf0 = pmf(x, N, m1, n, w)
        pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)

        atol, rtol = 1e-6, 1e-6
        # boolean mask of parameter sets for which the two pmfs agree
        i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
        assert i.sum() > np.prod(shape) / 2  # works at least half the time

        # for those that fail, discredit the naive implementation
        # (the loop variables reuse the names of the arrays being iterated;
        # the `zip` arguments are evaluated before the names are rebound)
        for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
            # get the support
            m2 = N - m1
            xl, xu = support(N, m1, n, w)
            x = np.arange(xl, xu + 1)

            # calculate sum of pmf over the support
            # the naive implementation is very wrong in these cases
            assert pmf(x, N, m1, n, w).sum() < .5
            assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)

    def test_wallenius_against_mpmath(self):
        """Compare pmf, mean and variance with values precomputed by mpmath."""
        # precompute data with mpmath since naive implementation above
        # is not reliable. See source code in gh-13330.
        M = 50
        n = 30
        N = 20
        odds = 2.25
        # Expected results, computed with mpmath.
        sup = np.arange(21)
        pmf = np.array([3.699003068656875e-20,
                        5.89398584245431e-17,
                        2.1594437742911123e-14,
                        3.221458044649955e-12,
                        2.4658279241205077e-10,
                        1.0965862603981212e-08,
                        3.057890479665704e-07,
                        5.622818831643761e-06,
                        7.056482841531681e-05,
                        0.000618899425358671,
                        0.003854172932571669,
                        0.01720592676256026,
                        0.05528844897093792,
                        0.12772363313574242,
                        0.21065898367825722,
                        0.24465958845359234,
                        0.1955114898110033,
                        0.10355390084949237,
                        0.03414490375225675,
                        0.006231989845775931,
                        0.0004715577304677075])
        mean = 14.808018384813426
        var = 2.6085975877923717

        # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
        # has only three digits of accuracy (~ 2.1511e-14).
        assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
                        rtol=1e-13, atol=1e-13)
        assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
                        mean, rtol=1e-13)
        assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
                        var, rtol=1e-11)

    @pytest.mark.parametrize('dist_name',
                             ['nchypergeom_fisher', 'nchypergeom_wallenius'])
    def test_rvs_shape(self, dist_name):
        """`rvs` returns an array of exactly the requested `size`."""
        # Check that when given a size with more dimensions than the
        # dimensions of the broadcast parameters, rvs returns an array
        # with the correct shape.
        dists = {'nchypergeom_fisher': nchypergeom_fisher,
                 'nchypergeom_wallenius': nchypergeom_wallenius}
        dist = dists[dist_name]
        x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
        assert x.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mu, q, expected",
    [
        [10, 120, -1.240089881791596e-38],
        [1500, 0, -86.61466680572661],
    ],
)
def test_nbinom_11465(mu, q, expected):
    """`nbinom.logcdf` at extreme tails matches reference values."""
    size = 20
    # (n, p) computed from the (size, mu) arguments of the R call below
    n = size
    p = size/(size+mu)
    # In R:
    # options(digits=16)
    # pnbinom(mu=10, size=20, q=120, log.p=TRUE)
    assert_allclose(nbinom.logcdf(q, n, p), expected)
|
||||
|
||||
|
||||
def test_gh_17146():
    """Discrete distributions return a PMF of zero at non-integral x.

    See gh-17146.
    """
    p = 0.8
    x = np.linspace(0, 1, 11)
    pmf = bernoulli(p).pmf(x)
    non_integral = ~(x % 1 == 0)
    assert_allclose(pmf[-1], p)
    assert_allclose(pmf[0], 1-p)
    assert_equal(pmf[non_integral], 0)
|
||||
|
||||
|
||||
class TestBetaNBinom:
    """Accuracy tests for the `betanbinom` distribution."""

    @pytest.mark.parametrize(
        'x, n, a, b, ref',
        [
            [5, 5e6, 5, 20, 1.1520944824139114e-107],
            [100, 50, 5, 20, 0.002855762954310226],
            [10000, 1000, 5, 20, 1.9648515726019154e-05],
        ],
    )
    def test_betanbinom_pmf(self, x, n, a, b, ref):
        """The PMF stays accurate in the distribution tails."""
        # reference values computed with mpmath
        # from mpmath import mp
        # mp.dps = 500
        # def betanbinom_pmf(k, n, a, b):
        #     k = mp.mpf(k)
        #     a = mp.mpf(a)
        #     b = mp.mpf(b)
        #     n = mp.mpf(n)
        #     return float(mp.binomial(n + k - mp.one, k)
        #                  * mp.beta(a + n, b + k) / mp.beta(a, b))
        computed = betanbinom.pmf(x, n, a, b)
        assert_allclose(computed, ref, rtol=1e-10)

    @pytest.mark.parametrize(
        'n, a, b, ref',
        [
            [10000, 5000, 50, 0.12841520515722202],
            [10, 9, 9, 7.9224400871459695],
            [100, 1000, 10, 1.5849602176622748],
        ],
    )
    def test_betanbinom_kurtosis(self, n, a, b, ref):
        """The kurtosis matches reference values computed via mpmath."""
        # from mpmath import mp
        # def kurtosis_betanegbinom(n, a, b):
        #     n = mp.mpf(n)
        #     a = mp.mpf(a)
        #     b = mp.mpf(b)
        #     four = mp.mpf(4.)
        #     mean = n * b / (a - mp.one)
        #     var = (n * b * (n + a - 1.) * (a + b - 1.)
        #            / ((a - 2.) * (a - 1.)**2.))
        #     def f(k):
        #         return (mp.binomial(n + k - mp.one, k)
        #                 * mp.beta(a + n, b + k) / mp.beta(a, b)
        #                 * (k - mean)**four)
        #     fourth_moment = mp.nsum(f, [0, mp.inf])
        #     return float(fourth_moment/var**2 - 3.)
        computed = betanbinom.stats(n, a, b, moments="k")
        assert_allclose(computed, ref, rtol=3e-15)
|
||||
|
||||
|
||||
class TestZipf:
    """Tests for the `zipf` distribution."""

    def test_gh20692(self):
        """int32 data for `k` generates the same output as the default ints."""
        k = np.arange(0, 1000)
        frozen = zipf(9)
        assert_equal(frozen.pmf(k), frozen.pmf(k.astype(np.int32)))
|
||||
9815
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_distributions.py
vendored
Normal file
9815
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_distributions.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
304
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_entropy.py
vendored
Normal file
304
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_entropy.py
vendored
Normal file
@@ -0,0 +1,304 @@
|
||||
import math
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from scipy import stats
|
||||
from scipy.conftest import array_api_compatible
|
||||
from scipy._lib._array_api import xp_assert_close, xp_assert_equal, xp_assert_less
|
||||
|
||||
class TestEntropy:
|
||||
@array_api_compatible
def test_entropy_positive(self, xp):
    """Relative entropy is zero against itself and positive otherwise.

    See ticket #497.
    """
    p = xp.asarray([0.5, 0.2, 0.3])
    q = xp.asarray([0.1, 0.25, 0.65])
    same = stats.entropy(p, p)
    different = stats.entropy(p, q)

    zero = xp.asarray(0.)
    xp_assert_equal(same, zero)
    xp_assert_less(-different, zero)
|
||||
|
||||
@array_api_compatible
def test_entropy_base(self, xp):
    """Check the `base` argument of `stats.entropy`."""
    tol = xp.asarray(1.e-5)

    # Entropy of 16 equal weights in base 2 is compared with 4.
    pk = xp.ones(16)
    bits = stats.entropy(pk, base=2.)
    xp_assert_less(xp.abs(bits - 4.), tol)

    # The ratio of the natural-log and base-2 results is compared with log(2).
    qk = xp.where(xp.arange(16) < 8, xp.asarray(2.), xp.ones(16))
    natural = stats.entropy(pk, qk)
    binary = stats.entropy(pk, qk, base=2.)
    xp_assert_less(xp.abs(natural/binary - math.log(2.)), tol)
|
||||
|
||||
@array_api_compatible
def test_entropy_zero(self, xp):
    """Entropy of an input containing a zero entry (test for PR-479)."""
    values = xp.asarray([0., 1., 2.])
    expected = xp.asarray(0.63651416829481278)
    xp_assert_close(stats.entropy(values), expected)
|
||||
|
||||
@array_api_compatible
def test_entropy_2d(self, xp):
    """Relative entropy of 2-D inputs with the default axis."""
    p = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    q = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
    expected = xp.asarray([0.1933259, 0.18609809])
    xp_assert_close(stats.entropy(p, q), expected)
|
||||
|
||||
@array_api_compatible
def test_entropy_2d_zero(self, xp):
    """Relative entropy of 2-D inputs when `qk` or `pk` contains a zero."""
    q = xp.asarray([[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]])

    # zero only in `qk`: the first result is infinite
    p = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    xp_assert_close(stats.entropy(p, q),
                    xp.asarray([xp.inf, 0.18609809]))

    # zero at the same position in `pk` as well: the result is finite
    p = xp.asarray([[0.0, 0.2], [0.6, 0.3], [0.3, 0.5]])
    xp_assert_close(stats.entropy(p, q),
                    xp.asarray([0.17403988, 0.18609809]))
|
||||
|
||||
@array_api_compatible
def test_entropy_base_2d_nondefault_axis(self, xp):
    """Entropy of a 2-D input computed along `axis=1`."""
    p = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    expected = xp.asarray([0.63651417, 0.63651417, 0.66156324])
    xp_assert_close(stats.entropy(p, axis=1), expected)
|
||||
|
||||
@array_api_compatible
def test_entropy_2d_nondefault_axis(self, xp):
    """Relative entropy of 2-D inputs computed along `axis=1`."""
    p = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    q = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
    expected = xp.asarray([0.23104906, 0.23104906, 0.12770641])
    xp_assert_close(stats.entropy(p, q, axis=1), expected)
|
||||
|
||||
@array_api_compatible
def test_entropy_raises_value_error(self, xp):
    """`pk` of shape (3, 2) and `qk` of shape (2, 2) must raise ValueError."""
    pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    qk = xp.asarray([[0.1, 0.2], [0.6, 0.3]])
    message = "Array shapes are incompatible for broadcasting."
    with pytest.raises(ValueError, match=message):
        stats.entropy(pk, qk)
|
||||
|
||||
@array_api_compatible
def test_base_entropy_with_axis_0_is_equal_to_default(self, xp):
    """Passing `axis=0` explicitly must match calling without `axis`."""
    pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    xp_assert_close(stats.entropy(pk, axis=0),
                    stats.entropy(pk))
|
||||
|
||||
@array_api_compatible
def test_entropy_with_axis_0_is_equal_to_default(self, xp):
    """With `qk` given, `axis=0` must match calling without `axis`."""
    pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
    xp_assert_close(stats.entropy(pk, qk, axis=0),
                    stats.entropy(pk, qk))
|
||||
|
||||
@array_api_compatible
def test_base_entropy_transposed(self, xp):
    """Entropy of `pk.T` must match entropy of `pk` along `axis=1`."""
    pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    xp_assert_close(stats.entropy(pk.T),
                    stats.entropy(pk, axis=1))
|
||||
|
||||
@array_api_compatible
def test_entropy_transposed(self, xp):
    """Relative entropy of transposed inputs must match `axis=1` results."""
    pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
    qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
    xp_assert_close(stats.entropy(pk.T, qk.T),
                    stats.entropy(pk, qk, axis=1))
|
||||
|
||||
@array_api_compatible
def test_entropy_broadcasting(self, xp):
    """Broadcast `x` (3,) against `y` (2, 1) and compare row by row."""
    rng = np.random.default_rng(74187315492831452)
    x = xp.asarray(rng.random(3))
    y = xp.asarray(rng.random((2, 1)))
    res = stats.entropy(x, y, axis=-1)
    # Each slice of the broadcast result must equal the separate computation
    # against the corresponding row of `y`.
    xp_assert_equal(res[0], stats.entropy(x, y[0, ...]))
    xp_assert_equal(res[1], stats.entropy(x, y[1, ...]))
|
||||
|
||||
@array_api_compatible
def test_entropy_shape_mismatch(self, xp):
    """Shapes (10, 1, 12) and (11, 2) must raise the broadcasting error."""
    x = xp.ones((10, 1, 12))
    y = xp.ones((11, 2))
    message = "Array shapes are incompatible for broadcasting."
    with pytest.raises(ValueError, match=message):
        stats.entropy(x, y)
|
||||
|
||||
@array_api_compatible
def test_input_validation(self, xp):
    """A negative `base` must raise ValueError with the documented message."""
    x = xp.ones(10)
    message = "`base` must be a positive number."
    with pytest.raises(ValueError, match=message):
        stats.entropy(x, base=-2)
|
||||
|
||||
|
||||
class TestDifferentialEntropy:
    """
    Vasicek results are compared with the R package vsgoftest.

    # library(vsgoftest)
    #
    # samp <- c(<values>)
    # entropy.estimate(x = samp, window = <window_length>)

    """

    def test_differential_entropy_vasicek(self):
        """Vasicek estimate for default, minimal and larger window lengths."""

        random_state = np.random.RandomState(0)
        values = random_state.standard_normal(100)

        entropy = stats.differential_entropy(values, method='vasicek')
        assert_allclose(entropy, 1.342551, rtol=1e-6)

        entropy = stats.differential_entropy(values, window_length=1,
                                             method='vasicek')
        assert_allclose(entropy, 1.122044, rtol=1e-6)

        entropy = stats.differential_entropy(values, window_length=8,
                                             method='vasicek')
        assert_allclose(entropy, 1.349401, rtol=1e-6)

    def test_differential_entropy_vasicek_2d_nondefault_axis(self):
        """Vasicek estimate along `axis=1` of a (3, 100) sample."""
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        entropy = stats.differential_entropy(values, axis=1, method='vasicek')
        assert_allclose(
            entropy,
            [1.342551, 1.341826, 1.293775],
            rtol=1e-6,
        )

        entropy = stats.differential_entropy(values, axis=1, window_length=1,
                                             method='vasicek')
        assert_allclose(
            entropy,
            [1.122044, 1.102944, 1.129616],
            rtol=1e-6,
        )

        entropy = stats.differential_entropy(values, axis=1, window_length=8,
                                             method='vasicek')
        assert_allclose(
            entropy,
            [1.349401, 1.338514, 1.292332],
            rtol=1e-6,
        )

    def test_differential_entropy_raises_value_error(self):
        """Out-of-range `window_length` values must raise ValueError."""
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        # The message is used as a regex by `match`, hence the escaped
        # parentheses; the braces are filled in per case below.
        error_str = (
            r"Window length \({window_length}\) must be positive and less "
            r"than half the sample size \({sample_size}\)."
        )

        sample_size = values.shape[1]

        for window_length in {-1, 0, sample_size//2, sample_size}:

            formatted_error_str = error_str.format(
                window_length=window_length,
                sample_size=sample_size,
            )

            with assert_raises(ValueError, match=formatted_error_str):
                stats.differential_entropy(
                    values,
                    window_length=window_length,
                    axis=1,
                )

    def test_base_differential_entropy_with_axis_0_is_equal_to_default(self):
        """Passing `axis=0` explicitly must match calling without `axis`."""
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((100, 3))

        entropy = stats.differential_entropy(values, axis=0)
        default_entropy = stats.differential_entropy(values)
        assert_allclose(entropy, default_entropy)

    def test_base_differential_entropy_transposed(self):
        """Result for `values.T` must match result along `axis=1`."""
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        assert_allclose(
            stats.differential_entropy(values.T).T,
            stats.differential_entropy(values, axis=1),
        )

    def test_input_validation(self):
        """Invalid `base` and unknown `method` must raise ValueError."""
        x = np.random.rand(10)

        message = "`base` must be a positive number or `None`."
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, base=-2)

        message = "`method` must be one of..."
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, method='ekki-ekki')

    @pytest.mark.parametrize('method', ['vasicek', 'van es',
                                        'ebrahimi', 'correa'])
    def test_consistency(self, method):
        """Estimate from a large normal sample is close to the true entropy."""
        # test that method is a consistent estimator
        n = 10000 if method == 'correa' else 1000000
        rvs = stats.norm.rvs(size=n, random_state=0)
        expected = stats.norm.entropy()
        res = stats.differential_entropy(rvs, method=method)
        assert_allclose(res, expected, rtol=0.005)

    # values from differential_entropy reference [6], table 1, n=50, m=7
    norm_rmse_std_cases = {  # method: (RMSE, STD)
                           'vasicek': (0.198, 0.109),
                           'van es': (0.212, 0.110),
                           'correa': (0.135, 0.112),
                           'ebrahimi': (0.128, 0.109)
                           }

    @pytest.mark.parametrize('method, expected',
                             list(norm_rmse_std_cases.items()))
    def test_norm_rmse_std(self, method, expected):
        """RMSE and std of each estimator on normal samples match the table."""
        # test that RMSE and standard deviation of estimators matches values
        # given in differential_entropy reference [6]. Incidentally, also
        # tests vectorization.
        reps, n, m = 10000, 50, 7
        rmse_expected, std_expected = expected
        rvs = stats.norm.rvs(size=(reps, n), random_state=0)
        true_entropy = stats.norm.entropy()
        res = stats.differential_entropy(rvs, window_length=m,
                                         method=method, axis=-1)
        assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)),
                        rmse_expected, atol=0.005)
        assert_allclose(np.std(res), std_expected, atol=0.002)

    # values from differential_entropy reference [6], table 2, n=50, m=7
    expon_rmse_std_cases = {  # method: (RMSE, STD)
                            'vasicek': (0.194, 0.148),
                            'van es': (0.179, 0.149),
                            'correa': (0.155, 0.152),
                            'ebrahimi': (0.151, 0.148)
                            }

    @pytest.mark.parametrize('method, expected',
                             list(expon_rmse_std_cases.items()))
    def test_expon_rmse_std(self, method, expected):
        """RMSE and std of each estimator on expon samples match the table."""
        # test that RMSE and standard deviation of estimators matches values
        # given in differential_entropy reference [6]. Incidentally, also
        # tests vectorization.
        reps, n, m = 10000, 50, 7
        rmse_expected, std_expected = expected
        rvs = stats.expon.rvs(size=(reps, n), random_state=0)
        true_entropy = stats.expon.entropy()
        res = stats.differential_entropy(rvs, window_length=m,
                                         method=method, axis=-1)
        assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)),
                        rmse_expected, atol=0.005)
        assert_allclose(np.std(res), std_expected, atol=0.002)

    @pytest.mark.parametrize('n, method', [(8, 'van es'),
                                           (12, 'ebrahimi'),
                                           (1001, 'vasicek')])
    def test_method_auto(self, n, method):
        """Default method for sample size `n` must equal the named method."""
        rvs = stats.norm.rvs(size=(n,), random_state=0)
        res1 = stats.differential_entropy(rvs)
        res2 = stats.differential_entropy(rvs, method=method)
        assert res1 == res2
|
||||
432
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_fast_gen_inversion.py
vendored
Normal file
432
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_fast_gen_inversion.py
vendored
Normal file
@@ -0,0 +1,432 @@
|
||||
import pytest
|
||||
import warnings
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_array_equal, assert_allclose,
|
||||
suppress_warnings)
|
||||
from copy import deepcopy
|
||||
from scipy.stats.sampling import FastGeneratorInversion
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def test_bad_args():
    """Invalid constructor arguments must raise ValueError."""
    # loc and scale must be scalar
    with pytest.raises(ValueError, match="loc must be scalar"):
        FastGeneratorInversion(stats.norm(loc=(1.2, 1.3)))
    with pytest.raises(ValueError, match="scale must be scalar"):
        FastGeneratorInversion(stats.norm(scale=[1.5, 5.7]))

    with pytest.raises(ValueError, match="'test' cannot be used to seed"):
        FastGeneratorInversion(stats.norm(), random_state="test")

    msg = "Each of the 1 shape parameters must be a scalar"
    with pytest.raises(ValueError, match=msg):
        FastGeneratorInversion(stats.gamma([1.3, 2.5]))

    with pytest.raises(ValueError, match="`dist` must be a frozen"):
        FastGeneratorInversion("xy")

    with pytest.raises(ValueError, match="Distribution 'truncnorm' is not"):
        FastGeneratorInversion(stats.truncnorm(1.3, 4.5))
|
||||
|
||||
|
||||
def test_random_state():
|
||||
# fixed seed
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=68734509)
|
||||
x1 = gen.rvs(size=10)
|
||||
gen.random_state = 68734509
|
||||
x2 = gen.rvs(size=10)
|
||||
assert_array_equal(x1, x2)
|
||||
|
||||
# Generator
|
||||
urng = np.random.default_rng(20375857)
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=urng)
|
||||
x1 = gen.rvs(size=10)
|
||||
gen.random_state = np.random.default_rng(20375857)
|
||||
x2 = gen.rvs(size=10)
|
||||
assert_array_equal(x1, x2)
|
||||
|
||||
# RandomState
|
||||
urng = np.random.RandomState(2364)
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=urng)
|
||||
x1 = gen.rvs(size=10)
|
||||
gen.random_state = np.random.RandomState(2364)
|
||||
x2 = gen.rvs(size=10)
|
||||
assert_array_equal(x1, x2)
|
||||
|
||||
# if evaluate_error is called, it must not interfere with the random_state
|
||||
# used by rvs
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=68734509)
|
||||
x1 = gen.rvs(size=10)
|
||||
_ = gen.evaluate_error(size=5) # this will generate 5 uniform rvs
|
||||
x2 = gen.rvs(size=10)
|
||||
gen.random_state = 68734509
|
||||
x3 = gen.rvs(size=20)
|
||||
assert_array_equal(x2, x3[10:])
|
||||
|
||||
|
||||
# (distribution name in `scipy.stats`, shape-parameter tuple) pairs used to
# parametrize the tests below.
dists_with_params = [
    ("alpha", (3.5,)),
    ("anglit", ()),
    ("argus", (3.5,)),
    ("argus", (5.1,)),
    ("beta", (1.5, 0.9)),
    ("cosine", ()),
    ("betaprime", (2.5, 3.3)),
    ("bradford", (1.2,)),
    ("burr", (1.3, 2.4)),
    ("burr12", (0.7, 1.2)),
    ("cauchy", ()),
    ("chi2", (3.5,)),
    ("chi", (4.5,)),
    ("crystalball", (0.7, 1.2)),
    ("expon", ()),
    ("gamma", (1.5,)),
    ("gennorm", (2.7,)),
    ("gumbel_l", ()),
    ("gumbel_r", ()),
    ("hypsecant", ()),
    ("invgauss", (3.1,)),
    ("invweibull", (1.5,)),
    ("laplace", ()),
    ("logistic", ()),
    ("maxwell", ()),
    ("moyal", ()),
    ("norm", ()),
    ("pareto", (1.3,)),
    ("powerlaw", (7.6,)),
    ("rayleigh", ()),
    ("semicircular", ()),
    ("t", (5.7,)),
    ("wald", ()),
    ("weibull_max", (2.4,)),
    ("weibull_min", (1.2,)),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args"), dists_with_params)
def test_rvs_and_ppf(distname, args):
    """Compare `rvs` and `ppf` of the generator with the frozen distribution."""
    # check sample against rvs generated by rv_continuous
    urng = np.random.default_rng(9807324628097097)
    rng1 = getattr(stats, distname)(*args)
    rvs1 = rng1.rvs(size=500, random_state=urng)
    rng2 = FastGeneratorInversion(rng1, random_state=urng)
    rvs2 = rng2.rvs(size=500)
    assert stats.cramervonmises_2samp(rvs1, rvs2).pvalue > 0.01

    # check ppf
    q = [0.001, 0.1, 0.5, 0.9, 0.999]
    assert_allclose(rng1.ppf(q), rng2.ppf(q), atol=1e-10)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args"), dists_with_params)
def test_u_error(distname, args):
    """The u-error reported by `evaluate_error` must not exceed 1e-10."""
    dist = getattr(stats, distname)(*args)
    with suppress_warnings() as sup:
        # filter the warnings thrown by UNU.RAN
        sup.filter(RuntimeWarning)
        rng = FastGeneratorInversion(dist)
    # only the u-error is checked; the x-error is not requested
    u_error, _ = rng.evaluate_error(
        size=10_000, random_state=9807324628097097, x_error=False
    )
    assert u_error <= 1e-10
|
||||
|
||||
|
||||
@pytest.mark.xslow
@pytest.mark.xfail(reason="geninvgauss CDF is not accurate")
def test_geninvgauss_uerror():
    """u-error check for `geninvgauss`; marked as an expected failure."""
    dist = stats.geninvgauss(3.2, 1.5)
    rng = FastGeneratorInversion(dist)
    err = rng.evaluate_error(size=10_000, random_state=67982)
    assert err[0] < 1e-10
|
||||
|
||||
|
||||
# TODO: add more distributions
|
||||
@pytest.mark.parametrize(("distname, args"), [("beta", (0.11, 0.11))])
def test_error_extreme_params(distname, args):
    """For extreme shape parameters, a large u-error requires a small x-error."""
    # take extreme parameters where u-error might not be below the tolerance
    # due to limitations of floating point arithmetic
    with suppress_warnings() as sup:
        # filter the warnings thrown by UNU.RAN for such extreme parameters
        sup.filter(RuntimeWarning)
        dist = getattr(stats, distname)(*args)
        rng = FastGeneratorInversion(dist)
    u_error, x_error = rng.evaluate_error(
        size=10_000, random_state=980732462809709732623, x_error=True
    )
    if u_error >= 2.5 * 1e-10:
        assert x_error < 1e-9
|
||||
|
||||
|
||||
def test_evaluate_error_inputs():
    """Non-integer and tuple `size` must be rejected by `evaluate_error`."""
    gen = FastGeneratorInversion(stats.norm())
    with pytest.raises(ValueError, match="size must be an integer"):
        gen.evaluate_error(size=3.5)
    with pytest.raises(ValueError, match="size must be an integer"):
        gen.evaluate_error(size=(3, 3))
|
||||
|
||||
|
||||
def test_rvs_ppf_loc_scale():
|
||||
loc, scale = 3.5, 2.3
|
||||
dist = stats.norm(loc=loc, scale=scale)
|
||||
rng = FastGeneratorInversion(dist, random_state=1234)
|
||||
r = rng.rvs(size=1000)
|
||||
r_rescaled = (r - loc) / scale
|
||||
assert stats.cramervonmises(r_rescaled, "norm").pvalue > 0.01
|
||||
q = [0.001, 0.1, 0.5, 0.9, 0.999]
|
||||
assert_allclose(rng._ppf(q), rng.ppf(q), atol=1e-10)
|
||||
|
||||
|
||||
def test_domain():
|
||||
# only a basic check that the domain argument is passed to the
|
||||
# UNU.RAN generators
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=(-1, 1))
|
||||
r = rng.rvs(size=100)
|
||||
assert -1 <= r.min() < r.max() <= 1
|
||||
|
||||
# if loc and scale are used, new domain is loc + scale*domain
|
||||
loc, scale = 3.5, 1.3
|
||||
dist = stats.norm(loc=loc, scale=scale)
|
||||
rng = FastGeneratorInversion(dist, domain=(-1.5, 2))
|
||||
r = rng.rvs(size=100)
|
||||
lb, ub = loc - scale * 1.5, loc + scale * 2
|
||||
assert lb <= r.min() < r.max() <= ub
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args, expected"),
                         [("beta", (3.5, 2.5), (0, 1)),
                          ("norm", (), (-np.inf, np.inf))])
def test_support(distname, args, expected):
    """`support()` must follow changes to `loc` and `scale`."""
    # test that the support is updated if truncation and loc/scale are applied
    # use beta distribution since it is a transformed betaprime distribution,
    # so it is important that the correct support is considered
    # (i.e., the support of beta is (0,1), while betaprime is (0, inf))
    dist = getattr(stats, distname)(*args)
    rng = FastGeneratorInversion(dist)
    assert_array_equal(rng.support(), expected)
    rng.loc = 1
    rng.scale = 2
    assert_array_equal(rng.support(), 1 + 2*np.array(expected))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args"),
                         [("beta", (3.5, 2.5)), ("norm", ())])
def test_support_truncation(distname, args):
    """`support()` must equal the `domain`, transformed by `loc`/`scale`."""
    # similar test for truncation
    dist = getattr(stats, distname)(*args)
    rng = FastGeneratorInversion(dist, domain=(0.5, 0.7))
    assert_array_equal(rng.support(), (0.5, 0.7))
    rng.loc = 1
    rng.scale = 2
    assert_array_equal(rng.support(), (1 + 2 * 0.5, 1 + 2 * 0.7))
|
||||
|
||||
|
||||
def test_domain_shift_truncation():
|
||||
# center of norm is zero, it should be shifted to the left endpoint of
|
||||
# domain. if this was not the case, PINV in UNURAN would raise a warning
|
||||
# as the center is not inside the domain
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=(1, 2))
|
||||
r = rng.rvs(size=100)
|
||||
assert 1 <= r.min() < r.max() <= 2
|
||||
|
||||
|
||||
def test_non_rvs_methods_with_domain():
|
||||
# as a first step, compare truncated normal against stats.truncnorm
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=(2.3, 3.2))
|
||||
trunc_norm = stats.truncnorm(2.3, 3.2)
|
||||
# take values that are inside and outside the domain
|
||||
x = (2.0, 2.4, 3.0, 3.4)
|
||||
p = (0.01, 0.5, 0.99)
|
||||
assert_allclose(rng._cdf(x), trunc_norm.cdf(x))
|
||||
assert_allclose(rng._ppf(p), trunc_norm.ppf(p))
|
||||
loc, scale = 2, 3
|
||||
rng.loc = 2
|
||||
rng.scale = 3
|
||||
trunc_norm = stats.truncnorm(2.3, 3.2, loc=loc, scale=scale)
|
||||
x = np.array(x) * scale + loc
|
||||
assert_allclose(rng._cdf(x), trunc_norm.cdf(x))
|
||||
assert_allclose(rng._ppf(p), trunc_norm.ppf(p))
|
||||
|
||||
# do another sanity check with beta distribution
|
||||
# in that case, it is important to use the correct domain since beta
|
||||
# is a transformation of betaprime which has a different support
|
||||
rng = FastGeneratorInversion(stats.beta(2.5, 3.5), domain=(0.3, 0.7))
|
||||
rng.loc = 2
|
||||
rng.scale = 2.5
|
||||
# the support is 2.75, , 3.75 (2 + 2.5 * 0.3, 2 + 2.5 * 0.7)
|
||||
assert_array_equal(rng.support(), (2.75, 3.75))
|
||||
x = np.array([2.74, 2.76, 3.74, 3.76])
|
||||
# the cdf needs to be zero outside of the domain
|
||||
y_cdf = rng._cdf(x)
|
||||
assert_array_equal((y_cdf[0], y_cdf[3]), (0, 1))
|
||||
assert np.min(y_cdf[1:3]) > 0
|
||||
# ppf needs to map 0 and 1 to the boundaries
|
||||
assert_allclose(rng._ppf(y_cdf), (2.75, 2.76, 3.74, 3.75))
|
||||
|
||||
|
||||
def test_non_rvs_methods_without_domain():
|
||||
norm_dist = stats.norm()
|
||||
rng = FastGeneratorInversion(norm_dist)
|
||||
x = np.linspace(-3, 3, num=10)
|
||||
p = (0.01, 0.5, 0.99)
|
||||
assert_allclose(rng._cdf(x), norm_dist.cdf(x))
|
||||
assert_allclose(rng._ppf(p), norm_dist.ppf(p))
|
||||
loc, scale = 0.5, 1.3
|
||||
rng.loc = loc
|
||||
rng.scale = scale
|
||||
norm_dist = stats.norm(loc=loc, scale=scale)
|
||||
assert_allclose(rng._cdf(x), norm_dist.cdf(x))
|
||||
assert_allclose(rng._ppf(p), norm_dist.ppf(p))
|
||||
|
||||
@pytest.mark.parametrize(("domain, x"),
                         [(None, 0.5),
                          ((0, 1), 0.5),
                          ((0, 1), 1.5)])
def test_scalar_inputs(domain, x):
    """ pdf, cdf etc should map scalar values to scalars. check with and
    w/o domain since domain impacts pdf, cdf etc
    Take x inside and outside of domain """
    rng = FastGeneratorInversion(stats.norm(), domain=domain)
    assert np.isscalar(rng._cdf(x))
    assert np.isscalar(rng._ppf(0.5))
|
||||
|
||||
|
||||
def test_domain_argus_large_chi():
|
||||
# for large chi, the Gamma distribution is used and the domain has to be
|
||||
# transformed. this is a test to ensure that the transformation works
|
||||
chi, lb, ub = 5.5, 0.25, 0.75
|
||||
rng = FastGeneratorInversion(stats.argus(chi), domain=(lb, ub))
|
||||
rng.random_state = 4574
|
||||
r = rng.rvs(size=500)
|
||||
assert lb <= r.min() < r.max() <= ub
|
||||
# perform goodness of fit test with conditional cdf
|
||||
cdf = stats.argus(chi).cdf
|
||||
prob = cdf(ub) - cdf(lb)
|
||||
assert stats.cramervonmises(r, lambda x: cdf(x) / prob).pvalue > 0.05
|
||||
|
||||
|
||||
def test_setting_loc_scale():
|
||||
rng = FastGeneratorInversion(stats.norm(), random_state=765765864)
|
||||
r1 = rng.rvs(size=1000)
|
||||
rng.loc = 3.0
|
||||
rng.scale = 2.5
|
||||
r2 = rng.rvs(1000)
|
||||
# rescaled r2 should be again standard normal
|
||||
assert stats.cramervonmises_2samp(r1, (r2 - 3) / 2.5).pvalue > 0.05
|
||||
# reset values to default loc=0, scale=1
|
||||
rng.loc = 0
|
||||
rng.scale = 1
|
||||
r2 = rng.rvs(1000)
|
||||
assert stats.cramervonmises_2samp(r1, r2).pvalue > 0.05
|
||||
|
||||
|
||||
def test_ignore_shape_range():
    """`ignore_shape_range=True` bypasses the shape-parameter range check."""
    msg = "No generator is defined for the shape parameters"
    with pytest.raises(ValueError, match=msg):
        FastGeneratorInversion(stats.t(0.03))
    rng = FastGeneratorInversion(stats.t(0.03), ignore_shape_range=True)
    # we can ignore the recommended range of shape parameters
    # but u-error can be expected to be too large in that case
    u_err, _ = rng.evaluate_error(size=1000, random_state=234)
    assert u_err >= 1e-6
|
||||
|
||||
@pytest.mark.xfail_on_32bit(
    "NumericalInversePolynomial.qrvs fails for Win 32-bit"
)
class TestQRVS:
    """Tests for `FastGeneratorInversion.qrvs` (quasi-random variates)."""

    def test_input_validation(self):
        """Invalid `qmc_engine` and inconsistent `d` must raise ValueError."""
        gen = FastGeneratorInversion(stats.norm())

        match = "`qmc_engine` must be an instance of..."
        with pytest.raises(ValueError, match=match):
            gen.qrvs(qmc_engine=0)

        match = "`d` must be consistent with dimension of `qmc_engine`."
        with pytest.raises(ValueError, match=match):
            gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2))

    qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)]
    # `size=None` should not add anything to the shape, `size=1` should
    sizes = [
        (None, tuple()),
        (1, (1,)),
        (4, (4,)),
        ((4,), (4,)),
        ((2, 4), (2, 4)),
    ]
    # Neither `d=None` nor `d=1` should add anything to the shape
    ds = [(None, tuple()), (1, tuple()), (3, (3,))]

    @pytest.mark.parametrize("qrng", qrngs)
    @pytest.mark.parametrize("size_in, size_out", sizes)
    @pytest.mark.parametrize("d_in, d_out", ds)
    def test_QRVS_shape_consistency(self, qrng, size_in, size_out,
                                    d_in, d_out):
        """Output shape of `qrvs` must be `size_out + d_out`."""
        gen = FastGeneratorInversion(stats.norm())

        # If d and qrng.d are inconsistent, an error is raised
        if d_in is not None and qrng is not None and qrng.d != d_in:
            match = "`d` must be consistent with dimension of `qmc_engine`."
            with pytest.raises(ValueError, match=match):
                gen.qrvs(size_in, d=d_in, qmc_engine=qrng)
            return

        # Sometimes d is really determined by qrng
        if d_in is None and qrng is not None and qrng.d != 1:
            d_out = (qrng.d,)

        shape_expected = size_out + d_out

        # copy the engine before it is consumed so that the same uniform
        # sample can be drawn again for the reference computation
        qrng2 = deepcopy(qrng)
        qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng)
        if size_in is not None:
            assert qrvs.shape == shape_expected

        if qrng2 is not None:
            uniform = qrng2.random(np.prod(size_in) or 1)
            qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected)
            assert_allclose(qrvs, qrvs2, atol=1e-12)

    def test_QRVS_size_tuple(self):
        """Reshaping for a tuple `size` must keep each column's sequence intact."""
        # QMCEngine samples are always of shape (n, d). When `size` is a tuple,
        # we set `n = prod(size)` in the call to qmc_engine.random, transform
        # the sample, and reshape it to the final dimensions. When we reshape,
        # we need to be careful, because the _columns_ of the sample returned
        # by a QMCEngine are "independent"-ish, but the elements within the
        # columns are not. We need to make sure that this doesn't get mixed up
        # by reshaping: qrvs[..., i] should remain "independent"-ish of
        # qrvs[..., i+1], but the elements within qrvs[..., i] should be
        # transformed from the same low-discrepancy sequence.

        gen = FastGeneratorInversion(stats.norm())

        size = (3, 4)
        d = 5
        qrng = stats.qmc.Halton(d, seed=0)
        qrng2 = stats.qmc.Halton(d, seed=0)

        uniform = qrng2.random(np.prod(size))

        qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng)
        qrvs2 = stats.norm.ppf(uniform)

        for i in range(d):
            sample = qrvs[..., i]
            sample2 = qrvs2[:, i].reshape(size)
            assert_allclose(sample, sample2, atol=1e-12)
|
||||
|
||||
|
||||
def test_burr_overflow():
|
||||
# this case leads to an overflow error if math.exp is used
|
||||
# in the definition of the burr pdf instead of np.exp
|
||||
# a direct implementation of the PDF as x**(-c-1) / (1+x**(-c))**(d+1)
|
||||
# also leads to an overflow error in the setup
|
||||
args = (1.89128135, 0.30195177)
|
||||
with suppress_warnings() as sup:
|
||||
# filter potential overflow warning
|
||||
sup.filter(RuntimeWarning)
|
||||
gen = FastGeneratorInversion(stats.burr(*args))
|
||||
u_error, _ = gen.evaluate_error(random_state=4326)
|
||||
assert u_error <= 1e-10
|
||||
1038
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_fit.py
vendored
Normal file
1038
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_fit.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1857
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_hypotests.py
vendored
Normal file
1857
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_hypotests.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
608
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_kdeoth.py
vendored
Normal file
608
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_kdeoth.py
vendored
Normal file
@@ -0,0 +1,608 @@
|
||||
from scipy import stats, linalg, integrate
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_almost_equal, assert_, assert_equal,
|
||||
assert_array_almost_equal,
|
||||
assert_array_almost_equal_nulp, assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
|
||||
def test_kde_1d():
    """Compare a 1-D `gaussian_kde` of normal data with the fitted normal pdf."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]  # grid spacing used for the Riemann sums below

    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
def test_kde_1d_weighted():
    """Compare a weighted 1-D `gaussian_kde` with the weighted-fit normal pdf."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)
    # weighted mean and weighted standard deviation of the sample
    xnmean = np.average(xn, weights=wn)
    xnstd = np.sqrt(np.average((xn-xnmean)**2, weights=wn))

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]  # grid spacing used for the Riemann sums below

    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_kde_2d():
    """Compare a 2-D `gaussian_kde` with the generating multivariate normal."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]  # grid spacing along one axis

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    # large finite bounds used as the integration limits
    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_kde_2d_weighted():
    """Same checks as the unweighted 2-D test, with random sample weights."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
    wn = np.random.rand(n_basesample)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]  # grid spacing along one axis

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    # large finite bounds used as the integration limits
    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
def test_kde_bandwidth_method():
    """Default, callable and scalar ``bw_method`` must give the same KDE."""

    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    sample = np.random.randn(50)

    default_kde = stats.gaussian_kde(sample)
    # Same bandwidth, supplied once as a callable and once as a scalar.
    callable_kde = stats.gaussian_kde(sample, bw_method=scotts_factor)
    scalar_kde = stats.gaussian_kde(sample, bw_method=default_kde.factor)

    grid = np.linspace(-7, 7, 51)
    reference = default_kde.evaluate(grid)
    for candidate in (callable_kde, scalar_kde):
        assert_almost_equal(reference, candidate.evaluate(grid))

    # An unknown string is not a valid bandwidth method.
    assert_raises(ValueError, stats.gaussian_kde, sample,
                  bw_method='wrongstring')
|
||||
|
||||
|
||||
def test_kde_bandwidth_method_weighted():
    """Default, callable and scalar ``bw_method`` agree for a weighted KDE.

    Every estimator is built with the same non-uniform weights so that the
    effective sample size ``neff`` genuinely differs from ``n``; without
    weights this test would only repeat the unweighted one.
    """

    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.neff, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor, weights=wn)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor, weights=wn)

    # Guard against the weights being silently ignored: non-uniform
    # weights must shrink the effective sample size below n.
    assert_(gkde.neff < n_basesample)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn,
                  bw_method='wrongstring', weights=wn)
|
||||
|
||||
|
||||
# Subclasses that should stay working (extracted from various sources).
|
||||
# Unfortunately the earlier design of gaussian_kde made it necessary for users
|
||||
# to create these kinds of subclasses, or call _compute_covariance() directly.
|
||||
|
||||
class _kde_subclass1(stats.gaussian_kde):
|
||||
def __init__(self, dataset):
|
||||
self.dataset = np.atleast_2d(dataset)
|
||||
self.d, self.n = self.dataset.shape
|
||||
self.covariance_factor = self.scotts_factor
|
||||
self._compute_covariance()
|
||||
|
||||
|
||||
class _kde_subclass2(stats.gaussian_kde):
|
||||
def __init__(self, dataset):
|
||||
self.covariance_factor = self.scotts_factor
|
||||
super().__init__(dataset)
|
||||
|
||||
|
||||
class _kde_subclass4(stats.gaussian_kde):
|
||||
def covariance_factor(self):
|
||||
return 0.5 * self.silverman_factor()
|
||||
|
||||
|
||||
def test_gaussian_kde_subclassing():
    """Historical ways of subclassing ``gaussian_kde`` must keep working."""
    sample = np.array([-7, -5, 1, 4, 5], dtype=float)
    grid = np.linspace(-10, 10, num=50)

    # Reference: gaussian_kde itself.
    kde = stats.gaussian_kde(sample)
    ys = kde(grid)

    # Subclasses 1 and 2 only re-create the default configuration.
    for subclass in (_kde_subclass1, _kde_subclass2):
        assert_array_almost_equal_nulp(ys, subclass(sample)(grid), nulp=10)

    # subclass 3 was removed because we have no obligation to maintain
    # support for user invocation of private methods

    # Subclass 4 overrides the covariance factor.
    y4 = _kde_subclass4(sample)(sample)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]
    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    assert_array_almost_equal_nulp(ys, kde5(grid), nulp=10)
|
||||
|
||||
|
||||
def test_gaussian_kde_covariance_caching():
    """Resetting the bandwidth to the default must restore the default
    density."""
    sample = np.array([-7, -5, 1, 4, 5], dtype=float)
    grid = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Change the bandwidth, then switch back to the default rule.
    kde = stats.gaussian_kde(sample)
    for method in (0.5, 'scott'):
        kde.set_bandwidth(bw_method=method)

    assert_array_almost_equal(y_expected, kde(grid), decimal=7)
|
||||
|
||||
|
||||
def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this. See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    sample = np.array([-7, -5, 1, 4, 5], dtype=float)
    grid = np.linspace(-10, 10, num=50)

    # The old monkeypatched way of getting Silverman's Rule.
    patched = stats.gaussian_kde(sample)
    patched.covariance_factor = patched.silverman_factor
    patched._compute_covariance()
    y_patched = patched(grid)

    # The supported way of requesting the same bandwidth.
    y_builtin = stats.gaussian_kde(sample, bw_method='silverman')(grid)

    assert_array_almost_equal_nulp(y_patched, y_builtin, nulp=10)
|
||||
|
||||
|
||||
def test_kde_integer_input():
    """Regression test for #1181."""
    int_sample = np.arange(5)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    density = stats.gaussian_kde(int_sample)(int_sample)
    assert_array_almost_equal(density, y_expected, decimal=6)
|
||||
|
||||
|
||||
_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64']
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"])
@pytest.mark.parametrize("dtype", _ftypes)
def test_kde_output_dtype(dtype, bw_type):
    """The KDE output dtype follows NumPy's promotion of its inputs."""
    # Resolve the type names; unavailable ones resolve to None.
    dtype = getattr(np, dtype, None)

    if bw_type in ("scott", "silverman"):
        bw = bw_type
    else:
        bw_type = getattr(np, bw_type, None)
        bw = bw_type(3) if bw_type else None

    # Skip combinations involving a type this NumPy build does not have.
    if dtype is None or bw is None:
        pytest.skip()

    weights = np.arange(5, dtype=dtype)
    dataset = np.arange(5, dtype=dtype)
    points = np.arange(5, dtype=dtype)
    k = stats.gaussian_kde(dataset, bw_method=bw, weights=weights)
    result = k(points)
    # weights are always cast to float64
    expected_dtype = np.result_type(dataset, points, np.float64(weights),
                                    k.factor)
    assert result.dtype == expected_dtype
|
||||
|
||||
|
||||
def test_pdf_logpdf_validation():
    """``logpdf`` rejects points whose dimension differs from the data."""
    rng = np.random.default_rng(64202298293133848336925499069837723291)
    gkde = stats.gaussian_kde(rng.standard_normal((2, 10)))
    mismatched_points = rng.standard_normal((3, 10))

    msg = "points have dimension 3, dataset has dimension 2"
    with pytest.raises(ValueError, match=msg):
        gkde.logpdf(mismatched_points)
|
||||
|
||||
|
||||
def test_pdf_logpdf():
    """``pdf`` matches ``evaluate`` and ``logpdf`` matches its logarithm."""
    np.random.seed(1)
    sample = np.random.randn(50)

    # Default
    gkde = stats.gaussian_kde(sample)

    grid = np.linspace(-15, 12, 25)
    density = gkde.evaluate(grid)
    assert_almost_equal(density, gkde.pdf(grid), decimal=12)

    log_density = np.log(density)
    assert_almost_equal(log_density, gkde.logpdf(grid), decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(grid)
    log_density = np.log(gkde.evaluate(sample))
    assert_almost_equal(log_density, gkde.logpdf(sample), decimal=12)
|
||||
|
||||
|
||||
def test_pdf_logpdf_weighted():
    """Weighted variant: ``pdf`` matches ``evaluate``, ``logpdf`` its log."""
    np.random.seed(1)
    sample_count = 50
    sample = np.random.randn(sample_count)
    sample_weights = np.random.rand(sample_count)

    # Default
    gkde = stats.gaussian_kde(sample, weights=sample_weights)

    grid = np.linspace(-15, 12, 25)
    density = gkde.evaluate(grid)
    assert_almost_equal(density, gkde.pdf(grid), decimal=12)

    log_density = np.log(density)
    assert_almost_equal(log_density, gkde.logpdf(grid), decimal=12)

    # There are more points than data
    grid_weights = np.random.rand(len(grid))
    gkde = stats.gaussian_kde(grid, weights=grid_weights)
    log_density = np.log(gkde.evaluate(sample))
    assert_almost_equal(log_density, gkde.logpdf(sample), decimal=12)
|
||||
|
||||
|
||||
def test_marginal_1_axis():
    """Marginalizing one axis agrees with integrating the joint density
    numerically over that axis."""
    rng = np.random.default_rng(6111799263660870475)
    n_data = 50
    n_dim = 10
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    pdf = kde.marginal(dimensions).pdf(points[dimensions])

    def integrate_out_first_axis(point):
        # Substitute the integration variable for coordinate 0 and keep
        # the remaining coordinates of `point` fixed.
        def joint_density(x):
            full_point = np.concatenate(([x], point[dimensions]))
            return kde.pdf(full_point)[0]

        return integrate.quad(joint_density, -np.inf, np.inf)[0]

    # One reference value per column (i.e. per evaluation point).
    ref = np.array([integrate_out_first_axis(points[:, col])
                    for col in range(points.shape[1])])

    assert_allclose(pdf, ref, rtol=1e-6)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_marginal_2_axis():
    """Marginalizing two axes agrees with a numerical double integral of
    the joint density over those axes."""
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 3])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    pdf = kde.marginal(dimensions).pdf(points[dimensions])

    def integrate_out_axes_0_and_2(point):
        def joint_density(y, x):
            # x and y fill axes 0 and 2; axes 1 and 3 come from `point`.
            w, z = point[dimensions]
            return kde.pdf(np.array([x, w, y, z]))[0]

        return integrate.dblquad(joint_density,
                                 -np.inf, np.inf, -np.inf, np.inf)[0]

    # One reference value per column (i.e. per evaluation point).
    ref = np.array([integrate_out_axes_0_and_2(points[:, col])
                    for col in range(points.shape[1])])

    assert_allclose(pdf, ref, rtol=1e-6)
|
||||
|
||||
|
||||
def test_marginal_iv():
    """Input validation of ``gaussian_kde.marginal``."""
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    kde = stats.gaussian_kde(dataset)

    # Negative indices must address the same axes as their positive
    # counterparts: with 4 dimensions, -1 is 3 and -3 is 1.
    dims_a = [-1, 1]
    pdf_a = kde.marginal(dims_a).pdf(points[dims_a])

    dims_b = [3, -3]
    pdf_b = kde.marginal(dims_b).pdf(points[dims_b])

    assert_equal(pdf_a, pdf_b)

    # IV for non-integer dimensions
    message = "Elements of `dimensions` must be integers..."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2.5])

    # IV for uniqueness
    message = "All elements of `dimensions` must be unique."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2, 2])

    # IV for out-of-range dimensions
    message = (r"Dimensions \[-5 6\] are invalid for a distribution in 4...")
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, -5, 6])
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_logpdf_overflow():
    """Regression test for gh-12988.

    Guards against linalg instability for a very high dimensional KDE:
    the log-density must be neither ``-inf`` nor ``nan``.
    """
    np.random.seed(1)
    n_dimensions = 2500
    n_samples = 5000
    # Row k holds standard normal samples shifted by k.
    xn = np.array([np.random.randn(n_samples) + (offset)
                   for offset in range(0, n_dimensions)])

    # Default
    gkde = stats.gaussian_kde(xn)

    first_logpdf = gkde.logpdf(np.arange(0, n_dimensions))[0]
    np.testing.assert_equal(np.isneginf(first_logpdf), False)
    np.testing.assert_equal(np.isnan(first_logpdf), False)
|
||||
|
||||
|
||||
def test_weights_intact():
    """Regression test for gh-9709: weights are not modified."""
    np.random.seed(12345)
    vals = np.random.lognormal(size=100)
    weights = np.random.choice([1.0, 10.0, 100], size=vals.size)
    snapshot = weights.copy()

    # Only the construction matters here; the estimator is discarded.
    stats.gaussian_kde(np.log10(vals), weights=weights)

    assert_allclose(weights, snapshot, atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_weights_integer():
    """Integer weights are OK, cf gh-9709 (comment)."""
    np.random.seed(12345)
    values = [0.2, 13.5, 21.0, 75.0, 99.0]
    int_weights = [1, 2, 4, 8, 16]  # a list of integers
    float_weights = np.float64(int_weights)

    kde_int = stats.gaussian_kde(values, weights=int_weights)
    kde_float = stats.gaussian_kde(values, weights=float_weights)

    query = [0.3, 11, 88]
    assert_allclose(kde_int.evaluate(query), kde_float.evaluate(query),
                    atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_seed():
    """Test the ``seed`` option of the ``resample`` method."""

    def check_resample_seeding(gkde):
        n_sample = 200

        # Without a seed, two draws should differ.
        first = gkde.resample(n_sample)
        second = gkde.resample(n_sample)
        assert_raises(
            AssertionError, assert_allclose, first, second, atol=1e-13
        )

        # The same integer seed reproduces the draw.
        seed = 831
        first = gkde.resample(n_sample, seed=seed)
        second = gkde.resample(n_sample, seed=seed)
        assert_allclose(first, second, atol=1e-13)

        # Two identically seeded RandomState objects reproduce the draw.
        rstate1 = np.random.RandomState(seed=138)
        first = gkde.resample(n_sample, seed=rstate1)
        rstate2 = np.random.RandomState(seed=138)
        second = gkde.resample(n_sample, seed=rstate2)
        assert_allclose(first, second, atol=1e-13)

        # check that np.random.Generator can be used (numpy >= 1.17)
        if hasattr(np.random, 'default_rng'):
            # obtain a np.random.Generator object
            rng = np.random.default_rng(1234)
            gkde.resample(n_sample, seed=rng)

    np.random.seed(8765678)
    n_basesample = 500
    wn = np.random.rand(n_basesample)

    # 1-D case, unweighted then weighted. Each estimator is checked right
    # after it is built, before any further data is drawn.
    xn_1d = np.random.randn(n_basesample)
    for weights in (None, wn):
        check_resample_seeding(stats.gaussian_kde(xn_1d, weights=weights))

    # 2-D case, unweighted then weighted.
    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
    xn_2d = np.random.multivariate_normal(mean, covariance,
                                          size=n_basesample).T
    for weights in (None, wn):
        check_resample_seeding(stats.gaussian_kde(xn_2d, weights=weights))
|
||||
|
||||
|
||||
def test_singular_data_covariance_gh10205():
    # When the data lie in a lower-dimensional subspace and this causes
    # an exception, check that the error message is informative.
    rng = np.random.default_rng(2321583144339784787)
    mu = np.array([1, 10, 20])
    sigma = np.array([[4, 10, 0], [10, 25, 0], [0, 0, 100]])
    dataset = rng.multivariate_normal(mu, sigma, 1000).T
    try:  # doesn't raise any error on some platforms, and that's OK
        stats.gaussian_kde(dataset)
    except linalg.LinAlgError:
        # The failure is repeated under `match` to inspect its message.
        msg = "The data appears to lie in a lower-dimensional subspace..."
        with assert_raises(linalg.LinAlgError, match=msg):
            stats.gaussian_kde(dataset)
|
||||
|
||||
|
||||
def test_fewer_points_than_dimensions_gh17436():
    # When the number of points is fewer than the number of dimensions,
    # the covariance matrix would be singular, and the exception tested in
    # test_singular_data_covariance_gh10205 would occur. However, sometimes
    # this occurs when the user passes in the transpose of what
    # `gaussian_kde` expects. This can result in a huge covariance matrix,
    # so bail early.
    rng = np.random.default_rng(2046127537594925772)
    # Shape (5, 3): passed untransposed, so it is the wrong way round.
    untransposed = rng.multivariate_normal(np.zeros(3), np.eye(3), size=5)
    message = "Number of dimensions is greater than number of samples..."
    with pytest.raises(ValueError, match=message):
        stats.gaussian_kde(untransposed)
|
||||
217
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mgc.py
vendored
Normal file
217
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mgc.py
vendored
Normal file
@@ -0,0 +1,217 @@
|
||||
import pytest
|
||||
from pytest import raises as assert_raises, warns as assert_warns
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_approx_equal, assert_allclose, assert_equal
|
||||
|
||||
from scipy.spatial.distance import cdist
|
||||
from scipy import stats
|
||||
|
||||
class TestMGCErrorWarnings:
    """Errors and warnings raised by ``stats.multiscale_graphcorr``."""

    def test_error_notndarray(self):
        # A plain list must be rejected in either argument position.
        as_array = np.arange(20)
        as_list = [5] * 20
        for first, second in ((as_array, as_list), (as_list, as_array)):
            with assert_raises(ValueError):
                stats.multiscale_graphcorr(first, second)

    def test_error_shape(self):
        # raises error if number of samples different (n)
        x = np.arange(100).reshape(25, 4)
        y = x.reshape(10, 10)
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(x, y)

    def test_error_lowsamples(self):
        # raises error if there are too few samples (only 3 here)
        few = np.arange(3)
        also_few = np.arange(3)
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(few, also_few)

    def test_error_nans(self):
        # raises error if inputs contain NaNs
        with_nan = np.arange(20, dtype=float)
        with_nan[0] = np.nan
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(with_nan, with_nan)

        clean = np.arange(20)
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(with_nan, clean)

    def test_error_wrongdisttype(self):
        # raises error if metric is not a function
        x = np.arange(20)
        not_callable = 0
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(x, x, compute_distance=not_callable)

    @pytest.mark.parametrize("reps", [
        -1,    # reps is negative
        '1',   # reps is not integer
    ])
    def test_error_reps(self, reps):
        # raises error if reps is negative or not an integer
        x = np.arange(20)
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(x, x, reps=reps)

    def test_warns_reps(self):
        # raises warning when reps is less than 1000
        x = np.arange(20)
        with assert_warns(RuntimeWarning):
            stats.multiscale_graphcorr(x, x, reps=100)

    def test_error_infty(self):
        # raises error if input contains infinities
        finite = np.arange(20)
        infinite = np.ones(20) * np.inf
        with assert_raises(ValueError):
            stats.multiscale_graphcorr(finite, infinite)
|
||||
|
||||
|
||||
class TestMGCStat:
    """Test validity of the MGC test statistic and p-value."""

    def _simulations(self, samps=100, dims=1, sim_type=""):
        """Draw a paired sample ``(x, y)`` from the global NumPy RNG.

        Parameters
        ----------
        samps : int
            Number of rows (observations) to generate.
        dims : int
            Number of columns of ``x``; columns beyond the first are
            standard normal noise.
        sim_type : {"linear", "nonlinear", "independence"}
            Relationship between the first column of ``x`` and ``y``.

        Returns
        -------
        x, y : ndarray
            Arrays of shape ``(samps, dims)`` and ``(samps, 1)``.

        Raises
        ------
        ValueError
            If `sim_type` is not one of the supported names.

        Notes
        -----
        Callers must seed ``np.random`` beforehand to get reproducible
        data; this helper does not seed anything itself.
        """
        # linear simulation
        if sim_type == "linear":
            x = np.random.uniform(-1, 1, size=(samps, 1))
            y = x + 0.3 * np.random.random_sample(size=(x.size, 1))

        # spiral simulation
        elif sim_type == "nonlinear":
            unif = np.array(np.random.uniform(0, 5, size=(samps, 1)))
            x = unif * np.cos(np.pi * unif)
            y = (unif * np.sin(np.pi * unif) +
                 0.4*np.random.random_sample(size=(x.size, 1)))

        # independence (tests type I simulation)
        elif sim_type == "independence":
            u = np.random.normal(0, 1, size=(samps, 1))
            v = np.random.normal(0, 1, size=(samps, 1))
            u_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
            v_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
            x = u/3 + 2*u_2 - 1
            y = v/3 + 2*v_2 - 1

        # raises error if not approved sim_type
        else:
            raise ValueError("sim_type must be linear, nonlinear, or "
                             "independence")

        # add dimensions of noise for higher dimensions
        if dims > 1:
            dims_noise = np.random.normal(0, 1, size=(samps, dims-1))
            x = np.concatenate((x, dims_noise), axis=1)

        return x, y

    @pytest.mark.xslow
    @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
        ("linear", 0.97, 1/1000),           # test linear simulation
        ("nonlinear", 0.163, 1/1000),       # test spiral simulation
        ("independence", -0.0094, 0.78)     # test independence simulation
    ])
    def test_oned(self, sim_type, obs_stat, obs_pvalue):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type=sim_type)

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
    @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
        ("linear", 0.184, 1/1000),           # test linear simulation
        ("nonlinear", 0.0190, 0.117),        # test spiral simulation
    ])
    def test_fived(self, sim_type, obs_stat, obs_pvalue):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=5, sim_type=sim_type)

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
    def test_twosamp(self):
        np.random.seed(12345678)

        # generate x and y
        x = np.random.binomial(100, 0.5, size=(100, 5))
        y = np.random.normal(0, 1, size=(80, 5))

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

        # generate x and y
        y = np.random.normal(0, 1, size=(100, 5))

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, is_twosamp=True)
        assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
    def test_workers(self):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, workers=2)
        assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
    def test_random_state(self):
        # `random_state` is passed to multiscale_graphcorr only; the data
        # come from the global RNG, which therefore has to be seeded too,
        # otherwise the inputs depend on whichever test ran before.
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
    def test_dist_perm(self):
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="nonlinear")
        # Pass precomputed distance matrices instead of raw samples.
        distx = cdist(x, x, metric="euclidean")
        disty = cdist(y, y, metric="euclidean")

        stat_dist, pvalue_dist, _ = stats.multiscale_graphcorr(
            distx, disty, compute_distance=None, random_state=1)
        assert_approx_equal(stat_dist, 0.163, significant=1)
        assert_approx_equal(pvalue_dist, 0.001, significant=1)

    @pytest.mark.fail_slow(10)  # all other tests are XSLOW; we need at least one to run
    @pytest.mark.slow
    def test_pvalue_literature(self):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # test stat and pvalue
        _, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_allclose(pvalue, 1/1001)

    @pytest.mark.xslow
    def test_alias(self):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # `stat` is kept as an alias of `statistic` on the result object.
        res = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_equal(res.stat, res.statistic)
|
||||
3120
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_morestats.py
vendored
Normal file
3120
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_morestats.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2066
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py
vendored
Normal file
2066
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
172
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_extras.py
vendored
Normal file
172
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_extras.py
vendored
Normal file
@@ -0,0 +1,172 @@
|
||||
import numpy as np
|
||||
import numpy.ma as ma
|
||||
import scipy.stats.mstats as ms
|
||||
|
||||
from numpy.testing import (assert_equal, assert_almost_equal, assert_,
|
||||
assert_allclose)
|
||||
|
||||
|
||||
def test_compare_medians_ms():
    """Check ``compare_medians_ms`` against two stored reference values."""
    base = np.arange(7)
    shifted = base + 10
    assert_almost_equal(ms.compare_medians_ms(base, shifted), 0)

    unit_grid = np.linspace(0, 1, num=10)
    assert_almost_equal(ms.compare_medians_ms(base, unit_grid),
                        0.017116406778)
|
||||
|
||||
|
||||
def test_hdmedian():
    """Harrell-Davis median of 1-D and 2-D masked arrays."""
    # 1-D array: first unmasked, then with only the first 7 entries visible
    vec = ma.arange(11)
    assert_allclose(ms.hdmedian(vec), 5, rtol=1e-14)
    vec.mask = ma.make_mask(vec)
    vec.mask[:7] = False
    assert_allclose(ms.hdmedian(vec), 3, rtol=1e-14)

    # Check that `var` keyword returns a value. TODO: check whether returned
    # value is actually correct.
    with_variance = ms.hdmedian(vec, var=True)
    assert_(with_variance.size == 2)

    # 2-D array: same procedure, applied column-wise
    mat = ma.arange(22).reshape((11, 2))
    assert_allclose(ms.hdmedian(mat, axis=0), [10, 11])
    mat.mask = ma.make_mask(mat)
    mat.mask[:7, :] = False
    assert_allclose(ms.hdmedian(mat, axis=0), [6, 7])
|
||||
|
||||
|
||||
def test_rsh():
    """Smoke test for ``rsh``: only output shapes are checked."""
    np.random.seed(132345)
    sample = np.random.randn(100)

    # Just a sanity check that the code runs and output shape is correct.
    # TODO: check that implementation is correct.
    at_sample = ms.rsh(sample)
    assert_(at_sample.shape == sample.shape)

    # Check points keyword
    at_points = ms.rsh(sample, points=[0, 1.])
    assert_(at_points.size == 2)
|
||||
|
||||
|
||||
def test_mjci():
    # Tests the Maritz-Jarrett estimator
    data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                     296, 299, 306, 376, 428, 515, 666, 1310, 2611])
    expected = [55.76819, 45.84028, 198.87875]
    assert_almost_equal(ms.mjci(data), expected, 5)
|
||||
|
||||
|
||||
def test_trimmed_mean_ci():
    # Tests the confidence intervals of the trimmed mean.
    data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                     594, 605, 635, 651, 653, 661, 666])
    assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)

    interval = ms.trimmed_mean_ci(data, (0.2, 0.2))
    assert_equal(np.round(interval, 1), [561.8, 630.6])
|
||||
|
||||
|
||||
def test_idealfourths():
    # Tests ideal-fourths
    values = np.arange(100)
    fourths = [24.416667, 74.583333]
    assert_almost_equal(np.asarray(ms.idealfourths(values)), fourths, 6)

    # Three identical columns: per-column results repeat the 1-D result,
    # per-row results are the (constant) row values themselves.
    tripled = values.repeat(3).reshape(-1, 3)
    assert_almost_equal(ms.idealfourths(tripled, axis=0),
                        [[24.416667, 24.416667, 24.416667],
                         [74.583333, 74.583333, 74.583333]], 6)
    assert_almost_equal(ms.idealfourths(tripled, axis=1),
                        values.repeat(2).reshape(-1, 2))

    # With only two data points the result is expected to be all NaN.
    degenerate = ms.idealfourths([0, 0])
    assert_(np.isnan(degenerate).all())
|
||||
|
||||
|
||||
class TestQuantiles:
    """Tests of the Harrell-Davis and Maritz-Jarrett quantile helpers."""

    # 100 fixed sample values shared by all tests of this class.
    data = [0.706560797, 0.727229578, 0.990399276, 0.927065621, 0.158953014,
            0.887764025, 0.239407086, 0.349638551, 0.972791145, 0.149789972,
            0.936947700, 0.132359948, 0.046041972, 0.641675031, 0.945530547,
            0.224218684, 0.771450991, 0.820257774, 0.336458052, 0.589113496,
            0.509736129, 0.696838829, 0.491323573, 0.622767425, 0.775189248,
            0.641461450, 0.118455200, 0.773029450, 0.319280007, 0.752229111,
            0.047841438, 0.466295911, 0.583850781, 0.840581845, 0.550086491,
            0.466470062, 0.504765074, 0.226855960, 0.362641207, 0.891620942,
            0.127898691, 0.490094097, 0.044882048, 0.041441695, 0.317976349,
            0.504135618, 0.567353033, 0.434617473, 0.636243375, 0.231803616,
            0.230154113, 0.160011327, 0.819464108, 0.854706985, 0.438809221,
            0.487427267, 0.786907310, 0.408367937, 0.405534192, 0.250444460,
            0.995309248, 0.144389588, 0.739947527, 0.953543606, 0.680051621,
            0.388382017, 0.863530727, 0.006514031, 0.118007779, 0.924024803,
            0.384236354, 0.893687694, 0.626534881, 0.473051932, 0.750134705,
            0.241843555, 0.432947602, 0.689538104, 0.136934797, 0.150206859,
            0.474335206, 0.907775349, 0.525869295, 0.189184225, 0.854284286,
            0.831089744, 0.251637345, 0.587038213, 0.254475554, 0.237781276,
            0.827928620, 0.480283781, 0.594514455, 0.213641488, 0.024194386,
            0.536668589, 0.699497811, 0.892804071, 0.093835427, 0.731107772]

    def test_hdquantiles(self):
        quartiles = [0.25, 0.5, 0.75]
        flat = self.data

        # The extreme quantiles coincide with the sample minimum/maximum.
        assert_almost_equal(ms.hdquantiles(flat, [0., 1.]),
                            [0.006514031, 0.995309248])
        assert_almost_equal(ms.hdquantiles(flat, quartiles),
                            [0.253210762, 0.512847491, 0.762232442])

        # Column-wise results on a 10 x 10 view must match the 1-D results
        # of the first and last column, without and with `var`.
        grid = np.array(flat).reshape(10, 10)
        by_column = ms.hdquantiles(grid, quartiles, axis=0)
        for col in (0, -1):
            assert_almost_equal(by_column[:, col],
                                ms.hdquantiles(grid[:, col], quartiles))

        by_column = ms.hdquantiles(grid, quartiles, axis=0, var=True)
        for col in (0, -1):
            assert_almost_equal(
                by_column[..., col],
                ms.hdquantiles(grid[:, col], quartiles, var=True))

    def test_hdquantiles_sd(self):
        # Standard deviation is a jackknife estimator, so we can check if
        # the efficient version (hdquantiles_sd) matches a rudimentary,
        # but clear version here.
        hd_std_errs = ms.hdquantiles_sd(self.data)

        # jackknife standard error, Introduction to the Bootstrap Eq. 11.5
        n = len(self.data)
        # Row i of `leave_one_out` is the sample without its i-th value.
        keep = np.logical_not(np.eye(n))
        leave_one_out = np.broadcast_to(self.data, (n, n))[keep]
        leave_one_out = leave_one_out.reshape(n, n-1)
        jdist = ms.hdquantiles(leave_one_out, axis=1)
        jdist_mean = np.mean(jdist, axis=0)
        jstd = ((n-1)/n * np.sum((jdist - jdist_mean)**2, axis=0))**.5

        assert_almost_equal(hd_std_errs, jstd)
        # Test actual values for good measure
        assert_almost_equal(hd_std_errs, [0.0379258, 0.0380656, 0.0380013])

        two_data_points = ms.hdquantiles_sd([1, 2])
        assert_almost_equal(two_data_points, [0.5, 0.5, 0.5])

    def test_mquantiles_cimj(self):
        # Only test that code runs, implementation not checked for correctness
        ci_lower, ci_upper = ms.mquantiles_cimj(self.data)
        assert_(ci_lower.size == ci_upper.size == 3)
|
||||
|
||||
|
||||
def test_median_cihs():
    # Basic test against R library EnvStats function `eqnpar`, e.g.
    # library(EnvStats)
    # options(digits=8)
    # x = c(0.88612955, 0.35242375, 0.66240904, 0.94617974, 0.10929913,
    #       0.76699506, 0.88550655, 0.62763754, 0.76818588, 0.68506508,
    #       0.88043148, 0.03911248, 0.93805564, 0.95326961, 0.25291112,
    #       0.16128487, 0.49784577, 0.24588924, 0.6597, 0.92239679)
    # eqnpar(x, p=0.5,
    #        ci.method = "interpolate", approx.conf.level = 0.95, ci = TRUE)
    rng = np.random.default_rng(8824288259505800535)
    sample = rng.random(size=20)
    default_ci = ms.median_cihs(sample)
    assert_allclose(default_ci, (0.38663198, 0.88431272))

    # SciPy's 90% CI upper limit doesn't match that of EnvStats eqnpar. SciPy
    # doesn't look wrong, and it agrees with a different reference,
    # `median_confint_hs` from `hoehleatsu/quantileCI`.
    # In (e.g.) Colab with R runtime:
    # devtools::install_github("hoehleatsu/quantileCI")
    # library(quantileCI)
    # median_confint_hs(x=x, conf.level=0.90, interpolate=TRUE)
    ninety_percent_ci = ms.median_cihs(sample, 0.1)
    assert_allclose(ninety_percent_ci, (0.48319773366, 0.88094268050))
|
||||
404
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_multicomp.py
vendored
Normal file
404
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_multicomp.py
vendored
Normal file
@@ -0,0 +1,404 @@
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats._multicomp import _pvalue_dunnett, DunnettResult
|
||||
|
||||
|
||||
class TestDunnett:
    """Tests for ``stats.dunnett`` and the private ``_pvalue_dunnett`` helper.

    The class-level ``samples_*`` / ``control_*`` data and the reference
    p-values, statistics and confidence limits are shared by the
    parametrized tests below.
    """

    # For the following tests, p-values were computed using Matlab, e.g.
    #     sample = [18.  15.  18.  16.  17.  15.  14.  14.  14.  15.  15....
    #               14.  15.  14.  22.  18.  21.  21.  10.  10.  11.  9....
    #               25.  26.  17.5 16.  15.5 14.5 22.  22.  24.  22.5 29....
    #               24.5 20.  18.  18.5 17.5 26.5 13.  16.5 13.  13.  13....
    #               28.  27.  34.  31.  29.  27.  24.  23.  38.  36.  25....
    #               38. 26.  22.  36.  27.  27.  32.  28.  31....
    #               24.  27.  33.  32.  28.  19. 37.  31.  36.  36....
    #               34.  38.  32.  38.  32....
    #               26.  24.  26.  25.  29. 29.5 16.5 36.  44....
    #               25.  27.  19....
    #               25.  20....
    #               28.];
    #     j = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
    #          0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
    #          0 0 0 0...
    #          1 1 1 1 1 1 1 1 1 1 1 1 1 1 1...
    #          2 2 2 2 2 2 2 2 2...
    #          3 3 3...
    #          4 4...
    #          5];
    #     [~, ~, stats] = anova1(sample, j, "off");
    #     [results, ~, ~, gnames] = multcompare(stats, ...
    #         "CriticalValueType", "dunnett", ...
    #         "Approximate", false);
    #     tbl = array2table(results, "VariableNames", ...
    #         ["Group", "Control Group", "Lower Limit", ...
    #         "Difference", "Upper Limit", "P-value"]);
    #     tbl.("Group") = gnames(tbl.("Group"));
    #     tbl.("Control Group") = gnames(tbl.("Control Group"))

    # Matlab doesn't report the statistic, so the statistics were
    # computed using R multcomp `glht`, e.g.:
    #     library(multcomp)
    #     options(digits=16)
    #     control <- c(18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0,
    #                  15.0, 15.0, 14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0,
    #                  10.0, 10.0, 11.0, 9.0, 25.0, 26.0, 17.5, 16.0, 15.5,
    #                  14.5, 22.0, 22.0, 24.0, 22.5, 29.0, 24.5, 20.0, 18.0,
    #                  18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0, 28.0,
    #                  27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0,
    #                  25.0, 38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0,
    #                  31.0)
    #     t <- c(24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
    #            34.0, 38.0, 32.0, 38.0, 32.0)
    #     w <- c(26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0)
    #     x <- c(25.0, 27.0, 19.0)
    #     y <- c(25.0, 20.0)
    #     z <- c(28.0)
    #
    #     groups = factor(rep(c("control", "t", "w", "x", "y", "z"),
    #                         times=c(length(control), length(t), length(w),
    #                                 length(x), length(y), length(z))))
    #     df <- data.frame(response=c(control, t, w, x, y, z),
    #                      group=groups)
    #     model <- aov(response ~ group, data = df)
    #     test <- glht(model=model,
    #                  linfct=mcp(group="Dunnett"),
    #                  alternative="g")
    #     summary(test)
    #     confint(test)
    # p-values agreed with those produced by Matlab to at least atol=1e-3

    # From Matlab's documentation on multcompare
    samples_1 = [
        [
            24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
            34.0, 38.0, 32.0, 38.0, 32.0,
        ],
        [26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0],
        [25.0, 27.0, 19.0],
        [25.0, 20.0],
        [28.0],
    ]
    control_1 = [
        18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0, 15.0, 15.0,
        14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0, 10.0, 10.0, 11.0, 9.0,
        25.0, 26.0, 17.5, 16.0, 15.5, 14.5, 22.0, 22.0, 24.0, 22.5, 29.0,
        24.5, 20.0, 18.0, 18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0,
        28.0, 27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0, 25.0,
        38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0, 31.0,
    ]
    pvalue_1 = [4.727e-06, 0.022346, 0.97912, 0.99953, 0.86579]  # Matlab
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_1_twosided = [1e-4, 0.02237, 0.97913, 0.99953, 0.86583]
    p_1_greater = [1e-4, 0.011217, 0.768500, 0.896991, 0.577211]
    p_1_less = [1, 1, 0.99660, 0.98398, 0.99953]
    statistic_1 = [5.27356, 2.91270, 0.60831, 0.27002, 0.96637]
    ci_1_twosided = [
        [5.3633917835622, 0.7296142201217, -8.3879817106607,
         -11.9090753452911, -11.7655021543469],
        [15.9709832164378, 13.8936496687672, 13.4556900439941,
         14.6434503452911, 25.4998771543469],
    ]
    ci_1_greater = [5.9036402398526, 1.4000632918725, -7.2754756323636,
                    -10.5567456382391, -9.8675629499576]
    ci_1_less = [15.4306165948619, 13.2230539537359, 12.3429406339544,
                 13.2908248513211, 23.6015228251660]
    pvalues_1 = {'twosided': p_1_twosided, 'less': p_1_less,
                 'greater': p_1_greater}
    cis_1 = {'twosided': ci_1_twosided, 'less': ci_1_less,
             'greater': ci_1_greater}
    case_1 = {'samples': samples_1, 'control': control_1,
              'statistic': statistic_1, 'pvalues': pvalues_1, 'cis': cis_1}

    # From Dunnett1955 comparing with R's DescTools: DunnettTest
    samples_2 = [[9.76, 8.80, 7.68, 9.36], [12.80, 9.68, 12.16, 9.20, 10.55]]
    control_2 = [7.40, 8.50, 7.20, 8.24, 9.84, 8.32]
    pvalue_2 = [0.6201, 0.0058]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_2_twosided = [0.6201020, 0.0058254]
    p_2_greater = [0.3249776, 0.0029139]
    p_2_less = [0.91676, 0.99984]
    statistic_2 = [0.85703, 3.69375]
    ci_2_twosided = [
        [-1.2564116462124, 0.8396273539789],
        [2.5564116462124, 4.4163726460211],
    ]
    ci_2_greater = [-0.9588591188156, 1.1187563667543]
    ci_2_less = [2.2588591188156, 4.1372436332457]
    pvalues_2 = {'twosided': p_2_twosided, 'less': p_2_less,
                 'greater': p_2_greater}
    cis_2 = {'twosided': ci_2_twosided, 'less': ci_2_less,
             'greater': ci_2_greater}
    case_2 = {'samples': samples_2, 'control': control_2,
              'statistic': statistic_2, 'pvalues': pvalues_2, 'cis': cis_2}

    samples_3 = [[55, 64, 64], [55, 49, 52], [50, 44, 41]]
    control_3 = [55, 47, 48]
    pvalue_3 = [0.0364, 0.8966, 0.4091]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_3_twosided = [0.036407, 0.896539, 0.409295]
    p_3_greater = [0.018277, 0.521109, 0.981892]
    p_3_less = [0.99944, 0.90054, 0.20974]
    statistic_3 = [3.09073, 0.56195, -1.40488]
    ci_3_twosided = [
        [0.7529028025053, -8.2470971974947, -15.2470971974947],
        [21.2470971974947, 12.2470971974947, 5.2470971974947],
    ]
    ci_3_greater = [2.4023682323149, -6.5976317676851, -13.5976317676851]
    ci_3_less = [19.5984402363662, 10.5984402363662, 3.5984402363662]
    pvalues_3 = {'twosided': p_3_twosided, 'less': p_3_less,
                 'greater': p_3_greater}
    cis_3 = {'twosided': ci_3_twosided, 'less': ci_3_less,
             'greater': ci_3_greater}
    case_3 = {'samples': samples_3, 'control': control_3,
              'statistic': statistic_3, 'pvalues': pvalues_3, 'cis': cis_3}

    # From Thomson and Short,
    # Mucociliary function in health, chronic obstructive airway disease,
    # and asbestosis, Journal of Applied Physiology, 1969. Table 1
    # Comparing with R's DescTools: DunnettTest
    samples_4 = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
    control_4 = [2.9, 3.0, 2.5, 2.6, 3.2]
    pvalue_4 = [0.5832, 0.9982]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_4_twosided = [0.58317, 0.99819]
    p_4_greater = [0.30225, 0.69115]
    p_4_less = [0.91929, 0.65212]
    statistic_4 = [0.90875, -0.05007]
    ci_4_twosided = [
        [-0.6898153448579, -1.0333456251632],
        [1.4598153448579, 0.9933456251632],
    ]
    ci_4_greater = [-0.5186459268412, -0.8719655502147]
    ci_4_less = [1.2886459268412, 0.8319655502147]
    pvalues_4 = {'twosided': p_4_twosided, 'less': p_4_less,
                 'greater': p_4_greater}
    cis_4 = {'twosided': ci_4_twosided, 'less': ci_4_less,
             'greater': ci_4_greater}
    case_4 = {'samples': samples_4, 'control': control_4,
              'statistic': statistic_4, 'pvalues': pvalues_4, 'cis': cis_4}

    @pytest.mark.parametrize(
        'rho, n_groups, df, statistic, pvalue, alternative',
        [
            # From Dunnett1955
            # Tables 1a and 1b pages 1117-1118
            (0.5, 1, 10, 1.81, 0.05, "greater"),  # different than two-sided
            (0.5, 3, 10, 2.34, 0.05, "greater"),
            (0.5, 2, 30, 1.99, 0.05, "greater"),
            (0.5, 5, 30, 2.33, 0.05, "greater"),
            (0.5, 4, 12, 3.32, 0.01, "greater"),
            (0.5, 7, 12, 3.56, 0.01, "greater"),
            (0.5, 2, 60, 2.64, 0.01, "greater"),
            (0.5, 4, 60, 2.87, 0.01, "greater"),
            (0.5, 4, 60, [2.87, 2.21], [0.01, 0.05], "greater"),
            # Tables 2a and 2b pages 1119-1120
            (0.5, 1, 10, 2.23, 0.05, "two-sided"),  # two-sided
            (0.5, 3, 10, 2.81, 0.05, "two-sided"),
            (0.5, 2, 30, 2.32, 0.05, "two-sided"),
            (0.5, 3, 20, 2.57, 0.05, "two-sided"),
            (0.5, 4, 12, 3.76, 0.01, "two-sided"),
            (0.5, 7, 12, 4.08, 0.01, "two-sided"),
            (0.5, 2, 60, 2.90, 0.01, "two-sided"),
            (0.5, 4, 60, 3.14, 0.01, "two-sided"),
            (0.5, 4, 60, [3.14, 2.55], [0.01, 0.05], "two-sided"),
        ],
    )
    def test_critical_values(
        self, rho, n_groups, df, statistic, pvalue, alternative
    ):
        """Check ``_pvalue_dunnett`` against tabulated critical values."""
        rng = np.random.default_rng(165250594791731684851746311027739134893)

        # Correlation matrix: `rho` everywhere except ones on the diagonal.
        corr = np.full((n_groups, n_groups), rho)
        np.fill_diagonal(corr, 1)

        computed = _pvalue_dunnett(
            rho=corr,
            df=df,
            statistic=np.array(statistic),
            alternative=alternative,
            rng=rng,
        )
        assert_allclose(computed, pvalue, atol=5e-3)

    @pytest.mark.parametrize(
        'samples, control, pvalue, statistic',
        [
            (samples_1, control_1, pvalue_1, statistic_1),
            (samples_2, control_2, pvalue_2, statistic_2),
            (samples_3, control_3, pvalue_3, statistic_3),
            (samples_4, control_4, pvalue_4, statistic_4),
        ]
    )
    def test_basic(self, samples, control, pvalue, statistic):
        """Default-alternative result matches the reference values."""
        rng = np.random.default_rng(11681140010308601919115036826969764808)
        outcome = stats.dunnett(*samples, control=control, random_state=rng)

        assert isinstance(outcome, DunnettResult)
        assert_allclose(outcome.statistic, statistic, rtol=5e-5)
        assert_allclose(outcome.pvalue, pvalue, rtol=1e-2, atol=1e-4)

    @pytest.mark.parametrize(
        'alternative',
        ['two-sided', 'less', 'greater']
    )
    def test_ttest_ind(self, alternative):
        """``dunnett`` agrees with ``ttest_ind`` when there are two groups."""
        rng = np.random.default_rng(114184017807316971636137493526995620351)

        for _ in range(10):
            # Draw order matters: the same generator feeds every call.
            sample = rng.integers(-100, 100, size=(10,))
            control = rng.integers(-100, 100, size=(10,))

            got = stats.dunnett(
                sample, control=control,
                alternative=alternative, random_state=rng
            )
            want = stats.ttest_ind(
                sample, control,
                alternative=alternative, random_state=rng
            )

            assert_allclose(got.statistic, want.statistic,
                            rtol=1e-3, atol=1e-5)
            assert_allclose(got.pvalue, want.pvalue, rtol=1e-3, atol=1e-5)

    @pytest.mark.parametrize(
        'alternative, pvalue',
        [
            ('less', [0, 1]),
            ('greater', [1, 0]),
            ('two-sided', [0, 0]),
        ]
    )
    def test_alternatives(self, alternative, pvalue):
        """Well-separated groups give extreme p-values and sensible CIs."""
        rng = np.random.default_rng(114184017807316971636137493526995620351)

        # width of 20 and min diff between samples/control is 60
        # and maximal diff would be 100
        sample_less = rng.integers(0, 20, size=(10,))
        control = rng.integers(80, 100, size=(10,))
        sample_greater = rng.integers(160, 180, size=(10,))

        res = stats.dunnett(
            sample_less, sample_greater, control=control,
            alternative=alternative, random_state=rng
        )
        assert_allclose(res.pvalue, pvalue, atol=1e-7)

        ci = res.confidence_interval()
        # two-sided is comparable for high/low
        finite_low = alternative in ('greater', 'two-sided')
        finite_high = alternative in ('less', 'two-sided')

        if alternative == 'less':
            assert np.isneginf(ci.low).all()
        if alternative == 'greater':
            assert np.isposinf(ci.high).all()
        if finite_low:
            assert -100 < ci.low[0] < -60
            assert 60 < ci.low[1] < 100
        if finite_high:
            assert -100 < ci.high[0] < -60
            assert 60 < ci.high[1] < 100

    @pytest.mark.parametrize("case", [case_1, case_2, case_3, case_4])
    @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided'])
    def test_against_R_multicomp_glht(self, case, alternative):
        """p-values and CIs match R multcomp ``glht``; the CI is cached."""
        rng = np.random.default_rng(189117774084579816190295271136455278291)
        # The reference dicts are keyed without the hyphen ('twosided').
        ref_key = alternative.replace('-', '')

        res = stats.dunnett(*case['samples'], control=case['control'],
                            alternative=alternative, random_state=rng)
        # atol can't be tighter because R reports some pvalues as "< 1e-4"
        assert_allclose(res.pvalue, case['pvalues'][ref_key],
                        rtol=5e-3, atol=1e-4)

        # One-sided references hold only the finite bound; pad the other.
        ci_ref = case['cis'][ref_key]
        if alternative == "greater":
            ci_ref = [ci_ref, np.inf]
        elif alternative == "less":
            ci_ref = [-np.inf, ci_ref]
        low_ref, high_ref = ci_ref

        assert res._ci is None
        assert res._ci_cl is None
        ci = res.confidence_interval(confidence_level=0.95)
        assert_allclose(ci.low, low_ref, rtol=5e-3, atol=1e-5)
        assert_allclose(ci.high, high_ref, rtol=5e-3, atol=1e-5)

        # re-run to use the cached value "is" to check id as same object
        assert res._ci is ci
        assert res._ci_cl == 0.95
        ci_again = res.confidence_interval(confidence_level=0.95)
        assert ci_again is ci

    @pytest.mark.parametrize('alternative', ["two-sided", "less", "greater"])
    def test_str(self, alternative):
        """``str(result)`` contains the expected labels and bounds."""
        rng = np.random.default_rng(189117774084579816190295271136455278291)

        res = stats.dunnett(
            *self.samples_3, control=self.control_3, alternative=alternative,
            random_state=rng
        )

        # check some str output
        text = str(res)
        assert '(Sample 2 - Control)' in text
        assert '95.0%' in text

        if alternative == 'less':
            assert '-inf' in text
            assert '19.' in text
        elif alternative == 'greater':
            assert 'inf' in text
            assert '-13.' in text
        else:
            assert 'inf' not in text
            assert '21.' in text

    def test_warnings(self):
        """``_allowance`` with ``tol=1e-5`` emits the non-convergence warning."""
        rng = np.random.default_rng(189117774084579816190295271136455278291)

        res = stats.dunnett(
            *self.samples_3, control=self.control_3, random_state=rng
        )
        expected = r"Computation of the confidence interval did not converge"
        with pytest.warns(UserWarning, match=expected):
            res._allowance(tol=1e-5)

    def test_raises(self):
        """Invalid inputs raise ``ValueError`` with the expected message."""
        samples, control = self.samples_3, self.control_3

        # alternative
        with pytest.raises(ValueError, match="alternative must be"):
            stats.dunnett(*samples, control=control, alternative='bob')

        # 2D for a sample
        nested_samples = copy.deepcopy(samples)
        nested_samples[0] = [nested_samples[0]]
        with pytest.raises(ValueError, match="must be 1D arrays"):
            stats.dunnett(*nested_samples, control=control)

        # 2D for control
        nested_control = [copy.deepcopy(control)]
        with pytest.raises(ValueError, match="must be 1D arrays"):
            stats.dunnett(*samples, control=nested_control)

        # No obs in a sample
        with_empty = copy.deepcopy(samples)
        with_empty[1] = []
        with pytest.raises(ValueError, match="at least 1 observation"):
            stats.dunnett(*with_empty, control=control)

        # No obs in control
        with pytest.raises(ValueError, match="at least 1 observation"):
            stats.dunnett(*samples, control=[])

        res = stats.dunnett(*samples, control=control)
        with pytest.raises(ValueError, match="Confidence level must"):
            res.confidence_interval(confidence_level=3)

    @pytest.mark.filterwarnings("ignore:Computation of the confidence")
    @pytest.mark.parametrize('n_samples', [1, 2, 3])
    def test_shapes(self, n_samples):
        """Statistic, p-value and CI bounds all have shape ``(n_samples,)``."""
        rng = np.random.default_rng(689448934110805334)
        samples = rng.normal(size=(n_samples, 10))
        control = rng.normal(size=10)
        res = stats.dunnett(*samples, control=control, random_state=rng)
        ci = res.confidence_interval()

        expected_shape = (n_samples,)
        assert res.statistic.shape == expected_shape
        assert res.pvalue.shape == expected_shape
        assert ci.low.shape == expected_shape
        assert ci.high.shape == expected_shape
|
||||
3854
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_multivariate.py
vendored
Normal file
3854
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_multivariate.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
148
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_odds_ratio.py
vendored
Normal file
148
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_odds_ratio.py
vendored
Normal file
@@ -0,0 +1,148 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
from .._discrete_distns import nchypergeom_fisher, hypergeom
|
||||
from scipy.stats._odds_ratio import odds_ratio
|
||||
from .data.fisher_exact_results_from_r import data
|
||||
|
||||
|
||||
class TestOddsRatio:
    """Tests for ``odds_ratio`` and the confidence interval of its result."""

    @pytest.mark.parametrize('parameters, rresult', data)
    def test_results_from_r(self, parameters, rresult):
        """Compare the conditional odds ratio and its CI with results from R."""
        alternative = parameters.alternative.replace('.', '-')
        result = odds_ratio(parameters.table)
        # The results computed by R are not very accurate.
        if result.statistic < 400:
            or_rtol, ci_rtol = 5e-4, 2e-2
        else:
            or_rtol, ci_rtol = 5e-2, 1e-1
        assert_allclose(result.statistic,
                        rresult.conditional_odds_ratio, rtol=or_rtol)
        ci = result.confidence_interval(parameters.confidence_level,
                                        alternative)
        assert_allclose((ci.low, ci.high), rresult.conditional_odds_ratio_ci,
                        rtol=ci_rtol)

        # Also do a self-check for the conditional odds ratio.
        # With the computed conditional odds ratio as the noncentrality
        # parameter of the noncentral hypergeometric distribution with
        # parameters table.sum(), table[0].sum(), and table[:,0].sum() as
        # total, ngood and nsample, respectively, the mean of the distribution
        # should equal table[0, 0].
        cor = result.statistic
        table = np.array(parameters.table)
        total = table.sum()
        ngood = table[0].sum()
        nsample = table[:, 0].sum()
        # nchypergeom_fisher does not allow the edge cases where the
        # noncentrality parameter is 0 or inf, so handle those values
        # separately here.
        if cor == 0 or cor == np.inf:
            support = hypergeom.support(total, ngood, nsample)
            nchg_mean = support[0] if cor == 0 else support[1]
        else:
            nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample, cor)
        assert_allclose(nchg_mean, table[0, 0], rtol=1e-13)

        # Check that the confidence interval is correct.
        alpha = 1 - parameters.confidence_level
        # A two-sided interval splits alpha between the tails; a one-sided
        # interval puts all of it in the single finite bound.
        tail = alpha/2 if alternative == 'two-sided' else alpha
        # 'less' has no finite lower bound to check; anything other than
        # 'two-sided'/'less' (i.e. 'greater') has no upper bound to check.
        check_lower = alternative != 'less'
        check_upper = alternative in ('two-sided', 'less')

        if check_lower and ci.low > 0:
            sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                       total, ngood, nsample, ci.low)
            assert_allclose(sf, tail, rtol=1e-11)
        if check_upper and np.isfinite(ci.high):
            cdf = nchypergeom_fisher.cdf(table[0, 0],
                                         total, ngood, nsample, ci.high)
            assert_allclose(cdf, tail, rtol=1e-11)

    @pytest.mark.parametrize('table', [
        [[0, 0], [5, 10]],
        [[5, 10], [0, 0]],
        [[0, 5], [0, 10]],
        [[5, 0], [10, 0]],
    ])
    def test_row_or_col_zero(self, table):
        """A zero row or column gives a nan statistic and a (0, inf) CI."""
        result = odds_ratio(table)
        assert_equal(result.statistic, np.nan)
        interval = result.confidence_interval()
        assert_equal((interval.low, interval.high), (0, np.inf))

    @pytest.mark.parametrize("case",
                             [[0.95, 'two-sided', 0.4879913, 2.635883],
                              [0.90, 'two-sided', 0.5588516, 2.301663]])
    def test_sample_odds_ratio_ci(self, case):
        """Sample odds ratio CI matches R ``epitools::oddsratio.wald``."""
        # Compare the sample odds ratio confidence interval to the R function
        # oddsratio.wald from the epitools package, e.g.
        # > library(epitools)
        # > table = matrix(c(10, 20, 41, 93), nrow=2, ncol=2, byrow=TRUE)
        # > result = oddsratio.wald(table)
        # > result$measure
        #           odds ratio with 95% C.I.
        # Predictor  estimate     lower    upper
        #   Exposed1 1.000000        NA       NA
        #   Exposed2 1.134146 0.4879913 2.635883

        confidence_level, alternative, ref_low, ref_high = case
        result = odds_ratio([[10, 20], [41, 93]], kind='sample')
        assert_allclose(result.statistic, 1.134146, rtol=1e-6)
        interval = result.confidence_interval(confidence_level, alternative)
        assert_allclose([interval.low, interval.high], [ref_low, ref_high],
                        rtol=1e-6)

    @pytest.mark.slow
    @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
    def test_sample_odds_ratio_one_sided_ci(self, alternative):
        """With a large table, sample and conditional CIs nearly coincide."""
        # can't find a good reference for one-sided CI, so bump up the sample
        # size and compare against the conditional odds ratio CI
        table = [[1000, 2000], [4100, 9300]]
        sample_res = odds_ratio(table, kind='sample')
        conditional_res = odds_ratio(table, kind='conditional')
        assert_allclose(sample_res.statistic, conditional_res.statistic,
                        atol=1e-5)
        assert_allclose(
            sample_res.confidence_interval(alternative=alternative),
            conditional_res.confidence_interval(alternative=alternative),
            atol=2e-3,
        )

    @pytest.mark.parametrize('kind', ['sample', 'conditional'])
    @pytest.mark.parametrize('bad_table', [123, "foo", [10, 11, 12]])
    def test_invalid_table_shape(self, kind, bad_table):
        """A table of the wrong shape raises ``ValueError``."""
        with pytest.raises(ValueError, match="Invalid shape"):
            odds_ratio(bad_table, kind=kind)

    def test_invalid_table_type(self):
        """A table of floats raises ``ValueError``."""
        float_table = [[1.0, 3.4], [5.0, 9.9]]
        with pytest.raises(ValueError, match='must be an array of integers'):
            odds_ratio(float_table)

    def test_negative_table_values(self):
        """A table with a negative entry raises ``ValueError``."""
        negative_table = [[1, 2], [3, -4]]
        with pytest.raises(ValueError, match='must be nonnegative'):
            odds_ratio(negative_table)

    def test_invalid_kind(self):
        """An unknown ``kind`` raises ``ValueError``."""
        with pytest.raises(ValueError, match='`kind` must be'):
            odds_ratio([[10, 20], [30, 14]], kind='magnetoreluctance')

    def test_invalid_alternative(self):
        """An unknown ``alternative`` for the CI raises ``ValueError``."""
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='`alternative` must be'):
            result.confidence_interval(alternative='depleneration')

    @pytest.mark.parametrize('level', [-0.5, 1.5])
    def test_invalid_confidence_level(self, level):
        """A confidence level outside [0, 1] raises ``ValueError``."""
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='must be between 0 and 1'):
            result.confidence_interval(confidence_level=level)
|
||||
1410
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_qmc.py
vendored
Normal file
1410
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_qmc.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
338
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_rank.py
vendored
Normal file
338
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_rank.py
vendored
Normal file
@@ -0,0 +1,338 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_array_equal
|
||||
import pytest
|
||||
|
||||
from scipy.conftest import skip_xp_invalid_arg
|
||||
from scipy.stats import rankdata, tiecorrect
|
||||
from scipy._lib._util import np_long
|
||||
|
||||
|
||||
class TestTieCorrect:
    """Tests for the ``tiecorrect`` tie-correction factor."""

    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        no_ranks = np.array([], dtype=np.float64)
        assert_equal(tiecorrect(no_ranks), 1.0)

    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        single_rank = np.array([1.0], dtype=np.float64)
        assert_equal(tiecorrect(single_rank), 1.0)

    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        for stop in (2.0, 3.0):
            assert_equal(tiecorrect(np.arange(stop)), 1.0)

    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""
        # Each case pairs the ranks with the sizes of the tie groups in them.
        cases = [
            # One tie of two elements
            ([1.0, 2.5, 2.5], [2.0]),
            # One tie of two elements (same as above, but tie is not at the
            # end)
            ([1.5, 1.5, 3.0], [2.0]),
            # One tie of three elements
            ([1.0, 3.0, 3.0, 3.0], [3.0]),
            # Two ties, lengths 2 and 3.
            ([1.5, 1.5, 4.0, 4.0, 4.0], [2.0, 3.0]),
        ]
        for values, tie_sizes in cases:
            ranks = np.array(values)
            n_obs = ranks.size
            tie_term = sum(t**3 - t for t in tie_sizes)
            expected = 1.0 - tie_term / (n_obs**3 - n_obs)
            assert_equal(tiecorrect(ranks), expected)

    def test_overflow(self):
        """Large tie groups still give the exact expected factor."""
        ntie, k = 2000, 5
        values = np.repeat(np.arange(k), ntie)
        n = values.size  # ntie * k
        tie_term = k * (ntie**3 - ntie)
        expected = 1.0 - tie_term / float(n**3 - n)
        assert_equal(tiecorrect(rankdata(values)), expected)
|
||||
|
||||
|
||||
class TestRankData:
|
||||
|
||||
def test_empty(self):
|
||||
"""stats.rankdata([]) should return an empty array."""
|
||||
a = np.array([], dtype=int)
|
||||
r = rankdata(a)
|
||||
assert_array_equal(r, np.array([], dtype=np.float64))
|
||||
r = rankdata([])
|
||||
assert_array_equal(r, np.array([], dtype=np.float64))
|
||||
|
||||
@pytest.mark.parametrize("shape", [(0, 1, 2)])
@pytest.mark.parametrize("axis", [None, 0, 1, 2])
def test_empty_multidim(self, shape, axis):
    """Empty multi-dimensional input yields float64 ranks of the right shape."""
    ranks = rankdata(np.empty(shape, dtype=int), axis=axis)
    # With axis=None the result is 1-D of length 0; otherwise the input
    # shape is kept.
    if axis is None:
        assert_equal(ranks.shape, (0,))
    else:
        assert_equal(ranks.shape, shape)
    assert_equal(ranks.dtype, np.float64)
|
||||
|
||||
def test_one(self):
|
||||
"""Check stats.rankdata with an array of length 1."""
|
||||
data = [100]
|
||||
a = np.array(data, dtype=int)
|
||||
r = rankdata(a)
|
||||
assert_array_equal(r, np.array([1.0], dtype=np.float64))
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, np.array([1.0], dtype=np.float64))
|
||||
|
||||
def test_basic(self):
|
||||
"""Basic tests of stats.rankdata."""
|
||||
data = [100, 10, 50]
|
||||
expected = np.array([3.0, 1.0, 2.0], dtype=np.float64)
|
||||
a = np.array(data, dtype=int)
|
||||
r = rankdata(a)
|
||||
assert_array_equal(r, expected)
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, expected)
|
||||
|
||||
data = [40, 10, 30, 10, 50]
|
||||
expected = np.array([4.0, 1.5, 3.0, 1.5, 5.0], dtype=np.float64)
|
||||
a = np.array(data, dtype=int)
|
||||
r = rankdata(a)
|
||||
assert_array_equal(r, expected)
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, expected)
|
||||
|
||||
data = [20, 20, 20, 10, 10, 10]
|
||||
expected = np.array([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=np.float64)
|
||||
a = np.array(data, dtype=int)
|
||||
r = rankdata(a)
|
||||
assert_array_equal(r, expected)
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, expected)
|
||||
# The docstring states explicitly that the argument is flattened.
|
||||
a2d = a.reshape(2, 3)
|
||||
r = rankdata(a2d)
|
||||
assert_array_equal(r, expected)
|
||||
|
||||
@skip_xp_invalid_arg
def test_rankdata_object_string(self):
    """Compare every ranking method with pure-Python reference rankers.

    Inputs are random draws of strings, of strings stored as objects, and
    of numbers stored as objects.
    """

    def min_rank(seq):
        # 1 + number of elements strictly smaller than each element
        return [1 + sum(other < item for other in seq) for item in seq]

    def max_rank(seq):
        # number of elements less than or equal to each element
        return [sum(other <= item for other in seq) for item in seq]

    def ordinal_rank(seq):
        # ties are broken by position: rank the (value, index) pairs
        tagged = [(value, pos) for pos, value in enumerate(seq)]
        return min_rank(tagged)

    def average_rank(seq):
        lows = min_rank(seq)
        highs = max_rank(seq)
        return [(low + high) / 2.0 for low, high in zip(lows, highs)]

    def dense_rank(seq):
        distinct = np.unique(seq)
        return [1 + sum(other < item for other in distinct) for item in seq]

    reference = {
        'min': min_rank,
        'max': max_rank,
        'ordinal': ordinal_rank,
        'average': average_rank,
        'dense': dense_rank,
    }

    def check_ranks(seq):
        for method in ('min', 'max', 'dense', 'ordinal', 'average'):
            computed = rankdata(seq, method=method)
            assert_array_equal(computed, reference[method](seq))

    words = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
    check_ranks(np.random.choice(words, 200))
    check_ranks(np.random.choice(words, 200).astype('object'))

    numbers = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
    check_ranks(np.random.choice(numbers, 200).astype('object'))
|
||||
|
||||
def test_large_int(self):
|
||||
data = np.array([2**60, 2**60+1], dtype=np.uint64)
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, [1.0, 2.0])
|
||||
|
||||
data = np.array([2**60, 2**60+1], dtype=np.int64)
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, [1.0, 2.0])
|
||||
|
||||
data = np.array([2**60, -2**60+1], dtype=np.int64)
|
||||
r = rankdata(data)
|
||||
assert_array_equal(r, [2.0, 1.0])
|
||||
|
||||
def test_big_tie(self):
|
||||
for n in [10000, 100000, 1000000]:
|
||||
data = np.ones(n, dtype=int)
|
||||
r = rankdata(data)
|
||||
expected_rank = 0.5 * (n + 1)
|
||||
assert_array_equal(r, expected_rank * data,
|
||||
"test failed with n=%d" % n)
|
||||
|
||||
def test_axis(self):
|
||||
data = [[0, 2, 1],
|
||||
[4, 2, 2]]
|
||||
expected0 = [[1., 1.5, 1.],
|
||||
[2., 1.5, 2.]]
|
||||
r0 = rankdata(data, axis=0)
|
||||
assert_array_equal(r0, expected0)
|
||||
expected1 = [[1., 3., 2.],
|
||||
[3., 1.5, 1.5]]
|
||||
r1 = rankdata(data, axis=1)
|
||||
assert_array_equal(r1, expected1)
|
||||
|
||||
# Ranking methods and, position by position, the dtype each is expected to
# return (the two lists are zipped together by `test_size_0_axis`).
methods = ["average", "min", "max", "dense", "ordinal"]
dtypes = [np.float64, np_long, np_long, np_long, np_long]
|
||||
|
||||
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("method, dtype", zip(methods, dtypes))
def test_size_0_axis(self, axis, method, dtype):
    """A (3, 0) input keeps its shape and gets the method's result dtype."""
    shape = (3, 0)
    ranks = rankdata(np.zeros(shape), method=method, axis=axis)
    assert_equal(ranks.shape, shape)
    assert_equal(ranks.dtype, dtype)
|
||||
|
||||
@pytest.mark.parametrize('axis', range(3))
@pytest.mark.parametrize('method', methods)
def test_nan_policy_omit_3d(self, axis, method):
    """Check ``nan_policy='omit'`` on a 3-D array against a 1-D reference.

    The input mixes finite values with nan, -inf and +inf. The reference
    result is built by ranking the non-nan entries of every 1-D slice along
    `axis` and writing nan back at the omitted positions.
    """
    shape = (20, 21, 22)
    rng = np.random.RandomState(23983242)

    a = rng.random(size=shape)
    nan_mask = rng.random(size=shape) < 0.4
    neg_inf_mask = rng.random(size=shape) < 0.1
    pos_inf_mask = rng.random(size=shape) < 0.1
    a[nan_mask] = np.nan
    a[neg_inf_mask] = -np.inf
    # Fix: this used to be the expression statement `a[k] - np.inf`, whose
    # result was discarded, so +inf never actually appeared in the data.
    a[pos_inf_mask] = np.inf

    def rank_1d_omit(values, method):
        # Rank only the non-nan entries; nan positions stay nan.
        out = np.zeros_like(values)
        is_nan = np.isnan(values)
        out[~is_nan] = rankdata(values[~is_nan], method)
        out[is_nan] = np.nan
        return out

    def rank_omit(values, method, axis):
        return np.apply_along_axis(lambda v: rank_1d_omit(v, method),
                                   axis, values)

    res = rankdata(a, method, axis=axis, nan_policy='omit')
    res0 = rank_omit(a, method, axis=axis)

    assert_array_equal(res, res0)
|
||||
|
||||
def test_nan_policy_2d_axis_none(self):
|
||||
# 2 2d-array test with axis=None
|
||||
data = [[0, np.nan, 3],
|
||||
[4, 2, np.nan],
|
||||
[1, 2, 2]]
|
||||
assert_array_equal(rankdata(data, axis=None, nan_policy='omit'),
|
||||
[1., np.nan, 6., 7., 4., np.nan, 2., 4., 4.])
|
||||
assert_array_equal(rankdata(data, axis=None, nan_policy='propagate'),
|
||||
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
|
||||
np.nan, np.nan, np.nan])
|
||||
|
||||
def test_nan_policy_raise(self):
    """``nan_policy='raise'`` rejects NaN in 1-D input and along either axis."""
    pattern = "The input contains nan"

    # 1-D input
    flat = [0, 2, 3, -2, np.nan, np.nan]
    with pytest.raises(ValueError, match=pattern):
        rankdata(flat, nan_policy='raise')

    # 2-D input, once per axis
    grid = [[0, np.nan, 3],
            [4, 2, np.nan],
            [np.nan, 2, 2]]
    for axis in (0, 1):
        with pytest.raises(ValueError, match=pattern):
            rankdata(grid, axis=axis, nan_policy="raise")
|
||||
|
||||
def test_nan_policy_propagate(self):
|
||||
# 1 1d-array test
|
||||
data = [0, 2, 3, -2, np.nan, np.nan]
|
||||
assert_array_equal(rankdata(data, nan_policy='propagate'),
|
||||
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan])
|
||||
|
||||
# 2 2d-array test
|
||||
data = [[0, np.nan, 3],
|
||||
[4, 2, np.nan],
|
||||
[1, 2, 2]]
|
||||
assert_array_equal(rankdata(data, axis=0, nan_policy='propagate'),
|
||||
[[1, np.nan, np.nan],
|
||||
[3, np.nan, np.nan],
|
||||
[2, np.nan, np.nan]])
|
||||
assert_array_equal(rankdata(data, axis=1, nan_policy='propagate'),
|
||||
[[np.nan, np.nan, np.nan],
|
||||
[np.nan, np.nan, np.nan],
|
||||
[1, 2.5, 2.5]])
|
||||
|
||||
|
||||
# Table of (values, method, expected ranks) triples checked by `test_cases`.
_cases = (
    # empty input
    ([], 'average', []),
    ([], 'min', []),
    ([], 'max', []),
    ([], 'dense', []),
    ([], 'ordinal', []),
    # single value
    ([100], 'average', [1.0]),
    ([100], 'min', [1.0]),
    ([100], 'max', [1.0]),
    ([100], 'dense', [1.0]),
    ([100], 'ordinal', [1.0]),
    # three equal values
    ([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
    ([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
    ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
    # three distinct values
    ([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
    # one tied pair
    ([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
    ([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
    ([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
    ([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
    ([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
    # two tied pairs
    ([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
    ([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
    ([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
    ([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
    ([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
    # thirty equal values, ordinal ranking
    ([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
)
|
||||
|
||||
|
||||
def test_cases():
    """Run `rankdata` over every (values, method, expected) entry of `_cases`."""
    for case in _cases:
        values, method, expected = case
        assert_array_equal(rankdata(values, method=method), expected)
|
||||
95
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_relative_risk.py
vendored
Normal file
95
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_relative_risk.py
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy.stats.contingency import relative_risk
|
||||
|
||||
|
||||
# Test just the calculation of the relative risk, including edge
|
||||
# cases that result in a relative risk of 0, inf or nan.
|
||||
@pytest.mark.parametrize(
    'exposed_cases, exposed_total, control_cases, control_total, expected_rr',
    [(1, 4, 3, 8, 0.25 / 0.375),
     (0, 10, 5, 20, 0),
     (0, 10, 0, 20, np.nan),
     (5, 15, 0, 20, np.inf)]
)
def test_relative_risk(exposed_cases, exposed_total,
                       control_cases, control_total, expected_rr):
    """Compare the computed relative risk with the expected value per case."""
    counts = (exposed_cases, exposed_total, control_cases, control_total)
    outcome = relative_risk(*counts)
    assert_allclose(outcome.relative_risk, expected_rr, rtol=1e-13)
|
||||
|
||||
|
||||
def test_relative_risk_confidence_interval():
    """Check the 95% confidence interval against R's epitools `riskratio`."""
    # The corresponding calculation in R using the epitools package.
    #
    # > library(epitools)
    # > c <- matrix(c(232, 112, 24, 16), nrow=2)
    # > result <- riskratio(c)
    # > result$measure
    #               risk ratio with 95% C.I.
    # Predictor  estimate     lower    upper
    #   Exposed1 1.000000        NA       NA
    #   Exposed2 1.333333 0.7347317 2.419628
    #
    # The last line is the result that we want.
    expected_rr = 4/3
    expected_ci = (0.7347317, 2.419628)

    outcome = relative_risk(exposed_cases=16, exposed_total=128,
                            control_cases=24, control_total=256)
    estimate = outcome.relative_risk
    interval = outcome.confidence_interval(confidence_level=0.95)

    assert_allclose(estimate, expected_rr)
    assert_allclose((interval.low, interval.high), expected_ci, rtol=5e-7)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel0():
    """At confidence level 0 both interval bounds equal the estimate (2.0)."""
    counts = dict(exposed_cases=4, exposed_total=12,
                  control_cases=5, control_total=30)
    outcome = relative_risk(**counts)
    assert_allclose(outcome.relative_risk, 2.0, rtol=1e-14)

    interval = outcome.confidence_interval(0)
    assert_allclose((interval.low, interval.high), (2.0, 2.0), rtol=1e-12)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel1():
    """At confidence level 1 the interval is ``(0, inf)``."""
    counts = dict(exposed_cases=4, exposed_total=12,
                  control_cases=5, control_total=30)
    interval = relative_risk(**counts).confidence_interval(1)
    assert_equal((interval.low, interval.high), (0, np.inf))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_00():
    """No cases in either group: estimate and both bounds are NaN."""
    outcome = relative_risk(exposed_cases=0, exposed_total=12,
                            control_cases=0, control_total=30)
    assert_equal(outcome.relative_risk, np.nan)

    interval = outcome.confidence_interval()
    bounds = (interval.low, interval.high)
    assert_equal(bounds, (np.nan, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_01():
    """No exposed cases: estimate 0, lower bound 0.0, upper bound NaN."""
    outcome = relative_risk(exposed_cases=0, exposed_total=12,
                            control_cases=1, control_total=30)
    assert_equal(outcome.relative_risk, 0)

    interval = outcome.confidence_interval()
    bounds = (interval.low, interval.high)
    assert_equal(bounds, (0.0, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_10():
    """No control cases: estimate inf, lower bound NaN, upper bound inf."""
    outcome = relative_risk(exposed_cases=1, exposed_total=12,
                            control_cases=0, control_total=30)
    assert_equal(outcome.relative_risk, np.inf)

    interval = outcome.confidence_interval()
    bounds = (interval.low, interval.high)
    assert_equal(bounds, (np.nan, np.inf))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ec, et, cc, ct', [(0, 0, 10, 20),
                                            (-1, 10, 1, 5),
                                            (1, 10, 0, 0),
                                            (1, 10, -1, 4)])
def test_relative_risk_bad_value(ec, et, cc, ct):
    """Out-of-range counts are rejected with a ValueError."""
    expected_message = "must be an integer not less than"
    with pytest.raises(ValueError, match=expected_message):
        relative_risk(ec, et, cc, ct)
|
||||
|
||||
|
||||
def test_relative_risk_bad_type():
    """A float count (``2.0``) is rejected with a TypeError."""
    expected_message = "must be an integer"
    with pytest.raises(TypeError, match=expected_message):
        relative_risk(1, 10, 2.0, 40)
|
||||
2025
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_resampling.py
vendored
Normal file
2025
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_resampling.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1447
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_sampling.py
vendored
Normal file
1447
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_sampling.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
301
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_sensitivity_analysis.py
vendored
Normal file
301
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_sensitivity_analysis.py
vendored
Normal file
@@ -0,0 +1,301 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_array_less
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats import sobol_indices
|
||||
from scipy.stats._resampling import BootstrapResult
|
||||
from scipy.stats._sensitivity_analysis import (
|
||||
BootstrapSobolResult, f_ishigami, sample_AB, sample_A_B
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def ishigami_ref_indices():
    """Reference values for Ishigami from Saltelli2007.

    Chapter 4, exercise 5 pages 179-182.

    Returns the pair ``(s_first, s_total)``; the total indices are the
    first-order ones plus the row sums of the second-order matrix.
    """
    a = 7.
    b = 0.1
    pi4 = np.pi**4
    pi8 = np.pi**8

    var = 0.5 + a**2/8 + b*pi4/5 + b**2*pi8/18
    v1 = 0.5 + b*pi4/5 + b**2*pi8/50
    v2 = a**2/8
    v3 = 0
    v12 = 0
    # v13: mistake in the book, see other derivations e.g. in 10.1002/nme.4856
    v13 = b**2*pi8*8/225
    v23 = 0

    first_order = np.array([v1, v2, v3])/var
    second_order = np.array([
        [0., 0., v13],
        [v12, 0., v23],
        [v13, v23, 0.]
    ])/var
    total_order = first_order + second_order.sum(axis=1)

    return first_order, total_order
|
||||
|
||||
|
||||
def f_ishigami_vec(x):
|
||||
"""Output of shape (2, n)."""
|
||||
res = f_ishigami(x)
|
||||
return res, res
|
||||
|
||||
|
||||
class TestSobolIndices:
    """Tests for `sobol_indices` and `sample_AB` using the Ishigami function."""

    # The same frozen uniform(loc=-pi, scale=2*pi) distribution, three times.
    dists = [
        stats.uniform(loc=-np.pi, scale=2*np.pi)  # type: ignore[attr-defined]
    ] * 3

    def test_sample_AB(self):
        """Compare `sample_AB` with a hand-written (d, d, n) reference."""
        # (d, n)
        base = np.array(
            [[1, 4, 7, 10],
             [2, 5, 8, 11],
             [3, 6, 9, 12]]
        )
        shifted = base + 100
        # (d, d, n): slab i is `base` with row i taken from `shifted`
        expected = np.array(
            [[[101, 104, 107, 110],
              [2, 5, 8, 11],
              [3, 6, 9, 12]],
             [[1, 4, 7, 10],
              [102, 105, 108, 111],
              [3, 6, 9, 12]],
             [[1, 4, 7, 10],
              [2, 5, 8, 11],
              [103, 106, 109, 112]]]
        )
        assert_allclose(sample_AB(A=base, B=shifted), expected)

    @pytest.mark.xslow
    @pytest.mark.xfail_on_32bit("Can't create large array for test")
    @pytest.mark.parametrize(
        'func',
        [f_ishigami, pytest.param(f_ishigami_vec, marks=pytest.mark.slow)],
        ids=['scalar', 'vector']
    )
    def test_ishigami(self, ishigami_ref_indices, func):
        """Check indices and bootstrap results for scalar and vector output."""
        rng = np.random.default_rng(28631265345463262246170309650372465332)
        res = sobol_indices(
            func=func, n=4096,
            dists=self.dists,
            random_state=rng
        )

        ref_first, ref_total = ishigami_ref_indices
        if func.__name__ == 'f_ishigami_vec':
            # the vector function returns the same output twice
            ref_first = [ref_first, ref_first]
            ref_total = [ref_total, ref_total]

        assert_allclose(res.first_order, ref_first, atol=1e-2)
        assert_allclose(res.total_order, ref_total, atol=1e-2)

        # nothing is cached until `bootstrap` has been called
        assert res._bootstrap_result is None
        bootstrap_res = res.bootstrap(n_resamples=99)
        assert isinstance(bootstrap_res, BootstrapSobolResult)
        assert isinstance(res._bootstrap_result, BootstrapResult)

        raw_low = res._bootstrap_result.confidence_interval.low
        assert raw_low.shape[0] == 2
        assert raw_low[1].shape == res.first_order.shape

        first_ci = bootstrap_res.first_order.confidence_interval
        total_ci = bootstrap_res.total_order.confidence_interval
        assert first_ci.low.shape == res.first_order.shape
        assert total_ci.low.shape == res.total_order.shape

        # each index lies strictly inside its bootstrap interval
        assert_array_less(first_ci.low, res.first_order)
        assert_array_less(res.first_order, first_ci.high)
        assert_array_less(total_ci.low, res.total_order)
        assert_array_less(res.total_order, total_ci.high)

        # call again to use previous results and change a param
        second_call = res.bootstrap(confidence_level=0.9, n_resamples=99)
        assert isinstance(second_call, BootstrapSobolResult)
        assert isinstance(res._bootstrap_result, BootstrapResult)

    def test_func_dict(self, ishigami_ref_indices):
        """Pass pre-computed ``f_A``/``f_B``/``f_AB`` evaluations as `func`."""
        rng = np.random.default_rng(28631265345463262246170309650372465332)
        n = 4096
        dists = [stats.uniform(loc=-np.pi, scale=2*np.pi) for _ in range(3)]

        A, B = sample_A_B(n=n, dists=dists, random_state=rng)
        AB = sample_AB(A=A, B=B)

        evaluations = {
            'f_A': f_ishigami(A).reshape(1, -1),
            'f_B': f_ishigami(B).reshape(1, -1),
            'f_AB': f_ishigami(AB).reshape((3, 1, -1))
        }
        ref_first = ishigami_ref_indices[0]

        # first with `dists` ...
        res = sobol_indices(
            func=evaluations, n=n,
            dists=dists,
            random_state=rng
        )
        assert_allclose(res.first_order, ref_first, atol=1e-2)

        # ... then without
        res = sobol_indices(
            func=evaluations, n=n,
            random_state=rng
        )
        assert_allclose(res.first_order, ref_first, atol=1e-2)

    def test_method(self, ishigami_ref_indices):
        """Use a custom callable, untyped and then annotated, as `method`."""
        def jansen_sobol(f_A, f_B, f_AB):
            """Jansen for S and Sobol' for St.

            From Saltelli2010, table 2 formulations (c) and (e)."""
            variance = np.var([f_A, f_B], axis=(0, -1))
            mean_sq_diff = np.mean((f_B - f_AB)**2, axis=-1)

            first = (variance - 0.5*mean_sq_diff) / variance
            total = np.mean(f_A*(f_A - f_AB), axis=-1) / variance

            return first.T, total.T

        rng = np.random.default_rng(28631265345463262246170309650372465332)
        res = sobol_indices(
            func=f_ishigami, n=4096,
            dists=self.dists,
            method=jansen_sobol,
            random_state=rng
        )
        ref_first, ref_total = ishigami_ref_indices

        assert_allclose(res.first_order, ref_first, atol=1e-2)
        assert_allclose(res.total_order, ref_total, atol=1e-2)

        def jansen_sobol_typed(
            f_A: np.ndarray, f_B: np.ndarray, f_AB: np.ndarray
        ) -> tuple[np.ndarray, np.ndarray]:
            return jansen_sobol(f_A, f_B, f_AB)

        # only checks that the annotated callable is accepted
        _ = sobol_indices(
            func=f_ishigami, n=8,
            dists=self.dists,
            method=jansen_sobol_typed,
            random_state=rng
        )

    def test_normalization(self, ishigami_ref_indices):
        """Adding a constant 1000 to the output leaves the indices unchanged."""
        rng = np.random.default_rng(28631265345463262246170309650372465332)
        res = sobol_indices(
            func=lambda x: f_ishigami(x) + 1000, n=4096,
            dists=self.dists,
            random_state=rng
        )
        ref_first, ref_total = ishigami_ref_indices

        assert_allclose(res.first_order, ref_first, atol=1e-2)
        assert_allclose(res.total_order, ref_total, atol=1e-2)

    def test_constant_function(self, ishigami_ref_indices):
        """A constant output component is expected to have all-zero indices."""

        def f_ishigami_vec_const(x):
            """Output of shape (3, n)."""
            res = f_ishigami(x)
            return res, res * 0 + 10, res

        rng = np.random.default_rng(28631265345463262246170309650372465332)
        res = sobol_indices(
            func=f_ishigami_vec_const, n=4096,
            dists=self.dists,
            random_state=rng
        )
        ref_first, ref_total = ishigami_ref_indices
        zeros = [0, 0, 0]

        assert_allclose(res.first_order, [ref_first, zeros, ref_first],
                        atol=1e-2)
        assert_allclose(res.total_order, [ref_total, zeros, ref_total],
                        atol=1e-2)

    @pytest.mark.xfail_on_32bit("Can't create large array for test")
    def test_more_converged(self, ishigami_ref_indices):
        """With ``n=2**19`` the indices match the reference to ``atol=1e-4``."""
        rng = np.random.default_rng(28631265345463262246170309650372465332)
        res = sobol_indices(
            func=f_ishigami, n=2**19,  # 524288
            dists=self.dists,
            random_state=rng
        )
        ref_first, ref_total = ishigami_ref_indices

        assert_allclose(res.first_order, ref_first, atol=1e-4)
        assert_allclose(res.total_order, ref_total, atol=1e-4)

    def test_raises(self):
        """Each invalid argument combination raises the documented ValueError."""

        message = r"Each distribution in `dists` must have method `ppf`"
        for bad_dists in ("uniform", [lambda x: x]):
            with pytest.raises(ValueError, match=message):
                sobol_indices(n=0, func=f_ishigami, dists=bad_dists)

        message = r"The balance properties of Sobol'"
        for bad_n in (7, 4.1):
            with pytest.raises(ValueError, match=message):
                sobol_indices(n=bad_n, func=f_ishigami,
                              dists=[stats.uniform()])

        message = r"'toto' is not a valid 'method'"
        with pytest.raises(ValueError, match=message):
            sobol_indices(n=0, func=f_ishigami, method='toto')

        message = r"must have the following signature"
        with pytest.raises(ValueError, match=message):
            sobol_indices(n=0, func=f_ishigami, method=lambda x: x)

        message = r"'dists' must be defined when 'func' is a callable"
        with pytest.raises(ValueError, match=message):
            sobol_indices(n=0, func=f_ishigami)

        def func_wrong_shape_output(x):
            return x.reshape(-1, 1)

        message = r"'func' output should have a shape"
        with pytest.raises(ValueError, match=message):
            sobol_indices(
                n=2, func=func_wrong_shape_output, dists=[stats.uniform()]
            )

        message = r"When 'func' is a dictionary"
        malformed_calls = [
            # f_B missing
            dict(n=2, func={'f_A': [], 'f_AB': []}, dists=[stats.uniform()]),
            # f_B malformed
            dict(n=2, func={'f_A': [1, 2], 'f_B': [3], 'f_AB': [5, 6, 7, 8]}),
            # f_AB malformed
            dict(n=2, func={'f_A': [1, 2], 'f_B': [3, 4], 'f_AB': [5, 6, 7]}),
        ]
        for kwargs in malformed_calls:
            with pytest.raises(ValueError, match=message):
                sobol_indices(**kwargs)
|
||||
9039
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_stats.py
vendored
Normal file
9039
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_stats.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
470
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_survival.py
vendored
Normal file
470
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_survival.py
vendored
Normal file
@@ -0,0 +1,470 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
from scipy import stats
|
||||
from scipy.stats import _survival
|
||||
|
||||
|
||||
def _kaplan_meier_reference(times, censored):
    """Plain Kaplan-Meier estimator used as an independent reference.

    This is a very straightforward implementation that does almost
    everything differently from the implementation in stats.ecdf.

    Returns the sorted unique times and the survival-function value at the
    last occurrence of each of them.
    """
    # Sort the raw data by time, then by `censored` (0 when there is a death,
    # 1 when there is only a loss), so that at a given time deaths come first.
    # See [2] page 461:
    # "These conventions may be paraphrased by saying that deaths recorded as
    # of an age t are treated as if they occurred slightly before t, and losses
    # recorded as of an age t are treated as occurring slightly after t."
    record = [('time', float), ('censored', int)]
    observations = np.array(list(zip(times, censored)), dtype=record)
    observations.sort(order=('time', 'censored'))
    ordered_times = observations['time']
    deaths = np.logical_not(observations['censored'])

    total = ordered_times.size
    at_risk = np.arange(total, 0, -1)  # number at risk
    survival = np.cumprod((at_risk - deaths) / at_risk)

    # The first occurrence of each unique time in the reversed array is its
    # *last* occurrence in the sorted one; map the index back accordingly.
    _, first_in_reversed = np.unique(ordered_times[::-1], return_index=True)
    last_occurrence = total - 1 - first_in_reversed
    return ordered_times[last_occurrence], survival[last_occurrence]
|
||||
|
||||
|
||||
class TestSurvival:
|
||||
|
||||
@staticmethod
|
||||
def get_random_sample(rng, n_unique):
|
||||
# generate random sample
|
||||
unique_times = rng.random(n_unique)
|
||||
# convert to `np.int32` to resolve `np.repeat` failure in 32-bit CI
|
||||
repeats = rng.integers(1, 4, n_unique).astype(np.int32)
|
||||
times = rng.permuted(np.repeat(unique_times, repeats))
|
||||
censored = rng.random(size=times.size) > rng.random()
|
||||
sample = stats.CensoredData.right_censored(times, censored)
|
||||
return sample, times, censored
|
||||
|
||||
def test_input_validation(self):
    """Invalid `ecdf` inputs and confidence-interval arguments are rejected."""
    message = '`sample` must be a one-dimensional sequence.'
    for bad_sample in ([[1]], 1):
        with pytest.raises(ValueError, match=message):
            stats.ecdf(bad_sample)

    message = '`sample` must not contain nan'
    with pytest.raises(ValueError, match=message):
        stats.ecdf([np.nan])

    message = 'Currently, only uncensored and right-censored data...'
    with pytest.raises(NotImplementedError, match=message):
        stats.ecdf(stats.CensoredData.left_censored([1], censored=[True]))

    res = stats.ecdf([1, 2, 3])

    message = 'method` must be one of...'
    for curve, bad_method in ((res.cdf, 'ekki-ekki'), (res.sf, 'shrubbery')):
        with pytest.raises(ValueError, match=message):
            curve.confidence_interval(method=bad_method)

    message = 'confidence_level` must be a scalar between 0 and 1'
    for curve, bad_level in ((res.cdf, -1), (res.sf, [0.5, 0.6])):
        with pytest.raises(ValueError, match=message):
            curve.confidence_interval(bad_level)

    message = 'The confidence interval is undefined at some observations.'
    with pytest.warns(RuntimeWarning, match=message):
        ci = res.cdf.confidence_interval()

    # the bounds of a confidence interval have no interval of their own
    message = 'Confidence interval bounds do not implement...'
    for bound in (ci.low, ci.high):
        with pytest.raises(NotImplementedError, match=message):
            bound.confidence_interval()
|
||||
|
||||
def test_edge_cases(self):
|
||||
res = stats.ecdf([])
|
||||
assert_equal(res.cdf.quantiles, [])
|
||||
assert_equal(res.cdf.probabilities, [])
|
||||
|
||||
res = stats.ecdf([1])
|
||||
assert_equal(res.cdf.quantiles, [1])
|
||||
assert_equal(res.cdf.probabilities, [1])
|
||||
|
||||
def test_unique(self):
|
||||
# Example with unique observations; `stats.ecdf` ref. [1] page 80
|
||||
sample = [6.23, 5.58, 7.06, 6.42, 5.20]
|
||||
res = stats.ecdf(sample)
|
||||
ref_x = np.sort(np.unique(sample))
|
||||
ref_cdf = np.arange(1, 6) / 5
|
||||
ref_sf = 1 - ref_cdf
|
||||
assert_equal(res.cdf.quantiles, ref_x)
|
||||
assert_equal(res.cdf.probabilities, ref_cdf)
|
||||
assert_equal(res.sf.quantiles, ref_x)
|
||||
assert_equal(res.sf.probabilities, ref_sf)
|
||||
|
||||
def test_nonunique(self):
|
||||
# Example with non-unique observations; `stats.ecdf` ref. [1] page 82
|
||||
sample = [0, 2, 1, 2, 3, 4]
|
||||
res = stats.ecdf(sample)
|
||||
ref_x = np.sort(np.unique(sample))
|
||||
ref_cdf = np.array([1/6, 2/6, 4/6, 5/6, 1])
|
||||
ref_sf = 1 - ref_cdf
|
||||
assert_equal(res.cdf.quantiles, ref_x)
|
||||
assert_equal(res.cdf.probabilities, ref_cdf)
|
||||
assert_equal(res.sf.quantiles, ref_x)
|
||||
assert_equal(res.sf.probabilities, ref_sf)
|
||||
|
||||
def test_evaluate_methods(self):
    """Test CDF and SF `evaluate` methods on a random censored sample."""
    rng = np.random.default_rng(1162729143302572461)
    sample = self.get_random_sample(rng, 15)[0]
    res = stats.ecdf(sample)
    x = res.cdf.quantiles
    xr = x + np.diff(x, append=x[-1]+1)/2  # right shifted points

    # (function, value left of the first quantile, values at -inf and +inf)
    expectations = (
        (res.cdf, 0, [0, 1]),  # CDF starts at 0
        (res.sf, 1, [1, 0]),   # SF starts at 1
    )
    for curve, start, limits in expectations:
        assert_equal(curve.evaluate(x), curve.probabilities)
        assert_equal(curve.evaluate(xr), curve.probabilities)
        assert_equal(curve.evaluate(x[0]-1), start)
        assert_equal(curve.evaluate([-np.inf, np.inf]), limits)
|
||||
|
||||
# Example problems: `tN` are observation times, `dN` flags deaths
# (1 means death, i.e. not censored) and `rN` is the reference SF.

# ref. [1] page 91
t1 = [37, 43, 47, 56, 60, 62, 71, 77, 80, 81]  # times
d1 = [0, 0, 1, 1, 0, 0, 0, 1, 1, 1]  # 1 means deaths (not censored)
r1 = [1, 1, 0.875, 0.75, 0.75, 0.75, 0.75, 0.5, 0.25, 0]  # reference SF

# https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
t2 = [8, 12, 26, 14, 21, 27, 8, 32, 20, 40]
d2 = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
r2 = [0.9, 0.788, 0.675, 0.675, 0.54, 0.405, 0.27, 0.27, 0.27]
t3 = [33, 28, 41, 48, 48, 25, 37, 48, 25, 43]
d3 = [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
r3 = [1, 0.875, 0.75, 0.75, 0.6, 0.6, 0.6]

# https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/bs704_survival4.html
t4 = [24, 3, 11, 19, 24, 13, 14, 2, 18, 17,
      24, 21, 12, 1, 10, 23, 6, 5, 9, 17]
d4 = [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1]
r4 = [0.95, 0.95, 0.897, 0.844, 0.844, 0.844, 0.844, 0.844, 0.844,
      0.844, 0.76, 0.676, 0.676, 0.676, 0.676, 0.507, 0.507]

# https://www.real-statistics.com/survival-analysis/kaplan-meier-procedure/confidence-interval-for-the-survival-function/
t5 = [3, 5, 8, 10, 5, 5, 8, 12, 15, 14, 2, 11, 10, 9, 12, 5, 8, 11]
d5 = [1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
r5 = [0.944, 0.889, 0.722, 0.542, 0.542, 0.542, 0.361, 0.181, 0.181, 0.181]
|
||||
|
||||
@pytest.mark.parametrize("case", [(t1, d1, r1), (t2, d2, r2), (t3, d3, r3),
                                  (t4, d4, r4), (t5, d5, r5)])
def test_right_censored_against_examples(self, case):
    """Check `ecdf` and the reference estimator against worked examples."""
    times, died, ref = case
    censored = np.logical_not(died)
    unique_times = np.sort(np.unique(times))

    # test `ecdf` against other implementations on example problems
    res = stats.ecdf(stats.CensoredData.right_censored(times, censored))
    assert_allclose(res.sf.probabilities, ref, atol=1e-3)
    assert_equal(res.sf.quantiles, unique_times)

    # test reference implementation against other implementations
    km = _kaplan_meier_reference(times, censored)
    assert_equal(km[0], unique_times)
    assert_allclose(km[1], ref, atol=1e-3)
|
||||
|
||||
@pytest.mark.parametrize('seed', [182746786639392128, 737379171436494115,
                                  576033618403180168, 308115465002673650])
def test_right_censored_against_reference_implementation(self, seed):
    """Check `ecdf` against the reference estimator on random problems."""
    rng = np.random.default_rng(seed)
    n_unique = rng.integers(10, 100)
    sample, times, censored = self.get_random_sample(rng, n_unique)

    estimate = stats.ecdf(sample)
    km = _kaplan_meier_reference(times, censored)
    assert_allclose(estimate.sf.quantiles, km[0])
    assert_allclose(estimate.sf.probabilities, km[1])

    # If all observations are uncensored, the KM estimate should match
    # the usual estimate for uncensored data
    uncensored = stats.CensoredData(uncensored=times)
    forced_km = _survival._ecdf_right_censored(uncensored)  # force Kaplan-Meier
    usual = stats.ecdf(times)
    assert_equal(forced_km[0], usual.sf.quantiles)
    assert_allclose(forced_km[1], usual.cdf.probabilities, rtol=1e-14)
    assert_allclose(forced_km[2], usual.sf.probabilities, rtol=1e-14)
|
||||
|
||||
def test_right_censored_ci(self):
|
||||
# test "greenwood" confidence interval against example 4 (URL above).
|
||||
times, died = self.t4, self.d4
|
||||
sample = stats.CensoredData.right_censored(times, np.logical_not(died))
|
||||
res = stats.ecdf(sample)
|
||||
ref_allowance = [0.096, 0.096, 0.135, 0.162, 0.162, 0.162, 0.162,
|
||||
0.162, 0.162, 0.162, 0.214, 0.246, 0.246, 0.246,
|
||||
0.246, 0.341, 0.341]
|
||||
|
||||
sf_ci = res.sf.confidence_interval()
|
||||
cdf_ci = res.cdf.confidence_interval()
|
||||
allowance = res.sf.probabilities - sf_ci.low.probabilities
|
||||
|
||||
assert_allclose(allowance, ref_allowance, atol=1e-3)
|
||||
assert_allclose(sf_ci.low.probabilities,
|
||||
np.clip(res.sf.probabilities - allowance, 0, 1))
|
||||
assert_allclose(sf_ci.high.probabilities,
|
||||
np.clip(res.sf.probabilities + allowance, 0, 1))
|
||||
assert_allclose(cdf_ci.low.probabilities,
|
||||
np.clip(res.cdf.probabilities - allowance, 0, 1))
|
||||
assert_allclose(cdf_ci.high.probabilities,
|
||||
np.clip(res.cdf.probabilities + allowance, 0, 1))
|
||||
|
||||
# test "log-log" confidence interval against Mathematica
|
||||
# e = {24, 3, 11, 19, 24, 13, 14, 2, 18, 17, 24, 21, 12, 1, 10, 23, 6, 5,
|
||||
# 9, 17}
|
||||
# ci = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0}
|
||||
# R = EventData[e, ci]
|
||||
# S = SurvivalModelFit[R]
|
||||
# S["PointwiseIntervals", ConfidenceLevel->0.95,
|
||||
# ConfidenceTransform->"LogLog"]
|
||||
|
||||
ref_low = [0.694743, 0.694743, 0.647529, 0.591142, 0.591142, 0.591142,
|
||||
0.591142, 0.591142, 0.591142, 0.591142, 0.464605, 0.370359,
|
||||
0.370359, 0.370359, 0.370359, 0.160489, 0.160489]
|
||||
ref_high = [0.992802, 0.992802, 0.973299, 0.947073, 0.947073, 0.947073,
|
||||
0.947073, 0.947073, 0.947073, 0.947073, 0.906422, 0.856521,
|
||||
0.856521, 0.856521, 0.856521, 0.776724, 0.776724]
|
||||
sf_ci = res.sf.confidence_interval(method='log-log')
|
||||
assert_allclose(sf_ci.low.probabilities, ref_low, atol=1e-6)
|
||||
assert_allclose(sf_ci.high.probabilities, ref_high, atol=1e-6)
|
||||
|
||||
def test_right_censored_ci_example_5(self):
|
||||
# test "exponential greenwood" confidence interval against example 5
|
||||
times, died = self.t5, self.d5
|
||||
sample = stats.CensoredData.right_censored(times, np.logical_not(died))
|
||||
res = stats.ecdf(sample)
|
||||
lower = np.array([0.66639, 0.624174, 0.456179, 0.287822, 0.287822,
|
||||
0.287822, 0.128489, 0.030957, 0.030957, 0.030957])
|
||||
upper = np.array([0.991983, 0.970995, 0.87378, 0.739467, 0.739467,
|
||||
0.739467, 0.603133, 0.430365, 0.430365, 0.430365])
|
||||
|
||||
sf_ci = res.sf.confidence_interval(method='log-log')
|
||||
cdf_ci = res.cdf.confidence_interval(method='log-log')
|
||||
|
||||
assert_allclose(sf_ci.low.probabilities, lower, atol=1e-5)
|
||||
assert_allclose(sf_ci.high.probabilities, upper, atol=1e-5)
|
||||
assert_allclose(cdf_ci.low.probabilities, 1-upper, atol=1e-5)
|
||||
assert_allclose(cdf_ci.high.probabilities, 1-lower, atol=1e-5)
|
||||
|
||||
# Test against R's `survival` library `survfit` function, 90%CI
|
||||
# library(survival)
|
||||
# options(digits=16)
|
||||
# time = c(3, 5, 8, 10, 5, 5, 8, 12, 15, 14, 2, 11, 10, 9, 12, 5, 8, 11)
|
||||
# status = c(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1)
|
||||
# res = survfit(Surv(time, status)
|
||||
# ~1, conf.type = "log-log", conf.int = 0.90)
|
||||
# res$time; res$lower; res$upper
|
||||
low = [0.74366748406861172, 0.68582332289196246, 0.50596835651480121,
|
||||
0.32913131413336727, 0.32913131413336727, 0.32913131413336727,
|
||||
0.15986912028781664, 0.04499539918147757, 0.04499539918147757,
|
||||
0.04499539918147757]
|
||||
high = [0.9890291867238429, 0.9638835422144144, 0.8560366823086629,
|
||||
0.7130167643978450, 0.7130167643978450, 0.7130167643978450,
|
||||
0.5678602982997164, 0.3887616766886558, 0.3887616766886558,
|
||||
0.3887616766886558]
|
||||
sf_ci = res.sf.confidence_interval(method='log-log',
|
||||
confidence_level=0.9)
|
||||
assert_allclose(sf_ci.low.probabilities, low)
|
||||
assert_allclose(sf_ci.high.probabilities, high)
|
||||
|
||||
# And with conf.type = "plain"
|
||||
low = [0.8556383113628162, 0.7670478794850761, 0.5485720663578469,
|
||||
0.3441515412527123, 0.3441515412527123, 0.3441515412527123,
|
||||
0.1449184105424544, 0., 0., 0.]
|
||||
high = [1., 1., 0.8958723780865975, 0.7391817920806210,
|
||||
0.7391817920806210, 0.7391817920806210, 0.5773038116797676,
|
||||
0.3642270254596720, 0.3642270254596720, 0.3642270254596720]
|
||||
sf_ci = res.sf.confidence_interval(confidence_level=0.9)
|
||||
assert_allclose(sf_ci.low.probabilities, low)
|
||||
assert_allclose(sf_ci.high.probabilities, high)
|
||||
|
||||
def test_right_censored_ci_nans(self):
    """Check `ecdf` confidence intervals on data that produce NaN bounds.

    Each `confidence_interval` call is expected to emit a RuntimeWarning
    saying the interval is undefined at some observations. Results are
    compared against Matlab (CDF and SF) and R's `survfit` (SF, "plain"
    and "log-log").
    """
    event_times, event_observed = self.t1, self.d1
    data = stats.CensoredData.right_censored(
        event_times, np.logical_not(event_observed))
    fit = stats.ecdf(data)

    warn_msg = "The confidence interval is undefined at some observations"

    # Reference values generated with Matlab
    # format long
    # t = [37 43 47 56 60 62 71 77 80 81];
    # d = [0 0 1 1 0 0 0 1 1 1];
    # censored = ~d;
    # [f, x, flo, fup] = ecdf(t, 'Censoring', censored, 'Alpha', 0.05);
    matlab_x = [37, 47, 56, 77, 80, 81]
    cdf_flo = [np.nan, 0, 0, 0.052701464070711, 0.337611126231790, np.nan]
    cdf_fup = [np.nan, 0.35417230377, 0.5500569798, 0.9472985359, 1.0,
               np.nan]
    # [f, x, flo, fup] = ecdf(t, 'Censoring', censored, 'Function',
    # 'survivor', 'Alpha', 0.05);
    sf_flo = [np.nan, 0.64582769623, 0.449943020228, 0.05270146407, 0,
              np.nan]
    sf_fup = [np.nan, 1.0, 1.0, 0.947298535929289, 0.662388873768210,
              np.nan]

    # Positions of the Matlab abscissae within our quantiles.
    # Matlab gives NaN as the first element of the CIs. Mathematica agrees,
    # but R's survfit does not. It makes some sense, but it's not what the
    # formula gives, so skip that element.
    idx = np.searchsorted(fit.cdf.quantiles, matlab_x)
    keep = idx[1:]

    with pytest.warns(RuntimeWarning, match=warn_msg):
        bounds = fit.cdf.confidence_interval()
    assert_allclose(bounds.low.probabilities[keep], cdf_flo[1:])
    assert_allclose(bounds.high.probabilities[keep], cdf_fup[1:])

    with pytest.warns(RuntimeWarning, match=warn_msg):
        bounds = fit.sf.confidence_interval()
    assert_allclose(bounds.low.probabilities[keep], sf_flo[1:])
    assert_allclose(bounds.high.probabilities[keep], sf_fup[1:])

    # With the same data, R's `survival` library `survfit` function
    # doesn't produce the leading NaN
    # library(survival)
    # options(digits=16)
    # time = c(37, 43, 47, 56, 60, 62, 71, 77, 80, 81)
    # status = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1)
    # res = survfit(Surv(time, status)
    # ~1, conf.type = "plain", conf.int = 0.95)
    # res$time
    # res$lower
    # res$upper
    r_plain_low = [1., 1., 0.64582769623233816, 0.44994302022779326,
                   0.44994302022779326, 0.44994302022779326,
                   0.44994302022779326, 0.05270146407071086, 0., np.nan]
    r_plain_high = [1., 1., 1., 1., 1., 1., 1., 0.9472985359292891,
                    0.6623888737682101, np.nan]
    # `bounds` still holds the default-method SF interval computed above.
    assert_allclose(bounds.low.probabilities, r_plain_low)
    assert_allclose(bounds.high.probabilities, r_plain_high)

    # It does with conf.type="log-log", as do we
    r_loglog_low = [np.nan, np.nan, 0.38700001403202522,
                    0.31480711370551911, 0.31480711370551911,
                    0.31480711370551911, 0.31480711370551911,
                    0.08048821148507734, 0.01049958986680601, np.nan]
    r_loglog_high = [np.nan, np.nan, 0.9813929658789660,
                     0.9308983170906275, 0.9308983170906275,
                     0.9308983170906275, 0.9308983170906275,
                     0.8263946341076415, 0.6558775085110887, np.nan]
    with pytest.warns(RuntimeWarning, match=warn_msg):
        bounds = fit.sf.confidence_interval(method='log-log')
    assert_allclose(bounds.low.probabilities, r_loglog_low)
    assert_allclose(bounds.high.probabilities, r_loglog_high)
|
||||
|
||||
def test_right_censored_against_uncensored(self):
|
||||
rng = np.random.default_rng(7463952748044886637)
|
||||
sample = rng.integers(10, 100, size=1000)
|
||||
censored = np.zeros_like(sample)
|
||||
censored[np.argmax(sample)] = True
|
||||
res = stats.ecdf(sample)
|
||||
ref = stats.ecdf(stats.CensoredData.right_censored(sample, censored))
|
||||
assert_equal(res.sf.quantiles, ref.sf.quantiles)
|
||||
assert_equal(res.sf._n, ref.sf._n)
|
||||
assert_equal(res.sf._d[:-1], ref.sf._d[:-1]) # difference @ [-1]
|
||||
assert_allclose(res.sf._sf[:-1], ref.sf._sf[:-1], rtol=1e-14)
|
||||
|
||||
def test_plot_iv(self):
    """Check `plot` with and without matplotlib being importable.

    When matplotlib imports, `plot` must simply run. When it does not,
    `plot` must raise ModuleNotFoundError with an informative message.
    """
    rng = np.random.default_rng(1769658657308472721)
    n_unique = rng.integers(10, 100)
    sample, _, _ = self.get_random_sample(rng, n_unique)
    survival = stats.ecdf(sample).sf

    try:
        import matplotlib.pyplot as plt  # noqa: F401
        survival.plot()  # no other errors occur
    except ImportError:  # ModuleNotFoundError is a subclass of ImportError
        # Avoid trying to call MPL with numpy 2.0-dev, because that fails
        # too often due to ABI mismatches and is hard to avoid. This test
        # will work fine again once MPL has done a 2.0-compatible release.
        if np.__version__.startswith('2.0.0.dev0'):
            return
        message = r"matplotlib must be installed to use method `plot`."
        with pytest.raises(ModuleNotFoundError, match=message):
            survival.plot()
|
||||
|
||||
|
||||
class TestLogRank:
    """Tests for `stats.logrank` on right-censored two-sample data."""

    @pytest.mark.parametrize(
        "x, y, statistic, pvalue",
        # Results validated with R
        # library(survival)
        # options(digits=16)
        #
        # futime_1 <- c(8, 12, 26, 14, 21, 27, 8, 32, 20, 40)
        # fustat_1 <- c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
        # rx_1 <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        #
        # futime_2 <- c(33, 28, 41, 48, 48, 25, 37, 48, 25, 43)
        # fustat_2 <- c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
        # rx_2 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        #
        # futime <- c(futime_1, futime_2)
        # fustat <- c(fustat_1, fustat_2)
        # rx <- c(rx_1, rx_2)
        #
        # survdiff(formula = Surv(futime, fustat) ~ rx)
        #
        # Also checked against another library which handles alternatives
        # library(nph)
        # logrank.test(futime, fustat, rx, alternative = "two.sided")
        # res["test"]
        [
            (
                # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
                # each sample is given as [uncensored, censored]
                [[8, 12, 26, 14, 21, 27], [8, 32, 20, 40]],
                [[33, 28, 41], [48, 48, 25, 37, 48, 25, 43]],
                # chi2, then p-values for ["two-sided", "less", "greater"]
                6.91598157449,
                [0.008542873404, 0.9957285632979385, 0.004271436702061537],
            ),
            (
                # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
                [[19, 6, 5, 4], [20, 19, 17, 14]],
                [[16, 21, 7], [21, 15, 18, 18, 5]],
                0.835004855038,
                [0.3608293039, 0.8195853480676912, 0.1804146519323088],
            ),
            (
                # Bland, Altman, "The logrank test", BMJ, 2004
                # https://www.bmj.com/content/328/7447/1073.short
                [[6, 13, 21, 30, 37, 38, 49, 50, 63, 79, 86, 98, 202, 219],
                 [31, 47, 80, 82, 82, 149]],
                [[10, 10, 12, 13, 14, 15, 16, 17, 18, 20, 24, 24, 25, 28, 30,
                  33, 35, 37, 40, 40, 46, 48, 76, 81, 82, 91, 112, 181],
                 [34, 40, 70]],
                7.49659416854,
                [0.006181578637, 0.003090789318730882, 0.9969092106812691],
            ),
        ]
    )
    def test_log_rank(self, x, y, statistic, pvalue):
        """Compare the statistic and p-value with references, per alternative."""
        group_x = stats.CensoredData(uncensored=x[0], right=x[1])
        group_y = stats.CensoredData(uncensored=y[0], right=y[1])

        alternatives = ("two-sided", "less", "greater")
        for alternative, expected_pvalue in zip(alternatives, pvalue):
            outcome = stats.logrank(x=group_x, y=group_y,
                                    alternative=alternative)

            # We return z and use the normal distribution, while the other
            # frameworks return z**2. The p-values are directly comparable,
            # but the statistic has to be squared first.
            assert_allclose(outcome.statistic**2, statistic, atol=1e-10)
            assert_allclose(outcome.pvalue, expected_pvalue, atol=1e-10)

    def test_raises(self):
        """A non-1D plain-sequence argument is rejected with a named error."""
        valid = stats.CensoredData([1, 2])
        nested = [[1, 2]]

        with pytest.raises(ValueError, match=r"`y` must be"):
            stats.logrank(x=valid, y=nested)

        with pytest.raises(ValueError, match=r"`x` must be"):
            stats.logrank(x=nested, y=valid)
|
||||
85
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_tukeylambda_stats.py
vendored
Normal file
85
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_tukeylambda_stats.py
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
|
||||
from scipy.stats._tukeylambda_stats import (tukeylambda_variance,
|
||||
tukeylambda_kurtosis)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis:
    # lambda   var      kurtosis
    #   0     pi**2/3   6/5     (logistic distribution)
    #  0.5    4 - pi    (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #   1     1/3       -6/5    (uniform distribution on (-1,1))
    #   2     1/12      -6/5    (uniform distribution on (-1/2, 1/2))

    # `assert_allclose` adds `rtol * abs(desired)` to `atol`, and `rtol`
    # defaults to 1e-7, which would swamp the absolute tolerances below.
    # Pin `rtol=0` so the stated `atol` is the tolerance actually enforced.
    a10 = dict(atol=1e-10, rtol=0)
    a12 = dict(atol=1e-12, rtol=0)

    # lambda = 0
    var = tukeylambda_variance(0)
    assert_allclose(var, np.pi**2 / 3, **a12)
    kurt = tukeylambda_kurtosis(0)
    assert_allclose(kurt, 1.2, **a10)

    # lambda = 0.5
    var = tukeylambda_variance(0.5)
    assert_allclose(var, 4 - np.pi, **a12)
    kurt = tukeylambda_kurtosis(0.5)
    desired = (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3
    assert_allclose(kurt, desired, **a10)

    # lambda = 1
    var = tukeylambda_variance(1)
    assert_allclose(var, 1.0 / 3, **a12)
    kurt = tukeylambda_kurtosis(1)
    assert_allclose(kurt, -1.2, **a10)

    # lambda = 2
    var = tukeylambda_variance(2)
    assert_allclose(var, 1.0 / 12, **a12)
    kurt = tukeylambda_kurtosis(2)
    assert_allclose(kurt, -1.2, **a10)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    # Purely absolute tolerances (rtol disabled).
    kurt_tol = {"atol": 1e-10, "rtol": 0}
    var_tol = {"atol": 1e-12, "rtol": 0}

    reference = (
        # lambda   variance                kurtosis
        (-0.1, 4.78050217874253547, 3.78559520346454510),
        (-0.0649, 4.16428023599895777, 2.52019675947435718),
        (-0.05, 3.93672267890775277, 2.13129793057777277),
        (-0.001, 3.30128380390964882, 1.21452460083542988),
        (0.001, 3.27850775649572176, 1.18560634779287585),
        (0.03125, 2.95927803254615800, 0.804487555161819980),
        (0.05, 2.78281053405464501, 0.611604043886644327),
        (0.0649, 2.65282386754100551, 0.476834119532774540),
        (1.2, 0.242153920578588346, -1.23428047169049726),
        (10.0, 0.00095237579757703597, 2.37810697355144933),
        (20.0, 0.00012195121951131043, 7.37654321002709531),
    )

    # Scalar arguments, one row at a time.
    for row in reference:
        lam, want_var, want_kurt = row
        assert_allclose(tukeylambda_variance(lam), want_var, **var_tol)
        assert_allclose(tukeylambda_kurtosis(lam), want_kurt, **kurt_tol)

    # Test with vector arguments (most of the other tests are for single
    # values).
    lams, want_vars, want_kurts = zip(*reference)
    assert_allclose(tukeylambda_variance(lams), want_vars, **var_tol)
    assert_allclose(tukeylambda_kurtosis(lams), want_kurts, **kurt_tol)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    nan_then_inf = np.array([np.nan, np.inf])

    # Variance: expected nan at lambda = -1.0 and inf at lambda = -0.5.
    variance = tukeylambda_variance([-1.0, -0.5])
    assert_equal(variance, nan_then_inf)

    # Kurtosis: expected nan at lambda = -1.0 and inf at lambda = -0.25.
    kurtosis = tukeylambda_kurtosis([-1.0, -0.25])
    assert_equal(kurtosis, nan_then_inf)
|
||||
213
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_variation.py
vendored
Normal file
213
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_variation.py
vendored
Normal file
@@ -0,0 +1,213 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from numpy.testing import suppress_warnings
|
||||
|
||||
from scipy.stats import variation
|
||||
from scipy._lib._util import AxisError
|
||||
from scipy.conftest import array_api_compatible
|
||||
from scipy._lib._array_api import xp_assert_equal, xp_assert_close, is_numpy
|
||||
from scipy.stats._axis_nan_policy import (too_small_nd_omit, too_small_nd_not_omit,
|
||||
SmallSampleWarning)
|
||||
|
||||
# Module-level pytest marks: pytest applies every mark in `pytestmark` to
# all tests in this module, here `array_api_compatible` and the
# "skip_xp_backends" fixture.
pytestmark = [array_api_compatible, pytest.mark.usefixtures("skip_xp_backends")]
|
||||
# Short alias so tests below can be decorated with `@skip_xp_backends(...)`.
skip_xp_backends = pytest.mark.skip_xp_backends
|
||||
|
||||
|
||||
class TestVariation:
    """
    Test class for scipy.stats.variation

    Every test receives the array namespace under test as `xp`. Tests that
    rely on `nan_policy`, `keepdims` or Python-list input are restricted
    to the NumPy backend through `skip_xp_backends(np_only=True, ...)`.
    """

    def test_ddof(self, xp):
        # arange(9) has mean 4 and summed squared deviations 60.
        x = xp.arange(9.0)
        xp_assert_close(variation(x, ddof=1), xp.asarray(math.sqrt(60/8)/4))

    @pytest.mark.parametrize('sgn', [1, -1])
    def test_sign(self, sgn, xp):
        # The result carries the sign of the mean.
        x = xp.asarray([1., 2., 3., 4., 5.])
        v = variation(sgn*x)
        expected = xp.asarray(sgn*math.sqrt(2)/3)
        xp_assert_close(v, expected, rtol=1e-10)

    def test_scalar(self, xp):
        # A scalar is treated like a 1-d sequence with length 1.
        xp_assert_equal(variation(4.0), 0.0)

    @pytest.mark.parametrize('nan_policy, expected',
                             [('propagate', np.nan),
                              ('omit', np.sqrt(20/3)/4)])
    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    def test_variation_nan(self, nan_policy, expected, xp):
        # With the trailing NaN omitted, the data are 0..8 (mean 4).
        x = xp.arange(10.)
        x[9] = xp.nan
        xp_assert_close(variation(x, nan_policy=nan_policy), expected)

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    def test_nan_policy_raise(self, xp):
        x = xp.asarray([1.0, 2.0, xp.nan, 3.0])
        with pytest.raises(ValueError, match='input contains nan'):
            variation(x, nan_policy='raise')

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    def test_bad_nan_policy(self, xp):
        with pytest.raises(ValueError, match='must be one of'):
            variation([1, 2, 3], nan_policy='foobar')

    @skip_xp_backends(np_only=True,
                      reasons=['`keepdims` only supports NumPy backend'])
    def test_keepdims(self, xp):
        # Rows are 0..4 (mean 2) and 5..9 (mean 7), both with std sqrt(2).
        x = xp.reshape(xp.arange(10), (2, 5))
        y = variation(x, axis=1, keepdims=True)
        expected = np.array([[np.sqrt(2)/2],
                             [np.sqrt(2)/7]])
        xp_assert_close(y, expected)

    @skip_xp_backends(np_only=True,
                      reasons=['`keepdims` only supports NumPy backend'])
    @pytest.mark.parametrize('axis, expected',
                             [(0, np.empty((1, 0))),
                              (1, np.full((5, 1), fill_value=np.nan))])
    def test_keepdims_size0(self, axis, expected, xp):
        x = xp.zeros((5, 0))
        if axis == 1:
            # Reducing along the empty axis is expected to warn.
            with pytest.warns(SmallSampleWarning, match=too_small_nd_not_omit):
                y = variation(x, axis=axis, keepdims=True)
        else:
            y = variation(x, axis=axis, keepdims=True)
        xp_assert_equal(y, expected)

    @skip_xp_backends(np_only=True,
                      reasons=['`keepdims` only supports NumPy backend'])
    @pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
    def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill, xp):
        # ddof == n is expected to give inf; ddof == n + 1 to give nan.
        x = xp.asarray([[1, 1, 2, 2], [1, 2, 3, 3]])
        y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
        xp_assert_equal(y, xp.full((2, 1), fill_value=expected_fill))

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    def test_propagate_nan(self, xp):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = xp.reshape(xp.arange(8, dtype=float), (2, -1))
        a[1, 0] = xp.nan
        v = variation(a, axis=1, nan_policy="propagate")
        xp_assert_close(v, [math.sqrt(5/4)/1.5, xp.nan], atol=1e-15)

    @skip_xp_backends(np_only=True, reasons=['Python list input uses NumPy backend'])
    def test_axis_none(self, xp):
        # Check that `variation` computes the result on the flattened
        # input when axis is None.
        y = variation([[0, 1], [2, 3]], axis=None)
        xp_assert_close(y, math.sqrt(5/4)/1.5)

    def test_bad_axis(self, xp):
        # Check that an invalid axis raises an error; either AxisError or
        # IndexError is accepted.
        x = xp.asarray([[1, 2, 3], [4, 5, 6]])
        with pytest.raises((AxisError, IndexError)):
            variation(x, axis=10)

    def test_mean_zero(self, xp):
        # Check that `variation` returns inf for a sequence that is not
        # identically zero but whose mean is zero.
        x = xp.asarray([10., -3., 1., -4., -4.])
        y = variation(x)
        xp_assert_equal(y, xp.asarray(xp.inf))

        x2 = xp.stack([x, -10.*x])
        y2 = variation(x2, axis=1)
        xp_assert_equal(y2, xp.asarray([xp.inf, xp.inf]))

    @pytest.mark.parametrize('x', [[0.]*5, [1, 2, np.inf, 9]])
    def test_return_nan(self, x, xp):
        x = xp.asarray(x)
        # Test some cases where `variation` returns nan.
        y = variation(x)
        xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))

    @pytest.mark.parametrize('axis, expected',
                             [(0, []), (1, [np.nan]*3), (None, np.nan)])
    def test_2d_size_zero_with_axis(self, axis, expected, xp):
        x = xp.empty((3, 0))
        with suppress_warnings() as sup:
            # torch
            sup.filter(UserWarning, "std*")
            if axis != 0:
                # Only the NumPy backend is required to emit the warning.
                if is_numpy(xp):
                    with pytest.warns(SmallSampleWarning, match="See documentation..."):
                        y = variation(x, axis=axis)
                else:
                    y = variation(x, axis=axis)
            else:
                y = variation(x, axis=axis)
        xp_assert_equal(y, xp.asarray(expected))

    def test_neg_inf(self, xp):
        # Edge case that produces -inf: ddof equals the number of non-nan
        # values, the values are not constant, and the mean is negative.
        x1 = xp.asarray([-3., -5.])
        xp_assert_equal(variation(x1, ddof=2), xp.asarray(-xp.inf))

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    def test_neg_inf_nan(self, xp):
        # Same -inf edge case, per row: each row has two non-nan values
        # with a negative mean, and ddof=2.
        x2 = xp.asarray([[xp.nan, 1, -10, xp.nan],
                         [-20, -3, xp.nan, xp.nan]])
        xp_assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
                        [-xp.inf, -xp.inf])

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    @pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
    def test_combined_edge_cases(self, nan_policy, xp):
        # Columns: all zeros, zero mean, all nan, and an ordinary column.
        # `asarray` (not `array`) is used, as in the rest of this class.
        x = xp.asarray([[0, 10, xp.nan, 1],
                        [0, -5, xp.nan, 2],
                        [0, -5, xp.nan, 3]])
        if nan_policy == 'omit':
            with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
                y = variation(x, axis=0, nan_policy=nan_policy)
        else:
            y = variation(x, axis=0, nan_policy=nan_policy)
        xp_assert_close(y, [xp.nan, xp.inf, xp.nan, math.sqrt(2/3)/2])

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    @pytest.mark.parametrize(
        'ddof, expected',
        [(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
         (1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
         (2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
    )
    def test_more_nan_policy_omit_tests(self, ddof, expected, xp):
        # The array below is kept in a tabular arrangement so that each
        # row (one case along axis=1) lines up with its entry in
        # `expected`.
        nan = xp.nan
        x = xp.asarray([[1.0, 2.0, nan, 3.0],
                        [0.0, 4.0, 3.0, 1.0],
                        [nan, -.5, 0.5, nan],
                        [nan, 9.0, 9.0, nan],
                        [nan, nan, nan, nan],
                        [3.0, 3.0, 3.0, 3.0],
                        [0.0, 0.0, 0.0, 0.0]])
        with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
            v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
        xp_assert_close(v, expected)

    @skip_xp_backends(np_only=True,
                      reasons=['`nan_policy` only supports NumPy backend'])
    def test_variation_ddof(self, xp):
        # test variation with delta degrees of freedom
        # regression test for gh-13341
        a = xp.asarray([1., 2., 3., 4., 5.])
        nan_a = xp.asarray([1, 2, 3, xp.nan, 4, 5, xp.nan])
        y = variation(a, ddof=1)
        nan_y = variation(nan_a, nan_policy="omit", ddof=1)
        xp_assert_close(y, math.sqrt(5/2)/3)
        assert y == nan_y
|
||||
Reference in New Issue
Block a user