comment here
This commit is contained in:
0
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__init__.py
vendored
Normal file
0
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__init__.py
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/__init__.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/__init__.cpython-311.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_fit.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_fit.cpython-311.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_kdeoth.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_kdeoth.cpython-311.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_qmc.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_qmc.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_rank.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_rank.cpython-311.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_stats.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/__pycache__/test_stats.cpython-311.pyc
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
450
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/common_tests.py
vendored
Normal file
450
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/common_tests.py
vendored
Normal file
@@ -0,0 +1,450 @@
|
||||
import pickle
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import numpy.ma.testutils as ma_npt
|
||||
|
||||
from scipy._lib._util import getfullargspec_no_self as _getfullargspec
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def check_named_results(res, attributes, ma=False):
    """Assert that named access on `res` agrees with positional access.

    For each ``(i, name)`` pair obtained by enumerating `attributes`,
    ``res[i]`` must equal ``getattr(res, name)``.  When `ma` is truthy the
    comparison is done with ``numpy.ma.testutils.assert_equal``; otherwise
    ``numpy.testing.assert_equal`` is used.
    """
    for position, name in enumerate(attributes):
        by_index = res[position]
        by_name = getattr(res, name)
        if not ma:
            npt.assert_equal(by_index, by_name)
        else:
            ma_npt.assert_equal(by_index, by_name)
|
||||
|
||||
|
||||
def check_normalization(distfn, args, distname):
    """Check that `distfn` is normalized for the shape parameters `args`.

    Three quantities must be close to 1.0: the zeroth moment, the
    expectation of the constant function 1, and the CDF evaluated at the
    upper end of the support.  `distname` is the failure message of the
    expectation check and also selects its tolerances.
    """
    # zeroth moment
    zeroth_moment = distfn.moment(0, *args)
    npt.assert_allclose(zeroth_moment, 1.0)

    # expectation of the constant 1; "rv_histogram_instance" is compared
    # with a looser, absolute-only tolerance
    is_histogram = distname == "rv_histogram_instance"
    atol, rtol = (1e-5, 0) if is_histogram else (1e-7, 1e-7)
    total_mass = distfn.expect(lambda x: 1, args=args)
    npt.assert_allclose(total_mass, 1.0, atol=atol, rtol=rtol,
                        err_msg=distname, verbose=True)

    # CDF at the upper support bound
    _, upper = distfn.support(*args)
    cdf_at_upper = distfn.cdf(upper, *args)
    npt.assert_allclose(cdf_at_upper, 1.0)
|
||||
|
||||
|
||||
def check_moment(distfn, arg, m, v, msg):
    """Compare the first two raw moments of `distfn` with `m` and `v`.

    Parameters
    ----------
    distfn : distribution object
        Must provide ``moment(order, *arg)``.
    arg : sequence
        Shape parameters forwarded to ``distfn.moment``.
    m, v : float
        Expected mean and variance.
    msg : str
        Prefix of the failure messages.

    Notes
    -----
    When `m` is infinite, the first moment only has to be infinite as well;
    otherwise it must match `m` to 10 decimals.  Likewise, when `v` is
    infinite the second raw moment has to be infinite; otherwise
    ``m2 - m1**2`` must match `v` to 10 decimals.
    """
    m1 = distfn.moment(1, *arg)
    m2 = distfn.moment(2, *arg)

    if not np.isinf(m):
        npt.assert_almost_equal(m1, m, decimal=10,
                                err_msg=msg + ' - 1st moment')
    else:                     # or np.isnan(m1),
        npt.assert_(np.isinf(m1),
                    msg + ' - 1st moment -infinite, m1=%s' % str(m1))

    if not np.isinf(v):
        # variance reconstructed from the raw moments
        npt.assert_almost_equal(m2 - m1 * m1, v, decimal=10,
                                err_msg=msg + ' - 2nd moment')
    else:                     # or np.isnan(m2),
        npt.assert_(np.isinf(m2),
                    msg + ' - 2nd moment -infinite, m2=%s' % str(m2))
|
||||
|
||||
|
||||
def check_mean_expect(distfn, arg, m, msg):
    """Check that ``distfn.expect`` of the identity reproduces the mean `m`.

    Nothing is checked when `m` is not finite.  Otherwise the expectation of
    ``x`` (with shape parameters `arg`) must agree with `m` to 5 decimals;
    `msg` prefixes the failure message.
    """
    if not np.isfinite(m):
        return
    mean_via_expect = distfn.expect(lambda x: x, arg)
    npt.assert_almost_equal(mean_via_expect, m, decimal=5,
                            err_msg=msg + ' - 1st moment (expect)')
|
||||
|
||||
|
||||
def check_var_expect(distfn, arg, m, v, msg):
    """Check that ``distfn.expect`` of ``x*x`` equals ``v + m*m``.

    Nothing is checked when the variance `v` is not finite.  When `msg` is
    ``"rv_histogram_instance"`` the comparison uses ``rtol=5e-6``; otherwise
    the defaults of ``assert_allclose`` apply.
    """
    tolerances = {}
    if msg == "rv_histogram_instance":
        tolerances['rtol'] = 5e-6

    if not np.isfinite(v):
        return
    second_raw_moment = distfn.expect(lambda x: x*x, arg)
    npt.assert_allclose(second_raw_moment, v + m*m, **tolerances)
|
||||
|
||||
|
||||
def check_skew_expect(distfn, arg, m, v, s, msg):
    """Check the skewness `s` against the third central moment from ``expect``.

    For a finite `s`, the expectation of ``(x - m)**3`` must match
    ``s * v**1.5`` to 5 decimals.  A non-finite `s` is only accepted when it
    is NaN.
    """
    if not np.isfinite(s):
        npt.assert_(np.isnan(s))
        return
    third_central = distfn.expect(lambda x: np.power(x-m, 3), arg)
    npt.assert_almost_equal(third_central, s * np.power(v, 1.5),
                            decimal=5, err_msg=msg + ' - skew')
|
||||
|
||||
|
||||
def check_kurt_expect(distfn, arg, m, v, k, msg):
    """Check the excess kurtosis `k` against the fourth central moment.

    For a finite `k`, the expectation of ``(x - m)**4`` must be close to
    ``(k + 3) * v**2`` (``atol`` and ``rtol`` of 1e-5).  Positive infinity is
    accepted without further checks; any other non-finite `k` must be NaN.
    """
    if np.isfinite(k):
        fourth_central = distfn.expect(lambda x: np.power(x-m, 4), arg)
        npt.assert_allclose(fourth_central, (k + 3.) * np.power(v, 2),
                            atol=1e-5, rtol=1e-5,
                            err_msg=msg + ' - kurtosis')
        return
    if not np.isposinf(k):
        npt.assert_(np.isnan(k))
|
||||
|
||||
|
||||
def check_entropy(distfn, arg, msg):
    """Assert that ``distfn.entropy(*arg)`` is not NaN.

    `msg` is prepended to the failure message.
    """
    entropy_value = distfn.entropy(*arg)
    entropy_is_nan = np.isnan(entropy_value)
    npt.assert_(not entropy_is_nan, msg + 'test Entropy is nan')
|
||||
|
||||
|
||||
def check_private_entropy(distfn, args, superclass):
    """Compare the distribution-specific ``_entropy`` with a generic one.

    ``distfn._entropy(*args)`` must be close to the result of calling
    ``superclass._entropy`` with `distfn` passed explicitly as the instance.
    """
    specific = distfn._entropy(*args)
    generic = superclass._entropy(distfn, *args)
    npt.assert_allclose(specific, generic)
|
||||
|
||||
|
||||
def check_entropy_vect_scale(distfn, arg):
    """Check that ``distfn.entropy`` vectorizes over the ``scale`` keyword.

    The entropy computed for a whole collection of scales must agree (to
    ``atol=1e-14``) with the entropies computed one scale at a time, both
    for a 2-d array of scales and for a plain list that contains an invalid
    (negative) scale.
    """
    def compare(scales, individual_scales):
        vectorized = distfn.entropy(*arg, scale=scales)
        one_by_one = [distfn.entropy(*arg, scale=s)
                      for s in individual_scales]
        one_by_one = np.asarray(one_by_one).reshape(vectorized.shape)
        assert_allclose(vectorized, one_by_one, atol=1e-14)

    # check 2-d
    grid = np.asarray([[1, 2], [3, 4]])
    compare(grid, grid.ravel())

    # check invalid value, check cast
    with_invalid = [1, 2, -3]
    compare(with_invalid, with_invalid)
|
||||
|
||||
|
||||
def check_edge_support(distfn, args):
    """Check that the edges of the support are handled correctly.

    The distribution functions are evaluated at the two support bounds
    (for ``rv_discrete`` instances the lower bound is shifted down by one)
    and must return the exact limiting values; ``ppf`` and ``isf`` must map
    0 and 1 back onto those points, and return NaN outside ``[0, 1]``.
    """
    lower, upper = distfn.support(*args)
    if isinstance(distfn, stats.rv_discrete):
        lower = lower - 1
    edges = (lower, upper)

    npt.assert_equal(distfn.cdf(edges, *args), [0.0, 1.0])
    npt.assert_equal(distfn.sf(edges, *args), [1.0, 0.0])

    skip_log_checks = distfn.name in ('skellam', 'dlaplace')
    if not skip_log_checks:
        # with a = -inf, log(0) generates warnings
        npt.assert_equal(distfn.logcdf(edges, *args), [-np.inf, 0.0])
        npt.assert_equal(distfn.logsf(edges, *args), [0.0, -np.inf])

    probabilities = [0.0, 1.0]
    npt.assert_equal(distfn.ppf(probabilities, *args), edges)
    npt.assert_equal(distfn.isf(probabilities, *args), edges[::-1])

    # out-of-bounds for isf & ppf
    out_of_bounds = [-1, 2]
    npt.assert_(np.isnan(distfn.isf(out_of_bounds, *args)).all())
    npt.assert_(np.isnan(distfn.ppf(out_of_bounds, *args)).all())
|
||||
|
||||
|
||||
def check_named_args(distfn, x, shape_args, defaults, meths):
    """Check that shape parameters may be passed by keyword.

    First the signature of ``distfn._parse_args`` is checked for consistency
    with `defaults`, ``distfn.shapes`` and ``distfn.numargs``.  Then every
    method in `meths` is evaluated at `x` with the shape parameters moved,
    one at a time from the last to the first, from positional to keyword
    form; the results must equal the all-positional call.  Finally an
    unknown keyword must make ``distfn.cdf`` raise ``TypeError``.
    """
    # check consistency of shapes, numargs and _parse signature
    spec = _getfullargspec(distfn._parse_args)
    npt.assert_(spec.varargs is None)
    npt.assert_(spec.varkw is None)
    npt.assert_(not spec.kwonlyargs)
    npt.assert_(list(spec.defaults) == list(defaults))

    shape_argnames = spec.args[:-len(defaults)]  # a, b, loc=0, scale=1
    shapes_ = distfn.shapes.replace(',', ' ').split() if distfn.shapes else ''
    npt.assert_(len(shapes_) == distfn.numargs)
    npt.assert_(len(shapes_) == len(shape_argnames))

    # check calling w/ named arguments
    positional = list(shape_args)
    reference = [meth(x, *positional) for meth in meths]
    npt.assert_(np.all(np.isfinite(reference)))

    pending_names = shape_argnames[:]
    remaining = positional[:]
    keywords = {}
    while pending_names:
        # move the last still-positional shape parameter into the keywords
        name = pending_names.pop()
        keywords[name] = remaining.pop()
        by_keyword = [meth(x, *remaining, **keywords) for meth in meths]
        npt.assert_array_equal(reference, by_keyword)
        if 'n' not in keywords:
            # `n` is first parameter of moment(), so can't be used as named arg
            npt.assert_equal(distfn.moment(1, *remaining, **keywords),
                             distfn.moment(1, *positional))

    # unknown arguments should not go through:
    keywords['kaboom'] = 42
    assert_raises(TypeError, distfn.cdf, x, **keywords)
|
||||
|
||||
|
||||
def check_random_state_property(distfn, args):
    """Check the ``random_state`` attribute of a distribution *instance*.

    Verifies that samples drawn from the seeded global NumPy state, from an
    integer seed and from an explicit ``RandomState`` all coincide, that a
    ``np.random.Generator`` is accepted by ``rvs``, and that passing
    ``random_state`` to a single ``rvs`` call does not alter the
    instance-level state.

    Parameters
    ----------
    distfn : distribution object
        Must expose a settable ``random_state`` and ``rvs``.
    args : sequence
        Shape parameters forwarded to ``distfn.rvs``.

    Notes
    -----
    ``np.random.seed`` is called, so the global NumPy random state is
    modified.  ``distfn.random_state`` is restored on exit even when one of
    the checks fails.
    """
    # This test fiddles with distfn.random_state. This breaks other tests,
    # hence need to save it and then restore.  The restore lives in a
    # ``finally`` clause so that a failing assertion cannot leak the
    # modified state into later tests.
    rndm = distfn.random_state
    try:
        # baseline: this relies on the global state
        np.random.seed(1234)
        distfn.random_state = None
        r0 = distfn.rvs(*args, size=8)

        # use an explicit instance-level random_state
        distfn.random_state = 1234
        r1 = distfn.rvs(*args, size=8)
        npt.assert_equal(r0, r1)

        distfn.random_state = np.random.RandomState(1234)
        r2 = distfn.rvs(*args, size=8)
        npt.assert_equal(r0, r2)

        # check that np.random.Generator can be used (numpy >= 1.17)
        if hasattr(np.random, 'default_rng'):
            # obtain a np.random.Generator object
            rng = np.random.default_rng(1234)
            distfn.rvs(*args, size=1, random_state=rng)

        # can override the instance-level random_state for an individual
        # .rvs call
        distfn.random_state = 2
        orig_state = distfn.random_state.get_state()

        r3 = distfn.rvs(*args, size=8,
                        random_state=np.random.RandomState(1234))
        npt.assert_equal(r0, r3)

        # ... and that does not alter the instance-level random_state!
        npt.assert_equal(distfn.random_state.get_state(), orig_state)
    finally:
        # finally, restore the random_state
        distfn.random_state = rndm
|
||||
|
||||
|
||||
def check_meth_dtype(distfn, arg, meths):
    """Check that each method in `meths` returns ``float64`` results.

    The quartiles of `distfn` (for shape parameters `arg`) are cast to
    several integer and floating point dtypes, and every method in `meths`
    is evaluated on each cast array; the dtype of the result must be
    ``np.float64`` regardless of the input dtype.
    """
    q0 = [0.25, 0.5, 0.75]
    x0 = distfn.ppf(q0, *arg)
    x_cast = [x0.astype(tp) for tp in
              (np.int_, np.float16, np.float32, np.float64)]

    for x in x_cast:
        # casting may have clipped the values, exclude those
        distfn._argcheck(*arg)
        x = x[(distfn.a < x) & (x < distfn.b)]
        for meth in meths:
            val = meth(x, *arg)
            # spelled np.float64: the np.float_ alias no longer exists in
            # NumPy 2.0
            npt.assert_(val.dtype == np.float64)
|
||||
|
||||
|
||||
def check_ppf_dtype(distfn, arg):
    """Check that ``ppf`` and ``isf`` return ``float64`` results.

    The probabilities 0.25, 0.5 and 0.75 are cast to ``float16``,
    ``float32`` and ``float64``; for each cast array both ``distfn.ppf`` and
    ``distfn.isf`` (with shape parameters `arg`) must return ``np.float64``.
    """
    q0 = np.asarray([0.25, 0.5, 0.75])
    q_cast = [q0.astype(tp) for tp in (np.float16, np.float32, np.float64)]
    for q in q_cast:
        for meth in [distfn.ppf, distfn.isf]:
            val = meth(q, *arg)
            # spelled np.float64: the np.float_ alias no longer exists in
            # NumPy 2.0
            npt.assert_(val.dtype == np.float64)
|
||||
|
||||
|
||||
def check_cmplx_deriv(distfn, arg):
    """Check distribution functions via complex-step differentiation.

    Distributions allow complex arguments.  The derivative of ``cdf``,
    ``logcdf``, ``sf``, ``logsf`` and ``logpdf`` is estimated from the
    imaginary part of the function evaluated at ``x + 1e-10j`` and compared
    (``rtol=1e-5``) with the corresponding expression in terms of ``pdf``,
    ``cdf`` and ``sf``.
    """
    def complex_step(func, points, *shape):
        # derivative estimate: Im(f(x + i*h)) / h
        step = 1e-10
        shifted = np.asarray(points) + step*1j
        return (func(shifted, *shape)/step).imag

    x0 = distfn.ppf([0.25, 0.51, 0.75], *arg)
    dtypes = (np.int_, np.float16, np.float32, np.float64)
    x_cast = [x0.astype(tp) for tp in dtypes]

    for x in x_cast:
        # casting may have clipped the values, exclude those
        distfn._argcheck(*arg)
        inside = (distfn.a < x) & (x < distfn.b)
        x = x[inside]

        pdf = distfn.pdf(x, *arg)
        cdf = distfn.cdf(x, *arg)
        sf = distfn.sf(x, *arg)

        assert_allclose(complex_step(distfn.cdf, x, *arg), pdf, rtol=1e-5)
        assert_allclose(complex_step(distfn.logcdf, x, *arg), pdf/cdf,
                        rtol=1e-5)

        assert_allclose(complex_step(distfn.sf, x, *arg), -pdf, rtol=1e-5)
        assert_allclose(complex_step(distfn.logsf, x, *arg), -pdf/sf,
                        rtol=1e-5)

        logpdf_slope = complex_step(distfn.logpdf, x, *arg)
        pdf_slope = complex_step(distfn.pdf, x, *arg)
        assert_allclose(logpdf_slope, pdf_slope / distfn.pdf(x, *arg),
                        rtol=1e-5)
|
||||
|
||||
|
||||
def check_pickling(distfn, args):
    """Check that a distribution instance pickles and unpickles.

    Special attention is paid to the ``random_state`` property: after a
    pickle round trip, both the unfrozen and the frozen distribution must
    produce the same random variates as the original.  ``ppf``/``cdf`` are
    smoke-tested on the unpickled object, and, when present, the ``fit``
    method must survive a pickle round trip as well.

    Parameters
    ----------
    distfn : distribution object
        The (unfrozen) distribution to pickle.
    args : sequence
        Shape parameters of the distribution.

    Notes
    -----
    ``distfn.random_state`` is temporarily set to 1234 and restored on exit,
    even when one of the checks fails.  Only data produced inside this
    function is ever unpickled.
    """
    # save the random_state; it is restored in the ``finally`` clause so
    # that a failing check cannot leak the seed into later tests
    rndm = distfn.random_state
    try:
        # check unfrozen
        distfn.random_state = 1234
        distfn.rvs(*args, size=8)
        s = pickle.dumps(distfn)
        r0 = distfn.rvs(*args, size=8)

        unpickled = pickle.loads(s)
        r1 = unpickled.rvs(*args, size=8)
        npt.assert_equal(r0, r1)

        # also smoke test some methods
        medians = [distfn.ppf(0.5, *args), unpickled.ppf(0.5, *args)]
        npt.assert_equal(medians[0], medians[1])
        npt.assert_equal(distfn.cdf(medians[0], *args),
                         unpickled.cdf(medians[1], *args))

        # check frozen pickling/unpickling with rvs
        frozen_dist = distfn(*args)
        pkl = pickle.dumps(frozen_dist)
        unpickled = pickle.loads(pkl)

        r0 = frozen_dist.rvs(size=8)
        r1 = unpickled.rvs(size=8)
        npt.assert_equal(r0, r1)

        # check pickling/unpickling of .fit method
        if hasattr(distfn, "fit"):
            fit_function = distfn.fit
            pickled_fit_function = pickle.dumps(fit_function)
            unpickled_fit_function = pickle.loads(pickled_fit_function)
            assert (fit_function.__name__
                    == unpickled_fit_function.__name__
                    == "fit")
    finally:
        # restore the random_state
        distfn.random_state = rndm
|
||||
|
||||
|
||||
def check_freezing(distfn, args):
    """Regression test for gh-11089.

    Freezing a distribution used to fail if loc and/or scale were
    specified.  Here `distfn` is frozen with ``loc=1`` (plus ``scale=2`` for
    ``rv_continuous`` instances), and the ``a``/``b`` attributes of the
    result must equal those of the distribution frozen without them.
    """
    locscale = {'loc': 1}
    if isinstance(distfn, stats.rv_continuous):
        locscale['scale'] = 2

    shifted = distfn(*args, **locscale)
    assert shifted.a == distfn(*args).a
    assert shifted.b == distfn(*args).b
|
||||
|
||||
|
||||
def check_rvs_broadcast(distfunc, distname, allargs, shape, shape_only, otype):
    """Check that ``distfunc.rvs`` broadcasts its arguments.

    With the global NumPy seed set to 123, ``distfunc.rvs(*allargs)`` must
    have shape `shape`.  Unless `shape_only` is true, the sample must also
    agree (``rtol=1e-13``) with the one obtained, after reseeding, by
    applying ``rvs`` through ``np.vectorize`` with output types `otype`.
    `distname` is only used in the failure message.
    """
    np.random.seed(123)
    sample = distfunc.rvs(*allargs)
    assert_equal(sample.shape, shape, "%s: rvs failed to broadcast" % distname)
    if shape_only:
        return

    def draw(*params):
        return distfunc.rvs(*params)

    vectorized_rvs = np.vectorize(draw, otypes=otype)
    np.random.seed(123)
    expected = vectorized_rvs(*allargs)
    assert_allclose(sample, expected, rtol=1e-13)
|
||||
|
||||
|
||||
def check_deprecation_warning_gh5982_moment(distfn, arg, distname):
    """Check how ``distfn.moment`` handles the keyword ``n`` (gh-5982).

    The shape parameters `arg` are turned into keyword arguments keyed by
    the names listed in ``distfn.shapes``.  Two groups of cases are checked:
    the "A" cases when the distribution has a shape parameter named ``n``,
    and the "B" cases otherwise.  `distname` is accepted for signature
    compatibility and is not used.
    """
    # See description of cases that need to be tested in the definition of
    # scipy.stats.rv_generic.moment

    # Split on commas and/or whitespace (the same parsing that
    # check_named_args uses) so that "a,b" and "a, b" are both understood.
    if distfn.shapes is None:
        shapes = []
    else:
        shapes = distfn.shapes.replace(',', ' ').split()
    kwd_shapes = dict(zip(shapes, arg or []))  # dictionary of shape kwds
    n = kwd_shapes.pop('n', None)

    message1 = "moment() missing 1 required positional argument"
    message2 = "_parse_args() missing 1 required positional argument: 'n'"
    message3 = "moment() got multiple values for first argument"

    if 'n' in shapes:
        expected = distfn.mean(n=n, **kwd_shapes)

        # A1
        res = distfn.moment(1, n=n, **kwd_shapes)
        assert_allclose(res, expected)

        # A2
        with assert_raises(TypeError, match=re.escape(message1)):
            distfn.moment(n=n, **kwd_shapes)

        # A3
        # if `n` is not provided at all
        with assert_raises(TypeError, match=re.escape(message2)):
            distfn.moment(1, **kwd_shapes)
        # if `n` is provided as a positional argument
        res = distfn.moment(1, *arg)
        assert_allclose(res, expected)

        # A4
        with assert_raises(TypeError, match=re.escape(message1)):
            distfn.moment(**kwd_shapes)

    else:
        expected = distfn.mean(**kwd_shapes)

        # B1
        with assert_raises(TypeError, match=re.escape(message3)):
            distfn.moment(1, n=1, **kwd_shapes)

        # B2
        with np.testing.assert_warns(DeprecationWarning):
            res = distfn.moment(n=1, **kwd_shapes)
            assert_allclose(res, expected)

        # B3
        res = distfn.moment(1, *arg)
        assert_allclose(res, expected)

        # B4
        with assert_raises(TypeError, match=re.escape(message1)):
            distfn.moment(**kwd_shapes)
|
||||
|
||||
|
||||
def check_deprecation_warning_gh5982_interval(distfn, arg, distname):
    """Check how ``distfn.interval`` handles the keyword ``alpha`` (gh-5982).

    The shape parameters `arg` are turned into keyword arguments keyed by
    the names listed in ``distfn.shapes``.  Two groups of cases are checked:
    the "A" cases when the distribution has a shape parameter named
    ``alpha``, and the "B" cases otherwise.  The reference interval is
    computed from ``distfn.ppf`` at 0.25 and 0.75.  `distname` is accepted
    for signature compatibility and is not used.
    """
    # See the description of the cases that need to be tested in the
    # definition of the method under test -- presumably
    # scipy.stats.rv_generic.interval (this comment used to point at
    # `moment`); TODO confirm.

    # Split on commas and/or whitespace (the same parsing that
    # check_named_args uses) so that "a,b" and "a, b" are both understood.
    if distfn.shapes is None:
        shapes = []
    else:
        shapes = distfn.shapes.replace(',', ' ').split()
    kwd_shapes = dict(zip(shapes, arg or []))  # dictionary of shape kwds
    alpha = kwd_shapes.pop('alpha', None)

    def my_interval(*args, **kwds):
        # central 50% interval, matching the confidence of 0.5 used below
        return (distfn.ppf(0.25, *args, **kwds),
                distfn.ppf(0.75, *args, **kwds))

    message1 = "interval() missing 1 required positional argument"
    message2 = "_parse_args() missing 1 required positional argument: 'alpha'"
    message3 = "interval() got multiple values for first argument"

    if 'alpha' in shapes:
        expected = my_interval(alpha=alpha, **kwd_shapes)

        # A1
        res = distfn.interval(0.5, alpha=alpha, **kwd_shapes)
        assert_allclose(res, expected)

        # A2
        with assert_raises(TypeError, match=re.escape(message1)):
            distfn.interval(alpha=alpha, **kwd_shapes)

        # A3
        # if `alpha` is not provided at all
        with assert_raises(TypeError, match=re.escape(message2)):
            distfn.interval(0.5, **kwd_shapes)
        # if `alpha` is provided as a positional argument
        res = distfn.interval(0.5, *arg)
        assert_allclose(res, expected)

        # A4
        with assert_raises(TypeError, match=re.escape(message1)):
            distfn.interval(**kwd_shapes)

    else:
        expected = my_interval(**kwd_shapes)

        # B1
        with assert_raises(TypeError, match=re.escape(message3)):
            distfn.interval(0.5, alpha=1, **kwd_shapes)

        # B2
        with np.testing.assert_warns(DeprecationWarning):
            res = distfn.interval(alpha=0.5, **kwd_shapes)
            assert_allclose(res, expected)

        # B3
        res = distfn.interval(0.5, *arg)
        assert_allclose(res, expected)

        # B4
        with assert_raises(TypeError, match=re.escape(message1)):
            distfn.interval(**kwd_shapes)
|
||||
Binary file not shown.
607
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/fisher_exact_results_from_r.py
vendored
Normal file
607
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/fisher_exact_results_from_r.py
vendored
Normal file
@@ -0,0 +1,607 @@
|
||||
# DO NOT EDIT THIS FILE!
|
||||
# This file was generated by the R script
|
||||
# generate_fisher_exact_results_from_r.R
|
||||
# The script was run with R version 3.6.2 (2019-12-12) at 2020-11-09 06:16:09
|
||||
|
||||
|
||||
from collections import namedtuple
|
||||
import numpy as np
|
||||
|
||||
|
||||
Inf = np.inf
|
||||
|
||||
Parameters = namedtuple('Parameters',
|
||||
['table', 'confidence_level', 'alternative'])
|
||||
RResults = namedtuple('RResults',
|
||||
['pvalue', 'conditional_odds_ratio',
|
||||
'conditional_odds_ratio_ci'])
|
||||
data = [
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.04035202926536294,
|
||||
2.662846672960251))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.004668988338943325,
|
||||
0.895792956493601))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.4153910882532168,
|
||||
259.2593661129417))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.08056337526385809,
|
||||
1.22704788545557))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.1176691231650079,
|
||||
1.787463657995973))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.003857141267422399,
|
||||
2.407369893767229))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.451643573543705))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869288))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869287))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(349.2595113327733,
|
||||
3630.382605689872))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(152.4166024390096,
|
||||
1425.700792178893))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8520462587912048,
|
||||
1.340148950273938))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.02502345007115455,
|
||||
6.304424772117853))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.001923034001462487,
|
||||
1.53670836950172))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2397970951413721,
|
||||
1291.342011095509))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.05127576113762925,
|
||||
1.717176678806983))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.07498546954483619,
|
||||
2.506969905199901))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.0007743881879531337,
|
||||
4.170192301163831))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.642491011905582))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(270.0334165523604,
|
||||
5461.333333326708))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(116.7944750275836,
|
||||
1931.995993191814))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.7949398282935892,
|
||||
1.436229679394333))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.797867027270803))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
0.6785254803404526))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
127.8497388102893))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.032332939718425))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.502407513296985))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.820421051562392))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.06224603077045))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3045.460216525746))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1186.440170942579))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.293551891610822))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4.375946050832565))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.235282118191202))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
657.2063583945989))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.498867660683128))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.186159386716762))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3.335351451901569))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.075407697450433))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969122))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969123))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4503.078257659934))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1811.766127544222))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.396522811516685))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.05119649909830196,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.007163749169069961,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.5493234651081089,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.1003538933958604,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.146507416280863,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.007821681994077808,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(397.784359748113,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(174.7148056880929,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8828406663967776,
|
||||
Inf))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.03045407081240429,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.002768053063547901,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2998184792279909,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.06180414342643172,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.09037094010066403,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.001521592095430679,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359722,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359725,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(297.9619252357688,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(130.3213490295859,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8176272148267533,
|
||||
Inf))),
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
108
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/AtmWtAg.dat
vendored
Normal file
108
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/AtmWtAg.dat
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: AtmWtAg (AtmWtAg.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 108)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Powell, L.J., Murphy, T.J. and Gramlich, J.W. (1982).
|
||||
"The Absolute Isotopic Abundance & Atomic Weight
|
||||
of a Reference Sample of Silver".
|
||||
NBS Journal of Research, 87, pp. 9-19.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
2 Treatments
|
||||
24 Replicates/Cell
|
||||
48 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 3 Parameters (mu, tau_1, tau_2)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
|
||||
Between Instrument 1 3.63834187500000E-09 3.63834187500000E-09 1.59467335677930E+01
|
||||
Within Instrument 46 1.04951729166667E-08 2.28155932971014E-10
|
||||
|
||||
Certified R-Squared 2.57426544538321E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.51048314446410E-05
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument AgWt
|
||||
1 107.8681568
|
||||
1 107.8681465
|
||||
1 107.8681572
|
||||
1 107.8681785
|
||||
1 107.8681446
|
||||
1 107.8681903
|
||||
1 107.8681526
|
||||
1 107.8681494
|
||||
1 107.8681616
|
||||
1 107.8681587
|
||||
1 107.8681519
|
||||
1 107.8681486
|
||||
1 107.8681419
|
||||
1 107.8681569
|
||||
1 107.8681508
|
||||
1 107.8681672
|
||||
1 107.8681385
|
||||
1 107.8681518
|
||||
1 107.8681662
|
||||
1 107.8681424
|
||||
1 107.8681360
|
||||
1 107.8681333
|
||||
1 107.8681610
|
||||
1 107.8681477
|
||||
2 107.8681079
|
||||
2 107.8681344
|
||||
2 107.8681513
|
||||
2 107.8681197
|
||||
2 107.8681604
|
||||
2 107.8681385
|
||||
2 107.8681642
|
||||
2 107.8681365
|
||||
2 107.8681151
|
||||
2 107.8681082
|
||||
2 107.8681517
|
||||
2 107.8681448
|
||||
2 107.8681198
|
||||
2 107.8681482
|
||||
2 107.8681334
|
||||
2 107.8681609
|
||||
2 107.8681101
|
||||
2 107.8681512
|
||||
2 107.8681469
|
||||
2 107.8681360
|
||||
2 107.8681254
|
||||
2 107.8681261
|
||||
2 107.8681450
|
||||
2 107.8681368
|
||||
85
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SiRstv.dat
vendored
Normal file
85
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SiRstv.dat
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SiRstv (SiRstv.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 85)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Ehrstein, James and Croarkin, M. Carroll.
|
||||
Unpublished NIST dataset.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
5 Treatments
|
||||
5 Replicates/Cell
|
||||
25 Observations
|
||||
3 Constant Leading Digits
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 6 Parameters (mu,tau_1, ... , tau_5)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Instrument 4 5.11462616000000E-02 1.27865654000000E-02 1.18046237440255E+00
|
||||
Within Instrument 20 2.16636560000000E-01 1.08318280000000E-02
|
||||
|
||||
Certified R-Squared 1.90999039051129E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.04076068334656E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument Resistance
|
||||
1 196.3052
|
||||
1 196.1240
|
||||
1 196.1890
|
||||
1 196.2569
|
||||
1 196.3403
|
||||
2 196.3042
|
||||
2 196.3825
|
||||
2 196.1669
|
||||
2 196.3257
|
||||
2 196.0422
|
||||
3 196.1303
|
||||
3 196.2005
|
||||
3 196.2889
|
||||
3 196.0343
|
||||
3 196.1811
|
||||
4 196.2795
|
||||
4 196.1748
|
||||
4 196.1494
|
||||
4 196.1485
|
||||
4 195.9885
|
||||
5 196.2119
|
||||
5 196.1051
|
||||
5 196.1850
|
||||
5 196.0052
|
||||
5 196.2090
|
||||
249
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs01.dat
vendored
Normal file
249
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs01.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs01 (SmLs01.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
1 Constant Leading Digit
|
||||
Lower Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1.4
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
2 1.3
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
3 1.5
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
4 1.3
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
5 1.5
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
6 1.3
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
7 1.5
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
8 1.3
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
9 1.5
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
1869
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs02.dat
vendored
Normal file
1869
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs02.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs03.dat
vendored
Normal file
18069
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs03.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
249
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs04.dat
vendored
Normal file
249
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs04.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs04 (SmLs04.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000.4
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
2 1000000.3
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
3 1000000.5
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
4 1000000.3
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
5 1000000.5
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
6 1000000.3
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
7 1000000.5
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
8 1000000.3
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
9 1000000.5
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
1869
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs05.dat
vendored
Normal file
1869
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs05.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs06.dat
vendored
Normal file
18069
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs06.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
249
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs07.dat
vendored
Normal file
249
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs07.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs07 (SmLs07.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
13 Constant Leading Digits
|
||||
Higher Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000000000.4
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
2 1000000000000.3
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
3 1000000000000.5
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
4 1000000000000.3
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
5 1000000000000.5
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
6 1000000000000.3
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
7 1000000000000.5
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
8 1000000000000.3
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
9 1000000000000.5
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
1869
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs08.dat
vendored
Normal file
1869
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs08.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs09.dat
vendored
Normal file
18069
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_anova/SmLs09.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
97
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_linregress/Norris.dat
vendored
Normal file
97
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/nist_linregress/Norris.dat
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: Norris (Norris.dat)
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 31 to 46)
|
||||
Data (lines 61 to 96)
|
||||
|
||||
Procedure: Linear Least Squares Regression
|
||||
|
||||
Reference: Norris, J., NIST.
|
||||
Calibration of Ozone Monitors.
|
||||
|
||||
Data: 1 Response Variable (y)
|
||||
1 Predictor Variable (x)
|
||||
36 Observations
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
Model: Linear Class
|
||||
2 Parameters (B0,B1)
|
||||
|
||||
y = B0 + B1*x + e
|
||||
|
||||
|
||||
|
||||
Certified Regression Statistics
|
||||
|
||||
Standard Deviation
|
||||
Parameter Estimate of Estimate
|
||||
|
||||
B0 -0.262323073774029 0.232818234301152
|
||||
B1 1.00211681802045 0.429796848199937E-03
|
||||
|
||||
Residual
|
||||
Standard Deviation 0.884796396144373
|
||||
|
||||
R-Squared 0.999993745883712
|
||||
|
||||
|
||||
Certified Analysis of Variance Table
|
||||
|
||||
Source of Degrees of Sums of Mean
|
||||
Variation Freedom Squares Squares F Statistic
|
||||
|
||||
Regression 1 4255954.13232369 4255954.13232369 5436385.54079785
|
||||
Residual 34 26.6173985294224 0.782864662630069
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: y x
|
||||
0.1 0.2
|
||||
338.8 337.4
|
||||
118.1 118.2
|
||||
888.0 884.6
|
||||
9.2 10.1
|
||||
228.1 226.5
|
||||
668.5 666.3
|
||||
998.5 996.3
|
||||
449.1 448.6
|
||||
778.9 777.0
|
||||
559.2 558.2
|
||||
0.3 0.4
|
||||
0.1 0.6
|
||||
778.1 775.5
|
||||
668.8 666.9
|
||||
339.3 338.0
|
||||
448.9 447.5
|
||||
10.8 11.6
|
||||
557.7 556.0
|
||||
228.3 228.1
|
||||
998.0 995.8
|
||||
888.8 887.6
|
||||
119.6 120.2
|
||||
0.3 0.3
|
||||
0.6 0.3
|
||||
557.6 556.8
|
||||
339.3 339.1
|
||||
888.0 887.2
|
||||
998.5 999.0
|
||||
778.9 779.0
|
||||
10.2 11.1
|
||||
117.6 118.3
|
||||
228.9 229.2
|
||||
668.4 669.1
|
||||
449.2 448.9
|
||||
0.2 0.5
|
||||
|
||||
1499
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/studentized_range_mpmath_ref.json
vendored
Normal file
1499
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/data/studentized_range_mpmath_ref.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1044
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_axis_nan_policy.py
vendored
Normal file
1044
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_axis_nan_policy.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
568
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_binned_statistic.py
vendored
Normal file
568
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_binned_statistic.py
vendored
Normal file
@@ -0,0 +1,568 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.stats import (binned_statistic, binned_statistic_2d,
|
||||
binned_statistic_dd)
|
||||
from scipy._lib._util import check_random_state
|
||||
|
||||
from .common_tests import check_named_results
|
||||
|
||||
|
||||
class TestBinnedStatistic:
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
rng = check_random_state(9865)
|
||||
cls.x = rng.uniform(size=100)
|
||||
cls.y = rng.uniform(size=100)
|
||||
cls.v = rng.uniform(size=100)
|
||||
cls.X = rng.uniform(size=(100, 3))
|
||||
cls.w = rng.uniform(size=100)
|
||||
cls.u = rng.uniform(size=100) + 1e6
|
||||
|
||||
def test_1d_count(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
|
||||
count2, edges2 = np.histogram(x, bins=10)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_gh5927(self):
|
||||
# smoke test for gh5927 - binned_statistic was using `is` for string
|
||||
# comparison
|
||||
x = self.x
|
||||
v = self.v
|
||||
statistics = ['mean', 'median', 'count', 'sum']
|
||||
for statistic in statistics:
|
||||
binned_statistic(x, v, statistic, bins=10)
|
||||
|
||||
def test_big_number_std(self):
|
||||
# tests for numerical stability of std calculation
|
||||
# see issue gh-10126 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_empty_bins_std(self):
|
||||
# tests that std returns gives nan for empty bins
|
||||
x = self.x
|
||||
u = self.u
|
||||
print(binned_statistic(x, u, 'count', bins=1000))
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=1000)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=1000)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_non_finite_inputs_and_int_bins(self):
|
||||
# if either `values` or `sample` contain np.inf or np.nan throw
|
||||
# see issue gh-9010 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
orig = u[0]
|
||||
u[0] = np.inf
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'std', bins=10)
|
||||
# need to test for non-python specific ints, e.g. np.int8, np.int64
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'std',
|
||||
bins=np.int64(10))
|
||||
u[0] = np.nan
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'count', bins=10)
|
||||
# replace original value, u belongs the class
|
||||
u[0] = orig
|
||||
|
||||
def test_1d_result_attributes(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic(x, v, 'count', bins=10)
|
||||
attributes = ('statistic', 'bin_edges', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_1d_sum(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
|
||||
sum2, edges2 = np.histogram(x, bins=10, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_mean(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_std(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_min(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'min', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.min, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_max(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_median(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_bincode(self):
|
||||
x = self.x[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
|
||||
bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
|
||||
1, 2, 1])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
assert_allclose(bcount, count1)
|
||||
|
||||
def test_1d_range_keyword(self):
|
||||
# Regression test for gh-3063, range can be (min, max) or [(min, max)]
|
||||
np.random.seed(9865)
|
||||
x = np.arange(30)
|
||||
data = np.random.random(30)
|
||||
|
||||
mean, bins, _ = binned_statistic(x[:15], data[:15])
|
||||
mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
|
||||
mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))
|
||||
|
||||
assert_allclose(mean, mean_range)
|
||||
assert_allclose(bins, bins_range)
|
||||
assert_allclose(mean, mean_range2)
|
||||
assert_allclose(bins, bins_range2)
|
||||
|
||||
def test_1d_multi_values(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
|
||||
stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_count(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=5)
|
||||
count2, binx2, biny2 = np.histogram2d(x, y, bins=5)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_result_attributes(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic_2d(x, y, v, 'count', bins=5)
|
||||
attributes = ('statistic', 'x_edge', 'y_edge', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_2d_sum(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
|
||||
sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean_unicode(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_std(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_min(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'min', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.min, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_max(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'max', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.max, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_median(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'median', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(
|
||||
x, y, v, np.median, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_bincode(self):
|
||||
x = self.x[:20]
|
||||
y = self.y[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=3)
|
||||
bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
|
||||
6, 11, 16, 6, 6, 11, 8])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_2d_multi_values(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, binx1v, biny1v, bc1v = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=8)
|
||||
stat1w, binx1w, biny1w, bc1w = binned_statistic_2d(
|
||||
x, y, w, 'mean', bins=8)
|
||||
stat2, binx2, biny2, bc2 = binned_statistic_2d(
|
||||
x, y, [v, w], 'mean', bins=8)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(binx1v, binx2)
|
||||
assert_allclose(biny1w, biny2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_binnumbers_unraveled(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20)
|
||||
stat, edgesy, bcy = binned_statistic(y, v, 'mean', bins=10)
|
||||
|
||||
stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True)
|
||||
|
||||
bcx3 = np.searchsorted(edgesx, x, side='right')
|
||||
bcy3 = np.searchsorted(edgesy, y, side='right')
|
||||
|
||||
# `numpy.searchsorted` is non-inclusive on right-edge, compensate
|
||||
bcx3[x == x.max()] -= 1
|
||||
bcy3[y == y.max()] -= 1
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcx3, bc2[0])
|
||||
assert_allclose(bcy3, bc2[1])
|
||||
|
||||
def test_dd_count(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
count2, edges2 = np.histogramdd(X, bins=3)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_result_attributes(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
attributes = ('statistic', 'bin_edges', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_dd_sum(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
|
||||
sum2, edges2 = np.histogramdd(X, bins=3, weights=v)
|
||||
sum3, edges3, bc = binned_statistic_dd(X, v, np.sum, bins=3)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
assert_allclose(sum1, sum3)
|
||||
assert_allclose(edges1, edges3)
|
||||
|
||||
def test_dd_mean(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_std(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_min(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'min', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.min, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_max(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_median(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_bincode(self):
|
||||
X = self.X[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
|
||||
32, 36, 91, 43, 87, 81, 81])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_dd_multi_values(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
for stat in ["count", "sum", "mean", "std", "min", "max", "median",
|
||||
np.std]:
|
||||
stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8)
|
||||
stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8)
|
||||
stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8)
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(edges1w, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_dd_binnumbers_unraveled(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
|
||||
stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
|
||||
stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)
|
||||
|
||||
stat2, edges2, bc2 = binned_statistic_dd(
|
||||
X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcz, bc2[2])
|
||||
|
||||
def test_dd_binned_statistic_result(self):
|
||||
# NOTE: tests the reuse of bin_edges from previous call
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random((10000))
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = (bins, bins, bins)
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
stat = result.statistic
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean',
|
||||
binned_statistic_result=result)
|
||||
stat2 = result.statistic
|
||||
|
||||
assert_allclose(stat, stat2)
|
||||
|
||||
def test_dd_zero_dedges(self):
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random((10000))
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = np.append(bins, 1)
|
||||
bins = (bins, bins, bins)
|
||||
with assert_raises(ValueError, match='difference is numerically 0'):
|
||||
binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
|
||||
def test_dd_range_errors(self):
|
||||
# Test that descriptive exceptions are raised as appropriate for bad
|
||||
# values of the `range` argument. (See gh-12996)
|
||||
with assert_raises(ValueError,
|
||||
match='In range, start must be <= stop'):
|
||||
binned_statistic_dd([self.y], self.v,
|
||||
range=[[1, 0]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='In dimension 1 of range, start must be <= stop'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[1, 0], [0, 1]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='In dimension 2 of range, start must be <= stop'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[0, 1], [1, 0]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='range given for 1 dimensions; 2 required'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[0, 1]])
|
||||
|
||||
def test_binned_statistic_float32(self):
|
||||
X = np.array([0, 0.42358226], dtype=np.float32)
|
||||
stat, _, _ = binned_statistic(X, None, 'count', bins=5)
|
||||
assert_allclose(stat, np.array([1, 0, 0, 0, 1], dtype=np.float64))
|
||||
|
||||
def test_gh14332(self):
|
||||
# Test the wrong output when the `sample` is close to bin edge
|
||||
x = []
|
||||
size = 20
|
||||
for i in range(size):
|
||||
x += [1-0.1**i]
|
||||
|
||||
bins = np.linspace(0,1,11)
|
||||
sum1, edges1, bc = binned_statistic_dd(x, np.ones(len(x)),
|
||||
bins=[bins], statistic='sum')
|
||||
sum2, edges2 = np.histogram(x, bins=bins)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1[0], edges2)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float64, np.complex128])
|
||||
@pytest.mark.parametrize("statistic", [np.mean, np.median, np.sum, np.std,
|
||||
np.min, np.max, 'count',
|
||||
lambda x: (x**2).sum(),
|
||||
lambda x: (x**2).sum() * 1j])
|
||||
def test_dd_all(self, dtype, statistic):
|
||||
def ref_statistic(x):
|
||||
return len(x) if statistic == 'count' else statistic(x)
|
||||
|
||||
rng = np.random.default_rng(3704743126639371)
|
||||
n = 10
|
||||
x = rng.random(size=n)
|
||||
i = x >= 0.5
|
||||
v = rng.random(size=n)
|
||||
if dtype is np.complex128:
|
||||
v = v + rng.random(size=n)*1j
|
||||
|
||||
stat, _, _ = binned_statistic_dd(x, v, statistic, bins=2)
|
||||
ref = np.array([ref_statistic(v[~i]), ref_statistic(v[i])])
|
||||
assert_allclose(stat, ref)
|
||||
assert stat.dtype == np.result_type(ref.dtype, np.float64)
|
||||
44
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_boost_ufuncs.py
vendored
Normal file
44
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_boost_ufuncs.py
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy.stats import _boost
|
||||
|
||||
|
||||
type_char_to_type_tol = {'f': (np.float32, 32*np.finfo(np.float32).eps),
|
||||
'd': (np.float64, 32*np.finfo(np.float64).eps),
|
||||
'g': (np.longdouble, 32*np.finfo(np.longdouble).eps)}
|
||||
|
||||
|
||||
# Each item in this list is
|
||||
# (func, args, expected_value)
|
||||
# All the values can be represented exactly, even with np.float32.
|
||||
#
|
||||
# This is not an exhaustive test data set of all the functions!
|
||||
# It is a spot check of several functions, primarily for
|
||||
# checking that the different data types are handled correctly.
|
||||
test_data = [
|
||||
(_boost._beta_cdf, (0.5, 2, 3), 0.6875),
|
||||
(_boost._beta_ppf, (0.6875, 2, 3), 0.5),
|
||||
(_boost._beta_pdf, (0.5, 2, 3), 1.5),
|
||||
(_boost._beta_sf, (0.5, 2, 1), 0.75),
|
||||
(_boost._beta_isf, (0.75, 2, 1), 0.5),
|
||||
(_boost._binom_cdf, (1, 3, 0.5), 0.5),
|
||||
(_boost._binom_pdf, (1, 4, 0.5), 0.25),
|
||||
(_boost._hypergeom_cdf, (2, 3, 5, 6), 0.5),
|
||||
(_boost._nbinom_cdf, (1, 4, 0.25), 0.015625),
|
||||
(_boost._ncf_mean, (10, 12, 2.5), 1.5),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
|
||||
@pytest.mark.parametrize('func, args, expected', test_data)
|
||||
def test_stats_boost_ufunc(func, args, expected):
|
||||
type_sigs = func.types
|
||||
type_chars = [sig.split('->')[-1] for sig in type_sigs]
|
||||
for type_char in type_chars:
|
||||
typ, rtol = type_char_to_type_tol[type_char]
|
||||
args = [typ(arg) for arg in args]
|
||||
value = func(*args)
|
||||
assert isinstance(value, typ)
|
||||
assert_allclose(value, expected, rtol=rtol)
|
||||
241
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_contingency.py
vendored
Normal file
241
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_contingency.py
vendored
Normal file
@@ -0,0 +1,241 @@
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_array_equal,
|
||||
assert_array_almost_equal, assert_approx_equal,
|
||||
assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.special import xlogy
|
||||
from scipy.stats.contingency import (margins, expected_freq,
|
||||
chi2_contingency, association)
|
||||
|
||||
|
||||
def test_margins():
|
||||
a = np.array([1])
|
||||
m = margins(a)
|
||||
assert_equal(len(m), 1)
|
||||
m0 = m[0]
|
||||
assert_array_equal(m0, np.array([1]))
|
||||
|
||||
a = np.array([[1]])
|
||||
m0, m1 = margins(a)
|
||||
expected0 = np.array([[1]])
|
||||
expected1 = np.array([[1]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
|
||||
a = np.arange(12).reshape(2, 6)
|
||||
m0, m1 = margins(a)
|
||||
expected0 = np.array([[15], [51]])
|
||||
expected1 = np.array([[6, 8, 10, 12, 14, 16]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
|
||||
a = np.arange(24).reshape(2, 3, 4)
|
||||
m0, m1, m2 = margins(a)
|
||||
expected0 = np.array([[[66]], [[210]]])
|
||||
expected1 = np.array([[[60], [92], [124]]])
|
||||
expected2 = np.array([[[60, 66, 72, 78]]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
assert_array_equal(m2, expected2)
|
||||
|
||||
|
||||
def test_expected_freq():
|
||||
assert_array_equal(expected_freq([1]), np.array([1.0]))
|
||||
|
||||
observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
|
||||
e = expected_freq(observed)
|
||||
assert_array_equal(e, np.ones_like(observed))
|
||||
|
||||
observed = np.array([[10, 10, 20], [20, 20, 20]])
|
||||
e = expected_freq(observed)
|
||||
correct = np.array([[12., 12., 16.], [18., 18., 24.]])
|
||||
assert_array_almost_equal(e, correct)
|
||||
|
||||
|
||||
def test_chi2_contingency_trivial():
|
||||
# Some very simple tests for chi2_contingency.
|
||||
|
||||
# A trivial case
|
||||
obs = np.array([[1, 2], [1, 2]])
|
||||
chi2, p, dof, expected = chi2_contingency(obs, correction=False)
|
||||
assert_equal(chi2, 0.0)
|
||||
assert_equal(p, 1.0)
|
||||
assert_equal(dof, 1)
|
||||
assert_array_equal(obs, expected)
|
||||
|
||||
# A *really* trivial case: 1-D data.
|
||||
obs = np.array([1, 2, 3])
|
||||
chi2, p, dof, expected = chi2_contingency(obs, correction=False)
|
||||
assert_equal(chi2, 0.0)
|
||||
assert_equal(p, 1.0)
|
||||
assert_equal(dof, 0)
|
||||
assert_array_equal(obs, expected)
|
||||
|
||||
|
||||
def test_chi2_contingency_R():
|
||||
# Some test cases that were computed independently, using R.
|
||||
|
||||
# Rcode = \
|
||||
# """
|
||||
# # Data vector.
|
||||
# data <- c(
|
||||
# 12, 34, 23, 4, 47, 11,
|
||||
# 35, 31, 11, 34, 10, 18,
|
||||
# 12, 32, 9, 18, 13, 19,
|
||||
# 12, 12, 14, 9, 33, 25
|
||||
# )
|
||||
#
|
||||
# # Create factor tags:r=rows, c=columns, t=tiers
|
||||
# r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
|
||||
# c <- factor(gl(3, 1, 2*3*4, labels=c("c1", "c2", "c3")))
|
||||
# t <- factor(gl(2, 3, 2*3*4, labels=c("t1", "t2")))
|
||||
#
|
||||
# # 3-way Chi squared test of independence
|
||||
# s = summary(xtabs(data~r+c+t))
|
||||
# print(s)
|
||||
# """
|
||||
# Routput = \
|
||||
# """
|
||||
# Call: xtabs(formula = data ~ r + c + t)
|
||||
# Number of cases in table: 478
|
||||
# Number of factors: 3
|
||||
# Test for independence of all factors:
|
||||
# Chisq = 102.17, df = 17, p-value = 3.514e-14
|
||||
# """
|
||||
obs = np.array(
|
||||
[[[12, 34, 23],
|
||||
[35, 31, 11],
|
||||
[12, 32, 9],
|
||||
[12, 12, 14]],
|
||||
[[4, 47, 11],
|
||||
[34, 10, 18],
|
||||
[18, 13, 19],
|
||||
[9, 33, 25]]])
|
||||
chi2, p, dof, expected = chi2_contingency(obs)
|
||||
assert_approx_equal(chi2, 102.17, significant=5)
|
||||
assert_approx_equal(p, 3.514e-14, significant=4)
|
||||
assert_equal(dof, 17)
|
||||
|
||||
# Rcode = \
|
||||
# """
|
||||
# # Data vector.
|
||||
# data <- c(
|
||||
# #
|
||||
# 12, 17,
|
||||
# 11, 16,
|
||||
# #
|
||||
# 11, 12,
|
||||
# 15, 16,
|
||||
# #
|
||||
# 23, 15,
|
||||
# 30, 22,
|
||||
# #
|
||||
# 14, 17,
|
||||
# 15, 16
|
||||
# )
|
||||
#
|
||||
# # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
|
||||
# r <- factor(gl(2, 2, 2*2*2*2, labels=c("r1", "r2")))
|
||||
# c <- factor(gl(2, 1, 2*2*2*2, labels=c("c1", "c2")))
|
||||
# d <- factor(gl(2, 4, 2*2*2*2, labels=c("d1", "d2")))
|
||||
# t <- factor(gl(2, 8, 2*2*2*2, labels=c("t1", "t2")))
|
||||
#
|
||||
# # 4-way Chi squared test of independence
|
||||
# s = summary(xtabs(data~r+c+d+t))
|
||||
# print(s)
|
||||
# """
|
||||
# Routput = \
|
||||
# """
|
||||
# Call: xtabs(formula = data ~ r + c + d + t)
|
||||
# Number of cases in table: 262
|
||||
# Number of factors: 4
|
||||
# Test for independence of all factors:
|
||||
# Chisq = 8.758, df = 11, p-value = 0.6442
|
||||
# """
|
||||
obs = np.array(
|
||||
[[[[12, 17],
|
||||
[11, 16]],
|
||||
[[11, 12],
|
||||
[15, 16]]],
|
||||
[[[23, 15],
|
||||
[30, 22]],
|
||||
[[14, 17],
|
||||
[15, 16]]]])
|
||||
chi2, p, dof, expected = chi2_contingency(obs)
|
||||
assert_approx_equal(chi2, 8.758, significant=4)
|
||||
assert_approx_equal(p, 0.6442, significant=4)
|
||||
assert_equal(dof, 11)
|
||||
|
||||
|
||||
def test_chi2_contingency_g():
|
||||
c = np.array([[15, 60], [15, 90]])
|
||||
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
|
||||
correction=False)
|
||||
assert_allclose(g, 2*xlogy(c, c/e).sum())
|
||||
|
||||
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
|
||||
correction=True)
|
||||
c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
|
||||
assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())
|
||||
|
||||
c = np.array([[10, 12, 10], [12, 10, 10]])
|
||||
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
|
||||
assert_allclose(g, 2*xlogy(c, c/e).sum())
|
||||
|
||||
|
||||
def test_chi2_contingency_bad_args():
|
||||
# Test that "bad" inputs raise a ValueError.
|
||||
|
||||
# Negative value in the array of observed frequencies.
|
||||
obs = np.array([[-1, 10], [1, 2]])
|
||||
assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
# The zeros in this will result in zeros in the array
|
||||
# of expected frequencies.
|
||||
obs = np.array([[0, 1], [0, 1]])
|
||||
assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
# A degenerate case: `observed` has size 0.
|
||||
obs = np.empty((0, 8))
|
||||
assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
|
||||
def test_chi2_contingency_yates_gh13875():
|
||||
# Magnitude of Yates' continuity correction should not exceed difference
|
||||
# between expected and observed value of the statistic; see gh-13875
|
||||
observed = np.array([[1573, 3], [4, 0]])
|
||||
p = chi2_contingency(observed)[1]
|
||||
assert_allclose(p, 1, rtol=1e-12)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("correction", [False, True])
|
||||
def test_result(correction):
|
||||
obs = np.array([[1, 2], [1, 2]])
|
||||
res = chi2_contingency(obs, correction=correction)
|
||||
assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res)
|
||||
|
||||
|
||||
def test_bad_association_args():
|
||||
# Invalid Test Statistic
|
||||
assert_raises(ValueError, association, [[1, 2], [3, 4]], "X")
|
||||
# Invalid array shape
|
||||
assert_raises(ValueError, association, [[[1, 2]], [[3, 4]]], "cramer")
|
||||
# chi2_contingency exception
|
||||
assert_raises(ValueError, association, [[-1, 10], [1, 2]], 'cramer')
|
||||
# Invalid Array Item Data Type
|
||||
assert_raises(ValueError, association,
|
||||
np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('stat, expected',
|
||||
[('cramer', 0.09222412010290792),
|
||||
('tschuprow', 0.0775509319944633),
|
||||
('pearson', 0.12932925727138758)])
|
||||
def test_assoc(stat, expected):
|
||||
# 2d Array
|
||||
obs1 = np.array([[12, 13, 14, 15, 16],
|
||||
[17, 16, 18, 19, 11],
|
||||
[9, 15, 14, 12, 11]])
|
||||
a = association(observed=obs1, method=stat)
|
||||
assert_allclose(a, expected)
|
||||
997
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_continuous_basic.py
vendored
Normal file
997
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_continuous_basic.py
vendored
Normal file
@@ -0,0 +1,997 @@
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.integrate import IntegrationWarning
|
||||
import itertools
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment, check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_entropy_vect_scale,
|
||||
check_edge_support, check_named_args,
|
||||
check_random_state_property,
|
||||
check_meth_dtype, check_ppf_dtype, check_cmplx_deriv,
|
||||
check_pickling, check_rvs_broadcast, check_freezing,
|
||||
check_deprecation_warning_gh5982_moment,
|
||||
check_deprecation_warning_gh5982_interval)
|
||||
from scipy.stats._distr_params import distcont
|
||||
from scipy.stats._distn_infrastructure import rv_continuous_frozen
|
||||
|
||||
"""
|
||||
Test all continuous distributions.
|
||||
|
||||
Parameters were chosen for those distributions that pass the
|
||||
Kolmogorov-Smirnov test. This provides safe parameters for each
|
||||
distribution so that we can perform further testing of class methods.
|
||||
|
||||
These tests currently check only/mostly for serious errors and exceptions,
|
||||
not for numerically exact results.
|
||||
"""
|
||||
|
||||
# Note that you need to add new distributions you want tested
|
||||
# to _distr_params
|
||||
|
||||
# Precision (decimal places) used by the checks below; increased from 0 to 5.
DECIMAL = 5

# For skipping test_cont_basic
distslow = [
    'recipinvgauss', 'vonmises', 'kappa4', 'vonmises_line', 'gausshyper',
    'norminvgauss', 'geninvgauss', 'genhyperbolic', 'truncnorm',
    'truncweibull_min',
]

# distxslow are sorted by speed (very slow to slow)
distxslow = [
    'studentized_range', 'kstwo', 'ksone', 'wrapcauchy', 'genexpon',
]

# For skipping test_moments, which is already marked slow
distxslow_test_moments = [
    'studentized_range', 'vonmises', 'vonmises_line', 'ksone', 'kstwo',
    'recipinvgauss', 'genexpon',
]
|
||||
|
||||
# skip check_fit_args (test is slow)
skip_fit_test_mle = [
    'exponpow', 'exponweib', 'gausshyper', 'genexpon', 'halfgennorm',
    'gompertz', 'johnsonsb', 'johnsonsu', 'kappa4', 'ksone', 'kstwo',
    'kstwobign', 'mielke', 'ncf', 'nct', 'powerlognorm', 'powernorm',
    'recipinvgauss', 'trapezoid', 'vonmises', 'vonmises_line',
    'levy_stable', 'rv_histogram_instance', 'studentized_range',
]

# these were really slow in `test_fit.py`.
# note that this list is used to skip both fit_test and fit_fix tests
slow_fit_test_mm = [
    'argus', 'exponpow', 'exponweib', 'gausshyper', 'genexpon',
    'genhalflogistic', 'halfgennorm', 'gompertz', 'johnsonsb', 'kappa4',
    'kstwobign', 'recipinvgauss', 'trapezoid', 'truncexpon', 'vonmises',
    'vonmises_line', 'studentized_range',
]
# pearson3 fails due to something weird
# the first list fails due to non-finite distribution moments encountered
# most of the rest fail due to integration warnings
# pearson3 is overridden as not implemented due to gh-11746
fail_fit_test_mm = (
    [
        'alpha', 'betaprime', 'bradford', 'burr', 'burr12', 'cauchy',
        'crystalball', 'f', 'fisk', 'foldcauchy', 'genextreme',
        'genpareto', 'halfcauchy', 'invgamma', 'kappa3', 'levy', 'levy_l',
        'loglaplace', 'lomax', 'mielke', 'nakagami', 'ncf', 'skewcauchy',
        't', 'tukeylambda', 'invweibull',
    ]
    + [
        'genhyperbolic', 'johnsonsu', 'ksone', 'kstwo', 'nct', 'pareto',
        'powernorm', 'powerlognorm',
    ]
    + ['pearson3']
)
# Distribution names to skip in check_fit_args, keyed by fit method.
skip_fit_test = {
    "MLE": skip_fit_test_mle,
    "MM": slow_fit_test_mm + fail_fit_test_mm,
}
|
||||
|
||||
# skip check_fit_args_fix (test is slow)
skip_fit_fix_test_mle = [
    'burr', 'exponpow', 'exponweib', 'gausshyper', 'genexpon',
    'halfgennorm', 'gompertz', 'johnsonsb', 'johnsonsu', 'kappa4', 'ksone',
    'kstwo', 'kstwobign', 'levy_stable', 'mielke', 'ncf', 'ncx2',
    'powerlognorm', 'powernorm', 'rdist', 'recipinvgauss', 'trapezoid',
    'vonmises', 'vonmises_line', 'studentized_range',
]
# the first list fails due to non-finite distribution moments encountered
# most of the rest fail due to integration warnings
# pearson3 is overridden as not implemented due to gh-11746
fail_fit_fix_test_mm = (
    [
        'alpha', 'betaprime', 'burr', 'burr12', 'cauchy', 'crystalball',
        'f', 'fisk', 'foldcauchy', 'genextreme', 'genpareto', 'halfcauchy',
        'invgamma', 'kappa3', 'levy', 'levy_l', 'loglaplace', 'lomax',
        'mielke', 'nakagami', 'ncf', 'nct', 'skewcauchy', 't',
        'truncpareto', 'invweibull',
    ]
    + [
        'genhyperbolic', 'johnsonsu', 'ksone', 'kstwo', 'pareto',
        'powernorm', 'powerlognorm',
    ]
    + ['pearson3']
)
# Distribution names to skip in check_fit_args_fix, keyed by fit method.
skip_fit_fix_test = {
    "MLE": skip_fit_fix_test_mle,
    "MM": slow_fit_test_mm + fail_fit_fix_test_mm,
}
|
||||
|
||||
# These distributions fail the complex derivative test below.
|
||||
# Here 'fail' mean produce wrong results and/or raise exceptions, depending
|
||||
# on the implementation details of corresponding special functions.
|
||||
# cf https://github.com/scipy/scipy/pull/4979 for a discussion.
|
||||
# Names of distributions excluded from the complex-derivative check.
# Written as a set literal rather than `set([...])`: same contents, no
# throwaway list.
fails_cmplx = {
    'argus', 'beta', 'betaprime', 'chi', 'chi2', 'cosine',
    'dgamma', 'dweibull', 'erlang', 'f', 'gamma',
    'gausshyper', 'gengamma', 'genhyperbolic',
    'geninvgauss', 'gennorm', 'genpareto',
    'halfgennorm', 'invgamma',
    'ksone', 'kstwo', 'kstwobign', 'levy_l', 'loggamma',
    'logistic', 'loguniform', 'maxwell', 'nakagami',
    'ncf', 'nct', 'ncx2', 'norminvgauss', 'pearson3', 'rdist',
    'reciprocal', 'rice', 'skewnorm', 't', 'truncweibull_min',
    'tukeylambda', 'vonmises', 'vonmises_line',
    'rv_histogram_instance', 'truncnorm', 'studentized_range',
}
|
||||
|
||||
# rv_histogram instances, with uniform and non-uniform bins;
|
||||
# stored as (dist, arg) tuples for cases_test_cont_basic
|
||||
# and cases_test_moments.
|
||||
# Filled below with one (rv_histogram instance, empty shape tuple) pair per
# combination of histogram case and `density` flag.
histogram_test_instances = []
# equal width bins
case1 = {
    'a': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5,
          6, 6, 6, 6, 7, 7, 7, 8, 8, 9],
    'bins': 8,
}
# unequal width bins
case2 = {'a': [1, 1], 'bins': [0, 1, 10]}
for case, density in itertools.product([case1, case2], [True, False]):
    _hist = np.histogram(**case, density=density)
    _rv_hist = stats.rv_histogram(_hist, density=density)
    histogram_test_instances.append((_rv_hist, ()))
|
||||
|
||||
|
||||
def cases_test_cont_basic():
    """Yield the (distname, arg) cases for `test_cont_basic`.

    'levy_stable' is left out; names listed in `distslow` / `distxslow`
    are wrapped in `pytest.param` with the `slow` / `xslow` mark.
    """
    for distname, arg in [*distcont, *histogram_test_instances]:
        if distname == 'levy_stable':
            continue
        if distname in distslow:
            mark = pytest.mark.slow
        elif distname in distxslow:
            mark = pytest.mark.xslow
        else:
            # unmarked case: yielded as a plain tuple
            yield distname, arg
            continue
        yield pytest.param(distname, arg, marks=mark)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
@pytest.mark.parametrize('sn, n_fit_samples', [(500, 200)])
def test_cont_basic(distname, arg, sn, n_fit_samples):
    """Run the basic consistency checks on one continuous distribution.

    `distname` is either the name of a distribution in `scipy.stats` or a
    distribution instance; `arg` holds its shape parameters. `sn` random
    variates are drawn, and the first `n_fit_samples` of them are passed to
    the fit checks.
    """
    # this test skips slow distributions
    # NOTE(review): block nesting was reconstructed from flattened text;
    # confirm against the upstream file.
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # distname was not a string: treat it as the distribution itself
        distfn = distname
        distname = 'rv_histogram_instance'

    rng = np.random.RandomState(765456)
    rvs = distfn.rvs(*arg, size=sn, random_state=rng)
    m, v = distfn.stats(*arg)

    if distname != 'laplace_asymmetric':
        check_sample_meanvar_(m, v, rvs)
    check_cdf_ppf(distfn, arg, distname)
    check_sf_isf(distfn, arg, distname)
    check_pdf(distfn, arg, distname)
    check_pdf_logpdf(distfn, arg, distname)
    check_pdf_logpdf_at_endpoints(distfn, arg, distname)
    check_cdf_logcdf(distfn, arg, distname)
    check_sf_logsf(distfn, arg, distname)
    check_ppf_broadcast(distfn, arg, distname)
    check_deprecation_warning_gh5982_moment(distfn, arg, distname)
    check_deprecation_warning_gh5982_interval(distfn, arg, distname)

    alpha = 0.01
    if distname == 'rv_histogram_instance':
        check_distribution_rvs(distfn.cdf, arg, alpha, rvs)
    elif distname != 'geninvgauss':
        # skip kstest for geninvgauss since cdf is too slow; see test for
        # rv generation in TestGenInvGauss in test_distributions.py
        check_distribution_rvs(distname, arg, alpha, rvs)

    locscale_defaults = (0, 1)
    meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf,
             distfn.logsf]
    # make sure arguments are within support
    spec_x = {
        'weibull_max': -0.5,
        'levy_l': -0.5,
        'pareto': 1.5,
        'truncpareto': 3.2,
        'tukeylambda': 0.3,
        'rv_histogram_instance': 5.0,
    }
    x = spec_x.get(distname, 0.5)
    # from here on these two distributions use different shape parameters
    if distname == 'invweibull':
        arg = (1,)
    elif distname == 'ksone':
        arg = (3,)

    check_named_args(distfn, x, arg, locscale_defaults, meths)
    check_random_state_property(distfn, arg)
    check_pickling(distfn, arg)
    check_freezing(distfn, arg)

    # Entropy
    if distname not in ['kstwobign', 'kstwo', 'ncf']:
        check_entropy(distfn, arg, distname)

    if distfn.numargs == 0:
        check_vecentropy(distfn, arg)

    overrides_entropy = (distfn.__class__._entropy
                         != stats.rv_continuous._entropy)
    if overrides_entropy and distname != 'vonmises':
        check_private_entropy(distfn, arg, stats.rv_continuous)

    with npt.suppress_warnings() as sup:
        sup.filter(IntegrationWarning, "The occurrence of roundoff error")
        sup.filter(IntegrationWarning, "Extremely bad integrand")
        sup.filter(RuntimeWarning, "invalid value")
        check_entropy_vect_scale(distfn, arg)

    check_retrieving_support(distfn, arg)
    check_edge_support(distfn, arg)

    check_meth_dtype(distfn, arg, meths)
    check_ppf_dtype(distfn, arg)

    if distname not in fails_cmplx:
        check_cmplx_deriv(distfn, arg)

    if distname != 'truncnorm':
        check_ppf_private(distfn, arg, distname)

    for method in ("MLE", "MM"):
        if distname not in skip_fit_test[method]:
            check_fit_args(distfn, arg, rvs[:n_fit_samples], method)

        if distname not in skip_fit_fix_test[method]:
            check_fit_args_fix(distfn, arg, rvs[:n_fit_samples], method)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
def test_rvs_scalar(distname, arg):
    """rvs should return a scalar when given scalar arguments (gh-12428)."""
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # distname was not a string: treat it as the distribution itself.
        # (The name is not used again here, so it is not reassigned.)
        distfn = distname

    assert np.isscalar(distfn.rvs(*arg))
    assert np.isscalar(distfn.rvs(*arg, size=()))
    assert np.isscalar(distfn.rvs(*arg, size=None))
|
||||
|
||||
|
||||
def test_levy_stable_random_state_property():
    """Apply `check_random_state_property` to levy_stable on its own.

    levy_stable only implements rvs(), so it is skipped in the main loop in
    test_cont_basic(); this is the one check from there applied to it.
    """
    shape_args = (0.5, 0.1)
    check_random_state_property(stats.levy_stable, shape_args)
|
||||
|
||||
|
||||
def cases_test_moments():
    """Yield the `pytest.param` cases consumed by `test_moments`.

    Each case is ``(distname, arg, normalization_ok, higher_ok,
    is_xfailing)``. 'levy_stable' is left out, and names listed in
    `distxslow_test_moments` are yielded once with the `xslow` mark.
    """
    fail_normalization = set()
    fail_higher = {'ncf'}

    for distname, arg in distcont[:] + histogram_test_instances:
        if distname == 'levy_stable':
            continue

        if distname in distxslow_test_moments:
            yield pytest.param(distname, arg, True, True, True,
                               marks=pytest.mark.xslow(reason="too slow"))
            continue

        cond1 = distname not in fail_normalization
        cond2 = distname not in fail_higher

        marks = []
        # Currently unused, `marks` can be used to add a timeout to a test of
        # a specific distribution.  For example, this shows how a timeout could
        # be added for the 'skewnorm' distribution:
        #
        #     marks = list()
        #     if distname == 'skewnorm':
        #         marks.append(pytest.mark.timeout(300))

        yield pytest.param(distname, arg, cond1, cond2, False, marks=marks)

        if not cond1 or not cond2:
            # Run the distributions that have issues twice, once skipping the
            # not_ok parts, once with the not_ok parts but marked as knownfail
            yield pytest.param(distname, arg, True, True, True,
                               marks=[pytest.mark.xfail] + marks)
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize('distname,arg,normalization_ok,higher_ok,'
                         'is_xfailing',
                         cases_test_moments())
def test_moments(distname, arg, normalization_ok, higher_ok, is_xfailing):
    """Check normalization and the moments of one distribution.

    `normalization_ok` and `higher_ok` switch the corresponding groups of
    checks on; `is_xfailing` additionally silences every IntegrationWarning.
    """
    # NOTE(review): block nesting was reconstructed from flattened text;
    # confirm against the upstream file.
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # distname was not a string: treat it as the distribution itself
        distfn = distname
        distname = 'rv_histogram_instance'

    with npt.suppress_warnings() as sup:
        for message in (
            "The integral is probably divergent, or slowly convergent.",
            "The maximum number of subdivisions.",
        ):
            sup.filter(IntegrationWarning, message)

        if is_xfailing:
            sup.filter(IntegrationWarning)

        m, v, s, k = distfn.stats(*arg, moments='mvsk')

        with np.errstate(all="ignore"):
            if normalization_ok:
                check_normalization(distfn, arg, distname)

            if higher_ok:
                check_mean_expect(distfn, arg, m, distname)
                check_skew_expect(distfn, arg, m, v, s, distname)
                check_var_expect(distfn, arg, m, v, distname)
                check_kurt_expect(distfn, arg, m, v, k, distname)

        check_moment(distfn, arg, m, v, distname)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distcont)
def test_rvs_broadcast(dist, shape_args):
    """rvs broadcasts the shape parameters, loc and scale together."""
    if dist in ['gausshyper', 'genexpon', 'studentized_range']:
        pytest.skip("too slow")

    # If shape_only is True, it means the _rvs method of the
    # distribution uses more than one random number to generate a random
    # variate.  That means the result of using rvs with broadcasting or
    # with a nontrivial size will not necessarily be the same as using the
    # numpy.vectorize'd version of rvs(), so we can only compare the shapes
    # of the results, not the values.
    # Whether or not a distribution is in the following list is an
    # implementation detail of the distribution, not a requirement.  If
    # the implementation of the rvs() method of a distribution changes, this
    # test might also have to be changed.
    multi_draw_dists = ['argus', 'betaprime', 'dgamma', 'dweibull',
                        'exponnorm', 'genhyperbolic', 'geninvgauss',
                        'levy_stable', 'nct', 'norminvgauss', 'rice',
                        'skewnorm', 'semicircular', 'gennorm', 'loggamma']
    shape_only = dist in multi_draw_dists

    distfunc = getattr(stats, dist)
    loc = np.zeros(2)
    scale = np.ones((3, 1))
    nargs = distfunc.numargs

    # Generate shape parameter arguments: the k-th one has length k + 4
    # along its first axis, followed by k + 2 axes of length 1.
    allargs = [shape_args[k]*np.ones((k + 4,) + (1,)*(k + 2))
               for k in range(nargs)]
    allargs.extend([loc, scale])

    # bshape holds the expected shape when loc, scale, and the shape
    # parameters are all broadcast together.
    bshape = [k + 4 for k in reversed(range(nargs))] + [3, 2]

    check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, 'd')
|
||||
|
||||
|
||||
# Expected values of the SF, CDF, PDF were computed using
|
||||
# mpmath with mpmath.mp.dps = 50 and output at 20:
|
||||
#
|
||||
# def ks(x, n):
|
||||
# x = mpmath.mpf(x)
|
||||
# logp = -mpmath.power(6.0*n*x+1.0, 2)/18.0/n
|
||||
# sf, cdf = mpmath.exp(logp), -mpmath.expm1(logp)
|
||||
# pdf = (6.0*n*x+1.0) * 2 * sf/3
|
||||
# print(mpmath.nstr(sf, 20), mpmath.nstr(cdf, 20), mpmath.nstr(pdf, 20))
|
||||
#
|
||||
# Tests use 1/n < x < 1-1/n and n > 1e6 to use the asymptotic computation.
|
||||
# Larger x has a smaller sf.
|
||||
@pytest.mark.parametrize('x,n,sf,cdf,pdf,rtol',
                         [(2.0e-5, 1000000000,
                           0.44932297307934442379, 0.55067702692065557621,
                           35946.137394996276407, 5e-15),
                          (2.0e-9, 1000000000,
                           0.99999999061111115519, 9.3888888448132728224e-9,
                           8.6666665852962971765, 5e-14),
                          (5.0e-4, 1000000000,
                           7.1222019433090374624e-218, 1.0,
                           1.4244408634752704094e-211, 5e-14)])
def test_gh17775_regression(x, n, sf, cdf, pdf, rtol):
    """Regression test for gh-17775.

    In scipy 1.9.3 and earlier, these tests would fail.

    KS one asymptotic sf ~ e^(-(6nx+1)^2 / 18n)
    Given a large 32-bit integer n, 6n will overflow in the c implementation.
    Example of broken behaviour:
    ksone.sf(2.0e-5, 1000000000) == 0.9374359693473666
    """
    ks = stats.ksone
    computed = np.array([ks.sf(x, n), ks.cdf(x, n), ks.pdf(x, n)])
    reference = np.array([sf, cdf, pdf])
    npt.assert_allclose(computed, reference, rtol=rtol)
    # The sf+cdf must sum to 1.0.
    sf_plus_cdf = computed[0] + computed[1]
    npt.assert_equal(sf_plus_cdf, 1.0)
    # Check inverting the (potentially very small) sf; this comparison
    # uses a looser tolerance (rtol=1e-8).
    npt.assert_allclose([ks.isf(sf, n)], [x], rtol=1e-8)
|
||||
|
||||
|
||||
def test_rvs_gh2069_regression():
    """Regression tests for gh-2069.

    In scipy 0.17 and earlier, these tests would fail.

    A typical example of the broken behavior:
    >>> norm.rvs(loc=np.zeros(5), scale=np.ones(5))
    array([-2.49613705, -2.49613705, -2.49613705, -2.49613705, -2.49613705])
    """
    rng = np.random.RandomState(123)
    # The draws share one generator, so they are made in this order.
    param_sets = [
        {'loc': np.zeros(5), 'scale': 1},
        {'loc': 0, 'scale': np.ones(5)},
        {'loc': np.zeros(5), 'scale': np.ones(5)},
        {'loc': np.array([[0], [0]]), 'scale': np.ones(5)},
    ]
    for params in param_sets:
        vals = stats.norm.rvs(random_state=rng, **params)
        # flatten so the 2-d case is compared element to element as well
        d = np.diff(vals.ravel())
        npt.assert_(np.all(d != 0),
                    "All the values are equal, but they shouldn't be!")

    # These argument combinations are expected to raise ValueError.
    assert_raises(ValueError, stats.norm.rvs, [[0, 0], [0, 0]],
                  [[1, 1], [1, 1]], 1)
    assert_raises(ValueError, stats.gamma.rvs, [2, 3, 4, 5], 0, 1, (2, 2))
    assert_raises(ValueError, stats.gamma.rvs, [1, 1, 1, 1], [0, 0, 0, 0],
                  [[1], [2]], (4,))
|
||||
|
||||
|
||||
def test_nomodify_gh9900_regression():
    """Regression test for gh-9990.

    Prior to gh-9990, calls to stats.truncnorm._cdf() used whatever was
    set inside the stats.truncnorm instance during stats.truncnorm.cdf().
    This could cause issues with multi-threaded code.
    Since then, the calls to cdf() are not permitted to modify the global
    stats.truncnorm instance.
    """
    tn = stats.truncnorm
    right_half = 0.6826894921370859
    left_half = 0.31731050786291415

    # Use the right-half truncated normal
    # Check that the cdf and _cdf return the same result.
    npt.assert_almost_equal(tn.cdf(1, 0, np.inf), right_half)
    npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]), right_half)

    # Now use the left-half truncated normal
    npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0), left_half)
    npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]), left_half)

    # Check that the right-half truncated normal _cdf hasn't changed
    # (NOT 1.6826894921370859)
    npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]), right_half)
    npt.assert_almost_equal(tn.cdf(1, 0, np.inf), right_half)

    # Check that the left-half truncated normal _cdf hasn't changed
    # (not -0.6826894921370859)
    npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]), left_half)
    # Not 1.6826894921370859
    npt.assert_almost_equal(tn.cdf(1, -np.inf, 0), 1)
    # Not -0.6826894921370859
    npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0), left_half)
|
||||
|
||||
|
||||
def test_broadcast_gh9990_regression():
    """Regression test for gh-9990.

    The x-value 7 only lies within the support of 4 of the supplied
    distributions.  Prior to 9990, one array passed to
    stats.reciprocal._cdf would have 4 elements, but an array
    previously stored by stats.reciprocal_argcheck() would have 6, leading
    to a broadcast error.
    """
    a = np.array([1, 2, 3, 4, 5, 6])
    b = np.array([8, 16, 1, 32, 1, 48])
    cdf = stats.reciprocal.cdf

    # fixed x = 7
    one_by_one = [cdf(7, _a, _b) for _a, _b in zip(a, b)]
    npt.assert_array_almost_equal(cdf(7, a, b), one_by_one)

    # fixed x = 1
    one_by_one = [cdf(1, _a, _b) for _a, _b in zip(a, b)]
    npt.assert_array_almost_equal(cdf(1, a, b), one_by_one)

    # x equal to the first shape parameter
    one_by_one = [cdf(_a, _a, _b) for _a, _b in zip(a, b)]
    npt.assert_array_almost_equal(cdf(a, a, b), one_by_one)

    # x equal to the second shape parameter
    one_by_one = [cdf(_b, _a, _b) for _a, _b in zip(a, b)]
    npt.assert_array_almost_equal(cdf(b, a, b), one_by_one)
|
||||
|
||||
|
||||
def test_broadcast_gh7933_regression():
    """Check broadcast works: the call below only has to run."""
    x = np.array([3.0, 2.0, 1.0])
    loc = np.array([6.0, 5.0, 4.0])
    lower = (1.5 - np.array([6.0, 5.0, 4.0])) / 3.0
    stats.truncnorm.logpdf(x, a=lower, b=np.inf, loc=loc, scale=3.0)
|
||||
|
||||
|
||||
def test_gh2002_regression():
    """Broadcast works when only some x-values suit some shape arguments.

    The vectorized truncnorm.pdf call is compared with element-by-element
    calls over the same (x, a) pairs.
    """
    x = np.r_[-2:2:101j]
    a = np.r_[-np.ones(50), np.ones(51)]
    one_by_one = [stats.truncnorm.pdf(_x, _a, np.inf)
                  for _x, _a in zip(x, a)]
    vectorized = stats.truncnorm.pdf(x, a, np.inf)
    npt.assert_array_almost_equal(vectorized, one_by_one)
|
||||
|
||||
|
||||
def test_gh1320_regression():
    """Check that the first example from gh-1320 now works."""
    c = 2.62
    shape_column = np.array([[c], [c + 0.5]])
    stats.genextreme.ppf(0.5, shape_column)
    # The other examples in gh-1320 appear to have stopped working
    # some time ago.
    # ans = stats.genextreme.moment(2, np.array([c, c + 0.5]))
    # expected = np.array([25.50105963, 115.11191437])
    # stats.genextreme.moment(5, np.array([[c], [c + 0.5]]))
    # stats.genextreme.moment(5, np.array([c, c + 0.5]))
|
||||
|
||||
|
||||
def test_method_of_moments():
    """Generic method-of-moments fit of `uniform` on a textbook example.

    Example from
    https://en.wikipedia.org/wiki/Method_of_moments_(statistics)
    """
    np.random.seed(1234)
    data = [0, 0, 0, 0, 1]
    lower = 1/5 - 2*np.sqrt(3)/5
    upper = 1/5 + 2*np.sqrt(3)/5
    # force use of method of moments (uniform.fit is overridden)
    generic_fit = super(type(stats.uniform), stats.uniform).fit
    loc, scale = generic_fit(data, method="MM")
    npt.assert_almost_equal(loc, lower, decimal=4)
    npt.assert_almost_equal(loc+scale, upper, decimal=4)
|
||||
|
||||
|
||||
def check_sample_meanvar_(popmean, popvar, sample):
    """Check `sample` against each population moment that is finite."""
    checks = ((popmean, check_sample_mean), (popvar, check_sample_var))
    for moment, check in checks:
        # a non-finite moment is not checked
        if np.isfinite(moment):
            check(sample, moment)
|
||||
|
||||
|
||||
def check_sample_mean(sample, popmean):
    """Assert the sample mean is not unlikely under mean `popmean`.

    Uses the p-value of a one-sample t-test, which must exceed 0.01.
    """
    pvalue = stats.ttest_1samp(sample, popmean).pvalue
    assert pvalue > 0.01
|
||||
|
||||
|
||||
def check_sample_var(sample, popvar):
    """Assert `popvar` lies in a bootstrap CI of the sample variance.

    The interval is bootstrapped from `sample` at confidence level 0.995.
    This used to be a chi-squared test for variance, but there were too
    many false positives.
    """
    def sample_variance(x, axis):
        # unbiased (ddof=1) variance along the axis bootstrap passes in
        return x.var(ddof=1, axis=axis)

    result = stats.bootstrap((sample,), sample_variance,
                             confidence_level=0.995)
    interval = result.confidence_interval
    assert interval.low <= popvar <= interval.high
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, msg):
    """Assert cdf(ppf(q)) returns q to DECIMAL places for three values."""
    probs = [0.001, 0.5, 0.999]
    roundtrip = distfn.cdf(distfn.ppf(probs, *arg), *arg)
    npt.assert_almost_equal(roundtrip, probs, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-ppf roundtrip')
|
||||
|
||||
|
||||
def check_sf_isf(distfn, arg, msg):
    """Assert the sf/isf roundtrip and that cdf equals 1 - sf.

    Both comparisons use DECIMAL places.
    """
    probs = [0.1, 0.5, 0.9]
    roundtrip = distfn.sf(distfn.isf(probs, *arg), *arg)
    npt.assert_almost_equal(roundtrip, probs, decimal=DECIMAL,
                            err_msg=msg + ' - sf-isf roundtrip')

    points = [0.1, 0.9]
    cdf_vals = distfn.cdf(points, *arg)
    complement = 1.0 - distfn.sf(points, *arg)
    npt.assert_almost_equal(cdf_vals, complement, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-sf relationship')
|
||||
|
||||
|
||||
def check_pdf(distfn, arg, msg):
    """Compare the pdf with a central difference of the cdf.

    The comparison point is the median, moved by 0.1 when the pdf there is
    below 1e-4 or above 1e4.
    """
    eps = 1e-6
    point = distfn.ppf(0.5, *arg)
    pdfv = distfn.pdf(point, *arg)
    if (pdfv < 1e-4) or (pdfv > 1e4):
        # avoid checking a case where pdf is close to zero or
        # huge (singularity)
        point = point + 0.1
        pdfv = distfn.pdf(point, *arg)
    upper = distfn.cdf(point + eps, *arg)
    lower = distfn.cdf(point - eps, *arg)
    cdfdiff = (upper - lower)/eps/2.0
    # replace with better diff and better test (more points),
    # actually, this works pretty well
    npt.assert_almost_equal(pdfv, cdfdiff, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-pdf relationship')
|
||||
|
||||
|
||||
def check_pdf_logpdf(distfn, args, msg):
    """Compare log(pdf) with logpdf at several interior quantiles.

    Non-finite quantiles are dropped first; then zero or non-finite pdf
    values and non-finite logpdf values are filtered out before comparing.
    """
    probs = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    quantiles = distfn.ppf(probs, *args)
    quantiles = quantiles[np.isfinite(quantiles)]
    pdf = distfn.pdf(quantiles, *args)
    logpdf = distfn.logpdf(quantiles, *args)
    usable_pdf = pdf[(pdf != 0) & np.isfinite(pdf)]
    finite_logpdf = logpdf[np.isfinite(logpdf)]
    npt.assert_almost_equal(np.log(usable_pdf), finite_logpdf, decimal=7,
                            err_msg=msg + " - logpdf-log(pdf) relationship")
|
||||
|
||||
|
||||
def check_pdf_logpdf_at_endpoints(distfn, args, msg):
    """Compare log(pdf) with logpdf at the (finite) end points.

    The end points are ppf(0) and ppf(1); non-finite ones are dropped, as
    are zero or non-finite pdf values and non-finite logpdf values.
    """
    endpoints = distfn.ppf(np.array([0, 1]), *args)
    endpoints = endpoints[np.isfinite(endpoints)]
    pdf = distfn.pdf(endpoints, *args)
    logpdf = distfn.logpdf(endpoints, *args)
    usable_pdf = pdf[(pdf != 0) & np.isfinite(pdf)]
    finite_logpdf = logpdf[np.isfinite(logpdf)]
    npt.assert_almost_equal(np.log(usable_pdf), finite_logpdf, decimal=7,
                            err_msg=msg + " - logpdf-log(pdf) relationship")
|
||||
|
||||
|
||||
def check_sf_logsf(distfn, args, msg):
    """Compare log(sf) with logsf at several quantiles.

    Non-finite quantiles are dropped first; zero sf values and non-finite
    logsf values are filtered out before comparing.
    """
    probs = np.array([0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
    quantiles = distfn.ppf(probs, *args)
    quantiles = quantiles[np.isfinite(quantiles)]
    sf = distfn.sf(quantiles, *args)
    logsf = distfn.logsf(quantiles, *args)
    nonzero_sf = sf[sf != 0]
    finite_logsf = logsf[np.isfinite(logsf)]
    npt.assert_almost_equal(np.log(nonzero_sf), finite_logsf, decimal=7,
                            err_msg=msg + " - logsf-log(sf) relationship")
|
||||
|
||||
|
||||
def check_cdf_logcdf(distfn, args, msg):
    """Compare log(cdf) with logcdf at several quantiles.

    Non-finite quantiles are dropped first; zero cdf values and non-finite
    logcdf values are filtered out before comparing.
    """
    probs = np.array([0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
    quantiles = distfn.ppf(probs, *args)
    quantiles = quantiles[np.isfinite(quantiles)]
    cdf = distfn.cdf(quantiles, *args)
    logcdf = distfn.logcdf(quantiles, *args)
    nonzero_cdf = cdf[cdf != 0]
    finite_logcdf = logcdf[np.isfinite(logcdf)]
    npt.assert_almost_equal(np.log(nonzero_cdf), finite_logcdf, decimal=7,
                            err_msg=msg + " - logcdf-log(cdf) relationship")
|
||||
|
||||
|
||||
def check_ppf_broadcast(distfn, arg, msg):
    """Compare ppf for a single argument set with ppf for repeated ones.

    Each shape parameter in `arg` is repeated `num_repeats` times in an
    array; the medians computed from those arrays must match the median
    computed from `arg` itself.
    """
    num_repeats = 5
    # An empty `arg` gives an empty list here.  (The original started from
    # `[] * num_repeats`, which is always just `[]`.)
    args = [np.array([shape] * num_repeats) for shape in arg]

    median = distfn.ppf(0.5, *arg)
    medians = distfn.ppf(0.5, *args)
    msg += " - ppf multiple"
    npt.assert_almost_equal(medians, [median] * num_repeats, decimal=7,
                            err_msg=msg)
|
||||
|
||||
|
||||
def check_distribution_rvs(dist, args, alpha, rvs):
    """Kolmogorov-Smirnov check of `rvs` against `dist`.

    dist is either a cdf function or name of a distribution in scipy.stats.
    args are the args for scipy.stats.dist(*args)
    alpha is a significance level, ~0.01
    rvs is array_like of random variables

    test from scipy.stats.tests
    this version reuses existing random variables
    """
    D, pval = stats.kstest(rvs, dist, args=args, N=1000)
    if pval >= alpha:
        return

    # The rvs passed in failed the K-S test, which _could_ happen
    # but is unlikely if alpha is small enough.
    # Repeat the test with a new sample of rvs.
    # Generate 1000 rvs, perform a K-S test that the new sample of rvs
    # are distributed according to the distribution.
    # NOTE(review): `dist` is passed as both arguments here; when it is a
    # cdf function rather than a name, confirm it is usable as the sample
    # generator too.
    D, pval = stats.kstest(dist, dist, args=args, N=1000)
    npt.assert_(pval > alpha,
                f"D = {D}; pval = {pval}; alpha = {alpha}\nargs = {args}")
|
||||
|
||||
|
||||
def check_vecentropy(distfn, args):
    """Assert `vecentropy` and `_entropy` give equal results for `args`."""
    vectorized = distfn.vecentropy(*args)
    private = distfn._entropy(*args)
    npt.assert_equal(vectorized, private)
|
||||
|
||||
|
||||
def check_loc_scale(distfn, arg, m, v, msg):
    """Assert mean and variance transform correctly under loc and scale.

    `m` and `v` are the mean and variance for `arg` alone; `msg` is
    accepted but not used.
    """
    # Make `loc` and `scale` arrays to catch bugs like gh-13580 where
    # `loc` and `scale` arrays improperly broadcast with shapes.
    loc = np.array([10.0, 20.0])
    scale = np.array([10.0, 20.0])
    mt, vt = distfn.stats(*arg, loc=loc, scale=scale)
    npt.assert_allclose(m*scale + loc, mt)
    npt.assert_allclose(v*scale*scale, vt)
|
||||
|
||||
|
||||
def check_ppf_private(distfn, arg, msg):
    """Assert the private `_ppf` returns no NaN at 0.1, 0.5 and 0.9."""
    # fails by design for truncnorm self.nb not defined
    probs = np.array([0.1, 0.5, 0.9])
    ppfs = distfn._ppf(probs, *arg)
    has_nan = np.any(np.isnan(ppfs))
    npt.assert_(not has_nan, msg + 'ppf private is nan')
|
||||
|
||||
|
||||
def check_retrieving_support(distfn, args):
    """Assert `support` with loc and scale is the shifted, scaled support."""
    loc, scale = 1, 2
    base = np.array(distfn.support(*args))
    moved = np.array(distfn.support(*args, loc=loc, scale=scale))
    npt.assert_almost_equal(base*scale + loc, moved)
|
||||
|
||||
|
||||
def check_fit_args(distfn, arg, rvs, method):
    """Fit `rvs` twice and check how many parameters come back.

    One fit uses the default optimizer and one uses 'powell'.
    """
    with np.errstate(all='ignore'), npt.suppress_warnings() as sup:
        for text in ("The shape parameter of the erlang",
                     "floating point number truncated"):
            sup.filter(category=RuntimeWarning, message=text)
        vals = distfn.fit(rvs, method=method)
        vals2 = distfn.fit(rvs, optimizer='powell', method=method)
    # Only check the length of the return; accuracy tested in test_fit.py
    n_params = 2+len(arg)
    npt.assert_(len(vals) == n_params)
    npt.assert_(len(vals2) == n_params)
|
||||
|
||||
|
||||
def check_fit_args_fix(distfn, arg, rvs, method):
    """Check that `fit` returns fixed parameters unchanged.

    loc is fixed at 0, scale at 1, and then each of up to the first three
    shape parameters at its value in `arg`; every fit must return
    ``2 + len(arg)`` values.
    """
    n_params = 2+len(arg)
    with np.errstate(all='ignore'), npt.suppress_warnings() as sup:
        sup.filter(category=RuntimeWarning,
                   message="The shape parameter of the erlang")

        loc_fixed = distfn.fit(rvs, floc=0, method=method)
        scale_fixed = distfn.fit(rvs, fscale=1, method=method)
        npt.assert_(len(loc_fixed) == n_params)
        npt.assert_(loc_fixed[-2] == 0)
        npt.assert_(scale_fixed[-1] == 1)
        npt.assert_(len(scale_fixed) == n_params)

        # Fix f0, f1, f2 in turn, as far as `arg` provides them.
        for idx in range(min(len(arg), 3)):
            fixed = {f"f{idx}": arg[idx]}
            shape_fixed = distfn.fit(rvs, method=method, **fixed)
            npt.assert_(len(shape_fixed) == n_params)
            npt.assert_(shape_fixed[idx] == arg[idx])
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
@pytest.mark.parametrize('distname, args', distcont)
def test_methods_with_lists(method, distname, args):
    # Continuous distribution methods must accept plain Python lists for
    # the evaluation points, the shape parameters, loc and scale.
    f = getattr(getattr(stats, distname), method)
    # invweibull's log-methods are evaluated at different points
    use_alt_points = distname == 'invweibull' and method.startswith('log')
    x = [1.5, 2] if use_alt_points else [0.1, 0.2]

    shape2 = [[a]*2 for a in args]
    loc = [0, 0.1]
    scale = [1, 1.01]
    vectorized = f(x, *shape2, loc=loc, scale=scale)
    # Reference: evaluate the same method one element at a time.
    elementwise = [f(*v) for v in zip(x, *shape2, loc, scale)]
    npt.assert_allclose(vectorized, elementwise, rtol=1e-14, atol=5e-14)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
def test_gilbrat_deprecation(method):
    # Calling a method of the misspelled `gilbrat` must warn and still
    # return the same value as the correctly named `gibrat`.
    reference = getattr(stats.gibrat, method)(1)
    pattern = rf"\s*`gilbrat\.{method}` is deprecated,.*"
    with pytest.warns(DeprecationWarning, match=pattern):
        deprecated = getattr(stats.gilbrat, method)(1)
    assert deprecated == reference
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
def test_gilbrat_deprecation_frozen(method):
    reference = getattr(stats.gibrat, method)(1)
    pattern = r"\s*`gilbrat` is deprecated"
    with pytest.warns(DeprecationWarning, match=pattern):
        # warn on instantiation of frozen distribution...
        frozen = stats.gilbrat()
    # ... not on its methods
    frozen_method = getattr(frozen, method)
    assert frozen_method(1) == reference
|
||||
|
||||
|
||||
def test_burr_fisk_moment_gh13234_regression():
    # With scalar arguments, `moment` of burr and fisk must return a float.
    burr_moment = stats.burr.moment(1, 5, 4)
    assert isinstance(burr_moment, float)

    fisk_moment = stats.fisk.moment(1, 8)
    assert isinstance(fisk_moment, float)
|
||||
|
||||
|
||||
def test_moments_with_array_gh12192_regression():
    """Check ``moment`` for array-valued and invalid ``loc``/``scale``."""
    nan = np.nan
    norm_moment = stats.norm.moment
    chi_moment = stats.chi.moment

    # array loc and scalar scale
    res = norm_moment(order=1, loc=np.array([1, 2, 3]), scale=1)
    npt.assert_equal(res, np.array([1., 2., 3.]))

    # array loc and invalid scalar scale
    res = norm_moment(order=1, loc=np.array([1, 2, 3]), scale=-1)
    npt.assert_equal(res, np.array([nan, nan, nan]))

    # array loc and array scale with invalid entries
    res = norm_moment(order=1, loc=np.array([1, 2, 3]), scale=[-3, 1, 0])
    npt.assert_equal(res, np.array([nan, 2., nan]))

    # (loc == 0) & (scale < 0)
    res = norm_moment(order=2, loc=0, scale=-4)
    npt.assert_equal(res, nan)
    assert isinstance(res, type(nan))

    # array loc with 0 entries and scale with invalid entries
    res = norm_moment(order=2, loc=[1, 0, 2], scale=[3, -4, -5])
    npt.assert_equal(res, np.array([10., nan, nan]))

    # all(loc == 0) & (array scale with invalid entries)
    res = norm_moment(order=2, loc=[0, 0, 0], scale=[5., -2, 100.])
    npt.assert_equal(res, np.array([25., nan, 10000.]))

    # all( (loc == 0) & (scale < 0) )
    res = norm_moment(order=2, loc=[0, 0, 0], scale=[-5., -2, -100.])
    npt.assert_equal(res, np.array([nan, nan, nan]))

    # scalar args, loc, and scale
    res = chi_moment(order=2, df=1, loc=0, scale=0)
    npt.assert_equal(res, nan)
    assert isinstance(res, type(nan))

    # array args, scalar loc, and scalar scale
    res = chi_moment(order=2, df=[1, 2, 3], loc=0, scale=0)
    npt.assert_equal(res, np.array([nan, nan, nan]))

    # array args, array loc, and array scale
    res = chi_moment(order=2, df=[1, 2, 3], loc=[1., 0., 2.],
                     scale=[1., -3., 0.])
    npt.assert_allclose(res, np.array([3.59576912, nan, nan]), rtol=1e-8)

    # (n > 4), all(loc != 0), and all(scale != 0)
    res = norm_moment(5, [1., 2.], [1., 2.])
    npt.assert_allclose(res, np.array([26., 832.]), rtol=1e-13)

    # test broadcasting and more: each parameter varies along its own axis
    a = np.array([-1.1, 0, 1, 2.2, np.pi]).reshape((-1, 1, 1, 1))
    b = np.array([-1.1, 0, 1, 2.2, np.pi]).reshape((-1, 1, 1))
    loc = np.array([-1.1, 0, np.sqrt(2)]).reshape((-1, 1))
    scale = np.array([-2.1, 0, 1, 2.2, np.pi])

    broadcast_res = stats.beta.moment(order=2, a=a, b=b, loc=loc,
                                      scale=scale)

    a, b, loc, scale = np.broadcast_arrays(a, b, loc, scale)

    with np.errstate(invalid='ignore', divide='ignore'):
        for idx in np.ndindex(a.shape):
            # check against same function with scalar input
            scalar_res = stats.beta.moment(order=2, a=a[idx], b=b[idx],
                                           loc=loc[idx], scale=scale[idx])
            np.testing.assert_equal(broadcast_res[idx], scalar_res)
|
||||
|
||||
|
||||
def test_broadcasting_in_moments_gh12192_regression():
    """Check values and output shapes of ``moment`` under broadcasting."""
    nan = np.nan

    # loc of shape (3,) with a nested-list scale
    got = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=[[1]])
    want = np.array([[1., 2., 3.]])
    npt.assert_equal(got, want)
    assert got.shape == want.shape

    # column loc against row scale
    got = stats.norm.moment(order=1, loc=np.array([[1], [2], [3]]),
                            scale=[1, 2, 3])
    want = np.array([[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]])
    npt.assert_equal(got, want)
    assert got.shape == want.shape

    # array shape parameter with scalar loc and scale
    got = stats.chi.moment(order=1, df=[1., 2., 3.], loc=0., scale=1.)
    want = np.array([0.79788456, 1.25331414, 1.59576912])
    npt.assert_allclose(got, want, rtol=1e-8)
    assert got.shape == want.shape

    # column shape parameter against row loc/scale with invalid scales
    got = stats.chi.moment(order=1, df=[[1.], [2.], [3.]], loc=[0., 1., 2.],
                           scale=[-1., 0., 3.])
    want = np.array([[nan, nan, 4.39365368],
                     [nan, nan, 5.75994241],
                     [nan, nan, 6.78730736]])
    npt.assert_allclose(got, want, rtol=1e-8)
    assert got.shape == want.shape
|
||||
|
||||
|
||||
def test_kappa3_array_gh13582():
    # https://github.com/scipy/scipy/pull/15140#issuecomment-994958241
    # `stats` called with an array of shapes must agree with the results
    # of calling it one shape and one moment at a time.
    shapes = [0.5, 1.5, 2.5, 3.5, 4.5]
    moments = 'mvsk'
    rows = []
    for moment in moments:
        rows.append([stats.kappa3.stats(shape, moments=moment)
                     for shape in shapes])
    one_by_one = np.array(rows)
    vectorized = np.array(stats.kappa3.stats(shapes, moments=moments))
    npt.assert_allclose(one_by_one, vectorized)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_kappa4_array_gh13582():
    moments = 'mvsk'

    def one_by_one(h, k):
        # Reference: one (h, k) pair and one moment at a time.
        return np.array([[stats.kappa4.stats(hi, ki, moments=moment)
                          for hi, ki in zip(h, k)] for moment in moments])

    h = np.array([-0.5, 2.5, 3.5, 4.5, -3])
    k = np.array([-0.5, 1, -1.5, 0, 3.5])
    reference = one_by_one(h, k)
    vectorized = np.array(stats.kappa4.stats(h, k, moments=moments))
    npt.assert_allclose(reference, vectorized)

    # https://github.com/scipy/scipy/pull/15250#discussion_r775112913
    h = np.array([-1, -1/4, -1/4, 1, -1, 0])
    k = np.array([1, 1, 1/2, -1/3, -1, 0])
    reference = one_by_one(h, k)
    vectorized = np.array(stats.kappa4.stats(h, k, moments=moments))
    npt.assert_allclose(reference, vectorized)

    # https://github.com/scipy/scipy/pull/15250#discussion_r775115021
    # Only the broadcast output shape is checked for this case.
    h = np.array([-1, -0.5, 1])
    k = np.array([-1, -0.5, 0, 1])[:, None]
    vectorized = np.array(stats.kappa4.stats(h, k, moments=moments))
    assert vectorized.shape == (4, 4, 3)
|
||||
|
||||
|
||||
def test_frozen_attributes():
    # gh-14827 reported that all frozen distributions had both pmf and pdf
    # attributes; continuous should have pdf and discrete should have pmf.
    message = "'rv_continuous_frozen' object has no attribute"
    with pytest.raises(AttributeError, match=message):
        stats.norm().pmf
    with pytest.raises(AttributeError, match=message):
        stats.norm().logpmf
    # Freezing must still work when the distribution object carries a
    # stray `pmf` attribute.
    stats.norm.pmf = "herring"
    try:
        frozen_norm = stats.norm()
        assert isinstance(frozen_norm, rv_continuous_frozen)
    finally:
        # Always undo the monkeypatch so a failure here cannot leak the
        # attribute into other tests that use `stats.norm`.
        delattr(stats.norm, 'pmf')
|
||||
|
||||
|
||||
def test_skewnorm_pdf_gh16038():
    # With shape a == 0 the skewnorm pdf must equal the norm pdf, both for
    # a scalar at -inf and for the zero entries of an array of shapes.
    rng = np.random.default_rng(0)
    npt.assert_equal(stats.skewnorm.pdf(-np.inf, 0), stats.norm.pdf(-np.inf))

    x = rng.random(size=(3, 3))
    a = rng.random(size=(3, 3))
    zero_shape = rng.random(size=(3, 3)) < 0.5
    a[zero_shape] = 0
    res = stats.skewnorm.pdf(x, a)
    npt.assert_equal(res[zero_shape], stats.norm.pdf(x[zero_shape]))
    npt.assert_equal(res[~zero_shape],
                     stats.skewnorm.pdf(x[~zero_shape], a[~zero_shape]))
|
||||
|
||||
|
||||
# for scalar input, these functions should return scalar output
# Each case is a [method_name, positional_args] pair.
scalar_out = (
    [['rvs', []]]
    + [[name, [0]] for name in ('pdf', 'logpdf', 'cdf', 'logcdf',
                                'sf', 'logsf', 'ppf', 'isf')]
    + [['moment', [1]]]
    + [[name, []] for name in ('entropy', 'expect', 'median', 'mean',
                               'std', 'var')]
)
# ... and these are indexed at [0] and [1] by the test below
scalars_out = [
    ['interval', [0.95]],
    ['support', []],
    ['stats', ['mv']],
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('case', scalar_out + scalars_out)
def test_scalar_for_scalar(case):
    # Some rv_continuous functions returned 0d array instead of NumPy scalar
    # Guard against regression
    method_name, args = case
    res = getattr(stats.norm(), method_name)(*args)
    # Cases outside `scalar_out` are checked on their first two elements.
    outputs = (res,) if case in scalar_out else (res[0], res[1])
    for value in outputs:
        assert isinstance(value, np.number)
|
||||
|
||||
|
||||
def test_scalar_for_scalar2():
    # test methods that are not attributes of frozen distributions
    data = [1, 2, 3]
    for estimator in (stats.norm.fit, stats.norm.fit_loc_scale):
        loc_scale = estimator(data)
        assert isinstance(loc_scale[0], np.number)
        assert isinstance(loc_scale[1], np.number)
    nnlf = stats.norm.nnlf((0, 1), data)
    assert isinstance(nnlf, np.number)
|
||||
115
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_crosstab.py
vendored
Normal file
115
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_crosstab.py
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, assert_equal
|
||||
from scipy.stats.contingency import crosstab
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_basic(sparse):
    # Two sequences: check the unique values and the 2-d count table, for
    # both the dense and the sparse result.
    a = [0, 0, 9, 9, 0, 0, 9]
    b = [2, 1, 3, 1, 2, 3, 3]
    expected_avals = [0, 9]
    expected_bvals = [1, 2, 3]
    expected_count = np.array([[1, 2, 1],
                               [1, 0, 2]])
    (avals, bvals), count = crosstab(a, b, sparse=sparse)
    assert_array_equal(avals, expected_avals)
    assert_array_equal(bvals, expected_bvals)
    if sparse:
        # `toarray()` instead of the deprecated `.A` attribute of sparse
        # matrices.
        assert_array_equal(count.toarray(), expected_count)
    else:
        assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
def test_crosstab_basic_1d():
    # Verify that a single input sequence works as expected.
    data = [1, 2, 3, 1, 2, 3, 3]
    elements, count = crosstab(data)
    (xvals,) = elements
    assert_array_equal(xvals, [1, 2, 3])
    assert_array_equal(count, np.array([2, 2, 3]))
|
||||
|
||||
|
||||
def test_crosstab_basic_3d():
    # Verify the function for three input sequences.
    a, b = 'a', 'b'
    x = [0, 0, 9, 9, 0, 0, 9, 9]
    y = [a, a, a, a, b, b, b, a]
    z = [1, 2, 3, 1, 2, 3, 3, 1]
    expected_elements = ([0, 9], [a, b], [1, 2, 3])
    expected_count = np.array([[[1, 1, 0],
                                [0, 1, 1]],
                               [[2, 0, 1],
                                [0, 0, 1]]])
    (xvals, yvals, zvals), count = crosstab(x, y, z)
    for actual, expected in zip((xvals, yvals, zvals), expected_elements):
        assert_array_equal(actual, expected)
    assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_levels(sparse):
    # Explicit levels for `b` include 0, which never occurs in the data,
    # so the table gains an all-zero first column.
    a = [0, 0, 9, 9, 0, 0, 9]
    b = [1, 2, 3, 1, 2, 3, 3]
    expected_avals = [0, 9]
    expected_bvals = [0, 1, 2, 3]
    expected_count = np.array([[0, 1, 2, 1],
                               [0, 1, 0, 2]])
    (avals, bvals), count = crosstab(a, b, levels=[None, [0, 1, 2, 3]],
                                     sparse=sparse)
    assert_array_equal(avals, expected_avals)
    assert_array_equal(bvals, expected_bvals)
    if sparse:
        # `toarray()` instead of the deprecated `.A` attribute of sparse
        # matrices.
        assert_array_equal(count.toarray(), expected_count)
    else:
        assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_extra_levels(sparse):
    # The pair of values (-1, 3) will be ignored, because we explicitly
    # request the counted `a` values to be [0, 9].
    a = [0, 0, 9, 9, 0, 0, 9, -1]
    b = [1, 2, 3, 1, 2, 3, 3, 3]
    expected_avals = [0, 9]
    expected_bvals = [0, 1, 2, 3]
    expected_count = np.array([[0, 1, 2, 1],
                               [0, 1, 0, 2]])
    (avals, bvals), count = crosstab(a, b, levels=[[0, 9], [0, 1, 2, 3]],
                                     sparse=sparse)
    assert_array_equal(avals, expected_avals)
    assert_array_equal(bvals, expected_bvals)
    if sparse:
        # `toarray()` instead of the deprecated `.A` attribute of sparse
        # matrices.
        assert_array_equal(count.toarray(), expected_count)
    else:
        assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
def test_validation_at_least_one():
    # Calling `crosstab` without any input sequence must raise TypeError.
    expected_error = pytest.raises(TypeError, match='At least one')
    with expected_error:
        crosstab()
|
||||
|
||||
|
||||
def test_validation_same_lengths():
    # Input sequences of different lengths must raise ValueError.
    short, long_ = [1, 2], [1, 2, 3, 4]
    expected_error = pytest.raises(ValueError,
                                   match='must have the same length')
    with expected_error:
        crosstab(short, long_)
|
||||
|
||||
|
||||
def test_validation_sparse_only_two_args():
    # Requesting a sparse result with three sequences must raise ValueError.
    sequences = ([0, 1, 1], [8, 8, 9], [1, 3, 3])
    expected_error = pytest.raises(ValueError,
                                   match='only two input sequences')
    with expected_error:
        crosstab(*sequences, sparse=True)
|
||||
|
||||
|
||||
def test_validation_len_levels_matches_args():
    # One `levels` entry for two input sequences must raise ValueError.
    too_few_levels = ([0, 1, 2, 3],)
    expected_error = pytest.raises(ValueError,
                                   match='number of input sequences')
    with expected_error:
        crosstab([0, 1, 1], [8, 8, 9], levels=too_few_levels)
|
||||
|
||||
|
||||
def test_result():
    # The result must compare equal to the pair of its own `elements` and
    # `count` attributes.
    res = crosstab([0, 1], [1, 2])
    by_attribute = (res.elements, res.count)
    assert_equal(by_attribute, res)
|
||||
545
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_discrete_basic.py
vendored
Normal file
545
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_discrete_basic.py
vendored
Normal file
@@ -0,0 +1,545 @@
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment, check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_edge_support,
|
||||
check_named_args, check_random_state_property,
|
||||
check_pickling, check_rvs_broadcast, check_freezing,
|
||||
check_deprecation_warning_gh5982_moment,
|
||||
check_deprecation_warning_gh5982_interval)
|
||||
from scipy.stats._distr_params import distdiscrete, invdistdiscrete
|
||||
from scipy.stats._distn_infrastructure import rv_discrete_frozen
|
||||
|
||||
# A custom finite distribution is tested alongside the named ones; it is
# appended in place to the imported `distdiscrete` list.
vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4])
distdiscrete.append([stats.rv_discrete(values=vals), ()])

# For these distributions, test_discrete_basic only runs with test mode full
distslow = {'zipfian', 'nhypergeom'}
|
||||
|
||||
|
||||
def cases_test_discrete_basic():
    """Yield ``(distname, arg, first_case)`` cases for test_discrete_basic.

    Distributions listed in ``distslow`` are wrapped in ``pytest.param``
    carrying the ``slow`` mark.
    """
    already_seen = set()
    for distname, arg in distdiscrete:
        if distname not in distslow:
            yield distname, arg, distname not in already_seen
        else:
            # NOTE(review): the third value here is the name itself rather
            # than a "first occurrence" flag - confirm this is intended.
            yield pytest.param(distname, arg, distname,
                               marks=pytest.mark.slow)
        already_seen.add(distname)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic())
def test_discrete_basic(distname, arg, first_case):
    # Run the generic consistency checks on one discrete distribution.
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # `distname` is already a distribution object, not a name
        distfn = distname
        distname = 'sample distribution'
    np.random.seed(9765456)
    rvs = distfn.rvs(*arg, size=2000)
    supp = np.unique(rvs)
    # values unused; the call itself must succeed and return two items
    _mean, _var = distfn.stats(*arg)
    check_cdf_ppf(distfn, arg, supp, distname + ' cdf_ppf')

    check_pmf_cdf(distfn, arg, distname)
    check_oth(distfn, arg, supp, distname + ' oth')
    check_edge_support(distfn, arg)
    check_deprecation_warning_gh5982_moment(distfn, arg, distname)
    check_deprecation_warning_gh5982_interval(distfn, arg, distname)

    alpha = 0.01
    check_discrete_chisquare(distfn, arg, rvs, alpha,
                             distname + ' chisquare')

    if not first_case:
        return

    locscale_defaults = (0,)
    meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
             distfn.logsf]
    # make sure arguments are within support
    # for some distributions, this needs to be overridden
    spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0,
              'nchypergeom_wallenius': 6}
    k = spec_k.get(distname, 1)
    check_named_args(distfn, k, arg, locscale_defaults, meths)
    if distname != 'sample distribution':
        check_scale_docstring(distfn)
    check_random_state_property(distfn, arg)
    check_pickling(distfn, arg)
    check_freezing(distfn, arg)

    # Entropy
    check_entropy(distfn, arg, distname)
    if distfn.__class__._entropy != stats.rv_discrete._entropy:
        check_private_entropy(distfn, arg, stats.rv_discrete)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('distname,arg', distdiscrete)
def test_moments(distname, arg):
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # `distname` is already a distribution object, not a name
        distfn = distname
        distname = 'sample distribution'
    mean, var, skew, kurt = distfn.stats(*arg, moments='mvsk')
    check_normalization(distfn, arg, distname)

    # compare `stats` and `moment` methods
    check_moment(distfn, arg, mean, var, distname)
    check_mean_expect(distfn, arg, mean, distname)
    check_var_expect(distfn, arg, mean, var, distname)
    check_skew_expect(distfn, arg, mean, var, skew, distname)
    # the kurtosis check is skipped for these two distributions
    if distname not in ['zipf', 'yulesimon']:
        check_kurt_expect(distfn, arg, mean, var, kurt, distname)

    # frozen distr moments
    for order, raw_moment in ((1, mean), (2, var+mean*mean)):
        check_moment_frozen(distfn, arg, raw_moment, order)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distdiscrete)
def test_rvs_broadcast(dist, shape_args):
    # If shape_only is True, it means the _rvs method of the
    # distribution uses more than one random number to generate a random
    # variate.  That means the result of using rvs with broadcasting or
    # with a nontrivial size will not necessarily be the same as using the
    # numpy.vectorize'd version of rvs(), so we can only compare the shapes
    # of the results, not the values.
    # Whether or not a distribution is in the following list is an
    # implementation detail of the distribution, not a requirement.  If
    # the implementation of the rvs() method of a distribution changes,
    # this test might also have to be changed.
    shape_only = dist in ['betabinom', 'skellam', 'yulesimon', 'dlaplace',
                          'nchypergeom_fisher', 'nchypergeom_wallenius']

    try:
        distfunc = getattr(stats, dist)
    except TypeError:
        # `dist` is a distribution object; build a readable label for it
        distfunc = dist
        dist = 'rv_discrete(values=(%r, %r))' % (dist.xk, dist.pk)
    loc = np.zeros(2)

    # Generate shape parameter arguments: parameter k gets k + 3 entries
    # on its own leading axis, followed by k + 1 singleton axes.
    allargs = []
    leading_sizes = []
    for k in range(distfunc.numargs):
        shp = (k + 3,) + (1,)*(k + 1)
        allargs.append(np.full(shp, shape_args[k]))
        leading_sizes.append(shp[0])
    allargs.append(loc)
    # bshape holds the expected shape when loc, scale, and the shape
    # parameters are all broadcast together.
    bshape = leading_sizes[::-1] + [loc.size]
    check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, [np.int_])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,args', distdiscrete)
def test_ppf_with_loc(dist, args):
    try:
        distfn = getattr(stats, dist)
    except TypeError:
        distfn = dist
    # check with a negative, no and positive relocation.
    np.random.seed(1942349)
    re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)]
    lower, upper = distfn.support(*args)
    for loc in re_locs:
        # ppf(0) is expected one below the shifted lower end of the support
        expected = [lower - 1 + loc, upper + loc]
        actual = [distfn.ppf(0.0, *args, loc=loc),
                  distfn.ppf(1.0, *args, loc=loc)]
        npt.assert_array_equal(expected, actual)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist, args', distdiscrete)
def test_isf_with_loc(dist, args):
    try:
        distfn = getattr(stats, dist)
    except TypeError:
        distfn = dist

    def check_relocations(locs):
        # isf(0) is expected at the shifted upper end of the support and
        # isf(1) one below the shifted lower end.
        lower, upper = distfn.support(*args)
        for loc in locs:
            expected = upper + loc, lower - 1 + loc
            res = (distfn.isf(0., *args, loc=loc),
                   distfn.isf(1., *args, loc=loc))
            npt.assert_array_equal(expected, res)

    # check with a negative, no and positive relocation.
    np.random.seed(1942349)
    check_relocations([np.random.randint(-10, -1), 0,
                       np.random.randint(1, 10)])
    # test broadcasting behaviour
    check_relocations([np.random.randint(-10, -1, size=(5, 3)),
                       np.zeros((5, 3)),
                       np.random.randint(1, 10, size=(5, 3))])
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, supp, msg):
    """Check that ``ppf`` inverts ``cdf`` on points of the support.

    `supp` is assumed to be an array of integers in the support of `distfn`
    (but not necessarily all the integers in the support).  The check also
    assumes that the PMF of every value in the support exceeds 1e-8.
    """
    roundtrip_msg = msg + '-roundtrip'

    # cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer}
    cdf_supp = distfn.cdf(supp, *arg)
    # In very rare cases, the finite precision calculation of ppf(cdf(supp))
    # can produce an array in which an element is off by one.  We nudge the
    # CDF values down by 10 ULPs to help avoid this.
    nudged = cdf_supp - 10*np.spacing(cdf_supp)
    npt.assert_array_equal(distfn.ppf(nudged, *arg), supp, roundtrip_msg)

    # Repeat the same calculation, but with the CDF values decreased by 1e-8.
    lowered = distfn.cdf(supp, *arg) - 1e-8
    npt.assert_array_equal(distfn.ppf(lowered, *arg), supp, roundtrip_msg)

    if hasattr(distfn, 'xk'):
        return

    # Just above a CDF step, ppf must move on by one increment.
    _, upper = distfn.support(*arg)
    interior = supp[supp < upper]
    raised = distfn.cdf(interior, *arg) + 1e-8
    npt.assert_array_equal(distfn.ppf(raised, *arg),
                           interior + distfn.inc, msg + ' ppf-cdf-next')
|
||||
|
||||
|
||||
def check_pmf_cdf(distfn, arg, distname):
    """Check that increments of the CDF agree with the PMF.

    Also verifies that the PMF vanishes midway between the evaluation
    points, for both the unfrozen distribution and a frozen one with
    ``loc=0.5``.
    """
    if hasattr(distfn, 'xk'):
        index = distfn.xk
    else:
        # ten consecutive integers starting just below the 1% quantile
        first = int(distfn.ppf(0.01, *arg) - 1)
        index = list(range(first, first + 10))
    cdfs = distfn.cdf(index, *arg)
    pmfs_cum = distfn.pmf(index, *arg).cumsum()

    tol = 1e-5 if distname == 'skellam' else 1e-10  # ncx2 accuracy
    npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0],
                        atol=tol, rtol=tol)

    # also check that pmf at non-integral k is zero
    k = np.asarray(index)
    midpoints = k[:-1] + np.diff(k)/2
    npt.assert_equal(distfn.pmf(midpoints, *arg), 0)

    # better check frozen distributions, and also when loc != 0
    loc = 0.5
    frozen = distfn(*arg, loc=loc)
    npt.assert_allclose(frozen.pmf(k[1:] + loc), np.diff(frozen.cdf(k + loc)))
    npt.assert_equal(frozen.pmf(midpoints + loc), 0)
|
||||
|
||||
|
||||
def check_moment_frozen(distfn, arg, m, k):
    """Check that the frozen distribution's ``k``-th moment equals ``m``."""
    frozen = distfn(*arg)
    npt.assert_allclose(frozen.moment(k), m, atol=1e-10, rtol=1e-10)
|
||||
|
||||
|
||||
def check_oth(distfn, arg, supp, msg):
    """Check ``sf`` and ``isf`` against ``cdf`` and ``ppf``."""
    tol = dict(atol=1e-10, rtol=1e-10)

    # sf must be the complement of cdf on the support points
    npt.assert_allclose(distfn.sf(supp, *arg), 1. - distfn.cdf(supp, *arg),
                        **tol)

    # isf(q) must agree with ppf(1 - q)
    q = np.linspace(0.01, 0.99, 20)
    npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg),
                        **tol)

    # sf just below and cdf just above the isf-median must exceed one half
    median_sf = distfn.isf(0.5, *arg)
    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
|
||||
|
||||
|
||||
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    """Perform chisquare test for random sample of a discrete distribution

    Parameters
    ----------
    distfn : distribution object
        discrete distribution whose ``support`` and ``cdf`` are used
    arg : sequence
        parameters of distribution
    rvs : array_like
        random sample to be tested against the distribution
    alpha : float
        significance level, threshold for p-value
    msg : string
        label included in the failure message

    Returns
    -------
    None; an assertion fails if the p-value does not exceed `alpha`.

    """
    wsupp = 0.05

    # construct intervals with minimum mass `wsupp`.
    # intervals are left-half-open as in a cdf difference
    _a, _b = distfn.support(*arg)
    lo = int(max(_a, -1000))
    high = int(min(_b, 1000)) + 1
    edges = [lo]
    masses = []
    prev_cdf = 0
    for point in range(lo, high):
        cdf_here = distfn.cdf(point, *arg)
        gained = cdf_here - prev_cdf
        if gained < wsupp - 1e-14:
            # not enough mass accumulated yet; keep extending the interval
            continue
        edges.append(point)
        masses.append(gained)
        prev_cdf = cdf_here
        if cdf_here > (1 - wsupp):
            break
    if edges[-1] < _b:
        # lump the remaining upper tail into one final interval
        edges.append(_b)
        masses.append(1 - prev_cdf)
    distsupp = np.array(edges)
    distmass = np.array(masses)

    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp + 1e-8
    histsupp[0] = _a

    # find sample frequencies and perform chisquare test
    freq, _ = np.histogram(rvs, histsupp)
    _, pval = stats.chisquare(np.array(freq), len(rvs)*distmass)

    npt.assert_(pval > alpha,
                'chisquare - test for %s at arg = %s with pval = %s' %
                (msg, str(arg), str(pval)))
|
||||
|
||||
|
||||
def check_scale_docstring(distfn):
    """Check that the docstring of `distfn` does not mention 'scale'."""
    doc = distfn.__doc__
    if doc is None:
        # Docstrings can be stripped if interpreter is run with -OO
        return
    npt.assert_('scale' not in doc)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pmf', 'logpmf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
@pytest.mark.parametrize('distname, args', distdiscrete)
def test_methods_with_lists(method, distname, args):
    # Test that the discrete distributions can accept Python lists
    # as arguments.
    try:
        dist = getattr(stats, distname)
    except TypeError:
        # `distname` is a distribution object, not a name; nothing to test
        return
    # Exercise the parametrized method itself; previously `pmf` was called
    # for every value of `method`.
    dist_method = getattr(dist, method)
    if method in ['ppf', 'isf']:
        # these methods take probabilities rather than support points
        z = [0.1, 0.2]
    else:
        z = [0, 1]
    p2 = [[p]*2 for p in args]
    loc = [0, 1]
    result = dist_method(z, *p2, loc=loc)
    # Reference: evaluate the same method one element at a time.
    npt.assert_allclose(result,
                        [dist_method(*v) for v in zip(z, *p2, loc)],
                        rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, args', invdistdiscrete)
def test_cdf_gh13280_regression(distname, args):
    # Test for nan output when shape parameters are invalid
    points = np.arange(-2, 15)
    cdf_values = getattr(stats, distname).cdf(points, *args)
    npt.assert_equal(cdf_values, np.nan)
|
||||
|
||||
|
||||
def cases_test_discrete_integer_shapes():
    """Yield ``(distname, shape_name, shapes)`` for integral shape parameters.

    Only the first parameter set of each named distribution is used;
    entries that are not names are skipped.
    """
    # distributions parameters that are only allowed to be integral when
    # fitting, but are allowed to be real as input to PDF, etc.
    integrality_exceptions = {'nbinom': {'n'}}

    visited = set()
    for distname, shapes in distdiscrete:
        if distname in visited:
            continue
        visited.add(distname)

        try:
            dist = getattr(stats, distname)
        except TypeError:
            continue

        exempt = integrality_exceptions.get(distname, set())
        for shape in dist._shape_info():
            if shape.integrality and shape.name not in exempt:
                yield distname, shape.name, shapes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapename, shapes',
                         cases_test_discrete_integer_shapes())
def test_integer_shapes(distname, shapename, shapes):
    dist = getattr(stats, distname)
    names = [info.name for info in dist._shape_info()]
    i = names.index(shapename)  # this element of params must be integral

    valid_shape = shapes[i]
    invalid_shape = valid_shape - 0.5  # arbitrary non-integral value
    new_valid_shape = valid_shape - 1
    # Stack the three variants along axis 0 so that one pmf call covers all.
    stacked_shapes = list(shapes)
    stacked_shapes[i] = [[valid_shape], [invalid_shape], [new_valid_shape]]

    a, b = dist.support(*shapes)
    x = np.round(np.linspace(a, b, 5))

    pmf = dist.pmf(x, *stacked_shapes)
    valid_row, invalid_row, new_valid_row = pmf[0, :], pmf[1, :], pmf[2, :]
    assert not np.any(np.isnan(valid_row))
    assert np.all(np.isnan(invalid_row))
    assert not np.any(np.isnan(new_valid_row))
|
||||
|
||||
|
||||
def test_frozen_attributes():
    # gh-14827 reported that all frozen distributions had both pmf and pdf
    # attributes; continuous should have pdf and discrete should have pmf.
    message = "'rv_discrete_frozen' object has no attribute"
    with pytest.raises(AttributeError, match=message):
        stats.binom(10, 0.5).pdf
    with pytest.raises(AttributeError, match=message):
        stats.binom(10, 0.5).logpdf
    # Freezing must still work when the distribution object carries a
    # stray `pdf` attribute.
    stats.binom.pdf = "herring"
    try:
        frozen_binom = stats.binom(10, 0.5)
        assert isinstance(frozen_binom, rv_discrete_frozen)
    finally:
        # Always undo the monkeypatch so a failure here cannot leak the
        # attribute into other tests that use `stats.binom`.
        delattr(stats.binom, 'pdf')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapes', distdiscrete)
def test_interval(distname, shapes):
    # gh-11026 reported that `interval` returns incorrect values when
    # `confidence=1`. The values were not incorrect, but it was not intuitive
    # that the left end of the interval should extend beyond the support of the
    # distribution. Confirm that this is the behavior for all distributions.
    named = isinstance(distname, str)
    dist = getattr(stats, distname) if named else distname
    a, b = dist.support(*shapes)
    expected = (a-1, b)
    npt.assert_equal(dist.ppf([0, 1], *shapes), expected)
    npt.assert_equal(dist.isf([1, 0], *shapes), expected)
    npt.assert_equal(dist.interval(1, *shapes), expected)
|
||||
|
||||
|
||||
def test_rv_sample():
    """Thoroughly test rv_sample and check that gh-3758 is resolved.

    A random finite distribution is built with ``rv_discrete(values=...)``
    and each public method is compared against a direct computation from
    the support points ``xk`` and probabilities ``pk``.
    """
    # Generate a random discrete distribution
    rng = np.random.default_rng(98430143469)
    xk = np.sort(rng.random(10) * 10)
    pk = rng.random(10)
    pk /= np.sum(pk)
    dist = stats.rv_discrete(values=(xk, pk))

    # Generate points to the left and right of xk
    xk_left = (np.array([0] + xk[:-1].tolist()) + xk)/2
    xk_right = (np.array(xk[1:].tolist() + [xk[-1]+1]) + xk)/2

    # Generate points to the left and right of cdf
    cdf2 = np.cumsum(pk)
    cdf2_left = (np.array([0] + cdf2[:-1].tolist()) + cdf2)/2
    cdf2_right = (np.array(cdf2[1:].tolist() + [1]) + cdf2)/2

    # support - leftmost and rightmost xk
    a, b = dist.support()
    assert_allclose(a, xk[0])
    assert_allclose(b, xk[-1])

    # pmf - supported only on the xk
    assert_allclose(dist.pmf(xk), pk)
    assert_allclose(dist.pmf(xk_right), 0)
    assert_allclose(dist.pmf(xk_left), 0)

    # logpmf is log of the pmf; log(0) = -np.inf
    with np.errstate(divide='ignore'):
        assert_allclose(dist.logpmf(xk), np.log(pk))
        assert_allclose(dist.logpmf(xk_right), -np.inf)
        assert_allclose(dist.logpmf(xk_left), -np.inf)

    # cdf - the cumulative sum of the pmf
    assert_allclose(dist.cdf(xk), cdf2)
    assert_allclose(dist.cdf(xk_right), cdf2)
    assert_allclose(dist.cdf(xk_left), [0]+cdf2[:-1].tolist())

    with np.errstate(divide='ignore'):
        assert_allclose(dist.logcdf(xk), np.log(dist.cdf(xk)),
                        atol=1e-15)
        assert_allclose(dist.logcdf(xk_right), np.log(dist.cdf(xk_right)),
                        atol=1e-15)
        assert_allclose(dist.logcdf(xk_left), np.log(dist.cdf(xk_left)),
                        atol=1e-15)

    # sf is 1-cdf
    assert_allclose(dist.sf(xk), 1-dist.cdf(xk))
    assert_allclose(dist.sf(xk_right), 1-dist.cdf(xk_right))
    assert_allclose(dist.sf(xk_left), 1-dist.cdf(xk_left))

    with np.errstate(divide='ignore'):
        assert_allclose(dist.logsf(xk), np.log(dist.sf(xk)),
                        atol=1e-15)
        assert_allclose(dist.logsf(xk_right), np.log(dist.sf(xk_right)),
                        atol=1e-15)
        assert_allclose(dist.logsf(xk_left), np.log(dist.sf(xk_left)),
                        atol=1e-15)

    # ppf
    assert_allclose(dist.ppf(cdf2), xk)
    assert_allclose(dist.ppf(cdf2_left), xk)
    assert_allclose(dist.ppf(cdf2_right)[:-1], xk[1:])
    assert_allclose(dist.ppf(0), a - 1)
    assert_allclose(dist.ppf(1), b)

    # isf
    sf2 = dist.sf(xk)
    assert_allclose(dist.isf(sf2), xk)
    assert_allclose(dist.isf(1-cdf2_left), dist.ppf(cdf2_left))
    assert_allclose(dist.isf(1-cdf2_right), dist.ppf(cdf2_right))
    assert_allclose(dist.isf(0), b)
    assert_allclose(dist.isf(1), a - 1)

    # interval is (ppf(alpha/2), isf(alpha/2))
    ps = np.linspace(0.01, 0.99, 10)
    int2 = dist.ppf(ps/2), dist.isf(ps/2)
    assert_allclose(dist.interval(1-ps), int2)
    assert_allclose(dist.interval(0), dist.median())
    assert_allclose(dist.interval(1), (a-1, b))

    # median is simply ppf(0.5)
    med2 = dist.ppf(0.5)
    assert_allclose(dist.median(), med2)

    # all four stats (mean, var, skew, and kurtosis) from the definitions
    mean2 = np.sum(xk*pk)
    var2 = np.sum((xk - mean2)**2 * pk)
    skew2 = np.sum((xk - mean2)**3 * pk) / var2**(3/2)
    kurt2 = np.sum((xk - mean2)**4 * pk) / var2**2 - 3
    assert_allclose(dist.mean(), mean2)
    assert_allclose(dist.std(), np.sqrt(var2))
    assert_allclose(dist.var(), var2)
    assert_allclose(dist.stats(moments='mvsk'), (mean2, var2, skew2, kurt2))

    # noncentral moment against definition
    mom3 = np.sum((xk**3) * pk)
    assert_allclose(dist.moment(3), mom3)

    # expect - check against moments
    assert_allclose(dist.expect(lambda x: 1), 1)
    assert_allclose(dist.expect(), mean2)
    assert_allclose(dist.expect(lambda x: x**3), mom3)

    # entropy is the negative of the expected value of log(p)
    with np.errstate(divide='ignore'):
        assert_allclose(-dist.expect(lambda x: dist.logpmf(x)), dist.entropy())

    # RVS is just ppf of uniform random variates
    rng = np.random.default_rng(98430143469)
    rvs = dist.rvs(size=100, random_state=rng)
    rng = np.random.default_rng(98430143469)
    rvs0 = dist.ppf(rng.random(size=100))
    assert_allclose(rvs, rvs0)
|
||||
566
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_discrete_distns.py
vendored
Normal file
566
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_discrete_distns.py
vendored
Normal file
@@ -0,0 +1,566 @@
|
||||
import pytest
|
||||
from scipy.stats import (betabinom, hypergeom, nhypergeom, bernoulli,
|
||||
boltzmann, skellam, zipf, zipfian, binom, nbinom,
|
||||
nchypergeom_fisher, nchypergeom_wallenius, randint)
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (
|
||||
assert_almost_equal, assert_equal, assert_allclose, suppress_warnings
|
||||
)
|
||||
from scipy.special import binom as special_binom
|
||||
from scipy.optimize import root_scalar
|
||||
from scipy.integrate import quad
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression CDF[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
                         [(3, 10, 4, 5,
                           0.9761904761904762, 1e-15),
                          (107, 10000, 3000, 215,
                           0.9999999997226765, 1e-15),
                          (10, 10000, 3000, 215,
                           2.681682217692179e-21, 5e-11)])
def test_hypergeom_cdf(k, M, n, N, expected, rtol):
    """Compare ``hypergeom.cdf`` with a reference value to within `rtol`."""
    p = hypergeom.cdf(k, M, n, N)
    assert_allclose(p, expected, rtol=rtol)
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
                         [(25, 10000, 3000, 215,
                           0.9999999999052958, 1e-15),
                          (125, 10000, 3000, 215,
                           1.4416781705752128e-18, 5e-11)])
def test_hypergeom_sf(k, M, n, N, expected, rtol):
    """Compare ``hypergeom.sf`` with a reference value to within `rtol`."""
    p = hypergeom.sf(k, M, n, N)
    assert_allclose(p, expected, rtol=rtol)
|
||||
|
||||
|
||||
def test_hypergeom_logpmf():
    """Check ``hypergeom.logpmf`` symmetries and its Bernoulli special case."""
    # symmetries test
    # f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
    k = 5
    N = 50
    K = 10
    n = 5
    logpmf1 = hypergeom.logpmf(k, N, K, n)
    logpmf2 = hypergeom.logpmf(n - k, N, N - K, n)
    logpmf3 = hypergeom.logpmf(K - k, N, K, N - n)
    logpmf4 = hypergeom.logpmf(k, N, n, K)
    assert_almost_equal(logpmf1, logpmf2, decimal=12)
    assert_almost_equal(logpmf1, logpmf3, decimal=12)
    assert_almost_equal(logpmf1, logpmf4, decimal=12)

    # test related distribution
    # Bernoulli distribution if n = 1
    k = 1
    N = 10
    K = 7
    n = 1
    hypergeom_logpmf = hypergeom.logpmf(k, N, K, n)
    bernoulli_logpmf = bernoulli.logpmf(k, K/N)
    assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12)
|
||||
|
||||
|
||||
def test_nhypergeom_pmf():
    """Check ``nhypergeom.pmf`` against an expression built on ``hypergeom``."""
    # test with hypergeom
    M, n, r = 45, 13, 8
    k = 6
    NHG = nhypergeom.pmf(k, M, n, r)
    HG = hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
    assert_allclose(HG, NHG, rtol=1e-10)
|
||||
|
||||
|
||||
def test_nhypergeom_pmfcdf():
    """Check ``nhypergeom`` pmf and cdf against hard-coded fractions."""
    # test pmf and cdf with arbitrary values.
    M = 8
    n = 3
    r = 4
    support = np.arange(n+1)
    pmf = nhypergeom.pmf(support, M, n, r)
    cdf = nhypergeom.cdf(support, M, n, r)
    assert_allclose(pmf, [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
    assert_allclose(cdf, [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_r0():
    """With ``r = 0`` the pmf must be 1 at ``k = 0`` and 0 elsewhere."""
    # test with `r = 0`.
    M = 10
    n = 3
    r = 0
    pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r)
    assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_rvs_shape():
    """``nhypergeom.rvs`` must honour a `size` wider than the parameters."""
    # Check that when given a size with more dimensions than the
    # dimensions of the broadcast parameters, rvs returns an array
    # with the correct shape.
    x = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3))
    assert x.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
def test_nhypergeom_accuracy():
    """``nhypergeom.rvs`` must agree with inverse transform sampling."""
    # Check that nhypergeom.rvs post-gh-13431 gives the same values as
    # inverse transform sampling
    # The global seed is reset before each draw so both paths consume the
    # same random stream.
    np.random.seed(0)
    x = nhypergeom.rvs(22, 7, 11, size=100)
    np.random.seed(0)
    p = np.random.uniform(size=100)
    y = nhypergeom.ppf(p, 22, 7, 11)
    assert_equal(x, y)
|
||||
|
||||
|
||||
def test_boltzmann_upper_bound():
    """Check ``boltzmann`` pmf/cdf on a grid reaching outside ``[0, N)``."""
    k = np.arange(-3, 5)

    # With N = 1 all of the mass sits at k = 0.
    N = 1
    p = boltzmann.pmf(k, 0.123, N)
    expected = k == 0
    assert_equal(p, expected)

    lam = np.log(2)
    N = 3
    p = boltzmann.pmf(k, lam, N)
    expected = [0, 0, 0, 4/7, 2/7, 1/7, 0, 0]
    assert_allclose(p, expected, rtol=1e-13)

    c = boltzmann.cdf(k, lam, N)
    expected = [0, 0, 0, 4/7, 6/7, 1, 1, 1]
    assert_allclose(c, expected, rtol=1e-13)
|
||||
|
||||
|
||||
def test_betabinom_a_and_b_unity():
    """``betabinom(n, 1, 1)`` must be uniform on ``0..n``."""
    # test limiting case that betabinom(n, 1, 1) is a discrete uniform
    # distribution from 0 to n
    n = 20
    k = np.arange(n + 1)
    p = betabinom(n, 1, 1).pmf(k)
    expected = np.repeat(1 / (n + 1), n + 1)
    assert_almost_equal(p, expected)
|
||||
|
||||
|
||||
def test_betabinom_bernoulli():
    """``betabinom(1, a, b)`` must match ``bernoulli(a / (a + b))``."""
    # test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
    a = 2.3
    b = 0.63
    k = np.arange(2)
    p = betabinom(1, a, b).pmf(k)
    expected = bernoulli(a / (a + b)).pmf(k)
    assert_almost_equal(p, expected)
|
||||
|
||||
|
||||
def test_issue_10317():
    """``nbinom.interval`` with ``p = 1`` must return ``(0, 0)``."""
    alpha, n, p = 0.9, 10, 1
    assert_equal(nbinom.interval(confidence=alpha, n=n, p=p), (0, 0))
|
||||
|
||||
|
||||
def test_issue_11134():
    """``binom.interval`` with ``p = 0`` must return ``(0, 0)``."""
    alpha, n, p = 0.95, 10, 0
    assert_equal(binom.interval(confidence=alpha, n=n, p=p), (0, 0))
|
||||
|
||||
|
||||
def test_issue_7406():
    """``binom.ppf`` with ``n = 0`` must be 0 for random `q`; check ends."""
    np.random.seed(0)
    assert_equal(binom.ppf(np.random.rand(10), 0, 0.5), 0)

    # Also check that endpoints (q=0, q=1) are correct
    assert_equal(binom.ppf(0, 0, 0.5), -1)
    assert_equal(binom.ppf(1, 0, 0.5), 0)
|
||||
|
||||
|
||||
def test_issue_5122():
    """Check ``binom.ppf`` with ``p = 0`` at ``q = 0``, interior `q`, ``q = 1``."""
    p = 0
    # Seeded generator: the original drew `n` from the unseeded global
    # RNG, so a failure could not be reproduced.
    rng = np.random.default_rng(5122)
    n = rng.integers(100, size=10)

    x = 0
    ppf = binom.ppf(x, n, p)
    assert_equal(ppf, -1)

    x = np.linspace(0.01, 0.99, 10)
    ppf = binom.ppf(x, n, p)
    assert_equal(ppf, 0)

    x = 1
    ppf = binom.ppf(x, n, p)
    assert_equal(ppf, n)
|
||||
|
||||
|
||||
def test_issue_1603():
    """``binom.ppf(0.01)`` must be 0 for ``n = 1000`` and `p` <= 1e-3."""
    assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0)
|
||||
|
||||
|
||||
def test_issue_5503():
    """``binom.cdf(x, 2*x, 0.5)`` must stay near 0.5 for `x` up to 1e14."""
    p = 0.5
    x = np.logspace(3, 14, 12)
    assert_allclose(binom.cdf(x, 2*x, p), 0.5, atol=1e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('x, n, p, cdf_desired', [
    (300, 1000, 3/10, 0.51559351981411995636),
    (3000, 10000, 3/10, 0.50493298381929698016),
    (30000, 100000, 3/10, 0.50156000591726422864),
    (300000, 1000000, 3/10, 0.50049331906666960038),
    (3000000, 10000000, 3/10, 0.50015600124585261196),
    (30000000, 100000000, 3/10, 0.50004933192735230102),
    (30010000, 100000000, 3/10, 0.98545384016570790717),
    (29990000, 100000000, 3/10, 0.01455017177985268670),
    (29950000, 100000000, 3/10, 5.02250963487432024943e-28),
])
def test_issue_5503pt2(x, n, p, cdf_desired):
    """Compare ``binom.cdf`` with a reference value for large `n`."""
    assert_allclose(binom.cdf(x, n, p), cdf_desired)
|
||||
|
||||
|
||||
def test_issue_5503pt3():
    """Check ``binom.cdf`` for a huge `n` paired with a tiny `p`."""
    # From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]
    assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384)
|
||||
|
||||
|
||||
def test_issue_6682():
    """Check ``nbinom.sf`` far in the upper tail against a value from R."""
    # Reference value from R:
    # options(digits=16)
    # print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
    assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35)
|
||||
|
||||
|
||||
def test_boost_divide_by_zero_issue_15101():
    """``binom.pmf`` far in the upper tail must evaluate to 0."""
    n = 1000
    p = 0.01
    k = 996
    assert_allclose(binom.pmf(k, n, p), 0.0)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
def test_skellam_gh11474():
    """Check ``skellam.cdf(0, mu, mu)`` against values generated in R."""
    # test issue reported in gh-11474 caused by `cdfchn`
    mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
    cdf = skellam.cdf(0, mu, mu)
    # generated in R
    # library(skellam)
    # options(digits = 16)
    # mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
    # pskellam(0, mu, mu, TRUE)
    cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
                    0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
                    0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
    assert_allclose(cdf, cdf_expected)
|
||||
|
||||
|
||||
class TestZipfian:
    """Tests of ``zipfian`` against ``zipf``, R output and a naive pmf."""

    def test_zipfian_asymptotic(self):
        # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo
        a = 6.5
        N = 10000000
        k = np.arange(1, 21)
        assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a))
        assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a))
        assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a))
        assert_allclose(zipfian.stats(a, N, moments='msvk'),
                        zipf.stats(a, moments='msvk'))

    def test_zipfian_continuity(self):
        # test that zipfian(0.999999, n) ~ zipfian(1.000001, n)
        # (a = 1 switches between methods of calculating harmonic sum)
        alt1, agt1 = 0.99999999, 1.00000001
        N = 30
        k = np.arange(1, N + 1)
        assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N),
                        rtol=5e-7)
        assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N),
                        rtol=5e-7)
        assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N),
                        rtol=5e-7)
        assert_allclose(zipfian.stats(alt1, N, moments='msvk'),
                        zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7)

    def test_zipfian_R(self):
        # test against R VGAM package
        # library(VGAM)
        # k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
        # a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
        #        4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
        # n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
        # pmf <- dzipf(k, N = n, shape = a)
        # cdf <- pzipf(k, N = n, shape = a)
        # print(pmf)
        # print(cdf)
        # The legacy global RNG with seed 0 is kept as is: the hard-coded
        # reference values below belong to these exact draws.
        np.random.seed(0)
        k = np.random.randint(1, 20, size=10)
        a = np.random.rand(10)*10 + 1
        n = np.random.randint(1, 100, size=10)
        pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
               3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
               5.860662e-06, 1.053948e-04]
        cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
               0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
        # skip the first point; zipUC is not accurate for low a, n
        assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6)
        assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5)

    # (a, n) pairs for `test_zipfian_naive`, built once at class creation.
    np.random.seed(0)
    naive_tests = np.vstack((np.logspace(-2, 1, 10),
                             np.random.randint(2, 40, 10))).T

    @pytest.mark.parametrize("a, n", naive_tests)
    def test_zipfian_naive(self, a, n):
        # test against bare-bones implementation

        @np.vectorize
        def Hns(n, s):
            """Naive implementation of harmonic sum"""
            return (1/np.arange(1, n+1)**s).sum()

        @np.vectorize
        def pzip(k, a, n):
            """Naive implementation of zipfian pmf"""
            if k < 1 or k > n:
                return 0.
            else:
                return 1 / k**a / Hns(n, a)

        k = np.arange(n+1)
        pmf = pzip(k, a, n)
        cdf = np.cumsum(pmf)
        mean = np.average(k, weights=pmf)
        var = np.average((k - mean)**2, weights=pmf)
        std = var**0.5
        skew = np.average(((k-mean)/std)**3, weights=pmf)
        kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3
        assert_allclose(zipfian.pmf(k, a, n), pmf)
        assert_allclose(zipfian.cdf(k, a, n), cdf)
        assert_allclose(zipfian.stats(a, n, moments="mvsk"),
                        [mean, var, skew, kurtosis])
|
||||
|
||||
|
||||
class TestNCH():
    """Tests of the Fisher and Wallenius noncentral hypergeometric distns.

    The class attributes below are random parameter arrays drawn once, at
    class-creation time, from the legacy global NumPy RNG.
    """

    np.random.seed(2)  # seeds 0 and 1 had some xl = xu; randint failed
    shape = (2, 4, 3)
    max_m = 100
    m1 = np.random.randint(1, max_m, size=shape)    # red balls
    m2 = np.random.randint(1, max_m, size=shape)    # white balls
    N = m1 + m2                                     # total balls
    n = randint.rvs(0, N, size=N.shape)             # number of draws
    xl = np.maximum(0, n-m2)                        # lower bound of support
    xu = np.minimum(n, m1)                          # upper bound of support
    x = randint.rvs(xl, xu, size=xl.shape)
    odds = np.random.rand(*x.shape)*2

    # test output is more readable when function names (strings) are passed
    @pytest.mark.parametrize('dist_name',
                             ['nchypergeom_fisher', 'nchypergeom_wallenius'])
    def test_nch_hypergeom(self, dist_name):
        # Both noncentral hypergeometric distributions reduce to the
        # hypergeometric distribution when odds = 1
        dists = {'nchypergeom_fisher': nchypergeom_fisher,
                 'nchypergeom_wallenius': nchypergeom_wallenius}
        dist = dists[dist_name]
        x, N, m1, n = self.x, self.N, self.m1, self.n
        assert_allclose(dist.pmf(x, N, m1, n, odds=1),
                        hypergeom.pmf(x, N, m1, n))

    def test_nchypergeom_fisher_naive(self):
        # test against a very simple implementation
        x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds

        @np.vectorize
        def pmf_mean_var(x, N, m1, n, w):
            # simple implementation of nchypergeom_fisher pmf
            m2 = N - m1
            xl = np.maximum(0, n-m2)
            xu = np.minimum(n, m1)

            def f(x):
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                return t1 * t2 * w**x

            def P(k):
                return sum((f(y)*y**k for y in range(xl, xu + 1)))

            P0 = P(0)
            P1 = P(1)
            P2 = P(2)
            pmf = f(x) / P0
            mean = P1 / P0
            var = P2 / P0 - (P1 / P0)**2
            return pmf, mean, var

        pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
        assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
        assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
                        mean)
        assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
                        var)

    def test_nchypergeom_wallenius_naive(self):
        # test against a very simple implementation

        np.random.seed(2)
        shape = (2, 4, 3)
        max_m = 100
        m1 = np.random.randint(1, max_m, size=shape)
        m2 = np.random.randint(1, max_m, size=shape)
        N = m1 + m2
        n = randint.rvs(0, N, size=N.shape)
        xl = np.maximum(0, n-m2)
        xu = np.minimum(n, m1)
        x = randint.rvs(xl, xu, size=xl.shape)
        w = np.random.rand(*x.shape)*2

        def support(N, m1, n, w):
            m2 = N - m1
            xl = np.maximum(0, n-m2)
            xu = np.minimum(n, m1)
            return xl, xu

        @np.vectorize
        def mean(N, m1, n, w):
            m2 = N - m1
            xl, xu = support(N, m1, n, w)

            def fun(u):
                return u/m1 + (1 - (n-u)/m2)**w - 1

            return root_scalar(fun, bracket=(xl, xu)).root

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning,
                       message="invalid value encountered in mean")
            assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
                            mean(N, m1, n, w), rtol=2e-2)

        @np.vectorize
        def variance(N, m1, n, w):
            m2 = N - m1
            u = mean(N, m1, n, w)
            a = u * (m1 - u)
            b = (n-u)*(u + m2 - n)
            return N*a*b / ((N-1) * (m1*b + m2*a))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning,
                       message="invalid value encountered in mean")
            assert_allclose(
                nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
                variance(N, m1, n, w),
                rtol=5e-2
            )

        @np.vectorize
        def pmf(x, N, m1, n, w):
            m2 = N - m1

            def integrand(t):
                D = w*(m1 - x) + (m2 - (n-x))
                res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
                return res

            def f(x):
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                the_integral = quad(integrand, 0, 1,
                                    epsrel=1e-16, epsabs=1e-16)
                return t1 * t2 * the_integral[0]

            return f(x)

        pmf0 = pmf(x, N, m1, n, w)
        pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)

        atol, rtol = 1e-6, 1e-6
        i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
        assert i.sum() > np.prod(shape) / 2  # works at least half the time

        # for those that fail, discredit the naive implementation
        # (distinct loop names so the arrays above are not shadowed)
        for N_i, m1_i, n_i, w_i in zip(N[~i], m1[~i], n[~i], w[~i]):
            # get the support
            xl_i, xu_i = support(N_i, m1_i, n_i, w_i)
            x_i = np.arange(xl_i, xu_i + 1)

            # calculate sum of pmf over the support
            # the naive implementation is very wrong in these cases
            assert pmf(x_i, N_i, m1_i, n_i, w_i).sum() < .5
            assert_allclose(
                nchypergeom_wallenius.pmf(x_i, N_i, m1_i, n_i, w_i).sum(), 1)

    def test_wallenius_against_mpmath(self):
        # precompute data with mpmath since naive implementation above
        # is not reliable. See source code in gh-13330.
        M = 50
        n = 30
        N = 20
        odds = 2.25
        # Expected results, computed with mpmath.
        sup = np.arange(21)
        pmf = np.array([3.699003068656875e-20,
                        5.89398584245431e-17,
                        2.1594437742911123e-14,
                        3.221458044649955e-12,
                        2.4658279241205077e-10,
                        1.0965862603981212e-08,
                        3.057890479665704e-07,
                        5.622818831643761e-06,
                        7.056482841531681e-05,
                        0.000618899425358671,
                        0.003854172932571669,
                        0.01720592676256026,
                        0.05528844897093792,
                        0.12772363313574242,
                        0.21065898367825722,
                        0.24465958845359234,
                        0.1955114898110033,
                        0.10355390084949237,
                        0.03414490375225675,
                        0.006231989845775931,
                        0.0004715577304677075])
        mean = 14.808018384813426
        var = 2.6085975877923717

        # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
        # has only three digits of accuracy (~ 2.1511e-14).
        assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
                        rtol=1e-13, atol=1e-13)
        assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
                        mean, rtol=1e-13)
        assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
                        var, rtol=1e-11)

    @pytest.mark.parametrize('dist_name',
                             ['nchypergeom_fisher', 'nchypergeom_wallenius'])
    def test_rvs_shape(self, dist_name):
        # Check that when given a size with more dimensions than the
        # dimensions of the broadcast parameters, rvs returns an array
        # with the correct shape.
        dists = {'nchypergeom_fisher': nchypergeom_fisher,
                 'nchypergeom_wallenius': nchypergeom_wallenius}
        dist = dists[dist_name]
        x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
        assert x.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mu, q, expected",
                         [[10, 120, -1.240089881791596e-38],
                          [1500, 0, -86.61466680572661]])
def test_nbinom_11465(mu, q, expected):
    """Compare ``nbinom.logcdf`` with reference values at the extreme tails."""
    # test nbinom.logcdf at extreme tails
    size = 20
    # Convert the (mu, size) parameters used by R into SciPy's (n, p).
    n, p = size, size/(size+mu)
    # In R:
    # options(digits=16)
    # pnbinom(mu=10, size=20, q=120, log.p=TRUE)
    assert_allclose(nbinom.logcdf(q, n, p), expected)
|
||||
|
||||
|
||||
def test_gh_17146():
    """The Bernoulli pmf must be exactly zero at non-integral `x`."""
    # Check that discrete distributions return PMF of zero at non-integral x.
    # See gh-17146.
    x = np.linspace(0, 1, 11)
    p = 0.8
    pmf = bernoulli(p).pmf(x)
    i = (x % 1 == 0)  # mask of the integral grid points (0 and 1)
    assert_allclose(pmf[-1], p)
    assert_allclose(pmf[0], 1-p)
    assert_equal(pmf[~i], 0)
|
||||
7625
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_distributions.py
vendored
Executable file
7625
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_distributions.py
vendored
Executable file
File diff suppressed because it is too large
Load Diff
287
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_entropy.py
vendored
Normal file
287
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_entropy.py
vendored
Normal file
@@ -0,0 +1,287 @@
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
# avoid new uses of the following; prefer assert/np.testing.assert_allclose
|
||||
from numpy.testing import (assert_, assert_almost_equal,
|
||||
assert_array_almost_equal)
|
||||
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
import scipy.stats as stats
|
||||
|
||||
|
||||
class TestEntropy:
    """Tests of ``stats.entropy`` (Shannon and relative entropy)."""

    def test_entropy_positive(self):
        # See ticket #497
        pk = [0.5, 0.2, 0.3]
        qk = [0.1, 0.25, 0.65]
        eself = stats.entropy(pk, pk)
        edouble = stats.entropy(pk, qk)
        assert_(0.0 == eself)
        assert_(edouble >= 0.0)

    def test_entropy_base(self):
        pk = np.ones(16, float)
        S = stats.entropy(pk, base=2.)
        assert_(abs(S - 4.) < 1.e-5)

        qk = np.ones(16, float)
        qk[:8] = 2.
        S = stats.entropy(pk, qk)
        S2 = stats.entropy(pk, qk, base=2.)
        assert_(abs(S/S2 - np.log(2.)) < 1.e-5)

    def test_entropy_zero(self):
        # Test for PR-479
        assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
                            decimal=12)

    def test_entropy_2d(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.1933259, 0.18609809])

    def test_entropy_2d_zero(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [np.inf, 0.18609809])

        pk[0][0] = 0.0
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.17403988, 0.18609809])

    def test_entropy_base_2d_nondefault_axis(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        assert_array_almost_equal(stats.entropy(pk, axis=1),
                                  [0.63651417, 0.63651417, 0.66156324])

    def test_entropy_2d_nondefault_axis(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk, axis=1),
                                  [0.231049, 0.231049, 0.127706])

    def test_entropy_raises_value_error(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.1, 0.2], [0.6, 0.3]]
        assert_raises(ValueError, stats.entropy, pk, qk)

    def test_base_entropy_with_axis_0_is_equal_to_default(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        assert_array_almost_equal(stats.entropy(pk, axis=0),
                                  stats.entropy(pk))

    def test_entropy_with_axis_0_is_equal_to_default(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk, axis=0),
                                  stats.entropy(pk, qk))

    def test_base_entropy_transposed(self):
        pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        assert_array_almost_equal(stats.entropy(pk.T).T,
                                  stats.entropy(pk, axis=1))

    def test_entropy_transposed(self):
        pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        qk = np.array([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
        assert_array_almost_equal(stats.entropy(pk.T, qk.T).T,
                                  stats.entropy(pk, qk, axis=1))

    def test_entropy_broadcasting(self):
        # Seed the RNG so the inputs are reproducible; the original called
        # `np.random.rand(0)`, which draws an empty array and seeds nothing.
        np.random.seed(0)
        x = np.random.rand(3)
        y = np.random.rand(2, 1)
        res = stats.entropy(x, y, axis=-1)
        assert_equal(res[0], stats.entropy(x, y[0]))
        assert_equal(res[1], stats.entropy(x, y[1]))

    def test_entropy_shape_mismatch(self):
        x = np.random.rand(10, 1, 12)
        y = np.random.rand(11, 2)
        message = "shape mismatch: objects cannot be broadcast"
        with pytest.raises(ValueError, match=message):
            stats.entropy(x, y)

    def test_input_validation(self):
        x = np.random.rand(10)
        message = "`base` must be a positive number."
        with pytest.raises(ValueError, match=message):
            stats.entropy(x, base=-2)
|
||||
|
||||
|
||||
class TestDifferentialEntropy:
|
||||
"""
|
||||
Vasicek results are compared with the R package vsgoftest.
|
||||
|
||||
# library(vsgoftest)
|
||||
#
|
||||
# samp <- c(<values>)
|
||||
# entropy.estimate(x = samp, window = <window_length>)
|
||||
|
||||
"""
|
||||
|
||||
def test_differential_entropy_vasicek(self):
|
||||
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal(100)
|
||||
|
||||
entropy = stats.differential_entropy(values, method='vasicek')
|
||||
assert_allclose(entropy, 1.342551, rtol=1e-6)
|
||||
|
||||
entropy = stats.differential_entropy(values, window_length=1,
|
||||
method='vasicek')
|
||||
assert_allclose(entropy, 1.122044, rtol=1e-6)
|
||||
|
||||
entropy = stats.differential_entropy(values, window_length=8,
|
||||
method='vasicek')
|
||||
assert_allclose(entropy, 1.349401, rtol=1e-6)
|
||||
|
||||
def test_differential_entropy_vasicek_2d_nondefault_axis(self):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((3, 100))
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=1, method='vasicek')
|
||||
assert_allclose(
|
||||
entropy,
|
||||
[1.342551, 1.341826, 1.293775],
|
||||
rtol=1e-6,
|
||||
)
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=1, window_length=1,
|
||||
method='vasicek')
|
||||
assert_allclose(
|
||||
entropy,
|
||||
[1.122044, 1.102944, 1.129616],
|
||||
rtol=1e-6,
|
||||
)
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=1, window_length=8,
|
||||
method='vasicek')
|
||||
assert_allclose(
|
||||
entropy,
|
||||
[1.349401, 1.338514, 1.292332],
|
||||
rtol=1e-6,
|
||||
)
|
||||
|
||||
def test_differential_entropy_raises_value_error(self):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((3, 100))
|
||||
|
||||
error_str = (
|
||||
r"Window length \({window_length}\) must be positive and less "
|
||||
r"than half the sample size \({sample_size}\)."
|
||||
)
|
||||
|
||||
sample_size = values.shape[1]
|
||||
|
||||
for window_length in {-1, 0, sample_size//2, sample_size}:
|
||||
|
||||
formatted_error_str = error_str.format(
|
||||
window_length=window_length,
|
||||
sample_size=sample_size,
|
||||
)
|
||||
|
||||
with assert_raises(ValueError, match=formatted_error_str):
|
||||
stats.differential_entropy(
|
||||
values,
|
||||
window_length=window_length,
|
||||
axis=1,
|
||||
)
|
||||
|
||||
def test_base_differential_entropy_with_axis_0_is_equal_to_default(self):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((100, 3))
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=0)
|
||||
default_entropy = stats.differential_entropy(values)
|
||||
assert_allclose(entropy, default_entropy)
|
||||
|
||||
def test_base_differential_entropy_transposed(self):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((3, 100))
|
||||
|
||||
assert_allclose(
|
||||
stats.differential_entropy(values.T).T,
|
||||
stats.differential_entropy(values, axis=1),
|
||||
)
|
||||
|
||||
def test_input_validation(self):
    """Invalid `base` and `method` arguments must raise ``ValueError``."""
    x = np.random.rand(10)

    # (expected message pattern, offending keyword arguments)
    bad_calls = [
        ("`base` must be a positive number or `None`.", {'base': -2}),
        ("`method` must be one of...", {'method': 'ekki-ekki'}),
    ]
    for message, kwargs in bad_calls:
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, **kwargs)
|
||||
|
||||
@pytest.mark.parametrize('method', ['vasicek', 'van es',
                                    'ebrahimi', 'correa'])
def test_consistency(self, method):
    """Each estimator should be close to the true entropy on a big sample."""
    # 'correa' is run on a smaller sample than the other methods
    sample_size = 1000000
    if method == 'correa':
        sample_size = 10000

    sample = stats.norm.rvs(size=sample_size, random_state=0)
    estimate = stats.differential_entropy(sample, method=method)
    assert_allclose(estimate, stats.norm.entropy(), rtol=0.005)
|
||||
|
||||
# values from differential_entropy reference [6], table 1, n=50, m=7
norm_rmse_std_cases = {  # method: (RMSE, STD)
    'vasicek': (0.198, 0.109),
    'van es': (0.212, 0.110),
    'correa': (0.135, 0.112),
    'ebrahimi': (0.128, 0.109),
}

@pytest.mark.parametrize('method, expected',
                         list(norm_rmse_std_cases.items()))
def test_norm_rmse_std(self, method, expected):
    """Compare estimator RMSE and standard deviation on normal samples.

    The expected values are the tabulated ones from differential_entropy
    reference [6]. Because all repetitions are evaluated in one call along
    ``axis=-1``, this also exercises vectorization.
    """
    n_reps, sample_size, window = 10000, 50, 7
    expected_rmse, expected_std = expected

    samples = stats.norm.rvs(size=(n_reps, sample_size), random_state=0)
    estimates = stats.differential_entropy(samples, window_length=window,
                                           method=method, axis=-1)

    errors = estimates - stats.norm.entropy()
    rmse = np.sqrt(np.mean(errors**2))
    assert_allclose(rmse, expected_rmse, atol=0.005)
    assert_allclose(np.std(estimates), expected_std, atol=0.002)
|
||||
|
||||
# values from differential_entropy reference [6], table 2, n=50, m=7
expon_rmse_std_cases = {  # method: (RMSE, STD)
    'vasicek': (0.194, 0.148),
    'van es': (0.179, 0.149),
    'correa': (0.155, 0.152),
    'ebrahimi': (0.151, 0.148),
}

@pytest.mark.parametrize('method, expected',
                         list(expon_rmse_std_cases.items()))
def test_expon_rmse_std(self, method, expected):
    """Compare estimator RMSE and standard deviation on exponential samples.

    The expected values are the tabulated ones from differential_entropy
    reference [6]. Because all repetitions are evaluated in one call along
    ``axis=-1``, this also exercises vectorization.
    """
    n_reps, sample_size, window = 10000, 50, 7
    expected_rmse, expected_std = expected

    samples = stats.expon.rvs(size=(n_reps, sample_size), random_state=0)
    estimates = stats.differential_entropy(samples, window_length=window,
                                           method=method, axis=-1)

    errors = estimates - stats.expon.entropy()
    rmse = np.sqrt(np.mean(errors**2))
    assert_allclose(rmse, expected_rmse, atol=0.005)
    assert_allclose(np.std(estimates), expected_std, atol=0.002)
|
||||
|
||||
@pytest.mark.parametrize('n, method', [(8, 'van es'),
                                       (12, 'ebrahimi'),
                                       (1001, 'vasicek')])
def test_method_auto(self, n, method):
    """The default method must equal the explicitly named one for size `n`."""
    sample = stats.norm.rvs(size=(n,), random_state=0)
    auto_result = stats.differential_entropy(sample)
    explicit_result = stats.differential_entropy(sample, method=method)
    assert auto_result == explicit_result
|
||||
850
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_fit.py
vendored
Normal file
850
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_fit.py
vendored
Normal file
@@ -0,0 +1,850 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
import pytest
|
||||
from scipy import stats
|
||||
from scipy.optimize import differential_evolution
|
||||
|
||||
from .test_continuous_basic import distcont
|
||||
from scipy.stats._distn_infrastructure import FitError
|
||||
from scipy.stats._distr_params import distdiscrete
|
||||
from scipy.stats import goodness_of_fit
|
||||
|
||||
|
||||
# this is not a proper statistical test for convergence, but only
# verifies that the estimate and true values don't differ by too much

fit_sizes = [1000, 5000, 10000]  # sample sizes to try

thresh_percent = 0.25  # percent of true parameters for fail cut-off
thresh_min = 0.75  # minimum difference estimate - true to fail test

# distributions listed under "MLE" in `failing_fits`
mle_failing_fits = [
    'burr', 'chi2', 'gausshyper', 'genexpon', 'gengamma', 'kappa4',
    'ksone', 'kstwo', 'mielke', 'ncf', 'ncx2', 'pearson3',
    'powerlognorm', 'truncexpon', 'truncpareto', 'tukeylambda',
    'vonmises', 'levy_stable', 'trapezoid', 'truncweibull_min',
    'studentized_range',
]

# distributions listed under "MM" in `failing_fits`
mm_failing_fits = [
    'alpha', 'betaprime', 'burr', 'burr12', 'cauchy', 'chi',
    'chi2', 'crystalball', 'dgamma', 'dweibull', 'f',
    'fatiguelife', 'fisk', 'foldcauchy', 'genextreme',
    'gengamma', 'genhyperbolic', 'gennorm', 'genpareto',
    'halfcauchy', 'invgamma', 'invweibull', 'johnsonsu',
    'kappa3', 'ksone', 'kstwo', 'levy', 'levy_l',
    'levy_stable', 'loglaplace', 'lomax', 'mielke', 'nakagami',
    'ncf', 'nct', 'ncx2', 'pareto', 'powerlognorm', 'powernorm',
    'skewcauchy', 't', 'trapezoid', 'triang', 'truncpareto',
    'truncweibull_min', 'tukeylambda', 'studentized_range',
]

# not sure if these fail, but they caused my patience to fail
mm_slow_fits = [
    'argus', 'exponpow', 'exponweib', 'gausshyper', 'genexpon',
    'genhalflogistic', 'halfgennorm', 'gompertz', 'johnsonsb',
    'kappa4', 'kstwobign', 'recipinvgauss',
    'truncexpon', 'vonmises', 'vonmises_line',
]

# fitting method -> names of distributions expected to fail with it
failing_fits = {
    "MM": mm_failing_fits + mm_slow_fits,
    "MLE": mle_failing_fits,
}

# Don't run the fit test on these:
skip_fit = [
    'erlang',  # Subclass of gamma, generates a warning.
    'genhyperbolic',  # too slow
]
|
||||
|
||||
|
||||
def cases_test_cont_fit():
    """Yield ``(distname, arg)`` from `distcont`, leaving out `skip_fit`.

    The cases feed a test of how close the parameters estimated by the
    ``fit`` method of continuous distributions are to the true parameters.
    Note: that test is slow, and some distributions don't converge with
    sample size <= 10000.
    """
    for distname, arg in distcont:
        if distname in skip_fit:
            continue
        yield distname, arg
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize('distname,arg', cases_test_cont_fit())
@pytest.mark.parametrize('method', ["MLE", "MM"])
def test_cont_fit(distname, arg, method):
    """Check that ``fit`` roughly recovers the true distribution parameters.

    Samples of increasing size (`fit_sizes`) are drawn from the distribution
    with shape parameters `arg`, loc 0 and scale 1, and fitted with `method`.
    The test passes as soon as one fit is within the thresholds and fails
    if none is, or if a fit returns NaN.

    Distributions listed in ``failing_fits[method]`` are xfail-ed unless the
    environment variable ``SCIPY_XFAIL`` is set to a non-zero integer.
    """
    if distname in failing_fits[method]:
        # Skip failing fits unless overridden
        try:
            xfail = not int(os.environ['SCIPY_XFAIL'])
        except (KeyError, ValueError):
            # variable not set, or not an integer: keep the xfail
            xfail = True
        if xfail:
            msg = "Fitting %s doesn't work reliably yet" % distname
            msg += (" [Set environment variable SCIPY_XFAIL=1 to run this"
                    " test nevertheless.]")
            pytest.xfail(msg)

    distfn = getattr(stats, distname)

    # true parameters are the shapes followed by loc=0 and scale=1
    truearg = np.hstack([arg, [0.0, 1.0]])
    # per-parameter tolerance: relative to the true value, with a floor
    diffthreshold = np.max(np.vstack([truearg*thresh_percent,
                                      np.full(distfn.numargs+2, thresh_min)]),
                           0)

    for fit_size in fit_sizes:
        # Note that if a fit succeeds, the other fit_sizes are skipped
        np.random.seed(1234)

        with np.errstate(all='ignore'):
            rvs = distfn.rvs(*arg, size=fit_size)
            est = distfn.fit(rvs, method=method)  # start with default values

        diff = est - truearg

        # threshold for location
        diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,
                                    thresh_min])

        if np.any(np.isnan(est)):
            raise AssertionError('nan returned in fit')
        else:
            if np.all(np.abs(diff) <= diffthreshold):
                break
    else:
        # reached only when no sample size produced an acceptable fit
        txt = 'parameter: %s\n' % str(truearg)
        txt += 'estimated: %s\n' % str(est)
        txt += 'diff     : %s\n' % str(diff)
        raise AssertionError('fit not very good in %s\n' % distfn.name + txt)
|
||||
|
||||
|
||||
def _check_loc_scale_mle_fit(name, data, desired, atol=None):
|
||||
d = getattr(stats, name)
|
||||
actual = d.fit(data)[-2:]
|
||||
assert_allclose(actual, desired, atol=atol,
|
||||
err_msg='poor mle fit of (loc, scale) in %s' % name)
|
||||
|
||||
|
||||
def test_non_default_loc_scale_mle_fit():
    """Check fitted (loc, scale) of `uniform` and `expon` on a small sample."""
    sample = np.array([1.01, 1.78, 1.78, 1.78, 1.88, 1.88, 1.88, 2.00])
    expectations = (
        ('uniform', [1.01, 0.99]),
        ('expon', [1.01, 0.73875]),
    )
    for dist_name, loc_scale in expectations:
        _check_loc_scale_mle_fit(dist_name, sample, loc_scale, 1e-3)
|
||||
|
||||
|
||||
def test_expon_fit():
    """Regression test for gh-6167: `expon` fit with ``floc=0``."""
    sample = [0, 0, 0, 0, 2, 2, 2, 2]
    fitted = stats.expon.fit(sample, floc=0)
    assert_allclose(fitted, [0, 1.0], atol=1e-3)
|
||||
|
||||
|
||||
def test_fit_error():
    """`beta.fit` on this 0/1 sample must raise `FitError` and warn."""
    zeros_and_ones = np.concatenate([np.zeros(29), np.ones(21)])
    expected = "Optimization converged to parameters that are..."
    with pytest.raises(FitError, match=expected):
        with pytest.warns(RuntimeWarning):
            stats.beta.fit(zeros_and_ones)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dist, params",
                         [(stats.norm, (0.5, 2.5)),  # type: ignore[attr-defined] # noqa
                          (stats.binom, (10, 0.3, 2))])  # type: ignore[attr-defined] # noqa
def test_nnlf_and_related_methods(dist, params):
    """`nnlf` and `_penalized_nnlf` must equal minus the summed log-density."""
    rng = np.random.default_rng(983459824)

    # distributions exposing `pdf` use `logpdf`; the others use `logpmf`
    logpxf = dist.logpdf if hasattr(dist, 'pdf') else dist.logpmf

    sample = dist.rvs(*params, size=100, random_state=rng)
    expected = -logpxf(sample, *params).sum()
    plain = dist.nnlf(params, sample)
    penalized = dist._penalized_nnlf(params, sample)
    assert_allclose(plain, expected)
    assert_allclose(penalized, expected)
|
||||
|
||||
|
||||
def cases_test_fit_mle():
    """Yield the distribution cases for the basic MLE fit test.

    Every key of ``dict(distdiscrete + distcont)`` is yielded, either bare
    or wrapped in `pytest.param` with a ``skip``, ``slow`` or ``xslow`` mark.
    """
    # These fail default test or hang
    skip_basic_fit = {'argus', 'foldnorm', 'truncpareto', 'truncweibull_min',
                      'ksone', 'levy_stable', 'studentized_range', 'kstwo'}
    slow_basic_fit = {'burr12', 'johnsonsb', 'bradford', 'fisk', 'mielke',
                      'exponpow', 'rdist', 'norminvgauss', 'betaprime',
                      'powerlaw', 'pareto', 'johnsonsu', 'loglaplace',
                      'wrapcauchy', 'weibull_max', 'arcsine', 'binom', 'rice',
                      'uniform', 'f', 'invweibull', 'genpareto',
                      'nbinom', 'kappa3', 'lognorm', 'halfgennorm', 'pearson3',
                      'alpha', 't', 'crystalball', 'fatiguelife', 'nakagami',
                      'kstwobign', 'gompertz', 'dweibull', 'lomax', 'invgauss',
                      'recipinvgauss', 'chi', 'foldcauchy', 'powernorm',
                      'gennorm', 'randint', 'genextreme'}
    xslow_basic_fit = {'nchypergeom_fisher', 'nchypergeom_wallenius',
                       'gausshyper', 'genexpon', 'gengamma', 'genhyperbolic',
                       'geninvgauss', 'tukeylambda', 'skellam', 'ncx2',
                       'hypergeom', 'nhypergeom', 'zipfian', 'ncf',
                       'truncnorm', 'powerlognorm', 'beta',
                       'loguniform', 'reciprocal', 'trapezoid', 'nct',
                       'kappa4', 'betabinom', 'exponweib', 'genhalflogistic',
                       'burr', 'triang'}

    for dist in dict(distdiscrete + distcont):
        # pick the mark first; unmarked names are yielded as they are
        if not isinstance(dist, str) or dist in skip_basic_fit:
            mark = pytest.mark.skip(reason="tested separately")
        elif dist in slow_basic_fit:
            mark = pytest.mark.slow(reason="too slow (>= 0.25s)")
        elif dist in xslow_basic_fit:
            mark = pytest.mark.xslow(reason="too slow (>= 1.0s)")
        else:
            yield dist
            continue
        yield pytest.param(dist, marks=mark)
|
||||
|
||||
|
||||
def cases_test_fit_mse():
    """Yield the distribution cases for the basic MSE fit test.

    Every key of ``dict(distdiscrete + distcont)`` is yielded, either bare
    or wrapped in `pytest.param` with a ``skip``, ``slow``, ``xslow`` or
    ``filterwarnings`` mark.
    """
    # the first four are so slow that I'm not sure whether they would pass
    skip_basic_fit = {'levy_stable', 'studentized_range', 'ksone', 'skewnorm',
                      'norminvgauss',  # super slow (~1 hr) but passes
                      'kstwo',  # very slow (~25 min) but passes
                      'geninvgauss',  # quite slow (~4 minutes) but passes
                      'gausshyper', 'genhyperbolic',  # integration warnings
                      'argus',  # close, but doesn't meet tolerance
                      'vonmises'}  # can have negative CDF; doesn't play nice
    slow_basic_fit = {'wald', 'genextreme', 'anglit', 'semicircular',
                      'kstwobign', 'arcsine', 'genlogistic', 'truncexpon',
                      'fisk', 'uniform', 'exponnorm', 'maxwell', 'lomax',
                      'laplace_asymmetric', 'lognorm', 'foldcauchy',
                      'genpareto', 'powernorm', 'loglaplace', 'foldnorm',
                      'recipinvgauss', 'exponpow', 'bradford', 'weibull_max',
                      'gompertz', 'dweibull', 'truncpareto', 'weibull_min',
                      'johnsonsu', 'loggamma', 'kappa3', 'fatiguelife',
                      'pareto', 'invweibull', 'alpha', 'erlang', 'dgamma',
                      'chi2', 'crystalball', 'nakagami', 'truncweibull_min',
                      't', 'vonmises_line', 'triang', 'wrapcauchy', 'gamma',
                      'mielke', 'chi', 'johnsonsb', 'exponweib',
                      'genhalflogistic', 'randint', 'nhypergeom', 'hypergeom',
                      'betabinom'}
    xslow_basic_fit = {'burr', 'halfgennorm', 'invgamma',
                       'invgauss', 'powerlaw', 'burr12', 'trapezoid', 'kappa4',
                       'f', 'powerlognorm', 'ncx2', 'rdist', 'reciprocal',
                       'loguniform', 'betaprime', 'rice', 'gennorm',
                       'gengamma', 'truncnorm', 'ncf', 'nct', 'pearson3',
                       'beta', 'genexpon', 'tukeylambda', 'zipfian',
                       'nchypergeom_wallenius', 'nchypergeom_fisher'}
    warns_basic_fit = {'skellam'}  # can remove mark after gh-14901 is resolved

    for dist in dict(distdiscrete + distcont):
        # pick the mark first; unmarked names are yielded as they are
        if not isinstance(dist, str) or dist in skip_basic_fit:
            mark = pytest.mark.skip(reason="Fails. Oh well.")
        elif dist in slow_basic_fit:
            mark = pytest.mark.slow(reason="too slow (>= 0.25s)")
        elif dist in xslow_basic_fit:
            mark = pytest.mark.xslow(reason="too slow (>= 1.0s)")
        elif dist in warns_basic_fit:
            mark = pytest.mark.filterwarnings('ignore::RuntimeWarning')
        else:
            yield dist
            continue
        yield pytest.param(dist, marks=mark)
|
||||
|
||||
|
||||
def cases_test_fitstart():
    """Yield ``(distname, shapes)`` from `distcont` for `test_fitstart`.

    Entries whose name is not a string, and the two slow distributions,
    are left out.
    """
    slow = {'studentized_range', 'recipinvgauss'}
    for distname, shapes in dict(distcont).items():
        if isinstance(distname, str) and distname not in slow:
            yield distname, shapes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapes', cases_test_fitstart())
def test_fitstart(distname, shapes):
    """The shape part of the `_fitstart` guess must pass `_argcheck`."""
    dist = getattr(stats, distname)
    sample = np.random.default_rng(216342614).random(10)

    with np.errstate(invalid='ignore', divide='ignore'):  # irrelevant to test
        guess = dist._fitstart(sample)

    # the last two entries of the guess are dropped before the check
    shape_guess = guess[:-2]
    assert dist._argcheck(*shape_guess)
|
||||
|
||||
|
||||
def assert_nlff_less_or_close(dist, data, params1, params0, rtol=1e-7, atol=0,
                              nlff_name='nnlf'):
    """Assert that `params1` scores lower than, or close to, `params0`.

    The score is ``getattr(dist, nlff_name)(params, data)``. Nothing is
    raised when the score of `params1` is strictly smaller; otherwise the
    two scores must agree within `rtol` and `atol`.
    """
    objective = getattr(dist, nlff_name)
    candidate_score = objective(params1, data)
    reference_score = objective(params0, data)
    if candidate_score < reference_score:
        return
    np.testing.assert_allclose(candidate_score, reference_score,
                               rtol=rtol, atol=atol)
|
||||
|
||||
|
||||
class TestFit:
    """Tests of `stats.fit`: input validation and quality of the fits."""
    dist = stats.binom  # type: ignore[attr-defined]
    seed = 654634816187
    rng = np.random.default_rng(seed)
    data = stats.binom.rvs(5, 0.5, size=100, random_state=rng)  # type: ignore[attr-defined] # noqa
    # the same shape bounds, once as a sequence and once keyed by name
    shape_bounds_a = [(1, 10), (0, 1)]
    shape_bounds_d = {'n': (1, 10), 'p': (0, 1)}
    atol = 5e-2
    rtol = 1e-2
    tols = {'atol': atol, 'rtol': rtol}

    def opt(self, *args, **kwds):
        """Call `differential_evolution` with ``seed=0``."""
        return differential_evolution(*args, seed=0, **kwds)

    def test_dist_iv(self):
        """A `dist` argument that is not a distribution is rejected."""
        expected = "`dist` must be an instance of..."
        with pytest.raises(ValueError, match=expected):
            stats.fit(10, self.data, self.shape_bounds_a)

    def test_data_iv(self):
        """`data` must be one-dimensional and contain finite numbers."""
        expected = "`data` must be exactly one-dimensional."
        with pytest.raises(ValueError, match=expected):
            stats.fit(self.dist, [[1, 2, 3]], self.shape_bounds_a)

        expected = "All elements of `data` must be finite numbers."
        not_finite_numbers = (
            [1, 2, 3, np.nan],
            [1, 2, 3, np.inf],
            ['1', '2', '3'],
        )
        for bad_data in not_finite_numbers:
            with pytest.raises(ValueError, match=expected):
                stats.fit(self.dist, bad_data, self.shape_bounds_a)

    def test_bounds_iv(self):
        """Malformed or infeasible `bounds` produce a warning or an error."""
        dist, data = self.dist, self.data

        # a bound for an unrecognized parameter name only warns
        message = "Bounds provided for the following unrecognized..."
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(dist, data, {'n': (1, 10), 'p': (0, 1), '1': (0, 10)})

        # (expected message, offending bounds), checked in this order
        invalid = [
            ("Each element of a `bounds` sequence must be a tuple...",
             [(1, 10, 3), (0, 1)]),
            ("Each element of `bounds` must be a tuple specifying...",
             [(1, 10, 3), (0, 1, 0.5)]),
            ("Each element of `bounds` must be a tuple specifying...",
             [1, 0]),
            ("A `bounds` sequence must contain at least 2 elements...",
             [(1, 10)]),
            ("A `bounds` sequence may not contain more than 3 elements...",
             [(1, 10), (1, 10), (1, 10), (1, 10)]),
            ("There are no values for `p` on the interval...",
             {'n': (1, 10), 'p': (1, 0)}),
            ("There are no values for `n` on the interval...",
             [(10, 1), (0, 1)]),
            ("There are no integer values for `n` on the interval...",
             [(1.4, 1.6), (0, 1)]),
        ]
        for message, bad_bounds in invalid:
            with pytest.raises(ValueError, match=message):
                stats.fit(dist, data, bad_bounds)

        # same error with no bounds at all and with infinite bounds for `n`
        message = "The intersection of user-provided bounds for `n`"
        with pytest.raises(ValueError, match=message):
            stats.fit(dist, data)
        with pytest.raises(ValueError, match=message):
            stats.fit(dist, data, [(-np.inf, np.inf), (0, 1)])

    def test_guess_iv(self):
        """Malformed or out-of-range `guess` produces a warning or an error."""
        warns, raises = pytest.warns, pytest.raises

        # (context manager, category, expected message, offending guess)
        cases = [
            (warns, RuntimeWarning,
             "Guesses provided for the following unrecognized...",
             {'n': 1, 'p': 0.5, '1': 255}),
            (raises, ValueError,
             "Each element of `guess` must be a scalar...",
             {'n': 1, 'p': 'hi'}),
            (raises, ValueError,
             "Each element of `guess` must be a scalar...",
             [1, 'f']),
            (raises, ValueError,
             "Each element of `guess` must be a scalar...",
             [[1, 2]]),
            (raises, ValueError,
             "A `guess` sequence must contain at least 2...",
             [1]),
            (raises, ValueError,
             "A `guess` sequence may not contain more than 3...",
             [1, 2, 3, 4]),
            (warns, RuntimeWarning,
             "Guess for parameter `n` rounded...",
             {'n': 4.5, 'p': -0.5}),
            (warns, RuntimeWarning,
             "Guess for parameter `loc` rounded...",
             [5, 0.5, 0.5]),
            (warns, RuntimeWarning,
             "Guess for parameter `p` clipped...",
             {'n': 5, 'p': -0.5}),
            (warns, RuntimeWarning,
             "Guess for parameter `loc` clipped...",
             [5, 0.5, 1]),
        ]
        for check, category, message, guess in cases:
            with check(category, match=message):
                stats.fit(self.dist, self.data, self.shape_bounds_d,
                          guess=guess)

    def basic_fit_test(self, dist_name, method):
        """Fit `dist_name` with `method` and compare against the true params.

        Data are drawn from the distribution with the shapes listed in
        `distcont` / `distdiscrete` and a random loc (and scale). The fit
        must score lower than, or close to, the generating parameters.
        """
        n_samples = 5000
        all_shapes = dict(distcont + distdiscrete)
        rng = np.random.default_rng(self.seed)
        dist = getattr(stats, dist_name)
        shapes = np.array(all_shapes[dist_name])

        # shape bounds span a factor of 10 on either side of the true value
        factor = 10.**np.sign(shapes)
        bounds = np.empty((len(shapes) + 2, 2), dtype=np.float64)
        bounds[:-2, 0] = shapes/factor
        bounds[:-2, 1] = shapes*factor
        bounds[-2] = (0, 10)
        bounds[-1] = (1e-16, 10)
        loc = rng.uniform(*bounds[-2])
        scale = rng.uniform(*bounds[-1])
        ref = list(all_shapes[dist_name]) + [loc, scale]

        if getattr(dist, 'pmf', False):
            # drop the scale entry and use the floor of the drawn loc
            ref = ref[:-1]
            ref[-1] = np.floor(loc)
            data = dist.rvs(*ref, size=n_samples, random_state=rng)
            bounds = bounds[:-1]
        if getattr(dist, 'pdf', False):
            data = dist.rvs(*ref, size=n_samples, random_state=rng)

        with npt.suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "overflow encountered")
            res = stats.fit(dist, data, bounds, method=method,
                            optimizer=self.opt)

        # objective function of the distribution that belongs to `method`
        nlff_name = {'mle': 'nnlf', 'mse': '_penalized_nlpsf'}[method]
        assert_nlff_less_or_close(dist, data, res.params, ref, **self.tols,
                                  nlff_name=nlff_name)

    @pytest.mark.parametrize("dist_name", cases_test_fit_mle())
    def test_basic_fit_mle(self, dist_name):
        """Run the basic fit test with ``method="mle"``."""
        self.basic_fit_test(dist_name, "mle")

    @pytest.mark.parametrize("dist_name", cases_test_fit_mse())
    def test_basic_fit_mse(self, dist_name):
        """Run the basic fit test with ``method="mse"``."""
        self.basic_fit_test(dist_name, "mse")

    def test_argus(self):
        """Fit `argus` with hand-picked parameters and bounds."""
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        n_samples = 1000
        rng = np.random.default_rng(self.seed)
        dist = stats.argus
        true_params = (1., 2., 3.)
        sample = dist.rvs(*true_params, size=n_samples, random_state=rng)
        bounds = {'chi': (0.1, 10), 'loc': (0.1, 10), 'scale': (0.1, 10)}
        res = stats.fit(dist, sample, bounds, optimizer=self.opt)

        assert_nlff_less_or_close(dist, sample, res.params, true_params,
                                  **self.tols)

    def test_foldnorm(self):
        """Fit `foldnorm` with hand-picked parameters and bounds."""
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        n_samples = 1000
        rng = np.random.default_rng(self.seed)
        dist = stats.foldnorm
        true_params = (1.952125337355587, 2., 3.)
        sample = dist.rvs(*true_params, size=n_samples, random_state=rng)
        bounds = {'c': (0.1, 10), 'loc': (0.1, 10), 'scale': (0.1, 10)}
        res = stats.fit(dist, sample, bounds, optimizer=self.opt)

        assert_nlff_less_or_close(dist, sample, res.params, true_params,
                                  **self.tols)

    def test_truncpareto(self):
        """Fit `truncpareto` with hand-picked parameters and bounds."""
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        n_samples = 1000
        rng = np.random.default_rng(self.seed)
        dist = stats.truncpareto
        true_params = (1.8, 5.3, 2.3, 4.1)
        sample = dist.rvs(*true_params, size=n_samples, random_state=rng)
        bounds = [(0.1, 10)]*4
        res = stats.fit(dist, sample, bounds, optimizer=self.opt)

        assert_nlff_less_or_close(dist, sample, res.params, true_params,
                                  **self.tols)

    def test_truncweibull_min(self):
        """Fit `truncweibull_min` with hand-picked parameters and bounds."""
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        n_samples = 1000
        rng = np.random.default_rng(self.seed)
        dist = stats.truncweibull_min
        true_params = (2.5, 0.25, 1.75, 2., 3.)
        sample = dist.rvs(*true_params, size=n_samples, random_state=rng)
        bounds = [(0.1, 10)]*5
        res = stats.fit(dist, sample, bounds, optimizer=self.opt)

        assert_nlff_less_or_close(dist, sample, res.params, true_params,
                                  **self.tols)

    def test_missing_shape_bounds(self):
        """Bounds may be omitted for parameters with a finite domain."""
        # some distributions have a small domain w.r.t. a parameter, e.g.
        # $p \in [0, 1]$ for binomial distribution
        # User does not need to provide these because the intersection of the
        # user's bounds (none) and the distribution's domain is finite
        n_samples = 1000
        rng = np.random.default_rng(self.seed)

        dist = stats.binom
        n, p, loc = 10, 0.65, 0
        sample = dist.rvs(n, p, loc=loc, size=n_samples, random_state=rng)
        bounds = {'n': np.array([0, 20])}  # check arrays are OK, too
        res = stats.fit(dist, sample, bounds, optimizer=self.opt)
        assert_allclose(res.params, (n, p, loc), **self.tols)

        dist = stats.bernoulli
        p, loc = 0.314159, 0
        sample = dist.rvs(p, loc=loc, size=n_samples, random_state=rng)
        res = stats.fit(dist, sample, optimizer=self.opt)
        assert_allclose(res.params, (p, loc), **self.tols)

    def test_fit_only_loc_scale(self):
        """Fit `norm` with bounds on loc only, scale only, and both."""
        n_samples = 5000
        rng = np.random.default_rng(self.seed)
        dist = stats.norm
        loc_bounds = (0, 5)
        scale_bounds = (0, 5)

        # fit only loc
        loc, scale = 1.5, 1
        sample = dist.rvs(loc=loc, size=n_samples, random_state=rng)
        res = stats.fit(dist, sample, {'loc': loc_bounds},
                        optimizer=self.opt)
        assert_allclose(res.params, (loc, scale), **self.tols)

        # fit only scale
        loc, scale = 0, 2.5
        sample = dist.rvs(scale=scale, size=n_samples, random_state=rng)
        res = stats.fit(dist, sample, {'scale': scale_bounds},
                        optimizer=self.opt)
        assert_allclose(res.params, (loc, scale), **self.tols)

        # fit only loc and scale
        loc, scale = 1.5, 2.5
        sample = dist.rvs(loc=loc, scale=scale, size=n_samples,
                          random_state=rng)
        res = stats.fit(dist, sample,
                        {'loc': loc_bounds, 'scale': scale_bounds},
                        optimizer=self.opt)
        assert_allclose(res.params, (loc, scale), **self.tols)

    def test_everything_fixed(self):
        """Parameters whose bounds coincide, or are left out, stay fixed."""
        n_samples = 5000
        rng = np.random.default_rng(self.seed)

        dist = stats.norm
        loc, scale = 1.5, 2.5
        sample = dist.rvs(loc=loc, scale=scale, size=n_samples,
                          random_state=rng)

        # loc, scale fixed to 0, 1 by default
        res = stats.fit(dist, sample)
        assert_allclose(res.params, (0, 1), **self.tols)

        # loc, scale explicitly fixed
        fixed = {'loc': (loc, loc), 'scale': (scale, scale)}
        res = stats.fit(dist, sample, fixed)
        assert_allclose(res.params, (loc, scale), **self.tols)

        # `n` gets fixed during polishing
        dist = stats.binom
        n, p, loc = 10, 0.65, 0
        sample = dist.rvs(n, p, loc=loc, size=n_samples, random_state=rng)
        shape_bounds = {'n': (0, 20), 'p': (0.65, 0.65)}
        res = stats.fit(dist, sample, shape_bounds, optimizer=self.opt)
        assert_allclose(res.params, (n, p, loc), **self.tols)

    def test_failure(self):
        """A fit that cannot succeed is reported through the result object."""
        n_samples = 5000
        rng = np.random.default_rng(self.seed)

        dist = stats.nbinom
        true_shapes = (5, 0.5)
        sample = dist.rvs(*true_shapes, size=n_samples, random_state=rng)

        assert sample.min() == 0
        # With lower bounds on location at 0.5, likelihood is zero
        bounds = [(0, 30), (0, 1), (0.5, 10)]
        res = stats.fit(dist, sample, bounds)
        expected_start = "Optimization converged to parameter values that are"
        assert res.message.startswith(expected_start)
        assert res.success is False

    @pytest.mark.xslow
    def test_guess(self):
        """A good `guess` lets the optimizer reach the true parameters."""
        # Test that guess helps DE find the desired solution
        n_samples = 2000
        rng = np.random.default_rng(self.seed)
        dist = stats.nhypergeom
        true_params = (20, 7, 12, 0)
        bounds = [(2, 200), (0.7, 70), (1.2, 120), (0, 10)]

        sample = dist.rvs(*true_params, size=n_samples, random_state=rng)

        # without a guess the true parameters are not recovered ...
        res = stats.fit(dist, sample, bounds, optimizer=self.opt)
        assert not np.allclose(res.params, true_params, **self.tols)

        # ... with the guess they are
        res = stats.fit(dist, sample, bounds, guess=true_params,
                        optimizer=self.opt)
        assert_allclose(res.params, true_params, **self.tols)

    def test_mse_accuracy_1(self):
        """Compare MLE and MSE scale estimates of `expon` with known values."""
        # Test maximum spacing estimation against example from Wikipedia
        # https://en.wikipedia.org/wiki/Maximum_spacing_estimation#Examples
        sample = [2, 4]
        dist = stats.expon
        bounds = {'loc': (0, 0), 'scale': (1e-8, 10)}

        mle_result = stats.fit(dist, sample, bounds=bounds, method='mle')
        assert_allclose(mle_result.params.scale, 3, atol=1e-3)

        mse_result = stats.fit(dist, sample, bounds=bounds, method='mse')
        assert_allclose(mse_result.params.scale, 3.915, atol=1e-3)

    def test_mse_accuracy_2(self):
        """Compare the MSE fit of `uniform` with a closed-form reference."""
        # Test maximum spacing estimation against example from Wikipedia
        # https://en.wikipedia.org/wiki/Maximum_spacing_estimation#Examples
        rng = np.random.default_rng(9843212616816518964)

        dist = stats.uniform
        n = 10
        sample = dist(3, 6).rvs(size=n, random_state=rng)
        bounds = {'loc': (0, 10), 'scale': (1e-8, 10)}
        res = stats.fit(dist, sample, bounds=bounds, method='mse')
        # (loc=3.608118420015416, scale=5.509323262055043)

        # reference end points computed from the sample extremes
        ordered = np.sort(sample)
        smallest, largest = ordered[0], ordered[-1]
        a = (n*smallest - largest)/(n - 1)
        b = (n*largest - smallest)/(n - 1)
        ref = a, b-a  # (3.6081133632151503, 5.509328130317254)
        assert_allclose(res.params, ref, rtol=1e-4)
|
||||
|
||||
|
||||
# Data from Matlab: https://www.mathworks.com/help/stats/lillietest.html
examgrades = [
    65, 61, 81, 88, 69, 89, 55, 84,
    86, 84, 71, 81, 84, 81, 78, 67,
    96, 66, 73, 75, 59, 71, 69, 63,
    79, 76, 63, 85, 87, 88, 80, 71,
    65, 84, 71, 75, 81, 79, 64, 65,
    84, 77, 70, 75, 84, 75, 73, 92,
    90, 79, 80, 71, 73, 71, 58, 79,
    73, 64, 77, 82, 81, 59, 54, 82,
    57, 79, 79, 73, 74, 82, 63, 64,
    73, 69, 87, 68, 81, 73, 83, 73,
    80, 73, 73, 71, 66, 78, 64, 74,
    68, 67, 75, 75, 80, 85, 74, 76,
    80, 77, 93, 70, 86, 80, 81, 83,
    68, 60, 85, 64, 74, 82, 81, 77,
    66, 85, 75, 81, 69, 60, 83, 72,
]
|
||||
|
||||
|
||||
class TestGoodnessOfFit:
    """Tests of `goodness_of_fit`.

    Covers input validation, agreement with `kstest`, `cramervonmises`,
    `anderson` and hard-coded reference values, and the handling of the
    `guessed_params`, `fit_params` and `known_params` dictionaries.
    """

    def test_gof_iv(self):
        """Invalid inputs raise the expected exception type and message."""
        dist = stats.norm
        x = [1, 2, 3]

        message = r"`dist` must be a \(non-frozen\) instance of..."
        with pytest.raises(TypeError, match=message):
            goodness_of_fit(stats.norm(), x)

        message = "`data` must be a one-dimensional array of numbers."
        with pytest.raises(ValueError, match=message):
            goodness_of_fit(dist, [[1, 2, 3]])

        message = "`statistic` must be one of..."
        with pytest.raises(ValueError, match=message):
            goodness_of_fit(dist, x, statistic='mm')

        message = "`n_mc_samples` must be an integer."
        with pytest.raises(TypeError, match=message):
            goodness_of_fit(dist, x, n_mc_samples=1000.5)

        message = "'herring' cannot be used to seed a"
        with pytest.raises(ValueError, match=message):
            goodness_of_fit(dist, x, random_state='herring')

    def test_against_ks(self):
        """With all parameters known, results agree with exact `kstest`."""
        rng = np.random.default_rng(8517426291317196949)
        x = examgrades
        known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ks', random_state=rng)
        ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact')
        assert_allclose(res.statistic, ref.statistic)  # ~0.0848
        assert_allclose(res.pvalue, ref.pvalue, atol=5e-3)  # ~0.335

    def test_against_lilliefors(self):
        """With parameters fit, the KS statistic matches `kstest` but the
        p-value is compared with a separate hard-coded reference."""
        rng = np.random.default_rng(2291803665717442724)
        x = examgrades
        res = goodness_of_fit(stats.norm, x, statistic='ks', random_state=rng)
        known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
        ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact')
        assert_allclose(res.statistic, ref.statistic)  # ~0.0848
        assert_allclose(res.pvalue, 0.0348, atol=5e-3)

    def test_against_cvm(self):
        """With all parameters known, results agree with `cramervonmises`."""
        rng = np.random.default_rng(8674330857509546614)
        x = examgrades
        known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='cvm', random_state=rng)
        ref = stats.cramervonmises(x, stats.norm(**known_params).cdf)
        assert_allclose(res.statistic, ref.statistic)  # ~0.090
        assert_allclose(res.pvalue, ref.pvalue, atol=5e-3)  # ~0.636

    def test_against_anderson_case_0(self):
        # "Case 0" is where loc and scale are known [1]
        rng = np.random.default_rng(7384539336846690410)
        x = np.arange(1, 101)
        # loc that produced critical value of statistic found w/ root_scalar
        known_params = {'loc': 45.01575354024957, 'scale': 30}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 2.492)  # See [1] Table 1A 1.0
        assert_allclose(res.pvalue, 0.05, atol=5e-3)

    def test_against_anderson_case_1(self):
        # "Case 1" is where scale is known and loc is fit [1]
        rng = np.random.default_rng(5040212485680146248)
        x = np.arange(1, 101)
        # scale that produced critical value of statistic found w/ root_scalar
        known_params = {'scale': 29.957112639101933}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 0.908)  # See [1] Table 1B 1.1
        assert_allclose(res.pvalue, 0.1, atol=5e-3)

    def test_against_anderson_case_2(self):
        # "Case 2" is where loc is known and scale is fit [1]
        rng = np.random.default_rng(726693985720914083)
        x = np.arange(1, 101)
        # loc that produced critical value of statistic found w/ root_scalar
        known_params = {'loc': 44.5680212261933}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 2.904)  # See [1] Table 1B 1.2
        assert_allclose(res.pvalue, 0.025, atol=5e-3)

    def test_against_anderson_case_3(self):
        # "Case 3" is where both loc and scale are fit [1]
        rng = np.random.default_rng(6763691329830218206)
        # c that produced critical value of statistic found w/ root_scalar
        x = stats.skewnorm.rvs(1.4477847789132101, loc=1, scale=2, size=100,
                               random_state=rng)
        res = goodness_of_fit(stats.norm, x, statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 0.559)  # See [1] Table 1B 1.2
        assert_allclose(res.pvalue, 0.15, atol=5e-3)

    @pytest.mark.slow
    def test_against_anderson_gumbel_r(self):
        """Statistic and p-value agree with `anderson` for `gumbel_r`."""
        rng = np.random.default_rng(7302761058217743)
        # c that produced critical value of statistic found w/ root_scalar
        x = stats.genextreme(0.051896837188595134, loc=0.5,
                             scale=1.5).rvs(size=1000, random_state=rng)
        res = goodness_of_fit(stats.gumbel_r, x, statistic='ad',
                              random_state=rng)
        ref = stats.anderson(x, dist='gumbel_r')
        assert_allclose(res.statistic, ref.critical_values[0])
        assert_allclose(res.pvalue, ref.significance_level[0]/100, atol=5e-3)

    def test_params_effects(self):
        # Ensure that `guessed_params`, `fit_params`, and `known_params` have
        # the intended effects.
        rng = np.random.default_rng(9121950977643805391)
        x = stats.skewnorm.rvs(-5.044559778383153, loc=1, scale=2, size=50,
                               random_state=rng)

        # Show that `guessed_params` don't fit to the guess,
        # but `fit_params` and `known_params` respect the provided fit
        guessed_params = {'c': 13.4}
        fit_params = {'scale': 13.73}
        known_params = {'loc': -13.85}
        # The generator is re-created with the same seed before each call.
        rng = np.random.default_rng(9121950977643805391)
        res1 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
                               guessed_params=guessed_params,
                               fit_params=fit_params,
                               known_params=known_params, random_state=rng)
        assert not np.allclose(res1.fit_result.params.c, 13.4)
        assert_equal(res1.fit_result.params.scale, 13.73)
        assert_equal(res1.fit_result.params.loc, -13.85)

        # Show that changing the guess changes the parameter that gets fit,
        # and it changes the null distribution
        guessed_params = {'c': 2}
        rng = np.random.default_rng(9121950977643805391)
        res2 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
                               guessed_params=guessed_params,
                               fit_params=fit_params,
                               known_params=known_params, random_state=rng)
        assert not np.allclose(res2.fit_result.params.c,
                               res1.fit_result.params.c, rtol=1e-8)
        assert not np.allclose(res2.null_distribution,
                               res1.null_distribution, rtol=1e-8)
        assert_equal(res2.fit_result.params.scale, 13.73)
        assert_equal(res2.fit_result.params.loc, -13.85)

        # If we set all parameters as fit_params and known_params,
        # they're all fixed to those values, but the null distribution
        # varies.
        fit_params = {'c': 13.4, 'scale': 13.73}
        rng = np.random.default_rng(9121950977643805391)
        res3 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
                               guessed_params=guessed_params,
                               fit_params=fit_params,
                               known_params=known_params, random_state=rng)
        assert_equal(res3.fit_result.params.c, 13.4)
        assert_equal(res3.fit_result.params.scale, 13.73)
        assert_equal(res3.fit_result.params.loc, -13.85)
        assert not np.allclose(res3.null_distribution, res1.null_distribution)
|
||||
|
||||
|
||||
class TestFitResult:
    """Tests of the object returned by `stats.fit`."""

    def test_plot_iv(self):
        """`plot` rejects an unknown `plot_type`, or reports that
        matplotlib is missing when it cannot be imported."""
        rng = np.random.default_rng(1769658657308472721)
        data = stats.norm.rvs(0, 1, size=100, random_state=rng)

        def optimizer(*args, **kwargs):
            return differential_evolution(*args, **kwargs, seed=rng)

        bounds = [(0, 30), (0, 1)]
        res = stats.fit(stats.norm, data, bounds, optimizer=optimizer)

        # Only the import is guarded, so an unrelated failure inside
        # `res.plot` cannot be mistaken for a missing matplotlib.
        # `ImportError` also covers its subclass `ModuleNotFoundError`.
        try:
            import matplotlib  # noqa
        except ImportError:
            expected = ModuleNotFoundError
            message = r"matplotlib must be installed to use method `plot`."
        else:
            expected = ValueError
            message = r"`plot_type` must be one of \{'..."

        with pytest.raises(expected, match=message):
            res.plot(plot_type='llama')
|
||||
1712
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_hypotests.py
vendored
Normal file
1712
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_hypotests.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
604
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_kdeoth.py
vendored
Normal file
604
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_kdeoth.py
vendored
Normal file
@@ -0,0 +1,604 @@
|
||||
from scipy import stats, linalg, integrate
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_almost_equal, assert_, assert_equal,
|
||||
assert_array_almost_equal, assert_array_almost_equal_nulp, assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
|
||||
def test_kde_1d():
    """Compare a 1-D `gaussian_kde` of normal samples with the normal pdf
    and cross-check its integration methods against each other."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]  # grid spacing used for the Riemann sums

    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
def test_kde_1d_weighted():
    """Compare a weighted 1-D `gaussian_kde` with the normal pdf built from
    the weighted mean and standard deviation of the sample."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)
    xnmean = np.average(xn, weights=wn)
    xnstd = np.sqrt(np.average((xn-xnmean)**2, weights=wn))

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]  # grid spacing used for the Riemann sums

    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_kde_2d():
    """Compare a 2-D `gaussian_kde` of multivariate-normal samples with the
    generating density on a 500 x 500 grid."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]), mean=mean,
                                            cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]  # grid spacing along one axis

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    # Very large finite bounds stand in for an unbounded box.
    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_kde_2d_weighted():
    """Same checks as the unweighted 2-D case, with random sample weights
    passed to `gaussian_kde`."""
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
    wn = np.random.rand(n_basesample)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]), mean=mean,
                                            cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]  # grid spacing along one axis

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    # Very large finite bounds stand in for an unbounded box.
    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
def test_kde_bandwidth_method():
    """A callable or scalar `bw_method` equal to the default gives the same
    density as the default; an unknown string raises `ValueError`."""
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
|
||||
|
||||
|
||||
def test_kde_bandwidth_method_weighted():
    """Weighted counterpart of the bandwidth-method test.

    Every estimator is built with the same sample weights, so the callable
    below (which reads `neff`) is compared with the default on weighted
    data. Without weights this test would only repeat the unweighted one.
    """
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.neff, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor, weights=wn)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor, weights=wn)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring',
                  weights=wn)
|
||||
|
||||
|
||||
# Subclasses that should stay working (extracted from various sources).
# Unfortunately the earlier design of gaussian_kde made it necessary for users
# to create these kinds of subclasses, or call _compute_covariance() directly.

class _kde_subclass1(stats.gaussian_kde):
    """Subclass that sets up its state by hand instead of calling the
    parent `__init__`."""

    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()


class _kde_subclass2(stats.gaussian_kde):
    """Subclass that assigns `covariance_factor` before delegating to the
    parent `__init__`."""

    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super().__init__(dataset)


class _kde_subclass4(stats.gaussian_kde):
    """Subclass that overrides `covariance_factor` as a method."""

    def covariance_factor(self):
        return 0.5 * self.silverman_factor()


def test_gaussian_kde_subclassing():
    """The legacy subclassing patterns above still evaluate correctly."""
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)

    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)

    # subclass 3 was removed because we have no obligation to maintain support
    # for user invocation of private methods

    # subclass 4
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]

    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)
|
||||
|
||||
|
||||
def test_gaussian_kde_covariance_caching():
    """Changing the bandwidth and then resetting it to 'scott' gives the
    reference density values."""
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(x1)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    y2 = kde(xs)

    assert_array_almost_equal(y_expected, y2, decimal=7)
|
||||
|
||||
|
||||
def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this.  See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10)
|
||||
|
||||
|
||||
def test_kde_integer_input():
    """Regression test for #1181."""
    x1 = np.arange(5)  # integer-valued dataset
    kde = stats.gaussian_kde(x1)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(kde(x1), y_expected, decimal=6)
|
||||
|
||||
|
||||
_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64']


@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"])
@pytest.mark.parametrize("dtype", _ftypes)
def test_kde_output_dtype(dtype, bw_type):
    """The dtype of the evaluated density equals the `np.result_type` of
    the dataset, the points, float64 weights and the bandwidth factor."""
    # Check whether the datatypes are available
    dtype = getattr(np, dtype, None)

    if bw_type in ["scott", "silverman"]:
        bw = bw_type
    else:
        bw_type = getattr(np, bw_type, None)
        bw = bw_type(3) if bw_type else None

    # A name missing from this NumPy build resolves to None above.
    if any(dt is None for dt in [dtype, bw]):
        pytest.skip()

    weights = np.arange(5, dtype=dtype)
    dataset = np.arange(5, dtype=dtype)
    k = stats.gaussian_kde(dataset, bw_method=bw, weights=weights)
    points = np.arange(5, dtype=dtype)
    result = k(points)
    # weights are always cast to float64
    assert result.dtype == np.result_type(dataset, points, np.float64(weights),
                                          k.factor)
|
||||
|
||||
|
||||
def test_pdf_logpdf_validation():
    """`logpdf` raises `ValueError` when the points have a different number
    of dimensions than the dataset."""
    rng = np.random.default_rng(64202298293133848336925499069837723291)
    xn = rng.standard_normal((2, 10))
    gkde = stats.gaussian_kde(xn)
    xs = rng.standard_normal((3, 10))

    msg = "points have dimension 3, dataset has dimension 2"
    with pytest.raises(ValueError, match=msg):
        gkde.logpdf(xs)
|
||||
|
||||
|
||||
def test_pdf_logpdf():
    """`pdf` agrees with `evaluate`, and `logpdf` with the log of it."""
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs)
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)
|
||||
|
||||
|
||||
def test_pdf_logpdf_weighted():
    """Weighted variant: `pdf` agrees with `evaluate`, and `logpdf` with
    the log of it."""
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs)))
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)
|
||||
|
||||
|
||||
def test_marginal_1_axis():
    """Marginalizing out dimension 0 matches numerical integration of the
    full pdf over that dimension."""
    rng = np.random.default_rng(6111799263660870475)
    n_data = 50
    n_dim = 10
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf_single(point):
        def f(x):
            # `x` fills dimension 0; the kept coordinates come from `point`.
            x = np.concatenate(([x], point[dimensions]))
            return kde.pdf(x)[0]
        return integrate.quad(f, -np.inf, np.inf)[0]

    def marginal_pdf(points):
        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_marginal_2_axis():
    """Marginalizing out dimensions 0 and 2 matches double numerical
    integration of the full pdf over those dimensions."""
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 3])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf(points):
        def marginal_pdf_single(point):
            def f(y, x):
                # `x` and `y` fill dimensions 0 and 2; `w` and `z` are the
                # kept coordinates taken from `point`.
                w, z = point[dimensions]
                x = np.array([x, w, y, z])
                return kde.pdf(x)[0]
            return integrate.dblquad(f, -np.inf, np.inf, -np.inf, np.inf)[0]

        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)
|
||||
|
||||
|
||||
def test_marginal_iv():
    """Input validation of `gaussian_kde.marginal`."""
    # test input validation
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    kde = stats.gaussian_kde(dataset)

    # check that positive and negative indices are equivalent
    dimensions1 = [-1, 1]
    marginal1 = kde.marginal(dimensions1)
    pdf1 = marginal1.pdf(points[dimensions1])

    dimensions2 = [3, -3]
    marginal2 = kde.marginal(dimensions2)
    pdf2 = marginal2.pdf(points[dimensions2])

    assert_equal(pdf1, pdf2)

    # IV for non-integer dimensions
    message = "Elements of `dimensions` must be integers..."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2.5])

    # IV for uniqueness
    message = "All elements of `dimensions` must be unique."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2, 2])

    # IV for out-of-range dimensions
    message = (r"Dimensions \[-5 6\] are invalid for a distribution in 4...")
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, -5, 6])
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_logpdf_overflow():
    """`logpdf` of a very high-dimensional KDE is neither -inf nor NaN."""
    # regression test for gh-12988; testing against linalg instability for
    # very high dimensionality kde
    np.random.seed(1)
    n_dimensions = 2500
    n_samples = 5000
    # Row `n` holds standard-normal samples shifted by `n`.
    xn = np.array([np.random.randn(n_samples) + (n) for n in range(
        0, n_dimensions)])

    # Default
    gkde = stats.gaussian_kde(xn)

    logpdf = gkde.logpdf(np.arange(0, n_dimensions))
    np.testing.assert_equal(np.isneginf(logpdf[0]), False)
    np.testing.assert_equal(np.isnan(logpdf[0]), False)
|
||||
|
||||
|
||||
def test_weights_intact():
    """Constructing a `gaussian_kde` leaves the caller's weights unchanged."""
    # regression test for gh-9709: weights are not modified
    np.random.seed(12345)
    vals = np.random.lognormal(size=100)
    weights = np.random.choice([1.0, 10.0, 100], size=vals.size)
    orig_weights = weights.copy()

    stats.gaussian_kde(np.log10(vals), weights=weights)
    assert_allclose(weights, orig_weights, atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_weights_integer():
    """Integer weights give the same density as their float64 equivalents."""
    # integer weights are OK, cf gh-9709 (comment)
    np.random.seed(12345)
    values = [0.2, 13.5, 21.0, 75.0, 99.0]
    weights = [1, 2, 4, 8, 16]  # a list of integers
    pdf_i = stats.gaussian_kde(values, weights=weights)
    pdf_f = stats.gaussian_kde(values, weights=np.float64(weights))

    xn = [0.3, 11, 88]
    assert_allclose(pdf_i.evaluate(xn),
                    pdf_f.evaluate(xn), atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_seed():
    """Check the `seed` option of `gaussian_kde.resample` for 1-D and 2-D,
    weighted and unweighted estimators."""
    # Test the seed option of the resample method
    def check_seed(gkde_trail):
        # Named without the `test_` prefix so it reads as a helper rather
        # than a test of its own.
        n_sample = 200
        # The results should be different without using seed
        samp1 = gkde_trail.resample(n_sample)
        samp2 = gkde_trail.resample(n_sample)
        assert_raises(
            AssertionError, assert_allclose, samp1, samp2, atol=1e-13
        )
        # Use integer seed
        seed = 831
        samp1 = gkde_trail.resample(n_sample, seed=seed)
        samp2 = gkde_trail.resample(n_sample, seed=seed)
        assert_allclose(samp1, samp2, atol=1e-13)
        # Use RandomState
        rstate1 = np.random.RandomState(seed=138)
        samp1 = gkde_trail.resample(n_sample, seed=rstate1)
        rstate2 = np.random.RandomState(seed=138)
        samp2 = gkde_trail.resample(n_sample, seed=rstate2)
        assert_allclose(samp1, samp2, atol=1e-13)

        # check that np.random.Generator can be used (numpy >= 1.17)
        if hasattr(np.random, 'default_rng'):
            # obtain a np.random.Generator object
            rng = np.random.default_rng(1234)
            gkde_trail.resample(n_sample, seed=rng)

    np.random.seed(8765678)
    n_basesample = 500
    wn = np.random.rand(n_basesample)
    # Test 1D case
    xn_1d = np.random.randn(n_basesample)

    gkde_1d = stats.gaussian_kde(xn_1d)
    check_seed(gkde_1d)
    gkde_1d_weighted = stats.gaussian_kde(xn_1d, weights=wn)
    check_seed(gkde_1d_weighted)

    # Test 2D case
    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
    xn_2d = np.random.multivariate_normal(mean, covariance, size=n_basesample).T

    gkde_2d = stats.gaussian_kde(xn_2d)
    check_seed(gkde_2d)
    gkde_2d_weighted = stats.gaussian_kde(xn_2d, weights=wn)
    check_seed(gkde_2d_weighted)
|
||||
|
||||
|
||||
def test_singular_data_covariance_gh10205():
    """If singular data makes `gaussian_kde` raise `LinAlgError`, the
    message must be the informative one."""
    # When the data lie in a lower-dimensional subspace and this causes
    # an exception, check that the error message is informative.
    rng = np.random.default_rng(2321583144339784787)
    mu = np.array([1, 10, 20])
    sigma = np.array([[4, 10, 0], [10, 25, 0], [0, 0, 100]])
    data = rng.multivariate_normal(mu, sigma, 1000)
    try:  # doesn't raise any error on some platforms, and that's OK
        stats.gaussian_kde(data.T)
    except linalg.LinAlgError:
        # Repeat the call under `assert_raises` to check the message text.
        msg = "The data appears to lie in a lower-dimensional subspace..."
        with assert_raises(linalg.LinAlgError, match=msg):
            stats.gaussian_kde(data.T)
|
||||
|
||||
|
||||
def test_fewer_points_than_dimensions_gh17436():
    """`gaussian_kde` raises `ValueError` when given more dimensions than
    samples."""
    # When the number of points is fewer than the number of dimensions,
    # the covariance matrix would be singular, and the exception tested in
    # test_singular_data_covariance_gh10205 would occur. However, sometimes
    # this occurs when the user passes in the transpose of what `gaussian_kde`
    # expects. This can result in a huge covariance matrix, so bail early.
    rng = np.random.default_rng(2046127537594925772)
    rvs = rng.multivariate_normal(np.zeros(3), np.eye(3), size=5)
    message = "Number of dimensions is greater than number of samples..."
    with pytest.raises(ValueError, match=message):
        stats.gaussian_kde(rvs)
|
||||
2673
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_morestats.py
vendored
Normal file
2673
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_morestats.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1977
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_mstats_basic.py
vendored
Normal file
1977
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_mstats_basic.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
150
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_mstats_extras.py
vendored
Normal file
150
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_mstats_extras.py
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
import numpy as np
|
||||
import numpy.ma as ma
|
||||
import scipy.stats.mstats as ms
|
||||
|
||||
from numpy.testing import (assert_equal, assert_almost_equal, assert_,
|
||||
assert_allclose)
|
||||
|
||||
|
||||
def test_compare_medians_ms():
    """Check `compare_medians_ms` against two reference values."""
    x = np.arange(7)
    y = x + 10
    assert_almost_equal(ms.compare_medians_ms(x, y), 0)

    y2 = np.linspace(0, 1, num=10)
    assert_almost_equal(ms.compare_medians_ms(x, y2), 0.017116406778)
|
||||
|
||||
|
||||
def test_hdmedian():
    """Check `hdmedian` on 1-D and 2-D masked arrays, with and without masks."""
    # 1-D array
    x = ma.arange(11)
    assert_allclose(ms.hdmedian(x), 5, rtol=1e-14)
    # Start from a mask built from the data, then unmask the first 7 entries.
    x.mask = ma.make_mask(x)
    x.mask[:7] = False
    assert_allclose(ms.hdmedian(x), 3, rtol=1e-14)

    # Check that `var` keyword returns a value.  TODO: check whether returned
    # value is actually correct.
    assert_(ms.hdmedian(x, var=True).size == 2)

    # 2-D array
    x2 = ma.arange(22).reshape((11, 2))
    assert_allclose(ms.hdmedian(x2, axis=0), [10, 11])
    x2.mask = ma.make_mask(x2)
    x2.mask[:7, :] = False
    assert_allclose(ms.hdmedian(x2, axis=0), [6, 7])
|
||||
|
||||
|
||||
def test_rsh():
    """Smoke-test `rsh`: the output has the expected shape/size."""
    # Use a local generator instead of `np.random.seed` so this test does not
    # reseed the global NumPy RNG shared with other tests.  A `RandomState`
    # seeded with the same value yields the same sample as before.
    rng = np.random.RandomState(132345)
    x = rng.randn(100)
    res = ms.rsh(x)
    # Just a sanity check that the code runs and output shape is correct.
    # TODO: check that implementation is correct.
    assert_(res.shape == x.shape)

    # Check points keyword
    res = ms.rsh(x, points=[0, 1.])
    assert_(res.size == 2)
|
||||
|
||||
|
||||
def test_mjci():
    """Check `mjci` against reference values to 5 decimals."""
    # Tests the Maritz-Jarrett estimator
    data = ma.array([77, 87, 88,114,151,210,219,246,253,262,
                     296,299,306,376,428,515,666,1310,2611])
    assert_almost_equal(ms.mjci(data),[55.76819,45.84028,198.87875],5)
|
||||
|
||||
|
||||
def test_trimmed_mean_ci():
    """Check the trimmed mean and its confidence interval on a fixed sample."""
    # Tests the confidence intervals of the trimmed mean.
    data = ma.array([545,555,558,572,575,576,578,580,
                     594,605,635,651,653,661,666])
    assert_almost_equal(ms.trimmed_mean(data,0.2), 596.2, 1)
    # The interval is rounded to one decimal before the exact comparison.
    assert_equal(np.round(ms.trimmed_mean_ci(data,(0.2,0.2)),1),
                 [561.8, 630.6])
|
||||
|
||||
|
||||
def test_idealfourths():
    """Check `idealfourths` on 1-D, 2-D (both axes) and a 2-element input."""
    # Tests ideal-fourths
    test = np.arange(100)
    assert_almost_equal(np.asarray(ms.idealfourths(test)),
                        [24.416667,74.583333],6)
    # Three identical columns: each column must give the 1-D result.
    test_2D = test.repeat(3).reshape(-1,3)
    assert_almost_equal(ms.idealfourths(test_2D, axis=0),
                        [[24.416667,24.416667,24.416667],
                         [74.583333,74.583333,74.583333]],6)
    # Along axis=1 every row is constant, so both fourths equal that value.
    assert_almost_equal(ms.idealfourths(test_2D, axis=1),
                        test.repeat(2).reshape(-1,2))
    # A two-element input is expected to produce all-NaN output.
    test = [0, 0]
    _result = ms.idealfourths(test)
    assert_(np.isnan(_result).all())
|
||||
|
||||
|
||||
class TestQuantiles:
    """Tests for the Harrell-Davis quantile helpers in `scipy.stats.mstats`."""

    # Fixed sample of 100 values shared by all tests in this class.
    data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]

    def test_hdquantiles(self):
        """Check `hdquantiles` values and per-column consistency along axis 0."""
        data = self.data
        assert_almost_equal(ms.hdquantiles(data,[0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data,[0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])

        # The axis=0 result must agree with column-by-column 1-D calls.
        data = np.array(data).reshape(10,10)
        hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0)
        assert_almost_equal(hdq[:,0], ms.hdquantiles(data[:,0],[0.25,0.5,0.75]))
        assert_almost_equal(hdq[:,-1], ms.hdquantiles(data[:,-1],[0.25,0.5,0.75]))
        hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0,var=True)
        assert_almost_equal(hdq[...,0],
                            ms.hdquantiles(data[:,0],[0.25,0.5,0.75],var=True))
        assert_almost_equal(hdq[...,-1],
                            ms.hdquantiles(data[:,-1],[0.25,0.5,0.75], var=True))

    def test_hdquantiles_sd(self):
        """Compare `hdquantiles_sd` with an explicit leave-one-out jackknife."""
        # Standard deviation is a jackknife estimator, so we can check if
        # the efficient version (hdquantiles_sd) matches a rudimentary,
        # but clear version here.

        hd_std_errs = ms.hdquantiles_sd(self.data)

        # jackknife standard error, Introduction to the Bootstrap Eq. 11.5
        n = len(self.data)
        jdata = np.broadcast_to(self.data, (n, n))
        jselector = np.logical_not(np.eye(n))  # leave out one sample each row
        jdata = jdata[jselector].reshape(n, n-1)
        jdist = ms.hdquantiles(jdata, axis=1)
        jdist_mean = np.mean(jdist, axis=0)
        jstd = ((n-1)/n * np.sum((jdist - jdist_mean)**2, axis=0))**.5

        assert_almost_equal(hd_std_errs, jstd)
        # Test actual values for good measure
        assert_almost_equal(hd_std_errs, [0.0379258, 0.0380656, 0.0380013])

        two_data_points = ms.hdquantiles_sd([1, 2])
        assert_almost_equal(two_data_points, [0.5, 0.5, 0.5])

    def test_mquantiles_cimj(self):
        """Smoke-test `mquantiles_cimj`: both bounds have three entries."""
        # Only test that code runs, implementation not checked for correctness
        ci_lower, ci_upper = ms.mquantiles_cimj(self.data)
        assert_(ci_lower.size == ci_upper.size == 3)
|
||||
|
||||
|
||||
2905
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_multivariate.py
vendored
Normal file
2905
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_multivariate.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
147
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_odds_ratio.py
vendored
Normal file
147
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_odds_ratio.py
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
from .._discrete_distns import nchypergeom_fisher, hypergeom
|
||||
from scipy.stats._odds_ratio import odds_ratio
|
||||
from .data.fisher_exact_results_from_r import data
|
||||
|
||||
|
||||
class TestOddsRatio:
    """Tests for `scipy.stats._odds_ratio.odds_ratio`."""

    @pytest.mark.parametrize('parameters, rresult', data)
    def test_results_from_r(self, parameters, rresult):
        """Compare the conditional odds ratio and its CI with R results.

        Besides the comparison with the stored reference values, the
        statistic and the confidence limits are self-checked against the
        noncentral hypergeometric distribution.
        """
        # The stored alternatives use '.', the API uses '-'.
        alternative = parameters.alternative.replace('.', '-')
        result = odds_ratio(parameters.table)
        # The results computed by R are not very accurate.
        if result.statistic < 400:
            or_rtol = 5e-4
            ci_rtol = 2e-2
        else:
            or_rtol = 5e-2
            ci_rtol = 1e-1
        assert_allclose(result.statistic,
                        rresult.conditional_odds_ratio, rtol=or_rtol)
        ci = result.confidence_interval(parameters.confidence_level,
                                        alternative)
        assert_allclose((ci.low, ci.high), rresult.conditional_odds_ratio_ci,
                        rtol=ci_rtol)

        # Also do a self-check for the conditional odds ratio.
        # With the computed conditional odds ratio as the noncentrality
        # parameter of the noncentral hypergeometric distribution with
        # parameters table.sum(), table[0].sum(), and table[:,0].sum() as
        # total, ngood and nsample, respectively, the mean of the distribution
        # should equal table[0, 0].
        cor = result.statistic
        table = np.array(parameters.table)
        total = table.sum()
        ngood = table[0].sum()
        nsample = table[:, 0].sum()
        # nchypergeom_fisher does not allow the edge cases where the
        # noncentrality parameter is 0 or inf, so handle those values
        # separately here.
        if cor == 0:
            nchg_mean = hypergeom.support(total, ngood, nsample)[0]
        elif cor == np.inf:
            nchg_mean = hypergeom.support(total, ngood, nsample)[1]
        else:
            nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample, cor)
        assert_allclose(nchg_mean, table[0, 0], rtol=1e-13)

        # Check that the confidence interval is correct.
        alpha = 1 - parameters.confidence_level
        if alternative == 'two-sided':
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha/2, rtol=1e-11)
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha/2, rtol=1e-11)
        elif alternative == 'less':
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha, rtol=1e-11)
        else:
            # alternative == 'greater'
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha, rtol=1e-11)

    @pytest.mark.parametrize('table', [
        [[0, 0], [5, 10]],
        [[5, 10], [0, 0]],
        [[0, 5], [0, 10]],
        [[5, 0], [10, 0]],
    ])
    def test_row_or_col_zero(self, table):
        """A zero row or column gives a NaN statistic and the CI (0, inf)."""
        result = odds_ratio(table)
        assert_equal(result.statistic, np.nan)
        ci = result.confidence_interval()
        assert_equal((ci.low, ci.high), (0, np.inf))

    @pytest.mark.parametrize("case",
                             [[0.95, 'two-sided', 0.4879913, 2.635883],
                              [0.90, 'two-sided', 0.5588516, 2.301663]])
    def test_sample_odds_ratio_ci(self, case):
        """Compare the sample odds ratio CI with R's `oddsratio.wald`."""
        # Compare the sample odds ratio confidence interval to the R function
        # oddsratio.wald from the epitools package, e.g.
        # > library(epitools)
        # > table = matrix(c(10, 20, 41, 93), nrow=2, ncol=2, byrow=TRUE)
        # > result = oddsratio.wald(table)
        # > result$measure
        #           odds ratio with 95% C.I.
        # Predictor  estimate     lower    upper
        #   Exposed1 1.000000        NA       NA
        #   Exposed2 1.134146 0.4879913 2.635883

        confidence_level, alternative, ref_low, ref_high = case
        table = [[10, 20], [41, 93]]
        result = odds_ratio(table, kind='sample')
        assert_allclose(result.statistic, 1.134146, rtol=1e-6)
        ci = result.confidence_interval(confidence_level, alternative)
        assert_allclose([ci.low, ci.high], [ref_low, ref_high], rtol=1e-6)

    @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
    def test_sample_odds_ratio_one_sided_ci(self, alternative):
        """For a large table the sample and conditional CIs nearly agree."""
        # can't find a good reference for one-sided CI, so bump up the sample
        # size and compare against the conditional odds ratio CI
        table = [[1000, 2000], [4100, 9300]]
        res = odds_ratio(table, kind='sample')
        ref = odds_ratio(table, kind='conditional')
        assert_allclose(res.statistic, ref.statistic, atol=1e-5)
        assert_allclose(res.confidence_interval(alternative=alternative),
                        ref.confidence_interval(alternative=alternative),
                        atol=2e-3)

    @pytest.mark.parametrize('kind', ['sample', 'conditional'])
    @pytest.mark.parametrize('bad_table', [123, "foo", [10, 11, 12]])
    def test_invalid_table_shape(self, kind, bad_table):
        """Inputs that are not 2x2 tables raise ValueError."""
        with pytest.raises(ValueError, match="Invalid shape"):
            odds_ratio(bad_table, kind=kind)

    def test_invalid_table_type(self):
        """A table of non-integer values raises ValueError."""
        with pytest.raises(ValueError, match='must be an array of integers'):
            odds_ratio([[1.0, 3.4], [5.0, 9.9]])

    def test_negative_table_values(self):
        """A table with a negative entry raises ValueError."""
        with pytest.raises(ValueError, match='must be nonnegative'):
            odds_ratio([[1, 2], [3, -4]])

    def test_invalid_kind(self):
        """An unknown `kind` raises ValueError."""
        with pytest.raises(ValueError, match='`kind` must be'):
            odds_ratio([[10, 20], [30, 14]], kind='magnetoreluctance')

    def test_invalid_alternative(self):
        """An unknown `alternative` raises ValueError."""
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='`alternative` must be'):
            result.confidence_interval(alternative='depleneration')

    @pytest.mark.parametrize('level', [-0.5, 1.5])
    def test_invalid_confidence_level(self, level):
        """A confidence level outside [0, 1] raises ValueError."""
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='must be between 0 and 1'):
            result.confidence_interval(confidence_level=level)
|
||||
1326
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_qmc.py
vendored
Normal file
1326
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_qmc.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
320
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_rank.py
vendored
Normal file
320
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_rank.py
vendored
Normal file
@@ -0,0 +1,320 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_array_equal
|
||||
|
||||
from scipy.stats import rankdata, tiecorrect
|
||||
import pytest
|
||||
|
||||
|
||||
class TestTieCorrect:
    """Tests for `scipy.stats.tiecorrect`."""

    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        ranks = np.array([], dtype=np.float64)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        ranks = np.array([1.0], dtype=np.float64)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        ranks = np.arange(2.0)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)
        ranks = np.arange(3.0)
        c = tiecorrect(ranks)
        assert_equal(c, 1.0)

    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""
        # One tie of two elements
        ranks = np.array([1.0, 2.5, 2.5])
        c = tiecorrect(ranks)
        T = 2.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # One tie of two elements (same as above, but tie is not at the end)
        ranks = np.array([1.5, 1.5, 3.0])
        c = tiecorrect(ranks)
        T = 2.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # One tie of three elements
        ranks = np.array([1.0, 3.0, 3.0, 3.0])
        c = tiecorrect(ranks)
        T = 3.0
        N = ranks.size
        expected = 1.0 - (T**3 - T) / (N**3 - N)
        assert_equal(c, expected)

        # Two ties, lengths 2 and 3.
        ranks = np.array([1.5, 1.5, 4.0, 4.0, 4.0])
        c = tiecorrect(ranks)
        T1 = 2.0
        T2 = 3.0
        N = ranks.size
        expected = 1.0 - ((T1**3 - T1) + (T2**3 - T2)) / (N**3 - N)
        assert_equal(c, expected)

    def test_overflow(self):
        """Check the factor for `k` tie groups of `ntie` elements each."""
        ntie, k = 2000, 5
        a = np.repeat(np.arange(k), ntie)
        n = a.size  # ntie * k
        out = tiecorrect(rankdata(a))
        assert_equal(out, 1.0 - k * (ntie**3 - ntie) / float(n**3 - n))
|
||||
|
||||
|
||||
class TestRankData:
    """Tests for `scipy.stats.rankdata`."""

    def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64))

    def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
        r = rankdata(data)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))

    def test_basic(self):
        """Basic tests of stats.rankdata."""
        data = [100, 10, 50]
        expected = np.array([3.0, 1.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [40, 10, 30, 10, 50]
        expected = np.array([4.0, 1.5, 3.0, 1.5, 5.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [20, 20, 20, 10, 10, 10]
        expected = np.array([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)
        # The docstring states explicitly that the argument is flattened.
        a2d = a.reshape(2, 3)
        r = rankdata(a2d)
        assert_array_equal(r, expected)

    def test_rankdata_object_string(self):
        """Compare every method with pure-Python references on str/object data."""

        def min_rank(a):
            return [1 + sum(i < j for i in a) for j in a]

        def max_rank(a):
            return [sum(i <= j for i in a) for j in a]

        def ordinal_rank(a):
            # Pairing each value with its index breaks ties by position.
            return min_rank([(x, i) for i, x in enumerate(a)])

        def average_rank(a):
            return [(i + j) / 2.0 for i, j in zip(min_rank(a), max_rank(a))]

        def dense_rank(a):
            b = np.unique(a)
            return [1 + sum(i < j for i in b) for j in a]

        rankf = dict(min=min_rank, max=max_rank, ordinal=ordinal_rank,
                     average=average_rank, dense=dense_rank)

        def check_ranks(a):
            for method in 'min', 'max', 'dense', 'ordinal', 'average':
                out = rankdata(a, method=method)
                assert_array_equal(out, rankf[method](a))

        # Seeded local generator: the samples are reproducible and the
        # global NumPy RNG is left untouched.
        rng = np.random.default_rng(8674309)

        val = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
        check_ranks(rng.choice(val, 200))
        check_ranks(rng.choice(val, 200).astype('object'))

        val = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
        check_ranks(rng.choice(val, 200).astype('object'))

    def test_large_int(self):
        """Integers near 2**60 must be ranked correctly."""
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0])

    def test_big_tie(self):
        """An all-equal array of length n gets the average rank (n + 1) / 2."""
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n)

    def test_axis(self):
        """Check ranking along axis 0 and axis 1 of a 2-D input."""
        data = [[0, 2, 1],
                [4, 2, 2]]
        expected0 = [[1., 1.5, 1.],
                     [2., 1.5, 2.]]
        r0 = rankdata(data, axis=0)
        assert_array_equal(r0, expected0)
        expected1 = [[1., 3., 2.],
                     [3., 1.5, 1.5]]
        r1 = rankdata(data, axis=1)
        assert_array_equal(r1, expected1)

    # Ranking methods and, in the same order, the result dtype that
    # test_size_0_axis expects for each of them.
    methods = ["average", "min", "max", "dense", "ordinal"]
    dtypes = [np.float64] + [np.int_]*4

    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize("method, dtype", zip(methods, dtypes))
    def test_size_0_axis(self, axis, method, dtype):
        """A (3, 0) input keeps its shape and yields the expected dtype."""
        shape = (3, 0)
        data = np.zeros(shape)
        r = rankdata(data, method=method, axis=axis)
        assert_equal(r.shape, shape)
        assert_equal(r.dtype, dtype)

    @pytest.mark.parametrize('axis', range(3))
    @pytest.mark.parametrize('method', methods)
    def test_nan_policy_omit_3d(self, axis, method):
        """Compare `nan_policy='omit'` on 3-D data with a 1-D reference."""
        shape = (20, 21, 22)
        # Fixed integer seed: `hash()` of a str changes between interpreter
        # runs (hash randomization), which made the data irreproducible.
        rng = np.random.default_rng(2346987239845)

        a = rng.random(size=shape)
        i = rng.random(size=shape) < 0.4
        j = rng.random(size=shape) < 0.1
        k = rng.random(size=shape) < 0.1
        a[i] = np.nan
        a[j] = -np.inf
        # This was `a[k] - np.inf`, a no-op expression, so +inf was never
        # actually placed in the data.
        a[k] = np.inf

        def rank_1d_omit(a, method):
            # Rank only the non-NaN entries and put NaN back in place.
            out = np.zeros_like(a)
            i = np.isnan(a)
            a_compressed = a[~i]
            res = rankdata(a_compressed, method)
            out[~i] = res
            out[i] = np.nan
            return out

        def rank_omit(a, method, axis):
            return np.apply_along_axis(lambda a: rank_1d_omit(a, method),
                                       axis, a)

        res = rankdata(a, method, axis=axis, nan_policy='omit')
        res0 = rank_omit(a, method, axis=axis)

        assert_array_equal(res, res0)

    def test_nan_policy_2d_axis_none(self):
        """Check 'omit' and 'propagate' on 2-D data with `axis=None`."""
        # 2 2d-array test with axis=None
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [1, 2, 2]]
        assert_array_equal(rankdata(data, axis=None, nan_policy='omit'),
                           [1., np.nan, 6., 7., 4., np.nan, 2., 4., 4.])
        assert_array_equal(rankdata(data, axis=None, nan_policy='propagate'),
                           [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
                            np.nan, np.nan, np.nan])

    def test_nan_policy_raise(self):
        """`nan_policy='raise'` raises ValueError when NaN is present."""
        # 1 1d-array test
        data = [0, 2, 3, -2, np.nan, np.nan]
        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, nan_policy='raise')

        # 2 2d-array test
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [np.nan, 2, 2]]

        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, axis=0, nan_policy="raise")

        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, axis=1, nan_policy="raise")

    def test_nan_policy_propagate(self):
        """`nan_policy='propagate'` fills any slice containing NaN with NaN."""
        # 1 1d-array test
        data = [0, 2, 3, -2, np.nan, np.nan]
        assert_array_equal(rankdata(data, nan_policy='propagate'),
                           [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan])

        # 2 2d-array test
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [1, 2, 2]]
        assert_array_equal(rankdata(data, axis=0, nan_policy='propagate'),
                           [[1, np.nan, np.nan],
                            [3, np.nan, np.nan],
                            [2, np.nan, np.nan]])
        assert_array_equal(rankdata(data, axis=1, nan_policy='propagate'),
                           [[np.nan, np.nan, np.nan],
                            [np.nan, np.nan, np.nan],
                            [1, 2.5, 2.5]])
|
||||
|
||||
|
||||
_cases = (
|
||||
# values, method, expected
|
||||
([], 'average', []),
|
||||
([], 'min', []),
|
||||
([], 'max', []),
|
||||
([], 'dense', []),
|
||||
([], 'ordinal', []),
|
||||
#
|
||||
([100], 'average', [1.0]),
|
||||
([100], 'min', [1.0]),
|
||||
([100], 'max', [1.0]),
|
||||
([100], 'dense', [1.0]),
|
||||
([100], 'ordinal', [1.0]),
|
||||
#
|
||||
([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
|
||||
([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
|
||||
([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
|
||||
([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
|
||||
([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
|
||||
#
|
||||
([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
|
||||
#
|
||||
([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
|
||||
([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
|
||||
([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
|
||||
([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
|
||||
([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
|
||||
#
|
||||
([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
|
||||
([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
|
||||
([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
|
||||
([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
|
||||
([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
|
||||
#
|
||||
([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
|
||||
)
|
||||
|
||||
|
||||
def test_cases():
|
||||
for values, method, expected in _cases:
|
||||
r = rankdata(values, method=method)
|
||||
assert_array_equal(r, expected)
|
||||
96
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_relative_risk.py
vendored
Normal file
96
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_relative_risk.py
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy.stats.contingency import relative_risk
|
||||
|
||||
|
||||
# Test just the calculation of the relative risk, including edge
|
||||
# cases that result in a relative risk of 0, inf or nan.
|
||||
@pytest.mark.parametrize(
    'exposed_cases, exposed_total, control_cases, control_total, expected_rr',
    [(1, 4, 3, 8, 0.25 / 0.375),
     (0, 10, 5, 20, 0),
     (0, 10, 0, 20, np.nan),
     (5, 15, 0, 20, np.inf)]
)
def test_relative_risk(exposed_cases, exposed_total,
                       control_cases, control_total, expected_rr):
    """Check the relative risk value, including the 0, nan and inf cases."""
    result = relative_risk(exposed_cases, exposed_total,
                           control_cases, control_total)
    assert_allclose(result.relative_risk, expected_rr, rtol=1e-13)
|
||||
|
||||
|
||||
def test_relative_risk_confidence_interval():
    """Compare the 95% confidence interval with R's epitools `riskratio`."""
    result = relative_risk(exposed_cases=16, exposed_total=128,
                           control_cases=24, control_total=256)
    rr = result.relative_risk
    ci = result.confidence_interval(confidence_level=0.95)
    # The corresponding calculation in R using the epitools package.
    #
    # > library(epitools)
    # > c <- matrix(c(232, 112, 24, 16), nrow=2)
    # > result <- riskratio(c)
    # > result$measure
    #               risk ratio with 95% C.I.
    # Predictor  estimate     lower    upper
    #   Exposed1 1.000000        NA       NA
    #   Exposed2 1.333333 0.7347317 2.419628
    #
    # The last line is the result that we want.
    assert_allclose(rr, 4/3)
    assert_allclose((ci.low, ci.high), (0.7347317, 2.419628), rtol=5e-7)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel0():
    """With confidence level 0 the interval collapses onto the estimate."""
    result = relative_risk(exposed_cases=4, exposed_total=12,
                           control_cases=5, control_total=30)
    rr = result.relative_risk
    assert_allclose(rr, 2.0, rtol=1e-14)
    ci = result.confidence_interval(0)
    assert_allclose((ci.low, ci.high), (2.0, 2.0), rtol=1e-12)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel1():
    """With confidence level 1 the interval is (0, inf)."""
    result = relative_risk(exposed_cases=4, exposed_total=12,
                           control_cases=5, control_total=30)
    ci = result.confidence_interval(1)
    assert_equal((ci.low, ci.high), (0, np.inf))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_00():
    """Zero cases in both groups: the estimate and both CI limits are nan."""
    result = relative_risk(exposed_cases=0, exposed_total=12,
                           control_cases=0, control_total=30)
    assert_equal(result.relative_risk, np.nan)
    ci = result.confidence_interval()
    assert_equal((ci.low, ci.high), (np.nan, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_01():
    """Zero exposed cases: the estimate is 0 and the CI is (0.0, nan)."""
    result = relative_risk(exposed_cases=0, exposed_total=12,
                           control_cases=1, control_total=30)
    assert_equal(result.relative_risk, 0)
    ci = result.confidence_interval()
    assert_equal((ci.low, ci.high), (0.0, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_10():
    """Zero control cases: the estimate is inf and the CI is (nan, inf)."""
    result = relative_risk(exposed_cases=1, exposed_total=12,
                           control_cases=0, control_total=30)
    assert_equal(result.relative_risk, np.inf)
    ci = result.confidence_interval()
    assert_equal((ci.low, ci.high), (np.nan, np.inf))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ec, et, cc, ct', [(0, 0, 10, 20),
                                            (-1, 10, 1, 5),
                                            (1, 10, 0, 0),
                                            (1, 10, -1, 4)])
def test_relative_risk_bad_value(ec, et, cc, ct):
    """Zero totals or negative case counts raise ValueError."""
    with pytest.raises(ValueError, match="must be an integer not less than"):
        relative_risk(ec, et, cc, ct)
|
||||
|
||||
|
||||
def test_relative_risk_bad_type():
    """A float argument (here `control_cases=2.0`) raises TypeError."""
    with pytest.raises(TypeError, match="must be an integer"):
        relative_risk(1, 10, 2.0, 40)
|
||||
1651
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_resampling.py
vendored
Normal file
1651
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_resampling.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1357
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_sampling.py
vendored
Normal file
1357
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_sampling.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
8173
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_stats.py
vendored
Normal file
8173
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_stats.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
86
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_tukeylambda_stats.py
vendored
Normal file
86
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_tukeylambda_stats.py
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
|
||||
from scipy.stats._tukeylambda_stats import (tukeylambda_variance,
|
||||
tukeylambda_kurtosis)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis:
    # lambda   var      kurtosis
    #   0     pi**2/3     6/5     (logistic distribution)
    #  0.5    4 - pi    (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #   1      1/3       -6/5     (uniform distribution on (-1,1))
    #   2      1/12      -6/5     (uniform distribution on (-1/2, 1/2))

    # lambda = 0
    var = tukeylambda_variance(0)
    assert_allclose(var, np.pi**2 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(0)
    assert_allclose(kurt, 1.2, atol=1e-10)

    # lambda = 0.5
    var = tukeylambda_variance(0.5)
    assert_allclose(var, 4 - np.pi, atol=1e-12)
    kurt = tukeylambda_kurtosis(0.5)
    desired = (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3
    assert_allclose(kurt, desired, atol=1e-10)

    # lambda = 1
    var = tukeylambda_variance(1)
    assert_allclose(var, 1.0 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(1)
    assert_allclose(kurt, -1.2, atol=1e-10)

    # lambda = 2
    var = tukeylambda_variance(2)
    assert_allclose(var, 1.0 / 12, atol=1e-12)
    kurt = tukeylambda_kurtosis(2)
    assert_allclose(kurt, -1.2, atol=1e-10)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    # Absolute-only tolerances (rtol=0) for kurtosis and variance.
    a10 = dict(atol=1e-10, rtol=0)
    a12 = dict(atol=1e-12, rtol=0)
    data = [
        # lambda        variance              kurtosis
        [-0.1, 4.78050217874253547, 3.78559520346454510],
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
        [-0.05, 3.93672267890775277, 2.13129793057777277],
        [-0.001, 3.30128380390964882, 1.21452460083542988],
        [0.001, 3.27850775649572176, 1.18560634779287585],
        [0.03125, 2.95927803254615800, 0.804487555161819980],
        [0.05, 2.78281053405464501, 0.611604043886644327],
        [0.0649, 2.65282386754100551, 0.476834119532774540],
        [1.2, 0.242153920578588346, -1.23428047169049726],
        [10.0, 0.00095237579757703597, 2.37810697355144933],
        [20.0, 0.00012195121951131043, 7.37654321002709531],
    ]

    for lam, var_expected, kurt_expected in data:
        var = tukeylambda_variance(lam)
        assert_allclose(var, var_expected, **a12)
        kurt = tukeylambda_kurtosis(lam)
        assert_allclose(kurt, kurt_expected, **a10)

    # Test with vector arguments (most of the other tests are for single
    # values).
    lam, var_expected, kurt_expected = zip(*data)
    var = tukeylambda_variance(lam)
    assert_allclose(var, var_expected, **a12)
    kurt = tukeylambda_kurtosis(lam)
    assert_allclose(kurt, kurt_expected, **a10)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    # Variance: expected nan at lambda = -1.0 and inf at lambda = -0.5.
    lam = [-1.0, -0.5]
    var = tukeylambda_variance(lam)
    assert_equal(var, np.array([np.nan, np.inf]))

    # Kurtosis: expected nan at lambda = -1.0 and inf at lambda = -0.25.
    lam = [-1.0, -0.25]
    kurt = tukeylambda_kurtosis(lam)
    assert_equal(kurt, np.array([np.nan, np.inf]))
|
||||
158
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_variation.py
vendored
Normal file
158
.CondaPkg/env/lib/python3.11/site-packages/scipy/stats/tests/test_variation.py
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
import pytest
|
||||
from scipy.stats import variation
|
||||
|
||||
|
||||
class TestVariation:
    """
    Test class for scipy.stats.variation
    """

    def test_ddof(self):
        """Check `variation` with ddof=1 on a simple float range."""
        x = np.arange(9.0)
        assert_allclose(variation(x, ddof=1), np.sqrt(60/8)/4)

    @pytest.mark.parametrize('sgn', [1, -1])
    def test_sign(self, sgn):
        """Negating the data must negate the result."""
        x = np.array([1, 2, 3, 4, 5])
        v = variation(sgn*x)
        expected = sgn*np.sqrt(2)/3
        assert_allclose(v, expected, rtol=1e-10)

    def test_scalar(self):
        # A scalar is treated like a 1-d sequence with length 1.
        assert_equal(variation(4.0), 0.0)

    @pytest.mark.parametrize('nan_policy, expected',
                             [('propagate', np.nan),
                              ('omit', np.sqrt(20/3)/4)])
    def test_variation_nan(self, nan_policy, expected):
        """Check the 'propagate' and 'omit' policies with one nan value."""
        x = np.arange(10.)
        x[9] = np.nan
        assert_allclose(variation(x, nan_policy=nan_policy), expected)

    def test_nan_policy_raise(self):
        """nan_policy='raise' must raise ValueError when a nan is present."""
        x = np.array([1.0, 2.0, np.nan, 3.0])
        with pytest.raises(ValueError, match='input contains nan'):
            variation(x, nan_policy='raise')

    def test_bad_nan_policy(self):
        """An unrecognized nan_policy string must raise ValueError."""
        with pytest.raises(ValueError, match='must be one of'):
            variation([1, 2, 3], nan_policy='foobar')

    def test_keepdims(self):
        """keepdims=True keeps the reduced axis with length 1."""
        x = np.arange(10).reshape(2, 5)
        y = variation(x, axis=1, keepdims=True)
        expected = np.array([[np.sqrt(2)/2],
                             [np.sqrt(2)/7]])
        assert_allclose(y, expected)

    @pytest.mark.parametrize('axis, expected',
                             [(0, np.empty((1, 0))),
                              (1, np.full((5, 1), fill_value=np.nan))])
    def test_keepdims_size0(self, axis, expected):
        """Check keepdims=True on an input with shape (5, 0)."""
        x = np.zeros((5, 0))
        y = variation(x, axis=axis, keepdims=True)
        assert_equal(y, expected)

    @pytest.mark.parametrize('incr, expected_fill',
                             [(0, np.inf), (1, np.nan)])
    def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill):
        """Check ddof equal to (incr=0) or one more than (incr=1) the
        length of the reduced axis: the expected fill is inf and nan,
        respectively.
        """
        x = np.array([[1, 1, 2, 2], [1, 2, 3, 3]])
        y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
        assert_equal(y, np.full((2, 1), fill_value=expected_fill))

    def test_propagate_nan(self):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = np.arange(8).reshape(2, -1).astype(float)
        a[1, 0] = np.nan
        v = variation(a, axis=1, nan_policy="propagate")
        assert_allclose(v, [np.sqrt(5/4)/1.5, np.nan], atol=1e-15)

    def test_axis_none(self):
        # Check that `variation` computes the result on the flattened
        # input when axis is None.
        y = variation([[0, 1], [2, 3]], axis=None)
        assert_allclose(y, np.sqrt(5/4)/1.5)

    def test_bad_axis(self):
        # Check that an invalid axis raises AxisError.
        # `np.AxisError` is no longer available in the top-level namespace
        # as of NumPy 2.0; the exception lives in `np.exceptions`.  Fall
        # back to the top-level name on NumPy versions that predate
        # `np.exceptions`.
        axis_error = getattr(np, "exceptions", np).AxisError
        x = np.array([[1, 2, 3], [4, 5, 6]])
        with pytest.raises(axis_error):
            variation(x, axis=10)

    def test_mean_zero(self):
        # Check that `variation` returns inf for a sequence that is not
        # identically zero but whose mean is zero.
        x = np.array([10, -3, 1, -4, -4])
        y = variation(x)
        assert_equal(y, np.inf)

        x2 = np.array([x, -10*x])
        y2 = variation(x2, axis=1)
        assert_equal(y2, [np.inf, np.inf])

    @pytest.mark.parametrize('x', [np.zeros(5), [], [1, 2, np.inf, 9]])
    def test_return_nan(self, x):
        # Test some cases where `variation` returns nan.
        y = variation(x)
        assert_equal(y, np.nan)

    @pytest.mark.parametrize('axis, expected',
                             [(0, []), (1, [np.nan]*3), (None, np.nan)])
    def test_2d_size_zero_with_axis(self, axis, expected):
        """Check each axis choice on an input with shape (3, 0)."""
        x = np.empty((3, 0))
        y = variation(x, axis=axis)
        assert_equal(y, expected)

    def test_neg_inf(self):
        # Edge case that produces -inf: ddof equals the number of non-nan
        # values, the values are not constant, and the mean is negative.
        x1 = np.array([-3, -5])
        assert_equal(variation(x1, ddof=2), -np.inf)

        x2 = np.array([[np.nan, 1, -10, np.nan],
                       [-20, -3, np.nan, np.nan]])
        assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
                     [-np.inf, -np.inf])

    @pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
    def test_combined_edge_cases(self, nan_policy):
        """Columns: all zeros, zero mean, all nan, and ordinary data.

        Both nan policies are expected to give the same result here.
        """
        x = np.array([[0, 10, np.nan, 1],
                      [0, -5, np.nan, 2],
                      [0, -5, np.nan, 3]])
        y = variation(x, axis=0, nan_policy=nan_policy)
        assert_allclose(y, [np.nan, np.inf, np.nan, np.sqrt(2/3)/2])

    @pytest.mark.parametrize(
        'ddof, expected',
        [(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
         (1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
         (2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
    )
    def test_more_nan_policy_omit_tests(self, ddof, expected):
        # The slightly strange formatting in the following array is my
        # attempt to maintain a clean tabular arrangement of the data while
        # satisfying the demands of pycodestyle.  Currently, E201 and E241
        # are not disabled by the `# noqa` annotation.
        nan = np.nan
        x = np.array([[1.0, 2.0, nan, 3.0],
                      [0.0, 4.0, 3.0, 1.0],
                      [nan, -.5, 0.5, nan],
                      [nan, 9.0, 9.0, nan],
                      [nan, nan, nan, nan],
                      [3.0, 3.0, 3.0, 3.0],
                      [0.0, 0.0, 0.0, 0.0]])
        v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
        assert_allclose(v, expected)

    def test_variation_ddof(self):
        # test variation with delta degrees of freedom
        # regression test for gh-13341
        a = np.array([1, 2, 3, 4, 5])
        nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
        y = variation(a, ddof=1)
        nan_y = variation(nan_a, nan_policy="omit", ddof=1)
        assert_allclose(y, np.sqrt(5/2)/3)
        # Omitting the nans must give exactly the same value as the
        # nan-free input.
        assert y == nan_y
Reference in New Issue
Block a user