using for loop to install conda package
This commit is contained in:
0
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__init__.py
vendored
Normal file
0
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__init__.py
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/__init__.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/__init__.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/common_tests.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/common_tests.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_axis_nan_policy.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_axis_nan_policy.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_binned_statistic.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_binned_statistic.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_boost_ufuncs.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_boost_ufuncs.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_contingency.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_contingency.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_continuous_basic.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_continuous_basic.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_crosstab.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_crosstab.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_basic.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_basic.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_distns.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_discrete_distns.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_distributions.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_distributions.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_entropy.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_entropy.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_fit.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_fit.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_hypotests.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_hypotests.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_kdeoth.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_kdeoth.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_morestats.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_morestats.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_basic.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_basic.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_extras.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_mstats_extras.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_multivariate.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_multivariate.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_odds_ratio.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_odds_ratio.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_qmc.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_qmc.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_rank.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_rank.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_relative_risk.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_relative_risk.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_resampling.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_resampling.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_sampling.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_sampling.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_stats.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_stats.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_tukeylambda_stats.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_tukeylambda_stats.cpython-311.pyc
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_variation.cpython-311.pyc
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/__pycache__/test_variation.cpython-311.pyc
vendored
Normal file
Binary file not shown.
450
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/common_tests.py
vendored
Normal file
450
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/common_tests.py
vendored
Normal file
@@ -0,0 +1,450 @@
|
||||
import pickle
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import numpy.ma.testutils as ma_npt
|
||||
|
||||
from scipy._lib._util import getfullargspec_no_self as _getfullargspec
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def check_named_results(res, attributes, ma=False):
|
||||
for i, attr in enumerate(attributes):
|
||||
if ma:
|
||||
ma_npt.assert_equal(res[i], getattr(res, attr))
|
||||
else:
|
||||
npt.assert_equal(res[i], getattr(res, attr))
|
||||
|
||||
|
||||
def check_normalization(distfn, args, distname):
|
||||
norm_moment = distfn.moment(0, *args)
|
||||
npt.assert_allclose(norm_moment, 1.0)
|
||||
|
||||
if distname == "rv_histogram_instance":
|
||||
atol, rtol = 1e-5, 0
|
||||
else:
|
||||
atol, rtol = 1e-7, 1e-7
|
||||
|
||||
normalization_expect = distfn.expect(lambda x: 1, args=args)
|
||||
npt.assert_allclose(normalization_expect, 1.0, atol=atol, rtol=rtol,
|
||||
err_msg=distname, verbose=True)
|
||||
|
||||
_a, _b = distfn.support(*args)
|
||||
normalization_cdf = distfn.cdf(_b, *args)
|
||||
npt.assert_allclose(normalization_cdf, 1.0)
|
||||
|
||||
|
||||
def check_moment(distfn, arg, m, v, msg):
|
||||
m1 = distfn.moment(1, *arg)
|
||||
m2 = distfn.moment(2, *arg)
|
||||
if not np.isinf(m):
|
||||
npt.assert_almost_equal(m1, m, decimal=10, err_msg=msg +
|
||||
' - 1st moment')
|
||||
else: # or np.isnan(m1),
|
||||
npt.assert_(np.isinf(m1),
|
||||
msg + ' - 1st moment -infinite, m1=%s' % str(m1))
|
||||
|
||||
if not np.isinf(v):
|
||||
npt.assert_almost_equal(m2 - m1 * m1, v, decimal=10, err_msg=msg +
|
||||
' - 2ndt moment')
|
||||
else: # or np.isnan(m2),
|
||||
npt.assert_(np.isinf(m2),
|
||||
msg + ' - 2nd moment -infinite, m2=%s' % str(m2))
|
||||
|
||||
|
||||
def check_mean_expect(distfn, arg, m, msg):
|
||||
if np.isfinite(m):
|
||||
m1 = distfn.expect(lambda x: x, arg)
|
||||
npt.assert_almost_equal(m1, m, decimal=5, err_msg=msg +
|
||||
' - 1st moment (expect)')
|
||||
|
||||
|
||||
def check_var_expect(distfn, arg, m, v, msg):
|
||||
kwargs = {'rtol': 5e-6} if msg == "rv_histogram_instance" else {}
|
||||
if np.isfinite(v):
|
||||
m2 = distfn.expect(lambda x: x*x, arg)
|
||||
npt.assert_allclose(m2, v + m*m, **kwargs)
|
||||
|
||||
|
||||
def check_skew_expect(distfn, arg, m, v, s, msg):
|
||||
if np.isfinite(s):
|
||||
m3e = distfn.expect(lambda x: np.power(x-m, 3), arg)
|
||||
npt.assert_almost_equal(m3e, s * np.power(v, 1.5),
|
||||
decimal=5, err_msg=msg + ' - skew')
|
||||
else:
|
||||
npt.assert_(np.isnan(s))
|
||||
|
||||
|
||||
def check_kurt_expect(distfn, arg, m, v, k, msg):
|
||||
if np.isfinite(k):
|
||||
m4e = distfn.expect(lambda x: np.power(x-m, 4), arg)
|
||||
npt.assert_allclose(m4e, (k + 3.) * np.power(v, 2), atol=1e-5, rtol=1e-5,
|
||||
err_msg=msg + ' - kurtosis')
|
||||
elif not np.isposinf(k):
|
||||
npt.assert_(np.isnan(k))
|
||||
|
||||
|
||||
def check_entropy(distfn, arg, msg):
|
||||
ent = distfn.entropy(*arg)
|
||||
npt.assert_(not np.isnan(ent), msg + 'test Entropy is nan')
|
||||
|
||||
|
||||
def check_private_entropy(distfn, args, superclass):
|
||||
# compare a generic _entropy with the distribution-specific implementation
|
||||
npt.assert_allclose(distfn._entropy(*args),
|
||||
superclass._entropy(distfn, *args))
|
||||
|
||||
|
||||
def check_entropy_vect_scale(distfn, arg):
|
||||
# check 2-d
|
||||
sc = np.asarray([[1, 2], [3, 4]])
|
||||
v_ent = distfn.entropy(*arg, scale=sc)
|
||||
s_ent = [distfn.entropy(*arg, scale=s) for s in sc.ravel()]
|
||||
s_ent = np.asarray(s_ent).reshape(v_ent.shape)
|
||||
assert_allclose(v_ent, s_ent, atol=1e-14)
|
||||
|
||||
# check invalid value, check cast
|
||||
sc = [1, 2, -3]
|
||||
v_ent = distfn.entropy(*arg, scale=sc)
|
||||
s_ent = [distfn.entropy(*arg, scale=s) for s in sc]
|
||||
s_ent = np.asarray(s_ent).reshape(v_ent.shape)
|
||||
assert_allclose(v_ent, s_ent, atol=1e-14)
|
||||
|
||||
|
||||
def check_edge_support(distfn, args):
|
||||
# Make sure that x=self.a and self.b are handled correctly.
|
||||
x = distfn.support(*args)
|
||||
if isinstance(distfn, stats.rv_discrete):
|
||||
x = x[0]-1, x[1]
|
||||
|
||||
npt.assert_equal(distfn.cdf(x, *args), [0.0, 1.0])
|
||||
npt.assert_equal(distfn.sf(x, *args), [1.0, 0.0])
|
||||
|
||||
if distfn.name not in ('skellam', 'dlaplace'):
|
||||
# with a = -inf, log(0) generates warnings
|
||||
npt.assert_equal(distfn.logcdf(x, *args), [-np.inf, 0.0])
|
||||
npt.assert_equal(distfn.logsf(x, *args), [0.0, -np.inf])
|
||||
|
||||
npt.assert_equal(distfn.ppf([0.0, 1.0], *args), x)
|
||||
npt.assert_equal(distfn.isf([0.0, 1.0], *args), x[::-1])
|
||||
|
||||
# out-of-bounds for isf & ppf
|
||||
npt.assert_(np.isnan(distfn.isf([-1, 2], *args)).all())
|
||||
npt.assert_(np.isnan(distfn.ppf([-1, 2], *args)).all())
|
||||
|
||||
|
||||
def check_named_args(distfn, x, shape_args, defaults, meths):
|
||||
## Check calling w/ named arguments.
|
||||
|
||||
# check consistency of shapes, numargs and _parse signature
|
||||
signature = _getfullargspec(distfn._parse_args)
|
||||
npt.assert_(signature.varargs is None)
|
||||
npt.assert_(signature.varkw is None)
|
||||
npt.assert_(not signature.kwonlyargs)
|
||||
npt.assert_(list(signature.defaults) == list(defaults))
|
||||
|
||||
shape_argnames = signature.args[:-len(defaults)] # a, b, loc=0, scale=1
|
||||
if distfn.shapes:
|
||||
shapes_ = distfn.shapes.replace(',', ' ').split()
|
||||
else:
|
||||
shapes_ = ''
|
||||
npt.assert_(len(shapes_) == distfn.numargs)
|
||||
npt.assert_(len(shapes_) == len(shape_argnames))
|
||||
|
||||
# check calling w/ named arguments
|
||||
shape_args = list(shape_args)
|
||||
|
||||
vals = [meth(x, *shape_args) for meth in meths]
|
||||
npt.assert_(np.all(np.isfinite(vals)))
|
||||
|
||||
names, a, k = shape_argnames[:], shape_args[:], {}
|
||||
while names:
|
||||
k.update({names.pop(): a.pop()})
|
||||
v = [meth(x, *a, **k) for meth in meths]
|
||||
npt.assert_array_equal(vals, v)
|
||||
if 'n' not in k.keys():
|
||||
# `n` is first parameter of moment(), so can't be used as named arg
|
||||
npt.assert_equal(distfn.moment(1, *a, **k),
|
||||
distfn.moment(1, *shape_args))
|
||||
|
||||
# unknown arguments should not go through:
|
||||
k.update({'kaboom': 42})
|
||||
assert_raises(TypeError, distfn.cdf, x, **k)
|
||||
|
||||
|
||||
def check_random_state_property(distfn, args):
|
||||
# check the random_state attribute of a distribution *instance*
|
||||
|
||||
# This test fiddles with distfn.random_state. This breaks other tests,
|
||||
# hence need to save it and then restore.
|
||||
rndm = distfn.random_state
|
||||
|
||||
# baseline: this relies on the global state
|
||||
np.random.seed(1234)
|
||||
distfn.random_state = None
|
||||
r0 = distfn.rvs(*args, size=8)
|
||||
|
||||
# use an explicit instance-level random_state
|
||||
distfn.random_state = 1234
|
||||
r1 = distfn.rvs(*args, size=8)
|
||||
npt.assert_equal(r0, r1)
|
||||
|
||||
distfn.random_state = np.random.RandomState(1234)
|
||||
r2 = distfn.rvs(*args, size=8)
|
||||
npt.assert_equal(r0, r2)
|
||||
|
||||
# check that np.random.Generator can be used (numpy >= 1.17)
|
||||
if hasattr(np.random, 'default_rng'):
|
||||
# obtain a np.random.Generator object
|
||||
rng = np.random.default_rng(1234)
|
||||
distfn.rvs(*args, size=1, random_state=rng)
|
||||
|
||||
# can override the instance-level random_state for an individual .rvs call
|
||||
distfn.random_state = 2
|
||||
orig_state = distfn.random_state.get_state()
|
||||
|
||||
r3 = distfn.rvs(*args, size=8, random_state=np.random.RandomState(1234))
|
||||
npt.assert_equal(r0, r3)
|
||||
|
||||
# ... and that does not alter the instance-level random_state!
|
||||
npt.assert_equal(distfn.random_state.get_state(), orig_state)
|
||||
|
||||
# finally, restore the random_state
|
||||
distfn.random_state = rndm
|
||||
|
||||
|
||||
def check_meth_dtype(distfn, arg, meths):
|
||||
q0 = [0.25, 0.5, 0.75]
|
||||
x0 = distfn.ppf(q0, *arg)
|
||||
x_cast = [x0.astype(tp) for tp in
|
||||
(np.int_, np.float16, np.float32, np.float64)]
|
||||
|
||||
for x in x_cast:
|
||||
# casting may have clipped the values, exclude those
|
||||
distfn._argcheck(*arg)
|
||||
x = x[(distfn.a < x) & (x < distfn.b)]
|
||||
for meth in meths:
|
||||
val = meth(x, *arg)
|
||||
npt.assert_(val.dtype == np.float_)
|
||||
|
||||
|
||||
def check_ppf_dtype(distfn, arg):
|
||||
q0 = np.asarray([0.25, 0.5, 0.75])
|
||||
q_cast = [q0.astype(tp) for tp in (np.float16, np.float32, np.float64)]
|
||||
for q in q_cast:
|
||||
for meth in [distfn.ppf, distfn.isf]:
|
||||
val = meth(q, *arg)
|
||||
npt.assert_(val.dtype == np.float_)
|
||||
|
||||
|
||||
def check_cmplx_deriv(distfn, arg):
|
||||
# Distributions allow complex arguments.
|
||||
def deriv(f, x, *arg):
|
||||
x = np.asarray(x)
|
||||
h = 1e-10
|
||||
return (f(x + h*1j, *arg)/h).imag
|
||||
|
||||
x0 = distfn.ppf([0.25, 0.51, 0.75], *arg)
|
||||
x_cast = [x0.astype(tp) for tp in
|
||||
(np.int_, np.float16, np.float32, np.float64)]
|
||||
|
||||
for x in x_cast:
|
||||
# casting may have clipped the values, exclude those
|
||||
distfn._argcheck(*arg)
|
||||
x = x[(distfn.a < x) & (x < distfn.b)]
|
||||
|
||||
pdf, cdf, sf = distfn.pdf(x, *arg), distfn.cdf(x, *arg), distfn.sf(x, *arg)
|
||||
assert_allclose(deriv(distfn.cdf, x, *arg), pdf, rtol=1e-5)
|
||||
assert_allclose(deriv(distfn.logcdf, x, *arg), pdf/cdf, rtol=1e-5)
|
||||
|
||||
assert_allclose(deriv(distfn.sf, x, *arg), -pdf, rtol=1e-5)
|
||||
assert_allclose(deriv(distfn.logsf, x, *arg), -pdf/sf, rtol=1e-5)
|
||||
|
||||
assert_allclose(deriv(distfn.logpdf, x, *arg),
|
||||
deriv(distfn.pdf, x, *arg) / distfn.pdf(x, *arg),
|
||||
rtol=1e-5)
|
||||
|
||||
|
||||
def check_pickling(distfn, args):
|
||||
# check that a distribution instance pickles and unpickles
|
||||
# pay special attention to the random_state property
|
||||
|
||||
# save the random_state (restore later)
|
||||
rndm = distfn.random_state
|
||||
|
||||
# check unfrozen
|
||||
distfn.random_state = 1234
|
||||
distfn.rvs(*args, size=8)
|
||||
s = pickle.dumps(distfn)
|
||||
r0 = distfn.rvs(*args, size=8)
|
||||
|
||||
unpickled = pickle.loads(s)
|
||||
r1 = unpickled.rvs(*args, size=8)
|
||||
npt.assert_equal(r0, r1)
|
||||
|
||||
# also smoke test some methods
|
||||
medians = [distfn.ppf(0.5, *args), unpickled.ppf(0.5, *args)]
|
||||
npt.assert_equal(medians[0], medians[1])
|
||||
npt.assert_equal(distfn.cdf(medians[0], *args),
|
||||
unpickled.cdf(medians[1], *args))
|
||||
|
||||
# check frozen pickling/unpickling with rvs
|
||||
frozen_dist = distfn(*args)
|
||||
pkl = pickle.dumps(frozen_dist)
|
||||
unpickled = pickle.loads(pkl)
|
||||
|
||||
r0 = frozen_dist.rvs(size=8)
|
||||
r1 = unpickled.rvs(size=8)
|
||||
npt.assert_equal(r0, r1)
|
||||
|
||||
# check pickling/unpickling of .fit method
|
||||
if hasattr(distfn, "fit"):
|
||||
fit_function = distfn.fit
|
||||
pickled_fit_function = pickle.dumps(fit_function)
|
||||
unpickled_fit_function = pickle.loads(pickled_fit_function)
|
||||
assert fit_function.__name__ == unpickled_fit_function.__name__ == "fit"
|
||||
|
||||
# restore the random_state
|
||||
distfn.random_state = rndm
|
||||
|
||||
|
||||
def check_freezing(distfn, args):
|
||||
# regression test for gh-11089: freezing a distribution fails
|
||||
# if loc and/or scale are specified
|
||||
if isinstance(distfn, stats.rv_continuous):
|
||||
locscale = {'loc': 1, 'scale': 2}
|
||||
else:
|
||||
locscale = {'loc': 1}
|
||||
|
||||
rv = distfn(*args, **locscale)
|
||||
assert rv.a == distfn(*args).a
|
||||
assert rv.b == distfn(*args).b
|
||||
|
||||
|
||||
def check_rvs_broadcast(distfunc, distname, allargs, shape, shape_only, otype):
|
||||
np.random.seed(123)
|
||||
sample = distfunc.rvs(*allargs)
|
||||
assert_equal(sample.shape, shape, "%s: rvs failed to broadcast" % distname)
|
||||
if not shape_only:
|
||||
rvs = np.vectorize(lambda *allargs: distfunc.rvs(*allargs), otypes=otype)
|
||||
np.random.seed(123)
|
||||
expected = rvs(*allargs)
|
||||
assert_allclose(sample, expected, rtol=1e-13)
|
||||
|
||||
|
||||
def check_deprecation_warning_gh5982_moment(distfn, arg, distname):
|
||||
# See description of cases that need to be tested in the definition of
|
||||
# scipy.stats.rv_generic.moment
|
||||
shapes = [] if distfn.shapes is None else distfn.shapes.split(", ")
|
||||
kwd_shapes = dict(zip(shapes, arg or [])) # dictionary of shape kwds
|
||||
n = kwd_shapes.pop('n', None)
|
||||
|
||||
message1 = "moment() missing 1 required positional argument"
|
||||
message2 = "_parse_args() missing 1 required positional argument: 'n'"
|
||||
message3 = "moment() got multiple values for first argument"
|
||||
|
||||
if 'n' in shapes:
|
||||
expected = distfn.mean(n=n, **kwd_shapes)
|
||||
|
||||
# A1
|
||||
res = distfn.moment(1, n=n, **kwd_shapes)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# A2
|
||||
with assert_raises(TypeError, match=re.escape(message1)):
|
||||
distfn.moment(n=n, **kwd_shapes)
|
||||
|
||||
# A3
|
||||
# if `n` is not provided at all
|
||||
with assert_raises(TypeError, match=re.escape(message2)):
|
||||
distfn.moment(1, **kwd_shapes)
|
||||
# if `n` is provided as a positional argument
|
||||
res = distfn.moment(1, *arg)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# A4
|
||||
with assert_raises(TypeError, match=re.escape(message1)):
|
||||
distfn.moment(**kwd_shapes)
|
||||
|
||||
else:
|
||||
expected = distfn.mean(**kwd_shapes)
|
||||
|
||||
# B1
|
||||
with assert_raises(TypeError, match=re.escape(message3)):
|
||||
res = distfn.moment(1, n=1, **kwd_shapes)
|
||||
|
||||
# B2
|
||||
with np.testing.assert_warns(DeprecationWarning):
|
||||
res = distfn.moment(n=1, **kwd_shapes)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# B3
|
||||
res = distfn.moment(1, *arg)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# B4
|
||||
with assert_raises(TypeError, match=re.escape(message1)):
|
||||
distfn.moment(**kwd_shapes)
|
||||
|
||||
|
||||
def check_deprecation_warning_gh5982_interval(distfn, arg, distname):
|
||||
# See description of cases that need to be tested in the definition of
|
||||
# scipy.stats.rv_generic.moment
|
||||
shapes = [] if distfn.shapes is None else distfn.shapes.split(", ")
|
||||
kwd_shapes = dict(zip(shapes, arg or [])) # dictionary of shape kwds
|
||||
alpha = kwd_shapes.pop('alpha', None)
|
||||
|
||||
def my_interval(*args, **kwds):
|
||||
return (distfn.ppf(0.25, *args, **kwds),
|
||||
distfn.ppf(0.75, *args, **kwds))
|
||||
|
||||
message1 = "interval() missing 1 required positional argument"
|
||||
message2 = "_parse_args() missing 1 required positional argument: 'alpha'"
|
||||
message3 = "interval() got multiple values for first argument"
|
||||
|
||||
if 'alpha' in shapes:
|
||||
expected = my_interval(alpha=alpha, **kwd_shapes)
|
||||
|
||||
# A1
|
||||
res = distfn.interval(0.5, alpha=alpha, **kwd_shapes)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# A2
|
||||
with assert_raises(TypeError, match=re.escape(message1)):
|
||||
distfn.interval(alpha=alpha, **kwd_shapes)
|
||||
|
||||
# A3
|
||||
# if `alpha` is not provided at all
|
||||
with assert_raises(TypeError, match=re.escape(message2)):
|
||||
distfn.interval(0.5, **kwd_shapes)
|
||||
# if `alpha` is provided as a positional argument
|
||||
res = distfn.interval(0.5, *arg)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# A4
|
||||
with assert_raises(TypeError, match=re.escape(message1)):
|
||||
distfn.interval(**kwd_shapes)
|
||||
|
||||
else:
|
||||
expected = my_interval(**kwd_shapes)
|
||||
|
||||
# B1
|
||||
with assert_raises(TypeError, match=re.escape(message3)):
|
||||
res = distfn.interval(0.5, alpha=1, **kwd_shapes)
|
||||
|
||||
# B2
|
||||
with np.testing.assert_warns(DeprecationWarning):
|
||||
res = distfn.interval(alpha=0.5, **kwd_shapes)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# B3
|
||||
res = distfn.interval(0.5, *arg)
|
||||
assert_allclose(res, expected)
|
||||
|
||||
# B4
|
||||
with assert_raises(TypeError, match=re.escape(message1)):
|
||||
distfn.interval(**kwd_shapes)
|
||||
Binary file not shown.
607
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/fisher_exact_results_from_r.py
vendored
Normal file
607
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/fisher_exact_results_from_r.py
vendored
Normal file
@@ -0,0 +1,607 @@
|
||||
# DO NOT EDIT THIS FILE!
|
||||
# This file was generated by the R script
|
||||
# generate_fisher_exact_results_from_r.R
|
||||
# The script was run with R version 3.6.2 (2019-12-12) at 2020-11-09 06:16:09
|
||||
|
||||
|
||||
from collections import namedtuple
|
||||
import numpy as np
|
||||
|
||||
|
||||
Inf = np.inf
|
||||
|
||||
Parameters = namedtuple('Parameters',
|
||||
['table', 'confidence_level', 'alternative'])
|
||||
RResults = namedtuple('RResults',
|
||||
['pvalue', 'conditional_odds_ratio',
|
||||
'conditional_odds_ratio_ci'])
|
||||
data = [
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.04035202926536294,
|
||||
2.662846672960251))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.004668988338943325,
|
||||
0.895792956493601))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.4153910882532168,
|
||||
259.2593661129417))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.08056337526385809,
|
||||
1.22704788545557))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.1176691231650079,
|
||||
1.787463657995973))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.003857141267422399,
|
||||
2.407369893767229))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.451643573543705))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869288))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869287))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(349.2595113327733,
|
||||
3630.382605689872))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(152.4166024390096,
|
||||
1425.700792178893))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8520462587912048,
|
||||
1.340148950273938))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.02502345007115455,
|
||||
6.304424772117853))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.001923034001462487,
|
||||
1.53670836950172))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2397970951413721,
|
||||
1291.342011095509))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.05127576113762925,
|
||||
1.717176678806983))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.07498546954483619,
|
||||
2.506969905199901))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.0007743881879531337,
|
||||
4.170192301163831))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.642491011905582))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(270.0334165523604,
|
||||
5461.333333326708))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(116.7944750275836,
|
||||
1931.995993191814))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.7949398282935892,
|
||||
1.436229679394333))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.797867027270803))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
0.6785254803404526))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
127.8497388102893))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.032332939718425))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.502407513296985))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.820421051562392))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.06224603077045))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3045.460216525746))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1186.440170942579))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.293551891610822))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4.375946050832565))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.235282118191202))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
657.2063583945989))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.498867660683128))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.186159386716762))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3.335351451901569))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.075407697450433))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969122))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969123))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4503.078257659934))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1811.766127544222))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.396522811516685))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.05119649909830196,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.007163749169069961,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.5493234651081089,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.1003538933958604,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.146507416280863,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.007821681994077808,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(397.784359748113,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(174.7148056880929,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8828406663967776,
|
||||
Inf))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.03045407081240429,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.002768053063547901,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2998184792279909,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.06180414342643172,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.09037094010066403,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.001521592095430679,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359722,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359725,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(297.9619252357688,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(130.3213490295859,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8176272148267533,
|
||||
Inf))),
|
||||
]
|
||||
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-cdf-sample-data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-cdf-sample-data.npy
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-pdf-sample-data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-Z1-pdf-sample-data.npy
vendored
Normal file
Binary file not shown.
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-loc-scale-sample-data.npy
vendored
Normal file
BIN
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/levy_stable/stable-loc-scale-sample-data.npy
vendored
Normal file
Binary file not shown.
108
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/AtmWtAg.dat
vendored
Normal file
108
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/AtmWtAg.dat
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: AtmWtAg (AtmWtAg.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 108)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Powell, L.J., Murphy, T.J. and Gramlich, J.W. (1982).
|
||||
"The Absolute Isotopic Abundance & Atomic Weight
|
||||
of a Reference Sample of Silver".
|
||||
NBS Journal of Research, 87, pp. 9-19.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
2 Treatments
|
||||
24 Replicates/Cell
|
||||
48 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 3 Parameters (mu, tau_1, tau_2)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
|
||||
Between Instrument 1 3.63834187500000E-09 3.63834187500000E-09 1.59467335677930E+01
|
||||
Within Instrument 46 1.04951729166667E-08 2.28155932971014E-10
|
||||
|
||||
Certified R-Squared 2.57426544538321E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.51048314446410E-05
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument AgWt
|
||||
1 107.8681568
|
||||
1 107.8681465
|
||||
1 107.8681572
|
||||
1 107.8681785
|
||||
1 107.8681446
|
||||
1 107.8681903
|
||||
1 107.8681526
|
||||
1 107.8681494
|
||||
1 107.8681616
|
||||
1 107.8681587
|
||||
1 107.8681519
|
||||
1 107.8681486
|
||||
1 107.8681419
|
||||
1 107.8681569
|
||||
1 107.8681508
|
||||
1 107.8681672
|
||||
1 107.8681385
|
||||
1 107.8681518
|
||||
1 107.8681662
|
||||
1 107.8681424
|
||||
1 107.8681360
|
||||
1 107.8681333
|
||||
1 107.8681610
|
||||
1 107.8681477
|
||||
2 107.8681079
|
||||
2 107.8681344
|
||||
2 107.8681513
|
||||
2 107.8681197
|
||||
2 107.8681604
|
||||
2 107.8681385
|
||||
2 107.8681642
|
||||
2 107.8681365
|
||||
2 107.8681151
|
||||
2 107.8681082
|
||||
2 107.8681517
|
||||
2 107.8681448
|
||||
2 107.8681198
|
||||
2 107.8681482
|
||||
2 107.8681334
|
||||
2 107.8681609
|
||||
2 107.8681101
|
||||
2 107.8681512
|
||||
2 107.8681469
|
||||
2 107.8681360
|
||||
2 107.8681254
|
||||
2 107.8681261
|
||||
2 107.8681450
|
||||
2 107.8681368
|
||||
85
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SiRstv.dat
vendored
Normal file
85
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SiRstv.dat
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SiRstv (SiRstv.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 85)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Ehrstein, James and Croarkin, M. Carroll.
|
||||
Unpublished NIST dataset.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
5 Treatments
|
||||
5 Replicates/Cell
|
||||
25 Observations
|
||||
3 Constant Leading Digits
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 6 Parameters (mu,tau_1, ... , tau_5)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Instrument 4 5.11462616000000E-02 1.27865654000000E-02 1.18046237440255E+00
|
||||
Within Instrument 20 2.16636560000000E-01 1.08318280000000E-02
|
||||
|
||||
Certified R-Squared 1.90999039051129E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.04076068334656E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument Resistance
|
||||
1 196.3052
|
||||
1 196.1240
|
||||
1 196.1890
|
||||
1 196.2569
|
||||
1 196.3403
|
||||
2 196.3042
|
||||
2 196.3825
|
||||
2 196.1669
|
||||
2 196.3257
|
||||
2 196.0422
|
||||
3 196.1303
|
||||
3 196.2005
|
||||
3 196.2889
|
||||
3 196.0343
|
||||
3 196.1811
|
||||
4 196.2795
|
||||
4 196.1748
|
||||
4 196.1494
|
||||
4 196.1485
|
||||
4 195.9885
|
||||
5 196.2119
|
||||
5 196.1051
|
||||
5 196.1850
|
||||
5 196.0052
|
||||
5 196.2090
|
||||
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs01.dat
vendored
Normal file
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs01.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs01 (SmLs01.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
1 Constant Leading Digit
|
||||
Lower Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1.4
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
2 1.3
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
3 1.5
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
4 1.3
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
5 1.5
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
6 1.3
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
7 1.5
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
8 1.3
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
9 1.5
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs02.dat
vendored
Normal file
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs02.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs03.dat
vendored
Normal file
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs03.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs04.dat
vendored
Normal file
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs04.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs04 (SmLs04.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000.4
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
2 1000000.3
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
3 1000000.5
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
4 1000000.3
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
5 1000000.5
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
6 1000000.3
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
7 1000000.5
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
8 1000000.3
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
9 1000000.5
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs05.dat
vendored
Normal file
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs05.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs06.dat
vendored
Normal file
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs06.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs07.dat
vendored
Normal file
249
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs07.dat
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs07 (SmLs07.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
13 Constant Leading Digits
|
||||
Higher Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000000000.4
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
2 1000000000000.3
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
3 1000000000000.5
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
4 1000000000000.3
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
5 1000000000000.5
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
6 1000000000000.3
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
7 1000000000000.5
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
8 1000000000000.3
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
9 1000000000000.5
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs08.dat
vendored
Normal file
1869
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs08.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs09.dat
vendored
Normal file
18069
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_anova/SmLs09.dat
vendored
Normal file
File diff suppressed because it is too large
Load Diff
97
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_linregress/Norris.dat
vendored
Normal file
97
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/nist_linregress/Norris.dat
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: Norris (Norris.dat)
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 31 to 46)
|
||||
Data (lines 61 to 96)
|
||||
|
||||
Procedure: Linear Least Squares Regression
|
||||
|
||||
Reference: Norris, J., NIST.
|
||||
Calibration of Ozone Monitors.
|
||||
|
||||
Data: 1 Response Variable (y)
|
||||
1 Predictor Variable (x)
|
||||
36 Observations
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
Model: Linear Class
|
||||
2 Parameters (B0,B1)
|
||||
|
||||
y = B0 + B1*x + e
|
||||
|
||||
|
||||
|
||||
Certified Regression Statistics
|
||||
|
||||
Standard Deviation
|
||||
Parameter Estimate of Estimate
|
||||
|
||||
B0 -0.262323073774029 0.232818234301152
|
||||
B1 1.00211681802045 0.429796848199937E-03
|
||||
|
||||
Residual
|
||||
Standard Deviation 0.884796396144373
|
||||
|
||||
R-Squared 0.999993745883712
|
||||
|
||||
|
||||
Certified Analysis of Variance Table
|
||||
|
||||
Source of Degrees of Sums of Mean
|
||||
Variation Freedom Squares Squares F Statistic
|
||||
|
||||
Regression 1 4255954.13232369 4255954.13232369 5436385.54079785
|
||||
Residual 34 26.6173985294224 0.782864662630069
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: y x
|
||||
0.1 0.2
|
||||
338.8 337.4
|
||||
118.1 118.2
|
||||
888.0 884.6
|
||||
9.2 10.1
|
||||
228.1 226.5
|
||||
668.5 666.3
|
||||
998.5 996.3
|
||||
449.1 448.6
|
||||
778.9 777.0
|
||||
559.2 558.2
|
||||
0.3 0.4
|
||||
0.1 0.6
|
||||
778.1 775.5
|
||||
668.8 666.9
|
||||
339.3 338.0
|
||||
448.9 447.5
|
||||
10.8 11.6
|
||||
557.7 556.0
|
||||
228.3 228.1
|
||||
998.0 995.8
|
||||
888.8 887.6
|
||||
119.6 120.2
|
||||
0.3 0.3
|
||||
0.6 0.3
|
||||
557.6 556.8
|
||||
339.3 339.1
|
||||
888.0 887.2
|
||||
998.5 999.0
|
||||
778.9 779.0
|
||||
10.2 11.1
|
||||
117.6 118.3
|
||||
228.9 229.2
|
||||
668.4 669.1
|
||||
449.2 448.9
|
||||
0.2 0.5
|
||||
|
||||
1499
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/studentized_range_mpmath_ref.json
vendored
Normal file
1499
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/data/studentized_range_mpmath_ref.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1044
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py
vendored
Normal file
1044
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
568
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py
vendored
Normal file
568
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py
vendored
Normal file
@@ -0,0 +1,568 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.stats import (binned_statistic, binned_statistic_2d,
|
||||
binned_statistic_dd)
|
||||
from scipy._lib._util import check_random_state
|
||||
|
||||
from .common_tests import check_named_results
|
||||
|
||||
|
||||
class TestBinnedStatistic:
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
rng = check_random_state(9865)
|
||||
cls.x = rng.uniform(size=100)
|
||||
cls.y = rng.uniform(size=100)
|
||||
cls.v = rng.uniform(size=100)
|
||||
cls.X = rng.uniform(size=(100, 3))
|
||||
cls.w = rng.uniform(size=100)
|
||||
cls.u = rng.uniform(size=100) + 1e6
|
||||
|
||||
def test_1d_count(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
|
||||
count2, edges2 = np.histogram(x, bins=10)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_gh5927(self):
|
||||
# smoke test for gh5927 - binned_statistic was using `is` for string
|
||||
# comparison
|
||||
x = self.x
|
||||
v = self.v
|
||||
statistics = ['mean', 'median', 'count', 'sum']
|
||||
for statistic in statistics:
|
||||
binned_statistic(x, v, statistic, bins=10)
|
||||
|
||||
def test_big_number_std(self):
|
||||
# tests for numerical stability of std calculation
|
||||
# see issue gh-10126 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_empty_bins_std(self):
|
||||
# tests that std returns gives nan for empty bins
|
||||
x = self.x
|
||||
u = self.u
|
||||
print(binned_statistic(x, u, 'count', bins=1000))
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=1000)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=1000)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_non_finite_inputs_and_int_bins(self):
|
||||
# if either `values` or `sample` contain np.inf or np.nan throw
|
||||
# see issue gh-9010 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
orig = u[0]
|
||||
u[0] = np.inf
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'std', bins=10)
|
||||
# need to test for non-python specific ints, e.g. np.int8, np.int64
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'std',
|
||||
bins=np.int64(10))
|
||||
u[0] = np.nan
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'count', bins=10)
|
||||
# replace original value, u belongs the class
|
||||
u[0] = orig
|
||||
|
||||
def test_1d_result_attributes(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic(x, v, 'count', bins=10)
|
||||
attributes = ('statistic', 'bin_edges', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_1d_sum(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
|
||||
sum2, edges2 = np.histogram(x, bins=10, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_mean(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_std(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_min(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'min', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.min, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_max(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_median(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_bincode(self):
|
||||
x = self.x[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
|
||||
bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
|
||||
1, 2, 1])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
assert_allclose(bcount, count1)
|
||||
|
||||
def test_1d_range_keyword(self):
|
||||
# Regression test for gh-3063, range can be (min, max) or [(min, max)]
|
||||
np.random.seed(9865)
|
||||
x = np.arange(30)
|
||||
data = np.random.random(30)
|
||||
|
||||
mean, bins, _ = binned_statistic(x[:15], data[:15])
|
||||
mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
|
||||
mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))
|
||||
|
||||
assert_allclose(mean, mean_range)
|
||||
assert_allclose(bins, bins_range)
|
||||
assert_allclose(mean, mean_range2)
|
||||
assert_allclose(bins, bins_range2)
|
||||
|
||||
def test_1d_multi_values(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
|
||||
stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_count(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=5)
|
||||
count2, binx2, biny2 = np.histogram2d(x, y, bins=5)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_result_attributes(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic_2d(x, y, v, 'count', bins=5)
|
||||
attributes = ('statistic', 'x_edge', 'y_edge', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_2d_sum(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
|
||||
sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean_unicode(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_std(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_min(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'min', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.min, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_max(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'max', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.max, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_median(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'median', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(
|
||||
x, y, v, np.median, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_bincode(self):
|
||||
x = self.x[:20]
|
||||
y = self.y[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=3)
|
||||
bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
|
||||
6, 11, 16, 6, 6, 11, 8])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_2d_multi_values(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, binx1v, biny1v, bc1v = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=8)
|
||||
stat1w, binx1w, biny1w, bc1w = binned_statistic_2d(
|
||||
x, y, w, 'mean', bins=8)
|
||||
stat2, binx2, biny2, bc2 = binned_statistic_2d(
|
||||
x, y, [v, w], 'mean', bins=8)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(binx1v, binx2)
|
||||
assert_allclose(biny1w, biny2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_binnumbers_unraveled(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20)
|
||||
stat, edgesy, bcy = binned_statistic(y, v, 'mean', bins=10)
|
||||
|
||||
stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True)
|
||||
|
||||
bcx3 = np.searchsorted(edgesx, x, side='right')
|
||||
bcy3 = np.searchsorted(edgesy, y, side='right')
|
||||
|
||||
# `numpy.searchsorted` is non-inclusive on right-edge, compensate
|
||||
bcx3[x == x.max()] -= 1
|
||||
bcy3[y == y.max()] -= 1
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcx3, bc2[0])
|
||||
assert_allclose(bcy3, bc2[1])
|
||||
|
||||
def test_dd_count(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
count2, edges2 = np.histogramdd(X, bins=3)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_result_attributes(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
attributes = ('statistic', 'bin_edges', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_dd_sum(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
|
||||
sum2, edges2 = np.histogramdd(X, bins=3, weights=v)
|
||||
sum3, edges3, bc = binned_statistic_dd(X, v, np.sum, bins=3)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
assert_allclose(sum1, sum3)
|
||||
assert_allclose(edges1, edges3)
|
||||
|
||||
def test_dd_mean(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_std(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_min(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'min', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.min, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_max(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_median(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_bincode(self):
|
||||
X = self.X[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
|
||||
32, 36, 91, 43, 87, 81, 81])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_dd_multi_values(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
for stat in ["count", "sum", "mean", "std", "min", "max", "median",
|
||||
np.std]:
|
||||
stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8)
|
||||
stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8)
|
||||
stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8)
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(edges1w, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_dd_binnumbers_unraveled(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
|
||||
stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
|
||||
stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)
|
||||
|
||||
stat2, edges2, bc2 = binned_statistic_dd(
|
||||
X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcz, bc2[2])
|
||||
|
||||
def test_dd_binned_statistic_result(self):
|
||||
# NOTE: tests the reuse of bin_edges from previous call
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random((10000))
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = (bins, bins, bins)
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
stat = result.statistic
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean',
|
||||
binned_statistic_result=result)
|
||||
stat2 = result.statistic
|
||||
|
||||
assert_allclose(stat, stat2)
|
||||
|
||||
def test_dd_zero_dedges(self):
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random((10000))
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = np.append(bins, 1)
|
||||
bins = (bins, bins, bins)
|
||||
with assert_raises(ValueError, match='difference is numerically 0'):
|
||||
binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
|
||||
def test_dd_range_errors(self):
|
||||
# Test that descriptive exceptions are raised as appropriate for bad
|
||||
# values of the `range` argument. (See gh-12996)
|
||||
with assert_raises(ValueError,
|
||||
match='In range, start must be <= stop'):
|
||||
binned_statistic_dd([self.y], self.v,
|
||||
range=[[1, 0]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='In dimension 1 of range, start must be <= stop'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[1, 0], [0, 1]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='In dimension 2 of range, start must be <= stop'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[0, 1], [1, 0]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='range given for 1 dimensions; 2 required'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[0, 1]])
|
||||
|
||||
def test_binned_statistic_float32(self):
|
||||
X = np.array([0, 0.42358226], dtype=np.float32)
|
||||
stat, _, _ = binned_statistic(X, None, 'count', bins=5)
|
||||
assert_allclose(stat, np.array([1, 0, 0, 0, 1], dtype=np.float64))
|
||||
|
||||
def test_gh14332(self):
|
||||
# Test the wrong output when the `sample` is close to bin edge
|
||||
x = []
|
||||
size = 20
|
||||
for i in range(size):
|
||||
x += [1-0.1**i]
|
||||
|
||||
bins = np.linspace(0,1,11)
|
||||
sum1, edges1, bc = binned_statistic_dd(x, np.ones(len(x)),
|
||||
bins=[bins], statistic='sum')
|
||||
sum2, edges2 = np.histogram(x, bins=bins)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1[0], edges2)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float64, np.complex128])
|
||||
@pytest.mark.parametrize("statistic", [np.mean, np.median, np.sum, np.std,
|
||||
np.min, np.max, 'count',
|
||||
lambda x: (x**2).sum(),
|
||||
lambda x: (x**2).sum() * 1j])
|
||||
def test_dd_all(self, dtype, statistic):
|
||||
def ref_statistic(x):
|
||||
return len(x) if statistic == 'count' else statistic(x)
|
||||
|
||||
rng = np.random.default_rng(3704743126639371)
|
||||
n = 10
|
||||
x = rng.random(size=n)
|
||||
i = x >= 0.5
|
||||
v = rng.random(size=n)
|
||||
if dtype is np.complex128:
|
||||
v = v + rng.random(size=n)*1j
|
||||
|
||||
stat, _, _ = binned_statistic_dd(x, v, statistic, bins=2)
|
||||
ref = np.array([ref_statistic(v[~i]), ref_statistic(v[i])])
|
||||
assert_allclose(stat, ref)
|
||||
assert stat.dtype == np.result_type(ref.dtype, np.float64)
|
||||
44
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_boost_ufuncs.py
vendored
Normal file
44
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_boost_ufuncs.py
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy.stats import _boost
|
||||
|
||||
|
||||
type_char_to_type_tol = {'f': (np.float32, 32*np.finfo(np.float32).eps),
|
||||
'd': (np.float64, 32*np.finfo(np.float64).eps),
|
||||
'g': (np.longdouble, 32*np.finfo(np.longdouble).eps)}
|
||||
|
||||
|
||||
# Each item in this list is
|
||||
# (func, args, expected_value)
|
||||
# All the values can be represented exactly, even with np.float32.
|
||||
#
|
||||
# This is not an exhaustive test data set of all the functions!
|
||||
# It is a spot check of several functions, primarily for
|
||||
# checking that the different data types are handled correctly.
|
||||
test_data = [
|
||||
(_boost._beta_cdf, (0.5, 2, 3), 0.6875),
|
||||
(_boost._beta_ppf, (0.6875, 2, 3), 0.5),
|
||||
(_boost._beta_pdf, (0.5, 2, 3), 1.5),
|
||||
(_boost._beta_sf, (0.5, 2, 1), 0.75),
|
||||
(_boost._beta_isf, (0.75, 2, 1), 0.5),
|
||||
(_boost._binom_cdf, (1, 3, 0.5), 0.5),
|
||||
(_boost._binom_pdf, (1, 4, 0.5), 0.25),
|
||||
(_boost._hypergeom_cdf, (2, 3, 5, 6), 0.5),
|
||||
(_boost._nbinom_cdf, (1, 4, 0.25), 0.015625),
|
||||
(_boost._ncf_mean, (10, 12, 2.5), 1.5),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
|
||||
@pytest.mark.parametrize('func, args, expected', test_data)
|
||||
def test_stats_boost_ufunc(func, args, expected):
|
||||
type_sigs = func.types
|
||||
type_chars = [sig.split('->')[-1] for sig in type_sigs]
|
||||
for type_char in type_chars:
|
||||
typ, rtol = type_char_to_type_tol[type_char]
|
||||
args = [typ(arg) for arg in args]
|
||||
value = func(*args)
|
||||
assert isinstance(value, typ)
|
||||
assert_allclose(value, expected, rtol=rtol)
|
||||
241
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_contingency.py
vendored
Normal file
241
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_contingency.py
vendored
Normal file
@@ -0,0 +1,241 @@
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_array_equal,
|
||||
assert_array_almost_equal, assert_approx_equal,
|
||||
assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.special import xlogy
|
||||
from scipy.stats.contingency import (margins, expected_freq,
|
||||
chi2_contingency, association)
|
||||
|
||||
|
||||
def test_margins():
|
||||
a = np.array([1])
|
||||
m = margins(a)
|
||||
assert_equal(len(m), 1)
|
||||
m0 = m[0]
|
||||
assert_array_equal(m0, np.array([1]))
|
||||
|
||||
a = np.array([[1]])
|
||||
m0, m1 = margins(a)
|
||||
expected0 = np.array([[1]])
|
||||
expected1 = np.array([[1]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
|
||||
a = np.arange(12).reshape(2, 6)
|
||||
m0, m1 = margins(a)
|
||||
expected0 = np.array([[15], [51]])
|
||||
expected1 = np.array([[6, 8, 10, 12, 14, 16]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
|
||||
a = np.arange(24).reshape(2, 3, 4)
|
||||
m0, m1, m2 = margins(a)
|
||||
expected0 = np.array([[[66]], [[210]]])
|
||||
expected1 = np.array([[[60], [92], [124]]])
|
||||
expected2 = np.array([[[60, 66, 72, 78]]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
assert_array_equal(m2, expected2)
|
||||
|
||||
|
||||
def test_expected_freq():
    """Check `expected_freq` on trivial, uniform-margin and 2x3 tables."""
    # Degenerate single-entry table.
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    # 3-D table whose margins are uniform: every expected count is 1.
    counts = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    assert_array_equal(expected_freq(counts), np.ones_like(counts))

    # 2x3 table checked against hand-computed expected frequencies.
    counts = np.array([[10, 10, 20], [20, 20, 20]])
    reference = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(expected_freq(counts), reference)
|
||||
|
||||
|
||||
def test_chi2_contingency_trivial():
    """chi2_contingency on inputs where the statistic is exactly zero."""
    # Rows are proportional, so there is no association at all.
    obs = np.array([[1, 2], [1, 2]])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 1)
    assert_array_equal(obs, expected)

    # 1-D input: zero degrees of freedom and observed == expected.
    obs = np.array([1, 2, 3])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 0)
    assert_array_equal(obs, expected)
|
||||
|
||||
|
||||
def test_chi2_contingency_R():
    """Compare chi2_contingency against results computed independently in R.

    The commented R scripts below are the exact code used to produce the
    reference chi-squared statistic, degrees of freedom and p-value for a
    3-way and a 4-way table; they are kept verbatim so the numbers can be
    re-derived.
    """
    # Rcode = \
    # """
    # # Data vector.
    # data <- c(
    #   12, 34, 23,     4,  47,  11,
    #   35, 31, 11,    34,  10,  18,
    #   12, 32,  9,    18,  13,  19,
    #   12, 12, 14,     9,  33,  25
    #   )
    #
    # # Create factor tags:r=rows, c=columns, t=tiers
    # r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    # c <- factor(gl(3, 1, 2*3*4, labels=c("c1", "c2", "c3")))
    # t <- factor(gl(2, 3, 2*3*4, labels=c("t1", "t2")))
    #
    # # 3-way Chi squared test of independence
    # s = summary(xtabs(data~r+c+t))
    # print(s)
    # """
    # Routput = \
    # """
    # Call: xtabs(formula = data ~ r + c + t)
    # Number of cases in table: 478
    # Number of factors: 3
    # Test for independence of all factors:
    #         Chisq = 102.17, df = 17, p-value = 3.514e-14
    # """
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    # Reference values come from the R output quoted above.
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    # Rcode = \
    # """
    # # Data vector.
    # data <- c(
    #     #
    #     12, 17,
    #     11, 16,
    #     #
    #     11, 12,
    #     15, 16,
    #     #
    #     23, 15,
    #     30, 22,
    #     #
    #     14, 17,
    #     15, 16
    #     )
    #
    # # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    # r <- factor(gl(2, 2, 2*2*2*2, labels=c("r1", "r2")))
    # c <- factor(gl(2, 1, 2*2*2*2, labels=c("c1", "c2")))
    # d <- factor(gl(2, 4, 2*2*2*2, labels=c("d1", "d2")))
    # t <- factor(gl(2, 8, 2*2*2*2, labels=c("t1", "t2")))
    #
    # # 4-way Chi squared test of independence
    # s = summary(xtabs(data~r+c+d+t))
    # print(s)
    # """
    # Routput = \
    # """
    # Call: xtabs(formula = data ~ r + c + d + t)
    # Number of cases in table: 262
    # Number of factors: 4
    # Test for independence of all factors:
    #         Chisq = 8.758, df = 11, p-value = 0.6442
    # """
    obs = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    # Reference values come from the second R output quoted above.
    assert_approx_equal(chi2, 8.758, significant=4)
    assert_approx_equal(p, 0.6442, significant=4)
    assert_equal(dof, 11)
|
||||
|
||||
|
||||
def test_chi2_contingency_g():
    """G-test (log-likelihood ratio) statistic with/without correction."""
    table = np.array([[15, 60], [15, 90]])

    # Without Yates' correction: G = 2 * sum(obs * log(obs / expected)).
    g, p, dof, e = chi2_contingency(table, lambda_='log-likelihood',
                                    correction=False)
    assert_allclose(g, 2*xlogy(table, table/e).sum())

    # With correction, each cell is moved half a count toward independence
    # before the statistic is formed.
    g, p, dof, e = chi2_contingency(table, lambda_='log-likelihood',
                                    correction=True)
    adjusted = table + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(adjusted, adjusted/e).sum())

    # 2x3 table: the continuity correction only applies to 2x2 tables, so
    # the uncorrected formula must hold even with the default correction.
    table = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(table, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(table, table/e).sum())
|
||||
|
||||
|
||||
def test_chi2_contingency_bad_args():
    """Invalid observed-frequency arrays must raise ValueError."""
    # Negative counts are not valid frequencies.
    bad = np.array([[-1, 10], [1, 2]])
    assert_raises(ValueError, chi2_contingency, bad)

    # A zero column yields zeros in the array of expected frequencies.
    bad = np.array([[0, 1], [0, 1]])
    assert_raises(ValueError, chi2_contingency, bad)

    # Degenerate case: `observed` has size 0.
    bad = np.empty((0, 8))
    assert_raises(ValueError, chi2_contingency, bad)
|
||||
|
||||
|
||||
def test_chi2_contingency_yates_gh13875():
    """Yates' correction must not exceed |observed - expected| (gh-13875)."""
    table = np.array([[1573, 3], [4, 0]])
    pvalue = chi2_contingency(table)[1]
    # With the correction clamped correctly, the p-value is exactly 1.
    assert_allclose(pvalue, 1, rtol=1e-12)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("correction", [False, True])
|
||||
def test_result(correction):
|
||||
obs = np.array([[1, 2], [1, 2]])
|
||||
res = chi2_contingency(obs, correction=correction)
|
||||
assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res)
|
||||
|
||||
|
||||
def test_bad_association_args():
    """Invalid inputs to `association` must raise ValueError."""
    # Unknown test-statistic name.
    assert_raises(ValueError, association, [[1, 2], [3, 4]], "X")
    # Input must be 2-D.
    assert_raises(ValueError, association, [[[1, 2]], [[3, 4]]], "cramer")
    # chi2_contingency's rejection of negative counts propagates.
    assert_raises(ValueError, association, [[-1, 10], [1, 2]], 'cramer')
    # Non-numeric array entries.
    mixed = np.array([[1, 2], ["dd", 4]], dtype=object)
    assert_raises(ValueError, association, mixed, 'cramer')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('stat, expected',
|
||||
[('cramer', 0.09222412010290792),
|
||||
('tschuprow', 0.0775509319944633),
|
||||
('pearson', 0.12932925727138758)])
|
||||
def test_assoc(stat, expected):
|
||||
# 2d Array
|
||||
obs1 = np.array([[12, 13, 14, 15, 16],
|
||||
[17, 16, 18, 19, 11],
|
||||
[9, 15, 14, 12, 11]])
|
||||
a = association(observed=obs1, method=stat)
|
||||
assert_allclose(a, expected)
|
||||
997
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py
vendored
Normal file
997
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py
vendored
Normal file
@@ -0,0 +1,997 @@
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.integrate import IntegrationWarning
|
||||
import itertools
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment, check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_entropy_vect_scale,
|
||||
check_edge_support, check_named_args,
|
||||
check_random_state_property,
|
||||
check_meth_dtype, check_ppf_dtype, check_cmplx_deriv,
|
||||
check_pickling, check_rvs_broadcast, check_freezing,
|
||||
check_deprecation_warning_gh5982_moment,
|
||||
check_deprecation_warning_gh5982_interval)
|
||||
from scipy.stats._distr_params import distcont
|
||||
from scipy.stats._distn_infrastructure import rv_continuous_frozen
|
||||
|
||||
"""
|
||||
Test all continuous distributions.
|
||||
|
||||
Parameters were chosen for those distributions that pass the
|
||||
Kolmogorov-Smirnov test. This provides safe parameters for each
|
||||
distributions so that we can perform further testing of class methods.
|
||||
|
||||
These tests currently check only/mostly for serious errors and exceptions,
|
||||
not for numerically exact results.
|
||||
"""
|
||||
|
||||
# Note that you need to add new distributions you want tested
|
||||
# to _distr_params
|
||||
|
||||
DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5
|
||||
|
||||
# For skipping test_cont_basic
|
||||
distslow = ['recipinvgauss', 'vonmises', 'kappa4', 'vonmises_line',
|
||||
'gausshyper', 'norminvgauss', 'geninvgauss', 'genhyperbolic',
|
||||
'truncnorm', 'truncweibull_min']
|
||||
|
||||
# distxslow are sorted by speed (very slow to slow)
|
||||
distxslow = ['studentized_range', 'kstwo', 'ksone', 'wrapcauchy', 'genexpon']
|
||||
|
||||
# For skipping test_moments, which is already marked slow
|
||||
distxslow_test_moments = ['studentized_range', 'vonmises', 'vonmises_line',
|
||||
'ksone', 'kstwo', 'recipinvgauss', 'genexpon']
|
||||
|
||||
# skip check_fit_args (test is slow)
|
||||
skip_fit_test_mle = ['exponpow', 'exponweib', 'gausshyper', 'genexpon',
|
||||
'halfgennorm', 'gompertz', 'johnsonsb', 'johnsonsu',
|
||||
'kappa4', 'ksone', 'kstwo', 'kstwobign', 'mielke', 'ncf',
|
||||
'nct', 'powerlognorm', 'powernorm', 'recipinvgauss',
|
||||
'trapezoid', 'vonmises', 'vonmises_line', 'levy_stable',
|
||||
'rv_histogram_instance', 'studentized_range']
|
||||
|
||||
# these were really slow in `test_fit`.py.
|
||||
# note that this list is used to skip both fit_test and fit_fix tests
|
||||
slow_fit_test_mm = ['argus', 'exponpow', 'exponweib', 'gausshyper', 'genexpon',
|
||||
'genhalflogistic', 'halfgennorm', 'gompertz', 'johnsonsb',
|
||||
'kappa4', 'kstwobign', 'recipinvgauss',
|
||||
'trapezoid', 'truncexpon', 'vonmises', 'vonmises_line',
|
||||
'studentized_range']
|
||||
# pearson3 fails due to something weird
|
||||
# the first list fails due to non-finite distribution moments encountered
|
||||
# most of the rest fail due to integration warnings
|
||||
# pearson3 is overriden as not implemented due to gh-11746
|
||||
fail_fit_test_mm = (['alpha', 'betaprime', 'bradford', 'burr', 'burr12',
|
||||
'cauchy', 'crystalball', 'f', 'fisk', 'foldcauchy',
|
||||
'genextreme', 'genpareto', 'halfcauchy', 'invgamma',
|
||||
'kappa3', 'levy', 'levy_l', 'loglaplace', 'lomax',
|
||||
'mielke', 'nakagami', 'ncf', 'skewcauchy', 't',
|
||||
'tukeylambda', 'invweibull']
|
||||
+ ['genhyperbolic', 'johnsonsu', 'ksone', 'kstwo',
|
||||
'nct', 'pareto', 'powernorm', 'powerlognorm']
|
||||
+ ['pearson3'])
|
||||
skip_fit_test = {"MLE": skip_fit_test_mle,
|
||||
"MM": slow_fit_test_mm + fail_fit_test_mm}
|
||||
|
||||
# skip check_fit_args_fix (test is slow)
|
||||
skip_fit_fix_test_mle = ['burr', 'exponpow', 'exponweib', 'gausshyper',
|
||||
'genexpon', 'halfgennorm', 'gompertz', 'johnsonsb',
|
||||
'johnsonsu', 'kappa4', 'ksone', 'kstwo', 'kstwobign',
|
||||
'levy_stable', 'mielke', 'ncf', 'ncx2',
|
||||
'powerlognorm', 'powernorm', 'rdist', 'recipinvgauss',
|
||||
'trapezoid', 'vonmises', 'vonmises_line',
|
||||
'studentized_range']
|
||||
# the first list fails due to non-finite distribution moments encountered
|
||||
# most of the rest fail due to integration warnings
|
||||
# pearson3 is overriden as not implemented due to gh-11746
|
||||
fail_fit_fix_test_mm = (['alpha', 'betaprime', 'burr', 'burr12', 'cauchy',
|
||||
'crystalball', 'f', 'fisk', 'foldcauchy',
|
||||
'genextreme', 'genpareto', 'halfcauchy', 'invgamma',
|
||||
'kappa3', 'levy', 'levy_l', 'loglaplace', 'lomax',
|
||||
'mielke', 'nakagami', 'ncf', 'nct', 'skewcauchy', 't',
|
||||
'truncpareto', 'invweibull']
|
||||
+ ['genhyperbolic', 'johnsonsu', 'ksone', 'kstwo',
|
||||
'pareto', 'powernorm', 'powerlognorm']
|
||||
+ ['pearson3'])
|
||||
skip_fit_fix_test = {"MLE": skip_fit_fix_test_mle,
|
||||
"MM": slow_fit_test_mm + fail_fit_fix_test_mm}
|
||||
|
||||
# These distributions fail the complex derivative test below.
|
||||
# Here 'fail' mean produce wrong results and/or raise exceptions, depending
|
||||
# on the implementation details of corresponding special functions.
|
||||
# cf https://github.com/scipy/scipy/pull/4979 for a discussion.
|
||||
fails_cmplx = set(['argus', 'beta', 'betaprime', 'chi', 'chi2', 'cosine',
|
||||
'dgamma', 'dweibull', 'erlang', 'f', 'gamma',
|
||||
'gausshyper', 'gengamma', 'genhyperbolic',
|
||||
'geninvgauss', 'gennorm', 'genpareto',
|
||||
'halfgennorm', 'invgamma',
|
||||
'ksone', 'kstwo', 'kstwobign', 'levy_l', 'loggamma',
|
||||
'logistic', 'loguniform', 'maxwell', 'nakagami',
|
||||
'ncf', 'nct', 'ncx2', 'norminvgauss', 'pearson3', 'rdist',
|
||||
'reciprocal', 'rice', 'skewnorm', 't', 'truncweibull_min',
|
||||
'tukeylambda', 'vonmises', 'vonmises_line',
|
||||
'rv_histogram_instance', 'truncnorm', 'studentized_range'])
|
||||
|
||||
# rv_histogram instances, with uniform and non-uniform bins;
# stored as (dist, arg) tuples for cases_test_cont_basic
# and cases_test_moments.
histogram_test_instances = []
case1 = {'a': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6,
               6, 6, 6, 7, 7, 7, 8, 8, 9], 'bins': 8}  # equal width bins
case2 = {'a': [1, 1], 'bins': [0, 1, 10]}  # unequal width bins
for case, density in itertools.product([case1, case2], [True, False]):
    _hist = np.histogram(**case, density=density)
    # Build the distribution with the same `density` interpretation that
    # was used to create the histogram, so normalization is consistent.
    _rv_hist = stats.rv_histogram(_hist, density=density)
    # Empty tuple: rv_histogram instances take no shape parameters.
    histogram_test_instances.append((_rv_hist, tuple()))
|
||||
|
||||
|
||||
def cases_test_cont_basic():
    # Yield (distname, shape-args) cases for test_cont_basic, attaching
    # slow/xslow pytest marks so heavy distributions can be deselected.
    for distname, arg in distcont[:] + histogram_test_instances:
        if distname == 'levy_stable':
            # levy_stable only implements rvs(); it is tested separately.
            continue
        elif distname in distslow:
            yield pytest.param(distname, arg, marks=pytest.mark.slow)
        elif distname in distxslow:
            yield pytest.param(distname, arg, marks=pytest.mark.xslow)
        else:
            yield distname, arg
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
|
||||
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
|
||||
@pytest.mark.parametrize('sn, n_fit_samples', [(500, 200)])
|
||||
def test_cont_basic(distname, arg, sn, n_fit_samples):
    """Run the battery of generic sanity checks on one continuous
    distribution: moments, cdf/ppf/sf/isf round trips, log variants,
    rvs properties, entropy, dtype handling and fitting.
    """
    # this test skips slow distributions

    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # Histogram cases pass the frozen instance itself, not a name.
        distfn = distname
        distname = 'rv_histogram_instance'

    rng = np.random.RandomState(765456)
    rvs = distfn.rvs(size=sn, *arg, random_state=rng)
    m, v = distfn.stats(*arg)

    # Sample-moment check is skipped for laplace_asymmetric — presumably
    # because its sample moments converge too slowly; TODO confirm.
    if distname not in {'laplace_asymmetric'}:
        check_sample_meanvar_(m, v, rvs)
    check_cdf_ppf(distfn, arg, distname)
    check_sf_isf(distfn, arg, distname)
    check_pdf(distfn, arg, distname)
    check_pdf_logpdf(distfn, arg, distname)
    check_pdf_logpdf_at_endpoints(distfn, arg, distname)
    check_cdf_logcdf(distfn, arg, distname)
    check_sf_logsf(distfn, arg, distname)
    check_ppf_broadcast(distfn, arg, distname)
    check_deprecation_warning_gh5982_moment(distfn, arg, distname)
    check_deprecation_warning_gh5982_interval(distfn, arg, distname)

    alpha = 0.01
    if distname == 'rv_histogram_instance':
        check_distribution_rvs(distfn.cdf, arg, alpha, rvs)
    elif distname != 'geninvgauss':
        # skip kstest for geninvgauss since cdf is too slow; see test for
        # rv generation in TestGenInvGauss in test_distributions.py
        check_distribution_rvs(distname, arg, alpha, rvs)

    locscale_defaults = (0, 1)
    meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf,
             distfn.logsf]
    # make sure arguments are within support
    spec_x = {'weibull_max': -0.5, 'levy_l': -0.5,
              'pareto': 1.5, 'truncpareto': 3.2, 'tukeylambda': 0.3,
              'rv_histogram_instance': 5.0}
    x = spec_x.get(distname, 0.5)
    # Override shape parameters for distributions whose default test args
    # put x=0.5 outside the support.
    if distname == 'invweibull':
        arg = (1,)
    elif distname == 'ksone':
        arg = (3,)

    check_named_args(distfn, x, arg, locscale_defaults, meths)
    check_random_state_property(distfn, arg)
    check_pickling(distfn, arg)
    check_freezing(distfn, arg)

    # Entropy
    if distname not in ['kstwobign', 'kstwo', 'ncf']:
        check_entropy(distfn, arg, distname)

    if distfn.numargs == 0:
        check_vecentropy(distfn, arg)

    # Only check the private _entropy when the distribution overrides it;
    # vonmises is excluded (its entropy handling is special-cased).
    if (distfn.__class__._entropy != stats.rv_continuous._entropy
            and distname != 'vonmises'):
        check_private_entropy(distfn, arg, stats.rv_continuous)

    with npt.suppress_warnings() as sup:
        sup.filter(IntegrationWarning, "The occurrence of roundoff error")
        sup.filter(IntegrationWarning, "Extremely bad integrand")
        sup.filter(RuntimeWarning, "invalid value")
        check_entropy_vect_scale(distfn, arg)

    check_retrieving_support(distfn, arg)
    check_edge_support(distfn, arg)

    check_meth_dtype(distfn, arg, meths)
    check_ppf_dtype(distfn, arg)

    # Complex-step derivative check is skipped for distributions known to
    # fail it (see the fails_cmplx set at module level).
    if distname not in fails_cmplx:
        check_cmplx_deriv(distfn, arg)

    if distname != 'truncnorm':
        check_ppf_private(distfn, arg, distname)

    # Fit checks run for both estimation methods unless the distribution
    # is in the corresponding skip list (slow or known-failing).
    for method in ["MLE", "MM"]:
        if distname not in skip_fit_test[method]:
            check_fit_args(distfn, arg, rvs[:n_fit_samples], method)

        if distname not in skip_fit_fix_test[method]:
            check_fit_args_fix(distfn, arg, rvs[:n_fit_samples], method)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
|
||||
def test_rvs_scalar(distname, arg):
|
||||
# rvs should return a scalar when given scalar arguments (gh-12428)
|
||||
try:
|
||||
distfn = getattr(stats, distname)
|
||||
except TypeError:
|
||||
distfn = distname
|
||||
distname = 'rv_histogram_instance'
|
||||
|
||||
assert np.isscalar(distfn.rvs(*arg))
|
||||
assert np.isscalar(distfn.rvs(*arg, size=()))
|
||||
assert np.isscalar(distfn.rvs(*arg, size=None))
|
||||
|
||||
|
||||
def test_levy_stable_random_state_property():
    # levy_stable only implements rvs(), so it is skipped in the
    # main loop in test_cont_basic(). Here we apply just the test
    # check_random_state_property to levy_stable.
    check_random_state_property(stats.levy_stable, (0.5, 0.1))
|
||||
|
||||
|
||||
def cases_test_moments():
    # Yield (distname, arg, normalization_ok, higher_ok, is_xfailing)
    # cases for test_moments, marking known-failing combinations xfail.
    fail_normalization = set()
    fail_higher = set(['ncf'])

    for distname, arg in distcont[:] + histogram_test_instances:
        if distname == 'levy_stable':
            # levy_stable only implements rvs(); tested separately.
            continue

        if distname in distxslow_test_moments:
            yield pytest.param(distname, arg, True, True, True,
                               marks=pytest.mark.xslow(reason="too slow"))
            continue

        cond1 = distname not in fail_normalization
        cond2 = distname not in fail_higher

        marks = list()
        # Currently unused, `marks` can be used to add a timeout to a test of
        # a specific distribution.  For example, this shows how a timeout could
        # be added for the 'skewnorm' distribution:
        #
        #     marks = list()
        #     if distname == 'skewnorm':
        #         marks.append(pytest.mark.timeout(300))

        yield pytest.param(distname, arg, cond1, cond2, False, marks=marks)

        if not cond1 or not cond2:
            # Run the distributions that have issues twice, once skipping the
            # not_ok parts, once with the not_ok parts but marked as knownfail
            yield pytest.param(distname, arg, True, True, True,
                               marks=[pytest.mark.xfail] + marks)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize('distname,arg,normalization_ok,higher_ok,'
|
||||
'is_xfailing',
|
||||
cases_test_moments())
|
||||
def test_moments(distname, arg, normalization_ok, higher_ok, is_xfailing):
    """Check normalization and the first four moments of one distribution
    against numerically integrated expectations.
    """
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # Histogram cases pass the frozen instance itself, not a name.
        distfn = distname
        distname = 'rv_histogram_instance'

    # Quadrature over heavy-tailed pdfs is noisy; silence the expected
    # IntegrationWarnings (all of them for known-xfailing cases).
    with npt.suppress_warnings() as sup:
        sup.filter(IntegrationWarning,
                   "The integral is probably divergent, or slowly convergent.")
        sup.filter(IntegrationWarning,
                   "The maximum number of subdivisions.")

        if is_xfailing:
            sup.filter(IntegrationWarning)

        m, v, s, k = distfn.stats(*arg, moments='mvsk')

        with np.errstate(all="ignore"):
            if normalization_ok:
                check_normalization(distfn, arg, distname)

            if higher_ok:
                check_mean_expect(distfn, arg, m, distname)
                check_skew_expect(distfn, arg, m, v, s, distname)
                check_var_expect(distfn, arg, m, v, distname)
                check_kurt_expect(distfn, arg, m, v, k, distname)

        check_moment(distfn, arg, m, v, distname)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distcont)
|
||||
def test_rvs_broadcast(dist, shape_args):
    """Check rvs() broadcasting of loc, scale and shape parameters."""
    if dist in ['gausshyper', 'genexpon', 'studentized_range']:
        pytest.skip("too slow")

    # If shape_only is True, it means the _rvs method of the
    # distribution uses more than one random number to generate a random
    # variate.  That means the result of using rvs with broadcasting or
    # with a nontrivial size will not necessarily be the same as using the
    # numpy.vectorize'd version of rvs(), so we can only compare the shapes
    # of the results, not the values.
    # Whether or not a distribution is in the following list is an
    # implementation detail of the distribution, not a requirement.  If
    # the implementation the rvs() method of a distribution changes, this
    # test might also have to be changed.
    shape_only = dist in ['argus', 'betaprime', 'dgamma', 'dweibull',
                          'exponnorm', 'genhyperbolic', 'geninvgauss',
                          'levy_stable', 'nct', 'norminvgauss', 'rice',
                          'skewnorm', 'semicircular', 'gennorm', 'loggamma']

    distfunc = getattr(stats, dist)
    # loc and scale get mutually broadcastable shapes (2,) and (3, 1).
    loc = np.zeros(2)
    scale = np.ones((3, 1))
    nargs = distfunc.numargs
    allargs = []
    bshape = [3, 2]
    # Generate shape parameter arguments...
    for k in range(nargs):
        # Each shape parameter gets a distinct leading axis of length k+4
        # so the broadcast result shape is unambiguous.
        shp = (k + 4,) + (1,)*(k + 2)
        allargs.append(shape_args[k]*np.ones(shp))
        bshape.insert(0, k + 4)
    allargs.extend([loc, scale])
    # bshape holds the expected shape when loc, scale, and the shape
    # parameters are all broadcast together.

    check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, 'd')
|
||||
|
||||
|
||||
# Expected values of the SF, CDF, PDF were computed using
|
||||
# mpmath with mpmath.mp.dps = 50 and output at 20:
|
||||
#
|
||||
# def ks(x, n):
|
||||
# x = mpmath.mpf(x)
|
||||
# logp = -mpmath.power(6.0*n*x+1.0, 2)/18.0/n
|
||||
# sf, cdf = mpmath.exp(logp), -mpmath.expm1(logp)
|
||||
# pdf = (6.0*n*x+1.0) * 2 * sf/3
|
||||
# print(mpmath.nstr(sf, 20), mpmath.nstr(cdf, 20), mpmath.nstr(pdf, 20))
|
||||
#
|
||||
# Tests use 1/n < x < 1-1/n and n > 1e6 to use the asymptotic computation.
|
||||
# Larger x has a smaller sf.
|
||||
@pytest.mark.parametrize('x,n,sf,cdf,pdf,rtol',
|
||||
[(2.0e-5, 1000000000,
|
||||
0.44932297307934442379, 0.55067702692065557621,
|
||||
35946.137394996276407, 5e-15),
|
||||
(2.0e-9, 1000000000,
|
||||
0.99999999061111115519, 9.3888888448132728224e-9,
|
||||
8.6666665852962971765, 5e-14),
|
||||
(5.0e-4, 1000000000,
|
||||
7.1222019433090374624e-218, 1.0,
|
||||
1.4244408634752704094e-211, 5e-14)])
|
||||
def test_gh17775_regression(x, n, sf, cdf, pdf, rtol):
|
||||
# Regression test for gh-17775. In scipy 1.9.3 and earlier,
|
||||
# these test would fail.
|
||||
#
|
||||
# KS one asymptotic sf ~ e^(-(6nx+1)^2 / 18n)
|
||||
# Given a large 32-bit integer n, 6n will overflow in the c implementation.
|
||||
# Example of broken behaviour:
|
||||
# ksone.sf(2.0e-5, 1000000000) == 0.9374359693473666
|
||||
ks = stats.ksone
|
||||
vals = np.array([ks.sf(x, n), ks.cdf(x, n), ks.pdf(x, n)])
|
||||
expected = np.array([sf, cdf, pdf])
|
||||
npt.assert_allclose(vals, expected, rtol=rtol)
|
||||
# The sf+cdf must sum to 1.0.
|
||||
npt.assert_equal(vals[0] + vals[1], 1.0)
|
||||
# Check inverting the (potentially very small) sf (uses a lower tolerance)
|
||||
npt.assert_allclose([ks.isf(sf, n)], [x], rtol=1e-8)
|
||||
|
||||
|
||||
def test_rvs_gh2069_regression():
    # Regression tests for gh-2069.  In scipy 0.17 and earlier,
    # these tests would fail.
    #
    # A typical example of the broken behavior:
    # >>> norm.rvs(loc=np.zeros(5), scale=np.ones(5))
    # array([-2.49613705, -2.49613705, -2.49613705, -2.49613705, -2.49613705])
    #
    # All four rvs calls below share one RandomState, so their order is
    # significant; the assertion is only that the draws are not all equal.
    rng = np.random.RandomState(123)
    vals = stats.norm.rvs(loc=np.zeros(5), scale=1, random_state=rng)
    d = np.diff(vals)
    npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
    vals = stats.norm.rvs(loc=0, scale=np.ones(5), random_state=rng)
    d = np.diff(vals)
    npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
    vals = stats.norm.rvs(loc=np.zeros(5), scale=np.ones(5), random_state=rng)
    d = np.diff(vals)
    npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")
    vals = stats.norm.rvs(loc=np.array([[0], [0]]), scale=np.ones(5),
                          random_state=rng)
    d = np.diff(vals.ravel())
    npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!")

    # Shape arguments that cannot be broadcast with `size` must raise.
    assert_raises(ValueError, stats.norm.rvs, [[0, 0], [0, 0]],
                  [[1, 1], [1, 1]], 1)
    assert_raises(ValueError, stats.gamma.rvs, [2, 3, 4, 5], 0, 1, (2, 2))
    assert_raises(ValueError, stats.gamma.rvs, [1, 1, 1, 1], [0, 0, 0, 0],
                  [[1], [2]], (4,))
|
||||
|
||||
|
||||
def test_nomodify_gh9900_regression():
    # Regression test for gh-9990
    # Prior to gh-9990, calls to stats.truncnorm._cdf() use what ever was
    # set inside the stats.truncnorm instance during stats.truncnorm.cdf().
    # This could cause issues wth multi-threaded code.
    # Since then, the calls to cdf() are not permitted to modify the global
    # stats.truncnorm instance.
    #
    # NOTE: the order of the calls below IS the test — each later call
    # checks that an earlier call did not leave state behind.
    tn = stats.truncnorm
    # Use the right-half truncated normal
    # Check that the cdf and _cdf return the same result.
    npt.assert_almost_equal(tn.cdf(1, 0, np.inf), 0.6826894921370859)
    npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]), 0.6826894921370859)

    # Now use the left-half truncated normal
    npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0), 0.31731050786291415)
    npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]), 0.31731050786291415)

    # Check that the right-half truncated normal _cdf hasn't changed
    npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]), 0.6826894921370859)  # noqa, NOT 1.6826894921370859
    npt.assert_almost_equal(tn.cdf(1, 0, np.inf), 0.6826894921370859)

    # Check that the left-half truncated normal _cdf hasn't changed
    npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]), 0.31731050786291415)  # noqa, Not -0.6826894921370859
    npt.assert_almost_equal(tn.cdf(1, -np.inf, 0), 1)  # Not 1.6826894921370859
    npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0), 0.31731050786291415)  # Not -0.6826894921370859
|
||||
|
||||
|
||||
def test_broadcast_gh9990_regression():
    """reciprocal.cdf must broadcast x against the shape params (gh-9990).

    x=7 lies inside the support of only 4 of the 6 distributions below;
    before the fix, an array cached by stats.reciprocal._argcheck() kept 6
    elements while the array handed to _cdf had 4, causing a broadcast
    error.
    """
    a = np.array([1, 2, 3, 4, 5, 6])
    b = np.array([8, 16, 1, 32, 1, 48])

    # Check scalar x (inside and at the lower edge of the supports) and
    # vector x (the support edges themselves) against the element-wise
    # scalar evaluation.
    for x in (7, 1, a, b):
        xs = np.broadcast_to(x, a.shape)
        reference = [stats.reciprocal.cdf(_x, _a, _b)
                     for _x, _a, _b in zip(xs, a, b)]
        npt.assert_array_almost_equal(stats.reciprocal.cdf(x, a, b),
                                      reference)
|
||||
|
||||
|
||||
def test_broadcast_gh7933_regression():
    """truncnorm.logpdf must accept mutually broadcastable array args."""
    locs = np.array([6.0, 5.0, 4.0])
    # Only checks that no exception is raised; values are not asserted.
    stats.truncnorm.logpdf(
        np.array([3.0, 2.0, 1.0]),
        a=(1.5 - locs) / 3.0,
        b=np.inf,
        loc=locs,
        scale=3.0,
    )
|
||||
|
||||
|
||||
def test_gh2002_regression():
    """Broadcast when only some x are within support of some shapes."""
    xs = np.r_[-2:2:101j]
    shapes = np.r_[-np.ones(50), np.ones(51)]
    # The vectorized evaluation must match the element-wise scalar one.
    elementwise = [stats.truncnorm.pdf(_x, _a, np.inf)
                   for _x, _a in zip(xs, shapes)]
    vectorized = stats.truncnorm.pdf(xs, shapes, np.inf)
    npt.assert_array_almost_equal(vectorized, elementwise)
|
||||
|
||||
|
||||
def test_gh1320_regression():
    """genextreme.ppf must accept a 2-D shape-parameter array (gh-1320)."""
    shape = 2.62
    # Only the first example from the issue is exercised; the check is
    # simply that the call no longer raises.
    stats.genextreme.ppf(0.5, np.array([[shape], [shape + 0.5]]))
    # The other examples in gh-1320 appear to have stopped working
    # some time ago:
    # ans = stats.genextreme.moment(2, np.array([c, c + 0.5]))
    # expected = np.array([25.50105963, 115.11191437])
    # stats.genextreme.moment(5, np.array([[c], [c + 0.5]]))
    # stats.genextreme.moment(5, np.array([c, c + 0.5]))
|
||||
|
||||
|
||||
def test_method_of_moments():
    """Uniform MM fit reproduces the Wikipedia method-of-moments example.

    See https://en.wikipedia.org/wiki/Method_of_moments_(statistics).
    """
    np.random.seed(1234)
    data = [0, 0, 0, 0, 1]
    lower = 1/5 - 2*np.sqrt(3)/5
    upper = 1/5 + 2*np.sqrt(3)/5
    # uniform.fit overrides the generic fit, so reach past it with super()
    # to force the generic method-of-moments implementation.
    loc, scale = super(type(stats.uniform), stats.uniform).fit(data,
                                                               method="MM")
    npt.assert_almost_equal(loc, lower, decimal=4)
    npt.assert_almost_equal(loc + scale, upper, decimal=4)
|
||||
|
||||
|
||||
def check_sample_meanvar_(popmean, popvar, sample):
    # Compare the sample against the population moments, but only when
    # the corresponding population moment is finite (some distributions
    # have infinite mean and/or variance).
    if np.isfinite(popmean):
        check_sample_mean(sample, popmean)
    if np.isfinite(popvar):
        check_sample_var(sample, popvar)
|
||||
|
||||
|
||||
def check_sample_mean(sample, popmean):
    # A one-sample t-test should not reject (at the 1% level) the
    # hypothesis that the sample came from a population with mean
    # `popmean`; a tiny p-value signals an unlikely discrepancy.
    pvalue = stats.ttest_1samp(sample, popmean).pvalue
    assert pvalue > 0.01
|
||||
|
||||
|
||||
def check_sample_var(sample, popvar):
    # The population variance should lie inside a bootstrap confidence
    # interval for the sample variance.  (This replaced an earlier
    # chi-squared test for variance, which produced too many false
    # positives.)
    boot = stats.bootstrap(
        (sample,),
        lambda x, axis: x.var(ddof=1, axis=axis),
        confidence_level=0.995,
    )
    interval = boot.confidence_interval
    assert interval.low <= popvar <= interval.high
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, msg):
    # cdf(ppf(q)) == q at a low, middle and high quantile.
    values = [0.001, 0.5, 0.999]
    npt.assert_almost_equal(distfn.cdf(distfn.ppf(values, *arg), *arg),
                            values, decimal=DECIMAL, err_msg=msg +
                            ' - cdf-ppf roundtrip')
|
||||
|
||||
|
||||
def check_sf_isf(distfn, arg, msg):
    # sf(isf(q)) == q, and sf == 1 - cdf, at a few quantiles.
    npt.assert_almost_equal(distfn.sf(distfn.isf([0.1, 0.5, 0.9], *arg), *arg),
                            [0.1, 0.5, 0.9], decimal=DECIMAL, err_msg=msg +
                            ' - sf-isf roundtrip')
    npt.assert_almost_equal(distfn.cdf([0.1, 0.9], *arg),
                            1.0 - distfn.sf([0.1, 0.9], *arg),
                            decimal=DECIMAL, err_msg=msg +
                            ' - cdf-sf relationship')
|
||||
|
||||
|
||||
def check_pdf(distfn, arg, msg):
    """Compare the pdf near the median with a numerical cdf derivative."""
    eps = 1e-6
    x = distfn.ppf(0.5, *arg)
    pdfv = distfn.pdf(x, *arg)
    if (pdfv < 1e-4) or (pdfv > 1e4):
        # avoid checking a case where pdf is close to zero or
        # huge (singularity): step away from the median
        x = x + 0.1
        pdfv = distfn.pdf(x, *arg)
    # central difference approximation of d/dx cdf(x)
    cdfdiff = (distfn.cdf(x + eps, *arg) -
               distfn.cdf(x - eps, *arg))/eps/2.0
    # replace with better diff and better test (more points),
    # actually, this works pretty well
    npt.assert_almost_equal(pdfv, cdfdiff, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-pdf relationship')
|
||||
|
||||
|
||||
def check_pdf_logpdf(distfn, args, msg):
    """Verify logpdf agrees with log(pdf) at several interior quantiles."""
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    x = distfn.ppf(quantiles, *args)
    x = x[np.isfinite(x)]
    pdf_vals = distfn.pdf(x, *args)
    logpdf_vals = distfn.logpdf(x, *args)
    # drop points where either side is undefined (zero pdf, inf/nan)
    pdf_vals = pdf_vals[(pdf_vals != 0) & np.isfinite(pdf_vals)]
    logpdf_vals = logpdf_vals[np.isfinite(logpdf_vals)]
    npt.assert_almost_equal(np.log(pdf_vals), logpdf_vals, decimal=7,
                            err_msg=msg + " - logpdf-log(pdf) relationship")
|
||||
|
||||
|
||||
def check_pdf_logpdf_at_endpoints(distfn, args, msg):
    """Verify logpdf agrees with log(pdf) at the (finite) support endpoints."""
    endpoints = np.array([0, 1])
    x = distfn.ppf(endpoints, *args)
    x = x[np.isfinite(x)]
    pdf_vals = distfn.pdf(x, *args)
    logpdf_vals = distfn.logpdf(x, *args)
    # drop points where either side is undefined (zero pdf, inf/nan)
    pdf_vals = pdf_vals[(pdf_vals != 0) & np.isfinite(pdf_vals)]
    logpdf_vals = logpdf_vals[np.isfinite(logpdf_vals)]
    npt.assert_almost_equal(np.log(pdf_vals), logpdf_vals, decimal=7,
                            err_msg=msg + " - logpdf-log(pdf) relationship")
|
||||
|
||||
|
||||
def check_sf_logsf(distfn, args, msg):
    """Verify logsf agrees with log(sf) at several quantiles."""
    quantiles = np.array([0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
    x = distfn.ppf(quantiles, *args)
    x = x[np.isfinite(x)]
    sf_vals = distfn.sf(x, *args)
    logsf_vals = distfn.logsf(x, *args)
    # drop points where either side is undefined
    sf_vals = sf_vals[sf_vals != 0]
    logsf_vals = logsf_vals[np.isfinite(logsf_vals)]
    npt.assert_almost_equal(np.log(sf_vals), logsf_vals, decimal=7,
                            err_msg=msg + " - logsf-log(sf) relationship")
|
||||
|
||||
|
||||
def check_cdf_logcdf(distfn, args, msg):
    """Verify logcdf agrees with log(cdf) at several quantiles."""
    quantiles = np.array([0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])
    x = distfn.ppf(quantiles, *args)
    x = x[np.isfinite(x)]
    cdf_vals = distfn.cdf(x, *args)
    logcdf_vals = distfn.logcdf(x, *args)
    # drop points where either side is undefined
    cdf_vals = cdf_vals[cdf_vals != 0]
    logcdf_vals = logcdf_vals[np.isfinite(logcdf_vals)]
    npt.assert_almost_equal(np.log(cdf_vals), logcdf_vals, decimal=7,
                            err_msg=msg + " - logcdf-log(cdf) relationship")
|
||||
|
||||
|
||||
def check_ppf_broadcast(distfn, arg, msg):
    """Check that ppf broadcasts over repeated shape parameters.

    The median computed with scalar shape parameters must equal the result
    of the same call with each shape parameter repeated `num_repeats` times.
    """
    num_repeats = 5
    # The original `args = [] * num_repeats` was a no-op multiplication;
    # an empty list is the correct "no shape parameters" value.
    args = []
    if arg:
        args = [np.array([_] * num_repeats) for _ in arg]

    median = distfn.ppf(0.5, *arg)
    medians = distfn.ppf(0.5, *args)
    msg += " - ppf multiple"
    npt.assert_almost_equal(medians, [median] * num_repeats, decimal=7,
                            err_msg=msg)
|
||||
|
||||
|
||||
def check_distribution_rvs(dist, args, alpha, rvs):
    """Kolmogorov-Smirnov test that `rvs` follow the given distribution.

    Parameters
    ----------
    dist : callable or str
        A cdf function or the name of a distribution in scipy.stats.
    args : tuple
        Shape parameters for ``scipy.stats.dist(*args)``.
    alpha : float
        Significance level, ~0.01.
    rvs : array_like
        Sample of random variates to test.

    This version reuses existing random variables; if the sample fails
    (which _could_ happen, but is unlikely for small alpha), the K-S test
    is repeated once with a fresh sample of 1000 variates.
    """
    D, pval = stats.kstest(rvs, dist, args=args, N=1000)
    if pval < alpha:
        # Retest with newly generated rvs before declaring failure.
        D, pval = stats.kstest(dist, dist, args=args, N=1000)
    npt.assert_(pval > alpha,
                "D = " + str(D) + "; pval = " + str(pval) +
                "; alpha = " + str(alpha) + "\nargs = " + str(args))
|
||||
|
||||
|
||||
def check_vecentropy(distfn, args):
    # The vectorized entropy must agree with the raw _entropy implementation.
    npt.assert_equal(distfn.vecentropy(*args), distfn._entropy(*args))
|
||||
|
||||
|
||||
def check_loc_scale(distfn, arg, m, v, msg):
    """Check the mean/variance transformation under loc and scale.

    `loc` and `scale` are arrays (not scalars) to catch bugs like
    gh-13580, where arrays improperly broadcast with shapes.
    """
    loc = np.array([10.0, 20.0])
    scale = np.array([10.0, 20.0])
    mt, vt = distfn.stats(*arg, loc=loc, scale=scale)
    npt.assert_allclose(m*scale + loc, mt)
    npt.assert_allclose(v*scale*scale, vt)
|
||||
|
||||
|
||||
def check_ppf_private(distfn, arg, msg):
    # fails by design for truncnorm self.nb not defined
    q = np.array([0.1, 0.5, 0.9])
    ppfs = distfn._ppf(q, *arg)
    npt.assert_(not np.any(np.isnan(ppfs)), msg + 'ppf private is nan')
|
||||
|
||||
|
||||
def check_retrieving_support(distfn, args):
    """The support must shift and scale consistently with loc and scale."""
    loc, scale = 1, 2
    base_support = np.array(distfn.support(*args))
    shifted_support = np.array(distfn.support(*args, loc=loc, scale=scale))
    npt.assert_almost_equal(base_support*scale + loc, shifted_support)
|
||||
|
||||
|
||||
def check_fit_args(distfn, arg, rvs, method):
    """Smoke-test `fit` with the default and the Powell optimizer.

    Only the number of returned parameters is checked here; accuracy is
    tested in test_fit.py.
    """
    with np.errstate(all='ignore'), npt.suppress_warnings() as sup:
        sup.filter(category=RuntimeWarning,
                   message="The shape parameter of the erlang")
        sup.filter(category=RuntimeWarning,
                   message="floating point number truncated")
        vals = distfn.fit(rvs, method=method)
        vals2 = distfn.fit(rvs, optimizer='powell', method=method)
    npt.assert_(len(vals) == 2+len(arg))
    npt.assert_(len(vals2) == 2+len(arg))
|
||||
|
||||
|
||||
def check_fit_args_fix(distfn, arg, rvs, method):
    """Check `fit` with parameters fixed via floc/fscale/f0/f1/f2.

    Verifies that the fixed values come back unchanged in the result and
    that the result always has ``2 + len(arg)`` entries (shape params,
    loc, scale).
    """
    with np.errstate(all='ignore'), npt.suppress_warnings() as sup:
        sup.filter(category=RuntimeWarning,
                   message="The shape parameter of the erlang")

        vals = distfn.fit(rvs, floc=0, method=method)
        vals2 = distfn.fit(rvs, fscale=1, method=method)
        npt.assert_(len(vals) == 2+len(arg))
        # loc is second-to-last, scale is last in the fit result
        npt.assert_(vals[-2] == 0)
        npt.assert_(vals2[-1] == 1)
        npt.assert_(len(vals2) == 2+len(arg))
        if len(arg) > 0:
            vals3 = distfn.fit(rvs, f0=arg[0], method=method)
            npt.assert_(len(vals3) == 2+len(arg))
            npt.assert_(vals3[0] == arg[0])
        if len(arg) > 1:
            vals4 = distfn.fit(rvs, f1=arg[1], method=method)
            npt.assert_(len(vals4) == 2+len(arg))
            npt.assert_(vals4[1] == arg[1])
        if len(arg) > 2:
            vals5 = distfn.fit(rvs, f2=arg[2], method=method)
            npt.assert_(len(vals5) == 2+len(arg))
            npt.assert_(vals5[2] == arg[2])
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
@pytest.mark.parametrize('distname, args', distcont)
def test_methods_with_lists(method, distname, args):
    # Test that the continuous distributions can accept Python lists
    # as arguments: a vectorized call over lists must match elementwise
    # scalar calls.
    dist = getattr(stats, distname)
    f = getattr(dist, method)
    if distname == 'invweibull' and method.startswith('log'):
        # points chosen so the log-methods are finite for invweibull
        x = [1.5, 2]
    else:
        x = [0.1, 0.2]

    # each shape parameter duplicated into a 2-element list
    shape2 = [[a]*2 for a in args]
    loc = [0, 0.1]
    scale = [1, 1.01]
    result = f(x, *shape2, loc=loc, scale=scale)
    npt.assert_allclose(result,
                        [f(*v) for v in zip(x, *shape2, loc, scale)],
                        rtol=1e-14, atol=5e-14)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
def test_gilbrat_deprecation(method):
    # `gilbrat` methods must still produce the same values as `gibrat`,
    # but emit a DeprecationWarning when called.
    expected = getattr(stats.gibrat, method)(1)
    with pytest.warns(
        DeprecationWarning,
        match=rf"\s*`gilbrat\.{method}` is deprecated,.*",
    ):
        result = getattr(stats.gilbrat, method)(1)
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
def test_gilbrat_deprecation_frozen(method):
    # Freezing `gilbrat` warns once at instantiation; the frozen
    # distribution's methods must not warn again and must match `gibrat`.
    expected = getattr(stats.gibrat, method)(1)
    with pytest.warns(DeprecationWarning, match=r"\s*`gilbrat` is deprecated"):
        # warn on instantiation of frozen distribution...
        g = stats.gilbrat()
    # ... not on its methods
    result = getattr(g, method)(1)
    assert result == expected
|
||||
|
||||
|
||||
def test_burr_fisk_moment_gh13234_regression():
    # gh-13234: moment() should return a scalar float, not an array
    for val in (stats.burr.moment(1, 5, 4), stats.fisk.moment(1, 8)):
        assert isinstance(val, float)
|
||||
|
||||
|
||||
def test_moments_with_array_gh12192_regression():
    """gh-12192: moment() must broadcast loc/scale/shape arguments and
    return NaN wherever scale is invalid (scale <= 0), without raising."""
    # array loc and scalar scale
    vals0 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=1)
    expected0 = np.array([1., 2., 3.])
    npt.assert_equal(vals0, expected0)

    # array loc and invalid scalar scale
    vals1 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=-1)
    expected1 = np.array([np.nan, np.nan, np.nan])
    npt.assert_equal(vals1, expected1)

    # array loc and array scale with invalid entries
    vals2 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]),
                              scale=[-3, 1, 0])
    expected2 = np.array([np.nan, 2., np.nan])
    npt.assert_equal(vals2, expected2)

    # (loc == 0) & (scale < 0)
    vals3 = stats.norm.moment(order=2, loc=0, scale=-4)
    expected3 = np.nan
    npt.assert_equal(vals3, expected3)
    # scalar input must give scalar (float) output
    assert isinstance(vals3, expected3.__class__)

    # array loc with 0 entries and scale with invalid entries
    vals4 = stats.norm.moment(order=2, loc=[1, 0, 2], scale=[3, -4, -5])
    expected4 = np.array([10., np.nan, np.nan])
    npt.assert_equal(vals4, expected4)

    # all(loc == 0) & (array scale with invalid entries)
    vals5 = stats.norm.moment(order=2, loc=[0, 0, 0], scale=[5., -2, 100.])
    expected5 = np.array([25., np.nan, 10000.])
    npt.assert_equal(vals5, expected5)

    # all( (loc == 0) & (scale < 0) )
    vals6 = stats.norm.moment(order=2, loc=[0, 0, 0], scale=[-5., -2, -100.])
    expected6 = np.array([np.nan, np.nan, np.nan])
    npt.assert_equal(vals6, expected6)

    # scalar args, loc, and scale
    vals7 = stats.chi.moment(order=2, df=1, loc=0, scale=0)
    expected7 = np.nan
    npt.assert_equal(vals7, expected7)
    assert isinstance(vals7, expected7.__class__)

    # array args, scalar loc, and scalar scale
    vals8 = stats.chi.moment(order=2, df=[1, 2, 3], loc=0, scale=0)
    expected8 = np.array([np.nan, np.nan, np.nan])
    npt.assert_equal(vals8, expected8)

    # array args, array loc, and array scale
    vals9 = stats.chi.moment(order=2, df=[1, 2, 3], loc=[1., 0., 2.],
                             scale=[1., -3., 0.])
    expected9 = np.array([3.59576912, np.nan, np.nan])
    npt.assert_allclose(vals9, expected9, rtol=1e-8)

    # (n > 4), all(loc != 0), and all(scale != 0)
    vals10 = stats.norm.moment(5, [1., 2.], [1., 2.])
    expected10 = np.array([26., 832.])
    npt.assert_allclose(vals10, expected10, rtol=1e-13)

    # test broadcasting and more
    a = [-1.1, 0, 1, 2.2, np.pi]
    b = [-1.1, 0, 1, 2.2, np.pi]
    loc = [-1.1, 0, np.sqrt(2)]
    scale = [-2.1, 0, 1, 2.2, np.pi]

    # shapes chosen so a, b, loc, scale broadcast into a 4-D grid
    a = np.array(a).reshape((-1, 1, 1, 1))
    b = np.array(b).reshape((-1, 1, 1))
    loc = np.array(loc).reshape((-1, 1))
    scale = np.array(scale)

    vals11 = stats.beta.moment(order=2, a=a, b=b, loc=loc, scale=scale)

    a, b, loc, scale = np.broadcast_arrays(a, b, loc, scale)

    for i in np.ndenumerate(a):
        with np.errstate(invalid='ignore', divide='ignore'):
            i = i[0]  # just get the index
            # check against same function with scalar input
            expected = stats.beta.moment(order=2, a=a[i], b=b[i],
                                         loc=loc[i], scale=scale[i])
            np.testing.assert_equal(vals11[i], expected)
|
||||
|
||||
|
||||
def test_broadcasting_in_moments_gh12192_regression():
    """gh-12192: moment() must broadcast its arguments and preserve the
    broadcast shape in the result."""
    vals0 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=[[1]])
    expected0 = np.array([[1., 2., 3.]])
    npt.assert_equal(vals0, expected0)
    assert vals0.shape == expected0.shape

    vals1 = stats.norm.moment(order=1, loc=np.array([[1], [2], [3]]),
                              scale=[1, 2, 3])
    expected1 = np.array([[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]])
    npt.assert_equal(vals1, expected1)
    assert vals1.shape == expected1.shape

    vals2 = stats.chi.moment(order=1, df=[1., 2., 3.], loc=0., scale=1.)
    expected2 = np.array([0.79788456, 1.25331414, 1.59576912])
    npt.assert_allclose(vals2, expected2, rtol=1e-8)
    assert vals2.shape == expected2.shape

    # invalid scale entries (scale <= 0) must yield NaN in the broadcast
    # result without raising
    vals3 = stats.chi.moment(order=1, df=[[1.], [2.], [3.]], loc=[0., 1., 2.],
                             scale=[-1., 0., 3.])
    expected3 = np.array([[np.nan, np.nan, 4.39365368],
                          [np.nan, np.nan, 5.75994241],
                          [np.nan, np.nan, 6.78730736]])
    npt.assert_allclose(vals3, expected3, rtol=1e-8)
    assert vals3.shape == expected3.shape
|
||||
|
||||
|
||||
def test_kappa3_array_gh13582():
    # https://github.com/scipy/scipy/pull/15140#issuecomment-994958241
    shapes = [0.5, 1.5, 2.5, 3.5, 4.5]
    moments = 'mvsk'
    # elementwise scalar evaluation...
    elementwise = np.array([[stats.kappa3.stats(shape, moments=moment)
                             for shape in shapes] for moment in moments])
    # ...must agree with one vectorized call
    vectorized = np.array(stats.kappa3.stats(shapes, moments=moments))
    npt.assert_allclose(elementwise, vectorized)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_kappa4_array_gh13582():
    """Vectorized kappa4.stats must match elementwise scalar evaluation,
    including for parameter combinations hitting different branches."""
    h = np.array([-0.5, 2.5, 3.5, 4.5, -3])
    k = np.array([-0.5, 1, -1.5, 0, 3.5])
    moments = 'mvsk'
    res = np.array([[stats.kappa4.stats(h[i], k[i], moments=moment)
                     for i in range(5)] for moment in moments])
    res2 = np.array(stats.kappa4.stats(h, k, moments=moments))
    npt.assert_allclose(res, res2)

    # https://github.com/scipy/scipy/pull/15250#discussion_r775112913
    h = np.array([-1, -1/4, -1/4, 1, -1, 0])
    k = np.array([1, 1, 1/2, -1/3, -1, 0])
    res = np.array([[stats.kappa4.stats(h[i], k[i], moments=moment)
                     for i in range(6)] for moment in moments])
    res2 = np.array(stats.kappa4.stats(h, k, moments=moments))
    npt.assert_allclose(res, res2)

    # https://github.com/scipy/scipy/pull/15250#discussion_r775115021
    h = np.array([-1, -0.5, 1])
    k = np.array([-1, -0.5, 0, 1])[:, None]
    res2 = np.array(stats.kappa4.stats(h, k, moments=moments))
    # 4 moments x broadcast (4, 3) parameter grid
    assert res2.shape == (4, 4, 3)
|
||||
|
||||
|
||||
def test_frozen_attributes():
    # gh-14827 reported that all frozen distributions had both pmf and pdf
    # attributes; continuous should have pdf and discrete should have pmf.
    message = "'rv_continuous_frozen' object has no attribute"
    with pytest.raises(AttributeError, match=message):
        stats.norm().pmf
    with pytest.raises(AttributeError, match=message):
        stats.norm().logpmf
    # Monkeypatching a `pmf` attribute onto the distribution must not
    # change the class chosen for the frozen distribution.
    stats.norm.pmf = "herring"
    frozen_norm = stats.norm()
    assert isinstance(frozen_norm, rv_continuous_frozen)
    # clean up the monkeypatch so other tests are unaffected
    delattr(stats.norm, 'pmf')
|
||||
|
||||
|
||||
def test_skewnorm_pdf_gh16038():
    """gh-16038: skewnorm with a == 0 must reduce exactly to the normal pdf."""
    rng = np.random.default_rng(0)
    # scalar edge case: x = -inf with a = 0
    npt.assert_equal(stats.skewnorm.pdf(-np.inf, 0), stats.norm.pdf(-np.inf))
    # array case: entries where a == 0 must match the normal pdf exactly,
    # the rest must match the generic skewnorm pdf
    x = rng.random(size=(3, 3))
    a = rng.random(size=(3, 3))
    mask = rng.random(size=(3, 3)) < 0.5
    a[mask] = 0
    res = stats.skewnorm.pdf(x, a)
    npt.assert_equal(res[mask], stats.norm.pdf(x[mask]))
    npt.assert_equal(res[~mask], stats.skewnorm.pdf(x[~mask], a[~mask]))
|
||||
|
||||
|
||||
# for scalar input, these functions should return scalar output
# (each entry is [method_name, positional_args])
scalar_out = [['rvs', []], ['pdf', [0]], ['logpdf', [0]], ['cdf', [0]],
              ['logcdf', [0]], ['sf', [0]], ['logsf', [0]], ['ppf', [0]],
              ['isf', [0]], ['moment', [1]], ['entropy', []], ['expect', []],
              ['median', []], ['mean', []], ['std', []], ['var', []]]
# these methods return a pair/tuple whose elements should each be scalar
scalars_out = [['interval', [0.95]], ['support', []], ['stats', ['mv']]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('case', scalar_out + scalars_out)
def test_scalar_for_scalar(case):
    # Some rv_continuous functions returned 0d array instead of NumPy scalar
    # Guard against regression
    method_name, args = case
    result = getattr(stats.norm(), method_name)(*args)
    if case in scalar_out:
        assert isinstance(result, np.number)
    else:
        # pair-returning methods: both elements must be scalars
        assert isinstance(result[0], np.number)
        assert isinstance(result[1], np.number)
|
||||
|
||||
|
||||
def test_scalar_for_scalar2():
    # test methods that are not attributes of frozen distributions
    for res in (stats.norm.fit([1, 2, 3]),
                stats.norm.fit_loc_scale([1, 2, 3])):
        assert isinstance(res[0], np.number)
        assert isinstance(res[1], np.number)
    nnlf_res = stats.norm.nnlf((0, 1), [1, 2, 3])
    assert isinstance(nnlf_res, np.number)
|
||||
115
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_crosstab.py
vendored
Normal file
115
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_crosstab.py
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, assert_equal
|
||||
from scipy.stats.contingency import crosstab
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_basic(sparse):
    """Two input sequences produce the expected 2-D contingency table."""
    a = [0, 0, 9, 9, 0, 0, 9]
    b = [2, 1, 3, 1, 2, 3, 3]
    expected_count = np.array([[1, 2, 1],
                               [1, 0, 2]])
    (avals, bvals), count = crosstab(a, b, sparse=sparse)
    assert_array_equal(avals, [0, 9])
    assert_array_equal(bvals, [1, 2, 3])
    # densify a sparse result before comparing
    dense = count.A if sparse else count
    assert_array_equal(dense, expected_count)
|
||||
|
||||
|
||||
def test_crosstab_basic_1d():
    # Verify that a single input sequence works as expected:
    # it degenerates to plain value counts.
    x = [1, 2, 3, 1, 2, 3, 3]
    (xvals,), count = crosstab(x)
    assert_array_equal(xvals, [1, 2, 3])
    assert_array_equal(count, np.array([2, 2, 3]))
|
||||
|
||||
|
||||
def test_crosstab_basic_3d():
    # Verify the function for three input sequences: the result is a
    # 3-D contingency table indexed by the sorted unique values.
    x = [0, 0, 9, 9, 0, 0, 9, 9]
    y = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'a']
    z = [1, 2, 3, 1, 2, 3, 3, 1]
    expected_count = np.array([[[1, 1, 0],
                                [0, 1, 1]],
                               [[2, 0, 1],
                                [0, 0, 1]]])
    (xvals, yvals, zvals), count = crosstab(x, y, z)
    assert_array_equal(xvals, [0, 9])
    assert_array_equal(yvals, ['a', 'b'])
    assert_array_equal(zvals, [1, 2, 3])
    assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_levels(sparse):
    """Explicit `levels` for the second sequence; None means "infer"."""
    a = [0, 0, 9, 9, 0, 0, 9]
    b = [1, 2, 3, 1, 2, 3, 3]
    expected_count = np.array([[0, 1, 2, 1],
                               [0, 1, 0, 2]])
    (avals, bvals), count = crosstab(a, b, levels=[None, [0, 1, 2, 3]],
                                     sparse=sparse)
    assert_array_equal(avals, [0, 9])
    assert_array_equal(bvals, [0, 1, 2, 3])
    # densify a sparse result before comparing
    dense = count.A if sparse else count
    assert_array_equal(dense, expected_count)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
def test_crosstab_extra_levels(sparse):
    # The pair of values (-1, 3) will be ignored, because we explicitly
    # request the counted `a` values to be [0, 9].
    a = [0, 0, 9, 9, 0, 0, 9, -1]
    b = [1, 2, 3, 1, 2, 3, 3, 3]
    expected_count = np.array([[0, 1, 2, 1],
                               [0, 1, 0, 2]])
    (avals, bvals), count = crosstab(a, b, levels=[[0, 9], [0, 1, 2, 3]],
                                     sparse=sparse)
    assert_array_equal(avals, [0, 9])
    assert_array_equal(bvals, [0, 1, 2, 3])
    # densify a sparse result before comparing
    dense = count.A if sparse else count
    assert_array_equal(dense, expected_count)
|
||||
|
||||
|
||||
def test_validation_at_least_one():
    # crosstab requires at least one input sequence
    with pytest.raises(TypeError, match='At least one'):
        crosstab()
|
||||
|
||||
|
||||
def test_validation_same_lengths():
    # all input sequences must have the same length
    with pytest.raises(ValueError, match='must have the same length'):
        crosstab([1, 2], [1, 2, 3, 4])
|
||||
|
||||
|
||||
def test_validation_sparse_only_two_args():
    # sparse output is only supported for exactly two input sequences
    with pytest.raises(ValueError, match='only two input sequences'):
        crosstab([0, 1, 1], [8, 8, 9], [1, 3, 3], sparse=True)
|
||||
|
||||
|
||||
def test_validation_len_levels_matches_args():
    # `levels` must provide one entry per input sequence
    with pytest.raises(ValueError, match='number of input sequences'):
        crosstab([0, 1, 1], [8, 8, 9], levels=([0, 1, 2, 3],))
|
||||
|
||||
|
||||
def test_result():
    res = crosstab([0, 1], [1, 2])
    # the result object unpacks as the (elements, count) pair
    assert_equal((res.elements, res.count), res)
|
||||
545
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py
vendored
Normal file
545
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py
vendored
Normal file
@@ -0,0 +1,545 @@
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment, check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_edge_support,
|
||||
check_named_args, check_random_state_property,
|
||||
check_pickling, check_rvs_broadcast, check_freezing,
|
||||
check_deprecation_warning_gh5982_moment,
|
||||
check_deprecation_warning_gh5982_interval)
|
||||
from scipy.stats._distr_params import distdiscrete, invdistdiscrete
|
||||
from scipy.stats._distn_infrastructure import rv_discrete_frozen
|
||||
|
||||
# a custom sample distribution (explicit values/probabilities) exercised
# alongside the built-in discrete distributions
vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4])
distdiscrete += [[stats.rv_discrete(values=vals), ()]]

# For these distributions, test_discrete_basic only runs with test mode full
distslow = {'zipfian', 'nhypergeom'}
|
||||
|
||||
|
||||
def cases_test_discrete_basic():
    """Yield (distname, arg, first_case) cases for test_discrete_basic.

    Slow distributions are emitted as pytest.param with the `slow` mark
    (their third element is the name itself, which is truthy).  For the
    rest, the third element is True only the first time a name is seen,
    so once-per-distribution checks run exactly once.
    """
    seen = set()
    for distname, arg in distdiscrete:
        if distname in distslow:
            yield pytest.param(distname, arg, distname, marks=pytest.mark.slow)
        else:
            yield distname, arg, distname not in seen
        seen.add(distname)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic())
def test_discrete_basic(distname, arg, first_case):
    """Run the battery of generic consistency checks for one discrete
    distribution (cdf/ppf round-trip, pmf/cdf consistency, chi-square
    goodness of fit, plus once-per-distribution infrastructure checks)."""
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # the entry is already an rv_discrete instance, not a name
        distfn = distname
        distname = 'sample distribution'
    np.random.seed(9765456)
    rvs = distfn.rvs(size=2000, *arg)
    supp = np.unique(rvs)
    m, v = distfn.stats(*arg)
    check_cdf_ppf(distfn, arg, supp, distname + ' cdf_ppf')

    check_pmf_cdf(distfn, arg, distname)
    check_oth(distfn, arg, supp, distname + ' oth')
    check_edge_support(distfn, arg)
    check_deprecation_warning_gh5982_moment(distfn, arg, distname)
    check_deprecation_warning_gh5982_interval(distfn, arg, distname)

    alpha = 0.01
    check_discrete_chisquare(distfn, arg, rvs, alpha,
                             distname + ' chisquare')

    if first_case:
        locscale_defaults = (0,)
        meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
                 distfn.logsf]
        # make sure arguments are within support
        # for some distributions, this needs to be overridden
        spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0,
                  'nchypergeom_wallenius': 6}
        k = spec_k.get(distname, 1)
        check_named_args(distfn, k, arg, locscale_defaults, meths)
        if distname != 'sample distribution':
            check_scale_docstring(distfn)
        check_random_state_property(distfn, arg)
        check_pickling(distfn, arg)
        check_freezing(distfn, arg)

        # Entropy
        check_entropy(distfn, arg, distname)
        if distfn.__class__._entropy != stats.rv_discrete._entropy:
            check_private_entropy(distfn, arg, stats.rv_discrete)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
@pytest.mark.parametrize('distname,arg', distdiscrete)
def test_moments(distname, arg):
    """Cross-check stats(), moment() and expect()-based moment estimates."""
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # the entry is already an rv_discrete instance, not a name
        distfn = distname
        distname = 'sample distribution'
    m, v, s, k = distfn.stats(*arg, moments='mvsk')
    check_normalization(distfn, arg, distname)

    # compare `stats` and `moment` methods
    check_moment(distfn, arg, m, v, distname)
    check_mean_expect(distfn, arg, m, distname)
    check_var_expect(distfn, arg, m, v, distname)
    check_skew_expect(distfn, arg, m, v, s, distname)
    # NOTE(review): zipf/yulesimon are excluded from the kurtosis check —
    # presumably their kurtosis comparison is unreliable here; confirm.
    if distname not in ['zipf', 'yulesimon']:
        check_kurt_expect(distfn, arg, m, v, k, distname)

    # frozen distr moments
    check_moment_frozen(distfn, arg, m, 1)
    check_moment_frozen(distfn, arg, v+m*m, 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distdiscrete)
def test_rvs_broadcast(dist, shape_args):
    # If shape_only is True, it means the _rvs method of the
    # distribution uses more than one random number to generate a random
    # variate. That means the result of using rvs with broadcasting or
    # with a nontrivial size will not necessarily be the same as using the
    # numpy.vectorize'd version of rvs(), so we can only compare the shapes
    # of the results, not the values.
    # Whether or not a distribution is in the following list is an
    # implementation detail of the distribution, not a requirement. If
    # the implementation the rvs() method of a distribution changes, this
    # test might also have to be changed.
    shape_only = dist in ['betabinom', 'skellam', 'yulesimon', 'dlaplace',
                          'nchypergeom_fisher', 'nchypergeom_wallenius']

    try:
        distfunc = getattr(stats, dist)
    except TypeError:
        # the entry is already an rv_discrete instance, not a name
        distfunc = dist
        dist = 'rv_discrete(values=(%r, %r))' % (dist.xk, dist.pk)
    loc = np.zeros(2)
    nargs = distfunc.numargs
    allargs = []
    bshape = []
    # Generate shape parameter arguments...
    for k in range(nargs):
        # each parameter gets a distinct broadcast-compatible shape
        shp = (k + 3,) + (1,)*(k + 1)
        param_val = shape_args[k]
        allargs.append(np.full(shp, param_val))
        bshape.insert(0, shp[0])
    allargs.append(loc)
    bshape.append(loc.size)
    # bshape holds the expected shape when loc, scale, and the shape
    # parameters are all broadcast together.
    check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, [np.int_])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,args', distdiscrete)
def test_ppf_with_loc(dist, args):
    """ppf at q=0 and q=1 must hit the loc-shifted support endpoints."""
    try:
        distfn = getattr(stats, dist)
    except TypeError:
        # the entry is already an rv_discrete instance, not a name
        distfn = dist
    # check with a negative, no and positive relocation.
    np.random.seed(1942349)
    re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)]
    _a, _b = distfn.support(*args)
    for loc in re_locs:
        # ppf(0) is one below the lower support bound; ppf(1) is the upper
        npt.assert_array_equal(
            [_a-1+loc, _b+loc],
            [distfn.ppf(0.0, *args, loc=loc), distfn.ppf(1.0, *args, loc=loc)]
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist, args', distdiscrete)
def test_isf_with_loc(dist, args):
    """isf at q=0 and q=1 must hit the loc-shifted support endpoints,
    including when loc is an array (broadcasting)."""
    try:
        distfn = getattr(stats, dist)
    except TypeError:
        # the entry is already an rv_discrete instance, not a name
        distfn = dist
    # check with a negative, no and positive relocation.
    np.random.seed(1942349)
    re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)]
    _a, _b = distfn.support(*args)
    for loc in re_locs:
        # isf(0) is the upper support bound; isf(1) is one below the lower
        expected = _b + loc, _a - 1 + loc
        res = distfn.isf(0., *args, loc=loc), distfn.isf(1., *args, loc=loc)
        npt.assert_array_equal(expected, res)
    # test broadcasting behaviour
    re_locs = [np.random.randint(-10, -1, size=(5, 3)),
               np.zeros((5, 3)),
               np.random.randint(1, 10, size=(5, 3))]
    _a, _b = distfn.support(*args)
    for loc in re_locs:
        expected = _b + loc, _a - 1 + loc
        res = distfn.isf(0., *args, loc=loc), distfn.isf(1., *args, loc=loc)
        npt.assert_array_equal(expected, res)
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, supp, msg):
    """Check the cdf/ppf inverse relationship on integer support points.

    supp is assumed to be an array of integers in the support of distfn
    (but not necessarily all the integers in the support).  This test
    assumes that the PMF of any value in the support of the distribution
    is greater than 1e-8.
    """
    # cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer}
    cdf_supp = distfn.cdf(supp, *arg)
    # In very rare cases, the finite precision calculation of ppf(cdf(supp))
    # can produce an array in which an element is off by one. We nudge the
    # CDF values down by 10 ULPs help to avoid this.
    nudged = cdf_supp - 10*np.spacing(cdf_supp)
    npt.assert_array_equal(distfn.ppf(nudged, *arg),
                           supp, msg + '-roundtrip')
    # Repeat the same calculation, but with the CDF values decreased by 1e-8.
    npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg) - 1e-8, *arg),
                           supp, msg + '-roundtrip')

    if not hasattr(distfn, 'xk'):
        _a, _b = distfn.support(*arg)
        interior = supp[supp < _b]
        # just above each cdf step, ppf must land on the next support point
        npt.assert_array_equal(
            distfn.ppf(distfn.cdf(interior, *arg) + 1e-8, *arg),
            interior + distfn.inc, msg + ' ppf-cdf-next')
|
||||
|
||||
|
||||
def check_pmf_cdf(distfn, arg, distname):
    """Check pmf/cdf consistency: cumulative pmf matches cdf increments,
    and pmf vanishes at non-integral points (also for a frozen, shifted
    distribution)."""
    if hasattr(distfn, 'xk'):
        index = distfn.xk
    else:
        startind = int(distfn.ppf(0.01, *arg) - 1)
        index = list(range(startind, startind + 10))
    cdfs = distfn.cdf(index, *arg)
    pmfs_cum = distfn.pmf(index, *arg).cumsum()

    # skellam's underlying ncx2 implementation is less accurate
    atol = rtol = 1e-5 if distname == 'skellam' else 1e-10
    npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0],
                        atol=atol, rtol=rtol)

    # also check that pmf at non-integral k is zero
    k = np.asarray(index)
    k_shifted = k[:-1] + np.diff(k)/2
    npt.assert_equal(distfn.pmf(k_shifted, *arg), 0)

    # better check frozen distributions, and also when loc != 0
    loc = 0.5
    dist = distfn(loc=loc, *arg)
    npt.assert_allclose(dist.pmf(k[1:] + loc), np.diff(dist.cdf(k + loc)))
    npt.assert_equal(dist.pmf(k_shifted + loc), 0)
|
||||
|
||||
|
||||
def check_moment_frozen(distfn, arg, m, k):
    # the k-th moment of the frozen distribution must equal m
    frozen = distfn(*arg)
    npt.assert_allclose(frozen.moment(k), m, atol=1e-10, rtol=1e-10)
|
||||
|
||||
|
||||
def check_oth(distfn, arg, supp, msg):
    """Check sf/isf consistency with cdf/ppf, plus sf-median bracketing."""
    # sf is the complement of cdf
    npt.assert_allclose(distfn.sf(supp, *arg),
                        1. - distfn.cdf(supp, *arg),
                        atol=1e-10, rtol=1e-10)

    # isf(q) must agree with ppf(1 - q)
    qs = np.linspace(0.01, 0.99, 20)
    npt.assert_allclose(distfn.isf(qs, *arg), distfn.ppf(1. - qs, *arg),
                        atol=1e-10, rtol=1e-10)

    # the sf-median must be bracketed by sf/cdf exceeding one half
    median_sf = distfn.isf(0.5, *arg)
    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
|
||||
|
||||
|
||||
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    """Perform a chisquare goodness-of-fit test for a sample of a discrete
    distribution.

    Parameters
    ----------
    distfn : rv_discrete instance
        the discrete distribution being tested
    arg : sequence
        parameters of the distribution
    rvs : array_like
        random sample to test against the distribution
    alpha : float
        significance level, threshold for the p-value
    msg : str
        distribution name, used in the failure message

    Raises
    ------
    AssertionError
        if the chisquare p-value falls below ``alpha``.
    """
    # minimum probability mass per bin
    wsupp = 0.05

    # construct intervals with minimum mass `wsupp`.
    # intervals are left-half-open as in a cdf difference
    _a, _b = distfn.support(*arg)
    # clip the scanned support so unbounded distributions terminate
    lo = int(max(_a, -1000))
    high = int(min(_b, 1000)) + 1
    distsupport = range(lo, high)
    last = 0
    distsupp = [lo]
    distmass = []
    for ii in distsupport:
        current = distfn.cdf(ii, *arg)
        # 1e-14 slack tolerates roundoff in the cdf differences
        if current - last >= wsupp - 1e-14:
            distsupp.append(ii)
            distmass.append(current - last)
            last = current
            # stop once the remaining tail mass is below one bin's worth
            if current > (1 - wsupp):
                break
    if distsupp[-1] < _b:
        # lump all remaining tail mass into a final bin
        distsupp.append(_b)
        distmass.append(1 - last)
    distsupp = np.array(distsupp)
    distmass = np.array(distmass)

    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp + 1e-8
    histsupp[0] = _a

    # find sample frequencies and perform chisquare test
    freq, hsupp = np.histogram(rvs, histsupp)
    chis, pval = stats.chisquare(np.array(freq), len(rvs)*distmass)

    npt.assert_(pval > alpha,
                'chisquare - test for %s at arg = %s with pval = %s' %
                (msg, str(arg), str(pval)))
|
||||
|
||||
|
||||
def check_scale_docstring(distfn):
    """Discrete distributions take no `scale` parameter, so their
    docstring must not mention one."""
    doc = distfn.__doc__
    # Docstrings can be stripped if the interpreter is run with -OO
    if doc is not None:
        npt.assert_('scale' not in doc)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pmf', 'logpmf', 'cdf', 'logcdf',
                                    'sf', 'logsf', 'ppf', 'isf'])
@pytest.mark.parametrize('distname, args', distdiscrete)
def test_methods_with_lists(method, distname, args):
    """Discrete distribution methods must accept Python lists as arguments.

    Bug fix: the body previously ignored the parametrized ``method`` and
    always exercised ``pmf``, so seven of the eight parametrizations
    silently re-tested the same method.  Now ``method`` is looked up and
    actually tested.
    """
    try:
        dist = getattr(stats, distname)
    except TypeError:
        # distdiscrete entries may be distribution objects rather than
        # names; getattr with a non-string raises TypeError
        return
    if method in ['ppf', 'isf']:
        z = [0.1, 0.2]  # quantile arguments must lie in (0, 1)
    else:
        z = [0, 1]
    p2 = [[p]*2 for p in args]
    loc = [0, 1]
    dist_method = getattr(dist, method)
    result = dist_method(z, *p2, loc=loc)
    # list-valued arguments must broadcast exactly like scalars
    npt.assert_allclose(result,
                        [dist_method(*v) for v in zip(z, *p2, loc)],
                        rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, args', invdistdiscrete)
def test_cdf_gh13280_regression(distname, args):
    """Invalid shape parameters must produce NaN from cdf (gh-13280)."""
    dist = getattr(stats, distname)
    values = dist.cdf(np.arange(-2, 15), *args)
    npt.assert_equal(values, np.nan)
|
||||
|
||||
|
||||
def cases_test_discrete_integer_shapes():
    """Yield (distname, shape name, shapes) for every shape parameter that
    must be integral, skipping parameters that are only required to be
    integral when fitting (but may be real as input to pmf, etc.)."""
    integrality_exceptions = {'nbinom': {'n'}}

    seen = set()
    for distname, shapes in distdiscrete:
        if distname in seen:
            continue
        seen.add(distname)

        try:
            dist = getattr(stats, distname)
        except TypeError:
            # entry is a distribution object, not a name
            continue

        for shape in dist._shape_info():
            exceptions = integrality_exceptions.get(distname, set())
            if shape.integrality and shape.name not in exceptions:
                yield distname, shape.name, shapes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapename, shapes',
                         cases_test_discrete_integer_shapes())
def test_integer_shapes(distname, shapename, shapes):
    """Non-integral values of an integral shape parameter must produce NaN
    pmf, while neighbouring integral values must not."""
    dist = getattr(stats, distname)
    names = [info.name for info in dist._shape_info()]
    pos = names.index(shapename)  # this element of params must be integral

    good = shapes[pos]
    bad = good - 0.5          # arbitrary non-integral value
    also_good = good - 1      # another valid integral value

    columns = list(shapes)
    columns[pos] = [[good], [bad], [also_good]]

    lo, hi = dist.support(*shapes)
    x = np.round(np.linspace(lo, hi, 5))

    pmf = dist.pmf(x, *columns)
    assert not np.any(np.isnan(pmf[0, :]))
    assert np.all(np.isnan(pmf[1, :]))
    assert not np.any(np.isnan(pmf[2, :]))
|
||||
|
||||
|
||||
def test_frozen_attributes():
    """Frozen discrete distributions expose pmf but not pdf (gh-14827)."""
    message = "'rv_discrete_frozen' object has no attribute"
    with pytest.raises(AttributeError, match=message):
        stats.binom(10, 0.5).pdf
    with pytest.raises(AttributeError, match=message):
        stats.binom(10, 0.5).logpdf
    # a pdf attribute injected on the class must not change the frozen type
    stats.binom.pdf = "herring"
    frozen = stats.binom(10, 0.5)
    assert isinstance(frozen, rv_discrete_frozen)
    delattr(stats.binom, 'pdf')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapes', distdiscrete)
def test_interval(distname, shapes):
    """With confidence=1 the left endpoint lies one unit below the support
    (gh-11026).  The values were not incorrect, just unintuitive; confirm
    the behavior is consistent across ppf, isf, and interval."""
    dist = getattr(stats, distname) if isinstance(distname, str) else distname
    lo, hi = dist.support(*shapes)
    npt.assert_equal(dist.ppf([0, 1], *shapes), (lo - 1, hi))
    npt.assert_equal(dist.isf([1, 0], *shapes), (lo - 1, hi))
    npt.assert_equal(dist.interval(1, *shapes), (lo - 1, hi))
|
||||
|
||||
|
||||
def test_rv_sample():
    """Thoroughly exercise rv_sample (rv_discrete with explicit values)
    and check that gh-3758 is resolved: every public method is compared
    against a direct computation from the (xk, pk) table."""

    # Generate a random discrete distribution
    rng = np.random.default_rng(98430143469)
    xk = np.sort(rng.random(10) * 10)
    pk = rng.random(10)
    pk /= np.sum(pk)  # normalize so the masses sum to 1
    dist = stats.rv_discrete(values=(xk, pk))

    # Generate points to the left and right of xk
    xk_left = (np.array([0] + xk[:-1].tolist()) + xk)/2
    xk_right = (np.array(xk[1:].tolist() + [xk[-1]+1]) + xk)/2

    # Generate points to the left and right of cdf
    cdf2 = np.cumsum(pk)
    cdf2_left = (np.array([0] + cdf2[:-1].tolist()) + cdf2)/2
    cdf2_right = (np.array(cdf2[1:].tolist() + [1]) + cdf2)/2

    # support - leftmost and rightmost xk
    a, b = dist.support()
    assert_allclose(a, xk[0])
    assert_allclose(b, xk[-1])

    # pmf - supported only on the xk
    assert_allclose(dist.pmf(xk), pk)
    assert_allclose(dist.pmf(xk_right), 0)
    assert_allclose(dist.pmf(xk_left), 0)

    # logpmf is log of the pmf; log(0) = -np.inf
    with np.errstate(divide='ignore'):
        assert_allclose(dist.logpmf(xk), np.log(pk))
        assert_allclose(dist.logpmf(xk_right), -np.inf)
        assert_allclose(dist.logpmf(xk_left), -np.inf)

    # cdf - the cumulative sum of the pmf; a right step function
    assert_allclose(dist.cdf(xk), cdf2)
    assert_allclose(dist.cdf(xk_right), cdf2)
    assert_allclose(dist.cdf(xk_left), [0]+cdf2[:-1].tolist())

    with np.errstate(divide='ignore'):
        assert_allclose(dist.logcdf(xk), np.log(dist.cdf(xk)),
                        atol=1e-15)
        assert_allclose(dist.logcdf(xk_right), np.log(dist.cdf(xk_right)),
                        atol=1e-15)
        assert_allclose(dist.logcdf(xk_left), np.log(dist.cdf(xk_left)),
                        atol=1e-15)

    # sf is 1-cdf
    assert_allclose(dist.sf(xk), 1-dist.cdf(xk))
    assert_allclose(dist.sf(xk_right), 1-dist.cdf(xk_right))
    assert_allclose(dist.sf(xk_left), 1-dist.cdf(xk_left))

    with np.errstate(divide='ignore'):
        assert_allclose(dist.logsf(xk), np.log(dist.sf(xk)),
                        atol=1e-15)
        assert_allclose(dist.logsf(xk_right), np.log(dist.sf(xk_right)),
                        atol=1e-15)
        assert_allclose(dist.logsf(xk_left), np.log(dist.sf(xk_left)),
                        atol=1e-15)

    # ppf - inverse of the cdf, mapping back onto the xk lattice
    assert_allclose(dist.ppf(cdf2), xk)
    assert_allclose(dist.ppf(cdf2_left), xk)
    assert_allclose(dist.ppf(cdf2_right)[:-1], xk[1:])
    assert_allclose(dist.ppf(0), a - 1)
    assert_allclose(dist.ppf(1), b)

    # isf - inverse survival function, consistent with ppf
    sf2 = dist.sf(xk)
    assert_allclose(dist.isf(sf2), xk)
    assert_allclose(dist.isf(1-cdf2_left), dist.ppf(cdf2_left))
    assert_allclose(dist.isf(1-cdf2_right), dist.ppf(cdf2_right))
    assert_allclose(dist.isf(0), b)
    assert_allclose(dist.isf(1), a - 1)

    # interval is (ppf(alpha/2), isf(alpha/2))
    ps = np.linspace(0.01, 0.99, 10)
    int2 = dist.ppf(ps/2), dist.isf(ps/2)
    assert_allclose(dist.interval(1-ps), int2)
    assert_allclose(dist.interval(0), dist.median())
    assert_allclose(dist.interval(1), (a-1, b))

    # median is simply ppf(0.5)
    med2 = dist.ppf(0.5)
    assert_allclose(dist.median(), med2)

    # all four stats (mean, var, skew, and kurtosis) from the definitions
    mean2 = np.sum(xk*pk)
    var2 = np.sum((xk - mean2)**2 * pk)
    skew2 = np.sum((xk - mean2)**3 * pk) / var2**(3/2)
    kurt2 = np.sum((xk - mean2)**4 * pk) / var2**2 - 3
    assert_allclose(dist.mean(), mean2)
    assert_allclose(dist.std(), np.sqrt(var2))
    assert_allclose(dist.var(), var2)
    assert_allclose(dist.stats(moments='mvsk'), (mean2, var2, skew2, kurt2))

    # noncentral moment against definition
    mom3 = np.sum((xk**3) * pk)
    assert_allclose(dist.moment(3), mom3)

    # expect - check against moments
    assert_allclose(dist.expect(lambda x: 1), 1)
    assert_allclose(dist.expect(), mean2)
    assert_allclose(dist.expect(lambda x: x**3), mom3)

    # entropy is the negative of the expected value of log(p)
    with np.errstate(divide='ignore'):
        assert_allclose(-dist.expect(lambda x: dist.logpmf(x)), dist.entropy())

    # RVS is just ppf of uniform random variates; re-seeding the generator
    # must reproduce the same draws through either path
    rng = np.random.default_rng(98430143469)
    rvs = dist.rvs(size=100, random_state=rng)
    rng = np.random.default_rng(98430143469)
    rvs0 = dist.ppf(rng.random(size=100))
    assert_allclose(rvs, rvs0)
|
||||
566
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py
vendored
Normal file
566
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py
vendored
Normal file
@@ -0,0 +1,566 @@
|
||||
import pytest
|
||||
from scipy.stats import (betabinom, hypergeom, nhypergeom, bernoulli,
|
||||
boltzmann, skellam, zipf, zipfian, binom, nbinom,
|
||||
nchypergeom_fisher, nchypergeom_wallenius, randint)
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (
|
||||
assert_almost_equal, assert_equal, assert_allclose, suppress_warnings
|
||||
)
|
||||
from scipy.special import binom as special_binom
|
||||
from scipy.optimize import root_scalar
|
||||
from scipy.integrate import quad
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression CDF[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
                         [(3, 10, 4, 5,
                           0.9761904761904762, 1e-15),
                          (107, 10000, 3000, 215,
                           0.9999999997226765, 1e-15),
                          (10, 10000, 3000, 215,
                           2.681682217692179e-21, 5e-11)])
def test_hypergeom_cdf(k, M, n, N, expected, rtol):
    """hypergeom.cdf against Wolfram Alpha reference values, computed with
    CDF[HypergeometricDistribution[N, n, M], k]."""
    computed = hypergeom.cdf(k, M, n, N)
    assert_allclose(computed, expected, rtol=rtol)
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
                         [(25, 10000, 3000, 215,
                           0.9999999999052958, 1e-15),
                          (125, 10000, 3000, 215,
                           1.4416781705752128e-18, 5e-11)])
def test_hypergeom_sf(k, M, n, N, expected, rtol):
    """hypergeom.sf against Wolfram Alpha reference values, computed with
    SurvivalFunction[HypergeometricDistribution[N, n, M], k]."""
    computed = hypergeom.sf(k, M, n, N)
    assert_allclose(computed, expected, rtol=rtol)
|
||||
|
||||
|
||||
def test_hypergeom_logpmf():
    """Check logpmf symmetries and the Bernoulli special case (n = 1)."""
    # symmetries: f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
    k, N, K, n = 5, 50, 10, 5
    reference = hypergeom.logpmf(k, N, K, n)
    equivalents = [hypergeom.logpmf(n - k, N, N - K, n),
                   hypergeom.logpmf(K - k, N, K, N - n),
                   hypergeom.logpmf(k, N, n, K)]
    for other in equivalents:
        assert_almost_equal(reference, other, decimal=12)

    # with a single draw the hypergeometric reduces to a Bernoulli
    k, N, K, n = 1, 10, 7, 1
    assert_almost_equal(hypergeom.logpmf(k, N, K, n),
                        bernoulli.logpmf(k, K/N), decimal=12)
|
||||
|
||||
|
||||
def test_nhypergeom_pmf():
    """Cross-check nhypergeom.pmf via an identity in terms of hypergeom."""
    M, n, r = 45, 13, 8
    k = 6
    via_hypergeom = (hypergeom.pmf(k, M, n, k+r-1)
                     * (M - n - (r-1)) / (M - (k+r-1)))
    assert_allclose(via_hypergeom, nhypergeom.pmf(k, M, n, r), rtol=1e-10)
|
||||
|
||||
|
||||
def test_nhypergeom_pmfcdf():
    """Spot-check nhypergeom pmf and cdf against hand-computed fractions."""
    M, n, r = 8, 3, 4
    support = np.arange(n + 1)
    assert_allclose(nhypergeom.pmf(support, M, n, r),
                    [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
    assert_allclose(nhypergeom.cdf(support, M, n, r),
                    [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_r0():
    """With r = 0 all probability mass sits at k = 0."""
    M, n, r = 10, 3, 0
    pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r)
    assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_rvs_shape():
    """rvs honors a `size` with more dimensions than the broadcast of the
    shape parameters."""
    sample = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3))
    assert sample.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
def test_nhypergeom_accuracy():
    """nhypergeom.rvs (post gh-13431) must agree with inverse-transform
    sampling using the same underlying uniform stream."""
    np.random.seed(0)
    direct = nhypergeom.rvs(22, 7, 11, size=100)
    np.random.seed(0)
    via_ppf = nhypergeom.ppf(np.random.uniform(size=100), 22, 7, 11)
    assert_equal(direct, via_ppf)
|
||||
|
||||
|
||||
def test_boltzmann_upper_bound():
    """Boltzmann pmf/cdf must respect the truncation bound N."""
    k = np.arange(-3, 5)

    # N = 1: all mass sits at k == 0
    assert_equal(boltzmann.pmf(k, 0.123, 1), k == 0)

    # N = 3 with lambda = log 2 gives simple rational masses
    lam, N = np.log(2), 3
    assert_allclose(boltzmann.pmf(k, lam, N),
                    [0, 0, 0, 4/7, 2/7, 1/7, 0, 0], rtol=1e-13)
    assert_allclose(boltzmann.cdf(k, lam, N),
                    [0, 0, 0, 4/7, 6/7, 1, 1, 1], rtol=1e-13)
|
||||
|
||||
|
||||
def test_betabinom_a_and_b_unity():
    """betabinom(n, 1, 1) is the discrete uniform distribution on 0..n."""
    n = 20
    support = np.arange(n + 1)
    uniform_p = np.repeat(1 / (n + 1), n + 1)
    assert_almost_equal(betabinom(n, 1, 1).pmf(support), uniform_p)
|
||||
|
||||
|
||||
def test_betabinom_bernoulli():
    """betabinom(1, a, b) reduces to bernoulli(a / (a + b))."""
    a, b = 2.3, 0.63
    support = np.arange(2)
    assert_almost_equal(betabinom(1, a, b).pmf(support),
                        bernoulli(a / (a + b)).pmf(support))
|
||||
|
||||
|
||||
def test_issue_10317():
    """nbinom.interval must be (0, 0) in the degenerate case p == 1."""
    assert_equal(nbinom.interval(confidence=0.9, n=10, p=1), (0, 0))
|
||||
|
||||
|
||||
def test_issue_11134():
    """binom.interval must be (0, 0) in the degenerate case p == 0."""
    assert_equal(binom.interval(confidence=0.95, n=10, p=0), (0, 0))
|
||||
|
||||
|
||||
def test_issue_7406():
    """binom.ppf with n == 0 is 0 for interior q, with exact endpoints."""
    np.random.seed(0)
    assert_equal(binom.ppf(np.random.rand(10), 0, 0.5), 0)

    # endpoints q = 0 and q = 1 are handled specially
    assert_equal(binom.ppf(0, 0, 0.5), -1)
    assert_equal(binom.ppf(1, 0, 0.5), 0)
|
||||
|
||||
|
||||
def test_issue_5122():
    """binom.ppf with p == 0: -1 at q == 0, 0 for interior q, n at q == 1."""
    p = 0
    n = np.random.randint(100, size=10)

    assert_equal(binom.ppf(0, n, p), -1)
    assert_equal(binom.ppf(np.linspace(0.01, 0.99, 10), n, p), 0)
    assert_equal(binom.ppf(1, n, p), n)
|
||||
|
||||
|
||||
def test_issue_1603():
    """ppf(0.01) must be 0 across a sweep of extremely small p values."""
    assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0)
|
||||
|
||||
|
||||
def test_issue_5503():
    """binom.cdf evaluated at the mean stays near 0.5 for very large n."""
    x = np.logspace(3, 14, 12)
    assert_allclose(binom.cdf(x, 2*x, 0.5), 0.5, atol=1e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('x, n, p, cdf_desired', [
    (300, 1000, 3/10, 0.51559351981411995636),
    (3000, 10000, 3/10, 0.50493298381929698016),
    (30000, 100000, 3/10, 0.50156000591726422864),
    (300000, 1000000, 3/10, 0.50049331906666960038),
    (3000000, 10000000, 3/10, 0.50015600124585261196),
    (30000000, 100000000, 3/10, 0.50004933192735230102),
    (30010000, 100000000, 3/10, 0.98545384016570790717),
    (29990000, 100000000, 3/10, 0.01455017177985268670),
    (29950000, 100000000, 3/10, 5.02250963487432024943e-28),
])
def test_issue_5503pt2(x, n, p, cdf_desired):
    """binom.cdf reference values near (and off) the median for large n."""
    computed = binom.cdf(x, n, p)
    assert_allclose(computed, cdf_desired)
|
||||
|
||||
|
||||
def test_issue_5503pt3():
    """Reference from Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]."""
    assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384)
|
||||
|
||||
|
||||
def test_issue_6682():
    """nbinom.sf deep in the tail.  Reference value from R:
        options(digits=16)
        print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
    """
    assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35)
|
||||
|
||||
|
||||
def test_boost_divide_by_zero_issue_15101():
    """pmf far above the bulk must underflow cleanly to 0.0 rather than
    trip a division by zero in the boost backend (gh-15101)."""
    assert_allclose(binom.pmf(996, 1000, 0.01), 0.0)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::RuntimeWarning')
def test_skellam_gh11474():
    """skellam.cdf(0, mu, mu) against R's skellam package (gh-11474, a bug
    caused by `cdfchn`).  Reference generated in R:
        library(skellam); options(digits = 16)
        mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
        pskellam(0, mu, mu, TRUE)
    """
    mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
    expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
                0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
                0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
    assert_allclose(skellam.cdf(0, mu, mu), expected)
|
||||
|
||||
|
||||
class TestZipfian:
    """Tests for the bounded Zipf (zipfian) distribution: its asymptotic
    agreement with zipf, continuity across a = 1, and agreement with the
    R VGAM package and a naive reference implementation."""

    def test_zipfian_asymptotic(self):
        # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo
        a = 6.5
        N = 10000000
        k = np.arange(1, 21)
        assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a))
        assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a))
        assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a))
        assert_allclose(zipfian.stats(a, N, moments='msvk'),
                        zipf.stats(a, moments='msvk'))

    def test_zipfian_continuity(self):
        # test that zipfian(0.999999, n) ~ zipfian(1.000001, n)
        # (a = 1 switches between methods of calculating harmonic sum)
        alt1, agt1 = 0.99999999, 1.00000001
        N = 30
        k = np.arange(1, N + 1)
        assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N),
                        rtol=5e-7)
        assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N),
                        rtol=5e-7)
        assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N),
                        rtol=5e-7)
        assert_allclose(zipfian.stats(alt1, N, moments='msvk'),
                        zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7)

    def test_zipfian_R(self):
        # test against R VGAM package
        # library(VGAM)
        # k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
        # a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
        #        4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
        # n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
        # pmf <- dzipf(k, N = n, shape = a)
        # cdf <- pzipf(k, N = n, shape = a)
        # print(pmf)
        # print(cdf)
        np.random.seed(0)  # must reproduce the k, a, n used to make pmf/cdf
        k = np.random.randint(1, 20, size=10)
        a = np.random.rand(10)*10 + 1
        n = np.random.randint(1, 100, size=10)
        pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
               3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
               5.860662e-06, 1.053948e-04]
        cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
               0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
        # skip the first point; zipUC is not accurate for low a, n
        assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6)
        assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5)

    # class-level (a, n) grid consumed by the parametrize below
    np.random.seed(0)
    naive_tests = np.vstack((np.logspace(-2, 1, 10),
                             np.random.randint(2, 40, 10))).T

    @pytest.mark.parametrize("a, n", naive_tests)
    def test_zipfian_naive(self, a, n):
        # test against bare-bones implementation

        @np.vectorize
        def Hns(n, s):
            """Naive implementation of harmonic sum"""
            return (1/np.arange(1, n+1)**s).sum()

        @np.vectorize
        def pzip(k, a, n):
            """Naive implementation of zipfian pmf"""
            if k < 1 or k > n:
                return 0.
            else:
                return 1 / k**a / Hns(n, a)

        k = np.arange(n+1)
        pmf = pzip(k, a, n)
        cdf = np.cumsum(pmf)
        # moments computed directly from the naive pmf weights
        mean = np.average(k, weights=pmf)
        var = np.average((k - mean)**2, weights=pmf)
        std = var**0.5
        skew = np.average(((k-mean)/std)**3, weights=pmf)
        kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3
        assert_allclose(zipfian.pmf(k, a, n), pmf)
        assert_allclose(zipfian.cdf(k, a, n), cdf)
        assert_allclose(zipfian.stats(a, n, moments="mvsk"),
                        [mean, var, skew, kurtosis])
|
||||
|
||||
|
||||
class TestNCH():
    """Tests for the noncentral hypergeometric distributions (Fisher's and
    Wallenius').  Random parameter sets are drawn once at class definition
    and shared by the methods below."""

    np.random.seed(2)  # seeds 0 and 1 had some xl = xu; randint failed
    shape = (2, 4, 3)
    max_m = 100
    m1 = np.random.randint(1, max_m, size=shape)  # red balls
    m2 = np.random.randint(1, max_m, size=shape)  # white balls
    N = m1 + m2  # total balls
    n = randint.rvs(0, N, size=N.shape)  # number of draws
    xl = np.maximum(0, n-m2)  # lower bound of support
    xu = np.minimum(n, m1)  # upper bound of support
    x = randint.rvs(xl, xu, size=xl.shape)
    odds = np.random.rand(*x.shape)*2

    # test output is more readable when function names (strings) are passed
    @pytest.mark.parametrize('dist_name',
                             ['nchypergeom_fisher', 'nchypergeom_wallenius'])
    def test_nch_hypergeom(self, dist_name):
        # Both noncentral hypergeometric distributions reduce to the
        # hypergeometric distribution when odds = 1
        dists = {'nchypergeom_fisher': nchypergeom_fisher,
                 'nchypergeom_wallenius': nchypergeom_wallenius}
        dist = dists[dist_name]
        x, N, m1, n = self.x, self.N, self.m1, self.n
        assert_allclose(dist.pmf(x, N, m1, n, odds=1),
                        hypergeom.pmf(x, N, m1, n))

    def test_nchypergeom_fisher_naive(self):
        # test against a very simple implementation
        x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds

        @np.vectorize
        def pmf_mean_var(x, N, m1, n, w):
            # simple implementation of nchypergeom_fisher pmf
            m2 = N - m1
            xl = np.maximum(0, n-m2)
            xu = np.minimum(n, m1)

            def f(x):
                # unnormalized Fisher weight for drawing x red balls
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                return t1 * t2 * w**x

            def P(k):
                # k-th raw moment of the unnormalized weights
                return sum((f(y)*y**k for y in range(xl, xu + 1)))

            P0 = P(0)
            P1 = P(1)
            P2 = P(2)
            pmf = f(x) / P0
            mean = P1 / P0
            var = P2 / P0 - (P1 / P0)**2
            return pmf, mean, var

        pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
        assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
        assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
                        mean)
        assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
                        var)

    def test_nchypergeom_wallenius_naive(self):
        # test against a very simple implementation

        # regenerate the same parameter sets locally (same seed as class)
        np.random.seed(2)
        shape = (2, 4, 3)
        max_m = 100
        m1 = np.random.randint(1, max_m, size=shape)
        m2 = np.random.randint(1, max_m, size=shape)
        N = m1 + m2
        n = randint.rvs(0, N, size=N.shape)
        xl = np.maximum(0, n-m2)
        xu = np.minimum(n, m1)
        x = randint.rvs(xl, xu, size=xl.shape)
        w = np.random.rand(*x.shape)*2

        def support(N, m1, n, w):
            # support bounds of the Wallenius distribution
            m2 = N - m1
            xl = np.maximum(0, n-m2)
            xu = np.minimum(n, m1)
            return xl, xu

        @np.vectorize
        def mean(N, m1, n, w):
            # mean as the root of the Wallenius mean equation
            m2 = N - m1
            xl, xu = support(N, m1, n, w)

            def fun(u):
                return u/m1 + (1 - (n-u)/m2)**w - 1

            return root_scalar(fun, bracket=(xl, xu)).root

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning,
                       message="invalid value encountered in mean")
            assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
                            mean(N, m1, n, w), rtol=2e-2)

        @np.vectorize
        def variance(N, m1, n, w):
            # approximate variance formula built from the mean above
            m2 = N - m1
            u = mean(N, m1, n, w)
            a = u * (m1 - u)
            b = (n-u)*(u + m2 - n)
            return N*a*b / ((N-1) * (m1*b + m2*a))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning,
                       message="invalid value encountered in mean")
            assert_allclose(
                nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
                variance(N, m1, n, w),
                rtol=5e-2
            )

        @np.vectorize
        def pmf(x, N, m1, n, w):
            # naive pmf via the Wallenius integral representation
            m2 = N - m1
            xl, xu = support(N, m1, n, w)

            def integrand(t):
                D = w*(m1 - x) + (m2 - (n-x))
                res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
                return res

            def f(x):
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                the_integral = quad(integrand, 0, 1,
                                    epsrel=1e-16, epsabs=1e-16)
                return t1 * t2 * the_integral[0]

            return f(x)

        pmf0 = pmf(x, N, m1, n, w)
        pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)

        atol, rtol = 1e-6, 1e-6
        i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
        assert i.sum() > np.prod(shape) / 2  # works at least half the time

        # for those that fail, discredit the naive implementation
        for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
            # get the support
            m2 = N - m1
            xl, xu = support(N, m1, n, w)
            x = np.arange(xl, xu + 1)

            # calculate sum of pmf over the support
            # the naive implementation is very wrong in these cases
            assert pmf(x, N, m1, n, w).sum() < .5
            assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)

    def test_wallenius_against_mpmath(self):
        # precompute data with mpmath since naive implementation above
        # is not reliable. See source code in gh-13330.
        M = 50
        n = 30
        N = 20
        odds = 2.25
        # Expected results, computed with mpmath.
        sup = np.arange(21)
        pmf = np.array([3.699003068656875e-20,
                        5.89398584245431e-17,
                        2.1594437742911123e-14,
                        3.221458044649955e-12,
                        2.4658279241205077e-10,
                        1.0965862603981212e-08,
                        3.057890479665704e-07,
                        5.622818831643761e-06,
                        7.056482841531681e-05,
                        0.000618899425358671,
                        0.003854172932571669,
                        0.01720592676256026,
                        0.05528844897093792,
                        0.12772363313574242,
                        0.21065898367825722,
                        0.24465958845359234,
                        0.1955114898110033,
                        0.10355390084949237,
                        0.03414490375225675,
                        0.006231989845775931,
                        0.0004715577304677075])
        mean = 14.808018384813426
        var = 2.6085975877923717

        # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
        # has only three digits of accuracy (~ 2.1511e-14).
        assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
                        rtol=1e-13, atol=1e-13)
        assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
                        mean, rtol=1e-13)
        assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
                        var, rtol=1e-11)

    @pytest.mark.parametrize('dist_name',
                             ['nchypergeom_fisher', 'nchypergeom_wallenius'])
    def test_rvs_shape(self, dist_name):
        # Check that when given a size with more dimensions than the
        # dimensions of the broadcast parameters, rvs returns an array
        # with the correct shape.
        dists = {'nchypergeom_fisher': nchypergeom_fisher,
                 'nchypergeom_wallenius': nchypergeom_wallenius}
        dist = dists[dist_name]
        x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
        assert x.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mu, q, expected",
                         [[10, 120, -1.240089881791596e-38],
                          [1500, 0, -86.61466680572661]])
def test_nbinom_11465(mu, q, expected):
    """nbinom.logcdf at extreme tails.  Reference values from R:
        options(digits=16)
        pnbinom(mu=10, size=20, q=120, log.p=TRUE)
    """
    size = 20
    n, p = size, size/(size + mu)
    assert_allclose(nbinom.logcdf(q, n, p), expected)
|
||||
|
||||
|
||||
def test_gh_17146():
    """Discrete distributions must return a pmf of zero at non-integral x
    (gh-17146)."""
    x = np.linspace(0, 1, 11)
    p = 0.8
    pmf = bernoulli(p).pmf(x)
    on_lattice = (x % 1 == 0)
    assert_allclose(pmf[-1], p)
    assert_allclose(pmf[0], 1 - p)
    assert_equal(pmf[~on_lattice], 0)
|
||||
7625
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_distributions.py
vendored
Normal file
7625
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_distributions.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
287
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_entropy.py
vendored
Normal file
287
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_entropy.py
vendored
Normal file
@@ -0,0 +1,287 @@
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
# avoid new uses of the following; prefer assert/np.testing.assert_allclose
|
||||
from numpy.testing import (assert_, assert_almost_equal,
|
||||
assert_array_almost_equal)
|
||||
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
import scipy.stats as stats
|
||||
|
||||
|
||||
class TestEntropy:
|
||||
def test_entropy_positive(self):
|
||||
# See ticket #497
|
||||
pk = [0.5, 0.2, 0.3]
|
||||
qk = [0.1, 0.25, 0.65]
|
||||
eself = stats.entropy(pk, pk)
|
||||
edouble = stats.entropy(pk, qk)
|
||||
assert_(0.0 == eself)
|
||||
assert_(edouble >= 0.0)
|
||||
|
||||
def test_entropy_base(self):
|
||||
pk = np.ones(16, float)
|
||||
S = stats.entropy(pk, base=2.)
|
||||
assert_(abs(S - 4.) < 1.e-5)
|
||||
|
||||
qk = np.ones(16, float)
|
||||
qk[:8] = 2.
|
||||
S = stats.entropy(pk, qk)
|
||||
S2 = stats.entropy(pk, qk, base=2.)
|
||||
assert_(abs(S/S2 - np.log(2.)) < 1.e-5)
|
||||
|
||||
def test_entropy_zero(self):
|
||||
# Test for PR-479
|
||||
assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
|
||||
decimal=12)
|
||||
|
||||
def test_entropy_2d(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
|
||||
assert_array_almost_equal(stats.entropy(pk, qk),
|
||||
[0.1933259, 0.18609809])
|
||||
|
||||
def test_entropy_2d_zero(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
|
||||
assert_array_almost_equal(stats.entropy(pk, qk),
|
||||
[np.inf, 0.18609809])
|
||||
|
||||
pk[0][0] = 0.0
|
||||
assert_array_almost_equal(stats.entropy(pk, qk),
|
||||
[0.17403988, 0.18609809])
|
||||
|
||||
def test_entropy_base_2d_nondefault_axis(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
assert_array_almost_equal(stats.entropy(pk, axis=1),
|
||||
[0.63651417, 0.63651417, 0.66156324])
|
||||
|
||||
def test_entropy_2d_nondefault_axis(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
|
||||
assert_array_almost_equal(stats.entropy(pk, qk, axis=1),
|
||||
[0.231049, 0.231049, 0.127706])
|
||||
|
||||
def test_entropy_raises_value_error(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
qk = [[0.1, 0.2], [0.6, 0.3]]
|
||||
assert_raises(ValueError, stats.entropy, pk, qk)
|
||||
|
||||
def test_base_entropy_with_axis_0_is_equal_to_default(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
assert_array_almost_equal(stats.entropy(pk, axis=0),
|
||||
stats.entropy(pk))
|
||||
|
||||
def test_entropy_with_axis_0_is_equal_to_default(self):
|
||||
pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
|
||||
qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
|
||||
assert_array_almost_equal(stats.entropy(pk, qk, axis=0),
|
||||
stats.entropy(pk, qk))
|
||||
|
||||
def test_base_entropy_transposed(self):
|
||||
pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
assert_array_almost_equal(stats.entropy(pk.T).T,
|
||||
stats.entropy(pk, axis=1))
|
||||
|
||||
def test_entropy_transposed(self):
|
||||
pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = np.array([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
|
||||
assert_array_almost_equal(stats.entropy(pk.T, qk.T).T,
|
||||
stats.entropy(pk, qk, axis=1))
|
||||
|
||||
def test_entropy_broadcasting(self):
|
||||
np.random.rand(0)
|
||||
x = np.random.rand(3)
|
||||
y = np.random.rand(2, 1)
|
||||
res = stats.entropy(x, y, axis=-1)
|
||||
assert_equal(res[0], stats.entropy(x, y[0]))
|
||||
assert_equal(res[1], stats.entropy(x, y[1]))
|
||||
|
||||
def test_entropy_shape_mismatch(self):
|
||||
x = np.random.rand(10, 1, 12)
|
||||
y = np.random.rand(11, 2)
|
||||
message = "shape mismatch: objects cannot be broadcast"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.entropy(x, y)
|
||||
|
||||
def test_input_validation(self):
|
||||
x = np.random.rand(10)
|
||||
message = "`base` must be a positive number."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.entropy(x, base=-2)
|
||||
|
||||
|
||||
class TestDifferentialEntropy:
    """
    Vasicek results are compared with the R package vsgoftest.

    # library(vsgoftest)
    #
    # samp <- c(<values>)
    # entropy.estimate(x = samp, window = <window_length>)

    """

    def test_differential_entropy_vasicek(self):
        # Vasicek estimates for a fixed normal sample; reference values
        # from R vsgoftest (see class docstring).
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal(100)

        entropy = stats.differential_entropy(values, method='vasicek')
        assert_allclose(entropy, 1.342551, rtol=1e-6)

        entropy = stats.differential_entropy(values, window_length=1,
                                             method='vasicek')
        assert_allclose(entropy, 1.122044, rtol=1e-6)

        entropy = stats.differential_entropy(values, window_length=8,
                                             method='vasicek')
        assert_allclose(entropy, 1.349401, rtol=1e-6)

    def test_differential_entropy_vasicek_2d_nondefault_axis(self):
        # Same as above, vectorized along axis=1 of a (3, 100) sample.
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        entropy = stats.differential_entropy(values, axis=1, method='vasicek')
        assert_allclose(
            entropy,
            [1.342551, 1.341826, 1.293775],
            rtol=1e-6,
        )

        entropy = stats.differential_entropy(values, axis=1, window_length=1,
                                             method='vasicek')
        assert_allclose(
            entropy,
            [1.122044, 1.102944, 1.129616],
            rtol=1e-6,
        )

        entropy = stats.differential_entropy(values, axis=1, window_length=8,
                                             method='vasicek')
        assert_allclose(
            entropy,
            [1.349401, 1.338514, 1.292332],
            rtol=1e-6,
        )

    def test_differential_entropy_raises_value_error(self):
        # window_length must satisfy 0 < window_length < sample_size / 2.
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        error_str = (
            r"Window length \({window_length}\) must be positive and less "
            r"than half the sample size \({sample_size}\)."
        )

        sample_size = values.shape[1]

        for window_length in {-1, 0, sample_size//2, sample_size}:

            formatted_error_str = error_str.format(
                window_length=window_length,
                sample_size=sample_size,
            )

            with assert_raises(ValueError, match=formatted_error_str):
                stats.differential_entropy(
                    values,
                    window_length=window_length,
                    axis=1,
                )

    def test_base_differential_entropy_with_axis_0_is_equal_to_default(self):
        # axis=0 is the documented default.
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((100, 3))

        entropy = stats.differential_entropy(values, axis=0)
        default_entropy = stats.differential_entropy(values)
        assert_allclose(entropy, default_entropy)

    def test_base_differential_entropy_transposed(self):
        # Transposing the input and the result is equivalent to axis=1.
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        assert_allclose(
            stats.differential_entropy(values.T).T,
            stats.differential_entropy(values, axis=1),
        )

    def test_input_validation(self):
        # Invalid `base` and unknown `method` names are rejected.
        x = np.random.rand(10)

        message = "`base` must be a positive number or `None`."
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, base=-2)

        message = "`method` must be one of..."
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, method='ekki-ekki')

    @pytest.mark.parametrize('method', ['vasicek', 'van es',
                                        'ebrahimi', 'correa'])
    def test_consistency(self, method):
        # test that method is a consistent estimator
        n = 10000 if method == 'correa' else 1000000
        rvs = stats.norm.rvs(size=n, random_state=0)
        expected = stats.norm.entropy()
        res = stats.differential_entropy(rvs, method=method)
        assert_allclose(res, expected, rtol=0.005)

    # values from differential_entropy reference [6], table 1, n=50, m=7
    norm_rmse_std_cases = {  # method: (RMSE, STD)
        'vasicek': (0.198, 0.109),
        'van es': (0.212, 0.110),
        'correa': (0.135, 0.112),
        'ebrahimi': (0.128, 0.109)
    }

    @pytest.mark.parametrize('method, expected',
                             list(norm_rmse_std_cases.items()))
    def test_norm_rmse_std(self, method, expected):
        # test that RMSE and standard deviation of estimators matches values
        # given in differential_entropy reference [6]. Incidentally, also
        # tests vectorization.
        reps, n, m = 10000, 50, 7
        rmse_expected, std_expected = expected
        rvs = stats.norm.rvs(size=(reps, n), random_state=0)
        true_entropy = stats.norm.entropy()
        res = stats.differential_entropy(rvs, window_length=m,
                                         method=method, axis=-1)
        assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)),
                        rmse_expected, atol=0.005)
        assert_allclose(np.std(res), std_expected, atol=0.002)

    # values from differential_entropy reference [6], table 2, n=50, m=7
    expon_rmse_std_cases = {  # method: (RMSE, STD)
        'vasicek': (0.194, 0.148),
        'van es': (0.179, 0.149),
        'correa': (0.155, 0.152),
        'ebrahimi': (0.151, 0.148)
    }

    @pytest.mark.parametrize('method, expected',
                             list(expon_rmse_std_cases.items()))
    def test_expon_rmse_std(self, method, expected):
        # test that RMSE and standard deviation of estimators matches values
        # given in differential_entropy reference [6]. Incidentally, also
        # tests vectorization.
        reps, n, m = 10000, 50, 7
        rmse_expected, std_expected = expected
        rvs = stats.expon.rvs(size=(reps, n), random_state=0)
        true_entropy = stats.expon.entropy()
        res = stats.differential_entropy(rvs, window_length=m,
                                         method=method, axis=-1)
        assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)),
                        rmse_expected, atol=0.005)
        assert_allclose(np.std(res), std_expected, atol=0.002)

    @pytest.mark.parametrize('n, method', [(8, 'van es'),
                                           (12, 'ebrahimi'),
                                           (1001, 'vasicek')])
    def test_method_auto(self, n, method):
        # The default method is selected automatically by sample size; it
        # must match the explicitly requested method for each size range.
        rvs = stats.norm.rvs(size=(n,), random_state=0)
        res1 = stats.differential_entropy(rvs)
        res2 = stats.differential_entropy(rvs, method=method)
        assert res1 == res2
|
||||
850
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_fit.py
vendored
Normal file
850
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_fit.py
vendored
Normal file
@@ -0,0 +1,850 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
import pytest
|
||||
from scipy import stats
|
||||
from scipy.optimize import differential_evolution
|
||||
|
||||
from .test_continuous_basic import distcont
|
||||
from scipy.stats._distn_infrastructure import FitError
|
||||
from scipy.stats._distr_params import distdiscrete
|
||||
from scipy.stats import goodness_of_fit
|
||||
|
||||
|
||||
# this is not a proper statistical test for convergence, but only
# verifies that the estimate and true values don't differ by too much

fit_sizes = [1000, 5000, 10000]  # sample sizes to try

thresh_percent = 0.25  # percent of true parameters for fail cut-off
thresh_min = 0.75  # minimum difference estimate - true to fail test

# Distributions whose MLE fit is known to be unreliable for this check.
mle_failing_fits = [
    'burr',
    'chi2',
    'gausshyper',
    'genexpon',
    'gengamma',
    'kappa4',
    'ksone',
    'kstwo',
    'mielke',
    'ncf',
    'ncx2',
    'pearson3',
    'powerlognorm',
    'truncexpon',
    'truncpareto',
    'tukeylambda',
    'vonmises',
    'levy_stable',
    'trapezoid',
    'truncweibull_min',
    'studentized_range',
]

# Distributions whose method-of-moments fit is known to fail this check.
mm_failing_fits = ['alpha', 'betaprime', 'burr', 'burr12', 'cauchy', 'chi',
                   'chi2', 'crystalball', 'dgamma', 'dweibull', 'f',
                   'fatiguelife', 'fisk', 'foldcauchy', 'genextreme',
                   'gengamma', 'genhyperbolic', 'gennorm', 'genpareto',
                   'halfcauchy', 'invgamma', 'invweibull', 'johnsonsu',
                   'kappa3', 'ksone', 'kstwo', 'levy', 'levy_l',
                   'levy_stable', 'loglaplace', 'lomax', 'mielke', 'nakagami',
                   'ncf', 'nct', 'ncx2', 'pareto', 'powerlognorm', 'powernorm',
                   'skewcauchy', 't', 'trapezoid', 'triang', 'truncpareto',
                   'truncweibull_min', 'tukeylambda', 'studentized_range']

# not sure if these fail, but they caused my patience to fail
mm_slow_fits = ['argus', 'exponpow', 'exponweib', 'gausshyper', 'genexpon',
                'genhalflogistic', 'halfgennorm', 'gompertz', 'johnsonsb',
                'kappa4', 'kstwobign', 'recipinvgauss',
                'truncexpon', 'vonmises', 'vonmises_line']

# Per-method map of distributions expected to fail test_cont_fit.
failing_fits = {"MM": mm_failing_fits + mm_slow_fits, "MLE": mle_failing_fits}

# Don't run the fit test on these:
skip_fit = [
    'erlang',  # Subclass of gamma, generates a warning.
    'genhyperbolic',  # too slow
]
|
||||
|
||||
|
||||
def cases_test_cont_fit():
    """Yield (distname, arg) pairs for the continuous-fit closeness test.

    Entries listed in `skip_fit` are excluded. Note the test itself is
    slow; some distributions don't converge with sample size <= 10000.
    """
    for distname, arg in distcont:
        if distname in skip_fit:
            continue
        yield distname, arg
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize('distname,arg', cases_test_cont_fit())
@pytest.mark.parametrize('method', ["MLE", "MM"])
def test_cont_fit(distname, arg, method):
    # Check that fitted parameters of a continuous distribution are close
    # to the true parameters used to generate the sample. Larger sample
    # sizes are tried until one succeeds (see loop below).
    if distname in failing_fits[method]:
        # Skip failing fits unless overridden
        try:
            xfail = not int(os.environ['SCIPY_XFAIL'])
        except Exception:
            xfail = True
        if xfail:
            msg = "Fitting %s doesn't work reliably yet" % distname
            msg += (" [Set environment variable SCIPY_XFAIL=1 to run this"
                    " test nevertheless.]")
            pytest.xfail(msg)

    distfn = getattr(stats, distname)

    # True parameters: the shape args plus default loc=0, scale=1.
    truearg = np.hstack([arg, [0.0, 1.0]])
    # Per-parameter pass threshold: the larger of thresh_percent of the
    # true value and the absolute floor thresh_min.
    diffthreshold = np.max(np.vstack([truearg*thresh_percent,
                                      np.full(distfn.numargs+2, thresh_min)]),
                           0)

    for fit_size in fit_sizes:
        # Note that if a fit succeeds, the other fit_sizes are skipped
        np.random.seed(1234)

        with np.errstate(all='ignore'):
            rvs = distfn.rvs(size=fit_size, *arg)
            est = distfn.fit(rvs, method=method)  # start with default values

        diff = est - truearg

        # threshold for location
        diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,
                                    thresh_min])

        if np.any(np.isnan(est)):
            raise AssertionError('nan returned in fit')
        else:
            if np.all(np.abs(diff) <= diffthreshold):
                break
    else:
        # for/else: no sample size produced an acceptable fit.
        txt = 'parameter: %s\n' % str(truearg)
        txt += 'estimated: %s\n' % str(est)
        txt += 'diff : %s\n' % str(diff)
        raise AssertionError('fit not very good in %s\n' % distfn.name + txt)
|
||||
|
||||
|
||||
def _check_loc_scale_mle_fit(name, data, desired, atol=None):
|
||||
d = getattr(stats, name)
|
||||
actual = d.fit(data)[-2:]
|
||||
assert_allclose(actual, desired, atol=atol,
|
||||
err_msg='poor mle fit of (loc, scale) in %s' % name)
|
||||
|
||||
|
||||
def test_non_default_loc_scale_mle_fit():
    """MLE (loc, scale) fits for uniform and expon on a small sample."""
    data = np.array([1.01, 1.78, 1.78, 1.78, 1.88, 1.88, 1.88, 2.00])
    for name, desired in (('uniform', [1.01, 0.99]),
                          ('expon', [1.01, 0.73875])):
        _check_loc_scale_mle_fit(name, data, desired, 1e-3)
|
||||
|
||||
|
||||
def test_expon_fit():
    """gh-6167"""
    # With floc=0 the MLE scale is just the sample mean.
    data = [0, 0, 0, 0, 2, 2, 2, 2]
    fitted = stats.expon.fit(data, floc=0)
    assert_allclose(fitted, [0, 1.0], atol=1e-3)
|
||||
|
||||
|
||||
def test_fit_error():
    """beta.fit on degenerate 0/1 data raises FitError (and warns)."""
    data = np.concatenate((np.zeros(29), np.ones(21)))
    expected = "Optimization converged to parameters that are..."
    with pytest.raises(FitError, match=expected), \
            pytest.warns(RuntimeWarning):
        stats.beta.fit(data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dist, params",
                         [(stats.norm, (0.5, 2.5)),  # type: ignore[attr-defined] # noqa
                          (stats.binom, (10, 0.3, 2))])  # type: ignore[attr-defined] # noqa
def test_nnlf_and_related_methods(dist, params):
    """nnlf and _penalized_nnlf equal -sum(log pdf/pmf) at the parameters."""
    rng = np.random.default_rng(983459824)

    # Continuous distributions expose `pdf`; discrete ones only `pmf`.
    logpxf = dist.logpdf if hasattr(dist, 'pdf') else dist.logpmf

    sample = dist.rvs(*params, size=100, random_state=rng)
    ref = -logpxf(sample, *params).sum()
    assert_allclose(dist.nnlf(params, sample), ref)
    assert_allclose(dist._penalized_nnlf(params, sample), ref)
|
||||
|
||||
|
||||
def cases_test_fit_mle():
    # Yield distribution names for test_basic_fit_mle, attaching skip/slow/
    # xslow pytest marks per the sets below.
    # These fail default test or hang
    skip_basic_fit = {'argus', 'foldnorm', 'truncpareto', 'truncweibull_min',
                      'ksone', 'levy_stable', 'studentized_range', 'kstwo'}
    slow_basic_fit = {'burr12', 'johnsonsb', 'bradford', 'fisk', 'mielke',
                      'exponpow', 'rdist', 'norminvgauss', 'betaprime',
                      'powerlaw', 'pareto', 'johnsonsu', 'loglaplace',
                      'wrapcauchy', 'weibull_max', 'arcsine', 'binom', 'rice',
                      'uniform', 'f', 'invweibull', 'genpareto',
                      'nbinom', 'kappa3', 'lognorm', 'halfgennorm', 'pearson3',
                      'alpha', 't', 'crystalball', 'fatiguelife', 'nakagami',
                      'kstwobign', 'gompertz', 'dweibull', 'lomax', 'invgauss',
                      'recipinvgauss', 'chi', 'foldcauchy', 'powernorm',
                      'gennorm', 'randint', 'genextreme'}
    xslow_basic_fit = {'nchypergeom_fisher', 'nchypergeom_wallenius',
                       'gausshyper', 'genexpon', 'gengamma', 'genhyperbolic',
                       'geninvgauss', 'tukeylambda', 'skellam', 'ncx2',
                       'hypergeom', 'nhypergeom', 'zipfian', 'ncf',
                       'truncnorm', 'powerlognorm', 'beta',
                       'loguniform', 'reciprocal', 'trapezoid', 'nct',
                       'kappa4', 'betabinom', 'exponweib', 'genhalflogistic',
                       'burr', 'triang'}

    for dist in dict(distdiscrete + distcont):
        if dist in skip_basic_fit or not isinstance(dist, str):
            reason = "tested separately"
            yield pytest.param(dist, marks=pytest.mark.skip(reason=reason))
        elif dist in slow_basic_fit:
            reason = "too slow (>= 0.25s)"
            yield pytest.param(dist, marks=pytest.mark.slow(reason=reason))
        elif dist in xslow_basic_fit:
            reason = "too slow (>= 1.0s)"
            yield pytest.param(dist, marks=pytest.mark.xslow(reason=reason))
        else:
            yield dist
|
||||
|
||||
|
||||
def cases_test_fit_mse():
    # Yield distribution names for test_basic_fit_mse, attaching skip/slow/
    # xslow/filterwarnings pytest marks per the sets below.
    # the first four are so slow that I'm not sure whether they would pass
    skip_basic_fit = {'levy_stable', 'studentized_range', 'ksone', 'skewnorm',
                      'norminvgauss',  # super slow (~1 hr) but passes
                      'kstwo',  # very slow (~25 min) but passes
                      'geninvgauss',  # quite slow (~4 minutes) but passes
                      'gausshyper', 'genhyperbolic',  # integration warnings
                      'argus',  # close, but doesn't meet tolerance
                      'vonmises'}  # can have negative CDF; doesn't play nice
    slow_basic_fit = {'wald', 'genextreme', 'anglit', 'semicircular',
                      'kstwobign', 'arcsine', 'genlogistic', 'truncexpon',
                      'fisk', 'uniform', 'exponnorm', 'maxwell', 'lomax',
                      'laplace_asymmetric', 'lognorm', 'foldcauchy',
                      'genpareto', 'powernorm', 'loglaplace', 'foldnorm',
                      'recipinvgauss', 'exponpow', 'bradford', 'weibull_max',
                      'gompertz', 'dweibull', 'truncpareto', 'weibull_min',
                      'johnsonsu', 'loggamma', 'kappa3', 'fatiguelife',
                      'pareto', 'invweibull', 'alpha', 'erlang', 'dgamma',
                      'chi2', 'crystalball', 'nakagami', 'truncweibull_min',
                      't', 'vonmises_line', 'triang', 'wrapcauchy', 'gamma',
                      'mielke', 'chi', 'johnsonsb', 'exponweib',
                      'genhalflogistic', 'randint', 'nhypergeom', 'hypergeom',
                      'betabinom'}
    xslow_basic_fit = {'burr', 'halfgennorm', 'invgamma',
                       'invgauss', 'powerlaw', 'burr12', 'trapezoid', 'kappa4',
                       'f', 'powerlognorm', 'ncx2', 'rdist', 'reciprocal',
                       'loguniform', 'betaprime', 'rice', 'gennorm',
                       'gengamma', 'truncnorm', 'ncf', 'nct', 'pearson3',
                       'beta', 'genexpon', 'tukeylambda', 'zipfian',
                       'nchypergeom_wallenius', 'nchypergeom_fisher'}
    warns_basic_fit = {'skellam'}  # can remove mark after gh-14901 is resolved

    for dist in dict(distdiscrete + distcont):
        if dist in skip_basic_fit or not isinstance(dist, str):
            reason = "Fails. Oh well."
            yield pytest.param(dist, marks=pytest.mark.skip(reason=reason))
        elif dist in slow_basic_fit:
            reason = "too slow (>= 0.25s)"
            yield pytest.param(dist, marks=pytest.mark.slow(reason=reason))
        elif dist in xslow_basic_fit:
            reason = "too slow (>= 1.0s)"
            yield pytest.param(dist, marks=pytest.mark.xslow(reason=reason))
        elif dist in warns_basic_fit:
            mark = pytest.mark.filterwarnings('ignore::RuntimeWarning')
            yield pytest.param(dist, marks=mark)
        else:
            yield dist
|
||||
|
||||
|
||||
def cases_test_fitstart():
    """Yield (distname, shapes) pairs safe to exercise in test_fitstart."""
    too_slow = {'studentized_range', 'recipinvgauss'}
    for distname, shapes in dict(distcont).items():
        if isinstance(distname, str) and distname not in too_slow:
            yield distname, shapes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapes', cases_test_fitstart())
def test_fitstart(distname, shapes):
    """_fitstart must return a guess that passes the shape check."""
    dist = getattr(stats, distname)
    rng = np.random.default_rng(216342614)
    sample = rng.random(10)

    # numerical noise in the internal moment computations is irrelevant here
    with np.errstate(invalid='ignore', divide='ignore'):
        guess = dist._fitstart(sample)

    assert dist._argcheck(*guess[:-2])
|
||||
|
||||
|
||||
def assert_nlff_less_or_close(dist, data, params1, params0, rtol=1e-7, atol=0,
                              nlff_name='nnlf'):
    """Assert the fit objective at params1 is <= (or close to) params0's.

    `nlff_name` selects which negative log fit function of `dist` to use.
    """
    objective = getattr(dist, nlff_name)
    value1, value0 = objective(params1, data), objective(params0, data)
    if value1 < value0:
        return
    np.testing.assert_allclose(value1, value0, rtol=rtol, atol=atol)
|
||||
|
||||
|
||||
class TestFit:
    # Shared fixtures for stats.fit tests: a binomial sample with known
    # parameters, matching bounds in both sequence and dict form, and the
    # tolerances used when comparing fitted parameters.
    dist = stats.binom  # type: ignore[attr-defined]
    seed = 654634816187
    rng = np.random.default_rng(seed)
    data = stats.binom.rvs(5, 0.5, size=100, random_state=rng)  # type: ignore[attr-defined] # noqa
    shape_bounds_a = [(1, 10), (0, 1)]  # bounds as a sequence
    shape_bounds_d = {'n': (1, 10), 'p': (0, 1)}  # bounds as a dict
    atol = 5e-2
    rtol = 1e-2
    tols = {'atol': atol, 'rtol': rtol}
|
||||
|
||||
def opt(self, *args, **kwds):
|
||||
return differential_evolution(*args, seed=0, **kwds)
|
||||
|
||||
def test_dist_iv(self):
|
||||
message = "`dist` must be an instance of..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.fit(10, self.data, self.shape_bounds_a)
|
||||
|
||||
def test_data_iv(self):
|
||||
message = "`data` must be exactly one-dimensional."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.fit(self.dist, [[1, 2, 3]], self.shape_bounds_a)
|
||||
|
||||
message = "All elements of `data` must be finite numbers."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.fit(self.dist, [1, 2, 3, np.nan], self.shape_bounds_a)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.fit(self.dist, [1, 2, 3, np.inf], self.shape_bounds_a)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.fit(self.dist, ['1', '2', '3'], self.shape_bounds_a)
|
||||
|
||||
    def test_bounds_iv(self):
        # Input validation of `bounds`: unrecognized names warn; malformed
        # elements, wrong counts, and empty/invalid intervals raise.
        message = "Bounds provided for the following unrecognized..."
        shape_bounds = {'n': (1, 10), 'p': (0, 1), '1': (0, 10)}
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        message = "Each element of a `bounds` sequence must be a tuple..."
        shape_bounds = [(1, 10, 3), (0, 1)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        message = "Each element of `bounds` must be a tuple specifying..."
        shape_bounds = [(1, 10, 3), (0, 1, 0.5)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)
        shape_bounds = [1, 0]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        message = "A `bounds` sequence must contain at least 2 elements..."
        shape_bounds = [(1, 10)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        message = "A `bounds` sequence may not contain more than 3 elements..."
        bounds = [(1, 10), (1, 10), (1, 10), (1, 10)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, bounds)

        message = "There are no values for `p` on the interval..."
        shape_bounds = {'n': (1, 10), 'p': (1, 0)}
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        message = "There are no values for `n` on the interval..."
        shape_bounds = [(10, 1), (0, 1)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        message = "There are no integer values for `n` on the interval..."
        shape_bounds = [(1.4, 1.6), (0, 1)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)

        # `n` has no finite default domain, so bounds are required for it.
        message = "The intersection of user-provided bounds for `n`"
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data)
        shape_bounds = [(-np.inf, np.inf), (0, 1)]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, shape_bounds)
|
||||
|
||||
    def test_guess_iv(self):
        # Input validation of `guess`: unrecognized names warn; non-scalar
        # or wrong-count guesses raise; out-of-domain guesses are rounded
        # or clipped with a warning.
        message = "Guesses provided for the following unrecognized..."
        guess = {'n': 1, 'p': 0.5, '1': 255}
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "Each element of `guess` must be a scalar..."
        guess = {'n': 1, 'p': 'hi'}
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)
        guess = [1, 'f']
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)
        guess = [[1, 2]]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "A `guess` sequence must contain at least 2..."
        guess = [1]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "A `guess` sequence may not contain more than 3..."
        guess = [1, 2, 3, 4]
        with pytest.raises(ValueError, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "Guess for parameter `n` rounded..."
        guess = {'n': 4.5, 'p': -0.5}
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "Guess for parameter `loc` rounded..."
        guess = [5, 0.5, 0.5]
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "Guess for parameter `p` clipped..."
        guess = {'n': 5, 'p': -0.5}
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)

        message = "Guess for parameter `loc` clipped..."
        guess = [5, 0.5, 1]
        with pytest.warns(RuntimeWarning, match=message):
            stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess)
|
||||
|
||||
    def basic_fit_test(self, dist_name, method):
        # Generic check: sample from `dist_name` with known parameters,
        # fit with stats.fit(method=...), and require the fit objective at
        # the result to be no worse than at the true parameters.

        N = 5000
        dist_data = dict(distcont + distdiscrete)
        rng = np.random.default_rng(self.seed)
        dist = getattr(stats, dist_name)
        shapes = np.array(dist_data[dist_name])
        # Shape bounds: one decade either side of the true shapes (the
        # 10.**sign keeps the interval ordered for negative shapes);
        # loc in [0, 10], scale in (0, 10].
        bounds = np.empty((len(shapes) + 2, 2), dtype=np.float64)
        bounds[:-2, 0] = shapes/10.**np.sign(shapes)
        bounds[:-2, 1] = shapes*10.**np.sign(shapes)
        bounds[-2] = (0, 10)
        bounds[-1] = (1e-16, 10)
        loc = rng.uniform(*bounds[-2])
        scale = rng.uniform(*bounds[-1])
        ref = list(dist_data[dist_name]) + [loc, scale]

        if getattr(dist, 'pmf', False):
            # Discrete: drop `scale` and use an integral `loc`.
            ref = ref[:-1]
            ref[-1] = np.floor(loc)
            data = dist.rvs(*ref, size=N, random_state=rng)
            bounds = bounds[:-1]
        if getattr(dist, 'pdf', False):
            data = dist.rvs(*ref, size=N, random_state=rng)

        with npt.suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "overflow encountered")
            res = stats.fit(dist, data, bounds, method=method,
                            optimizer=self.opt)

        # Compare via the objective each method minimizes.
        nlff_names = {'mle': 'nnlf', 'mse': '_penalized_nlpsf'}
        nlff_name = nlff_names[method]
        assert_nlff_less_or_close(dist, data, res.params, ref, **self.tols,
                                  nlff_name=nlff_name)
|
||||
|
||||
@pytest.mark.parametrize("dist_name", cases_test_fit_mle())
|
||||
def test_basic_fit_mle(self, dist_name):
|
||||
self.basic_fit_test(dist_name, "mle")
|
||||
|
||||
@pytest.mark.parametrize("dist_name", cases_test_fit_mse())
|
||||
def test_basic_fit_mse(self, dist_name):
|
||||
self.basic_fit_test(dist_name, "mse")
|
||||
|
||||
def test_argus(self):
|
||||
# Can't guarantee that all distributions will fit all data with
|
||||
# arbitrary bounds. This distribution just happens to fail above.
|
||||
# Try something slightly different.
|
||||
N = 1000
|
||||
rng = np.random.default_rng(self.seed)
|
||||
dist = stats.argus
|
||||
shapes = (1., 2., 3.)
|
||||
data = dist.rvs(*shapes, size=N, random_state=rng)
|
||||
shape_bounds = {'chi': (0.1, 10), 'loc': (0.1, 10), 'scale': (0.1, 10)}
|
||||
res = stats.fit(dist, data, shape_bounds, optimizer=self.opt)
|
||||
|
||||
assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols)
|
||||
|
||||
def test_foldnorm(self):
|
||||
# Can't guarantee that all distributions will fit all data with
|
||||
# arbitrary bounds. This distribution just happens to fail above.
|
||||
# Try something slightly different.
|
||||
N = 1000
|
||||
rng = np.random.default_rng(self.seed)
|
||||
dist = stats.foldnorm
|
||||
shapes = (1.952125337355587, 2., 3.)
|
||||
data = dist.rvs(*shapes, size=N, random_state=rng)
|
||||
shape_bounds = {'c': (0.1, 10), 'loc': (0.1, 10), 'scale': (0.1, 10)}
|
||||
res = stats.fit(dist, data, shape_bounds, optimizer=self.opt)
|
||||
|
||||
assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols)
|
||||
|
||||
def test_truncpareto(self):
|
||||
# Can't guarantee that all distributions will fit all data with
|
||||
# arbitrary bounds. This distribution just happens to fail above.
|
||||
# Try something slightly different.
|
||||
N = 1000
|
||||
rng = np.random.default_rng(self.seed)
|
||||
dist = stats.truncpareto
|
||||
shapes = (1.8, 5.3, 2.3, 4.1)
|
||||
data = dist.rvs(*shapes, size=N, random_state=rng)
|
||||
shape_bounds = [(0.1, 10)]*4
|
||||
res = stats.fit(dist, data, shape_bounds, optimizer=self.opt)
|
||||
|
||||
assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols)
|
||||
|
||||
    def test_truncweibull_min(self):
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        N = 1000
        rng = np.random.default_rng(self.seed)
        dist = stats.truncweibull_min
        shapes = (2.5, 0.25, 1.75, 2., 3.)
        data = dist.rvs(*shapes, size=N, random_state=rng)
        # identical bounds for all five parameters
        shape_bounds = [(0.1, 10)]*5
        res = stats.fit(dist, data, shape_bounds, optimizer=self.opt)

        # Fitted NLLF should be no worse than at the true parameters.
        assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols)
|
||||
|
||||
    def test_missing_shape_bounds(self):
        # some distributions have a small domain w.r.t. a parameter, e.g.
        # $p \in [0, 1]$ for binomial distribution
        # User does not need to provide these because the intersection of the
        # user's bounds (none) and the distribution's domain is finite
        N = 1000
        rng = np.random.default_rng(self.seed)

        dist = stats.binom
        n, p, loc = 10, 0.65, 0
        data = dist.rvs(n, p, loc=loc, size=N, random_state=rng)
        shape_bounds = {'n': np.array([0, 20])}  # check arrays are OK, too
        res = stats.fit(dist, data, shape_bounds, optimizer=self.opt)
        assert_allclose(res.params, (n, p, loc), **self.tols)

        dist = stats.bernoulli
        p, loc = 0.314159, 0
        data = dist.rvs(p, loc=loc, size=N, random_state=rng)
        # no user bounds at all; the fit should still recover (p, loc)
        res = stats.fit(dist, data, optimizer=self.opt)
        assert_allclose(res.params, (p, loc), **self.tols)
|
||||
|
||||
    def test_fit_only_loc_scale(self):
        # Check fits where only a subset of {loc, scale} is free.

        # fit only loc
        N = 5000
        rng = np.random.default_rng(self.seed)

        dist = stats.norm
        loc, scale = 1.5, 1
        data = dist.rvs(loc=loc, size=N, random_state=rng)
        loc_bounds = (0, 5)
        bounds = {'loc': loc_bounds}
        res = stats.fit(dist, data, bounds, optimizer=self.opt)
        assert_allclose(res.params, (loc, scale), **self.tols)

        # fit only scale
        loc, scale = 0, 2.5
        data = dist.rvs(scale=scale, size=N, random_state=rng)
        scale_bounds = (0, 5)
        bounds = {'scale': scale_bounds}
        res = stats.fit(dist, data, bounds, optimizer=self.opt)
        assert_allclose(res.params, (loc, scale), **self.tols)

        # fit only loc and scale
        dist = stats.norm
        loc, scale = 1.5, 2.5
        data = dist.rvs(loc=loc, scale=scale, size=N, random_state=rng)
        bounds = {'loc': loc_bounds, 'scale': scale_bounds}
        res = stats.fit(dist, data, bounds, optimizer=self.opt)
        assert_allclose(res.params, (loc, scale), **self.tols)
|
||||
|
||||
    def test_everything_fixed(self):
        # When every parameter is fixed (default or degenerate bounds),
        # `fit` should simply return the fixed values.
        N = 5000
        rng = np.random.default_rng(self.seed)

        dist = stats.norm
        loc, scale = 1.5, 2.5
        data = dist.rvs(loc=loc, scale=scale, size=N, random_state=rng)

        # loc, scale fixed to 0, 1 by default
        res = stats.fit(dist, data)
        assert_allclose(res.params, (0, 1), **self.tols)

        # loc, scale explicitly fixed via degenerate (equal) bounds
        bounds = {'loc': (loc, loc), 'scale': (scale, scale)}
        res = stats.fit(dist, data, bounds)
        assert_allclose(res.params, (loc, scale), **self.tols)

        # `n` gets fixed during polishing
        dist = stats.binom
        n, p, loc = 10, 0.65, 0
        data = dist.rvs(n, p, loc=loc, size=N, random_state=rng)
        shape_bounds = {'n': (0, 20), 'p': (0.65, 0.65)}
        res = stats.fit(dist, data, shape_bounds, optimizer=self.opt)
        assert_allclose(res.params, (n, p, loc), **self.tols)
|
||||
|
||||
    def test_failure(self):
        # When the bounds exclude every parameter set with nonzero
        # likelihood, the result should report failure.
        N = 5000
        rng = np.random.default_rng(self.seed)

        dist = stats.nbinom
        shapes = (5, 0.5)
        data = dist.rvs(*shapes, size=N, random_state=rng)

        assert data.min() == 0
        # With lower bounds on location at 0.5, likelihood is zero
        bounds = [(0, 30), (0, 1), (0.5, 10)]
        res = stats.fit(dist, data, bounds)
        message = "Optimization converged to parameter values that are"
        assert res.message.startswith(message)
        assert res.success is False
|
||||
|
||||
    @pytest.mark.xslow
    def test_guess(self):
        # Test that guess helps DE find the desired solution
        N = 2000
        rng = np.random.default_rng(self.seed)
        dist = stats.nhypergeom
        params = (20, 7, 12, 0)
        bounds = [(2, 200), (0.7, 70), (1.2, 120), (0, 10)]

        data = dist.rvs(*params, size=N, random_state=rng)

        # Without a guess, DE lands elsewhere...
        res = stats.fit(dist, data, bounds, optimizer=self.opt)
        assert not np.allclose(res.params, params, **self.tols)

        # ...with the true parameters as guess, it recovers them.
        res = stats.fit(dist, data, bounds, guess=params, optimizer=self.opt)
        assert_allclose(res.params, params, **self.tols)
|
||||
|
||||
    def test_mse_accuracy_1(self):
        # Test maximum spacing estimation against example from Wikipedia
        # https://en.wikipedia.org/wiki/Maximum_spacing_estimation#Examples
        data = [2, 4]
        dist = stats.expon
        bounds = {'loc': (0, 0), 'scale': (1e-8, 10)}
        # MLE gives the sample mean (3); MSE gives the reference 3.915.
        res_mle = stats.fit(dist, data, bounds=bounds, method='mle')
        assert_allclose(res_mle.params.scale, 3, atol=1e-3)
        res_mse = stats.fit(dist, data, bounds=bounds, method='mse')
        assert_allclose(res_mse.params.scale, 3.915, atol=1e-3)
|
||||
|
||||
    def test_mse_accuracy_2(self):
        # Test maximum spacing estimation against example from Wikipedia
        # https://en.wikipedia.org/wiki/Maximum_spacing_estimation#Examples
        rng = np.random.default_rng(9843212616816518964)

        dist = stats.uniform
        n = 10
        data = dist(3, 6).rvs(size=n, random_state=rng)
        bounds = {'loc': (0, 10), 'scale': (1e-8, 10)}
        res = stats.fit(dist, data, bounds=bounds, method='mse')
        # (loc=3.608118420015416, scale=5.509323262055043)

        # Closed-form MSE solution for the uniform distribution:
        x = np.sort(data)
        a = (n*x[0] - x[-1])/(n - 1)
        b = (n*x[-1] - x[0])/(n - 1)
        ref = a, b-a  # (3.6081133632151503, 5.509328130317254)
        assert_allclose(res.params, ref, rtol=1e-4)
|
||||
|
||||
|
||||
# Data from Matlab: https://www.mathworks.com/help/stats/lillietest.html
# 120 exam grades, used as a shared fixture by the goodness-of-fit tests below.
examgrades = [65, 61, 81, 88, 69, 89, 55, 84, 86, 84, 71, 81, 84, 81, 78, 67,
              96, 66, 73, 75, 59, 71, 69, 63, 79, 76, 63, 85, 87, 88, 80, 71,
              65, 84, 71, 75, 81, 79, 64, 65, 84, 77, 70, 75, 84, 75, 73, 92,
              90, 79, 80, 71, 73, 71, 58, 79, 73, 64, 77, 82, 81, 59, 54, 82,
              57, 79, 79, 73, 74, 82, 63, 64, 73, 69, 87, 68, 81, 73, 83, 73,
              80, 73, 73, 71, 66, 78, 64, 74, 68, 67, 75, 75, 80, 85, 74, 76,
              80, 77, 93, 70, 86, 80, 81, 83, 68, 60, 85, 64, 74, 82, 81, 77,
              66, 85, 75, 81, 69, 60, 83, 72]
|
||||
|
||||
|
||||
class TestGoodnessOfFit:
    # Tests for `goodness_of_fit`, which builds a Monte Carlo null
    # distribution for the chosen statistic.
    # NOTE(review): "[1]" in comments below refers to a citation defined
    # elsewhere in this file (Anderson-Darling critical-value tables) —
    # confirm against the full source.

    def test_gof_iv(self):
        # Input validation: each invalid argument raises with a clear message.
        dist = stats.norm
        x = [1, 2, 3]

        message = r"`dist` must be a \(non-frozen\) instance of..."
        with pytest.raises(TypeError, match=message):
            goodness_of_fit(stats.norm(), x)

        message = "`data` must be a one-dimensional array of numbers."
        with pytest.raises(ValueError, match=message):
            goodness_of_fit(dist, [[1, 2, 3]])

        message = "`statistic` must be one of..."
        with pytest.raises(ValueError, match=message):
            goodness_of_fit(dist, x, statistic='mm')

        message = "`n_mc_samples` must be an integer."
        with pytest.raises(TypeError, match=message):
            goodness_of_fit(dist, x, n_mc_samples=1000.5)

        message = "'herring' cannot be used to seed a"
        with pytest.raises(ValueError, match=message):
            goodness_of_fit(dist, x, random_state='herring')

    def test_against_ks(self):
        # With all parameters known, the KS statistic/p-value should match
        # `kstest` against the same fixed normal distribution.
        rng = np.random.default_rng(8517426291317196949)
        x = examgrades
        known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ks', random_state=rng)
        ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact')
        assert_allclose(res.statistic, ref.statistic)  # ~0.0848
        assert_allclose(res.pvalue, ref.pvalue, atol=5e-3)  # ~0.335

    def test_against_lilliefors(self):
        # With parameters estimated from the data, the statistic equals the
        # KS statistic but the p-value follows the Lilliefors reference value.
        rng = np.random.default_rng(2291803665717442724)
        x = examgrades
        res = goodness_of_fit(stats.norm, x, statistic='ks', random_state=rng)
        known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
        ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact')
        assert_allclose(res.statistic, ref.statistic)  # ~0.0848
        assert_allclose(res.pvalue, 0.0348, atol=5e-3)

    def test_against_cvm(self):
        # Cramer-von Mises statistic/p-value against `cramervonmises`.
        rng = np.random.default_rng(8674330857509546614)
        x = examgrades
        known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='cvm', random_state=rng)
        ref = stats.cramervonmises(x, stats.norm(**known_params).cdf)
        assert_allclose(res.statistic, ref.statistic)  # ~0.090
        assert_allclose(res.pvalue, ref.pvalue, atol=5e-3)  # ~0.636

    def test_against_anderson_case_0(self):
        # "Case 0" is where loc and scale are known [1]
        rng = np.random.default_rng(7384539336846690410)
        x = np.arange(1, 101)
        # loc that produced critical value of statistic found w/ root_scalar
        known_params = {'loc': 45.01575354024957, 'scale': 30}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 2.492)  # See [1] Table 1A 1.0
        assert_allclose(res.pvalue, 0.05, atol=5e-3)

    def test_against_anderson_case_1(self):
        # "Case 1" is where scale is known and loc is fit [1]
        rng = np.random.default_rng(5040212485680146248)
        x = np.arange(1, 101)
        # scale that produced critical value of statistic found w/ root_scalar
        known_params = {'scale': 29.957112639101933}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 0.908)  # See [1] Table 1B 1.1
        assert_allclose(res.pvalue, 0.1, atol=5e-3)

    def test_against_anderson_case_2(self):
        # "Case 2" is where loc is known and scale is fit [1]
        rng = np.random.default_rng(726693985720914083)
        x = np.arange(1, 101)
        # loc that produced critical value of statistic found w/ root_scalar
        known_params = {'loc': 44.5680212261933}
        res = goodness_of_fit(stats.norm, x, known_params=known_params,
                              statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 2.904)  # See [1] Table 1B 1.2
        assert_allclose(res.pvalue, 0.025, atol=5e-3)

    def test_against_anderson_case_3(self):
        # "Case 3" is where both loc and scale are fit [1]
        rng = np.random.default_rng(6763691329830218206)
        # c that produced critical value of statistic found w/ root_scalar
        x = stats.skewnorm.rvs(1.4477847789132101, loc=1, scale=2, size=100,
                               random_state=rng)
        res = goodness_of_fit(stats.norm, x, statistic='ad', random_state=rng)
        assert_allclose(res.statistic, 0.559)  # See [1] Table 1B 1.2
        assert_allclose(res.pvalue, 0.15, atol=5e-3)

    @pytest.mark.slow
    def test_against_anderson_gumbel_r(self):
        # Anderson-Darling against `anderson` for the gumbel_r distribution.
        rng = np.random.default_rng(7302761058217743)
        # c that produced critical value of statistic found w/ root_scalar
        x = stats.genextreme(0.051896837188595134, loc=0.5,
                             scale=1.5).rvs(size=1000, random_state=rng)
        res = goodness_of_fit(stats.gumbel_r, x, statistic='ad',
                              random_state=rng)
        ref = stats.anderson(x, dist='gumbel_r')
        assert_allclose(res.statistic, ref.critical_values[0])
        assert_allclose(res.pvalue, ref.significance_level[0]/100, atol=5e-3)

    def test_params_effects(self):
        # Ensure that `guessed_params`, `fit_params`, and `known_params` have
        # the intended effects.
        rng = np.random.default_rng(9121950977643805391)
        x = stats.skewnorm.rvs(-5.044559778383153, loc=1, scale=2, size=50,
                               random_state=rng)

        # Show that `guessed_params` don't fit to the guess,
        # but `fit_params` and `known_params` respect the provided fit
        guessed_params = {'c': 13.4}
        fit_params = {'scale': 13.73}
        known_params = {'loc': -13.85}
        rng = np.random.default_rng(9121950977643805391)
        res1 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
                               guessed_params=guessed_params,
                               fit_params=fit_params,
                               known_params=known_params, random_state=rng)
        assert not np.allclose(res1.fit_result.params.c, 13.4)
        assert_equal(res1.fit_result.params.scale, 13.73)
        assert_equal(res1.fit_result.params.loc, -13.85)

        # Show that changing the guess changes the parameter that gets fit,
        # and it changes the null distribution
        guessed_params = {'c': 2}
        rng = np.random.default_rng(9121950977643805391)
        res2 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
                               guessed_params=guessed_params,
                               fit_params=fit_params,
                               known_params=known_params, random_state=rng)
        assert not np.allclose(res2.fit_result.params.c,
                               res1.fit_result.params.c, rtol=1e-8)
        assert not np.allclose(res2.null_distribution,
                               res1.null_distribution, rtol=1e-8)
        assert_equal(res2.fit_result.params.scale, 13.73)
        assert_equal(res2.fit_result.params.loc, -13.85)

        # If we set all parameters as fit_params and known_params,
        # they're all fixed to those values, but the null distribution
        # varies.
        fit_params = {'c': 13.4, 'scale': 13.73}
        rng = np.random.default_rng(9121950977643805391)
        res3 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
                               guessed_params=guessed_params,
                               fit_params=fit_params,
                               known_params=known_params, random_state=rng)
        assert_equal(res3.fit_result.params.c, 13.4)
        assert_equal(res3.fit_result.params.scale, 13.73)
        assert_equal(res3.fit_result.params.loc, -13.85)
        assert not np.allclose(res3.null_distribution, res1.null_distribution)
|
||||
|
||||
|
||||
class TestFitResult:
|
||||
    def test_plot_iv(self):
        # `FitResult.plot` input validation: a bad `plot_type` must raise,
        # both with and without matplotlib installed.
        rng = np.random.default_rng(1769658657308472721)
        data = stats.norm.rvs(0, 1, size=100, random_state=rng)

        def optimizer(*args, **kwargs):
            # seed DE so the fit (and hence the FitResult) is deterministic
            return differential_evolution(*args, **kwargs, seed=rng)

        bounds = [(0, 30), (0, 1)]
        res = stats.fit(stats.norm, data, bounds, optimizer=optimizer)
        try:
            import matplotlib  # noqa
            message = r"`plot_type` must be one of \{'..."
            with pytest.raises(ValueError, match=message):
                res.plot(plot_type='llama')
        except (ModuleNotFoundError, ImportError):
            # matplotlib absent: `plot` itself should fail with a clear error
            message = r"matplotlib must be installed to use method `plot`."
            with pytest.raises(ModuleNotFoundError, match=message):
                res.plot(plot_type='llama')
|
||||
1712
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_hypotests.py
vendored
Normal file
1712
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_hypotests.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
604
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_kdeoth.py
vendored
Normal file
604
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_kdeoth.py
vendored
Normal file
@@ -0,0 +1,604 @@
|
||||
from scipy import stats, linalg, integrate
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_almost_equal, assert_, assert_equal,
|
||||
assert_array_almost_equal, assert_array_almost_equal_nulp, assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
|
||||
def test_kde_1d():
    """1-D Gaussian KDE of normal draws should resemble the fitted normal."""
    np.random.seed(8765678)
    n_basesample = 500
    sample = np.random.randn(n_basesample)
    sample_mean = sample.mean()
    sample_std = sample.std(ddof=1)

    # build the KDE from the original sample
    kde = stats.gaussian_kde(sample)

    # evaluate both densities on a common grid
    grid = np.linspace(-7, 7, 501)
    density = kde.evaluate(grid)
    reference = stats.norm.pdf(grid, loc=sample_mean, scale=sample_std)
    step = grid[1] - grid[0]

    # integrated squared error between KDE and fitted normal is small
    assert_(np.sum((density - reference)**2)*step < 0.01)
    upper_mass = kde.integrate_box_1d(sample_mean, np.inf)
    lower_mass = kde.integrate_box_1d(-np.inf, sample_mean)
    assert_almost_equal(upper_mass, 0.5, decimal=1)
    assert_almost_equal(lower_mass, 0.5, decimal=1)
    # integrate_box must agree with integrate_box_1d
    assert_almost_equal(kde.integrate_box(sample_mean, np.inf),
                        upper_mass, decimal=13)
    assert_almost_equal(kde.integrate_box(-np.inf, sample_mean),
                        lower_mass, decimal=13)

    # analytic overlap integrals vs. simple Riemann sums
    assert_almost_equal(kde.integrate_kde(kde),
                        (density**2).sum()*step, decimal=2)
    assert_almost_equal(kde.integrate_gaussian(sample_mean, sample_std**2),
                        (density*reference).sum()*step, decimal=2)
|
||||
|
||||
|
||||
def test_kde_1d_weighted():
    # Weighted variant of test_kde_1d: compare against the normal fitted
    # with the weighted mean/std.
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)
    xnmean = np.average(xn, weights=wn)
    xnstd = np.sqrt(np.average((xn-xnmean)**2, weights=wn))

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]

    # integrated squared error between KDE and fitted normal is small
    assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)

    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_kde_2d():
    # 2-D Gaussian KDE of multivariate-normal draws should resemble the
    # true multivariate normal density.
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]

    # integrated squared error over the grid is small
    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    # half-plane probabilities around the mean's second coordinate
    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_kde_2d_weighted():
    # Weighted variant of test_kde_2d (weights drawn uniformly at random).
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
    wn = np.random.rand(n_basesample)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]

    # integrated squared error over the grid is small
    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
def test_kde_bandwidth_method():
    # `bw_method` may be a callable, a scalar, or a named rule; all three
    # forms that encode Scott's rule must give identical densities.

    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    # unknown rule names are rejected
    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
|
||||
|
||||
|
||||
def test_kde_bandwidth_method_weighted():
    # Same as test_kde_bandwidth_method but the callable uses `neff`,
    # the effective sample size relevant for weighted KDEs.

    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.neff, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    # unknown rule names are rejected
    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
|
||||
|
||||
|
||||
# Subclasses that should stay working (extracted from various sources).
|
||||
# Unfortunately the earlier design of gaussian_kde made it necessary for users
|
||||
# to create these kinds of subclasses, or call _compute_covariance() directly.
|
||||
|
||||
class _kde_subclass1(stats.gaussian_kde):
    # Subclass that bypasses the parent __init__ entirely and sets up
    # state by hand, ending with a direct _compute_covariance() call.
    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()
|
||||
|
||||
|
||||
class _kde_subclass2(stats.gaussian_kde):
    # Subclass that overrides covariance_factor before delegating to the
    # parent __init__.
    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super().__init__(dataset)
|
||||
|
||||
|
||||
class _kde_subclass4(stats.gaussian_kde):
|
||||
def covariance_factor(self):
|
||||
return 0.5 * self.silverman_factor()
|
||||
|
||||
|
||||
def test_gaussian_kde_subclassing():
    # Historical subclassing patterns must keep producing the same
    # densities as the plain gaussian_kde.
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)

    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)

    # subclass 3 was removed because we have no obligation to maintain support
    # for user invocation of private methods

    # subclass 4
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]

    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)
|
||||
|
||||
|
||||
def test_gaussian_kde_covariance_caching():
    """set_bandwidth must fully undo a previous bandwidth override."""
    data = np.array([-7, -5, 1, 4, 5], dtype=float)
    grid = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(data)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    density = kde(grid)

    assert_array_almost_equal(expected, density, decimal=7)
|
||||
|
||||
|
||||
def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this. See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    # both routes must produce the same density
    assert_array_almost_equal_nulp(y1, y2, nulp=10)
|
||||
|
||||
|
||||
def test_kde_integer_input():
    """Regression test for #1181: integer datasets must be accepted."""
    data = np.arange(5)
    kde = stats.gaussian_kde(data)
    expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(kde(data), expected, decimal=6)
|
||||
|
||||
|
||||
# dtypes to exercise; float96/float128 are platform-dependent and skipped
# when numpy does not provide them.
_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64']


@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"])
@pytest.mark.parametrize("dtype", _ftypes)
def test_kde_output_dtype(dtype, bw_type):
    # The evaluated density's dtype must follow numpy promotion of the
    # dataset, points, weights (cast to float64), and bandwidth factor.
    # Check whether the datatypes are available
    dtype = getattr(np, dtype, None)

    if bw_type in ["scott", "silverman"]:
        bw = bw_type
    else:
        bw_type = getattr(np, bw_type, None)
        bw = bw_type(3) if bw_type else None

    if any(dt is None for dt in [dtype, bw]):
        pytest.skip()

    weights = np.arange(5, dtype=dtype)
    dataset = np.arange(5, dtype=dtype)
    k = stats.gaussian_kde(dataset, bw_method=bw, weights=weights)
    points = np.arange(5, dtype=dtype)
    result = k(points)
    # weights are always cast to float64
    assert result.dtype == np.result_type(dataset, points, np.float64(weights),
                                          k.factor)
|
||||
|
||||
|
||||
def test_pdf_logpdf_validation():
    # logpdf must reject query points whose dimension differs from the
    # dataset's.
    rng = np.random.default_rng(64202298293133848336925499069837723291)
    xn = rng.standard_normal((2, 10))
    gkde = stats.gaussian_kde(xn)
    xs = rng.standard_normal((3, 10))

    msg = "points have dimension 3, dataset has dimension 2"
    with pytest.raises(ValueError, match=msg):
        gkde.logpdf(xs)
|
||||
|
||||
|
||||
def test_pdf_logpdf():
    """pdf/logpdf must agree with evaluate()/log(evaluate())."""
    np.random.seed(1)
    n_basesample = 50
    sample = np.random.randn(n_basesample)

    # Default
    kde = stats.gaussian_kde(sample)

    grid = np.linspace(-15, 12, 25)
    density = kde.evaluate(grid)
    assert_almost_equal(density, kde.pdf(grid), decimal=12)

    assert_almost_equal(np.log(density), kde.logpdf(grid), decimal=12)

    # There are more points than data
    kde_on_grid = stats.gaussian_kde(grid)
    log_density = np.log(kde_on_grid.evaluate(sample))
    assert_almost_equal(log_density, kde_on_grid.logpdf(sample), decimal=12)
|
||||
|
||||
|
||||
def test_pdf_logpdf_weighted():
    # Weighted variant: pdf/logpdf must agree with evaluate()/log(evaluate()).
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs)))
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)
|
||||
|
||||
|
||||
def test_marginal_1_axis():
    # Marginalizing out one dimension must match numerically integrating
    # the full pdf over that dimension.
    rng = np.random.default_rng(6111799263660870475)
    n_data = 50
    n_dim = 10
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf_single(point):
        # integrate the full pdf over the dropped dimension (axis 0)
        def f(x):
            x = np.concatenate(([x], point[dimensions]))
            return kde.pdf(x)[0]
        return integrate.quad(f, -np.inf, np.inf)[0]

    def marginal_pdf(points):
        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_marginal_2_axis():
    # Marginalizing out two dimensions must match a double numerical
    # integral of the full pdf over those dimensions.
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 3])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf(points):
        def marginal_pdf_single(point):
            # integrate the full pdf over the dropped dimensions (0 and 2)
            def f(y, x):
                w, z = point[dimensions]
                x = np.array([x, w, y, z])
                return kde.pdf(x)[0]
            return integrate.dblquad(f, -np.inf, np.inf, -np.inf, np.inf)[0]

        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)
|
||||
|
||||
|
||||
def test_marginal_iv():
    """Input validation and index-equivalence checks for `gaussian_kde.marginal`."""
    gen = np.random.default_rng(6111799263660870475)
    sample = gen.normal(size=(4, 30))
    eval_pts = gen.normal(size=(4, 3))

    kde = stats.gaussian_kde(sample)

    # Negative indices must select the same marginal as the equivalent
    # positive ones.
    dims_a = [-1, 1]
    pdf_a = kde.marginal(dims_a).pdf(eval_pts[dims_a])

    dims_b = [3, -3]
    pdf_b = kde.marginal(dims_b).pdf(eval_pts[dims_b])

    assert_equal(pdf_a, pdf_b)

    # Non-integer dimensions are rejected.
    message = "Elements of `dimensions` must be integers..."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2.5])

    # Duplicate dimensions are rejected.
    message = "All elements of `dimensions` must be unique."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2, 2])

    # Out-of-range dimensions are rejected.
    message = (r"Dimensions \[-5 6\] are invalid for a distribution in 4...")
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, -5, 6])
|
||||
|
||||
|
||||
@pytest.mark.xslow
def test_logpdf_overflow():
    """Regression test for gh-12988: `logpdf` must stay finite despite
    linear-algebra instability for a very high-dimensional KDE."""
    np.random.seed(1)
    n_dims = 2500
    n_samples = 5000
    # One row per dimension; row n is shifted by n.
    xn = np.array([np.random.randn(n_samples) + shift
                   for shift in range(n_dims)])

    # Default bandwidth selection.
    gkde = stats.gaussian_kde(xn)

    logpdf = gkde.logpdf(np.arange(0, n_dims))
    np.testing.assert_equal(np.isneginf(logpdf[0]), False)
    np.testing.assert_equal(np.isnan(logpdf[0]), False)
|
||||
|
||||
|
||||
def test_weights_intact():
    """Regression test for gh-9709: fitting a KDE must leave the caller's
    weights array unmodified."""
    np.random.seed(12345)
    samples = np.random.lognormal(size=100)
    w = np.random.choice([1.0, 10.0, 100], size=samples.size)
    snapshot = w.copy()

    stats.gaussian_kde(np.log10(samples), weights=w)
    assert_allclose(w, snapshot, atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_weights_integer():
    """Integer weights are accepted and give the same KDE as float weights.

    Regression test for gh-9709 (comment).
    """
    np.random.seed(12345)
    values = [0.2, 13.5, 21.0, 75.0, 99.0]
    weights = [1, 2, 4, 8, 16]  # a list of integers
    pdf_i = stats.gaussian_kde(values, weights=weights)
    # The original called `np.float64(weights)`, which relies on invoking a
    # scalar type with a sequence; construct the float array explicitly.
    pdf_f = stats.gaussian_kde(values,
                               weights=np.asarray(weights, dtype=np.float64))

    xn = [0.3, 11, 88]
    assert_allclose(pdf_i.evaluate(xn),
                    pdf_f.evaluate(xn), atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_seed():
    """Check the `seed` argument of the `gaussian_kde.resample` method."""

    def check_resample_seed(kde_obj):
        size = 200
        # Without a seed, two consecutive draws must differ.
        draw_a = kde_obj.resample(size)
        draw_b = kde_obj.resample(size)
        assert_raises(
            AssertionError, assert_allclose, draw_a, draw_b, atol=1e-13
        )
        # An integer seed makes draws reproducible.
        draw_a = kde_obj.resample(size, seed=831)
        draw_b = kde_obj.resample(size, seed=831)
        assert_allclose(draw_a, draw_b, atol=1e-13)
        # So do two RandomState instances created from the same seed.
        draw_a = kde_obj.resample(size, seed=np.random.RandomState(seed=138))
        draw_b = kde_obj.resample(size, seed=np.random.RandomState(seed=138))
        assert_allclose(draw_a, draw_b, atol=1e-13)

        # check that np.random.Generator can be used (numpy >= 1.17)
        if hasattr(np.random, 'default_rng'):
            kde_obj.resample(size, seed=np.random.default_rng(1234))

    np.random.seed(8765678)
    n_basesample = 500
    wn = np.random.rand(n_basesample)

    # 1-D case, unweighted and weighted.
    xn_1d = np.random.randn(n_basesample)
    check_resample_seed(stats.gaussian_kde(xn_1d))
    check_resample_seed(stats.gaussian_kde(xn_1d, weights=wn))

    # 2-D case, unweighted and weighted.
    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
    xn_2d = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
    check_resample_seed(stats.gaussian_kde(xn_2d))
    check_resample_seed(stats.gaussian_kde(xn_2d, weights=wn))
|
||||
|
||||
|
||||
def test_singular_data_covariance_gh10205():
    """When the data lie in a lower-dimensional subspace and this causes
    an exception, check that the error message is informative."""
    rng = np.random.default_rng(2321583144339784787)
    mu = np.array([1, 10, 20])
    # The upper-left 2x2 block of sigma is singular, so the data lie in a
    # lower-dimensional subspace.
    sigma = np.array([[4, 10, 0], [10, 25, 0], [0, 0, 100]])
    data = rng.multivariate_normal(mu, sigma, 1000)
    try:  # doesn't raise any error on some platforms, and that's OK
        stats.gaussian_kde(data.T)
    except linalg.LinAlgError:
        msg = "The data appears to lie in a lower-dimensional subspace..."
        # numpy's `assert_raises` does not accept a `match` keyword
        # (it would raise TypeError); use `pytest.raises` to check the
        # error message.
        with pytest.raises(linalg.LinAlgError, match=msg):
            stats.gaussian_kde(data.T)
|
||||
|
||||
|
||||
def test_fewer_points_than_dimensions_gh17436():
    """With fewer samples than dimensions the covariance matrix is singular
    and the exception tested in test_singular_data_covariance_gh10205 would
    occur.  This situation often means the user passed the transpose of what
    `gaussian_kde` expects, and can produce a huge covariance matrix, so
    `gaussian_kde` must bail out early."""
    rng = np.random.default_rng(2046127537594925772)
    samples = rng.multivariate_normal(np.zeros(3), np.eye(3), size=5)
    message = "Number of dimensions is greater than number of samples..."
    with pytest.raises(ValueError, match=message):
        stats.gaussian_kde(samples)
|
||||
2673
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_morestats.py
vendored
Normal file
2673
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_morestats.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1977
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py
vendored
Normal file
1977
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
150
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_extras.py
vendored
Normal file
150
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_mstats_extras.py
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
import numpy as np
|
||||
import numpy.ma as ma
|
||||
import scipy.stats.mstats as ms
|
||||
|
||||
from numpy.testing import (assert_equal, assert_almost_equal, assert_,
|
||||
assert_allclose)
|
||||
|
||||
|
||||
def test_compare_medians_ms():
    """Sanity checks for `mstats.compare_medians_ms`."""
    a = np.arange(7)
    b = a + 10
    # Completely separated samples: p-value of 0.
    assert_almost_equal(ms.compare_medians_ms(a, b), 0)

    c = np.linspace(0, 1, num=10)
    assert_almost_equal(ms.compare_medians_ms(a, c), 0.017116406778)
|
||||
|
||||
|
||||
def test_hdmedian():
    """Harrell-Davis median for masked 1-D and 2-D arrays."""
    # 1-D array
    x = ma.arange(11)
    assert_allclose(ms.hdmedian(x), 5, rtol=1e-14)
    x.mask = ma.make_mask(x)
    x.mask[:7] = False  # only the first seven entries stay unmasked
    assert_allclose(ms.hdmedian(x), 3, rtol=1e-14)

    # Check that `var` keyword returns a value. TODO: check whether returned
    # value is actually correct.
    assert_(ms.hdmedian(x, var=True).size == 2)

    # 2-D array
    y = ma.arange(22).reshape((11, 2))
    assert_allclose(ms.hdmedian(y, axis=0), [10, 11])
    y.mask = ma.make_mask(y)
    y.mask[:7, :] = False
    assert_allclose(ms.hdmedian(y, axis=0), [6, 7])
|
||||
|
||||
|
||||
def test_rsh():
    """Smoke test for `mstats.rsh`: output shape and `points` keyword."""
    np.random.seed(132345)
    sample = np.random.randn(100)
    out = ms.rsh(sample)
    # Just a sanity check that the code runs and output shape is correct.
    # TODO: check that implementation is correct.
    assert_(out.shape == sample.shape)

    # The `points` keyword evaluates only at the given locations.
    out = ms.rsh(sample, points=[0, 1.])
    assert_(out.size == 2)
|
||||
|
||||
|
||||
def test_mjci():
    """Maritz-Jarrett standard-error estimator against known values."""
    sample = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                       296, 299, 306, 376, 428, 515, 666, 1310, 2611])
    assert_almost_equal(ms.mjci(sample), [55.76819, 45.84028, 198.87875], 5)
|
||||
|
||||
|
||||
def test_trimmed_mean_ci():
    """Trimmed mean and its confidence interval against known values."""
    sample = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                       594, 605, 635, 651, 653, 661, 666])
    assert_almost_equal(ms.trimmed_mean(sample, 0.2), 596.2, 1)
    ci = np.round(ms.trimmed_mean_ci(sample, (0.2, 0.2)), 1)
    assert_equal(ci, [561.8, 630.6])
|
||||
|
||||
|
||||
def test_idealfourths():
    """Ideal fourths for 1-D input, 2-D input along both axes, and the
    degenerate fewer-than-three-points case."""
    x = np.arange(100)
    assert_almost_equal(np.asarray(ms.idealfourths(x)),
                        [24.416667, 74.583333], 6)

    x_2d = x.repeat(3).reshape(-1, 3)
    assert_almost_equal(ms.idealfourths(x_2d, axis=0),
                        [[24.416667, 24.416667, 24.416667],
                         [74.583333, 74.583333, 74.583333]], 6)
    assert_almost_equal(ms.idealfourths(x_2d, axis=1),
                        x.repeat(2).reshape(-1, 2))

    # With fewer than three data points the result is nan.
    degenerate = [0, 0]
    assert_(np.isnan(ms.idealfourths(degenerate)).all())
|
||||
|
||||
|
||||
class TestQuantiles:
    """Tests for the Harrell-Davis quantile estimators in `scipy.stats.mstats`."""

    # 100 fixed draws (presumably uniform on (0, 1) — values all lie in that
    # range) used as the common sample for every test in this class.
    data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]

    def test_hdquantiles(self):
        """Harrell-Davis quantiles: extremes, quartiles, and axis handling."""
        data = self.data
        # The 0th/100th percentiles are the sample minimum and maximum.
        assert_almost_equal(ms.hdquantiles(data,[0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data,[0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])

        # 2-D input with axis=0 must process each column independently.
        data = np.array(data).reshape(10,10)
        hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0)
        assert_almost_equal(hdq[:,0], ms.hdquantiles(data[:,0],[0.25,0.5,0.75]))
        assert_almost_equal(hdq[:,-1], ms.hdquantiles(data[:,-1],[0.25,0.5,0.75]))
        # `var=True` must also be consistent with the per-column result.
        hdq = ms.hdquantiles(data,[0.25,0.5,0.75],axis=0,var=True)
        assert_almost_equal(hdq[...,0],
                            ms.hdquantiles(data[:,0],[0.25,0.5,0.75],var=True))
        assert_almost_equal(hdq[...,-1],
                            ms.hdquantiles(data[:,-1],[0.25,0.5,0.75], var=True))

    def test_hdquantiles_sd(self):
        # Standard deviation is a jackknife estimator, so we can check if
        # the efficient version (hdquantiles_sd) matches a rudimentary,
        # but clear version here.

        hd_std_errs = ms.hdquantiles_sd(self.data)

        # jackknife standard error, Introduction to the Bootstrap Eq. 11.5
        n = len(self.data)
        jdata = np.broadcast_to(self.data, (n, n))
        jselector = np.logical_not(np.eye(n))  # leave out one sample each row
        jdata = jdata[jselector].reshape(n, n-1)
        jdist = ms.hdquantiles(jdata, axis=1)
        jdist_mean = np.mean(jdist, axis=0)
        jstd = ((n-1)/n * np.sum((jdist - jdist_mean)**2, axis=0))**.5

        assert_almost_equal(hd_std_errs, jstd)
        # Test actual values for good measure
        assert_almost_equal(hd_std_errs, [0.0379258, 0.0380656, 0.0380013])

        # Edge case: with two data points the jackknife reduces to 0.5.
        two_data_points = ms.hdquantiles_sd([1, 2])
        assert_almost_equal(two_data_points, [0.5, 0.5, 0.5])

    def test_mquantiles_cimj(self):
        # Only test that code runs, implementation not checked for correctness
        ci_lower, ci_upper = ms.mquantiles_cimj(self.data)
        assert_(ci_lower.size == ci_upper.size == 3)
|
||||
|
||||
|
||||
2905
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_multivariate.py
vendored
Normal file
2905
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_multivariate.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
147
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_odds_ratio.py
vendored
Normal file
147
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_odds_ratio.py
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
from .._discrete_distns import nchypergeom_fisher, hypergeom
|
||||
from scipy.stats._odds_ratio import odds_ratio
|
||||
from .data.fisher_exact_results_from_r import data
|
||||
|
||||
|
||||
class TestOddsRatio:
    """Tests for `scipy.stats._odds_ratio.odds_ratio` (conditional and
    sample odds ratios plus their confidence intervals)."""

    @pytest.mark.parametrize('parameters, rresult', data)
    def test_results_from_r(self, parameters, rresult):
        """Compare statistic and CI against reference values computed in R."""
        alternative = parameters.alternative.replace('.', '-')
        result = odds_ratio(parameters.table)
        # The results computed by R are not very accurate, and less so for
        # large statistics, hence the looser tolerances in that regime.
        if result.statistic < 400:
            or_rtol = 5e-4
            ci_rtol = 2e-2
        else:
            or_rtol = 5e-2
            ci_rtol = 1e-1
        assert_allclose(result.statistic,
                        rresult.conditional_odds_ratio, rtol=or_rtol)
        ci = result.confidence_interval(parameters.confidence_level,
                                        alternative)
        assert_allclose((ci.low, ci.high), rresult.conditional_odds_ratio_ci,
                        rtol=ci_rtol)

        # Also do a self-check for the conditional odds ratio.
        # With the computed conditional odds ratio as the noncentrality
        # parameter of the noncentral hypergeometric distribution with
        # parameters table.sum(), table[0].sum(), and table[:,0].sum() as
        # total, ngood and nsample, respectively, the mean of the distribution
        # should equal table[0, 0].
        cor = result.statistic
        table = np.array(parameters.table)
        total = table.sum()
        ngood = table[0].sum()
        nsample = table[:, 0].sum()
        # nchypergeom_fisher does not allow the edge cases where the
        # noncentrality parameter is 0 or inf, so handle those values
        # separately here.
        if cor == 0:
            nchg_mean = hypergeom.support(total, ngood, nsample)[0]
        elif cor == np.inf:
            nchg_mean = hypergeom.support(total, ngood, nsample)[1]
        else:
            nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample, cor)
        assert_allclose(nchg_mean, table[0, 0], rtol=1e-13)

        # Check that the confidence interval is correct.
        alpha = 1 - parameters.confidence_level
        if alternative == 'two-sided':
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha/2, rtol=1e-11)
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha/2, rtol=1e-11)
        elif alternative == 'less':
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha, rtol=1e-11)
        else:
            # alternative == 'greater'
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha, rtol=1e-11)

    @pytest.mark.parametrize('table', [
        [[0, 0], [5, 10]],
        [[5, 10], [0, 0]],
        [[0, 5], [0, 10]],
        [[5, 0], [10, 0]],
    ])
    def test_row_or_col_zero(self, table):
        """A zero row or column yields a nan statistic and the CI (0, inf)."""
        result = odds_ratio(table)
        assert_equal(result.statistic, np.nan)
        ci = result.confidence_interval()
        assert_equal((ci.low, ci.high), (0, np.inf))

    @pytest.mark.parametrize("case",
                             [[0.95, 'two-sided', 0.4879913, 2.635883],
                              [0.90, 'two-sided', 0.5588516, 2.301663]])
    def test_sample_odds_ratio_ci(self, case):
        # Compare the sample odds ratio confidence interval to the R function
        # oddsratio.wald from the epitools package, e.g.
        # > library(epitools)
        # > table = matrix(c(10, 20, 41, 93), nrow=2, ncol=2, byrow=TRUE)
        # > result = oddsratio.wald(table)
        # > result$measure
        #           odds ratio with 95% C.I.
        # Predictor  estimate     lower    upper
        #  Exposed1 1.000000        NA       NA
        #  Exposed2 1.134146 0.4879913 2.635883

        confidence_level, alternative, ref_low, ref_high = case
        table = [[10, 20], [41, 93]]
        result = odds_ratio(table, kind='sample')
        assert_allclose(result.statistic, 1.134146, rtol=1e-6)
        ci = result.confidence_interval(confidence_level, alternative)
        assert_allclose([ci.low, ci.high], [ref_low, ref_high], rtol=1e-6)

    @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
    def test_sample_odds_ratio_one_sided_ci(self, alternative):
        # can't find a good reference for one-sided CI, so bump up the sample
        # size and compare against the conditional odds ratio CI
        table = [[1000, 2000], [4100, 9300]]
        res = odds_ratio(table, kind='sample')
        ref = odds_ratio(table, kind='conditional')
        assert_allclose(res.statistic, ref.statistic, atol=1e-5)
        assert_allclose(res.confidence_interval(alternative=alternative),
                        ref.confidence_interval(alternative=alternative),
                        atol=2e-3)

    @pytest.mark.parametrize('kind', ['sample', 'conditional'])
    @pytest.mark.parametrize('bad_table', [123, "foo", [10, 11, 12]])
    def test_invalid_table_shape(self, kind, bad_table):
        """Anything that is not a 2x2 table is rejected."""
        with pytest.raises(ValueError, match="Invalid shape"):
            odds_ratio(bad_table, kind=kind)

    def test_invalid_table_type(self):
        """Non-integer table entries are rejected."""
        with pytest.raises(ValueError, match='must be an array of integers'):
            odds_ratio([[1.0, 3.4], [5.0, 9.9]])

    def test_negative_table_values(self):
        """Negative counts are rejected."""
        with pytest.raises(ValueError, match='must be nonnegative'):
            odds_ratio([[1, 2], [3, -4]])

    def test_invalid_kind(self):
        """Unrecognized `kind` values are rejected."""
        with pytest.raises(ValueError, match='`kind` must be'):
            odds_ratio([[10, 20], [30, 14]], kind='magnetoreluctance')

    def test_invalid_alternative(self):
        """Unrecognized `alternative` values are rejected."""
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='`alternative` must be'):
            result.confidence_interval(alternative='depleneration')

    @pytest.mark.parametrize('level', [-0.5, 1.5])
    def test_invalid_confidence_level(self, level):
        """Confidence levels outside [0, 1] are rejected."""
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='must be between 0 and 1'):
            result.confidence_interval(confidence_level=level)
|
||||
1326
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_qmc.py
vendored
Normal file
1326
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_qmc.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
320
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_rank.py
vendored
Normal file
320
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_rank.py
vendored
Normal file
@@ -0,0 +1,320 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_array_equal
|
||||
|
||||
from scipy.stats import rankdata, tiecorrect
|
||||
import pytest
|
||||
|
||||
|
||||
class TestTieCorrect:
    """Tests for `scipy.stats.tiecorrect`."""

    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        assert_equal(tiecorrect(np.array([], dtype=np.float64)), 1.0)

    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        assert_equal(tiecorrect(np.array([1.0], dtype=np.float64)), 1.0)

    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        assert_equal(tiecorrect(np.arange(2.0)), 1.0)
        assert_equal(tiecorrect(np.arange(3.0)), 1.0)

    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""

        def factor(n, tie_sizes):
            # 1 - sum(T**3 - T) / (N**3 - N) over all tie groups.
            return 1.0 - sum(t**3 - t for t in tie_sizes) / (n**3 - n)

        # One tie of two elements
        ranks = np.array([1.0, 2.5, 2.5])
        assert_equal(tiecorrect(ranks), factor(ranks.size, [2.0]))

        # One tie of two elements (same as above, but tie is not at the end)
        ranks = np.array([1.5, 1.5, 3.0])
        assert_equal(tiecorrect(ranks), factor(ranks.size, [2.0]))

        # One tie of three elements
        ranks = np.array([1.0, 3.0, 3.0, 3.0])
        assert_equal(tiecorrect(ranks), factor(ranks.size, [3.0]))

        # Two ties, lengths 2 and 3.
        ranks = np.array([1.5, 1.5, 4.0, 4.0, 4.0])
        assert_equal(tiecorrect(ranks), factor(ranks.size, [2.0, 3.0]))

    def test_overflow(self):
        """The correction must not overflow for large tied groups."""
        ntie, k = 2000, 5
        a = np.repeat(np.arange(k), ntie)
        n = a.size  # ntie * k
        out = tiecorrect(rankdata(a))
        assert_equal(out, 1.0 - k * (ntie**3 - ntie) / float(n**3 - n))
|
||||
|
||||
|
||||
class TestRankData:
    """Tests for `scipy.stats.rankdata`."""

    def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        a = np.array([], dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([], dtype=np.float64))
        r = rankdata([])
        assert_array_equal(r, np.array([], dtype=np.float64))

    def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))
        r = rankdata(data)
        assert_array_equal(r, np.array([1.0], dtype=np.float64))

    def test_basic(self):
        """Basic tests of stats.rankdata."""
        data = [100, 10, 50]
        expected = np.array([3.0, 1.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [40, 10, 30, 10, 50]
        expected = np.array([4.0, 1.5, 3.0, 1.5, 5.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)

        data = [20, 20, 20, 10, 10, 10]
        expected = np.array([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=np.float64)
        a = np.array(data, dtype=int)
        r = rankdata(a)
        assert_array_equal(r, expected)
        r = rankdata(data)
        assert_array_equal(r, expected)
        # The docstring states explicitly that the argument is flattened.
        a2d = a.reshape(2, 3)
        r = rankdata(a2d)
        assert_array_equal(r, expected)

    def test_rankdata_object_string(self):
        """All methods must agree with naive reference implementations on
        string and object arrays."""
        min_rank = lambda a: [1 + sum(i < j for i in a) for j in a]
        max_rank = lambda a: [sum(i <= j for i in a) for j in a]
        ordinal_rank = lambda a: min_rank([(x, i) for i, x in enumerate(a)])

        def average_rank(a):
            return [(i + j) / 2.0 for i, j in zip(min_rank(a), max_rank(a))]

        def dense_rank(a):
            b = np.unique(a)
            return [1 + sum(i < j for i in b) for j in a]

        rankf = dict(min=min_rank, max=max_rank, ordinal=ordinal_rank,
                     average=average_rank, dense=dense_rank)

        def check_ranks(a):
            for method in 'min', 'max', 'dense', 'ordinal', 'average':
                out = rankdata(a, method=method)
                assert_array_equal(out, rankf[method](a))

        val = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
        check_ranks(np.random.choice(val, 200))
        check_ranks(np.random.choice(val, 200).astype('object'))

        val = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
        check_ranks(np.random.choice(val, 200).astype('object'))

    def test_large_int(self):
        """Values near 2**60 must not lose precision to float conversion."""
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        r = rankdata(data)
        assert_array_equal(r, [2.0, 1.0])

    def test_big_tie(self):
        """A single all-tied group of n elements gets rank (n+1)/2."""
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n)

    def test_axis(self):
        """Ranking along axis 0 and axis 1 of a 2-D array."""
        data = [[0, 2, 1],
                [4, 2, 2]]
        expected0 = [[1., 1.5, 1.],
                     [2., 1.5, 2.]]
        r0 = rankdata(data, axis=0)
        assert_array_equal(r0, expected0)
        expected1 = [[1., 3., 2.],
                     [3., 1.5, 1.5]]
        r1 = rankdata(data, axis=1)
        assert_array_equal(r1, expected1)

    methods = ["average", "min", "max", "dense", "ordinal"]
    dtypes = [np.float64] + [np.int_]*4

    @pytest.mark.parametrize("axis", [0, 1])
    @pytest.mark.parametrize("method, dtype", zip(methods, dtypes))
    def test_size_0_axis(self, axis, method, dtype):
        """A zero-length axis yields an empty result of the right dtype."""
        shape = (3, 0)
        data = np.zeros(shape)
        r = rankdata(data, method=method, axis=axis)
        assert_equal(r.shape, shape)
        assert_equal(r.dtype, dtype)

    @pytest.mark.parametrize('axis', range(3))
    @pytest.mark.parametrize('method', methods)
    def test_nan_policy_omit_3d(self, axis, method):
        """`nan_policy='omit'` on a 3-D array matches a per-1-D-slice
        reference implementation, with +/-inf values present."""
        shape = (20, 21, 22)
        rng = np.random.default_rng(abs(hash('falafel')))

        a = rng.random(size=shape)
        i = rng.random(size=shape) < 0.4
        j = rng.random(size=shape) < 0.1
        k = rng.random(size=shape) < 0.1
        a[i] = np.nan
        a[j] = -np.inf
        # BUG FIX: this line previously read `a[k] - np.inf`, a no-op
        # expression; the parallel assignments above show it was meant to
        # insert +inf values so that both infinities are exercised.
        a[k] = np.inf

        def rank_1d_omit(a, method):
            # Rank the non-nan entries; nan positions stay nan.
            out = np.zeros_like(a)
            i = np.isnan(a)
            a_compressed = a[~i]
            res = rankdata(a_compressed, method)
            out[~i] = res
            out[i] = np.nan
            return out

        def rank_omit(a, method, axis):
            return np.apply_along_axis(lambda a: rank_1d_omit(a, method),
                                       axis, a)

        res = rankdata(a, method, axis=axis, nan_policy='omit')
        res0 = rank_omit(a, method, axis=axis)

        assert_array_equal(res, res0)

    def test_nan_policy_2d_axis_none(self):
        # 2 2d-array test with axis=None
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [1, 2, 2]]
        assert_array_equal(rankdata(data, axis=None, nan_policy='omit'),
                           [1., np.nan, 6., 7., 4., np.nan, 2., 4., 4.])
        assert_array_equal(rankdata(data, axis=None, nan_policy='propagate'),
                           [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
                            np.nan, np.nan, np.nan])

    def test_nan_policy_raise(self):
        """`nan_policy='raise'` must reject nan input (1-D and 2-D)."""
        # 1 1d-array test
        data = [0, 2, 3, -2, np.nan, np.nan]
        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, nan_policy='raise')

        # 2 2d-array test
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [np.nan, 2, 2]]

        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, axis=0, nan_policy="raise")

        with pytest.raises(ValueError, match="The input contains nan"):
            rankdata(data, axis=1, nan_policy="raise")

    def test_nan_policy_propagate(self):
        """`nan_policy='propagate'` poisons every slice containing a nan."""
        # 1 1d-array test
        data = [0, 2, 3, -2, np.nan, np.nan]
        assert_array_equal(rankdata(data, nan_policy='propagate'),
                           [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan])

        # 2 2d-array test
        data = [[0, np.nan, 3],
                [4, 2, np.nan],
                [1, 2, 2]]
        assert_array_equal(rankdata(data, axis=0, nan_policy='propagate'),
                           [[1, np.nan, np.nan],
                            [3, np.nan, np.nan],
                            [2, np.nan, np.nan]])
        assert_array_equal(rankdata(data, axis=1, nan_policy='propagate'),
                           [[np.nan, np.nan, np.nan],
                            [np.nan, np.nan, np.nan],
                            [1, 2.5, 2.5]])
|
||||
|
||||
|
||||
# Table-driven test cases for `rankdata`: each entry is
# (input values, tie-handling method, expected ranks).
_cases = (
    # values, method, expected
    ([], 'average', []),
    ([], 'min', []),
    ([], 'max', []),
    ([], 'dense', []),
    ([], 'ordinal', []),
    #
    ([100], 'average', [1.0]),
    ([100], 'min', [1.0]),
    ([100], 'max', [1.0]),
    ([100], 'dense', [1.0]),
    ([100], 'ordinal', [1.0]),
    #
    ([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
    ([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
    ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
    ([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
    #
    ([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
    ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
    #
    ([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
    ([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
    ([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
    ([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
    ([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
    #
    ([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
    ([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
    ([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
    ([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
    ([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
    #
    ([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
)
|
||||
|
||||
|
||||
def test_cases():
    """Run `rankdata` over the `_cases` table and compare to expected ranks."""
    for values, method, expected in _cases:
        assert_array_equal(rankdata(values, method=method), expected)
|
||||
96
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_relative_risk.py
vendored
Normal file
96
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_relative_risk.py
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy.stats.contingency import relative_risk
|
||||
|
||||
|
||||
# Test just the calculation of the relative risk, including edge
|
||||
# cases that result in a relative risk of 0, inf or nan.
|
||||
# Test just the calculation of the relative risk, including edge
# cases that result in a relative risk of 0, inf or nan.
@pytest.mark.parametrize(
    'exposed_cases, exposed_total, control_cases, control_total, expected_rr',
    [(1, 4, 3, 8, 0.25 / 0.375),
     (0, 10, 5, 20, 0),
     (0, 10, 0, 20, np.nan),
     (5, 15, 0, 20, np.inf)]
)
def test_relative_risk(exposed_cases, exposed_total,
                       control_cases, control_total, expected_rr):
    """Relative risk point estimate matches the hand-computed expectation."""
    rr_result = relative_risk(exposed_cases, exposed_total,
                              control_cases, control_total)
    assert_allclose(rr_result.relative_risk, expected_rr, rtol=1e-13)
|
||||
|
||||
|
||||
def test_relative_risk_confidence_interval():
    """Check the relative risk and its 95% CI against epitools (R)."""
    # Reference computation in R using the epitools package:
    #
    # > library(epitools)
    # > c <- matrix(c(232, 112, 24, 16), nrow=2)
    # > result <- riskratio(c)
    # > result$measure
    # risk ratio with 95% C.I.
    # Predictor estimate lower upper
    # Exposed1 1.000000 NA NA
    # Exposed2 1.333333 0.7347317 2.419628
    #
    # The last line is the result that we want.
    result = relative_risk(exposed_cases=16, exposed_total=128,
                           control_cases=24, control_total=256)
    assert_allclose(result.relative_risk, 4/3)
    ci = result.confidence_interval(confidence_level=0.95)
    assert_allclose((ci.low, ci.high), (0.7347317, 2.419628), rtol=5e-7)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel0():
    """A confidence level of 0 collapses the CI onto the point estimate."""
    result = relative_risk(exposed_cases=4, exposed_total=12,
                           control_cases=5, control_total=30)
    assert_allclose(result.relative_risk, 2.0, rtol=1e-14)
    interval = result.confidence_interval(0)
    assert_allclose((interval.low, interval.high), (2.0, 2.0), rtol=1e-12)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel1():
    """A confidence level of 1 gives the maximal interval (0, inf)."""
    result = relative_risk(exposed_cases=4, exposed_total=12,
                           control_cases=5, control_total=30)
    interval = result.confidence_interval(1)
    assert_equal((interval.low, interval.high), (0, np.inf))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_00():
    """Zero cases in both groups: the RR and both CI bounds are nan."""
    rr = relative_risk(exposed_cases=0, exposed_total=12,
                       control_cases=0, control_total=30)
    assert_equal(rr.relative_risk, np.nan)
    interval = rr.confidence_interval()
    assert_equal((interval.low, interval.high), (np.nan, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_01():
    """Zero exposed cases only: the RR is 0 and the CI upper bound is nan."""
    rr = relative_risk(exposed_cases=0, exposed_total=12,
                       control_cases=1, control_total=30)
    assert_equal(rr.relative_risk, 0)
    interval = rr.confidence_interval()
    assert_equal((interval.low, interval.high), (0.0, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_10():
    """Zero control cases only: the RR is inf and the CI lower bound is nan."""
    rr = relative_risk(exposed_cases=1, exposed_total=12,
                       control_cases=0, control_total=30)
    assert_equal(rr.relative_risk, np.inf)
    interval = rr.confidence_interval()
    assert_equal((interval.low, interval.high), (np.nan, np.inf))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ec, et, cc, ct', [(0, 0, 10, 20),
                                            (-1, 10, 1, 5),
                                            (1, 10, 0, 0),
                                            (1, 10, -1, 4)])
def test_relative_risk_bad_value(ec, et, cc, ct):
    """Negative counts or zero totals must raise ValueError."""
    msg = "must be an integer not less than"
    with pytest.raises(ValueError, match=msg):
        relative_risk(ec, et, cc, ct)
|
||||
|
||||
|
||||
def test_relative_risk_bad_type():
    """Non-integer inputs (here a float count) must raise TypeError."""
    msg = "must be an integer"
    with pytest.raises(TypeError, match=msg):
        relative_risk(1, 10, 2.0, 40)
|
||||
1651
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_resampling.py
vendored
Normal file
1651
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_resampling.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1357
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_sampling.py
vendored
Normal file
1357
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_sampling.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
8173
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_stats.py
vendored
Normal file
8173
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_stats.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
86
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_tukeylambda_stats.py
vendored
Normal file
86
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_tukeylambda_stats.py
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
|
||||
from scipy.stats._tukeylambda_stats import (tukeylambda_variance,
|
||||
tukeylambda_kurtosis)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # (lambda, exact variance, exact excess kurtosis):
    #   lam = 0:   logistic distribution
    #   lam = 0.5
    #   lam = 1:   uniform distribution on (-1, 1)
    #   lam = 2:   uniform distribution on (-1/2, 1/2)
    exact = [
        (0, np.pi**2 / 3, 1.2),
        (0.5, 4 - np.pi, (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3),
        (1, 1.0 / 3, -1.2),
        (2, 1.0 / 12, -1.2),
    ]
    for lam, var_expected, kurt_expected in exact:
        assert_allclose(tukeylambda_variance(lam), var_expected, atol=1e-12)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_expected, atol=1e-10)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    var_tol = dict(atol=1e-12, rtol=0)
    kurt_tol = dict(atol=1e-10, rtol=0)
    data = [
        # lambda variance kurtosis
        [-0.1, 4.78050217874253547, 3.78559520346454510],
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
        [-0.05, 3.93672267890775277, 2.13129793057777277],
        [-0.001, 3.30128380390964882, 1.21452460083542988],
        [0.001, 3.27850775649572176, 1.18560634779287585],
        [0.03125, 2.95927803254615800, 0.804487555161819980],
        [0.05, 2.78281053405464501, 0.611604043886644327],
        [0.0649, 2.65282386754100551, 0.476834119532774540],
        [1.2, 0.242153920578588346, -1.23428047169049726],
        [10.0, 0.00095237579757703597, 2.37810697355144933],
        [20.0, 0.00012195121951131043, 7.37654321002709531],
    ]

    # Scalar arguments, one row at a time.
    for lam, var_expected, kurt_expected in data:
        assert_allclose(tukeylambda_variance(lam), var_expected, **var_tol)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_expected, **kurt_tol)

    # Vector arguments (most of the other tests are for single values).
    lams, var_expected, kurt_expected = zip(*data)
    assert_allclose(tukeylambda_variance(lams), var_expected, **var_tol)
    assert_allclose(tukeylambda_kurtosis(lams), kurt_expected, **kurt_tol)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    # Variance is undefined (nan) for lam <= -0.5 and infinite at the
    # boundary value -0.5.
    assert_equal(tukeylambda_variance([-1.0, -0.5]),
                 np.array([np.nan, np.inf]))
    # Kurtosis is undefined (nan) for lam <= -0.25 and infinite at the
    # boundary value -0.25.
    assert_equal(tukeylambda_kurtosis([-1.0, -0.25]),
                 np.array([np.nan, np.inf]))
|
||||
|
||||
158
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_variation.py
vendored
Normal file
158
.CondaPkg/env/Lib/site-packages/scipy/stats/tests/test_variation.py
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
import pytest
|
||||
from scipy.stats import variation
|
||||
|
||||
|
||||
class TestVariation:
    """
    Test class for scipy.stats.variation
    """

    def test_ddof(self):
        """variation honors the ddof (delta degrees of freedom) argument."""
        x = np.arange(9.0)
        assert_allclose(variation(x, ddof=1), np.sqrt(60/8)/4)

    @pytest.mark.parametrize('sgn', [1, -1])
    def test_sign(self, sgn):
        """The sign of the result follows the sign of the mean."""
        x = np.array([1, 2, 3, 4, 5])
        v = variation(sgn*x)
        expected = sgn*np.sqrt(2)/3
        assert_allclose(v, expected, rtol=1e-10)

    def test_scalar(self):
        # A scalar is treated like a 1-d sequence with length 1.
        assert_equal(variation(4.0), 0.0)

    @pytest.mark.parametrize('nan_policy, expected',
                             [('propagate', np.nan),
                              ('omit', np.sqrt(20/3)/4)])
    def test_variation_nan(self, nan_policy, expected):
        """'propagate' yields nan; 'omit' computes over the non-nan values."""
        x = np.arange(10.)
        x[9] = np.nan
        assert_allclose(variation(x, nan_policy=nan_policy), expected)

    def test_nan_policy_raise(self):
        """nan_policy='raise' raises ValueError when the input contains nan."""
        x = np.array([1.0, 2.0, np.nan, 3.0])
        with pytest.raises(ValueError, match='input contains nan'):
            variation(x, nan_policy='raise')

    def test_bad_nan_policy(self):
        """An unrecognized nan_policy string raises ValueError."""
        with pytest.raises(ValueError, match='must be one of'):
            variation([1, 2, 3], nan_policy='foobar')

    def test_keepdims(self):
        """keepdims=True preserves the reduced axis with length 1."""
        x = np.arange(10).reshape(2, 5)
        y = variation(x, axis=1, keepdims=True)
        expected = np.array([[np.sqrt(2)/2],
                             [np.sqrt(2)/7]])
        assert_allclose(y, expected)

    @pytest.mark.parametrize('axis, expected',
                             [(0, np.empty((1, 0))),
                              (1, np.full((5, 1), fill_value=np.nan))])
    def test_keepdims_size0(self, axis, expected):
        """Size-0 inputs with keepdims: empty along axis 0, nan along axis 1."""
        x = np.zeros((5, 0))
        y = variation(x, axis=axis, keepdims=True)
        assert_equal(y, expected)

    @pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
    def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill):
        """ddof == n gives inf (zero variance denominator); ddof > n gives nan."""
        x = np.array([[1, 1, 2, 2], [1, 2, 3, 3]])
        y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
        assert_equal(y, np.full((2, 1), fill_value=expected_fill))

    def test_propagate_nan(self):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = np.arange(8).reshape(2, -1).astype(float)
        a[1, 0] = np.nan
        v = variation(a, axis=1, nan_policy="propagate")
        assert_allclose(v, [np.sqrt(5/4)/1.5, np.nan], atol=1e-15)

    def test_axis_none(self):
        # Check that `variation` computes the result on the flattened
        # input when axis is None.
        y = variation([[0, 1], [2, 3]], axis=None)
        assert_allclose(y, np.sqrt(5/4)/1.5)

    def test_bad_axis(self):
        # Check that an invalid axis raises np.AxisError.
        # NOTE(review): np.AxisError was removed from the top-level numpy
        # namespace in NumPy 2.0 (moved to np.exceptions.AxisError) —
        # confirm the pinned numpy version supports this spelling.
        x = np.array([[1, 2, 3], [4, 5, 6]])
        with pytest.raises(np.AxisError):
            variation(x, axis=10)

    def test_mean_zero(self):
        # Check that `variation` returns inf for a sequence that is not
        # identically zero but whose mean is zero.
        x = np.array([10, -3, 1, -4, -4])
        y = variation(x)
        assert_equal(y, np.inf)

        x2 = np.array([x, -10*x])
        y2 = variation(x2, axis=1)
        assert_equal(y2, [np.inf, np.inf])

    @pytest.mark.parametrize('x', [np.zeros(5), [], [1, 2, np.inf, 9]])
    def test_return_nan(self, x):
        # Test some cases where `variation` returns nan.
        y = variation(x)
        assert_equal(y, np.nan)

    @pytest.mark.parametrize('axis, expected',
                             [(0, []), (1, [np.nan]*3), (None, np.nan)])
    def test_2d_size_zero_with_axis(self, axis, expected):
        """Size-0 2-d inputs: result shape/value depends on the chosen axis."""
        x = np.empty((3, 0))
        y = variation(x, axis=axis)
        assert_equal(y, expected)

    def test_neg_inf(self):
        # Edge case that produces -inf: ddof equals the number of non-nan
        # values, the values are not constant, and the mean is negative.
        x1 = np.array([-3, -5])
        assert_equal(variation(x1, ddof=2), -np.inf)

        x2 = np.array([[np.nan, 1, -10, np.nan],
                       [-20, -3, np.nan, np.nan]])
        assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
                     [-np.inf, -np.inf])

    @pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
    def test_combined_edge_cases(self, nan_policy):
        """Columns mixing nan, zero-mean and constant data in one call."""
        x = np.array([[0, 10, np.nan, 1],
                      [0, -5, np.nan, 2],
                      [0, -5, np.nan, 3]])
        y = variation(x, axis=0, nan_policy=nan_policy)
        assert_allclose(y, [np.nan, np.inf, np.nan, np.sqrt(2/3)/2])

    @pytest.mark.parametrize(
        'ddof, expected',
        [(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
         (1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
         (2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
    )
    def test_more_nan_policy_omit_tests(self, ddof, expected):
        # The slightly strange formatting in the following array is my attempt
        # to maintain a clean tabular arrangement of the data while satisfying
        # the demands of pycodestyle. Currently, E201 and E241 are not
        # disabled by the `# noqa` annotation.
        nan = np.nan
        x = np.array([[1.0, 2.0, nan, 3.0],
                      [0.0, 4.0, 3.0, 1.0],
                      [nan, -.5, 0.5, nan],
                      [nan, 9.0, 9.0, nan],
                      [nan, nan, nan, nan],
                      [3.0, 3.0, 3.0, 3.0],
                      [0.0, 0.0, 0.0, 0.0]])
        v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
        assert_allclose(v, expected)

    def test_variation_ddof(self):
        # test variation with delta degrees of freedom
        # regression test for gh-13341
        a = np.array([1, 2, 3, 4, 5])
        nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
        y = variation(a, ddof=1)
        nan_y = variation(nan_a, nan_policy="omit", ddof=1)
        assert_allclose(y, np.sqrt(5/2)/3)
        assert y == nan_y
|
||||
Reference in New Issue
Block a user