This commit is contained in:
ton
2024-10-07 10:13:40 +07:00
parent aa1631742f
commit 3a7d696db6
9729 changed files with 1832837 additions and 161742 deletions

View File

@@ -0,0 +1,349 @@
"""
lazy_loader
===========
Makes it easy to load subpackages and functions on demand.
"""
import ast
import importlib
import importlib.util
import os
import sys
import threading
import types
import warnings
# Version string of this module.
__version__ = "0.4"
# Names exported by ``from lazy_loader import *``.
__all__ = ["attach", "load", "attach_stub"]
# Module-wide lock acquired by ``load`` while it inspects and updates
# ``sys.modules``.
threadlock = threading.Lock()
def attach(package_name, submodules=None, submod_attrs=None):
    """Attach lazily loaded submodules, functions, or other attributes.

    Typically, modules import submodules and attributes as follows::

      import mysubmodule
      import anothersubmodule

      from .foo import someattr

    The idea is to replace a package's `__getattr__`, `__dir__`, and
    `__all__`, such that all imports work exactly the way they would
    with normal imports, except that the import occurs upon first use.

    The typical way to call this function, replacing the above imports, is::

      __getattr__, __dir__, __all__ = lazy.attach(
        __name__,
        ['mysubmodule', 'anothersubmodule'],
        {'foo': ['someattr']}
      )

    This functionality requires Python 3.7 or higher.

    If the environment variable ``EAGER_IMPORT`` is set to a non-empty
    value, every attached name is resolved immediately instead of lazily.

    Parameters
    ----------
    package_name : str
        Typically use ``__name__``.
    submodules : iterable of str, optional
        Names of submodules to attach.
    submod_attrs : dict, optional
        Dictionary of submodule -> list of attributes / functions.
        These attributes are imported as they are used.

    Returns
    -------
    __getattr__, __dir__, __all__
        ``__getattr__(name)`` imports and returns the requested submodule
        or attribute and raises ``AttributeError`` for unknown names.
        ``__dir__()`` returns a fresh sorted list of the attached names.
        ``__all__`` is a separate list with the same content.

    """
    if submod_attrs is None:
        submod_attrs = {}

    if submodules is None:
        submodules = set()
    else:
        submodules = set(submodules)

    # Reverse mapping: attribute name -> submodule that provides it.
    attr_to_modules = {
        attr: mod for mod, attrs in submod_attrs.items() for attr in attrs
    }

    __all__ = sorted(submodules | attr_to_modules.keys())

    def __getattr__(name):
        if name in submodules:
            return importlib.import_module(f"{package_name}.{name}")
        elif name in attr_to_modules:
            submod_path = f"{package_name}.{attr_to_modules[name]}"
            submod = importlib.import_module(submod_path)
            attr = getattr(submod, name)

            # If the attribute lives in a file (module) with the same
            # name as the attribute, ensure that the attribute and *not*
            # the module is accessible on the package.
            if name == attr_to_modules[name]:
                pkg = sys.modules[package_name]
                pkg.__dict__[name] = attr

            return attr
        else:
            raise AttributeError(f"No {package_name} attribute {name}")

    def __dir__():
        # Hand out a copy so callers that mutate the result of ``dir()``
        # cannot corrupt the list captured by this closure.
        return list(__all__)

    if os.environ.get("EAGER_IMPORT", ""):
        for attr in set(attr_to_modules.keys()) | submodules:
            __getattr__(attr)

    return __getattr__, __dir__, list(__all__)
class DelayedImportErrorModule(types.ModuleType):
    """Stand-in module that reports a failed import on attribute access.

    Parameters
    ----------
    frame_data : dict
        Must provide the keys ``"filename"``, ``"lineno"``, ``"function"``
        and ``"code_context"``; they are interpolated into the error text.
    *args, **kwargs
        Forwarded to ``types.ModuleType`` (the first positional argument is
        the module name).
    message : str
        First line of the ``ModuleNotFoundError`` raised on attribute access.
    """

    def __init__(self, frame_data, *args, message, **kwargs):
        self.__frame_data = frame_data
        self.__message = message
        super().__init__(*args, **kwargs)

    def __getattr__(self, x):
        # ``__getattr__`` only runs when normal lookup failed. The private
        # attributes are stored under their name-mangled spelling, so the
        # mangled names must be listed here: otherwise a lookup on an
        # instance whose ``__init__`` has not run would re-enter this method
        # through ``self.__frame_data`` and recurse without bound.
        if x in (
            "__class__",
            "__file__",
            "__frame_data",
            "__message",
            "_DelayedImportErrorModule__frame_data",
            "_DelayedImportErrorModule__message",
        ):
            # ``types.ModuleType`` defines no ``__getattr__`` to delegate to,
            # so report the missing attribute directly.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {x!r}"
            )

        fd = self.__frame_data
        raise ModuleNotFoundError(
            f"{self.__message}\n\n"
            "This error is lazily reported, having originally occured in\n"
            f' File {fd["filename"]}, line {fd["lineno"]}, in {fd["function"]}\n\n'
            f'----> {"".join(fd["code_context"] or "").strip()}'
        )
def load(fullname, *, require=None, error_on_import=False):
    """Return a lazily imported proxy for a module.

    We often see the following pattern::

      def myfunc():
          import numpy as np
          np.norm(...)
          ....

    Putting the import inside the function prevents, in this case,
    `numpy`, from being imported at function definition time.
    That saves time if `myfunc` ends up not being called.

    This `load` function returns a proxy module that, upon access, imports
    the actual module.  So the idiom equivalent to the above example is::

      np = lazy.load("numpy")

      def myfunc():
          np.norm(...)
          ....

    The initial import time is fast because the actual import is delayed
    until the first attribute is requested. The overall import time may
    decrease as well for users that don't make use of large portions
    of your library.

    Warning
    -------
    While lazily loading *sub*packages technically works, it causes the
    package (that contains the subpackage) to be eagerly loaded even
    if the package is already lazily loaded.
    So, you probably shouldn't use subpackages with this `load` feature.
    Instead you should encourage the package maintainers to use the
    `lazy_loader.attach` to make their subpackages load lazily.

    Parameters
    ----------
    fullname : str
        The full name of the module or submodule to import.  For example::

          sp = lazy.load('scipy')  # import scipy as sp

    require : str
        A dependency requirement as defined in PEP-508.  For example::

          "numpy >=1.24"

        If defined, the proxy module will raise an error if the installed
        version does not satisfy the requirement.
    error_on_import : bool
        If `False` (the default), a missing module or unsatisfied
        requirement is reported only when the returned object is first
        accessed.  If `True`, ``ModuleNotFoundError`` is raised as soon as
        `load` is called.

    Returns
    -------
    pm : module
        The entry already present in ``sys.modules`` if there is one,
        otherwise a module wrapped by ``importlib.util.LazyLoader`` whose
        real loading happens on first attribute request.  When the module
        or requirement cannot be satisfied and `error_on_import` is false,
        a `DelayedImportErrorModule` is returned instead.

    Raises
    ------
    ModuleNotFoundError
        If `error_on_import` is true and the module is missing or the
        requirement is not satisfied.
    ValueError
        If the module was found but checking `require` raised
        ``ModuleNotFoundError``.
    """
    with threadlock:
        module = sys.modules.get(fullname)
        have_module = module is not None

        # Most common, short-circuit
        if have_module and require is None:
            return module

        if "." in fullname:
            msg = (
                "subpackages can technically be lazily loaded, but it causes the "
                "package to be eagerly loaded even if it is already lazily loaded. "
                "So, you probably shouldn't use subpackages with this lazy feature."
            )
            # stacklevel=2 attributes the warning to the caller of ``load``.
            warnings.warn(msg, RuntimeWarning, stacklevel=2)

        spec = None

        if not have_module:
            spec = importlib.util.find_spec(fullname)
            have_module = spec is not None

        if not have_module:
            not_found_message = f"No module named '{fullname}'"
        elif require is not None:
            try:
                have_module = _check_requirement(require)
            except ModuleNotFoundError as e:
                raise ValueError(
                    f"Found module '{fullname}' but cannot test "
                    f"requirement '{require}'. "
                    "Requirements must match distribution name, not module name."
                ) from e
            not_found_message = f"No distribution can be found matching '{require}'"

        if not have_module:
            if error_on_import:
                raise ModuleNotFoundError(not_found_message)

            # Imported here so the cost is only paid on the failure path.
            import inspect

            # Bind before entering ``try`` so the ``finally`` clause never
            # hits an unbound name if ``inspect.stack()`` itself fails.
            parent = inspect.stack()[1]
            try:
                frame_data = {
                    "filename": parent.filename,
                    "lineno": parent.lineno,
                    "function": parent.function,
                    "code_context": parent.code_context,
                }
                return DelayedImportErrorModule(
                    frame_data,
                    "DelayedImportErrorModule",
                    message=not_found_message,
                )
            finally:
                # Drop the local reference to the caller's frame record.
                del parent

        if spec is not None:
            module = importlib.util.module_from_spec(spec)
            # Register before executing so later ``load`` calls reuse the
            # same proxy through the short-circuit above.
            sys.modules[fullname] = module

            loader = importlib.util.LazyLoader(spec.loader)
            loader.exec_module(module)

    return module
def _check_requirement(require: str) -> bool:
"""Verify that a package requirement is satisfied
If the package is required, a ``ModuleNotFoundError`` is raised
by ``importlib.metadata``.
Parameters
----------
require : str
A dependency requirement as defined in PEP-508
Returns
-------
satisfied : bool
True if the installed version of the dependency matches
the specified version, False otherwise.
"""
import packaging.requirements
try:
import importlib.metadata as importlib_metadata
except ImportError: # PY37
import importlib_metadata
req = packaging.requirements.Requirement(require)
return req.specifier.contains(
importlib_metadata.version(req.name),
prereleases=True,
)
class _StubVisitor(ast.NodeVisitor):
"""AST visitor to parse a stub file for submodules and submod_attrs."""
def __init__(self):
self._submodules = set()
self._submod_attrs = {}
def visit_ImportFrom(self, node: ast.ImportFrom):
if node.level != 1:
raise ValueError(
"Only within-module imports are supported (`from .* import`)"
)
if node.module:
attrs: list = self._submod_attrs.setdefault(node.module, [])
aliases = [alias.name for alias in node.names]
if "*" in aliases:
raise ValueError(
"lazy stub loader does not support star import "
f"`from {node.module} import *`"
)
attrs.extend(aliases)
else:
self._submodules.update(alias.name for alias in node.names)
def attach_stub(package_name: str, filename: str):
    """Attach lazily loaded submodules, functions from a type stub.

    This is a variant on ``attach`` that will parse a `.pyi` stub file to
    infer ``submodules`` and ``submod_attrs``. This allows static type checkers
    to find imports, while still providing lazy loading at runtime.

    Parameters
    ----------
    package_name : str
        Typically use ``__name__``.
    filename : str
        Path to `.py` file which has an adjacent `.pyi` file, or the path
        of the `.pyi` file itself.
        Typically use ``__file__``.

    Returns
    -------
    __getattr__, __dir__, __all__
        The same output as ``attach``.

    Raises
    ------
    ValueError
        If a stub file is not found for `filename`, or if the stub file is
        formatted incorrectly (e.g. if it contains a relative import from
        outside of the module).
    """
    # Only a real ``.pyi`` path is used as-is; any other path has its
    # extension swapped for ``.pyi``.
    if filename.endswith(".pyi"):
        stubfile = filename
    else:
        stubfile = f"{os.path.splitext(filename)[0]}.pyi"

    if not os.path.exists(stubfile):
        raise ValueError(f"Cannot load imports from non-existent stub {stubfile!r}")

    # Read as UTF-8 rather than the locale's default encoding so the stub
    # is decoded the same way on every platform.
    with open(stubfile, encoding="utf-8") as f:
        stub_node = ast.parse(f.read())

    visitor = _StubVisitor()
    visitor.visit(stub_node)
    return attach(package_name, visitor._submodules, visitor._submod_attrs)

View File

@@ -0,0 +1,5 @@
import lazy_loader as lazy
# Expose ``some_func`` lazily from the submodule that shares its name.
# The directory function is bound to ``__lazy_dir__`` rather than ``__dir__``.
__getattr__, __lazy_dir__, __all__ = lazy.attach(
    package_name=__name__,
    submod_attrs={"some_func": ["some_func"]},
)

View File

@@ -0,0 +1 @@
from .some_func import some_func

View File

@@ -0,0 +1,3 @@
def some_func():
    """Do nothing; a function that shares its name with its submodule."""
    return None

View File

@@ -0,0 +1,13 @@
import threading
import time
import lazy_loader as lazy
def import_np():
    """Wait half a second, then request a lazy ``numpy`` proxy."""
    time.sleep(0.5)
    lazy.load("numpy")


# Launch ten threads that all call ``lazy.load("numpy")`` at about the same
# time.
workers = [threading.Thread(target=import_np) for _ in range(10)]
for worker in workers:
    worker.start()

View File

@@ -0,0 +1,192 @@
import importlib
import os
import subprocess
import sys
import types
from unittest import mock
import pytest
import lazy_loader as lazy
def test_lazy_import_basics():
    """A real module works through the proxy; a missing one fails on access."""
    math = lazy.load("math")
    anything_not_real = lazy.load("anything_not_real")

    # Now test that accessing attributes does what it should
    assert math.sin(math.pi) == pytest.approx(0, 1e-6)

    # Attribute access on the placeholder must raise
    with pytest.raises(ModuleNotFoundError):
        anything_not_real.pi
    assert isinstance(anything_not_real, lazy.DelayedImportErrorModule)

    # see if it changes for second access
    with pytest.raises(ModuleNotFoundError):
        anything_not_real.pi
def test_lazy_import_subpackages():
    """Loading a dotted name warns, imports the parent, and registers a lazy child."""
    with pytest.warns(RuntimeWarning):
        hp = lazy.load("html.parser")

    loaded = sys.modules

    # The parent package is a regular, fully imported module
    assert "html" in loaded
    assert type(loaded["html"]) == type(pytest)

    # The subpackage itself is the lazy proxy and is registered as such
    assert isinstance(hp, importlib.util._LazyModule)
    assert "html.parser" in loaded
    assert loaded["html.parser"] == hp
def test_lazy_import_impact_on_sys_modules():
    """Found modules are registered in ``sys.modules``; missing ones are not."""
    math = lazy.load("math")
    anything_not_real = lazy.load("anything_not_real")

    assert isinstance(math, types.ModuleType)
    assert "math" in sys.modules
    assert isinstance(anything_not_real, lazy.DelayedImportErrorModule)
    assert "anything_not_real" not in sys.modules

    # only do this if numpy is installed
    pytest.importorskip("numpy")
    np = lazy.load("numpy")

    def check_registered():
        assert isinstance(np, types.ModuleType)
        assert "numpy" in sys.modules

    check_registered()
    np.pi  # trigger load of numpy
    check_registered()
def test_lazy_import_nonbuiltins():
    """Exercise ``load`` on numpy and scipy whether or not they are installed."""
    np = lazy.load("numpy")
    sp = lazy.load("scipy")

    numpy_missing = isinstance(np, lazy.DelayedImportErrorModule)
    scipy_missing = isinstance(sp, lazy.DelayedImportErrorModule)

    if not numpy_missing:
        assert np.sin(np.pi) == pytest.approx(0, 1e-6)

    if scipy_missing:
        # The placeholder must raise on attribute access
        with pytest.raises(ModuleNotFoundError):
            sp.pi
def test_lazy_attach():
    """``attach`` yields three values that unpack into the expected names."""
    name = "mymod"
    submods = ["mysubmodule", "anothersubmodule"]
    myall = {"not_real_submod": ["some_var_or_func"]}

    inputs = {
        "attach": lazy.attach,
        "name": name,
        "submods": submods,
        "myall": myall,
    }
    locls = dict(inputs)

    s = "__getattr__, __lazy_dir__, __all__ = attach(name, submods, myall)"
    exec(s, {}, locls)

    # The statement must add exactly the three unpacked names ...
    produced = {"__getattr__", "__lazy_dir__", "__all__"}
    assert locls.keys() == inputs.keys() | produced

    # ... and leave the values that were passed in untouched.
    for key, value in inputs.items():
        assert locls[key] == value
def test_attach_same_module_and_attr_name():
    """An attribute named like its submodule stays a function on the package."""
    from lazy_loader.tests import fake_pkg

    # Grab attribute twice, to ensure that importing it does not
    # override function by module
    for _ in range(2):
        assert isinstance(fake_pkg.some_func, types.FunctionType)

    # Ensure imports from submodule still work
    from lazy_loader.tests.fake_pkg.some_func import some_func

    assert isinstance(some_func, types.FunctionType)
# Stub source written to disk by ``test_stub_loading``: one bare submodule
# import (``rank``) followed by attribute imports from two submodules.
FAKE_STUB = """
from . import rank
from ._gaussian import gaussian
from .edges import sobel, scharr, prewitt, roberts
"""
def test_stub_loading(tmp_path):
    """Names declared in a stub file show up in both ``__dir__`` and ``__all__``."""
    stub_path = tmp_path / "stub.pyi"
    stub_path.write_text(FAKE_STUB)

    _get, _dir, _all = lazy.attach_stub("my_module", str(stub_path))

    expect = {"gaussian", "sobel", "scharr", "prewitt", "roberts", "rank"}
    listed = set(_dir())
    exported = set(_all)
    assert listed == exported
    assert exported == expect
def test_stub_loading_parity():
    """``attach_stub`` on the fake package matches its explicit ``attach`` call."""
    from lazy_loader.tests import fake_pkg

    stub_getter, stub_dir, stub_all = lazy.attach_stub(
        fake_pkg.__name__, fake_pkg.__file__
    )

    assert stub_all == fake_pkg.__all__
    assert stub_dir() == fake_pkg.__lazy_dir__()
    assert stub_getter("some_func") == fake_pkg.some_func
def test_stub_loading_errors(tmp_path):
    """Each unsupported stub input is rejected with a ``ValueError``."""
    # Relative import reaching outside of the package
    outside = tmp_path / "stub.pyi"
    outside.write_text("from ..mod import func\n")
    with pytest.raises(ValueError, match="Only within-module imports are supported"):
        lazy.attach_stub("name", str(outside))

    # Path for which no stub exists
    with pytest.raises(ValueError, match="Cannot load imports from non-existent stub"):
        lazy.attach_stub("name", "not a file")

    # Star import
    star = tmp_path / "stub2.pyi"
    star.write_text("from .mod import *\n")
    with pytest.raises(ValueError, match=".*does not support star import"):
        lazy.attach_stub("name", str(star))
def test_require_kwarg():
    """The ``require`` argument decides whether ``load`` returns a usable module."""
    # Patch whichever metadata module is available in this interpreter
    if importlib.util.find_spec("importlib.metadata") is not None:
        version_target = "importlib.metadata.version"
    else:
        version_target = "importlib_metadata.version"

    # Test with a module that definitely exists, behavior hinges on requirement
    with mock.patch(version_target) as version:
        version.return_value = "1.0.0"

        unsatisfied = lazy.load("math", require="somepkg >= 2.0")
        assert isinstance(unsatisfied, lazy.DelayedImportErrorModule)

        math = lazy.load("math", require="somepkg >= 1.0")
        assert math.sin(math.pi) == pytest.approx(0, 1e-6)

        # We can fail even after a successful import
        unsatisfied_again = lazy.load("math", require="somepkg >= 2.0")
        assert isinstance(unsatisfied_again, lazy.DelayedImportErrorModule)

    # When a module can be loaded but the version can't be checked,
    # raise a ValueError
    with pytest.raises(ValueError):
        lazy.load("math", require="somepkg >= 1.0")
def test_parallel_load():
    """Run the parallel-import script in a subprocess and require a clean exit.

    Skipped when numpy is not installed.
    """
    pytest.importorskip("numpy")

    script = os.path.join(os.path.dirname(__file__), "import_np_parallel.py")

    # ``check=True`` raises ``CalledProcessError`` on a non-zero exit code;
    # without it this test would pass even if the script crashed.
    subprocess.run([sys.executable, script], check=True)