This commit is contained in:
ton
2024-10-07 10:13:40 +07:00
parent aa1631742f
commit 3a7d696db6
9729 changed files with 1832837 additions and 161742 deletions

View File

@@ -1,26 +0,0 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
__all__ = [
"__title__",
"__summary__",
"__uri__",
"__version__",
"__author__",
"__email__",
"__license__",
"__copyright__",
]
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "21.3"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = "2014-2019 %s" % __author__

View File

@@ -2,24 +2,14 @@
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from .__about__ import (
__author__,
__copyright__,
__email__,
__license__,
__summary__,
__title__,
__uri__,
__version__,
)
__title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__all__ = [
"__title__",
"__summary__",
"__uri__",
"__version__",
"__author__",
"__email__",
"__license__",
"__copyright__",
]
__version__ = "24.1"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
__license__ = "BSD-2-Clause or Apache-2.0"
__copyright__ = "2014 %s" % __author__

View File

@@ -0,0 +1,110 @@
"""
ELF file parser.
This provides a class ``ELFFile`` that parses an ELF executable in a similar
interface to ``ZipFile``. Only the read interface is implemented.
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
from __future__ import annotations
import enum
import os
import struct
from typing import IO
class ELFInvalid(ValueError):
pass
class EIClass(enum.IntEnum):
C32 = 1
C64 = 2
class EIData(enum.IntEnum):
Lsb = 1
Msb = 2
class EMachine(enum.IntEnum):
I386 = 3
S390 = 22
Arm = 40
X8664 = 62
AArc64 = 183
class ELFFile:
"""
Representation of an ELF executable.
"""
def __init__(self, f: IO[bytes]) -> None:
self._f = f
try:
ident = self._read("16B")
except struct.error:
raise ELFInvalid("unable to parse identification")
magic = bytes(ident[:4])
if magic != b"\x7fELF":
raise ELFInvalid(f"invalid magic: {magic!r}")
self.capacity = ident[4] # Format for program header (bitness).
self.encoding = ident[5] # Data structure encoding (endianness).
try:
# e_fmt: Format for program header.
# p_fmt: Format for section header.
# p_idx: Indexes to find p_type, p_offset, and p_filesz.
e_fmt, self._p_fmt, self._p_idx = {
(1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
(1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
(2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
(2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
}[(self.capacity, self.encoding)]
except KeyError:
raise ELFInvalid(
f"unrecognized capacity ({self.capacity}) or "
f"encoding ({self.encoding})"
)
try:
(
_,
self.machine, # Architecture type.
_,
_,
self._e_phoff, # Offset of program header.
_,
self.flags, # Processor-specific flags.
_,
self._e_phentsize, # Size of section.
self._e_phnum, # Number of sections.
) = self._read(e_fmt)
except struct.error as e:
raise ELFInvalid("unable to parse machine and section information") from e
def _read(self, fmt: str) -> tuple[int, ...]:
return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
@property
def interpreter(self) -> str | None:
"""
The path recorded in the ``PT_INTERP`` section header.
"""
for index in range(self._e_phnum):
self._f.seek(self._e_phoff + self._e_phentsize * index)
try:
data = self._read(self._p_fmt)
except struct.error:
continue
if data[self._p_idx[0]] != 3: # Not PT_INTERP.
continue
self._f.seek(data[self._p_idx[1]])
return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
return None

View File

@@ -1,122 +1,72 @@
from __future__ import annotations
import collections
import contextlib
import functools
import os
import re
import struct
import sys
import warnings
from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple
from typing import Generator, Iterator, NamedTuple, Sequence
from ._elffile import EIClass, EIData, ELFFile, EMachine
EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
# Python does not provide platform information at sufficient granularity to
# identify the architecture of the running executable in some cases, so we
# determine it dynamically by reading the information from the running
# process. This only applies on Linux, which uses the ELF format.
class _ELFFileHeader:
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
class _InvalidELFFileHeader(ValueError):
"""
An invalid ELF file header was found.
"""
ELF_MAGIC_NUMBER = 0x7F454C46
ELFCLASS32 = 1
ELFCLASS64 = 2
ELFDATA2LSB = 1
ELFDATA2MSB = 2
EM_386 = 3
EM_S390 = 22
EM_ARM = 40
EM_X86_64 = 62
EF_ARM_ABIMASK = 0xFF000000
EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
def __init__(self, file: IO[bytes]) -> None:
def unpack(fmt: str) -> int:
try:
data = file.read(struct.calcsize(fmt))
result: Tuple[int, ...] = struct.unpack(fmt, data)
except struct.error:
raise _ELFFileHeader._InvalidELFFileHeader()
return result[0]
self.e_ident_magic = unpack(">I")
if self.e_ident_magic != self.ELF_MAGIC_NUMBER:
raise _ELFFileHeader._InvalidELFFileHeader()
self.e_ident_class = unpack("B")
if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}:
raise _ELFFileHeader._InvalidELFFileHeader()
self.e_ident_data = unpack("B")
if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}:
raise _ELFFileHeader._InvalidELFFileHeader()
self.e_ident_version = unpack("B")
self.e_ident_osabi = unpack("B")
self.e_ident_abiversion = unpack("B")
self.e_ident_pad = file.read(7)
format_h = "<H" if self.e_ident_data == self.ELFDATA2LSB else ">H"
format_i = "<I" if self.e_ident_data == self.ELFDATA2LSB else ">I"
format_q = "<Q" if self.e_ident_data == self.ELFDATA2LSB else ">Q"
format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q
self.e_type = unpack(format_h)
self.e_machine = unpack(format_h)
self.e_version = unpack(format_i)
self.e_entry = unpack(format_p)
self.e_phoff = unpack(format_p)
self.e_shoff = unpack(format_p)
self.e_flags = unpack(format_i)
self.e_ehsize = unpack(format_h)
self.e_phentsize = unpack(format_h)
self.e_phnum = unpack(format_h)
self.e_shentsize = unpack(format_h)
self.e_shnum = unpack(format_h)
self.e_shstrndx = unpack(format_h)
def _get_elf_header() -> Optional[_ELFFileHeader]:
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[ELFFile | None, None, None]:
try:
with open(sys.executable, "rb") as f:
elf_header = _ELFFileHeader(f)
except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
return None
return elf_header
with open(path, "rb") as f:
yield ELFFile(f)
except (OSError, TypeError, ValueError):
yield None
def _is_linux_armhf() -> bool:
def _is_linux_armhf(executable: str) -> bool:
# hard-float ABI can be detected from the ELF header of the running
# process
# https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
elf_header = _get_elf_header()
if elf_header is None:
return False
result = elf_header.e_ident_class == elf_header.ELFCLASS32
result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
result &= elf_header.e_machine == elf_header.EM_ARM
result &= (
elf_header.e_flags & elf_header.EF_ARM_ABIMASK
) == elf_header.EF_ARM_ABI_VER5
result &= (
elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD
) == elf_header.EF_ARM_ABI_FLOAT_HARD
return result
with _parse_elf(executable) as f:
return (
f is not None
and f.capacity == EIClass.C32
and f.encoding == EIData.Lsb
and f.machine == EMachine.Arm
and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5
and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD
)
def _is_linux_i686() -> bool:
elf_header = _get_elf_header()
if elf_header is None:
return False
result = elf_header.e_ident_class == elf_header.ELFCLASS32
result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
result &= elf_header.e_machine == elf_header.EM_386
return result
def _is_linux_i686(executable: str) -> bool:
with _parse_elf(executable) as f:
return (
f is not None
and f.capacity == EIClass.C32
and f.encoding == EIData.Lsb
and f.machine == EMachine.I386
)
def _have_compatible_abi(arch: str) -> bool:
if arch == "armv7l":
return _is_linux_armhf()
if arch == "i686":
return _is_linux_i686()
return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}
def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool:
if "armv7l" in archs:
return _is_linux_armhf(executable)
if "i686" in archs:
return _is_linux_i686(executable)
allowed_archs = {
"x86_64",
"aarch64",
"ppc64",
"ppc64le",
"s390x",
"loongarch64",
"riscv64",
}
return any(arch in allowed_archs for arch in archs)
# If glibc ever changes its major version, we need to know what the last
@@ -124,7 +74,7 @@ def _have_compatible_abi(arch: str) -> bool:
# For now, guess what the highest minor version might be, assume it will
# be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
_LAST_GLIBC_MINOR: dict[int, int] = collections.defaultdict(lambda: 50)
class _GLibCVersion(NamedTuple):
@@ -132,7 +82,7 @@ class _GLibCVersion(NamedTuple):
minor: int
def _glibc_version_string_confstr() -> Optional[str]:
def _glibc_version_string_confstr() -> str | None:
"""
Primary implementation of glibc_version_string using os.confstr.
"""
@@ -141,17 +91,17 @@ def _glibc_version_string_confstr() -> Optional[str]:
# platform module.
# https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
try:
# os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
version_string = os.confstr("CS_GNU_LIBC_VERSION")
# Should be a string like "glibc 2.17".
version_string: str | None = os.confstr("CS_GNU_LIBC_VERSION")
assert version_string is not None
_, version = version_string.split()
_, version = version_string.rsplit()
except (AssertionError, AttributeError, OSError, ValueError):
# os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
return None
return version
def _glibc_version_string_ctypes() -> Optional[str]:
def _glibc_version_string_ctypes() -> str | None:
"""
Fallback implementation of glibc_version_string using ctypes.
"""
@@ -195,12 +145,12 @@ def _glibc_version_string_ctypes() -> Optional[str]:
return version_str
def _glibc_version_string() -> Optional[str]:
def _glibc_version_string() -> str | None:
"""Returns glibc version string, or None if not using glibc."""
return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
def _parse_glibc_version(version_str: str) -> tuple[int, int]:
"""Parse glibc version.
We use a regexp instead of str.split because we want to discard any
@@ -211,16 +161,16 @@ def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
if not m:
warnings.warn(
"Expected glibc version with 2 components major.minor,"
" got: %s" % version_str,
f"Expected glibc version with 2 components major.minor,"
f" got: {version_str}",
RuntimeWarning,
)
return -1, -1
return int(m.group("major")), int(m.group("minor"))
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
@functools.lru_cache
def _get_glibc_version() -> tuple[int, int]:
version_str = _glibc_version_string()
if version_str is None:
return (-1, -1)
@@ -228,13 +178,13 @@ def _get_glibc_version() -> Tuple[int, int]:
# From PEP 513, PEP 600
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
sys_glibc = _get_glibc_version()
if sys_glibc < version:
return False
# Check for presence of _manylinux module.
try:
import _manylinux # noqa
import _manylinux
except ImportError:
return True
if hasattr(_manylinux, "manylinux_compatible"):
@@ -264,12 +214,22 @@ _LEGACY_MANYLINUX_MAP = {
}
def platform_tags(linux: str, arch: str) -> Iterator[str]:
if not _have_compatible_abi(arch):
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
"""Generate manylinux tags compatible to the current platform.
:param archs: Sequence of compatible architectures.
The first one shall be the closest to the actual architecture and be the part of
platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
The ``linux_`` prefix is assumed as a prerequisite for the current platform to
be manylinux-compatible.
:returns: An iterator of compatible manylinux tags.
"""
if not _have_compatible_abi(sys.executable, archs):
return
# Oldest glibc to be supported regardless of architecture is (2, 17).
too_old_glibc2 = _GLibCVersion(2, 16)
if arch in {"x86_64", "i686"}:
if set(archs) & {"x86_64", "i686"}:
# On x86/i686 also oldest glibc to be supported is (2, 5).
too_old_glibc2 = _GLibCVersion(2, 4)
current_glibc = _GLibCVersion(*_get_glibc_version())
@@ -283,19 +243,20 @@ def platform_tags(linux: str, arch: str) -> Iterator[str]:
for glibc_major in range(current_glibc.major - 1, 1, -1):
glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
for glibc_max in glibc_max_list:
if glibc_max.major == too_old_glibc2.major:
min_minor = too_old_glibc2.minor
else:
# For other glibc major versions oldest supported is (x, 0).
min_minor = -1
for glibc_minor in range(glibc_max.minor, min_minor, -1):
glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
tag = "manylinux_{}_{}".format(*glibc_version)
if _is_compatible(tag, arch, glibc_version):
yield linux.replace("linux", tag)
# Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
if glibc_version in _LEGACY_MANYLINUX_MAP:
legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
if _is_compatible(legacy_tag, arch, glibc_version):
yield linux.replace("linux", legacy_tag)
for arch in archs:
for glibc_max in glibc_max_list:
if glibc_max.major == too_old_glibc2.major:
min_minor = too_old_glibc2.minor
else:
# For other glibc major versions oldest supported is (x, 0).
min_minor = -1
for glibc_minor in range(glibc_max.minor, min_minor, -1):
glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
tag = "manylinux_{}_{}".format(*glibc_version)
if _is_compatible(arch, glibc_version):
yield f"{tag}_{arch}"
# Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
if glibc_version in _LEGACY_MANYLINUX_MAP:
legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
if _is_compatible(arch, glibc_version):
yield f"{legacy_tag}_{arch}"

View File

@@ -4,68 +4,15 @@ This module implements logic to detect if the currently running Python is
linked against musl, and what musl version is used.
"""
import contextlib
from __future__ import annotations
import functools
import operator
import os
import re
import struct
import subprocess
import sys
from typing import IO, Iterator, NamedTuple, Optional, Tuple
from typing import Iterator, NamedTuple, Sequence
def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]:
return struct.unpack(fmt, f.read(struct.calcsize(fmt)))
def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]:
"""Detect musl libc location by parsing the Python executable.
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
f.seek(0)
try:
ident = _read_unpacked(f, "16B")
except struct.error:
return None
if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF.
return None
f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version.
try:
# e_fmt: Format for program header.
# p_fmt: Format for section header.
# p_idx: Indexes to find p_type, p_offset, and p_filesz.
e_fmt, p_fmt, p_idx = {
1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit.
2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit.
}[ident[4]]
except KeyError:
return None
else:
p_get = operator.itemgetter(*p_idx)
# Find the interpreter section and return its content.
try:
_, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt)
except struct.error:
return None
for i in range(e_phnum + 1):
f.seek(e_phoff + e_phentsize * i)
try:
p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt))
except struct.error:
return None
if p_type != 3: # Not PT_INTERP.
continue
f.seek(p_offset)
interpreter = os.fsdecode(f.read(p_filesz)).strip("\0")
if "musl" not in interpreter:
return None
return interpreter
return None
from ._elffile import ELFFile
class _MuslVersion(NamedTuple):
@@ -73,7 +20,7 @@ class _MuslVersion(NamedTuple):
minor: int
def _parse_musl_version(output: str) -> Optional[_MuslVersion]:
def _parse_musl_version(output: str) -> _MuslVersion | None:
lines = [n for n in (n.strip() for n in output.splitlines()) if n]
if len(lines) < 2 or lines[0][:4] != "musl":
return None
@@ -83,8 +30,8 @@ def _parse_musl_version(output: str) -> Optional[_MuslVersion]:
return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2)))
@functools.lru_cache()
def _get_musl_version(executable: str) -> Optional[_MuslVersion]:
@functools.lru_cache
def _get_musl_version(executable: str) -> _MuslVersion | None:
"""Detect currently-running musl runtime version.
This is done by checking the specified executable's dynamic linking
@@ -95,32 +42,34 @@ def _get_musl_version(executable: str) -> Optional[_MuslVersion]:
Version 1.2.2
Dynamic Program Loader
"""
with contextlib.ExitStack() as stack:
try:
f = stack.enter_context(open(executable, "rb"))
except OSError:
return None
ld = _parse_ld_musl_from_elf(f)
if not ld:
try:
with open(executable, "rb") as f:
ld = ELFFile(f).interpreter
except (OSError, TypeError, ValueError):
return None
proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True)
if ld is None or "musl" not in ld:
return None
proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True)
return _parse_musl_version(proc.stderr)
def platform_tags(arch: str) -> Iterator[str]:
def platform_tags(archs: Sequence[str]) -> Iterator[str]:
"""Generate musllinux tags compatible to the current platform.
:param arch: Should be the part of platform tag after the ``linux_``
prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a
prerequisite for the current platform to be musllinux-compatible.
:param archs: Sequence of compatible architectures.
The first one shall be the closest to the actual architecture and be the part of
platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
The ``linux_`` prefix is assumed as a prerequisite for the current platform to
be musllinux-compatible.
:returns: An iterator of compatible musllinux tags.
"""
sys_musl = _get_musl_version(sys.executable)
if sys_musl is None: # Python not dynamically linked against musl.
return
for minor in range(sys_musl.minor, -1, -1):
yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
for arch in archs:
for minor in range(sys_musl.minor, -1, -1):
yield f"musllinux_{sys_musl.major}_{minor}_{arch}"
if __name__ == "__main__": # pragma: no cover

View File

@@ -0,0 +1,354 @@
"""Handwritten parser of dependency specifiers.
The docstring for each __parse_* function contains EBNF-inspired grammar representing
the implementation.
"""
from __future__ import annotations
import ast
from typing import NamedTuple, Sequence, Tuple, Union
from ._tokenizer import DEFAULT_RULES, Tokenizer
class Node:
def __init__(self, value: str) -> None:
self.value = value
def __str__(self) -> str:
return self.value
def __repr__(self) -> str:
return f"<{self.__class__.__name__}('{self}')>"
def serialize(self) -> str:
raise NotImplementedError
class Variable(Node):
def serialize(self) -> str:
return str(self)
class Value(Node):
def serialize(self) -> str:
return f'"{self}"'
class Op(Node):
def serialize(self) -> str:
return str(self)
MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
class ParsedRequirement(NamedTuple):
name: str
url: str
extras: list[str]
specifier: str
marker: MarkerList | None
# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
"""
requirement = WS? IDENTIFIER WS? extras WS? requirement_details
"""
tokenizer.consume("WS")
name_token = tokenizer.expect(
"IDENTIFIER", expected="package name at the start of dependency specifier"
)
name = name_token.text
tokenizer.consume("WS")
extras = _parse_extras(tokenizer)
tokenizer.consume("WS")
url, specifier, marker = _parse_requirement_details(tokenizer)
tokenizer.expect("END", expected="end of dependency specifier")
return ParsedRequirement(name, url, extras, specifier, marker)
def _parse_requirement_details(
tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
"""
requirement_details = AT URL (WS requirement_marker?)?
| specifier WS? (requirement_marker)?
"""
specifier = ""
url = ""
marker = None
if tokenizer.check("AT"):
tokenizer.read()
tokenizer.consume("WS")
url_start = tokenizer.position
url = tokenizer.expect("URL", expected="URL after @").text
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
tokenizer.expect("WS", expected="whitespace after URL")
# The input might end after whitespace.
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
marker = _parse_requirement_marker(
tokenizer, span_start=url_start, after="URL and whitespace"
)
else:
specifier_start = tokenizer.position
specifier = _parse_specifier(tokenizer)
tokenizer.consume("WS")
if tokenizer.check("END", peek=True):
return (url, specifier, marker)
marker = _parse_requirement_marker(
tokenizer,
span_start=specifier_start,
after=(
"version specifier"
if specifier
else "name and no valid version specifier"
),
)
return (url, specifier, marker)
def _parse_requirement_marker(
tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
"""
requirement_marker = SEMICOLON marker WS?
"""
if not tokenizer.check("SEMICOLON"):
tokenizer.raise_syntax_error(
f"Expected end or semicolon (after {after})",
span_start=span_start,
)
tokenizer.read()
marker = _parse_marker(tokenizer)
tokenizer.consume("WS")
return marker
def _parse_extras(tokenizer: Tokenizer) -> list[str]:
"""
extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
"""
if not tokenizer.check("LEFT_BRACKET", peek=True):
return []
with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS")
return extras
def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
"""
extras_list = identifier (wsp* ',' wsp* identifier)*
"""
extras: list[str] = []
if not tokenizer.check("IDENTIFIER"):
return extras
extras.append(tokenizer.read().text)
while True:
tokenizer.consume("WS")
if tokenizer.check("IDENTIFIER", peek=True):
tokenizer.raise_syntax_error("Expected comma between extra names")
elif not tokenizer.check("COMMA"):
break
tokenizer.read()
tokenizer.consume("WS")
extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
extras.append(extra_token.text)
return extras
def _parse_specifier(tokenizer: Tokenizer) -> str:
"""
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS?
"""
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS")
return parsed_specifiers
def _parse_version_many(tokenizer: Tokenizer) -> str:
"""
version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
"""
parsed_specifiers = ""
while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS")
if not tokenizer.check("COMMA"):
break
parsed_specifiers += tokenizer.read().text
tokenizer.consume("WS")
return parsed_specifiers
# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
retval = _parse_marker(tokenizer)
tokenizer.expect("END", expected="end of marker expression")
return retval
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
"""
marker = marker_atom (BOOLOP marker_atom)+
"""
expression = [_parse_marker_atom(tokenizer)]
while tokenizer.check("BOOLOP"):
token = tokenizer.read()
expr_right = _parse_marker_atom(tokenizer)
expression.extend((token.text, expr_right))
return expression
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
"""
marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
| WS? marker_item WS?
"""
tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS")
else:
marker = _parse_marker_item(tokenizer)
tokenizer.consume("WS")
return marker
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
"""
marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
"""
tokenizer.consume("WS")
marker_var_left = _parse_marker_var(tokenizer)
tokenizer.consume("WS")
marker_op = _parse_marker_op(tokenizer)
tokenizer.consume("WS")
marker_var_right = _parse_marker_var(tokenizer)
tokenizer.consume("WS")
return (marker_var_left, marker_op, marker_var_right)
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
"""
marker_var = VARIABLE | QUOTED_STRING
"""
if tokenizer.check("VARIABLE"):
return process_env_var(tokenizer.read().text.replace(".", "_"))
elif tokenizer.check("QUOTED_STRING"):
return process_python_str(tokenizer.read().text)
else:
tokenizer.raise_syntax_error(
message="Expected a marker variable or quoted string"
)
def process_env_var(env_var: str) -> Variable:
if env_var in ("platform_python_implementation", "python_implementation"):
return Variable("platform_python_implementation")
else:
return Variable(env_var)
def process_python_str(python_str: str) -> Value:
value = ast.literal_eval(python_str)
return Value(str(value))
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
"""
marker_op = IN | NOT IN | OP
"""
if tokenizer.check("IN"):
tokenizer.read()
return Op("in")
elif tokenizer.check("NOT"):
tokenizer.read()
tokenizer.expect("WS", expected="whitespace after 'not'")
tokenizer.expect("IN", expected="'in' after 'not'")
return Op("not in")
elif tokenizer.check("OP"):
return Op(tokenizer.read().text)
else:
return tokenizer.raise_syntax_error(
"Expected marker operator, one of "
"<=, <, !=, ==, >=, >, ~=, ===, in, not in"
)

View File

@@ -0,0 +1,194 @@
from __future__ import annotations
import contextlib
import re
from dataclasses import dataclass
from typing import Iterator, NoReturn
from .specifiers import Specifier
@dataclass
class Token:
name: str
text: str
position: int
class ParserSyntaxError(Exception):
"""The provided source text could not be parsed correctly."""
def __init__(
self,
message: str,
*,
source: str,
span: tuple[int, int],
) -> None:
self.span = span
self.message = message
self.source = source
super().__init__()
def __str__(self) -> str:
marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
return "\n ".join([self.message, self.source, marker])
DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
"LEFT_PARENTHESIS": r"\(",
"RIGHT_PARENTHESIS": r"\)",
"LEFT_BRACKET": r"\[",
"RIGHT_BRACKET": r"\]",
"SEMICOLON": r";",
"COMMA": r",",
"QUOTED_STRING": re.compile(
r"""
(
('[^']*')
|
("[^"]*")
)
""",
re.VERBOSE,
),
"OP": r"(===|==|~=|!=|<=|>=|<|>)",
"BOOLOP": r"\b(or|and)\b",
"IN": r"\bin\b",
"NOT": r"\bnot\b",
"VARIABLE": re.compile(
r"""
\b(
python_version
|python_full_version
|os[._]name
|sys[._]platform
|platform_(release|system)
|platform[._](version|machine|python_implementation)
|python_implementation
|implementation_(name|version)
|extra
)\b
""",
re.VERBOSE,
),
"SPECIFIER": re.compile(
Specifier._operator_regex_str + Specifier._version_regex_str,
re.VERBOSE | re.IGNORECASE,
),
"AT": r"\@",
"URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+",
"END": r"$",
}
class Tokenizer:
"""Context-sensitive token parsing.
Provides methods to examine the input stream to check whether the next token
matches.
"""
def __init__(
self,
source: str,
*,
rules: dict[str, str | re.Pattern[str]],
) -> None:
self.source = source
self.rules: dict[str, re.Pattern[str]] = {
name: re.compile(pattern) for name, pattern in rules.items()
}
self.next_token: Token | None = None
self.position = 0
def consume(self, name: str) -> None:
"""Move beyond provided token name, if at current position."""
if self.check(name):
self.read()
def check(self, name: str, *, peek: bool = False) -> bool:
"""Check whether the next token has the provided name.
By default, if the check succeeds, the token *must* be read before
another check. If `peek` is set to `True`, the token is not loaded and
would need to be checked again.
"""
assert (
self.next_token is None
), f"Cannot check for {name!r}, already have {self.next_token!r}"
assert name in self.rules, f"Unknown token name: {name!r}"
expression = self.rules[name]
match = expression.match(self.source, self.position)
if match is None:
return False
if not peek:
self.next_token = Token(name, match[0], self.position)
return True
def expect(self, name: str, *, expected: str) -> Token:
"""Expect a certain token name next, failing with a syntax error otherwise.
The token is *not* read.
"""
if not self.check(name):
raise self.raise_syntax_error(f"Expected {expected}")
return self.read()
def read(self) -> Token:
"""Consume the next token and return it."""
token = self.next_token
assert token is not None
self.position += len(token.text)
self.next_token = None
return token
def raise_syntax_error(
self,
message: str,
*,
span_start: int | None = None,
span_end: int | None = None,
) -> NoReturn:
"""Raise ParserSyntaxError at the given position."""
span = (
self.position if span_start is None else span_start,
self.position if span_end is None else span_end,
)
raise ParserSyntaxError(
message,
source=self.source,
span=span,
)
@contextlib.contextmanager
def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token):
open_position = self.position
self.read()
else:
open_position = None
yield
if open_position is None:
return
if not self.check(close_token):
self.raise_syntax_error(
f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position,
)
self.read()

View File

@@ -2,25 +2,19 @@
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import operator
import os
import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from pip._vendor.pyparsing import ( # noqa: N817
Forward,
Group,
Literal as L,
ParseException,
ParseResults,
QuotedString,
ZeroOrMore,
stringEnd,
stringStart,
)
from typing import Any, Callable, TypedDict, cast
from ._parser import MarkerAtom, MarkerList, Op, Value, Variable
from ._parser import parse_marker as _parse_marker
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
__all__ = [
"InvalidMarker",
@@ -52,103 +46,97 @@ class UndefinedEnvironmentName(ValueError):
"""
class Node:
def __init__(self, value: Any) -> None:
self.value = value
class Environment(TypedDict):
implementation_name: str
"""The implementation's identifier, e.g. ``'cpython'``."""
def __str__(self) -> str:
return str(self.value)
implementation_version: str
"""
The implementation's version, e.g. ``'3.13.0a2'`` for CPython 3.13.0a2, or
``'7.3.13'`` for PyPy3.10 v7.3.13.
"""
def __repr__(self) -> str:
return f"<{self.__class__.__name__}('{self}')>"
os_name: str
"""
The value of :py:data:`os.name`. The name of the operating system dependent module
imported, e.g. ``'posix'``.
"""
def serialize(self) -> str:
raise NotImplementedError
platform_machine: str
"""
Returns the machine type, e.g. ``'i386'``.
An empty string if the value cannot be determined.
"""
platform_release: str
"""
The system's release, e.g. ``'2.2.0'`` or ``'NT'``.
An empty string if the value cannot be determined.
"""
platform_system: str
"""
The system/OS name, e.g. ``'Linux'``, ``'Windows'`` or ``'Java'``.
An empty string if the value cannot be determined.
"""
platform_version: str
"""
The system's release version, e.g. ``'#3 on degas'``.
An empty string if the value cannot be determined.
"""
python_full_version: str
"""
The Python version as string ``'major.minor.patchlevel'``.
Note that unlike the Python :py:data:`sys.version`, this value will always include
the patchlevel (it defaults to 0).
"""
platform_python_implementation: str
"""
A string identifying the Python implementation, e.g. ``'CPython'``.
"""
python_version: str
"""The Python version as string ``'major.minor'``."""
sys_platform: str
"""
This string contains a platform identifier that can be used to append
platform-specific components to :py:data:`sys.path`, for instance.
For Unix systems, except on Linux and AIX, this is the lowercased OS name as
returned by ``uname -s`` with the first part of the version as returned by
``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, at the time when Python
was built.
"""
class Variable(Node):
def serialize(self) -> str:
return str(self)
class Value(Node):
def serialize(self) -> str:
return f'"{self}"'
class Op(Node):
def serialize(self) -> str:
return str(self)
VARIABLE = (
L("implementation_version")
| L("platform_python_implementation")
| L("implementation_name")
| L("python_full_version")
| L("platform_release")
| L("platform_version")
| L("platform_machine")
| L("platform_system")
| L("python_version")
| L("sys_platform")
| L("os_name")
| L("os.name") # PEP-345
| L("sys.platform") # PEP-345
| L("platform.version") # PEP-345
| L("platform.machine") # PEP-345
| L("platform.python_implementation") # PEP-345
| L("python_implementation") # undocumented setuptools legacy
| L("extra") # PEP-508
)
ALIASES = {
"os.name": "os_name",
"sys.platform": "sys_platform",
"platform.version": "platform_version",
"platform.machine": "platform_machine",
"platform.python_implementation": "platform_python_implementation",
"python_implementation": "platform_python_implementation",
}
VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0])))
VERSION_CMP = (
L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<")
)
MARKER_OP = VERSION_CMP | L("not in") | L("in")
MARKER_OP.setParseAction(lambda s, l, t: Op(t[0]))
MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))
BOOLOP = L("and") | L("or")
MARKER_VAR = VARIABLE | MARKER_VALUE
MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
MARKER = stringStart + MARKER_EXPR + stringEnd
def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]:
if isinstance(results, ParseResults):
return [_coerce_parse_result(i) for i in results]
else:
return results
def _normalize_extra_values(results: Any) -> Any:
"""
Normalize extra values.
"""
if isinstance(results[0], tuple):
lhs, op, rhs = results[0]
if isinstance(lhs, Variable) and lhs.value == "extra":
normalized_extra = canonicalize_name(rhs.value)
rhs = Value(normalized_extra)
elif isinstance(rhs, Variable) and rhs.value == "extra":
normalized_extra = canonicalize_name(lhs.value)
lhs = Value(normalized_extra)
results[0] = lhs, op, rhs
return results
def _format_marker(
marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True
marker: list[str] | MarkerAtom | str, first: bool | None = True
) -> str:
assert isinstance(marker, (list, tuple, str))
# Sometimes we have a structure like [[...]] which is a single item list
@@ -174,7 +162,7 @@ def _format_marker(
return marker
_operators: Dict[str, Operator] = {
_operators: dict[str, Operator] = {
"in": lambda lhs, rhs: lhs in rhs,
"not in": lambda lhs, rhs: lhs not in rhs,
"<": operator.lt,
@@ -192,35 +180,29 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool:
except InvalidSpecifier:
pass
else:
return spec.contains(lhs)
return spec.contains(lhs, prereleases=True)
oper: Optional[Operator] = _operators.get(op.serialize())
oper: Operator | None = _operators.get(op.serialize())
if oper is None:
raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.")
return oper(lhs, rhs)
class Undefined:
pass
def _normalize(*values: str, key: str) -> tuple[str, ...]:
# PEP 685 Comparison of extra names for optional distribution dependencies
# https://peps.python.org/pep-0685/
# > When comparing extra names, tools MUST normalize the names being
# > compared using the semantics outlined in PEP 503 for names
if key == "extra":
return tuple(canonicalize_name(v) for v in values)
# other environment markers don't have such standards
return values
_undefined = Undefined()
def _get_env(environment: Dict[str, str], name: str) -> str:
value: Union[str, Undefined] = environment.get(name, _undefined)
if isinstance(value, Undefined):
raise UndefinedEnvironmentName(
f"{name!r} does not exist in evaluation environment."
)
return value
def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool:
groups: List[List[bool]] = [[]]
def _evaluate_markers(markers: MarkerList, environment: dict[str, str]) -> bool:
groups: list[list[bool]] = [[]]
for marker in markers:
assert isinstance(marker, (list, tuple, str))
@@ -231,12 +213,15 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool:
lhs, op, rhs = marker
if isinstance(lhs, Variable):
lhs_value = _get_env(environment, lhs.value)
environment_key = lhs.value
lhs_value = environment[environment_key]
rhs_value = rhs.value
else:
lhs_value = lhs.value
rhs_value = _get_env(environment, rhs.value)
environment_key = rhs.value
rhs_value = environment[environment_key]
lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key)
groups[-1].append(_eval_op(lhs_value, op, rhs_value))
else:
assert marker in ["and", "or"]
@@ -246,7 +231,7 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool:
return any(all(item) for item in groups)
def format_full_version(info: "sys._version_info") -> str:
def format_full_version(info: sys._version_info) -> str:
version = "{0.major}.{0.minor}.{0.micro}".format(info)
kind = info.releaselevel
if kind != "final":
@@ -254,7 +239,7 @@ def format_full_version(info: "sys._version_info") -> str:
return version
def default_environment() -> Dict[str, str]:
def default_environment() -> Environment:
iver = format_full_version(sys.implementation.version)
implementation_name = sys.implementation.name
return {
@@ -274,13 +259,29 @@ def default_environment() -> Dict[str, str]:
class Marker:
def __init__(self, marker: str) -> None:
# Note: We create a Marker object without calling this constructor in
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _coerce_parse_result(MARKER.parseString(marker))
except ParseException as e:
raise InvalidMarker(
f"Invalid marker: {marker!r}, parse error at "
f"{marker[e.loc : e.loc + 8]!r}"
)
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#
# For example, the following expression:
# python_version > "3.6" or (python_version == "3.6" and os_name == "unix")
#
# is parsed into:
# [
# (<Variable('python_version')>, <Op('>')>, <Value('3.6')>),
# 'and',
# [
# (<Variable('python_version')>, <Op('==')>, <Value('3.6')>),
# 'or',
# (<Variable('os_name')>, <Op('==')>, <Value('unix')>)
# ]
# ]
except ParserSyntaxError as e:
raise InvalidMarker(str(e)) from e
def __str__(self) -> str:
return _format_marker(self._markers)
@@ -288,7 +289,16 @@ class Marker:
def __repr__(self) -> str:
return f"<Marker('{self}')>"
def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool:
def __hash__(self) -> int:
return hash((self.__class__.__name__, str(self)))
def __eq__(self, other: Any) -> bool:
if not isinstance(other, Marker):
return NotImplemented
return str(self) == str(other)
def evaluate(self, environment: dict[str, str] | None = None) -> bool:
"""Evaluate a marker.
Return the boolean from evaluating the given marker against the
@@ -297,8 +307,19 @@ class Marker:
The environment is determined from the current Python process.
"""
current_environment = default_environment()
current_environment = cast("dict[str, str]", default_environment())
current_environment["extra"] = ""
# Work around platform.python_version() returning something that is not PEP 440
# compliant for non-tagged Python builds. We preserve default_environment()'s
# behavior of returning platform.python_version() verbatim, and leave it to the
# caller to provide a syntactically valid version if they want to override it.
if current_environment["python_full_version"].endswith("+"):
current_environment["python_full_version"] += "local"
if environment is not None:
current_environment.update(environment)
# The API used to allow setting extra to None. We need to handle this
# case for backwards compatibility.
if current_environment["extra"] is None:
current_environment["extra"] = ""
return _evaluate_markers(self._markers, current_environment)

View File

@@ -0,0 +1,804 @@
from __future__ import annotations
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import typing
from typing import (
Any,
Callable,
Generic,
Literal,
TypedDict,
cast,
)
from . import requirements, specifiers, utils
from . import version as version_module
T = typing.TypeVar("T")
try:
ExceptionGroup
except NameError: # pragma: no cover
class ExceptionGroup(Exception):
"""A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.
If :external:exc:`ExceptionGroup` is already defined by Python itself,
that version is used instead.
"""
message: str
exceptions: list[Exception]
def __init__(self, message: str, exceptions: list[Exception]) -> None:
self.message = message
self.exceptions = exceptions
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"
else: # pragma: no cover
ExceptionGroup = ExceptionGroup
class InvalidMetadata(ValueError):
"""A metadata field contains invalid data."""
field: str
"""The name of the field that contains invalid data."""
def __init__(self, field: str, message: str) -> None:
self.field = field
super().__init__(message)
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# formats offer some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.
Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
field have a key with a plural name. See :class:`Metadata` whose attributes
match the keys of this dictionary.
Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.
"""
# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: list[str]
summary: str
description: str
keywords: list[str]
home_page: str
author: str
author_email: str
license: str
# Metadata 1.1 - PEP 314
supported_platforms: list[str]
download_url: str
classifiers: list[str]
requires: list[str]
provides: list[str]
obsoletes: list[str]
# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: list[str]
provides_dist: list[str]
obsoletes_dist: list[str]
requires_python: str
requires_external: list[str]
project_urls: dict[str, str]
# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emitting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.
# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: list[str]
# Metadata 2.2 - PEP 643
dynamic: list[str]
# Metadata 2.3 - PEP 685
# No new fields were added in PEP 685, just some edge case were
# tightened up to provide better interoptability.
_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}
_LIST_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}
_DICT_FIELDS = {
"project_urls",
}
def _parse_keywords(data: str) -> list[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: list[str]) -> dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about, is data that does
# not have a ',' at all to split the label from the Value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in a accumulating dict.
#
# The other potentional issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer with what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
"""Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).
This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.
The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.
"""
raw: dict[str, str | list[str] | dict[str, str]] = {}
unparsed: dict[str, list[str]] = {}
if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()
# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name) or []
# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all() ),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))
# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
# That should be fine since, if mojibacked happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
# The Header object stores it's data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: list[tuple[bytes, str | None]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))
# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)
# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation, we've encountered a key that
# we don't know what it means, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue
# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so it'll just
# throw it in our unparseable data and move on.
else:
unparsed[name] = value
# We need to support getting the Description from the message payload in
# addition to getting it from the the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description, if so then both
# it, and this body move to unparseable.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload
# We need to cast our `raw` to a metadata, because a TypedDict only support
# literal key names, but we're computing our key names on purpose, but the
# way this function is implemented, our `TypedDict` can only have valid key
# names.
return cast(RawMetadata, raw), unparsed
_NOT_FOUND = object()
# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
class _Validator(Generic[T]):
"""Validate a metadata field.
All _process_*() methods correspond to a core metadata field. The method is
called with the field's raw value. If the raw value is valid it is returned
in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
as appropriate).
"""
name: str
raw_name: str
added: _MetadataVersion
def __init__(
self,
*,
added: _MetadataVersion = "1.0",
) -> None:
self.added = added
def __set_name__(self, _owner: Metadata, name: str) -> None:
self.name = name
self.raw_name = _RAW_TO_EMAIL_MAPPING[name]
def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
# With Python 3.8, the caching can be replaced with functools.cached_property().
# No need to check the cache as attribute lookup will resolve into the
# instance's __dict__ before __get__ is called.
cache = instance.__dict__
value = instance._raw.get(self.name)
# To make the _process_* methods easier, we'll check if the value is None
# and if this field is NOT a required attribute, and if both of those
# things are true, we'll skip the the converter. This will mean that the
# converters never have to deal with the None union.
if self.name in _REQUIRED_ATTRS or value is not None:
try:
converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
except AttributeError:
pass
else:
value = converter(value)
cache[self.name] = value
try:
del instance._raw[self.name] # type: ignore[misc]
except KeyError:
pass
return cast(T, value)
def _invalid_metadata(
self, msg: str, cause: Exception | None = None
) -> InvalidMetadata:
exc = InvalidMetadata(
self.raw_name, msg.format_map({"field": repr(self.raw_name)})
)
exc.__cause__ = cause
return exc
def _process_metadata_version(self, value: str) -> _MetadataVersion:
# Implicitly makes Metadata-Version required.
if value not in _VALID_METADATA_VERSIONS:
raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
return cast(_MetadataVersion, value)
def _process_name(self, value: str) -> str:
if not value:
raise self._invalid_metadata("{field} is a required field")
# Validate the name as a side-effect.
try:
utils.canonicalize_name(value, validate=True)
except utils.InvalidName as exc:
raise self._invalid_metadata(
f"{value!r} is invalid for {{field}}", cause=exc
)
else:
return value
def _process_version(self, value: str) -> version_module.Version:
if not value:
raise self._invalid_metadata("{field} is a required field")
try:
return version_module.parse(value)
except version_module.InvalidVersion as exc:
raise self._invalid_metadata(
f"{value!r} is invalid for {{field}}", cause=exc
)
def _process_summary(self, value: str) -> str:
"""Check the field contains no newlines."""
if "\n" in value:
raise self._invalid_metadata("{field} must be a single line")
return value
def _process_description_content_type(self, value: str) -> str:
content_types = {"text/plain", "text/x-rst", "text/markdown"}
message = email.message.EmailMessage()
message["content-type"] = value
content_type, parameters = (
# Defaults to `text/plain` if parsing failed.
message.get_content_type().lower(),
message["content-type"].params,
)
# Check if content-type is valid or defaulted to `text/plain` and thus was
# not parseable.
if content_type not in content_types or content_type not in value.lower():
raise self._invalid_metadata(
f"{{field}} must be one of {list(content_types)}, not {value!r}"
)
charset = parameters.get("charset", "UTF-8")
if charset != "UTF-8":
raise self._invalid_metadata(
f"{{field}} can only specify the UTF-8 charset, not {list(charset)}"
)
markdown_variants = {"GFM", "CommonMark"}
variant = parameters.get("variant", "GFM") # Use an acceptable default.
if content_type == "text/markdown" and variant not in markdown_variants:
raise self._invalid_metadata(
f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
f"not {variant!r}",
)
return value
def _process_dynamic(self, value: list[str]) -> list[str]:
for dynamic_field in map(str.lower, value):
if dynamic_field in {"name", "version", "metadata-version"}:
raise self._invalid_metadata(
f"{value!r} is not allowed as a dynamic field"
)
elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
raise self._invalid_metadata(f"{value!r} is not a valid dynamic field")
return list(map(str.lower, value))
def _process_provides_extra(
self,
value: list[str],
) -> list[utils.NormalizedName]:
normalized_names = []
try:
for name in value:
normalized_names.append(utils.canonicalize_name(name, validate=True))
except utils.InvalidName as exc:
raise self._invalid_metadata(
f"{name!r} is invalid for {{field}}", cause=exc
)
else:
return normalized_names
def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
try:
return specifiers.SpecifierSet(value)
except specifiers.InvalidSpecifier as exc:
raise self._invalid_metadata(
f"{value!r} is invalid for {{field}}", cause=exc
)
def _process_requires_dist(
self,
value: list[str],
) -> list[requirements.Requirement]:
reqs = []
try:
for req in value:
reqs.append(requirements.Requirement(req))
except requirements.InvalidRequirement as exc:
raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc)
else:
return reqs
class Metadata:
"""Representation of distribution metadata.
Compared to :class:`RawMetadata`, this class provides objects representing
metadata fields instead of only using built-in types. Any invalid metadata
will cause :exc:`InvalidMetadata` to be raised (with a
:py:attr:`~BaseException.__cause__` attribute as appropriate).
"""
_raw: RawMetadata
@classmethod
def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
"""Create an instance from :class:`RawMetadata`.
If *validate* is true, all metadata will be validated. All exceptions
related to validation will be gathered and raised as an :class:`ExceptionGroup`.
"""
ins = cls()
ins._raw = data.copy() # Mutations occur due to caching enriched values.
if validate:
exceptions: list[Exception] = []
try:
metadata_version = ins.metadata_version
metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
except InvalidMetadata as metadata_version_exc:
exceptions.append(metadata_version_exc)
metadata_version = None
# Make sure to check for the fields that are present, the required
# fields (so their absence can be reported).
fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
# Remove fields that have already been checked.
fields_to_check -= {"metadata_version"}
for key in fields_to_check:
try:
if metadata_version:
# Can't use getattr() as that triggers descriptor protocol which
# will fail due to no value for the instance argument.
try:
field_metadata_version = cls.__dict__[key].added
except KeyError:
exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
exceptions.append(exc)
continue
field_age = _VALID_METADATA_VERSIONS.index(
field_metadata_version
)
if field_age > metadata_age:
field = _RAW_TO_EMAIL_MAPPING[key]
exc = InvalidMetadata(
field,
"{field} introduced in metadata version "
"{field_metadata_version}, not {metadata_version}",
)
exceptions.append(exc)
continue
getattr(ins, key)
except InvalidMetadata as exc:
exceptions.append(exc)
if exceptions:
raise ExceptionGroup("invalid metadata", exceptions)
return ins
@classmethod
def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
"""Parse metadata from email headers.
If *validate* is true, the metadata will be validated. All exceptions
related to validation will be gathered and raised as an :class:`ExceptionGroup`.
"""
raw, unparsed = parse_email(data)
if validate:
exceptions: list[Exception] = []
for unparsed_key in unparsed:
if unparsed_key in _EMAIL_TO_RAW_MAPPING:
message = f"{unparsed_key!r} has invalid data"
else:
message = f"unrecognized field: {unparsed_key!r}"
exceptions.append(InvalidMetadata(unparsed_key, message))
if exceptions:
raise ExceptionGroup("unparsed", exceptions)
try:
return cls.from_raw(raw, validate=validate)
except ExceptionGroup as exc_group:
raise ExceptionGroup(
"invalid or unparsed metadata", exc_group.exceptions
) from None
metadata_version: _Validator[_MetadataVersion] = _Validator()
""":external:ref:`core-metadata-metadata-version`
(required; validated to be a valid metadata version)"""
name: _Validator[str] = _Validator()
""":external:ref:`core-metadata-name`
(required; validated using :func:`~packaging.utils.canonicalize_name` and its
*validate* parameter)"""
version: _Validator[version_module.Version] = _Validator()
""":external:ref:`core-metadata-version` (required)"""
dynamic: _Validator[list[str] | None] = _Validator(
added="2.2",
)
""":external:ref:`core-metadata-dynamic`
(validated against core metadata field names and lowercased)"""
platforms: _Validator[list[str] | None] = _Validator()
""":external:ref:`core-metadata-platform`"""
supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
""":external:ref:`core-metadata-supported-platform`"""
summary: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-summary` (validated to contain no newlines)"""
description: _Validator[str | None] = _Validator() # TODO 2.1: can be in body
""":external:ref:`core-metadata-description`"""
description_content_type: _Validator[str | None] = _Validator(added="2.1")
""":external:ref:`core-metadata-description-content-type` (validated)"""
keywords: _Validator[list[str] | None] = _Validator()
""":external:ref:`core-metadata-keywords`"""
home_page: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-home-page`"""
download_url: _Validator[str | None] = _Validator(added="1.1")
""":external:ref:`core-metadata-download-url`"""
author: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-author`"""
author_email: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-author-email`"""
maintainer: _Validator[str | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-maintainer`"""
maintainer_email: _Validator[str | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-maintainer-email`"""
license: _Validator[str | None] = _Validator()
""":external:ref:`core-metadata-license`"""
classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
""":external:ref:`core-metadata-classifier`"""
requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
added="1.2"
)
""":external:ref:`core-metadata-requires-dist`"""
requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
added="1.2"
)
""":external:ref:`core-metadata-requires-python`"""
# Because `Requires-External` allows for non-PEP 440 version specifiers, we
# don't do any processing on the values.
requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-requires-external`"""
project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-project-url`"""
# PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
# regardless of metadata version.
provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
added="2.1",
)
""":external:ref:`core-metadata-provides-extra`"""
provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-provides-dist`"""
obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
""":external:ref:`core-metadata-obsoletes-dist`"""
requires: _Validator[list[str] | None] = _Validator(added="1.1")
"""``Requires`` (deprecated)"""
provides: _Validator[list[str] | None] = _Validator(added="1.1")
"""``Provides`` (deprecated)"""
obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
"""``Obsoletes`` (deprecated)"""

View File

@@ -1,27 +1,15 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import re
import string
import urllib.parse
from typing import List, Optional as TOptional, Set
from typing import Any, Iterator
from pip._vendor.pyparsing import ( # noqa
Combine,
Literal as L,
Optional,
ParseException,
Regex,
Word,
ZeroOrMore,
originalTextFor,
stringEnd,
stringStart,
)
from .markers import MARKER_EXPR, Marker
from .specifiers import LegacySpecifier, Specifier, SpecifierSet
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
from .utils import canonicalize_name
class InvalidRequirement(ValueError):
@@ -30,60 +18,6 @@ class InvalidRequirement(ValueError):
"""
ALPHANUM = Word(string.ascii_letters + string.digits)
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))
NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER
URI = Regex(r"[^ ]+")("url")
URL = AT + URI
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")
VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
VERSION_MANY = Combine(
VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False
)("_raw_spec")
_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "")
VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier")
VERSION_SPEC.setParseAction(lambda s, l, t: t[1])
MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker")
MARKER_EXPR.setParseAction(
lambda s, l, t: Marker(s[t._original_start : t._original_end])
)
MARKER_SEPARATOR = SEMICOLON
MARKER = MARKER_SEPARATOR + MARKER_EXPR
VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER)
URL_AND_MARKER = URL + Optional(MARKER)
NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER)
REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd
# pyparsing isn't thread safe during initialization, so we do it eagerly, see
# issue #104
REQUIREMENT.parseString("x[]")
class Requirement:
"""Parse a requirement.
@@ -99,48 +33,59 @@ class Requirement:
def __init__(self, requirement_string: str) -> None:
try:
req = REQUIREMENT.parseString(requirement_string)
except ParseException as e:
raise InvalidRequirement(
f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}'
)
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e
self.name: str = req.name
if req.url:
parsed_url = urllib.parse.urlparse(req.url)
if parsed_url.scheme == "file":
if urllib.parse.urlunparse(parsed_url) != req.url:
raise InvalidRequirement("Invalid URL given")
elif not (parsed_url.scheme and parsed_url.netloc) or (
not parsed_url.scheme and not parsed_url.netloc
):
raise InvalidRequirement(f"Invalid URL: {req.url}")
self.url: TOptional[str] = req.url
else:
self.url = None
self.extras: Set[str] = set(req.extras.asList() if req.extras else [])
self.specifier: SpecifierSet = SpecifierSet(req.specifier)
self.marker: TOptional[Marker] = req.marker if req.marker else None
self.name: str = parsed.name
self.url: str | None = parsed.url or None
self.extras: set[str] = set(parsed.extras or [])
self.specifier: SpecifierSet = SpecifierSet(parsed.specifier)
self.marker: Marker | None = None
if parsed.marker is not None:
self.marker = Marker.__new__(Marker)
self.marker._markers = _normalize_extra_values(parsed.marker)
def __str__(self) -> str:
parts: List[str] = [self.name]
def _iter_parts(self, name: str) -> Iterator[str]:
yield name
if self.extras:
formatted_extras = ",".join(sorted(self.extras))
parts.append(f"[{formatted_extras}]")
yield f"[{formatted_extras}]"
if self.specifier:
parts.append(str(self.specifier))
yield str(self.specifier)
if self.url:
parts.append(f"@ {self.url}")
yield f"@ {self.url}"
if self.marker:
parts.append(" ")
yield " "
if self.marker:
parts.append(f"; {self.marker}")
yield f"; {self.marker}"
return "".join(parts)
def __str__(self) -> str:
return "".join(self._iter_parts(self.name))
def __repr__(self) -> str:
return f"<Requirement('{self}')>"
def __hash__(self) -> int:
return hash(
(
self.__class__.__name__,
*self._iter_parts(canonicalize_name(self.name)),
)
)
def __eq__(self, other: Any) -> bool:
if not isinstance(other, Requirement):
return NotImplemented
return (
canonicalize_name(self.name) == canonicalize_name(other.name)
and self.extras == other.extras
and self.specifier == other.specifier
and self.url == other.url
and self.marker == other.marker
)

File diff suppressed because it is too large Load Diff

View File

@@ -2,21 +2,21 @@
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import logging
import platform
import re
import struct
import subprocess
import sys
import sysconfig
from importlib.machinery import EXTENSION_SUFFIXES
from typing import (
Dict,
FrozenSet,
Iterable,
Iterator,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
)
@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
PythonVersion = Sequence[int]
MacVersion = Tuple[int, int]
INTERPRETER_SHORT_NAMES: Dict[str, str] = {
INTERPRETER_SHORT_NAMES: dict[str, str] = {
"python": "py", # Generic.
"cpython": "cp",
"pypy": "pp",
@@ -36,7 +36,7 @@ INTERPRETER_SHORT_NAMES: Dict[str, str] = {
}
_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32
_32_BIT_INTERPRETER = struct.calcsize("P") == 4
class Tag:
@@ -93,7 +93,7 @@ class Tag:
return f"<{self} @ {id(self)}>"
def parse_tag(tag: str) -> FrozenSet[Tag]:
def parse_tag(tag: str) -> frozenset[Tag]:
"""
Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances.
@@ -109,8 +109,8 @@ def parse_tag(tag: str) -> FrozenSet[Tag]:
return frozenset(tags)
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name)
def _get_config_var(name: str, warn: bool = False) -> int | str | None:
value: int | str | None = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
@@ -119,23 +119,40 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_")
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
def _abi3_applies(python_version: PythonVersion) -> bool:
def _is_threaded_cpython(abis: list[str]) -> bool:
"""
Determine if the ABI corresponds to a threaded (`--disable-gil`) build.
The threaded builds are indicated by a "t" in the abiflags.
"""
if len(abis) == 0:
return False
# expect e.g., cp313
m = re.match(r"cp\d+(.*)", abis[0])
if not m:
return False
abiflags = m.group(1)
return "t" in abiflags
def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool:
"""
Determine if the Python version supports abi3.
PEP 384 was first implemented in Python 3.2.
PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`)
builds do not support abi3.
"""
return len(python_version) > 1 and tuple(python_version) >= (3, 2)
return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]:
def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> list[str]:
py_version = tuple(py_version) # To allow for version comparison.
abis = []
version = _version_nodot(py_version[:2])
debug = pymalloc = ucs4 = ""
threading = debug = pymalloc = ucs4 = ""
with_debug = _get_config_var("Py_DEBUG", warn)
has_refcount = hasattr(sys, "gettotalrefcount")
# Windows doesn't set Py_DEBUG, so checking for support of debug-compiled
@@ -144,6 +161,8 @@ def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]:
has_ext = "_d.pyd" in EXTENSION_SUFFIXES
if with_debug or (with_debug is None and (has_refcount or has_ext)):
debug = "d"
if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn):
threading = "t"
if py_version < (3, 8):
with_pymalloc = _get_config_var("WITH_PYMALLOC", warn)
if with_pymalloc or with_pymalloc is None:
@@ -157,20 +176,15 @@ def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]:
elif debug:
# Debug builds can also load "normal" extension modules.
# We can also assume no UCS-4 or pymalloc requirement.
abis.append(f"cp{version}")
abis.insert(
0,
"cp{version}{debug}{pymalloc}{ucs4}".format(
version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4
),
)
abis.append(f"cp{version}{threading}")
abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}")
return abis
def cpython_tags(
python_version: Optional[PythonVersion] = None,
abis: Optional[Iterable[str]] = None,
platforms: Optional[Iterable[str]] = None,
python_version: PythonVersion | None = None,
abis: Iterable[str] | None = None,
platforms: Iterable[str] | None = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
@@ -211,11 +225,14 @@ def cpython_tags(
for abi in abis:
for platform_ in platforms:
yield Tag(interpreter, abi, platform_)
if _abi3_applies(python_version):
threading = _is_threaded_cpython(abis)
use_abi3 = _abi3_applies(python_version, threading)
if use_abi3:
yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms)
yield from (Tag(interpreter, "none", platform_) for platform_ in platforms)
if _abi3_applies(python_version):
if use_abi3:
for minor_version in range(python_version[1] - 1, 1, -1):
for platform_ in platforms:
interpreter = "cp{version}".format(
@@ -224,16 +241,51 @@ def cpython_tags(
yield Tag(interpreter, "abi3", platform_)
def _generic_abi() -> Iterator[str]:
abi = sysconfig.get_config_var("SOABI")
if abi:
yield _normalize_string(abi)
def _generic_abi() -> list[str]:
"""
Return the ABI tag based on EXT_SUFFIX.
"""
# The following are examples of `EXT_SUFFIX`.
# We want to keep the parts which are related to the ABI and remove the
# parts which are related to the platform:
# - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310
# - mac: '.cpython-310-darwin.so' => cp310
# - win: '.cp310-win_amd64.pyd' => cp310
# - win: '.pyd' => cp37 (uses _cpython_abis())
# - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73
# - graalpy: '.graalpy-38-native-x86_64-darwin.dylib'
# => graalpy_38_native
ext_suffix = _get_config_var("EXT_SUFFIX", warn=True)
if not isinstance(ext_suffix, str) or ext_suffix[0] != ".":
raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')")
parts = ext_suffix.split(".")
if len(parts) < 3:
# CPython3.7 and earlier uses ".pyd" on Windows.
return _cpython_abis(sys.version_info[:2])
soabi = parts[1]
if soabi.startswith("cpython"):
# non-windows
abi = "cp" + soabi.split("-")[1]
elif soabi.startswith("cp"):
# windows
abi = soabi.split("-")[0]
elif soabi.startswith("pypy"):
abi = "-".join(soabi.split("-")[:2])
elif soabi.startswith("graalpy"):
abi = "-".join(soabi.split("-")[:3])
elif soabi:
# pyston, ironpython, others?
abi = soabi
else:
return []
return [_normalize_string(abi)]
def generic_tags(
interpreter: Optional[str] = None,
abis: Optional[Iterable[str]] = None,
platforms: Optional[Iterable[str]] = None,
interpreter: str | None = None,
abis: Iterable[str] | None = None,
platforms: Iterable[str] | None = None,
*,
warn: bool = False,
) -> Iterator[Tag]:
@@ -251,8 +303,9 @@ def generic_tags(
interpreter = "".join([interp_name, interp_version])
if abis is None:
abis = _generic_abi()
else:
abis = list(abis)
platforms = list(platforms or platform_tags())
abis = list(abis)
if "none" not in abis:
abis.append("none")
for abi in abis:
@@ -276,9 +329,9 @@ def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]:
def compatible_tags(
python_version: Optional[PythonVersion] = None,
interpreter: Optional[str] = None,
platforms: Optional[Iterable[str]] = None,
python_version: PythonVersion | None = None,
interpreter: str | None = None,
platforms: Iterable[str] | None = None,
) -> Iterator[Tag]:
"""
Yields the sequence of tags that are compatible with a specific version of Python.
@@ -310,7 +363,7 @@ def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str:
return "i386"
def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]:
def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> list[str]:
formats = [cpu_arch]
if cpu_arch == "x86_64":
if version < (10, 4):
@@ -343,7 +396,7 @@ def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]:
def mac_platforms(
version: Optional[MacVersion] = None, arch: Optional[str] = None
version: MacVersion | None = None, arch: str | None = None
) -> Iterator[str]:
"""
Yields the platform tags for a macOS system.
@@ -356,6 +409,22 @@ def mac_platforms(
version_str, _, cpu_arch = platform.mac_ver()
if version is None:
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
if version == (10, 16):
# When built against an older macOS SDK, Python will report macOS 10.16
# instead of the real version.
version_str = subprocess.run(
[
sys.executable,
"-sS",
"-c",
"import platform; print(platform.mac_ver()[0])",
],
check=True,
env={"SYSTEM_VERSION_COMPAT": "0"},
stdout=subprocess.PIPE,
text=True,
).stdout
version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2])))
else:
version = version
if arch is None:
@@ -416,15 +485,21 @@ def mac_platforms(
def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]:
linux = _normalize_string(sysconfig.get_platform())
if not linux.startswith("linux_"):
# we should never be here, just yield the sysconfig one and return
yield linux
return
if is_32bit:
if linux == "linux_x86_64":
linux = "linux_i686"
elif linux == "linux_aarch64":
linux = "linux_armv7l"
linux = "linux_armv8l"
_, arch = linux.split("_", 1)
yield from _manylinux.platform_tags(linux, arch)
yield from _musllinux.platform_tags(arch)
yield linux
archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch])
yield from _manylinux.platform_tags(archs)
yield from _musllinux.platform_tags(archs)
for arch in archs:
yield f"linux_{arch}"
def _generic_platforms() -> Iterator[str]:
@@ -446,6 +521,9 @@ def platform_tags() -> Iterator[str]:
def interpreter_name() -> str:
"""
Returns the name of the running interpreter.
Some implementations have a reserved, two-letter abbreviation which will
be returned when appropriate.
"""
name = sys.implementation.name
return INTERPRETER_SHORT_NAMES.get(name) or name
@@ -482,6 +560,9 @@ def sys_tags(*, warn: bool = False) -> Iterator[Tag]:
yield from generic_tags()
if interp_name == "pp":
yield from compatible_tags(interpreter="pp3")
interp = "pp3"
elif interp_name == "cp":
interp = "cp" + interpreter_version(warn=warn)
else:
yield from compatible_tags()
interp = None
yield from compatible_tags(interpreter=interp)

View File

@@ -2,8 +2,10 @@
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import re
from typing import FrozenSet, NewType, Tuple, Union, cast
from typing import NewType, Tuple, Union, cast
from .tags import Tag, parse_tag
from .version import InvalidVersion, Version
@@ -12,6 +14,12 @@ BuildTag = Union[Tuple[()], Tuple[int, str]]
NormalizedName = NewType("NormalizedName", str)
class InvalidName(ValueError):
"""
An invalid distribution name; users should refer to the packaging user guide.
"""
class InvalidWheelFilename(ValueError):
"""
An invalid wheel filename was found, users should refer to PEP 427.
@@ -24,18 +32,31 @@ class InvalidSdistFilename(ValueError):
"""
# Core metadata spec for `Name`
_validate_regex = re.compile(
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
)
_canonicalize_regex = re.compile(r"[-_.]+")
_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$")
# PEP 427: The build number must start with a digit.
_build_tag_regex = re.compile(r"(\d+)(.*)")
def canonicalize_name(name: str) -> NormalizedName:
def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
if validate and not _validate_regex.match(name):
raise InvalidName(f"name is invalid: {name!r}")
# This is taken from PEP 503.
value = _canonicalize_regex.sub("-", name).lower()
return cast(NormalizedName, value)
def canonicalize_version(version: Union[Version, str]) -> str:
def is_normalized_name(name: str) -> bool:
return _normalized_regex.match(name) is not None
def canonicalize_version(
version: Version | str, *, strip_trailing_zero: bool = True
) -> str:
"""
This is very similar to Version.__str__, but has one subtle difference
with the way it handles the release segment.
@@ -56,8 +77,11 @@ def canonicalize_version(version: Union[Version, str]) -> str:
parts.append(f"{parsed.epoch}!")
# Release segment
# NB: This strips trailing '.0's to normalize
parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release)))
release_segment = ".".join(str(x) for x in parsed.release)
if strip_trailing_zero:
# NB: This strips trailing '.0's to normalize
release_segment = re.sub(r"(\.0)+$", "", release_segment)
parts.append(release_segment)
# Pre-release
if parsed.pre is not None:
@@ -80,7 +104,7 @@ def canonicalize_version(version: Union[Version, str]) -> str:
def parse_wheel_filename(
filename: str,
) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]:
) -> tuple[NormalizedName, Version, BuildTag, frozenset[Tag]]:
if not filename.endswith(".whl"):
raise InvalidWheelFilename(
f"Invalid wheel filename (extension must be '.whl'): {filename}"
@@ -95,11 +119,18 @@ def parse_wheel_filename(
parts = filename.split("-", dashes - 2)
name_part = parts[0]
# See PEP 427 for the rules on escaping the project name
# See PEP 427 for the rules on escaping the project name.
if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None:
raise InvalidWheelFilename(f"Invalid project name: {filename}")
name = canonicalize_name(name_part)
version = Version(parts[1])
try:
version = Version(parts[1])
except InvalidVersion as e:
raise InvalidWheelFilename(
f"Invalid wheel filename (invalid version): {filename}"
) from e
if dashes == 5:
build_part = parts[2]
build_match = _build_tag_regex.match(build_part)
@@ -114,7 +145,7 @@ def parse_wheel_filename(
return (name, version, build, tags)
def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]:
def parse_sdist_filename(filename: str) -> tuple[NormalizedName, Version]:
if filename.endswith(".tar.gz"):
file_stem = filename[: -len(".tar.gz")]
elif filename.endswith(".zip"):
@@ -132,5 +163,12 @@ def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]:
raise InvalidSdistFilename(f"Invalid sdist filename: {filename}")
name = canonicalize_name(name_part)
version = Version(version_part)
try:
version = Version(version_part)
except InvalidVersion as e:
raise InvalidSdistFilename(
f"Invalid sdist filename (invalid version): {filename}"
) from e
return (name, version)

View File

@@ -1,64 +1,73 @@
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
"""
.. testsetup::
from pip._vendor.packaging.version import parse, Version
"""
from __future__ import annotations
import collections
import itertools
import re
import warnings
from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union
from typing import Any, Callable, NamedTuple, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"]
__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"]
InfiniteTypes = Union[InfinityType, NegativeInfinityType]
PrePostDevType = Union[InfiniteTypes, Tuple[str, int]]
SubLocalType = Union[InfiniteTypes, int, str]
LocalType = Union[
LocalType = Tuple[Union[int, str], ...]
CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]]
CmpLocalType = Union[
NegativeInfinityType,
Tuple[
Union[
SubLocalType,
Tuple[SubLocalType, str],
Tuple[NegativeInfinityType, SubLocalType],
],
...,
],
Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...],
]
CmpKey = Tuple[
int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType
int,
Tuple[int, ...],
CmpPrePostDevType,
CmpPrePostDevType,
CmpPrePostDevType,
CmpLocalType,
]
LegacyCmpKey = Tuple[int, Tuple[str, ...]]
VersionComparisonMethod = Callable[
[Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool
]
_Version = collections.namedtuple(
"_Version", ["epoch", "release", "dev", "pre", "post", "local"]
)
VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool]
def parse(version: str) -> Union["LegacyVersion", "Version"]:
class _Version(NamedTuple):
epoch: int
release: tuple[int, ...]
dev: tuple[str, int] | None
pre: tuple[str, int] | None
post: tuple[str, int] | None
local: LocalType | None
def parse(version: str) -> Version:
"""Parse the given version string.
>>> parse('1.0.dev1')
<Version('1.0.dev1')>
:param version: The version string to parse.
:raises InvalidVersion: When the version string is not a valid version.
"""
Parse the given version string and return either a :class:`Version` object
or a :class:`LegacyVersion` object depending on if the given version is
a valid PEP 440 version or a legacy version.
"""
try:
return Version(version)
except InvalidVersion:
return LegacyVersion(version)
return Version(version)
class InvalidVersion(ValueError):
"""
An invalid version was found, users should refer to PEP 440.
"""Raised when a version string is not a valid version.
>>> Version("invalid")
Traceback (most recent call last):
...
packaging.version.InvalidVersion: Invalid version: 'invalid'
"""
class _BaseVersion:
_key: Union[CmpKey, LegacyCmpKey]
_key: tuple[Any, ...]
def __hash__(self) -> int:
return hash(self._key)
@@ -66,13 +75,13 @@ class _BaseVersion:
# Please keep the duplicated `isinstance` check
# in the six comparisons hereunder
# unless you find a way to avoid adding overhead function calls.
def __lt__(self, other: "_BaseVersion") -> bool:
def __lt__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key < other._key
def __le__(self, other: "_BaseVersion") -> bool:
def __le__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
@@ -84,13 +93,13 @@ class _BaseVersion:
return self._key == other._key
def __ge__(self, other: "_BaseVersion") -> bool:
def __ge__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key >= other._key
def __gt__(self, other: "_BaseVersion") -> bool:
def __gt__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
@@ -103,133 +112,16 @@ class _BaseVersion:
return self._key != other._key
class LegacyVersion(_BaseVersion):
def __init__(self, version: str) -> None:
self._version = str(version)
self._key = _legacy_cmpkey(self._version)
warnings.warn(
"Creating a LegacyVersion has been deprecated and will be "
"removed in the next major release",
DeprecationWarning,
)
def __str__(self) -> str:
return self._version
def __repr__(self) -> str:
return f"<LegacyVersion('{self}')>"
@property
def public(self) -> str:
return self._version
@property
def base_version(self) -> str:
return self._version
@property
def epoch(self) -> int:
return -1
@property
def release(self) -> None:
return None
@property
def pre(self) -> None:
return None
@property
def post(self) -> None:
return None
@property
def dev(self) -> None:
return None
@property
def local(self) -> None:
return None
@property
def is_prerelease(self) -> bool:
return False
@property
def is_postrelease(self) -> bool:
return False
@property
def is_devrelease(self) -> bool:
return False
_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE)
_legacy_version_replacement_map = {
"pre": "c",
"preview": "c",
"-": "final-",
"rc": "c",
"dev": "@",
}
def _parse_version_parts(s: str) -> Iterator[str]:
for part in _legacy_version_component_re.split(s):
part = _legacy_version_replacement_map.get(part, part)
if not part or part == ".":
continue
if part[:1] in "0123456789":
# pad for numeric comparison
yield part.zfill(8)
else:
yield "*" + part
# ensure that alpha/beta/candidate are before final
yield "*final"
def _legacy_cmpkey(version: str) -> LegacyCmpKey:
# We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch
# greater than or equal to 0. This will effectively put the LegacyVersion,
# which uses the defacto standard originally implemented by setuptools,
# as before all PEP 440 versions.
epoch = -1
# This scheme is taken from pkg_resources.parse_version setuptools prior to
# it's adoption of the packaging library.
parts: List[str] = []
for part in _parse_version_parts(version.lower()):
if part.startswith("*"):
# remove "-" before a prerelease tag
if part < "*final":
while parts and parts[-1] == "*final-":
parts.pop()
# remove trailing zeros from each series of numeric parts
while parts and parts[-1] == "00000000":
parts.pop()
parts.append(part)
return epoch, tuple(parts)
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
VERSION_PATTERN = r"""
_VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
(?P<pre_l>alpha|a|beta|b|preview|pre|c|rc)
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
@@ -253,12 +145,56 @@ VERSION_PATTERN = r"""
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
VERSION_PATTERN = _VERSION_PATTERN
"""
A string containing the regular expression used to match a valid version.
The pattern is not anchored at either end, and is intended for embedding in larger
expressions (for example, matching a version number as part of a file name). The
regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
flags set.
:meta hide-value:
"""
class Version(_BaseVersion):
"""This class abstracts handling of a project's versions.
A :class:`Version` instance is comparison aware and can be compared and
sorted using the standard Python interfaces.
>>> v1 = Version("1.0a5")
>>> v2 = Version("1.0")
>>> v1
<Version('1.0a5')>
>>> v2
<Version('1.0')>
>>> v1 < v2
True
>>> v1 == v2
False
>>> v1 > v2
False
>>> v1 >= v2
False
>>> v1 <= v2
True
"""
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None:
"""Initialize a Version object.
:param version:
The string representation of a version which will be parsed and normalized
before use.
:raises InvalidVersion:
If the ``version`` does not conform to PEP 440 in any way then this
exception will be raised.
"""
# Validate the version and parse it into pieces
match = self._regex.search(version)
@@ -288,9 +224,19 @@ class Version(_BaseVersion):
)
def __repr__(self) -> str:
"""A representation of the Version that shows all internal state.
>>> Version('1.0.0')
<Version('1.0.0')>
"""
return f"<Version('{self}')>"
def __str__(self) -> str:
"""A string representation of the version that can be rounded-tripped.
>>> str(Version("1.0a5"))
'1.0a5'
"""
parts = []
# Epoch
@@ -320,29 +266,77 @@ class Version(_BaseVersion):
@property
def epoch(self) -> int:
_epoch: int = self._version.epoch
return _epoch
"""The epoch of the version.
>>> Version("2.0.0").epoch
0
>>> Version("1!2.0.0").epoch
1
"""
return self._version.epoch
@property
def release(self) -> Tuple[int, ...]:
_release: Tuple[int, ...] = self._version.release
return _release
def release(self) -> tuple[int, ...]:
"""The components of the "release" segment of the version.
>>> Version("1.2.3").release
(1, 2, 3)
>>> Version("2.0.0").release
(2, 0, 0)
>>> Version("1!2.0.0.post0").release
(2, 0, 0)
Includes trailing zeroes but not the epoch or any pre-release / development /
post-release suffixes.
"""
return self._version.release
@property
def pre(self) -> Optional[Tuple[str, int]]:
_pre: Optional[Tuple[str, int]] = self._version.pre
return _pre
def pre(self) -> tuple[str, int] | None:
"""The pre-release segment of the version.
>>> print(Version("1.2.3").pre)
None
>>> Version("1.2.3a1").pre
('a', 1)
>>> Version("1.2.3b1").pre
('b', 1)
>>> Version("1.2.3rc1").pre
('rc', 1)
"""
return self._version.pre
@property
def post(self) -> Optional[int]:
def post(self) -> int | None:
"""The post-release number of the version.
>>> print(Version("1.2.3").post)
None
>>> Version("1.2.3.post1").post
1
"""
return self._version.post[1] if self._version.post else None
@property
def dev(self) -> Optional[int]:
def dev(self) -> int | None:
"""The development number of the version.
>>> print(Version("1.2.3").dev)
None
>>> Version("1.2.3.dev1").dev
1
"""
return self._version.dev[1] if self._version.dev else None
@property
def local(self) -> Optional[str]:
def local(self) -> str | None:
"""The local version segment of the version.
>>> print(Version("1.2.3").local)
None
>>> Version("1.2.3+abc").local
'abc'
"""
if self._version.local:
return ".".join(str(x) for x in self._version.local)
else:
@@ -350,10 +344,31 @@ class Version(_BaseVersion):
@property
def public(self) -> str:
"""The public portion of the version.
>>> Version("1.2.3").public
'1.2.3'
>>> Version("1.2.3+abc").public
'1.2.3'
>>> Version("1.2.3+abc.dev1").public
'1.2.3'
"""
return str(self).split("+", 1)[0]
@property
def base_version(self) -> str:
"""The "base version" of the version.
>>> Version("1.2.3").base_version
'1.2.3'
>>> Version("1.2.3+abc").base_version
'1.2.3'
>>> Version("1!1.2.3+abc.dev1").base_version
'1!1.2.3'
The "base version" is the public version of the project without any pre or post
release markers.
"""
parts = []
# Epoch
@@ -367,33 +382,78 @@ class Version(_BaseVersion):
@property
def is_prerelease(self) -> bool:
"""Whether this version is a pre-release.
>>> Version("1.2.3").is_prerelease
False
>>> Version("1.2.3a1").is_prerelease
True
>>> Version("1.2.3b1").is_prerelease
True
>>> Version("1.2.3rc1").is_prerelease
True
>>> Version("1.2.3dev1").is_prerelease
True
"""
return self.dev is not None or self.pre is not None
@property
def is_postrelease(self) -> bool:
"""Whether this version is a post-release.
>>> Version("1.2.3").is_postrelease
False
>>> Version("1.2.3.post1").is_postrelease
True
"""
return self.post is not None
@property
def is_devrelease(self) -> bool:
"""Whether this version is a development release.
>>> Version("1.2.3").is_devrelease
False
>>> Version("1.2.3.dev1").is_devrelease
True
"""
return self.dev is not None
@property
def major(self) -> int:
"""The first item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").major
1
"""
return self.release[0] if len(self.release) >= 1 else 0
@property
def minor(self) -> int:
"""The second item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").minor
2
>>> Version("1").minor
0
"""
return self.release[1] if len(self.release) >= 2 else 0
@property
def micro(self) -> int:
"""The third item of :attr:`release` or ``0`` if unavailable.
>>> Version("1.2.3").micro
3
>>> Version("1").micro
0
"""
return self.release[2] if len(self.release) >= 3 else 0
def _parse_letter_version(
letter: str, number: Union[str, bytes, SupportsInt]
) -> Optional[Tuple[str, int]]:
letter: str | None, number: str | bytes | SupportsInt | None
) -> tuple[str, int] | None:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
@@ -429,7 +489,7 @@ def _parse_letter_version(
_local_version_separators = re.compile(r"[\._-]")
def _parse_local_version(local: str) -> Optional[LocalType]:
def _parse_local_version(local: str | None) -> LocalType | None:
"""
Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
"""
@@ -443,13 +503,12 @@ def _parse_local_version(local: str) -> Optional[LocalType]:
def _cmpkey(
epoch: int,
release: Tuple[int, ...],
pre: Optional[Tuple[str, int]],
post: Optional[Tuple[str, int]],
dev: Optional[Tuple[str, int]],
local: Optional[Tuple[SubLocalType]],
release: tuple[int, ...],
pre: tuple[str, int] | None,
post: tuple[str, int] | None,
dev: tuple[str, int] | None,
local: LocalType | None,
) -> CmpKey:
# When we compare a release version, we want to compare it with all of the
# trailing zeros removed. So we'll use a reverse the list, drop all the now
# leading zeros until we come to something non zero, then take the rest
@@ -464,7 +523,7 @@ def _cmpkey(
# if there is not a pre or a post segment. If we have one of those then
# the normal sorting rules will handle this case correctly.
if pre is None and post is None and dev is not None:
_pre: PrePostDevType = NegativeInfinity
_pre: CmpPrePostDevType = NegativeInfinity
# Versions without a pre-release (except as noted above) should sort after
# those with one.
elif pre is None:
@@ -474,21 +533,21 @@ def _cmpkey(
# Versions without a post segment should sort before those with one.
if post is None:
_post: PrePostDevType = NegativeInfinity
_post: CmpPrePostDevType = NegativeInfinity
else:
_post = post
# Versions without a development segment should sort after those with one.
if dev is None:
_dev: PrePostDevType = Infinity
_dev: CmpPrePostDevType = Infinity
else:
_dev = dev
if local is None:
# Versions without a local segment should sort before those with one.
_local: LocalType = NegativeInfinity
_local: CmpLocalType = NegativeInfinity
else:
# Versions with a local segment need that segment parsed to implement
# the sorting rules in PEP440.