This commit is contained in:
ton
2024-10-07 10:13:40 +07:00
parent aa1631742f
commit 3a7d696db6
9729 changed files with 1832837 additions and 161742 deletions

View File

@@ -0,0 +1,124 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from ._license import _license
from ._version import _version
if TYPE_CHECKING:
from ._typing import Buffer
try:
from ._pybase64 import (
_get_simd_flags_compile, # noqa: F401
_get_simd_flags_runtime, # noqa: F401
_get_simd_name,
_get_simd_path,
_set_simd_path, # noqa: F401
b64decode,
b64decode_as_bytearray,
b64encode,
b64encode_as_string,
encodebytes,
)
except ImportError:
from ._fallback import (
_get_simd_name,
_get_simd_path,
b64decode,
b64decode_as_bytearray,
b64encode,
b64encode_as_string,
encodebytes,
)
# Names exported by ``from pybase64 import *``.
# ``get_version`` and ``get_license_text`` are defined in this module but are
# not listed here, so they are only reachable through explicit attribute
# access or an explicit import.
__all__ = (
    "b64decode",
    "b64decode_as_bytearray",
    "b64encode",
    "b64encode_as_string",
    "encodebytes",
    "standard_b64encode",
    "standard_b64decode",
    "urlsafe_b64encode",
    "urlsafe_b64decode",
)
# Public alias of the version string imported from ``._version``.
__version__ = _version
def get_license_text() -> str:
    """Return the pybase64 license text as a :class:`str`.

    The returned text also contains the libbase64 license.
    """
    license_text: str = _license
    return license_text
def get_version() -> str:
    """Return the pybase64 version as a :class:`str`.

    The string also tells whether the C extension is in use, and if so which
    SIMD path is active, e.g. ``1.0.0 (C extension active - AVX2)``.
    """
    backend = _get_simd_name(_get_simd_path())
    # The pure-Python implementation reports itself as "fallback".
    if backend == "fallback":
        return f"{__version__} (C extension inactive)"
    return f"{__version__} (C extension active - {backend})"
def standard_b64encode(s: Buffer) -> bytes:
    """Encode ``s`` with the standard Base64 alphabet.

    ``s`` is the :term:`bytes-like object` to encode.

    Returns the encoded data as a :class:`bytes` object.
    """
    encoded = b64encode(s)
    return encoded
def standard_b64decode(s: str | Buffer) -> bytes:
    """Decode data that was encoded with the standard Base64 alphabet.

    ``s`` is a :term:`bytes-like object` or an ASCII string to decode.

    Returns the decoded data as a :class:`bytes` object.

    Characters outside the standard alphabet are discarded before the padding
    check; a :exc:`binascii.Error` is raised if the input is incorrectly
    padded.
    """
    decoded = b64decode(s)
    return decoded
def urlsafe_b64encode(s: Buffer) -> bytes:
    """Encode ``s`` with the URL- and filesystem-safe Base64 alphabet.

    ``s`` is the :term:`bytes-like object` to encode.

    Returns the encoded data as a :class:`bytes` object. In this alphabet
    '-' replaces '+' and '_' replaces '/'.
    """
    url_alphabet = b"-_"
    return b64encode(s, url_alphabet)
def urlsafe_b64decode(s: str | Buffer) -> bytes:
    """Decode data encoded with the URL- and filesystem-safe Base64 alphabet.

    ``s`` is a :term:`bytes-like object` or an ASCII string to decode.

    Returns the decoded data as a :class:`bytes` object. In this alphabet
    '-' replaces '+' and '_' replaces '/'.

    Characters that belong neither to the URL-safe alphabet nor to the pair
    '+' / '/' are discarded before the padding check; a
    :exc:`binascii.Error` is raised if the input is incorrectly padded.
    """
    url_alphabet = b"-_"
    return b64decode(s, url_alphabet)

View File

@@ -0,0 +1,299 @@
from __future__ import annotations
import argparse
import base64
import sys
from base64 import b64decode as b64decodeValidate
from base64 import encodebytes as b64encodebytes
from collections.abc import Sequence
from timeit import default_timer as timer
from typing import TYPE_CHECKING, Any, BinaryIO, cast
import pybase64
if TYPE_CHECKING:
from pybase64._typing import Decode, Encode, EncodeBytes
def _timed_calls(
    duration: float, func: Any, *args: Any, **kwargs: Any
) -> tuple[int, float, Any]:
    """Calibrate, then time, repeated calls of ``func(*args, **kwargs)``.

    The first loop calls ``func`` until more than ``duration`` seconds have
    elapsed and counts the calls. The second loop repeats exactly that many
    calls and is the only part that is timed.

    Returns ``(call_count, elapsed_seconds, last_result)``.
    """
    count = 0
    start = timer()
    while True:
        result = func(*args, **kwargs)
        count += 1
        if timer() - start > duration:
            break
    start = timer()
    for _ in range(count):
        result = func(*args, **kwargs)
    elapsed = timer() - start
    return count, elapsed, result


def _print_throughput(
    func: Any, count: int, elapsed: float, payload_len: int, src_len: int, dst_len: int
) -> None:
    """Print one benchmark line: qualified name of ``func``, MB/s and sizes."""
    label = func.__module__ + "." + func.__name__ + ":"
    megabytes = (count * payload_len) / (1024.0 * 1024.0)
    # Guard against a zero reading from a coarse timer instead of crashing.
    rate = megabytes / elapsed if elapsed > 0 else float("inf")
    print(f"{label:<32s} {rate:9.3f} MB/s ({src_len:,d} bytes -> {dst_len:,d} bytes)")


def bench_one(
    duration: float,
    data: bytes,
    enc: Encode,
    dec: Decode,
    encbytes: EncodeBytes,
    altchars: bytes | None = None,
    validate: bool = False,
) -> None:
    """Benchmark one encoder/decoder implementation on ``data``.

    Prints one throughput line per measured function. ``encbytes`` is only
    measured when ``validate`` is false and ``altchars`` is ``None``; ``enc``
    and ``dec`` are always measured. Throughput is always computed from
    ``len(data)``, including for the decoder.

    :param duration: time budget in seconds; each measurement uses half of it
        for calibration and repeats the same number of calls for timing.
    :param data: raw payload to encode.
    :param enc: encoder called as ``enc(data, altchars=altchars)``.
    :param dec: decoder called as
        ``dec(encoded, altchars=altchars, validate=validate)``.
    :param encbytes: encoder called as ``encbytes(data)``.
    :param altchars: alternative alphabet forwarded to ``enc`` and ``dec``.
    :param validate: validation flag forwarded to ``dec``.
    :raises AssertionError: if decoding the encoded payload does not give
        back ``data``.
    """
    duration = duration / 2.0
    size = len(data)

    if not validate and altchars is None:
        count, elapsed, encodedcontent = _timed_calls(duration, encbytes, data)
        _print_throughput(encbytes, count, elapsed, size, size, len(encodedcontent))

    count, elapsed, encodedcontent = _timed_calls(duration, enc, data, altchars=altchars)
    _print_throughput(enc, count, elapsed, size, size, len(encodedcontent))

    # The decoder is fed the output of ``enc`` (not of ``encbytes``).
    count, elapsed, decodedcontent = _timed_calls(
        duration, dec, encodedcontent, altchars=altchars, validate=validate
    )
    _print_throughput(dec, count, elapsed, size, len(encodedcontent), size)

    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if decodedcontent != data:
        msg = "decoded content does not match the original data"
        raise AssertionError(msg)
def readall(file: BinaryIO) -> bytes:
    """Read and return everything from ``file``.

    A regular file is closed once it has been read. When ``file`` is
    ``sys.stdin`` it is left open and, if available, its underlying
    ``buffer`` is read instead.
    """
    reading_stdin = file == cast(BinaryIO, sys.stdin)
    if not reading_stdin:
        try:
            return file.read()
        finally:
            file.close()
    # stdin is never closed here.
    # Python 3 < 3.9 does not honor the binary flag,
    # read from the underlying buffer
    if hasattr(file, "buffer"):
        raw = cast(BinaryIO, file.buffer)
        return raw.read()
    return file.read()  # pragma: no cover
def writeall(file: BinaryIO, data: bytes) -> None:
    """Write ``data`` to ``file``.

    A regular file is closed after the write. When ``file`` is
    ``sys.stdout`` it is left open and, if available, the data is written to
    its underlying ``buffer`` instead.
    """
    if not (file == cast(BinaryIO, sys.stdout)):
        try:
            file.write(data)
        finally:
            file.close()
        return
    # stdout is never closed here.
    # Python 3 does not honor the binary flag,
    # write to the underlying buffer
    if hasattr(file, "buffer"):
        file.buffer.write(data)
        return
    file.write(data)  # pragma: no cover
def benchmark(duration: float, input: BinaryIO) -> None:
    """Benchmark pybase64 against the standard library on the content of ``input``.

    Prints the package name and version, then for every combination of
    alphabet (standard, URL-safe) and validation flag runs ``bench_one`` first
    with the pybase64 functions and then with the :mod:`base64` ones.

    :param duration: time budget in seconds forwarded to ``bench_one``.
    :param input: binary file providing the benchmark payload.
    """
    print(__package__ + " " + pybase64.get_version())
    data = readall(input)
    # (encoder, decoder, encodebytes) triples, benchmarked in this order.
    implementations = (
        (pybase64.b64encode, pybase64.b64decode, pybase64.encodebytes),
        (base64.b64encode, b64decodeValidate, b64encodebytes),
    )
    for altchars in (None, b"-_"):
        for validate in (False, True):
            print(f"bench: altchars={altchars!r:s}, validate={validate!r:s}")
            for enc, dec, encbytes in implementations:
                bench_one(duration, data, enc, dec, encbytes, altchars, validate)
def encode(input: BinaryIO, altchars: bytes | None, output: BinaryIO) -> None:
    """Base64-encode the content of ``input`` and write it to ``output``.

    :param input: binary file to read the raw data from.
    :param altchars: alternative alphabet passed to ``pybase64.b64encode``.
    :param output: binary file receiving the encoded data.
    """
    raw = readall(input)
    encoded = pybase64.b64encode(raw, altchars)
    writeall(output, encoded)
def decode(input: BinaryIO, altchars: bytes | None, validate: bool, output: BinaryIO) -> None:
    """Base64-decode the content of ``input`` and write it to ``output``.

    :param input: binary file to read the encoded data from.
    :param altchars: alternative alphabet passed to ``pybase64.b64decode``.
    :param validate: validation flag passed to ``pybase64.b64decode``.
    :param output: binary file receiving the decoded data.
    """
    encoded = readall(input)
    decoded = pybase64.b64decode(encoded, altchars, validate)
    writeall(output, decoded)
class LicenseAction(argparse.Action):
    """Argparse action that prints a license text and exits the parser."""

    def __init__(
        self,
        option_strings: Sequence[str],
        dest: str,
        license: str | None = None,
        help: str | None = "show license information and exit",
    ):
        """Register a flag that takes no value and stores nothing.

        :param option_strings: option names, forwarded to argparse.
        :param dest: destination name, forwarded to argparse.
        :param license: text printed when the option is used.
        :param help: help message for the option.
        """
        # nargs=0 makes this a pure flag; SUPPRESS keeps ``dest`` out of the
        # parsed namespace.
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            default=argparse.SUPPRESS,
            help=help,
        )
        self.license = license

    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,  # noqa: ARG002
        values: str | Sequence[Any] | None,  # noqa: ARG002
        option_string: str | None = None,  # noqa: ARG002
    ) -> None:
        """Print the stored license text, then exit through ``parser.exit()``."""
        text = self.license
        print(text)
        parser.exit()
def _add_codec_arguments(
    codec_parser: argparse.ArgumentParser,
    *,
    input_help: str,
    url_help: str,
    altchars_help: str,
    output_help: str,
) -> None:
    """Add the arguments shared by the ``encode`` and ``decode`` sub-commands."""
    codec_parser.add_argument("input", type=argparse.FileType("rb"), help=input_help)
    # -u and -a both fill ``altchars`` and cannot be combined.
    alphabet = codec_parser.add_mutually_exclusive_group()
    alphabet.add_argument(
        "-u",
        "--url",
        action="store_const",
        const=b"-_",
        dest="altchars",
        help=url_help,
    )
    alphabet.add_argument(
        "-a",
        "--altchars",
        dest="altchars",
        help=altchars_help,
    )
    codec_parser.add_argument(
        "-o",
        "--output",
        dest="output",
        type=argparse.FileType("wb"),
        default=sys.stdout,
        help=output_help,
    )


def main(argv: Sequence[str] | None = None) -> None:
    """Entry point of the command-line tool.

    Builds a parser with the ``benchmark``, ``encode`` and ``decode``
    sub-commands, parses ``argv`` (``sys.argv[1:]`` when ``None``) and calls
    the function bound to the chosen sub-command with the parsed options as
    keyword arguments. An empty argument list is treated as ``-h``.
    """
    package_name = __package__

    # main parser
    parser = argparse.ArgumentParser(
        prog=package_name, description=package_name + " command-line tool."
    )
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=package_name + " " + pybase64.get_version(),
    )
    parser.add_argument("--license", action=LicenseAction, license=pybase64.get_license_text())

    commands = parser.add_subparsers(help="tool help")

    # benchmark sub-command
    bench_cmd = commands.add_parser("benchmark", help="-h for usage")
    bench_cmd.add_argument(
        "-d",
        "--duration",
        metavar="D",
        dest="duration",
        type=float,
        default=1.0,
        help="expected duration for a single encode or decode test",
    )
    bench_cmd.add_argument(
        "input", type=argparse.FileType("rb"), help="input file used for the benchmark"
    )
    bench_cmd.set_defaults(func=benchmark)

    # encode sub-command
    encode_cmd = commands.add_parser("encode", help="-h for usage")
    _add_codec_arguments(
        encode_cmd,
        input_help="input file to be encoded",
        url_help="use URL encoding",
        altchars_help="use alternative characters for encoding",
        output_help="encoded output file (default to stdout)",
    )
    encode_cmd.set_defaults(func=encode)

    # decode sub-command
    decode_cmd = commands.add_parser("decode", help="-h for usage")
    _add_codec_arguments(
        decode_cmd,
        input_help="input file to be decoded",
        url_help="use URL decoding",
        altchars_help="use alternative characters for decoding",
        output_help="decoded output file (default to stdout)",
    )
    decode_cmd.add_argument(
        "--no-validation",
        dest="validate",
        action="store_false",
        help="disable validation of the input data",
    )
    decode_cmd.set_defaults(func=decode)

    # ready, parse
    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        argv = ["-h"]
    options = vars(parser.parse_args(args=argv))
    # ``func`` was attached by set_defaults(); the rest are its keyword arguments.
    handler = options.pop("func")
    handler(**options)
# Run the command-line tool when this module is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,167 @@
from __future__ import annotations
from base64 import b64decode as builtin_decode
from base64 import b64encode as builtin_encode
from base64 import encodebytes as builtin_encodebytes
from binascii import Error as BinAsciiError
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ._typing import Buffer
_bytes_types = (bytes, bytearray) # Types acceptable as binary data
def _get_simd_name(flags: int) -> str:
assert flags == 0
return "fallback"
def _get_simd_path() -> int:
return 0
def _get_bytes(s: str | Buffer) -> bytes | bytearray:
    """Normalize ``s`` to binary data.

    * ``bytes`` / ``bytearray`` objects are returned unchanged.
    * ``str`` objects are encoded as ASCII.
    * Any other object is read through :class:`memoryview` and copied into a
      new ``bytes`` object.

    :raises ValueError: if ``s`` is a string with non-ASCII characters.
    :raises BufferError: if the buffer of ``s`` is not C-contiguous.
    :raises TypeError: if ``s`` is neither a string nor a bytes-like object.
    """
    if isinstance(s, _bytes_types):
        return s
    if isinstance(s, str):
        try:
            ascii_data = s.encode("ascii")
        except UnicodeEncodeError:
            msg = "string argument should contain only ASCII characters"
            raise ValueError(msg) from None
        return ascii_data
    try:
        view = memoryview(s)
        if view.c_contiguous:
            return view.tobytes()
        msg = f"{s.__class__.__name__!r:s}: underlying buffer is not C-contiguous"
        raise BufferError(msg)
    except TypeError:
        # memoryview() rejects objects that do not support the buffer protocol.
        msg = (
            "argument should be a bytes-like object or ASCII "
            f"string, not {s.__class__.__name__!r:s}"
        )
        raise TypeError(msg) from None
def b64decode(
    s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False
) -> bytes:
    """Decode bytes encoded with the standard Base64 alphabet.

    Argument ``s`` is a :term:`bytes-like object` or ASCII string to
    decode.

    Optional ``altchars`` must be a :term:`bytes-like object` or ASCII
    string of length 2 which specifies the alternative alphabet used instead
    of the '+' and '/' characters.

    If ``validate`` is ``False`` (the default), characters that are neither in
    the normal base-64 alphabet nor the alternative alphabet are discarded
    prior to the padding check.

    If ``validate`` is ``True``, these non-alphabet characters in the input
    result in a :exc:`binascii.Error`.

    The result is returned as a :class:`bytes` object.

    A :exc:`binascii.Error` is raised if ``s`` is incorrectly padded.
    """
    data = _get_bytes(s)
    alphabet = None if altchars is None else _get_bytes(altchars)

    if not validate:
        return builtin_decode(data, alphabet, validate=False)

    if len(data) % 4 != 0:
        msg = "Incorrect padding"
        raise BinAsciiError(msg)
    decoded = builtin_decode(data, alphabet, validate=False)

    # The lenient decoder silently drops characters outside the alphabet, so
    # a dropped character shows up as a result shorter than the input length
    # implies. The input is either empty or at least 4 bytes long here, and
    # only its last two bytes are inspected for '=' padding.
    padding = data[-2:].count(b"=")
    expected_len = 3 * (len(data) // 4) - padding
    if len(decoded) != expected_len:
        msg = "Non-base64 digit found"
        raise BinAsciiError(msg)
    return decoded
def b64decode_as_bytearray(
    s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False
) -> bytearray:
    """Decode bytes encoded with the standard Base64 alphabet.

    Same contract as :func:`b64decode` except for the return type.

    Argument ``s`` is a :term:`bytes-like object` or ASCII string to
    decode.

    Optional ``altchars`` must be a :term:`bytes-like object` or ASCII
    string of length 2 which specifies the alternative alphabet used instead
    of the '+' and '/' characters.

    If ``validate`` is ``False`` (the default), characters that are neither in
    the normal base-64 alphabet nor the alternative alphabet are discarded
    prior to the padding check. If ``validate`` is ``True``, these
    non-alphabet characters in the input result in a :exc:`binascii.Error`.

    The result is returned as a :class:`bytearray` object.

    A :exc:`binascii.Error` is raised if ``s`` is incorrectly padded.
    """
    decoded = b64decode(s, altchars=altchars, validate=validate)
    return bytearray(decoded)
def b64encode(s: Buffer, altchars: str | Buffer | None = None) -> bytes:
    """Encode bytes using the standard Base64 alphabet.

    Argument ``s`` is a :term:`bytes-like object` to encode.

    Optional ``altchars`` must be a byte string of length 2 which specifies
    an alternative alphabet for the '+' and '/' characters. This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The result is returned as a :class:`bytes` object.

    A :exc:`BufferError` is raised if the buffer of ``s`` is not C-contiguous.
    """
    # memoryview() also rejects objects that are not bytes-like (TypeError).
    if not memoryview(s).c_contiguous:
        msg = f"{s.__class__.__name__!r:s}: underlying buffer is not C-contiguous"
        raise BufferError(msg)
    alphabet = _get_bytes(altchars) if altchars is not None else None
    return builtin_encode(s, alphabet)
def b64encode_as_string(s: Buffer, altchars: str | Buffer | None = None) -> str:
    """Encode bytes using the standard Base64 alphabet.

    Same contract as :func:`b64encode` except for the return type.

    Argument ``s`` is a :term:`bytes-like object` to encode.

    Optional ``altchars`` must be a byte string of length 2 which specifies
    an alternative alphabet for the '+' and '/' characters. This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The result is returned as a :class:`str` object.
    """
    encoded = b64encode(s, altchars)
    return encoded.decode("ascii")
def encodebytes(s: Buffer) -> bytes:
    """Encode bytes as Base64 with line breaks, as per :rfc:`2045` (MIME).

    A newline (``b"\\n"``) is inserted after every 76 bytes of output and the
    result always ends with a newline.

    Argument ``s`` is a :term:`bytes-like object` to encode.

    The result is returned as a :class:`bytes` object.

    A :exc:`BufferError` is raised if the buffer of ``s`` is not C-contiguous.
    """
    if not memoryview(s).c_contiguous:
        msg = f"{s.__class__.__name__!r:s}: underlying buffer is not C-contiguous"
        raise BufferError(msg)
    encoded = builtin_encodebytes(s)
    return encoded

View File

@@ -0,0 +1,61 @@
# License text for pybase64 followed by the license text for libbase64.
# It is a runtime string: the package's ``get_license_text()`` returns it
# as-is.
# NOTE(review): the last separator line is built from two literals;
# presumably this keeps the closing line within a line-length limit - confirm.
_license = """pybase64
===============================================================================
BSD 2-Clause License
Copyright (c) 2017-2022, Matthieu Darbois
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
===============================================================================
libbase64
===============================================================================
Copyright (c) 2005-2007, Nick Galbreath
Copyright (c) 2015-2018, Wojciech Muła
Copyright (c) 2016-2017, Matthieu Darbois
Copyright (c) 2013-2022, Alfred Klomp
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
==========================================================================""" \
+ "====="

View File

@@ -0,0 +1 @@
# Type stub: ``_license`` is a module-level string.
_license: str

View File

@@ -0,0 +1,16 @@
from ._typing import Buffer
# Type stubs: signatures only, no implementation.
# These are the same names the package ``__init__`` imports from
# ``._pybase64``.

# SIMD helpers. Flags are plain integers.
# NOTE(review): the meaning of individual flag bits is not visible from these
# signatures - check the extension source before documenting them.
def _get_simd_flags_compile() -> int: ...
def _get_simd_flags_runtime() -> int: ...
def _get_simd_name(flags: int) -> str: ...
def _get_simd_path() -> int: ...
def _set_simd_path(flags: int) -> None: ...

# Decoders: accept an ASCII string or a bytes-like object, an optional
# alternative alphabet and a validation flag.
def b64decode(
    s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False
) -> bytes: ...
def b64decode_as_bytearray(
    s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False
) -> bytearray: ...

# Encoders: accept a bytes-like object; ``encodebytes`` takes no alphabet.
def b64encode(s: Buffer, altchars: str | Buffer | None = None) -> bytes: ...
def b64encode_as_string(s: Buffer, altchars: str | Buffer | None = None) -> str: ...
def encodebytes(s: Buffer) -> bytes: ...

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
import sys
from typing import Protocol
if sys.version_info < (3, 12):
from typing_extensions import Buffer
else:
from collections.abc import Buffer
class Decode(Protocol):
    """Structural type of a Base64 decoding callable.

    Used to annotate the ``dec`` argument of the command-line benchmark.
    """

    # The benchmark reads both attributes to label its output lines.
    __name__: str
    __module__: str

    def __call__(
        self, s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False
    ) -> bytes: ...
class Encode(Protocol):
    """Structural type of a Base64 encoding callable.

    Used to annotate the ``enc`` argument of the command-line benchmark.
    """

    # The benchmark reads both attributes to label its output lines.
    __name__: str
    __module__: str

    def __call__(self, s: Buffer, altchars: Buffer | None = None) -> bytes: ...
class EncodeBytes(Protocol):
    """Structural type of an ``encodebytes``-style callable (no alphabet argument).

    Used to annotate the ``encbytes`` argument of the command-line benchmark.
    """

    # The benchmark reads both attributes to label its output lines.
    __name__: str
    __module__: str

    def __call__(self, s: Buffer) -> bytes: ...
__all__ = ("Buffer", "Decode", "Encode", "EncodeBytes")

View File

@@ -0,0 +1,3 @@
from __future__ import annotations
# Package version string; the package ``__init__`` re-exports it as
# ``__version__``.
_version = "1.4.0"

View File