This commit is contained in:
ton
2024-10-07 10:13:40 +07:00
parent aa1631742f
commit 3a7d696db6
9729 changed files with 1832837 additions and 161742 deletions

View File

@@ -381,6 +381,7 @@ class ZipInfo (object):
'compress_size',
'file_size',
'_raw_time',
'_end_offset',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -415,6 +416,7 @@ class ZipInfo (object):
self.external_attr = 0 # External file attributes
self.compress_size = 0 # Size of the compressed file
self.file_size = 0 # Size of the uncompressed file
self._end_offset = None # Start of the next local header or central directory
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# CRC CRC-32 of the uncompressed file
@@ -531,6 +533,7 @@ class ZipInfo (object):
if up_unicode_name:
self.filename = _sanitize_filename(up_unicode_name)
else:
import warnings
warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2)
except struct.error as e:
raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e
@@ -579,7 +582,15 @@ class ZipInfo (object):
def is_dir(self):
"""Return True if this archive member is a directory."""
return self.filename.endswith('/')
if self.filename.endswith('/'):
return True
# The ZIP format specification requires to use forward slashes
# as the directory separator, but in practice some ZIP files
# created on Windows can use backward slashes. For compatibility
# with the extraction code which already handles this:
if os.path.altsep:
return self.filename.endswith((os.path.sep, os.path.altsep))
return False
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
@@ -1121,8 +1132,12 @@ class ZipExtFile(io.BufferedIOBase):
read_offset = new_pos - curr_pos
buff_offset = read_offset + self._offset
if buff_offset >= 0 and buff_offset < len(self._readbuffer):
# Just move the _offset index if the new position is in the _readbuffer
self._offset = buff_offset
read_offset = 0
# Fast seek uncompressed unencrypted file
if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
# disable CRC checking after first seeking - it would be invalid
self._expected_crc = None
# seek actual file taking already buffered data into account
@@ -1133,10 +1148,6 @@ class ZipExtFile(io.BufferedIOBase):
# flush read buffer
self._readbuffer = b''
self._offset = 0
elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
# Just move the _offset index if the new position is in the _readbuffer
self._offset = buff_offset
read_offset = 0
elif read_offset < 0:
# Position is before the current position. Reset the ZipExtFile
self._fileobj.seek(self._orig_compress_start)
@@ -1473,6 +1484,12 @@ class ZipFile:
if self.debug > 2:
print("total", total)
end_offset = self.start_dir
for zinfo in sorted(self.filelist,
key=lambda zinfo: zinfo.header_offset,
reverse=True):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset
def namelist(self):
"""Return a list of file names in the archive."""
@@ -1545,7 +1562,8 @@ class ZipFile:
self._didModify = True
def read(self, name, pwd=None):
"""Return file bytes for name."""
"""Return file bytes for name. 'pwd' is the password to decrypt
encrypted files."""
with self.open(name, "r", pwd) as fp:
return fp.read()
@@ -1629,6 +1647,10 @@ class ZipFile:
'File name in directory %r and header %r differ.'
% (zinfo.orig_filename, fname))
if (zinfo._end_offset is not None and
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
if is_encrypted:
@@ -1693,7 +1715,8 @@ class ZipFile:
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
as possible. `member' may be a filename or a ZipInfo object. You can
specify a different directory using `path'.
specify a different directory using `path'. You can specify the
password to decrypt the file using 'pwd'.
"""
if path is None:
path = os.getcwd()
@@ -1706,7 +1729,8 @@ class ZipFile:
"""Extract all members from the archive to the current working
directory. `path' specifies a different directory to extract to.
`members' is optional and must be a subset of the list returned
by namelist().
by namelist(). You can specify the password to decrypt all files
using 'pwd'.
"""
if members is None:
members = self.namelist()
@@ -1757,7 +1781,7 @@ class ZipFile:
# filter illegal characters on Windows
arcname = self._sanitize_windows_name(arcname, os.path.sep)
if not arcname:
if not arcname and not member.is_dir():
raise ValueError("Empty filename.")
targetpath = os.path.join(targetpath, arcname)
@@ -2212,12 +2236,79 @@ class PyZipFile(ZipFile):
return (fname, archivename)
def main(args=None):
import argparse
description = 'A simple command-line interface for zipfile module.'
parser = argparse.ArgumentParser(description=description)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-l', '--list', metavar='<zipfile>',
help='Show listing of a zipfile')
group.add_argument('-e', '--extract', nargs=2,
metavar=('<zipfile>', '<output_dir>'),
help='Extract zipfile into target dir')
group.add_argument('-c', '--create', nargs='+',
metavar=('<name>', '<file>'),
help='Create zipfile from sources')
group.add_argument('-t', '--test', metavar='<zipfile>',
help='Test if a zipfile is valid')
parser.add_argument('--metadata-encoding', metavar='<encoding>',
help='Specify encoding of member names for -l, -e and -t')
args = parser.parse_args(args)
encoding = args.metadata_encoding
if args.test is not None:
src = args.test
with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
badfile = zf.testzip()
if badfile:
print("The following enclosed file is corrupted: {!r}".format(badfile))
print("Done testing")
elif args.list is not None:
src = args.list
with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
zf.printdir()
elif args.extract is not None:
src, curdir = args.extract
with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
zf.extractall(curdir)
elif args.create is not None:
if encoding:
print("Non-conforming encodings not supported with -c.",
file=sys.stderr)
sys.exit(1)
zip_name = args.create.pop(0)
files = args.create
def addToZip(zf, path, zippath):
if os.path.isfile(path):
zf.write(path, zippath, ZIP_DEFLATED)
elif os.path.isdir(path):
if zippath:
zf.write(path, zippath)
for nm in sorted(os.listdir(path)):
addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm))
# else: ignore
with ZipFile(zip_name, 'w') as zf:
for path in files:
zippath = os.path.basename(path)
if not zippath:
zippath = os.path.basename(os.path.dirname(path))
if zippath in ('', os.curdir, os.pardir):
zippath = ''
addToZip(zf, path, zippath)
from ._path import ( # noqa: E402
Path,
# used privately for tests
CompleteDirs, # noqa: F401
)
# used privately for tests
from .__main__ import main # noqa: F401, E402

View File

@@ -1,77 +1,4 @@
import sys
import os
from . import ZipFile, ZIP_DEFLATED
def main(args=None):
import argparse
description = 'A simple command-line interface for zipfile module.'
parser = argparse.ArgumentParser(description=description)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-l', '--list', metavar='<zipfile>',
help='Show listing of a zipfile')
group.add_argument('-e', '--extract', nargs=2,
metavar=('<zipfile>', '<output_dir>'),
help='Extract zipfile into target dir')
group.add_argument('-c', '--create', nargs='+',
metavar=('<name>', '<file>'),
help='Create zipfile from sources')
group.add_argument('-t', '--test', metavar='<zipfile>',
help='Test if a zipfile is valid')
parser.add_argument('--metadata-encoding', metavar='<encoding>',
help='Specify encoding of member names for -l, -e and -t')
args = parser.parse_args(args)
encoding = args.metadata_encoding
if args.test is not None:
src = args.test
with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
badfile = zf.testzip()
if badfile:
print("The following enclosed file is corrupted: {!r}".format(badfile))
print("Done testing")
elif args.list is not None:
src = args.list
with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
zf.printdir()
elif args.extract is not None:
src, curdir = args.extract
with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
zf.extractall(curdir)
elif args.create is not None:
if encoding:
print("Non-conforming encodings not supported with -c.",
file=sys.stderr)
sys.exit(1)
zip_name = args.create.pop(0)
files = args.create
def addToZip(zf, path, zippath):
if os.path.isfile(path):
zf.write(path, zippath, ZIP_DEFLATED)
elif os.path.isdir(path):
if zippath:
zf.write(path, zippath)
for nm in sorted(os.listdir(path)):
addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm))
# else: ignore
with ZipFile(zip_name, 'w') as zf:
for path in files:
zippath = os.path.basename(path)
if not zippath:
zippath = os.path.basename(os.path.dirname(path))
if zippath in ('', os.curdir, os.pardir):
zippath = ''
addToZip(zf, path, zippath)
from . import main
if __name__ == "__main__":
main()

View File

@@ -1,3 +1,12 @@
"""
A Path-like interface for zipfiles.
This codebase is shared between zipfile.Path in the stdlib
and zipp in PyPI. See
https://github.com/python/importlib_metadata/wiki/Development-Methodology
for more detail.
"""
import io
import posixpath
import zipfile
@@ -34,7 +43,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
posixpath.sep, generate all elements of that path.
>>> list(_ancestry('b/d'))
['b/d', 'b']
@@ -46,9 +55,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]
Multiple separators are treated like a single.
>>> list(_ancestry('//b//d///f//'))
['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)
@@ -174,7 +188,10 @@ def _extract_text_encoding(encoding=None, *args, **kwargs):
class Path:
"""
A pathlib-compatible interface for zip files.
A :class:`importlib.resources.abc.Traversable` interface for zip files.
Implements many of the features users enjoy from
:class:`pathlib.Path`.
Consider a zip file with this structure::

View File

@@ -2,6 +2,19 @@ import re
def translate(pattern):
return match_dirs(translate_core(pattern))
def match_dirs(pattern):
"""
Ensure that zipfile.Path directory names are matched.
zipfile.Path directory names always end in a slash.
"""
return rf'{pattern}[/]?'
def translate_core(pattern):
r"""
Given a glob pattern, produce a regex that matches it.