This commit is contained in:
ton
2024-10-07 10:13:40 +07:00
parent aa1631742f
commit 3a7d696db6
9729 changed files with 1832837 additions and 161742 deletions

View File

@@ -48,11 +48,12 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
def _has_surrogates(s):
"""Return True if s contains surrogate-escaped binary data."""
"""Return True if s may contain surrogate-escaped binary data."""
# This check is based on the fact that unless there are surrogates, utf8
# (Python's default encoding) can encode any string. This is the fastest
# way to check for surrogates, see issue 11454 for timings.
# way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings.
try:
s.encode()
return False
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
return address
def _iter_escaped_chars(addr):
pos = 0
escape = False
for pos, ch in enumerate(addr):
if escape:
yield (pos, '\\' + ch)
escape = False
elif ch == '\\':
escape = True
else:
yield (pos, ch)
if escape:
yield (pos, '\\')
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
return a.addresslist
def _strip_quoted_realnames(addr):
"""Strip real names between quotes."""
if '"' not in addr:
# Fast path
return addr
start = 0
open_pos = None
result = []
for pos, ch in _iter_escaped_chars(addr):
if ch == '"':
if open_pos is None:
open_pos = pos
else:
if start != open_pos:
result.append(addr[start:open_pos])
start = pos + 1
open_pos = None
if start < len(addr):
result.append(addr[start:])
return ''.join(result)
supports_strict_parsing = True
def getaddresses(fieldvalues, *, strict=True):
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
its place.
If strict is true, use a strict parser which rejects malformed inputs.
"""
# If strict is true, if the resulting list of parsed addresses is greater
# than the number of fieldvalues in the input list, a parsing error has
# occurred and consequently a list containing a single empty 2-tuple [('',
# '')] is returned in its place. This is done to avoid invalid output.
#
# Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
# Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
# Safe output: [('', '')]
if not strict:
all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
return a.addresslist
fieldvalues = [str(v) for v in fieldvalues]
fieldvalues = _pre_parse_validation(fieldvalues)
addr = COMMASPACE.join(fieldvalues)
a = _AddressList(addr)
result = _post_parse_validation(a.addresslist)
# Treat output as invalid if the number of addresses is not equal to the
# expected number of addresses.
n = 0
for v in fieldvalues:
# When a comma is used in the Real Name part it is not a deliminator.
# So strip those out before counting the commas.
v = _strip_quoted_realnames(v)
# Expected number of addresses: 1 + number of commas
n += 1 + v.count(',')
if len(result) != n:
return [('', '')]
return result
def _check_parenthesis(addr):
# Ignore parenthesis in quoted real names.
addr = _strip_quoted_realnames(addr)
opens = 0
for pos, ch in _iter_escaped_chars(addr):
if ch == '(':
opens += 1
elif ch == ')':
opens -= 1
if opens < 0:
return False
return (opens == 0)
def _pre_parse_validation(email_header_fields):
accepted_values = []
for v in email_header_fields:
if not _check_parenthesis(v):
v = "('', '')"
accepted_values.append(v)
return accepted_values
def _post_parse_validation(parsed_email_header_tuples):
accepted_values = []
# The parser would have parsed a correctly formatted domain-literal
# The existence of an [ after parsing indicates a parsing failure
for v in parsed_email_header_tuples:
if '[' in v[1]:
v = ('', '')
accepted_values.append(v)
return accepted_values
def _format_timetuple_and_zone(timetuple, zone):
@@ -128,7 +244,7 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
Fri, 09 Nov 2001 01:08:47 -0000
Optional timeval if given is a floating point time value as accepted by
Optional timeval if given is a floating-point time value as accepted by
gmtime() and localtime(), otherwise the current time is used.
Optional localtime is a flag that when True, interprets timeval, and
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
def parseaddr(addr):
def parseaddr(addr, *, strict=True):
"""
Parse addr into its constituent realname and email address parts.
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
If strict is True, use a strict parser which rejects malformed inputs.
"""
addrs = _AddressList(addr).addresslist
if not addrs:
return '', ''
if not strict:
addrs = _AddressList(addr).addresslist
if not addrs:
return ('', '')
return addrs[0]
if isinstance(addr, list):
addr = addr[0]
if not isinstance(addr, str):
return ('', '')
addr = _pre_parse_validation([addr])[0]
addrs = _post_parse_validation(_AddressList(addr).addresslist)
if not addrs or len(addrs) > 1:
return ('', '')
return addrs[0]