This commit is contained in:
ton
2024-10-07 10:13:40 +07:00
parent aa1631742f
commit 3a7d696db6
9729 changed files with 1832837 additions and 161742 deletions

View File

@@ -1,4 +1,4 @@
# Auto-generated by Tools/scripts/generate_re_casefix.py.
# Auto-generated by Tools/build/generate_re_casefix.py.
# Maps the code of lowercased character to codes of different lowercased
# characters which have the same uppercase.

View File

@@ -149,6 +149,8 @@ def _compile(code, pattern, flags):
emit(0) # look ahead
else:
lo, hi = av[1].getwidth()
if lo > MAXCODE:
raise error("looks too much behind")
if lo != hi:
raise error("look-behind requires fixed-width pattern")
emit(lo) # look behind
@@ -549,7 +551,7 @@ def _compile_info(code, pattern, flags):
else:
emit(MAXCODE)
prefix = prefix[:MAXCODE]
emit(min(hi, MAXCODE))
emit(hi)
# add literal prefix
if prefix:
emit(len(prefix)) # length

View File

@@ -68,6 +68,10 @@ FLAGS = {
TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
GLOBAL_FLAGS = SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE
# Maximal value returned by SubPattern.getwidth().
# Must be larger than MAXREPEAT, MAXCODE and sys.maxsize.
MAXWIDTH = 1 << 64
class State:
# keeps track of state for parsing
def __init__(self):
@@ -178,7 +182,7 @@ class SubPattern:
lo = hi = 0
for op, av in self.data:
if op is BRANCH:
i = MAXREPEAT - 1
i = MAXWIDTH
j = 0
for av in av[1]:
l, h = av.getwidth()
@@ -197,7 +201,10 @@ class SubPattern:
elif op in _REPEATCODES:
i, j = av[2].getwidth()
lo = lo + i * av[0]
hi = hi + j * av[1]
if av[1] == MAXREPEAT and j:
hi = MAXWIDTH
else:
hi = hi + j * av[1]
elif op in _UNITCODES:
lo = lo + 1
hi = hi + 1
@@ -217,7 +224,7 @@ class SubPattern:
hi = hi + j
elif op is SUCCESS:
break
self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
self.width = min(lo, MAXWIDTH), min(hi, MAXWIDTH)
return self.width
class Tokenizer: