This commit is contained in:
ton
2023-10-05 00:01:27 +07:00
parent 1541297f6d
commit 4a987d90c5
12169 changed files with 502 additions and 2656459 deletions

View File

@@ -1,5 +0,0 @@
#!/usr/bin/env python
"""Launch the lib2to3 command-line driver with the ``lib2to3.fixes`` package."""
import sys
from lib2to3.main import main as run_2to3

# Whatever the driver returns is handed to sys.exit() as the exit status.
sys.exit(run_2to3("lib2to3.fixes"))

View File

@@ -1,202 +0,0 @@
#!/usr/bin/env python3
# This script converts a C file to use the PEP 384 type definition API
# Usage: abitype.py < old_code > new_code
import re, sys
###### Replacement of PyTypeObject static instances ##############
# classify each token, giving it a one-letter code:
# S: static
# T: PyTypeObject
# I: ident
# W: whitespace
# =, {, }, ; : themselves
def classify():
    """Return one classification letter per entry of the global ``tokens``.

    The resulting string has exactly one character per token, so an index
    into the string is also an index into ``tokens``.
    """
    ident_codes = {'PyTypeObject': 'T', 'static': 'S'}

    def code_for(kind, text):
        # Punctuation that matters to the matcher stands for itself.
        if kind == 'other' and text in "={};":
            return text
        if kind == 'ident':
            return ident_codes.get(text, 'I')
        if kind == 'ws':
            return 'W'
        # Everything else (comments, preprocessor lines, other characters).
        return '.'

    return ''.join([code_for(kind, text) for kind, text in tokens])
# Obtain a list of fields of a PyTypeObject, in declaration order,
# skipping ob_base
# All comments are dropped from the variable (which are typically
# just the slot names, anyway), and information is discarded whether
# the original type was static.
def get_fields(start, real_end):
    """Extract the name and initializer fields of one PyTypeObject definition.

    start    -- index into the global ``tokens`` list of the first token of
                the definition (``static`` or ``PyTypeObject``).
    real_end -- index just past the definition; only used as a sanity bound.

    Returns ``(name, fields)`` where ``fields`` is the list of initializer
    expressions (as source text) that follow PyVarObject_HEAD_INIT(...).
    Raises Exception if the initializer does not start with
    PyVarObject_HEAD_INIT.
    """
    pos = start
    # static?
    if tokens[pos][1] == 'static':
        # skip the keyword and the whitespace token after it
        pos += 2
    # PyTypeObject
    pos += 2
    # name
    name = tokens[pos][1]
    pos += 1
    # advance to just past the opening brace of the initializer
    while tokens[pos][1] != '{':
        pos += 1
    pos += 1
    # PyVarObject_HEAD_INIT
    while tokens[pos][0] in ('ws', 'comment'):
        pos += 1
    if tokens[pos][1] != 'PyVarObject_HEAD_INIT':
        raise Exception('%s has no PyVarObject_HEAD_INIT' % name)
    # skip the macro call up to and including the first ')'
    while tokens[pos][1] != ')':
        pos += 1
    pos += 1
    # field definitions: various tokens, comma-separated
    fields = []
    while True:
        while tokens[pos][0] in ('ws', 'comment'):
            pos += 1
        end = pos
        # scan to the ',' or '}' that ends this field
        while tokens[end][1] not in ',}':
            if tokens[end][1] == '(':
                # step over a parenthesised group so that commas inside
                # it are not taken for field separators
                nesting = 1
                while nesting:
                    end += 1
                    if tokens[end][1] == '(': nesting+=1
                    if tokens[end][1] == ')': nesting-=1
            end += 1
        assert end < real_end
        # join field, excluding separator and trailing ws
        end1 = end-1
        while tokens[end1][0] in ('ws', 'comment'):
            end1 -= 1
        fields.append(''.join(t[1] for t in tokens[pos:end1+1]))
        if tokens[end][1] == '}':
            break
        pos = end+1
    return name, fields
# List of type slots as of Python 3.2, omitting ob_base.
# The order must match the declaration order of PyTypeObject, because
# make_slots() pairs these names positionally with initializer fields.
typeslots = [
    'tp_name',
    'tp_basicsize',
    'tp_itemsize',
    'tp_dealloc',
    'tp_print',
    'tp_getattr',
    'tp_setattr',
    'tp_reserved',
    'tp_repr',
    'tp_as_number',
    'tp_as_sequence',
    'tp_as_mapping',
    'tp_hash',
    'tp_call',
    'tp_str',
    'tp_getattro',
    'tp_setattro',
    'tp_as_buffer',
    'tp_flags',
    'tp_doc',
    'tp_traverse',
    'tp_clear',
    'tp_richcompare',
    'tp_weaklistoffset',
    'tp_iter',
    'tp_iternext',
    'tp_methods',
    'tp_members',
    'tp_getset',
    'tp_base',
    'tp_dict',
    'tp_descr_get',
    'tp_descr_set',
    'tp_dictoffset',
    'tp_init',
    'tp_alloc',
    'tp_new',
    'tp_free',
    'tp_is_gc',
    'tp_bases',
    'tp_mro',
    'tp_cache',
    'tp_subclasses',
    'tp_weaklist',
    'tp_del',
    'tp_version_tag',
]


# Generate a PyType_Spec definition
def make_slots(name, fields):
    """Return C source for ``<name>_slots`` and ``<name>_spec``.

    name   -- identifier of the original PyTypeObject variable.
    fields -- initializer expressions in PyTypeObject declaration order
              (as produced by get_fields()).

    Fields that belong to PyType_Spec itself (name, basicsize, itemsize,
    flags) are copied verbatim into the spec; every other field whose text
    does not end in '0' becomes a PyType_Slot entry.  Raises KeyError if
    ``fields`` is empty (no tp_name) and IndexError if it has more entries
    than ``typeslots``.
    """
    spec_members = ('tp_name', 'tp_basicsize', 'tp_itemsize', 'tp_flags')
    # defaults for spec members that the initializer may leave out
    spec = {'tp_basicsize': '0', 'tp_itemsize': '0', 'tp_flags': '0'}
    res = ['static PyType_Slot %s_slots[] = {' % name]
    for i, val in enumerate(fields):
        slot = typeslots[i]
        if slot in spec_members:
            # Always keep these verbatim: an expression such as
            # "sizeof(x) + 10" ends in '0' but is not a null value.
            spec[slot] = val
            continue
        if val.endswith('0'):
            # "0" or "(cast)0": the slot is unset, so omit it
            continue
        res.append(' {Py_%s, %s},' % (slot, val))
    # PyType_FromSpec() reads slots until it meets a zero slot id.
    res.append(' {0, NULL},')
    res.append('};')
    res.append('static PyType_Spec %s_spec = {' % name)
    res.append(' %s,' % spec['tp_name'])
    res.append(' %s,' % spec['tp_basicsize'])
    res.append(' %s,' % spec['tp_itemsize'])
    res.append(' %s,' % spec['tp_flags'])
    res.append(' %s_slots,' % name)
    res.append('};\n')
    return '\n'.join(res)
if __name__ == '__main__':

    ############ Simplistic C scanner ##################################
    # Each alternative is a named group; the name of the group that
    # matched (m.lastgroup) becomes the token type.  The comment pattern
    # uses [\s\S] so that a /* ... */ comment may span several lines.
    tokenizer = re.compile(
        r"(?P<preproc>#.*\n)"
        r"|(?P<comment>/\*[\s\S]*?\*/)"
        r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)"
        r"|(?P<ws>[ \t\n]+)"
        r"|(?P<other>.)",
        re.MULTILINE)

    # Global token list of [type, text] pairs; classify() and get_fields()
    # read it, and the main loop below rewrites slices of it.
    tokens = []
    source = sys.stdin.read()
    pos = 0
    while pos != len(source):
        m = tokenizer.match(source, pos)
        tokens.append([m.lastgroup, m.group()])
        pos += len(tokens[-1][1])
        if tokens[-1][0] == 'preproc':
            # continuation lines are considered
            # only in preprocess statements
            # (stop at end of input so a trailing backslash-newline
            # cannot make this loop spin forever)
            while tokens[-1][1].endswith('\\\n') and pos < len(source):
                nl = source.find('\n', pos)
                if nl == -1:
                    line = source[pos:]
                else:
                    line = source[pos:nl+1]
                tokens[-1][1] += line
                pos += len(line)

    # Main loop: replace all static PyTypeObjects until
    # there are none left.
    while 1:
        # One character per token, so match offsets are token indexes.
        c = classify()
        m = re.search('(SW)?TWIW?=W?{.*?};', c)
        if not m:
            break
        start = m.start()
        end = m.end()
        name, fields = get_fields(start, end)
        # The replacement is a single token with an empty type.
        tokens[start:end] = [('',make_slots(name, fields))]

    # Output result to stdout
    for t, v in tokens:
        sys.stdout.write(v)

View File

@@ -1,129 +0,0 @@
"""
Some helper functions to analyze the output of sys.getdxp() (which is
only available if Python was built with -DDYNAMIC_EXECUTION_PROFILE).
These will tell you which opcodes have been executed most frequently
in the current process, and, if Python was also built with -DDXPAIRS,
will tell you which instruction _pairs_ were executed most frequently,
which may help in choosing new instructions.
If Python was built without -DDYNAMIC_EXECUTION_PROFILE, importing
this module will raise a RuntimeError.
If you're running a script you want to profile, a simple way to get
the common pairs is:
$ PYTHONPATH=$PYTHONPATH:<python_srcdir>/Tools/scripts \
./python -i -O the_script.py --args
...
> from analyze_dxp import *
> s = render_common_pairs()
> open('/tmp/some_file', 'w').write(s)
"""
import copy
import opcode
import operator
import sys
import threading
# sys.getdxp() is the only data source this module has, so refuse to
# import when the interpreter does not provide it.
if not hasattr(sys, "getdxp"):
    raise RuntimeError("Can't import analyze_dxp: Python built without"
                       " -DDYNAMIC_EXECUTION_PROFILE.")

# Guards _cumulative_profile.  It is re-entrant because snapshot_profile()
# calls merge_profile() while already holding it.
_profile_lock = threading.RLock()
# Module-level running total, seeded with whatever sys.getdxp() reports
# at import time.
_cumulative_profile = sys.getdxp()
# If Python was built with -DDXPAIRS, sys.getdxp() returns a list of
# lists of ints. Otherwise it returns just a list of ints.
def has_pairs(profile):
    """Tell whether ``profile`` came from a Python built with -DDXPAIRS.

    A pair profile is a non-empty sequence whose first element is a list.
    """
    if len(profile) == 0:
        return False
    first_row = profile[0]
    return isinstance(first_row, list)
def reset_profile():
    """Discard every execution count collected so far."""
    global _cumulative_profile
    with _profile_lock:
        # The first call throws away the interpreter's pending counts ...
        sys.getdxp()
        # ... so the second call hands back a zeroed profile for our copy.
        fresh = sys.getdxp()
        _cumulative_profile = fresh
def merge_profile():
    """Add the counts from sys.getdxp() to the module's cached profile.

    This is needed because sys.getdxp() zeroes itself on every call.
    """
    with _profile_lock:
        delta = sys.getdxp()
        if not has_pairs(delta):
            # Flat profile: one counter per opcode.
            for op in range(len(_cumulative_profile)):
                _cumulative_profile[op] += delta[op]
            return
        # Pair profile: one row of counters per first opcode.
        for first, row in enumerate(_cumulative_profile):
            for second in range(len(row)):
                row[second] += delta[first][second]
def snapshot_profile():
    """Return an independent copy of the profile accumulated up to now."""
    with _profile_lock:
        # Fold in the counts gathered since the last merge first.
        merge_profile()
        frozen = copy.deepcopy(_cumulative_profile)
    return frozen
def common_instructions(profile):
    """List the executed opcodes, most frequent first.

    Each entry of the returned list is a tuple
       (opcode, opname, # of occurrences)
    """
    # In a pair profile the single-opcode counts are the last row.
    counts = profile[-1] if (has_pairs(profile) and profile) else profile
    ranked = []
    for op, hits in enumerate(counts):
        if hits > 0:
            ranked.append((op, opcode.opname[op], hits))
    return sorted(ranked, key=lambda entry: entry[2], reverse=True)
def common_pairs(profile):
    """List the executed opcode pairs, most frequent first.

    Each entry of the returned list is a tuple
       ((1st opcode, 2nd opcode),
        (1st opname, 2nd opname),
        # of occurrences of the pair)
    An empty list is returned for a profile without pair data.
    """
    if not has_pairs(profile):
        return []
    names = opcode.opname
    ranked = []
    # The last row holds single-opcode counts, so leave it out.
    for first, row in enumerate(profile[:-1]):
        for second, hits in enumerate(row):
            if hits > 0:
                ranked.append(((first, second),
                               (names[first], names[second]),
                               hits))
    return sorted(ranked, key=lambda entry: entry[2], reverse=True)
def render_common_pairs(profile=None):
    """Format the most common opcode pairs as text, most frequent first.

    Every line of the result has the form
       # of occurrences: ('1st opname', '2nd opname')
    When no profile is given, a fresh snapshot_profile() is rendered.
    """
    if profile is None:
        profile = snapshot_profile()
    rendered = []
    for _codes, names, hits in common_pairs(profile):
        rendered.append("%s: %s\n" % (hits, names))
    return ''.join(rendered)

View File

@@ -1,132 +0,0 @@
#! /usr/bin/env python3
"""Show file statistics by extension."""
import os
import sys
class Stats:
    """Collect per-extension file statistics and print them as a table."""

    def __init__(self):
        # Maps an extension label to a {statistic name: count} dict.
        self.stats = {}

    def statargs(self, args):
        """Gather statistics for every path in ``args``.

        Paths that are neither a directory nor a regular file are reported
        on stderr and counted under the "<???>" label.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively gather statistics for the directory ``dir``."""
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            if os.path.islink(full):
                # Symbolic links are counted but never followed.
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Gather statistics for one file, keyed by its extension."""
        head, ext = os.path.splitext(filename)
        head, base = os.path.split(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            with open(filename, "rb") as f:
                data = f.read()
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            # A NUL byte marks the file as binary; skip line/word counts.
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # latin-1 decodes any byte sequence, so this cannot fail.
        lines = str(data, "latin-1").splitlines()
        self.addstats(ext, "lines", len(lines))
        del lines
        words = data.split()
        self.addstats(ext, "words", len(words))

    def addstats(self, ext, key, n):
        """Add ``n`` to statistic ``key`` of extension ``ext``."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the table: header, one row per extension, TOTAL, header.

        The totals are kept in local data only, so ``self.stats`` is left
        untouched and report() may be called repeatedly.
        """
        exts = sorted(self.stats)
        # Get the column keys
        columns = set()
        for ext in exts:
            columns.update(self.stats[ext])
        cols = sorted(columns)
        minwidth = 6
        colwidth = {"ext": max(map(len, exts), default=0)}
        totals = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            cw = max(minwidth, len(col), len(str(total)))
            for value in values:
                cw = max(cw, len("%d" % value))
            colwidth[col] = cw
            totals[col] = total
        rows = [(ext, self.stats[ext]) for ext in exts]
        rows.append(("TOTAL", totals))

        def printheader():
            print("%*s" % (colwidth["ext"], "ext"), end=' ')
            for col in cols:
                print("%*s" % (colwidth[col], col), end=' ')
            print()

        printheader()
        for label, row in rows:
            print("%*s" % (colwidth["ext"], label), end=' ')
            for col in cols:
                # Statistics an extension never recorded print as blanks.
                print("%*s" % (colwidth[col], row.get(col, "")), end=' ')
            print()
        printheader()  # Another header at the bottom
def main():
    """Report statistics for the paths on the command line.

    With no arguments the current directory is examined.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


if __name__ == "__main__":
    main()

View File

@@ -1,61 +0,0 @@
#! /usr/bin/env python3
# Print the product of age and size of each file, in suitable units.
#
# Usage: byteyears [ -a | -m | -c ] file ...
#
# Options -[amc] select atime, mtime (default) or ctime as age.
import sys, os, time
from stat import *
def main():
    """Print size * age (in byte-years) for each file named in sys.argv.

    An optional first argument selects the timestamp used as the age:
    -a (atime), -m (mtime, the default) or -c (ctime).  The option, when
    present, is removed from sys.argv.  Always finishes through
    sys.exit(): status 0 normally, 1 if any file could not be examined.
    """
    # Use lstat() to stat files if it exists, else stat()
    statfunc = getattr(os, 'lstat', os.stat)

    # Parse options
    time_fields = {'-m': ST_MTIME, '-c': ST_CTIME, '-a': ST_ATIME}
    itime = ST_MTIME
    # Guard the length so that running without arguments is not an error.
    if len(sys.argv) > 1 and sys.argv[1] in time_fields:
        itime = time_fields[sys.argv[1]]
        del sys.argv[1]

    secs_per_year = 365.0 * 24.0 * 3600.0   # Scale factor
    now = time.time()                       # Current time, for age computations
    status = 0                              # Exit status, set to 1 on errors

    filenames = sys.argv[1:]

    # Compute max file name length
    maxlen = max([1] + [len(filename) for filename in filenames])

    # Process each argument in turn
    for filename in filenames:
        try:
            st = statfunc(filename)
        except OSError as msg:
            sys.stderr.write("can't stat %r: %r\n" % (filename, msg))
            status = 1
            continue
        anytime = st[itime]
        size = st[ST_SIZE]
        age = now - anytime
        byteyears = float(size) * float(age) / secs_per_year
        print(filename.ljust(maxlen), end=' ')
        print(repr(int(byteyears)).rjust(8))

    sys.exit(status)


if __name__ == '__main__':
    main()

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env python3
"""
Checks that the version of the projects bundled in ensurepip are the latest
versions available.
"""
import ensurepip
import json
import urllib.request
import sys
def main():
    """Compare each project bundled in ensurepip with its latest PyPI release.

    Prints one line per outdated project and exits with status 1 if any
    was found.  Requires network access to pypi.org.
    """
    outofdate = False

    for project, version in ensurepip._PROJECTS:
        url = "https://pypi.org/pypi/{}/json".format(project)
        # The cadefault argument is gone in Python 3.13; the default HTTPS
        # handler already verifies certificates against the system store.
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read().decode("utf8"))
        upstream_version = data["info"]["version"]

        if version != upstream_version:
            outofdate = True
            print("The latest version of {} on PyPI is {}, but ensurepip "
                  "has {}".format(project, upstream_version, version))

    if outofdate:
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -1,275 +0,0 @@
#! /usr/bin/env python3
"""cleanfuture [-d][-r][-v] path ...
-d Dry run. Analyze, but don't make any changes to, files.
-r Recurse. Search for all .py files in subdirectories too.
-v Verbose. Print informative msgs.
Search Python (.py) files for future statements, and remove the features
from such statements that are already mandatory in the version of Python
you're using.
Pass one or more file and/or directory paths. When a directory path, all
.py files within the directory will be examined, and, if the -r option is
given, likewise recursively for subdirectories.
Overwrites files in place, renaming the originals with a .bak extension. If
cleanfuture finds nothing to change, the file is left alone. If cleanfuture
does change a file, the changed file is a fixed-point (i.e., running
cleanfuture on the resulting .py file won't change it again, at least not
until you try it again with a later Python release).
Limitations: You can do these things, but this tool won't help you then:
+ A future statement cannot be mixed with any other statement on the same
physical line (separated by semicolon).
+ A future statement cannot contain an "as" clause.
Example: Assuming you're using Python 2.2, if a file containing
from __future__ import nested_scopes, generators
is analyzed by cleanfuture, the line is rewritten to
from __future__ import generators
because nested_scopes is no longer optional in 2.2 but generators is.
"""
import __future__
import tokenize
import os
import sys
# Command-line switches.  main() adds one for every -d, -r and -v option
# it sees; check() only tests them for truth.
dryrun = 0
recurse = 0
verbose = 0
def errprint(*args):
    """Write the arguments to stderr, space-separated, ending in a newline.

    A newline is appended only when the joined text does not already end
    with one.
    """
    text = ' '.join([str(arg) for arg in args])
    if not text.endswith('\n'):
        text = text + '\n'
    sys.stderr.write(text)
def main():
    """Parse the command line, set the global switches and check each path."""
    import getopt
    global verbose, recurse, dryrun
    try:
        options, paths = getopt.getopt(sys.argv[1:], "drv")
    except getopt.error as problem:
        errprint(problem)
        return
    for flag, _value in options:
        if flag == '-d':
            dryrun += 1
        elif flag == '-r':
            recurse += 1
        elif flag == '-v':
            verbose += 1
    if not paths:
        errprint("Usage:", __doc__)
        return
    for path in paths:
        check(path)
def check(file):
    """Examine one path and rewrite obsolete future statements in it.

    A directory (that is not a symlink) has its .py files checked, and its
    subdirectories too when the global ``recurse`` is set.  A file is
    analysed with FutureFinder; unless ``dryrun`` is set, a changed file is
    rewritten in place after renaming the original to ``<file>.bak``.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for entry in os.listdir(file):
            path = os.path.join(file, entry)
            descend = (recurse and os.path.isdir(path)
                       and not os.path.islink(path))
            if descend or entry.lower().endswith(".py"):
                check(path)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, str(msg)))
        return

    with f:
        ff = FutureFinder(f, file)
        changed = ff.run()
        if changed:
            # Read the unparsed tail while the file is still open.
            ff.gettherest()

    if not changed:
        if verbose:
            print("unchanged.")
        return

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    for first, last, replacement in changed:
        print("%r lines %d-%d" % (file, first+1, last+1))
        for index in range(first, last+1):
            print(ff.lines[index], end=' ')
        if replacement is None:
            print("-- deleted")
        else:
            print("-- change to:")
            print(replacement, end=' ')
    if dryrun:
        return

    bak = file + ".bak"
    if os.path.exists(bak):
        os.remove(bak)
    os.rename(file, bak)
    if verbose:
        print("renamed", file, "to", bak)
    with open(file, "w") as g:
        ff.write(g)
    if verbose:
        print("wrote new", file)
class FutureFinder:
    """Find and rewrite obsolete ``from __future__ import`` statements.

    Usage, as done by check(): construct with an open text file, call
    run(); if it reports changes, call gettherest() and then write().
    """

    def __init__(self, f, fname):
        """Remember the open file ``f`` and its name ``fname`` (for messages)."""
        self.f = f
        self.fname = fname
        self.ateof = 0
        self.lines = [] # raw file lines, as read so far by getline()

        # List of (start_index, end_index, new_line) triples.
        self.changed = []

    # Line-getter for tokenize.
    def getline(self):
        """Return the next line of the file, or "" once it is exhausted.

        Every line read is also appended to ``self.lines``.
        """
        if self.ateof:
            return ""
        line = self.f.readline()
        if line == "":
            self.ateof = 1
        else:
            self.lines.append(line)
        return line

    def run(self):
        """Scan the leading future statements and record needed rewrites.

        Returns ``self.changed``, a list of (start_index, end_index,
        new_line) triples with zero-based indexes into ``self.lines``;
        ``new_line`` is None when the whole statement is to be deleted.
        Returns a fresh empty list if a future statement cannot be parsed.
        """
        STRING = tokenize.STRING
        NL = tokenize.NL
        NEWLINE = tokenize.NEWLINE
        COMMENT = tokenize.COMMENT
        NAME = tokenize.NAME
        OP = tokenize.OP

        changed = self.changed
        get = tokenize.generate_tokens(self.getline).__next__
        type, token, (srow, scol), (erow, ecol), line = get()

        # Chew up initial comments and blank lines (if any).
        while type in (COMMENT, NL, NEWLINE):
            type, token, (srow, scol), (erow, ecol), line = get()

        # Chew up docstring (if any -- and it may be implicitly catenated!).
        while type is STRING:
            type, token, (srow, scol), (erow, ecol), line = get()

        # Analyze the future stmts.
        while 1:
            # Chew up comments and blank lines (if any).
            while type in (COMMENT, NL, NEWLINE):
                type, token, (srow, scol), (erow, ecol), line = get()

            # Anything other than "from __future__ import" ends the scan.
            if not (type is NAME and token == "from"):
                break
            startline = srow - 1    # tokenize is one-based
            type, token, (srow, scol), (erow, ecol), line = get()

            if not (type is NAME and token == "__future__"):
                break
            type, token, (srow, scol), (erow, ecol), line = get()

            if not (type is NAME and token == "import"):
                break
            type, token, (srow, scol), (erow, ecol), line = get()

            # Get the list of features.
            features = []
            while type is NAME:
                features.append(token)
                type, token, (srow, scol), (erow, ecol), line = get()

                if not (type is OP and token == ','):
                    break
                type, token, (srow, scol), (erow, ecol), line = get()

            # A trailing comment?
            comment = None
            if type is COMMENT:
                comment = token
                type, token, (srow, scol), (erow, ecol), line = get()

            # Only a plain, comma-separated feature list is understood.
            if type is not NEWLINE:
                errprint("Skipping file %r; can't parse line %d:\n%s" %
                         (self.fname, srow, line))
                return []

            endline = srow - 1

            # Check for obsolete features.
            okfeatures = []
            for f in features:
                object = getattr(__future__, f, None)
                if object is None:
                    # A feature we don't know about yet -- leave it in.
                    # They'll get a compile-time error when they compile
                    # this program, but that's not our job to sort out.
                    okfeatures.append(f)
                else:
                    released = object.getMandatoryRelease()
                    if released is None or released <= sys.version_info:
                        # Withdrawn or obsolete.
                        pass
                    else:
                        okfeatures.append(f)

            # Rewrite the line if at least one future-feature is obsolete.
            if len(okfeatures) < len(features):
                if len(okfeatures) == 0:
                    line = None
                else:
                    line = "from __future__ import "
                    line += ', '.join(okfeatures)
                    if comment is not None:
                        line += ' ' + comment
                    line += '\n'
                changed.append((startline, endline, line))

            # Loop back for more future statements.

        return changed

    def gettherest(self):
        """Store whatever run() left unread in the file as ``self.therest``."""
        if self.ateof:
            self.therest = ''
        else:
            self.therest = self.f.read()

    def write(self, f):
        """Write the rewritten file contents to the open file ``f``.

        Applies the recorded changes to ``self.lines`` and clears them, so
        a second call fails its assertion.  gettherest() must have been
        called first, because ``self.therest`` is read here.
        """
        changed = self.changed
        assert changed
        # Prevent calling this again.
        self.changed = []
        # Apply changes in reverse order, so that earlier indexes stay
        # valid while later lines are replaced or removed.
        changed.reverse()
        for s, e, line in changed:
            if line is None:
                # pure deletion
                del self.lines[s:e+1]
            else:
                self.lines[s:e+1] = [line]
        f.writelines(self.lines)
        # Copy over the remainder of the file.
        if self.therest:
            f.write(self.therest)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -1,129 +0,0 @@
#! /usr/bin/env python3
"""
combinerefs path
A helper for analyzing PYTHONDUMPREFS output.
When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown
time Py_FinalizeEx() prints the list of all live objects twice: first it
prints the repr() of each object while the interpreter is still fully intact.
After cleaning up everything it can, it prints all remaining live objects
again, but the second time just prints their addresses, refcounts, and type
names (because the interpreter has been torn down, calling repr methods at
this point can get into infinite loops or blow up).
Save all this output into a file, then run this script passing the path to
that file. The script finds both output chunks, combines them, then prints
a line of output for each object still alive at the end:
address refcnt typename repr
address is the address of the object, in whatever format the platform C
produces for a %p format code.
refcnt is of the form
"[" ref "]"
when the object's refcount is the same in both PYTHONDUMPREFS output blocks,
or
"[" ref_before "->" ref_after "]"
if the refcount changed.
typename is Py_TYPE(object)->tp_name, extracted from the second PYTHONDUMPREFS
output block.
repr is repr(object), extracted from the first PYTHONDUMPREFS output block.
CAUTION: If object is a container type, it may not actually contain all the
objects shown in the repr: the repr was captured from the first output block,
and some of the containees may have been released since then. For example,
it's common for the line showing the dict of interned strings to display
strings that no longer exist at the end of Py_FinalizeEx; this can be recognized
(albeit painfully) because such containees don't have a line of their own.
The objects are listed in allocation order, with most-recently allocated
printed first, and the first object allocated printed last.
Simple examples:
00857060 [14] str '__len__'
The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS
output blocks said there were 14 references to it. This is probably due to
C modules that intern the string "__len__" and keep a reference to it in a
file static.
00857038 [46->5] tuple ()
46-5 = 41 references to the empty tuple were removed by the cleanup actions
between the times PYTHONDUMPREFS produced output.
00858028 [1025->1456] str '<dummy key>'
The string '<dummy key>', which is used in dictobject.c to overwrite a real
key that gets deleted, grew several hundred references during cleanup. It
suggests that stuff did get removed from dicts by cleanup, but that the dicts
themselves are staying alive for some reason. """
import re
import sys
# Generate lines from fileiter. If whilematch is true, continue reading
# while the regexp object pat matches line. If whilematch is false, lines
# are read so long as pat doesn't match them. In any case, the first line
# that doesn't match pat (when whilematch is true), or that does match pat
# (when whilematch is false), is lost, and fileiter will resume at the line
# following it.
def read(fileiter, pat, whilematch):
    """Yield lines from ``fileiter`` until the stop condition is met.

    With ``whilematch`` true, lines are yielded as long as ``pat`` matches
    them; with it false, as long as ``pat`` does not match.  The line that
    ends the run is consumed from ``fileiter`` but not yielded.
    """
    for text in fileiter:
        matched = bool(pat.match(text))
        if matched != whilematch:
            return
        yield text
def combinefile(f):
    """Merge the two PYTHONDUMPREFS blocks read from ``f`` and print them.

    Prints one line per object of the second block (address, refcount or
    refcount change, type name, repr) followed by a summary line.
    """
    lines = iter(f)
    objects_header = re.compile(r'^Remaining objects:$')
    addresses_header = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    # Discard everything up to and including the first block's header.
    for _ignored in read(lines, objects_header, False):
        pass

    # First block: remember refcount and repr for every address.
    addr2rc = {}
    addr2guts = {}
    before = 0
    for line in read(lines, addresses_header, False):
        found = crack.match(line)
        if found is None:
            print('??? skipped:', line)
            continue
        addr, refcount, guts = found.groups()
        addr2rc[addr] = refcount
        addr2guts[addr] = guts
        before += 1

    # Second block: runs for as long as lines have the expected shape.
    after = 0
    for line in read(lines, crack, True):
        after += 1
        found = crack.match(line)
        assert found
        addr, rc, typename = found.groups()
        if addr not in addr2rc:
            print('??? new object created while tearing down:', line.rstrip())
            continue
        earlier = addr2rc[addr]
        if rc == earlier:
            refcount_text = '[%s]' % rc
        else:
            refcount_text = '[%s->%s]' % (earlier, rc)
        print(addr, end=' ')
        print(refcount_text, end=' ')
        print(typename, addr2guts[addr])

    print("%d objects before, %d after" % (before, after))
def combine(fname):
    """Open the file named ``fname`` and run combinefile() on it."""
    with open(fname) as dump:
        combinefile(dump)


if __name__ == '__main__':
    combine(sys.argv[1])

View File

@@ -1,26 +0,0 @@
#! /usr/bin/env python3
# Copy one file's atime and mtime to another
import sys
import os
from stat import ST_ATIME, ST_MTIME # Really constants 7 and 8
def main():
    """Copy the atime and mtime of sys.argv[1] onto sys.argv[2].

    Exits with status 2 on a usage error or when the destination's times
    cannot be changed, and with status 1 when the source cannot be
    examined.
    """
    if len(sys.argv) != 3:
        sys.stderr.write('usage: copytime source destination\n')
        sys.exit(2)
    file1, file2 = sys.argv[1], sys.argv[2]
    try:
        stat1 = os.stat(file1)
    except OSError:
        sys.stderr.write(file1 + ': cannot stat\n')
        sys.exit(1)
    try:
        # Pass nanosecond values: indexing the stat result by ST_ATIME /
        # ST_MTIME yields whole seconds and would drop the fraction.
        os.utime(file2, ns=(stat1.st_atime_ns, stat1.st_mtime_ns))
    except OSError:
        sys.stderr.write(file2 + ': cannot change time\n')
        sys.exit(2)


if __name__ == '__main__':
    main()

View File

@@ -1,23 +0,0 @@
#! /usr/bin/env python3
"Replace CRLF with LF in argument files. Print names of changed files."
import sys, os
def main():
    """Convert CRLF to LF in every file named on the command line.

    Directories and files containing a NUL byte are reported and left
    alone; the name of each file that was rewritten is printed.
    """
    for path in sys.argv[1:]:
        if os.path.isdir(path):
            print(path, "Directory!")
            continue
        with open(path, "rb") as source:
            original = source.read()
        if b'\0' in original:
            print(path, "Binary!")
            continue
        converted = original.replace(b"\r\n", b"\n")
        if converted == original:
            # Nothing to convert, so do not touch the file.
            continue
        print(path)
        with open(path, "wb") as target:
            target.write(converted)


if __name__ == '__main__':
    main()

View File

@@ -1,135 +0,0 @@
#!/usr/bin/env python3
"""
Synopsis: %(prog)s [-h|-g|-b|-r|-a|-d] dbfile [ picklefile ]
Convert the database file given on the command line to a pickle
representation. The optional flags indicate the type of the database:
-a - open using dbm (any supported format)
-b - open as bsddb btree file
-d - open as dbm file
-g - open as gdbm file
-h - open as bsddb hash file
-r - open as bsddb recno file
The default is hash. If a pickle file is named it is opened for write
access (deleting any existing data). If no pickle file is named, the pickle
output is written to standard output.
"""
import getopt
# Every database backend is optional: a missing module is recorded as
# None and main() reports it only if the matching option is requested.
try:
    import bsddb
except ImportError:
    bsddb = None
try:
    import dbm.ndbm as dbm
except ImportError:
    dbm = None
try:
    import dbm.gnu as gdbm
except ImportError:
    gdbm = None
try:
    # "-a" means "any supported format", which is what the generic dbm
    # package provides (its open() detects the format of the file).
    import dbm as anydbm
except ImportError:
    anydbm = None
import sys
import pickle

prog = sys.argv[0]
def usage():
    """Write the module docstring to stderr as the usage message.

    ``%(name)s`` placeholders in the docstring are filled in from the
    module globals (for example ``prog``).
    """
    message = __doc__ % globals()
    sys.stderr.write(message)
def main(args):
    """Dump the database named in ``args`` as a stream of pickled pairs.

    args -- command-line arguments without the program name: option flags,
            the database file, and optionally the pickle file to write.

    Returns 0 on success and 1 on any usage or I/O problem.  Without a
    pickle file the pickles go to the binary buffer of standard output.
    The pickle file is created only after the database has been opened,
    so a failed run does not clobber an existing file.
    """
    try:
        opts, args = getopt.getopt(args, "hbrdag",
                                   ["hash", "btree", "recno", "dbm",
                                    "gdbm", "anydbm"])
    except getopt.error:
        usage()
        return 1

    if len(args) == 0 or len(args) > 2:
        usage()
        return 1
    dbfile = args[0]
    picklename = args[1] if len(args) == 2 else None

    # (option spellings, module or None, opener attribute, module label)
    backends = (
        (("-h", "--hash"), bsddb, "hashopen", "bsddb"),
        (("-b", "--btree"), bsddb, "btopen", "bsddb"),
        (("-r", "--recno"), bsddb, "rnopen", "bsddb"),
        (("-a", "--anydbm"), anydbm, "open", "dbm"),
        (("-g", "--gdbm"), gdbm, "open", "dbm.gnu"),
        (("-d", "--dbm"), dbm, "open", "dbm.ndbm"),
    )
    dbopen = None
    for opt, arg in opts:
        for spellings, module, opener, label in backends:
            if opt in spellings:
                try:
                    # Fails with AttributeError when the module is None.
                    dbopen = getattr(module, opener)
                except AttributeError:
                    sys.stderr.write("%s module unavailable.\n" % label)
                    return 1
                break

    if dbopen is None:
        if bsddb is None:
            sys.stderr.write("bsddb module unavailable - ")
            sys.stderr.write("must specify dbtype.\n")
            return 1
        else:
            dbopen = bsddb.hashopen

    # Collect the error types of whichever backends are present; a
    # module's "error" attribute may be one class or a tuple of classes.
    open_errors = [OSError]
    for module in (bsddb, dbm, gdbm, anydbm):
        err = getattr(module, "error", None)
        if isinstance(err, tuple):
            open_errors.extend(err)
        elif err is not None:
            open_errors.append(err)

    try:
        db = dbopen(dbfile, 'r')
    except tuple(open_errors):
        sys.stderr.write("Unable to open %s. " % dbfile)
        sys.stderr.write("Check for format or version mismatch.\n")
        return 1

    try:
        if picklename is None:
            # Pickles are bytes, so use the binary layer of stdout.
            pfile = sys.stdout.buffer
        else:
            try:
                pfile = open(picklename, 'wb')
            except IOError:
                sys.stderr.write("Unable to open %s\n" % picklename)
                return 1
        try:
            for k in db.keys():
                pickle.dump((k, db[k]), pfile, 1)
        finally:
            if picklename is None:
                # stdout belongs to the process: flush it, do not close it.
                pfile.flush()
            else:
                pfile.close()
    finally:
        db.close()

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

View File

@@ -1,512 +0,0 @@
"""Deep freeze
The script may be executed by _bootstrap_python interpreter.
Shared library extension modules are not available in that case.
On Windows, and in cross-compilation cases, it is executed
by Python 3.10, and 3.11 features are not available.
"""
import argparse
import ast
import builtins
import collections
import contextlib
import os
import re
import time
import types
from typing import Dict, FrozenSet, TextIO, Tuple
import umarshal
from generate_global_objects import get_identifiers_and_strings
# Module-wide verbosity switch; off unless something sets it.
verbose = False
# Tables obtained from generate_global_objects; Printer.generate_unicode()
# consults them before emitting a new static string object.
identifiers, strings = get_identifiers_and_strings()

# This must be kept in sync with opcode.py
RESUME = 151
def isprintable(b: bytes) -> bool:
    """Return True if every byte of ``b`` lies in the range 0x20..0x7e."""
    for byte in b:
        if byte < 0x20 or byte >= 0x7f:
            return False
    return True
def make_string_literal(b: bytes) -> str:
    """Return a double-quoted C string literal holding the bytes ``b``.

    Input made up only of bytes 0x20..0x7e is emitted as text with
    backslashes and double quotes escaped; any other input is emitted
    entirely as ``\\xNN`` escapes.
    """
    if all(0x20 <= byte < 0x7f for byte in b):
        text = b.decode("ascii")
        text = text.replace("\\", "\\\\")
        body = text.replace("\"", "\\\"")
    else:
        body = "".join([f"\\x{byte:02x}" for byte in b])
    return '"' + body + '"'
# Bit flags describing what kind of variable a "localsplus" entry is.
CO_FAST_LOCAL = 0x20
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80


def get_localsplus(code: types.CodeType):
    """Return ``(names, kinds)`` for the variables of ``code``.

    ``names`` is a tuple of the distinct names in co_varnames, co_cellvars
    and co_freevars, in first-seen order; ``kinds`` is a bytes object of
    the same length holding the OR of the CO_FAST_* flags of each name.
    """
    flags = {}
    groups = (
        (code.co_varnames, CO_FAST_LOCAL),
        (code.co_cellvars, CO_FAST_CELL),
        (code.co_freevars, CO_FAST_FREE),
    )
    for names, bit in groups:
        for name in names:
            # A name found in several groups accumulates all their bits.
            flags[name] = flags.get(name, 0) | bit
    return tuple(flags), bytes(flags.values())
def get_localsplus_counts(code: types.CodeType,
                          names: Tuple[str, ...],
                          kinds: bytes) -> Tuple[int, int, int, int]:
    """Count the variable kinds described by ``names`` and ``kinds``.

    Returns ``(nlocals, nplaincellvars, ncellvars, nfreevars)``.  A cell
    that is also a local counts towards ``ncellvars`` but not towards
    ``nplaincellvars``.  The totals are checked against ``code`` with
    assertions.
    """
    assert len(names) == len(kinds)
    nlocals = nplaincellvars = ncellvars = nfreevars = 0
    for kind in kinds:
        is_cell = bool(kind & CO_FAST_CELL)
        if kind & CO_FAST_LOCAL:
            nlocals += 1
            if is_cell:
                ncellvars += 1
        elif is_cell:
            ncellvars += 1
            nplaincellvars += 1
        elif kind & CO_FAST_FREE:
            nfreevars += 1
    assert nlocals == len(code.co_varnames) == code.co_nlocals, \
        (nlocals, len(code.co_varnames), code.co_nlocals)
    assert ncellvars == len(code.co_cellvars)
    assert nfreevars == len(code.co_freevars)
    assert len(names) == nlocals + nplaincellvars + nfreevars
    return nlocals, nplaincellvars, ncellvars, nfreevars
# Storage width, in bytes per character, of a unicode object's data.
PyUnicode_1BYTE_KIND = 1
PyUnicode_2BYTE_KIND = 2
PyUnicode_4BYTE_KIND = 4


def analyze_character_width(s: str) -> Tuple[int, bool]:
    """Return ``(kind, ascii)`` for the string ``s``.

    ``kind`` is the PyUnicode_*_KIND constant wide enough for the largest
    character of ``s``; ``ascii`` is True only when every character is at
    most U+007F.  The empty string gives ``(PyUnicode_1BYTE_KIND, True)``.
    """
    # A space is the floor, which also covers the empty string.
    widest = max(' ', max(s, default=' '))
    if widest > '\uFFFF':
        return PyUnicode_4BYTE_KIND, False
    if widest > '\xFF':
        return PyUnicode_2BYTE_KIND, False
    return PyUnicode_1BYTE_KIND, widest <= '\x7F'
def removesuffix(base: str, suffix: str) -> str:
    """Return ``base`` without ``suffix`` at its end, if it ends with it."""
    if not base.endswith(suffix):
        return base
    # Slice by computed length so that an empty suffix keeps everything.
    keep = len(base) - len(suffix)
    return base[:keep]
class Printer:
def __init__(self, file: TextIO) -> None:
self.level = 0
self.file = file
self.cache: Dict[tuple[type, object, str], str] = {}
self.hits, self.misses = 0, 0
self.patchups: list[str] = []
self.deallocs: list[str] = []
self.interns: list[str] = []
self.write('#include "Python.h"')
self.write('#include "internal/pycore_gc.h"')
self.write('#include "internal/pycore_code.h"')
self.write('#include "internal/pycore_long.h"')
self.write("")
@contextlib.contextmanager
def indent(self) -> None:
save_level = self.level
try:
self.level += 1
yield
finally:
self.level = save_level
def write(self, arg: str) -> None:
self.file.writelines((" "*self.level, arg, "\n"))
@contextlib.contextmanager
def block(self, prefix: str, suffix: str = "") -> None:
self.write(prefix + " {")
with self.indent():
yield
self.write("}" + suffix)
def object_head(self, typename: str) -> None:
with self.block(".ob_base =", ","):
self.write(f".ob_refcnt = 999999999,")
self.write(f".ob_type = &{typename},")
def object_var_head(self, typename: str, size: int) -> None:
with self.block(".ob_base =", ","):
self.object_head(typename)
self.write(f".ob_size = {size},")
def field(self, obj: object, name: str) -> None:
self.write(f".{name} = {getattr(obj, name)},")
def generate_bytes(self, name: str, b: bytes) -> str:
    """Emit a static bytes object and return a C expression referring to it.

    Empty and one-byte values produce no output; the returned expression
    refers to a ``_Py_SINGLETON`` entry instead.  Any other value is
    written as a static struct named ``name``.
    """
    size = len(b)
    if size == 0:
        return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
    if size == 1:
        return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
    self.write("static")
    with self.indent():
        # Anonymous struct type; the array has room for one extra char.
        with self.block("struct"):
            self.write("PyObject_VAR_HEAD")
            self.write("Py_hash_t ob_shash;")
            self.write(f"char ob_sval[{size + 1}];")
    with self.block(f"{name} =", ";"):
        self.object_var_head("PyBytes_Type", size)
        self.write(".ob_shash = -1,")
        literal = make_string_literal(b)
        self.write(f".ob_sval = {literal},")
    return f"& {name}.ob_base.ob_base"
def generate_unicode(self, name: str, s: str) -> str:
if s in strings:
return f"&_Py_STR({strings[s]})"
if s in identifiers:
return f"&_Py_ID({s})"
if re.match(r'\A[A-Za-z0-9_]+\Z', s):
name = f"const_str_{s}"
kind, ascii = analyze_character_width(s)
if kind == PyUnicode_1BYTE_KIND:
datatype = "uint8_t"
elif kind == PyUnicode_2BYTE_KIND:
datatype = "uint16_t"
else:
datatype = "uint32_t"
self.write("static")
with self.indent():
with self.block("struct"):
if ascii:
self.write("PyASCIIObject _ascii;")
else:
self.write("PyCompactUnicodeObject _compact;")
self.write(f"{datatype} _data[{len(s)+1}];")
self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
with self.block(f"{name} =", ";"):
if ascii:
with self.block("._ascii =", ","):
self.object_head("PyUnicode_Type")
self.write(f".length = {len(s)},")
self.write(".hash = -1,")
with self.block(".state =", ","):
self.write(".kind = 1,")
self.write(".compact = 1,")
self.write(".ascii = 1,")
self.write(".ready = 1,")
self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
return f"& {name}._ascii.ob_base"
else:
with self.block("._compact =", ","):
with self.block("._base =", ","):
self.object_head("PyUnicode_Type")
self.write(f".length = {len(s)},")
self.write(".hash = -1,")
with self.block(".state =", ","):
self.write(f".kind = {kind},")
self.write(".compact = 1,")
self.write(".ascii = 0,")
self.write(".ready = 1,")
with self.block(f"._data =", ","):
for i in range(0, len(s), 16):
data = s[i:i+16]
self.write(", ".join(map(str, map(ord, data))) + ",")
if kind == PyUnicode_2BYTE_KIND:
self.patchups.append("if (sizeof(wchar_t) == 2) {")
self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;")
self.patchups.append(f" {name}._compact.wstr_length = {len(s)};")
self.patchups.append("}")
if kind == PyUnicode_4BYTE_KIND:
self.patchups.append("if (sizeof(wchar_t) == 4) {")
self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;")
self.patchups.append(f" {name}._compact.wstr_length = {len(s)};")
self.patchups.append("}")
return f"& {name}._compact._base.ob_base"
def generate_code(self, name: str, code: types.CodeType) -> str:
# The ordering here matches PyCode_NewWithPosOnlyArgs()
# (but see below).
co_consts = self.generate(name + "_consts", code.co_consts)
co_names = self.generate(name + "_names", code.co_names)
co_filename = self.generate(name + "_filename", code.co_filename)
co_name = self.generate(name + "_name", code.co_name)
co_qualname = self.generate(name + "_qualname", code.co_qualname)
co_linetable = self.generate(name + "_linetable", code.co_linetable)
co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
# These fields are not directly accessible
localsplusnames, localspluskinds = get_localsplus(code)
co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
# Derived values
nlocals, nplaincellvars, ncellvars, nfreevars = \
get_localsplus_counts(code, localsplusnames, localspluskinds)
co_code_adaptive = make_string_literal(code.co_code)
self.write("static")
with self.indent():
self.write(f"struct _PyCode_DEF({len(code.co_code)})")
with self.block(f"{name} =", ";"):
self.object_var_head("PyCode_Type", len(code.co_code) // 2)
# But the ordering here must match that in cpython/code.h
# (which is a pain because we tend to reorder those for perf)
# otherwise MSVC doesn't like it.
self.write(f".co_consts = {co_consts},")
self.write(f".co_names = {co_names},")
self.write(f".co_exceptiontable = {co_exceptiontable},")
self.field(code, "co_flags")
self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
self.write("._co_linearray_entry_size = 0,")
self.field(code, "co_argcount")
self.field(code, "co_posonlyargcount")
self.field(code, "co_kwonlyargcount")
self.field(code, "co_stacksize")
self.field(code, "co_firstlineno")
self.write(f".co_nlocalsplus = {len(localsplusnames)},")
self.field(code, "co_nlocals")
self.write(f".co_nplaincellvars = {nplaincellvars},")
self.write(f".co_ncellvars = {ncellvars},")
self.write(f".co_nfreevars = {nfreevars},")
self.write(f".co_localsplusnames = {co_localsplusnames},")
self.write(f".co_localspluskinds = {co_localspluskinds},")
self.write(f".co_filename = {co_filename},")
self.write(f".co_name = {co_name},")
self.write(f".co_qualname = {co_qualname},")
self.write(f".co_linetable = {co_linetable},")
self.write(f"._co_code = NULL,")
self.write("._co_linearray = NULL,")
self.write(f".co_code_adaptive = {co_code_adaptive},")
for i, op in enumerate(code.co_code[::2]):
if op == RESUME:
self.write(f"._co_firsttraceable = {i},")
break
name_as_code = f"(PyCodeObject *)&{name}"
self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
return f"& {name}.ob_base.ob_base"
def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
if len(t) == 0:
return f"(PyObject *)& _Py_SINGLETON(tuple_empty)"
items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
self.write("static")
with self.indent():
with self.block("struct"):
self.write("PyGC_Head _gc_head;")
with self.block("struct", "_object;"):
self.write("PyObject_VAR_HEAD")
if t:
self.write(f"PyObject *ob_item[{len(t)}];")
with self.block(f"{name} =", ";"):
with self.block("._object =", ","):
self.object_var_head("PyTuple_Type", len(t))
if items:
with self.block(f".ob_item =", ","):
for item in items:
self.write(item + ",")
return f"& {name}._object.ob_base.ob_base"
def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
sign = -1 if i < 0 else 0 if i == 0 else +1
i = abs(i)
digits: list[int] = []
while i:
i, rem = divmod(i, digit)
digits.append(rem)
self.write("static")
with self.indent():
with self.block("struct"):
self.write("PyObject_VAR_HEAD")
self.write(f"digit ob_digit[{max(1, len(digits))}];")
with self.block(f"{name} =", ";"):
self.object_var_head("PyLong_Type", sign*len(digits))
if digits:
ds = ", ".join(map(str, digits))
self.write(f".ob_digit = {{ {ds} }},")
def generate_int(self, name: str, i: int) -> str:
if -5 <= i <= 256:
return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
if i >= 0:
name = f"const_int_{i}"
else:
name = f"const_int_negative_{abs(i)}"
if abs(i) < 2**15:
self._generate_int_for_bits(name, i, 2**15)
else:
connective = "if"
for bits_in_digit in 15, 30:
self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
self._generate_int_for_bits(name, i, 2**bits_in_digit)
connective = "elif"
self.write("#else")
self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
self.write("#endif")
# If neither clause applies, it won't compile
return f"& {name}.ob_base.ob_base"
def generate_float(self, name: str, x: float) -> str:
with self.block(f"static PyFloatObject {name} =", ";"):
self.object_head("PyFloat_Type")
self.write(f".ob_fval = {x},")
return f"&{name}.ob_base"
def generate_complex(self, name: str, z: complex) -> str:
with self.block(f"static PyComplexObject {name} =", ";"):
self.object_head("PyComplex_Type")
self.write(f".cval = {{ {z.real}, {z.imag} }},")
return f"&{name}.ob_base"
def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
try:
fs = sorted(fs)
except TypeError:
# frozen set with incompatible types, fallback to repr()
fs = sorted(fs, key=repr)
ret = self.generate_tuple(name, tuple(fs))
self.write("// TODO: The above tuple should be a frozenset")
return ret
def generate_file(self, module: str, code: object)-> None:
module = module.replace(".", "_")
self.generate(f"{module}_toplevel", code)
with self.block(f"static void {module}_do_patchups(void)"):
for p in self.patchups:
self.write(p)
self.patchups.clear()
self.write(EPILOGUE.replace("%%NAME%%", module))
def generate(self, name: str, obj: object) -> str:
# Use repr() in the key to distinguish -0.0 from +0.0
key = (type(obj), obj, repr(obj))
if key in self.cache:
self.hits += 1
# print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
return self.cache[key]
self.misses += 1
if isinstance(obj, (types.CodeType, umarshal.Code)) :
val = self.generate_code(name, obj)
elif isinstance(obj, tuple):
val = self.generate_tuple(name, obj)
elif isinstance(obj, str):
val = self.generate_unicode(name, obj)
elif isinstance(obj, bytes):
val = self.generate_bytes(name, obj)
elif obj is True:
return "Py_True"
elif obj is False:
return "Py_False"
elif isinstance(obj, int):
val = self.generate_int(name, obj)
elif isinstance(obj, float):
val = self.generate_float(name, obj)
elif isinstance(obj, complex):
val = self.generate_complex(name, obj)
elif isinstance(obj, frozenset):
val = self.generate_frozenset(name, obj)
elif obj is builtins.Ellipsis:
return "Py_Ellipsis"
elif obj is None:
return "Py_None"
else:
raise TypeError(
f"Cannot generate code for {type(obj).__name__} object")
# print(f"Cache store {key!r:.40}: {val!r:.40}")
self.cache[key] = val
return val
# C code written after each module's objects by Printer.generate_file();
# every "%%NAME%%" is replaced with the module name (dots turned into "_").
EPILOGUE = """
PyObject *
_Py_get_%%NAME%%_toplevel(void)
{
%%NAME%%_do_patchups();
return Py_NewRef((PyObject *) &%%NAME%%_toplevel);
}
"""
# Leading comments that is_frozen_header() recognises as a frozen-module header.
FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"
# A line made only of comma-terminated decimal numbers (used by
# decode_frozen_data() to find the data lines of a frozen header).
FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
def is_frozen_header(source: str) -> bool:
    """Tell whether *source* begins with one of the frozen-header marker comments."""
    known_markers = (FROZEN_COMMENT_C, FROZEN_COMMENT_PY)
    return source.startswith(known_markers)
def decode_frozen_data(source: str) -> types.CodeType:
    """Extract the numeric data lines from a frozen header and unmarshal them.

    Lines before the first and after the last line matching
    ``FROZEN_DATA_LINE`` are discarded; the remainder is evaluated as a
    tuple of ints, turned into bytes and passed to ``umarshal.loads``.
    """
    def is_data(text: str) -> bool:
        return re.match(FROZEN_DATA_LINE, text) is not None

    lines = source.splitlines()
    # Locate the first data line ...
    start = 0
    while start < len(lines) and not is_data(lines[start]):
        start += 1
    # ... and one past the last data line.
    stop = len(lines)
    while stop > start and not is_data(lines[stop - 1]):
        stop -= 1
    payload = "".join(lines[start:stop]).strip()
    values: Tuple[int, ...] = ast.literal_eval(payload)
    return umarshal.loads(bytes(values))
def generate(args: list[str], output: TextIO) -> None:
    """Write the deep-frozen C source for every module in *args* to *output*.

    Args:
        args: entries of the form ``file:modname``; the text after the last
            ``:`` is the module name.
        output: open text file that receives the generated C code.

    Each input is either a frozen header (decoded with
    ``decode_frozen_data``) or Python source (compiled here).  After all
    modules, ``_Py_Deepfreeze_Fini`` and ``_Py_Deepfreeze_Init`` are written
    from the statements the printer collected.  Reads the module-level
    ``verbose`` flag assigned by ``main()``.
    """
    printer = Printer(output)
    for arg in args:
        file, modname = arg.rsplit(':', 1)
        with open(file, "r", encoding="utf8") as fd:
            source = fd.read()
        if is_frozen_header(source):
            code = decode_frozen_data(source)
        else:
            # Compile the text that was already read.  Calling fd.read() a
            # second time would return "" because the file is at EOF, which
            # silently compiled an empty module.
            code = compile(source, f"<frozen {modname}>", "exec")
        printer.generate_file(modname, code)
    with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
        for p in printer.deallocs:
            printer.write(p)
    with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
        for p in printer.interns:
            # Each intern call reports failure with a negative result.
            with printer.block(f"if ({p} < 0)"):
                printer.write("return -1;")
        printer.write("return 0;")
    if verbose:
        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
# Command-line interface; parsed by main().
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
@contextlib.contextmanager
def report_time(label: str):
    """Context manager that prints how long its body took when ``verbose`` is set.

    The duration is measured with ``time.time()`` and reported even if the
    body raises.
    """
    started = time.time()
    try:
        yield
    finally:
        finished = time.time()
        if verbose:
            print(f"{label}: {finished-started:.3f} sec")
def main() -> None:
    """Parse the command line, generate the output file and optionally report its size."""
    global verbose
    options = parser.parse_args()
    verbose = options.verbose
    output = options.output
    with open(output, "w", encoding="utf-8") as file, report_time("generate"):
        generate(options.args, file)
    if verbose:
        print(f"Wrote {os.path.getsize(output)} bytes to {output}")
if __name__ == "__main__":
main()

View File

@@ -1,60 +0,0 @@
#!/usr/bin/env python3
""" Command line interface to difflib.py providing diffs in four formats:
* ndiff: lists every line and highlights interline changes.
* context: highlights clusters of changes in a before/after format.
* unified: highlights clusters of changes in an inline format.
* html: generates side by side comparison with change highlights.
"""
import sys, os, difflib, argparse
from datetime import datetime, timezone
def file_mtime(path):
    """Return the modification time of *path* as an ISO 8601 string in the local time zone."""
    modified = os.stat(path).st_mtime
    as_utc = datetime.fromtimestamp(modified, timezone.utc)
    return as_utc.astimezone().isoformat()
def main():
    """Compare two files named on the command line and write the diff to stdout.

    The output format is chosen by -u, -n or -m (checked in that order);
    without any of them a context diff is produced.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', action='store_true', default=False,
                        help='Produce a context format diff (default)')
    parser.add_argument('-u', action='store_true', default=False,
                        help='Produce a unified format diff')
    parser.add_argument('-m', action='store_true', default=False,
                        help='Produce HTML side by side diff '
                             '(can use -c and -l in conjunction)')
    parser.add_argument('-n', action='store_true', default=False,
                        help='Produce a ndiff format diff')
    parser.add_argument('-l', '--lines', type=int, default=3,
                        help='Set number of context lines (default 3)')
    parser.add_argument('fromfile')
    parser.add_argument('tofile')
    opts = parser.parse_args()

    context_lines = opts.lines
    old_name, new_name = opts.fromfile, opts.tofile

    # Timestamps are taken before the files are read.
    old_date = file_mtime(old_name)
    new_date = file_mtime(new_name)
    with open(old_name) as ff:
        old_lines = ff.readlines()
    with open(new_name) as tf:
        new_lines = tf.readlines()

    if opts.u:
        diff = difflib.unified_diff(old_lines, new_lines, old_name, new_name,
                                    old_date, new_date, n=context_lines)
    elif opts.n:
        diff = difflib.ndiff(old_lines, new_lines)
    elif opts.m:
        diff = difflib.HtmlDiff().make_file(old_lines, new_lines, old_name, new_name,
                                            context=opts.c, numlines=context_lines)
    else:
        diff = difflib.context_diff(old_lines, new_lines, old_name, new_name,
                                    old_date, new_date, n=context_lines)
    sys.stdout.writelines(diff)
if __name__ == '__main__':
main()

View File

@@ -1,60 +0,0 @@
#! /usr/bin/env python3
# Format du output in a tree shape
import os, sys, errno
def main():
    """Run ``du`` on the command-line arguments and print its output as a tree.

    Each ``du`` output line ("<size><whitespace><path>") is split into path
    components and stored in a nested dictionary with ``store()``; the tree
    is then printed with ``display()``.  A broken pipe while printing is
    ignored; any other I/O error propagates.
    """
    import subprocess  # local import: only this function needs it

    total, d = None, {}
    # Pass the arguments as a list so that names containing spaces or shell
    # metacharacters reach du unchanged instead of being re-parsed by a shell.
    with subprocess.Popen(['du'] + sys.argv[1:],
                          stdout=subprocess.PIPE, text=True) as p:
        for line in p.stdout:
            i = 0
            while line[i] in '0123456789': i = i+1
            # int() rather than eval(): the text is a plain decimal number.
            size = int(line[:i])
            while line[i] in ' \t': i = i+1
            filename = line[i:-1]  # drop the trailing newline
            comps = filename.split('/')
            # An absolute path starts with an empty component; call it '/'.
            if comps[0] == '': comps[0] = '/'
            # A trailing slash leaves an empty last component; drop it.
            if comps[-1] == '': del comps[-1]
            total, d = store(size, comps, total, d)
    try:
        display(total, d)
    except IOError as e:
        if e.errno != errno.EPIPE:
            raise
def store(size, comps, total, d):
    """Record *size* for the path *comps* in the tree *d* and return ``(total, d)``.

    Each node of the tree is a ``(size_or_None, children_dict)`` pair.  When
    *comps* is empty the node itself is being sized, so *size* replaces
    *total* in the result.
    """
    if comps == []:
        return size, d
    head, rest = comps[0], comps[1:]
    # Create a placeholder node the first time a component is seen.
    subtotal, subtree = d.setdefault(head, (None, {}))
    d[head] = store(size, rest, subtotal, subtree)
    return total, d
def display(total, d):
    """Print the whole tree *d*, starting with an empty line prefix."""
    show(total, d, prefix='')
def show(total, d, prefix):
    """Print the entries of tree *d*, largest first, then recurse into each.

    Args:
        total: size of the node that owns *d* (kept for the call signature;
            not used for the output).
        d: mapping of name to ``(size_or_None, children_dict)``.
        prefix: text printed before every line at this level.

    Entries without a size (``None``) are not printed themselves; their
    children are shown with the unchanged prefix.
    """
    if not d:
        return
    entries = [(tsub, key) for key, (tsub, dsub) in d.items()]
    # Largest size first, ties broken by name in descending order, entries
    # with an unknown size last.  The explicit key avoids comparing None
    # with int, which raises TypeError when sized and unsized entries mix.
    entries.sort(key=lambda entry: (entry[0] is not None, entry[0] or 0, entry[1]),
                 reverse=True)
    # Column width is taken from the first (largest) entry.
    width = len(repr(entries[0][0]))
    for tsub, key in entries:
        if tsub is None:
            psub = prefix
        else:
            print(prefix + repr(tsub).rjust(width) + ' ' + key)
            psub = prefix + ' '*(width-1) + '|' + ' '*(len(key)+1)
        show(tsub, d[key][1], psub)
if __name__ == '__main__':
main()

View File

@@ -1,57 +0,0 @@
#! /usr/bin/env python3
"""Create a TAGS file for Python programs, usable with GNU Emacs.
usage: eptags pyfiles...
The output TAGS file is usable with Emacs version 18, 19, 20.
Tagged are:
- functions (even inside other defs or classes)
- classes
eptags warns about files it cannot open.
eptags will not give warnings about duplicate tags.
BUGS:
Because of tag duplication (methods with the same name in different
classes), TAGS files are not very useful for most object-oriented
python projects.
"""
import sys,re
# Matches the start of a "def" or "class" statement: optional leading blanks,
# the keyword, the name, and the following ':' or '('.
expr = r'^[ \t]*(def|class)[ \t]+([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*[:\(]'
matcher = re.compile(expr)
def treat_file(filename, outfp):
    """Write the TAGS section for the file *filename* to the open file *outfp*.

    A file that cannot be opened is reported on stderr and skipped.  Each
    tag records the matched text, the line number and the character offset
    of the start of that line.
    """
    try:
        fp = open(filename, 'r')
    except OSError:
        sys.stderr.write('Cannot open %s\n'%filename)
        return
    tags = []
    offset = 0  # characters read before the current line
    with fp:
        for lineno, line in enumerate(fp, 1):
            m = matcher.search(line)
            if m:
                tags.append(m.group(0) + '\177%d,%d\n' % (lineno, offset))
            offset += len(line)
    # The section header carries the total length of the tag lines.
    size = sum(map(len, tags))
    outfp.write('\f\n%s,%d\n' % (filename,size))
    outfp.writelines(tags)
def main():
    """Create a file called TAGS covering every file named on the command line."""
    sources = sys.argv[1:]
    with open('TAGS', 'w') as outfp:
        for source in sources:
            treat_file(source, outfp)
if __name__=="__main__":
main()

View File

@@ -1,40 +0,0 @@
#!/usr/bin/env python3
"""
For each argument on the command line, look for it in the set of all Unicode
names. Arguments are treated as case-insensitive regular expressions, e.g.:
% find-uname 'small letter a$' 'horizontal line'
*** small letter a$ matches ***
LATIN SMALL LETTER A (97)
COMBINING LATIN SMALL LETTER A (867)
CYRILLIC SMALL LETTER A (1072)
PARENTHESIZED LATIN SMALL LETTER A (9372)
CIRCLED LATIN SMALL LETTER A (9424)
FULLWIDTH LATIN SMALL LETTER A (65345)
*** horizontal line matches ***
HORIZONTAL LINE EXTENSION (9135)
"""
import unicodedata
import sys
import re
def main(args):
    """Print every Unicode character name matched by each regular expression in *args*.

    Patterns are case-insensitive.  A header line is printed for each
    pattern that matches at least one name, followed by "NAME (codepoint)"
    lines in code point order.
    """
    # Collect (code point, name) for every character that has a name.
    named = []
    for codepoint in range(sys.maxunicode+1):
        label = unicodedata.name(chr(codepoint), None)
        if label is not None:
            named.append((codepoint, label))
    for arg in args:
        pat = re.compile(arg, re.I)
        hits = [(label, codepoint) for codepoint, label in named
                if pat.search(label)]
        if not hits:
            continue
        print("***", arg, "matches", "***")
        for hit in hits:
            print("%s (%d)" % hit)
if __name__ == "__main__":
main(sys.argv[1:])

View File

@@ -1,128 +0,0 @@
#! /usr/bin/env python3
"""Find the maximum recursion limit that prevents interpreter termination.
This script finds the maximum safe recursion limit on a particular
platform. If you need to change the recursion limit on your system,
this script will tell you a safe upper bound. To use the new limit,
call sys.setrecursionlimit().
This module implements several ways to create infinite recursion in
Python. Different implementations end up pushing different numbers of
C stack frames, depending on how many calls through Python's abstract
C API occur.
After each round of tests, it prints a message:
"Limit of NNNN is fine".
The highest printed value of "NNNN" is therefore the highest potentially
safe limit for your system (which depends on the OS, architecture, but also
the compilation flags). Please note that it is practically impossible to
test all possible recursion paths in the interpreter, so the results of
this test should not be trusted blindly -- although they give a good hint
of which values are reasonable.
NOTE: When the C stack space allocated by your system is exceeded due
to excessive recursion, exact behaviour depends on the platform, although
the interpreter will always fail in a likely brutal way: either a
segmentation fault, a MemoryError, or just a silent abort.
NB: A program that does not use __methods__ can set a higher limit.
"""
import sys
import itertools
class RecursiveBlowup1:
    """Instantiation never finishes: ``__init__`` calls itself."""
    def __init__(self):
        self.__init__()
def test_init():
    """Trigger unbounded recursion through ``__init__``."""
    return RecursiveBlowup1()
class RecursiveBlowup2:
    """``repr()`` of an instance never finishes: ``__repr__`` calls ``repr(self)``."""
    def __repr__(self):
        return repr(self)
def test_repr():
    """Trigger unbounded recursion through ``repr()``."""
    return repr(RecursiveBlowup2())
class RecursiveBlowup4:
    """Adding two instances never finishes: ``__add__`` swaps the operands and adds again."""
    def __add__(self, x):
        return x + self
def test_add():
    """Trigger unbounded recursion through the ``+`` operator."""
    return RecursiveBlowup4() + RecursiveBlowup4()
class RecursiveBlowup5:
    """Looking up a missing attribute never finishes: ``__getattr__`` calls ``getattr`` again."""
    def __getattr__(self, attr):
        return getattr(self, attr)
def test_getattr():
    """Trigger unbounded recursion through attribute lookup."""
    return RecursiveBlowup5().attr
class RecursiveBlowup6:
    """Indexing never finishes: ``__getitem__`` indexes ``self`` twice with smaller keys."""
    def __getitem__(self, item):
        return self[item - 2] + self[item - 1]
def test_getitem():
    """Trigger unbounded recursion through subscripting."""
    return RecursiveBlowup6()[5]
def test_recurse():
    """Trigger unbounded recursion through a plain function call."""
    return test_recurse()
def test_cpickle(_cache={}):
    """Pickle ever more deeply nested list/dict structures with ``_pickle``.

    The loop has no exit of its own; it ends when pickling raises.  The
    mutable default ``_cache`` is deliberate: it persists between calls and
    remembers, per round ``n``, a structure that was already pickled
    successfully so that round can be skipped next time.
    """
    import io
    try:
        import _pickle
    except ImportError:
        print("cannot import _pickle, skipped!")
        return
    k, l = None, None
    for n in itertools.count():
        try:
            l = _cache[n]
            continue  # Already tried and it works, let's save some time
        except KeyError:
            # Wrap the structure in 100 more list/dict layers.
            for i in range(100):
                l = [k, l]
                k = {i: l}
        _pickle.Pickler(io.BytesIO(), protocol=-1).dump(l)
        _cache[n] = l
def test_compiler_recursion():
    """Compile a source string whose length is ten times the current recursion limit."""
    # The compiler uses a scaling factor to support additional levels
    # of recursion. This is a sanity check of that scaling to ensure
    # it still raises RecursionError even at higher recursion limits
    compile("()" * (10 * sys.getrecursionlimit()), "<single>", "single")
def check_limit(n, test_func_name):
    """Set the recursion limit to *n* and run the module-level function *test_func_name*.

    The function name is printed first (without a leading "test_").  A
    RecursionError or AttributeError is the expected outcome; if the
    function returns normally "Yikes!" is printed.
    """
    sys.setrecursionlimit(n)
    label = test_func_name[5:] if test_func_name.startswith("test_") else test_func_name
    print(label)
    test_func = globals()[test_func_name]
    try:
        test_func()
    # AttributeError can be raised because of the way e.g. PyDict_GetItem()
    # silences all exceptions and returns NULL, which is usually interpreted
    # as "missing attribute".
    except (RecursionError, AttributeError):
        return
    print("Yikes!")
if __name__ == '__main__':
    # Start at 1000 and raise the limit by 100 after every round in which
    # all tests came back; the loop has no exit condition of its own.
    limit = 1000
    while 1:
        check_limit(limit, "test_recurse")
        check_limit(limit, "test_add")
        check_limit(limit, "test_repr")
        check_limit(limit, "test_init")
        check_limit(limit, "test_getattr")
        check_limit(limit, "test_getitem")
        check_limit(limit, "test_cpickle")
        check_limit(limit, "test_compiler_recursion")
        print("Limit of %d is fine" % limit)
        limit = limit + 100

View File

@@ -1,89 +0,0 @@
#! /usr/bin/env python3
"""finddiv - a grep-like tool that looks for division operators.
Usage: finddiv [-l] file_or_directory ...
For directory arguments, all files in the directory whose name ends in
.py are processed, and subdirectories are processed recursively.
This actually tokenizes the files to avoid false hits in comments or
strings literals.
By default, this prints all lines containing a / or /= operator, in
grep -n style. With the -l option specified, it prints the filename
of files that contain at least one / or /= operator.
"""
import os
import sys
import getopt
import tokenize
def main():
    """Parse the command line and process every file or directory argument.

    Returns:
        2 for a usage error, ``None`` after printing help for ``-h``,
        otherwise the first true value returned by ``process()`` (``None``
        if every argument was processed without error).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "lh")
    except getopt.error as msg:
        usage(msg)
        return 2
    listnames = 0
    # Handle the options before checking for file arguments so that
    # "finddiv -h" prints the help text instead of a usage error.
    for o, a in opts:
        if o == "-h":
            print(__doc__)
            return
        if o == "-l":
            listnames = 1
    if not args:
        usage("at least one file argument is required")
        return 2
    status = None
    for filename in args:
        x = process(filename, listnames)
        status = status or x
    return status
def usage(msg):
    """Write *msg* followed by a short usage reminder to standard error."""
    prog = sys.argv[0]
    report = (
        "%s: %s\n" % (prog, msg),
        "Usage: %s [-l] file ...\n" % prog,
        "Try `%s -h' for more information.\n" % prog,
    )
    for text in report:
        sys.stderr.write(text)
def process(filename, listnames):
    """Report the ``/`` and ``/=`` operators found in *filename*.

    Directories are handed to ``processdir()``.  For a file, the source is
    tokenized; with *listnames* true only the file name is printed (once),
    otherwise every line containing such an operator is printed once as
    ``filename:row:line``.

    Returns:
        1 if the file cannot be opened, the result of ``processdir()`` for a
        directory, otherwise ``None``.
    """
    if os.path.isdir(filename):
        return processdir(filename, listnames)
    try:
        fp = open(filename)
    except IOError as msg:
        sys.stderr.write("Can't open: %s\n" % msg)
        return 1
    with fp:
        g = tokenize.generate_tokens(fp.readline)
        lastrow = None
        for type, token, (row, col), end, line in g:
            if token in ("/", "/="):
                if listnames:
                    print(filename)
                    break
                if row != lastrow:
                    lastrow = row
                    # The source line carries its own newline; strip it and
                    # let print() end the line, so no stray space is left at
                    # the start of the next output line and a final line
                    # without a newline is still terminated.
                    print("%s:%d:%s" % (filename, row, line.rstrip("\n")))
def processdir(dir, listnames):
    """Process every ``.py`` file and every subdirectory found in *dir*.

    Entries are handled in case-normalised name order.  Returns 1 if the
    directory cannot be listed, otherwise the first true value returned by
    ``process()`` (``None`` if there was none).
    """
    try:
        names = os.listdir(dir)
    except OSError:
        sys.stderr.write("Can't list directory: %s\n" % dir)
        return 1
    candidates = [os.path.join(dir, name) for name in names]
    selected = sorted(
        (fn for fn in candidates
         if os.path.normcase(fn).endswith(".py") or os.path.isdir(fn)),
        key=os.path.normcase,
    )
    status = None
    for fn in selected:
        # Always call process(); only the first true result is kept.
        outcome = process(fn, listnames)
        status = status or outcome
    return status
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,43 +0,0 @@
#! /usr/bin/env python3
# findlinksto
#
# find symbolic links to a path matching a regular expression
import os
import sys
import re
import getopt
def main():
    """Print the symbolic links below the given directories whose target matches a pattern.

    Command line: ``findlinksto pattern directory ...``.  Exits with status
    2 and a usage message on stderr when fewer than two arguments are given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], '')
        if len(args) < 2:
            raise getopt.GetoptError('not enough arguments', None)
    except getopt.GetoptError as msg:
        print(msg, file=sys.stderr)
        print('usage: findlinksto pattern directory ...', file=sys.stderr)
        sys.exit(2)
    pat, dirs = args[0], args[1:]
    prog = re.compile(pat)
    for dirname in dirs:
        # os.walk() is a generator taking (top, topdown, onerror); it has to
        # be iterated, and visit() called for each directory, for anything
        # to happen.  The previous call passed visit/prog as if this were
        # the old os.path.walk() and never ran the walk.
        for dirpath, subdirs, files in os.walk(dirname):
            names = subdirs + files
            visit(prog, dirpath, names)
            # visit() may empty `names` to stop the descent; apply that to
            # the list os.walk() uses to decide where to go next.
            subdirs[:] = [name for name in subdirs if name in names]
def visit(prog, dirname, names):
    """Print each symbolic link among *names* in *dirname* whose target matches *prog*.

    If *dirname* is itself a symbolic link, *names* is emptied in place and
    nothing is printed.  Entering a mount point is announced.  Entries that
    cannot be read as links are skipped silently.
    """
    if os.path.islink(dirname):
        del names[:]
        return
    if os.path.ismount(dirname):
        print('descend into', dirname)
    for entry in names:
        path = os.path.join(dirname, entry)
        try:
            target = os.readlink(path)
            if prog.search(target) is not None:
                print(path, '->', target)
        except OSError:
            continue
if __name__ == '__main__':
main()

View File

@@ -1,107 +0,0 @@
#!/usr/bin/env python3
"""List all those Python files that require a coding directive
Usage: findnocoding.py dir1 [dir2...]
"""
__author__ = "Oleg Broytmann, Georg Brandl"
import sys, os, re, getopt
# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # emulate the module with a simple os.walk
    class pysource:
        """Minimal stand-in for the pysource module, used when it cannot be imported."""
        # The real module's predicates are not available in the fallback.
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every ``.py`` file named in, or found below, *paths*.

            Extra positional and keyword arguments are accepted and ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, and only for .py files; the
                    # result of endswith() is a bool, not a file name.
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)
    # Replace the class by an instance so it is used like the module.
    pysource = pysource()
    print("The pysource module is not available; "
          "no sophisticated Python source file search will be done.", file=sys.stderr)
# A comment line containing "coding:" or "coding=" followed by an encoding
# name; group 1 is the name.  Both patterns operate on bytes.
decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
# A line that is empty, whitespace only, or a comment.
blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')
def get_declaration(line):
    """Return the encoding name declared on the bytes *line*, or ``b''`` if there is none."""
    found = decl_re.match(line)
    return found.group(1) if found else b''
def has_correct_encoding(text, codec):
    """Report whether the bytes *text* can be decoded with *codec*."""
    try:
        str(text, codec)
    except UnicodeDecodeError:
        return False
    return True
def needs_declaration(fullpath):
    """Decide whether the file *fullpath* lacks a needed coding declaration.

    Returns ``None`` if the file cannot be opened, ``False`` if it declares
    an encoding on its first line (or on its second line after a blank or
    comment first line) or decodes as UTF-8, and ``True`` otherwise.
    """
    try:
        infile = open(fullpath, 'rb')
    except IOError:  # Oops, the file was removed - ignore it
        return None
    with infile:
        line1 = infile.readline()
        line2 = infile.readline()
        declared = get_declaration(line1) or (
            blank_re.match(line1) and get_declaration(line2))
        if declared:
            # the file does have an encoding declaration, so trust it
            return False
        # check the whole file for non utf-8 characters
        rest = infile.read()
    return not has_correct_encoding(line1+line2+rest, "utf-8")
# Help text printed on a bad option or when no paths are given.
usage = """Usage: %s [-cd] paths...
-c: recognize Python source files trying to compile them
-d: debug output""" % sys.argv[0]
if __name__ == '__main__':
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cd')
    except getopt.error as msg:
        print(msg, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(1)
    # Predicate handed to walk_python_files(); -c switches it to the
    # compile-based check.
    is_python = pysource.looks_like_python
    debug = False
    for o, a in opts:
        if o == '-c':
            is_python = pysource.can_be_compiled
        elif o == '-d':
            debug = True
    if not args:
        print(usage, file=sys.stderr)
        sys.exit(1)
    # Print the path of every file that needs a coding declaration.
    for fullpath in pysource.walk_python_files(args, is_python):
        if debug:
            print("Testing for coding: %s" % fullpath)
        result = needs_declaration(fullpath)
        if result:
            print(fullpath)

View File

@@ -1,316 +0,0 @@
#! /usr/bin/env python3
# Perform massive identifier substitution on C source files.
# This actually tokenizes the files (to some extent) so it can
# avoid making substitutions inside strings or comments.
# Inside strings, substitutions are never made; inside comments,
# it is a user option (off by default).
#
# The substitutions are read from one or more files whose lines,
# when not empty, after stripping comments starting with #,
# must contain exactly two words separated by whitespace: the
# old identifier and its replacement.
#
# The option -r reverses the sense of the substitutions (this may be
# useful to undo a particular substitution).
#
# If the old identifier is prefixed with a '*' (with no intervening
# whitespace), then it will not be substituted inside comments.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a C file (ends in .h or .c). The special filename '-' means
# operate in filter mode: read stdin, write stdout.
#
# Symbolic links are always ignored (except as explicit directory
# arguments).
#
# The original files are kept as back-up with a "~" suffix.
#
# Changes made are reported to stdout in a diff-like format.
#
# NB: by changing only the function fixline() you can turn this
# into a program for different changes to C source files; by
# changing the function wanted() you can make a different selection of
# files.
import sys
import re
import os
from stat import *
import getopt
err = sys.stderr.write
dbg = err
rep = sys.stdout.write
def usage():
    """Write the command-line help text to the error stream."""
    progname = sys.argv[0]
    help_text = (
        'Usage: ' + progname +
        ' [-c] [-r] [-s file] ... file-or-directory ...\n',
        '\n',
        '-c : substitute inside comments\n',
        '-r : reverse direction for following -s options\n',
        '-s substfile : add a file of substitutions\n',
        '\n',
        'Each non-empty non-comment line in a substitution file must\n',
        'contain exactly two words: an identifier and its replacement.\n',
        'Comments start with a # character and end at end of line.\n',
        'If an identifier is preceded with a *, it is not substituted\n',
        'inside a comment even when -c is specified.\n',
    )
    for text in help_text:
        err(text)
def main():
    """Parse the command line, load the substitutions and fix every argument.

    Exits with status 2 on a usage error, otherwise with 1 if any file or
    directory could not be processed and 0 if all went well.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'crs:')
    except getopt.error as msg:
        err('Options error: ' + str(msg) + '\n')
        usage()
        sys.exit(2)
    if not args:  # No arguments
        usage()
        sys.exit(2)
    # Options are applied in command-line order, so -r affects only the
    # -s options that follow it.
    for opt, arg in opts:
        if opt == '-c':
            setdocomments()
        elif opt == '-r':
            setreverse()
        elif opt == '-s':
            addsubst(arg)
    bad = 0
    for arg in args:
        if os.path.isdir(arg):
            failed = recursedown(arg)
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            failed = 1
        else:
            failed = fix(arg)
        if failed:
            bad = 1
    sys.exit(bad)
# Change this regular expression to select a different set of files
Wanted = r'^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
    """Return a match object if the file name *name* matches ``Wanted``, else ``None``."""
    selector = re.compile(Wanted)
    return selector.match(name)
def recursedown(dirname):
    """Fix every wanted file in *dirname*, then recurse into its subdirectories.

    Symbolic links are skipped.  Returns 1 if the directory could not be
    listed or anything below it failed, otherwise 0.
    """
    dbg('recursedown(%r)\n' % (dirname,))
    try:
        names = os.listdir(dirname)
    except OSError as msg:
        err(dirname + ': cannot list directory: ' + str(msg) + '\n')
        return 1
    bad = 0
    subdirs = []
    for name in sorted(names):
        if name in (os.curdir, os.pardir):
            continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname):
            continue
        if os.path.isdir(fullname):
            # Subdirectories are visited after all files of this directory.
            subdirs.append(fullname)
        elif wanted(name) and fix(fullname):
            bad = 1
    for subdir in subdirs:
        if recursedown(subdir):
            bad = 1
    return bad
def fix(filename):
    """Apply ``fixline()`` to every line of *filename* and replace the file if anything changed.

    With *filename* ``'-'`` the function filters stdin to stdout.  Otherwise
    the file is first only scanned; at the first changed line a temporary
    file ("@" + name, in the same directory) is opened and the scan restarts
    from the top, this time copying every line.  At the end the original is
    renamed to ``filename~`` and the temporary file takes its place.

    Returns 0 on success (including "nothing to change") and 1 on failure.
    """
##  dbg('fix(%r)\n' % (filename,))
    if filename == '-':
        # Filter mode
        f = sys.stdin
        g = sys.stdout
    else:
        # File replacement mode
        try:
            f = open(filename, 'r')
        except IOError as msg:
            err(filename + ': cannot open: ' + str(msg) + '\n')
            return 1
        head, tail = os.path.split(filename)
        tempname = os.path.join(head, '@' + tail)
        g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    initfixline()
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        # Join backslash-continued lines so they are fixed as one unit.
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                # First change in file replacement mode: open the temp file
                # and restart so that earlier lines get copied too.
                try:
                    g = open(tempname, 'w')
                except IOError as msg:
                    f.close()
                    err(tempname+': cannot create: '+
                        str(msg)+'\n')
                    return 1
                f.seek(0)
                lineno = 0
                initfixline()
                rep(filename + ':\n')
                continue # restart from the beginning
            # Report the change in a diff-like format.
            rep(repr(lineno) + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)
    # End of file
    if filename == '-': return 0 # Done in filter mode
    f.close()
    if not g: return 0 # No changes
    g.close()
    # Finishing touch -- move files
    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 0o7777)
    except OSError as msg:
        err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except OSError as msg:
        err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except OSError as msg:
        err(filename + ': rename failed (' + str(msg) + ')\n')
        return 1
    # Return success
    return 0
# Tokenizing ANSI C (partly)
# Regular-expression fragments for the tokens the scanner has to recognise.
# NOTE(review): the trailing '+' in Identifier means a one-character
# identifier never matches -- confirm this is intended.
Identifier = '(struct )?[a-zA-Z_][a-zA-Z0-9_]+'
String = r'"([^\n\\"]|\\.)*"'
Char = r"'([^\n\\']|\\.)*'"
CommentStart = r'/\*'
CommentEnd = r'\*/'
Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = Hexnumber + '|' + Octnumber + '|' + Decnumber
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = r'([0-9]+\.[0-9]*|\.[0-9]+)(' + Exponent + r')?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '|' + Expfloat
Number = Floatnumber + '|' + Intnumber
# Anything else is an operator -- don't list this explicitly because of '/*'
# Scanner used outside comments: also recognises strings, character
# literals and the start of a comment.
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')'
OutsideCommentProgram = re.compile(OutsideCommentPattern)
# Scanner used inside comments: identifiers, numbers and the comment end.
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '(' + '|'.join(InsideComment) + ')'
InsideCommentProgram = re.compile(InsideCommentPattern)
def initfixline():
    """Reset the scanner state so the next line is treated as outside a comment."""
    global Program
    # fixline() swaps Program as it crosses comment delimiters; start each
    # pass over a file with the outside-comment scanner.
    Program = OutsideCommentProgram
def fixline(line):
    """Apply the substitutions in Dict to one line of C text.

    The global Program holds the active scanner and is switched whenever a
    comment delimiter is crossed, so comment state carries over from one call
    to the next.  Returns the (possibly rewritten) line.
    """
    global Program
    pos = 0
    while pos < len(line):
        hit = Program.search(line, pos)
        if hit is None:
            break
        pos = hit.start()
        token = hit.group(0)
        # Comment delimiters flip which scanner is used from here on.
        if token == '/*':
            Program = InsideCommentProgram
        elif token == '*/':
            Program = OutsideCommentProgram
        width = len(token)
        if token in Dict:
            replacement = Dict[token]
            if Program is InsideCommentProgram:
                if not Docomments:
                    # Comment substitution is off: report and skip the token.
                    print('Found in comment:', token)
                    pos += width
                    continue
                if token in NotInComment:
                    # This key is flagged as never substituted in comments.
                    replacement = token
            line = line[:pos] + replacement + line[pos + width:]
            # Resume scanning after the text just inserted.
            width = len(replacement)
        pos += width
    return line
# Non-zero when fixline() should also substitute inside C comments.
Docomments = 0


def setdocomments():
    """Enable substitution of identifiers found inside C comments."""
    global Docomments
    Docomments = 1
# True when addsubst() should swap the key and value columns of each rule.
Reverse = 0


def setreverse():
    """Toggle the direction in which substitution rules are read."""
    global Reverse
    Reverse = not Reverse
# Substitution table: identifier -> replacement text.
Dict = {}
# Keys whose rule was marked with a leading '*': fixline() leaves these
# untouched when they occur inside a comment.
NotInComment = {}


def addsubst(substfile):
    """Load substitution rules from *substfile* into Dict and NotInComment.

    Each non-blank line holds two whitespace-separated words, optionally
    followed by a '#' comment.  A three-word line starting with 'struct' is
    read as the two words 'struct X' and 'Y'.  When Reverse is set the two
    columns are swapped.  A leading '*' is stripped from either word; on the
    key it additionally records the key in NotInComment.

    Malformed lines and redefinitions are reported through err() and do not
    stop processing.  Exits the process with status 1 if the file cannot be
    opened.
    """
    try:
        fp = open(substfile, 'r')
    except IOError as msg:
        err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
        sys.exit(1)
    with fp:
        for lineno, line in enumerate(fp, 1):
            # Drop any '#' comment; split() discards the newline, so a final
            # line without one no longer loses its last character.
            words = line.partition('#')[0].split()
            if not words:
                continue
            if len(words) == 3 and words[0] == 'struct':
                words[:2] = [words[0] + ' ' + words[1]]
            elif len(words) != 2:
                # The file name used to be printed twice here, and the
                # message lacked its terminating newline.
                err('%s:%r: warning: bad line: %r\n' % (substfile, lineno, line))
                continue
            if Reverse:
                value, key = words
            else:
                key, value = words
            if value.startswith('*'):
                value = value[1:]
            if key.startswith('*'):
                key = key[1:]
                NotInComment[key] = value
            if key in Dict:
                err('%s:%r: warning: overriding: %r %r\n' % (substfile, lineno, key, value))
                err('%s:%r: warning: previous: %r\n' % (substfile, lineno, Dict[key]))
            Dict[key] = value
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()

View File

@@ -1,378 +0,0 @@
#! /usr/bin/env python3
"""fixdiv - tool to fix division operators.
To use this tool, first run `python -Qwarnall yourscript.py 2>warnings'.
This runs the script `yourscript.py' while writing warning messages
about all uses of the classic division operator to the file
`warnings'. The warnings look like this:
<file>:<line>: DeprecationWarning: classic <type> division
The warnings are written to stderr, so you must use `2>' for the I/O
redirect. I know of no way to redirect stderr on Windows in a DOS
box, so you will have to modify the script to set sys.stderr to some
kind of log file if you want to do this on Windows.
The warnings are not limited to the script; modules imported by the
script may also trigger warnings. In fact a useful technique is to
write a test script specifically intended to exercise all code in a
particular module or set of modules.
Then run `python fixdiv.py warnings'. This first reads the warnings,
looking for classic division warnings, and sorts them by file name and
line number. Then, for each file that received at least one warning,
it parses the file and tries to match the warnings up to the division
operators found in the source code. If it is successful, it writes
its findings to stdout, preceded by a line of dashes and a line of the
form:
Index: <file>
If the only findings found are suggestions to change a / operator into
a // operator, the output is acceptable input for the Unix 'patch'
program.
Here are the possible messages on stdout (N stands for a line number):
- A plain-diff-style change ('NcN', a line marked by '<', a line
containing '---', and a line marked by '>'):
A / operator was found that should be changed to //. This is the
recommendation when only int and/or long arguments were seen.
- 'True division / operator at line N' and a line marked by '=':
A / operator was found that can remain unchanged. This is the
recommendation when only float and/or complex arguments were seen.
- 'Ambiguous / operator (..., ...) at line N', line marked by '?':
A / operator was found for which int or long as well as float or
complex arguments were seen. This is highly unlikely; if it occurs,
you may have to restructure the code to keep the classic semantics,
or maybe you don't care about the classic semantics.
- 'No conclusive evidence on line N', line marked by '*':
A / operator was found for which no warnings were seen. This could
be code that was never executed, or code that was only executed
with user-defined objects as arguments. You will have to
investigate further. Note that // can be overloaded separately from
/, using __floordiv__. True division can also be separately
overloaded, using __truediv__. Classic division should be the same
as either of those. (XXX should I add a warning for division on
user-defined objects, to disambiguate this case from code that was
never executed?)
- 'Phantom ... warnings for line N', line marked by '*':
A warning was seen for a line not containing a / operator. The most
likely cause is a warning about code executed by 'exec' or eval()
(see note below), or an indirect invocation of the / operator, for
example via the div() function in the operator module. It could
also be caused by a change to the file between the time the test
script was run to collect warnings and the time fixdiv was run.
- 'More than one / operator in line N'; or
'More than one / operator per statement in lines N-N':
The scanner found more than one / operator on a single line, or in a
statement split across multiple lines. Because the warnings
framework doesn't (and can't) show the offset within the line, and
the code generator doesn't always give the correct line number for
operations in a multi-line statement, we can't be sure whether all
operators in the statement were executed. To be on the safe side,
by default a warning is issued about this case. In practice, these
cases are usually safe, and the -m option suppresses these warnings.
- 'Can't find the / operator in line N', line marked by '*':
This really shouldn't happen. It means that the tokenize module
reported a '/' operator but the line it returns didn't contain a '/'
character at the indicated position.
- 'Bad warning for line N: XYZ', line marked by '*':
This really shouldn't happen. It means that a 'classic XYZ
division' warning was read with XYZ being something other than
'int', 'long', 'float', or 'complex'.
Notes:
- The augmented assignment operator /= is handled the same way as the
/ operator.
- This tool never looks at the // operator; no warnings are ever
generated for use of this operator.
- This tool never looks at the / operator when a future division
statement is in effect; no warnings are generated in this case, and
because the tool only looks at files for which at least one classic
division warning was seen, it will never look at files containing a
future division statement.
- Warnings may be issued for code not read from a file, but executed
using the exec() or eval() functions. These may have
<string> in the filename position, in which case the fixdiv script
will attempt and fail to open a file named '<string>' and issue a
warning about this failure; or these may be reported as 'Phantom'
warnings (see above). You're on your own to deal with these. You
could make all recommended changes and add a future division
statement to all affected files, and then re-run the test script; it
should not issue any warnings. If there are any, and you have a
hard time tracking down where they are generated, you can use the
-Werror option to force an error instead of a first warning,
generating a traceback.
- The tool should be run from the same directory as that from which
the original script was run, otherwise it won't be able to open
files given by relative pathnames.
"""
import sys
import getopt
import re
import tokenize
# Set by the -m option: suppress the "More than one / operator" notices.
multi_ok = 0


def main():
    """Command-line entry point.

    Parses the -h and -m options, reads the warnings file named by the first
    positional argument and processes every source file mentioned in it, in
    sorted order.

    Returns 2 on a usage error, 1 if the warnings file could not be read or
    any source file failed to process, and None otherwise.
    """
    global multi_ok
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hm")
    except getopt.error as msg:
        usage(msg)
        return 2
    for o, a in opts:
        if o == "-h":
            print(__doc__)
            return
        if o == "-m":
            multi_ok = 1
    if not args:
        usage("at least one file argument is required")
        return 2
    if args[1:]:
        # write() accepts a single string; the program name used to be passed
        # as a second positional argument, which raised TypeError.
        sys.stderr.write("%s: extra file arguments ignored\n" % sys.argv[0])
    warnings = readwarnings(args[0])
    if warnings is None:
        return 1
    if not warnings:
        print("No classic division warnings read from", args[0])
        return
    status = None
    for filename in sorted(warnings):
        failed = process(filename, warnings[filename])
        # Keep the first failing status; None means every file was fine.
        status = status or failed
    return status
def usage(msg):
    """Write *msg* followed by a short usage reminder to standard error."""
    prog = sys.argv[0]
    for text in (
        "%s: %s\n" % (prog, msg),
        "Usage: %s [-m] warnings\n" % prog,
        "Try `%s -h' for more information.\n" % prog,
    ):
        sys.stderr.write(text)
# One classic-division warning line: "<file>:<line>: DeprecationWarning: ...".
PATTERN = (r"^(.+?):(\d+): DeprecationWarning: "
           r"classic (int|long|float|complex) division$")


def readwarnings(warningsfile):
    """Parse *warningsfile* into a mapping of file name -> warning list.

    Each list entry is a (line number, operand type) tuple, in the order the
    warnings appear in the file.  Lines that do not match PATTERN are skipped;
    those that mention "division" are echoed to standard error first.

    Returns None, after writing a message to standard error, if the file
    cannot be opened.
    """
    matcher = re.compile(PATTERN)
    found = {}
    try:
        f = open(warningsfile)
    except IOError as msg:
        sys.stderr.write("can't open: %s\n" % msg)
        return
    with f:
        for line in f:
            m = matcher.match(line)
            if m is None:
                if "division" in line:
                    sys.stderr.write("Warning: ignored input " + line)
                continue
            filename, lineno, what = m.groups()
            entry = (int(lineno), sys.intern(what))
            found.setdefault(filename, []).append(entry)
    return found
def process(filename, list):
    """Match the warnings in *list* against the / operators in *filename*.

    *list* holds (line number, operand type) tuples and is sorted in place.
    The file is scanned one logical line at a time; for each one the findings
    are printed to standard output in the formats described in the module
    docstring.

    Returns 1 if the file cannot be opened, otherwise None.
    """
    print("-" * 70)
    assert list  # if this fails, readwarnings() is broken
    try:
        fp = open(filename)
    except IOError as msg:
        sys.stderr.write("can't open: %s\n" % msg)
        return 1
    with fp:
        print("Index:", filename)
        context = FileContext(fp)
        list.sort()
        pending = list
        total = len(pending)
        cursor = 0  # pending[:cursor] is handled, pending[cursor:] is still to do
        tokens = tokenize.generate_tokens(context.readline)
        while True:
            first, last, slashes = scanline(tokens)
            if first is None:
                break
            assert first <= last is not None

            # Warnings for lines before this statement match no scanned code.
            orphans = []
            while cursor < total and pending[cursor][0] < first:
                orphans.append(pending[cursor])
                cursor += 1
            if orphans:
                reportphantomwarnings(orphans, context)

            # Warnings that fall within this statement's line range.
            warnings = []
            while cursor < total and pending[cursor][0] <= last:
                warnings.append(pending[cursor])
                cursor += 1

            if not warnings:
                if slashes:
                    report(slashes, "No conclusive evidence")
                continue
            if not slashes:
                reportphantomwarnings(warnings, context)
                continue

            if len(slashes) > 1 and not multi_ok:
                # Collect the distinct rows that carry a / operator.
                rows = []
                for (row, col), line in slashes:
                    if not rows or rows[-1] != row:
                        rows.append(row)
                assert rows
                if len(rows) == 1:
                    print("*** More than one / operator in line", rows[0])
                else:
                    print("*** More than one / operator per statement",
                          "in lines %d-%d" % (rows[0], rows[-1]))

            # Sort the observed operand types into the three verdict buckets.
            intlong = [what for _, what in warnings
                       if what in ("int", "long")]
            floatcomplex = [what for _, what in warnings
                            if what in ("float", "complex")]
            bad = [what for _, what in warnings
                   if what not in ("int", "long", "float", "complex")]

            lastrow = None
            for (row, col), line in slashes:
                if row == lastrow:
                    continue
                lastrow = row
                line = chop(line)
                if line[col:col + 1] != "/":
                    print("*** Can't find the / operator in line %d:" % row)
                    print("*", line)
                    continue
                if bad:
                    print("*** Bad warning for line %d:" % row, bad)
                    print("*", line)
                elif intlong and not floatcomplex:
                    # Emit a plain-diff hunk that doubles the slash.
                    print("%dc%d" % (row, row))
                    print("<", line)
                    print("---")
                    print(">", line[:col] + "/" + line[col:])
                elif floatcomplex and not intlong:
                    print("True division / operator at line %d:" % row)
                    print("=", line)
                elif intlong and floatcomplex:
                    print("*** Ambiguous / operator (%s, %s) at line %d:" %
                          ("|".join(intlong), "|".join(floatcomplex), row))
                    print("?", line)
def reportphantomwarnings(warnings, f):
    """Print the warnings that could not be tied to a / operator.

    *warnings* is a sequence of (line number, operand type) tuples.
    Consecutive entries for the same line are combined into one message whose
    operand types are joined with "/".  After each message the source line is
    shown by calling f.report(row, mark="*").
    """
    blocks = []
    lastrow = None
    lastblock = None
    for row, what in warnings:
        if row != lastrow:
            lastblock = [row]
            blocks.append(lastblock)
            # Remember the row so later warnings for it join this block;
            # without this update every warning started a block of its own.
            lastrow = row
        lastblock.append(what)
    for block in blocks:
        row = block[0]
        whats = "/".join(block[1:])
        print("*** Phantom %s warnings for line %d:" % (whats, row))
        f.report(row, mark="*")
def report(slashes, message):
    """Print *message* once for every distinct row in *slashes*.

    *slashes* is a sequence of ((row, col), line) entries; consecutive entries
    for the same row produce a single report showing that source line.
    """
    previous = None
    for (row, col), line in slashes:
        if row == previous:
            continue
        print("*** %s on line %d:" % (message, row))
        print("*", chop(line))
        previous = row
class FileContext:
    """Line reader that keeps the lines already read and peeks a few ahead.

    Lines are addressed by line number: the first line read has number
    *lineno* (1 by default).  Everything handed out by readline() stays in
    self.buffer, and up to *window* upcoming lines are held in self.lookahead,
    so a line can be fetched by number even when the reader is not exactly at
    that position.
    """

    def __init__(self, fp, window=5, lineno=1):
        """Wrap the open text file *fp*.

        *window* is the number of lines to read ahead and *lineno* the number
        assigned to the first line.  Both were previously ignored in favor of
        the hard-coded values 5 and 1.
        """
        self.fp = fp
        self.window = window
        self.lineno = lineno  # number of the next line readline() returns
        self.eoflookahead = 0  # set once fp has reported end of file
        self.lookahead = []  # lines read from fp but not yet handed out
        self.buffer = []  # lines already handed out by readline()

    def fill(self):
        """Top up the lookahead to *window* lines, or until end of file."""
        while len(self.lookahead) < self.window and not self.eoflookahead:
            line = self.fp.readline()
            if not line:
                self.eoflookahead = 1
                break
            self.lookahead.append(line)

    def readline(self):
        """Return the next line, or "" at end of file."""
        self.fill()
        if not self.lookahead:
            return ""
        line = self.lookahead.pop(0)
        self.buffer.append(line)
        self.lineno += 1
        return line

    def __getitem__(self, index):
        """Return the line numbered *index*.

        Raises KeyError if that line is neither in the history buffer nor in
        the current lookahead window.
        """
        self.fill()
        bufstart = self.lineno - len(self.buffer)
        lookend = self.lineno + len(self.lookahead)
        if bufstart <= index < self.lineno:
            return self.buffer[index - bufstart]
        if self.lineno <= index < lookend:
            return self.lookahead[index - self.lineno]
        raise KeyError

    def report(self, first, last=None, mark="*"):
        """Print lines *first* through *last* (inclusive), prefixed by *mark*.

        *last* defaults to *first*.  Lines that cannot be retrieved are shown
        as "<missing line>".
        """
        if last is None:
            last = first
        for i in range(first, last+1):
            try:
                # Index with the loop variable; using `first` here printed the
                # first line once for every line in the range.
                line = self[i]
            except KeyError:
                line = "<missing line>"
            print(mark, chop(line))
def scanline(g):
    """Consume tokens from *g* up to and including the next NEWLINE token.

    Returns (startlineno, endlineno, slashes).  Both line numbers are taken
    from token end positions: the first is that of the first token consumed,
    the second that of the last.  *slashes* lists (start position, source
    line) for every "/" or "/=" token seen.  When *g* is already exhausted
    the result is (None, None, []).
    """
    first = last = None
    found = []
    for kind, text, begin, finish, source_line in g:
        last = finish[0]
        if first is None:
            first = last
        if text == "/" or text == "/=":
            found.append((begin, source_line))
        if kind == tokenize.NEWLINE:
            break
    return first, last, found
def chop(line):
    """Return *line* without its trailing newline, if it has one."""
    return line[:-1] if line.endswith("\n") else line
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

Some files were not shown because too many files have changed in this diff Show More