This commit is contained in:
ton
2023-10-05 02:52:21 +07:00
parent 63ca0049be
commit d76e4b2916
5451 changed files with 3 additions and 919197 deletions

View File

@@ -1,153 +0,0 @@
#!/usr/bin/env python3
"""
Convert the X11 locale.alias file into a mapping dictionary suitable
for locale.py.
Written by Marc-Andre Lemburg <mal@genix.com>, 2004-12-10.
"""
import locale
import sys
_locale = locale
# Location of the X11 alias file.
LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias'
# Location of the glibc SUPPORTED locales file.
SUPPORTED = '/usr/share/i18n/SUPPORTED'
def parse(filename):
    """Read an X11 locale.alias file and return its mapping as a dict.

    Keys are the lower-cased locale names from the first column, with a
    trailing ':' removed and '-' / '_' dropped from the encoding part.
    Values are the alias names from the second column; an alias written as
    lang@modifier.encoding is rewritten to lang.encoding@modifier.
    """
    with open(filename, encoding='latin1') as f:
        # Remove mojibake in /usr/share/X11/locale/locale.alias.
        # b'\xef\xbf\xbd' == '\ufffd'.encode('utf-8')
        entries = [raw.strip() for raw in f if '\xef\xbf\xbd' not in raw]

    data = {}
    for entry in entries:
        # Blank lines and comment lines carry no mapping.
        if not entry or entry.startswith('#'):
            continue
        name, alias = entry.split()

        # Fix non-standard locale names, e.g. ks_IN@devanagari.UTF-8
        alias_lang, at_sign, alias_mod = alias.partition('@')
        if at_sign and '.' in alias_mod:
            alias_mod, _, alias_enc = alias_mod.partition('.')
            alias = alias_lang + '.' + alias_enc + '@' + alias_mod

        # Strip ':' and lower-case the locale name
        if name.endswith(':'):
            name = name[:-1]
        name = name.lower()

        # Ignore one letter locale mappings (except for 'c')
        if len(name) == 1 and name != 'c':
            continue

        # Normalize encoding, if given
        if '.' in name:
            lang, encoding = name.split('.')[:2]
            name = lang + '.' + encoding.replace('-', '').replace('_', '')

        data[name] = alias
    return data
def parse_glibc_supported(filename):
    """Read a glibc SUPPORTED file and return a locale -> alias dict.

    Each usable line has the form "name/charset", optionally followed by a
    backslash continuation; lines that do not split into exactly two words
    are skipped. The key is the lower-cased name with '-' and '_' removed
    from its encoding part. The value is the name with its encoding
    replaced by the listed charset, keeping any '@modifier' except for
    '@euro' combined with ISO-8859-15.
    """
    data = {}
    with open(filename, encoding='latin1') as f:
        for raw in f:
            entry = raw.strip()
            if not entry or entry.startswith('#'):
                continue

            # "name/charset \" -> "name charset"
            entry = entry.replace('/', ' ').strip().rstrip('\\').rstrip()
            fields = entry.split()
            if len(fields) != 2:
                continue
            alias, alias_encoding = fields

            # Lower-case locale and normalize its encoding, if given
            key = alias.lower()
            if '.' in key:
                lang, encoding = key.split('.')[:2]
                key = lang + '.' + encoding.replace('-', '').replace('_', '')

            # Add an encoding to alias
            base, _, modifier = alias.partition('@')
            alias = locale._replace_encoding(base, alias_encoding)
            keep_modifier = modifier and (
                modifier != 'euro' or alias_encoding != 'ISO-8859-15')
            if keep_modifier:
                alias += '@' + modifier

            data[key] = alias
    return data
def pprint(data):
    """Print the mapping as dict-literal entry lines, sorted by key."""
    for key in sorted(data):
        label = '%a:' % key
        print(' %-40s%a,' % (label, data[key]))
def print_differences(data, olddata):
    """Print one comment line per key of olddata that is gone or changed.

    Keys are reported in sorted order. Keys present only in data
    (additions) are not mentioned.
    """
    for key in sorted(olddata):
        old_value = olddata[key]
        if key not in data:
            print('# removed %a' % key)
            continue
        new_value = data[key]
        if old_value != new_value:
            print('# updated %a -> %a to %a' % (key, old_value, new_value))
def optimize(data):
    """Return a copy of data without entries that normalize() derives itself.

    locale.locale_alias is temporarily replaced by a copy of data. Each
    entry is removed from that copy and put back only if locale.normalize()
    no longer yields its value without it. The reduced table is then
    verified with check() against the full data; the process exits with
    status 1 if any mapping is wrong.

    The original locale.locale_alias is restored in a finally clause, so
    the module is not left patched if normalize() or check() raises.
    """
    saved_alias = locale.locale_alias
    locale.locale_alias = data.copy()
    try:
        for k, v in data.items():
            del locale.locale_alias[k]
            if locale.normalize(k) != v:
                # Not derivable from the remaining entries: keep it.
                locale.locale_alias[k] = v
        newdata = locale.locale_alias
        # Must run while the reduced table is still installed.
        errors = check(data)
    finally:
        locale.locale_alias = saved_alias
    if errors:
        sys.exit(1)
    return newdata
def check(data):
    """Count the entries of data that locale.normalize() does not reproduce.

    Every mismatch is reported on stderr. Returns the number of mismatches.
    """
    # Check that all alias definitions from the X11 file
    # are actually mapped to the correct alias locales.
    errors = 0
    for name, expected in data.items():
        actual = locale.normalize(name)
        if actual == expected:
            continue
        print('ERROR: %a -> %a != %a' % (name, actual, expected),
              file=sys.stderr)
        errors += 1
    return errors
if __name__ == '__main__':
    import argparse

    # Both input locations can be overridden on the command line.
    alias_help = ('location of the X11 alias file '
                  '(default: %a)' % LOCALE_ALIAS)
    supported_help = ('location of the glibc SUPPORTED locales file '
                      '(default: %a)' % SUPPORTED)
    parser = argparse.ArgumentParser()
    parser.add_argument('--locale-alias', default=LOCALE_ALIAS,
                        help=alias_help)
    parser.add_argument('--glibc-supported', default=SUPPORTED,
                        help=supported_help)
    args = parser.parse_args()

    # Start from the current table; glibc entries are applied first, so the
    # X11 alias file wins where both define the same key.
    data = locale.locale_alias.copy()
    data.update(parse_glibc_supported(args.glibc_supported))
    data.update(parse(args.locale_alias))

    # Repeat optimization while the size is decreased.
    n = None
    while n != len(data):
        n = len(data)
        data = optimize(data)

    print_differences(data, locale.locale_alias)
    print()
    print('locale_alias = {')
    pprint(data)
    print('}')

View File

@@ -1,246 +0,0 @@
#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
"""Generate binary message catalog from textual translation description.
This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation. Currently it
does not handle plural forms but it does handle message contexts.
Usage: msgfmt.py [OPTIONS] filename.po
Options:
-o file
--output-file=file
Specify the output file to write to. If omitted, output will go to a
file named filename.mo (based off the input file name).
-h
--help
Print this message and exit.
-V
--version
Display version information and exit.
"""
import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser
__version__ = "1.2"
MESSAGES = {}
def usage(code, msg=''):
    """Print the module docstring and optional msg to stderr, then exit."""
    err = sys.stderr
    print(__doc__, file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
def add(ctxt, id, str, fuzzy):
    "Add a non-fuzzy translation to the dictionary."
    # Fuzzy entries and empty translations are never stored.
    if fuzzy or not str:
        return
    # With a context, the key is context and id joined by a \x04 byte.
    key = id if ctxt is None else b"%b\x04%b" % (ctxt, id)
    MESSAGES[key] = str
def generate():
    """Return the binary .mo catalog built from MESSAGES, as bytes.

    Layout: a header of seven 32-bit integers, the key index, the value
    index, all keys, then all values. Integers are packed in native byte
    order. Every string is NUL terminated; the NUL does not count into the
    recorded size.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(MESSAGES[key] + b'\0' for key in keys)

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables (two 8-byte index entries
    # per message).
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        koffsets += [len(key), id_pos + keystart]
        voffsets += [len(value), str_pos + valuestart]
        # +1 skips the NUL terminator of the string just recorded.
        id_pos += len(key) + 1
        str_pos += len(value) + 1

    header = struct.pack("Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         7*4,               # start of key index
                         7*4+len(keys)*8,   # start of value index
                         0, 0)              # size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    return header + index + ids + strs
def make(filename, outfile):
    """Compile the .po catalog filename into a binary .mo file.

    '.po' is appended to filename when missing. If outfile is None the
    output path is the input path with its extension replaced by '.mo'.
    Parsed entries are stored through add() and serialized by generate().

    Exits with status 1 if the input cannot be read or the catalog is
    malformed. A failure to write the output is only reported on stderr.

    NOTE(review): this function never clears MESSAGES, so entries stored by
    an earlier call are also written by later calls -- confirm whether that
    is intended.
    """
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    # msgid stays None until the first msgid line has been seen, so a stray
    # msgstr can be reported instead of hitting an unbound local.
    msgid = msgstr = None
    is_plural = False
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        l = l.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if l.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
            section = CTXT
            l = l[7:]
            msgctxt = b''
        elif l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # The entry with the empty msgid is the header:
                    # see whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            if msgid is None:
                print('msgstr not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What remains is a quoted string; append it to the current section.
        l = ast.literal_eval(l)
        if section == CTXT:
            msgctxt += l.encode(encoding)
        elif section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno), \
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)
def main():
    """Parse the command line and compile every .po file named on it."""
    short_opts = 'hVo:'
    long_opts = ['help', 'version', 'output-file=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.error as msg:
        usage(1, msg)

    # parse options
    outfile = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg

    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()

View File

@@ -1,681 +0,0 @@
#! /usr/bin/env python3
# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@python.org>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# 2002-11-22 Jürgen Hermann <jh@web.de>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#
# for selftesting
try:
import fintl
_ = fintl.gettext
except ImportError:
_ = lambda s: s
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.
Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.
A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:
gettext("Translatable String")
_("Translatable String")
Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.
[1] https://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] https://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext where ever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.
Usage: pygettext [options] inputfile ...
Options:
-a
--extract-all
Extract all strings.
-d name
--default-domain=name
Rename the default output file from messages.pot to name.pot.
-E
--escape
Replace non-ASCII characters with octal escape sequences.
-D
--docstrings
Extract module, class, method, and function docstrings. These do
not need to be wrapped in _() markers, and in fact cannot be for
Python to consider them docstrings. (See also the -X option).
-h
--help
Print this help message and exit.
-k word
--keyword=word
Keywords to look for in addition to the default set, which are:
%(DEFAULTKEYWORDS)s
You can have multiple -k flags on the command line.
-K
--no-default-keywords
Disable the default set of keywords (see above). Any keywords
explicitly added with the -k/--keyword option are still recognized.
--no-location
Do not write filename/lineno location comments.
-n
--add-location
Write filename/lineno location comments indicating where each
extracted string is found in the source. These lines appear before
each msgid. The style of comments is controlled by the -S/--style
option. This is the default.
-o filename
--output=filename
Rename the default output file from messages.pot to filename. If
filename is `-' then the output is sent to standard out.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-S stylename
--style stylename
Specify which style to use for location comments. Two styles are
supported:
Solaris # File: filename, line: line-number
GNU #: filename:line
The style name is case insensitive. GNU style is the default.
-v
--verbose
Print the names of the files being processed.
-V
--version
Print the version of pygettext and exit.
-w columns
--width=columns
Set width of output to columns.
-x filename
--exclude-file=filename
Specify a file that contains a list of strings that are not be
extracted from the input files. Each string to be excluded must
appear on a line by itself in the file.
-X filename
--no-docstrings=filename
Specify a file that contains a list of files (one per line) that
should not have their docstrings extracted. This is only useful in
conjunction with the -D option above.
If `inputfile' is -, standard input is read.
""")
import os
import importlib.machinery
import importlib.util
import sys
import glob
import time
import getopt
import ast
import token
import tokenize
__version__ = '1.5'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
EMPTYSTRING = ''
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(encoding)s\\n"
"Generated-By: pygettext.py %(version)s\\n"
''')
def usage(code, msg=''):
    """Print the help text and optional msg to stderr, then exit with code.

    The help text is the module docstring with its %(...)s placeholders
    filled from the module globals.
    """
    help_text = __doc__ % globals()
    print(help_text, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)
def make_escapes(pass_nonascii):
    """Build the global escapes table and select the global escape function.

    With pass_nonascii true the table has 128 entries and escape_ascii is
    selected; otherwise it has 256 entries and escape_nonascii is selected.
    """
    global escapes, escape
    if pass_nonascii:
        # Allow non-ascii characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
        # escape any character outside the 32..126 range.
        mod, escape = 128, escape_ascii
    else:
        mod, escape = 256, escape_nonascii

    # Codes 32..126 map to themselves, everything else to a 3-digit octal
    # escape.
    escapes = [chr(i) if 32 <= i < 127 else r"\%03o" % i for i in range(mod)]

    # Characters that get a symbolic escape instead.
    for char, replacement in (
            ('\\', r'\\'),
            ('\t', r'\t'),
            ('\r', r'\r'),
            ('\n', r'\n'),
            ('\"', r'\"'),
    ):
        escapes[ord(char)] = replacement
def escape_ascii(s, encoding):
    """Escape the characters of s below code 128; pass the rest through.

    The encoding argument is not used.
    """
    pieces = []
    for char in s:
        code = ord(char)
        pieces.append(escapes[code] if code < 128 else char)
    return ''.join(pieces)
def escape_nonascii(s, encoding):
    """Encode s with encoding and escape every resulting byte."""
    raw = s.encode(encoding)
    return ''.join([escapes[byte] for byte in raw])
def is_literal_string(s):
    """Return True if s starts with a quote, optionally after r/R/u/U."""
    first = s[0]
    if first in ('"', "'"):
        return True
    # A single r/R/u/U prefix is accepted; anything else is rejected.
    return first in 'rRuU' and s[1] in ('"', "'")
def safe_eval(s):
    """Evaluate the source text s with no builtins and return the result."""
    # unwrap quotes, safely
    # NOTE(review): eval() with emptied builtins is not a complete sandbox;
    # ast.literal_eval may be a safer fit if only string tokens are passed
    # in -- confirm against the callers before changing.
    no_builtins = {'__builtins__': {}}
    return eval(s, no_builtins, {})
def normalize(s, encoding):
    """Return s as a quoted, escaped string in .po file syntax.

    A string without newlines becomes one quoted line. Otherwise the result
    starts with an empty "" line, followed by one quoted line per input
    line.
    """
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s, encoding) + '"'

    if not lines[-1]:
        # s ended with a newline: fold it into the last real line so that
        # no empty quoted line is emitted for it.
        del lines[-1]
        lines[-1] = lines[-1] + '\n'
    escaped = [escape(line, encoding) for line in lines]
    lineterm = '\\n"\n"'
    return '""\n"' + lineterm.join(escaped) + '"'
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'"""
    return any(char in str for char in set)
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is first treated as a glob pattern
    (if it contains any of *?[]) and otherwise looked up as a module with
    importlib; the spec's origin is then used as the path. An empty list is
    returned when nothing can be resolved.
    """
    if not os.path.exists(name):
        # check for glob chars
        if any(char in name for char in "*?[]"):
            matches = []
            for path in glob.glob(name):
                matches.extend(getFilesForName(path))
            return matches

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            spec = None
        # find_spec() returns None for an unknown module, and the origin
        # of a found spec may itself be None.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        found = []
        # get extension for python source files
        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if 'CVS' in dirs:
                dirs.remove('CVS')
            # add all *.py files to list
            found.extend(
                os.path.join(root, file) for file in files
                if os.path.splitext(file)[1] == _py_ext
            )
        return found
    elif os.path.exists(name):
        # a single file
        return [name]

    return []
class TokenEater:
    """Token-driven state machine that collects translatable strings.

    Instances are called once per token with the fields of a tokenize
    token. Collected messages are kept per message text together with the
    (filename, lineno) locations where they were seen, and are written out
    in .pot format by write().
    """

    def __init__(self, options):
        self.__options = options
        # message text -> {(filename, lineno): isdocstring}
        self.__messages = {}
        # bound method handling the next token
        self.__state = self.__waiting
        # string pieces collected inside the current keyword call
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch; only the start row of the token is passed on
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
                    return
                self.__freshmodule = 0
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen
            return
        if ttype == tokenize.STRING:
            self.__scan_fstring(tstring, lineno)

    def __scan_fstring(self, tstring, lineno):
        # Look for keyword calls inside the replacement fields of an
        # f-string token; any other string token is ignored.
        parsed = ast.parse(tstring, mode='eval').body
        if not isinstance(parsed, ast.JoinedStr):
            return
        keywords = self.__options.keywords
        for value in parsed.values:
            if not isinstance(value, ast.FormattedValue):
                continue
            for call in ast.walk(value):
                if not isinstance(call, ast.Call):
                    continue
                func = call.func
                if isinstance(func, ast.Name):
                    func_name = func.id
                elif isinstance(func, ast.Attribute):
                    func_name = func.attr
                else:
                    continue

                if func_name not in keywords:
                    continue
                if len(call.args) != 1:
                    self.__report_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected amount of'
                        ' positional arguments in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if call.keywords:
                    self.__report_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected keyword arguments'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                arg = call.args[0]
                if not isinstance(arg, ast.Constant):
                    self.__report_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected argument type'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if isinstance(arg.value, str):
                    self.__addentry(arg.value, lineno)

    def __report_call(self, template, tstring, call, lineno):
        # Print a warning about a rejected keyword call to stderr.
        details = {
            'source_segment': ast.get_source_segment(tstring, call) or tstring,
            'file': self.__curfile,
            'lineno': lineno
        }
        print(template % details, file=sys.stderr)

    def __suiteseen(self, ttype, tstring, lineno):
        # skip over any enclosure pairs until we see the colon
        if ttype != tokenize.OP:
            return
        if tstring == ':' and self.__enclosurecount == 0:
            # we see a colon and we're not in an enclosure: end of def
            self.__state = self.__suitedocstring
        elif tstring in '([{':
            self.__enclosurecount += 1
        elif tstring in ')]}':
            self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            details = {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
            }
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % details, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if msg in self.__options.toexclude:
            return
        location = (self.__curfile, lineno)
        self.__messages.setdefault(msg, {})[location] = isdocstring

    def set_filename(self, filename):
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        header_fields = {'time': timestamp, 'version': __version__,
                         'charset': encoding,
                         'encoding': '8bit'}
        print(pot_header % header_fields, file=fp)
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            reverse.setdefault(tuple(sorted(v.keys())), []).append((k, v))
        for rkey in sorted(reverse.keys()):
            for k, v in sorted(reverse[rkey]):
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                locations = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in locations:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in locations:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
def main():
    """Command-line entry point: extract messages and write the .pot file.

    Parses sys.argv, expands the input arguments to a list of files, feeds
    every file's tokens to a TokenEater and finally writes the collected
    messages to the selected output (standard out when the output file is
    '-').
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # takes a filename, like its short form -X
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0  # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu': options.GNU,
                 'solaris': options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            with open(arg) as fp:
                for line in fp:
                    # Strip only the line terminator, so a final line
                    # without one is not truncated.
                    options.nodocstrings[line.rstrip('\n')] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                # One string per line; drop the line terminator so the
                # entries can match extracted messages.
                options.toexclude = [line.rstrip('\n') for line in fp]
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()


if __name__ == '__main__':
    main()
    # some more test strings
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')

View File

@@ -1,5 +0,0 @@
#!/usr/bin/env python3
"""Launcher script: run lib2to3's main() and exit with its result."""
import sys
from lib2to3.main import main

# The value returned by main() is used as the process exit status.
exit_status = main("lib2to3.fixes")
sys.exit(exit_status)

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env python3
"""
Checks that the version of the projects bundled in ensurepip are the latest
versions available.
"""
import ensurepip
import json
import urllib.request
import sys
def main():
    """Compare each project version in ensurepip with the latest on PyPI.

    Prints one line per project whose version differs from the version
    reported by PyPI's JSON API, and exits with status 1 if any differ.
    """
    outofdate = False

    for project, version in ensurepip._PROJECTS:
        url = "https://pypi.org/pypi/{}/json".format(project)
        # No cadefault argument: it was removed from urlopen() in Python
        # 3.13. The response is closed as soon as it has been read.
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read().decode("utf8"))
        upstream_version = data["info"]["version"]

        if version != upstream_version:
            outofdate = True
            print("The latest version of {} on PyPI is {}, but ensurepip "
                  "has {}".format(project, upstream_version, version))

    if outofdate:
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -1,129 +0,0 @@
#! /usr/bin/env python3
"""
combinerefs path
A helper for analyzing PYTHONDUMPREFS output.
When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown
time Py_FinalizeEx() prints the list of all live objects twice: first it
prints the repr() of each object while the interpreter is still fully intact.
After cleaning up everything it can, it prints all remaining live objects
again, but the second time just prints their addresses, refcounts, and type
names (because the interpreter has been torn down, calling repr methods at
this point can get into infinite loops or blow up).
Save all this output into a file, then run this script passing the path to
that file. The script finds both output chunks, combines them, then prints
a line of output for each object still alive at the end:
address refcnt typename repr
address is the address of the object, in whatever format the platform C
produces for a %p format code.
refcnt is of the form
"[" ref "]"
when the object's refcount is the same in both PYTHONDUMPREFS output blocks,
or
"[" ref_before "->" ref_after "]"
if the refcount changed.
typename is Py_TYPE(object)->tp_name, extracted from the second PYTHONDUMPREFS
output block.
repr is repr(object), extracted from the first PYTHONDUMPREFS output block.
CAUTION: If object is a container type, it may not actually contain all the
objects shown in the repr: the repr was captured from the first output block,
and some of the containees may have been released since then. For example,
it's common for the line showing the dict of interned strings to display
strings that no longer exist at the end of Py_FinalizeEx; this can be recognized
(albeit painfully) because such containees don't have a line of their own.
The objects are listed in allocation order, with most-recently allocated
printed first, and the first object allocated printed last.
Simple examples:
00857060 [14] str '__len__'
The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS
output blocks said there were 14 references to it. This is probably due to
C modules that intern the string "__len__" and keep a reference to it in a
file static.
00857038 [46->5] tuple ()
46-5 = 41 references to the empty tuple were removed by the cleanup actions
between the times PYTHONDUMPREFS produced output.
00858028 [1025->1456] str '<dummy key>'
The string '<dummy key>', which is used in dictobject.c to overwrite a real
key that gets deleted, grew several hundred references during cleanup. It
suggests that stuff did get removed from dicts by cleanup, but that the dicts
themselves are staying alive for some reason. """
import re
import sys
def read(fileiter, pat, whilematch):
    """Yield lines from *fileiter* until the match status of *pat* flips.

    With a true *whilematch*, lines are yielded for as long as *pat* matches
    them; with a false *whilematch*, for as long as it does not.  The first
    line that ends the run is consumed and dropped, so *fileiter* resumes at
    the line following it.
    """
    for text in fileiter:
        matched = bool(pat.match(text))
        if matched != whilematch:
            return
        yield text
def combinefile(f):
    """Merge the two dump blocks found in *f* and print one line per survivor.

    Lines before the first "Remaining objects:" header are discarded.  The
    first block (address, refcount, repr) is remembered per address; the
    second block (address, refcount, type name) is then printed combined
    with the remembered data.  A summary count is printed at the end.
    """
    lines = iter(f)
    first_header = re.compile(r'^Remaining objects:$')
    second_header = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    # Skip everything up to and including the first header line.
    for _ in read(lines, first_header, False):
        pass

    # First block: remember refcount and repr for every address.
    refcounts = {}
    reprs = {}
    before = 0
    for line in read(lines, second_header, False):
        found = crack.match(line)
        if not found:
            print('??? skipped:', line)
            continue
        addr, old_rc, text = found.groups()
        refcounts[addr] = old_rc
        reprs[addr] = text
        before += 1

    # Second block: every line matches crack; the third field is the type name.
    after = 0
    for line in read(lines, crack, True):
        after += 1
        found = crack.match(line)
        assert found
        addr, rc, typename = found.groups()
        if addr not in refcounts:
            print('??? new object created while tearing down:', line.rstrip())
            continue
        old_rc = refcounts[addr]
        if rc == old_rc:
            rc_text = '[%s]' % rc
        else:
            rc_text = '[%s->%s]' % (old_rc, rc)
        print(addr, rc_text, typename, reprs[addr])

    print("%d objects before, %d after" % (before, after))
def combine(fname):
    """Open the file named *fname* and pass it to combinefile()."""
    with open(fname) as dump:
        combinefile(dump)
# Script entry point: the first command-line argument is the path of the
# file holding the saved output.
if __name__ == '__main__':
    combine(sys.argv[1])

View File

@@ -1,56 +0,0 @@
#!/usr/bin/env python3
#
# Determine threshold for switching from longobject.c divmod to
# _pylong.int_divmod().
from random import randrange
from time import perf_counter as now
from _pylong import int_divmod as divmod_fast
# Number of bits that rand_digits() counts as one "digit" when it converts
# a digit count into a bit count.
BITS_PER_DIGIT = 30
def rand_digits(n):
    """Return a random int whose top bit is bit number n*BITS_PER_DIGIT - 1.

    The result lies in [2**(bits-1), 2**bits) where bits = n * BITS_PER_DIGIT.
    """
    bits = n * BITS_PER_DIGIT
    upper = 1 << bits
    lower = upper >> 1
    return randrange(lower, upper)
def probe_den(nd):
    """Report how many extra numerator digits make divmod_fast() win.

    A random denominator of *nd* digits is divided into random numerators of
    nd, nd+1, ..., nd+2999 digits.  Each division is timed with both the
    built-in divmod() and divmod_fast(), and the results are asserted equal.
    Once divmod_fast() has been quicker three times in a row, the difference
    in digit counts is printed and probing stops.  If that never happens, a
    "no num seems big enough" message is printed instead.
    """
    den = rand_digits(nd)
    count = 0  # number of consecutive numerators for which divmod_fast won
    for nn in range(nd, nd + 3000):
        num = rand_digits(nn)
        t0 = now()
        e1, e2 = divmod(num, den)
        t1 = now()
        f1, f2 = divmod_fast(num, den)
        t2 = now()
        s1 = t1 - t0  # elapsed time of built-in divmod
        s2 = t2 - t1  # elapsed time of divmod_fast
        assert e1 == f1
        assert e2 == f2
        if s2 < s1:
            count += 1
            if count >= 3:
                print(
                    "for",
                    nd,
                    "denom digits,",
                    nn - nd,
                    "extra num digits is enough",
                )
                break
        else:
            # A single loss resets the streak.
            count = 0
    else:
        # The loop ran to completion without three wins in a row.
        print("for", nd, "denom digits, no num seems big enough")
def main():
    """Probe denominators of 100, 200, ..., 3000 digits."""
    for digits in range(100, 3001, 100):
        probe_den(digits)
# Run the probe when executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -1,5 +0,0 @@
#!/usr/bin/env python3
import pydoc
# Thin launcher: hand control to pydoc's command-line interface.
if __name__ == '__main__':
    pydoc.cli()

View File

@@ -1,92 +0,0 @@
"""Run Python's test suite in a fast, rigorous way.
The defaults are meant to be reasonably thorough, while skipping certain
tests that can be time-consuming or resource-intensive (e.g. largefile),
or distracting (e.g. audio and gui). These defaults can be overridden by
simply passing a -u option to this script.
"""
import os
import shlex
import sys
import sysconfig
import test.support
def is_multiprocess_flag(arg):
    """Return True if *arg* starts with '-j' or '--multiprocess'."""
    return arg.startswith(('-j', '--multiprocess'))
def is_resource_use_flag(arg):
    """Return True if *arg* starts with '-u' or '--use'."""
    return arg.startswith(('-u', '--use'))
def is_python_flag(arg):
    """Return True if *arg* starts with '-p' or '--python'."""
    return arg.startswith(('-p', '--python'))
def main(regrtest_args):
    """Build the test-suite command line and run it.

    The command starts from sys.executable with -u, -W default and -bb,
    followed by ``-m test -r -w``.  Defaults for -j and -u are appended only
    when *regrtest_args* contains no flag recognised by
    is_multiprocess_flag() / is_resource_use_flag().  *regrtest_args* is
    appended last.

    The command is printed, then executed: on win32 through subprocess.call
    (exiting with its return code), elsewhere by replacing the current
    process with os.execve and the prepared environment.
    """
    args = [sys.executable,
            '-u',                 # Unbuffered stdout and stderr
            '-W', 'default',      # Warnings set to 'default'
            '-bb',                # Warnings about bytes/bytearray
            ]

    # A cross-compile run is signalled through the environment.
    cross_compile = '_PYTHON_HOST_PLATFORM' in os.environ
    # The environment variable takes precedence over the build configuration.
    if (hostrunner := os.environ.get("_PYTHON_HOSTRUNNER")) is None:
        hostrunner = sysconfig.get_config_var("HOSTRUNNER")
    if cross_compile:
        # emulate -E, but keep PYTHONPATH + cross compile env vars, so
        # test executable can load correct sysconfigdata file.
        keep = {
            '_PYTHON_PROJECT_BASE',
            '_PYTHON_HOST_PLATFORM',
            '_PYTHON_SYSCONFIGDATA_NAME',
            'PYTHONPATH'
        }
        environ = {
            name: value for name, value in os.environ.items()
            if not name.startswith(('PYTHON', '_PYTHON')) or name in keep
        }
    else:
        environ = os.environ.copy()
        args.append("-E")

    # Allow user-specified interpreter options to override our defaults.
    args.extend(test.support.args_from_interpreter_flags())

    args.extend(['-m', 'test',    # Run the test suite
                 '-r',            # Randomize test order
                 '-w',            # Re-run failed tests in verbose mode
                 ])
    if sys.platform == 'win32':
        args.append('-n')         # Silence alerts under Windows
    if not any(is_multiprocess_flag(arg) for arg in regrtest_args):
        if cross_compile and hostrunner:
            # For now use only two cores for cross-compiled builds;
            # hostrunner can be expensive.
            args.extend(['-j', '2'])
        else:
            args.extend(['-j', '0'])  # Use all CPU cores
    if not any(is_resource_use_flag(arg) for arg in regrtest_args):
        args.extend(['-u', 'all,-largefile,-audio,-gui'])

    if cross_compile and hostrunner:
        # If HOSTRUNNER is set and -p/--python option is not given, then
        # use hostrunner to execute python binary for tests.
        if not any(is_python_flag(arg) for arg in regrtest_args):
            buildpython = sysconfig.get_config_var("BUILDPYTHON")
            args.extend(["--python", f"{hostrunner} {buildpython}"])

    # Caller-supplied arguments go last.
    args.extend(regrtest_args)

    print(shlex.join(args))
    if sys.platform == 'win32':
        # NOTE(review): this branch does not pass `environ` to the child.
        from subprocess import call
        sys.exit(call(args))
    else:
        os.execve(sys.executable, args, environ)
# Script entry point: forward all command-line arguments to main().
if __name__ == '__main__':
    main(sys.argv[1:])

View File

@@ -1,652 +0,0 @@
"""Print a summary of specialization stats for all files in the
default stats folders.
"""
import argparse
import collections
import json
import os.path
import opcode
from datetime import date
import itertools
import sys
# Directory read when no input is given on the command line; the location
# differs between Windows ("nt") and everything else.
DEFAULT_DIR = "c:\\temp\\py_stats\\" if os.name == "nt" else "/tmp/py_stats/"
# Create the list of all instruction names, indexed by opcode number.
# Placeholder names in opcode.opname (those starting with "<") are replaced,
# in order, by the names in opcode._specialized_instructions; once those run
# out the placeholder is kept.
specialized = iter(opcode._specialized_instructions)
opname = ["<0>"]
for name in opcode.opname[1:]:
    if name.startswith("<"):
        try:
            name = next(specialized)
        except StopIteration:
            pass
    opname.append(name)

# opcode_name --> opcode
# Sort alphabetically.
opmap = {name: i for i, name in enumerate(opname)}
opmap = dict(sorted(opmap.items()))

# Per-opcode counters that are summed to get a family's total in the
# specialization-stats printers.
TOTAL = "specialization.hit", "specialization.miss", "execution_count"
def format_ratio(num, den):
    """
    Return num/den rendered as a percentage with one decimal place, or the
    empty string when the denominator is 0.
    """
    return "" if den == 0 else f"{num/den:.01%}"
def join_rows(a_rows, b_rows):
    """
    Merge two tables side by side on their first column.

    Each result row is (key, *a_values, *b_values); a table that lacks a key
    contributes empty strings.  Keys keep table a's order, followed by the
    keys that only occur in table b.

    Raises ValueError if a table is ragged, if the two tables differ in
    width, or if a key occurs twice within one table.
    """
    def table_width(table, message):
        # All rows of a table must have the same number of columns.
        widths = {len(row) for row in table}
        if len(widths) != 1:
            raise ValueError(message)
        return widths.pop()

    if len(a_rows) == 0 and len(b_rows) == 0:
        return []

    a_width = table_width(a_rows, "Table a is ragged") if len(a_rows) else None
    b_width = table_width(b_rows, "Table b is ragged") if len(b_rows) else None
    if a_width is not None and b_width is not None and a_width != b_width:
        raise ValueError("Tables have different widths")
    ncols = a_width if a_width is not None else b_width

    filler = [""] * (ncols - 1)
    a_data = {row[0]: row[1:] for row in a_rows}
    b_data = {row[0]: row[1:] for row in b_rows}
    if len(a_data) != len(a_rows) or len(b_data) != len(b_rows):
        raise ValueError("Duplicate keys")

    joined = []
    # Table a's keys come first, in their original order ...
    for key, a_values in a_data.items():
        joined.append((key, *a_values, *b_data.get(key, filler)))
    # ... followed by the keys only table b knows about.
    for key, b_values in b_data.items():
        if key not in a_data:
            joined.append((key, *filler, *b_values))
    return joined
def calculate_specialization_stats(family_stats, total):
    """Return (label, count, ratio) rows for one instruction family.

    Keys are visited in sorted order.  Failure-kind counters, pair counters,
    "execution_count", "specializable" and the success/failure counters are
    left out; hit/miss keys lose their "specialization." prefix.
    """
    excluded = (
        "execution_count",
        "specialization.success",
        "specialization.failure",
        "specializable",
    )
    rows = []
    for key in sorted(family_stats):
        if key.startswith(("specialization.failure_kinds", "pair")):
            continue
        if key in excluded:
            continue
        if key in ("specialization.hit", "specialization.miss"):
            label = key[len("specialization."):]
        else:
            label = key
        count = family_stats[key]
        rows.append((f"{label:>12}", f"{count:>12}", format_ratio(count, total)))
    return rows
def calculate_specialization_success_failure(family_stats):
    """Return "Success"/"Failure" rows with counts and their share of attempts.

    An empty list is returned when there were no attempts at all.
    """
    keys = ("specialization.success", "specialization.failure")
    total_attempts = sum(family_stats.get(key, 0) for key in keys)
    if not total_attempts:
        return []
    rows = []
    for key in keys:
        word = key[len("specialization."):]
        label = word[0].upper() + word[1:]
        val = family_stats.get(key, 0)
        rows.append((label, val, format_ratio(val, total_attempts)))
    return rows
def calculate_specialization_failure_kinds(name, family_stats, defines):
    """Return (kind text, count, ratio) rows for the non-zero failure kinds.

    Rows are ordered by descending count (ties by descending kind index);
    ratios are relative to the family's "specialization.failure" counter.
    """
    total_failures = family_stats.get("specialization.failure", 0)
    # One slot per failure kind index.
    counts_by_kind = [0] * 40
    for key, count in family_stats.items():
        if key.startswith("specialization.failure_kind"):
            # Keys look like "...[<index>]": drop the "]" and split at "[".
            _, kind = key[:-1].split("[")
            counts_by_kind[int(kind)] = count
    ranked = sorted(
        ((count, kind) for kind, count in enumerate(counts_by_kind)),
        reverse=True,
    )
    return [
        (kind_to_text(kind, defines, name), count, format_ratio(count, total_failures))
        for count, kind in ranked
        if count
    ]
def print_specialization_stats(name, family_stats, defines):
    """Print the specialization tables for one instruction family.

    Nothing is printed when the family has no "specializable" counter or
    when its hit, miss and execution counts sum to zero.
    """
    if "specializable" not in family_stats:
        return
    total = sum(family_stats.get(kind, 0) for kind in TOTAL)
    if total == 0:
        return
    with Section(name, 3, f"specialization stats for {name} family"):
        rows = calculate_specialization_stats(family_stats, total)
        emit_table(("Kind", "Count", "Ratio"), rows)
        rows = calculate_specialization_success_failure(family_stats)
        if rows:
            print_title("Specialization attempts", 4)
            emit_table(("", "Count:", "Ratio:"), rows)
            # NOTE(review): nesting reconstructed from a flattened source --
            # confirm the failure-kind table belongs under `if rows:`.
            rows = calculate_specialization_failure_kinds(name, family_stats, defines)
            emit_table(("Failure kind", "Count:", "Ratio:"), rows)
def print_comparative_specialization_stats(name, base_family_stats, head_family_stats, defines):
    """Print base-versus-head specialization tables for one instruction family.

    Nothing is printed when the base family has no "specializable" counter
    or when the base and head totals are both zero.
    """
    if "specializable" not in base_family_stats:
        return
    base_total = sum(base_family_stats.get(kind, 0) for kind in TOTAL)
    head_total = sum(head_family_stats.get(kind, 0) for kind in TOTAL)
    if base_total + head_total == 0:
        return
    with Section(name, 3, f"specialization stats for {name} family"):
        base_rows = calculate_specialization_stats(base_family_stats, base_total)
        head_rows = calculate_specialization_stats(head_family_stats, head_total)
        emit_table(
            ("Kind", "Base Count", "Base Ratio", "Head Count", "Head Ratio"),
            join_rows(base_rows, head_rows)
        )
        base_rows = calculate_specialization_success_failure(base_family_stats)
        head_rows = calculate_specialization_success_failure(head_family_stats)
        rows = join_rows(base_rows, head_rows)
        if rows:
            print_title("Specialization attempts", 4)
            emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows)
            # NOTE(review): nesting reconstructed from a flattened source --
            # confirm the failure-kind table belongs under `if rows:`.
            base_rows = calculate_specialization_failure_kinds(name, base_family_stats, defines)
            head_rows = calculate_specialization_failure_kinds(name, head_family_stats, defines)
            emit_table(
                ("Failure kind", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
                join_rows(base_rows, head_rows)
            )
def gather_stats(input):
    """Load stats from a JSON file or from a directory of raw stats files.

    If *input* is a file it is read as JSON and the decoded object is
    returned.  If it is a directory, every entry in it is read line by line
    as ``key: value`` pairs; the integer values are summed per key into a
    ``collections.Counter`` and the ``__nfiles__`` key counts the files
    read.  Lines that do not split into exactly two parts on ":" are
    reported on stderr and skipped.

    Raises ValueError if *input* is neither a file nor a directory.
    """
    # Note the output of this function must be JSON-serializable
    if os.path.isfile(input):
        with open(input, "r") as fd:
            return json.load(fd)
    elif os.path.isdir(input):
        stats = collections.Counter()
        for filename in os.listdir(input):
            with open(os.path.join(input, filename)) as fd:
                for line in fd:
                    try:
                        key, value = line.split(":")
                    except ValueError:
                        print(f"Unparsable line: '{line.strip()}' in {filename}", file=sys.stderr)
                        continue
                    key = key.strip()
                    value = int(value)
                    stats[key] += value
            stats['__nfiles__'] += 1
        return stats
    else:
        # "!r" requests repr(); the former ":r" was parsed as a format spec,
        # which str rejects, so the intended message was never produced.
        raise ValueError(f"{input!r} is not a file or directory path")
def extract_opcode_stats(stats):
    """Split the flat "opcode[N].field" entries into one dict per opcode.

    Returns a list of 256 dicts; entry N maps each field name to its value.
    Keys that do not start with "opcode" are ignored.
    """
    per_opcode = [{} for _ in range(256)]
    for key, value in stats.items():
        if key.startswith("opcode"):
            # Drop the leading "opcode[" and split at the closing bracket.
            number, _, field = key[7:].partition("]")
            per_opcode[int(number)][field.strip(".")] = value
    return per_opcode
def parse_kinds(spec_src, prefix="SPEC_FAIL"):
    """Collect ``#define <prefix>_<NAME> <value>`` lines from *spec_src*.

    Returns a defaultdict(list) mapping each integer value to the names
    (without the prefix) defined with that value, in source order.
    """
    marker = "#define " + prefix + "_"
    defines = collections.defaultdict(list)
    for raw in spec_src:
        text = raw.strip()
        if text.startswith(marker):
            name, val = text[len(marker):].split()
            defines[int(val.strip())].append(name.strip())
    return defines
def pretty(defname):
    """Return *defname* in lower case with underscores turned into spaces."""
    spaced = defname.replace("_", " ")
    return spaced.lower()
def kind_to_text(kind, defines, opname):
    """Return a readable description of failure kind number *kind*.

    Kinds up to 8 use the first define with that value.  Higher kinds are
    looked up among the defines whose name starts with the instruction's
    family prefix; if none matches, "kind <n>" is returned.
    """
    if kind <= 8:
        return pretty(defines[kind][0])

    # Map the instruction name to the prefix used in the define names.
    if opname == "LOAD_SUPER_ATTR":
        family = "SUPER"
    elif opname.endswith("ATTR"):
        family = "ATTR"
    elif opname in ("FOR_ITER", "SEND"):
        family = "ITER"
    elif opname.endswith("SUBSCR"):
        family = "SUBSCR"
    else:
        family = opname

    skip = len(family) + 1  # the prefix plus the following separator
    matching = (define for define in defines[kind] if define.startswith(family))
    first = next(matching, None)
    if first is None:
        return "kind " + str(kind)
    return pretty(first[skip:])
def categorized_counts(opcode_stats):
    """Return (basic, not_specialized, specialized) execution counts.

    Executions of instructions carrying a "specializable" counter count as
    not specialized.  For specialized instructions (names without "__" in
    opcode._specialized_instructions) the misses count as not specialized
    and the rest as specialized.  Everything else is basic.
    """
    specialized_names = {
        op for op in opcode._specialized_instructions
        if "__" not in op}
    basic = not_specialized = specialized = 0
    for number, stat in enumerate(opcode_stats):
        if "execution_count" not in stat:
            continue
        count = stat['execution_count']
        name = opname[number]
        if "specializable" in stat:
            not_specialized += count
            continue
        if name not in specialized_names:
            basic += count
            continue
        miss = stat.get("specialization.miss", 0)
        not_specialized += miss
        specialized += count - miss
    return basic, not_specialized, specialized
def print_title(name, level=2):
    """Print *name* after *level* "#" characters, followed by a blank line."""
    hashes = "#" * level
    print(hashes, name, end="\n\n")
class Section:
    """Context manager that prints a titled, collapsible <details> section.

    Entering prints the title, "<details>" and a "<summary>" line; leaving
    prints the closing "</details>".  The summary defaults to the title in
    lower case.
    """

    def __init__(self, title, level=2, summary=None):
        self.title = title
        self.level = level
        self.summary = title.lower() if summary is None else summary

    def __enter__(self):
        print_title(self.title, self.level)
        print("<details>")
        # The summary line is followed by one blank line.
        print("<summary>", self.summary, "</summary>", end="\n\n")
        return self

    def __exit__(*args):
        # A blank line on either side of the closing tag.
        print("", "</details>", "", sep="\n")
def to_str(x):
    """Return ints with thousands separators and anything else via str()."""
    if not isinstance(x, int):
        return str(x)
    return format(x, ",d")
def emit_table(header, rows):
    """Print *rows* as a pipe-delimited table below *header*.

    A header cell ending in ":" is printed without the colon and gets a
    "---:" rule instead of "---".  Raises ValueError for a row whose length
    differs from the header's.
    """
    width = len(header)
    titles = []
    rules = []
    for item in header:
        if item.endswith(":"):
            titles.append(item[:-1])
            rules.append("---:")
        else:
            titles.append(item)
            rules.append("---")
    print("|" + "".join(title + " | " for title in titles))
    print("|" + "".join(rule + "|" for rule in rules))
    for row in rows:
        if len(row) != width:
            raise ValueError("Wrong number of elements in row '" + str(row) + "'")
        print("|", " | ".join(to_str(i) for i in row), "|")
    print()
def calculate_execution_counts(opcode_stats, total):
    """Return per-instruction rows sorted by descending execution count.

    Each row is (name, count, share of *total*, cumulative share, miss
    ratio).  The miss ratio is empty for instructions that carry a
    "specializable" counter or have no misses.
    """
    counts = []
    for number, stat in enumerate(opcode_stats):
        if "execution_count" not in stat:
            continue
        if "specializable" in stat:
            miss = 0
        else:
            miss = stat.get("specialization.miss")
        counts.append((stat['execution_count'], opname[number], miss))
    counts.sort(reverse=True)

    rows = []
    running = 0
    for count, name, miss in counts:
        running += count
        miss_text = format_ratio(miss, count) if miss else ""
        rows.append((name, count, format_ratio(count, total),
                     format_ratio(running, total), miss_text))
    return rows
def emit_execution_counts(opcode_stats, total):
    """Print the "Execution counts" section for a single stats set."""
    header = ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:")
    with Section("Execution counts", summary="execution counts for all instructions"):
        emit_table(header, calculate_execution_counts(opcode_stats, total))
def emit_comparative_execution_counts(
    base_opcode_stats, base_total, head_opcode_stats, head_total
):
    """Print base and head execution counts with the relative change.

    Rows are sorted by descending absolute change.  An instruction with a
    base count of 0 is reported as a change of 100%.
    """
    with Section("Execution counts", summary="execution counts for all instructions"):
        base_data = {
            row[0]: row[1:]
            for row in calculate_execution_counts(base_opcode_stats, base_total)
        }
        head_data = {
            row[0]: row[1:]
            for row in calculate_execution_counts(head_opcode_stats, head_total)
        }
        # Stand-in entry for an instruction missing from one side.
        absent = [0, "0.0%", "0.0%", 0]
        rows = []
        for name in set(base_data.keys()) | set(head_data.keys()):
            base_count = base_data.get(name, absent)[0]
            head_count = head_data.get(name, absent)[0]
            if base_count == 0:
                change = 1
            else:
                change = (head_count - base_count) / base_count
            rows.append((name, base_count, head_count, f"{100*change:0.1f}%"))
        # The last cell is "<number>%": strip the sign and sort by magnitude.
        rows.sort(key=lambda row: -abs(float(row[-1][:-1])))
        emit_table(
            ("Name", "Base Count:", "Head Count:", "Change:"),
            rows
        )
def get_defines():
    """Return the defines parsed from ../../Python/specialize.c.

    The path is taken relative to this file's directory and the file is
    parsed with parse_kinds() using its default prefix.
    """
    here = os.path.dirname(__file__)
    with open(os.path.join(here, "../../Python/specialize.c")) as spec_src:
        return parse_kinds(spec_src)
def emit_specialization_stats(opcode_stats):
    """Print the "Specialization stats" section, one entry per opcode."""
    defines = get_defines()
    with Section("Specialization stats", summary="specialization stats by family"):
        for number, family_stats in enumerate(opcode_stats):
            print_specialization_stats(opname[number], family_stats, defines)
def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
    """Print the comparative "Specialization stats" section.

    Base and head entries are paired by position, i.e. by opcode number.
    """
    defines = get_defines()
    paired = zip(base_opcode_stats, head_opcode_stats)
    with Section("Specialization stats", summary="specialization stats by family"):
        for number, (base_family, head_family) in enumerate(paired):
            print_comparative_specialization_stats(
                opname[number], base_family, head_family, defines
            )
def calculate_specialization_effectiveness(opcode_stats, total):
    """Return the Basic / Not specialized / Specialized rows with ratios."""
    basic, not_specialized, specialized = categorized_counts(opcode_stats)
    labelled = (
        ("Basic", basic),
        ("Not specialized", not_specialized),
        ("Specialized", specialized),
    )
    return [(label, count, format_ratio(count, total)) for label, count in labelled]
def emit_specialization_overview(opcode_stats, total):
    """Print the "Specialization effectiveness" section.

    The overview table is followed by a top-10 sub-section for deferred
    specializations and one for misses; a sub-section is printed only when
    its counter has a non-zero sum.
    """
    with Section("Specialization effectiveness"):
        rows = calculate_specialization_effectiveness(opcode_stats, total)
        emit_table(("Instructions", "Count:", "Ratio:"), rows)
        breakdowns = (
            ("Deferred", "specialization.deferred"),
            ("Misses", "specialization.miss"),
        )
        for title, field in breakdowns:
            field_total = 0
            counts = []
            for number, stat in enumerate(opcode_stats):
                # Avoid double counting misses
                if title == "Misses" and "specializable" in stat:
                    continue
                value = stat.get(field, 0)
                counts.append((value, opname[number]))
                field_total += value
            counts.sort(reverse=True)
            if not field_total:
                continue
            with Section(f"{title} by instruction", 3):
                top = [
                    (name, count, format_ratio(count, field_total))
                    for (count, name) in counts[:10]
                ]
                emit_table(("Name", "Count:", "Ratio:"), top)
def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total):
    """Print the base-versus-head "Specialization effectiveness" table."""
    header = ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:")
    with Section("Specialization effectiveness"):
        base_rows = calculate_specialization_effectiveness(base_opcode_stats, base_total)
        head_rows = calculate_specialization_effectiveness(head_opcode_stats, head_total)
        emit_table(header, join_rows(base_rows, head_rows))
def get_stats_defines():
    """Return the EVAL_CALL defines parsed from ../../Include/pystats.h.

    The path is taken relative to this file's directory.
    """
    here = os.path.dirname(__file__)
    with open(os.path.join(here, "../../Include/pystats.h")) as stats_src:
        return parse_kinds(stats_src, prefix="EVAL_CALL")
def calculate_call_stats(stats):
    """Return (label, count, ratio) rows for the call and frame counters.

    Ratios are relative to the sum of all counters whose key contains
    "Calls to".  Other "Calls ..." keys end in "[<index>]" and are labelled
    with the matching EVAL_CALL define.  Rows for keys starting with "Frame"
    follow all call rows.
    """
    defines = get_stats_defines()
    total = sum(value for key, value in stats.items() if "Calls to" in key)

    call_rows = []
    frame_rows = []
    for key, value in stats.items():
        ratio = format_ratio(value, total)
        if "Calls to" in key:
            call_rows.append((key, value, ratio))
        elif key.startswith("Calls "):
            # Drop the trailing "]" and split the index off at "[".
            name, index = key[:-1].split("[")
            label = name + " (" + pretty(defines[int(index)][0]) + ")"
            call_rows.append((label, value, ratio))
        if key.startswith("Frame"):
            frame_rows.append((key, value, ratio))
    return call_rows + frame_rows
def emit_call_stats(stats):
    """Print the "Call stats" section for a single stats set."""
    with Section("Call stats", summary="Inlined calls and frame stats"):
        emit_table(("", "Count:", "Ratio:"), calculate_call_stats(stats))
def emit_comparative_call_stats(base_stats, head_stats):
    """Print base-versus-head call stats, sorted by descending head ratio.

    The head ratio cell is empty when a key is missing on the head side or
    when the head total is zero; such rows are sorted as 0%, because
    float("") would otherwise raise ValueError.
    """
    def head_ratio(row):
        # Ratio cells look like "12.3%"; the empty string means "no data".
        text = row[-1]
        return float(text[:-1]) if text else 0.0

    with Section("Call stats", summary="Inlined calls and frame stats"):
        base_rows = calculate_call_stats(base_stats)
        head_rows = calculate_call_stats(head_stats)
        rows = join_rows(base_rows, head_rows)
        rows.sort(key=lambda x: -head_ratio(x))
        emit_table(
            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
            rows
        )
def calculate_object_stats(stats):
    """Return (label, count, ratio) rows for every key starting with "Object".

    The ratio is relative to a total chosen by the key's wording: keys
    containing "materialize" use "Object new values", "allocations" keys
    use the sum of the two allocation counters, "increfs"/"decrefs" keys
    use the sum of the matching interpreter and non-interpreter counters.
    Other keys get an empty ratio.  Counters absent from *stats* count as 0,
    so a partial or empty stats mapping does not raise.
    """
    total_materializations = stats.get("Object new values", 0)
    total_allocations = (
        stats.get("Object allocations", 0)
        + stats.get("Object allocations from freelist", 0)
    )
    total_increfs = (
        stats.get("Object interpreter increfs", 0)
        + stats.get("Object increfs", 0)
    )
    total_decrefs = (
        stats.get("Object interpreter decrefs", 0)
        + stats.get("Object decrefs", 0)
    )
    rows = []
    for key, value in stats.items():
        if not key.startswith("Object"):
            continue
        if "materialize" in key:
            ratio = format_ratio(value, total_materializations)
        elif "allocations" in key:
            ratio = format_ratio(value, total_allocations)
        elif "increfs" in key:
            ratio = format_ratio(value, total_increfs)
        elif "decrefs" in key:
            ratio = format_ratio(value, total_decrefs)
        else:
            ratio = ""
        # Drop the "Object" prefix and capitalise the remainder; slicing
        # keeps this safe when nothing follows the prefix.
        label = key[6:].strip()
        label = label[:1].upper() + label[1:]
        rows.append((label, value, ratio))
    return rows
def emit_object_stats(stats):
    """Print the "Object stats" section for a single stats set."""
    with Section("Object stats", summary="allocations, frees and dict materializatons"):
        emit_table(("", "Count:", "Ratio:"), calculate_object_stats(stats))
def emit_comparative_object_stats(base_stats, head_stats):
    """Print the base-versus-head "Object stats" table."""
    header = ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:")
    with Section("Object stats", summary="allocations, frees and dict materializatons"):
        base_rows = calculate_object_stats(base_stats)
        head_rows = calculate_object_stats(head_stats)
        emit_table(header, join_rows(base_rows, head_rows))
def get_total(opcode_stats):
    """Return the sum of "execution_count" over all per-opcode dicts."""
    return sum(
        stat['execution_count']
        for stat in opcode_stats
        if "execution_count" in stat
    )
def emit_pair_counts(opcode_stats, total):
    """Print the pair-count sections.

    The first section lists the 100 most frequent (first, second) opcode
    pairs.  The second lists, for every opcode that occurs in a pair, its
    five most frequent predecessors and successors.  Opcode 0 is skipped as
    a first element, and pairs with a zero count are ignored.
    """
    pair_counts = []
    for first, opcode_stat in enumerate(opcode_stats):
        if first == 0:
            continue
        for key, value in opcode_stat.items():
            if not key.startswith("pair_count"):
                continue
            # Keys look like "pair_count[<second>]".
            second = key[11:].partition("]")[0]
            if value:
                pair_counts.append((value, (first, int(second))))

    with Section("Pair counts", summary="Pair counts for top 100 pairs"):
        pair_counts.sort(reverse=True)
        running = 0
        rows = []
        for count, (i, j) in itertools.islice(pair_counts, 100):
            running += count
            rows.append((
                opname[i] + " " + opname[j],
                count,
                format_ratio(count, total),
                format_ratio(running, total),
            ))
        emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows)

    with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
        predecessors = collections.defaultdict(collections.Counter)
        successors = collections.defaultdict(collections.Counter)
        total_predecessors = collections.Counter()
        total_successors = collections.Counter()
        for count, (first, second) in pair_counts:
            if not count:
                continue
            predecessors[second][first] = count
            successors[first][second] = count
            total_predecessors[second] += count
            total_successors[first] += count

        for name, number in opmap.items():
            pred_total = total_predecessors[number]
            succ_total = total_successors[number]
            if pred_total == 0 and succ_total == 0:
                continue
            pred_rows = succ_rows = ()
            if pred_total:
                pred_rows = [
                    (opname[pred], count, f"{count/pred_total:.1%}")
                    for (pred, count) in predecessors[number].most_common(5)
                ]
            if succ_total:
                succ_rows = [
                    (opname[succ], count, f"{count/succ_total:.1%}")
                    for (succ, count) in successors[number].most_common(5)
                ]
            with Section(name, 3, f"Successors and predecessors for {name}"):
                emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
                emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
def output_single_stats(stats):
    """Print every report section for a single stats set."""
    per_opcode = extract_opcode_stats(stats)
    executed = get_total(per_opcode)
    emit_execution_counts(per_opcode, executed)
    emit_pair_counts(per_opcode, executed)
    emit_specialization_stats(per_opcode)
    emit_specialization_overview(per_opcode, executed)
    emit_call_stats(stats)
    emit_object_stats(stats)
    meta_rows = [('Number of data files', stats['__nfiles__'])]
    with Section("Meta stats", summary="Meta statistics"):
        emit_table(("", "Count:"), meta_rows)
def output_comparative_stats(base_stats, head_stats):
    """Print every comparative report section for a base and a head stats set."""
    base_per_opcode = extract_opcode_stats(base_stats)
    base_executed = get_total(base_per_opcode)
    head_per_opcode = extract_opcode_stats(head_stats)
    head_executed = get_total(head_per_opcode)

    totals = (base_per_opcode, base_executed, head_per_opcode, head_executed)
    emit_comparative_execution_counts(*totals)
    emit_comparative_specialization_stats(base_per_opcode, head_per_opcode)
    emit_comparative_specialization_overview(*totals)
    emit_comparative_call_stats(base_stats, head_stats)
    emit_comparative_object_stats(base_stats, head_stats)
def output_stats(inputs, json_output=None):
    """Print the report for one input or the comparison of two inputs.

    With one input the gathered stats are also dumped as JSON to
    *json_output* when it is given.  With two inputs a *json_output* raises
    ValueError.  A footer with today's date is printed in every case.
    """
    how_many = len(inputs)
    if how_many == 2:
        if json_output is not None:
            raise ValueError(
                "Can not output to JSON when there are multiple inputs"
            )
        base_stats = gather_stats(inputs[0])
        head_stats = gather_stats(inputs[1])
        output_comparative_stats(base_stats, head_stats)
    elif how_many == 1:
        stats = gather_stats(inputs[0])
        if json_output is not None:
            json.dump(stats, json_output)
        output_single_stats(stats)

    print("---")
    print("Stats gathered on:", date.today())
def main():
    """Parse the command line and print the requested report.

    Accepts zero to two input sources (DEFAULT_DIR when none is given) and
    an optional --json-output file.  Raises ValueError when more than two
    inputs are supplied.
    """
    parser = argparse.ArgumentParser(description="Summarize pystats results")

    parser.add_argument(
        "inputs",
        nargs="*",
        type=str,
        default=[DEFAULT_DIR],
        help=f"""
Input source(s).
For each entry, if a .json file, the output provided by --json-output from a previous run;
if a directory, a directory containing raw pystats .txt files.
If one source is provided, its stats are printed.
If two sources are provided, comparative stats are printed.
Default is {DEFAULT_DIR}.
"""
    )

    parser.add_argument(
        "--json-output",
        nargs="?",
        type=argparse.FileType("w"),
        help="Output complete raw results to the given JSON file."
    )

    args = parser.parse_args()

    # nargs="*" accepts any number of inputs, so the upper bound is enforced here.
    if len(args.inputs) > 2:
        raise ValueError("0-2 arguments may be provided.")

    output_stats(args.inputs, json_output=args.json_output)
# Run the summary when executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -1,297 +0,0 @@
'Show relative speeds of local, nonlocal, global, and built-in access.'
# Please leave this code so that it runs under older versions of
# Python 3 (no f-strings). That will allow benchmarking for
# cross-version comparisons. To run the benchmark on Python 2,
# comment-out the nonlocal reads and writes.
from collections import deque, namedtuple
# Every benchmark loops once over this list, so its length is the number of
# loop iterations per call.
trials = [None] * 500
# Matches the 25 operations written out in each benchmark's loop body.
steps_per_trial = 25
class A(object):
    """Plain class with a do-nothing method m; used as a benchmark fixture."""

    def m(self):
        pass
class B(object):
    """Benchmark fixture whose single attribute x is declared in __slots__."""

    __slots__ = 'x'

    def __init__(self, x):
        self.x = x
class C(object):
    """Benchmark fixture with an ordinary instance attribute x (no __slots__)."""

    def __init__(self, x):
        self.x = x
def read_local(trials=trials):
    """Read a local variable 25 times per trial."""
    v_local = 1
    for t in trials:
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
def make_nonlocal_reader():
    """Return a benchmark that reads an enclosing-scope variable 25 times per trial."""
    v_nonlocal = 1
    def inner(trials=trials):
        for t in trials:
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
    # Give the closure the name it is published under below.
    inner.__name__ = 'read_nonlocal'
    return inner

read_nonlocal = make_nonlocal_reader()
# Module-level variable used by read_global() and write_global().
v_global = 1

def read_global(trials=trials):
    """Read a module-level variable 25 times per trial."""
    for t in trials:
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
def read_builtin(trials=trials):
    """Look up the built-in name oct 25 times per trial."""
    for t in trials:
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
def read_classvar_from_class(trials=trials, A=A):
    """Read class variable A.x through the class 25 times per trial."""
    A.x = 1
    for t in trials:
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
def read_classvar_from_instance(trials=trials, A=A):
    """Read class variable A.x through an instance 25 times per trial."""
    A.x = 1
    a = A()
    for t in trials:
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
def read_instancevar(trials=trials, a=C(1)):
    """Read instance attribute x of a C instance 25 times per trial."""
    for t in trials:
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
def read_instancevar_slots(trials=trials, a=B(1)):
    """Read slot attribute x of a B instance 25 times per trial."""
    for t in trials:
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
def read_namedtuple(trials=trials, D=namedtuple('D', ['x'])):
    """Read field x of a namedtuple instance 25 times per trial."""
    a = D(1)
    for t in trials:
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
def read_boundmethod(trials=trials, a=A()):
    """Look up (without calling) method m on an A instance 25 times per trial."""
    for t in trials:
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
def write_local(trials=trials):
    """Assign to a local variable 25 times per trial."""
    v_local = 1
    for t in trials:
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
def make_nonlocal_writer():
    """Return a benchmark that assigns to an enclosing-scope variable 25 times per trial."""
    v_nonlocal = 1
    def inner(trials=trials):
        nonlocal v_nonlocal
        for t in trials:
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
    # Give the closure the name it is published under below.
    inner.__name__ = 'write_nonlocal'
    return inner

write_nonlocal = make_nonlocal_writer()
def write_global(trials=trials):
    """Assign to the module-level variable v_global 25 times per trial."""
    global v_global
    for t in trials:
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
def write_classvar(trials=trials, A=A):
    """Assign to class variable A.x 25 times per trial."""
    for t in trials:
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
def write_instancevar(trials=trials, a=C(1)):
    """Assign to instance attribute x of a C instance 25 times per trial."""
    for t in trials:
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
def write_instancevar_slots(trials=trials, a=B(1)):
    """Assign to slot attribute x of a B instance 25 times per trial."""
    for t in trials:
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
def read_list(trials=trials, a=[1]):
    """Index element 0 of a list 25 times per trial."""
    for t in trials:
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
def read_deque(trials=trials, a=deque([1])):
    """Index element 0 of a deque 25 times per trial."""
    for t in trials:
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
def read_dict(trials=trials, a={0: 1}):
    """Look up integer key 0 in a dict 25 times per trial."""
    for t in trials:
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
        a[0];   a[0];   a[0];   a[0];   a[0]
def read_strdict(trials=trials, a={'key': 1}):
    """Look up string key 'key' in a dict 25 times per trial."""
    for t in trials:
        a['key'];   a['key'];   a['key'];   a['key'];   a['key']
        a['key'];   a['key'];   a['key'];   a['key'];   a['key']
        a['key'];   a['key'];   a['key'];   a['key'];   a['key']
        a['key'];   a['key'];   a['key'];   a['key'];   a['key']
        a['key'];   a['key'];   a['key'];   a['key'];   a['key']
def list_append_pop(trials=trials, a=[1]):
    """Run 25 append/pop pairs on a list per trial, via pre-bound methods."""
    ap, pop = a.append, a.pop
    for t in trials:
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
def deque_append_pop(trials=trials, a=deque([1])):
    """Run 25 append/pop pairs on a deque per trial, via pre-bound methods."""
    ap, pop = a.append, a.pop
    for t in trials:
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
        ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop(); ap(1); pop()
def deque_append_popleft(trials=trials, a=deque([1])):
    """Benchmark using a deque as a queue.

    Performs 25 append/popleft pairs per element of *trials*.  Each pair
    adds one item on the right and removes one from the left, so the deque
    keeps a constant length of one.
    """
    # Bind the methods once so the loop measures the calls themselves
    # rather than repeated attribute lookups.
    push = a.append
    take = a.popleft
    for _ in trials:
        push(1); take(); push(1); take(); push(1); take(); push(1); take(); push(1); take()
        push(1); take(); push(1); take(); push(1); take(); push(1); take(); push(1); take()
        push(1); take(); push(1); take(); push(1); take(); push(1); take(); push(1); take()
        push(1); take(); push(1); take(); push(1); take(); push(1); take(); push(1); take()
        push(1); take(); push(1); take(); push(1); take(); push(1); take(); push(1); take()
def write_list(trials=trials, a=[1]):
for t in trials:
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
def write_deque(trials=trials, a=deque([1])):
for t in trials:
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
def write_dict(trials=trials, a={0: 1}):
for t in trials:
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
a[0]=1; a[0]=1; a[0]=1; a[0]=1; a[0]=1
def write_strdict(trials=trials, a={'key': 1}):
    """Benchmark dict stores with a string key.

    Performs 25 ``a['key'] = 1`` stores per element of *trials*.  The key
    already exists and the stored value equals the initial one, so the
    dict contents never change.
    """
    for _ in trials:
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
        a['key'] = 1
def loop_overhead(trials=trials):
    """Benchmark the bare ``for`` loop over *trials* with an empty body.

    Serves as a baseline: it runs the same iteration as the other
    benchmarks but does no work per trial.
    """
    for _ in trials:
        pass
if __name__ == '__main__':
    from timeit import Timer

    # Report driver.  Plain strings in the sequence are section headings and
    # are printed verbatim; every other entry is a benchmark function that
    # is timed and reported on its own line.
    for entry in [
            'Variable and attribute read access:',
            read_local, read_nonlocal, read_global, read_builtin,
            read_classvar_from_class, read_classvar_from_instance,
            read_instancevar, read_instancevar_slots,
            read_namedtuple, read_boundmethod,
            '\nVariable and attribute write access:',
            write_local, write_nonlocal, write_global,
            write_classvar, write_instancevar, write_instancevar_slots,
            '\nData structure read access:',
            read_list, read_deque, read_dict, read_strdict,
            '\nData structure write access:',
            write_list, write_deque, write_dict, write_strdict,
            '\nStack (or queue) operations:',
            list_append_pop, deque_append_pop, deque_append_popleft,
            '\nTiming loop overhead:',
            loop_overhead,
    ]:
        if not isinstance(entry, str):
            # Best (minimum) of 7 repeats, each timing 1000 calls, in seconds.
            best = min(Timer(entry).repeat(7, 1000))
            # Convert seconds per 1000 calls into nanoseconds per step:
            # 1e9 ns/s divided by 1000 calls gives the 1000000 factor, and
            # each call performs len(trials) * steps_per_trial steps.
            per_step = best * (1000000 / (len(trials) * steps_per_trial))
            print('{:6.1f} ns\t{}'.format(per_step, entry.__name__))
        else:
            print(entry)