211 lines
6.8 KiB
Python
211 lines
6.8 KiB
Python
import os
|
|
import shutil
|
|
import sys
|
|
import tempfile
|
|
from contextlib import contextmanager
|
|
from distutils import log
|
|
from distutils.errors import CompileError, LinkError
|
|
|
|
__all__ = ["CCompilerCapabilities"]
|
|
|
|
|
|
@contextmanager
def chdir(path):
    """Temporarily make ``path`` the current working directory.

    The working directory that was active on entry is restored when the
    ``with`` body finishes, whether it completes normally or raises.
    """
    previous_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Always go back, even if the body raised.
        os.chdir(previous_dir)
|
|
|
|
|
|
@contextmanager
def mkdtemp(suffix):
    """Create a temporary directory, work inside it, then delete it.

    A fresh directory is created with ``tempfile.mkdtemp(suffix)`` and made
    the current working directory for the duration of the ``with`` body.  The
    directory path is the value bound by ``as``.  On exit the previous working
    directory is restored first and the temporary tree is then removed.
    """
    tmpdir = tempfile.mkdtemp(suffix)
    try:
        with chdir(tmpdir):
            yield tmpdir
    finally:
        # Runs after chdir() has already switched back out of the directory.
        shutil.rmtree(tmpdir)
|
|
|
|
|
|
@contextmanager
def output(is_quiet):
    """Optionally silence stdout and stderr at the file-descriptor level.

    When ``is_quiet`` is true, the descriptors behind ``sys.stdout`` and
    ``sys.stderr`` are redirected to ``os.devnull`` for the duration of the
    ``with`` body, so anything written to those descriptors is discarded.
    The original descriptors are restored on exit, even if the body raises,
    and every descriptor opened here is closed again.

    When ``is_quiet`` is false, nothing is changed.
    """
    if not is_quiet:  # pragma: no cover
        yield
        return

    stdout_fd = sys.stdout.fileno()
    stderr_fd = sys.stderr.fileno()

    # Emit anything already buffered so it is not swallowed by the redirect.
    sys.stdout.flush()
    sys.stderr.flush()

    with open(os.devnull, "w") as devnull:
        saved_stderr = os.dup(stderr_fd)
        try:
            saved_stdout = os.dup(stdout_fd)
            try:
                try:
                    os.dup2(devnull.fileno(), stderr_fd)
                    os.dup2(devnull.fileno(), stdout_fd)
                    yield
                finally:
                    try:
                        # Drain text buffered while quiet into devnull so it
                        # does not show up after the descriptors are restored.
                        sys.stdout.flush()
                        sys.stderr.flush()
                    finally:
                        os.dup2(saved_stderr, stderr_fd)
                        os.dup2(saved_stdout, stdout_fd)
            finally:
                # The duplicates are only needed for the restore above;
                # leaving them open would leak a descriptor per call.
                os.close(saved_stdout)
        finally:
            os.close(saved_stderr)
|
|
|
|
|
|
class CCompilerCapabilities:
    """Detect which SIMD instruction sets a C compiler can build for.

    On construction, a small C program is compiled and linked once per
    instruction set, trying each candidate compiler flag in turn.  The
    outcome is queried afterwards with :meth:`has` and :meth:`flags`, using
    the ``SIMD_*`` class constants as keys.
    """

    SIMD_SSSE3 = 0
    SIMD_SSE41 = 1
    SIMD_SSE42 = 2
    SIMD_AVX = 3
    SIMD_AVX2 = 4
    SIMD_NEON32 = 5
    SIMD_NEON64 = 6

    # One entry per probe, in execution order:
    # (capability key, log label, candidate flags, defines, header, C body).
    # An empty candidate flag means "try without any extra flag".
    # NEON64 must stay ahead of NEON32: the NEON32 result is masked by it.
    __SIMD_PROBES = (
        (
            SIMD_SSSE3,
            "SSSE3",
            ("", "-mssse3"),
            ("-D__SSSE3__",),
            "tmmintrin.h",
            "__m128i t = _mm_loadu_si128((const __m128i*)argv[0]);"
            "t = _mm_shuffle_epi8(t, t);"
            "return _mm_cvtsi128_si32(t);",
        ),
        (
            SIMD_SSE41,
            "SSE41",
            ("", "-msse4.1"),
            ("-D__SSE4_1__",),
            "smmintrin.h",
            "__m128i t = _mm_loadu_si128((const __m128i*)argv[0]);"
            "t = _mm_mpsadbw_epu8(t, t, 1);"
            "return _mm_cvtsi128_si32(t);",
        ),
        (
            SIMD_SSE42,
            "SSE42",
            ("", "-msse4.2"),
            ("-D__SSE4_2__",),
            "nmmintrin.h",
            "__m128i t = _mm_loadu_si128((const __m128i*)argv[0]);"
            "return _mm_cmpistra(t, t, 0);",
        ),
        (
            SIMD_AVX,
            "AVX",
            ("", "-mavx", "/arch:AVX"),
            ("-D__AVX__",),
            "immintrin.h",
            "__m256i y = _mm256_loadu_si256((const __m256i*)argv[0]);"
            "return _mm256_testz_si256(y, y);",
        ),
        (
            SIMD_AVX2,
            "AVX2",
            ("", "-mavx2", "/arch:AVX2"),
            ("-D__AVX2__",),
            "immintrin.h",
            "__m256i y = _mm256_loadu_si256((const __m256i*)argv[0]);"
            "y = _mm256_i32gather_epi32((int const*)argv[1], y, 2);"
            "return _mm_cvtsi128_si32(_mm256_castsi256_si128(y));",
        ),
        (
            SIMD_NEON64,
            "NEON64",
            ("",),
            (),
            "arm_neon.h",
            "uint8x16_t t = vdupq_n_u8(1);"
            "uint8x16x4_t t4 = {"
            " .val[0]=t, .val[1]=t, .val[2]=t, .val[3]=t};"
            "uint8x16_t o = vqtbx4q_u8(t, t4, t);"
            "return vgetq_lane_s32(vreinterpretq_s32_u8(o), 0);",
        ),
        (
            SIMD_NEON32,
            "NEON32",
            ("",),
            (),
            "arm_neon.h",
            "uint8x16_t t = vdupq_n_u8(1);"
            "return vgetq_lane_s32(vreinterpretq_s32_u8(t), 0);",
        ),
    )

    def __init__(self, compiler):
        """Probe ``compiler`` for every known SIMD instruction set.

        ``compiler`` must provide ``compiler_type``, ``compile()`` and
        ``link_shared_lib()`` as used by the probing code below.
        """
        self.__capabilities = {}
        # Optimisation is disabled for the probes; MSVC additionally gets
        # /WX so that warnings fail the compilation.
        if compiler.compiler_type == "msvc":
            self.__cflags = ["/WX", "/Od"]  # pragma: no cover
        else:
            self.__cflags = ["-O0"]
        self.__get_capabilities(compiler)

    def __has_simd_support(self, compiler, flags, define, include, content):
        """Try to build a test program and report whether it succeeded.

        A ``simd.c`` file including ``<include>`` and whose ``main`` body is
        ``content`` is written to a temporary directory.  Each entry of
        ``flags`` is tried in order (an empty string meaning no extra flag);
        the first one that both compiles and links wins.

        Returns a dict with ``"support"`` (bool) and ``"flags"``: on success
        the winning flag, if any, followed by ``define``; on failure an
        empty list.
        """
        quiet = True

        with mkdtemp("pybase64simdtest") as dname:
            fname = os.path.join(dname, "simd.c")
            with open(fname, "w") as f:
                f.write("""#include <%s>\n""" % include)
                f.write(
                    """\
int main (int argc, char **argv) {
%s
}
"""
                    % content
                )
            with output(quiet):
                for flag in flags:
                    lflags = [flag] if flag else []
                    try:
                        # mkdtemp() made dname the working directory, so the
                        # relative source name resolves to the file above.
                        objects = compiler.compile(
                            ["simd.c"],
                            output_dir=dname,
                            extra_postargs=lflags + self.__cflags,
                        )
                    except CompileError:
                        continue
                    try:
                        compiler.link_shared_lib(objects, "a.out", output_dir=dname)
                    except (LinkError, TypeError):  # pragma: no cover
                        continue  # pragma: no cover
                    return {"support": True, "flags": lflags + define}
                return {"support": False, "flags": []}

    def __get_capabilities(self, compiler):
        """Run every probe in the table and record and log its result."""
        log.info("getting compiler simd support")
        for key, label, flags, define, include, content in self.__SIMD_PROBES:
            result = self.__has_simd_support(
                compiler, list(flags), list(define), include, content
            )
            if key == CCompilerCapabilities.SIMD_NEON32:
                # NEON32 is only reported when the NEON64 probe failed, so
                # the two capabilities are mutually exclusive.
                neon64 = self.__capabilities[CCompilerCapabilities.SIMD_NEON64]
                result["support"] = result["support"] and not neon64["support"]
            self.__capabilities[key] = result
            log.info("%s: %s" % (label, str(result["support"])))

    def has(self, what):
        """Return whether capability ``what`` (a ``SIMD_*`` key) is supported.

        Unknown keys are reported as unsupported.
        """
        entry = self.__capabilities.get(what)
        if entry is None:
            return False
        return entry["support"]

    def flags(self, what):
        """Return the compiler flags needed to enable capability ``what``.

        Raises:
            KeyError: if ``what`` is unknown or not supported; check
                :meth:`has` first.
        """
        if not self.has(what):  # pragma: no branch
            raise KeyError(  # pragma: no cover
                "SIMD capability %r is not supported by the compiler" % (what,)
            )
        return self.__capabilities[what]["flags"]
|