blob: 71c298b056d47904579b8441cfaf2958c6e9b3d4 [file]
# Copyright 2013 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.
import json
import math
import os
import re
import shutil
import subprocess
import sys
import time
import unittest
import zlib
from abc import ABC, abstractmethod
from pathlib import Path
if __name__ == '__main__':
raise Exception('do not run this file directly; do something like: test/runner.py benchmark')
import clang_native
import common
import jsrun
from common import copy_asset, read_binary, read_file, test_file
from decorators import needs_make, parameterized
from tools import utils
from tools.shared import CLANG_CC, CLANG_CXX, EMCC, PIPE, config
from tools.utils import run_process
# standard arguments for timing:
# 0: no runtime, just startup
# 1: very little runtime
# 2: 0.5 seconds
# 3: 1 second
# 4: 5 seconds
# 5: 10 seconds
DEFAULT_ARG = '4'
EMTEST_REPS = int(os.environ.get('EMTEST_REPS', '5'))
# by default, run just core benchmarks
CORE_BENCHMARKS = True
# if a specific benchmark is requested, don't limit to core
if 'benchmark.' in str(sys.argv):
CORE_BENCHMARKS = False
non_core = unittest.skipIf(CORE_BENCHMARKS, "only running core benchmarks")
OPTIMIZATIONS = '-O3'
PROFILING = 0
LLVM_FEATURE_FLAGS = ['-mnontrapping-fptoint']
# A comma separated list of benchmarkers to run during test_benchmark tests. See
# `named_benchmarkers` for what is available.
EMTEST_BENCHMARKERS = os.getenv('EMTEST_BENCHMARKERS', 'clang,v8,v8-lto,v8-ctors')
class Benchmarker(ABC):
# Whether to record statistics. Set by SizeBenchmarker.
record_stats = False
# called when we init the object, which is during startup, even if we are
# not running benchmarks
def __init__(self, name):
self.name = name
@abstractmethod
def run(self, args):
pass
@abstractmethod
def build(self, parent, filename, shared_args, emcc_args, native_args, native_exec, lib_builder):
pass
@abstractmethod
def get_output_files(self):
pass
def bench(self, args, reps=EMTEST_REPS, output_parser=None, expected_output=None):
self.times = []
for _ in range(reps):
start = time.time()
output = self.run(args)
if expected_output is not None and expected_output not in output:
raise ValueError('Incorrect benchmark output:\n' + output)
if not output_parser or args == ['0']: # if arg is 0, we are not running code, and have no output to parse
curr = time.time() - start
else:
try:
curr = output_parser(output)
except Exception as e:
print(str(e))
print('Parsing benchmark results failed, output was: ' + output)
self.times.append(curr)
def display(self, baseline=None):
# speed
if self.times:
if baseline == self:
baseline = None
mean = sum(self.times) / len(self.times)
squared_times = [x * x for x in self.times]
mean_of_squared = sum(squared_times) / len(self.times)
std = math.sqrt(mean_of_squared - mean * mean)
sorted_times = sorted(self.times)
count = len(sorted_times)
if count % 2 == 0:
median = sum(sorted_times[count // 2 - 1:count // 2 + 1]) / 2
else:
median = sorted_times[count // 2]
print(' %10s: mean: %4.3f (+-%4.3f) secs median: %4.3f range: %4.3f-%4.3f (noise: %4.3f%%) (%d runs)' % (self.name, mean, std, median, min(self.times), max(self.times), 100 * std / mean, len(self.times)), end=' ')
if baseline:
mean_baseline = sum(baseline.times) / len(baseline.times)
final = mean / mean_baseline
print(' Relative: %.2f X slower' % final)
else:
print(' Relative: No baseline recorded yet')
# size
recorded_stats = []
def add_stat(name, size, gzip_size):
recorded_stats.append({
'value': name,
'measurement': size,
})
recorded_stats.append({
'value': name + ' (gzipped)',
'measurement': gzip_size,
})
total_size = 0
total_gzip_size = 0
files = self.get_output_files()
if files:
for file in files:
size = os.path.getsize(file)
gzip_size = len(zlib.compress(read_binary(file)))
if self.record_stats:
add_stat(os.path.basename(file).removeprefix('size_'), size, gzip_size)
total_size += size
total_gzip_size += gzip_size
if self.record_stats:
add_stat('total', total_size, total_gzip_size)
print(' size: %8s, compressed: %8s' % (total_size, total_gzip_size), end=' ')
if self.get_size_text():
print(' (' + self.get_size_text() + ')', end=' ')
print()
return recorded_stats
def get_size_text(self):
return ''
class ToolchainBenchmarker(Benchmarker):
"""ToolchainBenchmarker performs the compile step during run.
It measures the perf of the compiler rather than the generated code.
Some simple tests will just work with these benchmarkers but more complex ones will not because
the arguments to `build` are all ignored.
"""
def __init__(self, name, command):
super().__init__(name)
self.command = command
def run(self, args):
return run_process(self.command + args, stdout=PIPE, stderr=subprocess.STDOUT, check=False).stdout
def get_output_files(self):
return []
def build(self, parent, filename, shared_args, emcc_args, native_args, native_exec, lib_builder):
# no-op
pass
class NativeBenchmarker(Benchmarker):
def __init__(self, name, cc, cxx, cflags=None):
super().__init__(name)
self.cc = cc
self.cxx = cxx
self.cflags = cflags or [OPTIMIZATIONS]
def build(self, parent, filename, shared_args, emcc_args, native_args, native_exec, lib_builder):
native_args = native_args or []
shared_args = shared_args or []
self.parent = parent
if lib_builder:
env = {'CC': self.cc, 'CXX': self.cxx, 'CXXFLAGS': '-Wno-c++11-narrowing'}
env.update(clang_native.get_clang_native_env())
# Avoid mutating incoming native_args list
native_args = native_args.copy()
native_args += lib_builder(self.name, native=True, env_init=env)
if not native_exec:
compiler = self.cxx if filename.endswith('cpp') else self.cc
cmd = compiler + [
'-fno-math-errno',
filename,
'-o', filename + '.native',
] + self.cflags + shared_args + native_args + clang_native.get_clang_native_args()
# print(cmd)
run_process(cmd, env=clang_native.get_clang_native_env())
else:
shutil.copyfile(native_exec, filename + '.native')
shutil.copymode(native_exec, filename + '.native')
final = os.path.dirname(filename) + os.path.sep + self.name + '_' + os.path.basename(filename) + '.native'
shutil.move(filename + '.native', final)
self.filename = final
def run(self, args):
return run_process([self.filename] + args, stdout=PIPE, stderr=subprocess.STDOUT, check=False).stdout
def get_output_files(self):
return [self.filename]
def get_size_text(self):
return 'dynamically linked - libc etc. are not included!'
class EmscriptenBenchmarker(Benchmarker):
def __init__(self, name, engine, cflags=None, env=None):
super().__init__(name)
self.engine = engine
self.cflags = cflags or []
self.env = os.environ.copy()
if env:
self.env.update(env)
def build(self, parent, filename, shared_args, emcc_args, native_args, native_exec, lib_builder):
emcc_args = emcc_args or []
self.filename = filename
llvm_root = self.env.get('LLVM') or config.LLVM_ROOT
if lib_builder:
env_init = self.env.copy()
# Note that we need to pass in all the flags here because some build
# systems (like zlib) if they see a CFLAGS it will override all their
# default flags, including optimizations.
env_init['CFLAGS'] = ' '.join(LLVM_FEATURE_FLAGS + [OPTIMIZATIONS] + self.cflags)
# Avoid mutating incoming emcc_args
emcc_args = emcc_args.copy()
emcc_args += lib_builder('js_' + llvm_root, native=False, env_init=env_init)
final = os.path.dirname(filename) + os.path.sep + self.name + ('_' if self.name else '') + os.path.basename(filename) + '.js'
final = final.replace('.cpp', '')
utils.delete_file(final)
cmd = [
EMCC, filename,
OPTIMIZATIONS,
'-sINITIAL_MEMORY=256MB',
'-sENVIRONMENT=node,shell',
'-o', final,
] + LLVM_FEATURE_FLAGS
if shared_args:
cmd += shared_args
if PROFILING:
cmd += ['--profiling']
else:
cmd += ['--closure=1', '-sMINIMAL_RUNTIME']
# add additional emcc args at the end, which may override other things
# above, such as minimal runtime
cmd += emcc_args + self.cflags
if '-sFILESYSTEM' not in cmd and '-sFORCE_FILESYSTEM' not in cmd:
cmd += ['-sFILESYSTEM=0']
self.cmd = cmd
run_process(cmd, env=self.env)
self.filename = final
def run(self, args):
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=subprocess.STDOUT)
def get_output_files(self):
ret = [self.filename]
if 'WASM=0' in self.cmd:
if 'MINIMAL_RUNTIME=0' not in self.cmd:
ret.append(utils.replace_suffix(self.filename, '.asm.js'))
ret.append(utils.replace_suffix(self.filename, '.mem'))
else:
ret.append(self.filename + '.mem')
else:
ret.append(utils.replace_suffix(self.filename, '.wasm'))
return ret
# This benchmarker will make a test benchmark build with Emscripten and record
# the file output sizes in out/test/stats.json. The file format is specified at
# https://skia.googlesource.com/buildbot/+/refs/heads/main/perf/FORMAT.md
# Running the benchmark will be skipped.
class SizeBenchmarker(EmscriptenBenchmarker):
record_stats = True
def __init__(self, name):
# do not set an engine, as we will not run the code
super().__init__(name, engine=None)
# we will not actually run the benchmarks
run = None
CHEERP_BIN = '/opt/cheerp/bin/'
class CheerpBenchmarker(Benchmarker):
def __init__(self, name, engine, cflags=None):
super().__init__(name)
self.engine = engine
self.cflags = cflags or [OPTIMIZATIONS]
def build(self, parent, filename, shared_args, emcc_args, native_args, native_exec, lib_builder):
cheerp_args = [
'-fno-math-errno',
]
cheerp_args += self.cflags
self.parent = parent
if lib_builder:
# build as "native" (so no emcc env stuff), but with all the cheerp stuff
# set in the env
cheerp_args += lib_builder(self.name, native=True, env_init={
'CC': CHEERP_BIN + 'clang',
'CXX': CHEERP_BIN + 'clang++',
'AR': CHEERP_BIN + '../libexec/cheerp-unknown-none-ar',
'LD': CHEERP_BIN + 'clang',
'NM': CHEERP_BIN + 'llvm-nm',
'LDSHARED': CHEERP_BIN + 'clang',
'RANLIB': CHEERP_BIN + '../libexec/cheerp-unknown-none-ranlib',
'CXXFLAGS': '-Wno-c++11-narrowing',
'CHEERP_PREFIX': CHEERP_BIN + '../',
})
if PROFILING:
cheerp_args += ['-cheerp-pretty-code'] # get function names, like emcc --profiling
final = os.path.dirname(filename) + os.path.sep + self.name + ('_' if self.name else '') + os.path.basename(filename) + '.js'
final = final.replace('.cpp', '')
utils.delete_file(final)
dirs_to_delete = []
cheerp_args += ['-cheerp-preexecute']
try:
# print(cheerp_args)
if filename.endswith('.c'):
compiler = CHEERP_BIN + '/clang'
else:
compiler = CHEERP_BIN + '/clang++'
cmd = [compiler] + cheerp_args + [
'-cheerp-linear-heap-size=256',
'-cheerp-secondary-output-file=' + final.replace('.js', '.wasm'),
filename,
'-o', final,
] + shared_args
# print(' '.join(cmd))
run_process(cmd, stdout=PIPE, stderr=PIPE)
self.filename = final
finally:
for dir_ in dirs_to_delete:
utils.delete_dir(dir_)
def run(self, args):
return jsrun.run_js(self.filename, engine=self.engine, args=args, stderr=subprocess.STDOUT)
def get_output_files(self):
return [self.filename, utils.replace_suffix(self.filename, '.wasm')]
# Benchmarkers
benchmarkers: list[Benchmarker] = []
# avoid the baseline compiler running, because it adds a lot of noise
# (the nondeterministic time it takes to get to the full compiler ends up
# mattering as much as the actual benchmark)
aot_v8 = (config.V8_ENGINE if config.V8_ENGINE else []) + ['--no-liftoff']
named_benchmarkers = {
'clang': NativeBenchmarker('clang', [CLANG_CC], [CLANG_CXX]),
'gcc': NativeBenchmarker('gcc', ['gcc', '-no-pie'], ['g++', '-no-pie']),
'size': SizeBenchmarker('size'),
'v8': EmscriptenBenchmarker('v8', aot_v8),
'v8-lto': EmscriptenBenchmarker('v8-lto', aot_v8, ['-flto']),
'v8-ctors': EmscriptenBenchmarker('v8-ctors', aot_v8, ['-sEVAL_CTORS']),
'v8-64': EmscriptenBenchmarker('v8-64', aot_v8, ['-sMEMORY64=2']),
'node': EmscriptenBenchmarker('node', config.NODE_JS_TEST),
'node-64': EmscriptenBenchmarker('node-64', config.NODE_JS_TEST, ['-sMEMORY64=2']),
'cherp-v8': CheerpBenchmarker('cheerp-v8-wasm', aot_v8),
# TODO: ensure no baseline compiler is used, see v8
'sm': EmscriptenBenchmarker('sm', config.SPIDERMONKEY_ENGINE),
'cherp-sm': CheerpBenchmarker('cheerp-sm-wasm', config.SPIDERMONKEY_ENGINE),
'clang-build': ToolchainBenchmarker('clang', [CLANG_CC]),
'emcc-build': ToolchainBenchmarker('emcc', [EMCC]),
}
for name in EMTEST_BENCHMARKERS.split(','):
if name not in named_benchmarkers:
raise Exception('error, unknown benchmarker ' + name)
benchmarkers.append(named_benchmarkers[name])
class benchmark(common.RunnerCore):
save_dir = True
stats = [] # type: ignore
@classmethod
def setUpClass(cls):
super().setUpClass()
fingerprint = ['including compilation', time.asctime()]
try:
fingerprint.append('em: ' + run_process(['git', 'show'], stdout=PIPE).stdout.splitlines()[0])
except Exception:
pass
try:
with common.chdir(os.path.expanduser('~/Dev/mozilla-central')):
fingerprint.append('sm: ' + [line for line in run_process(['hg', 'tip'], stdout=PIPE).stdout.splitlines() if 'changeset' in line][0])
except Exception:
pass
fingerprint.append('llvm: ' + config.LLVM_ROOT)
print('Running Emscripten benchmarks... [ %s ]' % ' | '.join(fingerprint))
@classmethod
def tearDownClass(cls):
super().tearDownClass()
if cls.stats:
output = {
'version': 1,
'git_hash': '',
'results': cls.stats,
}
utils.write_file('stats.json', json.dumps(output, indent=2) + '\n')
def hardcode_arguments(self, code, args):
"""Avoid depending on argument reception from the commandline, where possible.
Here we take the command line arguments and embed them directly into `main` function.
If we cannot find a `main` function, or if we have more than one argument, we
do not do any embedding, and the resulting test will depend on arguments being
passed via argv (which works in most environments).
"""
if not code or 'int main()' in code:
return code
# We only know how to embed a single argument
if len(args) != 1:
return code
main_pattern = 'int main(int argc, char **argv)'
assert main_pattern in code
code = code.replace(main_pattern, 'int benchmark_main(int argc, char **argv)')
code += '''
int main() {
int newArgc = 2;
char* newArgv[] = { (char*)"./program.exe", (char*)"%s" };
int ret = benchmark_main(newArgc, newArgv);
return ret;
}
''' % args[0]
return code
def do_benchmark(self, name, src, expected_output='FAIL', args=None,
emcc_args=None, native_args=None, shared_args=None,
force_c=False, reps=EMTEST_REPS, native_exec=None,
output_parser=None, lib_builder=None,
skip_benchmarkers=None):
if not benchmarkers:
raise Exception('error, no benchmarkers')
args = args or [DEFAULT_ARG]
dirname = self.get_dir()
filename = os.path.join(dirname, name + '.c' + ('' if force_c else 'pp'))
src = self.hardcode_arguments(src, args)
utils.write_file(filename, src)
print()
baseline = None
for b in benchmarkers:
if skip_benchmarkers and b.name in skip_benchmarkers:
continue
if not b.run:
# If we won't run the benchmark, we don't need repetitions.
reps = 0
print('Running benchmarker: %s: %s' % (b.__class__.__name__, b.name))
b.build(self, filename, shared_args, emcc_args, native_args, native_exec, lib_builder)
b.bench(args, reps, output_parser, expected_output)
recorded_stats = b.display(baseline)
if recorded_stats:
self.add_stats(name, recorded_stats)
if not baseline:
# Use the first benchmarker as the baseline. Other benchmarkers can then
# report relative performance compared to this.
baseline = b
def add_stats(self, name, stats):
self.stats.append({
'key': {
'test': name,
'units': 'bytes',
},
'measurements': {
'stats': stats,
},
})
@parameterized({
'': (False,),
# Also interesting to test it without the printfs which allow checking the output. Without
# printf, code size is dominated by the runtime itself (the compiled code is just a few lines).
'nocheck': (True,),
})
def test_primes(self, check):
src = r'''
#include <stdio.h>
#include <math.h>
int main(int argc, char **argv) {
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: arg = 33000; break;
case 2: arg = 130000; break;
case 3: arg = 220000; break;
case 4: arg = 610000; break;
case 5: arg = 1010000; break;
default:
#ifdef CHECK
printf("error: %d\\n", arg);
#endif
return -1;
}
int primes = 0, curri = 2;
while (primes < arg) {
int ok = true;
for (int j = 2; j < sqrtf(curri); j++) {
if (curri % j == 0) {
ok = false;
break;
}
}
if (ok) {
primes++;
}
curri++;
}
#ifdef CHECK
printf("lastprime: %d.\n", curri-1);
#endif
return 0;
}
'''
self.do_benchmark('primes' if check else 'primes-nocheck', src, 'lastprime:' if check else '', shared_args=['-DCHECK'] if check else [])
def do_toolchain_benchmark(self, args):
# TODO: Perhaps this can be merged with the regular `do_benchmark` somehow.
benchmarkers = [
named_benchmarkers['clang-build'],
named_benchmarkers['emcc-build'],
]
baseline = None
print()
for b in benchmarkers:
b.bench(args)
b.display(baseline)
if not baseline:
# Use the first benchmarker as the baseline. Other benchmarkers can then
# report relative performance compared to this.
baseline = b
def test_compile_noop(self):
self.do_toolchain_benchmark(['--version'])
def test_compile_hello(self):
self.do_toolchain_benchmark(['-c', test_file('hello_world.c')])
def test_memops(self):
src = '''
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char **argv) {
int N, M;
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: N = 1024*1024; M = 55; break;
case 2: N = 1024*1024; M = 400; break;
case 3: N = 1024*1024; M = 800; break;
case 4: N = 1024*1024; M = 4000; break;
case 5: N = 1024*1024; M = 8000; break;
default: printf("error: %d\\n", arg); return -1;
}
int final = 0;
char *buf = (char*)malloc(N);
for (int t = 0; t < M; t++) {
for (int i = 0; i < N; i++)
buf[i] = (i + final)%256;
for (int i = 0; i < N; i++)
final += buf[i] & 1;
final = final % 1000;
}
printf("final: %d.\\n", final);
return 0;
}
'''
self.do_benchmark('memops', src, 'final:')
@non_core
def test_files(self):
src = r'''
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
int main(int argc, char **argv) {
int N = 100;
int M = 1000;
int K = 1000;
unsigned char *k = (unsigned char*)malloc(K+1), *k2 = (unsigned char*)malloc(K+1);
for (int i = 0; i < K; i++) {
k[i] = (i % 250) + 1;
}
k[K] = 0;
char buf[100];
for (int i = 0; i < N; i++) {
sprintf(buf, "/dev/shm/file-%d.dat", i);
FILE *f = fopen(buf, "w");
for (int j = 0; j < M; j++) {
fwrite(k, 1, (j % K) + 1, f);
}
fclose(f);
}
for (int i = 0; i < N; i++) {
sprintf(buf, "/dev/shm/file-%d.dat", i);
FILE *f = fopen(buf, "r");
for (int j = 0; j < M; j++) {
fread(k2, 1, (j % K) + 1, f);
}
fclose(f);
for (int j = 0; j < K; j++) {
assert(k[j] == k2[j]);
}
unlink(buf);
}
printf("ok");
return 0;
}
'''
self.do_benchmark('files', src, 'ok', emcc_args=['-sFILESYSTEM', '-sMINIMAL_RUNTIME=0', '-sEXIT_RUNTIME'])
def test_copy(self):
src = r'''
#include <stdio.h>
struct vec {
int x, y, z;
int r, g, b;
vec(int x_, int y_, int z_, int r_, int g_, int b_) : x(x_), y(y_), z(z_), r(r_), g(g_), b(b_) {}
static vec add(vec a, vec b) {
return vec(a.x+b.x, a.y+b.y, a.z+b.z, a.r+b.r, a.g+b.g, a.b+b.b);
}
void norm() {
x %= 1024;
y %= 1024;
z %= 1024;
r %= 1024;
b %= 1024;
g %= 1024;
}
int sum() { return x + y + z + r + g + b; }
};
int main(int argc, char **argv) {
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: arg = 75; break;
case 2: arg = 625; break;
case 3: arg = 1250; break;
case 4: arg = 5*1250; break;
case 5: arg = 10*1250; break;
default: printf("error: %d\\n", arg); return -1;
}
int total = 0;
for (int i = 0; i < arg; i++) {
for (int j = 0; j < 50000; j++) {
vec c(i, i+i%10, j*2, i%255, j%120, i%15);
vec d(j+i%10, j*2, j%255, i%120, j%15, j);
vec e = c;
c.norm();
d.norm();
vec f = vec::add(c, d);
f = vec::add(e, f);
f.norm();
f = vec::add(d, f);
total += f.sum() % 100;
total %= 10240;
}
}
printf("sum:%d\n", total);
return 0;
}
'''
self.do_benchmark('copy', src, 'sum:')
def test_ifs(self):
src = r'''
#include <stdio.h>
#include <stdlib.h>
volatile int x = 0;
__attribute__ ((noinline)) int calc() {
return (x++) & 16384;
}
int main(int argc, char **argv) {
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: arg = 5*75; break;
case 2: arg = 5*625; break;
case 3: arg = 5*1250; break;
case 4: arg = 5*5*1250; break;
case 5: arg = 5*10*1250; break;
default: printf("error: %d\\n", arg); return -1;
}
int sum = 0;
for (int j = 0; j < 27000; j++) {
for (int i = 0; i < arg; i++) {
if (calc() && calc()) {
sum += 17;
} else {
sum += 19;
}
if (calc() || calc()) {
sum += 23;
}
}
}
printf("ok %d\n", sum);
return 0;
}
'''
self.do_benchmark('ifs', src, 'ok')
def test_conditionals(self):
src = r'''
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv) {
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: arg = 3*75; break;
case 2: arg = 3*625; break;
case 3: arg = 3*1250; break;
case 4: arg = 3*5*1250; break;
case 5: arg = 3*10*1250; break;
default: printf("error: %d\\n", arg); return -1;
}
int x = 0;
for (int j = 0; j < 27000; j++) {
for (int i = 0; i < arg; i++) {
if (((x*x+11) % 3 == 0) | ((x*(x+2)+17) % 5 == 0)) {
x += 2;
} else {
x++;
}
}
}
printf("ok %d\n", x);
return 0;
}
'''
self.do_benchmark('conditionals', src, 'ok')
def test_fannkuch(self):
src = read_file(test_file('third_party/fannkuch.c')).replace(
'int n = argc > 1 ? atoi(argv[1]) : 0;',
'''
int n;
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: n = 9; break;
case 2: n = 10; break;
case 3: n = 11; break;
case 4: n = 11; break;
case 5: n = 12; break;
default: printf("error: %d\\n", arg); return -1;
}
''',
)
assert 'switch(arg)' in src
self.do_benchmark('fannkuch', src, 'Pfannkuchen(')
def test_corrections(self):
src = r'''
#include <stdio.h>
#include <math.h>
int main(int argc, char **argv) {
int N, M;
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: N = 20000; M = 550; break;
case 2: N = 20000; M = 3500; break;
case 3: N = 20000; M = 7000; break;
case 4: N = 20000; M = 5*7000; break;
case 5: N = 20000; M = 10*7000; break;
default: printf("error: %d\\n", arg); return -1;
}
unsigned int f = 0;
unsigned short s = 0;
for (int t = 0; t < M; t++) {
for (int i = 0; i < N; i++) {
f += i / ((t % 5)+1);
if (f > 1000) f /= (t % 3)+1;
if (i % 4 == 0) f += i * (i % 8 == 0 ? 1 : -1);
s += (short(f)*short(f)) % 256;
}
}
printf("final: %d:%d.\n", f, s);
return 0;
}
'''
self.do_benchmark('corrections', src, 'final:')
def test_corrections64(self):
src = r'''
#include <stdio.h>
#include <math.h>
#include <stdint.h>
int main(int argc, char **argv) {
int64_t N, M;
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: N = 8000; M = 550; break;
case 2: N = 8000; M = 3500; break;
case 3: N = 8000; M = 7000; break;
case 4: N = 8000; M = 5*7000; break;
case 5: N = 8000; M = 10*7000; break;
default: printf("error: %d\\n", arg); return -1;
}
uint64_t f = 0;
uint32_t s = 0;
for (int64_t t = 0; t < M; t++) {
for (int64_t i = 0; i < N; i++) {
f += i / ((t % 5)+1);
if (f > 1000) f /= (t % 3)+1;
if (i % 4 == 0) f += i * (i % 8 == 0 ? 1 : -1);
s += (short(f)*short(f)) % 256;
}
}
printf("final: %lld:%d.\n", f, s);
return 0;
}
'''
self.do_benchmark('corrections64', src, 'final:')
def fasta(self, name, double_rep):
src = read_file(test_file('third_party/fasta.cpp')).replace('double', double_rep)
src = src.replace(' const size_t n = ( argc > 1 ) ? atoi( argv[1] ) : 512;', '''
int n;
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: n = 19000000/20; break;
case 2: n = 19000000/2; break;
case 3: n = 19000000; break;
case 4: n = 19000000*5; break;
case 5: n = 19000000*10; break;
default: printf("error: %d\\n", arg); return -1;
}
''')
assert 'switch(arg)' in src
self.do_benchmark('fasta', src, '')
def test_fasta_float(self):
self.fasta('fasta_float', 'float')
@non_core
def test_fasta_double(self):
self.fasta('fasta_double', 'double')
def test_skinning(self):
src = read_file(test_file('skinning_test_no_simd.cpp'))
self.do_benchmark('skinning', src, 'blah=0.000000')
def test_havlak(self):
src = read_file(test_file('third_party/havlak.cpp'))
# This runs many recursive calls (DFS) and thus needs a larger stack
self.do_benchmark('havlak', src, 'Found', shared_args=['-std=c++11'],
emcc_args=['-sSTACK_SIZE=1MB'])
def test_base64(self):
src = read_file(test_file('benchmark/base64.c'))
self.do_benchmark('base64', src, 'decode')
@non_core
def test_life(self):
src = read_file(test_file('life.c'))
self.do_benchmark('life', src, '''--------------------------------''', force_c=True)
def test_zzz_linpack(self):
def output_parser(output):
mflops = re.search(r'Unrolled Double Precision ([\d\.]+) Mflops', output).group(1)
return 10000.0 / float(mflops)
self.do_benchmark('linpack_double', read_file(test_file('benchmark/linpack2.c')), '''Unrolled Double Precision''', force_c=True, output_parser=output_parser)
# Benchmarks the synthetic performance of calling native functions.
@non_core
def test_native_functions(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('native_functions', read_file(test_file('benchmark/benchmark_ffis.cpp')), 'Total time:',
output_parser=output_parser,
# Not minimal because this uses functions in library_browsers.js
emcc_args=['-sMINIMAL_RUNTIME=0'],
shared_args=['-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
# Benchmarks the synthetic performance of calling function pointers.
@non_core
def test_native_function_pointers(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('native_functions', read_file(test_file('benchmark/benchmark_ffis.cpp')), 'Total time:',
output_parser=output_parser,
# Not minimal because this uses functions in library_browsers.js
emcc_args=['-sMINIMAL_RUNTIME=0'],
shared_args=['-DBENCHMARK_FUNCTION_POINTER=1', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
# Benchmarks the synthetic performance of calling "foreign" JavaScript functions.
@non_core
def test_foreign_functions(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('foreign_functions', read_file(test_file('benchmark/benchmark_ffis.cpp')), 'Total time:',
output_parser=output_parser,
# Not minimal because this uses functions in library_browsers.js
emcc_args=['--js-library', test_file('benchmark/benchmark_ffis.js'), '-sMINIMAL_RUNTIME=0'],
shared_args=['-DBENCHMARK_FOREIGN_FUNCTION=1', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memcpy_128b(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memcpy_128b', read_file(test_file('benchmark/benchmark_memcpy.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMAX_COPY=128', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memcpy_4k(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memcpy_4k', read_file(test_file('benchmark/benchmark_memcpy.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=128', '-DMAX_COPY=4096', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memcpy_16k(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memcpy_16k', read_file(test_file('benchmark/benchmark_memcpy.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=4096', '-DMAX_COPY=16384', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memcpy_1mb(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memcpy_1mb', read_file(test_file('benchmark/benchmark_memcpy.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=16384', '-DMAX_COPY=1048576', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memcpy_16mb(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memcpy_16mb', read_file(test_file('benchmark/benchmark_memcpy.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=1048576', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memset_128b(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memset_128b', read_file(test_file('benchmark/benchmark_memset.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMAX_COPY=128', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memset_4k(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memset_4k', read_file(test_file('benchmark/benchmark_memset.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=128', '-DMAX_COPY=4096', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memset_16k(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memset_16k', read_file(test_file('benchmark/benchmark_memset.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=4096', '-DMAX_COPY=16384', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memset_1mb(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memset_1mb', read_file(test_file('benchmark/benchmark_memset.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=16384', '-DMAX_COPY=1048576', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
@non_core
def test_memset_16mb(self):
def output_parser(output):
return float(re.search(r'Total time: ([\d\.]+)', output).group(1))
self.do_benchmark('memset_16mb', read_file(test_file('benchmark/benchmark_memset.cpp')), 'Total time:', output_parser=output_parser, shared_args=['-DMIN_COPY=1048576', '-DBUILD_FOR_SHELL', '-I' + test_file('benchmark')])
def test_malloc_multithreading(self):
# Multithreaded malloc test. For emcc we use mimalloc here.
src = read_file(test_file('other/test_malloc_multithreading.c'))
# TODO measure with different numbers of cores and not fixed 4
self.do_benchmark('malloc_multithreading', src, 'Done.', shared_args=['-DWORKERS=4', '-pthread'], emcc_args=['-sEXIT_RUNTIME', '-sMALLOC=mimalloc', '-sMINIMAL_RUNTIME=0', '-sINITIAL_MEMORY=512MB'])
def test_matrix_multiply(self):
def output_parser(output):
return float(re.search(r'Total elapsed: ([\d\.]+)', output).group(1))
self.do_benchmark('matrix_multiply', read_file(test_file('matrix_multiply.cpp')), 'Total elapsed:', output_parser=output_parser, shared_args=['-I' + test_file('benchmark')])
def lua(self, benchmark, expected, output_parser=None):
self.cflags.remove('-Werror')
copy_asset(f'third_party/lua/{benchmark}.lua')
def lib_builder(name, native, env_init):
# Inject -sMEMORY64 into node-64 benchmarking runs.
env_init['MYCFLAGS'] = env_init.get('CFLAGS', '')
if '-sMEMORY64' in env_init['MYCFLAGS']:
env_init['MYLDFLAGS'] = '-sMEMORY64'
# We force recomputation for the native benchmarker because this benchmark
# uses native_exec=True, so we need to copy the native executable
return self.get_library(os.path.join('third_party', 'lua_native' if native else 'lua'), [os.path.join('src', 'lua.o'), os.path.join('src', 'liblua.a')], make=['make', 'generic'], configure=None, native=native, cache_name_extra=name, env_init=env_init, force_rebuild=native)
self.do_benchmark('lua_' + benchmark, '', expected,
force_c=True, args=[benchmark + '.lua', DEFAULT_ARG],
emcc_args=['--embed-file', benchmark + '.lua', '-sFORCE_FILESYSTEM', '-sMINIMAL_RUNTIME=0'], # not minimal because of files
lib_builder=lib_builder, native_exec=os.path.join('building', 'third_party', 'lua_native', 'src', 'lua'),
output_parser=output_parser)
def test_zzz_lua_scimark(self):
def output_parser(output):
return 100.0 / float(re.search(r'\nSciMark +([\d\.]+) ', output).group(1))
self.lua('scimark', '[small problem sizes]', output_parser=output_parser)
def test_zzz_lua_binarytrees(self):
# js version: ['binarytrees.lua', {0: 0, 1: 9.5, 2: 11.99, 3: 12.85, 4: 14.72, 5: 15.82}[arguments[0]]]
self.lua('binarytrees', 'long lived tree of depth')
def test_zzz_zlib(self):
self.cflags.remove('-Werror')
src = read_file(test_file('benchmark/test_zlib_benchmark.c'))
def lib_builder(name, native, env_init):
return self.get_library(os.path.join('third_party', 'zlib'), os.path.join('libz.a'), configure=['cmake', '-DCMAKE_POLICY_VERSION_MINIMUM=3.5', '.'], make=['cmake', '--build', '.', '--'], make_args=[], native=native, cache_name_extra=name, env_init=env_init)
self.do_benchmark('zlib', src, 'ok.',
force_c=True, shared_args=['-I' + test_file('third_party/zlib')], lib_builder=lib_builder)
def test_zzz_coremark(self):
src = read_file(test_file('third_party/coremark/core_main.c'))
def lib_builder(name, native, env_init):
return self.get_library('third_party/coremark', [os.path.join('coremark.a')], configure=None, native=native, cache_name_extra=name, env_init=env_init)
def output_parser(output):
iters_sec = re.search(r'Iterations/Sec : ([\d\.]+)', output).group(1)
return 100000.0 / float(iters_sec)
self.do_benchmark('coremark', src, 'Correct operation validated.', shared_args=['-I' + test_file('third_party/coremark')], lib_builder=lib_builder, output_parser=output_parser, force_c=True)
def test_zzz_box2d(self):
src = read_file(test_file('benchmark/test_box2d_benchmark.cpp'))
def lib_builder(name, native, env_init):
return self.get_library(os.path.join('third_party', 'box2d'), ['box2d.a'], configure=None, native=native, cache_name_extra=name, env_init=env_init)
self.do_benchmark('box2d', src, 'frame averages', shared_args=['-I' + test_file('third_party/box2d')], lib_builder=lib_builder)
def test_zzz_bullet(self):
self.cflags.remove('-Werror')
self.cflags += ['-Wno-c++11-narrowing', '-Wno-deprecated-register', '-Wno-writable-strings']
src = read_file(test_file('third_party/bullet/Demos/Benchmarks/BenchmarkDemo.cpp'))
src += read_file(test_file('third_party/bullet/Demos/Benchmarks/main.cpp'))
def lib_builder(name, native, env_init):
cflags = ' '.join(self.cflags) + ' ' + env_init.get('CFLAGS', '')
return self.get_library(str(Path('third_party/bullet')),
['src/BulletDynamics/libBulletDynamics.a',
'src/BulletCollision/libBulletCollision.a',
'src/LinearMath/libLinearMath.a'],
configure=['cmake', '.'], configure_args=['-DCMAKE_POLICY_VERSION_MINIMUM=3.5', '-DBUILD_DEMOS=OFF', '-DBUILD_EXTRAS=OFF', '-DUSE_GLUT=OFF', '-DCMAKE_CXX_STANDARD=14', f'-DCMAKE_CXX_FLAGS={cflags}'],
make=['cmake', '--build', '.', '--'], make_args=[], native=native, cache_name_extra=name, env_init=env_init)
self.do_benchmark('bullet', src, '\nok.\n',
shared_args=['-I' + test_file('third_party/bullet/src'), '-I' + test_file('third_party/bullet/Demos/Benchmarks')],
lib_builder=lib_builder)
def test_zzz_lzma(self):
src = read_file(test_file('benchmark/test_lzma_benchmark.c'))
def lib_builder(name, native, env_init):
return self.get_library(os.path.join('third_party', 'lzma'), [os.path.join('lzma.a')], configure=None, native=native, cache_name_extra=name, env_init=env_init)
self.do_benchmark('lzma', src, 'ok.', shared_args=['-I' + test_file('third_party/lzma')], lib_builder=lib_builder)
def test_zzz_sqlite(self):
src = read_file(test_file('third_party/sqlite/sqlite3.c')) + read_file(test_file('sqlite/speedtest1.c'))
self.do_benchmark('sqlite', src, 'TOTAL...',
native_args=['-ldl', '-pthread'],
shared_args=['-I' + test_file('third_party/sqlite')],
# not minimal because of files
emcc_args=['-sFILESYSTEM', '-sMINIMAL_RUNTIME=0'],
force_c=True)
@needs_make('depends on freetype')
def test_zzz_poppler(self):
utils.write_file('pre.js', '''
var benchmarkArgument = %s;
var benchmarkArgumentToPageCount = {
'0': 0,
'1': 1,
'2': 5,
'3': 15,
'4': 26,
'5': 55,
};
if (benchmarkArgument === 0) {
Module['arguments'] = ['-?'];
Module['printErr'] = function(){};
} else {
// Add 'filename' after 'input.pdf' to write the output so it can be verified.
Module['arguments'] = ['-scale-to', '1024', 'input.pdf', '-f', '1', '-l', '' + benchmarkArgumentToPageCount[benchmarkArgument]];
Module['postRun'] = function() {
var files = [];
for (var x in FS.root.contents) {
if (x.startsWith('filename-')) {
files.push(x);
}
}
files.sort();
var hash = 5381;
var totalSize = 0;
files.forEach(function(file) {
var data = Array.from(MEMFS.getFileDataAsTypedArray(FS.root.contents[file]));
for (var i = 0; i < data.length; i++) {
hash = ((hash << 5) + hash) ^ (data[i] & 0xff);
}
totalSize += data.length;
});
out(files.length + ' files emitted, total output size: ' + totalSize + ', hashed printout: ' + hash);
};
}
''' % DEFAULT_ARG)
def lib_builder(name, native, env_init): # noqa
if '-sMEMORY64' in env_init.get('CFLAGS', ''):
env_init['CPPFLAGS'] = '-sMEMORY64'
env_init['LDFLAGS'] = '-sMEMORY64'
return self.get_poppler_library(env_init=env_init)
self.do_benchmark('poppler', '', 'hashed printout',
shared_args=['-I' + test_file('poppler/include'),
'-I' + test_file('freetype/include')],
emcc_args=['-sFILESYSTEM', '--pre-js=pre.js', '--embed-file',
test_file('poppler/emscripten_html5.pdf') + '@input.pdf',
'-sERROR_ON_UNDEFINED_SYMBOLS=0',
'-sMINIMAL_RUNTIME=0'], # not minimal because of files
lib_builder=lib_builder,
# TODO: Fix poppler native and freetype MEMORY64 builds to be able
# to remove these skips
skip_benchmarkers=['clang', 'gcc', 'v8-64', 'node-64'])