/*
* Copyright 2016 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Inlining.
//
// Two versions are provided: inlining and inlining-optimizing. You
// probably want the optimizing version, which will optimize locations
// we inlined into, as inlining by itself creates a block to house the
// inlined code, some temp locals, etc., which can usually be removed
// by optimizations. Note that the two versions use the same heuristics,
// so we don't take into account the overhead if you don't optimize
// afterwards. The non-optimizing version is mainly useful for debugging,
// or if you intend to run a full set of optimizations anyhow on
// everything later.
//
#include <atomic>
#include "ir/branch-utils.h"
#include "ir/debuginfo.h"
#include "ir/drop.h"
#include "ir/eh-utils.h"
#include "ir/element-utils.h"
#include "ir/find_all.h"
#include "ir/literal-utils.h"
#include "ir/localize.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/type-updating.h"
#include "ir/utils.h"
#include "parsing.h"
#include "pass.h"
#include "passes/opt-utils.h"
#include "wasm-builder.h"
#include "wasm.h"
namespace wasm {
namespace {
enum class InliningMode {
// We do not know yet if this function can be inlined, as that has
// not been computed yet.
Unknown,
// This function cannot be inlined in any way.
Uninlineable,
// This function can be inlined fully, that is, normally: the entire function
// can be inlined. This is in contrast to split/partial inlining, see below.
Full,
// This function cannot be inlined normally, but we can use split inlining,
// using pattern "A" or "B" (see below).
SplitPatternA,
SplitPatternB
};
// Useful info on a function, helping us decide if we can inline it
struct FunctionInfo {
std::atomic<Index> refs;
Index size;
bool hasCalls;
bool hasLoops;
bool hasTryDelegate;
// Something is used globally if there is a reference to it in a table or
// export etc.
bool usedGlobally;
// We consider a function to be a trivial call if the body is just a call with
// trivial arguments, like this:
//
// (func $forward (param $x) (param $y)
// (call $target (local.get $x) (local.get $y))
// )
//
// Specifically the body must be a call, and the operands to the call must be
// of size 1 (generally, LocalGet or Const).
bool isTrivialCall;
InliningMode inliningMode;
FunctionInfo() { clear(); }
void clear() {
refs = 0;
size = 0;
hasCalls = false;
hasLoops = false;
hasTryDelegate = false;
usedGlobally = false;
isTrivialCall = false;
inliningMode = InliningMode::Unknown;
}
// Provide an explicit = operator as the |refs| field lacks one by default.
FunctionInfo& operator=(const FunctionInfo& other) {
refs = other.refs.load();
size = other.size;
hasCalls = other.hasCalls;
hasLoops = other.hasLoops;
hasTryDelegate = other.hasTryDelegate;
usedGlobally = other.usedGlobally;
isTrivialCall = other.isTrivialCall;
inliningMode = other.inliningMode;
return *this;
}
// See pass.h for how defaults for these options were chosen.
bool worthFullInlining(PassOptions& options) {
// Until we have proper support for try-delegate, ignore such functions.
// FIXME https://github.com/WebAssembly/binaryen/issues/3634
if (hasTryDelegate) {
return false;
}
// If it's small enough that we always want to inline such things, do so.
if (size <= options.inlining.alwaysInlineMaxSize) {
return true;
}
// If it has one use, then inlining it would likely reduce code size, at
// least for reasonable function sizes.
if (refs == 1 && !usedGlobally &&
size <= options.inlining.oneCallerInlineMaxSize) {
return true;
}
// If it's so big that we have no flexible options that could allow it,
// do not inline.
if (size > options.inlining.flexibleInlineMaxSize) {
return false;
}
// More than one use, so we can't eliminate it after inlining, and inlining
// it will hurt code size. Stop if we are focused on size or not heavily
// focused on speed.
if (options.shrinkLevel > 0 || options.optimizeLevel < 3) {
return false;
}
if (hasCalls) {
// This has calls. If it is just a trivial call itself then inline, as we
// will save a call that way - basically we skip a trampoline in the
// middle - but if it is something more complex, leave it alone, as we may
// not help much (and with recursion we may end up with a wasteful
// increase in code size).
//
// Note that inlining trivial calls may increase code size, e.g. if they
// use a parameter more than once (forcing us after inlining to save that
// value to a local, etc.), but here we are optimizing for speed and not
// size, so we risk it.
return isTrivialCall;
}
// This doesn't have calls. Inline if loops do not prevent us (normally, a
// loop suggests a lot of work and so inlining is less useful).
return !hasLoops || options.inlining.allowFunctionsWithLoops;
}
};
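// To illustrate the worthFullInlining() heuristics above with hypothetical
// option values (see pass.h for the real defaults): with
// alwaysInlineMaxSize=2 and flexibleInlineMaxSize=20, a function of size 2
// is always inlined; a single-caller function that is not used globally is
// inlined if it fits under oneCallerInlineMaxSize; a multi-caller function
// of size 25 is never inlined (it exceeds the flexible limit); and a
// multi-caller function of size 10 is inlined only when optimizing heavily
// for speed (-O3 and no shrink level), and then only if it has no loops and
// no calls (unless it is itself a trivial call).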
static bool canHandleParams(Function* func) {
// We cannot inline a function if we cannot handle placing its params in
// locals, as all params become locals.
for (auto param : func->getParams()) {
if (!TypeUpdating::canHandleAsLocal(param)) {
return false;
}
}
return true;
}
using NameInfoMap = std::unordered_map<Name, FunctionInfo>;
struct FunctionInfoScanner
: public WalkerPass<PostWalker<FunctionInfoScanner>> {
bool isFunctionParallel() override { return true; }
FunctionInfoScanner(NameInfoMap& infos) : infos(infos) {}
std::unique_ptr<Pass> create() override {
return std::make_unique<FunctionInfoScanner>(infos);
}
void visitLoop(Loop* curr) {
// having a loop
infos[getFunction()->name].hasLoops = true;
}
void visitCall(Call* curr) {
// can't add a new element in parallel
assert(infos.count(curr->target) > 0);
infos[curr->target].refs++;
// having a call
infos[getFunction()->name].hasCalls = true;
}
// N.B.: CallIndirect and CallRef are intentionally omitted here, as we only
// note direct calls. Direct calls can lead to infinite recursion, which we
// need to avoid, while indirect ones may in theory be optimized to direct
// calls later. We take that risk, which is worthwhile: if we do manage to
// turn an indirect call into something else then it can be a big speedup,
// so we do want to inline code that has such indirect calls.
void visitTry(Try* curr) {
if (curr->isDelegate()) {
infos[getFunction()->name].hasTryDelegate = true;
}
}
void visitRefFunc(RefFunc* curr) {
assert(infos.count(curr->func) > 0);
infos[curr->func].refs++;
}
void visitFunction(Function* curr) {
auto& info = infos[curr->name];
if (!canHandleParams(curr)) {
info.inliningMode = InliningMode::Uninlineable;
}
info.size = Measurer::measure(curr->body);
if (auto* call = curr->body->dynCast<Call>()) {
if (info.size == call->operands.size() + 1) {
// This function body is a call with some trivial (size 1) operands like
// LocalGet or Const, so it is a trivial call.
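// (For example, (call $f (local.get $x) (local.get $y)) measures as 3
// expressions - the call plus its two size-1 operands - and here
// call->operands.size() + 1 == 3, so the check matches.)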
info.isTrivialCall = true;
}
}
}
private:
NameInfoMap& infos;
};
struct InliningAction {
Expression** callSite;
Function* contents;
bool insideATry;
InliningAction(Expression** callSite, Function* contents, bool insideATry)
: callSite(callSite), contents(contents), insideATry(insideATry) {}
};
struct InliningState {
// Maps functions worth inlining to the mode with which we can inline them.
std::unordered_map<Name, InliningMode> inlinableFunctions;
// function name => actions that can be performed in it
std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction;
};
struct Planner : public WalkerPass<TryDepthWalker<Planner>> {
bool isFunctionParallel() override { return true; }
Planner(InliningState* state) : state(state) {}
std::unique_ptr<Pass> create() override {
return std::make_unique<Planner>(state);
}
void visitCall(Call* curr) {
// plan to inline if we know this is valid to inline, and if the call is
// actually performed - if it is dead code, it's pointless to inline.
// we also cannot inline a function into itself.
bool isUnreachable;
if (curr->isReturn) {
// Tail calls are only actually unreachable if one of their arguments is
// unreachable (the return_call itself always has type unreachable).
isUnreachable = std::any_of(
curr->operands.begin(), curr->operands.end(), [](Expression* op) {
return op->type == Type::unreachable;
});
} else {
isUnreachable = curr->type == Type::unreachable;
}
if (state->inlinableFunctions.count(curr->target) && !isUnreachable &&
curr->target != getFunction()->name) {
// nest the call in a block. that way the location of the pointer to the
// call will not change even if we inline multiple times into the same
// function; otherwise call1(call2()) might be a problem
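// For example, in call1(call2()): if we recorded a pointer straight into
// call1's operand list for call2, then inlining call1 (which replaces the
// entire call1 expression) could leave that pointer dangling. With the
// wrapper block, each recorded pointer is &block->list[0], and the block
// itself survives (it just gets re-parented), so the slot stays valid.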
auto* block = Builder(*getModule()).makeBlock(curr);
replaceCurrent(block);
// can't add a new element in parallel
assert(state->actionsForFunction.count(getFunction()->name) > 0);
state->actionsForFunction[getFunction()->name].emplace_back(
&block->list[0], getModule()->getFunction(curr->target), tryDepth > 0);
}
}
private:
InliningState* state;
};
struct Updater : public TryDepthWalker<Updater> {
Module* module;
std::map<Index, Index> localMapping;
Name returnName;
Type resultType;
bool isReturn;
Builder* builder;
PassOptions& options;
struct ReturnCallInfo {
// The original `return_call` or `return_call_indirect` or `return_call_ref`
// with its operands replaced with `local.get`s.
Expression* call;
// The branch that is serving as the "return" part of the original
// `return_call`.
Break* branch;
};
// Collect information on return_calls in the inlined body. Each will be
// turned into a branch out of the original inlined body, followed by a
// non-return version of the original `return_call`, followed by a branch out
// to the caller. The branch labels will be filled in at the end of the walk.
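//
// Roughly, one such rewritten return_call yields (a sketch):
//
// (block $__return_call0 ;; wraps what was the inlined body
// ..the body; at the original return_call site we store the call's
// operands to locals and (br $__return_call0)..
// )
// (call $target ..) ;; the non-return version of the call, executed after
// ;; breaking out, with its result then flowing back to the caller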
std::vector<ReturnCallInfo> returnCallInfos;
Updater(PassOptions& options) : options(options) {}
void visitReturn(Return* curr) {
replaceCurrent(builder->makeBreak(returnName, curr->value));
}
template<typename T> void handleReturnCall(T* curr, Signature sig) {
if (isReturn || !curr->isReturn) {
// If the inlined callsite was already a return_call, then we can keep
// return_calls in the inlined function rather than downgrading them.
// That is, if A->B and B->C and both those calls are return_calls
// then after inlining A->B we want to now have A->C be a
// return_call.
return;
}
if (tryDepth == 0) {
// Return calls in inlined functions should only break out of
// the scope of the inlined code, not the entire function they
// are being inlined into. To achieve this, make the call a
// non-return call and add a break. This does not cause
// unbounded stack growth because inlining and return calling
// both avoid creating a new stack frame.
curr->isReturn = false;
curr->type = sig.results;
// There might still be unreachable children causing this to be
// unreachable.
curr->finalize();
if (sig.results.isConcrete()) {
replaceCurrent(builder->makeBreak(returnName, curr));
} else {
replaceCurrent(builder->blockify(curr, builder->makeBreak(returnName)));
}
} else {
// Set the children to locals as necessary, then add a branch out of the
// inlined body. The branch label will be set later when we create branch
// targets for the calls.
Block* childBlock = ChildLocalizer(curr, getFunction(), *module, options)
.getChildrenReplacement();
Break* branch = builder->makeBreak(Name());
childBlock->list.push_back(branch);
childBlock->type = Type::unreachable;
replaceCurrent(childBlock);
curr->isReturn = false;
curr->type = sig.results;
returnCallInfos.push_back({curr, branch});
}
}
void visitCall(Call* curr) {
handleReturnCall(curr, module->getFunction(curr->target)->getSig());
}
void visitCallIndirect(CallIndirect* curr) {
handleReturnCall(curr, curr->heapType.getSignature());
}
void visitCallRef(CallRef* curr) {
Type targetType = curr->target->type;
if (!targetType.isSignature()) {
// We don't know what type the call should return, but it will also never
// be reached, so we don't need to do anything here.
return;
}
handleReturnCall(curr, targetType.getHeapType().getSignature());
}
void visitLocalGet(LocalGet* curr) {
curr->index = localMapping[curr->index];
}
void visitLocalSet(LocalSet* curr) {
curr->index = localMapping[curr->index];
}
void walk(Expression*& curr) {
PostWalker<Updater>::walk(curr);
if (returnCallInfos.empty()) {
return;
}
Block* body = builder->blockify(curr);
curr = body;
auto blockNames = BranchUtils::BranchAccumulator::get(body);
for (Index i = 0; i < returnCallInfos.size(); ++i) {
auto& info = returnCallInfos[i];
// Add a block containing the previous body and a branch up to the caller.
// Give the block a name that will allow this return_call's original
// callsite to branch out of it then execute the call before returning to
// the caller.
auto name = Names::getValidName(
"__return_call", [&](Name test) { return !blockNames.count(test); }, i);
blockNames.insert(name);
info.branch->name = name;
Block* oldBody = builder->makeBlock(body->list, body->type);
body->list.clear();
if (resultType.isConcrete()) {
body->list.push_back(builder->makeBlock(
name, {builder->makeBreak(returnName, oldBody)}, Type::none));
} else {
oldBody->list.push_back(builder->makeBreak(returnName));
oldBody->name = name;
oldBody->type = Type::none;
body->list.push_back(oldBody);
}
body->list.push_back(info.call);
body->finalize(resultType);
}
}
};
// Core inlining logic. Modifies the outside function (adding locals as
// needed), and returns the inlined code.
//
// An optional name hint can be provided, which will then be used in the name of
// the block we put the inlined code in. Using a unique name hint in each call
// of this function can reduce the risk of name overlaps (which cause fixup work
// in UniqueNameMapper::uniquify).
static Expression* doInlining(Module* module,
Function* into,
const InliningAction& action,
PassOptions& options,
Index nameHint = 0) {
Function* from = action.contents;
auto* call = (*action.callSite)->cast<Call>();
// Works for return_call, too
Type retType = module->getFunction(call->target)->getResults();
// Build the block that will contain the inlined contents.
Builder builder(*module);
auto* block = builder.makeBlock();
auto name = std::string("__inlined_func$") + from->name.toString();
if (nameHint) {
name += '$' + std::to_string(nameHint);
}
block->name = Name(name);
// In the unlikely event that the function already has a branch target with
// this name, fix that up, as otherwise we can get unexpected capture of our
// branches, that is, we could end up with this:
//
// (block $X ;; a new block we add as the target of returns
// (from's contents
// (block $X ;; a block in from's contents with a colliding name
// (br $X ;; a new br we just added that replaces a return
//
// Here the br wants to go to the very outermost block, to represent a
// return from the inlined function's code, but it ends up captured by an
// internal block. We also need to be careful of the call's children:
//
// (block $X ;; a new block we add as the target of returns
// (local.set $param
// (call's first parameter
// (br $X) ;; nested br in call's first parameter
// )
// )
//
// (In this case we could use a second block and define the named block $X
// after the call's parameters, but that adds work for an extremely rare
// situation.) The latter case does not apply if the call is a
// return_call inside a try, because in that case the call's
// children do not appear inside the same block as the inlined body.
bool hoistCall = call->isReturn && action.insideATry;
if (BranchUtils::hasBranchTarget(from->body, block->name) ||
(!hoistCall && BranchUtils::BranchSeeker::has(call, block->name))) {
auto fromNames = BranchUtils::getBranchTargets(from->body);
auto callNames = hoistCall ? BranchUtils::NameSet{}
: BranchUtils::BranchAccumulator::get(call);
block->name = Names::getValidName(block->name, [&](Name test) {
return !fromNames.count(test) && !callNames.count(test);
});
}
// Prepare to update the inlined code's locals and other things.
Updater updater(options);
updater.setFunction(into);
updater.module = module;
updater.resultType = from->getResults();
updater.returnName = block->name;
updater.isReturn = call->isReturn;
updater.builder = &builder;
// Set up a locals mapping
for (Index i = 0; i < from->getNumLocals(); i++) {
updater.localMapping[i] = builder.addVar(into, from->getLocalType(i));
}
if (hoistCall) {
// Wrap the existing function body in a block we can branch out of before
// entering the inlined function body. This block must have a name that is
// different from any other block name above the branch.
auto intoNames = BranchUtils::BranchAccumulator::get(into->body);
auto bodyName =
Names::getValidName(Name("__original_body"),
[&](Name test) { return !intoNames.count(test); });
if (retType.isConcrete()) {
into->body = builder.makeBlock(
bodyName, {builder.makeReturn(into->body)}, Type::none);
} else {
into->body = builder.makeBlock(
bodyName, {into->body, builder.makeReturn()}, Type::none);
}
// Sequence the inlined function body after the original caller body.
into->body = builder.makeSequence(into->body, block, retType);
// Replace the original callsite with an expression that assigns the
// operands into the params and branches out of the original body.
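// The overall result looks roughly like this (a sketch, for a call to
// $from inside $into):
//
// (func $into
// (block $__original_body
// ..original body; the old return_call site is now
// (local.set $param ..)* (br $__original_body)..
// (return) ;; normal exits of the original body still return
// )
// (block $__inlined_func$from
// ..the inlined body..
// )
// )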
auto numParams = from->getParams().size();
if (numParams) {
auto* branchBlock = builder.makeBlock();
for (Index i = 0; i < numParams; i++) {
branchBlock->list.push_back(
builder.makeLocalSet(updater.localMapping[i], call->operands[i]));
}
branchBlock->list.push_back(builder.makeBreak(bodyName));
branchBlock->finalize(Type::unreachable);
*action.callSite = branchBlock;
} else {
*action.callSite = builder.makeBreak(bodyName);
}
} else {
// Assign the operands into the params
for (Index i = 0; i < from->getParams().size(); i++) {
block->list.push_back(
builder.makeLocalSet(updater.localMapping[i], call->operands[i]));
}
// Zero out the vars (as we may be in a loop, and may depend on their
// zero-init value).
for (Index i = 0; i < from->vars.size(); i++) {
auto type = from->vars[i];
if (!LiteralUtils::canMakeZero(type)) {
// Non-zeroable locals do not need to be zeroed out. As they have no
// zero value they by definition should not be used before being written
// to, so any value we set here would not be observed anyhow.
continue;
}
block->list.push_back(
builder.makeLocalSet(updater.localMapping[from->getVarIndexBase() + i],
LiteralUtils::makeZero(type, *module)));
}
if (call->isReturn) {
assert(!action.insideATry);
if (retType.isConcrete()) {
*action.callSite = builder.makeReturn(block);
} else {
*action.callSite = builder.makeSequence(block, builder.makeReturn());
}
} else {
*action.callSite = block;
}
}
// Generate and update the inlined contents
auto* contents = ExpressionManipulator::copy(from->body, *module);
debuginfo::copyBetweenFunctions(from->body, contents, from, into);
updater.walk(contents);
block->list.push_back(contents);
block->type = retType;
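// At this point, in the common (non-return_call) case, the callsite has
// become roughly (a sketch):
//
// (block $__inlined_func$from
// (local.set $param_i (..operand i..)) ;; one per parameter
// (local.set $var_j (..zero..)) ;; zero-init each zeroable var
// ..copy of from's body, with returns rewritten to branch to this block..
// )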
// The ReFinalize below will handle propagating unreachability if we need to
// do so, that is, if the call was reachable but now the inlined content we
// replaced it with was unreachable. The opposite case requires special
// handling: ReFinalize works under the assumption that code can become
// unreachable, but it does not go back from that state. But inlining can
// cause that:
//
// (call $A ;; an unreachable call
// (unreachable)
// )
// =>
// (block $__inlined_A_body (result i32) ;; reachable code after inlining
// (unreachable)
// )
//
// That is, if the called function wraps the input parameter in a block with a
// declared type, then the block is not unreachable. And then we might error
// if the outside expects the code to be unreachable - perhaps it only
// validates that way. To fix this, if the call was unreachable then we make
// the inlined code unreachable as well. That also maximizes DCE
// opportunities by propagating unreachability as much as possible.
//
// (Note that we don't need to do this for a return_call, which is always
// unreachable anyhow.)
if (call->type == Type::unreachable && !call->isReturn) {
// Make the replacement code unreachable. Note that we can't just add an
// unreachable at the end, as the block might have breaks to it (returns are
// transformed into those).
Expression* old = block;
if (old->type.isConcrete()) {
old = builder.makeDrop(old);
}
*action.callSite = builder.makeSequence(old, builder.makeUnreachable());
}
// Anything we inlined into may now have non-unique label names, fix it up.
// Note that we must do this before refinalization, as otherwise duplicate
// block labels can lead to errors (the IR must be valid before we
// refinalize).
wasm::UniqueNameMapper::uniquify(into->body);
// Inlining unreachable contents can make things in the function we inlined
// into unreachable.
ReFinalize().walkFunctionInModule(into, module);
// New locals we added may require fixups for nondefaultability.
// FIXME Is this not done automatically?
TypeUpdating::handleNonDefaultableLocals(into, *module);
return block;
}
//
// Function splitting / partial inlining / inlining of conditions.
//
// A function may be too costly to inline, but it may be profitable to
// *partially* inline it. The specific cases optimized here are functions with a
// condition,
//
// function foo(x) {
// if (x) return;
// ..lots and lots of other code..
// }
//
// If the other code after the if is large enough or costly enough, then we will
// not inline the entire function. But it is useful to inline the condition.
// Consider this caller:
//
// function caller(x) {
// foo(0);
// foo(x);
// }
//
// If we inline the condition, we end up with this:
//
// function caller(x) {
// if (0) foo(0);
// if (x) foo(x);
// }
//
// By inlining the condition out of foo() we gain two benefits:
//
// * In the first call here the condition is zero, which means we can
// statically optimize out the call entirely.
// * Even if we can't do that (as in the second call), if at runtime we see
// condition is false then we avoid the call. That means we perform what is
// hopefully a cheap branch instead of a call and then a branch.
//
// The cost to doing this is an increase in code size, and so this is only done
// when optimizing heavily for speed.
//
// To implement partial inlining we split the function to be inlined. Starting
// with
//
// function foo(x) {
// if (x) return;
// ..lots and lots of other code..
// }
//
// We create the "inlineable" part of it, and the code that is "outlined":
//
// function foo$inlineable(x) {
// if (x) return;
// foo$outlined(x);
// }
// function foo$outlined(x) {
// ..lots and lots of other code..
// }
//
// (Note how if the second function were inlined into the first, we would end
// up where we started, with the original function.) After splitting the
// function in this way, we simply inline the inlineable part using the normal
// mechanism for that. That ends up replacing foo(x); with
//
// if (x) foo$outlined(x);
//
// which is what we wanted.
//
// To reduce the complexity of this feature, it is implemented almost entirely
// in its own class, FunctionSplitter. The main inlining logic just calls out to
// the splitter to check if a function is worth splitting, and to get the split
// part if so.
//
struct FunctionSplitter {
Module* module;
PassOptions& options;
FunctionSplitter(Module* module, PassOptions& options)
: module(module), options(options) {}
// Check if a function could be split in order to at least inline part of it,
// in a worthwhile manner.
//
// Even if this returns a split inlining mode, we may not end up inlining the
// function, as the main inlining logic has a few other considerations to take
// into account (like limitations on which functions can be inlined into in
// each iteration, the number of iterations, etc.). Therefore this function
// may only find out if we *can* split, but not actually do any splitting.
//
// Note that to avoid wasteful work, this function may return "Full" inlining
// mode instead of a split inlining mode. That is, if it detects that a
// partial inlining will trigger a follow-up full inline of the split function
// then it will instead return "InliningMode::Full" directly. In more detail,
// imagine we have
//
// foo(10);
//
// which calls
//
// func foo(x) {
// if (x) {
// CODE
// }
// }
//
// If we partially inline then the call becomes
//
// if (10) {
// outlined-CODE()
// }
//
// That is, we've only inlined the |if| out of foo(), and we call a function
// that contains the code in CODE. But if CODE is small enough to be inlined
// then a later iteration of the inliner will do just that. The result of this
// split inlining and then full inlining of the outlined code is exactly the
// same as if we normally inlined from the beginning, but it adds more work,
// so we'd like to avoid it, which we do by seeing when a split would be
// followed by inlining the remainder, and then we return Full here and just
// inline it all normally.
//
// Note that the above issue shows that we may in some cases inline more than
// the normal inlining limit. That is, if the inlining limit is 20, and CODE
// in the example above was 19, then foo()'s size was 21 (when we add the if
// and the get of |x|). That leads to the situation where foo() is too big to
// normally inline, but the outlined CODE can then be inlined. And this shows
// that we end up inlining a function of size 21, even though it is larger
// than our inlining limit. We consider this acceptable because it only
// occurs on functions slightly larger than the inlining limit, and only if
// they have the simple forms that split inlining recognizes, that we think
// are useful to inline. Alternatively, if we wanted to avoid this, it would
// add complexity because we'd need to recognize the outlined function with
// CODE in it and *not* inline it even though it is small enough, after
// splitting, to be inlined. That is, splitting creates smaller functions, so
// it can lead to more inlining (but, again, that makes sense since we only
// split on very specific patterns we believe are worth handling in that
// manner).
InliningMode getSplitDrivenInliningMode(Function* func, FunctionInfo& info) {
const Index MaxIfs = options.inlining.partialInliningIfs;
assert(MaxIfs > 0);
auto* body = func->body;
// If the body is a block, and we have breaks to that block, then we cannot
// outline any code - we can't outline a break without the break's target.
if (auto* block = body->dynCast<Block>()) {
if (BranchUtils::BranchSeeker::has(block, block->name)) {
return InliningMode::Uninlineable;
}
}
// All the patterns we look for right now start with an if at the very top
// of the function.
auto* iff = getIf(body);
if (!iff) {
return InliningMode::Uninlineable;
}
// If the condition is not very simple, the benefits of this optimization
// are not obvious.
if (!isSimple(iff->condition)) {
return InliningMode::Uninlineable;
}
// Pattern A: Check if the function begins with
//
// if (simple) return;
//
// TODO: support a return value
if (!iff->ifFalse && func->getResults() == Type::none &&
iff->ifTrue->is<Return>()) {
// The body must be a block, because if it were not then the function
// would be easily inlineable (just an if with a simple condition and a
// return), and we would not even attempt to do splitting.
assert(body->is<Block>());
auto outlinedFunctionSize = info.size - Measurer::measure(iff);
// If outlined function will be worth normal inline, skip the intermediate
// state and inline fully now. Note that if full inlining is disabled we
// will not do this, and instead inline partially.
if (!func->noFullInline &&
outlinedFunctionWorthInlining(info, outlinedFunctionSize)) {
return InliningMode::Full;
}
return InliningMode::SplitPatternA;
}
// Pattern B: Represents a function whose entire body looks like
//
// if (A_1) {
// ..heavy work..
// }
// ..
// if (A_k) {
// ..heavy work..
// }
// B; // an optional final value (which can be a return value)
//
// where there is a small number of such ifs with conditions A_1..A_k, and
// A_1..A_k and B (if the final value B exists) are very simple.
//
// Also, each if body must either be unreachable, or it must have type none
// and have no returns. If it is unreachable, for example because it is a
// return, then we will just return a value in the inlineable function:
//
// if (A_i) {
// return outlined(..);
// }
//
// Or, if an if body has type none, then for now we assume that we do not
// need to return a value from there, which makes things simpler, and in
// that case we just do this, which continues onward in the function:
//
// if (A_i) {
// outlined(..);
// }
//
// TODO: handle a possible returned value in this case as well.
//
// Note that the if body type must be unreachable or none, as this is an if
// without an else.
// Find the number of ifs.
Index numIfs = 0;
while (getIf(body, numIfs) && numIfs <= MaxIfs) {
numIfs++;
}
if (numIfs == 0 || numIfs > MaxIfs) {
return InliningMode::Uninlineable;
}
// Look for a final item after the ifs.
auto* finalItem = getItem(body, numIfs);
// The final item must be simple (or not exist, which is simple enough).
if (finalItem && !isSimple(finalItem)) {
return InliningMode::Uninlineable;
}
// There must be no other items after the optional final one.
if (finalItem && getItem(body, numIfs + 1)) {
return InliningMode::Uninlineable;
}
// This has the general shape we seek. Check each if.
for (Index i = 0; i < numIfs; i++) {
auto* iff = getIf(body, i);
// The if must have a simple condition and no else arm.
if (!isSimple(iff->condition) || iff->ifFalse) {
return InliningMode::Uninlineable;
}
if (iff->ifTrue->type == Type::none) {
// This must have no returns.
if (!FindAll<Return>(iff->ifTrue).list.empty()) {
return InliningMode::Uninlineable;
}
} else {
// This is an if without an else, and so the type is either none or
// unreachable, and we ruled out none before.
assert(iff->ifTrue->type == Type::unreachable);
}
}
// Success, this matches the pattern.
// If the outlined function will be worth inlining normally, skip the
// intermediate state and inline fully now. (As above, if full inlining is
// disabled, we only partially inline.)
if (numIfs == 1) {
auto outlinedFunctionSize = Measurer::measure(iff->ifTrue);
if (!func->noFullInline &&
outlinedFunctionWorthInlining(info, outlinedFunctionSize)) {
return InliningMode::Full;
}
}
return InliningMode::SplitPatternB;
}
// Returns the function we should inline, after we split the function into two
// pieces as described above (that is, in the example above, this would return
// foo$inlineable).
//
// This is called when we are definitely inlining the function, and so it will
// perform the splitting (if that has not already been done before).
Function* getInlineableSplitFunction(Function* func,
InliningMode inliningMode) {
assert(inliningMode == InliningMode::SplitPatternA ||
inliningMode == InliningMode::SplitPatternB);
auto& split = splits[func->name];
if (!split.inlineable) {
// We haven't performed the split, do it now.
split.inlineable = doSplit(func, inliningMode);
}
return split.inlineable;
}
// Clean up. When we are done we no longer need the inlineable functions on
// the module, as they have been inlined into all the places we wanted them
// for.
//
// Returns a list of the names of the functions we split.
std::vector<Name> finish() {
std::vector<Name> ret;
std::unordered_set<Name> inlineableNames;
for (auto& [func, split] : splits) {
auto* inlineable = split.inlineable;
if (inlineable) {
inlineableNames.insert(inlineable->name);
ret.push_back(func);
}
}
module->removeFunctions([&](Function* func) {
return inlineableNames.find(func->name) != inlineableNames.end();
});
return ret;
}
private:
// Information about splitting a function.
struct Split {
// The inlineable function out of the two that we generate by splitting.
// That is, foo$inlineable from above.
Function* inlineable = nullptr;
// The outlined function, that is, foo$outlined from above.
Function* outlined = nullptr;
};
// All the splitting we have already performed.
//
// Note that this maps from function names, and not Function*, as the main
// inlining code can remove functions as it goes, but we can rely on names
// staying constant.
std::unordered_map<Name, Split> splits;
bool outlinedFunctionWorthInlining(FunctionInfo& origin, Index sizeEstimate) {
FunctionInfo info;
// Start with a copy of the origin's info, and apply the size estimate.
// This is not accurate: for example, the origin function may have loops
// or calls even though this section does not. This is a conservative
// estimate, that is, it will return true only when it should, but might
// return false when a more precise analysis would return true. It is a
// practical estimate that avoids extra future work.
info = origin;
info.size = sizeEstimate;
return info.worthFullInlining(options);
}
Function* doSplit(Function* func, InliningMode inliningMode) {
Builder builder(*module);
if (inliningMode == InliningMode::SplitPatternA) {
// Note that "A" in the name here identifies this as being a split from
// pattern A. The second pattern B will have B in the name.
Function* inlineable = copyFunction(func, "inlineable-A");
auto* outlined = copyFunction(func, "outlined-A");
// The inlineable function should only have the if, which will call the
// outlined function with a flipped condition.
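// For the running example in the comment above this class, this yields,
// in pseudocode (the earlier foo$inlineable sketch, with the condition
// flipped and the early return dropped):
//
// function foo$inlineable(x) {
// if (!x) foo$outlined(x);
// }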
auto* inlineableIf = getIf(inlineable->body);
inlineableIf->condition =
builder.makeUnary(EqZInt32, inlineableIf->condition);
inlineableIf->ifTrue = builder.makeCall(
outlined->name, getForwardedArgs(func, builder), Type::none);
inlineable->body = inlineableIf;
// The outlined function no longer needs the initial if.
auto& outlinedList = outlined->body->cast<Block>()->list;
outlinedList.erase(outlinedList.begin());
return inlineable;
}
assert(inliningMode == InliningMode::SplitPatternB);
Function* inlineable = copyFunction(func, "inlineable-B");
const Index MaxIfs = options.inlining.partialInliningIfs;
assert(MaxIfs > 0);
// The inlineable function should only have the ifs, which will call the
// outlined heavy work.
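// For example, with two ifs and no final value, the result is roughly
// (a sketch; each if body becomes its own outlined function, and an if
// body that returned a value becomes "return foo$outlined_i(..)"):
//
// function foo$inlineable(x) {
// if (A_1) foo$outlined_1(x);
// if (A_2) foo$outlined_2(x);
// }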
for (Index i = 0; i < MaxIfs; i++) {
// For each if, create an outlined function with the body of that if,
// and call that from the if.
auto* inlineableIf = getIf(inlineable->body, i);
if (!inlineableIf) {
break;
}
auto* outlined = copyFunction(func, "outlined-B");
outlined->body = inlineableIf->ifTrue;
// The outlined function either returns the same results as the original
// one, or nothing, depending on if a value is returned here.
auto valueReturned =
func->getResults() != Type::none && outlined->body->type != Type::none;
outlined->setResults(valueReturned ? func->getResults() : Type::none);
inlineableIf->ifTrue = builder.makeCall(outlined->name,
getForwardedArgs(func, builder),
outlined->getResults());
if (valueReturned) {
inlineableIf->ifTrue = builder.makeReturn(inlineableIf->ifTrue);
}
}
return inlineable;
}
Function* copyFunction(Function* func, std::string prefix) {
// TODO: We copy quite a lot more than we need here, and throw stuff out.
// It is simple to just copy the entire thing to get the params and
// results and all that, but we could be more efficient.
prefix = "byn-split-" + prefix;
return ModuleUtils::copyFunction(
func,
*module,
Names::getValidFunctionName(*module,
prefix + '$' + func->name.toString()));
}
// Get the i-th item in a sequence of initial items in an expression. That is,
// if the item is a block, it may have several such items, and otherwise there
// is a single item, that item itself. This basically provides a simpler
// interface than checking if something is a block or not when there is just
// one item.
//
// Returns nullptr if there is no such item.
static Expression* getItem(Expression* curr, Index i = 0) {
if (auto* block = curr->dynCast<Block>()) {
auto& list = block->list;
if (i < list.size()) {
return list[i];
}
}
if (i == 0) {
return curr;
}
return nullptr;
}
// Get the i-th if in a sequence of initial ifs in an expression. If no such
// if exists, returns nullptr.
static If* getIf(Expression* curr, Index i = 0) {
auto* item = getItem(curr, i);
if (!item) {
return nullptr;
}
if (auto* iff = item->dynCast<If>()) {
return iff;
}
return nullptr;
}
// Checks if an expression is very simple - something simple enough that we
// are willing to inline it in this optimization. This should basically take
// almost no cost at all to compute.
bool isSimple(Expression* curr) {
// For now, support local and global gets, and unary operations.
// TODO: Generalize? Use costs.h?
if (curr->type == Type::unreachable) {
return false;
}
if (curr->is<GlobalGet>() || curr->is<LocalGet>()) {
return true;
}
if (auto* unary = curr->dynCast<Unary>()) {
return isSimple(unary->value);
}
if (auto* is = curr->dynCast<RefIsNull>()) {
return isSimple(is->value);
}
return false;
}
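// For example, (i32.eqz (local.get $x)) and (ref.is_null (global.get $g))
// count as simple here, while binary operations, loads, and calls do not
// (for now).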
// Returns a list of local.gets, one for each of the parameters to the
// function. This forwards the arguments passed to the inlineable function to
// the outlined one.
std::vector<Expression*> getForwardedArgs(Function* func, Builder& builder) {
std::vector<Expression*> args;
for (Index i = 0; i < func->getNumParams(); i++) {
args.push_back(builder.makeLocalGet(i, func->getLocalType(i)));
}
return args;
}
};
struct Inlining : public Pass {
// This pass changes locals and parameters.
// FIXME DWARF updating does not handle local changes yet.
bool invalidatesDWARF() override { return true; }
// whether to optimize where we inline
bool optimize = false;
// the information for each function. recomputed in each iteration
NameInfoMap infos;
std::unique_ptr<FunctionSplitter> functionSplitter;
Module* module = nullptr;
void run(Module* module_) override {
module = module_;
// No point in doing more iterations than the number of functions, as that
// would mean we are infinitely recursing (which should be very rare in
// practice, but it is possible that a recursive call can look like it is
// worth inlining).
Index iterationNumber = 0;
auto numOriginalFunctions = module->functions.size();
// Track how many iterations we inlined into a function. We are willing
// to inline many times into a function within an iteration, as e.g. that
// helps the case of many calls of a small getter. However, if we only do
// more inlining in separate iterations then it is likely code that was the
// result of previous inlinings that is now being inlined into. That is, an
// old inlining added a call to somewhere, and now we are inlining into that
// call. This is typically recursion, which to some extent can help, but
// then like loop unrolling it loses its benefit quickly, so set a limit
// here.
//
// In addition to inlining into a function, we track how many times we do
// other potentially repetitive operations like splitting a function before
// inlining, as any such repetitive operation should be limited in how many
// times we perform it. (An exception is how many times we inlined a
// function, which we do not want to limit - it can be profitable to inline
// a call into a great many callsites, over many iterations.)
//
// (Track names here, and not Function pointers, as we can remove functions
// while inlining, and it may be confusing during debugging to have a
// pointer to something that was removed.)
std::unordered_map<Name, Index> iterationCounts;
const size_t MaxIterationsForFunc = 5;
while (iterationNumber <= numOriginalFunctions) {
#ifdef INLINING_DEBUG
std::cout << "inlining loop iter " << iterationNumber
<< " (numFunctions: " << module->functions.size() << ")\n";
#endif
iterationNumber++;
std::unordered_set<Function*> inlinedInto;
prepare();
iteration(inlinedInto);
if (inlinedInto.empty()) {
return;
}
#ifdef INLINING_DEBUG
std::cout << " inlined into " << inlinedInto.size() << " funcs.\n";
#endif
for (auto* func : inlinedInto) {
EHUtils::handleBlockNestedPops(func, *module);
}
for (auto* func : inlinedInto) {
if (++iterationCounts[func->name] >= MaxIterationsForFunc) {
return;
}
}
if (functionSplitter) {
auto splitNames = functionSplitter->finish();
for (auto name : splitNames) {
if (++iterationCounts[name] >= MaxIterationsForFunc) {
return;
}
}
}
}
}
void prepare() {
infos.clear();
// fill in info, as we operate on it in parallel (each function to its own
// entry)
for (auto& func : module->functions) {
infos[func->name];
}
{
FunctionInfoScanner scanner(infos);
scanner.run(getPassRunner(), module);
scanner.walkModuleCode(module);
}
for (auto& ex : module->exports) {
if (ex->kind == ExternalKind::Function) {
infos[ex->value].usedGlobally = true;
}
}
if (module->start.is()) {
infos[module->start].usedGlobally = true;
}
// When optimizing heavily for speed, we may potentially split functions in
// order to inline parts of them, if partialInliningIfs is enabled.
auto& options = getPassOptions();
if (options.optimizeLevel >= 3 && !options.shrinkLevel &&
options.inlining.partialInliningIfs) {
functionSplitter = std::make_unique<FunctionSplitter>(module, options);
}
}
void iteration(std::unordered_set<Function*>& inlinedInto) {
// decide which to inline
InliningState state;
ModuleUtils::iterDefinedFunctions(*module, [&](Function* func) {
InliningMode inliningMode = getInliningMode(func->name);
assert(inliningMode != InliningMode::Unknown);
if (inliningMode != InliningMode::Uninlineable) {
state.inlinableFunctions[func->name] = inliningMode;
}
});
if (state.inlinableFunctions.empty()) {
return;
}
// Fill in actionsForFunction, as we operate on it in parallel (each
// function to its own entry). Also generate a vector of the function names
// so that in the later loop we can iterate on it deterministically and
// without iterator invalidation.
std::vector<Name> funcNames;
for (auto& func : module->functions) {
state.actionsForFunction[func->name];
funcNames.push_back(func->name);
}
// find and plan inlinings
Planner(&state).run(getPassRunner(), module);
// perform inlinings TODO: parallelize
std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined
for (auto name : funcNames) {
auto* func = module->getFunction(name);
// if we've inlined a function, don't inline into it in this iteration,
// avoid risk of races
// note that we do not risk stalling progress, as each iteration() will
// inline at least one call before hitting this
if (inlinedUses.count(func->name)) {
continue;
}
for (auto& action : state.actionsForFunction[name]) {
auto* inlinedFunction = action.contents;
// if we've inlined into a function, don't inline it in this iteration,
// avoid risk of races
// note that we do not risk stalling progress, as each iteration() will
// inline at least one call before hitting this
if (inlinedInto.count(inlinedFunction)) {
continue;
}
Name inlinedName = inlinedFunction->name;
if (!isUnderSizeLimit(func->name, inlinedName)) {
continue;
}
// Success - we can inline.
#ifdef INLINING_DEBUG
std::cout << "inline " << inlinedName << " into " << func->name << '\n';
#endif
// Update the action for the actual inlining we are about to perform
// (when splitting, we will actually inline one of the split pieces and
// not the original function itself; note how even if we do that then
// we are still removing a call to the original function here, and so
// we do not need to change anything else lower down - we still want to
// note that we got rid of one use of the original function).
action.contents = getActuallyInlinedFunction(action.contents);
// Perform the inlining and update counts.
doInlining(module, func, action, getPassOptions(), inlinedNameHint++);
inlinedUses[inlinedName]++;
inlinedInto.insert(func);
assert(inlinedUses[inlinedName] <= infos[inlinedName].refs);
}
}
if (optimize && inlinedInto.size() > 0) {
OptUtils::optimizeAfterInlining(inlinedInto, module, getPassRunner());
}
// remove functions that we no longer need after inlining
module->removeFunctions([&](Function* func) {
auto name = func->name;
auto& info = infos[name];
return inlinedUses.count(name) && inlinedUses[name] == info.refs &&
!info.usedGlobally;
});
}
// See explanation in doInlining() for the parameter nameHint.
Index inlinedNameHint = 0;
// Decide for a given function whether to inline, and if so in what mode.
InliningMode getInliningMode(Name name) {
auto* func = module->getFunction(name);
auto& info = infos[name];
if (info.inliningMode != InliningMode::Unknown) {
return info.inliningMode;
}
// Check if the function itself is worth inlining as it is.
if (!func->noFullInline && info.worthFullInlining(getPassOptions())) {
return info.inliningMode = InliningMode::Full;
}
// Otherwise, check if we can at least inline part of it, if we are
// interested in such things.
if (!func->noPartialInline && functionSplitter) {
info.inliningMode = functionSplitter->getSplitDrivenInliningMode(
module->getFunction(name), info);
return info.inliningMode;
}
// Cannot be fully or partially inlined => uninlineable.
info.inliningMode = InliningMode::Uninlineable;
return info.inliningMode;
}
// Gets the actual function to be inlined. Normally this is the function
// itself, but if it is a function that we must first split (i.e., we only
// want to partially inline it) then it will be the inlineable part of the
// split.
//
// This is called right before actually performing the inlining, that is, we
// are guaranteed to inline after this.
Function* getActuallyInlinedFunction(Function* func) {
InliningMode inliningMode = infos[func->name].inliningMode;
// If we want to inline this function itself, do so.
if (inliningMode == InliningMode::Full) {
return func;
}
// Otherwise, this is a case where we want to inline part of it, after
// splitting.
assert(functionSplitter);
return functionSplitter->getInlineableSplitFunction(func, inliningMode);
}
// Checks if the combined size of the code after inlining is under the
// absolute size limit. We have an absolute limit in order to avoid
// extremely-large sizes after inlining, as they may hit limits in VMs and/or
// slow down startup (measurements there indicate something like ~1 second to
// optimize a 100K function). See e.g.
// https://github.com/WebAssembly/binaryen/pull/3730#issuecomment-867939138
// https://github.com/emscripten-core/emscripten/issues/13899#issuecomment-825073344
bool isUnderSizeLimit(Name target, Name source) {
// Estimate the combined binary size from the number of instructions.
auto combinedSize = infos[target].size + infos[source].size;
auto estimatedBinarySize = Measurer::BytesPerExpr * combinedSize;
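// (For example, if BytesPerExpr were 3, two functions of 60K and 80K
// expressions would estimate to 420KB, over the 400KB limit below, while
// two of 20K each would estimate to 120KB and be allowed.)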
// The limit is arbitrary, but based on the links above. It is a very high
// value that should appear very rarely in practice (for example, it does
// not occur on the Emscripten benchmark suite of real-world codebases).
const Index MaxCombinedBinarySize = 400 * 1024;
return estimatedBinarySize < MaxCombinedBinarySize;
}
};
} // anonymous namespace
//
// InlineMain
//
// Inline __original_main into main, if they exist. This works around the odd
// thing that clang/llvm currently do, where __original_main contains the user's
// actual main (this is done as a workaround for main having two different
// possible signatures).
//
static const char* MAIN = "main";
static const char* ORIGINAL_MAIN = "__original_main";
struct InlineMainPass : public Pass {
void run(Module* module) override {
auto* main = module->getFunctionOrNull(MAIN);
auto* originalMain = module->getFunctionOrNull(ORIGINAL_MAIN);
if (!main || main->imported() || !originalMain ||
originalMain->imported()) {
return;
}
FindAllPointers<Call> calls(main->body);
Expression** callSite = nullptr;
for (auto* call : calls.list) {
if ((*call)->cast<Call>()->target == ORIGINAL_MAIN) {
if (callSite) {
// More than one call site.
return;
}
callSite = call;
}
}
if (!callSite) {
// No call at all.
return;
}
doInlining(module,
main,
InliningAction(callSite, originalMain, true),
getPassOptions());
}
};
Pass* createInliningPass() { return new Inlining(); }
Pass* createInliningOptimizingPass() {
auto* ret = new Inlining();
ret->optimize = true;
return ret;
}
Pass* createInlineMainPass() { return new InlineMainPass(); }
} // namespace wasm