| /* |
| * Copyright 2016 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Inlining. |
| // |
| // Two versions are provided: inlining and inlining-optimizing. You |
| // probably want the optimizing version, which will optimize locations |
| // we inlined into, as inlining by itself creates a block to house the |
| // inlined code, some temp locals, etc., which can usually be removed |
| // by optimizations. Note that the two versions use the same heuristics, |
| // so we don't take into account the overhead if you don't optimize |
| // afterwards. The non-optimizing version is mainly useful for debugging, |
| // or if you intend to run a full set of optimizations anyhow on |
| // everything later. |
| // |
| |
| #include <atomic> |
| |
| #include "ir/branch-utils.h" |
| #include "ir/debuginfo.h" |
| #include "ir/drop.h" |
| #include "ir/eh-utils.h" |
| #include "ir/element-utils.h" |
| #include "ir/find_all.h" |
| #include "ir/literal-utils.h" |
| #include "ir/localize.h" |
| #include "ir/module-utils.h" |
| #include "ir/names.h" |
| #include "ir/type-updating.h" |
| #include "ir/utils.h" |
| #include "parsing.h" |
| #include "pass.h" |
| #include "passes/opt-utils.h" |
| #include "wasm-builder.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| namespace { |
| |
| enum class InliningMode { |
| // We do not know yet if this function can be inlined, as that has |
| // not been computed yet. |
| Unknown, |
| // This function cannot be inlinined in any way. |
| Uninlineable, |
| // This function can be inlined fully, that is, normally: the entire function |
| // can be inlined. This is in contrast to split/partial inlining, see below. |
| Full, |
| // This function cannot be inlined normally, but we can use split inlining, |
| // using pattern "A" or "B" (see below). |
| SplitPatternA, |
| SplitPatternB |
| }; |
| |
| // Useful into on a function, helping us decide if we can inline it |
| struct FunctionInfo { |
| std::atomic<Index> refs; |
| Index size; |
| bool hasCalls; |
| bool hasLoops; |
| bool hasTryDelegate; |
| // Something is used globally if there is a reference to it in a table or |
| // export etc. |
| bool usedGlobally; |
| // We consider a function to be a trivial call if the body is just a call with |
| // trivial arguments, like this: |
| // |
| // (func $forward (param $x) (param $y) |
| // (call $target (local.get $x) (local.get $y)) |
| // ) |
| // |
| // Specifically the body must be a call, and the operands to the call must be |
| // of size 1 (generally, LocalGet or Const). |
| bool isTrivialCall; |
| InliningMode inliningMode; |
| |
| FunctionInfo() { clear(); } |
| |
| void clear() { |
| refs = 0; |
| size = 0; |
| hasCalls = false; |
| hasLoops = false; |
| hasTryDelegate = false; |
| usedGlobally = false; |
| isTrivialCall = false; |
| inliningMode = InliningMode::Unknown; |
| } |
| |
| // Provide an explicit = operator as the |refs| field lacks one by default. |
| FunctionInfo& operator=(const FunctionInfo& other) { |
| refs = other.refs.load(); |
| size = other.size; |
| hasCalls = other.hasCalls; |
| hasLoops = other.hasLoops; |
| hasTryDelegate = other.hasTryDelegate; |
| usedGlobally = other.usedGlobally; |
| isTrivialCall = other.isTrivialCall; |
| inliningMode = other.inliningMode; |
| return *this; |
| } |
| |
| // See pass.h for how defaults for these options were chosen. |
| bool worthFullInlining(PassOptions& options) { |
| // Until we have proper support for try-delegate, ignore such functions. |
| // FIXME https://github.com/WebAssembly/binaryen/issues/3634 |
| if (hasTryDelegate) { |
| return false; |
| } |
| // If it's small enough that we always want to inline such things, do so. |
| if (size <= options.inlining.alwaysInlineMaxSize) { |
| return true; |
| } |
| // If it has one use, then inlining it would likely reduce code size, at |
| // least for reasonable function sizes. |
| if (refs == 1 && !usedGlobally && |
| size <= options.inlining.oneCallerInlineMaxSize) { |
| return true; |
| } |
| // If it's so big that we have no flexible options that could allow it, |
| // do not inline. |
| if (size > options.inlining.flexibleInlineMaxSize) { |
| return false; |
| } |
| // More than one use, so we can't eliminate it after inlining, and inlining |
| // it will hurt code size. Stop if we are focused on size or not heavily |
| // focused on speed. |
| if (options.shrinkLevel > 0 || options.optimizeLevel < 3) { |
| return false; |
| } |
| if (hasCalls) { |
| // This has calls. If it is just a trivial call itself then inline, as we |
| // will save a call that way - basically we skip a trampoline in the |
| // middle - but if it is something more complex, leave it alone, as we may |
| // not help much (and with recursion we may end up with a wasteful |
| // increase in code size). |
| // |
| // Note that inlining trivial calls may increase code size, e.g. if they |
| // use a parameter more than once (forcing us after inlining to save that |
| // value to a local, etc.), but here we are optimizing for speed and not |
| // size, so we risk it. |
| return isTrivialCall; |
| } |
| // This doesn't have calls. Inline if loops do not prevent us (normally, a |
| // loop suggests a lot of work and so inlining is less useful). |
| return !hasLoops || options.inlining.allowFunctionsWithLoops; |
| } |
| }; |
| |
| static bool canHandleParams(Function* func) { |
| // We cannot inline a function if we cannot handle placing its params in a |
| // locals, as all params become locals. |
| for (auto param : func->getParams()) { |
| if (!TypeUpdating::canHandleAsLocal(param)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| using NameInfoMap = std::unordered_map<Name, FunctionInfo>; |
| |
| struct FunctionInfoScanner |
| : public WalkerPass<PostWalker<FunctionInfoScanner>> { |
| bool isFunctionParallel() override { return true; } |
| |
| FunctionInfoScanner(NameInfoMap& infos) : infos(infos) {} |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<FunctionInfoScanner>(infos); |
| } |
| |
| void visitLoop(Loop* curr) { |
| // having a loop |
| infos[getFunction()->name].hasLoops = true; |
| } |
| |
| void visitCall(Call* curr) { |
| // can't add a new element in parallel |
| assert(infos.count(curr->target) > 0); |
| infos[curr->target].refs++; |
| // having a call |
| infos[getFunction()->name].hasCalls = true; |
| } |
| |
| // N.B.: CallIndirect and CallRef are intentionally omitted here, as we only |
| // note direct calls. Direct calls can lead to infinite recursion |
| // which we need to avoid, while indirect ones may in theory be |
| // optimized to direct calls later, but we take that risk - which is |
| // worthwhile as if we do manage to turn an indirect call into something |
| // else then it can be a big speedup, so we do want to inline code that |
| // has such indirect calls. |
| |
| void visitTry(Try* curr) { |
| if (curr->isDelegate()) { |
| infos[getFunction()->name].hasTryDelegate = true; |
| } |
| } |
| |
| void visitRefFunc(RefFunc* curr) { |
| assert(infos.count(curr->func) > 0); |
| infos[curr->func].refs++; |
| } |
| |
| void visitFunction(Function* curr) { |
| auto& info = infos[curr->name]; |
| |
| if (!canHandleParams(curr)) { |
| info.inliningMode = InliningMode::Uninlineable; |
| } |
| |
| info.size = Measurer::measure(curr->body); |
| |
| if (auto* call = curr->body->dynCast<Call>()) { |
| if (info.size == call->operands.size() + 1) { |
| // This function body is a call with some trivial (size 1) operands like |
| // LocalGet or Const, so it is a trivial call. |
| info.isTrivialCall = true; |
| } |
| } |
| } |
| |
| private: |
| NameInfoMap& infos; |
| }; |
| |
| struct InliningAction { |
| Expression** callSite; |
| Function* contents; |
| bool insideATry; |
| |
| InliningAction(Expression** callSite, Function* contents, bool insideATry) |
| : callSite(callSite), contents(contents), insideATry(insideATry) {} |
| }; |
| |
| struct InliningState { |
| // Maps functions worth inlining to the mode with which we can inline them. |
| std::unordered_map<Name, InliningMode> inlinableFunctions; |
| // function name => actions that can be performed in it |
| std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction; |
| }; |
| |
| struct Planner : public WalkerPass<TryDepthWalker<Planner>> { |
| bool isFunctionParallel() override { return true; } |
| |
| Planner(InliningState* state) : state(state) {} |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<Planner>(state); |
| } |
| |
| void visitCall(Call* curr) { |
| // plan to inline if we know this is valid to inline, and if the call is |
| // actually performed - if it is dead code, it's pointless to inline. |
| // we also cannot inline ourselves. |
| bool isUnreachable; |
| if (curr->isReturn) { |
| // Tail calls are only actually unreachable if an argument is |
| isUnreachable = std::any_of( |
| curr->operands.begin(), curr->operands.end(), [](Expression* op) { |
| return op->type == Type::unreachable; |
| }); |
| } else { |
| isUnreachable = curr->type == Type::unreachable; |
| } |
| if (state->inlinableFunctions.count(curr->target) && !isUnreachable && |
| curr->target != getFunction()->name) { |
| // nest the call in a block. that way the location of the pointer to the |
| // call will not change even if we inline multiple times into the same |
| // function, otherwise call1(call2()) might be a problem |
| auto* block = Builder(*getModule()).makeBlock(curr); |
| replaceCurrent(block); |
| // can't add a new element in parallel |
| assert(state->actionsForFunction.count(getFunction()->name) > 0); |
| state->actionsForFunction[getFunction()->name].emplace_back( |
| &block->list[0], getModule()->getFunction(curr->target), tryDepth > 0); |
| } |
| } |
| |
| private: |
| InliningState* state; |
| }; |
| |
| struct Updater : public TryDepthWalker<Updater> { |
| Module* module; |
| std::map<Index, Index> localMapping; |
| Name returnName; |
| Type resultType; |
| bool isReturn; |
| Builder* builder; |
| PassOptions& options; |
| |
| struct ReturnCallInfo { |
| // The original `return_call` or `return_call_indirect` or `return_call_ref` |
| // with its operands replaced with `local.get`s. |
| Expression* call; |
| // The branch that is serving as the "return" part of the original |
| // `return_call`. |
| Break* branch; |
| }; |
| |
| // Collect information on return_calls in the inlined body. Each will be |
| // turned into branches out of the original inlined body followed by |
| // non-return version of the original `return_call`, followed by a branch out |
| // to the caller. The branch labels will be filled in at the end of the walk. |
| std::vector<ReturnCallInfo> returnCallInfos; |
| |
| Updater(PassOptions& options) : options(options) {} |
| |
| void visitReturn(Return* curr) { |
| replaceCurrent(builder->makeBreak(returnName, curr->value)); |
| } |
| |
| template<typename T> void handleReturnCall(T* curr, Signature sig) { |
| if (isReturn || !curr->isReturn) { |
| // If the inlined callsite was already a return_call, then we can keep |
| // return_calls in the inlined function rather than downgrading them. |
| // That is, if A->B and B->C and both those calls are return_calls |
| // then after inlining A->B we want to now have A->C be a |
| // return_call. |
| return; |
| } |
| |
| if (tryDepth == 0) { |
| // Return calls in inlined functions should only break out of |
| // the scope of the inlined code, not the entire function they |
| // are being inlined into. To achieve this, make the call a |
| // non-return call and add a break. This does not cause |
| // unbounded stack growth because inlining and return calling |
| // both avoid creating a new stack frame. |
| curr->isReturn = false; |
| curr->type = sig.results; |
| // There might still be unreachable children causing this to be |
| // unreachable. |
| curr->finalize(); |
| if (sig.results.isConcrete()) { |
| replaceCurrent(builder->makeBreak(returnName, curr)); |
| } else { |
| replaceCurrent(builder->blockify(curr, builder->makeBreak(returnName))); |
| } |
| } else { |
| // Set the children to locals as necessary, then add a branch out of the |
| // inlined body. The branch label will be set later when we create branch |
| // targets for the calls. |
| Block* childBlock = ChildLocalizer(curr, getFunction(), *module, options) |
| .getChildrenReplacement(); |
| Break* branch = builder->makeBreak(Name()); |
| childBlock->list.push_back(branch); |
| childBlock->type = Type::unreachable; |
| replaceCurrent(childBlock); |
| |
| curr->isReturn = false; |
| curr->type = sig.results; |
| returnCallInfos.push_back({curr, branch}); |
| } |
| } |
| |
| void visitCall(Call* curr) { |
| handleReturnCall(curr, module->getFunction(curr->target)->getSig()); |
| } |
| |
| void visitCallIndirect(CallIndirect* curr) { |
| handleReturnCall(curr, curr->heapType.getSignature()); |
| } |
| |
| void visitCallRef(CallRef* curr) { |
| Type targetType = curr->target->type; |
| if (!targetType.isSignature()) { |
| // We don't know what type the call should return, but it will also never |
| // be reached, so we don't need to do anything here. |
| return; |
| } |
| handleReturnCall(curr, targetType.getHeapType().getSignature()); |
| } |
| |
| void visitLocalGet(LocalGet* curr) { |
| curr->index = localMapping[curr->index]; |
| } |
| |
| void visitLocalSet(LocalSet* curr) { |
| curr->index = localMapping[curr->index]; |
| } |
| |
| void walk(Expression*& curr) { |
| PostWalker<Updater>::walk(curr); |
| if (returnCallInfos.empty()) { |
| return; |
| } |
| |
| Block* body = builder->blockify(curr); |
| curr = body; |
| auto blockNames = BranchUtils::BranchAccumulator::get(body); |
| |
| for (Index i = 0; i < returnCallInfos.size(); ++i) { |
| auto& info = returnCallInfos[i]; |
| |
| // Add a block containing the previous body and a branch up to the caller. |
| // Give the block a name that will allow this return_call's original |
| // callsite to branch out of it then execute the call before returning to |
| // the caller. |
| auto name = Names::getValidName( |
| "__return_call", [&](Name test) { return !blockNames.count(test); }, i); |
| blockNames.insert(name); |
| info.branch->name = name; |
| Block* oldBody = builder->makeBlock(body->list, body->type); |
| body->list.clear(); |
| |
| if (resultType.isConcrete()) { |
| body->list.push_back(builder->makeBlock( |
| name, {builder->makeBreak(returnName, oldBody)}, Type::none)); |
| } else { |
| oldBody->list.push_back(builder->makeBreak(returnName)); |
| oldBody->name = name; |
| oldBody->type = Type::none; |
| body->list.push_back(oldBody); |
| } |
| body->list.push_back(info.call); |
| body->finalize(resultType); |
| } |
| } |
| }; |
| |
| // Core inlining logic. Modifies the outside function (adding locals as |
| // needed), and returns the inlined code. |
| // |
| // An optional name hint can be provided, which will then be used in the name of |
| // the block we put the inlined code in. Using a unique name hint in each call |
| // of this function can reduce the risk of name overlaps (which cause fixup work |
| // in UniqueNameMapper::uniquify). |
| static Expression* doInlining(Module* module, |
| Function* into, |
| const InliningAction& action, |
| PassOptions& options, |
| Index nameHint = 0) { |
| Function* from = action.contents; |
| auto* call = (*action.callSite)->cast<Call>(); |
| |
| // Works for return_call, too |
| Type retType = module->getFunction(call->target)->getResults(); |
| |
| // Build the block that will contain the inlined contents. |
| Builder builder(*module); |
| auto* block = builder.makeBlock(); |
| auto name = std::string("__inlined_func$") + from->name.toString(); |
| if (nameHint) { |
| name += '$' + std::to_string(nameHint); |
| } |
| block->name = Name(name); |
| |
| // In the unlikely event that the function already has a branch target with |
| // this name, fix that up, as otherwise we can get unexpected capture of our |
| // branches, that is, we could end up with this: |
| // |
| // (block $X ;; a new block we add as the target of returns |
| // (from's contents |
| // (block $X ;; a block in from's contents with a colliding name |
| // (br $X ;; a new br we just added that replaces a return |
| // |
| // Here the br wants to go to the very outermost block, to represent a |
| // return from the inlined function's code, but it ends up captured by an |
| // internal block. We also need to be careful of the call's children: |
| // |
| // (block $X ;; a new block we add as the target of returns |
| // (local.set $param |
| // (call's first parameter |
| // (br $X) ;; nested br in call's first parameter |
| // ) |
| // ) |
| // |
| // (In this case we could use a second block and define the named block $X |
| // after the call's parameters, but that adds work for an extremely rare |
| // situation.) The latter case does not apply if the call is a |
| // return_call inside a try, because in that case the call's |
| // children do not appear inside the same block as the inlined body. |
| bool hoistCall = call->isReturn && action.insideATry; |
| if (BranchUtils::hasBranchTarget(from->body, block->name) || |
| (!hoistCall && BranchUtils::BranchSeeker::has(call, block->name))) { |
| auto fromNames = BranchUtils::getBranchTargets(from->body); |
| auto callNames = hoistCall ? BranchUtils::NameSet{} |
| : BranchUtils::BranchAccumulator::get(call); |
| block->name = Names::getValidName(block->name, [&](Name test) { |
| return !fromNames.count(test) && !callNames.count(test); |
| }); |
| } |
| |
| // Prepare to update the inlined code's locals and other things. |
| Updater updater(options); |
| updater.setFunction(into); |
| updater.module = module; |
| updater.resultType = from->getResults(); |
| updater.returnName = block->name; |
| updater.isReturn = call->isReturn; |
| updater.builder = &builder; |
| // Set up a locals mapping |
| for (Index i = 0; i < from->getNumLocals(); i++) { |
| updater.localMapping[i] = builder.addVar(into, from->getLocalType(i)); |
| } |
| |
| if (hoistCall) { |
| // Wrap the existing function body in a block we can branch out of before |
| // entering the inlined function body. This block must have a name that is |
| // different from any other block name above the branch. |
| auto intoNames = BranchUtils::BranchAccumulator::get(into->body); |
| auto bodyName = |
| Names::getValidName(Name("__original_body"), |
| [&](Name test) { return !intoNames.count(test); }); |
| if (retType.isConcrete()) { |
| into->body = builder.makeBlock( |
| bodyName, {builder.makeReturn(into->body)}, Type::none); |
| } else { |
| into->body = builder.makeBlock( |
| bodyName, {into->body, builder.makeReturn()}, Type::none); |
| } |
| |
| // Sequence the inlined function body after the original caller body. |
| into->body = builder.makeSequence(into->body, block, retType); |
| |
| // Replace the original callsite with an expression that assigns the |
| // operands into the params and branches out of the original body. |
| auto numParams = from->getParams().size(); |
| if (numParams) { |
| auto* branchBlock = builder.makeBlock(); |
| for (Index i = 0; i < numParams; i++) { |
| branchBlock->list.push_back( |
| builder.makeLocalSet(updater.localMapping[i], call->operands[i])); |
| } |
| branchBlock->list.push_back(builder.makeBreak(bodyName)); |
| branchBlock->finalize(Type::unreachable); |
| *action.callSite = branchBlock; |
| } else { |
| *action.callSite = builder.makeBreak(bodyName); |
| } |
| } else { |
| // Assign the operands into the params |
| for (Index i = 0; i < from->getParams().size(); i++) { |
| block->list.push_back( |
| builder.makeLocalSet(updater.localMapping[i], call->operands[i])); |
| } |
| // Zero out the vars (as we may be in a loop, and may depend on their |
| // zero-init value |
| for (Index i = 0; i < from->vars.size(); i++) { |
| auto type = from->vars[i]; |
| if (!LiteralUtils::canMakeZero(type)) { |
| // Non-zeroable locals do not need to be zeroed out. As they have no |
| // zero value they by definition should not be used before being written |
| // to, so any value we set here would not be observed anyhow. |
| continue; |
| } |
| block->list.push_back( |
| builder.makeLocalSet(updater.localMapping[from->getVarIndexBase() + i], |
| LiteralUtils::makeZero(type, *module))); |
| } |
| if (call->isReturn) { |
| assert(!action.insideATry); |
| if (retType.isConcrete()) { |
| *action.callSite = builder.makeReturn(block); |
| } else { |
| *action.callSite = builder.makeSequence(block, builder.makeReturn()); |
| } |
| } else { |
| *action.callSite = block; |
| } |
| } |
| |
| // Generate and update the inlined contents |
| auto* contents = ExpressionManipulator::copy(from->body, *module); |
| debuginfo::copyBetweenFunctions(from->body, contents, from, into); |
| updater.walk(contents); |
| block->list.push_back(contents); |
| block->type = retType; |
| |
| // The ReFinalize below will handle propagating unreachability if we need to |
| // do so, that is, if the call was reachable but now the inlined content we |
| // replaced it with was unreachable. The opposite case requires special |
| // handling: ReFinalize works under the assumption that code can become |
| // unreachable, but it does not go back from that state. But inlining can |
| // cause that: |
| // |
| // (call $A ;; an unreachable call |
| // (unreachable) |
| // ) |
| // => |
| // (block $__inlined_A_body (result i32) ;; reachable code after inlining |
| // (unreachable) |
| // ) |
| // |
| // That is, if the called function wraps the input parameter in a block with a |
| // declared type, then the block is not unreachable. And then we might error |
| // if the outside expects the code to be unreachable - perhaps it only |
| // validates that way. To fix this, if the call was unreachable then we make |
| // the inlined code unreachable as well. That also maximizes DCE |
| // opportunities by propagating unreachability as much as possible. |
| // |
| // (Note that we don't need to do this for a return_call, which is always |
| // unreachable anyhow.) |
| if (call->type == Type::unreachable && !call->isReturn) { |
| // Make the replacement code unreachable. Note that we can't just add an |
| // unreachable at the end, as the block might have breaks to it (returns are |
| // transformed into those). |
| Expression* old = block; |
| if (old->type.isConcrete()) { |
| old = builder.makeDrop(old); |
| } |
| *action.callSite = builder.makeSequence(old, builder.makeUnreachable()); |
| } |
| // Anything we inlined into may now have non-unique label names, fix it up. |
| // Note that we must do this before refinalization, as otherwise duplicate |
| // block labels can lead to errors (the IR must be valid before we |
| // refinalize). |
| wasm::UniqueNameMapper::uniquify(into->body); |
| // Inlining unreachable contents can make things in the function we inlined |
| // into unreachable. |
| ReFinalize().walkFunctionInModule(into, module); |
| // New locals we added may require fixups for nondefaultability. |
| // FIXME Is this not done automatically? |
| TypeUpdating::handleNonDefaultableLocals(into, *module); |
| return block; |
| } |
| |
| // |
| // Function splitting / partial inlining / inlining of conditions. |
| // |
| // A function may be too costly to inline, but it may be profitable to |
| // *partially* inline it. The specific cases optimized here are functions with a |
| // condition, |
| // |
| // function foo(x) { |
| // if (x) return; |
| // ..lots and lots of other code.. |
| // } |
| // |
| // If the other code after the if is large enough or costly enough, then we will |
| // not inline the entire function. But it is useful to inline the condition. |
| // Consider this caller: |
| // |
| // function caller(x) { |
| // foo(0); |
| // foo(x); |
| // } |
| // |
| // If we inline the condition, we end up with this: |
| // |
| // function caller(x) { |
| // if (0) foo(0); |
| // if (x) foo(x); |
| // } |
| // |
| // By inlining the condition out of foo() we gain two benefits: |
| // |
| // * In the first call here the condition is zero, which means we can |
| // statically optimize out the call entirely. |
| // * Even if we can't do that (as in the second call) if at runtime we see the |
| // condition is false then we avoid the call. That means we perform what is |
| // hopefully a cheap branch instead of a call and then a branch. |
| // |
| // The cost to doing this is an increase in code size, and so this is only done |
| // when optimizing heavily for speed. |
| // |
| // To implement partial inlining we split the function to be inlined. Starting |
| // with |
| // |
| // function foo(x) { |
| // if (x) return; |
| // ..lots and lots of other code.. |
| // } |
| // |
| // We create the "inlineable" part of it, and the code that is "outlined": |
| // |
| // function foo$inlineable(x) { |
| // if (x) return; |
| // foo$outlined(x); |
| // } |
| // function foo$outlined(x) { |
| // ..lots and lots of other code.. |
| // } |
| // |
| // (Note how if the second function were inlined into the first, we would end |
| // up where we started, with the original function.) After splitting the |
| // function in this way, we simply inline the inlineable part using the normal |
| // mechanism for that. That ends up replacing foo(x); with |
| // |
| // if (x) foo$outlined(x); |
| // |
| // which is what we wanted. |
| // |
| // To reduce the complexity of this feature, it is implemented almost entirely |
| // in its own class, FunctionSplitter. The main inlining logic just calls out to |
| // the splitter to check if a function is worth splitting, and to get the split |
| // part if so. |
| // |
| |
| struct FunctionSplitter { |
| Module* module; |
| PassOptions& options; |
| |
| FunctionSplitter(Module* module, PassOptions& options) |
| : module(module), options(options) {} |
| |
| // Check if an function could be split in order to at least inline part of it, |
| // in a worthwhile manner. |
| // |
| // Even if this returns a split inlining mode, we may not end up inlining the |
| // function, as the main inlining logic has a few other considerations to take |
| // into account (like limitations on which functions can be inlined into in |
| // each iteration, the number of iterations, etc.). Therefore this function |
| // may only find out if we *can* split, but not actually do any splitting. |
| // |
| // Note that to avoid wasteful work, this function may return "Full" inlining |
| // mode instead of a split inlining. That is, if it detects that a partial |
| // inlining will trigger a follow up full inline of the splitted function |
| // then it will instead return "InliningMode::Full" directly. In more detail, |
| // imagine we have |
| // |
| // foo(10); |
| // |
| // which calls |
| // |
| // func foo(x) { |
| // if (x) { |
| // CODE |
| // } |
| // } |
| // |
| // If we partially inline then the call becomes |
| // |
| // if (10) { |
| // outlined-CODE() |
| // } |
| // |
| // That is, we've only inlined the |if| out of foo(), and we call a function |
| // that contains the code in CODE. But if CODE is small enough to be inlined |
| // then a later iteration of the inliner will do just that. The result of this |
| // split inlining and then full inlining of the outlined code is exactly the |
| // same as if we normally inlined from the beginning, but it adds more work, |
| // so we'd like to avoid it, which we do by seeing when a split would be |
| // followed by inlining the remainder, and then we return Full here and just |
| // inline it all normally. |
| // |
| // Note that the above issue shows that we may in some cases inline more than |
| // the normal inlining limit. That is, if the inlining limit is 20, and CODE |
| // in the example above was 19, then foo()'s size was 21 (when we add the if |
| // and get of |x|). That leads to the situation where foo() is too big to |
| // normally inline, but the outlined CODE can then be inlined. And this shows |
| // that we end up inlining a function of size 21, even though it is larger |
| // than our inlining limit. We consider this acceptable because it only |
| // occurs on functions slightly larger than the inlining limit, and only if |
| // they have the simple forms that split inlining recognizes, that we think |
| // are useful to inline. Alternatively, if we wanted to avoid this, it would |
| // add complexity because we'd need to recognize the outlined function with |
| // CODE in it and *not* inline it even though it is small enough, after |
| // splitting, to be inlined. That is, splitting creates smaller functions, so |
| // it can lead to more inlining (but, again, that makes sense since we only |
| // split on very specific patterns we believe are worth handling in that |
| // manner). |
| InliningMode getSplitDrivenInliningMode(Function* func, FunctionInfo& info) { |
| const Index MaxIfs = options.inlining.partialInliningIfs; |
| assert(MaxIfs > 0); |
| |
| auto* body = func->body; |
| |
| // If the body is a block, and we have breaks to that block, then we cannot |
| // outline any code - we can't outline a break without the break's target. |
| if (auto* block = body->dynCast<Block>()) { |
| if (BranchUtils::BranchSeeker::has(block, block->name)) { |
| return InliningMode::Uninlineable; |
| } |
| } |
| |
| // All the patterns we look for right now start with an if at the very top |
| // of the function. |
| auto* iff = getIf(body); |
| if (!iff) { |
| return InliningMode::Uninlineable; |
| } |
| |
| // If the condition is not very simple, the benefits of this optimization |
| // are not obvious. |
| if (!isSimple(iff->condition)) { |
| return InliningMode::Uninlineable; |
| } |
| |
| // Pattern A: Check if the function begins with |
| // |
| // if (simple) return; |
| // |
| // TODO: support a return value |
| if (!iff->ifFalse && func->getResults() == Type::none && |
| iff->ifTrue->is<Return>()) { |
| // The body must be a block, because if it were not then the function |
| // would be easily inlineable (just an if with a simple condition and a |
| // return), and we would not even attempt to do splitting. |
| assert(body->is<Block>()); |
| |
| auto outlinedFunctionSize = info.size - Measurer::measure(iff); |
| // If outlined function will be worth normal inline, skip the intermediate |
| // state and inline fully now. Note that if full inlining is disabled we |
| // will not do this, and instead inline partially. |
| if (!func->noFullInline && |
| outlinedFunctionWorthInlining(info, outlinedFunctionSize)) { |
| return InliningMode::Full; |
| } |
| |
| return InliningMode::SplitPatternA; |
| } |
| |
| // Pattern B: Represents a function whose entire body looks like |
| // |
| // if (A_1) { |
| // ..heavy work.. |
| // } |
| // .. |
| // if (A_k) { |
| // ..heavy work.. |
| // } |
| // B; // an optional final value (which can be a return value) |
| // |
| // where there is a small number of such ifs with arguments A1..A_k, and |
| // A_1..A_k and B (if the final value B exists) are very simple. |
| // |
| // Also, each if body must either be unreachable, or it must have type none |
| // and have no returns. If it is unreachable, for example because it is a |
| // return, then we will just return a value in the inlineable function: |
| // |
| // if (A_i) { |
| // return outlined(..); |
| // } |
| // |
| // Or, if an if body has type none, then for now we assume that we do not |
| // need to return a value from there, which makes things simpler, and in |
| // that case we just do this, which continues onward in the function: |
| // |
| // if (A_i) { |
| // outlined(..); |
| // } |
| // |
| // TODO: handle a possible returned value in this case as well. |
| // |
| // Note that the if body type must be unreachable or none, as this is an if |
| // without an else. |
| |
| // Find the number of ifs. |
| Index numIfs = 0; |
| while (getIf(body, numIfs) && numIfs <= MaxIfs) { |
| numIfs++; |
| } |
| if (numIfs == 0 || numIfs > MaxIfs) { |
| return InliningMode::Uninlineable; |
| } |
| |
| // Look for a final item after the ifs. |
| auto* finalItem = getItem(body, numIfs); |
| |
| // The final item must be simple (or not exist, which is simple enough). |
| if (finalItem && !isSimple(finalItem)) { |
| return InliningMode::Uninlineable; |
| } |
| |
| // There must be no other items after the optional final one. |
| if (finalItem && getItem(body, numIfs + 1)) { |
| return InliningMode::Uninlineable; |
| } |
| // This has the general shape we seek. Check each if. |
| for (Index i = 0; i < numIfs; i++) { |
| auto* iff = getIf(body, i); |
| // The if must have a simple condition and no else arm. |
| if (!isSimple(iff->condition) || iff->ifFalse) { |
| return InliningMode::Uninlineable; |
| } |
| if (iff->ifTrue->type == Type::none) { |
| // This must have no returns. |
| if (!FindAll<Return>(iff->ifTrue).list.empty()) { |
| return InliningMode::Uninlineable; |
| } |
| } else { |
| // This is an if without an else, and so the type is either none or |
| // unreachable, and we ruled out none before. |
| assert(iff->ifTrue->type == Type::unreachable); |
| } |
| } |
| // Success, this matches the pattern. |
| |
| // If the outlined function will be worth inlining normally, skip the |
| // intermediate state and inline fully now. (As above, if full inlining is |
| // disabled, we only partially inline.) |
| if (numIfs == 1) { |
| auto outlinedFunctionSize = Measurer::measure(iff->ifTrue); |
| if (!func->noFullInline && |
| outlinedFunctionWorthInlining(info, outlinedFunctionSize)) { |
| return InliningMode::Full; |
| } |
| } |
| |
| return InliningMode::SplitPatternB; |
| } |
| |
| // Returns the function we should inline, after we split the function into two |
| // pieces as described above (that is, in the example above, this would return |
| // foo$inlineable). |
| // |
| // This is called when we are definitely inlining the function, and so it will |
| // perform the splitting (if that has not already been done before). |
| Function* getInlineableSplitFunction(Function* func, |
| InliningMode inliningMode) { |
| assert(inliningMode == InliningMode::SplitPatternA || |
| inliningMode == InliningMode::SplitPatternB); |
| auto& split = splits[func->name]; |
| |
| if (!split.inlineable) { |
| // We haven't performed the split, do it now. |
| split.inlineable = doSplit(func, inliningMode); |
| } |
| |
| return split.inlineable; |
| } |
| |
| // Clean up. When we are done we no longer need the inlineable functions on |
| // the module, as they have been inlined into all the places we wanted them |
| // for. |
| // |
| // Returns a list of the names of the functions we split. |
| std::vector<Name> finish() { |
| std::vector<Name> ret; |
| std::unordered_set<Name> inlineableNames; |
| for (auto& [func, split] : splits) { |
| auto* inlineable = split.inlineable; |
| if (inlineable) { |
| inlineableNames.insert(inlineable->name); |
| ret.push_back(func); |
| } |
| } |
| module->removeFunctions([&](Function* func) { |
| return inlineableNames.find(func->name) != inlineableNames.end(); |
| }); |
| return ret; |
| } |
| |
| private: |
| // Information about splitting a function. |
| struct Split { |
| // The inlineable function out of the two that we generate by splitting. |
| // That is, foo$inlineable from above. |
| Function* inlineable = nullptr; |
| |
| // The outlined function, that is, foo$outlined from above. |
| Function* outlined = nullptr; |
| }; |
| |
| // All the splitting we have already performed. |
| // |
| // Note that this maps from function names, and not Function*, as the main |
| // inlining code can remove functions as it goes, but we can rely on names |
| // staying constant. |
| std::unordered_map<Name, Split> splits; |
| |
| bool outlinedFunctionWorthInlining(FunctionInfo& origin, Index sizeEstimate) { |
| FunctionInfo info; |
| // Start with a copy of the origin's info, and apply the size estimate. |
| // This is not accurate, for example the origin function may have |
| // loop or calls even though this section may not have. |
| // This is a conservative estimate, that is, it will return true only when |
| // it should, but might return false when a more precise analysis would |
| // return true. And it is a practical estimation to avoid extra future work. |
| info = origin; |
| info.size = sizeEstimate; |
| return info.worthFullInlining(options); |
| } |
| |
| Function* doSplit(Function* func, InliningMode inliningMode) { |
| Builder builder(*module); |
| |
| if (inliningMode == InliningMode::SplitPatternA) { |
| // Note that "A" in the name here identifies this as being a split from |
| // pattern A. The second pattern B will have B in the name. |
| Function* inlineable = copyFunction(func, "inlineable-A"); |
| auto* outlined = copyFunction(func, "outlined-A"); |
| |
| // The inlineable function should only have the if, which will call the |
| // outlined function with a flipped condition. |
| auto* inlineableIf = getIf(inlineable->body); |
| inlineableIf->condition = |
| builder.makeUnary(EqZInt32, inlineableIf->condition); |
| inlineableIf->ifTrue = builder.makeCall( |
| outlined->name, getForwardedArgs(func, builder), Type::none); |
| inlineable->body = inlineableIf; |
| |
| // The outlined function no longer needs the initial if. |
| auto& outlinedList = outlined->body->cast<Block>()->list; |
| outlinedList.erase(outlinedList.begin()); |
| |
| return inlineable; |
| } |
| |
| assert(inliningMode == InliningMode::SplitPatternB); |
| |
| Function* inlineable = copyFunction(func, "inlineable-B"); |
| |
| const Index MaxIfs = options.inlining.partialInliningIfs; |
| assert(MaxIfs > 0); |
| |
| // The inlineable function should only have the ifs, which will call the |
| // outlined heavy work. |
| for (Index i = 0; i < MaxIfs; i++) { |
| // For each if, create an outlined function with the body of that if, |
| // and call that from the if. |
| auto* inlineableIf = getIf(inlineable->body, i); |
| if (!inlineableIf) { |
| break; |
| } |
| auto* outlined = copyFunction(func, "outlined-B"); |
| outlined->body = inlineableIf->ifTrue; |
| |
| // The outlined function either returns the same results as the original |
| // one, or nothing, depending on if a value is returned here. |
| auto valueReturned = |
| func->getResults() != Type::none && outlined->body->type != Type::none; |
| outlined->setResults(valueReturned ? func->getResults() : Type::none); |
| inlineableIf->ifTrue = builder.makeCall(outlined->name, |
| getForwardedArgs(func, builder), |
| outlined->getResults()); |
| if (valueReturned) { |
| inlineableIf->ifTrue = builder.makeReturn(inlineableIf->ifTrue); |
| } |
| } |
| |
| return inlineable; |
| } |
| |
| Function* copyFunction(Function* func, std::string prefix) { |
| // TODO: We copy quite a lot more than we need here, and throw stuff out. |
| // It is simple to just copy the entire thing to get the params and |
| // results and all that, but we could be more efficient. |
| prefix = "byn-split-" + prefix; |
| return ModuleUtils::copyFunction( |
| func, |
| *module, |
| Names::getValidFunctionName(*module, |
| prefix + '$' + func->name.toString())); |
| } |
| |
| // Get the i-th item in a sequence of initial items in an expression. That is, |
| // if the item is a block, it may have several such items, and otherwise there |
| // is a single item, that item itself. This basically provides a simpler |
| // interface than checking if something is a block or not when there is just |
| // one item. |
| // |
| // Returns nullptr if there is no such item. |
| static Expression* getItem(Expression* curr, Index i = 0) { |
| if (auto* block = curr->dynCast<Block>()) { |
| auto& list = block->list; |
| if (i < list.size()) { |
| return list[i]; |
| } |
| } |
| if (i == 0) { |
| return curr; |
| } |
| return nullptr; |
| } |
| |
| // Get the i-th if in a sequence of initial ifs in an expression. If no such |
| // if exists, returns nullptr. |
| static If* getIf(Expression* curr, Index i = 0) { |
| auto* item = getItem(curr, i); |
| if (!item) { |
| return nullptr; |
| } |
| if (auto* iff = item->dynCast<If>()) { |
| return iff; |
| } |
| return nullptr; |
| } |
| |
| // Checks if an expression is very simple - something simple enough that we |
| // are willing to inline it in this optimization. This should basically take |
| // almost no cost at all to compute. |
| bool isSimple(Expression* curr) { |
| // For now, support local and global gets, and unary operations. |
| // TODO: Generalize? Use costs.h? |
| if (curr->type == Type::unreachable) { |
| return false; |
| } |
| if (curr->is<GlobalGet>() || curr->is<LocalGet>()) { |
| return true; |
| } |
| if (auto* unary = curr->dynCast<Unary>()) { |
| return isSimple(unary->value); |
| } |
| if (auto* is = curr->dynCast<RefIsNull>()) { |
| return isSimple(is->value); |
| } |
| return false; |
| } |
| |
| // Returns a list of local.gets, one for each of the parameters to the |
| // function. This forwards the arguments passed to the inlineable function to |
| // the outlined one. |
| std::vector<Expression*> getForwardedArgs(Function* func, Builder& builder) { |
| std::vector<Expression*> args; |
| for (Index i = 0; i < func->getNumParams(); i++) { |
| args.push_back(builder.makeLocalGet(i, func->getLocalType(i))); |
| } |
| return args; |
| } |
| }; |
| |
| struct Inlining : public Pass { |
| // This pass changes locals and parameters. |
| // FIXME DWARF updating does not handle local changes yet. |
| bool invalidatesDWARF() override { return true; } |
| |
| // whether to optimize where we inline |
| bool optimize = false; |
| |
| // the information for each function. recomputed in each iteraction |
| NameInfoMap infos; |
| |
| std::unique_ptr<FunctionSplitter> functionSplitter; |
| |
| Module* module = nullptr; |
| |
| void run(Module* module_) override { |
| module = module_; |
| |
| // No point to do more iterations than the number of functions, as it means |
| // we are infinitely recursing (which should be very rare in practice, but |
| // it is possible that a recursive call can look like it is worth inlining). |
| Index iterationNumber = 0; |
| |
| auto numOriginalFunctions = module->functions.size(); |
| |
| // Track in how many iterations a function was inlined into. We are willing |
| // to inline many times into a function within an iteration, as e.g. that |
| // helps the case of many calls of a small getter. However, if we only do |
| // more inlining in separate iterations then it is likely code that was the |
| // result of previous inlinings that is now being inlined into. That is, an |
| // old inlining added a call to somewhere, and now we are inlining into that |
| // call. This is typically recursion, which to some extent can help, but |
| // then like loop unrolling it loses its benefit quickly, so set a limit |
| // here. |
| // |
| // In addition to inlining into a function, we track how many times we do |
| // other potentially repetitive operations like splitting a function before |
| // inlining, as any such repetitive operation should be limited in how many |
| // times we perform it. (An exception is how many times we inlined a |
| // function, which we do not want to limit - it can be profitable to inline |
| // a call into a great many callsites, over many iterations.) |
| // |
| // (Track names here, and not Function pointers, as we can remove functions |
| // while inlining, and it may be confusing during debugging to have a |
| // pointer to something that was removed.) |
| std::unordered_map<Name, Index> iterationCounts; |
| |
| const size_t MaxIterationsForFunc = 5; |
| |
| while (iterationNumber <= numOriginalFunctions) { |
| #ifdef INLINING_DEBUG |
| std::cout << "inlining loop iter " << iterationNumber |
| << " (numFunctions: " << module->functions.size() << ")\n"; |
| #endif |
| iterationNumber++; |
| |
| std::unordered_set<Function*> inlinedInto; |
| |
| prepare(); |
| iteration(inlinedInto); |
| |
| if (inlinedInto.empty()) { |
| return; |
| } |
| |
| #ifdef INLINING_DEBUG |
| std::cout << " inlined into " << inlinedInto.size() << " funcs.\n"; |
| #endif |
| |
| for (auto* func : inlinedInto) { |
| EHUtils::handleBlockNestedPops(func, *module); |
| } |
| |
| for (auto* func : inlinedInto) { |
| if (++iterationCounts[func->name] >= MaxIterationsForFunc) { |
| return; |
| } |
| } |
| |
| if (functionSplitter) { |
| auto splitNames = functionSplitter->finish(); |
| for (auto name : splitNames) { |
| if (++iterationCounts[name] >= MaxIterationsForFunc) { |
| return; |
| } |
| } |
| } |
| } |
| } |
| |
| void prepare() { |
| infos.clear(); |
| // fill in info, as we operate on it in parallel (each function to its own |
| // entry) |
| for (auto& func : module->functions) { |
| infos[func->name]; |
| } |
| { |
| FunctionInfoScanner scanner(infos); |
| scanner.run(getPassRunner(), module); |
| scanner.walkModuleCode(module); |
| } |
| for (auto& ex : module->exports) { |
| if (ex->kind == ExternalKind::Function) { |
| infos[ex->value].usedGlobally = true; |
| } |
| } |
| if (module->start.is()) { |
| infos[module->start].usedGlobally = true; |
| } |
| |
| // When optimizing heavily for size, we may potentially split functions in |
| // order to inline parts of them, if partialInliningIfs is enabled. |
| auto& options = getPassOptions(); |
| if (options.optimizeLevel >= 3 && !options.shrinkLevel && |
| options.inlining.partialInliningIfs) { |
| functionSplitter = std::make_unique<FunctionSplitter>(module, options); |
| } |
| } |
| |
| void iteration(std::unordered_set<Function*>& inlinedInto) { |
| // decide which to inline |
| InliningState state; |
| ModuleUtils::iterDefinedFunctions(*module, [&](Function* func) { |
| InliningMode inliningMode = getInliningMode(func->name); |
| assert(inliningMode != InliningMode::Unknown); |
| if (inliningMode != InliningMode::Uninlineable) { |
| state.inlinableFunctions[func->name] = inliningMode; |
| } |
| }); |
| if (state.inlinableFunctions.empty()) { |
| return; |
| } |
| // Fill in actionsForFunction, as we operate on it in parallel (each |
| // function to its own entry). Also generate a vector of the function names |
| // so that in the later loop we can iterate on it deterministically and |
| // without iterator invalidation. |
| std::vector<Name> funcNames; |
| for (auto& func : module->functions) { |
| state.actionsForFunction[func->name]; |
| funcNames.push_back(func->name); |
| } |
| // find and plan inlinings |
| Planner(&state).run(getPassRunner(), module); |
| // perform inlinings TODO: parallelize |
| std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined |
| // which functions were inlined into |
| for (auto name : funcNames) { |
| auto* func = module->getFunction(name); |
| // if we've inlined a function, don't inline into it in this iteration, |
| // avoid risk of races |
| // note that we do not risk stalling progress, as each iteration() will |
| // inline at least one call before hitting this |
| if (inlinedUses.count(func->name)) { |
| continue; |
| } |
| for (auto& action : state.actionsForFunction[name]) { |
| auto* inlinedFunction = action.contents; |
| // if we've inlined into a function, don't inline it in this iteration, |
| // avoid risk of races |
| // note that we do not risk stalling progress, as each iteration() will |
| // inline at least one call before hitting this |
| if (inlinedInto.count(inlinedFunction)) { |
| continue; |
| } |
| Name inlinedName = inlinedFunction->name; |
| if (!isUnderSizeLimit(func->name, inlinedName)) { |
| continue; |
| } |
| |
| // Success - we can inline. |
| #ifdef INLINING_DEBUG |
| std::cout << "inline " << inlinedName << " into " << func->name << '\n'; |
| #endif |
| |
| // Update the action for the actual inlining we are about to perform |
| // (when splitting, we will actually inline one of the split pieces and |
| // not the original function itself; note how even if we do that then |
| // we are still removing a call to the original function here, and so |
| // we do not need to change anything else lower down - we still want to |
| // note that we got rid of one use of the original function). |
| action.contents = getActuallyInlinedFunction(action.contents); |
| |
| // Perform the inlining and update counts. |
| doInlining(module, func, action, getPassOptions(), inlinedNameHint++); |
| inlinedUses[inlinedName]++; |
| inlinedInto.insert(func); |
| assert(inlinedUses[inlinedName] <= infos[inlinedName].refs); |
| } |
| } |
| if (optimize && inlinedInto.size() > 0) { |
| OptUtils::optimizeAfterInlining(inlinedInto, module, getPassRunner()); |
| } |
| // remove functions that we no longer need after inlining |
| module->removeFunctions([&](Function* func) { |
| auto name = func->name; |
| auto& info = infos[name]; |
| return inlinedUses.count(name) && inlinedUses[name] == info.refs && |
| !info.usedGlobally; |
| }); |
| } |
| |
| // See explanation in doInlining() for the parameter nameHint. |
| Index inlinedNameHint = 0; |
| |
| // Decide for a given function whether to inline, and if so in what mode. |
| InliningMode getInliningMode(Name name) { |
| auto* func = module->getFunction(name); |
| auto& info = infos[name]; |
| |
| if (info.inliningMode != InliningMode::Unknown) { |
| return info.inliningMode; |
| } |
| |
| // Check if the function itself is worth inlining as it is. |
| if (!func->noFullInline && info.worthFullInlining(getPassOptions())) { |
| return info.inliningMode = InliningMode::Full; |
| } |
| |
| // Otherwise, check if we can at least inline part of it, if we are |
| // interested in such things. |
| if (!func->noPartialInline && functionSplitter) { |
| info.inliningMode = functionSplitter->getSplitDrivenInliningMode( |
| module->getFunction(name), info); |
| return info.inliningMode; |
| } |
| |
| // Cannot be fully or partially inlined => uninlineable. |
| info.inliningMode = InliningMode::Uninlineable; |
| return info.inliningMode; |
| } |
| |
| // Gets the actual function to be inlined. Normally this is the function |
| // itself, but if it is a function that we must first split (i.e., we only |
| // want to partially inline it) then it will be the inlineable part of the |
| // split. |
| // |
| // This is called right before actually performing the inlining, that is, we |
| // are guaranteed to inline after this. |
| Function* getActuallyInlinedFunction(Function* func) { |
| InliningMode inliningMode = infos[func->name].inliningMode; |
| // If we want to inline this function itself, do so. |
| if (inliningMode == InliningMode::Full) { |
| return func; |
| } |
| |
| // Otherwise, this is a case where we want to inline part of it, after |
| // splitting. |
| assert(functionSplitter); |
| return functionSplitter->getInlineableSplitFunction(func, inliningMode); |
| } |
| |
| // Checks if the combined size of the code after inlining is under the |
| // absolute size limit. We have an absolute limit in order to avoid |
| // extremely-large sizes after inlining, as they may hit limits in VMs and/or |
| // slow down startup (measurements there indicate something like ~1 second to |
| // optimize a 100K function). See e.g. |
| // https://github.com/WebAssembly/binaryen/pull/3730#issuecomment-867939138 |
| // https://github.com/emscripten-core/emscripten/issues/13899#issuecomment-825073344 |
| bool isUnderSizeLimit(Name target, Name source) { |
| // Estimate the combined binary size from the number of instructions. |
| auto combinedSize = infos[target].size + infos[source].size; |
| auto estimatedBinarySize = Measurer::BytesPerExpr * combinedSize; |
| // The limit is arbitrary, but based on the links above. It is a very high |
| // value that should appear very rarely in practice (for example, it does |
| // not occur on the Emscripten benchmark suite of real-world codebases). |
| const Index MaxCombinedBinarySize = 400 * 1024; |
| return estimatedBinarySize < MaxCombinedBinarySize; |
| } |
| }; |
| |
| } // anonymous namespace |
| |
| // |
| // InlineMain |
| // |
| // Inline __original_main into main, if they exist. This works around the odd |
| // thing that clang/llvm currently do, where __original_main contains the user's |
| // actual main (this is done as a workaround for main having two different |
| // possible signatures). |
| // |
| |
| static const char* MAIN = "main"; |
| static const char* ORIGINAL_MAIN = "__original_main"; |
| |
| struct InlineMainPass : public Pass { |
| void run(Module* module) override { |
| auto* main = module->getFunctionOrNull(MAIN); |
| auto* originalMain = module->getFunctionOrNull(ORIGINAL_MAIN); |
| if (!main || main->imported() || !originalMain || |
| originalMain->imported()) { |
| return; |
| } |
| FindAllPointers<Call> calls(main->body); |
| Expression** callSite = nullptr; |
| for (auto* call : calls.list) { |
| if ((*call)->cast<Call>()->target == ORIGINAL_MAIN) { |
| if (callSite) { |
| // More than one call site. |
| return; |
| } |
| callSite = call; |
| } |
| } |
| if (!callSite) { |
| // No call at all. |
| return; |
| } |
| doInlining(module, |
| main, |
| InliningAction(callSite, originalMain, true), |
| getPassOptions()); |
| } |
| }; |
| |
| Pass* createInliningPass() { return new Inlining(); } |
| |
| Pass* createInliningOptimizingPass() { |
| auto* ret = new Inlining(); |
| ret->optimize = true; |
| return ret; |
| } |
| |
| Pass* createInlineMainPass() { return new InlineMainPass(); } |
| |
| } // namespace wasm |