blob: 29ce1b92175bc76484048051c58822756fa562a4 [file] [log] [blame]
/*
* Copyright (C) 2025 Apple Inc. All rights reserved.
* Copyright (C) 2023 the V8 project authors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "WasmInliningDecision.h"
#include "WasmMergedProfile.h"
#include "WasmModule.h"
#include "WasmModuleInformation.h"
#include <wtf/PriorityQueue.h>
#include <wtf/TZoneMallocInlines.h>
#if ENABLE(WEBASSEMBLY)
namespace JSC::Wasm {
namespace WasmInliningDecisionInternal {
static constexpr bool verbose = false;
}
WTF_MAKE_TZONE_ALLOCATED_IMPL(InliningNode);
WTF_MAKE_TZONE_ALLOCATED_IMPL(InliningDecision);
InliningNode::InliningNode(const IPIntCallee& callee, InliningNode* caller, uint8_t caseIndex, unsigned callProfileIndex, size_t wasmSize, double relativeCallCount)
: m_callee(callee)
, m_caller(caller)
, m_caseIndex(caseIndex)
, m_depth(caller ? caller->m_depth + 1 : 0)
, m_callProfileIndex(callProfileIndex)
, m_wasmSize(wasmSize)
, m_relativeCallCount(relativeCallCount)
{
}
double InliningNode::score() const
{
if (!m_wasmSize)
return 0.0;
return m_relativeCallCount / m_wasmSize;
}
// candidate given the initial graph size and the already inlined wire bytes.
bool InliningDecision::canInline(InliningNode* target, size_t initialWasmSize, size_t inlinedWasmSize)
{
size_t wasmSize = target->wasmSize();
if (wasmSize > Options::wasmInliningMaximumWasmCalleeSize())
return false;
// FIXME: There's no fundamental reason we can't inline these including imports.
if (m_module.moduleInformation().callCanClobberInstance(target->callee().index()))
return false;
// For tiny functions, let's be a bit more generous.
if (wasmSize < Options::wasmInliningTinyFunctionThreshold()) {
if (inlinedWasmSize > 100)
inlinedWasmSize -= 100;
else
inlinedWasmSize = 0;
}
// For small-ish functions, the inlining budget is defined by the larger of
// 1) the wasmInliningMinimumBudget and
// 2) the m_maxGrowthFactor * initialWasmSize.
// Inlining a little bit should always be fine even for tiny functions (1),
// otherwise (2) makes sure that the budget scales in relation with the
// original function size, to limit the compile time increase caused by
// inlining.
size_t budgetSmallFunction = std::max<size_t>(Options::wasmInliningMinimumBudget(), m_maxGrowthFactor * initialWasmSize);
// For large functions, growing by the same factor would add too much
// compilation effort, so we also apply a fixed cap. However, independent
// of the budget cap, for large functions we should still allow a little
// inlining, which is why we allow 10% of the graph size is the minimal
// budget even for large functions that exceed the regular budget.
//
// Note for future tuning: it might make sense to allow 20% here, and in
// turn perhaps lower --wasmInliningBudget. The drawback is that this
// would allow truly huge functions to grow even bigger; the benefit is
// that we wouldn't fall off as steep a cliff when hitting the cap.
size_t budgetLargeFunction = std::max<size_t>(m_budgetCap, initialWasmSize * 1.1);
size_t totalSize = initialWasmSize + inlinedWasmSize + wasmSize;
return totalSize < std::min<size_t>(budgetSmallFunction, budgetLargeFunction);
}
InliningNode* InliningNode::callTarget(FunctionSpaceIndex functionIndexSpace, unsigned callProfileIndex)
{
if (m_callSites.size() <= callProfileIndex)
return nullptr;
auto& callSite = m_callSites[callProfileIndex];
for (auto* inlining : callSite) {
if (inlining->callee().index() == functionIndexSpace) {
if (inlining->isInlined())
return inlining;
return nullptr;
}
}
return nullptr;
}
void InliningNode::inlineNode(InliningDecision& decision)
{
m_isInlined = true;
SUPPRESS_UNCOUNTED_ARG auto* profile = decision.profileForCallee(m_callee);
if (!profile->merged())
return;
m_isUnused = false;
m_callSites.grow(profile->size());
for (unsigned index = 0; index < m_callSites.size(); ++index) {
if (profile->isMegamorphic(index))
continue;
auto& callSite = m_callSites[index];
auto candidates = profile->candidates(index);
for (auto& [candidateCallee, callCount] : candidates.callees()) {
if (candidateCallee->compilationMode() != Wasm::CompilationMode::IPIntMode)
continue;
SUPPRESS_UNCOUNTED_LOCAL auto& target = uncheckedDowncast<const IPIntCallee>(*candidateCallee);
double relativeCallCount = 0;
if (profile->totalCount())
relativeCallCount = callCount / static_cast<double>(profile->totalCount());
size_t wasmSize = decision.m_module.moduleInformation().functionWasmSizeImportSpace(candidateCallee->index());
auto& child = decision.m_arena.alloc(target, this, callSite.size(), index, wasmSize, relativeCallCount);
callSite.append(&child);
}
}
}
static double budgetScaleFactor(const Module& module)
{
// If there are few small functions, that indicates that the toolchain
// already performed significant inlining, so we reduce the budget
// significantly as further inlining has diminishing benefits.
// For both major knobs, we apply a smoothened step function based on
// the module's percentage of small functions (sfp):
// sfp <= 25%: use "low" budget
// sfp >= 50%: use "high" budget
// 25% < sfp < 50%: interpolate linearly between both budgets.
double smallFunctionPercentage = module.moduleInformation().m_numSmallFunctions * 100.0 / module.moduleInformation().internalFunctionCount();
if (smallFunctionPercentage <= 25)
return 0;
if (smallFunctionPercentage >= 50)
return 1;
return (smallFunctionPercentage - 25) / 25;
}
InliningDecision::InliningDecision(Module& module, const IPIntCallee& rootCallee)
: m_module(module)
, m_root(m_arena.alloc(rootCallee, nullptr, 0, 0, module.moduleInformation().functionWasmSizeImportSpace(rootCallee.index()), 1.0))
{
double scaled = budgetScaleFactor(module);
int highGrowth = Options::wasmInliningFactor();
// A value of 1 would be equivalent to disabling inlining entirely.
constexpr int lowestUsefulValue = 2;
int lowGrowth = std::max(lowestUsefulValue, highGrowth - 3);
m_maxGrowthFactor = lowGrowth * (1 - scaled) + highGrowth * scaled;
double highCap = Options::wasmInliningBudget();
double lowCap = highCap / 10;
m_budgetCap = lowCap * (1 - scaled) + highCap * scaled;
}
MergedProfile* InliningDecision::profileForCallee(const IPIntCallee& callee)
{
SUPPRESS_UNCOUNTED_ARG return m_profiles.ensure(&callee, [&] {
return m_module.createMergedProfile(callee);
}).iterator->value.get();
}
static bool isHigherPriority(InliningNode* const& lhs, InliningNode* const& rhs)
{
// The ordering is, higher score, lower index, lower pointer.
return std::tuple { lhs->score(), rhs->callee().index(), rhs } > std::tuple { rhs->score(), lhs->callee().index(), lhs };
}
void InliningDecision::expand()
{
PriorityQueue<InliningNode*, isHigherPriority> queue;
auto addChildrenToQueue = [&](InliningNode* target) {
if (target->depth() >= Options::wasmInliningMaximumDepth()) {
dataLogLnIf(WasmInliningDecisionInternal::verbose, "max inlining depth reached]");
return;
}
unsigned actual = 0;
for (const auto& callSite : target->callSites()) {
for (auto* node : callSite) {
queue.enqueue(node);
++actual;
}
}
dataLogLnIf(WasmInliningDecisionInternal::verbose, "queueing ", actual, " callees in ", target->callSites().size(), " sites]");
};
uint32_t initialWasmSize = m_root.wasmSize();
uint32_t inlinedWasmSize = 0;
dataLogIf(WasmInliningDecisionInternal::verbose, "[function ", m_root.callee().index(), ": expanding topmost caller... ");
m_root.inlineNode(*this);
++m_inlinedCount;
addChildrenToQueue(&m_root);
while (!queue.isEmpty()) {
if (!Options::useOMGInlining()) {
dataLogLnIf(WasmInliningDecisionInternal::verbose, " [function ", m_root.callee().index(), ": inlining is disabled, stopping...]");
break;
}
if (m_inlinedCount >= Options::wasmInliningMaximumCount()) {
dataLogLnIf(WasmInliningDecisionInternal::verbose, " [function ", m_root.callee().index(), ": too many inlining candidates, stopping...]");
break;
}
auto* target = queue.dequeue();
dataLogIf(WasmInliningDecisionInternal::verbose, " [function ", m_root.callee().index(), ": in function ", target->caller()->callee().index(), ", considering call #", target->callProfileIndex(), ", case #", target->caseIndex(), ", to function ", target->callee().index(), " relativeCallCount:(", target->relativeCallCount(), "),size:(", target->wasmSize(), "),score:(", target->score(), ")... ");
if (target->wasmSize() >= Options::wasmInliningTinyFunctionThreshold()) {
if (target->score() < 0.0001) {
dataLogLnIf(WasmInliningDecisionInternal::verbose, "not called often enough]");
continue;
}
}
if (!canInline(target, initialWasmSize, inlinedWasmSize)) {
dataLogLnIf(WasmInliningDecisionInternal::verbose, "not enough inlining budget]");
continue;
}
dataLogIf(WasmInliningDecisionInternal::verbose, "decided to inline! ");
target->inlineNode(*this);
++m_inlinedCount;
addChildrenToQueue(target);
constexpr size_t oneLessCall = 6; // Guesstimated savings per call.
size_t addition = target->wasmSize();
if (addition >= oneLessCall)
inlinedWasmSize += (addition - oneLessCall);
}
}
} // namespace JSC::Wasm
#endif