// blob: 709fd7d3d85e33263f9b3db956e016a2356c7e9d (source-viewer metadata)
/*
* Copyright 2016 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Computes code at compile time where possible, replacing it with the
// computed constant.
//
// The "propagate" variant of this pass also propagates constants across
// sets and gets, which implements a standard constant propagation.
//
// Possible nondeterminism: WebAssembly NaN signs are nondeterministic,
// and this pass may optimize e.g. a float 0 / 0 into +nan while a VM may
// emit -nan, which can be a noticeable difference if the bits are
// looked at.
//
#include "ir/iteration.h"
#include "ir/literal-utils.h"
#include "ir/local-graph.h"
#include "ir/manipulation.h"
#include "ir/properties.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "support/small_vector.h"
#include "wasm-builder.h"
#include "wasm-interpreter.h"
#include "wasm.h"
namespace wasm {
// A map of gets to their constant values. If a get does not have a constant
// value then it does not appear in the map (that avoids allocating for the
// majority of gets).
using GetValues = std::unordered_map<LocalGet*, Literals>;
// A map of values on the heap. This maps the expressions that create the
// heap data (struct.new, array.new, etc.) to the data they are created with.
// Each such expression gets its own GCData created for it. This allows
// computing identity between locals referring to the same GCData, by seeing
// if they point to the same thing.
//
// Note that a source expression may create different data each time it is
// reached in a loop,
//
// (loop
// (if ..
// (local.set $x
// (struct.new ..
// )
// )
// ..compare $x to something..
// )
//
// Just like in SSA form, this is not a problem because the loop entry must
// have a merge, if a value entering the loop might be noticed. In SSA form
// that means a phi is created, and identity is set there. In our
// representation, the merge will cause a local.get of $x to have more
// possible input values than that struct.new, which means we will not infer
// a value for it, and not attempt to say anything about comparisons of $x.
using HeapValues = std::unordered_map<Expression*, std::shared_ptr<GCData>>;
// Precomputes an expression. Errors if we hit anything that can't be
// precomputed. Inherits most of its functionality from
// ConstantExpressionRunner, which it shares with the C-API, but adds handling
// of GetValues computed during the precompute pass.
class PrecomputingExpressionRunner
  : public ConstantExpressionRunner<PrecomputingExpressionRunner> {

  using Super = ConstantExpressionRunner<PrecomputingExpressionRunner>;

  // Concrete values of gets computed during the pass, which the runner does not
  // know about since it only records values of sets it visits.
  const GetValues& getValues;

  // Shared map from heap-allocating expressions (struct.new etc.) to their
  // canonical GCData, so reference identity stays stable across repeated
  // evaluations (see getHeapCreationFlow below).
  HeapValues& heapValues;

  // Limit evaluation depth for 2 reasons: first, it is highly unlikely
  // that we can do anything useful to precompute a hugely nested expression
  // (we should succeed at smaller parts of it first). Second, a low limit is
  // helpful to avoid platform differences in native stack sizes.
  static const Index MAX_DEPTH = 50;

  // Limit loop iterations since loops might be infinite. Since we are going to
  // replace the expression and must preserve side effects, we limit this to the
  // very first iteration because a side effect would be necessary to achieve
  // more than one iteration before becoming concrete.
  static const Index MAX_LOOP_ITERATIONS = 1;

public:
  // |replaceExpression| indicates we will replace the whole expression with
  // the computed constant; in that mode side effects must be preserved, which
  // is signaled to the base runner via PRESERVE_SIDEEFFECTS.
  PrecomputingExpressionRunner(Module* module,
                               const GetValues& getValues,
                               HeapValues& heapValues,
                               bool replaceExpression)
    : ConstantExpressionRunner<PrecomputingExpressionRunner>(
        module,
        replaceExpression ? FlagValues::PRESERVE_SIDEEFFECTS
                          : FlagValues::DEFAULT,
        MAX_DEPTH,
        MAX_LOOP_ITERATIONS),
      getValues(getValues), heapValues(heapValues) {}

  // Use a value inferred for this get by the precompute pass, if we have one;
  // otherwise defer to the base runner.
  Flow visitLocalGet(LocalGet* curr) {
    auto iter = getValues.find(curr);
    if (iter != getValues.end()) {
      auto values = iter->second;
      assert(values.isConcrete());
      return Flow(values);
    }
    return ConstantExpressionRunner<
      PrecomputingExpressionRunner>::visitLocalGet(curr);
  }

  // TODO: Use immutability for values
  Flow visitStructNew(StructNew* curr) {
    auto flow = Super::visitStructNew(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  // Writes to the heap are side effects we cannot precompute away.
  Flow visitStructSet(StructSet* curr) { return Flow(NONCONSTANT_FLOW); }
  Flow visitStructGet(StructGet* curr) {
    if (curr->ref->type != Type::unreachable && !curr->ref->type.isNull()) {
      // If this field is immutable then we may be able to precompute this, as
      // if we also created the data in this function (or it was created in an
      // immutable global) then we know the value in the field. If it is
      // immutable, call the super method which will do the rest here. That
      // includes checking for the data being properly created, as if it was
      // not then we will not have a constant value for it, which means the
      // local.get of that value will stop us.
      auto& field =
        curr->ref->type.getHeapType().getStruct().fields[curr->index];
      if (field.mutable_ == Immutable) {
        return Super::visitStructGet(curr);
      }
    }
    // Otherwise, we've failed to precompute.
    return Flow(NONCONSTANT_FLOW);
  }
  Flow visitArrayNew(ArrayNew* curr) {
    auto flow = Super::visitArrayNew(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitArrayNewFixed(ArrayNewFixed* curr) {
    auto flow = Super::visitArrayNewFixed(curr);
    if (flow.breaking()) {
      return flow;
    }
    return getHeapCreationFlow(flow, curr);
  }
  Flow visitArraySet(ArraySet* curr) { return Flow(NONCONSTANT_FLOW); }
  Flow visitArrayGet(ArrayGet* curr) {
    if (curr->ref->type != Type::unreachable && !curr->ref->type.isNull()) {
      // See above with struct.get
      auto element = curr->ref->type.getHeapType().getArray().element;
      if (element.mutable_ == Immutable) {
        return Super::visitArrayGet(curr);
      }
    }
    // Otherwise, we've failed to precompute.
    return Flow(NONCONSTANT_FLOW);
  }

  // ArrayLen is not disallowed here as it is an immutable property.

  // array.copy writes to the heap, so it cannot be precomputed.
  Flow visitArrayCopy(ArrayCopy* curr) { return Flow(NONCONSTANT_FLOW); }

  // Generates heap info for a heap-allocating expression.
  template<typename T> Flow getHeapCreationFlow(Flow flow, T* curr) {
    // We must return a literal that refers to the canonical location for this
    // source expression, so that each time we compute a specific struct.new
    // we get the same identity.
    std::shared_ptr<GCData>& canonical = heapValues[curr];
    std::shared_ptr<GCData> newGCData = flow.getSingleValue().getGCData();
    if (!canonical) {
      canonical = std::make_shared<GCData>(*newGCData);
    } else {
      // Update the canonical data in place so existing references to it see
      // the latest values.
      *canonical = *newGCData;
    }
    return Literal(canonical, curr->type.getHeapType());
  }

  Flow visitStringNew(StringNew* curr) {
    if (curr->op != StringNewWTF16Array) {
      // TODO: handle other string ops. For now we focus on JS-like strings.
      return Flow(NONCONSTANT_FLOW);
    }
    // string.new_wtf16_array is effectively an Array read operation, so
    // just like ArrayGet above we must check for immutability.
    auto refType = curr->ref->type;
    if (refType.isRef()) {
      auto heapType = refType.getHeapType();
      if (heapType.isArray()) {
        if (heapType.getArray().element.mutable_ == Immutable) {
          return Super::visitStringNew(curr);
        }
      }
    }
    // Otherwise, this is mutable or unreachable or otherwise uninteresting.
    return Flow(NONCONSTANT_FLOW);
  }

  Flow visitStringEncode(StringEncode* curr) {
    // string.encode_wtf16_array is effectively an Array write operation, so
    // just like ArraySet and ArrayCopy above we must mark it as disallowed
    // (due to side effects). (And we do not support other operations than
    // string.encode_wtf16_array anyhow.)
    return Flow(NONCONSTANT_FLOW);
  }
};
// The main pass: walks each function, trying to evaluate every expression
// into a constant. The "propagate" variant also performs constant propagation
// across local.set/local.get pairs using a local graph.
struct Precompute
  : public WalkerPass<
      PostWalker<Precompute, UnifiedExpressionVisitor<Precompute>>> {
  // Each function can be processed independently.
  bool isFunctionParallel() override { return true; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<Precompute>(propagate);
  }

  // Whether to propagate constants from sets to gets (the "propagate" pass
  // variant).
  bool propagate = false;

  Precompute(bool propagate) : propagate(propagate) {}

  // Constant values inferred for local.gets (filled in by propagateLocals).
  GetValues getValues;
  // Canonical GCData for heap-allocating expressions, shared across all the
  // runner invocations in this function so reference identity is stable.
  HeapValues heapValues;

  // Whether we attempt the (heavier) partial precomputation of select arms;
  // decided per function from the optimize level.
  bool canPartiallyPrecompute;

  void doWalkFunction(Function* func) {
    // Perform partial precomputing only when the optimization level is non-
    // trivial, as it is slower and less likely to help.
    canPartiallyPrecompute = getPassOptions().optimizeLevel >= 2;

    // Walk the function and precompute things.
    Super::doWalkFunction(func);
    partiallyPrecompute(func);
    if (!propagate) {
      return;
    }
    // When propagating, we can utilize the graph of local operations to
    // precompute the values from a local.set to a local.get. This populates
    // getValues which is then used by a subsequent walk that applies those
    // values.
    bool propagated = propagateLocals(func);
    if (propagated) {
      // We found constants to propagate and entered them in getValues. Do
      // another walk to apply them and perhaps other optimizations that are
      // unlocked.
      Super::doWalkFunction(func);
      // We could also try to partially precompute again, but that is a somewhat
      // heavy operation, so we only do it the first time, and leave such things
      // for later runs of this pass and for --converge.
    }
    // Note that in principle even more cycles could find further work here, in
    // very rare cases. To avoid constructing a LocalGraph again just for that
    // unlikely chance, we leave such things for later.
  }

  // Replace the value child of a return or br with the precomputed constant,
  // reusing an existing Const / RefNull / RefFunc node when one is present
  // (that avoids allocating a new expression).
  template<typename T> void reuseConstantNode(T* curr, Flow flow) {
    if (flow.values.isConcrete()) {
      // reuse a const / ref.null / ref.func node if there is one
      if (curr->value && flow.values.size() == 1) {
        Literal singleValue = flow.getSingleValue();
        if (singleValue.type.isNumber()) {
          if (auto* c = curr->value->template dynCast<Const>()) {
            c->value = singleValue;
            c->finalize();
            curr->finalize();
            return;
          }
        } else if (singleValue.isNull()) {
          if (auto* n = curr->value->template dynCast<RefNull>()) {
            n->finalize(singleValue.type);
            curr->finalize();
            return;
          }
        } else if (singleValue.type.isRef() &&
                   singleValue.type.getHeapType().isSignature()) {
          if (auto* r = curr->value->template dynCast<RefFunc>()) {
            r->func = singleValue.getFunc();
            r->finalize();
            curr->finalize();
            return;
          }
        }
      }
      curr->value = flow.getConstExpression(*getModule());
    } else {
      // No concrete value: the return/br carries no value.
      curr->value = nullptr;
    }
    curr->finalize();
  }

  void visitExpression(Expression* curr) {
    // TODO: if local.get, only replace with a constant if we don't care about
    // size...?
    if (Properties::isConstantExpression(curr) || curr->is<Nop>()) {
      // Already as simple as it can get; nothing to do.
      return;
    }
    // try to evaluate this into a const
    Flow flow = precomputeExpression(curr);
    if (!canEmitConstantFor(flow.values)) {
      return;
    }
    if (flow.breaking()) {
      if (flow.breakTo == NONCONSTANT_FLOW) {
        // This cannot be turned into a constant, but perhaps we can partially
        // precompute it.
        considerPartiallyPrecomputing(curr);
        return;
      }
      if (flow.breakTo == RETURN_FLOW) {
        // this expression causes a return. if it's already a return, reuse the
        // node
        if (auto* ret = curr->dynCast<Return>()) {
          reuseConstantNode(ret, flow);
        } else {
          Builder builder(*getModule());
          replaceCurrent(builder.makeReturn(
            flow.values.isConcrete() ? flow.getConstExpression(*getModule())
                                     : nullptr));
        }
        return;
      }
      // this expression causes a break, emit it directly. if it's already a br,
      // reuse the node.
      if (auto* br = curr->dynCast<Break>()) {
        br->name = flow.breakTo;
        // The break is now unconditional (the precomputed flow always breaks).
        br->condition = nullptr;
        reuseConstantNode(br, flow);
      } else {
        Builder builder(*getModule());
        replaceCurrent(builder.makeBreak(
          flow.breakTo,
          flow.values.isConcrete() ? flow.getConstExpression(*getModule())
                                   : nullptr));
      }
      return;
    }
    // this was precomputed
    if (flow.values.isConcrete()) {
      replaceCurrent(flow.getConstExpression(*getModule()));
    } else {
      // No value at all: the expression reduces to nothing.
      ExpressionManipulator::nop(curr);
    }
  }

  void visitBlock(Block* curr) {
    // When block precomputation fails, it can lead to quadratic slowness due to
    // the "tower of blocks" pattern used to implement switches:
    //
    //  (block
    //   (block
    //    ...
    //     (block
    //      (br_table ..
    //
    // If we try to precompute each block here, and fail on each, then we end up
    // doing quadratic work. This is also wasted work as once a nested block
    // fails to precompute there is not really a chance to succeed on the
    // parent. If we do *not* fail to precompute, however, then we do want to
    // precompute such nested blocks, e.g.:
    //
    //  (block $out
    //   (block
    //    (br $out)
    //   )
    //  )
    //
    // Here we *can* precompute the inner block, so when we get to the outer one
    // we see this:
    //
    //  (block $out
    //   (br $out)
    //  )
    //
    // And that precomputes to nothing. Therefore when we see a child of the
    // block that is another block (it failed to precompute to something
    // simpler) then we leave early here.
    //
    // Note that in theory we could still precompute here if wasm had
    // instructions that allow such things, e.g.:
    //
    //  (block $out
    //   (block
    //    (cause side effect1)
    //    (cause side effect2)
    //   )
    //   (undo those side effects exactly)
    //  )
    //
    // We are forced to invent a side effect that we can precisely undo (unlike,
    // say locals - a local.set would persist outside of the block, and even if
    // we did another set to the original value, this pass doesn't track values
    // that way). Only with that can we make the inner block un-precomputable
    // (because there are side effects) but the outer one is (because those
    // effects are undone). Note that it is critical that we have two things in
    // the block, so that we can't precompute it to one of them (which is what
    // we did to the br in the previous example). Note also that this is still
    // optimizable using other passes, as merge-blocks will fold the two blocks
    // together.
    if (!curr->list.empty() && curr->list[0]->is<Block>()) {
      // The first child is a block, that is, it could not be simplified, so
      // this looks like the "tower of blocks" pattern. Avoid quadratic time
      // here as explained above. (We could also look at other children of the
      // block, but the only real-world pattern identified so far is on the
      // first child, so keep things simple here.)
      return;
    }
    // Otherwise, precompute normally like all other expressions.
    visitExpression(curr);
  }

  // If we failed to precompute a constant, perhaps we can still precompute part
  // of an expression. Specifically, consider this case:
  //
  //  (A
  //    (select
  //      (B)
  //      (C)
  //      (condition)
  //    )
  //  )
  //
  // Perhaps we can compute A(B) and A(C). If so, we can emit a better select:
  //
  //  (select
  //    (constant result of A(B))
  //    (constant result of A(C))
  //    (condition)
  //  )
  //
  // Note that in general for code size we want to move operations *out* of
  // selects and ifs (OptimizeInstructions does that), but here we are
  // computing two constants which replace three expressions, so it is
  // worthwhile.
  //
  // To do such partial precomputing, in the main pass we note selects that look
  // promising. If we find any then we do a second pass later just for that (as
  // doing so requires walking up the stack in a manner that we want to avoid in
  // the main pass for overhead reasons; see below).
  //
  // Note that selects are all we really need here: Other passes would turn an
  // if into a select if the arms are simple enough, and only in those cases
  // (simple arms) do we have a chance at partially precomputing. For example,
  // if an arm is a constant then we can, but if it is a call then we can't.)
  // However, there are cases like an if with arms with side effects that end in
  // precomputable things, that are missed atm TODO
  std::unordered_set<Select*> partiallyPrecomputable;

  // Note a select as a candidate for partial precomputation, if it looks
  // promising (constant-ish arms, and not the entire function body).
  void considerPartiallyPrecomputing(Expression* curr) {
    if (!canPartiallyPrecompute) {
      return;
    }
    if (auto* select = curr->dynCast<Select>()) {
      // We only have a reasonable hope of success if the select arms are things
      // like constants or global gets. At a first approximation, allow the set
      // of things we allow in constant initializers (but we can probably allow
      // more here TODO).
      //
      // We also ignore selects with no parent (that are the entire function
      // body) as then there is nothing to optimize into their arms.
      auto& wasm = *getModule();
      if (Properties::isValidConstantExpression(wasm, select->ifTrue) &&
          Properties::isValidConstantExpression(wasm, select->ifFalse) &&
          getFunction()->body != select) {
        partiallyPrecomputable.insert(select);
      }
    }
  }

  // To partially precompute selects we walk up the stack from them, like this:
  //
  //  (A
  //    (B
  //      (select
  //        (C)
  //        (D)
  //        (condition)
  //      )
  //    )
  //  )
  //
  // First we try to apply B to C and D. If that works, we arrive at this:
  //
  //  (A
  //    (select
  //      (constant result of B(C))
  //      (constant result of B(D))
  //      (condition)
  //    )
  //  )
  //
  // We can then proceed to perhaps apply A. However, even if we failed to apply
  // B then we can try to apply A and B together, because that combination may
  // succeed where incremental work fails, for example:
  //
  //  (global $C
  //    (struct.new    ;; outer
  //      (struct.new  ;; inner
  //        (i32.const 10)
  //      )
  //    )
  //  )
  //
  //  (struct.get    ;; outer
  //    (struct.get  ;; inner
  //      (select
  //        (global.get $C)
  //        (global.get $D)
  //        (condition)
  //      )
  //    )
  //  )
  //
  // Applying the inner struct.get to $C leads us to the inner struct.new, but
  // that is an interior pointer in the global - it is not something we can
  // refer to using a global.get, so precomputing it fails. However, when we
  // apply both struct.gets at once we arrive at the outer struct.new, which is
  // in fact the global $C, and we succeed.
  void partiallyPrecompute(Function* func) {
    if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) {
      // Nothing to do.
      return;
    }

    // Walk the function to find the parent stacks of the promising selects. We
    // copy the stacks and process them later. We do it like this because if we
    // wanted to process stacks as we reached them then we'd trip over
    // ourselves: when we optimize we replace a parent, but that parent is an
    // expression we'll reach later in the walk, so modifying it is unsafe.
    struct StackFinder : public ExpressionStackWalker<StackFinder> {
      Precompute& parent;

      StackFinder(Precompute& parent) : parent(parent) {}

      // We will later iterate on this in the order of insertion, which keeps
      // things deterministic, and also usually lets us do consecutive work
      // like a select nested in another select's condition, simply because we
      // will traverse the selects in postorder (however, because we cannot
      // always succeed in an incremental manner - see the comment on this
      // function - it is possible in theory that some work can happen only in a
      // later execution of the pass).
      InsertOrderedMap<Select*, ExpressionStack> stackMap;

      void visitSelect(Select* curr) {
        if (parent.partiallyPrecomputable.count(curr)) {
          stackMap[curr] = expressionStack;
        }
      }
    } stackFinder(*this);
    stackFinder.walkFunction(func);

    // Note which expressions we've modified as we go, as it is invalid to
    // modify more than once. This could happen in theory in a situation like
    // this:
    //
    //  (ternary.f32.max  ;; fictional instruction for explanatory purposes
    //    (select ..)
    //    (select ..)
    //    (f32.infinity)
    //  )
    //
    // When we consider the first select we can see that the computation result
    // is always infinity, so we can optimize here and replace the ternary. Then
    // the same thing happens with the second select, causing the ternary to be
    // replaced again, which is unsafe because it no longer exists after we
    // precomputed it the first time. (Note that in this example the result is
    // the same either way, but at least in theory an instruction could exist
    // for whom there was a difference.) In practice it does not seem that wasm
    // has instructions capable of this atm but this code is still useful to
    // guard against future problems, and as a minor speedup (quickly skip code
    // if it was already modified).
    std::unordered_set<Expression*> modified;

    for (auto& [select, stack] : stackFinder.stackMap) {
      // Each stack ends in the select itself, and contains more than the select
      // itself (otherwise we'd have ignored the select), i.e., the select has a
      // parent that we can try to optimize into the arms.
      assert(stack.back() == select);
      assert(stack.size() >= 2);
      Index selectIndex = stack.size() - 1;
      assert(selectIndex >= 1);

      if (modified.count(select)) {
        // This select was modified; go to the next one.
        continue;
      }

      // Go up through the parents, until we can't do any more work. At each
      // parent we'll try to execute it and all intermediate parents into the
      // select arms.
      for (Index parentIndex = selectIndex - 1; parentIndex != Index(-1);
           parentIndex--) {
        auto* parent = stack[parentIndex];
        if (modified.count(parent)) {
          // This parent was modified; exit the loop on parents as no upper
          // parent is valid to try either.
          break;
        }

        // If the parent lacks a concrete type then we can't move it into the
        // select: the select needs a concrete (and non-tuple) type. For example
        // if the parent is a drop or is unreachable, those are things we don't
        // want to handle, and we stop here (once we see one such parent we
        // can't expect to make any more progress).
        if (!parent->type.isConcrete() || parent->type.isTuple()) {
          break;
        }

        // We are precomputing the select arms, but leaving the condition as-is.
        // If the condition breaks to the parent, then we can't move the parent
        // into the select arms:
        //
        //  (block $name ;; this must stay outside of the select
        //    (select
        //      (B)
        //      (C)
        //      (block ;; condition
        //        (br_if $target
        //
        // Ignore all control flow for simplicity, as they aren't interesting
        // for us, and other passes should have removed them anyhow.
        if (Properties::isControlFlowStructure(parent)) {
          break;
        }

        // This looks promising, so try to precompute here. What we do is
        // precompute twice, once with the select replaced with the left arm,
        // and once with the right. If both succeed then we can create a new
        // select (with the same condition as before) whose arms are the
        // precomputed values.
        auto isValidPrecomputation = [&](const Flow& flow) {
          // For now we handle simple concrete values. We could also handle
          // breaks in principle TODO
          return canEmitConstantFor(flow.values) && !flow.breaking() &&
                 flow.values.isConcrete();
        };

        // Find the pointer to the select in its immediate parent so that we can
        // replace it first with one arm and then the other.
        auto** pointerToSelect =
          getChildPointerInImmediateParent(stack, selectIndex, func);
        *pointerToSelect = select->ifTrue;
        auto ifTrue = precomputeExpression(parent);
        if (isValidPrecomputation(ifTrue)) {
          *pointerToSelect = select->ifFalse;
          auto ifFalse = precomputeExpression(parent);
          if (isValidPrecomputation(ifFalse)) {
            // Wonderful, we can precompute here! The select can now contain the
            // computed values in its arms.
            select->ifTrue = ifTrue.getConstExpression(*getModule());
            select->ifFalse = ifFalse.getConstExpression(*getModule());
            select->finalize();

            // The parent of the select is now replaced by the select.
            auto** pointerToParent =
              getChildPointerInImmediateParent(stack, parentIndex, func);
            *pointerToParent = select;

            // Update state for further iterations: Mark everything modified and
            // move the select to the parent's location.
            for (Index i = parentIndex; i <= selectIndex; i++) {
              modified.insert(stack[i]);
            }
            selectIndex = parentIndex;
            stack[selectIndex] = select;
            stack.resize(selectIndex + 1);
          }
        }

        // Whether we succeeded to precompute here or not, restore the parent's
        // pointer to its original state (if we precomputed, the parent is no
        // longer in use, but there is no harm in modifying it).
        *pointerToSelect = select;
      }
    }
  }

  void visitFunction(Function* curr) {
    // removing breaks can alter types
    ReFinalize().walkFunctionInModule(curr, getModule());
  }

private:
  // Precompute an expression, returning a flow, which may be a constant
  // (that we can replace the expression with if replaceExpression is set).
  Flow precomputeExpression(Expression* curr, bool replaceExpression = true) {
    Flow flow;
    try {
      flow = PrecomputingExpressionRunner(
               getModule(), getValues, heapValues, replaceExpression)
               .visit(curr);
    } catch (PrecomputingExpressionRunner::NonconstantException&) {
      return Flow(NONCONSTANT_FLOW);
    }
    // If we are replacing the expression, then the resulting value must be of
    // a type we can emit a constant for.
    if (!flow.breaking() && replaceExpression &&
        !canEmitConstantFor(flow.values)) {
      return Flow(NONCONSTANT_FLOW);
    }
    return flow;
  }

  // Precomputes the value of an expression, as opposed to the expression
  // itself. This differs from precomputeExpression in that we care about
  // the value the expression will have, which we cannot necessary replace
  // the expression with. For example,
  //  (local.tee (i32.const 1))
  // will have value 1 which we can optimize here, but in precomputeExpression
  // we could not do anything.
  Literals precomputeValue(Expression* curr) {
    // Note that we set replaceExpression to false, as we just care about
    // the value here.
    Flow flow = precomputeExpression(curr, false /* replaceExpression */);
    if (flow.breaking()) {
      return {};
    }
    return flow.values;
  }

  // Propagates values around. Returns whether we propagated.
  bool propagateLocals(Function* func) {
    bool propagated = false;

    // Using the graph of get-set interactions, do a constant-propagation type
    // operation: note which sets are assigned locals, then see if that lets us
    // compute other sets as locals (since some of the gets they read may be
    // constant). We do this lazily as most locals do not end up with constant
    // values that we can propagate.
    LazyLocalGraph localGraph(func, getModule());

    // A map of sets to their constant values. If a set does not appear here
    // then it is not constant, like |getValues|.
    std::unordered_map<LocalSet*, Literals> setValues;

    // The work list, which will contain sets and gets that have just been
    // found to have a constant value. As we only add them to the list when they
    // are found to be constant, each can only be added once, and a simple
    // vector is enough here (which we can make a small vector to avoid any
    // allocation in small-enough functions).
    SmallVector<Expression*, 10> work;

    // Given a set, see if it has a constant value. If so, note that on
    // setValues and add to the work list.
    auto checkConstantSet = [&](LocalSet* set) {
      if (setValues.count(set)) {
        // Already known to be constant.
        return;
      }
      // Precompute the value. Note that this executes the code from scratch
      // each time we reach this point, and so we need to be careful about
      // repeating side effects if those side effects are expressed *in the
      // value*. A case where that can happen is GC data (each struct.new
      // creates a new, unique struct, even if the data is equal), and so
      // PrecomputingExpressionRunner has special logic to make sure that
      // reference identity is preserved properly.
      //
      // (Other side effects are fine; if an expression does a call and we
      // somehow know the entire expression precomputes to a 42, then we can
      // propagate that 42 along to the users, regardless of whatever the call
      // did globally.)
      auto values = precomputeValue(
        Properties::getFallthrough(set->value, getPassOptions(), *getModule()));
      // We precomputed the *fallthrough* value (which allows us to look through
      // some things that would otherwise block us). But in some cases, like a
      // ref.cast, the fallthrough value can have an incompatible type for the
      // entire expression, which would be invalid for us to propagate, e.g.:
      //
      //  (ref.cast (ref struct)
      //    (ref.null any)
      //  )
      //
      // In such a case the value cannot actually fall through. Ignore such
      // cases (which other optimizations can handle) by making sure that we
      // only propagate a valid subtype.
      if (values.isConcrete() &&
          Type::isSubType(values.getType(), set->value->type)) {
        setValues[set] = values;
        work.push_back(set);
      }
    };

    // The same, for a get.
    auto checkConstantGet = [&](LocalGet* get) {
      if (getValues.count(get)) {
        // Already known to be constant.
        return;
      }
      // For this get to have constant value, all sets must agree on a constant.
      Literals values;
      bool first = true;
      for (auto* set : localGraph.getSets(get)) {
        Literals curr;
        if (set == nullptr) {
          if (getFunction()->isVar(get->index)) {
            auto localType = getFunction()->getLocalType(get->index);
            if (!localType.isDefaultable()) {
              // This is a nondefaultable local that seems to read the default
              // value at the function entry. This is either an internal error
              // or a case of unreachable code; the latter is possible as
              // LocalGraph is not precise in unreachable code. Give up.
              return;
            } else {
              curr = Literal::makeZeros(localType);
            }
          } else {
            // It's a param, so the value is non-constant. Give up.
            return;
          }
        } else {
          // If there is an entry for the set, use that constant. Otherwise, the
          // set is not constant, and we give up.
          auto iter = setValues.find(set);
          if (iter == setValues.end()) {
            return;
          }
          curr = iter->second;
        }
        // We found a concrete value, so there is a chance, if it matches all
        // the rest.
        assert(curr.isConcrete());
        if (first) {
          // This is the first ever value we see. All later ones must match it.
          values = curr;
          first = false;
        } else if (values != curr) {
          // This later value is not the same as before, give up.
          return;
        }
      }
      if (values.isConcrete()) {
        // We found a constant value!
        getValues[get] = values;
        work.push_back(get);
        propagated = true;
      } else {
        // If it is not concrete then, since we early-exited before on any
        // possible problem, there must be no sets for this get, which means it
        // is in unreachable code. In that case, we never switched |first| from
        // true to false.
        assert(first == true);
        // We could optimize using unreachability here, but we leave that for
        // other passes.
      }
    };

    // Check all gets and sets to find which are constant, mark them as such,
    // and add propagation work based on that.
    for (auto& [curr, _] : localGraph.getLocations()) {
      if (auto* set = curr->dynCast<LocalSet>()) {
        checkConstantSet(set);
      } else {
        checkConstantGet(curr->cast<LocalGet>());
      }
    }

    // Propagate constant values while work remains.
    while (!work.empty()) {
      auto* curr = work.back();
      work.pop_back();

      // This get or set is a constant value. Check if the things it influences
      // become constant.
      if (auto* set = curr->dynCast<LocalSet>()) {
        for (auto* get : localGraph.getSetInfluences(set)) {
          checkConstantGet(get);
        }
      } else {
        auto* get = curr->cast<LocalGet>();
        for (auto* set : localGraph.getGetInfluences(get)) {
          checkConstantSet(set);
        }
      }
    }

    return propagated;
  }

  // Whether we can emit a constant expression for every value in |values|.
  bool canEmitConstantFor(const Literals& values) {
    for (auto& value : values) {
      if (!canEmitConstantFor(value)) {
        return false;
      }
    }
    return true;
  }

  bool canEmitConstantFor(const Literal& value) {
    // A null is fine to emit a constant for - we'll emit a RefNull. Otherwise,
    // see below about references to GC data.
    if (value.isNull()) {
      return true;
    }
    return canEmitConstantFor(value.type);
  }

  bool canEmitConstantFor(Type type) {
    // A function is fine to emit a constant for - we'll emit a RefFunc, which
    // is compact and immutable, so there can't be a problem.
    if (type.isFunction()) {
      return true;
    }
    // We can emit a StringConst for a string constant.
    if (type.isString()) {
      return true;
    }
    // All other reference types cannot be precomputed. Even an immutable GC
    // reference is not currently something this pass can handle, as it will
    // evaluate and reevaluate code multiple times in e.g. propagateLocals, see
    // the comment above.
    if (type.isRef()) {
      return false;
    }
    return true;
  }

  // Helpers for partial precomputing.

  // Given a stack of expressions and the index of an expression in it, find
  // the pointer to that expression in the parent. This gives us a pointer that
  // allows us to replace the expression.
  Expression** getChildPointerInImmediateParent(const ExpressionStack& stack,
                                                Index index,
                                                Function* func) {
    if (index == 0) {
      // There is nothing above this expression, so the pointer referring to it
      // is the function's body.
      return &func->body;
    }
    auto* child = stack[index];
    auto childIterator = ChildIterator(stack[index - 1]);
    for (auto** currChild : childIterator.children) {
      if (*currChild == child) {
        return currChild;
      }
    }
    WASM_UNREACHABLE("child not found in parent");
  }
};
// Factory for the plain precompute pass: constant-folds expressions only,
// without propagating values across local.set/get pairs.
Pass* createPrecomputePass() {
  auto* pass = new Precompute(false);
  return pass;
}
// Factory for the propagating variant: also performs constant propagation
// from local.sets to local.gets.
Pass* createPrecomputePropagatePass() {
  auto* pass = new Precompute(true);
  return pass;
}
} // namespace wasm