/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
 */

//
// Optimizes heap (GC) stores.
//
// TODO: Add dead store elimination / load forwarding here.
//

#include "cfg/cfg-traversal.h"
#include "ir/effects.h"
#include "pass.h"
#include "wasm-builder.h"
#include "wasm.h"

namespace wasm {

namespace {

// In each basic block we will store the relevant heap store operations and
// other actions that matter to our analysis.
struct Info {
std::vector<Expression**> actions;
};

struct HeapStoreOptimization
  : public WalkerPass<
      CFGWalker<HeapStoreOptimization, Visitor<HeapStoreOptimization>, Info>> {
  bool isFunctionParallel() override { return true; }

  // Locals are not modified here.
  bool requiresNonNullableLocalFixups() override { return false; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<HeapStoreOptimization>();
  }

  // Branches outside of the function can be ignored, as we only look at local
  // state in the function. (This may need to change if we do more general dead
  // store elimination.)
  bool ignoreBranchesOutsideOfFunc = true;

// Store struct.sets and blocks, as we can find patterns among those.
void addAction() {
if (currBasicBlock) {
currBasicBlock->contents.actions.push_back(getCurrentPointer());
}
  }

  void visitStructSet(StructSet* curr) { addAction(); }
  void visitBlock(Block* curr) { addAction(); }

  void visitFunction(Function* curr) {
// Now that the walk is complete and we have a CFG, find things to optimize.
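    // (basicBlocks is populated by our CFGWalker parent during the walk.)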
for (auto& block : basicBlocks) {
for (auto** currp : block->contents.actions) {
auto* curr = *currp;
if (auto* set = curr->dynCast<StructSet>()) {
optimizeStructSet(set, currp);
} else if (auto* block = curr->dynCast<Block>()) {
optimizeBlock(block);
} else {
WASM_UNREACHABLE("bad action");
}
}
}
  }

  // Optimize a struct.set. Also receives a pointer to where the struct.set
  // is referred to, so that we can replace it if we optimize.
  void optimizeStructSet(StructSet* curr, Expression** currp) {
// If our reference is a tee of a struct.new, we may be able to fold the
// stored value into the new itself:
//
// (struct.set (local.tee $x (struct.new X Y Z)) X')
// =>
// (local.set $x (struct.new X' Y Z))
//
if (auto* tee = curr->ref->dynCast<LocalSet>()) {
if (auto* new_ = tee->value->dynCast<StructNew>()) {
if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
          // Success: the struct.set is no longer needed, and the tee can
          // become a plain local.set in its place.
tee->makeSet();
*currp = tee;
}
}
}
  }

  // Similar to the above, where the struct.set's reference is a tee of a
  // struct.new, we can optimize a local.set of a struct.new followed by
  // struct.sets through that local in a block:
  //
  //  (local.set $x (struct.new X Y Z))
  //  (struct.set (local.get $x) X')
  // =>
  //  (local.set $x (struct.new X' Y Z))
  //
  // We also handle further struct.sets immediately after the first one. And
  // if the instruction following the struct.new is not a struct.set, we try
  // to push the struct.new down past it, as in the example below.
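  //
  // For example (a sketch; $other is a hypothetical call that does not
  // interfere with the struct.new or the local):
  //
  //  (local.set $x (struct.new X Y Z))
  //  (call $other)
  //  (struct.set (local.get $x) X')
  // =>
  //  (call $other)
  //  (local.set $x (struct.new X' Y Z))
  //  (nop)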
void optimizeBlock(Block* curr) {
auto& list = curr->list;
for (Index i = 0; i < list.size(); i++) {
auto* localSet = list[i]->dynCast<LocalSet>();
if (!localSet) {
continue;
}
auto* new_ = localSet->value->dynCast<StructNew>();
if (!new_) {
continue;
}
// This local.set of a struct.new looks good. Find struct.sets after it to
// optimize.
Index localSetIndex = i;
for (Index j = localSetIndex + 1; j < list.size(); j++) {
// Check that the next instruction is a struct.set on the same local as
// the struct.new.
auto* structSet = list[j]->dynCast<StructSet>();
auto* localGet =
structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
if (!structSet || !localGet || localGet->index != localSet->index) {
          // When the pattern no longer matches, we try to push the
          // struct.new further down; if that is not possible, we stop
          // optimizing struct.sets for this struct.new.
if (trySwap(list, localSetIndex, j)) {
// Update the index and continue to try again.
localSetIndex = j;
continue;
}
break;
}
// The pattern matches, try to optimize.
if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
break;
} else {
// Success. Replace the set with a nop, and continue to perhaps
// optimize more.
ExpressionManipulator::nop(structSet);
}
}
}
  }

  // Helper function for optimizeBlock. Tries pushing the struct.new at index
  // i down to index j, swapping it with the instruction already at j, so
  // that it is closer to (potential) later struct.sets.
  bool trySwap(ExpressionList& list, Index i, Index j) {
    if (j == list.size() - 1) {
      // There is no reason to swap with the last element of the list, as the
      // pattern cannot match there: nothing comes after it. This also avoids
      // swapping an instruction that leaves nothing on the stack with one
      // that might leave something, which would be incorrect.
      return false;
    }
    if (auto* otherSet = list[j]->dynCast<LocalSet>();
        otherSet && otherSet->value->is<StructNew>()) {
      // Don't swap two struct.new instructions, to avoid going back and
      // forth.
      return false;
    }
// Check if the two expressions can be swapped safely considering their
// effects.
auto firstEffects = effects(list[i]);
auto secondEffects = effects(list[j]);
if (secondEffects.invalidates(firstEffects)) {
return false;
}
std::swap(list[i], list[j]);
return true;
  }

// Given a struct.new and a struct.set that occurs right after it, and that
// applies to the same data, try to apply the set during the new. This can be
// either with a nested tee:
//
// (struct.set
// (local.tee $x (struct.new X Y Z))
// X'
// )
// =>
// (local.set $x (struct.new X' Y Z))
//
// or without:
//
// (local.set $x (struct.new X Y Z))
// (struct.set (local.get $x) X')
// =>
// (local.set $x (struct.new X' Y Z))
//
// Returns true if we succeeded.
bool optimizeSubsequentStructSet(StructNew* new_,
StructSet* set,
Index refLocalIndex) {
// Leave unreachable code for DCE, to avoid updating types here.
if (new_->type == Type::unreachable || set->type == Type::unreachable) {
return false;
}
auto index = set->index;
auto& operands = new_->operands;
// Check for effects that prevent us moving the struct.set's value (X' in
// the function comment) into its new position in the struct.new. First, it
// must be ok to move it past the local.set (otherwise, it might read from
// memory using that local, and depend on the struct.new having already
// occurred; or, if it writes to that local, then it would cross another
// write).
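    // For example (a sketch of a disallowed case): if the set's value reads
    // the same local, then moving it into the struct.new would make it
    // execute before the local.set, observing the wrong value:
    //
    //  (local.set $x (struct.new X Y Z))
    //  (struct.set (local.get $x)
    //              (.. (local.get $x) ..))  ;; reads $x too early if moved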
auto setValueEffects = effects(set->value);
if (setValueEffects.localsRead.count(refLocalIndex) ||
setValueEffects.localsWritten.count(refLocalIndex)) {
return false;
}
    // We must move the set's value past the operands at indexes greater than
    // the one being written (Y and Z in the example in the comment on this
    // function). If this is not with_default then we must check for
    // interfering effects (a with_default new has no operands to check).
// TODO When this function is called repeatedly in a sequence this can
// become quadratic - perhaps we should memoize (though, struct sizes
// tend to not be ridiculously large).
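    // For example (a sketch with a hypothetical global $g): if operand Z
    // does (global.set $g ..) and the set's value X' is (global.get $g),
    // then moving X' into the struct.new would make it execute before Z
    // instead of after it, reading a stale value of $g.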
if (!new_->isWithDefault()) {
for (Index i = index + 1; i < operands.size(); i++) {
auto operandEffects = effects(operands[i]);
if (operandEffects.invalidates(setValueEffects)) {
// TODO: we could use locals to reorder everything
return false;
}
}
}
// We can optimize here!
Builder builder(*getModule());
// If this was with_default then we add default values now. That does
// increase code size in some cases (if there are many values, and few sets
// that get removed), but in general this optimization is worth it.
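    // For example (a sketch, with a hypothetical struct type $T that has
    // three i32 fields, setting field 0):
    //
    //  (local.set $x (struct.new_default $T))
    //  (struct.set (local.get $x) X')
    // =>
    //  (local.set $x (struct.new $T X' (i32.const 0) (i32.const 0)))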
if (new_->isWithDefault()) {
auto& fields = new_->type.getHeapType().getStruct().fields;
for (auto& field : fields) {
auto zero = Literal::makeZero(field.type);
operands.push_back(builder.makeConstantExpression(zero));
}
}
    // See if we need to keep the old value: if it has side effects that we
    // cannot remove, we must keep it (and drop it).
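    // For example (a sketch; $effects is a hypothetical function with side
    // effects), storing X' to the first field:
    //
    //  (struct.new (call $effects) Y Z)
    // =>
    //  (struct.new (block (drop (call $effects)) X') Y Z)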
if (effects(operands[index]).hasUnremovableSideEffects()) {
operands[index] =
builder.makeSequence(builder.makeDrop(operands[index]), set->value);
} else {
operands[index] = set->value;
}
return true;
  }

  EffectAnalyzer effects(Expression* expr) {
    return EffectAnalyzer(getPassOptions(), *getModule(), expr);
  }
};

} // anonymous namespace

Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); }

} // namespace wasm