/*
 * Copyright 2024 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Optimizes heap (GC) stores.
//
// TODO: Add dead store elimination / load forwarding here.
//

| #include "cfg/cfg-traversal.h" |
| #include "ir/effects.h" |
| #include "pass.h" |
| #include "wasm-builder.h" |
| #include "wasm.h" |
| |
namespace wasm {

namespace {

// In each basic block we will store the relevant heap store operations and
// other actions that matter to our analysis.
struct Info {
  std::vector<Expression**> actions;
};

struct HeapStoreOptimization
  : public WalkerPass<CFGWalker<HeapStoreOptimization,
                                Visitor<HeapStoreOptimization>,
                                Info>> {
  bool isFunctionParallel() override { return true; }

  // Locals are not modified here.
  bool requiresNonNullableLocalFixups() override { return false; }

  std::unique_ptr<Pass> create() override {
    return std::make_unique<HeapStoreOptimization>();
  }

  // Branches outside of the function can be ignored, as we only look at local
  // state in the function. (This may need to change if we do more general
  // dead store elimination.)
  bool ignoreBranchesOutsideOfFunc = true;

  // Store struct.sets and blocks, as we can find patterns among those.
  void addAction() {
    if (currBasicBlock) {
      currBasicBlock->contents.actions.push_back(getCurrentPointer());
    }
  }
  void visitStructSet(StructSet* curr) { addAction(); }
  void visitBlock(Block* curr) { addAction(); }

  void visitFunction(Function* curr) {
    // Now that the walk is complete and we have a CFG, find things to
    // optimize.
    for (auto& block : basicBlocks) {
      for (auto** currp : block->contents.actions) {
        auto* curr = *currp;
        if (auto* set = curr->dynCast<StructSet>()) {
          optimizeStructSet(set, currp);
        } else if (auto* block = curr->dynCast<Block>()) {
          optimizeBlock(block);
        } else {
          WASM_UNREACHABLE("bad action");
        }
      }
    }
  }

  // Optimize a struct.set. This also receives a pointer to the place that
  // refers to the struct.set, so that we can replace it if we optimize.
  void optimizeStructSet(StructSet* curr, Expression** currp) {
    // If our reference is a tee of a struct.new, we may be able to fold the
    // stored value into the new itself:
    //
    // (struct.set (local.tee $x (struct.new X Y Z)) X')
    // =>
    // (local.set $x (struct.new X' Y Z))
    //
    if (auto* tee = curr->ref->dynCast<LocalSet>()) {
      if (auto* new_ = tee->value->dynCast<StructNew>()) {
        if (optimizeSubsequentStructSet(new_, curr, tee->index)) {
          // Success, so we no longer need the struct.set, and the tee can
          // become a plain local.set that takes the struct.set's place.
          tee->makeSet();
          *currp = tee;
        }
      }
    }
  }

  // Similar to the above case of a struct.set whose reference is a tee of a
  // struct.new, we can optimize subsequent sets in a block:
  //
  // (local.set $x (struct.new X Y Z))
  // (struct.set (local.get $x) X')
  // =>
  // (local.set $x (struct.new X' Y Z))
  //
  // We also handle other struct.sets that immediately follow this one. And if
  // the instruction following the new is not a matching struct.set, we try to
  // push the new further down, as in the example below.
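  //
  // For example (hypothetical instructions, where $y is an unrelated local
  // and (..) has no effects that conflict):
  //
  // (local.set $x (struct.new X Y Z))
  // (local.set $y (..))
  // (struct.set (local.get $x) X')
  //
  // Here we first swap the struct.new's local.set past the set of $y, after
  // which the struct.set follows it directly and folds into the new as above.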
  void optimizeBlock(Block* curr) {
    auto& list = curr->list;

    for (Index i = 0; i < list.size(); i++) {
      auto* localSet = list[i]->dynCast<LocalSet>();
      if (!localSet) {
        continue;
      }
      auto* new_ = localSet->value->dynCast<StructNew>();
      if (!new_) {
        continue;
      }

      // This local.set of a struct.new looks good. Find struct.sets after it
      // to optimize.
      Index localSetIndex = i;
      for (Index j = localSetIndex + 1; j < list.size(); j++) {

        // Check that the next instruction is a struct.set on the same local
        // as the struct.new.
        auto* structSet = list[j]->dynCast<StructSet>();
        auto* localGet =
          structSet ? structSet->ref->dynCast<LocalGet>() : nullptr;
        if (!structSet || !localGet || localGet->index != localSet->index) {
          // When the pattern no longer matches, we try to push the struct.new
          // further down. If that is not possible, we stop optimizing
          // struct.sets for this struct.new.
          if (trySwap(list, localSetIndex, j)) {
            // Update the index and continue to try again.
            localSetIndex = j;
            continue;
          }
          break;
        }

        // The pattern matches, try to optimize.
        if (!optimizeSubsequentStructSet(new_, structSet, localGet->index)) {
          break;
        } else {
          // Success. Replace the set with a nop, and continue to perhaps
          // optimize more.
          ExpressionManipulator::nop(structSet);
        }
      }
    }
  }

  // Helper function for optimizeBlock. Tries pushing the struct.new at index
  // i down to index j, swapping it with the instruction already at j, so that
  // it is closer to (potential) later struct.sets.
  bool trySwap(ExpressionList& list, Index i, Index j) {
    if (j == list.size() - 1) {
      // There is no reason to swap with the last element of the list, as
      // nothing can come after it to match the pattern. This also avoids
      // swapping an instruction that leaves nothing on the stack with one
      // that might leave a value (the block's result), which would be
      // incorrect.
      return false;
    }

    if (auto* otherSet = list[j]->dynCast<LocalSet>()) {
      if (otherSet->value->is<StructNew>()) {
        // Don't swap past another local.set of a struct.new, to avoid the two
        // patterns pushing each other back and forth.
        return false;
      }
    }
    // Check if the two expressions can be swapped safely considering their
    // effects.
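    // (For example, if list[j] read a local that list[i]'s local.set writes,
    // then executing list[j] first would change the value it reads.)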
    auto firstEffects = effects(list[i]);
    auto secondEffects = effects(list[j]);
    if (secondEffects.invalidates(firstEffects)) {
      return false;
    }

    std::swap(list[i], list[j]);
    return true;
  }

  // Given a struct.new and a struct.set that occurs right after it, and that
  // applies to the same data, try to apply the set during the new. This can
  // be either with a nested tee:
  //
  // (struct.set
  //   (local.tee $x (struct.new X Y Z))
  //   X'
  // )
  // =>
  // (local.set $x (struct.new X' Y Z))
  //
  // or without:
  //
  // (local.set $x (struct.new X Y Z))
  // (struct.set (local.get $x) X')
  // =>
  // (local.set $x (struct.new X' Y Z))
  //
  // Returns true if we succeeded.
  bool optimizeSubsequentStructSet(StructNew* new_,
                                   StructSet* set,
                                   Index refLocalIndex) {
    // Leave unreachable code for DCE, to avoid updating types here.
    if (new_->type == Type::unreachable || set->type == Type::unreachable) {
      return false;
    }

    auto index = set->index;
    auto& operands = new_->operands;

    // Check for effects that prevent us from moving the struct.set's value
    // (X' in the function comment) into its new position in the struct.new.
    // First, it must be ok to move it past the local.set (otherwise, it might
    // read the new struct via that local, and depend on the struct.new having
    // already occurred; or, if it writes to that local, then it would cross
    // another write).
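    //
    // For example (a hypothetical value), if the set's value were
    // (struct.get (local.get $x) 0), where $x is the reference local, then
    // moving it into the struct.new would make it read $x before the
    // local.set/tee has written it.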
    auto setValueEffects = effects(set->value);
    if (setValueEffects.localsRead.count(refLocalIndex) ||
        setValueEffects.localsWritten.count(refLocalIndex)) {
      return false;
    }

    // We must also move the set's value past the operands at greater indexes
    // (Y and Z in the example in the comment on this function). If the new is
    // not with_default then we must check those operands for effects.
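    // For example (hypothetically), if Z read a global that the set's value
    // writes, then moving the value to before Z would change what Z reads.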
    // TODO: When this function is called repeatedly in a sequence this can
    //       become quadratic - perhaps we should memoize (though, struct
    //       sizes tend to not be ridiculously large).
    if (!new_->isWithDefault()) {
      for (Index i = index + 1; i < operands.size(); i++) {
        auto operandEffects = effects(operands[i]);
        if (operandEffects.invalidates(setValueEffects)) {
          // TODO: we could use locals to reorder everything
          return false;
        }
      }
    }

    // We can optimize here!
    Builder builder(*getModule());

    // If this was with_default then we add default values now. That does
    // increase code size in some cases (if there are many values, and few
    // sets that get removed), but in general this optimization is worth it.
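    //
    // For example, with a hypothetical type $T whose fields are (i32 f64):
    //
    // (struct.new_default $T)
    // =>
    // (struct.new $T (i32.const 0) (f64.const 0))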
    if (new_->isWithDefault()) {
      auto& fields = new_->type.getHeapType().getStruct().fields;
      for (auto& field : fields) {
        auto zero = Literal::makeZero(field.type);
        operands.push_back(builder.makeConstantExpression(zero));
      }
    }

    // See if we need to keep the old value around for its side effects.
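    // For example (hypothetically, if the old operand is a call and we are
    // writing X' to field 0):
    //
    // (struct.new (call $f) Y Z)
    // =>
    // (struct.new (block (drop (call $f)) X') Y Z)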
    if (effects(operands[index]).hasUnremovableSideEffects()) {
      operands[index] =
        builder.makeSequence(builder.makeDrop(operands[index]), set->value);
    } else {
      operands[index] = set->value;
    }

    return true;
  }

  EffectAnalyzer effects(Expression* expr) {
    return EffectAnalyzer(getPassOptions(), *getModule(), expr);
  }
};

} // anonymous namespace

Pass* createHeapStoreOptimizationPass() { return new HeapStoreOptimization(); }

} // namespace wasm