| /* |
| * Copyright 2017 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Eliminate redundant local.sets: if a local already has a particular |
| // value, we don't need to set it again. A common case here is loops |
| // that start at zero, since the default value is initialized to |
| // zero anyhow. |
| // |
| // A risk here is that we extend live ranges, e.g. we may use the default |
| // value at the very end of a function, keeping that local alive throughout. |
| // For that reason it is probably better to run this near the end of |
| // optimization, and especially after coalesce-locals. A final vaccum |
| // should be done after it, as this pass can leave around drop()s of |
| // values no longer necessary. |
| // |
| // So far this tracks constant values, and for everything else it considers |
| // them unique (so each local.set of a non-constant is a unique value, each |
| // merge is a unique value, etc.; there is no sophisticated value numbering |
| // here). |
| // |
| |
| #include <cfg/cfg-traversal.h> |
| #include <ir/literal-utils.h> |
| #include <ir/numbering.h> |
| #include <ir/properties.h> |
| #include <ir/utils.h> |
| #include <pass.h> |
| #include <support/small_set.h> |
| #include <support/unique_deferring_queue.h> |
| #include <wasm-builder.h> |
| #include <wasm.h> |
| |
| namespace wasm { |
| |
| // Map each local index to its current value number (which is computed in |
| // ValueNumbering). |
| using LocalValues = std::vector<Index>; |
| |
| namespace { |
| |
| // information in a basic block |
| struct Info { |
| LocalValues start, end; // the local values at the start and end of the block |
| std::vector<Expression**> items; |
| }; |
| |
| struct RedundantSetElimination |
| : public WalkerPass<CFGWalker<RedundantSetElimination, |
| Visitor<RedundantSetElimination>, |
| Info>> { |
| bool isFunctionParallel() override { return true; } |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<RedundantSetElimination>(); |
| } |
| |
| // Branches outside of the function can be ignored, as we only look at locals |
| // which vanish when we leave. |
| bool ignoreBranchesOutsideOfFunc = true; |
| |
| Index numLocals; |
| |
| // In rare cases we make a change to a type that requires a refinalize. |
| bool refinalize = false; |
| |
| // cfg traversal work |
| |
| static void doVisitLocalGet(RedundantSetElimination* self, |
| Expression** currp) { |
| if (self->currBasicBlock) { |
| self->currBasicBlock->contents.items.push_back(currp); |
| } |
| } |
| static void doVisitLocalSet(RedundantSetElimination* self, |
| Expression** currp) { |
| if (self->currBasicBlock) { |
| self->currBasicBlock->contents.items.push_back(currp); |
| } |
| } |
| |
| // main entry point |
| |
| void doWalkFunction(Function* func) { |
| numLocals = func->getNumLocals(); |
| if (numLocals == 0) { |
| return; // nothing to do |
| } |
| |
| // Create a unique value for use to mark unseen locations. |
| unseenValue = valueNumbering.getUniqueValue(); |
| |
| // create the CFG by walking the IR |
| CFGWalker<RedundantSetElimination, Visitor<RedundantSetElimination>, Info>:: |
| doWalkFunction(func); |
| // flow values across blocks |
| flowValues(func); |
| // remove redundant sets |
| optimize(func); |
| |
| if (refinalize) { |
| ReFinalize().walkFunctionInModule(func, this->getModule()); |
| } |
| } |
| |
| // Use a value numbering for the values of expressions. |
| ValueNumbering valueNumbering; |
| |
| // In additon to valueNumbering, each block has values for each merge. |
| std::unordered_map<BasicBlock*, std::unordered_map<Index, Index>> |
| blockMergeValues; |
| |
| // A value that indicates we haven't seen this location yet. |
| Index unseenValue; |
| |
| Index getUniqueValue() { |
| auto value = valueNumbering.getUniqueValue(); |
| #ifdef RSE_DEBUG |
| std::cout << "new unique value " << value << '\n'; |
| #endif |
| return value; |
| } |
| |
| Index getValue(Literals lit) { |
| auto value = valueNumbering.getValue(lit); |
| #ifdef RSE_DEBUG |
| std::cout << "lit value " << value << '\n'; |
| #endif |
| return value; |
| } |
| |
| Index getBlockMergeValue(BasicBlock* block, Index index) { |
| auto& mergeValues = blockMergeValues[block]; |
| auto iter = mergeValues.find(index); |
| if (iter != mergeValues.end()) { |
| return iter->second; |
| } |
| #ifdef RSE_DEBUG |
| std::cout << "new block-merge value for " << block << " : " << index |
| << '\n'; |
| #endif |
| return mergeValues[index] = getUniqueValue(); |
| } |
| |
| bool isBlockMergeValue(BasicBlock* block, Index index, Index value) { |
| auto iter = blockMergeValues.find(block); |
| if (iter == blockMergeValues.end()) { |
| return false; |
| } |
| auto& mergeValues = iter->second; |
| auto iter2 = mergeValues.find(index); |
| if (iter2 == mergeValues.end()) { |
| return false; |
| } |
| return value == iter2->second; |
| } |
| |
| Index getValue(Expression* expr, LocalValues& currValues) { |
| if (auto* get = expr->dynCast<LocalGet>()) { |
| // a copy of whatever that was |
| return currValues[get->index]; |
| } |
| auto value = valueNumbering.getValue(expr); |
| #ifdef RSE_DEBUG |
| std::cout << "expr value " << value << '\n'; |
| #endif |
| return value; |
| } |
| |
| // flowing |
| |
| void flowValues(Function* func) { |
| for (auto& block : basicBlocks) { |
| LocalValues& start = block->contents.start; |
| start.resize(numLocals); |
| if (block.get() == entry) { |
| // params are complex values we can't optimize; vars are zeros |
| for (Index i = 0; i < numLocals; i++) { |
| auto type = func->getLocalType(i); |
| if (func->isParam(i)) { |
| #ifdef RSE_DEBUG |
| std::cout << "new param value for " << i << '\n'; |
| #endif |
| start[i] = getUniqueValue(); |
| } else if (!LiteralUtils::canMakeZero(type)) { |
| #ifdef RSE_DEBUG |
| std::cout << "new unique value for non-zeroable " << i << '\n'; |
| #endif |
| start[i] = getUniqueValue(); |
| } else { |
| start[i] = getValue(Literal::makeZeros(type)); |
| } |
| } |
| } else { |
| // other blocks have all unseen values to begin with |
| for (Index i = 0; i < numLocals; i++) { |
| start[i] = unseenValue; |
| } |
| } |
| // the ends all begin unseen |
| LocalValues& end = block->contents.end; |
| end.resize(numLocals); |
| for (Index i = 0; i < numLocals; i++) { |
| end[i] = unseenValue; |
| } |
| } |
| // keep working while stuff is flowing. we use a unique deferred queue |
| // which ensures both FIFO and that we don't do needless work - if |
| // A and B reach C, and both queue C, we only want to do C at the latest |
| // time, when we have information from all those reaching it. |
| UniqueDeferredQueue<BasicBlock*> work; |
| work.push(entry); |
| while (!work.empty()) { |
| auto* curr = work.pop(); |
| #ifdef RSE_DEBUG |
| std::cout << "flow block " << curr << '\n'; |
| #endif |
| // process a block: first, update its start based on those reaching it |
| if (!curr->in.empty()) { |
| if (curr->in.size() == 1) { |
| // just copy the pred, nothing to merge |
| curr->contents.start = (*curr->in.begin())->contents.end; |
| } else { |
| // perform a merge |
| auto in = curr->in; |
| for (Index i = 0; i < numLocals; i++) { |
| auto old = curr->contents.start[i]; |
| // If we already had a merge value here, keep it. |
| // TODO This may have some false positives, as we may e.g. have |
| // a single pred that first gives us x, then later y after |
| // flow led to a merge, and we may see x and y at the same |
| // time due to flow from a successor, and then it looks like |
| // we need a merge but we don't. avoiding that would require |
| // more memory and is probably not worth it, but might be |
| // worth investigating |
| // NB While suboptimal, this simplification provides a simple proof |
| // of convergence. We prove that, in each fixed block+local, |
| // the value number at the end is nondecreasing across |
| // iterations, by induction on the iteration: |
| // * The first iteration is on the entry block. It increases |
| // the value number at the end from 0 (unseen) to something |
| // else (a value number for 0 for locals, a unique value |
| // for params; all >0). |
| // * Induction step: assuming the property holds for all past |
| // iterations, consider the current iteration. Of our |
| // predecessors, those that we iterated on have the property; |
| // those that we haven't will have 0 (unseen). |
| // * If we assign to that local in this block, that will be |
| // the value in the output, forever, and it is greater |
| // than the initial value of 0. |
| // * If we see different values coming in, we create a merge |
| // value number. Its number is higher than everything |
| // else since we give it the next available number, so we |
| // do not decrease in this iteration, and we will output |
| // the same value in the future too (here is where we use |
| // the simplification property). |
| // * Otherwise, we will flow the incoming value through, |
| // and it did not decrease (by induction), so neither do |
| // we. |
| // Finally, given value numbers are nondecreasing, we must |
| // converge since we only keep working as long as we see new |
| // values at the end of a block. |
| // |
| // Not that we don't trust this proof, but the convergence |
| // property (value numbers at block ends do not decrease) is |
| // verified later down. |
| if (isBlockMergeValue(curr, i, old)) { |
| continue; |
| } |
| auto iter = in.begin(); |
| auto value = (*iter)->contents.end[i]; |
| iter++; |
| while (iter != in.end()) { |
| auto otherValue = (*iter)->contents.end[i]; |
| if (value == unseenValue) { |
| value = otherValue; |
| } else if (otherValue == unseenValue) { |
| // nothing to do, other has no information |
| } else if (value != otherValue) { |
| // 2 different values, this is a merged value |
| value = getBlockMergeValue(curr, i); |
| break; // no more work once we see a merge |
| } |
| iter++; |
| } |
| curr->contents.start[i] = value; |
| } |
| } |
| } |
| #ifdef RSE_DEBUG |
| dump("start", curr->contents.start); |
| #endif |
| // flow values through it, then add those we can reach if they need an |
| // update. |
| auto currValues = curr->contents.start; // we'll modify this as we go |
| auto& items = curr->contents.items; |
| for (auto** item : items) { |
| if (auto* set = (*item)->dynCast<LocalSet>()) { |
| auto* value = Properties::getFallthrough( |
| set->value, getPassOptions(), *getModule()); |
| currValues[set->index] = getValue(value, currValues); |
| } |
| } |
| if (currValues == curr->contents.end) { |
| // nothing changed, so no more work to do |
| // note that the first iteration this is always not the case, |
| // since end contains unseen (and then the comparison ends on |
| // the first element) |
| continue; |
| } |
| // update the end state and update children |
| #ifndef NDEBUG |
| // verify the convergence property mentioned in the NB comment |
| // above: the value numbers at the end must be nondecreasing |
| for (Index i = 0; i < numLocals; i++) { |
| assert(currValues[i] >= curr->contents.end[i]); |
| } |
| #endif |
| curr->contents.end.swap(currValues); |
| #ifdef RSE_DEBUG |
| dump("end ", curr->contents.end); |
| #endif |
| for (auto* next : curr->out) { |
| work.push(next); |
| } |
| } |
| } |
| |
| // optimizing |
| void optimize(Function* func) { |
| // Find which locals are refinable, that is, that when we see a global.get |
| // of them we may consider switching to another local index that has the |
| // same value but in a refined type. Computing which locals are relevant for |
| // that optimization is efficient because it avoids a bunch of work below |
| // for hashing numbers etc. |
| std::vector<bool> isRefinable(numLocals, false); |
| for (Index i = 0; i < numLocals; i++) { |
| // TODO: we could also note which locals have "maximal" types, where no |
| // other local is a refinement of them |
| if (func->getLocalType(i).isRef()) { |
| isRefinable[i] = true; |
| } |
| } |
| |
| // in each block, run the values through the sets, |
| // and remove redundant sets when we see them |
| for (auto& block : basicBlocks) { |
| auto currValues = block->contents.start; // we'll modify this as we go |
| auto& items = block->contents.items; |
| |
| // Set up the equivalences at the beginning of the block. We'll update |
| // them as we go, so we can use them at any point in the middle. This data |
| // structure maps a value number to the local indexes that have that |
| // value. |
| // |
| // Note that the set here must be ordered to avoid nondeterminism when |
| // picking between multiple equally-good indexes (we'll pick the first in |
| // the iteration, which will have the lowest index). |
| std::unordered_map<Index, SmallSet<Index, 3>> valueToLocals; |
| assert(currValues.size() == numLocals); |
| for (Index i = 0; i < numLocals; i++) { |
| if (isRefinable[i]) { |
| valueToLocals[currValues[i]].insert(i); |
| } |
| } |
| |
| for (auto** item : items) { |
| if (auto* set = (*item)->dynCast<LocalSet>()) { |
| auto oldValue = currValues[set->index]; |
| auto* value = Properties::getFallthrough( |
| set->value, getPassOptions(), *getModule()); |
| auto newValue = getValue(value, currValues); |
| auto index = set->index; |
| if (newValue == oldValue) { |
| remove(item); |
| } else { |
| // update for later steps |
| currValues[index] = newValue; |
| if (isRefinable[index]) { |
| valueToLocals[oldValue].erase(index); |
| valueToLocals[newValue].insert(index); |
| } |
| } |
| continue; |
| } |
| |
| // For gets, see if there is another index with that value, of a more |
| // refined type. |
| auto* get = (*item)->dynCast<LocalGet>(); |
| if (!isRefinable[get->index]) { |
| continue; |
| } |
| |
| for (auto i : valueToLocals[getValue(get, currValues)]) { |
| auto currType = func->getLocalType(get->index); |
| auto possibleType = func->getLocalType(i); |
| if (possibleType != currType && |
| Type::isSubType(possibleType, currType)) { |
| // We found an improvement! |
| get->index = i; |
| get->type = possibleType; |
| refinalize = true; |
| } |
| } |
| } |
| } |
| } |
| |
| void remove(Expression** item) { |
| auto* set = (*item)->cast<LocalSet>(); |
| auto* value = set->value; |
| if (!set->isTee()) { |
| auto* drop = ExpressionManipulator::convert<LocalSet, Drop>(set); |
| drop->value = value; |
| drop->finalize(); |
| } else { |
| // If we are replacing the set with something of a more specific type, |
| // then we need to refinalize, for example: |
| // |
| // (struct.get $X 0 |
| // (local.tee $x |
| // (..something of type $Y, a subtype of $X..) |
| // ) |
| // ) |
| // |
| // After the replacement the struct.get will read from $Y, whose field may |
| // have a more refined type. |
| if (value->type != set->type) { |
| refinalize = true; |
| } |
| *item = value; |
| } |
| } |
| |
| // debugging |
| |
| void dump(BasicBlock* block) { |
| std::cout << "====\n"; |
| if (block) { |
| std::cout << "block: " << block << '\n'; |
| for (auto* out : block->out) { |
| std::cout << " goes to " << out << '\n'; |
| } |
| } |
| for (Index i = 0; i < block->contents.start.size(); i++) { |
| std::cout << " start[" << i << "] = " << block->contents.start[i] |
| << '\n'; |
| } |
| for (auto** item : block->contents.items) { |
| std::cout << " " << *item << '\n'; |
| } |
| std::cout << "====\n"; |
| } |
| |
| void dump(const char* desc, LocalValues& values) { |
| std::cout << desc << ": "; |
| for (auto x : values) { |
| std::cout << x << ' '; |
| } |
| std::cout << '\n'; |
| } |
| }; |
| |
| } // namespace |
| |
| Pass* createRedundantSetEliminationPass() { |
| return new RedundantSetElimination(); |
| } |
| |
| } // namespace wasm |