| /* |
| * Copyright 2022 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Grand Unified Flow Analysis (GUFA) |
| // |
| // Optimize based on information about what content can appear in each location |
| // in the program. This does a whole-program analysis to find that out and |
| // hopefully learn more than the type system does - for example, a type might be |
| // $A, which means $A or any subtype can appear there, but perhaps the analysis |
| // can find that only $A', a particular subtype, can appear there in practice, |
// and not $A itself or any subtype of $A', etc. Or, we may find that no value
// is actually possible at a particular location, say if we can prove that the
// casts on the way to that location allow nothing through. We can also find
// that only a single particular value of that type is possible.
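//
// For example (a sketch): if the only callers of some function $foo all pass
// the same constant for a parameter, say
//
//   (call $foo (i32.const 42))
//
// and $foo is not otherwise reachable (not exported, not called indirectly),
// then GUFA can infer that the parameter is always 42 inside $foo and replace
// reads of it with that constant.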
| // |
| // GUFA will infer constants and unreachability, and add those to the code. This |
// can increase code size if further optimizations, like dead code elimination
// and vacuum, are not run later. The "optimizing" variant of this pass will
// run such followup opts automatically in functions where we make changes, and
// so it is useful if GUFA is run near the end of the optimization pipeline.
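//
// For example (a sketch, assuming the usual wasm-opt pass names), the
// optimizing variant might be run late in a pipeline like
//
//   wasm-opt -O3 --gufa-optimizing in.wasm -o out.wasm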
| // |
| // A variation of this pass will add casts anywhere we can infer a more specific |
| // type, see |castAll| below. |
| // |
| // TODO: GUFA + polymorphic devirtualization + traps-never-happen. If we see |
| // that the possible call targets are {A, B, C}, and GUFA info lets us |
| // prove that A, C will trap if called - say, if they cast the first |
| // parameter to something GUFA proved it cannot be - then we can ignore |
| // them, and devirtualize to a call to B. |
| // |
| |
| #include "ir/drop.h" |
| #include "ir/eh-utils.h" |
| #include "ir/possible-contents.h" |
| #include "ir/properties.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| namespace { |
| |
| struct GUFAOptimizer |
| : public WalkerPass< |
| PostWalker<GUFAOptimizer, UnifiedExpressionVisitor<GUFAOptimizer>>> { |
| bool isFunctionParallel() override { return true; } |
| |
| ContentOracle& oracle; |
| |
| // Whether to run further optimizations in functions we modify. |
| bool optimizing; |
| |
| // Whether to add casts to all things that we have inferred a more refined |
| // type for. This increases code size immediately, but later optimizations |
| // generally benefit enough from these casts that overall code size actually |
| // decreases, even if some of these casts remain. However, aside from code |
| // size there may be an increase in the number of casts performed at runtime, |
| // so benchmark carefully. |
| // TODO: Add a pass to remove casts not needed for validation, which users |
| // could run at the very end. However, even with such a pass we may end |
| // up with casts that are needed for validation that were not present |
| // before. |
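  //
  // For example (a sketch, with a hypothetical type $A and subtype $A'): if
  // GUFA proves that a
  //
  //   (local.get $x)  ;; declared type (ref null $A)
  //
  // only ever contains a non-null $A', then with this option we wrap it in a
  // cast,
  //
  //   (ref.cast (ref $A') (local.get $x))
  //
  // so that later passes can benefit from the more refined type.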
| bool castAll; |
| |
| GUFAOptimizer(ContentOracle& oracle, bool optimizing, bool castAll) |
| : oracle(oracle), optimizing(optimizing), castAll(castAll) {} |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<GUFAOptimizer>(oracle, optimizing, castAll); |
| } |
| |
| bool optimized = false; |
| |
| // As we optimize, we replace expressions and create new ones. For new ones |
| // we can infer their contents based on what they replaced, e.g., if we |
| // replaced a local.get with a const, then the PossibleContents of the const |
| // are the same as the local.get (in this simple example, we could also just |
  // infer them from the const itself, of course). Rather than update the
  // ContentOracle - which is shared among threads - with new contents, each
  // function-parallel worker keeps its own map from the new expressions it
  // created to their contents.
| std::unordered_map<Expression*, PossibleContents> newContents; |
| |
| Expression* replaceCurrent(Expression* rep) { |
| newContents[rep] = oracle.getContents(getCurrent()); |
| |
| return WalkerPass< |
| PostWalker<GUFAOptimizer, |
| UnifiedExpressionVisitor<GUFAOptimizer>>>::replaceCurrent(rep); |
| } |
| |
| const PossibleContents getContents(Expression* curr) { |
| // If this is something we added ourselves, use that; otherwise the info is |
| // in the oracle. |
| if (auto iter = newContents.find(curr); iter != newContents.end()) { |
| return iter->second; |
| } |
| |
| return oracle.getContents(curr); |
| } |
| |
| void visitExpression(Expression* curr) { |
| // Skip things we can't improve in any way. |
| auto type = curr->type; |
| if (type == Type::unreachable || type == Type::none || |
| Properties::isConstantExpression(curr)) { |
| return; |
| } |
| |
| if (type.isTuple()) { |
| // TODO: tuple types. |
| return; |
| } |
| |
| // Ok, this is an interesting location that we might optimize. See what the |
| // oracle says is possible there. |
| auto contents = getContents(curr); |
| |
| auto& options = getPassOptions(); |
| auto& wasm = *getModule(); |
| Builder builder(wasm); |
| |
| if (contents.getType() == Type::unreachable) { |
| // This cannot contain any possible value at all. It must be unreachable |
| // code. |
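      //
      // For example (a sketch): a cast like
      //
      //   (ref.cast (ref $B)
      //     (..something GUFA proves contains only values of type $A..))
      //
      // where $A and $B are unrelated hypothetical types can never let any
      // value through, so nothing at all can appear here, and we can replace
      // this with its (dropped) children followed by an unreachable.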
| replaceCurrent(getDroppedChildrenAndAppend( |
| curr, wasm, options, builder.makeUnreachable())); |
| optimized = true; |
| return; |
| } |
| |
| // This is reachable. Check if we can emit something optimized for it. |
| // TODO: can we handle more general things here too? |
| if (!contents.canMakeExpression()) { |
| return; |
| } |
| |
| if (contents.isNull() && curr->type.isNullable()) { |
| // Null values are all identical, so just fix up the type here if we need |
| // to (the null's type might not fit in this expression, if it passed |
| // through casts). |
| if (!Type::isSubType(contents.getType(), curr->type)) { |
| contents = PossibleContents::literal( |
| Literal::makeNull(curr->type.getHeapType())); |
| } |
| |
| // Note that if curr's type is *not* nullable, then the code will trap at |
| // runtime (the null must arrive through a cast that will trap). We handle |
| // that below, so we don't need to think about it here. |
| |
| // TODO: would emitting a more specific null be useful when valid? |
| } |
| |
| auto* c = contents.makeExpression(wasm); |
| |
| // We can only place the constant value here if it has the right type. For |
| // example, a block may return (ref any), that is, not allow a null, but in |
| // practice only a null may flow there if it goes through casts that will |
| // trap at runtime. |
    // TODO: GUFA should eventually do this, but that will require it to
    //       properly filter content not just on ref.cast, as it does now, but
    //       also on ref.as etc. Once it does those, we could assert here that
    //       the type is valid.
| if (Type::isSubType(c->type, curr->type)) { |
| replaceCurrent(getDroppedChildrenAndAppend(curr, wasm, options, c)); |
| optimized = true; |
| } else { |
| // The type is not compatible: we cannot place |c| in this location, even |
| // though we have proven it is the only value possible here. |
| if (Properties::isConstantExpression(c)) { |
| // The type is not compatible and this is a simple constant expression |
| // like a ref.func. That means this code must be unreachable. (See below |
| // for the case of a non-constant.) |
| replaceCurrent(getDroppedChildrenAndAppend( |
| curr, wasm, options, builder.makeUnreachable())); |
| optimized = true; |
| } else { |
| // This is not a constant expression, but we are certain it is the right |
| // value. Atm the only such case we handle is a global.get of an |
| // immutable global. We don't know what the value will be, nor its |
| // specific type, but we do know that a global.get will get that value |
| // properly. However, in this case it does not have the right type for |
| // this location. That can happen since the global.get does not have |
| // exactly the proper type for the contents: the global.get might be |
| // nullable, for example, even though the contents are not actually a |
| // null. For example, consider what happens here: |
| // |
| // (global $foo (ref null any) (struct.new $Foo)) |
| // .. |
| // (ref.as_non_null |
| // (global.get $foo)) |
| // |
| // We create a $Foo in the global $foo, so its value is not a null. But |
| // the global's type is nullable, so the global.get's type will be as |
| // well. When we get to the ref.as_non_null, we then want to replace it |
| // with a global.get - in fact that's what its child already is, showing |
| // it is the right content for it - but that global.get would not have a |
| // non-nullable type like a ref.as_non_null must have, so we cannot |
| // simply replace it. |
| // |
        // For now, do nothing here, but in some cases we could probably
        // optimize (e.g. by adding a ref.as_non_null in the example above).
        // TODO
| assert(c->is<GlobalGet>()); |
| } |
| } |
| } |
| |
| void visitRefEq(RefEq* curr) { |
| if (curr->type == Type::unreachable) { |
| // Leave this for DCE. |
| return; |
| } |
| |
| auto leftContents = getContents(curr->left); |
| auto rightContents = getContents(curr->right); |
| |
| if (!PossibleContents::haveIntersection(leftContents, rightContents)) { |
| // The contents prove the two sides cannot contain the same reference, so |
| // we infer 0. |
| // |
| // Note that this is fine even if one of the sides is None. In that case, |
| // no value is possible there, and the intersection is empty, so we will |
| // get here and emit a 0. That 0 will never be reached as the None child |
| // will be turned into an unreachable, so it does not cause any problem. |
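      //
      // For example (a sketch):
      //
      //   (ref.eq
      //     (..something GUFA proves is only a non-null hypothetical $A..)
      //     (..something GUFA proves is only a non-null hypothetical $B..))
      //
      // If neither $A nor $B is a subtype of the other then no single
      // reference can appear on both sides, and the result must be 0.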
| auto* result = Builder(*getModule()).makeConst(Literal(int32_t(0))); |
| replaceCurrent(getDroppedChildrenAndAppend( |
| curr, *getModule(), getPassOptions(), result)); |
| } |
| } |
| |
| void visitRefTest(RefTest* curr) { |
| if (curr->type == Type::unreachable) { |
| // Leave this for DCE. |
| return; |
| } |
| |
| auto refContents = getContents(curr->ref); |
| auto refType = refContents.getType(); |
| if (refType.isRef()) { |
      // We have some knowledge of the type here. Use that to optimize: RefTest
      // returns 1 if the input is of the cast type or any subtype of it, that
      // is, we are looking for a type in the cone of the cast type.
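      //
      // For example (a sketch): in
      //
      //   (ref.test (ref $B)
      //     (..something GUFA proves is only a non-null $A..))
      //
      // (with hypothetical types $A and $B) the result must be 0 if the cone
      // of $A does not intersect the cone of $B, and must be 1 if the cone of
      // $A is entirely contained in the cone of $B.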
| auto intendedContents = PossibleContents::fullConeType(curr->castType); |
| |
| auto optimize = [&](int32_t result) { |
| auto* last = Builder(*getModule()).makeConst(Literal(int32_t(result))); |
| replaceCurrent(getDroppedChildrenAndAppend( |
| curr, *getModule(), getPassOptions(), last)); |
| }; |
| |
| if (!PossibleContents::haveIntersection(refContents, intendedContents)) { |
| optimize(0); |
| } else if (PossibleContents::isSubContents(refContents, |
| intendedContents)) { |
| optimize(1); |
| } |
| } |
| } |
| |
| void visitRefCast(RefCast* curr) { |
| auto currType = curr->type; |
| auto inferredType = getContents(curr).getType(); |
| if (inferredType.isRef() && inferredType != currType && |
| Type::isSubType(inferredType, currType)) { |
| // We have inferred that this will only contain something of a more |
| // refined type, so we might as well cast to that more refined type. |
| // |
| // Note that we could in principle apply this in all expressions by adding |
      // a cast. However, to be careful with code size, we only refine existing
      // casts here. See addNewCasts() for where we add entirely new casts.
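      //
      // For example (a sketch, with a hypothetical type $A and subtype $A'):
      //
      //   (ref.cast (ref $A) (..))
      //
      // becomes, if GUFA proves that only a (ref $A') can appear here,
      //
      //   (ref.cast (ref $A') (..))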
| curr->type = inferredType; |
| optimized = true; |
| } |
| |
| // Apply the usual optimizations as well, such as potentially replacing this |
| // with a constant. |
| visitExpression(curr); |
| } |
| |
| // TODO: If an instruction would trap on null, like struct.get, we could |
| // remove it here if it has no possible contents and if we are in |
| // traps-never-happen mode (that is, we'd have proven it can only trap, |
| // but we assume no traps happen, so it must be dead code). That info |
| // is present in OptimizeInstructions where it removes redundant |
| // ref.as_non_null (it removes them because it knows that the parent |
| // will trap on null anyhow), so maybe there is a way to share that |
| // information about parents. |
| |
| void visitFunction(Function* func) { |
| if (optimized) { |
| // Optimization may introduce more unreachables, which we need to |
| // propagate. |
| ReFinalize().walkFunctionInModule(func, getModule()); |
| } |
| |
| // Potentially add new casts after we do our first pass of optimizations + |
| // refinalize (doing it after refinalizing lets us add as few new casts as |
| // possible). |
| if (castAll && addNewCasts(func)) { |
| optimized = true; |
| } |
| |
| if (!optimized) { |
| return; |
| } |
| |
| // We may add blocks around pops, which we must fix up. |
| EHUtils::handleBlockNestedPops(func, *getModule()); |
| |
| // If we are in "optimizing" mode, we'll also run some more passes on this |
| // function that we just optimized. If not, leave now. |
| if (!optimizing) { |
| return; |
| } |
| |
| PassRunner runner(getPassRunner()); |
| // New unreachables we added have created dead code we can remove. If we do |
| // not do this, then running GUFA repeatedly can actually increase code size |
| // (by adding multiple unneeded unreachables). |
| runner.add("dce"); |
| // New drops we added allow us to remove more unused code and values. As |
    // with unreachables, without a vacuum we may increase code size, since in
    // nested expressions we may apply the same value multiple times:
| // |
| // (block $out |
| // (block $in |
| // (i32.const 10))) |
| // |
| // In each of the blocks we'll infer the value must be 10, so we'll end up |
| // with this repeating code: |
| // |
| // (block ;; a new block just to drop the old outer block |
| // (drop |
| // (block $out |
| // (drop |
| // (block $in |
| // (i32.const 10) |
| // ) |
| // ) |
| // (i32.const 10) |
| // ) |
| // ) |
| // (i32.const 10) |
| // ) |
| runner.add("vacuum"); |
| runner.runOnFunction(func); |
| } |
| |
  // Add a new cast wherever we know a value has a more refined type than the
  // IR provides. Returns whether we optimized anything.
| bool addNewCasts(Function* func) { |
| // Subtyping and casts only make sense if GC is enabled. |
| if (!getModule()->features.hasGC()) { |
| return false; |
| } |
| |
| struct Adder : public PostWalker<Adder, UnifiedExpressionVisitor<Adder>> { |
| GUFAOptimizer& parent; |
| |
| Adder(GUFAOptimizer& parent) : parent(parent) {} |
| |
| bool optimized = false; |
| |
| void visitExpression(Expression* curr) { |
| if (!curr->type.isRef()) { |
          // Ignore anything that is not a reference, as only references can
          // be refined by adding a cast.
| return; |
| } |
| |
| auto oracleType = parent.getContents(curr).getType(); |
| if (oracleType.isRef() && oracleType != curr->type && |
| Type::isSubType(oracleType, curr->type)) { |
| replaceCurrent(Builder(*getModule()).makeRefCast(curr, oracleType)); |
| optimized = true; |
| } |
| } |
| }; |
| |
| Adder adder(*this); |
| adder.walkFunctionInModule(func, getModule()); |
| if (adder.optimized) { |
| ReFinalize().walkFunctionInModule(func, getModule()); |
| return true; |
| } |
| return false; |
| } |
| }; |
| |
| struct GUFAPass : public Pass { |
| bool optimizing; |
| bool castAll; |
| |
| GUFAPass(bool optimizing, bool castAll) |
| : optimizing(optimizing), castAll(castAll) {} |
| |
| void run(Module* module) override { |
| ContentOracle oracle(*module, getPassOptions()); |
| GUFAOptimizer(oracle, optimizing, castAll).run(getPassRunner(), module); |
| } |
| }; |
| |
| } // anonymous namespace |
| |
| Pass* createGUFAPass() { return new GUFAPass(false, false); } |
| Pass* createGUFAOptimizingPass() { return new GUFAPass(true, false); } |
| Pass* createGUFACastAllPass() { return new GUFAPass(false, true); } |
| |
| } // namespace wasm |