| /* |
| * Copyright 2021 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Apply more specific subtypes to signature/function types where possible. |
| // |
| // This differs from DeadArgumentElimination's refineArgumentTypes() etc. in |
| // that DAE will modify the type of a function. It can only do that if the |
| // function's type is not observable, which means it is not taken by reference. |
| // On the other hand, this pass will modify the signature types themselves, |
| // which means it can optimize functions whose reference is taken, and it does |
| // so while considering all users of the type (across all functions sharing that |
| // type, and all call_refs using it). |
| // |
| |
| #include "ir/export-utils.h" |
| #include "ir/find_all.h" |
| #include "ir/lubs.h" |
| #include "ir/module-utils.h" |
| #include "ir/names.h" |
| #include "ir/subtypes.h" |
| #include "ir/type-updating.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "wasm-type.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| namespace { |
| |
| struct SignatureRefining : public Pass { |
| // Only changes heap types and parameter types (but not locals). |
| bool requiresNonNullableLocalFixups() override { return false; } |
| |
| // Maps each heap type to the possible refinement of the types in their |
| // signatures. We will fill this during analysis and then use it while doing |
| // an update of the types. If a type has no improvement that we can find, it |
| // will not appear in this map. |
| std::unordered_map<HeapType, Signature> newSignatures; |
| |
| void run(Module* module) override { |
| if (!module->features.hasGC()) { |
| return; |
| } |
| |
| if (!module->tables.empty()) { |
| // When there are tables we must also take their types into account, which |
| // would require us to take call_indirect, element segments, etc. into |
| // account. For now, do nothing if there are tables. |
| // TODO |
| return; |
| } |
| |
| // First, find all the information we need. Start by collecting inside each |
| // function in parallel. |
| |
| struct Info { |
| // The calls and call_refs. |
| std::vector<Call*> calls; |
| std::vector<CallRef*> callRefs; |
| |
| // Additional calls to take into account. We store intrinsic calls here, |
| // as they must appear twice: call.without.effects is both a normal call |
| // and also takes a final parameter that is a function reference that is |
| // called, and so two signatures are relevant for it. For the latter, we |
| // add the call as an "extra call" (which is an unusual call, as it has an |
| // extra parameter at the end, the function reference, compared to what we |
| // expect for the signature being called). |
| std::vector<Call*> extraCalls; |
| |
| // A possibly improved LUB for the results. |
| LUBFinder resultsLUB; |
| |
| // Normally we can optimize, but some cases prevent a particular signature |
| // type from being changed at all, see below. |
| bool canModify = true; |
| }; |
| |
| // This analysis also modifies the wasm as it goes, as the getResultsLUB() |
| // operation has side effects (see comment on header declaration). |
| ModuleUtils::ParallelFunctionAnalysis<Info, Mutable> analysis( |
| *module, [&](Function* func, Info& info) { |
| if (func->imported()) { |
| // Avoid changing the types of imported functions. Spec and VM support |
| // for that is not yet stable. |
| // TODO: optimize this when possible in the future |
| info.canModify = false; |
| return; |
| } |
| info.calls = std::move(FindAll<Call>(func->body).list); |
| info.callRefs = std::move(FindAll<CallRef>(func->body).list); |
| info.resultsLUB = LUB::getResultsLUB(func, *module); |
| }); |
| |
| // A map of types to all the information combined over all the functions |
| // with that type. |
| std::unordered_map<HeapType, Info> allInfo; |
| |
| // Combine all the information we gathered into that map. |
| for (auto& [func, info] : analysis.map) { |
| // For direct calls, add each call to the type of the function being |
| // called. |
| for (auto* call : info.calls) { |
| allInfo[module->getFunction(call->target)->type].calls.push_back(call); |
| |
| // For call.without.effects, we also add the effective function being |
| // called as well. The final operand is the function reference being |
| // called, which defines that type. |
| if (Intrinsics(*module).isCallWithoutEffects(call)) { |
| auto targetType = call->operands.back()->type; |
| if (!targetType.isRef()) { |
| continue; |
| } |
| allInfo[targetType.getHeapType()].extraCalls.push_back(call); |
| } |
| } |
| |
| // For indirect calls, add each call_ref to the type the call_ref uses. |
| for (auto* callRef : info.callRefs) { |
| auto calledType = callRef->target->type; |
| if (calledType != Type::unreachable) { |
| allInfo[calledType.getHeapType()].callRefs.push_back(callRef); |
| } |
| } |
| |
| // Add the function's return LUB to the one for the heap type of that |
| // function. |
| allInfo[func->type].resultsLUB.combine(info.resultsLUB); |
| |
| // If one function cannot be modified, that entire type cannot be. |
| if (!info.canModify) { |
| allInfo[func->type].canModify = false; |
| } |
| } |
| |
| // We cannot alter the signature of an exported function, as the outside may |
| // notice us doing so. For example, if we turn a parameter from nullable |
| // into non-nullable then callers sending a null will break. Put another |
| // way, we need to see all callers to refine types, and for exports we |
| // cannot do so. |
| // TODO If a function type is passed we should also mark the types used |
| // there, etc., recursively. For now this code just handles the top- |
| // level type, which is enough to keep the fuzzer from erroring. More |
| // generally, we need to decide about adding a "closed-world" flag of |
| // some kind. |
| for (auto* exportedFunc : ExportUtils::getExportedFunctions(*module)) { |
| allInfo[exportedFunc->type].canModify = false; |
| } |
| |
| // For now, do not optimize types that have subtypes. When we modify such a |
| // type we need to modify subtypes as well, similar to the analysis in |
| // TypeRefining, and perhaps we can unify this pass with that. TODO |
| SubTypes subTypes(*module); |
| for (auto& [type, info] : allInfo) { |
| if (!subTypes.getImmediateSubTypes(type).empty()) { |
| info.canModify = false; |
| } else if (type.getDeclaredSuperType()) { |
| // Also avoid modifying types with supertypes, as we do not handle |
| // contravariance here. That is, when we refine parameters we look for |
| // a more refined type, but the type must be *less* refined than the |
| // param type for the parent (or equal) TODO |
| info.canModify = false; |
| } |
| } |
| |
| // Compute optimal LUBs. |
| std::unordered_set<HeapType> seen; |
| for (auto& func : module->functions) { |
| auto type = func->type; |
| if (!seen.insert(type).second) { |
| continue; |
| } |
| |
| auto& info = allInfo[type]; |
| if (!info.canModify) { |
| continue; |
| } |
| |
| auto sig = type.getSignature(); |
| |
| auto numParams = sig.params.size(); |
| std::vector<LUBFinder> paramLUBs(numParams); |
| |
| auto updateLUBs = [&](const ExpressionList& operands) { |
| for (Index i = 0; i < numParams; i++) { |
| paramLUBs[i].note(operands[i]->type); |
| } |
| }; |
| |
| for (auto* call : info.calls) { |
| updateLUBs(call->operands); |
| } |
| for (auto* callRef : info.callRefs) { |
| updateLUBs(callRef->operands); |
| } |
| for (auto* call : info.extraCalls) { |
| // Note that these intrinsic calls have an extra function reference |
| // param at the end, but updateLUBs looks at |numParams| only, so it |
| // considers just the relevant parameters. |
| updateLUBs(call->operands); |
| } |
| |
| // Find the final LUBs, and see if we found an improvement. |
| std::vector<Type> newParamsTypes; |
| for (auto& lub : paramLUBs) { |
| if (!lub.noted()) { |
| break; |
| } |
| newParamsTypes.push_back(lub.getLUB()); |
| } |
| Type newParams; |
| if (newParamsTypes.size() < numParams) { |
| // We did not have type information to calculate a LUB (no calls, or |
| // some param is always unreachable), so there is nothing we can improve |
| // here. Other passes might remove the type entirely. |
| newParams = func->getParams(); |
| } else { |
| newParams = Type(newParamsTypes); |
| } |
| |
| auto& resultsLUB = info.resultsLUB; |
| Type newResults; |
| if (!resultsLUB.noted()) { |
| // We did not have type information to calculate a LUB (no returned |
| // value, or it can return a value but traps instead etc.). |
| newResults = func->getResults(); |
| } else { |
| newResults = resultsLUB.getLUB(); |
| } |
| |
| if (newParams == func->getParams() && newResults == func->getResults()) { |
| continue; |
| } |
| |
| // We found an improvement! |
| newSignatures[type] = Signature(newParams, newResults); |
| |
| if (newResults != func->getResults()) { |
| // Update the types of calls using the signature. |
| for (auto* call : info.calls) { |
| if (call->type != Type::unreachable) { |
| call->type = newResults; |
| } |
| } |
| for (auto* callRef : info.callRefs) { |
| if (callRef->type != Type::unreachable) { |
| callRef->type = newResults; |
| } |
| } |
| } |
| } |
| |
| if (newSignatures.empty()) { |
| // We found nothing to optimize. |
| return; |
| } |
| |
| // Update function contents for their new parameter types. |
| struct CodeUpdater : public WalkerPass<PostWalker<CodeUpdater>> { |
| bool isFunctionParallel() override { return true; } |
| |
| // Updating parameter types cannot affect validation (only updating var |
| // types types might). |
| bool requiresNonNullableLocalFixups() override { return false; } |
| |
| SignatureRefining& parent; |
| Module& wasm; |
| |
| CodeUpdater(SignatureRefining& parent, Module& wasm) |
| : parent(parent), wasm(wasm) {} |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<CodeUpdater>(parent, wasm); |
| } |
| |
| void doWalkFunction(Function* func) { |
| auto iter = parent.newSignatures.find(func->type); |
| if (iter != parent.newSignatures.end()) { |
| std::vector<Type> newParamsTypes; |
| for (auto param : iter->second.params) { |
| newParamsTypes.push_back(param); |
| } |
| // Do not update local.get/local.tee here, as we will do so in |
| // GlobalTypeRewriter::updateSignatures, below. (Doing an update here |
| // would leave the IR in an inconsistent state of a partial update; |
| // instead, do the full update at the end.) |
| TypeUpdating::updateParamTypes( |
| func, |
| newParamsTypes, |
| wasm, |
| TypeUpdating::LocalUpdatingMode::DoNotUpdate); |
| } |
| } |
| }; |
| CodeUpdater(*this, *module).run(getPassRunner(), module); |
| |
| // Rewrite the types. |
| GlobalTypeRewriter::updateSignatures(newSignatures, *module); |
| |
| // Update intrinsics. |
| updateIntrinsics(module, allInfo); |
| |
| // TODO: we could do this only in relevant functions perhaps |
| ReFinalize().run(getPassRunner(), module); |
| } |
| |
| template<typename HeapInfoMap> |
| void updateIntrinsics(Module* module, HeapInfoMap& map) { |
| // The call.without.effects intrinsic needs to be updated if we refine the |
| // function reference it receives. Imagine that we have this: |
| // |
| // (call $call.without.effects |
| // (ref.func $returns.A) |
| // ) |
| // |
| // If we refined that $returns.A function to actually return a subtype $B, |
| // then now the call.without.effects should return $B, because logically it |
| // is still a direct call to that function. (We could also defer this to |
| // later, if we relaxed validation here, but updating right now is better |
| // for followup optimizations.) |
| |
| // Each time we update we create a new import with the proper type. Keep a |
| // map of them to avoid creating more than one for each type. |
| std::unordered_map<HeapType, Function*> newImports; |
| |
| auto getImportWithNewResults = [&](Function* import, Type newResults) { |
| auto newType = Signature(import->getParams(), newResults); |
| if (auto iter = newImports.find(newType); iter != newImports.end()) { |
| return iter->second; |
| } |
| |
| auto name = Names::getValidFunctionName(*module, import->name); |
| auto newImport = |
| module->addFunction(Builder(*module).makeFunction(name, newType, {})); |
| |
| // Copy the binaryen intrinsic module.base import names. |
| newImport->module = import->module; |
| newImport->base = import->base; |
| |
| newImports[newType] = newImport; |
| return newImport; |
| }; |
| |
| for (auto& [_, info] : map) { |
| for (auto* call : info.calls) { |
| if (Intrinsics(*module).isCallWithoutEffects(call)) { |
| auto targetType = call->operands.back()->type; |
| if (!targetType.isRef()) { |
| continue; |
| } |
| auto heapType = targetType.getHeapType(); |
| if (!heapType.isSignature()) { |
| continue; |
| } |
| auto newResults = heapType.getSignature().results; |
| if (call->type == newResults) { |
| continue; |
| } |
| |
| // The target was refined, so we need to update here. Create a new |
| // import of the refined type, and call that instead. |
| call->target = getImportWithNewResults( |
| module->getFunction(call->target), newResults) |
| ->name; |
| call->type = newResults; |
| } |
| } |
| } |
| } |
| }; |
| |
| } // anonymous namespace |
| |
| Pass* createSignatureRefiningPass() { return new SignatureRefining(); } |
| |
| } // namespace wasm |