| /* |
| * Copyright 2022 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef wasm_ir_possible_contents_h |
| #define wasm_ir_possible_contents_h |
| |
| #include <variant> |
| |
| #include "ir/possible-constant.h" |
| #include "ir/subtypes.h" |
| #include "support/hash.h" |
| #include "support/small_vector.h" |
| #include "wasm-builder.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| // |
| // PossibleContents represents the possible contents at a particular location |
| // (such as in a local or in a function parameter). This is a little similar to |
| // PossibleConstantValues, but considers more types of contents than constant |
| // values - in particular, it can track types to some extent. |
| // |
| // The specific contents this can vary over are: |
| // |
| // * None: No possible value. |
| // |
| // * Literal: One possible constant value like an i32 of 42. |
| // |
| // * Global: The name of a global whose value is here. We do not know |
| // the actual value at compile time, but we know it is equal |
| // to that global. Typically we can only infer this for |
| // immutable globals. |
| // |
| // * ConeType: Any possible value of a particular type, and a possible |
| // "cone" of a certain depth below it. If the depth is 0 |
| // then only the exact type is possible; if the depth is 1 |
| // then either that type or its immediate subtypes, and so |
| // forth. |
| // A depth of -1 means unlimited: all subtypes are allowed. |
| // If the type here is nullable then null is also allowed. |
| // TODO: Add ConeTypePlusContents or such, which would be |
| // used on e.g. a struct.new with an immutable field |
| // to which we assign a constant: not only do we know |
| // the type, but also certain field's values. |
| // |
| // * Many: Anything else. Many things are possible here, and we do |
| // not track what they might be, so we must assume the worst |
| // in the calling code. |
| // |
| class PossibleContents { |
| struct None : public std::monostate {}; |
| |
| struct GlobalInfo { |
| Name name; |
| // The type of contents. Note that this may not match the type of the |
| // global, if we were filtered. For example: |
| // |
| // (ref.as_non_null |
| // (global.get $nullable-global) |
| // ) |
| // |
| // The contents flowing out will be a Global, but of a non-nullable type, |
| // unlike the original global. |
| Type type; |
| // TODO: Consider adding a depth here, or merging this with ConeType in some |
| // way. In principle, not having depth info can lead to loss of |
| // precision. |
| bool operator==(const GlobalInfo& other) const { |
| return name == other.name && type == other.type; |
| } |
| }; |
| |
| struct ConeType { |
| Type type; |
| Index depth; |
| bool operator==(const ConeType& other) const { |
| return type == other.type && depth == other.depth; |
| } |
| }; |
| |
| struct Many : public std::monostate {}; |
| |
| // TODO: This is similar to the variant in PossibleConstantValues, and perhaps |
| // we could share code, but extending a variant using template magic may |
| // not be worthwhile. Another option might be to make PCV inherit from |
| // this and disallow ConeType etc., but PCV might get slower. |
| using Variant = std::variant<None, Literal, GlobalInfo, ConeType, Many>; |
| Variant value; |
| |
| // Internal convenience for creating a cone type with depth 0, i.e,, an exact |
| // type. |
| static ConeType ExactType(Type type) { return ConeType{type, 0}; } |
| |
| static constexpr Index FullDepth = -1; |
| |
| // Internal convenience for creating a cone type of unbounded depth, i.e., the |
| // full cone of all subtypes for that type. |
| static ConeType FullConeType(Type type) { return ConeType{type, FullDepth}; } |
| |
| template<typename T> PossibleContents(T value) : value(value) {} |
| |
| public: |
| PossibleContents() : value(None()) {} |
| PossibleContents(const PossibleContents& other) = default; |
| |
| // Most users will use one of the following static functions to construct a |
| // new instance: |
| |
| static PossibleContents none() { return PossibleContents{None()}; } |
| static PossibleContents literal(Literal c) { return PossibleContents{c}; } |
| static PossibleContents global(Name name, Type type) { |
| return PossibleContents{GlobalInfo{name, type}}; |
| } |
| // Helper for a cone type with depth 0, i.e., an exact type. |
| static PossibleContents exactType(Type type) { |
| return PossibleContents{ExactType(type)}; |
| } |
| // Helper for a cone with unbounded depth, i.e., the full cone of all subtypes |
| // for that type. |
| static PossibleContents fullConeType(Type type) { |
| return PossibleContents{FullConeType(type)}; |
| } |
| static PossibleContents coneType(Type type, Index depth) { |
| return PossibleContents{ConeType{type, depth}}; |
| } |
| static PossibleContents many() { return PossibleContents{Many()}; } |
| |
| // Helper for creating a PossibleContents based on a wasm type, that is, where |
| // all we know is the wasm type. |
| static PossibleContents fromType(Type type) { |
| assert(type != Type::none); |
| |
| if (type.isRef()) { |
| // For a reference, subtyping matters. |
| return fullConeType(type); |
| } |
| |
| if (type == Type::unreachable) { |
| // Nothing is possible here. |
| return none(); |
| } |
| |
| // Otherwise, this is a concrete MVP type. |
| assert(type.isConcrete()); |
| return exactType(type); |
| } |
| |
| PossibleContents& operator=(const PossibleContents& other) = default; |
| |
| bool operator==(const PossibleContents& other) const { |
| return value == other.value; |
| } |
| |
| bool operator!=(const PossibleContents& other) const { |
| return !(*this == other); |
| } |
| |
| // Combine the information in a given PossibleContents to this one. The |
| // contents here will then include whatever content was possible in |other|. |
| [[nodiscard]] static PossibleContents combine(const PossibleContents& a, |
| const PossibleContents& b); |
| |
| void combine(const PossibleContents& other) { |
| *this = PossibleContents::combine(*this, other); |
| } |
| |
| // Removes anything not in |other| from this object, so that it ends up with |
| // only their intersection. |
| void intersect(const PossibleContents& other); |
| |
| bool isNone() const { return std::get_if<None>(&value); } |
| bool isLiteral() const { return std::get_if<Literal>(&value); } |
| bool isGlobal() const { return std::get_if<GlobalInfo>(&value); } |
| bool isConeType() const { return std::get_if<ConeType>(&value); } |
| bool isMany() const { return std::get_if<Many>(&value); } |
| |
| Literal getLiteral() const { |
| assert(isLiteral()); |
| return std::get<Literal>(value); |
| } |
| |
| Name getGlobal() const { |
| assert(isGlobal()); |
| return std::get<GlobalInfo>(value).name; |
| } |
| |
| bool isNull() const { return isLiteral() && getLiteral().isNull(); } |
| |
| // Return the relevant type here. Note that the *meaning* of the type varies |
| // by the contents: type $foo of a global means that type or any subtype, as a |
| // subtype might be written to it, while type $foo of a Literal or a ConeType |
| // with depth zero means that type and nothing else, etc. (see also |
| // hasExactType). |
| // |
| // If no type is possible, return unreachable; if many types are, return none. |
| Type getType() const { |
| if (auto* literal = std::get_if<Literal>(&value)) { |
| return literal->type; |
| } else if (auto* global = std::get_if<GlobalInfo>(&value)) { |
| return global->type; |
| } else if (auto* coneType = std::get_if<ConeType>(&value)) { |
| return coneType->type; |
| } else if (std::get_if<None>(&value)) { |
| return Type::unreachable; |
| } else if (std::get_if<Many>(&value)) { |
| return Type::none; |
| } else { |
| WASM_UNREACHABLE("bad value"); |
| } |
| } |
| |
| // Returns cone type info. This can be called on non-cone types as well, and |
| // it returns a cone that best describes them. That is, this is like getType() |
| // but it also provides an indication about the depth, if relevant. (If cone |
| // info is not relevant, like when getType() returns none or unreachable, the |
| // depth is set to 0.) |
| ConeType getCone() const { |
| if (auto* literal = std::get_if<Literal>(&value)) { |
| return ExactType(literal->type); |
| } else if (auto* global = std::get_if<GlobalInfo>(&value)) { |
| return FullConeType(global->type); |
| } else if (auto* coneType = std::get_if<ConeType>(&value)) { |
| return *coneType; |
| } else if (std::get_if<None>(&value)) { |
| return ExactType(Type::unreachable); |
| } else if (std::get_if<Many>(&value)) { |
| return ExactType(Type::none); |
| } else { |
| WASM_UNREACHABLE("bad value"); |
| } |
| } |
| |
| // Returns whether the relevant cone for this, as computed by getCone(), is of |
| // full size, that is, includes all subtypes. |
| bool hasFullCone() const { return getCone().depth == FullDepth; } |
| |
| // Returns whether this is a cone type and also is of full size. This differs |
| // from hasFullCone() in that the former can return true for a global, for |
| // example, while this cannot (a global is not a cone type, but the |
| // information we have about its cone is that it is full). |
| bool isFullConeType() const { return isConeType() && hasFullCone(); } |
| |
| // Returns whether the type we can report here is exact, that is, nothing of a |
| // strict subtype might show up - the contents here have an exact type. |
| // |
| // This returns false for None and Many, for whom it is not well-defined. |
| bool hasExactType() const { |
| if (isLiteral()) { |
| return true; |
| } |
| |
| if (auto* coneType = std::get_if<ConeType>(&value)) { |
| return coneType->depth == 0; |
| } |
| |
| return false; |
| } |
| |
| // Returns whether the given contents have any intersection, that is, whether |
| // some value exists that can appear in both |a| and |b|. For example, if |
| // either is None, or if they are different literals, then they have no |
| // intersection. |
| static bool haveIntersection(const PossibleContents& a, |
| const PossibleContents& b); |
| |
| // Returns whether |a| is a subset of |b|, that is, all possible contents of |
| // |a| are also possible in |b|. |
| static bool isSubContents(const PossibleContents& a, |
| const PossibleContents& b); |
| |
| // Whether we can make an Expression* for this containing the proper contents. |
| // We can do that for a Literal (emitting a Const or RefFunc etc.) or a |
| // Global (emitting a GlobalGet), but not for anything else yet. |
| bool canMakeExpression() const { return isLiteral() || isGlobal(); } |
| |
| Expression* makeExpression(Module& wasm) { |
| assert(canMakeExpression()); |
| Builder builder(wasm); |
| if (isLiteral()) { |
| return builder.makeConstantExpression(getLiteral()); |
| } else { |
| auto name = getGlobal(); |
| // Note that we load the type from the module, rather than use the type |
| // in the GlobalInfo, as that type may not match the global (see comment |
| // in the GlobalInfo declaration above). |
| return builder.makeGlobalGet(name, wasm.getGlobal(name)->type); |
| } |
| } |
| |
| size_t hash() const { |
| // First hash the index of the variant, then add the internals for each. |
| size_t ret = std::hash<size_t>()(value.index()); |
| if (isNone() || isMany()) { |
| // Nothing to add. |
| } else if (isLiteral()) { |
| rehash(ret, getLiteral()); |
| } else if (auto* global = std::get_if<GlobalInfo>(&value)) { |
| rehash(ret, global->name); |
| rehash(ret, global->type); |
| } else if (auto* coneType = std::get_if<ConeType>(&value)) { |
| rehash(ret, coneType->type); |
| rehash(ret, coneType->depth); |
| } else { |
| WASM_UNREACHABLE("bad variant"); |
| } |
| return ret; |
| } |
| |
| void dump(std::ostream& o, Module* wasm = nullptr) const { |
| o << '['; |
| if (isNone()) { |
| o << "None"; |
| } else if (isLiteral()) { |
| o << "Literal " << getLiteral(); |
| auto t = getType(); |
| if (t.isRef()) { |
| auto h = t.getHeapType(); |
| o << " HT: " << h; |
| } |
| } else if (isGlobal()) { |
| o << "GlobalInfo $" << getGlobal() << " T: " << getType(); |
| } else if (auto* coneType = std::get_if<ConeType>(&value)) { |
| auto t = coneType->type; |
| o << "ConeType " << t; |
| if (coneType->depth == 0) { |
| o << " exact"; |
| } else { |
| o << " depth=" << coneType->depth; |
| } |
| if (t.isRef()) { |
| auto h = t.getHeapType(); |
| o << " HT: " << h; |
| if (wasm && wasm->typeNames.count(h)) { |
| o << " $" << wasm->typeNames[h].name; |
| } |
| if (t.isNullable()) { |
| o << " null"; |
| } |
| } |
| } else if (isMany()) { |
| o << "Many"; |
| } else { |
| WASM_UNREACHABLE("bad variant"); |
| } |
| o << ']'; |
| } |
| }; |
| |
| // The various *Location structs (ExpressionLocation, ResultLocation, etc.) |
| // describe particular locations where content can appear. |
| |
| // The location of a specific IR expression. |
| struct ExpressionLocation { |
| Expression* expr; |
| // If this expression contains a tuple then each index in the tuple will have |
| // its own location with a corresponding tupleIndex. If this is not a tuple |
| // then we only use tupleIndex 0. |
| Index tupleIndex; |
| bool operator==(const ExpressionLocation& other) const { |
| return expr == other.expr && tupleIndex == other.tupleIndex; |
| } |
| }; |
| |
| // The location of one of the parameters of a function. |
| struct ParamLocation { |
| Function* func; |
| Index index; |
| bool operator==(const ParamLocation& other) const { |
| return func == other.func && index == other.index; |
| } |
| }; |
| |
| // The location of a value in a local. |
| struct LocalLocation { |
| Function* func; |
| Index index; |
| bool operator==(const LocalLocation& other) const { |
| return func == other.func && index == other.index; |
| } |
| }; |
| |
| // The location of one of the results of a function. |
| struct ResultLocation { |
| Function* func; |
| Index index; |
| bool operator==(const ResultLocation& other) const { |
| return func == other.func && index == other.index; |
| } |
| }; |
| |
| // The location of a global in the module. |
| struct GlobalLocation { |
| Name name; |
| bool operator==(const GlobalLocation& other) const { |
| return name == other.name; |
| } |
| }; |
| |
| // The location of one of the parameters in a function signature. |
| struct SignatureParamLocation { |
| HeapType type; |
| Index index; |
| bool operator==(const SignatureParamLocation& other) const { |
| return type == other.type && index == other.index; |
| } |
| }; |
| |
| // The location of one of the results in a function signature. |
| struct SignatureResultLocation { |
| HeapType type; |
| Index index; |
| bool operator==(const SignatureResultLocation& other) const { |
| return type == other.type && index == other.index; |
| } |
| }; |
| |
| // The location of contents in a struct or array (i.e., things that can fit in a |
| // dataref). Note that this is specific to this type - it does not include data |
| // about subtypes or supertypes. |
| // |
| // We store the truncated bits here when the field is packed. That is, if -1 is |
| // written to an i8 then the value here will be 0xff. StructGet/ArrayGet |
| // operations that read a signed value must then perform a sign-extend |
| // operation. |
| struct DataLocation { |
| HeapType type; |
| // The index of the field in a struct, or 0 for an array (where we do not |
| // attempt to differentiate by index). |
| Index index; |
| bool operator==(const DataLocation& other) const { |
| return type == other.type && index == other.index; |
| } |
| }; |
| |
| // The location of anything written to a particular tag. |
| struct TagLocation { |
| Name tag; |
| // If the tag has more than one element, we'll have a separate TagLocation for |
| // each, with corresponding indexes. If the tag has just one element we'll |
| // only have one TagLocation with index 0. |
| Index tupleIndex; |
| bool operator==(const TagLocation& other) const { |
| return tag == other.tag && tupleIndex == other.tupleIndex; |
| } |
| }; |
| |
| // The location of an exnref materialized by a catch_ref or catch_all_ref clause |
| // of a try_table. No data is stored here. exnrefs contain a tag and a payload |
| // at run-time, as well as potential metadata such as stack traces, but we don't |
| // track that. So this is the same as NullLocation in a way: we just need *a* |
| // source of contents for places that receive an exnref. |
| struct CaughtExnRefLocation { |
| bool operator==(const CaughtExnRefLocation& other) const { return true; } |
| }; |
| |
| // A null value. This is used as the location of the default value of a var in a |
| // function, a null written to a struct field in struct.new_with_default, etc. |
| struct NullLocation { |
| Type type; |
| bool operator==(const NullLocation& other) const { |
| return type == other.type; |
| } |
| }; |
| |
| // A special type of location that does not refer to something concrete in the |
| // wasm, but is used to optimize the graph. A "cone read" is a struct.get or |
| // array.get of a type that is not exact, so it can read from either that type |
| // of some of the subtypes (up to a particular subtype depth). |
| // |
| // In general a read of a cone type + depth (as opposed to an exact type) will |
| // require N incoming links, from each of the N subtypes - and we need that |
| // for each struct.get of a cone. If there are M such gets then we have N * M |
| // edges for this. Instead, we make a single canonical "cone read" location, and |
| // add a single link to it from each get, which is only N + M (plus the cost |
| // of adding "latency" in requiring an additional step along the way for the |
| // data to flow along). |
| struct ConeReadLocation { |
| HeapType type; |
| // As in PossibleContents, this represents the how deep we go with subtypes. |
| // 0 means an exact type, 1 means immediate subtypes, etc. (Note that 0 is not |
| // needed since that is what DataLocation already is.) |
| Index depth; |
| // The index of the field in a struct, or 0 for an array (where we do not |
| // attempt to differentiate by index). |
| Index index; |
| bool operator==(const ConeReadLocation& other) const { |
| return type == other.type && depth == other.depth && index == other.index; |
| } |
| }; |
| |
| // A location is a variant over all the possible flavors of locations that we |
| // have. |
| using Location = std::variant<ExpressionLocation, |
| ParamLocation, |
| LocalLocation, |
| ResultLocation, |
| GlobalLocation, |
| SignatureParamLocation, |
| SignatureResultLocation, |
| DataLocation, |
| TagLocation, |
| CaughtExnRefLocation, |
| NullLocation, |
| ConeReadLocation>; |
| |
| } // namespace wasm |
| |
| namespace std { |
| |
| std::ostream& operator<<(std::ostream& stream, |
| const wasm::PossibleContents& contents); |
| |
| template<> struct hash<wasm::PossibleContents> { |
| size_t operator()(const wasm::PossibleContents& contents) const { |
| return contents.hash(); |
| } |
| }; |
| |
| // Define hashes of all the *Location flavors so that Location itself is |
| // hashable and we can use it in unordered maps and sets. |
| |
| template<> struct hash<wasm::ExpressionLocation> { |
| size_t operator()(const wasm::ExpressionLocation& loc) const { |
| return std::hash<std::pair<size_t, wasm::Index>>{}( |
| {size_t(loc.expr), loc.tupleIndex}); |
| } |
| }; |
| |
| template<> struct hash<wasm::ParamLocation> { |
| size_t operator()(const wasm::ParamLocation& loc) const { |
| return std::hash<std::pair<size_t, wasm::Index>>{}( |
| {size_t(loc.func), loc.index}); |
| } |
| }; |
| |
| template<> struct hash<wasm::LocalLocation> { |
| size_t operator()(const wasm::LocalLocation& loc) const { |
| return std::hash<std::pair<size_t, wasm::Index>>{}( |
| {size_t(loc.func), loc.index}); |
| } |
| }; |
| |
| template<> struct hash<wasm::ResultLocation> { |
| size_t operator()(const wasm::ResultLocation& loc) const { |
| return std::hash<std::pair<size_t, wasm::Index>>{}( |
| {size_t(loc.func), loc.index}); |
| } |
| }; |
| |
| template<> struct hash<wasm::GlobalLocation> { |
| size_t operator()(const wasm::GlobalLocation& loc) const { |
| return std::hash<wasm::Name>{}(loc.name); |
| } |
| }; |
| |
| template<> struct hash<wasm::SignatureParamLocation> { |
| size_t operator()(const wasm::SignatureParamLocation& loc) const { |
| return std::hash<std::pair<wasm::HeapType, wasm::Index>>{}( |
| {loc.type, loc.index}); |
| } |
| }; |
| |
| template<> struct hash<wasm::SignatureResultLocation> { |
| size_t operator()(const wasm::SignatureResultLocation& loc) const { |
| return std::hash<std::pair<wasm::HeapType, wasm::Index>>{}( |
| {loc.type, loc.index}); |
| } |
| }; |
| |
| template<> struct hash<wasm::DataLocation> { |
| size_t operator()(const wasm::DataLocation& loc) const { |
| return std::hash<std::pair<wasm::HeapType, wasm::Index>>{}( |
| {loc.type, loc.index}); |
| } |
| }; |
| |
| template<> struct hash<wasm::TagLocation> { |
| size_t operator()(const wasm::TagLocation& loc) const { |
| return std::hash<std::pair<wasm::Name, wasm::Index>>{}( |
| {loc.tag, loc.tupleIndex}); |
| } |
| }; |
| |
| template<> struct hash<wasm::CaughtExnRefLocation> { |
| size_t operator()(const wasm::CaughtExnRefLocation& loc) const { |
| return std::hash<const void*>()("caught-exnref-location"); |
| } |
| }; |
| |
| template<> struct hash<wasm::NullLocation> { |
| size_t operator()(const wasm::NullLocation& loc) const { |
| return std::hash<wasm::Type>{}(loc.type); |
| } |
| }; |
| |
| template<> struct hash<wasm::ConeReadLocation> { |
| size_t operator()(const wasm::ConeReadLocation& loc) const { |
| return std::hash<std::tuple<wasm::HeapType, wasm::Index, wasm::Index>>{}( |
| {loc.type, loc.depth, loc.index}); |
| } |
| }; |
| |
| } // namespace std |
| |
| namespace wasm { |
| |
| // Analyze the entire wasm file to find which contents are possible in which |
| // locations. This assumes a closed world and starts from roots - newly created |
| // values - and propagates them to the locations they reach. After the |
| // analysis the user of this class can ask which contents are possible at any |
| // location. |
| // |
| // This focuses on useful information for the typical user of this API. |
| // Specifically, we find out: |
| // |
| // 1. What locations have no content reaching them at all. That means the code |
| // is unreachable. (Other passes may handle this, but ContentOracle does it |
| // for all things, so it might catch situations other passes do not cover; |
| // and, it takes no effort to support this here). |
| // 2. For all locations, we try to find when they must contain a constant value |
| // like i32(42) or ref.func(foo). |
| // 3. For locations that contain references, information about the subtypes |
| // possible there. For example, if something has wasm type anyref in the IR, |
| // we might find it must contain an exact type of something specific. |
| // |
| // Note that there is not much use in providing type info for locations that are |
| // *not* references. If a local is i32, for example, then it cannot contain any |
| // subtype anyhow, since i32 is not a reference and has no subtypes. And we know |
| // the type i32 from the wasm anyhow, that is, the caller will know it. |
| // Therefore the only useful information we can provide on top of the info |
| // already in the wasm is either that nothing can be there (1, above), or that a |
| // constant must be there (2, above), and so we do not make an effort to track |
| // non-reference types here. This makes the internals of ContentOracle simpler |
| // and faster. A noticeable outcome of that is that querying the contents of an |
| // i32 local will return Many and not ConeType{i32, 0} (assuming we could not |
| // infer either that there must be nothing there, or a constant). Again, the |
| // caller is assumed to know the wasm IR type anyhow, and also other |
| // optimization passes work on the types in the IR, so we do not focus on that |
| // here. |
| class ContentOracle { |
| Module& wasm; |
| const PassOptions& options; |
| |
| void analyze(); |
| |
| public: |
| ContentOracle(Module& wasm, const PassOptions& options) |
| : wasm(wasm), options(options) { |
| analyze(); |
| } |
| |
| // Get the contents possible at a location. |
| PossibleContents getContents(Location location) { |
| auto iter = locationContents.find(location); |
| if (iter == locationContents.end()) { |
| // We know of no possible contents here. |
| return PossibleContents::none(); |
| } |
| return iter->second; |
| } |
| |
| // Helper for the common case of an expression location that is not a |
| // multivalue. |
| PossibleContents getContents(Expression* curr) { |
| assert(curr->type.size() == 1); |
| return getContents(ExpressionLocation{curr, 0}); |
| } |
| |
| private: |
| std::unordered_map<Location, PossibleContents> locationContents; |
| }; |
| |
| } // namespace wasm |
| |
| #endif // wasm_ir_possible_contents_h |