| /* |
| * Copyright 2023 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "ir/names.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "passes/stringify-walker.h" |
| #include "support/suffix_tree.h" |
| #include "wasm.h" |
| |
| #define OUTLINING_DEBUG 0 |
| |
| #if OUTLINING_DEBUG |
| #define DBG(statement) statement |
| #else |
| #define DBG(statement) |
| #endif |
| |
| // Check a Result or MaybeResult for error and call Fatal() if the error exists. |
| #define ASSERT_OK(val) \ |
| if (auto _val = (val); auto err = _val.getErr()) { \ |
| Fatal() << err->msg; \ |
| } |
| |
| namespace wasm { |
| |
| // Instances of this walker are intended to walk a function at a time, at the |
| // behest of the owner of the instance. |
| struct ReconstructStringifyWalker |
| : public StringifyWalker<ReconstructStringifyWalker> { |
| |
| ReconstructStringifyWalker(Module* wasm) |
| : existingBuilder(*wasm), outlinedBuilder(*wasm) { |
| this->setModule(wasm); |
| DBG(std::cerr << "\nexistingBuilder: " << &existingBuilder |
| << " outlinedBuilder: " << &outlinedBuilder << "\n"); |
| } |
| |
| // As we reconstruct the IR during outlining, we need to know what |
| // state we're in to determine which IRBuilder to send the instruction to. |
| enum ReconstructState { |
| NotInSeq = 0, // Will not be outlined into a new function. |
| InSeq = 1, // Currently being outlined into a new function. |
| InSkipSeq = 2, // A sequence that has already been outlined. |
| }; |
| // We begin with the assumption that we are not currently in a sequence that |
| // will be outlined. |
| ReconstructState state = ReconstructState::NotInSeq; |
| |
| // The list of sequences that will be outlined, contained in the function |
| // currently being walked. |
| std::vector<OutliningSequence> sequences; |
| // Tracks the OutliningSequence the walker is about to outline or is currently |
| // outlining. |
| uint32_t seqCounter = 0; |
| // Counts the number of instructions visited since the function began, |
| // corresponds to the indices in the sequences. |
| uint32_t instrCounter = 0; |
| // A reusable builder for reconstructing the function that will have sequences |
| // of instructions removed to be placed into an outlined function. The removed |
| // sequences will be replaced by a call to the outlined function. |
| IRBuilder existingBuilder; |
| // A reusable builder for constructing the outlined functions that will |
| // contain repeat sequences found in the program. |
| IRBuilder outlinedBuilder; |
| |
| void addUniqueSymbol(SeparatorReason reason) { |
| if (auto curr = reason.getFuncStart()) { |
| startExistingFunction(curr->func); |
| return; |
| } |
| |
| // instrCounter is managed manually and incremented at the beginning of |
| // addUniqueSymbol() and visitExpression(), except for the case where we are |
| // starting a new function, as that resets the counters back to 0. |
| instrCounter++; |
| |
| DBG(std::string desc); |
| if (auto curr = reason.getBlockStart()) { |
| ASSERT_OK(existingBuilder.visitBlockStart(curr->block)); |
| DBG(desc = "Block Start at "); |
| } else if (auto curr = reason.getIfStart()) { |
| // IR builder needs the condition of the If pushed onto the builder before |
| // visitIfStart(), which will expect to be able to pop the condition. |
| // This is always okay to do because the correct condition was installed |
| // onto the If when the outer scope was visited. |
| existingBuilder.push(curr->iff->condition); |
| ASSERT_OK(existingBuilder.visitIfStart(curr->iff)); |
| DBG(desc = "If Start at "); |
| } else if (reason.getElseStart()) { |
| ASSERT_OK(existingBuilder.visitElse()); |
| DBG(desc = "Else Start at "); |
| } else if (auto curr = reason.getLoopStart()) { |
| ASSERT_OK(existingBuilder.visitLoopStart(curr->loop)); |
| DBG(desc = "Loop Start at "); |
| } else if (reason.getEnd()) { |
| ASSERT_OK(existingBuilder.visitEnd()); |
| // Outlining performs an unnested walk of the Wasm module, visiting |
| // each scope one at a time. IRBuilder, in contrast, expects to |
| // visit several nested scopes at a time. Thus, calling end() finalizes |
| // the control flow and places it on IRBuilder's internal stack, ready for |
| // the enclosing scope to consume its expressions off the stack. Since |
| // outlining walks unnested, the enclosing scope never arrives to retrieve |
| // its expressions off the stack, so we must call build() after visitEnd() |
| // to clear the internal stack IRBuilder manages. |
| ASSERT_OK(existingBuilder.build()); |
| DBG(desc = "End at "); |
| } else { |
| DBG(desc = "addUniqueSymbol for unimplemented control flow "); |
| WASM_UNREACHABLE("unimplemented control flow"); |
| } |
| DBG(printAddUniqueSymbol(desc)); |
| } |
| |
| void visitExpression(Expression* curr) { |
| maybeBeginSeq(); |
| |
| IRBuilder* builder = state == InSeq ? &outlinedBuilder |
| : state == NotInSeq ? &existingBuilder |
| : nullptr; |
| if (builder) { |
| if (auto* expr = curr->dynCast<Break>()) { |
| Type type = expr->value ? expr->value->type : Type::none; |
| ASSERT_OK(builder->visitBreakWithType(expr, type)); |
| } else if (auto* expr = curr->dynCast<Switch>()) { |
| Type type = expr->value ? expr->value->type : Type::none; |
| ASSERT_OK(builder->visitSwitchWithType(expr, type)); |
| } else { |
| // Assert ensures new unhandled branch instructions |
| // will quickly cause an error. Serves as a reminder to |
| // implement a new special-case visit*WithType. |
| assert(curr->is<BrOn>() || !Properties::isBranch(curr)); |
| ASSERT_OK(builder->visit(curr)); |
| } |
| } |
| DBG(printVisitExpression(curr)); |
| |
| if (state == InSeq || state == InSkipSeq) { |
| maybeEndSeq(); |
| } |
| } |
| |
| // Helpers |
| void startExistingFunction(Function* func) { |
| ASSERT_OK(existingBuilder.build()); |
| ASSERT_OK(existingBuilder.visitFunctionStart(func)); |
| instrCounter = 0; |
| seqCounter = 0; |
| state = NotInSeq; |
| DBG(std::cerr << "\n" |
| << "Func Start to $" << func->name << " at " |
| << &existingBuilder << "\n"); |
| } |
| |
| ReconstructState getCurrState() { |
| // We are either in a sequence or not in a sequence. If we are in a sequence |
| // and have already created the body of the outlined function that will be |
| // called, then we will skip instructions, otherwise we add the instructions |
| // to the outlined function. If we are not in a sequence, then the |
| // instructions are sent to the existing function. |
| if (seqCounter < sequences.size() && |
| instrCounter >= sequences[seqCounter].startIdx && |
| instrCounter < sequences[seqCounter].endIdx) { |
| return getModule()->getFunction(sequences[seqCounter].func)->body |
| ? InSkipSeq |
| : InSeq; |
| } |
| return NotInSeq; |
| } |
| |
| void maybeBeginSeq() { |
| instrCounter++; |
| auto currState = getCurrState(); |
| if (currState != state) { |
| switch (currState) { |
| case NotInSeq: |
| break; |
| case InSeq: |
| transitionToInSeq(); |
| break; |
| case InSkipSeq: |
| transitionToInSkipSeq(); |
| break; |
| } |
| } |
| state = currState; |
| } |
| |
| void transitionToInSeq() { |
| Function* outlinedFunc = |
| getModule()->getFunction(sequences[seqCounter].func); |
| ASSERT_OK(outlinedBuilder.visitFunctionStart(outlinedFunc)); |
| |
| // Add a local.get instruction for every parameter of the outlined function. |
| Signature sig = outlinedFunc->type.getSignature(); |
| for (Index i = 0; i < sig.params.size(); i++) { |
| ASSERT_OK(outlinedBuilder.makeLocalGet(i)); |
| } |
| |
| // Make a call from the existing function to the outlined function. This |
| // call will replace the instructions moved to the outlined function. |
| ASSERT_OK(existingBuilder.makeCall(outlinedFunc->name, false)); |
| DBG(std::cerr << "\ncreated outlined fn: " << outlinedFunc->name << "\n"); |
| } |
| |
| void transitionToInSkipSeq() { |
| Function* outlinedFunc = |
| getModule()->getFunction(sequences[seqCounter].func); |
| ASSERT_OK(existingBuilder.makeCall(outlinedFunc->name, false)); |
| DBG(std::cerr << "\nstarting to skip instructions " |
| << sequences[seqCounter].startIdx << " - " |
| << sequences[seqCounter].endIdx - 1 << " to " |
| << sequences[seqCounter].func |
| << " and adding call() instead\n"); |
| } |
| |
| void maybeEndSeq() { |
| if (instrCounter + 1 == sequences[seqCounter].endIdx) { |
| transitionToNotInSeq(); |
| state = NotInSeq; |
| } |
| } |
| |
| void transitionToNotInSeq() { |
| DBG(std::cerr << "End of sequence "); |
| if (state == InSeq) { |
| ASSERT_OK(outlinedBuilder.visitEnd()); |
| DBG(std::cerr << "to " << &outlinedBuilder); |
| } |
| DBG(std::cerr << "\n\n"); |
| // Completed a sequence so increase the seqCounter and reset the state. |
| seqCounter++; |
| } |
| |
| #if OUTLINING_DEBUG |
| void printAddUniqueSymbol(std::string desc) { |
| std::cerr << desc << std::to_string(instrCounter) << " to " |
| << &existingBuilder << "\n"; |
| } |
| |
| void printVisitExpression(Expression* curr) { |
| auto* builder = state == InSeq ? &outlinedBuilder |
| : state == NotInSeq ? &existingBuilder |
| : nullptr; |
| auto verb = state == InSkipSeq ? "skipping " : "adding "; |
| std::cerr << verb << std::to_string(instrCounter) << ": " |
| << ShallowExpression{curr} << "(" << curr << ") to " << builder |
| << "\n"; |
| } |
| #endif |
| }; |
| |
| struct Outlining : public Pass { |
| void run(Module* module) { |
| HashStringifyWalker stringify; |
| // Walk the module and create a "string representation" of the program. |
| stringify.walkModule(module); |
| // Collect all of the substrings of the string representation that appear |
| // more than once in the program. |
| auto substrings = |
| StringifyProcessor::repeatSubstrings(stringify.hashString); |
| DBG(printHashString(stringify.hashString, stringify.exprs)); |
| // Remove substrings that are substrings of longer repeat substrings. |
| substrings = StringifyProcessor::dedupe(substrings); |
| // Remove substrings with branch and return instructions until an analysis |
| // is performed to see if the intended destination of the branch is included |
| // in the substring to be outlined. |
| substrings = |
| StringifyProcessor::filterBranches(substrings, stringify.exprs); |
| // Remove substrings with local.set instructions until Outlining is extended |
| // to support arranging for the written values to be returned from the |
| // outlined function and written back to the original locals. |
| substrings = |
| StringifyProcessor::filterLocalSets(substrings, stringify.exprs); |
| // Remove substrings with local.get instructions until Outlining is extended |
| // to support passing the local values as additional arguments to the |
| // outlined function. |
| substrings = |
| StringifyProcessor::filterLocalGets(substrings, stringify.exprs); |
| // Convert substrings to sequences that are more easily outlineable as we |
| // walk the functions in a module. Sequences contain indices that |
| // are relative to the enclosing function while substrings have indices |
| // relative to the entire program. |
| auto sequences = makeSequences(module, substrings, stringify); |
| outline(module, sequences); |
| // Position the outlined functions first in the functions vector to make |
| // the outlining lit tests far more readable. |
| moveOutlinedFunctions(module, substrings.size()); |
| } |
| |
| Name addOutlinedFunction(Module* module, |
| const SuffixTree::RepeatedSubstring& substring, |
| const std::vector<Expression*>& exprs) { |
| auto startIdx = substring.StartIndices[0]; |
| // The outlined functions can be named anything. |
| Name func = Names::getValidFunctionName(*module, std::string("outline$")); |
| // Calculate the function signature for the outlined sequence. |
| StackSignature sig; |
| for (uint32_t exprIdx = startIdx; exprIdx < startIdx + substring.Length; |
| exprIdx++) { |
| sig += StackSignature(exprs[exprIdx]); |
| } |
| module->addFunction( |
| Builder::makeFunction(func, Signature(sig.params, sig.results), {})); |
| return func; |
| } |
| |
| using Sequences = |
| std::unordered_map<Name, std::vector<wasm::OutliningSequence>>; |
| |
| // Converts an array of SuffixTree::RepeatedSubstring to a mapping of original |
| // functions to repeated sequences they contain. These sequences are ordered |
| // by start index by construction because the substring's start indices are |
| // ordered. |
| Sequences makeSequences(Module* module, |
| const Substrings& substrings, |
| const HashStringifyWalker& stringify) { |
| Sequences seqByFunc; |
| for (auto& substring : substrings) { |
| auto func = addOutlinedFunction(module, substring, stringify.exprs); |
| for (auto seqIdx : substring.StartIndices) { |
| // seqIdx is relative to the entire program; making the idx of the |
| // sequence relative to its function is better for outlining because we |
| // walk functions. |
| auto [relativeIdx, existingFunc] = stringify.makeRelative(seqIdx); |
| auto seq = |
| OutliningSequence(relativeIdx, relativeIdx + substring.Length, func); |
| seqByFunc[existingFunc].push_back(seq); |
| } |
| } |
| return seqByFunc; |
| } |
| |
| void outline(Module* module, Sequences seqByFunc) { |
| // TODO: Make this a function-parallel sub-pass. |
| ReconstructStringifyWalker reconstruct(module); |
| std::vector<Name> keys(seqByFunc.size()); |
| std::transform(seqByFunc.begin(), |
| seqByFunc.end(), |
| keys.begin(), |
| [](auto pair) { return pair.first; }); |
| for (auto func : keys) { |
| reconstruct.sequences = std::move(seqByFunc[func]); |
| reconstruct.doWalkFunction(module->getFunction(func)); |
| } |
| } |
| |
| void moveOutlinedFunctions(Module* module, uint32_t outlinedCount) { |
| // Rearrange outlined functions to the beginning of the functions vector by |
| // using std::make_move_iterator to avoid making copies. A temp vector is |
| // created to avoid iterator invalidation. |
| auto count = module->functions.size(); |
| std::vector<std::unique_ptr<Function>> temp( |
| std::make_move_iterator(module->functions.end() - outlinedCount), |
| std::make_move_iterator(module->functions.end())); |
| module->functions.insert(module->functions.begin(), |
| std::make_move_iterator(temp.begin()), |
| std::make_move_iterator(temp.end())); |
| module->functions.resize(count); |
| // After the functions vector is directly manipulated, we need to call |
| // updateFunctionsMap(). |
| module->updateFunctionsMap(); |
| } |
| |
| #if OUTLINING_DEBUG |
| void printHashString(const std::vector<uint32_t>& hashString, |
| const std::vector<Expression*>& exprs) { |
| std::cerr << "\n\n"; |
| for (Index idx = 0; idx < hashString.size(); idx++) { |
| Expression* expr = exprs[idx]; |
| if (expr) { |
| std::cerr << idx << " - " << hashString[idx] << ": " |
| << ShallowExpression{expr} << "\n"; |
| } else { |
| std::cerr << idx << ": unique symbol\n"; |
| } |
| } |
| } |
| #endif |
| }; |
| |
| Pass* createOutliningPass() { return new Outlining(); } |
| |
| } // namespace wasm |