minor changes
diff --git a/src/passes/DeadStoreElimination.cpp b/src/passes/DeadStoreElimination.cpp index 7fa5b9d..e0bd514 100644 --- a/src/passes/DeadStoreElimination.cpp +++ b/src/passes/DeadStoreElimination.cpp
@@ -80,6 +80,25 @@ std::atomic<int> gcHaltedWithNoLoads(0); std::atomic<int> gcHaltedWithLoads(0); +enum class HaltReason { + NoHalt, + Call, + Throw, + Trap, + BranchOut, + Interaction, + Exit, + Unknown +}; + +std::atomic<int> gcHaltCall(0); +std::atomic<int> gcHaltThrow(0); +std::atomic<int> gcHaltTrap(0); +std::atomic<int> gcHaltBranchOut(0); +std::atomic<int> gcHaltInteraction(0); +std::atomic<int> gcHaltExit(0); +std::atomic<int> gcHaltUnknown(0); + // A variation of LocalGraph that can also compare expressions to check for // their equivalence. Basic LocalGraph just looks at locals, while this class // goes further and looks at the structure of the expression, taking into @@ -146,15 +165,27 @@ // // The default behavior here considers all calls to be barriers. Subclasses // can use whole-program information to do better. - bool isBarrier(Expression* curr, const ShallowEffectAnalyzer& currEffects) { + HaltReason isBarrier(Expression* curr, + const ShallowEffectAnalyzer& currEffects) { // TODO: ignore throws of an exception that is definitely caught in this // function // TODO: if we add an "ignore after trap mode" (to assume nothing happens // after a trap) then we could stop assuming any trap can lead to // access of global data, likely greatly reducing the number of // barriers. - return currEffects.calls || currEffects.throws() || currEffects.trap || - currEffects.branchesOut; + if (currEffects.calls) + return HaltReason::Call; + if (currEffects.throws()) + return HaltReason::Throw; + if (currEffects.trap && !currEffects.trapsNeverHappen) + return HaltReason::Trap; + if (currEffects.branchesOut) + return HaltReason::BranchOut; + + return HaltReason::NoHalt; + // return currEffects.calls || currEffects.throws() || (currEffects.trap && + // !currEffects.trapsNeverHappen) || + // currEffects.branchesOut; }; // Returns whether an expression may interact with loads and stores in @@ -431,6 +462,10 @@ return mayAlias(load, store); } + std::cerr << "mayInteractWith\n"; + curr->dump(); + return false; + // This is not a load or a store that we recognize; check for generic heap // interactions. return currEffects.readsMutableStruct || currEffects.writesStruct; @@ -474,6 +509,14 @@ int localHaltedWithNoLoads = 0; int localHaltedWithLoads = 0; + int localHaltCall = 0; + int localHaltThrow = 0; + int localHaltTrap = 0; + int localHaltBranchOut = 0; + int localHaltInteraction = 0; + int localHaltExit = 0; + int localHaltUnknown = 0; + DeadStoreCFG(Module& wasm, Function* func, PassOptions& passOptions) : func(func), passOptions(passOptions), logic(func, passOptions, wasm) { this->setModule(&wasm); @@ -491,13 +534,8 @@ // Add all relevant things to the list of exprs for the current basic block. if (isStore(curr) || isLoad(curr)) { exprs.push_back(curr); - } else if (logic.isBarrier(curr, currEffects)) { - // Barriers can be very common, so as a minor optimization avoid having - // consecutive ones; a single barrier will stop us. - if (exprs.empty() || exprs.back() != barrier) { - exprs.push_back(barrier); - } - } else if (logic.mayInteract(curr, currEffects)) { + } else if (logic.isBarrier(curr, currEffects) != HaltReason::NoHalt || + logic.mayInteract(curr, currEffects)) { exprs.push_back(curr); } } @@ -558,11 +596,37 @@ // When we find something we cannot optimize, stop flowing and mark the // store as unoptimizable. - auto halt = [&]() { + auto halt = [&](HaltReason reason) { work.clear(); understoodStores.erase(store); if (definiteLoads == 0) { localHaltedWithNoLoads++; + switch (reason) { + case HaltReason::NoHalt: + assert(false && "impossible"); + break; + case HaltReason::Call: + localHaltCall++; + break; + case HaltReason::Throw: + localHaltThrow++; + break; + case HaltReason::Trap: + localHaltTrap++; + break; + case HaltReason::BranchOut: + localHaltBranchOut++; + break; + case HaltReason::Interaction: + localHaltInteraction++; + break; + case HaltReason::Exit: + localHaltExit++; + break; + case HaltReason::Unknown: + localHaltUnknown++; + break; + } } else { localHaltedWithLoads++; } @@ -574,14 +638,21 @@ for (size_t i = from; i < block->contents.exprs.size(); i++) { auto* curr = block->contents.exprs[i]; - if (curr == barrier) { - halt(); - return; - } - ShallowEffectAnalyzer currEffects( passOptions, *this->getModule(), curr); + if (auto reason = logic.isBarrier(curr, currEffects); + reason != HaltReason::NoHalt) { + halt(reason); + return; + // if (currEffects.calls) halt(HaltReason::Call); + // else if (currEffects.throws()) halt(HaltReason::Throw); + // else if (currEffects.trap && !currEffects.trapsNeverHappen) + // halt(HaltReason::Trap); else if (currEffects.branchesOut) + // halt(HaltReason::BranchOut); else halt(HaltReason::Unknown); + // return; + } + if (logic.isLoadFrom(curr, currEffects, store)) { // We found a definite load of this store, note it. definiteLoads++; @@ -592,7 +663,7 @@ return; } else if (logic.mayInteractWith(curr, currEffects, store)) { // Stop: we cannot fully analyze the uses of this store. - halt(); + halt(HaltReason::Interaction); return; } } @@ -605,7 +676,7 @@ if (block == this->exit) { // Any value flowing out can be reached by global code outside the // function after we leave. - halt(); + halt(HaltReason::Exit); } }; @@ -665,6 +736,14 @@ gcUnderstoodUsedStores += localUnderstoodUsedStores; gcHaltedWithNoLoads += localHaltedWithNoLoads; gcHaltedWithLoads += localHaltedWithLoads; + + gcHaltCall += localHaltCall; + gcHaltThrow += localHaltThrow; + gcHaltTrap += localHaltTrap; + gcHaltBranchOut += localHaltBranchOut; + gcHaltInteraction += localHaltInteraction; + gcHaltExit += localHaltExit; + gcHaltUnknown += localHaltUnknown; } if (replacer.replacements.empty()) { @@ -718,6 +797,14 @@ gcHaltedWithNoLoads = 0; gcHaltedWithLoads = 0; + gcHaltCall = 0; + gcHaltThrow = 0; + gcHaltTrap = 0; + gcHaltBranchOut = 0; + gcHaltInteraction = 0; + gcHaltExit = 0; + gcHaltUnknown = 0; + // Run the actual DSE in parallel. We use a nested PassRunner to take // advantage of its built-in function parallelism logic. PassRunner passRunner(module, getPassOptions()); @@ -733,6 +820,20 @@ << "\n"; std::cerr << " Halted w/ NO loads (Likely Dead): " << gcHaltedWithNoLoads.load() << "\n"; + if (gcHaltedWithNoLoads > 0) { + std::cerr << " Reason: Call: " << gcHaltCall.load() << "\n"; + std::cerr << " Reason: Throw: " << gcHaltThrow.load() << "\n"; + std::cerr << " Reason: Trap: " << gcHaltTrap.load() << "\n"; + std::cerr << " Reason: BranchOut: " << gcHaltBranchOut.load() + << "\n"; + std::cerr << " Reason: Interaction: " << gcHaltInteraction.load() + << "\n"; + std::cerr << " Reason: Exit: " << gcHaltExit.load() << "\n"; + if (gcHaltUnknown > 0) { + std::cerr << " Reason: Unknown: " << gcHaltUnknown.load() + << "\n"; + } + } std::cerr << " Halted w/ loads (Partially Used): " << gcHaltedWithLoads.load() << "\n"; std::cerr << "=========================\n\n";