| /* |
| * Copyright 2016 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Coalesce locals, in order to reduce the total number of locals. This |
| // is similar to register allocation, however, there is never any |
| // spilling, and there isn't a fixed number of locals. |
| // |
| // NB: This pass is nonlinear in the number of locals. It is best to run it |
| // after the number of locals has been somewhat reduced by other passes, |
| // for example by simplify-locals (to remove unneeded uses of locals) and |
| // reorder-locals (to sort them by # of uses and remove all unneeded ones). |
| // |
| |
| #include <algorithm> |
| #include <memory> |
| #include <unordered_set> |
| |
| #include "cfg/liveness-traversal.h" |
| #include "ir/numbering.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "support/learning.h" |
| #include "support/permutations.h" |
| #include "support/sparse_square_matrix.h" |
| #include "wasm.h" |
| #ifdef CFG_PROFILE |
| #include "support/timing.h" |
| #endif |
| |
| namespace wasm { |
| |
| struct CoalesceLocals |
| : public WalkerPass<LivenessWalker<CoalesceLocals, Visitor<CoalesceLocals>>> { |
| bool isFunctionParallel() override { return true; } |
| |
| // This pass merges locals, mapping the originals to new ones. |
| // FIXME DWARF updating does not handle local changes yet. |
| bool invalidatesDWARF() override { return true; } |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<CoalesceLocals>(); |
| } |
| |
| // Branches outside of the function can be ignored, as we only look at locals |
| // which vanish when we leave. |
| bool ignoreBranchesOutsideOfFunc = true; |
| |
| // main entry point |
| |
| void doWalkFunction(Function* func); |
| |
| void increaseBackEdgePriorities(); |
| |
| // Calculate interferences between locals. This will will fill |
| // the data structure |interferences|. |
| void calculateInterferences(); |
| |
| void pickIndicesFromOrder(std::vector<Index>& order, |
| std::vector<Index>& indices); |
| void pickIndicesFromOrder(std::vector<Index>& order, |
| std::vector<Index>& indices, |
| Index& removedCopies); |
| |
| // returns a vector of oldIndex => newIndex |
| virtual void pickIndices(std::vector<Index>& indices); |
| |
| void applyIndices(std::vector<Index>& indices, Expression* root); |
| |
| // interference state |
| |
| // canonicalized - accesses should check (low, high) |
| sparse_square_matrix<bool> interferences; |
| |
| void interfere(Index i, Index j) { |
| if (i == j) { |
| return; |
| } |
| interferences.set(std::min(i, j), std::max(i, j), true); |
| } |
| |
| // optimized version where you know that low < high |
| void interfereLowHigh(Index low, Index high) { |
| assert(low < high); |
| interferences.set(low, high, true); |
| } |
| |
| void unInterfere(Index i, Index j) { |
| interferences.set(std::min(i, j), std::max(i, j), false); |
| } |
| |
| bool interferes(Index i, Index j) { |
| return interferences.get(std::min(i, j), std::max(i, j)); |
| } |
| |
| private: |
| // In some cases we need to refinalize at the end. |
| bool refinalize = false; |
| }; |
| |
| void CoalesceLocals::doWalkFunction(Function* func) { |
| Super::doWalkFunction(func); |
| // prioritize back edges |
| increaseBackEdgePriorities(); |
| // use liveness to find interference |
| calculateInterferences(); |
| // pick new indices |
| std::vector<Index> indices; |
| pickIndices(indices); |
| // apply indices |
| applyIndices(indices, func->body); |
| |
| if (refinalize) { |
| ReFinalize().walkFunctionInModule(func, getModule()); |
| } |
| } |
| |
| // A copy on a backedge can be especially costly, forcing us to branch just to |
| // do that copy. Add weight to such copies, so we prioritize getting rid of |
| // them. |
| void CoalesceLocals::increaseBackEdgePriorities() { |
| for (auto* loopTop : loopTops) { |
| // ignore the first edge, it is the initial entry, we just want backedges |
| auto& in = loopTop->in; |
| for (Index i = 1; i < in.size(); i++) { |
| auto* arrivingBlock = in[i]; |
| if (arrivingBlock->out.size() > 1) { |
| // we just want unconditional branches to the loop top, true phi |
| // fragments |
| continue; |
| } |
| for (auto& action : arrivingBlock->contents.actions) { |
| if (action.isSet()) { |
| auto* set = (*action.origin)->cast<LocalSet>(); |
| if (auto* get = getCopy(set)) { |
| // this is indeed a copy, add to the cost (default cost is 2, so |
| // this adds 50%, and can mostly break ties) |
| addCopy(set->index, get->index); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| void CoalesceLocals::calculateInterferences() { |
| interferences.recreate(numLocals); |
| |
| // We will track the values in each local, using a numbering where each index |
| // represents a unique different value. This array maps a local index to the |
| // value index it contains. |
| // |
| // To avoid reallocating this array all the time, allocate it once outside the |
| // loop. |
| std::vector<Index> values(numLocals); |
| |
| ValueNumbering valueNumbering; |
| |
| auto* func = getFunction(); |
| |
| for (auto& curr : basicBlocks) { |
| if (liveBlocks.count(curr.get()) == 0) { |
| continue; // ignore dead blocks |
| } |
| |
| // First, find which gets end a live range. While doing so, also calculate |
| // the effectiveness of sets. |
| auto& actions = curr->contents.actions; |
| std::vector<bool> endsLiveRange(actions.size(), false); |
| auto live = curr->contents.end; |
| for (int i = int(actions.size()) - 1; i >= 0; i--) { |
| auto& action = actions[i]; |
| auto index = action.index; |
| if (action.isGet()) { |
| if (!live.has(index)) { |
| // The local is not live after us, so its liveness ends here. |
| endsLiveRange[i] = true; |
| live.insert(index); |
| } |
| } else { |
| // This is a set. Check if the local is alive after it; if it is then |
| // the set if effective as there is some get that can read the value. |
| if (live.erase(index)) { |
| action.effective = true; |
| } |
| } |
| } |
| |
| // We have processed from the end of the block to the start, updating |live| |
| // as we go, and now it must be equal to the state at the start of the |
| // block. We will also use |live| in the next loop, and assume it begins |
| // in that state. |
| assert(live == curr->contents.start); |
| |
| // Now that we know live ranges, check if locals interfere in this block. |
| // Locals interfere if they might contain different values on areas where |
| // their live ranges overlap. To evaluate that, we do an analysis inside |
| // the block that gives each set a unique value number, and as those flow |
| // around through copies between sets we can see when sets are guaranteed to |
| // be equal. |
| |
| if (curr.get() == entry) { |
| // Each parameter is assumed to have a different value on entry. |
| for (Index i = 0; i < func->getNumParams(); i++) { |
| values[i] = valueNumbering.getUniqueValue(); |
| } |
| |
| for (Index i = func->getNumParams(); i < func->getNumLocals(); i++) { |
| auto type = func->getLocalType(i); |
| if (!LiteralUtils::canMakeZero(type)) { |
| // The default value for a type for which we can't make a zero cannot |
| // be used anyhow, but we must give it some value in this analysis. A |
| // unique one seems least likely to result in surprise during |
| // debugging. |
| values[i] = valueNumbering.getUniqueValue(); |
| } else { |
| values[i] = valueNumbering.getValue(Literal::makeZeros(type)); |
| } |
| } |
| } else { |
| // In any block but the entry, assume that each live local might have a |
| // different value at the start. |
| // TODO: Propagating value IDs across blocks could identify more copies, |
| // however, it would also be nonlinear. |
| for (auto index : curr->contents.start) { |
| values[index] = valueNumbering.getUniqueValue(); |
| } |
| } |
| |
| // Traverse through the block from start to finish. We keep track of both |
| // liveness (in |live|) and the value IDs in each local (in |values|) |
| // while doing so. |
| for (Index i = 0; i < actions.size(); i++) { |
| auto& action = actions[i]; |
| auto index = action.index; |
| if (action.isGet()) { |
| if (endsLiveRange[i]) { |
| [[maybe_unused]] bool erased = live.erase(action.index); |
| assert(erased); |
| } |
| continue; |
| } |
| |
| // This is a set. Find the value being assigned to the local. |
| auto* set = (*action.origin)->cast<LocalSet>(); |
| Index newValue; |
| if (set->value->is<LocalGet>() || set->value->is<LocalSet>()) { |
| // This is a copy: Either it is a get or a tee, that occurs right |
| // before us. Set our new value to theirs. |
| assert(i > 0 && set->value == *actions[i - 1].origin); |
| newValue = values[actions[i - 1].index]; |
| } else { |
| // This is not a copy. |
| newValue = valueNumbering.getValue(set->value); |
| } |
| values[index] = newValue; |
| |
| // If this set has no gets that read from it, then it does not start a |
| // live range, and it cannot cause interference. |
| if (!action.effective) { |
| continue; |
| } |
| |
| // Update interferences: This will interfere with any other local that |
| // is currently live and contains a different value. |
| for (auto other : live) { |
| // This index cannot have been live before this set (as we would be |
| // trampling some other set before us, if so; and then that set would |
| // have been ineffective). We will mark this index as live right after |
| // this loop). |
| assert(other != index); |
| if (values[other] != newValue) { |
| interfere(other, index); |
| } |
| } |
| live.insert(action.index); |
| } |
| |
| // Note that we do not need to do anything for merges: while in general an |
| // interference can happen either in a block or when control flow merges, |
| // in wasm we have default values for all locals. As a result, if a local is |
| // live at the beginning of a block, it will be live at the ends of *all* |
| // the blocks reaching it: there is no possibility of an "unset local." That |
| // is, imagine we have this merge with a conflict: |
| // |
| // [a is set to some value] ->- |
| // | |
| // |->- [merge block where a and b are used] |
| // | |
| // [b is set to some value] ->- |
| // |
| // It is true that a conflict happens in the merge block, and if we had |
| // unset locals then the top block would have b unset, and the bottom block |
| // would have a unset, and so there would be no conflict there and the |
| // problem would only appear in the merge. But in wasm, that a and b are |
| // used in the merge block means that they are live at the end of both the |
| // top and bottom block, and that liveness will extend all the way back to |
| // *some* set of those values, possibly only the zero-initialization at the |
| // function start. Therefore a conflict will be noticed in both the top and |
| // bottom blocks, and that merge block does not need to reason about merging |
| // its inputs. In other words, a conflict will appear in the middle of a |
| // block, somewhere, and therefore we leave it to that block to identify, |
| // and so blocks only need to reason about their own contents and not what |
| // arrives to them. |
| // |
| // The one exception here is the entry to the function, see below. |
| } |
| |
| // We must not try to coalesce parameters as they are fixed. Mark them as |
| // "interfering" so that we do not need to special-case them later. |
| auto numParams = getFunction()->getNumParams(); |
| for (Index i = 0; i < numParams; i++) { |
| for (Index j = i + 1; j < numParams; j++) { |
| interfereLowHigh(i, j); |
| } |
| } |
| |
| // We must handle interference between uses of the zero-init value and |
| // parameters manually. A zero initialization represents a set (to a default |
| // value), and that set would be what alerts us to a conflict, but there is no |
| // actual set in the IR since the zero-init value is applied implicitly. |
| for (auto i : entry->contents.start) { |
| if (i >= numParams) { |
| for (Index j = 0; j < numParams; j++) { |
| interfereLowHigh(j, i); |
| } |
| } |
| } |
| } |
| |
| // Indices decision making |
| |
| void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, |
| std::vector<Index>& indices) { |
| Index removedCopies; |
| pickIndicesFromOrder(order, indices, removedCopies); |
| } |
| |
| void CoalesceLocals::pickIndicesFromOrder(std::vector<Index>& order, |
| std::vector<Index>& indices, |
| Index& removedCopies) { |
| // mostly-simple greedy coloring |
| #if CFG_DEBUG |
| std::cerr << "\npickIndicesFromOrder on " << getFunction()->name << '\n'; |
| std::cerr << getFunction()->body << '\n'; |
| std::cerr << "order:\n"; |
| for (auto i : order) |
| std::cerr << i << ' '; |
| std::cerr << '\n'; |
| std::cerr << "interferences:\n"; |
| for (Index i = 0; i < numLocals; i++) { |
| for (Index j = 0; j < i + 1; j++) { |
| std::cerr << " "; |
| } |
| for (Index j = i + 1; j < numLocals; j++) { |
| std::cerr << int(interferes(i, j)) << ' '; |
| } |
| std::cerr << " : $" << i << '\n'; |
| } |
| std::cerr << "copies:\n"; |
| for (Index i = 0; i < numLocals; i++) { |
| for (Index j = 0; j < i + 1; j++) { |
| std::cerr << " "; |
| } |
| for (Index j = i + 1; j < numLocals; j++) { |
| std::cerr << int(getCopies(i, j)) << ' '; |
| } |
| std::cerr << " : $" << i << '\n'; |
| } |
| std::cerr << "total copies:\n"; |
| for (Index i = 0; i < numLocals; i++) { |
| std::cerr << " $" << i << ": " << totalCopies[i] << '\n'; |
| } |
| #endif |
| // TODO: take into account distribution (99-1 is better than 50-50 with two |
| // registers, for gzip) |
| std::vector<Type> types; |
| // new index * numLocals => list of all interferences of locals merged to it |
| sparse_square_matrix<bool> newInterferences; |
| |
| // new index * numLocals => list of all copies of locals merged to it |
| sparse_square_matrix<uint8_t> newCopies; |
| |
| indices.resize(numLocals); |
| types.resize(numLocals); |
| |
| auto numParams = getFunction()->getNumParams(); |
| |
| newInterferences.recreate(numLocals); |
| newCopies.recreate(numLocals); |
| |
| Index nextFree = 0; |
| removedCopies = 0; |
| // we can't reorder parameters, they are fixed in order, and cannot coalesce |
| Index i = 0; |
| for (; i < numParams; i++) { |
| assert(order[i] == i); // order must leave the params in place |
| indices[i] = i; |
| types[i] = getFunction()->getLocalType(i); |
| for (Index j = numParams; j < numLocals; j++) { |
| newInterferences.set(i, j, interferes(i, j)); |
| newCopies.set(i, j, getCopies(i, j)); |
| } |
| nextFree++; |
| } |
| for (; i < numLocals; i++) { |
| Index actual = order[i]; |
| Index found = -1; |
| uint8_t foundCopies = -1; |
| for (Index j = 0; j < nextFree; j++) { |
| if (!newInterferences.get(j, actual) && |
| getFunction()->getLocalType(actual) == types[j]) { |
| // this does not interfere, so it might be what we want. but pick the |
| // one eliminating the most copies (we could stop looking forward when |
| // there are no more items that have copies anyhow, but it doesn't seem |
| // to help) |
| auto currCopies = newCopies.get(j, actual); |
| if (found == Index(-1) || currCopies > foundCopies) { |
| indices[actual] = found = j; |
| foundCopies = currCopies; |
| } |
| } |
| } |
| if (found == Index(-1)) { |
| indices[actual] = found = nextFree; |
| types[found] = getFunction()->getLocalType(actual); |
| nextFree++; |
| removedCopies += getCopies(found, actual); |
| } else { |
| removedCopies += foundCopies; |
| } |
| #if CFG_DEBUG |
| std::cerr << "set local $" << actual << " to $" << found << '\n'; |
| #endif |
| // merge new interferences and copies for the new index |
| for (Index k = i + 1; k < numLocals; k++) { |
| // go in the order, we only need to update for those we will see later |
| auto j = order[k]; |
| newInterferences.set( |
| found, j, newInterferences.get(found, j) || interferes(actual, j)); |
| newCopies.set(found, j, newCopies.get(found, j) + getCopies(actual, j)); |
| } |
| } |
| } |
| |
| // given a baseline order, adjust it based on an important order of priorities |
| // (higher values are higher priority). The priorities take precedence, unless |
| // they are equal and then the original order should be kept. |
| std::vector<Index> adjustOrderByPriorities(std::vector<Index>& baseline, |
| std::vector<Index>& priorities) { |
| std::vector<Index> ret = baseline; |
| std::vector<Index> reversed = makeReversed(baseline); |
| std::sort(ret.begin(), ret.end(), [&priorities, &reversed](Index x, Index y) { |
| return priorities[x] > priorities[y] || |
| (priorities[x] == priorities[y] && reversed[x] < reversed[y]); |
| }); |
| return ret; |
| } |
| |
| void CoalesceLocals::pickIndices(std::vector<Index>& indices) { |
| if (numLocals == 0) { |
| return; |
| } |
| if (numLocals == 1) { |
| indices.push_back(0); |
| return; |
| } |
| // take into account total copies. but we must keep params in place, so give |
| // them max priority |
| auto adjustedTotalCopies = totalCopies; |
| auto numParams = getFunction()->getNumParams(); |
| for (Index i = 0; i < numParams; i++) { |
| adjustedTotalCopies[i] = std::numeric_limits<Index>::max(); |
| } |
| // first try the natural order. this is less arbitrary than it seems, as the |
| // program may have a natural order of locals inherent in it. |
| auto order = makeIdentity(numLocals); |
| order = adjustOrderByPriorities(order, adjustedTotalCopies); |
| Index removedCopies; |
| pickIndicesFromOrder(order, indices, removedCopies); |
| auto maxIndex = *std::max_element(indices.begin(), indices.end()); |
| // next try the reverse order. this both gives us another chance at something |
| // good, and also the very naturalness of the simple order may be quite |
| // suboptimal |
| setIdentity(order); |
| for (Index i = numParams; i < numLocals; i++) { |
| order[i] = numParams + numLocals - 1 - i; |
| } |
| order = adjustOrderByPriorities(order, adjustedTotalCopies); |
| std::vector<Index> reverseIndices; |
| Index reverseRemovedCopies; |
| pickIndicesFromOrder(order, reverseIndices, reverseRemovedCopies); |
| auto reverseMaxIndex = |
| *std::max_element(reverseIndices.begin(), reverseIndices.end()); |
| // prefer to remove copies foremost, as it matters more for code size (minus |
| // gzip), and improves throughput. |
| if (reverseRemovedCopies > removedCopies || |
| (reverseRemovedCopies == removedCopies && reverseMaxIndex < maxIndex)) { |
| indices.swap(reverseIndices); |
| } |
| } |
| |
| void CoalesceLocals::applyIndices(std::vector<Index>& indices, |
| Expression* root) { |
| assert(indices.size() == numLocals); |
| for (auto& curr : basicBlocks) { |
| auto& actions = curr->contents.actions; |
| for (auto& action : actions) { |
| if (action.isGet()) { |
| auto* get = (*action.origin)->cast<LocalGet>(); |
| get->index = indices[get->index]; |
| } else if (action.isSet()) { |
| auto* set = (*action.origin)->cast<LocalSet>(); |
| set->index = indices[set->index]; |
| // in addition, we can optimize out redundant copies and ineffective |
| // sets |
| if (auto* get = set->value->dynCast<LocalGet>()) { |
| if (get->index == set->index) { |
| action.removeCopy(); |
| continue; |
| } |
| } |
| if (auto* subSet = set->value->dynCast<LocalSet>()) { |
| // Only do so if not only the index matches but also the type. If the |
| // inner type is more refined, leave that for other passes. |
| if (subSet->index == set->index && |
| subSet->value->type == subSet->type) { |
| set->value = subSet->value; |
| continue; |
| } |
| } |
| |
| // Remove ineffective actions, that is, dead stores. |
| // |
| // Note that this may have downsides for non-nullable locals: |
| // |
| // x = whatever; // dead set for validation |
| // if (..) { |
| // x = value1; |
| // } else { |
| // x = value2; |
| // } |
| // |
| // The dead set ensures validation, at the cost of extra code size and |
| // slower speed in some tiers (the optimizing tier, at least, will |
| // remove such dead sets anyhow). In theory keeping such a dead set may |
| // be worthwhile, as it may save code size (by keeping the local |
| // non-nullable and avoiding ref.as_non_nulls later). But the tradeoff |
| // here isn't clear, so do the simple thing for now and remove all dead |
| // sets. |
| if (!action.effective) { |
| // value may have no side effects, further optimizations can eliminate |
| // it |
| auto* value = set->value; |
| if (!set->isTee()) { |
| // we need to drop it |
| Drop* drop = ExpressionManipulator::convert<LocalSet, Drop>(set); |
| drop->value = value; |
| *action.origin = drop; |
| } else { |
| auto originalType = (*action.origin)->type; |
| if (originalType != set->value->type) { |
| // The value had a more refined type, which we must propagate at |
| // the end. |
| refinalize = true; |
| } |
| *action.origin = set->value; |
| } |
| continue; |
| } |
| } |
| } |
| } |
| // update type list |
| auto numParams = getFunction()->getNumParams(); |
| Index newNumLocals = 0; |
| for (auto index : indices) { |
| newNumLocals = std::max(newNumLocals, index + 1); |
| } |
| auto oldVars = getFunction()->vars; |
| getFunction()->vars.resize(newNumLocals - numParams); |
| for (Index index = numParams; index < numLocals; index++) { |
| Index newIndex = indices[index]; |
| if (newIndex >= numParams) { |
| getFunction()->vars[newIndex - numParams] = oldVars[index - numParams]; |
| } |
| } |
| // names are gone |
| getFunction()->localNames.clear(); |
| getFunction()->localIndices.clear(); |
| } |
| |
| struct CoalesceLocalsWithLearning : public CoalesceLocals { |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<CoalesceLocalsWithLearning>(); |
| } |
| |
| virtual void pickIndices(std::vector<Index>& indices) override; |
| }; |
| |
| void CoalesceLocalsWithLearning::pickIndices(std::vector<Index>& indices) { |
| if (getFunction()->getNumVars() <= 1) { |
| // nothing to think about here |
| CoalesceLocals::pickIndices(indices); |
| return; |
| } |
| |
| struct Order : public std::vector<Index> { |
| void setFitness(double f) { fitness = f; } |
| double getFitness() { return fitness; } |
| void dump(std::string text) { |
| std::cout << text + ": ( "; |
| for (Index i = 0; i < size(); i++) { |
| std::cout << (*this)[i] << " "; |
| } |
| std::cout << ")\n"; |
| std::cout << "of quality: " << getFitness() << "\n"; |
| } |
| |
| private: |
| double fitness; |
| }; |
| |
| struct Generator { |
| Generator(CoalesceLocalsWithLearning* parent) : parent(parent), noise(42) {} |
| |
| void calculateFitness(Order* order) { |
| // apply the order |
| std::vector<Index> indices; // the phenotype |
| Index removedCopies; |
| parent->pickIndicesFromOrder(*order, indices, removedCopies); |
| auto maxIndex = *std::max_element(indices.begin(), indices.end()); |
| assert(maxIndex <= parent->numLocals); |
| // main part of fitness is the number of locals |
| double fitness = parent->numLocals - maxIndex; // higher fitness is better |
| // secondarily, it is nice to not reorder locals unnecessarily |
| double fragment = 1.0 / (2.0 * parent->numLocals); |
| for (Index i = 0; i < parent->numLocals; i++) { |
| if ((*order)[i] == i) { |
| fitness += fragment; // boost for each that wasn't moved |
| } |
| } |
| // removing copies is a secondary concern |
| fitness = (100 * fitness) + removedCopies; |
| order->setFitness(fitness); |
| } |
| |
| Order* makeRandom() { |
| auto* ret = new Order; |
| ret->resize(parent->numLocals); |
| for (Index i = 0; i < parent->numLocals; i++) { |
| (*ret)[i] = i; |
| } |
| if (first) { |
| // as the first guess, use the natural order. this is not arbitrary for |
| // two reasons. first, there may be an inherent order in the input |
| // (frequent indices are lower, etc.). second, by ensuring we start with |
| // the natural order, we ensure we are at least as good as the |
| // non-learning variant. |
| // TODO: use ::pickIndices from the parent, so we literally get the |
| // simpler approach as our first option |
| first = false; |
| } else { |
| // leave params alone, shuffle the rest |
| std::shuffle(ret->begin() + parent->getFunction()->getNumParams(), |
| ret->end(), |
| noise); |
| } |
| calculateFitness(ret); |
| #ifdef CFG_LEARN_DEBUG |
| order->dump("new rando"); |
| #endif |
| return ret; |
| } |
| |
| Order* makeMixture(Order* left, Order* right) { |
| // perturb left using right. this is useful since |
| // we don't care about absolute locations, relative ones matter more, |
| // and a true merge of two vectors could obscure that (e.g. |
| // a.......... and ..........a would merge a into the middle, for no |
| // reason), and cause a lot of unnecessary noise |
| Index size = left->size(); |
| Order reverseRight; // reverseRight[x] is the index of x in right |
| reverseRight.resize(size); |
| for (Index i = 0; i < size; i++) { |
| reverseRight[(*right)[i]] = i; |
| } |
| auto* ret = new Order; |
| *ret = *left; |
| assert(size >= 1); |
| for (Index i = parent->getFunction()->getNumParams(); i < size - 1; i++) { |
| // if (i, i + 1) is in reverse order in right, flip them |
| if (reverseRight[(*ret)[i]] > reverseRight[(*ret)[i + 1]]) { |
| std::swap((*ret)[i], (*ret)[i + 1]); |
| // if we don't skip, we might end up pushing an element all the way to |
| // the end, which is not very perturbation-y |
| i++; |
| } |
| } |
| calculateFitness(ret); |
| #ifdef CFG_LEARN_DEBUG |
| ret->dump("new mixture"); |
| #endif |
| return ret; |
| } |
| |
| private: |
| CoalesceLocalsWithLearning* parent; |
| std::mt19937 noise; |
| bool first = true; |
| }; |
| |
| #ifdef CFG_LEARN_DEBUG |
| std::cout << "[learning for " << getFunction()->name << "]\n"; |
| #endif |
| auto numVars = this->getFunction()->getNumVars(); |
| const int GENERATION_SIZE = |
| std::min(Index(numVars * (numVars - 1)), Index(20)); |
| Generator generator(this); |
| GeneticLearner<Order, double, Generator> learner(generator, GENERATION_SIZE); |
| #ifdef CFG_LEARN_DEBUG |
| learner.getBest()->dump("first best"); |
| #endif |
| // keep working while we see improvement |
| auto oldBest = learner.getBest()->getFitness(); |
| while (1) { |
| learner.runGeneration(); |
| auto newBest = learner.getBest()->getFitness(); |
| if (newBest == oldBest) { |
| break; // unlikely we can improve |
| } |
| oldBest = newBest; |
| #ifdef CFG_LEARN_DEBUG |
| learner.getBest()->dump("current best"); |
| #endif |
| } |
| #ifdef CFG_LEARN_DEBUG |
| learner.getBest()->dump("the best"); |
| #endif |
| // TODO: cache indices in Orders, at the cost of more memory? |
| this->pickIndicesFromOrder(*learner.getBest(), indices); |
| } |
| |
| // declare passes |
| |
| Pass* createCoalesceLocalsPass() { return new CoalesceLocals(); } |
| |
| Pass* createCoalesceLocalsWithLearningPass() { |
| return new CoalesceLocalsWithLearning(); |
| } |
| |
| } // namespace wasm |