Merge remote-tracking branch 'origin/main' into debug.after-inlining
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 7f06f8c..3ae7b68 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -170,6 +170,8 @@
 
   std::vector<HeapType> heapTypes;
 
+  unsigned lastPrintIndent = 0;
+
   // Print type names by saved name or index if we have a module, or otherwise
   // by generating minimalist names. TODO: Handle conflicts between
   // user-provided names and the fallback indexed names.
@@ -2375,10 +2377,11 @@
 
 void PrintSExpression::printDebugLocation(
   const Function::DebugLocation& location) {
-  if (lastPrintedLocation == location) {
+  if (lastPrintedLocation == location && indent > lastPrintIndent) {
     return;
   }
   lastPrintedLocation = location;
+  lastPrintIndent = indent;
   auto fileName = currModule->debugInfoFileNames[location.fileIndex];
   o << ";;@ " << fileName << ":" << location.lineNumber << ":"
     << location.columnNumber << '\n';
diff --git a/src/wasm-ir-builder.h b/src/wasm-ir-builder.h
index 6a1b3f1..3805a3e 100644
--- a/src/wasm-ir-builder.h
+++ b/src/wasm-ir-builder.h
@@ -42,9 +42,7 @@
   // Get the valid Binaryen IR expression representing the sequence of visited
   // instructions. The IRBuilder is reset and can be used with a fresh sequence
   // of instructions after this is called.
-  Expression* build();
-
-  [[nodiscard]] Result<std::vector<Expression*>> finishInstrs();
+  [[nodiscard]] Result<Expression*> build();
 
   // Call visit() on an existing Expression with its non-child fields
   // initialized to initialize the child fields and refinalize it. The specific
@@ -56,11 +54,13 @@
   [[nodiscard]] Result<> visitStructNew(StructNew*);
   [[nodiscard]] Result<> visitArrayNew(ArrayNew*);
 
+  [[nodiscard]] Result<> visitEnd();
+
   // Alternatively, call makeXYZ to have the IRBuilder allocate the nodes. This
   // is generally safer than calling `visit` because the function signatures
   // ensure that there are no missing fields.
   [[nodiscard]] Result<> makeNop();
-  [[nodiscard]] Result<> makeBlock();
+  [[nodiscard]] Result<> makeBlock(Name label, Type type);
   // [[nodiscard]] Result<> makeIf();
   // [[nodiscard]] Result<> makeLoop();
   // [[nodiscard]] Result<> makeBreak();
@@ -168,9 +168,6 @@
   // [[nodiscard]] Result<> makeStringSliceWTF();
   // [[nodiscard]] Result<> makeStringSliceIter();
 
-  // TODO: make this private.
-  void pushScope(Type type) { scopeStack.push_back({{}, type}); }
-
   void setFunction(Function* func) { this->func = func; }
 
 private:
@@ -183,24 +180,42 @@
   // to have.
   struct BlockCtx {
     std::vector<Expression*> exprStack;
-    Type type;
+    Block* block;
+    // Whether we have seen an unreachable instruction and are in
+    // stack-polymorphic unreachable mode.
+    bool unreachable = false;
   };
 
   // The stack of block contexts currently being parsed.
   std::vector<BlockCtx> scopeStack;
-  std::vector<Expression*>& getExprStack();
-  Type getResultType() {
-    assert(!scopeStack.empty());
-    return scopeStack.back().type;
+
+  BlockCtx& getScope() {
+    if (scopeStack.empty()) {
+      // We are not in a block context, so push a dummy scope.
+      scopeStack.push_back({{}, nullptr});
+    }
+    return scopeStack.back();
   }
 
-  // Whether we have seen an unreachable instruction and are in
-  // stack-polymorphic unreachable mode.
-  bool unreachable = false;
+  [[nodiscard]] Result<Index> addScratchLocal(Type);
+  [[nodiscard]] Result<Expression*> pop();
+  void push(Expression*);
 
-  Result<Index> addScratchLocal(Type);
-  [[nodiscard]] Result<> push(Expression*);
-  Result<Expression*> pop();
+  struct HoistedVal {
+    // The index in the stack of the original value-producing expression.
+    Index valIndex;
+    // The local.get placed on the stack, if any.
+    LocalGet* get;
+  };
+
+  // Find the last value-producing expression, if any, and hoist its value to
+  // the top of the stack using a scratch local if necessary.
+  [[nodiscard]] MaybeResult<HoistedVal> hoistLastValue();
+  // Transform the stack as necessary such that the original producer of the
+  // hoisted value will be popped along with the final expression that produces
+  // the value, if they are different. May only be called directly after
+  // hoistLastValue().
+  [[nodiscard]] Result<> packageHoistedValue(const HoistedVal&);
 };
 
 } // namespace wasm
diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp
index 6786816..7e163fe 100644
--- a/src/wasm/wasm-ir-builder.cpp
+++ b/src/wasm/wasm-ir-builder.cpp
@@ -39,14 +39,6 @@
 
 } // anonymous namespace
 
-std::vector<Expression*>& IRBuilder::getExprStack() {
-  if (scopeStack.empty()) {
-    // We are not in a function, so push a dummy scope.
-    scopeStack.push_back({{}, Type::none});
-  }
-  return scopeStack.back().exprStack;
-}
-
 Result<Index> IRBuilder::addScratchLocal(Type type) {
   if (!func) {
     return Err{"scratch local required, but there is no function context"};
@@ -55,127 +47,143 @@
   return Builder::addVar(func, name, type);
 }
 
-Result<> IRBuilder::push(Expression* expr) {
-  auto& exprStack = getExprStack();
+MaybeResult<IRBuilder::HoistedVal> IRBuilder::hoistLastValue() {
+  auto& stack = getScope().exprStack;
+  int index = stack.size() - 1;
+  for (; index >= 0; --index) {
+    if (stack[index]->type != Type::none) {
+      break;
+    }
+  }
+  if (index < 0) {
+    // There is no value-producing or unreachable expression.
+    return {};
+  }
+  if (unsigned(index) == stack.size() - 1) {
+    // Value-producing expression already on top of the stack.
+    return HoistedVal{Index(index), nullptr};
+  }
+  auto*& expr = stack[index];
+  auto type = expr->type;
+  if (type == Type::unreachable) {
+    // Make sure the top of the stack also has an unreachable expression.
+    if (stack.back()->type != Type::unreachable) {
+      push(builder.makeUnreachable());
+    }
+    return HoistedVal{Index(index), nullptr};
+  }
+  // Hoist with a scratch local.
+  auto scratchIdx = addScratchLocal(type);
+  CHECK_ERR(scratchIdx);
+  expr = builder.makeLocalSet(*scratchIdx, expr);
+  auto* get = builder.makeLocalGet(*scratchIdx, type);
+  push(get);
+  return HoistedVal{Index(index), get};
+}
+
+Result<> IRBuilder::packageHoistedValue(const HoistedVal& hoisted) {
+  auto& scope = getScope();
+  assert(!scope.exprStack.empty());
+
+  auto packageAsBlock = [&](Type type) {
+    // Create a block containing the producer of the hoisted value, the final
+    // get of the hoisted value, and everything in between.
+    std::vector<Expression*> exprs(scope.exprStack.begin() + hoisted.valIndex,
+                                   scope.exprStack.end());
+    auto* block = builder.makeBlock(exprs, type);
+    scope.exprStack.resize(hoisted.valIndex);
+    push(block);
+  };
+
+  auto type = scope.exprStack.back()->type;
+
+  if (!type.isTuple()) {
+    if (hoisted.get) {
+      packageAsBlock(type);
+    }
+    return Ok{};
+  }
+
+  // We need to break up the hoisted tuple. Create and push a block setting the
+  // tuple to a local and returning its first element, then push additional gets
+  // of each of its subsequent elements. Reuse the scratch local we used for
+  // hoisting, if it exists.
+  Index scratchIdx;
+  if (hoisted.get) {
+    // Update the get on top of the stack to just return the first element.
+    scope.exprStack.back() = builder.makeTupleExtract(hoisted.get, 0);
+    packageAsBlock(type[0]);
+    scratchIdx = hoisted.get->index;
+  } else {
+    auto scratch = addScratchLocal(type);
+    CHECK_ERR(scratch);
+    auto* block = builder.makeSequence(
+      builder.makeLocalSet(*scratch, scope.exprStack.back()),
+      builder.makeTupleExtract(builder.makeLocalGet(*scratch, type), 0),
+      type[0]);
+    scope.exprStack.pop_back();
+    push(block);
+    scratchIdx = *scratch;
+  }
+  for (Index i = 1, size = type.size(); i < size; ++i) {
+    push(builder.makeTupleExtract(builder.makeLocalGet(scratchIdx, type), i));
+  }
+  return Ok{};
+}
+
+void IRBuilder::push(Expression* expr) {
+  auto& scope = getScope();
   if (expr->type == Type::unreachable) {
     // We want to avoid popping back past this most recent unreachable
     // instruction. Drop all prior instructions so they won't be consumed by
     // later instructions but will still be emitted for their side effects, if
     // any.
-    for (auto& expr : exprStack) {
+    for (auto& expr : scope.exprStack) {
       expr = builder.dropIfConcretelyTyped(expr);
     }
-    unreachable = true;
-    exprStack.push_back(expr);
-  } else if (expr->type.isTuple()) {
-    auto scratchIdx = addScratchLocal(expr->type);
-    CHECK_ERR(scratchIdx);
-    CHECK_ERR(push(builder.makeLocalSet(*scratchIdx, expr)));
-    for (Index i = 0; i < expr->type.size(); ++i) {
-      CHECK_ERR(push(builder.makeTupleExtract(
-        builder.makeLocalGet(*scratchIdx, expr->type), i)));
-    }
-  } else {
-    exprStack.push_back(expr);
+    scope.unreachable = true;
   }
-  return Ok{};
+  scope.exprStack.push_back(expr);
 }
 
 Result<Expression*> IRBuilder::pop() {
-  auto& exprStack = getExprStack();
+  auto& scope = getScope();
 
   // Find the suffix of expressions that do not produce values.
-  auto firstNone = exprStack.size();
-  for (; firstNone > 0; --firstNone) {
-    auto* expr = exprStack[firstNone - 1];
-    if (expr->type != Type::none) {
-      break;
-    }
-  }
+  auto hoisted = hoistLastValue();
+  CHECK_ERR(hoisted);
 
-  if (firstNone == 0) {
+  if (!hoisted) {
     // There are no expressions that produce values.
-    if (unreachable) {
+    if (scope.unreachable) {
       return builder.makeUnreachable();
     }
     return Err{"popping from empty stack"};
   }
 
-  if (firstNone == exprStack.size()) {
-    // The last expression produced a value.
-    auto expr = exprStack.back();
-    exprStack.pop_back();
-    return expr;
-  }
+  CHECK_ERR(packageHoistedValue(*hoisted));
 
-  // We need to assemble a block of expressions that returns the value of the
-  // first one using a scratch local (unless it's unreachable, in which case
-  // we can throw the following expressions away).
-  auto* expr = exprStack[firstNone - 1];
-  if (expr->type == Type::unreachable) {
-    exprStack.resize(firstNone - 1);
-    return expr;
-  }
-  auto scratchIdx = addScratchLocal(expr->type);
-  CHECK_ERR(scratchIdx);
-  std::vector<Expression*> exprs;
-  exprs.reserve(exprStack.size() - firstNone + 2);
-  exprs.push_back(builder.makeLocalSet(*scratchIdx, expr));
-  exprs.insert(exprs.end(), exprStack.begin() + firstNone, exprStack.end());
-  exprs.push_back(builder.makeLocalGet(*scratchIdx, expr->type));
-
-  exprStack.resize(firstNone - 1);
-  return builder.makeBlock(exprs, expr->type);
-}
-
-Expression* IRBuilder::build() {
-  auto& exprStack = getExprStack();
-  assert(scopeStack.size() == 1);
-  assert(exprStack.size() == 1);
-
-  auto e = exprStack.back();
-  exprStack.clear();
-  unreachable = false;
-  return e;
-}
-
-Result<std::vector<Expression*>> IRBuilder::finishInstrs() {
-  auto& exprStack = getExprStack();
-  auto type = getResultType();
-
-  // We have finished parsing a sequence of instructions. Fix up the parsed
-  // instructions and reset the context for the next sequence.
-  if (type.isTuple()) {
-    std::vector<Expression*> elems(type.size());
-    bool hadUnreachableElem = false;
-    for (size_t i = 0; i < elems.size(); ++i) {
-      auto elem = pop();
-      CHECK_ERR(elem);
-      elems[elems.size() - 1 - i] = *elem;
-      if ((*elem)->type == Type::unreachable) {
-        // We don't want to pop back past an unreachable here. Push the
-        // unreachable back and throw away any post-unreachable values we have
-        // popped.
-        exprStack.push_back(*elem);
-        hadUnreachableElem = true;
-        break;
-      }
-    }
-    if (!hadUnreachableElem) {
-      exprStack.push_back(builder.makeTupleMake(std::move(elems)));
-    }
-  } else if (type != Type::none) {
-    // Ensure the last expression produces the value.
-    auto expr = pop();
-    CHECK_ERR(expr);
-    exprStack.push_back(*expr);
-  }
-  unreachable = false;
-  auto ret = std::move(exprStack);
-  scopeStack.pop_back();
+  auto* ret = scope.exprStack.back();
+  scope.exprStack.pop_back();
   return ret;
 }
 
+Result<Expression*> IRBuilder::build() {
+  if (scopeStack.empty()) {
+    return builder.makeNop();
+  }
+  if (scopeStack.size() > 1 || scopeStack.back().block != nullptr) {
+    return Err{"unfinished block context"};
+  }
+  if (scopeStack.back().exprStack.size() > 1) {
+    return Err{"unused expressions without block context"};
+  }
+  assert(scopeStack.back().exprStack.size() == 1);
+  auto* expr = scopeStack.back().exprStack.back();
+  scopeStack.clear();
+  return expr;
+}
+
 Result<> IRBuilder::visit(Expression* curr) {
   UnifiedExpressionVisitor<IRBuilder, Result<>>::visit(curr);
   if (auto* block = curr->dynCast<Block>()) {
@@ -185,7 +193,8 @@
     // for other kinds of nodes as well, as done above.
     ReFinalizeNode{}.visit(curr);
   }
-  return push(curr);
+  push(curr);
+  return Ok{};
 }
 
 // Handle the common case of instructions with a constant number of children
@@ -224,7 +233,7 @@
 }
 
 Result<> IRBuilder::visitBlock(Block* curr) {
-  // TODO: Handle popping scope and filling block here instead of externally.
+  scopeStack.push_back({{}, curr});
   return Ok{};
 }
 
@@ -272,9 +281,86 @@
   return Ok{};
 }
 
-Result<> IRBuilder::makeNop() { return push(builder.makeNop()); }
+// Finish the innermost open block scope: arrange the expression stack so that
+// a block with a result type ends in its value (or in an unreachable), move
+// the stack into the Block, finalize it, pop the scope, and push the Block
+// onto the now-current scope. Returns an error if no block scope is open.
+Result<> IRBuilder::visitEnd() {
+  if (scopeStack.empty() || !scopeStack.back().block) {
+    return Err{"unexpected end"};
+  }
 
-Result<> IRBuilder::makeBlock() { return push(builder.makeBlock()); }
+  auto& scope = scopeStack.back();
+  Block* block = scope.block;
+  if (block->type.isTuple()) {
+    if (scope.unreachable) {
+      // We may not have enough concrete values on the stack to construct the
+      // full tuple, and if we tried to fill out the beginning of a tuple.make
+      // with additional popped `unreachable`s, that could cause a trap to
+      // happen before important side effects. Instead, just drop everything on
+      // the stack and finish with a single unreachable.
+      //
+      // TODO: Validate that the available expressions are a correct suffix of
+      // the expected type, since this will no longer be caught by normal
+      // validation?
+      for (auto& expr : scope.exprStack) {
+        expr = builder.dropIfConcretelyTyped(expr);
+      }
+      if (scope.exprStack.back()->type != Type::unreachable) {
+        scope.exprStack.push_back(builder.makeUnreachable());
+      }
+    } else {
+      auto hoisted = hoistLastValue();
+      CHECK_ERR(hoisted);
+      if (!hoisted) {
+        // No expression on the stack produces a value, so there is nothing to
+        // build the tuple result from and `*hoisted` below would be invalid.
+        return Err{"popping from empty stack"};
+      }
+      auto hoistedType = scope.exprStack.back()->type;
+      if (hoistedType.size() != block->type.size()) {
+        // We cannot propagate the hoisted value directly because it does not
+        // have the correct number of elements. Break it up if necessary and
+        // construct our returned tuple from parts.
+        CHECK_ERR(packageHoistedValue(*hoisted));
+        std::vector<Expression*> elems(block->type.size());
+        for (size_t i = 0; i < elems.size(); ++i) {
+          auto elem = pop();
+          CHECK_ERR(elem);
+          elems[elems.size() - 1 - i] = *elem;
+        }
+        scope.exprStack.push_back(builder.makeTupleMake(std::move(elems)));
+      }
+    }
+  } else if (block->type.isConcrete()) {
+    // If the value is buried in none-typed expressions, we have to bring it to
+    // the top.
+    auto hoisted = hoistLastValue();
+    CHECK_ERR(hoisted);
+  }
+  block->list.set(scope.exprStack);
+  // TODO: Track branches so we can know whether this block is a target and
+  // finalize more efficiently.
+  block->finalize(block->type);
+  scopeStack.pop_back();
+  push(block);
+  return Ok{};
+}
+
+Result<> IRBuilder::makeNop() {
+  push(builder.makeNop());
+  return Ok{};
+}
+
+// Allocate a Block with the given label and type and open a new scope for it.
+// The scope is closed, and the Block filled in and pushed, by visitEnd().
+Result<> IRBuilder::makeBlock(Name label, Type type) {
+  auto* block = wasm.allocator.alloc<Block>();
+  block->name = label;
+  block->type = type;
+  scopeStack.push_back({{}, block});
+  return Ok{};
+}
 
 // Result<> IRBuilder::makeIf() {}
 
@@ -289,30 +364,34 @@
 // Result<> IRBuilder::makeCallIndirect() {}
 
 Result<> IRBuilder::makeLocalGet(Index local) {
-  return push(builder.makeLocalGet(local, func->getLocalType(local)));
+  push(builder.makeLocalGet(local, func->getLocalType(local)));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeLocalSet(Index local) {
   LocalSet curr;
   CHECK_ERR(visitLocalSet(&curr));
-  return push(builder.makeLocalSet(local, curr.value));
+  push(builder.makeLocalSet(local, curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeLocalTee(Index local) {
   LocalSet curr;
   CHECK_ERR(visitLocalSet(&curr));
-  return push(
-    builder.makeLocalTee(local, curr.value, func->getLocalType(local)));
+  push(builder.makeLocalTee(local, curr.value, func->getLocalType(local)));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeGlobalGet(Name global) {
-  return push(builder.makeGlobalGet(global, wasm.getGlobal(global)->type));
+  push(builder.makeGlobalGet(global, wasm.getGlobal(global)->type));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeGlobalSet(Name global) {
   GlobalSet curr;
   CHECK_ERR(visitGlobalSet(&curr));
-  return push(builder.makeGlobalSet(global, curr.value));
+  push(builder.makeGlobalSet(global, curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeLoad(unsigned bytes,
@@ -323,23 +402,25 @@
                              Name mem) {
   Load curr;
   CHECK_ERR(visitLoad(&curr));
-  return push(
-    builder.makeLoad(bytes, signed_, offset, align, curr.ptr, type, mem));
+  push(builder.makeLoad(bytes, signed_, offset, align, curr.ptr, type, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeStore(
   unsigned bytes, Address offset, unsigned align, Type type, Name mem) {
   Store curr;
   CHECK_ERR(visitStore(&curr));
-  return push(
+  push(
     builder.makeStore(bytes, offset, align, curr.ptr, curr.value, type, mem));
+  return Ok{};
 }
 
 Result<>
 IRBuilder::makeAtomicLoad(unsigned bytes, Address offset, Type type, Name mem) {
   Load curr;
   CHECK_ERR(visitLoad(&curr));
-  return push(builder.makeAtomicLoad(bytes, offset, curr.ptr, type, mem));
+  push(builder.makeAtomicLoad(bytes, offset, curr.ptr, type, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeAtomicStore(unsigned bytes,
@@ -348,16 +429,17 @@
                                     Name mem) {
   Store curr;
   CHECK_ERR(visitStore(&curr));
-  return push(
-    builder.makeAtomicStore(bytes, offset, curr.ptr, curr.value, type, mem));
+  push(builder.makeAtomicStore(bytes, offset, curr.ptr, curr.value, type, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeAtomicRMW(
   AtomicRMWOp op, unsigned bytes, Address offset, Type type, Name mem) {
   AtomicRMW curr;
   CHECK_ERR(visitAtomicRMW(&curr));
-  return push(
+  push(
     builder.makeAtomicRMW(op, bytes, offset, curr.ptr, curr.value, type, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeAtomicCmpxchg(unsigned bytes,
@@ -366,56 +448,64 @@
                                       Name mem) {
   AtomicCmpxchg curr;
   CHECK_ERR(visitAtomicCmpxchg(&curr));
-  return push(builder.makeAtomicCmpxchg(
+  push(builder.makeAtomicCmpxchg(
     bytes, offset, curr.ptr, curr.expected, curr.replacement, type, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeAtomicWait(Type type, Address offset, Name mem) {
   AtomicWait curr;
   CHECK_ERR(visitAtomicWait(&curr));
-  return push(builder.makeAtomicWait(
+  push(builder.makeAtomicWait(
     curr.ptr, curr.expected, curr.timeout, type, offset, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeAtomicNotify(Address offset, Name mem) {
   AtomicNotify curr;
   CHECK_ERR(visitAtomicNotify(&curr));
-  return push(
-    builder.makeAtomicNotify(curr.ptr, curr.notifyCount, offset, mem));
+  push(builder.makeAtomicNotify(curr.ptr, curr.notifyCount, offset, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeAtomicFence() {
-  return push(builder.makeAtomicFence());
+  push(builder.makeAtomicFence());
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDExtract(SIMDExtractOp op, uint8_t lane) {
   SIMDExtract curr;
   CHECK_ERR(visitSIMDExtract(&curr));
-  return push(builder.makeSIMDExtract(op, curr.vec, lane));
+  push(builder.makeSIMDExtract(op, curr.vec, lane));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDReplace(SIMDReplaceOp op, uint8_t lane) {
   SIMDReplace curr;
   CHECK_ERR(visitSIMDReplace(&curr));
-  return push(builder.makeSIMDReplace(op, curr.vec, lane, curr.value));
+  push(builder.makeSIMDReplace(op, curr.vec, lane, curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDShuffle(const std::array<uint8_t, 16>& lanes) {
   SIMDShuffle curr;
   CHECK_ERR(visitSIMDShuffle(&curr));
-  return push(builder.makeSIMDShuffle(curr.left, curr.right, lanes));
+  push(builder.makeSIMDShuffle(curr.left, curr.right, lanes));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDTernary(SIMDTernaryOp op) {
   SIMDTernary curr;
   CHECK_ERR(visitSIMDTernary(&curr));
-  return push(builder.makeSIMDTernary(op, curr.a, curr.b, curr.c));
+  push(builder.makeSIMDTernary(op, curr.a, curr.b, curr.c));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDShift(SIMDShiftOp op) {
   SIMDShift curr;
   CHECK_ERR(visitSIMDShift(&curr));
-  return push(builder.makeSIMDShift(op, curr.vec, curr.shift));
+  push(builder.makeSIMDShift(op, curr.vec, curr.shift));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDLoad(SIMDLoadOp op,
@@ -424,7 +514,8 @@
                                  Name mem) {
   SIMDLoad curr;
   CHECK_ERR(visitSIMDLoad(&curr));
-  return push(builder.makeSIMDLoad(op, offset, align, curr.ptr, mem));
+  push(builder.makeSIMDLoad(op, offset, align, curr.ptr, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSIMDLoadStoreLane(SIMDLoadStoreLaneOp op,
@@ -434,48 +525,55 @@
                                           Name mem) {
   SIMDLoadStoreLane curr;
   CHECK_ERR(visitSIMDLoadStoreLane(&curr));
-  return push(builder.makeSIMDLoadStoreLane(
+  push(builder.makeSIMDLoadStoreLane(
     op, offset, align, lane, curr.ptr, curr.vec, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeMemoryInit(Name data, Name mem) {
   MemoryInit curr;
   CHECK_ERR(visitMemoryInit(&curr));
-  return push(
-    builder.makeMemoryInit(data, curr.dest, curr.offset, curr.size, mem));
+  push(builder.makeMemoryInit(data, curr.dest, curr.offset, curr.size, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeDataDrop(Name data) {
-  return push(builder.makeDataDrop(data));
+  push(builder.makeDataDrop(data));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeMemoryCopy(Name destMem, Name srcMem) {
   MemoryCopy curr;
   CHECK_ERR(visitMemoryCopy(&curr));
-  return push(
+  push(
     builder.makeMemoryCopy(curr.dest, curr.source, curr.size, destMem, srcMem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeMemoryFill(Name mem) {
   MemoryFill curr;
   CHECK_ERR(visitMemoryFill(&curr));
-  return push(builder.makeMemoryFill(curr.dest, curr.value, curr.size, mem));
+  push(builder.makeMemoryFill(curr.dest, curr.value, curr.size, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeConst(Literal val) {
-  return push(builder.makeConst(val));
+  push(builder.makeConst(val));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeUnary(UnaryOp op) {
   Unary curr;
   CHECK_ERR(visitUnary(&curr));
-  return push(builder.makeUnary(op, curr.value));
+  push(builder.makeUnary(op, curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeBinary(BinaryOp op) {
   Binary curr;
   CHECK_ERR(visitBinary(&curr));
-  return push(builder.makeBinary(op, curr.left, curr.right));
+  push(builder.makeBinary(op, curr.left, curr.right));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeSelect(std::optional<Type> type) {
@@ -487,45 +585,53 @@
   if (type && !Type::isSubType(built->type, *type)) {
     return Err{"select type does not match expected type"};
   }
-  return push(built);
+  push(built);
+  return Ok{};
 }
 
 Result<> IRBuilder::makeDrop() {
   Drop curr;
   CHECK_ERR(visitDrop(&curr));
-  return push(builder.makeDrop(curr.value));
+  push(builder.makeDrop(curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeReturn() {
   Return curr;
   CHECK_ERR(visitReturn(&curr));
-  return push(builder.makeReturn(curr.value));
+  push(builder.makeReturn(curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeMemorySize(Name mem) {
-  return push(builder.makeMemorySize(mem));
+  push(builder.makeMemorySize(mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeMemoryGrow(Name mem) {
   MemoryGrow curr;
   CHECK_ERR(visitMemoryGrow(&curr));
-  return push(builder.makeMemoryGrow(curr.delta, mem));
+  push(builder.makeMemoryGrow(curr.delta, mem));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeUnreachable() {
-  return push(builder.makeUnreachable());
+  push(builder.makeUnreachable());
+  return Ok{};
 }
 
 // Result<> IRBuilder::makePop() {}
 
 Result<> IRBuilder::makeRefNull(HeapType type) {
-  return push(builder.makeRefNull(type));
+  push(builder.makeRefNull(type));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeRefIsNull() {
   RefIsNull curr;
   CHECK_ERR(visitRefIsNull(&curr));
-  return push(builder.makeRefIsNull(curr.value));
+  push(builder.makeRefIsNull(curr.value));
+  return Ok{};
 }
 
 // Result<> IRBuilder::makeRefFunc() {}
@@ -533,7 +639,8 @@
 Result<> IRBuilder::makeRefEq() {
   RefEq curr;
   CHECK_ERR(visitRefEq(&curr));
-  return push(builder.makeRefEq(curr.left, curr.right));
+  push(builder.makeRefEq(curr.left, curr.right));
+  return Ok{};
 }
 
 // Result<> IRBuilder::makeTableGet() {}
@@ -557,13 +664,15 @@
 Result<> IRBuilder::makeI31New() {
   I31New curr;
   CHECK_ERR(visitI31New(&curr));
-  return push(builder.makeI31New(curr.value));
+  push(builder.makeI31New(curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeI31Get(bool signed_) {
   I31Get curr;
   CHECK_ERR(visitI31Get(&curr));
-  return push(builder.makeI31Get(curr.i31, signed_));
+  push(builder.makeI31Get(curr.i31, signed_));
+  return Ok{};
 }
 
 // Result<> IRBuilder::makeCallRef() {}
@@ -579,11 +688,13 @@
   // Differentiate from struct.new_default with a non-empty expression list.
   curr.operands.resize(type.getStruct().fields.size());
   CHECK_ERR(visitStructNew(&curr));
-  return push(builder.makeStructNew(type, std::move(curr.operands)));
+  push(builder.makeStructNew(type, std::move(curr.operands)));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeStructNewDefault(HeapType type) {
-  return push(builder.makeStructNew(type, {}));
+  push(builder.makeStructNew(type, {}));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeStructGet(HeapType type, Index field, bool signed_) {
@@ -591,15 +702,16 @@
   StructGet curr;
   CHECK_ERR(visitStructGet(&curr));
   CHECK_ERR(validateTypeAnnotation(type, curr.ref));
-  return push(
-    builder.makeStructGet(field, curr.ref, fields[field].type, signed_));
+  push(builder.makeStructGet(field, curr.ref, fields[field].type, signed_));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeStructSet(HeapType type, Index field) {
   StructSet curr;
   CHECK_ERR(visitStructSet(&curr));
   CHECK_ERR(validateTypeAnnotation(type, curr.ref));
-  return push(builder.makeStructSet(field, curr.ref, curr.value));
+  push(builder.makeStructSet(field, curr.ref, curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayNew(HeapType type) {
@@ -607,25 +719,29 @@
   // Differentiate from array.new_default with dummy initializer.
   curr.init = (Expression*)0x01;
   CHECK_ERR(visitArrayNew(&curr));
-  return push(builder.makeArrayNew(type, curr.size, curr.init));
+  push(builder.makeArrayNew(type, curr.size, curr.init));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayNewDefault(HeapType type) {
   ArrayNew curr;
   CHECK_ERR(visitArrayNew(&curr));
-  return push(builder.makeArrayNew(type, curr.size));
+  push(builder.makeArrayNew(type, curr.size));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayNewData(HeapType type, Name data) {
   ArrayNewData curr;
   CHECK_ERR(visitArrayNewData(&curr));
-  return push(builder.makeArrayNewData(type, data, curr.offset, curr.size));
+  push(builder.makeArrayNewData(type, data, curr.offset, curr.size));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayNewElem(HeapType type, Name elem) {
   ArrayNewElem curr;
   CHECK_ERR(visitArrayNewElem(&curr));
-  return push(builder.makeArrayNewElem(type, elem, curr.offset, curr.size));
+  push(builder.makeArrayNewElem(type, elem, curr.offset, curr.size));
+  return Ok{};
 }
 
 // Result<> IRBuilder::makeArrayNewFixed() {}
@@ -634,21 +750,24 @@
   ArrayGet curr;
   CHECK_ERR(visitArrayGet(&curr));
   CHECK_ERR(validateTypeAnnotation(type, curr.ref));
-  return push(builder.makeArrayGet(
+  push(builder.makeArrayGet(
     curr.ref, curr.index, type.getArray().element.type, signed_));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArraySet(HeapType type) {
   ArraySet curr;
   CHECK_ERR(visitArraySet(&curr));
   CHECK_ERR(validateTypeAnnotation(type, curr.ref));
-  return push(builder.makeArraySet(curr.ref, curr.index, curr.value));
+  push(builder.makeArraySet(curr.ref, curr.index, curr.value));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayLen() {
   ArrayLen curr;
   CHECK_ERR(visitArrayLen(&curr));
-  return push(builder.makeArrayLen(curr.ref));
+  push(builder.makeArrayLen(curr.ref));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayCopy(HeapType destType, HeapType srcType) {
@@ -656,16 +775,17 @@
   CHECK_ERR(visitArrayCopy(&curr));
   CHECK_ERR(validateTypeAnnotation(destType, curr.destRef));
   CHECK_ERR(validateTypeAnnotation(srcType, curr.srcRef));
-  return push(builder.makeArrayCopy(
+  push(builder.makeArrayCopy(
     curr.destRef, curr.destIndex, curr.srcRef, curr.srcIndex, curr.length));
+  return Ok{};
 }
 
 Result<> IRBuilder::makeArrayFill(HeapType type) {
   ArrayFill curr;
   CHECK_ERR(visitArrayFill(&curr));
   CHECK_ERR(validateTypeAnnotation(type, curr.ref));
-  return push(
-    builder.makeArrayFill(curr.ref, curr.index, curr.value, curr.size));
+  push(builder.makeArrayFill(curr.ref, curr.index, curr.value, curr.size));
+  return Ok{};
 }
 
 // Result<> IRBuilder::makeArrayInitData() {}
diff --git a/src/wasm/wat-parser.cpp b/src/wasm/wat-parser.cpp
index bf438ae..3961cbb 100644
--- a/src/wasm/wat-parser.cpp
+++ b/src/wasm/wat-parser.cpp
@@ -676,7 +676,7 @@
   InstrsT finishInstrs(InstrsT&) { return Ok{}; }
 
   ExprT makeExpr(InstrsT) { return Ok{}; }
-  ExprT instrToExpr(InstrT) { return Ok{}; }
+  Result<ExprT> instrToExpr(InstrT) { return Ok{}; }
 
   template<typename HeapTypeT> FieldIdxT getFieldFromIdx(HeapTypeT, uint32_t) {
     return Ok{};
@@ -696,9 +696,11 @@
   MemargT getMemarg(uint64_t, uint32_t) { return Ok{}; }
 
   template<typename BlockTypeT>
-  InstrT makeBlock(Index, std::optional<Name>, BlockTypeT, InstrsT) {
+  InstrT makeBlock(Index, std::optional<Name>, BlockTypeT) {
     return Ok{};
   }
+  InstrT finishBlock(Index, InstrsT) { return Ok{}; }
+
   InstrT makeUnreachable(Index) { return Ok{}; }
   InstrT makeNop(Index) { return Ok{}; }
   InstrT makeBinary(Index, BinaryOp) { return Ok{}; }
@@ -1280,7 +1282,7 @@
   // Keep track of instructions internally rather than letting the general
   // parser collect them.
   using InstrT = Ok;
-  using InstrsT = std::vector<Expression*>;
+  using InstrsT = Ok;
   using ExprT = Expression*;
 
   using FieldIdxT = Index;
@@ -1334,12 +1336,10 @@
 
   HeapType getBlockTypeFromResult(const std::vector<Type> results) {
     assert(results.size() == 1);
-    irBuilder.pushScope(results[0]);
     return HeapType(Signature(Type::none, results[0]));
   }
 
   Result<HeapType> getBlockTypeFromTypeUse(Index pos, HeapType type) {
-    irBuilder.pushScope(type.getSignature().results);
     return type;
   }
 
@@ -1347,11 +1347,9 @@
 
   void appendInstr(Ok&, InstrT instr) {}
 
-  Result<InstrsT> finishInstrs(Ok&) {
-    return withLoc(irBuilder.finishInstrs());
-  }
+  Result<InstrsT> finishInstrs(Ok&) { return Ok{}; }
 
-  Expression* instrToExpr(Ok&) { return irBuilder.build(); }
+  Result<Expression*> instrToExpr(Ok&) { return irBuilder.build(); }
 
   GlobalTypeT makeGlobalType(Mutability, TypeT) { return Ok{}; }
 
@@ -1475,25 +1473,12 @@
                    ImportNames*,
                    TypeUseT,
                    std::optional<LocalsT>,
-                   std::optional<InstrsT> insts,
-                   Index) {
-    Expression* body;
-    if (insts) {
-      switch (insts->size()) {
-        case 0:
-          body = builder.makeNop();
-          break;
-        case 1:
-          body = insts->back();
-          break;
-        default:
-          body = builder.makeBlock(*insts, wasm.functions[index]->getResults());
-          break;
-      }
-    } else {
-      body = builder.makeNop();
-    }
-    wasm.functions[index]->body = body;
+                   std::optional<InstrsT>,
+                   Index pos) {
+    CHECK_ERR(withLoc(pos, irBuilder.visitEnd()));
+    auto body = irBuilder.build();
+    CHECK_ERR(withLoc(pos, body));
+    wasm.functions[index]->body = *body;
     return Ok{};
   }
 
@@ -1537,16 +1522,7 @@
     return Builder::addVar(func, name, type);
   }
 
-  Expression* makeExpr(InstrsT& instrs) {
-    switch (instrs.size()) {
-      case 0:
-        return builder.makeNop();
-      case 1:
-        return instrs.front();
-      default:
-        return builder.makeBlock(instrs);
-    }
-  }
+  Result<Expression*> makeExpr(InstrsT&) { return irBuilder.build(); }
 
   Memarg getMemarg(uint64_t offset, uint32_t align) { return {offset, align}; }
 
@@ -1560,20 +1536,16 @@
     return wasm.memories[0]->name;
   }
 
-  Result<> makeBlock(Index pos,
-                     std::optional<Name> label,
-                     HeapType type,
-                     const std::vector<Expression*>& instrs) {
+  Result<> makeBlock(Index pos, std::optional<Name> label, HeapType type) {
     // TODO: validate labels?
     // TODO: Move error on input types to here?
-    auto results = type.getSignature().results;
-    Block* block = wasm.allocator.alloc<Block>();
-    block->type = results;
-    if (label) {
-      block->name = *label;
-    }
-    block->list.set(instrs);
-    return withLoc(pos, irBuilder.visit(block));
+    auto blockName = label ? *label : Name{};
+    auto resultType = type.getSignature().results;
+    return withLoc(pos, irBuilder.makeBlock(blockName, resultType));
+  }
+
+  Result<> finishBlock(Index pos, InstrsT) {
+    return withLoc(pos, irBuilder.visitEnd());
   }
 
   Result<> makeUnreachable(Index pos) {
@@ -2595,6 +2567,8 @@
   auto type = blocktype(ctx);
   CHECK_ERR(type);
 
+  CHECK_ERR(Result<typename Ctx::InstrT>(ctx.makeBlock(pos, label, *type)));
+
   auto insts = instrs(ctx);
   CHECK_ERR(insts);
 
@@ -2612,7 +2586,7 @@
     }
   }
 
-  return ctx.makeBlock(pos, label, *type, std::move(*insts));
+  return ctx.finishBlock(pos, std::move(*insts));
 }
 
 template<typename Ctx>
@@ -3745,7 +3719,9 @@
   } else if (ctx.in.takeLParen()) {
     auto inst = instr(ctx);
     CHECK_ERR(inst);
-    offset = ctx.instrToExpr(*inst);
+    auto offsetExpr = ctx.instrToExpr(*inst);
+    CHECK_ERR(offsetExpr);
+    offset = *offsetExpr;
     if (!ctx.in.takeRParen()) {
       return ctx.in.err("expected end of offset instruction");
     }
@@ -3893,7 +3869,7 @@
     for (Index i = 0; i < decls.funcDefs.size(); ++i) {
       ctx.index = i;
       ctx.setFunction(wasm.functions[i].get());
-      ctx.irBuilder.pushScope(ctx.func->getResults());
+      CHECK_ERR(ctx.irBuilder.makeBlock(Name{}, ctx.func->getResults()));
       WithPosition with(ctx, decls.funcDefs[i].pos);
       auto parsed = func(ctx);
       CHECK_ERR(parsed);
diff --git a/test/fib-dbg.wasm.fromBinary b/test/fib-dbg.wasm.fromBinary
index 3cfa1bc..f36bd22 100644
--- a/test/fib-dbg.wasm.fromBinary
+++ b/test/fib-dbg.wasm.fromBinary
@@ -133,6 +133,7 @@
      (i32.const 0)
     )
    )
+   ;;@ fib.c:3:0
    (if
     (local.get $6)
     (block
@@ -156,6 +157,7 @@
      )
     )
    )
+   ;;@ fib.c:8:0
    (loop $label$4
     (block $label$5
      ;;@ fib.c:4:0
@@ -172,12 +174,14 @@
        (i32.const 1)
       )
      )
+     ;;@ fib.c:3:0
      (local.set $7
       (i32.eq
        (local.get $9)
        (local.get $0)
       )
      )
+     ;;@ fib.c:3:0
      (if
       (local.get $7)
       (block
@@ -201,6 +205,7 @@
        )
       )
      )
+     ;;@ fib.c:3:0
      (br $label$4)
     )
    )
diff --git a/test/lit/wat-kitchen-sink.wast b/test/lit/wat-kitchen-sink.wast
index ff48024..ddf35ed 100644
--- a/test/lit/wat-kitchen-sink.wast
+++ b/test/lit/wat-kitchen-sink.wast
@@ -5,27 +5,29 @@
 (module $parse
  ;; types
 
- ;; CHECK:      (type $void (func))
-
- ;; CHECK:      (type $pair (struct (field (mut i32)) (field (mut i64))))
-
  ;; CHECK:      (type $ret2 (func (result i32 i32)))
  (type $ret2 (func (result i32 i32)))
 
  (rec
-  ;; CHECK:      (type $3 (func (result i32)))
+  ;; CHECK:      (type $void (func))
 
-  ;; CHECK:      (type $4 (func (param i32 i64)))
+  ;; CHECK:      (type $pair (struct (field (mut i32)) (field (mut i64))))
 
   ;; CHECK:      (type $a1 (array i64))
 
+  ;; CHECK:      (type $4 (func (result i32)))
+
+  ;; CHECK:      (type $5 (func (param i32 i64)))
+
   ;; CHECK:      (type $a2 (array (mut f32)))
 
-  ;; CHECK:      (type $7 (func (param i32)))
+  ;; CHECK:      (type $7 (func (result i32 i64)))
 
-  ;; CHECK:      (type $8 (func (param i32 i32 i32)))
+  ;; CHECK:      (type $8 (func (param i32)))
 
-  ;; CHECK:      (type $9 (func (param v128 i32) (result v128)))
+  ;; CHECK:      (type $9 (func (param i32 i32 i32)))
+
+  ;; CHECK:      (type $10 (func (param v128 i32) (result v128)))
 
   ;; CHECK:      (type $packed-i8 (array (mut i8)))
 
@@ -33,31 +35,29 @@
 
   ;; CHECK:      (type $many (func (param i32 i64 f32 f64) (result anyref (ref func))))
 
-  ;; CHECK:      (type $13 (func (param i32 i32)))
+  ;; CHECK:      (type $14 (func (param i32 i32)))
 
-  ;; CHECK:      (type $14 (func (param i32 i32 f64 f64)))
+  ;; CHECK:      (type $15 (func (param i32 i32 f64 f64)))
 
-  ;; CHECK:      (type $15 (func (param i64)))
+  ;; CHECK:      (type $16 (func (param i64)))
 
-  ;; CHECK:      (type $16 (func (param v128) (result i32)))
+  ;; CHECK:      (type $17 (func (param v128) (result i32)))
 
-  ;; CHECK:      (type $17 (func (param v128 v128) (result v128)))
+  ;; CHECK:      (type $18 (func (param v128 v128) (result v128)))
 
-  ;; CHECK:      (type $18 (func (param v128 v128 v128) (result v128)))
+  ;; CHECK:      (type $19 (func (param v128 v128 v128) (result v128)))
 
-  ;; CHECK:      (type $19 (func (param i32 i64 v128)))
+  ;; CHECK:      (type $20 (func (param i32 i64 v128)))
 
-  ;; CHECK:      (type $20 (func (param i32 i32 i64 i64)))
+  ;; CHECK:      (type $21 (func (param i32 i32 i64 i64)))
 
-  ;; CHECK:      (type $21 (func (param i32) (result i32)))
+  ;; CHECK:      (type $22 (func (param i32) (result i32)))
 
-  ;; CHECK:      (type $22 (func (param i32 i64) (result i32 i64)))
+  ;; CHECK:      (type $23 (func (param i32 i64) (result i32 i64)))
 
-  ;; CHECK:      (type $23 (func (param i64) (result i32 i64)))
+  ;; CHECK:      (type $24 (func (param i64) (result i32 i64)))
 
-  ;; CHECK:      (type $24 (func (param i32) (result i32 i64)))
-
-  ;; CHECK:      (type $25 (func (result i32 i64)))
+  ;; CHECK:      (type $25 (func (param i32) (result i32 i64)))
 
   ;; CHECK:      (type $26 (func (param anyref) (result i32)))
 
@@ -225,18 +225,15 @@
  ;; CHECK:      (export "f5.1" (func $fimport$1))
 
  ;; CHECK:      (func $0 (type $void)
- ;; CHECK-NEXT:  (nop)
  ;; CHECK-NEXT: )
 
- ;; CHECK:      (func $f1 (type $7) (param $0 i32)
- ;; CHECK-NEXT:  (nop)
+ ;; CHECK:      (func $f1 (type $8) (param $0 i32)
  ;; CHECK-NEXT: )
  (func $f1 (param i32))
- ;; CHECK:      (func $f2 (type $7) (param $x i32)
- ;; CHECK-NEXT:  (nop)
+ ;; CHECK:      (func $f2 (type $8) (param $x i32)
  ;; CHECK-NEXT: )
  (func $f2 (param $x i32))
- ;; CHECK:      (func $f3 (type $3) (result i32)
+ ;; CHECK:      (func $f3 (type $4) (result i32)
  ;; CHECK-NEXT:  (i32.const 0)
  ;; CHECK-NEXT: )
  (func $f3 (result i32)
@@ -246,7 +243,6 @@
  ;; CHECK-NEXT:  (local $0 i32)
  ;; CHECK-NEXT:  (local $1 i64)
  ;; CHECK-NEXT:  (local $l f32)
- ;; CHECK-NEXT:  (nop)
  ;; CHECK-NEXT: )
  (func $f4 (type 17) (local i32 i64) (local $l f32))
  (func (export "f5.0") (export "f5.1") (import "mod" "f5"))
@@ -311,7 +307,7 @@
   nop
  )
 
- ;; CHECK:      (func $add (type $3) (result i32)
+ ;; CHECK:      (func $add (type $4) (result i32)
  ;; CHECK-NEXT:  (i32.add
  ;; CHECK-NEXT:   (i32.const 1)
  ;; CHECK-NEXT:   (i32.const 2)
@@ -323,7 +319,7 @@
   i32.add
  )
 
- ;; CHECK:      (func $add-folded (type $3) (result i32)
+ ;; CHECK:      (func $add-folded (type $4) (result i32)
  ;; CHECK-NEXT:  (i32.add
  ;; CHECK-NEXT:   (i32.const 1)
  ;; CHECK-NEXT:   (i32.const 2)
@@ -336,7 +332,7 @@
   )
  )
 
- ;; CHECK:      (func $add-stacky (type $3) (result i32)
+ ;; CHECK:      (func $add-stacky (type $4) (result i32)
  ;; CHECK-NEXT:  (local $scratch i32)
  ;; CHECK-NEXT:  (i32.add
  ;; CHECK-NEXT:   (block (result i32)
@@ -356,7 +352,7 @@
   i32.add
  )
 
- ;; CHECK:      (func $add-stacky-2 (type $3) (result i32)
+ ;; CHECK:      (func $add-stacky-2 (type $4) (result i32)
  ;; CHECK-NEXT:  (local $scratch i32)
  ;; CHECK-NEXT:  (i32.add
  ;; CHECK-NEXT:   (i32.const 1)
@@ -376,7 +372,7 @@
   i32.add
  )
 
- ;; CHECK:      (func $add-stacky-3 (type $3) (result i32)
+ ;; CHECK:      (func $add-stacky-3 (type $4) (result i32)
  ;; CHECK-NEXT:  (local $scratch i32)
  ;; CHECK-NEXT:  (local.set $scratch
  ;; CHECK-NEXT:   (i32.add
@@ -394,7 +390,7 @@
   nop
  )
 
- ;; CHECK:      (func $add-stacky-4 (type $3) (result i32)
+ ;; CHECK:      (func $add-stacky-4 (type $4) (result i32)
  ;; CHECK-NEXT:  (local $scratch i32)
  ;; CHECK-NEXT:  (local $scratch_1 i32)
  ;; CHECK-NEXT:  (local $scratch_2 i32)
@@ -428,7 +424,7 @@
   nop
  )
 
- ;; CHECK:      (func $add-unreachable (type $3) (result i32)
+ ;; CHECK:      (func $add-unreachable (type $4) (result i32)
  ;; CHECK-NEXT:  (i32.add
  ;; CHECK-NEXT:   (unreachable)
  ;; CHECK-NEXT:   (i32.const 1)
@@ -440,7 +436,7 @@
   i32.add
  )
 
- ;; CHECK:      (func $add-unreachable-2 (type $3) (result i32)
+ ;; CHECK:      (func $add-unreachable-2 (type $4) (result i32)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i32.const 1)
  ;; CHECK-NEXT:  )
@@ -455,7 +451,7 @@
   i32.add
  )
 
- ;; CHECK:      (func $add-unreachable-3 (type $3) (result i32)
+ ;; CHECK:      (func $add-unreachable-3 (type $4) (result i32)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i32.const 1)
  ;; CHECK-NEXT:  )
@@ -554,6 +550,13 @@
  ;; CHECK-NEXT:   (unreachable)
  ;; CHECK-NEXT:   (i32.const 2)
  ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT:  (drop
+ ;; CHECK-NEXT:   (i32.add
+ ;; CHECK-NEXT:    (i32.const 3)
+ ;; CHECK-NEXT:    (i32.const 4)
+ ;; CHECK-NEXT:   )
+ ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT:  (unreachable)
  ;; CHECK-NEXT: )
  (func $add-twice-unreachable (type $ret2)
   unreachable
@@ -572,6 +575,13 @@
  ;; CHECK-NEXT:   )
  ;; CHECK-NEXT:  )
  ;; CHECK-NEXT:  (unreachable)
+ ;; CHECK-NEXT:  (drop
+ ;; CHECK-NEXT:   (i32.add
+ ;; CHECK-NEXT:    (i32.const 3)
+ ;; CHECK-NEXT:    (i32.const 4)
+ ;; CHECK-NEXT:   )
+ ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT:  (unreachable)
  ;; CHECK-NEXT: )
  (func $add-twice-unreachable-2 (type $ret2)
   i32.const 1
@@ -662,7 +672,7 @@
   drop
  )
 
- ;; CHECK:      (func $locals (type $13) (param $0 i32) (param $x i32)
+ ;; CHECK:      (func $locals (type $14) (param $0 i32) (param $x i32)
  ;; CHECK-NEXT:  (local $2 i32)
  ;; CHECK-NEXT:  (local $y i32)
  ;; CHECK-NEXT:  (drop
@@ -722,25 +732,14 @@
  )
 
  ;; CHECK:      (func $block-folded (type $void)
- ;; CHECK-NEXT:  (local $scratch (i32 i32))
  ;; CHECK-NEXT:  (nop)
- ;; CHECK-NEXT:  (local.set $scratch
+ ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (block $l (result i32 i32)
  ;; CHECK-NEXT:    (nop)
  ;; CHECK-NEXT:    (nop)
  ;; CHECK-NEXT:    (unreachable)
  ;; CHECK-NEXT:   )
  ;; CHECK-NEXT:  )
- ;; CHECK-NEXT:  (drop
- ;; CHECK-NEXT:   (tuple.extract 0
- ;; CHECK-NEXT:    (local.get $scratch)
- ;; CHECK-NEXT:   )
- ;; CHECK-NEXT:  )
- ;; CHECK-NEXT:  (drop
- ;; CHECK-NEXT:   (tuple.extract 1
- ;; CHECK-NEXT:    (local.get $scratch)
- ;; CHECK-NEXT:   )
- ;; CHECK-NEXT:  )
  ;; CHECK-NEXT:  (unreachable)
  ;; CHECK-NEXT: )
  (func $block-folded
@@ -758,29 +757,29 @@
  ;; CHECK-NEXT:  (local $scratch_1 (i32 i32))
  ;; CHECK-NEXT:  (local $scratch_2 i32)
  ;; CHECK-NEXT:  (block $0
- ;; CHECK-NEXT:   (local.set $scratch_1
- ;; CHECK-NEXT:    (block $1 (result i32 i32)
- ;; CHECK-NEXT:     (tuple.make
- ;; CHECK-NEXT:      (block $2 (result i32)
- ;; CHECK-NEXT:       (block (result i32)
- ;; CHECK-NEXT:        (local.set $scratch
- ;; CHECK-NEXT:         (block $3 (result i32)
- ;; CHECK-NEXT:          (i32.const 0)
- ;; CHECK-NEXT:         )
- ;; CHECK-NEXT:        )
- ;; CHECK-NEXT:        (nop)
- ;; CHECK-NEXT:        (local.get $scratch)
- ;; CHECK-NEXT:       )
- ;; CHECK-NEXT:      )
- ;; CHECK-NEXT:      (i32.const 1)
- ;; CHECK-NEXT:     )
- ;; CHECK-NEXT:    )
- ;; CHECK-NEXT:   )
  ;; CHECK-NEXT:   (drop
  ;; CHECK-NEXT:    (block (result i32)
  ;; CHECK-NEXT:     (local.set $scratch_2
- ;; CHECK-NEXT:      (tuple.extract 0
- ;; CHECK-NEXT:       (local.get $scratch_1)
+ ;; CHECK-NEXT:      (block (result i32)
+ ;; CHECK-NEXT:       (local.set $scratch_1
+ ;; CHECK-NEXT:        (block $1 (result i32 i32)
+ ;; CHECK-NEXT:         (tuple.make
+ ;; CHECK-NEXT:          (block $2 (result i32)
+ ;; CHECK-NEXT:           (local.set $scratch
+ ;; CHECK-NEXT:            (block $3 (result i32)
+ ;; CHECK-NEXT:             (i32.const 0)
+ ;; CHECK-NEXT:            )
+ ;; CHECK-NEXT:           )
+ ;; CHECK-NEXT:           (nop)
+ ;; CHECK-NEXT:           (local.get $scratch)
+ ;; CHECK-NEXT:          )
+ ;; CHECK-NEXT:          (i32.const 1)
+ ;; CHECK-NEXT:         )
+ ;; CHECK-NEXT:        )
+ ;; CHECK-NEXT:       )
+ ;; CHECK-NEXT:       (tuple.extract 0
+ ;; CHECK-NEXT:        (local.get $scratch_1)
+ ;; CHECK-NEXT:       )
  ;; CHECK-NEXT:      )
  ;; CHECK-NEXT:     )
  ;; CHECK-NEXT:     (drop
@@ -811,7 +810,38 @@
   nop
  )
 
- ;; CHECK:      (func $binary (type $14) (param $0 i32) (param $1 i32) (param $2 f64) (param $3 f64)
+ ;; CHECK:      (func $multivalue-nested (type $ret2) (result i32 i32)
+ ;; CHECK-NEXT:  (local $scratch (i32 i32))
+ ;; CHECK-NEXT:  (block (result i32 i32)
+ ;; CHECK-NEXT:   (nop)
+ ;; CHECK-NEXT:   (local.set $scratch
+ ;; CHECK-NEXT:    (block (result i32 i32)
+ ;; CHECK-NEXT:     (block (result i32 i32)
+ ;; CHECK-NEXT:      (tuple.make
+ ;; CHECK-NEXT:       (i32.const 0)
+ ;; CHECK-NEXT:       (i32.const 1)
+ ;; CHECK-NEXT:      )
+ ;; CHECK-NEXT:     )
+ ;; CHECK-NEXT:    )
+ ;; CHECK-NEXT:   )
+ ;; CHECK-NEXT:   (nop)
+ ;; CHECK-NEXT:   (local.get $scratch)
+ ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT: )
+ (func $multivalue-nested (type $ret2)
+  block (type $ret2)
+   nop
+   block (type $ret2)
+    block (type $ret2)
+     i32.const 0
+     i32.const 1
+    end
+   end
+   nop
+  end
+ )
+
+ ;; CHECK:      (func $binary (type $15) (param $0 i32) (param $1 i32) (param $2 f64) (param $3 f64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i32.add
  ;; CHECK-NEXT:    (local.get $0)
@@ -836,7 +866,7 @@
   drop
  )
 
- ;; CHECK:      (func $unary (type $15) (param $0 i64)
+ ;; CHECK:      (func $unary (type $16) (param $0 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i64.eqz
  ;; CHECK-NEXT:    (local.get $0)
@@ -849,7 +879,7 @@
   drop
  )
 
- ;; CHECK:      (func $select (type $8) (param $0 i32) (param $1 i32) (param $2 i32)
+ ;; CHECK:      (func $select (type $9) (param $0 i32) (param $1 i32) (param $2 i32)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (select
  ;; CHECK-NEXT:    (local.get $0)
@@ -922,7 +952,7 @@
   drop
  )
 
- ;; CHECK:      (func $memory-grow (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $memory-grow (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (memory.grow $mem
  ;; CHECK-NEXT:    (local.get $0)
@@ -961,7 +991,7 @@
   global.set 4
  )
 
- ;; CHECK:      (func $load (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $load (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i32.load $mem offset=42
  ;; CHECK-NEXT:    (local.get $0)
@@ -990,7 +1020,7 @@
   drop
  )
 
- ;; CHECK:      (func $store (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $store (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (i32.store $mem offset=42 align=1
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (i32.const 0)
@@ -1016,7 +1046,7 @@
   f32.store $mem-i64
  )
 
- ;; CHECK:      (func $atomic-rmw (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $atomic-rmw (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i32.atomic.rmw16.add_u $mem
  ;; CHECK-NEXT:    (local.get $0)
@@ -1041,7 +1071,7 @@
   drop
  )
 
- ;; CHECK:      (func $atomic-cmpxchg (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $atomic-cmpxchg (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (i32.atomic.rmw8.cmpxchg_u $mem
  ;; CHECK-NEXT:    (local.get $0)
@@ -1070,7 +1100,7 @@
   drop
  )
 
- ;; CHECK:      (func $atomic-wait (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $atomic-wait (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (memory.atomic.wait32 $mem
  ;; CHECK-NEXT:    (local.get $0)
@@ -1099,7 +1129,7 @@
   drop
  )
 
- ;; CHECK:      (func $atomic-notify (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $atomic-notify (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (memory.atomic.notify $mem offset=8
  ;; CHECK-NEXT:    (local.get $0)
@@ -1131,7 +1161,7 @@
   atomic.fence
  )
 
- ;; CHECK:      (func $simd-extract (type $16) (param $0 v128) (result i32)
+ ;; CHECK:      (func $simd-extract (type $17) (param $0 v128) (result i32)
  ;; CHECK-NEXT:  (i32x4.extract_lane 3
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:  )
@@ -1141,7 +1171,7 @@
   i32x4.extract_lane 3
  )
 
- ;; CHECK:      (func $simd-replace (type $9) (param $0 v128) (param $1 i32) (result v128)
+ ;; CHECK:      (func $simd-replace (type $10) (param $0 v128) (param $1 i32) (result v128)
  ;; CHECK-NEXT:  (i32x4.replace_lane 2
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (local.get $1)
@@ -1153,7 +1183,7 @@
   i32x4.replace_lane 2
  )
 
- ;; CHECK:      (func $simd-shuffle (type $17) (param $0 v128) (param $1 v128) (result v128)
+ ;; CHECK:      (func $simd-shuffle (type $18) (param $0 v128) (param $1 v128) (result v128)
  ;; CHECK-NEXT:  (i8x16.shuffle 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (local.get $1)
@@ -1165,7 +1195,7 @@
   i8x16.shuffle 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23
  )
 
- ;; CHECK:      (func $simd-ternary (type $18) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
+ ;; CHECK:      (func $simd-ternary (type $19) (param $0 v128) (param $1 v128) (param $2 v128) (result v128)
  ;; CHECK-NEXT:  (v128.bitselect
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (local.get $1)
@@ -1179,7 +1209,7 @@
   v128.bitselect
  )
 
- ;; CHECK:      (func $simd-shift (type $9) (param $0 v128) (param $1 i32) (result v128)
+ ;; CHECK:      (func $simd-shift (type $10) (param $0 v128) (param $1 i32) (result v128)
  ;; CHECK-NEXT:  (i8x16.shl
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (local.get $1)
@@ -1191,7 +1221,7 @@
   i8x16.shl
  )
 
- ;; CHECK:      (func $simd-load (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $simd-load (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (v128.load8x8_s $mem offset=8
  ;; CHECK-NEXT:    (local.get $0)
@@ -1212,7 +1242,7 @@
   drop
  )
 
- ;; CHECK:      (func $simd-load-store-lane (type $19) (param $0 i32) (param $1 i64) (param $2 v128)
+ ;; CHECK:      (func $simd-load-store-lane (type $20) (param $0 i32) (param $1 i64) (param $2 v128)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (v128.load16_lane $mem 7
  ;; CHECK-NEXT:    (local.get $0)
@@ -1234,7 +1264,7 @@
   v128.store64_lane 3 align=4 0
  )
 
- ;; CHECK:      (func $memory-init (type $8) (param $0 i32) (param $1 i32) (param $2 i32)
+ ;; CHECK:      (func $memory-init (type $9) (param $0 i32) (param $1 i32) (param $2 i32)
  ;; CHECK-NEXT:  (memory.init $mem-i32 $passive
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (local.get $1)
@@ -1275,7 +1305,7 @@
   data.drop $passive
  )
 
- ;; CHECK:      (func $memory-copy (type $20) (param $0 i32) (param $1 i32) (param $2 i64) (param $3 i64)
+ ;; CHECK:      (func $memory-copy (type $21) (param $0 i32) (param $1 i32) (param $2 i64) (param $3 i64)
  ;; CHECK-NEXT:  (memory.copy $mem $mem
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (local.get $1)
@@ -1307,7 +1337,7 @@
   memory.copy $mem-i64 3
  )
 
- ;; CHECK:      (func $memory-fill (type $4) (param $0 i32) (param $1 i64)
+ ;; CHECK:      (func $memory-fill (type $5) (param $0 i32) (param $1 i64)
  ;; CHECK-NEXT:  (memory.fill $mem
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:   (i32.const 1)
@@ -1346,7 +1376,7 @@
   return
  )
 
- ;; CHECK:      (func $return-one (type $21) (param $0 i32) (result i32)
+ ;; CHECK:      (func $return-one (type $22) (param $0 i32) (result i32)
  ;; CHECK-NEXT:  (return
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:  )
@@ -1356,7 +1386,7 @@
   return
  )
 
- ;; CHECK:      (func $return-two (type $22) (param $0 i32) (param $1 i64) (result i32 i64)
+ ;; CHECK:      (func $return-two (type $23) (param $0 i32) (param $1 i64) (result i32 i64)
  ;; CHECK-NEXT:  (return
  ;; CHECK-NEXT:   (tuple.make
  ;; CHECK-NEXT:    (local.get $0)
@@ -1370,7 +1400,7 @@
   return
  )
 
- ;; CHECK:      (func $return-two-first-unreachable (type $23) (param $0 i64) (result i32 i64)
+ ;; CHECK:      (func $return-two-first-unreachable (type $24) (param $0 i64) (result i32 i64)
  ;; CHECK-NEXT:  (return
  ;; CHECK-NEXT:   (tuple.make
  ;; CHECK-NEXT:    (unreachable)
@@ -1384,7 +1414,7 @@
   return
  )
 
- ;; CHECK:      (func $return-two-second-unreachable (type $24) (param $0 i32) (result i32 i64)
+ ;; CHECK:      (func $return-two-second-unreachable (type $25) (param $0 i32) (result i32 i64)
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (local.get $0)
  ;; CHECK-NEXT:  )
@@ -1646,7 +1676,6 @@
  )
 
  ;; CHECK:      (func $use-types (type $59) (param $0 (ref $s0)) (param $1 (ref $s1)) (param $2 (ref $s2)) (param $3 (ref $s3)) (param $4 (ref $s4)) (param $5 (ref $s5)) (param $6 (ref $s6)) (param $7 (ref $s7)) (param $8 (ref $s8)) (param $9 (ref $a0)) (param $10 (ref $a1)) (param $11 (ref $a2)) (param $12 (ref $a3)) (param $13 (ref $subvoid)) (param $14 (ref $submany))
- ;; CHECK-NEXT:  (nop)
  ;; CHECK-NEXT: )
  (func $use-types
   (param (ref $s0))