blob: 2c3b7c8b0d5137108c403bb94a144eba3fd2ffe9 [file] [log] [blame] [edit]
//===- LowerEmAsyncify - transform asynchronous functions for Emscripten/JS -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Lu Wang <coolwanglu@gmail.com>
//
// In JS we don't have functions like sleep(), which is on the other hand very popuar in C/C++ etc.
// This pass tries to convert funcitons calling sleep() into a valid form in JavaScript
// The basic idea is to split the callee at the place where sleep() is called,
// then the first half may schedule the second half using setTimeout.
// But we need to pay lots of attention to analyzing/saving/restoring context variables and return values
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/NaCl.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h" // for DemoteRegToStack, removeUnreachableBlocks
#include "llvm/Transforms/Utils/PromoteMemToReg.h" // for PromoteMemToReg
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Pass.h"
#include <vector>
#ifdef NDEBUG
#undef assert
#define assert(x) { if (!(x)) report_fatal_error(#x); }
#endif
using namespace llvm;
static cl::list<std::string>
AsyncifyFunctions("emscripten-asyncify-functions",
cl::desc("Functions that call one of these functions, directly or indirectly, will be asyncified"),
cl::CommaSeparated);
static cl::list<std::string>
AsyncifyWhiteList("emscripten-asyncify-whitelist",
cl::desc("Functions that should not be asyncified"),
cl::CommaSeparated);
namespace {
class LowerEmAsyncify: public ModulePass {
Module *TheModule;
public:
static char ID; // Pass identification, replacement for typeid
explicit LowerEmAsyncify() : ModulePass(ID), TheModule(NULL) {
initializeLowerEmAsyncifyPass(*PassRegistry::getPassRegistry());
}
virtual ~LowerEmAsyncify() { }
bool runOnModule(Module &M);
private:
const DataLayout *DL;
Type *Void, *I1, *I32, *I32Ptr;
FunctionType *VFunction, *I1Function, *I32PFunction;
FunctionType *VI32PFunction, *I32PI32Function;
FunctionType *CallbackFunctionType;
Function *AllocAsyncCtxFunction, *ReallocAsyncCtxFunction, *FreeAsyncCtxFunction;
Function *CheckAsyncFunction;
Function *DoNotUnwindFunction, *DoNotUnwindAsyncFunction;
Function *GetAsyncReturnValueAddrFunction;
void initTypesAndFunctions(void);
typedef std::vector<Instruction *> Instructions;
typedef DenseMap<Function*, Instructions> FunctionInstructionsMap;
typedef std::vector<Value*> Values;
typedef SmallPtrSet<BasicBlock*, 16> BasicBlockSet;
// all the information we want for an async call
struct AsyncCallEntry {
Instruction *AsyncCallInst; // calling an async function
BasicBlock *AfterCallBlock; // the block we should continue on after getting the return value of AsynCallInst
CallInst *AllocAsyncCtxInst; // where we allocate the async ctx before the async call, in the original function
Values ContextVariables; // those need to be saved and restored for the async call
StructType *ContextStructType; // The structure constructing all the context variables
BasicBlock *SaveAsyncCtxBlock; // the block in which we save all the variables
Function *CallbackFunc; // the callback function for this async call, which is converted from the original function
};
BasicBlockSet FindReachableBlocksFrom(BasicBlock *src);
// Find everything that we should save and restore for the async call
// save them to Entry.ContextVariables
void FindContextVariables(AsyncCallEntry & Entry);
// The essential function
// F is now in the sync form, transform it into an async form that is valid in JS
void transformAsyncFunction(Function &F, Instructions const& AsyncCalls);
bool IsFunctionPointerCall(const Instruction *I);
};
}
char LowerEmAsyncify::ID = 0;
INITIALIZE_PASS(LowerEmAsyncify, "loweremasyncify",
"Lower async functions for js/emscripten",
false, false)
bool LowerEmAsyncify::runOnModule(Module &M) {
TheModule = &M;
DL = &M.getDataLayout();
std::set<std::string> WhiteList(AsyncifyWhiteList.begin(), AsyncifyWhiteList.end());
/*
* collect all the functions that should be asyncified
* any function that _might_ call an async function is also async
*/
std::vector<Function*> AsyncFunctionsPending;
for(unsigned i = 0; i < AsyncifyFunctions.size(); ++i) {
std::string const& AFName = AsyncifyFunctions[i];
Function *F = TheModule->getFunction(AFName);
if (F && !WhiteList.count(F->getName())) {
AsyncFunctionsPending.push_back(F);
}
}
// No function needed to transform
if (AsyncFunctionsPending.empty()) return false;
// Walk through the call graph and find all the async functions
FunctionInstructionsMap AsyncFunctionCalls;
{
// pessimistic: consider all indirect calls as possibly async
// TODO: deduce based on function types
for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); FI != FE; ++FI) {
if (WhiteList.count(FI->getName())) continue;
bool has_indirect_call = false;
for (inst_iterator I = inst_begin(FI), E = inst_end(FI); I != E; ++I) {
if (IsFunctionPointerCall(&*I)) {
has_indirect_call = true;
AsyncFunctionCalls[FI].push_back(&*I);
}
}
if (has_indirect_call) AsyncFunctionsPending.push_back(FI);
}
while (!AsyncFunctionsPending.empty()) {
Function *CurFunction = AsyncFunctionsPending.back();
AsyncFunctionsPending.pop_back();
for (Value::user_iterator UI = CurFunction->user_begin(), E = CurFunction->user_end(); UI != E; ++UI) {
ImmutableCallSite ICS(*UI);
if (!ICS) continue;
// we only need those instructions calling the function
// if the function address is used for other purpose, we don't care
if (CurFunction != ICS.getCalledValue()->stripPointerCasts()) continue;
// Now I is either CallInst or InvokeInst
Instruction *I = cast<Instruction>(*UI);
Function *F = I->getParent()->getParent();
if (AsyncFunctionCalls.count(F) == 0) {
AsyncFunctionsPending.push_back(F);
}
AsyncFunctionCalls[F].push_back(I);
}
}
}
// exit if no async function is found at all
if (AsyncFunctionCalls.empty()) return false;
initTypesAndFunctions();
for (FunctionInstructionsMap::iterator I = AsyncFunctionCalls.begin(), E = AsyncFunctionCalls.end();
I != E; ++I) {
transformAsyncFunction(*(I->first), I->second);
}
return true;
}
void LowerEmAsyncify::initTypesAndFunctions(void) {
// Data types
Void = Type::getVoidTy(TheModule->getContext());
I1 = Type::getInt1Ty(TheModule->getContext());
I32 = Type::getInt32Ty(TheModule->getContext());
I32Ptr = Type::getInt32PtrTy(TheModule->getContext());
// Function types
SmallVector<Type*, 2> ArgTypes;
VFunction = FunctionType::get(Void, false);
I1Function = FunctionType::get(I1, false);
I32PFunction = FunctionType::get(I32Ptr, false);
ArgTypes.clear();
ArgTypes.push_back(I32Ptr);
VI32PFunction = FunctionType::get(Void, ArgTypes, false);
ArgTypes.clear();
ArgTypes.push_back(I32);
I32PI32Function = FunctionType::get(I32Ptr, ArgTypes, false);
CallbackFunctionType = VI32PFunction;
// Functions
CheckAsyncFunction = Function::Create(
I1Function,
GlobalValue::ExternalLinkage,
"emscripten_check_async",
TheModule
);
AllocAsyncCtxFunction = Function::Create(
I32PI32Function,
GlobalValue::ExternalLinkage,
"emscripten_alloc_async_context",
TheModule
);
ReallocAsyncCtxFunction = Function::Create(
I32PI32Function,
GlobalValue::ExternalLinkage,
"emscripten_realloc_async_context",
TheModule
);
FreeAsyncCtxFunction = Function::Create(
VI32PFunction,
GlobalValue::ExternalLinkage,
"emscripten_free_async_context",
TheModule
);
DoNotUnwindFunction = Function::Create(
VFunction,
GlobalValue::ExternalLinkage,
"emscripten_do_not_unwind",
TheModule
);
DoNotUnwindAsyncFunction = Function::Create(
VFunction,
GlobalValue::ExternalLinkage,
"emscripten_do_not_unwind_async",
TheModule
);
GetAsyncReturnValueAddrFunction = Function::Create(
I32PFunction,
GlobalValue::ExternalLinkage,
"emscripten_get_async_return_value_addr",
TheModule
);
}
LowerEmAsyncify::BasicBlockSet LowerEmAsyncify::FindReachableBlocksFrom(BasicBlock *src) {
BasicBlockSet ReachableBlockSet;
std::vector<BasicBlock*> pending;
ReachableBlockSet.insert(src);
pending.push_back(src);
while (!pending.empty()) {
BasicBlock *CurBlock = pending.back();
pending.pop_back();
for (succ_iterator SI = succ_begin(CurBlock), SE = succ_end(CurBlock); SI != SE; ++SI) {
if (ReachableBlockSet.count(*SI) == 0) {
ReachableBlockSet.insert(*SI);
pending.push_back(*SI);
}
}
}
return ReachableBlockSet;
}
void LowerEmAsyncify::FindContextVariables(AsyncCallEntry & Entry) {
BasicBlock *AfterCallBlock = Entry.AfterCallBlock;
Function & F = *AfterCallBlock->getParent();
// Create a new entry block as if in the callback function
// theck check variables that no longer properly dominate their uses
BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "", &F, &F.getEntryBlock());
BranchInst::Create(AfterCallBlock, EntryBlock);
DominatorTreeWrapperPass DTW;
DTW.runOnFunction(F);
DominatorTree& DT = DTW.getDomTree();
// These blocks may be using some values defined at or before AsyncCallBlock
BasicBlockSet Ramifications = FindReachableBlocksFrom(AfterCallBlock);
SmallPtrSet<Value*, 256> ContextVariables;
Values Pending;
// Examine the instructions, find all variables that we need to store in the context
for (BasicBlockSet::iterator RI = Ramifications.begin(), RE = Ramifications.end(); RI != RE; ++RI) {
for (BasicBlock::iterator I = (*RI)->begin(), E = (*RI)->end(); I != E; ++I) {
for (unsigned i = 0, NumOperands = I->getNumOperands(); i < NumOperands; ++i) {
Value *O = I->getOperand(i);
if (Instruction *Inst = dyn_cast<Instruction>(O)) {
if (Inst == Entry.AsyncCallInst) continue; // for the original async call, we will load directly from async return value
if (ContextVariables.count(Inst) != 0) continue; // already examined
if (!DT.dominates(Inst, I->getOperandUse(i))) {
// `I` is using `Inst`, yet `Inst` does not dominate `I` if we arrive directly at AfterCallBlock
// so we need to save `Inst` in the context
ContextVariables.insert(Inst);
Pending.push_back(Inst);
}
} else if (Argument *Arg = dyn_cast<Argument>(O)) {
// count() should be as fast/slow as insert, so just insert here
ContextVariables.insert(Arg);
}
}
}
}
// restore F
EntryBlock->eraseFromParent();
Entry.ContextVariables.clear();
Entry.ContextVariables.reserve(ContextVariables.size());
for (SmallPtrSet<Value*, 256>::iterator I = ContextVariables.begin(), E = ContextVariables.end(); I != E; ++I) {
Entry.ContextVariables.push_back(*I);
}
}
/*
* Consider that F contains a call to G, both of which are async:
*
* function F:
* ...
* %0 = G(%1, %2, ...);
* ...
* return %%;
*
* We want to convert F and generate F__asyn_cb
* they are similar, but with minor yet important differences
* Note those `main func only` and `callback func only` instructions
//////////////////////////////////////////////////////////
function F:
...
ctx = alloc_ctx(len, sp); // main func only
// TODO
// we could also do this only after an async call
// but in that case we will need to pass ctx to the function
// since ctx is no longer in the top async stack frame
%0 = G(%1, %2, ...);
if (async) { // G was async
save context variables in ctx
register F.async_cb as the callback in frame
return without unwinding the stack frame
} else { // G was sync
// use %0 as normal
free_ctx(ctx); // main func only
// ctx is freed here, because so far F is still a sync function
// and we don't want any side effects
...
async return value = %%;
return & normally unwind the stack frame // main func only
}
//////////////////////////////////////////////////////////
* And here's F.async_cb
//////////////////////////////////////////////////////////
function F.async_cb(ctx):
load variables from ctx // callback func only
goto resume_point; // callback func only
...
ctx = realloc_ctx(len); // callback func only
// realloc_ctx is different from alloc_ctx
// which reused the current async stack frame
// we want to keep the saved stack pointer
%0 = G(%1, %2, ...);
if (async) {
save context variables in ctx
register F.async_cb as the callback
return without unwinding the stack frame
} else {
resume_point:
%0'= either $0 or the async return value // callback func only
...
async return value = %%
return restore the stack pointer back to the value stored in F // callback func only
// no need to free the ctx
// the scheduler will be aware of this return and handle the stack frames
}
//////////////////////////////////////////////////////////
*/
void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) {
assert(!AsyncCalls.empty());
// Pass 0
// collect all the return instructions from the original function
// will use later
std::vector<ReturnInst*> OrigReturns;
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) {
OrigReturns.push_back(RI);
}
}
// Pass 1
// Scan each async call and make the basic structure:
// All these will be cloned into the callback functions
// - allocate the async context before calling an async function
// - check async right after calling an async function, save context & return if async, continue if not
// - retrieve the async return value and free the async context if the called function turns out to be sync
std::vector<AsyncCallEntry> AsyncCallEntries;
AsyncCallEntries.reserve(AsyncCalls.size());
for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) {
// prepare blocks
Instruction *CurAsyncCall = *I;
// The block containing the async call
BasicBlock *CurBlock = CurAsyncCall->getParent();
// The block should run after the async call
BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode());
// The block where we store the context and return
BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock);
// return a dummy value at the end, to make the block valid
new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock);
// allocate the context before making the call
// we don't know the size yet, will fix it later
// we cannot insert the instruction later because,
// we need to make sure that all the instructions and blocks are fixed before we can generate DT and find context variables
// In CallHandler.h `sp` will be put as the second parameter
// such that we can take a note of the original sp
CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall);
// Right after the call
// check async and return if so
// TODO: we can define truly async functions and partial async functions
{
// remove old terminator, which came from SplitBlock
CurBlock->getTerminator()->eraseFromParent();
// go to SaveAsyncCtxBlock if the previous call is async
// otherwise just continue to AfterCallBlock
CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock);
BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock);
}
// take a note of this async call
AsyncCallEntry CurAsyncCallEntry;
CurAsyncCallEntry.AsyncCallInst = CurAsyncCall;
CurAsyncCallEntry.AfterCallBlock = AfterCallBlock;
CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst;
CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock;
// create an empty function for the callback, which will be constructed later
CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule);
AsyncCallEntries.push_back(CurAsyncCallEntry);
}
// Pass 2
// analyze the context variables and construct SaveAsyncCtxBlock for each async call
// also calculate the size of the context and allocate the async context accordingly
for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
AsyncCallEntry & CurEntry = *EI;
// Collect everything to be saved
FindContextVariables(CurEntry);
// Pack the variables as a struct
{
// TODO: sort them from large memeber to small ones, in order to make the struct compact even when aligned
SmallVector<Type*, 8> Types;
Types.push_back(CallbackFunctionType->getPointerTo());
for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) {
Types.push_back((*VI)->getType());
}
CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types);
}
// fix the size of allocation
CurEntry.AllocAsyncCtxInst->setOperand(0,
ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)));
// construct SaveAsyncCtxBlock
{
// fill in SaveAsyncCtxBlock
// temporarily remove the terminator for convenience
CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent();
assert(CurEntry.SaveAsyncCtxBlock->empty());
Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock);
SmallVector<Value*, 2> Indices;
// store the callback
{
Indices.push_back(ConstantInt::get(I32, 0));
Indices.push_back(ConstantInt::get(I32, 0));
GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
}
// store the context variables
for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
Indices.clear();
Indices.push_back(ConstantInt::get(I32, 0));
Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function
GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
}
// to exit the block, we want to return without unwinding the stack frame
CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock);
ReturnInst::Create(TheModule->getContext(),
(F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())),
CurEntry.SaveAsyncCtxBlock);
}
}
// Pass 3
// now all the SaveAsyncCtxBlock's have been constructed
// we can clone F and construct callback functions
// we could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlock's appear in _each_ callback
for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
AsyncCallEntry & CurEntry = *EI;
Function *CurCallbackFunc = CurEntry.CallbackFunc;
ValueToValueMapTy VMap;
// Add the entry block
// load variables from the context
// also update VMap for CloneFunction
BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc);
std::vector<LoadInst *> LoadedAsyncVars;
{
Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock);
SmallVector<Value*, 2> Indices;
for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
Indices.clear();
Indices.push_back(ConstantInt::get(I32, 0));
Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function
GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", EntryBlock);
LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock));
// we want the argument to be replaced by the loaded value
if (isa<Argument>(CurEntry.ContextVariables[i]))
VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back();
}
}
// we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto
for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
if (VMap.count(AI) == 0)
VMap[AI] = Constant::getNullValue(AI->getType());
}
// Clone the function
{
SmallVector<ReturnInst*, 8> Returns;
CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns);
// return type of the callback functions is always void
// need to fix the return type
if (!F.getReturnType()->isVoidTy()) {
// for those return instructions that are from the original function
// it means we are 'truly' leaving this function
// need to store the return value right before ruturn
for (size_t i = 0; i < OrigReturns.size(); ++i) {
ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]);
// Need to store the return value into the global area
CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI);
BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI);
new StoreInst(RI->getOperand(0), RetValAddr, RI);
}
// we want to unwind the stack back to where it was before the original function as called
// but we don't actually need to do this here
// at this point it must be true that no callback is pended
// so the scheduler will correct the stack pointer and pop the frame
// here we just fix the return type
for (size_t i = 0; i < Returns.size(); ++i) {
ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext()));
}
}
}
// the callback function does not have any return value
// so clear all the attributes for return
{
AttributeSet Attrs = CurCallbackFunc->getAttributes();
CurCallbackFunc->setAttributes(
Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes())
);
}
// in the callback function, we never allocate a new async frame
// instead we reuse the existing one
for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]);
ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx"));
}
// mapped entry point & async call
BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
// To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler
// TODO need an option for this
{
for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
AsyncCallEntry & CurEntry = *EI;
Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent();
BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
// for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock)
BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock);
MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock);
// store the return value
if (!MappedAsyncCallInst->use_empty()) {
CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock);
BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock);
new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock);
}
// tell the scheduler that we want to keep the current async stack frame
CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock);
// finally we go to the SaveAsyncCtxBlock, to register the callbac, save the local variables and leave
BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]);
BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock);
}
}
std::vector<AllocaInst*> ToPromote;
// applying loaded variables in the entry block
{
BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock);
for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
Value *OrigVar = CurEntry.ContextVariables[i];
if (isa<Argument>(OrigVar)) continue; // already processed
Value *CurVar = VMap[OrigVar];
assert(CurVar != MappedAsyncCall);
if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) {
if (ReachableBlocks.count(Inst->getParent())) {
// Inst could be either defined or loaded from the async context
// Do the dirty works in memory
// TODO: might need to check the safety first
// TODO: can we create phi directly?
AllocaInst *Addr = DemoteRegToStack(*Inst, false);
new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock);
ToPromote.push_back(Addr);
} else {
// The parent block is not reachable, which means there is no confliction
// it's safe to replace Inst with the loaded value
assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument
Inst->replaceAllUsesWith(LoadedAsyncVars[i]);
}
}
}
}
// resolve the return value of the previous async function
// it could be the value just loaded from the global area
// or directly returned by the function (in its sync case)
if (!CurEntry.AsyncCallInst->use_empty()) {
// load the async return value
CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock);
BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock);
LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock);
AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false);
new StoreInst(RetVal, Addr, EntryBlock);
ToPromote.push_back(Addr);
}
// TODO remove unreachable blocks before creating phi
// We go right to ResumeBlock from the EntryBlock
BranchInst::Create(ResumeBlock, EntryBlock);
/*
* Creating phi's
* Normal stack frames and async stack frames are interleaving with each other.
* In a callback function, if we call an async function, we might need to realloc the async ctx.
* at this point we don't want anything stored after the ctx,
* such that we can free and extend the ctx by simply update STACKTOP.
* Therefore we don't want any alloca's in callback functions.
*
*/
if (!ToPromote.empty()) {
DominatorTreeWrapperPass DTW;
DTW.runOnFunction(*CurCallbackFunc);
PromoteMemToReg(ToPromote, DTW.getDomTree());
}
removeUnreachableBlocks(*CurCallbackFunc);
}
// Pass 4
// Here are modifications to the original function, which we won't want to be cloned into the callback functions
for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
AsyncCallEntry & CurEntry = *EI;
// remove the frame if no async functinon has been called
CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI());
}
}
bool LowerEmAsyncify::IsFunctionPointerCall(const Instruction *I) {
// mostly from CallHandler.h
ImmutableCallSite CS(I);
if (!CS) return false; // not call nor invoke
const Value *CV = CS.getCalledValue()->stripPointerCasts();
return !isa<const Function>(CV);
}
ModulePass *llvm::createLowerEmAsyncifyPass() {
return new LowerEmAsyncify();
}