| /////////////////////////////////////////////////////////////////////////////// |
| // // |
| // DxilDebugInstrumentation.cpp // |
| // Copyright (C) Microsoft Corporation. All rights reserved. // |
| // This file is distributed under the University of Illinois Open Source // |
| // License. See LICENSE.TXT for details. // |
| // // |
| // Adds instrumentation that enables shader debugging in PIX // |
| // // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #include "dxc/DXIL/DxilModule.h" |
| #include "dxc/DXIL/DxilOperations.h" |
| #include "dxc/DXIL/DxilUtil.h" |
| #include "dxc/DxilPIXPasses/DxilPIXPasses.h" |
| #include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" |
| #include "dxc/HLSL/DxilGenerationPass.h" |
| |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/Module.h" |
| |
| using namespace llvm; |
| using namespace hlsl; |
| |
| // Overview of instrumentation: |
| // |
| // In summary, instructions are added that cause a "trace" of the execution of |
| // the shader to be written out to a UAV. This trace is then used by a debugger |
| // application to provide a post-mortem debugging experience that reconstructs |
| // the execution history of the shader. |
| // |
| // The trace is only required for a particular shader instance of interest, and |
| // a branchless mechanism is used to write the trace either to an incrementing |
| // location within the UAV, or to a "dumping ground" area at the top of the UAV |
| // if the instance is not of interest. |
| // |
| // The following modifications are made: |
| // |
| // First, instructions are added to the top of the entry point function that |
| // implement the following: |
| // - Examine the input variables that define the instance of the shader that is |
| // running. This will |
| // be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex |
| // shaders, thread id for compute shaders etc. If these system values need to |
| // be added to the shader, then they are also added to the input signature, |
| // if appropriate. |
| // - Compare the above variables with the instance of interest defined by the |
| // invoker of this pass. |
| // Deduce two values: a multiplicand and an addend that together allow a |
| // branchless calculation of the offset into the UAV at which to write via |
| // "offset = offset * multiplicand + addend." If the instance is NOT of |
| // interest, the multiplicand is zero and the addend is sizeof(UAV)-(a little |
| // bit), causing writes for uninteresting invocations to end up at the top of |
| // the UAV. Otherwise the multiplicand is 1 and the addend is 0. |
| // - Calculate an "instance identifier". Even with the above instance |
| // identification, several invocations may |
| // end up matching the selection criteria. Specifically, this happens during |
| // a draw call in which many triangles overlap the pixel of interest. More on |
| // this below. |
| // |
| // During execution, the instrumentation for most instructions cause data to be |
| // emitted to the UAV. The index at which data is written is identified by |
| // treating the first uint32 of the UAV as an index which is atomically |
| // incremented by the instrumentation. The very first value of this counter that |
| // is encountered by each invocation is used as the "instance identifier" |
| // mentioned above. That instance identifier is written out with each packet, |
| // since many pixel shaders executing in parallel will emit interleaved packets, |
| // and the debugger application uses the identifiers to group packets from each |
| // separate invocation together. |
| // |
| // If an instruction has a non-void and primitive return type, i.e. isn't a |
| // struct, then the instrumentation will write that value out to the UAV as well |
| // as part of the "step" data packet. |
| // |
| // The limiting size of the UAV is enforced in a branchless way by ANDing the |
| // offset with a precomputed value that is sizeof(UAV)-64. The actual size of |
| // the UAV allocated by the caller is required to be a power of two plus 64 for |
| // this reason. The caller detects UAV overrun by examining a canary value close |
| // to the end of the power-of-two size of the UAV. If this value has been |
| // overwritten, the debug session is deemed to have overflowed the UAV. The |
| // caller will than allocate a UAV that is twice the size and try again, up to a |
| // predefined maximum. |
| |
| // Keep these in sync with the same-named value in the debugger application's |
| // WinPixShaderUtils.h |
| |
| constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024; |
| // The actual max size per record is much smaller than this, but it never |
| // hurts to be generous. |
| constexpr size_t CounterOffsetBeyondUsefulData = DebugBufferDumpingGroundSize / 2; |
| |
| // These definitions echo those in the debugger application's |
| // debugshaderrecord.h file |
| enum DebugShaderModifierRecordType { |
| DebugShaderModifierRecordTypeInvocationStartMarker, |
| DebugShaderModifierRecordTypeStep, |
| DebugShaderModifierRecordTypeEvent, |
| DebugShaderModifierRecordTypeInputRegister, |
| DebugShaderModifierRecordTypeReadRegister, |
| DebugShaderModifierRecordTypeWrittenRegister, |
| DebugShaderModifierRecordTypeRegisterRelativeIndex0, |
| DebugShaderModifierRecordTypeRegisterRelativeIndex1, |
| DebugShaderModifierRecordTypeRegisterRelativeIndex2, |
| DebugShaderModifierRecordTypeDXILStepVoid = 251, |
| DebugShaderModifierRecordTypeDXILStepFloat = 252, |
| DebugShaderModifierRecordTypeDXILStepUint32 = 253, |
| DebugShaderModifierRecordTypeDXILStepUint64 = 254, |
| DebugShaderModifierRecordTypeDXILStepDouble = 255, |
| }; |
| |
| // These structs echo those in the debugger application's debugshaderrecord.h |
| // file, but are recapitulated here because the originals use unnamed unions |
| // which are disallowed by DXCompiler's build. |
| // |
| #pragma pack(push, 4) |
| struct DebugShaderModifierRecordHeader { |
| union { |
| struct { |
| uint32_t SizeDwords : 4; |
| uint32_t Flags : 4; |
| uint32_t Type : 8; |
| uint32_t HeaderPayload : 16; |
| } Details; |
| uint32_t u32Header; |
| } Header; |
| uint32_t UID; |
| }; |
| |
| struct DebugShaderModifierRecordDXILStepBase { |
| union { |
| struct { |
| uint32_t SizeDwords : 4; |
| uint32_t Flags : 4; |
| uint32_t Type : 8; |
| uint32_t Opcode : 16; |
| } Details; |
| uint32_t u32Header; |
| } Header; |
| uint32_t UID; |
| uint32_t InstructionOffset; |
| }; |
| |
| template <typename ReturnType> |
| struct DebugShaderModifierRecordDXILStep |
| : public DebugShaderModifierRecordDXILStepBase { |
| ReturnType ReturnValue; |
| union { |
| struct { |
| uint32_t ValueOrdinalBase : 16; |
| uint32_t ValueOrdinalIndex : 16; |
| } Details; |
| uint32_t u32ValueOrdinal; |
| } ValueOrdinal; |
| }; |
| |
| template <> |
| struct DebugShaderModifierRecordDXILStep<void> |
| : public DebugShaderModifierRecordDXILStepBase {}; |
| #pragma pack(pop) |
| |
| uint32_t |
| DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) { |
| return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) / |
| sizeof(uint32_t)); |
| } |
| |
| class DxilDebugInstrumentation : public ModulePass { |
| |
| private: |
| union ParametersAllTogether { |
| unsigned Parameters[3]; |
| struct PixelShaderParameters { |
| unsigned X; |
| unsigned Y; |
| } PixelShader; |
| struct VertexShaderParameters { |
| unsigned VertexId; |
| unsigned InstanceId; |
| } VertexShader; |
| struct ComputeShaderParameters { |
| unsigned ThreadIdX; |
| unsigned ThreadIdY; |
| unsigned ThreadIdZ; |
| } ComputeShader; |
| struct GeometryShaderParameters { |
| unsigned PrimitiveId; |
| unsigned InstanceId; |
| } GeometryShader; |
| } m_Parameters = {{0, 0, 0}}; |
| |
| union SystemValueIndices { |
| struct PixelShaderParameters { |
| unsigned Position; |
| } PixelShader; |
| struct VertexShaderParameters { |
| unsigned VertexId; |
| unsigned InstanceId; |
| } VertexShader; |
| struct GeometryShaderParameters { |
| unsigned PrimitiveId; |
| unsigned InstanceId; |
| } GeometryShader; |
| }; |
| |
| uint64_t m_UAVSize = 1024 * 1024; |
| Value *m_SelectionCriterion = nullptr; |
| CallInst *m_HandleForUAV = nullptr; |
| Value *m_InvocationId = nullptr; |
| |
| // Together these two values allow branchless writing to the UAV. An |
| // invocation of the shader is either of interest or not (e.g. it writes to |
| // the pixel the user selected for debugging or it doesn't). If not of |
| // interest, debugging output will still occur, but it will be relegated to |
| // the very top few bytes of the UAV. Invocations of interest, by contrast, |
| // will be written to the UAV at sequentially increasing offsets. |
| |
| // This value will either be one or zero (one if the invocation is of |
| // interest, zero otherwise) |
| Value *m_OffsetMultiplicand = nullptr; |
| // This will either be zero (if the invocation is of interest) or |
| // (UAVSize)-(SmallValue) if not. |
| Value *m_OffsetAddend = nullptr; |
| |
| Constant *m_OffsetMask = nullptr; |
| |
| Constant *m_CounterOffset = nullptr; |
| |
| struct BuilderContext { |
| Module &M; |
| DxilModule &DM; |
| LLVMContext &Ctx; |
| OP *HlslOP; |
| IRBuilder<> &Builder; |
| }; |
| |
| uint32_t m_RemainingReservedSpaceInBytes = 0; |
| Value *m_CurrentIndex = nullptr; |
| |
| public: |
| static char ID; // Pass identification, replacement for typeid |
| explicit DxilDebugInstrumentation() : ModulePass(ID) {} |
| const char *getPassName() const override { |
| return "Add PIX debug instrumentation"; |
| } |
| void applyOptions(PassOptions O) override; |
| bool runOnModule(Module &M) override; |
| |
| private: |
| SystemValueIndices addRequiredSystemValues(BuilderContext &BC); |
| void addUAV(BuilderContext &BC); |
| void addInvocationSelectionProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices); |
| Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices); |
| Value *addGeometryShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices); |
| Value *addDispatchedShaderProlog(BuilderContext &BC); |
| Value *addVertexShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices); |
| void addDebugEntryValue(BuilderContext &BC, Value *TheValue); |
| void addInvocationStartMarker(BuilderContext &BC); |
| void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords); |
| void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst); |
| void addStepDebugEntry(BuilderContext &BC, Instruction *Inst); |
| void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum, |
| Value *V, std::uint32_t ValueOrdinal, |
| Value *ValueOrdinalIndex); |
| uint32_t UAVDumpingGroundOffset(); |
| template <typename ReturnType> |
| void addStepEntryForType(DebugShaderModifierRecordType RecordType, |
| BuilderContext &BC, std::uint32_t InstNum, Value *V, |
| std::uint32_t ValueOrdinal, |
| Value *ValueOrdinalIndex); |
| }; |
| |
| void DxilDebugInstrumentation::applyOptions(PassOptions O) { |
| GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0); |
| GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0); |
| GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0); |
| GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024); |
| } |
| |
| uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() { |
| return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize); |
| } |
| |
| DxilDebugInstrumentation::SystemValueIndices |
| DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC) { |
| SystemValueIndices SVIndices{}; |
| |
| hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature(); |
| |
| auto &InputElements = InputSignature.GetElements(); |
| |
| auto ShaderModel = BC.DM.GetShaderModel(); |
| switch (ShaderModel->GetKind()) { |
| case DXIL::ShaderKind::Amplification: |
| case DXIL::ShaderKind::Mesh: |
| case DXIL::ShaderKind::Compute: |
| // Dispatch* thread Id is not in the input signature |
| break; |
| case DXIL::ShaderKind::Vertex: { |
| { |
| auto Existing_SV_VertexId = std::find_if( |
| InputElements.begin(), InputElements.end(), |
| [](const std::unique_ptr<DxilSignatureElement> &Element) { |
| return Element->GetSemantic()->GetKind() == |
| hlsl::DXIL::SemanticKind::VertexID; |
| }); |
| |
| if (Existing_SV_VertexId == InputElements.end()) { |
| auto Added_SV_VertexId = |
| llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn); |
| Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(), |
| hlsl::DXIL::InterpolationMode::Undefined, |
| 1, 1); |
| Added_SV_VertexId->AppendSemanticIndex(0); |
| Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn); |
| Added_SV_VertexId->SetKind(hlsl::DXIL::SemanticKind::VertexID); |
| |
| auto index = InputSignature.AppendElement(std::move(Added_SV_VertexId)); |
| SVIndices.VertexShader.VertexId = InputElements[index]->GetID(); |
| } else { |
| SVIndices.VertexShader.VertexId = Existing_SV_VertexId->get()->GetID(); |
| } |
| } |
| { |
| auto Existing_SV_InstanceId = std::find_if( |
| InputElements.begin(), InputElements.end(), |
| [](const std::unique_ptr<DxilSignatureElement> &Element) { |
| return Element->GetSemantic()->GetKind() == |
| hlsl::DXIL::SemanticKind::InstanceID; |
| }); |
| |
| if (Existing_SV_InstanceId == InputElements.end()) { |
| auto Added_SV_InstanceId = |
| llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn); |
| Added_SV_InstanceId->Initialize( |
| "InstanceId", hlsl::CompType::getF32(), |
| hlsl::DXIL::InterpolationMode::Undefined, 1, 1); |
| Added_SV_InstanceId->AppendSemanticIndex(0); |
| Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn); |
| Added_SV_InstanceId->SetKind(hlsl::DXIL::SemanticKind::InstanceID); |
| |
| auto index = |
| InputSignature.AppendElement(std::move(Added_SV_InstanceId)); |
| SVIndices.VertexShader.InstanceId = InputElements[index]->GetID(); |
| } else { |
| SVIndices.VertexShader.InstanceId = |
| Existing_SV_InstanceId->get()->GetID(); |
| } |
| } |
| } break; |
| case DXIL::ShaderKind::Geometry: |
| // GS Instance Id and Primitive Id are not in the input signature |
| break; |
| case DXIL::ShaderKind::Pixel: { |
| auto Existing_SV_Position = |
| std::find_if(InputElements.begin(), InputElements.end(), |
| [](const std::unique_ptr<DxilSignatureElement> &Element) { |
| return Element->GetSemantic()->GetKind() == |
| hlsl::DXIL::SemanticKind::Position; |
| }); |
| |
| // SV_Position, if present, has to have full mask, so we needn't worry |
| // about the shader having selected components that don't include x or y. |
| // If not present, we add it. |
| if (Existing_SV_Position == InputElements.end()) { |
| auto Added_SV_Position = |
| llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn); |
| Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(), |
| hlsl::DXIL::InterpolationMode::Linear, 1, |
| 4); |
| Added_SV_Position->AppendSemanticIndex(0); |
| Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn); |
| Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position); |
| |
| auto index = InputSignature.AppendElement(std::move(Added_SV_Position)); |
| SVIndices.PixelShader.Position = InputElements[index]->GetID(); |
| } else { |
| SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID(); |
| } |
| } break; |
| default: |
| assert(false); // guaranteed by runOnModule |
| } |
| |
| return SVIndices; |
| } |
| |
| Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) { |
| Constant *Zero32Arg = BC.HlslOP->GetU32Const(0); |
| Constant *One32Arg = BC.HlslOP->GetU32Const(1); |
| Constant *Two32Arg = BC.HlslOP->GetU32Const(2); |
| |
| auto ThreadIdFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx)); |
| Constant *Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId); |
| auto ThreadIdX = |
| BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Zero32Arg}, "ThreadIdX"); |
| auto ThreadIdY = |
| BC.Builder.CreateCall(ThreadIdFunc, {Opcode, One32Arg}, "ThreadIdY"); |
| auto ThreadIdZ = |
| BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Two32Arg}, "ThreadIdZ"); |
| |
| // Compare to expected thread ID |
| auto CompareToX = BC.Builder.CreateICmpEQ( |
| ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX), |
| "CompareToThreadIdX"); |
| auto CompareToY = BC.Builder.CreateICmpEQ( |
| ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY), |
| "CompareToThreadIdY"); |
| auto CompareToZ = BC.Builder.CreateICmpEQ( |
| ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ), |
| "CompareToThreadIdZ"); |
| |
| auto CompareXAndY = |
| BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY"); |
| |
| auto CompareAll = |
| BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll"); |
| |
| return CompareAll; |
| } |
| |
| Value * |
| DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices) { |
| Constant *Zero32Arg = BC.HlslOP->GetU32Const(0); |
| Constant *Zero8Arg = BC.HlslOP->GetI8Const(0); |
| UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| |
| auto LoadInputOpFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx)); |
| Constant *LoadInputOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput); |
| Constant *SV_Vert_ID = |
| BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId); |
| auto VertId = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/, |
| Zero8Arg /*column*/, UndefArg}, |
| "VertId"); |
| |
| Constant *SV_Instance_ID = |
| BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId); |
| auto InstanceId = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/, |
| Zero8Arg /*column*/, UndefArg}, |
| "InstanceId"); |
| |
| // Compare to expected vertex ID and instance ID |
| auto CompareToVert = BC.Builder.CreateICmpEQ( |
| VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId), |
| "CompareToVertId"); |
| auto CompareToInstance = BC.Builder.CreateICmpEQ( |
| InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId), |
| "CompareToInstanceId"); |
| auto CompareBoth = |
| BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth"); |
| |
| return CompareBoth; |
| } |
| |
| Value *DxilDebugInstrumentation::addGeometryShaderProlog( |
| BuilderContext &BC, SystemValueIndices SVIndices) { |
| |
| auto PrimitiveIdOpFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx)); |
| Constant *PrimitiveIdOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID); |
| auto PrimId = |
| BC.Builder.CreateCall(PrimitiveIdOpFunc, {PrimitiveIdOpcode}, "PrimId"); |
| |
| auto CompareToPrim = BC.Builder.CreateICmpEQ( |
| PrimId, BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.PrimitiveId), |
| "CompareToPrimId"); |
| |
| if (BC.DM.GetGSInstanceCount() <= 1) { |
| return CompareToPrim; |
| } |
| |
| auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID, |
| Type::getInt32Ty(BC.Ctx)); |
| Constant *GSInstanceIdOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID); |
| auto GSInstanceId = BC.Builder.CreateCall( |
| GSInstanceIdOpFunc, {GSInstanceIdOpcode}, "GSInstanceId"); |
| |
| // Compare to expected vertex ID and instance ID |
| auto CompareToInstance = BC.Builder.CreateICmpEQ( |
| GSInstanceId, |
| BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId), |
| "CompareToInstanceId"); |
| auto CompareBoth = |
| BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth"); |
| |
| return CompareBoth; |
| } |
| |
| Value * |
| DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices) { |
| Constant *Zero32Arg = BC.HlslOP->GetU32Const(0); |
| Constant *Zero8Arg = BC.HlslOP->GetI8Const(0); |
| Constant *One8Arg = BC.HlslOP->GetI8Const(1); |
| UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| |
| // Convert SV_POSITION to UINT |
| Value *XAsInt; |
| Value *YAsInt; |
| { |
| auto LoadInputOpFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx)); |
| Constant *LoadInputOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput); |
| Constant *SV_Pos_ID = |
| BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position); |
| auto XPos = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, |
| Zero8Arg /*column*/, UndefArg}, |
| "XPos"); |
| auto YPos = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, |
| One8Arg /*column*/, UndefArg}, |
| "YPos"); |
| |
| XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, |
| Type::getInt32Ty(BC.Ctx), "XIndex"); |
| YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, |
| Type::getInt32Ty(BC.Ctx), "YIndex"); |
| } |
| |
| // Compare to expected pixel position and primitive ID |
| auto CompareToX = BC.Builder.CreateICmpEQ( |
| XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX"); |
| auto CompareToY = BC.Builder.CreateICmpEQ( |
| YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY"); |
| auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos"); |
| |
| return ComparePos; |
| } |
| |
| void DxilDebugInstrumentation::addUAV(BuilderContext &BC) { |
| // Set up a UAV with structure of a single int |
| unsigned int UAVResourceHandle = |
| static_cast<unsigned int>(BC.DM.GetUAVs().size()); |
| SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)}; |
| llvm::StructType *UAVStructTy = |
| llvm::StructType::create(Elements, "PIX_DebugUAV_Type"); |
| std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>(); |
| pUAV->SetGlobalName("PIX_DebugUAVName"); |
| pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo())); |
| pUAV->SetID(UAVResourceHandle); |
| pUAV->SetSpaceID( |
| (unsigned int)-2); // This is the reserved-for-tools register space |
| pUAV->SetSampleCount(1); |
| pUAV->SetGloballyCoherent(false); |
| pUAV->SetHasCounter(false); |
| pUAV->SetCompType(CompType::getI32()); |
| pUAV->SetLowerBound(0); |
| pUAV->SetRangeSize(1); |
| pUAV->SetKind(DXIL::ResourceKind::RawBuffer); |
| pUAV->SetRW(true); |
| |
| auto ID = BC.DM.AddUAV(std::move(pUAV)); |
| assert(ID == UAVResourceHandle); |
| |
| BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true); |
| |
| // Create handle for the newly-added UAV |
| Function *CreateHandleOpFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx)); |
| Constant *CreateHandleOpcodeArg = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle); |
| Constant *UAVVArg = BC.HlslOP->GetI8Const( |
| static_cast<std::underlying_type<DxilResourceBase::Class>::type>( |
| DXIL::ResourceClass::UAV)); |
| Constant *MetaDataArg = BC.HlslOP->GetU32Const( |
| ID); // position of the metadata record in the corresponding metadata list |
| Constant *IndexArg = BC.HlslOP->GetU32Const(0); // |
| Constant *FalseArg = |
| BC.HlslOP->GetI1Const(0); // non-uniform resource index: false |
| m_HandleForUAV = BC.Builder.CreateCall( |
| CreateHandleOpFunc, |
| {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg}, |
| "PIX_DebugUAV_Handle"); |
| } |
| |
| void DxilDebugInstrumentation::addInvocationSelectionProlog( |
| BuilderContext &BC, SystemValueIndices SVIndices) { |
| auto ShaderModel = BC.DM.GetShaderModel(); |
| |
| Value *ParameterTestResult = nullptr; |
| switch (ShaderModel->GetKind()) { |
| case DXIL::ShaderKind::Compute: |
| case DXIL::ShaderKind::Amplification: |
| case DXIL::ShaderKind::Mesh: |
| ParameterTestResult = addDispatchedShaderProlog(BC); |
| break; |
| case DXIL::ShaderKind::Geometry: |
| ParameterTestResult = addGeometryShaderProlog(BC, SVIndices); |
| break; |
| case DXIL::ShaderKind::Vertex: |
| ParameterTestResult = addVertexShaderProlog(BC, SVIndices); |
| break; |
| case DXIL::ShaderKind::Pixel: |
| ParameterTestResult = addPixelShaderProlog(BC, SVIndices); |
| break; |
| default: |
| assert(false); // guaranteed by runOnModule |
| } |
| |
| // This is a convenient place to calculate the values that modify the UAV |
| // offset for invocations of interest and for UAV size. |
| m_OffsetMultiplicand = |
| BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult, |
| Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand"); |
| auto InverseOffsetMultiplicand = |
| BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), m_OffsetMultiplicand, |
| "ComplementOfMultiplicand"); |
| m_OffsetAddend = |
| BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()), |
| InverseOffsetMultiplicand, "OffsetAddend"); |
| m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1); |
| |
| m_CounterOffset = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + CounterOffsetBeyondUsefulData); |
| |
| m_SelectionCriterion = ParameterTestResult; |
| } |
| |
| void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, |
| uint32_t SpaceInBytes) { |
| assert(m_CurrentIndex == nullptr); |
| assert(m_RemainingReservedSpaceInBytes == 0); |
| |
| m_RemainingReservedSpaceInBytes = SpaceInBytes; |
| |
| // Insert the UAV increment instruction: |
| Function *AtomicOpFunc = |
| BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx)); |
| Constant *AtomicBinOpcode = |
| BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp); |
| Constant *AtomicAdd = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add); |
| UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| |
| // so inc will be zero for uninteresting invocations: |
| Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes); |
| Value *IncrementForThisInvocation = BC.Builder.CreateMul( |
| Increment, m_OffsetMultiplicand, "IncrementForThisInvocation"); |
| |
| auto PreviousValue = BC.Builder.CreateCall( |
| AtomicOpFunc, |
| { |
| AtomicBinOpcode, // i32, ; opcode |
| m_HandleForUAV, // %dx.types.Handle, ; resource handle |
| AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, |
| // XOR, IMIN, IMAX, UMIN, UMAX |
| m_CounterOffset, // i32, ; coordinate c0: index in bytes |
| UndefArg, // i32, ; coordinate c1 (unused) |
| UndefArg, // i32, ; coordinate c2 (unused) |
| IncrementForThisInvocation, // i32); increment value |
| }, |
| "UAVIncResult"); |
| |
| if (m_InvocationId == nullptr) { |
| m_InvocationId = PreviousValue; |
| } |
| |
| auto MaskedForLimit = |
| BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit"); |
| // The return value will either end up being itself (multiplied by one and |
| // added with zero) or the "dump uninteresting things here" value of (UAVSize |
| // - a bit). |
| auto MultipliedForInterest = BC.Builder.CreateMul( |
| MaskedForLimit, m_OffsetMultiplicand, "MultipliedForInterest"); |
| auto AddedForInterest = BC.Builder.CreateAdd( |
| MultipliedForInterest, m_OffsetAddend, "AddedForInterest"); |
| m_CurrentIndex = AddedForInterest; |
| } |
| |
| void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, |
| Value *TheValue) { |
| assert(m_RemainingReservedSpaceInBytes > 0); |
| |
| auto TheValueTypeID = TheValue->getType()->getTypeID(); |
| if (TheValueTypeID == Type::TypeID::DoubleTyID) { |
| Function *SplitDouble = |
| BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType()); |
| Constant *SplitDoubleOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble); |
| auto SplitDoubleIntruction = BC.Builder.CreateCall( |
| SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble"); |
| auto LowBits = |
| BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits"); |
| auto HighBits = |
| BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits"); |
| // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding |
| addDebugEntryValue(BC, LowBits); |
| addDebugEntryValue(BC, HighBits); |
| } else if (TheValueTypeID == Type::TypeID::IntegerTyID && |
| TheValue->getType()->getIntegerBitWidth() == 64) { |
| auto LowBits = |
| BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits"); |
| auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits"); |
| auto HighBits = BC.Builder.CreateTrunc( |
| ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits"); |
| // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding |
| addDebugEntryValue(BC, LowBits); |
| addDebugEntryValue(BC, HighBits); |
| } else if (TheValueTypeID == Type::TypeID::IntegerTyID && |
| (TheValue->getType()->getIntegerBitWidth() == 16 || |
| TheValue->getType()->getIntegerBitWidth() == 1)) { |
| auto As32 = |
| BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32"); |
| addDebugEntryValue(BC, As32); |
| } else if (TheValueTypeID == Type::TypeID::HalfTyID) { |
| auto AsFloat = |
| BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat"); |
| addDebugEntryValue(BC, AsFloat); |
| } else { |
| Function *StoreValue = |
| BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, |
| TheValue->getType()); // Type::getInt32Ty(BC.Ctx)); |
| Constant *StoreValueOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore); |
| UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| UndefValue *UndefArg = nullptr; |
| if (TheValueTypeID == Type::TypeID::IntegerTyID) { |
| UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| } else if (TheValueTypeID == Type::TypeID::FloatTyID) { |
| UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx)); |
| } else { |
| // The above are the only two valid types for a UAV store |
| assert(false); |
| } |
| Constant *WriteMask_X = BC.HlslOP->GetI8Const(1); |
| (void)BC.Builder.CreateCall( |
| StoreValue, {StoreValueOpcode, // i32 opcode |
| m_HandleForUAV, // %dx.types.Handle, ; resource handle |
| m_CurrentIndex, // i32 c0: index in bytes into UAV |
| Undef32Arg, // i32 c1: unused |
| TheValue, |
| UndefArg, // unused values |
| UndefArg, // unused values |
| UndefArg, // unused values |
| WriteMask_X}); |
| |
| m_RemainingReservedSpaceInBytes -= 4; |
| assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow |
| |
| if (m_RemainingReservedSpaceInBytes != 0) { |
| m_CurrentIndex = |
| BC.Builder.CreateAdd(m_CurrentIndex, BC.HlslOP->GetU32Const(4)); |
| } else { |
| m_CurrentIndex = nullptr; |
| } |
| } |
| } |
| |
| void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) { |
| DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0}; |
| reserveDebugEntrySpace(BC, sizeof(marker)); |
| |
| marker.Header.Details.SizeDwords = |
| DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker)); |
| ; |
| marker.Header.Details.Flags = 0; |
| marker.Header.Details.Type = |
| DebugShaderModifierRecordTypeInvocationStartMarker; |
| addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header)); |
| addDebugEntryValue(BC, m_InvocationId); |
| } |
| |
| template <typename ReturnType> |
| void DxilDebugInstrumentation::addStepEntryForType( |
| DebugShaderModifierRecordType RecordType, BuilderContext &BC, |
| std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, |
| Value *ValueOrdinalIndex) { |
| DebugShaderModifierRecordDXILStep<ReturnType> step = {}; |
| reserveDebugEntrySpace(BC, sizeof(step)); |
| |
| step.Header.Details.SizeDwords = |
| DebugShaderModifierRecordPayloadSizeDwords(sizeof(step)); |
| step.Header.Details.Type = static_cast<uint8_t>(RecordType); |
| addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header)); |
| addDebugEntryValue(BC, m_InvocationId); |
| addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum)); |
| |
| if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid) { |
| addDebugEntryValue(BC, V); |
| |
| IRBuilder<> &B = BC.Builder; |
| |
| Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16); |
| Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF), |
| "ValueOrdinalIndex"); |
| Value *EncodedValueOrdinalAndIndex = |
| BC.Builder.CreateOr(VO, VOI, "ValueOrdinal"); |
| addDebugEntryValue(BC, EncodedValueOrdinalAndIndex); |
| } |
| } |
| |
| void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC, |
| StoreInst *Inst) { |
| std::uint32_t ValueOrdinalBase; |
| std::uint32_t UnusedValueOrdinalSize; |
| llvm::Value *ValueOrdinalIndex; |
| if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase, |
| &UnusedValueOrdinalSize, |
| &ValueOrdinalIndex)) { |
| return; |
| } |
| |
| std::uint32_t InstNum; |
| if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) { |
| return; |
| } |
| |
| addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase, |
| ValueOrdinalIndex); |
| } |
| |
| void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC, |
| Instruction *Inst) { |
| if (Inst->getOpcode() == Instruction::OtherOps::PHI) { |
| return; |
| } |
| |
| if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) { |
| addStoreStepDebugEntry(BC, St); |
| return; |
| } |
| |
| std::uint32_t RegNum; |
| if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) { |
| return; |
| } |
| |
| std::uint32_t InstNum; |
| if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) { |
| return; |
| } |
| |
| addStepDebugEntryValue(BC, InstNum, Inst, RegNum, BC.Builder.getInt32(0)); |
| } |
| |
| void DxilDebugInstrumentation::addStepDebugEntryValue( |
| BuilderContext &BC, std::uint32_t InstNum, Value *V, |
| std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) { |
| const Type::TypeID ID = V->getType()->getTypeID(); |
| |
| switch (ID) { |
| case Type::TypeID::StructTyID: |
| case Type::TypeID::VoidTyID: |
| addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, BC, |
| InstNum, V, ValueOrdinal, ValueOrdinalIndex); |
| break; |
| case Type::TypeID::FloatTyID: |
| addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC, |
| InstNum, V, ValueOrdinal, ValueOrdinalIndex); |
| break; |
| case Type::TypeID::IntegerTyID: |
| if (V->getType()->getIntegerBitWidth() == 64) { |
| addStepEntryForType<uint64_t>(DebugShaderModifierRecordTypeDXILStepUint64, |
| BC, InstNum, V, ValueOrdinal, |
| ValueOrdinalIndex); |
| } else { |
| addStepEntryForType<uint32_t>(DebugShaderModifierRecordTypeDXILStepUint32, |
| BC, InstNum, V, ValueOrdinal, |
| ValueOrdinalIndex); |
| } |
| break; |
| case Type::TypeID::DoubleTyID: |
| addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, BC, |
| InstNum, V, ValueOrdinal, ValueOrdinalIndex); |
| break; |
| case Type::TypeID::HalfTyID: |
| addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, BC, |
| InstNum, V, ValueOrdinal, ValueOrdinalIndex); |
| break; |
| case Type::TypeID::PointerTyID: |
| // Skip pointer calculation instructions. They aren't particularly |
| // meaningful to the user (being a mere implementation detail for lookup |
| // tables, etc.), and their type is problematic from a UI point of view. The |
| // subsequent instructions that dereference the pointer will be properly |
| // instrumented and show the (meaningful) retrieved value. |
| break; |
| case Type::TypeID::FP128TyID: |
| case Type::TypeID::LabelTyID: |
| case Type::TypeID::MetadataTyID: |
| case Type::TypeID::FunctionTyID: |
| case Type::TypeID::ArrayTyID: |
| case Type::TypeID::VectorTyID: |
| case Type::TypeID::X86_FP80TyID: |
| case Type::TypeID::X86_MMXTyID: |
| case Type::TypeID::PPC_FP128TyID: |
| assert(false); |
| } |
| } |
| |
| bool DxilDebugInstrumentation::runOnModule(Module &M) { |
| DxilModule &DM = M.GetOrCreateDxilModule(); |
| LLVMContext &Ctx = M.getContext(); |
| OP *HlslOP = DM.GetOP(); |
| |
| auto ShaderModel = DM.GetShaderModel(); |
| switch (ShaderModel->GetKind()) { |
| case DXIL::ShaderKind::Amplification: |
| case DXIL::ShaderKind::Mesh: |
| case DXIL::ShaderKind::Vertex: |
| case DXIL::ShaderKind::Geometry: |
| case DXIL::ShaderKind::Pixel: |
| case DXIL::ShaderKind::Compute: |
| break; |
| default: |
| return false; |
| } |
| |
| // First record pointers to all instructions in the function: |
| std::vector<Instruction *> AllInstructions; |
| for (inst_iterator I = inst_begin(DM.GetEntryFunction()), |
| E = inst_end(DM.GetEntryFunction()); |
| I != E; ++I) { |
| AllInstructions.push_back(&*I); |
| } |
| |
| // Branchless instrumentation requires taking care of a few things: |
| // -Each invocation of the shader will be either of interest or not of |
| // interest |
| // -If of interest, the offset into the output UAV will be as expected |
| // -If not, the offset is forced to (UAVsize) - (Small Amount), and that |
| // output is ignored by the CPU-side code. |
| // -The invocation of interest may overflow the UAV. This is handled by taking |
| // the modulus of the |
| // output index. Overflow is then detected on the CPU side by checking for |
| // the presence of a canary value at (UAVSize) - (Small Amount) * 2 (which is |
| // actually a conservative definition of overflow). |
| // |
| |
| Instruction *firstInsertionPt = |
| dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()); |
| IRBuilder<> Builder(firstInsertionPt); |
| |
| BuilderContext BC{M, DM, Ctx, HlslOP, Builder}; |
| |
| addUAV(BC); |
| auto SystemValues = addRequiredSystemValues(BC); |
| addInvocationSelectionProlog(BC, SystemValues); |
| addInvocationStartMarker(BC); |
| |
| // Explicitly name new blocks in order to provide stable names for testing purposes |
| int NewBlockCounter = 0; |
| |
| auto Fn = DM.GetEntryFunction(); |
| auto &Blocks = Fn->getBasicBlockList(); |
| for (auto &CurrentBlock : Blocks) { |
| struct ValueAndPhi { |
| Value *Val; |
| PHINode *Phi; |
| unsigned Index; |
| }; |
| |
| std::map<BasicBlock *, std::vector<ValueAndPhi>> InsertableEdges; |
| |
| auto &Is = CurrentBlock.getInstList(); |
| for (auto &Inst : Is) { |
| if (Inst.getOpcode() != Instruction::OtherOps::PHI) { |
| break; |
| } |
| PHINode &PN = llvm::cast<PHINode>(Inst); |
| for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { |
| BasicBlock *PhiBB = PN.getIncomingBlock(i); |
| Value *PhiVal = PN.getIncomingValue(i); |
| InsertableEdges[PhiBB].push_back({PhiVal, &PN, i}); |
| } |
| } |
| |
| for (auto &InsertableEdge : InsertableEdges) { |
| auto *NewBlock = BasicBlock::Create(Ctx, "PIXDebug" + std::to_string(NewBlockCounter++), |
| InsertableEdge.first->getParent()); |
| IRBuilder<> Builder(NewBlock); |
| |
| auto *PreviousBlock = InsertableEdge.first; |
| |
| // Modify all successor operands of the terminator in the previous block |
| // that match the current block to point to the new block: |
| TerminatorInst *terminator = PreviousBlock->getTerminator(); |
| unsigned NumSuccessors = terminator->getNumSuccessors(); |
| for (unsigned SuccessorIndex = 0; SuccessorIndex < NumSuccessors; |
| ++SuccessorIndex) { |
| auto *CurrentSuccessor = terminator->getSuccessor(SuccessorIndex); |
| if (CurrentSuccessor == &CurrentBlock) { |
| terminator->setSuccessor(SuccessorIndex, NewBlock); |
| } |
| } |
| |
| // Modify the Phis and add debug instrumentation |
| for (auto &ValueNPhi : InsertableEdge.second) { |
| // Modify the phi to refer to the new block: |
| ValueNPhi.Phi->setIncomingBlock(ValueNPhi.Index, NewBlock); |
| |
| // Add instrumentation to the new block |
| std::uint32_t RegNum; |
| if (!pix_dxil::PixDxilReg::FromInst(ValueNPhi.Phi, &RegNum)) { |
| continue; |
| } |
| |
| std::uint32_t InstNum; |
| if (!pix_dxil::PixDxilInstNum::FromInst(ValueNPhi.Phi, &InstNum)) { |
| continue; |
| } |
| |
| BuilderContext BC{M, DM, Ctx, HlslOP, Builder}; |
| addStepDebugEntryValue(BC, InstNum, ValueNPhi.Val, RegNum, |
| BC.Builder.getInt32(0)); |
| } |
| |
| // Add a branch to the new block to point to the current block |
| Builder.CreateBr(&CurrentBlock); |
| } |
| } |
| |
| // Instrument original instructions: |
| for (auto &Inst : AllInstructions) { |
| // Instrumentation goes after the instruction if it is not a terminator. |
| // Otherwise, Instrumentation goes prior to the instruction. |
| if (!Inst->isTerminator()) { |
| IRBuilder<> Builder(Inst->getNextNode()); |
| BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; |
| addStepDebugEntry(BC2, Inst); |
| } else { |
| // Insert before this instruction |
| IRBuilder<> Builder(Inst); |
| BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; |
| addStepDebugEntry(BC2, Inst); |
| } |
| } |
| |
| DM.ReEmitDxilResources(); |
| |
| return true; |
| } |
| |
| char DxilDebugInstrumentation::ID = 0; |
| |
| ModulePass *llvm::createDxilDebugInstrumentationPass() { |
| return new DxilDebugInstrumentation(); |
| } |
| |
| INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation", |
| "HLSL DXIL debug instrumentation for PIX", false, false) |