Improve stream partitioning debug output speed by roughly 10000x. (#11067)
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp index f54b670..c6b7593 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.cpp
@@ -20,7 +20,7 @@ #ifndef NDEBUG -void dumpPartition(Partition &partition, AsmState &state) { +void dumpPartition(Partition &partition, AsmState &asmState) { if (partition.affinity) { llvm::dbgs() << " AFFINITY: "; partition.affinity.dump(); @@ -28,36 +28,32 @@ } llvm::dbgs() << " INS:\n "; llvm::interleaveComma(partition.ins, llvm::dbgs(), [&](Value in) { - in.printAsOperand(llvm::dbgs(), state); + in.printAsOperand(llvm::dbgs(), asmState); }); llvm::dbgs() << "\n OUTS:\n "; llvm::interleaveComma(partition.outs, llvm::dbgs(), [&](Value out) { - out.printAsOperand(llvm::dbgs(), state); + out.printAsOperand(llvm::dbgs(), asmState); }); llvm::dbgs() << "\n OPS:\n"; for (auto *op : llvm::reverse(partition.ops)) { llvm::dbgs() << " "; - op->print(llvm::dbgs(), state); + op->print(llvm::dbgs(), asmState); llvm::dbgs() << "\n"; } } -void Partition::dump(Operation *parentOp) { - AsmState state(parentOp); - dumpPartition(*this, state); -} +void Partition::dump(AsmState &asmState) { dumpPartition(*this, asmState); } -void PartitionSet::dump(Operation *parentOp) { - AsmState state(parentOp); +void PartitionSet::dump(AsmState &asmState) { for (auto partition : llvm::enumerate(partitions)) { llvm::dbgs() << "PARTITION[" << partition.index() << "]:\n"; - dumpPartition(partition.value(), state); + dumpPartition(partition.value(), asmState); } } #else -void Partition::dump(Operation *parentOp) {} -void PartitionSet::dump(Operation *parentOp) {} +void Partition::dump(AsmState &asmState) {} +void PartitionSet::dump(AsmState &asmState) {} #endif // !NDEBUG LogicalResult Partition::verify(Location loc) {
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h index fee0047..197c0d0 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h +++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning.h
@@ -37,7 +37,7 @@ // streamable (such as constants and arithmetic). SetVector<Operation *> ops; - void dump(Operation *parentOp); + void dump(AsmState &asmState); // Verifies that the partition meets the required conditions. LogicalResult verify(Location loc); @@ -53,7 +53,7 @@ // Returns true if the set is empty (no streamable ops). bool empty() const { return partitions.empty(); } - void dump(Operation *parentOp); + void dump(AsmState &asmState); // Verifies that the partition set meets the required conditions. LogicalResult verify(Location loc);
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp index f687d5e..7e49fb8 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Analysis/Partitioning/ReferencePartitioning.cpp
@@ -9,6 +9,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" +#include "mlir/IR/AsmState.h" #include "mlir/IR/PatternMatch.h" #define DEBUG_TYPE "iree-stream-partitioning" @@ -18,6 +19,24 @@ namespace IREE { namespace Stream { +// Returns an AsmState at the ancestor of |block| that is isolated from above. +// Returns nullptr if debug dumps of partitioning are disabled. +static std::unique_ptr<AsmState> getRootAsmState(Block *block) { + LLVM_DEBUG({ + auto *rootOp = block->getParentOp(); + while (auto parentOp = rootOp->getParentOp()) { + if (!isa<IREE::Stream::TimelineOpInterface>(parentOp) && + parentOp->hasTrait<OpTrait::IsIsolatedFromAbove>()) { + rootOp = parentOp; + break; + } + rootOp = parentOp; + } + return std::make_unique<AsmState>(rootOp); + }); + return nullptr; +} + // This is terrible. See Stream/Analysis/Partition.h for a description of what // a real implementation would do. We want cost modeling for tie breakers when // an op could be in multiple partitions, cloning for ops that are not worth @@ -53,6 +72,8 @@ }; DenseMap<Operation *, OpInfo> opInfos; + auto asmState = getRootAsmState(block); + for (auto &op : llvm::reverse(*block)) { // Skip constants; they just add noise (and since they are heavily CSE'd // they have lots of users to test). @@ -65,7 +86,7 @@ if (!mlir::wouldOpBeTriviallyDead(&op)) { LLVM_DEBUG({ llvm::dbgs() << "Side-effecting op forcing flush and freeze:\n"; - op.dump(); + op.print(llvm::dbgs(), *asmState); }); usableBuilders.reset(); } @@ -88,7 +109,7 @@ LLVM_DEBUG({ llvm::dbgs() << "====\nPartitioning op:\n"; - op.dump(); + op.print(llvm::dbgs(), *asmState); }); // Set bits for each partition this op may be able to be placed into.
@@ -99,7 +120,7 @@ auto &userInfo = opInfos[user]; LLVM_DEBUG({ llvm::dbgs() << "Testing user:\n"; - user->dump(); + user->print(llvm::dbgs(), *asmState); for (auto membershipOrdinal : userInfo.membership.set_bits()) { llvm::dbgs() << " member of partition " << membershipOrdinal << "\n"; } @@ -230,7 +251,7 @@ partitionSet.partitions.push_back(std::move(partition)); } - LLVM_DEBUG(partitionSet.dump(block->getParentOp())); + LLVM_DEBUG(partitionSet.dump(*asmState)); return partitionSet; } @@ -243,7 +264,7 @@ auto favor = config.getFavor().getValue(); if (favor == IREE::Stream::Favor::Debug) { - // Disable partitioning when favoring debugability. + // Disable partitioning when favoring debuggability. return waveSet; } @@ -262,6 +283,8 @@ }; DenseMap<Operation *, OpInfo> opInfos; + auto asmState = getRootAsmState(block); + for (auto &op : llvm::reverse(*block)) { // Skip constants; they just add noise (and since they are heavily CSE'd // they have lots of users to test). @@ -281,7 +304,7 @@ LLVM_DEBUG({ llvm::dbgs() << "====\nPartitioning op:\n"; - op.dump(); + op.print(llvm::dbgs(), *asmState); }); // Set bits for each wave this op may be able to be placed into. @@ -292,7 +315,7 @@ auto &userInfo = opInfos[user]; LLVM_DEBUG({ llvm::dbgs() << "Testing user:\n"; - user->dump(); + user->print(llvm::dbgs(), *asmState); for (auto membershipOrdinal : userInfo.membership.set_bits()) { llvm::dbgs() << " member of wave " << membershipOrdinal << "\n"; } @@ -375,7 +398,7 @@ waveSet.partitions.push_back(std::move(wave)); } - LLVM_DEBUG(waveSet.dump(block->getParentOp())); + LLVM_DEBUG(waveSet.dump(*asmState)); return waveSet; }