Adding constant storage size estimate to stream statistics. (#13885)
This is an estimate of the on-disk storage required for the constants.
On CPUs the constants may be memory-mapped, and on GPUs (and some CPUs)
they may be held in wired memory.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index fc91aae..4ca8308 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -1194,6 +1194,9 @@
case IREE::HAL::CallingConvention::ParameterStruct:
return LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(context),
LLVM::LLVMPointerType::get(context));
+ default:
+ llvm_unreachable("unhandled calling convention");
+ return failure();
}
}
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
index c302906..6b66eea 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
@@ -39,6 +39,9 @@
struct UsageInfo {
// util.globals holding resources mapped by name.
llvm::MapVector<StringRef, IREE::Util::GlobalOp> resourceGlobalOps;
+ // util.buffer.constants that are (for the most part) going to end up in the
+ // final binary.
+ SmallVector<IREE::Util::BufferConstantOp> bufferConstantOps;
// stream.executable ops mapped by name.
llvm::MapVector<StringRef, IREE::Stream::ExecutableOp> executableOps;
@@ -68,6 +71,8 @@
for (auto funcLikeOp : moduleOp.getOps<FunctionOpInterface>()) {
funcLikeOp.walk([&](Operation *op) {
TypeSwitch<Operation *>(op)
+ .Case<IREE::Util::BufferConstantOp>(
+ [&](auto op) { bufferConstantOps.push_back(op); })
.Case<IREE::Stream::ResourceAllocaOp>(
[&](auto op) { allocaOps.push_back(op); })
.Case<IREE::Stream::CmdExecuteOp>(
@@ -120,11 +125,11 @@
void analyze(const UsageInfo &usageInfo) {
// Globals:
- for (auto it : usageInfo.resourceGlobalOps) {
+ for (auto [name, globalOp] : usageInfo.resourceGlobalOps) {
auto globalType =
- llvm::dyn_cast<IREE::Stream::ResourceType>(it.second.getType());
+ llvm::dyn_cast<IREE::Stream::ResourceType>(globalOp.getType());
if (!globalType) continue;
- // TODO(benvanik): analyze size in UsageInfo.
+ // TODO(benvanik): analyze size in UsageInfo where possible.
switch (globalType.getLifetime()) {
case IREE::Stream::Lifetime::Constant:
++constantCount;
@@ -136,6 +141,13 @@
continue;
}
}
+ for (auto constantOp : usageInfo.bufferConstantOps) {
+ if (auto serializableAttr =
+ constantOp.getValue()
+ .dyn_cast<IREE::Util::SerializableAttrInterface>()) {
+ constantSize += serializableAttr.getStorageSize();
+ }
+ }
// Synchronization:
awaitCount = usageInfo.awaitOps.size();
@@ -215,13 +227,15 @@
stats.analyze(usageInfo);
os << llvm::formatv("// Constants: {0}, ", stats.constantCount);
- os << llvm::formatv(
- "{0}{1} B ({2:F2} MiB)\n", stats.constantSizeDynamic ? "minimum " : "",
- stats.constantSize, stats.constantSize / (1 * 1024 * 1024.0f));
+ os << llvm::formatv("estimated storage of {0}{1} B ({2:F2} MiB)\n",
+ stats.constantSizeDynamic ? "minimum " : "",
+ stats.constantSize,
+ stats.constantSize / (1 * 1024 * 1024.0f));
os << llvm::formatv("// Variables: {0}, ", stats.variableCount);
- os << llvm::formatv(
- "{0}{1} B ({2:F2} MiB)\n", stats.variableSizeDynamic ? "minimum " : "",
- stats.variableSize, stats.variableSize / (1 * 1024 * 1024.0f));
+ os << llvm::formatv("(TBD) {0}{1} B ({2:F2} MiB)\n",
+ stats.variableSizeDynamic ? "minimum " : "",
+ stats.variableSize,
+ stats.variableSize / (1 * 1024 * 1024.0f));
os << llvm::formatv("// D->H Syncs: {0}\n", stats.awaitCount);
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
index 60a2150..ce9fcd2 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
@@ -2,8 +2,8 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-stream-dump-statistics{output-format=csv})" %s 2>&1 | FileCheck %s --check-prefix=CHECK-CSV
// CHECK-PRETTY: Aggregate Statistics
-// CHECK-PRETTY: Constants: 1, 0 B
-// CHECK-PRETTY: Variables: 0, 0 B
+// CHECK-PRETTY: Constants: 1, estimated storage of 192 B
+// CHECK-PRETTY: Variables: 0, (TBD)
// CHECK-PRETTY: D->H Syncs: 2
// CHECK-PRETTY: Submissions: 3, using cumulative 0 B
// CHECK-PRETTY: DMA Fills: 0
@@ -14,7 +14,7 @@
// CHECK-CSV: ; Aggregate Statistics
// CHECK-CSV: "Constants","Constant Size","Variables","Variable Size","Awaits","Submissions","Transient Size","Fills","Copies","Dispatches","Async Calls","Executables"
-// CHECK-CSV: 1,0,0,0,2,3,0,0,2,3,0,2
+// CHECK-CSV: 1,192,0,0,2,3,0,0,2,3,0,2
// CHECK-CSV: ; Execution
// CHECK-CSV: "Depth","Command","Symbol","Length","Invocations","Workload","Operands","Resources"
// CHECK-CSV: 0,"copy",,192,,,,
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
index 81941b2..317e55e 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
@@ -864,14 +864,14 @@
"expect f32 element type");
const double kMaxlog = 88.72283905206835;
const float kErfcPCoefficients[] = {
- +2.326819970068386E-2, -1.387039388740657E-1, +3.687424674597105E-1,
- -5.824733027278666E-1, +6.210004621745983E-1, -4.944515323274145E-1,
- +3.404879937665872E-1, -2.741127028184656E-1, +5.638259427386472E-1,
+ +2.326819970068386E-2f, -1.387039388740657E-1f, +3.687424674597105E-1f,
+ -5.824733027278666E-1f, +6.210004621745983E-1f, -4.944515323274145E-1f,
+ +3.404879937665872E-1f, -2.741127028184656E-1f, +5.638259427386472E-1f,
};
const float kErfcRCoefficients[] = {
- -1.047766399936249E+1, +1.297719955372516E+1, -7.495518717768503E+0,
- +2.921019019210786E+0, -1.015265279202700E+0, +4.218463358204948E-1,
- -2.820767439740514E-1, +5.641895067754075E-1,
+ -1.047766399936249E+1f, +1.297719955372516E+1f, -7.495518717768503E+0f,
+ +2.921019019210786E+0f, -1.015265279202700E+0f, +4.218463358204948E-1f,
+ -2.820767439740514E-1f, +5.641895067754075E-1f,
};
// Let z = -x^2.
@@ -929,9 +929,9 @@
assert(x.getType().cast<ShapedType>().getElementType().isF32() &&
"expect f32 element type");
const float kErfTCoefficients[] = {
- +7.853861353153693E-5, -8.010193625184903E-4, +5.188327685732524E-3,
- -2.685381193529856E-2, +1.128358514861418E-1, -3.761262582423300E-1,
- +1.128379165726710E+0,
+ +7.853861353153693E-5f, -8.010193625184903E-4f, +5.188327685732524E-3f,
+ -2.685381193529856E-2f, +1.128358514861418E-1f, -3.761262582423300E-1f,
+ +1.128379165726710E+0f,
};
// Materialize polynomial approximation for |x| <= 1 as