Adding constant storage size estimate to stream statistics. (#13885)

This is an estimate of the on-disk storage required for the constants.
On CPUs this may be mapped memory and on GPUs (and some CPUs) this may
be wired memory.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index fc91aae..4ca8308 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -1194,6 +1194,9 @@
     case IREE::HAL::CallingConvention::ParameterStruct:
       return LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(context),
                                          LLVM::LLVMPointerType::get(context));
+    default:
+      llvm_unreachable("unhandled calling convention");
+      return failure();
   }
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
index c302906..6b66eea 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/DumpStatistics.cpp
@@ -39,6 +39,9 @@
 struct UsageInfo {
   // util.globals holding resources mapped by name.
   llvm::MapVector<StringRef, IREE::Util::GlobalOp> resourceGlobalOps;
+  // util.buffer.constants that are (for the most part) going to end up in the
+  // final binary.
+  SmallVector<IREE::Util::BufferConstantOp> bufferConstantOps;
 
   // stream.executable ops mapped by name.
   llvm::MapVector<StringRef, IREE::Stream::ExecutableOp> executableOps;
@@ -68,6 +71,8 @@
     for (auto funcLikeOp : moduleOp.getOps<FunctionOpInterface>()) {
       funcLikeOp.walk([&](Operation *op) {
         TypeSwitch<Operation *>(op)
+            .Case<IREE::Util::BufferConstantOp>(
+                [&](auto op) { bufferConstantOps.push_back(op); })
             .Case<IREE::Stream::ResourceAllocaOp>(
                 [&](auto op) { allocaOps.push_back(op); })
             .Case<IREE::Stream::CmdExecuteOp>(
@@ -120,11 +125,11 @@
 
   void analyze(const UsageInfo &usageInfo) {
     // Globals:
-    for (auto it : usageInfo.resourceGlobalOps) {
+    for (auto [name, globalOp] : usageInfo.resourceGlobalOps) {
       auto globalType =
-          llvm::dyn_cast<IREE::Stream::ResourceType>(it.second.getType());
+          llvm::dyn_cast<IREE::Stream::ResourceType>(globalOp.getType());
       if (!globalType) continue;
-      // TODO(benvanik): analyze size in UsageInfo.
+      // TODO(benvanik): analyze size in UsageInfo where possible.
       switch (globalType.getLifetime()) {
         case IREE::Stream::Lifetime::Constant:
           ++constantCount;
@@ -136,6 +141,13 @@
           continue;
       }
     }
+    for (auto constantOp : usageInfo.bufferConstantOps) {
+      if (auto serializableAttr =
+              llvm::dyn_cast<IREE::Util::SerializableAttrInterface>(
+                  constantOp.getValue())) {
+        constantSize += serializableAttr.getStorageSize();  // on-disk estimate
+      }
+    }
 
     // Synchronization:
     awaitCount = usageInfo.awaitOps.size();
@@ -215,13 +227,15 @@
   stats.analyze(usageInfo);
 
   os << llvm::formatv("//   Constants: {0}, ", stats.constantCount);
-  os << llvm::formatv(
-      "{0}{1} B ({2:F2} MiB)\n", stats.constantSizeDynamic ? "minimum " : "",
-      stats.constantSize, stats.constantSize / (1 * 1024 * 1024.0f));
+  os << llvm::formatv("estimated storage of {0}{1} B ({2:F2} MiB)\n",
+                      stats.constantSizeDynamic ? "minimum " : "",
+                      stats.constantSize,
+                      stats.constantSize / (1 * 1024 * 1024.0f));
   os << llvm::formatv("//   Variables: {0}, ", stats.variableCount);
-  os << llvm::formatv(
-      "{0}{1} B ({2:F2} MiB)\n", stats.variableSizeDynamic ? "minimum " : "",
-      stats.variableSize, stats.variableSize / (1 * 1024 * 1024.0f));
+  os << llvm::formatv("(TBD) {0}{1} B ({2:F2} MiB)\n",
+                      stats.variableSizeDynamic ? "minimum " : "",
+                      stats.variableSize,
+                      stats.variableSize / (1 * 1024 * 1024.0f));
 
   os << llvm::formatv("//  D->H Syncs: {0}\n", stats.awaitCount);
 
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
index 60a2150..ce9fcd2 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/dump_statistics.mlir
@@ -2,8 +2,8 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-stream-dump-statistics{output-format=csv})" %s 2>&1 | FileCheck %s --check-prefix=CHECK-CSV
 
 // CHECK-PRETTY: Aggregate Statistics
-// CHECK-PRETTY:   Constants: 1, 0 B
-// CHECK-PRETTY:   Variables: 0, 0 B
+// CHECK-PRETTY:   Constants: 1, estimated storage of 192 B
+// CHECK-PRETTY:   Variables: 0, (TBD)
 // CHECK-PRETTY:  D->H Syncs: 2
 // CHECK-PRETTY: Submissions: 3, using cumulative 0 B
 // CHECK-PRETTY:   DMA Fills: 0
@@ -14,7 +14,7 @@
 
 // CHECK-CSV: ; Aggregate Statistics
 // CHECK-CSV: "Constants","Constant Size","Variables","Variable Size","Awaits","Submissions","Transient Size","Fills","Copies","Dispatches","Async Calls","Executables"
-// CHECK-CSV: 1,0,0,0,2,3,0,0,2,3,0,2
+// CHECK-CSV: 1,192,0,0,2,3,0,0,2,3,0,2
 // CHECK-CSV: ; Execution
 // CHECK-CSV: "Depth","Command","Symbol","Length","Invocations","Workload","Operands","Resources"
 // CHECK-CSV: 0,"copy",,192,,,,
diff --git a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
index 81941b2..317e55e 100644
--- a/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
+++ b/compiler/src/iree/compiler/InputConversion/StableHLO/LegalizeCHLO.cpp
@@ -864,14 +864,14 @@
          "expect f32 element type");
   const double kMaxlog = 88.72283905206835;
   const float kErfcPCoefficients[] = {
-      +2.326819970068386E-2, -1.387039388740657E-1, +3.687424674597105E-1,
-      -5.824733027278666E-1, +6.210004621745983E-1, -4.944515323274145E-1,
-      +3.404879937665872E-1, -2.741127028184656E-1, +5.638259427386472E-1,
+      +2.326819970068386E-2f, -1.387039388740657E-1f, +3.687424674597105E-1f,
+      -5.824733027278666E-1f, +6.210004621745983E-1f, -4.944515323274145E-1f,
+      +3.404879937665872E-1f, -2.741127028184656E-1f, +5.638259427386472E-1f,
   };
   const float kErfcRCoefficients[] = {
-      -1.047766399936249E+1, +1.297719955372516E+1, -7.495518717768503E+0,
-      +2.921019019210786E+0, -1.015265279202700E+0, +4.218463358204948E-1,
-      -2.820767439740514E-1, +5.641895067754075E-1,
+      -1.047766399936249E+1f, +1.297719955372516E+1f, -7.495518717768503E+0f,
+      +2.921019019210786E+0f, -1.015265279202700E+0f, +4.218463358204948E-1f,
+      -2.820767439740514E-1f, +5.641895067754075E-1f,
   };
 
   // Let z = -x^2.
@@ -929,9 +929,9 @@
   assert(x.getType().cast<ShapedType>().getElementType().isF32() &&
          "expect f32 element type");
   const float kErfTCoefficients[] = {
-      +7.853861353153693E-5, -8.010193625184903E-4, +5.188327685732524E-3,
-      -2.685381193529856E-2, +1.128358514861418E-1, -3.761262582423300E-1,
-      +1.128379165726710E+0,
+      +7.853861353153693E-5f, -8.010193625184903E-4f, +5.188327685732524E-3f,
+      -2.685381193529856E-2f, +1.128358514861418E-1f, -3.761262582423300E-1f,
+      +1.128379165726710E+0f,
   };
 
   // Materialize polynomial approximation for |x| <= 1 as