Adding support for outputting binary files from tooling. (#16291)

Numpy files are now written only when a `.npy` extension is used;
otherwise the value contents are written directly to the file. Support
was also added for writing primitive values and HAL or VM buffers to
numpy files by treating them as scalar/blob ndarrays.
diff --git a/tools/test/iree-run-module-outputs.mlir b/tools/test/iree-run-module-outputs.mlir
index c06f67e..5ae3e3a 100644
--- a/tools/test/iree-run-module-outputs.mlir
+++ b/tools/test/iree-run-module-outputs.mlir
@@ -37,9 +37,9 @@
 // RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \
 // RUN:  iree-run-module --device=local-sync --module=- --function=numpy \
 // RUN:                  --output= \
-// RUN:                  --output=@%t \
-// RUN:                  --output=+%t) && \
-// RUN:  "%PYTHON" %S/echo_npy.py %t | \
+// RUN:                  --output=@%t.npy \
+// RUN:                  --output=+%t.npy) && \
+// RUN:  "%PYTHON" %S/echo_npy.py %t.npy | \
 // RUN: FileCheck --check-prefix=OUTPUT-NUMPY %s
 func.func @numpy() -> (i32, tensor<f32>, tensor<?x4xi32>) {
   // Output skipped:
@@ -51,3 +51,31 @@
   %2 = flow.tensor.constant dense<[[0,1,2,3],[4,5,6,7]]> : tensor<2x4xi32> -> tensor<?x4xi32>
   return %0, %1, %2 : i32, tensor<f32>, tensor<?x4xi32>
 }
+
+// -----
+
+// Tests output to binary files by round-tripping the output of a function into
+// another invocation reading from the binary files.
+
+// RUN: (iree-compile --iree-hal-target-backends=vmvx %s -o=%t.vmfb && \
+// RUN:  iree-run-module --device=local-sync \
+// RUN:                  --module=%t.vmfb \
+// RUN:                  --function=write_binary \
+// RUN:                  --output=@%t.0.bin \
+// RUN:                  --output=@%t.1.bin && \
+// RUN:  iree-run-module --device=local-sync \
+// RUN:                  --module=%t.vmfb \
+// RUN:                  --function=echo_binary \
+// RUN:                  --input=f32=@%t.0.bin \
+// RUN:                  --input=2x4xi32=@%t.1.bin) | \
+// RUN: FileCheck --check-prefix=OUTPUT-BINARY %s
+func.func @write_binary() -> (tensor<f32>, tensor<?x4xi32>) {
+  %0 = arith.constant dense<4.0> : tensor<f32>
+  %1 = flow.tensor.constant dense<[[0,1,2,3],[4,5,6,7]]> : tensor<2x4xi32> -> tensor<?x4xi32>
+  return %0, %1 : tensor<f32>, tensor<?x4xi32>
+}
+func.func @echo_binary(%arg0: tensor<f32>, %arg1: tensor<?x4xi32>) -> (tensor<f32>, tensor<?x4xi32>) {
+  // OUTPUT-BINARY{LITERAL}: f32=4
+  // OUTPUT-BINARY{LITERAL}: 2x4xi32=[0 1 2 3][4 5 6 7]
+  return %arg0, %arg1 : tensor<f32>, tensor<?x4xi32>
+}