Merge pull request #8486 from google/benvanik-align-64

Changing default stream alignment to 64B.
diff --git a/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp b/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp
index 921b8eb..8e1bc36 100644
--- a/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp
+++ b/iree/compiler/Dialect/Stream/IR/StreamTypes.cpp
@@ -107,10 +107,14 @@
 ResourceConfigAttr ResourceConfigAttr::getDefaultHostConstraints(
     MLIRContext *context) {
   // Picked to represent what we kind of want on CPU today.
+  // TODO(#8484): properly choose this value based on target devices. We don't
+  // yet have the device information up in stream and thus for targets that have
+  // high alignment requirements (128/256/etc) we are not picking the right
+  // value here.
   uint64_t maxAllocationSize = UINT32_MAX;
-  uint64_t minBufferOffsetAlignment = 32ull;
+  uint64_t minBufferOffsetAlignment = 64ull;
   uint64_t maxBufferRange = UINT32_MAX;
-  uint64_t minBufferRangeAlignment = 32ull;
+  uint64_t minBufferRangeAlignment = 64ull;
   return ResourceConfigAttr::get(context, maxAllocationSize,
                                  minBufferOffsetAlignment, maxBufferRange,
                                  minBufferRangeAlignment);
diff --git a/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir b/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir
index a75a6c7..6c91d54 100644
--- a/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir
+++ b/iree/compiler/Dialect/Stream/Transforms/test/pack_constants.mlir
@@ -4,11 +4,11 @@
 // Subsequent tests focus on individual components.
 
 // Constants get packed into composite attributes.
-//      CHECK: #composite_of_64b = #util.composite<64xi8, [
+//      CHECK: #composite_of_128b = #util.composite<128xi8, [
 // CHECK-NEXT:   dense<100> : tensor<1xi32>,
-// CHECK-NEXT:   dense<0> : vector<28xi8>,
+// CHECK-NEXT:   dense<0> : vector<60xi8>,
 // CHECK-NEXT:   dense<[101, 102]> : tensor<2xi32>,
-// CHECK-NEXT:   dense<0> : vector<24xi8>,
+// CHECK-NEXT:   dense<0> : vector<56xi8>,
 // CHECK-NEXT: ]>
 
 // CHECK-LABEL: @resourceConstants
@@ -17,7 +17,7 @@
   %c8 = arith.constant 8 : index
 
   // Fetch the read-only host data containing the constants.
-  // CHECK: %[[RODATA:.+]] = util.byte_buffer.constant {alignment = 32 : i64} : !util.byte_buffer = #composite_of_64b
+  // CHECK: %[[RODATA:.+]] = util.byte_buffer.constant {alignment = 64 : i64} : !util.byte_buffer = #composite_of_128b
   %0:3 = stream.resource.constants :
     !stream.resource<constant>{%c4} = dense<100> : tensor<1xi32>,
     !stream.resource<constant>{%c8} = dense<[101, 102]> : tensor<2xi32>
@@ -26,25 +26,25 @@
   // Try first to map the memory directly into a usable resource. If this
   // succeeds we are done and can avoid allocation/complete immediately.
   // CHECK: %[[DID_MAP:.+]], %[[TRY_MAP:.+]] = stream.resource.try_map %[[RODATA]][%c0] :
-  // CHECK-SAME: !util.byte_buffer -> i1, !stream.resource<constant>{%c64}
+  // CHECK-SAME: !util.byte_buffer -> i1, !stream.resource<constant>{%c128}
   //      CHECK: %[[IF:.+]]:2 = scf.if %[[DID_MAP]] -> (!stream.resource<constant>, !stream.timepoint) {
   // CHECK-NEXT:   %[[IMMEDIATE:.+]] = stream.timepoint.immediate => !stream.timepoint
   // CHECK-NEXT:   scf.yield %[[TRY_MAP]], %[[IMMEDIATE]]
   // CHECK-NEXT: } else {
 
   // If the mapping fails we need to perform an upload via a staging buffer.
-  // CHECK: %[[STAGING:.+]] = stream.resource.map %[[RODATA]][%c0] : !util.byte_buffer -> !stream.resource<staging>{%c64}
-  // CHECK: %[[ALLOC:.+]] = stream.resource.alloc uninitialized : !stream.resource<constant>{%c64}
+  // CHECK: %[[STAGING:.+]] = stream.resource.map %[[RODATA]][%c0] : !util.byte_buffer -> !stream.resource<staging>{%c128}
+  // CHECK: %[[ALLOC:.+]] = stream.resource.alloc uninitialized : !stream.resource<constant>{%c128}
   // CHECK: %[[EXEC_TIMEPOINT:.+]] = stream.cmd.execute
-  // CHECK-SAME: with(%[[STAGING]] as %[[STAGING_CAPTURE:.+]]: !stream.resource<staging>{%c64},
-  // CHECK-SAME:      %[[ALLOC]] as %[[ALLOC_CAPTURE:.+]]: !stream.resource<constant>{%c64}) {
-  // CHECK:   stream.cmd.copy %[[STAGING_CAPTURE]][%c0], %[[ALLOC_CAPTURE]][%c0], %c64 : !stream.resource<staging>{%c64} -> !stream.resource<constant>{%c64}
+  // CHECK-SAME: with(%[[STAGING]] as %[[STAGING_CAPTURE:.+]]: !stream.resource<staging>{%c128},
+  // CHECK-SAME:      %[[ALLOC]] as %[[ALLOC_CAPTURE:.+]]: !stream.resource<constant>{%c128}) {
+  // CHECK:   stream.cmd.copy %[[STAGING_CAPTURE]][%c0], %[[ALLOC_CAPTURE]][%c0], %c128 : !stream.resource<staging>{%c128} -> !stream.resource<constant>{%c128}
   // CHECK: } => !stream.timepoint
   // CHECK: scf.yield %[[ALLOC]], %[[EXEC_TIMEPOINT]]
 
   // Get subviews pointing to the subresources within the packed resource.
-  // CHECK: %[[RES0:.+]] = stream.resource.subview %[[IF]]#0[%c0] : !stream.resource<constant>{%c64} -> !stream.resource<constant>{%c4}
-  // CHECK: %[[RES1:.+]] = stream.resource.subview %[[IF]]#0[%c32] : !stream.resource<constant>{%c64} -> !stream.resource<constant>{%c8}
+  // CHECK: %[[RES0:.+]] = stream.resource.subview %[[IF]]#0[%c0] : !stream.resource<constant>{%c128} -> !stream.resource<constant>{%c4}
+  // CHECK: %[[RES1:.+]] = stream.resource.subview %[[IF]]#0[%c64] : !stream.resource<constant>{%c128} -> !stream.resource<constant>{%c8}
 
   // CHECK: return %[[RES0]], %[[RES1]], %[[IF]]#1
   return %0#0, %0#1, %0#2 : !stream.resource<constant>, !stream.resource<constant>, !stream.timepoint
diff --git a/iree/compiler/InputConversion/TOSA/Passes.cpp b/iree/compiler/InputConversion/TOSA/Passes.cpp
index 0b3bf54..f4b86a9 100644
--- a/iree/compiler/InputConversion/TOSA/Passes.cpp
+++ b/iree/compiler/InputConversion/TOSA/Passes.cpp
@@ -48,6 +48,8 @@
   passManager.addNestedPass<FuncOp>(tosa::createTosaToStandard());
 
   passManager.addNestedPass<FuncOp>(IREE::Flow::createStripSignednessPass());
+  passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
+
   passManager.addNestedPass<FuncOp>(createLinalgQuantizedMatmulToMatmulPass());
   passManager.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());