// These i8 pack ops are seen in CPU codegen when targeting AVX-512 Vector
// Neural Network Instructions (VNNI). Other backends should still support
// the tile sizes and the other parameters used here.

func.func private @generate_2D_source(%height : index, %width : index) -> tensor<?x?xi8> {
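  // Fills element (d0, d1) with (d0 * %width + d1) mod 256, truncated to i8, so the
  // source holds a deterministic pattern that is easy to compare across layouts.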
  %init_source = tensor.empty(%height, %width) : tensor<?x?xi8>
  %source = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
      outs(%init_source : tensor<?x?xi8>) {
    ^bb0(%b0 : i8):
      %outer = linalg.index 0 : index
      %inner = linalg.index 1 : index
      %strided = arith.muli %outer, %width : index
      %linearized = arith.addi %inner, %strided : index
      %c256 = arith.constant 256 : index
      %rem = arith.remui %linearized, %c256 : index
      %linearized_i8 = arith.index_cast %rem : index to i8
      linalg.yield %linearized_i8 : i8
  } -> tensor<?x?xi8>
  // This barrier blocks fusion between the input generator and the ops under test.
  %0 = util.optimization_barrier %source : tensor<?x?xi8>
  %1 = flow.tensor.tie_shape %0 : tensor<?x?xi8>{%height, %width}
  return %1 : tensor<?x?xi8>
}

func.func @static_pack_vnni_lhs_large() {
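  // Packs a 128x256 source along dims [0, 1] with inner_tiles = [16, 2]: the outer
  // dims become 128/16 = 8 and 256/2 = 128, and the tiles are appended, giving an
  // 8x128x16x2 result.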
  %height = arith.constant 128 : index
  %width = arith.constant 256 : index
  %0 = call @generate_2D_source(%height, %width) : (index, index) -> tensor<?x?xi8>
  %source = tensor.cast %0 : tensor<?x?xi8> to tensor<128x256xi8>

  %init_pack = tensor.empty() : tensor<8x128x16x2xi8>
  %pack = tensor.pack %source
      outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2]
      into %init_pack : tensor<128x256xi8> -> tensor<8x128x16x2xi8>

  // Pack without padding is just a reshape followed by a transpose.
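  // Element-wise, %pack[i, j, k, l] == %source[i * 16 + k, j * 2 + l].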
  %reshape = tensor.expand_shape %source [[0, 1], [2, 3]] output_shape [8, 16, 128, 2] : tensor<128x256xi8> into tensor<8x16x128x2xi8>
  %init_transpose = tensor.empty() : tensor<8x128x16x2xi8>
  %transpose = linalg.transpose
      ins(%reshape : tensor<8x16x128x2xi8>)
      outs(%init_transpose : tensor<8x128x16x2xi8>)
      permutation = [0, 2, 1, 3]
  check.expect_eq(%pack, %transpose) : tensor<8x128x16x2xi8>
  return
}

func.func @static_pack_vnni_rhs_large() {
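  // Packs a 256x512 source with inner_dims_pos = [1, 0] and inner_tiles = [16, 2]:
  // the 16-element tile is taken from dim 1 and the 2-element tile from dim 0. The
  // outer dims 256/2 = 128 and 512/16 = 32 are reordered by outer_dims_perm = [1, 0],
  // giving a 32x128x16x2 result.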
  %height = arith.constant 256 : index
  %width = arith.constant 512 : index
  %0 = call @generate_2D_source(%height, %width) : (index, index) -> tensor<?x?xi8>
  %source = tensor.cast %0 : tensor<?x?xi8> to tensor<256x512xi8>

  %init_pack = tensor.empty() : tensor<32x128x16x2xi8>
  %pack = tensor.pack %source
      outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2]
      into %init_pack : tensor<256x512xi8> -> tensor<32x128x16x2xi8>

  // Pack without padding is just a reshape followed by a transpose.
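  // Element-wise, %pack[i, j, k, l] == %source[j * 2 + l, i * 16 + k].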
  %reshape = tensor.expand_shape %source [[0, 1], [2, 3]] output_shape [128, 2, 32, 16] : tensor<256x512xi8> into tensor<128x2x32x16xi8>
  %init_transpose = tensor.empty() : tensor<32x128x16x2xi8>
  %transpose = linalg.transpose
      ins(%reshape : tensor<128x2x32x16xi8>)
      outs(%init_transpose : tensor<32x128x16x2xi8>)
      permutation = [2, 0, 3, 1]
  check.expect_eq(%pack, %transpose) : tensor<32x128x16x2xi8>
  return
}

func.func @static_pack_vnni_lhs_large_with_pad() {
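  // Same tiling as @static_pack_vnni_lhs_large, but the 127x255 source is not
  // tile-aligned: pack pads it up to 128x256 with the zero padding value before
  // tiling into an 8x128x16x2 result.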
  %height = arith.constant 127 : index
  %width = arith.constant 255 : index
  %0 = call @generate_2D_source(%height, %width) : (index, index) -> tensor<?x?xi8>
  %source = tensor.cast %0 : tensor<?x?xi8> to tensor<127x255xi8>
  %c0_i8 = arith.constant 0 : i8

  %init_pack = tensor.empty() : tensor<8x128x16x2xi8>
  %pack = tensor.pack %source padding_value(%c0_i8 : i8)
      outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2]
      into %init_pack : tensor<127x255xi8> -> tensor<8x128x16x2xi8>

  %pad = tensor.pad %source low[0, 0] high[1, 1] {
    ^bb0(%b0 : index, %b1 : index):
      tensor.yield %c0_i8 : i8
  } : tensor<127x255xi8> to tensor<128x256xi8>
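  // With the source explicitly padded to the tile-aligned 128x256 shape, the
  // reference is the same reshape + transpose as in the unpadded case.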
  %reshape = tensor.expand_shape %pad [[0, 1], [2, 3]] output_shape [8, 16, 128, 2] : tensor<128x256xi8> into tensor<8x16x128x2xi8>
  %init_transpose = tensor.empty() : tensor<8x128x16x2xi8>
  %transpose = linalg.transpose
      ins(%reshape : tensor<8x16x128x2xi8>)
      outs(%init_transpose : tensor<8x128x16x2xi8>)
      permutation = [0, 2, 1, 3]
  check.expect_eq(%pack, %transpose) : tensor<8x128x16x2xi8>
  return
}

func.func @static_pack_vnni_rhs_large_with_pad() {
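  // Same tiling as @static_pack_vnni_rhs_large, but the 255x511 source is not
  // tile-aligned: pack pads it up to 256x512 with the zero padding value before
  // tiling into a 32x128x16x2 result.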
  %height = arith.constant 255 : index
  %width = arith.constant 511 : index
  %0 = call @generate_2D_source(%height, %width) : (index, index) -> tensor<?x?xi8>
  %source = tensor.cast %0 : tensor<?x?xi8> to tensor<255x511xi8>
  %c0_i8 = arith.constant 0 : i8

  %init_pack = tensor.empty() : tensor<32x128x16x2xi8>
  %pack = tensor.pack %source padding_value(%c0_i8 : i8)
      outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2]
      into %init_pack : tensor<255x511xi8> -> tensor<32x128x16x2xi8>

  %pad = tensor.pad %source low[0, 0] high[1, 1] {
    ^bb0(%b0 : index, %b1 : index):
      tensor.yield %c0_i8 : i8
  } : tensor<255x511xi8> to tensor<256x512xi8>
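  // With the source explicitly padded to the tile-aligned 256x512 shape, the
  // reference is the same reshape + transpose as in the unpadded case.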
  %reshape = tensor.expand_shape %pad [[0, 1], [2, 3]] output_shape [128, 2, 32, 16] : tensor<256x512xi8> into tensor<128x2x32x16xi8>
  %init_transpose = tensor.empty() : tensor<32x128x16x2xi8>
  %transpose = linalg.transpose
      ins(%reshape : tensor<128x2x32x16xi8>)
      outs(%init_transpose : tensor<32x128x16x2xi8>)
      permutation = [2, 0, 3, 1]
  check.expect_eq(%pack, %transpose) : tensor<32x128x16x2xi8>
  return
}