Serialize vector<...i8> constants to flatbuffers much more efficiently.
There is likely still work we can do here to avoid additional allocs/copies,
but for non-splat constants this at least removes the extremely slow last-mile
path that iterated over each byte, converted it to an APInt, and truncated it.
diff --git a/iree/compiler/Dialect/VM/Target/Bytecode/ConstantEncoder.cpp b/iree/compiler/Dialect/VM/Target/Bytecode/ConstantEncoder.cpp
index 612d3d0..180535a 100644
--- a/iree/compiler/Dialect/VM/Target/Bytecode/ConstantEncoder.cpp
+++ b/iree/compiler/Dialect/VM/Target/Bytecode/ConstantEncoder.cpp
@@ -36,11 +36,21 @@
static Offset<Vector<uint8_t>> serializeConstantI8Array(
DenseIntElementsAttr attr, FlatBufferBuilder &fbb) {
+ // vm.rodata and other very large constants are serialized through here.
+ // Each element is a single byte, so endianness is irrelevant and the raw
+ // attribute data can be memcpy'd directly into the uninitialized vector.
uint8_t *bytePtr = nullptr;
auto byteVector =
fbb.CreateUninitializedVector(attr.getNumElements() * 1, &bytePtr);
- for (const APInt &value : attr.getIntValues()) {
- *(bytePtr++) = value.extractBitsAsZExtValue(8, 0) & UINT8_MAX;
+ if (attr.isSplat()) {
+ // NOTE: slow per-element path; splats should have been eliminated earlier
+ // during constant op conversion. Presumably splat raw data is not one byte per element, so no memcpy here (TODO confirm).
+ for (const APInt &value : attr.getIntValues()) {
+ *(bytePtr++) = value.extractBitsAsZExtValue(8, 0) & UINT8_MAX;
+ }
+ } else {
+ auto rawData = attr.getRawData();
+ std::memcpy(bytePtr, rawData.data(), rawData.size());
}
return byteVector;
}