Split the alignment flag for non 64-bit types (#9715)

Splits out the ops for the remaining, non-64-bit types from the
`IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED` flag and introduces new flags
for the other bit widths.

Splitting these into separate flags for 8, 16 and 32-bit accesses seems
useful since e.g. the Cortex-M7 processor has unaligned-access support
for
 * LDR/STR (Load/Store Word) and
 * LDRH/STRH (Load/Store Halfword),
see [1], whereas a Cortex-M4 processor "only" has unaligned-access
support for LDR/STR, see [2].

[1] https://developer.arm.com/documentation/ddi0489/b/programmers-model/system-address-map/unaligned-accesses-that-cross-regions
[2] https://developer.arm.com/documentation/ddi0439/b/Programmers-Model/System-address-map/Unaligned-accesses-that-cross-regions
diff --git a/runtime/src/iree/base/alignment.h b/runtime/src/iree/base/alignment.h
index 5dde61e..64d30de 100644
--- a/runtime/src/iree/base/alignment.h
+++ b/runtime/src/iree/base/alignment.h
@@ -101,15 +101,63 @@
 #define IREE_LE_IDX_8(i) (7 - (i))
 #endif  // IREE_ENDIANNESS_*
 
-#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8
 
 static inline uint8_t iree_unaligned_load_le_u8(const uint8_t* ptr) {
   return *ptr;
 }
+
+static inline void iree_unaligned_store_le_u8(uint8_t* ptr, uint8_t value) {
+  *ptr = value;
+}
+
+#else
+
+#if defined(IREE_ENDIANNESS_LITTLE)
+
+#define iree_unaligned_load_le_u8(ptr) *(ptr)
+
+#define iree_unaligned_store_le_u8(ptr, value) *(ptr) = (value)
+
+#else
+
+#error "TODO(benvanik): little-endian load/store for big-endian archs"
+
+#endif  // IREE_ENDIANNESS_*
+
+#endif  // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8
+
+#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16
+
 static inline uint16_t iree_unaligned_load_le_u16(const uint16_t* ptr) {
   const uint8_t* p = (const uint8_t*)ptr;
   return ((uint16_t)p[IREE_LE_IDX_2(0)]) | ((uint16_t)p[IREE_LE_IDX_2(1)] << 8);
 }
+
+static inline void iree_unaligned_store_le_u16(uint16_t* ptr, uint16_t value) {
+  uint8_t* p = (uint8_t*)ptr;
+  p[IREE_LE_IDX_2(0)] = value;
+  p[IREE_LE_IDX_2(1)] = value >> 8;
+}
+
+#else
+
+#if defined(IREE_ENDIANNESS_LITTLE)
+
+#define iree_unaligned_load_le_u16(ptr) *(ptr)
+
+#define iree_unaligned_store_le_u16(ptr, value) *(ptr) = (value)
+
+#else
+
+#error "TODO(benvanik): little-endian load/store for big-endian archs"
+
+#endif  // IREE_ENDIANNESS_*
+
+#endif  // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16
+
+#if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32
+
 static inline uint32_t iree_unaligned_load_le_u32(const uint32_t* ptr) {
   const uint8_t* p = (const uint8_t*)ptr;
   return ((uint32_t)p[IREE_LE_IDX_4(0)]) |
@@ -124,14 +172,6 @@
   return value;
 }
 
-static inline void iree_unaligned_store_le_u8(uint8_t* ptr, uint8_t value) {
-  *ptr = value;
-}
-static inline void iree_unaligned_store_le_u16(uint16_t* ptr, uint16_t value) {
-  uint8_t* p = (uint8_t*)ptr;
-  p[IREE_LE_IDX_2(0)] = value;
-  p[IREE_LE_IDX_2(1)] = value >> 8;
-}
 static inline void iree_unaligned_store_le_u32(uint32_t* ptr, uint32_t value) {
   uint8_t* p = (uint8_t*)ptr;
   p[IREE_LE_IDX_4(0)] = value;
@@ -149,13 +189,9 @@
 
 #if defined(IREE_ENDIANNESS_LITTLE)
 
-#define iree_unaligned_load_le_u8(ptr) *(ptr)
-#define iree_unaligned_load_le_u16(ptr) *(ptr)
 #define iree_unaligned_load_le_u32(ptr) *(ptr)
 #define iree_unaligned_load_le_f32(ptr) *(ptr)
 
-#define iree_unaligned_store_le_u8(ptr, value) *(ptr) = (value)
-#define iree_unaligned_store_le_u16(ptr, value) *(ptr) = (value)
 #define iree_unaligned_store_le_u32(ptr, value) *(ptr) = (value)
 #define iree_unaligned_store_le_f32(ptr, value) *(ptr) = (value)
 
@@ -165,7 +201,7 @@
 
 #endif  // IREE_ENDIANNESS_*
 
-#endif  // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+#endif  // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32
 
 #if IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64
 
diff --git a/runtime/src/iree/base/target_platform.h b/runtime/src/iree/base/target_platform.h
index 173b7e7..34b25f5 100644
--- a/runtime/src/iree/base/target_platform.h
+++ b/runtime/src/iree/base/target_platform.h
@@ -31,6 +31,9 @@
 // IREE_ENDIANNESS_BIG
 //
 // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED (0/1)
+// IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8 (0/1)
+// IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16 (0/1)
+// IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32 (0/1)
 // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64 (0/1)
 //
 // IREE_COMPILER_CLANG
@@ -174,8 +177,23 @@
 #define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED 0
 #endif  // !IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
 
-// Set IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64 to the value of
+// Set IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_* to the value of
 // IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED if the former was not set before.
+#if !defined(IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8)
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8 \
+  IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+#endif  // !IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_8
+
+#if !defined(IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16)
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16 \
+  IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+#endif  // !IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_16
+
+#if !defined(IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32)
+#define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32 \
+  IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED
+#endif  // !IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_32
+
 #if !defined(IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64)
 #define IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED_64 \
   IREE_MEMORY_ACCESS_ALIGNMENT_REQUIRED