Update Kelvin memory access

Kelvin reserves bit 31 (the most significant bit) of the 32-bit memory address to trigger cache invalidation. Update the vector load/store semantic functions to strip that bit from the address before accessing memory.

PiperOrigin-RevId: 556955595
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index 0cfada0..b323b01 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -20,6 +20,8 @@
 // https://spacebeaker.googlesource.com/shodan/hw/kelvin/+/refs/heads/master/hdl/chisel/src/kelvin/Parameters.scala#13.
 inline constexpr uint32_t kVectorLengthInBits = 256;
 
+constexpr uint64_t kKelvinMaxMemoryAddress = 0x3f'ffffULL;  // 4MB
+
 template <typename T>
 using AccArrayTemplate = std::array<T, kVectorLengthInBits / 32>;
 
diff --git a/sim/kelvin_top.h b/sim/kelvin_top.h
index aa1fc56..b279ebd 100644
--- a/sim/kelvin_top.h
+++ b/sim/kelvin_top.h
@@ -29,8 +29,6 @@
 
 namespace kelvin::sim {
 
-constexpr uint64_t kKelvinMaxMemoryAddress = 0x3f'ffffULL;  // 4MB
-
 using ::mpact::sim::generic::DataBuffer;
 
 // Top level class for the Kelvin simulator. This is the main interface for
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index 19f3d42..c595a20 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -18,6 +18,14 @@
 using mpact::sim::riscv::RV32VectorDestinationOperand;
 using mpact::sim::riscv::RV32VectorSourceOperand;
 
+// Kelvin HW reserves bit 31 (the MSB) of the address as the magic cache
+// invalidation bit. SW can set that bit in a load/store address to trigger
+// immediate cache invalidation; the actual address excludes that bit. In the
+// ISS the invalidation is a no-op and the actual address is the lower 31 bits.
+//
+// Note the core supports up to 2GB memory (4MB is actually integrated in RTL).
+constexpr uint64_t kMemMask = 0x0000'0000'7fff'ffff;
+
 // Vector load instruction with optional data length, stride and address
 // register post-increment.
 template <typename T>
@@ -29,6 +37,14 @@
 
   const auto num_ops = strip_mine ? 4 : 1;
   auto addr = GetInstructionSource<uint32_t>(inst, 0, 0);
+  // Check for and strip the cache invalidation bit. However, the semihost
+  // tests use a memory space larger than the Kelvin HW configuration and do
+  // not comply with the magic bit convention. Skip the check and mask for
+  // those tests.
+  if (state->max_physical_address() <=
+      kKelvinMaxMemoryAddress) {  // exclude semihost tests
+    addr &= kMemMask;
+  }
 
   uint32_t elts_to_load = num_ops * elts_per_register;
   if (has_length) {
@@ -146,6 +162,10 @@
 
   const auto num_ops = strip_mine ? 4 : 1;
   auto mem_addr = GetInstructionSource<uint32_t>(inst, 1, 0);
+  if (state->max_physical_address() <=
+      kKelvinMaxMemoryAddress) {  // exclude semihost tests
+    mem_addr &= kMemMask;
+  }
   auto vs = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
 
   auto base_addr = mem_addr;
diff --git a/sim/test/kelvin_vector_instructions_test_base.h b/sim/test/kelvin_vector_instructions_test_base.h
index 6f3170e..d5c4f70 100644
--- a/sim/test/kelvin_vector_instructions_test_base.h
+++ b/sim/test/kelvin_vector_instructions_test_base.h
@@ -67,6 +67,10 @@
     }
     memory_->Store(kDataLoadAddress - 4096, db);
     db->DecRef();
+    // The data buffer has a size of 8192 and is stored starting at
+    // kDataLoadAddress - 4096, so the maximum address is
+    // kDataLoadAddress + 4095.
+    state_->set_max_physical_address(kDataLoadAddress + 4095);
     for (int i = 1; i < 32; i++) {
       xreg_[i] = state_->GetRegister<RV32Register>(absl::StrCat("x", i)).first;
     }
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc
index 3445d6f..49d058d 100644
--- a/sim/test/kelvin_vector_memory_instructions_test.cc
+++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -39,7 +39,8 @@
   void MemoryLoadStoreOpTestHelper(absl::string_view name, bool has_length,
                                    bool has_stride, bool strip_mine,
                                    bool post_increment, bool x_variant,
-                                   bool is_load, bool is_quad) {
+                                   bool is_load, bool is_quad,
+                                   bool is_uncached = false) {
     InstructionPtr child_instruction(
         new Instruction(next_instruction_address_, state_),
         [](Instruction *inst) { inst->DecRef(); });
@@ -123,8 +124,13 @@
         continue;
       }
       // Set input register values.
+      // Bit 31 of the address triggers cache invalidation. Set it to `1` when
+      // cache invalidation is being tested.
+      auto data_address =
+          is_uncached ? (kelvin::sim::test::kDataLoadAddress | 0x8000'0000U)
+                      : kelvin::sim::test::kDataLoadAddress;
       SetRegisterValues<uint32_t>(
-          {{kelvin::sim::test::kRs1Name, kelvin::sim::test::kDataLoadAddress}});
+          {{kelvin::sim::test::kRs1Name, data_address}});
 
       if (!x_variant) {
         SetRegisterValues<uint32_t>(
@@ -292,6 +298,13 @@
                                      kPostIncrement, kNotXVariant, is_load,
                                      kNotQuad);
     }
+
+    // Extra test for cache invalidation address
+    auto subname = absl::StrCat(name_with_type, "X");
+    MemoryLoadStoreOpTestHelper<T>(subname, kNoLength, kNoStride,
+                                   false /*strip_mine*/,
+                                   false /*post_increment*/, kXVariant, is_load,
+                                   kNotQuad, true /*is_uncached*/);
   }
 
   template <typename T>