Add uncached memory access support to scalar store

PiperOrigin-RevId: 582740507
diff --git a/sim/kelvin_base.isa b/sim/kelvin_base.isa
index 49e7736..518fd78 100644
--- a/sim/kelvin_base.isa
+++ b/sim/kelvin_base.isa
@@ -179,15 +179,15 @@
     sw{: rs1, S_imm12, rs2 : },
       resources: { next_pc, rs1, rs2 : },
       disasm: "sw", "%rs2, %S_imm12(%rs1)",
-      semfunc: "&mpact::sim::riscv::RV32::RiscVISw";
+      semfunc: "&KelvinIStore<uint32_t>";
     sh{: rs1, S_imm12, rs2 : },
       resources: { next_pc, rs1, rs2 : },
       disasm: "sh", "%rs2, %S_imm12(%rs1)",
-      semfunc: "&mpact::sim::riscv::RV32::RiscVISh";
+      semfunc: "&KelvinIStore<uint16_t>";
     sb{: rs1, S_imm12, rs2 : },
       resources: { next_pc, rs1, rs2 : },
       disasm: "sb", "%rs2, %S_imm12(%rs1)",
-      semfunc: "&mpact::sim::riscv::RV32::RiscVISb";
+      semfunc: "&KelvinIStore<uint8_t>";
     fence{: I_imm12 : },
       disasm: "fence",
       semfunc: "&mpact::sim::riscv::RiscVIFence";
diff --git a/sim/kelvin_instructions.cc b/sim/kelvin_instructions.cc
index f2ac50f..4967a8d 100644
--- a/sim/kelvin_instructions.cc
+++ b/sim/kelvin_instructions.cc
@@ -113,4 +113,30 @@
   }
 }
 
+// Semantic function for scalar stores (sb/sh/sw) to mmap_uncached addresses.
+template <typename T>
+void KelvinIStore(Instruction *inst) {
+  uint32_t base = GetInstructionSource<uint32_t>(inst, 0);
+  int32_t offset = GetInstructionSource<int32_t>(inst, 1);
+  uint32_t address = base + offset;  // Unsigned add; wraps modulo 2^32.
+  T value = GetInstructionSource<T>(inst, 2);
+  auto *state = static_cast<KelvinState *>(inst->state());
+  // Strip the cache invalidation bit from the address. The semihost tests use
+  // a memory space larger than the Kelvin HW configuration and do not follow
+  // the magic bit convention, so the mask is applied only when
+  // max_physical_address() is within kKelvinMaxMemoryAddress.
+  if (state->max_physical_address() <=
+      kKelvinMaxMemoryAddress) {  // exclude semihost tests
+    address &= kMemMask;
+  }
+  auto *db = state->db_factory()->Allocate(sizeof(T));
+  db->Set<T>(0, value);
+  state->StoreMemory(inst, address, db);
+  db->DecRef();
+}
+
+template void KelvinIStore<uint32_t>(mpact::sim::generic::Instruction *inst);
+template void KelvinIStore<uint16_t>(mpact::sim::generic::Instruction *inst);
+template void KelvinIStore<uint8_t>(mpact::sim::generic::Instruction *inst);
+
 }  // namespace kelvin::sim
diff --git a/sim/kelvin_instructions.h b/sim/kelvin_instructions.h
index d15fc67..be0f77c 100644
--- a/sim/kelvin_instructions.h
+++ b/sim/kelvin_instructions.h
@@ -29,6 +29,9 @@
 
 void KelvinLogInstruction(int log_mode, mpact::sim::generic::Instruction *inst);
 
+template <typename T>
+void KelvinIStore(mpact::sim::generic::Instruction *inst);
+
 }  // namespace kelvin::sim
 
 #endif  // SIM_KELVIN_INSTRUCTIONS_H_
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index 45357cc..6eec9b0 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -34,6 +34,14 @@
 
 using Instruction = ::mpact::sim::generic::Instruction;
 
+// Kelvin HW reserves bit 31 of the address as the magic cache invalidation
+// bit. SW can set that bit in a load/store address to trigger immediate cache
+// invalidation; the actual address excludes it. In the ISS the invalidation is
+// a no-op, so the bit is masked off to recover the actual address.
+//
+// Note the core supports up to 2GB memory (4MB is actually integrated in RTL).
+inline constexpr uint64_t kMemMask = 0x0000'0000'7fff'ffff;
+
 // Default to 256 to match
 // https://opensecura.googlesource.com/hw/kelvin/+/master/hdl/chisel/src/kelvin/Parameters.scala.
 inline constexpr uint32_t kVectorLengthInBits = 256;
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index be16c35..c63b943 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -33,14 +33,6 @@
 using mpact::sim::riscv::RV32VectorDestinationOperand;
 using mpact::sim::riscv::RV32VectorSourceOperand;
 
-// Kelvin HW reserves the 31st bit as the magic cache invalidation bit.
-// SW can update the load/store address to include that bit to trigger immediate
-// cache invalidation. The actual address should exclude that bit. In ISS the
-// invalidation is no-op and the actual address should be in the lower bits.
-//
-// Note the core supports up to 2GB memory (4MB is actually integrated in RTL).
-constexpr uint64_t kMemMask = 0x0000'0000'7fff'ffff;
-
 // Vector load instruction with optional data length, stride and address
 // register post-increment.
 template <typename T>