Add vstart check for additional instructions

vcpop, vfirst, vmsbf, vmsof, vmsif, viota and vcompress now fault when
vstart != 0, following the reduction instructions. Related functions are
renamed to reflect the change.

Change-Id: Ibed09dd9cb1cbc09f33003e5f923fc5a98f0bbef
diff --git a/hdl/chisel/src/kelvin/rvv/RvvDecode.scala b/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
index 1a85a16..7967a5e 100644
--- a/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
+++ b/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
@@ -41,13 +41,19 @@
     bits(24, 19)
   }
 
+  def vs1(): UInt = {
+    bits(12, 8)
+  }
+
   def funct3(): UInt = {
     bits(7, 5)
   }
 
-  // If this instruction is a "reduction" instruction, that requires vstart=0
-  def isReduction(): Bool = {
+  // These instructions need to trap when vstart is not zero. This includes
+  // all reduction instructions.
+  def requireZeroVstart(): Bool = {
     (opcode === RvvCompressedOpcode.RVVALU) && (funct3() === "b010".U) &&
+        // OPMVV
         MuxLookup(funct6(), false.B)(Seq(
             "b000000".U -> true.B,  // vredsum
             "b000001".U -> true.B,  // vredand
@@ -57,6 +63,17 @@
             "b000101".U -> true.B,  // vredmin
             "b000110".U -> true.B,  // vredmaxu
             "b000111".U -> true.B,  // vredmax
+            "b010000".U -> MuxLookup(vs1(), false.B)(Seq(  // VWXUNARY0
+                "b10000".U -> true.B,  // vcpop
+                "b10001".U -> true.B,  // vfirst
+            )),
+            "b010100".U -> MuxLookup(vs1(), false.B)(Seq(  // VMUNARY0
+                "b00001".U -> true.B,  // vmsbf
+                "b00010".U -> true.B,  // vmsof
+                "b00011".U -> true.B,  // vmsif
+                "b10000".U -> true.B,  // viota
+            )),
+            "b010111".U -> true.B,  // vcompress
             "b110000".U -> true.B,  // vwredsumu
             "b110001".U -> true.B,  // vwredsum
         ))
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala
index 5553453..02402c8 100644
--- a/hdl/chisel/src/kelvin/scalar/Decode.scala
+++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -460,15 +460,16 @@
   }
 
   // ---------------------------------------------------------------------------
-  // Rvv reduction
-  // Don't allow reduction instructions to execute if vstart != 0
-  val rvvReductionInterlock = if (p.enableRvv) {
+  // Rvv Vstart
+  // If an instruction requires vstart == 0, don't allow that instruction to
+  // execute when vstart != 0
+  val rvvVstartInterlock = if (p.enableRvv) {
     (0 until p.instructionLanes).map(i => {
-        val invalidReduction =
+        val invalidVstart =
             decodedInsts(i).rvv.get.valid &&
-            decodedInsts(i).rvv.get.bits.isReduction() &&
+            decodedInsts(i).rvv.get.bits.requireZeroVstart() &&
             (configInvalid(i) || (io.rvvState.get.bits.vstart =/= 0.U))
-        !invalidReduction
+        !invalidVstart
     })
   } else {
     Seq.fill(p.instructionLanes)(true.B)
@@ -543,7 +544,7 @@
       !fence(i) &&           // Don't dispatch if fence interlocked
       slot0Interlock(i) &&   // Special instructions execute out of slot 0 only
       rvvConfigInterlock(i) &&     // Rvv interlock rules
-      rvvReductionInterlock(i) && // Don't dispatch reduction if vstart != 0
+      rvvVstartInterlock(i) && // Don't dispatch insts that need vstart == 0 when vstart != 0
       // rvvLsuInterlock(i) &&  // Dispatch only one Rvv LsuOp
       lsuInterlock(i) && // Ensure lsu instructions can be dispatched into queue
       rvvInterlock(i) && // Ensure rvv instructions can be dispatched into queue
@@ -771,11 +772,11 @@
     for (i <- 0 until p.instructionLanes) {
       io.rvvFault.get(i) := (if (i == 0) {
         // Return fault if vstart != 0
-        val isReduction = decodedInsts(i).rvv.get.valid &&
-            decodedInsts(0).rvv.get.bits.isReduction()
+        val requireZeroVstart = decodedInsts(i).rvv.get.valid &&
+            decodedInsts(0).rvv.get.bits.requireZeroVstart()
         val vStartNotZero = io.rvvState.get.valid &&
             (io.rvvState.get.bits.vstart =/= 0.U)
-        io.inst(0).valid && isReduction && vStartNotZero
+        io.inst(0).valid && requireZeroVstart && vStartNotZero
       } else {
         false.B
       })
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index f900f79..464c935 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -158,6 +158,13 @@
     "core_mini_rvv_add",
     "core_mini_vstart_store",
     "core_mini_vcsr_test",
+    "core_mini_viota_test",
+    "core_mini_vfirst_test",
+    "core_mini_vcpop_test",
+    "core_mini_vcompress_test",
+    "core_mini_vmsbf_test",
+    "core_mini_vmsof_test",
+    "core_mini_vmsif_test",
 ]
 # END_TESTCASES_FOR_rvv_assembly_cocotb_test
 
diff --git a/tests/cocotb/rvv/BUILD b/tests/cocotb/rvv/BUILD
index 09fee67..9320989 100644
--- a/tests/cocotb/rvv/BUILD
+++ b/tests/cocotb/rvv/BUILD
@@ -32,6 +32,27 @@
         "vcsr_test": {
             "srcs": ["vcsr_test.cc"],
         },
+        "viota_test": {
+            "srcs": ["viota_test.cc"],
+        },
+        "vfirst_test": {
+            "srcs": ["vfirst_test.cc"],
+        },
+        "vcpop_test": {
+            "srcs": ["vcpop_test.cc"],
+        },
+        "vcompress_test": {
+            "srcs": ["vcompress_test.cc"],
+        },
+        "vmsbf_test": {
+            "srcs": ["vmsbf_test.cc"],
+        },
+        "vmsof_test": {
+            "srcs": ["vmsof_test.cc"],
+        },
+        "vmsif_test": {
+            "srcs": ["vmsif_test.cc"],
+        },
     },
 )
 
@@ -42,5 +63,12 @@
         "rvv_load.elf",
         "vstart_store.elf",
         "vcsr_test.elf",
+        "viota_test.elf",
+        "vfirst_test.elf",
+        "vcpop_test.elf",
+        "vcompress_test.elf",
+        "vmsbf_test.elf",
+        "vmsof_test.elf",
+        "vmsif_test.elf",
     ],
 )
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vcompress_test.cc b/tests/cocotb/rvv/vcompress_test.cc
new file mode 100644
index 0000000..be376d9
--- /dev/null
+++ b/tests/cocotb/rvv/vcompress_test.cc
@@ -0,0 +1,68 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t mask_data[16] __attribute__((section(".data")));
+uint8_t input_data[8*16] __attribute__((section(".data")));
+uint8_t result[8*16] __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load mask data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(mask_data));
+
+  // Load input (just load entire array)
+  asm volatile("vsetivli x0, 16, e8, m8, ta, ma");
+  asm volatile("vle8.v v8, (%0)" : : "r"(input_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run vcompress
+  asm volatile("vcompress.vm v16, v8, v0");
+
+  // Store result (entire array)
+  asm volatile("vsetivli x0, 16, e8, m8, ta, ma");
+  asm volatile("vse8.v v16, (%0)" : : "r"(result));
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vcpop_test.cc b/tests/cocotb/rvv/vcpop_test.cc
new file mode 100644
index 0000000..8cded24
--- /dev/null
+++ b/tests/cocotb/rvv/vcpop_test.cc
@@ -0,0 +1,61 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t mask_data[16] __attribute__((section(".data")));
+uint32_t result __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load mask data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(mask_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run vcpop
+  uint32_t local_result;
+  asm volatile("vcpop.m %0, v0" : "=r"(local_result));
+  result = local_result;
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vfirst_test.cc b/tests/cocotb/rvv/vfirst_test.cc
new file mode 100644
index 0000000..6ca8081
--- /dev/null
+++ b/tests/cocotb/rvv/vfirst_test.cc
@@ -0,0 +1,61 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t mask_data[16] __attribute__((section(".data")));
+uint32_t result __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load mask data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(mask_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run vfirst
+  uint32_t local_result;
+  asm volatile("vfirst.m %0, v0" : "=r"(local_result));
+  result = local_result;
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv/viota_test.cc b/tests/cocotb/rvv/viota_test.cc
new file mode 100644
index 0000000..c9a3639
--- /dev/null
+++ b/tests/cocotb/rvv/viota_test.cc
@@ -0,0 +1,62 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t mask_data[16] __attribute__((section(".data")));
+uint8_t result[16*8] __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load mask data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(mask_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run viota
+  asm volatile("viota.m v8, v0");
+
+  // Store result
+  asm volatile("vse8.v v8, (%0)" : : "r"(result));
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vmsbf_test.cc b/tests/cocotb/rvv/vmsbf_test.cc
new file mode 100644
index 0000000..e0f1e25
--- /dev/null
+++ b/tests/cocotb/rvv/vmsbf_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t input_data[16] __attribute__((section(".data")));
+uint8_t result[16] __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load input data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(input_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run vmsbf
+  asm volatile("vmsbf.m v8, v0");
+
+  // Store result (entire array)
+  asm volatile("vsetivli x0, 16, e8, m8, ta, ma");
+  asm volatile("vse8.v v8, (%0)" : : "r"(result));
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vmsif_test.cc b/tests/cocotb/rvv/vmsif_test.cc
new file mode 100644
index 0000000..36f014f
--- /dev/null
+++ b/tests/cocotb/rvv/vmsif_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t input_data[16] __attribute__((section(".data")));
+uint8_t result[16] __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load input data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(input_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run vmsif
+  asm volatile("vmsif.m v8, v0");
+
+  // Store result (entire array)
+  asm volatile("vsetivli x0, 16, e8, m8, ta, ma");
+  asm volatile("vse8.v v8, (%0)" : : "r"(result));
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vmsof_test.cc b/tests/cocotb/rvv/vmsof_test.cc
new file mode 100644
index 0000000..40bf342
--- /dev/null
+++ b/tests/cocotb/rvv/vmsof_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vstart __attribute__((section(".data"))) = 0;
+
+uint8_t input_data[16] __attribute__((section(".data")));
+uint8_t result[16] __attribute__((section(".data")));
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+
+// Fault handler to log fault
+extern "C" {
+void kelvin_exception_handler() {
+  faulted = 1;
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+
+  asm volatile("ebreak");
+  while (1) {}
+}
+}
+
+int main(int argc, char **argv) {
+  // Load input data
+  asm volatile("vsetivli x0, 16, e8, m1, ta, ma");
+  asm volatile("vle8.v v0, (%0)" : : "r"(input_data));
+
+  // Set configuration state
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));
+  uint32_t local_vstart = vstart;
+  asm volatile("csrw vstart, %0" : : "r"(local_vstart));
+
+  // Run vmsof
+  asm volatile("vmsof.m v8, v0");
+
+  // Store result (entire array)
+  asm volatile("vsetivli x0, 16, e8, m8, ta, ma");
+  asm volatile("vse8.v v8, (%0)" : : "r"(result));
+
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv_assembly_cocotb_test.py b/tests/cocotb/rvv_assembly_cocotb_test.py
index 4e3d70c..0c366a0 100644
--- a/tests/cocotb/rvv_assembly_cocotb_test.py
+++ b/tests/cocotb/rvv_assembly_cocotb_test.py
@@ -3,6 +3,22 @@
 from kelvin_test_utils.core_mini_axi_interface import CoreMiniAxiInterface
 from bazel_tools.tools.python.runfiles import runfiles
 
+SEWS = [
+    0b000,  # SEW8
+    0b001,  # SEW16
+    0b010,  # SEW32
+]
+
+LMULS = [
+    0b101,  # LMUL1/8
+    0b110,  # LMUL1/4
+    0b111,  # LMUL1/2
+    0b000,  # LMUL1
+    0b001,  # LMUL2
+    0b010,  # LMUL4
+    0b011,  # LMUL8
+]
+
 @cocotb.test()
 async def core_mini_rvv_load(dut):
     """Testbench to test RVV load intrinsics.
@@ -147,8 +163,7 @@
 
 @cocotb.test()
 async def core_mini_vcsr_test(dut):
-    """Testbench to test vstart store.
-    """
+    """Testbench to test vcsr is set correctly."""
     # Test bench setup
     core_mini_axi = CoreMiniAxiInterface(dut)
     await core_mini_axi.init()
@@ -168,22 +183,6 @@
         vl_addr = core_mini_axi.lookup_symbol(f, "vl")
         vtype_addr = core_mini_axi.lookup_symbol(f, "vtype")
 
-    SEWS = [
-        0b000,  # SEW8
-        0b001,  # SEW16
-        0b010,  # SEW32
-    ]
-
-    LMULS = [
-        0b101,  # LMUL1/8
-        0b110,  # LMUL1/4
-        0b111,  # LMUL1/2
-        0b000,  # LMUL1
-        0b001,  # LMUL2
-        0b010,  # LMUL4
-        0b011,  # LMUL8
-    ]
-
     for ma in range(2):
       for ta in range(2):
         for sew in SEWS:
@@ -209,3 +208,111 @@
             assert (ta == ta_result)
             assert (sew == sew_result)
             assert (lmul == lmul_result)
+
+
+async def test_vstart_not_zero_failure(dut, binary):
+    """Checks that `binary` faults when it is run with a non-zero vstart.
+
+    The ELF is loaded once. For every ma/ta/SEW/LMUL combination the
+    configuration and vstart=1 are written into the program's data symbols,
+    the program is run until the core halts, and the `faulted` and `mcause`
+    symbols are expected to read back as 1 and 0x2.
+    """
+    core_mini_axi = CoreMiniAxiInterface(dut)
+    await core_mini_axi.init()
+    await core_mini_axi.reset()
+    cocotb.start_soon(core_mini_axi.clock.start())
+    r = runfiles.Create()
+
+    elf_path = r.Rlocation(binary)
+    if not elf_path:
+        raise ValueError("elf_path must consist a valid path")
+    with open(elf_path, "rb") as f:
+        entry_point = await core_mini_axi.load_elf(f)
+        vma_addr = core_mini_axi.lookup_symbol(f, "vma")
+        vta_addr = core_mini_axi.lookup_symbol(f, "vta")
+        sew_addr = core_mini_axi.lookup_symbol(f, "sew")
+        lmul_addr = core_mini_axi.lookup_symbol(f, "lmul")
+        vl_addr = core_mini_axi.lookup_symbol(f, "vl")
+        vstart_addr = core_mini_axi.lookup_symbol(f, "vstart")
+        faulted_addr = core_mini_axi.lookup_symbol(f, "faulted")
+        mcause_addr = core_mini_axi.lookup_symbol(f, "mcause")
+
+    for ma in range(2):
+      for ta in range(2):
+        for sew in SEWS:
+          for lmul in LMULS:
+            vl = 4 # TODO(derekjchow): Pick random VL
+            vstart = 1 # Non-zero to trigger failure
+
+            # The ELF is only loaded once, so clear the fault record left by
+            # the previous iteration; otherwise a stale faulted == 1 would
+            # hide a run that did not trap.
+            await core_mini_axi.write_word(faulted_addr, 0)
+            await core_mini_axi.write_word(mcause_addr, 0)
+
+            await core_mini_axi.write_word(vma_addr, ma)
+            await core_mini_axi.write_word(vta_addr, ta)
+            await core_mini_axi.write_word(sew_addr, sew)
+            await core_mini_axi.write_word(lmul_addr, lmul)
+            await core_mini_axi.write_word(vl_addr, vl)
+            await core_mini_axi.write_word(vstart_addr, vstart)
+
+            await core_mini_axi.execute_from(entry_point)
+            await core_mini_axi.wait_for_halted()
+
+            faulted_result = (
+                await core_mini_axi.read_word(faulted_addr)).view(np.uint32)[0]
+            assert (faulted_result == 1)
+            mcause_result = (
+                await core_mini_axi.read_word(mcause_addr)).view(np.uint32)[0]
+            assert (mcause_result == 0x2)
+
+
+@cocotb.test()
+async def core_mini_viota_test(dut):
+    """Testbench to test vstart!=0 viota."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/viota_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vfirst_test(dut):
+    """Testbench to test vstart!=0 vfirst."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/vfirst_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vcpop_test(dut):
+    """Testbench to test vstart!=0 vcpop."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/vcpop_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vcompress_test(dut):
+    """Testbench to test vstart!=0 vcompress."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/vcompress_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vmsbf_test(dut):
+    """Testbench to test vstart!=0 vmsbf."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/vmsbf_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vmsof_test(dut):
+    """Testbench to test vstart!=0 vmsof."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/vmsof_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vmsif_test(dut):
+    """Testbench to test vstart!=0 vmsif."""
+    await test_vstart_not_zero_failure(
+        dut, "kelvin_hw/tests/cocotb/rvv/vmsif_test.elf")
\ No newline at end of file