Add vtype CSR.

Change-Id: I4ebbadcf0b76d3477f22e682da1b844375fbd4ce
diff --git a/hdl/chisel/src/kelvin/rvv/RvvInterface.scala b/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
index 0b12ae5..befb80f 100644
--- a/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
+++ b/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
@@ -26,6 +26,14 @@
   val xrm = Output(UInt(2.W))
   val sew = Output(UInt(3.W))
   val lmul = Output(UInt(3.W))
+
+  /**
+   * Construct the vtype CSR value as {24 zero bits, ma, ta, sew, lmul}, MSB first.
+   * See section 3.4 of the RISC-V Vector Specification v1.0.
+   */
+  def vtype: UInt = {
+    Cat(0.U(24.W), ma, ta, sew, lmul)  // NOTE(review): no vill bit is encoded; upper bits are always 0 — confirm intended.
+  }
 }
 
 class Lsu2Rvv(p: Parameters) extends Bundle {
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala
index f702a97..00b303a 100644
--- a/hdl/chisel/src/kelvin/scalar/Csr.scala
+++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -22,6 +22,7 @@
   // To Csr from RvvCore
   val vstart = Input(UInt(log2Ceil(p.rvvVlen).W))
   val vl = Input(UInt(log2Ceil(p.rvvVlen).W))
+  val vtype = Input(UInt(32.W))
   val vxrm = Input(UInt(2.W))
   val vxsat = Input(Bool())
   // From Csr to RvvCore
@@ -74,6 +75,7 @@
   val MCYCLEH   = Value(0xB80.U(12.W))
   val MINSTRETH = Value(0xB82.U(12.W))
   val VL        = Value(0xC20.U(12.W))
+  val VTYPE     = Value(0xC21.U(12.W))
   val VLENB     = Value(0xC22.U(12.W))
   val MVENDORID = Value(0xF11.U(12.W))
   val MARCHID   = Value(0xF12.U(12.W))
@@ -302,6 +304,7 @@
   val fcsrEn      = csr_address === CsrAddress.FCSR
   val vstartEn    = Option.when(p.enableRvv) { csr_address === CsrAddress.VSTART }
   val vlEn        = Option.when(p.enableRvv) { csr_address === CsrAddress.VL }
+  val vtypeEn     = Option.when(p.enableRvv) { csr_address === CsrAddress.VTYPE }
   val vxrmEn      = Option.when(p.enableRvv) { csr_address === CsrAddress.VXRM }
   val vxsatEn     = Option.when(p.enableRvv) { csr_address === CsrAddress.VXSAT }
   val mstatusEn   = csr_address === CsrAddress.MSTATUS
@@ -413,6 +416,7 @@
         Seq(
           vstartEn.get -> io.rvv.get.vstart,
           vlEn.get     -> io.rvv.get.vl,
+          vtypeEn.get  -> io.rvv.get.vtype,
           vxrmEn.get   -> io.rvv.get.vxrm,
           vxsatEn.get  -> io.rvv.get.vxsat,
           vlenbEn.get -> 16.U(32.W),  // Vector length in Bytes
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index 33bae21..999992a 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -449,6 +449,7 @@
     io.rvvcore.get.csr.vxsat_write <> csr.io.rvv.get.vxsat_write
     csr.io.rvv.get.vstart := io.rvvcore.get.csr.vstart
     csr.io.rvv.get.vl := io.rvvcore.get.configState.bits.vl
+    csr.io.rvv.get.vtype := io.rvvcore.get.configState.bits.vtype
     csr.io.rvv.get.vxrm := io.rvvcore.get.csr.vxrm
     csr.io.rvv.get.vxsat := io.rvvcore.get.csr.vxsat
   }
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index 6855246..f900f79 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -157,6 +157,7 @@
     "core_mini_rvv_load",
     "core_mini_rvv_add",
     "core_mini_vstart_store",
+    "core_mini_vcsr_test",
 ]
 # END_TESTCASES_FOR_rvv_assembly_cocotb_test
 
@@ -235,12 +236,6 @@
     verilator_model = ":core_mini_debug_axi_model",
 )
 
-RVV_TEST_BINARY_TARGETS = [
-    "//tests/cocotb/rvv:rvv_load.elf",
-    "//tests/cocotb/rvv:rvv_add.elf",
-    "//tests/cocotb/rvv:vstart_store.elf",
-]
-
 cocotb_test_suite(
     name = "rvv_assembly_cocotb_test",
     simulators = [
@@ -259,10 +254,10 @@
             requirement("tqdm"),
             "@bazel_tools//tools/python/runfiles",
         ],
-        "data": RVV_TEST_BINARY_TARGETS,
+        "data": ["//tests/cocotb/rvv:rvv_assem_tests"],
         "size": "large",
     },
-    vcs_data = RVV_TEST_BINARY_TARGETS + [":coverage_exclude.cfg"],
+    vcs_data = ["//tests/cocotb/rvv:rvv_assem_tests"] + [":coverage_exclude.cfg"],
     vcs_build_args = VCS_BUILD_ARGS,
     vcs_test_args = VCS_TEST_ARGS,
     vcs_defines = VCS_DEFINES,
diff --git a/tests/cocotb/rvv/BUILD b/tests/cocotb/rvv/BUILD
index b53ebe7..09fee67 100644
--- a/tests/cocotb/rvv/BUILD
+++ b/tests/cocotb/rvv/BUILD
@@ -29,5 +29,18 @@
         "vstart_store": {
             "srcs": ["vstart_store.S"],
         },
+        "vcsr_test": {
+            "srcs": ["vcsr_test.cc"],
+        },
     },
 )
+
+filegroup(  # Bundles the RVV test ELFs used as data by rvv_assembly_cocotb_test in //tests/cocotb.
+    name = "rvv_assem_tests",  # NOTE(review): referenced from //tests/cocotb — confirm this package's visibility allows it.
+    srcs = [
+        "rvv_add.elf",
+        "rvv_load.elf",
+        "vstart_store.elf",
+        "vcsr_test.elf",
+    ],
+)
\ No newline at end of file
diff --git a/tests/cocotb/rvv/vcsr_test.cc b/tests/cocotb/rvv/vcsr_test.cc
new file mode 100644
index 0000000..a2612c5
--- /dev/null
+++ b/tests/cocotb/rvv/vcsr_test.cc
@@ -0,0 +1,32 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t vma __attribute__((section(".data"))) = 0;
+uint32_t vta __attribute__((section(".data"))) = 0;
+uint32_t sew __attribute__((section(".data"))) = 0;
+uint32_t lmul __attribute__((section(".data"))) = 0;
+uint32_t vl __attribute__((section(".data"))) = 16;
+uint32_t vtype __attribute__((section(".data"))) = ~0;
+
+int main(int argc, char **argv) {
+  uint32_t vtype_to_write = (vma << 7) | (vta << 6) | (sew << 3) | lmul;  // Pack the fields: vma at bit 7, vta at bit 6, sew from bit 3, lmul from bit 0.
+  asm volatile("vsetvl x0, %0, %1": : "r"(vl), "r"(vtype_to_write));  // Request vl/vtype; the granted vl is discarded (rd = x0).
+  uint32_t vtype_to_read;
+  asm volatile("csrr %0, vtype" : "=r"(vtype_to_read));  // Read the vtype CSR back.
+  vtype = vtype_to_read;  // Store into the `vtype` global, which the cocotb testbench reads after halt.
+  return 0;
+}
diff --git a/tests/cocotb/rvv_assembly_cocotb_test.py b/tests/cocotb/rvv_assembly_cocotb_test.py
index e436fca..4e3d70c 100644
--- a/tests/cocotb/rvv_assembly_cocotb_test.py
+++ b/tests/cocotb/rvv_assembly_cocotb_test.py
@@ -1,6 +1,5 @@
 import cocotb
 import numpy as np
-import argparse
 from kelvin_test_utils.core_mini_axi_interface import CoreMiniAxiInterface
 from bazel_tools.tools.python.runfiles import runfiles
 
@@ -139,10 +138,74 @@
 
     # vstart is 4, so first 4 elements are skipped.
     # 12 elements are stored.
-    print(f"input_data={input_data}", flush=True)
-    print(f"output_data={output_data}", flush=True)
     assert np.array_equal(output_data[0:4], np.zeros(4, dtype=np.uint8))
     assert np.array_equal(output_data[4:], input_data[4:])
 
     await core_mini_axi.raise_irq()
     await core_mini_axi.wait_for_halted()
+
+
+@cocotb.test()
+async def core_mini_vcsr_test(dut):
+    """Testbench to test reading back the vtype CSR after vsetvl.
+    """
+    # Test bench setup
+    core_mini_axi = CoreMiniAxiInterface(dut)
+    await core_mini_axi.init()
+    await core_mini_axi.reset()
+    cocotb.start_soon(core_mini_axi.clock.start())
+    r = runfiles.Create()
+
+    elf_path = r.Rlocation("kelvin_hw/tests/cocotb/rvv/vcsr_test.elf")
+    if not elf_path:
+        raise ValueError("elf_path must consist a valid path")
+    with open(elf_path, "rb") as f:
+        entry_point = await core_mini_axi.load_elf(f)
+        vma_addr = core_mini_axi.lookup_symbol(f, "vma")
+        vta_addr = core_mini_axi.lookup_symbol(f, "vta")
+        sew_addr = core_mini_axi.lookup_symbol(f, "sew")
+        lmul_addr = core_mini_axi.lookup_symbol(f, "lmul")
+        vl_addr = core_mini_axi.lookup_symbol(f, "vl")
+        vtype_addr = core_mini_axi.lookup_symbol(f, "vtype")
+
+    SEWS = [
+        0b000,  # SEW8
+        0b001,  # SEW16
+        0b010,  # SEW32
+    ]
+
+    LMULS = [
+        0b101,  # LMUL1/8
+        0b110,  # LMUL1/4
+        0b111,  # LMUL1/2
+        0b000,  # LMUL1
+        0b001,  # LMUL2
+        0b010,  # LMUL4
+        0b011,  # LMUL8
+    ]
+
+    for ma in range(2):
+      for ta in range(2):
+        for sew in SEWS:
+          for lmul in LMULS:
+            await core_mini_axi.write_word(vma_addr, ma)
+            await core_mini_axi.write_word(vta_addr, ta)
+            await core_mini_axi.write_word(sew_addr, sew)
+            await core_mini_axi.write_word(lmul_addr, lmul)
+            # TODO(derekjchow): Pick random VL
+            await core_mini_axi.write_word(vl_addr, 1)
+
+            await core_mini_axi.execute_from(entry_point)
+            await core_mini_axi.wait_for_halted()
+
+            vtype_result = (
+                await core_mini_axi.read_word(vtype_addr)).view(np.uint32)[0]
+            ma_result = (vtype_result & (1 << 7)) >> 7
+            ta_result = (vtype_result & (1 << 6)) >> 6
+            sew_result = (vtype_result & (0b111 << 3)) >> 3
+            lmul_result = (vtype_result & 0b111)
+
+            assert (ma == ma_result)
+            assert (ta == ta_result)
+            assert (sew == sew_result)
+            assert (lmul == lmul_result)