Add vill bit to config state and trap on bad state.

Change-Id: I09c100b46bcfd64dbf09922efe6ced794ad4fbe9
diff --git a/hdl/chisel/src/kelvin/rvv/RvvCore.scala b/hdl/chisel/src/kelvin/rvv/RvvCore.scala
index 2917755..c80defe 100644
--- a/hdl/chisel/src/kelvin/rvv/RvvCore.scala
+++ b/hdl/chisel/src/kelvin/rvv/RvvCore.scala
@@ -136,6 +136,13 @@
             |    output [15:0] rd_rob2rt_o_GENI_vxsaturate,""".stripMargin.replaceAll("GENI", i.toString)
     }
 
+    // Add trap interface outputs
+    moduleInterface += """
+        |    output trap_valid,
+        |    output [31:0] trap_bits_pc,
+        |    output [1:0] trap_bits_opcode,
+        |    output [24:0] trap_bits_bits,""".stripMargin
+
     // Remove last comma/linebreak
     moduleInterface = moduleInterface.dropRight(1)
     moduleInterface += "\n);\n"
@@ -151,9 +158,8 @@
     coreInstantiation += "  RVVInstruction [GENN-1:0] inst_data;\n".replaceAll(
             "GENN", instructionLanes.toString)
     for (i <- 0 until instructionLanes) {
-      // TODO(derekjchow): Plumb in pc later
-      // coreInstantiation += "  assign inst_data[GENI].pc = inst_GENI_bits_pc;\n".replaceAll(
-      //     "GENI", i.toString)
+      coreInstantiation += "  assign inst_data[GENI].pc = inst_GENI_bits_pc;\n".replaceAll(
+          "GENI", i.toString)
       coreInstantiation += "  assign inst_data[GENI].opcode = RVVOpCode'(inst_GENI_bits_opcode);\n".replaceAll(
           "GENI", i.toString)
       coreInstantiation += "  assign inst_data[GENI].bits = inst_GENI_bits_bits;\n".replaceAll(
@@ -240,6 +246,7 @@
         |""".stripMargin
 
     coreInstantiation += "  ROB2RT_t [3:0] rd_rob2rt_o;\n"
+    coreInstantiation += "  RVVInstruction trap_data;\n"
 
     coreInstantiation += """  RvvCore#(.N (GENN)) core(
         |      .clk(clk),
@@ -281,7 +288,9 @@
         |      .config_state(config_state),
         |      .rvv_idle(rvv_idle),
         |      .queue_capacity(queue_capacity),
-        |      .rd_rob2rt_o(rd_rob2rt_o)
+        |      .rd_rob2rt_o(rd_rob2rt_o),
+        |      .trap_valid_o(trap_valid),
+        |      .trap_data_o(trap_data)
         |""".stripMargin.replaceAll("GENN", instructionLanes.toString)
     coreInstantiation += "  );\n"
 
@@ -302,6 +311,10 @@
       |  assign rd_rob2rt_o_GENI_vxsaturate = rd_rob2rt_o[GENI].vxsaturate;
       |""".stripMargin.replaceAll("GENI", i.toString)
     }
+    coreInstantiation += """  assign trap_bits_pc = trap_data.pc;
+      |  assign trap_bits_opcode = trap_data.opcode;
+      |  assign trap_bits_bits = trap_data.bits;
+      |""".stripMargin
     for (i <- 0 until instructionLanes) {
       coreInstantiation += "  assign inst_GENI_ready = inst_ready[GENI];\n".replaceAll("GENI", i.toString)
     }
@@ -344,6 +357,7 @@
     val async_rd = Decoupled(new RegfileWriteDataIO)
 
     val rd_rob2rt_o = Vec(4, new Rob2Rt(p))
+    val trap = Output(Valid(new RvvCompressedInstruction))
 
     val vcsr_valid = Output(Bool())
     val vcsr_vstart = Output(UInt(7.W))
@@ -447,6 +461,7 @@
   rvvCoreWrapper.io.rd <> io.rd
   rvvCoreWrapper.io.async_rd <> io.async_rd
   rvvCoreWrapper.io.rd_rob2rt_o <> io.rd_rob2rt_o
+  io.trap := rvvCoreWrapper.io.trap
 
   rvvCoreWrapper.io.vstart := Mux(
       io.csr.vstart_write.valid, io.csr.vstart_write.bits, vstart)
diff --git a/hdl/chisel/src/kelvin/rvv/RvvDecode.scala b/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
index 7967a5e..e508492 100644
--- a/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
+++ b/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
@@ -37,6 +37,15 @@
   val opcode = RvvCompressedOpcode()
   val bits = UInt(25.W)
 
+  def originalEncoding(): UInt = {  // Rebuilds the instruction word: `bits` above a 7-bit opcode field.
+    val lower7bits = MuxLookup(opcode, 0.U)(Seq(  // Any opcode not listed below yields 0.
+        RvvCompressedOpcode.RVVLOAD  -> "b0000111".U,
+        RvvCompressedOpcode.RVVSTORE -> "b0100111".U,
+        RvvCompressedOpcode.RVVALU   -> "b1010111".U,
+    ))
+    Cat(bits, lower7bits)  // `bits` forms the upper part, lower7bits the low 7 bits.
+  }
+
   def funct6(): UInt = {
     bits(24, 19)
   }
diff --git a/hdl/chisel/src/kelvin/rvv/RvvInterface.scala b/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
index befb80f..86ebc2c 100644
--- a/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
+++ b/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
@@ -72,6 +72,9 @@
     // Async scalar regfile writes.
     val async_rd = Decoupled(new RegfileWriteDataIO)
 
+    // Async trap.
+    val trap = Output(Valid(new RvvCompressedInstruction))
+
     // Csr Interface.
     val csr = new RvvCsrIO(p)
 
diff --git a/hdl/chisel/src/kelvin/scalar/FaultManager.scala b/hdl/chisel/src/kelvin/scalar/FaultManager.scala
index 5470413..33a2785 100644
--- a/hdl/chisel/src/kelvin/scalar/FaultManager.scala
+++ b/hdl/chisel/src/kelvin/scalar/FaultManager.scala
@@ -39,6 +39,8 @@
       }))
       val memory_fault = Input(Valid(new FaultInfo(p)))
       val ibus_fault = Input(Bool())
+      val rvv_fault = Option.when(p.enableRvv)(Input(
+          Valid(new FaultManagerOutput)))
       val undef = Input(Vec(p.instructionLanes, new Bundle {
         val inst = UInt(32.W)
       }))
@@ -71,39 +73,43 @@
   val jalr_fault_idx = PriorityEncoder(io.in.fault.map(_.jalr))
   val bxx_fault = io.in.fault.map(_.bxx).reduce(_|_)
   val bxx_fault_idx = PriorityEncoder(io.in.fault.map(_.bxx))
-  val rvv_fault = io.in.fault.map(_.rvv.getOrElse(false.B)).reduce(_|_)
-  val rvv_fault_idx = PriorityEncoder(io.in.fault.map(_.rvv.getOrElse(false.B)))
+  val rvv_dispatch_fault = io.in.fault.map(_.rvv.getOrElse(false.B)).reduce(_|_)
+  val rvv_dispatch_fault_idx = PriorityEncoder(io.in.fault.map(_.rvv.getOrElse(false.B)))
   val instr_access_fault = io.in.memory_fault.valid && io.in.ibus_fault
   val load_fault = io.in.memory_fault.valid && !io.in.memory_fault.bits.write && !io.in.ibus_fault
   val store_fault = io.in.memory_fault.valid && io.in.memory_fault.bits.write && !io.in.ibus_fault
+  val rvv_fault = io.in.rvv_fault.map(_.valid).getOrElse(false.B)
 
-  io.out.valid := fault || io.in.memory_fault.valid
+  io.out.valid := fault || io.in.memory_fault.valid || rvv_fault
   io.out.bits.mepc := MuxCase(0.U(32.W), Seq(
     load_fault -> io.in.memory_fault.bits.epc,
     store_fault -> io.in.memory_fault.bits.epc,
     instr_access_fault -> io.in.memory_fault.bits.epc,
+    rvv_fault -> io.in.rvv_fault.map(_.bits.mepc).getOrElse(0.U),
     fault -> io.in.pc(first_fault).pc,
   ))
   io.out.bits.mcause := MuxCase(0.U(32.W), Seq(
     load_fault -> 5.U(32.W),
     store_fault -> 7.U(32.W),
     instr_access_fault -> 1.U(32.W),
+    rvv_fault -> io.in.rvv_fault.map(_.bits.mcause).getOrElse(2.U(32.W)),
     (csr_fault && (csr_fault_idx === first_fault)) -> 2.U(32.W),
     (jal_fault && (jal_fault_idx === first_fault)) -> 0.U(32.W),
     (jalr_fault && (jalr_fault_idx === first_fault)) -> 0.U(32.W),
     (bxx_fault && (bxx_fault_idx === first_fault)) -> 0.U(32.W),
     (undef_fault && (undef_fault_idx === first_fault)) -> 2.U(32.W),
-    (rvv_fault && (rvv_fault_idx === first_fault)) -> 2.U(32.W),
+    (rvv_dispatch_fault && (rvv_dispatch_fault_idx === first_fault)) -> 2.U(32.W),
   ))
   io.out.bits.mtval := MuxCase(0.U(32.W), Seq(
     load_fault -> io.in.memory_fault.bits.addr,
     store_fault -> io.in.memory_fault.bits.addr,
     instr_access_fault -> 0.U(32.W),
+    rvv_fault -> io.in.rvv_fault.map(_.bits.mtval).getOrElse(0.U),
     (csr_fault && (csr_fault_idx === first_fault)) -> 0.U,
     (jal_fault && (jal_fault_idx === first_fault)) -> io.in.jal(jal_fault_idx).target,
     (jalr_fault && (jalr_fault_idx === first_fault)) -> (io.in.jalr(jalr_fault_idx).target & "xFFFFFFFE".U),
     (bxx_fault && (bxx_fault_idx === first_fault)) -> 0.U(32.W),
     (undef_fault && (undef_fault_idx === first_fault)) -> io.in.undef(undef_fault_idx).inst,
-    (rvv_fault && (rvv_fault_idx === first_fault)) -> io.in.undef(rvv_fault_idx).inst,
+    (rvv_dispatch_fault && (rvv_dispatch_fault_idx === first_fault)) -> io.in.undef(rvv_dispatch_fault_idx).inst,
   ))
 }
\ No newline at end of file
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index 999992a..1fe4c1d 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -155,6 +155,13 @@
     lsu.io.fault.valid -> lsu.io.fault,
   ))
   fault_manager.io.in.ibus_fault := io.ibus.fault.valid
+  if (p.enableRvv) {
+    fault_manager.io.in.rvv_fault.get.valid := io.rvvcore.get.trap.valid
+    fault_manager.io.in.rvv_fault.get.bits.mepc := io.rvvcore.get.trap.bits.pc
+    fault_manager.io.in.rvv_fault.get.bits.mcause := 2.U(32.W)
+    fault_manager.io.in.rvv_fault.get.bits.mtval :=
+        io.rvvcore.get.trap.bits.originalEncoding()
+  }
   bru(0).io.fault_manager.get := fault_manager.io.out
 
   // ---------------------------------------------------------------------------
diff --git a/hdl/verilog/rvv/design/RvvCore.sv b/hdl/verilog/rvv/design/RvvCore.sv
index fdd885a..23c6c1b 100644
--- a/hdl/verilog/rvv/design/RvvCore.sv
+++ b/hdl/verilog/rvv/design/RvvCore.sv
@@ -80,7 +80,11 @@
   output logic [$clog2(2*N + 1)-1:0] queue_capacity,
 
   // Writeback from reorder buffer
-  output ROB2RT_t [`NUM_RT_UOP-1:0] rd_rob2rt_o
+  output ROB2RT_t [`NUM_RT_UOP-1:0] rd_rob2rt_o,
+
+  // Trap output
+  output logic trap_valid_o,
+  output RVVInstruction trap_data_o
 );
   logic [N-1:0] frontend_cmd_valid;
   RVVCmd [N-1:0] frontend_cmd_data;
@@ -103,6 +107,8 @@
       .cmd_data_o(frontend_cmd_data),
       .queue_capacity_i(queue_capacity_internal),
       .queue_capacity_o(queue_capacity),
+      .trap_valid_o(trap_valid_o),
+      .trap_data_o(trap_data_o),
       .config_state_valid(config_state_valid),
       .config_state(config_state)
   );
diff --git a/hdl/verilog/rvv/design/RvvFrontEnd.sv b/hdl/verilog/rvv/design/RvvFrontEnd.sv
index d69c93c..3946b56 100644
--- a/hdl/verilog/rvv/design/RvvFrontEnd.sv
+++ b/hdl/verilog/rvv/design/RvvFrontEnd.sv
@@ -51,6 +51,10 @@
   input logic [CAPACITYBITS-1:0] queue_capacity_i,  // Number of elements that can be enqueued
   output logic [CAPACITYBITS-1:0] queue_capacity_o,
 
+  // Trap output.
+  output logic trap_valid_o,
+  output RVVInstruction trap_data_o,
+
   // Config state
   output config_state_valid,
   output RVVConfigState config_state
@@ -135,6 +139,7 @@
           (inst_q[i].opcode == RVV) &&
           (inst_q[i].bits[7:5] == 3'b111)) begin
         if (inst_q[i].bits[24] == 0) begin  // vsetvli
+          inst_config_state[i+1].vill = 0;
           inst_config_state[i+1].vl = reg_read_data_i[2*i];
           inst_config_state[i+1].lmul = RVVLMUL'(inst_q[i].bits[15:13]);
           inst_config_state[i+1].sew = RVVSEW'(inst_q[i].bits[18:16]);
@@ -142,6 +147,7 @@
           inst_config_state[i+1].ma = inst_q[i].bits[20];
           is_setvl[i] = 1;
         end else if (inst_q[i].bits[24:23] == 2'b11) begin  // vsetivli
+          inst_config_state[i+1].vill = 0;
           inst_config_state[i+1].vl =
               {{(`VL_WIDTH - 5){1'b0}}, inst_q[i].bits[12:8]};
           inst_config_state[i+1].lmul = RVVLMUL'(inst_q[i].bits[15:13]);
@@ -150,6 +156,7 @@
           inst_config_state[i+1].ma = inst_q[i].bits[20];
           is_setvl[i] = 1;
         end else if (inst_q[i].bits[24:23] == 2'b10) begin  // vsetvl
+          inst_config_state[i+1].vill = 0;
           inst_config_state[i+1].vl = reg_read_data_i[2*i];
           inst_config_state[i+1].lmul =
               RVVLMUL'(reg_read_data_i[(2*i) + 1][2:0]);
@@ -165,7 +172,7 @@
 
   always_ff @(posedge clk or negedge rstn) begin
     if (!rstn) begin
-      // TODO(derekjchow): check if RVV spec specifies arch state on reset.
+      config_state_q.vill <= 1;  // Config is illegal on reset.
       config_state_q.ma <= 0;
       config_state_q.ta <= 0;
       config_state_q.sew <= SEW8;
@@ -180,13 +187,19 @@
   // Propagate outputs
   logic [N-1:0] unaligned_cmd_valid;
   RVVCmd [N-1:0] unaligned_cmd_data;
+  logic [N-1:0] unaligned_trap_valid;  // Should this instruction trap
+  RVVInstruction [N-1:0] unaligned_trap_data;
   always_comb begin
     for (int i = 0; i < N; i++) begin
-      unaligned_cmd_valid[i] = valid_inst_q[i] && !is_setvl[i];
+      unaligned_trap_valid[i] = valid_inst_q[i] && !is_setvl[i] &&
+          inst_config_state[i].vill;
+      unaligned_trap_data[i] = inst_q[i];
+      unaligned_cmd_valid[i] = valid_inst_q[i] && !is_setvl[i] &&
+          !inst_config_state[i].vill;
 
       // Combine instruction + arch state into command
 `ifdef TB_SUPPORT
-      unaligned_cmd_data[i].insts_pc = inst_q[i].pc;
+      unaligned_cmd_data[i].inst_pc = inst_q[i].pc;
 `endif
       unaligned_cmd_data[i].opcode = inst_q[i].opcode;
       unaligned_cmd_data[i].bits = inst_q[i].bits;
@@ -211,6 +224,27 @@
       .data_out(cmd_data_o)
   );
 
+  // Trap: report whether any lane must trap, and the lowest-indexed such instruction.
+  logic trap_occurred;  // Set when at least one bit of unaligned_trap_valid is set.
+  RVVInstruction trap_data;  // Instruction of the first trapping lane, or the defaults below.
+  assign trap_valid_o = trap_occurred;
+  assign trap_data_o = trap_data;
+  always_comb begin
+    trap_occurred = (unaligned_trap_valid != 0);
+    // Defaults driven when no lane traps: zero pc/bits and the RVV opcode.
+    trap_data.pc = '0;
+    trap_data.bits = '0;
+    trap_data.opcode = RVV;
+
+    for (int i = 0; i < N; i++) begin
+      if (unaligned_trap_valid[i]) begin
+        trap_occurred = 1'b1;  // Already implied by the reduction above.
+        trap_data = unaligned_trap_data[i];
+        break;  // Lowest-indexed trapping lane wins.
+      end
+    end
+  end
+
   // Assertions
 `ifndef SYNTHESIS
   logic [N-1:0] lsu_requires_rs1_read;
diff --git a/hdl/verilog/rvv/inc/rvv_backend.svh b/hdl/verilog/rvv/inc/rvv_backend.svh
index 170478b..5388cd0 100755
--- a/hdl/verilog/rvv/inc/rvv_backend.svh
+++ b/hdl/verilog/rvv/inc/rvv_backend.svh
@@ -41,6 +41,7 @@
 
 // The architectural configuration state of the RVV core.
 typedef struct packed {
+  logic                         vill; // This configuration is illegal
   logic [`VL_WIDTH-1:0]         vl;       // Max 128, need one extra bit
   logic [`VSTART_WIDTH-1:0]     vstart;
   logic [`VTYPE_VMA_WIDTH-1:0]  ma;        // 0:inactive element undisturbed, 1:inactive element agnostic
@@ -61,6 +62,7 @@
 
 // A decoded instruction forwarded to the RVVCore from the scalar core.
 typedef struct packed {
+  logic [`PC_WIDTH-1:0] pc;
   RVVOpCode             opcode;   // effectively bits [6:0] from instruction
   logic [24:0]          bits;     // bits [31:7] from instruction
 } RVVInstruction;
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index 464c935..5c2fad2 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -165,6 +165,7 @@
     "core_mini_vmsbf_test",
     "core_mini_vmsof_test",
     "core_mini_vmsif_test",
+    "core_mini_vill_test",
 ]
 # END_TESTCASES_FOR_rvv_assembly_cocotb_test
 
diff --git a/tests/cocotb/rvv/BUILD b/tests/cocotb/rvv/BUILD
index 9320989..bb767b6 100644
--- a/tests/cocotb/rvv/BUILD
+++ b/tests/cocotb/rvv/BUILD
@@ -32,6 +32,9 @@
         "vcsr_test": {
             "srcs": ["vcsr_test.cc"],
         },
+        "vill_test": {
+            "srcs": ["vill_test.cc"],
+        },
         "viota_test": {
             "srcs": ["viota_test.cc"],
         },
@@ -63,6 +66,7 @@
         "rvv_load.elf",
         "vstart_store.elf",
         "vcsr_test.elf",
+        "vill_test.elf",
         "viota_test.elf",
         "vfirst_test.elf",
         "vcpop_test.elf",
diff --git a/tests/cocotb/rvv/vill_test.cc b/tests/cocotb/rvv/vill_test.cc
new file mode 100644
index 0000000..2fbcccd
--- /dev/null
+++ b/tests/cocotb/rvv/vill_test.cc
@@ -0,0 +1,43 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tests that a vector instruction raises a fault while the vill bit is set.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint32_t faulted __attribute__((section(".data"))) = 0;
+uint32_t mcause __attribute__((section(".data"))) = 0;
+uint32_t mtval __attribute__((section(".data"))) = 0;
+
+// Fault handler: records that a fault happened, along with mcause and mtval.
+extern "C" {  // C linkage so the handler's symbol name is not mangled.
+void kelvin_exception_handler() {
+  faulted = 1;  // Flag read back by the cocotb testbench.
+  uint32_t local_mcause;
+  asm volatile("csrr %0, mcause" : "=r"(local_mcause));
+  mcause = local_mcause;
+  uint32_t local_mtval;
+  asm volatile("csrr %0, mtval" : "=r"(local_mtval));
+  mtval = local_mtval;
+
+  asm volatile("ebreak");  // Presumably halts the core; the cocotb test waits for a halt.
+  while (1) {}  // Never return to the faulting instruction.
+}
+}
+
+int main(int argc, char **argv) {
+  asm volatile("vadd.vv v0, v1, v2");  // Expected to trap: main() runs no vsetvl first, so vill should still be set from reset.
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv_assembly_cocotb_test.py b/tests/cocotb/rvv_assembly_cocotb_test.py
index 0c366a0..d66d356 100644
--- a/tests/cocotb/rvv_assembly_cocotb_test.py
+++ b/tests/cocotb/rvv_assembly_cocotb_test.py
@@ -302,4 +302,31 @@
 async def core_mini_vmsif_test(dut):
     """Testbench to test vstart!=0 vmsbf."""
     await test_vstart_not_zero_failure(
-        dut, "kelvin_hw/tests/cocotb/rvv/vmsif_test.elf")
\ No newline at end of file
+        dut, "kelvin_hw/tests/cocotb/rvv/vmsif_test.elf")
+
+
+@cocotb.test()
+async def core_mini_vill_test(dut):  # Runs vill_test.elf and checks that it faults.
+    core_mini_axi = CoreMiniAxiInterface(dut)
+    await core_mini_axi.init()
+    await core_mini_axi.reset()
+    cocotb.start_soon(core_mini_axi.clock.start())
+    r = runfiles.Create()
+
+    elf_path = r.Rlocation("kelvin_hw/tests/cocotb/rvv/vill_test.elf")
+    if not elf_path:
+        raise ValueError("elf_path must consist a valid path")
+    with open(elf_path, "rb") as f:
+        entry_point = await core_mini_axi.load_elf(f)
+        faulted_addr = core_mini_axi.lookup_symbol(f, "faulted")  # Written by the ELF's exception handler.
+        mcause_addr = core_mini_axi.lookup_symbol(f, "mcause")  # Written by the ELF's exception handler.
+
+    await core_mini_axi.execute_from(entry_point)
+    await core_mini_axi.wait_for_halted()
+
+    faulted_result = (
+        await core_mini_axi.read_word(faulted_addr)).view(np.uint32)[0]
+    assert (faulted_result == 1)  # The exception handler ran, i.e. a fault was raised.
+    mcause_result = (
+        await core_mini_axi.read_word(mcause_addr)).view(np.uint32)[0]
+    assert (mcause_result == 0x2)  # 2 == illegal instruction. NOTE(review): the ELF also records mtval, which is not checked here.