diff --git a/tests/verilator_sim/BUILD b/tests/verilator_sim/BUILD
index 5c346cd..2739fc1 100644
--- a/tests/verilator_sim/BUILD
+++ b/tests/verilator_sim/BUILD
@@ -167,7 +167,7 @@
     ],
 )
 
-cc_binary(
+cc_test(
     name = "vldst_tb",
     srcs = [
         "kelvin/vldst_tb.cc",
@@ -181,7 +181,7 @@
     ],
 )
 
-cc_binary(
+cc_test(
     name = "vld_tb",
     srcs = [
         "kelvin/vld_tb.cc",
@@ -221,7 +221,7 @@
     ],
 )
 
-cc_binary(
+cc_test(
     name = "vst_tb",
     srcs = [
         "kelvin/vst_tb.cc",
@@ -248,4 +248,3 @@
         "@accellera_systemc//:systemc",
     ]
 )
-
diff --git a/tests/verilator_sim/kelvin/vld_tb.cc b/tests/verilator_sim/kelvin/vld_tb.cc
index de47606..cfb8db6 100644
--- a/tests/verilator_sim/kelvin/vld_tb.cc
+++ b/tests/verilator_sim/kelvin/vld_tb.cc
@@ -194,21 +194,32 @@
     io_axi_data_bits_id = r.id;
     io_axi_data_bits_data = rdata;
 
-#define IN_READ(idx)                                          \
-    Input(io_in_bits_##idx##_bits_m,                          \
-          io_in_bits_##idx##_bits_op.read().get_word(0),      \
-          io_in_bits_##idx##_bits_f2.read().get_word(0),      \
-          io_in_bits_##idx##_bits_sz.read().get_word(0),      \
-          io_in_bits_##idx##_bits_vd_valid,                   \
-          io_in_bits_##idx##_bits_vd_addr.read().get_word(0), \
-          io_in_bits_##idx##_bits_sv_addr.read().get_word(0), \
-          io_in_bits_##idx##_bits_sv_data.read().get_word(0));
+#define IN_READ(idx)                                           \
+  {                                                            \
+    Input(io_in_bits_##idx##_bits_m,                           \
+          io_in_bits_##idx##_bits_op.read().get_word(0),       \
+          io_in_bits_##idx##_bits_f2.read().get_word(0),       \
+          io_in_bits_##idx##_bits_sz.read().get_word(0),       \
+          io_in_bits_##idx##_bits_vd_valid,                    \
+          io_in_bits_##idx##_bits_vd_addr.read().get_word(0),  \
+          io_in_bits_##idx##_bits_sv_addr.read().get_word(0),  \
+          io_in_bits_##idx##_bits_sv_data.read().get_word(0)); \
+    cmd_count_ += io_in_bits_##idx##_bits_m ? 4 : 1;           \
+  }
 
     if (io_in_valid && io_in_ready) {
-      if (io_in_bits_0_valid) { IN_READ(0); cmd_count_ += io_in_bits_0_bits_m ? 4 : 1; }
-      if (io_in_bits_1_valid) { IN_READ(1); cmd_count_ += io_in_bits_1_bits_m ? 4 : 1; }
-      if (io_in_bits_2_valid) { IN_READ(2); cmd_count_ += io_in_bits_2_bits_m ? 4 : 1; }
-      if (io_in_bits_3_valid) { IN_READ(3); cmd_count_ += io_in_bits_3_bits_m ? 4 : 1; }
+      if (io_in_bits_0_valid) {
+        IN_READ(0);
+      }
+      if (io_in_bits_1_valid) {
+        IN_READ(1);
+      }
+      if (io_in_bits_2_valid) {
+        IN_READ(2);
+      }
+      if (io_in_bits_3_valid) {
+        IN_READ(3);
+      }
     }
 
 #define IN_RAND(idx)                                      \
@@ -217,7 +228,6 @@
     const int op = vld;                                   \
     const bool m = rand_bool();                           \
     const int vd_addr = rand_uint32() & (m ? 60 : 63);    \
-    const bool vd_valid = op == vld;                      \
     const uint8_t f2 = rand_int(0, 7);                    \
     const bool stride = (f2 >> 1) & 1;                    \
     const uint32_t mask = ~((1u << kAlignedLsb) - 1);     \
@@ -252,7 +262,7 @@
       wreg_t ref, dut;
       check(wreg_.read(ref), "wreg empty");
       dut.addr = io_write_addr.read().get_word(0);
-      uint32_t* dst = (uint32_t*)dut.data;
+      uint32_t* dst = reinterpret_cast<uint32_t*>(dut.data);
       for (int i = 0; i < kVector / 32; ++i) {
         dst[i] = io_write_data.read().get_word(i);
       }
@@ -266,7 +276,6 @@
 
     if (io_axi_addr_valid && io_axi_addr_ready) {
       raxi_t ref, dut;
-      int cnt = raxi_.count();
       check(raxi_.read(ref), "axi empty");
       raxi_count_++;
       dut.addr = io_axi_addr_bits_addr.read().get_word(0);
@@ -333,15 +342,8 @@
     const bool length = (f2 >> 0) & 1;
 
     const int sm = m ? 4 : 1;
-    uint32_t offset = 0;
-    uint32_t remain = 0;
-
-    if (stride) {
-      offset = data * sz;
-    } else {
-      offset = VLENB;
-    }
-
+    uint32_t offset = stride ? data * sz : VLENB;
+    uint32_t remain;
     if (length) {
       switch (sz) {
         case 1:
@@ -364,16 +366,20 @@
     for (int i = 0; i < (m ? 4 : 1); ++i) {
       wreg_t w;
       w.addr = vd_addr;
-      uint32_t* dst = (uint32_t*)w.data;
+      uint32_t* dst = reinterpret_cast<uint32_t*>(w.data);
       for (int i = 0; i < kVector / 32; ++i) {
         dst[i] = rand_uint32();
       }
 
+      // Suppress -Wunused-variable: `n` is only read by the #if 0 code below.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
       const int n = RAxi(0, op, offset, stride, w, addr, remain);
+#pragma GCC diagnostic pop
 
 #if 0  // Do not use
       // Write register lane zeroing.
-      uint8_t* src = (uint8_t*)w.data;
+      uint8_t* src = reinterpret_cast<uint8_t*>(w.data);
       for (int i = 0; i < kVector / 8; ++i) {
         if (i < n) continue;
         src[i] = 0;
@@ -395,7 +401,7 @@
     const uint32_t lsb_addr = addr & ((kVector / 8) - 1);
 
     const uint8_t* src = (const uint8_t*)w.data;
-    uint8_t* dst = (uint8_t*)r.data;
+    uint8_t* dst = reinterpret_cast<uint8_t*>(r.data);
     for (int i = 0; i < kVector / 8; ++i) {
       const int idx = (i + lsb_addr) % (kVector / 8);
       dst[idx] = src[i];
diff --git a/tests/verilator_sim/kelvin/vldst_tb.cc b/tests/verilator_sim/kelvin/vldst_tb.cc
index 7863d39..28d8fa3 100644
--- a/tests/verilator_sim/kelvin/vldst_tb.cc
+++ b/tests/verilator_sim/kelvin/vldst_tb.cc
@@ -201,8 +201,6 @@
     const int vd_addr = rand_uint32() & (m ? 60 : 63);                     \
     const int vs_addr = rand_uint32() & (m ? 60 : 63);                     \
     const int vs_tag = rand_uint32();                                      \
-    const bool vd_valid = op == vld;                                       \
-    const bool vs_valid = op == vst || op == vstq;                         \
     uint32_t sv_addr = std::min(rand_uint32() & ~0x80000000, 0x7fffff00u); \
     uint32_t sv_data = (rand_uint32() >> rand_int(0, 31));                 \
     sv_data = std::min(((0x80000000u - sv_addr) / 64), sv_data);           \
@@ -281,7 +279,7 @@
       wreg_t ref, dut;
       check(wreg_.read(ref), "wreg empty");
       dut.addr = io_write_addr.read().get_word(0);
-      uint32_t* dst = (uint32_t*)dut.data;
+      uint32_t* dst = reinterpret_cast<uint32_t*>(dut.data);
       for (int i = 0; i < kVector / 32; ++i) {
         dst[i] = io_write_data.read().get_word(i);
       }
@@ -301,7 +299,7 @@
       dut.size = io_dbus_size.read().get_word(0);
       dut.last = io_last;
       dut.write = io_dbus_write;
-      uint32_t* dst = (uint32_t*)dut.wdata;
+      uint32_t* dst = reinterpret_cast<uint32_t*>(dut.wdata);
       for (int i = 0; i < kVector / 32; ++i) {
         dst[i] = io_dbus_wdata.read().get_word(i);
       }
@@ -409,8 +407,6 @@
     assert(!(op == vstq && (vd_valid || !vs_valid)));
     assert(!(op == vld && (!vd_valid || vs_valid)));
 
-    rreg_t r;
-
     const bool stride = (f2 >> 1) & 1;
     const bool length = (f2 >> 0) & 1;
 
@@ -448,7 +444,7 @@
       if (vs_valid) {
         r.addr = vs_addr;
         r.tag = vs_tag >> (vs_addr & 3);
-        uint32_t* dst = (uint32_t*)r.data;
+        uint32_t* dst = reinterpret_cast<uint32_t*>(r.data);
         for (int i = 0; i < kVector / 32; ++i) {
           dst[i] = rand_uint32();
         }
@@ -457,7 +453,7 @@
       wreg_t w;
       if (vd_valid) {
         w.addr = vd_addr;
-        uint32_t* dst = (uint32_t*)w.data;
+        uint32_t* dst = reinterpret_cast<uint32_t*>(w.data);
         for (int i = 0; i < kVector / 32; ++i) {
           dst[i] = rand_uint32();
         }
@@ -479,7 +475,7 @@
 
       // Write register lane zeroing.
       if (vd_valid) {
-        uint8_t* src = (uint8_t*)w.data;
+        uint8_t* src = reinterpret_cast<uint8_t*>(w.data);
         for (int i = 0; i < kVector / 8; ++i) {
           if (i < n) continue;
           src[i] = 0;
@@ -510,16 +506,16 @@
 
     if (d.write) {
       const uint8_t* src = (const uint8_t*)r.data;
-      uint8_t* dst = (uint8_t*)d.wdata;
+      uint8_t* dst = reinterpret_cast<uint8_t*>(d.wdata);
       for (int i = 0; i < kVector / 8; ++i) {
         const int idx0 = (i + lsb_addr) % (kVector / 8);
         const int idx1 = (i + lsb_ashf) % (kVector / 8);
-        d.wmask[idx0] = i < d.size;
+        d.wmask[idx0] = static_cast<uint8_t>(static_cast<uint32_t>(i) < d.size);
         dst[idx1] = src[i];
       }
     } else {
       const uint8_t* src = (const uint8_t*)w.data;
-      uint8_t* dst = (uint8_t*)d.rdata;
+      uint8_t* dst = reinterpret_cast<uint8_t*>(d.rdata);
       for (int i = 0; i < kVector / 8; ++i) {
         const int idx = (i + lsb_addr) % (kVector / 8);
         dst[idx] = src[i];
diff --git a/tests/verilator_sim/kelvin/vst_tb.cc b/tests/verilator_sim/kelvin/vst_tb.cc
index 053cd01..9c5aafb 100644
--- a/tests/verilator_sim/kelvin/vst_tb.cc
+++ b/tests/verilator_sim/kelvin/vst_tb.cc
@@ -213,22 +213,35 @@
     io_axi_resp_bits_resp = 0;
     io_axi_resp_bits_id = w.id;
 
-#define IN_READ(idx)                                          \
-    Input(io_in_bits_##idx##_bits_m,                          \
-          io_in_bits_##idx##_bits_op.read().get_word(0),      \
-          io_in_bits_##idx##_bits_f2.read().get_word(0),      \
-          io_in_bits_##idx##_bits_sz.read().get_word(0),      \
-          io_in_bits_##idx##_bits_vs_valid,                   \
-          io_in_bits_##idx##_bits_vs_addr.read().get_word(0), \
-          io_in_bits_##idx##_bits_vs_tag.read().get_word(0),  \
-          io_in_bits_##idx##_bits_sv_addr.read().get_word(0), \
-          io_in_bits_##idx##_bits_sv_data.read().get_word(0));
+#define IN_READ(idx)                                                     \
+  {                                                                      \
+    Input(io_in_bits_##idx##_bits_m,                                     \
+          io_in_bits_##idx##_bits_op.read().get_word(0),                 \
+          io_in_bits_##idx##_bits_f2.read().get_word(0),                 \
+          io_in_bits_##idx##_bits_sz.read().get_word(0),                 \
+          io_in_bits_##idx##_bits_vs_valid,                              \
+          io_in_bits_##idx##_bits_vs_addr.read().get_word(0),            \
+          io_in_bits_##idx##_bits_vs_tag.read().get_word(0),             \
+          io_in_bits_##idx##_bits_sv_addr.read().get_word(0),            \
+          io_in_bits_##idx##_bits_sv_data.read().get_word(0));           \
+    cmd_count_ +=                                                        \
+        (io_in_bits_##idx##_bits_m ? 4 : 1) *                            \
+        (io_in_bits_##idx##_bits_op.read().get_word(0) == vstq ? 4 : 1); \
+  }
 
     if (io_in_valid && io_in_ready) {
-      if (io_in_bits_0_valid) { IN_READ(0); cmd_count_ += (io_in_bits_0_bits_m ? 4 : 1) * (io_in_bits_0_bits_op.read().get_word(0) == vstq ? 4 : 1); }
-      if (io_in_bits_1_valid) { IN_READ(1); cmd_count_ += (io_in_bits_1_bits_m ? 4 : 1) * (io_in_bits_1_bits_op.read().get_word(0) == vstq ? 4 : 1); }
-      if (io_in_bits_2_valid) { IN_READ(2); cmd_count_ += (io_in_bits_2_bits_m ? 4 : 1) * (io_in_bits_2_bits_op.read().get_word(0) == vstq ? 4 : 1); }
-      if (io_in_bits_3_valid) { IN_READ(3); cmd_count_ += (io_in_bits_3_bits_m ? 4 : 1) * (io_in_bits_3_bits_op.read().get_word(0) == vstq ? 4 : 1); }
+      if (io_in_bits_0_valid) {
+        IN_READ(0);
+      }
+      if (io_in_bits_1_valid) {
+        IN_READ(1);
+      }
+      if (io_in_bits_2_valid) {
+        IN_READ(2);
+      }
+      if (io_in_bits_3_valid) {
+        IN_READ(3);
+      }
     }
 
 #define IN_RAND(idx)                                                    \
@@ -238,11 +251,10 @@
     const bool m = rand_bool();                                         \
     const int vs_addr = rand_uint32() & (m ? 60 : 63);                  \
     const int vs_tag = rand_uint32();                                   \
-    const bool vs_valid = op == vst || op == vstq;                      \
     const uint8_t f2 = rand_int(0, 7);                                  \
     const bool stride = (f2 >> 1) & 1;                                  \
-    const uint32_t mask = ~(op == vst ? (1u << kAlignedLsb) - 1 :       \
-                          (1u << (kAlignedLsb - 2)) - 1);               \
+    const uint32_t mask = ~(op == vst ? (1u << kAlignedLsb) - 1         \
+                                      : (1u << (kAlignedLsb - 2)) - 1); \
     uint32_t addr = (rand_uint32() & mask) | 0x80000000u;               \
     uint32_t data = rand_uint32() >> rand_int(0, 32);                   \
     data = std::min(((0xffffffffu - addr) / 64), data);                 \
@@ -293,7 +305,7 @@
       }
 
       sc_bv<kVector> rbits;
-      const uint32_t* src = (const uint32_t*)r.data;
+      const uint32_t* src = reinterpret_cast<const uint32_t*>(r.data);
       for (int i = 0; i < kVector / 32; ++i) {
         rbits.set_word(i, src[i]);
       }
@@ -306,7 +318,7 @@
       waxi_count_++;
       dut.addr = io_axi_addr_bits_addr.read().get_word(0);
       dut.id   = io_axi_addr_bits_id.read().get_word(0);
-      uint32_t* dst = (uint32_t*)dut.data;
+      uint32_t* dst = reinterpret_cast<uint32_t*>(dut.data);
       for (int i = 0; i < kVector / 32; ++i) {
         dst[i] = io_axi_data_bits_data.read().get_word(i);
       }
@@ -325,7 +337,7 @@
     }
   }
 
-private:
+ private:
   struct waxi_t {
     uint32_t addr;
     uint32_t id;
@@ -371,8 +383,6 @@
 
   void Input(bool m, uint8_t op, uint8_t f2, uint8_t sz, bool vs_valid,
              uint8_t vs_addr, uint8_t vs_tag, uint32_t addr, uint32_t data) {
-    rreg_t r;
-
     const bool stride = (f2 >> 1) & 1;
     const bool length = (f2 >> 0) & 1;
 
@@ -410,7 +420,7 @@
       if (vs_valid) {
         r.addr = vs_addr;
         r.tag = vs_tag >> (vs_addr & 3);
-        uint32_t* dst = (uint32_t*)r.data;
+        uint32_t* dst = reinterpret_cast<uint32_t*>(r.data);
         for (int i = 0; i < kVector / 32; ++i) {
           dst[i] = rand_uint32();
         }
@@ -441,19 +451,19 @@
     const uint32_t lsb_addr = addr & ((kVector / 8) - 1);
     const uint32_t vstq_quad = step;
     const uint32_t vstq_offset = vstq_quad << (kAlignedLsb - 2);
-    const uint8_t* src = (const uint8_t*)r.data;
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(r.data);
 
     waxi_t w;
     w.addr = addr & ~0x80000000 & ~(VLENB - 1);  // align to line
     w.size = std::min(remain, is_vstq ? VLENB / 4 : VLENB);
     w.id   = r.addr;
 
-    uint8_t* dst = (uint8_t*)w.data;
+    uint8_t* dst = reinterpret_cast<uint8_t*>(w.data);
 
     for (int i = 0; i < kVector / 8; ++i) {
       const int idx0 = (i + lsb_addr) % (kVector / 8);
       const int idx1 = is_vstq ? (i % (kVector / 8 / 4)) + vstq_offset : i;
-      w.strb[idx0] = i < w.size;
+      w.strb[idx0] = static_cast<uint8_t>(static_cast<uint32_t>(i) < w.size);
       dst[i] = src[idx1];
     }
 
