diff --git a/sim/kelvin_vector_instructions.cc b/sim/kelvin_vector_instructions.cc
index 5470b24..a780422 100644
--- a/sim/kelvin_vector_instructions.cc
+++ b/sim/kelvin_vector_instructions.cc
@@ -150,9 +150,15 @@
 template <typename T>
 void KelvinVAdd(bool scalar, bool strip_mine, Instruction *inst) {
   // Return vs1 + vs2.
-  KelvinBinaryVectorOp(
-      inst, scalar, strip_mine,
-      std::function<T(T, T)>([](T vs1, T vs2) -> T { return vs1 + vs2; }));
+  KelvinBinaryVectorOp(inst, scalar, strip_mine,
+                       std::function<T(T, T)>([](T vs1, T vs2) -> T {
+                         using UT = typename std::make_unsigned<T>::type;
+                         // Cast to unsigned type before the operation to avoid
+                         // undefined overflow behavior in intx_t.
+                         UT uvs1 = static_cast<UT>(vs1);
+                         UT uvs2 = static_cast<UT>(vs2);
+                         return static_cast<T>(uvs1 + uvs2);
+                       }));
 }
 template void KelvinVAdd<int8_t>(bool, bool, Instruction *);
 template void KelvinVAdd<int16_t>(bool, bool, Instruction *);
@@ -161,9 +167,15 @@
 template <typename T>
 void KelvinVSub(bool scalar, bool strip_mine, Instruction *inst) {
   // Return vs1 - vs2.
-  KelvinBinaryVectorOp(
-      inst, scalar, strip_mine,
-      std::function<T(T, T)>([](T vs1, T vs2) -> T { return vs1 - vs2; }));
+  KelvinBinaryVectorOp(inst, scalar, strip_mine,
+                       std::function<T(T, T)>([](T vs1, T vs2) -> T {
+                         using UT = typename std::make_unsigned<T>::type;
+                         // Cast to unsigned type before the operation to avoid
+                         // undefined overflow behavior in intx_t.
+                         UT uvs1 = static_cast<UT>(vs1);
+                         UT uvs2 = static_cast<UT>(vs2);
+                         return static_cast<T>(uvs1 - uvs2);
+                       }));
 }
 template void KelvinVSub<int8_t>(bool, bool, Instruction *);
 template void KelvinVSub<int16_t>(bool, bool, Instruction *);
@@ -172,9 +184,15 @@
 template <typename T>
 void KelvinVRSub(bool scalar, bool strip_mine, Instruction *inst) {
   // Return vs2 - vs1.
-  KelvinBinaryVectorOp(
-      inst, scalar, strip_mine,
-      std::function<T(T, T)>([](T vs1, T vs2) -> T { return vs2 - vs1; }));
+  KelvinBinaryVectorOp(inst, scalar, strip_mine,
+                       std::function<T(T, T)>([](T vs1, T vs2) -> T {
+                         using UT = typename std::make_unsigned<T>::type;
+                         // Cast to unsigned type before the operation to avoid
+                         // undefined overflow behavior in intx_t.
+                         UT uvs1 = static_cast<UT>(vs1);
+                         UT uvs2 = static_cast<UT>(vs2);
+                         return static_cast<T>(uvs2 - uvs1);
+                       }));
 }
 template void KelvinVRSub<int8_t>(bool, bool, Instruction *);
 template void KelvinVRSub<int16_t>(bool, bool, Instruction *);
@@ -317,8 +335,13 @@
   // Return the summation of vd, vs1, and vs2.
   KelvinBinaryVectorOp<false /* halftype */, false /* widen_dst */, T, T, T, T>(
       inst, scalar, strip_mine,
-      std::function<T(T, T, T)>(
-          [](T vd, T vs1, T vs2) -> T { return vd + vs1 + vs2; }));
+      std::function<T(T, T, T)>([](T vd, T vs1, T vs2) -> T {
+        using UT = typename std::make_unsigned<T>::type;
+        UT uvs1 = static_cast<UT>(vs1);
+        UT uvs2 = static_cast<UT>(vs2);
+        UT uvd = static_cast<UT>(vd);
+        return static_cast<T>(uvd + uvs1 + uvs2);
+      }));
 }
 template void KelvinVAdd3<int8_t>(bool, bool, Instruction *);
 template void KelvinVAdd3<int16_t>(bool, bool, Instruction *);
@@ -437,10 +460,12 @@
 template <typename Td, typename Ts2>
 void KelvinVAcc(bool scalar, bool strip_mine, Instruction *inst) {
   // Accumulates operands with widening.
-  KelvinBinaryVectorOp(inst, scalar, strip_mine,
-                       std::function<Td(Td, Ts2)>([](Td vs1, Ts2 vs2) -> Td {
-                         return vs1 + static_cast<Td>(vs2);
-                       }));
+  KelvinBinaryVectorOp(
+      inst, scalar, strip_mine,
+      std::function<Td(Td, Ts2)>([](Td vs1, Ts2 vs2) -> Td {
+        using UTd = typename std::make_unsigned<Td>::type;
+        return static_cast<Td>(static_cast<UTd>(vs1) + static_cast<UTd>(vs2));
+      }));
 }
 template void KelvinVAcc<int16_t, int8_t>(bool, bool, Instruction *);
 template void KelvinVAcc<int32_t, int16_t>(bool, bool, Instruction *);
@@ -712,8 +737,11 @@
     } else if (shamt > 0) {
       s = (static_cast<int64_t>(vs1) + (round ? (1ll << (shamt - 1)) : 0)) >>
           shamt;
-    } else {
-      s = static_cast<int64_t>(vs1) << (-shamt);
+    } else {  // shamt < 0
+      using UT = typename std::make_unsigned<T>::type;
+      UT ushamt = static_cast<UT>(-shamt <= n ? -shamt : n);
+      CHECK(ushamt >= 0 && ushamt <= n);
+      s = static_cast<int64_t>(static_cast<uint64_t>(vs1) << ushamt);
     }
     T neg_max = std::numeric_limits<T>::min();
     T pos_max = std::numeric_limits<T>::max();
@@ -724,6 +752,7 @@
     return s;
   } else {
     constexpr int n = sizeof(T) * 8;
+    // Shift can be positive/negative.
     int shamt = static_cast<typename std::make_signed<T>::type>(vs2);
     uint64_t s = vs1;
     if (!vs1) {
@@ -731,10 +760,12 @@
     } else if (shamt > n) {
       s = 0;
     } else if (shamt > 0) {
-      s = (static_cast<int64_t>(vs1) + (round ? (1ull << (shamt - 1)) : 0)) >>
+      s = (static_cast<uint64_t>(vs1) + (round ? (1ull << (shamt - 1)) : 0)) >>
           shamt;
     } else {
-      s = static_cast<int64_t>(vs1) << (-shamt);
+      using UT = typename std::make_unsigned<T>::type;
+      UT ushamt = static_cast<UT>(-shamt <= n ? -shamt : n);
+      s = static_cast<uint64_t>(vs1) << (ushamt);
     }
     T pos_max = std::numeric_limits<T>::max();
     bool pos_sat = vs1 && (shamt < -n || s > pos_max);
@@ -869,9 +900,16 @@
 // Multiplication of vector elements.
 template <typename T>
 void KelvinVMul(bool scalar, bool strip_mine, Instruction *inst) {
-  KelvinBinaryVectorOp(
-      inst, scalar, strip_mine,
-      std::function<T(T, T)>([](T vs1, T vs2) -> T { return vs1 * vs2; }));
+  KelvinBinaryVectorOp(inst, scalar, strip_mine,
+                       std::function<T(T, T)>([](T vs1, T vs2) -> T {
+                         if (std::is_signed<T>::value) {
+                           return static_cast<T>(static_cast<int64_t>(vs1) *
+                                                 static_cast<int64_t>(vs2));
+                         } else {
+                           return static_cast<T>(static_cast<uint64_t>(vs1) *
+                                                 static_cast<uint64_t>(vs2));
+                         }
+                       }));
 }
 template void KelvinVMul<int8_t>(bool, bool, Instruction *);
 template void KelvinVMul<int16_t>(bool, bool, Instruction *);
diff --git a/sim/test/kelvin_vector_instructions_test.cc b/sim/test/kelvin_vector_instructions_test.cc
index 6aede99..8b5ed9c 100644
--- a/sim/test/kelvin_vector_instructions_test.cc
+++ b/sim/test/kelvin_vector_instructions_test.cc
@@ -321,7 +321,11 @@
 // Vector add.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VAddOp {
-  static Vd Op(Vs1 vs1, Vs2 vs2) { return vs1 + vs2; }
+  static Vd Op(Vs1 vs1, Vs2 vs2) {
+    int64_t vs1_ext = static_cast<int64_t>(vs1);
+    int64_t vs2_ext = static_cast<int64_t>(vs2);
+    return static_cast<Vd>(vs1_ext + vs2_ext);
+  }
   static void KelvinOp(bool scalar, bool strip_mine, Instruction *inst) {
     KelvinVAdd<Vd>(scalar, strip_mine, inst);
   }
@@ -333,7 +337,11 @@
 // Vector subtract.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VSubOp {
-  static Vd Op(Vs1 vs1, Vs2 vs2) { return vs1 - vs2; }
+  static Vd Op(Vs1 vs1, Vs2 vs2) {
+    int64_t vs1_ext = static_cast<int64_t>(vs1);
+    int64_t vs2_ext = static_cast<int64_t>(vs2);
+    return static_cast<Vd>(vs1_ext - vs2_ext);
+  }
   static void KelvinOp(bool scalar, bool strip_mine, Instruction *inst) {
     KelvinVSub<Vd>(scalar, strip_mine, inst);
   }
@@ -345,7 +353,11 @@
 // Vector reverse subtract.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VRSubOp {
-  static Vd Op(Vs1 vs1, Vs2 vs2) { return vs2 - vs1; }
+  static Vd Op(Vs1 vs1, Vs2 vs2) {
+    int64_t vs1_ext = static_cast<int64_t>(vs1);
+    int64_t vs2_ext = static_cast<int64_t>(vs2);
+    return static_cast<Vd>(vs2_ext - vs1_ext);
+  }
   static void KelvinOp(bool scalar, bool strip_mine, Instruction *inst) {
     KelvinVRSub<Vd>(scalar, strip_mine, inst);
   }
@@ -507,7 +519,12 @@
 // Vector add3.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VAdd3Op {
-  static Vd Op(Vd vd, Vs1 vs1, Vs2 vs2) { return vd + vs1 + vs2; }
+  static Vd Op(Vd vd, Vs1 vs1, Vs2 vs2) {
+    int64_t vs1_ext = static_cast<int64_t>(vs1);
+    int64_t vs2_ext = static_cast<int64_t>(vs2);
+    int64_t vd_ext = static_cast<int64_t>(vd);
+    return static_cast<Vd>(vd_ext + vs1_ext + vs2_ext);
+  }
   static void KelvinOp(bool scalar, bool strip_mine, Instruction *inst) {
     KelvinVAdd3<Vd>(scalar, strip_mine, inst);
   }
@@ -647,7 +664,11 @@
 // Vector accumulate with widening.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VAccOp {
-  static Vd Op(Vd vs1, Vs2 vs2) { return vs1 + static_cast<Vd>(vs2); }
+  static Vd Op(Vd vs1, Vs2 vs2) {
+    int64_t vs1_ext = static_cast<int64_t>(vs1);
+    int64_t vs2_ext = static_cast<int64_t>(vs2);
+    return static_cast<Vd>(vs1_ext + vs2_ext);
+  }
   static void KelvinOp(bool scalar, bool strip_mine, Instruction *inst) {
     KelvinVAcc<Vd, Vs2>(scalar, strip_mine, inst);
   }
@@ -999,8 +1020,9 @@
       } else if (shamt > 0) {
         s = (static_cast<int64_t>(vs1) + (round ? (1ll << (shamt - 1)) : 0)) >>
             shamt;
-      } else {
-        s = static_cast<int64_t>(vs1) << (-shamt);
+      } else {  // shamt < 0
+        uint32_t ushamt = static_cast<uint32_t>(-shamt <= n ? -shamt : n);
+        s = static_cast<int64_t>(static_cast<uint64_t>(vs1) << ushamt);
       }
       int64_t neg_max = (-1ull) << (n - 1);
       int64_t pos_max = (1ll << (n - 1)) - 1;
@@ -1025,7 +1047,9 @@
              (round ? (1ull << (shamt - 1)) : 0)) >>
             shamt;
       } else {
-        s = static_cast<uint64_t>(vs1) << (-shamt);
+        using UT = typename std::make_unsigned<Vd>::type;
+        UT ushamt = static_cast<UT>(-shamt <= n ? -shamt : n);
+        s = static_cast<uint64_t>(vs1) << (ushamt);
       }
       uint64_t pos_max = (1ull << n) - 1;
       bool pos_sat = vs1 && (shamt < -n || s >= (1ull << n));
@@ -1207,7 +1231,15 @@
 // Vector elements multiplication.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VMulOp {
-  static Vd Op(Vs1 vs1, Vs2 vs2) { return vs1 * vs2; }
+  static Vd Op(Vs1 vs1, Vs2 vs2) {
+    if (std::is_signed<Vd>::value) {
+      return static_cast<Vd>(static_cast<int64_t>(vs1) *
+                             static_cast<int64_t>(vs2));
+    } else {
+      return static_cast<Vd>(static_cast<uint64_t>(vs1) *
+                             static_cast<uint64_t>(vs2));
+    }
+  }
   static void KelvinOp(bool scalar, bool strip_mine, Instruction *inst) {
     KelvinVMul<Vd>(scalar, strip_mine, inst);
   }
diff --git a/sim/test/kelvin_vector_instructions_test_base.h b/sim/test/kelvin_vector_instructions_test_base.h
index 7335a28..6f3170e 100644
--- a/sim/test/kelvin_vector_instructions_test_base.h
+++ b/sim/test/kelvin_vector_instructions_test_base.h
@@ -174,8 +174,13 @@
     int vs1_size = vector_length_in_bytes / sizeof(Vs1);
     const size_t vs1_regs_count = num_ops * src1_widen_factor;
     std::vector<Vs1> vs1_value(vs1_size * vs1_regs_count);
+    // Use the first 4 elements to check the min/max boundary behavior
+    vs1_value[0] = std::numeric_limits<Vs1>::lowest();
+    vs1_value[1] = std::numeric_limits<Vs1>::lowest();
+    vs1_value[2] = std::numeric_limits<Vs1>::max();
+    vs1_value[3] = std::numeric_limits<Vs1>::max();
     auto vs1_span = absl::Span<Vs1>(vs1_value);
-    FillArrayWithRandomValues<Vs1>(vs1_span);
+    FillArrayWithRandomValues<Vs1>(vs1_span.subspan(4, vs1_span.size() - 4));
     for (int i = 0; i < vs1_regs_count; i++) {
       auto vs1_name = absl::StrCat("v", kVs1 + i);
       SetVectorRegisterValues<Vs1>(
@@ -196,8 +201,14 @@
           static_cast<typename mpact::sim::riscv::SameSignedType<
               RV32Register::ValueType, Ts2>::type>(rs2_reg_value));
     } else if (!halftype_op) {
+      // Use the value slightly greater than min so VShift won't complain
+      // -shamt.
+      vs2_value[0] = std::numeric_limits<Ts2>::lowest() + 1;
+      vs2_value[1] = std::numeric_limits<Ts2>::max();
+      vs2_value[2] = std::numeric_limits<Ts2>::lowest() + 1;
+      vs2_value[3] = std::numeric_limits<Ts2>::max();
       auto vs2_span = absl::Span<Ts2>(vs2_value);
-      FillArrayWithRandomValues<Ts2>(vs2_span);
+      FillArrayWithRandomValues<Ts2>(vs2_span.subspan(4, vs2_span.size() - 4));
       for (int i = 0; i < num_ops; i++) {
         auto vs2_name = absl::StrCat("v", kVs2 + i);
         SetVectorRegisterValues<Ts2>(
@@ -209,8 +220,12 @@
     const size_t vd_size = vector_length_in_bytes / sizeof(Vd);
     const size_t dest_regs_count = num_ops * dest_regs_per_op;
     std::vector<Vd> vd_value(vd_size * dest_regs_count);
+    vd_value[0] = std::numeric_limits<Vd>::lowest();
+    vd_value[1] = std::numeric_limits<Vd>::max();
+    vd_value[2] = std::numeric_limits<Vd>::lowest();
+    vd_value[3] = std::numeric_limits<Vd>::max();
     auto vd_span = absl::Span<Vd>(vd_value);
-    FillArrayWithRandomValues<Vd>(vd_span);
+    FillArrayWithRandomValues<Vd>(vd_span.subspan(4, vd_span.size() - 4));
     for (int i = 0; i < dest_regs_count; i++) {
       auto vd_name = absl::StrCat("v", kVd + i);
       SetVectorRegisterValues<Vd>(
