Add valu_tb SystemC test. Add a vector op reference implementation and compare its results with the Chisel implementation. Change-Id: Ibbe9a7c2ad0c6752e3b4379c1d141e0bbae376ad
diff --git a/hdl/chisel/BUILD b/hdl/chisel/BUILD index cfad4b6..d260a33 100644 --- a/hdl/chisel/BUILD +++ b/hdl/chisel/BUILD
@@ -20,7 +20,7 @@ deps = [ ":common", ":kelvin", - ] + ], ) chisel_cc_library( @@ -32,7 +32,7 @@ "//hdl/verilog:clock_gate", "//hdl/verilog:sram_1rw_256x256", "//hdl/verilog:sram_1rw_256x288", - ] + ], ) chisel_cc_library( @@ -70,6 +70,13 @@ ) chisel_cc_library( + name = "valu_cc_library", + chisel_lib = ":kelvin", + emit_class = "kelvin.EmitVAlu", + module_name = "VAlu", +) + +chisel_cc_library( name = "vcmdq_cc_library", chisel_lib = ":kelvin", emit_class = "kelvin.EmitVCmdq",
diff --git a/tests/verilator_sim/BUILD b/tests/verilator_sim/BUILD index b0216ce..216b7a7 100644 --- a/tests/verilator_sim/BUILD +++ b/tests/verilator_sim/BUILD
@@ -88,7 +88,21 @@ ], ) -# TODO(derekjchow): Add valu and valuint test benches +cc_test( + name = "valu_tb", + size = "large", + srcs = [ + "kelvin/alu_ref.h", + "kelvin/valu.h", + "kelvin/valu_tb.cc", + ], + deps = [ + ":kelvin_if", + ":sim_libs", + ":vencodeop", + "//hdl/chisel:valu_cc_library", + ], +) cc_library( name = "vdecode",
diff --git a/tests/verilator_sim/kelvin/alu_ref.h b/tests/verilator_sim/kelvin/alu_ref.h new file mode 100644 index 0000000..82d950f --- /dev/null +++ b/tests/verilator_sim/kelvin/alu_ref.h
@@ -0,0 +1,438 @@ +// Copyright 2023 Google LLC +// +// Reference alu ops implementation +#ifndef TESTS_VERILATOR_SIM_KELVIN_ALU_REF_H_ +#define TESTS_VERILATOR_SIM_KELVIN_ALU_REF_H_ + +#include <stdint.h> + +#include <algorithm> +#include <limits> +#include <type_traits> +#include <utility> + +// ----------------------------------------------------------------------------- +// ALU. + +template <typename T> +typename std::make_unsigned<T>::type absd(T a, T b) { + using UT = typename std::make_unsigned<T>::type; + UT ua = static_cast<UT>(a); + UT ub = static_cast<UT>(b); + return a > b ? ua - ub : ub - ua; +} + +template <typename Td, typename Ts> +Td acc(Td a, Ts b) { + assert(sizeof(Td) > sizeof(Ts)); + using UTd = typename std::make_unsigned<Td>::type; + return static_cast<Td>(static_cast<UTd>(a) + static_cast<UTd>(b)); +} + +template <typename T> +T add(T a, T b) { + using UT = typename std::make_unsigned<T>::type; + return static_cast<T>(static_cast<UT>(a) + static_cast<UT>(b)); +} + +template <typename T> +T add3(T a, T b, T c) { + using UT = typename std::make_unsigned<T>::type; + return static_cast<T>(static_cast<UT>(a) + static_cast<UT>(b) + + static_cast<UT>(c)); +} + +// Saturated addition. +template <typename T> +T adds(T a, T b) { + if (std::is_signed<T>::value) { + int64_t m = static_cast<int64_t>(a) + static_cast<int64_t>(b); + m = std::min<int64_t>(std::max<int64_t>(std::numeric_limits<T>::min(), m), + std::numeric_limits<T>::max()); + return m; + } + uint64_t m = static_cast<uint64_t>(a) + static_cast<uint64_t>(b); + m = std::min<uint64_t>(std::numeric_limits<T>::max(), m); + return m; +} + +// Widening add. 
+template <typename T> +uint32_t addw(T a, T b) { + if (std::is_signed<T>::value) { + return int64_t(a) + int64_t(b); + } + return uint64_t(a) + uint64_t(b); +} + +template <typename T> +T cmp_eq(T a, T b) { + return a == b; +} + +template <typename T> +T cmp_ne(T a, T b) { + return a != b; +} + +template <typename T> +T cmp_lt(T a, T b) { + return a < b; +} + +template <typename T> +T cmp_le(T a, T b) { + return a <= b; +} + +template <typename T> +T cmp_gt(T a, T b) { + return a > b; +} + +template <typename T> +T cmp_ge(T a, T b) { + return a >= b; +} + +template <typename T> +T dup(T b) { + return b; +} + +template <typename T> +T log_and(T a, T b) { + return a & b; +} + +template <typename T> +int log_clb(T x) { + constexpr int n = sizeof(T) * 8; + if (x & (1u << (n - 1))) { + x = ~x; + } + for (int count = 0; count < n; count++) { + if ((x << count) >> (n - 1)) { + return count; + } + } + return n; +} + +template <typename T> +int log_clz(const T x) { + constexpr int n = sizeof(T) * 8; + for (int count = 0; count < n; count++) { + if ((x << count) >> (n - 1)) { + return count; + } + } + return n; +} + +template <typename T> +int log_cpop(T a) { + constexpr int n = sizeof(T) * 8; + int count = 0; + for (int i = 0; i < n; i++) { + if (a & (1 << i)) { + count++; + } + } + return count; +} + +template <typename T> +T log_not(T a) { + return ~a; +} + +template <typename T> +T log_or(T a, T b) { + return a | b; +} + +template <typename T> +T log_rev(T a, T b) { + T count = b & 0b11111; + if (count & 1) a = ((a & 0x55555555) << 1) | ((a & 0xAAAAAAAA) >> 1); + if (count & 2) a = ((a & 0x33333333) << 2) | ((a & 0xCCCCCCCC) >> 2); + if (count & 4) a = ((a & 0x0F0F0F0F) << 4) | ((a & 0xF0F0F0F0) >> 4); + if (sizeof(T) == 1) return a; + if (count & 8) a = ((a & 0x00FF00FF) << 8) | ((a & 0xFF00FF00) >> 8); + if (sizeof(T) == 2) return a; + if (count & 16) a = ((a & 0x0000FFFF) << 16) | ((a & 0xFFFF0000) >> 16); + return a; +} + +template <typename T> +T log_ror(T a, T b) 
{ + if (sizeof(T) == 4) { + if (b & 1) a = (a >> 1) | (a << 31); + if (b & 2) a = (a >> 2) | (a << 30); + if (b & 4) a = (a >> 4) | (a << 28); + if (b & 8) a = (a >> 8) | (a << 24); + if (b & 16) a = (a >> 16) | (a << 16); + } else if (sizeof(T) == 2) { + if (b & 1) a = (a >> 1) | (a << 15); + if (b & 2) a = (a >> 2) | (a << 14); + if (b & 4) a = (a >> 4) | (a << 12); + if (b & 8) a = (a >> 8) | (a << 8); + } else if (sizeof(T) == 1) { + if (b & 1) a = (a >> 1) | (a << 7); + if (b & 2) a = (a >> 2) | (a << 6); + if (b & 4) a = (a >> 4) | (a << 4); + } else { + assert(false); + } + return a; +} + +template <typename T> +T log_xor(T a, T b) { + return a ^ b; +} + +template <typename T> +T hadd(T a, T b, int r) { + if (std::is_signed<T>::value) { + return (static_cast<int64_t>(a) + static_cast<int64_t>(b) + r) >> 1; + } + return (static_cast<uint64_t>(a) + static_cast<uint64_t>(b) + r) >> 1; +} + +template <typename T> +T hsub(T a, T b, int r) { + if (std::is_signed<T>::value) { + return (static_cast<int64_t>(a) - static_cast<int64_t>(b) + r) >> 1; + } + return (static_cast<uint64_t>(a) - static_cast<uint64_t>(b) + r) >> 1; +} + +template <typename T> +T madd(T a, T b, T c) { + if (std::is_signed<T>::value) { + return static_cast<int64_t>(a) * static_cast<int64_t>(b) + + static_cast<int64_t>(c); + } + return static_cast<uint64_t>(a) * static_cast<uint64_t>(b) + + static_cast<uint64_t>(c); +} + +template <typename T> +T max(T a, T b) { + return a > b ? a : b; +} + +template <typename T> +T min(T a, T b) { + return a < b ? 
a : b; +} + +template <typename T> +T mul(T a, T b) { + return a * b; +} + +template <typename T> +T muls(T a, T b) { + if (std::is_signed<T>::value) { + int64_t m = static_cast<int64_t>(a) * static_cast<int64_t>(b); + m = std::max( + static_cast<int64_t>(std::numeric_limits<T>::min()), + std::min(static_cast<int64_t>(std::numeric_limits<T>::max()), m)); + return m; + } + uint64_t m = uint64_t(a) * uint64_t(b); + m = std::min(static_cast<uint64_t>(std::numeric_limits<T>::max()), m); + return m; +} + +// Widening multiplication. +template <typename T> +uint32_t mulw(T a, T b) { + if (std::is_signed<T>::value) { + return static_cast<int64_t>(a) * static_cast<int64_t>(b); + } + return static_cast<uint64_t>(a) * static_cast<uint64_t>(b); +} + +template <typename T> +T mv(T a) { + return a; +} + +template <typename T> +std::pair<T, T> mvp(T a, T b) { + return {a, b}; +} + +template <typename T> +T dmulh(T a, T b, bool r, bool neg) { + constexpr int n = sizeof(T) * 8; + constexpr T maxNeg = 0x80000000 >> (32 - n); + int64_t m = static_cast<int64_t>(a) * static_cast<int64_t>(b); + if (r) { + int64_t rnd = 0x40000000ll >> (32 - n); + if (m < 0 && neg) { + rnd = (-0x40000000ll) >> (32 - n); + } + m += rnd; + } + m >>= (n - 1); + + if (a == maxNeg && b == maxNeg) { + m = 0x7fffffff >> (32 - n); + } + + return m; +} + +template <typename T> +T mulh(T a, T b, bool r) { + constexpr int n = sizeof(T) * 8; + if (std::is_signed<T>::value) { + int64_t m = static_cast<int64_t>(a) * static_cast<int64_t>(b); + m += r ? 1ll << (n - 1) : 0; + return static_cast<uint64_t>(m) >> n; + } + uint64_t m = static_cast<uint64_t>(a) * static_cast<uint64_t>(b); + m += r ? 
1ull << (n - 1) : 0; + return m >> n; +} + +template <typename T> +int32_t padd(T a, T b) { + if (std::is_signed<T>::value) { + return int64_t(a) + int64_t(b); + } + return uint64_t(a) + uint64_t(b); +} + +template <typename T> +uint32_t psub(T a, T b) { + if (std::is_signed<T>::value) { + return int64_t(a) - int64_t(b); + } + return uint64_t(a) - uint64_t(b); +} + +template <typename T> +T rsub(T a, T b) { + using UT = typename std::make_unsigned<T>::type; + return static_cast<T>(static_cast<UT>(b) - static_cast<UT>(a)); +} + +template <typename T> +T shl(T a, T b) { + constexpr int n = sizeof(T) * 8; + b &= (n - 1); + return a << b; +} + +template <typename T> +T shr(T a, T b) { + constexpr int n = sizeof(T) * 8; + b &= (n - 1); + return a >> b; +} + +template <typename T1, typename T2> +T1 srans(T2 a, T1 b, bool r, bool u) { + static_assert(2 * sizeof(T1) == sizeof(T2) || 4 * sizeof(T1) == sizeof(T2)); + assert(std::is_signed<T1>::value == true); + assert(std::is_signed<T2>::value == true); + constexpr int n = sizeof(T2) * 8; + constexpr int m = sizeof(T1) * 8; + b &= (n - 1); + int64_t s = (static_cast<int64_t>(a) + (b && r ? (1ll << (b - 1)) : 0)) >> b; + int64_t neg_max = !u ? -1ll << (m - 1) : 0; + int64_t pos_max = !u ? (1ll << (m - 1)) - 1 : (1ull << m) - 1; + bool neg_sat = s < neg_max; + bool pos_sat = s > pos_max; + bool zero = !a; + if (neg_sat) return neg_max; + if (pos_sat) return pos_max; + if (zero) return 0; + return s; +} + +template <typename T> +T shf(T a, T b, bool r) { + if (std::is_signed<T>::value == true) { + constexpr int n = sizeof(T) * 8; + int shamt = b; + int64_t s = a; + if (!a) { + return 0; + } else if (a < 0 && shamt >= n) { + s = -1 + r; + } else if (a > 0 && shamt >= n) { + s = 0; + } else if (shamt > 0) { + s = (static_cast<int64_t>(a) + (r ? (1ll << (shamt - 1)) : 0)) >> shamt; + } else { // shmat < 0 + using UT = typename std::make_unsigned<T>::type; + UT ushamt = static_cast<UT>(-shamt <= n ? 
-shamt : n); + s = static_cast<int64_t>(static_cast<uint64_t>(a) << ushamt); + } + + int64_t neg_max = -1ll << (n - 1); + int64_t pos_max = (1ll << (n - 1)) - 1; + bool neg_sat = a < 0 && (shamt <= -n || s < neg_max); + bool pos_sat = a > 0 && (shamt <= -n || s > pos_max); + if (neg_sat) return neg_max; + if (pos_sat) return pos_max; + + return s; + } + constexpr int n = sizeof(T) * 8; + int shamt = static_cast<typename std::make_signed<T>::type>(b); + uint64_t s = a; + if (!a) { + return 0; + } else if (shamt > n) { + s = 0; + } else if (shamt > 0) { + s = (static_cast<uint64_t>(a) + (r ? (1ull << (shamt - 1)) : 0)) >> shamt; + } else { // shamt < 0 + T ushamt = static_cast<T>(-shamt <= n ? -shamt : n); + s = static_cast<uint64_t>(a) << (ushamt); + } + + uint64_t pos_max = (1ull << n) - 1; + bool pos_sat = a && (shamt < -n || s >= (1ull << n)); + if (pos_sat) return pos_max; + + return s; +} + +template <typename T> +T sub(T a, T b) { + using UT = typename std::make_unsigned<T>::type; + return static_cast<T>(static_cast<UT>(a) - static_cast<UT>(b)); +} + +// Saturated subtraction. +template <typename T> +T subs(T a, T b) { + if (std::is_signed<T>::value) { + int64_t m = static_cast<int64_t>(a) - static_cast<int64_t>(b); + m = std::min<int64_t>(std::max<int64_t>(std::numeric_limits<T>::min(), m), + std::numeric_limits<T>::max()); + return m; + } + uint64_t m = static_cast<uint64_t>(a) - static_cast<uint64_t>(b); + m = std::min<uint64_t>(m, std::numeric_limits<T>::max()); + return m; +} + +template <typename T> +uint32_t subw(T a, T b) { + if (std::is_signed<T>::value) { + return static_cast<int64_t>(a) - static_cast<int64_t>(b); + } + return static_cast<uint64_t>(a) - static_cast<uint64_t>(b); +} + +#endif // TESTS_VERILATOR_SIM_KELVIN_ALU_REF_H_
diff --git a/tests/verilator_sim/kelvin/valu.h b/tests/verilator_sim/kelvin/valu.h index afd7b28..0719491 100644 --- a/tests/verilator_sim/kelvin/valu.h +++ b/tests/verilator_sim/kelvin/valu.h
@@ -3,7 +3,7 @@ #ifndef TESTS_VERILATOR_SIM_KELVIN_VALU_H_ #define TESTS_VERILATOR_SIM_KELVIN_VALU_H_ -#include "tools/iss/alu.h" // Modified +#include "tests/verilator_sim/kelvin/alu_ref.h" #include "tests/verilator_sim/kelvin/kelvin_cfg.h" #include "tests/verilator_sim/kelvin/vencodeop.h" @@ -11,8 +11,6 @@ constexpr int kReadPorts = 7; constexpr int kWritePorts = 4; -using namespace encode; - struct valu_t { uint8_t op : 7; uint8_t f2 : 3; @@ -98,7 +96,7 @@ x = func(uint32_t(a)); \ } -#define VOP1PU(func) \ +#define VOP1PU(func) \ if (sz == 1) { \ v = 1; \ w = 1; \ @@ -691,9 +689,9 @@ void VSlidevn(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); const int shfamt = (op.f2 & 3) + 1; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - T* out = (T*)op.out[0].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + T* out = reinterpret_cast<T*>(op.out[0].data); for (int i = 0; i < n; ++i) { out[i] = i + shfamt < n ? in0[i + shfamt] : in1[i + shfamt - n]; } @@ -704,9 +702,9 @@ void VSlidevp(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); const int shfamt = (op.f2 & 3) + 1; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - T* out = (T*)op.out[0].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + T* out = reinterpret_cast<T*>(op.out[0].data); for (int i = 0; i < n; ++i) { out[i] = i - shfamt < 0 ? 
in0[n - shfamt + i] : in1[i - shfamt]; } @@ -717,11 +715,11 @@ void VSlidehn2(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); const int shfamt = (op.f2 & 3) + 1; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - const T* in2 = (const T*)op.in[2].data; - T* out0 = (T*)op.out[0].data; - T* out1 = (T*)op.out[1].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + const T* in2 = reinterpret_cast<const T*>(op.in[2].data); + T* out0 = reinterpret_cast<T*>(op.out[0].data); + T* out1 = reinterpret_cast<T*>(op.out[1].data); for (int i = 0; i < n; ++i) { out0[i] = i + shfamt < n ? in0[i + shfamt] : in1[i + shfamt - n]; } @@ -736,11 +734,11 @@ void VSlidehp2(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); const int shfamt = (op.f2 & 3) + 1; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - const T* in2 = (const T*)op.in[2].data; - T* out0 = (T*)op.out[0].data; - T* out1 = (T*)op.out[1].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + const T* in2 = reinterpret_cast<const T*>(op.in[2].data); + T* out0 = reinterpret_cast<T*>(op.out[0].data); + T* out1 = reinterpret_cast<T*>(op.out[1].data); for (int i = 0; i < n; ++i) { out0[i] = i - shfamt < 0 ? 
in0[n - shfamt + i] : in1[i - shfamt]; } @@ -798,11 +796,10 @@ template <typename T> void VSel(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); - const int shfamt = (op.f2 & 3) + 1; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - const T* in2 = (const T*)op.in[2].data; - T* out = (T*)op.out[0].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + const T* in2 = reinterpret_cast<const T*>(op.in[2].data); + T* out = reinterpret_cast<T*>(op.out[0].data); for (int i = 0; i < n; ++i) { out[i] = in0[i] & 1 ? in2[i] : in1[i]; } @@ -823,10 +820,9 @@ template <typename T> void VEvn(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); - constexpr int h = n / 2; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - T* out0 = (T*)op.out[0].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + T* out0 = reinterpret_cast<T*>(op.out[0].data); for (int i = 0; i < n; ++i) { out0[i] = i < n / 2 ? in0[2 * i + 0] : in1[2 * (i - n / 2) + 0]; } @@ -836,10 +832,9 @@ template <typename T> void VOdd(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); - constexpr int h = n / 2; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - T* out1 = (T*)op.out[1].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + T* out1 = reinterpret_cast<T*>(op.out[1].data); for (int i = 0; i < n; ++i) { out1[i] = i < n / 2 ? 
in0[2 * i + 1] : in1[2 * (i - n / 2) + 1]; } @@ -889,10 +884,10 @@ void VZip(valu_t& op) { constexpr int n = kLanes * 4 / sizeof(T); constexpr int h = n / 2; - const T* in0 = (const T*)op.in[0].data; - const T* in1 = (const T*)op.in[1].data; - T* out0 = (T*)op.out[0].data; - T* out1 = (T*)op.out[1].data; + const T* in0 = reinterpret_cast<const T*>(op.in[0].data); + const T* in1 = reinterpret_cast<const T*>(op.in[1].data); + T* out0 = reinterpret_cast<T*>(op.out[0].data); + T* out1 = reinterpret_cast<T*>(op.out[1].data); for (int i = 0; i < n; ++i) { const int j = i / 2; out0[i] = i & 1 ? in1[j + 0] : in0[j + 0]; @@ -937,29 +932,29 @@ } static void VDwconv(valu_t& op) { - const uint32_t* in0 = (const uint32_t*)op.in[0].data; - const uint32_t* in1 = (const uint32_t*)op.in[1].data; - const uint32_t* in2 = (const uint32_t*)op.in[2].data; - const uint32_t* in3 = (const uint32_t*)op.in[3].data; - const uint32_t* in4 = (const uint32_t*)op.in[4].data; - const uint32_t* in5 = (const uint32_t*)op.in[5].data; - uint32_t* out0 = (uint32_t*)op.out[0].data; - uint32_t* out1 = (uint32_t*)op.out[1].data; - uint32_t* out2 = (uint32_t*)op.out[2].data; - uint32_t* out3 = (uint32_t*)op.out[3].data; + const uint32_t* in0 = reinterpret_cast<const uint32_t*>(op.in[0].data); + const uint32_t* in1 = reinterpret_cast<const uint32_t*>(op.in[1].data); + const uint32_t* in2 = reinterpret_cast<const uint32_t*>(op.in[2].data); + const uint32_t* in3 = reinterpret_cast<const uint32_t*>(op.in[3].data); + const uint32_t* in4 = reinterpret_cast<const uint32_t*>(op.in[4].data); + const uint32_t* in5 = reinterpret_cast<const uint32_t*>(op.in[5].data); + uint32_t* out0 = reinterpret_cast<uint32_t*>(op.out[0].data); + uint32_t* out1 = reinterpret_cast<uint32_t*>(op.out[1].data); + uint32_t* out2 = reinterpret_cast<uint32_t*>(op.out[2].data); + uint32_t* out3 = reinterpret_cast<uint32_t*>(op.out[3].data); struct vdwconv_u8_t { - uint32_t mode : 2; // 1:0 - uint32_t sparsity : 2; // 3:2 - uint32_t regbase 
: 4; // 7:4 - uint32_t rsvd : 4; // 11:8 - uint32_t abias : 9; // 20:12 - uint32_t asign : 1; // 21 - uint32_t bbias : 9; // 30:22 - uint32_t bsign : 1; // 31 + uint32_t mode : 2; // 31:30 + uint32_t sparsity : 2; // 29:28 + uint32_t regbase : 4; // 27:24 + uint32_t rsvd : 4; // 23:20 + uint32_t abias : 9; // 19:11 + uint32_t asign : 1; // 10 + uint32_t bbias : 9; // 9:1 + uint32_t bsign : 1; // 0 } cmd; - uint32_t* p_cmd = (uint32_t*)&cmd; + uint32_t* p_cmd = reinterpret_cast<uint32_t*>(&cmd); *p_cmd = op.sv.data; assert(cmd.mode == 0); assert(cmd.rsvd == 0); @@ -970,7 +965,8 @@ const bool bsign = cmd.bsign; constexpr int n = kVector / 32; - uint32_t sparse[n + 2]; + constexpr int kSparseSize = n + 2; + uint32_t sparse[kSparseSize]; if (cmd.sparsity == 1) { sparse[0] = in0[n - 1]; for (int i = 0; i < kVector / 32; ++i) { @@ -1009,18 +1005,18 @@ static void VAlu(valu_t& op) { // clang-format off switch (op.op) { - case vslidevn: VSlidevn(op); return; - case vslidevp: VSlidevp(op); return; - case vslidehn: VSlidevn(op); return; - case vslidehp: VSlidevp(op); return; - case vslidehn2: VSlidehn2(op); return; - case vslidehp2: VSlidehp2(op); return; - case vsel: VSel(op); return; - case vevn: VEvn(op); return; - case vodd: VOdd(op); return; - case vevnodd: VEvnOdd(op); return; - case vzip: VZip(op); return; - case vdwconv: VDwconv(op); return; + case encode::vslidevn: VSlidevn(op); return; + case encode::vslidevp: VSlidevp(op); return; + case encode::vslidehn: VSlidevn(op); return; + case encode::vslidehp: VSlidevp(op); return; + case encode::vslidehn2: VSlidehn2(op); return; + case encode::vslidehp2: VSlidehp2(op); return; + case encode::vsel: VSel(op); return; + case encode::vevn: VEvn(op); return; + case encode::vodd: VOdd(op); return; + case encode::vevnodd: VEvnOdd(op); return; + case encode::vzip: VZip(op); return; + case encode::vdwconv: VDwconv(op); return; } // clang-format on @@ -1031,72 +1027,70 @@ const uint32_t b = op.in[1].data[i]; const uint32_t c = 
op.in[2].data[i]; const uint32_t d = op.in[3].data[i]; - const uint32_t e = op.in[4].data[i]; const uint32_t f = op.in[5].data[i]; - const uint32_t g = op.in[6].data[i]; bool v = false; bool w = false; uint32_t x = 0; uint32_t y = 0; - const bool f2_negative = - ((f2 >> 0) & 1) && (op.op == vdmulh || op.op == vdmulh2); + const bool f2_negative = ((f2 >> 0) & 1) && (op.op == encode::vdmulh || + op.op == encode::vdmulh2); const bool f2_round = (f2 >> 1) & 1; const bool f2_signed = - !((f2 >> 0) & 1) || op.op == vdmulh || op.op == vdmulh2; + !((f2 >> 0) & 1) || op.op == encode::vdmulh || op.op == encode::vdmulh2; // clang-format off switch (op.op) { - case vdup: VOPXU(dup); break; - case vadd: VOP2U(add); break; - case vsub: VOP2U(sub); break; - case vrsub: VOP2U(rsub); break; - case veq: VOP2U(cmp_eq); break; - case vne: VOP2U(cmp_ne); break; - case vlt: VOP2(cmp_lt); break; - case vle: VOP2(cmp_le); break; - case vgt: VOP2(cmp_gt); break; - case vge: VOP2(cmp_ge); break; - case vabsd: VOP2(absd); break; - case vmax: VOP2(max); break; - case vmin: VOP2(min); break; - case vadd3: VOP3U(add3); break; - case vand: VOP2U(log_and); break; - case vor: VOP2U(log_or); break; - case vxor: VOP2U(log_xor); break; - case vnot: VOP1U(log_not); break; - case vrev: VOP2U(log_rev); break; - case vror: VOP2U(log_ror); break; - case vclb: VOP1U(log_clb); break; - case vclz: VOP1U(log_clz); break; - case vcpop: VOP1U(log_cpop); break; - case vmv: VOP1U(mv); break; - case vmv2: VOP1PU(mv); break; - case vmvp: VOP2M(mvp); break; - case vshl: VOP2U(shl); break; - case vshr: VOP2(shr); break; - case vshf: VOP2_R(shf, f2_round); break; - case vsrans: VOP3NS_R_U(srans, f2_round, !f2_signed); break; - case vsraqs: VOP3QS_R_U(srans, f2_round, !f2_signed); break; - case vmul: VOP2S(mul); break; - case vmul2: VOP2PS(mul); break; - case vmuls: VOP2(muls); break; - case vmuls2: VOP2P(muls); break; - case vmulw: WOP2(mulw); break; - case vmulh: VOP2_R(mulh, f2_round); break; - case vmulh2: 
VOP2P_R(mulh, f2_round); break; - case vdmulh: VOP2_R_X(dmulh, f2_round, f2_negative); break; - case vdmulh2: VOP2P_R_X(dmulh, f2_round, f2_negative); break; - case vmadd: VOP3(madd); break; - case vadds: VOP2(adds); break; - case vsubs: VOP2(subs); break; - case vaddw: WOP2(addw); break; - case vsubw: WOP2(subw); break; - case vacc: WOPA(acc); break; - case vpadd: VOPP(padd); break; - case vpsub: VOPP(psub); break; - case vhadd: VOP2_R(hadd, f2_round); break; - case vhsub: VOP2_R(hsub, f2_round); break; + case encode::vdup: VOPXU(dup); break; + case encode::vadd: VOP2U(add); break; + case encode::vsub: VOP2U(sub); break; + case encode::vrsub: VOP2U(rsub); break; + case encode::veq: VOP2U(cmp_eq); break; + case encode::vne: VOP2U(cmp_ne); break; + case encode::vlt: VOP2(cmp_lt); break; + case encode::vle: VOP2(cmp_le); break; + case encode::vgt: VOP2(cmp_gt); break; + case encode::vge: VOP2(cmp_ge); break; + case encode::vabsd: VOP2(absd); break; + case encode::vmax: VOP2(max); break; + case encode::vmin: VOP2(min); break; + case encode::vadd3: VOP3U(add3); break; + case encode::vand: VOP2U(log_and); break; + case encode::vor: VOP2U(log_or); break; + case encode::vxor: VOP2U(log_xor); break; + case encode::vnot: VOP1U(log_not); break; + case encode::vrev: VOP2U(log_rev); break; + case encode::vror: VOP2U(log_ror); break; + case encode::vclb: VOP1U(log_clb); break; + case encode::vclz: VOP1U(log_clz); break; + case encode::vcpop: VOP1U(log_cpop); break; + case encode::vmv: VOP1U(mv); break; + case encode::vmv2: VOP1PU(mv); break; + case encode::vmvp: VOP2M(mvp); break; + case encode::vshl: VOP2U(shl); break; + case encode::vshr: VOP2(shr); break; + case encode::vshf: VOP2_R(shf, f2_round); break; + case encode::vsrans: VOP3NS_R_U(srans, f2_round, !f2_signed); break; + case encode::vsraqs: VOP3QS_R_U(srans, f2_round, !f2_signed); break; + case encode::vmul: VOP2S(mul); break; + case encode::vmul2: VOP2PS(mul); break; + case encode::vmuls: VOP2(muls); break; + case 
encode::vmuls2: VOP2P(muls); break; + case encode::vmulw: WOP2(mulw); break; + case encode::vmulh: VOP2_R(mulh, f2_round); break; + case encode::vmulh2: VOP2P_R(mulh, f2_round); break; + case encode::vdmulh: VOP2_R_X(dmulh, f2_round, f2_negative); break; + case encode::vdmulh2: VOP2P_R_X(dmulh, f2_round, f2_negative); break; + case encode::vmadd: VOP3(madd); break; + case encode::vadds: VOP2(adds); break; + case encode::vsubs: VOP2(subs); break; + case encode::vaddw: WOP2(addw); break; + case encode::vsubw: WOP2(subw); break; + case encode::vacc: WOPA(acc); break; + case encode::vpadd: VOPP(padd); break; + case encode::vpsub: VOPP(psub); break; + case encode::vhadd: VOP2_R(hadd, f2_round); break; + case encode::vhsub: VOP2_R(hsub, f2_round); break; } // clang-format on
diff --git a/tests/verilator_sim/kelvin/valu_tb.cc b/tests/verilator_sim/kelvin/valu_tb.cc index f3ff74b..5d05ade 100644 --- a/tests/verilator_sim/kelvin/valu_tb.cc +++ b/tests/verilator_sim/kelvin/valu_tb.cc
@@ -1,8 +1,8 @@ // Copyright 2023 Google LLC #include "VVAlu.h" -#include "sysc_tb.h" -#include "valu.h" +#include "tests/verilator_sim/kelvin/valu.h" +#include "tests/verilator_sim/sysc_tb.h" struct VAlu_tb : Sysc_tb { sc_in<bool> io_in_ready; @@ -431,7 +431,7 @@ bool valid = rand_int(0, 3); inputs_t in; - in.op = rand_int(0, kOpEntries - 1); + in.op = rand_int(0, encode::kOpEntries - 1); in.f2 = rand_int(0, 7); in.sz = 1u << rand_int(0, 2); in.m = rand_int(0, 7) == 0; @@ -440,14 +440,15 @@ in.sv.addr = rand_uint32(); in.sv.data = rand_uint32(); - if (in.op == vevn || in.op == vevnodd || in.op == vodd) { + if (in.op == encode::vevn || in.op == encode::vevnodd || + in.op == encode::vodd) { // Disallow even/odd in CRT. - in.op = vadd; + in.op = encode::vadd; } - if (in.op == vdwconv) { + if (in.op == encode::vdwconv) { // Disallow DW in CRT. - in.op = vadd; + in.op = encode::vadd; } // Assign random values to inactive read addr/tag. @@ -464,69 +465,69 @@ } switch (in.op) { - case vabsd: - case vadd: - case vadds: - case vhadd: - case vhsub: - case vmax: - case vmin: - case vrsub: - case vsub: - case vsubs: - case veq: - case vne: - case vlt: - case vle: - case vgt: - case vge: - case vand: - case vclb: - case vclz: - case vcpop: - case vevn: - case vor: - case vrev: - case vror: - case vxor: - case vdmulh: - case vmul: - case vmulh: - case vmuls: - case vshl: - case vshr: - case vshf: + case encode::vabsd: + case encode::vadd: + case encode::vadds: + case encode::vhadd: + case encode::vhsub: + case encode::vmax: + case encode::vmin: + case encode::vrsub: + case encode::vsub: + case encode::vsubs: + case encode::veq: + case encode::vne: + case encode::vlt: + case encode::vle: + case encode::vgt: + case encode::vge: + case encode::vand: + case encode::vclb: + case encode::vclz: + case encode::vcpop: + case encode::vevn: + case encode::vor: + case encode::vrev: + case encode::vror: + case encode::vxor: + case encode::vdmulh: + case encode::vmul: + case encode::vmulh: + 
case encode::vmuls: + case encode::vshl: + case encode::vshr: + case encode::vshf: in.r[0].valid = true; in.r[1].valid = true; in.w[0].valid = true; break; - case vaddw: - case vevnodd: - case vsubw: - case vmulw: - case vmvp: - case vzip: + case encode::vaddw: + case encode::vevnodd: + case encode::vsubw: + case encode::vmulw: + case encode::vmvp: + case encode::vzip: in.r[0].valid = true; in.r[1].valid = true; in.w[0].valid = true; in.w[1].valid = true; break; - case vacc: + case encode::vacc: in.r[0].valid = true; in.r[1].valid = true; in.r[2].valid = true; in.w[0].valid = true; in.w[1].valid = true; break; - case vadd3: - case vmadd: - case vsrans: + case encode::vadd3: + case encode::vmadd: + case encode::vsrans: in.r[0].valid = true; in.r[1].valid = true; in.r[2].valid = true; in.w[0].valid = true; break; - case vsraqs: + case encode::vsraqs: in.r[0].valid = true; in.r[1].valid = true; in.r[2].valid = true; @@ -535,22 +536,22 @@ in.w[0].valid = true; in.cmdsync = true; break; - case vdup: + case encode::vdup: in.r[1].valid = true; in.w[0].valid = true; break; - case vmv: - case vpadd: - case vpsub: + case encode::vmv: + case encode::vpadd: + case encode::vpsub: in.r[0].valid = true; in.w[0].valid = true; break; - case vodd: + case encode::vodd: in.r[0].valid = true; in.r[1].valid = true; in.w[1].valid = true; break; - case vdwconv: + case encode::vdwconv: in.r[0].valid = true; in.r[1].valid = true; in.r[2].valid = true; @@ -580,7 +581,7 @@ } // Assign inactive write addresses. 
- if (in.op == vzip) { + if (in.op == encode::vzip) { int addr = 0; valid = valid && FindInactiveWriteAddr2(in.m, wactive, addr); in.w[0].valid = valid; @@ -637,7 +638,7 @@ for (int i = 0; i < kWritePorts; ++i) { if (alu.w[i].valid) { wactive |= 1ull << waddr[i]; - if (in.op == vzip) { + if (in.op == encode::vzip) { waddr[i] += 2; } else { waddr[i]++; // stripmine update @@ -697,10 +698,10 @@ void ProcessInputs(const int idx) { // clang-format off if (!(io_in_valid && io_in_ready) || - idx == 0 && !io_in_bits_0_valid || - idx == 1 && !io_in_bits_1_valid || - idx == 2 && !io_in_bits_2_valid || - idx == 3 && !io_in_bits_3_valid) { + (idx == 0 && !io_in_bits_0_valid) || + (idx == 1 && !io_in_bits_1_valid) || + (idx == 2 && !io_in_bits_2_valid) || + (idx == 3 && !io_in_bits_3_valid)) { cmdq_[idx].clear(); return; } @@ -741,10 +742,10 @@ void ProcessOutputs(const int idx) { // clang-format off - if (idx == 0 && !io_write_0_valid || - idx == 1 && !io_write_1_valid || - idx == 2 && !io_write_2_valid || - idx == 3 && !io_write_3_valid) { + if ((idx == 0 && !io_write_0_valid) || + (idx == 1 && !io_write_1_valid) || + (idx == 2 && !io_write_2_valid) || + (idx == 3 && !io_write_3_valid)) { return; } // clang-format on @@ -771,8 +772,8 @@ if (memcmp(dut, ref, kLanes * 4)) { char s[100]; - sprintf(s, "valu op=%d f2=%d sz=%d", write_[addr].op, write_[addr].f2, - write_[addr].sz); + snprintf(s, sizeof(s), "valu op=%d f2=%d sz=%d", write_[addr].op, + write_[addr].f2, write_[addr].sz); printf("ref[%2d] ", addr); for (int i = 0; i < kLanes; ++i) { printf(" %08x", ref[i]);