Merge "[HW][Kelvin][Sram_1rwm_256x288.v] move FPGA defined macro"
diff --git a/hdl/chisel/BUILD b/hdl/chisel/BUILD
index cfad4b6..2785e9b 100644
--- a/hdl/chisel/BUILD
+++ b/hdl/chisel/BUILD
@@ -20,7 +20,7 @@
deps = [
":common",
":kelvin",
- ]
+ ],
)
chisel_cc_library(
@@ -32,7 +32,7 @@
"//hdl/verilog:clock_gate",
"//hdl/verilog:sram_1rw_256x256",
"//hdl/verilog:sram_1rw_256x288",
- ]
+ ],
)
chisel_cc_library(
@@ -70,6 +70,20 @@
)
chisel_cc_library(
+ name = "valu_cc_library",
+ chisel_lib = ":kelvin",
+ emit_class = "kelvin.EmitVAlu",
+ module_name = "VAlu",
+)
+
+chisel_cc_library(
+ name = "valuint_cc_library",
+ chisel_lib = ":kelvin",
+ emit_class = "kelvin.EmitVAluInt",
+ module_name = "VAluInt",
+)
+
+chisel_cc_library(
name = "vcmdq_cc_library",
chisel_lib = ":kelvin",
emit_class = "kelvin.EmitVCmdq",
diff --git a/tests/verilator_sim/BUILD b/tests/verilator_sim/BUILD
index b0216ce..452c11f 100644
--- a/tests/verilator_sim/BUILD
+++ b/tests/verilator_sim/BUILD
@@ -88,7 +88,43 @@
],
)
-# TODO(derekjchow): Add valu and valuint test benches
+cc_library(
+ name = "valu",
+ hdrs = [
+ "kelvin/alu_ref.h",
+ "kelvin/valu.h",
+ ],
+ deps = [
+ ":vencodeop",
+ ],
+)
+
+cc_test(
+ name = "valu_tb",
+ size = "large",
+ srcs = [
+ "kelvin/valu_tb.cc",
+ ],
+ deps = [
+ ":kelvin_if",
+ ":sim_libs",
+ ":valu",
+ "//hdl/chisel:valu_cc_library",
+ ],
+)
+
+cc_test(
+ name = "valuint_tb",
+ srcs = [
+ "kelvin/valuint_tb.cc",
+ ],
+ deps = [
+ ":kelvin_if",
+ ":sim_libs",
+ ":valu",
+ "//hdl/chisel:valuint_cc_library",
+ ],
+)
cc_library(
name = "vdecode",
diff --git a/tests/verilator_sim/kelvin/alu_ref.h b/tests/verilator_sim/kelvin/alu_ref.h
new file mode 100644
index 0000000..82d950f
--- /dev/null
+++ b/tests/verilator_sim/kelvin/alu_ref.h
@@ -0,0 +1,439 @@
+// Copyright 2023 Google LLC
+//
+// Reference alu ops implementation
+#ifndef TESTS_VERILATOR_SIM_KELVIN_ALU_REF_H_
+#define TESTS_VERILATOR_SIM_KELVIN_ALU_REF_H_
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <limits>
+#include <type_traits>
+#include <utility>
+
+// -----------------------------------------------------------------------------
+// ALU.
+
+template <typename T>
+typename std::make_unsigned<T>::type absd(T a, T b) {
+ using UT = typename std::make_unsigned<T>::type;
+ UT ua = static_cast<UT>(a);
+ UT ub = static_cast<UT>(b);
+ return a > b ? ua - ub : ub - ua;
+}
+
+template <typename Td, typename Ts>
+Td acc(Td a, Ts b) {
+ assert(sizeof(Td) > sizeof(Ts));
+ using UTd = typename std::make_unsigned<Td>::type;
+ return static_cast<Td>(static_cast<UTd>(a) + static_cast<UTd>(b));
+}
+
+template <typename T>
+T add(T a, T b) {
+ using UT = typename std::make_unsigned<T>::type;
+ return static_cast<T>(static_cast<UT>(a) + static_cast<UT>(b));
+}
+
+template <typename T>
+T add3(T a, T b, T c) {
+ using UT = typename std::make_unsigned<T>::type;
+ return static_cast<T>(static_cast<UT>(a) + static_cast<UT>(b) +
+ static_cast<UT>(c));
+}
+
+// Saturated addition.
+template <typename T>
+T adds(T a, T b) {
+ if (std::is_signed<T>::value) {
+ int64_t m = static_cast<int64_t>(a) + static_cast<int64_t>(b);
+ m = std::min<int64_t>(std::max<int64_t>(std::numeric_limits<T>::min(), m),
+ std::numeric_limits<T>::max());
+ return m;
+ }
+ uint64_t m = static_cast<uint64_t>(a) + static_cast<uint64_t>(b);
+ m = std::min<uint64_t>(std::numeric_limits<T>::max(), m);
+ return m;
+}
+
+// Widening add.
+template <typename T>
+uint32_t addw(T a, T b) {
+ if (std::is_signed<T>::value) {
+ return int64_t(a) + int64_t(b);
+ }
+ return uint64_t(a) + uint64_t(b);
+}
+
+template <typename T>
+T cmp_eq(T a, T b) {
+ return a == b;
+}
+
+template <typename T>
+T cmp_ne(T a, T b) {
+ return a != b;
+}
+
+template <typename T>
+T cmp_lt(T a, T b) {
+ return a < b;
+}
+
+template <typename T>
+T cmp_le(T a, T b) {
+ return a <= b;
+}
+
+template <typename T>
+T cmp_gt(T a, T b) {
+ return a > b;
+}
+
+template <typename T>
+T cmp_ge(T a, T b) {
+ return a >= b;
+}
+
+template <typename T>
+T dup(T b) {
+ return b;
+}
+
+template <typename T>
+T log_and(T a, T b) {
+ return a & b;
+}
+
+template <typename T>
+int log_clb(T x) {
+ constexpr int n = sizeof(T) * 8;
+ if (x & (1u << (n - 1))) {
+ x = ~x;
+ }
+ for (int count = 0; count < n; count++) {
+ if ((x << count) >> (n - 1)) {
+ return count;
+ }
+ }
+ return n;
+}
+
+template <typename T>
+int log_clz(const T x) {
+ constexpr int n = sizeof(T) * 8;
+ for (int count = 0; count < n; count++) {
+ if ((x << count) >> (n - 1)) {
+ return count;
+ }
+ }
+ return n;
+}
+
+template <typename T>
+int log_cpop(T a) {
+ constexpr int n = sizeof(T) * 8;
+ int count = 0;
+ for (int i = 0; i < n; i++) {
+ if (a & (1 << i)) {
+ count++;
+ }
+ }
+ return count;
+}
+
+template <typename T>
+T log_not(T a) {
+ return ~a;
+}
+
+template <typename T>
+T log_or(T a, T b) {
+ return a | b;
+}
+
+template <typename T>
+T log_rev(T a, T b) {
+ T count = b & 0b11111;
+ if (count & 1) a = ((a & 0x55555555) << 1) | ((a & 0xAAAAAAAA) >> 1);
+ if (count & 2) a = ((a & 0x33333333) << 2) | ((a & 0xCCCCCCCC) >> 2);
+ if (count & 4) a = ((a & 0x0F0F0F0F) << 4) | ((a & 0xF0F0F0F0) >> 4);
+ if (sizeof(T) == 1) return a;
+ if (count & 8) a = ((a & 0x00FF00FF) << 8) | ((a & 0xFF00FF00) >> 8);
+ if (sizeof(T) == 2) return a;
+ if (count & 16) a = ((a & 0x0000FFFF) << 16) | ((a & 0xFFFF0000) >> 16);
+ return a;
+}
+
+template <typename T>
+T log_ror(T a, T b) {
+ if (sizeof(T) == 4) {
+ if (b & 1) a = (a >> 1) | (a << 31);
+ if (b & 2) a = (a >> 2) | (a << 30);
+ if (b & 4) a = (a >> 4) | (a << 28);
+ if (b & 8) a = (a >> 8) | (a << 24);
+ if (b & 16) a = (a >> 16) | (a << 16);
+ } else if (sizeof(T) == 2) {
+ if (b & 1) a = (a >> 1) | (a << 15);
+ if (b & 2) a = (a >> 2) | (a << 14);
+ if (b & 4) a = (a >> 4) | (a << 12);
+ if (b & 8) a = (a >> 8) | (a << 8);
+ } else if (sizeof(T) == 1) {
+ if (b & 1) a = (a >> 1) | (a << 7);
+ if (b & 2) a = (a >> 2) | (a << 6);
+ if (b & 4) a = (a >> 4) | (a << 4);
+ } else {
+ assert(false);
+ }
+ return a;
+}
+
+template <typename T>
+T log_xor(T a, T b) {
+ return a ^ b;
+}
+
+template <typename T>
+T hadd(T a, T b, int r) {
+ if (std::is_signed<T>::value) {
+ return (static_cast<int64_t>(a) + static_cast<int64_t>(b) + r) >> 1;
+ }
+ return (static_cast<uint64_t>(a) + static_cast<uint64_t>(b) + r) >> 1;
+}
+
+template <typename T>
+T hsub(T a, T b, int r) {
+ if (std::is_signed<T>::value) {
+ return (static_cast<int64_t>(a) - static_cast<int64_t>(b) + r) >> 1;
+ }
+ return (static_cast<uint64_t>(a) - static_cast<uint64_t>(b) + r) >> 1;
+}
+
+template <typename T>
+T madd(T a, T b, T c) {
+ if (std::is_signed<T>::value) {
+ return static_cast<int64_t>(a) * static_cast<int64_t>(b) +
+ static_cast<int64_t>(c);
+ }
+ return static_cast<uint64_t>(a) * static_cast<uint64_t>(b) +
+ static_cast<uint64_t>(c);
+}
+
+template <typename T>
+T max(T a, T b) {
+ return a > b ? a : b;
+}
+
+template <typename T>
+T min(T a, T b) {
+ return a < b ? a : b;
+}
+
+template <typename T>
+T mul(T a, T b) {
+ return a * b;
+}
+
+template <typename T>
+T muls(T a, T b) {
+ if (std::is_signed<T>::value) {
+ int64_t m = static_cast<int64_t>(a) * static_cast<int64_t>(b);
+ m = std::max(
+ static_cast<int64_t>(std::numeric_limits<T>::min()),
+ std::min(static_cast<int64_t>(std::numeric_limits<T>::max()), m));
+ return m;
+ }
+ uint64_t m = uint64_t(a) * uint64_t(b);
+ m = std::min(static_cast<uint64_t>(std::numeric_limits<T>::max()), m);
+ return m;
+}
+
+// Widening multiplication.
+template <typename T>
+uint32_t mulw(T a, T b) {
+ if (std::is_signed<T>::value) {
+ return static_cast<int64_t>(a) * static_cast<int64_t>(b);
+ }
+ return static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
+}
+
+template <typename T>
+T mv(T a) {
+ return a;
+}
+
+template <typename T>
+std::pair<T, T> mvp(T a, T b) {
+ return {a, b};
+}
+
+template <typename T>
+T dmulh(T a, T b, bool r, bool neg) {
+ constexpr int n = sizeof(T) * 8;
+ constexpr T maxNeg = 0x80000000 >> (32 - n);
+ int64_t m = static_cast<int64_t>(a) * static_cast<int64_t>(b);
+ if (r) {
+ int64_t rnd = 0x40000000ll >> (32 - n);
+ if (m < 0 && neg) {
+ rnd = (-0x40000000ll) >> (32 - n);
+ }
+ m += rnd;
+ }
+ m >>= (n - 1);
+
+ if (a == maxNeg && b == maxNeg) {
+ m = 0x7fffffff >> (32 - n);
+ }
+
+ return m;
+}
+
+template <typename T>
+T mulh(T a, T b, bool r) {
+ constexpr int n = sizeof(T) * 8;
+ if (std::is_signed<T>::value) {
+ int64_t m = static_cast<int64_t>(a) * static_cast<int64_t>(b);
+ m += r ? 1ll << (n - 1) : 0;
+ return static_cast<uint64_t>(m) >> n;
+ }
+ uint64_t m = static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
+ m += r ? 1ull << (n - 1) : 0;
+ return m >> n;
+}
+
+template <typename T>
+int32_t padd(T a, T b) {
+ if (std::is_signed<T>::value) {
+ return int64_t(a) + int64_t(b);
+ }
+ return uint64_t(a) + uint64_t(b);
+}
+
+template <typename T>
+uint32_t psub(T a, T b) {
+ if (std::is_signed<T>::value) {
+ return int64_t(a) - int64_t(b);
+ }
+ return uint64_t(a) - uint64_t(b);
+}
+
+template <typename T>
+T rsub(T a, T b) {
+ using UT = typename std::make_unsigned<T>::type;
+ return static_cast<T>(static_cast<UT>(b) - static_cast<UT>(a));
+}
+
+template <typename T>
+T shl(T a, T b) {
+ constexpr int n = sizeof(T) * 8;
+ b &= (n - 1);
+ return a << b;
+}
+
+template <typename T>
+T shr(T a, T b) {
+ constexpr int n = sizeof(T) * 8;
+ b &= (n - 1);
+ return a >> b;
+}
+
+template <typename T1, typename T2>
+T1 srans(T2 a, T1 b, bool r, bool u) {
+ static_assert(2 * sizeof(T1) == sizeof(T2) || 4 * sizeof(T1) == sizeof(T2));
+ assert(std::is_signed<T1>::value == true);
+ assert(std::is_signed<T2>::value == true);
+ constexpr int n = sizeof(T2) * 8;
+ constexpr int m = sizeof(T1) * 8;
+ b &= (n - 1);
+ int64_t s = (static_cast<int64_t>(a) + (b && r ? (1ll << (b - 1)) : 0)) >> b;
+  int64_t neg_max = !u ? -(1ll << (m - 1)) : 0;
+ int64_t pos_max = !u ? (1ll << (m - 1)) - 1 : (1ull << m) - 1;
+ bool neg_sat = s < neg_max;
+ bool pos_sat = s > pos_max;
+ bool zero = !a;
+ if (neg_sat) return neg_max;
+ if (pos_sat) return pos_max;
+ if (zero) return 0;
+ return s;
+}
+
+template <typename T>
+T shf(T a, T b, bool r) {
+ if (std::is_signed<T>::value == true) {
+ constexpr int n = sizeof(T) * 8;
+ int shamt = b;
+ int64_t s = a;
+ if (!a) {
+ return 0;
+ } else if (a < 0 && shamt >= n) {
+ s = -1 + r;
+ } else if (a > 0 && shamt >= n) {
+ s = 0;
+ } else if (shamt > 0) {
+ s = (static_cast<int64_t>(a) + (r ? (1ll << (shamt - 1)) : 0)) >> shamt;
+ } else { // shmat < 0
+ using UT = typename std::make_unsigned<T>::type;
+ UT ushamt = static_cast<UT>(-shamt <= n ? -shamt : n);
+ s = static_cast<int64_t>(static_cast<uint64_t>(a) << ushamt);
+ }
+
+    int64_t neg_max = -(1ll << (n - 1));
+ int64_t pos_max = (1ll << (n - 1)) - 1;
+ bool neg_sat = a < 0 && (shamt <= -n || s < neg_max);
+ bool pos_sat = a > 0 && (shamt <= -n || s > pos_max);
+ if (neg_sat) return neg_max;
+ if (pos_sat) return pos_max;
+
+ return s;
+ }
+ constexpr int n = sizeof(T) * 8;
+ int shamt = static_cast<typename std::make_signed<T>::type>(b);
+ uint64_t s = a;
+ if (!a) {
+ return 0;
+ } else if (shamt > n) {
+ s = 0;
+ } else if (shamt > 0) {
+ s = (static_cast<uint64_t>(a) + (r ? (1ull << (shamt - 1)) : 0)) >> shamt;
+ } else { // shamt < 0
+ T ushamt = static_cast<T>(-shamt <= n ? -shamt : n);
+ s = static_cast<uint64_t>(a) << (ushamt);
+ }
+
+ uint64_t pos_max = (1ull << n) - 1;
+ bool pos_sat = a && (shamt < -n || s >= (1ull << n));
+ if (pos_sat) return pos_max;
+
+ return s;
+}
+
+template <typename T>
+T sub(T a, T b) {
+ using UT = typename std::make_unsigned<T>::type;
+ return static_cast<T>(static_cast<UT>(a) - static_cast<UT>(b));
+}
+
+// Saturated subtraction.
+template <typename T>
+T subs(T a, T b) {
+ if (std::is_signed<T>::value) {
+ int64_t m = static_cast<int64_t>(a) - static_cast<int64_t>(b);
+ m = std::min<int64_t>(std::max<int64_t>(std::numeric_limits<T>::min(), m),
+ std::numeric_limits<T>::max());
+ return m;
+ }
+ uint64_t m = static_cast<uint64_t>(a) - static_cast<uint64_t>(b);
+ m = std::min<uint64_t>(m, std::numeric_limits<T>::max());
+ return m;
+}
+
+template <typename T>
+uint32_t subw(T a, T b) {
+ if (std::is_signed<T>::value) {
+ return static_cast<int64_t>(a) - static_cast<int64_t>(b);
+ }
+ return static_cast<uint64_t>(a) - static_cast<uint64_t>(b);
+}
+
+#endif // TESTS_VERILATOR_SIM_KELVIN_ALU_REF_H_
diff --git a/tests/verilator_sim/kelvin/valu.h b/tests/verilator_sim/kelvin/valu.h
index afd7b28..0719491 100644
--- a/tests/verilator_sim/kelvin/valu.h
+++ b/tests/verilator_sim/kelvin/valu.h
@@ -3,7 +3,7 @@
#ifndef TESTS_VERILATOR_SIM_KELVIN_VALU_H_
#define TESTS_VERILATOR_SIM_KELVIN_VALU_H_
-#include "tools/iss/alu.h" // Modified
+#include "tests/verilator_sim/kelvin/alu_ref.h"
#include "tests/verilator_sim/kelvin/kelvin_cfg.h"
#include "tests/verilator_sim/kelvin/vencodeop.h"
@@ -11,8 +11,6 @@
constexpr int kReadPorts = 7;
constexpr int kWritePorts = 4;
-using namespace encode;
-
struct valu_t {
uint8_t op : 7;
uint8_t f2 : 3;
@@ -98,7 +96,7 @@
x = func(uint32_t(a)); \
}
-#define VOP1PU(func) \
+#define VOP1PU(func) \
if (sz == 1) { \
v = 1; \
w = 1; \
@@ -691,9 +689,9 @@
void VSlidevn(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
const int shfamt = (op.f2 & 3) + 1;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- T* out = (T*)op.out[0].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ T* out = reinterpret_cast<T*>(op.out[0].data);
for (int i = 0; i < n; ++i) {
out[i] = i + shfamt < n ? in0[i + shfamt] : in1[i + shfamt - n];
}
@@ -704,9 +702,9 @@
void VSlidevp(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
const int shfamt = (op.f2 & 3) + 1;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- T* out = (T*)op.out[0].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ T* out = reinterpret_cast<T*>(op.out[0].data);
for (int i = 0; i < n; ++i) {
out[i] = i - shfamt < 0 ? in0[n - shfamt + i] : in1[i - shfamt];
}
@@ -717,11 +715,11 @@
void VSlidehn2(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
const int shfamt = (op.f2 & 3) + 1;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- const T* in2 = (const T*)op.in[2].data;
- T* out0 = (T*)op.out[0].data;
- T* out1 = (T*)op.out[1].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ const T* in2 = reinterpret_cast<const T*>(op.in[2].data);
+ T* out0 = reinterpret_cast<T*>(op.out[0].data);
+ T* out1 = reinterpret_cast<T*>(op.out[1].data);
for (int i = 0; i < n; ++i) {
out0[i] = i + shfamt < n ? in0[i + shfamt] : in1[i + shfamt - n];
}
@@ -736,11 +734,11 @@
void VSlidehp2(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
const int shfamt = (op.f2 & 3) + 1;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- const T* in2 = (const T*)op.in[2].data;
- T* out0 = (T*)op.out[0].data;
- T* out1 = (T*)op.out[1].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ const T* in2 = reinterpret_cast<const T*>(op.in[2].data);
+ T* out0 = reinterpret_cast<T*>(op.out[0].data);
+ T* out1 = reinterpret_cast<T*>(op.out[1].data);
for (int i = 0; i < n; ++i) {
out0[i] = i - shfamt < 0 ? in0[n - shfamt + i] : in1[i - shfamt];
}
@@ -798,11 +796,10 @@
template <typename T>
void VSel(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
- const int shfamt = (op.f2 & 3) + 1;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- const T* in2 = (const T*)op.in[2].data;
- T* out = (T*)op.out[0].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ const T* in2 = reinterpret_cast<const T*>(op.in[2].data);
+ T* out = reinterpret_cast<T*>(op.out[0].data);
for (int i = 0; i < n; ++i) {
out[i] = in0[i] & 1 ? in2[i] : in1[i];
}
@@ -823,10 +820,9 @@
template <typename T>
void VEvn(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
- constexpr int h = n / 2;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- T* out0 = (T*)op.out[0].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ T* out0 = reinterpret_cast<T*>(op.out[0].data);
for (int i = 0; i < n; ++i) {
out0[i] = i < n / 2 ? in0[2 * i + 0] : in1[2 * (i - n / 2) + 0];
}
@@ -836,10 +832,9 @@
template <typename T>
void VOdd(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
- constexpr int h = n / 2;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- T* out1 = (T*)op.out[1].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ T* out1 = reinterpret_cast<T*>(op.out[1].data);
for (int i = 0; i < n; ++i) {
out1[i] = i < n / 2 ? in0[2 * i + 1] : in1[2 * (i - n / 2) + 1];
}
@@ -889,10 +884,10 @@
void VZip(valu_t& op) {
constexpr int n = kLanes * 4 / sizeof(T);
constexpr int h = n / 2;
- const T* in0 = (const T*)op.in[0].data;
- const T* in1 = (const T*)op.in[1].data;
- T* out0 = (T*)op.out[0].data;
- T* out1 = (T*)op.out[1].data;
+ const T* in0 = reinterpret_cast<const T*>(op.in[0].data);
+ const T* in1 = reinterpret_cast<const T*>(op.in[1].data);
+ T* out0 = reinterpret_cast<T*>(op.out[0].data);
+ T* out1 = reinterpret_cast<T*>(op.out[1].data);
for (int i = 0; i < n; ++i) {
const int j = i / 2;
out0[i] = i & 1 ? in1[j + 0] : in0[j + 0];
@@ -937,29 +932,29 @@
}
static void VDwconv(valu_t& op) {
- const uint32_t* in0 = (const uint32_t*)op.in[0].data;
- const uint32_t* in1 = (const uint32_t*)op.in[1].data;
- const uint32_t* in2 = (const uint32_t*)op.in[2].data;
- const uint32_t* in3 = (const uint32_t*)op.in[3].data;
- const uint32_t* in4 = (const uint32_t*)op.in[4].data;
- const uint32_t* in5 = (const uint32_t*)op.in[5].data;
- uint32_t* out0 = (uint32_t*)op.out[0].data;
- uint32_t* out1 = (uint32_t*)op.out[1].data;
- uint32_t* out2 = (uint32_t*)op.out[2].data;
- uint32_t* out3 = (uint32_t*)op.out[3].data;
+ const uint32_t* in0 = reinterpret_cast<const uint32_t*>(op.in[0].data);
+ const uint32_t* in1 = reinterpret_cast<const uint32_t*>(op.in[1].data);
+ const uint32_t* in2 = reinterpret_cast<const uint32_t*>(op.in[2].data);
+ const uint32_t* in3 = reinterpret_cast<const uint32_t*>(op.in[3].data);
+ const uint32_t* in4 = reinterpret_cast<const uint32_t*>(op.in[4].data);
+ const uint32_t* in5 = reinterpret_cast<const uint32_t*>(op.in[5].data);
+ uint32_t* out0 = reinterpret_cast<uint32_t*>(op.out[0].data);
+ uint32_t* out1 = reinterpret_cast<uint32_t*>(op.out[1].data);
+ uint32_t* out2 = reinterpret_cast<uint32_t*>(op.out[2].data);
+ uint32_t* out3 = reinterpret_cast<uint32_t*>(op.out[3].data);
struct vdwconv_u8_t {
- uint32_t mode : 2; // 1:0
- uint32_t sparsity : 2; // 3:2
- uint32_t regbase : 4; // 7:4
- uint32_t rsvd : 4; // 11:8
- uint32_t abias : 9; // 20:12
- uint32_t asign : 1; // 21
- uint32_t bbias : 9; // 30:22
- uint32_t bsign : 1; // 31
+ uint32_t mode : 2; // 31:30
+ uint32_t sparsity : 2; // 29:28
+ uint32_t regbase : 4; // 27:24
+ uint32_t rsvd : 4; // 23:20
+ uint32_t abias : 9; // 19:11
+ uint32_t asign : 1; // 10
+ uint32_t bbias : 9; // 9:1
+ uint32_t bsign : 1; // 0
} cmd;
- uint32_t* p_cmd = (uint32_t*)&cmd;
+ uint32_t* p_cmd = reinterpret_cast<uint32_t*>(&cmd);
*p_cmd = op.sv.data;
assert(cmd.mode == 0);
assert(cmd.rsvd == 0);
@@ -970,7 +965,8 @@
const bool bsign = cmd.bsign;
constexpr int n = kVector / 32;
- uint32_t sparse[n + 2];
+ constexpr int kSparseSize = n + 2;
+ uint32_t sparse[kSparseSize];
if (cmd.sparsity == 1) {
sparse[0] = in0[n - 1];
for (int i = 0; i < kVector / 32; ++i) {
@@ -1009,18 +1005,18 @@
static void VAlu(valu_t& op) {
// clang-format off
switch (op.op) {
- case vslidevn: VSlidevn(op); return;
- case vslidevp: VSlidevp(op); return;
- case vslidehn: VSlidevn(op); return;
- case vslidehp: VSlidevp(op); return;
- case vslidehn2: VSlidehn2(op); return;
- case vslidehp2: VSlidehp2(op); return;
- case vsel: VSel(op); return;
- case vevn: VEvn(op); return;
- case vodd: VOdd(op); return;
- case vevnodd: VEvnOdd(op); return;
- case vzip: VZip(op); return;
- case vdwconv: VDwconv(op); return;
+ case encode::vslidevn: VSlidevn(op); return;
+ case encode::vslidevp: VSlidevp(op); return;
+ case encode::vslidehn: VSlidevn(op); return;
+ case encode::vslidehp: VSlidevp(op); return;
+ case encode::vslidehn2: VSlidehn2(op); return;
+ case encode::vslidehp2: VSlidehp2(op); return;
+ case encode::vsel: VSel(op); return;
+ case encode::vevn: VEvn(op); return;
+ case encode::vodd: VOdd(op); return;
+ case encode::vevnodd: VEvnOdd(op); return;
+ case encode::vzip: VZip(op); return;
+ case encode::vdwconv: VDwconv(op); return;
}
// clang-format on
@@ -1031,72 +1027,70 @@
const uint32_t b = op.in[1].data[i];
const uint32_t c = op.in[2].data[i];
const uint32_t d = op.in[3].data[i];
- const uint32_t e = op.in[4].data[i];
const uint32_t f = op.in[5].data[i];
- const uint32_t g = op.in[6].data[i];
bool v = false;
bool w = false;
uint32_t x = 0;
uint32_t y = 0;
- const bool f2_negative =
- ((f2 >> 0) & 1) && (op.op == vdmulh || op.op == vdmulh2);
+ const bool f2_negative = ((f2 >> 0) & 1) && (op.op == encode::vdmulh ||
+ op.op == encode::vdmulh2);
const bool f2_round = (f2 >> 1) & 1;
const bool f2_signed =
- !((f2 >> 0) & 1) || op.op == vdmulh || op.op == vdmulh2;
+ !((f2 >> 0) & 1) || op.op == encode::vdmulh || op.op == encode::vdmulh2;
// clang-format off
switch (op.op) {
- case vdup: VOPXU(dup); break;
- case vadd: VOP2U(add); break;
- case vsub: VOP2U(sub); break;
- case vrsub: VOP2U(rsub); break;
- case veq: VOP2U(cmp_eq); break;
- case vne: VOP2U(cmp_ne); break;
- case vlt: VOP2(cmp_lt); break;
- case vle: VOP2(cmp_le); break;
- case vgt: VOP2(cmp_gt); break;
- case vge: VOP2(cmp_ge); break;
- case vabsd: VOP2(absd); break;
- case vmax: VOP2(max); break;
- case vmin: VOP2(min); break;
- case vadd3: VOP3U(add3); break;
- case vand: VOP2U(log_and); break;
- case vor: VOP2U(log_or); break;
- case vxor: VOP2U(log_xor); break;
- case vnot: VOP1U(log_not); break;
- case vrev: VOP2U(log_rev); break;
- case vror: VOP2U(log_ror); break;
- case vclb: VOP1U(log_clb); break;
- case vclz: VOP1U(log_clz); break;
- case vcpop: VOP1U(log_cpop); break;
- case vmv: VOP1U(mv); break;
- case vmv2: VOP1PU(mv); break;
- case vmvp: VOP2M(mvp); break;
- case vshl: VOP2U(shl); break;
- case vshr: VOP2(shr); break;
- case vshf: VOP2_R(shf, f2_round); break;
- case vsrans: VOP3NS_R_U(srans, f2_round, !f2_signed); break;
- case vsraqs: VOP3QS_R_U(srans, f2_round, !f2_signed); break;
- case vmul: VOP2S(mul); break;
- case vmul2: VOP2PS(mul); break;
- case vmuls: VOP2(muls); break;
- case vmuls2: VOP2P(muls); break;
- case vmulw: WOP2(mulw); break;
- case vmulh: VOP2_R(mulh, f2_round); break;
- case vmulh2: VOP2P_R(mulh, f2_round); break;
- case vdmulh: VOP2_R_X(dmulh, f2_round, f2_negative); break;
- case vdmulh2: VOP2P_R_X(dmulh, f2_round, f2_negative); break;
- case vmadd: VOP3(madd); break;
- case vadds: VOP2(adds); break;
- case vsubs: VOP2(subs); break;
- case vaddw: WOP2(addw); break;
- case vsubw: WOP2(subw); break;
- case vacc: WOPA(acc); break;
- case vpadd: VOPP(padd); break;
- case vpsub: VOPP(psub); break;
- case vhadd: VOP2_R(hadd, f2_round); break;
- case vhsub: VOP2_R(hsub, f2_round); break;
+ case encode::vdup: VOPXU(dup); break;
+ case encode::vadd: VOP2U(add); break;
+ case encode::vsub: VOP2U(sub); break;
+ case encode::vrsub: VOP2U(rsub); break;
+ case encode::veq: VOP2U(cmp_eq); break;
+ case encode::vne: VOP2U(cmp_ne); break;
+ case encode::vlt: VOP2(cmp_lt); break;
+ case encode::vle: VOP2(cmp_le); break;
+ case encode::vgt: VOP2(cmp_gt); break;
+ case encode::vge: VOP2(cmp_ge); break;
+ case encode::vabsd: VOP2(absd); break;
+ case encode::vmax: VOP2(max); break;
+ case encode::vmin: VOP2(min); break;
+ case encode::vadd3: VOP3U(add3); break;
+ case encode::vand: VOP2U(log_and); break;
+ case encode::vor: VOP2U(log_or); break;
+ case encode::vxor: VOP2U(log_xor); break;
+ case encode::vnot: VOP1U(log_not); break;
+ case encode::vrev: VOP2U(log_rev); break;
+ case encode::vror: VOP2U(log_ror); break;
+ case encode::vclb: VOP1U(log_clb); break;
+ case encode::vclz: VOP1U(log_clz); break;
+ case encode::vcpop: VOP1U(log_cpop); break;
+ case encode::vmv: VOP1U(mv); break;
+ case encode::vmv2: VOP1PU(mv); break;
+ case encode::vmvp: VOP2M(mvp); break;
+ case encode::vshl: VOP2U(shl); break;
+ case encode::vshr: VOP2(shr); break;
+ case encode::vshf: VOP2_R(shf, f2_round); break;
+ case encode::vsrans: VOP3NS_R_U(srans, f2_round, !f2_signed); break;
+ case encode::vsraqs: VOP3QS_R_U(srans, f2_round, !f2_signed); break;
+ case encode::vmul: VOP2S(mul); break;
+ case encode::vmul2: VOP2PS(mul); break;
+ case encode::vmuls: VOP2(muls); break;
+ case encode::vmuls2: VOP2P(muls); break;
+ case encode::vmulw: WOP2(mulw); break;
+ case encode::vmulh: VOP2_R(mulh, f2_round); break;
+ case encode::vmulh2: VOP2P_R(mulh, f2_round); break;
+ case encode::vdmulh: VOP2_R_X(dmulh, f2_round, f2_negative); break;
+ case encode::vdmulh2: VOP2P_R_X(dmulh, f2_round, f2_negative); break;
+ case encode::vmadd: VOP3(madd); break;
+ case encode::vadds: VOP2(adds); break;
+ case encode::vsubs: VOP2(subs); break;
+ case encode::vaddw: WOP2(addw); break;
+ case encode::vsubw: WOP2(subw); break;
+ case encode::vacc: WOPA(acc); break;
+ case encode::vpadd: VOPP(padd); break;
+ case encode::vpsub: VOPP(psub); break;
+ case encode::vhadd: VOP2_R(hadd, f2_round); break;
+ case encode::vhsub: VOP2_R(hsub, f2_round); break;
}
// clang-format on
diff --git a/tests/verilator_sim/kelvin/valu_tb.cc b/tests/verilator_sim/kelvin/valu_tb.cc
index f3ff74b..5d05ade 100644
--- a/tests/verilator_sim/kelvin/valu_tb.cc
+++ b/tests/verilator_sim/kelvin/valu_tb.cc
@@ -1,8 +1,8 @@
// Copyright 2023 Google LLC
#include "VVAlu.h"
-#include "sysc_tb.h"
-#include "valu.h"
+#include "tests/verilator_sim/kelvin/valu.h"
+#include "tests/verilator_sim/sysc_tb.h"
struct VAlu_tb : Sysc_tb {
sc_in<bool> io_in_ready;
@@ -431,7 +431,7 @@
bool valid = rand_int(0, 3);
inputs_t in;
- in.op = rand_int(0, kOpEntries - 1);
+ in.op = rand_int(0, encode::kOpEntries - 1);
in.f2 = rand_int(0, 7);
in.sz = 1u << rand_int(0, 2);
in.m = rand_int(0, 7) == 0;
@@ -440,14 +440,15 @@
in.sv.addr = rand_uint32();
in.sv.data = rand_uint32();
- if (in.op == vevn || in.op == vevnodd || in.op == vodd) {
+ if (in.op == encode::vevn || in.op == encode::vevnodd ||
+ in.op == encode::vodd) {
// Disallow even/odd in CRT.
- in.op = vadd;
+ in.op = encode::vadd;
}
- if (in.op == vdwconv) {
+ if (in.op == encode::vdwconv) {
// Disallow DW in CRT.
- in.op = vadd;
+ in.op = encode::vadd;
}
// Assign random values to inactive read addr/tag.
@@ -464,69 +465,69 @@
}
switch (in.op) {
- case vabsd:
- case vadd:
- case vadds:
- case vhadd:
- case vhsub:
- case vmax:
- case vmin:
- case vrsub:
- case vsub:
- case vsubs:
- case veq:
- case vne:
- case vlt:
- case vle:
- case vgt:
- case vge:
- case vand:
- case vclb:
- case vclz:
- case vcpop:
- case vevn:
- case vor:
- case vrev:
- case vror:
- case vxor:
- case vdmulh:
- case vmul:
- case vmulh:
- case vmuls:
- case vshl:
- case vshr:
- case vshf:
+ case encode::vabsd:
+ case encode::vadd:
+ case encode::vadds:
+ case encode::vhadd:
+ case encode::vhsub:
+ case encode::vmax:
+ case encode::vmin:
+ case encode::vrsub:
+ case encode::vsub:
+ case encode::vsubs:
+ case encode::veq:
+ case encode::vne:
+ case encode::vlt:
+ case encode::vle:
+ case encode::vgt:
+ case encode::vge:
+ case encode::vand:
+ case encode::vclb:
+ case encode::vclz:
+ case encode::vcpop:
+ case encode::vevn:
+ case encode::vor:
+ case encode::vrev:
+ case encode::vror:
+ case encode::vxor:
+ case encode::vdmulh:
+ case encode::vmul:
+ case encode::vmulh:
+ case encode::vmuls:
+ case encode::vshl:
+ case encode::vshr:
+ case encode::vshf:
in.r[0].valid = true;
in.r[1].valid = true;
in.w[0].valid = true;
break;
- case vaddw:
- case vevnodd:
- case vsubw:
- case vmulw:
- case vmvp:
- case vzip:
+ case encode::vaddw:
+ case encode::vevnodd:
+ case encode::vsubw:
+ case encode::vmulw:
+ case encode::vmvp:
+ case encode::vzip:
in.r[0].valid = true;
in.r[1].valid = true;
in.w[0].valid = true;
in.w[1].valid = true;
break;
- case vacc:
+ case encode::vacc:
in.r[0].valid = true;
in.r[1].valid = true;
in.r[2].valid = true;
in.w[0].valid = true;
in.w[1].valid = true;
break;
- case vadd3:
- case vmadd:
- case vsrans:
+ case encode::vadd3:
+ case encode::vmadd:
+ case encode::vsrans:
in.r[0].valid = true;
in.r[1].valid = true;
in.r[2].valid = true;
in.w[0].valid = true;
break;
- case vsraqs:
+ case encode::vsraqs:
in.r[0].valid = true;
in.r[1].valid = true;
in.r[2].valid = true;
@@ -535,22 +536,22 @@
in.w[0].valid = true;
in.cmdsync = true;
break;
- case vdup:
+ case encode::vdup:
in.r[1].valid = true;
in.w[0].valid = true;
break;
- case vmv:
- case vpadd:
- case vpsub:
+ case encode::vmv:
+ case encode::vpadd:
+ case encode::vpsub:
in.r[0].valid = true;
in.w[0].valid = true;
break;
- case vodd:
+ case encode::vodd:
in.r[0].valid = true;
in.r[1].valid = true;
in.w[1].valid = true;
break;
- case vdwconv:
+ case encode::vdwconv:
in.r[0].valid = true;
in.r[1].valid = true;
in.r[2].valid = true;
@@ -580,7 +581,7 @@
}
// Assign inactive write addresses.
- if (in.op == vzip) {
+ if (in.op == encode::vzip) {
int addr = 0;
valid = valid && FindInactiveWriteAddr2(in.m, wactive, addr);
in.w[0].valid = valid;
@@ -637,7 +638,7 @@
for (int i = 0; i < kWritePorts; ++i) {
if (alu.w[i].valid) {
wactive |= 1ull << waddr[i];
- if (in.op == vzip) {
+ if (in.op == encode::vzip) {
waddr[i] += 2;
} else {
waddr[i]++; // stripmine update
@@ -697,10 +698,10 @@
void ProcessInputs(const int idx) {
// clang-format off
if (!(io_in_valid && io_in_ready) ||
- idx == 0 && !io_in_bits_0_valid ||
- idx == 1 && !io_in_bits_1_valid ||
- idx == 2 && !io_in_bits_2_valid ||
- idx == 3 && !io_in_bits_3_valid) {
+ (idx == 0 && !io_in_bits_0_valid) ||
+ (idx == 1 && !io_in_bits_1_valid) ||
+ (idx == 2 && !io_in_bits_2_valid) ||
+ (idx == 3 && !io_in_bits_3_valid)) {
cmdq_[idx].clear();
return;
}
@@ -741,10 +742,10 @@
void ProcessOutputs(const int idx) {
// clang-format off
- if (idx == 0 && !io_write_0_valid ||
- idx == 1 && !io_write_1_valid ||
- idx == 2 && !io_write_2_valid ||
- idx == 3 && !io_write_3_valid) {
+ if ((idx == 0 && !io_write_0_valid) ||
+ (idx == 1 && !io_write_1_valid) ||
+ (idx == 2 && !io_write_2_valid) ||
+ (idx == 3 && !io_write_3_valid)) {
return;
}
// clang-format on
@@ -771,8 +772,8 @@
if (memcmp(dut, ref, kLanes * 4)) {
char s[100];
- sprintf(s, "valu op=%d f2=%d sz=%d", write_[addr].op, write_[addr].f2,
- write_[addr].sz);
+ snprintf(s, sizeof(s), "valu op=%d f2=%d sz=%d", write_[addr].op,
+ write_[addr].f2, write_[addr].sz);
printf("ref[%2d] ", addr);
for (int i = 0; i < kLanes; ++i) {
printf(" %08x", ref[i]);
diff --git a/tests/verilator_sim/kelvin/valuint_tb.cc b/tests/verilator_sim/kelvin/valuint_tb.cc
index f86ad37..05999be 100644
--- a/tests/verilator_sim/kelvin/valuint_tb.cc
+++ b/tests/verilator_sim/kelvin/valuint_tb.cc
@@ -1,12 +1,12 @@
// Copyright 2023 Google LLC
-#include "VVAluInt.h"
-#include "sysc_tb.h"
-#include "valu.h"
+#include "VVAluInt.h" // Generated.
+#include "tests/verilator_sim/kelvin/valu.h"
+#include "tests/verilator_sim/sysc_tb.h"
struct VAluInt_tb : Sysc_tb {
sc_out<bool> io_in_valid;
- sc_out<sc_bv<kOpBits> > io_in_op;
+ sc_out<sc_bv<encode::kOpBits> > io_in_op;
sc_out<sc_bv<3> > io_in_f2;
sc_out<sc_bv<3> > io_in_sz;
sc_out<sc_bv<6> > io_in_vd_addr;
@@ -41,15 +41,15 @@
const uint8_t ve_addr = rand_int(0, 63);
uint32_t sv_data = 0;
- uint8_t op = rand_int(0, kOpEntries - 1);
+ uint8_t op = rand_int(0, encode::kOpEntries - 1);
// Inputs.
valu_t r = {0};
r_.read(r);
- if (op == vdwconv) {
+ if (op == encode::vdwconv) {
// Disallow DW in CRT.
- op = 0; // TODO
+ op = 0;
}
io_in_valid = valid;
@@ -142,7 +142,7 @@
static void VAluInt_test(char* name, int loops, bool trace) {
sc_signal<bool> io_in_valid;
- sc_signal<sc_bv<kOpBits> > io_in_op;
+ sc_signal<sc_bv<encode::kOpBits> > io_in_op;
sc_signal<sc_bv<3> > io_in_f2;
sc_signal<sc_bv<3> > io_in_sz;
sc_signal<sc_bv<6> > io_in_vd_addr;