[sw, dif_kmac] Add implementation of SHA-3 (blocking only)

Add support for SHA-3 operations to the KMAC DIF. These operations
do not support returning partial results (i.e. they block until the
operation is complete). Support for non-blocking operations will be
added in a future PR.

Note that the squeeze operation will need to be modified when support
for XOF operations, such as SHAKE, is added. I have marked this with
'TODO(XOF):' for now.

This change only contains a smoke test for now. Unit tests will be
added at a later date.

Signed-off-by: Michael Munday <mike.munday@lowrisc.org>
diff --git a/hw/top_earlgrey/dv/chip_sim_cfg.hjson b/hw/top_earlgrey/dv/chip_sim_cfg.hjson
index 1cf77c6..ff13127 100644
--- a/hw/top_earlgrey/dv/chip_sim_cfg.hjson
+++ b/hw/top_earlgrey/dv/chip_sim_cfg.hjson
@@ -236,6 +236,12 @@
       en_run_modes: ["sw_test_mode"]
     }
     {
+      name: chip_dif_kmac_smoketest
+      uvm_test_seq: chip_sw_base_vseq
+      sw_images: ["sw/device/tests/dif_kmac_smoketest:1"]
+      en_run_modes: ["sw_test_mode"]
+    }
+    {
       name: chip_pwrmgr_usbdev_smoketest
       uvm_test_seq: chip_sw_base_vseq
       sw_images: ["sw/device/tests/pwrmgr_usbdev_smoketest:1"]
diff --git a/hw/top_earlgrey/dv/verilator_sim_cfg.hjson b/hw/top_earlgrey/dv/verilator_sim_cfg.hjson
index e33e694..dba761b 100644
--- a/hw/top_earlgrey/dv/verilator_sim_cfg.hjson
+++ b/hw/top_earlgrey/dv/verilator_sim_cfg.hjson
@@ -137,6 +137,10 @@
       sw_images: ["sw/device/tests/dif_uart_smoketest:1"]
     }
     {
+      name: dif_kmac_smoketest
+      sw_images: ["sw/device/tests/dif_kmac_smoketest:1"]
+    }
+    {
       name: flash_ctrl_test
       sw_images: ["sw/device/tests/flash_ctrl_test:1"]
     }
diff --git a/sw/device/lib/dif/dif_kmac.c b/sw/device/lib/dif/dif_kmac.c
index 0161c14..2fbaab7 100644
--- a/sw/device/lib/dif/dif_kmac.c
+++ b/sw/device/lib/dif/dif_kmac.c
@@ -6,9 +6,36 @@
 
 #include <assert.h>
 
+#include "sw/device/lib/base/bitfield.h"
 #include "sw/device/lib/base/memory.h"
 
-// TODO: implement!
+#include "kmac_regs.h"
+
+enum {
+  /**
+   * The maximum number of usable bits in the output state.
+   *
+   * This constant may be assumed to be a multiple of 32.
+   *
+   * The actual number of usable bits may be lower than the value defined
+   * depending on the mode in use. The intent is that this constant is useful
+   * for sizing fixed length buffers.
+   *
+   * Formula for the rate in bits is:
+   *
+   *   r = 1600 - c
+   *
+   * Where c is the capacity (the security level in bits multiplied by two).
+   *
+   * The lowest security level is 128 (e.g. SHAKE128).
+   */
+  kDifKmacMaximumBitRate = 1600 - (2 * 128),
+
+  /**
+   * The offset of the second share within the output state register.
+   */
+  kDifKmacStateShareOffset = 0x100,
+};
 
 dif_kmac_result_t dif_kmac_customization_string_init(
     const char *data, size_t len, dif_kmac_customization_string_t *out) {
@@ -66,3 +93,343 @@
 
   return kDifKmacOk;
 }
+
+/**
+ * Report whether the hardware is currently idle.
+ *
+ * If the hardware is not idle then the `CFG` register is locked.
+ *
+ * @param params Hardware parameters.
+ * @returns Whether the hardware is currently idle or not.
+ */
+static bool is_state_idle(dif_kmac_params_t params) {
+  uint32_t reg = mmio_region_read32(params.base_addr, KMAC_STATUS_REG_OFFSET);
+  return bitfield_bit32_read(reg, KMAC_STATUS_SHA3_IDLE_BIT);
+}
+
+/**
+ * Report whether the hardware is currently in the absorb state and accepting
+ * writes to the message FIFO.
+ *
+ * Note that writes to the message FIFO may still block if it is full.
+ *
+ * @param params Hardware parameters.
+ * @returns Whether the hardware is currently absorbing or not.
+ */
+static bool is_state_absorb(dif_kmac_params_t params) {
+  uint32_t reg = mmio_region_read32(params.base_addr, KMAC_STATUS_REG_OFFSET);
+  return bitfield_bit32_read(reg, KMAC_STATUS_SHA3_ABSORB_BIT);
+}
+
+/**
+ * Report whether the hardware is currently in the squeeze state which means
+ * that the output state is valid and may be read by software.
+ *
+ * @param params Hardware parameters.
+ * @returns Whether the hardware is currently in the squeeze state or not.
+ */
+static bool is_state_squeeze(dif_kmac_params_t params) {
+  uint32_t reg = mmio_region_read32(params.base_addr, KMAC_STATUS_REG_OFFSET);
+  return bitfield_bit32_read(reg, KMAC_STATUS_SHA3_SQUEEZE_BIT);
+}
+
+dif_kmac_result_t dif_kmac_init(dif_kmac_params_t params, dif_kmac_t *kmac) {
+  if (kmac == NULL) {
+    return kDifKmacBadArg;
+  }
+
+  *kmac = (dif_kmac_t){.params = params};
+  return kDifKmacOk;
+}
+
+dif_kmac_result_t dif_kmac_configure(dif_kmac_t *kmac,
+                                     dif_kmac_config_t config) {
+  if (kmac == NULL) {
+    return kDifKmacBadArg;
+  }
+
+  // Entropy mode.
+  uint32_t entropy_mode_value;
+  bool entropy_ready = false;
+  switch (config.entropy_mode) {
+    case kDifKmacEntropyModeIdle:
+      entropy_mode_value = KMAC_CFG_ENTROPY_MODE_VALUE_IDLE_MODE;
+      break;
+    case kDifKmacEntropyModeEdn:
+      entropy_mode_value = KMAC_CFG_ENTROPY_MODE_VALUE_EDN_MODE;
+      entropy_ready = true;
+      break;
+    case kDifKmacEntropyModeSoftware:
+      entropy_mode_value = KMAC_CFG_ENTROPY_MODE_VALUE_SW_MODE;
+      break;
+    default:
+      return kDifKmacBadArg;
+  }
+
+  // Check that the hardware is in an idle state.
+  if (!is_state_idle(kmac->params)) {
+    return kDifKmacLocked;
+  }
+
+  // Write configuration register.
+  uint32_t cfg_reg = 0;
+  cfg_reg = bitfield_bit32_write(cfg_reg, KMAC_CFG_MSG_ENDIANNESS_BIT,
+                                 config.message_big_endian);
+  cfg_reg = bitfield_bit32_write(cfg_reg, KMAC_CFG_STATE_ENDIANNESS_BIT,
+                                 config.output_big_endian);
+  cfg_reg = bitfield_field32_write(cfg_reg, KMAC_CFG_ENTROPY_MODE_FIELD,
+                                   entropy_mode_value);
+  cfg_reg = bitfield_bit32_write(cfg_reg, KMAC_CFG_ENTROPY_FAST_PROCESS_BIT,
+                                 config.entropy_fast_process);
+  cfg_reg =
+      bitfield_bit32_write(cfg_reg, KMAC_CFG_ENTROPY_READY_BIT, entropy_ready);
+  mmio_region_write32(kmac->params.base_addr, KMAC_CFG_REG_OFFSET, cfg_reg);
+
+  // Write entropy period register.
+  uint32_t entropy_period_reg = 0;
+  entropy_period_reg = bitfield_field32_write(
+      entropy_period_reg, KMAC_ENTROPY_PERIOD_ENTROPY_TIMER_FIELD,
+      config.entropy_reseed_interval);
+  entropy_period_reg = bitfield_field32_write(
+      entropy_period_reg, KMAC_ENTROPY_PERIOD_WAIT_TIMER_FIELD,
+      config.entropy_wait_timer);
+  mmio_region_write32(kmac->params.base_addr, KMAC_ENTROPY_PERIOD_REG_OFFSET,
+                      entropy_period_reg);
+
+  // Write entropy seed registers.
+  mmio_region_write32(kmac->params.base_addr,
+                      KMAC_ENTROPY_SEED_LOWER_REG_OFFSET,
+                      (uint32_t)config.entropy_seed);
+  mmio_region_write32(kmac->params.base_addr,
+                      KMAC_ENTROPY_SEED_UPPER_REG_OFFSET,
+                      (uint32_t)(config.entropy_seed >> 32));
+
+  return kDifKmacOk;
+}
+
+/**
+ * Calculate the rate (r) in bits from the given security level.
+ *
+ * @param security_level Security level in bits.
+ * @returns Rate in bits.
+ */
+static uint32_t calculate_rate_bits(uint32_t security_level) {
+  // Formula for the rate in bits is:
+  //
+  //   r = 1600 - c
+  //
+  // Where c is the capacity (the security level in bits multiplied by two).
+  return 1600 - 2 * security_level;
+}
+
+dif_kmac_result_t dif_kmac_mode_sha3_start(dif_kmac_t *kmac,
+                                           dif_kmac_mode_sha3_t mode) {
+  if (kmac == NULL) {
+    return kDifKmacBadArg;
+  }
+
+  // Set key strength and calculate rate (r) and digest length (d) in 32-bit
+  // words.
+  uint32_t kstrength;
+  switch (mode) {
+    case kDifKmacModeSha3Len224:
+      kstrength = KMAC_CFG_KSTRENGTH_VALUE_L224;
+      kmac->offset = 0;
+      kmac->r = calculate_rate_bits(224) / 32;
+      kmac->d = 224 / 32;
+      break;
+    case kDifKmacModeSha3Len256:
+      kstrength = KMAC_CFG_KSTRENGTH_VALUE_L256;
+      kmac->offset = 0;
+      kmac->r = calculate_rate_bits(256) / 32;
+      kmac->d = 256 / 32;
+      break;
+    case kDifKmacModeSha3Len384:
+      kstrength = KMAC_CFG_KSTRENGTH_VALUE_L384;
+      kmac->offset = 0;
+      kmac->r = calculate_rate_bits(384) / 32;
+      kmac->d = 384 / 32;
+      break;
+    case kDifKmacModeSha3Len512:
+      kstrength = KMAC_CFG_KSTRENGTH_VALUE_L512;
+      kmac->offset = 0;
+      kmac->r = calculate_rate_bits(512) / 32;
+      kmac->d = 512 / 32;
+      break;
+    default:
+      return kDifKmacBadArg;
+  }
+
+  // Hardware must be idle to start an operation.
+  if (!is_state_idle(kmac->params)) {
+    return kDifKmacError;
+  }
+
+  // Configure SHA-3 mode with the given strength.
+  uint32_t cfg_reg =
+      mmio_region_read32(kmac->params.base_addr, KMAC_CFG_REG_OFFSET);
+  cfg_reg =
+      bitfield_field32_write(cfg_reg, KMAC_CFG_KSTRENGTH_FIELD, kstrength);
+  cfg_reg = bitfield_field32_write(cfg_reg, KMAC_CFG_MODE_FIELD,
+                                   KMAC_CFG_MODE_VALUE_SHA3);
+  mmio_region_write32(kmac->params.base_addr, KMAC_CFG_REG_OFFSET, cfg_reg);
+
+  // Issue start command.
+  uint32_t cmd_reg =
+      bitfield_field32_write(0, KMAC_CMD_CMD_FIELD, KMAC_CMD_CMD_VALUE_START);
+  mmio_region_write32(kmac->params.base_addr, KMAC_CMD_REG_OFFSET, cmd_reg);
+
+  // Poll until the status register is in the 'absorb' state.
+  while (true) {
+    if (is_state_absorb(kmac->params)) {
+      break;
+    }
+    // TODO(#6248): check for error.
+  }
+
+  return kDifKmacOk;
+}
+
+dif_kmac_result_t dif_kmac_absorb(dif_kmac_t *kmac, const void *msg, size_t len,
+                                  size_t *processed) {
+  // Set the number of bytes processed to 0.
+  if (processed != NULL) {
+    *processed = 0;
+  }
+
+  if (kmac == NULL || (msg == NULL && len != 0)) {
+    return kDifKmacBadArg;
+  }
+
+  // Check that an operation has been started.
+  if (kmac->r == 0) {
+    return kDifKmacError;
+  }
+
+  // Check that the status register is in the 'absorb' state.
+  if (!is_state_absorb(kmac->params)) {
+    return kDifKmacError;
+  }
+
+  // Copy the message one byte at a time.
+  // This could be sped up by copying a word at a time, but be careful
+  // about message endianness (e.g. only copy a word at a time when in
+  // little-endian mode).
+  for (size_t i = 0; i < len; ++i) {
+    mmio_region_write8(kmac->params.base_addr, KMAC_MSG_FIFO_REG_OFFSET,
+                       ((const uint8_t *)msg)[i]);
+  }
+
+  if (processed != NULL) {
+    *processed = len;
+  }
+  return kDifKmacOk;
+}
+
+dif_kmac_result_t dif_kmac_squeeze(dif_kmac_t *kmac, uint32_t *out, size_t len,
+                                   size_t *processed) {
+  if (kmac == NULL || (out == NULL && len != 0)) {
+    return kDifKmacBadArg;
+  }
+
+  // Set `processed` to 0 so we can return early without setting it again.
+  if (processed != NULL) {
+    *processed = 0;
+  }
+
+  // Move into squeezing state (if not already in it).
+  // Do this even if the length requested is 0 or too big.
+  if (!kmac->squeezing) {
+    kmac->squeezing = true;
+
+    // Issue squeeze command.
+    uint32_t cmd_reg = bitfield_field32_write(0, KMAC_CMD_CMD_FIELD,
+                                              KMAC_CMD_CMD_VALUE_PROCESS);
+    mmio_region_write32(kmac->params.base_addr, KMAC_CMD_REG_OFFSET, cmd_reg);
+  }
+
+  // If the operation has a fixed length output then the total number of words
+  // requested must not exceed that length.
+  if (kmac->d != 0 && len > (kmac->d - kmac->offset)) {
+    return kDifKmacError;
+  }
+
+  if (len == 0) {
+    return kDifKmacOk;
+  }
+
+  // Poll the status register until in the 'squeeze' state.
+  while (true) {
+    if (is_state_squeeze(kmac->params)) {
+      break;
+    }
+    // TODO(#6248): check for error.
+  }
+
+  while (len > 0) {
+    size_t n = len;
+    size_t remaining = (kmac->d == 0 ? kmac->r : kmac->d) - kmac->offset;
+    if (n > remaining) {
+      n = remaining;
+    }
+    if (n == 0) {
+      // TODO(XOF): request more state.
+      return kDifKmacError;
+    }
+
+    uint32_t offset = KMAC_STATE_REG_OFFSET + kmac->offset * sizeof(uint32_t);
+    for (size_t i = 0; i < n; ++i) {
+      // Read both shares from state register and combine using XOR.
+      uint32_t share0 = mmio_region_read32(kmac->params.base_addr, offset);
+      uint32_t share1 = mmio_region_read32(kmac->params.base_addr,
+                                           offset + kDifKmacStateShareOffset);
+      *out++ = share0 ^ share1;
+      offset += sizeof(uint32_t);
+    }
+    kmac->offset += n;
+    len -= n;
+    if (processed != NULL) {
+      *processed += n;
+    }
+  }
+  return kDifKmacOk;
+}
+
+dif_kmac_result_t dif_kmac_end(dif_kmac_t *kmac) {
+  if (kmac == NULL) {
+    return kDifKmacBadArg;
+  }
+
+  // The hardware should (must?) complete the squeeze operation before the DONE
+  // command is issued.
+  if (!kmac->squeezing) {
+    return kDifKmacError;
+  }
+  while (true) {
+    if (is_state_squeeze(kmac->params)) {
+      break;
+    }
+    // TODO(#6248): check for error.
+  }
+
+  // Issue done command.
+  uint32_t cmd_reg =
+      bitfield_field32_write(0, KMAC_CMD_CMD_FIELD, KMAC_CMD_CMD_VALUE_DONE);
+  mmio_region_write32(kmac->params.base_addr, KMAC_CMD_REG_OFFSET, cmd_reg);
+
+  // Reset state.
+  kmac->squeezing = false;
+  kmac->offset = 0;
+  kmac->r = 0;
+  kmac->d = 0;
+
+  // Poll status register until in idle state.
+  while (true) {
+    if (is_state_idle(kmac->params)) {
+      break;
+    }
+    // TODO(#6248): check for error.
+  }
+
+  return kDifKmacOk;
+}
diff --git a/sw/device/lib/dif/dif_kmac.h b/sw/device/lib/dif/dif_kmac.h
index 80d2269..764b13b 100644
--- a/sw/device/lib/dif/dif_kmac.h
+++ b/sw/device/lib/dif/dif_kmac.h
@@ -97,21 +97,6 @@
 } dif_kmac_params_t;
 
 /**
- * Supported byte order options.
- */
-typedef enum dif_kmac_endianness {
-  /**
-   * Little endian byte ordering.
-   */
-  kDifKmacEndiannessLittle = 0,
-
-  /**
-   * Big endian byte ordering.
-   */
-  kDifKmacEndiannessBig,
-} dif_kmac_endianness_t;
-
-/**
  * Supported entropy modes.
  *
  * Entropy may be provided by the entropy distribution network (EDN) or using a
@@ -138,7 +123,7 @@
    * Entropy fast process mode when enabled prevents the KMAC unit consuming
    * entropy unless it is processing a secret key.
    */
-  dif_kmac_toggle_t entropy_fast_process;
+  bool entropy_fast_process;
 
   /**
    * Entropy seed. Only used when the source of entropy is software.
@@ -159,14 +144,17 @@
   uint16_t entropy_wait_timer;
 
   /**
-   * Byte order used for message.
+   * Convert the message to big-endian byte order.
+   * Note: this option currently has no effect since the message is sent a byte
+   * at a time, but it will in the future.
    */
-  dif_kmac_endianness_t message_endianness;
+  bool message_big_endian;
 
   /**
-   * Byte order used for output state (digest).
+   * Convert the output state (digest) to big-endian byte order on a word
+   * granularity.
    */
-  dif_kmac_endianness_t output_state_endianness;
+  bool output_big_endian;
 } dif_kmac_config_t;
 
 /**
@@ -179,7 +167,28 @@
 typedef struct dif_kmac {
   dif_kmac_params_t params;
 
-  // TODO: counters and offsets to support streaming APIs.
+  /**
+   * Whether the 'squeezing' phase has been started.
+   */
+  bool squeezing;
+
+  /**
+   * Offset into the output state.
+   */
+  size_t offset;
+
+  /**
+   * The rate (r) in 32-bit words.
+   */
+  size_t r;
+
+  /**
+   * The output length (d) in 32-bit words.
+   *
+   * If the output length is not fixed then this field will be set to 0.
+   */
+  size_t d;
+
 } dif_kmac_t;
 
 /**
@@ -611,12 +620,13 @@
  *
  * @param kmac A KMAC handle.
  * @param[out] out Pointer to output buffer.
- * @param[out] len Number of bytes to write to output buffer.
- * @param[out] processed Number of bytes written to output buffer (optional).
+ * @param len Number of 32-bit words to write to output buffer.
+ * @param[out] processed Number of 32-bit words written to output buffer
+ * (optional).
  * @preturn The result of the operation.
  */
 DIF_WARN_UNUSED_RESULT
-dif_kmac_result_t dif_kmac_squeeze(dif_kmac_t *kmac, void *out, size_t len,
+dif_kmac_result_t dif_kmac_squeeze(dif_kmac_t *kmac, uint32_t *out, size_t len,
                                    size_t *processed);
 
 /**
diff --git a/sw/device/tests/dif/dif_kmac_smoketest.c b/sw/device/tests/dif/dif_kmac_smoketest.c
new file mode 100644
index 0000000..d446ad7
--- /dev/null
+++ b/sw/device/tests/dif/dif_kmac_smoketest.c
@@ -0,0 +1,133 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "sw/device/lib/arch/device.h"
+#include "sw/device/lib/base/memory.h"
+#include "sw/device/lib/base/mmio.h"
+#include "sw/device/lib/dif/dif_kmac.h"
+#include "sw/device/lib/flash_ctrl.h"
+#include "sw/device/lib/runtime/log.h"
+#include "sw/device/lib/testing/check.h"
+#include "sw/device/lib/testing/test_main.h"
+
+#include "hw/top_earlgrey/sw/autogen/top_earlgrey.h"
+
+const test_config_t kTestConfig;
+
+/**
+ * Digest lengths in 32-bit words.
+ */
+#define DIGEST_LEN_SHA3_224 (224 / 32)
+#define DIGEST_LEN_SHA3_256 (256 / 32)
+#define DIGEST_LEN_SHA3_384 (384 / 32)
+#define DIGEST_LEN_SHA3_512 (512 / 32)
+#define DIGEST_LEN_SHA3_MAX DIGEST_LEN_SHA3_512
+
+/**
+ * SHA-3 test description.
+ */
+typedef struct sha3_test {
+  dif_kmac_mode_sha3_t mode;
+
+  const char *message;
+  size_t message_len;
+
+  const uint32_t digest[DIGEST_LEN_SHA3_MAX];
+  size_t digest_len;
+} sha3_test_t;
+
+/**
+ * SHA-3 tests.
+ */
+const sha3_test_t sha3_tests[] = {
+    // Examples taken from NIST FIPS-202 Algorithm Test Vectors:
+    // https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Algorithm-Validation-Program/documents/sha3/sha-3bytetestvectors.zip
+    {
+        .mode = kDifKmacModeSha3Len224,
+        .message = NULL,
+        .message_len = 0,
+        .digest = {0x42034e6b, 0xb7db6736, 0x45156e3b, 0xabb10e4f, 0x9a7f59d4,
+                   0x3f8e071b, 0xc76b5a5b},
+        .digest_len = DIGEST_LEN_SHA3_224,
+    },
+    {
+        .mode = kDifKmacModeSha3Len256,
+        .message = "\xe7\x37\x21\x05",
+        .message_len = 4,
+        .digest = {0x8ab6423a, 0x8cf279b0, 0x52c7a34c, 0x90276f29, 0x78fec406,
+                   0xd979ebb1, 0x057f7789, 0xae46401e},
+        .digest_len = DIGEST_LEN_SHA3_256,
+    },
+    {
+        .mode = kDifKmacModeSha3Len384,
+        .message = "\xa7\x48\x47\x93\x0a\x03\xab\xee\xa4\x73\xe1\xf3\xdc\x30"
+                   "\xb8\x88\x15",
+        .message_len = 17,
+        .digest = {0x29f9a6db, 0xd6f955fe, 0xc0675f6c, 0xf1823baf, 0xb358cf7b,
+                   0x16f35267, 0x3f08165c, 0x78d48fea, 0xf20369ee, 0xd20a827f,
+                   0xaf5099dd, 0x00678cb4},
+        .digest_len = DIGEST_LEN_SHA3_384,
+    },
+    {
+        .mode = kDifKmacModeSha3Len512,
+        .message =
+            "\x66\x4e\xf2\xe3\xa7\x05\x9d\xaf\x1c\x58\xca\xf5\x20\x08\xc5\x22"
+            "\x7e\x85\xcd\xcb\x83\xb4\xc5\x94\x57\xf0\x2c\x50\x8d\x4f\x4f\x69"
+            "\xf8\x26\xbd\x82\xc0\xcf\xfc\x5c\xb6\xa9\x7a\xf6\xe5\x61\xc6\xf9"
+            "\x69\x70\x00\x52\x85\xe5\x8f\x21\xef\x65\x11\xd2\x6e\x70\x98\x89"
+            "\xa7\xe5\x13\xc4\x34\xc9\x0a\x3c\xf7\x44\x8f\x0c\xae\xec\x71\x14"
+            "\xc7\x47\xb2\xa0\x75\x8a\x3b\x45\x03\xa7\xcf\x0c\x69\x87\x3e\xd3"
+            "\x1d\x94\xdb\xef\x2b\x7b\x2f\x16\x88\x30\xef\x7d\xa3\x32\x2c\x3d"
+            "\x3e\x10\xca\xfb\x7c\x2c\x33\xc8\x3b\xbf\x4c\x46\xa3\x1d\xa9\x0c"
+            "\xff\x3b\xfd\x4c\xcc\x6e\xd4\xb3\x10\x75\x84\x91\xee\xba\x60\x3a"
+            "\x76",
+        .message_len = 145,
+        .digest = {0xf15f82e5, 0xd570c0a3, 0xe7bb2fa5, 0x444a8511, 0x5f295405,
+                   0x69797afb, 0xd10879a1, 0xbebf6301, 0xa6521d8f, 0x13a0e876,
+                   0x1ca1567b, 0xb4fb0fdf, 0x9f89bc56, 0x4bd127c7, 0x322288d8,
+                   0x4e919d54},
+        .digest_len = DIGEST_LEN_SHA3_512,
+    },
+};
+
+bool test_main() {
+  LOG_INFO("Running KMAC DIF test...");
+
+  // Initialize KMAC hardware.
+  dif_kmac_t kmac;
+  CHECK(dif_kmac_init((dif_kmac_params_t){.base_addr = mmio_region_from_addr(
+                                              TOP_EARLGREY_KMAC_BASE_ADDR)},
+                      &kmac) == kDifKmacOk);
+
+  // Configure KMAC hardware using software entropy.
+  dif_kmac_config_t config = (dif_kmac_config_t){
+      .entropy_mode = kDifKmacEntropyModeSoftware,
+      .entropy_seed = 0xffff,
+      .entropy_fast_process = kDifKmacToggleEnabled,
+  };
+  CHECK(dif_kmac_configure(&kmac, config) == kDifKmacOk);
+
+  // Run SHA-3 test cases using single blocking absorb/squeeze operations.
+  for (int i = 0; i < ARRAYSIZE(sha3_tests); ++i) {
+    sha3_test_t test = sha3_tests[i];
+
+    CHECK(dif_kmac_mode_sha3_start(&kmac, test.mode) == kDifKmacOk);
+    if (test.message_len > 0) {
+      CHECK(dif_kmac_absorb(&kmac, test.message, test.message_len, NULL) ==
+            kDifKmacOk);
+    }
+    uint32_t out[DIGEST_LEN_SHA3_MAX];
+    CHECK(DIGEST_LEN_SHA3_MAX >= test.digest_len);
+    CHECK(dif_kmac_squeeze(&kmac, out, test.digest_len, NULL) == kDifKmacOk);
+    CHECK(dif_kmac_end(&kmac) == kDifKmacOk);
+
+    for (int j = 0; j < test.digest_len; ++j) {
+      CHECK(out[j] == test.digest[j],
+            "test %d: mismatch at %d got=0x%x want=0x%x", i, j, out[j],
+            test.digest[j]);
+    }
+  }
+
+  return true;
+}
diff --git a/sw/device/tests/dif/meson.build b/sw/device/tests/dif/meson.build
index 3333b27..289ca22 100644
--- a/sw/device/tests/dif/meson.build
+++ b/sw/device/tests/dif/meson.build
@@ -82,6 +82,24 @@
   }
 }
 
+dif_kmac_smoketest_lib = declare_dependency(
+  link_with: static_library(
+    'dif_kmac_smoketest_lib',
+    sources: ['dif_kmac_smoketest.c'],
+    dependencies: [
+      sw_lib_dif_kmac,
+      sw_lib_runtime_log,
+      sw_lib_mmio,
+      sw_lib_runtime_hart,
+    ],
+  ),
+)
+sw_tests += {
+  'dif_kmac_smoketest': {
+    'library': dif_kmac_smoketest_lib,
+  }
+}
+
 dif_rstmgr_smoketest_lib = declare_dependency(
   link_with: static_library(
     'dif_rstmgr_smoketest_lib',
diff --git a/test/systemtest/config.py b/test/systemtest/config.py
index c953640..3ffb813 100644
--- a/test/systemtest/config.py
+++ b/test/systemtest/config.py
@@ -75,6 +75,9 @@
         "targets": ["sim_verilator"],
     },
     {
+        "name": "dif_kmac_smoketest",
+    },
+    {
         "name": "flash_ctrl_test",
     },
     {