[usbdpi] Streaming data test in t-l sim

Introduce streaming data test; LFSR-generated data, variable packet
length, exercise ACK/NAK response from DPI model, checking of data in
both directions
Multiple back-to-back data packets to bulk endpoint
Streaming test (usbdev_stream_test) passes in Verilator t-l sim

Signed-off-by: Adrian Lees <a.lees@lowrisc.org>
diff --git a/hw/dv/dpi/usbdpi/usbdpi.c b/hw/dv/dpi/usbdpi/usbdpi.c
index 65b7b26..d9ce188 100644
--- a/hw/dv/dpi/usbdpi/usbdpi.c
+++ b/hw/dv/dpi/usbdpi/usbdpi.c
@@ -1267,6 +1267,14 @@
           testUnimplEp(ctx, USB_PID_SETUP, UKDEV_ADDRESS, 1u);
           break;
 
+        case STEP_STREAM_SERVICE:
+          // After the initial testing of the (current) fixed DPI behavior,
+          // we repeatedly try IN transfers, checking and scrambling any
+          // data packets that we received before sending them straight back
+          // to the device for software to check
+          streams_service(ctx);
+          break;
+
         default:
           if (ctx->step < STEP_IDLE_START || ctx->step >= STEP_IDLE_END) {
             pollRX(ctx, ENDPOINT_SERIAL0, false, false);
diff --git a/hw/dv/dpi/usbdpi/usbdpi.core b/hw/dv/dpi/usbdpi/usbdpi.core
index af9297a..c4b244a 100644
--- a/hw/dv/dpi/usbdpi/usbdpi.core
+++ b/hw/dv/dpi/usbdpi/usbdpi.core
@@ -10,12 +10,14 @@
     files:
       - usbdpi.sv: { file_type: systemVerilogSource }
       - usbdpi.c: { file_type: cppSource }
+      - usbdpi_stream.c: { file_type: cppSource }
       - usbdpi_test.c: { file_type: cppSource }
       - usb_crc.c: { file_type: cppSource }
       - usb_monitor.c: { file_type: cppSource }
       - usb_transfer.c: { file_type: cppSource }
       - usb_utils.c: { file_type: cppSource }
       - usbdpi.h: { file_type: cppSource, is_include_file: true }
+      - usbdpi_stream.h: { file_type: cppSource, is_include_file: true }
       - usbdpi_test.h: { file_type: cppSource, is_include_file: true }
       - usb_monitor.h: { file_type: cppSource, is_include_file: true }
       - usb_transfer.h: { file_type: cppSource, is_include_file: true }
diff --git a/hw/dv/dpi/usbdpi/usbdpi.h b/hw/dv/dpi/usbdpi/usbdpi.h
index 1d7488b..cad73ba 100644
--- a/hw/dv/dpi/usbdpi/usbdpi.h
+++ b/hw/dv/dpi/usbdpi/usbdpi.h
@@ -21,6 +21,7 @@
 #endif
 #include "usb_monitor.h"
 #include "usb_transfer.h"
+#include "usbdpi_stream.h"
 
 // Shall we employ a proper simulation of the frame interval (1ms)?
 // TODO - until such time as we can perform multiple control transfers in a
@@ -134,6 +135,10 @@
 // Maximum number of endpoints supported by the DPI model
 #define USBDPI_MAX_ENDPOINTS 16U
 
+// Maximum number of bidirectional LFSR-generated byte streams that may be
+// supported simultaneously
+#define USBDPI_MAX_STREAMS (USBDPI_MAX_ENDPOINTS - 1U)
+
 // Maximum number of simultaneous transfer descriptors
 //   (The host model may simply avoid polling for further IN transfers
 //    whilst there are no further desciptors available)
@@ -304,6 +309,23 @@
     uint8_t next_data;
   } ep_out[USBDPI_MAX_ENDPOINTS];
 
+  /**
+   * Number of data streams being used
+   */
+  uint8_t nstreams;
+  /**
+   * Stream number of next stream to attempt IN transfers
+   */
+  uint8_t stream_in;
+  /**
+   * Stream number of next stream to attempt OUT transfers
+   */
+  uint8_t stream_out;
+  /**
+   * Context for streaming data test (usbdev_stream_test)
+   */
+  usbdpi_stream_t stream[USBDPI_MAX_STREAMS];
+
   // Diagnostic logging and bus monitoring
   int loglevel;
   char mon_pathname[FILENAME_MAX];
diff --git a/hw/dv/dpi/usbdpi/usbdpi_stream.c b/hw/dv/dpi/usbdpi/usbdpi_stream.c
new file mode 100644
index 0000000..a6d4139
--- /dev/null
+++ b/hw/dv/dpi/usbdpi/usbdpi_stream.c
@@ -0,0 +1,589 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "usb_utils.h"
+#include "usbdpi.h"
+
+// Seed numbers for the LFSR generators in each transfer direction for
+// the given stream number
+#define USBTST_LFSR_SEED(s) (uint8_t)(0x10U + (s)*7U)
+#define USBDPI_LFSR_SEED(s) (uint8_t)(0x9BU - (s)*7U)
+// Seed number for the LFSR that controls retrying of IN packets
+#define RETRY_LFSR_SEED(s) (uint8_t)(0x24U + (s)*7U)
+
+// Simple LFSR for 8-bit sequences; the caller truncates the result to 8 bits
+#define LFSR_ADVANCE(lfsr) \
+  (((lfsr) << 1) ^         \
+   ((((lfsr) >> 1) ^ ((lfsr) >> 2) ^ ((lfsr) >> 3) ^ ((lfsr) >> 7)) & 1u))
+
+// Stream signature words; head and tail markers of the signature packet
+#define STREAM_SIGNATURE_HEAD 0x579EA01AU
+#define STREAM_SIGNATURE_TAIL 0x160AE975U
+
+// Verbose logging/diagnostic reporting
+// TODO - consider folding this into the existing log level
+static const bool verbose = false;
+
+static const bool expect_sig = true;  // First IN packet is a signature
+
+// Determine the next stream for which IN data packets shall be requested
+static inline unsigned in_stream_next(usbdpi_ctx_t *ctx);
+
+// Determine the next stream for which OUT data shall be sent
+static inline unsigned out_stream_next(usbdpi_ctx_t *ctx);
+
+// Check a data packet received from the test software (usbdev_stream_test)
+static bool stream_data_check(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                              const usbdpi_transfer_t *rx, bool accept);
+
+// Generate a data packet as if it had been received from the device
+static usbdpi_transfer_t *stream_data_gen(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                                          unsigned len);
+
+// Check a data packet received from the test software (usbdev_stream_test)
+// and collect the data, combined with our LFSR-generated random stream,
+// for later transmission back to the device
+static usbdpi_transfer_t *stream_data_process(usbdpi_ctx_t *ctx,
+                                              usbdpi_stream_t *s,
+                                              usbdpi_transfer_t *rx);
+
+// Check the stream signature
+static bool stream_sig_check(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                             usbdpi_transfer_t *rx);
+
+// Advance the round-robin arbitration of IN traffic and return the number of
+// the stream for which IN data packets shall be requested next
+inline unsigned in_stream_next(usbdpi_ctx_t *ctx) {
+  // Step on to the following stream, wrapping around after the final one
+  const uint8_t next = (uint8_t)(ctx->stream_in + 1U);
+  const uint8_t chosen = (next < ctx->nstreams) ? next : 0U;
+  ctx->stream_in = chosen;
+  return chosen;
+}
+
+// Advance the round-robin arbitration of OUT traffic and return the number of
+// the stream for which OUT data shall be sent next
+inline unsigned out_stream_next(usbdpi_ctx_t *ctx) {
+  // Step on to the following stream, wrapping around after the final one
+  const uint8_t next = (uint8_t)(ctx->stream_out + 1U);
+  const uint8_t chosen = (next < ctx->nstreams) ? next : 0U;
+  ctx->stream_out = chosen;
+  return chosen;
+}
+
+// Initialize streaming state for the given number of streams
+bool streams_init(usbdpi_ctx_t *ctx, unsigned nstreams, bool retrieve,
+                  bool checking, bool retrying, bool send) {
+  // Can we support the requested number of streams?
+  if (!nstreams || nstreams > USBDPI_MAX_STREAMS) {
+    return false;
+  }
+
+  if (verbose) {
+    printf("[usbdpi] Stream test running with %u streams(s)\n", nstreams);
+    printf("[usbdpi] - retrieve %c checking %c retrying %c send %c\n",
+           retrieve ? 'Y' : 'N', checking ? 'Y' : 'N', retrying ? 'Y' : 'N',
+           send ? 'Y' : 'N');
+  }
+
+  // Remember the number of streams; initialize arbitration of IN/OUT traffic
+  ctx->nstreams = nstreams;
+  ctx->stream_in = 0U;
+  ctx->stream_out = nstreams - 1U;
+
+  for (unsigned id = 0U; id < nstreams; id++) {
+    // Poll device for IN packets in streaming test?
+    ctx->stream[id].retrieve = retrieve;
+    // Attempt to send OUT packets to device in streaming test?
+    ctx->stream[id].send = send;
+    // Checking of received data against expected LFSR output
+    ctx->stream[id].checking = checking;
+    // Request retrying of IN packets, feigning error
+    ctx->stream[id].retrying = retrying;
+    // Endpoints to be used by this stream
+    ctx->stream[id].ep_in = 1U + id;
+    ctx->stream[id].ep_out = 1U + id;
+    // LFSR state for this byte stream
+    ctx->stream[id].tst_lfsr = USBTST_LFSR_SEED(id);
+    ctx->stream[id].dpi_lfsr = USBDPI_LFSR_SEED(id);
+    // LFSR-controlled packet retrying state
+    ctx->stream[id].retry_lfsr = RETRY_LFSR_SEED(id);
+    ctx->stream[id].nretries = 0U;
+    // No packets received yet; the stream signature is still awaited
+    ctx->stream[id].sig_recvd = false;
+    ctx->stream[id].received = NULL;
+  }
+  return true;
+}
+
+// Check a data packet received from the test software (usbdev_stream_test);
+// returns true iff the packet is to be ACKed, and only then are the data
+// toggle and the expected-data LFSR of the stream advanced
+bool stream_data_check(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                       const usbdpi_transfer_t *rx, bool accept) {
+  assert(rx);
+
+  // The byte count _includes_ the DATAx PID and the two CRC bytes
+  //
+  // Note: we are expecting a LFSR-generated byte stream, but we do not
+  //       make assumptions about the number or size of the data packets
+  //       that make up the stream
+
+  unsigned num_bytes = transfer_length(rx);
+  unsigned idx = rx->data_start + 1u;  // Skip past the DATAx PID
+  bool ok = false;
+
+  // Validate the received packet - data length valid and checksum present
+  if (num_bytes >= sizeof(rx->data) || idx + 2u > num_bytes) {
+    printf("[usbdpi] Unexpected/malformed data packet (0x%x 0x%x)\n", idx,
+           num_bytes);
+  } else {
+    // Data field within received packet
+    const uint8_t *sp = &rx->data[idx];
+    num_bytes -= 3u;
+
+    // Check that the CRC16 checksum of the data field is as expected
+    uint16_t rx_crc = sp[num_bytes] | (sp[num_bytes + 1u] << 8);
+    uint16_t crc = CRC16(sp, num_bytes);
+    if (rx_crc != crc) {
+      printf("[usbdpi] Mismatched CRC16 0x%04x received, expected 0x%04x\n",
+             rx_crc, crc);
+    } else {
+      // Data toggle synchronization
+      unsigned pid = ctx->ep_in[s->ep_in].next_data;
+
+      // Tentatively acceptable, but we still have to check and report any and
+      // all mismatched bytes
+      ok = accept && (rx->data[0] == pid);
+
+      // Iff running a performance investigation, checking may be undesired
+      // because it causes us to reject and retry the transmission
+      if (s->checking) {
+        // Note: use a local copy of the LFSR so that we can check the data
+        //       field even on those packets that we choose to reject
+        uint8_t tst_lfsr = s->tst_lfsr;
+        while (num_bytes-- > 0U) {
+          uint8_t recvd = *sp++;
+          if (recvd != tst_lfsr) {
+            printf(
+                "[usbdpi] Mismatched data from device 0x%02x, "
+                "expected 0x%02x\n",
+                recvd, tst_lfsr);
+            ok = false;
+          }
+          // Advance our local LFSR
+          tst_lfsr = LFSR_ADVANCE(tst_lfsr);
+        }
+
+        // Update the LFSR only if we've accepted valid data and will not
+        // be receiving this data again
+        if (ok) {
+          s->tst_lfsr = tst_lfsr;
+        }
+      } else {
+        printf("[usbdpi] Warning: Stream data checking disabled\n");
+      }
+
+      // Advance the data toggle only for a packet that will be ACKed; one that
+      // we reject, deliberately or not, is resent with the same DATAx PID
+      if (ok) {
+        ctx->ep_in[s->ep_in].next_data = DATA_TOGGLE_ADVANCE(pid);
+      }
+    }
+  }
+
+  return ok;
+}
+
+// Generate a data packet as if it had been received from the device
+usbdpi_transfer_t *stream_data_gen(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                                   unsigned len) {
+  usbdpi_transfer_t *tr = transfer_alloc(ctx);
+  if (tr) {
+    // Pretend that we received the packet with the correct data toggling...
+    uint8_t data = ctx->ep_in[s->ep_in].next_data;
+    ctx->ep_in[s->ep_in].next_data = DATA_TOGGLE_ADVANCE(data);
+    // ...and that the data is as expected
+    uint8_t *dp = transfer_data_start(tr, data, len);
+    assert(dp);  // As in stream_data_process; never write via a NULL pointer
+    for (unsigned idx = 0U; idx < len; idx++) {
+      dp[idx] = s->tst_lfsr;
+      s->tst_lfsr = LFSR_ADVANCE(s->tst_lfsr);
+    }
+    transfer_data_end(tr, dp + len);
+  }
+  return tr;  // NULL if no transfer descriptor could be allocated
+}
+
+// Process a received data packet to produce a corresponding reply packet
+// by XORing our LFSR output with the received data
+//
+// Note: for now we do this even if the received data mismatches because
+//       only the CPU software has the capacity to decide upon and report
+//       test status
+usbdpi_transfer_t *stream_data_process(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                                       usbdpi_transfer_t *rx) {
+  // Note: rx passed stream_data_check or was built by stream_data_gen
+  assert(rx);
+
+  // The byte count _includes_ the DATAx PID and the two CRC bytes
+  unsigned num_bytes = rx->num_bytes;
+  unsigned idx = rx->data_start + 1u;  // Skip past the DATAx PID
+
+  // Data field within received packet
+  const uint8_t *sp = &rx->data[idx];
+  num_bytes -= 3u;  // Exclude the DATAx PID and the two CRC bytes
+
+  // Allocate a new buffer for the reply
+  usbdpi_transfer_t *reply = transfer_alloc(ctx);
+  assert(reply);
+
+  // Construct OUT token packet to the target endpoint, using the
+  // appropriate DATAx PID
+  const uint8_t ep_out = s->ep_out;
+  transfer_token(reply, USB_PID_OUT, ctx->dev_address, ep_out);
+  uint8_t *dp =
+      transfer_data_start(reply, ctx->ep_out[ep_out].next_data, num_bytes);
+  assert(dp);
+
+  while (num_bytes-- > 0U) {
+    uint8_t recvd = *sp++;
+
+    // Simply XOR the two LFSR-generated streams together
+    *dp++ = recvd ^ s->dpi_lfsr;
+    if (verbose) {
+      printf("[usbdpi] 0x%02x <- 0x%02x ^ 0x%02x\n", *(dp - 1), recvd,
+             s->dpi_lfsr);
+    }
+
+    // Advance our LFSR
+    //
+    // TODO - decide whether we want to do this here; if the device
+    // responds with a NAK, requiring us to retry, or we decide to
+    // resend the packet, then we don't want to advance again
+    s->dpi_lfsr = LFSR_ADVANCE(s->dpi_lfsr);
+  }
+
+  transfer_data_end(reply, dp);
+
+  return reply;
+}
+
+// Check the stream signature, the data field of which is 16 bytes holding, in
+// order: 32-bit head signature, 8-bit initial value of the device-side LFSR,
+// 8-bit stream number, 16 reserved bits (SBZ), 32-bit number of bytes to be
+// transferred, 32-bit tail signature; 32-bit quantities are little endian
+//
+// On success the LFSR and IN data toggle of the stream are updated; the
+// stream number within the signature is not examined here
+bool stream_sig_check(usbdpi_ctx_t *ctx, usbdpi_stream_t *s,
+                      usbdpi_transfer_t *rx) {
+  // Packet should be PID, data field and CRC16
+  if (transfer_length(rx) != 3U + 0x10U) {
+    return false;
+  }
+  const uint8_t *sig = transfer_data_field(rx);
+  if (!sig) {
+    return false;
+  }
+
+  const uint32_t head = get_le32(&sig[0]);
+  const uint32_t count = get_le32(&sig[8]);
+  const uint32_t tail = get_le32(&sig[12]);
+  if (verbose) {
+    printf("[usbdpi] Stream signature at %p head 0x%x tail 0x%x\n", sig, head,
+           tail);
+  }
+
+  // Basic validation, with a sanity check on the transfer length though we
+  // rely upon the CPU software to send, receive and count the number of bytes
+  if (head != STREAM_SIGNATURE_HEAD || tail != STREAM_SIGNATURE_TAIL ||
+      count == 0U || count >= 0x10000000U || sig[6] || sig[7]) {
+    return false;
+  }
+
+  // Signature includes the initial value of the device-side LFSR
+  s->tst_lfsr = sig[4];
+  // Update data toggle
+  uint8_t pid = transfer_data_pid(rx);
+  ctx->ep_in[s->ep_in].next_data = DATA_TOGGLE_ADVANCE(pid);
+  return true;
+}
+
+// Service streaming data (usbdev_stream_test), one ctx->hostSt state per call
+void streams_service(usbdpi_ctx_t *ctx) {
+  if (verbose) {
+    //    printf("[usbdpi] streams_service hostSt %u in %u out %u\n",
+    //    ctx->hostSt,
+    //           ctx->stream_in, ctx->stream_out);
+  }
+
+  // Maximum time for transmission of a packet ought to be circa 80 bytes of
+  // data, 640 bits; allowing for bitstuffing we need to leave ~800 bit times
+  const unsigned min_time_left = 800U;
+
+  switch (ctx->hostSt) {
+    // --------------------------------------------
+    // Try to transmit a data packet to the device
+    // --------------------------------------------
+    case HS_STARTFRAME:
+    case HS_STREAMOUT: {
+      // Decide whether we have enough time within this frame to attempt
+      // another transmission
+      uint32_t next_frame = ctx->frame_start + FRAME_INTERVAL;
+      if ((next_frame - ctx->tick_bits) > min_time_left) {  // HACK
+        unsigned id = out_stream_next(ctx);
+        usbdpi_stream_t *s = &ctx->stream[id];
+        if (s->send) {
+          // Start by trying to transmit a data packet that we've received, if
+          // any
+          if (s->received) {
+            if (ctx->sending) {
+              transfer_release(ctx, ctx->sending);
+            }
+
+            // Scramble the oldest received packet with our LFSR-generated byte
+            // stream and send it to the device
+            usbdpi_transfer_t *reply = stream_data_process(ctx, s, s->received);
+            transfer_send(ctx, reply);
+
+            uint32_t max_bits =
+                transfer_length(ctx->sending) * 10 + 160;  // HACK
+            ctx->wait = USBDPI_TIMEOUT(ctx, max_bits);
+            ctx->bus_state = kUsbBulkOut;
+            ctx->lastrxpid = 0;
+            ctx->hostSt = HS_WAITACK;
+          } else {
+            // Nothing to send, try receiving...
+            ctx->hostSt = HS_STREAMIN;
+          }
+        } else {
+          // We're not sending anything - discard any received data
+          while (s->received) {
+            usbdpi_transfer_t *tr = s->received;
+            s->received = tr->next;
+            transfer_release(ctx, tr);
+          }
+          ctx->hostSt = HS_STREAMIN;
+        }
+      } else {
+        // Wait until the next bus frame
+        ctx->hostSt = HS_NEXTFRAME;
+      }
+    } break;
+
+    // Await acknowledgement of the packet that we just transmitted
+    case HS_WAITACK:
+      if (ctx->sending) {
+        // Forget reference to the buffer we just tried to send; the received
+        // packet remains in the list of received buffers to try again later
+        transfer_release(ctx, ctx->sending);
+        ctx->sending = NULL;
+      }
+
+      if (ctx->bus_state == kUsbBulkOutAck) {
+        bool proceed = false;
+
+        if (verbose) {
+          printf("[usbdpi] OUT - response is PID 0x%02x from device (%s)\n",
+                 ctx->lastrxpid, decode_pid(ctx->lastrxpid));
+        }
+
+        switch (ctx->lastrxpid) {
+          case USB_PID_ACK: {
+            // Transmitted packet was accepted, so we can retire it...
+            usbdpi_stream_t *s = &ctx->stream[ctx->stream_out];
+            usbdpi_transfer_t *rx = s->received;
+            assert(rx);
+            s->received = rx->next;
+            transfer_release(ctx, rx);
+
+            uint8_t ep_out = s->ep_out;
+            ctx->ep_out[ep_out].next_data =
+                DATA_TOGGLE_ADVANCE(ctx->ep_out[ep_out].next_data);
+            proceed = true;
+          } break;
+
+          // We may receive a NAK from the device if it is unable to receive the
+          // packet right now
+          case USB_PID_NAK:
+            printf(
+                "[usbdpi] frame 0x%x tick_bits 0x%x NAK received from device\n",
+                ctx->frame, ctx->tick_bits);
+            proceed = true;  // Packet stays queued for a later attempt
+            break;
+
+          default:
+            printf("[usbdpi] Unexpected PID 0x%02x from device (%s)\n",
+                   ctx->lastrxpid, decode_pid(ctx->lastrxpid));
+            ctx->hostSt = HS_NEXTFRAME;  // (set again below; !proceed)
+            break;
+        }
+
+        ctx->hostSt = proceed ? HS_STREAMIN : HS_NEXTFRAME;
+      } else if (ctx->tick_bits >= ctx->wait) {
+        printf("[usbdpi] Timed out waiting for OUT response\n");
+        ctx->hostSt = HS_NEXTFRAME;
+      }
+      break;
+
+    // ---------------------------------------------
+    // Try to collect a data packet from the device
+    // ---------------------------------------------
+    case HS_STREAMIN: {
+      // Decide whether we have enough time within this frame to attempt
+      // another fetch
+      //
+      // TODO - find out what the host behaviour should be at this point;
+      //        the device must be required to respond within a certain
+      //        time interval, and then the bus transmission speed
+      //        determines the maximum delay
+      uint32_t next_frame = ctx->frame_start + FRAME_INTERVAL;
+      if ((next_frame - ctx->tick_bits) > min_time_left) {  // HACK
+        unsigned id = in_stream_next(ctx);
+        usbdpi_stream_t *s = &ctx->stream[id];
+        if (s->retrieve) {
+          // Ensure that a buffer is available for constructing a transfer
+          usbdpi_transfer_t *tr = ctx->sending;
+          if (!tr) {
+            tr = transfer_alloc(ctx);
+            assert(tr);
+
+            ctx->sending = tr;
+          }
+
+          transfer_token(tr, USB_PID_IN, ctx->dev_address, s->ep_in);
+
+          transfer_send(ctx, tr);
+          ctx->bus_state = kUsbBulkInToken;
+          ctx->hostSt = HS_WAIT_PKT;
+          ctx->lastrxpid = 0;
+        } else {
+          // We're not required to poll for IN data, but if we're sending we
+          //   must still fake the reception of valid packet data because
+          //   the sw test will be expecting valid data
+          if (s->send && !s->received) {
+            // For simplicity we just create max length packets
+            const unsigned len = USBDEV_MAX_PACKET_SIZE;
+            s->received = stream_data_gen(ctx, s, len);
+          }
+          ctx->hostSt = HS_STREAMOUT;
+        }
+      } else {
+        // Wait until the next bus frame
+        ctx->hostSt = HS_NEXTFRAME;
+      }
+    } break;
+
+    case HS_WAIT_PKT:
+      // Wait max time for a response + packet
+      ctx->wait = ctx->tick_bits + 18 + 8 + 8 + 64 * 8 + 16;
+      ctx->hostSt = HS_ACKIFDATA;
+      break;
+    case HS_ACKIFDATA:
+      if (ctx->bus_state == kUsbBulkInData && ctx->recving) {
+        // We have a response from the device
+        switch (ctx->lastrxpid) {
+          case USB_PID_DATA0:
+          case USB_PID_DATA1: {
+            // Steal the received packet; it belongs to the stream
+            usbdpi_transfer_t *rx = ctx->recving;
+            ctx->recving = NULL;
+            // Decide whether we want to ACK or NAK this packet
+            unsigned id = ctx->stream_in;
+            usbdpi_stream_t *s = &ctx->stream[id];
+            bool accept = false;
+            if (s->retrying && s->nretries) {
+              s->nretries--;
+            } else {
+              // Decide the number of retries for the next data packet
+              // Note: by randomizing the number of retries, rather than
+              // independently deciding each accept/reject, we guarantee an
+              // upper bound on the run time
+              switch (s->retry_lfsr & 7U) {
+                case 7U:
+                  s->nretries = 3U;
+                  break;
+                case 6U:
+                case 5U:
+                  s->nretries = 2U;
+                  break;
+                case 4U:
+                  s->nretries = 1U;
+                  break;
+                default:
+                  s->nretries = 0U;
+                  break;
+              }
+              s->retry_lfsr = LFSR_ADVANCE(s->retry_lfsr);
+              accept = true;
+            }
+            if (!accept) {
+              printf("[usbdpi] Requesting resend of data\n");
+              usb_monitor_log(ctx->mon, "[usbdpi] Requesting resend of data\n");
+            }
+
+            if (expect_sig && !s->sig_recvd) {
+              // Note: the stream signature is primarily of use on a physical
+              // USB connection to a host since the endpoint to port mapping is
+              // variable. With t-l sim we can rely upon the first packet being
+              // the signature and nothing else
+              accept = stream_sig_check(ctx, s, rx);  // Ignores retry choice
+              transfer_release(ctx, rx);
+              // TODO - run time error, signal test failure to the software
+              assert(accept);
+              if (accept) {
+                s->sig_recvd = true;
+              }
+            } else {
+              if (stream_data_check(ctx, s, rx, accept)) {
+                // Collect the received packets in preparation for later
+                // transmission with modification back to the device
+                usbdpi_transfer_t *tr = s->received;
+                if (tr) {
+                  while (tr->next)
+                    tr = tr->next;
+                  tr->next = rx;
+                } else {
+                  s->received = rx;
+                }
+              } else {
+                transfer_release(ctx, rx);
+                accept = false;
+              }
+            }
+
+            usbdpi_transfer_t *tr = ctx->sending;
+            assert(tr);
+
+            transfer_status(ctx, tr, accept ? USB_PID_ACK : USB_PID_NAK);
+
+            ctx->hostSt = HS_STREAMOUT;
+          } break;
+
+          case USB_PID_NAK:
+            // No data available
+            ctx->hostSt = HS_STREAMOUT;
+            break;
+
+          default:
+            printf("[usbdpi] Unexpected PID 0x%02x from device (%s)\n",
+                   ctx->lastrxpid, decode_pid(ctx->lastrxpid));
+            ctx->hostSt = HS_NEXTFRAME;
+            break;
+        }
+      } else if (ctx->tick_bits >= ctx->wait) {
+        printf("[usbdpi] Timed out waiting for IN response\n");
+        ctx->hostSt = HS_NEXTFRAME;
+      }
+      break;
+
+    default:
+      break;
+  }
+}
diff --git a/hw/dv/dpi/usbdpi/usbdpi_stream.h b/hw/dv/dpi/usbdpi/usbdpi_stream.h
new file mode 100644
index 0000000..a48acfc
--- /dev/null
+++ b/hw/dv/dpi/usbdpi/usbdpi_stream.h
@@ -0,0 +1,89 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef OPENTITAN_HW_DV_DPI_USBDPI_USBDPI_STREAM_H_
+#define OPENTITAN_HW_DV_DPI_USBDPI_USBDPI_STREAM_H_
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "usb_transfer.h"
+
+// Forward declaration of USBDPI context
+typedef struct usbdpi_ctx usbdpi_ctx_t;
+
+// Per-stream context for streaming data test (usbdev_stream_test)
+typedef struct usbdpi_stream {
+  /**
+   * Test requires polling device for IN packets
+   */
+  bool retrieve;
+  /**
+   * Test requires attempting to send OUT packets to device
+   */
+  bool send;
+  /**
+   * Request retrying of IN packets, feigning error
+   */
+  bool retrying;
+  /**
+   * Checking of received byte stream; disable iff
+   * investigating IN performance
+   */
+  bool checking;
+  /**
+   * Have we received the stream signature yet?
+   */
+  bool sig_recvd;
+  /**
+   * Device endpoint from which IN packets are retrieved
+   */
+  uint8_t ep_in;
+  /**
+   * Device endpoint to which OUT packets are sent
+   */
+  uint8_t ep_out;
+  /**
+   * Device-generated LFSR; predicts data expected from usbdev_stream_test
+   */
+  uint8_t tst_lfsr;
+  /**
+   * DPI-side LFSR state; its output is XORed with the received data
+   */
+  uint8_t dpi_lfsr;
+  /**
+   * LFSR state for pseudo-randomized retrying of received data
+   */
+  uint8_t retry_lfsr;
+  /**
+   * Number of times (still) to retry before accepting the current data packet
+   */
+  uint8_t nretries;
+  /**
+   * Linked-list of received transfers, oldest first
+   */
+  usbdpi_transfer_t *received;
+} usbdpi_stream_t;
+
+/**
+ * Initialize streaming state for the given number of streams
+ *
+ * @param  ctx       USBDPI context state
+ * @param  nstreams  Number of concurrent byte streams
+ * @param  retrieve  Retrieve IN packets from device
+ * @param  checking  Checking of received data against expected LFSR output
+ * @param  retrying  Request retrying of IN packets, feigning error
+ * @param  send      Attempt to send OUT packets to device
+ * @return           true iff initialized successfully
+ */
+bool streams_init(usbdpi_ctx_t *ctx, unsigned nstreams, bool retrieve,
+                  bool checking, bool retrying, bool send);
+
+/**
+ * Service streaming data (usbdev_stream_test)
+ *
+ * @param  ctx       USBDPI context state
+ */
+void streams_service(usbdpi_ctx_t *ctx);
+
+#endif  // OPENTITAN_HW_DV_DPI_USBDPI_USBDPI_STREAM_H_
diff --git a/hw/dv/dpi/usbdpi/usbdpi_test.c b/hw/dv/dpi/usbdpi/usbdpi_test.c
index 1d9492c..8c8236e 100644
--- a/hw/dv/dpi/usbdpi/usbdpi_test.c
+++ b/hw/dv/dpi/usbdpi/usbdpi_test.c
@@ -6,6 +6,8 @@
 
 #include <assert.h>
 
+#include "usbdpi_stream.h"
+
 // Test-specific initialization
 void usbdpi_test_init(usbdpi_ctx_t *ctx) {
   // Test-specific initialization code
@@ -17,6 +19,24 @@
       bOK = true;
       break;
 
+    // Initialize streaming test
+    case 1: {
+      // Number of concurrent byte streams
+      const unsigned nstreams = ctx->test_arg[0] & 0xfU;
+      // Poll device for IN packets in streaming test?
+      bool retrieve = (ctx->test_arg[0] & 0x10U) != 0U;
+      // Checking of received data against expected LFSR output
+      bool checking = (ctx->test_arg[0] & 0x20U) != 0U;
+      // Request retrying of IN packets, feigning error
+      bool retrying = (ctx->test_arg[0] & 0x40U) != 0U;
+      // Attempt to send OUT packets to device
+      bool send = (ctx->test_arg[0] & 0x80U) != 0U;
+
+      if (nstreams <= USBDPI_MAX_STREAMS) {
+        bOK = streams_init(ctx, nstreams, retrieve, checking, retrying, send);
+      }
+    } break;
+
     default:
       assert(!"Unrecognised/unsupported test in USBDPI");
       break;
diff --git a/sw/device/tests/BUILD b/sw/device/tests/BUILD
index ee5b006..4ec67c3 100644
--- a/sw/device/tests/BUILD
+++ b/sw/device/tests/BUILD
@@ -1924,6 +1924,30 @@
 )
 
 opentitan_functest(
+    name = "usbdev_stream_test",
+    srcs = ["usbdev_stream_test.c"],
+    cw310 = cw310_params(
+        timeout = "eternal",
+    ),
+    targets = [
+        "verilator",
+        "cw310_test_rom",
+    ],
+    verilator = verilator_params(
+        timeout = "long",
+    ),
+    deps = [
+        "//hw/top_earlgrey/sw/autogen:top_earlgrey",
+        "//sw/device/lib/dif:pinmux",
+        "//sw/device/lib/runtime:log",
+        "//sw/device/lib/runtime:print",
+        "//sw/device/lib/testing:pinmux_testutils",
+        "//sw/device/lib/testing:usb_testutils",
+        "//sw/device/lib/testing/test_framework:ottf_main",
+    ],
+)
+
+opentitan_functest(
     name = "rstmgr_alert_info_test",
     srcs = ["rstmgr_alert_info_test.c"],
     cw310 = cw310_params(
diff --git a/sw/device/tests/usbdev_stream_test.c b/sw/device/tests/usbdev_stream_test.c
new file mode 100644
index 0000000..427582a
--- /dev/null
+++ b/sw/device/tests/usbdev_stream_test.c
@@ -0,0 +1,753 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// USB streaming data test
+//
+// This test requires interaction with the USB DPI model or a test application
+// on the USB host. The test initializes the USB device and configures a set of
+// endpoints for data streaming using bulk transfers.
+//
+// The DPI model mimics a USB host. After device initialization, it detects
+// the assertion of the pullup and first assigns an address to the device.
+// For this test it will then repeatedly fetch data via IN requests to
+// each stream and propagate that data to the corresponding OUT endpoints.
+//
+// The data itself is pseudo-randomly generated by the sender and,
+// independently, by the receiving code to check that the data has been
+// propagated unmodified and without data loss, corruption, replication etc.
+
+#include "sw/device/lib/dif/dif_pinmux.h"
+#include "sw/device/lib/runtime/log.h"
+#include "sw/device/lib/runtime/print.h"
+#include "sw/device/lib/testing/pinmux_testutils.h"
+#include "sw/device/lib/testing/test_framework/check.h"
+#include "sw/device/lib/testing/test_framework/ottf_main.h"
+#include "sw/device/lib/testing/usb_testutils.h"
+#include "sw/device/lib/testing/usb_testutils_controlep.h"
+
+#include "hw/top_earlgrey/sw/autogen/top_earlgrey.h"  // Generated.
+
+// Maximum number of concurrent streams
+#ifdef USBDEV_NUM_ENDPOINTS
+// Endpoint zero implements the default control pipe
+#define STREAMS_MAX (USBDEV_NUM_ENDPOINTS - 1U)
+#else
+#define STREAMS_MAX 11U
+#endif
+
+// TODO - currently we are unable to send the configuration descriptor
+// if we try to describe more than two bidirectional endpoints
+#if STREAMS_MAX > 2U
+#undef STREAMS_MAX
+#define STREAMS_MAX 2U
+#endif
+
+// Number of streams to be tested
+#ifndef NUM_STREAMS
+#define NUM_STREAMS STREAMS_MAX
+#endif
+
+// Maximum number of buffers simultaneously awaiting transmission
+// (we must leave some available for packet reception)
+#ifndef MAX_TX_BUFFERS
+#define MAX_TX_BUFFERS 24U
+#endif
+
+// This takes about 256s presently with 10MHz CPU in CW-310 FPGA and physical
+// USB with randomized packet sizes and the default memcpy implementation;
+// The _MEM_FASTER switch drops the run time to 187s
+#define TRANSFER_BYTES_FPGA (0x10U << 20)
+
+// This is appropriate for a Verilator chip simulation with 15 min timeout
+#define TRANSFER_BYTES_VERILATOR 0x2400U
+
+// This is about the amount that we can transfer within a 1 hour 'eternal' test
+//#define TRANSFER_BYTES_LONG (0xD0U << 20)
+
+// Stream signature words
+#define STREAM_SIGNATURE_HEAD 0x579EA01AU
+#define STREAM_SIGNATURE_TAIL 0x160AE975U
+
+// Seed numbers for the LFSR generators in each transfer direction for
+// the given stream number
+#define USBTST_LFSR_SEED(s) (uint8_t)(0x10U + (s)*7U)
+#define USBDPI_LFSR_SEED(s) (uint8_t)(0x9BU - (s)*7U)
+
+// Buffer size randomization
+#define BUFSZ_LFSR_SEED(s) (uint8_t)(0x17U + (s)*7U)
+
+// Simple LFSR for 8-bit sequences
+// Note: zero is an isolated state that shall be avoided
+#define LFSR_ADVANCE(lfsr) \
+  (((lfsr) << 1) ^         \
+   ((((lfsr) >> 1) ^ ((lfsr) >> 2) ^ ((lfsr) >> 3) ^ ((lfsr) >> 7)) & 1U))
+
+// Forward declaration to context state
+typedef struct usbdev_stream_test_ctx usbdev_stream_test_ctx_t;
+
+/**
+ * Stream signature
+ * Note: this needs to be transferred over a byte stream
+ */
+typedef struct __attribute__((packed)) usbdev_stream_sig {
+  /**
+   * Head signature word
+   */
+  uint32_t head_sig;
+  /**
+   * Initial value of LFSR
+   */
+  uint8_t init_lfsr;
+  /**
+   * Stream number
+   */
+  uint8_t stream;
+  /**
+   * Reserved fields; should be zero
+   */
+  uint8_t reserved1;
+  uint8_t reserved2;
+  /**
+   * Number of bytes to be transferred
+   */
+  uint32_t num_bytes;
+  /**
+   * Tail signature word
+   */
+  uint32_t tail_sig;
+} usbdev_stream_sig_t;
+
+/**
+ * Context state for a single stream
+ */
+typedef struct usbdev_stream {
+  /**
+   * Pointer to test context; callback functions receive only stream pointer
+   */
+  usbdev_stream_test_ctx_t *ctx;
+  /**
+   * Stream IDentifier
+   */
+  uint8_t id;
+  /**
+   * Has the stream signature been sent yet?
+   */
+  bool sent_sig;
+  /**
+   * USB device endpoint being used for data transmission
+   */
+  uint8_t tx_ep;
+  /**
+   * Transmission Linear Feedback Shift Register (for PRND data generation)
+   */
+  uint8_t tx_lfsr;
+  /**
+   * Total number of bytes presented to the USB device for transmission
+   */
+  uint32_t tx_bytes;
+  /**
+   * Transmission-side LFSR for selection of buffer size
+   */
+  uint8_t tx_buf_size;
+
+  /**
+   * USB device endpoint being used for data reception
+   */
+  uint8_t rx_ep;
+  /**
+   * Reception-side LFSR state (mirrors USBDPI generation of PRND data)
+   */
+  uint8_t rx_lfsr;
+  /**
+   * Reception-side shadow of transmission LFSR
+   */
+  uint8_t rxtx_lfsr;
+  /**
+   * Total number of bytes received from the USB device
+   */
+  uint32_t rx_bytes;
+  /**
+   * Size of transfer in bytes
+   */
+  uint32_t transfer_bytes;
+} usbdev_stream_t;
+
+/**
+ * Context state for streaming test
+ */
+struct usbdev_stream_test_ctx {
+  /**
+   * usb_testutils context through which the USB device is accessed
+   */
+  usb_testutils_ctx_t *usbdev;
+  /**
+   * State information for each of the test streams
+   */
+  usbdev_stream_t streams[STREAMS_MAX];
+  /**
+   * Per-endpoint limits on the number of buffers that may be queued for
+   * transmission
+   */
+  uint8_t tx_bufs_limit[USBDEV_NUM_ENDPOINTS];
+  /**
+   * Per-endpoint counts of filled buffers currently held in `tx_bufs`
+   */
+  uint8_t tx_bufs_queued[USBDEV_NUM_ENDPOINTS];
+  /**
+   * Total number of filled buffers held in `tx_bufs` across all endpoints
+   */
+  uint8_t tx_queued_total;
+  /**
+   * Queued filled buffers per endpoint; entry 0 is the one being transmitted
+   * TODO - perhaps absorb the buffer queuing into usb_testutils because the dif
+   * API is explicitly not robust against back-to-back sending of multiple
+   * buffers to a single endpoint, and because the read performance is reliant
+   * upon having additional buffer(s) already available for immediate
+   * presentation
+   */
+  // Note: USBDEV_NUM_ENDPOINTS x MAX_TX_BUFFERS entries; could be simplified
+  dif_usbdev_buffer_t tx_bufs[USBDEV_NUM_ENDPOINTS][MAX_TX_BUFFERS];
+};
+
+/**
+ * Configuration values for USB.
+ * TODO - dynamically construct a config descriptor appropriate to the test;
+ *        this would avoid creating unusable ports on the host and also provide
+ *        a little more testing
+ */
+static const uint8_t config_descriptors[] = {
+    USB_CFG_DSCR_HEAD(USB_CFG_DSCR_LEN + STREAMS_MAX * (USB_INTERFACE_DSCR_LEN +
+                                                        2 * USB_EP_DSCR_LEN),
+                      STREAMS_MAX),
+
+    VEND_INTERFACE_DSCR(0, 2, 0x50, 1),
+    USB_BULK_EP_DSCR(0, 1U, USBDEV_MAX_PACKET_SIZE, 0),
+    USB_BULK_EP_DSCR(1, 1U, USBDEV_MAX_PACKET_SIZE, 0),
+
+    VEND_INTERFACE_DSCR(1, 2, 0x50, 1),
+    USB_BULK_EP_DSCR(0, 2U, USBDEV_MAX_PACKET_SIZE, 0),
+    USB_BULK_EP_DSCR(1, 2U, USBDEV_MAX_PACKET_SIZE, 0),
+};
+
+/**
+ * Test descriptor
+ */
+static const uint8_t test_descriptor[] = {
+    USB_TESTUTILS_TEST_DSCR(1, NUM_STREAMS | 0xF0U, 0, 0, 0)};
+
+/**
+ * USB device context types.
+ */
+static usb_testutils_ctx_t usbdev;
+static usb_testutils_controlep_ctx_t usbdev_control;
+
+/**
+ * Pinmux handle
+ */
+static dif_pinmux_t pinmux;
+
+/**
+ * State information for streaming data test
+ */
+static usbdev_stream_test_ctx_t stream_test;
+
+/**
+ * Specify whether to perform verbose logging, for visibility
+ *   (Note that this substantially alters the timing of interactions with the
+ * DPI model and will increase the simulation time)
+ */
+static bool verbose = false;
+
+/**
+ * Send only maximal length packets?
+ * (important for performance measurements on the USB, but obviously undesirable
+ *  for testing reliability/function)
+ */
+static bool max_packets = false;
+
+/**
+ * Number of streams to be created
+ */
+static const unsigned nstreams = NUM_STREAMS;
+
+/**
+ * Diagnostic logging; expensive
+ */
+static bool log_traffic = false;
+
+// Dump a sequence of bytes as rows of hexadecimal columns followed by ASCII
+static void buffer_dump(const uint8_t *data, size_t n) {
+  static const char hex_digits[] = "0123456789abcdef";
+  enum { kCols = 0x20 };    // Bytes per row; a constant so `buf` is not a VLA
+  char buf[kCols * 4 + 2];  // 3 chars per hex column, 1 per ASCII char, NUL
+
+  // Note: we have no generic (s(n))printf functionality and must use LOG_INFO()
+  while (n > 0u) {
+    const unsigned chunk = (n > (size_t)kCols) ? (unsigned)kCols : (unsigned)n;
+    const uint8_t *row = data;
+    unsigned idx = 0u;
+    char *dp = buf;
+
+    // Columns of hexadecimal bytes
+    while (idx < chunk) {
+      dp[0] = hex_digits[row[idx] >> 4];
+      dp[1] = hex_digits[row[idx++] & 0xfu];
+      dp[2] = ' ';
+      dp += 3;
+    }
+    while (idx++ < (unsigned)kCols) {  // Pad a short row to align the ASCII
+      dp[2] = dp[1] = dp[0] = ' ';
+      dp += 3;
+    }
+
+    // Printable ASCII characters; anything else (including DEL) shown as '.'
+    for (unsigned col = 0u; col < chunk; col++) {
+      const uint8_t ch = row[col];
+      *dp++ = (ch < 0x20u || ch >= 0x7fu) ? '.' : (char)ch;
+    }
+    *dp = '\0';
+    LOG_INFO("%s", buf);
+    data += chunk;
+    n -= chunk;
+  }
+}
+
+// Create a stream signature buffer
+// Writes the signature into `buf` and returns the number of bytes written
+static uint32_t buffer_sig_create(usbdev_stream_t *s,
+                                  dif_usbdev_buffer_t *buf) {
+  usbdev_stream_sig_t sig = {
+      .head_sig = STREAM_SIGNATURE_HEAD,
+      .init_lfsr = s->tx_lfsr,
+      .stream = s->id,
+      .reserved1 = 0U,
+      .reserved2 = 0U,
+      .num_bytes = s->transfer_bytes,
+      .tail_sig = STREAM_SIGNATURE_TAIL,
+  };
+
+  // Sanity check because the host-side code relies upon the same structure
+  CHECK(sizeof(sig) == 0x10U);
+
+  // Note: stream signature is not included in the count of bytes transferred
+  size_t sig_len;
+  CHECK_DIF_OK(dif_usbdev_buffer_write(usbdev.dev, buf, (uint8_t *)&sig,
+                                       sizeof(sig), &sig_len));
+  CHECK(sig_len == sizeof(sig));
+
+  return sig_len;
+}
+
+// Fill a buffer with LFSR-generated data
+//
+// Writes the next `num_bytes` bytes of the stream's transmit-side pseudo-random
+// sequence into `buf`, then advances `s->tx_lfsr` and `s->tx_bytes` to match.
+// `num_bytes` must not exceed the space remaining in `buf` nor the maximum
+// packet size; both conditions are CHECKed.
+static void buffer_fill(usbdev_stream_t *s, dif_usbdev_buffer_t *buf,
+                        uint8_t num_bytes) {
+  alignas(uint32_t) uint8_t data[USBDEV_MAX_PACKET_SIZE];
+
+  CHECK(num_bytes <= buf->remaining_bytes);
+  CHECK(num_bytes <= sizeof(data));
+
+  // Emit LFSR-generated byte stream; keep this brief so that we can
+  // reduce our latency in responding to USB events (usb_testutils employs
+  // polling at present)
+  uint8_t lfsr = s->tx_lfsr;
+  for (unsigned idx = 0u; idx < num_bytes; idx++) {
+    data[idx] = lfsr;
+    lfsr = LFSR_ADVANCE(lfsr);
+  }
+
+  // Remember the LFSR state for the start of the next packet
+  s->tx_lfsr = lfsr;
+
+  // Optional diagnostic dump of the outgoing packet contents
+  if (verbose && log_traffic) {
+    buffer_dump(data, num_bytes);
+  }
+
+  size_t nwritten;
+  CHECK_DIF_OK(
+      dif_usbdev_buffer_write(usbdev.dev, buf, data, num_bytes, &nwritten));
+  CHECK(nwritten == num_bytes);
+
+  // Account for the payload bytes in the stream's transmitted total
+  s->tx_bytes += nwritten;
+}
+
+// Check the contents of a received buffer
+//
+// The payload is compared byte-by-byte against the XOR of the two expected
+// LFSR sequences and any mismatch trips a CHECK. A zero-length packet has
+// nothing to check, but its buffer is still returned to the pool.
+static void buffer_check(usbdev_stream_test_ctx_t *ctx, usbdev_stream_t *s,
+                         dif_usbdev_rx_packet_info_t packet_info,
+                         dif_usbdev_buffer_t buf) {
+  usb_testutils_ctx_t *usbdev = ctx->usbdev;
+  const uint8_t len = packet_info.length;
+
+  if (len == 0) {
+    // In the event that we've received a zero-length data packet, we still
+    // must return the buffer to the pool
+    CHECK_DIF_OK(
+        dif_usbdev_buffer_return(usbdev->dev, usbdev->buffer_pool, &buf));
+    return;
+  }
+
+  alignas(uint32_t) uint8_t data[USBDEV_MAX_PACKET_SIZE];
+
+  CHECK(len <= sizeof(data));
+
+  // Notes: the buffer being read here is USBDEV memory accessed as MMIO, so
+  //        only the DIF accesses it directly. when we consume the final bytes
+  //        from the read buffer, it is automatically returned to the buffer
+  //        pool.
+  size_t bytes_read;
+  CHECK_DIF_OK(dif_usbdev_buffer_read(usbdev->dev, usbdev->buffer_pool, &buf,
+                                      data, len, &bytes_read));
+  CHECK(bytes_read == len);
+
+  if (log_traffic) {
+    buffer_dump(data, bytes_read);
+  }
+
+  // Check received data against expected LFSR-generated byte stream;
+  // keep this brief so that we can reduce our latency in responding to
+  // USB events (usb_testutils employs polling at present)
+  //
+  // Received data should be the XOR of two LFSR-generated PRND streams -
+  // ours on the transmission side, and that of the DPI model
+  uint8_t ours = s->rxtx_lfsr;
+  uint8_t theirs = s->rx_lfsr;
+  for (size_t idx = 0u; idx < bytes_read; idx++) {
+    const uint8_t expected = ours ^ theirs;
+    CHECK(expected == data[idx],
+          "S%u: Unexpected received data 0x%02x : (LFSRs 0x%02x 0x%02x)",
+          s->id, data[idx], ours, theirs);
+
+    ours = LFSR_ADVANCE(ours);
+    theirs = LFSR_ADVANCE(theirs);
+  }
+
+  // Update the LFSRs for the next packet
+  s->rxtx_lfsr = ours;
+  s->rx_lfsr = theirs;
+}
+
+// Callback for successful buffer transmission; retires the head of the queue
+// and presents the next filled buffer, if any, for transmission
+static void strm_tx_done(void *stream_v) {
+  usbdev_stream_t *s = (usbdev_stream_t *)stream_v;
+  usbdev_stream_test_ctx_t *ctx = s->ctx;
+  usb_testutils_ctx_t *usbdev = ctx->usbdev;
+  const uint8_t tx_ep = s->tx_ep;
+  dif_usbdev_buffer_t *queue = ctx->tx_bufs[tx_ep];
+  uint8_t nqueued = ctx->tx_bufs_queued[tx_ep];
+
+  if (verbose) {
+    LOG_INFO("strm_tx_done called. %u (%u total) buffers(s) are queued",
+             nqueued, ctx->tx_queued_total);
+  }
+
+  // If we do not have at least one queued buffer then something has gone wrong
+  // and this callback is inappropriate
+  CHECK(nqueued > 0);
+  if (nqueued == 0) {
+    return;
+  }
+
+  // Note: since buffer transmission and completion signalling both occur within
+  // the foreground code (polling, not interrupt-driven) there is no issue of
+  // potential races here
+  // Shuffle the buffer descriptions down by one place, without using memmove
+  for (unsigned dst = 0u; dst + 1u < nqueued; dst++) {
+    queue[dst] = queue[dst + 1u];
+  }
+
+  nqueued--;
+  ctx->tx_queued_total--;
+  ctx->tx_bufs_queued[tx_ep] = nqueued;
+  // Is there another buffer ready to be transmitted?
+  if (nqueued > 0) {
+    CHECK_DIF_OK(dif_usbdev_send(usbdev->dev, tx_ep, &queue[0u]));
+  }
+}
+
+// Callback for buffer reception
+//
+// Verifies that the packet arrived on this stream's OUT endpoint and is not a
+// SETUP packet, passes it to `buffer_check` (which consumes `buf`) and then
+// adds its length to the stream's received byte count, which is what
+// `stream_completed` uses to detect the end of the transfer.
+static void strm_rx(void *stream_v, dif_usbdev_rx_packet_info_t packet_info,
+                    dif_usbdev_buffer_t buf) {
+  usbdev_stream_t *s = (usbdev_stream_t *)stream_v;
+  usbdev_stream_test_ctx_t *ctx = s->ctx;
+
+  CHECK(packet_info.endpoint == s->rx_ep);
+
+  // We do not expect to receive SETUP packets to this endpoint
+  CHECK(!packet_info.is_setup);
+
+  if (verbose) {
+    LOG_INFO("Stream %u: Received buffer of %u bytes(s)", s->id,
+             packet_info.length);
+  }
+
+  // Note: to measure raw OUT throughput without data checking, this call may
+  // be replaced by returning `buf` directly to the buffer pool with
+  // dif_usbdev_buffer_return()
+  buffer_check(ctx, s, packet_info, buf);
+
+  s->rx_bytes += packet_info.length;
+}
+
+// Callback for unexpected data reception (IN endpoint)
+// Reads the misdirected packet out of its buffer and logs its contents
+static void rx_show(void *stream_v, dif_usbdev_rx_packet_info_t packet_info,
+                    dif_usbdev_buffer_t buf) {
+  usb_testutils_ctx_t *usbdev = ((usbdev_stream_t *)stream_v)->ctx->usbdev;
+  uint8_t data[0x100U];
+  size_t bytes_read;
+
+  CHECK_DIF_OK(dif_usbdev_buffer_read(usbdev->dev, usbdev->buffer_pool, &buf,
+                                      data, packet_info.length, &bytes_read));
+  LOG_INFO("rx_show packet of %u byte(s) - read %u", packet_info.length,
+           bytes_read);
+  buffer_dump(data, bytes_read);
+}
+
+// Returns true once all of a stream's data has been both sent and received
+static bool stream_completed(const usbdev_stream_t *s) {
+  return (s->tx_bytes >= s->transfer_bytes) &&
+         (s->rx_bytes >= s->transfer_bytes);
+}
+
+// Initialise a stream's state and register its endpoint callbacks
+static void stream_init(usbdev_stream_test_ctx_t *ctx, usbdev_stream_t *s,
+                        uint8_t id, uint8_t ep_in, uint8_t ep_out,
+                        uint32_t transfer_bytes) {
+  // We need to be able to locate the test context given only the stream
+  // pointer within the strm_tx_done callback from usb_testutils
+  s->ctx = ctx;
+
+  // Remember the stream IDentifier
+  s->id = id;
+
+  // Not yet sent stream signature
+  s->sent_sig = false;
+
+  // Initialise the transfer state
+  s->tx_bytes = 0u;
+  s->rx_bytes = 0u;
+  s->transfer_bytes = transfer_bytes;
+
+  // Initialise the LFSR state for transmission and reception sides
+  // - we use a simple LFSR to generate a PRND stream to transmit to the USBDPI
+  // - the USBDPI XORs the received data with another LFSR-generated stream of
+  //   its own, and transmits the result back to us
+  // - to check the returned data, our reception code mimics both LFSRs
+  s->tx_lfsr = USBTST_LFSR_SEED(id);
+  s->rxtx_lfsr = s->tx_lfsr;
+  s->rx_lfsr = USBDPI_LFSR_SEED(id);
+
+  // Packet size randomization
+  s->tx_buf_size = BUFSZ_LFSR_SEED(id);
+
+  // Set up the endpoint for IN transfers (TO host)
+  //
+  // Note: a bidirectional endpoint also receives via strm_rx; an IN-only
+  // endpoint gets the rx_show handler to catch any misdirected data transfers
+  void (*rx)(void *, dif_usbdev_rx_packet_info_t, dif_usbdev_buffer_t) =
+      (ep_in == ep_out) ? strm_rx : rx_show;
+
+  s->tx_ep = ep_in;
+  usb_testutils_endpoint_setup(ctx->usbdev, ep_in, kUsbdevOutStream, s,
+                               strm_tx_done, rx, NULL, NULL);
+  s->rx_ep = ep_out;
+  if (ep_out != ep_in) {
+    // Set up the endpoint for OUT transfers (FROM host)
+    usb_testutils_endpoint_setup(ctx->usbdev, ep_out, kUsbdevOutStream, s, NULL,
+                                 strm_rx, NULL, NULL);
+  }
+}
+
+// Service the given stream, preparing and/or sending any data that we can;
+// data reception is handled via callbacks and requires no attention here
+static void stream_service(usbdev_stream_test_ctx_t *ctx, usbdev_stream_t *s) {
+  // Generate output data as soon as possible for collection by the host
+  usb_testutils_ctx_t *usbdev = ctx->usbdev;
+  uint8_t tx_ep = s->tx_ep;
+  uint8_t nqueued = ctx->tx_bufs_queued[tx_ep];
+
+  if (s->tx_bytes < s->transfer_bytes &&        // More bytes to transfer?
+      nqueued < ctx->tx_bufs_limit[tx_ep] &&    // Endpoint allowed buffer?
+      ctx->tx_queued_total < MAX_TX_BUFFERS) {  // Total buffers not exceeded?
+    dif_usbdev_buffer_t buf;
+
+    // See whether we can populate another buffer yet
+    dif_result_t dif_result =
+        dif_usbdev_buffer_request(usbdev->dev, usbdev->buffer_pool, &buf);
+    if (dif_result == kDifOk) {
+      // This is just for reporting the number of buffers presented to the
+      // USB device, as a progress indicator
+      static unsigned bufs_sent = 0u;
+      uint32_t num_bytes;
+
+      if (s->sent_sig) {
+        if (max_packets) {
+          num_bytes = USBDEV_MAX_PACKET_SIZE;
+        } else {
+          // Vary the amount of data sent per buffer
+          num_bytes = s->tx_buf_size % (USBDEV_MAX_PACKET_SIZE + 1u);
+          s->tx_buf_size = LFSR_ADVANCE(s->tx_buf_size);
+        }
+        uint32_t tx_left = s->transfer_bytes - s->tx_bytes;
+        if (num_bytes > tx_left) {
+          num_bytes = tx_left;
+        }
+
+        buffer_fill(s, &buf, num_bytes);
+      } else {
+        // Construct a signature to send to the host-side software,
+        // identifying the stream and its properties
+        num_bytes = buffer_sig_create(s, &buf);
+        s->sent_sig = true;
+      }
+
+      // Remember the buffer until we're informed that it has been
+      // successfully transmitted
+      //
+      // Note: since the 'tx_done' callback occurs from foreground code that
+      // is polling, there is no issue of interrupt races here
+      ctx->tx_bufs[tx_ep][nqueued] = buf;
+      ctx->tx_bufs_queued[tx_ep] = ++nqueued;
+      ctx->tx_queued_total++;
+
+      // Can we present this buffer for transmission yet?
+      if (nqueued <= 1U) {
+        CHECK_DIF_OK(dif_usbdev_send(usbdev->dev, tx_ep, &buf));
+      }
+
+      if (verbose) {
+        LOG_INFO(
+            "Stream %u: %uth buffer (of 0x%x byte(s)) awaiting transmission",
+            s->id, bufs_sent, num_bytes);
+      }
+      bufs_sent++;
+    } else {
+      // If we have no more buffers available right now, continue polling...
+      CHECK(dif_result == kDifUnavailable);
+    }
+  }
+}
+
+OTTF_DEFINE_TEST_CONFIG();
+
+// Test entry point: configure the USB device, then stream data until done
+bool test_main(void) {
+  usbdev_stream_test_ctx_t *ctx = &stream_test;  // Streaming test state
+
+  CHECK(kDeviceType == kDeviceSimVerilator || kDeviceType == kDeviceFpgaCw310,
+        "This test is not expected to run on platforms other than the "
+        "Verilator simulation or CW310 FPGA. It needs logic on the host side "
+        "to retrieve, scramble and return the generated byte stream");
+
+  LOG_INFO("Running USBDEV Stream Test");
+
+  // `streams[]` and the config descriptor only provide for STREAMS_MAX streams
+  CHECK(nstreams && nstreams <= STREAMS_MAX);
+
+  // Decide upon the number of bytes to be transferred for the entire test
+  uint32_t transfer_bytes = TRANSFER_BYTES_FPGA;
+  if (kDeviceType == kDeviceSimVerilator) {
+    transfer_bytes = TRANSFER_BYTES_VERILATOR;
+  }
+  transfer_bytes = (transfer_bytes + nstreams - 1) / nstreams;
+  LOG_INFO(" - %u stream(s), 0x%x bytes each", nstreams, transfer_bytes);
+
+  CHECK_DIF_OK(dif_pinmux_init(
+      mmio_region_from_addr(TOP_EARLGREY_PINMUX_AON_BASE_ADDR), &pinmux));
+  pinmux_testutils_init(&pinmux);
+  CHECK_DIF_OK(dif_pinmux_input_select(
+      &pinmux, kTopEarlgreyPinmuxPeripheralInUsbdevSense,
+      kTopEarlgreyPinmuxInselIoc7));
+
+  // Remember context state for usb_testutils context
+  ctx->usbdev = &usbdev;
+
+  // Call `usb_testutils_init` here so that DPI will not start until the
+  // simulation has finished all of the printing, which takes a while
+  // if `--trace` was passed in.
+  usb_testutils_init(ctx->usbdev, /*pinflip=*/false, /*en_diff_rcvr=*/false,
+                     /*tx_use_d_se0=*/false);
+  usb_testutils_controlep_init(&usbdev_control, ctx->usbdev, 0,
+                               config_descriptors, sizeof(config_descriptors),
+                               test_descriptor, sizeof(test_descriptor));
+  while (usbdev_control.device_state != kUsbTestutilsDeviceConfigured) {
+    usb_testutils_poll(ctx->usbdev);
+  }
+
+  // Initialise the state of each stream
+  for (unsigned id = 0U; id < nstreams; id++) {
+    // Which endpoint are we using for the IN transfers to the host?
+    const uint8_t ep_in = 1u + id;
+    // Which endpoint are we using for the OUT transfers from the host?
+    const uint8_t ep_out = 1u + id;
+    stream_init(ctx, &ctx->streams[id], id, ep_in, ep_out, transfer_bytes);
+  }
+
+  // Decide how many buffers each endpoint may queue up for transmission;
+  // we must ensure that there are buffers available for reception, and we
+  // do not want any endpoint to starve another
+  for (unsigned s = 0U; s < nstreams; s++) {
+    // This is slightly overspending the available buffers, leaving the
+    //   endpoints to vie for the final few buffers, so it's important that
+    //   we limit the total number of buffers across all endpoints too
+    unsigned ep = ctx->streams[s].tx_ep;
+    ctx->tx_bufs_queued[ep] = 0U;
+    ctx->tx_bufs_limit[ep] = (MAX_TX_BUFFERS + nstreams - 1) / nstreams;
+  }
+  ctx->tx_queued_total = 0U;
+
+  if (verbose) {
+    LOG_INFO("Commencing data transfer...");
+  }
+
+  bool bDone = false;
+  do {
+    for (unsigned s = 0U; s < nstreams; s++) {
+      stream_service(ctx, &ctx->streams[s]);
+
+      // We must keep polling regularly in order to handle detection of packet
+      // transmission as well as perform packet reception and checking
+      usb_testutils_poll(ctx->usbdev);
+    }
+
+    // See whether any streams still have more work to do
+    unsigned s = 0U;
+    while (s < nstreams && stream_completed(&ctx->streams[s])) {
+      s++;
+    }
+    bDone = (s >= nstreams);
+  } while (!bDone);
+
+  // Determine the total counts of bytes sent and received
+  uint32_t tx_bytes = 0U;
+  uint32_t rx_bytes = 0U;
+  for (unsigned s = 0U; s < nstreams; s++) {
+    tx_bytes += ctx->streams[s].tx_bytes;
+    rx_bytes += ctx->streams[s].rx_bytes;
+  }
+
+  LOG_INFO("USB sent 0x%x byte(s), received and checked 0x%x byte(s)", tx_bytes,
+           rx_bytes);
+
+  CHECK(tx_bytes == nstreams * transfer_bytes,
+        "Unexpected count of byte(s) sent to USB host");
+
+  return true;
+}