|  | // Copyright lowRISC contributors. | 
|  | // Licensed under the Apache License, Version 2.0, see LICENSE for details. | 
|  | // SPDX-License-Identifier: Apache-2.0 | 
|  |  | 
|  | #include "sw/device/lib/base/hardened_memory.h" | 
|  |  | 
|  | #include "sw/device/lib/base/hardened.h" | 
|  | #include "sw/device/lib/base/memory.h" | 
|  | #include "sw/device/lib/base/random_order.h" | 
|  |  | 
|  | // NOTE: The three hardened_mem* functions have similar contents, but the parts | 
|  | // that are shared between them are commented only in `memcpy()`. | 
|  | void hardened_memcpy(uint32_t *restrict dest, const uint32_t *restrict src, | 
|  | size_t word_len) { | 
|  | random_order_t order; | 
|  | random_order_init(&order, word_len); | 
|  |  | 
|  | size_t count = 0; | 
|  | size_t expected_count = random_order_len(&order); | 
|  |  | 
|  | // Immediately convert `src` and `dest` to addresses, which erases their | 
|  | // provenance and causes their addresses to be exposed (in the provenance | 
|  | // sense). | 
|  | uintptr_t src_addr = (uintptr_t)src; | 
|  | uintptr_t dest_addr = (uintptr_t)dest; | 
|  |  | 
|  | // `decoys` is a small stack array that is filled with uninitialized memory. | 
|  | // It is scratch space for us to do "extra" operations, when the number of | 
|  | // iteration indices the chosen random order is different from `word_len`. | 
|  | // | 
|  | // These extra operations also introduce noise that an attacker must do work | 
|  | // to filter, such as by applying side-channel analysis to obtain an address | 
|  | // trace. | 
|  | uint32_t decoys[8]; | 
|  | uintptr_t decoy_addr = (uintptr_t)&decoys; | 
|  |  | 
|  | // We need to launder `count`, so that the SW.LOOP-COMPLETION check is not | 
|  | // deleted by the compiler. | 
|  | for (; launderw(count) < expected_count; count = launderw(count) + 1) { | 
|  | // The order values themselves are in units of words, but we need `idx` to | 
|  | // be in units of bytes. | 
|  | // | 
|  | // The value obtained from `advance()` is laundered, to prevent | 
|  | // implementation details from leaking across procedures. | 
|  | size_t idx = launderw(random_order_advance(&order)) * sizeof(uint32_t); | 
|  |  | 
|  | // Prevent the compiler from reordering the loop; this ensures a | 
|  | // happens-before among indices consistent with `order`. | 
|  | barrierw(idx); | 
|  |  | 
|  | // Compute putative offsets into `src`, `dest`, and `decoys`. Some of these | 
|  | // may go off the end of `src` and `dest`, but they will not be cast to | 
|  | // pointers in that case. (Note that casting out-of-range addresses to | 
|  | // pointers is UB.) | 
|  | uintptr_t srcp = src_addr + idx; | 
|  | uintptr_t destp = dest_addr + idx; | 
|  | uintptr_t decoy1 = decoy_addr + (idx % sizeof(decoys)); | 
|  | uintptr_t decoy2 = | 
|  | decoy_addr + ((idx + sizeof(decoys) / 2) % sizeof(decoys)); | 
|  |  | 
|  | // Branchlessly select whether to do a "real" copy or a decoy copy, | 
|  | // depending on whether we've gone off the end of the array or not. | 
|  | // | 
|  | // Pretty much everything needs to be laundered: we need to launder `idx` | 
|  | // for obvious reasons, and we need to launder the result of the select, so | 
|  | // that the compiler cannot delete the resulting loads and stores. This is | 
|  | // similar to having used `volatile uint32_t *`. | 
|  | void *src = (void *)launderw( | 
|  | ct_cmovw(ct_sltuw(launderw(idx), word_len), srcp, decoy1)); | 
|  | void *dest = (void *)launderw( | 
|  | ct_cmovw(ct_sltuw(launderw(idx), word_len), destp, decoy2)); | 
|  |  | 
|  | // Perform the copy, without performing a typed dereference operation. | 
|  | write_32(read_32(src), dest); | 
|  | } | 
|  |  | 
|  | HARDENED_CHECK_EQ(count, expected_count); | 
|  | } | 
|  |  | 
|  | // The source of randomness for shred, which may be replaced at link-time. | 
|  | OT_WEAK | 
|  | uint32_t hardened_memshred_random_word(void) { return 0xcaffe17e; } | 
|  |  | 
|  | void hardened_memshred(uint32_t *dest, size_t word_len) { | 
|  | random_order_t order; | 
|  | random_order_init(&order, word_len); | 
|  |  | 
|  | size_t count = 0; | 
|  | size_t expected_count = random_order_len(&order); | 
|  |  | 
|  | uintptr_t data_addr = (uintptr_t)dest; | 
|  |  | 
|  | uint32_t decoys[8]; | 
|  | uintptr_t decoy_addr = (uintptr_t)&decoys; | 
|  |  | 
|  | for (; count < expected_count; count = launderw(count) + 1) { | 
|  | size_t idx = launderw(random_order_advance(&order)) * sizeof(uint32_t); | 
|  | barrierw(idx); | 
|  |  | 
|  | uintptr_t datap = data_addr + idx; | 
|  | uintptr_t decoy = decoy_addr + (idx % sizeof(decoys)); | 
|  |  | 
|  | void *data = (void *)launderw( | 
|  | ct_cmovw(ct_sltuw(launderw(idx), word_len), datap, decoy)); | 
|  |  | 
|  | // Write a freshly-generated random word to `*data`. | 
|  | write_32(hardened_memshred_random_word(), data); | 
|  | } | 
|  |  | 
|  | HARDENED_CHECK_EQ(count, expected_count); | 
|  | } | 
|  |  | 
|  | hardened_bool_t hardened_memeq(const uint32_t *lhs, const uint32_t *rhs, | 
|  | size_t word_len) { | 
|  | random_order_t order; | 
|  | random_order_init(&order, word_len); | 
|  |  | 
|  | size_t count = 0; | 
|  | size_t expected_count = random_order_len(&order); | 
|  |  | 
|  | uintptr_t lhs_addr = (uintptr_t)lhs; | 
|  | uintptr_t rhs_addr = (uintptr_t)rhs; | 
|  |  | 
|  | // `decoys` needs to be filled with equal values this time around. It | 
|  | // should be filled with values with a Hamming weight of around 16, which is | 
|  | // the most common hamming weight among 32-bit words. | 
|  | uint32_t decoys[8] = { | 
|  | 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, | 
|  | 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, | 
|  | }; | 
|  | uintptr_t decoy_addr = (uintptr_t)&decoys; | 
|  |  | 
|  | uint32_t zeros = 0; | 
|  | uint32_t ones = UINT32_MAX; | 
|  |  | 
|  | // The loop is almost token-for-token the one above, but the copy is | 
|  | // replaced with something else. | 
|  | for (; count < expected_count; count = launderw(count) + 1) { | 
|  | size_t idx = launderw(random_order_advance(&order)) * sizeof(uint32_t); | 
|  | barrierw(idx); | 
|  |  | 
|  | uintptr_t ap = lhs_addr + idx; | 
|  | uintptr_t bp = rhs_addr + idx; | 
|  | uintptr_t decoy1 = decoy_addr + (idx % sizeof(decoys)); | 
|  | uintptr_t decoy2 = | 
|  | decoy_addr + ((idx + sizeof(decoys) / 2) % sizeof(decoys)); | 
|  |  | 
|  | void *av = (void *)launderw( | 
|  | ct_cmovw(ct_sltuw(launderw(idx), word_len), ap, decoy1)); | 
|  | void *bv = (void *)launderw( | 
|  | ct_cmovw(ct_sltuw(launderw(idx), word_len), bp, decoy2)); | 
|  |  | 
|  | uint32_t a = read_32(av); | 
|  | uint32_t b = read_32(bv); | 
|  |  | 
|  | // Launder one of the operands, so that the compiler cannot cache the result | 
|  | // of the xor for use in the next operation. | 
|  | // | 
|  | // We launder `zeroes` so that compiler cannot learn that `zeroes` has | 
|  | // strictly more bits set at the end of the loop. | 
|  | zeros = launder32(zeros) | (launder32(a) ^ b); | 
|  |  | 
|  | // Same as above. The compiler can cache the value of `a[offset]`, but it | 
|  | // has no chance to strength-reduce this operation. | 
|  | ones = launder32(ones) & (launder32(a) ^ ~b); | 
|  | } | 
|  |  | 
|  | HARDENED_CHECK_EQ(count, expected_count); | 
|  | if (launder32(zeros) == 0) { | 
|  | HARDENED_CHECK_EQ(ones, UINT32_MAX); | 
|  | return kHardenedBoolTrue; | 
|  | } | 
|  |  | 
|  | HARDENED_CHECK_NE(ones, UINT32_MAX); | 
|  | return kHardenedBoolFalse; | 
|  | } |