[aes] Rework masked Canright S-Box implementations for FPGA synthesis
This commit reworks the masked S-Box implementations and adds the
synthesis attributes required for Xilinx Vivado. Other synthesis tools
need to be instructed to translate these attributes into their own
equivalents.
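
For reference, below is a minimal sketch of the attribute pattern this
commit applies throughout (illustrative only: module and signal names are
placeholders, and the suggested mappings for other tools, e.g. syn_keep
for Synplify or set_dont_touch in Synopsys flows, are assumptions to be
checked against the respective tool manuals):

  module keep_example (
    input  logic [1:0] share0, share1, // two shares of a masked value
    input  logic [1:0] r,              // fresh randomness
    output logic [1:0] result
  );
    // Vivado preserves nets carrying the keep attribute, which pins down
    // the order in which the terms below are combined.
    (* keep = "true" *) logic [1:0] masked_term;
    assign masked_term = r ^ share0;           // apply the fresh mask first
    assign result      = masked_term ^ share1; // shares never combined directly
  endmodule
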
Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
diff --git a/hw/ip/aes/lint/aes.vlt b/hw/ip/aes/lint/aes.vlt
index 7c72b09..004c2ba 100644
--- a/hw/ip/aes/lint/aes.vlt
+++ b/hw/ip/aes/lint/aes.vlt
@@ -9,3 +9,6 @@
// Always_comb variable driven after use: 'regular'
// regular is assigned in a for loop, regular[1] depends on regular[0]
lint_off -rule ALWCOMBORDER -file "*/rtl/aes_key_expand.sv" -match "*'regular'"
+
+// Masked SBox implementations may require multiple modules to prevent aggressive synthesis optimizations.
+lint_off -rule DECLFILENAME -file "*/rtl/aes_sbox_*_masked*.sv" -match "Filename 'aes_sbox_*_masked*' does not match MODULE name: *"
diff --git a/hw/ip/aes/rtl/aes_sbox_canright_masked.sv b/hw/ip/aes/rtl/aes_sbox_canright_masked.sv
index 5c9ca27..1d0502c 100644
--- a/hw/ip/aes/rtl/aes_sbox_canright_masked.sv
+++ b/hw/ip/aes/rtl/aes_sbox_canright_masked.sv
@@ -17,10 +17,225 @@
// IMPORTANT NOTE: //
// DO NOT USE THIS FOR SYNTHESIS BLINDLY! //
// //
-// This is a high-level implementation targeting primarily RTL simulation. Synthesis tools might //
-// heavily optimize the design. The result is likely insecure. Use with care. //
+// This implementation targets primarily Xilinx Vivado synthesis as well as RTL simulation. It //
+// contains synthesis attributes specific to Xilinx Vivado to enforce the correct ordering of //
+// operations and avoid aggressive optimization. Other synthesis tools might still heavily //
+// optimize the design. The result is likely insecure. Use with care. //
///////////////////////////////////////////////////////////////////////////////////////////////////
+// Masked inverse in GF(2^4), using normal basis [z^4, z]
+// (see Formulas 6, 13, 14, 15, 21, 22, 23, 24 in the paper)
+module aes_masked_inverse_gf2p4 (
+ input logic [3:0] b,
+ input logic [3:0] q,
+ input logic [1:0] r,
+ input logic [3:0] m1,
+ output logic [3:0] b_inv
+);
+
+ import aes_pkg::*;
+ import aes_sbox_canright_pkg::*;
+
+ logic [1:0] b1, b0, q1, q0, c, r_sq, m11, m10, b1_inv, b0_inv;
+ assign b1 = b[3:2];
+ assign b0 = b[1:0];
+ assign q1 = q[3:2];
+ assign q0 = q[1:0];
+ assign m11 = m1[3:2];
+ assign m10 = m1[1:0];
+
+ // Get re-usable intermediate results.
+ (* keep = "true" *) logic [1:0] mul_b0_q1, mul_b1_q0, mul_q0_q1;
+ assign mul_b0_q1 = aes_mul_gf2p2(b0, q1);
+ assign mul_b1_q0 = aes_mul_gf2p2(b1, q0);
+ assign mul_q0_q1 = aes_mul_gf2p2(q0, q1);
+
+ // Formula 13
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // c = r ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(b1 ^ b0))
+ // ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(q1 ^ q0))
+ // ^ aes_mul_gf2p2(b1, b0)
+ // ^ mul_b1_q0 ^ mul_b0_q1 ^ mul_q0_q1;
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [1:0] scale_omega2_b, scale_omega2_q;
+ (* keep = "true" *) logic [1:0] mul_b1_b0;
+ assign scale_omega2_b = aes_scale_omega2_gf2p2(aes_square_gf2p2(b1 ^ b0));
+ assign scale_omega2_q = aes_scale_omega2_gf2p2(aes_square_gf2p2(q1 ^ q0));
+ assign mul_b1_b0 = aes_mul_gf2p2(b1, b0);
+ // Generate c step by step.
+ (* keep = "true" *) logic [1:0] c_0, c_1, c_2, c_3, c_4;
+ assign c_0 = r ^ scale_omega2_b;
+ assign c_1 = c_0 ^ scale_omega2_q;
+ assign c_2 = c_1 ^ mul_b1_b0;
+ assign c_3 = c_2 ^ mul_b1_q0;
+ assign c_4 = c_3 ^ mul_b0_q1;
+ assign c = c_4 ^ mul_q0_q1;
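+
+ // Illustration (not from the paper): without the keep attributes, a synthesis tool would be
+ // free to re-associate the sum above into, e.g.,
+ // aes_mul_gf2p2(b1 ^ q1, b0 ^ q0) = mul_b1_b0 ^ mul_b1_q0 ^ mul_b0_q1 ^ mul_q0_q1,
+ // which computes the product of the two unmasked halves of the value hidden by q and leaks.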
+
+ // Below, c1_inv and c2_inv (derived from the output of the inverse over GF(2^2)) are again
+ // recombined with other inputs to c and c_inv. Aggressive synthesis optimizations across the
+ // GF(2^2) inverter may result in SCA leakage and should be avoided.
+ (* keep = "true" *) logic [1:0] c_inv, c1_inv, c2_inv;
+
+ // Formulas 14 and 15
+ assign c_inv = aes_square_gf2p2(c);
+ assign r_sq = aes_square_gf2p2(r);
+
+ // Re-masking c_inv
+ // Formulas 21 and 23
+ // IMPORTANT: First combine the masks (ops in parens) then apply to c_inv:
+ // c1_inv = c_inv ^ (q1 ^ r_sq);
+ // c2_inv = c1_inv ^ (q0 ^ q1);
+ //
+ // Generate c1_inv and c2_inv step by step.
+ (* keep = "true" *) logic [1:0] xor_q1_r_sq, xor_q0_q1;
+ assign xor_q1_r_sq = q1 ^ r_sq;
+ assign xor_q0_q1 = q0 ^ q1;
+ assign c1_inv = c_inv ^ xor_q1_r_sq;
+ assign c2_inv = c1_inv ^ xor_q0_q1;
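+
+ // Illustration: squaring is linear in GF(2^2), so c_inv = aes_square_gf2p2(c) is masked by
+ // r_sq. XORing r_sq onto c_inv on its own would thus unmask the inverse; combining the masks
+ // q1 ^ r_sq first avoids this intermediate.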
+
+ // Formulas 22 and 24
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // b1_inv = m11 ^ aes_mul_gf2p2(b0, c1_inv)
+ // ^ mul_b0_q1 ^ aes_mul_gf2p2(q0, c1_inv) ^ mul_q0_q1;
+ // b0_inv = m10 ^ aes_mul_gf2p2(b1, c2_inv)
+ // ^ mul_b1_q0 ^ aes_mul_gf2p2(q1, c2_inv) ^ mul_q0_q1;
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [1:0] mul_b0_c1_inv, mul_q0_c1_inv;
+ (* keep = "true" *) logic [1:0] mul_b1_c2_inv, mul_q1_c2_inv;
+ assign mul_b0_c1_inv = aes_mul_gf2p2(b0, c1_inv);
+ assign mul_q0_c1_inv = aes_mul_gf2p2(q0, c1_inv);
+ assign mul_b1_c2_inv = aes_mul_gf2p2(b1, c2_inv);
+ assign mul_q1_c2_inv = aes_mul_gf2p2(q1, c2_inv);
+ // Generate b1_inv and b0_inv step by step.
+ (* keep = "true" *) logic [1:0] b1_inv_0, b1_inv_1, b1_inv_2;
+ (* keep = "true" *) logic [1:0] b0_inv_0, b0_inv_1, b0_inv_2;
+ assign b1_inv_0 = m11 ^ mul_b0_c1_inv;
+ assign b1_inv_1 = b1_inv_0 ^ mul_b0_q1;
+ assign b1_inv_2 = b1_inv_1 ^ mul_q0_c1_inv;
+ assign b1_inv = b1_inv_2 ^ mul_q0_q1;
+ assign b0_inv_0 = m10 ^ mul_b1_c2_inv;
+ assign b0_inv_1 = b0_inv_0 ^ mul_b1_q0;
+ assign b0_inv_2 = b0_inv_1 ^ mul_q1_c2_inv;
+ assign b0_inv = b0_inv_2 ^ mul_q0_q1;
+
+ // Note: b_inv is masked by m1, b was masked by q.
+ assign b_inv = {b1_inv, b0_inv};
+endmodule
+
+// Masked inverse in GF(2^8), using normal basis [y^16, y]
+// (see Formulas 3, 12, 25, 26 and 27 in the paper)
+module aes_masked_inverse_gf2p8 (
+ input logic [7:0] a,
+ input logic [7:0] m,
+ input logic [7:0] n,
+ output logic [7:0] a_inv
+);
+
+ import aes_pkg::*;
+ import aes_sbox_canright_pkg::*;
+
+ logic [3:0] a1, a0, m1, m0, q, s1, s0, a1_inv, a0_inv;
+ logic [1:0] r;
+
+ // The output of the inverse over GF(2^4) and signals derived from that are again recombined
+ // with inputs to the GF(2^4) inverter. Aggressive synthesis optimizations across the GF(2^4)
+ // inverter may result in SCA leakage and should be avoided.
+ (* keep = "true" *) logic [3:0] b, b_inv, b2_inv;
+
+ assign a1 = a[7:4];
+ assign a0 = a[3:0];
+ assign m1 = m[7:4];
+ assign m0 = m[3:0];
+
+ // Get re-usable intermediate results.
+ (* keep = "true" *) logic [3:0] mul_a0_m1, mul_a1_m0, mul_m0_m1;
+ assign mul_a0_m1 = aes_mul_gf2p4(a0, m1);
+ assign mul_a1_m0 = aes_mul_gf2p4(a1, m0);
+ assign mul_m0_m1 = aes_mul_gf2p4(m0, m1);
+
+ // q must be independent of m.
+ assign q = n[7:4];
+
+ // Formula 12
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // b = q ^ aes_square_scale_gf2p4_gf2p2(a1 ^ a0)
+ // ^ aes_square_scale_gf2p4_gf2p2(m1 ^ m0)
+ // ^ aes_mul_gf2p4(a1, a0)
+ // ^ mul_a1_m0 ^ mul_a0_m1 ^ mul_m0_m1;
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [3:0] mul_a1_a0;
+ assign mul_a1_a0 = aes_mul_gf2p4(a1, a0);
+ // Generate b step by step.
+ (* keep = "true" *) logic [3:0] b_0, b_1, b_2, b_3, b_4;
+ assign b_0 = q ^ aes_square_scale_gf2p4_gf2p2(a1 ^ a0); // q does not depend on a1, a0.
+ assign b_1 = b_0 ^ aes_square_scale_gf2p4_gf2p2(m1 ^ m0); // b_0 does not depend on m1, m0.
+ assign b_2 = b_1 ^ mul_a1_a0;
+ assign b_3 = b_2 ^ mul_a1_m0;
+ assign b_4 = b_3 ^ mul_a0_m1;
+ assign b = b_4 ^ mul_m0_m1;
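+
+ // Illustration: as for Formula 13 above, re-associating this sum into, e.g.,
+ // aes_mul_gf2p4(a1 ^ m1, a0 ^ m0) would multiply the two unmasked halves of the value hidden
+ // by m; the keep attributes prevent such optimizations.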
+
+ // r must be independent of q.
+ assign r = m1[3:2];
+
+ // b is masked by q, b_inv is masked by m1.
+ aes_masked_inverse_gf2p4 aes_masked_inverse_gf2p4 (
+ .b ( b ),
+ .q ( q ),
+ .r ( r ),
+ .m1 ( m1 ),
+ .b_inv ( b_inv )
+ );
+
+ // Formula 26
+ // IMPORTANT: First combine the masks (ops in parens) then apply to b_inv:
+ // b2_inv = b_inv ^ (m1 ^ m0);
+ //
+ // Generate b2_inv step by step.
+ (* keep = "true" *) logic [3:0] xor_m1_m0;
+ assign xor_m1_m0 = m1 ^ m0;
+ assign b2_inv = b_inv ^ xor_m1_m0;
+
+ // s is the specified output mask n.
+ assign s1 = n[7:4];
+ assign s0 = n[3:0];
+
+ // Formulas 25 and 27
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // a1_inv = s1 ^ aes_mul_gf2p4(a0, b_inv)
+ // ^ mul_a0_m1 ^ aes_mul_gf2p4(m0, b_inv) ^ mul_m0_m1;
+ // a0_inv = s0 ^ aes_mul_gf2p4(a1, b2_inv)
+ // ^ mul_a1_m0 ^ aes_mul_gf2p4(m1, b2_inv) ^ mul_m0_m1;
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [3:0] mul_a0_b_inv, mul_m0_b_inv;
+ (* keep = "true" *) logic [3:0] mul_a1_b2_inv, mul_m1_b2_inv;
+ assign mul_a0_b_inv = aes_mul_gf2p4(a0, b_inv);
+ assign mul_m0_b_inv = aes_mul_gf2p4(m0, b_inv);
+ assign mul_a1_b2_inv = aes_mul_gf2p4(a1, b2_inv);
+ assign mul_m1_b2_inv = aes_mul_gf2p4(m1, b2_inv);
+ // Generate a1_inv and a0_inv step by step.
+ (* keep = "true" *) logic [3:0] a1_inv_0, a1_inv_1, a1_inv_2;
+ (* keep = "true" *) logic [3:0] a0_inv_0, a0_inv_1, a0_inv_2;
+ assign a1_inv_0 = s1 ^ mul_a0_b_inv;
+ assign a1_inv_1 = a1_inv_0 ^ mul_a0_m1;
+ assign a1_inv_2 = a1_inv_1 ^ mul_m0_b_inv;
+ assign a1_inv = a1_inv_2 ^ mul_m0_m1;
+ assign a0_inv_0 = s0 ^ mul_a1_b2_inv; // s0 does not depend on a1, b2_inv.
+ assign a0_inv_1 = a0_inv_0 ^ mul_a1_m0;
+ assign a0_inv_2 = a0_inv_1 ^ mul_m1_b2_inv;
+ assign a0_inv = a0_inv_2 ^ mul_m0_m1;
+
+ // Note: a_inv is now masked by s = n, a was masked by m.
+ assign a_inv = {a1_inv, a0_inv};
+endmodule
+
module aes_sbox_canright_masked (
input aes_pkg::ciph_op_e op_i,
input logic [7:0] data_i, // masked, the actual input data is data_i ^ in_mask_i
@@ -32,134 +247,16 @@
import aes_pkg::*;
import aes_sbox_canright_pkg::*;
- ///////////////
- // Functions //
- ///////////////
-
- // Masked inverse in GF(2^4), using normal basis [z^4, z]
- // (see Formulas 6, 13, 14, 15, 21, 22, 23, 24 in the paper)
- function automatic logic [3:0] aes_masked_inverse_gf2p4(logic [3:0] b,
- logic [3:0] q,
- logic [1:0] r,
- logic [3:0] m1);
- logic [3:0] b_inv;
- logic [1:0] b1, b0, q1, q0, c, c_inv, c2_inv, r_sq, m11, m10, b1_inv, b0_inv;
- logic [1:0] mul_b0_q1, mul_b1_q0, mul_q0_q1;
- b1 = b[3:2];
- b0 = b[1:0];
- q1 = q[3:2];
- q0 = q[1:0];
- m11 = m1[3:2];
- m10 = m1[1:0];
-
- // Get re-usable intermediate results.
- mul_b0_q1 = aes_mul_gf2p2(b0, q1);
- mul_b1_q0 = aes_mul_gf2p2(b1, q0);
- mul_q0_q1 = aes_mul_gf2p2(q0, q1);
-
- // Formula 13
- // IMPORTANT: The following ops must be executed in order (left to right):
- c = r ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(b1 ^ b0))
- ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(q1 ^ q0))
- ^ aes_mul_gf2p2(b1, b0)
- ^ aes_mul_gf2p2(b1, q0) ^ mul_b0_q1 ^ mul_q0_q1;
- //
-
- // Formulas 14 and 15
- c_inv = aes_square_gf2p2(c);
- r_sq = aes_square_gf2p2(r);
-
- // Re-masking c_inv
- // Formulas 21 and 23
- // IMPORTANT: First combine the masks (ops in parens) then apply to c_inv:
- c_inv = c_inv ^ (q1 ^ r_sq);
- c2_inv = c_inv ^ (q0 ^ q1);
- //
-
- // Formulas 22 and 24
- // IMPORTANT: The following ops must be executed in order (left to right):
- b1_inv = m11 ^ aes_mul_gf2p2(b0, c_inv)
- ^ mul_b0_q1 ^ aes_mul_gf2p2(q0, c_inv) ^ mul_q0_q1;
- b0_inv = m10 ^ aes_mul_gf2p2(b1, c2_inv)
- ^ mul_b1_q0 ^ aes_mul_gf2p2(q1, c2_inv) ^ mul_q0_q1;
- //
-
- // Note: b_inv is masked by m1, b was masked by q.
- b_inv = {b1_inv, b0_inv};
-
- return b_inv;
- endfunction
-
- // Masked inverse in GF(2^8), using normal basis [y^16, y]
- // (see Formulas 3, 12, 25, 26 and 27 in the paper)
- function automatic logic [7:0] aes_masked_inverse_gf2p8(logic [7:0] a,
- logic [7:0] m,
- logic [7:0] n);
- logic [7:0] a_inv;
- logic [3:0] a1, a0, m1, m0, b, b_inv, b2_inv, q, s1, s0, a1_inv, a0_inv;
- logic [3:0] mul_a0_m1, mul_a1_m0, mul_m0_m1;
- logic [1:0] r;
- a1 = a[7:4];
- a0 = a[3:0];
- m1 = m[7:4];
- m0 = m[3:0];
-
- // Get re-usable intermediate results.
- mul_a0_m1 = aes_mul_gf2p4(a0, m1);
- mul_a1_m0 = aes_mul_gf2p4(a1, m0);
- mul_m0_m1 = aes_mul_gf2p4(m0, m1);
-
- // q must be independent of m.
- q = n[7:4];
-
- // Formula 12
- // IMPORTANT: The following ops must be executed in order (left to right):
- b = q ^ aes_square_scale_gf2p4_gf2p2(a1 ^ a0)
- ^ aes_square_scale_gf2p4_gf2p2(m1 ^ m0)
- ^ aes_mul_gf2p4(a1, a0)
- ^ mul_a1_m0 ^ mul_a0_m1 ^ mul_m0_m1;
- //
-
- // r must be independent of q.
- r = m1[3:2];
-
- // b is masked by q, b_inv is masked by m1.
- b_inv = aes_masked_inverse_gf2p4(b, q, r, m1);
-
- // Formula 26
- // IMPORTANT: First combine the masks (ops in parens) then apply to b_inv:
- b2_inv = b_inv ^ (m1 ^ m0);
- //
-
- // s is the specified output mask n.
- s1 = n[7:4];
- s0 = n[3:0];
-
- // Formulas 25 and 27
- // IMPORTANT: The following ops must be executed in order (left to right):
- a1_inv = s1 ^ aes_mul_gf2p4(a0, b_inv)
- ^ mul_a0_m1 ^ aes_mul_gf2p4(m0, b_inv) ^ mul_m0_m1;
- a0_inv = s0 ^ aes_mul_gf2p4(a1, b2_inv)
- ^ mul_a1_m0 ^ aes_mul_gf2p4(m1, b2_inv) ^ mul_m0_m1;
- //
-
- // Note: a_inv is now masked by s = n, a was masked by m.
- a_inv = {a1_inv, a0_inv};
-
- return a_inv;
- endfunction
-
//////////////////////////
// Masked Canright SBox //
//////////////////////////
- logic [7:0] data_basis_x, data_inverse;
- logic [7:0] in_mask_basis_x;
- logic [7:0] out_mask_basis_x;
+ logic [7:0] in_data_basis_x, out_data_basis_x;
+ logic [7:0] in_mask_basis_x, out_mask_basis_x;
// Convert data to normal basis X.
- assign data_basis_x = (op_i == CIPH_FWD) ? aes_mvm(data_i, A2X) :
- aes_mvm(data_i ^ 8'h63, S2X);
+ assign in_data_basis_x = (op_i == CIPH_FWD) ? aes_mvm(data_i, A2X) :
+ aes_mvm(data_i ^ 8'h63, S2X);
// Convert masks to normal basis X.
// The addition of constant 8'h63 following the affine transformation is skipped.
@@ -171,10 +268,15 @@
aes_mvm(out_mask_i, S2X);
// Do the inversion in normal basis X.
- assign data_inverse = aes_masked_inverse_gf2p8(data_basis_x, in_mask_basis_x, out_mask_basis_x);
+ aes_masked_inverse_gf2p8 aes_masked_inverse_gf2p8 (
+ .a ( in_data_basis_x ), // input
+ .m ( in_mask_basis_x ), // input
+ .n ( out_mask_basis_x ), // input
+ .a_inv ( out_data_basis_x ) // output
+ );
// Convert to basis S or A.
- assign data_o = (op_i == CIPH_FWD) ? (aes_mvm(data_inverse, X2S) ^ 8'h63) :
- (aes_mvm(data_inverse, X2A));
+ assign data_o = (op_i == CIPH_FWD) ? (aes_mvm(out_data_basis_x, X2S) ^ 8'h63) :
+ (aes_mvm(out_data_basis_x, X2A));
endmodule
diff --git a/hw/ip/aes/rtl/aes_sbox_canright_masked_noreuse.sv b/hw/ip/aes/rtl/aes_sbox_canright_masked_noreuse.sv
index d6195a9..b5eb6d9 100644
--- a/hw/ip/aes/rtl/aes_sbox_canright_masked_noreuse.sv
+++ b/hw/ip/aes/rtl/aes_sbox_canright_masked_noreuse.sv
@@ -15,10 +15,228 @@
// IMPORTANT NOTE: //
// DO NOT USE THIS FOR SYNTHESIS BLINDLY! //
// //
-// This is a high-level implementation targeting primarily RTL simulation. Synthesis tools might //
-// heavily optimize the design. The result is likely insecure. Use with care. //
+// This implementation targets primarily Xilinx Vivado synthesis as well as RTL simulation. It //
+// contains synthesis attributes specific to Xilinx Vivado to enforce the correct ordering of //
+// operations and avoid aggressive optimization. Other synthesis tools might still heavily //
+// optimize the design. The result is likely insecure. Use with care. //
///////////////////////////////////////////////////////////////////////////////////////////////////
+// Masked inverse in GF(2^4), using normal basis [z^4, z]
+// (see Formulas 6, 13, 14, 15, 16, 17 in the paper)
+module aes_masked_inverse_gf2p4_noreuse (
+ input logic [3:0] b,
+ input logic [3:0] q,
+ input logic [1:0] r,
+ input logic [3:0] t,
+ output logic [3:0] b_inv
+);
+
+ import aes_pkg::*;
+ import aes_sbox_canright_pkg::*;
+
+ logic [1:0] b1, b0, q1, q0, c, c_inv, r_sq, t1, t0, b1_inv, b0_inv;
+ assign b1 = b[3:2];
+ assign b0 = b[1:0];
+ assign q1 = q[3:2];
+ assign q0 = q[1:0];
+ assign t1 = t[3:2];
+ assign t0 = t[1:0];
+
+ // Formula 13
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // c = r ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(b1 ^ b0))
+ // ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(q1 ^ q0))
+ // ^ aes_mul_gf2p2(b1, b0)
+ // ^ aes_mul_gf2p2(b1, q0) ^ aes_mul_gf2p2(b0, q1) ^ aes_mul_gf2p2(q1, q0);
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [1:0] scale_omega2_b, scale_omega2_q;
+ (* keep = "true" *) logic [1:0] mul_b1_b0, mul_b1_q0, mul_b0_q1, mul_q1_q0;
+ assign scale_omega2_b = aes_scale_omega2_gf2p2(aes_square_gf2p2(b1 ^ b0));
+ assign scale_omega2_q = aes_scale_omega2_gf2p2(aes_square_gf2p2(q1 ^ q0));
+ assign mul_b1_b0 = aes_mul_gf2p2(b1, b0);
+ assign mul_b1_q0 = aes_mul_gf2p2(b1, q0);
+ assign mul_b0_q1 = aes_mul_gf2p2(b0, q1);
+ assign mul_q1_q0 = aes_mul_gf2p2(q1, q0);
+
+ // Generate c step by step.
+ (* keep = "true" *) logic [1:0] c_0, c_1, c_2, c_3, c_4;
+ assign c_0 = r ^ scale_omega2_b;
+ assign c_1 = c_0 ^ scale_omega2_q;
+ assign c_2 = c_1 ^ mul_b1_b0;
+ assign c_3 = c_2 ^ mul_b1_q0;
+ assign c_4 = c_3 ^ mul_b0_q1;
+ assign c = c_4 ^ mul_q1_q0;
+
+ // Formulas 14 and 15
+ assign c_inv = aes_square_gf2p2(c);
+ assign r_sq = aes_square_gf2p2(r);
+
+ // Formulas 16 and 17
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // b1_inv = t1 ^ aes_mul_gf2p2(b0, c_inv)
+ // ^ aes_mul_gf2p2(b0, r_sq) ^ aes_mul_gf2p2(q0, c_inv) ^ aes_mul_gf2p2(q0, r_sq);
+ // b0_inv = t0 ^ aes_mul_gf2p2(b1, c_inv)
+ // ^ aes_mul_gf2p2(b1, r_sq) ^ aes_mul_gf2p2(q1, c_inv) ^ aes_mul_gf2p2(q1, r_sq);
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [1:0] mul_b0_r_sq, mul_q0_c_inv, mul_q0_r_sq;
+ (* keep = "true" *) logic [1:0] mul_b1_r_sq, mul_q1_c_inv, mul_q1_r_sq;
+ assign mul_b0_r_sq = aes_mul_gf2p2(b0, r_sq);
+ assign mul_q0_c_inv = aes_mul_gf2p2(q0, c_inv);
+ assign mul_q0_r_sq = aes_mul_gf2p2(q0, r_sq);
+ assign mul_b1_r_sq = aes_mul_gf2p2(b1, r_sq);
+ assign mul_q1_c_inv = aes_mul_gf2p2(q1, c_inv);
+ assign mul_q1_r_sq = aes_mul_gf2p2(q1, r_sq);
+
+ // Generate b1_inv and b0_inv step by step.
+ (* keep = "true" *) logic [1:0] b1_inv_0, b1_inv_1, b1_inv_2;
+ (* keep = "true" *) logic [1:0] b0_inv_0, b0_inv_1, b0_inv_2;
+ assign b1_inv_0 = t1 ^ aes_mul_gf2p2(b0, c_inv); // t1 does not depend on b0, c_inv.
+ assign b1_inv_1 = b1_inv_0 ^ mul_b0_r_sq;
+ assign b1_inv_2 = b1_inv_1 ^ mul_q0_c_inv;
+ assign b1_inv = b1_inv_2 ^ mul_q0_r_sq;
+ assign b0_inv_0 = t0 ^ aes_mul_gf2p2(b1, c_inv); // t0 does not depend on b1, c_inv.
+ assign b0_inv_1 = b0_inv_0 ^ mul_b1_r_sq;
+ assign b0_inv_2 = b0_inv_1 ^ mul_q1_c_inv;
+ assign b0_inv = b0_inv_2 ^ mul_q1_r_sq;
+
+ // Note: b_inv is masked by t, b was masked by q.
+ assign b_inv = {b1_inv, b0_inv};
+
+endmodule
+
+// Masked inverse in GF(2^8), using normal basis [y^16, y]
+// (see Formulas 3, 12, 18 and 19 in the paper)
+module aes_masked_inverse_gf2p8_noreuse (
+ input logic [7:0] a,
+ input logic [7:0] m,
+ input logic [7:0] n,
+ output logic [7:0] a_inv
+);
+
+ import aes_pkg::*;
+ import aes_sbox_canright_pkg::*;
+
+ logic [3:0] a1, a0, m1, m0, q, s1, s0, t, a1_inv, a0_inv;
+ logic [1:0] r;
+
+ // The output of the inverse over GF(2^4) and signals derived from that are again recombined
+ // with inputs to the GF(2^4) inverter. Aggressive synthesis optimizations across the GF(2^4)
+ // inverter may result in SCA leakage and should be avoided.
+ (* keep = "true" *) logic [3:0] b, b_inv;
+
+ assign a1 = a[7:4];
+ assign a0 = a[3:0];
+ assign m1 = m[7:4];
+ assign m0 = m[3:0];
+
+ // q must be independent of m.
+ assign q = n[7:4];
+
+ // Formula 12
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // b = q ^ aes_square_scale_gf2p4_gf2p2(a1 ^ a0)
+ // ^ aes_square_scale_gf2p4_gf2p2(m1 ^ m0)
+ // ^ aes_mul_gf2p4(a1, a0)
+ // ^ aes_mul_gf2p4(a1, m0) ^ aes_mul_gf2p4(a0, m1) ^ aes_mul_gf2p4(m0, m1);
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [3:0] mul_a1_a0, mul_a1_m0, mul_a0_m1, mul_m0_m1;
+ assign mul_a1_a0 = aes_mul_gf2p4(a1, a0);
+ assign mul_a1_m0 = aes_mul_gf2p4(a1, m0);
+ assign mul_a0_m1 = aes_mul_gf2p4(a0, m1);
+ assign mul_m0_m1 = aes_mul_gf2p4(m0, m1);
+ // Generate b step by step.
+ (* keep = "true" *) logic [3:0] b_0, b_1, b_2, b_3, b_4;
+ assign b_0 = q ^ aes_square_scale_gf2p4_gf2p2(a1 ^ a0); // q does not depend on a1, a0.
+ assign b_1 = b_0 ^ aes_square_scale_gf2p4_gf2p2(m1 ^ m0); // b_0 does not depend on m1, m0.
+ assign b_2 = b_1 ^ mul_a1_a0;
+ assign b_3 = b_2 ^ mul_a1_m0;
+ assign b_4 = b_3 ^ mul_a0_m1;
+ assign b = b_4 ^ mul_m0_m1;
+
+ // r must be independent of q.
+ assign r = m1[3:2];
+
+ // Note that the paper states the following requirements on t:
+ // - t must be independent of r.
+ // - t1 must be independent of q0, t0 must be independent of q1.
+ // - t must be independent of m (for the final steps involving s).
+ // The paper suggests using t = q. To select s = n for the output mask (s must be independent
+ // of t = q = n[7:4]), we would need t = m0 or similar (but not r = m1[3:2]), but this would
+ // break the random product distribution of aes_mul_gf2p4(m0, t) or aes_mul_gf2p4(m1, t) below
+ // (see Lemma 2 in the paper). For this reason, we select t = q here and apply a final mask
+ // switch from s = m to n after the inversion.
+ assign t = q;
+
+ // b is masked by q, b_inv is masked by t.
+ aes_masked_inverse_gf2p4_noreuse aes_masked_inverse_gf2p4 (
+ .b ( b ),
+ .q ( q ),
+ .r ( r ),
+ .t ( t ),
+ .b_inv ( b_inv )
+ );
+
+ // Note that the paper states the following requirements on s:
+ // - s must be independent of t.
+ // - s1 must be independent of m0, s0 must be independent of m1.
+ // The paper suggests using s = m (the input mask). To still end up with the specified output
+ // mask n, we will apply a final mask switch after the inversion.
+ assign s1 = m1;
+ assign s0 = m0;
+
+ // Formulas 18 and 19
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // a1_inv = s1 ^ aes_mul_gf2p4(a0, b_inv)
+ // ^ aes_mul_gf2p4(a0, t) ^ aes_mul_gf2p4(m0, b_inv) ^ aes_mul_gf2p4(m0, t);
+ // a0_inv = s0 ^ aes_mul_gf2p4(a1, b_inv)
+ // ^ aes_mul_gf2p4(a1, t) ^ aes_mul_gf2p4(m1, b_inv) ^ aes_mul_gf2p4(m1, t);
+ //
+ // Get intermediate terms. The terms they are added to depend on the same inputs.
+ // Avoid aggressive synthesis optimizations.
+ (* keep = "true" *) logic [3:0] mul_a0_b_inv, mul_a0_t, mul_m0_b_inv, mul_m0_t;
+ (* keep = "true" *) logic [3:0] mul_a1_b_inv, mul_a1_t, mul_m1_b_inv, mul_m1_t;
+ assign mul_a0_b_inv = aes_mul_gf2p4(a0, b_inv);
+ assign mul_a0_t = aes_mul_gf2p4(a0, t);
+ assign mul_m0_b_inv = aes_mul_gf2p4(m0, b_inv);
+ assign mul_m0_t = aes_mul_gf2p4(m0, t);
+ assign mul_a1_b_inv = aes_mul_gf2p4(a1, b_inv);
+ assign mul_a1_t = aes_mul_gf2p4(a1, t);
+ assign mul_m1_b_inv = aes_mul_gf2p4(m1, b_inv);
+ assign mul_m1_t = aes_mul_gf2p4(m1, t);
+
+ // Generate a1_inv and a0_inv step by step.
+ (* keep = "true" *) logic [3:0] a1_inv_0, a1_inv_1, a1_inv_2;
+ (* keep = "true" *) logic [3:0] a0_inv_0, a0_inv_1, a0_inv_2;
+ assign a1_inv_0 = s1 ^ mul_a0_b_inv;
+ assign a1_inv_1 = a1_inv_0 ^ mul_a0_t;
+ assign a1_inv_2 = a1_inv_1 ^ mul_m0_b_inv;
+ assign a1_inv = a1_inv_2 ^ mul_m0_t;
+ assign a0_inv_0 = s0 ^ mul_a1_b_inv;
+ assign a0_inv_1 = a0_inv_0 ^ mul_a1_t;
+ assign a0_inv_2 = a0_inv_1 ^ mul_m1_b_inv;
+ assign a0_inv = a0_inv_2 ^ mul_m1_t;
+
+ // Note: a_inv is now masked by s = m, a was masked by m.
+ (* keep = "true" *) logic [7:0] a_inv_0;
+ assign a_inv_0 = {a1_inv, a0_inv};
+
+ // To have a_inv masked by n (the specified output mask), we perform a final mask switch.
+ // IMPORTANT: The following ops must be executed in order (left to right):
+ // a_inv = a_inv ^ n ^ m;
+ //
+ // Generate a_inv step by step.
+ (* keep = "true" *) logic [7:0] a_inv_1;
+ assign a_inv_1 = a_inv_0 ^ n;
+ assign a_inv = a_inv_1 ^ m;
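+
+ // Illustration: a_inv_0 is still masked by m, so XORing m first would expose the unmasked
+ // S-Box output; adding n first keeps the value masked in every intermediate.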
+
+endmodule
+
module aes_sbox_canright_masked_noreuse (
input aes_pkg::ciph_op_e op_i,
input logic [7:0] data_i, // masked, the actual input data is data_i ^ in_mask_i
@@ -30,130 +248,16 @@
import aes_pkg::*;
import aes_sbox_canright_pkg::*;
- ///////////////
- // Functions //
- ///////////////
-
- // Masked inverse in GF(2^4), using normal basis [z^4, z]
- // (see Formulas 6, 13, 14, 15, 16, 17 in the paper)
- function automatic logic [3:0] aes_masked_inverse_gf2p4(logic [3:0] b,
- logic [3:0] q,
- logic [1:0] r,
- logic [3:0] t);
- logic [3:0] b_inv;
- logic [1:0] b1, b0, q1, q0, c, c_inv, r_sq, t1, t0, b1_inv, b0_inv;
- b1 = b[3:2];
- b0 = b[1:0];
- q1 = q[3:2];
- q0 = q[1:0];
- t1 = t[3:2];
- t0 = t[1:0];
-
- // Formula 13
- // IMPORTANT: The following ops must be executed in order (left to right):
- c = r ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(b1 ^ b0))
- ^ aes_scale_omega2_gf2p2(aes_square_gf2p2(q1 ^ q0))
- ^ aes_mul_gf2p2(b1, b0)
- ^ aes_mul_gf2p2(b1, q0) ^ aes_mul_gf2p2(b0, q1) ^ aes_mul_gf2p2(q1, q0);
- //
-
- // Formulas 14 and 15
- c_inv = aes_square_gf2p2(c);
- r_sq = aes_square_gf2p2(r);
-
- // Formulas 16 and 17
- // IMPORTANT: The following ops must be executed in order (left to right):
- b1_inv = t1 ^ aes_mul_gf2p2(b0, c_inv)
- ^ aes_mul_gf2p2(b0, r_sq) ^ aes_mul_gf2p2(q0, c_inv) ^ aes_mul_gf2p2(q0, r_sq);
- b0_inv = t0 ^ aes_mul_gf2p2(b1, c_inv)
- ^ aes_mul_gf2p2(b1, r_sq) ^ aes_mul_gf2p2(q1, c_inv) ^ aes_mul_gf2p2(q1, r_sq);
- //
-
- // Note: b_inv is masked by t, b was masked by q.
- b_inv = {b1_inv, b0_inv};
-
- return b_inv;
- endfunction
-
- // Masked inverse in GF(2^8), using normal basis [y^16, y]
- // (see Formulas 3, 12, 18 and 19 in the paper)
- function automatic logic [7:0] aes_masked_inverse_gf2p8(logic [7:0] a,
- logic [7:0] m,
- logic [7:0] n);
- logic [7:0] a_inv;
- logic [3:0] a1, a0, m1, m0, b, b_inv, q, s1, s0, t, a1_inv, a0_inv;
- logic [1:0] r;
- a1 = a[7:4];
- a0 = a[3:0];
- m1 = m[7:4];
- m0 = m[3:0];
-
- // q must be independent of m.
- q = n[7:4];
-
- // Formula 12
- // IMPORTANT: The following ops must be executed in order (left to right):
- b = q ^ aes_square_scale_gf2p4_gf2p2(a1 ^ a0)
- ^ aes_square_scale_gf2p4_gf2p2(m1 ^ m0)
- ^ aes_mul_gf2p4(a1, a0)
- ^ aes_mul_gf2p4(a1, m0) ^ aes_mul_gf2p4(a0, m1) ^ aes_mul_gf2p4(m1, m0);
- //
-
- // r must be independent of q.
- r = m1[3:2];
-
- // Note that the paper states the following requirements on t:
- // - t must be independent of r.
- // - t1 must be independent of q0, t0 must be independent of q1.
- // - t must be independent of m (for the final steps involving s)
- // The paper suggests to use t = q. To select s = n for the output mask (s must be independent
- // of t = q = n[7:4]), we would need t = m0 or similar (not r, m1[3:2] though), but this would
- // break the random product distribution of aes_mul_gf2p4(m0, t), or aes_mul_gf2p4(m1, t) below
- // (see Lemma 2 in the paper). For this reason, we select t = q here and apply a final mask
- // switch from s = m to n after the inversion.
- t = q;
-
- // b is masked by q, b_inv is masked by t.
- b_inv = aes_masked_inverse_gf2p4(b, q, r, t);
-
- // Note that the paper states the following requirements on s:
- // - s must be independent of t
- // - s1 must be independent of m0, s0 must be independent of m1.
- // The paper suggests to use s = m (the input mask). To still end up with the specified output
- // mask n, we will apply a final mask switch after the inversion.
- s1 = m1;
- s0 = m0;
-
- // Formulas 18 and 19
- // IMPORTANT: The following ops must be executed in order (left to right):
- a1_inv = s1 ^ aes_mul_gf2p4(a0, b_inv)
- ^ aes_mul_gf2p4(a0, t) ^ aes_mul_gf2p4(m0, b_inv) ^ aes_mul_gf2p4(m0, t);
- a0_inv = s0 ^ aes_mul_gf2p4(a1, b_inv)
- ^ aes_mul_gf2p4(a1, t) ^ aes_mul_gf2p4(m1, b_inv) ^ aes_mul_gf2p4(m1, t);
- //
-
- // Note: a_inv is now masked by s = m, a was masked by m.
- a_inv = {a1_inv, a0_inv};
-
- // To have a_inv masked by n (the specified output mask), we perform a final mask switch.
- // IMPORTANT: The following ops must be executed in order (left to right):
- a_inv = a_inv ^ n ^ m;
- //
-
- return a_inv;
- endfunction
-
//////////////////////////
// Masked Canright SBox //
//////////////////////////
- logic [7:0] data_basis_x, data_inverse;
- logic [7:0] in_mask_basis_x;
- logic [7:0] out_mask_basis_x;
+ logic [7:0] in_data_basis_x, out_data_basis_x;
+ logic [7:0] in_mask_basis_x, out_mask_basis_x;
// Convert data to normal basis X.
- assign data_basis_x = (op_i == CIPH_FWD) ? aes_mvm(data_i, A2X) :
- aes_mvm(data_i ^ 8'h63, S2X);
+ assign in_data_basis_x = (op_i == CIPH_FWD) ? aes_mvm(data_i, A2X) :
+ aes_mvm(data_i ^ 8'h63, S2X);
// Convert masks to normal basis X.
// The addition of constant 8'h63 following the affine transformation is skipped.
@@ -165,10 +269,15 @@
aes_mvm(out_mask_i, S2X);
// Do the inversion in normal basis X.
- assign data_inverse = aes_masked_inverse_gf2p8(data_basis_x, in_mask_basis_x, out_mask_basis_x);
+ aes_masked_inverse_gf2p8_noreuse aes_masked_inverse_gf2p8 (
+ .a ( in_data_basis_x ), // input
+ .m ( in_mask_basis_x ), // input
+ .n ( out_mask_basis_x ), // input
+ .a_inv ( out_data_basis_x ) // output
+ );
// Convert to basis S or A.
- assign data_o = (op_i == CIPH_FWD) ? (aes_mvm(data_inverse, X2S) ^ 8'h63) :
- (aes_mvm(data_inverse, X2A));
+ assign data_o = (op_i == CIPH_FWD) ? (aes_mvm(out_data_basis_x, X2S) ^ 8'h63) :
+ (aes_mvm(out_data_basis_x, X2A));
endmodule