[aes] Use prim_flop_en/buf primitives inside DOM S-Box We use such primitives to encapsulate tool-specific synthesis attributes. When switching to a different synthesis tool, users need to make sure to provide the proper primitives. Otherwise, the resulting netlist might be insecure. Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
diff --git a/hw/ip/aes/rtl/aes_sbox_dom.sv b/hw/ip/aes/rtl/aes_sbox_dom.sv index 337a290..6843e87 100644 --- a/hw/ip/aes/rtl/aes_sbox_dom.sv +++ b/hw/ip/aes/rtl/aes_sbox_dom.sv
@@ -24,10 +24,10 @@ // IMPORTANT NOTE: // // DO NOT USE THIS FOR SYNTHESIS BLINDLY! // // // -// This implementation targets primarily Xilinx Vivado synthesis as well as RTL simulation. It // -// contains synthesis attributes specific to Xilinx Vivado to prevent the synthesis tool from // -// optimizing away registers and to enforce the correct ordering of operations. Other synthesis // -// tools might still heavily optimize the design. The result is likely insecure. Use with care. // +// This implementation relies on primitive cells like prim_buf/flop_en containing tool-specific // +// synthesis attributes to prevent the synthesis tool from optimizing away/re-ordering registers // +// and to enforce the correct ordering of operations. Without the proper primitives, synthesis // +// tools might heavily optimize the design. The result is likely insecure. Use with care. // /////////////////////////////////////////////////////////////////////////////////////////////////// `include "prim_assert.sv" @@ -68,7 +68,7 @@ // Calculation // ///////////////// // Inner-domain terms - (* keep = "true" *) logic [NPower-1:0] mul_ax_ay_d, mul_bx_by_d; + logic [NPower-1:0] mul_ax_ay_d, mul_bx_by_d; if (NPower == 4) begin : gen_inner_mul_gf2p4 assign mul_ax_ay_d = aes_mul_gf2p4(a_x, a_y); assign mul_bx_by_d = aes_mul_gf2p4(b_x, b_y); @@ -94,20 +94,21 @@ /////////////// // Resharing of cross-domain terms logic [NPower-1:0] aq_z0_d, bq_z0_d; - (* keep = "true" *) logic [NPower-1:0] aq_z0_q, bq_z0_q; + logic [NPower-1:0] aq_z0_q, bq_z0_q; assign aq_z0_d = z_0 ^ mul_ax_by; assign bq_z0_d = z_0 ^ mul_ay_bx; // Registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - aq_z0_q <= '0; - bq_z0_q <= '0; - end else if (we_i) begin - aq_z0_q <= aq_z0_d; - bq_z0_q <= bq_z0_d; - end - end + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_abq_z0 ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {aq_z0_d, bq_z0_d} ), + .q_o ( {aq_z0_q, bq_z0_q} ) + ); ///////////////////////// // Optional Pipelining // @@ -119,16 +120,17 @@ // input data every clock cycle and prevents SCA leakage occurring due to the integration of // reshared cross-domain terms with inner-domain terms derived from different input data. - (* keep = "true" *) logic [NPower-1:0] mul_ax_ay_q, mul_bx_by_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - mul_ax_ay_q <= '0; - mul_bx_by_q <= '0; - end else if (we_i) begin - mul_ax_ay_q <= mul_ax_ay_d; - mul_bx_by_q <= mul_bx_by_d; - end - end + logic [NPower-1:0] mul_ax_ay_q, mul_bx_by_q; + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_mul_abx_aby ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {mul_ax_ay_d, mul_bx_by_d} ), + .q_o ( {mul_ax_ay_q, mul_bx_by_q} ) + ); assign mul_ax_ay = mul_ax_ay_q; assign mul_bx_by = mul_bx_by_q; @@ -139,8 +141,17 @@ // this can cause SCA leakage as during the clock cycle in which new data arrives, the new // inner-domain terms are integrated with the previous, reshared cross-domain terms. - assign mul_ax_ay = mul_ax_ay_d; - assign mul_bx_by = mul_bx_by_d; + // Avoid aggressive synthesis optimizations. + logic [NPower-1:0] mul_ax_ay_buf, mul_bx_by_buf; + prim_buf #( + .Width ( 2*NPower ) + ) u_prim_buf_mul_abx_aby ( + .in_i ( {mul_ax_ay_d, mul_bx_by_d} ), + .out_o ( {mul_ax_ay_buf, mul_bx_by_buf} ) + ); + + assign mul_ax_ay = mul_ax_ay_buf; + assign mul_bx_by = mul_bx_by_buf; end ///////////////// @@ -185,20 +196,21 @@ ////////////// // Blinding of y by z. logic [NPower-1:0] a_yz_d, b_yz_d; - (* keep = "true" *) logic [NPower-1:0] a_yz_q, b_yz_q; + logic [NPower-1:0] a_yz_q, b_yz_q; assign a_yz_d = a_y ^ a_z; assign b_yz_d = b_y ^ b_z; // Registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_yz_q <= '0; - b_yz_q <= '0; - end else if (we_i) begin - a_yz_q <= a_yz_d; - b_yz_q <= b_yz_d; - end - end + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_yz ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {a_yz_d, b_yz_d} ), + .q_o ( {a_yz_q, b_yz_q} ) + ); //////////////// // Correction // @@ -230,16 +242,17 @@ // and prevents SCA leakage occurring due to the multiplication of input x with b belonging to // different clock cycles. - (* keep = "true" *) logic [NPower-1:0] a_x_q, b_x_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_x_q <= '0; - b_x_q <= '0; - end else if (we_i) begin - a_x_q <= a_x; - b_x_q <= b_x; - end - end + logic [NPower-1:0] a_x_q, b_x_q; + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_x ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {a_x, b_x} ), + .q_o ( {a_x_q, b_x_q} ) + ); assign a_x_calc = a_x_q; assign b_x_calc = b_x_q; @@ -318,20 +331,21 @@ ////////////// // Blinding of y by z_0. logic [NPower-1:0] a_yz0_d, b_yz0_d; - (* keep = "true" *) logic [NPower-1:0] a_yz0_q, b_yz0_q; + logic [NPower-1:0] a_yz0_q, b_yz0_q; assign a_yz0_d = a_y ^ z_0; assign b_yz0_d = b_y ^ z_0; // Registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_yz0_q <= '0; - b_yz0_q <= '0; - end else if (we_i) begin - a_yz0_q <= a_yz0_d; - b_yz0_q <= b_yz0_d; - end - end + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_yz0 ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {a_yz0_d, b_yz0_d} ), + .q_o ( {a_yz0_q, b_yz0_q} ) + ); //////////////// // Correction // @@ -342,7 +356,7 @@ // which allows for further optimizations. // Calculation - (* keep = "true" *) logic [NPower-1:0] mul_ax_z0, mul_bx_z0; + logic [NPower-1:0] mul_ax_z0, mul_bx_z0; if (NPower == 4) begin : gen_corr_mul_gf2p4 assign mul_ax_z0 = aes_mul_gf2p4(a_x, z_0); assign mul_bx_z0 = aes_mul_gf2p4(b_x, z_0); @@ -352,22 +366,32 @@ assign mul_bx_z0 = aes_mul_gf2p2(b_x, z_0); end + // Avoid aggressive synthesis optimizations. + logic [NPower-1:0] mul_ax_z0_buf, mul_bx_z0_buf; + prim_buf #( + .Width ( 2*NPower ) + ) u_prim_buf_mul_abx_z0 ( + .in_i ( {mul_ax_z0, mul_bx_z0} ), + .out_o ( {mul_ax_z0_buf, mul_bx_z0_buf} ) + ); + // Resharing logic [NPower-1:0] axz0_z1_d, bxz0_z1_d; - (* keep = "true" *) logic [NPower-1:0] axz0_z1_q, bxz0_z1_q; - assign axz0_z1_d = mul_ax_z0 ^ z_1; - assign bxz0_z1_d = mul_bx_z0 ^ z_1; + logic [NPower-1:0] axz0_z1_q, bxz0_z1_q; + assign axz0_z1_d = mul_ax_z0_buf ^ z_1; + assign bxz0_z1_d = mul_bx_z0_buf ^ z_1; // Registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - axz0_z1_q <= '0; - bxz0_z1_q <= '0; - end else if (we_i) begin - axz0_z1_q <= axz0_z1_d; - bxz0_z1_q <= bxz0_z1_d; - end - end + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_abxz0_z1 ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {axz0_z1_d, bxz0_z1_d} ), + .q_o ( {axz0_z1_q, bxz0_z1_q} ) + ); ///////////////////////// // Optional Pipelining // @@ -381,20 +405,17 @@ // // The PreDomIndep variant has the required pipeline registers built in already. - (* keep = "true" *) logic [NPower-1:0] a_x_q, b_x_q, a_y_q, b_y_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_x_q <= '0; - b_x_q <= '0; - a_y_q <= '0; - b_y_q <= '0; - end else if (we_i) begin - a_x_q <= a_x; - b_x_q <= b_x; - a_y_q <= a_y; - b_y_q <= b_y; - end - end + logic [NPower-1:0] a_x_q, b_x_q, a_y_q, b_y_q; + prim_flop_en #( + .Width ( 4*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_xy ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {a_x, b_x, a_y, b_y} ), + .q_o ( {a_x_q, b_x_q, a_y_q, b_y_q} ) + ); assign a_x_calc = a_x_q; assign b_x_calc = b_x_q; @@ -434,7 +455,7 @@ // d_y part: Inner-domain terms of x * y logic [NPower-1:0] mul_ax_ay_d, mul_bx_by_d; - (* keep = "true" *) logic [NPower-1:0] mul_ax_ay_q, mul_bx_by_q; + logic [NPower-1:0] mul_ax_ay_q, mul_bx_by_q; if (NPower == 4) begin : gen_inner_mul_gf2p4 assign mul_ax_ay_d = aes_mul_gf2p4(a_x_calc, a_y_calc); assign mul_bx_by_d = aes_mul_gf2p4(b_x_calc, b_y_calc); @@ -445,31 +466,33 @@ end // Registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - mul_ax_ay_q <= '0; - mul_bx_by_q <= '0; - end else if (we_i) begin - mul_ax_ay_q <= mul_ax_ay_d; - mul_bx_by_q <= mul_bx_by_d; - end - end + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_mul_abx_aby ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {mul_ax_ay_d, mul_bx_by_d} ), + .q_o ( {mul_ax_ay_q, mul_bx_by_q} ) + ); // Input Registers - (* keep = "true" *) logic [NPower-1:0] a_x_q, b_x_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_x_q <= '0; - b_x_q <= '0; - end else if (we_i) begin - a_x_q <= a_x_calc; - b_x_q <= b_x_calc; - end - end + logic [NPower-1:0] a_x_q, b_x_q; + prim_flop_en #( + .Width ( 2*NPower ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_xy ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i ), + .d_i ( {a_x_calc, b_x_calc} ), + .q_o ( {a_x_q, b_x_q} ) + ); // _D_y_z0 part: Cross-domain terms: d_x * _D_y_z0 // Need to use registered version of input x. - (* keep = "true" *) logic [NPower-1:0] mul_ax_byz0, mul_bx_ayz0; + logic [NPower-1:0] mul_ax_byz0, mul_bx_ayz0; if (NPower == 4) begin : gen_cross_mul_gf2p4 assign mul_ax_byz0 = aes_mul_gf2p4(a_x_q, b_yz0_q); assign mul_bx_ayz0 = aes_mul_gf2p4(b_x_q, a_yz0_q); @@ -479,9 +502,18 @@ assign mul_bx_ayz0 = aes_mul_gf2p2(b_x_q, a_yz0_q); end + // Avoid aggressive synthesis optimizations. + logic [NPower-1:0] mul_ax_byz0_buf, mul_bx_ayz0_buf; + prim_buf #( + .Width ( 2*NPower ) + ) u_prim_buf_mul_abx_bayz0 ( + .in_i ( {mul_ax_byz0, mul_bx_ayz0} ), + .out_o ( {mul_ax_byz0_buf, mul_bx_ayz0_buf} ) + ); + // Integration - assign a_q = axz0_z1_q ^ mul_ax_ay_q ^ mul_ax_byz0; - assign b_q = bxz0_z1_q ^ mul_bx_by_q ^ mul_bx_ayz0; + assign a_q = axz0_z1_q ^ mul_ax_ay_q ^ mul_ax_byz0_buf; + assign b_q = bxz0_z1_q ^ mul_bx_by_q ^ mul_bx_ayz0_buf; end else begin : gen_not_pre_dom_indep // This DOM-dep multiplier is not directly followed by an un-pipelined DOM-indep multiplier. As @@ -489,23 +521,41 @@ // with input x which allows saving 2 GF multipliers. // Sum up d_y and _D_y_z0. - (* keep = "true" *) logic [NPower-1:0] a_b, b_b; + logic [NPower-1:0] a_b, b_b; assign a_b = a_y_calc ^ b_yz0_q; assign b_b = b_y_calc ^ a_yz0_q; + // Avoid aggressive synthesis optimizations. + logic [NPower-1:0] a_b_buf, b_b_buf; + prim_buf #( + .Width ( 2*NPower ) + ) u_prim_buf_ab_b ( + .in_i ( {a_b, b_b} ), + .out_o ( {a_b_buf, b_b_buf} ) + ); + // GF multiplications - (* keep = "true" *) logic [NPower-1:0] a_mul_ax_b, b_mul_bx_b; + logic [NPower-1:0] a_mul_ax_b, b_mul_bx_b; if (NPower == 4) begin : gen_mul_gf2p4 - assign a_mul_ax_b = aes_mul_gf2p4(a_x_calc, a_b); - assign b_mul_bx_b = aes_mul_gf2p4(b_x_calc, b_b); + assign a_mul_ax_b = aes_mul_gf2p4(a_x_calc, a_b_buf); + assign b_mul_bx_b = aes_mul_gf2p4(b_x_calc, b_b_buf); end else begin : gen_mul_gf2p2 - assign a_mul_ax_b = aes_mul_gf2p2(a_x_calc, a_b); - assign b_mul_bx_b = aes_mul_gf2p2(b_x_calc, b_b); + assign a_mul_ax_b = aes_mul_gf2p2(a_x_calc, a_b_buf); + assign b_mul_bx_b = aes_mul_gf2p2(b_x_calc, b_b_buf); end + // Avoid aggressive synthesis optimizations. + logic [NPower-1:0] a_mul_ax_b_buf, b_mul_bx_b_buf; + prim_buf #( + .Width ( 2*NPower ) + ) u_prim_buf_ab_mul_abx_b ( + .in_i ( {a_mul_ax_b, b_mul_bx_b} ), + .out_o ( {a_mul_ax_b_buf, b_mul_bx_b_buf} ) + ); + // Integration - assign a_q = axz0_z1_q ^ a_mul_ax_b; - assign b_q = bxz0_z1_q ^ b_mul_bx_b; + assign a_q = axz0_z1_q ^ a_mul_ax_b_buf; + assign b_q = bxz0_z1_q ^ b_mul_bx_b_buf; end // Only GF(2^4) and GF(2^2) is supported. @@ -541,18 +591,19 @@ assign b_gamma0 = b_gamma[1:0]; logic [1:0] a_gamma_ss_d, b_gamma_ss_d; - (* keep = "true" *) logic [1:0] a_gamma_ss_q, b_gamma_ss_q; + logic [1:0] a_gamma_ss_q, b_gamma_ss_q; assign a_gamma_ss_d = aes_scale_omega2_gf2p2(aes_square_gf2p2(a_gamma1 ^ a_gamma0)); assign b_gamma_ss_d = aes_scale_omega2_gf2p2(aes_square_gf2p2(b_gamma1 ^ b_gamma0)); - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_gamma_ss_q <= '0; - b_gamma_ss_q <= '0; - end else if (we_i[0]) begin - a_gamma_ss_q <= a_gamma_ss_d; - b_gamma_ss_q <= b_gamma_ss_d; - end - end + prim_flop_en #( + .Width ( 4 ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_gamma_ss ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i[0] ), + .d_i ( {a_gamma_ss_d, b_gamma_ss_d} ), + .q_o ( {a_gamma_ss_q, b_gamma_ss_q} ) + ); aes_dom_dep_mul_gf2pn #( .NPower ( 2 ), @@ -577,26 +628,32 @@ ///////////// // Formulas 14 and 15 in [2]. - (* keep = "true" *) logic [1:0] a_omega, b_omega; + logic [1:0] a_omega, b_omega; assign a_omega = aes_square_gf2p2(a_gamma1_gamma0 ^ a_gamma_ss_q); assign b_omega = aes_square_gf2p2(b_gamma1_gamma0 ^ b_gamma_ss_q); + // Avoid aggressive synthesis optimizations. + logic [1:0] a_omega_buf, b_omega_buf; + prim_buf #( + .Width ( 4 ) + ) u_prim_buf_ab_omega ( + .in_i ( {a_omega, b_omega} ), + .out_o ( {a_omega_buf, b_omega_buf} ) + ); + // Formulas 16 and 17 in [2]. - (* keep = "true" *) logic [1:0] a_gamma1_q, a_gamma0_q, b_gamma1_q, b_gamma0_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_gamma1_q <= '0; - a_gamma0_q <= '0; - b_gamma1_q <= '0; - b_gamma0_q <= '0; - end else if (we_i[0]) begin - a_gamma1_q <= a_gamma1; - a_gamma0_q <= a_gamma0; - b_gamma1_q <= b_gamma1; - b_gamma0_q <= b_gamma0; - end - end + logic [1:0] a_gamma1_q, a_gamma0_q, b_gamma1_q, b_gamma0_q; + prim_flop_en #( + .Width ( 8 ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_gamma10 ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i[0] ), + .d_i ( {a_gamma1, a_gamma0, b_gamma1, b_gamma0} ), + .q_o ( {a_gamma1_q, a_gamma0_q, b_gamma1_q, b_gamma0_q} ) + ); aes_dom_dep_mul_gf2pn #( .NPower ( 2 ), @@ -607,9 +664,9 @@ .rst_ni ( rst_ni ), .we_i ( we_i[1] ), .a_x ( a_gamma1_q ), // Share a of x - .a_y ( a_omega ), // Share a of y + .a_y ( a_omega_buf ), // Share a of y .b_x ( b_gamma1_q ), // Share b of x - .b_y ( b_omega ), // Share b of y + .b_y ( b_omega_buf ), // Share b of y .z_0 ( prd_3[5:4] ), // Randomness for blinding .z_1 ( prd_3[7:6] ), // Randomness for resharing .a_q ( a_gamma_inv[1:0] ), // Share a of q @@ -624,9 +681,9 @@ .clk_i ( clk_i ), .rst_ni ( rst_ni ), .we_i ( we_i[1] ), - .a_x ( a_omega ), // Share a of x + .a_x ( a_omega_buf ), // Share a of x .a_y ( a_gamma0_q ), // Share a of y - .b_x ( b_omega ), // Share b of x + .b_x ( b_omega_buf ), // Share b of x .b_y ( b_gamma0_q ), // Share b of y .z_0 ( prd_3[1:0] ), // Randomness for blinding .z_1 ( prd_3[3:2] ), // Randomness for resharing @@ -657,25 +714,25 @@ // Formula 12 in [2]. logic [3:0] a_y1, a_y0, b_y1, b_y0, a_y1_y0, b_y1_y0; - (* keep = "true" *) logic [3:0] a_gamma, b_gamma; assign a_y1 = a_y[7:4]; assign a_y0 = a_y[3:0]; assign b_y1 = b_y[7:4]; assign b_y0 = b_y[3:0]; logic [3:0] a_y_ss_d, b_y_ss_d; - (* keep = "true" *) logic [3:0] a_y_ss_q, b_y_ss_q; + logic [3:0] a_y_ss_q, b_y_ss_q; assign a_y_ss_d = aes_square_scale_gf2p4_gf2p2(a_y1 ^ a_y0); assign b_y_ss_d = aes_square_scale_gf2p4_gf2p2(b_y1 ^ b_y0); - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_y_ss_q <= '0; - b_y_ss_q <= '0; - end else if (we_i[0]) begin - a_y_ss_q <= a_y_ss_d; - b_y_ss_q <= b_y_ss_d; - end - end + prim_flop_en #( + .Width ( 8 ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_y_ss ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i[0] ), + .d_i ( {a_y_ss_d, b_y_ss_d} ), + .q_o ( {a_y_ss_q, b_y_ss_q} ) + ); aes_dom_dep_mul_gf2pn #( .NPower ( 4 ), @@ -695,9 +752,19 @@ .b_q ( b_y1_y0 ) // Share b of q ); + logic [3:0] a_gamma, b_gamma; assign a_gamma = a_y_ss_q ^ a_y1_y0; assign b_gamma = b_y_ss_q ^ b_y1_y0; + // Avoid aggressive synthesis optimizations. + logic [3:0] a_gamma_buf, b_gamma_buf; + prim_buf #( + .Width ( 8 ) + ) u_prim_buf_ab_gamma ( + .in_i ( {a_gamma, b_gamma} ), + .out_o ( {a_gamma_buf, b_gamma_buf} ) + ); + //////////////////// // Stages 2 and 3 // //////////////////// @@ -706,15 +773,15 @@ // a_gamma is masked by b_gamma, a_gamma_inv is masked by b_gamma_inv. aes_dom_inverse_gf2p4 u_aes_dom_inverse_gf2p4 ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .we_i ( we_i[2:1] ), - .a_gamma ( a_gamma ), - .b_gamma ( b_gamma ), - .prd_2 ( prd.prd_2 ), - .prd_3 ( prd.prd_3 ), - .a_gamma_inv ( a_theta ), - .b_gamma_inv ( b_theta ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .we_i ( we_i[2:1] ), + .a_gamma ( a_gamma_buf ), + .b_gamma ( b_gamma_buf ), + .prd_2 ( prd.prd_2 ), + .prd_3 ( prd.prd_3 ), + .a_gamma_inv ( a_theta ), + .b_gamma_inv ( b_theta ) ); ///////////// @@ -722,20 +789,17 @@ ///////////// // Formulas 18 and 19 in [2]. - (* keep = "true" *) logic [3:0] a_y1_q, a_y0_q, b_y1_q, b_y0_q; - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin - a_y1_q <= '0; - a_y0_q <= '0; - b_y1_q <= '0; - b_y0_q <= '0; - end else if (we_i[2]) begin - a_y1_q <= a_y1; - a_y0_q <= a_y0; - b_y1_q <= b_y1; - b_y0_q <= b_y0; - end - end + logic [3:0] a_y1_q, a_y0_q, b_y1_q, b_y0_q; + prim_flop_en #( + .Width ( 16 ), + .ResetValue ( '0 ) + ) u_prim_flop_ab_y10 ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( we_i[2] ), + .d_i ( {a_y1, a_y0, b_y1, b_y0} ), + .q_o ( {a_y1_q, a_y0_q, b_y1_q, b_y0_q} ) + ); aes_dom_indep_mul_gf2pn #( .NPower ( 4 ),