| // Copyright lowRISC contributors. |
| // Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| // SPDX-License-Identifier: Apache-2.0 |
| // |
| // This module is the single round keccak permutation module |
| // It supports Keccak with up to 1600b of state |
| |
| `include "prim_assert.sv" |
| |
| // Single-round Keccak-f datapath. It processes `Share` shares of a Width-bit |
| // state: one share when EnMasking = 0, two shares when EnMasking = 1. |
| module keccak_2share |
| import prim_mubi_pkg::*; |
| #( |
| parameter int Width = 1600, // b= {25, 50, 100, 200, 400, 800, 1600} |
| |
| // Derived |
| localparam int W = Width/25, // Lane length in bits (z dimension of the state) |
| localparam int L = $clog2(W), |
| localparam int MaxRound = 12 + 2*L, // Keccak-f only |
| localparam int RndW = $clog2(MaxRound+1), // Representing up to MaxRound |
| |
| // Control parameters |
| parameter bit EnMasking = 0, // Enable secure hardening |
| localparam int Share = EnMasking ? 2 : 1 // Number of state shares processed |
| ) ( |
| input clk_i, |
| input rst_ni, |
| |
| input lc_ctrl_pkg::lc_tx_t lc_escalate_en_i, // Used to disable SVAs when escalating. |
| |
| input [RndW-1:0] rnd_i, // Current round index |
| input mubi4_t phase_sel_i, // Output mux control. Used when EnMasking := 1 |
| input [1:0] cycle_i, // Current cycle index. Used when EnMasking := 1 |
| input rand_aux_i, // Auxiliary randomness input. Used when EnMasking := 1 |
| input [Width/2-1:0] rand_i, // Randomness for remasking. Used when EnMasking := 1 |
| input [Width-1:0] s_i [Share], // Input state, one Width-bit vector per share |
| output logic [Width-1:0] s_o [Share] // Output state, one Width-bit vector per share |
| ); |
| /////////// |
| // Types // |
| /////////// |
| // Packed dimensions of box_t are listed outermost first: |
| // x y z |
| typedef logic [4:0][4:0][W-1:0] box_t; // (x,y,z) state |
| typedef logic [W-1:0] lane_t; // (z) |
| typedef logic [4:0] [W-1:0] plane_t; // (x,z) |
| typedef logic [4:0][4:0] slice_t; // (x,y) |
| typedef logic [4:0][W-1:0] sheet_t; // (y,z) identical to plane_t |
| typedef logic [4:0] row_t; // (x) |
| typedef logic [4:0] col_t; // (y) identical to row_t |
| |
| ////////////// |
| // Keccak_f // |
| ////////////// |
| // All nets below hold one box_t per share. |
| box_t state_in [Share]; // s_i converted to box_t |
| box_t state_out [Share]; // converted back to a bit array and driven onto s_o |
| box_t theta_data [Share]; // output of the theta step |
| box_t rho_data [Share]; // output of the rho step |
| box_t pi_data [Share]; // output of the pi step |
| box_t chi_data [Share]; // output of the chi step |
| box_t iota_data [Share]; // output of the iota step |
| |
| // Phase 1 covers theta, rho and pi; Phase 2 covers chi and iota. |
| box_t phase1_in [Share]; |
| box_t phase1_out [Share]; |
| box_t phase2_in [Share]; |
| box_t phase2_out [Share]; |
| |
| ///////////////// |
| // Unused nets // |
| ///////////////// |
| // Tie off input signals that aren't used in the unmasked implementation. |
| // Each such input is assigned to a local net that nothing else in this block reads. |
| if (!EnMasking) begin : gen_tie_unused |
| logic unused_clk; |
| logic unused_rst_n; |
| mubi4_t unused_phase_sel; |
| logic [1:0] unused_cycle; |
| logic unused_rand_aux; |
| logic [Width/2-1:0] unused_rand; |
| assign unused_clk = clk_i; |
| assign unused_rst_n = rst_ni; |
| assign unused_phase_sel = phase_sel_i; |
| assign unused_cycle = cycle_i; |
| assign unused_rand_aux = rand_aux_i; |
| assign unused_rand = rand_i; |
| end |
| |
| ////////////////////////////////////////////////// |
| // Input/output type conversion and interfacing // |
| ////////////////////////////////////////////////// |
| // Per share: unpack the flat input bit array into the (x,y,z) box representation |
| // and pack the resulting box back into a flat bit array for the output port. |
| for (genvar i = 0 ; i < Share ; i++) begin : g_state_inout |
| assign state_in[i] = bitarray_to_box(s_i[i]); |
| assign s_o[i] = box_to_bitarray(state_out[i]); |
| end : g_state_inout |
| |
| // Phase input/output selection. |
| // |
| // Masked (EnMasking = 1): both phases read the externally held state and |
| // phase_sel_i selects which phase result is driven onto state_out. |
| // Unmasked (EnMasking = 0): Phase 1 feeds Phase 2 directly, and the Phase 2 |
| // result is the module output. |
| if (EnMasking) begin : g_2share_data |
|   assign phase1_in = state_in; |
|   assign phase2_in = state_in; |
| |
|   always_comb begin |
|     unique case (phase_sel_i) |
|       MuBi4False: state_out = phase1_out; |
|       MuBi4True:  state_out = phase2_out; |
|       // phase_sel_i is a multi-bit encoded signal. Any value other than the |
|       // two valid encodings is not a legal selection, so do not forward |
|       // either phase result: drive zeros instead of silently treating the |
|       // invalid value as MuBi4False. |
|       default:    state_out = '{default: '0}; |
|     endcase |
|   end |
| end else begin : g_single_data |
|   assign phase1_in = state_in; |
|   assign phase2_in = phase1_out; |
|   assign state_out = phase2_out; |
| end |
| |
| ////////////// |
| // Datapath // |
| ////////////// |
| // This module has two phases. First phase, it calculates Theta, Rho, Pi steps |
| // in SHA3. At the second phase, it computes Chi and Iota steps. If masking is |
| // not enabled, the two phases are completed within a single clock cycle. |
| // |
| // If masking is enabled, the first phase (Phase1) completes in one cycle. |
| // Then, the output should be stored in the state and given to the input of |
| // this module again. The second phase in the masked version needs three |
| // clock cycles to complete. In the first clock cycle, the first stage of Chi |
| // is computed for the first lane halves. In the second clock cycle, the |
| // module then outputs the updated first lane halves. In the third clock |
| // cycle, the new second lane halves are output. To make side-channel analysis |
| // (SCA) harder, we randomly decide which lane halves to process first on a |
| // per-round basis. |
| // We use additional randomness generated by the PRNG to take this decision |
| // (rand_aux_i). For more details, refer to the comments in the "MUX control" |
| // section below. |
| |
| for (genvar i = 0 ; i < Share ; i++) begin : g_datapath |
| |
| // Phase 1: |
| assign theta_data[i] = theta(phase1_in[i]); |
| // Commented out rho function as vcs complains z-Offset%W isn't constant |
| // rho_data[i] is driven by the g_rho generate block further down instead. |
| // assign rho_data[i] = rho(theta_data[i]); |
| |
| assign pi_data[i] = pi(rho_data[i]); |
| |
| // Phase 2 (Cycles 1, 2 and 3): |
| // Chi : See below |
| // Iota: See below |
| end : g_datapath |
| |
| // The Phase 1 result is the output of the pi step for every share. |
| assign phase1_out = pi_data; |
| |
| // Iota adds Round Constants(RC), so only one share should be XORed |
| // Share 0 always receives the round constant selected by rnd_i; in the masked |
| // configuration share 1 passes through the Chi result unchanged. |
| if (EnMasking) begin : g_2share_iota |
| assign iota_data[0] = iota(chi_data[0], rnd_i); |
| assign iota_data[1] = chi_data[1]; |
| end else begin : g_single_iota |
| assign iota_data[0] = iota(chi_data[0], rnd_i); |
| end |
| |
| // Chi step. The masked variant evaluates the AND of Chi with one |
| // prim_dom_and_2share instance per x index, each working on half of the lane |
| // bits at a time. The unmasked variant calls the chi() function directly. |
| if (EnMasking) begin : g_2share_chi |
| // Domain-Oriented Masking |
| // reference: https://eprint.iacr.org/2017/395.pdf |
| |
| // Number of bits in half of a sheet_t, i.e. five half lanes. |
| localparam int unsigned WSheetHalf = $bits(sheet_t)/2; |
| // Randomness fed into / intermediate result read back from each DOM multiplier. |
| logic [4:0][WSheetHalf-1:0] in_prd, out_prd; |
| |
| logic low_then_high_d, low_then_high_q; // Lane-half processing order of the round |
| logic in_data_low, out_data_low; // Select lower lane halves as DOM input / as output |
| logic in_rand_ext; // 1: remask with rand_i, 0: remask with out_prd |
| logic update_dom; // Drives z_valid_i of the DOM multipliers |
| |
| ///////////////// |
| // MUX control // |
| ///////////////// |
| |
| // Update lane-half processing order in Phase 1 and keep the value constant |
| // for the entire round. |
| // NOTE(review): mubi4_test_false_strict comes from prim_mubi_pkg; presumably it |
| // is true only when phase_sel_i equals MuBi4False exactly - confirm. |
| assign low_then_high_d = |
| mubi4_test_false_strict(phase_sel_i) ? rand_aux_i : low_then_high_q; |
| |
| // Register holding the processing order; cleared to 0 on reset. |
| always_ff @(posedge clk_i or negedge rst_ni) begin |
| if (!rst_ni) begin |
| low_then_high_q <= 1'b 0; |
| end else begin |
| low_then_high_q <= low_then_high_d; |
| end |
| end |
| |
| // This implementation uses both randomness provided from an external PRNG |
| // as well as intermediate results for remasking the DOM multipliers below. |
| // Per clock cycle, 800b of pseudo-random data (PRD) are required. The |
| // following schedule is used to only ever update the input data when also |
| // providing fresh randomness and vice versa. |
| // |
| // Cycle 0: Compute Theta, Rho, Pi - The DOM multipliers are not evaluated |
| // at all: the inputs are driven by the first lane halves (same |
| // values as in Cycle 3). Also the intermediate results we already |
| // had in Cycle 3 didn't change. |
| // Cycle 1: Compute first stage of Chi for first lane halves using the DOM |
| // multipliers. We use the fresh randomness provided from the |
| // PRNG for remasking. |
| // Cycle 2: Compute second stage of Chi and Iota for first lane halves. |
| // Compute first stage of Chi for second lane halves. We use the |
| // fresh randomness provided from the PRNG for remasking the |
| // DOM multipliers. |
| // Cycle 3: Compute second stage of Chi and Iota for second lane halves. |
| // Feed again first lane halves to DOM multiplier inputs (now |
| // the updated values become visible) together with intermediate |
| // results of Cycle 2. Don't update the register stage inside |
| // the DOM multipliers. |
| always_comb begin |
| unique case (cycle_i) |
| 2'h0: begin |
| in_data_low = low_then_high_q; |
| in_rand_ext = 1'b0; |
| update_dom = 1'b0; |
| end |
| 2'h1: begin |
| in_data_low = low_then_high_q; |
| in_rand_ext = 1'b1; |
| update_dom = 1'b1; |
| end |
| 2'h2: begin |
| in_data_low = ~low_then_high_q; |
| in_rand_ext = 1'b1; |
| update_dom = 1'b1; |
| end |
| 2'h3: begin |
| in_data_low = low_then_high_q; |
| in_rand_ext = 1'b0; |
| update_dom = 1'b0; |
| end |
| // Same values as Cycle 0. |
| default: begin |
| in_data_low = low_then_high_q; |
| in_rand_ext = 1'b0; |
| update_dom = 1'b0; |
| end |
| endcase |
| end |
| |
| // When taking the lower lane halves in, the upper lane halves are output |
| // and vice versa. |
| assign out_data_low = ~in_data_low; |
| |
| ///////////////////// |
| // DOM multipliers // |
| ///////////////////// |
| |
| for (genvar x = 0 ; x < 5 ; x++) begin : g_chi_w |
| localparam int X1 = (x + 1) % 5; |
| localparam int X2 = (x + 2) % 5; |
| |
| sheet_t sheet0[Share]; // Inverted input X1 |
| sheet_t sheet1[Share]; // X2 |
| sheet_t sheet2[Share]; // DOM output |
| |
| // Only share 0 is inverted, so that the XOR of both shares is inverted once. |
| assign sheet0[0] = ~phase2_in[0][X1]; |
| assign sheet0[1] = phase2_in[1][X1]; |
| |
| assign sheet1[0] = phase2_in[0][X2]; |
| assign sheet1[1] = phase2_in[1][X2]; |
| |
| // Convert sheet_t to 1D arrays, one for the upper and lower half lane. |
| // The half lane with y = 0 ends up in the most significant position and the |
| // one with y = 4 in the least significant position. |
| logic [WSheetHalf-1:0] a0_l, a1_l, b0_l, b1_l; |
| logic [WSheetHalf-1:0] a0_h, a1_h, b0_h, b1_h; |
| logic [WSheetHalf-1:0] a0, a1, b0, b1, q0, q1; |
| |
| assign a0_l = {sheet0[0][0][W/2-1:0], |
| sheet0[0][1][W/2-1:0], |
| sheet0[0][2][W/2-1:0], |
| sheet0[0][3][W/2-1:0], |
| sheet0[0][4][W/2-1:0]}; |
| assign a1_l = {sheet0[1][0][W/2-1:0], |
| sheet0[1][1][W/2-1:0], |
| sheet0[1][2][W/2-1:0], |
| sheet0[1][3][W/2-1:0], |
| sheet0[1][4][W/2-1:0]}; |
| |
| assign a0_h = {sheet0[0][0][W-1:W/2], |
| sheet0[0][1][W-1:W/2], |
| sheet0[0][2][W-1:W/2], |
| sheet0[0][3][W-1:W/2], |
| sheet0[0][4][W-1:W/2]}; |
| assign a1_h = {sheet0[1][0][W-1:W/2], |
| sheet0[1][1][W-1:W/2], |
| sheet0[1][2][W-1:W/2], |
| sheet0[1][3][W-1:W/2], |
| sheet0[1][4][W-1:W/2]}; |
| |
| assign b0_l = {sheet1[0][0][W/2-1:0], |
| sheet1[0][1][W/2-1:0], |
| sheet1[0][2][W/2-1:0], |
| sheet1[0][3][W/2-1:0], |
| sheet1[0][4][W/2-1:0]}; |
| assign b1_l = {sheet1[1][0][W/2-1:0], |
| sheet1[1][1][W/2-1:0], |
| sheet1[1][2][W/2-1:0], |
| sheet1[1][3][W/2-1:0], |
| sheet1[1][4][W/2-1:0]}; |
| |
| assign b0_h = {sheet1[0][0][W-1:W/2], |
| sheet1[0][1][W-1:W/2], |
| sheet1[0][2][W-1:W/2], |
| sheet1[0][3][W-1:W/2], |
| sheet1[0][4][W-1:W/2]}; |
| assign b1_h = {sheet1[1][0][W-1:W/2], |
| sheet1[1][1][W-1:W/2], |
| sheet1[1][2][W-1:W/2], |
| sheet1[1][3][W-1:W/2], |
| sheet1[1][4][W-1:W/2]}; |
| |
| // Input muxing |
| assign a0 = in_data_low ? a0_l : a0_h; |
| assign a1 = in_data_low ? a1_l : a1_h; |
| assign b0 = in_data_low ? b0_l : b0_h; |
| assign b1 = in_data_low ? b1_l : b1_h; |
| |
| // Randomness muxing |
| // Intermediate results are rotated across rows. The new Row x depends on |
| // data from Rows x + 1 and x + 2. Hence we don't want to use intermediate |
| // results from Rows x, x + 1, and x + 2 for remasking. |
| // rot_int(x, 5) selects index x - 1, wrapping from 0 to 4. |
| assign in_prd[x] = in_rand_ext ? rand_i[x * WSheetHalf +: WSheetHalf] : |
| out_prd[rot_int(x, 5)]; |
| |
| prim_dom_and_2share #( |
| .DW (WSheetHalf), // a half sheet |
| .Pipeline(1) // Process the full sheet in 3 clock cycles. This reduces |
| // SCA leakage. |
| ) u_dom ( |
| .clk_i, |
| .rst_ni, |
| |
| .a0_i (a0), |
| .a1_i (a1), |
| .b0_i (b0), |
| .b1_i (b1), |
| .z_valid_i (update_dom), |
| .z_i (in_prd[x]), |
| .q0_o (q0), |
| .q1_o (q1), |
| .prd_o (out_prd[x]) |
| ); |
| |
| // Output conversion from q0, q1 to sheet_t |
| // For simplicity, we forward the generated lane half to both the upper |
| // and lower lane halves at this point. The actual output muxing/selection |
| // happens after the Iota step when generating phase2_out from iota_data |
| // and state_in below. |
| // The slice order mirrors the input packing: y = 4 sits in the lowest bits. |
| assign sheet2[0][4] = {2{q0[W/2*0+:W/2]}}; |
| assign sheet2[0][3] = {2{q0[W/2*1+:W/2]}}; |
| assign sheet2[0][2] = {2{q0[W/2*2+:W/2]}}; |
| assign sheet2[0][1] = {2{q0[W/2*3+:W/2]}}; |
| assign sheet2[0][0] = {2{q0[W/2*4+:W/2]}}; |
| |
| assign sheet2[1][4] = {2{q1[W/2*0+:W/2]}}; |
| assign sheet2[1][3] = {2{q1[W/2*1+:W/2]}}; |
| assign sheet2[1][2] = {2{q1[W/2*2+:W/2]}}; |
| assign sheet2[1][1] = {2{q1[W/2*3+:W/2]}}; |
| assign sheet2[1][0] = {2{q1[W/2*4+:W/2]}}; |
| |
| // Final XOR to generate the output |
| assign chi_data[0][x] = sheet2[0] ^ phase2_in[0][x]; |
| assign chi_data[1][x] = sheet2[1] ^ phase2_in[1][x]; |
| end : g_chi_w |
| |
| // Since Chi and thus Iota are separately applied to the lower and upper half |
| // lanes, we need to forward the input to the other half. |
| // out_data_low = 1: lower half comes from iota_data, upper half from state_in. |
| // out_data_low = 0: upper half comes from iota_data, lower half from state_in. |
| for (genvar x = 0 ; x < 5 ; x++) begin : g_2share_phase2_out_row |
| for (genvar y = 0 ; y < 5 ; y++) begin : g_2share_phase2_out_col |
| assign phase2_out[0][x][y] = out_data_low ? |
| { state_in[0][x][y][W-1:W/2], iota_data[0][x][y][W/2-1:0]} : |
| {iota_data[0][x][y][W-1:W/2], state_in[0][x][y][W/2-1:0]}; |
| assign phase2_out[1][x][y] = out_data_low ? |
| { state_in[1][x][y][W-1:W/2], iota_data[1][x][y][W/2-1:0]} : |
| {iota_data[1][x][y][W-1:W/2], state_in[1][x][y][W/2-1:0]}; |
| end |
| end |
| |
| end else begin : g_single_chi |
| // Unmasked: Chi is purely combinational and the Iota result is the Phase 2 output. |
| assign chi_data[0] = chi(phase2_in[0]); |
| assign phase2_out = iota_data; |
| end |
| |
| // Rho ====================================================================== |
| // As RhoOffset[x][y] is considered as variable int in VCS, |
| // it is replaced with generate statement. |
| // Revised to meet verilator lint. Now RhoOffset is 1-D array |
| // The entry for lane (x,y) is stored at index 5*x+y. |
| localparam int RhoOffset [25] = '{ |
| //y 0 1 2 3 4 x |
| 0, 36, 3, 105, 210, // 0: 0 1 2 3 4 |
| 1, 300, 10, 45, 66, // 1: 5 6 7 8 9 |
| 190, 6, 171, 15, 253, // 2: 10 11 12 13 14 |
| 28, 55, 153, 21, 120, // 3: 15 16 17 18 19 |
| 91, 276, 231, 136, 78 // 4: 20 21 22 23 24 |
| }; |
| // Per share, rotate every lane of the Theta output by its offset modulo W. |
| for (genvar i = 0 ; i < Share ; i++) begin : g_rho |
| box_t rho_in, rho_out; |
| assign rho_in = theta_data[i]; |
| assign rho_data[i] = rho_out; |
| |
| for (genvar x = 0 ; x < 5 ; x++) begin : gen_rho_x |
| for (genvar y = 0 ; y < 5 ; y++) begin : gen_rho_y |
| localparam int Offset = RhoOffset[5*x+y]%W; |
| localparam int ShiftAmt = W- Offset; |
| if (Offset == 0) begin : gen_offset0 |
| // No rotation: the lane is passed through unchanged. |
| assign rho_out[x][y][W-1:0] = rho_in[x][y][W-1:0]; |
| end else begin : gen_others |
| // Rotate towards the MSB by Offset bits: the lower ShiftAmt bits move up |
| // and the upper Offset bits wrap around, i.e. out[z] = in[(z - Offset) mod W]. |
| assign rho_out[x][y][W-1:0] = {rho_in[x][y][0+:ShiftAmt], |
| rho_in[x][y][ShiftAmt+:Offset]}; |
| end |
| end |
| end |
| end : g_rho |
| |
| //////////////// |
| // Assertions // |
| //////////////// |
| |
| // Parameter sanity checks. The masked configuration excludes Width = 25. |
| `ASSERT_INIT(ValidWidth_A, |
| EnMasking == 0 && Width inside {25, 50, 100, 200, 400, 800, 1600} || |
| EnMasking == 1 && Width inside {50, 100, 200, 400, 800, 1600}) |
| `ASSERT_INIT(ValidW_A, W inside {1, 2, 4, 8, 16, 32, 64}) |
| `ASSERT_INIT(ValidL_A, L inside {0, 1, 2, 3, 4, 5, 6}) |
| `ASSERT_INIT(ValidRound_A, MaxRound <= 24) // Keccak-f only |
| |
| // Once phase_sel_i changes from MuBi4False to MuBi4True, it shall remain |
| // MuBi4True in the following cycle as well, i.e. stay true for two cycles. |
| // lc_escalate_en_i is only referenced by the assumption below; the assignment |
| // to unused_lc_sig reads it unconditionally. |
| // NOTE(review): the ASSUME macro comes from prim_assert.sv; its last two |
| // arguments are presumably the clock and the disable condition - confirm. |
| lc_ctrl_pkg::lc_tx_t unused_lc_sig; |
| assign unused_lc_sig = lc_escalate_en_i; |
| if (EnMasking) begin : gen_selperiod_chk |
| `ASSUME(SelStayTwoCycleIfTrue_A, |
| ($past(phase_sel_i) == MuBi4False) && (phase_sel_i == MuBi4True) |
| |=> phase_sel_i == MuBi4True, clk_i, !rst_ni || lc_escalate_en_i != lc_ctrl_pkg::Off) |
| end |
| |
| /////////////// |
| // Functions // |
| /////////////// |
| |
| // Unpack a flat state bit array into the 3D (x,y,z) box representation. |
| // Mapping as defined in FIPS PUB 202 |
| // https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf |
| // > For all triples (x,y,z) such that 0<=x<5, 0<=y<5, and 0<=z<w, |
| // > A[x,y,z]=S[w(5y+x)+z] |
| // Lane (x,y) therefore occupies the W consecutive bits starting at W*(5y+x). |
| function automatic box_t bitarray_to_box(logic [Width-1:0] s_in); |
|   box_t cube; |
|   for (int x = 0 ; x < 5 ; x++) begin |
|     for (int y = 0 ; y < 5 ; y++) begin |
|       // Copy one whole lane at a time. |
|       cube[x][y] = s_in[W*(5*y+x) +: W]; |
|     end |
|   end |
|   return cube; |
| endfunction : bitarray_to_box |
| |
| // Pack the 3D (x,y,z) box back into a flat state bit array. |
| // Inverse of bitarray_to_box: lane (x,y) is written to the W consecutive |
| // bits starting at W*(5y+x). |
| function automatic logic [Width-1:0] box_to_bitarray(box_t state); |
|   logic [Width-1:0] flat; |
|   for (int x = 0 ; x < 5 ; x++) begin |
|     for (int y = 0 ; y < 5 ; y++) begin |
|       // Copy one whole lane at a time. |
|       flat[W*(5*y+x) +: W] = state[x][y]; |
|     end |
|   end |
|   return flat; |
| endfunction : box_to_bitarray |
| |
| // Rotate an integer index down by one within the range [0, num-1]: |
| // index 0 wraps around to num - 1, every other index maps to in - 1. |
| function automatic integer rot_int(integer in, integer num); |
|   return (in == 0) ? (num - 1) : (in - 1); |
| endfunction |
| |
| // Step Mapping ============================================================= |
| // theta |
| // XOR each bit in the state with the parity of two columns |
| // C[x,z] = A[x,0,z] ^ A[x,1,z] ^ A[x,2,z] ^ A[x,3,z] ^ A[x,4,z] |
| // D[x,z] = C[x-1,z] ^ C[x+1,z-1] |
| // theta = A[x,y,z] ^ D[x,z] |
| parameter int ThetaIndexX1 [5] = '{4, 0, 1, 2, 3}; // (x-1)%5 |
| parameter int ThetaIndexX2 [5] = '{1, 2, 3, 4, 0}; // (x+1)%5 |
| function automatic box_t theta(box_t state); |
|   plane_t col_parity; // C[x,z] |
|   plane_t mix;        // D[x,z] |
|   box_t   mixed; |
| |
|   // Column parity: fold the five lanes sharing the same x together. |
|   for (int x = 0 ; x < 5 ; x++) begin |
|     col_parity[x] = '0; |
|     for (int y = 0 ; y < 5 ; y++) begin |
|       col_parity[x] ^= state[x][y]; |
|     end |
|   end |
| |
|   // Combine the parity of column x-1 with the parity of column x+1 at z-1. |
|   for (int x = 0 ; x < 5 ; x++) begin |
|     for (int z = 0 ; z < W ; z++) begin |
|       int z_prev; // (z-1) mod W |
|       if (z == 0) begin |
|         z_prev = W-1; |
|       end else begin |
|         z_prev = z-1; |
|       end |
|       mix[x][z] = col_parity[ThetaIndexX1[x]][z] ^ col_parity[ThetaIndexX2[x]][z_prev]; |
|     end |
|   end |
| |
|   // Apply D[x] to every lane with the same x. |
|   for (int y = 0 ; y < 5 ; y++) begin |
|     for (int x = 0 ; x < 5 ; x++) begin |
|       mixed[x][y] = state[x][y] ^ mix[x]; |
|     end |
|   end |
|   return mixed; |
| endfunction : theta |
| |
| // rho |
| |
| // Commented out entire rho function due to VCS elaboration error. |
| // (z-RhoOffset[x][y]%W) isn't considered as a constant in VCS. |
| // Even changing it to W-RhoOffset[x][y]%W and assign to ShiftAmt |
| // creates same error. |
| |
| // Offset : Look at Table 2 in FIPS PUB 202 |
| //localparam int RhoOffset [5][5] = '{ |
| // //y 0 1 2 3 4 x |
| // '{ 0, 36, 3, 105, 210},// 0 |
| // '{ 1, 300, 10, 45, 66},// 1 |
| // '{ 190, 6, 171, 15, 253},// 2 |
| // '{ 28, 55, 153, 21, 120},// 3 |
| // '{ 91, 276, 231, 136, 78} // 4 |
| //}; |
| |
| // rotate bits of each lane by offset |
| // 1. rho[0,0,z] = A[0,0,z] |
| // 2. Offset swap |
| // a. (x,y) := (1,0) |
| // b. for t [0..23] |
| // i. rho[x,y,z] = A[x,y,z-(t+1)(t+2)/2] |
| // ii. (x,y) = (y, (2x+3y) mod 5) |
| //function automatic box_t rho(box_t state); |
| // box_t result; |
| // for (int x = 0 ; x < 5 ; x++) begin |
| // for (int y = 0 ; y < 5 ; y++) begin |
| // for (int z = 0 ; z < W ; z++) begin |
| // automatic int index_z; |
| // index_z = (z-RhoOffset[x][y])%W; |
| // result[x][y][z] = state[x][y][(z-RhoOffset[x][y])%W]; |
| // end |
| // end |
| // end |
| // return result; |
| //endfunction : rho |
| |
| // pi |
| // Rearrange the position of the lanes; the bits within a lane are untouched. |
| // pi[x,y,z] = state[(x+3y),x,z] |
| // PiRotate[x][y] holds the precomputed source x index (x+3y)%5. |
| localparam int PiRotate [5][5] = '{ |
| //y 0 1 2 3 4 x |
| '{ 0, 3, 1, 4, 2},// 0 |
| '{ 1, 4, 2, 0, 3},// 1 |
| '{ 2, 0, 3, 1, 4},// 2 |
| '{ 3, 1, 4, 2, 0},// 3 |
| '{ 4, 2, 0, 3, 1} // 4 |
| }; |
| function automatic box_t pi(box_t state); |
|   box_t permuted; |
|   for (int y = 0 ; y < 5 ; y++) begin |
|     for (int x = 0 ; x < 5 ; x++) begin |
|       // Destination lane (x,y) takes the whole source lane (PiRotate[x][y], x). |
|       permuted[x][y] = state[PiRotate[x][y]][x]; |
|     end |
|   end |
|   return permuted; |
| endfunction : pi |
| |
| // chi |
| // Non-linear step, applied to all five lanes sharing the same x at once. |
| // chi[x,y,z] = state[x,y,z] ^ ((state[x+1,y,z] ^ 1) & state[x+2,y,z]) |
| parameter int ChiIndexX1 [5] = '{1, 2, 3, 4, 0}; // (x+1)%5 |
| parameter int ChiIndexX2 [5] = '{2, 3, 4, 0, 1}; // (x+2)%5 |
| function automatic box_t chi(box_t state); |
|   box_t mapped; |
|   for (int x = 0 ; x < 5 ; x++) begin |
|     sheet_t inv_x1; // inverted data at x+1 |
|     sheet_t at_x2;  // data at x+2 |
|     inv_x1 = ~state[ChiIndexX1[x]]; |
|     at_x2  = state[ChiIndexX2[x]]; |
|     mapped[x] = state[x] ^ (inv_x1 & at_x2); |
|   end |
|   return mapped; |
| endfunction : chi |
| |
| // iota |
| // XOR (x,y) = (0,0) with Round Constant (RC) |
| |
| // RC parameter: Precomputed by util/keccak_rc.py. Only up-to 0..L-1 is used |
| // RC = '0 |
| // RC[2**j-1] = rc(j+7*rnd) |
| // rc(t) = |
| // 1. t%255 == 0 -> 1 |
| // 2. R[0:7] = 'b10000000 |
| // 3. for i = [1..t%255] |
| // a. R = 0 || R |
| // b. R[0] = R[0] ^ R[8] |
| // c. R[4] = R[4] ^ R[8] |
| // d. R[5] = R[5] ^ R[8] |
| // e. R[6] = R[6] ^ R[8] |
| // f. R = R[0:7] |
| // 4. return R[0] |
| // RC has L = [0..6] |
| // for lower L case, only chopping lower part of 64bit RC is sufficient. |
| // The table is indexed by the round number; the iota function uses only the |
| // lower W bits of the selected entry. |
| localparam logic [63:0] RC [24] = '{ |
| 64'h 0000_0000_0000_0001, // Round 0 |
| 64'h 0000_0000_0000_8082, // Round 1 |
| 64'h 8000_0000_0000_808A, // Round 2 |
| 64'h 8000_0000_8000_8000, // Round 3 |
| 64'h 0000_0000_0000_808B, // Round 4 |
| 64'h 0000_0000_8000_0001, // Round 5 |
| 64'h 8000_0000_8000_8081, // Round 6 |
| 64'h 8000_0000_0000_8009, // Round 7 |
| 64'h 0000_0000_0000_008A, // Round 8 |
| 64'h 0000_0000_0000_0088, // Round 9 |
| 64'h 0000_0000_8000_8009, // Round 10 |
| 64'h 0000_0000_8000_000A, // Round 11 |
| 64'h 0000_0000_8000_808B, // Round 12 |
| 64'h 8000_0000_0000_008B, // Round 13 |
| 64'h 8000_0000_0000_8089, // Round 14 |
| 64'h 8000_0000_0000_8003, // Round 15 |
| 64'h 8000_0000_0000_8002, // Round 16 |
| 64'h 8000_0000_0000_0080, // Round 17 |
| 64'h 0000_0000_0000_800A, // Round 18 |
| 64'h 8000_0000_8000_000A, // Round 19 |
| 64'h 8000_0000_8000_8081, // Round 20 |
| 64'h 8000_0000_0000_8080, // Round 21 |
| 64'h 0000_0000_8000_0001, // Round 22 |
| 64'h 8000_0000_8000_8008 // Round 23 |
| }; |
| |
| // iota: XOR the lane at (x,y) = (0,0) with the lower W bits of the round |
| // constant selected by rnd. All other lanes are returned unchanged. |
| function automatic box_t iota(box_t state, logic [RndW-1:0] rnd); |
|   lane_t rc_lane; |
|   box_t  with_rc; |
| |
|   rc_lane = RC[rnd][W-1:0]; |
|   with_rc = state; |
|   with_rc[0][0] ^= rc_lane; |
|   return with_rc; |
| endfunction : iota |
| |
| // Round function : Rnd(A,i_r) |
| // Not used due to rho function issue described above. |
| |
| //function automatic box_t keccak_rnd(box_t state, logic [RndW-1:0] rnd); |
| // box_t keccak_state; |
| // keccak_state = iota(chi(pi(rho(theta(state)))), rnd); |
| // |
| // return keccak_state; |
| //endfunction : keccak_rnd |
| |
| endmodule |