blob: d6c5f0b2ed7516d6c2dbfc423ab71d98f4a742f7 [file] [log] [blame]
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// This module is the single round keccak permutation module
// It supports Keccak with up to 1600b of state
// Only when EnMasking is enabled, rand_i and sel_i are used
`include "prim_assert.sv"

// keccak_2share
//
// Computes one round of the Keccak-f permutation (Theta, Rho, Pi, Chi, Iota)
// on a state of `Width` bits.
//
// Parameters:
//   Width     - state width b in bits, one of {25, 50, 100, 200, 400, 800, 1600}.
//   EnMasking - when 1 the state is processed as two shares and Chi is
//               computed through prim_dom_and_2share; when 0 a single share
//               is processed purely combinationally.
//
// Ports:
//   clk_i, rst_ni - forwarded to prim_dom_and_2share (EnMasking == 1 only).
//   rnd_i         - current round index, selects the Iota round constant.
//   rand_valid_i  - forwarded to prim_dom_and_2share as c_valid_i.
//   rand_i        - random bits forwarded to prim_dom_and_2share.
//   sel_i         - phase select when EnMasking == 1:
//                   0 -> Theta/Rho/Pi (phase 1), 1 -> Chi/Iota (phase 2).
//   s_i / s_o     - state input / output, one entry per share.
module keccak_2share #(
  parameter int Width = 1600, // b= {25, 50, 100, 200, 400, 800, 1600}

  // Derived
  localparam int W        = Width/25,
  localparam int L        = $clog2(W),
  localparam int MaxRound = 12 + 2*L,           // Keccak-f only
  localparam int RndW     = $clog2(MaxRound+1), // Representing up to MaxRound

  // Control parameters
  parameter  bit EnMasking = 0,                 // Enable secure hardening
  localparam int Share     = EnMasking ? 2 : 1
) (
  input clk_i,
  input rst_ni,

  input [RndW-1:0]         rnd_i,   // Current Round
  input                    rand_valid_i,
  input [Width-1:0]        rand_i,  // Random values. Used when 2Share enabled
  input                    sel_i,   // Select input/output mux. Used when EnMasking := 1
  input [Width-1:0]        s_i [Share],
  output logic [Width-1:0] s_o [Share]
);
  ///////////
  // Types //
  ///////////
  //            x    y    z
  typedef logic [4:0][4:0][W-1:0] box_t;   // (x,y,z) state
  typedef logic [W-1:0]           lane_t;  // (z)
  typedef logic [4:0] [W-1:0]     plane_t; // (x,z)
  typedef logic [4:0][4:0]        slice_t; // (x,y)
  typedef logic [4:0][W-1:0]      sheet_t; // (y,z) identical to plane_t
  typedef logic [4:0]             row_t;   // (x)
  typedef logic [4:0]             col_t;   // (y) identical to row_t

  //////////////
  // Keccak_f //
  //////////////
  box_t state_in   [Share];
  box_t state_out  [Share];
  box_t theta_data [Share];
  box_t rho_data   [Share];
  box_t pi_data    [Share];
  box_t chi_data   [Share];
  box_t iota_data  [Share];

  box_t phase1_in  [Share];
  box_t phase1_out [Share];
  box_t phase2_in  [Share];
  box_t phase2_out [Share];

  /////////////////
  // Unused nets //
  /////////////////
  // clk_i, rst_ni, rand_valid_i, rand_i and sel_i are not used when EnMasking
  // is 0. Tie them to dummy nets so lint does not report unused inputs.
  if (!EnMasking) begin : gen_tie_unused
    logic unused_clk, unused_rst_n, unused_rand_valid;
    logic [Width-1:0] unused_rand_data;
    logic unused_sel;
    assign unused_clk        = clk_i;
    assign unused_rst_n      = rst_ni;
    assign unused_rand_valid = rand_valid_i;
    assign unused_rand_data  = rand_i;
    assign unused_sel        = sel_i;
  end

  ///////////////////////
  // Input/ Output Mux //
  ///////////////////////
  // This module has two phases. In the first phase, it calculates the Theta,
  // Rho and Pi steps of SHA3. In the second phase, it computes the Chi and
  // Iota steps.
  // If masking is not enabled, the two phases are completed within a cycle.
  //
  // If masking is enabled, the first phase completes in a cycle. Then, the
  // output has to be latched: it should be stored in the state and given to
  // the input of this module again.
  //
  // The second phase in the masked version needs two cycles to complete.
  // For two cycles, the input value `s_i` shall stay the same. The output
  // value is correct only at the second cycle.
  for (genvar i = 0 ; i < Share ; i++) begin : g_state_inout
    assign state_in[i] = bitarray_to_box(s_i[i]);
    assign s_o[i]      = box_to_bitarray(state_out[i]);
  end : g_state_inout

  if (EnMasking) begin : g_2share_data
    // Only the selected phase sees the input state; the other one is fed zeros.
    assign phase1_in = (sel_i == 1'b 0) ? state_in : '{default:'0};
    assign phase2_in = (sel_i == 1'b 1) ? state_in : '{default:'0};

    always_comb begin
      unique case (sel_i)
        1'b 0:   state_out = phase1_out;
        1'b 1:   state_out = phase2_out;
        default: state_out = '{default: '0};
      endcase
    end
  end else begin : g_single_data
    // Unmasked: phase 1 feeds phase 2 directly (single combinational path).
    assign phase1_in = state_in;
    assign phase2_in = phase1_out;
    assign state_out = phase2_out;
  end

  assign phase1_out = pi_data;
  assign phase2_out = iota_data;


  for (genvar i = 0 ; i < Share ; i++) begin : g_datapath

    // Phase 1:
    assign theta_data[i] = theta(phase1_in[i]);
    // Commented out rho function as vcs complains z-Offset%W isn't constant
    // assign rho_data[i] = rho(theta_data[i]);
    // rho_data is driven by the g_rho generate block below instead.

    assign pi_data[i] = pi(rho_data[i]);

    // Phase 2:
    // Chi : See below
    // Iota: See below

  end : g_datapath

  // Iota adds Round Constants(RC), so only one share should be XORed
  if (EnMasking) begin : g_2share_iota
    assign iota_data[0] = iota(chi_data[0], rnd_i);
    assign iota_data[1] = chi_data[1];
  end else begin : g_single_iota
    assign iota_data[0] = iota(chi_data[0], rnd_i);
  end

  if (EnMasking) begin : g_2share_chi
    // Domain-Oriented Masking
    // reference: https://eprint.iacr.org/2017/395.pdf

    for (genvar x = 0 ; x < 5 ; x++) begin : g_chi_w
      localparam int X1 = (x + 1) % 5;
      localparam int X2 = (x + 2) % 5;

      sheet_t sheet0[Share]; // Inverted input X1
      sheet_t sheet1[Share]; // X2
      sheet_t sheet2[Share]; // DOM output

      // Only share 0 is inverted.
      assign sheet0[0] = ~phase2_in[0][X1];
      assign sheet0[1] =  phase2_in[1][X1];

      assign sheet1[0] = phase2_in[0][X2];
      assign sheet1[1] = phase2_in[1][X2];

      logic [$bits(sheet_t)-1:0] a0, a1, b0, b1, c0, c1, q0, q1;

      // Convert sheet_t to 1D array
      // TODO: Make this smarter :)
      // Lane [0] ends up in the most significant W bits; the q0/q1 unpacking
      // below uses the same ordering.
      assign a0 = {sheet0[0][0],sheet0[0][1],sheet0[0][2],sheet0[0][3],sheet0[0][4]};
      assign a1 = {sheet0[1][0],sheet0[1][1],sheet0[1][2],sheet0[1][3],sheet0[1][4]};

      assign b0 = {sheet1[0][0],sheet1[0][1],sheet1[0][2],sheet1[0][3],sheet1[0][4]};
      assign b1 = {sheet1[1][0],sheet1[1][1],sheet1[1][2],sheet1[1][3],sheet1[1][4]};

      // This keccak_f implementation doesn't use the states as entropy sources.
      // It rather receives the entropy from a random number generator.
      // The module needs 1600b of entropy per round (3 cycles). It is expensive
      // to generate 1600b of entropy every three cycles.
      //
      // It is recommended to duplicate a smaller amount of entropy and expand
      // it to 1600b by shuffling rather than by concatenating.
      //
      // Both c0 and c1 take the same slice of rand_i for sheet x.
      assign c0 = rand_i[x*$bits(sheet_t)+:$bits(sheet_t)];
      assign c1 = rand_i[x*$bits(sheet_t)+:$bits(sheet_t)];

      prim_dom_and_2share #(
        .DW       ($bits(sheet_t)), // sheet
        .EnNegedge(0)               // takes two cycle to complete DOM
      ) u_dom (
        .clk_i,
        .rst_ni,

        .a0_i      (a0),
        .a1_i      (a1),
        .b0_i      (b0),
        .b1_i      (b1),
        .c_valid_i (rand_valid_i),
        .c0_i      (c0),
        .c1_i      (c1),
        .q0_o      (q0),
        .q1_o      (q1)
      );

      // Convert q0, q1 to sheet_t
      // TODO: Make this smarter
      assign sheet2[0][4] = q0[W*0+:W];
      assign sheet2[0][3] = q0[W*1+:W];
      assign sheet2[0][2] = q0[W*2+:W];
      assign sheet2[0][1] = q0[W*3+:W];
      assign sheet2[0][0] = q0[W*4+:W];
      assign sheet2[1][4] = q1[W*0+:W];
      assign sheet2[1][3] = q1[W*1+:W];
      assign sheet2[1][2] = q1[W*2+:W];
      assign sheet2[1][1] = q1[W*3+:W];
      assign sheet2[1][0] = q1[W*4+:W];

      // Final XOR to generate the output
      assign chi_data[0][x] = sheet2[0] ^ phase2_in[0][x];
      assign chi_data[1][x] = sheet2[1] ^ phase2_in[1][x];
    end : g_chi_w

  end else begin : g_single_chi
    assign chi_data[0] = chi(phase2_in[0]);
  end

  // Rho ======================================================================
  // As RhoOffset[x][y] is considered as variable int in VCS,
  // it is replaced with generate statement.
  // Revised to meet verilator lint. Now RhoOffset is 1-D array
  // Offset : Look at Table 2 in FIPS PUB 202
  localparam int RhoOffset [25] = '{
    //y  0    1    2    3    4     x
         0,  36,   3, 105, 210, // 0:  0  1  2  3  4
         1, 300,  10,  45,  66, // 1:  5  6  7  8  9
       190,   6, 171,  15, 253, // 2: 10 11 12 13 14
        28,  55, 153,  21, 120, // 3: 15 16 17 18 19
        91, 276, 231, 136,  78  // 4: 20 21 22 23 24
  };
  for (genvar i = 0 ; i < Share ; i++) begin : g_rho
    box_t rho_in, rho_out;
    assign rho_in      = theta_data[i];
    assign rho_data[i] = rho_out;

    for (genvar x = 0 ; x < 5 ; x++) begin : gen_rho_x
      for (genvar y = 0 ; y < 5 ; y++) begin : gen_rho_y
        localparam int Offset   = RhoOffset[5*x+y]%W;
        localparam int ShiftAmt = W- Offset;
        if (Offset == 0) begin : gen_offset0
          assign rho_out[x][y][W-1:0] = rho_in[x][y][W-1:0];
        end else begin : gen_others
          // Rotate the lane by Offset: the low ShiftAmt bits move to the top,
          // the top Offset bits wrap around to the bottom.
          assign rho_out[x][y][W-1:0] = {rho_in[x][y][0+:ShiftAmt],
                                         rho_in[x][y][ShiftAmt+:Offset]};
        end
      end
    end
  end : g_rho

  ////////////////
  // Assertions //
  ////////////////

  `ASSERT_INIT(ValidWidth_A, Width inside {25, 50, 100, 200, 400, 800, 1600})
  `ASSERT_INIT(ValidW_A, W inside {1, 2, 4, 8, 16, 32, 64})
  `ASSERT_INIT(ValidL_A, L inside {0, 1, 2, 3, 4, 5, 6})
  `ASSERT_INIT(ValidRound_A, MaxRound <= 24) // Keccak-f only

  // sel_i shall stay for two cycle after change to 1.
  if (EnMasking) begin : gen_selperiod_chk
    `ASSUME(SelStayTwoCycleIf1_A, $rose(sel_i) |=> sel_i, clk_i, !rst_ni)
  end

  ///////////////
  // Functions //
  ///////////////

  // Convert bitarray to 3D box
  // Please take a look at FIPS PUB 202
  // https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
  // > For all triples (x,y,z) such that 0<=x<5, 0<=y<5, and 0<=z<w,
  // > A[x,y,z]=S[w(5y+x)+z]
  function automatic box_t bitarray_to_box(logic [Width-1:0] s_in);
    automatic box_t box;
    for (int y = 0 ; y < 5 ; y++) begin
      for (int x = 0 ; x < 5 ; x++) begin
        for (int z = 0 ; z < W ; z++) begin
          box[x][y][z] = s_in[W*(5*y+x) + z];
        end
      end
    end
    return box;
  endfunction : bitarray_to_box

  // Convert 3D cube to bitarray (inverse of bitarray_to_box)
  function automatic logic [Width-1:0] box_to_bitarray(box_t state);
    automatic logic [Width-1:0] bitarray;
    for (int y = 0 ; y < 5 ; y++) begin
      for (int x = 0 ; x < 5 ; x++) begin
        for (int z = 0 ; z < W ; z++) begin
          bitarray[W*(5*y+x)+z] = state[x][y][z];
        end
      end
    end
    return bitarray;
  endfunction : box_to_bitarray

  // Step Mapping =============================================================
  // theta(θ)
  // XOR each bit in the state with the parity of two columns
  // C[x,z] = A[x,0,z] ^ A[x,1,z] ^ A[x,2,z] ^ A[x,3,z] ^ A[x,4,z]
  // D[x,z] = C[x-1,z] ^ C[x+1,z-1]
  // theta = A[x,y,z] ^ D[x,z]
  //
  // Index tables are localparams, like the other lookup tables in this module.
  localparam int ThetaIndexX1 [5] = '{4, 0, 1, 2, 3}; // (x-1)%5
  localparam int ThetaIndexX2 [5] = '{1, 2, 3, 4, 0}; // (x+1)%5
  function automatic box_t theta(box_t state);
    plane_t c;
    plane_t d;
    box_t result;
    // Column parity
    for (int x = 0 ; x < 5 ; x++) begin
      c[x] = state[x][0] ^ state[x][1] ^ state[x][2] ^ state[x][3] ^ state[x][4];
    end
    for (int x = 0 ; x < 5 ; x++) begin
      for (int z = 0 ; z < W ; z++) begin
        int index_z;
        index_z = (z == 0) ? W-1 : z-1; // (z-1)%W
        d[x][z] = c[ThetaIndexX1[x]][z] ^ c[ThetaIndexX2[x]][index_z];
      end
    end
    for (int x = 0 ; x < 5 ; x++) begin
      for (int y = 0 ; y < 5 ; y++) begin
        result[x][y] = state[x][y] ^ d[x];
      end
    end
    return result;
  endfunction : theta

  // rho

  // Commented out entire rho function due to VCS elaboration error.
  // (z-RhoOffset[x][y]%W) isn't considered as a constant in VCS.
  // Even changing it to W-RhoOffset[x][y]%W and assign to ShiftAmt
  // creates same error.

  // Offset : Look at Table 2 in FIPS PUB 202
  //localparam int RhoOffset [5][5] = '{
  //  //y  0    1    2    3    4     x
  //  '{   0,  36,   3, 105, 210},// 0
  //  '{   1, 300,  10,  45,  66},// 1
  //  '{ 190,   6, 171,  15, 253},// 2
  //  '{  28,  55, 153,  21, 120},// 3
  //  '{  91, 276, 231, 136,  78} // 4
  //};

  // rotate bits of each lane by offset
  // 1. rho[0,0,z] = A[0,0,z]
  // 2. Offset swap
  //    a. (x,y) := (1,0)
  //    b. for t [0..23]
  //       i.  rho[x,y,z] = A[x,y,z-(t+1)(t+2)/2]
  //       ii. (x,y) = (y, (2x+3y))
  //function automatic box_t rho(box_t state);
  //  box_t result;
  //  for (int x = 0 ; x < 5 ; x++) begin
  //    for (int y = 0 ; y < 5 ; y++) begin
  //      for (int z = 0 ; z < W ; z++) begin
  //        automatic int index_z;
  //        index_z = (z-RhoOffset[x][y])%W;
  //        result[x][y][z] = state[x][y][(z-RhoOffset[x][y])%W];
  //      end
  //    end
  //  end
  //  return result;
  //endfunction : rho

  // pi
  // rearrange the position of lanes
  // pi[x,y,z] = state[(x+3y),x,z]
  localparam int PiRotate [5][5] = '{
    //y  0  1  2  3  4     x
    '{   0, 3, 1, 4, 2},// 0
    '{   1, 4, 2, 0, 3},// 1
    '{   2, 0, 3, 1, 4},// 2
    '{   3, 1, 4, 2, 0},// 3
    '{   4, 2, 0, 3, 1} // 4
  };
  function automatic box_t pi(box_t state);
    box_t result;
    for (int x = 0 ; x < 5 ; x++) begin
      for (int y = 0 ; y < 5 ; y++) begin
        result[x][y][W-1:0] = state[PiRotate[x][y]][x][W-1:0];
      end
    end
    return result;
  endfunction : pi

  // chi
  // chi[x,y,z] = state[x,y,z] ^ ((state[x+1,y,z] ^ 1) & state[x+2,y,z])
  //
  // Index tables are localparams, like the other lookup tables in this module.
  localparam int ChiIndexX1 [5] = '{1, 2, 3, 4, 0}; // (x+1)%5
  localparam int ChiIndexX2 [5] = '{2, 3, 4, 0, 1}; // (x+2)%5
  function automatic box_t chi(box_t state);
    box_t result;
    for (int x = 0 ; x < 5 ; x++) begin
      result[x] = state[x] ^ ((~state[ChiIndexX1[x]]) & state[ChiIndexX2[x]]);
    end
    return result;
  endfunction : chi

  // iota
  // XOR (x,y) = (0,0) with Round Constant (RC)

  // RC parameter: Precomputed by util/keccak_rc.py. Only up-to 0..L-1 is used
  // RC = '0
  // RC[2**j-1] = rc(j+7*rnd)
  // rc(t) =
  //   1. t%255 == 0 -> 1
  //   2. R[0:7] = 'b10000000
  //   3. for i = [1..t%255]
  //      a. R = 0 || R
  //      b. R[0] = R[0] ^ R[8]
  //      c. R[4] = R[4] ^ R[8]
  //      d. R[5] = R[5] ^ R[8]
  //      e. R[6] = R[6] ^ R[8]
  //      f. R = R[0:7]
  //   4. return R[0]
  // RC has L = [0..6]
  // for lower L case, only chopping lower part of 64bit RC is sufficient.
  localparam logic [63:0] RC [24] = '{
    64'h 0000_0000_0000_0001, // Round 0
    64'h 0000_0000_0000_8082, // Round 1
    64'h 8000_0000_0000_808A, // Round 2
    64'h 8000_0000_8000_8000, // Round 3
    64'h 0000_0000_0000_808B, // Round 4
    64'h 0000_0000_8000_0001, // Round 5
    64'h 8000_0000_8000_8081, // Round 6
    64'h 8000_0000_0000_8009, // Round 7
    64'h 0000_0000_0000_008A, // Round 8
    64'h 0000_0000_0000_0088, // Round 9
    64'h 0000_0000_8000_8009, // Round 10
    64'h 0000_0000_8000_000A, // Round 11
    64'h 0000_0000_8000_808B, // Round 12
    64'h 8000_0000_0000_008B, // Round 13
    64'h 8000_0000_0000_8089, // Round 14
    64'h 8000_0000_0000_8003, // Round 15
    64'h 8000_0000_0000_8002, // Round 16
    64'h 8000_0000_0000_0080, // Round 17
    64'h 0000_0000_0000_800A, // Round 18
    64'h 8000_0000_8000_000A, // Round 19
    64'h 8000_0000_8000_8081, // Round 20
    64'h 8000_0000_0000_8080, // Round 21
    64'h 0000_0000_8000_0001, // Round 22
    64'h 8000_0000_8000_8008  // Round 23
  };

  // iota: XOR with RC for (x,y) = (0,0)
  // `rnd` indexes the 24-entry RC table; only the lower W bits of the
  // selected constant are applied.
  function automatic box_t iota(box_t state, logic [RndW-1:0] rnd);
    box_t result;
    result = state;
    result[0][0][W-1:0] = state[0][0][W-1:0] ^ RC[rnd][W-1:0];

    return result;
  endfunction : iota

  // Round function : Rnd(A,i_r)
  // Not used due to rho function issue described above.

  //function automatic box_t keccak_rnd(box_t state, logic [RndW-1:0] rnd);
  //  box_t keccak_state;
  //  keccak_state = iota(chi(pi(rho(theta(state)))), rnd);
  //
  //  return keccak_state;
  //endfunction : keccak_rnd

endmodule
462