// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
// This module performs the multiplication of two operands in Galois field GF(2^Width) modulo the
// provided irreducible polynomial using a parallel Mastrovito multiplier [3]. To cut long paths
// potentially occurring for large data widths, the implementation provides a parameter
// StagesPerCycle to decompose the multiplication into Width/StagesPerCycle iterative steps
// (Digit-Serial/Parallel Multiplier [4]).
// Note that this module is not pipelined and produces an output sample every Width/StagesPerCycle
// cycles.
// References:
// [1] Patel, "Parallel Multiplier Designs for the Galois/Counter Mode of Operation",
// [2] Wagner, "The Laws of Cryptography: The Finite Field GF(2^8)",
// [3] Mastrovito, "VLSI Designs for Multiplication over Finite Fields GF(2^m)",
// [4] Song et al., "Efficient Finite Field Serial/Parallel Multiplication",
`include ""
// Galois field multiplier: prod_o = operand_a_i * operand_b_i in GF(2^Width), reduced modulo the
// polynomial x^Width + IPoly.
//
// Parameters:
//   Width          - operand and product width in bits.
//   StagesPerCycle - number of operand_b_i bits consumed per clock cycle. Must divide Width and
//                    be a power of two (both are checked below). When equal to Width the
//                    multiplier is purely combinational.
//   IPoly          - low Width bits of the reduction polynomial (the x^Width term is implicit).
//
// Handshake:
//   req_i - advances the iteration while asserted.
//   ack_o - high while the last slice is being processed, i.e. when prod_o carries the product.
//           Constantly high in the fully combinational configuration.
//
// NOTE(review): operand_b_i is re-sliced on every iteration and operand_a_i is sampled on the
// first one, so both presumably have to be held stable for the whole multiplication - confirm
// against the callers.
module prim_gf_mult #(
  parameter int Width = 32,
  parameter int StagesPerCycle = Width,

  // The field-generating, irreducible polynomial of degree Width.
  // Can for example be a Conway polynomial (see published tables of Conway polynomials).
  // For the default Width = 32, the degree-32 Conway polynomial has bits 32, 15, 9, 7, 4, 3, 0
  // set to one. Bit 32 (the leading term) does not fit into IPoly[Width-1:0]; it is implied by
  // the shift-and-reduce step in gf_mult2().
  // Every term is cast to Width bits before shifting so that the result does not depend on
  // context-determined widening of a 1-bit literal.
  parameter logic [Width-1:0] IPoly = (Width'(1'b1) << 15) |
                                      (Width'(1'b1) << 9)  |
                                      (Width'(1'b1) << 7)  |
                                      (Width'(1'b1) << 4)  |
                                      (Width'(1'b1) << 3)  |
                                      (Width'(1'b1) << 0)
) (
  input clk_i,
  input rst_ni,
  input req_i,
  input [Width-1:0] operand_a_i,
  input [Width-1:0] operand_b_i,
  output logic ack_o,
  output logic [Width-1:0] prod_o
);

  // Elaboration-time parameter checks.
  // NOTE(review): ASSERT_INIT is not defined in this file; it is presumably provided by the
  // header included at the top of the file - confirm that the include names the right file.
  `ASSERT_INIT(IntegerLoops_A, (Width % StagesPerCycle) == 0)
  `ASSERT_INIT(StagePow2_A, $onehot(StagesPerCycle))

  // Number of iterations needed for one product, and the counter width to index them.
  // A single iteration still gets a 1-bit counter so that cnt never has zero width.
  localparam int Loops = Width / StagesPerCycle;
  localparam int CntWidth = (Loops == 1) ? 1 : $clog2(Loops);

  // reformat operand_b_i into Loops slices of StagesPerCycle bits each
  logic [Loops-1:0][StagesPerCycle-1:0] reformat_data;

  // this slice of operand bits used during each loop
  logic [StagesPerCycle-1:0] op_i_slice;

  // the matrix is made up of a series of GF(2^Width) * x
  logic [StagesPerCycle-1:0][Width-1:0] matrix;

  // since the matrix generation is not done in one go, we must remember
  // where it last left off
  logic [Width-1:0] vector;

  // this variable tracks which loop we are currently operating
  logic [CntWidth-1:0] cnt;

  // this variable tracks the first loop through the multiply
  logic first;

  // intermediate prod held between loops
  logic [Width-1:0] prod_q, prod_d;

  // select current slice (slice 0 holds the least significant bits of operand_b_i)
  assign reformat_data = operand_b_i;
  assign op_i_slice = reformat_data[cnt];

  assign first = cnt == 0;

  if (StagesPerCycle == Width) begin : gen_all_combo

    // Single iteration: no state is needed, the result is always valid.
    assign ack_o = 1'b1;
    assign cnt = '0;
    assign prod_q = '0;
    assign vector = '0;

  end else begin : gen_decomposed

    // multiply is done
    assign ack_o = cnt == (Loops - 1);

    // advance the stage count and also advance the bit position count
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        cnt <= '0;
      end else if (req_i && ack_o) begin
        // last slice consumed: wrap around for the next multiplication
        cnt <= '0;
      end else if (req_i && cnt < (Loops - 1)) begin
        cnt <= cnt + 1'b1;
      end
    end

    // partial product and the matrix row to continue from in the next iteration
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        prod_q <= '0;
        vector <= '0;
      end else if (ack_o) begin
        // cleared whenever the final iteration is reached, independent of req_i
        prod_q <= '0;
        vector <= '0;
      end else if (req_i) begin
        prod_q <= prod_d;
        vector <= matrix[StagesPerCycle-1];
      end
    end

  end

  // First iteration: rows are operand_a_i * x^0 .. x^(StagesPerCycle-1).
  // Later iterations: continue multiplying by x from the last row of the previous iteration.
  assign matrix = first ? gen_matrix(operand_a_i, 1'b1) : gen_matrix(vector, 1'b0);
  assign prod_d = prod_q ^ gf_mult(matrix, op_i_slice);

  // The output is not toggled until it is ready; operand_a_i is passed through in the meantime.
  assign prod_o = ack_o ? prod_d : operand_a_i;

  // GF(2^Width) * x
  // Shifts the operand left by one and, if the bit shifted out was set, reduces the result by
  // XORing in IPoly.
  function automatic logic [Width-1:0] gf_mult2(
    logic [Width-1:0] operand
  );
    logic [Width-1:0] mult_out;
    mult_out = operand[Width-1] ? (operand << 1) ^ IPoly : (operand << 1);
    return mult_out;
  endfunction // gf_mult2

  // Matrix generate step
  // Returns StagesPerCycle rows where row i equals row i-1 multiplied by x.
  // Row 0 is the seed itself when init is set, and seed * x otherwise.
  function automatic logic [StagesPerCycle-1:0][Width-1:0] gen_matrix(
    logic [Width-1:0] seed,
    logic init
  );
    logic [StagesPerCycle-1:0][Width-1:0] matrix_out;

    // Clear the whole array before filling it. A part-select [StagesPerCycle-1:1] would be
    // reversed ([0:1]) for StagesPerCycle == 1, which the parameter checks permit.
    matrix_out = '0;
    matrix_out[0] = init ? seed : gf_mult2(seed);
    for (int i = 1; i < StagesPerCycle; i++) begin
      matrix_out[i] = gf_mult2(matrix_out[i-1]);
    end
    return matrix_out;
  endfunction // gen_matrix

  // Galois multiply step
  // XORs together the matrix rows selected by the set bits of operand.
  function automatic logic [Width-1:0] gf_mult(
    logic [StagesPerCycle-1:0][Width-1:0] matrix,
    logic [StagesPerCycle-1:0] operand
  );
    logic [Width-1:0] mult_out;
    logic [Width-1:0] add_vector;
    mult_out = '0;
    for (int i = 0; i < StagesPerCycle; i++) begin
      add_vector = operand[i] ? matrix[i] : '0;
      mult_out = mult_out ^ add_vector;
    end
    return mult_out;
  endfunction // gf_mult

endmodule // prim_gf_mult