/* blob: e8967880d48eea5db1d5f6f663ec36c615a960dd (source-viewer page header, not part of the program) */
/* Copyright lowRISC Contributors.
* Copyright 2016 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE.dcrypto file.
*
* Derived from code in
*
* https://chromium.googlesource.com/chromiumos/platform/ec/+/refs/heads/cr50_stab/chip/g/dcrypto/dcrypto_bn.c
*
*/
/**
 * d0inv -- build a 256-bit value in w29 from w28, one bit per iteration.
 *
 * Starting from w29 = 1, each of the 256 iterations forms the low 256 bits
 * of w28 * w29 in w1, keeps only the bit selected by the single-bit mask in
 * w0, ORs that bit into w29, and then moves the mask up by one bit.
 * After the loop w29 is replaced by w31 - w29.
 *
 * NOTE(review): this matches the usual bit-serial computation of the
 * negated inverse of w28 modulo 2^256 (the Montgomery constant). That
 * reading assumes w28 is odd and w31 == 0 -- confirm against callers
 * (modload zeroes w31 before calling).
 *
 * in:       w28, w31
 * out:      w29
 * clobbers: w0, w1, the MULQACC accumulator, flags of the default flag group
 */
d0inv:
/* w0 = 1: bit mask, starts at bit 0 */
bn.xor w0, w0, w0
bn.addi w0, w0, 1
/* w29 = 1: running result */
bn.mov w29, w0
/* 256 iterations; the next 13 instructions are the loop body */
loopi 256, 13
/* w1.L = bits 0..127 of w28 * w29 (accumulator cleared by .z first) */
bn.mulqacc.z w28.0, w29.0, 0
bn.mulqacc w28.1, w29.0, 64
bn.mulqacc.so w1.L, w28.0, w29.1, 64
/* w1.U = bits 128..255 of w28 * w29; higher partial products are not needed */
bn.mulqacc w28.2, w29.0, 0
bn.mulqacc w28.1, w29.1, 0
bn.mulqacc w28.0, w29.2, 0
bn.mulqacc w28.3, w29.0, 64
bn.mulqacc w28.2, w29.1, 64
bn.mulqacc w28.1, w29.2, 64
bn.mulqacc.so w1.U, w28.0, w29.3, 64
/* keep only the product bit under the mask and merge it into the result */
bn.and w1, w1, w0
bn.or w29, w29, w1
/* shift the mask left by one bit (w0 = 2 * w0) */
bn.add w0, w0, w0
/* w29 = w31 - w29 */
bn.sub w29, w31, w29
ret
/**
 * selcxSub -- limb-wise "subtract if FG1.C" over the WDR buffer at w5.
 *
 * For x30 limbs: loads one wide word from DMEM (address in x16, reloaded
 * here from DMEM word 0) into w3, copies the current buffer limb into w2,
 * computes w4 = w2 - w3 - borrow, and writes back either w4 (FG1.C set) or
 * the unchanged w2 (FG1.C clear).
 *
 * in:       x30 (iteration count, not set here), FG1.C (selector),
 *           WDRs w5.. (buffer), DMEM word 0 (address of the subtrahend)
 * out:      WDRs w5.. updated in place
 * clobbers: x8, x10, x11, x16, w2, w3, w4, flags of the default flag group
 *
 * NOTE(review): the bn.add on w31 only acts as "clear carry" if w31 == 0 on
 * entry -- confirm callers guarantee this.
 */
selcxSub:
/* WDR indices used for indirect access: x8 -> w5 (buffer), x10 -> w3, x11 -> w2 */
li x8, 5
li x10, 3
li x11, 2
/* x16 = DMEM word 0 (used below as the load address) */
lw x16, 0(x0)
/* w31 + w31: with w31 == 0 this leaves the carry clear for the bn.subb chain */
bn.add w31, w31, w31
/* x30 iterations; the next 5 instructions are the loop body */
loop x30, 5
/* w3 = DMEM[x16]; x16 advances to the next wide word */
bn.lid x10, 0(x16++)
/* w2 = WDR[x8] (current buffer limb) */
bn.movr x11, x8
/* w4 = w2 - w3 - borrow (borrow chained across iterations) */
bn.subb w4, w2, w3
/* w3 = FG1.C ? w4 : w2 */
bn.sel w3, w4, w2, FG1.C
/* WDR[x8] = w3; x8 advances to the next buffer limb */
bn.movr x8++, x10
ret
/**
 * computeRR -- repeated modular doubling of a WDR buffer, result stored to DMEM.
 *
 * Steps visible in the code:
 *   1. w31 = 0, w0 = DMEM[0], x16..x23 = the eight 32-bit words at DMEM 0..28.
 *   2. The buffer w5.. (x30 limbs) is zeroed, FG1.C is set from 0 - w0, and
 *      selcxSub conditionally subtracts the number addressed by DMEM word 0.
 *   3. (x22 << 8) times: double the buffer (FG1 carry chain), conditionally
 *      subtract on the carry out, compare the buffer against the number at
 *      x16 (FG1 borrow chain) and conditionally subtract again.
 *   4. The buffer is stored to DMEM starting at x18.
 *
 * NOTE(review): presumably this computes R^2 mod M for Montgomery
 * arithmetic, with M addressed by DMEM word 0 and x22/x30 the limb count --
 * confirm against the DMEM layout used by the caller.
 *
 * in:       x30 (limb count; not loaded here), DMEM words 0..28
 * out:      x30 wide words written to DMEM at x18
 * clobbers: x3, x8, x10, x11, x16..x24, w0, w2, w3, w4, w5.., w31, flags
 */
computeRR:
/* w31 = 0 */
bn.xor w31, w31, w31
/* w0 = DMEM[0] (x3 = 0 selects w0 as destination) */
li x3, 0
bn.lid x3, 0(x0)
/* x16..x23 = 32-bit words at DMEM byte offsets 0..28 */
lw x16, 0(x0)
lw x17, 4(x0)
lw x18, 8(x0)
lw x19, 12(x0)
lw x20, 16(x0)
lw x21, 20(x0)
lw x22, 24(x0)
lw x23, 28(x0)
bn.xor w3, w3, w3
/* x24 = x22 * 256: iteration count of the main loop */
slli x24, x22, 8
/* x8 -> w5 (buffer), x10 -> w3 */
li x8, 5
li x10, 3
bn.xor w3, w3, w3
/* zero x30 buffer limbs starting at w5 by copying w3 (= 0) */
loop x30, 1
bn.movr x8++, x10
/* w3 = w31 - w0; the resulting borrow lands in FG1.C for selcxSub */
bn.sub w3, w31, w0, FG1
jal x1, selcxSub
/* x24 iterations; the next 16 instructions (callee code not counted) are the body */
loop x24, 16
li x8, 5
/* w3 = 0 and FG1.C cleared before the doubling chain */
bn.sub w3, w3, w3, FG1
/* double the buffer limb by limb, carry chained through FG1 */
loop x30, 3
bn.movr x11, x8
bn.addc w2, w2, w2, FG1
bn.movr x8++, x11
/* conditional subtraction keyed on the carry out of the doubling */
jal x1, selcxSub
/* rewind buffer index and DMEM pointer for the comparison */
li x8, 5
lw x16, 0(x0)
/* w3 = 0 and FG1.C cleared before the compare chain */
bn.sub w3, w3, w3, FG1
/* compare: flags of DMEM limb - buffer limb - borrow, chained through FG1 */
loop x30, 3
bn.lid x10, 0(x16++)
bn.movr x11, x8++
bn.cmpb w3, w2, FG1
/* conditional subtraction keyed on the borrow out of the comparison */
jal x1, selcxSub
/* write to x0 has no effect; presumably padding so the loop body does not end on a jump */
li x0, 0
/* store the buffer (w5..) to DMEM at x18 */
li x8, 5
loop x30, 2
bn.sid x8, 0(x18++)
addi x8, x8, 1
ret
/**
 * dmXd0 -- full 256 x 256 -> 512 bit product of w30 and w25.
 *
 * All sixteen 64 x 64 bit partial products are accumulated in order of
 * increasing weight; each .so writes 128 result bits to a WDR half and
 * shifts them out of the accumulator.
 *
 * in:       w30, w25
 * out:      w27 = low 256 bits, w26 = high 256 bits of w30 * w25
 * clobbers: the MULQACC accumulator
 */
dmXd0:
/* result bits 0..127 -> w27.L (accumulator cleared by .z first) */
bn.mulqacc.z w30.0, w25.0, 0
bn.mulqacc w30.1, w25.0, 64
bn.mulqacc.so w27.L, w30.0, w25.1, 64
/* result bits 128..255 -> w27.U */
bn.mulqacc w30.2, w25.0, 0
bn.mulqacc w30.1, w25.1, 0
bn.mulqacc w30.0, w25.2, 0
bn.mulqacc w30.3, w25.0, 64
bn.mulqacc w30.2, w25.1, 64
bn.mulqacc w30.1, w25.2, 64
bn.mulqacc.so w27.U, w30.0, w25.3, 64
/* result bits 256..383 -> w26.L */
bn.mulqacc w30.3, w25.1, 0
bn.mulqacc w30.2, w25.2, 0
bn.mulqacc w30.1, w25.3, 0
bn.mulqacc w30.3, w25.2, 64
bn.mulqacc.so w26.L, w30.2, w25.3, 64
/* result bits 384..511 -> w26.U */
bn.mulqacc.so w26.U, w30.3, w25.3, 0
ret
/**
 * dmXa -- full 256 x 256 -> 512 bit product of w30 and w2.
 *
 * Same partial-product schedule as dmXd0, with w2 as the second operand.
 *
 * in:       w30, w2
 * out:      w27 = low 256 bits, w26 = high 256 bits of w30 * w2
 * clobbers: the MULQACC accumulator
 */
dmXa:
/* result bits 0..127 -> w27.L (accumulator cleared by .z first) */
bn.mulqacc.z w30.0, w2.0, 0
bn.mulqacc w30.1, w2.0, 64
bn.mulqacc.so w27.L, w30.0, w2.1, 64
/* result bits 128..255 -> w27.U */
bn.mulqacc w30.2, w2.0, 0
bn.mulqacc w30.1, w2.1, 0
bn.mulqacc w30.0, w2.2, 0
bn.mulqacc w30.3, w2.0, 64
bn.mulqacc w30.2, w2.1, 64
bn.mulqacc w30.1, w2.2, 64
bn.mulqacc.so w27.U, w30.0, w2.3, 64
/* result bits 256..383 -> w26.L */
bn.mulqacc w30.3, w2.1, 0
bn.mulqacc w30.2, w2.2, 0
bn.mulqacc w30.1, w2.3, 0
bn.mulqacc w30.3, w2.2, 64
bn.mulqacc.so w26.L, w30.2, w2.3, 64
/* result bits 384..511 -> w26.U */
bn.mulqacc.so w26.U, w30.3, w2.3, 0
ret
/**
 * mma_sub_cx -- limb-wise "subtract if FG1.C" on the WDR buffer indexed by x8.
 *
 * For x30 limbs: loads a wide word from DMEM[x16] into WDR[x13], copies the
 * current buffer limb WDR[x8] into WDR[x12], subtracts with borrow, and
 * writes back the difference (FG1.C set) or the original limb (FG1.C clear).
 * The instructions name w24, w30 and w29 directly, so the routine expects
 * x13 = 24 and x12 = 30 (mma sets these before calling).
 *
 * in:       x8 (first buffer WDR index), x12, x13, x16 (DMEM address),
 *           x30 (iteration count), FG1.C (selector)
 * out:      buffer limbs updated in place; x8 and x16 advanced by x30 steps
 * clobbers: w24, w29, w30, flags of the default flag group
 *
 * NOTE(review): the borrow chain starts from whatever carry the default
 * flag group holds on entry; it is not cleared here.
 */
mma_sub_cx:
/* x30 iterations; the next 6 instructions are the loop body */
loop x30, 6
/* WDR[x13] (w24) = DMEM[x16]; x16 advances to the next wide word */
bn.lid x13, 0(x16++)
/* WDR[x12] (w30) = WDR[x8], the current buffer limb */
bn.movr x12, x8
/* w29 = w30 - w24 - borrow */
bn.subb w29, w30, w24
/* WDR[x8] = WDR[x13]; this value is overwritten two instructions below */
bn.movr x8, x13
/* w24 = FG1.C ? w29 : w30 */
bn.sel w24, w29, w30, FG1.C
/* WDR[x8] = w24; x8 advances to the next buffer limb */
bn.movr x8++, x13
ret
/**
 * mma -- one multiply-accumulate-reduce pass over the WDR buffer at w4.
 *
 * Visible data flow:
 *   - Limbs are streamed from DMEM[x19++] and multiplied by w2 (dmXa); the
 *     products are added to the buffer limbs read through x8.
 *   - The low word of the first sum is multiplied by w3 (dmXd0) to give a
 *     factor kept in w25; limbs streamed from DMEM[x16++] are multiplied by
 *     that factor and added as well.
 *   - Each combined word is written back through x10, which starts at the
 *     same index as x8 but is only first used after x8 has advanced twice,
 *     so the buffer ends up shifted down by one limb.
 *   - Two carry chains run in parallel: the default flag group for the
 *     product/accumulate additions and FG1 for the additions of the carried
 *     high words.
 *   - Finally x16/x19 are reloaded from DMEM words 0 and 12 and mma_sub_cx
 *     performs a subtraction conditional on the last FG1 carry.
 *
 * NOTE(review): this has the shape of one outer-loop step of a Montgomery
 * multiplication (w2 = multiplier limb, w3 = Montgomery constant, x19 ->
 * operand, x16 -> modulus) -- confirm against callers.
 *
 * in:       w2, w3, w31 (used as a zero addend), buffer w4.., x16, x19,
 *           x30, x31 (loop counts; not set here)
 * out:      buffer w4.. updated
 * clobbers: x8, x10, x12, x13, x16, x19, w24..w30, accumulator, flags
 */
mma:
/* x12 -> w30, x13 -> w24; x8 = read index, x10 = write index, both at w4 */
li x12, 30
li x13, 24
li x8, 4
li x10, 4
/* w30 = DMEM[x19]; x19 advances */
bn.lid x12, 0(x19++)
/* (w26:w27) = w30 * w2 */
jal x1, dmXa
/* w24 = WDR[x8] (first buffer limb); x8 advances */
bn.movr x13, x8++
/* (w29:w30) = (w26:w27) + w24 */
bn.add w30, w27, w24
bn.addc w29, w26, w31
/* w27 = low 256 bits of w30 * w3 */
bn.mov w25, w3
jal x1, dmXd0
/* w25 = that low word: the factor used with the x16 stream from here on */
bn.mov w25, w27
bn.mov w28, w26
/* w24 = low word of the first sum */
bn.mov w24, w30
/* w30 = DMEM[x16]; x16 advances */
bn.lid x12, 0(x16++)
/* (w26:w27) = w30 * w25 */
jal x1, dmXd0
/* low words added (sum left in w27 is not written back); w28 = high word + carry */
bn.add w27, w27, w24
bn.addc w28, w26, w31
/* x31 iterations; the next 14 instructions (callee code not counted) are the body */
loop x31, 14
/* next limb from the x19 stream times w2 */
bn.lid x12, 0(x19++)
jal x1, dmXa
/* w24 = next buffer limb; x8 advances */
bn.movr x13, x8++
/* (w26:w27) += w24 */
bn.add w27, w27, w24
bn.addc w26, w26, w31
/* w24 = w27 + previous high word w29; new high word kept in w29 */
bn.add w24, w27, w29
bn.addc w29, w26, w31
/* next limb from the x16 stream times w25 */
bn.lid x12, 0(x16++)
jal x1, dmXd0
/* (w26:w27) += w24 */
bn.add w27, w27, w24
bn.addc w26, w26, w31
/* w24 = w27 + previous high word w28, carry tracked in FG1 */
bn.add w24, w27, w28, FG1
/* WDR[x10] = w24; x10 advances */
bn.movr x10++, x13
/* w28 = w26 + FG1 carry */
bn.addc w28, w26, w31, FG1
/* top limb = w29 + w28 + FG1 carry; the carry out stays in FG1 for mma_sub_cx */
bn.addc w24, w29, w28, FG1
bn.movr x10++, x13
/* reload DMEM pointers and reset WDR indices for the conditional subtraction */
lw x16, 0(x0)
lw x19, 12(x0)
li x8, 4
li x10, 4
li x12, 30
li x13, 24
jal x1, mma_sub_cx
nop
ret
/**
 * setupPtrs -- initialise the GPRs shared by the multiplication routines.
 *
 * The eight 32-bit words at DMEM byte offsets 0..28 are each loaded into
 * two registers: word k goes to x(16+k) and to x(24+k). w1 is set to w31,
 * and the WDR index registers are set to x8 = 4, x9 = 3, x10 = 4, x11 = 2.
 *
 * in:       DMEM words 0..28, w31
 * out:      x8..x11, x16..x31, w1
 */
setupPtrs:
/* DMEM word at offset 0 */
lw x16, 0(x0)
lw x24, 0(x0)
/* DMEM word at offset 4 */
lw x17, 4(x0)
lw x25, 4(x0)
/* DMEM word at offset 8 */
lw x18, 8(x0)
lw x26, 8(x0)
/* DMEM word at offset 12 */
lw x19, 12(x0)
lw x27, 12(x0)
/* DMEM word at offset 16 */
lw x20, 16(x0)
lw x28, 16(x0)
/* DMEM word at offset 20 */
lw x21, 20(x0)
lw x29, 20(x0)
/* DMEM word at offset 24 (x30 is used as a loop count by the callers) */
lw x22, 24(x0)
lw x30, 24(x0)
/* DMEM word at offset 28 (x31 is used as a loop count by mma) */
lw x23, 28(x0)
lw x31, 28(x0)
/* WDR index registers: x9 -> w3, x11 -> w2, x8 and x10 -> w4 */
addi x9, x0, 3
addi x11, x0, 2
addi x8, x0, 4
addi x10, x0, 4
/* w1 = w31 */
bn.mov w1, w31
ret
/**
 * mulx -- run mma once per limb of the operand at x20 and store the result.
 *
 * Loads w0 from DMEM[0], initialises pointers via setupPtrs, loads w3 from
 * DMEM[x17], zeroes the WDR buffer w4.. (x30 limbs), then for each of x30
 * limbs loads w2 from DMEM[x20++] and calls mma. The buffer is finally
 * stored to DMEM starting at x21.
 *
 * NOTE(review): presumably a full Montgomery multiplication with the
 * operands and modulus addressed by the pointer block at DMEM 0..28 --
 * confirm against the caller's memory layout.
 *
 * in:       w31 (copied into w2 to zero the buffer), DMEM pointer block
 * out:      x30 wide words written to DMEM at x21; x8 = 4 on return
 * clobbers: x3..x6 and everything setupPtrs and mma clobber
 */
mulx:
/* w0 = DMEM[0] (x3 = 0 selects w0) */
li x3, 0
bn.lid x3, 0(x0)
jal x1, setupPtrs
/* WDR[x9] (w3 after setupPtrs) = DMEM[x17] */
bn.lid x9, 0(x17)
/* zero the buffer: copy w2 (= w31) into x30 WDRs starting at WDR[x10] */
bn.mov w2, w31
loop x30, 1
bn.movr x10++, x11
li x10, 4
/* x30 iterations; the next 8 instructions (callee code not counted) are the body */
loop x30, 8
/* WDR[x11] (w2) = DMEM[x20]; x20 advances */
bn.lid x11, 0(x20++)
/* save the DMEM pointers across mma, which advances/reloads x16 and x19 */
add x4, x16, x0
add x5, x19, x0
add x6, x20, x0
jal x1, mma
add x16, x4, x0
add x19, x5, x0
add x20, x6, x0
/* store the buffer (w4..) to DMEM at x21 */
li x8, 4
loop x30, 2
bn.sid x8, 0(x21++)
addi x8, x8, 1
li x8, 4
ret
/**
 * mm1_sub_cx -- limb-wise "subtract if FG1.C", result written to DMEM.
 *
 * For x30 limbs: loads a wide word from DMEM[x16++] into WDR[x9], copies
 * buffer limb WDR[x8++] into WDR[x11], computes w3 = w2 - w3 - borrow,
 * selects w3 (FG1.C set) or w2 (FG1.C clear) into w2 and stores WDR[x11]
 * to DMEM[x21++].
 * The instructions name w2 and w3 directly, so the routine expects x9 = 3
 * and x11 = 2 (the values setupPtrs assigns).
 *
 * in:       x8, x9, x11, x16, x21, x30, FG1.C
 * out:      x30 wide words written to DMEM at x21
 * clobbers: x8, x16, x21, w2, w3, flags of the default flag group
 *
 * NOTE(review): the borrow chain starts from whatever carry the default
 * flag group holds on entry; it is not cleared here.
 */
mm1_sub_cx:
/* x30 iterations; the next 5 instructions are the loop body */
loop x30, 5
/* WDR[x9] = DMEM[x16]; x16 advances */
bn.lid x9, 0(x16++)
/* WDR[x11] = WDR[x8]; x8 advances */
bn.movr x11, x8++
/* w3 = w2 - w3 - borrow */
bn.subb w3, w2, w3
/* w2 = FG1.C ? w3 : w2 */
bn.sel w2, w3, w2, FG1.C
/* DMEM[x21] = WDR[x11]; x21 advances */
bn.sid x11, 0(x21++)
ret
/**
 * mul1_exp -- run mma with the multiplier 1, then do a final compare and
 * conditional subtraction, writing the result to DMEM at x21.
 *
 * Loads w3 from DMEM[x17], zeroes the WDR buffer starting at WDR[x10]
 * (x30 limbs), and calls mma x30 times with w2 = 1 for the first call and
 * w2 = 0 (w31) for the rest. The buffer w4.. is then compared limb by limb
 * against the number at x16 (FG1 borrow chain) and mm1_sub_cx performs the
 * subtraction selected by the resulting FG1.C.
 *
 * NOTE(review): presumably the conversion out of the Montgomery domain
 * (multiplication by 1) -- confirm against callers.
 *
 * Fix: mma finishes by calling mma_sub_cx, which post-increments x8 once
 * per limb, so x8 points past the buffer when the mma loop ends. x8 is now
 * rewound to 4 before the compare loop, as computeRR does with its own
 * buffer index before its compare loop. Without this the comparison read
 * the WDRs following the buffer instead of the buffer itself.
 *
 * in:       x9 = 3, x10 = 4, x11 = 2 (as set by setupPtrs), x16, x17, x19,
 *           x21, x30, x31, w31 (used as zero)
 * out:      x30 wide words written to DMEM at x21; x16 and x19 restored to
 *           their entry values
 * clobbers: x6, x7, x8, x10, x21 and everything mma and mm1_sub_cx clobber
 */
mul1_exp:
/* WDR[x9] (w3) = DMEM[x17] */
bn.lid x9, 0(x17)
/* zero the buffer: copy w2 (= w31) into x30 WDRs starting at WDR[x10] */
bn.mov w2, w31
loop x30, 1
bn.movr x10++, x11
/* w2 = 1: multiplier limb for the first mma call */
bn.xor w2, w2, w2
bn.addi w2, w2, 1
/* keep the entry values of the DMEM pointers */
addi x6, x16, 0
addi x7, x19, 0
/* x30 iterations; the next 4 instructions (callee code not counted) are the body */
loop x30, 4
addi x16, x6, 0
addi x19, x7, 0
jal x1, mma
/* every multiplier limb after the first is w31 */
bn.mov w2, w31
addi x16, x6, 0
addi x19, x7, 0
/* rewind the buffer index: mma_sub_cx left x8 past the end of the buffer */
li x8, 4
/* w2 = 0 and FG1.C cleared before the compare chain */
bn.sub w2, w2, w2, FG1
/* compare: flags of DMEM limb - buffer limb - borrow, chained through FG1 */
loop x30, 3
bn.lid x9, 0(x16++)
bn.movr x11, x8++
bn.cmpb w3, w2, FG1
/* rewind indices and pointers for the conditional subtraction */
li x8, 4
li x10, 4
addi x16, x6, 0
addi x19, x7, 0
jal x1, mm1_sub_cx
addi x16, x6, 0
addi x19, x7, 0
ret
/**
 * mul1 -- program entry point: set up pointers, run mul1_exp, then stop.
 *
 * Loads w0 from DMEM[0], initialises the GPRs via setupPtrs, calls
 * mul1_exp and ends execution with ecall.
 */
mul1:
/* w0 = DMEM[0] (x3 = 0 selects w0) */
li x3, 0
bn.lid x3, 0(x0)
jal x1, setupPtrs
jal x1, mul1_exp
/* end of program */
ecall
/**
 * sqrx_exp -- mma-based multiplication using the pointer block at DMEM
 * byte offsets 32..60, with the result stored to DMEM at x21.
 *
 * Loads x16..x23 from DMEM words 32..60, loads w3 from DMEM[x17], zeroes
 * the WDR buffer starting at WDR[x10] (x30 limbs), then for each of x30
 * limbs loads w2 from DMEM[x20++] and calls mma. The buffer w4.. is then
 * stored to DMEM starting at x21.
 *
 * NOTE(review): the name suggests the squaring step of the exponentiation
 * loop; which operands the pointer block selects is not visible here.
 *
 * in:       x9 = 3, x10 = 4, x11 = 2 expected on entry (not set here),
 *           x30, x31, w31, DMEM words 32..60
 * out:      x30 wide words written to DMEM at x21; x8 = 4, x10 = 4 and
 *           x12, x13 reloaded from DMEM words 16 and 20 on return
 * clobbers: x5..x7, x16..x23 and everything mma clobbers
 */
sqrx_exp:
/* x16..x23 = 32-bit words at DMEM byte offsets 32..60 */
lw x16, 32(x0)
lw x17, 36(x0)
lw x18, 40(x0)
lw x19, 44(x0)
lw x20, 48(x0)
lw x21, 52(x0)
lw x22, 56(x0)
lw x23, 60(x0)
/* WDR[x9] = DMEM[x17] */
bn.lid x9, 0(x17)
/* zero the buffer: copy w2 (= w31) into x30 WDRs starting at WDR[x10] */
bn.mov w2, w31
loop x30, 1
bn.movr x10++, x11
/* x10 = DMEM word 8; mma sets x10 itself before using it */
lw x10, 8(x0)
/* x30 iterations; the next 8 instructions (callee code not counted) are the body */
loop x30, 8
/* WDR[x11] = DMEM[x20]; x20 advances */
bn.lid x11, 0(x20++)
/* save the DMEM pointers across mma */
addi x5, x20, 0
addi x6, x16, 0
addi x7, x19, 0
jal x1, mma
addi x20, x5, 0
addi x16, x6, 0
addi x19, x7, 0
/* store the buffer (w4..) to DMEM at x21 */
li x10, 4
li x8, 4
loop x30, 2
bn.sid x8, 0(x21++)
addi x8, x8, 1
li x8, 4
li x10, 4
lw x12, 16(x0)
lw x13, 20(x0)
ret
/**
 * mulx_exp -- mma-based multiplication using the pointer block at DMEM
 * byte offsets 64..92; the result is left in the WDR buffer w4...
 *
 * Loads x16..x23 from DMEM words 64..92, loads w3 from DMEM[x17], zeroes
 * the WDR buffer starting at WDR[x10] (x30 limbs), then for each of x30
 * limbs loads w2 from DMEM[x20++] and calls mma. Unlike sqrx_exp, nothing
 * is stored to DMEM here.
 *
 * in:       x9 = 3, x10 = 4, x11 = 2 expected on entry (not set here),
 *           x30, x31, w31, DMEM words 64..92
 * out:      result in WDRs w4..; x8 = 4, x10 = 4 and x12, x13 reloaded
 *           from DMEM words 16 and 20 on return
 * clobbers: x5..x7, x16..x23 and everything mma clobbers
 */
mulx_exp:
/* x16..x23 = 32-bit words at DMEM byte offsets 64..92 */
lw x16, 64(x0)
lw x17, 68(x0)
lw x18, 72(x0)
lw x19, 76(x0)
lw x20, 80(x0)
lw x21, 84(x0)
lw x22, 88(x0)
lw x23, 92(x0)
/* WDR[x9] = DMEM[x17] */
bn.lid x9, 0(x17)
/* zero the buffer: copy w2 (= w31) into x30 WDRs starting at WDR[x10] */
bn.mov w2, w31
loop x30, 1
bn.movr x10++, x11
li x8, 4
li x10, 4
/* x12, x13 = DMEM words 16 and 20; mma sets both itself before using them */
lw x12, 16(x0)
lw x13, 20(x0)
/* x30 iterations; the next 8 instructions (callee code not counted) are the body */
loop x30, 8
/* WDR[x11] = DMEM[x20]; x20 advances */
bn.lid x11, 0(x20++)
/* save the DMEM pointers across mma */
addi x5, x20, 0
addi x6, x16, 0
addi x7, x19, 0
jal x1, mma
addi x20, x5, 0
addi x16, x6, 0
addi x19, x7, 0
li x8, 4
li x10, 4
lw x12, 16(x0)
lw x13, 20(x0)
ret
/**
 * selOutOrC -- per limb, keep the DMEM value at x21 or replace it with the
 * WDR buffer limb, depending on the carry of the default flag group.
 *
 * For x30 limbs: the wide word at DMEM[x21] is loaded into WDR[x9], the
 * buffer limb WDR[x8++] is copied into WDR[x11] and from w2 into w0, and
 * w2 = C ? w0 : w3 is stored back to DMEM[x21++].
 * The instructions name w2 and w3 directly, so the routine expects x9 = 3
 * and x11 = 2 (the values setupPtrs assigns).
 *
 * in:       x8, x9, x11, x21, x30, carry flag of the default flag group
 * out:      x30 wide words at DMEM[x21..] updated
 * clobbers: x8, x21, w0, w2, w3
 */
selOutOrC:
/* x30 iterations; the next 6 instructions are the loop body */
loop x30, 6
/* WDR[x9] = DMEM[x21] (current stored value) */
bn.lid x9, 0(x21)
/* DMEM[x21] = WDR[x11]; this store is overwritten by the one at the end of the body */
bn.sid x11, 0(x21)
/* WDR[x11] = WDR[x8] (buffer limb); x8 advances */
bn.movr x11, x8++
bn.mov w0, w2
/* w2 = C ? w0 (buffer limb) : w3 (previous DMEM value) */
bn.sel w2, w0, w3, C
/* DMEM[x21] = WDR[x11]; x21 advances */
bn.sid x11, 0(x21++)
ret
/**
 * modexp -- program entry point for the exponentiation loop.
 *
 * Visible steps:
 *   1. mulx is called with the pointer block at DMEM 0..28.
 *   2. With the pointer block at DMEM 96..124: each limb at x16 is negated
 *      with borrow (0 - limb - borrow) and stored to DMEM at x21.
 *   3. (x22 << 8) times: sqrx_exp, mulx_exp, then the multi-limb number at
 *      x20 is shifted left by one bit in place (addc chain) and selOutOrC
 *      uses the bit shifted out (carry) to choose, per limb, between the
 *      value already at x21 and the mulx_exp result in the WDR buffer.
 *   4. w0 is loaded from DMEM[96], the pointer block is reloaded, mul1_exp
 *      is called and the program ends with ecall.
 *
 * NOTE(review): this reads as left-to-right square-and-multiply with the
 * exponent at x20 consumed from its top bit; the meaning of each pointer in
 * the DMEM blocks is not visible here -- confirm against the host-side layout.
 * NOTE(review): step 2 relies on w31 == 0; nothing in this routine sets it.
 */
modexp:
jal x1, mulx
/* x16..x23 = 32-bit words at DMEM byte offsets 96..124 */
lw x16, 96(x0)
lw x17, 100(x0)
lw x18, 104(x0)
lw x19, 108(x0)
lw x20, 112(x0)
lw x21, 116(x0)
lw x22, 120(x0)
lw x23, 124(x0)
/* w2 = 0 and carry cleared before the borrow chain */
bn.sub w2, w2, w2
/* per limb: WDR[x11] = DMEM[x16++]; w2 = w31 - w2 - borrow; DMEM[x21++] = WDR[x11] */
loop x30, 3
bn.lid x11, 0(x16++)
bn.subb w2, w31, w2
bn.sid x11, 0(x21++)
/* x24 = x22 * 256: iteration count of the main loop */
slli x24, x22, 8
/* x24 iterations; the next 17 instructions (callee code not counted) are the body */
loop x24, 17
jal x1, sqrx_exp
jal x1, mulx_exp
/* reload the pointer block at DMEM 96..124 */
lw x16, 96(x0)
lw x17, 100(x0)
lw x18, 104(x0)
lw x19, 108(x0)
lw x20, 112(x0)
lw x21, 116(x0)
lw x22, 120(x0)
lw x23, 124(x0)
/* sets the carry-in of the shift chain below from the current contents of w2 */
bn.add w2, w2, w2
/* shift the number at x20 left by one bit, limb by limb, in place */
loop x30, 3
bn.lid x11, 0(x20)
bn.addc w2, w2, w2
bn.sid x11, 0(x20++)
/* the carry out of the shift selects what is kept at x21 */
jal x1, selOutOrC
/* presumably padding so the loop body does not end on a jump */
nop
/* w0 = DMEM[96] (x3 = 0 selects w0) */
li x3, 0
bn.lid x3, 96(x0)
lw x16, 96(x0)
lw x17, 100(x0)
lw x18, 104(x0)
lw x19, 108(x0)
lw x20, 112(x0)
lw x21, 116(x0)
lw x22, 120(x0)
lw x23, 124(x0)
jal x1, mul1_exp
/* end of program */
ecall
/**
 * modload -- program entry point: derive the per-modulus constants.
 *
 * Zeroes w31, loads w0 from DMEM[0], and fills x16..x23 and x24..x31 from
 * the eight 32-bit words at DMEM 0..28 (each word is loaded twice).
 * x10..x15 are additionally loaded from DMEM words 8..28.
 * It then loads w28 from DMEM[x16], calls d0inv, stores the resulting w29
 * to DMEM[x17], calls computeRR and ends the program with ecall.
 *
 * NOTE(review): presumably x16 addresses the modulus and x17 the slot for
 * the Montgomery constant -- confirm against the host-side DMEM layout.
 */
modload:
/* w31 = 0; d0inv and computeRR use w31 as a zero operand */
bn.xor w31, w31, w31
/* w0 = DMEM[0] (x3 = 0 selects w0) */
li x3, 0
bn.lid x3, 0(x0)
/* x16..x23 = 32-bit words at DMEM byte offsets 0..28 */
lw x16, 0(x0)
lw x17, 4(x0)
lw x18, 8(x0)
lw x19, 12(x0)
lw x20, 16(x0)
lw x21, 20(x0)
lw x22, 24(x0)
lw x23, 28(x0)
/* x24..x31 = the same eight words; x30 is the loop count used by computeRR */
lw x24, 0(x0)
lw x25, 4(x0)
lw x26, 8(x0)
lw x27, 12(x0)
lw x28, 16(x0)
lw x29, 20(x0)
lw x30, 24(x0)
lw x31, 28(x0)
/* WDR indices: x8 -> w28 (d0inv input), x9 -> w29 (d0inv output) */
li x8, 28
li x9, 29
lw x10, 8(x0)
lw x11, 12(x0)
lw x12, 16(x0)
lw x13, 20(x0)
lw x14, 24(x0)
lw x15, 28(x0)
/* w28 = DMEM[x16] */
bn.lid x8, 0(x16)
jal x1, d0inv
/* DMEM[x17] = w29 */
bn.sid x9, 0(x17)
jal x1, computeRR
/* end of program */
ecall