/* blob: c73439dbe9bfc50b38402e26597b6c9647af5a0c [file] [log] [blame] */
/* Copyright lowRISC contributors. */
/* Licensed under the Apache License, Version 2.0, see LICENSE for details. */
/* SPDX-License-Identifier: Apache-2.0 */
# OTBN smoke test: runs various instructions which are expected to produce the
# final register state seen in smoke_expected.txt
# Part 1: base (32-bit) ALU instructions. x2 and x3 are the two fixed operands;
# each result is written to its own register (x4-x17) so that it shows up in
# the final register state. The value after the last '=' in each comment is
# the expected result.
.section .text
# x2 = 0xd0beb513 (upper 20 bits from lui, lower 12 bits added as an immediate)
lui x2, 0xd0beb
add x2, x2, 0x513
# x3 = 0xa0be911a
lui x3, 0xa0be9
add x3, x3, 0x11a
# Register-register arithmetic and logic
# x4 = x2 + x3 = 0x717d462d (the carry out of bit 31 is dropped)
add x4, x2, x3
# x5 = x3 - x2 = 0xcfffdc07
sub x5, x3, x2
# x6 = x2 | x3 = 0xf0beb51b
or x6, x2, x3
# x7 = x2 & x3 = 0x80be9112
and x7, x2, x3
# x8 = x2 ^ x3 = 0x70002409
xor x8, x2, x3
# Register-immediate logic
# x9 = x2 | 0x123 = 0xd0beb533
or x9, x2, 0x123
# x10 = x2 & 0x7bc = 0x510
and x10, x2, 0x7bc
# x11 = x2 ^ 0x47a = 0xd0beb169
xor x11, x2, 0x47a
# Shifts by an immediate amount ('>>' is a logical shift, '>>>' an arithmetic
# shift that replicates the sign bit)
# x12 = x2 << 10 = 0xfad44c00
sll x12, x2, 10
# x13 = x2 >> 13 = 0x685f5
srl x13, x2, 13
# x14 = x2 >>> 7 = 0xffa17d6a
sra x14, x2, 7
# Shifts by a register amount: only x3[4:0] = 0x1a (26) is used
# x15 = x2 << x3[4:0] = 0x4c000000
sll x15, x2, x3
# x16 = x2 >> x3[4:0] = 0x34
srl x16, x2, x3
# x17 = x2 >>> x3[4:0] = 0xfffffff4
sra x17, x2, x3
# Part 2: 32-bit load/store. Addresses are byte offsets from x0 into data
# memory, whose initial contents are the .word values in the .data section.
# x18 = dmem[16] = 0xfacefeed (initial value of the fifth data word)
lw x18, 16(x0)
# dmem[4] = x9 = 0xd0beb533 (overwrites the initial 0xbaadf00d)
sw x9, 4(x0)
# x19 = dmem[4] = 0xd0beb533 (reads back the value just stored)
lw x19, 4(x0)
# Part 3: branches and jumps. x20 and x21 only end up with their expected
# values if every branch/jump below is taken; each instruction that should be
# skipped would change the result if it were executed.
# x20 = 0
li x20, 0
# x19 (0xd0beb533) != x18 (0xfacefeed), so this branch is expected to be taken
bne x19, x18, test_label_1
# x20 = 0xbaad (shouldn't happen due to branch)
li x20, 0xbaad
test_label_1:
# x20 = x20 + 0x123 = 0x123
add x20, x20, 0x123
# unconditional jump over the next add
jal x0, test_label_2
# x20 = x20 + 0x123 = 0x246 (shouldn't happen due to jump)
add x20, x20, 0x123
test_label_2:
# x21 = x20 + x0 = 0x123
add x21, x20, x0
# x20 == x21 after the copy above, so this branch is expected to be taken
beq x20, x21, test_label_3
# x21 = x21 + 0x123 = 0x246 (shouldn't happen due to branch)
add x21, x21, 0x123
test_label_3:
# Part 4: load the 256-bit MOD register 32 bits at a time. Each word is built
# in x23 with the base li pseudo-instruction and written to one of the CSRs
# 0x7d0-0x7d7; per the value below, 0x7d7 receives the most significant word
# and 0x7d0 the least significant one.
# NOTE(review): the original comment described this as a stop-gap "until
# LID/SID implemented"; bn.lid and bn.sid are exercised later in this file, so
# confirm whether this workaround is still needed.
# mod = 0x78fccc06_2228e9d6_89c9b54f_887cf14e_c79af825_69be586e_9866bb3b_53769ada
li x23, 0x78fccc06
csrrw x0, 0x7d7, x23
li x23, 0x2228e9d6
csrrw x0, 0x7d6, x23
li x23, 0x89c9b54f
csrrw x0, 0x7d5, x23
li x23, 0x887cf14e
csrrw x0, 0x7d4, x23
li x23, 0xc79af825
csrrw x0, 0x7d3, x23
li x23, 0x69be586e
csrrw x0, 0x7d2, x23
li x23, 0x9866bb3b
csrrw x0, 0x7d1, x23
li x23, 0x53769ada
csrrw x0, 0x7d0, x23
# x23 = 0x89c9b54f (read back of the word written to CSR 0x7d5 above; this
# replaces the 0x53769ada left in x23 by the last li)
csrrs x23, 0x7d5, x0
# Part 5: 256-bit (bignum) arithmetic and logic on the wide registers w0-w31.
# Note that some instructions use the fixed inputs (from w1 and w2) while
# others use results from previous instructions. When debugging a failure it is
# recommended that you first look at the failure in the lowest numbered
# register, as failures can cascade into later instructions.
# w1 = mod = 0x78fccc06_2228e9d6_89c9b54f_887cf14e_c79af825_69be586e_9866bb3b_53769ada
bn.wsrr w1, 0x0 /* MOD */
# rnd WSR gives fixed value for now
# w2 = rnd = 0x99999999_99999999_99999999_99999999_99999999_99999999_99999999_99999999
bn.wsrr w2, 0x1 /* RND */
# w3 = w1 + w2 = 0x1296659f_bbc28370_23634ee9_22168ae8_613491bf_0357f208_320054d4_ed103473
bn.add w3, w1, w2
# w4 = w1 - w2 = 0xdf63326c_888f503c_f0301bb5_eee357b5_2e015e8b_d024bed4_fecd21a1_b9dd0141
bn.sub w4, w1, w2
# w5 = w1 | w2 = 0xf9fddd9f_bbb9f9df_99d9bddf_99fdf9df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9bdb
bn.or w5, w1, w2
# w6 = w1 & w2 = 0x18988800_00088990_89899109_88189108_81989801_09981808_98009919_11109898
bn.and w6, w1, w2
# w7 = w1 ^ w2 = 0xe165559f_bbb1704f_10502cd6_11e568d7_5e0361bc_f027c1f7_01ff22a2_caef0343
bn.xor w7, w1, w2
# w8 = ~w1 = 0x870333f9_ddd71629_76364ab0_77830eb1_386507da_9641a791_679944c4_ac896525
bn.not w8, w1
# w9 = {w1, w2} >> 117 = 0xd7c12b4d_f2c374c3_35d9da9b_b4d6d4cc_cccccccc_cccccccc_cccccccc_cccccccc
# (lower 256 bits of the shifted 512-bit concatenation)
bn.rshi w9, w1, w2 >> 117
# Replace the modulus with w4 for the modular add/subtract tests that follow
# mod = w4 = 0xdf63326c_888f503c_f0301bb5_eee357b5_2e015e8b_d024bed4_fecd21a1_b9dd0141
bn.wsrw 0x0 /* MOD */, w4
# Part 6: modular add/subtract (using mod = w4 set above) and add/subtract
# with carry/borrow through the two flag groups FG0 and FG1.
# w0 = 0
bn.xor w0, w0, w0
# w10 = w6 + w2 = 0xb2322199_99a2232a_23232aa3_21b22aa2_1b32319a_a331b1a2_319a32b2_aaaa3231
# (the sum is below mod, so nothing is subtracted)
bn.addm w10, w6, w2
# w11 = w5 + w2 - mod = 0xb43444cc_ccc4433c_43433bc3_44b43bc4_4b3434cb_c334b4c4_34cc33b3_bbbc3433
bn.addm w11, w5, w2
# w12 = w11 - w2 = 0x1a9aab33_332aa9a2_a9a9a229_ab1aa22a_b19a9b32_299b1b2a_9b329a1a_22229a9a
bn.subm w12, w11, w2
# w13 = (w2 - w11) + mod = 0xc4c88739_5564a69a_4686798c_43c8b58a_7c66c359_a689a3aa_639a8787_97ba66a7
bn.subm w13, w2, w11
# The FG0 and FG1 sequences below are interleaved: w14 produces the FG0 carry
# consumed by w15, and w16 produces the FG1 borrow consumed by w17.
# w14 = w8 + w9 = 0x5ec45f47_d09a8aec_ac10254c_2c59e37e_0531d4a7_630e745e_34661191_795631f1
bn.add w14, w8, w9, FG0
# w16 = w1 - w2 = 0xdf63326c_888f503c_f0301bb5_eee357b5_2e015e8b_d024bed4_fecd21a1_b9dd0141 (with borrow = 1)
bn.sub w16, w1, w2, FG1
# w15 = w10 + w11 + 1 (carry) = 0x66666666_66666666_66666666_66666666_66666666_66666666_66666666_66666665
bn.addc w15, w10, w11, FG0
# w17 = w3 - w4 - 1 (borrow) = 0x33333333_33333333_33333333_33333333_33333333_33333333_33333333_33333331
bn.subb w17, w3, w4, FG1
# Read both flag groups back through CSR 0x7c8
# x24 = {fg1, fg0} = 0x55
csrrs x24, 0x7c8, x0
# Part 7: bignum operations whose second operand is shifted before use, and
# bignum add/subtract with an immediate.
# w18 = w1 + (w2 << 136) = 0x1296659f_bbc28370_23634ee9_22168a4e_c79af825_69be586e_9866bb3b_53769ada
bn.add w18, w1, w2 << 136
# w19 = w1 & (w2 << 72) = 0x18988800_00088990_89899109_88189108_81989801_09981800_00000000_00000000
bn.and w19, w1, w2 << 72
# w20 = w1 - (w2 >> 184) = 0x78fccc06_2228e9d6_89c9b54f_887cf14e_c79af825_69be57d4_fecd21a1_b9dd0141
bn.sub w20, w1, w2 >> 184
# w21 = w1 | (w2 >> 120) = 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9bdb
bn.or w21, w1, w2 >> 120
# w22 = w21 + 0x1bd = 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9d98
bn.addi w22, w21, 0x1bd
# w23 = w21 - 0x207 = 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff99d4
bn.subi w23, w21, 0x207
# Part 8: indirect 256-bit load, store and move. The base registers x26/x27
# hold the *number* of the wide register to access (written "*x26" / "*x27"
# below) and x25 holds the data memory byte address. A "++" suffix
# post-increments that register: by 0x20 for the address, by 1 for a wide
# register number.
# *x26 == w24 = dmem[x25 == 0x0] = 0xcccccccc_bbbbbbbb_aaaaaaaa_facefeed_deadbeef_cafed00d_d0beb533_1234abcd
# (bits [63:32] are the 0xd0beb533 stored earlier by sw, not the initial .data word)
# x25 = x25 + 0x20 = 0x20
li x25, 0
li x26, 24
bn.lid x26, 0(x25++)
# dmem[x25 == 0x20] = *x26 == w20 = 0x78fccc06_2228e9d6_89c9b54f_887cf14e_c79af825_69be57d4_fecd21a1_b9dd0141
# x26 = x26 + 1 = 21 (0x15)
li x26, 20
bn.sid x26++, 0(x25)
# w25 = w24 = 0xcccccccc_bbbbbbbb_aaaaaaaa_facefeed_deadbeef_cafed00d_d0beb533_1234abcd
bn.mov w25, w24
# x26 is 21 here (left by the post-increment above), so the source is w21
# *x27 == w26 = *x26 == w21 = 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9bdb
# x26 = x26 + 1 = 22 (0x16)
li x27, 26
bn.movr x27, x26++
# Part 9: compare and flag-based select, then initialise the accumulator.
# bn.cmp/bn.cmpb only update flags; bn.sel picks its first source when the
# named flag is set and its second source otherwise.
# w27 = w2 == w1 ? w5 : w6 = w6 = 0x18988800_00088990_89899109_88189108_81989801_09981808_98009919_11109898
# (w1 != w2, so FG0.Z is clear and w6 is selected)
bn.cmp w1, w2
bn.sel w27, w5, w6, FG0.Z
# w28 = (w4 - w3 - 1 (borrow)) & 1 ? w7 : w8 = w7 = 0xe165559f_bbb1704f_10502cd6_11e568d7_5e0361bc_f027c1f7_01ff22a2_caef0343
# (the borrow-in comes from the bn.cmp above; FG0.L reflects bit 0 of the difference)
bn.cmpb w4, w3
bn.sel w28, w7, w8, FG0.L
# Preload the accumulator so the multiply sequence that follows starts from a
# non-zero value.
# acc = w26 = 0x78fccc06_2228e9d6_89c9b54f_887cf1df_df9bf9bd_f9bfd9ff_99ffbbbb_dbff9bdb
bn.wsrw 0x3 /* ACC */, w26
# Part 10: quarter-word multiply-accumulate.
# Operands of the form wN.q select quarter-word q of wN and the trailing number
# is a shift applied to the product before it is accumulated.
#
# Full 256 x 256 -> 512 bit multiply: all 16 quarter-word products
# w27.i * w28.j are accumulated, grouped by i + j. The .so variants write one
# half (.L or .U) of a result register at the end of a group.
# NOTE(review): presumably .so also shifts the accumulator down after writing
# out - confirm against the ISA documentation.
# {w30, w29} = (w28 * w27 + acc) =
# 0x15a7cbef_a5f473e1_860c1110_6bcc33ed_1583aef1_8130f3df_1a806984_c4f3507e
# 41575c5d_24cf5526_1a1d070d_673963ce_e80fed2a_13c1b84d_b1fddf94_eb0953a3
# i + j = 0 and 1 -> w29.L
bn.mulqacc w27.0, w28.0, 0
bn.mulqacc w27.1, w28.0, 64
bn.mulqacc.so w29.L, w27.0, w28.1, 64
# i + j = 2 and 3 -> w29.U
bn.mulqacc w27.2, w28.0, 0
bn.mulqacc w27.1, w28.1, 0
bn.mulqacc w27.0, w28.2, 0
bn.mulqacc w27.3, w28.0, 64
bn.mulqacc w27.2, w28.1, 64
bn.mulqacc w27.1, w28.2, 64
bn.mulqacc.so w29.U, w27.0, w28.3, 64
# i + j = 4 and 5 -> w30.L
bn.mulqacc w27.3, w28.1, 0
bn.mulqacc w27.2, w28.2, 0
bn.mulqacc w27.1, w28.3, 0
bn.mulqacc w27.3, w28.2, 64
bn.mulqacc.so w30.L, w27.2, w28.3, 64
# i + j = 6 -> w30.U
bn.mulqacc.so w30.U, w27.3, w28.3, 0
# 128 x 128 -> 256 bit multiply of the lower halves. The expected result
# contains no contribution from the previous accumulator value, so the .Z
# variant presumably clears the accumulator first - confirm. The .wo variant
# writes the whole result to w31.
# w31 = w28[127:0] * w27[127:0] = 0x2f97be14_a0c429f2_53b42730_953d7d2f_0873f36c_1a01de4e_17fe23d9_0f09b7c8
bn.mulqacc.Z w27.0, w28.0, 0
bn.mulqacc w27.0, w28.1, 64
bn.mulqacc w27.1, w28.0, 64
bn.mulqacc.wo w31, w27.1, w28.1, 128
# The accumulator still holds the value just written to w31
# w0 = acc = 0x2f97be14_a0c429f2_53b42730_953d7d2f_0873f36c_1a01de4e_17fe23d9_0f09b7c8
bn.wsrr w0, 0x3 /* ACC */
# Part 11: hardware loops. For both loop and loopi the first operand is the
# iteration count (register or immediate) and the second is the number of
# instructions in the loop body.
# Nested loop testing, inner adds repeated a total of 3 * 5 = 15 times
# x28 = 4, x29 = 3
li x28, 4
li x29, 3
# Outer loop, repeat x29 == 3 times; body = the next 4 instructions
# (loopi, add, add, nop)
# NOTE(review): x29 is modified inside the loop body; the expected values below
# imply the iteration count is captured when `loop` executes - confirm.
loop x29, 4
# Inner loop, repeat 5 times; body = the next 2 instructions
loopi 5, 2
# x28 = x28 + x28 = x28 * 2
add x28, x28, x28
# x29 = x29 + x29 = x29 * 2
add x29, x29, x29
# end of inner loop
# Nested loops cannot end on same instruction, so pad the outer body with a nop
nop
# end of outer loop
# x28 = 4 * (2 ** 15) = 0x00020000
# x29 = 3 * (2 ** 15) = 0x00018000
# Single instruction loop test
# Repeat 5 times
loopi 5, 1
# x28 = x28 + x28 = x28 * 2
add x28, x28, x28
# x28 = 0x00020000 * (2 ** 5) = 0x00400000
# Skip the padding up to the fixed-address `end` label
jal x0, end
# Part 12: function call/return and call stack tests.
# Place end at fixed address so write to x31 by jal doesn't have changing value
# as more is added to smoke test
.org 0x800
end:
# Call test_fn_1 with an ordinary link register; it returns through x31
# x31 = 0x804
jal x31, test_fn_1
# test call/return with call stack: linking through x1 pushes the return
# address (0x808), and test_fn_2 returns through x1
jal x1, test_fn_2
# test call stack by pushing values without return: each jal below targets the
# instruction immediately after it, so the only effect is the push of its
# return address
# push 0x80c to call stack
jal x1, call_stack_1
# push 0x810 to call stack
call_stack_1:
jal x1, call_stack_2
# push 0x814 to call stack
call_stack_2:
jal x1, call_stack_3
call_stack_3:
# End of the test program
ecall
# test_fn_1: called with `jal x31, test_fn_1`; sets x22 to a known value and
# returns to the address held in x31 (0x804).
test_fn_1:
# x22 = 0xcafef00d
li x22, 0xcafef00d
jalr x0, x31, 0
# test_fn_2: called with `jal x1, test_fn_2`; adds 3 to the value test_fn_1
# left in x22 and returns through x1.
# NOTE(review): reading x1 presumably pops the return address pushed by the
# calling jal - confirm against the OTBN call stack documentation.
test_fn_2:
# x22 = x22 + 3 = 0xcafef010
addi x22, x22, 3
jalr x0, x1, 0
# Initial data memory contents: eight 32-bit words, lowest address first.
# Read as one 256-bit value (last word most significant) they are what bn.lid
# loads into w24, except that the word at offset 4 has been overwritten by
# then.
.section .data
# offset 0
.word 0x1234abcd
# offset 4: overwritten with x9 = 0xd0beb533 by `sw x9, 4(x0)`
.word 0xbaadf00d
# offset 8
.word 0xcafed00d
# offset 12
.word 0xdeadbeef
# offset 16: read into x18 by `lw x18, 16(x0)`
.word 0xfacefeed
# offset 20
.word 0xaaaaaaaa
# offset 24
.word 0xbbbbbbbb
# offset 28
.word 0xcccccccc