#include "mips_arch.h" #if defined(_MIPS_ARCH_MIPS64R6) # define ddivu(rs,rt) # define mfqt(rd,rs,rt) ddivu rd,rs,rt # define mfrm(rd,rs,rt) dmodu rd,rs,rt #elif defined(_MIPS_ARCH_MIPS32R6) # define divu(rs,rt) # define mfqt(rd,rs,rt) divu rd,rs,rt # define mfrm(rd,rs,rt) modu rd,rs,rt #else # define ddivu(rs,rt) ddivu $0,rs,rt # define mfqt(rd,rs,rt) mflo rd # define mfrm(rd,rs,rt) mfhi rd #endif .rdata .asciiz "mips3.s, Version 1.2" .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov " .text .set noat .align 5 .globl bn_mul_add_words .ent bn_mul_add_words bn_mul_add_words: .set noreorder bgtz $6,bn_mul_add_words_internal move $2,$0 jr $31 move $4,$2 .end bn_mul_add_words .align 5 .ent bn_mul_add_words_internal bn_mul_add_words_internal: .set reorder li $3,-4 and $8,$6,$3 beqz $8,.L_bn_mul_add_words_tail .L_bn_mul_add_words_loop: ld $12,0($5) dmultu ($12,$7) ld $13,0($4) ld $14,8($5) ld $15,8($4) ld $8,2*8($5) ld $9,2*8($4) daddu $13,$2 sltu $2,$13,$2 # All manuals say it "compares 32-bit # values", but it seems to work fine # even on 64-bit registers. 
mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $13,$1 daddu $2,$12 dmultu ($14,$7) sltu $1,$13,$1 sd $13,0($4) daddu $2,$1 ld $10,3*8($5) ld $11,3*8($4) daddu $15,$2 sltu $2,$15,$2 mflo ($1,$14,$7) mfhi ($14,$14,$7) daddu $15,$1 daddu $2,$14 dmultu ($8,$7) sltu $1,$15,$1 sd $15,8($4) daddu $2,$1 subu $6,4 daddu $4,4*8 daddu $5,4*8 daddu $9,$2 sltu $2,$9,$2 mflo ($1,$8,$7) mfhi ($8,$8,$7) daddu $9,$1 daddu $2,$8 dmultu ($10,$7) sltu $1,$9,$1 sd $9,-2*8($4) daddu $2,$1 and $8,$6,$3 daddu $11,$2 sltu $2,$11,$2 mflo ($1,$10,$7) mfhi ($10,$10,$7) daddu $11,$1 daddu $2,$10 sltu $1,$11,$1 sd $11,-8($4) .set noreorder bgtz $8,.L_bn_mul_add_words_loop daddu $2,$1 beqz $6,.L_bn_mul_add_words_return nop .L_bn_mul_add_words_tail: .set reorder ld $12,0($5) dmultu ($12,$7) ld $13,0($4) subu $6,1 daddu $13,$2 sltu $2,$13,$2 mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $13,$1 daddu $2,$12 sltu $1,$13,$1 sd $13,0($4) daddu $2,$1 beqz $6,.L_bn_mul_add_words_return ld $12,8($5) dmultu ($12,$7) ld $13,8($4) subu $6,1 daddu $13,$2 sltu $2,$13,$2 mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $13,$1 daddu $2,$12 sltu $1,$13,$1 sd $13,8($4) daddu $2,$1 beqz $6,.L_bn_mul_add_words_return ld $12,2*8($5) dmultu ($12,$7) ld $13,2*8($4) daddu $13,$2 sltu $2,$13,$2 mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $13,$1 daddu $2,$12 sltu $1,$13,$1 sd $13,2*8($4) daddu $2,$1 .L_bn_mul_add_words_return: .set noreorder jr $31 move $4,$2 .end bn_mul_add_words_internal .align 5 .globl bn_mul_words .ent bn_mul_words bn_mul_words: .set noreorder bgtz $6,bn_mul_words_internal move $2,$0 jr $31 move $4,$2 .end bn_mul_words .align 5 .ent bn_mul_words_internal bn_mul_words_internal: .set reorder li $3,-4 and $8,$6,$3 beqz $8,.L_bn_mul_words_tail .L_bn_mul_words_loop: ld $12,0($5) dmultu ($12,$7) ld $14,8($5) ld $8,2*8($5) ld $10,3*8($5) mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $2,$1 sltu $13,$2,$1 dmultu ($14,$7) sd $2,0($4) daddu $2,$13,$12 subu $6,4 daddu $4,4*8 daddu $5,4*8 mflo ($1,$14,$7) mfhi ($14,$14,$7) daddu $2,$1 sltu 
$15,$2,$1 dmultu ($8,$7) sd $2,-3*8($4) daddu $2,$15,$14 mflo ($1,$8,$7) mfhi ($8,$8,$7) daddu $2,$1 sltu $9,$2,$1 dmultu ($10,$7) sd $2,-2*8($4) daddu $2,$9,$8 and $8,$6,$3 mflo ($1,$10,$7) mfhi ($10,$10,$7) daddu $2,$1 sltu $11,$2,$1 sd $2,-8($4) .set noreorder bgtz $8,.L_bn_mul_words_loop daddu $2,$11,$10 beqz $6,.L_bn_mul_words_return nop .L_bn_mul_words_tail: .set reorder ld $12,0($5) dmultu ($12,$7) subu $6,1 mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $2,$1 sltu $13,$2,$1 sd $2,0($4) daddu $2,$13,$12 beqz $6,.L_bn_mul_words_return ld $12,8($5) dmultu ($12,$7) subu $6,1 mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $2,$1 sltu $13,$2,$1 sd $2,8($4) daddu $2,$13,$12 beqz $6,.L_bn_mul_words_return ld $12,2*8($5) dmultu ($12,$7) mflo ($1,$12,$7) mfhi ($12,$12,$7) daddu $2,$1 sltu $13,$2,$1 sd $2,2*8($4) daddu $2,$13,$12 .L_bn_mul_words_return: .set noreorder jr $31 move $4,$2 .end bn_mul_words_internal .align 5 .globl bn_sqr_words .ent bn_sqr_words bn_sqr_words: .set noreorder bgtz $6,bn_sqr_words_internal move $2,$0 jr $31 move $4,$2 .end bn_sqr_words .align 5 .ent bn_sqr_words_internal bn_sqr_words_internal: .set reorder li $3,-4 and $8,$6,$3 beqz $8,.L_bn_sqr_words_tail .L_bn_sqr_words_loop: ld $12,0($5) dmultu ($12,$12) ld $14,8($5) ld $8,2*8($5) ld $10,3*8($5) mflo ($13,$12,$12) mfhi ($12,$12,$12) sd $13,0($4) sd $12,8($4) dmultu ($14,$14) subu $6,4 daddu $4,8*8 daddu $5,4*8 mflo ($15,$14,$14) mfhi ($14,$14,$14) sd $15,-6*8($4) sd $14,-5*8($4) dmultu ($8,$8) mflo ($9,$8,$8) mfhi ($8,$8,$8) sd $9,-4*8($4) sd $8,-3*8($4) dmultu ($10,$10) and $8,$6,$3 mflo ($11,$10,$10) mfhi ($10,$10,$10) sd $11,-2*8($4) .set noreorder bgtz $8,.L_bn_sqr_words_loop sd $10,-8($4) beqz $6,.L_bn_sqr_words_return nop .L_bn_sqr_words_tail: .set reorder ld $12,0($5) dmultu ($12,$12) subu $6,1 mflo ($13,$12,$12) mfhi ($12,$12,$12) sd $13,0($4) sd $12,8($4) beqz $6,.L_bn_sqr_words_return ld $12,8($5) dmultu ($12,$12) subu $6,1 mflo ($13,$12,$12) mfhi ($12,$12,$12) sd $13,2*8($4) sd $12,3*8($4) 
beqz $6,.L_bn_sqr_words_return ld $12,2*8($5) dmultu ($12,$12) mflo ($13,$12,$12) mfhi ($12,$12,$12) sd $13,4*8($4) sd $12,5*8($4) .L_bn_sqr_words_return: .set noreorder jr $31 move $4,$2 .end bn_sqr_words_internal .align 5 .globl bn_add_words .ent bn_add_words bn_add_words: .set noreorder bgtz $7,bn_add_words_internal move $2,$0 jr $31 move $4,$2 .end bn_add_words .align 5 .ent bn_add_words_internal bn_add_words_internal: .set reorder li $3,-4 and $1,$7,$3 beqz $1,.L_bn_add_words_tail .L_bn_add_words_loop: ld $12,0($5) ld $8,0($6) subu $7,4 ld $13,8($5) and $1,$7,$3 ld $14,2*8($5) daddu $6,4*8 ld $15,3*8($5) daddu $4,4*8 ld $9,-3*8($6) daddu $5,4*8 ld $10,-2*8($6) ld $11,-8($6) daddu $8,$12 sltu $24,$8,$12 daddu $12,$8,$2 sltu $2,$12,$8 sd $12,-4*8($4) daddu $2,$24 daddu $9,$13 sltu $25,$9,$13 daddu $13,$9,$2 sltu $2,$13,$9 sd $13,-3*8($4) daddu $2,$25 daddu $10,$14 sltu $24,$10,$14 daddu $14,$10,$2 sltu $2,$14,$10 sd $14,-2*8($4) daddu $2,$24 daddu $11,$15 sltu $25,$11,$15 daddu $15,$11,$2 sltu $2,$15,$11 sd $15,-8($4) .set noreorder bgtz $1,.L_bn_add_words_loop daddu $2,$25 beqz $7,.L_bn_add_words_return nop .L_bn_add_words_tail: .set reorder ld $12,0($5) ld $8,0($6) daddu $8,$12 subu $7,1 sltu $24,$8,$12 daddu $12,$8,$2 sltu $2,$12,$8 sd $12,0($4) daddu $2,$24 beqz $7,.L_bn_add_words_return ld $13,8($5) ld $9,8($6) daddu $9,$13 subu $7,1 sltu $25,$9,$13 daddu $13,$9,$2 sltu $2,$13,$9 sd $13,8($4) daddu $2,$25 beqz $7,.L_bn_add_words_return ld $14,2*8($5) ld $10,2*8($6) daddu $10,$14 sltu $24,$10,$14 daddu $14,$10,$2 sltu $2,$14,$10 sd $14,2*8($4) daddu $2,$24 .L_bn_add_words_return: .set noreorder jr $31 move $4,$2 .end bn_add_words_internal .align 5 .globl bn_sub_words .ent bn_sub_words bn_sub_words: .set noreorder bgtz $7,bn_sub_words_internal move $2,$0 jr $31 move $4,$0 .end bn_sub_words .align 5 .ent bn_sub_words_internal bn_sub_words_internal: .set reorder li $3,-4 and $1,$7,$3 beqz $1,.L_bn_sub_words_tail .L_bn_sub_words_loop: ld $12,0($5) ld $8,0($6) 
subu $7,4 ld $13,8($5) and $1,$7,$3 ld $14,2*8($5) daddu $6,4*8 ld $15,3*8($5) daddu $4,4*8 ld $9,-3*8($6) daddu $5,4*8 ld $10,-2*8($6) ld $11,-8($6) sltu $24,$12,$8 dsubu $8,$12,$8 dsubu $12,$8,$2 sgtu $2,$12,$8 sd $12,-4*8($4) daddu $2,$24 sltu $25,$13,$9 dsubu $9,$13,$9 dsubu $13,$9,$2 sgtu $2,$13,$9 sd $13,-3*8($4) daddu $2,$25 sltu $24,$14,$10 dsubu $10,$14,$10 dsubu $14,$10,$2 sgtu $2,$14,$10 sd $14,-2*8($4) daddu $2,$24 sltu $25,$15,$11 dsubu $11,$15,$11 dsubu $15,$11,$2 sgtu $2,$15,$11 sd $15,-8($4) .set noreorder bgtz $1,.L_bn_sub_words_loop daddu $2,$25 beqz $7,.L_bn_sub_words_return nop .L_bn_sub_words_tail: .set reorder ld $12,0($5) ld $8,0($6) subu $7,1 sltu $24,$12,$8 dsubu $8,$12,$8 dsubu $12,$8,$2 sgtu $2,$12,$8 sd $12,0($4) daddu $2,$24 beqz $7,.L_bn_sub_words_return ld $13,8($5) subu $7,1 ld $9,8($6) sltu $25,$13,$9 dsubu $9,$13,$9 dsubu $13,$9,$2 sgtu $2,$13,$9 sd $13,8($4) daddu $2,$25 beqz $7,.L_bn_sub_words_return ld $14,2*8($5) ld $10,2*8($6) sltu $24,$14,$10 dsubu $10,$14,$10 dsubu $14,$10,$2 sgtu $2,$14,$10 sd $14,2*8($4) daddu $2,$24 .L_bn_sub_words_return: .set noreorder jr $31 move $4,$2 .end bn_sub_words_internal #if 0 /* * The bn_div_3_words entry point is re-used for constant-time interface. * Implementation is retained as historical reference. 
*/ .align 5 .globl bn_div_3_words .ent bn_div_3_words bn_div_3_words: .set noreorder move $7,$4 # we know that bn_div_words does not # touch $7, $10, $11 and preserves $6 # so that we can save two arguments # and return address in registers # instead of stack:-) ld $4,($7) move $10,$5 bne $4,$6,bn_div_3_words_internal ld $5,-8($7) li $2,-1 jr $31 move $4,$2 .end bn_div_3_words .align 5 .ent bn_div_3_words_internal bn_div_3_words_internal: .set reorder move $11,$31 bal bn_div_words_internal move $31,$11 dmultu ($10,$2) ld $14,-2*8($7) move $8,$0 mfhi ($13,$10,$2) mflo ($12,$10,$2) sltu $24,$13,$5 .L_bn_div_3_words_inner_loop: bnez $24,.L_bn_div_3_words_inner_loop_done sgeu $1,$14,$12 seq $25,$13,$5 and $1,$25 sltu $15,$12,$10 daddu $5,$6 dsubu $13,$15 dsubu $12,$10 sltu $24,$13,$5 sltu $8,$5,$6 or $24,$8 .set noreorder beqz $1,.L_bn_div_3_words_inner_loop dsubu $2,1 daddu $2,1 .set reorder .L_bn_div_3_words_inner_loop_done: .set noreorder jr $31 move $4,$2 .end bn_div_3_words_internal #endif .align 5 .globl bn_div_words .ent bn_div_words bn_div_words: .set noreorder bnez $6,bn_div_words_internal li $2,-1 # I would rather signal div-by-zero # which can be done with 'break 7' jr $31 move $4,$2 .end bn_div_words .align 5 .ent bn_div_words_internal bn_div_words_internal: move $3,$0 bltz $6,.L_bn_div_words_body move $25,$3 dsll $6,1 bgtz $6,.-4 addu $25,1 .set reorder negu $13,$25 li $14,-1 dsll $14,$13 and $14,$4 dsrl $1,$5,$13 .set noreorder beqz $14,.+12 nop break 6 # signal overflow .set reorder dsll $4,$25 dsll $5,$25 or $4,$1 .L_bn_div_words_body: dsrl $3,$6,4*8 # bits sgeu $1,$4,$6 .set noreorder beqz $1,.+12 nop dsubu $4,$6 .set reorder li $8,-1 dsrl $9,$4,4*8 # bits dsrl $8,4*8 # q=0xffffffff beq $3,$9,.L_bn_div_words_skip_div1 ddivu ($4,$3) mfqt ($8,$4,$3) .L_bn_div_words_skip_div1: dmultu ($6,$8) dsll $15,$4,4*8 # bits dsrl $1,$5,4*8 # bits or $15,$1 mflo ($12,$6,$8) mfhi ($13,$6,$8) .L_bn_div_words_inner_loop1: sltu $14,$15,$12 seq $24,$9,$13 sltu $1,$9,$13 
and $14,$24 sltu $2,$12,$6 or $1,$14 .set noreorder beqz $1,.L_bn_div_words_inner_loop1_done dsubu $13,$2 dsubu $12,$6 b .L_bn_div_words_inner_loop1 dsubu $8,1 .set reorder .L_bn_div_words_inner_loop1_done: dsll $5,4*8 # bits dsubu $4,$15,$12 dsll $2,$8,4*8 # bits li $8,-1 dsrl $9,$4,4*8 # bits dsrl $8,4*8 # q=0xffffffff beq $3,$9,.L_bn_div_words_skip_div2 ddivu ($4,$3) mfqt ($8,$4,$3) .L_bn_div_words_skip_div2: dmultu ($6,$8) dsll $15,$4,4*8 # bits dsrl $1,$5,4*8 # bits or $15,$1 mflo ($12,$6,$8) mfhi ($13,$6,$8) .L_bn_div_words_inner_loop2: sltu $14,$15,$12 seq $24,$9,$13 sltu $1,$9,$13 and $14,$24 sltu $3,$12,$6 or $1,$14 .set noreorder beqz $1,.L_bn_div_words_inner_loop2_done dsubu $13,$3 dsubu $12,$6 b .L_bn_div_words_inner_loop2 dsubu $8,1 .set reorder .L_bn_div_words_inner_loop2_done: dsubu $4,$15,$12 or $2,$8 dsrl $3,$4,$25 # $3 contains remainder if anybody wants it dsrl $6,$25 # restore $6 .set noreorder move $5,$3 jr $31 move $4,$2 .end bn_div_words_internal .align 5 .globl bn_mul_comba8 .ent bn_mul_comba8 bn_mul_comba8: .set noreorder .frame $29,6*8,$31 .mask 0x003f0000,-8 dsubu $29,6*8 sd $21,5*8($29) sd $20,4*8($29) sd $19,3*8($29) sd $18,2*8($29) sd $17,1*8($29) sd $16,0*8($29) .set reorder ld $12,0($5) # If compiled with -mips3 option on # R5000 box assembler barks on this # 1ine with "should not have mult/div # as last instruction in bb (R10K # bug)" warning. If anybody out there # has a clue about how to circumvent # this do send me a note. 
# ld $8,0($6) ld $13,8($5) ld $14,2*8($5) dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); ld $15,3*8($5) ld $9,8($6) ld $10,2*8($6) ld $11,3*8($6) mflo ($2,$12,$8) mfhi ($3,$12,$8) ld $16,4*8($5) ld $18,5*8($5) dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); ld $20,6*8($5) ld $5,7*8($5) ld $17,4*8($6) ld $19,5*8($6) mflo ($24,$12,$9) mfhi ($25,$12,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); daddu $7,$25,$1 ld $21,6*8($6) ld $6,7*8($6) sd $2,0($4) # r[0]=c1; mflo ($24,$13,$8) mfhi ($25,$13,$8) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $2,$7,$25 sd $3,8($4) # r[1]=c2; mflo ($24,$14,$8) mfhi ($25,$14,$8) daddu $7,$24 sltu $1,$7,$24 dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); daddu $25,$1 daddu $2,$25 mflo ($24,$13,$9) mfhi ($25,$13,$9) daddu $7,$24 sltu $1,$7,$24 dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $3,$2,$25 mflo ($24,$12,$10) mfhi ($25,$12,$10) daddu $7,$24 sltu $1,$7,$24 dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,2*8($4) # r[2]=c3; mflo ($24,$12,$11) mfhi ($25,$12,$11) daddu $2,$24 sltu $1,$2,$24 dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $7,$3,$25 mflo ($24,$13,$10) mfhi ($25,$13,$10) daddu $2,$24 sltu $1,$2,$24 dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$14,$9) mfhi ($25,$14,$9) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$15,$8) mfhi ($25,$15,$8) daddu $2,$24 sltu $1,$2,$24 dmultu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,3*8($4) # r[3]=c1; mflo ($24,$16,$8) mfhi ($25,$16,$8) daddu $3,$24 sltu $1,$3,$24 dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu 
$2,$7,$25 mflo ($24,$15,$9) mfhi ($25,$15,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$14,$10) mfhi ($25,$14,$10) daddu $3,$24 sltu $1,$3,$24 dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$13,$11) mfhi ($25,$13,$11) daddu $3,$24 sltu $1,$3,$24 dmultu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$12,$17) mfhi ($25,$12,$17) daddu $3,$24 sltu $1,$3,$24 dmultu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,4*8($4) # r[4]=c2; mflo ($24,$12,$19) mfhi ($25,$12,$19) daddu $7,$24 sltu $1,$7,$24 dmultu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $3,$2,$25 mflo ($24,$13,$17) mfhi ($25,$13,$17) daddu $7,$24 sltu $1,$7,$24 dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$14,$11) mfhi ($25,$14,$11) daddu $7,$24 sltu $1,$7,$24 dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$15,$10) mfhi ($25,$15,$10) daddu $7,$24 sltu $1,$7,$24 dmultu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$16,$9) mfhi ($25,$16,$9) daddu $7,$24 sltu $1,$7,$24 dmultu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$18,$8) mfhi ($25,$18,$8) daddu $7,$24 sltu $1,$7,$24 dmultu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,5*8($4) # r[5]=c3; mflo ($24,$20,$8) mfhi ($25,$20,$8) daddu $2,$24 sltu $1,$2,$24 dmultu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $7,$3,$25 mflo ($24,$18,$9) mfhi ($25,$18,$9) daddu $2,$24 sltu $1,$2,$24 dmultu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3); 
daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$16,$10) mfhi ($25,$16,$10) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$15,$11) mfhi ($25,$15,$11) daddu $2,$24 sltu $1,$2,$24 dmultu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$14,$17) mfhi ($25,$14,$17) daddu $2,$24 sltu $1,$2,$24 dmultu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$13,$19) mfhi ($25,$13,$19) daddu $2,$24 sltu $1,$2,$24 dmultu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$12,$21) mfhi ($25,$12,$21) daddu $2,$24 sltu $1,$2,$24 dmultu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,6*8($4) # r[6]=c1; mflo ($24,$12,$6) mfhi ($25,$12,$6) daddu $3,$24 sltu $1,$3,$24 dmultu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $2,$7,$25 mflo ($24,$13,$21) mfhi ($25,$13,$21) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$14,$19) mfhi ($25,$14,$19) daddu $3,$24 sltu $1,$3,$24 dmultu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$15,$17) mfhi ($25,$15,$17) daddu $3,$24 sltu $1,$3,$24 dmultu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$16,$11) mfhi ($25,$16,$11) daddu $3,$24 sltu $1,$3,$24 dmultu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$18,$10) mfhi ($25,$18,$10) daddu $3,$24 sltu $1,$3,$24 dmultu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$20,$9) mfhi ($25,$20,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($5,$8) 
# mul_add_c(a[7],b[0],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$5,$8) mfhi ($25,$5,$8) daddu $3,$24 sltu $1,$3,$24 dmultu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,7*8($4) # r[7]=c2; mflo ($24,$5,$9) mfhi ($25,$5,$9) daddu $7,$24 sltu $1,$7,$24 dmultu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $3,$2,$25 mflo ($24,$20,$10) mfhi ($25,$20,$10) daddu $7,$24 sltu $1,$7,$24 dmultu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$18,$11) mfhi ($25,$18,$11) daddu $7,$24 sltu $1,$7,$24 dmultu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$16,$17) mfhi ($25,$16,$17) daddu $7,$24 sltu $1,$7,$24 dmultu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$15,$19) mfhi ($25,$15,$19) daddu $7,$24 sltu $1,$7,$24 dmultu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$14,$21) mfhi ($25,$14,$21) daddu $7,$24 sltu $1,$7,$24 dmultu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$13,$6) mfhi ($25,$13,$6) daddu $7,$24 sltu $1,$7,$24 dmultu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,8*8($4) # r[8]=c3; mflo ($24,$14,$6) mfhi ($25,$14,$6) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $7,$3,$25 mflo ($24,$15,$21) mfhi ($25,$15,$21) daddu $2,$24 sltu $1,$2,$24 dmultu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$16,$19) mfhi ($25,$16,$19) daddu $2,$24 sltu $1,$2,$24 dmultu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$18,$17) mfhi ($25,$18,$17) daddu 
$2,$24 sltu $1,$2,$24 dmultu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$20,$11) mfhi ($25,$20,$11) daddu $2,$24 sltu $1,$2,$24 dmultu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$5,$10) mfhi ($25,$5,$10) daddu $2,$24 sltu $1,$2,$24 dmultu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,9*8($4) # r[9]=c1; mflo ($24,$5,$11) mfhi ($25,$5,$11) daddu $3,$24 sltu $1,$3,$24 dmultu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $2,$7,$25 mflo ($24,$20,$17) mfhi ($25,$20,$17) daddu $3,$24 sltu $1,$3,$24 dmultu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$18,$19) mfhi ($25,$18,$19) daddu $3,$24 sltu $1,$3,$24 dmultu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$16,$21) mfhi ($25,$16,$21) daddu $3,$24 sltu $1,$3,$24 dmultu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$15,$6) mfhi ($25,$15,$6) daddu $3,$24 sltu $1,$3,$24 dmultu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,10*8($4) # r[10]=c2; mflo ($24,$16,$6) mfhi ($25,$16,$6) daddu $7,$24 sltu $1,$7,$24 dmultu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $3,$2,$25 mflo ($24,$18,$21) mfhi ($25,$18,$21) daddu $7,$24 sltu $1,$7,$24 dmultu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$20,$19) mfhi ($25,$20,$19) daddu $7,$24 sltu $1,$7,$24 dmultu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 mflo ($24,$5,$17) mfhi ($25,$5,$17) daddu $7,$24 sltu $1,$7,$24 dmultu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd 
$7,11*8($4) # r[11]=c3; mflo ($24,$5,$19) mfhi ($25,$5,$19) daddu $2,$24 sltu $1,$2,$24 dmultu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $7,$3,$25 mflo ($24,$20,$21) mfhi ($25,$20,$21) daddu $2,$24 sltu $1,$2,$24 dmultu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$18,$6) mfhi ($25,$18,$6) daddu $2,$24 sltu $1,$2,$24 dmultu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,12*8($4) # r[12]=c1; mflo ($24,$20,$6) mfhi ($25,$20,$6) daddu $3,$24 sltu $1,$3,$24 dmultu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $2,$7,$25 mflo ($24,$5,$21) mfhi ($25,$5,$21) daddu $3,$24 sltu $1,$3,$24 dmultu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,13*8($4) # r[13]=c2; mflo ($24,$5,$6) mfhi ($25,$5,$6) daddu $7,$24 sltu $1,$7,$24 daddu $25,$1 daddu $2,$25 sd $7,14*8($4) # r[14]=c3; sd $2,15*8($4) # r[15]=c1; .set noreorder ld $21,5*8($29) ld $20,4*8($29) ld $19,3*8($29) ld $18,2*8($29) ld $17,1*8($29) ld $16,0*8($29) jr $31 daddu $29,6*8 .end bn_mul_comba8 .align 5 .globl bn_mul_comba4 .ent bn_mul_comba4 bn_mul_comba4: .set reorder ld $12,0($5) ld $8,0($6) ld $13,8($5) ld $14,2*8($5) dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); ld $15,3*8($5) ld $9,8($6) ld $10,2*8($6) ld $11,3*8($6) mflo ($2,$12,$8) mfhi ($3,$12,$8) sd $2,0($4) dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); mflo ($24,$12,$9) mfhi ($25,$12,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); daddu $7,$25,$1 mflo ($24,$13,$8) mfhi ($25,$13,$8) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $2,$7,$25 sd $3,8($4) mflo ($24,$14,$8) mfhi ($25,$14,$8) daddu $7,$24 sltu $1,$7,$24 dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); daddu $25,$1 daddu $2,$25 mflo ($24,$13,$9) mfhi ($25,$13,$9) daddu $7,$24 sltu 
$1,$7,$24 dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $3,$2,$25 mflo ($24,$12,$10) mfhi ($25,$12,$10) daddu $7,$24 sltu $1,$7,$24 dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,2*8($4) mflo ($24,$12,$11) mfhi ($25,$12,$11) daddu $2,$24 sltu $1,$2,$24 dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $7,$3,$25 mflo ($24,$13,$10) mfhi ($25,$13,$10) daddu $2,$24 sltu $1,$2,$24 dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$14,$9) mfhi ($25,$14,$9) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 mflo ($24,$15,$8) mfhi ($25,$15,$8) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,3*8($4) mflo ($24,$15,$9) mfhi ($25,$15,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $2,$7,$25 mflo ($24,$14,$10) mfhi ($25,$14,$10) daddu $3,$24 sltu $1,$3,$24 dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 mflo ($24,$13,$11) mfhi ($25,$13,$11) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,4*8($4) mflo ($24,$14,$11) mfhi ($25,$14,$11) daddu $7,$24 sltu $1,$7,$24 dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); daddu $25,$1 daddu $2,$25 sltu $3,$2,$25 mflo ($24,$15,$10) mfhi ($25,$15,$10) daddu $7,$24 sltu $1,$7,$24 dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,5*8($4) mflo ($24,$15,$11) mfhi ($25,$15,$11) daddu $2,$24 sltu $1,$2,$24 daddu $25,$1 daddu $3,$25 sd $2,6*8($4) sd $3,7*8($4) .set noreorder jr $31 nop .end bn_mul_comba4 .align 5 .globl bn_sqr_comba8 
.ent bn_sqr_comba8 bn_sqr_comba8: .set reorder ld $12,0($5) ld $13,8($5) ld $14,2*8($5) ld $15,3*8($5) dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); ld $8,4*8($5) ld $9,5*8($5) ld $10,6*8($5) ld $11,7*8($5) mflo ($2,$12,$12) mfhi ($3,$12,$12) sd $2,0($4) dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); mflo ($24,$12,$13) mfhi ($25,$12,$13) slt $2,$25,$0 dsll $25,1 dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $6,$24,$0 daddu $25,$6 dsll $24,1 daddu $3,$24 sltu $1,$3,$24 daddu $7,$25,$1 sd $3,8($4) sltu $1,$7,$25 daddu $2,$1 mflo ($24,$14,$12) mfhi ($25,$14,$12) daddu $7,$24 sltu $1,$7,$24 dmultu ($13,$13) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $3,$2,$1 daddu $2,$25 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$13,$13) mfhi ($25,$13,$13) daddu $7,$24 sltu $1,$7,$24 dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,2*8($4) mflo ($24,$12,$15) mfhi ($25,$12,$15) daddu $2,$24 sltu $1,$2,$24 dmultu ($13,$14) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $7,$3,$1 daddu $3,$25 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$13,$14) mfhi ($25,$13,$14) daddu $2,$24 sltu $1,$2,$24 dmultu ($8,$12) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $1,$3,$1 daddu $3,$25 daddu $7,$1 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$8,$12) mfhi ($25,$8,$12) sd $2,3*8($4) daddu $3,$24 sltu $1,$3,$24 dmultu ($15,$13) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $2,$7,$1 daddu $7,$25 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$15,$13) mfhi ($25,$15,$13) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$14) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $1,$7,$1 daddu $7,$25 daddu $2,$1 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$14,$14) mfhi ($25,$14,$14) daddu $3,$24 sltu $1,$3,$24 dmultu 
($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,4*8($4) mflo ($24,$12,$9) mfhi ($25,$12,$9) daddu $7,$24 sltu $1,$7,$24 dmultu ($13,$8) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $3,$2,$1 daddu $2,$25 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$13,$8) mfhi ($25,$13,$8) daddu $7,$24 sltu $1,$7,$24 dmultu ($14,$15) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $1,$2,$1 daddu $2,$25 daddu $3,$1 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$14,$15) mfhi ($25,$14,$15) daddu $7,$24 sltu $1,$7,$24 dmultu ($10,$12) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $1,$2,$1 daddu $2,$25 daddu $3,$1 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$10,$12) mfhi ($25,$10,$12) sd $7,5*8($4) daddu $2,$24 sltu $1,$2,$24 dmultu ($9,$13) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $7,$3,$1 daddu $3,$25 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$9,$13) mfhi ($25,$9,$13) daddu $2,$24 sltu $1,$2,$24 dmultu ($8,$14) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $1,$3,$1 daddu $3,$25 daddu $7,$1 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$8,$14) mfhi ($25,$8,$14) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$15) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $1,$3,$1 daddu $3,$25 daddu $7,$1 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$15,$15) mfhi ($25,$15,$15) daddu $2,$24 sltu $1,$2,$24 dmultu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,6*8($4) mflo ($24,$12,$11) mfhi ($25,$12,$11) daddu $3,$24 sltu $1,$3,$24 dmultu ($13,$10) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $2,$7,$1 daddu $7,$25 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$13,$10) mfhi 
($25,$13,$10) daddu $3,$24 sltu $1,$3,$24 dmultu ($14,$9) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $1,$7,$1 daddu $7,$25 daddu $2,$1 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$14,$9) mfhi ($25,$14,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($15,$8) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $1,$7,$1 daddu $7,$25 daddu $2,$1 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$15,$8) mfhi ($25,$15,$8) daddu $3,$24 sltu $1,$3,$24 dmultu ($11,$13) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $1,$7,$1 daddu $7,$25 daddu $2,$1 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$11,$13) mfhi ($25,$11,$13) sd $3,7*8($4) daddu $7,$24 sltu $1,$7,$24 dmultu ($10,$14) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $3,$2,$1 daddu $2,$25 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$10,$14) mfhi ($25,$10,$14) daddu $7,$24 sltu $1,$7,$24 dmultu ($9,$15) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $1,$2,$1 daddu $2,$25 daddu $3,$1 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$9,$15) mfhi ($25,$9,$15) daddu $7,$24 sltu $1,$7,$24 dmultu ($8,$8) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $1,$2,$1 daddu $2,$25 daddu $3,$1 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$8,$8) mfhi ($25,$8,$8) daddu $7,$24 sltu $1,$7,$24 dmultu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); daddu $25,$1 daddu $2,$25 sltu $1,$2,$25 daddu $3,$1 sd $7,8*8($4) mflo ($24,$14,$11) mfhi ($25,$14,$11) daddu $2,$24 sltu $1,$2,$24 dmultu ($15,$10) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $7,$3,$1 daddu $3,$25 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$15,$10) mfhi ($25,$15,$10) daddu $2,$24 sltu $1,$2,$24 dmultu ($8,$9) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu 
$3,$1 daddu $25,$24 sltu $1,$3,$1 daddu $3,$25 daddu $7,$1 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$8,$9) mfhi ($25,$8,$9) daddu $2,$24 sltu $1,$2,$24 dmultu ($11,$15) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $1,$3,$1 daddu $3,$25 daddu $7,$1 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$11,$15) mfhi ($25,$11,$15) sd $2,9*8($4) daddu $3,$24 sltu $1,$3,$24 dmultu ($10,$8) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $2,$7,$1 daddu $7,$25 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$10,$8) mfhi ($25,$10,$8) daddu $3,$24 sltu $1,$3,$24 dmultu ($9,$9) # forward multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $1,$7,$1 daddu $7,$25 daddu $2,$1 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$9,$9) mfhi ($25,$9,$9) daddu $3,$24 sltu $1,$3,$24 dmultu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2); daddu $25,$1 daddu $7,$25 sltu $1,$7,$25 daddu $2,$1 sd $3,10*8($4) mflo ($24,$8,$11) mfhi ($25,$8,$11) daddu $7,$24 sltu $1,$7,$24 dmultu ($9,$10) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $3,$2,$1 daddu $2,$25 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$9,$10) mfhi ($25,$9,$10) daddu $7,$24 sltu $1,$7,$24 dmultu ($11,$9) # forward multiplication daddu $7,$24 daddu $1,$25 sltu $24,$7,$24 daddu $2,$1 daddu $25,$24 sltu $1,$2,$1 daddu $2,$25 daddu $3,$1 sltu $25,$2,$25 daddu $3,$25 mflo ($24,$11,$9) mfhi ($25,$11,$9) sd $7,11*8($4) daddu $2,$24 sltu $1,$2,$24 dmultu ($10,$10) # forward multiplication daddu $2,$24 daddu $1,$25 sltu $24,$2,$24 daddu $3,$1 daddu $25,$24 sltu $7,$3,$1 daddu $3,$25 sltu $25,$3,$25 daddu $7,$25 mflo ($24,$10,$10) mfhi ($25,$10,$10) daddu $2,$24 sltu $1,$2,$24 dmultu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); daddu $25,$1 daddu $3,$25 sltu $1,$3,$25 daddu $7,$1 sd $2,12*8($4) mflo ($24,$10,$11) mfhi ($25,$10,$11) daddu $3,$24 sltu $1,$3,$24 dmultu ($11,$11) # forward 
multiplication daddu $3,$24 daddu $1,$25 sltu $24,$3,$24 daddu $7,$1 daddu $25,$24 sltu $2,$7,$1 daddu $7,$25 sltu $25,$7,$25 daddu $2,$25 mflo ($24,$11,$11) mfhi ($25,$11,$11) sd $3,13*8($4) daddu $7,$24 sltu $1,$7,$24 daddu $25,$1 daddu $2,$25 sd $7,14*8($4) sd $2,15*8($4) .set noreorder jr $31 nop .end bn_sqr_comba8

#
# bn_sqr_comba4 - square a four-word number into an eight-word result.
#
# Inputs, as used by the code below:
#   $5   base address of the four 64-bit input words a[0..3]
#   $4   base address of the eight 64-bit result words (offsets 0 .. 7*8)
# NOTE(review): presumably the C-callable bn_sqr_comba4(r, a) with r in $4
# while a is in $5 - confirm against the C prototype.
#
# Method: the result is produced one column (one result word) at a time.
# Column k sums every product a[i]*a[j] with i+j == k; a cross product
# (i != j) is counted twice, a square a[i]*a[i] once.  The running sum
# lives in three registers that rotate roles after every stored word:
#   $2 = c1, $3 = c2, $7 = c3   (names taken from the inline comments)
# In the inherited mul_add_c/mul_add_c2 comments b[i] denotes a[i]; both
# multiplier operands always come from the same four loaded words.
#
# Registers written: $1 (carry scratch), $2, $3, $7 (accumulator),
# $6 (scratch in column 1), $12-$15 (a[0]..a[3]), $24/$25 (low/high half
# of the current product).  The only memory traffic is the four loads
# from $5 and the eight stores to $4; no stack is used.
#
# A multiply tagged -forward multiplication- is started before the
# previous product has been fully accumulated; its result is picked up
# by the next mflo/mfhi pair.
# NOTE(review): the parenthesised dmultu/mflo/mfhi forms are presumably
# function-like macros from mips_arch.h - confirm there.
#
	.align	5
	.globl	bn_sqr_comba4
	.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	ld	$12,0($5)		# $12 = a[0]
	ld	$13,8($5)		# $13 = a[1]
	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$14,2*8($5)		# $14 = a[2]
	ld	$15,3*8($5)		# $15 = a[3]
	mflo	($2,$12,$12)		# c1 = low half of a[0]*a[0]
	mfhi	($3,$12,$12)		# c2 = high half of a[0]*a[0]
	sd	$2,0($4)		# result word 0

	# Column 1: 2*a[0]*a[1].  The product is doubled by shifting the
	# hi:lo pair left one bit; the bit shifted out of hi becomes the
	# new c1 and the bit shifted out of lo is added into hi.
	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0		# c1 = bit 63 of hi
	dsll	$25,1
	dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0		# $6 = bit 63 of lo
	daddu	$25,$6			# ... carried into the shifted hi
	dsll	$24,1
	daddu	$3,$24			# c2 += doubled lo
	sltu	$1,$3,$24		# $1 = carry out of c2
	daddu	$7,$25,$1		# c3 = doubled hi + carry
	sd	$3,8($4)		# result word 1
	sltu	$1,$7,$25		# carry out of hi + carry
	daddu	$2,$1			# ... goes to c1

	# Column 2: 2*a[2]*a[0] + a[1]*a[1], accumulated in c3,c1,c2.
	# Doubling here (and in later columns) is done by adding lo to
	# the low word twice and hi-plus-carry to the middle word twice.
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)
	daddu	$7,$24			# c3 += lo (first time)
	sltu	$1,$7,$24
	dmultu	($13,$13)		# forward multiplication
	daddu	$7,$24			# c3 += lo (second time)
	daddu	$1,$25			# $1 = hi + first carry
	sltu	$24,$7,$24		# $24 = second carry
	daddu	$2,$1			# c1 += hi + first carry
	daddu	$25,$24			# $25 = hi + second carry
	sltu	$3,$2,$1		# c2 is set (not added to): old c2 was stored
	daddu	$2,$25			# c1 += hi + second carry
	sltu	$25,$2,$25
	daddu	$3,$25			# c2 += carry out of c1
	mflo	($24,$13,$13)
	mfhi	($25,$13,$13)
	daddu	$7,$24			# c3 += lo of a[1]*a[1] (counted once)
	sltu	$1,$7,$24
	dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
	daddu	$25,$1			# hi + carry
	daddu	$2,$25			# c1 += hi + carry
	sltu	$1,$2,$25
	daddu	$3,$1			# c2 += carry out of c1
	sd	$7,2*8($4)		# result word 2

	# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2], accumulated in c1,c2,c3.
	mflo	($24,$12,$15)
	mfhi	($25,$12,$15)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$14)		# forward multiplication
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1		# c3 is set here: first carry of this column
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$13,$14)
	mfhi	($25,$13,$14)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$13)		# forward multiplication
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1		# c3 already holds a carry, so go through $1
	daddu	$3,$25
	daddu	$7,$1			# ... and add instead of overwrite
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$15,$13)
	mfhi	($25,$15,$13)
	sd	$2,3*8($4)		# result word 3

	# Column 4: 2*a[3]*a[1] + a[2]*a[2], accumulated in c2,c3,c1.
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$14)		# forward multiplication
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1		# c1 is set here: first carry of this column
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$14,$14)
	mfhi	($25,$14,$14)
	daddu	$3,$24			# c2 += lo of a[2]*a[2] (counted once)
	sltu	$1,$3,$24
	dmultu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)		# result word 4

	# Column 5: 2*a[2]*a[3], accumulated in c3,c1,c2.
	mflo	($24,$14,$15)
	mfhi	($25,$14,$15)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$15)		# forward multiplication
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1		# c2 is set here: first carry of this column
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25

	# Columns 6 and 7: a[3]*a[3] added to c1,c2.  No carry out of c2
	# is computed; c2 is stored directly as the last result word.
	mflo	($24,$15,$15)
	mfhi	($25,$15,$15)
	sd	$7,5*8($4)		# result word 5
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)		# result word 6
	sd	$3,7*8($4)		# result word 7

	.set	noreorder
	jr	$31			# return to the address in $31
	nop				# explicit delay-slot filler under noreorder
	.end	bn_sqr_comba4