#ifndef __ASSEMBLER__ # define __ASSEMBLER__ 1 #endif #include "crypto/sparc_arch.h" #ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch #endif .text .align 32 .globl des_t4_key_expand .type des_t4_key_expand,#function des_t4_key_expand: andcc %o0, 0x7, %g0 .word 0x91b20300 !alignaddr %o0,%g0,%o0 bz,pt %icc, 1f ldd [%o0 + 0x00], %f0 ldd [%o0 + 0x08], %f2 .word 0x81b00902 !faligndata %f0,%f2,%f0 1: .word 0x81b026c0 !des_kexpand %f0,0,%f0, .word 0x85b026c1 !des_kexpand %f0,1,%f2, std %f0, [%o1 + 0x00] .word 0x8db0a6c3 !des_kexpand %f2,3,%f6, std %f2, [%o1 + 0x08] .word 0x89b0a6c2 !des_kexpand %f2,2,%f4, .word 0x95b1a6c3 !des_kexpand %f6,3,%f10, std %f6, [%o1 + 0x18] .word 0x91b1a6c2 !des_kexpand %f6,2,%f8, std %f4, [%o1 + 0x10] .word 0x9db2a6c3 !des_kexpand %f10,3,%f14, std %f10, [%o1 + 0x28] .word 0x99b2a6c2 !des_kexpand %f10,2,%f12, std %f8, [%o1 + 0x20] .word 0xa1b3a6c1 !des_kexpand %f14,1,%f16, std %f14, [%o1 + 0x38] .word 0xa9b426c3 !des_kexpand %f16,3,%f20, std %f12, [%o1 + 0x30] .word 0xa5b426c2 !des_kexpand %f16,2,%f18, std %f16, [%o1 + 0x40] .word 0xb1b526c3 !des_kexpand %f20,3,%f24, std %f20, [%o1 + 0x50] .word 0xadb526c2 !des_kexpand %f20,2,%f22, std %f18, [%o1 + 0x48] .word 0xb9b626c3 !des_kexpand %f24,3,%f28, std %f24, [%o1 + 0x60] .word 0xb5b626c2 !des_kexpand %f24,2,%f26, std %f22, [%o1 + 0x58] .word 0xbdb726c1 !des_kexpand %f28,1,%f30, std %f28, [%o1 + 0x70] std %f26, [%o1 + 0x68] retl std %f30, [%o1 + 0x78] .size des_t4_key_expand,.-des_t4_key_expand .globl des_t4_cbc_encrypt .align 32 des_t4_cbc_encrypt: cmp %o2, 0 be,pn SIZE_T_CC, .Lcbc_abort srln %o2, 0, %o2 ! needed on v8+, "nop" on v9 ld [%o4 + 0], %f0 ! load ivec ld [%o4 + 4], %f1 and %o0, 7, %g1 andn %o0, 7, %o0 sll %g1, 3, %g1 mov 0xff, %g3 prefetch [%o0], 20 prefetch [%o0 + 63], 20 sub %g0, %g1, %g2 and %o1, 7, %g4 .word 0x93b24340 !alignaddrl %o1,%g0,%o1 srl %g3, %g4, %g3 srlx %o2, 3, %o2 movrz %g4, 0, %g3 prefetch [%o1], 22 ldd [%o3 + 0x00], %f4 ! load key schedule ldd [%o3 + 0x08], %f6 ldd [%o3 + 0x10], %f8 ldd [%o3 + 0x18], %f10 ldd [%o3 + 0x20], %f12 ldd [%o3 + 0x28], %f14 ldd [%o3 + 0x30], %f16 ldd [%o3 + 0x38], %f18 ldd [%o3 + 0x40], %f20 ldd [%o3 + 0x48], %f22 ldd [%o3 + 0x50], %f24 ldd [%o3 + 0x58], %f26 ldd [%o3 + 0x60], %f28 ldd [%o3 + 0x68], %f30 ldd [%o3 + 0x70], %f32 ldd [%o3 + 0x78], %f34 .Ldes_cbc_enc_loop: ldx [%o0 + 0], %g4 brz,pt %g1, 4f nop ldx [%o0 + 8], %g5 sllx %g4, %g1, %g4 srlx %g5, %g2, %g5 or %g5, %g4, %g4 4: .word 0x85b02304 !movxtod %g4,%f2 prefetch [%o0 + 8+63], 20 add %o0, 8, %o0 .word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec prefetch [%o1 + 63], 22 .word 0x81b02680 !des_ip %f0,%f0,, .word 0x80c90126 !des_round %f4,%f6,%f0,%f0 .word 0x80ca012a !des_round %f8,%f10,%f0,%f0 .word 0x80cb012e !des_round %f12,%f14,%f0,%f0 .word 0x80cc0132 !des_round %f16,%f18,%f0,%f0 .word 0x80cd0136 !des_round %f20,%f22,%f0,%f0 .word 0x80ce013a !des_round %f24,%f26,%f0,%f0 .word 0x80cf013e !des_round %f28,%f30,%f0,%f0 .word 0x80c84123 !des_round %f32,%f34,%f0,%f0 .word 0x81b026a0 !des_iip %f0,%f0,, brnz,pn %g3, 2f sub %o2, 1, %o2 std %f0, [%o1 + 0] brnz,pt %o2, .Ldes_cbc_enc_loop add %o1, 8, %o1 st %f0, [%o4 + 0] ! write out ivec retl st %f1, [%o4 + 4] .Lcbc_abort: retl nop .align 16 2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard ! and ~4x deterioration ! in inp==out case .word 0x85b00900 !faligndata %f0,%f0,%f2 ! handle unaligned output stda %f2, [%o1 + %g3]0xc0 ! partial store add %o1, 8, %o1 orn %g0, %g3, %g3 stda %f2, [%o1 + %g3]0xc0 ! partial store brnz,pt %o2, .Ldes_cbc_enc_loop+4 orn %g0, %g3, %g3 st %f0, [%o4 + 0] ! write out ivec retl st %f1, [%o4 + 4] .type des_t4_cbc_encrypt,#function .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt .globl des_t4_cbc_decrypt .align 32 des_t4_cbc_decrypt: cmp %o2, 0 be,pn SIZE_T_CC, .Lcbc_abort srln %o2, 0, %o2 ! needed on v8+, "nop" on v9 ld [%o4 + 0], %f2 ! load ivec ld [%o4 + 4], %f3 and %o0, 7, %g1 andn %o0, 7, %o0 sll %g1, 3, %g1 mov 0xff, %g3 prefetch [%o0], 20 prefetch [%o0 + 63], 20 sub %g0, %g1, %g2 and %o1, 7, %g4 .word 0x93b24340 !alignaddrl %o1,%g0,%o1 srl %g3, %g4, %g3 srlx %o2, 3, %o2 movrz %g4, 0, %g3 prefetch [%o1], 22 ldd [%o3 + 0x78], %f4 ! load key schedule ldd [%o3 + 0x70], %f6 ldd [%o3 + 0x68], %f8 ldd [%o3 + 0x60], %f10 ldd [%o3 + 0x58], %f12 ldd [%o3 + 0x50], %f14 ldd [%o3 + 0x48], %f16 ldd [%o3 + 0x40], %f18 ldd [%o3 + 0x38], %f20 ldd [%o3 + 0x30], %f22 ldd [%o3 + 0x28], %f24 ldd [%o3 + 0x20], %f26 ldd [%o3 + 0x18], %f28 ldd [%o3 + 0x10], %f30 ldd [%o3 + 0x08], %f32 ldd [%o3 + 0x00], %f34 .Ldes_cbc_dec_loop: ldx [%o0 + 0], %g4 brz,pt %g1, 4f nop ldx [%o0 + 8], %g5 sllx %g4, %g1, %g4 srlx %g5, %g2, %g5 or %g5, %g4, %g4 4: .word 0x81b02304 !movxtod %g4,%f0 prefetch [%o0 + 8+63], 20 add %o0, 8, %o0 prefetch [%o1 + 63], 22 .word 0x81b02680 !des_ip %f0,%f0,, .word 0x80c90126 !des_round %f4,%f6,%f0,%f0 .word 0x80ca012a !des_round %f8,%f10,%f0,%f0 .word 0x80cb012e !des_round %f12,%f14,%f0,%f0 .word 0x80cc0132 !des_round %f16,%f18,%f0,%f0 .word 0x80cd0136 !des_round %f20,%f22,%f0,%f0 .word 0x80ce013a !des_round %f24,%f26,%f0,%f0 .word 0x80cf013e !des_round %f28,%f30,%f0,%f0 .word 0x80c84123 !des_round %f32,%f34,%f0,%f0 .word 0x81b026a0 !des_iip %f0,%f0,, .word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec .word 0x85b02304 !movxtod %g4,%f2 brnz,pn %g3, 2f sub %o2, 1, %o2 std %f0, [%o1 + 0] brnz,pt %o2, .Ldes_cbc_dec_loop add %o1, 8, %o1 st %f2, [%o4 + 0] ! write out ivec retl st %f3, [%o4 + 4] .align 16 2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard ! and ~4x deterioration ! in inp==out case .word 0x81b00900 !faligndata %f0,%f0,%f0 ! handle unaligned output stda %f0, [%o1 + %g3]0xc0 ! partial store add %o1, 8, %o1 orn %g0, %g3, %g3 stda %f0, [%o1 + %g3]0xc0 ! partial store brnz,pt %o2, .Ldes_cbc_dec_loop+4 orn %g0, %g3, %g3 st %f2, [%o4 + 0] ! write out ivec retl st %f3, [%o4 + 4] .type des_t4_cbc_decrypt,#function .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt .globl des_t4_ede3_cbc_encrypt .align 32 des_t4_ede3_cbc_encrypt: cmp %o2, 0 be,pn SIZE_T_CC, .Lcbc_abort srln %o2, 0, %o2 ! needed on v8+, "nop" on v9 ld [%o4 + 0], %f0 ! load ivec ld [%o4 + 4], %f1 and %o0, 7, %g1 andn %o0, 7, %o0 sll %g1, 3, %g1 mov 0xff, %g3 prefetch [%o0], 20 prefetch [%o0 + 63], 20 sub %g0, %g1, %g2 and %o1, 7, %g4 .word 0x93b24340 !alignaddrl %o1,%g0,%o1 srl %g3, %g4, %g3 srlx %o2, 3, %o2 movrz %g4, 0, %g3 prefetch [%o1], 22 ldd [%o3 + 0x00], %f4 ! load key schedule ldd [%o3 + 0x08], %f6 ldd [%o3 + 0x10], %f8 ldd [%o3 + 0x18], %f10 ldd [%o3 + 0x20], %f12 ldd [%o3 + 0x28], %f14 ldd [%o3 + 0x30], %f16 ldd [%o3 + 0x38], %f18 ldd [%o3 + 0x40], %f20 ldd [%o3 + 0x48], %f22 ldd [%o3 + 0x50], %f24 ldd [%o3 + 0x58], %f26 ldd [%o3 + 0x60], %f28 ldd [%o3 + 0x68], %f30 ldd [%o3 + 0x70], %f32 ldd [%o3 + 0x78], %f34 .Ldes_ede3_cbc_enc_loop: ldx [%o0 + 0], %g4 brz,pt %g1, 4f nop ldx [%o0 + 8], %g5 sllx %g4, %g1, %g4 srlx %g5, %g2, %g5 or %g5, %g4, %g4 4: .word 0x85b02304 !movxtod %g4,%f2 prefetch [%o0 + 8+63], 20 add %o0, 8, %o0 .word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec prefetch [%o1 + 63], 22 .word 0x81b02680 !des_ip %f0,%f0,, .word 0x80c90126 !des_round %f4,%f6,%f0,%f0 .word 0x80ca012a !des_round %f8,%f10,%f0,%f0 .word 0x80cb012e !des_round %f12,%f14,%f0,%f0 .word 0x80cc0132 !des_round %f16,%f18,%f0,%f0 ldd [%o3 + 0x100-0x08], %f36 ldd [%o3 + 0x100-0x10], %f38 .word 0x80cd0136 !des_round %f20,%f22,%f0,%f0 ldd [%o3 + 0x100-0x18], %f40 ldd [%o3 + 0x100-0x20], %f42 .word 0x80ce013a !des_round %f24,%f26,%f0,%f0 ldd [%o3 + 0x100-0x28], %f44 ldd [%o3 + 0x100-0x30], %f46 .word 0x80cf013e !des_round %f28,%f30,%f0,%f0 ldd [%o3 + 0x100-0x38], %f48 ldd [%o3 + 0x100-0x40], %f50 .word 0x80c84123 !des_round %f32,%f34,%f0,%f0 ldd [%o3 + 0x100-0x48], %f52 ldd [%o3 + 0x100-0x50], %f54 .word 0x81b026a0 !des_iip %f0,%f0,, ldd [%o3 + 0x100-0x58], %f56 ldd [%o3 + 0x100-0x60], %f58 .word 0x81b02680 !des_ip %f0,%f0,, ldd [%o3 + 0x100-0x68], %f60 ldd [%o3 + 0x100-0x70], %f62 .word 0x80c94127 !des_round %f36,%f38,%f0,%f0 ldd [%o3 + 0x100-0x78], %f36 ldd [%o3 + 0x100-0x80], %f38 .word 0x80ca412b !des_round %f40,%f42,%f0,%f0 .word 0x80cb412f !des_round %f44,%f46,%f0,%f0 .word 0x80cc4133 !des_round %f48,%f50,%f0,%f0 ldd [%o3 + 0x100+0x00], %f40 ldd [%o3 + 0x100+0x08], %f42 .word 0x80cd4137 !des_round %f52,%f54,%f0,%f0 ldd [%o3 + 0x100+0x10], %f44 ldd [%o3 + 0x100+0x18], %f46 .word 0x80ce413b !des_round %f56,%f58,%f0,%f0 ldd [%o3 + 0x100+0x20], %f48 ldd [%o3 + 0x100+0x28], %f50 .word 0x80cf413f !des_round %f60,%f62,%f0,%f0 ldd [%o3 + 0x100+0x30], %f52 ldd [%o3 + 0x100+0x38], %f54 .word 0x80c94127 !des_round %f36,%f38,%f0,%f0 ldd [%o3 + 0x100+0x40], %f56 ldd [%o3 + 0x100+0x48], %f58 .word 0x81b026a0 !des_iip %f0,%f0,, ldd [%o3 + 0x100+0x50], %f60 ldd [%o3 + 0x100+0x58], %f62 .word 0x81b02680 !des_ip %f0,%f0,, ldd [%o3 + 0x100+0x60], %f36 ldd [%o3 + 0x100+0x68], %f38 .word 0x80ca412b !des_round %f40,%f42,%f0,%f0 ldd [%o3 + 0x100+0x70], %f40 ldd [%o3 + 0x100+0x78], %f42 .word 0x80cb412f !des_round %f44,%f46,%f0,%f0 .word 0x80cc4133 !des_round %f48,%f50,%f0,%f0 .word 0x80cd4137 !des_round %f52,%f54,%f0,%f0 .word 0x80ce413b !des_round %f56,%f58,%f0,%f0 .word 0x80cf413f !des_round %f60,%f62,%f0,%f0 .word 0x80c94127 !des_round %f36,%f38,%f0,%f0 .word 0x80ca412b !des_round %f40,%f42,%f0,%f0 .word 0x81b026a0 !des_iip %f0,%f0,, brnz,pn %g3, 2f sub %o2, 1, %o2 std %f0, [%o1 + 0] brnz,pt %o2, .Ldes_ede3_cbc_enc_loop add %o1, 8, %o1 st %f0, [%o4 + 0] ! write out ivec retl st %f1, [%o4 + 4] .align 16 2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard ! and ~2x deterioration ! in inp==out case .word 0x85b00900 !faligndata %f0,%f0,%f2 ! handle unaligned output stda %f2, [%o1 + %g3]0xc0 ! partial store add %o1, 8, %o1 orn %g0, %g3, %g3 stda %f2, [%o1 + %g3]0xc0 ! partial store brnz,pt %o2, .Ldes_ede3_cbc_enc_loop+4 orn %g0, %g3, %g3 st %f0, [%o4 + 0] ! write out ivec retl st %f1, [%o4 + 4] .type des_t4_ede3_cbc_encrypt,#function .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt .globl des_t4_ede3_cbc_decrypt .align 32 des_t4_ede3_cbc_decrypt: cmp %o2, 0 be,pn SIZE_T_CC, .Lcbc_abort srln %o2, 0, %o2 ! needed on v8+, "nop" on v9 ld [%o4 + 0], %f2 ! load ivec ld [%o4 + 4], %f3 and %o0, 7, %g1 andn %o0, 7, %o0 sll %g1, 3, %g1 mov 0xff, %g3 prefetch [%o0], 20 prefetch [%o0 + 63], 20 sub %g0, %g1, %g2 and %o1, 7, %g4 .word 0x93b24340 !alignaddrl %o1,%g0,%o1 srl %g3, %g4, %g3 srlx %o2, 3, %o2 movrz %g4, 0, %g3 prefetch [%o1], 22 ldd [%o3 + 0x100+0x78], %f4 ! load key schedule ldd [%o3 + 0x100+0x70], %f6 ldd [%o3 + 0x100+0x68], %f8 ldd [%o3 + 0x100+0x60], %f10 ldd [%o3 + 0x100+0x58], %f12 ldd [%o3 + 0x100+0x50], %f14 ldd [%o3 + 0x100+0x48], %f16 ldd [%o3 + 0x100+0x40], %f18 ldd [%o3 + 0x100+0x38], %f20 ldd [%o3 + 0x100+0x30], %f22 ldd [%o3 + 0x100+0x28], %f24 ldd [%o3 + 0x100+0x20], %f26 ldd [%o3 + 0x100+0x18], %f28 ldd [%o3 + 0x100+0x10], %f30 ldd [%o3 + 0x100+0x08], %f32 ldd [%o3 + 0x100+0x00], %f34 .Ldes_ede3_cbc_dec_loop: ldx [%o0 + 0], %g4 brz,pt %g1, 4f nop ldx [%o0 + 8], %g5 sllx %g4, %g1, %g4 srlx %g5, %g2, %g5 or %g5, %g4, %g4 4: .word 0x81b02304 !movxtod %g4,%f0 prefetch [%o0 + 8+63], 20 add %o0, 8, %o0 prefetch [%o1 + 63], 22 .word 0x81b02680 !des_ip %f0,%f0,, .word 0x80c90126 !des_round %f4,%f6,%f0,%f0 .word 0x80ca012a !des_round %f8,%f10,%f0,%f0 .word 0x80cb012e !des_round %f12,%f14,%f0,%f0 .word 0x80cc0132 !des_round %f16,%f18,%f0,%f0 ldd [%o3 + 0x80+0x00], %f36 ldd [%o3 + 0x80+0x08], %f38 .word 0x80cd0136 !des_round %f20,%f22,%f0,%f0 ldd [%o3 + 0x80+0x10], %f40 ldd [%o3 + 0x80+0x18], %f42 .word 0x80ce013a !des_round %f24,%f26,%f0,%f0 ldd [%o3 + 0x80+0x20], %f44 ldd [%o3 + 0x80+0x28], %f46 .word 0x80cf013e !des_round %f28,%f30,%f0,%f0 ldd [%o3 + 0x80+0x30], %f48 ldd [%o3 + 0x80+0x38], %f50 .word 0x80c84123 !des_round %f32,%f34,%f0,%f0 ldd [%o3 + 0x80+0x40], %f52 ldd [%o3 + 0x80+0x48], %f54 .word 0x81b026a0 !des_iip %f0,%f0,, ldd [%o3 + 0x80+0x50], %f56 ldd [%o3 + 0x80+0x58], %f58 .word 0x81b02680 !des_ip %f0,%f0,, ldd [%o3 + 0x80+0x60], %f60 ldd [%o3 + 0x80+0x68], %f62 .word 0x80c94127 !des_round %f36,%f38,%f0,%f0 ldd [%o3 + 0x80+0x70], %f36 ldd [%o3 + 0x80+0x78], %f38 .word 0x80ca412b !des_round %f40,%f42,%f0,%f0 .word 0x80cb412f !des_round %f44,%f46,%f0,%f0 .word 0x80cc4133 !des_round %f48,%f50,%f0,%f0 ldd [%o3 + 0x80-0x08], %f40 ldd [%o3 + 0x80-0x10], %f42 .word 0x80cd4137 !des_round %f52,%f54,%f0,%f0 ldd [%o3 + 0x80-0x18], %f44 ldd [%o3 + 0x80-0x20], %f46 .word 0x80ce413b !des_round %f56,%f58,%f0,%f0 ldd [%o3 + 0x80-0x28], %f48 ldd [%o3 + 0x80-0x30], %f50 .word 0x80cf413f !des_round %f60,%f62,%f0,%f0 ldd [%o3 + 0x80-0x38], %f52 ldd [%o3 + 0x80-0x40], %f54 .word 0x80c94127 !des_round %f36,%f38,%f0,%f0 ldd [%o3 + 0x80-0x48], %f56 ldd [%o3 + 0x80-0x50], %f58 .word 0x81b026a0 !des_iip %f0,%f0,, ldd [%o3 + 0x80-0x58], %f60 ldd [%o3 + 0x80-0x60], %f62 .word 0x81b02680 !des_ip %f0,%f0,, ldd [%o3 + 0x80-0x68], %f36 ldd [%o3 + 0x80-0x70], %f38 .word 0x80ca412b !des_round %f40,%f42,%f0,%f0 ldd [%o3 + 0x80-0x78], %f40 ldd [%o3 + 0x80-0x80], %f42 .word 0x80cb412f !des_round %f44,%f46,%f0,%f0 .word 0x80cc4133 !des_round %f48,%f50,%f0,%f0 .word 0x80cd4137 !des_round %f52,%f54,%f0,%f0 .word 0x80ce413b !des_round %f56,%f58,%f0,%f0 .word 0x80cf413f !des_round %f60,%f62,%f0,%f0 .word 0x80c94127 !des_round %f36,%f38,%f0,%f0 .word 0x80ca412b !des_round %f40,%f42,%f0,%f0 .word 0x81b026a0 !des_iip %f0,%f0,, .word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec .word 0x85b02304 !movxtod %g4,%f2 brnz,pn %g3, 2f sub %o2, 1, %o2 std %f0, [%o1 + 0] brnz,pt %o2, .Ldes_ede3_cbc_dec_loop add %o1, 8, %o1 st %f2, [%o4 + 0] ! write out ivec retl st %f3, [%o4 + 4] .align 16 2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard ! and ~3x deterioration ! in inp==out case .word 0x81b00900 !faligndata %f0,%f0,%f0 ! handle unaligned output stda %f0, [%o1 + %g3]0xc0 ! partial store add %o1, 8, %o1 orn %g0, %g3, %g3 stda %f0, [%o1 + %g3]0xc0 ! partial store brnz,pt %o2, .Ldes_ede3_cbc_dec_loop+4 orn %g0, %g3, %g3 st %f2, [%o4 + 0] ! write out ivec retl st %f3, [%o4 + 4] .type des_t4_ede3_cbc_decrypt,#function .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov" .align 4