?? aes-s390x.pl
字號:
#!/usr/bin/env perl# ====================================================================# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL# project. The module is, however, dual licensed under OpenSSL and# CRYPTOGAMS licenses depending on where you obtain it. For further# details see http://www.openssl.org/~appro/cryptogams/.# ====================================================================# AES for s390x.# April 2007.## Software performance improvement over gcc-generated code is ~70% and# in absolute terms is ~73 cycles per byte processed with 128-bit key.# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are# *strictly* in-order execution and issued instruction [in this case# load value from memory is critical] has to complete before execution# flow proceeds. S-boxes are compressed to 2KB[+256B].## As for hardware acceleration support. It's basically a "teaser," as# it can and should be improved in several ways. Most notably support# for CBC is not utilized, nor multiple blocks are ever processed.# Then software key schedule can be postponed till hardware support# detection... Performance improvement over assembler is reportedly# ~2.5x, but can reach >8x [naturally on larger chunks] if proper# support is implemented.# May 2007.## Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided# for 128-bit keys, if hardware support is detected.# Januray 2009.## Add support for hardware AES192/256 and reschedule instructions to# minimize/avoid Address Generation Interlock hazard and to favour# dual-issue z10 pipeline. This gave ~25% improvement on z10 and# almost 50% on z9. The gain is smaller on z10, because being dual-# issue z10 makes it improssible to eliminate the interlock condition:# critial path is not long enough. Yet it spends ~24 cycles per byte# processed with 128-bit key.## Unlike previous version hardware support detection takes place only# at the moment of key schedule setup, which is denoted in key->rounds.# This is done, because deferred key setup can't be made MT-safe, not# for key lengthes longer than 128 bits.## Add AES_cbc_encrypt, which gives incredible performance improvement,# it was measured to be ~6.6x. It's less than previously mentioned 8x,# because software implementation was optimized.$softonly=0; # allow hardware support$t0="%r0"; $mask="%r0";$t1="%r1";$t2="%r2"; $inp="%r2";$t3="%r3"; $out="%r3"; $bits="%r3";$key="%r4";$i1="%r5";$i2="%r6";$i3="%r7";$s0="%r8";$s1="%r9";$s2="%r10";$s3="%r11";$tbl="%r12";$rounds="%r13";$ra="%r14";$sp="%r15";sub _data_word(){ my $i; while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }}$code=<<___;.text.type AES_Te,\@object.align 256AES_Te:___&_data_word( 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);$code.=<<___;# Te4[256].byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16# rcon[].long 0x01000000, 0x02000000, 0x04000000, 0x08000000.long 0x10000000, 0x20000000, 0x40000000, 0x80000000.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0.align 256.size AES_Te,.-AES_Te# void AES_encrypt(const unsigned char *inp, unsigned char *out,# const AES_KEY *key) {.globl AES_encrypt.type AES_encrypt,\@functionAES_encrypt:___$code.=<<___ if (!$softonly); l %r0,240($key) lhi %r1,16 clr %r0,%r1 jl .Lesoft la %r1,0($key) #la %r2,0($inp) la %r4,0($out) lghi %r3,16 # single block length .long 0xb92e0042 # km %r4,%r2 brc 1,.-4 # can this happen? br %r14.align 64.Lesoft:___$code.=<<___; stmg %r3,$ra,24($sp) llgf $s0,0($inp) llgf $s1,4($inp) llgf $s2,8($inp) llgf $s3,12($inp) larl $tbl,AES_Te bras $ra,_s390x_AES_encrypt lg $out,24($sp) st $s0,0($out) st $s1,4($out) st $s2,8($out) st $s3,12($out) lmg %r6,$ra,48($sp) br $ra.size AES_encrypt,.-AES_encrypt.type _s390x_AES_encrypt,\@function.align 16_s390x_AES_encrypt: stg $ra,152($sp) x $s0,0($key) x $s1,4($key) x $s2,8($key) x $s3,12($key) l $rounds,240($key) llill $mask,`0xff<<3` aghi $rounds,-1 j .Lenc_loop.align 16.Lenc_loop: sllg $t1,$s0,`0+3` srlg $t2,$s0,`8-3` srlg $t3,$s0,`16-3` srl $s0,`24-3` nr $s0,$mask ngr $t1,$mask nr $t2,$mask nr $t3,$mask srlg $i1,$s1,`16-3` # i0 sllg $i2,$s1,`0+3` srlg $i3,$s1,`8-3` srl $s1,`24-3` nr $i1,$mask nr $s1,$mask ngr $i2,$mask nr $i3,$mask l $s0,0($s0,$tbl) # Te0[s0>>24] l $t1,1($t1,$tbl) # Te3[s0>>0] l $t2,2($t2,$tbl) # Te2[s0>>8] l $t3,3($t3,$tbl) # Te1[s0>>16] x $s0,3($i1,$tbl) # Te1[s1>>16] l $s1,0($s1,$tbl) # Te0[s1>>24] x $t2,1($i2,$tbl) # Te3[s1>>0] x $t3,2($i3,$tbl) # Te2[s1>>8] srlg $i1,$s2,`8-3` # i0 srlg $i2,$s2,`16-3` # i1 nr $i1,$mask nr $i2,$mask sllg $i3,$s2,`0+3` srl $s2,`24-3` nr $s2,$mask ngr $i3,$mask xr $s1,$t1 srlg $ra,$s3,`8-3` # i1 sllg $t1,$s3,`0+3` # i0 nr $ra,$mask la $key,16($key) ngr $t1,$mask x $s0,2($i1,$tbl) # Te2[s2>>8] x $s1,3($i2,$tbl) # Te1[s2>>16] l $s2,0($s2,$tbl) # Te0[s2>>24] x $t3,1($i3,$tbl) # Te3[s2>>0] srlg $i3,$s3,`16-3` # i2 xr $s2,$t2 srl $s3,`24-3` nr $i3,$mask nr $s3,$mask x $s0,0($key) x $s1,4($key) x $s2,8($key) x $t3,12($key) x $s0,1($t1,$tbl) # Te3[s3>>0] x $s1,2($ra,$tbl) # Te2[s3>>8] x $s2,3($i3,$tbl) # Te1[s3>>16] l $s3,0($s3,$tbl) # Te0[s3>>24] xr $s3,$t3 brct $rounds,.Lenc_loop .align 16 sllg $t1,$s0,`0+3` srlg $t2,$s0,`8-3` ngr $t1,$mask srlg $t3,$s0,`16-3` srl $s0,`24-3` nr $s0,$mask nr $t2,$mask nr $t3,$mask srlg $i1,$s1,`16-3` # i0 sllg $i2,$s1,`0+3` ngr $i2,$mask srlg $i3,$s1,`8-3` srl $s1,`24-3` nr $i1,$mask nr $s1,$mask nr $i3,$mask llgc $s0,2($s0,$tbl) # Te4[s0>>24] llgc $t1,2($t1,$tbl) # Te4[s0>>0] sll $s0,24 llgc $t2,2($t2,$tbl) # Te4[s0>>8] llgc $t3,2($t3,$tbl) # Te4[s0>>16] sll $t2,8 sll $t3,16 llgc $i1,2($i1,$tbl) # Te4[s1>>16] llgc $s1,2($s1,$tbl) # Te4[s1>>24] llgc $i2,2($i2,$tbl) # Te4[s1>>0] llgc $i3,2($i3,$tbl) # Te4[s1>>8] sll $i1,16 sll $s1,24 sll $i3,8 or $s0,$i1 or $s1,$t1 or $t2,$i2 or $t3,$i3 srlg $i1,$s2,`8-3` # i0 srlg $i2,$s2,`16-3` # i1 nr $i1,$mask nr $i2,$mask sllg $i3,$s2,`0+3` srl $s2,`24-3` ngr $i3,$mask nr $s2,$mask sllg $t1,$s3,`0+3` # i0 srlg $ra,$s3,`8-3` # i1 ngr $t1,$mask llgc $i1,2($i1,$tbl) # Te4[s2>>8] llgc $i2,2($i2,$tbl) # Te4[s2>>16] sll $i1,8 llgc $s2,2($s2,$tbl) # Te4[s2>>24] llgc $i3,2($i3,$tbl) # Te4[s2>>0] sll $i2,16 nr $ra,$mask sll $s2,24 or $s0,$i1 or $s1,$i2 or $s2,$t2 or $t3,$i3 srlg $i3,$s3,`16-3` # i2 srl $s3,`24-3` nr $i3,$mask nr $s3,$mask l $t0,16($key) l $t2,20($key) llgc $i1,2($t1,$tbl) # Te4[s3>>0] llgc $i2,2($ra,$tbl) # Te4[s3>>8] llgc $i3,2($i3,$tbl) # Te4[s3>>16] llgc $s3,2($s3,$tbl) # Te4[s3>>24] sll $i2,8 sll $i3,16 sll $s3,24 or $s0,$i1 or $s1,$i2 or $s2,$i3 or $s3,$t3 lg $ra,152($sp) xr $s0,$t0 xr $s1,$t2 x $s2,24($key) x $s3,28($key) br $ra .size _s390x_AES_encrypt,.-_s390x_AES_encrypt___$code.=<<___;.type AES_Td,\@object.align 256AES_Td:___&_data_word( 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -