?? salsa.cpp
字號:
// salsa.cpp - written and placed in the public domain by Wei Dai
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM salsa.cpp" to generate MASM code
#include "pch.h"
#ifndef CRYPTOPP_GENERATE_X64_MASM
#include "salsa.h"
#include "misc.h"
#include "argnames.h"
#include "cpu.h"
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#include <emmintrin.h>
#endif
NAMESPACE_BEGIN(CryptoPP)
// Declares a local Salsa20::Encryption object and does nothing else.
// Judging by the name, the point is to make the compiler instantiate that
// type in this translation unit.
void Salsa20_TestInstantiations()
{
	Salsa20::Encryption encryptor;
}
void Salsa20_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
{
m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20);
if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
throw InvalidRounds(StaticAlgorithmName(), m_rounds);
// m_state is reordered for SSE2
GetBlock<word32, LittleEndian, false> get1(key);
get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
GetBlock<word32, LittleEndian, false> get2(key + length - 16);
get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
// "expand 16-byte k" or "expand 32-byte k"
m_state[0] = 0x61707865;
m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
m_state[3] = 0x6b206574;
}
// Installs a new IV and rewinds the iteration counter to zero.
//
// keystreamBuffer - not used by this implementation.
// IV              - two little-endian 32-bit words are read from it.
void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV)
{
	// The two IV words land in state slots 14 and 11, in that order.
	GetBlock<word32, LittleEndian, false> ivReader(IV);
	ivReader(m_state[14])(m_state[11]);

	// Slots 5 and 8 are the two counter words written by SeekToIteration.
	m_state[5] = 0;
	m_state[8] = 0;
}
// Positions the keystream at the given iteration by splitting the count
// into two 32-bit words: the low word goes to state slot 8, the high word
// (the count shifted right by 32) to slot 5.
void Salsa20_Policy::SeekToIteration(lword iterationCount)
{
	const word32 lowWord = static_cast<word32>(iterationCount);
	const word32 highWord = static_cast<word32>(SafeRightShift<32>(iterationCount));

	m_state[8] = lowWord;
	m_state[5] = highWord;
}
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
// Returns 16 when SSE2 assembly support is compiled in and HasSSE2()
// reports true at run time; returns 1 in every other case.
unsigned int Salsa20_Policy::GetAlignment() const
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	if (HasSSE2())
	{
		return 16;
	}
#endif
	return 1;
}
// Returns four times BYTES_PER_ITERATION when SSE2 assembly support is
// compiled in and HasSSE2() reports true at run time; otherwise returns
// BYTES_PER_ITERATION.
unsigned int Salsa20_Policy::GetOptimalBlockSize() const
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	if (HasSSE2())
	{
		return 4*BYTES_PER_ITERATION;
	}
#endif
	return BYTES_PER_ITERATION;
}
#endif
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
// C-linkage prototype for the routine of the same name that the
// CRYPTOPP_GENERATE_X64_MASM section of this file emits as a MASM PROC.
// Salsa20_Policy::OperateKeystream forwards to it, passing m_rounds as
// `rounds` and m_state.data() as `state`.
extern "C" {
void Salsa20_OperateKeystream(byte *output, const byte *input, size_t iterationCount, int rounds, void *state);
}
#endif
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
void Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.data());
return;
#endif
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#ifdef CRYPTOPP_GENERATE_X64_MASM
ALIGN 8
Salsa20_OperateKeystream PROC FRAME
mov r10, [rsp + 5*8] ; state
alloc_stack(10*16 + 32*16 + 8)
save_xmm128 xmm6, 0200h
save_xmm128 xmm7, 0210h
save_xmm128 xmm8, 0220h
save_xmm128 xmm9, 0230h
save_xmm128 xmm10, 0240h
save_xmm128 xmm11, 0250h
save_xmm128 xmm12, 0260h
save_xmm128 xmm13, 0270h
save_xmm128 xmm14, 0280h
save_xmm128 xmm15, 0290h
.endprolog
// Register aliases for the generated x64 MASM routine. The aliases follow the
// argument order of Salsa20_OperateKeystream(output, input, iterationCount,
// rounds, state): the first four arguments arrive in rcx, rdx, r8 and r9,
// and the prologue loads the fifth (state) from the stack into r10.
// `rounds` is a 32-bit int, so its alias is r9d, the low half of r9.
#define REG_output rcx
#define REG_input rdx
#define REG_iterationCount r8
#define REG_state r10
#define REG_rounds r9d
#define REG_roundsLeft eax
#define REG_temp32 r11d
#define REG_temp r11
#define SSE2_WORKSPACE rsp
#else
if (HasSSE2())
{
#if CRYPTOPP_BOOL_X64
#define REG_output %4
#define REG_input %1
#define REG_iterationCount %2
#define REG_state %3
#define REG_rounds %0
#define REG_roundsLeft eax
#define REG_temp32 edx
#define REG_temp rdx
#define SSE2_WORKSPACE %5
__m128i workspace[32];
#else
#define REG_output edi
#define REG_input eax
#define REG_iterationCount ecx
#define REG_state esi
#define REG_rounds edx
#define REG_roundsLeft ebx
#define REG_temp32 ebp
#define REG_temp ebp
#define SSE2_WORKSPACE esp + WORD_SZ
#endif
#ifdef __GNUC__
__asm__ __volatile__
(
".intel_syntax noprefix;"
AS_PUSH_IF86( bx)
#else
void *s = m_state.data();
word32 r = m_rounds;
AS2( mov REG_iterationCount, iterationCount)
AS2( mov REG_input, input)
AS2( mov REG_output, output)
AS2( mov REG_state, s)
AS2( mov REG_rounds, r)
#endif
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
AS_PUSH_IF86( bp)
AS2( cmp REG_iterationCount, 4)
ASJ( jl, 5, f)
#if CRYPTOPP_BOOL_X86
AS2( mov ebx, esp)
AS2( and esp, -16)
AS2( sub esp, 32*16)
AS1( push ebx)
#endif
#define SSE2_EXPAND_S(i, j) \
ASS( pshufd xmm4, xmm##i, j, j, j, j) \
AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4)
AS2( movdqa xmm0, [REG_state + 0*16])
AS2( movdqa xmm1, [REG_state + 1*16])
AS2( movdqa xmm2, [REG_state + 2*16])
AS2( movdqa xmm3, [REG_state + 3*16])
SSE2_EXPAND_S(0, 0)
SSE2_EXPAND_S(0, 1)
SSE2_EXPAND_S(0, 2)
SSE2_EXPAND_S(0, 3)
SSE2_EXPAND_S(1, 0)
SSE2_EXPAND_S(1, 2)
SSE2_EXPAND_S(1, 3)
SSE2_EXPAND_S(2, 1)
SSE2_EXPAND_S(2, 2)
SSE2_EXPAND_S(2, 3)
SSE2_EXPAND_S(3, 0)
SSE2_EXPAND_S(3, 1)
SSE2_EXPAND_S(3, 2)
SSE2_EXPAND_S(3, 3)
#define SSE2_EXPAND_S85(i) \
AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \
AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \
AS2( add REG_roundsLeft, 1) \
AS2( adc REG_temp32, 0)
ASL(1)
AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
SSE2_EXPAND_S85(0)
SSE2_EXPAND_S85(1)
SSE2_EXPAND_S85(2)
SSE2_EXPAND_S85(3)
AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
#define SSE2_QUARTER_ROUND(a, b, d, i) \
AS2( movdqa xmm4, xmm##d) \
AS2( paddd xmm4, xmm##a) \
AS2( movdqa xmm5, xmm4) \
AS2( pslld xmm4, i) \
AS2( psrld xmm5, 32-i) \
AS2( pxor xmm##b, xmm4) \
AS2( pxor xmm##b, xmm5)
#define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) /* y3 */
#define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) /* y0 */
#define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) /* y0+y3 */
#define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7)
#define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7)
#define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256])
#define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) /* z1 */
#define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A)
#define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) /* z1+y0 */
#define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9)
#define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9)
#define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256])
#define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) /* z2 */
#define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A)
#define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) /* z2+z1 */
#define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A)
#define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13)
#define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13)
#define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256])
#define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) /* z3 */
#define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A)
#define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D) /* z3+z2 */
#define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A)
#define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18)
#define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18)
#define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) /* xor y0 */
#define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) /* z0 */
#define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A)
#define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \
L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \
L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \
L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \
L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \
L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \
L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \
L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \
L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \
L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \
L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \
L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \
L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \
L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \
L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \
L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \
L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \
L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \
L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \
L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \
L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \
L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \
L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \
L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -