#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#if CRYPTOPP_BOOL_X32 || (defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION < 30400))
# define CRYPTOPP_DISABLE_RIJNDAEL_ASM
#endif

#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS)
# define CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS 1
#endif

#define M128I_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))

#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
# if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
using namespace rdtable;
# else
static word64 Te[256];
# endif
static word64 Td[256];
#else // Not CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
# if defined(CRYPTOPP_X64_MASM_AVAILABLE)
namespace rdtable {CRYPTOPP_ALIGN_DATA(16) word64 Te[256+2];}
# endif
CRYPTOPP_ALIGN_DATA(16) static word32 Te[256*4];
CRYPTOPP_ALIGN_DATA(16) static word32 Td[256*4];
#endif // CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS

static volatile bool s_TeFilled = false, s_TdFilled = false;
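// Note on the two layouts above: when (possibly unaligned) 32-bit loads are
// permitted, Te/Td hold 256 packed word64 entries that the TL_F/TL_M macros
// below index at byte offsets; otherwise each table keeps four rotated word32
// copies per byte value (hence 256*4 entries). s_TeFilled/s_TdFilled guard the
// lazy, one-time population performed by FillEncTable()/FillDecTable().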
ANONYMOUS_NAMESPACE_BEGIN
CRYPTOPP_ALIGN_DATA(16)
const word32 s_one[] = {0, 0, 0, 1<<24};

CRYPTOPP_ALIGN_DATA(16)
const word32 s_rconLE[] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
};
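// s_rconLE holds the ten AES key-schedule round constants (successive powers
// of x in GF(2^8): 0x01 ... 0x36); ten are all any key size needs with a
// 128-bit block. s_one places 1<<24 in the last 32-bit lane, so the last byte
// of the 16-byte constant is 1 -- the usual form for stepping a big-endian
// counter block; its consumers sit outside this excerpt.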
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86

static inline bool AliasedWithTable(const byte *begin, const byte *end)
{
    ptrdiff_t s0 = uintptr_t(begin)%4096, s1 = uintptr_t(end)%4096;
    ptrdiff_t t0 = uintptr_t(Te)%4096, t1 = (uintptr_t(Te)+sizeof(Te))%4096;
    if (t1 > t0)
        return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1);
    else
        return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);
}
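// AliasedWithTable() reduces both ranges modulo the 4 KiB page size and
// reports whether the candidate workspace [begin, end) lands on the same page
// offsets as the Te table; the second return handles the case where Te wraps
// past a page boundary (t1 <= t0). The assembly path below uses it to pick a
// scratch block that cannot contend with Te for the same cache sets.
// Illustrative use, mirroring AdvancedProcessBlocks() further down (sketch
// only, with a hypothetical 256-byte-aligned base pointer):
//
//   byte *space = base;
//   while (AliasedWithTable(space, space + sizeof(Locals)))
//       space += 256;   // slide by the 256-byte alias block size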
struct Locals
{
    word32 subkeys[4*12], workspace[8];
    const byte *inBlocks, *inXorBlocks, *outXorBlocks;
    byte *outBlocks;
    size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement;
    size_t regSpill, lengthAndCounterFlag, keysBegin;
};

const size_t s_aliasPageSize = 4096;
const size_t s_aliasBlockSize = 256;
const size_t s_sizeToAllocate = s_aliasPageSize + s_aliasBlockSize + sizeof(Locals);

#endif // CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86

ANONYMOUS_NAMESPACE_END
#define QUARTER_ROUND(L, T, t, a, b, c, d) \
    a ^= L(T, 3, byte(t)); t >>= 8;\
    b ^= L(T, 2, byte(t)); t >>= 8;\
    c ^= L(T, 1, byte(t)); t >>= 8;\
    d ^= L(T, 0, t);

#define QUARTER_ROUND_LE(t, a, b, c, d) \
    tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
    tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
    tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
    tempBlock[d] = ((byte *)(Te+t))[1];

#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
    #define QUARTER_ROUND_LD(t, a, b, c, d) \
        tempBlock[a] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
        tempBlock[b] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
        tempBlock[c] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
        tempBlock[d] = ((byte *)(Td+t))[GetNativeByteOrder()*7];
#else
    #define QUARTER_ROUND_LD(t, a, b, c, d) \
        tempBlock[a] = Sd[byte(t)]; t >>= 8;\
        tempBlock[b] = Sd[byte(t)]; t >>= 8;\
        tempBlock[c] = Sd[byte(t)]; t >>= 8;\
        tempBlock[d] = Sd[t];
#endif

#define QUARTER_ROUND_E(t, a, b, c, d) QUARTER_ROUND(TL_M, Te, t, a, b, c, d)
#define QUARTER_ROUND_D(t, a, b, c, d) QUARTER_ROUND(TL_M, Td, t, a, b, c, d)

#ifdef CRYPTOPP_LITTLE_ENDIAN
    #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, d, c, b, a)
    #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, d, c, b, a)
    #if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
        #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (6-i)%4+1))
        #define TL_M(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (i+3)%4+1))
    #else
        #define TL_F(T, i, x) rotrFixed(T[x], (3-i)*8)
        #define TL_M(T, i, x) T[i*256 + x]
    #endif
#else
    #define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, a, b, c, d)
    #define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, a, b, c, d)
    #if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
        #define TL_F(T, i, x) (*(word32 *)(void *)((byte *)T + x*8 + (4-i)%4))
        #define TL_M TL_F
    #else
        #define TL_F(T, i, x) rotrFixed(T[x], i*8)
        #define TL_M(T, i, x) T[i*256 + x]
    #endif
#endif

#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))

#define f3(x) (f2(x) ^ x)
#define f9(x) (f8(x) ^ x)
#define fb(x) (f8(x) ^ f2(x) ^ x)
#define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
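// The f-macros above are multiplication by small constants in GF(2^8) modulo
// the AES polynomial x^8 + x^4 + x^3 + x + 1 (0x11B): f2 is "xtime", f4 and f8
// iterate it, and f3/f9/fb/fd/fe combine them to give the MixColumns (02, 03)
// and InvMixColumns (09, 0b, 0d, 0e) coefficients. A plain-function sketch of
// the same idea (illustrative only, not part of the library):
//
//   inline byte xtime(byte x)   { return byte((x << 1) ^ ((x >> 7) * 0x1B)); }
//   inline byte gf_mul3(byte x) { return byte(xtime(x) ^ x); }   // f3
//   // e.g. gf_mul3(0x57) == 0xF9, the FIPS-197 worked example of 0x57 * 0x03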
void Rijndael::Base::FillEncTable()
{
    for (int i=0; i<256; i++)
    {
        byte x = Se[i];
#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
        word32 y = word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
        Te[i] = word64(y | f3(x))<<32 | y;
#else
        word32 y = f3(x) | word32(x)<<8 | word32(x)<<16 | word32(f2(x))<<24;
        for (int j=0; j<4; j++)
        {
            Te[i+j*256] = y;
            y = rotrConstant<8>(y);
        }
#endif
    }
#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
    Te[256] = Te[257] = 0;
#endif
    s_TeFilled = true;
}
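// In the packed layout each 64-bit Te entry holds the bytes
// 0, S, S, 2S, 3S, S, S, 2S (low byte first, with S = Se[i]), so the four
// rotations of the MixColumns column needed by the four byte positions are
// available as 32-bit loads at byte offsets 1..4, and byte offset 1 alone
// yields the plain S-box value that QUARTER_ROUND_LE uses for the last round.
// The zero entries Te[256] and Te[257] double as the 16-byte all-zero block
// the SSE2/MASM AdvancedProcessBlocks path substitutes when no XOR input is
// supplied (see the "zeros" pointer below).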
void Rijndael::Base::FillDecTable()
{
    for (int i=0; i<256; i++)
    {
        byte x = Sd[i];
#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
        word32 y = word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
        Td[i] = word64(y | fb(x))<<32 | y | x;
#else
        word32 y = fb(x) | word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
        for (int j=0; j<4; j++)
        {
            Td[i+j*256] = y;
            y = rotrConstant<8>(y);
        }
#endif
    }
    s_TdFilled = true;
}
#if (CRYPTOPP_AESNI_AVAILABLE)
extern void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32* rk);
extern void Rijndael_UncheckedSetKeyRev_AESNI(word32 *key, unsigned int rounds);

extern size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(const word32 *subkeys, size_t rounds,
        const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subkeys, size_t rounds,
        const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif

#if (CRYPTOPP_ARM_AES_AVAILABLE)
extern size_t Rijndael_Enc_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, size_t rounds,
        const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, size_t rounds,
        const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif

#if (CRYPTOPP_POWER8_AES_AVAILABLE)
extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen,
        word32* rk, const byte* Se);

extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds,
        const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds,
        const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, const NameValuePairs &)
{
    AssertValidKeyLength(keyLen);

#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
    m_aliasBlock.New(s_sizeToAllocate);
    m_aliasBlock.SetMark(0);
#endif

    m_rounds = keyLen/4 + 6;
    m_key.New(4*(m_rounds+1));
    word32 *rk = m_key;

#if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE41_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32))
    if (HasAESNI() && HasSSE41())
    {
        Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk);
        if (!IsForwardTransformation())
            Rijndael_UncheckedSetKeyRev_AESNI(m_key, m_rounds);
        return;
    }
#endif

#if CRYPTOPP_POWER8_AES_AVAILABLE
    if (HasAES())
    {
        Rijndael_UncheckedSetKey_POWER8(userKey, keyLen, rk, Se);
        return;
    }
#endif
    GetUserKey(BIG_ENDIAN_ORDER, rk, keyLen/4, userKey, keyLen);
    const word32 *rc = rcon;
    word32 temp;

    while (true)
    {
        temp = rk[keyLen/4-1];
        word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
                   (word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
        rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
        rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
        rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
        rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];

        if (rk + keyLen/4 + 4 == m_key.end())
            break;

        if (keyLen == 24)
        {
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
        }
        else if (keyLen == 32)
        {
            temp = rk[11];
            rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];
        }
        rk += keyLen/4;
    }
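    // The loop above is the standard AES key expansion: for each block of
    // Nk words (Nk = keyLen/4) the first new word is
    //   W[i] = W[i-Nk] ^ SubWord(RotWord(W[i-1])) ^ Rcon[i/Nk]
    // and the rest are W[i] = W[i-Nk] ^ W[i-1], with the extra SubWord on
    // W[i-1] every fourth word when Nk == 8 (AES-256) -- that is the
    // Se[GETBYTE(temp, ...)] expression in the keyLen == 32 branch.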
    rk = m_key;
    if (IsForwardTransformation())
    {
        if (!s_TeFilled)
            FillEncTable();
    }
    else
    {
        if (!s_TdFilled)
            FillDecTable();

        #define InverseMixColumn(x) \
            TL_M(Td, 0, Se[GETBYTE(x, 3)]) ^ TL_M(Td, 1, Se[GETBYTE(x, 2)]) ^ \
            TL_M(Td, 2, Se[GETBYTE(x, 1)]) ^ TL_M(Td, 3, Se[GETBYTE(x, 0)])

        unsigned int i, j;
        for (i = 4, j = 4*m_rounds-4; i < j; i += 4, j -= 4)
        {
            temp = InverseMixColumn(rk[i    ]); rk[i    ] = InverseMixColumn(rk[j    ]); rk[j    ] = temp;
            temp = InverseMixColumn(rk[i + 1]); rk[i + 1] = InverseMixColumn(rk[j + 1]); rk[j + 1] = temp;
            temp = InverseMixColumn(rk[i + 2]); rk[i + 2] = InverseMixColumn(rk[j + 2]); rk[j + 2] = temp;
            temp = InverseMixColumn(rk[i + 3]); rk[i + 3] = InverseMixColumn(rk[j + 3]); rk[j + 3] = temp;
        }

        rk[i+0] = InverseMixColumn(rk[i+0]);
        rk[i+1] = InverseMixColumn(rk[i+1]);
        rk[i+2] = InverseMixColumn(rk[i+2]);
        rk[i+3] = InverseMixColumn(rk[i+3]);
    }
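    // InverseMixColumn(x) applies InvMixColumns to one round-key word: Td is
    // indexed with Se[...], so the InvSubBytes folded into Td cancels against
    // the forward S-box and only the {0e,09,0d,0b} column multiply remains.
    // Together with the swap loop this puts the schedule in the form needed by
    // the "equivalent inverse cipher", letting decryption rounds reuse the same
    // table-lookup structure as encryption.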
#if CRYPTOPP_AESNI_AVAILABLE
    if (HasAESNI())
        ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
#endif
#if CRYPTOPP_ARM_AES_AVAILABLE
    if (HasAES())
        ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
#endif
}

void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) || CRYPTOPP_AESNI_AVAILABLE
# if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
    if (HasSSE2())
# else
    if (HasAESNI())
# endif
    {
        (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
        return;
    }
#endif

#if (CRYPTOPP_ARM_AES_AVAILABLE)
    if (HasAES())
    {
        (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
        return;
    }
#endif

#if (CRYPTOPP_POWER8_AES_AVAILABLE)
    if (HasAES())
    {
        (void)Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
        return;
    }
#endif
    typedef BlockGetAndPut<word32, BigEndian> Block;

    word32 s0, s1, s2, s3, t0, t1, t2, t3;
    Block::Get(inBlock)(s0)(s1)(s2)(s3);

    const word32 *rk = m_key;
    s0 ^= rk[0]; s1 ^= rk[1]; s2 ^= rk[2]; s3 ^= rk[3];
    t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
    rk += 8;

    const int cacheLineSize = GetCacheLineSize();
    unsigned int i;
    volatile word32 _u = 0;
    word32 u = _u;
#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
    for (i=0; i<2048; i+=cacheLineSize)
#else
    for (i=0; i<1024; i+=cacheLineSize)
#endif
        u &= *(const word32 *)(const void *)(((const byte *)Te)+i);
    s0 |= u; s1 |= u; s2 |= u; s3 |= u;
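    // The loop above is a cache-timing countermeasure: before any data-
    // dependent lookups, one word is read at cache-line intervals across the
    // Te table so its lines are pre-loaded. ANDing the reads into 'u' (seeded
    // from a volatile zero) and then ORing the always-zero result into the
    // state stops the compiler from discarding the loads while leaving
    // s0..s3 unchanged.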
    QUARTER_ROUND_FE(s3, t0, t1, t2, t3)
    QUARTER_ROUND_FE(s2, t3, t0, t1, t2)
    QUARTER_ROUND_FE(s1, t2, t3, t0, t1)
    QUARTER_ROUND_FE(s0, t1, t2, t3, t0)

    unsigned int r = m_rounds/2 - 1;
    do
    {
        s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

        QUARTER_ROUND_E(t3, s0, s1, s2, s3)
        QUARTER_ROUND_E(t2, s3, s0, s1, s2)
        QUARTER_ROUND_E(t1, s2, s3, s0, s1)
        QUARTER_ROUND_E(t0, s1, s2, s3, s0)

        t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

        QUARTER_ROUND_E(s3, t0, t1, t2, t3)
        QUARTER_ROUND_E(s2, t3, t0, t1, t2)
        QUARTER_ROUND_E(s1, t2, t3, t0, t1)
        QUARTER_ROUND_E(s0, t1, t2, t3, t0)

        rk += 8;
    } while (--r);

    word32 tbw[4];
    byte *const tempBlock = (byte *)tbw;

    QUARTER_ROUND_LE(t2, 15, 2, 5, 8)
    QUARTER_ROUND_LE(t1, 11, 14, 1, 4)
    QUARTER_ROUND_LE(t0, 7, 10, 13, 0)
    QUARTER_ROUND_LE(t3, 3, 6, 9, 12)

    Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
}
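// The last round is handled through QUARTER_ROUND_LE/QUARTER_ROUND_LD rather
// than the T-tables: only SubBytes (or InvSubBytes) and ShiftRows remain, so
// single S-box bytes are picked out of the table entries and scattered into
// tempBlock at their already-shifted positions; the final AddRoundKey happens
// in the Block::Put expression, which also applies the optional xorBlock.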
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if CRYPTOPP_AESNI_AVAILABLE
    if (HasAESNI())
    {
        (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
        return;
    }
#endif

#if (CRYPTOPP_ARM_AES_AVAILABLE)
    if (HasAES())
    {
        (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
        return;
    }
#endif

#if (CRYPTOPP_POWER8_AES_AVAILABLE)
    if (HasAES())
    {
        (void)Rijndael::Dec::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
        return;
    }
#endif

    typedef BlockGetAndPut<word32, BigEndian> Block;

    word32 s0, s1, s2, s3, t0, t1, t2, t3;
    Block::Get(inBlock)(s0)(s1)(s2)(s3);

    const word32 *rk = m_key;
    s0 ^= rk[0]; s1 ^= rk[1]; s2 ^= rk[2]; s3 ^= rk[3];
    t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
    rk += 8;

    const int cacheLineSize = GetCacheLineSize();
    unsigned int i;
    volatile word32 _u = 0;
    word32 u = _u;
#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS)
    for (i=0; i<2048; i+=cacheLineSize)
#else
    for (i=0; i<1024; i+=cacheLineSize)
#endif
        u &= *(const word32 *)(const void *)(((const byte *)Td)+i);
    s0 |= u; s1 |= u; s2 |= u; s3 |= u;
    QUARTER_ROUND_FD(s3, t2, t1, t0, t3)
    QUARTER_ROUND_FD(s2, t1, t0, t3, t2)
    QUARTER_ROUND_FD(s1, t0, t3, t2, t1)
    QUARTER_ROUND_FD(s0, t3, t2, t1, t0)

    unsigned int r = m_rounds/2 - 1;
    do
    {
        s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

        QUARTER_ROUND_D(t3, s2, s1, s0, s3)
        QUARTER_ROUND_D(t2, s1, s0, s3, s2)
        QUARTER_ROUND_D(t1, s0, s3, s2, s1)
        QUARTER_ROUND_D(t0, s3, s2, s1, s0)

        t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

        QUARTER_ROUND_D(s3, t2, t1, t0, t3)
        QUARTER_ROUND_D(s2, t1, t0, t3, t2)
        QUARTER_ROUND_D(s1, t0, t3, t2, t1)
        QUARTER_ROUND_D(s0, t3, t2, t1, t0)

        rk += 8;
    } while (--r);

#if !(defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) || defined(CRYPTOPP_ALLOW_RIJNDAEL_UNALIGNED_DATA_ACCESS))
    // preload the inverse S-box, which QUARTER_ROUND_LD uses in this build
    u = _u;
    for (i=0; i<256; i+=cacheLineSize)
        u &= *(const word32 *)(const void *)(Sd+i);
    u &= *(const word32 *)(const void *)(Sd+252);
    t0 |= u; t1 |= u; t2 |= u; t3 |= u;
#endif
    word32 tbw[4];
    byte *const tempBlock = (byte *)tbw;

    QUARTER_ROUND_LD(t2, 7, 2, 13, 8)
    QUARTER_ROUND_LD(t1, 3, 14, 9, 4)
    QUARTER_ROUND_LD(t0, 15, 10, 5, 0)
    QUARTER_ROUND_LD(t3, 11, 6, 1, 12)

    Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
}
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)

CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k)
{
    CRYPTOPP_UNUSED(locals); CRYPTOPP_UNUSED(k);

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32

    #define L_INDEX(i) (L_REG+768+i)
    #define L_INXORBLOCKS L_INBLOCKS+4
    #define L_OUTXORBLOCKS L_INBLOCKS+8
    #define L_OUTBLOCKS L_INBLOCKS+12
    #define L_INCREMENTS L_INDEX(16*15)
    #define L_SP L_INDEX(16*16)
    #define L_LENGTH L_INDEX(16*16+4)
    #define L_KEYS_BEGIN L_INDEX(16*16+8)

    #define MXOR(a,b,c) \
        AS2( movd mm7, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\
        AS2( pxor MM(a), mm7)\

    #define MMOV(a,b,c) \
        AS2( movd MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\

#else

    #define L_INDEX(i) (L_REG+i)
    #define L_INXORBLOCKS L_INBLOCKS+8
    #define L_OUTXORBLOCKS L_INBLOCKS+16
    #define L_OUTBLOCKS L_INBLOCKS+24
    #define L_INCREMENTS L_INDEX(16*16)
    #define L_LENGTH L_INDEX(16*18+8)
    #define L_KEYS_BEGIN L_INDEX(16*19)

    #define MXOR(a,b,c) \
        AS2( xor MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\

    #define MMOV(a,b,c) \
        AS2( mov MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\

#endif

    #define L_SUBKEYS L_INDEX(0)
    #define L_SAVED_X L_SUBKEYS
    #define L_KEY12 L_INDEX(16*12)
    #define L_LASTROUND L_INDEX(16*13)
    #define L_INBLOCKS L_INDEX(16*14)
    #define MAP0TO4(i) (ASM_MOD(i+3,4)+1)

    #define XOR(a,b,c) \
        AS2( xor a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\

    #define MOV(a,b,c) \
        AS2( mov a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\

#ifdef CRYPTOPP_GENERATE_X64_MASM
    Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
        mov AS_REG_7, ?Te@rdtable@CryptoPP@@3PA_KA
        mov edi, DWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
#elif defined(__GNUC__)
#if CRYPTOPP_BOOL_X64
    AS2( mov AS_REG_7, WORD_REG(si))
    AS2( lea AS_REG_7, [Te])
    AS2( mov edi, [g_cacheLineSize])
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( mov [ecx+16*12+16*4], esp)
    AS2( lea esp, [ecx-768])
    AS2( mov WORD_REG(si), [L_KEYS_BEGIN])
    AS2( mov WORD_REG(ax), 16)
    AS2( and WORD_REG(ax), WORD_REG(si))
    AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)])
    AS2( movdqa [L_KEY12], xmm3)
    AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16])
    AS2( sub WORD_REG(ax), WORD_REG(si))
    AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(si)])
    AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0)
    AS2( add WORD_REG(si), 16)
    AS2( cmp WORD_REG(si), 16*12)
    AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(si)])
    AS2( movdqa xmm1, [WORD_REG(dx)])
    AS2( MOVD MM(1), [WORD_REG(dx)+4*4])
    AS2( mov ebx, [WORD_REG(dx)+5*4])
    AS2( mov ecx, [WORD_REG(dx)+6*4])
    AS2( mov edx, [WORD_REG(dx)+7*4])
    AS2( xor WORD_REG(ax), WORD_REG(ax))
    AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
    AS2( add WORD_REG(ax), WORD_REG(di))
    AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
    AS2( add WORD_REG(ax), WORD_REG(di))
    AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
    AS2( add WORD_REG(ax), WORD_REG(di))
    AS2( mov esi, [AS_REG_7+WORD_REG(ax)])
    AS2( add WORD_REG(ax), WORD_REG(di))
    AS2( cmp WORD_REG(ax), 2048)
    AS2( test DWORD PTR [L_LENGTH], 1)
    AS2( mov WORD_REG(si), [L_INBLOCKS])
    AS2( movdqu xmm2, [WORD_REG(si)])
    AS2( pxor xmm2, xmm1)
    AS2( psrldq xmm1, 14)
    AS2( mov al, BYTE PTR [WORD_REG(si)+15])
    AS2( MOVD MM(2), eax)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( mov eax, [L_KEY12+0*4])
    AS2( mov edi, [L_KEY12+2*4])
    AS2( MOVD MM(0), [L_KEY12+3*4])
    AS2( xor ebx, [L_KEY12+1*4])
    AS2( MOVD edx, MM(1))
    AS2( MOVD [L_SAVED_X+3*4], MM(0))
    AS2( mov [L_SAVED_X+0*4], eax)
    AS2( mov [L_SAVED_X+1*4], ebx)
    AS2( mov [L_SAVED_X+2*4], edi)
    AS2( MOVD MM(1), [L_KEY12+0*4])
    AS2( mov ebx, [L_KEY12+1*4])
    AS2( mov ecx, [L_KEY12+2*4])
    AS2( mov edx, [L_KEY12+3*4])
    AS2( mov WORD_REG(ax), [L_INBLOCKS])
    AS2( movdqu xmm2, [WORD_REG(ax)])
    AS2( mov WORD_REG(si), [L_INXORBLOCKS])
    AS2( movdqu xmm5, [WORD_REG(si)])
    AS2( pxor xmm2, xmm1)
    AS2( pxor xmm2, xmm5)
    AS2( MOVD eax, MM(1))
    AS2( add L_REG, [L_KEYS_BEGIN])
    AS2( add L_REG, 4*16)
    AS2( MOVD ecx, MM(2))
    AS2( MOVD edx, MM(1))
    AS2( mov eax, [L_SAVED_X+0*4])
    AS2( mov ebx, [L_SAVED_X+1*4])
    AS2( and WORD_REG(cx), 255)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( paddb MM(2), mm3)
    AS2( xor edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3])
    AS2( xor ecx, [L_SAVED_X+2*4])
    AS2( xor edx, [L_SAVED_X+3*4])
    AS2( add L_REG, [L_KEYS_BEGIN])
    AS2( add L_REG, 3*16)
    AS2( MOVD MM(0), [L_SUBKEYS-4*16+3*4])
    AS2( mov edi, [L_SUBKEYS-4*16+2*4])
    AS2( xor eax, [L_SUBKEYS-4*16+0*4])
    AS2( xor ebx, [L_SUBKEYS-4*16+1*4])
    AS2( MOVD edx, MM(0))
    AS2( MOVD MM(0), [L_SUBKEYS-4*16+7*4])
    AS2( mov edi, [L_SUBKEYS-4*16+6*4])
    AS2( xor eax, [L_SUBKEYS-4*16+4*4])
    AS2( xor ebx, [L_SUBKEYS-4*16+5*4])
    AS2( MOVD edx, MM(0))
    AS2( test L_REG, 255)
    AS2( sub L_REG, 16*16)

    #define LAST(a, b, c) \
        AS2( movzx esi, a )\
        AS2( movzx edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1] )\
        AS2( movzx esi, b )\
        AS2( xor edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0] )\
        AS2( mov WORD PTR [L_LASTROUND+c], di )\

    AS2( mov WORD_REG(ax), [L_OUTXORBLOCKS])
    AS2( mov WORD_REG(bx), [L_OUTBLOCKS])
    AS2( mov WORD_REG(cx), [L_LENGTH])
    AS2( sub WORD_REG(cx), 16)
    AS2( movdqu xmm2, [WORD_REG(ax)])
    AS2( pxor xmm2, xmm4)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( movdqa xmm0, [L_INCREMENTS])
    AS2( paddd xmm0, [L_INBLOCKS])
    AS2( movdqa [L_INBLOCKS], xmm0)
    AS2( movdqa xmm0, [L_INCREMENTS+16])
    AS2( paddq xmm0, [L_INBLOCKS+16])
    AS2( movdqa [L_INBLOCKS+16], xmm0)
    AS2( pxor xmm2, [L_LASTROUND])
    AS2( movdqu [WORD_REG(bx)], xmm2)
    AS2( mov [L_LENGTH], WORD_REG(cx))
    AS2( test WORD_REG(cx), 1)
#if CRYPTOPP_BOOL_X64
    AS2( movdqa xmm0, [L_INCREMENTS])
    AS2( paddq xmm0, [L_INBLOCKS])
    AS2( movdqa [L_INBLOCKS], xmm0)
    AS2( xorps xmm0, xmm0)
    AS2( lea WORD_REG(ax), [L_SUBKEYS+7*16])
    AS2( movaps [WORD_REG(ax)-7*16], xmm0)
    AS2( movaps [WORD_REG(ax)-6*16], xmm0)
    AS2( movaps [WORD_REG(ax)-5*16], xmm0)
    AS2( movaps [WORD_REG(ax)-4*16], xmm0)
    AS2( movaps [WORD_REG(ax)-3*16], xmm0)
    AS2( movaps [WORD_REG(ax)-2*16], xmm0)
    AS2( movaps [WORD_REG(ax)-1*16], xmm0)
    AS2( movaps [WORD_REG(ax)+0*16], xmm0)
    AS2( movaps [WORD_REG(ax)+1*16], xmm0)
    AS2( movaps [WORD_REG(ax)+2*16], xmm0)
    AS2( movaps [WORD_REG(ax)+3*16], xmm0)
    AS2( movaps [WORD_REG(ax)+4*16], xmm0)
    AS2( movaps [WORD_REG(ax)+5*16], xmm0)
    AS2( movaps [WORD_REG(ax)+6*16], xmm0)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
    AS2( mov esp, [L_SP])
#if defined(_MSC_VER) && CRYPTOPP_BOOL_X86
#ifdef CRYPTOPP_GENERATE_X64_MASM
    Rijndael_Enc_AdvancedProcessBlocks ENDP
    :
    : "c" (locals), "d" (k), "S" (Te), "D" (g_cacheLineSize)
    : "memory", "cc", "%eax"
#if CRYPTOPP_BOOL_X64
    , "%rbx", "%r8", "%r9", "%r10", "%r11", "%r12"
#endif

#ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k);
#endif
#if CRYPTOPP_RIJNDAEL_ADVANCED_PROCESS_BLOCKS
size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
{
#if CRYPTOPP_AESNI_AVAILABLE
    if (HasAESNI())
        return Rijndael_Enc_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
#if CRYPTOPP_ARM_AES_AVAILABLE
    if (HasAES())
        return Rijndael_Enc_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
#if CRYPTOPP_POWER8_AES_AVAILABLE
    if (HasAES())
        return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
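    // The SSE2/MASM fallback below marshals everything the hand-written
    // Rijndael_Enc_AdvancedProcessBlocks routine needs into a Locals record:
    // input/output/xor pointers and per-block increments derived from the
    // BT_* flags, the byte count with the BT_InBlockIsCounter bit folded into
    // lengthAndCounterFlag, and keysBegin, the offset of the first round key
    // to copy into the workspace. The workspace itself is carved out of
    // m_aliasBlock at a 256-byte boundary chosen so it does not alias Te
    // (see AliasedWithTable above).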
#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
    if (HasSSE2())
    {
        if (length < BLOCKSIZE)
            return length;

        static const byte *zeros = (const byte*)(Te+256);
        m_aliasBlock.SetMark(m_aliasBlock.size());
        byte *space = NULLPTR, *originalSpace = const_cast<byte*>(m_aliasBlock.data());

        // round up to the next 256-byte boundary, then slide until the
        // workspace no longer aliases the Te table
        space = originalSpace + (s_aliasBlockSize - (uintptr_t)originalSpace % s_aliasBlockSize) % s_aliasBlockSize;
        while (AliasedWithTable(space, space + sizeof(Locals)))
            space += 256;

        size_t increment = BLOCKSIZE;
        if (flags & BT_ReverseDirection)
        {
            inBlocks += length - BLOCKSIZE;
            xorBlocks += length - BLOCKSIZE;
            outBlocks += length - BLOCKSIZE;
            increment = 0-increment;
        }

        Locals &locals = *(Locals *)(void *)space;

        locals.inBlocks = inBlocks;
        locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros;
        locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks;
        locals.outBlocks = outBlocks;

        locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;
        locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0;
        locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment;
        locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment;

        locals.lengthAndCounterFlag = length - (length%16) - bool(flags & BT_InBlockIsCounter);
        int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2);
        locals.keysBegin = (12-keysToCopy)*16;

        Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key);

        return length % BLOCKSIZE;
    }
#endif

    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
{
#if CRYPTOPP_AESNI_AVAILABLE
    if (HasAESNI())
        return Rijndael_Dec_AdvancedProcessBlocks_AESNI(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
#if CRYPTOPP_ARM_AES_AVAILABLE
    if (HasAES())
        return Rijndael_Dec_AdvancedProcessBlocks_ARMV8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
#if CRYPTOPP_POWER8_AES_AVAILABLE
    if (HasAES())
        return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif

    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_RIJNDAEL_ADVANCED_PROCESS_BLOCKS