00001
00002
00003
00004
00005 #include "pch.h"
00006
00007 #ifndef CRYPTOPP_GENERATE_X64_MASM
00008
00009 #include "panama.h"
00010 #include "misc.h"
00011 #include "cpu.h"
00012
00013 NAMESPACE_BEGIN(CryptoPP)
00014
00015 template <class B>
00016 void Panama<B>::Reset()
00017 {
00018 memset(m_state, 0, m_state.SizeInBytes());
00019 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00020 m_state[17] = HasSSSE3();
00021 #endif
00022 }
00023
00024 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
00025
// Panama_SSE2_Pull(count, state, z, y): hand-written SSE2 version of the Panama
// "pull" rounds, i.e. the p == NULL case of Panama<B>::Iterate. The cipher policy
// methods in this file call it in place of Iterate(count, NULL, z, y) when the byte
// order is little-endian and HasSSE2() is true.
//   count - number of rounds; each round handles 32 bytes of z/y
//   state - Panama state block: words 0..16 are the state proper, word 17 holds the
//           buffer start offset (bit 0 doubles as the SSSE3 flag written by Reset),
//           and the buffer begins at word 20
//   z     - output pointer, or NULL to run rounds without producing output
//   y     - data XORed into the output, or NULL to emit the raw values
// The same text is used three ways: as GCC inline assembly, as non-GCC inline
// assembly (the pragma below suggests MSVC), and, when CRYPTOPP_GENERATE_X64_MASM
// is defined, as preprocessed MASM source for x64.
00026 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// Only the prototype is needed here; presumably the body is assembled separately
// from the MASM variant below.
00027 extern "C" {
00028 void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y);
00029 }
00030 #elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00031
00032 #ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM prologue: reserve stack space and save xmm6/xmm7, which the body overwrites
// and the epilogue at the end of this block restores.
00033 Panama_SSE2_Pull PROC FRAME
00034 alloc_stack(2*16+8)
00035 save_xmm128 xmm6, 0h
00036 save_xmm128 xmm7, 10h
00037 .endprolog
00038 #else
00039 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
00040 void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
00041 {
00042 #ifdef __GNUC__
// GCC: one asm statement in Intel syntax; the arguments arrive in fixed registers
// through the input constraints listed at the end of the statement.
00043 __asm__ __volatile__
00044 (
00045 ".intel_syntax noprefix;"
00046 AS_PUSH_IF86( bx)
00047 #else
// Other compilers: load the four arguments into AS_REG_1..AS_REG_4 by hand.
00048 AS2( mov AS_REG_1, count)
00049 AS2( mov AS_REG_2, state)
00050 AS2( mov AS_REG_3, z)
00051 AS2( mov AS_REG_4, y)
00052 #endif
00053 #endif
00054
// REG_loopEnd names where the loop's end offset lives: the top of the stack on
// 32-bit x86 (it is pushed below), a spare register on x64.
00055 #if CRYPTOPP_BOOL_X86
00056 #define REG_loopEnd [esp]
00057 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
00058 #define REG_loopEnd rdi
00059 #else
00060 #define REG_loopEnd r8
00061 #endif
00062
// AS_REG_1 = count*32; a zero count jumps straight to label 5 (function exit).
// AS_REG_6 = state[17], the running buffer offset; end offset = start + count*32.
00063 AS2( shl AS_REG_1, 5)
00064 ASJ( jz, 5, f)
00065 AS2( mov AS_REG_6d, [AS_REG_2+4*17])
00066 AS2( add AS_REG_1, AS_REG_6)
00067
00068 #if CRYPTOPP_BOOL_X64
00069 AS2( mov REG_loopEnd, AS_REG_1)
00070 #else
// x86: preserve ebp, then push the end offset so that [esp] is REG_loopEnd.
00071 AS1( push ebp)
00072 AS1( push AS_REG_1)
00073 #endif
00074
// Load state words 0..15 into xmm0..xmm3 and word 16 into eax.
00075 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
00076 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
00077 AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
00078 AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
00079 AS2( mov eax, dword ptr [AS_REG_2+4*16])
00080
// Label 4: top of the per-round loop.
00081 ASL(4)
00082
// Build xmm5 and xmm6: xmm2 and xmm3 shifted down by one 32-bit word, with the low
// word of xmm3 (resp. eax) shifted in at the top. Bit 0 of AS_REG_6 selects the
// SSSE3 palignr variant (label 6) over the SSE2 movss/pshufd variant.
00083 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00084 AS2( test AS_REG_6, 1)
00085 ASJ( jnz, 6, f)
00086 #endif
00087 AS2( movdqa xmm6, xmm2)
00088 AS2( movss xmm6, xmm3)
00089 ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
00090 AS2( movd xmm6, eax)
00091 AS2( movdqa xmm7, xmm3)
00092 AS2( movss xmm7, xmm6)
00093 ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
00094 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00095 ASJ( jmp, 7, f)
00096 ASL(6)
00097 AS2( movdqa xmm5, xmm3)
00098 AS3( palignr xmm5, xmm2, 4)
00099 AS2( movd xmm6, eax)
00100 AS3( palignr xmm6, xmm3, 4)
00101 ASL(7)
00102 #endif
00103
// Scalar step for the word held in eax: eax ^= (~low(xmm2) | low(xmm3)).
00104 AS2( movd AS_REG_1d, xmm2)
00105 AS1( not AS_REG_1d)
00106 AS2( movd AS_REG_7d, xmm3)
00107 AS2( or AS_REG_1d, AS_REG_7d)
00108 AS2( xor eax, AS_REG_1d)
00109
// SSE2_Index(i) is the same storage permutation as the a(i)/c(i) macros in Iterate.
00110 #define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
00111
// pi(i): take the low dword of xmm7, rotate it left by the same amount the GP macro
// in Iterate uses for index i, and store it to the permuted slot in the state.
00112 #define pi(i) \
00113 AS2( movd AS_REG_1d, xmm7)\
00114 AS2( rol AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
00115 AS2( mov [AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)
00116
// pi4(x, y, z, a, b, c, d): xmm7 = (~x | y) ^ z for four words at once, then each
// of the four dwords is brought to the low lane in turn and handed to pi() with
// indices a, b, c, d.
00117 #define pi4(x, y, z, a, b, c, d) \
00118 AS2( pcmpeqb xmm7, xmm7)\
00119 AS2( pxor xmm7, x)\
00120 AS2( por xmm7, y)\
00121 AS2( pxor xmm7, z)\
00122 pi(a)\
00123 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
00124 pi(b)\
00125 AS2( punpckhqdq xmm7, xmm7)\
00126 pi(c)\
00127 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
00128 pi(d)
00129
// Process indices 1..16; the results go to memory, xmm0..xmm3 keep the old state.
00130 pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
00131 pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
00132 pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
00133 pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
00134
00135
// Interleave the old state words (still in xmm0..xmm3) into the arrangement used
// by the output and buffer-update code below.
00136 AS2( movdqa xmm4, xmm3)
00137 AS2( punpcklqdq xmm3, xmm2)
00138 AS2( punpckhdq xmm4, xmm2)
00139 AS2( movdqa xmm2, xmm1)
00140 AS2( punpcklqdq xmm1, xmm0)
00141 AS2( punpckhdq xmm2, xmm0)
00142
00143
// Output stage, skipped entirely (label 0) when z is NULL. The 32 output bytes are
// assembled in xmm4 and xmm6.
00144 AS2( test AS_REG_3, AS_REG_3)
00145 ASJ( jz, 0, f)
00146 AS2( movdqa xmm6, xmm4)
00147 AS2( punpcklqdq xmm4, xmm2)
00148 AS2( punpckhqdq xmm6, xmm2)
// XOR with y: unaligned y goes to label 2, NULL y skips the XOR (label 1),
// otherwise y is 16-byte aligned and can be used directly as a memory operand.
00149 AS2( test AS_REG_4, 15)
00150 ASJ( jnz, 2, f)
00151 AS2( test AS_REG_4, AS_REG_4)
00152 ASJ( jz, 1, f)
00153 AS2( pxor xmm4, [AS_REG_4])
00154 AS2( pxor xmm6, [AS_REG_4+16])
00155 AS2( add AS_REG_4, 32)
00156 ASJ( jmp, 1, f)
// Label 2: y is not 16-byte aligned, so load it with movdqu first.
00157 ASL(2)
00158 AS2( movdqu xmm0, [AS_REG_4])
00159 AS2( movdqu xmm2, [AS_REG_4+16])
00160 AS2( pxor xmm4, xmm0)
00161 AS2( pxor xmm6, xmm2)
00162 AS2( add AS_REG_4, 32)
// Label 1: store to z with aligned or unaligned (label 3) stores, then advance z.
00163 ASL(1)
00164 AS2( test AS_REG_3, 15)
00165 ASJ( jnz, 3, f)
00166 AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4)
00167 AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6)
00168 AS2( add AS_REG_3, 32)
00169 ASJ( jmp, 0, f)
00170 ASL(3)
00171 AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4)
00172 AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6)
00173 AS2( add AS_REG_3, 32)
00174 ASL(0)
00175
00176
// Buffer update. AS_REG_1 and AS_REG_7 are byte offsets of two buffer stages,
// computed like b0 and b25 in Iterate: (offset+32) and (offset+8*32), both wrapped
// with the 31*32 mask (which also discards the flag bit).
00177 AS2( lea AS_REG_1, [AS_REG_6 + 32])
00178 AS2( and AS_REG_1, 31*32)
00179 AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32])
00180 AS2( and AS_REG_7, 31*32)
00181
// First half: the stage at AS_REG_1 gets state words XORed in; its previous
// contents (dword pairs swapped) are XORed into the stage at AS_REG_7.
00182 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
00183 AS2( pxor xmm3, xmm0)
00184 ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
00185 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
00186 AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
00187 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)
00188
// Second half of the same two stages.
00189 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
00190 AS2( pxor xmm1, xmm4)
00191 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
00192 AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
00193 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)
00194
00195
// Reload state words 0..15, which now hold the values written by pi() above.
00196 AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
00197 AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
00198 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
00199 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
00200
// Build xmm4..xmm7: xmm0..xmm3 each shifted down by one 32-bit word, taking the low
// word of the next register (eax for xmm3) at the top. SSE2 variant first, SSSE3
// palignr variant at label 8, selected by bit 0 of AS_REG_6 as before.
00201 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00202 AS2( test AS_REG_6, 1)
00203 ASJ( jnz, 8, f)
00204 #endif
00205 AS2( movd xmm6, eax)
00206 AS2( movdqa xmm7, xmm3)
00207 AS2( movss xmm7, xmm6)
00208 AS2( movdqa xmm6, xmm2)
00209 AS2( movss xmm6, xmm3)
00210 AS2( movdqa xmm5, xmm1)
00211 AS2( movss xmm5, xmm2)
00212 AS2( movdqa xmm4, xmm0)
00213 AS2( movss xmm4, xmm1)
00214 ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
00215 ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
00216 ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
00217 ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
00218 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00219 ASJ( jmp, 9, f)
00220 ASL(8)
00221 AS2( movd xmm7, eax)
00222 AS3( palignr xmm7, xmm3, 4)
00223 AS2( movq xmm6, xmm3)
00224 AS3( palignr xmm6, xmm2, 4)
00225 AS2( movq xmm5, xmm2)
00226 AS3( palignr xmm5, xmm1, 4)
00227 AS2( movq xmm4, xmm1)
00228 AS3( palignr xmm4, xmm0, 4)
00229 ASL(9)
00230 #endif
00231
// Scalar word: eax ^= 1 ^ low(xmm0) ^ low(xmm3).
00232 AS2( xor eax, 1)
00233 AS2( movd AS_REG_1d, xmm0)
00234 AS2( xor eax, AS_REG_1d)
00235 AS2( movd AS_REG_1d, xmm3)
00236 AS2( xor eax, AS_REG_1d)
00237
// Vector words: every register is XORed with its lower neighbour and with one of
// the shifted copies; this appears to mirror the three-term XOR of the T macro
// in Iterate.
00238 AS2( pxor xmm3, xmm2)
00239 AS2( pxor xmm2, xmm1)
00240 AS2( pxor xmm1, xmm0)
00241 AS2( pxor xmm0, xmm7)
00242 AS2( pxor xmm3, xmm7)
00243 AS2( pxor xmm2, xmm6)
00244 AS2( pxor xmm1, xmm5)
00245 AS2( pxor xmm0, xmm4)
00246
00247
// Add two buffer stages into the state. The offsets are computed like b4 and b16
// in Iterate: (offset+28*32) and (offset+16*32), wrapped with the 31*32 mask.
00248 AS2( lea AS_REG_1, [AS_REG_6 + (32-4)*32])
00249 AS2( and AS_REG_1, 31*32)
00250 AS2( lea AS_REG_7, [AS_REG_6 + 16*32])
00251 AS2( and AS_REG_7, 31*32)
00252
// First 16 bytes of both stages: split into low/high qword pairs, XOR into xmm3/xmm2.
00253 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
00254 AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
00255 AS2( movdqa xmm6, xmm4)
00256 AS2( punpcklqdq xmm4, xmm5)
00257 AS2( punpckhqdq xmm6, xmm5)
00258 AS2( pxor xmm3, xmm4)
00259 AS2( pxor xmm2, xmm6)
00260
// Second 16 bytes of both stages, XORed into xmm1/xmm0 the same way.
00261 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
00262 AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
00263 AS2( movdqa xmm6, xmm4)
00264 AS2( punpcklqdq xmm4, xmm5)
00265 AS2( punpckhqdq xmm6, xmm5)
00266 AS2( pxor xmm1, xmm4)
00267 AS2( pxor xmm0, xmm6)
00268
00269
// Advance the buffer offset by one stage and loop until the end offset is reached.
00270 AS2( add AS_REG_6, 32)
00271 AS2( cmp AS_REG_6, REG_loopEnd)
00272 ASJ( jne, 4, b)
00273
00274
// Write state words 0..16 back to memory.
// NOTE(review): the advanced offset in AS_REG_6 is not visibly stored back to
// state[17] here, whereas Iterate ends with m_state[17] = bstart - confirm whether
// that is intended.
00275 AS2( mov [AS_REG_2+4*16], eax)
00276 AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3)
00277 AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2)
00278 AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1)
00279 AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0)
00280
// x86: drop the pushed end offset and restore ebp.
00281 #if CRYPTOPP_BOOL_X86
00282 AS2( add esp, 4)
00283 AS1( pop ebp)
00284 #endif
// Label 5: common exit, also the target of the count == 0 early-out.
00285 ASL(5)
00286
00287 #ifdef __GNUC__
// Close the GCC asm statement: no outputs; the four arguments are pinned to fixed
// registers as inputs; the clobber list follows.
00288 AS_POP_IF86( bx)
00289 ".att_syntax prefix;"
00290 :
00291 #if CRYPTOPP_BOOL_X64
00292 : "D" (count), "S" (state), "d" (z), "c" (y)
00293 : "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
00294 #else
00295 : "c" (count), "d" (state), "S" (z), "D" (y)
00296 : "%eax", "memory", "cc"
00297 #endif
00298 );
00299 #endif
00300 #ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM epilogue: restore xmm6/xmm7 saved in the prologue and release the stack space.
00301 movdqa xmm6, [rsp + 0h]
00302 movdqa xmm7, [rsp + 10h]
00303 add rsp, 2*16+8
00304 ret
00305 Panama_SSE2_Pull ENDP
00306 #else
00307 }
00308 #endif
00309 #endif // #ifdef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00310
00311 #ifndef CRYPTOPP_GENERATE_X64_MASM
00312
00313 template <class B>
00314 void Panama<B>::Iterate(size_t count, const word32 *p, word32 *z, const word32 *y)
00315 {
00316 word32 bstart = m_state[17];
00317 word32 *const aPtr = m_state;
00318 word32 cPtr[17];
00319
00320 #define bPtr ((byte *)(aPtr+20))
00321
00322
00323
00324
00325 #define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17
00326 #define c(i) cPtr[((i)*13+16) % 17]
00327
00328 #define b(i, j) b##i[(j)*2%8 + (j)/4]
00329
00330
00331 #define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a(i+9))
00332 #define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a(i+9))
00333
00334 #define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
00335 #define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
00336
00337 #define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
00338
00339 #define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
00340 #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
00341 #define TS1L(i) T(i+1, b(4,i))
00342 #define TS2(i) T(i+9, b(16,i))
00343
00344 while (count--)
00345 {
00346 if (z)
00347 {
00348 if (y)
00349 {
00350 OX(0); OX(1); OX(2); OX(3); OX(4); OX(5); OX(6); OX(7);
00351 y += 8;
00352 }
00353 else
00354 {
00355 OA(0); OA(1); OA(2); OA(3); OA(4); OA(5); OA(6); OA(7);
00356 }
00357 z += 8;
00358 }
00359
00360 word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32));
00361 word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32));
00362 bstart += 32;
00363 word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32));
00364 word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32));
00365
00366 if (p)
00367 {
00368 US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
00369 }
00370 else
00371 {
00372 UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
00373 }
00374
00375 GP(0);
00376 GP(1);
00377 GP(2);
00378 GP(3);
00379 GP(4);
00380 GP(5);
00381 GP(6);
00382 GP(7);
00383 GP(8);
00384 GP(9);
00385 GP(10);
00386 GP(11);
00387 GP(12);
00388 GP(13);
00389 GP(14);
00390 GP(15);
00391 GP(16);
00392
00393 T(0,1);
00394
00395 if (p)
00396 {
00397 TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
00398 p += 8;
00399 }
00400 else
00401 {
00402 TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
00403 }
00404
00405 TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
00406 }
00407 m_state[17] = bstart;
00408 }
00409
00410 namespace Weak {
00411 template <class B>
00412 size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
00413 {
00414 this->Iterate(length / this->BLOCKSIZE, input);
00415 return length % this->BLOCKSIZE;
00416 }
00417
00418 template <class B>
00419 void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size)
00420 {
00421 this->ThrowIfInvalidTruncatedSize(size);
00422
00423 PadLastBlock(this->BLOCKSIZE, 0x01);
00424
00425 HashEndianCorrectedBlock(this->m_data);
00426
00427 this->Iterate(32);
00428
00429 FixedSizeSecBlock<word32, 8> buf;
00430 this->Iterate(1, NULL, buf, NULL);
00431
00432 memcpy(hash, buf, size);
00433
00434 this->Restart();
00435 }
00436 }
00437
00438 template <class B>
00439 void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
00440 {
00441 assert(length==32);
00442 memcpy(m_key, key, 32);
00443 }
00444
00445 template <class B>
00446 void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv)
00447 {
00448 this->Reset();
00449 this->Iterate(1, m_key);
00450 if (iv && IsAligned<word32>(iv))
00451 this->Iterate(1, (const word32 *)iv);
00452 else
00453 {
00454 FixedSizeSecBlock<word32, 8> buf;
00455 if (iv)
00456 memcpy(buf, iv, 32);
00457 else
00458 memset(buf, 0, 32);
00459 this->Iterate(1, buf);
00460 }
00461
00462 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
00463 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00464 Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
00465 else
00466 #endif
00467 this->Iterate(32);
00468 }
00469
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
// Reports the data alignment this policy asks for: 16 when the SSE2 routine
// will be used (little-endian order and HasSSE2()), otherwise 1.
template <class B>
unsigned int PanamaCipherPolicy<B>::GetAlignment() const
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
	{
		return 16;
	}
#endif
	return 1;
}
#endif
00482
00483 template <class B>
00484 void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00485 {
00486 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
00487 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00488 Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input);
00489 else
00490 #endif
00491 this->Iterate(iterationCount, NULL, (word32 *)output, (const word32 *)input);
00492 }
00493
00494 template class Panama<BigEndian>;
00495 template class Panama<LittleEndian>;
00496
00497 template class Weak::PanamaHash<BigEndian>;
00498 template class Weak::PanamaHash<LittleEndian>;
00499
00500 template class PanamaCipherPolicy<BigEndian>;
00501 template class PanamaCipherPolicy<LittleEndian>;
00502
00503 NAMESPACE_END
00504
00505 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM