00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017 #include "cpu.h"
00018
00019 #include <iostream>
00020
00021 #if _MSC_VER >= 1400
00022 #include <intrin.h>
00023 #endif
00024
00025 #ifdef __DECCXX
00026 #include <c_asm.h>
00027 #endif
00028
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034
00035 NAMESPACE_BEGIN(CryptoPP)
00036
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039 if (valueType != typeid(Integer))
00040 return false;
00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042 return true;
00043 }
00044
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047 while (N--)
00048 if (A[N] > B[N])
00049 return 1;
00050 else if (A[N] < B[N])
00051 return -1;
00052
00053 return 0;
00054 }
00055
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058 assert(N);
00059 word t = A[0];
00060 A[0] = t+B;
00061 if (A[0] >= t)
00062 return 0;
00063 for (unsigned i=1; i<N; i++)
00064 if (++A[i])
00065 return 0;
00066 return 1;
00067 }
00068
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071 assert(N);
00072 word t = A[0];
00073 A[0] = t-B;
00074 if (A[0] <= t)
00075 return 0;
00076 for (unsigned i=1; i<N; i++)
00077 if (A[i]--)
00078 return 0;
00079 return 1;
00080 }
00081
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084 Decrement(A, N);
00085 for (unsigned i=0; i<N; i++)
00086 A[i] = ~A[i];
00087 }
00088
00089 static word AtomicInverseModPower2(word A)
00090 {
00091 assert(A%2==1);
00092
00093 word R=A%8;
00094
00095 for (unsigned i=3; i<WORD_BITS; i*=2)
00096 R = R*(2-R*A);
00097
00098 assert(R*A==1);
00099 return R;
00100 }
00101
00102
00103
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || defined(__x86_64__)
00105 #define Declare2Words(x) word x##0, x##1;
00106 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108 #define LowWord(a) a##0
00109 #define HighWord(a) a##1
00110 #ifdef _MSC_VER
00111 #define MultiplyWords(p, a, b) p##0 = _umul128(a, b, &p##1);
00112 #ifndef __INTEL_COMPILER
00113 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00114 #endif
00115 #elif defined(__DECCXX)
00116 #define MultiplyWords(p, a, b) p##0 = a*b; p##1 = asm("umulh %a0, %a1, %v0", a, b);
00117 #elif defined(__x86_64__)
00118 #define MultiplyWords(p, a, b) asm ("mulq %3" : "=a"(p##0), "=d"(p##1) : "a"(a), "g"(b) : "cc");
00119 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00120 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00121 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00122 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00123 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00124 #endif
00125 #ifndef Double3Words
00126 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00127 #endif
00128 #ifndef Acc2WordsBy2
00129 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00130 #endif
00131 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00132 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00133 #define GetCarry(u) u##1
00134 #define GetBorrow(u) u##1
00135 #else
00136 #define Declare2Words(x) dword x;
00137 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00138 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00139 #else
00140 #define MultiplyWords(p, a, b) p = (dword)a*b;
00141 #endif
00142 #define AssignWord(a, b) a = b;
00143 #define Add2WordsBy1(a, b, c) a = b + c;
00144 #define Acc2WordsBy2(a, b) a += b;
00145 #define LowWord(a) word(a)
00146 #define HighWord(a) word(a>>WORD_BITS)
00147 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00148 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00149 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00150 #define GetCarry(u) HighWord(u)
00151 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00152 #endif
00153 #ifndef MulAcc
00154 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00155 #endif
00156 #ifndef Acc2WordsBy1
00157 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00158 #endif
00159 #ifndef Acc3WordsBy2
00160 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00161 #endif
00162
00163 class DWord
00164 {
00165 public:
00166 DWord() {}
00167
00168 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00169 explicit DWord(word low)
00170 {
00171 m_whole = low;
00172 }
00173 #else
00174 explicit DWord(word low)
00175 {
00176 m_halfs.low = low;
00177 m_halfs.high = 0;
00178 }
00179 #endif
00180
00181 DWord(word low, word high)
00182 {
00183 m_halfs.low = low;
00184 m_halfs.high = high;
00185 }
00186
00187 static DWord Multiply(word a, word b)
00188 {
00189 DWord r;
00190 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00191 r.m_whole = (dword)a * b;
00192 #elif defined(__x86_64__)
00193 asm ("mulq %3" : "=a"(r.m_halfs.low), "=d"(r.m_halfs.high) : "a"(a), "g"(b) : "cc");
00194 #else
00195 r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00196 #endif
00197 return r;
00198 }
00199
00200 static DWord MultiplyAndAdd(word a, word b, word c)
00201 {
00202 DWord r = Multiply(a, b);
00203 return r += c;
00204 }
00205
00206 DWord & operator+=(word a)
00207 {
00208 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00209 m_whole = m_whole + a;
00210 #else
00211 m_halfs.low += a;
00212 m_halfs.high += (m_halfs.low < a);
00213 #endif
00214 return *this;
00215 }
00216
00217 DWord operator+(word a)
00218 {
00219 DWord r;
00220 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00221 r.m_whole = m_whole + a;
00222 #else
00223 r.m_halfs.low = m_halfs.low + a;
00224 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00225 #endif
00226 return r;
00227 }
00228
00229 DWord operator-(DWord a)
00230 {
00231 DWord r;
00232 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00233 r.m_whole = m_whole - a.m_whole;
00234 #else
00235 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00236 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00237 #endif
00238 return r;
00239 }
00240
00241 DWord operator-(word a)
00242 {
00243 DWord r;
00244 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00245 r.m_whole = m_whole - a;
00246 #else
00247 r.m_halfs.low = m_halfs.low - a;
00248 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00249 #endif
00250 return r;
00251 }
00252
00253
00254 word operator/(word divisor);
00255
00256 word operator%(word a);
00257
00258 bool operator!() const
00259 {
00260 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00261 return !m_whole;
00262 #else
00263 return !m_halfs.high && !m_halfs.low;
00264 #endif
00265 }
00266
00267 word GetLowHalf() const {return m_halfs.low;}
00268 word GetHighHalf() const {return m_halfs.high;}
00269 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00270
00271 private:
00272 union
00273 {
00274 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00275 dword m_whole;
00276 #endif
00277 struct
00278 {
00279 #ifdef IS_LITTLE_ENDIAN
00280 word low;
00281 word high;
00282 #else
00283 word high;
00284 word low;
00285 #endif
00286 } m_halfs;
00287 };
00288 };
00289
00290 class Word
00291 {
00292 public:
00293 Word() {}
00294
00295 Word(word value)
00296 {
00297 m_whole = value;
00298 }
00299
00300 Word(hword low, hword high)
00301 {
00302 m_whole = low | (word(high) << (WORD_BITS/2));
00303 }
00304
00305 static Word Multiply(hword a, hword b)
00306 {
00307 Word r;
00308 r.m_whole = (word)a * b;
00309 return r;
00310 }
00311
00312 Word operator-(Word a)
00313 {
00314 Word r;
00315 r.m_whole = m_whole - a.m_whole;
00316 return r;
00317 }
00318
00319 Word operator-(hword a)
00320 {
00321 Word r;
00322 r.m_whole = m_whole - a;
00323 return r;
00324 }
00325
00326
00327 hword operator/(hword divisor)
00328 {
00329 return hword(m_whole / divisor);
00330 }
00331
00332 bool operator!() const
00333 {
00334 return !m_whole;
00335 }
00336
00337 word GetWhole() const {return m_whole;}
00338 hword GetLowHalf() const {return hword(m_whole);}
00339 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00340 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00341
00342 private:
00343 word m_whole;
00344 };
00345
00346
00347 template <class S, class D>
00348 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00349 {
00350
00351 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00352
00353
00354 S Q;
00355 if (S(B1+1) == 0)
00356 Q = A[2];
00357 else
00358 Q = D(A[1], A[2]) / S(B1+1);
00359
00360
00361 D p = D::Multiply(B0, Q);
00362 D u = (D) A[0] - p.GetLowHalf();
00363 A[0] = u.GetLowHalf();
00364 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00365 A[1] = u.GetLowHalf();
00366 A[2] += u.GetHighHalf();
00367
00368
00369 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00370 {
00371 u = (D) A[0] - B0;
00372 A[0] = u.GetLowHalf();
00373 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00374 A[1] = u.GetLowHalf();
00375 A[2] += u.GetHighHalf();
00376 Q++;
00377 assert(Q);
00378 }
00379
00380 return Q;
00381 }
00382
00383
00384 template <class S, class D>
00385 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00386 {
00387 if (!B)
00388 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00389 else
00390 {
00391 S Q[2];
00392 T[0] = Al.GetLowHalf();
00393 T[1] = Al.GetHighHalf();
00394 T[2] = Ah.GetLowHalf();
00395 T[3] = Ah.GetHighHalf();
00396 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00397 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00398 return D(Q[0], Q[1]);
00399 }
00400 }
00401
00402
00403 inline word DWord::operator/(word a)
00404 {
00405 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00406 return word(m_whole / a);
00407 #else
00408 hword r[4];
00409 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00410 #endif
00411 }
00412
00413 inline word DWord::operator%(word a)
00414 {
00415 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00416 return word(m_whole % a);
00417 #else
00418 if (a < (word(1) << (WORD_BITS/2)))
00419 {
00420 hword h = hword(a);
00421 word r = m_halfs.high % h;
00422 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00423 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00424 }
00425 else
00426 {
00427 hword r[4];
00428 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00429 return Word(r[0], r[1]).GetWhole();
00430 }
00431 #endif
00432 }
00433
00434
00435
00436
00437 #if defined(__GNUC__)
00438 #define AddPrologue \
00439 int result; \
00440 __asm__ __volatile__ \
00441 ( \
00442 ".intel_syntax noprefix;"
00443 #define AddEpilogue \
00444 ".att_syntax prefix;" \
00445 : "=a" (result)\
00446 : "d" (C), "a" (A), "D" (B), "c" (N) \
00447 : "%esi", "memory", "cc" \
00448 );\
00449 return result;
00450 #define MulPrologue \
00451 __asm__ __volatile__ \
00452 ( \
00453 ".intel_syntax noprefix;" \
00454 AS1( push ebx) \
00455 AS2( mov ebx, edx)
00456 #define MulEpilogue \
00457 AS1( pop ebx) \
00458 ".att_syntax prefix;" \
00459 : \
00460 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00461 : "%esi", "memory", "cc" \
00462 );
00463 #define SquPrologue MulPrologue
00464 #define SquEpilogue \
00465 AS1( pop ebx) \
00466 ".att_syntax prefix;" \
00467 : \
00468 : "d" (s_maskLow16), "c" (C), "a" (A) \
00469 : "%esi", "%edi", "memory", "cc" \
00470 );
00471 #define TopPrologue MulPrologue
00472 #define TopEpilogue \
00473 AS1( pop ebx) \
00474 ".att_syntax prefix;" \
00475 : \
00476 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00477 : "memory", "cc" \
00478 );
00479 #else
00480 #define AddPrologue \
00481 __asm push edi \
00482 __asm push esi \
00483 __asm mov eax, [esp+12] \
00484 __asm mov edi, [esp+16]
00485 #define AddEpilogue \
00486 __asm pop esi \
00487 __asm pop edi \
00488 __asm ret 8
00489 #if _MSC_VER < 1300
00490 #define SaveEBX __asm push ebx
00491 #define RestoreEBX __asm pop ebx
00492 #else
00493 #define SaveEBX
00494 #define RestoreEBX
00495 #endif
00496 #define SquPrologue \
00497 AS2( mov eax, A) \
00498 AS2( mov ecx, C) \
00499 SaveEBX \
00500 AS2( lea ebx, s_maskLow16)
00501 #define MulPrologue \
00502 AS2( mov eax, A) \
00503 AS2( mov edi, B) \
00504 AS2( mov ecx, C) \
00505 SaveEBX \
00506 AS2( lea ebx, s_maskLow16)
00507 #define TopPrologue \
00508 AS2( mov eax, A) \
00509 AS2( mov edi, B) \
00510 AS2( mov ecx, C) \
00511 AS2( mov esi, L) \
00512 SaveEBX \
00513 AS2( lea ebx, s_maskLow16)
00514 #define SquEpilogue RestoreEBX
00515 #define MulEpilogue RestoreEBX
00516 #define TopEpilogue RestoreEBX
00517 #endif
00518
00519 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00520 extern "C" {
00521 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00522 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00523 }
00524 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__)
00525 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00526 {
00527 word result;
00528 __asm__ __volatile__
00529 (
00530 ".intel_syntax;"
00531 AS1( neg %1)
00532 ASJ( jz, 1, f)
00533 AS2( mov %0,[%3+8*%1])
00534 AS2( add %0,[%4+8*%1])
00535 AS2( mov [%2+8*%1],%0)
00536 ASL(0)
00537 AS2( mov %0,[%3+8*%1+8])
00538 AS2( adc %0,[%4+8*%1+8])
00539 AS2( mov [%2+8*%1+8],%0)
00540 AS2( lea %1,[%1+2])
00541 ASJ( jrcxz, 1, f)
00542 AS2( mov %0,[%3+8*%1])
00543 AS2( adc %0,[%4+8*%1])
00544 AS2( mov [%2+8*%1],%0)
00545 ASJ( jmp, 0, b)
00546 ASL(1)
00547 AS2( mov %0, 0)
00548 AS2( adc %0, %0)
00549 ".att_syntax;"
00550 : "=&r" (result)
00551 : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00552 : "memory", "cc"
00553 );
00554 return (int)result;
00555 }
00556
00557 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00558 {
00559 word result;
00560 __asm__ __volatile__
00561 (
00562 ".intel_syntax;"
00563 AS1( neg %1)
00564 ASJ( jz, 1, f)
00565 AS2( mov %0,[%3+8*%1])
00566 AS2( sub %0,[%4+8*%1])
00567 AS2( mov [%2+8*%1],%0)
00568 ASL(0)
00569 AS2( mov %0,[%3+8*%1+8])
00570 AS2( sbb %0,[%4+8*%1+8])
00571 AS2( mov [%2+8*%1+8],%0)
00572 AS2( lea %1,[%1+2])
00573 ASJ( jrcxz, 1, f)
00574 AS2( mov %0,[%3+8*%1])
00575 AS2( sbb %0,[%4+8*%1])
00576 AS2( mov [%2+8*%1],%0)
00577 ASJ( jmp, 0, b)
00578 ASL(1)
00579 AS2( mov %0, 0)
00580 AS2( adc %0, %0)
00581 ".att_syntax;"
00582 : "=&r" (result)
00583 : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00584 : "memory", "cc"
00585 );
00586 return (int)result;
00587 }
00588 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00589 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00590 {
00591 AddPrologue
00592
00593
00594 AS2( lea eax, [eax+4*ecx])
00595 AS2( lea edi, [edi+4*ecx])
00596 AS2( lea edx, [edx+4*ecx])
00597
00598 AS1( neg ecx)
00599 AS2( test ecx, 2)
00600 ASJ( jz, 0, f)
00601 AS2( sub ecx, 2)
00602 ASJ( jmp, 1, f)
00603
00604 ASL(0)
00605 ASJ( jecxz, 2, f)
00606 AS2( mov esi,[eax+4*ecx])
00607 AS2( adc esi,[edi+4*ecx])
00608 AS2( mov [edx+4*ecx],esi)
00609 AS2( mov esi,[eax+4*ecx+4])
00610 AS2( adc esi,[edi+4*ecx+4])
00611 AS2( mov [edx+4*ecx+4],esi)
00612 ASL(1)
00613 AS2( mov esi,[eax+4*ecx+8])
00614 AS2( adc esi,[edi+4*ecx+8])
00615 AS2( mov [edx+4*ecx+8],esi)
00616 AS2( mov esi,[eax+4*ecx+12])
00617 AS2( adc esi,[edi+4*ecx+12])
00618 AS2( mov [edx+4*ecx+12],esi)
00619
00620 AS2( lea ecx,[ecx+4])
00621 ASJ( jmp, 0, b)
00622
00623 ASL(2)
00624 AS2( mov eax, 0)
00625 AS1( setc al)
00626
00627 AddEpilogue
00628 }
00629
00630 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00631 {
00632 AddPrologue
00633
00634
00635 AS2( lea eax, [eax+4*ecx])
00636 AS2( lea edi, [edi+4*ecx])
00637 AS2( lea edx, [edx+4*ecx])
00638
00639 AS1( neg ecx)
00640 AS2( test ecx, 2)
00641 ASJ( jz, 0, f)
00642 AS2( sub ecx, 2)
00643 ASJ( jmp, 1, f)
00644
00645 ASL(0)
00646 ASJ( jecxz, 2, f)
00647 AS2( mov esi,[eax+4*ecx])
00648 AS2( sbb esi,[edi+4*ecx])
00649 AS2( mov [edx+4*ecx],esi)
00650 AS2( mov esi,[eax+4*ecx+4])
00651 AS2( sbb esi,[edi+4*ecx+4])
00652 AS2( mov [edx+4*ecx+4],esi)
00653 ASL(1)
00654 AS2( mov esi,[eax+4*ecx+8])
00655 AS2( sbb esi,[edi+4*ecx+8])
00656 AS2( mov [edx+4*ecx+8],esi)
00657 AS2( mov esi,[eax+4*ecx+12])
00658 AS2( sbb esi,[edi+4*ecx+12])
00659 AS2( mov [edx+4*ecx+12],esi)
00660
00661 AS2( lea ecx,[ecx+4])
00662 ASJ( jmp, 0, b)
00663
00664 ASL(2)
00665 AS2( mov eax, 0)
00666 AS1( setc al)
00667
00668 AddEpilogue
00669 }
00670
00671 #if CRYPTOPP_INTEGER_SSE2
00672 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00673 {
00674 AddPrologue
00675
00676
00677 AS2( lea eax, [eax+4*ecx])
00678 AS2( lea edi, [edi+4*ecx])
00679 AS2( lea edx, [edx+4*ecx])
00680
00681 AS1( neg ecx)
00682 AS2( pxor mm2, mm2)
00683 ASJ( jz, 2, f)
00684 AS2( test ecx, 2)
00685 ASJ( jz, 0, f)
00686 AS2( sub ecx, 2)
00687 ASJ( jmp, 1, f)
00688
00689 ASL(0)
00690 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00691 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00692 AS2( paddq mm0, mm1)
00693 AS2( paddq mm2, mm0)
00694 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00695 AS2( psrlq mm2, 32)
00696
00697 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00698 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00699 AS2( paddq mm0, mm1)
00700 AS2( paddq mm2, mm0)
00701 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00702 AS2( psrlq mm2, 32)
00703
00704 ASL(1)
00705 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00706 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00707 AS2( paddq mm0, mm1)
00708 AS2( paddq mm2, mm0)
00709 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00710 AS2( psrlq mm2, 32)
00711
00712 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00713 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00714 AS2( paddq mm0, mm1)
00715 AS2( paddq mm2, mm0)
00716 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00717 AS2( psrlq mm2, 32)
00718
00719 AS2( add ecx, 4)
00720 ASJ( jnz, 0, b)
00721
00722 ASL(2)
00723 AS2( movd eax, mm2)
00724 AS1( emms)
00725
00726 AddEpilogue
00727 }
00728 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00729 {
00730 AddPrologue
00731
00732
00733 AS2( lea eax, [eax+4*ecx])
00734 AS2( lea edi, [edi+4*ecx])
00735 AS2( lea edx, [edx+4*ecx])
00736
00737 AS1( neg ecx)
00738 AS2( pxor mm2, mm2)
00739 ASJ( jz, 2, f)
00740 AS2( test ecx, 2)
00741 ASJ( jz, 0, f)
00742 AS2( sub ecx, 2)
00743 ASJ( jmp, 1, f)
00744
00745 ASL(0)
00746 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00747 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00748 AS2( psubq mm0, mm1)
00749 AS2( psubq mm0, mm2)
00750 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00751 AS2( psrlq mm0, 63)
00752
00753 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00754 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00755 AS2( psubq mm2, mm1)
00756 AS2( psubq mm2, mm0)
00757 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00758 AS2( psrlq mm2, 63)
00759
00760 ASL(1)
00761 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00762 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00763 AS2( psubq mm0, mm1)
00764 AS2( psubq mm0, mm2)
00765 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00766 AS2( psrlq mm0, 63)
00767
00768 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00769 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00770 AS2( psubq mm2, mm1)
00771 AS2( psubq mm2, mm0)
00772 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00773 AS2( psrlq mm2, 63)
00774
00775 AS2( add ecx, 4)
00776 ASJ( jnz, 0, b)
00777
00778 ASL(2)
00779 AS2( movd eax, mm2)
00780 AS1( emms)
00781
00782 AddEpilogue
00783 }
00784 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00785 #else
00786 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00787 {
00788 assert (N%2 == 0);
00789
00790 Declare2Words(u);
00791 AssignWord(u, 0);
00792 for (size_t i=0; i<N; i+=2)
00793 {
00794 AddWithCarry(u, A[i], B[i]);
00795 C[i] = LowWord(u);
00796 AddWithCarry(u, A[i+1], B[i+1]);
00797 C[i+1] = LowWord(u);
00798 }
00799 return int(GetCarry(u));
00800 }
00801
00802 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00803 {
00804 assert (N%2 == 0);
00805
00806 Declare2Words(u);
00807 AssignWord(u, 0);
00808 for (size_t i=0; i<N; i+=2)
00809 {
00810 SubtractWithBorrow(u, A[i], B[i]);
00811 C[i] = LowWord(u);
00812 SubtractWithBorrow(u, A[i+1], B[i+1]);
00813 C[i+1] = LowWord(u);
00814 }
00815 return int(GetBorrow(u));
00816 }
00817 #endif
00818
00819 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00820 {
00821 word carry=0;
00822 for(unsigned i=0; i<N; i++)
00823 {
00824 Declare2Words(p);
00825 MultiplyWords(p, A[i], B);
00826 Acc2WordsBy1(p, carry);
00827 C[i] = LowWord(p);
00828 carry = HighWord(p);
00829 }
00830 return carry;
00831 }
00832
00833 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00834
00835 #define Mul_2 \
00836 Mul_Begin(2) \
00837 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00838 Mul_End(1, 1)
00839
00840 #define Mul_4 \
00841 Mul_Begin(4) \
00842 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00843 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00844 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00845 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00846 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00847 Mul_End(5, 3)
00848
00849 #define Mul_8 \
00850 Mul_Begin(8) \
00851 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00852 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00853 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00854 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00855 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00856 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00857 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00858 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00859 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00860 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00861 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00862 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00863 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00864 Mul_End(13, 7)
00865
00866 #define Mul_16 \
00867 Mul_Begin(16) \
00868 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00869 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00870 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00871 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00872 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00873 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00874 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00875 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00876 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00877 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00878 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00879 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00880 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00881 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00882 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00883 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00884 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00885 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00886 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00887 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00888 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00889 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00890 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00891 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00892 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00893 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00894 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00895 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00896 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00897 Mul_End(29, 15)
00898
00899 #define Squ_2 \
00900 Squ_Begin(2) \
00901 Squ_End(2)
00902
00903 #define Squ_4 \
00904 Squ_Begin(4) \
00905 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00906 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00907 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00908 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00909 Squ_End(4)
00910
00911 #define Squ_8 \
00912 Squ_Begin(8) \
00913 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00914 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00915 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00916 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00917 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00918 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00919 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00920 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00921 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00922 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00923 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00924 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00925 Squ_End(8)
00926
00927 #define Squ_16 \
00928 Squ_Begin(16) \
00929 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00930 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00931 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00932 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00933 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00934 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00935 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00936 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00937 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00938 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00939 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00940 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00941 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00942 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00943 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00944 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00945 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00946 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00947 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00948 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00949 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00950 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00951 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00952 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00953 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00954 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00955 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00956 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00957 Squ_End(16)
00958
00959 #define Bot_2 \
00960 Mul_Begin(2) \
00961 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00962 Bot_End(2)
00963
00964 #define Bot_4 \
00965 Mul_Begin(4) \
00966 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00967 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
00968 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
00969 Bot_End(4)
00970
00971 #define Bot_8 \
00972 Mul_Begin(8) \
00973 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00974 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00975 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00976 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00977 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00978 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00979 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00980 Bot_End(8)
00981
00982 #define Bot_16 \
00983 Mul_Begin(16) \
00984 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00985 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00986 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00987 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00988 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00989 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00990 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00991 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00992 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00993 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00994 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00995 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00996 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00997 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00998 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
00999 Bot_End(16)
01000
01001 #endif
01002
01003 #if 0
01004 #define Mul_Begin(n) \
01005 Declare2Words(p) \
01006 Declare2Words(c) \
01007 Declare2Words(d) \
01008 MultiplyWords(p, A[0], B[0]) \
01009 AssignWord(c, LowWord(p)) \
01010 AssignWord(d, HighWord(p))
01011
01012 #define Mul_Acc(i, j) \
01013 MultiplyWords(p, A[i], B[j]) \
01014 Acc2WordsBy1(c, LowWord(p)) \
01015 Acc2WordsBy1(d, HighWord(p))
01016
01017 #define Mul_SaveAcc(k, i, j) \
01018 R[k] = LowWord(c); \
01019 Add2WordsBy1(c, d, HighWord(c)) \
01020 MultiplyWords(p, A[i], B[j]) \
01021 AssignWord(d, HighWord(p)) \
01022 Acc2WordsBy1(c, LowWord(p))
01023
01024 #define Mul_End(n) \
01025 R[2*n-3] = LowWord(c); \
01026 Acc2WordsBy1(d, HighWord(c)) \
01027 MultiplyWords(p, A[n-1], B[n-1])\
01028 Acc2WordsBy2(d, p) \
01029 R[2*n-2] = LowWord(d); \
01030 R[2*n-1] = HighWord(d);
01031
01032 #define Bot_SaveAcc(k, i, j) \
01033 R[k] = LowWord(c); \
01034 word e = LowWord(d) + HighWord(c); \
01035 e += A[i] * B[j];
01036
01037 #define Bot_Acc(i, j) \
01038 e += A[i] * B[j];
01039
01040 #define Bot_End(n) \
01041 R[n-1] = e;
01042 #else
01043 #define Mul_Begin(n) \
01044 Declare2Words(p) \
01045 word c; \
01046 Declare2Words(d) \
01047 MultiplyWords(p, A[0], B[0]) \
01048 c = LowWord(p); \
01049 AssignWord(d, HighWord(p))
01050
01051 #define Mul_Acc(i, j) \
01052 MulAcc(c, d, A[i], B[j])
01053
01054 #define Mul_SaveAcc(k, i, j) \
01055 R[k] = c; \
01056 c = LowWord(d); \
01057 AssignWord(d, HighWord(d)) \
01058 MulAcc(c, d, A[i], B[j])
01059
01060 #define Mul_End(k, i) \
01061 R[k] = c; \
01062 MultiplyWords(p, A[i], B[i]) \
01063 Acc2WordsBy2(p, d) \
01064 R[k+1] = LowWord(p); \
01065 R[k+2] = HighWord(p);
01066
01067 #define Bot_SaveAcc(k, i, j) \
01068 R[k] = c; \
01069 c = LowWord(d); \
01070 c += A[i] * B[j];
01071
01072 #define Bot_Acc(i, j) \
01073 c += A[i] * B[j];
01074
01075 #define Bot_End(n) \
01076 R[n-1] = c;
01077 #endif
01078
01079 #define Squ_Begin(n) \
01080 Declare2Words(p) \
01081 word c; \
01082 Declare2Words(d) \
01083 Declare2Words(e) \
01084 MultiplyWords(p, A[0], A[0]) \
01085 R[0] = LowWord(p); \
01086 AssignWord(e, HighWord(p)) \
01087 MultiplyWords(p, A[0], A[1]) \
01088 c = LowWord(p); \
01089 AssignWord(d, HighWord(p)) \
01090 Squ_NonDiag \
01091
01092 #define Squ_NonDiag \
01093 Double3Words(c, d)
01094
01095 #define Squ_SaveAcc(k, i, j) \
01096 Acc3WordsBy2(c, d, e) \
01097 R[k] = c; \
01098 MultiplyWords(p, A[i], A[j]) \
01099 c = LowWord(p); \
01100 AssignWord(d, HighWord(p)) \
01101
01102 #define Squ_Acc(i, j) \
01103 MulAcc(c, d, A[i], A[j])
01104
01105 #define Squ_Diag(i) \
01106 Squ_NonDiag \
01107 MulAcc(c, d, A[i], A[i])
01108
01109 #define Squ_End(n) \
01110 Acc3WordsBy2(c, d, e) \
01111 R[2*n-3] = c; \
01112 MultiplyWords(p, A[n-1], A[n-1])\
01113 Acc2WordsBy2(p, e) \
01114 R[2*n-2] = LowWord(p); \
01115 R[2*n-1] = HighWord(p);
01116
01117 void Baseline_Multiply2(word *R, const word *A, const word *B)
01118 {
01119 Mul_2
01120 }
01121
01122 void Baseline_Multiply4(word *R, const word *A, const word *B)
01123 {
01124 Mul_4
01125 }
01126
01127 void Baseline_Multiply8(word *R, const word *A, const word *B)
01128 {
01129 Mul_8
01130 }
01131
01132 void Baseline_Square2(word *R, const word *A)
01133 {
01134 Squ_2
01135 }
01136
01137 void Baseline_Square4(word *R, const word *A)
01138 {
01139 Squ_4
01140 }
01141
01142 void Baseline_Square8(word *R, const word *A)
01143 {
01144 Squ_8
01145 }
01146
01147 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01148 {
01149 Bot_2
01150 }
01151
01152 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01153 {
01154 Bot_4
01155 }
01156
01157 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01158 {
01159 Bot_8
01160 }
01161
01162 #define Top_Begin(n) \
01163 Declare2Words(p) \
01164 word c; \
01165 Declare2Words(d) \
01166 MultiplyWords(p, A[0], B[n-2]);\
01167 AssignWord(d, HighWord(p));
01168
01169 #define Top_Acc(i, j) \
01170 MultiplyWords(p, A[i], B[j]);\
01171 Acc2WordsBy1(d, HighWord(p));
01172
01173 #define Top_SaveAcc0(i, j) \
01174 c = LowWord(d); \
01175 AssignWord(d, HighWord(d)) \
01176 MulAcc(c, d, A[i], B[j])
01177
01178 #define Top_SaveAcc1(i, j) \
01179 c = L<c; \
01180 Acc2WordsBy1(d, c); \
01181 c = LowWord(d); \
01182 AssignWord(d, HighWord(d)) \
01183 MulAcc(c, d, A[i], B[j])
01184
01185 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01186 {
01187 word T[4];
01188 Baseline_Multiply2(T, A, B);
01189 R[0] = T[2];
01190 R[1] = T[3];
01191 }
01192
01193 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01194 {
01195 Top_Begin(4)
01196 Top_Acc(1, 1) Top_Acc(2, 0) \
01197 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01198 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01199 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01200 Mul_End(1, 3)
01201 }
01202
01203 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01204 {
01205 Top_Begin(8)
01206 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01207 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01208 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01209 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01210 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01211 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01212 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01213 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01214 Mul_End(5, 7)
01215 }
01216
01217 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01218 void Baseline_Multiply16(word *R, const word *A, const word *B)
01219 {
01220 Mul_16
01221 }
01222
01223 void Baseline_Square16(word *R, const word *A)
01224 {
01225 Squ_16
01226 }
01227
01228 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01229 {
01230 Bot_16
01231 }
01232
01233 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01234 {
01235 Top_Begin(16)
01236 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01237 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01238 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01239 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01240 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01241 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01242 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01243 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01244 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01245 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01246 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01247 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01248 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01249 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01250 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01251 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01252 Mul_End(13, 15)
01253 }
01254 #endif
01255
01256
01257
01258 #if CRYPTOPP_INTEGER_SSE2
01259
01260 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01261
01262 #undef Mul_Begin
01263 #undef Mul_Acc
01264 #undef Top_Begin
01265 #undef Top_Acc
01266 #undef Squ_Acc
01267 #undef Squ_NonDiag
01268 #undef Squ_Diag
01269 #undef Squ_SaveAcc
01270 #undef Squ_Begin
01271 #undef Mul_SaveAcc
01272 #undef Bot_Acc
01273 #undef Bot_SaveAcc
01274 #undef Bot_End
01275 #undef Squ_End
01276 #undef Mul_End
01277
01278 #define SSE2_FinalSave(k) \
01279 AS2( psllq xmm5, 16) \
01280 AS2( paddq xmm4, xmm5) \
01281 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01282
01283 #define SSE2_SaveShift(k) \
01284 AS2( movq xmm0, xmm6) \
01285 AS2( punpckhqdq xmm6, xmm0) \
01286 AS2( movq xmm1, xmm7) \
01287 AS2( punpckhqdq xmm7, xmm1) \
01288 AS2( paddd xmm6, xmm0) \
01289 AS2( pslldq xmm6, 4) \
01290 AS2( paddd xmm7, xmm1) \
01291 AS2( paddd xmm4, xmm6) \
01292 AS2( pslldq xmm7, 4) \
01293 AS2( movq xmm6, xmm4) \
01294 AS2( paddd xmm5, xmm7) \
01295 AS2( movq xmm7, xmm5) \
01296 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01297 AS2( psrlq xmm6, 16) \
01298 AS2( paddq xmm6, xmm7) \
01299 AS2( punpckhqdq xmm4, xmm0) \
01300 AS2( punpckhqdq xmm5, xmm0) \
01301 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01302 AS2( psrlq xmm6, 3*16) \
01303 AS2( paddd xmm4, xmm6) \
01304
01305 #define Squ_SSE2_SaveShift(k) \
01306 AS2( movq xmm0, xmm6) \
01307 AS2( punpckhqdq xmm6, xmm0) \
01308 AS2( movq xmm1, xmm7) \
01309 AS2( punpckhqdq xmm7, xmm1) \
01310 AS2( paddd xmm6, xmm0) \
01311 AS2( pslldq xmm6, 4) \
01312 AS2( paddd xmm7, xmm1) \
01313 AS2( paddd xmm4, xmm6) \
01314 AS2( pslldq xmm7, 4) \
01315 AS2( movhlps xmm6, xmm4) \
01316 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01317 AS2( paddd xmm5, xmm7) \
01318 AS2( movhps QWORD PTR [esp+12], xmm5)\
01319 AS2( psrlq xmm4, 16) \
01320 AS2( paddq xmm4, xmm5) \
01321 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01322 AS2( psrlq xmm4, 3*16) \
01323 AS2( paddd xmm4, xmm6) \
01324 AS2( movq QWORD PTR [esp+4], xmm4)\
01325
01326 #define SSE2_FirstMultiply(i) \
01327 AS2( movdqa xmm7, [esi+(i)*16])\
01328 AS2( movdqa xmm5, [edi-(i)*16])\
01329 AS2( pmuludq xmm5, xmm7) \
01330 AS2( movdqa xmm4, [ebx])\
01331 AS2( movdqa xmm6, xmm4) \
01332 AS2( pand xmm4, xmm5) \
01333 AS2( psrld xmm5, 16) \
01334 AS2( pmuludq xmm7, [edx-(i)*16])\
01335 AS2( pand xmm6, xmm7) \
01336 AS2( psrld xmm7, 16)
01337
01338 #define Squ_Begin(n) \
01339 SquPrologue \
01340 AS2( mov esi, esp)\
01341 AS2( and esp, 0xfffffff0)\
01342 AS2( lea edi, [esp-32*n])\
01343 AS2( sub esp, 32*n+16)\
01344 AS1( push esi)\
01345 AS2( mov esi, edi) \
01346 AS2( xor edx, edx) \
01347 ASL(1) \
01348 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01349 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01350 AS2( movdqa [edi+2*edx], xmm0) \
01351 AS2( psrlq xmm0, 32) \
01352 AS2( movdqa [edi+2*edx+16], xmm0) \
01353 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01354 AS2( psrlq xmm1, 32) \
01355 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01356 AS2( add edx, 16) \
01357 AS2( cmp edx, 8*(n)) \
01358 ASJ( jne, 1, b) \
01359 AS2( lea edx, [edi+16*n])\
01360 SSE2_FirstMultiply(0) \
01361
01362 #define Squ_Acc(i) \
01363 ASL(LSqu##i) \
01364 AS2( movdqa xmm1, [esi+(i)*16]) \
01365 AS2( movdqa xmm0, [edi-(i)*16]) \
01366 AS2( movdqa xmm2, [ebx]) \
01367 AS2( pmuludq xmm0, xmm1) \
01368 AS2( pmuludq xmm1, [edx-(i)*16]) \
01369 AS2( movdqa xmm3, xmm2) \
01370 AS2( pand xmm2, xmm0) \
01371 AS2( psrld xmm0, 16) \
01372 AS2( paddd xmm4, xmm2) \
01373 AS2( paddd xmm5, xmm0) \
01374 AS2( pand xmm3, xmm1) \
01375 AS2( psrld xmm1, 16) \
01376 AS2( paddd xmm6, xmm3) \
01377 AS2( paddd xmm7, xmm1) \
01378
01379 #define Squ_Acc1(i)
01380 #define Squ_Acc2(i) ASC(call, LSqu##i)
01381 #define Squ_Acc3(i) Squ_Acc2(i)
01382 #define Squ_Acc4(i) Squ_Acc2(i)
01383 #define Squ_Acc5(i) Squ_Acc2(i)
01384 #define Squ_Acc6(i) Squ_Acc2(i)
01385 #define Squ_Acc7(i) Squ_Acc2(i)
01386 #define Squ_Acc8(i) Squ_Acc2(i)
01387
01388 #define SSE2_End(E, n) \
01389 SSE2_SaveShift(2*(n)-3) \
01390 AS2( movdqa xmm7, [esi+16]) \
01391 AS2( movdqa xmm0, [edi]) \
01392 AS2( pmuludq xmm0, xmm7) \
01393 AS2( movdqa xmm2, [ebx]) \
01394 AS2( pmuludq xmm7, [edx]) \
01395 AS2( movdqa xmm6, xmm2) \
01396 AS2( pand xmm2, xmm0) \
01397 AS2( psrld xmm0, 16) \
01398 AS2( paddd xmm4, xmm2) \
01399 AS2( paddd xmm5, xmm0) \
01400 AS2( pand xmm6, xmm7) \
01401 AS2( psrld xmm7, 16) \
01402 SSE2_SaveShift(2*(n)-2) \
01403 SSE2_FinalSave(2*(n)-1) \
01404 AS1( pop esp)\
01405 E
01406
01407 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01408 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01409 #define Top_End(n) SSE2_End(TopEpilogue, n)
01410
01411 #define Squ_Column1(k, i) \
01412 Squ_SSE2_SaveShift(k) \
01413 AS2( add esi, 16) \
01414 SSE2_FirstMultiply(1)\
01415 Squ_Acc##i(i) \
01416 AS2( paddd xmm4, xmm4) \
01417 AS2( paddd xmm5, xmm5) \
01418 AS2( movdqa xmm3, [esi]) \
01419 AS2( movq xmm1, QWORD PTR [esi+8]) \
01420 AS2( pmuludq xmm1, xmm3) \
01421 AS2( pmuludq xmm3, xmm3) \
01422 AS2( movdqa xmm0, [ebx])\
01423 AS2( movdqa xmm2, xmm0) \
01424 AS2( pand xmm0, xmm1) \
01425 AS2( psrld xmm1, 16) \
01426 AS2( paddd xmm6, xmm0) \
01427 AS2( paddd xmm7, xmm1) \
01428 AS2( pand xmm2, xmm3) \
01429 AS2( psrld xmm3, 16) \
01430 AS2( paddd xmm6, xmm6) \
01431 AS2( paddd xmm7, xmm7) \
01432 AS2( paddd xmm4, xmm2) \
01433 AS2( paddd xmm5, xmm3) \
01434 AS2( movq xmm0, QWORD PTR [esp+4])\
01435 AS2( movq xmm1, QWORD PTR [esp+12])\
01436 AS2( paddd xmm4, xmm0)\
01437 AS2( paddd xmm5, xmm1)\
01438
01439 #define Squ_Column0(k, i) \
01440 Squ_SSE2_SaveShift(k) \
01441 AS2( add edi, 16) \
01442 AS2( add edx, 16) \
01443 SSE2_FirstMultiply(1)\
01444 Squ_Acc##i(i) \
01445 AS2( paddd xmm6, xmm6) \
01446 AS2( paddd xmm7, xmm7) \
01447 AS2( paddd xmm4, xmm4) \
01448 AS2( paddd xmm5, xmm5) \
01449 AS2( movq xmm0, QWORD PTR [esp+4])\
01450 AS2( movq xmm1, QWORD PTR [esp+12])\
01451 AS2( paddd xmm4, xmm0)\
01452 AS2( paddd xmm5, xmm1)\
01453
01454 #define SSE2_MulAdd45 \
01455 AS2( movdqa xmm7, [esi]) \
01456 AS2( movdqa xmm0, [edi]) \
01457 AS2( pmuludq xmm0, xmm7) \
01458 AS2( movdqa xmm2, [ebx]) \
01459 AS2( pmuludq xmm7, [edx]) \
01460 AS2( movdqa xmm6, xmm2) \
01461 AS2( pand xmm2, xmm0) \
01462 AS2( psrld xmm0, 16) \
01463 AS2( paddd xmm4, xmm2) \
01464 AS2( paddd xmm5, xmm0) \
01465 AS2( pand xmm6, xmm7) \
01466 AS2( psrld xmm7, 16)
01467
01468 #define Mul_Begin(n) \
01469 MulPrologue \
01470 AS2( mov esi, esp)\
01471 AS2( and esp, 0xfffffff0)\
01472 AS2( sub esp, 48*n+16)\
01473 AS1( push esi)\
01474 AS2( xor edx, edx) \
01475 ASL(1) \
01476 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01477 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01478 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01479 AS2( movdqa [esp+20+2*edx], xmm0) \
01480 AS2( psrlq xmm0, 32) \
01481 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01482 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01483 AS2( psrlq xmm1, 32) \
01484 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01485 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01486 AS2( psrlq xmm2, 32) \
01487 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01488 AS2( add edx, 16) \
01489 AS2( cmp edx, 8*(n)) \
01490 ASJ( jne, 1, b) \
01491 AS2( lea edi, [esp+20])\
01492 AS2( lea edx, [esp+20+16*n])\
01493 AS2( lea esi, [esp+20+32*n])\
01494 SSE2_FirstMultiply(0) \
01495
01496 #define Mul_Acc(i) \
01497 ASL(LMul##i) \
01498 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01499 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01500 AS2( movdqa xmm2, [ebx]) \
01501 AS2( pmuludq xmm0, xmm1) \
01502 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01503 AS2( movdqa xmm3, xmm2) \
01504 AS2( pand xmm2, xmm0) \
01505 AS2( psrld xmm0, 16) \
01506 AS2( paddd xmm4, xmm2) \
01507 AS2( paddd xmm5, xmm0) \
01508 AS2( pand xmm3, xmm1) \
01509 AS2( psrld xmm1, 16) \
01510 AS2( paddd xmm6, xmm3) \
01511 AS2( paddd xmm7, xmm1) \
01512
01513 #define Mul_Acc1(i)
01514 #define Mul_Acc2(i) ASC(call, LMul##i)
01515 #define Mul_Acc3(i) Mul_Acc2(i)
01516 #define Mul_Acc4(i) Mul_Acc2(i)
01517 #define Mul_Acc5(i) Mul_Acc2(i)
01518 #define Mul_Acc6(i) Mul_Acc2(i)
01519 #define Mul_Acc7(i) Mul_Acc2(i)
01520 #define Mul_Acc8(i) Mul_Acc2(i)
01521 #define Mul_Acc9(i) Mul_Acc2(i)
01522 #define Mul_Acc10(i) Mul_Acc2(i)
01523 #define Mul_Acc11(i) Mul_Acc2(i)
01524 #define Mul_Acc12(i) Mul_Acc2(i)
01525 #define Mul_Acc13(i) Mul_Acc2(i)
01526 #define Mul_Acc14(i) Mul_Acc2(i)
01527 #define Mul_Acc15(i) Mul_Acc2(i)
01528 #define Mul_Acc16(i) Mul_Acc2(i)
01529
01530 #define Mul_Column1(k, i) \
01531 SSE2_SaveShift(k) \
01532 AS2( add esi, 16) \
01533 SSE2_MulAdd45\
01534 Mul_Acc##i(i) \
01535
01536 #define Mul_Column0(k, i) \
01537 SSE2_SaveShift(k) \
01538 AS2( add edi, 16) \
01539 AS2( add edx, 16) \
01540 SSE2_MulAdd45\
01541 Mul_Acc##i(i) \
01542
01543 #define Bot_Acc(i) \
01544 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01545 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01546 AS2( pmuludq xmm0, xmm1) \
01547 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01548 AS2( paddq xmm4, xmm0) \
01549 AS2( paddd xmm6, xmm1)
01550
01551 #define Bot_SaveAcc(k) \
01552 SSE2_SaveShift(k) \
01553 AS2( add edi, 16) \
01554 AS2( add edx, 16) \
01555 AS2( movdqa xmm6, [esi]) \
01556 AS2( movdqa xmm0, [edi]) \
01557 AS2( pmuludq xmm0, xmm6) \
01558 AS2( paddq xmm4, xmm0) \
01559 AS2( psllq xmm5, 16) \
01560 AS2( paddq xmm4, xmm5) \
01561 AS2( pmuludq xmm6, [edx])
01562
01563 #define Bot_End(n) \
01564 AS2( movhlps xmm7, xmm6) \
01565 AS2( paddd xmm6, xmm7) \
01566 AS2( psllq xmm6, 32) \
01567 AS2( paddd xmm4, xmm6) \
01568 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01569 AS1( pop esp)\
01570 MulEpilogue
01571
01572 #define Top_Begin(n) \
01573 TopPrologue \
01574 AS2( mov edx, esp)\
01575 AS2( and esp, 0xfffffff0)\
01576 AS2( sub esp, 48*n+16)\
01577 AS1( push edx)\
01578 AS2( xor edx, edx) \
01579 ASL(1) \
01580 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01581 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01582 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01583 AS2( movdqa [esp+20+2*edx], xmm0) \
01584 AS2( psrlq xmm0, 32) \
01585 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01586 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01587 AS2( psrlq xmm1, 32) \
01588 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01589 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01590 AS2( psrlq xmm2, 32) \
01591 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01592 AS2( add edx, 16) \
01593 AS2( cmp edx, 8*(n)) \
01594 ASJ( jne, 1, b) \
01595 AS2( mov eax, esi) \
01596 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01597 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01598 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01599 AS2( pxor xmm4, xmm4)\
01600 AS2( pxor xmm5, xmm5)
01601
01602 #define Top_Acc(i) \
01603 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01604 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01605 AS2( psrlq xmm0, 48) \
01606 AS2( paddd xmm5, xmm0)\
01607
01608 #define Top_Column0(i) \
01609 AS2( psllq xmm5, 32) \
01610 AS2( add edi, 16) \
01611 AS2( add edx, 16) \
01612 SSE2_MulAdd45\
01613 Mul_Acc##i(i) \
01614
01615 #define Top_Column1(i) \
01616 SSE2_SaveShift(0) \
01617 AS2( add esi, 16) \
01618 SSE2_MulAdd45\
01619 Mul_Acc##i(i) \
01620 AS2( shr eax, 16) \
01621 AS2( movd xmm0, eax)\
01622 AS2( movd xmm1, [ecx+4])\
01623 AS2( psrld xmm1, 16)\
01624 AS2( pcmpgtd xmm1, xmm0)\
01625 AS2( psrld xmm1, 31)\
01626 AS2( paddd xmm4, xmm1)\
01627
01628 void SSE2_Square4(word *C, const word *A)
01629 {
01630 Squ_Begin(2)
01631 Squ_Column0(0, 1)
01632 Squ_End(2)
01633 }
01634
01635 void SSE2_Square8(word *C, const word *A)
01636 {
01637 Squ_Begin(4)
01638 #ifndef __GNUC__
01639 ASJ( jmp, 0, f)
01640 Squ_Acc(2)
01641 AS1( ret) ASL(0)
01642 #endif
01643 Squ_Column0(0, 1)
01644 Squ_Column1(1, 1)
01645 Squ_Column0(2, 2)
01646 Squ_Column1(3, 1)
01647 Squ_Column0(4, 1)
01648 Squ_End(4)
01649 }
01650
01651 void SSE2_Square16(word *C, const word *A)
01652 {
01653 Squ_Begin(8)
01654 #ifndef __GNUC__
01655 ASJ( jmp, 0, f)
01656 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01657 AS1( ret) ASL(0)
01658 #endif
01659 Squ_Column0(0, 1)
01660 Squ_Column1(1, 1)
01661 Squ_Column0(2, 2)
01662 Squ_Column1(3, 2)
01663 Squ_Column0(4, 3)
01664 Squ_Column1(5, 3)
01665 Squ_Column0(6, 4)
01666 Squ_Column1(7, 3)
01667 Squ_Column0(8, 3)
01668 Squ_Column1(9, 2)
01669 Squ_Column0(10, 2)
01670 Squ_Column1(11, 1)
01671 Squ_Column0(12, 1)
01672 Squ_End(8)
01673 }
01674
01675 void SSE2_Square32(word *C, const word *A)
01676 {
01677 Squ_Begin(16)
01678 ASJ( jmp, 0, f)
01679 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01680 AS1( ret) ASL(0)
01681 Squ_Column0(0, 1)
01682 Squ_Column1(1, 1)
01683 Squ_Column0(2, 2)
01684 Squ_Column1(3, 2)
01685 Squ_Column0(4, 3)
01686 Squ_Column1(5, 3)
01687 Squ_Column0(6, 4)
01688 Squ_Column1(7, 4)
01689 Squ_Column0(8, 5)
01690 Squ_Column1(9, 5)
01691 Squ_Column0(10, 6)
01692 Squ_Column1(11, 6)
01693 Squ_Column0(12, 7)
01694 Squ_Column1(13, 7)
01695 Squ_Column0(14, 8)
01696 Squ_Column1(15, 7)
01697 Squ_Column0(16, 7)
01698 Squ_Column1(17, 6)
01699 Squ_Column0(18, 6)
01700 Squ_Column1(19, 5)
01701 Squ_Column0(20, 5)
01702 Squ_Column1(21, 4)
01703 Squ_Column0(22, 4)
01704 Squ_Column1(23, 3)
01705 Squ_Column0(24, 3)
01706 Squ_Column1(25, 2)
01707 Squ_Column0(26, 2)
01708 Squ_Column1(27, 1)
01709 Squ_Column0(28, 1)
01710 Squ_End(16)
01711 }
01712
01713 void SSE2_Multiply4(word *C, const word *A, const word *B)
01714 {
01715 Mul_Begin(2)
01716 #ifndef __GNUC__
01717 ASJ( jmp, 0, f)
01718 Mul_Acc(2)
01719 AS1( ret) ASL(0)
01720 #endif
01721 Mul_Column0(0, 2)
01722 Mul_End(2)
01723 }
01724
01725 void SSE2_Multiply8(word *C, const word *A, const word *B)
01726 {
01727 Mul_Begin(4)
01728 #ifndef __GNUC__
01729 ASJ( jmp, 0, f)
01730 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01731 AS1( ret) ASL(0)
01732 #endif
01733 Mul_Column0(0, 2)
01734 Mul_Column1(1, 3)
01735 Mul_Column0(2, 4)
01736 Mul_Column1(3, 3)
01737 Mul_Column0(4, 2)
01738 Mul_End(4)
01739 }
01740
01741 void SSE2_Multiply16(word *C, const word *A, const word *B)
01742 {
01743 Mul_Begin(8)
01744 #ifndef __GNUC__
01745 ASJ( jmp, 0, f)
01746 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01747 AS1( ret) ASL(0)
01748 #endif
01749 Mul_Column0(0, 2)
01750 Mul_Column1(1, 3)
01751 Mul_Column0(2, 4)
01752 Mul_Column1(3, 5)
01753 Mul_Column0(4, 6)
01754 Mul_Column1(5, 7)
01755 Mul_Column0(6, 8)
01756 Mul_Column1(7, 7)
01757 Mul_Column0(8, 6)
01758 Mul_Column1(9, 5)
01759 Mul_Column0(10, 4)
01760 Mul_Column1(11, 3)
01761 Mul_Column0(12, 2)
01762 Mul_End(8)
01763 }
01764
01765 void SSE2_Multiply32(word *C, const word *A, const word *B)
01766 {
01767 Mul_Begin(16)
01768 ASJ( jmp, 0, f)
01769 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01770 AS1( ret) ASL(0)
01771 Mul_Column0(0, 2)
01772 Mul_Column1(1, 3)
01773 Mul_Column0(2, 4)
01774 Mul_Column1(3, 5)
01775 Mul_Column0(4, 6)
01776 Mul_Column1(5, 7)
01777 Mul_Column0(6, 8)
01778 Mul_Column1(7, 9)
01779 Mul_Column0(8, 10)
01780 Mul_Column1(9, 11)
01781 Mul_Column0(10, 12)
01782 Mul_Column1(11, 13)
01783 Mul_Column0(12, 14)
01784 Mul_Column1(13, 15)
01785 Mul_Column0(14, 16)
01786 Mul_Column1(15, 15)
01787 Mul_Column0(16, 14)
01788 Mul_Column1(17, 13)
01789 Mul_Column0(18, 12)
01790 Mul_Column1(19, 11)
01791 Mul_Column0(20, 10)
01792 Mul_Column1(21, 9)
01793 Mul_Column0(22, 8)
01794 Mul_Column1(23, 7)
01795 Mul_Column0(24, 6)
01796 Mul_Column1(25, 5)
01797 Mul_Column0(26, 4)
01798 Mul_Column1(27, 3)
01799 Mul_Column0(28, 2)
01800 Mul_End(16)
01801 }
01802
01803 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01804 {
01805 Mul_Begin(2)
01806 Bot_SaveAcc(0) Bot_Acc(2)
01807 Bot_End(2)
01808 }
01809
01810 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01811 {
01812 Mul_Begin(4)
01813 #ifndef __GNUC__
01814 ASJ( jmp, 0, f)
01815 Mul_Acc(3) Mul_Acc(2)
01816 AS1( ret) ASL(0)
01817 #endif
01818 Mul_Column0(0, 2)
01819 Mul_Column1(1, 3)
01820 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01821 Bot_End(4)
01822 }
01823
01824 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01825 {
01826 Mul_Begin(8)
01827 #ifndef __GNUC__
01828 ASJ( jmp, 0, f)
01829 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01830 AS1( ret) ASL(0)
01831 #endif
01832 Mul_Column0(0, 2)
01833 Mul_Column1(1, 3)
01834 Mul_Column0(2, 4)
01835 Mul_Column1(3, 5)
01836 Mul_Column0(4, 6)
01837 Mul_Column1(5, 7)
01838 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01839 Bot_End(8)
01840 }
01841
01842 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01843 {
01844 Mul_Begin(16)
01845 #ifndef __GNUC__
01846 ASJ( jmp, 0, f)
01847 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01848 AS1( ret) ASL(0)
01849 #endif
01850 Mul_Column0(0, 2)
01851 Mul_Column1(1, 3)
01852 Mul_Column0(2, 4)
01853 Mul_Column1(3, 5)
01854 Mul_Column0(4, 6)
01855 Mul_Column1(5, 7)
01856 Mul_Column0(6, 8)
01857 Mul_Column1(7, 9)
01858 Mul_Column0(8, 10)
01859 Mul_Column1(9, 11)
01860 Mul_Column0(10, 12)
01861 Mul_Column1(11, 13)
01862 Mul_Column0(12, 14)
01863 Mul_Column1(13, 15)
01864 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01865 Bot_End(16)
01866 }
01867
01868 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01869 {
01870 Top_Begin(4)
01871 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01872 #ifndef __GNUC__
01873 ASJ( jmp, 0, f)
01874 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01875 AS1( ret) ASL(0)
01876 #endif
01877 Top_Column0(4)
01878 Top_Column1(3)
01879 Mul_Column0(0, 2)
01880 Top_End(2)
01881 }
01882
01883 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01884 {
01885 Top_Begin(8)
01886 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01887 #ifndef __GNUC__
01888 ASJ( jmp, 0, f)
01889 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01890 AS1( ret) ASL(0)
01891 #endif
01892 Top_Column0(8)
01893 Top_Column1(7)
01894 Mul_Column0(0, 6)
01895 Mul_Column1(1, 5)
01896 Mul_Column0(2, 4)
01897 Mul_Column1(3, 3)
01898 Mul_Column0(4, 2)
01899 Top_End(4)
01900 }
01901
01902 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01903 {
01904 Top_Begin(16)
01905 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01906 #ifndef __GNUC__
01907 ASJ( jmp, 0, f)
01908 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01909 AS1( ret) ASL(0)
01910 #endif
01911 Top_Column0(16)
01912 Top_Column1(15)
01913 Mul_Column0(0, 14)
01914 Mul_Column1(1, 13)
01915 Mul_Column0(2, 12)
01916 Mul_Column1(3, 11)
01917 Mul_Column0(4, 10)
01918 Mul_Column1(5, 9)
01919 Mul_Column0(6, 8)
01920 Mul_Column1(7, 7)
01921 Mul_Column0(8, 6)
01922 Mul_Column1(9, 5)
01923 Mul_Column0(10, 4)
01924 Mul_Column1(11, 3)
01925 Mul_Column0(12, 2)
01926 Top_End(8)
01927 }
01928
01929 #endif // #if CRYPTOPP_INTEGER_SSE2
01930
01931
01932
01933 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01934 typedef void (* PMul)(word *C, const word *A, const word *B);
01935 typedef void (* PSqu)(word *C, const word *A);
01936 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01937
01938 #if CRYPTOPP_INTEGER_SSE2
01939 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01940 static size_t s_recursionLimit = 8;
01941 #else
01942 static const size_t s_recursionLimit = 16;
01943 #endif
01944
01945 static PMul s_pMul[9], s_pBot[9];
01946 static PSqu s_pSqu[9];
01947 static PMulTop s_pTop[9];
01948
01949 static void SetFunctionPointers()
01950 {
01951 s_pMul[0] = &Baseline_Multiply2;
01952 s_pBot[0] = &Baseline_MultiplyBottom2;
01953 s_pSqu[0] = &Baseline_Square2;
01954 s_pTop[0] = &Baseline_MultiplyTop2;
01955 s_pTop[1] = &Baseline_MultiplyTop4;
01956
01957 #if CRYPTOPP_INTEGER_SSE2
01958 if (HasSSE2())
01959 {
01960 #if _MSC_VER != 1200 || defined(NDEBUG)
01961 if (IsP4())
01962 {
01963 s_pAdd = &SSE2_Add;
01964 s_pSub = &SSE2_Sub;
01965 }
01966 #endif
01967
01968 s_recursionLimit = 32;
01969
01970 s_pMul[1] = &SSE2_Multiply4;
01971 s_pMul[2] = &SSE2_Multiply8;
01972 s_pMul[4] = &SSE2_Multiply16;
01973 s_pMul[8] = &SSE2_Multiply32;
01974
01975 s_pBot[1] = &SSE2_MultiplyBottom4;
01976 s_pBot[2] = &SSE2_MultiplyBottom8;
01977 s_pBot[4] = &SSE2_MultiplyBottom16;
01978 s_pBot[8] = &SSE2_MultiplyBottom32;
01979
01980 s_pSqu[1] = &SSE2_Square4;
01981 s_pSqu[2] = &SSE2_Square8;
01982 s_pSqu[4] = &SSE2_Square16;
01983 s_pSqu[8] = &SSE2_Square32;
01984
01985 s_pTop[2] = &SSE2_MultiplyTop8;
01986 s_pTop[4] = &SSE2_MultiplyTop16;
01987 s_pTop[8] = &SSE2_MultiplyTop32;
01988 }
01989 else
01990 #endif
01991 {
01992 s_pMul[1] = &Baseline_Multiply4;
01993 s_pMul[2] = &Baseline_Multiply8;
01994
01995 s_pBot[1] = &Baseline_MultiplyBottom4;
01996 s_pBot[2] = &Baseline_MultiplyBottom8;
01997
01998 s_pSqu[1] = &Baseline_Square4;
01999 s_pSqu[2] = &Baseline_Square8;
02000
02001 s_pTop[2] = &Baseline_MultiplyTop8;
02002
02003 #if !CRYPTOPP_INTEGER_SSE2
02004 s_pMul[4] = &Baseline_Multiply16;
02005 s_pBot[4] = &Baseline_MultiplyBottom16;
02006 s_pSqu[4] = &Baseline_Square16;
02007 s_pTop[4] = &Baseline_MultiplyTop16;
02008 #endif
02009 }
02010 }
02011
02012 inline int Add(word *C, const word *A, const word *B, size_t N)
02013 {
02014 #if CRYPTOPP_INTEGER_SSE2
02015 return s_pAdd(N, C, A, B);
02016 #else
02017 return Baseline_Add(N, C, A, B);
02018 #endif
02019 }
02020
02021 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02022 {
02023 #if CRYPTOPP_INTEGER_SSE2
02024 return s_pSub(N, C, A, B);
02025 #else
02026 return Baseline_Sub(N, C, A, B);
02027 #endif
02028 }
02029
02030
02031
02032
02033 #define A0 A
02034 #define A1 (A+N2)
02035 #define B0 B
02036 #define B1 (B+N2)
02037
02038 #define T0 T
02039 #define T1 (T+N2)
02040 #define T2 (T+N)
02041 #define T3 (T+N+N2)
02042
02043 #define R0 R
02044 #define R1 (R+N2)
02045 #define R2 (R+N)
02046 #define R3 (R+N+N2)
02047
02048
02049
02050
02051
02052
02053 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02054 {
02055 assert(N>=2 && N%2==0);
02056
02057 if (N <= s_recursionLimit)
02058 s_pMul[N/4](R, A, B);
02059 else
02060 {
02061 const size_t N2 = N/2;
02062
02063 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02064 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02065
02066 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02067 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02068
02069 RecursiveMultiply(R2, T2, A1, B1, N2);
02070 RecursiveMultiply(T0, T2, R0, R1, N2);
02071 RecursiveMultiply(R0, T2, A0, B0, N2);
02072
02073
02074
02075 int c2 = Add(R2, R2, R1, N2);
02076 int c3 = c2;
02077 c2 += Add(R1, R2, R0, N2);
02078 c3 += Add(R2, R2, R3, N2);
02079
02080 if (AN2 == BN2)
02081 c3 -= Subtract(R1, R1, T0, N);
02082 else
02083 c3 += Add(R1, R1, T0, N);
02084
02085 c3 += Increment(R2, N2, c2);
02086 assert (c3 >= 0 && c3 <= 2);
02087 Increment(R3, N2, c3);
02088 }
02089 }
02090
02091
02092
02093
02094
02095 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02096 {
02097 assert(N && N%2==0);
02098
02099 if (N <= s_recursionLimit)
02100 s_pSqu[N/4](R, A);
02101 else
02102 {
02103 const size_t N2 = N/2;
02104
02105 RecursiveSquare(R0, T2, A0, N2);
02106 RecursiveSquare(R2, T2, A1, N2);
02107 RecursiveMultiply(T0, T2, A0, A1, N2);
02108
02109 int carry = Add(R1, R1, T0, N);
02110 carry += Add(R1, R1, T0, N);
02111 Increment(R3, N2, carry);
02112 }
02113 }
02114
02115
02116
02117
02118
02119
02120 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02121 {
02122 assert(N>=2 && N%2==0);
02123
02124 if (N <= s_recursionLimit)
02125 s_pBot[N/4](R, A, B);
02126 else
02127 {
02128 const size_t N2 = N/2;
02129
02130 RecursiveMultiply(R, T, A0, B0, N2);
02131 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02132 Add(R1, R1, T0, N2);
02133 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02134 Add(R1, R1, T0, N2);
02135 }
02136 }
02137
02138
02139
02140
02141
02142
02143
02144 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02145 {
02146 assert(N>=2 && N%2==0);
02147
02148 if (N <= s_recursionLimit)
02149 s_pTop[N/4](R, A, B, L[N-1]);
02150 else
02151 {
02152 const size_t N2 = N/2;
02153
02154 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02155 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02156
02157 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02158 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02159
02160 RecursiveMultiply(T0, T2, R0, R1, N2);
02161 RecursiveMultiply(R0, T2, A1, B1, N2);
02162
02163
02164
02165 int t, c3;
02166 int c2 = Subtract(T2, L+N2, L, N2);
02167
02168 if (AN2 == BN2)
02169 {
02170 c2 -= Add(T2, T2, T0, N2);
02171 t = (Compare(T2, R0, N2) == -1);
02172 c3 = t - Subtract(T2, T2, T1, N2);
02173 }
02174 else
02175 {
02176 c2 += Subtract(T2, T2, T0, N2);
02177 t = (Compare(T2, R0, N2) == -1);
02178 c3 = t + Add(T2, T2, T1, N2);
02179 }
02180
02181 c2 += t;
02182 if (c2 >= 0)
02183 c3 += Increment(T2, N2, c2);
02184 else
02185 c3 -= Decrement(T2, N2, -c2);
02186 c3 += Add(R0, T2, R1, N2);
02187
02188 assert (c3 >= 0 && c3 <= 2);
02189 Increment(R1, N2, c3);
02190 }
02191 }
02192
02193 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02194 {
02195 RecursiveMultiply(R, T, A, B, N);
02196 }
02197
02198 inline void Square(word *R, word *T, const word *A, size_t N)
02199 {
02200 RecursiveSquare(R, T, A, N);
02201 }
02202
02203 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02204 {
02205 RecursiveMultiplyBottom(R, T, A, B, N);
02206 }
02207
02208
02209
02210
02211
02212
02213 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02214 {
02215 if (NA == NB)
02216 {
02217 if (A == B)
02218 Square(R, T, A, NA);
02219 else
02220 Multiply(R, T, A, B, NA);
02221
02222 return;
02223 }
02224
02225 if (NA > NB)
02226 {
02227 std::swap(A, B);
02228 std::swap(NA, NB);
02229 }
02230
02231 assert(NB % NA == 0);
02232
02233 if (NA==2 && !A[1])
02234 {
02235 switch (A[0])
02236 {
02237 case 0:
02238 SetWords(R, 0, NB+2);
02239 return;
02240 case 1:
02241 CopyWords(R, B, NB);
02242 R[NB] = R[NB+1] = 0;
02243 return;
02244 default:
02245 R[NB] = LinearMultiply(R, B, A[0], NB);
02246 R[NB+1] = 0;
02247 return;
02248 }
02249 }
02250
02251 size_t i;
02252 if ((NB/NA)%2 == 0)
02253 {
02254 Multiply(R, T, A, B, NA);
02255 CopyWords(T+2*NA, R+NA, NA);
02256
02257 for (i=2*NA; i<NB; i+=2*NA)
02258 Multiply(T+NA+i, T, A, B+i, NA);
02259 for (i=NA; i<NB; i+=2*NA)
02260 Multiply(R+i, T, A, B+i, NA);
02261 }
02262 else
02263 {
02264 for (i=0; i<NB; i+=2*NA)
02265 Multiply(R+i, T, A, B+i, NA);
02266 for (i=NA; i<NB; i+=2*NA)
02267 Multiply(T+NA+i, T, A, B+i, NA);
02268 }
02269
02270 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02271 Increment(R+NB, NA);
02272 }
02273
02274
02275
02276
02277
02278 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02279 {
02280 if (N==2)
02281 {
02282 T[0] = AtomicInverseModPower2(A[0]);
02283 T[1] = 0;
02284 s_pBot[0](T+2, T, A);
02285 TwosComplement(T+2, 2);
02286 Increment(T+2, 2, 2);
02287 s_pBot[0](R, T, T+2);
02288 }
02289 else
02290 {
02291 const size_t N2 = N/2;
02292 RecursiveInverseModPower2(R0, T0, A0, N2);
02293 T0[0] = 1;
02294 SetWords(T0+1, 0, N2-1);
02295 MultiplyTop(R1, T1, T0, R0, A0, N2);
02296 MultiplyBottom(T0, T1, R0, A1, N2);
02297 Add(T0, R1, T0, N2);
02298 TwosComplement(T0, N2);
02299 MultiplyBottom(R1, T1, R0, T0, N2);
02300 }
02301 }
02302
02303
02304
02305
02306
02307
02308
02309 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02310 {
02311 #if 1
02312 MultiplyBottom(R, T, X, U, N);
02313 MultiplyTop(T, T+N, X, R, M, N);
02314 word borrow = Subtract(T, X+N, T, N);
02315
02316 word carry = Add(T+N, T, M, N);
02317 assert(carry | !borrow);
02318 CopyWords(R, T + ((0-borrow) & N), N);
02319 #elif 0
02320 const word u = 0-U[0];
02321 Declare2Words(p)
02322 for (size_t i=0; i<N; i++)
02323 {
02324 const word t = u * X[i];
02325 word c = 0;
02326 for (size_t j=0; j<N; j+=2)
02327 {
02328 MultiplyWords(p, t, M[j]);
02329 Acc2WordsBy1(p, X[i+j]);
02330 Acc2WordsBy1(p, c);
02331 X[i+j] = LowWord(p);
02332 c = HighWord(p);
02333 MultiplyWords(p, t, M[j+1]);
02334 Acc2WordsBy1(p, X[i+j+1]);
02335 Acc2WordsBy1(p, c);
02336 X[i+j+1] = LowWord(p);
02337 c = HighWord(p);
02338 }
02339
02340 if (Increment(X+N+i, N-i, c))
02341 while (!Subtract(X+N, X+N, M, N)) {}
02342 }
02343
02344 memcpy(R, X+N, N*WORD_SIZE);
02345 #else
02346 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02347 for (size_t i=0; i<N; i++)
02348 {
02349 __m64 t = _mm_cvtsi32_si64(X[i]);
02350 t = _mm_mul_su32(t, u);
02351 __m64 c = _mm_setzero_si64();
02352 for (size_t j=0; j<N; j+=2)
02353 {
02354 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02355 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02356 c = _mm_add_si64(c, p);
02357 X[i+j] = _mm_cvtsi64_si32(c);
02358 c = _mm_srli_si64(c, 32);
02359 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02360 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02361 c = _mm_add_si64(c, p);
02362 X[i+j+1] = _mm_cvtsi64_si32(c);
02363 c = _mm_srli_si64(c, 32);
02364 }
02365
02366 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02367 while (!Subtract(X+N, X+N, M, N)) {}
02368 }
02369
02370 memcpy(R, X+N, N*WORD_SIZE);
02371 _mm_empty();
02372 #endif
02373 }
02374
02375
02376
02377
02378
02379
02380
02381
02382 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02383 {
02384 assert(N%2==0 && N>=4);
02385
02386 #define M0 M
02387 #define M1 (M+N2)
02388 #define V0 V
02389 #define V1 (V+N2)
02390
02391 #define X0 X
02392 #define X1 (X+N2)
02393 #define X2 (X+N)
02394 #define X3 (X+N+N2)
02395
02396 const size_t N2 = N/2;
02397 Multiply(T0, T2, V0, X3, N2);
02398 int c2 = Add(T0, T0, X0, N);
02399 MultiplyBottom(T3, T2, T0, U, N2);
02400 MultiplyTop(T2, R, T0, T3, M0, N2);
02401 c2 -= Subtract(T2, T1, T2, N2);
02402 Multiply(T0, R, T3, M1, N2);
02403 c2 -= Subtract(T0, T2, T0, N2);
02404 int c3 = -(int)Subtract(T1, X2, T1, N2);
02405 Multiply(R0, T2, V1, X3, N2);
02406 c3 += Add(R, R, T, N);
02407
02408 if (c2>0)
02409 c3 += Increment(R1, N2);
02410 else if (c2<0)
02411 c3 -= Decrement(R1, N2, -c2);
02412
02413 assert(c3>=-1 && c3<=1);
02414 if (c3>0)
02415 Subtract(R, R, M, N);
02416 else if (c3<0)
02417 Add(R, R, M, N);
02418
02419 #undef M0
02420 #undef M1
02421 #undef V0
02422 #undef V1
02423
02424 #undef X0
02425 #undef X1
02426 #undef X2
02427 #undef X3
02428 }
02429
02430 #undef A0
02431 #undef A1
02432 #undef B0
02433 #undef B1
02434
02435 #undef T0
02436 #undef T1
02437 #undef T2
02438 #undef T3
02439
02440 #undef R0
02441 #undef R1
02442 #undef R2
02443 #undef R3
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508
02509 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02510 {
02511 word T[4];
02512 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02513 Q[0] = q.GetLowHalf();
02514 Q[1] = q.GetHighHalf();
02515
02516 #ifndef NDEBUG
02517 if (B[0] || B[1])
02518 {
02519
02520 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02521 word P[4];
02522 s_pMul[0](P, Q, B);
02523 Add(P, P, T, 4);
02524 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02525 }
02526 #endif
02527 }
02528
02529
02530 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02531 {
02532 assert(N && N%2==0);
02533
02534 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02535
02536 word borrow = Subtract(R, R, T, N+2);
02537 assert(!borrow && !R[N+1]);
02538
02539 while (R[N] || Compare(R, B, N) >= 0)
02540 {
02541 R[N] -= Subtract(R, R, B, N);
02542 Q[1] += (++Q[0]==0);
02543 assert(Q[0] || Q[1]);
02544 }
02545 }
02546
02547
02548
02549
02550
02551
02552
02553 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02554 {
02555 assert(NA && NB && NA%2==0 && NB%2==0);
02556 assert(B[NB-1] || B[NB-2]);
02557 assert(NB <= NA);
02558
02559
02560 word *const TA=T;
02561 word *const TB=T+NA+2;
02562 word *const TP=T+NA+2+NB;
02563
02564
02565 unsigned shiftWords = (B[NB-1]==0);
02566 TB[0] = TB[NB-1] = 0;
02567 CopyWords(TB+shiftWords, B, NB-shiftWords);
02568 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02569 assert(shiftBits < WORD_BITS);
02570 ShiftWordsLeftByBits(TB, NB, shiftBits);
02571
02572
02573 TA[0] = TA[NA] = TA[NA+1] = 0;
02574 CopyWords(TA+shiftWords, A, NA);
02575 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02576
02577 if (TA[NA+1]==0 && TA[NA] <= 1)
02578 {
02579 Q[NA-NB+1] = Q[NA-NB] = 0;
02580 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02581 {
02582 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02583 ++Q[NA-NB];
02584 }
02585 }
02586 else
02587 {
02588 NA+=2;
02589 assert(Compare(TA+NA-NB, TB, NB) < 0);
02590 }
02591
02592 word BT[2];
02593 BT[0] = TB[NB-2] + 1;
02594 BT[1] = TB[NB-1] + (BT[0]==0);
02595
02596
02597 for (size_t i=NA-2; i>=NB; i-=2)
02598 {
02599 AtomicDivide(Q+i-NB, TA+i-2, BT);
02600 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02601 }
02602
02603
02604 CopyWords(R, TA+shiftWords, NB);
02605 ShiftWordsRightByBits(R, NB, shiftBits);
02606 }
02607
02608 static inline size_t EvenWordCount(const word *X, size_t N)
02609 {
02610 while (N && X[N-2]==0 && X[N-1]==0)
02611 N-=2;
02612 return N;
02613 }
02614
02615
02616
02617
02618
02619
02620
02621 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02622 {
02623 assert(NA<=N && N && N%2==0);
02624
02625 word *b = T;
02626 word *c = T+N;
02627 word *f = T+2*N;
02628 word *g = T+3*N;
02629 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02630 unsigned int k=0, s=0;
02631
02632 SetWords(T, 0, 3*N);
02633 b[0]=1;
02634 CopyWords(f, A, NA);
02635 CopyWords(g, M, N);
02636
02637 while (1)
02638 {
02639 word t=f[0];
02640 while (!t)
02641 {
02642 if (EvenWordCount(f, fgLen)==0)
02643 {
02644 SetWords(R, 0, N);
02645 return 0;
02646 }
02647
02648 ShiftWordsRightByWords(f, fgLen, 1);
02649 if (c[bcLen-1]) bcLen+=2;
02650 assert(bcLen <= N);
02651 ShiftWordsLeftByWords(c, bcLen, 1);
02652 k+=WORD_BITS;
02653 t=f[0];
02654 }
02655
02656 unsigned int i=0;
02657 while (t%2 == 0)
02658 {
02659 t>>=1;
02660 i++;
02661 }
02662 k+=i;
02663
02664 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02665 {
02666 if (s%2==0)
02667 CopyWords(R, b, N);
02668 else
02669 Subtract(R, M, b, N);
02670 return k;
02671 }
02672
02673 ShiftWordsRightByBits(f, fgLen, i);
02674 t=ShiftWordsLeftByBits(c, bcLen, i);
02675 if (t)
02676 {
02677 c[bcLen] = t;
02678 bcLen+=2;
02679 assert(bcLen <= N);
02680 }
02681
02682 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02683 fgLen-=2;
02684
02685 if (Compare(f, g, fgLen)==-1)
02686 {
02687 std::swap(f, g);
02688 std::swap(b, c);
02689 s++;
02690 }
02691
02692 Subtract(f, f, g, fgLen);
02693
02694 if (Add(b, b, c, bcLen))
02695 {
02696 b[bcLen] = 1;
02697 bcLen+=2;
02698 assert(bcLen <= N);
02699 }
02700 }
02701 }
02702
02703
02704
02705
02706
02707 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02708 {
02709 CopyWords(R, A, N);
02710
02711 while (k--)
02712 {
02713 if (R[0]%2==0)
02714 ShiftWordsRightByBits(R, N, 1);
02715 else
02716 {
02717 word carry = Add(R, R, M, N);
02718 ShiftWordsRightByBits(R, N, 1);
02719 R[N-1] += carry<<(WORD_BITS-1);
02720 }
02721 }
02722 }
02723
02724
02725
02726
02727
02728 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02729 {
02730 CopyWords(R, A, N);
02731
02732 while (k--)
02733 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02734 Subtract(R, R, M, N);
02735 }
02736
02737
02738
02739 InitializeInteger::InitializeInteger()
02740 {
02741 if (!g_pAssignIntToInteger)
02742 {
02743 SetFunctionPointers();
02744 g_pAssignIntToInteger = AssignIntToInteger;
02745 }
02746 }
02747
02748 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02749
02750 static inline size_t RoundupSize(size_t n)
02751 {
02752 if (n<=8)
02753 return RoundupSizeTable[n];
02754 else if (n<=16)
02755 return 16;
02756 else if (n<=32)
02757 return 32;
02758 else if (n<=64)
02759 return 64;
02760 else return size_t(1) << BitPrecision(n-1);
02761 }
02762
02763 Integer::Integer()
02764 : reg(2), sign(POSITIVE)
02765 {
02766 reg[0] = reg[1] = 0;
02767 }
02768
02769 Integer::Integer(const Integer& t)
02770 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02771 {
02772 CopyWords(reg, t.reg, reg.size());
02773 }
02774
02775 Integer::Integer(Sign s, lword value)
02776 : reg(2), sign(s)
02777 {
02778 reg[0] = word(value);
02779 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02780 }
02781
02782 Integer::Integer(signed long value)
02783 : reg(2)
02784 {
02785 if (value >= 0)
02786 sign = POSITIVE;
02787 else
02788 {
02789 sign = NEGATIVE;
02790 value = -value;
02791 }
02792 reg[0] = word(value);
02793 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02794 }
02795
02796 Integer::Integer(Sign s, word high, word low)
02797 : reg(2), sign(s)
02798 {
02799 reg[0] = low;
02800 reg[1] = high;
02801 }
02802
02803 bool Integer::IsConvertableToLong() const
02804 {
02805 if (ByteCount() > sizeof(long))
02806 return false;
02807
02808 unsigned long value = (unsigned long)reg[0];
02809 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02810
02811 if (sign==POSITIVE)
02812 return (signed long)value >= 0;
02813 else
02814 return -(signed long)value < 0;
02815 }
02816
02817 signed long Integer::ConvertToLong() const
02818 {
02819 assert(IsConvertableToLong());
02820
02821 unsigned long value = (unsigned long)reg[0];
02822 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02823 return sign==POSITIVE ? value : -(signed long)value;
02824 }
02825
02826 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02827 {
02828 Decode(encodedInteger, byteCount, s);
02829 }
02830
02831 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02832 {
02833 Decode(encodedInteger, byteCount, s);
02834 }
02835
02836 Integer::Integer(BufferedTransformation &bt)
02837 {
02838 BERDecode(bt);
02839 }
02840
02841 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02842 {
02843 Randomize(rng, bitcount);
02844 }
02845
02846 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02847 {
02848 if (!Randomize(rng, min, max, rnType, equiv, mod))
02849 throw Integer::RandomNumberNotFound();
02850 }
02851
02852 Integer Integer::Power2(size_t e)
02853 {
02854 Integer r((word)0, BitsToWords(e+1));
02855 r.SetBit(e);
02856 return r;
02857 }
02858
02859 template <long i>
02860 struct NewInteger
02861 {
02862 Integer * operator()() const
02863 {
02864 return new Integer(i);
02865 }
02866 };
02867
02868 const Integer &Integer::Zero()
02869 {
02870 return Singleton<Integer>().Ref();
02871 }
02872
02873 const Integer &Integer::One()
02874 {
02875 return Singleton<Integer, NewInteger<1> >().Ref();
02876 }
02877
02878 const Integer &Integer::Two()
02879 {
02880 return Singleton<Integer, NewInteger<2> >().Ref();
02881 }
02882
02883 bool Integer::operator!() const
02884 {
02885 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02886 }
02887
02888 Integer& Integer::operator=(const Integer& t)
02889 {
02890 if (this != &t)
02891 {
02892 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02893 reg.New(RoundupSize(t.WordCount()));
02894 CopyWords(reg, t.reg, reg.size());
02895 sign = t.sign;
02896 }
02897 return *this;
02898 }
02899
02900 bool Integer::GetBit(size_t n) const
02901 {
02902 if (n/WORD_BITS >= reg.size())
02903 return 0;
02904 else
02905 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02906 }
02907
02908 void Integer::SetBit(size_t n, bool value)
02909 {
02910 if (value)
02911 {
02912 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02913 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02914 }
02915 else
02916 {
02917 if (n/WORD_BITS < reg.size())
02918 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02919 }
02920 }
02921
02922 byte Integer::GetByte(size_t n) const
02923 {
02924 if (n/WORD_SIZE >= reg.size())
02925 return 0;
02926 else
02927 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02928 }
02929
02930 void Integer::SetByte(size_t n, byte value)
02931 {
02932 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02933 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02934 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02935 }
02936
02937 lword Integer::GetBits(size_t i, size_t n) const
02938 {
02939 lword v = 0;
02940 assert(n <= sizeof(v)*8);
02941 for (unsigned int j=0; j<n; j++)
02942 v |= lword(GetBit(i+j)) << j;
02943 return v;
02944 }
02945
02946 Integer Integer::operator-() const
02947 {
02948 Integer result(*this);
02949 result.Negate();
02950 return result;
02951 }
02952
02953 Integer Integer::AbsoluteValue() const
02954 {
02955 Integer result(*this);
02956 result.sign = POSITIVE;
02957 return result;
02958 }
02959
02960 void Integer::swap(Integer &a)
02961 {
02962 reg.swap(a.reg);
02963 std::swap(sign, a.sign);
02964 }
02965
02966 Integer::Integer(word value, size_t length)
02967 : reg(RoundupSize(length)), sign(POSITIVE)
02968 {
02969 reg[0] = value;
02970 SetWords(reg+1, 0, reg.size()-1);
02971 }
02972
02973 template <class T>
02974 static Integer StringToInteger(const T *str)
02975 {
02976 int radix;
02977
02978
02979 unsigned int length;
02980 for (length = 0; str[length] != 0; length++) {}
02981
02982 Integer v;
02983
02984 if (length == 0)
02985 return v;
02986
02987 switch (str[length-1])
02988 {
02989 case 'h':
02990 case 'H':
02991 radix=16;
02992 break;
02993 case 'o':
02994 case 'O':
02995 radix=8;
02996 break;
02997 case 'b':
02998 case 'B':
02999 radix=2;
03000 break;
03001 default:
03002 radix=10;
03003 }
03004
03005 if (length > 2 && str[0] == '0' && str[1] == 'x')
03006 radix = 16;
03007
03008 for (unsigned i=0; i<length; i++)
03009 {
03010 int digit;
03011
03012 if (str[i] >= '0' && str[i] <= '9')
03013 digit = str[i] - '0';
03014 else if (str[i] >= 'A' && str[i] <= 'F')
03015 digit = str[i] - 'A' + 10;
03016 else if (str[i] >= 'a' && str[i] <= 'f')
03017 digit = str[i] - 'a' + 10;
03018 else
03019 digit = radix;
03020
03021 if (digit < radix)
03022 {
03023 v *= radix;
03024 v += digit;
03025 }
03026 }
03027
03028 if (str[0] == '-')
03029 v.Negate();
03030
03031 return v;
03032 }
03033
03034 Integer::Integer(const char *str)
03035 : reg(2), sign(POSITIVE)
03036 {
03037 *this = StringToInteger(str);
03038 }
03039
03040 Integer::Integer(const wchar_t *str)
03041 : reg(2), sign(POSITIVE)
03042 {
03043 *this = StringToInteger(str);
03044 }
03045
03046 unsigned int Integer::WordCount() const
03047 {
03048 return (unsigned int)CountWords(reg, reg.size());
03049 }
03050
03051 unsigned int Integer::ByteCount() const
03052 {
03053 unsigned wordCount = WordCount();
03054 if (wordCount)
03055 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03056 else
03057 return 0;
03058 }
03059
03060 unsigned int Integer::BitCount() const
03061 {
03062 unsigned wordCount = WordCount();
03063 if (wordCount)
03064 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03065 else
03066 return 0;
03067 }
03068
03069 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03070 {
03071 StringStore store(input, inputLen);
03072 Decode(store, inputLen, s);
03073 }
03074
03075 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03076 {
03077 assert(bt.MaxRetrievable() >= inputLen);
03078
03079 byte b;
03080 bt.Peek(b);
03081 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03082
03083 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03084 {
03085 bt.Skip(1);
03086 inputLen--;
03087 bt.Peek(b);
03088 }
03089
03090 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03091
03092 for (size_t i=inputLen; i > 0; i--)
03093 {
03094 bt.Get(b);
03095 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03096 }
03097
03098 if (sign == NEGATIVE)
03099 {
03100 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03101 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03102 TwosComplement(reg, reg.size());
03103 }
03104 }
03105
03106 size_t Integer::MinEncodedSize(Signedness signedness) const
03107 {
03108 unsigned int outputLen = STDMAX(1U, ByteCount());
03109 if (signedness == UNSIGNED)
03110 return outputLen;
03111 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03112 outputLen++;
03113 if (IsNegative() && *this < -Power2(outputLen*8-1))
03114 outputLen++;
03115 return outputLen;
03116 }
03117
03118 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03119 {
03120 ArraySink sink(output, outputLen);
03121 Encode(sink, outputLen, signedness);
03122 }
03123
03124 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03125 {
03126 if (signedness == UNSIGNED || NotNegative())
03127 {
03128 for (size_t i=outputLen; i > 0; i--)
03129 bt.Put(GetByte(i-1));
03130 }
03131 else
03132 {
03133
03134 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03135 temp.Encode(bt, outputLen, UNSIGNED);
03136 }
03137 }
03138
03139 void Integer::DEREncode(BufferedTransformation &bt) const
03140 {
03141 DERGeneralEncoder enc(bt, INTEGER);
03142 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03143 enc.MessageEnd();
03144 }
03145
03146 void Integer::BERDecode(const byte *input, size_t len)
03147 {
03148 StringStore store(input, len);
03149 BERDecode(store);
03150 }
03151
03152 void Integer::BERDecode(BufferedTransformation &bt)
03153 {
03154 BERGeneralDecoder dec(bt, INTEGER);
03155 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03156 BERDecodeError();
03157 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03158 dec.MessageEnd();
03159 }
03160
03161 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03162 {
03163 DERGeneralEncoder enc(bt, OCTET_STRING);
03164 Encode(enc, length);
03165 enc.MessageEnd();
03166 }
03167
03168 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03169 {
03170 BERGeneralDecoder dec(bt, OCTET_STRING);
03171 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03172 BERDecodeError();
03173 Decode(dec, length);
03174 dec.MessageEnd();
03175 }
03176
03177 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03178 {
03179 ArraySink sink(output, len);
03180 return OpenPGPEncode(sink);
03181 }
03182
03183 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03184 {
03185 word16 bitCount = BitCount();
03186 bt.PutWord16(bitCount);
03187 size_t byteCount = BitsToBytes(bitCount);
03188 Encode(bt, byteCount);
03189 return 2 + byteCount;
03190 }
03191
03192 void Integer::OpenPGPDecode(const byte *input, size_t len)
03193 {
03194 StringStore store(input, len);
03195 OpenPGPDecode(store);
03196 }
03197
03198 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03199 {
03200 word16 bitCount;
03201 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03202 throw OpenPGPDecodeErr();
03203 Decode(bt, BitsToBytes(bitCount));
03204 }
03205
03206 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03207 {
03208 const size_t nbytes = nbits/8 + 1;
03209 SecByteBlock buf(nbytes);
03210 rng.GenerateBlock(buf, nbytes);
03211 if (nbytes)
03212 buf[0] = (byte)Crop(buf[0], nbits % 8);
03213 Decode(buf, nbytes, UNSIGNED);
03214 }
03215
03216 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03217 {
03218 if (min > max)
03219 throw InvalidArgument("Integer: Min must be no greater than Max");
03220
03221 Integer range = max - min;
03222 const unsigned int nbits = range.BitCount();
03223
03224 do
03225 {
03226 Randomize(rng, nbits);
03227 }
03228 while (*this > range);
03229
03230 *this += min;
03231 }
03232
03233 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03234 {
03235 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03236 }
03237
03238 class KDF2_RNG : public RandomNumberGenerator
03239 {
03240 public:
03241 KDF2_RNG(const byte *seed, size_t seedSize)
03242 : m_counter(0), m_counterAndSeed(seedSize + 4)
03243 {
03244 memcpy(m_counterAndSeed + 4, seed, seedSize);
03245 }
03246
03247 void GenerateBlock(byte *output, size_t size)
03248 {
03249 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03250 ++m_counter;
03251 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03252 }
03253
03254 private:
03255 word32 m_counter;
03256 SecByteBlock m_counterAndSeed;
03257 };
03258
03259 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03260 {
03261 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03262 Integer max;
03263 if (!params.GetValue("Max", max))
03264 {
03265 int bitLength;
03266 if (params.GetIntValue("BitLength", bitLength))
03267 max = Integer::Power2(bitLength);
03268 else
03269 throw InvalidArgument("Integer: missing Max argument");
03270 }
03271 if (min > max)
03272 throw InvalidArgument("Integer: Min must be no greater than Max");
03273
03274 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03275 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03276
03277 if (equiv.IsNegative() || equiv >= mod)
03278 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03279
03280 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03281
03282 member_ptr<KDF2_RNG> kdf2Rng;
03283 ConstByteArrayParameter seed;
03284 if (params.GetValue("Seed", seed))
03285 {
03286 ByteQueue bq;
03287 DERSequenceEncoder seq(bq);
03288 min.DEREncode(seq);
03289 max.DEREncode(seq);
03290 equiv.DEREncode(seq);
03291 mod.DEREncode(seq);
03292 DEREncodeUnsigned(seq, rnType);
03293 DEREncodeOctetString(seq, seed.begin(), seed.size());
03294 seq.MessageEnd();
03295
03296 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03297 bq.Get(finalSeed, finalSeed.size());
03298 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03299 }
03300 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03301
03302 switch (rnType)
03303 {
03304 case ANY:
03305 if (mod == One())
03306 Randomize(rng, min, max);
03307 else
03308 {
03309 Integer min1 = min + (equiv-min)%mod;
03310 if (max < min1)
03311 return false;
03312 Randomize(rng, Zero(), (max - min1) / mod);
03313 *this *= mod;
03314 *this += min1;
03315 }
03316 return true;
03317
03318 case PRIME:
03319 {
03320 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03321
03322 int i;
03323 i = 0;
03324 while (1)
03325 {
03326 if (++i==16)
03327 {
03328
03329 Integer first = min;
03330 if (FirstPrime(first, max, equiv, mod, pSelector))
03331 {
03332
03333 *this = first;
03334 if (!FirstPrime(first, max, equiv, mod, pSelector))
03335 return true;
03336 }
03337 else
03338 return false;
03339 }
03340
03341 Randomize(rng, min, max);
03342 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03343 return true;
03344 }
03345 }
03346
03347 default:
03348 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03349 }
03350 }
03351
03352 std::istream& operator>>(std::istream& in, Integer &a)
03353 {
03354 char c;
03355 unsigned int length = 0;
03356 SecBlock<char> str(length + 16);
03357
03358 std::ws(in);
03359
03360 do
03361 {
03362 in.read(&c, 1);
03363 str[length++] = c;
03364 if (length >= str.size())
03365 str.Grow(length + 16);
03366 }
03367 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03368
03369 if (in.gcount())
03370 in.putback(c);
03371 str[length-1] = '\0';
03372 a = Integer(str);
03373
03374 return in;
03375 }
03376
03377 std::ostream& operator<<(std::ostream& out, const Integer &a)
03378 {
03379
03380 long f = out.flags() & std::ios::basefield;
03381 int base, block;
03382 char suffix;
03383 switch(f)
03384 {
03385 case std::ios::oct :
03386 base = 8;
03387 block = 8;
03388 suffix = 'o';
03389 break;
03390 case std::ios::hex :
03391 base = 16;
03392 block = 4;
03393 suffix = 'h';
03394 break;
03395 default :
03396 base = 10;
03397 block = 3;
03398 suffix = '.';
03399 }
03400
03401 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03402 Integer temp1=a, temp2;
03403 unsigned i=0;
03404 const char vec[]="0123456789ABCDEF";
03405
03406 if (a.IsNegative())
03407 {
03408 out << '-';
03409 temp1.Negate();
03410 }
03411
03412 if (!a)
03413 out << '0';
03414
03415 while (!!temp1)
03416 {
03417 word digit;
03418 Integer::Divide(digit, temp2, temp1, base);
03419 s[i++]=vec[digit];
03420 temp1=temp2;
03421 }
03422
03423 while (i--)
03424 {
03425 out << s[i];
03426
03427
03428 }
03429 return out << suffix;
03430 }
03431
03432 Integer& Integer::operator++()
03433 {
03434 if (NotNegative())
03435 {
03436 if (Increment(reg, reg.size()))
03437 {
03438 reg.CleanGrow(2*reg.size());
03439 reg[reg.size()/2]=1;
03440 }
03441 }
03442 else
03443 {
03444 word borrow = Decrement(reg, reg.size());
03445 assert(!borrow);
03446 if (WordCount()==0)
03447 *this = Zero();
03448 }
03449 return *this;
03450 }
03451
03452 Integer& Integer::operator--()
03453 {
03454 if (IsNegative())
03455 {
03456 if (Increment(reg, reg.size()))
03457 {
03458 reg.CleanGrow(2*reg.size());
03459 reg[reg.size()/2]=1;
03460 }
03461 }
03462 else
03463 {
03464 if (Decrement(reg, reg.size()))
03465 *this = -One();
03466 }
03467 return *this;
03468 }
03469
03470 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03471 {
03472 int carry;
03473 if (a.reg.size() == b.reg.size())
03474 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03475 else if (a.reg.size() > b.reg.size())
03476 {
03477 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03478 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03479 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03480 }
03481 else
03482 {
03483 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03484 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03485 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03486 }
03487
03488 if (carry)
03489 {
03490 sum.reg.CleanGrow(2*sum.reg.size());
03491 sum.reg[sum.reg.size()/2] = 1;
03492 }
03493 sum.sign = Integer::POSITIVE;
03494 }
03495
03496 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03497 {
03498 unsigned aSize = a.WordCount();
03499 aSize += aSize%2;
03500 unsigned bSize = b.WordCount();
03501 bSize += bSize%2;
03502
03503 if (aSize == bSize)
03504 {
03505 if (Compare(a.reg, b.reg, aSize) >= 0)
03506 {
03507 Subtract(diff.reg, a.reg, b.reg, aSize);
03508 diff.sign = Integer::POSITIVE;
03509 }
03510 else
03511 {
03512 Subtract(diff.reg, b.reg, a.reg, aSize);
03513 diff.sign = Integer::NEGATIVE;
03514 }
03515 }
03516 else if (aSize > bSize)
03517 {
03518 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03519 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03520 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03521 assert(!borrow);
03522 diff.sign = Integer::POSITIVE;
03523 }
03524 else
03525 {
03526 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03527 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03528 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03529 assert(!borrow);
03530 diff.sign = Integer::NEGATIVE;
03531 }
03532 }
03533
03534
03535 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03536 {
03537 return a < b ? b : a;
03538 }
03539
03540 Integer Integer::Plus(const Integer& b) const
03541 {
03542 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03543 if (NotNegative())
03544 {
03545 if (b.NotNegative())
03546 PositiveAdd(sum, *this, b);
03547 else
03548 PositiveSubtract(sum, *this, b);
03549 }
03550 else
03551 {
03552 if (b.NotNegative())
03553 PositiveSubtract(sum, b, *this);
03554 else
03555 {
03556 PositiveAdd(sum, *this, b);
03557 sum.sign = Integer::NEGATIVE;
03558 }
03559 }
03560 return sum;
03561 }
03562
03563 Integer& Integer::operator+=(const Integer& t)
03564 {
03565 reg.CleanGrow(t.reg.size());
03566 if (NotNegative())
03567 {
03568 if (t.NotNegative())
03569 PositiveAdd(*this, *this, t);
03570 else
03571 PositiveSubtract(*this, *this, t);
03572 }
03573 else
03574 {
03575 if (t.NotNegative())
03576 PositiveSubtract(*this, t, *this);
03577 else
03578 {
03579 PositiveAdd(*this, *this, t);
03580 sign = Integer::NEGATIVE;
03581 }
03582 }
03583 return *this;
03584 }
03585
03586 Integer Integer::Minus(const Integer& b) const
03587 {
03588 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03589 if (NotNegative())
03590 {
03591 if (b.NotNegative())
03592 PositiveSubtract(diff, *this, b);
03593 else
03594 PositiveAdd(diff, *this, b);
03595 }
03596 else
03597 {
03598 if (b.NotNegative())
03599 {
03600 PositiveAdd(diff, *this, b);
03601 diff.sign = Integer::NEGATIVE;
03602 }
03603 else
03604 PositiveSubtract(diff, b, *this);
03605 }
03606 return diff;
03607 }
03608
03609 Integer& Integer::operator-=(const Integer& t)
03610 {
03611 reg.CleanGrow(t.reg.size());
03612 if (NotNegative())
03613 {
03614 if (t.NotNegative())
03615 PositiveSubtract(*this, *this, t);
03616 else
03617 PositiveAdd(*this, *this, t);
03618 }
03619 else
03620 {
03621 if (t.NotNegative())
03622 {
03623 PositiveAdd(*this, *this, t);
03624 sign = Integer::NEGATIVE;
03625 }
03626 else
03627 PositiveSubtract(*this, t, *this);
03628 }
03629 return *this;
03630 }
03631
03632 Integer& Integer::operator<<=(size_t n)
03633 {
03634 const size_t wordCount = WordCount();
03635 const size_t shiftWords = n / WORD_BITS;
03636 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03637
03638 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03639 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03640 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03641 return *this;
03642 }
03643
03644 Integer& Integer::operator>>=(size_t n)
03645 {
03646 const size_t wordCount = WordCount();
03647 const size_t shiftWords = n / WORD_BITS;
03648 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03649
03650 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03651 if (wordCount > shiftWords)
03652 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03653 if (IsNegative() && WordCount()==0)
03654 *this = Zero();
03655 return *this;
03656 }
03657
03658 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03659 {
03660 size_t aSize = RoundupSize(a.WordCount());
03661 size_t bSize = RoundupSize(b.WordCount());
03662
03663 product.reg.CleanNew(RoundupSize(aSize+bSize));
03664 product.sign = Integer::POSITIVE;
03665
03666 IntegerSecBlock workspace(aSize + bSize);
03667 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03668 }
03669
03670 void Multiply(Integer &product, const Integer &a, const Integer &b)
03671 {
03672 PositiveMultiply(product, a, b);
03673
03674 if (a.NotNegative() != b.NotNegative())
03675 product.Negate();
03676 }
03677
03678 Integer Integer::Times(const Integer &b) const
03679 {
03680 Integer product;
03681 Multiply(product, *this, b);
03682 return product;
03683 }
03684
03685
03686
03687
03688
03689
03690
03691
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701
03702
03703
03704
03705
03706
03707 void PositiveDivide(Integer &remainder, Integer "ient,
03708 const Integer &a, const Integer &b)
03709 {
03710 unsigned aSize = a.WordCount();
03711 unsigned bSize = b.WordCount();
03712
03713 if (!bSize)
03714 throw Integer::DivideByZero();
03715
03716 if (a.PositiveCompare(b) == -1)
03717 {
03718 remainder = a;
03719 remainder.sign = Integer::POSITIVE;
03720 quotient = Integer::Zero();
03721 return;
03722 }
03723
03724 aSize += aSize%2;
03725 bSize += bSize%2;
03726
03727 remainder.reg.CleanNew(RoundupSize(bSize));
03728 remainder.sign = Integer::POSITIVE;
03729 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03730 quotient.sign = Integer::POSITIVE;
03731
03732 IntegerSecBlock T(aSize+3*(bSize+2));
03733 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03734 }
03735
03736 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03737 {
03738 PositiveDivide(remainder, quotient, dividend, divisor);
03739
03740 if (dividend.IsNegative())
03741 {
03742 quotient.Negate();
03743 if (remainder.NotZero())
03744 {
03745 --quotient;
03746 remainder = divisor.AbsoluteValue() - remainder;
03747 }
03748 }
03749
03750 if (divisor.IsNegative())
03751 quotient.Negate();
03752 }
03753
03754 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03755 {
03756 q = a;
03757 q >>= n;
03758
03759 const size_t wordCount = BitsToWords(n);
03760 if (wordCount <= a.WordCount())
03761 {
03762 r.reg.resize(RoundupSize(wordCount));
03763 CopyWords(r.reg, a.reg, wordCount);
03764 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03765 if (n % WORD_BITS != 0)
03766 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03767 }
03768 else
03769 {
03770 r.reg.resize(RoundupSize(a.WordCount()));
03771 CopyWords(r.reg, a.reg, r.reg.size());
03772 }
03773 r.sign = POSITIVE;
03774
03775 if (a.IsNegative() && r.NotZero())
03776 {
03777 --q;
03778 r = Power2(n) - r;
03779 }
03780 }
03781
03782 Integer Integer::DividedBy(const Integer &b) const
03783 {
03784 Integer remainder, quotient;
03785 Integer::Divide(remainder, quotient, *this, b);
03786 return quotient;
03787 }
03788
03789 Integer Integer::Modulo(const Integer &b) const
03790 {
03791 Integer remainder, quotient;
03792 Integer::Divide(remainder, quotient, *this, b);
03793 return remainder;
03794 }
03795
03796 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03797 {
03798 if (!divisor)
03799 throw Integer::DivideByZero();
03800
03801 assert(divisor);
03802
03803 if ((divisor & (divisor-1)) == 0)
03804 {
03805 quotient = dividend >> (BitPrecision(divisor)-1);
03806 remainder = dividend.reg[0] & (divisor-1);
03807 return;
03808 }
03809
03810 unsigned int i = dividend.WordCount();
03811 quotient.reg.CleanNew(RoundupSize(i));
03812 remainder = 0;
03813 while (i--)
03814 {
03815 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03816 remainder = DWord(dividend.reg[i], remainder) % divisor;
03817 }
03818
03819 if (dividend.NotNegative())
03820 quotient.sign = POSITIVE;
03821 else
03822 {
03823 quotient.sign = NEGATIVE;
03824 if (remainder)
03825 {
03826 --quotient;
03827 remainder = divisor - remainder;
03828 }
03829 }
03830 }
03831
03832 Integer Integer::DividedBy(word b) const
03833 {
03834 word remainder;
03835 Integer quotient;
03836 Integer::Divide(remainder, quotient, *this, b);
03837 return quotient;
03838 }
03839
03840 word Integer::Modulo(word divisor) const
03841 {
03842 if (!divisor)
03843 throw Integer::DivideByZero();
03844
03845 assert(divisor);
03846
03847 word remainder;
03848
03849 if ((divisor & (divisor-1)) == 0)
03850 remainder = reg[0] & (divisor-1);
03851 else
03852 {
03853 unsigned int i = WordCount();
03854
03855 if (divisor <= 5)
03856 {
03857 DWord sum(0, 0);
03858 while (i--)
03859 sum += reg[i];
03860 remainder = sum % divisor;
03861 }
03862 else
03863 {
03864 remainder = 0;
03865 while (i--)
03866 remainder = DWord(reg[i], remainder) % divisor;
03867 }
03868 }
03869
03870 if (IsNegative() && remainder)
03871 remainder = divisor - remainder;
03872
03873 return remainder;
03874 }
03875
03876 void Integer::Negate()
03877 {
03878 if (!!(*this))
03879 sign = Sign(1-sign);
03880 }
03881
03882 int Integer::PositiveCompare(const Integer& t) const
03883 {
03884 unsigned size = WordCount(), tSize = t.WordCount();
03885
03886 if (size == tSize)
03887 return CryptoPP::Compare(reg, t.reg, size);
03888 else
03889 return size > tSize ? 1 : -1;
03890 }
03891
03892 int Integer::Compare(const Integer& t) const
03893 {
03894 if (NotNegative())
03895 {
03896 if (t.NotNegative())
03897 return PositiveCompare(t);
03898 else
03899 return 1;
03900 }
03901 else
03902 {
03903 if (t.NotNegative())
03904 return -1;
03905 else
03906 return -PositiveCompare(t);
03907 }
03908 }
03909
03910 Integer Integer::SquareRoot() const
03911 {
03912 if (!IsPositive())
03913 return Zero();
03914
03915
03916 Integer x, y = Power2((BitCount()+1)/2);
03917 assert(y*y >= *this);
03918
03919 do
03920 {
03921 x = y;
03922 y = (x + *this/x) >> 1;
03923 } while (y<x);
03924
03925 return x;
03926 }
03927
03928 bool Integer::IsSquare() const
03929 {
03930 Integer r = SquareRoot();
03931 return *this == r.Squared();
03932 }
03933
03934 bool Integer::IsUnit() const
03935 {
03936 return (WordCount() == 1) && (reg[0] == 1);
03937 }
03938
03939 Integer Integer::MultiplicativeInverse() const
03940 {
03941 return IsUnit() ? *this : Zero();
03942 }
03943
03944 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03945 {
03946 return x*y%m;
03947 }
03948
03949 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03950 {
03951 ModularArithmetic mr(m);
03952 return mr.Exponentiate(x, e);
03953 }
03954
03955 Integer Integer::Gcd(const Integer &a, const Integer &b)
03956 {
03957 return EuclideanDomainOf<Integer>().Gcd(a, b);
03958 }
03959
03960 Integer Integer::InverseMod(const Integer &m) const
03961 {
03962 assert(m.NotNegative());
03963
03964 if (IsNegative() || *this>=m)
03965 return (*this%m).InverseMod(m);
03966
03967 if (m.IsEven())
03968 {
03969 if (!m || IsEven())
03970 return Zero();
03971 if (*this == One())
03972 return One();
03973
03974 Integer u = m.InverseMod(*this);
03975 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03976 }
03977
03978 SecBlock<word> T(m.reg.size() * 4);
03979 Integer r((word)0, m.reg.size());
03980 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03981 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03982 return r;
03983 }
03984
03985 word Integer::InverseMod(word mod) const
03986 {
03987 word g0 = mod, g1 = *this % mod;
03988 word v0 = 0, v1 = 1;
03989 word y;
03990
03991 while (g1)
03992 {
03993 if (g1 == 1)
03994 return v1;
03995 y = g0 / g1;
03996 g0 = g0 % g1;
03997 v0 += y * v1;
03998
03999 if (!g0)
04000 break;
04001 if (g0 == 1)
04002 return mod-v0;
04003 y = g1 / g0;
04004 g1 = g1 % g0;
04005 v1 += y * v0;
04006 }
04007 return 0;
04008 }
04009
04010
04011
04012 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04013 {
04014 BERSequenceDecoder seq(bt);
04015 OID oid(seq);
04016 if (oid != ASN1::prime_field())
04017 BERDecodeError();
04018 m_modulus.BERDecode(seq);
04019 seq.MessageEnd();
04020 m_result.reg.resize(m_modulus.reg.size());
04021 }
04022
04023 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04024 {
04025 DERSequenceEncoder seq(bt);
04026 ASN1::prime_field().DEREncode(seq);
04027 m_modulus.DEREncode(seq);
04028 seq.MessageEnd();
04029 }
04030
04031 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04032 {
04033 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04034 }
04035
04036 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04037 {
04038 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04039 }
04040
04041 const Integer& ModularArithmetic::Half(const Integer &a) const
04042 {
04043 if (a.reg.size()==m_modulus.reg.size())
04044 {
04045 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04046 return m_result;
04047 }
04048 else
04049 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04050 }
04051
04052 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04053 {
04054 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04055 {
04056 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04057 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04058 {
04059 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04060 }
04061 return m_result;
04062 }
04063 else
04064 {
04065 m_result1 = a+b;
04066 if (m_result1 >= m_modulus)
04067 m_result1 -= m_modulus;
04068 return m_result1;
04069 }
04070 }
04071
04072 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04073 {
04074 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04075 {
04076 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04077 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04078 {
04079 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04080 }
04081 }
04082 else
04083 {
04084 a+=b;
04085 if (a>=m_modulus)
04086 a-=m_modulus;
04087 }
04088
04089 return a;
04090 }
04091
04092 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04093 {
04094 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04095 {
04096 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04097 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04098 return m_result;
04099 }
04100 else
04101 {
04102 m_result1 = a-b;
04103 if (m_result1.IsNegative())
04104 m_result1 += m_modulus;
04105 return m_result1;
04106 }
04107 }
04108
04109 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04110 {
04111 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04112 {
04113 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04114 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04115 }
04116 else
04117 {
04118 a-=b;
04119 if (a.IsNegative())
04120 a+=m_modulus;
04121 }
04122
04123 return a;
04124 }
04125
04126 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04127 {
04128 if (!a)
04129 return a;
04130
04131 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04132 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04133 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04134
04135 return m_result;
04136 }
04137
04138 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04139 {
04140 if (m_modulus.IsOdd())
04141 {
04142 MontgomeryRepresentation dr(m_modulus);
04143 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04144 }
04145 else
04146 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04147 }
04148
04149 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04150 {
04151 if (m_modulus.IsOdd())
04152 {
04153 MontgomeryRepresentation dr(m_modulus);
04154 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04155 for (unsigned int i=0; i<exponentsCount; i++)
04156 results[i] = dr.ConvertOut(results[i]);
04157 }
04158 else
04159 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04160 }
04161
04162 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04163 : ModularArithmetic(m),
04164 m_u((word)0, m_modulus.reg.size()),
04165 m_workspace(5*m_modulus.reg.size())
04166 {
04167 if (!m_modulus.IsOdd())
04168 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04169
04170 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04171 }
04172
04173 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04174 {
04175 word *const T = m_workspace.begin();
04176 word *const R = m_result.reg.begin();
04177 const size_t N = m_modulus.reg.size();
04178 assert(a.reg.size()<=N && b.reg.size()<=N);
04179
04180 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04181 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04182 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04183 return m_result;
04184 }
04185
04186 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04187 {
04188 word *const T = m_workspace.begin();
04189 word *const R = m_result.reg.begin();
04190 const size_t N = m_modulus.reg.size();
04191 assert(a.reg.size()<=N);
04192
04193 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04194 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04195 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04196 return m_result;
04197 }
04198
04199 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04200 {
04201 word *const T = m_workspace.begin();
04202 word *const R = m_result.reg.begin();
04203 const size_t N = m_modulus.reg.size();
04204 assert(a.reg.size()<=N);
04205
04206 CopyWords(T, a.reg, a.reg.size());
04207 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04208 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04209 return m_result;
04210 }
04211
04212 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04213 {
04214
04215 word *const T = m_workspace.begin();
04216 word *const R = m_result.reg.begin();
04217 const size_t N = m_modulus.reg.size();
04218 assert(a.reg.size()<=N);
04219
04220 CopyWords(T, a.reg, a.reg.size());
04221 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04222 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04223 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04224
04225
04226
04227 if (k>N*WORD_BITS)
04228 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04229 else
04230 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04231
04232 return m_result;
04233 }
04234
04235 NAMESPACE_END
04236
04237 #endif