From 354bb40e75d94466e91fe6960523612c9d17ccfb Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 2 Nov 2017 23:11:29 +0300 Subject: Add implementation --- mysql/extra/yassl/taocrypt/src/integer.cpp | 3894 ++++++++++++++++++++++++++++ 1 file changed, 3894 insertions(+) create mode 100644 mysql/extra/yassl/taocrypt/src/integer.cpp (limited to 'mysql/extra/yassl/taocrypt/src/integer.cpp') diff --git a/mysql/extra/yassl/taocrypt/src/integer.cpp b/mysql/extra/yassl/taocrypt/src/integer.cpp new file mode 100644 index 0000000..478a13c --- /dev/null +++ b/mysql/extra/yassl/taocrypt/src/integer.cpp @@ -0,0 +1,3894 @@ +/* Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + +/* based on Wei Dai's integer.cpp from CryptoPP */ + +#include "runtime.hpp" +#include "integer.hpp" +#include "modarith.hpp" +#include "asn.hpp" + + + +#ifdef __DECCXX + #include // for asm overflow assembly +#endif + +#if defined(_M_X64) || defined(_M_IA64) + #include +#pragma intrinsic(_umul128) +#endif + + +#ifdef __GNUC__ + #include + #include +#endif + + +#ifdef SSE2_INTRINSICS_AVAILABLE + #ifdef __GNUC__ + #include + #ifdef TAOCRYPT_MEMALIGN_AVAILABLE + #include + #else + #include + #endif + #else + #include + #endif +#elif defined(_MSC_VER) && defined(_M_IX86) +/* #pragma message("You do not seem to have the Visual C++ Processor Pack ") + #pragma message("installed, so use of SSE2 intrinsics will be disabled.") +*/ + #pragma message("installed, so use of SSE2 intrinsics will be disabled.") +#elif defined(__GNUC__) && defined(__i386__) +/* #warning You do not have GCC 3.3 or later, or did not specify the -msse2 \ + compiler option. Use of SSE2 intrinsics will be disabled. +*/ +#endif + + +namespace TaoCrypt { + + +#ifdef SSE2_INTRINSICS_AVAILABLE + +template +CPP_TYPENAME AlignedAllocator::pointer AlignedAllocator::allocate( + size_type n, const void *) +{ + if (n > this->max_size()) + return 0; + if (n == 0) + return 0; + if (n >= 4) + { + void* p; + #ifdef TAOCRYPT_MM_MALLOC_AVAILABLE + p = _mm_malloc(sizeof(T)*n, 16); + #elif defined(TAOCRYPT_MEMALIGN_AVAILABLE) + p = memalign(16, sizeof(T)*n); + #elif defined(TAOCRYPT_MALLOC_ALIGNMENT_IS_16) + p = malloc(sizeof(T)*n); + #else + p = (byte *)malloc(sizeof(T)*n + 8); + // assume malloc alignment is at least 8 + #endif + + #ifdef TAOCRYPT_NO_ALIGNED_ALLOC + m_pBlock = p; + if (!IsAlignedOn(p, 16)) + { + p = (byte *)p + 8; + } + #endif + + return (T*)p; + } + return NEW_TC T[n]; +} + + +template +void AlignedAllocator::deallocate(void* p, size_type n) +{ + memset(p, 0, n*sizeof(T)); + if (n >= 4) + { + #ifdef TAOCRYPT_MM_MALLOC_AVAILABLE + _mm_free(p); + #elif defined(TAOCRYPT_NO_ALIGNED_ALLOC) + free(m_pBlock); + m_pBlock = 0; + #else + free(p); + #endif + } + else + tcArrayDelete((T *)p); +} + +#endif // SSE2 + + +// ******** start of integer needs + +// start 5.2.1 adds DWord and Word ******** + +// ******************************************************** + +class DWord { +public: +DWord() {} + +#ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + explicit DWord(word low) + { + whole_ = low; + } +#else + explicit DWord(word low) + { + halfs_.low = low; + halfs_.high = 0; + } +#endif + + DWord(word low, word high) + { + halfs_.low = low; + halfs_.high = high; + } + + static DWord Multiply(word a, word b) + { + DWord r; + + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + r.whole_ = (dword)a * b; + + #elif defined(_M_X64) || defined(_M_IA64) + r.halfs_.low = _umul128(a, b, &r.halfs_.high); + + #elif defined(__ia64__) + r.halfs_.low = a*b; + __asm__("xmpy.hu %0=%1,%2" : "=f" (r.halfs_.high) + : "f" (a), "f" (b)); + + #elif defined(_ARCH_PPC64) + r.halfs_.low = a*b; + __asm__("mulhdu %0,%1,%2" : "=r" (r.halfs_.high) + : "r" (a), "r" (b) : "cc"); + + #elif defined(__x86_64__) + __asm__("mulq %3" : "=d" (r.halfs_.high), "=a" (r.halfs_.low) : + "a" (a), "rm" (b) : "cc"); + + #elif defined(__mips64) + __asm__("dmultu %2,%3" : "=h" (r.halfs_.high), "=l" (r.halfs_.low) + : "r" (a), "r" (b)); + + #elif defined(_M_IX86) + // for testing + word64 t = (word64)a * b; + r.halfs_.high = ((word32 *)(&t))[1]; + r.halfs_.low = (word32)t; + #else + #error can not implement DWord + #endif + + return r; + } + + static DWord MultiplyAndAdd(word a, word b, word c) + { + DWord r = Multiply(a, b); + return r += c; + } + + DWord & operator+=(word a) + { + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + whole_ = whole_ + a; + #else + halfs_.low += a; + halfs_.high += (halfs_.low < a); + #endif + return *this; + } + + DWord operator+(word a) + { + DWord r; + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + r.whole_ = whole_ + a; + #else + r.halfs_.low = halfs_.low + a; + r.halfs_.high = halfs_.high + (r.halfs_.low < a); + #endif + return r; + } + + DWord operator-(DWord a) + { + DWord r; + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + r.whole_ = whole_ - a.whole_; + #else + r.halfs_.low = halfs_.low - a.halfs_.low; + r.halfs_.high = halfs_.high - a.halfs_.high - + (r.halfs_.low > halfs_.low); + #endif + return r; + } + + DWord operator-(word a) + { + DWord r; + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + r.whole_ = whole_ - a; + #else + r.halfs_.low = halfs_.low - a; + r.halfs_.high = halfs_.high - (r.halfs_.low > halfs_.low); + #endif + return r; + } + + // returns quotient, which must fit in a word + word operator/(word divisor); + + word operator%(word a); + + bool operator!() const + { + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + return !whole_; + #else + return !halfs_.high && !halfs_.low; + #endif + } + + word GetLowHalf() const {return halfs_.low;} + word GetHighHalf() const {return halfs_.high;} + word GetHighHalfAsBorrow() const {return 0-halfs_.high;} + +private: + struct dword_struct + { + #ifdef LITTLE_ENDIAN_ORDER + word low; + word high; + #else + word high; + word low; + #endif + }; + + union + { + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + dword whole_; + #endif + struct dword_struct halfs_; + }; +}; + + +class Word { +public: + Word() {} + + Word(word value) + { + whole_ = value; + } + + Word(hword low, hword high) + { + whole_ = low | (word(high) << (WORD_BITS/2)); + } + + static Word Multiply(hword a, hword b) + { + Word r; + r.whole_ = (word)a * b; + return r; + } + + Word operator-(Word a) + { + Word r; + r.whole_ = whole_ - a.whole_; + return r; + } + + Word operator-(hword a) + { + Word r; + r.whole_ = whole_ - a; + return r; + } + + // returns quotient, which must fit in a word + hword operator/(hword divisor) + { + return hword(whole_ / divisor); + } + + bool operator!() const + { + return !whole_; + } + + word GetWhole() const {return whole_;} + hword GetLowHalf() const {return hword(whole_);} + hword GetHighHalf() const {return hword(whole_>>(WORD_BITS/2));} + hword GetHighHalfAsBorrow() const {return 0-hword(whole_>>(WORD_BITS/2));} + +private: + word whole_; +}; + + +// dummy is VC60 compiler bug workaround +// do a 3 word by 2 word divide, returns quotient and leaves remainder in A +template +S DivideThreeWordsByTwo(S* A, S B0, S B1, D* dummy_VC6_WorkAround = 0) +{ + // estimate the quotient: do a 2 S by 1 S divide + S Q; + if (S(B1+1) == 0) + Q = A[2]; + else + Q = D(A[1], A[2]) / S(B1+1); + + // now subtract Q*B from A + D p = D::Multiply(B0, Q); + D u = (D) A[0] - p.GetLowHalf(); + A[0] = u.GetLowHalf(); + u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - + D::Multiply(B1, Q); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); + + // Q <= actual quotient, so fix it + while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) + { + u = (D) A[0] - B0; + A[0] = u.GetLowHalf(); + u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); + A[1] = u.GetLowHalf(); + A[2] += u.GetHighHalf(); + Q++; + } + + return Q; +} + + +// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 +template +inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) +{ + if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) + return D(Ah.GetLowHalf(), Ah.GetHighHalf()); + else + { + S Q[2]; + T[0] = Al.GetLowHalf(); + T[1] = Al.GetHighHalf(); + T[2] = Ah.GetLowHalf(); + T[3] = Ah.GetHighHalf(); + Q[1] = DivideThreeWordsByTwo(T+1, B.GetLowHalf(), + B.GetHighHalf()); + Q[0] = DivideThreeWordsByTwo(T, B.GetLowHalf(), B.GetHighHalf()); + return D(Q[0], Q[1]); + } +} + + +// returns quotient, which must fit in a word +inline word DWord::operator/(word a) +{ + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + return word(whole_ / a); + #else + hword r[4]; + return DivideFourWordsByTwo(r, halfs_.low, + halfs_.high, a).GetWhole(); + #endif +} + +inline word DWord::operator%(word a) +{ + #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE + return word(whole_ % a); + #else + if (a < (word(1) << (WORD_BITS/2))) + { + hword h = hword(a); + word r = halfs_.high % h; + r = ((halfs_.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; + return hword((hword(halfs_.low) + (r << (WORD_BITS/2))) % h); + } + else + { + hword r[4]; + DivideFourWordsByTwo(r, halfs_.low, halfs_.high, a); + return Word(r[0], r[1]).GetWhole(); + } + #endif +} + + + +// end 5.2.1 DWord and Word adds + + + + + +static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8}; + +static inline unsigned int RoundupSize(unsigned int n) +{ + if (n<=8) + return RoundupSizeTable[n]; + else if (n<=16) + return 16; + else if (n<=32) + return 32; + else if (n<=64) + return 64; + else return 1U << BitPrecision(n-1); +} + + +static int Compare(const word *A, const word *B, unsigned int N) +{ + while (N--) + if (A[N] > B[N]) + return 1; + else if (A[N] < B[N]) + return -1; + + return 0; +} + +static word Increment(word *A, unsigned int N, word B=1) +{ + word t = A[0]; + A[0] = t+B; + if (A[0] >= t) + return 0; + for (unsigned i=1; i= A0) + if (B0 >= B1) + { + s = 0; + d = (dword)(A1-A0)*(B0-B1); + } + else + { + s = (A1-A0); + d = (dword)s*(word)(B0-B1); + } + else + if (B0 > B1) + { + s = (B0-B1); + d = (word)(A1-A0)*(dword)s; + } + else + { + s = 0; + d = (dword)(A0-A1)*(B1-B0); + } +*/ + // this segment is the branchless equivalent of above + word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]}; + unsigned int ai = A[1] < A[0]; + unsigned int bi = B[0] < B[1]; + unsigned int di = ai & bi; + DWord d = DWord::Multiply(D[di], D[di+2]); + D[1] = D[3] = 0; + unsigned int si = ai + !bi; + word s = D[si]; + + DWord A0B0 = DWord::Multiply(A[0], B[0]); + C[0] = A0B0.GetLowHalf(); + + DWord A1B1 = DWord::Multiply(A[1], B[1]); + DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + + A1B1.GetLowHalf(); + C[1] = t.GetLowHalf(); + + t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + + A1B1.GetHighHalf() - s; + C[2] = t.GetLowHalf(); + C[3] = t.GetHighHalf(); +} + +void Portable::Multiply2Bottom(word *C, const word *A, const word *B) +{ + DWord t = DWord::Multiply(A[0], B[0]); + C[0] = t.GetLowHalf(); + C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0]; +} + +word Portable::Multiply2Add(word *C, const word *A, const word *B) +{ + word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]}; + unsigned int ai = A[1] < A[0]; + unsigned int bi = B[0] < B[1]; + unsigned int di = ai & bi; + DWord d = DWord::Multiply(D[di], D[di+2]); + D[1] = D[3] = 0; + unsigned int si = ai + !bi; + word s = D[si]; + + DWord A0B0 = DWord::Multiply(A[0], B[0]); + DWord t = A0B0 + C[0]; + C[0] = t.GetLowHalf(); + + DWord A1B1 = DWord::Multiply(A[1], B[1]); + t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + + A1B1.GetLowHalf() + C[1]; + C[1] = t.GetLowHalf(); + + t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2]; + C[2] = t.GetLowHalf(); + + t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3]; + C[3] = t.GetLowHalf(); + return t.GetHighHalf(); +} + + +#define MulAcc(x, y) \ + p = DWord::MultiplyAndAdd(A[x], B[y], c); \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); + +#define SaveMulAcc(s, x, y) \ + R[s] = c; \ + p = DWord::MultiplyAndAdd(A[x], B[y], d); \ + c = p.GetLowHalf(); \ + p = (DWord) e + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e = p.GetHighHalf(); + +#define SquAcc(x, y) \ + q = DWord::Multiply(A[x], A[y]); \ + p = q + c; \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); \ + p = q + c; \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); + +#define SaveSquAcc(s, x, y) \ + R[s] = c; \ + q = DWord::Multiply(A[x], A[y]); \ + p = q + d; \ + c = p.GetLowHalf(); \ + p = (DWord) e + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e = p.GetHighHalf(); \ + p = q + c; \ + c = p.GetLowHalf(); \ + p = (DWord) d + p.GetHighHalf(); \ + d = p.GetLowHalf(); \ + e += p.GetHighHalf(); + + +void Portable::Multiply4(word *R, const word *A, const word *B) +{ + DWord p; + word c, d, e; + + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); + d = e = 0; + + MulAcc(0, 1); + MulAcc(1, 0); + + SaveMulAcc(1, 2, 0); + MulAcc(1, 1); + MulAcc(0, 2); + + SaveMulAcc(2, 0, 3); + MulAcc(1, 2); + MulAcc(2, 1); + MulAcc(3, 0); + + SaveMulAcc(3, 3, 1); + MulAcc(2, 2); + MulAcc(1, 3); + + SaveMulAcc(4, 2, 3); + MulAcc(3, 2); + + R[5] = c; + p = DWord::MultiplyAndAdd(A[3], B[3], d); + R[6] = p.GetLowHalf(); + R[7] = e + p.GetHighHalf(); +} + +void Portable::Square2(word *R, const word *A) +{ + DWord p, q; + word c, d, e; + + p = DWord::Multiply(A[0], A[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); + d = e = 0; + + SquAcc(0, 1); + + R[1] = c; + p = DWord::MultiplyAndAdd(A[1], A[1], d); + R[2] = p.GetLowHalf(); + R[3] = e + p.GetHighHalf(); +} + +void Portable::Square4(word *R, const word *A) +{ +#ifdef _MSC_VER + // VC60 workaround: MSVC 6.0 has an optimization bug that makes + // (dword)A*B where either A or B has been cast to a dword before + // very expensive. Revisit this function when this + // bug is fixed. + Multiply4(R, A, A); +#else + const word *B = A; + DWord p, q; + word c, d, e; + + p = DWord::Multiply(A[0], A[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); + d = e = 0; + + SquAcc(0, 1); + + SaveSquAcc(1, 2, 0); + MulAcc(1, 1); + + SaveSquAcc(2, 0, 3); + SquAcc(1, 2); + + SaveSquAcc(3, 3, 1); + MulAcc(2, 2); + + SaveSquAcc(4, 2, 3); + + R[5] = c; + p = DWord::MultiplyAndAdd(A[3], A[3], d); + R[6] = p.GetLowHalf(); + R[7] = e + p.GetHighHalf(); +#endif +} + +void Portable::Multiply8(word *R, const word *A, const word *B) +{ + DWord p; + word c, d, e; + + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); + d = e = 0; + + MulAcc(0, 1); + MulAcc(1, 0); + + SaveMulAcc(1, 2, 0); + MulAcc(1, 1); + MulAcc(0, 2); + + SaveMulAcc(2, 0, 3); + MulAcc(1, 2); + MulAcc(2, 1); + MulAcc(3, 0); + + SaveMulAcc(3, 0, 4); + MulAcc(1, 3); + MulAcc(2, 2); + MulAcc(3, 1); + MulAcc(4, 0); + + SaveMulAcc(4, 0, 5); + MulAcc(1, 4); + MulAcc(2, 3); + MulAcc(3, 2); + MulAcc(4, 1); + MulAcc(5, 0); + + SaveMulAcc(5, 0, 6); + MulAcc(1, 5); + MulAcc(2, 4); + MulAcc(3, 3); + MulAcc(4, 2); + MulAcc(5, 1); + MulAcc(6, 0); + + SaveMulAcc(6, 0, 7); + MulAcc(1, 6); + MulAcc(2, 5); + MulAcc(3, 4); + MulAcc(4, 3); + MulAcc(5, 2); + MulAcc(6, 1); + MulAcc(7, 0); + + SaveMulAcc(7, 1, 7); + MulAcc(2, 6); + MulAcc(3, 5); + MulAcc(4, 4); + MulAcc(5, 3); + MulAcc(6, 2); + MulAcc(7, 1); + + SaveMulAcc(8, 2, 7); + MulAcc(3, 6); + MulAcc(4, 5); + MulAcc(5, 4); + MulAcc(6, 3); + MulAcc(7, 2); + + SaveMulAcc(9, 3, 7); + MulAcc(4, 6); + MulAcc(5, 5); + MulAcc(6, 4); + MulAcc(7, 3); + + SaveMulAcc(10, 4, 7); + MulAcc(5, 6); + MulAcc(6, 5); + MulAcc(7, 4); + + SaveMulAcc(11, 5, 7); + MulAcc(6, 6); + MulAcc(7, 5); + + SaveMulAcc(12, 6, 7); + MulAcc(7, 6); + + R[13] = c; + p = DWord::MultiplyAndAdd(A[7], B[7], d); + R[14] = p.GetLowHalf(); + R[15] = e + p.GetHighHalf(); +} + +void Portable::Multiply4Bottom(word *R, const word *A, const word *B) +{ + DWord p; + word c, d, e; + + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); + d = e = 0; + + MulAcc(0, 1); + MulAcc(1, 0); + + SaveMulAcc(1, 2, 0); + MulAcc(1, 1); + MulAcc(0, 2); + + R[2] = c; + R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0]; +} + +void Portable::Multiply8Bottom(word *R, const word *A, const word *B) +{ + DWord p; + word c, d, e; + + p = DWord::Multiply(A[0], B[0]); + R[0] = p.GetLowHalf(); + c = p.GetHighHalf(); + d = e = 0; + + MulAcc(0, 1); + MulAcc(1, 0); + + SaveMulAcc(1, 2, 0); + MulAcc(1, 1); + MulAcc(0, 2); + + SaveMulAcc(2, 0, 3); + MulAcc(1, 2); + MulAcc(2, 1); + MulAcc(3, 0); + + SaveMulAcc(3, 0, 4); + MulAcc(1, 3); + MulAcc(2, 2); + MulAcc(3, 1); + MulAcc(4, 0); + + SaveMulAcc(4, 0, 5); + MulAcc(1, 4); + MulAcc(2, 3); + MulAcc(3, 2); + MulAcc(4, 1); + MulAcc(5, 0); + + SaveMulAcc(5, 0, 6); + MulAcc(1, 5); + MulAcc(2, 4); + MulAcc(3, 3); + MulAcc(4, 2); + MulAcc(5, 1); + MulAcc(6, 0); + + R[6] = c; + R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] + + A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0]; +} + + +#undef MulAcc +#undef SaveMulAcc +#undef SquAcc +#undef SaveSquAcc + +// optimized + +#ifdef TAOCRYPT_X86ASM_AVAILABLE + +// ************** x86 feature detection *************** + + +#ifdef SSE2_INTRINSICS_AVAILABLE + +#ifndef _MSC_VER + static jmp_buf s_env; + static void SigIllHandler(int) + { + longjmp(s_env, 1); + } +#endif + +static bool HasSSE2() +{ + if (!IsPentium()) + return false; + + word32 cpuid[4]; + CpuId(1, cpuid); + if ((cpuid[3] & (1 << 26)) == 0) + return false; + +#ifdef _MSC_VER + __try + { + __asm xorpd xmm0, xmm0 // executing SSE2 instruction + } + __except (1) + { + return false; + } + return true; +#else + typedef void (*SigHandler)(int); + + SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + bool result = true; + if (setjmp(s_env)) + result = false; + else + __asm __volatile ("xorpd %xmm0, %xmm0"); + + signal(SIGILL, oldHandler); + return result; +#endif +} +#endif // SSE2_INTRINSICS_AVAILABLE + + +static bool IsP4() +{ + if (!IsPentium()) + return false; + + word32 cpuid[4]; + + CpuId(1, cpuid); + return ((cpuid[0] >> 8) & 0xf) == 0xf; +} + +// ************** Pentium/P4 optimizations *************** + +class PentiumOptimized : public Portable +{ +public: + static word TAOCRYPT_CDECL Add(word *C, const word *A, const word *B, + unsigned int N); + static word TAOCRYPT_CDECL Subtract(word *C, const word *A, const word *B, + unsigned int N); + static void TAOCRYPT_CDECL Multiply4(word *C, const word *A, + const word *B); + static void TAOCRYPT_CDECL Multiply8(word *C, const word *A, + const word *B); + static void TAOCRYPT_CDECL Multiply8Bottom(word *C, const word *A, + const word *B); +}; + +class P4Optimized +{ +public: + static word TAOCRYPT_CDECL Add(word *C, const word *A, const word *B, + unsigned int N); + static word TAOCRYPT_CDECL Subtract(word *C, const word *A, const word *B, + unsigned int N); +#ifdef SSE2_INTRINSICS_AVAILABLE + static void TAOCRYPT_CDECL Multiply4(word *C, const word *A, + const word *B); + static void TAOCRYPT_CDECL Multiply8(word *C, const word *A, + const word *B); + static void TAOCRYPT_CDECL Multiply8Bottom(word *C, const word *A, + const word *B); +#endif +}; + +typedef word (TAOCRYPT_CDECL * PAddSub)(word *C, const word *A, const word *B, + unsigned int N); +typedef void (TAOCRYPT_CDECL * PMul)(word *C, const word *A, const word *B); + +static PAddSub s_pAdd, s_pSub; +#ifdef SSE2_INTRINSICS_AVAILABLE +static PMul s_pMul4, s_pMul8, s_pMul8B; +#endif + +static void SetPentiumFunctionPointers() +{ + if (!IsPentium()) + { + s_pAdd = &Portable::Add; + s_pSub = &Portable::Subtract; + } + else if (IsP4()) + { + s_pAdd = &P4Optimized::Add; + s_pSub = &P4Optimized::Subtract; + } + else + { + s_pAdd = &PentiumOptimized::Add; + s_pSub = &PentiumOptimized::Subtract; + } + +#ifdef SSE2_INTRINSICS_AVAILABLE + if (!IsPentium()) + { + s_pMul4 = &Portable::Multiply4; + s_pMul8 = &Portable::Multiply8; + s_pMul8B = &Portable::Multiply8Bottom; + } + else if (HasSSE2()) + { + s_pMul4 = &P4Optimized::Multiply4; + s_pMul8 = &P4Optimized::Multiply8; + s_pMul8B = &P4Optimized::Multiply8Bottom; + } + else + { + s_pMul4 = &PentiumOptimized::Multiply4; + s_pMul8 = &PentiumOptimized::Multiply8; + s_pMul8B = &PentiumOptimized::Multiply8Bottom; + } +#endif +} + +static const char s_RunAtStartupSetPentiumFunctionPointers = + (SetPentiumFunctionPointers(), 0); + + +class LowLevel : public PentiumOptimized +{ +public: + inline static word Add(word *C, const word *A, const word *B, + unsigned int N) + {return s_pAdd(C, A, B, N);} + inline static word Subtract(word *C, const word *A, const word *B, + unsigned int N) + {return s_pSub(C, A, B, N);} + inline static void Square4(word *R, const word *A) + {Multiply4(R, A, A);} +#ifdef SSE2_INTRINSICS_AVAILABLE + inline static void Multiply4(word *C, const word *A, const word *B) + {s_pMul4(C, A, B);} + inline static void Multiply8(word *C, const word *A, const word *B) + {s_pMul8(C, A, B);} + inline static void Multiply8Bottom(word *C, const word *A, const word *B) + {s_pMul8B(C, A, B);} +#endif +}; + +// use some tricks to share assembly code between MSVC and GCC +#ifdef _MSC_VER + #define TAOCRYPT_NAKED __declspec(naked) + #define AS1(x) __asm x + #define AS2(x, y) __asm x, y + #define AddPrologue \ + __asm push ebp \ + __asm push ebx \ + __asm push esi \ + __asm push edi \ + __asm mov ecx, [esp+20] \ + __asm mov edx, [esp+24] \ + __asm mov ebx, [esp+28] \ + __asm mov esi, [esp+32] + #define AddEpilogue \ + __asm pop edi \ + __asm pop esi \ + __asm pop ebx \ + __asm pop ebp \ + __asm ret + #define MulPrologue \ + __asm push ebp \ + __asm push ebx \ + __asm push esi \ + __asm push edi \ + __asm mov ecx, [esp+28] \ + __asm mov esi, [esp+24] \ + __asm push [esp+20] + #define MulEpilogue \ + __asm add esp, 4 \ + __asm pop edi \ + __asm pop esi \ + __asm pop ebx \ + __asm pop ebp \ + __asm ret +#else + #define TAOCRYPT_NAKED + #define AS1(x) #x ";" + #define AS2(x, y) #x ", " #y ";" + #define AddPrologue \ + word res; \ + __asm__ __volatile__ \ + ( \ + "push %%ebx;" /* save this manually, in case of -fPIC */ \ + "mov %3, %%ebx;" \ + ".intel_syntax noprefix;" \ + "push ebp;" + #define AddEpilogue \ + "pop ebp;" \ + ".att_syntax prefix;" \ + "pop %%ebx;" \ + "mov %%eax, %0;" \ + : "=g" (res) \ + : "c" (C), "d" (A), "m" (B), "S" (N) \ + : "%edi", "memory", "cc" \ + ); \ + return res; + + #define MulPrologue \ + __asm__ __volatile__ \ + ( \ + "push %%ebx;" /* save this manually, in case of -fPIC */ \ + "push %%ebp;" \ + "push %0;" \ + ".intel_syntax noprefix;" + #define MulEpilogue \ + "add esp, 4;" \ + "pop ebp;" \ + "pop ebx;" \ + ".att_syntax prefix;" \ + : \ + : "rm" (Z), "S" (X), "c" (Y) \ + : "%eax", "%edx", "%edi", "memory", "cc" \ + ); +#endif + +TAOCRYPT_NAKED word PentiumOptimized::Add(word *C, const word *A, + const word *B, unsigned int N) +{ + AddPrologue + + // now: ebx = B, ecx = C, edx = A, esi = N + AS2( sub ecx, edx) // hold the distance between C & A so we + // can add this to A to get C + AS2( xor eax, eax) // clear eax + + AS2( sub eax, esi) // eax is a negative index from end of B + AS2( lea ebx, [ebx+4*esi]) // ebx is end of B + + AS2( sar eax, 1) // unit of eax is now dwords; this also + // clears the carry flag + AS1( jz loopendAdd) // if no dwords then nothing to do + + AS1(loopstartAdd:) + AS2( mov esi,[edx]) // load lower word of A + AS2( mov ebp,[edx+4]) // load higher word of A + + AS2( mov edi,[ebx+8*eax]) // load lower word of B + AS2( lea edx,[edx+8]) // advance A and C + + AS2( adc esi,edi) // add lower words + AS2( mov edi,[ebx+8*eax+4]) // load higher word of B + + AS2( adc ebp,edi) // add higher words + AS1( inc eax) // advance B + + AS2( mov [edx+ecx-8],esi) // store lower word result + AS2( mov [edx+ecx-4],ebp) // store higher word result + + AS1( jnz loopstartAdd) // loop until eax overflows and becomes zero + + AS1(loopendAdd:) + AS2( adc eax, 0) // store carry into eax (return result register) + + AddEpilogue +} + +TAOCRYPT_NAKED word PentiumOptimized::Subtract(word *C, const word *A, + const word *B, unsigned int N) +{ + AddPrologue + + // now: ebx = B, ecx = C, edx = A, esi = N + AS2( sub ecx, edx) // hold the distance between C & A so we + // can add this to A to get C + AS2( xor eax, eax) // clear eax + + AS2( sub eax, esi) // eax is a negative index from end of B + AS2( lea ebx, [ebx+4*esi]) // ebx is end of B + + AS2( sar eax, 1) // unit of eax is now dwords; this also + // clears the carry flag + AS1( jz loopendSub) // if no dwords then nothing to do + + AS1(loopstartSub:) + AS2( mov esi,[edx]) // load lower word of A + AS2( mov ebp,[edx+4]) // load higher word of A + + AS2( mov edi,[ebx+8*eax]) // load lower word of B + AS2( lea edx,[edx+8]) // advance A and C + + AS2( sbb esi,edi) // subtract lower words + AS2( mov edi,[ebx+8*eax+4]) // load higher word of B + + AS2( sbb ebp,edi) // subtract higher words + AS1( inc eax) // advance B + + AS2( mov [edx+ecx-8],esi) // store lower word result + AS2( mov [edx+ecx-4],ebp) // store higher word result + + AS1( jnz loopstartSub) // loop until eax overflows and becomes zero + + AS1(loopendSub:) + AS2( adc eax, 0) // store carry into eax (return result register) + + AddEpilogue +} + +// On Pentium 4, the adc and sbb instructions are very expensive, so avoid them. + +TAOCRYPT_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, + unsigned int N) +{ + AddPrologue + + // now: ebx = B, ecx = C, edx = A, esi = N + AS2( xor eax, eax) + AS1( neg esi) + AS1( jz loopendAddP4) // if no dwords then nothing to do + + AS2( mov edi, [edx]) + AS2( mov ebp, [ebx]) + AS1( jmp carry1AddP4) + + AS1(loopstartAddP4:) + AS2( mov edi, [edx+8]) + AS2( add ecx, 8) + AS2( add edx, 8) + AS2( mov ebp, [ebx]) + AS2( add edi, eax) + AS1( jc carry1AddP4) + AS2( xor eax, eax) + + AS1(carry1AddP4:) + AS2( add edi, ebp) + AS2( mov ebp, 1) + AS2( mov [ecx], edi) + AS2( mov edi, [edx+4]) + AS2( cmovc eax, ebp) + AS2( mov ebp, [ebx+4]) + AS2( add ebx, 8) + AS2( add edi, eax) + AS1( jc carry2AddP4) + AS2( xor eax, eax) + + AS1(carry2AddP4:) + AS2( add edi, ebp) + AS2( mov ebp, 1) + AS2( cmovc eax, ebp) + AS2( mov [ecx+4], edi) + AS2( add esi, 2) + AS1( jnz loopstartAddP4) + + AS1(loopendAddP4:) + + AddEpilogue +} + +TAOCRYPT_NAKED word P4Optimized::Subtract(word *C, const word *A, + const word *B, unsigned int N) +{ + AddPrologue + + // now: ebx = B, ecx = C, edx = A, esi = N + AS2( xor eax, eax) + AS1( neg esi) + AS1( jz loopendSubP4) // if no dwords then nothing to do + + AS2( mov edi, [edx]) + AS2( mov ebp, [ebx]) + AS1( jmp carry1SubP4) + + AS1(loopstartSubP4:) + AS2( mov edi, [edx+8]) + AS2( add edx, 8) + AS2( add ecx, 8) + AS2( mov ebp, [ebx]) + AS2( sub edi, eax) + AS1( jc carry1SubP4) + AS2( xor eax, eax) + + AS1(carry1SubP4:) + AS2( sub edi, ebp) + AS2( mov ebp, 1) + AS2( mov [ecx], edi) + AS2( mov edi, [edx+4]) + AS2( cmovc eax, ebp) + AS2( mov ebp, [ebx+4]) + AS2( add ebx, 8) + AS2( sub edi, eax) + AS1( jc carry2SubP4) + AS2( xor eax, eax) + + AS1(carry2SubP4:) + AS2( sub edi, ebp) + AS2( mov ebp, 1) + AS2( cmovc eax, ebp) + AS2( mov [ecx+4], edi) + AS2( add esi, 2) + AS1( jnz loopstartSubP4) + + AS1(loopendSubP4:) + + AddEpilogue +} + +// multiply assembly code originally contributed by Leonard Janke + +#define MulStartup \ + AS2(xor ebp, ebp) \ + AS2(xor edi, edi) \ + AS2(xor ebx, ebx) + +#define MulShiftCarry \ + AS2(mov ebp, edx) \ + AS2(mov edi, ebx) \ + AS2(xor ebx, ebx) + +#define MulAccumulateBottom(i,j) \ + AS2(mov eax, [ecx+4*j]) \ + AS2(imul eax, dword ptr [esi+4*i]) \ + AS2(add ebp, eax) + +#define MulAccumulate(i,j) \ + AS2(mov eax, [ecx+4*j]) \ + AS1(mul dword ptr [esi+4*i]) \ + AS2(add ebp, eax) \ + AS2(adc edi, edx) \ + AS2(adc bl, bh) + +#define MulStoreDigit(i) \ + AS2(mov edx, edi) \ + AS2(mov edi, [esp]) \ + AS2(mov [edi+4*i], ebp) + +#define MulLastDiagonal(digits) \ + AS2(mov eax, [ecx+4*(digits-1)]) \ + AS1(mul dword ptr [esi+4*(digits-1)]) \ + AS2(add ebp, eax) \ + AS2(adc edx, edi) \ + AS2(mov edi, [esp]) \ + AS2(mov [edi+4*(2*digits-2)], ebp) \ + AS2(mov [edi+4*(2*digits-1)], edx) + +TAOCRYPT_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, + const word* Y) +{ + MulPrologue + // now: [esp] = Z, esi = X, ecx = Y + MulStartup + MulAccumulate(0,0) + MulStoreDigit(0) + MulShiftCarry + + MulAccumulate(1,0) + MulAccumulate(0,1) + MulStoreDigit(1) + MulShiftCarry + + MulAccumulate(2,0) + MulAccumulate(1,1) + MulAccumulate(0,2) + MulStoreDigit(2) + MulShiftCarry + + MulAccumulate(3,0) + MulAccumulate(2,1) + MulAccumulate(1,2) + MulAccumulate(0,3) + MulStoreDigit(3) + MulShiftCarry + + MulAccumulate(3,1) + MulAccumulate(2,2) + MulAccumulate(1,3) + MulStoreDigit(4) + MulShiftCarry + + MulAccumulate(3,2) + MulAccumulate(2,3) + MulStoreDigit(5) + MulShiftCarry + + MulLastDiagonal(4) + MulEpilogue +} + +TAOCRYPT_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, + const word* Y) +{ + MulPrologue + // now: [esp] = Z, esi = X, ecx = Y + MulStartup + MulAccumulate(0,0) + MulStoreDigit(0) + MulShiftCarry + + MulAccumulate(1,0) + MulAccumulate(0,1) + MulStoreDigit(1) + MulShiftCarry + + MulAccumulate(2,0) + MulAccumulate(1,1) + MulAccumulate(0,2) + MulStoreDigit(2) + MulShiftCarry + + MulAccumulate(3,0) + MulAccumulate(2,1) + MulAccumulate(1,2) + MulAccumulate(0,3) + MulStoreDigit(3) + MulShiftCarry + + MulAccumulate(4,0) + MulAccumulate(3,1) + MulAccumulate(2,2) + MulAccumulate(1,3) + MulAccumulate(0,4) + MulStoreDigit(4) + MulShiftCarry + + MulAccumulate(5,0) + MulAccumulate(4,1) + MulAccumulate(3,2) + MulAccumulate(2,3) + MulAccumulate(1,4) + MulAccumulate(0,5) + MulStoreDigit(5) + MulShiftCarry + + MulAccumulate(6,0) + MulAccumulate(5,1) + MulAccumulate(4,2) + MulAccumulate(3,3) + MulAccumulate(2,4) + MulAccumulate(1,5) + MulAccumulate(0,6) + MulStoreDigit(6) + MulShiftCarry + + MulAccumulate(7,0) + MulAccumulate(6,1) + MulAccumulate(5,2) + MulAccumulate(4,3) + MulAccumulate(3,4) + MulAccumulate(2,5) + MulAccumulate(1,6) + MulAccumulate(0,7) + MulStoreDigit(7) + MulShiftCarry + + MulAccumulate(7,1) + MulAccumulate(6,2) + MulAccumulate(5,3) + MulAccumulate(4,4) + MulAccumulate(3,5) + MulAccumulate(2,6) + MulAccumulate(1,7) + MulStoreDigit(8) + MulShiftCarry + + MulAccumulate(7,2) + MulAccumulate(6,3) + MulAccumulate(5,4) + MulAccumulate(4,5) + MulAccumulate(3,6) + MulAccumulate(2,7) + MulStoreDigit(9) + MulShiftCarry + + MulAccumulate(7,3) + MulAccumulate(6,4) + MulAccumulate(5,5) + MulAccumulate(4,6) + MulAccumulate(3,7) + MulStoreDigit(10) + MulShiftCarry + + MulAccumulate(7,4) + MulAccumulate(6,5) + MulAccumulate(5,6) + MulAccumulate(4,7) + MulStoreDigit(11) + MulShiftCarry + + MulAccumulate(7,5) + MulAccumulate(6,6) + MulAccumulate(5,7) + MulStoreDigit(12) + MulShiftCarry + + MulAccumulate(7,6) + MulAccumulate(6,7) + MulStoreDigit(13) + MulShiftCarry + + MulLastDiagonal(8) + MulEpilogue +} + +TAOCRYPT_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, + const word* Y) +{ + MulPrologue + // now: [esp] = Z, esi = X, ecx = Y + MulStartup + MulAccumulate(0,0) + MulStoreDigit(0) + MulShiftCarry + + MulAccumulate(1,0) + MulAccumulate(0,1) + MulStoreDigit(1) + MulShiftCarry + + MulAccumulate(2,0) + MulAccumulate(1,1) + MulAccumulate(0,2) + MulStoreDigit(2) + MulShiftCarry + + MulAccumulate(3,0) + MulAccumulate(2,1) + MulAccumulate(1,2) + MulAccumulate(0,3) + MulStoreDigit(3) + MulShiftCarry + + MulAccumulate(4,0) + MulAccumulate(3,1) + MulAccumulate(2,2) + MulAccumulate(1,3) + MulAccumulate(0,4) + MulStoreDigit(4) + MulShiftCarry + + MulAccumulate(5,0) + MulAccumulate(4,1) + MulAccumulate(3,2) + MulAccumulate(2,3) + MulAccumulate(1,4) + MulAccumulate(0,5) + MulStoreDigit(5) + MulShiftCarry + + MulAccumulate(6,0) + MulAccumulate(5,1) + MulAccumulate(4,2) + MulAccumulate(3,3) + MulAccumulate(2,4) + MulAccumulate(1,5) + MulAccumulate(0,6) + MulStoreDigit(6) + MulShiftCarry + + MulAccumulateBottom(7,0) + MulAccumulateBottom(6,1) + MulAccumulateBottom(5,2) + MulAccumulateBottom(4,3) + MulAccumulateBottom(3,4) + MulAccumulateBottom(2,5) + MulAccumulateBottom(1,6) + MulAccumulateBottom(0,7) + MulStoreDigit(7) + MulEpilogue +} + +#undef AS1 +#undef AS2 + +#else // not x86 - no processor specific code at this layer + +typedef Portable LowLevel; + +#endif + +#ifdef SSE2_INTRINSICS_AVAILABLE + +#ifdef __GNUC__ +#define TAOCRYPT_FASTCALL +#else +#define TAOCRYPT_FASTCALL __fastcall +#endif + +static void TAOCRYPT_FASTCALL P4_Mul(__m128i *C, const __m128i *A, + const __m128i *B) +{ + __m128i a3210 = _mm_load_si128(A); + __m128i b3210 = _mm_load_si128(B); + + __m128i sum; + + __m128i z = _mm_setzero_si128(); + __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210); + C[0] = a2b2_a0b0; + + __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0)); + __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1)); + __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021); + __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z); + __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z); + C[1] = _mm_add_epi64(a1b0, a0b1); + + __m128i a31 = _mm_srli_epi64(a3210, 32); + __m128i b31 = _mm_srli_epi64(b3210, 32); + __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31); + C[6] = a3b3_a1b1; + + __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z); + __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2)); + __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012); + __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z); + __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z); + sum = _mm_add_epi64(a1b1, a0b2); + C[2] = _mm_add_epi64(sum, a2b0); + + __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1)); + __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3)); + __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012); + __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103); + __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z); + __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z); + __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z); + __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z); + __m128i sum1 = _mm_add_epi64(a3b0, a1b2); + sum = _mm_add_epi64(a2b1, a0b3); + C[3] = _mm_add_epi64(sum, sum1); + + __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103); + __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z); + __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z); + __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z); + sum = _mm_add_epi64(a2b2, a3b1); + C[4] = _mm_add_epi64(sum, a1b3); + + __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2)); + __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3)); + __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203); + __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z); + __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z); + C[5] = _mm_add_epi64(a3b2, a2b3); +} + +void P4Optimized::Multiply4(word *C, const word *A, const word *B) +{ + __m128i temp[7]; + const word *w = (word *)temp; + const __m64 *mw = (__m64 *)w; + + P4_Mul(temp, (__m128i *)A, (__m128i *)B); + + C[0] = w[0]; + + __m64 s1, s2; + + __m64 w1 = _mm_cvtsi32_si64(w[1]); + __m64 w4 = mw[2]; + __m64 w6 = mw[3]; + __m64 w8 = mw[4]; + __m64 w10 = mw[5]; + __m64 w12 = mw[6]; + __m64 w14 = mw[7]; + __m64 w16 = mw[8]; + __m64 w18 = mw[9]; + __m64 w20 = mw[10]; + __m64 w22 = mw[11]; + __m64 w26 = _mm_cvtsi32_si64(w[26]); + + s1 = _mm_add_si64(w1, w4); + C[1] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w6, w8); + s1 = _mm_add_si64(s1, s2); + C[2] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w10, w12); + s1 = _mm_add_si64(s1, s2); + C[3] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w14, w16); + s1 = _mm_add_si64(s1, s2); + C[4] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w18, w20); + s1 = _mm_add_si64(s1, s2); + C[5] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w22, w26); + s1 = _mm_add_si64(s1, s2); + C[6] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + C[7] = _mm_cvtsi64_si32(s1) + w[27]; + _mm_empty(); +} + +void P4Optimized::Multiply8(word *C, const word *A, const word *B) +{ + __m128i temp[28]; + const word *w = (word *)temp; + const __m64 *mw = (__m64 *)w; + const word *x = (word *)temp+7*4; + const __m64 *mx = (__m64 *)x; + const word *y = (word *)temp+7*4*2; + const __m64 *my = (__m64 *)y; + const word *z = (word *)temp+7*4*3; + const __m64 *mz = (__m64 *)z; + + P4_Mul(temp, (__m128i *)A, (__m128i *)B); + + P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B); + + P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1); + + P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1); + + C[0] = w[0]; + + __m64 s1, s2, s3, s4; + + __m64 w1 = _mm_cvtsi32_si64(w[1]); + __m64 w4 = mw[2]; + __m64 w6 = mw[3]; + __m64 w8 = mw[4]; + __m64 w10 = mw[5]; + __m64 w12 = mw[6]; + __m64 w14 = mw[7]; + __m64 w16 = mw[8]; + __m64 w18 = mw[9]; + __m64 w20 = mw[10]; + __m64 w22 = mw[11]; + __m64 w26 = _mm_cvtsi32_si64(w[26]); + __m64 w27 = _mm_cvtsi32_si64(w[27]); + + __m64 x0 = _mm_cvtsi32_si64(x[0]); + __m64 x1 = _mm_cvtsi32_si64(x[1]); + __m64 x4 = mx[2]; + __m64 x6 = mx[3]; + __m64 x8 = mx[4]; + __m64 x10 = mx[5]; + __m64 x12 = mx[6]; + __m64 x14 = mx[7]; + __m64 x16 = mx[8]; + __m64 x18 = mx[9]; + __m64 x20 = mx[10]; + __m64 x22 = mx[11]; + __m64 x26 = _mm_cvtsi32_si64(x[26]); + __m64 x27 = _mm_cvtsi32_si64(x[27]); + + __m64 y0 = _mm_cvtsi32_si64(y[0]); + __m64 y1 = _mm_cvtsi32_si64(y[1]); + __m64 y4 = my[2]; + __m64 y6 = my[3]; + __m64 y8 = my[4]; + __m64 y10 = my[5]; + __m64 y12 = my[6]; + __m64 y14 = my[7]; + __m64 y16 = my[8]; + __m64 y18 = my[9]; + __m64 y20 = my[10]; + __m64 y22 = my[11]; + __m64 y26 = _mm_cvtsi32_si64(y[26]); + __m64 y27 = _mm_cvtsi32_si64(y[27]); + + __m64 z0 = _mm_cvtsi32_si64(z[0]); + __m64 z1 = _mm_cvtsi32_si64(z[1]); + __m64 z4 = mz[2]; + __m64 z6 = mz[3]; + __m64 z8 = mz[4]; + __m64 z10 = mz[5]; + __m64 z12 = mz[6]; + __m64 z14 = mz[7]; + __m64 z16 = mz[8]; + __m64 z18 = mz[9]; + __m64 z20 = mz[10]; + __m64 z22 = mz[11]; + __m64 z26 = _mm_cvtsi32_si64(z[26]); + + s1 = _mm_add_si64(w1, w4); + C[1] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w6, w8); + s1 = _mm_add_si64(s1, s2); + C[2] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w10, w12); + s1 = _mm_add_si64(s1, s2); + C[3] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x0, y0); + s2 = _mm_add_si64(w14, w16); + s1 = _mm_add_si64(s1, s3); + s1 = _mm_add_si64(s1, s2); + C[4] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x1, y1); + s4 = _mm_add_si64(x4, y4); + s1 = _mm_add_si64(s1, w18); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, w20); + s1 = _mm_add_si64(s1, s3); + C[5] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x6, y6); + s4 = _mm_add_si64(x8, y8); + s1 = _mm_add_si64(s1, w22); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, w26); + s1 = _mm_add_si64(s1, s3); + C[6] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x10, y10); + s4 = _mm_add_si64(x12, y12); + s1 = _mm_add_si64(s1, w27); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, s3); + C[7] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x14, y14); + s4 = _mm_add_si64(x16, y16); + s1 = _mm_add_si64(s1, z0); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, s3); + C[8] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x18, y18); + s4 = _mm_add_si64(x20, y20); + s1 = _mm_add_si64(s1, z1); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, z4); + s1 = _mm_add_si64(s1, s3); + C[9] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x22, y22); + s4 = _mm_add_si64(x26, y26); + s1 = _mm_add_si64(s1, z6); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, z8); + s1 = _mm_add_si64(s1, s3); + C[10] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x27, y27); + s1 = _mm_add_si64(s1, z10); + s1 = _mm_add_si64(s1, z12); + s1 = _mm_add_si64(s1, s3); + C[11] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(z14, z16); + s1 = _mm_add_si64(s1, s3); + C[12] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(z18, z20); + s1 = _mm_add_si64(s1, s3); + C[13] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(z22, z26); + s1 = _mm_add_si64(s1, s3); + C[14] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + C[15] = z[27] + _mm_cvtsi64_si32(s1); + _mm_empty(); +} + +void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B) +{ + __m128i temp[21]; + const word *w = (word *)temp; + const __m64 *mw = (__m64 *)w; + const word *x = (word *)temp+7*4; + const __m64 *mx = (__m64 *)x; + const word *y = (word *)temp+7*4*2; + const __m64 *my = (__m64 *)y; + + P4_Mul(temp, (__m128i *)A, (__m128i *)B); + + P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B); + + P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1); + + C[0] = w[0]; + + __m64 s1, s2, s3, s4; + + __m64 w1 = _mm_cvtsi32_si64(w[1]); + __m64 w4 = mw[2]; + __m64 w6 = mw[3]; + __m64 w8 = mw[4]; + __m64 w10 = mw[5]; + __m64 w12 = mw[6]; + __m64 w14 = mw[7]; + __m64 w16 = mw[8]; + __m64 w18 = mw[9]; + __m64 w20 = mw[10]; + __m64 w22 = mw[11]; + __m64 w26 = _mm_cvtsi32_si64(w[26]); + + __m64 x0 = _mm_cvtsi32_si64(x[0]); + __m64 x1 = _mm_cvtsi32_si64(x[1]); + __m64 x4 = mx[2]; + __m64 x6 = mx[3]; + __m64 x8 = mx[4]; + + __m64 y0 = _mm_cvtsi32_si64(y[0]); + __m64 y1 = _mm_cvtsi32_si64(y[1]); + __m64 y4 = my[2]; + __m64 y6 = my[3]; + __m64 y8 = my[4]; + + s1 = _mm_add_si64(w1, w4); + C[1] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w6, w8); + s1 = _mm_add_si64(s1, s2); + C[2] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s2 = _mm_add_si64(w10, w12); + s1 = _mm_add_si64(s1, s2); + C[3] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x0, y0); + s2 = _mm_add_si64(w14, w16); + s1 = _mm_add_si64(s1, s3); + s1 = _mm_add_si64(s1, s2); + C[4] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x1, y1); + s4 = _mm_add_si64(x4, y4); + s1 = _mm_add_si64(s1, w18); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, w20); + s1 = _mm_add_si64(s1, s3); + C[5] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + s3 = _mm_add_si64(x6, y6); + s4 = _mm_add_si64(x8, y8); + s1 = _mm_add_si64(s1, w22); + s3 = _mm_add_si64(s3, s4); + s1 = _mm_add_si64(s1, w26); + s1 = _mm_add_si64(s1, s3); + C[6] = _mm_cvtsi64_si32(s1); + s1 = _mm_srli_si64(s1, 32); + + C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12]; + _mm_empty(); +} + +#endif // #ifdef SSE2_INTRINSICS_AVAILABLE + +// end optimized + +// ******************************************************** + +#define A0 A +#define A1 (A+N2) +#define B0 B +#define B1 (B+N2) + +#define T0 T +#define T1 (T+N2) +#define T2 (T+N) +#define T3 (T+N+N2) + +#define R0 R +#define R1 (R+N2) +#define R2 (R+N) +#define R3 (R+N+N2) + +//VC60 workaround: compiler bug triggered without the extra dummy parameters + +// R[2*N] - result = A*B +// T[2*N] - temporary work space +// A[N] --- multiplier +// B[N] --- multiplicant + + +void RecursiveMultiply(word *R, word *T, const word *A, const word *B, + unsigned int N) +{ + if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8) + LowLevel::Multiply8(R, A, B); + else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4) + LowLevel::Multiply4(R, A, B); + else if (N==2) + LowLevel::Multiply2(R, A, B); + else + { + const unsigned int N2 = N/2; + int carry; + + int aComp = Compare(A0, A1, N2); + int bComp = Compare(B0, B1, N2); + + switch (2*aComp + aComp + bComp) + { + case -4: + LowLevel::Subtract(R0, A1, A0, N2); + LowLevel::Subtract(R1, B0, B1, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + LowLevel::Subtract(T1, T1, R0, N2); + carry = -1; + break; + case -2: + LowLevel::Subtract(R0, A1, A0, N2); + LowLevel::Subtract(R1, B0, B1, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + carry = 0; + break; + case 2: + LowLevel::Subtract(R0, A0, A1, N2); + LowLevel::Subtract(R1, B1, B0, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + carry = 0; + break; + case 4: + LowLevel::Subtract(R0, A1, A0, N2); + LowLevel::Subtract(R1, B0, B1, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + LowLevel::Subtract(T1, T1, R1, N2); + carry = -1; + break; + default: + SetWords(T0, 0, N); + carry = 0; + } + + RecursiveMultiply(R0, T2, A0, B0, N2); + RecursiveMultiply(R2, T2, A1, B1, N2); + + // now T[01] holds (A1-A0)*(B0-B1),R[01] holds A0*B0, R[23] holds A1*B1 + + carry += LowLevel::Add(T0, T0, R0, N); + carry += LowLevel::Add(T0, T0, R2, N); + carry += LowLevel::Add(R1, R1, T0, N); + + Increment(R3, N2, carry); + } +} + + +void RecursiveSquare(word *R, word *T, const word *A, unsigned int N) +{ + if (LowLevel::SquareRecursionLimit() >= 4 && N==4) + LowLevel::Square4(R, A); + else if (N==2) + LowLevel::Square2(R, A); + else + { + const unsigned int N2 = N/2; + + RecursiveSquare(R0, T2, A0, N2); + RecursiveSquare(R2, T2, A1, N2); + RecursiveMultiply(T0, T2, A0, A1, N2); + + word carry = LowLevel::Add(R1, R1, T0, N); + carry += LowLevel::Add(R1, R1, T0, N); + Increment(R3, N2, carry); + } +} + + +// R[N] - bottom half of A*B +// T[N] - temporary work space +// A[N] - multiplier +// B[N] - multiplicant + + +void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, + unsigned int N) +{ + if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8) + LowLevel::Multiply8Bottom(R, A, B); + else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4) + LowLevel::Multiply4Bottom(R, A, B); + else if (N==2) + LowLevel::Multiply2Bottom(R, A, B); + else + { + const unsigned int N2 = N/2; + + RecursiveMultiply(R, T, A0, B0, N2); + RecursiveMultiplyBottom(T0, T1, A1, B0, N2); + LowLevel::Add(R1, R1, T0, N2); + RecursiveMultiplyBottom(T0, T1, A0, B1, N2); + LowLevel::Add(R1, R1, T0, N2); + } +} + + +void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, + const word *B, unsigned int N) +{ + if (N==4) + { + LowLevel::Multiply4(T, A, B); + memcpy(R, T+4, 4*WORD_SIZE); + } + else if (N==2) + { + LowLevel::Multiply2(T, A, B); + memcpy(R, T+2, 2*WORD_SIZE); + } + else + { + const unsigned int N2 = N/2; + int carry; + + int aComp = Compare(A0, A1, N2); + int bComp = Compare(B0, B1, N2); + + switch (2*aComp + aComp + bComp) + { + case -4: + LowLevel::Subtract(R0, A1, A0, N2); + LowLevel::Subtract(R1, B0, B1, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + LowLevel::Subtract(T1, T1, R0, N2); + carry = -1; + break; + case -2: + LowLevel::Subtract(R0, A1, A0, N2); + LowLevel::Subtract(R1, B0, B1, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + carry = 0; + break; + case 2: + LowLevel::Subtract(R0, A0, A1, N2); + LowLevel::Subtract(R1, B1, B0, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + carry = 0; + break; + case 4: + LowLevel::Subtract(R0, A1, A0, N2); + LowLevel::Subtract(R1, B0, B1, N2); + RecursiveMultiply(T0, T2, R0, R1, N2); + LowLevel::Subtract(T1, T1, R1, N2); + carry = -1; + break; + default: + SetWords(T0, 0, N); + carry = 0; + } + + RecursiveMultiply(T2, R0, A1, B1, N2); + + // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1 + + word c2 = LowLevel::Subtract(R0, L+N2, L, N2); + c2 += LowLevel::Subtract(R0, R0, T0, N2); + word t = (Compare(R0, T2, N2) == -1); + + carry += t; + carry += Increment(R0, N2, c2+t); + carry += LowLevel::Add(R0, R0, T1, N2); + carry += LowLevel::Add(R0, R0, T3, N2); + + CopyWords(R1, T3, N2); + Increment(R1, N2, carry); + } +} + + +inline word Add(word *C, const word *A, const word *B, unsigned int N) +{ + return LowLevel::Add(C, A, B, N); +} + +inline word Subtract(word *C, const word *A, const word *B, unsigned int N) +{ + return LowLevel::Subtract(C, A, B, N); +} + +inline void Multiply(word *R, word *T, const word *A, const word *B, + unsigned int N) +{ + RecursiveMultiply(R, T, A, B, N); +} + +inline void Square(word *R, word *T, const word *A, unsigned int N) +{ + RecursiveSquare(R, T, A, N); +} + + +void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, + const word *B, unsigned int NB) +{ + if (NA == NB) + { + if (A == B) + Square(R, T, A, NA); + else + Multiply(R, T, A, B, NA); + + return; + } + + if (NA > NB) + { + STL::swap(A, B); + STL::swap(NA, NB); + } + + if (NA==2 && !A[1]) + { + switch (A[0]) + { + case 0: + SetWords(R, 0, NB+2); + return; + case 1: + CopyWords(R, B, NB); + R[NB] = R[NB+1] = 0; + return; + default: + R[NB] = LinearMultiply(R, B, A[0], NB); + R[NB+1] = 0; + return; + } + } + + Multiply(R, T, A, B, NA); + CopyWords(T+2*NA, R+NA, NA); + + unsigned i; + + for (i=2*NA; i>=1; + i++; + } + k+=i; + + if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2) + { + if (s%2==0) + CopyWords(R, b, N); + else + Subtract(R, M, b, N); + return k; + } + + ShiftWordsRightByBits(f, fgLen, i); + t=ShiftWordsLeftByBits(c, bcLen, i); + if (t) + { + c[bcLen] = t; + bcLen+=2; + } + + if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0) + fgLen-=2; + + if (Compare(f, g, fgLen)==-1) + { + STL::swap(f, g); + STL::swap(b, c); + s++; + } + + Subtract(f, f, g, fgLen); + + if (Add(b, b, c, bcLen)) + { + b[bcLen] = 1; + bcLen+=2; + } + } +} + +// R[N] - result = A/(2^k) mod M +// A[N] - input +// M[N] - modulus + +void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, + unsigned int N) +{ + CopyWords(R, A, N); + + while (k--) + { + if (R[0]%2==0) + ShiftWordsRightByBits(R, N, 1); + else + { + word carry = Add(R, R, M, N); + ShiftWordsRightByBits(R, N, 1); + R[N-1] += carry<<(WORD_BITS-1); + } + } +} + +// R[N] - result = A*(2^k) mod M +// A[N] - input +// M[N] - modulus + +void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M, + unsigned int N) +{ + CopyWords(R, A, N); + + while (k--) + if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) + Subtract(R, R, M, N); +} + + +// ********** end of integer needs + + +Integer::Integer() + : reg_(2), sign_(POSITIVE) +{ + reg_[0] = reg_[1] = 0; +} + + +Integer::Integer(const Integer& t) + : reg_(RoundupSize(t.WordCount())), sign_(t.sign_) +{ + CopyWords(reg_.get_buffer(), t.reg_.get_buffer(), reg_.size()); +} + + +Integer::Integer(signed long value) + : reg_(2) +{ + if (value >= 0) + sign_ = POSITIVE; + else + { + sign_ = NEGATIVE; + value = -value; + } + reg_[0] = word(value); + reg_[1] = word(SafeRightShift(value)); +} + + +Integer::Integer(Sign s, word high, word low) + : reg_(2), sign_(s) +{ + reg_[0] = low; + reg_[1] = high; +} + + +Integer::Integer(word value, unsigned int length) + : reg_(RoundupSize(length)), sign_(POSITIVE) +{ + reg_[0] = value; + SetWords(reg_ + 1, 0, reg_.size() - 1); +} + + +Integer::Integer(const byte *encodedInteger, unsigned int byteCount, + Signedness s) +{ + Decode(encodedInteger, byteCount, s); +} + +class BadBER {}; + +// BER Decode Source +Integer::Integer(Source& source) + : reg_(2), sign_(POSITIVE) +{ + Decode(source); +} + +void Integer::Decode(Source& source) +{ + byte b = source.next(); + if (b != INTEGER) { + source.SetError(INTEGER_E); + return; + } + + word32 length = GetLength(source); + if (length == 0 || source.GetError().What()) return; + + if ( (b = source.next()) == 0x00) + length--; + else + source.prev(); + + if (source.IsLeft(length) == false) return; + + unsigned int words = (length + WORD_SIZE - 1) / WORD_SIZE; + words = RoundupSize(words); + if (words > reg_.size()) reg_.CleanNew(words); + + for (int j = length; j > 0; j--) { + b = source.next(); + reg_ [(j-1) / WORD_SIZE] |= (word)b << ((j-1) % WORD_SIZE) * 8; + } +} + + +void Integer::Decode(const byte* input, unsigned int inputLen, Signedness s) +{ + unsigned int idx(0); + byte b = 0; + if (inputLen>0) + b = input[idx]; // peek + sign_ = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE; + + while (inputLen>0 && (sign_==POSITIVE ? b==0 : b==0xff)) + { + idx++; // skip + if (--inputLen>0) + b = input[idx]; // peek + } + + reg_.CleanNew(RoundupSize(BytesToWords(inputLen))); + + for (unsigned int i=inputLen; i > 0; i--) + { + b = input[idx++]; + reg_[(i-1)/WORD_SIZE] |= (word)b << ((i-1)%WORD_SIZE)*8; + } + + if (sign_ == NEGATIVE) + { + for (unsigned i=inputLen; i 0; i--) + output[idx++] = GetByte(i-1); + } + else + { + // take two's complement of *this + Integer temp = Integer::Power2(8*max(ByteCount(), outputLen)) + *this; + for (unsigned i=0; i range); + + *this += min; +} + + +Integer Integer::Power2(unsigned int e) +{ + Integer r((word)0, BitsToWords(e + 1)); + r.SetBit(e); + return r; +} + + +void Integer::SetBit(unsigned int n, bool value) +{ + if (value) + { + reg_.CleanGrow(RoundupSize(BitsToWords(n + 1))); + reg_[n / WORD_BITS] |= (word(1) << (n % WORD_BITS)); + } + else + { + if (n / WORD_BITS < reg_.size()) + reg_[n / WORD_BITS] &= ~(word(1) << (n % WORD_BITS)); + } +} + + +void Integer::SetByte(unsigned int n, byte value) +{ + reg_.CleanGrow(RoundupSize(BytesToWords(n+1))); + reg_[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); + reg_[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); +} + + +void Integer::Negate() +{ + if (!!(*this)) // don't flip sign if *this==0 + sign_ = Sign(1 - sign_); +} + + +bool Integer::operator!() const +{ + return IsNegative() ? false : (reg_[0]==0 && WordCount()==0); +} + + +Integer& Integer::operator=(const Integer& t) +{ + if (this != &t) + { + reg_.New(RoundupSize(t.WordCount())); + CopyWords(reg_.get_buffer(), t.reg_.get_buffer(), reg_.size()); + sign_ = t.sign_; + } + return *this; +} + + +Integer& Integer::operator+=(const Integer& t) +{ + reg_.CleanGrow(t.reg_.size()); + if (NotNegative()) + { + if (t.NotNegative()) + PositiveAdd(*this, *this, t); + else + PositiveSubtract(*this, *this, t); + } + else + { + if (t.NotNegative()) + PositiveSubtract(*this, t, *this); + else + { + PositiveAdd(*this, *this, t); + sign_ = Integer::NEGATIVE; + } + } + return *this; +} + + +Integer Integer::operator-() const +{ + Integer result(*this); + result.Negate(); + return result; +} + + +Integer& Integer::operator-=(const Integer& t) +{ + reg_.CleanGrow(t.reg_.size()); + if (NotNegative()) + { + if (t.NotNegative()) + PositiveSubtract(*this, *this, t); + else + PositiveAdd(*this, *this, t); + } + else + { + if (t.NotNegative()) + { + PositiveAdd(*this, *this, t); + sign_ = Integer::NEGATIVE; + } + else + PositiveSubtract(*this, t, *this); + } + return *this; +} + + +Integer& Integer::operator++() +{ + if (NotNegative()) + { + if (Increment(reg_.get_buffer(), reg_.size())) + { + reg_.CleanGrow(2*reg_.size()); + reg_[reg_.size()/2]=1; + } + } + else + { + word borrow = Decrement(reg_.get_buffer(), reg_.size()); + (void)borrow; // shut up compiler + if (WordCount()==0) + *this = Zero(); + } + return *this; +} + +Integer& Integer::operator--() +{ + if (IsNegative()) + { + if (Increment(reg_.get_buffer(), reg_.size())) + { + reg_.CleanGrow(2*reg_.size()); + reg_[reg_.size()/2]=1; + } + } + else + { + if (Decrement(reg_.get_buffer(), reg_.size())) + *this = -One(); + } + return *this; +} + + +Integer& Integer::operator<<=(unsigned int n) +{ + const unsigned int wordCount = WordCount(); + const unsigned int shiftWords = n / WORD_BITS; + const unsigned int shiftBits = n % WORD_BITS; + + reg_.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); + ShiftWordsLeftByWords(reg_.get_buffer(), wordCount + shiftWords, + shiftWords); + ShiftWordsLeftByBits(reg_+shiftWords, wordCount+BitsToWords(shiftBits), + shiftBits); + return *this; +} + +Integer& Integer::operator>>=(unsigned int n) +{ + const unsigned int wordCount = WordCount(); + const unsigned int shiftWords = n / WORD_BITS; + const unsigned int shiftBits = n % WORD_BITS; + + ShiftWordsRightByWords(reg_.get_buffer(), wordCount, shiftWords); + if (wordCount > shiftWords) + ShiftWordsRightByBits(reg_.get_buffer(), wordCount-shiftWords, + shiftBits); + if (IsNegative() && WordCount()==0) // avoid -0 + *this = Zero(); + return *this; +} + + +void PositiveAdd(Integer& sum, const Integer& a, const Integer& b) +{ + word carry; + if (a.reg_.size() == b.reg_.size()) + carry = Add(sum.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), a.reg_.size()); + else if (a.reg_.size() > b.reg_.size()) + { + carry = Add(sum.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), b.reg_.size()); + CopyWords(sum.reg_+b.reg_.size(), a.reg_+b.reg_.size(), + a.reg_.size()-b.reg_.size()); + carry = Increment(sum.reg_+b.reg_.size(), a.reg_.size()-b.reg_.size(), + carry); + } + else + { + carry = Add(sum.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), a.reg_.size()); + CopyWords(sum.reg_+a.reg_.size(), b.reg_+a.reg_.size(), + b.reg_.size()-a.reg_.size()); + carry = Increment(sum.reg_+a.reg_.size(), b.reg_.size()-a.reg_.size(), + carry); + } + + if (carry) + { + sum.reg_.CleanGrow(2*sum.reg_.size()); + sum.reg_[sum.reg_.size()/2] = 1; + } + sum.sign_ = Integer::POSITIVE; +} + +void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) +{ + unsigned aSize = a.WordCount(); + aSize += aSize%2; + unsigned bSize = b.WordCount(); + bSize += bSize%2; + + if (aSize == bSize) + { + if (Compare(a.reg_.get_buffer(), b.reg_.get_buffer(), aSize) >= 0) + { + Subtract(diff.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), aSize); + diff.sign_ = Integer::POSITIVE; + } + else + { + Subtract(diff.reg_.get_buffer(), b.reg_.get_buffer(), + a.reg_.get_buffer(), aSize); + diff.sign_ = Integer::NEGATIVE; + } + } + else if (aSize > bSize) + { + word borrow = Subtract(diff.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), bSize); + CopyWords(diff.reg_+bSize, a.reg_+bSize, aSize-bSize); + borrow = Decrement(diff.reg_+bSize, aSize-bSize, borrow); + diff.sign_ = Integer::POSITIVE; + } + else + { + word borrow = Subtract(diff.reg_.get_buffer(), b.reg_.get_buffer(), + a.reg_.get_buffer(), aSize); + CopyWords(diff.reg_+aSize, b.reg_+aSize, bSize-aSize); + borrow = Decrement(diff.reg_+aSize, bSize-aSize, borrow); + diff.sign_ = Integer::NEGATIVE; + } +} + + +unsigned int Integer::MinEncodedSize(Signedness signedness) const +{ + unsigned int outputLen = max(1U, ByteCount()); + if (signedness == UNSIGNED) + return outputLen; + if (NotNegative() && (GetByte(outputLen-1) & 0x80)) + outputLen++; + if (IsNegative() && *this < -Power2(outputLen*8-1)) + outputLen++; + return outputLen; +} + + +int Integer::Compare(const Integer& t) const +{ + if (NotNegative()) + { + if (t.NotNegative()) + return PositiveCompare(t); + else + return 1; + } + else + { + if (t.NotNegative()) + return -1; + else + return -PositiveCompare(t); + } +} + + +int Integer::PositiveCompare(const Integer& t) const +{ + unsigned size = WordCount(), tSize = t.WordCount(); + + if (size == tSize) + return TaoCrypt::Compare(reg_.get_buffer(), t.reg_.get_buffer(), size); + else + return size > tSize ? 1 : -1; +} + + +bool Integer::GetBit(unsigned int n) const +{ + if (n/WORD_BITS >= reg_.size()) + return 0; + else + return bool((reg_[n/WORD_BITS] >> (n % WORD_BITS)) & 1); +} + + +unsigned long Integer::GetBits(unsigned int i, unsigned int n) const +{ + unsigned long v = 0; + for (unsigned int j=0; j= reg_.size()) + return 0; + else + return byte(reg_[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); +} + + +unsigned int Integer::BitCount() const +{ + unsigned wordCount = WordCount(); + if (wordCount) + return (wordCount-1)*WORD_BITS + BitPrecision(reg_[wordCount-1]); + else + return 0; +} + + +unsigned int Integer::ByteCount() const +{ + unsigned wordCount = WordCount(); + if (wordCount) + return (wordCount-1)*WORD_SIZE + BytePrecision(reg_[wordCount-1]); + else + return 0; +} + + +unsigned int Integer::WordCount() const +{ + return CountWords(reg_.get_buffer(), reg_.size()); +} + + +bool Integer::IsConvertableToLong() const +{ + if (ByteCount() > sizeof(long)) + return false; + + unsigned long value = reg_[0]; + value += SafeLeftShift(reg_[1]); + + if (sign_ == POSITIVE) + return (signed long)value >= 0; + else + return -(signed long)value < 0; +} + + +signed long Integer::ConvertToLong() const +{ + unsigned long value = reg_[0]; + value += SafeLeftShift(reg_[1]); + return sign_ == POSITIVE ? value : -(signed long)value; +} + + +void Integer::Swap(Integer& a) +{ + reg_.Swap(a.reg_); + STL::swap(sign_, a.sign_); +} + + +Integer Integer::Plus(const Integer& b) const +{ + Integer sum((word)0, max(reg_.size(), b.reg_.size())); + if (NotNegative()) + { + if (b.NotNegative()) + PositiveAdd(sum, *this, b); + else + PositiveSubtract(sum, *this, b); + } + else + { + if (b.NotNegative()) + PositiveSubtract(sum, b, *this); + else + { + PositiveAdd(sum, *this, b); + sum.sign_ = Integer::NEGATIVE; + } + } + return sum; +} + + +Integer Integer::Minus(const Integer& b) const +{ + Integer diff((word)0, max(reg_.size(), b.reg_.size())); + if (NotNegative()) + { + if (b.NotNegative()) + PositiveSubtract(diff, *this, b); + else + PositiveAdd(diff, *this, b); + } + else + { + if (b.NotNegative()) + { + PositiveAdd(diff, *this, b); + diff.sign_ = Integer::NEGATIVE; + } + else + PositiveSubtract(diff, b, *this); + } + return diff; +} + + +Integer Integer::Times(const Integer &b) const +{ + Integer product; + Multiply(product, *this, b); + return product; +} + + +#undef A0 +#undef A1 +#undef B0 +#undef B1 + +#undef T0 +#undef T1 +#undef T2 +#undef T3 + +#undef R0 +#undef R1 +#undef R2 +#undef R3 + + +static inline void AtomicDivide(word *Q, const word *A, const word *B) +{ + word T[4]; + DWord q = DivideFourWordsByTwo(T, DWord(A[0], A[1]), + DWord(A[2], A[3]), DWord(B[0], B[1])); + Q[0] = q.GetLowHalf(); + Q[1] = q.GetHighHalf(); + +#ifndef NDEBUG + if (B[0] || B[1]) + { + // multiply quotient and divisor and add remainder, make sure it + // equals dividend + word P[4]; + Portable::Multiply2(P, Q, B); + Add(P, P, T, 4); + } +#endif +} + + +// for use by Divide(), corrects the underestimated quotient {Q1,Q0} +static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, + unsigned int N) +{ + if (Q[1]) + { + T[N] = T[N+1] = 0; + unsigned i; + for (i=0; i= 0) + { + R[N] -= Subtract(R, R, B, N); + Q[1] += (++Q[0]==0); + } +} + +// R[NB] -------- remainder = A%B +// Q[NA-NB+2] --- quotient = A/B +// T[NA+2*NB+4] - temp work space +// A[NA] -------- dividend +// B[NB] -------- divisor + + +void Divide(word* R, word* Q, word* T, const word* A, unsigned int NA, + const word* B, unsigned int NB) +{ + // set up temporary work space + word *const TA=T; + word *const TB=T+NA+2; + word *const TP=T+NA+2+NB; + + // copy B into TB and normalize it so that TB has highest bit set to 1 + unsigned shiftWords = (B[NB-1]==0); + TB[0] = TB[NB-1] = 0; + CopyWords(TB+shiftWords, B, NB-shiftWords); + unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); + ShiftWordsLeftByBits(TB, NB, shiftBits); + + // copy A into TA and normalize it + TA[0] = TA[NA] = TA[NA+1] = 0; + CopyWords(TA+shiftWords, A, NA); + ShiftWordsLeftByBits(TA, NA+2, shiftBits); + + if (TA[NA+1]==0 && TA[NA] <= 1) + { + Q[NA-NB+1] = Q[NA-NB] = 0; + while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) + { + TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); + ++Q[NA-NB]; + } + } + else + { + NA+=2; + } + + word BT[2]; + BT[0] = TB[NB-2] + 1; + BT[1] = TB[NB-1] + (BT[0]==0); + + // start reducing TA mod TB, 2 words at a time + for (unsigned i=NA-2; i>=NB; i-=2) + { + AtomicDivide(Q+i-NB, TA+i-2, BT); + CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); + } + + // copy TA into R, and denormalize it + CopyWords(R, TA+shiftWords, NB); + ShiftWordsRightByBits(R, NB, shiftBits); +} + + +void PositiveDivide(Integer& remainder, Integer& quotient, + const Integer& a, const Integer& b) +{ + unsigned aSize = a.WordCount(); + unsigned bSize = b.WordCount(); + + if (a.PositiveCompare(b) == -1) + { + remainder = a; + remainder.sign_ = Integer::POSITIVE; + quotient = Integer::Zero(); + return; + } + + aSize += aSize%2; // round up to next even number + bSize += bSize%2; + + remainder.reg_.CleanNew(RoundupSize(bSize)); + remainder.sign_ = Integer::POSITIVE; + quotient.reg_.CleanNew(RoundupSize(aSize-bSize+2)); + quotient.sign_ = Integer::POSITIVE; + + AlignedWordBlock T(aSize+2*bSize+4); + Divide(remainder.reg_.get_buffer(), quotient.reg_.get_buffer(), + T.get_buffer(), a.reg_.get_buffer(), aSize, b.reg_.get_buffer(), + bSize); +} + +void Integer::Divide(Integer &remainder, Integer "ient, + const Integer ÷nd, const Integer &divisor) +{ + PositiveDivide(remainder, quotient, dividend, divisor); + + if (dividend.IsNegative()) + { + quotient.Negate(); + if (remainder.NotZero()) + { + --quotient; + remainder = divisor.AbsoluteValue() - remainder; + } + } + + if (divisor.IsNegative()) + quotient.Negate(); +} + +void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, + unsigned int n) +{ + q = a; + q >>= n; + + const unsigned int wordCount = BitsToWords(n); + if (wordCount <= a.WordCount()) + { + r.reg_.resize(RoundupSize(wordCount)); + CopyWords(r.reg_.get_buffer(), a.reg_.get_buffer(), wordCount); + SetWords(r.reg_+wordCount, 0, r.reg_.size()-wordCount); + if (n % WORD_BITS != 0) + r.reg_[wordCount-1] %= (word(1) << (n % WORD_BITS)); + } + else + { + r.reg_.resize(RoundupSize(a.WordCount())); + CopyWords(r.reg_.get_buffer(), a.reg_.get_buffer(), r.reg_.size()); + } + r.sign_ = POSITIVE; + + if (a.IsNegative() && r.NotZero()) + { + --q; + r = Power2(n) - r; + } +} + +Integer Integer::DividedBy(const Integer &b) const +{ + Integer remainder, quotient; + Integer::Divide(remainder, quotient, *this, b); + return quotient; +} + +Integer Integer::Modulo(const Integer &b) const +{ + Integer remainder, quotient; + Integer::Divide(remainder, quotient, *this, b); + return remainder; +} + +void Integer::Divide(word &remainder, Integer "ient, + const Integer ÷nd, word divisor) +{ + if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 + { + quotient = dividend >> (BitPrecision(divisor)-1); + remainder = dividend.reg_[0] & (divisor-1); + return; + } + + unsigned int i = dividend.WordCount(); + quotient.reg_.CleanNew(RoundupSize(i)); + remainder = 0; + while (i--) + { + quotient.reg_[i] = DWord(dividend.reg_[i], remainder) / divisor; + remainder = DWord(dividend.reg_[i], remainder) % divisor; + } + + if (dividend.NotNegative()) + quotient.sign_ = POSITIVE; + else + { + quotient.sign_ = NEGATIVE; + if (remainder) + { + --quotient; + remainder = divisor - remainder; + } + } +} + +Integer Integer::DividedBy(word b) const +{ + word remainder; + Integer quotient; + Integer::Divide(remainder, quotient, *this, b); + return quotient; +} + +word Integer::Modulo(word divisor) const +{ + word remainder; + + if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 + remainder = reg_[0] & (divisor-1); + else + { + unsigned int i = WordCount(); + + if (divisor <= 5) + { + DWord sum(0, 0); + while (i--) + sum += reg_[i]; + remainder = sum % divisor; + } + else + { + remainder = 0; + while (i--) + remainder = DWord(reg_[i], remainder) % divisor; + } + } + + if (IsNegative() && remainder) + remainder = divisor - remainder; + + return remainder; +} + + +Integer Integer::AbsoluteValue() const +{ + Integer result(*this); + result.sign_ = POSITIVE; + return result; +} + + +Integer Integer::SquareRoot() const +{ + if (!IsPositive()) + return Zero(); + + // overestimate square root + Integer x, y = Power2((BitCount()+1)/2); + + do + { + x = y; + y = (x + *this/x) >> 1; + } while (y=m) + return (*this%m).InverseMod(m); + + if (m.IsEven()) + { + if (!m || IsEven()) + return Zero(); // no inverse + if (*this == One()) + return One(); + + Integer u = m.InverseMod(*this); + return !u ? Zero() : (m*(*this-u)+1)/(*this); + } + + AlignedWordBlock T(m.reg_.size() * 4); + Integer r((word)0, m.reg_.size()); + unsigned k = AlmostInverse(r.reg_.get_buffer(), T.get_buffer(), + reg_.get_buffer(), reg_.size(), + m.reg_.get_buffer(), m.reg_.size()); + DivideByPower2Mod(r.reg_.get_buffer(), r.reg_.get_buffer(), k, + m.reg_.get_buffer(), m.reg_.size()); + return r; +} + +word Integer::InverseMod(const word mod) const +{ + word g0 = mod, g1 = *this % mod; + word v0 = 0, v1 = 1; + word y; + + while (g1) + { + if (g1 == 1) + return v1; + y = g0 / g1; + g0 = g0 % g1; + v0 += y * v1; + + if (!g0) + break; + if (g0 == 1) + return mod-v0; + y = g1 / g0; + g1 = g1 % g0; + v1 += y * v0; + } + return 0; +} + +// ********* ModArith stuff + +const Integer& ModularArithmetic::Half(const Integer &a) const +{ + if (a.reg_.size()==modulus.reg_.size()) + { + TaoCrypt::DivideByPower2Mod(result.reg_.begin(), a.reg_.begin(), 1, + modulus.reg_.begin(), a.reg_.size()); + return result; + } + else + return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1)); +} + +const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const +{ + if (a.reg_.size()==modulus.reg_.size() && + b.reg_.size()==modulus.reg_.size()) + { + if (TaoCrypt::Add(result.reg_.begin(), a.reg_.begin(), b.reg_.begin(), + a.reg_.size()) + || Compare(result.reg_.get_buffer(), modulus.reg_.get_buffer(), + a.reg_.size()) >= 0) + { + TaoCrypt::Subtract(result.reg_.begin(), result.reg_.begin(), + modulus.reg_.begin(), a.reg_.size()); + } + return result; + } + else + { + result1 = a+b; + if (result1 >= modulus) + result1 -= modulus; + return result1; + } +} + +Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const +{ + if (a.reg_.size()==modulus.reg_.size() && + b.reg_.size()==modulus.reg_.size()) + { + if (TaoCrypt::Add(a.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), a.reg_.size()) + || Compare(a.reg_.get_buffer(), modulus.reg_.get_buffer(), + a.reg_.size()) >= 0) + { + TaoCrypt::Subtract(a.reg_.get_buffer(), a.reg_.get_buffer(), + modulus.reg_.get_buffer(), a.reg_.size()); + } + } + else + { + a+=b; + if (a>=modulus) + a-=modulus; + } + + return a; +} + +const Integer& ModularArithmetic::Subtract(const Integer &a, + const Integer &b) const +{ + if (a.reg_.size()==modulus.reg_.size() && + b.reg_.size()==modulus.reg_.size()) + { + if (TaoCrypt::Subtract(result.reg_.begin(), a.reg_.begin(), + b.reg_.begin(), a.reg_.size())) + TaoCrypt::Add(result.reg_.begin(), result.reg_.begin(), + modulus.reg_.begin(), a.reg_.size()); + return result; + } + else + { + result1 = a-b; + if (result1.IsNegative()) + result1 += modulus; + return result1; + } +} + +Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const +{ + if (a.reg_.size()==modulus.reg_.size() && + b.reg_.size()==modulus.reg_.size()) + { + if (TaoCrypt::Subtract(a.reg_.get_buffer(), a.reg_.get_buffer(), + b.reg_.get_buffer(), a.reg_.size())) + TaoCrypt::Add(a.reg_.get_buffer(), a.reg_.get_buffer(), + modulus.reg_.get_buffer(), a.reg_.size()); + } + else + { + a-=b; + if (a.IsNegative()) + a+=modulus; + } + + return a; +} + +const Integer& ModularArithmetic::Inverse(const Integer &a) const +{ + if (!a) + return a; + + CopyWords(result.reg_.begin(), modulus.reg_.begin(), modulus.reg_.size()); + if (TaoCrypt::Subtract(result.reg_.begin(), result.reg_.begin(), + a.reg_.begin(), a.reg_.size())) + Decrement(result.reg_.begin()+a.reg_.size(), 1, + modulus.reg_.size()-a.reg_.size()); + + return result; +} + +Integer ModularArithmetic::CascadeExponentiate(const Integer &x, + const Integer &e1, const Integer &y, const Integer &e2) const +{ + if (modulus.IsOdd()) + { + MontgomeryRepresentation dr(modulus); + return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, + dr.ConvertIn(y), e2)); + } + else + return AbstractRing::CascadeExponentiate(x, e1, y, e2); +} + +void ModularArithmetic::SimultaneousExponentiate(Integer *results, + const Integer &base, const Integer *exponents, + unsigned int exponentsCount) const +{ + if (modulus.IsOdd()) + { + MontgomeryRepresentation dr(modulus); + dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, + exponentsCount); + for (unsigned int i=0; i