aboutsummaryrefslogtreecommitdiff
path: root/mysql/extra/yassl/taocrypt/src/ripemd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mysql/extra/yassl/taocrypt/src/ripemd.cpp')
-rw-r--r--mysql/extra/yassl/taocrypt/src/ripemd.cpp844
1 files changed, 844 insertions, 0 deletions
diff --git a/mysql/extra/yassl/taocrypt/src/ripemd.cpp b/mysql/extra/yassl/taocrypt/src/ripemd.cpp
new file mode 100644
index 0000000..5d03dc6
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/ripemd.cpp
@@ -0,0 +1,844 @@
+/*
+ Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ MA 02110-1301 USA.
+*/
+
+
+/* based on Wei Dai's ripemd.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "ripemd.hpp"
+#ifdef USE_SYS_STL
+ #include <algorithm>
+#else
+ #include "algorithm.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+
+
+
+namespace TaoCrypt {
+
+void RIPEMD160::Init()
+{
+ digest_[0] = 0x67452301L;
+ digest_[1] = 0xefcdab89L;
+ digest_[2] = 0x98badcfeL;
+ digest_[3] = 0x10325476L;
+ digest_[4] = 0xc3d2e1f0L;
+
+ buffLen_ = 0;
+ loLen_ = 0;
+ hiLen_ = 0;
+}
+
+
+RIPEMD160::RIPEMD160(const RIPEMD160& that)
+ : HASHwithTransform(DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
+{
+ buffLen_ = that.buffLen_;
+ loLen_ = that.loLen_;
+ hiLen_ = that.hiLen_;
+
+ memcpy(digest_, that.digest_, DIGEST_SIZE);
+ memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+}
+
+
+RIPEMD160& RIPEMD160::operator= (const RIPEMD160& that)
+{
+ RIPEMD160 tmp(that);
+ Swap(tmp);
+
+ return *this;
+}
+
+
+void RIPEMD160::Swap(RIPEMD160& other)
+{
+ STL::swap(loLen_, other.loLen_);
+ STL::swap(hiLen_, other.hiLen_);
+ STL::swap(buffLen_, other.buffLen_);
+
+ memcpy(digest_, other.digest_, DIGEST_SIZE);
+ memcpy(buffer_, other.buffer_, BLOCK_SIZE);
+}
+
+
+#ifdef DO_RIPEMD_ASM
+
+// Update digest with data of size len
+void RIPEMD160::Update(const byte* data, word32 len)
+{
+ if (!isMMX) {
+ HASHwithTransform::Update(data, len);
+ return;
+ }
+
+ byte* local = reinterpret_cast<byte*>(buffer_);
+
+ // remove buffered data if possible
+ if (buffLen_) {
+ word32 add = min(len, BLOCK_SIZE - buffLen_);
+ memcpy(&local[buffLen_], data, add);
+
+ buffLen_ += add;
+ data += add;
+ len -= add;
+
+ if (buffLen_ == BLOCK_SIZE) {
+ Transform();
+ AddLength(BLOCK_SIZE);
+ buffLen_ = 0;
+ }
+ }
+
+ // all at once for asm
+ if (buffLen_ == 0) {
+ word32 times = len / BLOCK_SIZE;
+ if (times) {
+ AsmTransform(data, times);
+ const word32 add = BLOCK_SIZE * times;
+ AddLength(add);
+ len -= add;
+ data += add;
+ }
+ }
+
+ // cache any data left
+ if (len) {
+ memcpy(&local[buffLen_], data, len);
+ buffLen_ += len;
+ }
+}
+
+#endif // DO_RIPEMD_ASM
+
+
+// for all
+#define F(x, y, z) (x ^ y ^ z)
+#define G(x, y, z) (z ^ (x & (y^z)))
+#define H(x, y, z) (z ^ (x | ~y))
+#define I(x, y, z) (y ^ (z & (x^y)))
+#define J(x, y, z) (x ^ (y | ~z))
+
+#define k0 0
+#define k1 0x5a827999
+#define k2 0x6ed9eba1
+#define k3 0x8f1bbcdc
+#define k4 0xa953fd4e
+#define k5 0x50a28be6
+#define k6 0x5c4dd124
+#define k7 0x6d703ef3
+#define k8 0x7a6d76e9
+#define k9 0
+
+// for 160 and 320
+#define Subround(f, a, b, c, d, e, x, s, k) \
+ a += f(b, c, d) + x + k;\
+ a = rotlFixed((word32)a, s) + e;\
+ c = rotlFixed((word32)c, 10U)
+
+
+void RIPEMD160::Transform()
+{
+ unsigned long a1, b1, c1, d1, e1, a2, b2, c2, d2, e2;
+ a1 = a2 = digest_[0];
+ b1 = b2 = digest_[1];
+ c1 = c2 = digest_[2];
+ d1 = d2 = digest_[3];
+ e1 = e2 = digest_[4];
+
+ Subround(F, a1, b1, c1, d1, e1, buffer_[ 0], 11, k0);
+ Subround(F, e1, a1, b1, c1, d1, buffer_[ 1], 14, k0);
+ Subround(F, d1, e1, a1, b1, c1, buffer_[ 2], 15, k0);
+ Subround(F, c1, d1, e1, a1, b1, buffer_[ 3], 12, k0);
+ Subround(F, b1, c1, d1, e1, a1, buffer_[ 4], 5, k0);
+ Subround(F, a1, b1, c1, d1, e1, buffer_[ 5], 8, k0);
+ Subround(F, e1, a1, b1, c1, d1, buffer_[ 6], 7, k0);
+ Subround(F, d1, e1, a1, b1, c1, buffer_[ 7], 9, k0);
+ Subround(F, c1, d1, e1, a1, b1, buffer_[ 8], 11, k0);
+ Subround(F, b1, c1, d1, e1, a1, buffer_[ 9], 13, k0);
+ Subround(F, a1, b1, c1, d1, e1, buffer_[10], 14, k0);
+ Subround(F, e1, a1, b1, c1, d1, buffer_[11], 15, k0);
+ Subround(F, d1, e1, a1, b1, c1, buffer_[12], 6, k0);
+ Subround(F, c1, d1, e1, a1, b1, buffer_[13], 7, k0);
+ Subround(F, b1, c1, d1, e1, a1, buffer_[14], 9, k0);
+ Subround(F, a1, b1, c1, d1, e1, buffer_[15], 8, k0);
+
+ Subround(G, e1, a1, b1, c1, d1, buffer_[ 7], 7, k1);
+ Subround(G, d1, e1, a1, b1, c1, buffer_[ 4], 6, k1);
+ Subround(G, c1, d1, e1, a1, b1, buffer_[13], 8, k1);
+ Subround(G, b1, c1, d1, e1, a1, buffer_[ 1], 13, k1);
+ Subround(G, a1, b1, c1, d1, e1, buffer_[10], 11, k1);
+ Subround(G, e1, a1, b1, c1, d1, buffer_[ 6], 9, k1);
+ Subround(G, d1, e1, a1, b1, c1, buffer_[15], 7, k1);
+ Subround(G, c1, d1, e1, a1, b1, buffer_[ 3], 15, k1);
+ Subround(G, b1, c1, d1, e1, a1, buffer_[12], 7, k1);
+ Subround(G, a1, b1, c1, d1, e1, buffer_[ 0], 12, k1);
+ Subround(G, e1, a1, b1, c1, d1, buffer_[ 9], 15, k1);
+ Subround(G, d1, e1, a1, b1, c1, buffer_[ 5], 9, k1);
+ Subround(G, c1, d1, e1, a1, b1, buffer_[ 2], 11, k1);
+ Subround(G, b1, c1, d1, e1, a1, buffer_[14], 7, k1);
+ Subround(G, a1, b1, c1, d1, e1, buffer_[11], 13, k1);
+ Subround(G, e1, a1, b1, c1, d1, buffer_[ 8], 12, k1);
+
+ Subround(H, d1, e1, a1, b1, c1, buffer_[ 3], 11, k2);
+ Subround(H, c1, d1, e1, a1, b1, buffer_[10], 13, k2);
+ Subround(H, b1, c1, d1, e1, a1, buffer_[14], 6, k2);
+ Subround(H, a1, b1, c1, d1, e1, buffer_[ 4], 7, k2);
+ Subround(H, e1, a1, b1, c1, d1, buffer_[ 9], 14, k2);
+ Subround(H, d1, e1, a1, b1, c1, buffer_[15], 9, k2);
+ Subround(H, c1, d1, e1, a1, b1, buffer_[ 8], 13, k2);
+ Subround(H, b1, c1, d1, e1, a1, buffer_[ 1], 15, k2);
+ Subround(H, a1, b1, c1, d1, e1, buffer_[ 2], 14, k2);
+ Subround(H, e1, a1, b1, c1, d1, buffer_[ 7], 8, k2);
+ Subround(H, d1, e1, a1, b1, c1, buffer_[ 0], 13, k2);
+ Subround(H, c1, d1, e1, a1, b1, buffer_[ 6], 6, k2);
+ Subround(H, b1, c1, d1, e1, a1, buffer_[13], 5, k2);
+ Subround(H, a1, b1, c1, d1, e1, buffer_[11], 12, k2);
+ Subround(H, e1, a1, b1, c1, d1, buffer_[ 5], 7, k2);
+ Subround(H, d1, e1, a1, b1, c1, buffer_[12], 5, k2);
+
+ Subround(I, c1, d1, e1, a1, b1, buffer_[ 1], 11, k3);
+ Subround(I, b1, c1, d1, e1, a1, buffer_[ 9], 12, k3);
+ Subround(I, a1, b1, c1, d1, e1, buffer_[11], 14, k3);
+ Subround(I, e1, a1, b1, c1, d1, buffer_[10], 15, k3);
+ Subround(I, d1, e1, a1, b1, c1, buffer_[ 0], 14, k3);
+ Subround(I, c1, d1, e1, a1, b1, buffer_[ 8], 15, k3);
+ Subround(I, b1, c1, d1, e1, a1, buffer_[12], 9, k3);
+ Subround(I, a1, b1, c1, d1, e1, buffer_[ 4], 8, k3);
+ Subround(I, e1, a1, b1, c1, d1, buffer_[13], 9, k3);
+ Subround(I, d1, e1, a1, b1, c1, buffer_[ 3], 14, k3);
+ Subround(I, c1, d1, e1, a1, b1, buffer_[ 7], 5, k3);
+ Subround(I, b1, c1, d1, e1, a1, buffer_[15], 6, k3);
+ Subround(I, a1, b1, c1, d1, e1, buffer_[14], 8, k3);
+ Subround(I, e1, a1, b1, c1, d1, buffer_[ 5], 6, k3);
+ Subround(I, d1, e1, a1, b1, c1, buffer_[ 6], 5, k3);
+ Subround(I, c1, d1, e1, a1, b1, buffer_[ 2], 12, k3);
+
+ Subround(J, b1, c1, d1, e1, a1, buffer_[ 4], 9, k4);
+ Subround(J, a1, b1, c1, d1, e1, buffer_[ 0], 15, k4);
+ Subround(J, e1, a1, b1, c1, d1, buffer_[ 5], 5, k4);
+ Subround(J, d1, e1, a1, b1, c1, buffer_[ 9], 11, k4);
+ Subround(J, c1, d1, e1, a1, b1, buffer_[ 7], 6, k4);
+ Subround(J, b1, c1, d1, e1, a1, buffer_[12], 8, k4);
+ Subround(J, a1, b1, c1, d1, e1, buffer_[ 2], 13, k4);
+ Subround(J, e1, a1, b1, c1, d1, buffer_[10], 12, k4);
+ Subround(J, d1, e1, a1, b1, c1, buffer_[14], 5, k4);
+ Subround(J, c1, d1, e1, a1, b1, buffer_[ 1], 12, k4);
+ Subround(J, b1, c1, d1, e1, a1, buffer_[ 3], 13, k4);
+ Subround(J, a1, b1, c1, d1, e1, buffer_[ 8], 14, k4);
+ Subround(J, e1, a1, b1, c1, d1, buffer_[11], 11, k4);
+ Subround(J, d1, e1, a1, b1, c1, buffer_[ 6], 8, k4);
+ Subround(J, c1, d1, e1, a1, b1, buffer_[15], 5, k4);
+ Subround(J, b1, c1, d1, e1, a1, buffer_[13], 6, k4);
+
+ Subround(J, a2, b2, c2, d2, e2, buffer_[ 5], 8, k5);
+ Subround(J, e2, a2, b2, c2, d2, buffer_[14], 9, k5);
+ Subround(J, d2, e2, a2, b2, c2, buffer_[ 7], 9, k5);
+ Subround(J, c2, d2, e2, a2, b2, buffer_[ 0], 11, k5);
+ Subround(J, b2, c2, d2, e2, a2, buffer_[ 9], 13, k5);
+ Subround(J, a2, b2, c2, d2, e2, buffer_[ 2], 15, k5);
+ Subround(J, e2, a2, b2, c2, d2, buffer_[11], 15, k5);
+ Subround(J, d2, e2, a2, b2, c2, buffer_[ 4], 5, k5);
+ Subround(J, c2, d2, e2, a2, b2, buffer_[13], 7, k5);
+ Subround(J, b2, c2, d2, e2, a2, buffer_[ 6], 7, k5);
+ Subround(J, a2, b2, c2, d2, e2, buffer_[15], 8, k5);
+ Subround(J, e2, a2, b2, c2, d2, buffer_[ 8], 11, k5);
+ Subround(J, d2, e2, a2, b2, c2, buffer_[ 1], 14, k5);
+ Subround(J, c2, d2, e2, a2, b2, buffer_[10], 14, k5);
+ Subround(J, b2, c2, d2, e2, a2, buffer_[ 3], 12, k5);
+ Subround(J, a2, b2, c2, d2, e2, buffer_[12], 6, k5);
+
+ Subround(I, e2, a2, b2, c2, d2, buffer_[ 6], 9, k6);
+ Subround(I, d2, e2, a2, b2, c2, buffer_[11], 13, k6);
+ Subround(I, c2, d2, e2, a2, b2, buffer_[ 3], 15, k6);
+ Subround(I, b2, c2, d2, e2, a2, buffer_[ 7], 7, k6);
+ Subround(I, a2, b2, c2, d2, e2, buffer_[ 0], 12, k6);
+ Subround(I, e2, a2, b2, c2, d2, buffer_[13], 8, k6);
+ Subround(I, d2, e2, a2, b2, c2, buffer_[ 5], 9, k6);
+ Subround(I, c2, d2, e2, a2, b2, buffer_[10], 11, k6);
+ Subround(I, b2, c2, d2, e2, a2, buffer_[14], 7, k6);
+ Subround(I, a2, b2, c2, d2, e2, buffer_[15], 7, k6);
+ Subround(I, e2, a2, b2, c2, d2, buffer_[ 8], 12, k6);
+ Subround(I, d2, e2, a2, b2, c2, buffer_[12], 7, k6);
+ Subround(I, c2, d2, e2, a2, b2, buffer_[ 4], 6, k6);
+ Subround(I, b2, c2, d2, e2, a2, buffer_[ 9], 15, k6);
+ Subround(I, a2, b2, c2, d2, e2, buffer_[ 1], 13, k6);
+ Subround(I, e2, a2, b2, c2, d2, buffer_[ 2], 11, k6);
+
+ Subround(H, d2, e2, a2, b2, c2, buffer_[15], 9, k7);
+ Subround(H, c2, d2, e2, a2, b2, buffer_[ 5], 7, k7);
+ Subround(H, b2, c2, d2, e2, a2, buffer_[ 1], 15, k7);
+ Subround(H, a2, b2, c2, d2, e2, buffer_[ 3], 11, k7);
+ Subround(H, e2, a2, b2, c2, d2, buffer_[ 7], 8, k7);
+ Subround(H, d2, e2, a2, b2, c2, buffer_[14], 6, k7);
+ Subround(H, c2, d2, e2, a2, b2, buffer_[ 6], 6, k7);
+ Subround(H, b2, c2, d2, e2, a2, buffer_[ 9], 14, k7);
+ Subround(H, a2, b2, c2, d2, e2, buffer_[11], 12, k7);
+ Subround(H, e2, a2, b2, c2, d2, buffer_[ 8], 13, k7);
+ Subround(H, d2, e2, a2, b2, c2, buffer_[12], 5, k7);
+ Subround(H, c2, d2, e2, a2, b2, buffer_[ 2], 14, k7);
+ Subround(H, b2, c2, d2, e2, a2, buffer_[10], 13, k7);
+ Subround(H, a2, b2, c2, d2, e2, buffer_[ 0], 13, k7);
+ Subround(H, e2, a2, b2, c2, d2, buffer_[ 4], 7, k7);
+ Subround(H, d2, e2, a2, b2, c2, buffer_[13], 5, k7);
+
+ Subround(G, c2, d2, e2, a2, b2, buffer_[ 8], 15, k8);
+ Subround(G, b2, c2, d2, e2, a2, buffer_[ 6], 5, k8);
+ Subround(G, a2, b2, c2, d2, e2, buffer_[ 4], 8, k8);
+ Subround(G, e2, a2, b2, c2, d2, buffer_[ 1], 11, k8);
+ Subround(G, d2, e2, a2, b2, c2, buffer_[ 3], 14, k8);
+ Subround(G, c2, d2, e2, a2, b2, buffer_[11], 14, k8);
+ Subround(G, b2, c2, d2, e2, a2, buffer_[15], 6, k8);
+ Subround(G, a2, b2, c2, d2, e2, buffer_[ 0], 14, k8);
+ Subround(G, e2, a2, b2, c2, d2, buffer_[ 5], 6, k8);
+ Subround(G, d2, e2, a2, b2, c2, buffer_[12], 9, k8);
+ Subround(G, c2, d2, e2, a2, b2, buffer_[ 2], 12, k8);
+ Subround(G, b2, c2, d2, e2, a2, buffer_[13], 9, k8);
+ Subround(G, a2, b2, c2, d2, e2, buffer_[ 9], 12, k8);
+ Subround(G, e2, a2, b2, c2, d2, buffer_[ 7], 5, k8);
+ Subround(G, d2, e2, a2, b2, c2, buffer_[10], 15, k8);
+ Subround(G, c2, d2, e2, a2, b2, buffer_[14], 8, k8);
+
+ Subround(F, b2, c2, d2, e2, a2, buffer_[12], 8, k9);
+ Subround(F, a2, b2, c2, d2, e2, buffer_[15], 5, k9);
+ Subround(F, e2, a2, b2, c2, d2, buffer_[10], 12, k9);
+ Subround(F, d2, e2, a2, b2, c2, buffer_[ 4], 9, k9);
+ Subround(F, c2, d2, e2, a2, b2, buffer_[ 1], 12, k9);
+ Subround(F, b2, c2, d2, e2, a2, buffer_[ 5], 5, k9);
+ Subround(F, a2, b2, c2, d2, e2, buffer_[ 8], 14, k9);
+ Subround(F, e2, a2, b2, c2, d2, buffer_[ 7], 6, k9);
+ Subround(F, d2, e2, a2, b2, c2, buffer_[ 6], 8, k9);
+ Subround(F, c2, d2, e2, a2, b2, buffer_[ 2], 13, k9);
+ Subround(F, b2, c2, d2, e2, a2, buffer_[13], 6, k9);
+ Subround(F, a2, b2, c2, d2, e2, buffer_[14], 5, k9);
+ Subround(F, e2, a2, b2, c2, d2, buffer_[ 0], 15, k9);
+ Subround(F, d2, e2, a2, b2, c2, buffer_[ 3], 13, k9);
+ Subround(F, c2, d2, e2, a2, b2, buffer_[ 9], 11, k9);
+ Subround(F, b2, c2, d2, e2, a2, buffer_[11], 11, k9);
+
+ c1 = digest_[1] + c1 + d2;
+ digest_[1] = digest_[2] + d1 + e2;
+ digest_[2] = digest_[3] + e1 + a2;
+ digest_[3] = digest_[4] + a1 + b2;
+ digest_[4] = digest_[0] + b1 + c2;
+ digest_[0] = c1;
+}
+
+
+#ifdef DO_RIPEMD_ASM
+
+/*
+ // F(x ^ y ^ z)
+ // place in esi
+#define ASMF(x, y, z) \
+ AS2( mov esi, x ) \
+ AS2( xor esi, y ) \
+ AS2( xor esi, z )
+
+
+ // G(z ^ (x & (y^z)))
+ // place in esi
+#define ASMG(x, y, z) \
+ AS2( mov esi, z ) \
+ AS2( xor esi, y ) \
+ AS2( and esi, x ) \
+ AS2( xor esi, z )
+
+
+ // H(z ^ (x | ~y))
+ // place in esi
+#define ASMH(x, y, z) \
+ AS2( mov esi, y ) \
+ AS1( not esi ) \
+ AS2( or esi, x ) \
+ AS2( xor esi, z )
+
+
+ // I(y ^ (z & (x^y)))
+ // place in esi
+#define ASMI(x, y, z) \
+ AS2( mov esi, y ) \
+ AS2( xor esi, x ) \
+ AS2( and esi, z ) \
+ AS2( xor esi, y )
+
+
+ // J(x ^ (y | ~z)))
+ // place in esi
+#define ASMJ(x, y, z) \
+ AS2( mov esi, z ) \
+ AS1( not esi ) \
+ AS2( or esi, y ) \
+ AS2( xor esi, x )
+
+
+// for 160 and 320
+// #define ASMSubround(f, a, b, c, d, e, i, s, k)
+// a += f(b, c, d) + data[i] + k;
+// a = rotlFixed((word32)a, s) + e;
+// c = rotlFixed((word32)c, 10U)
+
+#define ASMSubround(f, a, b, c, d, e, index, s, k) \
+ // a += f(b, c, d) + data[i] + k \
+ AS2( mov esp, [edi + index * 4] ) \
+ f(b, c, d) \
+ AS2( add esi, k ) \
+ AS2( add esi, esp ) \
+ AS2( add a, esi ) \
+ // a = rotlFixed((word32)a, s) + e \
+ AS2( rol a, s ) \
+ AS2( rol c, 10 ) \
+ // c = rotlFixed((word32)c, 10U) \
+ AS2( add a, e )
+*/
+
+
+// combine F into subround w/ setup
+// esi already has c, setup for next round when done
+// esp already has edi[index], setup for next round when done
+
+#define ASMSubroundF(a, b, c, d, e, index, s) \
+ /* a += (b ^ c ^ d) + data[i] + k */ \
+ AS2( xor esi, b ) \
+ AS2( add a, [edi + index * 4] ) \
+ AS2( xor esi, d ) \
+ AS2( add a, esi ) \
+ /* a = rotlFixed((word32)a, s) + e */ \
+ AS2( mov esi, b ) \
+ AS2( rol a, s ) \
+ /* c = rotlFixed((word32)c, 10U) */ \
+ AS2( rol c, 10 ) \
+ AS2( add a, e )
+
+
+// combine G into subround w/ setup
+// esi already has c, setup for next round when done
+// esp already has edi[index], setup for next round when done
+
+#define ASMSubroundG(a, b, c, d, e, index, s, k) \
+ /* a += (d ^ (b & (c^d))) + data[i] + k */ \
+ AS2( xor esi, d ) \
+ AS2( and esi, b ) \
+ AS2( add a, [edi + index * 4] ) \
+ AS2( xor esi, d ) \
+ AS2( lea a, [esi + a + k] ) \
+ /* a = rotlFixed((word32)a, s) + e */ \
+ AS2( mov esi, b ) \
+ AS2( rol a, s ) \
+ /* c = rotlFixed((word32)c, 10U) */ \
+ AS2( rol c, 10 ) \
+ AS2( add a, e )
+
+
+// combine H into subround w/ setup
+// esi already has c, setup for next round when done
+// esp already has edi[index], setup for next round when done
+
+#define ASMSubroundH(a, b, c, d, e, index, s, k) \
+ /* a += (d ^ (b | ~c)) + data[i] + k */ \
+ AS1( not esi ) \
+ AS2( or esi, b ) \
+ AS2( add a, [edi + index * 4] ) \
+ AS2( xor esi, d ) \
+ AS2( lea a, [esi + a + k] ) \
+ /* a = rotlFixed((word32)a, s) + e */ \
+ AS2( mov esi, b ) \
+ AS2( rol a, s ) \
+ /* c = rotlFixed((word32)c, 10U) */ \
+ AS2( rol c, 10 ) \
+ AS2( add a, e )
+
+
+// combine I into subround w/ setup
+// esi already has c, setup for next round when done
+// esp already has edi[index], setup for next round when done
+
+#define ASMSubroundI(a, b, c, d, e, index, s, k) \
+ /* a += (c ^ (d & (b^c))) + data[i] + k */ \
+ AS2( xor esi, b ) \
+ AS2( and esi, d ) \
+ AS2( add a, [edi + index * 4] ) \
+ AS2( xor esi, c ) \
+ AS2( lea a, [esi + a + k] ) \
+ /* a = rotlFixed((word32)a, s) + e */ \
+ AS2( mov esi, b ) \
+ AS2( rol a, s ) \
+ /* c = rotlFixed((word32)c, 10U) */ \
+ AS2( rol c, 10 ) \
+ AS2( add a, e )
+
+
+// combine J into subround w/ setup
+// esi already has d, setup for next round when done
+// esp already has edi[index], setup for next round when done
+
+#define ASMSubroundJ(a, b, c, d, e, index, s, k) \
+ /* a += (b ^ (c | ~d))) + data[i] + k */ \
+ AS1( not esi ) \
+ AS2( or esi, c ) \
+ /* c = rotlFixed((word32)c, 10U) */ \
+ AS2( add a, [edi + index * 4] ) \
+ AS2( xor esi, b ) \
+ AS2( rol c, 10 ) \
+ AS2( lea a, [esi + a + k] ) \
+ /* a = rotlFixed((word32)a, s) + e */ \
+ AS2( rol a, s ) \
+ AS2( mov esi, c ) \
+ AS2( add a, e )
+
+
+#ifdef _MSC_VER
+ __declspec(naked)
+#else
+ __attribute__ ((noinline))
+#endif
+void RIPEMD160::AsmTransform(const byte* data, word32 times)
+{
+#ifdef __GNUC__
+ #define AS1(x) #x ";"
+ #define AS2(x, y) #x ", " #y ";"
+
+ #define PROLOG() \
+ __asm__ __volatile__ \
+ ( \
+ ".intel_syntax noprefix;" \
+ "push ebx;" \
+ "push ebp;"
+ #define EPILOG() \
+ "pop ebp;" \
+ "pop ebx;" \
+ "emms;" \
+ ".att_syntax;" \
+ : \
+ : "c" (this), "D" (data), "d" (times) \
+ : "%esi", "%eax", "memory", "cc" \
+ );
+
+#else
+ #define AS1(x) __asm x
+ #define AS2(x, y) __asm x, y
+
+ #define PROLOG() \
+ AS1( push ebp ) \
+ AS2( mov ebp, esp ) \
+ AS2( movd mm3, edi ) \
+ AS2( movd mm4, ebx ) \
+ AS2( movd mm5, esi ) \
+ AS2( movd mm6, ebp ) \
+ AS2( mov edi, DWORD PTR [ebp + 8] ) \
+ AS2( mov edx, DWORD PTR [ebp + 12] )
+
+ #define EPILOG() \
+ AS2( movd ebp, mm6 ) \
+ AS2( movd esi, mm5 ) \
+ AS2( movd ebx, mm4 ) \
+ AS2( movd edi, mm3 ) \
+ AS2( mov esp, ebp ) \
+ AS1( pop ebp ) \
+ AS1( emms ) \
+ AS1( ret 8 )
+
+#endif
+
+ PROLOG()
+
+ #ifdef OLD_GCC_OFFSET
+ AS2( lea esi, [ecx + 20] ) // digest_[0]
+ #else
+ AS2( lea esi, [ecx + 16] ) // digest_[0]
+ #endif
+
+ AS2( sub esp, 24 ) // make room for tmp a1 - e1
+ AS2( movd mm1, esi ) // store digest_
+
+#ifdef _MSC_VER
+ AS1( loopStart: ) // loopStart
+#else
+ AS1( 0: ) // loopStart for some gas (need numeric for jump back
+#endif
+
+ AS2( movd mm2, edx ) // store times_
+
+ AS2( mov eax, [esi] ) // a1
+ AS2( mov ebx, [esi + 4] ) // b1
+ AS2( mov ecx, [esi + 8] ) // c1
+ AS2( mov edx, [esi + 12] ) // d1
+ AS2( mov ebp, [esi + 16] ) // e1
+
+ // setup
+ AS2( mov esi, ecx )
+
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 0, 11)
+ ASMSubroundF( ebp, eax, ebx, ecx, edx, 1, 14)
+ ASMSubroundF( edx, ebp, eax, ebx, ecx, 2, 15)
+ ASMSubroundF( ecx, edx, ebp, eax, ebx, 3, 12)
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 4, 5)
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 5, 8)
+ ASMSubroundF( ebp, eax, ebx, ecx, edx, 6, 7)
+ ASMSubroundF( edx, ebp, eax, ebx, ecx, 7, 9)
+ ASMSubroundF( ecx, edx, ebp, eax, ebx, 8, 11)
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 9, 13)
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 10, 14)
+ ASMSubroundF( ebp, eax, ebx, ecx, edx, 11, 15)
+ ASMSubroundF( edx, ebp, eax, ebx, ecx, 12, 6)
+ ASMSubroundF( ecx, edx, ebp, eax, ebx, 13, 7)
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 14, 9)
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 15, 8)
+
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 7, 7, k1)
+ ASMSubroundG( edx, ebp, eax, ebx, ecx, 4, 6, k1)
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 13, 8, k1)
+ ASMSubroundG( ebx, ecx, edx, ebp, eax, 1, 13, k1)
+ ASMSubroundG( eax, ebx, ecx, edx, ebp, 10, 11, k1)
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 6, 9, k1)
+ ASMSubroundG( edx, ebp, eax, ebx, ecx, 15, 7, k1)
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 3, 15, k1)
+ ASMSubroundG( ebx, ecx, edx, ebp, eax, 12, 7, k1)
+ ASMSubroundG( eax, ebx, ecx, edx, ebp, 0, 12, k1)
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 9, 15, k1)
+ ASMSubroundG( edx, ebp, eax, ebx, ecx, 5, 9, k1)
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 2, 11, k1)
+ ASMSubroundG( ebx, ecx, edx, ebp, eax, 14, 7, k1)
+ ASMSubroundG( eax, ebx, ecx, edx, ebp, 11, 13, k1)
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 8, 12, k1)
+
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 3, 11, k2)
+ ASMSubroundH( ecx, edx, ebp, eax, ebx, 10, 13, k2)
+ ASMSubroundH( ebx, ecx, edx, ebp, eax, 14, 6, k2)
+ ASMSubroundH( eax, ebx, ecx, edx, ebp, 4, 7, k2)
+ ASMSubroundH( ebp, eax, ebx, ecx, edx, 9, 14, k2)
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 15, 9, k2)
+ ASMSubroundH( ecx, edx, ebp, eax, ebx, 8, 13, k2)
+ ASMSubroundH( ebx, ecx, edx, ebp, eax, 1, 15, k2)
+ ASMSubroundH( eax, ebx, ecx, edx, ebp, 2, 14, k2)
+ ASMSubroundH( ebp, eax, ebx, ecx, edx, 7, 8, k2)
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 0, 13, k2)
+ ASMSubroundH( ecx, edx, ebp, eax, ebx, 6, 6, k2)
+ ASMSubroundH( ebx, ecx, edx, ebp, eax, 13, 5, k2)
+ ASMSubroundH( eax, ebx, ecx, edx, ebp, 11, 12, k2)
+ ASMSubroundH( ebp, eax, ebx, ecx, edx, 5, 7, k2)
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 12, 5, k2)
+
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 1, 11, k3)
+ ASMSubroundI( ebx, ecx, edx, ebp, eax, 9, 12, k3)
+ ASMSubroundI( eax, ebx, ecx, edx, ebp, 11, 14, k3)
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 10, 15, k3)
+ ASMSubroundI( edx, ebp, eax, ebx, ecx, 0, 14, k3)
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 8, 15, k3)
+ ASMSubroundI( ebx, ecx, edx, ebp, eax, 12, 9, k3)
+ ASMSubroundI( eax, ebx, ecx, edx, ebp, 4, 8, k3)
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 13, 9, k3)
+ ASMSubroundI( edx, ebp, eax, ebx, ecx, 3, 14, k3)
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 7, 5, k3)
+ ASMSubroundI( ebx, ecx, edx, ebp, eax, 15, 6, k3)
+ ASMSubroundI( eax, ebx, ecx, edx, ebp, 14, 8, k3)
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 5, 6, k3)
+ ASMSubroundI( edx, ebp, eax, ebx, ecx, 6, 5, k3)
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 2, 12, k3)
+
+ // setup
+ AS2( mov esi, ebp )
+
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 4, 9, k4)
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 0, 15, k4)
+ ASMSubroundJ( ebp, eax, ebx, ecx, edx, 5, 5, k4)
+ ASMSubroundJ( edx, ebp, eax, ebx, ecx, 9, 11, k4)
+ ASMSubroundJ( ecx, edx, ebp, eax, ebx, 7, 6, k4)
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 12, 8, k4)
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 2, 13, k4)
+ ASMSubroundJ( ebp, eax, ebx, ecx, edx, 10, 12, k4)
+ ASMSubroundJ( edx, ebp, eax, ebx, ecx, 14, 5, k4)
+ ASMSubroundJ( ecx, edx, ebp, eax, ebx, 1, 12, k4)
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 3, 13, k4)
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 8, 14, k4)
+ ASMSubroundJ( ebp, eax, ebx, ecx, edx, 11, 11, k4)
+ ASMSubroundJ( edx, ebp, eax, ebx, ecx, 6, 8, k4)
+ ASMSubroundJ( ecx, edx, ebp, eax, ebx, 15, 5, k4)
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 13, 6, k4)
+
+ // store a1 - e1 on stack
+ AS2( movd esi, mm1 ) // digest_
+
+ AS2( mov [esp], eax )
+ AS2( mov [esp + 4], ebx )
+ AS2( mov [esp + 8], ecx )
+ AS2( mov [esp + 12], edx )
+ AS2( mov [esp + 16], ebp )
+
+ AS2( mov eax, [esi] ) // a2
+ AS2( mov ebx, [esi + 4] ) // b2
+ AS2( mov ecx, [esi + 8] ) // c2
+ AS2( mov edx, [esi + 12] ) // d2
+ AS2( mov ebp, [esi + 16] ) // e2
+
+
+ // setup
+ AS2( mov esi, edx )
+
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 5, 8, k5)
+ ASMSubroundJ( ebp, eax, ebx, ecx, edx, 14, 9, k5)
+ ASMSubroundJ( edx, ebp, eax, ebx, ecx, 7, 9, k5)
+ ASMSubroundJ( ecx, edx, ebp, eax, ebx, 0, 11, k5)
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 9, 13, k5)
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 2, 15, k5)
+ ASMSubroundJ( ebp, eax, ebx, ecx, edx, 11, 15, k5)
+ ASMSubroundJ( edx, ebp, eax, ebx, ecx, 4, 5, k5)
+ ASMSubroundJ( ecx, edx, ebp, eax, ebx, 13, 7, k5)
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 6, 7, k5)
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 15, 8, k5)
+ ASMSubroundJ( ebp, eax, ebx, ecx, edx, 8, 11, k5)
+ ASMSubroundJ( edx, ebp, eax, ebx, ecx, 1, 14, k5)
+ ASMSubroundJ( ecx, edx, ebp, eax, ebx, 10, 14, k5)
+ ASMSubroundJ( ebx, ecx, edx, ebp, eax, 3, 12, k5)
+ ASMSubroundJ( eax, ebx, ecx, edx, ebp, 12, 6, k5)
+
+ // setup
+ AS2( mov esi, ebx )
+
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 6, 9, k6)
+ ASMSubroundI( edx, ebp, eax, ebx, ecx, 11, 13, k6)
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 3, 15, k6)
+ ASMSubroundI( ebx, ecx, edx, ebp, eax, 7, 7, k6)
+ ASMSubroundI( eax, ebx, ecx, edx, ebp, 0, 12, k6)
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 13, 8, k6)
+ ASMSubroundI( edx, ebp, eax, ebx, ecx, 5, 9, k6)
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 10, 11, k6)
+ ASMSubroundI( ebx, ecx, edx, ebp, eax, 14, 7, k6)
+ ASMSubroundI( eax, ebx, ecx, edx, ebp, 15, 7, k6)
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 8, 12, k6)
+ ASMSubroundI( edx, ebp, eax, ebx, ecx, 12, 7, k6)
+ ASMSubroundI( ecx, edx, ebp, eax, ebx, 4, 6, k6)
+ ASMSubroundI( ebx, ecx, edx, ebp, eax, 9, 15, k6)
+ ASMSubroundI( eax, ebx, ecx, edx, ebp, 1, 13, k6)
+ ASMSubroundI( ebp, eax, ebx, ecx, edx, 2, 11, k6)
+
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 15, 9, k7)
+ ASMSubroundH( ecx, edx, ebp, eax, ebx, 5, 7, k7)
+ ASMSubroundH( ebx, ecx, edx, ebp, eax, 1, 15, k7)
+ ASMSubroundH( eax, ebx, ecx, edx, ebp, 3, 11, k7)
+ ASMSubroundH( ebp, eax, ebx, ecx, edx, 7, 8, k7)
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 14, 6, k7)
+ ASMSubroundH( ecx, edx, ebp, eax, ebx, 6, 6, k7)
+ ASMSubroundH( ebx, ecx, edx, ebp, eax, 9, 14, k7)
+ ASMSubroundH( eax, ebx, ecx, edx, ebp, 11, 12, k7)
+ ASMSubroundH( ebp, eax, ebx, ecx, edx, 8, 13, k7)
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 12, 5, k7)
+ ASMSubroundH( ecx, edx, ebp, eax, ebx, 2, 14, k7)
+ ASMSubroundH( ebx, ecx, edx, ebp, eax, 10, 13, k7)
+ ASMSubroundH( eax, ebx, ecx, edx, ebp, 0, 13, k7)
+ ASMSubroundH( ebp, eax, ebx, ecx, edx, 4, 7, k7)
+ ASMSubroundH( edx, ebp, eax, ebx, ecx, 13, 5, k7)
+
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 8, 15, k8)
+ ASMSubroundG( ebx, ecx, edx, ebp, eax, 6, 5, k8)
+ ASMSubroundG( eax, ebx, ecx, edx, ebp, 4, 8, k8)
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 1, 11, k8)
+ ASMSubroundG( edx, ebp, eax, ebx, ecx, 3, 14, k8)
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 11, 14, k8)
+ ASMSubroundG( ebx, ecx, edx, ebp, eax, 15, 6, k8)
+ ASMSubroundG( eax, ebx, ecx, edx, ebp, 0, 14, k8)
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 5, 6, k8)
+ ASMSubroundG( edx, ebp, eax, ebx, ecx, 12, 9, k8)
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 2, 12, k8)
+ ASMSubroundG( ebx, ecx, edx, ebp, eax, 13, 9, k8)
+ ASMSubroundG( eax, ebx, ecx, edx, ebp, 9, 12, k8)
+ ASMSubroundG( ebp, eax, ebx, ecx, edx, 7, 5, k8)
+ ASMSubroundG( edx, ebp, eax, ebx, ecx, 10, 15, k8)
+ ASMSubroundG( ecx, edx, ebp, eax, ebx, 14, 8, k8)
+
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 12, 8)
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 15, 5)
+ ASMSubroundF( ebp, eax, ebx, ecx, edx, 10, 12)
+ ASMSubroundF( edx, ebp, eax, ebx, ecx, 4, 9)
+ ASMSubroundF( ecx, edx, ebp, eax, ebx, 1, 12)
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 5, 5)
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 8, 14)
+ ASMSubroundF( ebp, eax, ebx, ecx, edx, 7, 6)
+ ASMSubroundF( edx, ebp, eax, ebx, ecx, 6, 8)
+ ASMSubroundF( ecx, edx, ebp, eax, ebx, 2, 13)
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 13, 6)
+ ASMSubroundF( eax, ebx, ecx, edx, ebp, 14, 5)
+ ASMSubroundF( ebp, eax, ebx, ecx, edx, 0, 15)
+ ASMSubroundF( edx, ebp, eax, ebx, ecx, 3, 13)
+ ASMSubroundF( ecx, edx, ebp, eax, ebx, 9, 11)
+ ASMSubroundF( ebx, ecx, edx, ebp, eax, 11, 11)
+
+ // advance data and store for next round
+ AS2( add edi, 64 )
+ AS2( movd esi, mm1 ) // digest_
+ AS2( movd mm0, edi ) // store
+
+ // now edi as tmp
+
+ // c1 = digest_[1] + c1 + d2;
+ AS2( add [esp + 8], edx ) // + d2
+ AS2( mov edi, [esi + 4] ) // digest_[1]
+ AS2( add [esp + 8], edi )
+
+ // digest_[1] = digest_[2] + d1 + e2;
+ AS2( mov [esi + 4], ebp ) // e2
+ AS2( mov edi, [esp + 12] ) // d1
+ AS2( add edi, [esi + 8] ) // digest_[2]
+ AS2( add [esi + 4], edi )
+
+ // digest_[2] = digest_[3] + e1 + a2;
+ AS2( mov [esi + 8], eax ) // a2
+ AS2( mov edi, [esp + 16] ) // e1
+ AS2( add edi, [esi + 12] ) // digest_[3]
+ AS2( add [esi + 8], edi )
+
+ // digest_[3] = digest_[4] + a1 + b2;
+ AS2( mov [esi + 12], ebx ) // b2
+ AS2( mov edi, [esp] ) // a1
+ AS2( add edi, [esi + 16] ) // digest_[4]
+ AS2( add [esi + 12], edi )
+
+ // digest_[4] = digest_[0] + b1 + c2;
+ AS2( mov [esi + 16], ecx ) // c2
+ AS2( mov edi, [esp + 4] ) // b1
+ AS2( add edi, [esi] ) // digest_[0]
+ AS2( add [esi + 16], edi )
+
+ // digest_[0] = c1;
+ AS2( mov edi, [esp + 8] ) // c1
+ AS2( mov [esi], edi )
+
+ // setup for loop back
+ AS2( movd edx, mm2 ) // times
+ AS2( movd edi, mm0 ) // data, already advanced
+ AS1( dec edx )
+#ifdef _MSC_VER
+ AS1( jnz loopStart ) // loopStart
+#else
+ AS1( jnz 0b ) // loopStart
+#endif
+
+ // inline adjust
+ AS2( add esp, 24 ) // fix room on stack
+
+ EPILOG()
+}
+
+
+#endif // DO_RIPEMD_ASM
+
+
+} // namespace TaoCrypt