22 files changed, 13356 insertions, 0 deletions
diff --git a/mysql/extra/yassl/taocrypt/src/aes.cpp b/mysql/extra/yassl/taocrypt/src/aes.cpp
new file mode 100644
index 0000000..3fcf80a
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/aes.cpp
@@ -0,0 +1,1885 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* C++ based on Wei Dai's aes.cpp from CryptoPP */
+/* x86 asm original */
+
+#if defined(TAOCRYPT_KERNEL_MODE)
+    #define DO_TAOCRYPT_KERNEL_MODE
+#endif                                  // only some modules now support this
+
+#include "runtime.hpp"
+#include "aes.hpp"
+
+
+namespace TaoCrypt {
+
+
+#if defined(DO_AES_ASM)
+
+// ia32 optimized version: ECB and CBC are driven through the asm routines
+void AES::Process(byte* out, const byte* in, word32 sz)
+{
+    if (!isMMX) {   // asm path not taken: defer to Mode_BASE::Process
+        Mode_BASE::Process(out, in, sz);
+        return;
+    }
+    // sz is consumed in whole blocks; a trailing partial block is ignored
+    word32 blocks = sz / BLOCK_SIZE;
+    // any mode other than ECB or CBC falls through without writing to out
+    if (mode_ == ECB)
+        while (blocks--) {
+            if (dir_ == ENCRYPTION)
+                AsmEncrypt(in, out, (void*)Te0);
+            else
+                AsmDecrypt(in, out, (void*)Td0);
+            out += BLOCK_SIZE;
+            in  += BLOCK_SIZE;
+        }
+    else if (mode_ == CBC) {
+        if (dir_ == ENCRYPTION) {
+            while (blocks--) {
+                r_[0] ^= *(word32*)in;
+                r_[1] ^= *(word32*)(in +  4);
+                r_[2] ^= *(word32*)(in +  8);
+                r_[3] ^= *(word32*)(in + 12);
+                // r_ is the chaining value: encrypt it in place, then emit it
+                AsmEncrypt((byte*)r_, (byte*)r_, (void*)Te0);
+
+                memcpy(out, r_, BLOCK_SIZE);
+                out += BLOCK_SIZE;
+                in  += BLOCK_SIZE;
+            }
+        }
+        else {
+            while (blocks--) {
+                word32 saved[4];    // ciphertext copy, since out may alias in
+                memcpy(saved, in, sizeof(saved));
+                AsmDecrypt(in, out, (void*)Td0);
+                *(word32*)out        ^= r_[0];
+                *(word32*)(out +  4) ^= r_[1];
+                *(word32*)(out +  8) ^= r_[2];
+                *(word32*)(out + 12) ^= r_[3];
+                memcpy(r_, saved, sizeof(saved));   // next chaining value
+                out += BLOCK_SIZE;
+                in  += BLOCK_SIZE;
+            }
+        }
+    }
+}
+
+#endif // DO_AES_ASM
+
+
+void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
+{   // fills key_ with the round keys; the CipherDir argument is unused
+    if (keylen <= 16)               // clamp keylen to one of 16, 24, 32
+        keylen = 16;
+    else if (keylen >= 32)
+        keylen = 32;
+    else if (keylen != 24)
+        keylen = 24;
+    // rounds_ works out to 10, 12 or 14 for the three key sizes
+    rounds_ = keylen/4 + 6;
+
+    word32 temp, *rk = key_;
+    unsigned int i=0;
+    // NOTE(review): presumably userKey must hold the clamped keylen bytes
+    GetUserKey(BigEndianOrder, rk, keylen/4, userKey, keylen);
+    // expansion: i indexes rcon_ and counts the passes of each loop
+    switch(keylen)
+    {
+    case 16:
+        while (true)                // 10 passes, each writing rk[4..7]
+        {
+            temp  = rk[3];
+            rk[4] = rk[0] ^
+                (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
+                rcon_[i];
+            rk[5] = rk[1] ^ rk[4];
+            rk[6] = rk[2] ^ rk[5];
+            rk[7] = rk[3] ^ rk[6];
+            if (++i == 10)
+                break;
+            rk += 4;
+        }
+        break;
+
+    case 24:
+        while (true)    // for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack
+        {
+            temp = rk[ 5];
+            rk[ 6] = rk[ 0] ^
+                (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
+                rcon_[i];
+            rk[ 7] = rk[ 1] ^ rk[ 6];
+            rk[ 8] = rk[ 2] ^ rk[ 7];
+            rk[ 9] = rk[ 3] ^ rk[ 8];
+            if (++i == 8)           // 8th pass stops before rk[10], rk[11]
+                break;
+            rk[10] = rk[ 4] ^ rk[ 9];
+            rk[11] = rk[ 5] ^ rk[10];
+            rk += 6;
+        }
+        break;
+
+    case 32:
+        while (true)
+        {
+            temp = rk[ 7];
+            rk[ 8] = rk[ 0] ^
+                (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
+                rcon_[i];
+            rk[ 9] = rk[ 1] ^ rk[ 8];
+            rk[10] = rk[ 2] ^ rk[ 9];
+            rk[11] = rk[ 3] ^ rk[10];
+            if (++i == 7)           // 7th pass stops before rk[12..15]
+                break;
+            temp = rk[11];          // second half: no rcon_, unrotated bytes
+            rk[12] = rk[ 4] ^
+                (Te2[GETBYTE(temp, 3)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 2)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 1)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 0)] & 0x000000ff);
+            rk[13] = rk[ 5] ^ rk[12];
+            rk[14] = rk[ 6] ^ rk[13];
+            rk[15] = rk[ 7] ^ rk[14];
+
+            rk += 8;
+        }
+        break;
+    }
+
+    if (dir_ == DECRYPTION)         // the member decides, not the argument
+    {
+        unsigned int i, j;          // note: hides the i declared above
+        rk = key_;
+
+        /* invert the order of the round keys: */
+        for (i = 0, j = 4*rounds_; i < j; i += 4, j -= 4) {
+            temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
+            temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+            temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+            temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+        }
+        // apply the inverse MixColumn transform to all round keys but the
+        // first and the last:
+        for (i = 1; i < rounds_; i++) {
+            rk += 4;
+            rk[0] =
+                Td0[Te1[GETBYTE(rk[0], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[0], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[0], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[0], 0)] & 0xff];
+            rk[1] =
+                Td0[Te1[GETBYTE(rk[1], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[1], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[1], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[1], 0)] & 0xff];
+            rk[2] =
+                Td0[Te1[GETBYTE(rk[2], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[2], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[2], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[2], 0)] & 0xff];
+            rk[3] =
+                Td0[Te1[GETBYTE(rk[3], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[3], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[3], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[3], 0)] & 0xff];
+        }
+    }
+}
+
+
+void AES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const
+{   // single-block entry point: dir_ picks the routine, arguments pass through
+    if (dir_ != ENCRYPTION)
+        decrypt(in, xOr, out);
+    else
+        encrypt(in, xOr, out);
+}
+
+
+typedef BlockGetAndPut<word32, BigEndian> gpBlock;  // block load/store for encrypt(), decrypt()
+
+	
+void AES::encrypt(const byte* inBlock, const byte* xorBlock,
+                  byte* outBlock) const
+{   // one block: inBlock -> outBlock, driven by key_ and rounds_
+    word32 s0, s1, s2, s3;
+    word32 t0, t1, t2, t3;
+
+    const word32 *rk = key_;
+    /*
+     * map byte array block to cipher state
+     * and add initial round key:
+     */
+    gpBlock::Get(inBlock)(s0)(s1)(s2)(s3);
+    s0 ^= rk[0];
+    s1 ^= rk[1];
+    s2 ^= rk[2];
+    s3 ^= rk[3];
+    // NOTE(review): PreFetchTe() presumably yields 0 (table warm-up) - confirm
+    s0 |= PreFetchTe();
+    /*
+     * Nr - 1 full rounds:
+     */
+    // each pass is two rounds: s -> t with rk[4..7], then t -> s after rk += 8
+    unsigned int r = rounds_ >> 1;
+    for (;;) {
+        t0 =
+            Te0[GETBYTE(s0, 3)] ^
+            Te1[GETBYTE(s1, 2)]  ^
+            Te2[GETBYTE(s2, 1)]  ^
+            Te3[GETBYTE(s3, 0)]  ^
+            rk[4];
+        t1 =
+            Te0[GETBYTE(s1, 3)] ^
+            Te1[GETBYTE(s2, 2)]  ^
+            Te2[GETBYTE(s3, 1)]  ^
+            Te3[GETBYTE(s0, 0)]  ^
+            rk[5];
+        t2 =
+            Te0[GETBYTE(s2, 3)] ^
+            Te1[GETBYTE(s3, 2)]  ^
+            Te2[GETBYTE(s0, 1)]  ^
+            Te3[GETBYTE(s1, 0)]  ^
+            rk[6];
+        t3 =
+            Te0[GETBYTE(s3, 3)] ^
+            Te1[GETBYTE(s0, 2)]  ^
+            Te2[GETBYTE(s1, 1)]  ^
+            Te3[GETBYTE(s2, 0)]  ^
+            rk[7];
+
+        rk += 8;
+        if (--r == 0) {
+            break;
+        }
+        // not the final pass: fold t back into s with the next four words
+        s0 =
+            Te0[GETBYTE(t0, 3)] ^
+            Te1[GETBYTE(t1, 2)] ^
+            Te2[GETBYTE(t2, 1)] ^
+            Te3[GETBYTE(t3, 0)] ^
+            rk[0];
+        s1 =
+            Te0[GETBYTE(t1, 3)] ^
+            Te1[GETBYTE(t2, 2)] ^
+            Te2[GETBYTE(t3, 1)] ^
+            Te3[GETBYTE(t0, 0)] ^
+            rk[1];
+        s2 =
+            Te0[GETBYTE(t2, 3)] ^
+            Te1[GETBYTE(t3, 2)] ^
+            Te2[GETBYTE(t0, 1)] ^
+            Te3[GETBYTE(t1, 0)] ^
+            rk[2];
+        s3 =
+            Te0[GETBYTE(t3, 3)] ^
+            Te1[GETBYTE(t0, 2)] ^
+            Te2[GETBYTE(t1, 1)] ^
+            Te3[GETBYTE(t2, 0)] ^
+            rk[3];
+    }
+
+    /*
+     * apply last round and
+     * map cipher state to byte array block:
+     */
+    // every mask below keeps one byte lane of the looked-up table word
+    s0 =
+        (Te2[GETBYTE(t0, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t1, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t2, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t3, 0)] & 0x000000ff) ^
+        rk[0];
+    s1 =
+        (Te2[GETBYTE(t1, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t2, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t3, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t0, 0)] & 0x000000ff) ^
+        rk[1];
+    s2 =
+        (Te2[GETBYTE(t2, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t3, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t0, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t1, 0)] & 0x000000ff) ^
+        rk[2];
+    s3 =
+        (Te2[GETBYTE(t3, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t0, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t1, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t2, 0)] & 0x000000ff) ^
+        rk[3];
+    // NOTE(review): Put presumably XORs in xorBlock when non-null - confirm
+
+    gpBlock::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3);
+}
+
+
+void AES::decrypt(const byte* inBlock, const byte* xorBlock,
+                  byte* outBlock) const
+{   // one block: inBlock -> outBlock, driven by key_ and rounds_
+    word32 s0, s1, s2, s3;
+    word32 t0, t1, t2, t3;
+    const word32* rk = key_;
+
+    /*
+     * map byte array block to cipher state
+     * and add initial round key:
+     */
+    gpBlock::Get(inBlock)(s0)(s1)(s2)(s3);
+    s0 ^= rk[0];
+    s1 ^= rk[1];
+    s2 ^= rk[2];
+    s3 ^= rk[3];
+    // NOTE(review): PreFetchTd() presumably yields 0 (table warm-up) - confirm
+    s0 |= PreFetchTd();
+
+    /*
+     * Nr - 1 full rounds:
+     */
+    // each pass is two rounds: s -> t with rk[4..7], then t -> s after rk += 8
+    unsigned int r = rounds_ >> 1;
+    for (;;) {
+        t0 =
+            Td0[GETBYTE(s0, 3)] ^
+            Td1[GETBYTE(s3, 2)] ^
+            Td2[GETBYTE(s2, 1)] ^
+            Td3[GETBYTE(s1, 0)] ^
+            rk[4];
+        t1 =
+            Td0[GETBYTE(s1, 3)] ^
+            Td1[GETBYTE(s0, 2)] ^
+            Td2[GETBYTE(s3, 1)] ^
+            Td3[GETBYTE(s2, 0)] ^
+            rk[5];
+        t2 =
+            Td0[GETBYTE(s2, 3)] ^
+            Td1[GETBYTE(s1, 2)] ^
+            Td2[GETBYTE(s0, 1)] ^
+            Td3[GETBYTE(s3, 0)] ^
+            rk[6];
+        t3 =
+            Td0[GETBYTE(s3, 3)] ^
+            Td1[GETBYTE(s2, 2)] ^
+            Td2[GETBYTE(s1, 1)] ^
+            Td3[GETBYTE(s0, 0)] ^
+            rk[7];
+
+        rk += 8;
+        if (--r == 0) {
+            break;
+        }
+        // not the final pass: fold t back into s with the next four words
+        s0 =
+            Td0[GETBYTE(t0, 3)] ^
+            Td1[GETBYTE(t3, 2)] ^
+            Td2[GETBYTE(t2, 1)] ^
+            Td3[GETBYTE(t1, 0)] ^
+            rk[0];
+        s1 =
+            Td0[GETBYTE(t1, 3)] ^
+            Td1[GETBYTE(t0, 2)] ^
+            Td2[GETBYTE(t3, 1)] ^
+            Td3[GETBYTE(t2, 0)] ^
+            rk[1];
+        s2 =
+            Td0[GETBYTE(t2, 3)] ^
+            Td1[GETBYTE(t1, 2)] ^
+            Td2[GETBYTE(t0, 1)] ^
+            Td3[GETBYTE(t3, 0)] ^
+            rk[2];
+        s3 =
+            Td0[GETBYTE(t3, 3)] ^
+            Td1[GETBYTE(t2, 2)] ^
+            Td2[GETBYTE(t1, 1)] ^
+            Td3[GETBYTE(t0, 0)] ^
+            rk[3];
+    }
+    /*
+     * apply last round and
+     * map cipher state to byte array block:
+     */
+    // NOTE(review): PreFetchCTd4() presumably yields 0 as well - confirm
+    t0 |= PreFetchCTd4();
+    // CTd4 entries are widened to word32 and shifted into their byte lane
+    s0 =
+        ((word32)CTd4[GETBYTE(t0, 3)] << 24) ^
+        ((word32)CTd4[GETBYTE(t3, 2)] << 16) ^
+        ((word32)CTd4[GETBYTE(t2, 1)] <<  8) ^
+        ((word32)CTd4[GETBYTE(t1, 0)]) ^
+        rk[0];
+    s1 =
+        ((word32)CTd4[GETBYTE(t1, 3)]  << 24) ^
+        ((word32)CTd4[GETBYTE(t0, 2)]  << 16) ^
+        ((word32)CTd4[GETBYTE(t3, 1)]  <<  8) ^
+        ((word32)CTd4[GETBYTE(t2, 0)]) ^
+        rk[1];
+    s2 =
+        ((word32)CTd4[GETBYTE(t2, 3)] << 24  ) ^
+        ((word32)CTd4[GETBYTE(t1, 2)] << 16 ) ^
+        ((word32)CTd4[GETBYTE(t0, 1)] <<  8 ) ^
+        ((word32)CTd4[GETBYTE(t3, 0)]) ^
+        rk[2];
+    s3 =
+        ((word32)CTd4[GETBYTE(t3, 3)] << 24) ^
+        ((word32)CTd4[GETBYTE(t2, 2)] << 16) ^
+        ((word32)CTd4[GETBYTE(t1, 1)] <<  8) ^
+        ((word32)CTd4[GETBYTE(t0, 0)]) ^
+        rk[3];
+    // NOTE(review): Put presumably XORs in xorBlock when non-null - confirm
+    gpBlock::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3);
+}
+
+
+#if defined(DO_AES_ASM)
+    #ifdef __GNUC__
+        #define AS1(x)    #x ";"            // stringize one asm statement
+        #define AS2(x, y) #x ", " #y ";"    // same, rejoining the two operands
+        // PROLOG opens an asm statement; EPILOG closes it with the constraints
+        #define PROLOG()  \
+        __asm__ __volatile__ \
+        ( \
+            ".intel_syntax noprefix;" \
+            "push ebx;" \
+            "push ebp;" \
+            "movd mm7, ebp;"    /* caller ebp parked in mm7 */ \
+            "movd mm4, eax;"    /* eax = outBlock ("a" below) */ \
+            "mov  ebp, edx;"    /* edx = boxes ("d" below) */ \
+            "sub  esp, 4;" 
+        #define EPILOG()  \
+            "add esp, 4;" \
+            "pop ebp;" \
+            "pop ebx;" \
+       	    "emms;" \
+       	    ".att_syntax;" \
+                : \
+                : "c" (this), "S" (inBlock), "d" (boxes), "a" (outBlock) \
+                : "%edi", "memory", "cc" \
+        );
+
+    #else
+        #define AS1(x)    __asm x
+        #define AS2(x, y) __asm x, y
+        // PROLOG parks edi, ebx, ebp in mm3, mm4, mm7 and esi at [ebp - 4]
+        #define PROLOG() \
+            AS1(    push  ebp                           )   \
+            AS2(    mov   ebp, esp                      )   \
+            AS2(    movd  mm3, edi                      )   \
+            AS2(    movd  mm4, ebx                      )   \
+            AS2(    sub   esp, 4                        )   \
+            AS2(    movd  mm7, ebp                      )   \
+            AS2(    mov   [ebp - 4], esi                )   \
+            AS2(    mov   esi, DWORD PTR [ebp +  8]     )   \
+            AS2(    mov   ebp, DWORD PTR [ebp + 16]     )
+
+        // ebp is restored at end (the function bodies reload it from mm7)
+        #define EPILOG()  \
+            AS2(    mov   esi, [ebp - 4]                )   \
+            AS2(    movd  ebx, mm4                      )   \
+            AS2(    movd  edi, mm3                      )   \
+            AS2(    mov   esp, ebp                      )   \
+            AS1(    pop   ebp                           )   \
+            AS1(    emms                                )   \
+            AS1(    ret   12                            )
+        // ret 12 returns and releases 12 bytes of stack arguments
+            
+    #endif
+
+
+#ifdef _MSC_VER
+    __declspec(naked) 
+#else
+    __attribute__ ((noinline))
+#endif
+void AES::AsmEncrypt(const byte* inBlock, byte* outBlock, void* boxes) const
+{
+    // encrypts one block in x86 asm; boxes is the base of the lookup tables
+    PROLOG()
+    // hard-coded byte offsets from this (ecx); they track the object layout
+    #ifdef OLD_GCC_OFFSET
+        AS2(    mov   edx, DWORD PTR [ecx + 60]     )   // rounds
+        AS2(    lea   edi, [ecx + 64]               )   // rk
+    #else
+        AS2(    mov   edx, DWORD PTR [ecx + 56]     )   // rounds
+        AS2(    lea   edi, [ecx + 60]               )   // rk
+    #endif
+
+    AS1(    dec   edx                           )
+    AS2(    movd  mm6, edi                      )   // save rk
+    AS2(    movd  mm5, edx                      )   // save rounds
+    // read four input words and byte-swap each
+    AS2(    mov   eax, DWORD PTR [esi]                                  )
+    AS2(    mov   ebx, DWORD PTR [esi + 4]                              )
+    AS2(    mov   ecx, DWORD PTR [esi + 8]                              )
+    AS2(    mov   edx, DWORD PTR [esi + 12]                             )
+
+    AS1(    bswap eax                                                   )
+    AS1(    bswap ebx                                                   )
+    AS1(    bswap ecx                                                   )
+    AS1(    bswap edx                                                   )
+    // XOR with the first four round-key words
+    AS2(    xor   eax, DWORD PTR [edi]               )   // s0
+    AS2(    xor   ebx, DWORD PTR [edi +  4]          )   // s1
+    AS2(    xor   ecx, DWORD PTR [edi +  8]          )   // s2
+    AS2(    xor   edx, DWORD PTR [edi + 12]          )   // s3
+
+#ifdef _MSC_VER
+    AS1( loop1: )  // loop1
+#else
+    AS1(1:  )      // loop1
+#endif
+            /* Put0 (mm0) =  
+                Te0[get0,rs 24] ^
+                Te1[get1,rs 16] ^
+                Te2[get2,rs  8] ^
+                Te3[get3,rs  0]
+            */
+       
+    AS2(    mov   esi, eax                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + esi*4]                          )
+                                                    
+    AS2(    mov   edi, ebx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx edi, ch                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 2048 + edi*4]                   )
+
+    AS2(    movzx edi, dl                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+    AS2(    movd  mm0, esi                                              )
+
+             /* Put1 (mm1) =  
+                Te0[get1,rs 24] ^
+                Te1[get2,rs 16] ^
+                Te2[get3,rs  8] ^
+                Te3[get0,rs  0]
+            */
+
+    AS2(    mov   esi, ebx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + esi*4]                          )
+
+    AS2(    mov   edi, ecx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx edi, dh                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 2048 + edi*4]                   )
+
+    AS2(    movzx edi, al                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+    AS2(    movd  mm1, esi                                              )
+
+
+             /* Put2 (mm2) =  
+                Te0[get2,rs 24] ^
+                Te1[get3,rs 16] ^
+                Te2[get0,rs  8] ^
+                Te3[get1,rs  0] 
+            */
+
+    AS2(    mov   esi, ecx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + esi*4]                          )
+
+    AS2(    mov   edi, edx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx edi, ah                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 2048 + edi*4]                   )
+
+    AS2(    movzx edi, bl                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+    AS2(    movd  mm2, esi                                              )
+
+             /* Put3 (edx) =  
+                Te0[get3,rs 24] ^
+                Te1[get0,rs 16] ^
+                Te2[get1,rs  8] ^
+                Te3[get2,rs  0] 
+            */
+
+    AS2(    mov   esi, edx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   edx, DWORD PTR [ebp + esi*4]                          )
+
+    AS2(    mov   edi, eax                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   edx, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx esi, bh                                               )
+    AS2(    xor   edx, DWORD PTR [ebp + 2048 + esi*4]                   )
+
+    AS2(    movzx edi, cl                                               )
+    AS2(    xor   edx, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+            // xOr
+
+    AS2(    movd   esi, mm6                      )   //  rk
+
+    AS2(    movd   eax, mm0                                             )
+    AS2(    add    esi, 16                                              )
+    AS2(    movd   ebx, mm1                                             )
+    AS2(    movd   mm6, esi                      )   //  save back
+    AS2(    movd   ecx, mm2                                             )
+
+    AS2(    xor   eax, DWORD PTR [esi]                                  )
+    AS2(    xor   ebx, DWORD PTR [esi +  4]                             )
+    AS2(    movd  edi, mm5                                              )
+    AS2(    xor   ecx, DWORD PTR [esi +  8]                             )
+    AS2(    xor   edx, DWORD PTR [esi + 12]                             )
+    // mm5 is the pass counter (rounds - 1 at entry); loop until it hits 0
+    AS1(    dec   edi                                                   )
+    AS2(    movd  mm5, edi                                              )
+
+#ifdef _MSC_VER
+    AS1(    jnz   loop1)  // loop1
+#else
+    AS1(    jnz   1b )    // loop1
+#endif
+
+            // last round: table at boxes + 4096, called Te4 in the notes below
+            /*
+            Put0 (mm0) =
+                (Te4[get0, rs24] & 0xff000000) ^  h = 4278190080
+                (Te4[get1, rs16] & 0x00ff0000) ^  h =   16711680
+                (Te4[get2, rs 8] & 0x0000ff00) ^  h =      65280
+                (Te4[get3, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, eax                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   esi, 4278190080                                       )
+
+    AS2(    mov   edi, ebx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 16711680                                         )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, ch                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, dl                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movd  mm0, esi                                              )
+
+            /*
+            Put1 (mm1) =
+                (Te4[get1, rs24] & 0xff000000) ^  h = 4278190080
+                (Te4[get2, rs16] & 0x00ff0000) ^  h =   16711680
+                (Te4[get3, rs 8] & 0x0000ff00) ^  h =      65280
+                (Te4[get0, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, ebx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   esi, 4278190080                                       )
+
+    AS2(    mov   edi, ecx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 16711680                                         )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, dh                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, al                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movd  mm1, esi                                              )
+
+            /*
+            Put2 (mm2) =
+                (Te4[get2, rs24] & 0xff000000) ^  h = 4278190080
+                (Te4[get3, rs16] & 0x00ff0000) ^  h =   16711680
+                (Te4[get0, rs 8] & 0x0000ff00) ^  h =      65280
+                (Te4[get1, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, ecx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   esi, 4278190080                                       )
+
+    AS2(    mov   edi, edx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 16711680                                         )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, ah                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, bl                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movd  mm2, esi                                              )
+
+            /*
+            Put3 (edx) =
+                (Te4[get3, rs24] & 0xff000000) ^  h = 4278190080
+                (Te4[get0, rs16] & 0x00ff0000) ^  h =   16711680
+                (Te4[get1, rs 8] & 0x0000ff00) ^  h =      65280
+                (Te4[get2, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, edx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   edx, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   edx, 4278190080                                       )
+
+    AS2(    mov   edi, eax                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   esi, 16711680                                         )
+    AS2(    xor   edx, esi                                              )
+
+    AS2(    movzx esi, bh                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   edx, edi                                              )
+
+    AS2(    movzx edi, cl                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   esi, 255                                              )
+    AS2(    xor   edx, esi                                              )
+
+    
+            // xOr
+    AS2(    movd   eax, mm0                                             )
+    AS2(    movd   esi, mm6                      )   //  rk
+    AS2(    movd   ebx, mm1                                             )
+    AS2(    add    esi, 16                                               )
+    AS2(    movd   ecx, mm2                                             )
+
+    AS2(    xor   eax, DWORD PTR [esi]                                  )
+    AS2(    xor   ebx, DWORD PTR [esi +  4]                             )
+    AS2(    xor   ecx, DWORD PTR [esi +  8]                             )
+    AS2(    xor   edx, DWORD PTR [esi + 12]                             )
+
+    // end: reload the caller's ebp that PROLOG parked in mm7
+    AS2(    movd  ebp, mm7                                              )
+
+            // swap
+    AS1(    bswap eax                                                   )
+    AS1(    bswap ebx                                                   )
+
+            // store
+    #ifdef __GNUC__
+        AS2(    movd esi, mm4                       )   //  outBlock
+    #else
+        AS2(    mov  esi, DWORD PTR [ebp + 12]      )   //  outBlock
+    #endif
+
+    AS1(    bswap ecx                                                   )
+    AS1(    bswap edx                                                   )
+
+    AS2(    mov DWORD PTR [esi],      eax                               )
+    AS2(    mov DWORD PTR [esi +  4], ebx                               )
+    AS2(    mov DWORD PTR [esi +  8], ecx                               )
+    AS2(    mov DWORD PTR [esi + 12], edx                               )
+
+
+    EPILOG()
+}
+
+
+#ifdef _MSC_VER
+    __declspec(naked) 
+#else
+    __attribute__ ((noinline))
+#endif
+void AES::AsmDecrypt(const byte* inBlock, byte* outBlock, void* boxes) const
+{
+    // x86 asm decryption of one 16-byte block; table lookups go through ebp at offsets 0/1024/2048/3072/4096 (Td0..Td4)
+    PROLOG()
+    // NOTE(review): PROLOG is defined elsewhere; presumably it leaves this in ecx, inBlock in esi, boxes in ebp and the old ebp in mm7 -- confirm
+    #ifdef OLD_GCC_OFFSET
+        AS2(    mov   edx, DWORD PTR [ecx + 60]     )   // rounds
+        AS2(    lea   edi, [ecx + 64]               )   // rk 
+    #else
+        AS2(    mov   edx, DWORD PTR [ecx + 56]     )   // rounds
+        AS2(    lea   edi, [ecx + 60]               )   // rk 
+    #endif
+    // mm5 = rounds - 1 (counter for the main loop), mm6 = pointer to the current round key
+    AS1(    dec   edx                           )
+    AS2(    movd  mm6, edi                      )   // save rk
+    AS2(    movd  mm5, edx                      )   // save rounds
+    // load the 16 input bytes at esi as four 32-bit words
+    AS2(    mov   eax, DWORD PTR [esi]                                  )
+    AS2(    mov   ebx, DWORD PTR [esi + 4]                              )
+    AS2(    mov   ecx, DWORD PTR [esi + 8]                              )
+    AS2(    mov   edx, DWORD PTR [esi + 12]                             )
+    // reverse the byte order of each word before the table rounds
+    AS1(    bswap eax                                                   )
+    AS1(    bswap ebx                                                   )
+    AS1(    bswap ecx                                                   )
+    AS1(    bswap edx                                                   )
+    // xor the state with the first 16 bytes of the round key
+    AS2(    xor   eax, DWORD PTR [edi]               )   // s0
+    AS2(    xor   ebx, DWORD PTR [edi +  4]          )   // s1
+    AS2(    xor   ecx, DWORD PTR [edi +  8]          )   // s2
+    AS2(    xor   edx, DWORD PTR [edi + 12]          )   // s3
+
+    // main loop, repeated until the counter in mm5 reaches zero; eax/ebx/ecx/edx hold s0..s3 (get0..get3 below)
+#ifdef _MSC_VER
+    AS1( loop2: )  // loop2
+#else
+    AS1(2:  )      // loop2
+#endif
+       /*   Put0 (mm0) =
+            Td0[GETBYTE(get0, rs24)] ^
+            Td1[GETBYTE(get3, rs16)] ^
+            Td2[GETBYTE(get2, rs 8)] ^
+            Td3[GETBYTE(get1,     )]  
+        */
+    AS2(    mov   esi, eax                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + esi*4]                          )
+                                                    
+    AS2(    mov   edi, edx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx edi, ch                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 2048 + edi*4]                   )
+
+    AS2(    movzx edi, bl                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+    AS2(    movd  mm0, esi                                              )
+
+      /*    Put1 (mm1) =
+            Td0[GETBYTE(get1, rs24)] ^
+            Td1[GETBYTE(get0, rs16)] ^
+            Td2[GETBYTE(get3, rs 8)] ^
+            Td3[GETBYTE(get2,     )]  
+        */
+    AS2(    mov   esi, ebx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + esi*4]                          )
+                                                    
+    AS2(    mov   edi, eax                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx edi, dh                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 2048 + edi*4]                   )
+
+    AS2(    movzx edi, cl                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+    AS2(    movd  mm1, esi                                              )
+
+      /*    Put2 (mm2) =
+            Td0[GETBYTE(get2, rs24)] ^
+            Td1[GETBYTE(get1, rs16)] ^
+            Td2[GETBYTE(get0, rs 8)] ^
+            Td3[GETBYTE(get3,     )]  
+      */
+    AS2(    mov   esi, ecx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + esi*4]                          )
+                                                    
+    AS2(    mov   edi, ebx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx edi, ah                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 2048 + edi*4]                   )
+
+    AS2(    movzx edi, dl                                               )
+    AS2(    xor   esi, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+    AS2(    movd  mm2, esi                                              )
+
+      /*    Put3 (edx) =   (built directly in edx; the old edx is copied to esi first)
+            Td0[GETBYTE(get3, rs24)] ^
+            Td1[GETBYTE(get2, rs16)] ^
+            Td2[GETBYTE(get1, rs 8)] ^
+            Td3[GETBYTE(get0,     )]  
+      */
+    AS2(    mov   esi, edx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   edx, DWORD PTR [ebp + esi*4]                          )
+                                                    
+    AS2(    mov   edi, ecx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   edx, DWORD PTR [ebp + 1024 + edi*4]                   )
+
+    AS2(    movzx esi, bh                                               )
+    AS2(    xor   edx, DWORD PTR [ebp + 2048 + esi*4]                   )
+
+    AS2(    movzx edi, al                                               )
+    AS2(    xor   edx, DWORD PTR [ebp + 3072 + edi*4]                   )
+
+
+            // xOr: advance rk by 16 bytes, then xor the new state with that round key
+
+    AS2(    movd  esi, mm6                      )   //  rk
+    AS2(    add   esi, 16                                               )
+    AS2(    movd  mm6, esi                      )   //  save back
+    // bring Put0..Put2 back from the MMX scratch registers (Put3 is already in edx)
+    AS2(    movd  eax, mm0                                              )
+    AS2(    movd  ebx, mm1                                              )
+    AS2(    movd  ecx, mm2                                              )
+
+    AS2(    xor   eax, DWORD PTR [esi]                                  )
+    AS2(    xor   ebx, DWORD PTR [esi +  4]                             )
+    AS2(    xor   ecx, DWORD PTR [esi +  8]                             )
+    AS2(    xor   edx, DWORD PTR [esi + 12]                             )
+    // decrement the counter kept in mm5; the movd store does not alter the flags set by dec, so jnz tests the dec result
+    AS2(    movd  edi, mm5                                              )
+    AS1(    dec   edi                                                   )
+    AS2(    movd  mm5, edi                                              )
+
+#ifdef _MSC_VER
+    AS1(    jnz   loop2)  // loop2
+#else
+    AS1(    jnz   2b )    // loop2
+#endif
+
+            // last round: every lookup uses Td4 (ebp + 4096) and is masked down to a single byte lane
+            /*
+            Put0 (mm0) =                          (h = the mask in decimal, as written in the asm)
+                (Td4[get0, rs24] & 0xff000000) ^  h = 4278190080
+                (Td4[get3, rs16] & 0x00ff0000) ^  h =   16711680
+                (Td4[get2, rs 8] & 0x0000ff00) ^  h =      65280
+                (Td4[get1, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, eax                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   esi, 4278190080                                       )
+
+    AS2(    mov   edi, edx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 16711680                                         )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, ch                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, bl                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movd  mm0, esi                                              )
+
+            /*
+            Put1 (mm1) =
+                (Td4[get1, rs24] & 0xff000000) ^  h = 4278190080
+                (Td4[get0, rs16] & 0x00ff0000) ^  h =   16711680
+                (Td4[get3, rs 8] & 0x0000ff00) ^  h =      65280
+                (Td4[get2, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, ebx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   esi, 4278190080                                       )
+
+    AS2(    mov   edi, eax                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 16711680                                         )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, dh                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, cl                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movd  mm1, esi                                              )
+
+            /*
+            Put2 (mm2) =
+                (Td4[get2, rs24] & 0xff000000) ^  h = 4278190080
+                (Td4[get1, rs16] & 0x00ff0000) ^  h =   16711680
+                (Td4[get0, rs 8] & 0x0000ff00) ^  h =      65280
+                (Td4[get3, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, ecx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   esi, 4278190080                                       )
+
+    AS2(    mov   edi, ebx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 16711680                                         )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, ah                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movzx edi, dl                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   edi, 255                                              )
+    AS2(    xor   esi, edi                                              )
+
+    AS2(    movd  mm2, esi                                              )
+
+            /*
+            Put3 (edx) =   (accumulated directly in edx; esi and edi alternate as scratch)
+                (Td4[get3, rs24] & 0xff000000) ^  h = 4278190080
+                (Td4[get2, rs16] & 0x00ff0000) ^  h =   16711680
+                (Td4[get1, rs 8] & 0x0000ff00) ^  h =      65280
+                (Td4[get0, rs 0] & 0x000000ff)    h =        255
+            */
+    AS2(    mov   esi, edx                                              )
+    AS2(    shr   esi, 24                                               )
+    AS2(    mov   edx, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   edx, 4278190080                                       )
+
+    AS2(    mov   edi, ecx                                              )
+    AS2(    shr   edi, 16                                               )
+    AS2(    and   edi, 255                                              )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   esi, 16711680                                         )
+    AS2(    xor   edx, esi                                              )
+
+    AS2(    movzx esi, bh                                               )
+    AS2(    mov   edi, DWORD PTR [ebp + 4096 + esi*4]                   )
+    AS2(    and   edi, 65280                                            )
+    AS2(    xor   edx, edi                                              )
+
+    AS2(    movzx edi, al                                               )
+    AS2(    mov   esi, DWORD PTR [ebp + 4096 + edi*4]                   )
+    AS2(    and   esi, 255                                              )
+    AS2(    xor   edx, esi                                              )
+
+
+            // xOr with the 16 bytes following the last key used in the loop (mm6 is not written back here)
+    AS2(    movd  esi, mm6                      )   //  rk
+    AS2(    add   esi, 16                                               )
+
+    AS2(    movd   eax, mm0                                             )
+    AS2(    movd   ebx, mm1                                             )
+    AS2(    movd   ecx, mm2                                             )
+
+    AS2(    xor   eax, DWORD PTR [esi]                                  )
+    AS2(    xor   ebx, DWORD PTR [esi +  4]                             )
+    AS2(    xor   ecx, DWORD PTR [esi +  8]                             )
+    AS2(    xor   edx, DWORD PTR [esi + 12]                             )
+
+    // end: reload ebp from mm7 (NOTE(review): presumably the value PROLOG saved there -- confirm)
+    AS2(    movd  ebp, mm7                                              )
+
+            // swap the byte order of each result word back before storing
+    AS1(    bswap eax                                                   )
+    AS1(    bswap ebx                                                   )
+    AS1(    bswap ecx                                                   )
+    AS1(    bswap edx                                                   )
+
+            // store the four result words; the destination comes from mm4 (GNUC) or [ebp + 12] (otherwise)
+    #ifdef __GNUC__
+        AS2(    movd esi, mm4                        )   //  outBlock
+    #else
+        AS2(    mov esi,  DWORD PTR [ebp + 12]       )   //  outBlock
+    #endif
+    AS2(    mov DWORD PTR [esi],      eax                               )
+    AS2(    mov DWORD PTR [esi +  4], ebx                               )
+    AS2(    mov DWORD PTR [esi +  8], ecx                               )
+    AS2(    mov DWORD PTR [esi + 12], edx                               )
+
+
+    EPILOG()
+}
+
+
+
+#endif // defined(DO_AES_ASM)
+
+
+
+const word32 AES::Te[5][256] = {   // five 256-entry word32 lookup tables, 1024 bytes each (NOTE(review): presumably the encryption-side tables -- confirm against users of Te)
+{   // Te[0]: base table
+    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+},
+{   // Te[1]: entries appear to be the matching Te[0] words rotated right by 8 bits (spot-checked, not exhaustively)
+    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+},
+{   // Te[2]: entries appear to be the matching Te[0] words rotated right by 16 bits (spot-checked, not exhaustively)
+    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+},
+{   // Te[3]: entries appear to be the matching Te[0] words rotated right by 24 bits (spot-checked, not exhaustively)
+    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+},
+{   // Te[4]: each entry is a single byte value replicated into all four bytes of the word
+    0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+    0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+    0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+    0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+    0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+    0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+    0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+    0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+    0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+    0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+    0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+    0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+    0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+    0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+    0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+    0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+    0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+    0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+    0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+    0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+    0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+    0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+    0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+    0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+    0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+    0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+    0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+    0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+    0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+    0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+    0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+    0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+    0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+    0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+    0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+    0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+    0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+    0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+    0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+    0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+    0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+    0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+    0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+    0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+    0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+    0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+    0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+    0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+    0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+    0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+    0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+    0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+    0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+    0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+    0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+    0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+    0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+    0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+    0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+    0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+    0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+    0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+    0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+    0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
+}
+};
+
+
+const word32 AES::Td[5][256] = {   // five 256-word decryption tables; rows 0-3 are aliased as Td0..Td3 in this file
+{   // Td[0]
+    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+},
+{   // Td[1]: entries appear to be the Td[0] words rotated right by 8 bits
+    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+},
+{   // Td[2]: entries appear to be the Td[0] words rotated right by 16 bits
+    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+
+    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+},
+{   // Td[3]: entries appear to be the Td[0] words rotated right by 24 bits
+    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+},
+{   // Td[4]: every word repeats one byte four times; the byte order appears to match CTd4
+    0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+    0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+    0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+    0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+    0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+    0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+    0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+    0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+    0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+    0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+    0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+    0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+    0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+    0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+    0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+    0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+    0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+    0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+    0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+    0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+    0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+    0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+    0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+    0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+    0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+    0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+    0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+    0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+    0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+    0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+    0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+    0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+    0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+    0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+    0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+    0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+    0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+    0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+    0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+    0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+    0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+    0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+    0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+    0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+    0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+    0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+    0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+    0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+    0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+    0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+    0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+    0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+    0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+    0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+    0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+    0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+    0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+    0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+    0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+    0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+    0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+    0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+    0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+    0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+}
+};
+
+const byte AES::CTd4[256] =   // byte-wide table: one byte per entry instead of a replicated 32-bit word
+{   // NOTE(review): byte order appears to match Td[4] entry-for-entry - confirm before editing either
+    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+
+
+const word32* AES::Te0 = &AES::Te[0][0];   // row aliases into the encryption tables
+const word32* AES::Te1 = &AES::Te[1][0];
+const word32* AES::Te2 = &AES::Te[2][0];
+const word32* AES::Te3 = &AES::Te[3][0];
+
+const word32* AES::Td0 = &AES::Td[0][0];   // row aliases into the decryption tables
+const word32* AES::Td1 = &AES::Td[1][0];
+const word32* AES::Td2 = &AES::Td[2][0];
+const word32* AES::Td3 = &AES::Td[3][0];
+
+
+
+} // namespace
+
diff --git a/mysql/extra/yassl/taocrypt/src/aestables.cpp b/mysql/extra/yassl/taocrypt/src/aestables.cpp
new file mode 100644
index 0000000..60795a5
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/aestables.cpp
@@ -0,0 +1,36 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* based on Wei Dai's aestables.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "aes.hpp"
+
+
+namespace TaoCrypt {
+
+
+const word32 AES::rcon_[] = {   // ten rcon words; only the most significant byte of each is non-zero
+    0x01000000, 0x02000000, 0x04000000, 0x08000000,
+    0x10000000, 0x20000000, 0x40000000, 0x80000000,
+    0x1B000000, 0x36000000, 
+    /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/algebra.cpp b/mysql/extra/yassl/taocrypt/src/algebra.cpp
new file mode 100644
index 0000000..ace1704
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/algebra.cpp
@@ -0,0 +1,336 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
+
+/* based on Wei Dai's algebra.cpp from CryptoPP */
+#undef  NDEBUG
+#define DEBUG   // GCC 4.0 bug if NDEBUG and Optimize > 1
+
+#include "runtime.hpp"
+#include "algebra.hpp"
+#ifdef USE_SYS_STL
+    #include <vector>
+#else
+    #include "vector.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+
+
+namespace TaoCrypt {
+
+
+const Integer& AbstractGroup::Double(const Element &a) const   // group operation applied to a and itself
+{
+    return Add(a, a);   // hands back whatever reference Add() returns
+}
+
+const Integer& AbstractGroup::Subtract(const Element &a, const Element &b) const
+{
+    // Inverse() might overwrite the storage 'a' refers to, so add from a private copy
+    Element minuend(a);
+    return Add(minuend, Inverse(b));   // a combined with the inverse of b
+}
+
+Integer& AbstractGroup::Accumulate(Element &a, const Element &b) const   // in-place form of Add
+{
+    return a = Add(a, b);   // stores Add(a, b) into a and returns a itself
+}
+
+Integer& AbstractGroup::Reduce(Element &a, const Element &b) const   // in-place form of Subtract
+{
+    return a = Subtract(a, b);   // stores Subtract(a, b) into a and returns a itself
+}
+
+const Integer& AbstractRing::Square(const Element &a) const   // ring product of a with itself
+{
+    return Multiply(a, a);   // hands back whatever reference Multiply() returns
+}
+
+
+const Integer& AbstractRing::Divide(const Element &a, const Element &b) const
+{
+    // MultiplicativeInverse() might overwrite the storage 'a' refers to; use a copy
+    Element dividend(a);
+    return Multiply(dividend, MultiplicativeInverse(b));   // a times the inverse of b
+}
+
+
+const Integer& AbstractEuclideanDomain::Mod(const Element &a,
+                                            const Element &b) const
+{
+    Element quotient;   // presumably receives the quotient; not used afterwards
+    DivisionAlgorithm(result, quotient, a, b);   // first argument is the member 'result'
+    return result;      // reference to the member, so later calls may change it
+}
+
+const Integer& AbstractEuclideanDomain::Gcd(const Element &a,
+                                            const Element &b) const
+{
+    STL::vector<Element> rem(3);   // three slots reused in rotation
+    rem[0] = b;
+    rem[1] = a;
+    unsigned int prev = 0, cur = 1, spare = 2;
+    // keep reducing until the current remainder equals Identity()
+    while (!Equal(rem[cur], this->Identity()))
+    {
+        rem[spare] = Mod(rem[prev], rem[cur]);   // assignment stores a copy
+        const unsigned int freed = prev;         // rotate the three indices
+        prev = cur; cur = spare; spare = freed;
+    }
+    result = rem[prev];   // kept in the member so a reference can be returned
+    return result;
+}
+
+
+Integer AbstractGroup::ScalarMultiply(const Element &base,
+                                      const Integer &exponent) const
+{
+    Element multiple;   // single output slot
+    SimultaneousMultiply(&multiple, base, &exponent, 1);   // batch call with one exponent
+    return multiple;
+}
+
+
+Integer AbstractGroup::CascadeScalarMultiply(const Element &x,
+                  const Integer &e1, const Element &y, const Integer &e2) const
+{   // combines x scaled by e1 with y scaled by e2 in one pass over the exponent bits
+    const unsigned expLen = max(e1.BitCount(), e2.BitCount());
+    if (expLen==0)
+        return Identity();   // both BitCount() values are zero
+    // w is the per-exponent window width; the table below has 2^w * 2^w slots
+    const unsigned w = (expLen <= 46 ? 1 : (expLen <= 260 ? 2 : 3));
+    const unsigned tableSize = 1<<w;
+    STL::vector<Element> powerTable(tableSize << w);
+    // slot (p2<<w)+p1 is what the main loop reads for window values p1 (from e1) and p2 (from e2)
+    powerTable[1] = x;
+    powerTable[tableSize] = y;
+    if (w==1)
+        powerTable[3] = Add(x,y);   // the only remaining non-zero slot when w is 1
+    else
+    {
+        powerTable[2] = Double(x);
+        powerTable[2*tableSize] = Double(y);
+
+        unsigned i, j;
+
+        for (i=3; i<tableSize; i+=2)   // odd slots of the first row, stepping by powerTable[2]
+            powerTable[i] = Add(powerTable[i-2], powerTable[2]);
+        for (i=1; i<tableSize; i+=2)   // extend each odd column downward by adding y
+            for (j=i+tableSize; j<(tableSize<<w); j+=tableSize)
+                powerTable[j] = Add(powerTable[j-tableSize], y);
+
+        for (i=3*tableSize; i<(tableSize<<w); i+=2*tableSize)   // odd rows of column 0
+            powerTable[i] = Add(powerTable[i-2*tableSize],
+            powerTable[2*tableSize]);
+        for (i=tableSize; i<(tableSize<<w); i+=2*tableSize)     // even columns of odd rows, adding x
+            for (j=i+2; j<i+tableSize; j+=2)
+                powerTable[j] = Add(powerTable[j-1], x);
+    }
+
+    Element result;
+    unsigned power1 = 0, power2 = 0, prevPosition = expLen-1;
+    bool firstTime = true;
+    // scan from the top bit down; power1/power2 collect the current window of e1/e2
+    for (int i = expLen-1; i>=0; i--)
+    {
+        power1 = 2*power1 + e1.GetBit(i);
+        power2 = 2*power2 + e2.GetBit(i);
+
+        if (i==0 || 2*power1 >= tableSize || 2*power2 >= tableSize)   // window full, or last bit reached
+        {
+            unsigned squaresBefore = prevPosition-i;
+            unsigned squaresAfter = 0;
+            prevPosition = i;
+            while ((power1 || power2) && power1%2 == 0 && power2%2==0)   // strip shared trailing zero bits
+            {
+                power1 /= 2;
+                power2 /= 2;
+                squaresBefore--;   // each stripped bit moves one doubling from before the add...
+                squaresAfter++;    // ...to after it
+            }
+            if (firstTime)
+            {
+                result = powerTable[(power2<<w) + power1];   // first window: plain table load
+                firstTime = false;
+            }
+            else
+            {
+                while (squaresBefore--)
+                result = Double(result);   // body of the while above, despite the indentation
+                if (power1 || power2)
+                    Accumulate(result, powerTable[(power2<<w) + power1]);
+            }
+            while (squaresAfter--)
+                result = Double(result);
+            power1 = power2 = 0;   // start collecting the next window
+        }
+    }
+    return result;
+}
+
+
+struct WindowSlider   // steps through one exponent window by window, starting at the low bits
+{
+    WindowSlider(const Integer &exp, bool fastNegate,
+                 unsigned int windowSizeIn=0)
+        : exp(exp), windowModulus(Integer::One()), windowSize(windowSizeIn),
+          windowBegin(0), expWindow(0), fastNegate(fastNegate),
+          negateNext(false), firstTime(true), finished(false)
+    {   // expWindow/negateNext start defined so a slider copied before FindNextWindow() is safe to read
+        if (windowSize == 0)   // 0 requests a width chosen from the exponent's bit length
+        {
+            unsigned int expLen = exp.BitCount();
+            windowSize = expLen <= 17 ? 1 : (expLen <= 24 ? 2 :
+                (expLen <= 70 ? 3 : (expLen <= 197 ? 4 : (expLen <= 539 ? 5 :
+                (expLen <= 1434 ? 6 : 7)))));
+        }
+        windowModulus <<= windowSize;   // windowModulus becomes 2^windowSize
+    }
+    // Advance to the next window that starts on a set bit, or set 'finished' when none is left.
+    void FindNextWindow()
+    {
+        unsigned int expLen = exp.WordCount() * WORD_BITS;   // upper bound for the bit scan
+        unsigned int skipCount = firstTime ? 0 : windowSize; // later calls step over the previous window
+        firstTime = false;
+        while (!exp.GetBit(skipCount))   // look for the next set bit
+        {
+            if (skipCount >= expLen)
+            {
+                finished = true;   // no set bit remains; expWindow/negateNext keep their old values
+                return;
+            }
+            skipCount++;
+        }
+        // drop the consumed bits and remember where this window starts in the original exponent
+        exp >>= skipCount;
+        windowBegin += skipCount;
+        expWindow = exp % (1 << windowSize);   // low windowSize bits of what is left
+        // with fast negation, a set bit just above the window selects a negative window value
+        if (fastNegate && exp.GetBit(windowSize))
+        {
+            negateNext = true;
+            expWindow = (1 << windowSize) - expWindow;
+            exp += windowModulus;   // compensates for using the negated window
+        }
+        else
+            negateNext = false;
+    }
+    // state below is read directly by SimultaneousMultiply()
+    Integer exp, windowModulus;
+    unsigned int windowSize, windowBegin, expWindow;
+    bool fastNegate, negateNext, firstTime, finished;
+};
+
+
+void AbstractGroup::SimultaneousMultiply(Integer *results, const Integer &base,
+                          const Integer *expBegin, unsigned int expCount) const
+{   // writes expCount results, one per exponent read from expBegin, all for the same base
+    STL::vector<STL::vector<Element> > buckets(expCount);   // one bucket list per exponent
+    STL::vector<WindowSlider> exponents;
+    exponents.reserve(expCount);
+    unsigned int i;
+
+    for (i=0; i<expCount; i++)
+    {
+        exponents.push_back(WindowSlider(*expBegin++, InversionIsFast(), 0));   // 0: slider picks its own width
+        exponents[i].FindNextWindow();   // position on the first window
+        buckets[i].resize(1<<(exponents[i].windowSize-1), Identity());   // 2^(windowSize-1) buckets, all Identity()
+    }
+    // walk bit positions upward; g is doubled once per position
+    unsigned int expBitPosition = 0;
+    Element g = base;
+    bool notDone = true;
+
+    while (notDone)
+    {
+        notDone = false;
+        for (i=0; i<expCount; i++)
+        {
+            if (!exponents[i].finished && expBitPosition == 
+                 exponents[i].windowBegin)
+            {   // a window of exponent i starts here: add (or subtract) g in the bucket for its value
+                Element &bucket = buckets[i][exponents[i].expWindow/2];
+                if (exponents[i].negateNext)
+                    Accumulate(bucket, Inverse(g));
+                else
+                    Accumulate(bucket, g);
+                exponents[i].FindNextWindow();
+            }
+            notDone = notDone || !exponents[i].finished;   // keep going while any slider has windows left
+        }
+
+        if (notDone)
+        {
+            g = Double(g);
+            expBitPosition++;
+        }
+    }
+    // fold each exponent's buckets into its result slot
+    for (i=0; i<expCount; i++)
+    {
+        Element &r = *results++;
+        r = buckets[i][buckets[i].size()-1];   // start from the highest bucket
+        if (buckets[i].size() > 1)
+        {
+            for (size_t j = buckets[i].size()-2; j >= 1; j--)   // size() > 1 here, so size()-2 cannot wrap
+            {
+                Accumulate(buckets[i][j], buckets[i][j+1]);   // running sum of buckets j and above
+                Accumulate(r, buckets[i][j]);
+            }
+            Accumulate(buckets[i][0], buckets[i][1]);
+            r = Add(Double(r), buckets[i][0]);
+        }
+    }
+}
+
+Integer AbstractRing::Exponentiate(const Element &base,
+                                   const Integer &exponent) const
+{
+    Element power;   // single output slot
+    SimultaneousExponentiate(&power, base, &exponent, 1);   // batch call with one exponent
+    return power;
+}
+
+
+Integer AbstractRing::CascadeExponentiate(const Element &x,
+                  const Integer &e1, const Element &y, const Integer &e2) const
+{   // runs the generic group routine on this ring's MultiplicativeGroup()
+    return MultiplicativeGroup().AbstractGroup::CascadeScalarMultiply(   // qualified name selects the AbstractGroup version
+                x, e1, y, e2);
+}
+
+
+void AbstractRing::SimultaneousExponentiate(Integer *results,
+                                            const Integer &base,
+                         const Integer *exponents, unsigned int expCount) const
+{   // runs the generic group routine on this ring's MultiplicativeGroup()
+    MultiplicativeGroup().AbstractGroup::SimultaneousMultiply(results, base,   // qualified name selects the AbstractGroup version
+                                                          exponents, expCount);
+}
+
+
+} // namespace
+
+
+#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION   // only for builds that define this macro
+namespace mySTL {   // explicit instantiations of mySTL helper templates for WindowSlider
+template TaoCrypt::WindowSlider* uninit_copy<TaoCrypt::WindowSlider*, TaoCrypt::WindowSlider*>(TaoCrypt::WindowSlider*, TaoCrypt::WindowSlider*, TaoCrypt::WindowSlider*);
+template void destroy<TaoCrypt::WindowSlider*>(TaoCrypt::WindowSlider*, TaoCrypt::WindowSlider*);
+template TaoCrypt::WindowSlider* GetArrayMemory<TaoCrypt::WindowSlider>(size_t);
+template void FreeArrayMemory<TaoCrypt::WindowSlider>(TaoCrypt::WindowSlider*);
+}
+#endif
+
diff --git a/mysql/extra/yassl/taocrypt/src/arc4.cpp b/mysql/extra/yassl/taocrypt/src/arc4.cpp
new file mode 100644
index 0000000..10a3a7d
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/arc4.cpp
@@ -0,0 +1,243 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* based on Wei Dai's arc4.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "arc4.hpp"
+
+
+#if defined(TAOCRYPT_X86ASM_AVAILABLE) && defined(TAO_ASM)
+    #define DO_ARC4_ASM
+#endif
+
+
+namespace TaoCrypt {
+
+// Key schedule: reset the stream indices (x_ = 1, y_ = 0), fill state_ with
+// 0 .. STATE_SIZE-1 and then swap entries under control of the key bytes.
+// The key is reused cyclically when it is shorter than STATE_SIZE.
+void ARC4::SetKey(const byte* key, word32 length)
+{
+    x_ = 1;
+    y_ = 0;
+
+    word32 n;
+
+    for (n = 0; n < STATE_SIZE; ++n)
+        state_[n] = n;
+
+    word32 keyPos  = 0;
+    word32 swapPos = 0;
+
+    for (n = 0; n < STATE_SIZE; ++n) {
+        const word32 cur = state_[n];
+
+        swapPos = (swapPos + key[keyPos] + cur) & 0xFF;
+
+        state_[n]       = state_[swapPos];
+        state_[swapPos] = cur;
+
+        ++keyPos;
+        if (keyPos >= length)
+            keyPos = 0;         // wrap around to the start of the key
+    }
+}
+
+
+// local
+namespace {
+
+// Produce one keystream byte from state s.
+// Advances y by s[x], swaps s[x] and s[y], steps x, and returns the entry
+// at index (old s[x] + old s[y]) & 0xff. x and y are updated in place.
+inline unsigned int MakeByte(word32& x, word32& y, byte* s)
+{
+    const word32 atX = s[x];
+    y = (y + atX) & 0xff;
+    const word32 atY = s[y];
+
+    // swap the two entries
+    s[x] = atY;
+    s[y] = atX;
+
+    x = (x + 1) & 0xff;
+
+    return s[(atX + atY) & 0xff];
+}
+
+} // namespace
+
+
+
+// XOR length bytes of in with the keystream into out, then store the
+// advanced x/y indices back into the object. in and out may be the same
+// buffer. When DO_ARC4_ASM is defined and isMMX is set, AsmProcess() does
+// the work instead.
+void ARC4::Process(byte* out, const byte* in, word32 length)
+{
+    if (!length)
+        return;
+
+#ifdef DO_ARC4_ASM
+    if (isMMX) {
+        AsmProcess(out, in, length);
+        return;
+    }
+#endif
+
+    byte* const box = state_;
+    word32 xi = x_;
+    word32 yi = y_;
+
+    if (in == out) {
+        // in place: fold the keystream straight into the buffer
+        for (; length; --length, ++out)
+            *out ^= MakeByte(xi, yi, box);
+    }
+    else {
+        for (; length; --length, ++in, ++out)
+            *out = *in ^ MakeByte(xi, yi, box);
+    }
+
+    x_ = xi;
+    y_ = yi;
+}
+
+
+
+#ifdef DO_ARC4_ASM
+
+// 32-bit x86 assembly version of Process(): XORs length bytes of in with the
+// keystream into out and writes the updated x / y back to the object.
+// Register use once the loop is set up (see the instructions below):
+//   ebp = state_ base, ecx = x, edx = y, eax = a (s[x]),
+//   ebx = b, then the output byte, esi = in, edi = out,
+//   [esp] = bytes remaining.
+// The loads from [ecx], [ecx + 1] and [ecx + 2] rely on x_, y_ and state_
+// sitting at offsets 0, 1 and 2 of the object.
+#ifdef _MSC_VER
+    __declspec(naked)
+#else
+    __attribute__ ((noinline))
+#endif
+void ARC4::AsmProcess(byte* out, const byte* in, word32 length)
+{
+    // GCC path: AS1/AS2 stringify each instruction; PROLOG() opens a single
+    // __asm__ statement and EPILOG() closes it with the operand lists
+    // (ecx = this, edi = out, esi = in, eax = length, moved to ebp).
+#ifdef __GNUC__
+    #define AS1(x)    #x ";"
+    #define AS2(x, y) #x ", " #y ";"
+
+    #define PROLOG()  \
+    __asm__ __volatile__ \
+    ( \
+        ".intel_syntax noprefix;" \
+        "push ebx;" \
+        "push ebp;" \
+        "mov ebp, eax;"
+    #define EPILOG()  \
+        "pop ebp;" \
+        "pop ebx;" \
+       	"emms;" \
+       	".att_syntax;" \
+            : \
+            : "c" (this), "D" (out), "S" (in), "a" (length) \
+            : "%edx", "memory", "cc" \
+    );
+
+    // Other compilers (MSVC syntax): every instruction is its own __asm
+    // statement. PROLOG() builds a frame, parks edi/ebx/esi/ebp in mm3..mm6
+    // and loads out/in/length from the stack; EPILOG() restores them and
+    // returns with "ret 12". NOTE(review): ecx is presumably `this` per the
+    // thiscall convention - confirm.
+#else
+    #define AS1(x)    __asm x
+    #define AS2(x, y) __asm x, y
+
+    #define PROLOG() \
+        AS1(    push  ebp                       )   \
+        AS2(    mov   ebp, esp                  )   \
+        AS2(    movd  mm3, edi                  )   \
+        AS2(    movd  mm4, ebx                  )   \
+        AS2(    movd  mm5, esi                  )   \
+        AS2(    movd  mm6, ebp                  )   \
+        AS2(    mov   edi, DWORD PTR [ebp +  8] )   \
+        AS2(    mov   esi, DWORD PTR [ebp + 12] )   \
+        AS2(    mov   ebp, DWORD PTR [ebp + 16] )
+
+    #define EPILOG() \
+        AS2(    movd  ebp, mm6                  )   \
+        AS2(    movd  esi, mm5                  )   \
+        AS2(    movd  ebx, mm4                  )   \
+        AS2(    movd  edi, mm3                  )   \
+        AS2(    mov   esp, ebp                  )   \
+        AS1(    pop   ebp                       )   \
+        AS1(    emms                            )   \
+        AS1(    ret 12                          )
+        
+#endif
+
+    PROLOG()
+
+    AS2(    sub    esp, 4                   )   // make room 
+
+    // length == 0: skip the loop and the x_/y_ write-back
+    AS2(    cmp    ebp, 0                   )
+    AS1(    jz     nothing                  )
+
+    AS2(    mov    [esp], ebp               )   // length
+
+    AS2(    movzx  edx, BYTE PTR [ecx + 1]  )   // y
+    AS2(    lea    ebp, [ecx + 2]           )   // state_
+    AS2(    movzx  ecx, BYTE PTR [ecx]      )   // x
+
+    // setup loop
+    // a = s[x];
+    AS2(    movzx  eax, BYTE PTR [ebp + ecx]    )
+
+
+#ifdef _MSC_VER
+    AS1( loopStart: )  // loopStart
+#else
+    AS1( 0: )          // loopStart for some gas (need numeric for jump back)
+#endif
+
+    // y = (y+a) & 0xff;
+    AS2(    add    edx, eax                     )
+    AS2(    and    edx, 255                     )
+
+    // b = s[y];
+    AS2(    movzx  ebx, BYTE PTR [ebp + edx]    )
+
+    // s[x] = b;
+    AS2(    mov    [ebp + ecx], bl              )
+
+    // s[y] = a;
+    AS2(    mov    [ebp + edx], al              )
+
+    // x = (x+1) & 0xff;
+    AS1(    inc    ecx                          )
+    AS2(    and    ecx, 255                     )
+
+    //return s[(a+b) & 0xff];
+    AS2(    add    eax, ebx                     )
+    AS2(    and    eax, 255                     )
+    
+    AS2(    movzx  ebx, BYTE PTR [ebp + eax]    )
+
+    // a = s[x];   for next round
+    AS2(    movzx  eax, BYTE PTR [ebp + ecx]    )
+
+    // xOr w/ inByte
+    AS2(    xor    bl,  BYTE PTR [esi]          )
+    AS1(    inc    esi                          )
+
+    // write to outByte
+    AS2(    mov    [edi], bl                    )
+    AS1(    inc    edi                          )
+
+    // one byte done: count down the remaining length kept on the stack
+    AS1(    dec    DWORD PTR [esp]              )
+#ifdef _MSC_VER
+    AS1(    jnz   loopStart )  // loopStart
+#else
+    AS1(    jnz   0b )         // loopStart
+#endif
+
+
+    // write back to x_ and y_
+    // (ebp points at state_, so x_ and y_ are at ebp - 2 and ebp - 1)
+    AS2(    mov    [ebp - 2], cl            )
+    AS2(    mov    [ebp - 1], dl            )
+
+
+AS1( nothing:                           )
+
+    // inline adjust 
+    AS2(    add   esp, 4               )   // fix room on stack
+
+    EPILOG()
+}
+
+#endif // DO_ARC4_ASM
+
+
+}  // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/asn.cpp b/mysql/extra/yassl/taocrypt/src/asn.cpp
new file mode 100644
index 0000000..e0aef45
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/asn.cpp
@@ -0,0 +1,1348 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* asn.cpp implements ASN1 BER, PublicKey, and x509v3 decoding 
+*/
+
+#include "runtime.hpp"
+#include "asn.hpp"
+#include "file.hpp"
+#include "integer.hpp"
+#include "rsa.hpp"
+#include "dsa.hpp"
+#include "dh.hpp"
+#include "md5.hpp"
+#include "md2.hpp"
+#include "sha.hpp"
+#include "coding.hpp"
+#include <time.h>     // gmtime();
+#include "memory.hpp" // some auto_ptr don't have reset, also need auto_array
+
+
+namespace TaoCrypt {
+
+// Like atoi, but for a single character: returns b minus 0x30 (ASCII '0').
+// No check is made that b is actually a digit.
+word32 btoi(byte b)
+{
+    const word32 asciiZero = 0x30;
+
+    return b - asciiZero;
+}
+
+
+// Read the two ASCII digits at date[i] and date[i+1], add their decimal
+// value to *value, and advance i past them.
+void GetTime(int *value, const byte* date, int& i)
+{
+    const word32 tens = btoi(date[i]);
+    const word32 ones = btoi(date[i + 1]);
+    i += 2;
+
+    *value += tens * 10;
+    *value += ones;
+}
+
+
+// Parse an ASN.1 time string into *t (which is zeroed first).
+//   date   - digits followed by 'Z': YYMMDDHHMMSSZ for UTC_TIME,
+//            YYYYMMDDHHMMSSZ for GENERALIZED_TIME
+//   format - UTC_TIME or GENERALIZED_TIME; anything else returns false
+// For UTC_TIME a leading year digit of 5 or more selects 19xx, otherwise
+// 20xx. Returns false as well when the string does not end in 'Z'.
+// The digits themselves are not validated and no length is passed in, so
+// the caller must supply a buffer long enough for the chosen format.
+bool ASN1_TIME_extract(const unsigned char* date, unsigned char format,
+                       tm *t)
+{
+  memset(t, 0, sizeof (tm));
+
+  const bool isUtc = (format == UTC_TIME);
+  if (!isUtc && format != GENERALIZED_TIME)
+    return false;
+
+  int pos = 0;
+
+  if (isUtc) {
+    // two-digit year: pick the century from the first digit
+    t->tm_year = (btoi(date[0]) >= 5) ? 1900 : 2000;
+  }
+  else {
+    // four-digit year: the century digits come first
+    t->tm_year += btoi(date[pos++]) * 1000;
+    t->tm_year += btoi(date[pos++]) * 100;
+  }
+
+  GetTime(&t->tm_year, date, pos);
+  t->tm_year -= 1900;                 // struct tm counts years from 1900
+
+  GetTime(&t->tm_mon, date, pos);
+  t->tm_mon -= 1;                     // struct tm months start at 0
+
+  GetTime(&t->tm_mday, date, pos);
+  GetTime(&t->tm_hour, date, pos);
+  GetTime(&t->tm_min,  date, pos);
+  GetTime(&t->tm_sec,  date, pos);
+
+  // only Zulu supported for this profile
+  return date[pos] == 'Z';
+}
+
+
+namespace { // locals
+
+
+// True when a is strictly later than b. Compares year, month, day, hour,
+// minute and second in that order (resolution: one second); the first
+// field that differs decides.
+bool operator>(tm& a, tm& b)
+{
+    if (a.tm_year != b.tm_year) return a.tm_year > b.tm_year;
+    if (a.tm_mon  != b.tm_mon)  return a.tm_mon  > b.tm_mon;
+    if (a.tm_mday != b.tm_mday) return a.tm_mday > b.tm_mday;
+    if (a.tm_hour != b.tm_hour) return a.tm_hour > b.tm_hour;
+    if (a.tm_min  != b.tm_min)  return a.tm_min  > b.tm_min;
+
+    return a.tm_sec > b.tm_sec;
+}
+
+
+// True when a is strictly earlier than b; defined through operator>.
+bool operator<(tm& a, tm&b)
+{
+    const bool bIsLater = (b > a);
+    return bIsLater;
+}
+
+
+// Make sure before and after dates are valid
+//   date   - ASN.1 time string
+//   format - UTC_TIME or GENERALIZED_TIME
+//   dt     - CertDecoder::BEFORE: fail when the current time is earlier
+//            than date; otherwise: fail when it is later than date
+// Returns false as well when date cannot be parsed or the current UTC time
+// cannot be obtained.
+bool ValidateDate(const byte* date, byte format, CertDecoder::DateType dt)
+{
+    tm certTime;
+
+    if (!ASN1_TIME_extract(date, format, &certTime))
+        return false;
+
+    time_t ltime = time(0);
+    tm* localTime = gmtime(&ltime);
+    if (!localTime)             // gmtime() may fail and return null
+        return false;
+
+    if (dt == CertDecoder::BEFORE) {
+        if (*localTime < certTime)
+            return false;
+    }
+    else
+        if (*localTime > certTime)
+            return false;
+
+    return true;
+}
+
+
+// Empty marker type, local to this file (it lives in the unnamed namespace).
+class BadCertificate {};
+
+} // local namespace
+
+
+
+// Decode a BER length field from source (used by Integer as well).
+// Short form: the byte is the length. Long form (byte >= LONG_LENGTH): the
+// low 7 bits give the number of big-endian length bytes that follow.
+// Returns 0 when source holds fewer bytes than the length bytes or the
+// decoded length require.
+// NOTE(review): more length bytes than word32 can hold are shifted out
+// silently - confirm whether that should be rejected.
+word32 GetLength(Source& source)
+{
+    word32 length = 0;
+
+    const byte first = source.next();
+
+    if (first < LONG_LENGTH)
+        length = first;
+    else {
+        word32 count = first & 0x7F;
+
+        if (!source.IsLeft(count)) return 0;
+
+        for (; count; --count) {
+            const byte part = source.next();
+            length = (length << 8) | part;
+        }
+    }
+
+    if (!source.IsLeft(length)) return 0;
+
+    return length;
+}
+
+
+// Encode length into output and return the number of bytes written.
+// Values below LONG_LENGTH take one byte; larger values are written as
+// (0x80 | byte count) followed by the value, most significant byte first.
+word32 SetLength(word32 length, byte* output)
+{
+    if (length < LONG_LENGTH) {
+        output[0] = length;
+        return 1;
+    }
+
+    word32 used = 0;
+    output[used++] = BytePrecision(length) | 0x80;
+
+    for (int left = BytePrecision(length); left; --left)
+        output[used++] = length >> (left - 1) * 8;
+
+    return used;
+}
+
+
+// Create a key holding a copy of the s bytes at k.
+// With s == 0 the key stays empty (no buffer, size 0).
+PublicKey::PublicKey(const byte* k, word32 s) : key_(0), sz_(0)
+{
+    if (!s)
+        return;
+
+    SetSize(s);
+    SetKey(k);
+}
+
+
+// Give the key a buffer of s bytes and record s as its size.
+// A buffer already held is released first, so calling this more than once
+// no longer leaks the earlier allocation. The old contents are not kept.
+void PublicKey::SetSize(word32 s)
+{
+    byte* fresh = NEW_TC byte[s];   // allocate before touching key_
+
+    tcArrayDelete(key_);            // key_ starts out as 0 (see constructor)
+    key_ = fresh;
+    sz_  = s;
+}
+
+
+// Copy sz_ bytes from k into the key buffer.
+// SetSize() must have been called first so that key_ and sz_ are set.
+void PublicKey::SetKey(const byte* k)
+{
+    memcpy(key_, k, sz_);
+}
+
+
+// Append len bytes from data to the stored key.
+// A new buffer of sz_ + len bytes is filled with the old key followed by
+// data; the old buffer is then released and replaced.
+void PublicKey::AddToEnd(const byte* data, word32 len)
+{
+    mySTL::auto_array<byte> grown(NEW_TC byte[sz_ + len]);
+    byte* const dst = grown.get();
+
+    memcpy(dst, key_, sz_);
+    memcpy(dst + sz_, data, len);
+
+    // detach the old buffer before freeing it
+    byte* old = key_;
+    key_ = 0;
+    tcArrayDelete(old);
+
+    key_ = grown.release();
+    sz_ += len;
+}
+
+
+// Build a signer record from a public key (k, kSz), a NUL-terminated name n
+// and a hash h of SHA::DIGEST_SIZE bytes. All three are copied.
+// A name that does not fit into name_ is truncated instead of being written
+// past the end of the buffer.
+Signer::Signer(const byte* k, word32 kSz, const char* n, const byte* h)
+    : key_(k, kSz)
+{
+    size_t sz = strlen(n);
+    if (sz >= sizeof(name_))
+        sz = sizeof(name_) - 1;     // keep room for the terminator
+
+    memcpy(name_, n, sz);
+    name_[sz] = 0;
+
+    memcpy(hash_, h, SHA::DIGEST_SIZE);
+}
+
+// Nothing to do here; the members are destroyed implicitly.
+Signer::~Signer()
+{
+}
+
+
+Error BER_Decoder::GetError()
+{ 
+    return source_.GetError(); 
+}
+
+
+// Decode the next value from the source into integer and return it.
+// When an error is already pending, integer is returned untouched.
+Integer& BER_Decoder::GetInteger(Integer& integer)
+{
+    if (source_.GetError().What())
+        return integer;
+
+    integer.Decode(source_);
+    return integer;
+}
+
+  
+// Read a constructed SEQUENCE tag and return the length that follows it.
+// Returns 0 when an error is already pending. On any other tag SEQUENCE_E
+// is set and 0 is returned.
+word32 BER_Decoder::GetSequence()
+{
+    if (source_.GetError().What()) return 0;
+
+    const byte tag = source_.next();
+    if (tag == (SEQUENCE | CONSTRUCTED))
+        return GetLength(source_);
+
+    source_.SetError(SEQUENCE_E);
+    return 0;
+}
+
+
+// Read a constructed SET tag and return the length that follows it.
+// Returns 0 when an error is already pending. On any other tag SET_E is
+// set and 0 is returned.
+word32 BER_Decoder::GetSet()
+{
+    if (source_.GetError().What()) return 0;
+
+    const byte tag = source_.next();
+    if (tag == (SET | CONSTRUCTED))
+        return GetLength(source_);
+
+    source_.SetError(SET_E);
+    return 0;
+}
+
+
+// Read a version encoded as a one-byte INTEGER and return its value.
+// Sets INTEGER_E when the tag is not INTEGER and VERSION_E when the length
+// byte is not 1. Returns 0 in both cases and when an error is pending.
+word32 BER_Decoder::GetVersion()
+{
+    if (source_.GetError().What()) return 0;
+
+    const byte tag = source_.next();
+    if (tag != INTEGER) {
+        source_.SetError(INTEGER_E);
+        return 0;
+    }
+
+    const byte len = source_.next();
+    if (len != 0x01) {
+        source_.SetError(VERSION_E);
+        return 0;
+    }
+
+    return source_.next();
+}
+
+
+// Read an explicitly tagged version and return it.
+// If the next byte is not (CONTEXT_SPECIFIC | CONSTRUCTED), the byte is put
+// back and 0 is returned; that is not treated as an error.
+word32 BER_Decoder::GetExplicitVersion()
+{
+    if (source_.GetError().What()) return 0;
+
+    const byte tag = source_.next();
+
+    if (tag != (CONTEXT_SPECIFIC | CONSTRUCTED)) {
+        source_.prev();     // put back
+        return 0;
+    }
+
+    source_.next();         // skip one byte (presumably the wrapper length)
+    return GetVersion();
+}
+
+
+// Decode a BER encoded RSA Private Key
+// After the header, eight integers are read in this fixed order and stored
+// in key: modulus, public exponent, private exponent, prime1, prime2,
+// d mod (p-1), d mod (q-1) and the inverse of prime2 mod prime1.
+// Nothing is stored when the header fails. Errors met later are recorded in
+// source_, and GetInteger() then leaves the remaining values at their
+// defaults.
+void RSA_Private_Decoder::Decode(RSA_PrivateKey& key)
+{
+    ReadHeader();
+    if (source_.GetError().What()) return;
+    // public
+    key.SetModulus(GetInteger(Integer().Ref()));
+    key.SetPublicExponent(GetInteger(Integer().Ref()));
+
+    // private
+    key.SetPrivateExponent(GetInteger(Integer().Ref()));
+    key.SetPrime1(GetInteger(Integer().Ref()));
+    key.SetPrime2(GetInteger(Integer().Ref()));
+    key.SetModPrime1PrivateExponent(GetInteger(Integer().Ref()));
+    key.SetModPrime2PrivateExponent(GetInteger(Integer().Ref()));
+    key.SetMultiplicativeInverseOfPrime2ModPrime1(GetInteger(Integer().Ref()));
+}
+
+
+// Consume the outer SEQUENCE header and the version that precede the key
+// fields. The return values are not used; failures are recorded in source_.
+void RSA_Private_Decoder::ReadHeader()
+{
+    GetSequence();
+    GetVersion();
+}
+
+
+// Decode a BER encoded DSA Private Key
+// After the header, five integers are read in this fixed order and stored
+// in key: modulus, subgroup order, subgroup generator, public part and
+// private part. Nothing is stored when the header fails.
+void DSA_Private_Decoder::Decode(DSA_PrivateKey& key)
+{
+    ReadHeader();
+    if (source_.GetError().What()) return;
+    // group parameters
+    key.SetModulus(GetInteger(Integer().Ref()));
+    key.SetSubGroupOrder(GetInteger(Integer().Ref()));
+    key.SetSubGroupGenerator(GetInteger(Integer().Ref()));
+
+    // key
+    key.SetPublicPart(GetInteger(Integer().Ref()));
+    key.SetPrivatePart(GetInteger(Integer().Ref()));   
+}
+
+
+// Consume the outer SEQUENCE header and the version that precede the key
+// fields. The return values are not used; failures are recorded in source_.
+void DSA_Private_Decoder::ReadHeader()
+{
+    GetSequence();
+    GetVersion();
+}
+
+
+// Decode a BER encoded RSA Public Key
+// Reads the outer SEQUENCE, skips the OpenSSL-style wrapper if there is
+// one, then reads modulus and public exponent (in that order) into key.
+// Returns early, leaving key untouched, when either header step fails.
+void RSA_Public_Decoder::Decode(RSA_PublicKey& key)
+{
+    ReadHeader();
+    if (source_.GetError().What()) return;
+
+    ReadHeaderOpenSSL();   // may or may not be
+    if (source_.GetError().What()) return;
+
+    // public key
+    key.SetModulus(GetInteger(Integer().Ref()));
+    key.SetPublicExponent(GetInteger(Integer().Ref()));
+}
+
+
+// Read OpenSSL format public header
+// Peeks at the next byte. INTEGER means the bare key follows and nothing
+// is consumed. Otherwise this consumes, in order: a SEQUENCE header, an
+// OBJECT IDENTIFIER (contents skipped), an optional NULL tag with its 0
+// byte, the BIT STRING header with an optional leading 0 byte, and the
+// inner SEQUENCE header. Sets OBJECT_ID_E, EXPECT_0_E or BIT_STR_E on a
+// mismatch.
+void RSA_Public_Decoder::ReadHeaderOpenSSL()
+{
+    byte b = source_.next();  // peek
+    source_.prev();
+
+    if (b == INTEGER)         // bare key, no wrapper to skip
+        return;
+
+    // have OpenSSL public format
+    GetSequence();
+    b = source_.next();
+    if (b != OBJECT_IDENTIFIER) {
+        source_.SetError(OBJECT_ID_E);
+        return;
+    }
+
+    const word32 oidLen = GetLength(source_);
+    source_.advance(oidLen);
+
+    // could have NULL tag and 0 terminator, may not
+    b = source_.next();
+    if (b != TAG_NULL)
+        source_.prev();       // put back
+    else {
+        b = source_.next();
+        if (b != 0) {
+            source_.SetError(EXPECT_0_E);
+            return;
+        }
+    }
+
+    b = source_.next();
+    if (b != BIT_STRING) {
+        source_.SetError(BIT_STR_E);
+        return;
+    }
+
+    GetLength(source_);       // bit string length, value not needed
+    b = source_.next();
+    if (b != 0)               // could have 0
+        source_.prev();       // put back
+
+    GetSequence();
+}
+
+
+// Consume the outer SEQUENCE header. The length is not used; a failure is
+// recorded in source_.
+void RSA_Public_Decoder::ReadHeader()
+{
+    GetSequence();
+}
+
+
+// Decode a BER encoded DSA Public Key
+// After the header, four integers are read in this fixed order and stored
+// in key: modulus, subgroup order, subgroup generator and public part.
+// Nothing is stored when the header fails.
+void DSA_Public_Decoder::Decode(DSA_PublicKey& key)
+{
+    ReadHeader();
+    if (source_.GetError().What()) return;
+
+    // group parameters
+    key.SetModulus(GetInteger(Integer().Ref()));
+    key.SetSubGroupOrder(GetInteger(Integer().Ref()));
+    key.SetSubGroupGenerator(GetInteger(Integer().Ref()));
+
+    // key
+    key.SetPublicPart(GetInteger(Integer().Ref()));
+}
+
+
+// Consume the outer SEQUENCE header. The length is not used; a failure is
+// recorded in source_.
+void DSA_Public_Decoder::ReadHeader()
+{
+    GetSequence();
+}
+
+
+// Consume the outer SEQUENCE header. The length is not used; a failure is
+// recorded in source_.
+void DH_Decoder::ReadHeader()
+{
+    GetSequence();
+}
+
+
+// Decode a BER encoded Diffie-Hellman Key
+// After the header, two integers are read and stored in key: P first, then
+// G. Nothing is stored when the header fails.
+void DH_Decoder::Decode(DH& key)
+{
+    ReadHeader();
+    if (source_.GetError().What()) return;
+
+    // group parms
+    key.SetP(GetInteger(Integer().Ref()));
+    key.SetG(GetInteger(Integer().Ref()));
+}
+
+
+// Set up a decoder reading from s.
+//   decode   - when true, the certificate is parsed right away through
+//              Decode(signers, ct)
+//   signers  - handed on to Decode()
+//   noVerify - when true, verify_ is switched off
+//   ct       - certificate type, handed on to Decode()
+// issuer_ and subject_ start out as empty strings.
+CertDecoder::CertDecoder(Source& s, bool decode, SignerList* signers,
+                         bool noVerify, CertType ct)
+    : BER_Decoder(s), certBegin_(0), sigIndex_(0), sigLength_(0), subCnPos_(-1),
+      subCnLen_(0), issCnPos_(-1), issCnLen_(0), signature_(0),
+      verify_(!noVerify)
+{
+    subject_[0] = 0;
+    issuer_[0]  = 0;
+
+    if (!decode)
+        return;
+
+    Decode(signers, ct);
+}
+
+
+// Release the signature buffer. signature_ is still 0 when neither
+// GetSignature() nor GetDigest() allocated one.
+CertDecoder::~CertDecoder()
+{
+    tcArrayDelete(signature_);
+}
+
+
+// process certificate header, set signature offset
+// certBegin_ becomes the index just after the outer SEQUENCE header.
+// sigIndex_ becomes the index just after the inner ("this cert") SEQUENCE,
+// which is where Decode() later looks for the signature algorithm.
+// The optional explicit version and the serial number are read and dropped.
+void CertDecoder::ReadHeader()
+{
+    if (source_.GetError().What()) return;
+
+    GetSequence();  // total
+    certBegin_ = source_.get_index();
+
+    // two steps on purpose: the index must be read after GetSequence() ran
+    sigIndex_ = GetSequence();  // this cert
+    sigIndex_ += source_.get_index();
+
+    GetExplicitVersion(); // version
+    GetInteger(Integer().Ref());  // serial number
+}
+
+
+// Decode a x509v3 Certificate
+// Parses everything up to the public key, jumps to sigIndex_, then reads
+// the signature algorithm and the signature. Sets SIG_OID_E when the two
+// algorithm ids differ. Unless ct is CA or verification is off, the
+// signature is checked against signers and SIG_OTHER_E is set on failure.
+void CertDecoder::Decode(SignerList* signers, CertType ct)
+{
+    if (source_.GetError().What()) return;
+    DecodeToKey();
+    if (source_.GetError().What()) return;
+
+    // the signature algorithm starts where the signed part ends
+    if (source_.get_index() != sigIndex_)
+        source_.set_index(sigIndex_);
+
+    const word32 outerOID = GetAlgoId();
+    GetSignature();
+    if (source_.GetError().What()) return;
+
+    if (outerOID != signatureOID_) {
+        source_.SetError(SIG_OID_E);
+        return;
+    }
+
+    if (ct == CA || !verify_)
+        return;
+
+    if (!ValidateSignature(signers))
+        source_.SetError(SIG_OTHER_E);
+}
+
+
+// Parse the certificate from its start through the public key, in order:
+// header, signature algorithm id (kept in signatureOID_), issuer name,
+// validity dates, subject name and key. Each step checks the pending error
+// itself, so after a failure the remaining steps do nothing.
+void CertDecoder::DecodeToKey()
+{
+    ReadHeader();
+    signatureOID_ = GetAlgoId();
+    GetName(ISSUER);   
+    GetValidity();
+    GetName(SUBJECT);   
+    GetKey();
+}
+
+
+// Read public key
+// Reads the key's SEQUENCE header and algorithm id (kept in keyOID_).
+// RSA: skips the BIT STRING header up to and including the first 0 byte,
+// then stores the key. DSA: stores the group parameters, then appends the
+// public value through AddDSA(). Any other algorithm sets UNKNOWN_OID_E.
+// NOTE(review): the scan for the 0 byte has no explicit bound here; it
+// relies on how Source::next() behaves at the end of input - confirm.
+void CertDecoder::GetKey()
+{
+    if (source_.GetError().What()) return;
+
+    GetSequence();
+    keyOID_ = GetAlgoId();
+
+    const bool isRSA = (keyOID_ == RSAk);
+    const bool isDSA = (keyOID_ == DSAk);
+
+    if (!isRSA && !isDSA) {
+        source_.SetError(UNKNOWN_OID_E);
+        return;
+    }
+
+    if (isRSA) {
+        byte b = source_.next();
+        if (b != BIT_STRING) {
+            source_.SetError(BIT_STR_E);
+            return;
+        }
+        source_.next();             // length, future
+        do {
+            b = source_.next();
+        } while (b != 0);
+    }
+
+    StoreKey();
+    if (isDSA)
+        AddDSA();
+}
+
+
+// Save public key
+// Copies the next SEQUENCE, header included, into key_ and moves the
+// source past it. Does nothing when an error is pending or when the source
+// holds fewer bytes than the sequence claims.
+void CertDecoder::StoreKey()
+{
+    if (source_.GetError().What()) return;
+
+    const word32 start = source_.get_index();
+    word32 length = GetSequence();
+
+    const word32 header = source_.get_index() - start;
+    length += header;               // stored key includes its header
+
+    if (source_.GetError().What()) return;
+
+    // rewind to the first byte of the header
+    for (word32 n = header; n; --n)
+        source_.prev();
+
+    if (!source_.IsLeft(length)) return;
+
+    key_.SetSize(length);
+    key_.SetKey(source_.get_current());
+    source_.advance(length);
+}
+
+
+// DSA has public key after group
+// Skips the BIT STRING header up to and including the first 0 byte, then
+// appends the INTEGER that follows (tag and length included) to key_.
+// Sets BIT_STR_E or INTEGER_E when the expected tag is missing.
+void CertDecoder::AddDSA()
+{
+    if (source_.GetError().What()) return;
+
+    byte b = source_.next();
+    if (b != BIT_STRING) {
+        source_.SetError(BIT_STR_E);
+        return;
+    }
+    source_.next();                 // length, future
+    do {
+        b = source_.next();
+    } while (b != 0);
+
+    const word32 intStart = source_.get_index();
+    b = source_.next();
+    if (b != INTEGER) {
+        source_.SetError(INTEGER_E);
+        return;
+    }
+
+    // content length plus the tag and length bytes just consumed
+    word32 total = GetLength(source_);
+    total += source_.get_index() - intStart;
+
+    if (!source_.IsLeft(total)) return;
+
+    key_.AddToEnd(source_.get_buffer() + intStart, total);
+}
+
+
+// process algo OID by summing, return it
+// Reads a SEQUENCE header and an OBJECT IDENTIFIER and returns the sum of
+// the identifier's content bytes. A NULL tag with its 0 byte after the
+// identifier is consumed when present. Returns 0 on error (OBJECT_ID_E for
+// a wrong tag, EXPECT_0_E when NULL is not followed by 0).
+word32 CertDecoder::GetAlgoId()
+{
+    if (source_.GetError().What()) return 0;
+    GetSequence();
+
+    if (source_.GetError().What()) return 0;
+
+    byte b = source_.next();
+    if (b != OBJECT_IDENTIFIER) {
+        source_.SetError(OBJECT_ID_E);
+        return 0;
+    }
+
+    word32 remaining = GetLength(source_);
+    if (!source_.IsLeft(remaining)) return 0;
+
+    word32 oid = 0;
+    for (; remaining; --remaining)
+        oid += source_.next();        // just sum it up for now
+
+    // could have NULL tag and 0 terminator, but may not
+    b = source_.next();
+    if (b != TAG_NULL) {
+        source_.prev();               // go back, didn't have it
+        return oid;
+    }
+
+    b = source_.next();
+    if (b != 0) {
+        source_.SetError(EXPECT_0_E);
+        return 0;
+    }
+
+    return oid;
+}
+
+
+// read cert signature, store in signature_
+// Expects a BIT STRING whose first content byte is 0. The remaining bytes
+// are copied into a newly allocated signature_ and their count is kept in
+// sigLength_ and returned. Returns 0 and sets BIT_STR_E, CONTENT_E or
+// EXPECT_0_E on malformed input.
+word32 CertDecoder::GetSignature()
+{
+    if (source_.GetError().What()) return 0;
+
+    byte b = source_.next();
+    if (b != BIT_STRING) {
+        source_.SetError(BIT_STR_E);
+        return 0;
+    }
+
+    sigLength_ = GetLength(source_);
+
+    const bool tooShort = (sigLength_ <= 1);
+    if (tooShort || !source_.IsLeft(sigLength_)) {
+        source_.SetError(CONTENT_E);
+        return 0;
+    }
+
+    b = source_.next();
+    if (b != 0) {
+        source_.SetError(EXPECT_0_E);
+        return 0;
+    }
+    --sigLength_;                   // the leading 0 byte is not signature
+
+    signature_ = NEW_TC byte[sigLength_];
+    memcpy(signature_, source_.get_current(), sigLength_);
+    source_.advance(sigLength_);
+
+    return sigLength_;
+}
+
+
+// read cert digest, store in signature_
+// Expects an OCTET STRING; its content is copied into a newly allocated
+// signature_ and its length is kept in sigLength_ and returned.
+// Returns 0 and sets OCTET_STR_E when the tag does not match.
+word32 CertDecoder::GetDigest()
+{
+    if (source_.GetError().What()) return 0;
+
+    const byte tag = source_.next();
+    if (tag != OCTET_STRING) {
+        source_.SetError(OCTET_STR_E);
+        return 0;
+    }
+
+    sigLength_ = GetLength(source_);
+    signature_ = NEW_TC byte[sigLength_];
+
+    memcpy(signature_, source_.get_current(), sigLength_);
+    source_.advance(sigLength_);
+
+    return sigLength_;
+}
+
+
+// memory length checked add tag to buffer
+// Writes tag_name (tag_name_length bytes) followed by tag_value_length
+// bytes taken from the current source position to ptr.
+//   ptr     - write position inside the name buffer
+//   buf_end - last position that may be written up to (exclusive)
+// Returns the position after the copied bytes, or 0 with CONTENT_E set when
+// the two pieces do not fit. The source position is not advanced.
+char* CertDecoder::AddTag(char* ptr, const char* buf_end, const char* tag_name,
+                          word32 tag_name_length, word32 tag_value_length)
+{
+    // Compare sizes rather than pointers: forming ptr + lengths first could
+    // step outside the buffer (or wrap) before the comparison takes place.
+    bool fits = false;
+    if (ptr <= buf_end) {
+        const size_t room = buf_end - ptr;
+        fits = tag_name_length <= room &&
+               tag_value_length <= room - tag_name_length;
+    }
+
+    if (!fits) {
+        source_.SetError(CONTENT_E);
+        return 0;
+    }
+
+    memcpy(ptr, tag_name, tag_name_length);
+    ptr += tag_name_length;
+
+    memcpy(ptr, source_.get_current(), tag_value_length);
+    ptr += tag_value_length;
+
+    return ptr;
+}
+
+
+// process NAME, either issuer or subject
+// Walks the distinguished name and builds a "/CN=.../O=..." style string
+// in issuer_ or subject_ (chosen by nt). The common name's position and
+// length inside that string are recorded. A SHA hash over the values of
+// the 0x55 0x04 attributes is stored in issuerHash_ or subjectHash_.
+// Attributes whose OID starts with 0x2a 0x86 are added as "/emailAddress=";
+// all other attributes are skipped.
+// NOTE(review): a name of ASN_NAME_MAX bytes or more returns without
+// setting an error and without consuming the name - confirm this is
+// intended.
+void CertDecoder::GetName(NameType nt)
+{
+    if (source_.GetError().What()) return;
+
+    SHA    sha;
+    word32 length = GetSequence();  // length of all distinguished names
+
+    if (length >= ASN_NAME_MAX)
+        return;
+    if (source_.IsLeft(length) == false) return;
+    // from here on length is the source index where the name ends
+    length += source_.get_index();
+    
+    char* ptr;
+    char* buf_end;
+
+    if (nt == ISSUER) {
+        ptr = issuer_;
+        buf_end = ptr + sizeof(issuer_) - 1;   // 1 byte for trailing 0
+    }
+    else {
+        ptr = subject_;
+        buf_end = ptr + sizeof(subject_) - 1;  // 1 byte for trailing 0
+    }
+
+    while (source_.get_index() < length) {
+        GetSet();
+        if (source_.GetError().What() == SET_E) {
+            source_.SetError(NO_ERROR_E);  // extensions may only have sequence 
+            source_.prev();
+        }
+        GetSequence();
+
+        byte b = source_.next();
+        if (b != OBJECT_IDENTIFIER) {
+            source_.SetError(OBJECT_ID_E);
+            return;
+        }
+
+        word32 oidSz = GetLength(source_);
+        if (source_.IsLeft(oidSz) == false) return;
+
+        // look at the first two OID bytes without consuming them
+        byte joint[2];
+        if (source_.IsLeft(sizeof(joint)) == false) return;
+        memcpy(joint, source_.get_current(), sizeof(joint));
+
+        // v1 name types
+        if (joint[0] == 0x55 && joint[1] == 0x04) {
+            source_.advance(2);
+            byte   id      = source_.next();  
+            b              = source_.next();    // strType
+            word32 strLen  = GetLength(source_);
+
+            if (source_.IsLeft(strLen) == false) return;
+
+            // AddTag() copies the value but leaves the source position alone
+            switch (id) {
+            case COMMON_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/CN=", 4, strLen)))
+                    return;
+                if (nt == ISSUER) {
+                    issCnPos_ = (int)(ptr - strLen - issuer_);
+                    issCnLen_ = (int)strLen;
+                } else {
+                    subCnPos_ = (int)(ptr - strLen - subject_);
+                    subCnLen_ = (int)strLen;
+                }
+                break;
+            case SUR_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/SN=", 4, strLen)))
+                    return;
+                break;
+            case COUNTRY_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/C=", 3, strLen)))
+                    return;
+                break;
+            case LOCALITY_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/L=", 3, strLen)))
+                    return;
+                break;
+            case STATE_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/ST=", 4, strLen)))
+                    return;
+                break;
+            case ORG_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/O=", 3, strLen)))
+                    return;
+                break;
+            case ORGUNIT_NAME:
+                if (!(ptr = AddTag(ptr, buf_end, "/OU=", 4, strLen)))
+                    return;
+                break;
+            }
+
+            // every v1 value goes into the hash, listed above or not
+            sha.Update(source_.get_current(), strLen);
+            source_.advance(strLen);
+        }
+        else { 
+            bool email = false;
+            if (joint[0] == 0x2a && joint[1] == 0x86)  // email id hdr
+                email = true;
+
+            // skip the OID plus one more byte (presumably the string type)
+            source_.advance(oidSz + 1);
+            // this inner length shadows the outer one (the end index)
+            word32 length = GetLength(source_);
+            if (source_.IsLeft(length) == false) return;
+
+            if (email) {
+                if (!(ptr = AddTag(ptr, buf_end, "/emailAddress=", 14, length)))
+                    return; 
+            }
+
+            source_.advance(length);
+        }
+    }
+
+    *ptr = 0;
+
+    if (nt == ISSUER)
+        sha.Final(issuerHash_);
+    else
+        sha.Final(subjectHash_);
+}
+
+
+// process a Date, either BEFORE or AFTER
+// Reads a UTC_TIME or GENERALIZED_TIME value, checks it against the current
+// time and keeps a NUL-terminated copy plus its type in beforeDate_ /
+// afterDate_. Sets TIME_E for a wrong tag and DATE_SZ_E for a length
+// outside MIN_DATE_SZ .. MAX_DATE_SZ. When verification is on, an invalid
+// date sets BEFORE_DATE_E or AFTER_DATE_E; the copy is stored either way.
+// NOTE(review): the terminator is written at index length, which needs the
+// member arrays to hold MAX_DATE_SZ + 1 bytes - confirm their declaration.
+void CertDecoder::GetDate(DateType dt)
+{
+    if (source_.GetError().What()) return;
+
+    byte b = source_.next();
+    if (b != UTC_TIME && b != GENERALIZED_TIME) {
+        source_.SetError(TIME_E);
+        return;
+    }
+
+    word32 length = GetLength(source_);
+    if (source_.IsLeft(length) == false) return;
+
+    byte date[MAX_DATE_SZ];
+    if (length > MAX_DATE_SZ || length < MIN_DATE_SZ) {
+        source_.SetError(DATE_SZ_E);
+        return;
+    }
+
+    // The parser is not told the length and reads a fixed number of bytes
+    // for the given format, so bytes past length must not be stack garbage.
+    memset(date, 0, sizeof(date));
+    memcpy(date, source_.get_current(), length);
+    source_.advance(length);
+
+    if (!ValidateDate(date, b, dt) && verify_) {
+        if (dt == BEFORE)
+            source_.SetError(BEFORE_DATE_E);
+        else
+            source_.SetError(AFTER_DATE_E);
+    }
+
+    // save for later use
+    if (dt == BEFORE) {
+        memcpy(beforeDate_, date, length);
+        beforeDate_[length] = 0;
+        beforeDateType_= b;
+    }
+    else {  // after
+        memcpy(afterDate_, date, length);
+        afterDate_[length] = 0;
+        afterDateType_= b;
+    }
+}
+
+
+// Read the validity SEQUENCE: the BEFORE date first, then the AFTER date.
+// Does nothing when an error is already pending.
+void CertDecoder::GetValidity()
+{
+    if (source_.GetError().What()) return;
+
+    GetSequence();
+    GetDate(BEFORE);
+    GetDate(AFTER);
+}
+
+
+// Check the certificate's signature using the public key decoded from the
+// certificate itself (key_).
+bool CertDecoder::ValidateSelfSignature()
+{
+    Source pub(key_.GetKey(), key_.size());
+    return ConfirmSignature(pub);
+}
+
+
+// extract compare signature hash from plain and place into digest
+// plain (sz bytes) is parsed as SEQUENCE, algorithm id, OCTET STRING; the
+// octet string content is copied into digest. Sets SIG_LEN_E and copies
+// nothing when that content is longer than digSz bytes.
+void CertDecoder::GetCompareHash(const byte* plain, word32 sz, byte* digest,
+                                 word32 digSz)
+{
+    if (source_.GetError().What()) return;
+
+    // parse with a separate, non-decoding CertDecoder over plain
+    Source plainSrc(plain, sz);
+    CertDecoder inner(plainSrc, false);
+
+    inner.GetSequence();
+    inner.GetAlgoId();
+    inner.GetDigest();
+
+    const bool fits = !(inner.sigLength_ > digSz);
+    if (!fits) {
+        source_.SetError(SIG_LEN_E);
+        return;
+    }
+
+    memcpy(digest, inner.signature_, inner.sigLength_);
+}
+
+
+// validate signature signed by someone else
+// Looks for the first signer whose hash equals issuerHash_ and checks the
+// certificate signature with that signer's public key. Returns false when
+// signers is null or no signer matches.
+bool CertDecoder::ValidateSignature(SignerList* signers)
+{
+    if (!signers)
+        return false;
+
+    for (SignerList::iterator it = signers->begin();
+         it != signers->end(); ++it) {
+        if (memcmp(issuerHash_, (*it)->GetHash(), SHA::DIGEST_SIZE) != 0)
+            continue;
+
+        // first match decides, whatever the outcome
+        const PublicKey& issuerKey = (*it)->GetPublicKey();
+        Source pub(issuerKey.GetKey(), issuerKey.size());
+        return ConfirmSignature(pub);
+    }
+
+    return false;
+}
+
+
+// confirm certificate signature
+// Hashes the signed part of the certificate (certBegin_ up to sigIndex_)
+// with the algorithm named by signatureOID_ and checks signature_ against
+// it using the public key in pub.
+// RSA keys: the digest is wrapped in ASN.1 signature format and verified
+// with SSL_Verify; SIG_LEN_E is set when the signature length differs from
+// the key's ciphertext length. Any other key type is treated as DSA.
+// Sets UNKOWN_SIG_E and returns false for an unsupported algorithm.
+bool CertDecoder::ConfirmSignature(Source& pub)
+{
+    HashType ht;
+    mySTL::auto_ptr<HASH> hasher;
+
+    // pick the hash implementation that matches the signature algorithm
+    if (signatureOID_ == MD5wRSA) {
+        hasher.reset(NEW_TC MD5);
+        ht = MD5h;
+    }
+    else if (signatureOID_ == MD2wRSA) {
+        hasher.reset(NEW_TC MD2);
+        ht = MD2h;
+    }
+    else if (signatureOID_ == SHAwRSA || signatureOID_ == SHAwDSA) {
+        hasher.reset(NEW_TC SHA);
+        ht = SHAh;
+    }
+    else if (signatureOID_ == SHA256wRSA || signatureOID_ == SHA256wDSA) {
+        hasher.reset(NEW_TC SHA256);
+        ht = SHA256h;
+    }
+#ifdef WORD64_AVAILABLE
+    else if (signatureOID_ == SHA384wRSA) {
+        hasher.reset(NEW_TC SHA384);
+        ht = SHA384h;
+    }
+    else if (signatureOID_ == SHA512wRSA) {
+        hasher.reset(NEW_TC SHA512);
+        ht = SHA512h;
+    }
+#endif
+    else {
+        source_.SetError(UNKOWN_SIG_E);
+        return false;
+    }
+
+    byte digest[MAX_SHA2_DIGEST_SIZE];      // largest size
+
+    // hash exactly the bytes between certBegin_ and sigIndex_
+    hasher->Update(source_.get_buffer() + certBegin_, sigIndex_ - certBegin_);
+    hasher->Final(digest);
+
+    if (keyOID_ == RSAk) {
+        // put in ASN.1 signature format
+        Source build;
+        Signature_Encoder(digest, hasher->getDigestSize(), ht, build);
+
+        RSA_PublicKey pubKey(pub);
+        RSAES_Encryptor enc(pubKey);
+
+        if (pubKey.FixedCiphertextLength() != sigLength_) {
+            source_.SetError(SIG_LEN_E);
+            return false;
+        }
+
+        return enc.SSL_Verify(build.get_buffer(), build.size(), signature_);
+    }
+    else  { // DSA
+        // extract r and s from sequence
+        // (the return value of DecodeDSA_Signature is not checked here)
+        byte seqDecoded[DSA_SIG_SZ];
+        memset(seqDecoded, 0, sizeof(seqDecoded));
+        DecodeDSA_Signature(seqDecoded, signature_, sigLength_);
+
+        DSA_PublicKey pubKey(pub);
+        DSA_Verifier  ver(pubKey);
+
+        return ver.Verify(digest, seqDecoded);
+    }
+}
+
+
+// Build the ASN.1 signature structure for a digest and append it to source:
+// a SEQUENCE header, then the algorithm id for digOID, then the digest.
+// The three pieces are encoded bottom up, because the SEQUENCE header needs
+// the combined size of the other two.
+Signature_Encoder::Signature_Encoder(const byte* dig, word32 digSz,
+                                     HashType digOID, Source& source)
+{
+    // Digest
+    byte digestPart[MAX_DIGEST_SZ];
+    const word32 digestLen = SetDigest(dig, digSz, digestPart);
+
+    // AlgoID
+    byte algoPart[MAX_ALGO_SZ];
+    const word32 algoLen = SetAlgoID(digOID, algoPart);
+
+    // Sequence
+    byte seqPart[MAX_SEQ_SZ];
+    const word32 seqLen = SetSequence(digestLen + algoLen, seqPart);
+
+    // make sure enough room, then emit in final order
+    source.grow(seqLen + algoLen + digestLen);
+    source.add(seqPart,    seqLen);
+    source.add(algoPart,   algoLen);
+    source.add(digestPart, digestLen);
+}
+
+
+
+// Writes the dSz bytes at d into output as an OCTET_STRING: tag byte, one
+// length byte, then the digest itself.  Returns the number of bytes written.
+word32 Signature_Encoder::SetDigest(const byte* d, word32 dSz, byte* output)
+{
+    const word32 headerSz = 2;      // tag + single length byte
+
+    byte* cursor = output;
+    *cursor++ = OCTET_STRING;
+    *cursor++ = dSz;                // length is stored in one byte
+    memcpy(cursor, d, dSz);
+
+    return headerSz + dSz;
+}
+
+
+
+// Encodes the algorithm identifier for hash type aOID into output as
+//   SEQUENCE header, OBJECT_IDENTIFIER tag, OID length, OID bytes, NULL tag, 0
+// Returns the number of bytes written.  For an unrecognised hash type
+// UNKOWN_HASH_E is set on error_ and 0 is returned.
+word32 DER_Encoder::SetAlgoID(HashType aOID, byte* output)
+{
+    // every table ends with TAG_NULL and 0
+    static const byte shaAlgoID[] = { 0x2b, 0x0e, 0x03, 0x02, 0x1a,
+                                      0x05, 0x00 };
+    static const byte md5AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
+                                      0x02, 0x05, 0x05, 0x00  };
+    static const byte md2AlgoID[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
+                                      0x02, 0x02, 0x05, 0x00};
+    static const byte sha256AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
+                                         0x04, 0x02, 0x01, 0x05, 0x00 };
+    static const byte sha384AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
+                                         0x04, 0x02, 0x02, 0x05, 0x00 };
+    static const byte sha512AlgoID[] = { 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
+                                         0x04, 0x02, 0x03, 0x05, 0x00 };
+    const byte* oid   = 0;
+    word32      oidSz = 0;
+
+    switch (aOID) {
+    case SHAh:    oid = shaAlgoID;    oidSz = sizeof(shaAlgoID);    break;
+    case SHA256h: oid = sha256AlgoID; oidSz = sizeof(sha256AlgoID); break;
+    case SHA384h: oid = sha384AlgoID; oidSz = sizeof(sha384AlgoID); break;
+    case SHA512h: oid = sha512AlgoID; oidSz = sizeof(sha512AlgoID); break;
+    case MD2h:    oid = md2AlgoID;    oidSz = sizeof(md2AlgoID);    break;
+    case MD5h:    oid = md5AlgoID;    oidSz = sizeof(md5AlgoID);    break;
+
+    default:
+        error_.SetError(UNKOWN_HASH_E);
+        return 0;
+    }
+
+    // the OID length field does not count the trailing TAG_NULL/0 pair
+    byte lenField[MAX_LENGTH_SZ];
+    const word32 lenFieldSz = SetLength(oidSz - 2, lenField);
+
+    // sequence header, with one spare byte for the object id tag
+    byte header[MAX_SEQ_SZ + 1];
+    word32 headerSz = SetSequence(lenFieldSz + oidSz + 1, header);
+    header[headerSz++] = OBJECT_IDENTIFIER;
+
+    // lay the three pieces out back to back
+    byte* cursor = output;
+    memcpy(cursor, header, headerSz);
+    cursor += headerSz;
+    memcpy(cursor, lenField, lenFieldSz);
+    cursor += lenFieldSz;
+    memcpy(cursor, oid, oidSz);
+
+    return headerSz + lenFieldSz + oidSz;
+}
+
+
+// Writes a constructed SEQUENCE tag followed by the encoded length len into
+// output.  Returns the size of that header (tag byte + length bytes).
+word32 SetSequence(word32 len, byte* output)
+{
+    const word32 tagSz = 1;
+
+    output[0] = SEQUENCE | CONSTRUCTED;
+    const word32 lengthSz = SetLength(len, output + tagSz);
+
+    return tagSz + lengthSz;
+}
+
+
+// Takes a raw signature made of two 20 byte values (r then s), turns each
+// half into an Integer and forwards to the Integer overload for encoding.
+word32 EncodeDSA_Signature(const byte* signature, byte* output)
+{
+    const byte* rBytes = signature;
+    const byte* sBytes = signature + 20;
+
+    Integer r(rBytes, 20);
+    Integer s(sBytes, 20);
+
+    return EncodeDSA_Signature(r, s, output);
+}
+
+
+// Encodes r and s into output as
+//   SEQUENCE header, INTEGER header + r bytes, INTEGER header + s bytes
+// and returns the total number of bytes written.
+// NOTE(review): each value is written with exactly ByteCount() bytes, so no
+// leading sign byte is emitted when the top bit is set - confirm that peers
+// accept this before relying on it for interoperability.
+word32 EncodeDSA_Signature(const Integer& r, const Integer& s, byte* output)
+{
+    const word32 rBytes = r.ByteCount();
+    const word32 sBytes = s.ByteCount();
+
+    // INTEGER headers: tag byte followed by the encoded length
+    byte rHeader[MAX_LENGTH_SZ + 1];
+    byte sHeader[MAX_LENGTH_SZ + 1];
+    rHeader[0] = INTEGER;
+    sHeader[0] = INTEGER;
+
+    const word32 rHeaderSz = SetLength(rBytes, rHeader + 1) + 1;
+    const word32 sHeaderSz = SetLength(sBytes, sHeader + 1) + 1;
+
+    const word32 bodySz = rHeaderSz + rBytes + sHeaderSz + sBytes;
+
+    byte seqHeader[MAX_SEQ_SZ];
+    const word32 seqHeaderSz = SetSequence(bodySz, seqHeader);
+
+    byte* cursor = output;
+
+    // seq
+    memcpy(cursor, seqHeader, seqHeaderSz);
+    cursor += seqHeaderSz;
+
+    // r
+    memcpy(cursor, rHeader, rHeaderSz);
+    cursor += rHeaderSz;
+    r.Encode(cursor, rBytes);
+    cursor += rBytes;
+
+    // s
+    memcpy(cursor, sHeader, sHeaderSz);
+    cursor += sHeaderSz;
+    s.Encode(cursor, sBytes);
+
+    return seqHeaderSz + bodySz;
+}
+
+
+// Reads one INTEGER from source and stores its value in out as exactly 20
+// bytes: surplus leading bytes are skipped and short values are padded with
+// zeros at the front.  Returns false, with INTEGER_E set on source, when the
+// tag is wrong, the input is exhausted, or the value still does not fit in
+// 20 bytes / in the bytes that remain.
+// NOTE(review): skipped leading bytes are not checked to be zero, same as
+// before - confirm whether non-zero surplus bytes should be rejected.
+static bool DecodeDSA_Part(Source& source, byte* out)
+{
+    const word32 partSz = 20;
+
+    if (source.remaining() == 0 || source.next() != INTEGER) {
+        source.SetError(INTEGER_E);
+        return false;
+    }
+    if (source.remaining() == 0) {      // no length byte to read
+        source.SetError(INTEGER_E);
+        return false;
+    }
+
+    word32 len = GetLength(source);
+    while (len > partSz && source.remaining() > 0) {  // zero's at front, eat
+        source.next();
+        --len;
+    }
+
+    // never copy more than the 20 byte slot holds or the input still has;
+    // previously an over-long or truncated value overran both buffers
+    if (len > partSz || source.remaining() < len) {
+        source.SetError(INTEGER_E);
+        return false;
+    }
+
+    memset(out, 0, partSz - len);       // add zero's to front so 20 bytes
+    memcpy(out + (partSz - len), source.get_buffer() + source.get_index(),
+           len);
+    source.advance(len);
+
+    return true;
+}
+
+
+// put sequence encoded dsa signature into decoded in 2 20 byte integers
+//
+// decoded must have room for 40 bytes: r fills bytes 0-19, s bytes 20-39.
+// encoded/sz describe the input (SEQUENCE holding two INTEGERs).
+// Returns 40 on success and 0 when the input is malformed or truncated; in
+// the failure case decoded may have been partially written.
+word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
+{
+    Source source(encoded, sz);
+
+    // need at least the tag and one length byte
+    if (source.remaining() < 2 ||
+        source.next() != (SEQUENCE | CONSTRUCTED)) {
+        source.SetError(SEQUENCE_E);
+        return 0;
+    }
+
+    GetLength(source);  // total
+
+    // r
+    if (!DecodeDSA_Part(source, decoded))
+        return 0;
+
+    // s
+    if (!DecodeDSA_Part(source, decoded + 20))
+        return 0;
+
+    return 40;
+}
+
+
+/*
+// Get Cert in PEM format from BEGIN to END
+int GetCert(Source& source)
+{
+    char header[] = "-----BEGIN CERTIFICATE-----";
+    char footer[] = "-----END CERTIFICATE-----";
+
+    char* begin = strstr((char*)source.get_buffer(), header);
+    char* end   = strstr((char*)source.get_buffer(), footer);
+
+    if (!begin || !end || begin >= end) return -1;
+
+    end += strlen(footer); 
+    if (*end == '\r') end++;
+
+    Source tmp((byte*)begin, end - begin + 1);
+    source.Swap(tmp);
+
+    return 0;
+}
+
+
+
+// Decode a BER encoded PKCS12 structure
+void PKCS12_Decoder::Decode()
+{
+    ReadHeader();
+    if (source_.GetError().What()) return;
+
+    // Get AuthSafe
+
+    GetSequence();
+    
+        // get object id
+    byte obj_id = source_.next();
+    if (obj_id != OBJECT_IDENTIFIER) {
+        source_.SetError(OBJECT_ID_E);
+        return;
+    }
+
+    word32 length = GetLength(source_);
+
+    word32 algo_sum = 0;
+    while (length--)
+        algo_sum += source_.next();
+
+    
+       
+
+
+
+    // Get MacData optional
+    // mac     digestInfo  like certdecoder::getdigest?
+    // macsalt octet string
+    // iter    integer
+    
+}
+
+
+void PKCS12_Decoder::ReadHeader()
+{
+    // Gets Version
+    GetSequence();
+    GetVersion();
+}
+
+
+// Get Cert in PEM format from pkcs12 file
+int GetPKCS_Cert(const char* password, Source& source)
+{
+    PKCS12_Decoder pkcs12(source);
+    pkcs12.Decode();
+
+    return 0;
+}
+*/
+
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/coding.cpp b/mysql/extra/yassl/taocrypt/src/coding.cpp
new file mode 100644
index 0000000..bc4727c
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/coding.cpp
@@ -0,0 +1,266 @@
+/*
+   Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* coding.cpp implements hex and base64 encoding/decoding
+*/
+
+#include "runtime.hpp"
+#include "coding.hpp"
+#include "file.hpp"
+
+
+namespace TaoCrypt {
+
+
+namespace { // locals: lookup tables and constants shared by the coders below
+
+const byte bad = 0xFF;  // invalid encoding (table slot with no value)
+
+const byte hexEncode[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+                           'A', 'B', 'C', 'D', 'E', 'F'
+                         };  // 4 bit value -> upper case hex digit
+
+const byte hexDecode[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+                           bad, bad, bad, bad, bad, bad, bad,
+                           10, 11, 12, 13, 14, 15 
+                         };  // indexed by (char - 0x30); A starts at 0x41 not 0x3A
+
+
+const byte base64Encode[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+                              'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+                              'U', 'V', 'W', 'X', 'Y', 'Z',
+                              'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
+                              'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+                              'u', 'v', 'w', 'x', 'y', 'z',
+                              '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+                              '+', '/'
+                            };  // 6 bit value -> base64 character
+
+const byte base64Decode[] = { 62, bad, bad, bad, 63,   // + starts at 0x2B
+                              52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+                              bad, bad, bad, bad, bad, bad, bad,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+                              10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                              20, 21, 22, 23, 24, 25,
+                              bad, bad, bad, bad, bad, bad,
+                              26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                              36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+                              46, 47, 48, 49, 50, 51
+                            };  // indexed by (char - 0x2B), last slot is 'z'
+
+const byte pad = '=';       // fills out a final group shorter than 3 bytes
+const int pemLineSz = 64;   // base64 characters per line, used to count new lines
+
+}  // local namespace
+
+
+// Hex Encode
+//
+// Writes two upper case hex digits per input byte into encoded_ and then
+// resets plain_ to that encoded text.
+void HexEncoder::Encode()
+{
+    const word32 inSz = plain_.size();
+    encoded_.New(inSz * 2);
+
+    for (word32 n = 0; n < inSz; ++n) {
+        const byte value = plain_.next();
+
+        encoded_[2 * n]     = hexEncode[value >> 4];    // high nibble
+        encoded_[2 * n + 1] = hexEncode[value & 0xF];   // low nibble
+    }
+
+    plain_.reset(encoded_);
+}
+
+
+// Hex Decode
+//
+// Converts the hex text in coded_ (digits '0'-'9' and upper case 'A'-'F',
+// two per byte) into bytes and resets coded_ to the decoded data.
+// On malformed input PEM_E is set on coded_ and coded_ is left as it was:
+//   - odd number of characters
+//   - a character outside the table, or one between '9' and 'A'
+void HexDecoder::Decode()
+{
+    word32 bytes = coded_.size();
+
+    // two digits are consumed per pass; an odd count would wrap the unsigned
+    // counter below and keep reading past the end of the input
+    if (bytes % 2) {
+        coded_.SetError(PEM_E);
+        return;
+    }
+
+    decoded_.New(bytes / 2);
+
+    const byte tableSz = sizeof(hexDecode)/sizeof(hexDecode[0]);
+    word32 i(0);
+
+    while (bytes) {
+        byte b  = coded_.next() - 0x30;  // 0 starts at 0x30
+        byte b2 = coded_.next() - 0x30;
+
+        // sanity checks (characters below '0' wrap to large values)
+        if (b >= tableSz || b2 >= tableSz) {
+            coded_.SetError(PEM_E);
+            return;
+        }
+
+        b  = hexDecode[b];
+        b2 = hexDecode[b2];
+
+        // ':' through '@' have a table slot but no hex value
+        if (b == bad || b2 == bad) {
+            coded_.SetError(PEM_E);
+            return;
+        }
+
+        decoded_[i++] = (b << 4) | b2;
+        bytes -= 2;
+    }
+
+    coded_.reset(decoded_);
+}
+
+
+// Base 64 Encode
+//
+// Encodes plain_ as base64 text into encoded_: 16 groups of 4 characters
+// (pemLineSz characters) per line, pad characters for a final group of 1 or
+// 2 bytes, and a closing new line.  plain_ is reset to the encoded text when
+// the number of characters written matches the precomputed size.
+// Empty input produces nothing and leaves plain_ untouched.
+void Base64Encoder::Encode()
+{
+    word32 bytes = plain_.size();
+    word32 outSz = (bytes + 3 - 1) / 3 * 4;
+
+    outSz += (outSz + pemLineSz - 1) / pemLineSz;  // new lines
+    encoded_.New(outSz);
+
+    // with no input outSz is 0, so the closing new line written below would
+    // land past the end of encoded_
+    if (bytes == 0)
+        return;
+
+    word32 i = 0;
+    word32 j = 0;
+    
+    while (bytes > 2) {
+        byte b1 = plain_.next();
+        byte b2 = plain_.next();
+        byte b3 = plain_.next();
+
+        // encoded idx
+        byte e1 = b1 >> 2;
+        byte e2 = ((b1 & 0x3) << 4) | (b2 >> 4);
+        byte e3 = ((b2 & 0xF) << 2) | (b3 >> 6);
+        byte e4 = b3 & 0x3F;
+
+        // store
+        encoded_[i++] = base64Encode[e1];
+        encoded_[i++] = base64Encode[e2];
+        encoded_[i++] = base64Encode[e3];
+        encoded_[i++] = base64Encode[e4];
+
+        bytes -= 3;
+
+        // line break after every 16th group, unless it was the last one
+        if ((++j % 16) == 0 && bytes)
+            encoded_[i++] = '\n';
+    }
+
+    // last integral
+    if (bytes) {
+        bool twoBytes = (bytes == 2);
+
+        byte b1 = plain_.next();
+        byte b2 = (twoBytes) ? plain_.next() : 0;
+
+        byte e1 = b1 >> 2;
+        byte e2 = ((b1 & 0x3) << 4) | (b2 >> 4);
+        byte e3 =  (b2 & 0xF) << 2;
+
+        encoded_[i++] = base64Encode[e1];
+        encoded_[i++] = base64Encode[e2];
+        encoded_[i++] = (twoBytes) ? base64Encode[e3] : pad;
+        encoded_[i++] = pad;
+    } 
+
+    encoded_[i++] = '\n';
+    
+    // only publish the result when the size bookkeeping adds up
+    if (i == outSz)
+        plain_.reset(encoded_);
+}
+
+
+// Base 64 Decode
+//
+// Decodes the base64 text in coded_ (groups of 4 characters, a line ending
+// after every 16 groups) into decoded_ and resets coded_ to the result.
+// Decoding stops at a group whose first byte is 0, at a padded group, or
+// when fewer than 4 characters remain.  On malformed input PEM_E is set on
+// coded_ and coded_ is left as it was:
+//   - a character outside the base64 alphabet
+//   - something other than [spaces][\r]\n where a line ending is expected
+void Base64Decoder::Decode()
+{
+    word32 bytes = coded_.size();
+    word32 plainSz = bytes - ((bytes + (pemLineSz - 1)) / pemLineSz); 
+    const  byte maxIdx = (byte)sizeof(base64Decode) + 0x2B - 1;
+    plainSz = ((plainSz * 3) / 4) + 3;
+    decoded_.New(plainSz);
+
+    word32 i = 0;
+    word32 j = 0;
+
+    while (bytes > 3) {
+        byte e1 = coded_.next();
+        byte e2 = coded_.next();
+        byte e3 = coded_.next();
+        byte e4 = coded_.next();
+
+        if (e1 == 0)            // end file 0's
+            break;
+
+        const bool pad3 = (e3 == pad);
+        const bool pad4 = (e4 == pad);
+
+        if (e1 < 0x2B || e2 < 0x2B || e3 < 0x2B || e4 < 0x2B) {
+            coded_.SetError(PEM_E);
+            return;
+        }
+
+        if (e1 > maxIdx || e2 > maxIdx || e3 > maxIdx || e4 > maxIdx) {
+            coded_.SetError(PEM_E);
+            return;
+        }
+
+        e1 = base64Decode[e1 - 0x2B];
+        e2 = base64Decode[e2 - 0x2B];
+        e3 = pad3 ? 0 : base64Decode[e3 - 0x2B];
+        e4 = pad4 ? 0 : base64Decode[e4 - 0x2B];
+
+        // characters inside the table range but outside the alphabet map to
+        // bad; they used to be decoded as if they were data
+        if (e1 == bad || e2 == bad || e3 == bad || e4 == bad) {
+            coded_.SetError(PEM_E);
+            return;
+        }
+
+        byte b1 = (e1 << 2) | (e2 >> 4);
+        byte b2 = ((e2 & 0xF) << 4) | (e3 >> 2);
+        byte b3 = ((e3 & 0x3) << 6) | e4;
+
+        decoded_[i++] = b1;
+        if (!pad3)
+            decoded_[i++] = b2;
+        if (!pad4)
+            decoded_[i++] = b3;
+        else
+            break;
+        
+        bytes -= 4;
+        if ((++j % 16) == 0) {
+            // input that stops right after a full line has no line ending
+            // to read; consuming one would wrap the unsigned counter
+            if (bytes == 0)
+                break;
+
+            byte endLine = coded_.next();
+            bytes--;
+            while (endLine == ' ' && bytes) {   // remove possible whitespace
+                endLine = coded_.next();
+                bytes--;
+            }
+            if (endLine == '\r' && bytes) {
+                endLine = coded_.next();
+                bytes--;
+            }
+            if (endLine != '\n') {
+                coded_.SetError(PEM_E); 
+                return;
+            }
+        }
+    }
+
+    if (i != decoded_.size())
+        decoded_.resize(i);
+    coded_.reset(decoded_);
+}
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/des.cpp b/mysql/extra/yassl/taocrypt/src/des.cpp
new file mode 100644
index 0000000..53777f0
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/des.cpp
@@ -0,0 +1,778 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* C++ part based on Wei Dai's des.cpp from CryptoPP */
+/* x86 asm is original */
+
+
+#if defined(TAOCRYPT_KERNEL_MODE)
+    #define DO_TAOCRYPT_KERNEL_MODE
+#endif                                  // only some modules now support this
+
+
+#include "runtime.hpp"
+#include "des.hpp"
+#ifdef USE_SYS_STL
+    #include <algorithm>
+#else
+    #include "algorithm.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+
+
+
+namespace TaoCrypt {
+
+
+/* permuted choice table (key): 56 one-based bit numbers into the 8 byte key */
+static const byte pc1[] = {
+       57, 49, 41, 33, 25, 17,  9,
+        1, 58, 50, 42, 34, 26, 18,
+       10,  2, 59, 51, 43, 35, 27,
+       19, 11,  3, 60, 52, 44, 36,
+
+       63, 55, 47, 39, 31, 23, 15,
+        7, 62, 54, 46, 38, 30, 22,
+       14,  6, 61, 53, 45, 37, 29,
+       21, 13,  5, 28, 20, 12,  4
+};
+
+/* number left rotations of pc1, one entry per SetKey iteration (16 total) */
+static const byte totrot[] = {
+       1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
+};
+
+/* permuted choice key (table): 48 one-based indices into the rotated bits */
+static const byte pc2[] = {
+       14, 17, 11, 24,  1,  5,
+        3, 28, 15,  6, 21, 10,
+       23, 19, 12,  4, 26,  8,
+       16,  7, 27, 20, 13,  2,
+       41, 52, 31, 37, 47, 55,
+       30, 40, 51, 45, 33, 48,
+       44, 49, 39, 56, 34, 53,
+       46, 42, 50, 36, 29, 32
+};
+
+/* End of DES-defined tables */
+
+/* bit 0 is left-most in byte; octal masks 0x80 down to 0x01 */
+static const int bytebit[] = {
+       0200,0100,040,020,010,04,02,01
+};
+
+const word32 Spbox[8][64] = {  // 8 lookup tables, each indexed by a 6 bit value
+{   // Spbox[0]
+0x01010400,0x00000000,0x00010000,0x01010404,
+0x01010004,0x00010404,0x00000004,0x00010000,
+0x00000400,0x01010400,0x01010404,0x00000400,
+0x01000404,0x01010004,0x01000000,0x00000004,
+0x00000404,0x01000400,0x01000400,0x00010400,
+0x00010400,0x01010000,0x01010000,0x01000404,
+0x00010004,0x01000004,0x01000004,0x00010004,
+0x00000000,0x00000404,0x00010404,0x01000000,
+0x00010000,0x01010404,0x00000004,0x01010000,
+0x01010400,0x01000000,0x01000000,0x00000400,
+0x01010004,0x00010000,0x00010400,0x01000004,
+0x00000400,0x00000004,0x01000404,0x00010404,
+0x01010404,0x00010004,0x01010000,0x01000404,
+0x01000004,0x00000404,0x00010404,0x01010400,
+0x00000404,0x01000400,0x01000400,0x00000000,
+0x00010004,0x00010400,0x00000000,0x01010004},
+{   // Spbox[1]
+0x80108020,0x80008000,0x00008000,0x00108020,
+0x00100000,0x00000020,0x80100020,0x80008020,
+0x80000020,0x80108020,0x80108000,0x80000000,
+0x80008000,0x00100000,0x00000020,0x80100020,
+0x00108000,0x00100020,0x80008020,0x00000000,
+0x80000000,0x00008000,0x00108020,0x80100000,
+0x00100020,0x80000020,0x00000000,0x00108000,
+0x00008020,0x80108000,0x80100000,0x00008020,
+0x00000000,0x00108020,0x80100020,0x00100000,
+0x80008020,0x80100000,0x80108000,0x00008000,
+0x80100000,0x80008000,0x00000020,0x80108020,
+0x00108020,0x00000020,0x00008000,0x80000000,
+0x00008020,0x80108000,0x00100000,0x80000020,
+0x00100020,0x80008020,0x80000020,0x00100020,
+0x00108000,0x00000000,0x80008000,0x00008020,
+0x80000000,0x80100020,0x80108020,0x00108000},
+{   // Spbox[2]
+0x00000208,0x08020200,0x00000000,0x08020008,
+0x08000200,0x00000000,0x00020208,0x08000200,
+0x00020008,0x08000008,0x08000008,0x00020000,
+0x08020208,0x00020008,0x08020000,0x00000208,
+0x08000000,0x00000008,0x08020200,0x00000200,
+0x00020200,0x08020000,0x08020008,0x00020208,
+0x08000208,0x00020200,0x00020000,0x08000208,
+0x00000008,0x08020208,0x00000200,0x08000000,
+0x08020200,0x08000000,0x00020008,0x00000208,
+0x00020000,0x08020200,0x08000200,0x00000000,
+0x00000200,0x00020008,0x08020208,0x08000200,
+0x08000008,0x00000200,0x00000000,0x08020008,
+0x08000208,0x00020000,0x08000000,0x08020208,
+0x00000008,0x00020208,0x00020200,0x08000008,
+0x08020000,0x08000208,0x00000208,0x08020000,
+0x00020208,0x00000008,0x08020008,0x00020200},
+{   // Spbox[3]
+0x00802001,0x00002081,0x00002081,0x00000080,
+0x00802080,0x00800081,0x00800001,0x00002001,
+0x00000000,0x00802000,0x00802000,0x00802081,
+0x00000081,0x00000000,0x00800080,0x00800001,
+0x00000001,0x00002000,0x00800000,0x00802001,
+0x00000080,0x00800000,0x00002001,0x00002080,
+0x00800081,0x00000001,0x00002080,0x00800080,
+0x00002000,0x00802080,0x00802081,0x00000081,
+0x00800080,0x00800001,0x00802000,0x00802081,
+0x00000081,0x00000000,0x00000000,0x00802000,
+0x00002080,0x00800080,0x00800081,0x00000001,
+0x00802001,0x00002081,0x00002081,0x00000080,
+0x00802081,0x00000081,0x00000001,0x00002000,
+0x00800001,0x00002001,0x00802080,0x00800081,
+0x00002001,0x00002080,0x00800000,0x00802001,
+0x00000080,0x00800000,0x00002000,0x00802080},
+{   // Spbox[4]
+0x00000100,0x02080100,0x02080000,0x42000100,
+0x00080000,0x00000100,0x40000000,0x02080000,
+0x40080100,0x00080000,0x02000100,0x40080100,
+0x42000100,0x42080000,0x00080100,0x40000000,
+0x02000000,0x40080000,0x40080000,0x00000000,
+0x40000100,0x42080100,0x42080100,0x02000100,
+0x42080000,0x40000100,0x00000000,0x42000000,
+0x02080100,0x02000000,0x42000000,0x00080100,
+0x00080000,0x42000100,0x00000100,0x02000000,
+0x40000000,0x02080000,0x42000100,0x40080100,
+0x02000100,0x40000000,0x42080000,0x02080100,
+0x40080100,0x00000100,0x02000000,0x42080000,
+0x42080100,0x00080100,0x42000000,0x42080100,
+0x02080000,0x00000000,0x40080000,0x42000000,
+0x00080100,0x02000100,0x40000100,0x00080000,
+0x00000000,0x40080000,0x02080100,0x40000100},
+{   // Spbox[5]
+0x20000010,0x20400000,0x00004000,0x20404010,
+0x20400000,0x00000010,0x20404010,0x00400000,
+0x20004000,0x00404010,0x00400000,0x20000010,
+0x00400010,0x20004000,0x20000000,0x00004010,
+0x00000000,0x00400010,0x20004010,0x00004000,
+0x00404000,0x20004010,0x00000010,0x20400010,
+0x20400010,0x00000000,0x00404010,0x20404000,
+0x00004010,0x00404000,0x20404000,0x20000000,
+0x20004000,0x00000010,0x20400010,0x00404000,
+0x20404010,0x00400000,0x00004010,0x20000010,
+0x00400000,0x20004000,0x20000000,0x00004010,
+0x20000010,0x20404010,0x00404000,0x20400000,
+0x00404010,0x20404000,0x00000000,0x20400010,
+0x00000010,0x00004000,0x20400000,0x00404010,
+0x00004000,0x00400010,0x20004010,0x00000000,
+0x20404000,0x20000000,0x00400010,0x20004010},
+{   // Spbox[6]
+0x00200000,0x04200002,0x04000802,0x00000000,
+0x00000800,0x04000802,0x00200802,0x04200800,
+0x04200802,0x00200000,0x00000000,0x04000002,
+0x00000002,0x04000000,0x04200002,0x00000802,
+0x04000800,0x00200802,0x00200002,0x04000800,
+0x04000002,0x04200000,0x04200800,0x00200002,
+0x04200000,0x00000800,0x00000802,0x04200802,
+0x00200800,0x00000002,0x04000000,0x00200800,
+0x04000000,0x00200800,0x00200000,0x04000802,
+0x04000802,0x04200002,0x04200002,0x00000002,
+0x00200002,0x04000000,0x04000800,0x00200000,
+0x04200800,0x00000802,0x00200802,0x04200800,
+0x00000802,0x04000002,0x04200802,0x04200000,
+0x00200800,0x00000000,0x00000002,0x04200802,
+0x00000000,0x00200802,0x04200000,0x00000800,
+0x04000002,0x04000800,0x00000800,0x00200002},
+{   // Spbox[7]
+0x10001040,0x00001000,0x00040000,0x10041040,
+0x10000000,0x10001040,0x00000040,0x10000000,
+0x00040040,0x10040000,0x10041040,0x00041000,
+0x10041000,0x00041040,0x00001000,0x00000040,
+0x10040000,0x10000040,0x10001000,0x00001040,
+0x00041000,0x00040040,0x10040040,0x10041000,
+0x00001040,0x00000000,0x00000000,0x10040040,
+0x10000040,0x10001000,0x00041040,0x00040000,
+0x00041040,0x00040000,0x10041000,0x00001000,
+0x00000040,0x10040040,0x00001000,0x00041040,
+0x10001000,0x00000040,0x10000040,0x10040000,
+0x10040040,0x10000000,0x00040000,0x10001040,
+0x00000000,0x10041040,0x00040040,0x10000040,
+0x10040000,0x10001000,0x10001040,0x00000000,
+0x10041040,0x00041000,0x00041000,0x00001040,
+0x00001040,0x00040040,0x10000000,0x10041000}
+};
+
+
+void BasicDES::SetKey(const byte* key, word32 /*length*/, CipherDir dir)
+{   /* expands the 8 key bytes into the 32 word schedule k_; length is unused */
+    byte buffer[56+56+8];                      /* scratch: pc1m + pcr + ks */
+    byte *const pc1m = buffer;                 /* place to modify pc1 into */
+    byte *const pcr = pc1m + 56;               /* place to rotate pc1 into */
+    byte *const ks = pcr + 56;                 /* 8 byte subkey being built */
+    int i,j,l;
+    int m;
+
+    for (j = 0; j < 56; j++) {          /* convert pc1 to bits of key */
+        l = pc1[j] - 1;                 /* integer bit location  */
+        m = l & 07;                     /* find bit              */
+        pc1m[j] = (key[l >> 3] &        /* find which key byte l is in */
+            bytebit[m])                 /* and which bit of that byte */
+            ? 1 : 0;                    /* and store 1-bit result */
+    }
+    for (i = 0; i < 16; i++) {          /* key chunk for each iteration */
+        memset(ks, 0, 8);               /* Clear key schedule */
+        for (j = 0; j < 56; j++)        /* rotate pc1 the right amount */
+            pcr[j] = pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l: l-28];
+        /* rotate left and right halves independently */
+        for (j = 0; j < 48; j++){   /* select bits individually */
+            /* check bit that goes to ks[j] */
+            if (pcr[pc2[j] - 1]){
+                /* mask it in if it's there */
+                l= j % 6;
+                ks[j/6] |= bytebit[l] >> 2;     /* six bits per ks byte, in the low six positions */
+            }
+        }
+        /* Now convert to odd/even interleaved form for use in F */
+        k_[2*i] = ((word32)ks[0] << 24)
+            | ((word32)ks[2] << 16)
+            | ((word32)ks[4] << 8)
+            | ((word32)ks[6]);
+        k_[2*i + 1] = ((word32)ks[1] << 24)
+            | ((word32)ks[3] << 16)
+            | ((word32)ks[5] << 8)
+            | ((word32)ks[7]);
+    }
+    
+    // reverse key schedule order (word pairs 0/1 <-> 30/31, 2/3 <-> 28/29, ...)
+    if (dir == DECRYPTION)
+        for (i = 0; i < 16; i += 2) {
+            STL::swap(k_[i],   k_[32 - 2 - i]);
+            STL::swap(k_[i+1], k_[32 - 1 - i]);
+        }
+   
+}
+
+static inline void IPERM(word32& left, word32& right)
+{   // applied to both halves before the rounds; presumably the DES initial permutation
+    word32 work;                            // bits where left and right differ under the mask
+
+    right = rotlFixed(right, 4U);
+    work = (left ^ right) & 0xf0f0f0f0;     // each step exchanges the masked bits
+    left ^= work;                           // of left and right (right's half of the
+                                            // exchange is folded into its next rotate)
+    right = rotrFixed(right^work, 20U);
+    work = (left ^ right) & 0xffff0000;
+    left ^= work;
+
+    right = rotrFixed(right^work, 18U);
+    work = (left ^ right) & 0x33333333;
+    left ^= work;
+
+    right = rotrFixed(right^work, 6U);
+    work = (left ^ right) & 0x00ff00ff;
+    left ^= work;
+
+    right = rotlFixed(right^work, 9U);
+    work = (left ^ right) & 0xaaaaaaaa;
+    left = rotlFixed(left^work, 1U);        // last step rotates left instead of right
+    right ^= work;
+}
+
+static inline void FPERM(word32& left, word32& right)
+{   // applied after the rounds; uses IPERM's masks in reverse order
+    word32 work;                            // bits where left and right differ under the mask
+
+    right = rotrFixed(right, 1U);
+    work = (left ^ right) & 0xaaaaaaaa;     // each step exchanges the masked bits
+    right ^= work;                          // of left and right
+    left = rotrFixed(left^work, 9U);
+    work = (left ^ right) & 0x00ff00ff;
+    right ^= work;
+    left = rotlFixed(left^work, 6U);
+    work = (left ^ right) & 0x33333333;
+    right ^= work;
+    left = rotlFixed(left^work, 18U);
+    work = (left ^ right) & 0xffff0000;
+    right ^= work;
+    left = rotlFixed(left^work, 20U);
+    work = (left ^ right) & 0xf0f0f0f0;
+    right ^= work;
+    left = rotrFixed(left^work, 4U);
+}
+
+
+void BasicDES::RawProcessBlock(word32& lIn, word32& rIn) const
+{   // mixes the key schedule k_ into the two words in place (no IPERM/FPERM here)
+    word32 l = lIn, r = rIn;
+    const word32* kptr = k_;
+
+    for (unsigned i=0; i<8; i++)    // each pass uses four key words: two for l, two for r
+    {
+        word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];   // update l from r
+        l ^= Spbox[6][(work) & 0x3f]
+          ^  Spbox[4][(work >> 8) & 0x3f]
+          ^  Spbox[2][(work >> 16) & 0x3f]
+          ^  Spbox[0][(work >> 24) & 0x3f];
+        work = r ^ kptr[4*i+1];
+        l ^= Spbox[7][(work) & 0x3f]
+          ^  Spbox[5][(work >> 8) & 0x3f]
+          ^  Spbox[3][(work >> 16) & 0x3f]
+          ^  Spbox[1][(work >> 24) & 0x3f];
+
+        work = rotrFixed(l, 4U) ^ kptr[4*i+2];          // update r from the new l
+        r ^= Spbox[6][(work) & 0x3f]
+          ^  Spbox[4][(work >> 8) & 0x3f]
+          ^  Spbox[2][(work >> 16) & 0x3f]
+          ^  Spbox[0][(work >> 24) & 0x3f];
+        work = l ^ kptr[4*i+3];
+        r ^= Spbox[7][(work) & 0x3f]
+          ^  Spbox[5][(work >> 8) & 0x3f]
+          ^  Spbox[3][(work >> 16) & 0x3f]
+          ^  Spbox[1][(work >> 24) & 0x3f];
+    }
+
+    lIn = l; rIn = r;               // results are written back through the references
+}
+
+
+
+typedef BlockGetAndPut<word32, BigEndian> Block;  // word32 get/put helper used by the ProcessAndXorBlock functions
+
+
+// Processes one block: two words are read from in, run through IPERM, the
+// keyed rounds and FPERM, then handed to Block::Put (together with xOr and
+// out) with the halves swapped.
+void DES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const
+{
+    word32 left, right;
+    Block::Get(in)(left)(right);
+
+    IPERM(left, right);
+    RawProcessBlock(left, right);
+    FPERM(left, right);
+
+    Block::Put(xOr, out)(right)(left);
+}
+
+
+// Keys the two DES instances: des1_ from the first 8 key bytes in direction
+// dir, des2_ from the following 8 bytes in the reverse direction.
+void DES_EDE2::SetKey(const byte* key, word32 sz, CipherDir dir)
+{
+    const byte* firstKey  = key;
+    const byte* secondKey = key + 8;
+
+    des1_.SetKey(firstKey, sz, dir);
+    des2_.SetKey(secondKey, sz, ReverseDir(dir));
+}
+
+
+// Processes one block with the des1_, des2_, des1_ sequence.  The middle pass
+// receives the two words in swapped order; IPERM and FPERM are applied once
+// around the whole sequence.
+void DES_EDE2::ProcessAndXorBlock(const byte* in, const byte* xOr,
+                                  byte* out) const
+{
+    word32 left, right;
+    Block::Get(in)(left)(right);
+
+    IPERM(left, right);
+
+    des1_.RawProcessBlock(left, right);
+    des2_.RawProcessBlock(right, left);
+    des1_.RawProcessBlock(left, right);
+
+    FPERM(left, right);
+
+    Block::Put(xOr, out)(right)(left);
+}
+
+
+// Keys the three DES instances from consecutive 8 byte pieces of key.
+// des2_ always takes the middle piece in the reverse direction; des1_ takes
+// the first piece only when encrypting and des3_ takes the first piece only
+// when decrypting, otherwise each takes the third piece.
+void DES_EDE3::SetKey(const byte* key, word32 sz, CipherDir dir)
+{
+    const byte* firstKey  = key;
+    const byte* middleKey = key + 8;
+    const byte* lastKey   = key;
+
+    if (dir != ENCRYPTION)
+        firstKey += 2*8;
+    if (dir != DECRYPTION)
+        lastKey += 2*8;
+
+    des1_.SetKey(firstKey, sz, dir);
+    des2_.SetKey(middleKey, sz, ReverseDir(dir));
+    des3_.SetKey(lastKey, sz, dir);
+}
+
+
+
+#if defined(DO_DES_ASM)
+
+// ia32 optimized version
+//
+// Processes sz / DES_BLOCK_SIZE whole blocks from in to out with AsmProcess;
+// bytes beyond the last whole block are ignored.  Without MMX the work is
+// handed to Mode_BASE::Process.  In CBC mode r_ carries the chaining value
+// from one block (and one call) to the next.
+void DES_EDE3::Process(byte* out, const byte* in, word32 sz)
+{
+    if (!isMMX) {
+        Mode_BASE::Process(out, in, sz);
+        return;
+    }
+
+    word32 blocks = sz / DES_BLOCK_SIZE;
+
+    if (mode_ == CBC) {
+        if (dir_ == ENCRYPTION) {
+            while (blocks--) {
+                r_[0] ^= *(word32*)in;
+                r_[1] ^= *(word32*)(in + 4);
+
+                AsmProcess((byte*)r_, (byte*)r_, (void*)Spbox);
+
+                memcpy(out, r_, DES_BLOCK_SIZE);
+
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
+            }
+        }
+        else {
+            byte hold[DES_BLOCK_SIZE];
+
+            while (blocks--) {
+                // keep the input block: it is the next chaining value and
+                // would be lost once out is written if out aliases in
+                memcpy(hold, in, DES_BLOCK_SIZE);
+
+                AsmProcess(in, out, (void*)Spbox);
+
+                *(word32*)out       ^= r_[0];
+                *(word32*)(out + 4) ^= r_[1];
+
+                memcpy(r_, hold, DES_BLOCK_SIZE);
+
+                out += DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+            }
+        }
+    }
+    else {
+        // no chaining: every block is processed on its own
+        while (blocks--) {
+            AsmProcess(in, out, (void*)Spbox);
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+    }
+}
+
+#endif // DO_DES_ASM
+
+
+// Processes one block with the des1_, des2_, des3_ sequence.  The middle pass
+// receives the two words in swapped order; IPERM and FPERM are applied once
+// around the whole sequence.
+void DES_EDE3::ProcessAndXorBlock(const byte* in, const byte* xOr,
+                                  byte* out) const
+{
+    word32 left, right;
+    Block::Get(in)(left)(right);
+
+    IPERM(left, right);
+
+    des1_.RawProcessBlock(left, right);
+    des2_.RawProcessBlock(right, left);
+    des3_.RawProcessBlock(left, right);
+
+    FPERM(left, right);
+
+    Block::Put(xOr, out)(right)(left);
+}
+
+
+#if defined(DO_DES_ASM)
+
+/* Uses IPERM algorithm from above: same rotate counts and masks, same order,
+   written as a run of AS2() two operand instructions.
+   left  is in eax
+   right is in ebx
+   Both registers hold the permuted values afterwards.
+   uses ecx (holds the "work" value of IPERM and is overwritten)
+*/
+#define AsmIPERM() \
+    AS2(    rol   ebx, 4                        )   \
+    AS2(    mov   ecx, eax                      )   \
+    AS2(    xor   ecx, ebx                      )   \
+    AS2(    and   ecx, 0xf0f0f0f0               )   \
+    AS2(    xor   ebx, ecx                      )   \
+    AS2(    xor   eax, ecx                      )   \
+    AS2(    ror   ebx, 20                       )   \
+    AS2(    mov   ecx, eax                      )   \
+    AS2(    xor   ecx, ebx                      )   \
+    AS2(    and   ecx, 0xffff0000               )   \
+    AS2(    xor   ebx, ecx                      )   \
+    AS2(    xor   eax, ecx                      )   \
+    AS2(    ror   ebx, 18                       )   \
+    AS2(    mov   ecx, eax                      )   \
+    AS2(    xor   ecx, ebx                      )   \
+    AS2(    and   ecx, 0x33333333               )   \
+    AS2(    xor   ebx, ecx                      )   \
+    AS2(    xor   eax, ecx                      )   \
+    AS2(    ror   ebx, 6                        )   \
+    AS2(    mov   ecx, eax                      )   \
+    AS2(    xor   ecx, ebx                      )   \
+    AS2(    and   ecx, 0x00ff00ff               )   \
+    AS2(    xor   ebx, ecx                      )   \
+    AS2(    xor   eax, ecx                      )   \
+    AS2(    rol   ebx, 9                        )   \
+    AS2(    mov   ecx, eax                      )   \
+    AS2(    xor   ecx, ebx                      )   \
+    AS2(    and   ecx, 0xaaaaaaaa               )   \
+    AS2(    xor   eax, ecx                      )   \
+    AS2(    rol   eax, 1                        )   \
+    AS2(    xor   ebx, ecx                      )
+
+
+/* AsmFPERM: x86 asm form of the FPERM algorithm from above; it uses the
+   same five masks as AsmIPERM, applied in the opposite order
+   left  is in eax (updated in place)
+   right is in ebx (updated in place)
+
+   uses ecx as scratch (clobbered)
+*/
+#define AsmFPERM()    \
+    AS2(    ror  ebx, 1                     )    \
+    AS2(    mov  ecx, eax                   )    \
+    AS2(    xor  ecx, ebx                   )    \
+    AS2(    and  ecx, 0xaaaaaaaa            )    \
+    AS2(    xor  eax, ecx                   )    \
+    AS2(    xor  ebx, ecx                   )    \
+    AS2(    ror  eax, 9                     )    \
+    AS2(    mov  ecx, ebx                   )    \
+    AS2(    xor  ecx, eax                   )    \
+    AS2(    and  ecx, 0x00ff00ff            )    \
+    AS2(    xor  eax, ecx                   )    \
+    AS2(    xor  ebx, ecx                   )    \
+    AS2(    rol  eax, 6                     )    \
+    AS2(    mov  ecx, ebx                   )    \
+    AS2(    xor  ecx, eax                   )    \
+    AS2(    and  ecx, 0x33333333            )    \
+    AS2(    xor  eax, ecx                   )    \
+    AS2(    xor  ebx, ecx                   )    \
+    AS2(    rol  eax, 18                    )    \
+    AS2(    mov  ecx, ebx                   )    \
+    AS2(    xor  ecx, eax                   )    \
+    AS2(    and  ecx, 0xffff0000            )    \
+    AS2(    xor  eax, ecx                   )    \
+    AS2(    xor  ebx, ecx                   )    \
+    AS2(    rol  eax, 20                    )    \
+    AS2(    mov  ecx, ebx                   )    \
+    AS2(    xor  ecx, eax                   )    \
+    AS2(    and  ecx, 0xf0f0f0f0            )    \
+    AS2(    xor  eax, ecx                   )    \
+    AS2(    xor  ebx, ecx                   )    \
+    AS2(    ror  eax, 4                     )
+
+
+
+
+/* DesRound implements this algorithm (updates l from r, then r from l):
+
+        word32 work = rotrFixed(r, 4U) ^ key[0];
+        l ^= Spbox[6][(work) & 0x3f]
+          ^  Spbox[4][(work >> 8) & 0x3f]
+          ^  Spbox[2][(work >> 16) & 0x3f]
+          ^  Spbox[0][(work >> 24) & 0x3f];
+        work = r ^ key[1];
+        l ^= Spbox[7][(work) & 0x3f]
+          ^  Spbox[5][(work >> 8) & 0x3f]
+          ^  Spbox[3][(work >> 16) & 0x3f]
+          ^  Spbox[1][(work >> 24) & 0x3f];
+
+        work = rotrFixed(l, 4U) ^ key[2];
+        r ^= Spbox[6][(work) & 0x3f]
+          ^  Spbox[4][(work >> 8) & 0x3f]
+          ^  Spbox[2][(work >> 16) & 0x3f]
+          ^  Spbox[0][(work >> 24) & 0x3f];
+        work = l ^ key[3];
+        r ^= Spbox[7][(work) & 0x3f]
+          ^  Spbox[5][(work >> 8) & 0x3f]
+          ^  Spbox[3][(work >> 16) & 0x3f]
+          ^  Spbox[1][(work >> 24) & 0x3f];
+
+   left  is in eax
+   right is in ebx
+   key   is in edx
+   Spbox base is in ebp; Spbox[i] is read at byte offset i*256 from it
+   advances key for next round (edx += 16, i.e. past key[0..3])
+
+   uses ecx, esi, and edi
+*/
+#define DesRound() \
+    AS2(    mov   ecx,  ebx                     )\
+    AS2(    mov   esi,  DWORD PTR [edx]         )\
+    AS2(    ror   ecx,  4                       )\
+    AS2(    xor   ecx,  esi                     )\
+    AS2(    and   ecx,  0x3f3f3f3f              )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   eax,  [ebp + esi*4 + 6*256]   )\
+    AS2(    shr   ecx,  16                      )\
+    AS2(    xor   eax,  [ebp + edi*4 + 4*256]   )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   eax,  [ebp + esi*4 + 2*256]   )\
+    AS2(    mov   esi,  DWORD PTR [edx + 4]     )\
+    AS2(    xor   eax,  [ebp + edi*4]           )\
+    AS2(    mov   ecx,  ebx                     )\
+    AS2(    xor   ecx,  esi                     )\
+    AS2(    and   ecx,  0x3f3f3f3f              )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   eax,  [ebp + esi*4 + 7*256]   )\
+    AS2(    shr   ecx,  16                      )\
+    AS2(    xor   eax,  [ebp + edi*4 + 5*256]   )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   eax,  [ebp + esi*4 + 3*256]   )\
+    AS2(    mov   esi,  DWORD PTR [edx + 8]     )\
+    AS2(    xor   eax,  [ebp + edi*4 + 1*256]   )\
+    AS2(    mov   ecx,  eax                     )\
+    AS2(    ror   ecx,  4                       )\
+    AS2(    xor   ecx,  esi                     )\
+    AS2(    and   ecx,  0x3f3f3f3f              )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   ebx,  [ebp + esi*4 + 6*256]   )\
+    AS2(    shr   ecx,  16                      )\
+    AS2(    xor   ebx,  [ebp + edi*4 + 4*256]   )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   ebx,  [ebp + esi*4 + 2*256]   )\
+    AS2(    mov   esi,  DWORD PTR [edx + 12]    )\
+    AS2(    xor   ebx,  [ebp + edi*4]           )\
+    AS2(    mov   ecx,  eax                     )\
+    AS2(    xor   ecx,  esi                     )\
+    AS2(    and   ecx,  0x3f3f3f3f              )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   ebx,  [ebp + esi*4 + 7*256]   )\
+    AS2(    shr   ecx,  16                      )\
+    AS2(    xor   ebx,  [ebp + edi*4 + 5*256]   )\
+    AS2(    movzx esi,  cl                      )\
+    AS2(    movzx edi,  ch                      )\
+    AS2(    xor   ebx,  [ebp + esi*4 + 3*256]   )\
+    AS2(    add   edx,  16                      )\
+    AS2(    xor   ebx,  [ebp + edi*4 + 1*256]   )
+
+
+#ifdef _MSC_VER
+    __declspec(naked) 
+#else
+    __attribute__ ((noinline))
+#endif
+void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const
+{
+#ifdef __GNUC__   // GCC: this/in/box/out arrive in edx/esi/eax/ecx
+    #define AS1(x)    #x ";"
+    #define AS2(x, y) #x ", " #y ";"
+
+    #define PROLOG()  \
+    __asm__ __volatile__ \
+    ( \
+        ".intel_syntax noprefix;" \
+        "push ebx;" \
+        "push ebp;" \
+        "movd mm6, ebp;" \
+        "movd mm7, ecx;" \
+        "mov  ebp, eax;"
+    #define EPILOG()  \
+        "pop ebp;" \
+        "pop ebx;" \
+       	"emms;" \
+       	".att_syntax;" \
+            :  \
+            : "d" (this), "S" (in), "a" (box), "c" (out) \
+            : "%edi", "memory", "cc" \
+    );
+
+#else             // MSVC naked function: arguments are read via ebp
+    #define AS1(x)      __asm x
+    #define AS2(x, y)   __asm x, y
+
+    #define PROLOG()  \
+        AS1(    push  ebp                           )   \
+        AS2(    mov   ebp, esp                      )   \
+        AS2(    movd  mm3, edi                      )   \
+        AS2(    movd  mm4, ebx                      )   \
+        AS2(    movd  mm5, esi                      )   \
+        AS2(    movd  mm6, ebp                      )   \
+        AS2(    mov   esi, DWORD PTR [ebp +  8]     )   \
+        AS2(    mov   edx, ecx                      )   \
+        AS2(    mov   ebp, DWORD PTR [ebp + 16]     )
+
+    // ebp restored at end (movd ebp, mm6 below, before outBlock is read)
+    #define EPILOG() \
+        AS2(    movd  edi, mm3                      )   \
+        AS2(    movd  ebx, mm4                      )   \
+        AS2(    movd  esi, mm5                      )   \
+        AS2(    mov   esp, ebp                      )   \
+        AS1(    pop   ebp                           )   \
+        AS1(    emms                                )   \
+        AS1(    ret 12                              )
+
+#endif
+    // Flow: load 8-byte block, AsmIPERM, three passes of 8 DesRound, AsmFPERM,
+    // store. edx walks the key words and is never reset between the passes.
+    PROLOG()
+
+    AS2(    movd  mm2, edx                      )   // NOTE(review): mm2 is not read back below
+
+    #ifdef OLD_GCC_OFFSET
+        AS2(    add   edx, 60                       )   // des1 = des1 key
+    #else
+        AS2(    add   edx, 56                       )   // des1 = des1 key
+    #endif
+
+    AS2(    mov   eax, DWORD PTR [esi]          )    // first  4 input bytes
+    AS2(    mov   ebx, DWORD PTR [esi + 4]      )    // second 4 input bytes
+    AS1(    bswap eax                           )    // left
+    AS1(    bswap ebx                           )    // right
+
+    AsmIPERM()
+
+    DesRound() // 1
+    DesRound() // 2
+    DesRound() // 3
+    DesRound() // 4
+    DesRound() // 5
+    DesRound() // 6
+    DesRound() // 7
+    DesRound() // 8
+
+    // swap left and right (second pass runs on the exchanged halves)
+    AS2(    xchg  eax, ebx                      )
+
+    DesRound() // 1
+    DesRound() // 2
+    DesRound() // 3
+    DesRound() // 4
+    DesRound() // 5
+    DesRound() // 6
+    DesRound() // 7
+    DesRound() // 8
+
+    // swap left and right (back again for the third pass)
+    AS2(    xchg  eax, ebx                      )
+
+    DesRound() // 1
+    DesRound() // 2
+    DesRound() // 3
+    DesRound() // 4
+    DesRound() // 5
+    DesRound() // 6
+    DesRound() // 7
+    DesRound() // 8
+
+    AsmFPERM()
+
+    // end of rounds: take back the ebp value PROLOG parked in mm6
+    AS2(    movd  ebp, mm6                      )
+
+    // swap and write out
+    AS1(    bswap ebx                           )
+    AS1(    bswap eax                           )
+
+#ifdef __GNUC__
+    AS2(    movd  esi, mm7   )   // outBlock
+#else
+    AS2(    mov   esi, DWORD PTR [ebp +  12]    )   // outBlock
+#endif
+
+    AS2(    mov   DWORD PTR [esi],     ebx      )   // right first
+    AS2(    mov   DWORD PTR [esi + 4], eax      )
+    
+
+    EPILOG()
+}
+
+
+
+#endif // defined(DO_DES_ASM)
+
+
+}  // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/dh.cpp b/mysql/extra/yassl/taocrypt/src/dh.cpp
new file mode 100644
index 0000000..615a8c6
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/dh.cpp
@@ -0,0 +1,103 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+/* dh.cpp implements Diffie-Hellman support
+*/
+
+#include "runtime.hpp"
+#include "dh.hpp"
+#include "asn.hpp"
+#include <math.h>
+
+namespace TaoCrypt {
+
+
+namespace {  // locals
+
+unsigned int DiscreteLogWorkFactor(unsigned int n)
+{
+    // assuming discrete log takes about the same time as factoring
+    if (n < 5) return 0;                // formula is not usable below 5
+    const double bits    = (double)n;
+    const double cubeRt  = pow(bits, 1.0/3.0);
+    const double logTerm = pow(log(bits), 2.0/3.0);
+    return (unsigned int)(2.4 * cubeRt * logTerm - 5);
+}
+
+} // namespace locals
+
+
+// Produce a DH key pair: private value into priv, public value into pub
+void DH::GenerateKeyPair(RandomNumberGenerator& rng, byte* priv, byte* pub)
+{
+    this->GeneratePrivate(rng, priv);   // private value first
+    this->GeneratePublic(priv, pub);    // public value is computed from it
+}
+
+
+// Draw a private value bounded by 1 and min(p - 1, 2^(2 * work factor))
+void DH::GeneratePrivate(RandomNumberGenerator& rng, byte* priv)
+{
+    const unsigned int expBits = 2 * DiscreteLogWorkFactor(p_.BitCount());
+    Integer x(rng, Integer::One(), min(p_ - 1, Integer::Power2(expBits)));
+    x.Encode(priv, p_.ByteCount());     // output is p_.ByteCount() bytes
+}
+
+
+// Compute the public value g^x mod p for the private value x held in priv
+void DH::GeneratePublic(const byte* priv, byte* pub)
+{
+    const word32 byteLen = p_.ByteCount();  // length used for priv and pub
+    Integer secret(priv, byteLen);
+    Integer pubVal = a_exp_b_mod_c(g_, secret, p_);
+    pubVal.Encode(pub, byteLen);
+}
+
+
+// Compute the agreed value into agree; otherSz == 0 means p_.ByteCount()
+void DH::Agree(byte* agree, const byte* priv, const byte* otherPub, word32
+               otherSz)
+{
+    const word32 modLen  = p_.ByteCount();
+    const word32 peerLen = otherSz ? otherSz : modLen;
+
+    Integer secret(priv, modLen);
+    Integer peer;
+    peer.Decode(otherPub, peerLen);
+
+    // result = peer^secret mod p, written out at the modulus length
+    Integer result = a_exp_b_mod_c(peer, secret, p_);
+    result.Encode(agree, modLen);
+}
+
+
+DH::DH(Source& source)
+{
+    this->Initialize(source);   // all set-up is delegated to Initialize()
+}
+
+
+void DH::Initialize(Source& source)
+{
+    DH_Decoder dhDecoder(source);   // decoder reads from the caller's source
+    dhDecoder.Decode(*this);        // and decodes into this object
+}
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/dsa.cpp b/mysql/extra/yassl/taocrypt/src/dsa.cpp
new file mode 100644
index 0000000..b19fed9
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/dsa.cpp
@@ -0,0 +1,274 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+#include "runtime.hpp"
+#include "dsa.hpp"
+#include "sha.hpp"
+#include "asn.hpp"
+#include "modarith.hpp"
+
+
+namespace TaoCrypt {
+
+
+void DSA_PublicKey::Swap(DSA_PublicKey& other)
+{
+    this->y_.Swap(other.y_);    // the four member swaps are independent,
+    this->g_.Swap(other.g_);    // so their order does not matter
+    this->q_.Swap(other.q_);
+    this->p_.Swap(other.p_);
+}
+
+
+DSA_PublicKey::DSA_PublicKey(const DSA_PublicKey& other)
+    : p_(other.p_), q_(other.q_), g_(other.g_), y_(other.y_)
+{}  // member-wise copy of p, q, g and y; nothing else to do
+
+
+DSA_PublicKey& DSA_PublicKey::operator=(const DSA_PublicKey& that)
+{
+    DSA_PublicKey copy(that);   // copy first ...
+    this->Swap(copy);           // ... then trade contents with the copy
+    return *this;               // previous contents go away with 'copy'
+}
+
+
+DSA_PublicKey::DSA_PublicKey(Source& source)
+{
+    this->Initialize(source);   // all set-up is delegated to Initialize()
+}
+
+
+void DSA_PublicKey::Initialize(Source& source)
+{
+    DSA_Public_Decoder pubDecoder(source);  // decoder reads from source
+    pubDecoder.Decode(*this);               // and decodes into this key
+}
+
+
+void DSA_PublicKey::Initialize(const Integer& p, const Integer& q,
+                               const Integer& g, const Integer& y)
+{
+    SetModulus(p);              // p_
+    SetSubGroupOrder(q);        // q_
+    SetSubGroupGenerator(g);    // g_
+    SetPublicPart(y);           // y_
+}
+   
+
+const Integer& DSA_PublicKey::GetModulus() const
+{
+    return this->p_;    // p
+}
+
+const Integer& DSA_PublicKey::GetSubGroupOrder() const
+{
+    return this->q_;    // q
+}
+
+
+const Integer& DSA_PublicKey::GetSubGroupGenerator() const
+{
+    return this->g_;    // g
+}
+
+
+const Integer& DSA_PublicKey::GetPublicPart() const
+{
+    return this->y_;    // y
+}
+
+
+void DSA_PublicKey::SetModulus(const Integer& p)
+{
+    this->p_ = p;       // stores a copy of p
+}
+
+
+void DSA_PublicKey::SetSubGroupOrder(const Integer& q)
+{
+    this->q_ = q;       // stores a copy of q
+}
+
+
+void DSA_PublicKey::SetSubGroupGenerator(const Integer& g)
+{
+    this->g_ = g;       // stores a copy of g
+}
+
+
+void DSA_PublicKey::SetPublicPart(const Integer& y)
+{
+    this->y_ = y;       // stores a copy of y
+}
+
+
+word32 DSA_PublicKey::SignatureLength() const
+{
+    return 2 * q_.ByteCount();  // one q-sized field each for r and s
+}
+
+
+
+DSA_PrivateKey::DSA_PrivateKey(Source& source)
+{
+    this->Initialize(source);   // all set-up is delegated to Initialize()
+}
+
+
+void DSA_PrivateKey::Initialize(Source& source)
+{
+    DSA_Private_Decoder privDecoder(source);    // decoder reads from source
+    privDecoder.Decode(*this);                  // and decodes into this key
+}
+
+
+void DSA_PrivateKey::Initialize(const Integer& p, const Integer& q,
+                                const Integer& g, const Integer& y,
+                                const Integer& x)
+{
+    this->DSA_PublicKey::Initialize(p, q, g, y);    // public components
+    SetPrivatePart(x);                              // then x_
+}
+
+
+const Integer& DSA_PrivateKey::GetPrivatePart() const
+{
+    return this->x_;    // x
+}
+
+
+void DSA_PrivateKey::SetPrivatePart(const Integer& x)
+{
+    this->x_ = x;       // stores a copy of x
+}
+
+
+DSA_Signer::DSA_Signer(const DSA_PrivateKey& key)
+    : key_(key)
+{}  // only key_ is set here; r_ and s_ are assigned by Sign()
+
+
+word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig,
+                        RandomNumberGenerator& rng)
+{
+    // Writes r then s into sig and returns 40; returns (word32)-1 instead,
+    // before touching sig, when r or s comes out zero.
+
+    // components of the signing key
+    const Integer& p = key_.GetModulus();
+    const Integer& q = key_.GetSubGroupOrder();
+    const Integer& g = key_.GetSubGroupGenerator();
+    const Integer& x = key_.GetPrivatePart();
+
+    // per-signature value k, constructed from rng with bounds 1 and q - 1
+    Integer k(rng, 1, q - 1);
+
+    // r = (g^k mod p) mod q
+    r_ = a_exp_b_mod_c(g, k, p);
+    r_ %= q;
+
+    // s = k^-1 * (H(m) + x*r) mod q
+    Integer digest(sha_digest, SHA::DIGEST_SIZE);
+    Integer kInverse = k.InverseMod(q);
+    s_ = (kInverse * (digest + x*r_)) % q;
+
+    // a zero r or s is reported as failure
+    if (!r_ || !s_)
+        return -1;
+
+    // each component is right-aligned in its own DIGEST_SIZE-byte slot:
+    // i == 0 puts r at sig, i == 1 puts s at sig + DIGEST_SIZE
+    Integer* parts[2] = { &r_, &s_ };
+    for (int i = 0; i < 2; ++i) {
+        byte*     out = sig + i * SHA::DIGEST_SIZE;
+        const int len = parts[i]->ByteCount();
+
+        for (int pad = len; pad < SHA::DIGEST_SIZE; ++pad)
+            *out++ = 0;             // leading zero fill
+
+        parts[i]->Encode(out, len);
+    }
+
+    return 40;
+}
+
+
+DSA_Verifier::DSA_Verifier(const DSA_PublicKey& key)
+    : key_(key)
+{}  // only key_ is set here; r_ and s_ are decoded by Verify()
+
+
+bool DSA_Verifier::Verify(const byte* sha_digest, const byte* sig)
+{
+    const Integer& p = key_.GetModulus();
+    const Integer& q = key_.GetSubGroupOrder();
+    const Integer& g = key_.GetSubGroupGenerator();
+    const Integer& y = key_.GetPublicPart();
+
+    // sig holds r then s, each read as q.ByteCount() bytes
+    const int partLen = q.ByteCount();
+    r_.Decode(sig, partLen);
+    s_.Decode(sig + partLen, partLen);
+
+    // reject unless 1 <= r < q and 1 <= s < q
+    if (r_ >= q || r_ < 1) return false;
+    if (s_ >= q || s_ < 1) return false;
+
+    Integer digest(sha_digest, SHA::DIGEST_SIZE);   // sha Hash(m)
+    Integer sInverse = s_.InverseMod(q);
+    Integer u1 = (digest * sInverse) % q;
+    Integer u2 = (r_     * sInverse) % q;
+
+    // v = ((g^u1 * y^u2) mod p) mod q; the signature verifies iff v == r
+    ModularArithmetic modP(p);
+    Integer v = modP.CascadeExponentiate(g, u1, y, u2);
+    v %= q;
+
+    return r_ == v;
+}
+
+
+
+
+const Integer& DSA_Signer::GetR() const
+{
+    return this->r_;    // r as last assigned by Sign()
+}
+
+
+const Integer& DSA_Signer::GetS() const
+{
+    return this->s_;    // s as last assigned by Sign()
+}
+
+
+const Integer& DSA_Verifier::GetR() const
+{
+    return this->r_;    // r as last decoded by Verify()
+}
+
+
+const Integer& DSA_Verifier::GetS() const
+{
+    return this->s_;    // s as last decoded by Verify()
+}
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/file.cpp b/mysql/extra/yassl/taocrypt/src/file.cpp
new file mode 100644
index 0000000..7c2044b
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/file.cpp
@@ -0,0 +1,115 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* file.cpp implements File Sources and Sinks
+*/
+
+#include "runtime.hpp"
+#include "file.hpp"
+
+
+namespace TaoCrypt {
+
+
+FileSource::FileSource(const char* fname, Source& source)
+{
+    file_ = fopen(fname, "rb");         // null when the open fails
+    if (file_ != 0) get(source);        // only read from a real handle
+}
+
+
+FileSource::~FileSource()
+{
+    // fclose must not be handed a null stream, so check first
+    if (file_ != 0) fclose(file_);
+}
+
+
+
+// return size of source from beginning or current position
+word32 FileSource::size(bool use_current)
+{
+    long current = ftell(file_);
+    long begin   = current;
+
+    if (!use_current) {
+        fseek(file_, 0, SEEK_SET);
+        begin = ftell(file_);
+    }
+
+    fseek(file_, 0, SEEK_END);
+    long end = ftell(file_);
+
+    fseek(file_, current, SEEK_SET);
+
+    return end - begin;
+}
+
+
+word32 FileSource::size_left()
+{
+    return this->size(true);    // true: measure from the current position
+}
+
+
+// fill source from the file; returns bytes read, or 0 if the read fell short
+word32 FileSource::get(Source& source)
+{
+    if (!file_) return 0;               // open failed: nothing to read
+
+    word32 sz(size());
+    if (source.size() < sz)
+        source.grow(sz);
+
+    // fread(ptr, 1, sz, f) returns the number of BYTES read, so success is
+    // bytes == sz; comparing against 1 only succeeded for 1-byte files
+    size_t bytes = fread(source.buffer_.get_buffer(), 1, sz, file_);
+    return (bytes == sz) ? sz : 0;
+}
+
+
+FileSink::FileSink(const char* fname, Source& source)
+{
+    file_ = fopen(fname, "wb");         // null when the open fails
+    if (file_ != 0) put(source);        // only write to a real handle
+}
+
+
+FileSink::~FileSink()
+{
+    // fclose must not be handed a null stream, so check first
+    if (file_ != 0) fclose(file_);
+}
+
+
+// write the contents of source out to the sink file (no-op if open failed)
+void FileSink::put(Source& source)
+{
+    if (file_) fwrite(source.get_buffer(), 1, source.size(), file_);
+}
+
+
+// take over otherBlock's contents (by swap) and rewind to offset 0
+void Source::reset(ByteBlock& otherBlock)
+{
+    this->buffer_.Swap(otherBlock);     // otherBlock receives the old buffer
+    this->current_ = 0;
+}
+
+
+}  // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/hash.cpp b/mysql/extra/yassl/taocrypt/src/hash.cpp
new file mode 100644
index 0000000..c176e6a
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/hash.cpp
@@ -0,0 +1,191 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* hash.cpp implements a base for digest types
+*/
+
+#include "runtime.hpp"
+#include <string.h>
+
+#include "hash.hpp"
+
+
+namespace TaoCrypt {
+
+
+HASHwithTransform::HASHwithTransform(word32 digSz, word32 buffSz)
+{   // digSz and buffSz are accepted but not used in this body
+}
+
+
+void HASHwithTransform::AddLength(word32 len)
+{
+    const HashLengthType before = loLen_;
+    loLen_ += len;
+    if (loLen_ < before) ++hiLen_;      // low part wrapped: carry into high
+    hiLen_ += SafeRightShift<8*sizeof(HashLengthType)>(len);
+}
+
+
+// Absorb len bytes of data, running Transform() on each completed block
+void HASHwithTransform::Update(const byte* data, word32 len)
+{
+    const word32 blockLen = getBlockSize();
+    byte* const  bytes    = reinterpret_cast<byte*>(buffer_);
+
+    while (len > 0) {
+        const word32 chunk = min(len, blockLen - buffLen_);  // what fits
+        memcpy(bytes + buffLen_, data, chunk);
+
+        data     += chunk;
+        len      -= chunk;
+        buffLen_ += chunk;
+
+        if (buffLen_ != blockLen)
+            continue;                   // block not full yet
+
+        ByteReverseIf(bytes, bytes, blockLen, getByteOrder());
+        Transform();
+        AddLength(blockLen);
+        buffLen_ = 0;
+    }
+}
+
+
+// Finish: pad the buffer, append the length words, emit the digest, re-Init
+void HASHwithTransform::Final(byte* hash)
+{
+    const word32    blockLen  = getBlockSize();
+    const word32    digestLen = getDigestSize();
+    const word32    padLen    = getPadSize();
+    const ByteOrder order     = getByteOrder();
+
+    AddLength(buffLen_);                        // count the buffered tail
+    HashLengthType bitsLo = GetBitCountLo();    // read before padding
+    HashLengthType bitsHi = GetBitCountHi();
+    byte* const    bytes  = reinterpret_cast<byte*>(buffer_);
+
+    bytes[buffLen_++] = 0x80;                   // append the 0x80 marker
+
+    if (buffLen_ > padLen) {
+        // already past padLen: zero-fill and process this block first
+        memset(bytes + buffLen_, 0, blockLen - buffLen_);
+        buffLen_ = blockLen;
+        ByteReverseIf(bytes, bytes, blockLen, order);
+        Transform();
+        buffLen_ = 0;
+    }
+    memset(bytes + buffLen_, 0, padLen - buffLen_);     // zeros up to padLen
+
+    ByteReverseIf(bytes, bytes, blockLen, order);
+
+    // length words are stored after the reversal; high first if order != 0
+    memcpy(bytes + padLen,     order ? &bitsHi : &bitsLo, sizeof(bitsLo));
+    memcpy(bytes + padLen + 4, order ? &bitsLo : &bitsHi, sizeof(bitsLo));
+
+    Transform();
+    ByteReverseIf(digest_, digest_, digestLen, order);
+    memcpy(hash, digest_, digestLen);
+
+    Init();  // reset state
+}
+
+
+#ifdef WORD64_AVAILABLE
+
+HASH64withTransform::HASH64withTransform(word32 digSz, word32 buffSz)
+{   // digSz and buffSz are accepted but not used in this body
+}
+
+
+void HASH64withTransform::AddLength(word32 len)
+{
+    const HashLengthType before = loLen_;
+    loLen_ += len;
+    if (loLen_ < before) ++hiLen_;      // low part wrapped: carry into high
+    hiLen_ += SafeRightShift<8*sizeof(HashLengthType)>(len);
+}
+
+
+// Absorb len bytes of data, running Transform() on each completed block
+void HASH64withTransform::Update(const byte* data, word32 len)
+{
+    const word32 blockLen = getBlockSize();
+    byte* const  bytes    = reinterpret_cast<byte*>(buffer_);
+
+    while (len > 0) {
+        const word32 chunk = min(len, blockLen - buffLen_);  // what fits
+        memcpy(bytes + buffLen_, data, chunk);
+
+        data     += chunk;
+        len      -= chunk;
+        buffLen_ += chunk;
+
+        if (buffLen_ != blockLen)
+            continue;                   // block not full yet
+
+        ByteReverseIf(buffer_, buffer_, blockLen, getByteOrder());
+        Transform();
+        AddLength(blockLen);
+        buffLen_ = 0;
+    }
+}
+
+
+// Finish: pad the buffer, append the length words, emit the digest, re-Init
+void HASH64withTransform::Final(byte* hash)
+{
+    const word32    blockLen  = getBlockSize();
+    const word32    digestLen = getDigestSize();
+    const word32    padLen    = getPadSize();
+    const ByteOrder order     = getByteOrder();
+
+    AddLength(buffLen_);                        // count the buffered tail
+    HashLengthType bitsLo = GetBitCountLo();    // read before padding
+    HashLengthType bitsHi = GetBitCountHi();
+    byte* const    bytes  = reinterpret_cast<byte*>(buffer_);
+
+    bytes[buffLen_++] = 0x80;                   // append the 0x80 marker
+
+    if (buffLen_ > padLen) {
+        // already past padLen: zero-fill and process this block first
+        memset(bytes + buffLen_, 0, blockLen - buffLen_);
+        buffLen_ = blockLen;
+        ByteReverseIf(buffer_, buffer_, blockLen, order);
+        Transform();
+        buffLen_ = 0;
+    }
+    memset(bytes + buffLen_, 0, padLen - buffLen_);     // zeros up to padLen
+
+    ByteReverseIf(buffer_, buffer_, padLen, order);
+
+    // last two word64 slots take the length; high first if order != 0
+    buffer_[blockLen / sizeof(word64) - 2] = order ? bitsHi : bitsLo;
+    buffer_[blockLen / sizeof(word64) - 1] = order ? bitsLo : bitsHi;
+
+    Transform();
+    ByteReverseIf(digest_, digest_, digestLen, order);
+    memcpy(hash, digest_, digestLen);
+
+    Init();  // reset state
+}
+
+#endif // WORD64_AVAILABLE
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/hc128.cpp b/mysql/extra/yassl/taocrypt/src/hc128.cpp
new file mode 100644
index 0000000..1d329c8
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/hc128.cpp
@@ -0,0 +1,317 @@
+/*
+   Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+#include "runtime.hpp"
+#include "hc128.hpp"
+
+
+
+namespace TaoCrypt {
+
+
+
+
+#ifdef BIG_ENDIAN_ORDER    // LITTLE32: byte-swap a word32 only on big-endian builds
+    #define LITTLE32(x) ByteReverse((word32)x)   // swap via ByteReverse
+#else
+    #define LITTLE32(x) (x)                      // value used unchanged
+#endif  // BIG_ENDIAN_ORDER
+
+
+/*h1 function: y = T_[512 + (low byte of x)] + T_[512+256 + (bits 16..23 of x)]*/
+#define h1(x, y) {                              \
+     byte a,c;                                  \
+     a = (byte) (x);                            \
+     c = (byte) ((x) >> 16);                    \
+     y = (T_[512+a])+(T_[512+256+c]);           \
+}
+
+/*h2 function: y = T_[(low byte of x)] + T_[256 + (bits 16..23 of x)]*/
+#define h2(x, y) {                              \
+     byte a,c;                                  \
+     a = (byte) (x);                            \
+     c = (byte) ((x) >> 16);                    \
+     y = (T_[a])+(T_[256+c]);                   \
+}
+
+/*one step of HC-128, update P and generate 32 bits keystream (stored in n)*/
+#define step_P(u,v,a,b,c,d,n){                  \
+     word32 tem0,tem1,tem2,tem3;                \
+     h1((X_[(d)]),tem3);                        \
+     tem0 = rotrFixed((T_[(v)]),23);            \
+     tem1 = rotrFixed((X_[(c)]),10);            \
+     tem2 = rotrFixed((X_[(b)]),8);             \
+     (T_[(u)]) += tem2+(tem0 ^ tem1);           \
+     (X_[(a)]) = (T_[(u)]);  /* keep a copy */  \
+     (n) = tem3 ^ (T_[(u)]) ;                   \
+}       
+
+/*one step of HC-128, update Q and generate 32 bits keystream (stored in n)*/
+#define step_Q(u,v,a,b,c,d,n){                  \
+     word32 tem0,tem1,tem2,tem3;                \
+     h2((Y_[(d)]),tem3);                        \
+     tem0 = rotrFixed((T_[(v)]),(32-23));       \
+     tem1 = rotrFixed((Y_[(c)]),(32-10));       \
+     tem2 = rotrFixed((Y_[(b)]),(32-8));        \
+     (T_[(u)]) += tem2 + (tem0 ^ tem1);         \
+     (Y_[(a)]) = (T_[(u)]);  /* keep a copy */  \
+     (n) = tem3 ^ (T_[(u)]) ;                   \
+}   
+
+
+/*16 steps of HC-128, generate 512 bits keystream (written to keystream[0..15])*/
+void HC128::GenerateKeystream(word32* keystream)  
+{
+   word32 cc,dd;
+   cc = counter1024_ & 0x1ff;    /* counter reduced to 0..511 */
+   dd = (cc+16)&0x1ff;           /* cc+16 wrapped to 0..511, used by the 16th step */
+   /* counter below 512: step_P on T_[0..511]; otherwise step_Q on T_[512..1023] */
+   if (counter1024_ < 512)	
+   {   		
+      counter1024_ = (counter1024_ + 16) & 0x3ff;   /* advance 16, wrap at 1024 */
+      step_P(cc+0, cc+1, 0, 6, 13,4, keystream[0]);
+      step_P(cc+1, cc+2, 1, 7, 14,5, keystream[1]);
+      step_P(cc+2, cc+3, 2, 8, 15,6, keystream[2]);
+      step_P(cc+3, cc+4, 3, 9, 0, 7, keystream[3]);
+      step_P(cc+4, cc+5, 4, 10,1, 8, keystream[4]);
+      step_P(cc+5, cc+6, 5, 11,2, 9, keystream[5]);
+      step_P(cc+6, cc+7, 6, 12,3, 10,keystream[6]);
+      step_P(cc+7, cc+8, 7, 13,4, 11,keystream[7]);
+      step_P(cc+8, cc+9, 8, 14,5, 12,keystream[8]);
+      step_P(cc+9, cc+10,9, 15,6, 13,keystream[9]);
+      step_P(cc+10,cc+11,10,0, 7, 14,keystream[10]);
+      step_P(cc+11,cc+12,11,1, 8, 15,keystream[11]);
+      step_P(cc+12,cc+13,12,2, 9, 0, keystream[12]);
+      step_P(cc+13,cc+14,13,3, 10,1, keystream[13]);
+      step_P(cc+14,cc+15,14,4, 11,2, keystream[14]);
+      step_P(cc+15,dd+0, 15,5, 12,3, keystream[15]);
+   }
+   else				    
+   {
+	  counter1024_ = (counter1024_ + 16) & 0x3ff;   /* advance 16, wrap at 1024 */
+      step_Q(512+cc+0, 512+cc+1, 0, 6, 13,4, keystream[0]);
+      step_Q(512+cc+1, 512+cc+2, 1, 7, 14,5, keystream[1]);
+      step_Q(512+cc+2, 512+cc+3, 2, 8, 15,6, keystream[2]);
+      step_Q(512+cc+3, 512+cc+4, 3, 9, 0, 7, keystream[3]);
+      step_Q(512+cc+4, 512+cc+5, 4, 10,1, 8, keystream[4]);
+      step_Q(512+cc+5, 512+cc+6, 5, 11,2, 9, keystream[5]);
+      step_Q(512+cc+6, 512+cc+7, 6, 12,3, 10,keystream[6]);
+      step_Q(512+cc+7, 512+cc+8, 7, 13,4, 11,keystream[7]);
+      step_Q(512+cc+8, 512+cc+9, 8, 14,5, 12,keystream[8]);
+      step_Q(512+cc+9, 512+cc+10,9, 15,6, 13,keystream[9]);
+      step_Q(512+cc+10,512+cc+11,10,0, 7, 14,keystream[10]);
+      step_Q(512+cc+11,512+cc+12,11,1, 8, 15,keystream[11]);
+      step_Q(512+cc+12,512+cc+13,12,2, 9, 0, keystream[12]);
+      step_Q(512+cc+13,512+cc+14,13,3, 10,1, keystream[13]);
+      step_Q(512+cc+14,512+cc+15,14,4, 11,2, keystream[14]);
+      step_Q(512+cc+15,512+dd+0, 15,5, 12,3, keystream[15]);
+   }
+}
+
+
+/* The following defines the initialization functions (used by SetIV to expand T_) */
+#define f1(x)  (rotrFixed((x),7)  ^ rotrFixed((x),18) ^ ((x) >> 3))
+#define f2(x)  (rotrFixed((x),17) ^ rotrFixed((x),19) ^ ((x) >> 10))
+
+/*update table P: tem3 (h1 of X_[d]) is XORed into T_[u]; nothing is output*/
+#define update_P(u,v,a,b,c,d){                      \
+     word32 tem0,tem1,tem2,tem3;                    \
+     tem0 = rotrFixed((T_[(v)]),23);                \
+     tem1 = rotrFixed((X_[(c)]),10);                \
+     tem2 = rotrFixed((X_[(b)]),8);                 \
+     h1((X_[(d)]),tem3);                            \
+     (T_[(u)]) = ((T_[(u)]) + tem2+(tem0^tem1)) ^ tem3;     \
+     (X_[(a)]) = (T_[(u)]);                         \
+}  
+
+/*update table Q: tem3 (h2 of Y_[d]) is XORed into T_[u]; nothing is output*/
+#define update_Q(u,v,a,b,c,d){                      \
+     word32 tem0,tem1,tem2,tem3;                    \
+     tem0 = rotrFixed((T_[(v)]),(32-23));           \
+     tem1 = rotrFixed((Y_[(c)]),(32-10));           \
+     tem2 = rotrFixed((Y_[(b)]),(32-8));            \
+     h2((Y_[(d)]),tem3);                            \
+     (T_[(u)]) = ((T_[(u)]) + tem2+(tem0^tem1)) ^ tem3;     \
+     (Y_[(a)]) = (T_[(u)]);                         \
+}     
+
+/*16 steps of HC-128, without generating keystream, */
+/*but use the outputs to update P and Q*/
+void HC128::SetupUpdate()  /*each time 16 steps*/
+{
+   word32 cc,dd;
+   cc = counter1024_ & 0x1ff;    /* counter reduced to 0..511 */
+   dd = (cc+16)&0x1ff;           /* cc+16 wrapped to 0..511, used by the 16th step */
+   /* counter below 512: update_P on T_[0..511]; otherwise update_Q on T_[512..1023] */
+   if (counter1024_ < 512)	
+   {   		
+      counter1024_ = (counter1024_ + 16) & 0x3ff;   /* advance 16, wrap at 1024 */
+      update_P(cc+0, cc+1, 0, 6, 13, 4);
+      update_P(cc+1, cc+2, 1, 7, 14, 5);
+      update_P(cc+2, cc+3, 2, 8, 15, 6);
+      update_P(cc+3, cc+4, 3, 9, 0,  7);
+      update_P(cc+4, cc+5, 4, 10,1,  8);
+      update_P(cc+5, cc+6, 5, 11,2,  9);
+      update_P(cc+6, cc+7, 6, 12,3,  10);
+      update_P(cc+7, cc+8, 7, 13,4,  11);
+      update_P(cc+8, cc+9, 8, 14,5,  12);
+      update_P(cc+9, cc+10,9, 15,6,  13);
+      update_P(cc+10,cc+11,10,0, 7,  14);
+      update_P(cc+11,cc+12,11,1, 8,  15);
+      update_P(cc+12,cc+13,12,2, 9,  0);
+      update_P(cc+13,cc+14,13,3, 10, 1);
+      update_P(cc+14,cc+15,14,4, 11, 2);
+      update_P(cc+15,dd+0, 15,5, 12, 3);   
+   }
+   else				    
+   {
+      counter1024_ = (counter1024_ + 16) & 0x3ff;   /* advance 16, wrap at 1024 */
+      update_Q(512+cc+0, 512+cc+1, 0, 6, 13, 4);
+      update_Q(512+cc+1, 512+cc+2, 1, 7, 14, 5);
+      update_Q(512+cc+2, 512+cc+3, 2, 8, 15, 6);
+      update_Q(512+cc+3, 512+cc+4, 3, 9, 0,  7);
+      update_Q(512+cc+4, 512+cc+5, 4, 10,1,  8);
+      update_Q(512+cc+5, 512+cc+6, 5, 11,2,  9);
+      update_Q(512+cc+6, 512+cc+7, 6, 12,3,  10);
+      update_Q(512+cc+7, 512+cc+8, 7, 13,4,  11);
+      update_Q(512+cc+8, 512+cc+9, 8, 14,5,  12);
+      update_Q(512+cc+9, 512+cc+10,9, 15,6,  13);
+      update_Q(512+cc+10,512+cc+11,10,0, 7,  14);
+      update_Q(512+cc+11,512+cc+12,11,1, 8,  15);
+      update_Q(512+cc+12,512+cc+13,12,2, 9,  0);
+      update_Q(512+cc+13,512+cc+14,13,3, 10, 1);
+      update_Q(512+cc+14,512+cc+15,14,4, 11, 2);
+      update_Q(512+cc+15,512+dd+0, 15,5, 12, 3); 
+   }       
+}
+
+
+/* for the 128-bit key:  key[0]...key[15]
+*  key[0] is the least significant byte of key_[0] (K_0);
+*  key[3] is the most significant byte of key_[0]  (K_0);
+*  ...
+*  key[12] is the least significant byte of key_[3] (K_3)
+*  key[15] is the most significant byte of key_[3]  (K_3)
+*
+*  for the 128-bit iv:  iv[0]...iv[15]
+*  iv[0] is the least significant byte of iv_[0] (IV_0);
+*  iv[3] is the most significant byte of iv_[0]  (IV_0);
+*  ...
+*  iv[12] is the least significant byte of iv_[3] (IV_3)
+*  iv[15] is the most significant byte of iv_[3]  (IV_3)
+*/
+
+
+
+void HC128::SetIV(const byte* iv)
+{ 
+    word32 i;   /* rebuilds T_, X_, Y_ and the counter from key_ and this IV */
+    /* 128-bit IV: build each word from its four bytes, iv[4*i] being the
+       least significant, so 'iv' needs no word32 alignment on any host */
+    for (i = 0; i < (128 >> 5); i++)
+        iv_[i] = word32(iv[4*i])           | (word32(iv[4*i+1]) << 8) |
+                 (word32(iv[4*i+2]) << 16) | (word32(iv[4*i+3]) << 24);
+    for (; i < 8; i++) iv_[i] = iv_[i-4];   /* IV_4..IV_7 repeat IV_0..IV_3 */
+    /* expand the key and IV into the table T */ 
+    /* (expand the key and IV into the table P and Q) */ 
+    /* T_[0..7] take the key words, T_[8..15] the IV words */
+    for (i = 0; i < 8;  i++)   T_[i] = key_[i];
+    for (i = 8; i < 16; i++)   T_[i] = iv_[i-8];
+
+    for (i = 16; i < (256+16); i++) 
+        T_[i] = f2(T_[i-2]) + T_[i-7] + f1(T_[i-15]) + T_[i-16]+i;
+    /* the last 16 words of that pass become the seed of the final table */
+    for (i = 0; i < 16;  i++)  T_[i] = T_[256+i];
+
+    for (i = 16; i < 1024; i++) 
+        T_[i] = f2(T_[i-2]) + T_[i-7] + f1(T_[i-15]) + T_[i-16]+256+i;
+    
+    /* initialize counter1024, X and Y */
+    counter1024_ = 0;
+    for (i = 0; i < 16; i++) X_[i] = T_[512-16+i];
+    for (i = 0; i < 16; i++) Y_[i] = T_[512+512-16+i];
+    
+    /* run the cipher 1024 steps before generating the output */
+    for (i = 0; i < 64; i++)  SetupUpdate();  
+}
+
+
+void HC128::SetKey(const byte* key, const byte* iv)
+{ 
+  word32 i;  
+  /* stores the key words in key_, then SetIV(iv) builds the cipher state */
+  /* Key size in bits 128; bytes are combined LSB first, no aligned cast */ 
+  for (i = 0; i < (128 >> 5); i++)
+      key_[i] = word32(key[4*i])           | (word32(key[4*i+1]) << 8) |
+                (word32(key[4*i+2]) << 16) | (word32(key[4*i+3]) << 24);
+  for ( ; i < 8 ; i++) key_[i] = key_[i-4];   /* K_4..K_7 repeat K_0..K_3 */
+
+  SetIV(iv);
+}
+
+
+/* The following defines the encryption of data stream */
+void HC128::Process(byte* output, const byte* input, word32 msglen)
+{
+  /* XOR msglen bytes of input with keystream into output.  Each
+     GenerateKeystream() call yields 64 bytes; a final partial block uses
+     only the leading msglen bytes of its keystream and the rest is
+     discarded.
+
+     The keystream words are applied in little-endian byte order, so on
+     big-endian builds they are byte-swapped first.  Data is XORed one byte
+     at a time: input and output are plain byte pointers with no alignment
+     guarantee, so they are never accessed through a word32* cast.
+     output may be the same buffer as input. */
+  word32 i, keystream[16];
+
+  for ( ; msglen >= 64; msglen -= 64, input += 64, output += 64)
+  {
+      GenerateKeystream(keystream);
+
+#ifdef BIG_ENDIAN_ORDER
+      ByteReverse(keystream, keystream, sizeof(keystream));
+#endif
+
+      for (i = 0; i < 64; i++)
+          output[i] = input[i] ^ ((byte*)keystream)[i];
+  }
+
+  if (msglen > 0)
+  {
+      GenerateKeystream(keystream);
+
+#ifdef BIG_ENDIAN_ORDER
+      {
+          /* swap only the words the tail touches, rounding up */
+          word32 wordsLeft = msglen / sizeof(word32);
+          if (msglen % sizeof(word32)) wordsLeft++;
+          
+          ByteReverse(keystream, keystream, wordsLeft * sizeof(word32));
+      }
+#endif
+
+      for (i = 0; i < msglen; i++)
+          output[i] = input[i] ^ ((byte*)keystream)[i];
+  }
+
+}
+
+
+}  // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/integer.cpp b/mysql/extra/yassl/taocrypt/src/integer.cpp
new file mode 100644
index 0000000..478a13c
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/integer.cpp
@@ -0,0 +1,3894 @@
+/* Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
+
+/* based on Wei Dai's integer.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "integer.hpp"
+#include "modarith.hpp"
+#include "asn.hpp"
+
+
+
+#ifdef __DECCXX
+    #include <c_asm.h>  // for asm overflow assembly
+#endif
+
+#if defined(_M_X64) || defined(_M_IA64)
+    #include <intrin.h> 
+#pragma intrinsic(_umul128)
+#endif
+
+
+#ifdef __GNUC__
+    #include <signal.h>
+    #include <setjmp.h>
+#endif
+
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+    #ifdef __GNUC__
+        #include <xmmintrin.h>
+        #ifdef TAOCRYPT_MEMALIGN_AVAILABLE
+            #include <malloc.h>
+        #else
+            #include <stdlib.h>
+        #endif
+    #else
+        #include <emmintrin.h>
+    #endif
+#elif defined(_MSC_VER) && defined(_M_IX86)
+/* 32-bit MSVC build without SSE2 intrinsics: report it as one complete
+   sentence, so the build log does not show a dangling half message.
+*/
+    #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
+#elif defined(__GNUC__) && defined(__i386__)
+/*   #warning You do not have GCC 3.3 or later, or did not specify the -msse2 \
+             compiler option. Use of SSE2 intrinsics will be disabled.
+*/
+#endif
+
+
+namespace TaoCrypt {
+
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+
+template <class T>
+CPP_TYPENAME AlignedAllocator<T>::pointer AlignedAllocator<T>::allocate(
+                                           size_type n, const void *)
+{   // storage for n elements: aligned path when n >= 4, array new otherwise
+    if (n > this->max_size())   // oversized request: null, nothing allocated
+        return 0;
+    if (n == 0)                 // empty request: null as well
+        return 0;
+    if (n >= 4)
+    {
+        void* p;
+    #ifdef TAOCRYPT_MM_MALLOC_AVAILABLE
+        p = _mm_malloc(sizeof(T)*n, 16);
+    #elif defined(TAOCRYPT_MEMALIGN_AVAILABLE)
+        p = memalign(16, sizeof(T)*n);
+    #elif defined(TAOCRYPT_MALLOC_ALIGNMENT_IS_16)
+        p = malloc(sizeof(T)*n);
+    #else
+        p = (byte *)malloc(sizeof(T)*n + 8);
+        // assume malloc alignment is at least 8
+    #endif
+    // keep the raw block in m_pBlock for deallocate(); shift by 8 if not 16-aligned
+    #ifdef TAOCRYPT_NO_ALIGNED_ALLOC
+        m_pBlock = p;
+        if (!IsAlignedOn(p, 16))
+        {
+            p = (byte *)p + 8;
+        }
+    #endif
+    // NOTE(review): the allocator result is not checked for null here - confirm callers cope
+        return (T*)p;
+    }
+    return NEW_TC T[n];         // small requests use array new
+}
+
+
+template <class T>
+void AlignedAllocator<T>::deallocate(void* p, size_type n)
+{   // zeroes the n elements, then frees them by the route matching allocate()
+    memset(p, 0, n*sizeof(T));
+    if (n >= 4)     // same threshold allocate() uses for its aligned path
+    {
+        #ifdef TAOCRYPT_MM_MALLOC_AVAILABLE
+            _mm_free(p);
+        #elif defined(TAOCRYPT_NO_ALIGNED_ALLOC)
+            free(m_pBlock);     // the block stored by allocate(), not p itself
+            m_pBlock = 0;
+        #else
+            free(p);
+        #endif
+    }
+    else
+        tcArrayDelete((T *)p);
+}
+
+#endif  // SSE2
+
+
+// ********  start of integer needs
+
+// start 5.2.1 adds DWord and Word ********
+
+// ********************************************************
+
+class DWord {   // double-width unsigned value kept as a low and a high 'word'
+public:
+DWord() {}
+    // from one word: the value is 'low', the high half is zero
+#ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+    explicit DWord(word low)
+    {
+        whole_ = low;
+    }
+#else
+    explicit DWord(word low)
+    {
+        halfs_.low = low;
+        halfs_.high = 0;
+    }
+#endif
+    // from an explicit (low, high) pair
+    DWord(word low, word high)
+    {
+        halfs_.low = low;
+        halfs_.high = high;
+    }
+    // full double-width product a*b, using whichever mechanism the target offers
+    static DWord Multiply(word a, word b)
+    {
+        DWord r;
+
+        #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+            r.whole_ = (dword)a * b;
+
+        #elif defined(_M_X64) || defined(_M_IA64)
+            r.halfs_.low = _umul128(a, b, &r.halfs_.high);
+
+        #elif defined(__ia64__)
+            r.halfs_.low = a*b;
+            __asm__("xmpy.hu %0=%1,%2" : "=f" (r.halfs_.high)
+                : "f" (a), "f" (b));
+
+        #elif defined(_ARCH_PPC64)
+            r.halfs_.low = a*b;
+            __asm__("mulhdu %0,%1,%2" : "=r" (r.halfs_.high)
+                : "r" (a), "r" (b) : "cc");
+
+        #elif defined(__x86_64__)
+            __asm__("mulq %3" : "=d" (r.halfs_.high), "=a" (r.halfs_.low) :
+                "a" (a), "rm" (b) : "cc");
+
+        #elif defined(__mips64)
+            __asm__("dmultu %2,%3" : "=h" (r.halfs_.high), "=l" (r.halfs_.low)
+                : "r" (a), "r" (b));
+
+        #elif defined(_M_IX86)
+            // for testing
+            word64 t = (word64)a * b;
+            r.halfs_.high = ((word32 *)(&t))[1];
+            r.halfs_.low = (word32)t;
+        #else
+            #error can not implement DWord
+        #endif
+
+        return r;
+    }
+    // a*b + c
+    static DWord MultiplyAndAdd(word a, word b, word c)
+    {
+        DWord r = Multiply(a, b);
+        return r += c;
+    }
+    // add a word in place; a wrap of the low half is carried into the high half
+    DWord & operator+=(word a)
+    {
+        #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+            whole_ = whole_ + a;
+        #else
+            halfs_.low += a;
+            halfs_.high += (halfs_.low < a);
+        #endif
+        return *this;
+    }
+    // same addition, returning a new value
+    DWord operator+(word a)
+    {
+        DWord r;
+        #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+            r.whole_ = whole_ + a;
+        #else
+            r.halfs_.low = halfs_.low + a;
+            r.halfs_.high = halfs_.high + (r.halfs_.low < a);
+        #endif
+        return r;
+    }
+    // double-word minus double-word; a wrap of the low half borrows from the high half
+    DWord operator-(DWord a)
+    {
+        DWord r;
+        #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+            r.whole_ = whole_ - a.whole_;
+        #else
+            r.halfs_.low = halfs_.low - a.halfs_.low;
+            r.halfs_.high = halfs_.high - a.halfs_.high -
+                             (r.halfs_.low > halfs_.low);
+        #endif
+        return r;
+    }
+    // double-word minus word, same borrow rule
+    DWord operator-(word a)
+    {
+        DWord r;
+        #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+            r.whole_ = whole_ - a;
+        #else
+            r.halfs_.low = halfs_.low - a;
+            r.halfs_.high = halfs_.high - (r.halfs_.low > halfs_.low);
+        #endif
+        return r;
+    }
+
+    // returns quotient, which must fit in a word
+    word operator/(word divisor);
+    // remainder of the division by a word
+    word operator%(word a);
+    // true when the whole value is zero
+    bool operator!() const
+    {
+    #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+        return !whole_;
+    #else
+        return !halfs_.high && !halfs_.low;
+    #endif
+    }
+    // half accessors; the AsBorrow form returns 0 - high
+    word GetLowHalf() const {return halfs_.low;}
+    word GetHighHalf() const {return halfs_.high;}
+    word GetHighHalfAsBorrow() const {return 0-halfs_.high;}
+
+private:
+    struct dword_struct
+        {
+        #ifdef LITTLE_ENDIAN_ORDER
+            word low;
+            word high;
+        #else
+            word high;
+            word low;
+        #endif
+    };
+    // whole_ (only with a native dword) shares storage with halfs_
+    union
+    {
+    #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+        dword whole_;
+    #endif
+        struct dword_struct halfs_;
+    };
+};
+
+
+class Word {    // a 'word' exposing the DWord-style interface over 'hword' halves
+public:
+    Word() {}
+    // wrap an existing word value
+    Word(word value)
+    {
+        whole_ = value;
+    }
+    // assemble from a low and a high half-word
+    Word(hword low, hword high)
+    {
+        whole_ = low | (word(high) << (WORD_BITS/2));
+    }
+    // product of two half-words, computed in a full word
+    static Word Multiply(hword a, hword b)
+    {
+        Word r;
+        r.whole_ = (word)a * b;
+        return r;
+    }
+
+    Word operator-(Word a)
+    {
+        Word r;
+        r.whole_ = whole_ - a.whole_;
+        return r;
+    }
+
+    Word operator-(hword a)
+    {
+        Word r;
+        r.whole_ = whole_ - a;
+        return r;
+    }
+
+    // returns quotient truncated to an hword, so it must fit in one
+    hword operator/(hword divisor)
+    {
+        return hword(whole_ / divisor);
+    }
+    // true when the value is zero
+    bool operator!() const
+    {
+        return !whole_;
+    }
+    // whole value and half accessors; the AsBorrow form returns 0 - high half
+    word GetWhole() const {return whole_;}
+    hword GetLowHalf() const {return hword(whole_);}
+    hword GetHighHalf() const {return hword(whole_>>(WORD_BITS/2));}
+    hword GetHighHalfAsBorrow() const {return 0-hword(whole_>>(WORD_BITS/2));}
+
+private:
+    word whole_;
+};
+
+
+// dummy is VC60 compiler bug workaround
+// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
+template <class S, class D>
+S DivideThreeWordsByTwo(S* A, S B0, S B1, D* dummy_VC6_WorkAround = 0)
+{   // A[0..2] is the dividend (A[0] lowest), B0/B1 the low/high divisor words
+    // estimate the quotient: do a 2 S by 1 S divide
+    S Q;
+    if (S(B1+1) == 0)       // B1+1 wraps to zero, so take A[2] as the estimate
+        Q = A[2];
+    else
+        Q = D(A[1], A[2]) / S(B1+1);
+
+    // now subtract Q*B from A
+    D p = D::Multiply(B0, Q);
+    D u = (D) A[0] - p.GetLowHalf();
+    A[0] = u.GetLowHalf();
+    u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - 
+            D::Multiply(B1, Q);
+    A[1] = u.GetLowHalf();
+    A[2] += u.GetHighHalf();
+
+    // Q <= actual quotient, so fix it
+    while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
+    {   // remainder still >= divisor: subtract B once more and bump Q
+        u = (D) A[0] - B0;
+        A[0] = u.GetLowHalf();
+        u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
+        A[1] = u.GetLowHalf();
+        A[2] += u.GetHighHalf();
+        Q++;
+    }
+
+    return Q;
+}
+
+
+// do a 4 word by 2 word divide, returns the 2 word quotient as D(Q[0], Q[1])
+template <class S, class D>
+inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
+{   // T is 4 words of scratch; it receives Al:Ah and is reduced in place
+    if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
+        return D(Ah.GetLowHalf(), Ah.GetHighHalf());
+    else
+    {
+        S Q[2];
+        T[0] = Al.GetLowHalf();
+        T[1] = Al.GetHighHalf(); 
+        T[2] = Ah.GetLowHalf();
+        T[3] = Ah.GetHighHalf();
+        Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(),     // upper three words first
+                                                B.GetHighHalf());
+        Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
+        return D(Q[0], Q[1]);
+    }
+}
+
+
+// returns quotient, which must fit in a word (the result is narrowed to word)
+inline word DWord::operator/(word a)
+{
+    #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+        return word(whole_ / a);
+    #else
+        hword r[4];     // scratch for the half-word long division
+        return DivideFourWordsByTwo<hword, Word>(r, halfs_.low,
+                                                    halfs_.high, a).GetWhole();
+    #endif
+}
+
+inline word DWord::operator%(word a)
+{   // remainder of this double word divided by a
+    #ifdef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+        return word(whole_ % a);
+    #else
+        if (a < (word(1) << (WORD_BITS/2)))     // divisor fits in a half word
+        {
+            hword h = hword(a);
+            word r = halfs_.high % h;           // reduce half a word at a time, top down
+            r = ((halfs_.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
+            return hword((hword(halfs_.low) + (r << (WORD_BITS/2))) % h);
+        }
+        else
+        {
+            hword r[4];
+            DivideFourWordsByTwo<hword, Word>(r, halfs_.low, halfs_.high, a);
+            return Word(r[0], r[1]).GetWhole(); // the low two scratch words are returned
+        }
+    #endif
+}
+
+
+
+// end 5.2.1 DWord and Word adds
+
+
+
+
+
+static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
+
+static inline unsigned int RoundupSize(unsigned int n)
+{
+    // Sizes step 2, 4, 8 (table lookup for n <= 8), then 16, 32, 64;
+    // beyond 64 the result is 1 shifted left by BitPrecision(n-1).
+    if (n > 64)
+        return 1U << BitPrecision(n-1);
+    if (n > 32) return 64;
+    if (n > 16) return 32;
+    if (n > 8)  return 16;
+
+    return RoundupSizeTable[n];
+}
+
+
+static int Compare(const word *A, const word *B, unsigned int N)
+{
+    // Three-way compare of two N-word values, scanning from index N-1 down;
+    // the first differing word decides (1 if A is larger, -1 if smaller).
+    for (unsigned int idx = N; idx-- > 0; )
+        if (A[idx] != B[idx])
+            return (A[idx] > B[idx]) ? 1 : -1;
+
+    return 0;   // all N words equal
+}
+
+static word Increment(word *A, unsigned int N, word B=1)
+{
+    // A += B over N words, in place; the result is the carry out (0 or 1)
+    const word first = A[0];
+    A[0] = first + B;
+    if (A[0] >= first) return 0;             // word 0 did not wrap: no carry
+    unsigned pos = 1;
+    while (pos < N)
+        if (++A[pos++]) return 0;            // carry absorbed by this word
+    return 1;                                // rippled past the top word
+}
+
+static word Decrement(word *A, unsigned int N, word B=1)
+{
+    // A -= B over N words, in place; the result is the borrow out (0 or 1)
+    const word first = A[0];
+    A[0] = first - B;
+    if (A[0] <= first) return 0;             // word 0 did not wrap: no borrow
+    unsigned pos = 1;
+    while (pos < N)
+        if (A[pos++]--) return 0;            // word was non-zero: borrow ends
+    return 1;                                // borrowed past the top word
+}
+
+static void TwosComplement(word *A, unsigned int N)
+{
+    Decrement(A, N);                        // negate as ~(A - 1)
+    for (word *cur = A, *stop = A + N; cur != stop; ++cur)
+        *cur = ~*cur;
+}
+
+
+static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
+{
+    word high = 0;      // word carried from one product into the next
+    for (unsigned pos = 0; pos != N; ++pos)
+    {
+        const DWord prod = DWord::MultiplyAndAdd(A[pos], B, high);
+        high   = prod.GetHighHalf();
+        C[pos] = prod.GetLowHalf();
+    }
+    return high;        // carry out of the top word
+}
+
+
+static word AtomicInverseModPower2(word A)
+{
+    // seed with A mod 8, then iterate R = R*(2 - R*A) while the counter,
+    // which starts at 3 and doubles every pass, is below WORD_BITS
+    word inv = A % 8;
+    for (unsigned bits = 3; bits < WORD_BITS; bits += bits)
+        inv *= 2 - inv*A;
+    return inv;
+}
+
+
+// ********************************************************
+
+class Portable  // static-only set of fixed-size word-array arithmetic routines
+{
+public:
+    static word TAOCRYPT_CDECL Add(word *C, const word *A, const word *B,
+                                   unsigned int N);
+    static word TAOCRYPT_CDECL Subtract(word *C, const word *A, const word*B,
+                                        unsigned int N);
+    static void TAOCRYPT_CDECL Multiply2(word *C, const word *A, const word *B);
+    static word TAOCRYPT_CDECL Multiply2Add(word *C,
+                                            const word *A, const word *B);
+    static void TAOCRYPT_CDECL Multiply4(word *C, const word *A, const word *B);
+    static void TAOCRYPT_CDECL Multiply8(word *C, const word *A, const word *B);
+    static unsigned int TAOCRYPT_CDECL MultiplyRecursionLimit() {return 8;}
+    // "Bottom" variants (Multiply2Bottom writes only C[0..1])
+    static void TAOCRYPT_CDECL Multiply2Bottom(word *C, const word *A,
+                                               const word *B);
+    static void TAOCRYPT_CDECL Multiply4Bottom(word *C, const word *A,
+                                               const word *B);
+    static void TAOCRYPT_CDECL Multiply8Bottom(word *C, const word *A,
+                                               const word *B);
+    static unsigned int TAOCRYPT_CDECL MultiplyBottomRecursionLimit(){return 8;}
+    // squaring of 2- and 4-word inputs
+    static void TAOCRYPT_CDECL Square2(word *R, const word *A);
+    static void TAOCRYPT_CDECL Square4(word *R, const word *A);
+    static unsigned int TAOCRYPT_CDECL SquareRecursionLimit() {return 4;}
+};
+
+word Portable::Add(word *C, const word *A, const word *B, unsigned int N)
+{   // C[i] = A[i] + B[i] with carry propagation; returns the final carry
+    DWord u(0, 0);
+    for (unsigned int i = 0; i < N; i+=2)   // two words per pass, so an odd N also touches index N
+    {
+        u = DWord(A[i]) + B[i] + u.GetHighHalf();       // high half of u is the carry in
+        C[i] = u.GetLowHalf();
+        u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
+        C[i+1] = u.GetLowHalf();
+    }
+    return u.GetHighHalf();
+}
+
+word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N)
+{   // C[i] = A[i] - B[i] with borrow propagation; returns 0 - (final high half)
+    DWord u(0, 0);
+    for (unsigned int i = 0; i < N; i+=2)   // two words per pass, so an odd N also touches index N
+    {
+        u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();      // previous borrow is subtracted
+        C[i] = u.GetLowHalf();
+        u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
+        C[i+1] = u.GetLowHalf();
+    }
+    return 0-u.GetHighHalf();
+}
+
+void Portable::Multiply2(word *C, const word *A, const word *B)
+{   // reads A[0..1] and B[0..1], writes the four words C[0..3]
+/*
+    word s;
+    dword d;
+
+    if (A1 >= A0)
+        if (B0 >= B1)
+        {
+            s = 0;
+            d = (dword)(A1-A0)*(B0-B1);
+        }
+        else
+        {
+            s = (A1-A0);
+            d = (dword)s*(word)(B0-B1);
+        }
+    else
+        if (B0 > B1)
+        {
+            s = (B0-B1);
+            d = (word)(A1-A0)*(dword)s;
+        }
+        else
+        {
+            s = 0;
+            d = (dword)(A0-A1)*(B1-B0);
+        }
+*/
+    // this segment is the branchless equivalent of above
+    word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
+    unsigned int ai = A[1] < A[0];
+    unsigned int bi = B[0] < B[1];
+    unsigned int di = ai & bi;
+    DWord d = DWord::Multiply(D[di], D[di+2]);
+    D[1] = D[3] = 0;        // so that D[si] below yields 0 when si is 1
+    unsigned int si = ai + !bi;
+    word s = D[si];
+    // lowest result word comes straight from A[0]*B[0]
+    DWord A0B0 = DWord::Multiply(A[0], B[0]);
+    C[0] = A0B0.GetLowHalf();
+    // the upper words combine A0B0, A1B1 and the difference product d
+    DWord A1B1 = DWord::Multiply(A[1], B[1]);
+    DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf()
+                       + A1B1.GetLowHalf();
+    C[1] = t.GetLowHalf();
+    // s is the correction word selected above; it is subtracted here
+    t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf()
+             + A1B1.GetHighHalf() - s;
+    C[2] = t.GetLowHalf();
+    C[3] = t.GetHighHalf();
+}
+
+void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
+{   // only the two low words of A[0..1]*B[0..1] are produced, in C[0..1]
+    DWord t = DWord::Multiply(A[0], B[0]);
+    C[0] = t.GetLowHalf();
+    C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];    // cross products kept modulo one word
+}
+
+word Portable::Multiply2Add(word *C, const word *A, const word *B)
+{   // same product terms as Multiply2, added onto the existing C[0..3]
+    word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
+    unsigned int ai = A[1] < A[0];
+    unsigned int bi = B[0] < B[1];
+    unsigned int di = ai & bi;
+    DWord d = DWord::Multiply(D[di], D[di+2]);
+    D[1] = D[3] = 0;        // so that D[si] below yields 0 when si is 1
+    unsigned int si = ai + !bi;
+    word s = D[si];
+    // word 0: A0B0 plus the old C[0]
+    DWord A0B0 = DWord::Multiply(A[0], B[0]);
+    DWord t = A0B0 + C[0];
+    C[0] = t.GetLowHalf();
+    // word 1: carry, both low halves, the difference product and the old C[1]
+    DWord A1B1 = DWord::Multiply(A[1], B[1]);
+    t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() +
+        A1B1.GetLowHalf() + C[1];
+    C[1] = t.GetLowHalf();
+    // word 2: as in Multiply2 (including the - s correction) plus the old C[2]
+    t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() +
+        d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
+    C[2] = t.GetLowHalf();
+    // word 3, then whatever carries out of it is the return value
+    t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
+    C[3] = t.GetLowHalf();
+    return t.GetHighHalf();
+}
+
+
+#define MulAcc(x, y)  /* (e:d:c) += A[x]*B[y] */     \
+    p = DWord::MultiplyAndAdd(A[x], B[y], c);       \
+    c = p.GetLowHalf();                             \
+    p = (DWord) d + p.GetHighHalf();                \
+    d = p.GetLowHalf();                             \
+    e += p.GetHighHalf();
+
+#define SaveMulAcc(s, x, y) /* R[s]=c; c,d,e = d,e,0; then += A[x]*B[y] */ \
+    R[s] = c;                                       \
+    p = DWord::MultiplyAndAdd(A[x], B[y], d);       \
+    c = p.GetLowHalf();                             \
+    p = (DWord) e + p.GetHighHalf();                \
+    d = p.GetLowHalf();                             \
+    e = p.GetHighHalf();
+
+#define SquAcc(x, y)  /* (e:d:c) += 2*A[x]*A[y], q added twice */ \
+    q = DWord::Multiply(A[x], A[y]);                \
+    p = q + c;                                      \
+    c = p.GetLowHalf();                             \
+    p = (DWord) d + p.GetHighHalf();                \
+    d = p.GetLowHalf();                             \
+    e += p.GetHighHalf();                           \
+    p = q + c;                                      \
+    c = p.GetLowHalf();                             \
+    p = (DWord) d + p.GetHighHalf();                \
+    d = p.GetLowHalf();                             \
+    e += p.GetHighHalf();
+
+#define SaveSquAcc(s, x, y) /* R[s]=c; c,d,e = d,e,0; += 2*A[x]*A[y] */ \
+    R[s] = c;                                       \
+    q = DWord::Multiply(A[x], A[y]);                \
+    p = q + d;                                      \
+    c = p.GetLowHalf();                             \
+    p = (DWord) e + p.GetHighHalf();                \
+    d = p.GetLowHalf();                             \
+    e = p.GetHighHalf();                            \
+    p = q + c;                                      \
+    c = p.GetLowHalf();                             \
+    p = (DWord) d + p.GetHighHalf();                \
+    d = p.GetLowHalf();                             \
+    e += p.GetHighHalf();
+
+
+void Portable::Multiply4(word *R, const word *A, const word *B)
+{   // R[0..7] from A[0..3] and B[0..3], accumulated one result word at a time
+    DWord p;
+    word c, d, e;       // running three-word accumulator, c lowest
+    // result word 0
+    p = DWord::Multiply(A[0], B[0]);
+    R[0] = p.GetLowHalf();
+    c = p.GetHighHalf();
+    d = e = 0;
+    // products with x+y == 1
+    MulAcc(0, 1);
+    MulAcc(1, 0);
+    // R[1] stored; products with x+y == 2
+    SaveMulAcc(1, 2, 0);
+    MulAcc(1, 1);
+    MulAcc(0, 2);
+    // R[2] stored; products with x+y == 3
+    SaveMulAcc(2, 0, 3);
+    MulAcc(1, 2);
+    MulAcc(2, 1);
+    MulAcc(3, 0);
+    // R[3] stored; products with x+y == 4
+    SaveMulAcc(3, 3, 1);
+    MulAcc(2, 2);
+    MulAcc(1, 3);
+    // R[4] stored; products with x+y == 5
+    SaveMulAcc(4, 2, 3);
+    MulAcc(3, 2);
+
+    R[5] = c;
+    p = DWord::MultiplyAndAdd(A[3], B[3], d);
+    R[6] = p.GetLowHalf();
+    R[7] = e + p.GetHighHalf();
+}
+
+// Portable 2-word square: writes R[0..3] from A[0..1].
+// The cross term A[0] * A[1] is multiplied once and accumulated twice by
+// SquAcc; the two diagonal terms are multiplied directly.
+void Portable::Square2(word *R, const word *A)
+{
+    DWord p, q;     // q is used inside SquAcc
+    word c, d, e;   // three-word column accumulator, c lowest
+
+    // column 0: A[0]^2
+    p = DWord::Multiply(A[0], A[0]);
+    R[0] = p.GetLowHalf();
+    c = p.GetHighHalf();
+    d = e = 0;
+
+    // column 1: 2 * A[0] * A[1]
+    SquAcc(0, 1);
+
+    // column 2: A[1]^2 plus pending carries
+    R[1] = c;
+    p = DWord::MultiplyAndAdd(A[1], A[1], d);
+    R[2] = p.GetLowHalf();
+    R[3] = e + p.GetHighHalf();
+}
+
+// Portable 4-word square: writes R[0..7] from A[0..3].
+// Off-diagonal products are computed once and added twice (SquAcc /
+// SaveSquAcc); diagonal products A[i] * A[i] go through MulAcc, which is why
+// B is aliased to A below. Under MSVC the function simply forwards to
+// Multiply4(R, A, A).
+void Portable::Square4(word *R, const word *A)
+{
+#ifdef _MSC_VER
+    // VC60 workaround: MSVC 6.0 has an optimization bug that makes
+    // (dword)A*B where either A or B has been cast to a dword before
+    // very expensive. Revisit this function when this
+    // bug is fixed.
+    Multiply4(R, A, A);
+#else
+    const word *B = A;      // MulAcc reads B; for squaring it is A itself
+    DWord p, q;
+    word c, d, e;
+
+    // column 0: A[0]^2
+    p = DWord::Multiply(A[0], A[0]);
+    R[0] = p.GetLowHalf();
+    c = p.GetHighHalf();
+    d = e = 0;
+
+    // column 1: 2 * A[0]*A[1]
+    SquAcc(0, 1);
+
+    // column 2: 2 * A[2]*A[0] + A[1]^2
+    SaveSquAcc(1, 2, 0);
+    MulAcc(1, 1);
+
+    // column 3: 2 * A[0]*A[3] + 2 * A[1]*A[2]
+    SaveSquAcc(2, 0, 3);
+    SquAcc(1, 2);
+
+    // column 4: 2 * A[3]*A[1] + A[2]^2
+    SaveSquAcc(3, 3, 1);
+    MulAcc(2, 2);
+
+    // column 5: 2 * A[2]*A[3]
+    SaveSquAcc(4, 2, 3);
+
+    // column 6: A[3]^2 plus pending carries
+    R[5] = c;
+    p = DWord::MultiplyAndAdd(A[3], A[3], d);
+    R[6] = p.GetLowHalf();
+    R[7] = e + p.GetHighHalf();
+#endif
+}
+
+// Portable 8x8-word multiply: fills R[0..15] from A[0..7] and B[0..7].
+// Works column by column; each group below lists every index pair (x, y)
+// with the same x + y. The first macro of a group is SaveMulAcc, whose first
+// argument is the result index being finalised (presumably stored to R[s]
+// by the macro defined earlier in this file -- confirm there).
+void Portable::Multiply8(word *R, const word *A, const word *B)
+{
+    DWord p;
+    word c, d, e;
+
+    // column 0
+    p = DWord::Multiply(A[0], B[0]);
+    R[0] = p.GetLowHalf();
+    c = p.GetHighHalf();
+    d = e = 0;
+
+    // column 1
+    MulAcc(0, 1);
+    MulAcc(1, 0);
+
+    // column 2
+    SaveMulAcc(1, 2, 0);
+    MulAcc(1, 1);
+    MulAcc(0, 2);
+
+    // column 3
+    SaveMulAcc(2, 0, 3);
+    MulAcc(1, 2);
+    MulAcc(2, 1);
+    MulAcc(3, 0);
+
+    // column 4
+    SaveMulAcc(3, 0, 4);
+    MulAcc(1, 3);
+    MulAcc(2, 2);
+    MulAcc(3, 1);
+    MulAcc(4, 0);
+
+    // column 5
+    SaveMulAcc(4, 0, 5);
+    MulAcc(1, 4);
+    MulAcc(2, 3);
+    MulAcc(3, 2);
+    MulAcc(4, 1);
+    MulAcc(5, 0);
+
+    // column 6
+    SaveMulAcc(5, 0, 6);
+    MulAcc(1, 5);
+    MulAcc(2, 4);
+    MulAcc(3, 3);
+    MulAcc(4, 2);
+    MulAcc(5, 1);
+    MulAcc(6, 0);
+
+    // column 7 (widest: eight products)
+    SaveMulAcc(6, 0, 7);
+    MulAcc(1, 6);
+    MulAcc(2, 5);
+    MulAcc(3, 4);
+    MulAcc(4, 3);
+    MulAcc(5, 2);
+    MulAcc(6, 1);
+    MulAcc(7, 0);
+
+    // column 8
+    SaveMulAcc(7, 1, 7);
+    MulAcc(2, 6);
+    MulAcc(3, 5);
+    MulAcc(4, 4);
+    MulAcc(5, 3);
+    MulAcc(6, 2);
+    MulAcc(7, 1);
+
+    // column 9
+    SaveMulAcc(8, 2, 7);
+    MulAcc(3, 6);
+    MulAcc(4, 5);
+    MulAcc(5, 4);
+    MulAcc(6, 3);
+    MulAcc(7, 2);
+
+    // column 10
+    SaveMulAcc(9, 3, 7);
+    MulAcc(4, 6);
+    MulAcc(5, 5);
+    MulAcc(6, 4);
+    MulAcc(7, 3);
+
+    // column 11
+    SaveMulAcc(10, 4, 7);
+    MulAcc(5, 6);
+    MulAcc(6, 5);
+    MulAcc(7, 4);
+
+    // column 12
+    SaveMulAcc(11, 5, 7);
+    MulAcc(6, 6);
+    MulAcc(7, 5);
+
+    // column 13
+    SaveMulAcc(12, 6, 7);
+    MulAcc(7, 6);
+
+    // column 14: last product plus pending carries gives R[14] and R[15]
+    R[13] = c;
+    p = DWord::MultiplyAndAdd(A[7], B[7], d);
+    R[14] = p.GetLowHalf();
+    R[15] = e + p.GetHighHalf();
+}
+
+// Portable "bottom half" multiply: writes only R[0..3], the four low words
+// of A[0..3] * B[0..3]. Columns 0-2 are accumulated with carries; for the
+// top word only the low halves of the column-3 products matter, so they are
+// added with plain word arithmetic and any overflow is discarded.
+void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
+{
+    DWord p;
+    word c, d, e;
+
+    // column 0
+    p = DWord::Multiply(A[0], B[0]);
+    R[0] = p.GetLowHalf();
+    c = p.GetHighHalf();
+    d = e = 0;
+
+    // column 1
+    MulAcc(0, 1);
+    MulAcc(1, 0);
+
+    // column 2
+    SaveMulAcc(1, 2, 0);
+    MulAcc(1, 1);
+    MulAcc(0, 2);
+
+    R[2] = c;
+    // column 3: carry word d plus the truncated cross products
+    R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
+}
+
+// Portable "bottom half" multiply: writes only R[0..7], the eight low words
+// of A[0..7] * B[0..7]. Columns 0-6 are accumulated with carries; the top
+// word R[7] is the carry word d plus the word-truncated column-7 products.
+void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
+{
+    DWord p;
+    word c, d, e;
+
+    // column 0
+    p = DWord::Multiply(A[0], B[0]);
+    R[0] = p.GetLowHalf();
+    c = p.GetHighHalf();
+    d = e = 0;
+
+    // column 1
+    MulAcc(0, 1);
+    MulAcc(1, 0);
+
+    // column 2
+    SaveMulAcc(1, 2, 0);
+    MulAcc(1, 1);
+    MulAcc(0, 2);
+
+    // column 3
+    SaveMulAcc(2, 0, 3);
+    MulAcc(1, 2);
+    MulAcc(2, 1);
+    MulAcc(3, 0);
+
+    // column 4
+    SaveMulAcc(3, 0, 4);
+    MulAcc(1, 3);
+    MulAcc(2, 2);
+    MulAcc(3, 1);
+    MulAcc(4, 0);
+
+    // column 5
+    SaveMulAcc(4, 0, 5);
+    MulAcc(1, 4);
+    MulAcc(2, 3);
+    MulAcc(3, 2);
+    MulAcc(4, 1);
+    MulAcc(5, 0);
+
+    // column 6
+    SaveMulAcc(5, 0, 6);
+    MulAcc(1, 5);
+    MulAcc(2, 4);
+    MulAcc(3, 3);
+    MulAcc(4, 2);
+    MulAcc(5, 1);
+    MulAcc(6, 0);
+
+    R[6] = c;
+    // column 7: only the low word of each product contributes
+    R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
+               A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
+}
+
+
+#undef MulAcc
+#undef SaveMulAcc
+#undef SquAcc
+#undef SaveSquAcc
+
+// optimized
+
+#ifdef TAOCRYPT_X86ASM_AVAILABLE
+
+// ************** x86 feature detection ***************
+
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+
+#ifndef _MSC_VER
+    // Jump target armed by setjmp() in HasSSE2() before it executes its
+    // SSE2 probe instruction.
+    static jmp_buf s_env;
+
+    // SIGILL handler installed by HasSSE2(): abandons the faulting probe
+    // instruction by jumping back to s_env, making setjmp() return 1.
+    static void SigIllHandler(int)
+    {
+        longjmp(s_env, 1);
+    }
+#endif
+
+// Returns true only if the CPU advertises the feature bit tested below and
+// an SSE2 instruction (xorpd) actually executes without faulting.
+// The second step is a runtime probe: MSVC builds use structured exception
+// handling, other builds temporarily install a SIGILL handler and use
+// setjmp/longjmp to recover.
+// NOTE(review): the non-MSVC path changes the process-wide SIGILL handler
+// and uses a file-static jmp_buf, so it looks unsafe to run concurrently
+// with other threads -- confirm it is only called during startup.
+static bool HasSSE2()
+{
+    if (!IsPentium())
+        return false;
+
+    word32 cpuid[4];
+    CpuId(1, cpuid);
+    // bit 26 of the fourth CPUID word -- presumably EDX bit 26, the SSE2
+    // feature flag; the register order is set by CpuId, defined elsewhere
+    if ((cpuid[3] & (1 << 26)) == 0)
+        return false;
+
+#ifdef _MSC_VER
+    __try
+    {
+        __asm xorpd xmm0, xmm0        // executing SSE2 instruction
+    }
+    __except (1)
+    {
+        // the probe raised an exception: SSE2 is not usable
+        return false;
+    }
+    return true;
+#else
+    typedef void (*SigHandler)(int);
+
+    // install our handler, remembering the previous one for restoration
+    SigHandler oldHandler = signal(SIGILL, SigIllHandler);
+    if (oldHandler == SIG_ERR)
+        return false;
+
+    bool result = true;
+    if (setjmp(s_env))
+        result = false;     // reached via longjmp from SigIllHandler
+    else
+        __asm __volatile ("xorpd %xmm0, %xmm0");    // SSE2 probe
+
+    signal(SIGILL, oldHandler);
+    return result;
+#endif
+}
+#endif // SSE2_INTRINSICS_AVAILABLE
+
+
+// Reports whether CPUID leaf 1 returns the value 0xF in bits 8..11 of its
+// first result word (the processor family field). Always false when
+// IsPentium() is false, in which case CpuId is not queried at all.
+static bool IsP4()
+{
+    bool familyIsF = false;
+
+    if (IsPentium())
+    {
+        word32 info[4];
+        CpuId(1, info);
+
+        const word32 family = (info[0] >> 8) & 0xf;
+        familyIsF = (family == 0xf);
+    }
+
+    return familyIsF;
+}
+
+// ************** Pentium/P4 optimizations ***************
+
+// Hand-written x86 assembly kernels scheduled for Pentium-class CPUs.
+// Anything not overridden here is inherited from Portable.
+class PentiumOptimized : public Portable
+{
+public:
+    // C = A + B over N words; returns the carry out of the top word.
+    static word TAOCRYPT_CDECL Add(
+        word *C, const word *A, const word *B, unsigned int N);
+
+    // C = A - B over N words; returns the borrow out of the top word.
+    static word TAOCRYPT_CDECL Subtract(
+        word *C, const word *A, const word *B, unsigned int N);
+
+    // C[0..7] = A[0..3] * B[0..3]
+    static void TAOCRYPT_CDECL Multiply4(
+        word *C, const word *A, const word *B);
+
+    // C[0..15] = A[0..7] * B[0..7]
+    static void TAOCRYPT_CDECL Multiply8(
+        word *C, const word *A, const word *B);
+
+    // C[0..7] = low eight words of A[0..7] * B[0..7]
+    static void TAOCRYPT_CDECL Multiply8Bottom(
+        word *C, const word *A, const word *B);
+};
+
+// Kernels tuned for Pentium 4: Add/Subtract avoid adc/sbb, and the multiply
+// routines (only when SSE2 intrinsics are available) are built on SSE2.
+class P4Optimized
+{
+public:
+    // C = A + B over N words; returns the final carry.
+    static word TAOCRYPT_CDECL Add(
+        word *C, const word *A, const word *B, unsigned int N);
+
+    // C = A - B over N words; returns the final borrow.
+    static word TAOCRYPT_CDECL Subtract(
+        word *C, const word *A, const word *B, unsigned int N);
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+    // C[0..7] = A[0..3] * B[0..3]
+    static void TAOCRYPT_CDECL Multiply4(
+        word *C, const word *A, const word *B);
+
+    // C[0..15] = A[0..7] * B[0..7]
+    static void TAOCRYPT_CDECL Multiply8(
+        word *C, const word *A, const word *B);
+
+    // Bottom-half variant of Multiply8 (definition follows later in file).
+    static void TAOCRYPT_CDECL Multiply8Bottom(
+        word *C, const word *A, const word *B);
+#endif
+};
+
+// Function-pointer shape shared by every Add/Subtract kernel.
+typedef word (TAOCRYPT_CDECL *PAddSub)(word *C,
+                                       const word *A,
+                                       const word *B,
+                                       unsigned int N);
+
+// Function-pointer shape shared by every fixed-size multiply kernel.
+typedef void (TAOCRYPT_CDECL *PMul)(word *C, const word *A, const word *B);
+
+// Dispatch slots filled in by SetPentiumFunctionPointers().
+static PAddSub s_pAdd;
+static PAddSub s_pSub;
+#ifdef SSE2_INTRINSICS_AVAILABLE
+static PMul s_pMul4;
+static PMul s_pMul8;
+static PMul s_pMul8B;
+#endif
+
+// Chooses the Add/Subtract kernels (and, with SSE2 intrinsics compiled in,
+// the multiply kernels) that match the running CPU and stores them in the
+// s_p* dispatch pointers.
+//   not a Pentium        -> Portable
+//   Pentium and IsP4()   -> P4Optimized Add/Subtract
+//   Pentium, HasSSE2()   -> P4Optimized multiplies
+//   any other Pentium    -> PentiumOptimized
+static void SetPentiumFunctionPointers()
+{
+    // --- addition / subtraction ---
+    if (IsPentium())
+    {
+        if (IsP4())
+        {
+            s_pAdd = &P4Optimized::Add;
+            s_pSub = &P4Optimized::Subtract;
+        }
+        else
+        {
+            s_pAdd = &PentiumOptimized::Add;
+            s_pSub = &PentiumOptimized::Subtract;
+        }
+    }
+    else
+    {
+        s_pAdd = &Portable::Add;
+        s_pSub = &Portable::Subtract;
+    }
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+    // --- fixed-size multiplication ---
+    if (IsPentium())
+    {
+        if (HasSSE2())
+        {
+            s_pMul4 = &P4Optimized::Multiply4;
+            s_pMul8 = &P4Optimized::Multiply8;
+            s_pMul8B = &P4Optimized::Multiply8Bottom;
+        }
+        else
+        {
+            s_pMul4 = &PentiumOptimized::Multiply4;
+            s_pMul8 = &PentiumOptimized::Multiply8;
+            s_pMul8B = &PentiumOptimized::Multiply8Bottom;
+        }
+    }
+    else
+    {
+        s_pMul4 = &Portable::Multiply4;
+        s_pMul8 = &Portable::Multiply8;
+        s_pMul8B = &Portable::Multiply8Bottom;
+    }
+#endif
+}
+
+// Dummy constant whose initializer calls SetPentiumFunctionPointers() via
+// the comma operator, so the dispatch pointers are filled in during static
+// initialization of this translation unit.
+// NOTE(review): code in other translation units that runs earlier in static
+// initialization would presumably see null dispatch pointers -- confirm no
+// such use exists.
+static const char s_RunAtStartupSetPentiumFunctionPointers =
+    (SetPentiumFunctionPointers(), 0);
+
+
+// Kernel facade used on x86 builds: every call is routed through the s_p*
+// pointers chosen by SetPentiumFunctionPointers(). Without SSE2 intrinsics
+// the multiply routines are the ones inherited from PentiumOptimized.
+class LowLevel : public PentiumOptimized
+{
+public:
+    // C = A + B over N words, through the selected Add kernel.
+    static inline word Add(word *C, const word *A, const word *B,
+                           unsigned int N)
+    {
+        return s_pAdd(C, A, B, N);
+    }
+
+    // C = A - B over N words, through the selected Subtract kernel.
+    static inline word Subtract(word *C, const word *A, const word *B,
+                                unsigned int N)
+    {
+        return s_pSub(C, A, B, N);
+    }
+
+    // Squaring is done as an ordinary 4-word multiply of A by itself.
+    static inline void Square4(word *R, const word *A)
+    {
+        Multiply4(R, A, A);
+    }
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+    static inline void Multiply4(word *C, const word *A, const word *B)
+    {
+        s_pMul4(C, A, B);
+    }
+
+    static inline void Multiply8(word *C, const word *A, const word *B)
+    {
+        s_pMul8(C, A, B);
+    }
+
+    static inline void Multiply8Bottom(word *C, const word *A, const word *B)
+    {
+        s_pMul8B(C, A, B);
+    }
+#endif
+};
+
+// use some tricks to share assembly code between MSVC and GCC:
+//  - MSVC: the functions are __declspec(naked); AS1/AS2 expand to __asm
+//    statements and the prologue/epilogue macros save registers, fetch the
+//    stack arguments and return by hand.
+//  - GCC: AS1/AS2 stringify their arguments, so the function body becomes
+//    one extended-asm statement that the prologue macro opens and the
+//    epilogue macro closes with the operand and clobber lists.
+#ifdef _MSC_VER
+    #define TAOCRYPT_NAKED __declspec(naked)
+    #define AS1(x) __asm x
+    #define AS2(x, y) __asm x, y
+    // Add/Subtract entry: after four pushes the arguments sit at
+    // [esp+20..32]; they are loaded as ecx = C, edx = A, ebx = B, esi = N.
+    #define AddPrologue \
+        __asm	push ebp \
+        __asm	push ebx \
+        __asm	push esi \
+        __asm	push edi \
+        __asm	mov		ecx, [esp+20] \
+        __asm	mov		edx, [esp+24] \
+        __asm	mov		ebx, [esp+28] \
+        __asm	mov		esi, [esp+32]
+    // Add/Subtract exit: restore the saved registers; the result is
+    // whatever the body left in eax.
+    #define AddEpilogue \
+        __asm	pop edi \
+        __asm	pop esi \
+        __asm	pop ebx \
+        __asm	pop ebp \
+        __asm	ret
+    // Multiply entry: ecx = third argument (Y), esi = second argument (X),
+    // and the first argument (Z) is pushed so the body finds it at [esp].
+    #define MulPrologue \
+        __asm	push ebp \
+        __asm	push ebx \
+        __asm	push esi \
+        __asm	push edi \
+        __asm	mov ecx, [esp+28] \
+        __asm	mov esi, [esp+24] \
+        __asm	push [esp+20]
+    // Multiply exit: drop the pushed Z pointer, restore registers, return.
+    #define MulEpilogue \
+        __asm	add esp, 4 \
+        __asm	pop edi \
+        __asm	pop esi \
+        __asm	pop ebx \
+        __asm	pop ebp \
+        __asm	ret
+#else
+    #define TAOCRYPT_NAKED
+    #define AS1(x) #x ";"
+    #define AS2(x, y) #x ", " #y ";"
+    // Add/Subtract entry for GCC: the operands are bound by the constraint
+    // list in AddEpilogue (ecx = C, edx = A, esi = N, B read from memory
+    // into ebx); ebx and ebp are saved and restored by hand.
+    // NOTE(review): the Add/Subtract bodies write eax, ecx, edx and esi,
+    // none of which are declared as outputs or clobbers, and the "m" operand
+    // B is read after a push has already moved esp -- verify this is safe
+    // for the compilers and flags in use.
+    #define AddPrologue \
+        word res; \
+        __asm__ __volatile__ \
+        ( \
+            "push %%ebx;"	/* save this manually, in case of -fPIC */ \
+            "mov %3, %%ebx;" \
+            ".intel_syntax noprefix;" \
+            "push ebp;"
+    // Add/Subtract exit for GCC: restore ebp/ebx, copy eax to the output
+    // operand and return it from the C++ function.
+    #define AddEpilogue \
+            "pop ebp;" \
+            ".att_syntax prefix;" \
+            "pop %%ebx;" \
+            "mov %%eax, %0;" \
+                    : "=g" (res) \
+                    : "c" (C), "d" (A), "m" (B), "S" (N) \
+                    : "%edi", "memory", "cc" \
+        ); \
+        return res;
+
+    // Multiply entry for GCC: esi = X and ecx = Y come from the constraint
+    // list in MulEpilogue; Z is pushed so the body finds it at [esp].
+    // NOTE(review): Z ("rm") is pushed after two earlier pushes; if the
+    // compiler chose an esp-relative memory operand the offset would be
+    // stale -- verify.
+    #define MulPrologue \
+        __asm__ __volatile__ \
+        ( \
+            "push %%ebx;"	/* save this manually, in case of -fPIC */ \
+            "push %%ebp;" \
+            "push %0;" \
+            ".intel_syntax noprefix;"
+    // Multiply exit for GCC: drop the pushed Z pointer and restore ebp/ebx.
+    #define MulEpilogue \
+            "add esp, 4;" \
+            "pop ebp;" \
+            "pop ebx;" \
+            ".att_syntax prefix;" \
+            : \
+            : "rm" (Z), "S" (X), "c" (Y) \
+            : "%eax", "%edx", "%edi", "memory", "cc" \
+        );
+#endif
+
+// Pentium-scheduled multi-word addition: C[0..N) = A[0..N) + B[0..N).
+// Two words are added per iteration with adc, and the final carry (0 or 1)
+// is returned in eax. The loop counter is N / 2, so the code assumes N is
+// even -- NOTE(review): confirm every caller passes an even N.
+TAOCRYPT_NAKED word PentiumOptimized::Add(word *C, const word *A,
+                                          const word *B, unsigned int N)
+{
+    AddPrologue
+
+    // now: ebx = B, ecx = C, edx = A, esi = N
+    AS2(    sub ecx, edx)           // hold the distance between C & A so we
+                                    // can add this to A to get C
+    AS2(    xor eax, eax)           // clear eax
+
+    AS2(    sub eax, esi)           // eax is a negative index from end of B
+    AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
+
+    AS2(    sar eax, 1)             // unit of eax is now dwords; this also
+                                    // clears the carry flag
+    AS1(    jz  loopendAdd)         // if no dwords then nothing to do
+
+    AS1(loopstartAdd:)
+    AS2(    mov    esi,[edx])           // load lower word of A
+    AS2(    mov    ebp,[edx+4])         // load higher word of A
+
+    AS2(    mov    edi,[ebx+8*eax])     // load lower word of B
+    AS2(    lea    edx,[edx+8])         // advance A and C
+
+    AS2(    adc    esi,edi)             // add lower words
+    AS2(    mov    edi,[ebx+8*eax+4])   // load higher word of B
+
+    AS2(    adc    ebp,edi)             // add higher words
+    AS1(    inc    eax)                 // advance B (inc leaves the carry
+                                        // flag untouched)
+
+    AS2(    mov    [edx+ecx-8],esi)     // store lower word result
+    AS2(    mov    [edx+ecx-4],ebp)     // store higher word result
+
+    AS1(    jnz    loopstartAdd)   // loop until eax overflows and becomes zero
+
+    AS1(loopendAdd:)
+    AS2(    adc eax, 0)     // store carry into eax (return result register)
+
+    AddEpilogue
+}
+
+// Pentium-scheduled multi-word subtraction: C[0..N) = A[0..N) - B[0..N).
+// Two words are subtracted per iteration with sbb, and the final borrow
+// (0 or 1) is returned in eax. The loop counter is N / 2, so the code
+// assumes N is even -- NOTE(review): confirm every caller passes an even N.
+TAOCRYPT_NAKED word PentiumOptimized::Subtract(word *C, const word *A,
+                                               const word *B, unsigned int N)
+{
+    AddPrologue
+
+    // now: ebx = B, ecx = C, edx = A, esi = N
+    AS2(    sub ecx, edx)           // hold the distance between C & A so we
+                                    // can add this to A to get C
+    AS2(    xor eax, eax)           // clear eax
+
+    AS2(    sub eax, esi)           // eax is a negative index from end of B
+    AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
+
+    AS2(    sar eax, 1)             // unit of eax is now dwords; this also
+                                    // clears the carry flag
+    AS1(    jz  loopendSub)         // if no dwords then nothing to do
+
+    AS1(loopstartSub:)
+    AS2(    mov    esi,[edx])           // load lower word of A
+    AS2(    mov    ebp,[edx+4])         // load higher word of A
+
+    AS2(    mov    edi,[ebx+8*eax])     // load lower word of B
+    AS2(    lea    edx,[edx+8])         // advance A and C
+
+    AS2(    sbb    esi,edi)             // subtract lower words
+    AS2(    mov    edi,[ebx+8*eax+4])   // load higher word of B
+
+    AS2(    sbb    ebp,edi)             // subtract higher words
+    AS1(    inc    eax)                 // advance B (inc leaves the carry
+                                        // flag untouched)
+
+    AS2(    mov    [edx+ecx-8],esi)     // store lower word result
+    AS2(    mov    [edx+ecx-4],ebp)     // store higher word result
+
+    AS1(    jnz    loopstartSub)   // loop until eax overflows and becomes zero
+
+    AS1(loopendSub:)
+    AS2(    adc eax, 0)     // store the borrow (carry flag left by sbb) into
+                            // eax (return result register)
+
+    AddEpilogue
+}
+
+// On Pentium 4, the adc and sbb instructions are very expensive, so avoid them.
+
+// Pentium 4 multi-word addition: C[0..N) = A[0..N) + B[0..N), written
+// without adc. The carry is kept as 0/1 in eax: it is added to the A word
+// first, then the B word is added, and cmovc sets eax to 1 when that second
+// add overflows. Two words are handled per iteration (esi steps by 2), so
+// N is assumed even -- NOTE(review): confirm with callers.
+// Returns the final carry in eax.
+TAOCRYPT_NAKED word P4Optimized::Add(word *C, const word *A, const word *B,
+                                     unsigned int N)
+{
+    AddPrologue
+
+    // now: ebx = B, ecx = C, edx = A, esi = N
+    AS2(    xor     eax, eax)           // eax = running carry, starts at 0
+    AS1(    neg     esi)                // esi = -N, counted up towards zero
+    AS1(    jz      loopendAddP4)       // if no dwords then nothing to do
+
+    AS2(    mov     edi, [edx])         // first pair: edi = A[0]
+    AS2(    mov     ebp, [ebx])         //             ebp = B[0]
+    AS1(    jmp     carry1AddP4)        // word 0 has no carry-in to add
+
+    AS1(loopstartAddP4:)
+    AS2(    mov     edi, [edx+8])       // low word of the next A pair
+    AS2(    add     ecx, 8)             // advance C
+    AS2(    add     edx, 8)             // advance A
+    AS2(    mov     ebp, [ebx])         // low word of the next B pair
+    AS2(    add     edi, eax)           // add the carry-in
+    AS1(    jc      carry1AddP4)        // that add wrapped: keep eax == 1
+    AS2(    xor     eax, eax)           // otherwise reset the carry
+
+    AS1(carry1AddP4:)
+    AS2(    add     edi, ebp)           // low words: A + B
+    AS2(    mov     ebp, 1)             // constant for cmovc (mov keeps flags)
+    AS2(    mov     [ecx], edi)         // store low result word
+    AS2(    mov     edi, [edx+4])       // high word of A
+    AS2(    cmovc   eax, ebp)           // carry = 1 if the add overflowed
+    AS2(    mov     ebp, [ebx+4])       // high word of B
+    AS2(    add     ebx, 8)             // advance B
+    AS2(    add     edi, eax)           // add the carry-in
+    AS1(    jc      carry2AddP4)        // that add wrapped: keep eax == 1
+    AS2(    xor     eax, eax)           // otherwise reset the carry
+
+    AS1(carry2AddP4:)
+    AS2(    add     edi, ebp)           // high words: A + B
+    AS2(    mov     ebp, 1)
+    AS2(    cmovc   eax, ebp)           // carry = 1 if the add overflowed
+    AS2(    mov     [ecx+4], edi)       // store high result word
+    AS2(    add     esi, 2)             // two words done
+    AS1(    jnz     loopstartAddP4)
+
+    AS1(loopendAddP4:)
+
+    AddEpilogue
+}
+
+// Pentium 4 multi-word subtraction: C[0..N) = A[0..N) - B[0..N), written
+// without sbb. The borrow is kept as 0/1 in eax: it is subtracted from the
+// A word first, then the B word is subtracted, and cmovc sets eax to 1 when
+// that second subtraction borrows. Two words are handled per iteration
+// (esi steps by 2), so N is assumed even -- NOTE(review): confirm with
+// callers. Returns the final borrow in eax.
+TAOCRYPT_NAKED word P4Optimized::Subtract(word *C, const word *A,
+                                          const word *B, unsigned int N)
+{
+    AddPrologue
+
+    // now: ebx = B, ecx = C, edx = A, esi = N
+    AS2(    xor     eax, eax)           // eax = running borrow, starts at 0
+    AS1(    neg     esi)                // esi = -N, counted up towards zero
+    AS1(    jz      loopendSubP4)       // if no dwords then nothing to do
+
+    AS2(    mov     edi, [edx])         // first pair: edi = A[0]
+    AS2(    mov     ebp, [ebx])         //             ebp = B[0]
+    AS1(    jmp     carry1SubP4)        // word 0 has no borrow-in
+
+    AS1(loopstartSubP4:)
+    AS2(    mov     edi, [edx+8])       // low word of the next A pair
+    AS2(    add     edx, 8)             // advance A
+    AS2(    add     ecx, 8)             // advance C
+    AS2(    mov     ebp, [ebx])         // low word of the next B pair
+    AS2(    sub     edi, eax)           // subtract the borrow-in
+    AS1(    jc      carry1SubP4)        // that wrapped: keep eax == 1
+    AS2(    xor     eax, eax)           // otherwise reset the borrow
+
+    AS1(carry1SubP4:)
+    AS2(    sub     edi, ebp)           // low words: A - B
+    AS2(    mov     ebp, 1)             // constant for cmovc (mov keeps flags)
+    AS2(    mov     [ecx], edi)         // store low result word
+    AS2(    mov     edi, [edx+4])       // high word of A
+    AS2(    cmovc   eax, ebp)           // borrow = 1 if the sub wrapped
+    AS2(    mov     ebp, [ebx+4])       // high word of B
+    AS2(    add     ebx, 8)             // advance B
+    AS2(    sub     edi, eax)           // subtract the borrow-in
+    AS1(    jc      carry2SubP4)        // that wrapped: keep eax == 1
+    AS2(    xor     eax, eax)           // otherwise reset the borrow
+
+    AS1(carry2SubP4:)
+    AS2(    sub     edi, ebp)           // high words: A - B
+    AS2(    mov     ebp, 1)
+    AS2(    cmovc   eax, ebp)           // borrow = 1 if the sub wrapped
+    AS2(    mov     [ecx+4], edi)       // store high result word
+    AS2(    add     esi, 2)             // two words done
+    AS1(    jnz     loopstartSubP4)
+
+    AS1(loopendSubP4:)
+
+    AddEpilogue
+}
+
+// multiply assembly code originally contributed by Leonard Janke
+
+// Building blocks for the column-wise assembly multipliers below.
+// Register roles inside a multiply body: esi = X, ecx = Y, [esp] = Z,
+// ebp = low accumulator word, edi = middle accumulator word,
+// bl = count of carries out of the middle word (bh stays zero).
+
+// MulStartup: clear the whole accumulator.
+#define MulStartup \
+    AS2(xor ebp, ebp) \
+    AS2(xor edi, edi) \
+    AS2(xor ebx, ebx) 
+
+// MulShiftCarry: move to the next column. Expects edx to hold the old
+// middle word (left there by MulStoreDigit); it becomes the new low word,
+// the carry count in ebx becomes the new middle word, and ebx is cleared.
+#define MulShiftCarry \
+    AS2(mov ebp, edx) \
+    AS2(mov edi, ebx) \
+    AS2(xor ebx, ebx)
+
+// MulAccumulateBottom(i,j): add only the low 32 bits of X[i] * Y[j] to the
+// low accumulator word (used for the top word of a bottom-half product).
+#define MulAccumulateBottom(i,j) \
+    AS2(mov eax, [ecx+4*j]) \
+    AS2(imul eax, dword ptr [esi+4*i]) \
+    AS2(add ebp, eax)
+
+// MulAccumulate(i,j): add the full 64-bit product X[i] * Y[j] (edx:eax
+// after mul) to the accumulator, counting the carry-out in bl.
+#define MulAccumulate(i,j) \
+    AS2(mov eax, [ecx+4*j]) \
+    AS1(mul dword ptr [esi+4*i]) \
+    AS2(add ebp, eax) \
+    AS2(adc edi, edx) \
+    AS2(adc bl, bh)
+
+// MulStoreDigit(i): write the finished low word to Z[i]. The middle word is
+// parked in edx first because edi is reused to fetch Z from the stack.
+#define MulStoreDigit(i)  \
+    AS2(mov edx, edi) \
+    AS2(mov edi, [esp]) \
+    AS2(mov [edi+4*i], ebp)
+
+// MulLastDiagonal(digits): final column. Adds the product of the top words
+// X[digits-1] * Y[digits-1] to the accumulator and stores the last two
+// result words, Z[2*digits-2] and Z[2*digits-1].
+#define MulLastDiagonal(digits) \
+    AS2(mov eax, [ecx+4*(digits-1)]) \
+    AS1(mul dword ptr [esi+4*(digits-1)]) \
+    AS2(add ebp, eax) \
+    AS2(adc edx, edi) \
+    AS2(mov edi, [esp]) \
+    AS2(mov [edi+4*(2*digits-2)], ebp) \
+    AS2(mov [edi+4*(2*digits-1)], edx)
+
+// Assembly 4x4-word multiply: Z[0..7] = X[0..3] * Y[0..3].
+// Each group accumulates every product X[i] * Y[j] with the same i + j,
+// stores that result word, then shifts the accumulator for the next column.
+TAOCRYPT_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X,
+                                                const word* Y)
+{
+    MulPrologue
+    // now: [esp] = Z, esi = X, ecx = Y
+    MulStartup
+    // column 0
+    MulAccumulate(0,0)
+    MulStoreDigit(0)
+    MulShiftCarry
+
+    // column 1
+    MulAccumulate(1,0)
+    MulAccumulate(0,1)
+    MulStoreDigit(1)
+    MulShiftCarry
+
+    // column 2
+    MulAccumulate(2,0)
+    MulAccumulate(1,1)
+    MulAccumulate(0,2)
+    MulStoreDigit(2)
+    MulShiftCarry
+
+    // column 3
+    MulAccumulate(3,0)
+    MulAccumulate(2,1)
+    MulAccumulate(1,2)
+    MulAccumulate(0,3)
+    MulStoreDigit(3)
+    MulShiftCarry
+
+    // column 4
+    MulAccumulate(3,1)
+    MulAccumulate(2,2)
+    MulAccumulate(1,3)
+    MulStoreDigit(4)
+    MulShiftCarry
+
+    // column 5
+    MulAccumulate(3,2)
+    MulAccumulate(2,3)
+    MulStoreDigit(5)
+    MulShiftCarry
+
+    // column 6: X[3] * Y[3], writes Z[6] and Z[7]
+    MulLastDiagonal(4)
+    MulEpilogue
+}
+
+// Assembly 8x8-word multiply: Z[0..15] = X[0..7] * Y[0..7].
+// Each group accumulates every product X[i] * Y[j] with the same i + j,
+// stores that result word, then shifts the accumulator for the next column.
+TAOCRYPT_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X,
+                                                const word* Y)
+{
+    MulPrologue
+    // now: [esp] = Z, esi = X, ecx = Y
+    MulStartup
+    // column 0
+    MulAccumulate(0,0)
+    MulStoreDigit(0)
+    MulShiftCarry
+
+    // column 1
+    MulAccumulate(1,0)
+    MulAccumulate(0,1)
+    MulStoreDigit(1)
+    MulShiftCarry
+
+    // column 2
+    MulAccumulate(2,0)
+    MulAccumulate(1,1)
+    MulAccumulate(0,2)
+    MulStoreDigit(2)
+    MulShiftCarry
+
+    // column 3
+    MulAccumulate(3,0)
+    MulAccumulate(2,1)
+    MulAccumulate(1,2)
+    MulAccumulate(0,3)
+    MulStoreDigit(3)
+    MulShiftCarry
+
+    // column 4
+    MulAccumulate(4,0)
+    MulAccumulate(3,1)
+    MulAccumulate(2,2)
+    MulAccumulate(1,3)
+    MulAccumulate(0,4)
+    MulStoreDigit(4)
+    MulShiftCarry
+
+    // column 5
+    MulAccumulate(5,0)
+    MulAccumulate(4,1)
+    MulAccumulate(3,2)
+    MulAccumulate(2,3)
+    MulAccumulate(1,4)
+    MulAccumulate(0,5)
+    MulStoreDigit(5)
+    MulShiftCarry
+
+    // column 6
+    MulAccumulate(6,0)
+    MulAccumulate(5,1)
+    MulAccumulate(4,2)
+    MulAccumulate(3,3)
+    MulAccumulate(2,4)
+    MulAccumulate(1,5)
+    MulAccumulate(0,6)
+    MulStoreDigit(6)
+    MulShiftCarry
+
+    // column 7 (widest: eight products)
+    MulAccumulate(7,0)
+    MulAccumulate(6,1)
+    MulAccumulate(5,2)
+    MulAccumulate(4,3)
+    MulAccumulate(3,4)
+    MulAccumulate(2,5)
+    MulAccumulate(1,6)
+    MulAccumulate(0,7)
+    MulStoreDigit(7)
+    MulShiftCarry
+
+    // column 8
+    MulAccumulate(7,1)
+    MulAccumulate(6,2)
+    MulAccumulate(5,3)
+    MulAccumulate(4,4)
+    MulAccumulate(3,5)
+    MulAccumulate(2,6)
+    MulAccumulate(1,7)
+    MulStoreDigit(8)
+    MulShiftCarry
+
+    // column 9
+    MulAccumulate(7,2)
+    MulAccumulate(6,3)
+    MulAccumulate(5,4)
+    MulAccumulate(4,5)
+    MulAccumulate(3,6)
+    MulAccumulate(2,7)
+    MulStoreDigit(9)
+    MulShiftCarry
+
+    // column 10
+    MulAccumulate(7,3)
+    MulAccumulate(6,4)
+    MulAccumulate(5,5)
+    MulAccumulate(4,6)
+    MulAccumulate(3,7)
+    MulStoreDigit(10)
+    MulShiftCarry
+
+    // column 11
+    MulAccumulate(7,4)
+    MulAccumulate(6,5)
+    MulAccumulate(5,6)
+    MulAccumulate(4,7)
+    MulStoreDigit(11)
+    MulShiftCarry
+
+    // column 12
+    MulAccumulate(7,5)
+    MulAccumulate(6,6)
+    MulAccumulate(5,7)
+    MulStoreDigit(12)
+    MulShiftCarry
+
+    // column 13
+    MulAccumulate(7,6)
+    MulAccumulate(6,7)
+    MulStoreDigit(13)
+    MulShiftCarry
+
+    // column 14: X[7] * Y[7], writes Z[14] and Z[15]
+    MulLastDiagonal(8)
+    MulEpilogue
+}
+
+// Assembly bottom-half multiply: Z[0..7] = low eight words of
+// X[0..7] * Y[0..7]. Columns 0-6 use full 64-bit products with carry
+// tracking; column 7 needs only the low 32 bits of each product, so it uses
+// MulAccumulateBottom (imul) and no further carry handling.
+TAOCRYPT_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X,
+                                                      const word* Y)
+{
+    MulPrologue
+    // now: [esp] = Z, esi = X, ecx = Y
+    MulStartup
+    // column 0
+    MulAccumulate(0,0)
+    MulStoreDigit(0)
+    MulShiftCarry
+
+    // column 1
+    MulAccumulate(1,0)
+    MulAccumulate(0,1)
+    MulStoreDigit(1)
+    MulShiftCarry
+
+    // column 2
+    MulAccumulate(2,0)
+    MulAccumulate(1,1)
+    MulAccumulate(0,2)
+    MulStoreDigit(2)
+    MulShiftCarry
+
+    // column 3
+    MulAccumulate(3,0)
+    MulAccumulate(2,1)
+    MulAccumulate(1,2)
+    MulAccumulate(0,3)
+    MulStoreDigit(3)
+    MulShiftCarry
+
+    // column 4
+    MulAccumulate(4,0)
+    MulAccumulate(3,1)
+    MulAccumulate(2,2)
+    MulAccumulate(1,3)
+    MulAccumulate(0,4)
+    MulStoreDigit(4)
+    MulShiftCarry
+
+    // column 5
+    MulAccumulate(5,0)
+    MulAccumulate(4,1)
+    MulAccumulate(3,2)
+    MulAccumulate(2,3)
+    MulAccumulate(1,4)
+    MulAccumulate(0,5)
+    MulStoreDigit(5)
+    MulShiftCarry
+
+    // column 6
+    MulAccumulate(6,0)
+    MulAccumulate(5,1)
+    MulAccumulate(4,2)
+    MulAccumulate(3,3)
+    MulAccumulate(2,4)
+    MulAccumulate(1,5)
+    MulAccumulate(0,6)
+    MulStoreDigit(6)
+    MulShiftCarry
+
+    // column 7: truncated products only, result word is final
+    MulAccumulateBottom(7,0)
+    MulAccumulateBottom(6,1)
+    MulAccumulateBottom(5,2)
+    MulAccumulateBottom(4,3)
+    MulAccumulateBottom(3,4)
+    MulAccumulateBottom(2,5)
+    MulAccumulateBottom(1,6)
+    MulAccumulateBottom(0,7)
+    MulStoreDigit(7)
+    MulEpilogue
+}
+
+#undef AS1
+#undef AS2
+
+#else	// not x86 - no processor specific code at this layer
+
+// Without the x86 assembly kernels, LowLevel is simply the portable C++
+// implementation.
+typedef Portable LowLevel;
+
+#endif
+
+#ifdef SSE2_INTRINSICS_AVAILABLE
+
+// Calling-convention decoration for P4_Mul: empty when __GNUC__ is defined,
+// __fastcall for every other compiler.
+#ifdef __GNUC__
+#define TAOCRYPT_FASTCALL
+#else
+#define TAOCRYPT_FASTCALL __fastcall
+#endif
+
+// SSE2 helper: computes all sixteen 32x32-bit partial products of the four
+// words at A and the four words at B and writes them, grouped by result
+// column, into C[0..6] (seven __m128i values, i.e. 28 words).
+//
+// Output layout (each __m128i seen as two 64-bit lanes, low lane first):
+//   C[0] = { a0*b0, a2*b2 }    full 64-bit products
+//   C[6] = { a1*b1, a3*b3 }    full 64-bit products
+//   C[k] for k = 1..5 = { sum of the LOW 32-bit halves,
+//                         sum of the HIGH 32-bit halves }
+//          of the products listed for that entry below.
+// Variable names spell out the lanes from high to low, e.g. a1b0_a0b1 holds
+// a1*b0 in the high lane and a0*b1 in the low lane.
+// A and B are read with _mm_load_si128, so both must be 16-byte aligned.
+static void TAOCRYPT_FASTCALL P4_Mul(__m128i *C, const __m128i *A,
+                                     const __m128i *B)
+{
+    __m128i a3210 = _mm_load_si128(A);
+    __m128i b3210 = _mm_load_si128(B);
+
+    __m128i sum;
+
+    // z supplies zero words so unpack{lo,hi}_epi32 zero-extends each 32-bit
+    // half of a product into its own 64-bit lane
+    __m128i z = _mm_setzero_si128();
+    __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
+    C[0] = a2b2_a0b0;
+
+    // C[1]: a1*b0 + a0*b1
+    __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
+    __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
+    __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
+    __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
+    __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
+    C[1] = _mm_add_epi64(a1b0, a0b1);
+
+    // shifting each 64-bit lane right by 32 exposes the odd-indexed words
+    __m128i a31 = _mm_srli_epi64(a3210, 32);
+    __m128i b31 = _mm_srli_epi64(b3210, 32);
+    __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
+    C[6] = a3b3_a1b1;
+
+    // C[2]: a1*b1 + a0*b2 + a2*b0
+    __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
+    __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
+    __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
+    __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
+    __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
+    sum = _mm_add_epi64(a1b1, a0b2);
+    C[2] = _mm_add_epi64(sum, a2b0);
+
+    // C[3]: a3*b0 + a1*b2 + a2*b1 + a0*b3
+    __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
+    __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
+    __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
+    __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
+    __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
+    __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
+    __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
+    __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
+    __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
+    sum = _mm_add_epi64(a2b1, a0b3);
+    C[3] = _mm_add_epi64(sum, sum1);
+
+    // C[4]: a2*b2 + a3*b1 + a1*b3
+    __m128i	a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
+    __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
+    __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
+    __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
+    sum = _mm_add_epi64(a2b2, a3b1);
+    C[4] = _mm_add_epi64(sum, a1b3);
+
+    // C[5]: a3*b2 + a2*b3
+    __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
+    __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
+    __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
+    __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
+    __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
+    C[5] = _mm_add_epi64(a3b2, a2b3);
+}
+
+// SSE2/MMX 4x4-word multiply: C[0..7] = A[0..3] * B[0..3].
+// P4_Mul leaves the partial products in temp; this routine walks them as
+// 32-bit words (w[]) and 64-bit chunks (mw[]), adding the pieces that
+// belong to each result word and carrying the upper 32 bits of the running
+// 64-bit sum into the next word.
+// wN below names the value starting at 32-bit word N of temp.
+// NOTE(review): A and B are cast to __m128i* and loaded by P4_Mul with
+// _mm_load_si128, so they are assumed 16-byte aligned -- confirm callers.
+void P4Optimized::Multiply4(word *C, const word *A, const word *B)
+{
+    __m128i temp[7];
+    const word *w = (word *)temp;
+    const __m64 *mw = (__m64 *)w;
+
+    P4_Mul(temp, (__m128i *)A, (__m128i *)B);
+
+    // result word 0 is the low half of a0*b0
+    C[0] = w[0];
+
+    __m64 s1, s2;
+
+    __m64 w1 = _mm_cvtsi32_si64(w[1]);      // high half of a0*b0
+    __m64 w4 = mw[2];
+    __m64 w6 = mw[3];
+    __m64 w8 = mw[4];
+    __m64 w10 = mw[5];
+    __m64 w12 = mw[6];
+    __m64 w14 = mw[7];
+    __m64 w16 = mw[8];
+    __m64 w18 = mw[9];
+    __m64 w20 = mw[10];
+    __m64 w22 = mw[11];
+    __m64 w26 = _mm_cvtsi32_si64(w[26]);    // low half of a3*b3
+
+    // each step: add this word's contributions to the running sum, emit the
+    // low 32 bits, keep the upper 32 bits as carry
+    s1 = _mm_add_si64(w1, w4);
+    C[1] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w6, w8);
+    s1 = _mm_add_si64(s1, s2);
+    C[2] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w10, w12);
+    s1 = _mm_add_si64(s1, s2);
+    C[3] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w14, w16);
+    s1 = _mm_add_si64(s1, s2);
+    C[4] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w18, w20);
+    s1 = _mm_add_si64(s1, s2);
+    C[5] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w22, w26);
+    s1 = _mm_add_si64(s1, s2);
+    C[6] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    // top word: remaining carry plus the high half of a3*b3
+    C[7] = _mm_cvtsi64_si32(s1) + w[27];
+    _mm_empty();    // leave MMX state before returning
+}
+
+// SSE2/MMX 8x8-word multiply: C[0..15] = A[0..7] * B[0..7].
+// The operands are split into 4-word halves and four P4_Mul calls produce
+// the four 4x4 partial-product sets, each occupying 7 __m128i (28 words)
+// of temp:
+//   w = A[0..3] * B[0..3]   (contributes from result word 0)
+//   x = A[4..7] * B[0..3]   (contributes from result word 4)
+//   y = A[0..3] * B[4..7]   (contributes from result word 4)
+//   z = A[4..7] * B[4..7]   (contributes from result word 8)
+// The second half of the function adds, for each result word, the pieces of
+// w/x/y/z that land on it, emitting the low 32 bits of the running 64-bit
+// sum and carrying the upper 32 bits forward.
+// A name such as x14 is the value starting at 32-bit word 14 of that set.
+// NOTE(review): A and B are cast to __m128i* and loaded by P4_Mul with
+// _mm_load_si128, so they are assumed 16-byte aligned -- confirm callers.
+void P4Optimized::Multiply8(word *C, const word *A, const word *B)
+{
+    __m128i temp[28];
+    const word *w = (word *)temp;
+    const __m64 *mw = (__m64 *)w;
+    const word *x = (word *)temp+7*4;
+    const __m64 *mx = (__m64 *)x;
+    const word *y = (word *)temp+7*4*2;
+    const __m64 *my = (__m64 *)y;
+    const word *z = (word *)temp+7*4*3;
+    const __m64 *mz = (__m64 *)z;
+
+    // low(A) * low(B)
+    P4_Mul(temp, (__m128i *)A, (__m128i *)B);
+
+    // high(A) * low(B)
+    P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
+
+    // low(A) * high(B)
+    P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
+
+    // high(A) * high(B)
+    P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
+
+    C[0] = w[0];
+
+    __m64 s1, s2, s3, s4;
+
+    // pieces of w
+    __m64 w1 = _mm_cvtsi32_si64(w[1]);
+    __m64 w4 = mw[2];
+    __m64 w6 = mw[3];
+    __m64 w8 = mw[4];
+    __m64 w10 = mw[5];
+    __m64 w12 = mw[6];
+    __m64 w14 = mw[7];
+    __m64 w16 = mw[8];
+    __m64 w18 = mw[9];
+    __m64 w20 = mw[10];
+    __m64 w22 = mw[11];
+    __m64 w26 = _mm_cvtsi32_si64(w[26]);
+    __m64 w27 = _mm_cvtsi32_si64(w[27]);
+
+    // pieces of x
+    __m64 x0 = _mm_cvtsi32_si64(x[0]);
+    __m64 x1 = _mm_cvtsi32_si64(x[1]);
+    __m64 x4 = mx[2];
+    __m64 x6 = mx[3];
+    __m64 x8 = mx[4];
+    __m64 x10 = mx[5];
+    __m64 x12 = mx[6];
+    __m64 x14 = mx[7];
+    __m64 x16 = mx[8];
+    __m64 x18 = mx[9];
+    __m64 x20 = mx[10];
+    __m64 x22 = mx[11];
+    __m64 x26 = _mm_cvtsi32_si64(x[26]);
+    __m64 x27 = _mm_cvtsi32_si64(x[27]);
+
+    // pieces of y
+    __m64 y0 = _mm_cvtsi32_si64(y[0]);
+    __m64 y1 = _mm_cvtsi32_si64(y[1]);
+    __m64 y4 = my[2];
+    __m64 y6 = my[3];
+    __m64 y8 = my[4];
+    __m64 y10 = my[5];
+    __m64 y12 = my[6];
+    __m64 y14 = my[7];
+    __m64 y16 = my[8];
+    __m64 y18 = my[9];
+    __m64 y20 = my[10];
+    __m64 y22 = my[11];
+    __m64 y26 = _mm_cvtsi32_si64(y[26]);
+    __m64 y27 = _mm_cvtsi32_si64(y[27]);
+
+    // pieces of z (z[27] is read directly for the last word)
+    __m64 z0 = _mm_cvtsi32_si64(z[0]);
+    __m64 z1 = _mm_cvtsi32_si64(z[1]);
+    __m64 z4 = mz[2];
+    __m64 z6 = mz[3];
+    __m64 z8 = mz[4];
+    __m64 z10 = mz[5];
+    __m64 z12 = mz[6];
+    __m64 z14 = mz[7];
+    __m64 z16 = mz[8];
+    __m64 z18 = mz[9];
+    __m64 z20 = mz[10];
+    __m64 z22 = mz[11];
+    __m64 z26 = _mm_cvtsi32_si64(z[26]);
+
+    // result words 1-3: only w contributes
+    s1 = _mm_add_si64(w1, w4);
+    C[1] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w6, w8);
+    s1 = _mm_add_si64(s1, s2);
+    C[2] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w10, w12);
+    s1 = _mm_add_si64(s1, s2);
+    C[3] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    // result words 4-7: w plus the start of x and y
+    s3 = _mm_add_si64(x0, y0);
+    s2 = _mm_add_si64(w14, w16);
+    s1 = _mm_add_si64(s1, s3);
+    s1 = _mm_add_si64(s1, s2);
+    C[4] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x1, y1);
+    s4 = _mm_add_si64(x4, y4);
+    s1 = _mm_add_si64(s1, w18);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, w20);
+    s1 = _mm_add_si64(s1, s3);
+    C[5] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x6, y6);
+    s4 = _mm_add_si64(x8, y8);
+    s1 = _mm_add_si64(s1, w22);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, w26);
+    s1 = _mm_add_si64(s1, s3);
+    C[6] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x10, y10);
+    s4 = _mm_add_si64(x12, y12);
+    s1 = _mm_add_si64(s1, w27);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, s3);
+    C[7] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    // result words 8-11: x and y plus the start of z
+    s3 = _mm_add_si64(x14, y14);
+    s4 = _mm_add_si64(x16, y16);
+    s1 = _mm_add_si64(s1, z0);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, s3);
+    C[8] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x18, y18);
+    s4 = _mm_add_si64(x20, y20);
+    s1 = _mm_add_si64(s1, z1);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, z4);
+    s1 = _mm_add_si64(s1, s3);
+    C[9] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x22, y22);
+    s4 = _mm_add_si64(x26, y26);
+    s1 = _mm_add_si64(s1, z6);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, z8);
+    s1 = _mm_add_si64(s1, s3);
+    C[10] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x27, y27);
+    s1 = _mm_add_si64(s1, z10);
+    s1 = _mm_add_si64(s1, z12);
+    s1 = _mm_add_si64(s1, s3);
+    C[11] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    // result words 12-15: only z contributes
+    s3 = _mm_add_si64(z14, z16);
+    s1 = _mm_add_si64(s1, s3);
+    C[12] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(z18, z20);
+    s1 = _mm_add_si64(s1, s3);
+    C[13] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(z22, z26);
+    s1 = _mm_add_si64(s1, s3);
+    C[14] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    // top word: remaining carry plus the last word of z
+    C[15] = z[27] + _mm_cvtsi64_si32(s1);
+    _mm_empty();    // leave MMX state before returning
+}
+
+void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
+{   // Writes C[0..7] only; presumably the low 8 words of the product of the 8-word operands A and B -- TODO confirm against P4_Mul
+    __m128i temp[21];                       // scratch for three P4_Mul outputs, 7 __m128i apart
+    const word *w = (word *)temp;           // word view of the 1st output (temp)
+    const __m64 *mw = (__m64 *)w;           // same data viewed as 64-bit elements
+    const word *x = (word *)temp+7*4;       // 2nd output; coincides with temp+7 if a word is 32 bits -- TODO confirm
+    const __m64 *mx = (__m64 *)x;
+    const word *y = (word *)temp+7*4*2;     // 3rd output (temp+14 under the same assumption)
+    const __m64 *my = (__m64 *)y;
+    // 1st 128-bit chunk of A with 1st 128-bit chunk of B -> w
+    P4_Mul(temp, (__m128i *)A, (__m128i *)B);
+    // 2nd 128-bit chunk of A with 1st 128-bit chunk of B -> x
+    P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
+    // 1st 128-bit chunk of A with 2nd 128-bit chunk of B -> y
+    P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
+
+    C[0] = w[0];
+
+    __m64 s1, s2, s3, s4;                   // s1 is the running column sum; s2..s4 are temporaries
+    // Load the terms needed for C[1..6]; the element layout is whatever P4_Mul produced.
+    __m64 w1 = _mm_cvtsi32_si64(w[1]);
+    __m64 w4 = mw[2];
+    __m64 w6 = mw[3];
+    __m64 w8 = mw[4];
+    __m64 w10 = mw[5];
+    __m64 w12 = mw[6];
+    __m64 w14 = mw[7];
+    __m64 w16 = mw[8];
+    __m64 w18 = mw[9];
+    __m64 w20 = mw[10];
+    __m64 w22 = mw[11];
+    __m64 w26 = _mm_cvtsi32_si64(w[26]);
+
+    __m64 x0 = _mm_cvtsi32_si64(x[0]);
+    __m64 x1 = _mm_cvtsi32_si64(x[1]);
+    __m64 x4 = mx[2];
+    __m64 x6 = mx[3];
+    __m64 x8 = mx[4];
+
+    __m64 y0 = _mm_cvtsi32_si64(y[0]);
+    __m64 y1 = _mm_cvtsi32_si64(y[1]);
+    __m64 y4 = my[2];
+    __m64 y6 = my[3];
+    __m64 y8 = my[4];
+    // Each stage below: add the stage's terms to s1, store the low 32 bits, shift right by 32 to keep the carry.
+    s1 = _mm_add_si64(w1, w4);
+    C[1] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w6, w8);
+    s1 = _mm_add_si64(s1, s2);
+    C[2] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s2 = _mm_add_si64(w10, w12);
+    s1 = _mm_add_si64(s1, s2);
+    C[3] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+    // From C[4] on, terms from the x and y outputs are added as well.
+    s3 = _mm_add_si64(x0, y0);
+    s2 = _mm_add_si64(w14, w16);
+    s1 = _mm_add_si64(s1, s3);
+    s1 = _mm_add_si64(s1, s2);
+    C[4] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x1, y1);
+    s4 = _mm_add_si64(x4, y4);
+    s1 = _mm_add_si64(s1, w18);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, w20);
+    s1 = _mm_add_si64(s1, s3);
+    C[5] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+
+    s3 = _mm_add_si64(x6, y6);
+    s4 = _mm_add_si64(x8, y8);
+    s1 = _mm_add_si64(s1, w22);
+    s3 = _mm_add_si64(s3, s4);
+    s1 = _mm_add_si64(s1, w26);
+    s1 = _mm_add_si64(s1, s3);
+    C[6] = _mm_cvtsi64_si32(s1);
+    s1 = _mm_srli_si64(s1, 32);
+    // Last word: plain word arithmetic, no carry out of C[7] is kept.
+    C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
+    _mm_empty();                            // leave MMX state (EMMS) after using __m64 values
+}
+
+#endif	// #ifdef SSE2_INTRINSICS_AVAILABLE
+
+// end optimized
+
+// ********************************************************
+
+#define A0      A               // A starting at word 0
+#define A1      (A+N2)          // A starting N2 words in
+#define B0      B               // B starting at word 0
+#define B1      (B+N2)          // B starting N2 words in
+// All of these expand in terms of N and N2, which must be in scope at the use site (the users below set N2 = N/2).
+#define T0      T               // T at offset 0
+#define T1      (T+N2)          // T at offset N2
+#define T2      (T+N)           // T at offset N
+#define T3      (T+N+N2)        // T at offset N+N2
+
+#define R0      R               // R at offset 0
+#define R1      (R+N2)          // R at offset N2
+#define R2      (R+N)           // R at offset N
+#define R3      (R+N+N2)        // R at offset N+N2
+
+//VC60 workaround: compiler bug triggered without the extra dummy parameters
+
+// R[2*N] - result = A*B
+// T[2*N] - temporary work space
+// A[N] --- multiplier
+// B[N] --- multiplicand
+
+
+void RecursiveMultiply(word *R, word *T, const word *A, const word *B,
+                       unsigned int N)
+{   // R[0..2N) = A*B; T is scratch space. Small N goes to LowLevel kernels, larger N is split in halves.
+    if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
+        LowLevel::Multiply8(R, A, B);
+    else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
+        LowLevel::Multiply4(R, A, B);
+    else if (N==2)
+        LowLevel::Multiply2(R, A, B);
+    else
+    {
+        const unsigned int N2 = N/2;    // half size; used implicitly by the A0/A1/B0/B1/T*/R* macros
+        int carry;                      // signed: starts at 0 or -1 and collects the Add() carries below
+
+        int aComp = Compare(A0, A1, N2);    // presumably -1/0/+1 for A0 <, ==, > A1 -- TODO confirm
+        int bComp = Compare(B0, B1, N2);
+        // Selector is 3*aComp + bComp; only the four (+-1, +-1) combinations get their own case.
+        switch (2*aComp + aComp + bComp)
+        {
+        case -4:    // aComp == -1, bComp == -1
+            LowLevel::Subtract(R0, A1, A0, N2);
+            LowLevel::Subtract(R1, B0, B1, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            LowLevel::Subtract(T1, T1, R0, N2);     // fix up the upper half of the product
+            carry = -1;
+            break;
+        case -2:    // aComp == -1, bComp == +1
+            LowLevel::Subtract(R0, A1, A0, N2);
+            LowLevel::Subtract(R1, B0, B1, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            carry = 0;
+            break;
+        case 2:     // aComp == +1, bComp == -1
+            LowLevel::Subtract(R0, A0, A1, N2);
+            LowLevel::Subtract(R1, B1, B0, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            carry = 0;
+            break;
+        case 4:     // aComp == +1, bComp == +1
+            LowLevel::Subtract(R0, A1, A0, N2);
+            LowLevel::Subtract(R1, B0, B1, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            LowLevel::Subtract(T1, T1, R1, N2);     // fix up the upper half of the product
+            carry = -1;
+            break;
+        default:    // at least one of aComp / bComp is 0: the middle product is zero
+            SetWords(T0, 0, N);
+            carry = 0;
+        }
+        // The two remaining half-size products go straight into the result buffer.
+        RecursiveMultiply(R0, T2, A0, B0, N2);
+        RecursiveMultiply(R2, T2, A1, B1, N2);
+
+        // now T[01] holds (A1-A0)*(B0-B1),R[01] holds A0*B0, R[23] holds A1*B1
+        // Middle term = T[01] + R[01] + R[23], added into R starting at offset N2.
+        carry += LowLevel::Add(T0, T0, R0, N);
+        carry += LowLevel::Add(T0, T0, R2, N);
+        carry += LowLevel::Add(R1, R1, T0, N);
+
+        Increment(R3, N2, carry);       // propagate the net carry into the top N2 words
+    }
+}
+
+
+// R[0..2N) = A*A, with T as scratch space.  Sizes handled by a LowLevel
+// kernel return early; anything larger is split into two halves.
+void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
+{
+    if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
+    {
+        LowLevel::Square4(R, A);
+        return;
+    }
+    if (N==2)
+    {
+        LowLevel::Square2(R, A);
+        return;
+    }
+
+    const unsigned int N2 = N/2;    // required by the A0/A1, R0..R3, T0/T2 macros
+
+    RecursiveSquare(R0, T2, A0, N2);            // low half squared  -> R[0..N)
+    RecursiveSquare(R2, T2, A1, N2);            // high half squared -> R[N..2N)
+    RecursiveMultiply(T0, T2, A0, A1, N2);      // cross product     -> T[0..N)
+
+    // The cross product is added twice, N2 words up from the bottom of R.
+    word overflow = LowLevel::Add(R1, R1, T0, N);
+    overflow += LowLevel::Add(R1, R1, T0, N);
+    Increment(R3, N2, overflow);
+}
+
+
+// R[N] - bottom half of A*B
+// T[N] - temporary work space
+// A[N] - multiplier
+// B[N] - multiplicand
+
+
+// Produces only the bottom N words of A*B in R, using T as scratch space.
+void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B,
+                             unsigned int N)
+{
+    // Sizes with a dedicated LowLevel kernel are handled directly.
+    if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
+    {
+        LowLevel::Multiply8Bottom(R, A, B);
+        return;
+    }
+    if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
+    {
+        LowLevel::Multiply4Bottom(R, A, B);
+        return;
+    }
+    if (N==2)
+    {
+        LowLevel::Multiply2Bottom(R, A, B);
+        return;
+    }
+
+    const unsigned int N2 = N/2;    // required by the A0/A1/B0/B1/T0/T1/R1 macros
+
+    // Full product of the low halves fills all N words of R.
+    RecursiveMultiply(R, T, A0, B0, N2);
+
+    // Each cross product only contributes its bottom N2 words, added into
+    // the upper half of R.
+    RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
+    LowLevel::Add(R1, R1, T0, N2);
+
+    RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
+    LowLevel::Add(R1, R1, T0, N2);
+}
+
+
+void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A,
+                          const word *B, unsigned int N)
+{   // Writes N words to R; presumably the top half of A*B, with L holding the already-known bottom half -- TODO confirm with callers
+    if (N==4)
+    {
+        LowLevel::Multiply4(T, A, B);       // full 8-word product into T
+        memcpy(R, T+4, 4*WORD_SIZE);        // keep only words 4..7
+    }
+    else if (N==2)
+    {
+        LowLevel::Multiply2(T, A, B);       // full 4-word product into T
+        memcpy(R, T+2, 2*WORD_SIZE);        // keep only words 2..3
+    }
+    else
+    {
+        const unsigned int N2 = N/2;    // half size; used implicitly by the A0/A1/B0/B1/T*/R* macros
+        int carry;                      // signed running carry, starts at 0 or -1
+
+        int aComp = Compare(A0, A1, N2);    // presumably -1/0/+1 -- TODO confirm
+        int bComp = Compare(B0, B1, N2);
+        // Same sign-case dispatch as RecursiveMultiply: selector is 3*aComp + bComp.
+        switch (2*aComp + aComp + bComp)
+        {
+        case -4:    // aComp == -1, bComp == -1
+            LowLevel::Subtract(R0, A1, A0, N2);
+            LowLevel::Subtract(R1, B0, B1, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            LowLevel::Subtract(T1, T1, R0, N2);
+            carry = -1;
+            break;
+        case -2:    // aComp == -1, bComp == +1
+            LowLevel::Subtract(R0, A1, A0, N2);
+            LowLevel::Subtract(R1, B0, B1, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            carry = 0;
+            break;
+        case 2:     // aComp == +1, bComp == -1
+            LowLevel::Subtract(R0, A0, A1, N2);
+            LowLevel::Subtract(R1, B1, B0, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            carry = 0;
+            break;
+        case 4:     // aComp == +1, bComp == +1
+            LowLevel::Subtract(R0, A1, A0, N2);
+            LowLevel::Subtract(R1, B0, B1, N2);
+            RecursiveMultiply(T0, T2, R0, R1, N2);
+            LowLevel::Subtract(T1, T1, R1, N2);
+            carry = -1;
+            break;
+        default:    // at least one comparison returned 0: middle product is zero
+            SetWords(T0, 0, N);
+            carry = 0;
+        }
+        // R is used as scratch space for this product; its contents are rebuilt below.
+        RecursiveMultiply(T2, R0, A1, B1, N2);
+
+        // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
+        // c2 collects the borrows of (L[N2..N) - L[0..N2)) - T0, computed into R0.
+        word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
+        c2 += LowLevel::Subtract(R0, R0, T0, N2);
+        word t = (Compare(R0, T2, N2) == -1);   // 1 when Compare reports R0 below T2, else 0
+
+        carry += t;
+        carry += Increment(R0, N2, c2+t);
+        carry += LowLevel::Add(R0, R0, T1, N2);
+        carry += LowLevel::Add(R0, R0, T3, N2);
+
+        CopyWords(R1, T3, N2);          // upper half of the result starts as the top of A1*B1
+        Increment(R1, N2, carry);       // then absorbs the accumulated carry
+    }
+}
+
+
+// Forwards to the selected LowLevel backend and hands back its return value.
+inline word Add(word *C, const word *A, const word *B, unsigned int N)
+{
+    const word carryOut = LowLevel::Add(C, A, B, N);
+    return carryOut;
+}
+
+// Forwards to the selected LowLevel backend and hands back its return value.
+inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
+{
+    const word borrowOut = LowLevel::Subtract(C, A, B, N);
+    return borrowOut;
+}
+
+// Equal-size multiply entry point: R = A*B with scratch T, implemented by
+// RecursiveMultiply.
+inline void Multiply(word *R, word *T,
+                     const word *A, const word *B, unsigned int N)
+{
+    RecursiveMultiply(R, T, A, B, N);
+}
+
+// Squaring entry point: R = A*A with scratch T, implemented by
+// RecursiveSquare.
+inline void Square(word *R, word *T,
+                   const word *A, unsigned int N)
+{
+    RecursiveSquare(R, T, A, N);
+}
+
+
+// Multiplies operands of different word counts: R = A*B, with T as scratch.
+// Equal sizes are forwarded to Multiply/Square; otherwise the longer operand
+// is processed in chunks the size of the shorter one.
+void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA,
+                        const word *B, unsigned int NB)
+{
+    if (NA == NB)
+    {
+        // Same buffer on both sides means a square.
+        if (A != B)
+            Multiply(R, T, A, B, NA);
+        else
+            Square(R, T, A, NA);
+        return;
+    }
+
+    // From here on A names the shorter operand.
+    if (NA > NB)
+    {
+        STL::swap(A, B);
+        STL::swap(NA, NB);
+    }
+
+    // A two-word A whose upper word is zero is really a single word.
+    if (NA==2 && A[1]==0)
+    {
+        const word factor = A[0];
+
+        if (factor == 0)
+        {
+            SetWords(R, 0, NB+2);
+        }
+        else if (factor == 1)
+        {
+            CopyWords(R, B, NB);
+            R[NB] = R[NB+1] = 0;
+        }
+        else
+        {
+            R[NB] = LinearMultiply(R, B, factor, NB);
+            R[NB+1] = 0;
+        }
+        return;
+    }
+
+    const unsigned int stride = 2*NA;
+
+    // First chunk goes into R; its upper half is parked in T.
+    Multiply(R, T, A, B, NA);
+    CopyWords(T+stride, R+NA, NA);
+
+    unsigned int offset;
+
+    // Chunks at even multiples of NA are collected in T ...
+    offset = stride;
+    while (offset < NB)
+    {
+        Multiply(T+NA+offset, T, A, B+offset, NA);
+        offset += stride;
+    }
+
+    // ... chunks at odd multiples of NA are written directly into R.
+    offset = NA;
+    while (offset < NB)
+    {
+        Multiply(R+offset, T, A, B+offset, NA);
+        offset += stride;
+    }
+
+    // Merge the two interleaved sets of partial products.
+    if (Add(R+NA, R+NA, T+stride, NB-NA))
+        Increment(R+NB, NA);
+}
+
+
+// product = |a| * |b|; the sign of product is always set to POSITIVE.
+void PositiveMultiply(Integer& product, const Integer& a, const Integer& b)
+{
+    const unsigned int aWords = RoundupSize(a.WordCount());
+    const unsigned int bWords = RoundupSize(b.WordCount());
+    const unsigned int total  = aWords + bWords;
+
+    product.reg_.CleanNew(RoundupSize(total));
+    product.sign_ = Integer::POSITIVE;
+
+    // Scratch space sized like the combined operands.
+    AlignedWordBlock scratch(total);
+    AsymmetricMultiply(product.reg_.get_buffer(), scratch.get_buffer(),
+                       a.reg_.get_buffer(), aWords,
+                       b.reg_.get_buffer(), bWords);
+}
+
+// Signed multiply: magnitude from PositiveMultiply, then the sign is
+// flipped when exactly one operand is negative.
+void Multiply(Integer &product, const Integer &a, const Integer &b)
+{
+    PositiveMultiply(product, a, b);
+
+    const bool signsDiffer = (a.NotNegative() != b.NotNegative());
+    if (signsDiffer)
+        product.Negate();
+}
+
+
+// Returns N reduced by two for every all-zero pair of words at the top of X.
+// N is stepped in pairs, so callers are expected to pass an even count.
+static inline unsigned int EvenWordCount(const word *X, unsigned int N)
+{
+    for (; N != 0; N -= 2)
+    {
+        if (X[N-2] != 0 || X[N-1] != 0)
+            break;
+    }
+    return N;
+}
+
+
+unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA,
+                           const word *M, unsigned int N)
+{   // Returns k and writes N words to R (all zero, with k == 0, if f runs down to zero). Presumably R = A^-1 * 2^k mod M -- TODO confirm. T must hold 4*N words.
+    word *b = T;        // four consecutive N-word work areas inside T
+    word *c = T+N;
+    word *f = T+2*N;
+    word *g = T+3*N;
+    unsigned int bcLen=2, fgLen=EvenWordCount(M, N);    // active lengths of (b,c) and (f,g), always even
+    unsigned int k=0, s=0;                              // k: total right shift applied to f; s: number of swaps
+    // Start state: b = 1, c = 0, f = A (zero padded), g = M.
+    SetWords(T, 0, 3*N);
+    b[0]=1;
+    CopyWords(f, A, NA);
+    CopyWords(g, M, N);
+
+    while (1)
+    {
+        word t=f[0];
+        while (!t)      // low word of f is zero: shift by whole words
+        {
+            if (EvenWordCount(f, fgLen)==0)     // f is entirely zero: give up
+            {
+                SetWords(R, 0, N);
+                return 0;
+            }
+
+            ShiftWordsRightByWords(f, fgLen, 1);
+            if (c[bcLen-1]) bcLen+=2;           // make room before c grows by a word
+            ShiftWordsLeftByWords(c, bcLen, 1);
+            k+=WORD_BITS;
+            t=f[0];
+        }
+
+        unsigned int i=0;       // number of trailing zero bits in the low word of f
+        while (t%2 == 0)
+        {
+            t>>=1;
+            i++;
+        }
+        k+=i;
+        // Done when f, with those i bits removed, equals 1.
+        if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
+        {
+            if (s%2==0)                 // even number of swaps: result is b
+                CopyWords(R, b, N);
+            else                        // odd number of swaps: result is M - b
+                Subtract(R, M, b, N);
+            return k;
+        }
+        // Apply the bit shift: f moves right, c moves left by the same amount.
+        ShiftWordsRightByBits(f, fgLen, i);
+        t=ShiftWordsLeftByBits(c, bcLen, i);
+        if (t)                          // bits shifted out of c: extend it
+        {
+            c[bcLen] = t;
+            bcLen+=2;
+        }
+        // Drop the top word pair once it is zero in both f and g.
+        if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
+            fgLen-=2;
+        // Keep f >= g by swapping the roles of (f,g) and (b,c) together.
+        if (Compare(f, g, fgLen)==-1)
+        {
+            STL::swap(f, g);
+            STL::swap(b, c);
+            s++;
+        }
+
+        Subtract(f, f, g, fgLen);       // f -= g
+
+        if (Add(b, b, c, bcLen))        // b += c, extending on carry out
+        {
+            b[bcLen] = 1;
+            bcLen+=2;
+        }
+    }
+}
+
+// R[N] - result = A/(2^k) mod M
+// A[N] - input
+// M[N] - modulus
+
+// Halves R (initially a copy of A) k times.  Before each halving an odd R
+// has M added to it, and the carry out of that addition becomes the new
+// top bit after the shift.
+void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M,
+                       unsigned int N)
+{
+    CopyWords(R, A, N);
+
+    for (; k != 0; --k)
+    {
+        const bool odd = (R[0]%2 != 0);
+        word carry = 0;
+
+        if (odd)
+            carry = Add(R, R, M, N);
+
+        ShiftWordsRightByBits(R, N, 1);
+
+        if (odd)
+            R[N-1] += carry<<(WORD_BITS-1);
+    }
+}
+
+// R[N] - result = A*(2^k) mod M
+// A[N] - input
+// M[N] - modulus
+
+// Doubles R (initially a copy of A) k times, subtracting M whenever the
+// shift overflows or the result is no longer below M.
+void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M,
+                         unsigned int N)
+{
+    CopyWords(R, A, N);
+
+    for (unsigned int round = 0; round < k; ++round)
+    {
+        const bool overflowed = (ShiftWordsLeftByBits(R, N, 1) != 0);
+        if (overflowed || Compare(R, M, N) >= 0)
+            Subtract(R, R, M, N);
+    }
+}
+
+
+// ********** end of integer needs
+
+
+// Default value: two-word register holding zero, sign POSITIVE.
+Integer::Integer()
+    : reg_(2), sign_(POSITIVE)
+{
+    reg_[0] = 0;
+    reg_[1] = 0;
+}
+
+
+// Copy constructor: the register is sized from t's significant words
+// (rounded up), then that many words and the sign are copied.
+Integer::Integer(const Integer& t)
+    : reg_(RoundupSize(t.WordCount())), sign_(t.sign_)
+{
+    const word* const src = t.reg_.get_buffer();
+    CopyWords(reg_.get_buffer(), src, reg_.size());
+}
+
+
+// Builds an Integer from a native signed long.
+//
+// The sign is stored separately and the register receives the magnitude.
+// The magnitude is computed in unsigned arithmetic: negating the most
+// negative signed long as a signed value overflows, which is undefined
+// behaviour, whereas unsigned negation is well defined and yields the
+// correct magnitude for every input.
+Integer::Integer(signed long value)
+    : reg_(2)
+{
+    unsigned long magnitude = static_cast<unsigned long>(value);
+
+    if (value >= 0)
+        sign_ = POSITIVE;
+    else
+    {
+        sign_ = NEGATIVE;
+        magnitude = 0UL - magnitude;    // two's-complement negate, no overflow
+    }
+
+    // Low word first; the second word gets whatever lies above WORD_BITS.
+    reg_[0] = word(magnitude);
+    reg_[1] = word(SafeRightShift<WORD_BITS, unsigned long>(magnitude));
+}
+
+
+// Two-word value with an explicit sign: `high` is the upper word, `low`
+// the lower one.
+Integer::Integer(Sign s, word high, word low)
+    : reg_(2), sign_(s)
+{
+    reg_[1] = high;
+    reg_[0] = low;
+}
+
+
+// Non-negative single-word value stored in a register of at least `length`
+// words (rounded up by RoundupSize).
+Integer::Integer(word value, unsigned int length)
+    : reg_(RoundupSize(length)), sign_(POSITIVE)
+{
+    // Clear every word above the lowest, then store the value itself.
+    SetWords(reg_ + 1, 0, reg_.size() - 1);
+    reg_[0] = value;
+}
+
+
+// Builds an Integer from a byte string; all of the work, including setting
+// the sign, is done by Decode().
+Integer::Integer(const byte *encodedInteger,
+                 unsigned int byteCount, Signedness s)
+{
+    Decode(encodedInteger, byteCount, s);
+}
+
+class BadBER {};    // empty class; not referenced in this part of the file
+
+// BER Decode Source
+// Starts from a two-word POSITIVE register and lets Decode(Source&) read
+// the value from the source.
+Integer::Integer(Source& source)
+    : reg_(2),
+      sign_(POSITIVE)
+{
+    Decode(source);
+}
+
+// Reads an INTEGER element from `source` into this object's register.
+//
+// Expects the INTEGER tag byte, then a length (via GetLength), then the
+// content bytes, most significant first.  One leading 0x00 content byte is
+// skipped.  On a wrong tag the source's error is set to INTEGER_E; on a zero
+// length, a pending source error, or too few bytes left, the function
+// returns without touching the register.  sign_ is not modified.
+//
+// The content bytes are OR-ed into the register, so the register has to be
+// all zero first.  CleanNew() takes care of that when the register must
+// grow; otherwise it is cleared explicitly so that decoding into an Integer
+// that already holds a value does not mix old and new bits.
+void Integer::Decode(Source& source)
+{
+    byte b = source.next();
+    if (b != INTEGER) {
+        source.SetError(INTEGER_E);
+        return;
+    }
+
+    word32 length = GetLength(source);
+    if (length == 0 || source.GetError().What()) return;
+
+    // Skip a single leading zero byte; otherwise un-read the byte.
+    if ( (b = source.next()) == 0x00)
+        length--;
+    else
+        source.prev();
+
+    if (source.IsLeft(length) == false) return;
+
+    unsigned int words = (length + WORD_SIZE - 1) / WORD_SIZE;
+    words = RoundupSize(words);
+    if (words > reg_.size())
+        reg_.CleanNew(words);
+    else
+        SetWords(reg_.get_buffer(), 0, reg_.size());    // drop any old value
+
+    // Byte j-1 (counted from the least significant end) lands in word
+    // (j-1)/WORD_SIZE at bit offset ((j-1)%WORD_SIZE)*8.
+    for (int j = length; j > 0; j--) {
+        b = source.next();
+        reg_ [(j-1) / WORD_SIZE] |= (word)b << ((j-1) % WORD_SIZE) * 8;
+    }
+}
+
+
+// Loads a big-endian byte string.  With s == SIGNED a set top bit in the
+// first byte marks the input as a negative two's-complement number;
+// otherwise the value is taken as non-negative.
+void Integer::Decode(const byte* input, unsigned int inputLen, Signedness s)
+{
+    // The sign is decided by the first byte (0 when the input is empty).
+    const byte first = (inputLen > 0) ? input[0] : 0;
+    if ((s==SIGNED) && (first & 0x80))
+        sign_ = NEGATIVE;
+    else
+        sign_ = POSITIVE;
+
+    // Skip leading bytes that only repeat the sign (0x00 or 0xff).
+    const byte filler = (sign_==POSITIVE) ? 0 : 0xff;
+    unsigned int pos = 0;
+    while (inputLen > 0 && input[pos] == filler)
+    {
+        ++pos;
+        --inputLen;
+    }
+
+    reg_.CleanNew(RoundupSize(BytesToWords(inputLen)));
+
+    // The first remaining byte is the most significant one.
+    for (unsigned int left = inputLen; left > 0; --left)
+    {
+        const unsigned int byteIdx = left - 1;
+        const byte cur = input[pos++];
+        reg_[byteIdx/WORD_SIZE] |= (word)cur << (byteIdx%WORD_SIZE)*8;
+    }
+
+    if (sign_ == NEGATIVE)
+    {
+        // Sign-extend over the rest of the register, then convert the
+        // two's-complement pattern in place.
+        for (unsigned int k = inputLen; k < reg_.size()*WORD_SIZE; ++k)
+            reg_[k/WORD_SIZE] |= (word)0xff << (k%WORD_SIZE)*8;
+        TwosComplement(reg_.get_buffer(), reg_.size());
+    }
+}
+
+
+// Writes exactly outputLen bytes, most significant first, and returns
+// outputLen.  A negative value encoded as SIGNED is written in
+// two's-complement form; everything else is written byte for byte.
+unsigned int Integer::Encode(byte* output, unsigned int outputLen,
+                       Signedness signedness) const
+{
+    if (signedness != UNSIGNED && !NotNegative())
+    {
+        // take two's complement of *this
+        Integer temp = Integer::Power2(8*max(ByteCount(), outputLen)) + *this;
+        for (unsigned int pos = 0; pos < outputLen; ++pos)
+            output[pos] = temp.GetByte(outputLen-pos-1);
+    }
+    else
+    {
+        for (unsigned int pos = 0; pos < outputLen; ++pos)
+            output[pos] = GetByte(outputLen-pos-1);
+    }
+    return outputLen;
+}
+
+
+// Holder for the shared zero constant; stays null until first requested.
+static Integer* zero = 0;
+
+// Returns the shared zero constant, creating it on the first call.
+const Integer &Integer::Zero()
+{
+    if (zero == 0)
+        zero = NEW_TC Integer;
+
+    return *zero;
+}
+
+
+// Holder for the shared one constant; stays null until first requested.
+static Integer* one = 0;
+
+// Returns the shared one constant, creating it on the first call.
+const Integer &Integer::One()
+{
+    if (one == 0)
+        one = NEW_TC Integer(1,2);
+
+    return *one;
+}
+
+
+// Clean up static singleton holders, not a leak, but helpful to have gone
+// when checking for leaks
+void CleanUp()
+{
+    // Each holder is reset right after it is released, so calling this
+    // more than once does not seg fault.
+    tcDelete(one);
+    one = 0;
+
+    tcDelete(zero);
+    zero = 0;
+}
+
+// Random value constructor; the range handling lives in Randomize().
+Integer::Integer(RandomNumberGenerator& rng,
+                 const Integer& min, const Integer& max)
+{
+    Randomize(rng, min, max);
+}
+
+
+// Replaces *this with a random non-negative value: nbits/8 + 1 random
+// bytes are generated, the first (most significant) one is passed through
+// Crop with nbits % 8, and the result is decoded as UNSIGNED.
+void Integer::Randomize(RandomNumberGenerator& rng, unsigned int nbits)
+{
+    const unsigned int nbytes = nbits/8 + 1;
+    ByteBlock buf(nbytes);
+    byte* const raw = buf.get_buffer();
+
+    rng.GenerateBlock(raw, nbytes);
+    if (nbytes != 0)
+        buf[0] = (byte)Crop(buf[0], nbits % 8);
+
+    Decode(raw, nbytes, UNSIGNED);
+}
+
+// Replaces *this with a random value in [min, max]: candidates with as many
+// bits as (max - min) are drawn until one does not exceed that span, then
+// min is added.
+void Integer::Randomize(RandomNumberGenerator& rng, const Integer& min,
+                        const Integer& max)
+{
+    const Integer span = max - min;
+    const unsigned int bits = span.BitCount();
+
+    for (;;)
+    {
+        Randomize(rng, bits);
+        if (!(*this > span))
+            break;
+    }
+
+    *this += min;
+}
+
+
+// Returns the value with only bit e set.
+Integer Integer::Power2(unsigned int e)
+{
+    // Zero value with enough words to hold bit e.
+    Integer result((word)0, BitsToWords(e + 1));
+    result.SetBit(e);
+    return result;
+}
+
+
+// Sets or clears bit n.  Setting grows the register as needed; clearing a
+// bit beyond the current register is a no-op.
+void Integer::SetBit(unsigned int n, bool value)
+{
+    const unsigned int wordIdx = n / WORD_BITS;
+    const word mask = word(1) << (n % WORD_BITS);
+
+    if (!value)
+    {
+        if (wordIdx < reg_.size())
+            reg_[wordIdx] &= ~mask;
+        return;
+    }
+
+    reg_.CleanGrow(RoundupSize(BitsToWords(n + 1)));
+    reg_[wordIdx] |= mask;
+}
+
+
+// Overwrites byte n (byte 0 is the least significant), growing the
+// register first if necessary.
+void Integer::SetByte(unsigned int n, byte value)
+{
+    reg_.CleanGrow(RoundupSize(BytesToWords(n+1)));
+
+    const unsigned int wordIdx = n/WORD_SIZE;
+    const unsigned int shift   = 8*(n%WORD_SIZE);
+
+    reg_[wordIdx] &= ~(word(0xff) << shift);    // clear the old byte
+    reg_[wordIdx] |= (word(value) << shift);    // drop in the new one
+}
+
+
+// Flips the sign in place; zero is left alone so that -0 never appears.
+void Integer::Negate()
+{
+    if (!(*this))
+        return;
+
+    sign_ = Sign(1 - sign_);
+}
+
+
+// True when the value is zero.  A value whose sign is negative is never
+// reported as zero.
+bool Integer::operator!() const
+{
+    if (IsNegative())
+        return false;
+
+    return reg_[0]==0 && WordCount()==0;
+}
+
+
+// Copy assignment; self-assignment leaves the object untouched.
+Integer& Integer::operator=(const Integer& t)
+{
+    if (this == &t)
+        return *this;
+
+    // Size from t's significant words, then copy that many words.
+    reg_.New(RoundupSize(t.WordCount()));
+    CopyWords(reg_.get_buffer(), t.reg_.get_buffer(), reg_.size());
+    sign_ = t.sign_;
+
+    return *this;
+}
+
+
+// In-place signed addition, dispatched on the signs of both operands to the
+// magnitude helpers PositiveAdd / PositiveSubtract.
+Integer& Integer::operator+=(const Integer& t)
+{
+    reg_.CleanGrow(t.reg_.size());
+
+    const bool lhsNonNeg = NotNegative();
+    const bool rhsNonNeg = t.NotNegative();
+
+    if (lhsNonNeg && rhsNonNeg)
+        PositiveAdd(*this, *this, t);               //  a + b
+    else if (lhsNonNeg)
+        PositiveSubtract(*this, *this, t);          //  a - |b|
+    else if (rhsNonNeg)
+        PositiveSubtract(*this, t, *this);          //  b - |a|
+    else
+    {
+        PositiveAdd(*this, *this, t);               // -(|a| + |b|)
+        sign_ = Integer::NEGATIVE;
+    }
+
+    return *this;
+}
+
+
+// Unary minus: returns a negated copy and leaves *this unchanged.
+Integer Integer::operator-() const
+{
+    Integer negated(*this);
+    negated.Negate();
+    return negated;
+}
+
+
+// In-place signed subtraction, dispatched on the signs of both operands to
+// the magnitude helpers PositiveAdd / PositiveSubtract.
+Integer& Integer::operator-=(const Integer& t)
+{
+    reg_.CleanGrow(t.reg_.size());
+
+    const bool lhsNonNeg = NotNegative();
+    const bool rhsNonNeg = t.NotNegative();
+
+    if (lhsNonNeg && rhsNonNeg)
+        PositiveSubtract(*this, *this, t);          //  a - b
+    else if (lhsNonNeg)
+        PositiveAdd(*this, *this, t);               //  a + |b|
+    else if (rhsNonNeg)
+    {
+        PositiveAdd(*this, *this, t);               // -(|a| + b)
+        sign_ = Integer::NEGATIVE;
+    }
+    else
+        PositiveSubtract(*this, t, *this);          //  |b| - |a|
+
+    return *this;
+}
+
+
+// Pre-increment.  A negative value has its magnitude decremented (and is
+// normalised to Zero() if that reaches zero); otherwise the magnitude is
+// incremented, doubling the register when it overflows.
+Integer& Integer::operator++()
+{
+    if (!NotNegative())
+    {
+        word borrow = Decrement(reg_.get_buffer(), reg_.size());
+        (void)borrow;           // shut up compiler
+        if (WordCount()==0)
+            *this = Zero();
+        return *this;
+    }
+
+    if (Increment(reg_.get_buffer(), reg_.size()))
+    {
+        // Carry out of the top word: double the register and place the
+        // carry in the first new word.
+        reg_.CleanGrow(2*reg_.size());
+        reg_[reg_.size()/2]=1;
+    }
+    return *this;
+}
+
+// Pre-decrement.  A negative value has its magnitude incremented (doubling
+// the register on overflow); otherwise the magnitude is decremented, and a
+// borrow out of it yields -One().
+Integer& Integer::operator--()
+{
+    if (!IsNegative())
+    {
+        if (Decrement(reg_.get_buffer(), reg_.size()))
+            *this = -One();
+        return *this;
+    }
+
+    if (Increment(reg_.get_buffer(), reg_.size()))
+    {
+        // Carry out of the top word: double the register and place the
+        // carry in the first new word.
+        reg_.CleanGrow(2*reg_.size());
+        reg_[reg_.size()/2]=1;
+    }
+    return *this;
+}
+
+
+// In-place left shift by n bits: grow the register, move whole words first,
+// then shift by the remaining bit count.
+Integer& Integer::operator<<=(unsigned int n)
+{
+    const unsigned int usedWords  = WordCount();
+    const unsigned int wholeWords = n / WORD_BITS;
+    const unsigned int extraBits  = n % WORD_BITS;
+
+    reg_.CleanGrow(RoundupSize(usedWords+BitsToWords(n)));
+
+    ShiftWordsLeftByWords(reg_.get_buffer(), usedWords + wholeWords,
+                          wholeWords);
+    ShiftWordsLeftByBits(reg_+wholeWords, usedWords+BitsToWords(extraBits),
+                         extraBits);
+
+    return *this;
+}
+
+// In-place right shift by n bits: whole words first, then the remaining
+// bits on whatever words are left.  A negative value shifted down to zero
+// is normalised to Zero().
+Integer& Integer::operator>>=(unsigned int n)
+{
+    const unsigned int usedWords  = WordCount();
+    const unsigned int wholeWords = n / WORD_BITS;
+    const unsigned int extraBits  = n % WORD_BITS;
+
+    ShiftWordsRightByWords(reg_.get_buffer(), usedWords, wholeWords);
+
+    if (usedWords > wholeWords)
+        ShiftWordsRightByBits(reg_.get_buffer(), usedWords-wholeWords,
+                              extraBits);
+
+    if (IsNegative() && WordCount()==0)   // avoid -0
+        *this = Zero();
+
+    return *this;
+}
+
+
+// sum = |a| + |b|, sign POSITIVE.  The overlapping low words are added, the
+// extra words of the longer operand are copied and the carry rippled
+// through them; a final carry doubles sum's register.  sum must already be
+// large enough for the longer operand.
+void PositiveAdd(Integer& sum, const Integer& a, const Integer& b)
+{
+    word carry;
+
+    if (a.reg_.size() > b.reg_.size())
+    {
+        // a is longer: add b's words, then carry through a's tail.
+        carry = Add(sum.reg_.get_buffer(), a.reg_.get_buffer(),
+                    b.reg_.get_buffer(), b.reg_.size());
+        CopyWords(sum.reg_+b.reg_.size(), a.reg_+b.reg_.size(),
+                  a.reg_.size()-b.reg_.size());
+        carry = Increment(sum.reg_+b.reg_.size(), a.reg_.size()-b.reg_.size(),
+                          carry);
+    }
+    else if (a.reg_.size() < b.reg_.size())
+    {
+        // b is longer: add a's words, then carry through b's tail.
+        carry = Add(sum.reg_.get_buffer(), a.reg_.get_buffer(),
+                    b.reg_.get_buffer(), a.reg_.size());
+        CopyWords(sum.reg_+a.reg_.size(), b.reg_+a.reg_.size(),
+                  b.reg_.size()-a.reg_.size());
+        carry = Increment(sum.reg_+a.reg_.size(), b.reg_.size()-a.reg_.size(),
+                          carry);
+    }
+    else
+    {
+        // Same length: one straight addition.
+        carry = Add(sum.reg_.get_buffer(), a.reg_.get_buffer(),
+                    b.reg_.get_buffer(), a.reg_.size());
+    }
+
+    if (carry)
+    {
+        // Carry out of the top word goes into the first newly added word.
+        sum.reg_.CleanGrow(2*sum.reg_.size());
+        sum.reg_[sum.reg_.size()/2] = 1;
+    }
+    sum.sign_ = Integer::POSITIVE;
+}
+
+// diff = |a| - |b|.  The smaller magnitude is always subtracted from the
+// larger one and diff's sign records which way round that was.  Word counts
+// are rounded up to even numbers before use.
+void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
+{
+    unsigned aSize = a.WordCount();
+    if (aSize%2)
+        ++aSize;
+    unsigned bSize = b.WordCount();
+    if (bSize%2)
+        ++bSize;
+
+    if (aSize > bSize)
+    {
+        // |a| has more words, so the result is non-negative.
+        word borrow = Subtract(diff.reg_.get_buffer(), a.reg_.get_buffer(),
+                               b.reg_.get_buffer(), bSize);
+        CopyWords(diff.reg_+bSize, a.reg_+bSize, aSize-bSize);
+        borrow = Decrement(diff.reg_+bSize, aSize-bSize, borrow);
+        diff.sign_ = Integer::POSITIVE;
+    }
+    else if (aSize < bSize)
+    {
+        // |b| has more words, so the result is negative.
+        word borrow = Subtract(diff.reg_.get_buffer(), b.reg_.get_buffer(),
+                               a.reg_.get_buffer(), aSize);
+        CopyWords(diff.reg_+aSize, b.reg_+aSize, bSize-aSize);
+        borrow = Decrement(diff.reg_+aSize, bSize-aSize, borrow);
+        diff.sign_ = Integer::NEGATIVE;
+    }
+    else if (Compare(a.reg_.get_buffer(), b.reg_.get_buffer(), aSize) >= 0)
+    {
+        // Same length, |a| >= |b|.
+        Subtract(diff.reg_.get_buffer(), a.reg_.get_buffer(),
+                 b.reg_.get_buffer(), aSize);
+        diff.sign_ = Integer::POSITIVE;
+    }
+    else
+    {
+        // Same length, |a| < |b|.
+        Subtract(diff.reg_.get_buffer(), b.reg_.get_buffer(),
+                 a.reg_.get_buffer(), aSize);
+        diff.sign_ = Integer::NEGATIVE;
+    }
+}
+
+
+// Smallest byte count Encode() needs for this value (at least 1).  A signed
+// encoding gets one extra byte when the top bit would otherwise be misread
+// as the sign.
+unsigned int Integer::MinEncodedSize(Signedness signedness) const
+{
+    unsigned int len = max(1U, ByteCount());
+
+    if (signedness != UNSIGNED)
+    {
+        if (NotNegative() && (GetByte(len-1) & 0x80))
+            ++len;
+        if (IsNegative() && *this < -Power2(len*8-1))
+            ++len;
+    }
+
+    return len;
+}
+
+
+// Signed three-way compare of *this against t.  Differing signs decide the
+// result immediately; equal signs fall back to a magnitude compare, which
+// is negated when both values are negative.
+int Integer::Compare(const Integer& t) const
+{
+    const bool lhsNonNeg = NotNegative();
+    const bool rhsNonNeg = t.NotNegative();
+
+    if (lhsNonNeg != rhsNonNeg)
+        return lhsNonNeg ? 1 : -1;
+
+    const int magnitudeOrder = PositiveCompare(t);
+    return lhsNonNeg ? magnitudeOrder : -magnitudeOrder;
+}
+
+
+// Compares magnitudes only: more significant words wins, and equal word
+// counts are settled by the word-level Compare.
+int Integer::PositiveCompare(const Integer& t) const
+{
+    const unsigned size  = WordCount();
+    const unsigned tSize = t.WordCount();
+
+    if (size != tSize)
+        return size > tSize ? 1 : -1;
+
+    return TaoCrypt::Compare(reg_.get_buffer(), t.reg_.get_buffer(), size);
+}
+
+
+// Returns bit n of the magnitude; bits beyond the register read as 0.
+bool Integer::GetBit(unsigned int n) const
+{
+    const unsigned int wordIdx = n/WORD_BITS;
+
+    if (wordIdx >= reg_.size())
+        return false;
+
+    return bool((reg_[wordIdx] >> (n % WORD_BITS)) & 1);
+}
+
+
+// Returns n consecutive bits of the magnitude starting at bit i, packed so
+// that bit i of the value becomes bit 0 of the result.
+//
+// Each bit is widened to unsigned long before it is shifted.  Shifting the
+// bool directly would shift an int, which overflows at position 31 and
+// silently loses or sign-extends higher bits even where unsigned long is
+// wider than int.  Positions that do not fit into the result type cannot
+// be returned, so the loop stops at the width of unsigned long instead of
+// performing an out-of-range shift.
+unsigned long Integer::GetBits(unsigned int i, unsigned int n) const
+{
+    const unsigned int width = sizeof(unsigned long) * 8;
+    const unsigned int count = (n < width) ? n : width;
+
+    unsigned long v = 0;
+    for (unsigned int j=0; j<count; j++)
+        v |= static_cast<unsigned long>(GetBit(i+j)) << j;
+    return v;
+}
+
+
+// Returns byte n of the magnitude (byte 0 is the least significant); bytes
+// beyond the register read as 0.
+byte Integer::GetByte(unsigned int n) const
+{
+    const unsigned int wordIdx = n/WORD_SIZE;
+
+    if (wordIdx >= reg_.size())
+        return 0;
+
+    return byte(reg_[wordIdx] >> ((n%WORD_SIZE)*8));
+}
+
+
+// Number of significant bits in the magnitude; 0 for a zero value.
+unsigned int Integer::BitCount() const
+{
+    const unsigned wordCount = WordCount();
+    if (wordCount == 0)
+        return 0;
+
+    // Whole words below the top one, plus the bits used in the top word.
+    return (wordCount-1)*WORD_BITS + BitPrecision(reg_[wordCount-1]);
+}
+
+
+// Number of significant bytes in the magnitude; 0 for a zero value.
+unsigned int Integer::ByteCount() const
+{
+    const unsigned wordCount = WordCount();
+    if (wordCount == 0)
+        return 0;
+
+    // Whole words below the top one, plus the bytes used in the top word.
+    return (wordCount-1)*WORD_SIZE + BytePrecision(reg_[wordCount-1]);
+}
+
+
+// Word count of the register as reported by CountWords.
+unsigned int Integer::WordCount() const
+{
+    const unsigned int used = CountWords(reg_.get_buffer(), reg_.size());
+    return used;
+}
+
+
+// Reports whether ConvertToLong() can represent this value.  The magnitude
+// must fit in sizeof(long) bytes and keep the right sign once it is
+// reinterpreted as a signed long.
+bool Integer::IsConvertableToLong() const
+{
+    if (ByteCount() > sizeof(long))
+        return false;
+
+    // Assemble the magnitude from the two lowest words.
+    unsigned long value = reg_[0];
+    value += SafeLeftShift<WORD_BITS, unsigned long>(reg_[1]);
+
+    return (sign_ == POSITIVE) ? ((signed long)value >= 0)
+                               : (-(signed long)value < 0);
+}
+
+
+// Converts to a native signed long using the two lowest words; no range
+// check is made here (see IsConvertableToLong()).
+signed long Integer::ConvertToLong() const
+{
+    unsigned long magnitude = reg_[0];
+    magnitude += SafeLeftShift<WORD_BITS, unsigned long>(reg_[1]);
+
+    return sign_ == POSITIVE ? magnitude : -(signed long)magnitude;
+}
+
+
+// Exchanges sign and register with a.
+void Integer::Swap(Integer& a)
+{
+    STL::swap(sign_, a.sign_);
+    reg_.Swap(a.reg_);
+}
+
+
+// Returns *this + b, dispatching on the signs of both operands to the
+// magnitude helpers PositiveAdd / PositiveSubtract.
+Integer Integer::Plus(const Integer& b) const
+{
+    // Result starts as zero, as wide as the wider operand.
+    Integer sum((word)0, max(reg_.size(), b.reg_.size()));
+
+    const bool lhsNonNeg = NotNegative();
+    const bool rhsNonNeg = b.NotNegative();
+
+    if (lhsNonNeg && rhsNonNeg)
+        PositiveAdd(sum, *this, b);                 //  a + b
+    else if (lhsNonNeg)
+        PositiveSubtract(sum, *this, b);            //  a - |b|
+    else if (rhsNonNeg)
+        PositiveSubtract(sum, b, *this);            //  b - |a|
+    else
+    {
+        PositiveAdd(sum, *this, b);                 // -(|a| + |b|)
+        sum.sign_ = Integer::NEGATIVE;
+    }
+
+    return sum;
+}
+
+
+// Returns *this - b, dispatching on the signs of both operands to the
+// magnitude helpers PositiveAdd / PositiveSubtract.
+Integer Integer::Minus(const Integer& b) const
+{
+    // Result starts as zero, as wide as the wider operand.
+    Integer diff((word)0, max(reg_.size(), b.reg_.size()));
+
+    const bool lhsNonNeg = NotNegative();
+    const bool rhsNonNeg = b.NotNegative();
+
+    if (lhsNonNeg && rhsNonNeg)
+        PositiveSubtract(diff, *this, b);           //  a - b
+    else if (lhsNonNeg)
+        PositiveAdd(diff, *this, b);                //  a + |b|
+    else if (rhsNonNeg)
+    {
+        PositiveAdd(diff, *this, b);                // -(|a| + b)
+        diff.sign_ = Integer::NEGATIVE;
+    }
+    else
+        PositiveSubtract(diff, b, *this);           //  |b| - |a|
+
+    return diff;
+}
+
+
+// Returns *this * b; the work is done by the free function Multiply().
+Integer Integer::Times(const Integer &b) const
+{
+    Integer result;
+    Multiply(result, *this, b);
+    return result;
+}
+
+
+#undef A0
+#undef A1
+#undef B0
+#undef B1
+
+#undef T0
+#undef T1
+#undef T2
+#undef T3
+
+#undef R0
+#undef R1
+#undef R2
+#undef R3
+
+
+static inline void AtomicDivide(word *Q, const word *A, const word *B)
+{   // Q[0..1] <- result of DivideFourWordsByTwo on the four words A[0..3] and the two words B[0..1]
+    word T[4];      // handed to DivideFourWordsByTwo as its first argument; presumably receives the remainder -- TODO confirm
+    DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]),
+                                         DWord(A[2], A[3]), DWord(B[0], B[1]));
+    Q[0] = q.GetLowHalf();
+    Q[1] = q.GetHighHalf();
+
+#ifndef NDEBUG
+    if (B[0] || B[1])
+    {
+        // multiply quotient and divisor and add remainder, make sure it 
+        // equals dividend
+        word P[4];
+        Portable::Multiply2(P, Q, B);
+        Add(P, P, T, 4);        // NOTE(review): P is computed but never compared against A, so nothing is checked here
+    }
+#endif
+}
+
+
+// for use by Divide(), corrects the underestimated quotient {Q1,Q0}
+static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B,
+                                    unsigned int N)
+{   // R (N+2 words) -= Q*B, then Q is bumped while R is still >= B. T is scratch space of N+2 words.
+    if (Q[1])       // two-word quotient: build T = Q*B from 2x2-word products
+    {
+        T[N] = T[N+1] = 0;
+        unsigned i;
+        for (i=0; i<N; i+=4)        // chunks at i = 0, 4, 8, ... are written directly
+            LowLevel::Multiply2(T+i, Q, B+i);
+        for (i=2; i<N; i+=4)        // chunks at i = 2, 6, ... are accumulated on top
+            if (LowLevel::Multiply2Add(T+i, Q, B+i))
+                T[i+5] += (++T[i+4]==0);    // ripple the carry into the next two words
+    }
+    else            // single-word quotient: one linear multiply is enough
+    {
+        T[N] = LinearMultiply(T, B, Q[0], N);
+        T[N+1] = 0;
+    }
+
+    word borrow = Subtract(R, R, T, N+2);
+    (void)borrow;       // shut up compiler
+    // Q was an underestimate: keep subtracting B and incrementing the two-word Q until R < B.
+    while (R[N] || Compare(R, B, N) >= 0)
+    {
+        R[N] -= Subtract(R, R, B, N);
+        Q[1] += (++Q[0]==0);
+    }
+}
+
+// R[NB] -------- remainder = A%B
+// Q[NA-NB+2] --- quotient	= A/B
+// T[NA+2*NB+4] - temp work space
+// A[NA] -------- dividend
+// B[NB] -------- divisor
+
+
+void Divide(word* R, word* Q, word* T, const word* A, unsigned int NA,
+            const word* B, unsigned int NB)
+{   // Word-level division following the parameter contract in the comment block above; the quotient is produced two words per step.
+    // set up temporary work space
+    word *const TA=T;           // normalized copy of A, NA+2 words
+    word *const TB=T+NA+2;      // normalized copy of B, NB words
+    word *const TP=T+NA+2+NB;   // scratch handed to CorrectQuotientEstimate
+
+    // copy B into TB and normalize it so that TB has highest bit set to 1
+    unsigned shiftWords = (B[NB-1]==0);     // 1 if the top word of B is zero, else 0
+    TB[0] = TB[NB-1] = 0;
+    CopyWords(TB+shiftWords, B, NB-shiftWords);
+    unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
+    ShiftWordsLeftByBits(TB, NB, shiftBits);
+
+    // copy A into TA and normalize it
+    TA[0] = TA[NA] = TA[NA+1] = 0;
+    CopyWords(TA+shiftWords, A, NA);
+    ShiftWordsLeftByBits(TA, NA+2, shiftBits);
+    // If normalizing spilled at most 1 into TA[NA], settle the top quotient words by repeated subtraction; otherwise treat TA as two words longer.
+    if (TA[NA+1]==0 && TA[NA] <= 1)
+    {
+        Q[NA-NB+1] = Q[NA-NB] = 0;
+        while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
+        {
+            TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
+            ++Q[NA-NB];
+        }
+    }
+    else
+    {
+        NA+=2;
+    }
+    // BT = top two words of TB plus one, used as the divisor for each quotient estimate
+    word BT[2];
+    BT[0] = TB[NB-2] + 1;
+    BT[1] = TB[NB-1] + (BT[0]==0);      // carry from the low word
+
+    // start reducing TA mod TB, 2 words at a time
+    for (unsigned i=NA-2; i>=NB; i-=2)
+    {
+        AtomicDivide(Q+i-NB, TA+i-2, BT);                       // estimate two quotient words
+        CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);   // fix the estimate and update the remainder
+    }
+
+    // copy TA into R, and denormalize it
+    CopyWords(R, TA+shiftWords, NB);
+    ShiftWordsRightByBits(R, NB, shiftBits);
+}
+
+
+// Divides the magnitude of a by the magnitude of b.
+//
+// remainder - receives the remainder, marked POSITIVE
+// quotient  - receives the quotient, marked POSITIVE
+// a         - dividend (its sign is not consulted here)
+// b         - divisor (its sign is not consulted here)
+//
+// When b has no significant words the low-level Divide() would be entered
+// with NB == 0 and index B[NB-1] / TB[NB-1] outside their buffers.  This
+// signature has no error channel, so that case yields a zero quotient and a
+// zero remainder instead of touching memory out of bounds.
+void PositiveDivide(Integer& remainder, Integer& quotient,
+                   const Integer& a, const Integer& b)
+{
+    unsigned aSize = a.WordCount();
+    unsigned bSize = b.WordCount();
+
+    if (bSize == 0)
+    {
+        // divisor has no significant words: refuse to run Divide()
+        remainder = Integer::Zero();
+        quotient = Integer::Zero();
+        return;
+    }
+
+    if (a.PositiveCompare(b) == -1)
+    {
+        // |a| < |b|: quotient is zero and the remainder is |a| itself
+        remainder = a;
+        remainder.sign_ = Integer::POSITIVE;
+        quotient = Integer::Zero();
+        return;
+    }
+
+    aSize += aSize%2;   // round up to next even number
+    bSize += bSize%2;
+
+    remainder.reg_.CleanNew(RoundupSize(bSize));
+    remainder.sign_ = Integer::POSITIVE;
+    quotient.reg_.CleanNew(RoundupSize(aSize-bSize+2));
+    quotient.sign_ = Integer::POSITIVE;
+
+    // workspace sized as documented for Divide(): NA + 2*NB + 4 words
+    AlignedWordBlock T(aSize+2*bSize+4);
+    Divide(remainder.reg_.get_buffer(), quotient.reg_.get_buffer(),
+           T.get_buffer(), a.reg_.get_buffer(), aSize, b.reg_.get_buffer(),
+           bSize);
+}
+
+// Signed division built on PositiveDivide().
+//
+// For a negative dividend the quotient is negated; if the remainder is
+// non-zero the quotient is additionally decremented and the remainder is
+// replaced by |divisor| - remainder.  A negative divisor negates the
+// quotient once more.
+void Integer::Divide(Integer &remainder, Integer &quotient,
+                     const Integer &dividend, const Integer &divisor)
+{
+    PositiveDivide(remainder, quotient, dividend, divisor);
+
+    if (dividend.IsNegative())
+    {
+        quotient.Negate();
+
+        const bool inexact = remainder.NotZero();
+        if (inexact)
+        {
+            --quotient;
+            remainder = divisor.AbsoluteValue() - remainder;
+        }
+    }
+
+    if (divisor.IsNegative())
+    {
+        quotient.Negate();
+    }
+}
+
+// Divides a by 2**n using shifts and masks.
+//
+// q - receives a shifted right by n
+// r - receives the low n bits of a's words, marked POSITIVE
+//
+// When a is negative and r is non-zero, q is decremented and r becomes
+// Power2(n) - r.
+void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a,
+                               unsigned int n)
+{
+    q = a;
+    q >>= n;
+
+    const unsigned int nWords = BitsToWords(n);
+    if (nWords > a.WordCount())
+    {
+        // a has fewer significant words than n bits need: take them all
+        r.reg_.resize(RoundupSize(a.WordCount()));
+        CopyWords(r.reg_.get_buffer(), a.reg_.get_buffer(), r.reg_.size());
+    }
+    else
+    {
+        r.reg_.resize(RoundupSize(nWords));
+        CopyWords(r.reg_.get_buffer(), a.reg_.get_buffer(), nWords);
+        SetWords(r.reg_+nWords, 0, r.reg_.size()-nWords);
+
+        // trim the last copied word when n is not a whole number of words
+        const unsigned int looseBits = n % WORD_BITS;
+        if (looseBits != 0)
+            r.reg_[nWords-1] %= (word(1) << looseBits);
+    }
+    r.sign_ = POSITIVE;
+
+    const bool needsFixup = a.IsNegative() && r.NotZero();
+    if (needsFixup)
+    {
+        --q;
+        r = Power2(n) - r;
+    }
+}
+
+// Returns the quotient of *this divided by b; the remainder is discarded.
+Integer Integer::DividedBy(const Integer &b) const
+{
+    Integer discardedRemainder;
+    Integer result;
+    Integer::Divide(discardedRemainder, result, *this, b);
+    return result;
+}
+
+// Returns the remainder of *this divided by b; the quotient is discarded.
+Integer Integer::Modulo(const Integer &b) const
+{
+    Integer result;
+    Integer discardedQuotient;
+    Integer::Divide(result, discardedQuotient, *this, b);
+    return result;
+}
+
+// Divides dividend by a single-word divisor.
+//
+// remainder - receives the word-sized remainder
+// quotient  - receives the quotient
+//
+// A divisor with at most one bit set is handled with a shift and a mask.
+// Otherwise a word-by-word long division runs from the most significant word
+// down.  For a negative dividend with a non-zero remainder, the quotient is
+// decremented and the remainder becomes divisor - remainder.
+void Integer::Divide(word &remainder, Integer &quotient,
+                     const Integer &dividend, word divisor)
+{
+    const bool singleBit = ((divisor & (divisor-1)) == 0);
+    if (singleBit)      // divisor is a power of 2
+    {
+        quotient = dividend >> (BitPrecision(divisor)-1);
+        remainder = dividend.reg_[0] & (divisor-1);
+        return;
+    }
+
+    const unsigned int words = dividend.WordCount();
+    quotient.reg_.CleanNew(RoundupSize(words));
+    remainder = 0;
+
+    // each step divides the pair (current word, running remainder)
+    for (unsigned int i = words; i-- > 0; )
+    {
+        quotient.reg_[i] = DWord(dividend.reg_[i], remainder) / divisor;
+        remainder = DWord(dividend.reg_[i], remainder) % divisor;
+    }
+
+    if (dividend.NotNegative())
+    {
+        quotient.sign_ = POSITIVE;
+        return;
+    }
+
+    quotient.sign_ = NEGATIVE;
+    if (remainder)
+    {
+        --quotient;
+        remainder = divisor - remainder;
+    }
+}
+
+// Returns the quotient of *this divided by the word b; the remainder is
+// discarded.
+Integer Integer::DividedBy(word b) const
+{
+    Integer result;
+    word discardedRemainder;
+    Integer::Divide(discardedRemainder, result, *this, b);
+    return result;
+}
+
+// Returns *this modulo a single-word divisor.
+//
+// Three strategies are used:
+//   - divisor with at most one bit set: mask the lowest word
+//   - any other divisor <= 5: add up all words, then reduce the sum once
+//   - otherwise: word-by-word reduction from the most significant word down
+// For a negative value with a non-zero remainder the result is
+// divisor - remainder.
+word Integer::Modulo(word divisor) const
+{
+    word remainder;
+    const bool singleBit = ((divisor & (divisor-1)) == 0);
+
+    if (singleBit)      // divisor is a power of 2
+    {
+        remainder = reg_[0] & (divisor-1);
+    }
+    else if (divisor <= 5)
+    {
+        DWord sum(0, 0);
+        for (unsigned int i = WordCount(); i-- > 0; )
+            sum += reg_[i];
+        remainder = sum % divisor;
+    }
+    else
+    {
+        remainder = 0;
+        for (unsigned int i = WordCount(); i-- > 0; )
+            remainder = DWord(reg_[i], remainder) % divisor;
+    }
+
+    if (IsNegative() && remainder)
+        remainder = divisor - remainder;
+
+    return remainder;
+}
+
+
+// Returns a copy of *this with its sign forced to POSITIVE.
+Integer Integer::AbsoluteValue() const
+{
+    Integer magnitude(*this);
+    magnitude.sign_ = POSITIVE;
+    return magnitude;
+}
+
+
+// Integer square root by repeated averaging.
+//
+// Returns Zero() when *this is not positive.  Otherwise starts from
+// Power2((BitCount()+1)/2) and iterates y = (x + *this/x) / 2 until the
+// estimate stops decreasing; the last non-increasing estimate is returned.
+Integer Integer::SquareRoot() const
+{
+    if (!IsPositive())
+        return Zero();
+
+    // overestimate square root
+    Integer current;
+    Integer next = Power2((BitCount()+1)/2);
+
+    for (;;)
+    {
+        current = next;
+        next = (current + *this/current) >> 1;
+        if (!(next<current))
+            break;
+    }
+
+    return current;
+}
+
+// True when squaring SquareRoot() gives back *this exactly.
+bool Integer::IsSquare() const
+{
+    const Integer root = SquareRoot();
+    const bool exact = (*this == root.Squared());
+    return exact;
+}
+
+// True when the value has exactly one significant word and that word is 1
+// (the sign is not examined).
+bool Integer::IsUnit() const
+{
+    if (WordCount() != 1)
+        return false;
+    return reg_[0] == 1;
+}
+
+// Returns *this when IsUnit() holds, otherwise Zero().
+Integer Integer::MultiplicativeInverse() const
+{
+    if (IsUnit())
+        return *this;
+    return Zero();
+}
+
+// Returns (x * y) % m.
+Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
+{
+    return (x*y) % m;
+}
+
+// Raises x to the power e using a ModularArithmetic ring built on m.
+Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
+{
+    ModularArithmetic ring(m);
+
+    return ring.Exponentiate(x, e);
+}
+
+// Greatest common divisor of a and b, delegated to EuclideanDomainOf::Gcd.
+Integer Integer::Gcd(const Integer &a, const Integer &b)
+{
+    return EuclideanDomainOf().Gcd(
+        a,
+        b);
+}
+
+// Returns the inverse of *this modulo m, or Zero() on the paths below that
+// detect that no inverse exists.
+Integer Integer::InverseMod(const Integer &m) const
+{
+    // bring the operand into the range [0, m) first
+    if (IsNegative() || *this>=m)
+        return (*this%m).InverseMod(m);
+
+    if (m.IsEven())
+    {
+        if (!m || IsEven())
+            return Zero();	// no inverse
+        if (*this == One())
+            return One();
+
+        // swap roles: invert m modulo *this, then derive the wanted inverse
+        // from u; a zero u is reported as "no inverse"
+        Integer u = m.InverseMod(*this);
+        return !u ? Zero() : (m*(*this-u)+1)/(*this);
+    }
+
+    // odd modulus: AlmostInverse returns an exponent k, and the result is then
+    // divided by 2**k modulo m
+    // NOTE(review): presumably AlmostInverse yields inverse * 2**k mod m -
+    // confirm against its definition
+    AlignedWordBlock T(m.reg_.size() * 4);
+    Integer r((word)0, m.reg_.size());
+    unsigned k = AlmostInverse(r.reg_.get_buffer(), T.get_buffer(),
+                               reg_.get_buffer(), reg_.size(),
+                               m.reg_.get_buffer(), m.reg_.size());
+    DivideByPower2Mod(r.reg_.get_buffer(), r.reg_.get_buffer(), k,
+                      m.reg_.get_buffer(), m.reg_.size());
+    return r;
+}
+
+// Single-word modular inverse of *this modulo mod.
+//
+// Alternates two Euclid-style reduction steps on (g0, g1) while accumulating
+// the coefficients v0 and v1.  Returns v1 when g1 reaches 1, mod - v0 when
+// g0 reaches 1, and 0 when either value reaches zero first.
+word Integer::InverseMod(const word mod) const
+{
+    word g0 = mod;
+    word g1 = *this % mod;
+    word v0 = 0;
+    word v1 = 1;
+
+    while (g1 != 0)
+    {
+        if (g1 == 1)
+            return v1;
+
+        const word q0 = g0 / g1;
+        g0 %= g1;
+        v0 += q0 * v1;
+
+        if (g0 == 0)
+            break;
+        if (g0 == 1)
+            return mod-v0;
+
+        const word q1 = g1 / g0;
+        g1 %= g0;
+        v1 += q1 * v0;
+    }
+
+    return 0;
+}
+
+// ********* ModArith stuff
+
+// Halves a modulo the modulus.
+//
+// When a has as many words as the modulus, the word-level
+// DivideByPower2Mod() writes into `result`.  Otherwise `result1` receives
+// a/2 for even a, or (a + modulus)/2 for odd a.
+const Integer& ModularArithmetic::Half(const Integer &a) const
+{
+    if (a.reg_.size() != modulus.reg_.size())
+    {
+        if (a.IsEven())
+            result1 = (a >> 1);
+        else
+            result1 = ((a+modulus) >> 1);
+        return result1;
+    }
+
+    TaoCrypt::DivideByPower2Mod(result.reg_.begin(), a.reg_.begin(), 1,
+                                modulus.reg_.begin(), a.reg_.size());
+    return result;
+}
+
+// Returns a + b reduced by at most one subtraction of the modulus.
+//
+// Operands whose word count matches the modulus take the word-level path and
+// the answer lands in `result`; any other size goes through Integer
+// arithmetic into `result1`.
+const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
+{
+    const bool fullWidth = a.reg_.size()==modulus.reg_.size() &&
+                           b.reg_.size()==modulus.reg_.size();
+    if (!fullWidth)
+    {
+        result1 = a+b;
+        if (result1 >= modulus)
+            result1 -= modulus;
+        return result1;
+    }
+
+    // subtract the modulus when the addition carried out of the top word or
+    // the sum is not below the modulus
+    const word carry = TaoCrypt::Add(result.reg_.begin(), a.reg_.begin(),
+                                     b.reg_.begin(), a.reg_.size());
+    if (carry
+        || Compare(result.reg_.get_buffer(), modulus.reg_.get_buffer(),
+                   a.reg_.size()) >= 0)
+    {
+        TaoCrypt::Subtract(result.reg_.begin(), result.reg_.begin(),
+                           modulus.reg_.begin(), a.reg_.size());
+    }
+    return result;
+}
+
+// In-place version of Add(): a += b, followed by at most one subtraction of
+// the modulus.  Returns a.
+Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
+{
+    const bool fullWidth = a.reg_.size()==modulus.reg_.size() &&
+                           b.reg_.size()==modulus.reg_.size();
+    if (!fullWidth)
+    {
+        a+=b;
+        if (a>=modulus)
+            a-=modulus;
+        return a;
+    }
+
+    // word-level path: reduce on carry-out or when the sum is not below the
+    // modulus
+    const word carry = TaoCrypt::Add(a.reg_.get_buffer(), a.reg_.get_buffer(),
+                                     b.reg_.get_buffer(), a.reg_.size());
+    if (carry
+        || Compare(a.reg_.get_buffer(), modulus.reg_.get_buffer(),
+                   a.reg_.size()) >= 0)
+    {
+        TaoCrypt::Subtract(a.reg_.get_buffer(), a.reg_.get_buffer(),
+                           modulus.reg_.get_buffer(), a.reg_.size());
+    }
+
+    return a;
+}
+
+// Returns a - b, adding the modulus back once when the difference went
+// negative (word-level borrow, or IsNegative() on the Integer path).
+//
+// Operands whose word count matches the modulus produce `result`; any other
+// size produces `result1`.
+const Integer& ModularArithmetic::Subtract(const Integer &a,
+                                           const Integer &b) const
+{
+    const bool fullWidth = a.reg_.size()==modulus.reg_.size() &&
+                           b.reg_.size()==modulus.reg_.size();
+    if (!fullWidth)
+    {
+        result1 = a-b;
+        if (result1.IsNegative())
+            result1 += modulus;
+        return result1;
+    }
+
+    const word borrow = TaoCrypt::Subtract(result.reg_.begin(),
+                                           a.reg_.begin(), b.reg_.begin(),
+                                           a.reg_.size());
+    if (borrow)
+    {
+        TaoCrypt::Add(result.reg_.begin(), result.reg_.begin(),
+                      modulus.reg_.begin(), a.reg_.size());
+    }
+    return result;
+}
+
+// In-place version of Subtract(): a -= b, adding the modulus back once when
+// the difference went negative.  Returns a.
+Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
+{
+    const bool fullWidth = a.reg_.size()==modulus.reg_.size() &&
+                           b.reg_.size()==modulus.reg_.size();
+    if (!fullWidth)
+    {
+        a-=b;
+        if (a.IsNegative())
+            a+=modulus;
+        return a;
+    }
+
+    const word borrow = TaoCrypt::Subtract(a.reg_.get_buffer(),
+                                           a.reg_.get_buffer(),
+                                           b.reg_.get_buffer(),
+                                           a.reg_.size());
+    if (borrow)
+    {
+        TaoCrypt::Add(a.reg_.get_buffer(), a.reg_.get_buffer(),
+                      modulus.reg_.get_buffer(), a.reg_.size());
+    }
+
+    return a;
+}
+
+// Additive inverse: returns modulus - a in `result`, or a itself when a is
+// zero.
+//
+// The low a.reg_.size() words are subtracted directly.  A borrow out of that
+// part has to be propagated as "minus one" through the remaining upper words
+// of the modulus copy.
+//
+// NOTE(review): this assumes Decrement(A, N, B) takes the word count second
+// and the amount third - confirm against its declaration.  The previous call
+// passed (ptr, 1, upperWords), which under that signature subtracts
+// upperWords from a single word instead of subtracting 1 across upperWords
+// words.
+const Integer& ModularArithmetic::Inverse(const Integer &a) const
+{
+    if (!a)
+        return a;
+
+    CopyWords(result.reg_.begin(), modulus.reg_.begin(), modulus.reg_.size());
+    if (TaoCrypt::Subtract(result.reg_.begin(), result.reg_.begin(),
+                           a.reg_.begin(), a.reg_.size()))
+    {
+        // only touch the upper part when it actually exists; with equal
+        // sizes there is no word beyond a.reg_.size() to borrow from
+        if (a.reg_.size() < modulus.reg_.size())
+            Decrement(result.reg_.begin()+a.reg_.size(),
+                      modulus.reg_.size()-a.reg_.size(), 1);
+    }
+
+    return result;
+}
+
+// Cascade exponentiation of (x, e1) and (y, e2) in this ring.
+//
+// With an odd modulus the work is done in a MontgomeryRepresentation (inputs
+// converted in, result converted out); otherwise the generic AbstractRing
+// implementation is used.
+Integer ModularArithmetic::CascadeExponentiate(const Integer &x,
+                  const Integer &e1, const Integer &y, const Integer &e2) const
+{
+    if (!modulus.IsOdd())
+        return AbstractRing::CascadeExponentiate(x, e1, y, e2);
+
+    MontgomeryRepresentation mont(modulus);
+    return mont.ConvertOut(mont.CascadeExponentiate(mont.ConvertIn(x), e1,
+                                                    mont.ConvertIn(y), e2));
+}
+
+// Raises base to each of exponentsCount exponents, writing into results[].
+//
+// With an odd modulus the powers are computed in a MontgomeryRepresentation
+// and each entry is converted back afterwards; otherwise the generic
+// AbstractRing implementation is used.
+void ModularArithmetic::SimultaneousExponentiate(Integer *results,
+        const Integer &base, const Integer *exponents,
+        unsigned int exponentsCount) const
+{
+    if (!modulus.IsOdd())
+    {
+        AbstractRing::SimultaneousExponentiate(results, base,
+                                                    exponents, exponentsCount);
+        return;
+    }
+
+    MontgomeryRepresentation mont(modulus);
+    mont.SimultaneousExponentiate(results, mont.ConvertIn(base), exponents,
+                                  exponentsCount);
+
+    for (unsigned int idx = 0; idx < exponentsCount; ++idx)
+        results[idx] = mont.ConvertOut(results[idx]);
+}
+
+
+// ********************************************************
+
+// Shorthand for sub-ranges of the working buffers used by the recursive
+// routines below.  The expansions rely on identifiers named A, B, T, R, N and
+// N2 being in scope at the point of use:
+//   A0/A1, B0/B1 - operand at offset 0 and at offset N2
+//   T0..T3       - scratch buffer at offsets 0, N2, N and N+N2
+//   R0..R3       - result buffer at offsets 0, N2, N and N+N2
+#define A0      A
+#define A1      (A+N2)
+#define B0      B
+#define B1      (B+N2)
+
+#define T0      T
+#define T1      (T+N2)
+#define T2      (T+N)
+#define T3      (T+N+N2)
+
+#define R0      R
+#define R1      (R+N2)
+#define R2      (R+N)
+#define R3      (R+N+N2)
+
+
+// Thin forwarding wrapper around RecursiveMultiplyBottom(); all arguments are
+// passed through unchanged.
+inline void MultiplyBottom(word *R,
+                           word *T,
+                           const word *A,
+                           const word *B,
+                           unsigned int N)
+{
+    RecursiveMultiplyBottom(R, T, A, B, N);
+}
+
+// Thin forwarding wrapper around RecursiveMultiplyTop(); all arguments are
+// passed through unchanged.
+inline void MultiplyTop(word *R,
+                        word *T,
+                        const word *L,
+                        const word *A,
+                        const word *B,
+                        unsigned int N)
+{
+    RecursiveMultiplyTop(R, T, L, A, B, N);
+}
+
+
+// Montgomery reduction of a double-width value.
+//
+// R[N] --- result = X/(2**(WORD_BITS*N)) mod M
+// T[3*N] - temporary work space
+// X[2*N] - number to be reduced
+// M[N] --- modulus
+// U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
+
+void MontgomeryReduce(word *R, word *T, const word *X, const word *M,
+                      const word *U, unsigned int N)
+{
+    // R is used as scratch for the bottom product of X and U
+    MultiplyBottom(R, T, X, U, N);
+    // T[0..N) receives the top product of R and M (X is passed as the third
+    // MultiplyTop argument)
+    MultiplyTop(T, T+N, X, R, M, N);
+    // T[0..N) = upper half of X minus that product; borrow records whether
+    // the subtraction went below zero
+    word borrow = Subtract(T, X+N, T, N);
+    // defend against timing attack by doing this Add even when not needed
+    word carry = Add(T+N, T, M, N);
+    (void)carry;            // shut up compiler
+    // pick the corrected value (T+N, with M added back) only when a borrow
+    // occurred, otherwise the plain difference at T
+    CopyWords(R, T + (borrow ? N : 0), N);
+}
+
+// Inverse of an odd multi-word number modulo a power of two, built up by
+// doubling the number of correct words at each recursion level.
+//
+// R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
+// T[3*N/2] - temporary work space
+// A[N] ----- an odd number as input
+//
+// Uses the A0/A1, T0/T1 and R0/R1 half-buffer macros defined earlier in the
+// file.
+
+void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
+{
+    if (N==2)
+    {
+        // base case: start from the single-word inverse of A[0], zero-extended
+        // to two words
+        T[0] = AtomicInverseModPower2(A[0]);
+        T[1] = 0;
+        // T+2 = 2 - (low two words of T*A): negate the product, then add 2
+        LowLevel::Multiply2Bottom(T+2, T, A);
+        TwosComplement(T+2, 2);
+        Increment(T+2, 2, 2);
+        // R = low two words of T * (2 - T*A)
+        // NOTE(review): this looks like one Newton refinement step extending
+        // the inverse from one word to two - confirm
+        LowLevel::Multiply2Bottom(R, T, T+2);
+    }
+    else
+    {
+        const unsigned int N2 = N/2;
+        // lower half of the result: inverse of the lower half of A
+        RecursiveInverseModPower2(R0, T0, A0, N2);
+        // T0 = 1 (as an N2-word number), passed to MultiplyTop as its third
+        // argument
+        T0[0] = 1;
+        SetWords(T0+1, 0, N2-1);
+        MultiplyTop(R1, T1, T0, R0, A0, N2);
+        MultiplyBottom(T0, T1, R0, A1, N2);
+        // combine both partial products, negate, and multiply by R0 to get
+        // the upper half of the result
+        Add(T0, R1, T0, N2);
+        TwosComplement(T0, N2);
+        MultiplyBottom(R1, T1, R0, T0, N2);
+    }
+}
+
+
+#undef A0
+#undef A1
+#undef B0
+#undef B1
+
+#undef T0
+#undef T1
+#undef T2
+#undef T3
+
+#undef R0
+#undef R1
+#undef R2
+#undef R3
+
+
+// modulus must be odd
+// Builds a Montgomery ring for modulus m.
+//
+// u is created as a zero Integer with as many words as the modulus and is
+// then filled by RecursiveInverseModPower2() from the modulus words; the
+// workspace holds five times the modulus word count.
+//
+// modulus must be odd
+MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
+    : ModularArithmetic(m)
+    , u((word)0, modulus.reg_.size())
+    , workspace(5*modulus.reg_.size())
+{
+    RecursiveInverseModPower2(u.reg_.get_buffer(),
+                              workspace.get_buffer(),
+                              modulus.reg_.get_buffer(),
+                              modulus.reg_.size());
+}
+
+// Montgomery product of a and b.
+//
+// The full product is formed in the first 2*N words of the workspace (zero
+// padded above a.size + b.size words) and then passed through
+// MontgomeryReduce() into `result`.
+const Integer& MontgomeryRepresentation::Multiply(const Integer &a,
+                                                  const Integer &b) const
+{
+    word *const product = workspace.begin();
+    word *const out = result.reg_.begin();
+    const unsigned int words = modulus.reg_.size();
+    word *const scratch = product+2*words;
+
+    AsymmetricMultiply(product, scratch, a.reg_.get_buffer(), a.reg_.size(),
+                       b.reg_.get_buffer(), b.reg_.size());
+    SetWords(product+a.reg_.size()+b.reg_.size(), 0,
+             2*words-a.reg_.size()-b.reg_.size());
+    MontgomeryReduce(out, scratch, product, modulus.reg_.get_buffer(),
+                     u.reg_.get_buffer(), words);
+    return result;
+}
+
+// Montgomery square of a.
+//
+// The full square is formed in the first 2*N words of the workspace (zero
+// padded above 2 * a.size words) and then passed through MontgomeryReduce()
+// into `result`.
+const Integer& MontgomeryRepresentation::Square(const Integer &a) const
+{
+    word *const square = workspace.begin();
+    word *const out = result.reg_.begin();
+    const unsigned int words = modulus.reg_.size();
+    word *const scratch = square+2*words;
+
+    TaoCrypt::Square(square, scratch, a.reg_.get_buffer(), a.reg_.size());
+    SetWords(square+2*a.reg_.size(), 0, 2*words-2*a.reg_.size());
+    MontgomeryReduce(out, scratch, square, modulus.reg_.get_buffer(),
+                     u.reg_.get_buffer(), words);
+    return result;
+}
+
+// Converts a out of this representation: a is copied into the workspace,
+// zero-extended to 2*N words, and passed through MontgomeryReduce().  A copy
+// of `result` is returned.
+Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
+{
+    word *const wide = workspace.begin();
+    word *const out = result.reg_.begin();
+    const unsigned int words = modulus.reg_.size();
+    word *const scratch = wide+2*words;
+
+    CopyWords(wide, a.reg_.get_buffer(), a.reg_.size());
+    SetWords(wide+a.reg_.size(), 0, 2*words-a.reg_.size());
+    MontgomeryReduce(out, scratch, wide, modulus.reg_.get_buffer(),
+                     u.reg_.get_buffer(), words);
+    return result;
+}
+
+// Multiplicative inverse of a within this Montgomery representation; the
+// answer is left in `result`.
+const Integer& MontgomeryRepresentation::MultiplicativeInverse(
+                                                        const Integer &a) const
+{
+// Reference formulation kept from the original source (not compiled):
+//  return (EuclideanMultiplicativeInverse(a, modulus)<<
+//      (2*WORD_BITS*modulus.reg_.size()))%modulus;
+    word *const T = workspace.begin();
+    word *const R = result.reg_.begin();
+    const unsigned int N = modulus.reg_.size();
+
+    // same sequence as ConvertOut(): zero-extend a to 2*N words and reduce
+    CopyWords(T, a.reg_.get_buffer(), a.reg_.size());
+    SetWords(T+a.reg_.size(), 0, 2*N-a.reg_.size());
+    MontgomeryReduce(R, T+2*N, T, modulus.reg_.get_buffer(),
+                     u.reg_.get_buffer(), N);
+    // invert in place; k is the power-of-two exponent AlmostInverse reports
+    unsigned k = AlmostInverse(R, T, R, N, modulus.reg_.get_buffer(), N);
+
+//  cout << "k=" << k << " N*32=" << 32*N << endl;
+
+    // rescale by the difference between k and N*WORD_BITS
+    // NOTE(review): presumably this leaves the result scaled by exactly
+    // 2**(N*WORD_BITS), i.e. back in Montgomery form - confirm
+    if (k>N*WORD_BITS)
+        DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.reg_.get_buffer(), N);
+    else
+        MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.reg_.get_buffer(), N);
+
+    return result;
+}
+
+
+//  mod Root stuff
+// Computes a**dp mod p and a**dq mod q separately and merges the two results
+// with CRT(), using u as the recombination coefficient.
+Integer ModularRoot(const Integer &a, const Integer &dp, const Integer &dq,
+                    const Integer &p, const Integer &q, const Integer &u)
+{
+    Integer partP = ModularExponentiation((a % p), dp, p);
+    Integer partQ = ModularExponentiation((a % q), dq, q);
+
+    return CRT(partP, p, partQ, q, u);
+}
+
+// Combines the residues xp (for p) and xq (for q) into
+// p * ((u * (xq - xp)) % q) + xp.
+Integer CRT(const Integer &xp, const Integer &p, const Integer &xq,
+            const Integer &q, const Integer &u)
+{
+    const Integer scaled = (u * (xq-xp)) % q;
+    return p * scaled + xp;
+}
+
+
+// Explicit template instantiations, emitted only when the build defines
+// HAVE_EXPLICIT_TEMPLATE_INSTANTIATION.  The half-word variant is needed only
+// without a native double-word type; the allocator only with SSE2 intrinsics.
+#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
+#ifndef TAOCRYPT_NATIVE_DWORD_AVAILABLE
+template hword DivideThreeWordsByTwo<hword, Word>(hword*, hword, hword, Word*);
+#endif
+template word DivideThreeWordsByTwo<word, DWord>(word*, word, word, DWord*);
+#ifdef SSE2_INTRINSICS_AVAILABLE
+template class AlignedAllocator<word>;
+#endif
+#endif
+
+
+} // namespace
+
diff --git a/mysql/extra/yassl/taocrypt/src/md2.cpp b/mysql/extra/yassl/taocrypt/src/md2.cpp
new file mode 100644
index 0000000..3dfc0d6
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/md2.cpp
@@ -0,0 +1,125 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+/* based on Wei Dai's md2.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "md2.hpp"
+#include <string.h>
+
+namespace TaoCrypt {
+
+
+// Allocates the three working blocks (X_, C_, buffer_) and resets the hash
+// state via Init().
+MD2::MD2()
+    : X_(X_SIZE)
+    , C_(BLOCK_SIZE)
+    , buffer_(BLOCK_SIZE)
+{
+    Init();
+}
+
+// Resets the hash: zeroes X_, C_ and buffer_ and clears the count of
+// buffered bytes.
+void MD2::Init()
+{
+    count_ = 0;
+
+    memset(buffer_.get_buffer(), 0, BLOCK_SIZE);
+    memset(C_.get_buffer(),      0, BLOCK_SIZE);
+    memset(X_.get_buffer(),      0, X_SIZE);
+}
+
+
+// Feeds len bytes from data into the hash.
+//
+// Input is gathered in buffer_; every time PAD_SIZE bytes are available the
+// block is folded into the checksum C_ and mixed into the state X_ over 18
+// passes driven by the substitution table S.
+void MD2::Update(const byte* data, word32 len)
+{
+
+    static const byte S[256] = 
+    {
+        41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6,
+        19, 98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188,
+        76, 130, 202, 30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24,
+        138, 23, 229, 18, 190, 78, 196, 214, 218, 158, 222, 73, 160, 251,
+        245, 142, 187, 47, 238, 122, 169, 104, 121, 145, 21, 178, 7, 63,
+        148, 194, 16, 137, 11, 34, 95, 33, 128, 127, 93, 154, 90, 144, 50,
+        39, 53, 62, 204, 231, 191, 247, 151, 3, 255, 25, 48, 179, 72, 165,
+        181, 209, 215, 94, 146, 42, 172, 86, 170, 198, 79, 184, 56, 210,
+        150, 164, 125, 182, 118, 252, 107, 226, 156, 116, 4, 241, 69, 157,
+        112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2, 27,
+        96, 37, 173, 174, 176, 185, 246, 28, 70, 97, 105, 52, 64, 126, 15,
+        85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186, 197,
+        234, 38, 44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65,
+        129, 77, 82, 106, 220, 55, 200, 108, 193, 171, 250, 36, 225, 123,
+        8, 12, 189, 177, 74, 120, 136, 149, 139, 227, 99, 232, 109, 233,
+        203, 213, 254, 59, 0, 29, 57, 242, 239, 183, 14, 102, 88, 208, 228,
+        166, 119, 114, 248, 235, 117, 75, 10, 49, 68, 80, 180, 143, 237,
+        31, 26, 219, 153, 141, 51, 159, 17, 131, 20
+    };
+
+    while (len) {
+        // take as much as fits into the current block
+        word32 chunk = len;
+        if ((PAD_SIZE - count_) < len)
+            chunk = (PAD_SIZE - count_);
+
+        memcpy(buffer_.get_buffer() + count_, data, chunk);
+        count_ += chunk;
+        data   += chunk;
+        len    -= chunk;
+
+        if (count_ != PAD_SIZE)
+            continue;           // block not complete yet
+
+        count_ = 0;
+        memcpy(X_.get_buffer() + PAD_SIZE, buffer_.get_buffer(), PAD_SIZE);
+        byte t = C_[15];
+
+        // third part of X_ and the running checksum
+        for (int k = 0; k < PAD_SIZE; k++) {
+            X_[32 + k] = X_[PAD_SIZE + k] ^ X_[k];
+            t = C_[k] ^= S[buffer_[k] ^ t];
+        }
+
+        // 18 mixing passes over X_, eight bytes per outer step
+        t = 0;
+        for (int pass = 0; pass < 18; pass++) {
+            for (int j = 0; j < X_SIZE; j += 8)
+                for (int k = 0; k < 8; k++)
+                    t = X_[j+k] ^= S[t];
+            t = (t + pass) & 0xFF;
+        }
+    }
+}
+
+
+// Finishes the hash and writes DIGEST_SIZE bytes to hash.
+//
+// The message is padded with padLen bytes, each holding the value padLen,
+// then the checksum block C_ is appended; the digest is the start of X_.
+// The object is re-initialised afterwards.
+void MD2::Final(byte *hash)
+{
+    const word32 padLen = PAD_SIZE - count_;
+
+    byte padding[BLOCK_SIZE];
+    memset(padding, static_cast<byte>(padLen), padLen);
+
+    Update(padding, padLen);
+    Update(C_.get_buffer(), BLOCK_SIZE);
+
+    memcpy(hash, X_.get_buffer(), DIGEST_SIZE);
+
+    Init();
+}
+
+
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/md4.cpp b/mysql/extra/yassl/taocrypt/src/md4.cpp
new file mode 100644
index 0000000..9364a1c
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/md4.cpp
@@ -0,0 +1,157 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+/* based on Wei Dai's md4.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "md4.hpp"
+#ifdef USE_SYS_STL
+    #include <algorithm>
+#else
+    #include "algorithm.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+   
+
+namespace TaoCrypt {
+
+// Resets the hash: clears the buffered-byte count and both length counters
+// and loads the four initial chaining values into digest_.
+void MD4::Init()
+{
+    static const word32 initial[4] = {
+        0x67452301L, 0xefcdab89L, 0x98badcfeL, 0x10325476L
+    };
+
+    for (int i = 0; i < 4; ++i)
+        digest_[i] = initial[i];
+
+    buffLen_ = 0;
+    loLen_   = 0;
+    hiLen_   = 0;
+}
+
+
+// Copy constructor: duplicates the digest words, the partially filled input
+// buffer and the three length counters of `that`.
+MD4::MD4(const MD4& that)
+    : HASHwithTransform(DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
+{
+    memcpy(digest_, that.digest_, DIGEST_SIZE);
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+
+    buffLen_ = that.buffLen_;
+    loLen_   = that.loLen_;
+    hiLen_   = that.hiLen_;
+}
+
+// Copy assignment: builds a copy of `that` and exchanges state with it
+// through Swap().
+MD4& MD4::operator= (const MD4& that)
+{
+    MD4 snapshot(that);
+    Swap(snapshot);
+    return *this;
+}
+
+
+// Exchanges the complete hash state (length counters, digest words and input
+// buffer) with other.
+//
+// The digest and buffer were previously only copied from other into *this,
+// leaving other with swapped counters but its old arrays.  Both arrays are
+// now exchanged through byte temporaries, and swapping an object with itself
+// is a no-op (which also avoids memcpy on overlapping storage).
+void MD4::Swap(MD4& other)
+{
+    if (this == &other)
+        return;
+
+    STL::swap(loLen_,   other.loLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(buffLen_, other.buffLen_);
+
+    byte digestTmp[DIGEST_SIZE];
+    memcpy(digestTmp,     digest_,       DIGEST_SIZE);
+    memcpy(digest_,       other.digest_, DIGEST_SIZE);
+    memcpy(other.digest_, digestTmp,     DIGEST_SIZE);
+
+    byte bufferTmp[BLOCK_SIZE];
+    memcpy(bufferTmp,     buffer_,       BLOCK_SIZE);
+    memcpy(buffer_,       other.buffer_, BLOCK_SIZE);
+    memcpy(other.buffer_, bufferTmp,     BLOCK_SIZE);
+}
+
+
+// Processes the 16 words currently in buffer_ and adds the outcome into the
+// four digest_ words.
+//
+// Three groups of 16 steps are run, each group using its own mixing macro
+// (F, G, H), its own buffer_ index order and its own rotation amounts; the
+// second and third groups also add a fixed constant per step.
+//
+// Note: F, G, H and the last definition of `function` are not #undef'd and so
+// stay defined after this function.
+void MD4::Transform()
+{
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+    word32 A, B, C, D;
+
+    // work on copies so the previous chaining values can be added at the end
+    A = digest_[0];
+    B = digest_[1];
+    C = digest_[2];
+    D = digest_[3];
+
+    // group 1: F, message words in natural order
+#define function(a,b,c,d,k,s) a=rotlFixed(a+F(b,c,d)+buffer_[k],s);
+    function(A,B,C,D, 0, 3);
+    function(D,A,B,C, 1, 7);
+    function(C,D,A,B, 2,11);
+    function(B,C,D,A, 3,19);
+    function(A,B,C,D, 4, 3);
+    function(D,A,B,C, 5, 7);
+    function(C,D,A,B, 6,11);
+    function(B,C,D,A, 7,19);
+    function(A,B,C,D, 8, 3);
+    function(D,A,B,C, 9, 7);
+    function(C,D,A,B,10,11);
+    function(B,C,D,A,11,19);
+    function(A,B,C,D,12, 3);
+    function(D,A,B,C,13, 7);
+    function(C,D,A,B,14,11);
+    function(B,C,D,A,15,19);
+
+    // group 2: G plus the constant 0x5a827999
+#undef function	  
+#define function(a,b,c,d,k,s) a=rotlFixed(a+G(b,c,d)+buffer_[k]+0x5a827999,s);
+    function(A,B,C,D, 0, 3);
+    function(D,A,B,C, 4, 5);
+    function(C,D,A,B, 8, 9);
+    function(B,C,D,A,12,13);
+    function(A,B,C,D, 1, 3);
+    function(D,A,B,C, 5, 5);
+    function(C,D,A,B, 9, 9);
+    function(B,C,D,A,13,13);
+    function(A,B,C,D, 2, 3);
+    function(D,A,B,C, 6, 5);
+    function(C,D,A,B,10, 9);
+    function(B,C,D,A,14,13);
+    function(A,B,C,D, 3, 3);
+    function(D,A,B,C, 7, 5);
+    function(C,D,A,B,11, 9);
+    function(B,C,D,A,15,13);
+
+    // group 3: H plus the constant 0x6ed9eba1
+#undef function	 
+#define function(a,b,c,d,k,s) a=rotlFixed(a+H(b,c,d)+buffer_[k]+0x6ed9eba1,s);
+    function(A,B,C,D, 0, 3);
+    function(D,A,B,C, 8, 9);
+    function(C,D,A,B, 4,11);
+    function(B,C,D,A,12,15);
+    function(A,B,C,D, 2, 3);
+    function(D,A,B,C,10, 9);
+    function(C,D,A,B, 6,11);
+    function(B,C,D,A,14,15);
+    function(A,B,C,D, 1, 3);
+    function(D,A,B,C, 9, 9);
+    function(C,D,A,B, 5,11);
+    function(B,C,D,A,13,15);
+    function(A,B,C,D, 3, 3);
+    function(D,A,B,C,11, 9);
+    function(C,D,A,B, 7,11);
+    function(B,C,D,A,15,15);
+
+    // fold the block's result into the chaining values
+    digest_[0] += A;
+    digest_[1] += B;
+    digest_[2] += C;
+    digest_[3] += D;
+}
+
+
+} // namespace
+
diff --git a/mysql/extra/yassl/taocrypt/src/md5.cpp b/mysql/extra/yassl/taocrypt/src/md5.cpp
new file mode 100644
index 0000000..45cfa8a
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/md5.cpp
@@ -0,0 +1,506 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+/* based on Wei Dai's md5.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "md5.hpp"
+#ifdef USE_SYS_STL
+    #include <algorithm>
+#else
+    #include "algorithm.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+
+
+
+namespace TaoCrypt {
+
+// Restart the hash: load the four initial digest words, clear all lengths.
+void MD5::Init()
+{
+    const word32 start[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
+
+    for (unsigned int i = 0; i < 4; i++)
+        digest_[i] = start[i];
+
+    buffLen_ = 0;
+    loLen_ = hiLen_ = 0;
+}
+
+
+// Copy constructor: clones that's buffered input, digest words and lengths.
+MD5::MD5(const MD5& that) : HASHwithTransform(DIGEST_SIZE / sizeof(word32),
+                                              BLOCK_SIZE) 
+{ 
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+    memcpy(digest_, that.digest_, DIGEST_SIZE);
+    hiLen_   = that.hiLen_;
+    loLen_   = that.loLen_;
+    buffLen_ = that.buffLen_;
+}
+
+// Assignment: build a copy of that, then hand it to Swap() to install it.
+MD5& MD5::operator= (const MD5& that)
+{
+    MD5 copy(that);
+    Swap(copy);
+    return *this;
+}
+// Exchange the complete hash state with other; both objects are modified.
+void MD5::Swap(MD5& other)
+{
+    STL::swap(loLen_,   other.loLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(buffLen_, other.buffLen_);
+    for (word32 i = 0; i < DIGEST_SIZE / sizeof(digest_[0]); i++)
+        STL::swap(digest_[i], other.digest_[i]);  // other gets our digest too
+    for (word32 j = 0; j < BLOCK_SIZE / sizeof(buffer_[0]); j++)
+        STL::swap(buffer_[j], other.buffer_[j]);  // ...and our buffered input
+}
+
+
+#ifdef DO_MD5_ASM
+
+// Hash len bytes from data; CPUs with MMX use the asm block transform
+void MD5::Update(const byte* data, word32 len)
+{
+    if (!isMMX) {
+        HASHwithTransform::Update(data, len);
+        return;
+    }
+
+    byte* block = reinterpret_cast<byte*>(buffer_);
+
+    // top up a partially filled block buffer first
+    if (buffLen_ != 0)  {
+        const word32 take = min(len, BLOCK_SIZE - buffLen_);
+        memcpy(block + buffLen_, data, take);
+
+        len      -= take;
+        data     += take;
+        buffLen_ += take;
+
+        // a completed block is hashed from buffer_ and counted
+        if (buffLen_ == BLOCK_SIZE) {
+            Transform();
+            AddLength(BLOCK_SIZE);
+            buffLen_ = 0;
+        }
+    }
+
+    // with nothing buffered, hash all whole blocks straight from data
+    const word32 whole = (buffLen_ == 0) ? len / BLOCK_SIZE : 0;
+    if (whole != 0) {
+        const word32 bulk = BLOCK_SIZE * whole;
+
+        AsmTransform(data, whole);
+        AddLength(bulk);
+        data += bulk;
+        len  -= bulk;
+    }
+
+    // keep the remainder for the next call
+    if (len != 0) {
+        memcpy(block + buffLen_, data, len);
+        buffLen_ += len;
+    }
+}
+
+
+
+
+/*
+    // w = rotlFixed(w + f(x, y, z) + index[edi] + data, s) + x
+#define ASMMD5STEP(f, w, x, y, z, index, data, s)       \
+    f(x, y, z)                                          \
+    AS2(    mov   ebp, [edi + index * 4]            )   \
+    AS2(    lea     w, [esi + w + data]             )   \
+    AS2(    add     w, ebp                          )   \
+    AS2(    rol     w, s                            )   \
+    AS2(    add     w, x                            )
+
+
+    // F1(x, y, z) (z ^ (x & (y ^ z)))
+    // place in esi
+#define ASMF1(x, y, z) \
+    AS2(    mov   esi, y                )   \
+    AS2(    xor   esi, z                )   \
+    AS2(    and   esi, x                )   \
+    AS2(    xor   esi, z                )
+
+
+#define ASMF2(x, y, z) ASMF1(z, x, y)
+
+
+    // F3(x ^ y ^ z)
+    // place in esi
+#define ASMF3(x, y, z)  \
+    AS2(    mov   esi, x                )   \
+    AS2(    xor   esi, y                )   \
+    AS2(    xor   esi, z                )
+
+
+
+    // F4(x, y, z) (y ^ (x | ~z))
+    // place in esi
+#define ASMF4(x, y, z)  \
+    AS2(    mov   esi, z                )   \
+    AS1(    not   esi                   )   \
+    AS2(     or   esi, x                )   \
+    AS2(    xor   esi, y                )
+*/
+
+
+    // MD5STEP1-4 merge the ASMMD5STEP macro above with each of ASMF1 - ASMF4:
+    //   w = rol(w + Fn(x, y, z) + ebp + data, s) + x
+    // esi arrives holding y (~z for MD5STEP4) and is reloaded for the next step
+    // ebp arrives holding the input word; index selects the NEXT step's word
+    // F1(x, y, z) = z ^ (x & (y ^ z))
+#define MD5STEP1(w, x, y, z, index, data, s)    \
+    AS2(    xor   esi, z                    )   \
+    AS2(    and   esi, x                    )   \
+    AS2(    lea     w, [ebp + w + data]     )   \
+    AS2(    xor   esi, z                    )   \
+    AS2(    add     w, esi                  )   \
+    AS2(    mov   esi, x                    )   \
+    AS2(    rol     w, s                    )   \
+    AS2(    mov   ebp, [edi + index * 4]    )   \
+    AS2(    add     w, x                    )
+    // F2(x, y, z) = F1(z, x, y) = y ^ (z & (x ^ y))
+#define MD5STEP2(w, x, y, z, index, data, s)    \
+    AS2(    xor   esi, x                    )   \
+    AS2(    and   esi, z                    )   \
+    AS2(    lea     w, [ebp + w + data]     )   \
+    AS2(    xor   esi, y                    )   \
+    AS2(    add     w, esi                  )   \
+    AS2(    mov   esi, x                    )   \
+    AS2(    rol     w, s                    )   \
+    AS2(    mov   ebp, [edi + index * 4]    )   \
+    AS2(    add     w, x                    )
+
+    // F3(x, y, z) = x ^ y ^ z
+#define MD5STEP3(w, x, y, z, index, data, s)    \
+    AS2(    xor   esi, z                    )   \
+    AS2(    lea     w, [ebp + w + data]     )   \
+    AS2(    xor   esi, x                    )   \
+    AS2(    add     w, esi                  )   \
+    AS2(    mov   esi, x                    )   \
+    AS2(    rol     w, s                    )   \
+    AS2(    mov   ebp, [edi + index * 4]    )   \
+    AS2(    add     w, x                    )
+
+    // F4(x, y, z) = y ^ (x | ~z); esi arrives as ~z and is left holding ~y
+#define MD5STEP4(w, x, y, z, index, data, s)    \
+    AS2(     or   esi, x                    )   \
+    AS2(    lea     w, [ebp + w + data]     )   \
+    AS2(    xor   esi, y                    )   \
+    AS2(    add     w, esi                  )   \
+    AS2(    mov   esi, y                    )   \
+    AS2(    rol     w, s                    )   \
+    AS1(    not   esi                       )   \
+    AS2(    mov   ebp, [edi + index * 4]    )   \
+    AS2(    add     w, x                    )
+// Hash `times` consecutive 64-byte blocks starting at data into digest_.
+// times must be non-zero: the loop counter is only tested after a block.
+// digest_ is taken to sit 16 bytes into *this (20 with OLD_GCC_OFFSET).
+#ifdef _MSC_VER
+    __declspec(naked) 
+#else
+    __attribute__ ((noinline))
+#endif
+void MD5::AsmTransform(const byte* data, word32 times)
+{
+#ifdef __GNUC__
+    #define AS1(x)    #x ";"
+    #define AS2(x, y) #x ", " #y ";"
+    // PROLOG/EPILOG open and close one asm statement; ebx and ebp are saved
+    #define PROLOG()  \
+    __asm__ __volatile__ \
+    ( \
+        ".intel_syntax noprefix;" \
+        "push ebx;" \
+        "push ebp;"
+    #define EPILOG()  \
+        "pop ebp;" \
+        "pop ebx;" \
+       	"emms;" \
+       	".att_syntax;" \
+            : \
+            : "c" (this), "D" (data), "a" (times) \
+            : "%esi", "%edx", "memory", "cc" \
+    );
+    // NOTE(review): eax, ecx and edi are written below but listed as inputs only
+#else
+    #define AS1(x)    __asm x
+    #define AS2(x, y) __asm x, y
+    // naked function: frame built by hand, edi/ebx/esi/ebp parked in mm3-mm6
+    #define PROLOG() \
+        AS1(    push  ebp                       )   \
+        AS2(    mov   ebp, esp                  )   \
+        AS2(    movd  mm3, edi                  )   \
+        AS2(    movd  mm4, ebx                  )   \
+        AS2(    movd  mm5, esi                  )   \
+        AS2(    movd  mm6, ebp                  )   \
+        AS2(    mov   edi, DWORD PTR [ebp +  8] )   \
+        AS2(    mov   eax, DWORD PTR [ebp + 12] )
+
+    #define EPILOG() \
+        AS2(    movd  ebp, mm6                  )   \
+        AS2(    movd  esi, mm5                  )   \
+        AS2(    movd  ebx, mm4                  )   \
+        AS2(    movd  edi, mm3                  )   \
+        AS2(    mov   esp, ebp                  )   \
+        AS1(    pop   ebp                       )   \
+        AS1(    emms                            )   \
+        AS1(    ret  8                          )
+        
+#endif
+
+    // from here on: edi = data, eax = times, ecx = this
+    PROLOG()
+
+    AS2(    mov   esi, ecx              )
+
+    #ifdef OLD_GCC_OFFSET
+        AS2(    add   esi, 20               )   // digest_[0]
+    #else
+        AS2(    add   esi, 16               )   // digest_[0]
+    #endif
+
+    AS2(    movd  mm2, eax              )   // store times_
+    AS2(    movd  mm1, esi              )   // store digest_
+    
+    AS2(    mov   eax, [esi]            )   // a
+    AS2(    mov   ebx, [esi +  4]       )   // b
+    AS2(    mov   ecx, [esi +  8]       )   // c
+    AS2(    mov   edx, [esi + 12]       )   // d
+  
+#ifdef _MSC_VER
+    AS1( loopStart: )  // loopStart
+#else
+    AS1( 0: )          // loopStart for some gas (need numeric for jump back)
+#endif
+
+    // set up: esi = c (y of the first step), ebp = input word 0
+    AS2(    mov   esi, ecx      )
+    AS2(    mov   ebp, [edi]    )
+    // round 1 (F1); the index argument names the word used by the next step
+    MD5STEP1( eax, ebx, ecx, edx, 1,   0xd76aa478,  7)
+    MD5STEP1( edx, eax, ebx, ecx, 2,   0xe8c7b756, 12)
+    MD5STEP1( ecx, edx, eax, ebx, 3,   0x242070db, 17)
+    MD5STEP1( ebx, ecx, edx, eax, 4,   0xc1bdceee, 22)
+    MD5STEP1( eax, ebx, ecx, edx, 5,   0xf57c0faf,  7)
+    MD5STEP1( edx, eax, ebx, ecx, 6,   0x4787c62a, 12)
+    MD5STEP1( ecx, edx, eax, ebx, 7,   0xa8304613, 17)
+    MD5STEP1( ebx, ecx, edx, eax, 8,   0xfd469501, 22)
+    MD5STEP1( eax, ebx, ecx, edx, 9,   0x698098d8,  7)
+    MD5STEP1( edx, eax, ebx, ecx, 10,  0x8b44f7af, 12)
+    MD5STEP1( ecx, edx, eax, ebx, 11,  0xffff5bb1, 17)
+    MD5STEP1( ebx, ecx, edx, eax, 12,  0x895cd7be, 22)
+    MD5STEP1( eax, ebx, ecx, edx, 13,  0x6b901122,  7)
+    MD5STEP1( edx, eax, ebx, ecx, 14,  0xfd987193, 12)
+    MD5STEP1( ecx, edx, eax, ebx, 15,  0xa679438e, 17)
+    MD5STEP1( ebx, ecx, edx, eax, 1,   0x49b40821, 22)
+    // round 2 (F2)
+    MD5STEP2( eax, ebx, ecx, edx, 6,  0xf61e2562,  5)
+    MD5STEP2( edx, eax, ebx, ecx, 11, 0xc040b340,  9)
+    MD5STEP2( ecx, edx, eax, ebx, 0,  0x265e5a51, 14)
+    MD5STEP2( ebx, ecx, edx, eax, 5,  0xe9b6c7aa, 20)
+    MD5STEP2( eax, ebx, ecx, edx, 10, 0xd62f105d,  5)
+    MD5STEP2( edx, eax, ebx, ecx, 15, 0x02441453,  9)
+    MD5STEP2( ecx, edx, eax, ebx, 4,  0xd8a1e681, 14)
+    MD5STEP2( ebx, ecx, edx, eax, 9,  0xe7d3fbc8, 20)
+    MD5STEP2( eax, ebx, ecx, edx, 14, 0x21e1cde6,  5)
+    MD5STEP2( edx, eax, ebx, ecx, 3,  0xc33707d6,  9)
+    MD5STEP2( ecx, edx, eax, ebx, 8,  0xf4d50d87, 14)
+    MD5STEP2( ebx, ecx, edx, eax, 13, 0x455a14ed, 20)
+    MD5STEP2( eax, ebx, ecx, edx, 2,  0xa9e3e905,  5)
+    MD5STEP2( edx, eax, ebx, ecx, 7,  0xfcefa3f8,  9)
+    MD5STEP2( ecx, edx, eax, ebx, 12, 0x676f02d9, 14)
+    MD5STEP2( ebx, ecx, edx, eax, 5,  0x8d2a4c8a, 20)
+    // round 3 (F3)
+    MD5STEP3(  eax, ebx, ecx, edx, 8,   0xfffa3942,  4)
+    MD5STEP3(  edx, eax, ebx, ecx, 11,  0x8771f681, 11)
+    MD5STEP3(  ecx, edx, eax, ebx, 14,  0x6d9d6122, 16)
+    MD5STEP3(  ebx, ecx, edx, eax, 1,   0xfde5380c, 23)
+    MD5STEP3(  eax, ebx, ecx, edx, 4,   0xa4beea44,  4)
+    MD5STEP3(  edx, eax, ebx, ecx, 7,   0x4bdecfa9, 11)
+    MD5STEP3(  ecx, edx, eax, ebx, 10,  0xf6bb4b60, 16)
+    MD5STEP3(  ebx, ecx, edx, eax, 13,  0xbebfbc70, 23)
+    MD5STEP3(  eax, ebx, ecx, edx, 0,   0x289b7ec6,  4)
+    MD5STEP3(  edx, eax, ebx, ecx, 3,   0xeaa127fa, 11)
+    MD5STEP3(  ecx, edx, eax, ebx, 6,   0xd4ef3085, 16)
+    MD5STEP3(  ebx, ecx, edx, eax, 9,   0x04881d05, 23)
+    MD5STEP3(  eax, ebx, ecx, edx, 12,  0xd9d4d039,  4)
+    MD5STEP3(  edx, eax, ebx, ecx, 15,  0xe6db99e5, 11)
+    MD5STEP3(  ecx, edx, eax, ebx, 2,   0x1fa27cf8, 16)
+    MD5STEP3(  ebx, ecx, edx, eax, 0,   0xc4ac5665, 23)
+
+    // setup for round 4 (F4): esi = ~d, the ~z of the first step
+    AS2(    mov   esi, edx      )
+    AS1(    not   esi           )
+
+    MD5STEP4(  eax, ebx, ecx, edx, 7,   0xf4292244,  6)
+    MD5STEP4(  edx, eax, ebx, ecx, 14,  0x432aff97, 10)
+    MD5STEP4(  ecx, edx, eax, ebx, 5,   0xab9423a7, 15)
+    MD5STEP4(  ebx, ecx, edx, eax, 12,  0xfc93a039, 21)
+    MD5STEP4(  eax, ebx, ecx, edx, 3,   0x655b59c3,  6)
+    MD5STEP4(  edx, eax, ebx, ecx, 10,  0x8f0ccc92, 10)
+    MD5STEP4(  ecx, edx, eax, ebx, 1,   0xffeff47d, 15)
+    MD5STEP4(  ebx, ecx, edx, eax, 8,   0x85845dd1, 21)
+    MD5STEP4(  eax, ebx, ecx, edx, 15,  0x6fa87e4f,  6)
+    MD5STEP4(  edx, eax, ebx, ecx, 6,   0xfe2ce6e0, 10)
+    MD5STEP4(  ecx, edx, eax, ebx, 13,  0xa3014314, 15)
+    MD5STEP4(  ebx, ecx, edx, eax, 4,   0x4e0811a1, 21)
+    MD5STEP4(  eax, ebx, ecx, edx, 11,  0xf7537e82,  6)
+    MD5STEP4(  edx, eax, ebx, ecx, 2,   0xbd3af235, 10)
+    MD5STEP4(  ecx, edx, eax, ebx, 9,   0x2ad7d2bb, 15)
+    MD5STEP4(  ebx, ecx, edx, eax, 9,   0xeb86d391, 21)
+    
+    AS2(    movd  esi, mm1              )   // digest_
+
+    AS2(    add   [esi],      eax       )   // write out
+    AS2(    add   [esi +  4], ebx       )
+    AS2(    add   [esi +  8], ecx       )
+    AS2(    add   [esi + 12], edx       )
+
+    AS2(    add   edi, 64               )   // next input block
+    // reload a, b, c, d from the updated digest for the next block
+    AS2(    mov   eax, [esi]            )
+    AS2(    mov   ebx, [esi +  4]       )
+    AS2(    mov   ecx, [esi +  8]       )
+    AS2(    mov   edx, [esi + 12]       )
+
+    AS2(    movd  ebp, mm2              )   // times
+    AS1(    dec   ebp                   )
+    AS2(    movd  mm2, ebp              )
+#ifdef _MSC_VER
+    AS1(    jnz   loopStart )  // loopStart
+#else
+    AS1(    jnz   0b )         // loopStart
+#endif
+
+
+    EPILOG()
+}
+
+
+#endif // DO_MD5_ASM
+
+
+// Compress the block held in buffer_ (read as 16 words) into digest_.
+void MD5::Transform()
+{
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+#define MD5STEP(f, w, x, y, z, data, s) \
+    w = rotlFixed(w + f(x, y, z) + data, s) + x
+
+    // Copy the digest state to working vars
+    word32 a = digest_[0];
+    word32 b = digest_[1];
+    word32 c = digest_[2];
+    word32 d = digest_[3];
+    // Round 1 (F1): input words 0..15 in order
+    MD5STEP(F1, a, b, c, d, buffer_[0]  + 0xd76aa478,  7);
+    MD5STEP(F1, d, a, b, c, buffer_[1]  + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, buffer_[2]  + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, buffer_[3]  + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, buffer_[4]  + 0xf57c0faf,  7);
+    MD5STEP(F1, d, a, b, c, buffer_[5]  + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, buffer_[6]  + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, buffer_[7]  + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, buffer_[8]  + 0x698098d8,  7);
+    MD5STEP(F1, d, a, b, c, buffer_[9]  + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, buffer_[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, buffer_[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, buffer_[12] + 0x6b901122,  7);
+    MD5STEP(F1, d, a, b, c, buffer_[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, buffer_[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, buffer_[15] + 0x49b40821, 22);
+    // Round 2 (F2): word index starts at 1 and advances by 5 (mod 16)
+    MD5STEP(F2, a, b, c, d, buffer_[1]  + 0xf61e2562,  5);
+    MD5STEP(F2, d, a, b, c, buffer_[6]  + 0xc040b340,  9);
+    MD5STEP(F2, c, d, a, b, buffer_[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, buffer_[0]  + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, buffer_[5]  + 0xd62f105d,  5);
+    MD5STEP(F2, d, a, b, c, buffer_[10] + 0x02441453,  9);
+    MD5STEP(F2, c, d, a, b, buffer_[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, buffer_[4]  + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, buffer_[9]  + 0x21e1cde6,  5);
+    MD5STEP(F2, d, a, b, c, buffer_[14] + 0xc33707d6,  9);
+    MD5STEP(F2, c, d, a, b, buffer_[3]  + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, buffer_[8]  + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, buffer_[13] + 0xa9e3e905,  5);
+    MD5STEP(F2, d, a, b, c, buffer_[2]  + 0xfcefa3f8,  9);
+    MD5STEP(F2, c, d, a, b, buffer_[7]  + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, buffer_[12] + 0x8d2a4c8a, 20);
+    // Round 3 (F3): word index starts at 5 and advances by 3 (mod 16)
+    MD5STEP(F3, a, b, c, d, buffer_[5]  + 0xfffa3942,  4);
+    MD5STEP(F3, d, a, b, c, buffer_[8]  + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, buffer_[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, buffer_[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, buffer_[1]  + 0xa4beea44,  4);
+    MD5STEP(F3, d, a, b, c, buffer_[4]  + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, buffer_[7]  + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, buffer_[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, buffer_[13] + 0x289b7ec6,  4);
+    MD5STEP(F3, d, a, b, c, buffer_[0]  + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, buffer_[3]  + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, buffer_[6]  + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, buffer_[9]  + 0xd9d4d039,  4);
+    MD5STEP(F3, d, a, b, c, buffer_[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, buffer_[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, buffer_[2]  + 0xc4ac5665, 23);
+    // Round 4 (F4): word index starts at 0 and advances by 7 (mod 16)
+    MD5STEP(F4, a, b, c, d, buffer_[0]  + 0xf4292244,  6);
+    MD5STEP(F4, d, a, b, c, buffer_[7]  + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, buffer_[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, buffer_[5]  + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, buffer_[12] + 0x655b59c3,  6);
+    MD5STEP(F4, d, a, b, c, buffer_[3]  + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, buffer_[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, buffer_[1]  + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, buffer_[8]  + 0x6fa87e4f,  6);
+    MD5STEP(F4, d, a, b, c, buffer_[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, buffer_[6]  + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, buffer_[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, buffer_[4]  + 0xf7537e82,  6);
+    MD5STEP(F4, d, a, b, c, buffer_[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, buffer_[2]  + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, buffer_[9]  + 0xeb86d391, 21);
+    
+    // Add the working vars back into digest state[]
+    digest_[0] += a;
+    digest_[1] += b;
+    digest_[2] += c;
+    digest_[3] += d;
+
+    // Wipe variables. NOTE(review): plain dead stores, may be optimised away
+    a = b = c = d = 0;
+}
+
+
+} // namespace
+
diff --git a/mysql/extra/yassl/taocrypt/src/misc.cpp b/mysql/extra/yassl/taocrypt/src/misc.cpp
new file mode 100644
index 0000000..b576d3d
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/misc.cpp
@@ -0,0 +1,297 @@
+/*
+   Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* based on Wei Dai's misc.cpp from CryptoPP */
+
+
+#include "runtime.hpp"
+#include "misc.hpp"
+
+
+#ifdef __GNUC__
+    #include <signal.h>
+    #include <setjmp.h>
+#endif
+
+#ifdef USE_SYS_STL
+    #include <algorithm>
+#else
+    #include "algorithm.hpp"
+#endif
+
+namespace STL = STL_NAMESPACE;
+
+
+#ifdef YASSL_PURE_C
+
+    // Library-only allocator: zero-size requests get 1 byte, failure aborts.
+    void* operator new(size_t sz, TaoCrypt::new_t)
+    {
+        void* mem = malloc(sz == 0 ? 1 : sz);
+        if (mem == 0) abort();
+        return mem;
+    }
+
+    // Deallocator taking the new_t tag: frees ptr, a null ptr is ignored.
+    void operator delete(void* ptr, TaoCrypt::new_t)
+    {
+        if (ptr) free(ptr);
+    }
+
+    // Array form: forwards to the scalar operator new taking the new_t tag.
+    void* operator new[](size_t sz, TaoCrypt::new_t nt)
+    {
+        return ::operator new(sz, nt);
+    }
+
+    // Array form: forwards to the scalar operator delete taking the new_t tag.
+    void operator delete[](void* ptr, TaoCrypt::new_t nt)
+    {
+        ::operator delete(ptr, nt);
+    }
+
+
+    /* uncomment to test
+    // make sure not using globals anywhere by forgetting to use overloaded
+    void* operator new(size_t sz);
+
+    void operator delete(void* ptr);
+
+    void* operator new[](size_t sz);
+
+    void operator delete[](void* ptr);
+    */
+
+
+    namespace TaoCrypt {
+
+        // new_t tag object; its type selects the operator new overloads above
+        new_t tc;   // for library new
+    }
+
+#ifdef __sun
+
+// Handler for pure virtual functions (__sun builds only); it does nothing
+namespace __Crun {
+    void pure_error() {
+    }
+}
+
+#endif
+
+#if defined(__ICC) || defined(__INTEL_COMPILER) || (__GNUC__ > 2)
+
+extern "C" {
+    // Local definition of the pure-virtual-call hook; it only returns 0.
+    // Presumably present so YASSL_PURE_C builds need no C++ runtime library.
+    int __cxa_pure_virtual() {
+      return 0;
+    }
+}  // extern "C"
+
+#endif
+
+#endif // YASSL_PURE_C
+
+
+namespace TaoCrypt {
+
+// XOR the first n words of a into r, i.e. r[i] ^= a[i].
+inline void XorWords(word* r, const word* a, unsigned int n)
+{
+    while (n--)
+        *r++ ^= *a++;
+}
+
+// XOR count bytes of mask into buf, a word at a time when alignment allows.
+void xorbuf(byte* buf, const byte* mask, unsigned int count)
+{
+    const size_t bits = (size_t)buf | (size_t)mask | count;
+    if (bits % WORD_SIZE == 0) {
+        XorWords((word *)buf, (const word *)mask, count/WORD_SIZE);
+        return;
+    }
+    for (const byte* end = mask + count; mask != end; ++mask, ++buf)
+        *buf ^= *mask;
+}
+
+
+// Number of bytes needed to represent value; 0 for a zero value.
+unsigned int BytePrecision(word value)
+{
+    unsigned int bytes = sizeof(value);
+
+    while (bytes != 0 && (value >> (bytes-1)*8) == 0)
+        --bytes;
+    return bytes;
+}
+
+
+// Number of significant bits in value (index of the top set bit plus one).
+unsigned int BitPrecision(word value)
+{
+    if (value == 0)
+        return 0;
+
+    // bisect: value has no bit at or above hi, and value >> lo is non-zero
+    unsigned int lo = 0;
+    unsigned int hi = 8 * sizeof(value);
+
+    while (hi - lo > 1) {
+        const unsigned int mid = (lo + hi) / 2;
+        if ((value >> mid) != 0)
+            lo = mid;
+        else
+            hi = mid;
+    }
+    return hi;
+}
+
+// Keep only the low `size` bits of value (all of it if size >= word width).
+word Crop(word value, unsigned int size)
+{
+    if (size >= 8*sizeof(value))
+        return value;
+    // shift a word-typed 1: a long `1L` may be narrower than word or overflow
+    return value & ((word(1) << size) - 1);
+}
+
+
+
+#ifdef TAOCRYPT_X86ASM_AVAILABLE
+
+
+// Report whether the CPUID instruction can be used on this processor.
+bool HaveCpuId()
+{
+#ifdef _MSC_VER
+    __try
+    {
+        __asm
+        {
+            mov eax, 0
+            cpuid
+        }            
+    }
+    __except (1)    // an exception while executing cpuid: report "no"
+    {
+        return false;
+    }
+    return true;
+#else
+    word32 eax, ebx;
+    __asm__ __volatile
+    (
+        /* Put EFLAGS in eax and ebx */
+        "pushf;"
+        "pushf;"
+        "pop %0;"
+        "movl %0,%1;"
+        /* Flip the cpuid bit and store back in EFLAGS */
+        "xorl $0x200000,%0;"
+        "push %0;"
+        "popf;"
+
+        /* Read EFLAGS again */
+        "pushf;"
+        "pop %0;"
+        "popf"      /* restores the flags saved by the very first pushf */
+        : "=r" (eax), "=r" (ebx)
+        :
+        : "cc"
+    );
+    // eax: flags re-read after the flip, ebx: original flags; equal = no flip
+    if (eax == ebx)
+        return false;
+    return true;
+#endif
+}
+
+// Run CPUID with eax = input; output[0..3] receive eax, ebx, ecx, edx.
+void CpuId(word32 input, word32 *output)
+{
+#ifdef __GNUC__
+    __asm__
+    (
+        // save ebx in case -fPIC is being used; its value is returned in edi
+        "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
+        : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d"(output[3])
+        : "a" (input)
+    );
+#else
+    __asm
+    {
+        mov eax, input
+        cpuid
+        mov edi, output
+        mov [edi], eax
+        mov [edi+4], ebx
+        mov [edi+8], ecx
+        mov [edi+12], edx
+    }
+#endif
+}
+
+
+// True only for a CPU with CPUID whose vendor is "GenuineIntel", family >= 5.
+bool IsPentium()
+{
+    if (!HaveCpuId())
+        return false;
+
+    word32 regs[4];
+    // leaf 0: CpuId stores eax, ebx, ecx, edx; exchanging the last two
+    // puts the 12 vendor bytes in ebx, edx, ecx order for the compare
+    CpuId(0, regs);
+    STL::swap(regs[2], regs[3]);
+    if (memcmp(regs+1, "GenuineIntel", 12) != 0)
+        return false;
+
+    // leaf 1: the family is taken from bits 8..11 of eax
+    CpuId(1, regs);
+    const byte family = ((regs[0] >> 8) & 0xf);
+    return family >= 5;
+}
+
+
+
+// True when IsPentium() holds and CPUID leaf 1 sets bit 23 of edx.
+static bool IsMmx()
+{
+    if (!IsPentium())
+        return false;
+
+    word32 regs[4];
+    CpuId(1, regs);
+
+    // regs[3] is edx; bit 23 is the flag tested for MMX
+    const word32 mmxBit = 1 << 23;
+    return (regs[3] & mmxBit) != 0;
+}
+
+// Set once during static initialisation; consulted by the asm code paths.
+bool isMMX = IsMmx();
+
+
+#endif // TAOCRYPT_X86ASM_AVAILABLE
+
+
+
+
+}  // namespace
+
diff --git a/mysql/extra/yassl/taocrypt/src/rabbit.cpp b/mysql/extra/yassl/taocrypt/src/rabbit.cpp
new file mode 100644
index 0000000..5e32f38
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/rabbit.cpp
@@ -0,0 +1,255 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+#include "runtime.hpp"
+#include "rabbit.hpp"
+
+
+
+namespace TaoCrypt {
+
+// Truncate an expression to a 32-bit word.
+#define U32V(x)  ((word32)(x))
+
+// LITTLE32 byte-swaps a word on big-endian hosts and is a no-op otherwise.
+#ifdef BIG_ENDIAN_ORDER
+    #define LITTLE32(x) ByteReverse((word32)(x))
+#else
+    #define LITTLE32(x) (x)
+#endif
+
+
+// local
+namespace {
+
+
+/* g-function: form the 64-bit square of x from 16-bit halves and return  */
+/* its upper 32 bits XORed with its lower 32 bits                         */
+word32 RABBIT_g_func(word32 x)
+{
+    /* Split the argument into 16-bit halves */
+    const word32 lo16 = x & 0xFFFF;
+    const word32 hi16 = x >> 16;
+
+    /* Upper word of x*x, assembled from the partial products */
+    const word32 crossSum = (U32V(lo16*lo16) >> 17) + U32V(lo16*hi16);
+    const word32 upper    = (crossSum >> 15) + hi16*hi16;
+
+    /* Lower word is simply the truncated 32-bit product */
+    const word32 lower = x * x;
+
+    /* Fold the two halves together */
+    return U32V(upper ^ lower);
+}
+
+
+} // namespace local
+
+
+/* Calculate the next internal state of the context selected by which */
+/* (Master selects masterCtx_, any other value workCtx_)              */
+void Rabbit::NextState(RabbitCtx which)
+{
+    /* Counter increments; the three constants repeat across c[0..7] */
+    static const word32 step[8] = {
+        0x4D34D34D, 0xD34D34D3, 0x34D34D34, 0x4D34D34D,
+        0xD34D34D3, 0x34D34D34, 0x4D34D34D, 0xD34D34D3
+    };
+
+    /* Temporary variables */
+    word32 g[8], carry, i;
+
+    /* Context to advance */
+    Ctx* ctx = (which == Master) ? &masterCtx_ : &workCtx_;
+
+    /* Calculate new counter values; an overflow of c[i] (new value   */
+    /* below the old one) is carried into c[i+1], and the carry out  */
+    /* of c[7] is kept in the context for the next call              */
+    carry = ctx->carry;
+    for (i=0; i<8; i++) {
+        const word32 before = ctx->c[i];
+
+        ctx->c[i] = U32V(before + step[i] + carry);
+        carry     = (ctx->c[i] < before);
+    }
+    ctx->carry = carry;
+
+    /* Calculate the g-values */
+    for (i=0;i<8;i++)
+        g[i] = RABBIT_g_func(U32V(ctx->x[i] + ctx->c[i]));
+
+    /* Calculate new state values: even words mix the two previous    */
+    /* g-values rotated by 16, odd words one rotated by 8 and one raw */
+    for (i=0; i<8; i+=2) {
+        const word32 prev1 = g[(i+7)&7];    /* g[i-1] (mod 8) */
+        const word32 prev2 = g[(i+6)&7];    /* g[i-2] (mod 8) */
+
+        ctx->x[i]   = U32V(g[i]   + rotlFixed(prev1,16) + rotlFixed(prev2,16));
+        ctx->x[i+1] = U32V(g[i+1] + rotlFixed(g[i], 8) + prev1);
+    }
+}
+
+
+/* IV setup: derives the work context from the master context and the */
+/* 8 bytes iv[0..7]                                                    */
+void Rabbit::SetIV(const byte* iv)
+{
+    /* Temporary variables */
+    word32 i0, i1, i2, i3, i;
+
+    /* Generate four subvectors.  The two IV words are assembled from     */
+    /* single bytes as little-endian values, so iv may have any alignment */
+    /* and no byte swap is needed on big-endian hosts                     */
+    i0 = word32(iv[0])     | word32(iv[1])<<8 |
+         word32(iv[2])<<16 | word32(iv[3])<<24;
+    i2 = word32(iv[4])     | word32(iv[5])<<8 |
+         word32(iv[6])<<16 | word32(iv[7])<<24;
+    i1 = (i0>>16) | (i2&0xFFFF0000);
+    i3 = (i2<<16) | (i0&0x0000FFFF);
+
+    /* Modify counter values: c[j] is XORed with subvector j mod 4 */
+    const word32 sub[4] = { i0, i1, i2, i3 };
+    for (i=0; i<8; i++)
+        workCtx_.c[i] = masterCtx_.c[i] ^ sub[i&3];
+
+    /* Copy state variables */
+    for (i=0; i<8; i++)
+        workCtx_.x[i] = masterCtx_.x[i];
+    workCtx_.carry = masterCtx_.carry;
+
+    /* Iterate the system four times */
+    for (i=0; i<4; i++)
+        NextState(Work);
+}
+
+
+/* Key setup: key[0..15] is read; a non-null iv is then applied via SetIV */
+void Rabbit::SetKey(const byte* key, const byte* iv)
+{
+    /* Temporary variables */
+    word32 k[4], i;
+    /* Generate four subkeys: little-endian words assembled from single  */
+    /* bytes, so key needs no alignment and no byte swap on any host     */
+    for (i=0; i<4; i++)
+        k[i] = word32(key[4*i])       | word32(key[4*i+1])<<8 |
+               word32(key[4*i+2])<<16 | word32(key[4*i+3])<<24;
+    const word32 k0 = k[0], k1 = k[1], k2 = k[2], k3 = k[3];
+
+    /* Generate initial state variables */
+    masterCtx_.x[0] = k0;
+    masterCtx_.x[2] = k1;
+    masterCtx_.x[4] = k2;
+    masterCtx_.x[6] = k3;
+    masterCtx_.x[1] = U32V(k3<<16) | (k2>>16);
+    masterCtx_.x[3] = U32V(k0<<16) | (k3>>16);
+    masterCtx_.x[5] = U32V(k1<<16) | (k0>>16);
+    masterCtx_.x[7] = U32V(k2<<16) | (k1>>16);
+
+    /* Generate initial counter values */
+    masterCtx_.c[0] = rotlFixed(k2, 16);
+    masterCtx_.c[2] = rotlFixed(k3, 16);
+    masterCtx_.c[4] = rotlFixed(k0, 16);
+    masterCtx_.c[6] = rotlFixed(k1, 16);
+    masterCtx_.c[1] = (k0&0xFFFF0000) | (k1&0xFFFF);
+    masterCtx_.c[3] = (k1&0xFFFF0000) | (k2&0xFFFF);
+    masterCtx_.c[5] = (k2&0xFFFF0000) | (k3&0xFFFF);
+    masterCtx_.c[7] = (k3&0xFFFF0000) | (k0&0xFFFF);
+
+    /* Clear carry bit */
+    masterCtx_.carry = 0;
+
+    /* Iterate the system four times */
+    for (i=0; i<4; i++)
+        NextState(Master);
+
+    /* Modify the counters */
+    for (i=0; i<8; i++)
+        masterCtx_.c[i] ^= masterCtx_.x[(i+4)&0x7];
+
+    /* Copy master instance to work instance */
+    for (i=0; i<8; i++) {
+        workCtx_.x[i] = masterCtx_.x[i];
+        workCtx_.c[i] = masterCtx_.c[i];
+    }
+    workCtx_.carry = masterCtx_.carry;
+
+    if (iv) SetIV(iv);
+}
+
+
+/* Encrypt/decrypt a message of any size.  input and output may have any */
+/* alignment and may be the same buffer; the work context advances once  */
+/* for every started 16-byte block                                       */
+void Rabbit::Process(byte* output, const byte* input, word32 msglen)
+{
+    /* Temporary variables */
+    word32 i;
+    word32 tmp[4];                             /* one keystream block */
+    const byte* keystream = (const byte*)tmp;
+
+    /* The caller's buffers are only accessed as whole words when both  */
+    /* are word aligned; otherwise the XOR is done bytewise, which      */
+    /* gives the same bytes without misaligned accesses.  Both pointers */
+    /* advance by 16 per block, so this can be decided once.            */
+    const bool aligned =
+        (((size_t)output | (size_t)input) % sizeof(word32)) == 0;
+
+    /* Encrypt/decrypt block by block */
+    while (msglen) {
+        /* Only the final block can be shorter than 16 bytes */
+        const word32 chunk = (msglen < 16) ? msglen : 16;
+
+        /* Iterate the system */
+        NextState(Work);
+
+        /* Generate 16 bytes of pseudo-random data */
+        tmp[0] = LITTLE32(workCtx_.x[0] ^
+                  (workCtx_.x[5]>>16) ^ U32V(workCtx_.x[3]<<16));
+        tmp[1] = LITTLE32(workCtx_.x[2] ^
+                  (workCtx_.x[7]>>16) ^ U32V(workCtx_.x[5]<<16));
+        tmp[2] = LITTLE32(workCtx_.x[4] ^
+                  (workCtx_.x[1]>>16) ^ U32V(workCtx_.x[7]<<16));
+        tmp[3] = LITTLE32(workCtx_.x[6] ^
+                  (workCtx_.x[3]>>16) ^ U32V(workCtx_.x[1]<<16));
+
+        /* Encrypt/decrypt the data */
+        if (aligned && chunk == 16) {
+            /* full block, aligned buffers: four word XORs */
+            const word32* in  = (const word32*)input;
+            word32*       out = (word32*)output;
+
+            for (i=0; i<4; i++)
+                out[i] = in[i] ^ tmp[i];
+        }
+        else {
+            /* unaligned buffers or a short final block */
+            for (i=0; i<chunk; i++)
+                output[i] = input[i] ^ keystream[i];
+        }
+
+        /* Increment pointers and decrement length */
+        input  += chunk;
+        output += chunk;
+        msglen -= chunk;
+
+    }
+}
+
+
+}  // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/random.cpp b/mysql/extra/yassl/taocrypt/src/random.cpp
new file mode 100644
index 0000000..26dae7d
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/random.cpp
@@ -0,0 +1,138 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+/* random.cpp implements a cryptographically secure Random Number Generator
+   seeded from an OS-specific source.  Switching the seed device to
+   /dev/random may give more security, but reads from it may block. */
+
+#include "runtime.hpp"
+#include "random.hpp"
+#include <string.h>
+#include <time.h>
+
+#if defined(_WIN32)
+    #include <windows.h>
+    #include <wincrypt.h>
+#else
+    #include <errno.h>
+    #include <fcntl.h>
+    #include <unistd.h>
+#endif // _WIN32
+
+namespace TaoCrypt {
+
+
+// Key cipher_ from a fresh seed_ value, then discard the first output
+RandomNumberGenerator::RandomNumberGenerator()
+{
+    byte seedKey[32];
+    seed_.GenerateSeed(seedKey, sizeof(seedKey));
+    cipher_.SetKey(seedKey, sizeof(seedKey));
+
+    byte discard[256];                  // initial output is thrown away
+    GenerateBlock(discard, sizeof(discard));
+}
+
+
+// Produce sz bytes: output is cleared, then transformed in place by cipher_
+void RandomNumberGenerator::GenerateBlock(byte* output, word32 sz)
+{
+    byte* const zeroed = static_cast<byte*>(memset(output, 0, sz));
+    cipher_.Process(output, zeroed, sz);    // in place: zeroed == output
+}
+
+
+// Convenience wrapper: a single generator byte obtained via GenerateBlock()
+byte RandomNumberGenerator::GenerateByte()
+{
+    byte single[1];                     // GenerateBlock() fills this slot
+    GenerateBlock(single, sizeof(single));
+    return single[0];
+}
+
+
+#if defined(_WIN32)
+
+/* The OS_Seed implementation for windows */
+
+OS_Seed::OS_Seed()      // acquire a provider handle; WINCRYPT_E on failure
+{
+    handle_ = 0;    // keep it well-defined for ~OS_Seed() if acquisition fails
+    if(!CryptAcquireContext(&handle_, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
+        error_.SetError(WINCRYPT_E);
+}
+
+
+OS_Seed::~OS_Seed()                     // hand the provider handle back
+{
+    CryptReleaseContext(handle_, 0);    // NOTE(review): runs even if ctor failed
+}
+
+
+void OS_Seed::GenerateSeed(byte* output, word32 sz)
+{   // ask the provider for sz random bytes; a FALSE return is recorded
+    if (CryptGenRandom(handle_, sz, output) == FALSE)
+        error_.SetError(CRYPTGEN_E);
+}
+
+
+#else
+
+/* The default OS_Seed implementation */
+
+OS_Seed::OS_Seed()
+{
+    const char* const devices[] = { "/dev/urandom", "/dev/random" };
+    fd_ = -1;                                   // nothing opened yet
+    for (int i = 0; i < 2 && fd_ == -1; ++i)    // first device that opens wins
+        fd_ = open(devices[i], O_RDONLY);
+    if (fd_ == -1)
+        error_.SetError(OPEN_RAN_E);            // neither could be opened
+}
+
+
+OS_Seed::~OS_Seed()                 // close the seed device, if one was opened
+{
+    if (fd_ != -1) close(fd_);      // fd_ stays -1 when both open() calls fail
+}
+
+
+// may block
+void OS_Seed::GenerateSeed(byte* output, word32 sz)
+{
+    while (sz) {
+        int len = read(fd_, output, sz);
+        if (len == -1) {
+            error_.SetError(READ_RAN_E);
+            return;
+        }
+
+        sz     -= len;
+        output += len;
+
+        if (sz)
+            sleep(1);
+    }
+}
+
+#endif // _WIN32
+
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/ripemd.cpp b/mysql/extra/yassl/taocrypt/src/ripemd.cpp
new file mode 100644
index 0000000..5d03dc6
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/ripemd.cpp
@@ -0,0 +1,844 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+
+/* based on Wei Dai's ripemd.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "ripemd.hpp"
+#ifdef USE_SYS_STL
+    #include <algorithm>
+#else
+    #include "algorithm.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+
+
+
+namespace TaoCrypt {
+
+void RIPEMD160::Init()                  // reset to the initial hash state
+{
+    static const word32 initial[5] = {  // starting values of digest_[0..4]
+        0x67452301L, 0xefcdab89L, 0x98badcfeL, 0x10325476L, 0xc3d2e1f0L
+    };
+    for (int i = 0; i < 5; ++i)
+        digest_[i] = initial[i];
+
+    buffLen_ = 0;                       // nothing buffered,
+    loLen_   = 0;                       // no length counted yet
+    hiLen_   = 0;
+}
+
+
+RIPEMD160::RIPEMD160(const RIPEMD160& that)     // copy a hash in progress
+    : HASHwithTransform(DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
+{
+    // DIGEST_SIZE and BLOCK_SIZE are used as byte counts here
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+    memcpy(digest_, that.digest_, DIGEST_SIZE);
+    hiLen_   = that.hiLen_;
+    loLen_   = that.loLen_;
+    buffLen_ = that.buffLen_;
+}
+
+
+RIPEMD160& RIPEMD160::operator= (const RIPEMD160& that)
+{
+    // copy first, then take over the copy's state through Swap()
+    RIPEMD160 snapshot(that);
+    this->Swap(snapshot);
+    return *this;
+}
+
+
+void RIPEMD160::Swap(RIPEMD160& other)  // exchange all hash state, both ways
+{
+    STL::swap(loLen_,   other.loLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(buffLen_, other.buffLen_);
+    for (word32 d = 0; d < DIGEST_SIZE / sizeof(digest_[0]); ++d)
+        STL::swap(digest_[d], other.digest_[d]);    // DIGEST_SIZE is in bytes
+    for (word32 b = 0; b < BLOCK_SIZE / sizeof(buffer_[0]); ++b)
+        STL::swap(buffer_[b], other.buffer_[b]);    // other gets ours as well
+}
+
+#ifdef DO_RIPEMD_ASM
+
+// Update digest with data of size len.
+// Without MMX this defers to HASHwithTransform::Update(); otherwise a
+// partial block is completed and hashed with Transform(), every following
+// whole block goes to AsmTransform(), and the remainder is buffered.
+void RIPEMD160::Update(const byte* data, word32 len)
+{
+    if (!isMMX) {                       // no MMX: generic implementation
+        HASHwithTransform::Update(data, len);
+        return;
+    }
+
+    byte* const pending = reinterpret_cast<byte*>(buffer_);
+
+    // top up a partially filled block first
+    if (buffLen_ != 0) {
+        const word32 take = min(len, BLOCK_SIZE - buffLen_);
+        memcpy(pending + buffLen_, data, take);
+        buffLen_ += take;
+        data     += take;
+        len      -= take;
+
+        if (buffLen_ == BLOCK_SIZE) {   // block complete: hash it
+            Transform();
+            AddLength(BLOCK_SIZE);
+            buffLen_ = 0;
+        }
+    }
+
+    // nothing pending: hand every whole block to the asm routine in one call
+    const word32 blocks = (buffLen_ == 0) ? len / BLOCK_SIZE : 0;
+    if (blocks != 0) {
+        const word32 bytes = BLOCK_SIZE * blocks;
+        AsmTransform(data, blocks);
+        AddLength(bytes);
+        data += bytes;
+        len  -= bytes;
+    }
+
+    // keep the tail (less than one block) for the next call
+    if (len != 0) {
+        memcpy(pending + buffLen_, data, len);
+        buffLen_ += len;
+    }
+}
+
+#endif // DO_RIPEMD_ASM
+
+
+// Round functions F-J (Transform uses F..J on a1..e1, J..F on a2..e2)
+#define F(x, y, z)    (x ^ y ^ z)       // arguments are not parenthesized:
+#define G(x, y, z)    (z ^ (x & (y^z))) // pass plain names only
+#define H(x, y, z)    (z ^ (x | ~y))
+#define I(x, y, z)    (y ^ (z & (x^y)))
+#define J(x, y, z)    (x ^ (y | ~z))
+
+#define k0 0            // k0-k4: added in the a1..e1 rounds of Transform()
+#define k1 0x5a827999   // k1-k8 are also handed to the ASMSubround* macros,
+#define k2 0x6ed9eba1   // where AS2 pastes their text into asm operands, so
+#define k3 0x8f1bbcdc   // keep them plain literals (no casts or suffixes)
+#define k4 0xa953fd4e
+#define k5 0x50a28be6   // k5-k9: added in the a2..e2 rounds of Transform()
+#define k6 0x5c4dd124
+#define k7 0x6d703ef3
+#define k8 0x7a6d76e9
+#define k9 0
+
+// for 160 and 320: a = rotl(a + f(b,c,d) + x + k, s) + e;  c = rotl(c, 10)
+#define Subround(f, a, b, c, d, e, x, s, k) \
+    a += f(b, c, d) + x + k;\
+    a = rotlFixed((word32)a, s) + e;\
+    c = rotlFixed((word32)c, 10U)
+
+
+void RIPEMD160::Transform()     // fold buffer_[0..15] into digest_[0..4]
+{
+    unsigned long a1, b1, c1, d1, e1, a2, b2, c2, d2, e2;
+    a1 = a2 = digest_[0];       // both passes start from the current digest
+    b1 = b2 = digest_[1];
+    c1 = c2 = digest_[2];
+    d1 = d2 = digest_[3];
+    e1 = e2 = digest_[4];
+
+    Subround(F, a1, b1, c1, d1, e1, buffer_[ 0], 11, k0);   // a1..e1: F, k0
+    Subround(F, e1, a1, b1, c1, d1, buffer_[ 1], 14, k0);
+    Subround(F, d1, e1, a1, b1, c1, buffer_[ 2], 15, k0);
+    Subround(F, c1, d1, e1, a1, b1, buffer_[ 3], 12, k0);
+    Subround(F, b1, c1, d1, e1, a1, buffer_[ 4],  5, k0);
+    Subround(F, a1, b1, c1, d1, e1, buffer_[ 5],  8, k0);
+    Subround(F, e1, a1, b1, c1, d1, buffer_[ 6],  7, k0);
+    Subround(F, d1, e1, a1, b1, c1, buffer_[ 7],  9, k0);
+    Subround(F, c1, d1, e1, a1, b1, buffer_[ 8], 11, k0);
+    Subround(F, b1, c1, d1, e1, a1, buffer_[ 9], 13, k0);
+    Subround(F, a1, b1, c1, d1, e1, buffer_[10], 14, k0);
+    Subround(F, e1, a1, b1, c1, d1, buffer_[11], 15, k0);
+    Subround(F, d1, e1, a1, b1, c1, buffer_[12],  6, k0);
+    Subround(F, c1, d1, e1, a1, b1, buffer_[13],  7, k0);
+    Subround(F, b1, c1, d1, e1, a1, buffer_[14],  9, k0);
+    Subround(F, a1, b1, c1, d1, e1, buffer_[15],  8, k0);
+
+    Subround(G, e1, a1, b1, c1, d1, buffer_[ 7],  7, k1);   // a1..e1: G, k1
+    Subround(G, d1, e1, a1, b1, c1, buffer_[ 4],  6, k1);
+    Subround(G, c1, d1, e1, a1, b1, buffer_[13],  8, k1);
+    Subround(G, b1, c1, d1, e1, a1, buffer_[ 1], 13, k1);
+    Subround(G, a1, b1, c1, d1, e1, buffer_[10], 11, k1);
+    Subround(G, e1, a1, b1, c1, d1, buffer_[ 6],  9, k1);
+    Subround(G, d1, e1, a1, b1, c1, buffer_[15],  7, k1);
+    Subround(G, c1, d1, e1, a1, b1, buffer_[ 3], 15, k1);
+    Subround(G, b1, c1, d1, e1, a1, buffer_[12],  7, k1);
+    Subround(G, a1, b1, c1, d1, e1, buffer_[ 0], 12, k1);
+    Subround(G, e1, a1, b1, c1, d1, buffer_[ 9], 15, k1);
+    Subround(G, d1, e1, a1, b1, c1, buffer_[ 5],  9, k1);
+    Subround(G, c1, d1, e1, a1, b1, buffer_[ 2], 11, k1);
+    Subround(G, b1, c1, d1, e1, a1, buffer_[14],  7, k1);
+    Subround(G, a1, b1, c1, d1, e1, buffer_[11], 13, k1);
+    Subround(G, e1, a1, b1, c1, d1, buffer_[ 8], 12, k1);
+
+    Subround(H, d1, e1, a1, b1, c1, buffer_[ 3], 11, k2);   // a1..e1: H, k2
+    Subround(H, c1, d1, e1, a1, b1, buffer_[10], 13, k2);
+    Subround(H, b1, c1, d1, e1, a1, buffer_[14],  6, k2);
+    Subround(H, a1, b1, c1, d1, e1, buffer_[ 4],  7, k2);
+    Subround(H, e1, a1, b1, c1, d1, buffer_[ 9], 14, k2);
+    Subround(H, d1, e1, a1, b1, c1, buffer_[15],  9, k2);
+    Subround(H, c1, d1, e1, a1, b1, buffer_[ 8], 13, k2);
+    Subround(H, b1, c1, d1, e1, a1, buffer_[ 1], 15, k2);
+    Subround(H, a1, b1, c1, d1, e1, buffer_[ 2], 14, k2);
+    Subround(H, e1, a1, b1, c1, d1, buffer_[ 7],  8, k2);
+    Subround(H, d1, e1, a1, b1, c1, buffer_[ 0], 13, k2);
+    Subround(H, c1, d1, e1, a1, b1, buffer_[ 6],  6, k2);
+    Subround(H, b1, c1, d1, e1, a1, buffer_[13],  5, k2);
+    Subround(H, a1, b1, c1, d1, e1, buffer_[11], 12, k2);
+    Subround(H, e1, a1, b1, c1, d1, buffer_[ 5],  7, k2);
+    Subround(H, d1, e1, a1, b1, c1, buffer_[12],  5, k2);
+
+    Subround(I, c1, d1, e1, a1, b1, buffer_[ 1], 11, k3);   // a1..e1: I, k3
+    Subround(I, b1, c1, d1, e1, a1, buffer_[ 9], 12, k3);
+    Subround(I, a1, b1, c1, d1, e1, buffer_[11], 14, k3);
+    Subround(I, e1, a1, b1, c1, d1, buffer_[10], 15, k3);
+    Subround(I, d1, e1, a1, b1, c1, buffer_[ 0], 14, k3);
+    Subround(I, c1, d1, e1, a1, b1, buffer_[ 8], 15, k3);
+    Subround(I, b1, c1, d1, e1, a1, buffer_[12],  9, k3);
+    Subround(I, a1, b1, c1, d1, e1, buffer_[ 4],  8, k3);
+    Subround(I, e1, a1, b1, c1, d1, buffer_[13],  9, k3);
+    Subround(I, d1, e1, a1, b1, c1, buffer_[ 3], 14, k3);
+    Subround(I, c1, d1, e1, a1, b1, buffer_[ 7],  5, k3);
+    Subround(I, b1, c1, d1, e1, a1, buffer_[15],  6, k3);
+    Subround(I, a1, b1, c1, d1, e1, buffer_[14],  8, k3);
+    Subround(I, e1, a1, b1, c1, d1, buffer_[ 5],  6, k3);
+    Subround(I, d1, e1, a1, b1, c1, buffer_[ 6],  5, k3);
+    Subround(I, c1, d1, e1, a1, b1, buffer_[ 2], 12, k3);
+
+    Subround(J, b1, c1, d1, e1, a1, buffer_[ 4],  9, k4);   // a1..e1: J, k4
+    Subround(J, a1, b1, c1, d1, e1, buffer_[ 0], 15, k4);
+    Subround(J, e1, a1, b1, c1, d1, buffer_[ 5],  5, k4);
+    Subround(J, d1, e1, a1, b1, c1, buffer_[ 9], 11, k4);
+    Subround(J, c1, d1, e1, a1, b1, buffer_[ 7],  6, k4);
+    Subround(J, b1, c1, d1, e1, a1, buffer_[12],  8, k4);
+    Subround(J, a1, b1, c1, d1, e1, buffer_[ 2], 13, k4);
+    Subround(J, e1, a1, b1, c1, d1, buffer_[10], 12, k4);
+    Subround(J, d1, e1, a1, b1, c1, buffer_[14],  5, k4);
+    Subround(J, c1, d1, e1, a1, b1, buffer_[ 1], 12, k4);
+    Subround(J, b1, c1, d1, e1, a1, buffer_[ 3], 13, k4);
+    Subround(J, a1, b1, c1, d1, e1, buffer_[ 8], 14, k4);
+    Subround(J, e1, a1, b1, c1, d1, buffer_[11], 11, k4);
+    Subround(J, d1, e1, a1, b1, c1, buffer_[ 6],  8, k4);
+    Subround(J, c1, d1, e1, a1, b1, buffer_[15],  5, k4);
+    Subround(J, b1, c1, d1, e1, a1, buffer_[13],  6, k4);
+
+    Subround(J, a2, b2, c2, d2, e2, buffer_[ 5],  8, k5);   // a2..e2: J, k5
+    Subround(J, e2, a2, b2, c2, d2, buffer_[14],  9, k5);
+    Subround(J, d2, e2, a2, b2, c2, buffer_[ 7],  9, k5);
+    Subround(J, c2, d2, e2, a2, b2, buffer_[ 0], 11, k5);
+    Subround(J, b2, c2, d2, e2, a2, buffer_[ 9], 13, k5);
+    Subround(J, a2, b2, c2, d2, e2, buffer_[ 2], 15, k5);
+    Subround(J, e2, a2, b2, c2, d2, buffer_[11], 15, k5);
+    Subround(J, d2, e2, a2, b2, c2, buffer_[ 4],  5, k5);
+    Subround(J, c2, d2, e2, a2, b2, buffer_[13],  7, k5);
+    Subround(J, b2, c2, d2, e2, a2, buffer_[ 6],  7, k5);
+    Subround(J, a2, b2, c2, d2, e2, buffer_[15],  8, k5);
+    Subround(J, e2, a2, b2, c2, d2, buffer_[ 8], 11, k5);
+    Subround(J, d2, e2, a2, b2, c2, buffer_[ 1], 14, k5);
+    Subround(J, c2, d2, e2, a2, b2, buffer_[10], 14, k5);
+    Subround(J, b2, c2, d2, e2, a2, buffer_[ 3], 12, k5);
+    Subround(J, a2, b2, c2, d2, e2, buffer_[12],  6, k5);
+
+    Subround(I, e2, a2, b2, c2, d2, buffer_[ 6],  9, k6);   // a2..e2: I, k6
+    Subround(I, d2, e2, a2, b2, c2, buffer_[11], 13, k6);
+    Subround(I, c2, d2, e2, a2, b2, buffer_[ 3], 15, k6);
+    Subround(I, b2, c2, d2, e2, a2, buffer_[ 7],  7, k6);
+    Subround(I, a2, b2, c2, d2, e2, buffer_[ 0], 12, k6);
+    Subround(I, e2, a2, b2, c2, d2, buffer_[13],  8, k6);
+    Subround(I, d2, e2, a2, b2, c2, buffer_[ 5],  9, k6);
+    Subround(I, c2, d2, e2, a2, b2, buffer_[10], 11, k6);
+    Subround(I, b2, c2, d2, e2, a2, buffer_[14],  7, k6);
+    Subround(I, a2, b2, c2, d2, e2, buffer_[15],  7, k6);
+    Subround(I, e2, a2, b2, c2, d2, buffer_[ 8], 12, k6);
+    Subround(I, d2, e2, a2, b2, c2, buffer_[12],  7, k6);
+    Subround(I, c2, d2, e2, a2, b2, buffer_[ 4],  6, k6);
+    Subround(I, b2, c2, d2, e2, a2, buffer_[ 9], 15, k6);
+    Subround(I, a2, b2, c2, d2, e2, buffer_[ 1], 13, k6);
+    Subround(I, e2, a2, b2, c2, d2, buffer_[ 2], 11, k6);
+
+    Subround(H, d2, e2, a2, b2, c2, buffer_[15],  9, k7);   // a2..e2: H, k7
+    Subround(H, c2, d2, e2, a2, b2, buffer_[ 5],  7, k7);
+    Subround(H, b2, c2, d2, e2, a2, buffer_[ 1], 15, k7);
+    Subround(H, a2, b2, c2, d2, e2, buffer_[ 3], 11, k7);
+    Subround(H, e2, a2, b2, c2, d2, buffer_[ 7],  8, k7);
+    Subround(H, d2, e2, a2, b2, c2, buffer_[14],  6, k7);
+    Subround(H, c2, d2, e2, a2, b2, buffer_[ 6],  6, k7);
+    Subround(H, b2, c2, d2, e2, a2, buffer_[ 9], 14, k7);
+    Subround(H, a2, b2, c2, d2, e2, buffer_[11], 12, k7);
+    Subround(H, e2, a2, b2, c2, d2, buffer_[ 8], 13, k7);
+    Subround(H, d2, e2, a2, b2, c2, buffer_[12],  5, k7);
+    Subround(H, c2, d2, e2, a2, b2, buffer_[ 2], 14, k7);
+    Subround(H, b2, c2, d2, e2, a2, buffer_[10], 13, k7);
+    Subround(H, a2, b2, c2, d2, e2, buffer_[ 0], 13, k7);
+    Subround(H, e2, a2, b2, c2, d2, buffer_[ 4],  7, k7);
+    Subround(H, d2, e2, a2, b2, c2, buffer_[13],  5, k7);
+
+    Subround(G, c2, d2, e2, a2, b2, buffer_[ 8], 15, k8);   // a2..e2: G, k8
+    Subround(G, b2, c2, d2, e2, a2, buffer_[ 6],  5, k8);
+    Subround(G, a2, b2, c2, d2, e2, buffer_[ 4],  8, k8);
+    Subround(G, e2, a2, b2, c2, d2, buffer_[ 1], 11, k8);
+    Subround(G, d2, e2, a2, b2, c2, buffer_[ 3], 14, k8);
+    Subround(G, c2, d2, e2, a2, b2, buffer_[11], 14, k8);
+    Subround(G, b2, c2, d2, e2, a2, buffer_[15],  6, k8);
+    Subround(G, a2, b2, c2, d2, e2, buffer_[ 0], 14, k8);
+    Subround(G, e2, a2, b2, c2, d2, buffer_[ 5],  6, k8);
+    Subround(G, d2, e2, a2, b2, c2, buffer_[12],  9, k8);
+    Subround(G, c2, d2, e2, a2, b2, buffer_[ 2], 12, k8);
+    Subround(G, b2, c2, d2, e2, a2, buffer_[13],  9, k8);
+    Subround(G, a2, b2, c2, d2, e2, buffer_[ 9], 12, k8);
+    Subround(G, e2, a2, b2, c2, d2, buffer_[ 7],  5, k8);
+    Subround(G, d2, e2, a2, b2, c2, buffer_[10], 15, k8);
+    Subround(G, c2, d2, e2, a2, b2, buffer_[14],  8, k8);
+
+    Subround(F, b2, c2, d2, e2, a2, buffer_[12],  8, k9);   // a2..e2: F, k9
+    Subround(F, a2, b2, c2, d2, e2, buffer_[15],  5, k9);
+    Subround(F, e2, a2, b2, c2, d2, buffer_[10], 12, k9);
+    Subround(F, d2, e2, a2, b2, c2, buffer_[ 4],  9, k9);
+    Subround(F, c2, d2, e2, a2, b2, buffer_[ 1], 12, k9);
+    Subround(F, b2, c2, d2, e2, a2, buffer_[ 5],  5, k9);
+    Subround(F, a2, b2, c2, d2, e2, buffer_[ 8], 14, k9);
+    Subround(F, e2, a2, b2, c2, d2, buffer_[ 7],  6, k9);
+    Subround(F, d2, e2, a2, b2, c2, buffer_[ 6],  8, k9);
+    Subround(F, c2, d2, e2, a2, b2, buffer_[ 2], 13, k9);
+    Subround(F, b2, c2, d2, e2, a2, buffer_[13],  6, k9);
+    Subround(F, a2, b2, c2, d2, e2, buffer_[14],  5, k9);
+    Subround(F, e2, a2, b2, c2, d2, buffer_[ 0], 15, k9);
+    Subround(F, d2, e2, a2, b2, c2, buffer_[ 3], 13, k9);
+    Subround(F, c2, d2, e2, a2, b2, buffer_[ 9], 11, k9);
+    Subround(F, b2, c2, d2, e2, a2, buffer_[11], 11, k9);
+
+    c1         = digest_[1] + c1 + d2;  // held in c1: digest_[0] is read below
+    digest_[1] = digest_[2] + d1 + e2;
+    digest_[2] = digest_[3] + e1 + a2;
+    digest_[3] = digest_[4] + a1 + b2;
+    digest_[4] = digest_[0] + b1 + c2;
+    digest_[0] = c1;                    // written last, from the saved sum
+}
+
+
+#ifdef DO_RIPEMD_ASM
+
+/*
+    // F(x ^ y ^ z)
+    // place in esi
+#define ASMF(x, y, z)  \
+    AS2(    mov   esi, x                )   \
+    AS2(    xor   esi, y                )   \
+    AS2(    xor   esi, z                )
+
+
+    // G(z ^ (x & (y^z)))
+    // place in esi
+#define ASMG(x, y, z)  \
+    AS2(    mov   esi, z                )   \
+    AS2(    xor   esi, y                )   \
+    AS2(    and   esi, x                )   \
+    AS2(    xor   esi, z                )
+
+    
+    // H(z ^ (x | ~y))
+    // place in esi
+#define ASMH(x, y, z) \
+    AS2(    mov   esi, y                )   \
+    AS1(    not   esi                   )   \
+    AS2(     or   esi, x                )   \
+    AS2(    xor   esi, z                )
+
+
+    // I(y ^ (z & (x^y)))
+    // place in esi
+#define ASMI(x, y, z)  \
+    AS2(    mov   esi, y                )   \
+    AS2(    xor   esi, x                )   \
+    AS2(    and   esi, z                )   \
+    AS2(    xor   esi, y                )
+
+
+    // J(x ^ (y | ~z)))
+    // place in esi
+#define ASMJ(x, y, z)   \
+    AS2(    mov   esi, z                )   \
+    AS1(    not   esi                   )   \
+    AS2(     or   esi, y                )   \
+    AS2(    xor   esi, x                )
+
+
+// for 160 and 320
+// #define ASMSubround(f, a, b, c, d, e, i, s, k) 
+//    a += f(b, c, d) + data[i] + k;
+//    a = rotlFixed((word32)a, s) + e;
+//    c = rotlFixed((word32)c, 10U)
+
+#define ASMSubround(f, a, b, c, d, e, index, s, k) \
+    // a += f(b, c, d) + data[i] + k                    \
+    AS2(    mov   esp, [edi + index * 4]            )   \
+    f(b, c, d)                                          \
+    AS2(    add   esi, k                            )   \
+    AS2(    add   esi, esp                          )   \
+    AS2(    add     a, esi                          )   \
+    // a = rotlFixed((word32)a, s) + e                  \
+    AS2(    rol     a, s                            )   \
+    AS2(    rol     c, 10                           )   \
+    // c = rotlFixed((word32)c, 10U)                    \
+    AS2(    add     a, e                            )
+*/
+
+
+// F subround fused with its setup (k is 0 for F rounds, so nothing is added)
+// in:  esi == c;  out: esi == b, which is c of the next (rotated) subround
+// the message word is read directly from [edi + index * 4]
+
+#define ASMSubroundF(a, b, c, d, e, index, s) \
+    /* a += (b ^ c ^ d) + data[i] + k  */               \
+    AS2(    xor   esi, b                            )   \
+    AS2(    add     a, [edi + index * 4]            )   \
+    AS2(    xor   esi, d                            )   \
+    AS2(    add     a, esi                          )   \
+    /* a = rotlFixed((word32)a, s) + e */               \
+    AS2(    mov   esi, b                            )   \
+    AS2(    rol     a, s                            )   \
+    /* c = rotlFixed((word32)c, 10U) */                 \
+    AS2(    rol     c, 10                           )   \
+    AS2(    add     a, e                            )
+
+
+// G subround fused with its setup; k is folded in by the lea
+// in:  esi == c;  out: esi == b, which is c of the next (rotated) subround
+// the message word is read directly from [edi + index * 4]
+
+#define ASMSubroundG(a, b, c, d, e, index, s, k) \
+    /* a += (d ^ (b & (c^d))) + data[i] + k  */         \
+    AS2(    xor   esi, d                            )   \
+    AS2(    and   esi, b                            )   \
+    AS2(    add     a, [edi + index * 4]            )   \
+    AS2(    xor   esi, d                            )   \
+    AS2(    lea     a, [esi + a + k]                )   \
+    /* a = rotlFixed((word32)a, s) + e */               \
+    AS2(    mov   esi, b                            )   \
+    AS2(    rol     a, s                            )   \
+    /* c = rotlFixed((word32)c, 10U) */                 \
+    AS2(    rol     c, 10                           )   \
+    AS2(    add     a, e                            )
+
+
+// H subround fused with its setup; k is folded in by the lea
+// in:  esi == c;  out: esi == b, which is c of the next (rotated) subround
+// the message word is read directly from [edi + index * 4]
+
+#define ASMSubroundH(a, b, c, d, e, index, s, k) \
+    /* a += (d ^ (b | ~c)) + data[i] + k  */            \
+    AS1(    not   esi                               )   \
+    AS2(     or   esi, b                            )   \
+    AS2(    add     a, [edi + index * 4]            )   \
+    AS2(    xor   esi, d                            )   \
+    AS2(    lea     a, [esi + a + k]                )   \
+    /* a = rotlFixed((word32)a, s) + e */               \
+    AS2(    mov   esi, b                            )   \
+    AS2(    rol     a, s                            )   \
+    /* c = rotlFixed((word32)c, 10U) */                 \
+    AS2(    rol     c, 10                           )   \
+    AS2(    add     a, e                            )
+
+
+// I subround fused with its setup; k is folded in by the lea
+// in:  esi == c;  out: esi == b, which is c of the next (rotated) subround
+// the message word is read directly from [edi + index * 4]
+
+#define ASMSubroundI(a, b, c, d, e, index, s, k) \
+    /* a += (c ^ (d & (b^c))) + data[i] + k  */         \
+    AS2(    xor   esi, b                            )   \
+    AS2(    and   esi, d                            )   \
+    AS2(    add     a, [edi + index * 4]            )   \
+    AS2(    xor   esi, c                            )   \
+    AS2(    lea     a, [esi + a + k]                )   \
+    /* a = rotlFixed((word32)a, s) + e */               \
+    AS2(    mov   esi, b                            )   \
+    AS2(    rol     a, s                            )   \
+    /* c = rotlFixed((word32)c, 10U) */                 \
+    AS2(    rol     c, 10                           )   \
+    AS2(    add     a, e                            )
+
+
+// J subround fused with its setup; k is folded in by the lea
+// in:  esi == d;  out: esi == c (already rotated), d of the next subround
+// the message word is read directly from [edi + index * 4]
+
+#define ASMSubroundJ(a, b, c, d, e, index, s, k) \
+    /* a += (b ^ (c | ~d))) + data[i] + k  */           \
+    AS1(    not   esi                               )   \
+    AS2(     or   esi, c                            )   \
+    /* c = rotlFixed((word32)c, 10U) */                 \
+    AS2(    add     a, [edi + index * 4]            )   \
+    AS2(    xor   esi, b                            )   \
+    AS2(    rol     c, 10                           )   \
+    AS2(    lea     a, [esi + a + k]                )   \
+    /* a = rotlFixed((word32)a, s) + e */               \
+    AS2(    rol     a, s                            )   \
+    AS2(    mov   esi, c                            )   \
+    AS2(    add     a, e                            )
+
+
+#ifdef _MSC_VER
+    __declspec(naked) 
+#else
+    __attribute__ ((noinline))
+#endif
+void RIPEMD160::AsmTransform(const byte* data, word32 times)
+{
+#ifdef __GNUC__
+    #define AS1(x)    #x ";"
+    #define AS2(x, y) #x ", " #y ";"
+
+    #define PROLOG()  \
+    __asm__ __volatile__ \
+    ( \
+        ".intel_syntax noprefix;" \
+        "push ebx;" \
+        "push ebp;"
+    #define EPILOG()  \
+        "pop ebp;" \
+        "pop ebx;" \
+       	"emms;" \
+       	".att_syntax;" \
+            : \
+            : "c" (this), "D" (data), "d" (times) \
+            : "%esi", "%eax", "memory", "cc" \
+    );
+
+#else
+    #define AS1(x)    __asm x
+    #define AS2(x, y) __asm x, y
+
+    #define PROLOG() \
+        AS1(    push  ebp                       )   \
+        AS2(    mov   ebp, esp                  )   \
+        AS2(    movd  mm3, edi                  )   \
+        AS2(    movd  mm4, ebx                  )   \
+        AS2(    movd  mm5, esi                  )   \
+        AS2(    movd  mm6, ebp                  )   \
+        AS2(    mov   edi, DWORD PTR [ebp +  8] )   \
+        AS2(    mov   edx, DWORD PTR [ebp + 12] )
+
+    #define EPILOG() \
+        AS2(    movd  ebp, mm6                  )   \
+        AS2(    movd  esi, mm5                  )   \
+        AS2(    movd  ebx, mm4                  )   \
+        AS2(    movd  edi, mm3                  )   \
+        AS2(    mov   esp, ebp                  )   \
+        AS1(    pop   ebp                       )   \
+        AS1(    emms                            )   \
+        AS1(    ret   8                         )
+        
+#endif
+
+    PROLOG()
+
+    #ifdef OLD_GCC_OFFSET
+        AS2(    lea   esi, [ecx + 20]               )   // digest_[0]
+    #else
+        AS2(    lea   esi, [ecx + 16]               )   // digest_[0]
+    #endif
+
+    AS2(    sub   esp, 24               )   // make room for tmp a1 - e1
+    AS2(    movd  mm1, esi              )   // store digest_
+    
+#ifdef _MSC_VER
+    AS1( loopStart: )  // loopStart
+#else
+    AS1( 0: )          // loopStart for some gas (need numeric for jump back 
+#endif
+
+    AS2(    movd  mm2, edx              )   // store times_
+
+    AS2(    mov   eax, [esi]            )   // a1
+    AS2(    mov   ebx, [esi +  4]       )   // b1
+    AS2(    mov   ecx, [esi +  8]       )   // c1
+    AS2(    mov   edx, [esi + 12]       )   // d1
+    AS2(    mov   ebp, [esi + 16]       )   // e1
+
+    // setup 
+    AS2(    mov   esi, ecx      )
+
+    ASMSubroundF( eax, ebx, ecx, edx, ebp,  0, 11)
+    ASMSubroundF( ebp, eax, ebx, ecx, edx,  1, 14)
+    ASMSubroundF( edx, ebp, eax, ebx, ecx,  2, 15)
+    ASMSubroundF( ecx, edx, ebp, eax, ebx,  3, 12)
+    ASMSubroundF( ebx, ecx, edx, ebp, eax,  4,  5)
+    ASMSubroundF( eax, ebx, ecx, edx, ebp,  5,  8)
+    ASMSubroundF( ebp, eax, ebx, ecx, edx,  6,  7)
+    ASMSubroundF( edx, ebp, eax, ebx, ecx,  7,  9)
+    ASMSubroundF( ecx, edx, ebp, eax, ebx,  8, 11)
+    ASMSubroundF( ebx, ecx, edx, ebp, eax,  9, 13)
+    ASMSubroundF( eax, ebx, ecx, edx, ebp, 10, 14)
+    ASMSubroundF( ebp, eax, ebx, ecx, edx, 11, 15)
+    ASMSubroundF( edx, ebp, eax, ebx, ecx, 12,  6)
+    ASMSubroundF( ecx, edx, ebp, eax, ebx, 13,  7)
+    ASMSubroundF( ebx, ecx, edx, ebp, eax, 14,  9)
+    ASMSubroundF( eax, ebx, ecx, edx, ebp, 15,  8)
+
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  7,  7, k1)
+    ASMSubroundG( edx, ebp, eax, ebx, ecx,  4,  6, k1)
+    ASMSubroundG( ecx, edx, ebp, eax, ebx, 13,  8, k1)
+    ASMSubroundG( ebx, ecx, edx, ebp, eax,  1, 13, k1)
+    ASMSubroundG( eax, ebx, ecx, edx, ebp, 10, 11, k1)
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  6,  9, k1)
+    ASMSubroundG( edx, ebp, eax, ebx, ecx, 15,  7, k1)
+    ASMSubroundG( ecx, edx, ebp, eax, ebx,  3, 15, k1)
+    ASMSubroundG( ebx, ecx, edx, ebp, eax, 12,  7, k1)
+    ASMSubroundG( eax, ebx, ecx, edx, ebp,  0, 12, k1)
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  9, 15, k1)
+    ASMSubroundG( edx, ebp, eax, ebx, ecx,  5,  9, k1)
+    ASMSubroundG( ecx, edx, ebp, eax, ebx,  2, 11, k1)
+    ASMSubroundG( ebx, ecx, edx, ebp, eax, 14,  7, k1)
+    ASMSubroundG( eax, ebx, ecx, edx, ebp, 11, 13, k1)
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  8, 12, k1)
+
+    ASMSubroundH( edx, ebp, eax, ebx, ecx,  3, 11, k2)
+    ASMSubroundH( ecx, edx, ebp, eax, ebx, 10, 13, k2)
+    ASMSubroundH( ebx, ecx, edx, ebp, eax, 14,  6, k2)
+    ASMSubroundH( eax, ebx, ecx, edx, ebp,  4,  7, k2)
+    ASMSubroundH( ebp, eax, ebx, ecx, edx,  9, 14, k2)
+    ASMSubroundH( edx, ebp, eax, ebx, ecx, 15,  9, k2)
+    ASMSubroundH( ecx, edx, ebp, eax, ebx,  8, 13, k2)
+    ASMSubroundH( ebx, ecx, edx, ebp, eax,  1, 15, k2)
+    ASMSubroundH( eax, ebx, ecx, edx, ebp,  2, 14, k2)
+    ASMSubroundH( ebp, eax, ebx, ecx, edx,  7,  8, k2)
+    ASMSubroundH( edx, ebp, eax, ebx, ecx,  0, 13, k2)
+    ASMSubroundH( ecx, edx, ebp, eax, ebx,  6,  6, k2)
+    ASMSubroundH( ebx, ecx, edx, ebp, eax, 13,  5, k2)
+    ASMSubroundH( eax, ebx, ecx, edx, ebp, 11, 12, k2)
+    ASMSubroundH( ebp, eax, ebx, ecx, edx,  5,  7, k2)
+    ASMSubroundH( edx, ebp, eax, ebx, ecx, 12,  5, k2)
+
+    ASMSubroundI( ecx, edx, ebp, eax, ebx,  1, 11, k3)
+    ASMSubroundI( ebx, ecx, edx, ebp, eax,  9, 12, k3)
+    ASMSubroundI( eax, ebx, ecx, edx, ebp, 11, 14, k3)
+    ASMSubroundI( ebp, eax, ebx, ecx, edx, 10, 15, k3)
+    ASMSubroundI( edx, ebp, eax, ebx, ecx,  0, 14, k3)
+    ASMSubroundI( ecx, edx, ebp, eax, ebx,  8, 15, k3)
+    ASMSubroundI( ebx, ecx, edx, ebp, eax, 12,  9, k3)
+    ASMSubroundI( eax, ebx, ecx, edx, ebp,  4,  8, k3)
+    ASMSubroundI( ebp, eax, ebx, ecx, edx, 13,  9, k3)
+    ASMSubroundI( edx, ebp, eax, ebx, ecx,  3, 14, k3)
+    ASMSubroundI( ecx, edx, ebp, eax, ebx,  7,  5, k3)
+    ASMSubroundI( ebx, ecx, edx, ebp, eax, 15,  6, k3)
+    ASMSubroundI( eax, ebx, ecx, edx, ebp, 14,  8, k3)
+    ASMSubroundI( ebp, eax, ebx, ecx, edx,  5,  6, k3)
+    ASMSubroundI( edx, ebp, eax, ebx, ecx,  6,  5, k3)
+    ASMSubroundI( ecx, edx, ebp, eax, ebx,  2, 12, k3)
+
+    // setup
+    AS2(    mov   esi, ebp      )
+
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax,  4,  9, k4)
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp,  0, 15, k4)
+    ASMSubroundJ( ebp, eax, ebx, ecx, edx,  5,  5, k4)
+    ASMSubroundJ( edx, ebp, eax, ebx, ecx,  9, 11, k4)
+    ASMSubroundJ( ecx, edx, ebp, eax, ebx,  7,  6, k4)
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax, 12,  8, k4)
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp,  2, 13, k4)
+    ASMSubroundJ( ebp, eax, ebx, ecx, edx, 10, 12, k4)
+    ASMSubroundJ( edx, ebp, eax, ebx, ecx, 14,  5, k4)
+    ASMSubroundJ( ecx, edx, ebp, eax, ebx,  1, 12, k4)
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax,  3, 13, k4)
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp,  8, 14, k4)
+    ASMSubroundJ( ebp, eax, ebx, ecx, edx, 11, 11, k4)
+    ASMSubroundJ( edx, ebp, eax, ebx, ecx,  6,  8, k4)
+    ASMSubroundJ( ecx, edx, ebp, eax, ebx, 15,  5, k4)
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax, 13,  6, k4)
+
+    // store a1 - e1 on stack
+    AS2(    movd  esi, mm1              )   // digest_
+
+    AS2(    mov   [esp],      eax       )
+    AS2(    mov   [esp +  4], ebx       )
+    AS2(    mov   [esp +  8], ecx       )
+    AS2(    mov   [esp + 12], edx       )
+    AS2(    mov   [esp + 16], ebp       )
+
+    AS2(    mov   eax, [esi]            )   // a2
+    AS2(    mov   ebx, [esi +  4]       )   // b2
+    AS2(    mov   ecx, [esi +  8]       )   // c2
+    AS2(    mov   edx, [esi + 12]       )   // d2
+    AS2(    mov   ebp, [esi + 16]       )   // e2
+
+
+    // setup
+    AS2(    mov   esi, edx      )
+
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp,  5,  8, k5)
+    ASMSubroundJ( ebp, eax, ebx, ecx, edx, 14,  9, k5)
+    ASMSubroundJ( edx, ebp, eax, ebx, ecx,  7,  9, k5)
+    ASMSubroundJ( ecx, edx, ebp, eax, ebx,  0, 11, k5)
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax,  9, 13, k5)
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp,  2, 15, k5)
+    ASMSubroundJ( ebp, eax, ebx, ecx, edx, 11, 15, k5)
+    ASMSubroundJ( edx, ebp, eax, ebx, ecx,  4,  5, k5)
+    ASMSubroundJ( ecx, edx, ebp, eax, ebx, 13,  7, k5)
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax,  6,  7, k5)
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp, 15,  8, k5)
+    ASMSubroundJ( ebp, eax, ebx, ecx, edx,  8, 11, k5)
+    ASMSubroundJ( edx, ebp, eax, ebx, ecx,  1, 14, k5)
+    ASMSubroundJ( ecx, edx, ebp, eax, ebx, 10, 14, k5)
+    ASMSubroundJ( ebx, ecx, edx, ebp, eax,  3, 12, k5)
+    ASMSubroundJ( eax, ebx, ecx, edx, ebp, 12,  6, k5)
+
+    // setup
+    AS2(    mov   esi, ebx      )
+
+    ASMSubroundI( ebp, eax, ebx, ecx, edx,  6,  9, k6) 
+    ASMSubroundI( edx, ebp, eax, ebx, ecx, 11, 13, k6)
+    ASMSubroundI( ecx, edx, ebp, eax, ebx,  3, 15, k6)
+    ASMSubroundI( ebx, ecx, edx, ebp, eax,  7,  7, k6)
+    ASMSubroundI( eax, ebx, ecx, edx, ebp,  0, 12, k6)
+    ASMSubroundI( ebp, eax, ebx, ecx, edx, 13,  8, k6)
+    ASMSubroundI( edx, ebp, eax, ebx, ecx,  5,  9, k6)
+    ASMSubroundI( ecx, edx, ebp, eax, ebx, 10, 11, k6)
+    ASMSubroundI( ebx, ecx, edx, ebp, eax, 14,  7, k6)
+    ASMSubroundI( eax, ebx, ecx, edx, ebp, 15,  7, k6)
+    ASMSubroundI( ebp, eax, ebx, ecx, edx,  8, 12, k6)
+    ASMSubroundI( edx, ebp, eax, ebx, ecx, 12,  7, k6)
+    ASMSubroundI( ecx, edx, ebp, eax, ebx,  4,  6, k6)
+    ASMSubroundI( ebx, ecx, edx, ebp, eax,  9, 15, k6)
+    ASMSubroundI( eax, ebx, ecx, edx, ebp,  1, 13, k6)
+    ASMSubroundI( ebp, eax, ebx, ecx, edx,  2, 11, k6)
+
+    ASMSubroundH( edx, ebp, eax, ebx, ecx, 15,  9, k7)
+    ASMSubroundH( ecx, edx, ebp, eax, ebx,  5,  7, k7)
+    ASMSubroundH( ebx, ecx, edx, ebp, eax,  1, 15, k7)
+    ASMSubroundH( eax, ebx, ecx, edx, ebp,  3, 11, k7)
+    ASMSubroundH( ebp, eax, ebx, ecx, edx,  7,  8, k7)
+    ASMSubroundH( edx, ebp, eax, ebx, ecx, 14,  6, k7)
+    ASMSubroundH( ecx, edx, ebp, eax, ebx,  6,  6, k7)
+    ASMSubroundH( ebx, ecx, edx, ebp, eax,  9, 14, k7)
+    ASMSubroundH( eax, ebx, ecx, edx, ebp, 11, 12, k7)
+    ASMSubroundH( ebp, eax, ebx, ecx, edx,  8, 13, k7)
+    ASMSubroundH( edx, ebp, eax, ebx, ecx, 12,  5, k7)
+    ASMSubroundH( ecx, edx, ebp, eax, ebx,  2, 14, k7)
+    ASMSubroundH( ebx, ecx, edx, ebp, eax, 10, 13, k7)
+    ASMSubroundH( eax, ebx, ecx, edx, ebp,  0, 13, k7)
+    ASMSubroundH( ebp, eax, ebx, ecx, edx,  4,  7, k7)
+    ASMSubroundH( edx, ebp, eax, ebx, ecx, 13,  5, k7)
+
+    ASMSubroundG( ecx, edx, ebp, eax, ebx,  8, 15, k8)
+    ASMSubroundG( ebx, ecx, edx, ebp, eax,  6,  5, k8)
+    ASMSubroundG( eax, ebx, ecx, edx, ebp,  4,  8, k8)
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  1, 11, k8)
+    ASMSubroundG( edx, ebp, eax, ebx, ecx,  3, 14, k8)
+    ASMSubroundG( ecx, edx, ebp, eax, ebx, 11, 14, k8)
+    ASMSubroundG( ebx, ecx, edx, ebp, eax, 15,  6, k8)
+    ASMSubroundG( eax, ebx, ecx, edx, ebp,  0, 14, k8)
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  5,  6, k8)
+    ASMSubroundG( edx, ebp, eax, ebx, ecx, 12,  9, k8)
+    ASMSubroundG( ecx, edx, ebp, eax, ebx,  2, 12, k8)
+    ASMSubroundG( ebx, ecx, edx, ebp, eax, 13,  9, k8)
+    ASMSubroundG( eax, ebx, ecx, edx, ebp,  9, 12, k8)
+    ASMSubroundG( ebp, eax, ebx, ecx, edx,  7,  5, k8)
+    ASMSubroundG( edx, ebp, eax, ebx, ecx, 10, 15, k8)
+    ASMSubroundG( ecx, edx, ebp, eax, ebx, 14,  8, k8)
+
+    ASMSubroundF( ebx, ecx, edx, ebp, eax, 12,  8)
+    ASMSubroundF( eax, ebx, ecx, edx, ebp, 15,  5)
+    ASMSubroundF( ebp, eax, ebx, ecx, edx, 10, 12)
+    ASMSubroundF( edx, ebp, eax, ebx, ecx,  4,  9)
+    ASMSubroundF( ecx, edx, ebp, eax, ebx,  1, 12)
+    ASMSubroundF( ebx, ecx, edx, ebp, eax,  5,  5)
+    ASMSubroundF( eax, ebx, ecx, edx, ebp,  8, 14)
+    ASMSubroundF( ebp, eax, ebx, ecx, edx,  7,  6)
+    ASMSubroundF( edx, ebp, eax, ebx, ecx,  6,  8)
+    ASMSubroundF( ecx, edx, ebp, eax, ebx,  2, 13)
+    ASMSubroundF( ebx, ecx, edx, ebp, eax, 13,  6)
+    ASMSubroundF( eax, ebx, ecx, edx, ebp, 14,  5)
+    ASMSubroundF( ebp, eax, ebx, ecx, edx,  0, 15)
+    ASMSubroundF( edx, ebp, eax, ebx, ecx,  3, 13)
+    ASMSubroundF( ecx, edx, ebp, eax, ebx,  9, 11)
+    ASMSubroundF( ebx, ecx, edx, ebp, eax, 11, 11)
+
+    // advance data and store for next round
+    AS2(    add   edi, 64                       )
+    AS2(    movd  esi, mm1                      )   // digest_
+    AS2(    movd  mm0, edi                      )   // store
+
+    // now edi as tmp
+
+    // c1         = digest_[1] + c1 + d2;
+    AS2(    add   [esp +  8], edx               )   // + d2
+    AS2(    mov   edi, [esi + 4]                )   // digest_[1]
+    AS2(    add   [esp +  8], edi               )
+
+    // digest_[1] = digest_[2] + d1 + e2;
+    AS2(    mov   [esi + 4], ebp                )   // e2
+    AS2(    mov   edi, [esp + 12]               )   // d1
+    AS2(    add   edi, [esi + 8]                )   // digest_[2]
+    AS2(    add   [esi + 4], edi                )
+
+    // digest_[2] = digest_[3] + e1 + a2;
+    AS2(    mov   [esi + 8], eax                )   // a2
+    AS2(    mov   edi, [esp + 16]               )   // e1
+    AS2(    add   edi, [esi + 12]               )   // digest_[3]
+    AS2(    add   [esi + 8], edi                )
+
+    // digest_[3] = digest_[4] + a1 + b2;
+    AS2(    mov   [esi + 12], ebx               )   // b2
+    AS2(    mov   edi, [esp]                    )   // a1
+    AS2(    add   edi, [esi + 16]               )   // digest_[4]
+    AS2(    add   [esi + 12], edi               )
+
+    // digest_[4] = digest_[0] + b1 + c2;
+    AS2(    mov   [esi + 16], ecx               )   // c2
+    AS2(    mov   edi, [esp +  4]               )   // b1
+    AS2(    add   edi, [esi]                    )   // digest_[0]
+    AS2(    add   [esi + 16], edi               )
+
+    // digest_[0] = c1;
+    AS2(    mov   edi, [esp +  8]               )   // c1
+    AS2(    mov   [esi], edi                    )
+
+    // setup for loop back
+    AS2(    movd  edx, mm2              )   // times
+    AS2(    movd  edi, mm0              )   // data, already advanced
+    AS1(    dec   edx                   )
+#ifdef _MSC_VER
+    AS1(    jnz   loopStart )  // loopStart
+#else
+    AS1(    jnz   0b )         // loopStart
+#endif
+
+    // inline adjust 
+    AS2(    add   esp, 24               )   // fix room on stack
+
+    EPILOG()
+}
+
+
+#endif // DO_RIPEMD_ASM
+
+
+} // namespace TaoCrypt
diff --git a/mysql/extra/yassl/taocrypt/src/rsa.cpp b/mysql/extra/yassl/taocrypt/src/rsa.cpp
new file mode 100644
index 0000000..73f678e
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/rsa.cpp
@@ -0,0 +1,215 @@
+/*
+   Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* based on Wei Dai's rsa.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include "rsa.hpp"
+#include "asn.hpp"
+#include "modarith.hpp"
+
+
+
+namespace TaoCrypt {
+
+// RSA public-key operation: returns a_exp_b_mod_c(x, e_, n_).
+Integer RSA_PublicKey::ApplyFunction(const Integer& x) const
+{
+    return a_exp_b_mod_c(x, e_, n_);
+}
+
+// Construct a public key by decoding it from source (delegates to Initialize).
+RSA_PublicKey::RSA_PublicKey(Source& source)
+{
+    Initialize(source);
+}
+
+// Fill in this key by running an RSA_Public_Decoder over source.
+void RSA_PublicKey::Initialize(Source& source)
+{
+    RSA_Public_Decoder decoder(source);
+    decoder.Decode(*this);
+}
+
+// RSA private-key operation on x, blinded with a random value drawn from rng.
+Integer RSA_PrivateKey::CalculateInverse(RandomNumberGenerator& rng,
+                                         const Integer& x) const
+{
+    ModularArithmetic modn(n_);
+    // r is requested from rng with bounds Integer::One() and n_ - 1
+    Integer r(rng, Integer::One(), n_ - Integer::One());
+    Integer re = modn.Exponentiate(r, e_);
+    re = modn.Multiply(re, x);			// blind: multiply x by r^e
+
+    // here we follow the notation of PKCS #1 and let u=q inverse mod p
+    // but in ModRoot, u=p inverse mod q, so we reverse the order of p and q
+
+    Integer y = ModularRoot(re, dq_, dp_, q_, p_, u_);
+    y = modn.Divide(y, r);				    // unblind: divide r back out
+       
+    return y;
+}
+
+// Construct a private key by decoding it from source (delegates to Initialize).
+RSA_PrivateKey::RSA_PrivateKey(Source& source)
+{
+    Initialize(source);
+}
+
+// Fill in this key by running an RSA_Private_Decoder over source.
+void RSA_PrivateKey::Initialize(Source& source)
+{
+    RSA_Private_Decoder decoder(source);
+    decoder.Decode(*this);
+}
+
+// PKCS #1 block type 2 pad (non-zero random filler); pkcsBlockLen is in bits.
+void RSA_BlockType2::Pad(const byte *input, word32 inputLen, byte *pkcsBlock,
+                         word32 pkcsBlockLen, RandomNumberGenerator& rng) const
+{
+    // sanity checks (cf. RSA_BlockType1::Pad); input must leave room to pad
+    if (input == NULL || pkcsBlock == NULL || pkcsBlockLen / 8 < 2 ||
+        inputLen > pkcsBlockLen / 8 - 2)
+        return;                     // block is left untouched
+
+    // convert from bit length to byte length
+    if (pkcsBlockLen % 8 != 0)
+        *pkcsBlock++ = 0;
+    pkcsBlockLen /= 8;
+    pkcsBlock[0] = 2;  // block type 2
+
+    // pad with non-zero random bytes; byte padLen becomes the separator
+    word32 padLen = pkcsBlockLen - inputLen - 1;
+    rng.GenerateBlock(&pkcsBlock[1], padLen);
+    for (word32 i = 1; i < padLen; i++)
+        if (pkcsBlock[i] == 0) pkcsBlock[i] = 0x01;
+    pkcsBlock[padLen] = 0;                       // separator
+    memcpy(pkcsBlock + padLen + 1, input, inputLen);
+}
+
+word32 RSA_BlockType2::UnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen,
+                           byte *output) const
+{
+    bool invalid = false;
+    unsigned int maxOutputLen = SaturatingSubtract(pkcsBlockLen / 8, 10U);
+    // pkcsBlockLen is in bits; result is the payload size, 0 when rejected
+    // convert from bit length to byte length
+    if (pkcsBlockLen % 8 != 0)
+    {
+        invalid = (pkcsBlock[0] != 0) || invalid;
+        pkcsBlock++;
+    }
+    pkcsBlockLen /= 8;
+
+    // Require block type 2.
+    invalid = (pkcsBlock[0] != 2) || invalid;
+
+    // skip past the non-zero padding until we find the zero separator
+    unsigned i=1;
+    while (i<pkcsBlockLen && pkcsBlock[i++]) { // null body
+        }
+    if (!(i==pkcsBlockLen || pkcsBlock[i-1]==0))
+        return 0;
+    // the bytes after the separator are the payload
+    unsigned int outputLen = pkcsBlockLen - i;
+    invalid = (outputLen > maxOutputLen) || invalid;
+
+    if (invalid)
+        return 0;
+
+    memcpy (output, pkcsBlock+i, outputLen);
+    return outputLen;
+}
+
+// PKCS #1 block type 1 pad (0xFF filler); pkcsBlockLen is in bits.
+void RSA_BlockType1::Pad(const byte* input, word32 inputLen, byte* pkcsBlock,
+                         word32 pkcsBlockLen, RandomNumberGenerator&) const
+{
+    // sanity checks
+    if (input == NULL || pkcsBlock == NULL)
+        return;
+    // type byte, separator and input must fit in the block, otherwise the
+    // fill length below wraps around and memset runs past the buffer
+    if (pkcsBlockLen / 8 < 2 || inputLen > pkcsBlockLen / 8 - 2)
+        return;                     // block is left untouched
+
+    // convert from bit length to byte length
+    if (pkcsBlockLen % 8 != 0)
+        *pkcsBlock++ = 0;
+    pkcsBlockLen /= 8;
+    pkcsBlock[0] = 1;  // block type 1 for SSL
+
+    // pad with 0xff bytes
+    const word32 fillLen = pkcsBlockLen - inputLen - 2;
+    memset(&pkcsBlock[1], 0xFF, fillLen);
+    pkcsBlock[fillLen + 1] = 0;                 // separator
+    memcpy(pkcsBlock + fillLen + 2, input, inputLen);
+}
+
+
+word32 RSA_BlockType1::UnPad(const byte* pkcsBlock, word32 pkcsBlockLen,
+                             byte* output) const
+{
+    bool invalid = false;
+    unsigned int maxOutputLen = SaturatingSubtract(pkcsBlockLen / 8, 10U);
+    // pkcsBlockLen is in bits; result is the payload size, 0 when rejected
+    // convert from bit length to byte length
+    if (pkcsBlockLen % 8 != 0)
+    {
+        invalid = (pkcsBlock[0] != 0) || invalid;
+        pkcsBlock++;
+    }
+    pkcsBlockLen /= 8;
+
+    // Require block type 1 for SSL.
+    invalid = (pkcsBlock[0] != 1) || invalid;
+
+    // skip past the 0xFF padding until we find the separator
+    unsigned i=1;
+    while (i<pkcsBlockLen && pkcsBlock[i++] == 0xFF) { // null body
+        }
+    if (!(i==pkcsBlockLen || pkcsBlock[i-1]==0))
+        return 0;
+    // the bytes after the separator are the payload
+    unsigned int outputLen = pkcsBlockLen - i;
+    invalid = (outputLen > maxOutputLen) || invalid;
+
+    if (invalid)
+        return 0;
+
+    memcpy(output, pkcsBlock+i, outputLen);
+    return outputLen;
+}
+
+// Apply key to sig and unpad the block type 1 result into plain.
+word32 SSL_Decrypt(const RSA_PublicKey& key, const byte* sig, byte* plain)
+{
+    PK_Lengths lengths(key.GetModulus());
+    ByteBlock  block(BitsToBytes(lengths.PaddedBlockBitLength()));
+    Integer sigInt(sig, lengths.FixedCiphertextLength());
+    Integer x(key.ApplyFunction(sigInt));
+    if (x.ByteCount() > block.size())
+        x = Integer::Zero();        // result too large for the block
+    x.Encode(block.get_buffer(), block.size());
+    RSA_BlockType1 unpadder;        // returns payload length, 0 on failure
+    return unpadder.UnPad(block.get_buffer(), lengths.PaddedBlockBitLength(),
+                          plain);
+}
+
+
+} // namespace
diff --git a/mysql/extra/yassl/taocrypt/src/sha.cpp b/mysql/extra/yassl/taocrypt/src/sha.cpp
new file mode 100644
index 0000000..4206f7f
--- /dev/null
+++ b/mysql/extra/yassl/taocrypt/src/sha.cpp
@@ -0,0 +1,1033 @@
+/*
+   Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING. If not, write to the
+   Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA  02110-1301  USA.
+*/
+
+/* based on Wei Dai's sha.cpp from CryptoPP */
+
+#include "runtime.hpp"
+#include <string.h>
+#include "sha.hpp"
+#ifdef USE_SYS_STL
+    #include <algorithm>
+#else
+    #include "algorithm.hpp"
+#endif
+
+
+namespace STL = STL_NAMESPACE;
+
+
+
+namespace TaoCrypt {
+
+#define blk0(i) (W[i] = buffer_[i])
+#define blk1(i) (W[i&15] = \
+                 rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
+// blk0 copies word i of buffer_ into W; blk1 updates W in place, index mod 16
+#define f1(x,y,z) (z^(x &(y^z)))
+#define f2(x,y,z) (x^y^z)
+#define f3(x,y,z) ((x&y)|(z&(x|y)))
+#define f4(x,y,z) (x^y^z)
+// f1: bitwise choose (x ? y : z), f2/f4: three-way xor, f3: bitwise majority
+// (R0+R1), R2, R3, R4 are the different operations used in SHA1
+#define R0(v,w,x,y,z,i) z+= f1(w,x,y) + blk0(i) + 0x5A827999+ \
+                        rotlFixed(v,5); w = rotlFixed(w,30);
+#define R1(v,w,x,y,z,i) z+= f1(w,x,y) + blk1(i) + 0x5A827999+ \
+                        rotlFixed(v,5); w = rotlFixed(w,30);
+#define R2(v,w,x,y,z,i) z+= f2(w,x,y) + blk1(i) + 0x6ED9EBA1+ \
+                        rotlFixed(v,5); w = rotlFixed(w,30);
+#define R3(v,w,x,y,z,i) z+= f3(w,x,y) + blk1(i) + 0x8F1BBCDC+ \
+                        rotlFixed(v,5); w = rotlFixed(w,30);
+#define R4(v,w,x,y,z,i) z+= f4(w,x,y) + blk1(i) + 0xCA62C1D6+ \
+                        rotlFixed(v,5); w = rotlFixed(w,30);
+
+
+void SHA::Init()
+{
+    // SHA-1 initial chaining values
+    static const word32 iv[5] = { 0x67452301L, 0xEFCDAB89L, 0x98BADCFEL,
+                                  0x10325476L, 0xC3D2E1F0L };
+    for (unsigned int i = 0; i < 5; i++)
+        digest_[i] = iv[i];
+
+    hiLen_   = 0;      // no input counted yet
+    loLen_   = 0;
+    buffLen_ = 0;      // nothing buffered
+}
+
+void SHA256::Init()
+{
+    // SHA-256 initial hash value
+    static const word32 iv[8] = {
+        0x6A09E667L, 0xBB67AE85L, 0x3C6EF372L, 0xA54FF53AL,
+        0x510E527FL, 0x9B05688CL, 0x1F83D9ABL, 0x5BE0CD19L
+    };
+    for (unsigned int i = 0; i < 8; i++)
+        digest_[i] = iv[i];
+
+    // nothing buffered, no input counted yet
+    hiLen_   = 0;
+    loLen_   = 0;
+    buffLen_ = 0;
+}
+
+
+void SHA224::Init()
+{
+    // SHA-224 initial hash value (eight words, like SHA-256)
+    static const word32 iv[8] = {
+        0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
+        0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
+    };
+    for (unsigned int i = 0; i < 8; i++)
+        digest_[i] = iv[i];
+
+    // nothing buffered, no input counted yet
+    hiLen_   = 0;
+    loLen_   = 0;
+    buffLen_ = 0;
+}
+
+
+#ifdef WORD64_AVAILABLE
+
+void SHA512::Init()
+{
+    // SHA-512 initial hash value
+    static const word64 iv[8] = {
+        W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
+        W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
+        W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
+        W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)
+    };
+    for (unsigned int i = 0; i < 8; i++)
+        digest_[i] = iv[i];
+    hiLen_   = 0;     // no input counted yet
+    loLen_   = 0;
+    buffLen_ = 0;     // nothing buffered
+}
+
+
+void SHA384::Init()
+{
+    // SHA-384 initial hash value (eight words, like SHA-512)
+    static const word64 iv[8] = {
+        W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
+        W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
+        W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
+        W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)
+    };
+    for (unsigned int i = 0; i < 8; i++)
+        digest_[i] = iv[i];
+    hiLen_   = 0;     // no input counted yet
+    loLen_   = 0;
+    buffLen_ = 0;     // nothing buffered
+}
+
+#endif // WORD64_AVAILABLE
+
+
+SHA::SHA(const SHA& that)
+    : HASHwithTransform(DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
+{
+    // duplicate the chaining state and any partially filled block
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+    memcpy(digest_, that.digest_, DIGEST_SIZE);
+    hiLen_   = that.hiLen_;
+    loLen_   = that.loLen_;
+    buffLen_ = that.buffLen_;      // fill level of buffer_
+}
+
+
+SHA256::SHA256(const SHA256& that)
+    : HASHwithTransform(DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
+{
+    // duplicate the chaining state and any partially filled block
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+    memcpy(digest_, that.digest_, DIGEST_SIZE);
+    hiLen_   = that.hiLen_;
+    loLen_   = that.loLen_;
+    buffLen_ = that.buffLen_;      // number of bytes held in buffer_
+}
+
+
+SHA224::SHA224(const SHA224& that)
+    : HASHwithTransform(SHA256::DIGEST_SIZE / sizeof(word32), BLOCK_SIZE)
+{
+    buffLen_ = that.buffLen_;      // clone buffered-byte count and lengths
+    loLen_   = that.loLen_;
+    hiLen_   = that.hiLen_;
+    // all eight state words are live (see Init), not only DIGEST_SIZE bytes
+    memcpy(digest_, that.digest_, SHA256::DIGEST_SIZE);
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+}
+
+
+#ifdef WORD64_AVAILABLE 
+
+SHA512::SHA512(const SHA512& that)
+    : HASH64withTransform(DIGEST_SIZE / sizeof(word64), BLOCK_SIZE)
+{
+    // duplicate the chaining state and any partially filled block
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+    memcpy(digest_, that.digest_, DIGEST_SIZE);
+    hiLen_   = that.hiLen_;
+    loLen_   = that.loLen_;
+    buffLen_ = that.buffLen_;      // number of bytes held in buffer_
+}
+
+
+SHA384::SHA384(const SHA384& that)
+    : HASH64withTransform(SHA512::DIGEST_SIZE / sizeof(word64), BLOCK_SIZE)
+{
+    buffLen_ = that.buffLen_;      // clone buffered-byte count and lengths
+    loLen_   = that.loLen_;
+    hiLen_   = that.hiLen_;
+    // all eight state words are live (see Init), not only DIGEST_SIZE bytes
+    memcpy(digest_, that.digest_, SHA512::DIGEST_SIZE);
+    memcpy(buffer_, that.buffer_, BLOCK_SIZE);
+}
+
+#endif // WORD64_AVAILABLE
+
+// Assignment: builds a copy of that, then takes over its state via Swap.
+SHA& SHA::operator= (const SHA& that)
+{
+    SHA tmp(that);
+    Swap(tmp);
+
+    return *this;
+}
+
+// Assignment: builds a copy of that, then takes over its state via Swap.
+SHA256& SHA256::operator= (const SHA256& that)
+{
+    SHA256 tmp(that);
+    Swap(tmp);
+
+    return *this;
+}
+
+// Assignment: builds a copy of that, then takes over its state via Swap.
+SHA224& SHA224::operator= (const SHA224& that)
+{
+    SHA224 tmp(that);
+    Swap(tmp);
+
+    return *this;
+}
+
+
+#ifdef WORD64_AVAILABLE
+// Assignment: builds a copy of that, then takes over its state via Swap.
+SHA512& SHA512::operator= (const SHA512& that)
+{
+    SHA512 tmp(that);
+    Swap(tmp);
+
+    return *this;
+}
+
+// Assignment: builds a copy of that, then takes over its state via Swap.
+SHA384& SHA384::operator= (const SHA384& that)
+{
+    SHA384 tmp(that);
+    Swap(tmp);
+
+    return *this;
+}
+
+#endif // WORD64_AVAILABLE
+
+
+void SHA::Swap(SHA& other)
+{
+    // digest_ and buffer_ are copied from other, not exchanged
+    memcpy(buffer_, other.buffer_, BLOCK_SIZE);
+    memcpy(digest_, other.digest_, DIGEST_SIZE);
+    // only the length bookkeeping is truly swapped
+    STL::swap(buffLen_, other.buffLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(loLen_,   other.loLen_);
+}
+
+
+void SHA256::Swap(SHA256& other)
+{
+    // digest_ and buffer_ are copied from other, not exchanged
+    memcpy(buffer_, other.buffer_, BLOCK_SIZE);
+    memcpy(digest_, other.digest_, DIGEST_SIZE);
+    // only the length bookkeeping is truly swapped
+    STL::swap(buffLen_, other.buffLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(loLen_,   other.loLen_);
+}
+
+
+void SHA224::Swap(SHA224& other)
+{
+    STL::swap(loLen_,   other.loLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(buffLen_, other.buffLen_);
+    // copy (not exchange) other's state: all eight digest_ words, see Init
+    memcpy(digest_, other.digest_, SHA256::DIGEST_SIZE);
+    memcpy(buffer_, other.buffer_, BLOCK_SIZE);
+}
+
+
+#ifdef WORD64_AVAILABLE
+
+void SHA512::Swap(SHA512& other)
+{
+    // digest_ and buffer_ are copied from other, not exchanged
+    memcpy(buffer_, other.buffer_, BLOCK_SIZE);
+    memcpy(digest_, other.digest_, DIGEST_SIZE);
+    // only the length bookkeeping is truly swapped
+    STL::swap(buffLen_, other.buffLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(loLen_,   other.loLen_);
+}
+
+
+void SHA384::Swap(SHA384& other)
+{
+    STL::swap(loLen_,   other.loLen_);
+    STL::swap(hiLen_,   other.hiLen_);
+    STL::swap(buffLen_, other.buffLen_);
+    // copy (not exchange) other's state: all eight digest_ words, see Init
+    memcpy(digest_, other.digest_, SHA512::DIGEST_SIZE);
+    memcpy(buffer_, other.buffer_, BLOCK_SIZE);
+}
+
+#endif // WORD64_AVAILABLE
+
+
+#ifdef DO_SHA_ASM
+
+// Update digest with data of size len; uses AsmTransform when isMMX is set
+void SHA::Update(const byte* data, word32 len)
+{
+    if (!isMMX) {
+        HASHwithTransform::Update(data, len);
+        return;
+    }
+
+    byte* local = reinterpret_cast<byte*>(buffer_);
+
+    // top up a partially filled buffer_ first and hash it once it is full
+    if (buffLen_)  {   
+        word32 add = min(len, BLOCK_SIZE - buffLen_);
+        memcpy(&local[buffLen_], data, add);
+
+        buffLen_ += add;
+        data     += add;
+        len      -= add;
+
+        if (buffLen_ == BLOCK_SIZE) {
+            ByteReverse(local, local, BLOCK_SIZE);
+            Transform();
+            AddLength(BLOCK_SIZE);
+            buffLen_ = 0;
+        }
+    }
+
+    // buffer empty: hand every whole block to the asm transform in one call
+    if (buffLen_ == 0) {
+        word32 times = len / BLOCK_SIZE;
+        if (times) {
+            AsmTransform(data, times);
+            const word32 add = BLOCK_SIZE * times;
+            AddLength(add);
+            len  -= add;
+            data += add;
+        }
+    }
+
+    // cache any data left (less than a block) for the next call
+    if (len) {
+        memcpy(&local[buffLen_], data, len);
+        buffLen_ += len;
+    }
+}
+
+#endif // DO_SHA_ASM
+
+// Process the block held in buffer_ (read via blk0) and fold it into digest_.
+void SHA::Transform()
+{
+    word32 W[BLOCK_SIZE / sizeof(word32)];
+
+    // Copy digest_ state to working vars
+    word32 a = digest_[0];
+    word32 b = digest_[1];
+    word32 c = digest_[2];
+    word32 d = digest_[3];
+    word32 e = digest_[4];
+
+    // 4 rounds of 20 operations each. Loop unrolled.
+    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+
+    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+
+    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+
+    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+
+    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+
+    // Add the working vars back into digest state[]
+    digest_[0] += a;
+    digest_[1] += b;
+    digest_[2] += c;
+    digest_[3] += d;
+    digest_[4] += e;
+
+    // Wipe variables so block-derived values are not left in the locals
+    a = b = c = d = e = 0;
+    memset(W, 0, sizeof(W));
+}
+
+
+#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
+// blk2 updates schedule word i in the 16-entry ring W; Ch/Maj: choose/majority
+#define Ch(x,y,z) (z^(x&(y^z)))
+#define Maj(x,y,z) ((x&y)|(z&(x|y)))
+// a(i)..h(i) name the eight slots of T, rotated by the round index i
+#define a(i) T[(0-i)&7]
+#define b(i) T[(1-i)&7]
+#define c(i) T[(2-i)&7]
+#define d(i) T[(3-i)&7]
+#define e(i) T[(4-i)&7]
+#define f(i) T[(5-i)&7]
+#define g(i) T[(6-i)&7]
+#define h(i) T[(7-i)&7]
+// one round; expects T, W, K, buffer_ and loop counter j in the caller's scope
+#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
+	d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
+
+// sigma functions for SHA256 (undefined again after Transform256)
+#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
+#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
+#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
+#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
+
+// Round constants added as K[i+j] by the R macro in Transform256 (64 rounds)
+static const word32 K256[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+// Process one block: folds buffer_ (read via blk0 inside R) into digest_.
+static void Transform256(word32* digest_, word32* buffer_)
+{
+    const  word32* K = K256;
+
+    word32 W[16];   // message schedule ring used by blk0/blk2
+    word32 T[8];    // working variables addressed through a(i)..h(i)
+
+    // Copy digest to working vars
+    memcpy(T, digest_, sizeof(T));
+
+    // 64 operations, partially loop unrolled
+    for (unsigned int j = 0; j < 64; j += 16) {
+        R( 0); R( 1); R( 2); R( 3);
+        R( 4); R( 5); R( 6); R( 7);
+        R( 8); R( 9); R(10); R(11);
+        R(12); R(13); R(14); R(15);
+    }
+
+    // Add the working vars back into digest
+    digest_[0] += a(0);
+    digest_[1] += b(0);
+    digest_[2] += c(0);
+    digest_[3] += d(0);
+    digest_[4] += e(0);
+    digest_[5] += f(0);
+    digest_[6] += g(0);
+    digest_[7] += h(0);
+
+    // Wipe variables so block-derived values are not left in the locals
+    memset(W, 0, sizeof(W));
+    memset(T, 0, sizeof(T));
+}
+
+
+// drop the SHA256 sigma macros so they can be redefined for SHA512 below
+#undef S0
+#undef S1
+#undef s0
+#undef s1
+
+// Hash the block in buffer_ into digest_ using the shared Transform256.
+void SHA256::Transform()
+{
+    Transform256(digest_, buffer_);
+}
+
+// Hash the block in buffer_ into digest_ using the shared Transform256.
+void SHA224::Transform()
+{
+    Transform256(digest_, buffer_);
+}
+
+
+#ifdef WORD64_AVAILABLE
+// Round constants added as K[i+j] by the R macro in Transform512 (80 rounds)
+static const word64 K512[80] = {
+	W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
+	W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
+	W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
+	W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
+	W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
+	W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
+	W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
+	W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
+	W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
+	W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
+	W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
+	W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
+	W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
+	W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
+	W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
+	W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
+	W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
+	W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
+	W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
+	W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
+	W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
+	W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
+	W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
+	W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
+	W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
+	W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
+	W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
+	W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
+	W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
+	W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
+	W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
+	W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
+	W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
+	W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
+	W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
+	W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
+	W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
+	W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
+	W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
+	W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
+};
+
+
+// sigma functions for SHA512, picked up by the R and blk2 macros
+#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
+#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
+#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
+#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
+
+// Process one block: folds buffer_ (read via blk0 inside R) into digest_.
+static void Transform512(word64* digest_, word64* buffer_)
+{
+    const word64* K = K512;
+
+    word64 W[16];   // message schedule ring used by blk0/blk2
+    word64 T[8];    // working variables addressed through a(i)..h(i)
+
+    // Copy digest to working vars
+    memcpy(T, digest_, sizeof(T));
+
+    // 80 operations in 5 passes of 16, partially loop unrolled
+    for (unsigned int j = 0; j < 80; j += 16) {
+        R( 0); R( 1); R( 2); R( 3);
+        R( 4); R( 5); R( 6); R( 7);
+        R( 8); R( 9); R(10); R(11);
+        R(12); R(13); R(14); R(15);
+    }
+
+    // Add the working vars back into digest
+
+    digest_[0] += a(0);
+    digest_[1] += b(0);
+    digest_[2] += c(0);
+    digest_[3] += d(0);
+    digest_[4] += e(0);
+    digest_[5] += f(0);
+    digest_[6] += g(0);
+    digest_[7] += h(0);
+
+    // Wipe variables so block-derived values are not left in the locals
+    memset(W, 0, sizeof(W));
+    memset(T, 0, sizeof(T));
+}
+
+// Hash the block in buffer_ into digest_ using the shared Transform512.
+void SHA512::Transform()
+{
+    Transform512(digest_, buffer_);
+}
+
+// Hash the block in buffer_ into digest_ using the shared Transform512.
+void SHA384::Transform()
+{
+    Transform512(digest_, buffer_);
+}
+
+#endif // WORD64_AVAILABLE
+
+
+#ifdef DO_SHA_ASM
+
+// asm form of f1(x,y,z) = (z^(x &(y^z)))
+// the result is left in esi; x, y and z are not modified
+#define ASMf1(x,y,z)   \
+    AS2(    mov   esi, y    )   \
+    AS2(    xor   esi, z    )   \
+    AS2(    and   esi, x    )   \
+    AS2(    xor   esi, z    )
+
+
+// R0(v,w,x,y,z,i) =
+//      z+= f1(w,x,y) + W[i] + 0x5A827999 + rotlFixed(v,5);
+//      w = rotlFixed(w,30);
+
+//      use esi for f
+//      use edi as tmp
+//      W[i] is read from the stack at [esp + i * 4]
+
+#define ASMR0(v,w,x,y,z,i) \
+    AS2(    mov   esi, x                        )   \
+    AS2(    mov   edi, [esp + i * 4]            )   \
+    AS2(    xor   esi, y                        )   \
+    AS2(    and   esi, w                        )   \
+    AS2(    lea     z, [edi + z + 0x5A827999]   )   \
+    AS2(    mov   edi, v                        )   \
+    AS2(    xor   esi, y                        )   \
+    AS2(    rol   edi, 5                        )   \
+    AS2(    add     z, esi                      )   \
+    AS2(    rol     w, 30                       )   \
+    AS2(    add     z, edi                      )
+
+
+/*  Some macro stuff, but older gas ( < 2.16 ) can't process &, so do by hand
+    % won't work on gas at all
+
+#define xstr(s) str(s)
+#define  str(s) #s
+
+#define WOFF1(a) ( a       & 15)
+#define WOFF2(a) ((a +  2) & 15)
+#define WOFF3(a) ((a +  8) & 15)
+#define WOFF4(a) ((a + 13) & 15)
+
+#ifdef __GNUC__
+    #define WGET1(i) asm("mov esp, [edi - "xstr(WOFF1(i))" * 4] ");
+    #define WGET2(i) asm("xor esp, [edi - "xstr(WOFF2(i))" * 4] ");
+    #define WGET3(i) asm("xor esp, [edi - "xstr(WOFF3(i))" * 4] ");
+    #define WGET4(i) asm("xor esp, [edi - "xstr(WOFF4(i))" * 4] ");
+    #define WPUT1(i) asm("mov [edi - "xstr(WOFF1(i))" * 4], esp ");
+#else
+    #define WGET1(i) AS2( mov   esp, [edi - WOFF1(i) * 4]   )
+    #define WGET2(i) AS2( xor   esp, [edi - WOFF2(i) * 4]   )
+    #define WGET3(i) AS2( xor   esp, [edi - WOFF3(i) * 4]   )
+    #define WGET4(i) AS2( xor   esp, [edi - WOFF4(i) * 4]   )
+    #define WPUT1(i) AS2( mov   [edi - WOFF1(i) * 4], esp   )
+#endif
+*/
+
+// ASMR1 = ASMR0, but first forms rol(W[W1]^W[W2]^W[W3]^W[W4], 1) in edi from
+// the stack words at [esp + Wn * 4] and stores it back to slot W1; i is unused
+#define ASMR1(v,w,x,y,z,i,W1,W2,W3,W4) \
+    AS2(    mov   edi, [esp + W1 * 4]           )   \
+    AS2(    mov   esi, x                        )   \
+    AS2(    xor   edi, [esp + W2 * 4]           )   \
+    AS2(    xor   esi, y                        )   \
+    AS2(    xor   edi, [esp + W3 * 4]           )   \
+    AS2(    and   esi, w                        )   \
+    AS2(    xor   edi, [esp + W4 * 4]           )   \
+    AS2(    rol   edi, 1                        )   \
+    AS2(    xor   esi, y                        )   \
+    AS2(    mov   [esp + W1 * 4], edi           )   \
+    AS2(    lea     z, [edi + z + 0x5A827999]   )   \
+    AS2(    mov   edi, v                        )   \
+    AS2(    rol   edi, 5                        )   \
+    AS2(    add     z, esi                      )   \
+    AS2(    rol     w, 30                       )   \
+    AS2(    add     z, edi                      )
+
+
+/* ASMR2 = ASMR1 but f is xor, xor instead
+ *
+ * One SHA round (the call sites use it for rounds 20-39).  The schedule word
+ * is extended in place exactly as in ASMR1:
+ *     W[W1] = rol(W[W1] ^ W[W2] ^ W[W3] ^ W[W4], 1)
+ * and the working variables are updated with f = x ^ y ^ w:
+ *     z += W[W1] + 0x6ED9EBA1 + (x ^ y ^ w) + rol(v, 5)
+ *     w  = rol(w, 30)
+ *
+ * The round number i is accepted but not used in the body.  edi and esi are
+ * clobbered as scratch.
+ */
+
+#define ASMR2(v,w,x,y,z,i,W1,W2,W3,W4) \
+    AS2(    mov   edi, [esp + W1 * 4]           )   /* edi  = W[W1]                 */ \
+    AS2(    mov   esi, x                        )   /* esi  = x                     */ \
+    AS2(    xor   edi, [esp + W2 * 4]           )   /* edi ^= W[W2]                 */ \
+    AS2(    xor   esi, y                        )   /* esi  = x ^ y                 */ \
+    AS2(    xor   edi, [esp + W3 * 4]           )   /* edi ^= W[W3]                 */ \
+    AS2(    xor   esi, w                        )   /* esi  = f = x ^ y ^ w         */ \
+    AS2(    xor   edi, [esp + W4 * 4]           )   /* edi ^= W[W4]                 */ \
+    AS2(    rol   edi, 1                        )   /* edi  = new schedule word     */ \
+    AS2(    add     z, esi                      )   /* z   += f                     */ \
+    AS2(    mov   [esp + W1 * 4], edi           )   /* W[W1] = new schedule word    */ \
+    AS2(    lea     z, [edi + z + 0x6ED9EBA1]   )   /* z   += W[W1] + constant      */ \
+    AS2(    mov   edi, v                        )   /* edi  = v                     */ \
+    AS2(    rol   edi, 5                        )   /* edi  = rol(v, 5)             */ \
+    AS2(    rol     w, 30                       )   /* w    = rol(w, 30)            */ \
+    AS2(    add     z, edi                      )   /* z   += rol(v, 5)             */
+
+
+/* ASMR3 = ASMR2 but f is (x&y)|(z&(x|y))
+ *               which is (w&x)|(y&(w|x)) in this macro's parameter names
+ *
+ * One SHA round (the call sites use it for rounds 40-59).  The schedule word
+ * is extended in place as in ASMR1/ASMR2, then:
+ *     z += W[W1] + 0x8F1BBCDC + ((w & x) | (y & (w | x))) + rol(v, 5)
+ *     w  = rol(w, 30)
+ *
+ * Both scratch registers are busy while f is being built (edi carries the
+ * schedule word, esi the partial f), so the half-result (x | w) & y is
+ * parked in mm0 and fetched back once edi is free.  The round number i is
+ * accepted but not used in the body.  edi, esi and mm0 are clobbered.
+ */
+// Comments inside the body must be block comments: a line comment would swallow the continuation backslash.
+
+#define ASMR3(v,w,x,y,z,i,W1,W2,W3,W4) \
+    AS2(    mov   edi, [esp + W1 * 4]           )   /* edi  = W[W1]                 */ \
+    AS2(    mov   esi, x                        )   /* esi  = x                     */ \
+    AS2(    xor   edi, [esp + W2 * 4]           )   /* edi ^= W[W2]                 */ \
+    AS2(     or   esi, w                        )   /* esi  = x | w                 */ \
+    AS2(    xor   edi, [esp + W3 * 4]           )   /* edi ^= W[W3]                 */ \
+    AS2(    and   esi, y                        )   /* esi  = (x | w) & y           */ \
+    AS2(    xor   edi, [esp + W4 * 4]           )   /* edi ^= W[W4]                 */ \
+    AS2(    movd  mm0, esi                      )   /* park (x | w) & y in mm0      */ \
+    AS2(    rol   edi, 1                        )   /* edi  = new schedule word     */ \
+    AS2(    mov   esi, x                        )   /* esi  = x                     */ \
+    AS2(    mov   [esp + W1 * 4], edi           )   /* W[W1] = new schedule word    */ \
+    AS2(    and   esi, w                        )   /* esi  = x & w                 */ \
+    AS2(    lea     z, [edi + z + 0x8F1BBCDC]   )   /* z   += W[W1] + constant      */ \
+    AS2(    movd  edi, mm0                      )   /* edi  = (x | w) & y           */ \
+    AS2(     or   esi, edi                      )   /* esi  = f                     */ \
+    AS2(    mov   edi, v                        )   /* edi  = v                     */ \
+    AS2(    rol   edi, 5                        )   /* edi  = rol(v, 5)             */ \
+    AS2(    add     z, esi                      )   /* z   += f                     */ \
+    AS2(    rol     w, 30                       )   /* w    = rol(w, 30)            */ \
+    AS2(    add     z, edi                      )   /* z   += rol(v, 5)             */
+
+
+/* ASMR4 = ASMR2 but different constant
+ *
+ * One SHA round (the call sites use it for rounds 60-79).  Instruction for
+ * instruction the same as ASMR2, with 0xCA62C1D6 as the additive constant:
+ *     W[W1] = rol(W[W1] ^ W[W2] ^ W[W3] ^ W[W4], 1)
+ *     z    += W[W1] + 0xCA62C1D6 + (x ^ y ^ w) + rol(v, 5)
+ *     w     = rol(w, 30)
+ *
+ * The round number i is accepted but not used in the body.  edi and esi are
+ * clobbered as scratch.
+ */
+
+#define ASMR4(v,w,x,y,z,i,W1,W2,W3,W4) \
+    AS2(    mov   edi, [esp + W1 * 4]           )   /* edi  = W[W1]                 */ \
+    AS2(    mov   esi, x                        )   /* esi  = x                     */ \
+    AS2(    xor   edi, [esp + W2 * 4]           )   /* edi ^= W[W2]                 */ \
+    AS2(    xor   esi, y                        )   /* esi  = x ^ y                 */ \
+    AS2(    xor   edi, [esp + W3 * 4]           )   /* edi ^= W[W3]                 */ \
+    AS2(    xor   esi, w                        )   /* esi  = f = x ^ y ^ w         */ \
+    AS2(    xor   edi, [esp + W4 * 4]           )   /* edi ^= W[W4]                 */ \
+    AS2(    rol   edi, 1                        )   /* edi  = new schedule word     */ \
+    AS2(    add     z, esi                      )   /* z   += f                     */ \
+    AS2(    mov   [esp + W1 * 4], edi           )   /* W[W1] = new schedule word    */ \
+    AS2(    lea     z, [edi + z + 0xCA62C1D6]   )   /* z   += W[W1] + constant      */ \
+    AS2(    mov   edi, v                        )   /* edi  = v                     */ \
+    AS2(    rol   edi, 5                        )   /* edi  = rol(v, 5)             */ \
+    AS2(    rol     w, 30                       )   /* w    = rol(w, 30)            */ \
+    AS2(    add     z, edi                      )   /* z   += rol(v, 5)             */
+
+/*
+ * SHA::AsmTransform: hand-scheduled ia32 assembly that folds `times`
+ * consecutive 64-byte blocks, starting at `data`, into the digest words.
+ *
+ * Per block:
+ *   1. the 16 input words are byte-swapped and stored on the stack as
+ *      W[0..15] at [esp .. esp + 63];
+ *   2. the five digest words are loaded into eax, ebx, ecx, edx, ebp;
+ *   3. 80 rounds are run with ASMR0 (0-15), ASMR1 (16-19), ASMR2 (20-39),
+ *      ASMR3 (40-59) and ASMR4 (60-79); instead of moving values between
+ *      registers, the register arguments are rotated from one macro call to
+ *      the next;
+ *   4. the five registers are added back into the digest words.
+ *
+ * The digest words are addressed as five 32-bit values starting at
+ * this + 16 (this + 20 when OLD_GCC_OFFSET is defined).
+ *
+ * Scratch usage inside the loop:
+ *   esi, edi    scratch in the round macros
+ *   mm0         scratch in ASMR3
+ *   mm1         pointer to the digest words
+ *   mm2         number of blocks still to process
+ *   [esp + 64]  current data pointer (68 bytes reserved: 64 for W[] + 4)
+ *
+ * The block counter is decremented and tested only after a block has been
+ * processed, so the loop body always runs at least once; a `times` of 0
+ * would wrap the counter instead of returning immediately, so callers must
+ * pass times >= 1.
+ *
+ * NOTE(review): in the GCC variant the asm overwrites its input registers
+ * (eax, ecx, edi) as well as mm0-mm2, none of which are listed as outputs
+ * or clobbers.  This presumably relies on the function being noinline and
+ * on nothing following the asm statement -- confirm against the GCC
+ * extended-asm rules before touching the operand lists.
+ */
+#ifdef _MSC_VER
+    __declspec(naked)
+#else
+    __attribute__ ((noinline))
+#endif
+void SHA::AsmTransform(const byte* data, word32 times)
+{
+#ifdef __GNUC__
+    #define AS1(x)    #x ";"            // stringify a one-part asm statement
+    #define AS2(x, y) #x ", " #y ";"    // stringify a two-operand instruction
+    /* PROLOG() opens the __asm__ statement, switches gas to Intel syntax and
+     * saves ebx/ebp; EPILOG() restores them, executes emms, switches back to
+     * AT&T syntax and supplies the operands: this in ecx, data in edi and
+     * times in eax. */
+    #define PROLOG()  \
+    __asm__ __volatile__ \
+    ( \
+        ".intel_syntax noprefix;" \
+        "push ebx;" \
+        "push ebp;"
+    #define EPILOG()  \
+        "pop ebp;" \
+        "pop ebx;" \
+       	"emms;" \
+       	".att_syntax;" \
+            : \
+            : "c" (this), "D" (data), "a" (times) \
+            : "%esi", "%edx", "memory", "cc" \
+    );
+
+#else
+    #define AS1(x)    __asm x           // one-part statement, __asm keyword form
+    #define AS2(x, y) __asm x, y        // two-operand instruction, __asm keyword form
+    /* PROLOG() builds an ebp frame, parks edi/ebx/esi/ebp in mm3..mm6 and
+     * loads data into edi and times into eax; EPILOG() restores those
+     * registers, tears the frame down, executes emms and returns with
+     * ret 8. */
+    #define PROLOG() \
+        AS1(    push  ebp                           )   \
+        AS2(    mov   ebp, esp                      )   \
+        AS2(    movd  mm3, edi                      )   \
+        AS2(    movd  mm4, ebx                      )   \
+        AS2(    movd  mm5, esi                      )   \
+        AS2(    movd  mm6, ebp                      )   \
+        AS2(    mov   edi, data                     )   \
+        AS2(    mov   eax, times                    )
+
+    #define EPILOG() \
+        AS2(    movd  ebp, mm6                  )   \
+        AS2(    movd  esi, mm5                  )   \
+        AS2(    movd  ebx, mm4                  )   \
+        AS2(    movd  edi, mm3                  )   \
+        AS2(    mov   esp, ebp                  )   \
+        AS1(    pop   ebp                       )   \
+        AS1(    emms   )                            \
+        AS1(    ret 8  )   
+#endif
+
+    PROLOG()
+
+    AS2(    mov   esi, ecx              )   // ecx carries this (GCC: "c" input operand)
+
+    #ifdef OLD_GCC_OFFSET
+        AS2(    add   esi, 20               )   // digest_[0]
+    #else
+        AS2(    add   esi, 16               )   // digest_[0]
+    #endif
+
+    AS2(    movd  mm2, eax              )   // store times_
+    AS2(    movd  mm1, esi              )   // store digest_
+
+    AS2(    sub   esp, 68               )   // make room on stack: W[16] + saved data pointer
+
+#ifdef _MSC_VER
+    AS1( loopStart: )  // loopStart
+#else
+    AS1( 0: )          // loopStart for some gas (need numeric for jump back)
+#endif
+
+    // byte reverse 16 words of input, 4 at a time, put on stack for W[]
+
+    // part 1: input words 0-3 -> W[0..3]
+    AS2(    mov   eax, [edi]        )
+    AS2(    mov   ebx, [edi +  4]   )
+    AS2(    mov   ecx, [edi +  8]   )
+    AS2(    mov   edx, [edi + 12]   )
+
+    AS1(    bswap eax   )
+    AS1(    bswap ebx   )
+    AS1(    bswap ecx   )
+    AS1(    bswap edx   )
+
+    AS2(    mov   [esp],      eax   )
+    AS2(    mov   [esp +  4], ebx   )
+    AS2(    mov   [esp +  8], ecx   )
+    AS2(    mov   [esp + 12], edx   )
+
+    // part 2: input words 4-7 -> W[4..7]
+    AS2(    mov   eax, [edi + 16]   )
+    AS2(    mov   ebx, [edi + 20]   )
+    AS2(    mov   ecx, [edi + 24]   )
+    AS2(    mov   edx, [edi + 28]   )
+
+    AS1(    bswap eax   )
+    AS1(    bswap ebx   )
+    AS1(    bswap ecx   )
+    AS1(    bswap edx   )
+
+    AS2(    mov   [esp + 16], eax   )
+    AS2(    mov   [esp + 20], ebx   )
+    AS2(    mov   [esp + 24], ecx   )
+    AS2(    mov   [esp + 28], edx   )
+
+
+    // part 3: input words 8-11 -> W[8..11]
+    AS2(    mov   eax, [edi + 32]   )
+    AS2(    mov   ebx, [edi + 36]   )
+    AS2(    mov   ecx, [edi + 40]   )
+    AS2(    mov   edx, [edi + 44]   )
+
+    AS1(    bswap eax   )
+    AS1(    bswap ebx   )
+    AS1(    bswap ecx   )
+    AS1(    bswap edx   )
+
+    AS2(    mov   [esp + 32], eax   )
+    AS2(    mov   [esp + 36], ebx   )
+    AS2(    mov   [esp + 40], ecx   )
+    AS2(    mov   [esp + 44], edx   )
+
+
+    // part 4: input words 12-15 -> W[12..15]
+    AS2(    mov   eax, [edi + 48]   )
+    AS2(    mov   ebx, [edi + 52]   )
+    AS2(    mov   ecx, [edi + 56]   )
+    AS2(    mov   edx, [edi + 60]   )
+
+    AS1(    bswap eax   )
+    AS1(    bswap ebx   )
+    AS1(    bswap ecx   )
+    AS1(    bswap edx   )
+
+    AS2(    mov   [esp + 48], eax   )
+    AS2(    mov   [esp + 52], ebx   )
+    AS2(    mov   [esp + 56], ecx   )
+    AS2(    mov   [esp + 60], edx   )
+
+    AS2(    mov   [esp + 64], edi   )   // store edi for end (the round macros use edi as scratch)
+
+    // read from digest_ into the five working registers
+    AS2(    mov   eax, [esi]            )   // a1
+    AS2(    mov   ebx, [esi +  4]       )   // b1
+    AS2(    mov   ecx, [esi +  8]       )   // c1
+    AS2(    mov   edx, [esi + 12]       )   // d1
+    AS2(    mov   ebp, [esi + 16]       )   // e1
+
+    // rounds 0-15: W[i] is read straight from the byte-swapped input words
+    ASMR0(eax, ebx, ecx, edx, ebp,  0)
+    ASMR0(ebp, eax, ebx, ecx, edx,  1)
+    ASMR0(edx, ebp, eax, ebx, ecx,  2)
+    ASMR0(ecx, edx, ebp, eax, ebx,  3)
+    ASMR0(ebx, ecx, edx, ebp, eax,  4)
+    ASMR0(eax, ebx, ecx, edx, ebp,  5)
+    ASMR0(ebp, eax, ebx, ecx, edx,  6)
+    ASMR0(edx, ebp, eax, ebx, ecx,  7)
+    ASMR0(ecx, edx, ebp, eax, ebx,  8)
+    ASMR0(ebx, ecx, edx, ebp, eax,  9)
+    ASMR0(eax, ebx, ecx, edx, ebp, 10)
+    ASMR0(ebp, eax, ebx, ecx, edx, 11)
+    ASMR0(edx, ebp, eax, ebx, ecx, 12)
+    ASMR0(ecx, edx, ebp, eax, ebx, 13)
+    ASMR0(ebx, ecx, edx, ebp, eax, 14)
+    ASMR0(eax, ebx, ecx, edx, ebp, 15)
+    // rounds 16-19: same f and constant, W[] now extended in place (last four args = word indices)
+    ASMR1(ebp, eax, ebx, ecx, edx, 16,  0,  2,  8, 13)
+    ASMR1(edx, ebp, eax, ebx, ecx, 17,  1,  3,  9, 14)
+    ASMR1(ecx, edx, ebp, eax, ebx, 18,  2,  4, 10, 15)
+    ASMR1(ebx, ecx, edx, ebp, eax, 19,  3,  5, 11,  0)
+    // rounds 20-39
+    ASMR2(eax, ebx, ecx, edx, ebp, 20,  4,  6, 12,  1)
+    ASMR2(ebp, eax, ebx, ecx, edx, 21,  5,  7, 13,  2)
+    ASMR2(edx, ebp, eax, ebx, ecx, 22,  6,  8, 14,  3)
+    ASMR2(ecx, edx, ebp, eax, ebx, 23,  7,  9, 15,  4)
+    ASMR2(ebx, ecx, edx, ebp, eax, 24,  8, 10,  0,  5)
+    ASMR2(eax, ebx, ecx, edx, ebp, 25,  9, 11,  1,  6)
+    ASMR2(ebp, eax, ebx, ecx, edx, 26, 10, 12,  2,  7)
+    ASMR2(edx, ebp, eax, ebx, ecx, 27, 11, 13,  3,  8)
+    ASMR2(ecx, edx, ebp, eax, ebx, 28, 12, 14,  4,  9)
+    ASMR2(ebx, ecx, edx, ebp, eax, 29, 13, 15,  5, 10)
+    ASMR2(eax, ebx, ecx, edx, ebp, 30, 14,  0,  6, 11)
+    ASMR2(ebp, eax, ebx, ecx, edx, 31, 15,  1,  7, 12)
+    ASMR2(edx, ebp, eax, ebx, ecx, 32,  0,  2,  8, 13)
+    ASMR2(ecx, edx, ebp, eax, ebx, 33,  1,  3,  9, 14)
+    ASMR2(ebx, ecx, edx, ebp, eax, 34,  2,  4, 10, 15)
+    ASMR2(eax, ebx, ecx, edx, ebp, 35,  3,  5, 11,  0)
+    ASMR2(ebp, eax, ebx, ecx, edx, 36,  4,  6, 12,  1)
+    ASMR2(edx, ebp, eax, ebx, ecx, 37,  5,  7, 13,  2)
+    ASMR2(ecx, edx, ebp, eax, ebx, 38,  6,  8, 14,  3)
+    ASMR2(ebx, ecx, edx, ebp, eax, 39,  7,  9, 15,  4)
+
+    // rounds 40-59
+    ASMR3(eax, ebx, ecx, edx, ebp, 40,  8, 10,  0,  5)
+    ASMR3(ebp, eax, ebx, ecx, edx, 41,  9, 11,  1,  6)
+    ASMR3(edx, ebp, eax, ebx, ecx, 42, 10, 12,  2,  7)
+    ASMR3(ecx, edx, ebp, eax, ebx, 43, 11, 13,  3,  8)
+    ASMR3(ebx, ecx, edx, ebp, eax, 44, 12, 14,  4,  9)
+    ASMR3(eax, ebx, ecx, edx, ebp, 45, 13, 15,  5, 10)
+    ASMR3(ebp, eax, ebx, ecx, edx, 46, 14,  0,  6, 11)
+    ASMR3(edx, ebp, eax, ebx, ecx, 47, 15,  1,  7, 12)
+    ASMR3(ecx, edx, ebp, eax, ebx, 48,  0,  2,  8, 13)
+    ASMR3(ebx, ecx, edx, ebp, eax, 49,  1,  3,  9, 14)
+    ASMR3(eax, ebx, ecx, edx, ebp, 50,  2,  4, 10, 15)
+    ASMR3(ebp, eax, ebx, ecx, edx, 51,  3,  5, 11,  0)
+    ASMR3(edx, ebp, eax, ebx, ecx, 52,  4,  6, 12,  1)
+    ASMR3(ecx, edx, ebp, eax, ebx, 53,  5,  7, 13,  2)
+    ASMR3(ebx, ecx, edx, ebp, eax, 54,  6,  8, 14,  3)
+    ASMR3(eax, ebx, ecx, edx, ebp, 55,  7,  9, 15,  4)
+    ASMR3(ebp, eax, ebx, ecx, edx, 56,  8, 10,  0,  5)
+    ASMR3(edx, ebp, eax, ebx, ecx, 57,  9, 11,  1,  6)
+    ASMR3(ecx, edx, ebp, eax, ebx, 58, 10, 12,  2,  7)
+    ASMR3(ebx, ecx, edx, ebp, eax, 59, 11, 13,  3,  8)
+    // rounds 60-79
+    ASMR4(eax, ebx, ecx, edx, ebp, 60, 12, 14,  4,  9)
+    ASMR4(ebp, eax, ebx, ecx, edx, 61, 13, 15,  5, 10)
+    ASMR4(edx, ebp, eax, ebx, ecx, 62, 14,  0,  6, 11)
+    ASMR4(ecx, edx, ebp, eax, ebx, 63, 15,  1,  7, 12)
+    ASMR4(ebx, ecx, edx, ebp, eax, 64,  0,  2,  8, 13)
+    ASMR4(eax, ebx, ecx, edx, ebp, 65,  1,  3,  9, 14)
+    ASMR4(ebp, eax, ebx, ecx, edx, 66,  2,  4, 10, 15)
+    ASMR4(edx, ebp, eax, ebx, ecx, 67,  3,  5, 11,  0)
+    ASMR4(ecx, edx, ebp, eax, ebx, 68,  4,  6, 12,  1)
+    ASMR4(ebx, ecx, edx, ebp, eax, 69,  5,  7, 13,  2)
+    ASMR4(eax, ebx, ecx, edx, ebp, 70,  6,  8, 14,  3)
+    ASMR4(ebp, eax, ebx, ecx, edx, 71,  7,  9, 15,  4)
+    ASMR4(edx, ebp, eax, ebx, ecx, 72,  8, 10,  0,  5)
+    ASMR4(ecx, edx, ebp, eax, ebx, 73,  9, 11,  1,  6)
+    ASMR4(ebx, ecx, edx, ebp, eax, 74, 10, 12,  2,  7)
+    ASMR4(eax, ebx, ecx, edx, ebp, 75, 11, 13,  3,  8)
+    ASMR4(ebp, eax, ebx, ecx, edx, 76, 12, 14,  4,  9)
+    ASMR4(edx, ebp, eax, ebx, ecx, 77, 13, 15,  5, 10)
+    ASMR4(ecx, edx, ebp, eax, ebx, 78, 14,  0,  6, 11)
+    ASMR4(ebx, ecx, edx, ebp, eax, 79, 15,  1,  7, 12)
+
+
+    AS2(    movd  esi, mm1              )   // digest_ (esi was scratch during the rounds)
+
+    AS2(    add   [esi],      eax       )   // write out: each digest word += its working register
+    AS2(    add   [esi +  4], ebx       )
+    AS2(    add   [esi +  8], ecx       )
+    AS2(    add   [esi + 12], edx       )
+    AS2(    add   [esi + 16], ebp       )
+
+    // setup next round
+    AS2(    movd  ebp, mm2              )   // times (blocks left); ebp is free after write out
+ 
+    AS2(    mov   edi, DWORD PTR [esp + 64] )   // data
+    
+    AS2(    add   edi, 64               )   // next round of data
+    AS2(    mov   [esp + 64], edi       )   // restore (the loop top stores edi here again)
+    
+    AS1(    dec   ebp                   )   // one block done; ZF set when none remain
+    AS2(    movd  mm2, ebp              )   // movd leaves the flags alone, so jnz below still tests dec
+#ifdef _MSC_VER
+    AS1(    jnz   loopStart )  // loopStart
+#else
+    AS1(    jnz   0b )         // loopStart
+#endif
+
+    // inline adjust: release W[] and the saved data pointer
+    AS2(    add   esp, 68               )   // fix room on stack
+
+    EPILOG()
+}
+
+
+#endif // DO_SHA_ASM
+
+} // namespace