From 354bb40e75d94466e91fe6960523612c9d17ccfb Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 2 Nov 2017 23:11:29 +0300 Subject: Add implementation --- mysql/extra/yassl/taocrypt/src/des.cpp | 778 +++++++++++++++++++++++++++++++++ 1 file changed, 778 insertions(+) create mode 100644 mysql/extra/yassl/taocrypt/src/des.cpp (limited to 'mysql/extra/yassl/taocrypt/src/des.cpp') diff --git a/mysql/extra/yassl/taocrypt/src/des.cpp b/mysql/extra/yassl/taocrypt/src/des.cpp new file mode 100644 index 0000000..53777f0 --- /dev/null +++ b/mysql/extra/yassl/taocrypt/src/des.cpp @@ -0,0 +1,778 @@ +/* + Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + MA 02110-1301 USA. +*/ + +/* C++ part based on Wei Dai's des.cpp from CryptoPP */ +/* x86 asm is original */ + + +#if defined(TAOCRYPT_KERNEL_MODE) + #define DO_TAOCRYPT_KERNEL_MODE +#endif // only some modules now support this + + +#include "runtime.hpp" +#include "des.hpp" +#ifdef USE_SYS_STL + #include +#else + #include "algorithm.hpp" +#endif + + +namespace STL = STL_NAMESPACE; + + + +namespace TaoCrypt { + + +/* permuted choice table (key) */ +static const byte pc1[] = { + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4 +}; + +/* number left rotations of pc1 */ +static const byte totrot[] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 +}; + +/* permuted choice key (table) */ +static const byte pc2[] = { + 14, 17, 11, 24, 1, 5, + 3, 28, 15, 6, 21, 10, + 23, 19, 12, 4, 26, 8, + 16, 7, 27, 20, 13, 2, + 41, 52, 31, 37, 47, 55, + 30, 40, 51, 45, 33, 48, + 44, 49, 39, 56, 34, 53, + 46, 42, 50, 36, 29, 32 +}; + +/* End of DES-defined tables */ + +/* bit 0 is left-most in byte */ +static const int bytebit[] = { + 0200,0100,040,020,010,04,02,01 +}; + +const word32 Spbox[8][64] = { +{ +0x01010400,0x00000000,0x00010000,0x01010404, +0x01010004,0x00010404,0x00000004,0x00010000, +0x00000400,0x01010400,0x01010404,0x00000400, +0x01000404,0x01010004,0x01000000,0x00000004, +0x00000404,0x01000400,0x01000400,0x00010400, +0x00010400,0x01010000,0x01010000,0x01000404, +0x00010004,0x01000004,0x01000004,0x00010004, +0x00000000,0x00000404,0x00010404,0x01000000, +0x00010000,0x01010404,0x00000004,0x01010000, +0x01010400,0x01000000,0x01000000,0x00000400, +0x01010004,0x00010000,0x00010400,0x01000004, +0x00000400,0x00000004,0x01000404,0x00010404, +0x01010404,0x00010004,0x01010000,0x01000404, +0x01000004,0x00000404,0x00010404,0x01010400, +0x00000404,0x01000400,0x01000400,0x00000000, +0x00010004,0x00010400,0x00000000,0x01010004}, +{ +0x80108020,0x80008000,0x00008000,0x00108020, +0x00100000,0x00000020,0x80100020,0x80008020, +0x80000020,0x80108020,0x80108000,0x80000000, +0x80008000,0x00100000,0x00000020,0x80100020, +0x00108000,0x00100020,0x80008020,0x00000000, +0x80000000,0x00008000,0x00108020,0x80100000, +0x00100020,0x80000020,0x00000000,0x00108000, +0x00008020,0x80108000,0x80100000,0x00008020, +0x00000000,0x00108020,0x80100020,0x00100000, +0x80008020,0x80100000,0x80108000,0x00008000, +0x80100000,0x80008000,0x00000020,0x80108020, +0x00108020,0x00000020,0x00008000,0x80000000, +0x00008020,0x80108000,0x00100000,0x80000020, +0x00100020,0x80008020,0x80000020,0x00100020, +0x00108000,0x00000000,0x80008000,0x00008020, +0x80000000,0x80100020,0x80108020,0x00108000}, +{ +0x00000208,0x08020200,0x00000000,0x08020008, +0x08000200,0x00000000,0x00020208,0x08000200, +0x00020008,0x08000008,0x08000008,0x00020000, +0x08020208,0x00020008,0x08020000,0x00000208, +0x08000000,0x00000008,0x08020200,0x00000200, +0x00020200,0x08020000,0x08020008,0x00020208, +0x08000208,0x00020200,0x00020000,0x08000208, +0x00000008,0x08020208,0x00000200,0x08000000, +0x08020200,0x08000000,0x00020008,0x00000208, +0x00020000,0x08020200,0x08000200,0x00000000, +0x00000200,0x00020008,0x08020208,0x08000200, +0x08000008,0x00000200,0x00000000,0x08020008, +0x08000208,0x00020000,0x08000000,0x08020208, +0x00000008,0x00020208,0x00020200,0x08000008, +0x08020000,0x08000208,0x00000208,0x08020000, +0x00020208,0x00000008,0x08020008,0x00020200}, +{ +0x00802001,0x00002081,0x00002081,0x00000080, +0x00802080,0x00800081,0x00800001,0x00002001, +0x00000000,0x00802000,0x00802000,0x00802081, +0x00000081,0x00000000,0x00800080,0x00800001, +0x00000001,0x00002000,0x00800000,0x00802001, +0x00000080,0x00800000,0x00002001,0x00002080, +0x00800081,0x00000001,0x00002080,0x00800080, +0x00002000,0x00802080,0x00802081,0x00000081, +0x00800080,0x00800001,0x00802000,0x00802081, +0x00000081,0x00000000,0x00000000,0x00802000, +0x00002080,0x00800080,0x00800081,0x00000001, +0x00802001,0x00002081,0x00002081,0x00000080, +0x00802081,0x00000081,0x00000001,0x00002000, +0x00800001,0x00002001,0x00802080,0x00800081, +0x00002001,0x00002080,0x00800000,0x00802001, +0x00000080,0x00800000,0x00002000,0x00802080}, +{ +0x00000100,0x02080100,0x02080000,0x42000100, +0x00080000,0x00000100,0x40000000,0x02080000, +0x40080100,0x00080000,0x02000100,0x40080100, +0x42000100,0x42080000,0x00080100,0x40000000, +0x02000000,0x40080000,0x40080000,0x00000000, +0x40000100,0x42080100,0x42080100,0x02000100, +0x42080000,0x40000100,0x00000000,0x42000000, +0x02080100,0x02000000,0x42000000,0x00080100, +0x00080000,0x42000100,0x00000100,0x02000000, +0x40000000,0x02080000,0x42000100,0x40080100, +0x02000100,0x40000000,0x42080000,0x02080100, +0x40080100,0x00000100,0x02000000,0x42080000, +0x42080100,0x00080100,0x42000000,0x42080100, +0x02080000,0x00000000,0x40080000,0x42000000, +0x00080100,0x02000100,0x40000100,0x00080000, +0x00000000,0x40080000,0x02080100,0x40000100}, +{ +0x20000010,0x20400000,0x00004000,0x20404010, +0x20400000,0x00000010,0x20404010,0x00400000, +0x20004000,0x00404010,0x00400000,0x20000010, +0x00400010,0x20004000,0x20000000,0x00004010, +0x00000000,0x00400010,0x20004010,0x00004000, +0x00404000,0x20004010,0x00000010,0x20400010, +0x20400010,0x00000000,0x00404010,0x20404000, +0x00004010,0x00404000,0x20404000,0x20000000, +0x20004000,0x00000010,0x20400010,0x00404000, +0x20404010,0x00400000,0x00004010,0x20000010, +0x00400000,0x20004000,0x20000000,0x00004010, +0x20000010,0x20404010,0x00404000,0x20400000, +0x00404010,0x20404000,0x00000000,0x20400010, +0x00000010,0x00004000,0x20400000,0x00404010, +0x00004000,0x00400010,0x20004010,0x00000000, +0x20404000,0x20000000,0x00400010,0x20004010}, +{ +0x00200000,0x04200002,0x04000802,0x00000000, +0x00000800,0x04000802,0x00200802,0x04200800, +0x04200802,0x00200000,0x00000000,0x04000002, +0x00000002,0x04000000,0x04200002,0x00000802, +0x04000800,0x00200802,0x00200002,0x04000800, +0x04000002,0x04200000,0x04200800,0x00200002, +0x04200000,0x00000800,0x00000802,0x04200802, +0x00200800,0x00000002,0x04000000,0x00200800, +0x04000000,0x00200800,0x00200000,0x04000802, +0x04000802,0x04200002,0x04200002,0x00000002, +0x00200002,0x04000000,0x04000800,0x00200000, +0x04200800,0x00000802,0x00200802,0x04200800, +0x00000802,0x04000002,0x04200802,0x04200000, +0x00200800,0x00000000,0x00000002,0x04200802, +0x00000000,0x00200802,0x04200000,0x00000800, +0x04000002,0x04000800,0x00000800,0x00200002}, +{ +0x10001040,0x00001000,0x00040000,0x10041040, +0x10000000,0x10001040,0x00000040,0x10000000, +0x00040040,0x10040000,0x10041040,0x00041000, +0x10041000,0x00041040,0x00001000,0x00000040, +0x10040000,0x10000040,0x10001000,0x00001040, +0x00041000,0x00040040,0x10040040,0x10041000, +0x00001040,0x00000000,0x00000000,0x10040040, +0x10000040,0x10001000,0x00041040,0x00040000, +0x00041040,0x00040000,0x10041000,0x00001000, +0x00000040,0x10040040,0x00001000,0x00041040, +0x10001000,0x00000040,0x10000040,0x10040000, +0x10040040,0x10000000,0x00040000,0x10001040, +0x00000000,0x10041040,0x00040040,0x10000040, +0x10040000,0x10001000,0x10001040,0x00000000, +0x10041040,0x00041000,0x00041000,0x00001040, +0x00001040,0x00040040,0x10000000,0x10041000} +}; + + +void BasicDES::SetKey(const byte* key, word32 /*length*/, CipherDir dir) +{ + byte buffer[56+56+8]; + byte *const pc1m = buffer; /* place to modify pc1 into */ + byte *const pcr = pc1m + 56; /* place to rotate pc1 into */ + byte *const ks = pcr + 56; + int i,j,l; + int m; + + for (j = 0; j < 56; j++) { /* convert pc1 to bits of key */ + l = pc1[j] - 1; /* integer bit location */ + m = l & 07; /* find bit */ + pc1m[j] = (key[l >> 3] & /* find which key byte l is in */ + bytebit[m]) /* and which bit of that byte */ + ? 1 : 0; /* and store 1-bit result */ + } + for (i = 0; i < 16; i++) { /* key chunk for each iteration */ + memset(ks, 0, 8); /* Clear key schedule */ + for (j = 0; j < 56; j++) /* rotate pc1 the right amount */ + pcr[j] = pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l: l-28]; + /* rotate left and right halves independently */ + for (j = 0; j < 48; j++){ /* select bits individually */ + /* check bit that goes to ks[j] */ + if (pcr[pc2[j] - 1]){ + /* mask it in if it's there */ + l= j % 6; + ks[j/6] |= bytebit[l] >> 2; + } + } + /* Now convert to odd/even interleaved form for use in F */ + k_[2*i] = ((word32)ks[0] << 24) + | ((word32)ks[2] << 16) + | ((word32)ks[4] << 8) + | ((word32)ks[6]); + k_[2*i + 1] = ((word32)ks[1] << 24) + | ((word32)ks[3] << 16) + | ((word32)ks[5] << 8) + | ((word32)ks[7]); + } + + // reverse key schedule order + if (dir == DECRYPTION) + for (i = 0; i < 16; i += 2) { + STL::swap(k_[i], k_[32 - 2 - i]); + STL::swap(k_[i+1], k_[32 - 1 - i]); + } + +} + +static inline void IPERM(word32& left, word32& right) +{ + word32 work; + + right = rotlFixed(right, 4U); + work = (left ^ right) & 0xf0f0f0f0; + left ^= work; + + right = rotrFixed(right^work, 20U); + work = (left ^ right) & 0xffff0000; + left ^= work; + + right = rotrFixed(right^work, 18U); + work = (left ^ right) & 0x33333333; + left ^= work; + + right = rotrFixed(right^work, 6U); + work = (left ^ right) & 0x00ff00ff; + left ^= work; + + right = rotlFixed(right^work, 9U); + work = (left ^ right) & 0xaaaaaaaa; + left = rotlFixed(left^work, 1U); + right ^= work; +} + +static inline void FPERM(word32& left, word32& right) +{ + word32 work; + + right = rotrFixed(right, 1U); + work = (left ^ right) & 0xaaaaaaaa; + right ^= work; + left = rotrFixed(left^work, 9U); + work = (left ^ right) & 0x00ff00ff; + right ^= work; + left = rotlFixed(left^work, 6U); + work = (left ^ right) & 0x33333333; + right ^= work; + left = rotlFixed(left^work, 18U); + work = (left ^ right) & 0xffff0000; + right ^= work; + left = rotlFixed(left^work, 20U); + work = (left ^ right) & 0xf0f0f0f0; + right ^= work; + left = rotrFixed(left^work, 4U); +} + + +void BasicDES::RawProcessBlock(word32& lIn, word32& rIn) const +{ + word32 l = lIn, r = rIn; + const word32* kptr = k_; + + for (unsigned i=0; i<8; i++) + { + word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0]; + l ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = r ^ kptr[4*i+1]; + l ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + + work = rotrFixed(l, 4U) ^ kptr[4*i+2]; + r ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = l ^ kptr[4*i+3]; + r ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + } + + lIn = l; rIn = r; +} + + + +typedef BlockGetAndPut Block; + + +void DES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const +{ + word32 l,r; + Block::Get(in)(l)(r); + IPERM(l,r); + + RawProcessBlock(l, r); + + FPERM(l,r); + Block::Put(xOr, out)(r)(l); +} + + +void DES_EDE2::SetKey(const byte* key, word32 sz, CipherDir dir) +{ + des1_.SetKey(key, sz, dir); + des2_.SetKey(key + 8, sz, ReverseDir(dir)); +} + + +void DES_EDE2::ProcessAndXorBlock(const byte* in, const byte* xOr, + byte* out) const +{ + word32 l,r; + Block::Get(in)(l)(r); + IPERM(l,r); + + des1_.RawProcessBlock(l, r); + des2_.RawProcessBlock(r, l); + des1_.RawProcessBlock(l, r); + + FPERM(l,r); + Block::Put(xOr, out)(r)(l); +} + + +void DES_EDE3::SetKey(const byte* key, word32 sz, CipherDir dir) +{ + des1_.SetKey(key+(dir==ENCRYPTION?0:2*8), sz, dir); + des2_.SetKey(key+8, sz, ReverseDir(dir)); + des3_.SetKey(key+(dir==DECRYPTION?0:2*8), sz, dir); +} + + + +#if defined(DO_DES_ASM) + +// ia32 optimized version +void DES_EDE3::Process(byte* out, const byte* in, word32 sz) +{ + if (!isMMX) { + Mode_BASE::Process(out, in, sz); + return; + } + + word32 blocks = sz / DES_BLOCK_SIZE; + + if (mode_ == CBC) + if (dir_ == ENCRYPTION) + while (blocks--) { + r_[0] ^= *(word32*)in; + r_[1] ^= *(word32*)(in + 4); + + AsmProcess((byte*)r_, (byte*)r_, (void*)Spbox); + + memcpy(out, r_, DES_BLOCK_SIZE); + + in += DES_BLOCK_SIZE; + out += DES_BLOCK_SIZE; + } + else + while (blocks--) { + AsmProcess(in, out, (void*)Spbox); + + *(word32*)out ^= r_[0]; + *(word32*)(out + 4) ^= r_[1]; + + memcpy(r_, in, DES_BLOCK_SIZE); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } + else + while (blocks--) { + AsmProcess(in, out, (void*)Spbox); + + out += DES_BLOCK_SIZE; + in += DES_BLOCK_SIZE; + } +} + +#endif // DO_DES_ASM + + +void DES_EDE3::ProcessAndXorBlock(const byte* in, const byte* xOr, + byte* out) const +{ + word32 l,r; + Block::Get(in)(l)(r); + IPERM(l,r); + + des1_.RawProcessBlock(l, r); + des2_.RawProcessBlock(r, l); + des3_.RawProcessBlock(l, r); + + FPERM(l,r); + Block::Put(xOr, out)(r)(l); +} + + +#if defined(DO_DES_ASM) + +/* Uses IPERM algorithm from above + + left is in eax + right is in ebx + + uses ecx +*/ +#define AsmIPERM() \ + AS2( rol ebx, 4 ) \ + AS2( mov ecx, eax ) \ + AS2( xor ecx, ebx ) \ + AS2( and ecx, 0xf0f0f0f0 ) \ + AS2( xor ebx, ecx ) \ + AS2( xor eax, ecx ) \ + AS2( ror ebx, 20 ) \ + AS2( mov ecx, eax ) \ + AS2( xor ecx, ebx ) \ + AS2( and ecx, 0xffff0000 ) \ + AS2( xor ebx, ecx ) \ + AS2( xor eax, ecx ) \ + AS2( ror ebx, 18 ) \ + AS2( mov ecx, eax ) \ + AS2( xor ecx, ebx ) \ + AS2( and ecx, 0x33333333 ) \ + AS2( xor ebx, ecx ) \ + AS2( xor eax, ecx ) \ + AS2( ror ebx, 6 ) \ + AS2( mov ecx, eax ) \ + AS2( xor ecx, ebx ) \ + AS2( and ecx, 0x00ff00ff ) \ + AS2( xor ebx, ecx ) \ + AS2( xor eax, ecx ) \ + AS2( rol ebx, 9 ) \ + AS2( mov ecx, eax ) \ + AS2( xor ecx, ebx ) \ + AS2( and ecx, 0xaaaaaaaa ) \ + AS2( xor eax, ecx ) \ + AS2( rol eax, 1 ) \ + AS2( xor ebx, ecx ) + + +/* Uses FPERM algorithm from above + + left is in eax + right is in ebx + + uses ecx +*/ +#define AsmFPERM() \ + AS2( ror ebx, 1 ) \ + AS2( mov ecx, eax ) \ + AS2( xor ecx, ebx ) \ + AS2( and ecx, 0xaaaaaaaa ) \ + AS2( xor eax, ecx ) \ + AS2( xor ebx, ecx ) \ + AS2( ror eax, 9 ) \ + AS2( mov ecx, ebx ) \ + AS2( xor ecx, eax ) \ + AS2( and ecx, 0x00ff00ff ) \ + AS2( xor eax, ecx ) \ + AS2( xor ebx, ecx ) \ + AS2( rol eax, 6 ) \ + AS2( mov ecx, ebx ) \ + AS2( xor ecx, eax ) \ + AS2( and ecx, 0x33333333 ) \ + AS2( xor eax, ecx ) \ + AS2( xor ebx, ecx ) \ + AS2( rol eax, 18 ) \ + AS2( mov ecx, ebx ) \ + AS2( xor ecx, eax ) \ + AS2( and ecx, 0xffff0000 ) \ + AS2( xor eax, ecx ) \ + AS2( xor ebx, ecx ) \ + AS2( rol eax, 20 ) \ + AS2( mov ecx, ebx ) \ + AS2( xor ecx, eax ) \ + AS2( and ecx, 0xf0f0f0f0 ) \ + AS2( xor eax, ecx ) \ + AS2( xor ebx, ecx ) \ + AS2( ror eax, 4 ) + + + + +/* DesRound implements this algorithm: + + word32 work = rotrFixed(r, 4U) ^ key[0]; + l ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = r ^ key[1]; + l ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + + work = rotrFixed(l, 4U) ^ key[2]; + r ^= Spbox[6][(work) & 0x3f] + ^ Spbox[4][(work >> 8) & 0x3f] + ^ Spbox[2][(work >> 16) & 0x3f] + ^ Spbox[0][(work >> 24) & 0x3f]; + work = l ^ key[3]; + r ^= Spbox[7][(work) & 0x3f] + ^ Spbox[5][(work >> 8) & 0x3f] + ^ Spbox[3][(work >> 16) & 0x3f] + ^ Spbox[1][(work >> 24) & 0x3f]; + + left is in aex + right is in ebx + key is in edx + + edvances key for next round + + uses ecx, esi, and edi +*/ +#define DesRound() \ + AS2( mov ecx, ebx )\ + AS2( mov esi, DWORD PTR [edx] )\ + AS2( ror ecx, 4 )\ + AS2( xor ecx, esi )\ + AS2( and ecx, 0x3f3f3f3f )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor eax, [ebp + esi*4 + 6*256] )\ + AS2( shr ecx, 16 )\ + AS2( xor eax, [ebp + edi*4 + 4*256] )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor eax, [ebp + esi*4 + 2*256] )\ + AS2( mov esi, DWORD PTR [edx + 4] )\ + AS2( xor eax, [ebp + edi*4] )\ + AS2( mov ecx, ebx )\ + AS2( xor ecx, esi )\ + AS2( and ecx, 0x3f3f3f3f )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor eax, [ebp + esi*4 + 7*256] )\ + AS2( shr ecx, 16 )\ + AS2( xor eax, [ebp + edi*4 + 5*256] )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor eax, [ebp + esi*4 + 3*256] )\ + AS2( mov esi, DWORD PTR [edx + 8] )\ + AS2( xor eax, [ebp + edi*4 + 1*256] )\ + AS2( mov ecx, eax )\ + AS2( ror ecx, 4 )\ + AS2( xor ecx, esi )\ + AS2( and ecx, 0x3f3f3f3f )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor ebx, [ebp + esi*4 + 6*256] )\ + AS2( shr ecx, 16 )\ + AS2( xor ebx, [ebp + edi*4 + 4*256] )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor ebx, [ebp + esi*4 + 2*256] )\ + AS2( mov esi, DWORD PTR [edx + 12] )\ + AS2( xor ebx, [ebp + edi*4] )\ + AS2( mov ecx, eax )\ + AS2( xor ecx, esi )\ + AS2( and ecx, 0x3f3f3f3f )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor ebx, [ebp + esi*4 + 7*256] )\ + AS2( shr ecx, 16 )\ + AS2( xor ebx, [ebp + edi*4 + 5*256] )\ + AS2( movzx esi, cl )\ + AS2( movzx edi, ch )\ + AS2( xor ebx, [ebp + esi*4 + 3*256] )\ + AS2( add edx, 16 )\ + AS2( xor ebx, [ebp + edi*4 + 1*256] ) + + +#ifdef _MSC_VER + __declspec(naked) +#else + __attribute__ ((noinline)) +#endif +void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const +{ +#ifdef __GNUC__ + #define AS1(x) #x ";" + #define AS2(x, y) #x ", " #y ";" + + #define PROLOG() \ + __asm__ __volatile__ \ + ( \ + ".intel_syntax noprefix;" \ + "push ebx;" \ + "push ebp;" \ + "movd mm6, ebp;" \ + "movd mm7, ecx;" \ + "mov ebp, eax;" + #define EPILOG() \ + "pop ebp;" \ + "pop ebx;" \ + "emms;" \ + ".att_syntax;" \ + : \ + : "d" (this), "S" (in), "a" (box), "c" (out) \ + : "%edi", "memory", "cc" \ + ); + +#else + #define AS1(x) __asm x + #define AS2(x, y) __asm x, y + + #define PROLOG() \ + AS1( push ebp ) \ + AS2( mov ebp, esp ) \ + AS2( movd mm3, edi ) \ + AS2( movd mm4, ebx ) \ + AS2( movd mm5, esi ) \ + AS2( movd mm6, ebp ) \ + AS2( mov esi, DWORD PTR [ebp + 8] ) \ + AS2( mov edx, ecx ) \ + AS2( mov ebp, DWORD PTR [ebp + 16] ) + + // ebp restored at end + #define EPILOG() \ + AS2( movd edi, mm3 ) \ + AS2( movd ebx, mm4 ) \ + AS2( movd esi, mm5 ) \ + AS2( mov esp, ebp ) \ + AS1( pop ebp ) \ + AS1( emms ) \ + AS1( ret 12 ) + +#endif + + + PROLOG() + + AS2( movd mm2, edx ) + + #ifdef OLD_GCC_OFFSET + AS2( add edx, 60 ) // des1 = des1 key + #else + AS2( add edx, 56 ) // des1 = des1 key + #endif + + AS2( mov eax, DWORD PTR [esi] ) + AS2( mov ebx, DWORD PTR [esi + 4] ) + AS1( bswap eax ) // left + AS1( bswap ebx ) // right + + AsmIPERM() + + DesRound() // 1 + DesRound() // 2 + DesRound() // 3 + DesRound() // 4 + DesRound() // 5 + DesRound() // 6 + DesRound() // 7 + DesRound() // 8 + + // swap left and right + AS2( xchg eax, ebx ) + + DesRound() // 1 + DesRound() // 2 + DesRound() // 3 + DesRound() // 4 + DesRound() // 5 + DesRound() // 6 + DesRound() // 7 + DesRound() // 8 + + // swap left and right + AS2( xchg eax, ebx ) + + DesRound() // 1 + DesRound() // 2 + DesRound() // 3 + DesRound() // 4 + DesRound() // 5 + DesRound() // 6 + DesRound() // 7 + DesRound() // 8 + + AsmFPERM() + + //end + AS2( movd ebp, mm6 ) + + // swap and write out + AS1( bswap ebx ) + AS1( bswap eax ) + +#ifdef __GNUC__ + AS2( movd esi, mm7 ) // outBlock +#else + AS2( mov esi, DWORD PTR [ebp + 12] ) // outBlock +#endif + + AS2( mov DWORD PTR [esi], ebx ) // right first + AS2( mov DWORD PTR [esi + 4], eax ) + + + EPILOG() +} + + + +#endif // defined(DO_DES_ASM) + + +} // namespace -- cgit v1.1