/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ /* C++ part based on Wei Dai's des.cpp from CryptoPP */ /* x86 asm is original */ #if defined(TAOCRYPT_KERNEL_MODE) #define DO_TAOCRYPT_KERNEL_MODE #endif // only some modules now support this #include "runtime.hpp" #include "des.hpp" #ifdef USE_SYS_STL #include #else #include "algorithm.hpp" #endif namespace STL = STL_NAMESPACE; namespace TaoCrypt { /* permuted choice table (key) */ static const byte pc1[] = { 57, 49, 41, 33, 25, 17, 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35, 27, 19, 11, 3, 60, 52, 44, 36, 63, 55, 47, 39, 31, 23, 15, 7, 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21, 13, 5, 28, 20, 12, 4 }; /* number left rotations of pc1 */ static const byte totrot[] = { 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 }; /* permuted choice key (table) */ static const byte pc2[] = { 14, 17, 11, 24, 1, 5, 3, 28, 15, 6, 21, 10, 23, 19, 12, 4, 26, 8, 16, 7, 27, 20, 13, 2, 41, 52, 31, 37, 47, 55, 30, 40, 51, 45, 33, 48, 44, 49, 39, 56, 34, 53, 46, 42, 50, 36, 29, 32 }; /* End of DES-defined tables */ /* bit 0 is left-most in byte */ static const int bytebit[] = { 0200,0100,040,020,010,04,02,01 }; const word32 Spbox[8][64] = { { 0x01010400,0x00000000,0x00010000,0x01010404, 0x01010004,0x00010404,0x00000004,0x00010000, 0x00000400,0x01010400,0x01010404,0x00000400, 0x01000404,0x01010004,0x01000000,0x00000004, 0x00000404,0x01000400,0x01000400,0x00010400, 0x00010400,0x01010000,0x01010000,0x01000404, 0x00010004,0x01000004,0x01000004,0x00010004, 0x00000000,0x00000404,0x00010404,0x01000000, 0x00010000,0x01010404,0x00000004,0x01010000, 0x01010400,0x01000000,0x01000000,0x00000400, 0x01010004,0x00010000,0x00010400,0x01000004, 0x00000400,0x00000004,0x01000404,0x00010404, 0x01010404,0x00010004,0x01010000,0x01000404, 0x01000004,0x00000404,0x00010404,0x01010400, 0x00000404,0x01000400,0x01000400,0x00000000, 0x00010004,0x00010400,0x00000000,0x01010004}, { 0x80108020,0x80008000,0x00008000,0x00108020, 0x00100000,0x00000020,0x80100020,0x80008020, 0x80000020,0x80108020,0x80108000,0x80000000, 0x80008000,0x00100000,0x00000020,0x80100020, 0x00108000,0x00100020,0x80008020,0x00000000, 0x80000000,0x00008000,0x00108020,0x80100000, 0x00100020,0x80000020,0x00000000,0x00108000, 0x00008020,0x80108000,0x80100000,0x00008020, 0x00000000,0x00108020,0x80100020,0x00100000, 0x80008020,0x80100000,0x80108000,0x00008000, 0x80100000,0x80008000,0x00000020,0x80108020, 0x00108020,0x00000020,0x00008000,0x80000000, 0x00008020,0x80108000,0x00100000,0x80000020, 0x00100020,0x80008020,0x80000020,0x00100020, 0x00108000,0x00000000,0x80008000,0x00008020, 0x80000000,0x80100020,0x80108020,0x00108000}, { 0x00000208,0x08020200,0x00000000,0x08020008, 0x08000200,0x00000000,0x00020208,0x08000200, 0x00020008,0x08000008,0x08000008,0x00020000, 0x08020208,0x00020008,0x08020000,0x00000208, 0x08000000,0x00000008,0x08020200,0x00000200, 0x00020200,0x08020000,0x08020008,0x00020208, 0x08000208,0x00020200,0x00020000,0x08000208, 0x00000008,0x08020208,0x00000200,0x08000000, 0x08020200,0x08000000,0x00020008,0x00000208, 0x00020000,0x08020200,0x08000200,0x00000000, 0x00000200,0x00020008,0x08020208,0x08000200, 0x08000008,0x00000200,0x00000000,0x08020008, 0x08000208,0x00020000,0x08000000,0x08020208, 0x00000008,0x00020208,0x00020200,0x08000008, 0x08020000,0x08000208,0x00000208,0x08020000, 0x00020208,0x00000008,0x08020008,0x00020200}, { 0x00802001,0x00002081,0x00002081,0x00000080, 0x00802080,0x00800081,0x00800001,0x00002001, 0x00000000,0x00802000,0x00802000,0x00802081, 0x00000081,0x00000000,0x00800080,0x00800001, 0x00000001,0x00002000,0x00800000,0x00802001, 0x00000080,0x00800000,0x00002001,0x00002080, 0x00800081,0x00000001,0x00002080,0x00800080, 0x00002000,0x00802080,0x00802081,0x00000081, 0x00800080,0x00800001,0x00802000,0x00802081, 0x00000081,0x00000000,0x00000000,0x00802000, 0x00002080,0x00800080,0x00800081,0x00000001, 0x00802001,0x00002081,0x00002081,0x00000080, 0x00802081,0x00000081,0x00000001,0x00002000, 0x00800001,0x00002001,0x00802080,0x00800081, 0x00002001,0x00002080,0x00800000,0x00802001, 0x00000080,0x00800000,0x00002000,0x00802080}, { 0x00000100,0x02080100,0x02080000,0x42000100, 0x00080000,0x00000100,0x40000000,0x02080000, 0x40080100,0x00080000,0x02000100,0x40080100, 0x42000100,0x42080000,0x00080100,0x40000000, 0x02000000,0x40080000,0x40080000,0x00000000, 0x40000100,0x42080100,0x42080100,0x02000100, 0x42080000,0x40000100,0x00000000,0x42000000, 0x02080100,0x02000000,0x42000000,0x00080100, 0x00080000,0x42000100,0x00000100,0x02000000, 0x40000000,0x02080000,0x42000100,0x40080100, 0x02000100,0x40000000,0x42080000,0x02080100, 0x40080100,0x00000100,0x02000000,0x42080000, 0x42080100,0x00080100,0x42000000,0x42080100, 0x02080000,0x00000000,0x40080000,0x42000000, 0x00080100,0x02000100,0x40000100,0x00080000, 0x00000000,0x40080000,0x02080100,0x40000100}, { 0x20000010,0x20400000,0x00004000,0x20404010, 0x20400000,0x00000010,0x20404010,0x00400000, 0x20004000,0x00404010,0x00400000,0x20000010, 0x00400010,0x20004000,0x20000000,0x00004010, 0x00000000,0x00400010,0x20004010,0x00004000, 0x00404000,0x20004010,0x00000010,0x20400010, 0x20400010,0x00000000,0x00404010,0x20404000, 0x00004010,0x00404000,0x20404000,0x20000000, 0x20004000,0x00000010,0x20400010,0x00404000, 0x20404010,0x00400000,0x00004010,0x20000010, 0x00400000,0x20004000,0x20000000,0x00004010, 0x20000010,0x20404010,0x00404000,0x20400000, 0x00404010,0x20404000,0x00000000,0x20400010, 0x00000010,0x00004000,0x20400000,0x00404010, 0x00004000,0x00400010,0x20004010,0x00000000, 0x20404000,0x20000000,0x00400010,0x20004010}, { 0x00200000,0x04200002,0x04000802,0x00000000, 0x00000800,0x04000802,0x00200802,0x04200800, 0x04200802,0x00200000,0x00000000,0x04000002, 0x00000002,0x04000000,0x04200002,0x00000802, 0x04000800,0x00200802,0x00200002,0x04000800, 0x04000002,0x04200000,0x04200800,0x00200002, 0x04200000,0x00000800,0x00000802,0x04200802, 0x00200800,0x00000002,0x04000000,0x00200800, 0x04000000,0x00200800,0x00200000,0x04000802, 0x04000802,0x04200002,0x04200002,0x00000002, 0x00200002,0x04000000,0x04000800,0x00200000, 0x04200800,0x00000802,0x00200802,0x04200800, 0x00000802,0x04000002,0x04200802,0x04200000, 0x00200800,0x00000000,0x00000002,0x04200802, 0x00000000,0x00200802,0x04200000,0x00000800, 0x04000002,0x04000800,0x00000800,0x00200002}, { 0x10001040,0x00001000,0x00040000,0x10041040, 0x10000000,0x10001040,0x00000040,0x10000000, 0x00040040,0x10040000,0x10041040,0x00041000, 0x10041000,0x00041040,0x00001000,0x00000040, 0x10040000,0x10000040,0x10001000,0x00001040, 0x00041000,0x00040040,0x10040040,0x10041000, 0x00001040,0x00000000,0x00000000,0x10040040, 0x10000040,0x10001000,0x00041040,0x00040000, 0x00041040,0x00040000,0x10041000,0x00001000, 0x00000040,0x10040040,0x00001000,0x00041040, 0x10001000,0x00000040,0x10000040,0x10040000, 0x10040040,0x10000000,0x00040000,0x10001040, 0x00000000,0x10041040,0x00040040,0x10000040, 0x10040000,0x10001000,0x10001040,0x00000000, 0x10041040,0x00041000,0x00041000,0x00001040, 0x00001040,0x00040040,0x10000000,0x10041000} }; void BasicDES::SetKey(const byte* key, word32 /*length*/, CipherDir dir) { byte buffer[56+56+8]; byte *const pc1m = buffer; /* place to modify pc1 into */ byte *const pcr = pc1m + 56; /* place to rotate pc1 into */ byte *const ks = pcr + 56; int i,j,l; int m; for (j = 0; j < 56; j++) { /* convert pc1 to bits of key */ l = pc1[j] - 1; /* integer bit location */ m = l & 07; /* find bit */ pc1m[j] = (key[l >> 3] & /* find which key byte l is in */ bytebit[m]) /* and which bit of that byte */ ? 1 : 0; /* and store 1-bit result */ } for (i = 0; i < 16; i++) { /* key chunk for each iteration */ memset(ks, 0, 8); /* Clear key schedule */ for (j = 0; j < 56; j++) /* rotate pc1 the right amount */ pcr[j] = pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l: l-28]; /* rotate left and right halves independently */ for (j = 0; j < 48; j++){ /* select bits individually */ /* check bit that goes to ks[j] */ if (pcr[pc2[j] - 1]){ /* mask it in if it's there */ l= j % 6; ks[j/6] |= bytebit[l] >> 2; } } /* Now convert to odd/even interleaved form for use in F */ k_[2*i] = ((word32)ks[0] << 24) | ((word32)ks[2] << 16) | ((word32)ks[4] << 8) | ((word32)ks[6]); k_[2*i + 1] = ((word32)ks[1] << 24) | ((word32)ks[3] << 16) | ((word32)ks[5] << 8) | ((word32)ks[7]); } // reverse key schedule order if (dir == DECRYPTION) for (i = 0; i < 16; i += 2) { STL::swap(k_[i], k_[32 - 2 - i]); STL::swap(k_[i+1], k_[32 - 1 - i]); } } static inline void IPERM(word32& left, word32& right) { word32 work; right = rotlFixed(right, 4U); work = (left ^ right) & 0xf0f0f0f0; left ^= work; right = rotrFixed(right^work, 20U); work = (left ^ right) & 0xffff0000; left ^= work; right = rotrFixed(right^work, 18U); work = (left ^ right) & 0x33333333; left ^= work; right = rotrFixed(right^work, 6U); work = (left ^ right) & 0x00ff00ff; left ^= work; right = rotlFixed(right^work, 9U); work = (left ^ right) & 0xaaaaaaaa; left = rotlFixed(left^work, 1U); right ^= work; } static inline void FPERM(word32& left, word32& right) { word32 work; right = rotrFixed(right, 1U); work = (left ^ right) & 0xaaaaaaaa; right ^= work; left = rotrFixed(left^work, 9U); work = (left ^ right) & 0x00ff00ff; right ^= work; left = rotlFixed(left^work, 6U); work = (left ^ right) & 0x33333333; right ^= work; left = rotlFixed(left^work, 18U); work = (left ^ right) & 0xffff0000; right ^= work; left = rotlFixed(left^work, 20U); work = (left ^ right) & 0xf0f0f0f0; right ^= work; left = rotrFixed(left^work, 4U); } void BasicDES::RawProcessBlock(word32& lIn, word32& rIn) const { word32 l = lIn, r = rIn; const word32* kptr = k_; for (unsigned i=0; i<8; i++) { word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0]; l ^= Spbox[6][(work) & 0x3f] ^ Spbox[4][(work >> 8) & 0x3f] ^ Spbox[2][(work >> 16) & 0x3f] ^ Spbox[0][(work >> 24) & 0x3f]; work = r ^ kptr[4*i+1]; l ^= Spbox[7][(work) & 0x3f] ^ Spbox[5][(work >> 8) & 0x3f] ^ Spbox[3][(work >> 16) & 0x3f] ^ Spbox[1][(work >> 24) & 0x3f]; work = rotrFixed(l, 4U) ^ kptr[4*i+2]; r ^= Spbox[6][(work) & 0x3f] ^ Spbox[4][(work >> 8) & 0x3f] ^ Spbox[2][(work >> 16) & 0x3f] ^ Spbox[0][(work >> 24) & 0x3f]; work = l ^ kptr[4*i+3]; r ^= Spbox[7][(work) & 0x3f] ^ Spbox[5][(work >> 8) & 0x3f] ^ Spbox[3][(work >> 16) & 0x3f] ^ Spbox[1][(work >> 24) & 0x3f]; } lIn = l; rIn = r; } typedef BlockGetAndPut Block; void DES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const { word32 l,r; Block::Get(in)(l)(r); IPERM(l,r); RawProcessBlock(l, r); FPERM(l,r); Block::Put(xOr, out)(r)(l); } void DES_EDE2::SetKey(const byte* key, word32 sz, CipherDir dir) { des1_.SetKey(key, sz, dir); des2_.SetKey(key + 8, sz, ReverseDir(dir)); } void DES_EDE2::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const { word32 l,r; Block::Get(in)(l)(r); IPERM(l,r); des1_.RawProcessBlock(l, r); des2_.RawProcessBlock(r, l); des1_.RawProcessBlock(l, r); FPERM(l,r); Block::Put(xOr, out)(r)(l); } void DES_EDE3::SetKey(const byte* key, word32 sz, CipherDir dir) { des1_.SetKey(key+(dir==ENCRYPTION?0:2*8), sz, dir); des2_.SetKey(key+8, sz, ReverseDir(dir)); des3_.SetKey(key+(dir==DECRYPTION?0:2*8), sz, dir); } #if defined(DO_DES_ASM) // ia32 optimized version void DES_EDE3::Process(byte* out, const byte* in, word32 sz) { if (!isMMX) { Mode_BASE::Process(out, in, sz); return; } word32 blocks = sz / DES_BLOCK_SIZE; if (mode_ == CBC) if (dir_ == ENCRYPTION) while (blocks--) { r_[0] ^= *(word32*)in; r_[1] ^= *(word32*)(in + 4); AsmProcess((byte*)r_, (byte*)r_, (void*)Spbox); memcpy(out, r_, DES_BLOCK_SIZE); in += DES_BLOCK_SIZE; out += DES_BLOCK_SIZE; } else while (blocks--) { AsmProcess(in, out, (void*)Spbox); *(word32*)out ^= r_[0]; *(word32*)(out + 4) ^= r_[1]; memcpy(r_, in, DES_BLOCK_SIZE); out += DES_BLOCK_SIZE; in += DES_BLOCK_SIZE; } else while (blocks--) { AsmProcess(in, out, (void*)Spbox); out += DES_BLOCK_SIZE; in += DES_BLOCK_SIZE; } } #endif // DO_DES_ASM void DES_EDE3::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const { word32 l,r; Block::Get(in)(l)(r); IPERM(l,r); des1_.RawProcessBlock(l, r); des2_.RawProcessBlock(r, l); des3_.RawProcessBlock(l, r); FPERM(l,r); Block::Put(xOr, out)(r)(l); } #if defined(DO_DES_ASM) /* Uses IPERM algorithm from above left is in eax right is in ebx uses ecx */ #define AsmIPERM() \ AS2( rol ebx, 4 ) \ AS2( mov ecx, eax ) \ AS2( xor ecx, ebx ) \ AS2( and ecx, 0xf0f0f0f0 ) \ AS2( xor ebx, ecx ) \ AS2( xor eax, ecx ) \ AS2( ror ebx, 20 ) \ AS2( mov ecx, eax ) \ AS2( xor ecx, ebx ) \ AS2( and ecx, 0xffff0000 ) \ AS2( xor ebx, ecx ) \ AS2( xor eax, ecx ) \ AS2( ror ebx, 18 ) \ AS2( mov ecx, eax ) \ AS2( xor ecx, ebx ) \ AS2( and ecx, 0x33333333 ) \ AS2( xor ebx, ecx ) \ AS2( xor eax, ecx ) \ AS2( ror ebx, 6 ) \ AS2( mov ecx, eax ) \ AS2( xor ecx, ebx ) \ AS2( and ecx, 0x00ff00ff ) \ AS2( xor ebx, ecx ) \ AS2( xor eax, ecx ) \ AS2( rol ebx, 9 ) \ AS2( mov ecx, eax ) \ AS2( xor ecx, ebx ) \ AS2( and ecx, 0xaaaaaaaa ) \ AS2( xor eax, ecx ) \ AS2( rol eax, 1 ) \ AS2( xor ebx, ecx ) /* Uses FPERM algorithm from above left is in eax right is in ebx uses ecx */ #define AsmFPERM() \ AS2( ror ebx, 1 ) \ AS2( mov ecx, eax ) \ AS2( xor ecx, ebx ) \ AS2( and ecx, 0xaaaaaaaa ) \ AS2( xor eax, ecx ) \ AS2( xor ebx, ecx ) \ AS2( ror eax, 9 ) \ AS2( mov ecx, ebx ) \ AS2( xor ecx, eax ) \ AS2( and ecx, 0x00ff00ff ) \ AS2( xor eax, ecx ) \ AS2( xor ebx, ecx ) \ AS2( rol eax, 6 ) \ AS2( mov ecx, ebx ) \ AS2( xor ecx, eax ) \ AS2( and ecx, 0x33333333 ) \ AS2( xor eax, ecx ) \ AS2( xor ebx, ecx ) \ AS2( rol eax, 18 ) \ AS2( mov ecx, ebx ) \ AS2( xor ecx, eax ) \ AS2( and ecx, 0xffff0000 ) \ AS2( xor eax, ecx ) \ AS2( xor ebx, ecx ) \ AS2( rol eax, 20 ) \ AS2( mov ecx, ebx ) \ AS2( xor ecx, eax ) \ AS2( and ecx, 0xf0f0f0f0 ) \ AS2( xor eax, ecx ) \ AS2( xor ebx, ecx ) \ AS2( ror eax, 4 ) /* DesRound implements this algorithm: word32 work = rotrFixed(r, 4U) ^ key[0]; l ^= Spbox[6][(work) & 0x3f] ^ Spbox[4][(work >> 8) & 0x3f] ^ Spbox[2][(work >> 16) & 0x3f] ^ Spbox[0][(work >> 24) & 0x3f]; work = r ^ key[1]; l ^= Spbox[7][(work) & 0x3f] ^ Spbox[5][(work >> 8) & 0x3f] ^ Spbox[3][(work >> 16) & 0x3f] ^ Spbox[1][(work >> 24) & 0x3f]; work = rotrFixed(l, 4U) ^ key[2]; r ^= Spbox[6][(work) & 0x3f] ^ Spbox[4][(work >> 8) & 0x3f] ^ Spbox[2][(work >> 16) & 0x3f] ^ Spbox[0][(work >> 24) & 0x3f]; work = l ^ key[3]; r ^= Spbox[7][(work) & 0x3f] ^ Spbox[5][(work >> 8) & 0x3f] ^ Spbox[3][(work >> 16) & 0x3f] ^ Spbox[1][(work >> 24) & 0x3f]; left is in aex right is in ebx key is in edx edvances key for next round uses ecx, esi, and edi */ #define DesRound() \ AS2( mov ecx, ebx )\ AS2( mov esi, DWORD PTR [edx] )\ AS2( ror ecx, 4 )\ AS2( xor ecx, esi )\ AS2( and ecx, 0x3f3f3f3f )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor eax, [ebp + esi*4 + 6*256] )\ AS2( shr ecx, 16 )\ AS2( xor eax, [ebp + edi*4 + 4*256] )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor eax, [ebp + esi*4 + 2*256] )\ AS2( mov esi, DWORD PTR [edx + 4] )\ AS2( xor eax, [ebp + edi*4] )\ AS2( mov ecx, ebx )\ AS2( xor ecx, esi )\ AS2( and ecx, 0x3f3f3f3f )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor eax, [ebp + esi*4 + 7*256] )\ AS2( shr ecx, 16 )\ AS2( xor eax, [ebp + edi*4 + 5*256] )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor eax, [ebp + esi*4 + 3*256] )\ AS2( mov esi, DWORD PTR [edx + 8] )\ AS2( xor eax, [ebp + edi*4 + 1*256] )\ AS2( mov ecx, eax )\ AS2( ror ecx, 4 )\ AS2( xor ecx, esi )\ AS2( and ecx, 0x3f3f3f3f )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor ebx, [ebp + esi*4 + 6*256] )\ AS2( shr ecx, 16 )\ AS2( xor ebx, [ebp + edi*4 + 4*256] )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor ebx, [ebp + esi*4 + 2*256] )\ AS2( mov esi, DWORD PTR [edx + 12] )\ AS2( xor ebx, [ebp + edi*4] )\ AS2( mov ecx, eax )\ AS2( xor ecx, esi )\ AS2( and ecx, 0x3f3f3f3f )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor ebx, [ebp + esi*4 + 7*256] )\ AS2( shr ecx, 16 )\ AS2( xor ebx, [ebp + edi*4 + 5*256] )\ AS2( movzx esi, cl )\ AS2( movzx edi, ch )\ AS2( xor ebx, [ebp + esi*4 + 3*256] )\ AS2( add edx, 16 )\ AS2( xor ebx, [ebp + edi*4 + 1*256] ) #ifdef _MSC_VER __declspec(naked) #else __attribute__ ((noinline)) #endif void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const { #ifdef __GNUC__ #define AS1(x) #x ";" #define AS2(x, y) #x ", " #y ";" #define PROLOG() \ __asm__ __volatile__ \ ( \ ".intel_syntax noprefix;" \ "push ebx;" \ "push ebp;" \ "movd mm6, ebp;" \ "movd mm7, ecx;" \ "mov ebp, eax;" #define EPILOG() \ "pop ebp;" \ "pop ebx;" \ "emms;" \ ".att_syntax;" \ : \ : "d" (this), "S" (in), "a" (box), "c" (out) \ : "%edi", "memory", "cc" \ ); #else #define AS1(x) __asm x #define AS2(x, y) __asm x, y #define PROLOG() \ AS1( push ebp ) \ AS2( mov ebp, esp ) \ AS2( movd mm3, edi ) \ AS2( movd mm4, ebx ) \ AS2( movd mm5, esi ) \ AS2( movd mm6, ebp ) \ AS2( mov esi, DWORD PTR [ebp + 8] ) \ AS2( mov edx, ecx ) \ AS2( mov ebp, DWORD PTR [ebp + 16] ) // ebp restored at end #define EPILOG() \ AS2( movd edi, mm3 ) \ AS2( movd ebx, mm4 ) \ AS2( movd esi, mm5 ) \ AS2( mov esp, ebp ) \ AS1( pop ebp ) \ AS1( emms ) \ AS1( ret 12 ) #endif PROLOG() AS2( movd mm2, edx ) #ifdef OLD_GCC_OFFSET AS2( add edx, 60 ) // des1 = des1 key #else AS2( add edx, 56 ) // des1 = des1 key #endif AS2( mov eax, DWORD PTR [esi] ) AS2( mov ebx, DWORD PTR [esi + 4] ) AS1( bswap eax ) // left AS1( bswap ebx ) // right AsmIPERM() DesRound() // 1 DesRound() // 2 DesRound() // 3 DesRound() // 4 DesRound() // 5 DesRound() // 6 DesRound() // 7 DesRound() // 8 // swap left and right AS2( xchg eax, ebx ) DesRound() // 1 DesRound() // 2 DesRound() // 3 DesRound() // 4 DesRound() // 5 DesRound() // 6 DesRound() // 7 DesRound() // 8 // swap left and right AS2( xchg eax, ebx ) DesRound() // 1 DesRound() // 2 DesRound() // 3 DesRound() // 4 DesRound() // 5 DesRound() // 6 DesRound() // 7 DesRound() // 8 AsmFPERM() //end AS2( movd ebp, mm6 ) // swap and write out AS1( bswap ebx ) AS1( bswap eax ) #ifdef __GNUC__ AS2( movd esi, mm7 ) // outBlock #else AS2( mov esi, DWORD PTR [ebp + 12] ) // outBlock #endif AS2( mov DWORD PTR [esi], ebx ) // right first AS2( mov DWORD PTR [esi + 4], eax ) EPILOG() } #endif // defined(DO_DES_ASM) } // namespace