| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687 |
- /*
- Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
- The redistribution and use of this software (with or without changes)
- is allowed without the payment of fees or royalties provided that:
- source code distributions include the above copyright notice, this
- list of conditions and the following disclaimer;
- binary distributions include the above copyright notice, this list
- of conditions and the following disclaimer in their documentation.
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its operation, including, but not limited to, correctness
- and fitness for purpose.
- ---------------------------------------------------------------------------
- Issue Date: 09/09/2014
- */
- #include "aes_ni.h"
- #if defined( USE_INTEL_AES_IF_PRESENT )
- #if defined(_MSC_VER)
- #include <intrin.h>
- #pragma intrinsic(__cpuid)
- #define INLINE __inline
- INLINE int has_aes_ni(void)
- {
- static int test = -1;
- if(test < 0)
- {
- int cpu_info[4];
- __cpuid(cpu_info, 1);
- test = cpu_info[2] & 0x02000000;
- }
- return test;
- }
- #elif defined( __GNUC__ )
- #include <cpuid.h>
- #if !defined(__clang__)
- #pragma GCC target ("ssse3")
- #pragma GCC target ("sse4.1")
- #pragma GCC target ("aes")
- #endif
- #include <x86intrin.h>
- #define INLINE static __inline
- INLINE int has_aes_ni()
- {
- static int test = -1;
- if(test < 0)
- {
- unsigned int a, b, c, d;
- if(!__get_cpuid(1, &a, &b, &c, &d))
- test = 0;
- else
- test = (c & 0x2000000);
- }
- return test;
- }
- #else
- #error AES New Instructions require Microsoft, Intel, GNU C, or CLANG
- #endif
- INLINE __m128i aes_128_assist(__m128i t1, __m128i t2)
- {
- __m128i t3;
- t2 = _mm_shuffle_epi32(t2, 0xff);
- t3 = _mm_slli_si128(t1, 0x4);
- t1 = _mm_xor_si128(t1, t3);
- t3 = _mm_slli_si128(t3, 0x4);
- t1 = _mm_xor_si128(t1, t3);
- t3 = _mm_slli_si128(t3, 0x4);
- t1 = _mm_xor_si128(t1, t3);
- t1 = _mm_xor_si128(t1, t2);
- return t1;
- }
- AES_RETURN aes_ni(encrypt_key128)(const unsigned char *key, aes_encrypt_ctx cx[1])
- {
- __m128i t1, t2;
- __m128i *ks = (__m128i*)cx->ks;
- if(!has_aes_ni())
- {
- return aes_xi(encrypt_key128)(key, cx);
- }
- t1 = _mm_loadu_si128((__m128i*)key);
- ks[0] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x1);
- t1 = aes_128_assist(t1, t2);
- ks[1] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x2);
- t1 = aes_128_assist(t1, t2);
- ks[2] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x4);
- t1 = aes_128_assist(t1, t2);
- ks[3] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x8);
- t1 = aes_128_assist(t1, t2);
- ks[4] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x10);
- t1 = aes_128_assist(t1, t2);
- ks[5] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x20);
- t1 = aes_128_assist(t1, t2);
- ks[6] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x40);
- t1 = aes_128_assist(t1, t2);
- ks[7] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x80);
- t1 = aes_128_assist(t1, t2);
- ks[8] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x1b);
- t1 = aes_128_assist(t1, t2);
- ks[9] = t1;
- t2 = _mm_aeskeygenassist_si128(t1, 0x36);
- t1 = aes_128_assist(t1, t2);
- ks[10] = t1;
- cx->inf.l = 0;
- cx->inf.b[0] = 10 * 16;
- return EXIT_SUCCESS;
- }
- INLINE void aes_192_assist(__m128i* t1, __m128i * t2, __m128i * t3)
- {
- __m128i t4;
- *t2 = _mm_shuffle_epi32(*t2, 0x55);
- t4 = _mm_slli_si128(*t1, 0x4);
- *t1 = _mm_xor_si128(*t1, t4);
- t4 = _mm_slli_si128(t4, 0x4);
- *t1 = _mm_xor_si128(*t1, t4);
- t4 = _mm_slli_si128(t4, 0x4);
- *t1 = _mm_xor_si128(*t1, t4);
- *t1 = _mm_xor_si128(*t1, *t2);
- *t2 = _mm_shuffle_epi32(*t1, 0xff);
- t4 = _mm_slli_si128(*t3, 0x4);
- *t3 = _mm_xor_si128(*t3, t4);
- *t3 = _mm_xor_si128(*t3, *t2);
- }
- AES_RETURN aes_ni(encrypt_key192)(const unsigned char *key, aes_encrypt_ctx cx[1])
- {
- __m128i t1, t2, t3;
- __m128i *ks = (__m128i*)cx->ks;
- if(!has_aes_ni())
- {
- return aes_xi(encrypt_key192)(key, cx);
- }
- t1 = _mm_loadu_si128((__m128i*)key);
- t3 = _mm_loadu_si128((__m128i*)(key + 16));
- ks[0] = t1;
- ks[1] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x1);
- aes_192_assist(&t1, &t2, &t3);
- ks[1] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[1]), _mm_castsi128_pd(t1), 0));
- ks[2] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));
- t2 = _mm_aeskeygenassist_si128(t3, 0x2);
- aes_192_assist(&t1, &t2, &t3);
- ks[3] = t1;
- ks[4] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x4);
- aes_192_assist(&t1, &t2, &t3);
- ks[4] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[4]), _mm_castsi128_pd(t1), 0));
- ks[5] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));
- t2 = _mm_aeskeygenassist_si128(t3, 0x8);
- aes_192_assist(&t1, &t2, &t3);
- ks[6] = t1;
- ks[7] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x10);
- aes_192_assist(&t1, &t2, &t3);
- ks[7] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[7]), _mm_castsi128_pd(t1), 0));
- ks[8] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));
- t2 = _mm_aeskeygenassist_si128(t3, 0x20);
- aes_192_assist(&t1, &t2, &t3);
- ks[9] = t1;
- ks[10] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x40);
- aes_192_assist(&t1, &t2, &t3);
- ks[10] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[10]), _mm_castsi128_pd(t1), 0));
- ks[11] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));
- t2 = _mm_aeskeygenassist_si128(t3, 0x80);
- aes_192_assist(&t1, &t2, &t3);
- ks[12] = t1;
- cx->inf.l = 0;
- cx->inf.b[0] = 12 * 16;
- return EXIT_SUCCESS;
- }
- INLINE void aes_256_assist1(__m128i* t1, __m128i * t2)
- {
- __m128i t4;
- *t2 = _mm_shuffle_epi32(*t2, 0xff);
- t4 = _mm_slli_si128(*t1, 0x4);
- *t1 = _mm_xor_si128(*t1, t4);
- t4 = _mm_slli_si128(t4, 0x4);
- *t1 = _mm_xor_si128(*t1, t4);
- t4 = _mm_slli_si128(t4, 0x4);
- *t1 = _mm_xor_si128(*t1, t4);
- *t1 = _mm_xor_si128(*t1, *t2);
- }
- INLINE void aes_256_assist2(__m128i* t1, __m128i * t3)
- {
- __m128i t2, t4;
- t4 = _mm_aeskeygenassist_si128(*t1, 0x0);
- t2 = _mm_shuffle_epi32(t4, 0xaa);
- t4 = _mm_slli_si128(*t3, 0x4);
- *t3 = _mm_xor_si128(*t3, t4);
- t4 = _mm_slli_si128(t4, 0x4);
- *t3 = _mm_xor_si128(*t3, t4);
- t4 = _mm_slli_si128(t4, 0x4);
- *t3 = _mm_xor_si128(*t3, t4);
- *t3 = _mm_xor_si128(*t3, t2);
- }
- AES_RETURN aes_ni(encrypt_key256)(const unsigned char *key, aes_encrypt_ctx cx[1])
- {
- __m128i t1, t2, t3;
- __m128i *ks = (__m128i*)cx->ks;
- if(!has_aes_ni())
- {
- return aes_xi(encrypt_key256)(key, cx);
- }
- t1 = _mm_loadu_si128((__m128i*)key);
- t3 = _mm_loadu_si128((__m128i*)(key + 16));
- ks[0] = t1;
- ks[1] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x01);
- aes_256_assist1(&t1, &t2);
- ks[2] = t1;
- aes_256_assist2(&t1, &t3);
- ks[3] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x02);
- aes_256_assist1(&t1, &t2);
- ks[4] = t1;
- aes_256_assist2(&t1, &t3);
- ks[5] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x04);
- aes_256_assist1(&t1, &t2);
- ks[6] = t1;
- aes_256_assist2(&t1, &t3);
- ks[7] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x08);
- aes_256_assist1(&t1, &t2);
- ks[8] = t1;
- aes_256_assist2(&t1, &t3);
- ks[9] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x10);
- aes_256_assist1(&t1, &t2);
- ks[10] = t1;
- aes_256_assist2(&t1, &t3);
- ks[11] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x20);
- aes_256_assist1(&t1, &t2);
- ks[12] = t1;
- aes_256_assist2(&t1, &t3);
- ks[13] = t3;
- t2 = _mm_aeskeygenassist_si128(t3, 0x40);
- aes_256_assist1(&t1, &t2);
- ks[14] = t1;
- cx->inf.l = 0;
- cx->inf.b[0] = 14 * 16;
- return EXIT_SUCCESS;
- }
- INLINE void enc_to_dec(aes_decrypt_ctx cx[1])
- {
- __m128i *ks = (__m128i*)cx->ks;
- int j;
- for( j = 1 ; j < (cx->inf.b[0] >> 4) ; ++j )
- ks[j] = _mm_aesimc_si128(ks[j]);
- }
- AES_RETURN aes_ni(decrypt_key128)(const unsigned char *key, aes_decrypt_ctx cx[1])
- {
- if(!has_aes_ni())
- {
- return aes_xi(decrypt_key128)(key, cx);
- }
- if(aes_ni(encrypt_key128)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS)
- {
- enc_to_dec(cx);
- return EXIT_SUCCESS;
- }
- else
- return EXIT_FAILURE;
- }
- AES_RETURN aes_ni(decrypt_key192)(const unsigned char *key, aes_decrypt_ctx cx[1])
- {
- if(!has_aes_ni())
- {
- return aes_xi(decrypt_key192)(key, cx);
- }
- if(aes_ni(encrypt_key192)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS)
- {
- enc_to_dec(cx);
- return EXIT_SUCCESS;
- }
- else
- return EXIT_FAILURE;
- }
- AES_RETURN aes_ni(decrypt_key256)(const unsigned char *key, aes_decrypt_ctx cx[1])
- {
- if(!has_aes_ni())
- {
- return aes_xi(decrypt_key256)(key, cx);
- }
- if(aes_ni(encrypt_key256)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS)
- {
- enc_to_dec(cx);
- return EXIT_SUCCESS;
- }
- else
- return EXIT_FAILURE;
- }
- AES_RETURN aes_ni(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
- {
- __m128i *key = (__m128i*)cx->ks, t;
- if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
- return EXIT_FAILURE;
- if(!has_aes_ni())
- {
- return aes_xi(encrypt)(in, out, cx);
- }
- t = _mm_xor_si128(_mm_loadu_si128((__m128i*)in), *(__m128i*)key);
- switch(cx->inf.b[0])
- {
- case 14 * 16:
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- case 12 * 16:
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- case 10 * 16:
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenc_si128(t, *(__m128i*)++key);
- t = _mm_aesenclast_si128(t, *(__m128i*)++key);
- }
- _mm_storeu_si128(&((__m128i*)out)[0], t);
- return EXIT_SUCCESS;
- }
- AES_RETURN aes_ni(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
- {
- __m128i *key = (__m128i*)cx->ks + (cx->inf.b[0] >> 4), t;
- if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
- return EXIT_FAILURE;
- if(!has_aes_ni())
- {
- return aes_xi(decrypt)(in, out, cx);
- }
- t = _mm_xor_si128(_mm_loadu_si128((__m128i*)in), *(__m128i*)key);
- switch(cx->inf.b[0])
- {
- case 14 * 16:
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- case 12 * 16:
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- case 10 * 16:
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdec_si128(t, *(__m128i*)--key);
- t = _mm_aesdeclast_si128(t, *(__m128i*)--key);
- }
- _mm_storeu_si128((__m128i*)out, t);
- return EXIT_SUCCESS;
- }
- #ifdef ADD_AESNI_MODE_CALLS
- #ifdef USE_AES_CONTEXT
- AES_RETURN aes_CBC_encrypt(const unsigned char *in,
- unsigned char *out,
- unsigned char ivec[16],
- unsigned long length,
- const aes_encrypt_ctx cx[1])
- {
- __m128i feedback, data, *key = (__m128i*)cx->ks;
- int number_of_rounds = cx->inf.b[0] >> 4, j;
- unsigned long i;
-
- if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14)
- return EXIT_FAILURE;
- if(!has_aes_ni())
- {
- return aes_cbc_encrypt(in, out, length, ivec, cx);
- }
- if(length % 16)
- length = length / 16 + 1;
- else length /= 16;
- feedback = _mm_loadu_si128((__m128i*)ivec);
- for(i = 0; i < length; i++)
- {
- data = _mm_loadu_si128(&((__m128i*)in)[i]);
- feedback = _mm_xor_si128(data, feedback);
- feedback = _mm_xor_si128(feedback, ((__m128i*)key)[0]);
- for(j = 1; j <number_of_rounds; j++)
- feedback = _mm_aesenc_si128(feedback, ((__m128i*)key)[j]);
- feedback = _mm_aesenclast_si128(feedback, ((__m128i*)key)[j]);
- _mm_storeu_si128(&((__m128i*)out)[i], feedback);
- }
- return EXIT_SUCCESS;
- }
- AES_RETURN aes_CBC_decrypt(const unsigned char *in,
- unsigned char *out,
- unsigned char ivec[16],
- unsigned long length,
- const aes_decrypt_ctx cx[1])
- {
- __m128i data, feedback, last_in, *key = (__m128i*)cx->ks;
- int number_of_rounds = cx->inf.b[0] >> 4, j;
- unsigned long i;
- if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14)
- return EXIT_FAILURE;
- if(!has_aes_ni())
- {
- return aes_cbc_decrypt(in, out, length, ivec, cx);
- }
- if(length % 16)
- length = length / 16 + 1;
- else length /= 16;
- feedback = _mm_loadu_si128((__m128i*)ivec);
- for(i = 0; i < length; i++)
- {
- last_in = _mm_loadu_si128(&((__m128i*)in)[i]);
- data = _mm_xor_si128(last_in, ((__m128i*)key)[number_of_rounds]);
- for(j = number_of_rounds - 1; j > 0; j--)
- {
- data = _mm_aesdec_si128(data, ((__m128i*)key)[j]);
- }
- data = _mm_aesdeclast_si128(data, ((__m128i*)key)[0]);
- data = _mm_xor_si128(data, feedback);
- _mm_storeu_si128(&((__m128i*)out)[i], data);
- feedback = last_in;
- }
- return EXIT_SUCCESS;
- }
- static void ctr_inc(unsigned char *ctr_blk)
- {
- uint32_t c;
- c = *(uint32_t*)(ctr_blk + 8);
- c++;
- *(uint32_t*)(ctr_blk + 8) = c;
- if(!c)
- *(uint32_t*)(ctr_blk + 12) = *(uint32_t*)(ctr_blk + 12) + 1;
- }
- AES_RETURN AES_CTR_encrypt(const unsigned char *in,
- unsigned char *out,
- const unsigned char ivec[8],
- const unsigned char nonce[4],
- unsigned long length,
- const aes_encrypt_ctx cx[1])
- {
- __m128i ctr_block = { 0 }, *key = (__m128i*)cx->ks, tmp, ONE, BSWAP_EPI64;
- int number_of_rounds = cx->inf.b[0] >> 4, j;
- unsigned long i;
- if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14)
- return EXIT_FAILURE;
- if(!has_aes_ni())
- {
- unsigned char ctr_blk[16];
- *(uint64_t*)ctr_blk = *(uint64_t*)ivec;
- *(uint32_t*)(ctr_blk + 8) = *(uint32_t*)nonce;
- return aes_ctr_crypt(in, out, length, (unsigned char*)ctr_blk, ctr_inc, cx);
- }
- if(length % 16)
- length = length / 16 + 1;
- else length /= 16;
- ONE = _mm_set_epi32(0, 1, 0, 0);
- BSWAP_EPI64 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
- #ifdef _MSC_VER
- ctr_block = _mm_insert_epi64(ctr_block, *(long long*)ivec, 1);
- #else
- ctr_block = _mm_set_epi64(*(__m64*)ivec, *(__m64*)&ctr_block);
- #endif
- ctr_block = _mm_insert_epi32(ctr_block, *(long*)nonce, 1);
- ctr_block = _mm_srli_si128(ctr_block, 4);
- ctr_block = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
- ctr_block = _mm_add_epi64(ctr_block, ONE);
- for(i = 0; i < length; i++)
- {
- tmp = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
- ctr_block = _mm_add_epi64(ctr_block, ONE);
- tmp = _mm_xor_si128(tmp, ((__m128i*)key)[0]);
- for(j = 1; j <number_of_rounds; j++)
- {
- tmp = _mm_aesenc_si128(tmp, ((__m128i*)key)[j]);
- };
- tmp = _mm_aesenclast_si128(tmp, ((__m128i*)key)[j]);
- tmp = _mm_xor_si128(tmp, _mm_loadu_si128(&((__m128i*)in)[i]));
- _mm_storeu_si128(&((__m128i*)out)[i], tmp);
- }
- return EXIT_SUCCESS;
- }
- #else
/* CBC-encrypt `length` bytes from in to out.
   key points at an expanded schedule that is dereferenced as an array of
   __m128i round keys (indices 0..number_of_rounds).

   NOTE: a length that is not a multiple of 16 is rounded UP to whole
   blocks, so in and out must both be valid for the rounded size.  ivec is
   only read; the final chaining value is not written back to it. */
void aes_CBC_encrypt(const unsigned char *in,
                     unsigned char *out,
                     unsigned char ivec[16],
                     unsigned long length,
                     unsigned char *key,
                     int number_of_rounds)
{
    __m128i *rk = (__m128i*)key;
    __m128i chain;
    unsigned long blk, nblocks;
    int r;

    nblocks = length / 16;
    if(length % 16)
        ++nblocks;

    chain = _mm_loadu_si128((__m128i*)ivec);
    for(blk = 0; blk < nblocks; ++blk)
    {
        /* plaintext XOR previous ciphertext (or IV), then the block cipher */
        chain = _mm_xor_si128(_mm_loadu_si128(&((__m128i*)in)[blk]), chain);
        chain = _mm_xor_si128(chain, rk[0]);
        r = 1;
        while(r < number_of_rounds)
        {
            chain = _mm_aesenc_si128(chain, rk[r]);
            ++r;
        }
        /* r is the index at which the loop stopped */
        chain = _mm_aesenclast_si128(chain, rk[r]);
        _mm_storeu_si128(&((__m128i*)out)[blk], chain);
    }
}
/* CBC-decrypt `length` bytes from in to out.
   key points at an expanded schedule that is dereferenced as an array of
   __m128i round keys and applied in ascending index order (index 0 first),
   so the caller must supply the decryption schedule in the order in which
   it is to be consumed.

   NOTE: a length that is not a multiple of 16 is rounded UP to whole
   blocks, so in and out must both be valid for the rounded size.  ivec is
   only read. */
void aes_CBC_decrypt(const unsigned char *in,
                     unsigned char *out,
                     unsigned char ivec[16],
                     unsigned long length,
                     unsigned char *key,
                     int number_of_rounds)
{
    __m128i *rk = (__m128i*)key;
    __m128i prev, cipher, plain;
    unsigned long blk, nblocks;
    int r;

    nblocks = length / 16;
    if(length % 16)
        ++nblocks;

    prev = _mm_loadu_si128((__m128i*)ivec);
    for(blk = 0; blk < nblocks; ++blk)
    {
        cipher = _mm_loadu_si128(&((__m128i*)in)[blk]);
        plain = _mm_xor_si128(cipher, rk[0]);
        r = 1;
        while(r < number_of_rounds)
        {
            plain = _mm_aesdec_si128(plain, rk[r]);
            ++r;
        }
        /* r is the index at which the loop stopped */
        plain = _mm_aesdeclast_si128(plain, rk[r]);
        plain = _mm_xor_si128(plain, prev);
        _mm_storeu_si128(&((__m128i*)out)[blk], plain);
        prev = cipher;      /* saved before the store, so in == out is safe */
    }
}
/* CTR-mode encrypt/decrypt `length` bytes from in to out.
   key points at an expanded encryption schedule that is dereferenced as an
   array of __m128i round keys.

   Counter block layout (16 bytes):
       bytes  0..3   nonce[0..3]
       bytes  4..11  ivec[0..7]
       bytes 12..15  block counter, big-endian, first block uses 1
   The counter is advanced as a 64-bit big-endian value over bytes 8..15.

   Changes relative to the previous implementation (keystream unchanged):
     - nonce is read as exactly four bytes (it used to be read through a
       `long *`, i.e. eight bytes on LP64 targets) and ivec is no longer
       accessed through an __m64 pointer;
     - a trailing partial block only touches the `length % 16` bytes that
       exist, instead of reading and writing a whole extra 16-byte block. */
void AES_CTR_encrypt(const unsigned char *in,
                     unsigned char *out,
                     const unsigned char ivec[8],
                     const unsigned char nonce[4],
                     unsigned long length,
                     const unsigned char *key,
                     int number_of_rounds)
{
    const __m128i *rk = (const __m128i*)key;
    __m128i ctr_block, tmp, ONE, BSWAP_EPI64;
    unsigned char first_ctr[16], pad[16];
    unsigned long i, k, full_blocks, tail, blocks;
    int j;

    full_blocks = length / 16;
    tail = length % 16;
    blocks = full_blocks + (tail ? 1 : 0);

    /* initial counter block, assembled byte by byte (no type punning) */
    for(k = 0; k < 4; k++)
        first_ctr[k] = nonce[k];
    for(k = 0; k < 8; k++)
        first_ctr[4 + k] = ivec[k];
    first_ctr[12] = 0;
    first_ctr[13] = 0;
    first_ctr[14] = 0;
    first_ctr[15] = 1;

    /* ONE adds 1 to the low dword of the upper 64-bit lane */
    ONE = _mm_set_epi32(0, 1, 0, 0);
    /* reverses the bytes within each 64-bit lane */
    BSWAP_EPI64 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);

    /* keep the counter in swapped form so _mm_add_epi64 increments it */
    ctr_block = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)first_ctr), BSWAP_EPI64);

    for(i = 0; i < blocks; i++)
    {
        tmp = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
        ctr_block = _mm_add_epi64(ctr_block, ONE);
        tmp = _mm_xor_si128(tmp, rk[0]);
        for(j = 1; j < number_of_rounds; j++)
        {
            tmp = _mm_aesenc_si128(tmp, rk[j]);
        }
        tmp = _mm_aesenclast_si128(tmp, rk[j]);

        if(i < full_blocks)
        {
            tmp = _mm_xor_si128(tmp, _mm_loadu_si128(&((const __m128i*)in)[i]));
            _mm_storeu_si128(&((__m128i*)out)[i], tmp);
        }
        else
        {
            /* final partial block: use only the first `tail` keystream bytes */
            _mm_storeu_si128((__m128i*)pad, tmp);
            for(k = 0; k < tail; k++)
                out[16 * full_blocks + k] = (unsigned char)(in[16 * full_blocks + k] ^ pad[k]);
        }
    }
}
- #endif
- #endif
- #endif
|