added iOS source code
[wl-app.git] / iOS / Pods / SSZipArchive / SSZipArchive / minizip / aes / aes_ni.c
1 /*
2 Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
3
4 The redistribution and use of this software (with or without changes)
5 is allowed without the payment of fees or royalties provided that:
6
7   source code distributions include the above copyright notice, this
8   list of conditions and the following disclaimer;
9
10   binary distributions include the above copyright notice, this list
11   of conditions and the following disclaimer in their documentation.
12
13 This software is provided 'as is' with no explicit or implied warranties
14 in respect of its operation, including, but not limited to, correctness
15 and fitness for purpose.
16 ---------------------------------------------------------------------------
17 Issue Date: 09/09/2014
18 */
19
#include "aes_ni.h"

#include <string.h>
21
22 #if defined( USE_INTEL_AES_IF_PRESENT )
23
24 #if defined(_MSC_VER)
25
26 #include <intrin.h>
27 #pragma intrinsic(__cpuid)
28 #define INLINE  __inline
29
30 INLINE int has_aes_ni(void)
31 {
32         static int test = -1;
33         if(test < 0)
34         {
35         int cpu_info[4];
36         __cpuid(cpu_info, 1);
37                 test = cpu_info[2] & 0x02000000;
38         }
39         return test;
40 }
41
42 #elif defined( __GNUC__ )
43
44 #include <cpuid.h>
45
46 #if !defined(__clang__)
47 #pragma GCC target ("ssse3")
48 #pragma GCC target ("sse4.1")
49 #pragma GCC target ("aes")
50 #endif
51
52 #include <x86intrin.h>
53 #define INLINE  static __inline
54
/* Report AES-NI support (GCC/Clang build).  The CPUID result is cached
   in a function-local static so the instruction executes at most once. */
static __inline int has_aes_ni(void)
{
    static int aes_ni_flag = -1;    /* -1 = not probed yet */

    if(aes_ni_flag < 0)
    {
        unsigned int eax, ebx, ecx, edx;
        /* ECX bit 25 of CPUID leaf 1 signals the AES instruction set */
        aes_ni_flag = __get_cpuid(1, &eax, &ebx, &ecx, &edx)
                        ? (int)(ecx & 0x2000000) : 0;
    }
    return aes_ni_flag;
}
68
69 #else
70 #error AES New Instructions require Microsoft, Intel, GNU C, or CLANG
71 #endif
72
/* One AES-128 key-schedule step: xor t1 with three successive 32-bit
   left shifts of itself, then mix in the broadcast SubWord/RotWord
   value that AESKEYGENASSIST left in lane 3 of t2. */
static __inline __m128i aes_128_assist(__m128i t1, __m128i t2)
{
    __m128i sh = _mm_slli_si128(t1, 0x4);
    int i;

    for(i = 0; i < 3; ++i)
    {
        t1 = _mm_xor_si128(t1, sh);
        sh = _mm_slli_si128(sh, 0x4);
    }
    return _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0xff));
}
86
/* Expand a 16-byte key into the 11 round keys for AES-128 using the
   AESKEYGENASSIST instruction, storing them in cx->ks.  Falls back to
   the C implementation (aes_xi) when the CPU lacks AES-NI.
   Sets cx->inf.b[0] to 16 * (number of rounds); returns EXIT_SUCCESS. */
AES_RETURN aes_ni(encrypt_key128)(const unsigned char *key, aes_encrypt_ctx cx[1])
{
    __m128i t1, t2;
    __m128i *ks = (__m128i*)cx->ks;

    if(!has_aes_ni())
    {
        return aes_xi(encrypt_key128)(key, cx);
    }

    /* unaligned load: the caller's key buffer need not be 16-byte aligned */
    t1 = _mm_loadu_si128((__m128i*)key);

    ks[0] = t1;

    /* one step per round with round constants 0x01..0x36; the steps
       cannot be folded into a loop because AESKEYGENASSIST requires a
       compile-time-constant immediate */
    t2 = _mm_aeskeygenassist_si128(t1, 0x1);
    t1 = aes_128_assist(t1, t2);
    ks[1] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x2);
    t1 = aes_128_assist(t1, t2);
    ks[2] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x4);
    t1 = aes_128_assist(t1, t2);
    ks[3] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x8);
    t1 = aes_128_assist(t1, t2);
    ks[4] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x10);
    t1 = aes_128_assist(t1, t2);
    ks[5] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x20);
    t1 = aes_128_assist(t1, t2);
    ks[6] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x40);
    t1 = aes_128_assist(t1, t2);
    ks[7] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x80);
    t1 = aes_128_assist(t1, t2);
    ks[8] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x1b);
    t1 = aes_128_assist(t1, t2);
    ks[9] = t1;

    t2 = _mm_aeskeygenassist_si128(t1, 0x36);
    t1 = aes_128_assist(t1, t2);
    ks[10] = t1;

    cx->inf.l = 0;
    cx->inf.b[0] = 10 * 16;   /* 10 rounds, 16 bytes per round key */
    return EXIT_SUCCESS;
}
145
/* Key-expansion helper for AES-192.  On entry *t2 holds the raw
   AESKEYGENASSIST output; the routine mixes it into *t1 (the low 128
   bits of new key material) and then propagates the result into *t3
   (the remaining 64 bits).  All three arguments are updated in place. */
INLINE void aes_192_assist(__m128i* t1, __m128i * t2, __m128i * t3)
{
        __m128i t4;
        /* broadcast lane 1 of the assist value (the SubWord/RotWord lane
           relevant for a 192-bit schedule) */
        *t2 = _mm_shuffle_epi32(*t2, 0x55);
        t4 = _mm_slli_si128(*t1, 0x4);
        *t1 = _mm_xor_si128(*t1, t4);
        t4 = _mm_slli_si128(t4, 0x4);
        *t1 = _mm_xor_si128(*t1, t4);
        t4 = _mm_slli_si128(t4, 0x4);
        *t1 = _mm_xor_si128(*t1, t4);
        *t1 = _mm_xor_si128(*t1, *t2);
        /* feed the last word of the new low half into the high half */
        *t2 = _mm_shuffle_epi32(*t1, 0xff);
        t4 = _mm_slli_si128(*t3, 0x4);
        *t3 = _mm_xor_si128(*t3, t4);
        *t3 = _mm_xor_si128(*t3, *t2);
}
162
/* Expand a 24-byte key into the 13 round keys for AES-192.  Each
   expansion step yields 192 bits (1.5 round keys), so consecutive
   results are packed into the schedule with _mm_shuffle_pd, which
   splices 64-bit halves of two registers.  Falls back to the C
   implementation when AES-NI is absent.
   NOTE(review): the second load reads 16 bytes at key+16 although only
   8 of them are key material; the extra bytes never influence the
   schedule (they are overwritten before use), but the buffer must be
   readable there -- confirm callers tolerate the overread. */
AES_RETURN aes_ni(encrypt_key192)(const unsigned char *key, aes_encrypt_ctx cx[1])
{
    __m128i t1, t2, t3;
    __m128i *ks = (__m128i*)cx->ks;

    if(!has_aes_ni())
    {
        return aes_xi(encrypt_key192)(key, cx);
    }

    t1 = _mm_loadu_si128((__m128i*)key);
    t3 = _mm_loadu_si128((__m128i*)(key + 16));

    ks[0] = t1;
    ks[1] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x1);
    aes_192_assist(&t1, &t2, &t3);

    /* keep the low half of ks[1] (original key words), append new material */
    ks[1] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[1]), _mm_castsi128_pd(t1), 0));
    ks[2] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));

    t2 = _mm_aeskeygenassist_si128(t3, 0x2);
    aes_192_assist(&t1, &t2, &t3);
    ks[3] = t1;
    ks[4] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x4);
    aes_192_assist(&t1, &t2, &t3);
    ks[4] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[4]), _mm_castsi128_pd(t1), 0));
    ks[5] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));

    t2 = _mm_aeskeygenassist_si128(t3, 0x8);
    aes_192_assist(&t1, &t2, &t3);
    ks[6] = t1;
    ks[7] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x10);
    aes_192_assist(&t1, &t2, &t3);
    ks[7] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[7]), _mm_castsi128_pd(t1), 0));
    ks[8] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));

    t2 = _mm_aeskeygenassist_si128(t3, 0x20);
    aes_192_assist(&t1, &t2, &t3);
    ks[9] = t1;
    ks[10] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x40);
    aes_192_assist(&t1, &t2, &t3);
    ks[10] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[10]), _mm_castsi128_pd(t1), 0));
    ks[11] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1));

    t2 = _mm_aeskeygenassist_si128(t3, 0x80);
    aes_192_assist(&t1, &t2, &t3);
    ks[12] = t1;

    cx->inf.l = 0;
    cx->inf.b[0] = 12 * 16;   /* 12 rounds, 16 bytes per round key */
    return EXIT_SUCCESS;
}
223
/* First AES-256 key-schedule helper: xor *t1 with three successive
   32-bit left shifts of itself, then mix in the broadcast assist value.
   Both arguments are updated in place (callers rely only on *t1, but
   the shuffle of *t2 is kept observable exactly as before). */
static __inline void aes_256_assist1(__m128i* t1, __m128i * t2)
{
    __m128i sh = _mm_slli_si128(*t1, 0x4);
    int i;

    *t2 = _mm_shuffle_epi32(*t2, 0xff);
    for(i = 0; i < 3; ++i)
    {
        *t1 = _mm_xor_si128(*t1, sh);
        sh = _mm_slli_si128(sh, 0x4);
    }
    *t1 = _mm_xor_si128(*t1, *t2);
}
236
/* Second AES-256 key-schedule helper: derives the odd-numbered round
   key (*t3) from the even one (*t1).  AESKEYGENASSIST is used with a
   round constant of 0 purely for its SubWord step; lane 2 of the
   result holds the needed value (hence the 0xaa shuffle). */
INLINE void aes_256_assist2(__m128i* t1, __m128i * t3)
{
        __m128i t2, t4;
        t4 = _mm_aeskeygenassist_si128(*t1, 0x0);
        t2 = _mm_shuffle_epi32(t4, 0xaa);
        t4 = _mm_slli_si128(*t3, 0x4);
        *t3 = _mm_xor_si128(*t3, t4);
        t4 = _mm_slli_si128(t4, 0x4);
        *t3 = _mm_xor_si128(*t3, t4);
        t4 = _mm_slli_si128(t4, 0x4);
        *t3 = _mm_xor_si128(*t3, t4);
        *t3 = _mm_xor_si128(*t3, t2);
}
250
/* Expand a 32-byte key into the 15 round keys for AES-256.  Even-index
   round keys come from aes_256_assist1 (AESKEYGENASSIST with the round
   constant), odd-index ones from aes_256_assist2 (SubWord only).
   Falls back to the C implementation when AES-NI is absent. */
AES_RETURN aes_ni(encrypt_key256)(const unsigned char *key, aes_encrypt_ctx cx[1])
{
    __m128i t1, t2, t3;
    __m128i *ks = (__m128i*)cx->ks;

    if(!has_aes_ni())
    {
        return aes_xi(encrypt_key256)(key, cx);
    }

    /* the 256-bit key occupies the first two round-key slots */
    t1 = _mm_loadu_si128((__m128i*)key);
    t3 = _mm_loadu_si128((__m128i*)(key + 16));

    ks[0] = t1;
    ks[1] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x01);
    aes_256_assist1(&t1, &t2);
    ks[2] = t1;
    aes_256_assist2(&t1, &t3);
    ks[3] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x02);
    aes_256_assist1(&t1, &t2);
    ks[4] = t1;
    aes_256_assist2(&t1, &t3);
    ks[5] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x04);
    aes_256_assist1(&t1, &t2);
    ks[6] = t1;
    aes_256_assist2(&t1, &t3);
    ks[7] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x08);
    aes_256_assist1(&t1, &t2);
    ks[8] = t1;
    aes_256_assist2(&t1, &t3);
    ks[9] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x10);
    aes_256_assist1(&t1, &t2);
    ks[10] = t1;
    aes_256_assist2(&t1, &t3);
    ks[11] = t3;

    t2 = _mm_aeskeygenassist_si128(t3, 0x20);
    aes_256_assist1(&t1, &t2);
    ks[12] = t1;
    aes_256_assist2(&t1, &t3);
    ks[13] = t3;

    /* the final round key needs no companion odd key */
    t2 = _mm_aeskeygenassist_si128(t3, 0x40);
    aes_256_assist1(&t1, &t2);
    ks[14] = t1;

    cx->inf.l = 0;
    cx->inf.b[0] = 14 * 16;   /* 14 rounds, 16 bytes per round key */
    return EXIT_SUCCESS;
}
311
312 INLINE void enc_to_dec(aes_decrypt_ctx cx[1])
313 {
314         __m128i *ks = (__m128i*)cx->ks;
315         int j;
316
317         for( j = 1 ; j < (cx->inf.b[0] >> 4) ; ++j )
318                 ks[j] = _mm_aesimc_si128(ks[j]);
319 }
320
321 AES_RETURN aes_ni(decrypt_key128)(const unsigned char *key, aes_decrypt_ctx cx[1])
322 {
323         if(!has_aes_ni())
324         {
325                 return aes_xi(decrypt_key128)(key, cx);
326         }
327
328         if(aes_ni(encrypt_key128)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS)
329         {
330                 enc_to_dec(cx);
331                 return EXIT_SUCCESS;
332         }
333         else
334                 return EXIT_FAILURE;
335
336 }
337
338 AES_RETURN aes_ni(decrypt_key192)(const unsigned char *key, aes_decrypt_ctx cx[1])
339 {
340         if(!has_aes_ni())
341         {
342                 return aes_xi(decrypt_key192)(key, cx);
343         }
344
345         if(aes_ni(encrypt_key192)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS)
346         {
347                 enc_to_dec(cx);
348                 return EXIT_SUCCESS;
349         }
350         else
351                 return EXIT_FAILURE;
352 }
353
354 AES_RETURN aes_ni(decrypt_key256)(const unsigned char *key, aes_decrypt_ctx cx[1])
355 {
356         if(!has_aes_ni())
357         {
358                 return aes_xi(decrypt_key256)(key, cx);
359         }
360
361         if(aes_ni(encrypt_key256)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS)
362         {
363                 enc_to_dec(cx);
364                 return EXIT_SUCCESS;
365         }
366         else
367                 return EXIT_FAILURE;
368 }
369
/* Encrypt one 16-byte block with the expanded key in cx.  cx->inf.b[0]
   holds 16 * (number of rounds) and selects 10/12/14 rounds; if the
   context has not been keyed the call fails.  Falls back to the C
   implementation when AES-NI is absent. */
AES_RETURN aes_ni(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
{
    __m128i *key = (__m128i*)cx->ks, t;

    if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
        return EXIT_FAILURE;

    if(!has_aes_ni())
    {
        return aes_xi(encrypt)(in, out, cx);
    }

    /* whitening: xor the plaintext with round key 0 */
    t = _mm_xor_si128(_mm_loadu_si128((__m128i*)in), *(__m128i*)key);

    /* the cases deliberately fall through: AES-256 runs its two extra
       rounds, then the AES-192 pair, then the common ten.  No default
       is needed -- the check above guarantees one case matches. */
    switch(cx->inf.b[0])
    {
    case 14 * 16:
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        /* fall through */
    case 12 * 16:
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        /* fall through */
    case 10 * 16:
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenc_si128(t, *(__m128i*)++key);
        t = _mm_aesenclast_si128(t, *(__m128i*)++key);
    }

    _mm_storeu_si128(&((__m128i*)out)[0], t);
    return EXIT_SUCCESS;
}
408
/* Decrypt one 16-byte block with the inverse key schedule in cx (as
   produced by aes_ni(decrypt_keyNNN)).  Round keys are applied from
   the last one backwards, matching the equivalent inverse cipher used
   by the AESDEC instructions.  Falls back to the C implementation when
   AES-NI is absent. */
AES_RETURN aes_ni(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
{
    /* point at the final round key; safe even if inf.b[0] is invalid,
       since the pointer is not dereferenced before the check below */
    __m128i *key = (__m128i*)cx->ks + (cx->inf.b[0] >> 4), t;

    if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
        return EXIT_FAILURE;

    if(!has_aes_ni())
    {
        return aes_xi(decrypt)(in, out, cx);
    }

    /* whitening: xor the ciphertext with the last round key */
    t = _mm_xor_si128(_mm_loadu_si128((__m128i*)in), *(__m128i*)key);

    /* the cases deliberately fall through, walking the schedule from
       the top down; the check above guarantees one case matches */
    switch(cx->inf.b[0])
    {
    case 14 * 16:
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        /* fall through */
    case 12 * 16:
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        /* fall through */
    case 10 * 16:
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdec_si128(t, *(__m128i*)--key);
        t = _mm_aesdeclast_si128(t, *(__m128i*)--key);
    }

    _mm_storeu_si128((__m128i*)out, t);
    return EXIT_SUCCESS;
}
447
448 #ifdef ADD_AESNI_MODE_CALLS
449 #ifdef USE_AES_CONTEXT
450
/* CBC-mode encryption using AES-NI, one block per iteration.  Falls
   back to the byte-oriented aes_cbc_encrypt when AES-NI is absent.
   NOTE(review): a trailing partial block is rounded up to a full 16
   bytes, so both in and out must extend to the next 16-byte boundary
   -- confirm callers obey this.
   NOTE(review): ivec is read but never written back here, while the
   fallback may behave differently -- verify callers do not rely on a
   chained IV after the call. */
AES_RETURN aes_CBC_encrypt(const unsigned char *in,
        unsigned char *out,
        unsigned char ivec[16],
        unsigned long length,
    const aes_encrypt_ctx cx[1])
{
        __m128i feedback, data, *key = (__m128i*)cx->ks;
        int number_of_rounds = cx->inf.b[0] >> 4, j;
    unsigned long i;

    if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14)
        return EXIT_FAILURE;

    if(!has_aes_ni())
    {
        return aes_cbc_encrypt(in, out, length, ivec, cx);
    }

    /* byte count -> rounded-up block count */
    if(length % 16)
                length = length / 16 + 1;
        else length /= 16;
        feedback = _mm_loadu_si128((__m128i*)ivec);
        for(i = 0; i < length; i++)
        {
                data = _mm_loadu_si128(&((__m128i*)in)[i]);
                feedback = _mm_xor_si128(data, feedback);                 /* CBC chaining */
                feedback = _mm_xor_si128(feedback, ((__m128i*)key)[0]);   /* whitening */
                for(j = 1; j <number_of_rounds; j++)
                        feedback = _mm_aesenc_si128(feedback, ((__m128i*)key)[j]);
                feedback = _mm_aesenclast_si128(feedback, ((__m128i*)key)[j]);
                _mm_storeu_si128(&((__m128i*)out)[i], feedback);
        }
    return EXIT_SUCCESS;
}
485
/* CBC-mode decryption using AES-NI.  Expects the inverse key schedule
   produced by aes_ni(decrypt_keyNNN); round keys are applied from
   index number_of_rounds down to 0.  Falls back to the byte-oriented
   aes_cbc_decrypt when AES-NI is absent.
   NOTE(review): a trailing partial block is rounded up to a full 16
   bytes, so both in and out must extend to the next 16-byte boundary
   -- confirm callers obey this. */
AES_RETURN aes_CBC_decrypt(const unsigned char *in,
    unsigned char *out,
    unsigned char ivec[16],
    unsigned long length,
    const aes_decrypt_ctx cx[1])
{
    __m128i data, feedback, last_in, *key = (__m128i*)cx->ks;
    int number_of_rounds = cx->inf.b[0] >> 4, j;
    unsigned long i;

    if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14)
        return EXIT_FAILURE;

    if(!has_aes_ni())
    {
        return aes_cbc_decrypt(in, out, length, ivec, cx);
    }

    /* byte count -> rounded-up block count */
    if(length % 16)
        length = length / 16 + 1;
    else length /= 16;
    feedback = _mm_loadu_si128((__m128i*)ivec);
    for(i = 0; i < length; i++)
    {
        last_in = _mm_loadu_si128(&((__m128i*)in)[i]);
        data = _mm_xor_si128(last_in, ((__m128i*)key)[number_of_rounds]);
        for(j = number_of_rounds - 1; j > 0; j--)
        {
            data = _mm_aesdec_si128(data, ((__m128i*)key)[j]);
        }
        data = _mm_aesdeclast_si128(data, ((__m128i*)key)[0]);
        data = _mm_xor_si128(data, feedback);   /* undo CBC chaining */
        _mm_storeu_si128(&((__m128i*)out)[i], data);
        feedback = last_in;   /* the ciphertext block chains into the next */
    }
    return EXIT_SUCCESS;
}
523
/* Counter-increment callback for aes_ctr_crypt: steps the 32-bit word
   at bytes 8-11 of the counter block (native byte order) and, on
   wrap-around, carries into the word at bytes 12-15.
   Uses memcpy instead of the previous *(uint32_t*) casts: ctr_blk is
   an unsigned char buffer with no alignment guarantee, so the direct
   casts were misaligned-access/strict-aliasing undefined behaviour.
   Compilers lower these memcpys to single loads/stores. */
static void ctr_inc(unsigned char *ctr_blk)
{
    uint32_t c;

    memcpy(&c, ctr_blk + 8, sizeof(c));
    c++;
    memcpy(ctr_blk + 8, &c, sizeof(c));

    if(!c)
    {
        uint32_t d;
        memcpy(&d, ctr_blk + 12, sizeof(d));
        d++;
        memcpy(ctr_blk + 12, &d, sizeof(d));
    }
}
535
536 AES_RETURN AES_CTR_encrypt(const unsigned char *in,
537     unsigned char *out,
538     const unsigned char ivec[8],
539     const unsigned char nonce[4],
540     unsigned long length,
541     const aes_encrypt_ctx cx[1])
542 {
543     __m128i ctr_block = { 0 }, *key = (__m128i*)cx->ks, tmp, ONE, BSWAP_EPI64;
544     int number_of_rounds = cx->inf.b[0] >> 4, j;
545     unsigned long i;
546
547     if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14)
548         return EXIT_FAILURE;
549
550     if(!has_aes_ni())
551     {
552         unsigned char ctr_blk[16];
553         *(uint64_t*)ctr_blk = *(uint64_t*)ivec;
554         *(uint32_t*)(ctr_blk + 8) = *(uint32_t*)nonce;
555         return aes_ctr_crypt(in, out, length, (unsigned char*)ctr_blk, ctr_inc, cx);
556     }
557
558     if(length % 16)
559         length = length / 16 + 1;
560     else length /= 16;
561     ONE = _mm_set_epi32(0, 1, 0, 0);
562     BSWAP_EPI64 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
563 #ifdef _MSC_VER
564     ctr_block = _mm_insert_epi64(ctr_block, *(long long*)ivec, 1);
565 #else
566     ctr_block = _mm_set_epi64(*(__m64*)ivec, *(__m64*)&ctr_block);
567 #endif
568     ctr_block = _mm_insert_epi32(ctr_block, *(long*)nonce, 1);
569     ctr_block = _mm_srli_si128(ctr_block, 4);
570     ctr_block = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
571     ctr_block = _mm_add_epi64(ctr_block, ONE);
572     for(i = 0; i < length; i++)
573     {
574         tmp = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
575         ctr_block = _mm_add_epi64(ctr_block, ONE);
576         tmp = _mm_xor_si128(tmp, ((__m128i*)key)[0]);
577         for(j = 1; j <number_of_rounds; j++)
578         {
579             tmp = _mm_aesenc_si128(tmp, ((__m128i*)key)[j]);
580         };
581         tmp = _mm_aesenclast_si128(tmp, ((__m128i*)key)[j]);
582         tmp = _mm_xor_si128(tmp, _mm_loadu_si128(&((__m128i*)in)[i]));
583         _mm_storeu_si128(&((__m128i*)out)[i], tmp);
584     }
585     return EXIT_SUCCESS;
586 }
587
588 #else
589
/* CBC-mode encryption with a raw, pre-expanded key schedule (no
   context struct; the caller supplies the round count directly).
   NOTE(review): a trailing partial block is rounded up to a full 16
   bytes, so both in and out must extend to the next 16-byte boundary
   -- confirm callers obey this. */
void aes_CBC_encrypt(const unsigned char *in,
    unsigned char *out,
    unsigned char ivec[16],
    unsigned long length,
    unsigned char *key,
    int number_of_rounds)
{
    __m128i feedback, data;
    unsigned long i;
    int j;
    /* byte count -> rounded-up block count */
    if(length % 16)
        length = length / 16 + 1;
    else length /= 16;
    feedback = _mm_loadu_si128((__m128i*)ivec);
    for(i = 0; i < length; i++)
    {
        data = _mm_loadu_si128(&((__m128i*)in)[i]);
        feedback = _mm_xor_si128(data, feedback);                 /* CBC chaining */
        feedback = _mm_xor_si128(feedback, ((__m128i*)key)[0]);   /* whitening */
        for(j = 1; j <number_of_rounds; j++)
            feedback = _mm_aesenc_si128(feedback, ((__m128i*)key)[j]);
        feedback = _mm_aesenclast_si128(feedback, ((__m128i*)key)[j]);
        _mm_storeu_si128(&((__m128i*)out)[i], feedback);
    }
}
615
/* CBC-mode decryption with a raw, pre-expanded key schedule.  Unlike
   the context variant above, this walks the schedule from index 0
   upwards, so the caller must supply round keys already ordered (and
   InvMixColumns-converted) for decryption.
   NOTE(review): a trailing partial block is rounded up to a full 16
   bytes, so both in and out must extend to the next 16-byte boundary
   -- confirm callers obey this. */
void aes_CBC_decrypt(const unsigned char *in,
        unsigned char *out,
        unsigned char ivec[16],
        unsigned long length,
        unsigned char *key,
        int number_of_rounds)
{
        __m128i data, feedback, last_in;
        unsigned long i;
        int j;
        /* byte count -> rounded-up block count */
        if(length % 16)
                length = length / 16 + 1;
        else length /= 16;
        feedback = _mm_loadu_si128((__m128i*)ivec);
        for(i = 0; i < length; i++)
        {
                last_in = _mm_loadu_si128(&((__m128i*)in)[i]);
                data = _mm_xor_si128(last_in, ((__m128i*)key)[0]);
                for(j = 1; j <number_of_rounds; j++)
                {
                        data = _mm_aesdec_si128(data, ((__m128i*)key)[j]);
                }
                data = _mm_aesdeclast_si128(data, ((__m128i*)key)[j]);
                data = _mm_xor_si128(data, feedback);   /* undo CBC chaining */
                _mm_storeu_si128(&((__m128i*)out)[i], data);
                feedback = last_in;   /* the ciphertext block chains into the next */
        }
}
644
/* CTR-mode encryption with a raw, pre-expanded key schedule.  The
   counter block is assembled from a 32-bit nonce and a 64-bit IV; the
   counting part is kept byte-swapped in the upper quadword, stepped
   with a 64-bit add, and advanced once before the first block.
   NOTE(review): a trailing partial block is rounded up to a full 16
   bytes, so both in and out must extend to the next 16-byte boundary
   -- confirm callers obey this. */
void AES_CTR_encrypt(const unsigned char *in,
        unsigned char *out,
        const unsigned char ivec[8],
        const unsigned char nonce[4],
        unsigned long length,
        const unsigned char *key,
        int number_of_rounds)
{
        __m128i ctr_block = { 0 }, tmp, ONE, BSWAP_EPI64;
        unsigned long i;
        int j;
        /* byte count -> rounded-up block count */
        if(length % 16)
                length = length / 16 + 1;
        else length /= 16;
        ONE = _mm_set_epi32(0, 1, 0, 0);   /* value 1 in the upper 64-bit lane */
        BSWAP_EPI64 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
#ifdef _MSC_VER
        ctr_block = _mm_insert_epi64(ctr_block, *(long long*)ivec, 1);
#else
        ctr_block = _mm_set_epi64(*(__m64*)ivec, *(__m64*)&ctr_block);
#endif
        ctr_block = _mm_insert_epi32(ctr_block, *(long*)nonce, 1);
        ctr_block = _mm_srli_si128(ctr_block, 4);   /* top 32 bits become zero */
        ctr_block = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
        ctr_block = _mm_add_epi64(ctr_block, ONE);  /* pre-step the counter */
        for(i = 0; i < length; i++)
        {
                tmp = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64);
                ctr_block = _mm_add_epi64(ctr_block, ONE);
                tmp = _mm_xor_si128(tmp, ((__m128i*)key)[0]);
                for(j = 1; j <number_of_rounds; j++)
                {
                        tmp = _mm_aesenc_si128(tmp, ((__m128i*)key)[j]);
                };
                tmp = _mm_aesenclast_si128(tmp, ((__m128i*)key)[j]);
                tmp = _mm_xor_si128(tmp, _mm_loadu_si128(&((__m128i*)in)[i]));
                _mm_storeu_si128(&((__m128i*)out)[i], tmp);
        }
}
684 #endif
685 #endif
686
687 #endif