42 | 42 |
#include <openssl/aes.h>
|
43 | 43 |
#include "aes_local.h"
|
44 | 44 |
|
45 | |
#ifndef AES_ASM
|
|
45 |
#if defined(OPENSSL_AES_CONST_TIME) && !defined(AES_ASM)
|
|
46 |
/*
 * Overlay that lets the same storage be accessed as eight bytes (b), two
 * u32 words (w) or one u64 (d).  In this file MixColumns/InvMixColumns use
 * the byte view and KeyExpansion uses the word view.
 * NOTE(review): assumes u32/u64 (declared outside this chunk) are exactly
 * 32 and 64 bits wide, so that all three members cover the same 8 bytes.
 */
typedef union {
|
|
47 |
unsigned char b[8];
|
|
48 |
u32 w[2];
|
|
49 |
u64 d;
|
|
50 |
} uni;
|
|
51 |
|
|
52 |
/*
|
|
53 |
* Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
|
|
54 |
* Therefore the name "xtime".
|
|
55 |
*/
|
|
56 |
static void XtimeWord(u32 *w)
|
|
57 |
{
|
|
58 |
u32 a, b;
|
|
59 |
|
|
60 |
a = *w;
|
|
61 |
b = a & 0x80808080u;
|
|
62 |
a ^= b;
|
|
63 |
b -= b >> 7;
|
|
64 |
b &= 0x1B1B1B1Bu;
|
|
65 |
b ^= a << 1;
|
|
66 |
*w = b;
|
|
67 |
}
|
|
68 |
|
|
69 |
/*
 * 64-bit counterpart of XtimeWord: doubles each of the eight bytes packed
 * in *w in GF(2^8) modulo x^8 + x^4 + x^3 + x + 1, all lanes in parallel.
 */
static void XtimeLong(u64 *w)
{
    const u64 v = *w;
    /* Top bit of every byte: the lanes that overflow when doubled. */
    const u64 top = v & 0x8080808080808080uLL;
    /* 0x80 - 0x01 = 0x7F in each flagged lane; masking leaves 0x1B there. */
    const u64 fold = (top - (top >> 7)) & 0x1B1B1B1B1B1B1B1BuLL;

    /* Drop the top bits before shifting so nothing leaks into the next lane. */
    *w = ((v ^ top) << 1) ^ fold;
}
|
|
81 |
|
|
82 |
/*
|
|
83 |
* This computes w := S * w ^ -1 + c, where c = {01100011}.
|
|
84 |
* Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1) we do the inversion
|
|
85 |
* in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
|
|
86 |
* and GF(GF(2^2)^2) mod (X^2+X+2)
|
|
87 |
* and GF(2^2) mod (X^2+X+1)
|
|
88 |
* The first part of the algorithm below transfers the coordinates
|
|
89 |
* {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
|
|
90 |
* {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
|
|
91 |
* {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
|
|
92 |
* The last part undoes the coordinate transfer and the final affine
|
|
93 |
* transformation S:
|
|
94 |
* b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
|
|
95 |
* in one step.
|
|
96 |
* The multiplication in GF(2^2^2^2) is done in ordinary coords:
|
|
97 |
* A = (a0*1 + a1*x^4)
|
|
98 |
* B = (b0*1 + b1*x^4)
|
|
99 |
* AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
|
|
100 |
* When A = (a0,a1) is given we want to solve AB = 1:
|
|
101 |
* (a) 1 = a0*b0 + 8*a1*b1
|
|
102 |
* (b) 0 = a1*b0 + (a0+a1)*b1
|
|
103 |
* => multiply (a) by a1 and (b) by a0
|
|
104 |
* (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
|
|
105 |
* (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
|
|
106 |
* => add (c) + (d)
|
|
107 |
* (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
|
|
108 |
* => therefore
|
|
109 |
* b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
|
|
110 |
* => and adding (a1*b0) to (b) we get
|
|
111 |
* (f) a1*b0 = (a0+a1)*b1
|
|
112 |
* => therefore
|
|
113 |
* b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
|
|
114 |
* Note this formula also works for the case
|
|
115 |
* (a0+a1)*a0 + 8*a1*a1 = 0
|
|
116 |
* if the inverse element for 0^-1 is mapped to 0.
|
|
117 |
* Repeat the same for GF(2^2^2) and GF(2^2).
|
|
118 |
* We get the following algorithm:
|
|
119 |
* inv8(a0,a1):
|
|
120 |
* x0 = a0^a1
|
|
121 |
* [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
|
|
122 |
* y1 = mul4(8,y1);
|
|
123 |
* t = inv4(y0^y1);
|
|
124 |
* [b0,b1] = mul4([x0,a1],[t,t]); (*)
|
|
125 |
* return [b0,b1];
|
|
126 |
* The non-linear multiplies (*) can be done in parallel at no extra cost.
|
|
127 |
*/
|
|
128 |
/*
 * Apply the AES S-box to each of the four bytes packed in *w.
 *
 * The computation uses only shifts and bitwise logic on the whole word, so
 * there are no table lookups and no data-dependent branches.  It follows
 * the three stages derived in the comment preceding this function:
 *   1. a linear change of coordinates into the tower field,
 *   2. the inversion inv8() carried out in that field,
 *   3. the inverse change of coordinates merged with the affine map S,
 *      followed by the addition of c = 0x63.
 *
 * Every mask below is one byte pattern repeated four times (32 bits).
 */
static void SubWord(u32 *w)
{
    u32 x, y, a1, a2, a3, a4, a5, a6;

    /*
     * Stage 1: linear input map.  y walks through the per-byte right
     * rotations of the input; each rotation contributes the bits selected
     * by its mask.
     */
    x = *w;
    y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
    x &= 0xDDDDDDDDu;
    x ^= y & 0x57575757u;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x1C1C1C1Cu;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x4A4A4A4Au;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x42424242u;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x64646464u;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0xE0E0E0E0u;

    /* Stage 2: inversion in the tower field. */
    a1 = x;
    a1 ^= (x & 0xF0F0F0F0u) >> 4;
    a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
    a3 = x & a1;
    a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
    a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
    a4 = a2 & a1;
    a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
    a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
    a5 = (a3 & 0xCCCCCCCCu) >> 2;
    a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
    a4 = a5 & 0x22222222u;
    a4 |= a4 >> 1;
    a4 ^= (a5 << 1) & 0x22222222u;
    a3 ^= a4;
    a5 = a3 & 0xA0A0A0A0u;
    a5 |= a5 >> 1;
    a5 ^= (a3 << 1) & 0xA0A0A0A0u;
    a4 = a5 & 0xC0C0C0C0u;
    a6 = a4 >> 2;
    a4 ^= (a5 << 2) & 0xC0C0C0C0u;
    a5 = a6 & 0x20202020u;
    a5 |= a5 >> 1;
    a5 ^= (a6 << 1) & 0x20202020u;
    a4 |= a5;
    a3 ^= a4 >> 4;
    a3 &= 0x0F0F0F0Fu;
    a2 = a3;
    a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
    a4 = a3 & a2;
    /* 32-bit mask, same byte pattern as the other 0x0A masks in this step. */
    a4 ^= (a4 & 0x0A0A0A0Au) >> 1;
    a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
    a5 = a4 & 0x08080808u;
    a5 |= a5 >> 1;
    a5 ^= (a4 << 1) & 0x08080808u;
    a4 ^= a5 >> 2;
    a4 &= 0x03030303u;
    a4 ^= (a4 & 0x02020202u) >> 1;
    a4 |= a4 << 2;
    a3 = a2 & a4;
    a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
    a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
    a3 |= a3 << 4;
    a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
    x = a1 & a3;
    x ^= (x & 0xAAAAAAAAu) >> 1;
    x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
    a4 = a2 & a3;
    a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
    a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
    a5 = (x & 0xCCCCCCCCu) >> 2;
    x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
    a4 = a5 & 0x22222222u;
    a4 |= a4 >> 1;
    a4 ^= (a5 << 1) & 0x22222222u;
    x ^= a4;

    /*
     * Stage 3: linear output map (inverse coordinate change merged with
     * the affine transformation S).  Note the second step rotates by two.
     */
    y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
    x &= 0x39393939u;
    x ^= y & 0x3F3F3F3Fu;
    y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
    x ^= y & 0x97979797u;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x9B9B9B9Bu;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x3C3C3C3Cu;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0xDDDDDDDDu;
    y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
    x ^= y & 0x72727272u;

    /* Add the affine constant c = 0x63 to every byte. */
    x ^= 0x63636363u;
    *w = x;
}
|
|
218 |
|
|
219 |
/* Rotate every byte lane of v right by one bit. */
static u64 SubLongRor1(u64 v)
{
    return ((v & 0xFEFEFEFEFEFEFEFEuLL) >> 1)
           | ((v & 0x0101010101010101uLL) << 7);
}

/* Exchange the upper and lower bit pair inside every nibble of v. */
static u64 SubLongSwapPairs(u64 v)
{
    return ((v & 0xCCCCCCCCCCCCCCCCuLL) >> 2)
           | ((v & 0x3333333333333333uLL) << 2);
}

/*
 * Multiply p and q pairwise: each adjacent bit pair is treated as an
 * element of GF(2^2) mod X^2+X+1.  hi must select the high bit of every
 * pair that is to be multiplied.
 */
static u64 SubLongPairMul(u64 p, u64 q, u64 hi)
{
    u64 r = p & q;

    r ^= (r & hi) >> 1;
    r ^= (((p << 1) & q) ^ ((q << 1) & p)) & hi;
    return r;
}

/*
 * For every bit pair whose high bit is selected by sel, return the pair
 * (high ^ low, high), i.e. the pair multiplied by X in GF(2^2).  All bits
 * outside the selected pairs are zero in the result.
 */
static u64 SubLongPairTimesX(u64 v, u64 sel)
{
    u64 r = v & sel;

    r |= r >> 1;
    r ^= (v << 1) & sel;
    return r;
}

/*
 * Apply the AES S-box to each of the eight bytes packed in *w.  This is
 * the 64-bit counterpart of SubWord: linear input map, tower-field
 * inversion, linear output map merged with the affine transformation, and
 * finally the addition of c = 0x63.  Only shifts and bitwise logic are
 * used.
 */
static void SubLong(u64 *w)
{
    u64 v, rot, t1, t2, t3, t4, t5, t6;

    /* Linear input map: combine masked per-byte rotations of the input. */
    v = *w;
    rot = SubLongRor1(v);
    v &= 0xDDDDDDDDDDDDDDDDuLL;
    v ^= rot & 0x5757575757575757uLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x1C1C1C1C1C1C1C1CuLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x4A4A4A4A4A4A4A4AuLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x4242424242424242uLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x6464646464646464uLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0xE0E0E0E0E0E0E0E0uLL;

    /* Inversion in the tower field. */
    t1 = v ^ ((v & 0xF0F0F0F0F0F0F0F0uLL) >> 4);
    t2 = SubLongSwapPairs(v);
    t3 = SubLongPairMul(v, t1, 0xAAAAAAAAAAAAAAAAuLL);
    t4 = SubLongPairMul(t2, t1, 0xAAAAAAAAAAAAAAAAuLL);
    t5 = (t3 & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
    t3 ^= ((t4 << 2) ^ t4) & 0xCCCCCCCCCCCCCCCCuLL;
    t3 ^= SubLongPairTimesX(t5, 0x2222222222222222uLL);
    t5 = SubLongPairTimesX(t3, 0xA0A0A0A0A0A0A0A0uLL);
    t4 = t5 & 0xC0C0C0C0C0C0C0C0uLL;
    t6 = t4 >> 2;
    t4 ^= (t5 << 2) & 0xC0C0C0C0C0C0C0C0uLL;
    t4 |= SubLongPairTimesX(t6, 0x2020202020202020uLL);
    t3 ^= t4 >> 4;
    t3 &= 0x0F0F0F0F0F0F0F0FuLL;
    t2 = t3 ^ ((t3 & 0x0C0C0C0C0C0C0C0CuLL) >> 2);
    t4 = SubLongPairMul(t3, t2, 0x0A0A0A0A0A0A0A0AuLL);
    t5 = SubLongPairTimesX(t4, 0x0808080808080808uLL);
    t4 ^= t5 >> 2;
    t4 &= 0x0303030303030303uLL;
    t4 ^= (t4 & 0x0202020202020202uLL) >> 1;
    t4 |= t4 << 2;
    t3 = SubLongPairMul(t2, t4, 0x0A0A0A0A0A0A0A0AuLL);
    t3 |= t3 << 4;
    t2 = SubLongSwapPairs(t1);
    v = SubLongPairMul(t1, t3, 0xAAAAAAAAAAAAAAAAuLL);
    t4 = SubLongPairMul(t2, t3, 0xAAAAAAAAAAAAAAAAuLL);
    t5 = (v & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
    v ^= ((t4 << 2) ^ t4) & 0xCCCCCCCCCCCCCCCCuLL;
    v ^= SubLongPairTimesX(t5, 0x2222222222222222uLL);

    /* Linear output map; note that the second step rotates by two bits. */
    rot = SubLongRor1(v);
    v &= 0x3939393939393939uLL;
    v ^= rot & 0x3F3F3F3F3F3F3F3FuLL;
    rot = ((rot & 0xFCFCFCFCFCFCFCFCuLL) >> 2)
          | ((rot & 0x0303030303030303uLL) << 6);
    v ^= rot & 0x9797979797979797uLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x9B9B9B9B9B9B9B9BuLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x3C3C3C3C3C3C3C3CuLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0xDDDDDDDDDDDDDDDDuLL;
    rot = SubLongRor1(rot);
    v ^= rot & 0x7272727272727272uLL;

    /* Add the affine constant c = 0x63 to every byte. */
    v ^= 0x6363636363636363uLL;
    *w = v;
}
|
|
309 |
|
|
310 |
/*
|
|
311 |
* This computes w := (S^-1 * (w + c))^-1
|
|
312 |
*/
|
|
313 |
static void InvSubLong(u64 *w)
|
|
314 |
{
|
|
315 |
u64 x, y, a1, a2, a3, a4, a5, a6;
|
|
316 |
|
|
317 |
x = *w;
|
|
318 |
x ^= 0x6363636363636363uLL;
|
|
319 |
y = ((x & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((x & 0x0101010101010101uLL) << 7);
|
|
320 |
x &= 0xFDFDFDFDFDFDFDFDuLL;
|
|
321 |
x ^= y & 0x5E5E5E5E5E5E5E5EuLL;
|
|
322 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
323 |
x ^= y & 0xF3F3F3F3F3F3F3F3uLL;
|
|
324 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
325 |
x ^= y & 0xF5F5F5F5F5F5F5F5uLL;
|
|
326 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
327 |
x ^= y & 0x7878787878787878uLL;
|
|
328 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
329 |
x ^= y & 0x7777777777777777uLL;
|
|
330 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
331 |
x ^= y & 0x1515151515151515uLL;
|
|
332 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
333 |
x ^= y & 0xA5A5A5A5A5A5A5A5uLL;
|
|
334 |
a1 = x;
|
|
335 |
a1 ^= (x & 0xF0F0F0F0F0F0F0F0uLL) >> 4;
|
|
336 |
a2 = ((x & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((x & 0x3333333333333333uLL) << 2);
|
|
337 |
a3 = x & a1;
|
|
338 |
a3 ^= (a3 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
|
|
339 |
a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAAAAAAAAAuLL;
|
|
340 |
a4 = a2 & a1;
|
|
341 |
a4 ^= (a4 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
|
|
342 |
a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAAAAAAAAAuLL;
|
|
343 |
a5 = (a3 & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
|
|
344 |
a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCCCCCCCCCuLL;
|
|
345 |
a4 = a5 & 0x2222222222222222uLL;
|
|
346 |
a4 |= a4 >> 1;
|
|
347 |
a4 ^= (a5 << 1) & 0x2222222222222222uLL;
|
|
348 |
a3 ^= a4;
|
|
349 |
a5 = a3 & 0xA0A0A0A0A0A0A0A0uLL;
|
|
350 |
a5 |= a5 >> 1;
|
|
351 |
a5 ^= (a3 << 1) & 0xA0A0A0A0A0A0A0A0uLL;
|
|
352 |
a4 = a5 & 0xC0C0C0C0C0C0C0C0uLL;
|
|
353 |
a6 = a4 >> 2;
|
|
354 |
a4 ^= (a5 << 2) & 0xC0C0C0C0C0C0C0C0uLL;
|
|
355 |
a5 = a6 & 0x2020202020202020uLL;
|
|
356 |
a5 |= a5 >> 1;
|
|
357 |
a5 ^= (a6 << 1) & 0x2020202020202020uLL;
|
|
358 |
a4 |= a5;
|
|
359 |
a3 ^= a4 >> 4;
|
|
360 |
a3 &= 0x0F0F0F0F0F0F0F0FuLL;
|
|
361 |
a2 = a3;
|
|
362 |
a2 ^= (a3 & 0x0C0C0C0C0C0C0C0CuLL) >> 2;
|
|
363 |
a4 = a3 & a2;
|
|
364 |
a4 ^= (a4 & 0x0A0A0A0A0A0A0A0AuLL) >> 1;
|
|
365 |
a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0A0A0A0A0AuLL;
|
|
366 |
a5 = a4 & 0x0808080808080808uLL;
|
|
367 |
a5 |= a5 >> 1;
|
|
368 |
a5 ^= (a4 << 1) & 0x0808080808080808uLL;
|
|
369 |
a4 ^= a5 >> 2;
|
|
370 |
a4 &= 0x0303030303030303uLL;
|
|
371 |
a4 ^= (a4 & 0x0202020202020202uLL) >> 1;
|
|
372 |
a4 |= a4 << 2;
|
|
373 |
a3 = a2 & a4;
|
|
374 |
a3 ^= (a3 & 0x0A0A0A0A0A0A0A0AuLL) >> 1;
|
|
375 |
a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0A0A0A0A0AuLL;
|
|
376 |
a3 |= a3 << 4;
|
|
377 |
a2 = ((a1 & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((a1 & 0x3333333333333333uLL) << 2);
|
|
378 |
x = a1 & a3;
|
|
379 |
x ^= (x & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
|
|
380 |
x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAAAAAAAAAuLL;
|
|
381 |
a4 = a2 & a3;
|
|
382 |
a4 ^= (a4 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
|
|
383 |
a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAAAAAAAAAuLL;
|
|
384 |
a5 = (x & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
|
|
385 |
x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCCCCCCCCCuLL;
|
|
386 |
a4 = a5 & 0x2222222222222222uLL;
|
|
387 |
a4 |= a4 >> 1;
|
|
388 |
a4 ^= (a5 << 1) & 0x2222222222222222uLL;
|
|
389 |
x ^= a4;
|
|
390 |
y = ((x & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((x & 0x0101010101010101uLL) << 7);
|
|
391 |
x &= 0xB5B5B5B5B5B5B5B5uLL;
|
|
392 |
x ^= y & 0x4040404040404040uLL;
|
|
393 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
394 |
x ^= y & 0x8080808080808080uLL;
|
|
395 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
396 |
x ^= y & 0x1616161616161616uLL;
|
|
397 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
398 |
x ^= y & 0xEBEBEBEBEBEBEBEBuLL;
|
|
399 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
400 |
x ^= y & 0x9797979797979797uLL;
|
|
401 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
402 |
x ^= y & 0xFBFBFBFBFBFBFBFBuLL;
|
|
403 |
y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
|
|
404 |
x ^= y & 0x7D7D7D7D7D7D7D7DuLL;
|
|
405 |
*w = x;
|
|
406 |
}
|
|
407 |
|
|
408 |
/*
 * AES ShiftRows on the 16-byte state.  The state is addressed as bytes
 * with index 4*column + row; row r is rotated left by r columns, so row 0
 * is left untouched.
 */
static void ShiftRows(u64 *state)
{
    unsigned char *bytes = (unsigned char *)state;
    unsigned char row[4];
    int r, c;

    for (r = 0; r < 4; r++) {
        /* Snapshot the row first: the write-back below overwrites it. */
        for (c = 0; c < 4; c++)
            row[c] = bytes[4 * c + r];
        for (c = 0; c < 4; c++)
            bytes[4 * c + r] = row[(r + c) % 4];
    }
}
|
|
426 |
|
|
427 |
/*
 * Inverse of ShiftRows.  The state is addressed as bytes with index
 * 4*column + row; row r is rotated right by r columns.
 */
static void InvShiftRows(u64 *state)
{
    unsigned char *bytes = (unsigned char *)state;
    unsigned char row[4];
    int r, c;

    for (r = 0; r < 4; r++) {
        /* Snapshot the row first: the write-back below overwrites it. */
        for (c = 0; c < 4; c++)
            row[c] = bytes[4 * c + r];
        /* The +4 keeps the left operand of % non-negative. */
        for (c = 0; c < 4; c++)
            bytes[4 * c + r] = row[(4 + c - r) % 4];
    }
}
|
|
445 |
|
|
446 |
/*
 * AES MixColumns on the 16-byte state.  Each u64 holds two 4-byte columns.
 * For a column (b0,b1,b2,b3) every output byte is
 *     2*b[i] ^ 3*b[i+1] ^ b[i+2] ^ b[i+3]      (indices mod 4),
 * assembled from the XOR of the three other bytes, the doubled column
 * (XtimeLong) and the doubled column rotated by one byte.
 */
static void MixColumns(u64 *state)
{
    uni dbl;    /* the column pair, later its doubled value */
    uni acc;    /* accumulates the result */
    int half, i;

    for (half = 0; half < 2; half++) {
        dbl.d = state[half];
        acc.d = dbl.d;
        /* After these two steps every byte is the XOR of its whole column. */
        acc.d ^= ((acc.d & 0xFFFF0000FFFF0000uLL) >> 16)
                 | ((acc.d & 0x0000FFFF0000FFFFuLL) << 16);
        acc.d ^= ((acc.d & 0xFF00FF00FF00FF00uLL) >> 8)
                 | ((acc.d & 0x00FF00FF00FF00FFuLL) << 8);
        /* Remove the byte itself: XOR of the three other bytes remains. */
        acc.d ^= dbl.d;
        XtimeLong(&dbl.d);
        acc.d ^= dbl.d;
        /* Add the doubled next byte of the same column (wraps 3 -> 0). */
        for (i = 0; i < 8; i++)
            acc.b[i] ^= dbl.b[(i & 4) | ((i + 1) & 3)];
        state[half] = acc.d;
    }
}
|
|
473 |
|
|
474 |
/*
 * AES InvMixColumns on the 16-byte state.  Each u64 holds two 4-byte
 * columns.  The first part is identical to MixColumns (coefficients
 * 2,3,1,1); the second part adds 4*(b[i] ^ b[i+2]) and 8*(b0^b1^b2^b3),
 * which raises the coefficients to 14,11,13,9.
 */
static void InvMixColumns(u64 *state)
{
    uni mul;    /* the column pair, then its multiples 2x, 4x, 8x */
    uni acc;    /* accumulates the result */
    int half, i;

    for (half = 0; half < 2; half++) {
        mul.d = state[half];
        acc.d = mul.d;
        /* After these two steps every byte is the XOR of its whole column. */
        acc.d ^= ((acc.d & 0xFFFF0000FFFF0000uLL) >> 16)
                 | ((acc.d & 0x0000FFFF0000FFFFuLL) << 16);
        acc.d ^= ((acc.d & 0xFF00FF00FF00FF00uLL) >> 8)
                 | ((acc.d & 0x00FF00FF00FF00FFuLL) << 8);
        /* Remove the byte itself: XOR of the three other bytes remains. */
        acc.d ^= mul.d;

        /* 2x: the byte itself and the next byte of the column. */
        XtimeLong(&mul.d);
        acc.d ^= mul.d;
        for (i = 0; i < 8; i++)
            acc.b[i] ^= mul.b[(i & 4) | ((i + 1) & 3)];

        /* 4x, folded over byte distance two. */
        XtimeLong(&mul.d);
        mul.d ^= ((mul.d & 0xFFFF0000FFFF0000uLL) >> 16)
                 | ((mul.d & 0x0000FFFF0000FFFFuLL) << 16);
        acc.d ^= mul.d;

        /* 8x, folded once more so it covers the whole column. */
        XtimeLong(&mul.d);
        mul.d ^= ((mul.d & 0xFF00FF00FF00FF00uLL) >> 8)
                 | ((mul.d & 0x00FF00FF00FF00FFuLL) << 8);
        acc.d ^= mul.d;

        state[half] = acc.d;
    }
}
|
|
509 |
|
|
510 |
/*
 * XOR one round key (two u64 at w) into the two-u64 state.
 */
static void AddRoundKey(u64 *state, const u64 *w)
{
    int i;

    for (i = 0; i < 2; i++)
        state[i] ^= w[i];
}
|
|
515 |
|
|
516 |
static void Cipher(const unsigned char *in, unsigned char *out,
|
|
517 |
const u64 *w, int nr)
|
|
518 |
{
|
|
519 |
u64 state[2];
|
|
520 |
int i;
|
|
521 |
|
|
522 |
memcpy(state, in, 16);
|
|
523 |
|
|
524 |
AddRoundKey(state, w);
|
|
525 |
|
|
526 |
for (i = 1; i < nr; i++) {
|
|
527 |
SubLong(&state[0]);
|
|
528 |
SubLong(&state[1]);
|
|
529 |
ShiftRows(state);
|
|
530 |
MixColumns(state);
|
|
531 |
AddRoundKey(state, w + i*2);
|
|
532 |
}
|
|
533 |
|
|
534 |
SubLong(&state[0]);
|
|
535 |
SubLong(&state[1]);
|
|
536 |
ShiftRows(state);
|
|
537 |
AddRoundKey(state, w + nr*2);
|
|
538 |
|
|
539 |
memcpy(out, state, 16);
|
|
540 |
}
|
|
541 |
|
|
542 |
static void InvCipher(const unsigned char *in, unsigned char *out,
|
|
543 |
const u64 *w, int nr)
|
|
544 |
|
|
545 |
{
|
|
546 |
u64 state[2];
|
|
547 |
int i;
|
|
548 |
|
|
549 |
memcpy(state, in, 16);
|
|
550 |
|
|
551 |
AddRoundKey(state, w + nr*2);
|
|
552 |
|
|
553 |
for (i = nr - 1; i > 0; i--) {
|
|
554 |
InvShiftRows(state);
|
|
555 |
InvSubLong(&state[0]);
|
|
556 |
InvSubLong(&state[1]);
|
|
557 |
AddRoundKey(state, w + i*2);
|
|
558 |
InvMixColumns(state);
|
|
559 |
}
|
|
560 |
|
|
561 |
InvShiftRows(state);
|
|
562 |
InvSubLong(&state[0]);
|
|
563 |
InvSubLong(&state[1]);
|
|
564 |
AddRoundKey(state, w);
|
|
565 |
|
|
566 |
memcpy(out, state, 16);
|
|
567 |
}
|
|
568 |
|
|
569 |
/*
 * Rotate the four bytes of *x by one position in memory order: the byte at
 * offset 0 moves to offset 3 and every other byte moves one offset down.
 */
static void RotWord(u32 *x)
{
    unsigned char *p = (unsigned char *)x;
    const unsigned char first = p[0];
    int i;

    for (i = 0; i < 3; i++)
        p[i] = p[i + 1];
    p[3] = first;
}
|
|
581 |
|
|
582 |
/*
 * Expand the user key into the round-key schedule.
 *
 * key  nk*4 bytes of key material
 * w    receives (nr+1)*2 u64 values, two per round key; the first nk/2
 *      entries are a plain copy of the key
 * nr   number of rounds
 * nk   key length in 32-bit words
 *
 * The schedule is produced one u64 (two words) per iteration.
 */
static void KeyExpansion(const unsigned char *key, u64 *w,
                         int nr, int nk)
{
    const int stride = nk / 2;          /* key length in u64 units */
    const int total = (nr + 1) * 2;     /* schedule length in u64 units */
    u32 rcon;
    u32 t;
    uni cur;
    int idx;

    memcpy(w, key, nk*4);
    /* Round constant 1 stored as the first byte in memory order. */
    memcpy(&rcon, "\1\0\0\0", 4);
    cur.d = w[stride - 1];
    for (idx = stride; idx < total; idx++) {
        const int phase = idx % stride;

        /* Start from the most recently produced word. */
        t = cur.w[1];
        if (phase == 0) {
            RotWord(&t);
            SubWord(&t);
            t ^= rcon;
            XtimeWord(&rcon);
        } else if (nk > 6 && phase == 2) {
            /* Extra substitution used only when nk > 6. */
            SubWord(&t);
        }
        cur.d = w[idx - stride];
        cur.w[0] ^= t;
        cur.w[1] ^= cur.w[0];
        w[idx] = cur.d;
    }
}
|
|
610 |
|
|
611 |
/**
|
|
612 |
* Expand the cipher key into the encryption key schedule.
|
|
613 |
*/
|
|
614 |
int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
|
|
615 |
AES_KEY *key)
|
|
616 |
{
|
|
617 |
u64 *rk;
|
|
618 |
|
|
619 |
if (!userKey || !key)
|
|
620 |
return -1;
|
|
621 |
if (bits != 128 && bits != 192 && bits != 256)
|
|
622 |
return -2;
|
|
623 |
|
|
624 |
rk = (u64*)key->rd_key;
|
|
625 |
|
|
626 |
if (bits == 128)
|
|
627 |
key->rounds = 10;
|
|
628 |
else if (bits == 192)
|
|
629 |
key->rounds = 12;
|
|
630 |
else
|
|
631 |
key->rounds = 14;
|
|
632 |
|
|
633 |
KeyExpansion(userKey, rk, key->rounds, bits/32);
|
|
634 |
return 0;
|
|
635 |
}
|
|
636 |
|
|
637 |
/**
|
|
638 |
* Expand the cipher key into the decryption key schedule.
|
|
639 |
*/
|
|
640 |
int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
|
|
641 |
AES_KEY *key)
|
|
642 |
{
|
|
643 |
return AES_set_encrypt_key(userKey, bits, key);
|
|
644 |
}
|
|
645 |
|
|
646 |
/*
|
|
647 |
* Encrypt a single block
|
|
648 |
* in and out can overlap
|
|
649 |
*/
|
|
650 |
void AES_encrypt(const unsigned char *in, unsigned char *out,
|
|
651 |
const AES_KEY *key)
|
|
652 |
{
|
|
653 |
const u64 *rk;
|
|
654 |
|
|
655 |
assert(in && out && key);
|
|
656 |
rk = (u64*)key->rd_key;
|
|
657 |
|
|
658 |
Cipher(in, out, rk, key->rounds);
|
|
659 |
}
|
|
660 |
|
|
661 |
/*
|
|
662 |
* Decrypt a single block
|
|
663 |
* in and out can overlap
|
|
664 |
*/
|
|
665 |
void AES_decrypt(const unsigned char *in, unsigned char *out,
|
|
666 |
const AES_KEY *key)
|
|
667 |
{
|
|
668 |
const u64 *rk;
|
|
669 |
|
|
670 |
assert(in && out && key);
|
|
671 |
rk = (u64*)key->rd_key;
|
|
672 |
|
|
673 |
InvCipher(in, out, rk, key->rounds);
|
|
674 |
}
|
|
675 |
|
|
676 |
# ifndef OPENSSL_SMALL_FOOTPRINT
|
|
677 |
/*
 * Forward declaration of AES_ctr32_encrypt, which is defined further down
 * in this file after the bit-sliced helpers it uses.
 * NOTE(review): presumably here so the non-static definition has a visible
 * prototype; confirm no header already declares it.
 */
void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
|
|
678 |
size_t blocks, const AES_KEY *key,
|
|
679 |
const unsigned char *ivec);
|
|
680 |
|
|
681 |
/*
 * Transpose m viewed as an 8x8 bit matrix (one byte per row): swap the
 * off-diagonal 4x4 blocks, then the 2x2 blocks inside them, then the
 * single bits.
 */
static u64 RawToBitsTranspose(u64 m)
{
    u64 t;

    t = m & 0xF0F0F0F00F0F0F0FuLL;
    t |= (m & 0x0F0F0F0F00000000uLL) >> 28;
    t |= (m & 0x00000000F0F0F0F0uLL) << 28;
    m = t & 0xCCCC3333CCCC3333uLL;
    m |= (t & 0x3333000033330000uLL) >> 14;
    m |= (t & 0x0000CCCC0000CCCCuLL) << 14;
    t = m & 0xAA55AA55AA55AA55uLL;
    t |= (m & 0x5500550055005500uLL) >> 7;
    t |= (m & 0x00AA00AA00AA00AAuLL) << 7;
    return t;
}

/*
 * Convert 64 raw bytes into bit-sliced form: bits[k] collects bit k of
 * every input byte, with raw[p] contributing to bit position p of each
 * plane.  The input is handled in groups of eight bytes.
 */
static void RawToBits(const u8 raw[64], u64 bits[8])
{
    int grp, k;
    u64 packed;

    memset(bits, 0, 64);
    for (grp = 0; grp < 8; grp++) {
        /* Gather eight consecutive bytes, first byte in the low bits. */
        packed = 0;
        for (k = 0; k < 8; k++)
            packed |= ((u64)raw[grp * 8 + k]) << (8 * k);

        packed = RawToBitsTranspose(packed);

        /* Byte k of the transposed value belongs to bit plane k. */
        for (k = 0; k < 8; k++)
            bits[k] |= ((packed >> (8 * k)) & 0xFFuLL) << (8 * grp);
    }
}
|
|
706 |
|
|
707 |
/*
 * Transpose m viewed as an 8x8 bit matrix (one byte per row): swap the
 * off-diagonal 4x4 blocks, then the 2x2 blocks inside them, then the
 * single bits.
 */
static u64 BitsToRawTranspose(u64 m)
{
    u64 t;

    t = m & 0xF0F0F0F00F0F0F0FuLL;
    t |= (m & 0x0F0F0F0F00000000uLL) >> 28;
    t |= (m & 0x00000000F0F0F0F0uLL) << 28;
    m = t & 0xCCCC3333CCCC3333uLL;
    m |= (t & 0x3333000033330000uLL) >> 14;
    m |= (t & 0x0000CCCC0000CCCCuLL) << 14;
    t = m & 0xAA55AA55AA55AA55uLL;
    t |= (m & 0x5500550055005500uLL) >> 7;
    t |= (m & 0x00AA00AA00AA00AAuLL) << 7;
    return t;
}

/*
 * Convert eight bit planes back into 64 raw bytes; this undoes RawToBits.
 * The output is produced in groups of eight bytes.
 */
static void BitsToRaw(const u64 bits[8], u8 raw[64])
{
    int grp, k;
    u64 packed;

    for (grp = 0; grp < 8; grp++) {
        /* Gather byte grp of every plane, plane 0 in the low bits. */
        packed = 0;
        for (k = 0; k < 8; k++)
            packed |= ((bits[k] >> (8 * grp)) & 0xFFuLL) << (8 * k);

        packed = BitsToRawTranspose(packed);

        /* Byte k of the transposed value is output byte grp*8 + k. */
        for (k = 0; k < 8; k++)
            raw[grp * 8 + k] = (u8)(packed >> (8 * k));
    }
}
|
|
731 |
|
|
732 |
/*
 * Bit-sliced xtime: multiply every byte of the bit-sliced state by x.
 * Each plane moves up one position; the old top plane wraps around to
 * plane 0 and is also XORed into planes 1, 3 and 4.  Planes 0, 1, 3 and 4
 * are the set bits of 0x1B, the reduction constant used by XtimeWord.
 */
static void BitsXtime(u64 state[8])
{
    const u64 carry = state[7];
    int i;

    for (i = 7; i > 0; i--)
        state[i] = state[i - 1];
    state[0] = carry;
    state[1] ^= carry;
    state[3] ^= carry;
    state[4] ^= carry;
}
|
|
746 |
|
|
747 |
/*
|
|
748 |
* This S-box implementation follows a circuit described in
|
|
749 |
* Boyar and Peralta: "A new combinational logic minimization
|
|
750 |
* technique with applications to cryptology."
|
|
751 |
* https://eprint.iacr.org/2009/191.pdf
|
|
752 |
*
|
|
753 |
* The math is similar to above, in that it uses
|
|
754 |
* a tower field of GF(2^2^2^2) but with a different
|
|
755 |
* basis representation, that is better suited to
|
|
756 |
* logic designs.
|
|
757 |
*/
|
|
758 |
static void BitsSub(u64 state[8])
|
|
759 |
{
|
|
760 |
u64 x0, x1, x2, x3, x4, x5, x6, x7;
|
|
761 |
u64 y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11;
|
|
762 |
u64 y12, y13, y14, y15, y16, y17, y18, y19, y20, y21;
|
|
763 |
u64 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;
|
|
764 |
u64 t12, t13, t14, t15, t16, t17, t18, t19, t20, t21;
|
|
765 |
u64 t22, t23, t24, t25, t26, t27, t28, t29, t30, t31;
|
|
766 |
u64 t32, t33, t34, t35, t36, t37, t38, t39, t40, t41;
|
|
767 |
u64 t42, t43, t44, t45, t46, t47, t48, t49, t50, t51;
|
|
768 |
u64 t52, t53, t54, t55, t56, t57, t58, t59, t60, t61;
|
|
769 |
u64 t62, t63, t64, t65, t66, t67;
|
|
770 |
u64 z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11;
|
|
771 |
u64 z12, z13, z14, z15, z16, z17;
|
|
772 |
u64 s0, s1, s2, s3, s4, s5, s6, s7;
|
|
773 |
|
|
774 |
x7 = state[0];
|
|
775 |
x6 = state[1];
|
|
776 |
x5 = state[2];
|
|
777 |
x4 = state[3];
|
|
778 |
x3 = state[4];
|
|
779 |
x2 = state[5];
|
|
780 |
x1 = state[6];
|
|
781 |
x0 = state[7];
|
|
782 |
y14 = x3 ^ x5;
|
|
783 |
y13 = x0 ^ x6;
|
|
784 |
y9 = x0 ^ x3;
|
|
785 |
y8 = x0 ^ x5;
|
|
786 |
t0 = x1 ^ x2;
|
|
787 |
y1 = t0 ^ x7;
|
|
788 |
y4 = y1 ^ x3;
|
|
789 |
y12 = y13 ^ y14;
|
|
790 |
y2 = y1 ^ x0;
|
|
791 |
y5 = y1 ^ x6;
|
|
792 |
y3 = y5 ^ y8;
|
|
793 |
t1 = x4 ^ y12;
|
|
794 |
y15 = t1 ^ x5;
|
|
795 |
y20 = t1 ^ x1;
|
|
796 |
y6 = y15 ^ x7;
|
|
797 |
y10 = y15 ^ t0;
|
|
798 |
y11 = y20 ^ y9;
|
|
799 |
y7 = x7 ^ y11;
|
|
800 |
y17 = y10 ^ y11;
|
|
801 |
y19 = y10 ^ y8;
|
|
802 |
y16 = t0 ^ y11;
|
|
803 |
y21 = y13 ^ y16;
|
|
804 |
y18 = x0 ^ y16;
|
|
805 |
t2 = y12 & y15;
|
|
806 |
t3 = y3 & y6;
|
|
807 |
t4 = t3 ^ t2;
|
|
808 |
t5 = y4 & x7;
|
|
809 |
t6 = t5 ^ t2;
|
|
810 |
t7 = y13 & y16;
|
|
811 |
t8 = y5 & y1;
|
|
812 |
t9 = t8 ^ t7;
|
|
813 |
t10 = y2 & y7;
|
|
814 |
t11 = t10 ^ t7;
|
|
815 |
t12 = y9 & y11;
|
|
816 |
t13 = y14 & y17;
|
|
817 |
t14 = t13 ^ t12;
|
|
818 |
t15 = y8 & y10;
|
|
819 |
t16 = t15 ^ t12;
|
|
820 |
t17 = t4 ^ t14;
|
|
821 |
t18 = t6 ^ t16;
|
|
822 |
t19 = t9 ^ t14;
|
|
823 |
t20 = t11 ^ t16;
|
|
824 |
t21 = t17 ^ y20;
|
|
825 |
t22 = t18 ^ y19;
|
|
826 |
t23 = t19 ^ y21;
|
|
827 |
t24 = t20 ^ y18;
|
|
828 |
t25 = t21 ^ t22;
|
|
829 |
t26 = t21 & t23;
|
|
830 |
t27 = t24 ^ t26;
|
|
831 |
t28 = t25 & t27;
|
|
832 |
t29 = t28 ^ t22;
|
|
833 |
t30 = t23 ^ t24;
|
|
834 |
t31 = t22 ^ t26;
|
|
835 |
t32 = t31 & t30;
|
|
836 |
t33 = t32 ^ t24;
|
|
837 |
t34 = t23 ^ t33;
|
|
838 |
t35 = t27 ^ t33;
|
|
839 |
t36 = t24 & t35;
|
|
840 |
t37 = t36 ^ t34;
|
|
841 |
t38 = t27 ^ t36;
|
|
842 |
t39 = t29 & t38;
|
|
843 |
t40 = t25 ^ t39;
|
|
844 |
t41 = t40 ^ t37;
|
|
845 |
t42 = t29 ^ t33;
|
|
846 |
t43 = t29 ^ t40;
|
|
847 |
t44 = t33 ^ t37;
|
|
848 |
t45 = t42 ^ t41;
|
|
849 |
z0 = t44 & y15;
|
|
850 |
z1 = t37 & y6;
|
|
851 |
z2 = t33 & x7;
|
|
852 |
z3 = t43 & y16;
|
|
853 |
z4 = t40 & y1;
|
|
854 |
z5 = t29 & y7;
|
|
855 |
z6 = t42 & y11;
|
|
856 |
z7 = t45 & y17;
|
|
857 |
z8 = t41 & y10;
|
|
858 |
z9 = t44 & y12;
|
|
859 |
z10 = t37 & y3;
|
|
860 |
z11 = t33 & y4;
|
|
861 |
z12 = t43 & y13;
|
|
862 |
z13 = t40 & y5;
|
|
863 |
z14 = t29 & y2;
|
|
864 |
z15 = t42 & y9;
|
|
865 |
z16 = t45 & y14;
|
|
866 |
z17 = t41 & y8;
|
|
867 |
t46 = z15 ^ z16;
|
|
868 |
t47 = z10 ^ z11;
|
|
869 |
t48 = z5 ^ z13;
|
|
870 |
t49 = z9 ^ z10;
|
|
871 |
t50 = z2 ^ z12;
|
|
872 |
t51 = z2 ^ z5;
|
|
873 |
t52 = z7 ^ z8;
|
|
874 |
t53 = z0 ^ z3;
|
|
875 |
t54 = z6 ^ z7;
|
|
876 |
t55 = z16 ^ z17;
|
|
877 |
t56 = z12 ^ t48;
|
|
878 |
t57 = t50 ^ t53;
|
|
879 |
t58 = z4 ^ t46;
|
|
880 |
t59 = z3 ^ t54;
|
|
881 |
t60 = t46 ^ t57;
|
|
882 |
t61 = z14 ^ t57;
|
|
883 |
t62 = t52 ^ t58;
|
|
884 |
t63 = t49 ^ t58;
|
|
885 |
t64 = z4 ^ t59;
|
|
886 |
t65 = t61 ^ t62;
|
|
887 |
t66 = z1 ^ t63;
|
|
888 |
s0 = t59 ^ t63;
|
|
889 |
s6 = ~(t56 ^ t62);
|
|
890 |
s7 = ~(t48 ^ t60);
|
|
891 |
t67 = t64 ^ t65;
|
|
892 |
s3 = t53 ^ t66;
|
|
893 |
s4 = t51 ^ t66;
|
|
894 |
s5 = t47 ^ t65;
|
|
895 |
s1 = ~(t64 ^ s3);
|
|
896 |
s2 = ~(t55 ^ t67);
|
|
897 |
state[0] = s7;
|
|
898 |
state[1] = s6;
|
|
899 |
state[2] = s5;
|
|
900 |
state[3] = s4;
|
|
901 |
state[4] = s3;
|
|
902 |
state[5] = s2;
|
|
903 |
state[6] = s1;
|
|
904 |
state[7] = s0;
|
|
905 |
}
|
|
906 |
|
|
907 |
/*
 * Bit-sliced ShiftRows, applied to each of the eight bit planes.  Within
 * every 16-bit group of a plane, bit 0 of each nibble stays in place while
 * bits 1, 2 and 3 of each nibble are rotated right by 4, 8 and 12 bit
 * positions respectively (wrapping inside the 16-bit group).
 */
static void BitsShiftRows(u64 state[8])
{
    int i;

    for (i = 0; i < 8; i++) {
        const u64 in = state[i];
        /* Bit 0 of every nibble: unchanged. */
        const u64 keep = in & 0x1111111111111111uLL;
        /* Bit 1 of every nibble: rotate the 16-bit group right by 4. */
        const u64 rot4 = ((in & 0x2220222022202220uLL) >> 4)
                         | ((in & 0x0002000200020002uLL) << 12);
        /* Bit 2 of every nibble: rotate the 16-bit group right by 8. */
        const u64 rot8 = ((in & 0x4400440044004400uLL) >> 8)
                         | ((in & 0x0044004400440044uLL) << 8);
        /* Bit 3 of every nibble: rotate the 16-bit group right by 12. */
        const u64 rot12 = ((in & 0x8000800080008000uLL) >> 12)
                          | ((in & 0x0888088808880888uLL) << 4);

        state[i] = keep | rot4 | rot8 | rot12;
    }
}
|
|
921 |
|
|
922 |
/*
 * Bit-sliced MixColumns.  Mirrors the structure of MixColumns: first the
 * XOR of the three other bits of every nibble is saved per plane, then the
 * state is doubled with BitsXtime, and finally the doubled value and the
 * doubled value rotated by one position inside each nibble are added.
 */
static void BitsMixColumns(u64 state[8])
{
    u64 others[8];
    u64 cur, acc;
    int i;

    for (i = 0; i < 8; i++) {
        cur = state[i];
        acc = cur;
        /* After these two steps every bit is the XOR of its whole nibble. */
        acc ^= ((acc & 0xCCCCCCCCCCCCCCCCuLL) >> 2)
               | ((acc & 0x3333333333333333uLL) << 2);
        acc ^= ((acc & 0xAAAAAAAAAAAAAAAAuLL) >> 1)
               | ((acc & 0x5555555555555555uLL) << 1);
        /* Remove the bit itself. */
        others[i] = acc ^ cur;
    }

    BitsXtime(state);

    for (i = 0; i < 8; i++) {
        cur = state[i];
        /* Doubled value plus the doubled value of the next nibble bit. */
        state[i] = others[i] ^ cur
                   ^ (((cur & 0xEEEEEEEEEEEEEEEEuLL) >> 1)
                      | ((cur & 0x1111111111111111uLL) << 3));
    }
}
|
|
945 |
|
|
946 |
/*
 * XOR a bit-sliced round key (eight planes) into the bit-sliced state.
 */
static void BitsAddRoundKey(u64 state[8], const u64 key[8])
{
    int i = 8;

    /* The planes are independent, so the order does not matter. */
    while (i-- > 0)
        state[i] ^= key[i];
}
|
|
953 |
|
|
954 |
void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
|
|
955 |
size_t blocks, const AES_KEY *key,
|
|
956 |
const unsigned char *ivec)
|
|
957 |
{
|
|
958 |
struct {
|
|
959 |
u8 cipher[64];
|
|
960 |
u64 state[8];
|
|
961 |
u64 rd_key[AES_MAXNR + 1][8];
|
|
962 |
} *bs;
|
|
963 |
u32 ctr32;
|
|
964 |
int i;
|
|
965 |
|
|
966 |
ctr32 = GETU32(ivec + 12);
|
|
967 |
if (blocks >= 4
|
|
968 |
&& (bs = OPENSSL_malloc(sizeof(*bs)))) {
|
|
969 |
for (i = 0; i < key->rounds + 1; i++) {
|
|
970 |
memcpy(bs->cipher + 0, &key->rd_key[4 * i], 16);
|
|
971 |
memcpy(bs->cipher + 16, bs->cipher, 16);
|
|
972 |
memcpy(bs->cipher + 32, bs->cipher, 32);
|
|
973 |
RawToBits(bs->cipher, bs->rd_key[i]);
|
|
974 |
}
|
|
975 |
while (blocks) {
|
|
976 |
memcpy(bs->cipher, ivec, 12);
|
|
977 |
PUTU32(bs->cipher + 12, ctr32);
|
|
978 |
ctr32++;
|
|
979 |
memcpy(bs->cipher + 16, ivec, 12);
|
|
980 |
PUTU32(bs->cipher + 28, ctr32);
|
|
981 |
ctr32++;
|
|
982 |
memcpy(bs->cipher + 32, ivec, 12);
|
|
983 |
PUTU32(bs->cipher + 44, ctr32);
|
|
984 |
ctr32++;
|
|
985 |
memcpy(bs->cipher + 48, ivec, 12);
|
|
986 |
PUTU32(bs->cipher + 60, ctr32);
|
|
987 |
ctr32++;
|
|
988 |
RawToBits(bs->cipher, bs->state);
|
|
989 |
BitsAddRoundKey(bs->state, bs->rd_key[0]);
|
|
990 |
for (i = 1; i < key->rounds; i++) {
|
|
991 |
BitsSub(bs->state);
|
|
992 |
BitsShiftRows(bs->state);
|
|
993 |
BitsMixColumns(bs->state);
|
|
994 |
BitsAddRoundKey(bs->state, bs->rd_key[i]);
|
|
995 |
}
|
|
996 |
BitsSub(bs->state);
|
|
997 |
BitsShiftRows(bs->state);
|
|
998 |
BitsAddRoundKey(bs->state, bs->rd_key[key->rounds]);
|
|
999 |
BitsToRaw(bs->state, bs->cipher);
|
|
1000 |
for (i = 0; i < 64 && blocks; i++) {
|
|
1001 |
out[i] = in[i] ^ bs->cipher[i];
|
|
1002 |
if ((i & 15) == 15)
|
|
1003 |
blocks--;
|
|
1004 |
}
|
|
1005 |
in += i;
|
|
1006 |
out += i;
|
|
1007 |
}
|
|
1008 |
OPENSSL_clear_free(bs, sizeof(*bs));
|
|
1009 |
} else {
|
|
1010 |
unsigned char cipher[16];
|
|
1011 |
|
|
1012 |
while (blocks) {
|
|
1013 |
memcpy(cipher, ivec, 12);
|
|
1014 |
PUTU32(cipher + 12, ctr32);
|
|
1015 |
AES_encrypt(cipher, cipher, key);
|
|
1016 |
for (i = 0; i < 16; i++)
|
|
1017 |
out[i] = in[i] ^ cipher[i];
|
|
1018 |
in += 16;
|
|
1019 |
out += 16;
|
|
1020 |
ctr32++;
|
|
1021 |
blocks--;
|
|
1022 |
}
|
|
1023 |
}
|
|
1024 |
}
|
|
1025 |
# endif
|
|
1026 |
#elif !defined(AES_ASM)
|
46 | 1027 |
/*-
|
47 | 1028 |
Te0[x] = S [x].[02, 01, 01, 03];
|
48 | 1029 |
Te1[x] = S [x].[03, 02, 01, 01];
|