Annotation of src/usr.bin/ssh/chacha.c, Revision 1.2
1.2 ! djm 1: /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */
1.1 djm 2: /*
3: chacha-merged.c version 20080118
4: D. J. Bernstein
5: Public domain.
6: */
7:
8: #include "chacha.h"
9:
10: typedef unsigned char u8;
11: typedef unsigned int u32;
12:
13: typedef struct chacha_ctx chacha_ctx;
14:
15: #define U8C(v) (v##U)
16: #define U32C(v) (v##U)
17:
18: #define U8V(v) ((u8)(v) & U8C(0xFF))
19: #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
20:
21: #define ROTL32(v, n) \
22: (U32V((v) << (n)) | ((v) >> (32 - (n))))
23:
24: #define U8TO32_LITTLE(p) \
25: (((u32)((p)[0]) ) | \
26: ((u32)((p)[1]) << 8) | \
27: ((u32)((p)[2]) << 16) | \
28: ((u32)((p)[3]) << 24))
29:
30: #define U32TO8_LITTLE(p, v) \
31: do { \
32: (p)[0] = U8V((v) ); \
33: (p)[1] = U8V((v) >> 8); \
34: (p)[2] = U8V((v) >> 16); \
35: (p)[3] = U8V((v) >> 24); \
36: } while (0)
37:
38: #define ROTATE(v,c) (ROTL32(v,c))
39: #define XOR(v,w) ((v) ^ (w))
40: #define PLUS(v,w) (U32V((v) + (w)))
41: #define PLUSONE(v) (PLUS((v),1))
42:
43: #define QUARTERROUND(a,b,c,d) \
44: a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
45: c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
46: a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
47: c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
48:
49: static const char sigma[16] = "expand 32-byte k";
50: static const char tau[16] = "expand 16-byte k";
51:
52: void
53: chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
54: {
55: const char *constants;
56:
57: x->input[4] = U8TO32_LITTLE(k + 0);
58: x->input[5] = U8TO32_LITTLE(k + 4);
59: x->input[6] = U8TO32_LITTLE(k + 8);
60: x->input[7] = U8TO32_LITTLE(k + 12);
61: if (kbits == 256) { /* recommended */
62: k += 16;
63: constants = sigma;
64: } else { /* kbits == 128 */
65: constants = tau;
66: }
67: x->input[8] = U8TO32_LITTLE(k + 0);
68: x->input[9] = U8TO32_LITTLE(k + 4);
69: x->input[10] = U8TO32_LITTLE(k + 8);
70: x->input[11] = U8TO32_LITTLE(k + 12);
71: x->input[0] = U8TO32_LITTLE(constants + 0);
72: x->input[1] = U8TO32_LITTLE(constants + 4);
73: x->input[2] = U8TO32_LITTLE(constants + 8);
74: x->input[3] = U8TO32_LITTLE(constants + 12);
75: }
76:
77: void
78: chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
79: {
80: x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
81: x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
82: x->input[14] = U8TO32_LITTLE(iv + 0);
83: x->input[15] = U8TO32_LITTLE(iv + 4);
84: }
85:
86: void
87: chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
88: {
89: u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
90: u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
91: u8 *ctarget = NULL;
92: u8 tmp[64];
93: u_int i;
94:
95: if (!bytes) return;
96:
97: j0 = x->input[0];
98: j1 = x->input[1];
99: j2 = x->input[2];
100: j3 = x->input[3];
101: j4 = x->input[4];
102: j5 = x->input[5];
103: j6 = x->input[6];
104: j7 = x->input[7];
105: j8 = x->input[8];
106: j9 = x->input[9];
107: j10 = x->input[10];
108: j11 = x->input[11];
109: j12 = x->input[12];
110: j13 = x->input[13];
111: j14 = x->input[14];
112: j15 = x->input[15];
113:
114: for (;;) {
115: if (bytes < 64) {
116: for (i = 0;i < bytes;++i) tmp[i] = m[i];
117: m = tmp;
118: ctarget = c;
119: c = tmp;
120: }
121: x0 = j0;
122: x1 = j1;
123: x2 = j2;
124: x3 = j3;
125: x4 = j4;
126: x5 = j5;
127: x6 = j6;
128: x7 = j7;
129: x8 = j8;
130: x9 = j9;
131: x10 = j10;
132: x11 = j11;
133: x12 = j12;
134: x13 = j13;
135: x14 = j14;
136: x15 = j15;
137: for (i = 20;i > 0;i -= 2) {
138: QUARTERROUND( x0, x4, x8,x12)
139: QUARTERROUND( x1, x5, x9,x13)
140: QUARTERROUND( x2, x6,x10,x14)
141: QUARTERROUND( x3, x7,x11,x15)
142: QUARTERROUND( x0, x5,x10,x15)
143: QUARTERROUND( x1, x6,x11,x12)
144: QUARTERROUND( x2, x7, x8,x13)
145: QUARTERROUND( x3, x4, x9,x14)
146: }
147: x0 = PLUS(x0,j0);
148: x1 = PLUS(x1,j1);
149: x2 = PLUS(x2,j2);
150: x3 = PLUS(x3,j3);
151: x4 = PLUS(x4,j4);
152: x5 = PLUS(x5,j5);
153: x6 = PLUS(x6,j6);
154: x7 = PLUS(x7,j7);
155: x8 = PLUS(x8,j8);
156: x9 = PLUS(x9,j9);
157: x10 = PLUS(x10,j10);
158: x11 = PLUS(x11,j11);
159: x12 = PLUS(x12,j12);
160: x13 = PLUS(x13,j13);
161: x14 = PLUS(x14,j14);
162: x15 = PLUS(x15,j15);
163:
164: x0 = XOR(x0,U8TO32_LITTLE(m + 0));
165: x1 = XOR(x1,U8TO32_LITTLE(m + 4));
166: x2 = XOR(x2,U8TO32_LITTLE(m + 8));
167: x3 = XOR(x3,U8TO32_LITTLE(m + 12));
168: x4 = XOR(x4,U8TO32_LITTLE(m + 16));
169: x5 = XOR(x5,U8TO32_LITTLE(m + 20));
170: x6 = XOR(x6,U8TO32_LITTLE(m + 24));
171: x7 = XOR(x7,U8TO32_LITTLE(m + 28));
172: x8 = XOR(x8,U8TO32_LITTLE(m + 32));
173: x9 = XOR(x9,U8TO32_LITTLE(m + 36));
174: x10 = XOR(x10,U8TO32_LITTLE(m + 40));
175: x11 = XOR(x11,U8TO32_LITTLE(m + 44));
176: x12 = XOR(x12,U8TO32_LITTLE(m + 48));
177: x13 = XOR(x13,U8TO32_LITTLE(m + 52));
178: x14 = XOR(x14,U8TO32_LITTLE(m + 56));
179: x15 = XOR(x15,U8TO32_LITTLE(m + 60));
180:
181: j12 = PLUSONE(j12);
182: if (!j12) {
183: j13 = PLUSONE(j13);
184: /* stopping at 2^70 bytes per nonce is user's responsibility */
185: }
186:
187: U32TO8_LITTLE(c + 0,x0);
188: U32TO8_LITTLE(c + 4,x1);
189: U32TO8_LITTLE(c + 8,x2);
190: U32TO8_LITTLE(c + 12,x3);
191: U32TO8_LITTLE(c + 16,x4);
192: U32TO8_LITTLE(c + 20,x5);
193: U32TO8_LITTLE(c + 24,x6);
194: U32TO8_LITTLE(c + 28,x7);
195: U32TO8_LITTLE(c + 32,x8);
196: U32TO8_LITTLE(c + 36,x9);
197: U32TO8_LITTLE(c + 40,x10);
198: U32TO8_LITTLE(c + 44,x11);
199: U32TO8_LITTLE(c + 48,x12);
200: U32TO8_LITTLE(c + 52,x13);
201: U32TO8_LITTLE(c + 56,x14);
202: U32TO8_LITTLE(c + 60,x15);
203:
204: if (bytes <= 64) {
205: if (bytes < 64) {
206: for (i = 0;i < bytes;++i) ctarget[i] = c[i];
207: }
208: x->input[12] = j12;
209: x->input[13] = j13;
210: return;
211: }
212: bytes -= 64;
213: c += 64;
214: m += 64;
215: }
216: }