Annotation of src/usr.bin/ssh/umac.c, Revision 1.1
1.1 ! pvalchev 1: /* $OpenBSD$ */
! 2: /* -----------------------------------------------------------------------
! 3: *
! 4: * umac.c -- C Implementation UMAC Message Authentication
! 5: *
! 6: * Version 0.93b of rfc4418.txt -- 2006 July 18
! 7: *
! 8: * For a full description of UMAC message authentication see the UMAC
! 9: * world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac
! 10: * Please report bugs and suggestions to the UMAC webpage.
! 11: *
! 12: * Copyright (c) 1999-2006 Ted Krovetz
! 13: *
! 14: * Permission to use, copy, modify, and distribute this software and
! 15: * its documentation for any purpose and with or without fee, is hereby
! 16: * granted provided that the above copyright notice appears in all copies
! 17: * and in supporting documentation, and that the name of the copyright
! 18: * holder not be used in advertising or publicity pertaining to
! 19: * distribution of the software without specific, written prior permission.
! 20: *
! 21: * Comments should be directed to Ted Krovetz (tdk@acm.org)
! 22: *
! 23: * ---------------------------------------------------------------------- */
! 24:
! 25: /* ////////////////////// IMPORTANT NOTES /////////////////////////////////
! 26: *
! 27: * 1) This version does not work properly on messages larger than 16MB
! 28: *
! 29: * 2) If you set the switch to use SSE2, then all data must be 16-byte
! 30: * aligned
! 31: *
! 32: * 3) When calling the function umac(), it is assumed that msg is in
! 33: * a writable buffer of length divisible by 32 bytes. The message itself
! 34: * does not have to fill the entire buffer, but bytes beyond msg may be
! 35: * zeroed.
! 36: *
! 37: * 4) Three free AES implementations are supported by this implementation of
! 38: * UMAC. Paulo Barreto's version is in the public domain and can be found
! 39: * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for
! 40: * "Barreto"). The only two files needed are rijndael-alg-fst.c and
! 41: * rijndael-alg-fst.h. Brian Gladman's version is distributed with the GNU
! 42: * Public license at http://fp.gladman.plus.com/AES/index.htm. It
! 43: * includes a fast IA-32 assembly version. The OpenSSL crypto library is
! 44: * the third.
! 45: *
! 46: * 5) With FORCE_C_ONLY flags set to 0, incorrect results are sometimes
! 47: * produced under gcc with optimizations set -O3 or higher. Dunno why.
! 48: *
! 49: /////////////////////////////////////////////////////////////////////// */
! 50:
! 51: /* ---------------------------------------------------------------------- */
! 52: /* --- User Switches ---------------------------------------------------- */
! 53: /* ---------------------------------------------------------------------- */
! 54:
! 55: #define UMAC_OUTPUT_LEN 8 /* Alowable: 4, 8, 12, 16 */
! 56: /* #define FORCE_C_ONLY 1 ANSI C and 64-bit integers req'd */
! 57: /* #define AES_IMPLEMENTAION 1 1 = OpenSSL, 2 = Barreto, 3 = Gladman */
! 58: /* #define SSE2 0 Is SSE2 available? */
! 59: /* #define RUN_TESTS 0 Run basic correctness/speed tests */
! 60: /* #define UMAC_AE_SUPPORT 0 Enable authenticated encryption */
! 61:
! 62: /* ---------------------------------------------------------------------- */
! 63: /* -- Global Includes --------------------------------------------------- */
! 64: /* ---------------------------------------------------------------------- */
! 65:
! 66: #include <sys/types.h>
! 67: #include <sys/endian.h>
! 68:
! 69: #include "umac.h"
! 70: #include <string.h>
! 71: #include <stdlib.h>
! 72: #include <stddef.h>
! 73:
! 74: /* ---------------------------------------------------------------------- */
! 75: /* --- Primitive Data Types --- */
! 76: /* ---------------------------------------------------------------------- */
! 77:
! 78: /* The following assumptions may need change on your system */
! 79: typedef u_int8_t UINT8; /* 1 byte */
! 80: typedef u_int16_t UINT16; /* 2 byte */
! 81: typedef u_int32_t UINT32; /* 4 byte */
! 82: typedef u_int64_t UINT64; /* 8 bytes */
! 83: typedef unsigned int UWORD; /* Register */
! 84:
! 85: /* ---------------------------------------------------------------------- */
! 86: /* --- Constants -------------------------------------------------------- */
! 87: /* ---------------------------------------------------------------------- */
! 88:
! 89: #define UMAC_KEY_LEN 16 /* UMAC takes 16 bytes of external key */
! 90:
! 91: /* Message "words" are read from memory in an endian-specific manner. */
! 92: /* For this implementation to behave correctly, __LITTLE_ENDIAN__ must */
! 93: /* be set true if the host computer is little-endian. */
! 94:
! 95: #if BYTE_ORDER == LITTLE_ENDIAN
! 96: #define __LITTLE_ENDIAN__ 1
! 97: #else
! 98: #define __LITTLE_ENDIAN__ 0
! 99: #endif
! 100:
! 101: /* ---------------------------------------------------------------------- */
! 102: /* ---------------------------------------------------------------------- */
! 103: /* ----- Architecture Specific ------------------------------------------ */
! 104: /* ---------------------------------------------------------------------- */
! 105: /* ---------------------------------------------------------------------- */
! 106:
! 107:
! 108: /* ---------------------------------------------------------------------- */
! 109: /* ---------------------------------------------------------------------- */
! 110: /* ----- Primitive Routines --------------------------------------------- */
! 111: /* ---------------------------------------------------------------------- */
! 112: /* ---------------------------------------------------------------------- */
! 113:
! 114:
! 115: /* ---------------------------------------------------------------------- */
! 116: /* --- 32-bit by 32-bit to 64-bit Multiplication ------------------------ */
! 117: /* ---------------------------------------------------------------------- */
! 118:
! 119: #define MUL64(a,b) ((UINT64)((UINT64)(UINT32)(a) * (UINT64)(UINT32)(b)))
! 120:
! 121: /* ---------------------------------------------------------------------- */
! 122: /* --- Endian Conversion --- Forcing assembly on some platforms */
! 123: /* ---------------------------------------------------------------------- */
! 124:
! 125: #if 0
! 126: static UINT32 LOAD_UINT32_REVERSED(void *ptr)
! 127: {
! 128: UINT32 temp = *(UINT32 *)ptr;
! 129: temp = (temp >> 24) | ((temp & 0x00FF0000) >> 8 )
! 130: | ((temp & 0x0000FF00) << 8 ) | (temp << 24);
! 131: return (UINT32)temp;
! 132: }
! 133:
! 134: static void STORE_UINT32_REVERSED(void *ptr, UINT32 x)
! 135: {
! 136: UINT32 i = (UINT32)x;
! 137: *(UINT32 *)ptr = (i >> 24) | ((i & 0x00FF0000) >> 8 )
! 138: | ((i & 0x0000FF00) << 8 ) | (i << 24);
! 139: }
! 140: #endif
! 141:
! 142: /* The following definitions use the above reversal-primitives to do the right
! 143: * thing on endian specific load and stores.
! 144: */
! 145:
! 146: #define LOAD_UINT32_REVERSED(p) (swap32(*(UINT32 *)(p)))
! 147: #define STORE_UINT32_REVERSED(p,v) (*(UINT32 *)(p) = swap32(v))
! 148:
! 149: #if (__LITTLE_ENDIAN__)
! 150: #define LOAD_UINT32_LITTLE(ptr) (*(UINT32 *)(ptr))
! 151: #define STORE_UINT32_BIG(ptr,x) STORE_UINT32_REVERSED(ptr,x)
! 152: #else
! 153: #define LOAD_UINT32_LITTLE(ptr) LOAD_UINT32_REVERSED(ptr)
! 154: #define STORE_UINT32_BIG(ptr,x) (*(UINT32 *)(ptr) = (UINT32)(x))
! 155: #endif
! 156:
! 157:
! 158:
! 159: /* ---------------------------------------------------------------------- */
! 160: /* ---------------------------------------------------------------------- */
! 161: /* ----- Begin KDF & PDF Section ---------------------------------------- */
! 162: /* ---------------------------------------------------------------------- */
! 163: /* ---------------------------------------------------------------------- */
! 164:
! 165: /* UMAC uses AES with 16 byte block and key lengths */
! 166: #define AES_BLOCK_LEN 16
! 167:
! 168: /* OpenSSL's AES */
! 169: #include <openssl/aes.h>
! 170: typedef AES_KEY aes_int_key[1];
! 171: #define aes_encryption(in,out,int_key) \
! 172: AES_encrypt((u_char *)(in),(u_char *)(out),(AES_KEY *)int_key)
! 173: #define aes_key_setup(key,int_key) \
! 174: AES_set_encrypt_key((u_char *)(key),UMAC_KEY_LEN*8,int_key)
! 175:
! 176: /* The user-supplied UMAC key is stretched using AES in a counter
! 177: * mode to supply all random bits needed by UMAC. The kdf function takes
! 178: * an AES internal key representation 'key' and writes a stream of
! 179: * 'nbytes' bytes to the memory pointed at by 'buffer_ptr'. Each distinct
! 180: * 'ndx' causes a distinct byte stream.
! 181: */
! 182: static void kdf(void *buffer_ptr, aes_int_key key, UINT8 ndx, int nbytes)
! 183: {
! 184: UINT8 in_buf[AES_BLOCK_LEN] = {0};
! 185: UINT8 out_buf[AES_BLOCK_LEN];
! 186: UINT8 *dst_buf = (UINT8 *)buffer_ptr;
! 187: int i;
! 188:
! 189: /* Setup the initial value */
! 190: in_buf[AES_BLOCK_LEN-9] = ndx;
! 191: in_buf[AES_BLOCK_LEN-1] = i = 1;
! 192:
! 193: while (nbytes >= AES_BLOCK_LEN) {
! 194: aes_encryption(in_buf, out_buf, key);
! 195: memcpy(dst_buf,out_buf,AES_BLOCK_LEN);
! 196: in_buf[AES_BLOCK_LEN-1] = ++i;
! 197: nbytes -= AES_BLOCK_LEN;
! 198: dst_buf += AES_BLOCK_LEN;
! 199: }
! 200: if (nbytes) {
! 201: aes_encryption(in_buf, out_buf, key);
! 202: memcpy(dst_buf,out_buf,nbytes);
! 203: }
! 204: }
! 205:
! 206: /* The final UHASH result is XOR'd with the output of a pseudorandom
! 207: * function. Here, we use AES to generate random output and
! 208: * xor the appropriate bytes depending on the last bits of nonce.
! 209: * This scheme is optimized for sequential, increasing big-endian nonces.
! 210: */
! 211:
! 212: typedef struct {
! 213: UINT8 cache[AES_BLOCK_LEN]; /* Previous AES output is saved */
! 214: UINT8 nonce[AES_BLOCK_LEN]; /* The AES input making above cache */
! 215: aes_int_key prf_key; /* Expanded AES key for PDF */
! 216: } pdf_ctx;
! 217:
! 218: static void pdf_init(pdf_ctx *pc, aes_int_key prf_key)
! 219: {
! 220: UINT8 buf[UMAC_KEY_LEN];
! 221:
! 222: kdf(buf, prf_key, 0, UMAC_KEY_LEN);
! 223: aes_key_setup(buf, pc->prf_key);
! 224:
! 225: /* Initialize pdf and cache */
! 226: memset(pc->nonce, 0, sizeof(pc->nonce));
! 227: aes_encryption(pc->nonce, pc->cache, pc->prf_key);
! 228: }
! 229:
! 230: static void pdf_gen_xor(pdf_ctx *pc, UINT8 nonce[8], UINT8 buf[8])
! 231: {
! 232: /* 'ndx' indicates that we'll be using the 0th or 1st eight bytes
! 233: * of the AES output. If last time around we returned the ndx-1st
! 234: * element, then we may have the result in the cache already.
! 235: */
! 236:
! 237: #if (UMAC_OUTPUT_LEN == 4)
! 238: #define LOW_BIT_MASK 3
! 239: #elif (UMAC_OUTPUT_LEN == 8)
! 240: #define LOW_BIT_MASK 1
! 241: #elif (UMAC_OUTPUT_LEN > 8)
! 242: #define LOW_BIT_MASK 0
! 243: #endif
! 244:
! 245: UINT8 tmp_nonce_lo[4];
! 246: #if LOW_BIT_MASK != 0
! 247: int ndx = nonce[7] & LOW_BIT_MASK;
! 248: #endif
! 249: *(UINT32 *)tmp_nonce_lo = ((UINT32 *)nonce)[1];
! 250: tmp_nonce_lo[3] &= ~LOW_BIT_MASK; /* zero last bit */
! 251:
! 252: if ( (((UINT32 *)tmp_nonce_lo)[0] != ((UINT32 *)pc->nonce)[1]) ||
! 253: (((UINT32 *)nonce)[0] != ((UINT32 *)pc->nonce)[0]) )
! 254: {
! 255: ((UINT32 *)pc->nonce)[0] = ((UINT32 *)nonce)[0];
! 256: ((UINT32 *)pc->nonce)[1] = ((UINT32 *)tmp_nonce_lo)[0];
! 257: aes_encryption(pc->nonce, pc->cache, pc->prf_key);
! 258: }
! 259:
! 260: #if (UMAC_OUTPUT_LEN == 4)
! 261: *((UINT32 *)buf) ^= ((UINT32 *)pc->cache)[ndx];
! 262: #elif (UMAC_OUTPUT_LEN == 8)
! 263: *((UINT64 *)buf) ^= ((UINT64 *)pc->cache)[ndx];
! 264: #elif (UMAC_OUTPUT_LEN == 12)
! 265: ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0];
! 266: ((UINT32 *)buf)[2] ^= ((UINT32 *)pc->cache)[2];
! 267: #elif (UMAC_OUTPUT_LEN == 16)
! 268: ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0];
! 269: ((UINT64 *)buf)[1] ^= ((UINT64 *)pc->cache)[1];
! 270: #endif
! 271: }
! 272:
! 273: /* ---------------------------------------------------------------------- */
! 274: /* ---------------------------------------------------------------------- */
! 275: /* ----- Begin NH Hash Section ------------------------------------------ */
! 276: /* ---------------------------------------------------------------------- */
! 277: /* ---------------------------------------------------------------------- */
! 278:
! 279: /* The NH-based hash functions used in UMAC are described in the UMAC paper
! 280: * and specification, both of which can be found at the UMAC website.
! 281: * The interface to this implementation has two
! 282: * versions, one expects the entire message being hashed to be passed
! 283: * in a single buffer and returns the hash result immediately. The second
! 284: * allows the message to be passed in a sequence of buffers. In the
! 285: * multiple-buffer interface, the client calls the routine nh_update() as
! 286: * many times as necessary. When there is no more data to be fed to the
! 287: * hash, the client calls nh_final() which calculates the hash output.
! 288: * Before beginning another hash calculation the nh_reset() routine
! 289: * must be called. The single-buffer routine, nh(), is equivalent to
! 290: * the sequence of calls nh_update() and nh_final(); however it is
! 291: * optimized and should be preferred whenever the multiple-buffer interface
! 292: * is not necessary. When using either interface, it is the client's
! 293: * responsibility to pass no more than L1_KEY_LEN bytes per hash result.
! 294: *
! 295: * The routine nh_init() initializes the nh_ctx data structure and
! 296: * must be called once, before any other NH routine.
! 297: */
! 298:
! 299: /* The "nh_aux" routines do the actual NH hashing work. They
! 300: * expect buffers to be multiples of L1_PAD_BOUNDARY. These routines
! 301: * produce output for all STREAMS NH iterations in one call,
! 302: * allowing the parallel implementation of the streams.
! 303: */
! 304:
! 305: #define STREAMS (UMAC_OUTPUT_LEN / 4) /* Number of times hash is applied */
! 306: #define L1_KEY_LEN 1024 /* Internal key bytes */
! 307: #define L1_KEY_SHIFT 16 /* Toeplitz key shift between streams */
! 308: #define L1_PAD_BOUNDARY 32 /* pad message to boundary multiple */
! 309: #define ALLOC_BOUNDARY 16 /* Keep buffers aligned to this */
! 310: #define HASH_BUF_BYTES 64 /* nh_aux_hb buffer multiple */
! 311:
! 312: typedef struct {
! 313: UINT8 nh_key [L1_KEY_LEN + L1_KEY_SHIFT * (STREAMS - 1)]; /* NH Key */
! 314: UINT8 data [HASH_BUF_BYTES]; /* Incomming data buffer */
! 315: int next_data_empty; /* Bookeeping variable for data buffer. */
! 316: int bytes_hashed; /* Bytes (out of L1_KEY_LEN) incorperated. */
! 317: UINT64 state[STREAMS]; /* on-line state */
! 318: } nh_ctx;
! 319:
! 320:
! 321: #if (UMAC_OUTPUT_LEN == 4)
! 322:
! 323: static void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen)
! 324: /* NH hashing primitive. Previous (partial) hash result is loaded and
! 325: * then stored via hp pointer. The length of the data pointed at by "dp",
! 326: * "dlen", is guaranteed to be divisible by L1_PAD_BOUNDARY (32). Key
! 327: * is expected to be endian compensated in memory at key setup.
! 328: */
! 329: {
! 330: UINT64 h;
! 331: UWORD c = dlen / 32;
! 332: UINT32 *k = (UINT32 *)kp;
! 333: UINT32 *d = (UINT32 *)dp;
! 334: UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
! 335: UINT32 k0,k1,k2,k3,k4,k5,k6,k7;
! 336:
! 337: h = *((UINT64 *)hp);
! 338: do {
! 339: d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
! 340: d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
! 341: d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
! 342: d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
! 343: k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
! 344: k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
! 345: h += MUL64((k0 + d0), (k4 + d4));
! 346: h += MUL64((k1 + d1), (k5 + d5));
! 347: h += MUL64((k2 + d2), (k6 + d6));
! 348: h += MUL64((k3 + d3), (k7 + d7));
! 349:
! 350: d += 8;
! 351: k += 8;
! 352: } while (--c);
! 353: *((UINT64 *)hp) = h;
! 354: }
! 355:
! 356: #elif (UMAC_OUTPUT_LEN == 8)
! 357:
! 358: static void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen)
! 359: /* Same as previous nh_aux, but two streams are handled in one pass,
! 360: * reading and writing 16 bytes of hash-state per call.
! 361: */
! 362: {
! 363: UINT64 h1,h2;
! 364: UWORD c = dlen / 32;
! 365: UINT32 *k = (UINT32 *)kp;
! 366: UINT32 *d = (UINT32 *)dp;
! 367: UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
! 368: UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
! 369: k8,k9,k10,k11;
! 370:
! 371: h1 = *((UINT64 *)hp);
! 372: h2 = *((UINT64 *)hp + 1);
! 373: k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
! 374: do {
! 375: d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
! 376: d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
! 377: d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
! 378: d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
! 379: k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
! 380: k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
! 381:
! 382: h1 += MUL64((k0 + d0), (k4 + d4));
! 383: h2 += MUL64((k4 + d0), (k8 + d4));
! 384:
! 385: h1 += MUL64((k1 + d1), (k5 + d5));
! 386: h2 += MUL64((k5 + d1), (k9 + d5));
! 387:
! 388: h1 += MUL64((k2 + d2), (k6 + d6));
! 389: h2 += MUL64((k6 + d2), (k10 + d6));
! 390:
! 391: h1 += MUL64((k3 + d3), (k7 + d7));
! 392: h2 += MUL64((k7 + d3), (k11 + d7));
! 393:
! 394: k0 = k8; k1 = k9; k2 = k10; k3 = k11;
! 395:
! 396: d += 8;
! 397: k += 8;
! 398: } while (--c);
! 399: ((UINT64 *)hp)[0] = h1;
! 400: ((UINT64 *)hp)[1] = h2;
! 401: }
! 402:
! 403: #elif (UMAC_OUTPUT_LEN == 12)
! 404:
! 405: static void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen)
! 406: /* Same as previous nh_aux, but two streams are handled in one pass,
! 407: * reading and writing 24 bytes of hash-state per call.
! 408: */
! 409: {
! 410: UINT64 h1,h2,h3;
! 411: UWORD c = dlen / 32;
! 412: UINT32 *k = (UINT32 *)kp;
! 413: UINT32 *d = (UINT32 *)dp;
! 414: UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
! 415: UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
! 416: k8,k9,k10,k11,k12,k13,k14,k15;
! 417:
! 418: h1 = *((UINT64 *)hp);
! 419: h2 = *((UINT64 *)hp + 1);
! 420: h3 = *((UINT64 *)hp + 2);
! 421: k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
! 422: k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
! 423: do {
! 424: d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
! 425: d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
! 426: d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
! 427: d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
! 428: k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
! 429: k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15);
! 430:
! 431: h1 += MUL64((k0 + d0), (k4 + d4));
! 432: h2 += MUL64((k4 + d0), (k8 + d4));
! 433: h3 += MUL64((k8 + d0), (k12 + d4));
! 434:
! 435: h1 += MUL64((k1 + d1), (k5 + d5));
! 436: h2 += MUL64((k5 + d1), (k9 + d5));
! 437: h3 += MUL64((k9 + d1), (k13 + d5));
! 438:
! 439: h1 += MUL64((k2 + d2), (k6 + d6));
! 440: h2 += MUL64((k6 + d2), (k10 + d6));
! 441: h3 += MUL64((k10 + d2), (k14 + d6));
! 442:
! 443: h1 += MUL64((k3 + d3), (k7 + d7));
! 444: h2 += MUL64((k7 + d3), (k11 + d7));
! 445: h3 += MUL64((k11 + d3), (k15 + d7));
! 446:
! 447: k0 = k8; k1 = k9; k2 = k10; k3 = k11;
! 448: k4 = k12; k5 = k13; k6 = k14; k7 = k15;
! 449:
! 450: d += 8;
! 451: k += 8;
! 452: } while (--c);
! 453: ((UINT64 *)hp)[0] = h1;
! 454: ((UINT64 *)hp)[1] = h2;
! 455: ((UINT64 *)hp)[2] = h3;
! 456: }
! 457:
! 458: #elif (UMAC_OUTPUT_LEN == 16)
! 459:
! 460: static void nh_aux(void *kp, void *dp, void *hp, UINT32 dlen)
! 461: /* Same as previous nh_aux, but two streams are handled in one pass,
! 462: * reading and writing 24 bytes of hash-state per call.
! 463: */
! 464: {
! 465: UINT64 h1,h2,h3,h4;
! 466: UWORD c = dlen / 32;
! 467: UINT32 *k = (UINT32 *)kp;
! 468: UINT32 *d = (UINT32 *)dp;
! 469: UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
! 470: UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
! 471: k8,k9,k10,k11,k12,k13,k14,k15,
! 472: k16,k17,k18,k19;
! 473:
! 474: h1 = *((UINT64 *)hp);
! 475: h2 = *((UINT64 *)hp + 1);
! 476: h3 = *((UINT64 *)hp + 2);
! 477: h4 = *((UINT64 *)hp + 3);
! 478: k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
! 479: k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
! 480: do {
! 481: d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
! 482: d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
! 483: d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
! 484: d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
! 485: k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
! 486: k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15);
! 487: k16 = *(k+16); k17 = *(k+17); k18 = *(k+18); k19 = *(k+19);
! 488:
! 489: h1 += MUL64((k0 + d0), (k4 + d4));
! 490: h2 += MUL64((k4 + d0), (k8 + d4));
! 491: h3 += MUL64((k8 + d0), (k12 + d4));
! 492: h4 += MUL64((k12 + d0), (k16 + d4));
! 493:
! 494: h1 += MUL64((k1 + d1), (k5 + d5));
! 495: h2 += MUL64((k5 + d1), (k9 + d5));
! 496: h3 += MUL64((k9 + d1), (k13 + d5));
! 497: h4 += MUL64((k13 + d1), (k17 + d5));
! 498:
! 499: h1 += MUL64((k2 + d2), (k6 + d6));
! 500: h2 += MUL64((k6 + d2), (k10 + d6));
! 501: h3 += MUL64((k10 + d2), (k14 + d6));
! 502: h4 += MUL64((k14 + d2), (k18 + d6));
! 503:
! 504: h1 += MUL64((k3 + d3), (k7 + d7));
! 505: h2 += MUL64((k7 + d3), (k11 + d7));
! 506: h3 += MUL64((k11 + d3), (k15 + d7));
! 507: h4 += MUL64((k15 + d3), (k19 + d7));
! 508:
! 509: k0 = k8; k1 = k9; k2 = k10; k3 = k11;
! 510: k4 = k12; k5 = k13; k6 = k14; k7 = k15;
! 511: k8 = k16; k9 = k17; k10 = k18; k11 = k19;
! 512:
! 513: d += 8;
! 514: k += 8;
! 515: } while (--c);
! 516: ((UINT64 *)hp)[0] = h1;
! 517: ((UINT64 *)hp)[1] = h2;
! 518: ((UINT64 *)hp)[2] = h3;
! 519: ((UINT64 *)hp)[3] = h4;
! 520: }
! 521:
! 522: /* ---------------------------------------------------------------------- */
! 523: #endif /* UMAC_OUTPUT_LENGTH */
! 524: /* ---------------------------------------------------------------------- */
! 525:
! 526:
! 527: /* ---------------------------------------------------------------------- */
! 528:
! 529: static void nh_transform(nh_ctx *hc, UINT8 *buf, UINT32 nbytes)
! 530: /* This function is a wrapper for the primitive NH hash functions. It takes
! 531: * as argument "hc" the current hash context and a buffer which must be a
! 532: * multiple of L1_PAD_BOUNDARY. The key passed to nh_aux is offset
! 533: * appropriately according to how much message has been hashed already.
! 534: */
! 535: {
! 536: UINT8 *key;
! 537:
! 538: key = hc->nh_key + hc->bytes_hashed;
! 539: nh_aux(key, buf, hc->state, nbytes);
! 540: }
! 541:
! 542: /* ---------------------------------------------------------------------- */
! 543:
! 544: static void endian_convert(void *buf, UWORD bpw, UINT32 num_bytes)
! 545: /* We endian convert the keys on little-endian computers to */
! 546: /* compensate for the lack of big-endian memory reads during hashing. */
! 547: {
! 548: UWORD iters = num_bytes / bpw;
! 549: if (bpw == 4) {
! 550: UINT32 *p = (UINT32 *)buf;
! 551: do {
! 552: *p = LOAD_UINT32_REVERSED(p);
! 553: p++;
! 554: } while (--iters);
! 555: } else if (bpw == 8) {
! 556: UINT32 *p = (UINT32 *)buf;
! 557: UINT32 t;
! 558: do {
! 559: t = LOAD_UINT32_REVERSED(p+1);
! 560: p[1] = LOAD_UINT32_REVERSED(p);
! 561: p[0] = t;
! 562: p += 2;
! 563: } while (--iters);
! 564: }
! 565: }
! 566: #if (__LITTLE_ENDIAN__)
! 567: #define endian_convert_if_le(x,y,z) endian_convert((x),(y),(z))
! 568: #else
! 569: #define endian_convert_if_le(x,y,z) do{}while(0) /* Do nothing */
! 570: #endif
! 571:
! 572: /* ---------------------------------------------------------------------- */
! 573:
! 574: static void nh_reset(nh_ctx *hc)
! 575: /* Reset nh_ctx to ready for hashing of new data */
! 576: {
! 577: hc->bytes_hashed = 0;
! 578: hc->next_data_empty = 0;
! 579: hc->state[0] = 0;
! 580: #if (UMAC_OUTPUT_LEN >= 8)
! 581: hc->state[1] = 0;
! 582: #endif
! 583: #if (UMAC_OUTPUT_LEN >= 12)
! 584: hc->state[2] = 0;
! 585: #endif
! 586: #if (UMAC_OUTPUT_LEN == 16)
! 587: hc->state[3] = 0;
! 588: #endif
! 589:
! 590: }
! 591:
! 592: /* ---------------------------------------------------------------------- */
! 593:
! 594: static void nh_init(nh_ctx *hc, aes_int_key prf_key)
! 595: /* Generate nh_key, endian convert and reset to be ready for hashing. */
! 596: {
! 597: kdf(hc->nh_key, prf_key, 1, sizeof(hc->nh_key));
! 598: endian_convert_if_le(hc->nh_key, 4, sizeof(hc->nh_key));
! 599: nh_reset(hc);
! 600: }
! 601:
! 602: /* ---------------------------------------------------------------------- */
! 603:
! 604: static void nh_update(nh_ctx *hc, UINT8 *buf, UINT32 nbytes)
! 605: /* Incorporate nbytes of data into a nh_ctx, buffer whatever is not an */
! 606: /* even multiple of HASH_BUF_BYTES. */
! 607: {
! 608: UINT32 i,j;
! 609:
! 610: j = hc->next_data_empty;
! 611: if ((j + nbytes) >= HASH_BUF_BYTES) {
! 612: if (j) {
! 613: i = HASH_BUF_BYTES - j;
! 614: memcpy(hc->data+j, buf, i);
! 615: nh_transform(hc,hc->data,HASH_BUF_BYTES);
! 616: nbytes -= i;
! 617: buf += i;
! 618: hc->bytes_hashed += HASH_BUF_BYTES;
! 619: }
! 620: if (nbytes >= HASH_BUF_BYTES) {
! 621: i = nbytes & ~(HASH_BUF_BYTES - 1);
! 622: nh_transform(hc, buf, i);
! 623: nbytes -= i;
! 624: buf += i;
! 625: hc->bytes_hashed += i;
! 626: }
! 627: j = 0;
! 628: }
! 629: memcpy(hc->data + j, buf, nbytes);
! 630: hc->next_data_empty = j + nbytes;
! 631: }
! 632:
! 633: /* ---------------------------------------------------------------------- */
! 634:
! 635: static void zero_pad(UINT8 *p, int nbytes)
! 636: {
! 637: /* Write "nbytes" of zeroes, beginning at "p" */
! 638: if (nbytes >= (int)sizeof(UWORD)) {
! 639: while ((ptrdiff_t)p % sizeof(UWORD)) {
! 640: *p = 0;
! 641: nbytes--;
! 642: p++;
! 643: }
! 644: while (nbytes >= (int)sizeof(UWORD)) {
! 645: *(UWORD *)p = 0;
! 646: nbytes -= sizeof(UWORD);
! 647: p += sizeof(UWORD);
! 648: }
! 649: }
! 650: while (nbytes) {
! 651: *p = 0;
! 652: nbytes--;
! 653: p++;
! 654: }
! 655: }
! 656:
! 657: /* ---------------------------------------------------------------------- */
! 658:
! 659: static void nh_final(nh_ctx *hc, UINT8 *result)
! 660: /* After passing some number of data buffers to nh_update() for integration
! 661: * into an NH context, nh_final is called to produce a hash result. If any
! 662: * bytes are in the buffer hc->data, incorporate them into the
! 663: * NH context. Finally, add into the NH accumulation "state" the total number
! 664: * of bits hashed. The resulting numbers are written to the buffer "result".
! 665: * If nh_update was never called, L1_PAD_BOUNDARY zeroes are incorporated.
! 666: */
! 667: {
! 668: int nh_len, nbits;
! 669:
! 670: if (hc->next_data_empty != 0) {
! 671: nh_len = ((hc->next_data_empty + (L1_PAD_BOUNDARY - 1)) &
! 672: ~(L1_PAD_BOUNDARY - 1));
! 673: zero_pad(hc->data + hc->next_data_empty,
! 674: nh_len - hc->next_data_empty);
! 675: nh_transform(hc, hc->data, nh_len);
! 676: hc->bytes_hashed += hc->next_data_empty;
! 677: } else if (hc->bytes_hashed == 0) {
! 678: nh_len = L1_PAD_BOUNDARY;
! 679: zero_pad(hc->data, L1_PAD_BOUNDARY);
! 680: nh_transform(hc, hc->data, nh_len);
! 681: }
! 682:
! 683: nbits = (hc->bytes_hashed << 3);
! 684: ((UINT64 *)result)[0] = ((UINT64 *)hc->state)[0] + nbits;
! 685: #if (UMAC_OUTPUT_LEN >= 8)
! 686: ((UINT64 *)result)[1] = ((UINT64 *)hc->state)[1] + nbits;
! 687: #endif
! 688: #if (UMAC_OUTPUT_LEN >= 12)
! 689: ((UINT64 *)result)[2] = ((UINT64 *)hc->state)[2] + nbits;
! 690: #endif
! 691: #if (UMAC_OUTPUT_LEN == 16)
! 692: ((UINT64 *)result)[3] = ((UINT64 *)hc->state)[3] + nbits;
! 693: #endif
! 694: nh_reset(hc);
! 695: }
! 696:
! 697: /* ---------------------------------------------------------------------- */
! 698:
! 699: static void nh(nh_ctx *hc, UINT8 *buf, UINT32 padded_len,
! 700: UINT32 unpadded_len, UINT8 *result)
! 701: /* All-in-one nh_update() and nh_final() equivalent.
! 702: * Assumes that padded_len is divisible by L1_PAD_BOUNDARY and result is
! 703: * well aligned
! 704: */
! 705: {
! 706: UINT32 nbits;
! 707:
! 708: /* Initialize the hash state */
! 709: nbits = (unpadded_len << 3);
! 710:
! 711: ((UINT64 *)result)[0] = nbits;
! 712: #if (UMAC_OUTPUT_LEN >= 8)
! 713: ((UINT64 *)result)[1] = nbits;
! 714: #endif
! 715: #if (UMAC_OUTPUT_LEN >= 12)
! 716: ((UINT64 *)result)[2] = nbits;
! 717: #endif
! 718: #if (UMAC_OUTPUT_LEN == 16)
! 719: ((UINT64 *)result)[3] = nbits;
! 720: #endif
! 721:
! 722: nh_aux(hc->nh_key, buf, result, padded_len);
! 723: }
! 724:
! 725: /* ---------------------------------------------------------------------- */
! 726: /* ---------------------------------------------------------------------- */
! 727: /* ----- Begin UHASH Section -------------------------------------------- */
! 728: /* ---------------------------------------------------------------------- */
! 729: /* ---------------------------------------------------------------------- */
! 730:
! 731: /* UHASH is a multi-layered algorithm. Data presented to UHASH is first
! 732: * hashed by NH. The NH output is then hashed by a polynomial-hash layer
! 733: * unless the initial data to be hashed is short. After the polynomial-
! 734: * layer, an inner-product hash is used to produce the final UHASH output.
! 735: *
! 736: * UHASH provides two interfaces, one all-at-once and another where data
! 737: * buffers are presented sequentially. In the sequential interface, the
! 738: * UHASH client calls the routine uhash_update() as many times as necessary.
! 739: * When there is no more data to be fed to UHASH, the client calls
! 740: * uhash_final() which
! 741: * calculates the UHASH output. Before beginning another UHASH calculation
! 742: * the uhash_reset() routine must be called. The all-at-once UHASH routine,
! 743: * uhash(), is equivalent to the sequence of calls uhash_update() and
! 744: * uhash_final(); however it is optimized and should be
! 745: * used whenever the sequential interface is not necessary.
! 746: *
! 747: * The routine uhash_init() initializes the uhash_ctx data structure and
! 748: * must be called once, before any other UHASH routine.
! 749: */
! 750:
! 751: /* ---------------------------------------------------------------------- */
! 752: /* ----- Constants and uhash_ctx ---------------------------------------- */
! 753: /* ---------------------------------------------------------------------- */
! 754:
! 755: /* ---------------------------------------------------------------------- */
! 756: /* ----- Poly hash and Inner-Product hash Constants --------------------- */
! 757: /* ---------------------------------------------------------------------- */
! 758:
! 759: /* Primes and masks */
! 760: #define p36 ((UINT64)0x0000000FFFFFFFFBull) /* 2^36 - 5 */
! 761: #define p64 ((UINT64)0xFFFFFFFFFFFFFFC5ull) /* 2^64 - 59 */
! 762: #define m36 ((UINT64)0x0000000FFFFFFFFFull) /* The low 36 of 64 bits */
! 763:
! 764:
! 765: /* ---------------------------------------------------------------------- */
! 766:
! 767: typedef struct uhash_ctx {
! 768: nh_ctx hash; /* Hash context for L1 NH hash */
! 769: UINT64 poly_key_8[STREAMS]; /* p64 poly keys */
! 770: UINT64 poly_accum[STREAMS]; /* poly hash result */
! 771: UINT64 ip_keys[STREAMS*4]; /* Inner-product keys */
! 772: UINT32 ip_trans[STREAMS]; /* Inner-product translation */
! 773: UINT32 msg_len; /* Total length of data passed */
! 774: /* to uhash */
! 775: } uhash_ctx;
! 776: typedef struct uhash_ctx *uhash_ctx_t;
! 777:
! 778: /* ---------------------------------------------------------------------- */
! 779:
! 780:
! 781: /* The polynomial hashes use Horner's rule to evaluate a polynomial one
! 782: * word at a time. As described in the specification, poly32 and poly64
! 783: * require keys from special domains. The following implementations exploit
! 784: * the special domains to avoid overflow. The results are not guaranteed to
! 785: * be within Z_p32 and Z_p64, but the Inner-Product hash implementation
! 786: * patches any errant values.
! 787: */
! 788:
! 789: static UINT64 poly64(UINT64 cur, UINT64 key, UINT64 data)
! 790: {
! 791: UINT32 key_hi = (UINT32)(key >> 32),
! 792: key_lo = (UINT32)key,
! 793: cur_hi = (UINT32)(cur >> 32),
! 794: cur_lo = (UINT32)cur,
! 795: x_lo,
! 796: x_hi;
! 797: UINT64 X,T,res;
! 798:
! 799: X = MUL64(key_hi, cur_lo) + MUL64(cur_hi, key_lo);
! 800: x_lo = (UINT32)X;
! 801: x_hi = (UINT32)(X >> 32);
! 802:
! 803: res = (MUL64(key_hi, cur_hi) + x_hi) * 59 + MUL64(key_lo, cur_lo);
! 804:
! 805: T = ((UINT64)x_lo << 32);
! 806: res += T;
! 807: if (res < T)
! 808: res += 59;
! 809:
! 810: res += data;
! 811: if (res < data)
! 812: res += 59;
! 813:
! 814: return res;
! 815: }
! 816:
! 817:
! 818: /* Although UMAC is specified to use a ramped polynomial hash scheme, this
! 819: * implementation does not handle all ramp levels. Because we don't handle
! 820: * the ramp up to p128 modulus in this implementation, we are limited to
! 821: * 2^14 poly_hash() invocations per stream (for a total capacity of 2^24
! 822: * bytes input to UMAC per tag, ie. 16MB).
! 823: */
! 824: static void poly_hash(uhash_ctx_t hc, UINT32 data_in[])
! 825: {
! 826: int i;
! 827: UINT64 *data=(UINT64*)data_in;
! 828:
! 829: for (i = 0; i < STREAMS; i++) {
! 830: if ((UINT32)(data[i] >> 32) == 0xfffffffful) {
! 831: hc->poly_accum[i] = poly64(hc->poly_accum[i],
! 832: hc->poly_key_8[i], p64 - 1);
! 833: hc->poly_accum[i] = poly64(hc->poly_accum[i],
! 834: hc->poly_key_8[i], (data[i] - 59));
! 835: } else {
! 836: hc->poly_accum[i] = poly64(hc->poly_accum[i],
! 837: hc->poly_key_8[i], data[i]);
! 838: }
! 839: }
! 840: }
! 841:
! 842:
! 843: /* ---------------------------------------------------------------------- */
! 844:
! 845:
! 846: /* The final step in UHASH is an inner-product hash. The poly hash
! 847: * produces a result not neccesarily WORD_LEN bytes long. The inner-
! 848: * product hash breaks the polyhash output into 16-bit chunks and
! 849: * multiplies each with a 36 bit key.
! 850: */
! 851:
! 852: static UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data)
! 853: {
! 854: t = t + ipkp[0] * (UINT64)(UINT16)(data >> 48);
! 855: t = t + ipkp[1] * (UINT64)(UINT16)(data >> 32);
! 856: t = t + ipkp[2] * (UINT64)(UINT16)(data >> 16);
! 857: t = t + ipkp[3] * (UINT64)(UINT16)(data);
! 858:
! 859: return t;
! 860: }
! 861:
! 862: static UINT32 ip_reduce_p36(UINT64 t)
! 863: {
! 864: /* Divisionless modular reduction */
! 865: UINT64 ret;
! 866:
! 867: ret = (t & m36) + 5 * (t >> 36);
! 868: if (ret >= p36)
! 869: ret -= p36;
! 870:
! 871: /* return least significant 32 bits */
! 872: return (UINT32)(ret);
! 873: }
! 874:
! 875:
! 876: /* If the data being hashed by UHASH is no longer than L1_KEY_LEN, then
! 877: * the polyhash stage is skipped and ip_short is applied directly to the
! 878: * NH output.
! 879: */
! 880: static void ip_short(uhash_ctx_t ahc, UINT8 *nh_res, u_char *res)
! 881: {
! 882: UINT64 t;
! 883: UINT64 *nhp = (UINT64 *)nh_res;
! 884:
! 885: t = ip_aux(0,ahc->ip_keys, nhp[0]);
! 886: STORE_UINT32_BIG((UINT32 *)res+0, ip_reduce_p36(t) ^ ahc->ip_trans[0]);
! 887: #if (UMAC_OUTPUT_LEN >= 8)
! 888: t = ip_aux(0,ahc->ip_keys+4, nhp[1]);
! 889: STORE_UINT32_BIG((UINT32 *)res+1, ip_reduce_p36(t) ^ ahc->ip_trans[1]);
! 890: #endif
! 891: #if (UMAC_OUTPUT_LEN >= 12)
! 892: t = ip_aux(0,ahc->ip_keys+8, nhp[2]);
! 893: STORE_UINT32_BIG((UINT32 *)res+2, ip_reduce_p36(t) ^ ahc->ip_trans[2]);
! 894: #endif
! 895: #if (UMAC_OUTPUT_LEN == 16)
! 896: t = ip_aux(0,ahc->ip_keys+12, nhp[3]);
! 897: STORE_UINT32_BIG((UINT32 *)res+3, ip_reduce_p36(t) ^ ahc->ip_trans[3]);
! 898: #endif
! 899: }
! 900:
! 901: /* If the data being hashed by UHASH is longer than L1_KEY_LEN, then
! 902: * the polyhash stage is not skipped and ip_long is applied to the
! 903: * polyhash output.
! 904: */
! 905: static void ip_long(uhash_ctx_t ahc, u_char *res)
! 906: {
! 907: int i;
! 908: UINT64 t;
! 909:
! 910: for (i = 0; i < STREAMS; i++) {
! 911: /* fix polyhash output not in Z_p64 */
! 912: if (ahc->poly_accum[i] >= p64)
! 913: ahc->poly_accum[i] -= p64;
! 914: t = ip_aux(0,ahc->ip_keys+(i*4), ahc->poly_accum[i]);
! 915: STORE_UINT32_BIG((UINT32 *)res+i,
! 916: ip_reduce_p36(t) ^ ahc->ip_trans[i]);
! 917: }
! 918: }
! 919:
! 920:
! 921: /* ---------------------------------------------------------------------- */
! 922:
! 923: /* ---------------------------------------------------------------------- */
! 924:
! 925: /* Reset uhash context for next hash session */
! 926: static int uhash_reset(uhash_ctx_t pc)
! 927: {
! 928: nh_reset(&pc->hash);
! 929: pc->msg_len = 0;
! 930: pc->poly_accum[0] = 1;
! 931: #if (UMAC_OUTPUT_LEN >= 8)
! 932: pc->poly_accum[1] = 1;
! 933: #endif
! 934: #if (UMAC_OUTPUT_LEN >= 12)
! 935: pc->poly_accum[2] = 1;
! 936: #endif
! 937: #if (UMAC_OUTPUT_LEN == 16)
! 938: pc->poly_accum[3] = 1;
! 939: #endif
! 940: return 1;
! 941: }
! 942:
! 943: /* ---------------------------------------------------------------------- */
! 944:
! 945: /* Given a pointer to the internal key needed by kdf() and a uhash context,
! 946: * initialize the NH context and generate keys needed for poly and inner-
! 947: * product hashing. All keys are endian adjusted in memory so that native
! 948: * loads cause correct keys to be in registers during calculation.
! 949: */
! 950: static void uhash_init(uhash_ctx_t ahc, aes_int_key prf_key)
! 951: {
! 952: int i;
! 953: UINT8 buf[(8*STREAMS+4)*sizeof(UINT64)];
! 954:
! 955: /* Zero the entire uhash context */
! 956: memset(ahc, 0, sizeof(uhash_ctx));
! 957:
! 958: /* Initialize the L1 hash */
! 959: nh_init(&ahc->hash, prf_key);
! 960:
! 961: /* Setup L2 hash variables */
! 962: kdf(buf, prf_key, 2, sizeof(buf)); /* Fill buffer with index 1 key */
! 963: for (i = 0; i < STREAMS; i++) {
! 964: /* Fill keys from the buffer, skipping bytes in the buffer not
! 965: * used by this implementation. Endian reverse the keys if on a
! 966: * little-endian computer.
! 967: */
! 968: memcpy(ahc->poly_key_8+i, buf+24*i, 8);
! 969: endian_convert_if_le(ahc->poly_key_8+i, 8, 8);
! 970: /* Mask the 64-bit keys to their special domain */
! 971: ahc->poly_key_8[i] &= ((UINT64)0x01ffffffu << 32) + 0x01ffffffu;
! 972: ahc->poly_accum[i] = 1; /* Our polyhash prepends a non-zero word */
! 973: }
! 974:
! 975: /* Setup L3-1 hash variables */
! 976: kdf(buf, prf_key, 3, sizeof(buf)); /* Fill buffer with index 2 key */
! 977: for (i = 0; i < STREAMS; i++)
! 978: memcpy(ahc->ip_keys+4*i, buf+(8*i+4)*sizeof(UINT64),
! 979: 4*sizeof(UINT64));
! 980: endian_convert_if_le(ahc->ip_keys, sizeof(UINT64),
! 981: sizeof(ahc->ip_keys));
! 982: for (i = 0; i < STREAMS*4; i++)
! 983: ahc->ip_keys[i] %= p36; /* Bring into Z_p36 */
! 984:
! 985: /* Setup L3-2 hash variables */
! 986: /* Fill buffer with index 4 key */
! 987: kdf(ahc->ip_trans, prf_key, 4, STREAMS * sizeof(UINT32));
! 988: endian_convert_if_le(ahc->ip_trans, sizeof(UINT32),
! 989: STREAMS * sizeof(UINT32));
! 990: }
! 991:
! 992: /* ---------------------------------------------------------------------- */
! 993:
#if 0
/* Allocate a uhash context on an ALLOC_BOUNDARY-aligned address and
 * initialize it from key.  The number of bytes skipped to reach the
 * aligned address is recorded in the byte just before the returned
 * pointer so that uhash_free() can recover the malloc() address.
 * Returns NULL if the allocation fails.
 */
static uhash_ctx_t uhash_alloc(u_char key[])
{
    uhash_ctx_t ctx;
    u_char *raw;
    u_char pad;
    aes_int_key prf_key;

    raw = (u_char *)malloc(sizeof(uhash_ctx)+ALLOC_BOUNDARY);
    if (raw == NULL)
        return (NULL);

    if (ALLOC_BOUNDARY) {
        pad = ALLOC_BOUNDARY -
              ((ptrdiff_t)raw & (ALLOC_BOUNDARY -1));
        raw += pad;
        raw[-1] = pad;
    }
    ctx = (uhash_ctx_t)raw;

    aes_key_setup(key,prf_key);
    uhash_init(ctx, prf_key);
    return (ctx);
}
#endif
! 1016:
! 1017: /* ---------------------------------------------------------------------- */
! 1018:
#if 0
/* Release a context obtained from uhash_alloc().  When alignment padding
 * is in use, the byte just before ctx holds the distance back to the
 * address malloc() returned.  A NULL ctx is ignored.  Always returns 1.
 */
static int uhash_free(uhash_ctx_t ctx)
{
    u_char *raw = (u_char *)ctx;

    if (raw != NULL) {
        if (ALLOC_BOUNDARY)
            raw -= raw[-1];
        free(raw);
    }
    return (1);
}
#endif
! 1035: /* ---------------------------------------------------------------------- */
! 1036:
! 1037: static int uhash_update(uhash_ctx_t ctx, u_char *input, long len)
! 1038: /* Given len bytes of data, we parse it into L1_KEY_LEN chunks and
! 1039: * hash each one with NH, calling the polyhash on each NH output.
! 1040: */
! 1041: {
! 1042: UWORD bytes_hashed, bytes_remaining;
! 1043: UINT8 nh_result[STREAMS*sizeof(UINT64)];
! 1044:
! 1045: if (ctx->msg_len + len <= L1_KEY_LEN) {
! 1046: nh_update(&ctx->hash, (UINT8 *)input, len);
! 1047: ctx->msg_len += len;
! 1048: } else {
! 1049:
! 1050: bytes_hashed = ctx->msg_len % L1_KEY_LEN;
! 1051: if (ctx->msg_len == L1_KEY_LEN)
! 1052: bytes_hashed = L1_KEY_LEN;
! 1053:
! 1054: if (bytes_hashed + len >= L1_KEY_LEN) {
! 1055:
! 1056: /* If some bytes have been passed to the hash function */
! 1057: /* then we want to pass at most (L1_KEY_LEN - bytes_hashed) */
! 1058: /* bytes to complete the current nh_block. */
! 1059: if (bytes_hashed) {
! 1060: bytes_remaining = (L1_KEY_LEN - bytes_hashed);
! 1061: nh_update(&ctx->hash, (UINT8 *)input, bytes_remaining);
! 1062: nh_final(&ctx->hash, nh_result);
! 1063: ctx->msg_len += bytes_remaining;
! 1064: poly_hash(ctx,(UINT32 *)nh_result);
! 1065: len -= bytes_remaining;
! 1066: input += bytes_remaining;
! 1067: }
! 1068:
! 1069: /* Hash directly from input stream if enough bytes */
! 1070: while (len >= L1_KEY_LEN) {
! 1071: nh(&ctx->hash, (UINT8 *)input, L1_KEY_LEN,
! 1072: L1_KEY_LEN, nh_result);
! 1073: ctx->msg_len += L1_KEY_LEN;
! 1074: len -= L1_KEY_LEN;
! 1075: input += L1_KEY_LEN;
! 1076: poly_hash(ctx,(UINT32 *)nh_result);
! 1077: }
! 1078: }
! 1079:
! 1080: /* pass remaining < L1_KEY_LEN bytes of input data to NH */
! 1081: if (len) {
! 1082: nh_update(&ctx->hash, (UINT8 *)input, len);
! 1083: ctx->msg_len += len;
! 1084: }
! 1085: }
! 1086:
! 1087: return (1);
! 1088: }
! 1089:
! 1090: /* ---------------------------------------------------------------------- */
! 1091:
! 1092: static int uhash_final(uhash_ctx_t ctx, u_char *res)
! 1093: /* Incorporate any pending data, pad, and generate tag */
! 1094: {
! 1095: UINT8 nh_result[STREAMS*sizeof(UINT64)];
! 1096:
! 1097: if (ctx->msg_len > L1_KEY_LEN) {
! 1098: if (ctx->msg_len % L1_KEY_LEN) {
! 1099: nh_final(&ctx->hash, nh_result);
! 1100: poly_hash(ctx,(UINT32 *)nh_result);
! 1101: }
! 1102: ip_long(ctx, res);
! 1103: } else {
! 1104: nh_final(&ctx->hash, nh_result);
! 1105: ip_short(ctx,nh_result, res);
! 1106: }
! 1107: uhash_reset(ctx);
! 1108: return (1);
! 1109: }
! 1110:
! 1111: /* ---------------------------------------------------------------------- */
! 1112:
#if 0
/* All-at-once UHASH of len bytes at msg into res.
 *
 * Assumes that msg is in a writable buffer of length divisible by
 * L1_PAD_BOUNDARY.  Bytes beyond msg[len] may be zeroed.
 * Resets the context afterwards and always returns 1.
 */
static int uhash(uhash_ctx_t ahc, u_char *msg, long len, u_char *res)
{
    UINT8 nh_result[STREAMS*sizeof(UINT64)];
    UINT32 nh_len;
    int extra_zeroes_needed;

    if (len > L1_KEY_LEN) {
        /* Long message: hash each full L1_KEY_LEN chunk with NH and pass
         * every NH output to poly_hash().
         */
        while (len >= L1_KEY_LEN) {
            nh(&ahc->hash, (UINT8 *)msg, L1_KEY_LEN, L1_KEY_LEN, nh_result);
            poly_hash(ahc,(UINT32 *)nh_result);
            len -= L1_KEY_LEN;
            msg += L1_KEY_LEN;
        }

        /* Trailing partial chunk, zero padded up to the pad boundary */
        if (len != 0) {
            nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1));
            extra_zeroes_needed = nh_len - len;
            zero_pad((UINT8 *)msg + len, extra_zeroes_needed);
            nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result);
            poly_hash(ahc,(UINT32 *)nh_result);
        }

        ip_long(ahc, res);
    } else {
        /* Message no longer than L1_KEY_LEN: the polyhash is skipped.
         * An empty message is still hashed as one padded block.
         */
        nh_len = (len == 0)
            ? L1_PAD_BOUNDARY
            : ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1));
        extra_zeroes_needed = nh_len - len;
        zero_pad((UINT8 *)msg + len, extra_zeroes_needed);
        nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result);
        ip_short(ahc,nh_result, res);
    }

    uhash_reset(ahc);
    return 1;
}
#endif
! 1159:
! 1160: /* ---------------------------------------------------------------------- */
! 1161: /* ---------------------------------------------------------------------- */
! 1162: /* ----- Begin UMAC Section --------------------------------------------- */
! 1163: /* ---------------------------------------------------------------------- */
! 1164: /* ---------------------------------------------------------------------- */
! 1165:
! 1166: /* The UMAC interface has two interfaces, an all-at-once interface where
! 1167: * the entire message to be authenticated is passed to UMAC in one buffer,
! 1168: * and a sequential interface where the message is presented a little at a
! 1169: * time. The all-at-once is more optimaized than the sequential version and
! 1170: * should be preferred when the sequential interface is not required.
! 1171: */
! 1172: struct umac_ctx {
! 1173: uhash_ctx hash; /* Hash function for message compression */
! 1174: pdf_ctx pdf; /* PDF for hashed output */
! 1175: void *free_ptr; /* Address to free this struct via */
! 1176: } umac_ctx;
! 1177:
! 1178: /* ---------------------------------------------------------------------- */
! 1179:
#if 0
/* Discard any partially hashed message so that ctx can start a new
 * authentication.  Always returns 1.
 */
int umac_reset(struct umac_ctx *ctx)
{
    (void)uhash_reset(&ctx->hash);

    return (1);
}
#endif
! 1188:
! 1189: /* ---------------------------------------------------------------------- */
! 1190:
! 1191: int umac_delete(struct umac_ctx *ctx)
! 1192: /* Deallocate the ctx structure */
! 1193: {
! 1194: if (ctx) {
! 1195: if (ALLOC_BOUNDARY)
! 1196: ctx = (struct umac_ctx *)ctx->free_ptr;
! 1197: free(ctx);
! 1198: }
! 1199: return (1);
! 1200: }
! 1201:
! 1202: /* ---------------------------------------------------------------------- */
! 1203:
! 1204: struct umac_ctx *umac_new(u_char key[])
! 1205: /* Dynamically allocate a umac_ctx struct, initialize variables,
! 1206: * generate subkeys from key. Align to 16-byte boundary.
! 1207: */
! 1208: {
! 1209: struct umac_ctx *ctx, *octx;
! 1210: size_t bytes_to_add;
! 1211: aes_int_key prf_key;
! 1212:
! 1213: octx = ctx = malloc(sizeof(*ctx) + ALLOC_BOUNDARY);
! 1214: if (ctx) {
! 1215: if (ALLOC_BOUNDARY) {
! 1216: bytes_to_add = ALLOC_BOUNDARY -
! 1217: ((ptrdiff_t)ctx & (ALLOC_BOUNDARY - 1));
! 1218: ctx = (struct umac_ctx *)((u_char *)ctx + bytes_to_add);
! 1219: }
! 1220: ctx->free_ptr = octx;
! 1221: aes_key_setup(key,prf_key);
! 1222: pdf_init(&ctx->pdf, prf_key);
! 1223: uhash_init(&ctx->hash, prf_key);
! 1224: }
! 1225:
! 1226: return (ctx);
! 1227: }
! 1228:
! 1229: /* ---------------------------------------------------------------------- */
! 1230:
! 1231: int umac_final(struct umac_ctx *ctx, u_char tag[], u_char nonce[8])
! 1232: /* Incorporate any pending data, pad, and generate tag */
! 1233: {
! 1234: uhash_final(&ctx->hash, (u_char *)tag);
! 1235: pdf_gen_xor(&ctx->pdf, (UINT8 *)nonce, (UINT8 *)tag);
! 1236:
! 1237: return (1);
! 1238: }
! 1239:
! 1240: /* ---------------------------------------------------------------------- */
! 1241:
! 1242: int umac_update(struct umac_ctx *ctx, u_char *input, long len)
! 1243: /* Given len bytes of data, we parse it into L1_KEY_LEN chunks and */
! 1244: /* hash each one, calling the PDF on the hashed output whenever the hash- */
! 1245: /* output buffer is full. */
! 1246: {
! 1247: uhash_update(&ctx->hash, input, len);
! 1248: return (1);
! 1249: }
! 1250:
! 1251: /* ---------------------------------------------------------------------- */
! 1252:
#if 0
/* All-in-one version: hashes the whole message with uhash() and then
 * combines the PDF output selected by nonce into tag via pdf_gen_xor().
 * Always returns 1.
 */
int umac(struct umac_ctx *ctx, u_char *input,
         long len, u_char tag[],
         u_char nonce[8])
{
    (void)uhash(&ctx->hash, input, len, (u_char *)tag);
    pdf_gen_xor(&ctx->pdf, (UINT8 *)nonce, (UINT8 *)tag);

    return (1);
}
#endif
! 1265:
! 1266: /* ---------------------------------------------------------------------- */
! 1267: /* ---------------------------------------------------------------------- */
! 1268: /* ----- End UMAC Section ----------------------------------------------- */
! 1269: /* ---------------------------------------------------------------------- */
! 1270: /* ---------------------------------------------------------------------- */