diff --git a/lib/libc/gen/arc4random.3 b/lib/libc/gen/arc4random.3 index 735aaf5abd82..c006516044dd 100644 --- a/lib/libc/gen/arc4random.3 +++ b/lib/libc/gen/arc4random.3 @@ -1,9 +1,11 @@ -.\" $NetBSD: arc4random.3,v 1.9 2011/02/05 00:24:08 wiz Exp $ -.\" $OpenBSD: arc4random.3,v 1.17 2000/12/21 14:07:41 aaron Exp $ +.\" $NetBSD: arc4random.3,v 1.10 2014/11/16 20:33:04 riastradh Exp $ .\" -.\" Copyright 1997 Niels Provos +.\" Copyright (c) 2014 The NetBSD Foundation, Inc. .\" All rights reserved. .\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Taylor R. Campbell. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: @@ -12,122 +14,243 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by Niels Provos. -.\" 4. The name of the author may not be used to endorse or promote products -.\" derived from this software without specific prior written permission. .\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. .\" -.\" Manual page, using -mandoc macros -.\" -.Dd February 4, 2011 +.Dd November 16, 2014 .Dt ARC4RANDOM 3 .Os .Sh NAME .Nm arc4random , -.Nm arc4random_buf , .Nm arc4random_uniform , +.Nm arc4random_buf , .Nm arc4random_stir , .Nm arc4random_addrandom -.Nd arc4 random number generator +.Nd random number generator .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In stdlib.h .Ft uint32_t .Fn arc4random "void" -.Ft void -.Fn arc4random_buf "void *buffer" "size_t length" .Ft uint32_t -.Fn arc4random_uniform "uint32_t upper_bound" +.Fn arc4random_uniform "uint32_t bound" +.Ft void +.Fn arc4random_buf "void *buf" "size_t len" .Ft void .Fn arc4random_stir "void" .Ft void -.Fn arc4random_addrandom "u_char *dat" "int datlen" +.Fn arc4random_addrandom "unsigned char *buf" "int len" .Sh DESCRIPTION The -.Fn arc4random -function provides a high quality 32-bit pseudo-random -number very quickly. -.Fn arc4random -seeds itself on a regular basis from the kernel strong random number -subsystem described in -.Xr rnd 4 . -On each call, an ARC4 generator is used to generate a new result. -The -.Fn arc4random -function uses the ARC4 cipher key stream generator, -which uses 8*8 8 bit S-Boxes. -The S-Boxes can be in about (2**1700) states. -.Pp -.Fn arc4random -fits into a middle ground not covered by other subsystems such as -the strong, slow, and resource expensive random -devices described in -.Xr rnd 4 -versus the fast but poor quality interfaces described in -.Xr rand 3 , -.Xr random 3 , -and -.Xr drand48 3 . -.Pp -The -.Fn arc4random_buf -function fills the -.Fa buffer -with -.Fa length -bytes of ARC4-derived random data. -.Pp -The -.Fn arc4random_uniform -function returns a uniformly distributed random number less than -.Fa upper_bound -avoiding modulo bias when the upper bound is not a power of two. -.Pp -The -.Fn arc4random_stir -function reads data from +.Nm +family of functions provides a cryptographic pseudorandom number +generator automatically seeded from the system entropy pool and safe to +use from multiple threads. +.Nm +is faster and more convenient than reading from .Pa /dev/urandom -and uses it to permute the S-Boxes via -.Fn arc4random_addrandom . +directly. .Pp -There is no need to call -.Fn arc4random_stir -before using -.Fn arc4random , -since .Fn arc4random -automatically initializes itself. +returns an integer in [0, 2^32) chosen independently with uniform +distribution. +.Pp +.Fn arc4random_uniform +returns an integer in [0, +.Fa bound ) +chosen independently with uniform distribution. +.Pp +.Fn arc4random_buf +stores +.Fa len +bytes into the memory pointed to by +.Fa buf , +each byte chosen independently from [0, 256) with uniform +distribution. +.Pp +.Fn arc4random_stir +draws entropy from the operating system and incorporates it into the +library's PRNG state to influence future outputs. +.Pp +.Fn arc4random_addrandom +incorporates +.Fa len +bytes, which must be nonnegative, from the buffer +.Fa buf , +into the library's PRNG state to influence future outputs. +.Pp +It is not necessary for an application to call +.Fn arc4random_stir +or +.Fn arc4random_addrandom +before calling other +.Nm +functions. +The first call to any +.Nm +function will initialize the PRNG state unpredictably from the system +entropy pool. +.Sh SECURITY MODEL +The +.Nm +functions provides the following security properties against three +different classes of attackers, assuming that the state of the +operating system's entropy pool is unknown to the attacker: +.Bl -bullet -offset abcd -compact +.It +An attacker who has seen some outputs of any of the +.Nm +functions cannot predict past or future unseen outputs. +.It +An attacker who has seen the library's PRNG state in memory cannot +predict past outputs. +.It +An attacker who has seen one process's PRNG state cannot predict past +or future outputs in other processes, particularly its parent or +siblings. +.El +.Sh IMPLEMENTATION NOTES +The +.Nm +functions are currently implemented using the ChaCha20 pseudorandom +function family. +For any 32-byte string +.Fa s , +.Pf ChaCha20_ Fa s +is a function from 16-byte strings to 64-byte strings. +It is conjectured that if +.Fa s +is chosen with uniform distribution, then the distribution on +.Pf ChaCha20_ Fa s +is indistinguishable to a computationally bounded adversary from a +uniform distribution on all functions from 16-byte strings to 64-byte +strings. +.Pp +The PRNG state is a 32-byte ChaCha20 key +.Fa s . +Each request to +an +.Nm +function +.Bl -bullet -offset abcd -compact +.It +computes the 64-byte quantity +.Fa x += +.Pf ChaCha20_ Fa s Ns (0), +.It +splits +.Fa x +into two 32-byte quantities +.Fa s' +and +.Fa k , +.It +replaces +.Fa s +by +.Fa s' , +and +.It +uses +.Fa k +as output. +.El +.Pp +.Fn arc4random +yields the first four bytes of +.Fa k +as output directly. +.Fn arc4random_buf +either yields up to 32 bytes of +.Fa k +as output directly, or, for longer +requests, uses +.Fa k +as a ChaCha20 key and yields the concatenation +.Pf ChaCha20_ Fa k Ns (0) +|| +.Pf ChaCha20_ Fa k Ns (1) +|| ... as output. +.Fn arc4random_uniform +repeats +.Fn arc4random +until it obtains an integer in [2^32 % +.Fa bound , +2^32), and reduces that modulo +.Fa bound . +.Pp +The PRNG state is per-thread, unless memory allocation fails inside the +library, in which case some threads may share global PRNG state with a +mutex. +The global PRNG state is zeroed on fork in the parent via +.Xr pthread_atfork 3 , +and the per-thread PRNG state is zeroed on fork in the child via +.Xr minherit 2 +with +.Dv MAP_INHERIT_ZERO , +so that the child cannot reuse or see the parent's PRNG state. +The PRNG state is reseeded automatically from the system entropy pool +on the first use of an +.Nm +function after zeroing. +.Pp +The first use of an +.Nm +function may abort the process in the highly unlikely event that +library initialization necessary to implement the security model fails. +Additionally, +.Fn arc4random_stir +and +.Fn arc4random_addrandom +may abort the process in the highly unlikely event that the operating +system fails to provide entropy. .Sh SEE ALSO .Xr rand 3 , -.Xr rand48 3 , -.Xr random 3 -.Sh HISTORY -An algorithm called -.Pa RC4 -was designed by RSA Data Security, Inc. -It was considered a trade secret, but not trademarked. -Because it was a trade secret, it obviously could not be patented. -A clone of this was posted anonymously to USENET and confirmed to -be equivalent by several sources who had access to the original cipher. -Because of the trade secret situation, RSA Data Security, Inc. can do -nothing about the release of the ARC4 algorithm. -Since -.Pa RC4 -used to be a trade secret, the cipher is now referred to as -.Pa ARC4 . +.Xr random 3 , +.Xr cprng 9 +.Rs +.%A Daniel J. Bernstein +.%T ChaCha, a variant of Salsa20 +.%D 2008-01-28 +.%O Document ID: 4027b5256e17b9796842e6d0f68b0b5e +.%U http://cr.yp.to/papers.html#chacha +.Re +.Sh BUGS +There is no way to get deterministic, reproducible results out of +.Nm +for testing purposes. .Pp -These functions first appeared in -.Ox 2.1 . +The name +.Sq arc4random +was chosen for hysterical raisins, because it was originally +implemented using the RC4 stream cipher, which is now known to be +badly enough biased to admit practical attacks in the real world. +Unfortunately, the library found widespread adoption and the name +stuck before anyone recognized that it was silly. +.Pp +The signature of +.Fn arc4random_addrandom +is silly. +There is no reason to require casts or accept negative lengths: +it should take a +.Vt void * +buffer and a +.Vt size_t +length. +But it's too late to change that now. +.Pp +.Fn arc4random_uniform +does not help to choose integers in [0, n) uniformly at random when n > +2^32. diff --git a/lib/libc/gen/arc4random.c b/lib/libc/gen/arc4random.c index 0138d97c40bf..13700a1e34f2 100644 --- a/lib/libc/gen/arc4random.c +++ b/lib/libc/gen/arc4random.c @@ -1,47 +1,76 @@ -/* $NetBSD: arc4random.c,v 1.25 2014/07/19 14:53:22 roy Exp $ */ -/* $OpenBSD: arc4random.c,v 1.6 2001/06/05 05:05:38 pvalchev Exp $ */ +/* $NetBSD: arc4random.c,v 1.26 2014/11/16 20:33:04 riastradh Exp $ */ -/* - * Arc4 random number generator for OpenBSD. - * Copyright 1996 David Mazieres . +/*- + * Copyright (c) 2014 The NetBSD Foundation, Inc. + * All rights reserved. * - * Modification and redistribution in source and binary forms is - * permitted provided that due credit is given to the author and the - * OpenBSD project by leaving this copyright notice intact. + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ /* - * This code is derived from section 17.1 of Applied Cryptography, - * second edition, which describes a stream cipher allegedly - * compatible with RSA Labs "RC4" cipher (the actual description of - * which is a trade secret). The same algorithm is used as a stream - * cipher called "arcfour" in Tatu Ylonen's ssh package. + * Legacy arc4random(3) API from OpenBSD reimplemented using the + * ChaCha20 PRF, with per-thread state. * - * Here the stream cipher has been modified always to include the time - * when initializing the state. That makes it impossible to - * regenerate the same random sequence twice, so this can't be used - * for encryption, but will generate good random numbers. + * Security model: + * - An attacker who sees some outputs cannot predict past or future + * outputs. + * - An attacker who sees the PRNG state cannot predict past outputs. + * - An attacker who sees a child's PRNG state cannot predict past or + * future outputs in the parent, or in other children. * - * RC4 is a registered trademark of RSA Laboratories. + * The arc4random(3) API may abort the process if: + * + * (a) the crypto self-test fails, + * (b) pthread_atfork or thr_keycreate fail, or + * (c) sysctl(KERN_ARND) fails when reseeding the PRNG. + * + * The crypto self-test, pthread_atfork, and thr_keycreate occur only + * once, on the first use of any of the arc4random(3) API. KERN_ARND + * is unlikely to fail later unless the kernel is seriously broken. */ #include -#if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: arc4random.c,v 1.25 2014/07/19 14:53:22 roy Exp $"); -#endif /* LIBC_SCCS and not lint */ +__RCSID("$NetBSD: arc4random.c,v 1.26 2014/11/16 20:33:04 riastradh Exp $"); #include "namespace.h" #include "reentrant.h" -#include -#include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include + #ifdef __weak_alias __weak_alias(arc4random,_arc4random) __weak_alias(arc4random_addrandom,_arc4random_addrandom) @@ -50,286 +79,713 @@ __weak_alias(arc4random_stir,_arc4random_stir) __weak_alias(arc4random_uniform,_arc4random_uniform) #endif -#define REKEY_BYTES 1600000 - -struct arc4_stream { - bool inited; - uint8_t i; - uint8_t j; - uint8_t s[(uint8_t)~0u + 1u]; /* 256 to you and me */ - size_t count; - mutex_t mtx; -}; - -#ifdef _REENTRANT -#define LOCK(rs) do { \ - if (__isthreaded) mutex_lock(&(rs)->mtx); \ - } while (/*CONSTCOND*/ 0) -#define UNLOCK(rs) do { \ - if (__isthreaded) mutex_unlock(&(rs)->mtx); \ - } while (/*CONSTCOND*/ 0) -#else -#define LOCK(rs) -#define UNLOCK(rs) -#endif - -#define S(n) (n) -#define S4(n) S(n), S(n + 1), S(n + 2), S(n + 3) -#define S16(n) S4(n), S4(n + 4), S4(n + 8), S4(n + 12) -#define S64(n) S16(n), S16(n + 16), S16(n + 32), S16(n + 48) -#define S256 S64(0), S64(64), S64(128), S64(192) - -static struct arc4_stream rs = { .inited = false, - .i = 0xff, .j = 0, .s = { S256 }, - .count = 0, .mtx = MUTEX_INITIALIZER }; - -#undef S -#undef S4 -#undef S16 -#undef S64 -#undef S256 - -static inline void arc4_addrandom(struct arc4_stream *, u_char *, int); -static __noinline void arc4_stir(struct arc4_stream *); -static inline uint8_t arc4_getbyte(struct arc4_stream *); -static inline uint32_t arc4_getword(struct arc4_stream *); - -#ifdef _REENTRANT -static void -arc4_fork_prepare(void) -{ - - LOCK(&rs); -} - -static void -arc4_fork_parent(void) -{ - - UNLOCK(&rs); -} -#else -#define arc4_fork_prepare NULL -#define arc4_fork_parent NULL -#endif - -static void -arc4_fork_child(void) -{ - - /* Reset the counter to a force new stir after forking */ - rs.count = 0; - UNLOCK(&rs); -} - -static inline void -arc4_check_init(struct arc4_stream *as) -{ - /* - * pthread_atfork(3) only allows async-signal-safe functions in - * the child handler. - * NetBSD's mutex_unlock is async-signal safe, other implementations - * may not be. - */ - - if (__predict_false(!as->inited)) { - as->inited = true; - pthread_atfork(arc4_fork_prepare, - arc4_fork_parent, arc4_fork_child); - } -} - -static inline void -arc4_addrandom(struct arc4_stream *as, u_char *dat, int datlen) -{ - uint8_t si; - size_t n; - - for (n = 0; n < __arraycount(as->s); n++) { - as->i = (as->i + 1); - si = as->s[as->i]; - as->j = (as->j + si + dat[n % datlen]); - as->s[as->i] = as->s[as->j]; - as->s[as->j] = si; - } -} - -static __noinline void -arc4_stir(struct arc4_stream *as) -{ - int rdat[32]; - int mib[] = { CTL_KERN, KERN_URND }; - size_t len; - size_t i, j; - - arc4_check_init(as); - - /* - * This code once opened and read /dev/urandom on each - * call. That causes repeated rekeying of the kernel stream - * generator, which is very wasteful. Because of application - * behavior, caching the fd doesn't really help. So we just - * fill up the tank from sysctl, which is a tiny bit slower - * for us but much friendlier to other entropy consumers. - */ - - for (i = 0; i < __arraycount(rdat); i++) { - len = sizeof(rdat[i]); - if (sysctl(mib, 2, &rdat[i], &len, NULL, 0) == -1) - abort(); - } - - arc4_addrandom(as, (void *) &rdat, (int)sizeof(rdat)); - - /* - * Throw away the first N words of output, as suggested in the - * paper "Weaknesses in the Key Scheduling Algorithm of RC4" - * by Fluher, Mantin, and Shamir. (N = 256 in our case.) - */ - for (j = 0; j < __arraycount(as->s) * sizeof(uint32_t); j++) - arc4_getbyte(as); - - /* Stir again after REKEY_BYTES bytes, or if the pid changes */ - as->count = REKEY_BYTES; -} - -static inline void -arc4_stir_if_needed(struct arc4_stream *as, size_t len) -{ - - if (__predict_false(as->count <= len)) - arc4_stir(as); - else - as->count -= len; -} - -static __inline uint8_t -arc4_getbyte_ij(struct arc4_stream *as, uint8_t *i, uint8_t *j) -{ - uint8_t si, sj; - - *i = *i + 1; - si = as->s[*i]; - *j = *j + si; - sj = as->s[*j]; - as->s[*i] = sj; - as->s[*j] = si; - return (as->s[(si + sj) & 0xff]); -} - -static inline uint8_t -arc4_getbyte(struct arc4_stream *as) -{ - - return arc4_getbyte_ij(as, &as->i, &as->j); -} +/* + * For standard ChaCha, use le32dec/le32enc. We don't need that for + * the purposes of a nondeterministic random number generator -- we + * don't need to be bit-for-bit compatible over any wire. + */ static inline uint32_t -arc4_getword(struct arc4_stream *as) +crypto_le32dec(const void *p) { - uint32_t val; + uint32_t v; - val = arc4_getbyte(as) << 24; - val |= arc4_getbyte(as) << 16; - val |= arc4_getbyte(as) << 8; - val |= arc4_getbyte(as); - return val; + (void)memcpy(&v, p, sizeof v); + + return v; } -void -arc4random_stir(void) +static inline void +crypto_le32enc(void *p, uint32_t v) { - LOCK(&rs); - arc4_stir(&rs); - UNLOCK(&rs); + (void)memcpy(p, &v, sizeof v); } -void -arc4random_addrandom(u_char *dat, int datlen) +/* ChaCha core */ + +#define crypto_core_OUTPUTBYTES 64 +#define crypto_core_INPUTBYTES 16 +#define crypto_core_KEYBYTES 32 +#define crypto_core_CONSTBYTES 16 + +#define crypto_core_ROUNDS 8 + +static uint32_t +rotate(uint32_t u, unsigned c) { - LOCK(&rs); - arc4_stir_if_needed(&rs, datlen); - arc4_addrandom(&rs, dat, datlen); - UNLOCK(&rs); + return (u << c) | (u >> (32 - c)); } +#define QUARTERROUND(a, b, c, d) do { \ + (a) += (b); (d) ^= (a); (d) = rotate((d), 16); \ + (c) += (d); (b) ^= (c); (b) = rotate((b), 12); \ + (a) += (b); (d) ^= (a); (d) = rotate((d), 8); \ + (c) += (d); (b) ^= (c); (b) = rotate((b), 7); \ +} while (0) + +const uint8_t crypto_core_constant32[16] = "expand 32-byte k"; + +static void +crypto_core(uint8_t *out, const uint8_t *in, const uint8_t *k, + const uint8_t *c) +{ + uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; + uint32_t j0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14,j15; + int i; + + j0 = x0 = crypto_le32dec(c + 0); + j1 = x1 = crypto_le32dec(c + 4); + j2 = x2 = crypto_le32dec(c + 8); + j3 = x3 = crypto_le32dec(c + 12); + j4 = x4 = crypto_le32dec(k + 0); + j5 = x5 = crypto_le32dec(k + 4); + j6 = x6 = crypto_le32dec(k + 8); + j7 = x7 = crypto_le32dec(k + 12); + j8 = x8 = crypto_le32dec(k + 16); + j9 = x9 = crypto_le32dec(k + 20); + j10 = x10 = crypto_le32dec(k + 24); + j11 = x11 = crypto_le32dec(k + 28); + j12 = x12 = crypto_le32dec(in + 0); + j13 = x13 = crypto_le32dec(in + 4); + j14 = x14 = crypto_le32dec(in + 8); + j15 = x15 = crypto_le32dec(in + 12); + + for (i = crypto_core_ROUNDS; i > 0; i -= 2) { + QUARTERROUND( x0, x4, x8,x12); + QUARTERROUND( x1, x5, x9,x13); + QUARTERROUND( x2, x6,x10,x14); + QUARTERROUND( x3, x7,x11,x15); + QUARTERROUND( x0, x5,x10,x15); + QUARTERROUND( x1, x6,x11,x12); + QUARTERROUND( x2, x7, x8,x13); + QUARTERROUND( x3, x4, x9,x14); + } + + crypto_le32enc(out + 0, x0 + j0); + crypto_le32enc(out + 4, x1 + j1); + crypto_le32enc(out + 8, x2 + j2); + crypto_le32enc(out + 12, x3 + j3); + crypto_le32enc(out + 16, x4 + j4); + crypto_le32enc(out + 20, x5 + j5); + crypto_le32enc(out + 24, x6 + j6); + crypto_le32enc(out + 28, x7 + j7); + crypto_le32enc(out + 32, x8 + j8); + crypto_le32enc(out + 36, x9 + j9); + crypto_le32enc(out + 40, x10 + j10); + crypto_le32enc(out + 44, x11 + j11); + crypto_le32enc(out + 48, x12 + j12); + crypto_le32enc(out + 52, x13 + j13); + crypto_le32enc(out + 56, x14 + j14); + crypto_le32enc(out + 60, x15 + j15); +} + +/* ChaCha self-test */ + +#ifdef _DIAGNOSTIC + +/* + * Test vector for ChaCha20 from + * , + * test vectors for ChaCha12 and ChaCha8 and for big-endian machines + * generated by the same crypto_core code with crypto_core_ROUNDS and + * crypto_le32enc/dec varied. + */ + +static const uint8_t crypto_core_selftest_vector[64] = { +#if _BYTE_ORDER == _LITTLE_ENDIAN +# if crypto_core_ROUNDS == 8 + 0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6, + 0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1, + 0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b, + 0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e, + 0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41, + 0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19, + 0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01, + 0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42, +# elif crypto_core_ROUNDS == 12 + 0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53, + 0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5, + 0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14, + 0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f, + 0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0, + 0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79, + 0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19, + 0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe, +# elif crypto_core_ROUNDS == 20 + 0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90, + 0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28, + 0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a, + 0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7, + 0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d, + 0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37, + 0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c, + 0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86, +# else +# error crypto_core_ROUNDS must be 8, 12, or 20. +# endif +#elif _BYTE_ORDER == _BIG_ENDIAN +# if crypto_core_ROUNDS == 8 + 0x9a,0x13,0x07,0xe3,0x38,0x18,0x9e,0x99, + 0x15,0x37,0x16,0x4d,0x04,0xe6,0x48,0x9a, + 0x07,0xd6,0xe8,0x7a,0x02,0xf9,0xf5,0xc7, + 0x3f,0xa9,0xc2,0x0a,0xe1,0xc6,0x62,0xea, + 0x80,0xaf,0xb6,0x51,0xca,0x52,0x43,0x87, + 0xe3,0xa6,0xa6,0x61,0x11,0xf5,0xe6,0xcf, + 0x09,0x0f,0xdc,0x9d,0xc3,0xc3,0xbb,0x43, + 0xd7,0xfa,0x70,0x42,0xbf,0xa5,0xee,0xa2, +# elif crypto_core_ROUNDS == 12 + 0xcf,0x6c,0x16,0x48,0xbf,0xf4,0xba,0x85, + 0x32,0x69,0xd3,0x98,0xc8,0x7d,0xcd,0x3f, + 0xdc,0x76,0x6b,0xa2,0x7b,0xcb,0x17,0x4d, + 0x05,0xda,0xdd,0xd8,0x62,0x54,0xbf,0xe0, + 0x65,0xed,0x0e,0xf4,0x01,0x7e,0x3c,0x05, + 0x35,0xb2,0x7a,0x60,0xf3,0x8f,0x12,0x33, + 0x24,0x60,0xcd,0x85,0xfe,0x4c,0xf3,0x39, + 0xb1,0x0e,0x3e,0xe0,0xba,0xa6,0x2f,0xa9, +# elif crypto_core_ROUNDS == 20 + 0x83,0x8b,0xf8,0x75,0xf7,0xde,0x9d,0x8c, + 0x33,0x14,0x72,0x28,0xd1,0xbe,0x88,0xe5, + 0x94,0xb5,0xed,0xb8,0x56,0xb5,0x9e,0x0c, + 0x64,0x6a,0xaf,0xd9,0xa7,0x49,0x10,0x59, + 0xba,0x3a,0x82,0xf8,0x4a,0x70,0x9c,0x00, + 0x82,0x2c,0xae,0xc6,0xd7,0x1c,0x2e,0xda, + 0x2a,0xfb,0x61,0x70,0x2b,0xd1,0xbf,0x8b, + 0x95,0xbc,0x23,0xb6,0x4b,0x60,0x02,0xec, +# else +# error crypto_core_ROUNDS must be 8, 12, or 20. +# endif +#else +# error Byte order must be little-endian or big-endian. +#endif +}; + +static int +crypto_core_selftest(void) +{ + const uint8_t nonce[crypto_core_INPUTBYTES] = {0}; + const uint8_t key[crypto_core_KEYBYTES] = {0}; + uint8_t block[64]; + unsigned i; + + crypto_core(block, nonce, key, crypto_core_constant32); + for (i = 0; i < 64; i++) { + if (block[i] != crypto_core_selftest_vector[i]) + return EIO; + } + + return 0; +} + +#else /* !_DIAGNOSTIC */ + +static int +crypto_core_selftest(void) +{ + + return 0; +} + +#endif + +/* PRNG */ + +/* + * For a state s, rather than use ChaCha20 as a stream cipher to + * generate the concatenation ChaCha20_s(0) || ChaCha20_s(1) || ..., we + * split ChaCha20_s(0) into s' || x and yield x for the first request, + * split ChaCha20_s'(0) into s'' || y and yield y for the second + * request, &c. This provides backtracking resistance: an attacker who + * finds s'' can't recover s' or x. + */ + +#define crypto_prng_SEEDBYTES crypto_core_KEYBYTES +#define crypto_prng_MAXOUTPUTBYTES \ + (crypto_core_OUTPUTBYTES - crypto_prng_SEEDBYTES) + +struct crypto_prng { + uint8_t state[crypto_prng_SEEDBYTES]; +}; + +static void +crypto_prng_seed(struct crypto_prng *prng, const void *seed) +{ + + (void)memcpy(prng->state, seed, crypto_prng_SEEDBYTES); +} + +static void +crypto_prng_buf(struct crypto_prng *prng, void *buf, size_t n) +{ + const uint8_t nonce[crypto_core_INPUTBYTES] = {0}; + uint8_t output[crypto_core_OUTPUTBYTES]; + + _DIAGASSERT(n <= crypto_prng_MAXOUTPUTBYTES); + __CTASSERT(sizeof prng->state + crypto_prng_MAXOUTPUTBYTES + <= sizeof output); + + crypto_core(output, nonce, prng->state, crypto_core_constant32); + (void)memcpy(prng->state, output, sizeof prng->state); + (void)memcpy(buf, output + sizeof prng->state, n); + (void)explicit_memset(output, 0, sizeof output); +} + +/* One-time stream: expand short single-use secret into long secret */ + +#define crypto_onetimestream_SEEDBYTES crypto_core_KEYBYTES + +static void +crypto_onetimestream(const void *seed, void *buf, size_t n) +{ + uint32_t nonce[crypto_core_INPUTBYTES / sizeof(uint32_t)] = {0}; + uint8_t block[crypto_core_OUTPUTBYTES]; + uint8_t *p8, *p32; + const uint8_t *nonce8 = (const uint8_t *)nonce; + size_t ni, nb, nf; + + /* + * Guarantee we can generate up to n bytes. We have + * 2^(8*INPUTBYTES) possible inputs yielding output of + * OUTPUTBYTES*2^(8*INPUTBYTES) bytes. It suffices to require + * that sizeof n > (1/CHAR_BIT) log_2 n be less than + * (1/CHAR_BIT) log_2 of the total output stream length. We + * have + * + * log_2 (o 2^(8 i)) = log_2 o + log_2 2^(8 i) + * = log_2 o + 8 i. + */ + __CTASSERT(CHAR_BIT*sizeof n <= + (ilog2(crypto_core_OUTPUTBYTES) + 8*crypto_core_INPUTBYTES)); + + p8 = buf; + p32 = (uint8_t *)roundup2((uintptr_t)p8, 4); + ni = p32 - p8; + if (n < ni) + ni = n; + nb = (n - ni) / sizeof block; + nf = (n - ni) % sizeof block; + + _DIAGASSERT(((uintptr_t)p32 & 3) == 0); + _DIAGASSERT(ni <= n); + _DIAGASSERT(nb <= (n / sizeof block)); + _DIAGASSERT(nf <= n); + _DIAGASSERT(n == (ni + (nb * sizeof block) + nf)); + _DIAGASSERT(ni < 4); + _DIAGASSERT(nf < sizeof block); + + if (ni) { + crypto_core(block, nonce8, seed, crypto_core_constant32); + nonce[0]++; + (void)memcpy(p8, block, ni); + } + while (nb--) { + crypto_core(p32, nonce8, seed, crypto_core_constant32); + if (++nonce[0] == 0) + nonce[1]++; + p32 += crypto_core_OUTPUTBYTES; + } + if (nf) { + crypto_core(block, nonce8, seed, crypto_core_constant32); + if (++nonce[0] == 0) + nonce[1]++; + (void)memcpy(p32, block, nf); + } + + if (ni | nf) + (void)explicit_memset(block, 0, sizeof block); +} + +/* arc4random state: per-thread, per-process (zeroed in child on fork) */ + +struct arc4random_prng { + struct crypto_prng arc4_prng; + bool arc4_seeded; +}; + +static void +arc4random_prng_addrandom(struct arc4random_prng *prng, const void *data, + size_t datalen) +{ + const int mib[] = { CTL_KERN, KERN_ARND }; + SHA256_CTX ctx; + uint8_t buf[crypto_prng_SEEDBYTES]; + size_t buflen = sizeof buf; + + __CTASSERT(sizeof buf == SHA256_DIGEST_LENGTH); + + SHA256_Init(&ctx); + + crypto_prng_buf(&prng->arc4_prng, buf, sizeof buf); + SHA256_Update(&ctx, buf, sizeof buf); + + if (sysctl(mib, __arraycount(mib), buf, &buflen, NULL, 0) == -1) + abort(); + if (buflen != sizeof buf) + abort(); + SHA256_Update(&ctx, buf, sizeof buf); + + if (data != NULL) + SHA256_Update(&ctx, data, datalen); + + SHA256_Final(buf, &ctx); + (void)explicit_memset(&ctx, 0, sizeof ctx); + + /* reseed(SHA256(prng() || sysctl(KERN_ARND) || data)) */ + crypto_prng_seed(&prng->arc4_prng, buf); + (void)explicit_memset(buf, 0, sizeof buf); + prng->arc4_seeded = true; +} + +#ifdef _REENTRANT +static struct arc4random_prng * +arc4random_prng_create(void) +{ + struct arc4random_prng *prng; + const size_t size = roundup(sizeof(*prng), sysconf(_SC_PAGESIZE)); + + prng = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANON, -1, 0); + if (prng == MAP_FAILED) + goto fail0; +#ifdef MAP_INHERIT_ZERO + if (minherit(prng, size, MAP_INHERIT_ZERO) == -1) + goto fail1; +#else +#warning This arc4random is not fork-safe! +#endif + + return prng; + +#ifdef MAP_INHERIT_ZERO +fail1: (void)munmap(prng, size); +#endif +fail0: return NULL; +} +#endif + +#ifdef _REENTRANT +static void +arc4random_prng_destroy(struct arc4random_prng *prng) +{ + const size_t size = roundup(sizeof(*prng), sysconf(_SC_PAGESIZE)); + + (void)explicit_memset(prng, 0, sizeof(*prng)); + (void)munmap(prng, size); +} +#endif + +/* Library state */ + +#if !defined(_REENTRANT) +#define mutex_lock(m) do {} while (0) +#define mutex_unlock(m) do {} while (0) +#endif + +static struct arc4random_global { +#ifdef _REENTRANT + mutex_t lock; + thread_key_t thread_key; +#endif + struct arc4random_prng prng; + bool initialized; +} arc4random_global = { +#ifdef _REENTRANT + .lock = MUTEX_INITIALIZER, +#endif + .initialized = false, +}; + +static void +arc4random_atfork_prepare(void) +{ + + mutex_lock(&arc4random_global.lock); + (void)explicit_memset(&arc4random_global.prng, 0, + sizeof arc4random_global.prng); +} + +static void +arc4random_atfork_parent(void) +{ + + mutex_unlock(&arc4random_global.lock); +} + +static void +arc4random_atfork_child(void) +{ + + mutex_unlock(&arc4random_global.lock); +} + +#ifdef _REENTRANT +static void +arc4random_tsd_destructor(void *p) +{ + struct arc4random_prng *const prng = p; + + arc4random_prng_destroy(prng); +} +#endif + +static void +arc4random_initialize(void) +{ + + mutex_lock(&arc4random_global.lock); + if (!arc4random_global.initialized) { + if (crypto_core_selftest() != 0) + abort(); + if (pthread_atfork(&arc4random_atfork_prepare, + &arc4random_atfork_parent, &arc4random_atfork_child) + != 0) + abort(); +#ifdef _REENTRANT + if (thr_keycreate(&arc4random_global.thread_key, + &arc4random_tsd_destructor) != 0) + abort(); +#endif + arc4random_global.initialized = true; + } + mutex_unlock(&arc4random_global.lock); +} + +static struct arc4random_prng * +arc4random_prng_get(void) +{ + struct arc4random_prng *prng = NULL; + + /* Make sure the library is initialized. */ + if (__predict_false(!arc4random_global.initialized)) + arc4random_initialize(); + +#ifdef _REENTRANT + /* Get or create the per-thread PRNG state. */ + prng = thr_getspecific(arc4random_global.thread_key); + if (__predict_false(prng == NULL)) { + prng = arc4random_prng_create(); + thr_setspecific(arc4random_global.thread_key, prng); + } +#endif + + /* If we can't create it, fall back to the global PRNG. */ + if (__predict_false(prng == NULL)) { + mutex_lock(&arc4random_global.lock); + prng = &arc4random_global.prng; + } + + /* Guarantee the PRNG is seeded. */ + if (__predict_false(!prng->arc4_seeded)) + arc4random_prng_addrandom(prng, NULL, 0); + + return prng; +} + +static void +arc4random_prng_put(struct arc4random_prng *prng) +{ + + /* If we had fallen back to the global PRNG, unlock it. */ + if (__predict_false(prng == &arc4random_global.prng)) + mutex_unlock(&arc4random_global.lock); +} + +/* Public API */ + uint32_t arc4random(void) { + struct arc4random_prng *prng; uint32_t v; - LOCK(&rs); - arc4_stir_if_needed(&rs, sizeof(v)); - v = arc4_getword(&rs); - UNLOCK(&rs); + prng = arc4random_prng_get(); + crypto_prng_buf(&prng->arc4_prng, &v, sizeof v); + arc4random_prng_put(prng); + return v; } void arc4random_buf(void *buf, size_t len) { - uint8_t *bp = buf; - uint8_t *ep = bp + len; - uint8_t i, j; + struct arc4random_prng *prng; - LOCK(&rs); - arc4_stir_if_needed(&rs, len); + if (len <= crypto_prng_MAXOUTPUTBYTES) { + prng = arc4random_prng_get(); + crypto_prng_buf(&prng->arc4_prng, buf, len); + arc4random_prng_put(prng); + } else { + uint8_t seed[crypto_onetimestream_SEEDBYTES]; - /* cache i and j - compiler can't know 'buf' doesn't alias them */ - i = rs.i; - j = rs.j; + prng = arc4random_prng_get(); + crypto_prng_buf(&prng->arc4_prng, seed, sizeof seed); + arc4random_prng_put(prng); - while (bp < ep) - *bp++ = arc4_getbyte_ij(&rs, &i, &j); - rs.i = i; - rs.j = j; - - UNLOCK(&rs); + crypto_onetimestream(seed, buf, len); + (void)explicit_memset(seed, 0, sizeof seed); + } } -/*- - * Written by Damien Miller. - * With simplifications by Jinmei Tatuya. - */ - -/* - * Calculate a uniformly distributed random number less than - * upper_bound avoiding "modulo bias". - * - * Uniformity is achieved by generating new random numbers - * until the one returned is outside the range - * [0, 2^32 % upper_bound[. This guarantees the selected - * random number will be inside the range - * [2^32 % upper_bound, 2^32[ which maps back to - * [0, upper_bound[ after reduction modulo upper_bound. - */ uint32_t -arc4random_uniform(uint32_t upper_bound) +arc4random_uniform(uint32_t bound) { - uint32_t r, min; - - if (upper_bound < 2) - return 0; - - /* calculate (2^32 % upper_bound) avoiding 64-bit math */ - /* ((2^32 - x) % x) == (2^32 % x) when x <= 2^31 */ - min = (0xFFFFFFFFU - upper_bound + 1) % upper_bound; - - LOCK(&rs); - arc4_stir_if_needed(&rs, sizeof(r)); + struct arc4random_prng *prng; + uint32_t minimum, r; /* - * This could theoretically loop forever but each retry has - * p > 0.5 (worst case, usually far better) of selecting a - * number inside the range we need, so it should rarely need - * to re-roll (at all). + * We want a uniform random choice in [0, n), and arc4random() + * makes a uniform random choice in [0, 2^32). If we reduce + * that modulo n, values in [0, 2^32 mod n) will be represented + * slightly more than values in [2^32 mod n, n). Instead we + * choose only from [2^32 mod n, 2^32) by rejecting samples in + * [0, 2^32 mod n), to avoid counting the extra representative + * of [0, 2^32 mod n). To compute 2^32 mod n, note that + * + * 2^32 mod n = 2^32 mod n - 0 + * = 2^32 mod n - n mod n + * = (2^32 - n) mod n, + * + * the last of which is what we compute in 32-bit arithmetic. */ - do - r = arc4_getword(&rs); - while (r < min); - UNLOCK(&rs); + minimum = (-bound % bound); - return r % upper_bound; + prng = arc4random_prng_get(); + do crypto_prng_buf(&prng->arc4_prng, &r, sizeof r); + while (__predict_false(r < minimum)); + arc4random_prng_put(prng); + + return (r % bound); } + +void +arc4random_stir(void) +{ + struct arc4random_prng *prng; + + prng = arc4random_prng_get(); + arc4random_prng_addrandom(prng, NULL, 0); + arc4random_prng_put(prng); +} + +/* + * Silly signature here is for hysterical raisins. Should instead be + * const void *data and size_t datalen. + */ +void +arc4random_addrandom(u_char *data, int datalen) +{ + struct arc4random_prng *prng; + + _DIAGASSERT(0 <= datalen); + + prng = arc4random_prng_get(); + arc4random_prng_addrandom(prng, data, datalen); + arc4random_prng_put(prng); +} + +#ifdef _ARC4RANDOM_TEST + +#include + +#include +#include + +int +main(int argc __unused, char **argv __unused) +{ + unsigned char gubbish[] = "random gubbish"; + const uint8_t zero64[64] = {0}; + uint8_t buf[2048]; + unsigned i, a, n; + + /* Test arc4random: should not be deterministic. */ + if (printf("arc4random: %08"PRIx32"\n", arc4random()) < 0) + err(1, "printf"); + + /* Test stirring: should definitely not be deterministic. */ + arc4random_stir(); + + /* Test small buffer. */ + arc4random_buf(buf, 8); + if (printf("arc4randombuf small:") < 0) + err(1, "printf"); + for (i = 0; i < 8; i++) + if (printf(" %02x", buf[i]) < 0) + err(1, "printf"); + if (printf("\n") < 0) + err(1, "printf"); + + /* Test addrandom: should not make the rest deterministic. */ + arc4random_addrandom(gubbish, sizeof gubbish); + + /* Test large buffer. */ + arc4random_buf(buf, sizeof buf); + if (printf("arc4randombuf_large:") < 0) + err(1, "printf"); + for (i = 0; i < sizeof buf; i++) + if (printf(" %02x", buf[i]) < 0) + err(1, "printf"); + if (printf("\n") < 0) + err(1, "printf"); + + /* Test misaligned small and large. */ + for (a = 0; a < 64; a++) { + for (n = a; n < sizeof buf; n++) { + (void)memset(buf, 0, sizeof buf); + arc4random_buf(buf, n - a); + if (memcmp(buf + n - a, zero64, a) != 0) + errx(1, "arc4random buffer overflow 0"); + + (void)memset(buf, 0, sizeof buf); + arc4random_buf(buf + a, n - a); + if (memcmp(buf, zero64, a) != 0) + errx(1, "arc4random buffer overflow 1"); + + if ((2*a) <= n) { + (void)memset(buf, 0, sizeof buf); + arc4random_buf(buf + a, n - a - a); + if (memcmp(buf + n - a, zero64, a) != 0) + errx(1, + "arc4random buffer overflow 2"); + } + } + } + + /* Test fork-safety. */ + { + pid_t pid, rpid; + int status; + + pid = fork(); + switch (pid) { + case -1: + err(1, "fork"); + case 0: + _exit(arc4random_prng_get()->arc4_seeded); + default: + rpid = waitpid(pid, &status, 0); + if (rpid == -1) + err(1, "waitpid"); + if (rpid != pid) + errx(1, "waitpid returned wrong pid" + ": %"PRIdMAX" != %"PRIdMAX, + (intmax_t)rpid, + (intmax_t)pid); + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) + errx(1, "child exited with %d", + WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + errx(1, "child terminated on signal %d", + WTERMSIG(status)); + } else { + errx(1, "child died mysteriously: %d", status); + } + } + } + + /* XXX Test multithreaded fork safety...? */ + + return 0; +} +#endif