summary refs log tree commit diff stats
path: root/cpukit
diff options
context:
space:
mode:
author Colin Percival <cperciva@FreeBSD.org> 2016-05-29 17:26:40 +0000
committer Sebastian Huber <sebastian.huber@embedded-brains.de> 2022-09-08 16:14:56 +0200
commit 4ebb5f893ed4d8744671aac6a95c625e93135d94 (patch)
tree b71b83f55843248116ae718c42f9af584e086f2b /cpukit
parent Implement SHA-512 truncated (224 and 256 bits) (diff)
download rtems-4ebb5f893ed4d8744671aac6a95c625e93135d94.tar.bz2
Retune SHA2 code for improved performance on CPUs
with more ILP and a preference for memory load instructions over large code footprints with embedded immediate variables.

On amd64 CPUs from 2007-2008 there is not a significant change, but amd64 CPUs from 2009-2010 get roughly 10% more throughput with this code; amd64 CPUs from 2011-2012 get roughly 15% more throughput; and amd64 CPUs from 2013-2015 get 20-25% more throughput. The Raspberry Pi 2 increases its throughput by 6-8%.

Sponsored by: Tarsnap Backup Inc.
Performance tested by: allanjude
MFC after: 3 weeks
Diffstat (limited to 'cpukit')
-rw-r--r-- cpukit/libmd/sha256c.c 174
-rw-r--r-- cpukit/libmd/sha512c.c 214
2 files changed, 198 insertions, 190 deletions
diff --git a/cpukit/libmd/sha256c.c b/cpukit/libmd/sha256c.c
index f5a453e43b..197de94586 100644
--- a/cpukit/libmd/sha256c.c
+++ b/cpukit/libmd/sha256c.c
@@ -74,6 +74,26 @@ be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
#endif /* BYTE_ORDER != BIG_ENDIAN */
+/* SHA256 round constants. */
+static const uint32_t K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
@@ -86,18 +106,21 @@ be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
- t0 = h + S1(e) + Ch(e, f, g) + k; \
- t1 = S0(a) + Maj(a, b, c); \
- d += t0; \
- h = t0 + t1;
+ h += S1(e) + Ch(e, f, g) + k; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c);
/* Adjusted round function for rotating state */
-#define RNDr(S, W, i, k) \
+#define RNDr(S, W, i, ii) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
- W[i] + k)
+ W[i + ii] + K[i + ii])
+
+/* Message schedule computation */
+#define MSCH(W, ii, i) \
+ W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
/*
* SHA256 block compression function. The 256-bit state is transformed via
@@ -108,82 +131,52 @@ SHA256_Transform(uint32_t * state, const unsigned char block[64])
{
uint32_t W[64];
uint32_t S[8];
- uint32_t t0, t1;
int i;
- /* 1. Prepare message schedule W. */
+ /* 1. Prepare the first part of the message schedule W. */
be32dec_vect(W, block, 64);
- for (i = 16; i < 64; i++)
- W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
/* 2. Initialize working variables. */
memcpy(S, state, 32);
/* 3. Mix. */
- RNDr(S, W, 0, 0x428a2f98);
- RNDr(S, W, 1, 0x71374491);
- RNDr(S, W, 2, 0xb5c0fbcf);
- RNDr(S, W, 3, 0xe9b5dba5);
- RNDr(S, W, 4, 0x3956c25b);
- RNDr(S, W, 5, 0x59f111f1);
- RNDr(S, W, 6, 0x923f82a4);
- RNDr(S, W, 7, 0xab1c5ed5);
- RNDr(S, W, 8, 0xd807aa98);
- RNDr(S, W, 9, 0x12835b01);
- RNDr(S, W, 10, 0x243185be);
- RNDr(S, W, 11, 0x550c7dc3);
- RNDr(S, W, 12, 0x72be5d74);
- RNDr(S, W, 13, 0x80deb1fe);
- RNDr(S, W, 14, 0x9bdc06a7);
- RNDr(S, W, 15, 0xc19bf174);
- RNDr(S, W, 16, 0xe49b69c1);
- RNDr(S, W, 17, 0xefbe4786);
- RNDr(S, W, 18, 0x0fc19dc6);
- RNDr(S, W, 19, 0x240ca1cc);
- RNDr(S, W, 20, 0x2de92c6f);
- RNDr(S, W, 21, 0x4a7484aa);
- RNDr(S, W, 22, 0x5cb0a9dc);
- RNDr(S, W, 23, 0x76f988da);
- RNDr(S, W, 24, 0x983e5152);
- RNDr(S, W, 25, 0xa831c66d);
- RNDr(S, W, 26, 0xb00327c8);
- RNDr(S, W, 27, 0xbf597fc7);
- RNDr(S, W, 28, 0xc6e00bf3);
- RNDr(S, W, 29, 0xd5a79147);
- RNDr(S, W, 30, 0x06ca6351);
- RNDr(S, W, 31, 0x14292967);
- RNDr(S, W, 32, 0x27b70a85);
- RNDr(S, W, 33, 0x2e1b2138);
- RNDr(S, W, 34, 0x4d2c6dfc);
- RNDr(S, W, 35, 0x53380d13);
- RNDr(S, W, 36, 0x650a7354);
- RNDr(S, W, 37, 0x766a0abb);
- RNDr(S, W, 38, 0x81c2c92e);
- RNDr(S, W, 39, 0x92722c85);
- RNDr(S, W, 40, 0xa2bfe8a1);
- RNDr(S, W, 41, 0xa81a664b);
- RNDr(S, W, 42, 0xc24b8b70);
- RNDr(S, W, 43, 0xc76c51a3);
- RNDr(S, W, 44, 0xd192e819);
- RNDr(S, W, 45, 0xd6990624);
- RNDr(S, W, 46, 0xf40e3585);
- RNDr(S, W, 47, 0x106aa070);
- RNDr(S, W, 48, 0x19a4c116);
- RNDr(S, W, 49, 0x1e376c08);
- RNDr(S, W, 50, 0x2748774c);
- RNDr(S, W, 51, 0x34b0bcb5);
- RNDr(S, W, 52, 0x391c0cb3);
- RNDr(S, W, 53, 0x4ed8aa4a);
- RNDr(S, W, 54, 0x5b9cca4f);
- RNDr(S, W, 55, 0x682e6ff3);
- RNDr(S, W, 56, 0x748f82ee);
- RNDr(S, W, 57, 0x78a5636f);
- RNDr(S, W, 58, 0x84c87814);
- RNDr(S, W, 59, 0x8cc70208);
- RNDr(S, W, 60, 0x90befffa);
- RNDr(S, W, 61, 0xa4506ceb);
- RNDr(S, W, 62, 0xbef9a3f7);
- RNDr(S, W, 63, 0xc67178f2);
+ for (i = 0; i < 64; i += 16) {
+ RNDr(S, W, 0, i);
+ RNDr(S, W, 1, i);
+ RNDr(S, W, 2, i);
+ RNDr(S, W, 3, i);
+ RNDr(S, W, 4, i);
+ RNDr(S, W, 5, i);
+ RNDr(S, W, 6, i);
+ RNDr(S, W, 7, i);
+ RNDr(S, W, 8, i);
+ RNDr(S, W, 9, i);
+ RNDr(S, W, 10, i);
+ RNDr(S, W, 11, i);
+ RNDr(S, W, 12, i);
+ RNDr(S, W, 13, i);
+ RNDr(S, W, 14, i);
+ RNDr(S, W, 15, i);
+
+ if (i == 48)
+ break;
+ MSCH(W, 0, i);
+ MSCH(W, 1, i);
+ MSCH(W, 2, i);
+ MSCH(W, 3, i);
+ MSCH(W, 4, i);
+ MSCH(W, 5, i);
+ MSCH(W, 6, i);
+ MSCH(W, 7, i);
+ MSCH(W, 8, i);
+ MSCH(W, 9, i);
+ MSCH(W, 10, i);
+ MSCH(W, 11, i);
+ MSCH(W, 12, i);
+ MSCH(W, 13, i);
+ MSCH(W, 14, i);
+ MSCH(W, 15, i);
+ }
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
@@ -201,22 +194,29 @@ static const unsigned char PAD[64] = {
static void
SHA256_Pad(SHA256_CTX * ctx)
{
- unsigned char len[8];
- uint32_t r, plen;
-
- /*
- * Convert length to a vector of bytes -- we do this now rather
- * than later because the length will change after we pad.
- */
- be64enc(len, ctx->count);
+ size_t r;
- /* Add 1--64 bytes so that the resulting length is 56 mod 64 */
+ /* Figure out how many bytes we have buffered. */
r = (ctx->count >> 3) & 0x3f;
- plen = (r < 56) ? (56 - r) : (120 - r);
- SHA256_Update(ctx, PAD, (size_t)plen);
- /* Add the terminating bit-count */
- SHA256_Update(ctx, len, 8);
+ /* Pad to 56 mod 64, transforming if we finish a block en route. */
+ if (r < 56) {
+ /* Pad to 56 mod 64. */
+ memcpy(&ctx->buf[r], PAD, 56 - r);
+ } else {
+ /* Finish the current block and mix. */
+ memcpy(&ctx->buf[r], PAD, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+
+ /* The start of the final block is all zeroes. */
+ memset(&ctx->buf[0], 0, 56);
+ }
+
+ /* Add the terminating bit-count. */
+ be64enc(&ctx->buf[56], ctx->count);
+
+ /* Mix in the final block. */
+ SHA256_Transform(ctx->state, ctx->buf);
}
/* SHA-256 initialization. Begins a SHA-256 operation. */
diff --git a/cpukit/libmd/sha512c.c b/cpukit/libmd/sha512c.c
index abc5fd113b..daeef9d671 100644
--- a/cpukit/libmd/sha512c.c
+++ b/cpukit/libmd/sha512c.c
@@ -77,6 +77,50 @@ be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
#endif /* BYTE_ORDER != BIG_ENDIAN */
+/* SHA512 round constants. */
+static const uint64_t K[80] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
/* Elementary functions used by SHA512 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
@@ -89,18 +133,21 @@ be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
/* SHA512 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
- t0 = h + S1(e) + Ch(e, f, g) + k; \
- t1 = S0(a) + Maj(a, b, c); \
- d += t0; \
- h = t0 + t1;
+ h += S1(e) + Ch(e, f, g) + k; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c);
/* Adjusted round function for rotating state */
-#define RNDr(S, W, i, k) \
+#define RNDr(S, W, i, ii) \
RND(S[(80 - i) % 8], S[(81 - i) % 8], \
S[(82 - i) % 8], S[(83 - i) % 8], \
S[(84 - i) % 8], S[(85 - i) % 8], \
S[(86 - i) % 8], S[(87 - i) % 8], \
- W[i] + k)
+ W[i + ii] + K[i + ii])
+
+/* Message schedule computation */
+#define MSCH(W, ii, i) \
+ W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
/*
* SHA512 block compression function. The 512-bit state is transformed via
@@ -111,98 +158,52 @@ SHA512_Transform(uint64_t * state, const unsigned char block[SHA512_BLOCK_LENGTH
{
uint64_t W[80];
uint64_t S[8];
- uint64_t t0, t1;
int i;
- /* 1. Prepare message schedule W. */
+ /* 1. Prepare the first part of the message schedule W. */
be64dec_vect(W, block, SHA512_BLOCK_LENGTH);
- for (i = 16; i < 80; i++)
- W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
/* 2. Initialize working variables. */
memcpy(S, state, SHA512_DIGEST_LENGTH);
/* 3. Mix. */
- RNDr(S, W, 0, 0x428a2f98d728ae22ULL);
- RNDr(S, W, 1, 0x7137449123ef65cdULL);
- RNDr(S, W, 2, 0xb5c0fbcfec4d3b2fULL);
- RNDr(S, W, 3, 0xe9b5dba58189dbbcULL);
- RNDr(S, W, 4, 0x3956c25bf348b538ULL);
- RNDr(S, W, 5, 0x59f111f1b605d019ULL);
- RNDr(S, W, 6, 0x923f82a4af194f9bULL);
- RNDr(S, W, 7, 0xab1c5ed5da6d8118ULL);
- RNDr(S, W, 8, 0xd807aa98a3030242ULL);
- RNDr(S, W, 9, 0x12835b0145706fbeULL);
- RNDr(S, W, 10, 0x243185be4ee4b28cULL);
- RNDr(S, W, 11, 0x550c7dc3d5ffb4e2ULL);
- RNDr(S, W, 12, 0x72be5d74f27b896fULL);
- RNDr(S, W, 13, 0x80deb1fe3b1696b1ULL);
- RNDr(S, W, 14, 0x9bdc06a725c71235ULL);
- RNDr(S, W, 15, 0xc19bf174cf692694ULL);
- RNDr(S, W, 16, 0xe49b69c19ef14ad2ULL);
- RNDr(S, W, 17, 0xefbe4786384f25e3ULL);
- RNDr(S, W, 18, 0x0fc19dc68b8cd5b5ULL);
- RNDr(S, W, 19, 0x240ca1cc77ac9c65ULL);
- RNDr(S, W, 20, 0x2de92c6f592b0275ULL);
- RNDr(S, W, 21, 0x4a7484aa6ea6e483ULL);
- RNDr(S, W, 22, 0x5cb0a9dcbd41fbd4ULL);
- RNDr(S, W, 23, 0x76f988da831153b5ULL);
- RNDr(S, W, 24, 0x983e5152ee66dfabULL);
- RNDr(S, W, 25, 0xa831c66d2db43210ULL);
- RNDr(S, W, 26, 0xb00327c898fb213fULL);
- RNDr(S, W, 27, 0xbf597fc7beef0ee4ULL);
- RNDr(S, W, 28, 0xc6e00bf33da88fc2ULL);
- RNDr(S, W, 29, 0xd5a79147930aa725ULL);
- RNDr(S, W, 30, 0x06ca6351e003826fULL);
- RNDr(S, W, 31, 0x142929670a0e6e70ULL);
- RNDr(S, W, 32, 0x27b70a8546d22ffcULL);
- RNDr(S, W, 33, 0x2e1b21385c26c926ULL);
- RNDr(S, W, 34, 0x4d2c6dfc5ac42aedULL);
- RNDr(S, W, 35, 0x53380d139d95b3dfULL);
- RNDr(S, W, 36, 0x650a73548baf63deULL);
- RNDr(S, W, 37, 0x766a0abb3c77b2a8ULL);
- RNDr(S, W, 38, 0x81c2c92e47edaee6ULL);
- RNDr(S, W, 39, 0x92722c851482353bULL);
- RNDr(S, W, 40, 0xa2bfe8a14cf10364ULL);
- RNDr(S, W, 41, 0xa81a664bbc423001ULL);
- RNDr(S, W, 42, 0xc24b8b70d0f89791ULL);
- RNDr(S, W, 43, 0xc76c51a30654be30ULL);
- RNDr(S, W, 44, 0xd192e819d6ef5218ULL);
- RNDr(S, W, 45, 0xd69906245565a910ULL);
- RNDr(S, W, 46, 0xf40e35855771202aULL);
- RNDr(S, W, 47, 0x106aa07032bbd1b8ULL);
- RNDr(S, W, 48, 0x19a4c116b8d2d0c8ULL);
- RNDr(S, W, 49, 0x1e376c085141ab53ULL);
- RNDr(S, W, 50, 0x2748774cdf8eeb99ULL);
- RNDr(S, W, 51, 0x34b0bcb5e19b48a8ULL);
- RNDr(S, W, 52, 0x391c0cb3c5c95a63ULL);
- RNDr(S, W, 53, 0x4ed8aa4ae3418acbULL);
- RNDr(S, W, 54, 0x5b9cca4f7763e373ULL);
- RNDr(S, W, 55, 0x682e6ff3d6b2b8a3ULL);
- RNDr(S, W, 56, 0x748f82ee5defb2fcULL);
- RNDr(S, W, 57, 0x78a5636f43172f60ULL);
- RNDr(S, W, 58, 0x84c87814a1f0ab72ULL);
- RNDr(S, W, 59, 0x8cc702081a6439ecULL);
- RNDr(S, W, 60, 0x90befffa23631e28ULL);
- RNDr(S, W, 61, 0xa4506cebde82bde9ULL);
- RNDr(S, W, 62, 0xbef9a3f7b2c67915ULL);
- RNDr(S, W, 63, 0xc67178f2e372532bULL);
- RNDr(S, W, 64, 0xca273eceea26619cULL);
- RNDr(S, W, 65, 0xd186b8c721c0c207ULL);
- RNDr(S, W, 66, 0xeada7dd6cde0eb1eULL);
- RNDr(S, W, 67, 0xf57d4f7fee6ed178ULL);
- RNDr(S, W, 68, 0x06f067aa72176fbaULL);
- RNDr(S, W, 69, 0x0a637dc5a2c898a6ULL);
- RNDr(S, W, 70, 0x113f9804bef90daeULL);
- RNDr(S, W, 71, 0x1b710b35131c471bULL);
- RNDr(S, W, 72, 0x28db77f523047d84ULL);
- RNDr(S, W, 73, 0x32caab7b40c72493ULL);
- RNDr(S, W, 74, 0x3c9ebe0a15c9bebcULL);
- RNDr(S, W, 75, 0x431d67c49c100d4cULL);
- RNDr(S, W, 76, 0x4cc5d4becb3e42b6ULL);
- RNDr(S, W, 77, 0x597f299cfc657e2aULL);
- RNDr(S, W, 78, 0x5fcb6fab3ad6faecULL);
- RNDr(S, W, 79, 0x6c44198c4a475817ULL);
+ for (i = 0; i < 80; i += 16) {
+ RNDr(S, W, 0, i);
+ RNDr(S, W, 1, i);
+ RNDr(S, W, 2, i);
+ RNDr(S, W, 3, i);
+ RNDr(S, W, 4, i);
+ RNDr(S, W, 5, i);
+ RNDr(S, W, 6, i);
+ RNDr(S, W, 7, i);
+ RNDr(S, W, 8, i);
+ RNDr(S, W, 9, i);
+ RNDr(S, W, 10, i);
+ RNDr(S, W, 11, i);
+ RNDr(S, W, 12, i);
+ RNDr(S, W, 13, i);
+ RNDr(S, W, 14, i);
+ RNDr(S, W, 15, i);
+
+ if (i == 64)
+ break;
+ MSCH(W, 0, i);
+ MSCH(W, 1, i);
+ MSCH(W, 2, i);
+ MSCH(W, 3, i);
+ MSCH(W, 4, i);
+ MSCH(W, 5, i);
+ MSCH(W, 6, i);
+ MSCH(W, 7, i);
+ MSCH(W, 8, i);
+ MSCH(W, 9, i);
+ MSCH(W, 10, i);
+ MSCH(W, 11, i);
+ MSCH(W, 12, i);
+ MSCH(W, 13, i);
+ MSCH(W, 14, i);
+ MSCH(W, 15, i);
+ }
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
@@ -224,22 +225,29 @@ static const unsigned char PAD[SHA512_BLOCK_LENGTH] = {
static void
SHA512_Pad(SHA512_CTX * ctx)
{
- unsigned char len[16];
- uint64_t r, plen;
-
- /*
- * Convert length to a vector of bytes -- we do this now rather
- * than later because the length will change after we pad.
- */
- be64enc_vect(len, ctx->count, 16);
+ size_t r;
- /* Add 1--128 bytes so that the resulting length is 112 mod 128 */
+ /* Figure out how many bytes we have buffered. */
r = (ctx->count[1] >> 3) & 0x7f;
- plen = (r < 112) ? (112 - r) : (240 - r);
- SHA512_Update(ctx, PAD, (size_t)plen);
- /* Add the terminating bit-count */
- SHA512_Update(ctx, len, 16);
+ /* Pad to 112 mod 128, transforming if we finish a block en route. */
+ if (r < 112) {
+ /* Pad to 112 mod 128. */
+ memcpy(&ctx->buf[r], PAD, 112 - r);
+ } else {
+ /* Finish the current block and mix. */
+ memcpy(&ctx->buf[r], PAD, 128 - r);
+ SHA512_Transform(ctx->state, ctx->buf);
+
+ /* The start of the final block is all zeroes. */
+ memset(&ctx->buf[0], 0, 112);
+ }
+
+ /* Add the terminating bit-count. */
+ be64enc_vect(&ctx->buf[112], ctx->count, 16);
+
+ /* Mix in the final block. */
+ SHA512_Transform(ctx->state, ctx->buf);
}
/* SHA-512 initialization. Begins a SHA-512 operation. */