Permalink
Browse files

Whirlpool: optimizations and refactoring

Used array_copy64 from bitfn.h to speed up block copies, and put some steps in their own (static) functions.
  • Loading branch information...
1 parent 7b1d40a commit 8b49d62fd01461cdd467c96a3bcb8d3bc0e041e6 Stijn van Drongelen committed Aug 11, 2012
Showing with 102 additions and 185 deletions.
  1. +102 −185 cbits/whirlpool.c
View
@@ -61,6 +61,7 @@
#include <stdlib.h>
#include <string.h>
+#include "bitfn.h"
#include "whirlpool_nessie.h"
/* #define TRACE_INTERMEDIATE_VALUES */
@@ -627,6 +628,96 @@ static const uint64_t rc[R + 1] = {
LL(0xca2dbf07ad5a8333),
};
+static void transformMatrix(uint64_t m[8]) { /* overwrites the eight words of m with their table-driven transform (lookup tables C0..C7) */
+ uint64_t L[8]; /* scratch result: each L[i] reads all eight m[] words, so m must stay untouched until the final copy */
+ /* pattern for every row: L[i] = XOR over k = 0..7 of Ck[ byte at bit offset (56 - 8k) of m[(i - k) mod 8] ] */
+ L[0] =
+ C0[(int)(m[0] >> 56) ] ^ /* >> 56 leaves only the top byte of a 64-bit word, so no mask is needed for C0 */
+ C1[(int)(m[7] >> 48) & 0xff] ^ /* NOTE(review): the (int) cast binds tighter than & 0xff, so the shifted word is narrowed to int first and masked afterwards */
+ C2[(int)(m[6] >> 40) & 0xff] ^
+ C3[(int)(m[5] >> 32) & 0xff] ^
+ C4[(int)(m[4] >> 24) & 0xff] ^
+ C5[(int)(m[3] >> 16) & 0xff] ^
+ C6[(int)(m[2] >> 8) & 0xff] ^
+ C7[(int)(m[1] ) & 0xff];
+ L[1] =
+ C0[(int)(m[1] >> 56) ] ^
+ C1[(int)(m[0] >> 48) & 0xff] ^
+ C2[(int)(m[7] >> 40) & 0xff] ^
+ C3[(int)(m[6] >> 32) & 0xff] ^
+ C4[(int)(m[5] >> 24) & 0xff] ^
+ C5[(int)(m[4] >> 16) & 0xff] ^
+ C6[(int)(m[3] >> 8) & 0xff] ^
+ C7[(int)(m[2] ) & 0xff];
+ L[2] =
+ C0[(int)(m[2] >> 56) ] ^
+ C1[(int)(m[1] >> 48) & 0xff] ^
+ C2[(int)(m[0] >> 40) & 0xff] ^
+ C3[(int)(m[7] >> 32) & 0xff] ^
+ C4[(int)(m[6] >> 24) & 0xff] ^
+ C5[(int)(m[5] >> 16) & 0xff] ^
+ C6[(int)(m[4] >> 8) & 0xff] ^
+ C7[(int)(m[3] ) & 0xff];
+ L[3] =
+ C0[(int)(m[3] >> 56) ] ^
+ C1[(int)(m[2] >> 48) & 0xff] ^
+ C2[(int)(m[1] >> 40) & 0xff] ^
+ C3[(int)(m[0] >> 32) & 0xff] ^
+ C4[(int)(m[7] >> 24) & 0xff] ^
+ C5[(int)(m[6] >> 16) & 0xff] ^
+ C6[(int)(m[5] >> 8) & 0xff] ^
+ C7[(int)(m[4] ) & 0xff];
+ L[4] =
+ C0[(int)(m[4] >> 56) ] ^
+ C1[(int)(m[3] >> 48) & 0xff] ^
+ C2[(int)(m[2] >> 40) & 0xff] ^
+ C3[(int)(m[1] >> 32) & 0xff] ^
+ C4[(int)(m[0] >> 24) & 0xff] ^
+ C5[(int)(m[7] >> 16) & 0xff] ^
+ C6[(int)(m[6] >> 8) & 0xff] ^
+ C7[(int)(m[5] ) & 0xff];
+ L[5] =
+ C0[(int)(m[5] >> 56) ] ^
+ C1[(int)(m[4] >> 48) & 0xff] ^
+ C2[(int)(m[3] >> 40) & 0xff] ^
+ C3[(int)(m[2] >> 32) & 0xff] ^
+ C4[(int)(m[1] >> 24) & 0xff] ^
+ C5[(int)(m[0] >> 16) & 0xff] ^
+ C6[(int)(m[7] >> 8) & 0xff] ^
+ C7[(int)(m[6] ) & 0xff];
+ L[6] =
+ C0[(int)(m[6] >> 56) ] ^
+ C1[(int)(m[5] >> 48) & 0xff] ^
+ C2[(int)(m[4] >> 40) & 0xff] ^
+ C3[(int)(m[3] >> 32) & 0xff] ^
+ C4[(int)(m[2] >> 24) & 0xff] ^
+ C5[(int)(m[1] >> 16) & 0xff] ^
+ C6[(int)(m[0] >> 8) & 0xff] ^
+ C7[(int)(m[7] ) & 0xff];
+ L[7] =
+ C0[(int)(m[7] >> 56) ] ^
+ C1[(int)(m[6] >> 48) & 0xff] ^
+ C2[(int)(m[5] >> 40) & 0xff] ^
+ C3[(int)(m[4] >> 32) & 0xff] ^
+ C4[(int)(m[3] >> 24) & 0xff] ^
+ C5[(int)(m[2] >> 16) & 0xff] ^
+ C6[(int)(m[1] >> 8) & 0xff] ^
+ C7[(int)(m[0] ) & 0xff];
+ /* all eight rows are computed; only now is the input overwritten */
+ array_copy64(m, L, 8); /* copies the 8 words of L into m (helper pulled in via bitfn.h) */
+}
+
+static void inplaceXor(uint64_t dst[8], uint64_t src[8]) { /* XORs each of the eight words of src into the matching word of dst */
+ dst[0] ^= src[0]; /* unrolled by hand: one statement per word, indices 0..7 */
+ dst[1] ^= src[1];
+ dst[2] ^= src[2];
+ dst[3] ^= src[3];
+ dst[4] ^= src[4];
+ dst[5] ^= src[5];
+ dst[6] ^= src[6];
+ dst[7] ^= src[7]; /* NOTE(review): src is only ever read here, so the parameter could be const-qualified */
+}
+
/**
* The core Whirlpool transform.
*/
@@ -652,17 +743,13 @@ static void processBuffer(struct NESSIEstruct * const structpointer) {
(((uint64_t)buffer[6] & 0xffL) << 8) ^
(((uint64_t)buffer[7] & 0xffL) );
}
+
/*
* compute and apply K^0 to the cipher state:
*/
- state[0] = block[0] ^ (K[0] = structpointer->hash[0]);
- state[1] = block[1] ^ (K[1] = structpointer->hash[1]);
- state[2] = block[2] ^ (K[2] = structpointer->hash[2]);
- state[3] = block[3] ^ (K[3] = structpointer->hash[3]);
- state[4] = block[4] ^ (K[4] = structpointer->hash[4]);
- state[5] = block[5] ^ (K[5] = structpointer->hash[5]);
- state[6] = block[6] ^ (K[6] = structpointer->hash[6]);
- state[7] = block[7] ^ (K[7] = structpointer->hash[7]);
+ array_copy64(K, structpointer->hash, 8);
+ array_copy64(state, block, 8);
+ inplaceXor(state, K);
/*
* iterate over all rounds:
@@ -671,190 +758,20 @@ static void processBuffer(struct NESSIEstruct * const structpointer) {
/*
* compute K^r from K^{r-1}:
*/
- L[0] =
- C0[(int)(K[0] >> 56) ] ^
- C1[(int)(K[7] >> 48) & 0xff] ^
- C2[(int)(K[6] >> 40) & 0xff] ^
- C3[(int)(K[5] >> 32) & 0xff] ^
- C4[(int)(K[4] >> 24) & 0xff] ^
- C5[(int)(K[3] >> 16) & 0xff] ^
- C6[(int)(K[2] >> 8) & 0xff] ^
- C7[(int)(K[1] ) & 0xff] ^
- rc[r];
- L[1] =
- C0[(int)(K[1] >> 56) ] ^
- C1[(int)(K[0] >> 48) & 0xff] ^
- C2[(int)(K[7] >> 40) & 0xff] ^
- C3[(int)(K[6] >> 32) & 0xff] ^
- C4[(int)(K[5] >> 24) & 0xff] ^
- C5[(int)(K[4] >> 16) & 0xff] ^
- C6[(int)(K[3] >> 8) & 0xff] ^
- C7[(int)(K[2] ) & 0xff];
- L[2] =
- C0[(int)(K[2] >> 56) ] ^
- C1[(int)(K[1] >> 48) & 0xff] ^
- C2[(int)(K[0] >> 40) & 0xff] ^
- C3[(int)(K[7] >> 32) & 0xff] ^
- C4[(int)(K[6] >> 24) & 0xff] ^
- C5[(int)(K[5] >> 16) & 0xff] ^
- C6[(int)(K[4] >> 8) & 0xff] ^
- C7[(int)(K[3] ) & 0xff];
- L[3] =
- C0[(int)(K[3] >> 56) ] ^
- C1[(int)(K[2] >> 48) & 0xff] ^
- C2[(int)(K[1] >> 40) & 0xff] ^
- C3[(int)(K[0] >> 32) & 0xff] ^
- C4[(int)(K[7] >> 24) & 0xff] ^
- C5[(int)(K[6] >> 16) & 0xff] ^
- C6[(int)(K[5] >> 8) & 0xff] ^
- C7[(int)(K[4] ) & 0xff];
- L[4] =
- C0[(int)(K[4] >> 56) ] ^
- C1[(int)(K[3] >> 48) & 0xff] ^
- C2[(int)(K[2] >> 40) & 0xff] ^
- C3[(int)(K[1] >> 32) & 0xff] ^
- C4[(int)(K[0] >> 24) & 0xff] ^
- C5[(int)(K[7] >> 16) & 0xff] ^
- C6[(int)(K[6] >> 8) & 0xff] ^
- C7[(int)(K[5] ) & 0xff];
- L[5] =
- C0[(int)(K[5] >> 56) ] ^
- C1[(int)(K[4] >> 48) & 0xff] ^
- C2[(int)(K[3] >> 40) & 0xff] ^
- C3[(int)(K[2] >> 32) & 0xff] ^
- C4[(int)(K[1] >> 24) & 0xff] ^
- C5[(int)(K[0] >> 16) & 0xff] ^
- C6[(int)(K[7] >> 8) & 0xff] ^
- C7[(int)(K[6] ) & 0xff];
- L[6] =
- C0[(int)(K[6] >> 56) ] ^
- C1[(int)(K[5] >> 48) & 0xff] ^
- C2[(int)(K[4] >> 40) & 0xff] ^
- C3[(int)(K[3] >> 32) & 0xff] ^
- C4[(int)(K[2] >> 24) & 0xff] ^
- C5[(int)(K[1] >> 16) & 0xff] ^
- C6[(int)(K[0] >> 8) & 0xff] ^
- C7[(int)(K[7] ) & 0xff];
- L[7] =
- C0[(int)(K[7] >> 56) ] ^
- C1[(int)(K[6] >> 48) & 0xff] ^
- C2[(int)(K[5] >> 40) & 0xff] ^
- C3[(int)(K[4] >> 32) & 0xff] ^
- C4[(int)(K[3] >> 24) & 0xff] ^
- C5[(int)(K[2] >> 16) & 0xff] ^
- C6[(int)(K[1] >> 8) & 0xff] ^
- C7[(int)(K[0] ) & 0xff];
- K[0] = L[0];
- K[1] = L[1];
- K[2] = L[2];
- K[3] = L[3];
- K[4] = L[4];
- K[5] = L[5];
- K[6] = L[6];
- K[7] = L[7];
+ transformMatrix(K);
+ K[0] ^= rc[r];
+
/*
* apply the r-th round transformation:
*/
- L[0] =
- C0[(int)(state[0] >> 56) ] ^
- C1[(int)(state[7] >> 48) & 0xff] ^
- C2[(int)(state[6] >> 40) & 0xff] ^
- C3[(int)(state[5] >> 32) & 0xff] ^
- C4[(int)(state[4] >> 24) & 0xff] ^
- C5[(int)(state[3] >> 16) & 0xff] ^
- C6[(int)(state[2] >> 8) & 0xff] ^
- C7[(int)(state[1] ) & 0xff] ^
- K[0];
- L[1] =
- C0[(int)(state[1] >> 56) ] ^
- C1[(int)(state[0] >> 48) & 0xff] ^
- C2[(int)(state[7] >> 40) & 0xff] ^
- C3[(int)(state[6] >> 32) & 0xff] ^
- C4[(int)(state[5] >> 24) & 0xff] ^
- C5[(int)(state[4] >> 16) & 0xff] ^
- C6[(int)(state[3] >> 8) & 0xff] ^
- C7[(int)(state[2] ) & 0xff] ^
- K[1];
- L[2] =
- C0[(int)(state[2] >> 56) ] ^
- C1[(int)(state[1] >> 48) & 0xff] ^
- C2[(int)(state[0] >> 40) & 0xff] ^
- C3[(int)(state[7] >> 32) & 0xff] ^
- C4[(int)(state[6] >> 24) & 0xff] ^
- C5[(int)(state[5] >> 16) & 0xff] ^
- C6[(int)(state[4] >> 8) & 0xff] ^
- C7[(int)(state[3] ) & 0xff] ^
- K[2];
- L[3] =
- C0[(int)(state[3] >> 56) ] ^
- C1[(int)(state[2] >> 48) & 0xff] ^
- C2[(int)(state[1] >> 40) & 0xff] ^
- C3[(int)(state[0] >> 32) & 0xff] ^
- C4[(int)(state[7] >> 24) & 0xff] ^
- C5[(int)(state[6] >> 16) & 0xff] ^
- C6[(int)(state[5] >> 8) & 0xff] ^
- C7[(int)(state[4] ) & 0xff] ^
- K[3];
- L[4] =
- C0[(int)(state[4] >> 56) ] ^
- C1[(int)(state[3] >> 48) & 0xff] ^
- C2[(int)(state[2] >> 40) & 0xff] ^
- C3[(int)(state[1] >> 32) & 0xff] ^
- C4[(int)(state[0] >> 24) & 0xff] ^
- C5[(int)(state[7] >> 16) & 0xff] ^
- C6[(int)(state[6] >> 8) & 0xff] ^
- C7[(int)(state[5] ) & 0xff] ^
- K[4];
- L[5] =
- C0[(int)(state[5] >> 56) ] ^
- C1[(int)(state[4] >> 48) & 0xff] ^
- C2[(int)(state[3] >> 40) & 0xff] ^
- C3[(int)(state[2] >> 32) & 0xff] ^
- C4[(int)(state[1] >> 24) & 0xff] ^
- C5[(int)(state[0] >> 16) & 0xff] ^
- C6[(int)(state[7] >> 8) & 0xff] ^
- C7[(int)(state[6] ) & 0xff] ^
- K[5];
- L[6] =
- C0[(int)(state[6] >> 56) ] ^
- C1[(int)(state[5] >> 48) & 0xff] ^
- C2[(int)(state[4] >> 40) & 0xff] ^
- C3[(int)(state[3] >> 32) & 0xff] ^
- C4[(int)(state[2] >> 24) & 0xff] ^
- C5[(int)(state[1] >> 16) & 0xff] ^
- C6[(int)(state[0] >> 8) & 0xff] ^
- C7[(int)(state[7] ) & 0xff] ^
- K[6];
- L[7] =
- C0[(int)(state[7] >> 56) ] ^
- C1[(int)(state[6] >> 48) & 0xff] ^
- C2[(int)(state[5] >> 40) & 0xff] ^
- C3[(int)(state[4] >> 32) & 0xff] ^
- C4[(int)(state[3] >> 24) & 0xff] ^
- C5[(int)(state[2] >> 16) & 0xff] ^
- C6[(int)(state[1] >> 8) & 0xff] ^
- C7[(int)(state[0] ) & 0xff] ^
- K[7];
- state[0] = L[0];
- state[1] = L[1];
- state[2] = L[2];
- state[3] = L[3];
- state[4] = L[4];
- state[5] = L[5];
- state[6] = L[6];
- state[7] = L[7];
+ transformMatrix(state);
+ inplaceXor(state, K);
}
/*
* apply the Miyaguchi-Preneel compression function:
*/
- structpointer->hash[0] ^= state[0] ^ block[0];
- structpointer->hash[1] ^= state[1] ^ block[1];
- structpointer->hash[2] ^= state[2] ^ block[2];
- structpointer->hash[3] ^= state[3] ^ block[3];
- structpointer->hash[4] ^= state[4] ^ block[4];
- structpointer->hash[5] ^= state[5] ^ block[5];
- structpointer->hash[6] ^= state[6] ^ block[6];
- structpointer->hash[7] ^= state[7] ^ block[7];
+ inplaceXor(structpointer->hash, state);
+ inplaceXor(structpointer->hash, block);
}
/**

0 comments on commit 8b49d62

Please sign in to comment.