From 56a2055f6e8f849163635f39d3d8d20f54ad724d Mon Sep 17 00:00:00 2001 From: Andrew Ekstedt Date: Wed, 31 Dec 2014 17:52:09 -0800 Subject: [PATCH] Use fewer XORs in Theta and eliminate Pi. Go's common subexpression elimination is apparently not up to snuff. Pi is now done implicitly. --- gen.go | 21 ++++--- keccak_gen.go | 166 +++++++++++++++++++++++++++----------------------- 2 files changed, 103 insertions(+), 84 deletions(-) diff --git a/gen.go b/gen.go index d67114e..70febb3 100644 --- a/gen.go +++ b/gen.go @@ -89,28 +89,35 @@ func roundGo(a *[5][5]uint64) { {{ end }} {{ end }} {{ end }} + var d uint64 {{ range $x := count 5 }} {{ $x0 := add $x 4 5 }} {{ $x1 := add $x 1 5 }} + d = c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63) {{ range $y := count 5 }} - a{{$x}}{{$y}} = a[{{$y}}][{{$x}}] ^ c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63) + a{{$x}}{{$y}} = a[{{$y}}][{{$x}}] ^ d {{ end }} {{ end }} - // Rho and pi + // Rho {{ range $y := count 5 }} {{ range $x := count 5 }} - {{ $x0 := $y }} - {{ $y0 := add (mul $x 2) (mul $y 3) 5 }} + {{ $a := printf "a%d%d" $x $y }} {{ $r := index $.Rotc $x $y }} - var b{{$x0}}{{$y0}} = a{{$x}}{{$y}}<<{{$r}} | a{{$x}}{{$y}}>>{{sub 64 $r}} + {{$a}} = {{$a}}<<{{$r}} | {{$a}}>>{{sub 64 $r}} {{ end }} {{ end }} - // Chi / output + // Pi / Chi / output {{ range $y := count 5 }} {{ range $x := count 5 }} - a[{{$y}}][{{$x}}] = b{{$x}}{{$y}} ^ (b{{add $x 2 5}}{{$y}} &^ b{{add $x 1 5}}{{$y}}) + {{ $x0 := add $x (mul $y 3) 5 }} + {{ $y0 := $x }} + {{ $x1 := add (add $x 1 5) (mul $y 3) 5 }} + {{ $y1 := add $x 1 5 }} + {{ $x2 := add (add $x 2 5) (mul $y 3) 5 }} + {{ $y2 := add $x 2 5 }} + a[{{$y}}][{{$x}}] = a{{$x0}}{{$y0}} ^ (a{{$x2}}{{$y2}} &^ a{{$x1}}{{$y1}}) {{ end }} {{ end }} } diff --git a/keccak_gen.go b/keccak_gen.go index f1573f4..9a1fd9b 100644 --- a/keccak_gen.go +++ b/keccak_gen.go @@ -69,158 +69,170 @@ func roundGo(a *[5][5]uint64) { c4 ^= a[4][4] - a00 = a[0][0] ^ c4 ^ (c1<<1 | c1>>63) + var d uint64 - a01 = a[1][0] ^ c4 ^ (c1<<1 | c1>>63) + d = c4 ^ (c1<<1 | c1>>63) - a02 = a[2][0] ^ c4 ^ (c1<<1 | c1>>63) + a00 = a[0][0] ^ d - a03 = a[3][0] ^ c4 ^ (c1<<1 | c1>>63) + a01 = a[1][0] ^ d - a04 = a[4][0] ^ c4 ^ (c1<<1 | c1>>63) + a02 = a[2][0] ^ d - a10 = a[0][1] ^ c0 ^ (c2<<1 | c2>>63) + a03 = a[3][0] ^ d - a11 = a[1][1] ^ c0 ^ (c2<<1 | c2>>63) + a04 = a[4][0] ^ d - a12 = a[2][1] ^ c0 ^ (c2<<1 | c2>>63) + d = c0 ^ (c2<<1 | c2>>63) - a13 = a[3][1] ^ c0 ^ (c2<<1 | c2>>63) + a10 = a[0][1] ^ d - a14 = a[4][1] ^ c0 ^ (c2<<1 | c2>>63) + a11 = a[1][1] ^ d - a20 = a[0][2] ^ c1 ^ (c3<<1 | c3>>63) + a12 = a[2][1] ^ d - a21 = a[1][2] ^ c1 ^ (c3<<1 | c3>>63) + a13 = a[3][1] ^ d - a22 = a[2][2] ^ c1 ^ (c3<<1 | c3>>63) + a14 = a[4][1] ^ d - a23 = a[3][2] ^ c1 ^ (c3<<1 | c3>>63) + d = c1 ^ (c3<<1 | c3>>63) - a24 = a[4][2] ^ c1 ^ (c3<<1 | c3>>63) + a20 = a[0][2] ^ d - a30 = a[0][3] ^ c2 ^ (c4<<1 | c4>>63) + a21 = a[1][2] ^ d - a31 = a[1][3] ^ c2 ^ (c4<<1 | c4>>63) + a22 = a[2][2] ^ d - a32 = a[2][3] ^ c2 ^ (c4<<1 | c4>>63) + a23 = a[3][2] ^ d - a33 = a[3][3] ^ c2 ^ (c4<<1 | c4>>63) + a24 = a[4][2] ^ d - a34 = a[4][3] ^ c2 ^ (c4<<1 | c4>>63) + d = c2 ^ (c4<<1 | c4>>63) - a40 = a[0][4] ^ c3 ^ (c0<<1 | c0>>63) + a30 = a[0][3] ^ d - a41 = a[1][4] ^ c3 ^ (c0<<1 | c0>>63) + a31 = a[1][3] ^ d - a42 = a[2][4] ^ c3 ^ (c0<<1 | c0>>63) + a32 = a[2][3] ^ d - a43 = a[3][4] ^ c3 ^ (c0<<1 | c0>>63) + a33 = a[3][3] ^ d - a44 = a[4][4] ^ c3 ^ (c0<<1 | c0>>63) + a34 = a[4][3] ^ d - // Rho and pi + d = c3 ^ (c0<<1 | c0>>63) - var b00 = a00<<0 | a00>>64 + a40 = a[0][4] ^ d - var b02 = a10<<1 | a10>>63 + a41 = a[1][4] ^ d - var b04 = a20<<62 | a20>>2 + a42 = a[2][4] ^ d - var b01 = a30<<28 | a30>>36 + a43 = a[3][4] ^ d - var b03 = a40<<27 | a40>>37 + a44 = a[4][4] ^ d - var b13 = a01<<36 | a01>>28 + // Rho - var b10 = a11<<44 | a11>>20 + a00 = a00<<0 | a00>>64 - var b12 = a21<<6 | a21>>58 + a10 = a10<<1 | a10>>63 - var b14 = a31<<55 | a31>>9 + a20 = a20<<62 | a20>>2 - var b11 = a41<<20 | a41>>44 + a30 = a30<<28 | a30>>36 - var b21 = a02<<3 | a02>>61 + a40 = a40<<27 | a40>>37 - var b23 = a12<<10 | a12>>54 + a01 = a01<<36 | a01>>28 - var b20 = a22<<43 | a22>>21 + a11 = a11<<44 | a11>>20 - var b22 = a32<<25 | a32>>39 + a21 = a21<<6 | a21>>58 - var b24 = a42<<39 | a42>>25 + a31 = a31<<55 | a31>>9 - var b34 = a03<<41 | a03>>23 + a41 = a41<<20 | a41>>44 - var b31 = a13<<45 | a13>>19 + a02 = a02<<3 | a02>>61 - var b33 = a23<<15 | a23>>49 + a12 = a12<<10 | a12>>54 - var b30 = a33<<21 | a33>>43 + a22 = a22<<43 | a22>>21 - var b32 = a43<<8 | a43>>56 + a32 = a32<<25 | a32>>39 - var b42 = a04<<18 | a04>>46 + a42 = a42<<39 | a42>>25 - var b44 = a14<<2 | a14>>62 + a03 = a03<<41 | a03>>23 - var b41 = a24<<61 | a24>>3 + a13 = a13<<45 | a13>>19 - var b43 = a34<<56 | a34>>8 + a23 = a23<<15 | a23>>49 - var b40 = a44<<14 | a44>>50 + a33 = a33<<21 | a33>>43 - // Chi / output + a43 = a43<<8 | a43>>56 - a[0][0] = b00 ^ (b20 &^ b10) + a04 = a04<<18 | a04>>46 - a[0][1] = b10 ^ (b30 &^ b20) + a14 = a14<<2 | a14>>62 - a[0][2] = b20 ^ (b40 &^ b30) + a24 = a24<<61 | a24>>3 - a[0][3] = b30 ^ (b00 &^ b40) + a34 = a34<<56 | a34>>8 - a[0][4] = b40 ^ (b10 &^ b00) + a44 = a44<<14 | a44>>50 - a[1][0] = b01 ^ (b21 &^ b11) + // Pi / Chi / output - a[1][1] = b11 ^ (b31 &^ b21) + a[0][0] = a00 ^ (a22 &^ a11) - a[1][2] = b21 ^ (b41 &^ b31) + a[0][1] = a11 ^ (a33 &^ a22) - a[1][3] = b31 ^ (b01 &^ b41) + a[0][2] = a22 ^ (a44 &^ a33) - a[1][4] = b41 ^ (b11 &^ b01) + a[0][3] = a33 ^ (a00 &^ a44) - a[2][0] = b02 ^ (b22 &^ b12) + a[0][4] = a44 ^ (a11 &^ a00) - a[2][1] = b12 ^ (b32 &^ b22) + a[1][0] = a30 ^ (a02 &^ a41) - a[2][2] = b22 ^ (b42 &^ b32) + a[1][1] = a41 ^ (a13 &^ a02) - a[2][3] = b32 ^ (b02 &^ b42) + a[1][2] = a02 ^ (a24 &^ a13) - a[2][4] = b42 ^ (b12 &^ b02) + a[1][3] = a13 ^ (a30 &^ a24) - a[3][0] = b03 ^ (b23 &^ b13) + a[1][4] = a24 ^ (a41 &^ a30) - a[3][1] = b13 ^ (b33 &^ b23) + a[2][0] = a10 ^ (a32 &^ a21) - a[3][2] = b23 ^ (b43 &^ b33) + a[2][1] = a21 ^ (a43 &^ a32) - a[3][3] = b33 ^ (b03 &^ b43) + a[2][2] = a32 ^ (a04 &^ a43) - a[3][4] = b43 ^ (b13 &^ b03) + a[2][3] = a43 ^ (a10 &^ a04) - a[4][0] = b04 ^ (b24 &^ b14) + a[2][4] = a04 ^ (a21 &^ a10) - a[4][1] = b14 ^ (b34 &^ b24) + a[3][0] = a40 ^ (a12 &^ a01) - a[4][2] = b24 ^ (b44 &^ b34) + a[3][1] = a01 ^ (a23 &^ a12) - a[4][3] = b34 ^ (b04 &^ b44) + a[3][2] = a12 ^ (a34 &^ a23) - a[4][4] = b44 ^ (b14 &^ b04) + a[3][3] = a23 ^ (a40 &^ a34) + + a[3][4] = a34 ^ (a01 &^ a40) + + a[4][0] = a20 ^ (a42 &^ a31) + + a[4][1] = a31 ^ (a03 &^ a42) + + a[4][2] = a42 ^ (a14 &^ a03) + + a[4][3] = a03 ^ (a20 &^ a14) + + a[4][4] = a14 ^ (a31 &^ a20) }