Optimize loads and stores a bit.

master
magical 2014-12-31 16:52:34 -08:00
parent df6edcd0bb
commit 64c5855490
3 changed files with 140 additions and 177 deletions

37
gen.go
View File

@ -72,27 +72,29 @@ var tmpl = template.Must(template.New("keccak").Funcs(funcs).Parse(`
package keccak
// roundGeneric implements one round of the keccak-f[1600] permutation.
func roundGeneric(a *[5][5]uint64) {
// roundGo implements one round of the keccak-f[1600] permutation.
func roundGo(a *[5][5]uint64) {
{{ range $x := count 5 }}
{{ range $y := count 5 }}
var a{{$x}}{{$y}} = a[{{$y}}][{{$x}}]
{{ end }}
var a{{$x}}0, a{{$x}}1, a{{$x}}2, a{{$x}}3, a{{$x}}4 uint64
{{ end }}
// Theta
var c0, c1, c2, c3, c4 uint64
{{ range $x := count 5 }}
c{{$x}} = a{{$x}}0 ^ a{{$x}}1 ^ a{{$x}}2 ^ a{{$x}}3 ^ a{{$x}}4
{{ range $y := count 5 }}
{{ range $x := count 5 }}
{{ if eq $y 0 }}
c{{$x}} = a[{{$y}}][{{$x}}]
{{ else }}
c{{$x}} ^= a[{{$y}}][{{$x}}]
{{ end }}
{{ end }}
{{ end }}
{{ range $x := count 5 }}
{{ $x0 := add $x 4 5 }}
{{ $x1 := add $x 1 5 }}
a{{$x}}0 ^= c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
a{{$x}}1 ^= c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
a{{$x}}2 ^= c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
a{{$x}}3 ^= c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
a{{$x}}4 ^= c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
{{ range $y := count 5 }}
a{{$x}}{{$y}} = a[{{$y}}][{{$x}}] ^ c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
{{ end }}
{{ end }}
// Rho and pi
@ -105,17 +107,10 @@ func roundGeneric(a *[5][5]uint64) {
{{ end }}
{{ end }}
// Chi
// Chi / output
{{ range $y := count 5 }}
{{ range $x := count 5 }}
a{{$x}}{{$y}} = b{{$x}}{{$y}} ^ (b{{add $x 2 5}}{{$y}} &^ b{{add $x 1 5}}{{$y}})
{{ end }}
{{ end }}
// Output
{{ range $y := count 5 }}
{{ range $x := count 5 }}
a[{{$y}}][{{$x}}] = a{{$x}}{{$y}}
a[{{$y}}][{{$x}}] = b{{$x}}{{$y}} ^ (b{{add $x 2 5}}{{$y}} &^ b{{add $x 1 5}}{{$y}})
{{ end }}
{{ end }}
}

View File

@ -3,101 +3,121 @@
package keccak
// roundGeneric implements one round of the keccak-f[1600] permutation.
func roundGen(a *[5][5]uint64) {
// roundGo implements one round of the keccak-f[1600] permutation.
func roundGo(a *[5][5]uint64) {
var a00 = a[0][0]
var a00, a01, a02, a03, a04 uint64
var a01 = a[1][0]
var a10, a11, a12, a13, a14 uint64
var a02 = a[2][0]
var a20, a21, a22, a23, a24 uint64
var a03 = a[3][0]
var a30, a31, a32, a33, a34 uint64
var a04 = a[4][0]
var a10 = a[0][1]
var a11 = a[1][1]
var a12 = a[2][1]
var a13 = a[3][1]
var a14 = a[4][1]
var a20 = a[0][2]
var a21 = a[1][2]
var a22 = a[2][2]
var a23 = a[3][2]
var a24 = a[4][2]
var a30 = a[0][3]
var a31 = a[1][3]
var a32 = a[2][3]
var a33 = a[3][3]
var a34 = a[4][3]
var a40 = a[0][4]
var a41 = a[1][4]
var a42 = a[2][4]
var a43 = a[3][4]
var a44 = a[4][4]
var a40, a41, a42, a43, a44 uint64
// Theta
var c0, c1, c2, c3, c4 uint64
c0 = a00 ^ a01 ^ a02 ^ a03 ^ a04
c0 = a[0][0]
c1 = a10 ^ a11 ^ a12 ^ a13 ^ a14
c1 = a[0][1]
c2 = a20 ^ a21 ^ a22 ^ a23 ^ a24
c2 = a[0][2]
c3 = a30 ^ a31 ^ a32 ^ a33 ^ a34
c3 = a[0][3]
c4 = a40 ^ a41 ^ a42 ^ a43 ^ a44
c4 = a[0][4]
a00 ^= c4 ^ (c1<<1 | c1>>63)
a01 ^= c4 ^ (c1<<1 | c1>>63)
a02 ^= c4 ^ (c1<<1 | c1>>63)
a03 ^= c4 ^ (c1<<1 | c1>>63)
a04 ^= c4 ^ (c1<<1 | c1>>63)
c0 ^= a[1][0]
a10 ^= c0 ^ (c2<<1 | c2>>63)
a11 ^= c0 ^ (c2<<1 | c2>>63)
a12 ^= c0 ^ (c2<<1 | c2>>63)
a13 ^= c0 ^ (c2<<1 | c2>>63)
a14 ^= c0 ^ (c2<<1 | c2>>63)
c1 ^= a[1][1]
a20 ^= c1 ^ (c3<<1 | c3>>63)
a21 ^= c1 ^ (c3<<1 | c3>>63)
a22 ^= c1 ^ (c3<<1 | c3>>63)
a23 ^= c1 ^ (c3<<1 | c3>>63)
a24 ^= c1 ^ (c3<<1 | c3>>63)
c2 ^= a[1][2]
a30 ^= c2 ^ (c4<<1 | c4>>63)
a31 ^= c2 ^ (c4<<1 | c4>>63)
a32 ^= c2 ^ (c4<<1 | c4>>63)
a33 ^= c2 ^ (c4<<1 | c4>>63)
a34 ^= c2 ^ (c4<<1 | c4>>63)
c3 ^= a[1][3]
a40 ^= c3 ^ (c0<<1 | c0>>63)
a41 ^= c3 ^ (c0<<1 | c0>>63)
a42 ^= c3 ^ (c0<<1 | c0>>63)
a43 ^= c3 ^ (c0<<1 | c0>>63)
a44 ^= c3 ^ (c0<<1 | c0>>63)
c4 ^= a[1][4]
c0 ^= a[2][0]
c1 ^= a[2][1]
c2 ^= a[2][2]
c3 ^= a[2][3]
c4 ^= a[2][4]
c0 ^= a[3][0]
c1 ^= a[3][1]
c2 ^= a[3][2]
c3 ^= a[3][3]
c4 ^= a[3][4]
c0 ^= a[4][0]
c1 ^= a[4][1]
c2 ^= a[4][2]
c3 ^= a[4][3]
c4 ^= a[4][4]
a00 = a[0][0] ^ c4 ^ (c1<<1 | c1>>63)
a01 = a[1][0] ^ c4 ^ (c1<<1 | c1>>63)
a02 = a[2][0] ^ c4 ^ (c1<<1 | c1>>63)
a03 = a[3][0] ^ c4 ^ (c1<<1 | c1>>63)
a04 = a[4][0] ^ c4 ^ (c1<<1 | c1>>63)
a10 = a[0][1] ^ c0 ^ (c2<<1 | c2>>63)
a11 = a[1][1] ^ c0 ^ (c2<<1 | c2>>63)
a12 = a[2][1] ^ c0 ^ (c2<<1 | c2>>63)
a13 = a[3][1] ^ c0 ^ (c2<<1 | c2>>63)
a14 = a[4][1] ^ c0 ^ (c2<<1 | c2>>63)
a20 = a[0][2] ^ c1 ^ (c3<<1 | c3>>63)
a21 = a[1][2] ^ c1 ^ (c3<<1 | c3>>63)
a22 = a[2][2] ^ c1 ^ (c3<<1 | c3>>63)
a23 = a[3][2] ^ c1 ^ (c3<<1 | c3>>63)
a24 = a[4][2] ^ c1 ^ (c3<<1 | c3>>63)
a30 = a[0][3] ^ c2 ^ (c4<<1 | c4>>63)
a31 = a[1][3] ^ c2 ^ (c4<<1 | c4>>63)
a32 = a[2][3] ^ c2 ^ (c4<<1 | c4>>63)
a33 = a[3][3] ^ c2 ^ (c4<<1 | c4>>63)
a34 = a[4][3] ^ c2 ^ (c4<<1 | c4>>63)
a40 = a[0][4] ^ c3 ^ (c0<<1 | c0>>63)
a41 = a[1][4] ^ c3 ^ (c0<<1 | c0>>63)
a42 = a[2][4] ^ c3 ^ (c0<<1 | c0>>63)
a43 = a[3][4] ^ c3 ^ (c0<<1 | c0>>63)
a44 = a[4][4] ^ c3 ^ (c0<<1 | c0>>63)
// Rho and pi
@ -151,108 +171,56 @@ func roundGen(a *[5][5]uint64) {
var b40 = a44<<14 | a44>>50
// Chi
// Chi / output
a00 = b00 ^ (b20 &^ b10)
a[0][0] = b00 ^ (b20 &^ b10)
a10 = b10 ^ (b30 &^ b20)
a[0][1] = b10 ^ (b30 &^ b20)
a20 = b20 ^ (b40 &^ b30)
a[0][2] = b20 ^ (b40 &^ b30)
a30 = b30 ^ (b00 &^ b40)
a[0][3] = b30 ^ (b00 &^ b40)
a40 = b40 ^ (b10 &^ b00)
a[0][4] = b40 ^ (b10 &^ b00)
a01 = b01 ^ (b21 &^ b11)
a[1][0] = b01 ^ (b21 &^ b11)
a11 = b11 ^ (b31 &^ b21)
a[1][1] = b11 ^ (b31 &^ b21)
a21 = b21 ^ (b41 &^ b31)
a[1][2] = b21 ^ (b41 &^ b31)
a31 = b31 ^ (b01 &^ b41)
a[1][3] = b31 ^ (b01 &^ b41)
a41 = b41 ^ (b11 &^ b01)
a[1][4] = b41 ^ (b11 &^ b01)
a02 = b02 ^ (b22 &^ b12)
a[2][0] = b02 ^ (b22 &^ b12)
a12 = b12 ^ (b32 &^ b22)
a[2][1] = b12 ^ (b32 &^ b22)
a22 = b22 ^ (b42 &^ b32)
a[2][2] = b22 ^ (b42 &^ b32)
a32 = b32 ^ (b02 &^ b42)
a[2][3] = b32 ^ (b02 &^ b42)
a42 = b42 ^ (b12 &^ b02)
a[2][4] = b42 ^ (b12 &^ b02)
a03 = b03 ^ (b23 &^ b13)
a[3][0] = b03 ^ (b23 &^ b13)
a13 = b13 ^ (b33 &^ b23)
a[3][1] = b13 ^ (b33 &^ b23)
a23 = b23 ^ (b43 &^ b33)
a[3][2] = b23 ^ (b43 &^ b33)
a33 = b33 ^ (b03 &^ b43)
a[3][3] = b33 ^ (b03 &^ b43)
a43 = b43 ^ (b13 &^ b03)
a[3][4] = b43 ^ (b13 &^ b03)
a04 = b04 ^ (b24 &^ b14)
a[4][0] = b04 ^ (b24 &^ b14)
a14 = b14 ^ (b34 &^ b24)
a[4][1] = b14 ^ (b34 &^ b24)
a24 = b24 ^ (b44 &^ b34)
a[4][2] = b24 ^ (b44 &^ b34)
a34 = b34 ^ (b04 &^ b44)
a[4][3] = b34 ^ (b04 &^ b44)
a44 = b44 ^ (b14 &^ b04)
// Output
a[0][0] = a00
a[0][1] = a10
a[0][2] = a20
a[0][3] = a30
a[0][4] = a40
a[1][0] = a01
a[1][1] = a11
a[1][2] = a21
a[1][3] = a31
a[1][4] = a41
a[2][0] = a02
a[2][1] = a12
a[2][2] = a22
a[2][3] = a32
a[2][4] = a42
a[3][0] = a03
a[3][1] = a13
a[3][2] = a23
a[3][3] = a33
a[3][4] = a43
a[4][0] = a04
a[4][1] = a14
a[4][2] = a24
a[4][3] = a34
a[4][4] = a44
a[4][4] = b44 ^ (b14 &^ b04)
}

View File

@ -6,7 +6,7 @@ const Size = 256 / 8
const BlockSize = 1600/8 - Size*2
var round = roundGen
var round = roundGo
// digest implements hash.Hash
type digest struct {