Use fewer XORs in Theta and eliminate Pi.

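Go's common subexpression elimination is apparently not up to snuff, so the per-column Theta term c[x-1] ^ rotl(c[x+1], 1) is now computed once into a temporary and reused for all five lanes of that column.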

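Pi is now done implicitly: Rho rotates each lane in place, and Chi reads its operands directly from the Pi-permuted positions instead of from a separate set of temporaries.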
master
magical 2014-12-31 17:52:09 -08:00
parent 64c5855490
commit 56a2055f6e
2 changed files with 103 additions and 84 deletions

21
gen.go
View File

@ -89,28 +89,35 @@ func roundGo(a *[5][5]uint64) {
{{ end }} {{ end }}
{{ end }} {{ end }}
{{ end }} {{ end }}
var d uint64
{{ range $x := count 5 }} {{ range $x := count 5 }}
{{ $x0 := add $x 4 5 }} {{ $x0 := add $x 4 5 }}
{{ $x1 := add $x 1 5 }} {{ $x1 := add $x 1 5 }}
d = c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63)
{{ range $y := count 5 }} {{ range $y := count 5 }}
a{{$x}}{{$y}} = a[{{$y}}][{{$x}}] ^ c{{$x0}} ^ (c{{$x1}}<<1 | c{{$x1}}>>63) a{{$x}}{{$y}} = a[{{$y}}][{{$x}}] ^ d
{{ end }} {{ end }}
{{ end }} {{ end }}
// Rho and pi // Rho
{{ range $y := count 5 }} {{ range $y := count 5 }}
{{ range $x := count 5 }} {{ range $x := count 5 }}
{{ $x0 := $y }} {{ $a := printf "a%d%d" $x $y }}
{{ $y0 := add (mul $x 2) (mul $y 3) 5 }}
{{ $r := index $.Rotc $x $y }} {{ $r := index $.Rotc $x $y }}
var b{{$x0}}{{$y0}} = a{{$x}}{{$y}}<<{{$r}} | a{{$x}}{{$y}}>>{{sub 64 $r}} {{$a}} = {{$a}}<<{{$r}} | {{$a}}>>{{sub 64 $r}}
{{ end }} {{ end }}
{{ end }} {{ end }}
// Chi / output // Pi / Chi / output
{{ range $y := count 5 }} {{ range $y := count 5 }}
{{ range $x := count 5 }} {{ range $x := count 5 }}
a[{{$y}}][{{$x}}] = b{{$x}}{{$y}} ^ (b{{add $x 2 5}}{{$y}} &^ b{{add $x 1 5}}{{$y}}) {{ $x0 := add $x (mul $y 3) 5 }}
{{ $y0 := $x }}
{{ $x1 := add (add $x 1 5) (mul $y 3) 5 }}
{{ $y1 := add $x 1 5 }}
{{ $x2 := add (add $x 2 5) (mul $y 3) 5 }}
{{ $y2 := add $x 2 5 }}
a[{{$y}}][{{$x}}] = a{{$x0}}{{$y0}} ^ (a{{$x2}}{{$y2}} &^ a{{$x1}}{{$y1}})
{{ end }} {{ end }}
{{ end }} {{ end }}
} }

View File

@ -69,158 +69,170 @@ func roundGo(a *[5][5]uint64) {
c4 ^= a[4][4] c4 ^= a[4][4]
a00 = a[0][0] ^ c4 ^ (c1<<1 | c1>>63) var d uint64
a01 = a[1][0] ^ c4 ^ (c1<<1 | c1>>63) d = c4 ^ (c1<<1 | c1>>63)
a02 = a[2][0] ^ c4 ^ (c1<<1 | c1>>63) a00 = a[0][0] ^ d
a03 = a[3][0] ^ c4 ^ (c1<<1 | c1>>63) a01 = a[1][0] ^ d
a04 = a[4][0] ^ c4 ^ (c1<<1 | c1>>63) a02 = a[2][0] ^ d
a10 = a[0][1] ^ c0 ^ (c2<<1 | c2>>63) a03 = a[3][0] ^ d
a11 = a[1][1] ^ c0 ^ (c2<<1 | c2>>63) a04 = a[4][0] ^ d
a12 = a[2][1] ^ c0 ^ (c2<<1 | c2>>63) d = c0 ^ (c2<<1 | c2>>63)
a13 = a[3][1] ^ c0 ^ (c2<<1 | c2>>63) a10 = a[0][1] ^ d
a14 = a[4][1] ^ c0 ^ (c2<<1 | c2>>63) a11 = a[1][1] ^ d
a20 = a[0][2] ^ c1 ^ (c3<<1 | c3>>63) a12 = a[2][1] ^ d
a21 = a[1][2] ^ c1 ^ (c3<<1 | c3>>63) a13 = a[3][1] ^ d
a22 = a[2][2] ^ c1 ^ (c3<<1 | c3>>63) a14 = a[4][1] ^ d
a23 = a[3][2] ^ c1 ^ (c3<<1 | c3>>63) d = c1 ^ (c3<<1 | c3>>63)
a24 = a[4][2] ^ c1 ^ (c3<<1 | c3>>63) a20 = a[0][2] ^ d
a30 = a[0][3] ^ c2 ^ (c4<<1 | c4>>63) a21 = a[1][2] ^ d
a31 = a[1][3] ^ c2 ^ (c4<<1 | c4>>63) a22 = a[2][2] ^ d
a32 = a[2][3] ^ c2 ^ (c4<<1 | c4>>63) a23 = a[3][2] ^ d
a33 = a[3][3] ^ c2 ^ (c4<<1 | c4>>63) a24 = a[4][2] ^ d
a34 = a[4][3] ^ c2 ^ (c4<<1 | c4>>63) d = c2 ^ (c4<<1 | c4>>63)
a40 = a[0][4] ^ c3 ^ (c0<<1 | c0>>63) a30 = a[0][3] ^ d
a41 = a[1][4] ^ c3 ^ (c0<<1 | c0>>63) a31 = a[1][3] ^ d
a42 = a[2][4] ^ c3 ^ (c0<<1 | c0>>63) a32 = a[2][3] ^ d
a43 = a[3][4] ^ c3 ^ (c0<<1 | c0>>63) a33 = a[3][3] ^ d
a44 = a[4][4] ^ c3 ^ (c0<<1 | c0>>63) a34 = a[4][3] ^ d
// Rho and pi d = c3 ^ (c0<<1 | c0>>63)
var b00 = a00<<0 | a00>>64 a40 = a[0][4] ^ d
var b02 = a10<<1 | a10>>63 a41 = a[1][4] ^ d
var b04 = a20<<62 | a20>>2 a42 = a[2][4] ^ d
var b01 = a30<<28 | a30>>36 a43 = a[3][4] ^ d
var b03 = a40<<27 | a40>>37 a44 = a[4][4] ^ d
var b13 = a01<<36 | a01>>28 // Rho
var b10 = a11<<44 | a11>>20 a00 = a00<<0 | a00>>64
var b12 = a21<<6 | a21>>58 a10 = a10<<1 | a10>>63
var b14 = a31<<55 | a31>>9 a20 = a20<<62 | a20>>2
var b11 = a41<<20 | a41>>44 a30 = a30<<28 | a30>>36
var b21 = a02<<3 | a02>>61 a40 = a40<<27 | a40>>37
var b23 = a12<<10 | a12>>54 a01 = a01<<36 | a01>>28
var b20 = a22<<43 | a22>>21 a11 = a11<<44 | a11>>20
var b22 = a32<<25 | a32>>39 a21 = a21<<6 | a21>>58
var b24 = a42<<39 | a42>>25 a31 = a31<<55 | a31>>9
var b34 = a03<<41 | a03>>23 a41 = a41<<20 | a41>>44
var b31 = a13<<45 | a13>>19 a02 = a02<<3 | a02>>61
var b33 = a23<<15 | a23>>49 a12 = a12<<10 | a12>>54
var b30 = a33<<21 | a33>>43 a22 = a22<<43 | a22>>21
var b32 = a43<<8 | a43>>56 a32 = a32<<25 | a32>>39
var b42 = a04<<18 | a04>>46 a42 = a42<<39 | a42>>25
var b44 = a14<<2 | a14>>62 a03 = a03<<41 | a03>>23
var b41 = a24<<61 | a24>>3 a13 = a13<<45 | a13>>19
var b43 = a34<<56 | a34>>8 a23 = a23<<15 | a23>>49
var b40 = a44<<14 | a44>>50 a33 = a33<<21 | a33>>43
// Chi / output a43 = a43<<8 | a43>>56
a[0][0] = b00 ^ (b20 &^ b10) a04 = a04<<18 | a04>>46
a[0][1] = b10 ^ (b30 &^ b20) a14 = a14<<2 | a14>>62
a[0][2] = b20 ^ (b40 &^ b30) a24 = a24<<61 | a24>>3
a[0][3] = b30 ^ (b00 &^ b40) a34 = a34<<56 | a34>>8
a[0][4] = b40 ^ (b10 &^ b00) a44 = a44<<14 | a44>>50
a[1][0] = b01 ^ (b21 &^ b11) // Pi / Chi / output
a[1][1] = b11 ^ (b31 &^ b21) a[0][0] = a00 ^ (a22 &^ a11)
a[1][2] = b21 ^ (b41 &^ b31) a[0][1] = a11 ^ (a33 &^ a22)
a[1][3] = b31 ^ (b01 &^ b41) a[0][2] = a22 ^ (a44 &^ a33)
a[1][4] = b41 ^ (b11 &^ b01) a[0][3] = a33 ^ (a00 &^ a44)
a[2][0] = b02 ^ (b22 &^ b12) a[0][4] = a44 ^ (a11 &^ a00)
a[2][1] = b12 ^ (b32 &^ b22) a[1][0] = a30 ^ (a02 &^ a41)
a[2][2] = b22 ^ (b42 &^ b32) a[1][1] = a41 ^ (a13 &^ a02)
a[2][3] = b32 ^ (b02 &^ b42) a[1][2] = a02 ^ (a24 &^ a13)
a[2][4] = b42 ^ (b12 &^ b02) a[1][3] = a13 ^ (a30 &^ a24)
a[3][0] = b03 ^ (b23 &^ b13) a[1][4] = a24 ^ (a41 &^ a30)
a[3][1] = b13 ^ (b33 &^ b23) a[2][0] = a10 ^ (a32 &^ a21)
a[3][2] = b23 ^ (b43 &^ b33) a[2][1] = a21 ^ (a43 &^ a32)
a[3][3] = b33 ^ (b03 &^ b43) a[2][2] = a32 ^ (a04 &^ a43)
a[3][4] = b43 ^ (b13 &^ b03) a[2][3] = a43 ^ (a10 &^ a04)
a[4][0] = b04 ^ (b24 &^ b14) a[2][4] = a04 ^ (a21 &^ a10)
a[4][1] = b14 ^ (b34 &^ b24) a[3][0] = a40 ^ (a12 &^ a01)
a[4][2] = b24 ^ (b44 &^ b34) a[3][1] = a01 ^ (a23 &^ a12)
a[4][3] = b34 ^ (b04 &^ b44) a[3][2] = a12 ^ (a34 &^ a23)
a[4][4] = b44 ^ (b14 &^ b04) a[3][3] = a23 ^ (a40 &^ a34)
a[3][4] = a34 ^ (a01 &^ a40)
a[4][0] = a20 ^ (a42 &^ a31)
a[4][1] = a31 ^ (a03 &^ a42)
a[4][2] = a42 ^ (a14 &^ a03)
a[4][3] = a03 ^ (a20 &^ a14)
a[4][4] = a14 ^ (a31 &^ a20)
} }