Compare commits

..

5 Commits

Author SHA1 Message Date
magical 4c06f23ff1 Merge branch 'shrink-buf' into shake
good example of how a merge can have no conflicts yet nevertheless
require a bunch of adjustments to get something that actually works.

these two changes are conceptually independent but since shrink-buf
changes the internal representation of a digest, the shake code needs to
be adjusted to match. i wanted to capture the code both before and
after the shrink-buf change, thus this merge.

there's a bunch of code duplication that i will clean up later.
2024-10-06 22:41:30 -07:00
magical 6bebd054b1 add (c)SHAKE-128 and 256 2024-10-06 22:20:47 -07:00
magical 73654f751c reduce buffer size to 8 bytes
instead of buffering an entire block, buffer only when the input is not
aligned to 8 bytes, and otherwise xor uint64-sized chunks directly into
the state.

the code is a little more complicated but i think it's worth it.
we could eliminate the buffer entirely but that requires either
shenanigans with unsafe, or fiddly code to xor partial uint64s

a caveat is that the implementation now only supports sponge capacities
that are a multiple of 8. that's fine for the standard instantiations
but may restrict unusual applications.

not only does this let us reduce the buffer from 200 bytes to 8,
it also provides a nice speedup

name      old time/op    new time/op    delta
256_8-2     1.45µs ± 0%    1.28µs ± 1%  -11.58%  (p=0.000 n=10+10)
256_1k-2    10.1µs ± 0%     9.3µs ± 0%   -7.67%  (p=0.000 n=10+10)
256_8k-2    75.6µs ± 0%    70.2µs ± 1%   -7.09%  (p=0.000 n=10+10)
512_8-2     1.39µs ± 1%    1.29µs ± 1%   -6.85%  (p=0.000 n=10+10)
512_1k-2    18.7µs ± 0%    17.0µs ± 0%   -8.70%   (p=0.000 n=9+10)
512_8k-2     146µs ± 1%     129µs ± 0%  -11.70%   (p=0.000 n=10+9)

name      old speed      new speed      delta
256_8-2   5.53MB/s ± 0%  6.25MB/s ± 0%  +13.06%  (p=0.000 n=10+10)
256_1k-2   102MB/s ± 0%   110MB/s ± 0%   +8.30%  (p=0.000 n=10+10)
256_8k-2   108MB/s ± 0%   117MB/s ± 1%   +7.64%  (p=0.000 n=10+10)
512_8-2   5.78MB/s ± 1%  6.20MB/s ± 1%   +7.32%  (p=0.000 n=10+10)
512_1k-2  54.9MB/s ± 0%  60.1MB/s ± 0%   +9.53%   (p=0.000 n=9+10)
512_8k-2  56.1MB/s ± 1%  63.5MB/s ± 0%  +13.26%   (p=0.000 n=10+9)
2024-10-06 18:07:16 -07:00
magical 70a9bfa87d help the bounds checker in le64dec 2024-10-06 00:47:45 -07:00
magical 79b27a1530 add a few more test vectors for SHA3-256
these test some boundary conditions for an optimization i'm about to do.

computed using https://emn178.github.io/online-tools/sha3_256.html
2024-10-06 00:40:46 -07:00
4 changed files with 364 additions and 21 deletions

View File

@ -40,6 +40,31 @@ var tests = []struct {
text: "The quick brown fox jumps over the lazy dog",
hash: "69070dda01975c8c120c3aada1b282394e7f032fa9cf32f4cb2259a0897dfc04",
},
{
f: New256,
name: "SHA3-256",
text: "a",
hash: "80084bf2fba02475726feb2cab2d8215eab14bc6bdd8bfb2c8151257032ecd8b",
},
{
f: New256,
name: "SHA3-256",
text: "abcdefg",
hash: "7d55114476dfc6a2fbeaa10e221a8d0f32fc8f2efb69a6e878f4633366917a62",
},
{
f: New256,
name: "SHA3-256",
text: "abcdefgh",
hash: "3e2020725a38a48eb3bbf75767f03a22c6b3f41f459c831309b06433ec649779",
},
{
f: New256,
name: "SHA3-256",
text: "abcdefghi",
hash: "f74eb337992307c22bc59eb43e59583a683f3b93077e7f2472508e8c464d2657",
},
}
func TestHash(t *testing.T) {
@ -58,6 +83,24 @@ func TestHash(t *testing.T) {
}
}
// TestHashSmallWrites feeds every test message to the hash one byte at a
// time and checks that the resulting digest matches the expected value. This
// exercises the partial-word buffering path of Write.
func TestHashSmallWrites(t *testing.T) {
	for _, tc := range tests {
		expected, err := hex.DecodeString(tc.hash)
		if err != nil {
			t.Errorf("%s(%q): %s", tc.name, tc.text, err)
			continue
		}

		hasher := tc.f()
		// One Write call per input byte.
		for i := 0; i < len(tc.text); i++ {
			io.WriteString(hasher, tc.text[i:i+1])
		}

		if sum := hasher.Sum(nil); !bytes.Equal(sum, expected) {
			t.Errorf("%s(%q) = %x, want %x", tc.name, tc.text, sum, expected)
		}
	}
}
func benchmark(b *testing.B, f func() hash.Hash, size int64) {
var tmp [Size * 2]byte
var msg [8192]byte

156
shake.go 100644
View File

@ -0,0 +1,156 @@
package keccak
import "math/bits"
// Shake is an extendable-output function built on the embedded sponge digest.
// Input is absorbed with Write and output is squeezed with Read.
type Shake struct {
// digest holds the sponge state and the absorb/squeeze position.
digest
// initialState is a snapshot of the sponge state taken by the first call to
// Reset made before any Write or Read; later Reset calls restore it.
// It stays nil until such a Reset call is made.
initialState *[25]uint64 // todo: unique.Handle?
// running tracks the phase: 0 = untouched since construction or Reset,
// 1 = Write has been called, 2 = Read has applied the final padding.
running uint8
}
// NewShake128 returns a new Shake with a 128-bit size parameter. When both N
// and S are empty the plain SHAKE domain separator is used; otherwise the
// instance is set up as cSHAKE with N and S absorbed as a prefix.
func NewShake128(N, S []byte) *Shake {
	const sizeBytes = 128 / 8
	return newShake(N, S, sizeBytes)
}
// NewShake256 returns a new Shake with a 256-bit size parameter. When both N
// and S are empty the plain SHAKE domain separator is used; otherwise the
// instance is set up as cSHAKE with N and S absorbed as a prefix.
func NewShake256(N, S []byte) *Shake {
	const sizeBytes = 256 / 8
	return newShake(N, S, sizeBytes)
}
// newShake builds a Shake whose digest size is sizeBytes.
//
// With empty N and S the result is plain SHAKE (domain byte 0x1f). Otherwise
// it is cSHAKE (domain byte 0x04): the rate, the bit length of N, N, the bit
// length of S and S are absorbed, and the prefix is zero-padded to a whole
// block and permuted before the caller writes any message bytes.
//
// Reset is deliberately not called here, so no initial-state snapshot exists
// until the caller calls Reset.
func newShake(N, S []byte, sizeBytes int) *Shake {
	s := &Shake{}
	s.digest.size = sizeBytes

	if len(N)+len(S) == 0 {
		s.digest.dsbyte = 0x1f // 1111 10...
		return s
	}

	// cSHAKE
	s.digest.dsbyte = 0x04 // 00 10...
	prefix := [][]byte{
		leftEncode(uint64(s.digest.BlockSize())), // rate in bytes
		leftEncode(uint64(len(N)) * 8),           // length of N in bits
		N,
		leftEncode(uint64(len(S)) * 8), // length of S in bits
		S,
	}
	// Use the embedded digest's Write so the Shake is not marked as running.
	for _, part := range prefix {
		s.digest.Write(part)
	}
	// Pad to a full block unless the prefix already ended on a block boundary.
	if s.len > 0 || s.ulen > 0 {
		s.pad8()
		s.flush()
	}
	return s
}
// pad8 completes a partially filled 8-byte buffer with zeros, xors it into
// the current state word and advances to the next word. It does nothing when
// the buffer is empty.
func (s *Shake) pad8() {
	if s.ulen <= 0 {
		return
	}
	// Zero the unused tail of the buffer.
	tail := s.buf[s.ulen:]
	for i := range tail {
		tail[i] = 0
	}
	s.a[s.len] ^= le64dec(s.buf[:])
	s.len++
	s.ulen = 0
}
// Reset restores the Shake to its initial state.
//
// A Shake can only be reset if Reset was called at least once before the
// first Write or Read: that first call snapshots the initial state, and later
// calls restore the snapshot. Calling Reset after Write or Read without such
// a snapshot panics.
func (s *Shake) Reset() {
	if s.running == 0 {
		// Nothing absorbed or squeezed yet: just make sure a snapshot exists.
		if s.initialState == nil {
			snapshot := s.a
			s.initialState = &snapshot
		}
		return
	}

	if s.initialState == nil {
		panic("keccak: Reset called after Read or Write")
	}
	s.a = *s.initialState
	s.buf = [8]byte{}
	s.ulen = 0
	s.len = 0
	s.running = 0
}
// Write absorbs p into the sponge and marks the Shake as running, which
// affects how a later Reset behaves. It returns the result of the embedded
// digest's Write.
func (s *Shake) Write(p []byte) (int, error) {
	const absorbing = 1
	s.running = absorbing
	return s.digest.Write(p)
}
// Read squeezes len(p) bytes of output into p.
//
// The first non-empty Read finalizes the input: the domain-separation byte is
// appended after any buffered partial word, the final padding bit is set in
// the last word of the rate, and the position is moved to the end of the
// block so that the first squeezed word triggers a permutation.
func (s *Shake) Read(p []byte) (int, error) {
	if s.running >= 2 || len(p) == 0 {
		return s.digest.read(p)
	}
	s.running = 2

	// Build the word holding the domain-separation byte.
	dsword := uint64(s.dsbyte)
	if s.ulen != 0 {
		s.buf[s.ulen] = s.dsbyte
		tail := s.buf[int(s.ulen)+1:]
		for i := range tail {
			tail[i] = 0
		}
		dsword = le64dec(s.buf[:])
	}
	s.a[s.len] ^= dsword

	rateWords := s.BlockSize() / 8
	s.a[rateWords-1] ^= 0x80 << 56
	s.len = rateWords
	s.ulen = 0

	return s.digest.read(p)
}
// read squeezes len(p) bytes of output from the sponge into p and returns
// len(p) and a nil error.
//
// During squeezing d.len is the index of the next state word to output and
// d.buf[d.ulen:] holds the not-yet-returned bytes of a partially consumed
// word. Whenever the position reaches the end of the rate the state is
// permuted before any further word is read.
func (d *digest) read(p []byte) (int, error) {
	bs := d.BlockSize() / 8
	size := len(p)

	// Hand out the leftover bytes of a partially consumed word first.
	if d.ulen > 0 {
		n := copy(p, d.buf[d.ulen:])
		p = p[n:]
		d.ulen += int8(n)
		if int(d.ulen) == len(d.buf) {
			d.ulen = 0
			d.len += 1
		}
	}

	// Emit whole words directly into p.
	// NOTE(review): le64enc is defined outside this block; it is used here as
	// an append-style encoder that writes 8 bytes into the backing array of
	// its first argument — confirm.
	for len(p) >= 8 {
		if d.len == bs {
			d.squeeze()
		}
		le64enc(p[:0], d.a[d.len])
		p = p[8:]
		d.len += 1
	}

	// Emit a trailing partial word via the buffer.
	if len(p) > 0 {
		// The rate may be exhausted here too (for example when the first read
		// is shorter than 8 bytes, or right after a read that ended exactly on
		// a block boundary). Without this check a word outside the rate would
		// be output without permuting the state.
		if d.len == bs {
			d.squeeze()
		}
		le64enc(d.buf[:0], d.a[d.len])
		d.ulen = int8(copy(p, d.buf[:]))
	}
	return size, nil
}
// squeeze permutes the state and rewinds the position to the first word so
// that the next block of output can be read.
func (d *digest) squeeze() {
	d.len = 0
	keccakf(&d.a)
}
// leftEncode returns x as a big-endian byte string with leading zero bytes
// removed (at least one value byte is kept), preceded by a single byte giving
// the number of value bytes. For example, 0 encodes as {1, 0} and 256 as
// {2, 1, 0}.
func leftEncode(x uint64) []byte {
	var enc [9]byte
	be64enc(enc[1:], x)
	// x|1 guarantees at most 63 leading zero bits, so skip is in 0..7.
	skip := bits.LeadingZeros64(x|1) / 8
	enc[skip] = byte(8 - skip)
	return enc[skip:]
}
// be64enc stores x into b[0:8] in big-endian byte order.
// It panics if b is shorter than 8 bytes.
func be64enc(b []byte, x uint64) {
	_ = b[7] // bounds check up front, before anything is written
	for i := 7; i >= 0; i-- {
		b[i] = byte(x)
		x >>= 8
	}
}

124
shake_test.go 100644
View File

@ -0,0 +1,124 @@
package keccak
import (
"bytes"
"fmt"
"testing"
)
// TestShake checks SHAKE128, SHAKE256 and cSHAKE256 output against known
// digests. Cases without an explicit input hash the string "hello keccak";
// the number of output bytes requested is taken from the expected digest.
func TestShake(t *testing.T) {
	const defaultInput = "hello keccak"

	cases := []struct {
		function string
		F, S     []byte
		input    string
		digest   string
	}{
		{
			function: "shake128",
			digest:   "027cfb9e61ea1419dfce3b48e6bcb0b6b692fa2904751e5391c9ed468e010d5946f6f7c2be344c3ac2d722d96cf2a9a115681e22fcb43fd162a830b2079b527b",
		},
		{
			function: "shake256",
			digest:   "f08a3125f880d9a58e0603ed5ae3778d33b8ccc795e0c18c9b9d735caa079440c6ab577eca4e0d898d966c4b730aee69307af26e459a0332367129476c381817",
		},
		{
			function: "shake256",
			input:    "odd output",
			digest:   "678cc171bbc31a40c50d5e",
		},
		{
			function: "cshake256",
			F:        []byte("FunctionName"),
			S:        []byte("CustomizationString"),
			digest:   "e80473cc7466ed4de29611752502ab9c5597818d",
		},
	}

	for _, tc := range cases {
		var sh *Shake
		switch tc.function {
		case "shake128":
			sh = NewShake128(nil, nil)
		case "shake256":
			sh = NewShake256(nil, nil)
		case "cshake256":
			sh = NewShake256(tc.F, tc.S)
		}

		msg := tc.input
		if msg == "" {
			msg = defaultInput
		}
		sh.Write([]byte(msg))

		// The expected digest is hex, so it describes half as many bytes.
		out := make([]byte, len(tc.digest)/2)
		sh.Read(out)

		if got := fmt.Sprintf("%x", out); tc.digest != got {
			t.Errorf("%s: got %s want %s", tc.function, got, tc.digest)
		}
	}
}
// TestShakeNIST reads len(shake256_digest_m0) bytes from a fresh SHAKE256
// instance without writing any input and compares them with the reference
// vector. It also checks that Read reports the full length and no error.
func TestShakeNIST(t *testing.T) {
	sh := NewShake256(nil, nil)
	want := shake256_digest_m0[:]
	out := make([]byte, len(want))
	n, err := sh.Read(out)
	if n != len(out) || err != nil {
		// Report the length that was actually expected, not a fixed 0.
		t.Errorf("unexpected return value: want (%v, nil) got (%v,%v)", len(out), n, err)
	}
	if !bytes.Equal(want, out) {
		t.Errorf("hash output:\nwant %x\ngot %x", want, out)
		// Point at the first mismatching byte to ease debugging.
		for i := range want {
			if want[i] != out[i] {
				t.Logf("note: first difference is at index %d", i)
				break
			}
		}
	}
}
// NIST test vector
//
// shake256_digest_m0 is the expected SHAKE256 output that TestShakeNIST
// compares against after reading from a fresh instance with no input written.
// The table is 39 rows of 13 bytes plus a final row of 5 bytes.
var shake256_digest_m0 = [...]byte{
0x46, 0xB9, 0xDD, 0x2B, 0x0B, 0xA8, 0x8D, 0x13, 0x23, 0x3B, 0x3F, 0xEB, 0x74,
0x3E, 0xEB, 0x24, 0x3F, 0xCD, 0x52, 0xEA, 0x62, 0xB8, 0x1B, 0x82, 0xB5, 0x0C,
0x27, 0x64, 0x6E, 0xD5, 0x76, 0x2F, 0xD7, 0x5D, 0xC4, 0xDD, 0xD8, 0xC0, 0xF2,
0x00, 0xCB, 0x05, 0x01, 0x9D, 0x67, 0xB5, 0x92, 0xF6, 0xFC, 0x82, 0x1C, 0x49,
0x47, 0x9A, 0xB4, 0x86, 0x40, 0x29, 0x2E, 0xAC, 0xB3, 0xB7, 0xC4, 0xBE, 0x14,
0x1E, 0x96, 0x61, 0x6F, 0xB1, 0x39, 0x57, 0x69, 0x2C, 0xC7, 0xED, 0xD0, 0xB4,
0x5A, 0xE3, 0xDC, 0x07, 0x22, 0x3C, 0x8E, 0x92, 0x93, 0x7B, 0xEF, 0x84, 0xBC,
0x0E, 0xAB, 0x86, 0x28, 0x53, 0x34, 0x9E, 0xC7, 0x55, 0x46, 0xF5, 0x8F, 0xB7,
0xC2, 0x77, 0x5C, 0x38, 0x46, 0x2C, 0x50, 0x10, 0xD8, 0x46, 0xC1, 0x85, 0xC1,
0x51, 0x11, 0xE5, 0x95, 0x52, 0x2A, 0x6B, 0xCD, 0x16, 0xCF, 0x86, 0xF3, 0xD1,
0x22, 0x10, 0x9E, 0x3B, 0x1F, 0xDD, 0x94, 0x3B, 0x6A, 0xEC, 0x46, 0x8A, 0x2D,
0x62, 0x1A, 0x7C, 0x06, 0xC6, 0xA9, 0x57, 0xC6, 0x2B, 0x54, 0xDA, 0xFC, 0x3B,
0xE8, 0x75, 0x67, 0xD6, 0x77, 0x23, 0x13, 0x95, 0xF6, 0x14, 0x72, 0x93, 0xB6,
0x8C, 0xEA, 0xB7, 0xA9, 0xE0, 0xC5, 0x8D, 0x86, 0x4E, 0x8E, 0xFD, 0xE4, 0xE1,
0xB9, 0xA4, 0x6C, 0xBE, 0x85, 0x47, 0x13, 0x67, 0x2F, 0x5C, 0xAA, 0xAE, 0x31,
0x4E, 0xD9, 0x08, 0x3D, 0xAB, 0x4B, 0x09, 0x9F, 0x8E, 0x30, 0x0F, 0x01, 0xB8,
0x65, 0x0F, 0x1F, 0x4B, 0x1D, 0x8F, 0xCF, 0x3F, 0x3C, 0xB5, 0x3F, 0xB8, 0xE9,
0xEB, 0x2E, 0xA2, 0x03, 0xBD, 0xC9, 0x70, 0xF5, 0x0A, 0xE5, 0x54, 0x28, 0xA9,
0x1F, 0x7F, 0x53, 0xAC, 0x26, 0x6B, 0x28, 0x41, 0x9C, 0x37, 0x78, 0xA1, 0x5F,
0xD2, 0x48, 0xD3, 0x39, 0xED, 0xE7, 0x85, 0xFB, 0x7F, 0x5A, 0x1A, 0xAA, 0x96,
0xD3, 0x13, 0xEA, 0xCC, 0x89, 0x09, 0x36, 0xC1, 0x73, 0xCD, 0xCD, 0x0F, 0xAB,
0x88, 0x2C, 0x45, 0x75, 0x5F, 0xEB, 0x3A, 0xED, 0x96, 0xD4, 0x77, 0xFF, 0x96,
0x39, 0x0B, 0xF9, 0xA6, 0x6D, 0x13, 0x68, 0xB2, 0x08, 0xE2, 0x1F, 0x7C, 0x10,
0xD0, 0x4A, 0x3D, 0xBD, 0x4E, 0x36, 0x06, 0x33, 0xE5, 0xDB, 0x4B, 0x60, 0x26,
0x01, 0xC1, 0x4C, 0xEA, 0x73, 0x7D, 0xB3, 0xDC, 0xF7, 0x22, 0x63, 0x2C, 0xC7,
0x78, 0x51, 0xCB, 0xDD, 0xE2, 0xAA, 0xF0, 0xA3, 0x3A, 0x07, 0xB3, 0x73, 0x44,
0x5D, 0xF4, 0x90, 0xCC, 0x8F, 0xC1, 0xE4, 0x16, 0x0F, 0xF1, 0x18, 0x37, 0x8F,
0x11, 0xF0, 0x47, 0x7D, 0xE0, 0x55, 0xA8, 0x1A, 0x9E, 0xDA, 0x57, 0xA4, 0xA2,
0xCF, 0xB0, 0xC8, 0x39, 0x29, 0xD3, 0x10, 0x91, 0x2F, 0x72, 0x9E, 0xC6, 0xCF,
0xA3, 0x6C, 0x6A, 0xC6, 0xA7, 0x58, 0x37, 0x14, 0x30, 0x45, 0xD7, 0x91, 0xCC,
0x85, 0xEF, 0xF5, 0xB2, 0x19, 0x32, 0xF2, 0x38, 0x61, 0xBC, 0xF2, 0x3A, 0x52,
0xB5, 0xDA, 0x67, 0xEA, 0xF7, 0xBA, 0xAE, 0x0F, 0x5F, 0xB1, 0x36, 0x9D, 0xB7,
0x8F, 0x3A, 0xC4, 0x5F, 0x8C, 0x4A, 0xC5, 0x67, 0x1D, 0x85, 0x73, 0x5C, 0xDD,
0xDB, 0x09, 0xD2, 0xB1, 0xE3, 0x4A, 0x1F, 0xC0, 0x66, 0xFF, 0x4A, 0x16, 0x2C,
0xB2, 0x63, 0xD6, 0x54, 0x12, 0x74, 0xAE, 0x2F, 0xCC, 0x86, 0x5F, 0x61, 0x8A,
0xBE, 0x27, 0xC1, 0x24, 0xCD, 0x8B, 0x07, 0x4C, 0xCD, 0x51, 0x63, 0x01, 0xB9,
0x18, 0x75, 0x82, 0x4D, 0x09, 0x95, 0x8F, 0x34, 0x1E, 0xF2, 0x74, 0xBD, 0xAB,
0x0B, 0xAE, 0x31, 0x63, 0x39, 0x89, 0x43, 0x04, 0xE3, 0x58, 0x77, 0xB0, 0xC2,
0x8A, 0x9B, 0x1F, 0xD1, 0x66, 0xC7, 0x96, 0xB9, 0xCC, 0x25, 0x8A, 0x06, 0x4A,
0x8F, 0x57, 0xE2, 0x7F, 0x2A,
}

View File

@ -11,7 +11,8 @@ func round(a *[25]uint64) { roundGo(a) }
// digest implements hash.Hash
type digest struct {
a [25]uint64 // a[y][x][z]
buf [200]byte
buf [8]byte // buf[0:ulen] holds a partial uint64
ulen int8
dsbyte byte
len int
size int
@ -29,34 +30,46 @@ func (d *digest) BlockSize() int { return 200 - d.size*2 }
func (d *digest) Reset() {
//fmt.Println("resetting")
d.a = [25]uint64{}
d.buf = [200]byte{}
d.buf = [8]byte{}
d.len = 0
}
func (d *digest) Write(b []byte) (int, error) {
written := len(b)
bs := d.BlockSize()
for len(b) > 0 {
n := copy(d.buf[d.len:bs], b)
d.len += n
bs := d.BlockSize() / 8
// fill buf first, if non-empty
if d.ulen > 0 {
n := copy(d.buf[d.ulen:], b)
b = b[n:]
d.ulen += int8(n)
// flush?
if int(d.ulen) == len(d.buf) {
d.a[d.len] ^= le64dec(d.buf[:])
d.len += 1
d.ulen = 0
if d.len == bs {
d.flush()
}
}
}
// xor 8-byte chunks into the state
for len(b) >= 8 {
d.a[d.len] ^= le64dec(b)
b = b[8:]
d.len += 1
if d.len == bs {
d.flush()
}
} // len(b) < 8
// store any remaining bytes
if len(b) > 0 {
d.ulen = int8(copy(d.buf[:], b))
}
return written, nil
}
func (d *digest) flush() {
//fmt.Printf("Flushing with %d bytes\n", d.len)
b := d.buf[:d.len]
for i := range d.a {
if len(b) == 0 {
break
}
d.a[i] ^= le64dec(b)
b = b[8:]
}
//fmt.Printf("Flushing with %d bytes\n", d.len*8 + int(d.ulen))
keccakf(&d.a)
d.len = 0
}
@ -75,13 +88,19 @@ func (d *digest) clone() *digest {
func (d *digest) Sum(b []byte) []byte {
d = d.clone()
d.buf[d.len] = d.dsbyte
bs := d.BlockSize()
for i := d.len + 1; i < bs; i++ {
d.buf[i] = 0
if d.ulen == 0 {
d.a[d.len] ^= uint64(d.dsbyte)
} else {
d.buf[d.ulen] = d.dsbyte
for i := int(d.ulen) + 1; i < len(d.buf); i++ {
d.buf[i] = 0
}
d.a[d.len] ^= le64dec(d.buf[:])
}
d.buf[bs-1] |= 0x80
d.len = bs
bs := d.BlockSize() / 8
d.a[bs-1] |= 0x80 << 56
//d.len = bs
d.flush()
for i := 0; i < d.size/8; i++ {
@ -91,6 +110,7 @@ func (d *digest) Sum(b []byte) []byte {
}
// le64dec decodes the first 8 bytes of b as a little-endian uint64.
// It panics if b is shorter than 8 bytes.
func le64dec(b []byte) uint64 {
	_ = b[7] // single up-front bounds check for all eight loads
	lo := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24
	hi := uint64(b[4]) | uint64(b[5])<<8 | uint64(b[6])<<16 | uint64(b[7])<<24
	return lo | hi<<32
}