reduce buffer size to 8 bytes
Instead of buffering an entire block, buffer only when the input is not aligned to 8 bytes, and otherwise XOR uint64-sized chunks directly into the state. The code is a little more complicated, but I think it's worth it. We could eliminate the buffer entirely, but that requires either shenanigans with unsafe or fiddly code to XOR partial uint64s.

A caveat is that the implementation now only supports sponge capacities that are a multiple of 8 bytes. That's fine for the standard instantiations but may restrict unusual applications.

Not only does this let us reduce the buffer from 200 bytes to 8, it also provides a nice speedup:

name      old time/op  new time/op  delta
256_8-2   1.45µs ± 0%  1.28µs ± 1%  -11.58%  (p=0.000 n=10+10)
256_1k-2  10.1µs ± 0%   9.3µs ± 0%   -7.67%  (p=0.000 n=10+10)
256_8k-2  75.6µs ± 0%  70.2µs ± 1%   -7.09%  (p=0.000 n=10+10)
512_8-2   1.39µs ± 1%  1.29µs ± 1%   -6.85%  (p=0.000 n=10+10)
512_1k-2  18.7µs ± 0%  17.0µs ± 0%   -8.70%  (p=0.000 n=9+10)
512_8k-2   146µs ± 1%   129µs ± 0%  -11.70%  (p=0.000 n=10+9)

name      old speed      new speed      delta
256_8-2   5.53MB/s ± 0%  6.25MB/s ± 0%  +13.06%  (p=0.000 n=10+10)
256_1k-2   102MB/s ± 0%   110MB/s ± 0%   +8.30%  (p=0.000 n=10+10)
256_8k-2   108MB/s ± 0%   117MB/s ± 1%   +7.64%  (p=0.000 n=10+10)
512_8-2   5.78MB/s ± 1%  6.20MB/s ± 1%   +7.32%  (p=0.000 n=10+10)
512_1k-2  54.9MB/s ± 0%  60.1MB/s ± 0%   +9.53%  (p=0.000 n=9+10)
512_8k-2  56.1MB/s ± 1%  63.5MB/s ± 0%  +13.26%  (p=0.000 n=10+9)
This commit is contained in:
		
							parent
							
								
									70a9bfa87d
								
							
						
					
					
						commit
						73654f751c
					
				
							
								
								
									
										59
									
								
								sponge.go
									
									
									
									
									
								
							
							
						
						
									
										59
									
								
								sponge.go
									
									
									
									
									
								
							@ -11,7 +11,8 @@ func round(a *[25]uint64) { roundGo(a) }
 | 
				
			|||||||
// digest implements hash.Hash
 | 
					// digest implements hash.Hash
 | 
				
			||||||
type digest struct {
 | 
					type digest struct {
 | 
				
			||||||
	a      [25]uint64 // a[y][x][z]
 | 
						a      [25]uint64 // a[y][x][z]
 | 
				
			||||||
	buf    [200]byte
 | 
						buf    [8]byte    // buf[0:ulen] holds a partial uint64
 | 
				
			||||||
 | 
						ulen   int8
 | 
				
			||||||
	dsbyte byte
 | 
						dsbyte byte
 | 
				
			||||||
	len    int
 | 
						len    int
 | 
				
			||||||
	size   int
 | 
						size   int
 | 
				
			||||||
@ -29,34 +30,46 @@ func (d *digest) BlockSize() int { return 200 - d.size*2 }
 | 
				
			|||||||
func (d *digest) Reset() {
 | 
					func (d *digest) Reset() {
 | 
				
			||||||
	//fmt.Println("resetting")
 | 
						//fmt.Println("resetting")
 | 
				
			||||||
	d.a = [25]uint64{}
 | 
						d.a = [25]uint64{}
 | 
				
			||||||
	d.buf = [200]byte{}
 | 
						d.buf = [8]byte{}
 | 
				
			||||||
	d.len = 0
 | 
						d.len = 0
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (d *digest) Write(b []byte) (int, error) {
 | 
					func (d *digest) Write(b []byte) (int, error) {
 | 
				
			||||||
	written := len(b)
 | 
						written := len(b)
 | 
				
			||||||
	bs := d.BlockSize()
 | 
						bs := d.BlockSize() / 8
 | 
				
			||||||
	for len(b) > 0 {
 | 
						// fill buf first, if non-empty
 | 
				
			||||||
		n := copy(d.buf[d.len:bs], b)
 | 
						if d.ulen > 0 {
 | 
				
			||||||
		d.len += n
 | 
							n := copy(d.buf[d.ulen:], b)
 | 
				
			||||||
		b = b[n:]
 | 
							b = b[n:]
 | 
				
			||||||
 | 
							d.ulen += int8(n)
 | 
				
			||||||
 | 
							// flush?
 | 
				
			||||||
 | 
							if int(d.ulen) == len(d.buf) {
 | 
				
			||||||
 | 
								d.a[d.len] ^= le64dec(d.buf[:])
 | 
				
			||||||
 | 
								d.len += 1
 | 
				
			||||||
 | 
								d.ulen = 0
 | 
				
			||||||
			if d.len == bs {
 | 
								if d.len == bs {
 | 
				
			||||||
				d.flush()
 | 
									d.flush()
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						// xor 8-byte chunks into the state
 | 
				
			||||||
 | 
						for len(b) >= 8 {
 | 
				
			||||||
 | 
							d.a[d.len] ^= le64dec(b)
 | 
				
			||||||
 | 
							b = b[8:]
 | 
				
			||||||
 | 
							d.len += 1
 | 
				
			||||||
 | 
							if d.len == bs {
 | 
				
			||||||
 | 
								d.flush()
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						} // len(b) < 8
 | 
				
			||||||
 | 
						// store any remaining bytes
 | 
				
			||||||
 | 
						if len(b) > 0 {
 | 
				
			||||||
 | 
							d.ulen = int8(copy(d.buf[:], b))
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	return written, nil
 | 
						return written, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (d *digest) flush() {
 | 
					func (d *digest) flush() {
 | 
				
			||||||
	//fmt.Printf("Flushing with %d bytes\n", d.len)
 | 
						//fmt.Printf("Flushing with %d bytes\n", d.len*8 + int(d.ulen))
 | 
				
			||||||
	b := d.buf[:d.len]
 | 
					 | 
				
			||||||
	for i := range d.a {
 | 
					 | 
				
			||||||
		if len(b) == 0 {
 | 
					 | 
				
			||||||
			break
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		d.a[i] ^= le64dec(b)
 | 
					 | 
				
			||||||
		b = b[8:]
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	keccakf(&d.a)
 | 
						keccakf(&d.a)
 | 
				
			||||||
	d.len = 0
 | 
						d.len = 0
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -75,13 +88,19 @@ func (d *digest) clone() *digest {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
func (d *digest) Sum(b []byte) []byte {
 | 
					func (d *digest) Sum(b []byte) []byte {
 | 
				
			||||||
	d = d.clone()
 | 
						d = d.clone()
 | 
				
			||||||
	d.buf[d.len] = d.dsbyte
 | 
						if d.ulen == 0 {
 | 
				
			||||||
	bs := d.BlockSize()
 | 
							d.a[d.len] ^= uint64(d.dsbyte)
 | 
				
			||||||
	for i := d.len + 1; i < bs; i++ {
 | 
						} else {
 | 
				
			||||||
 | 
							d.buf[d.ulen] = d.dsbyte
 | 
				
			||||||
 | 
							for i := int(d.ulen) + 1; i < len(d.buf); i++ {
 | 
				
			||||||
			d.buf[i] = 0
 | 
								d.buf[i] = 0
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	d.buf[bs-1] |= 0x80
 | 
							d.a[d.len] ^= le64dec(d.buf[:])
 | 
				
			||||||
	d.len = bs
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bs := d.BlockSize() / 8
 | 
				
			||||||
 | 
						d.a[bs-1] |= 0x80 << 56
 | 
				
			||||||
 | 
						//d.len = bs
 | 
				
			||||||
	d.flush()
 | 
						d.flush()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for i := 0; i < d.size/8; i++ {
 | 
						for i := 0; i < d.size/8; i++ {
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user