2022-01-22 09:49:46 +00:00
|
|
|
package pmap
|
|
|
|
|
2022-01-23 00:24:49 +00:00
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"math/bits"
|
|
|
|
)
|
|
|
|
|
2022-01-23 00:34:52 +00:00
|
|
|
// Trie geometry. Every interior node consumes nodeShift bits of the hash, so
// the other two constants follow directly from it.
const (
	nodeShift  = 4              // hash bits consumed per trie level
	nodeDegree = 1 << nodeShift // branch factor of nodes (16)
	nodeMask   = nodeDegree - 1 // selects the low nodeShift bits (0b1111)
)
|
2022-01-22 09:49:46 +00:00
|
|
|
|
2022-01-23 01:34:34 +00:00
|
|
|
// Element types handled by Map. Both are aliases, so plain ints can be used
// wherever a Key or Value is expected.
type (
	// Key is the type of the keys stored in a Map.
	Key = int
	// Value is the type of the values stored in a Map.
	Value = int
)
|
2022-01-22 09:49:46 +00:00
|
|
|
|
|
|
|
type Map interface {
|
|
|
|
Get(Key) (Value, bool)
|
|
|
|
Set(Key, Value) Map
|
|
|
|
Del(Key) Map
|
|
|
|
Len() int
|
|
|
|
}
|
|
|
|
|
|
|
|
type pmap struct {
|
|
|
|
root interface{}
|
|
|
|
len int
|
|
|
|
hash func(Key) uint32
|
|
|
|
}
|
|
|
|
|
|
|
|
// node is an interior node of the hashed trie. Only occupied slots are
// stored: bitmap has one bit set per occupied slot and child holds the
// occupants in ascending slot order, so len(child) always equals the number
// of bits set in bitmap.
type node struct {
	child  []interface{} // occupants: leaf, *node or collision values
	bitmap uint32        // bit i is set when slot i is occupied
}
|
|
|
|
|
2022-01-23 03:08:19 +00:00
|
|
|
func bitmask(shiftedHash uint32) uint32 {
|
|
|
|
return uint32(1) << (shiftedHash & nodeMask)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *node) index(mask uint32) int {
|
|
|
|
return bits.OnesCount32(n.bitmap & (mask - 1))
|
|
|
|
}
|
|
|
|
|
2022-01-22 09:49:46 +00:00
|
|
|
type collision struct {
|
|
|
|
hash uint32
|
|
|
|
leaf []leaf
|
|
|
|
}
|
|
|
|
|
|
|
|
type leaf struct {
|
|
|
|
k Key
|
|
|
|
v Value
|
|
|
|
}
|
|
|
|
|
2022-01-23 01:25:42 +00:00
|
|
|
// HashFunc is the signature of the function a map uses to hash its keys. It
// is an alias, so any func(Key) uint32 can be passed where a HashFunc is
// expected.
type HashFunc = func(Key) uint32
|
|
|
|
|
|
|
|
func New(hash HashFunc) Map {
|
|
|
|
if hash == nil {
|
|
|
|
panic("pmap.New: nil hash")
|
|
|
|
}
|
|
|
|
return pmap{hash: hash}
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p pmap) Len() int {
|
|
|
|
return p.len
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p pmap) Get(k Key) (Value, bool) {
|
2022-01-23 01:25:42 +00:00
|
|
|
var zero Value
|
|
|
|
h := p.hash(k)
|
2022-01-23 00:34:52 +00:00
|
|
|
return lookup(p.root, 0, h, k, zero)
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p pmap) Set(k Key, v Value) Map {
|
2022-01-23 01:25:42 +00:00
|
|
|
h := p.hash(k)
|
2022-01-23 05:27:21 +00:00
|
|
|
root, added := insert(p.root, h, k, v, p.hash)
|
2022-01-22 21:04:29 +00:00
|
|
|
p.root = root
|
|
|
|
if added {
|
|
|
|
p.len++
|
|
|
|
}
|
|
|
|
//pretty.Println(p)
|
|
|
|
return p
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p pmap) Del(k Key) Map {
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
|
2022-01-23 03:08:19 +00:00
|
|
|
func (n *node) check() {
|
|
|
|
if bits.OnesCount32(n.bitmap) != len(n.child) {
|
|
|
|
panic(fmt.Sprintf("pmap: corrupt bitmap b=%#b len=%d", n.bitmap, len(n.child)))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *node) getNode(shift, hash uint32, key Key) interface{} {
|
|
|
|
n.check()
|
|
|
|
m := bitmask(hash >> shift)
|
|
|
|
if n.bitmap&m == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return n.child[n.index(m)]
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
|
2022-01-23 03:08:19 +00:00
|
|
|
func (n collision) getNode(hash uint32, key Key) interface{} {
|
|
|
|
if hash != n.hash {
|
2022-01-22 09:49:46 +00:00
|
|
|
return nil
|
|
|
|
}
|
2022-01-23 03:08:19 +00:00
|
|
|
for i := range n.leaf {
|
|
|
|
if key == n.leaf[i].k {
|
|
|
|
return n.leaf[i]
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-01-23 03:13:39 +00:00
|
|
|
func lookup(root interface{}, shift, hash uint32, key Key, zero Value) (Value, bool) {
|
2022-01-22 09:49:46 +00:00
|
|
|
cur := root
|
|
|
|
for {
|
|
|
|
switch n := cur.(type) {
|
|
|
|
case nil:
|
2022-01-23 00:34:52 +00:00
|
|
|
return zero, false
|
2022-01-22 09:49:46 +00:00
|
|
|
case leaf:
|
|
|
|
if n.k == key {
|
|
|
|
return n.v, true
|
|
|
|
} else {
|
2022-01-23 00:34:52 +00:00
|
|
|
return zero, false
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
case *node:
|
|
|
|
cur = n.getNode(shift, hash, key)
|
2022-01-23 00:34:52 +00:00
|
|
|
shift += nodeShift
|
2022-01-23 01:39:18 +00:00
|
|
|
case collision:
|
2022-01-22 09:49:46 +00:00
|
|
|
cur = n.getNode(hash, key)
|
|
|
|
default:
|
|
|
|
panic("pmap: unhandled case in lookup")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func singleton(key Key, val Value, hash, shift uint32) *node {
|
2022-01-22 21:04:29 +00:00
|
|
|
return newnode(leaf{key, val}, hash, shift)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newnode(child interface{}, hash, shift uint32) *node {
|
2022-01-22 09:49:46 +00:00
|
|
|
n := &node{}
|
2022-01-23 03:08:19 +00:00
|
|
|
a := [1]interface{}{child}
|
|
|
|
n.child = a[:]
|
|
|
|
n.bitmap = bitmask(hash >> shift)
|
|
|
|
n.check()
|
2022-01-22 09:49:46 +00:00
|
|
|
return n
|
|
|
|
}
|
|
|
|
|
2022-01-23 02:59:35 +00:00
|
|
|
func insert(n interface{}, hash uint32, key Key, val Value, hashFn HashFunc) (newNode interface{}, added bool) {
|
2022-01-22 09:49:46 +00:00
|
|
|
if n == nil {
|
|
|
|
return leaf{key, val}, true
|
|
|
|
}
|
2022-01-22 21:04:29 +00:00
|
|
|
var _insert func(n interface{}, shift uint32) interface{}
|
|
|
|
_insert = func(n interface{}, shift uint32) interface{} {
|
|
|
|
//fmt.Printf("insert %v %x %#v\n", shift, hash, n)
|
2022-01-22 09:49:46 +00:00
|
|
|
switch n := n.(type) {
|
|
|
|
//case nil:
|
|
|
|
// added = true
|
|
|
|
// return leaf{key, val}
|
|
|
|
case leaf:
|
|
|
|
if n.k == key {
|
|
|
|
// replace existing entry
|
|
|
|
added = false
|
|
|
|
return leaf{key, val}
|
|
|
|
} else if h := hashFn(n.k); h == hash {
|
|
|
|
// collision
|
|
|
|
added = true
|
2022-01-23 01:39:18 +00:00
|
|
|
return collision{hash, []leaf{{key, val}, n}}
|
2022-01-22 09:49:46 +00:00
|
|
|
} else {
|
2022-01-22 21:04:29 +00:00
|
|
|
if h>>shift == hash>>shift {
|
|
|
|
panic("pmap: infinite loop in insert")
|
|
|
|
}
|
|
|
|
// not a collision, so we must still have some hash bits left
|
2022-01-22 09:49:46 +00:00
|
|
|
// split the trie
|
2022-01-23 03:08:19 +00:00
|
|
|
x := newnode(n, h, shift)
|
|
|
|
return _insert(x, shift)
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
case *node:
|
|
|
|
c := n.getNode(shift, hash, key)
|
|
|
|
if c == nil {
|
|
|
|
// new node
|
2022-01-22 21:04:29 +00:00
|
|
|
c = leaf{key, val}
|
2022-01-22 09:49:46 +00:00
|
|
|
added = true
|
2022-01-23 03:08:19 +00:00
|
|
|
|
|
|
|
m := bitmask(hash >> shift)
|
|
|
|
x := &node{bitmap: n.bitmap | m}
|
|
|
|
i := x.index(m)
|
|
|
|
x.child = make([]interface{}, len(n.child)+1)
|
|
|
|
copy(x.child[:i], n.child[:i])
|
|
|
|
x.child[i] = c
|
|
|
|
copy(x.child[i+1:], n.child[i:])
|
|
|
|
x.check()
|
|
|
|
return x
|
2022-01-22 09:49:46 +00:00
|
|
|
} else {
|
2022-01-23 00:34:52 +00:00
|
|
|
c = _insert(c, shift+nodeShift)
|
2022-01-23 03:08:19 +00:00
|
|
|
// TODO: short circuit if c unchanged?
|
|
|
|
m := bitmask(hash >> shift)
|
|
|
|
x := &node{bitmap: n.bitmap}
|
|
|
|
i := x.index(m)
|
|
|
|
x.child = make([]interface{}, len(n.child))
|
|
|
|
copy(x.child, n.child)
|
|
|
|
x.child[i] = c
|
|
|
|
x.check()
|
|
|
|
return x
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
2022-01-23 01:39:18 +00:00
|
|
|
case collision:
|
2022-01-22 09:49:46 +00:00
|
|
|
if n.hash != hash {
|
2022-01-22 21:04:29 +00:00
|
|
|
// not a collision, so we must still have some hash bits left
|
|
|
|
// split the trie
|
2022-01-23 03:08:19 +00:00
|
|
|
x := newnode(n, n.hash, shift)
|
|
|
|
return _insert(x, shift)
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
for i := range n.leaf {
|
|
|
|
if key == n.leaf[i].k {
|
|
|
|
// replace existing entry
|
2022-01-23 03:08:19 +00:00
|
|
|
l := make([]leaf, len(n.leaf))
|
2022-01-22 09:49:46 +00:00
|
|
|
l[0] = leaf{key, val}
|
2022-01-23 03:08:19 +00:00
|
|
|
copy(l[1:], n.leaf[:i])
|
|
|
|
copy(l[1+i:], n.leaf[i+1:])
|
2022-01-22 09:49:46 +00:00
|
|
|
added = false
|
2022-01-23 01:39:18 +00:00
|
|
|
return collision{hash, l}
|
2022-01-22 09:49:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// new collision
|
|
|
|
added = true
|
2022-01-23 01:39:18 +00:00
|
|
|
return collision{hash, append([]leaf{{key, val}}, n.leaf...)}
|
2022-01-22 09:49:46 +00:00
|
|
|
default:
|
|
|
|
panic("pmap: unhandled case in insert")
|
|
|
|
}
|
|
|
|
}
|
2022-01-23 02:59:35 +00:00
|
|
|
newNode = _insert(n, 0)
|
2022-01-22 09:49:46 +00:00
|
|
|
return
|
|
|
|
}
|
2022-01-23 00:24:37 +00:00
|
|
|
|
|
|
|
// stats summarises the shape of a trie as gathered by (pmap).stats.
type stats struct {
	count          int     // leaves, nodes and collisions visited
	maxHeight      int     // greatest depth at which a leaf was found (root is 1)
	avgHeight      float64 // mean depth of the leaves
	avgDegree      float64 // mean number of non-nil children per node
	nodeCount      int     // interior nodes
	leafCount      int     // leaves stored directly in the trie
	collisionCount int     // collision buckets
	collidedKeys   int     // entries held in collision buckets
	emptySlots     int     // nil entries found in child slices
}
|
|
|
|
|
|
|
|
func (p pmap) stats() stats {
|
|
|
|
var s stats
|
|
|
|
var th float64
|
|
|
|
var td float64
|
|
|
|
var visit func(n interface{}, h int)
|
|
|
|
visit = func(n interface{}, h int) {
|
|
|
|
switch n := n.(type) {
|
|
|
|
case leaf:
|
|
|
|
s.count++
|
|
|
|
s.leafCount++
|
|
|
|
th += float64(h)
|
|
|
|
if s.maxHeight < h {
|
|
|
|
s.maxHeight = h
|
|
|
|
}
|
|
|
|
case *node:
|
|
|
|
s.count++
|
|
|
|
s.nodeCount++
|
|
|
|
for i := range n.child {
|
|
|
|
if n.child[i] != nil {
|
2022-01-23 03:08:19 +00:00
|
|
|
td += 1.0
|
2022-01-23 00:24:37 +00:00
|
|
|
visit(n.child[i], h+1)
|
2022-01-23 03:08:19 +00:00
|
|
|
} else {
|
|
|
|
s.emptySlots++
|
2022-01-23 00:24:37 +00:00
|
|
|
}
|
|
|
|
}
|
2022-01-23 01:39:18 +00:00
|
|
|
case collision:
|
|
|
|
s.count++
|
2022-01-23 00:24:37 +00:00
|
|
|
s.collisionCount++
|
|
|
|
s.collidedKeys += len(n.leaf)
|
|
|
|
default:
|
|
|
|
panic("pmap: unhandled case in stats")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
visit(p.root, 1)
|
|
|
|
if s.leafCount > 0 {
|
|
|
|
s.avgHeight = th / float64(s.leafCount)
|
|
|
|
}
|
|
|
|
if s.nodeCount > 0 {
|
|
|
|
s.avgDegree = td / float64(s.nodeCount)
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s stats) String() string {
|
|
|
|
return fmt.Sprintf(
|
|
|
|
"count = %d\n"+
|
|
|
|
"maxHeight = %d\n"+
|
|
|
|
"avgHeight = %g\n"+
|
|
|
|
"avgDegree = %g\n"+
|
|
|
|
"nodeCount = %d\n"+
|
|
|
|
"leafCount = %d\n"+
|
|
|
|
"collisionCount = %d\n"+
|
|
|
|
"collidedKeys = %d\n"+
|
|
|
|
"emptySlots = %d\n",
|
|
|
|
s.count,
|
|
|
|
s.maxHeight,
|
|
|
|
s.avgHeight,
|
|
|
|
s.avgDegree,
|
|
|
|
s.nodeCount,
|
|
|
|
s.leafCount,
|
|
|
|
s.collisionCount,
|
|
|
|
s.collidedKeys,
|
|
|
|
s.emptySlots,
|
|
|
|
)
|
|
|
|
}
|