This commit is contained in:
Ian Gulliver
2023-12-29 20:48:12 -07:00
parent cb38c7581c
commit 37bc9cbae1
2 changed files with 108 additions and 0 deletions

80
genseed/genseed.go Normal file
View File

@@ -0,0 +1,80 @@
package main
import (
"bufio"
"log"
"os"
"github.com/samber/lo"
"github.com/securemesh/coding"
"github.com/securemesh/coding/heap"
"github.com/securemesh/coding/seeds"
)
// main loads the sample corpus and logs its total size under several
// encodings: the raw byte count, the size when encoded with a fresh heap,
// the size with the seeds.ChatHeap heap, and finally the size with a heap
// produced by optimize (together with that heap's textual form).
func main() {
	corpus := lo.Must(loadSamples())

	rawBytes := 0
	for _, msg := range corpus {
		rawBytes += len(msg)
	}
	log.Printf("orig=%d", rawBytes)

	defaultBytes := totalLength(heap.NewHeap(), corpus)
	log.Printf("default=%d", defaultBytes)

	chatBytes := totalLength(seeds.ChatHeap(), corpus)
	log.Printf("chat=%d", chatBytes)

	// optimize logs one "iter=" line per improvement step before returning.
	tuned := optimize(heap.NewHeap(), corpus)
	tunedBytes := totalLength(tuned, corpus)
	log.Printf("opt=%d [%s]", tunedBytes, tuned)
}
// optimize repeatedly applies optimize2 to h until no single-symbol
// increment lowers the total encoded length of samples any further, and
// returns the last heap that was an improvement (or h itself if the very
// first step finds none).
//
// After every successful step the new total length and the heap's textual
// form are logged on an "iter=" line.
func optimize(h *heap.Heap, samples [][]byte) *heap.Heap {
	for {
		better := optimize2(h, samples)
		if better == nil {
			// optimize2 found no increment that beats the current heap.
			return h
		}
		h = better
		log.Printf("\titer=%d [%s]", totalLength(h, samples), h)
	}
}
// optimize2 runs one greedy search step. For each of the 256 possible byte
// symbols it clones baseHeap, increments that symbol once, and measures the
// total encoded length of samples with the resulting heap.
//
// It returns the candidate with the smallest total that is strictly below
// baseHeap's own total; on ties the lowest symbol value wins because later
// candidates must be strictly better. It returns nil when no candidate
// improves on baseHeap. baseHeap itself is only cloned here, never
// incremented directly.
func optimize2(baseHeap *heap.Heap, samples [][]byte) *heap.Heap {
	var winner *heap.Heap
	lowest := totalLength(baseHeap, samples)

	// sym is an int so the loop can terminate after 0xFF without wrapping.
	for sym := 0; sym <= 0xFF; sym++ {
		candidate := baseHeap.Clone()
		candidate.IncrementSymbol(byte(sym))

		if total := totalLength(candidate, samples); total < lowest {
			winner, lowest = candidate, total
		}
	}

	return winner
}
// totalLength returns the sum of the encoded lengths of all samples.
//
// Every sample is encoded with its own clone of heap, so each encoding
// starts from the same heap state.
// NOTE(review): presumably coding.Encode mutates the heap it is given,
// which is why a clone is passed; confirm against the coding package.
func totalLength(heap *heap.Heap, samples [][]byte) int {
	total := 0
	for _, sample := range samples {
		encoded := coding.Encode(heap.Clone(), sample)
		total += len(encoded)
	}
	return total
}
// loadSamples reads "sms.txt" from the current working directory and
// returns its contents as one byte slice per line (line terminators are
// stripped by the scanner).
//
// It returns an error if the file cannot be opened or if scanning fails,
// for example when a line exceeds bufio.Scanner's maximum token size. On
// error the returned slice is nil.
func loadSamples() ([][]byte, error) {
	fh, err := os.Open("sms.txt")
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	s := bufio.NewScanner(fh)
	ret := [][]byte{}
	for s.Scan() {
		// Scanner.Bytes returns a view into the scanner's internal buffer,
		// which later Scan calls may overwrite; copy so each sample keeps
		// its own data.
		line := append([]byte(nil), s.Bytes()...)
		ret = append(ret, line)
	}
	// Scan returning false can mean EOF or a failure; only Err tells which.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return ret, nil
}

View File

@@ -1,7 +1,11 @@
package heap
import (
"fmt"
"maps"
"slices"
"sort"
"strings"
)
type node struct {
@@ -55,6 +59,30 @@ func (h *Heap) IncrementSymbol(symbol byte) int {
return nodeIndex
}
// String renders every node with a non-zero count as "{symbol}=count"
// (the symbol formatted with %#U) and joins the entries with ", ".
//
// The nodes are first ordered by descending count, but the formatted
// entries are then sorted lexically, so the lexical order is what appears
// in the returned string.
// NOTE(review): the final sort.Strings discards the count ordering;
// confirm which order is actually intended.
func (h Heap) String() string {
	live := []node{}
	for _, n := range h.nodes {
		if n.count != 0 {
			live = append(live, n)
		}
	}

	byCountDesc := func(a, b node) int { return b.count - a.count }
	slices.SortStableFunc(live, byCountDesc)

	parts := []string{}
	for _, n := range live {
		entry := fmt.Sprintf("{%#U}=%d", n.symbol, n.count)
		parts = append(parts, entry)
	}
	sort.Strings(parts)

	return strings.Join(parts, ", ")
}
// parentIndex returns (nodeIndex - 1) / 2, which looks like the usual
// parent-position formula for a binary heap stored in a flat sequence.
//
// For nodeIndex 0 the result is 0, because Go integer division truncates
// toward zero.
func (h Heap) parentIndex(nodeIndex int) int {
	parent := (nodeIndex - 1) / 2
	return parent
}