From 37bc9cbae15629826a43132bf5eeaf6432d28f94 Mon Sep 17 00:00:00 2001
From: Ian Gulliver
Date: Fri, 29 Dec 2023 20:48:12 -0700
Subject: [PATCH] genseed

---
 genseed/genseed.go | 96 ++++++++++++++++++++++++++++++++++++++++++++++
 heap/heap.go       | 33 ++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 genseed/genseed.go

diff --git a/genseed/genseed.go b/genseed/genseed.go
new file mode 100644
index 0000000..d7f32ed
--- /dev/null
+++ b/genseed/genseed.go
@@ -0,0 +1,96 @@
+// Command genseed greedily searches for heap symbol counts that minimize the
+// total encoded length of the samples in sms.txt.
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"log"
+	"os"
+
+	"github.com/samber/lo"
+	"github.com/securemesh/coding"
+	"github.com/securemesh/coding/heap"
+	"github.com/securemesh/coding/seeds"
+)
+
+func main() {
+	samples := lo.Must(loadSamples())
+
+	log.Printf("orig=%d", lo.SumBy(samples, func(sample []byte) int {
+		return len(sample)
+	}))
+
+	log.Printf("default=%d", totalLength(heap.NewHeap(), samples))
+	log.Printf("chat=%d", totalLength(seeds.ChatHeap(), samples))
+
+	opt := optimize(heap.NewHeap(), samples)
+	log.Printf("opt=%d [%s]", totalLength(opt, samples), opt)
+}
+
+// optimize repeatedly applies optimize2 to h until no single symbol increment
+// shortens the encoded samples, and returns the resulting heap.
+func optimize(h *heap.Heap, samples [][]byte) *heap.Heap {
+	for {
+		better := optimize2(h, samples)
+		if better == nil {
+			return h
+		}
+
+		h = better
+		log.Printf("\titer=%d [%s]", totalLength(h, samples), h)
+	}
+}
+
+// optimize2 tries incrementing each of the 256 byte symbols on a clone of
+// baseHeap and returns the clone with the lowest total encoded length, or nil
+// if no increment scores strictly below baseHeap itself.
+func optimize2(baseHeap *heap.Heap, samples [][]byte) *heap.Heap {
+	var best *heap.Heap
+	bestScore := totalLength(baseHeap, samples)
+
+	for i := 0; i < 256; i++ {
+		h := baseHeap.Clone()
+		h.IncrementSymbol(byte(i))
+
+		if score := totalLength(h, samples); score < bestScore {
+			best = h
+			bestScore = score
+		}
+	}
+
+	return best
+}
+
+// totalLength returns the summed length of all samples, each one encoded with
+// its own clone of h.
+func totalLength(h *heap.Heap, samples [][]byte) int {
+	return lo.SumBy(samples, func(sample []byte) int {
+		return len(coding.Encode(h.Clone(), sample))
+	})
+}
+
+// loadSamples reads sms.txt from the working directory and returns one sample
+// per line.
+func loadSamples() ([][]byte, error) {
+	fh, err := os.Open("sms.txt")
+	if err != nil {
+		return nil, err
+	}
+
+	defer fh.Close()
+
+	s := bufio.NewScanner(fh)
+	ret := [][]byte{}
+
+	for s.Scan() {
+		// Scanner reuses its buffer between calls to Scan, so copy each line.
+		ret = append(ret, bytes.Clone(s.Bytes()))
+	}
+
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+
+	return ret, nil
+}
diff --git a/heap/heap.go b/heap/heap.go
index c07a31e..3553f87 100644
--- a/heap/heap.go
+++ b/heap/heap.go
@@ -1,7 +1,11 @@
 package heap
 
 import (
+	"fmt"
 	"maps"
+	"slices"
+	"sort"
+	"strings"
 )
 
 type node struct {
@@ -55,6 +59,35 @@ func (h *Heap) IncrementSymbol(symbol byte) int {
 	return nodeIndex
 }
 
+// String returns the nodes with a non-zero count as a comma-separated list of
+// entries formatted with "{%#U}=%d" from each node's symbol and count.
+//
+// NOTE(review): the final sort.Strings call orders the entries lexically, which
+// overrides the count-descending sort before it; confirm which order is wanted.
+func (h Heap) String() string {
+	nodes := []node{}
+
+	for _, n := range h.nodes {
+		if n.count == 0 {
+			continue
+		}
+
+		nodes = append(nodes, n)
+	}
+
+	slices.SortStableFunc(nodes, func(a, b node) int { return b.count - a.count })
+
+	strs := []string{}
+
+	for _, n := range nodes {
+		strs = append(strs, fmt.Sprintf("{%#U}=%d", n.symbol, n.count))
+	}
+
+	sort.Strings(strs)
+
+	return strings.Join(strs, ", ")
+}
+
 func (h Heap) parentIndex(nodeIndex int) int {
 	return (nodeIndex - 1) / 2
 }