Files
coding/genseed/genseed.go

111 lines
2.1 KiB
Go
Raw Normal View History

2023-12-29 20:48:12 -07:00
package main
import (
"bufio"
2023-12-31 16:09:35 -08:00
"bytes"
2023-12-29 20:48:12 -07:00
"log"
"os"
"slices"
2023-12-29 20:48:12 -07:00
"github.com/samber/lo"
"github.com/securemesh/coding"
"github.com/securemesh/coding/seeds"
2023-12-31 16:09:35 -08:00
"github.com/securemesh/coding/state"
2023-12-29 20:48:12 -07:00
)
func main() {
samples := lo.Must(loadSamples())
log.Printf("orig=%d", lo.SumBy(samples, func(sample []byte) int {
return len(sample)
}))
2023-12-30 20:14:01 -07:00
def := state.NewState()
2023-12-30 15:51:29 -07:00
log.Printf("def=%d [%s]", totalLength(def, samples), def)
2023-12-30 20:14:01 -07:00
chat := seeds.ChatState()
2023-12-30 15:51:29 -07:00
log.Printf("chat=%d [%s]", totalLength(chat, samples), chat)
2023-12-29 20:48:12 -07:00
2023-12-30 20:14:01 -07:00
opt := optimize(state.NewState(), samples)
2023-12-29 20:48:12 -07:00
log.Printf("opt=%d [%s]", totalLength(opt, samples), opt)
}
2023-12-30 20:14:01 -07:00
func optimize(st *state.State, samples [][]byte) *state.State {
2023-12-31 16:09:35 -08:00
st.AddSymbol([]byte("it "))
2023-12-29 20:48:12 -07:00
for true {
2023-12-30 20:14:01 -07:00
better := optimize2(st, samples)
2023-12-29 20:48:12 -07:00
if better == nil {
2023-12-30 21:00:37 -07:00
return st
2023-12-29 20:48:12 -07:00
}
2023-12-30 20:14:01 -07:00
st = better
log.Printf("\titer=%d [%s]", totalLength(st, samples), st)
2023-12-29 20:48:12 -07:00
}
2023-12-30 20:14:01 -07:00
return st
2023-12-29 20:48:12 -07:00
}
2023-12-29 22:02:07 -07:00
type sampleResult struct {
2023-12-31 16:09:35 -08:00
symbol []byte
state *state.State
score int
2023-12-29 22:02:07 -07:00
}
2023-12-30 20:14:01 -07:00
func optimize2(baseState *state.State, samples [][]byte) *state.State {
2023-12-29 22:02:07 -07:00
ch := make(chan sampleResult, 100)
2023-12-31 16:09:35 -08:00
symbols := baseState.Symbols()
2023-12-29 22:02:07 -07:00
2023-12-31 16:09:35 -08:00
for _, symbol := range symbols {
res := sampleResult{
2023-12-31 16:09:35 -08:00
symbol: symbol,
}
2023-12-31 16:09:35 -08:00
go func() {
2023-12-30 20:14:01 -07:00
st := baseState.Clone()
st.IncrementSymbol(res.symbol)
res.state = st
res.score = totalLength(st, samples)
ch <- res
2023-12-29 22:02:07 -07:00
}()
}
results := []sampleResult{}
2023-12-29 20:48:12 -07:00
2023-12-31 16:09:35 -08:00
for _ = range symbols {
results = append(results, <-ch)
}
2023-12-31 16:09:35 -08:00
slices.SortFunc(results, func(a, b sampleResult) int { return bytes.Compare(a.symbol, b.symbol) })
best := slices.MaxFunc(results, func(a, b sampleResult) int { return b.score - a.score })
if best.score == totalLength(baseState, samples) {
return nil
2023-12-29 20:48:12 -07:00
}
return best.state
2023-12-29 20:48:12 -07:00
}
2023-12-30 20:14:01 -07:00
func totalLength(st *state.State, samples [][]byte) int {
2023-12-29 20:48:12 -07:00
return lo.SumBy(samples, func(sample []byte) int {
2023-12-30 20:14:01 -07:00
return len(coding.Encode(st.Clone(), sample))
2023-12-29 20:48:12 -07:00
})
}
// loadSamples reads the file "sms.txt" from the current working directory and
// returns its contents as one byte slice per line, with line terminators
// stripped by bufio.Scanner.
//
// It returns an error if the file cannot be opened or if the scanner reports
// a read error (including a line longer than the scanner's token limit).
func loadSamples() ([][]byte, error) {
	fh, err := os.Open("sms.txt")
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	s := bufio.NewScanner(fh)
	ret := [][]byte{}
	for s.Scan() {
		// Scanner.Bytes returns a view into the scanner's internal buffer,
		// which later Scan calls may overwrite; keep an independent copy.
		ret = append(ret, bytes.Clone(s.Bytes()))
	}
	// Scan returning false can mean EOF or a failure; distinguish the two.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return ret, nil
}