2023-12-29 20:48:12 -07:00
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
|
|
|
|
"log"
|
|
|
|
|
"os"
|
|
|
|
|
|
|
|
|
|
"github.com/samber/lo"
|
|
|
|
|
"github.com/securemesh/coding"
|
2023-12-30 20:14:01 -07:00
|
|
|
"github.com/securemesh/coding/state"
|
2023-12-29 20:48:12 -07:00
|
|
|
"github.com/securemesh/coding/seeds"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
samples := lo.Must(loadSamples())
|
|
|
|
|
|
|
|
|
|
log.Printf("orig=%d", lo.SumBy(samples, func(sample []byte) int {
|
|
|
|
|
return len(sample)
|
|
|
|
|
}))
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
def := state.NewState()
|
2023-12-30 15:51:29 -07:00
|
|
|
log.Printf("def=%d [%s]", totalLength(def, samples), def)
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
chat := seeds.ChatState()
|
2023-12-30 15:51:29 -07:00
|
|
|
log.Printf("chat=%d [%s]", totalLength(chat, samples), chat)
|
2023-12-29 20:48:12 -07:00
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
chatOpt := optimize(chat, samples)
|
|
|
|
|
if chatOpt == nil {
|
|
|
|
|
log.Printf("\toptimal from further additions")
|
|
|
|
|
} else {
|
|
|
|
|
log.Printf("\tnot optimal [%s]", chatOpt)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
opt := optimize(state.NewState(), samples)
|
2023-12-29 20:48:12 -07:00
|
|
|
log.Printf("opt=%d [%s]", totalLength(opt, samples), opt)
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
func optimize(st *state.State, samples [][]byte) *state.State {
|
|
|
|
|
var best *state.State
|
|
|
|
|
|
2023-12-29 20:48:12 -07:00
|
|
|
for true {
|
2023-12-30 20:14:01 -07:00
|
|
|
better := optimize2(st, samples)
|
2023-12-29 20:48:12 -07:00
|
|
|
if better == nil {
|
2023-12-30 20:14:01 -07:00
|
|
|
return best
|
2023-12-29 20:48:12 -07:00
|
|
|
}
|
2023-12-30 20:14:01 -07:00
|
|
|
best = better
|
|
|
|
|
st = better
|
|
|
|
|
log.Printf("\titer=%d [%s]", totalLength(st, samples), st)
|
2023-12-29 20:48:12 -07:00
|
|
|
}
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
return st
|
2023-12-29 20:48:12 -07:00
|
|
|
}
|
|
|
|
|
|
2023-12-29 22:02:07 -07:00
|
|
|
type sampleResult struct {
|
2023-12-30 20:14:01 -07:00
|
|
|
state *state.State
|
2023-12-29 22:02:07 -07:00
|
|
|
score int
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
func optimize2(baseState *state.State, samples [][]byte) *state.State {
|
2023-12-29 22:02:07 -07:00
|
|
|
ch := make(chan sampleResult, 100)
|
|
|
|
|
|
|
|
|
|
for i := 0; i < 256; i++ {
|
2023-12-30 15:51:29 -07:00
|
|
|
s := byte(i)
|
2023-12-29 22:02:07 -07:00
|
|
|
go func () {
|
2023-12-30 20:14:01 -07:00
|
|
|
st := baseState.Clone()
|
|
|
|
|
st.IncrementSymbol(s)
|
2023-12-29 22:02:07 -07:00
|
|
|
ch <- sampleResult{
|
2023-12-30 20:14:01 -07:00
|
|
|
state: st,
|
|
|
|
|
score: totalLength(st, samples),
|
2023-12-29 22:02:07 -07:00
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
var best *state.State = nil
|
|
|
|
|
bestScore := totalLength(baseState, samples)
|
2023-12-29 20:48:12 -07:00
|
|
|
|
|
|
|
|
for i := 0; i < 256; i++ {
|
2023-12-29 22:02:07 -07:00
|
|
|
res := <-ch
|
|
|
|
|
if res.score < bestScore {
|
2023-12-30 20:14:01 -07:00
|
|
|
best = res.state
|
2023-12-29 22:02:07 -07:00
|
|
|
bestScore = res.score
|
2023-12-29 20:48:12 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return best
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-30 20:14:01 -07:00
|
|
|
func totalLength(st *state.State, samples [][]byte) int {
|
2023-12-29 20:48:12 -07:00
|
|
|
return lo.SumBy(samples, func(sample []byte) int {
|
2023-12-30 20:14:01 -07:00
|
|
|
return len(coding.Encode(st.Clone(), sample))
|
2023-12-29 20:48:12 -07:00
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// loadSamples reads "sms.txt" from the current working directory and returns
// its contents as one byte slice per line, without line terminators.
//
// Each returned slice is an independent copy of the line. It returns an
// error if the file cannot be opened, if reading fails, or if a line is
// longer than bufio.Scanner's token limit.
func loadSamples() ([][]byte, error) {
	fh, err := os.Open("sms.txt")
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	s := bufio.NewScanner(fh)
	ret := [][]byte{}

	for s.Scan() {
		// Scanner.Bytes returns a view into the scanner's internal buffer
		// that later Scan calls may overwrite, so keep a private copy.
		line := make([]byte, len(s.Bytes()))
		copy(line, s.Bytes())
		ret = append(ret, line)
	}

	// Scan returns false both at EOF and on failure; only Err tells them
	// apart.
	if err := s.Err(); err != nil {
		return nil, err
	}

	return ret, nil
}
|