Dynamic code generation

This commit is contained in:
Ian Gulliver
2023-12-31 19:24:17 -08:00
parent 2f88ac2708
commit 01731ffb4f
4 changed files with 82 additions and 344 deletions

View File

@@ -1,271 +1,26 @@
package codes package codes
type Code struct { type Code struct {
Value uint16 Value uint32
Bits int Bits uint8
} }
var codes = []Code{ func CodeForIndex(index uint32) Code {
{Value: 0b0000, Bits: 4}, switch {
{Value: 0b0001, Bits: 4}, case index < 4:
{Value: 0b0010, Bits: 4}, return Code{Value: index, Bits: 4}
{Value: 0b0011, Bits: 4}, case index < 8:
{Value: 0b01000, Bits: 5}, return Code{Value: 0b01000 + (index & 0b00011), Bits: 5}
{Value: 0b01001, Bits: 5}, case index < 16:
{Value: 0b01010, Bits: 5}, return Code{Value: 0b011000 + (index & 0b000111), Bits: 6}
{Value: 0b01011, Bits: 5}, case index < 32:
{Value: 0b011000, Bits: 6}, return Code{Value: 0b1000000 + (index & 0b0001111), Bits: 7}
{Value: 0b011001, Bits: 6}, default:
{Value: 0b011010, Bits: 6}, set := uint8(index / uint32(64))
{Value: 0b011011, Bits: 6},
{Value: 0b011100, Bits: 6},
{Value: 0b011101, Bits: 6},
{Value: 0b011110, Bits: 6},
{Value: 0b011111, Bits: 6},
{Value: 0b1000000, Bits: 7},
{Value: 0b1000001, Bits: 7},
{Value: 0b1000010, Bits: 7},
{Value: 0b1000011, Bits: 7},
{Value: 0b1000100, Bits: 7},
{Value: 0b1000101, Bits: 7},
{Value: 0b1000110, Bits: 7},
{Value: 0b1000111, Bits: 7},
{Value: 0b1001000, Bits: 7},
{Value: 0b1001001, Bits: 7},
{Value: 0b1001010, Bits: 7},
{Value: 0b1001011, Bits: 7},
{Value: 0b1001100, Bits: 7},
{Value: 0b1001101, Bits: 7},
{Value: 0b1001110, Bits: 7},
{Value: 0b1001111, Bits: 7},
{Value: 0b10100000, Bits: 8},
{Value: 0b10100001, Bits: 8},
{Value: 0b10100010, Bits: 8},
{Value: 0b10100011, Bits: 8},
{Value: 0b10100100, Bits: 8},
{Value: 0b10100101, Bits: 8},
{Value: 0b10100110, Bits: 8},
{Value: 0b10100111, Bits: 8},
{Value: 0b10101000, Bits: 8},
{Value: 0b10101001, Bits: 8},
{Value: 0b10101010, Bits: 8},
{Value: 0b10101011, Bits: 8},
{Value: 0b10101100, Bits: 8},
{Value: 0b10101101, Bits: 8},
{Value: 0b10101110, Bits: 8},
{Value: 0b10101111, Bits: 8},
{Value: 0b10110000, Bits: 8},
{Value: 0b10110001, Bits: 8},
{Value: 0b10110010, Bits: 8},
{Value: 0b10110011, Bits: 8},
{Value: 0b10110100, Bits: 8},
{Value: 0b10110101, Bits: 8},
{Value: 0b10110110, Bits: 8},
{Value: 0b10110111, Bits: 8},
{Value: 0b10111000, Bits: 8},
{Value: 0b10111001, Bits: 8},
{Value: 0b10111010, Bits: 8},
{Value: 0b10111011, Bits: 8},
{Value: 0b10111100, Bits: 8},
{Value: 0b10111101, Bits: 8},
{Value: 0b10111110, Bits: 8},
{Value: 0b10111111, Bits: 8},
{Value: 0b110000000, Bits: 9},
{Value: 0b110000001, Bits: 9},
{Value: 0b110000010, Bits: 9},
{Value: 0b110000011, Bits: 9},
{Value: 0b110000100, Bits: 9},
{Value: 0b110000101, Bits: 9},
{Value: 0b110000110, Bits: 9},
{Value: 0b110000111, Bits: 9},
{Value: 0b110001000, Bits: 9},
{Value: 0b110001001, Bits: 9},
{Value: 0b110001010, Bits: 9},
{Value: 0b110001011, Bits: 9},
{Value: 0b110001100, Bits: 9},
{Value: 0b110001101, Bits: 9},
{Value: 0b110001110, Bits: 9},
{Value: 0b110001111, Bits: 9},
{Value: 0b110010000, Bits: 9},
{Value: 0b110010001, Bits: 9},
{Value: 0b110010010, Bits: 9},
{Value: 0b110010011, Bits: 9},
{Value: 0b110010100, Bits: 9},
{Value: 0b110010101, Bits: 9},
{Value: 0b110010110, Bits: 9},
{Value: 0b110010111, Bits: 9},
{Value: 0b110011000, Bits: 9},
{Value: 0b110011001, Bits: 9},
{Value: 0b110011010, Bits: 9},
{Value: 0b110011011, Bits: 9},
{Value: 0b110011100, Bits: 9},
{Value: 0b110011101, Bits: 9},
{Value: 0b110011110, Bits: 9},
{Value: 0b110011111, Bits: 9},
{Value: 0b110100000, Bits: 9},
{Value: 0b110100001, Bits: 9},
{Value: 0b110100010, Bits: 9},
{Value: 0b110100011, Bits: 9},
{Value: 0b110100100, Bits: 9},
{Value: 0b110100101, Bits: 9},
{Value: 0b110100110, Bits: 9},
{Value: 0b110100111, Bits: 9},
{Value: 0b110101000, Bits: 9},
{Value: 0b110101001, Bits: 9},
{Value: 0b110101010, Bits: 9},
{Value: 0b110101011, Bits: 9},
{Value: 0b110101100, Bits: 9},
{Value: 0b110101101, Bits: 9},
{Value: 0b110101110, Bits: 9},
{Value: 0b110101111, Bits: 9},
{Value: 0b110110000, Bits: 9},
{Value: 0b110110001, Bits: 9},
{Value: 0b110110010, Bits: 9},
{Value: 0b110110011, Bits: 9},
{Value: 0b110110100, Bits: 9},
{Value: 0b110110101, Bits: 9},
{Value: 0b110110110, Bits: 9},
{Value: 0b110110111, Bits: 9},
{Value: 0b110111000, Bits: 9},
{Value: 0b110111001, Bits: 9},
{Value: 0b110111010, Bits: 9},
{Value: 0b110111011, Bits: 9},
{Value: 0b110111100, Bits: 9},
{Value: 0b110111101, Bits: 9},
{Value: 0b110111110, Bits: 9},
{Value: 0b110111111, Bits: 9},
{Value: 0b1110000000, Bits: 10},
{Value: 0b1110000001, Bits: 10},
{Value: 0b1110000010, Bits: 10},
{Value: 0b1110000011, Bits: 10},
{Value: 0b1110000100, Bits: 10},
{Value: 0b1110000101, Bits: 10},
{Value: 0b1110000110, Bits: 10},
{Value: 0b1110000111, Bits: 10},
{Value: 0b1110001000, Bits: 10},
{Value: 0b1110001001, Bits: 10},
{Value: 0b1110001010, Bits: 10},
{Value: 0b1110001011, Bits: 10},
{Value: 0b1110001100, Bits: 10},
{Value: 0b1110001101, Bits: 10},
{Value: 0b1110001110, Bits: 10},
{Value: 0b1110001111, Bits: 10},
{Value: 0b1110010000, Bits: 10},
{Value: 0b1110010001, Bits: 10},
{Value: 0b1110010010, Bits: 10},
{Value: 0b1110010011, Bits: 10},
{Value: 0b1110010100, Bits: 10},
{Value: 0b1110010101, Bits: 10},
{Value: 0b1110010110, Bits: 10},
{Value: 0b1110010111, Bits: 10},
{Value: 0b1110011000, Bits: 10},
{Value: 0b1110011001, Bits: 10},
{Value: 0b1110011010, Bits: 10},
{Value: 0b1110011011, Bits: 10},
{Value: 0b1110011100, Bits: 10},
{Value: 0b1110011101, Bits: 10},
{Value: 0b1110011110, Bits: 10},
{Value: 0b1110011111, Bits: 10},
{Value: 0b1110100000, Bits: 10},
{Value: 0b1110100001, Bits: 10},
{Value: 0b1110100010, Bits: 10},
{Value: 0b1110100011, Bits: 10},
{Value: 0b1110100100, Bits: 10},
{Value: 0b1110100101, Bits: 10},
{Value: 0b1110100110, Bits: 10},
{Value: 0b1110100111, Bits: 10},
{Value: 0b1110101000, Bits: 10},
{Value: 0b1110101001, Bits: 10},
{Value: 0b1110101010, Bits: 10},
{Value: 0b1110101011, Bits: 10},
{Value: 0b1110101100, Bits: 10},
{Value: 0b1110101101, Bits: 10},
{Value: 0b1110101110, Bits: 10},
{Value: 0b1110101111, Bits: 10},
{Value: 0b1110110000, Bits: 10},
{Value: 0b1110110001, Bits: 10},
{Value: 0b1110110010, Bits: 10},
{Value: 0b1110110011, Bits: 10},
{Value: 0b1110110100, Bits: 10},
{Value: 0b1110110101, Bits: 10},
{Value: 0b1110110110, Bits: 10},
{Value: 0b1110110111, Bits: 10},
{Value: 0b1110111000, Bits: 10},
{Value: 0b1110111001, Bits: 10},
{Value: 0b1110111010, Bits: 10},
{Value: 0b1110111011, Bits: 10},
{Value: 0b1110111100, Bits: 10},
{Value: 0b1110111101, Bits: 10},
{Value: 0b1110111110, Bits: 10},
{Value: 0b1110111111, Bits: 10},
{Value: 0b1111000000, Bits: 10},
{Value: 0b1111000001, Bits: 10},
{Value: 0b1111000010, Bits: 10},
{Value: 0b1111000011, Bits: 10},
{Value: 0b1111000100, Bits: 10},
{Value: 0b1111000101, Bits: 10},
{Value: 0b1111000110, Bits: 10},
{Value: 0b1111000111, Bits: 10},
{Value: 0b1111001000, Bits: 10},
{Value: 0b1111001001, Bits: 10},
{Value: 0b1111001010, Bits: 10},
{Value: 0b1111001011, Bits: 10},
{Value: 0b1111001100, Bits: 10},
{Value: 0b1111001101, Bits: 10},
{Value: 0b1111001110, Bits: 10},
{Value: 0b1111001111, Bits: 10},
{Value: 0b1111010000, Bits: 10},
{Value: 0b1111010001, Bits: 10},
{Value: 0b1111010010, Bits: 10},
{Value: 0b1111010011, Bits: 10},
{Value: 0b1111010100, Bits: 10},
{Value: 0b1111010101, Bits: 10},
{Value: 0b1111010110, Bits: 10},
{Value: 0b1111010111, Bits: 10},
{Value: 0b1111011000, Bits: 10},
{Value: 0b1111011001, Bits: 10},
{Value: 0b1111011010, Bits: 10},
{Value: 0b1111011011, Bits: 10},
{Value: 0b1111011100, Bits: 10},
{Value: 0b1111011101, Bits: 10},
{Value: 0b1111011110, Bits: 10},
{Value: 0b1111011111, Bits: 10},
{Value: 0b1111100000, Bits: 10},
{Value: 0b1111100001, Bits: 10},
{Value: 0b1111100010, Bits: 10},
{Value: 0b1111100011, Bits: 10},
{Value: 0b1111100100, Bits: 10},
{Value: 0b1111100101, Bits: 10},
{Value: 0b1111100110, Bits: 10},
{Value: 0b1111100111, Bits: 10},
{Value: 0b1111101000, Bits: 10},
{Value: 0b1111101001, Bits: 10},
{Value: 0b1111101010, Bits: 10},
{Value: 0b1111101011, Bits: 10},
{Value: 0b1111101100, Bits: 10},
{Value: 0b1111101101, Bits: 10},
{Value: 0b1111101110, Bits: 10},
{Value: 0b1111101111, Bits: 10},
{Value: 0b1111110000, Bits: 10},
{Value: 0b1111110001, Bits: 10},
{Value: 0b1111110010, Bits: 10},
{Value: 0b1111110011, Bits: 10},
{Value: 0b1111110100, Bits: 10},
{Value: 0b1111110101, Bits: 10},
{Value: 0b1111110110, Bits: 10},
{Value: 0b1111110111, Bits: 10},
{Value: 0b1111111000, Bits: 10},
{Value: 0b1111111001, Bits: 10},
{Value: 0b1111111010, Bits: 10},
{Value: 0b1111111011, Bits: 10},
{Value: 0b1111111100, Bits: 10},
{Value: 0b1111111101, Bits: 10},
{Value: 0b11111111100, Bits: 11},
{Value: 0b11111111101, Bits: 11},
{Value: 0b11111111110, Bits: 11},
{Value: 0b11111111111, Bits: 11},
}
func CodeForIndex(index int) Code { return Code{
return codes[index] Value: (((2 << set) - 1) << 7) + (index % uint32(64)),
Bits: set + 8,
}
}
} }

View File

@@ -16,7 +16,7 @@ func Encode(st *state.State, msg []byte) []byte {
for i := 0; i < len(msg); { for i := 0; i < len(msg); {
l, index := st.IncrementSymbol(msg[i:]) l, index := st.IncrementSymbol(msg[i:])
i += l i += l
code := codes.CodeForIndex(index) code := codes.CodeForIndex(uint32(index))
lo.Must0(w.WriteBits(uint64(code.Value), uint8(code.Bits))) lo.Must0(w.WriteBits(uint64(code.Value), uint8(code.Bits)))
} }

View File

@@ -1,79 +0,0 @@
package main
import (
"log"
"strings"
)
// main enumerates every bit string of length 1 through 10 and then, walking
// the lengths in ascending order, keeps at most a fixed number of strings per
// length, skipping any string that starts with an already-kept shorter string.
// Each kept string is printed as it is chosen; the per-length counts and two
// totals are logged at the end.
func main() {
	const maxBits = 10

	// candidates[n] starts out as all 2^n bit strings of length n and is
	// later overwritten with only the strings kept for that length.
	candidates := map[int][]string{
		1: {"0", "1"},
	}
	for n := 2; n <= maxBits; n++ {
		longer := []string{}
		for _, shorter := range candidates[n-1] {
			longer = append(longer, shorter+"0", shorter+"1")
		}
		candidates[n] = longer
	}

	// quota[n] is the maximum number of strings of length n to keep.
	quota := map[int]int{
		1:  0,
		2:  0,
		3:  0,
		4:  2,
		5:  8,
		6:  8,
		7:  16,
		8:  32,
		9:  64,
		10: 256,
	}

	// hasKeptPrefix reports whether any kept string shorter than code is a
	// prefix of it. It reads candidates at call time, so it only sees the
	// filtered lists of the lengths that were already processed.
	hasKeptPrefix := func(code string, bits int) bool {
		for n := 1; n < bits; n++ {
			for _, kept := range candidates[n] {
				if strings.HasPrefix(code, kept) {
					return true
				}
			}
		}
		return false
	}

	var total, short int
	for n := 1; n <= maxBits; n++ {
		remaining := quota[n]
		kept := []string{}
		for _, code := range candidates[n] {
			if remaining == 0 {
				break
			}
			if hasKeptPrefix(code, n) {
				continue
			}
			kept = append(kept, code)
			remaining--
			print(code + "\n")
		}
		candidates[n] = kept

		total += len(kept)
		// Strings shorter than 8 bits are tallied separately as "short".
		if n < 8 {
			short += len(kept)
		}
	}

	for n := 1; n <= maxBits; n++ {
		log.Printf("%d: %d", n, len(candidates[n]))
	}
	log.Printf("total=%d", total)
	log.Printf("short=%d", short)
}

View File

@@ -20,6 +20,8 @@ func main() {
return len(sample) return len(sample)
})) }))
dict := buildDictionary(samples, 1024)
def := state.NewState() def := state.NewState()
log.Printf("def=%d [%s]", totalLength(def, samples), def) log.Printf("def=%d [%s]", totalLength(def, samples), def)
@@ -30,6 +32,66 @@ func main() {
log.Printf("opt=%d [%s]", totalLength(opt, samples), opt) log.Printf("opt=%d [%s]", totalLength(opt, samples), opt)
} }
// pair tracks one candidate dictionary symbol and how often it occurs in the
// samples.
type pair struct {
	symbol []byte // the candidate byte sequence
	count  int    // number of occurrences counted so far
}

// buildDictionary returns up to num of the highest-scoring byte sequences of
// length 2 through 4 that occur in samples, best first.
//
// Every position of every sample contributes the 2-, 3- and 4-byte sequences
// starting there, including the ones that end exactly at the end of the
// sample. Candidates are ranked by score (descending); ties are broken by the
// bytewise order of the symbols so the result is deterministic. The returned
// symbols are copies and do not alias samples. A num of zero or less yields an
// empty, non-nil slice.
func buildDictionary(samples [][]byte, num int) [][]byte {
	const (
		minSymbolLen = 2
		maxSymbolLen = 4
	)

	counts := map[uint64]*pair{}
	for _, sample := range samples {
		for i := range sample {
			rest := sample[i:]
			// "<=" so that a symbol ending on the sample's last byte is counted.
			for n := minSymbolLen; n <= min(maxSymbolLen, len(rest)); n++ {
				sym := rest[:n]
				// toUint64 alone maps {0,'a','b'} and {'a','b'} to the same
				// value. The packed bytes occupy at most the low 32 bits, so
				// the length is stored above them to keep the keys distinct.
				key := toUint64(sym) | uint64(n)<<32
				if p, ok := counts[key]; ok {
					p.count++
					continue
				}
				// Clone so the dictionary neither pins nor aliases the sample.
				counts[key] = &pair{symbol: bytes.Clone(sym), count: 1}
			}
		}
	}

	pairs := make([]*pair, 0, len(counts))
	for _, p := range counts {
		pairs = append(pairs, p)
	}
	// Map iteration order is random, so the comparator must be a total order:
	// highest score first, then bytewise symbol order.
	slices.SortFunc(pairs, func(a, b *pair) int {
		if d := b.score() - a.score(); d != 0 {
			return d
		}
		return bytes.Compare(a.symbol, b.symbol)
	})

	n := max(0, min(num, len(pairs)))
	ret := make([][]byte, 0, n)
	for _, p := range pairs[:n] {
		ret = append(ret, p.symbol)
	}
	return ret
}

// toUint64 packs bs into an integer, first byte most significant. Bytes beyond
// the last eight are shifted out, and leading zero bytes do not affect the
// result.
func toUint64(bs []byte) uint64 {
	var ret uint64
	for _, b := range bs {
		ret = (ret << 8) | uint64(b)
	}
	return ret
}

// score weights the symbol's size in bits, less 11, by its occurrence count.
// NOTE(review): 11 is presumably the bit cost of emitting one dictionary code,
// which would make this an estimate of bits saved — confirm against the codes
// package.
func (p pair) score() int {
	return p.count * ((len(p.symbol) * 8) - 11)
}
func optimize(st *state.State, samples [][]byte) *state.State { func optimize(st *state.State, samples [][]byte) *state.State {
st.AddSymbol([]byte("it ")) st.AddSymbol([]byte("it "))