Dynamic code generation
This commit is contained in:
283
codes/codes.go
283
codes/codes.go
@@ -1,271 +1,26 @@
|
||||
package codes
|
||||
|
||||
type Code struct {
|
||||
Value uint16
|
||||
Bits int
|
||||
Value uint32
|
||||
Bits uint8
|
||||
}
|
||||
|
||||
var codes = []Code{
|
||||
{Value: 0b0000, Bits: 4},
|
||||
{Value: 0b0001, Bits: 4},
|
||||
{Value: 0b0010, Bits: 4},
|
||||
{Value: 0b0011, Bits: 4},
|
||||
{Value: 0b01000, Bits: 5},
|
||||
{Value: 0b01001, Bits: 5},
|
||||
{Value: 0b01010, Bits: 5},
|
||||
{Value: 0b01011, Bits: 5},
|
||||
{Value: 0b011000, Bits: 6},
|
||||
{Value: 0b011001, Bits: 6},
|
||||
{Value: 0b011010, Bits: 6},
|
||||
{Value: 0b011011, Bits: 6},
|
||||
{Value: 0b011100, Bits: 6},
|
||||
{Value: 0b011101, Bits: 6},
|
||||
{Value: 0b011110, Bits: 6},
|
||||
{Value: 0b011111, Bits: 6},
|
||||
{Value: 0b1000000, Bits: 7},
|
||||
{Value: 0b1000001, Bits: 7},
|
||||
{Value: 0b1000010, Bits: 7},
|
||||
{Value: 0b1000011, Bits: 7},
|
||||
{Value: 0b1000100, Bits: 7},
|
||||
{Value: 0b1000101, Bits: 7},
|
||||
{Value: 0b1000110, Bits: 7},
|
||||
{Value: 0b1000111, Bits: 7},
|
||||
{Value: 0b1001000, Bits: 7},
|
||||
{Value: 0b1001001, Bits: 7},
|
||||
{Value: 0b1001010, Bits: 7},
|
||||
{Value: 0b1001011, Bits: 7},
|
||||
{Value: 0b1001100, Bits: 7},
|
||||
{Value: 0b1001101, Bits: 7},
|
||||
{Value: 0b1001110, Bits: 7},
|
||||
{Value: 0b1001111, Bits: 7},
|
||||
{Value: 0b10100000, Bits: 8},
|
||||
{Value: 0b10100001, Bits: 8},
|
||||
{Value: 0b10100010, Bits: 8},
|
||||
{Value: 0b10100011, Bits: 8},
|
||||
{Value: 0b10100100, Bits: 8},
|
||||
{Value: 0b10100101, Bits: 8},
|
||||
{Value: 0b10100110, Bits: 8},
|
||||
{Value: 0b10100111, Bits: 8},
|
||||
{Value: 0b10101000, Bits: 8},
|
||||
{Value: 0b10101001, Bits: 8},
|
||||
{Value: 0b10101010, Bits: 8},
|
||||
{Value: 0b10101011, Bits: 8},
|
||||
{Value: 0b10101100, Bits: 8},
|
||||
{Value: 0b10101101, Bits: 8},
|
||||
{Value: 0b10101110, Bits: 8},
|
||||
{Value: 0b10101111, Bits: 8},
|
||||
{Value: 0b10110000, Bits: 8},
|
||||
{Value: 0b10110001, Bits: 8},
|
||||
{Value: 0b10110010, Bits: 8},
|
||||
{Value: 0b10110011, Bits: 8},
|
||||
{Value: 0b10110100, Bits: 8},
|
||||
{Value: 0b10110101, Bits: 8},
|
||||
{Value: 0b10110110, Bits: 8},
|
||||
{Value: 0b10110111, Bits: 8},
|
||||
{Value: 0b10111000, Bits: 8},
|
||||
{Value: 0b10111001, Bits: 8},
|
||||
{Value: 0b10111010, Bits: 8},
|
||||
{Value: 0b10111011, Bits: 8},
|
||||
{Value: 0b10111100, Bits: 8},
|
||||
{Value: 0b10111101, Bits: 8},
|
||||
{Value: 0b10111110, Bits: 8},
|
||||
{Value: 0b10111111, Bits: 8},
|
||||
{Value: 0b110000000, Bits: 9},
|
||||
{Value: 0b110000001, Bits: 9},
|
||||
{Value: 0b110000010, Bits: 9},
|
||||
{Value: 0b110000011, Bits: 9},
|
||||
{Value: 0b110000100, Bits: 9},
|
||||
{Value: 0b110000101, Bits: 9},
|
||||
{Value: 0b110000110, Bits: 9},
|
||||
{Value: 0b110000111, Bits: 9},
|
||||
{Value: 0b110001000, Bits: 9},
|
||||
{Value: 0b110001001, Bits: 9},
|
||||
{Value: 0b110001010, Bits: 9},
|
||||
{Value: 0b110001011, Bits: 9},
|
||||
{Value: 0b110001100, Bits: 9},
|
||||
{Value: 0b110001101, Bits: 9},
|
||||
{Value: 0b110001110, Bits: 9},
|
||||
{Value: 0b110001111, Bits: 9},
|
||||
{Value: 0b110010000, Bits: 9},
|
||||
{Value: 0b110010001, Bits: 9},
|
||||
{Value: 0b110010010, Bits: 9},
|
||||
{Value: 0b110010011, Bits: 9},
|
||||
{Value: 0b110010100, Bits: 9},
|
||||
{Value: 0b110010101, Bits: 9},
|
||||
{Value: 0b110010110, Bits: 9},
|
||||
{Value: 0b110010111, Bits: 9},
|
||||
{Value: 0b110011000, Bits: 9},
|
||||
{Value: 0b110011001, Bits: 9},
|
||||
{Value: 0b110011010, Bits: 9},
|
||||
{Value: 0b110011011, Bits: 9},
|
||||
{Value: 0b110011100, Bits: 9},
|
||||
{Value: 0b110011101, Bits: 9},
|
||||
{Value: 0b110011110, Bits: 9},
|
||||
{Value: 0b110011111, Bits: 9},
|
||||
{Value: 0b110100000, Bits: 9},
|
||||
{Value: 0b110100001, Bits: 9},
|
||||
{Value: 0b110100010, Bits: 9},
|
||||
{Value: 0b110100011, Bits: 9},
|
||||
{Value: 0b110100100, Bits: 9},
|
||||
{Value: 0b110100101, Bits: 9},
|
||||
{Value: 0b110100110, Bits: 9},
|
||||
{Value: 0b110100111, Bits: 9},
|
||||
{Value: 0b110101000, Bits: 9},
|
||||
{Value: 0b110101001, Bits: 9},
|
||||
{Value: 0b110101010, Bits: 9},
|
||||
{Value: 0b110101011, Bits: 9},
|
||||
{Value: 0b110101100, Bits: 9},
|
||||
{Value: 0b110101101, Bits: 9},
|
||||
{Value: 0b110101110, Bits: 9},
|
||||
{Value: 0b110101111, Bits: 9},
|
||||
{Value: 0b110110000, Bits: 9},
|
||||
{Value: 0b110110001, Bits: 9},
|
||||
{Value: 0b110110010, Bits: 9},
|
||||
{Value: 0b110110011, Bits: 9},
|
||||
{Value: 0b110110100, Bits: 9},
|
||||
{Value: 0b110110101, Bits: 9},
|
||||
{Value: 0b110110110, Bits: 9},
|
||||
{Value: 0b110110111, Bits: 9},
|
||||
{Value: 0b110111000, Bits: 9},
|
||||
{Value: 0b110111001, Bits: 9},
|
||||
{Value: 0b110111010, Bits: 9},
|
||||
{Value: 0b110111011, Bits: 9},
|
||||
{Value: 0b110111100, Bits: 9},
|
||||
{Value: 0b110111101, Bits: 9},
|
||||
{Value: 0b110111110, Bits: 9},
|
||||
{Value: 0b110111111, Bits: 9},
|
||||
{Value: 0b1110000000, Bits: 10},
|
||||
{Value: 0b1110000001, Bits: 10},
|
||||
{Value: 0b1110000010, Bits: 10},
|
||||
{Value: 0b1110000011, Bits: 10},
|
||||
{Value: 0b1110000100, Bits: 10},
|
||||
{Value: 0b1110000101, Bits: 10},
|
||||
{Value: 0b1110000110, Bits: 10},
|
||||
{Value: 0b1110000111, Bits: 10},
|
||||
{Value: 0b1110001000, Bits: 10},
|
||||
{Value: 0b1110001001, Bits: 10},
|
||||
{Value: 0b1110001010, Bits: 10},
|
||||
{Value: 0b1110001011, Bits: 10},
|
||||
{Value: 0b1110001100, Bits: 10},
|
||||
{Value: 0b1110001101, Bits: 10},
|
||||
{Value: 0b1110001110, Bits: 10},
|
||||
{Value: 0b1110001111, Bits: 10},
|
||||
{Value: 0b1110010000, Bits: 10},
|
||||
{Value: 0b1110010001, Bits: 10},
|
||||
{Value: 0b1110010010, Bits: 10},
|
||||
{Value: 0b1110010011, Bits: 10},
|
||||
{Value: 0b1110010100, Bits: 10},
|
||||
{Value: 0b1110010101, Bits: 10},
|
||||
{Value: 0b1110010110, Bits: 10},
|
||||
{Value: 0b1110010111, Bits: 10},
|
||||
{Value: 0b1110011000, Bits: 10},
|
||||
{Value: 0b1110011001, Bits: 10},
|
||||
{Value: 0b1110011010, Bits: 10},
|
||||
{Value: 0b1110011011, Bits: 10},
|
||||
{Value: 0b1110011100, Bits: 10},
|
||||
{Value: 0b1110011101, Bits: 10},
|
||||
{Value: 0b1110011110, Bits: 10},
|
||||
{Value: 0b1110011111, Bits: 10},
|
||||
{Value: 0b1110100000, Bits: 10},
|
||||
{Value: 0b1110100001, Bits: 10},
|
||||
{Value: 0b1110100010, Bits: 10},
|
||||
{Value: 0b1110100011, Bits: 10},
|
||||
{Value: 0b1110100100, Bits: 10},
|
||||
{Value: 0b1110100101, Bits: 10},
|
||||
{Value: 0b1110100110, Bits: 10},
|
||||
{Value: 0b1110100111, Bits: 10},
|
||||
{Value: 0b1110101000, Bits: 10},
|
||||
{Value: 0b1110101001, Bits: 10},
|
||||
{Value: 0b1110101010, Bits: 10},
|
||||
{Value: 0b1110101011, Bits: 10},
|
||||
{Value: 0b1110101100, Bits: 10},
|
||||
{Value: 0b1110101101, Bits: 10},
|
||||
{Value: 0b1110101110, Bits: 10},
|
||||
{Value: 0b1110101111, Bits: 10},
|
||||
{Value: 0b1110110000, Bits: 10},
|
||||
{Value: 0b1110110001, Bits: 10},
|
||||
{Value: 0b1110110010, Bits: 10},
|
||||
{Value: 0b1110110011, Bits: 10},
|
||||
{Value: 0b1110110100, Bits: 10},
|
||||
{Value: 0b1110110101, Bits: 10},
|
||||
{Value: 0b1110110110, Bits: 10},
|
||||
{Value: 0b1110110111, Bits: 10},
|
||||
{Value: 0b1110111000, Bits: 10},
|
||||
{Value: 0b1110111001, Bits: 10},
|
||||
{Value: 0b1110111010, Bits: 10},
|
||||
{Value: 0b1110111011, Bits: 10},
|
||||
{Value: 0b1110111100, Bits: 10},
|
||||
{Value: 0b1110111101, Bits: 10},
|
||||
{Value: 0b1110111110, Bits: 10},
|
||||
{Value: 0b1110111111, Bits: 10},
|
||||
{Value: 0b1111000000, Bits: 10},
|
||||
{Value: 0b1111000001, Bits: 10},
|
||||
{Value: 0b1111000010, Bits: 10},
|
||||
{Value: 0b1111000011, Bits: 10},
|
||||
{Value: 0b1111000100, Bits: 10},
|
||||
{Value: 0b1111000101, Bits: 10},
|
||||
{Value: 0b1111000110, Bits: 10},
|
||||
{Value: 0b1111000111, Bits: 10},
|
||||
{Value: 0b1111001000, Bits: 10},
|
||||
{Value: 0b1111001001, Bits: 10},
|
||||
{Value: 0b1111001010, Bits: 10},
|
||||
{Value: 0b1111001011, Bits: 10},
|
||||
{Value: 0b1111001100, Bits: 10},
|
||||
{Value: 0b1111001101, Bits: 10},
|
||||
{Value: 0b1111001110, Bits: 10},
|
||||
{Value: 0b1111001111, Bits: 10},
|
||||
{Value: 0b1111010000, Bits: 10},
|
||||
{Value: 0b1111010001, Bits: 10},
|
||||
{Value: 0b1111010010, Bits: 10},
|
||||
{Value: 0b1111010011, Bits: 10},
|
||||
{Value: 0b1111010100, Bits: 10},
|
||||
{Value: 0b1111010101, Bits: 10},
|
||||
{Value: 0b1111010110, Bits: 10},
|
||||
{Value: 0b1111010111, Bits: 10},
|
||||
{Value: 0b1111011000, Bits: 10},
|
||||
{Value: 0b1111011001, Bits: 10},
|
||||
{Value: 0b1111011010, Bits: 10},
|
||||
{Value: 0b1111011011, Bits: 10},
|
||||
{Value: 0b1111011100, Bits: 10},
|
||||
{Value: 0b1111011101, Bits: 10},
|
||||
{Value: 0b1111011110, Bits: 10},
|
||||
{Value: 0b1111011111, Bits: 10},
|
||||
{Value: 0b1111100000, Bits: 10},
|
||||
{Value: 0b1111100001, Bits: 10},
|
||||
{Value: 0b1111100010, Bits: 10},
|
||||
{Value: 0b1111100011, Bits: 10},
|
||||
{Value: 0b1111100100, Bits: 10},
|
||||
{Value: 0b1111100101, Bits: 10},
|
||||
{Value: 0b1111100110, Bits: 10},
|
||||
{Value: 0b1111100111, Bits: 10},
|
||||
{Value: 0b1111101000, Bits: 10},
|
||||
{Value: 0b1111101001, Bits: 10},
|
||||
{Value: 0b1111101010, Bits: 10},
|
||||
{Value: 0b1111101011, Bits: 10},
|
||||
{Value: 0b1111101100, Bits: 10},
|
||||
{Value: 0b1111101101, Bits: 10},
|
||||
{Value: 0b1111101110, Bits: 10},
|
||||
{Value: 0b1111101111, Bits: 10},
|
||||
{Value: 0b1111110000, Bits: 10},
|
||||
{Value: 0b1111110001, Bits: 10},
|
||||
{Value: 0b1111110010, Bits: 10},
|
||||
{Value: 0b1111110011, Bits: 10},
|
||||
{Value: 0b1111110100, Bits: 10},
|
||||
{Value: 0b1111110101, Bits: 10},
|
||||
{Value: 0b1111110110, Bits: 10},
|
||||
{Value: 0b1111110111, Bits: 10},
|
||||
{Value: 0b1111111000, Bits: 10},
|
||||
{Value: 0b1111111001, Bits: 10},
|
||||
{Value: 0b1111111010, Bits: 10},
|
||||
{Value: 0b1111111011, Bits: 10},
|
||||
{Value: 0b1111111100, Bits: 10},
|
||||
{Value: 0b1111111101, Bits: 10},
|
||||
{Value: 0b11111111100, Bits: 11},
|
||||
{Value: 0b11111111101, Bits: 11},
|
||||
{Value: 0b11111111110, Bits: 11},
|
||||
{Value: 0b11111111111, Bits: 11},
|
||||
}
|
||||
func CodeForIndex(index uint32) Code {
|
||||
switch {
|
||||
case index < 4:
|
||||
return Code{Value: index, Bits: 4}
|
||||
case index < 8:
|
||||
return Code{Value: 0b01000 + (index & 0b00011), Bits: 5}
|
||||
case index < 16:
|
||||
return Code{Value: 0b011000 + (index & 0b000111), Bits: 6}
|
||||
case index < 32:
|
||||
return Code{Value: 0b1000000 + (index & 0b0001111), Bits: 7}
|
||||
default:
|
||||
set := uint8(index / uint32(64))
|
||||
|
||||
func CodeForIndex(index int) Code {
|
||||
return codes[index]
|
||||
return Code{
|
||||
Value: (((2 << set) - 1) << 7) + (index % uint32(64)),
|
||||
Bits: set + 8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ func Encode(st *state.State, msg []byte) []byte {
|
||||
for i := 0; i < len(msg); {
|
||||
l, index := st.IncrementSymbol(msg[i:])
|
||||
i += l
|
||||
code := codes.CodeForIndex(index)
|
||||
code := codes.CodeForIndex(uint32(index))
|
||||
lo.Must0(w.WriteBits(uint64(code.Value), uint8(code.Bits)))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// main searches for a prefix-free set of bit strings with a fixed number of
// codes per length. Each accepted code is written with the builtin print as it
// is found; afterwards the number of codes kept per length, the overall total,
// and the number of codes shorter than 8 bits are logged.
func main() {
	const maxLen = 10

	// Enumerate every bit string of each length from 1 to maxLen by extending
	// the strings of the previous length with "0" and "1".
	byLen := map[int][]string{1: {"0", "1"}}

	for n := 2; n <= maxLen; n++ {
		shorter := byLen[n-1]
		longer := make([]string, 0, 2*len(shorter))

		for _, s := range shorter {
			longer = append(longer, s+"0", s+"1")
		}

		byLen[n] = longer
	}

	// Maximum number of codes to keep at each length.
	limits := map[int]int{
		1:  0,
		2:  0,
		3:  0,
		4:  2,
		5:  8,
		6:  8,
		7:  16,
		8:  32,
		9:  64,
		10: 256,
	}

	// clashes reports whether candidate starts with a code stored under any
	// shorter length. By the time a length is processed, the entries for all
	// shorter lengths have already been reduced to the codes that were kept.
	clashes := func(candidate string) bool {
		for n := 1; n < len(candidate); n++ {
			for _, kept := range byLen[n] {
				if strings.HasPrefix(candidate, kept) {
					return true
				}
			}
		}

		return false
	}

	var total, short int

	for n := 1; n <= maxLen; n++ {
		remaining := limits[n]

		var kept []string

		for _, candidate := range byLen[n] {
			if remaining == 0 {
				break
			}

			if clashes(candidate) {
				continue
			}

			kept = append(kept, candidate)
			remaining--

			print(candidate + "\n")
		}

		// Replace the full enumeration with the accepted codes so that longer
		// lengths are only checked against codes that are actually in use.
		byLen[n] = kept

		total += len(kept)
		if n < 8 {
			short += len(kept)
		}
	}

	for n := 1; n <= maxLen; n++ {
		log.Printf("%d: %d", n, len(byLen[n]))
	}

	log.Printf("total=%d", total)
	log.Printf("short=%d", short)
}
|
||||
@@ -20,6 +20,8 @@ func main() {
|
||||
return len(sample)
|
||||
}))
|
||||
|
||||
dict := buildDictionary(samples, 1024)
|
||||
|
||||
def := state.NewState()
|
||||
log.Printf("def=%d [%s]", totalLength(def, samples), def)
|
||||
|
||||
@@ -30,6 +32,66 @@ func main() {
|
||||
log.Printf("opt=%d [%s]", totalLength(opt, samples), opt)
|
||||
}
|
||||
|
||||
// pair is a candidate dictionary symbol together with the number of times it
// occurs in the samples.
type pair struct {
	symbol []byte
	count  int
}

// buildDictionary counts every 2-, 3- and 4-byte substring of every sample
// (including the ones that end exactly at the end of a sample) and returns up
// to num of them, ordered by descending score. Candidates with equal scores
// are ordered by their bytes so the result is deterministic.
//
// The returned slices alias the sample data; they are capacity-capped, so
// appending to one of them never overwrites sample bytes. A non-positive num
// yields an empty, non-nil result.
func buildDictionary(samples [][]byte, num int) [][]byte {
	const (
		minSymbolLen = 2
		maxSymbolLen = 4
	)

	counts := map[uint64]*pair{}

	for _, sample := range samples {
		for i := range sample {
			rest := sample[i:]

			// The bound is inclusive so that a candidate may run right up to
			// the last byte of the sample.
			for j := minSymbolLen; j <= min(maxSymbolLen, len(rest)); j++ {
				sym := rest[:j:j]

				// toUint64 alone cannot tell {0x00, 0x01} from
				// {0x00, 0x00, 0x01}. Symbols are at most 4 bytes, so the
				// length fits above the 32 payload bits.
				key := uint64(j)<<32 | toUint64(sym)

				if p, ok := counts[key]; ok {
					p.count++
				} else {
					counts[key] = &pair{symbol: sym, count: 1}
				}
			}
		}
	}

	pairs := make([]*pair, 0, len(counts))
	for _, p := range counts {
		pairs = append(pairs, p)
	}

	// Map iteration order is random, so the comparator must be a total order:
	// best score first, ties broken by symbol bytes.
	slices.SortFunc(pairs, func(a, b *pair) int {
		if d := b.score() - a.score(); d != 0 {
			return d
		}

		return bytes.Compare(a.symbol, b.symbol)
	})

	n := min(num, len(pairs))
	if n < 0 {
		n = 0
	}

	ret := make([][]byte, 0, n)
	for _, p := range pairs[:n] {
		ret = append(ret, p.symbol)
	}

	return ret
}

// toUint64 packs bs into a uint64, most significant byte first. Only the last
// 8 bytes of bs can be represented; earlier bytes are shifted out.
func toUint64(bs []byte) uint64 {
	var ret uint64

	for _, b := range bs {
		ret = (ret << 8) | uint64(b)
	}

	return ret
}

// score weights the occurrence count by the symbol's size in bits minus 11.
// NOTE(review): 11 is presumably the widest code length the encoder emits,
// which would make this an estimate of bits saved — confirm against the codes
// package.
func (p pair) score() int {
	return p.count * ((len(p.symbol) * 8) - 11)
}
|
||||
|
||||
func optimize(st *state.State, samples [][]byte) *state.State {
|
||||
st.AddSymbol([]byte("it "))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user