Initial commit

This commit is contained in:
Ian Gulliver
2023-12-24 17:57:00 -05:00
parent 5b04b9130d
commit cb331a4ed5
8 changed files with 5508 additions and 0 deletions

265
codes.go Normal file
View File

@@ -0,0 +1,265 @@
package coding
// code is a single entry of the codes table: a variable-length bit pattern
// together with its width.
type code struct {
	// value holds the bit pattern; Encode hands it to the bit writer together
	// with bits, so only the low `bits` bits are meaningful.
	value uint16
	// bits is the width of the pattern in bits (4 through 10 in the table).
	bits int
}
// codes is the fixed variable-length code table. Encode emits codes[i] for a
// symbol that currently sits at heap index i, so low indices (frequent
// symbols) get the short patterns.
//
// The table is made of seven tiers; inside a tier the values are consecutive
// and share one width. No pattern is a prefix of another. The two 10-bit
// patterns 0b1111111110 and 0b1111111111 are left unassigned.
var codes = func() [256]code {
	tiers := [...]struct {
		first uint16 // pattern of the tier's first entry
		bits  int    // width shared by every entry in the tier
		count int    // number of entries in the tier
	}{
		{first: 0b0000, bits: 4, count: 2},
		{first: 0b00100, bits: 5, count: 8},
		{first: 0b011000, bits: 6, count: 8},
		{first: 0b1000000, bits: 7, count: 16},
		{first: 0b10100000, bits: 8, count: 32},
		{first: 0b110000000, bits: 9, count: 64},
		{first: 0b1110000000, bits: 10, count: 126},
	}

	var table [256]code
	next := 0
	for _, tier := range tiers {
		for k := 0; k < tier.count; k++ {
			table[next] = code{value: tier.first + uint16(k), bits: tier.bits}
			next++
		}
	}
	return table
}()

35
coding_test.go Normal file
View File

@@ -0,0 +1,35 @@
package coding_test
import (
"bufio"
"os"
"testing"
"github.com/samber/lo"
"github.com/securemesh/coding"
)
// TestSimple encodes one short sentence with a fresh chat-seeded heap and
// logs the input and output sizes. It makes no assertions; it fails only if
// Encode panics.
func TestSimple(t *testing.T) {
	plain := []byte("this is a test. this is only a test.")
	packed := coding.Encode(coding.ChatHeap(), plain)
	t.Logf("orig=%d encoded=%d", len(plain), len(packed))
}
// TestSMS encodes every line of sms.txt independently, each with its own
// fresh chat-seeded heap, and logs the total raw and encoded byte counts.
//
// The test fails if the corpus cannot be read to the end; without that check
// a scanner error (I/O failure or an over-long line) would silently truncate
// the corpus and the logged totals would look valid.
func TestSMS(t *testing.T) {
	fh := lo.Must(os.Open("sms.txt"))
	defer fh.Close()

	s := bufio.NewScanner(fh)

	orig := 0
	encoded := 0

	for s.Scan() {
		// s.Bytes() is only valid until the next Scan; Encode consumes it
		// immediately, so no copy is needed.
		msg := s.Bytes()
		e := coding.Encode(coding.ChatHeap(), msg)

		orig += len(msg)
		encoded += len(e)
	}

	// Scan returns false both at EOF and on error; only Err tells them apart.
	if err := s.Err(); err != nil {
		t.Fatalf("reading sms.txt: %v", err)
	}

	t.Logf("orig=%d encoded=%d", orig, encoded)
}

21
encode.go Normal file
View File

@@ -0,0 +1,21 @@
package coding
import (
"bytes"
"github.com/icza/bitio"
"github.com/samber/lo"
)
// Encode packs msg into a bit stream of variable-length codes and returns
// the resulting bytes.
//
// For every input byte the heap is updated first (IncrementSymbol) and the
// index it returns, which is the symbol's position before this occurrence
// moved it, selects the entry of codes that is written. h is therefore
// mutated: reusing the same heap across calls carries the adapted
// frequencies forward.
//
// Write errors are passed to lo.Must0, which panics on a non-nil error.
// NOTE(review): closing the bit writer flushes a partial final byte; the
// stream carries no length or terminator, so a decoder presumably needs the
// message length from elsewhere. Confirm against the decoder.
func Encode(h *Heap, msg []byte) []byte {
	var out bytes.Buffer
	bw := bitio.NewWriter(&out)

	for i := range msg {
		c := codes[h.IncrementSymbol(msg[i])]
		lo.Must0(bw.WriteBits(uint64(c.value), uint8(c.bits)))
	}

	lo.Must0(bw.Close())
	return out.Bytes()
}

10
go.mod Normal file
View File

@@ -0,0 +1,10 @@
module github.com/securemesh/coding
go 1.21.5
require (
github.com/icza/bitio v1.1.0
github.com/samber/lo v1.39.0
)
require golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect

8
go.sum Normal file
View File

@@ -0,0 +1,8 @@
github.com/icza/bitio v1.1.0 h1:ysX4vtldjdi3Ygai5m1cWy4oLkhWTAi+SyO6HC8L9T0=
github.com/icza/bitio v1.1.0/go.mod h1:0jGnlLAx8MKMr9VGnn/4YrvZiprkvBelsVIbA9Jjr9A=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA=
github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=

60
heap.go Normal file
View File

@@ -0,0 +1,60 @@
package coding
import (
"maps"
)
// node is one slot of the Heap: a byte value and how many times it has been
// counted so far.
type node struct {
	// symbol is the byte this slot currently represents.
	symbol byte
	// count is the number of IncrementSymbol calls recorded for symbol.
	count int
}
// Heap tracks how often each of the 256 byte values has been seen, arranged
// as an array-backed binary heap: the parent of slot i lives at (i-1)/2, and
// IncrementSymbol moves a slot towards index 0 while its count exceeds its
// parent's.
//
// Use NewHeap to obtain a usable instance; the zero value has a nil
// bySymbol map.
type Heap struct {
	// nodes holds one slot per byte value, in heap order.
	nodes [256]node
	// bySymbol maps a byte value to the index of its slot in nodes.
	bySymbol map[byte]int
}
// NewHeap returns a heap in its initial state: slot i holds byte value i with
// a count of zero, and bySymbol maps every byte value to that same index.
func NewHeap() *Heap {
	h := &Heap{bySymbol: make(map[byte]int, 256)}

	for slot := range h.nodes {
		sym := byte(slot)
		h.nodes[slot].symbol = sym
		h.bySymbol[sym] = slot
	}

	return h
}
// Clone returns an independent copy of the heap: changes made to the copy
// (or to the original) are not visible through the other.
func (h Heap) Clone() *Heap {
	// The value receiver is already a private copy of the struct, including
	// the nodes array; only the map still aliases the original and has to be
	// duplicated before the copy is handed out.
	h.bySymbol = maps.Clone(h.bySymbol)
	return &h
}
// IncrementSymbol records one more occurrence of symbol and restores heap
// order by moving its slot towards the root while its count is strictly
// greater than its parent's (ties do not move).
//
// It returns the index the symbol occupied BEFORE this call moved it, i.e.
// its position as seen prior to counting this occurrence.
func (h *Heap) IncrementSymbol(symbol byte) int {
	start := h.bySymbol[symbol]
	h.nodes[start].count++

	for cur := start; cur != 0; {
		up := h.parentIndex(cur)
		if h.nodes[up].count >= h.nodes[cur].count {
			break
		}

		// Swap the two slots, then repair the reverse index for both
		// symbols that just changed position.
		h.nodes[cur], h.nodes[up] = h.nodes[up], h.nodes[cur]
		h.bySymbol[h.nodes[cur].symbol] = cur
		h.bySymbol[h.nodes[up].symbol] = up

		cur = up
	}

	return start
}
// parentIndex returns the index of the parent slot of nodeIndex in the
// array-backed binary heap. For the root (0) the integer division truncates
// towards zero, so the result is 0 as well.
//
// The receiver is a pointer so that calling this from IncrementSymbol's loop
// does not copy the whole Heap, and so the method's receiver kind matches
// IncrementSymbol's.
func (h *Heap) parentIndex(nodeIndex int) int {
	return (nodeIndex - 1) / 2
}

37
seeds.go Normal file
View File

@@ -0,0 +1,37 @@
package coding
// chatHeap is the heap pre-seeded for chat-style text; ChatHeap returns
// clones of it.
//
// Rows are ordered from lowest to highest weight: newHeapFromSeed increments
// every byte of row k (0-based) k+1 times, so the first row's bytes are
// counted once and the single space in the last row is counted once per row.
// Byte values that appear in no row keep a count of zero.
//
// The rows are converted with []byte(...), so a non-ASCII character
// contributes each of its UTF-8 bytes separately.
var chatHeap = newHeapFromSeed([][]byte{
[]byte(`]\_}`),
[]byte(`[ê%=Z`),
[]byte(`#ÄQ<>`),
[]byte(`&X@+*`),
[]byte(`$~"V;`),
[]byte(`/78q9`),
[]byte("zRE54F(U-6\n"),
[]byte(`NLx:C01D2BJ)K3GP`),
[]byte(`STWH!OYAjM`),
[]byte(`?,'`),
[]byte(`bIv`),
[]byte(`mygwc.pfk`),
[]byte(`isrhlud`),
[]byte(`eotan`),
[]byte(` `),
})
// ChatHeap returns a new heap initialised from the chat seed. Every call
// yields its own clone, so callers may mutate the result (as Encode does)
// without affecting the package-level seed or each other.
func ChatHeap() *Heap {
	fresh := chatHeap.Clone()
	return fresh
}
// newHeapFromSeed builds a heap whose counts are weighted by row position.
//
// It makes one pass per row index: pass p increments every byte of rows
// p, p+1, ..., last, in order. A byte in row k (0-based) is therefore
// incremented k+1 times in total. The order of increments is significant
// because IncrementSymbol only moves a slot past a parent with a strictly
// smaller count, so ties are resolved by arrival order.
func newHeapFromSeed(seed [][]byte) *Heap {
	heap := NewHeap()

	for pass := 0; pass < len(seed); pass++ {
		for row := pass; row < len(seed); row++ {
			for _, sym := range seed[row] {
				heap.IncrementSymbol(sym)
			}
		}
	}

	return heap
}

5072
sms.txt Normal file

File diff suppressed because it is too large Load Diff