extract msgpack into its own static library; move non-templated bodies to msgpack.cpp; introduce util INTERFACE lib for span_writer

This commit is contained in:
Ian Gulliver
2026-05-01 10:43:29 -07:00
parent 7000c2e825
commit 1e97058b9b
6 changed files with 477 additions and 409 deletions

493
msgpack/msgpack.h Normal file
View File

@@ -0,0 +1,493 @@
#pragma once
#include <algorithm>
#include <array>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <expected>
#include <functional>
#include <iterator>
#include <limits>
#include <span>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "span_writer.h"
namespace msgpack {
// Failure reasons carried in the error slot of msgpack::result<T>.
enum class error_code {
// A length does not fit the widest encoding (e.g. pack_str/pack_bin/pack_ext
// when the range is longer than a uint32_t can express).
overflow,
// The parser has no bytes left, so there is no header byte to read.
empty,
// Fewer bytes are available than the value needs (e.g. body_number,
// parser::advance).
lack,
// The request cannot be decoded at all (e.g. body_number with an
// unsupported value width).
invalid,
// The encoded value is not of the kind the caller asked for (e.g. a
// non-numeric header in get_number, or an element-count / ext-id mismatch
// in unpack).
type_error,
};
// Marker bytes of the MessagePack wire format, plus classifiers for the
// marker families that occupy a whole range of byte values.
namespace format {

// --- Range markers: the low bits of the byte carry a value or a length. ---
constexpr uint8_t POSITIVE_FIXINT_MIN = 0x00;
constexpr uint8_t POSITIVE_FIXINT_MAX = 0x7F;
constexpr uint8_t FIXMAP_MIN = 0x80;
constexpr uint8_t FIXMAP_MAX = 0x8F;
constexpr uint8_t FIXARRAY_MIN = 0x90;
constexpr uint8_t FIXARRAY_MAX = 0x9F;
constexpr uint8_t FIXSTR_MIN = 0xA0;
constexpr uint8_t FIXSTR_MAX = 0xBF;

// --- Single-value markers, listed in ascending byte order (0xC0..0xDF). ---
constexpr uint8_t NIL = 0xC0;
constexpr uint8_t NEVER_USED = 0xC1;
constexpr uint8_t FALSE = 0xC2;
constexpr uint8_t TRUE = 0xC3;

constexpr uint8_t BIN8 = 0xC4;
constexpr uint8_t BIN16 = 0xC5;
constexpr uint8_t BIN32 = 0xC6;

constexpr uint8_t EXT8 = 0xC7;
constexpr uint8_t EXT16 = 0xC8;
constexpr uint8_t EXT32 = 0xC9;

constexpr uint8_t FLOAT32 = 0xCA;
constexpr uint8_t FLOAT64 = 0xCB;

constexpr uint8_t UINT8 = 0xCC;
constexpr uint8_t UINT16 = 0xCD;
constexpr uint8_t UINT32 = 0xCE;
constexpr uint8_t UINT64 = 0xCF;

constexpr uint8_t INT8 = 0xD0;
constexpr uint8_t INT16 = 0xD1;
constexpr uint8_t INT32 = 0xD2;
constexpr uint8_t INT64 = 0xD3;

constexpr uint8_t FIXEXT1 = 0xD4;
constexpr uint8_t FIXEXT2 = 0xD5;
constexpr uint8_t FIXEXT4 = 0xD6;
constexpr uint8_t FIXEXT8 = 0xD7;
constexpr uint8_t FIXEXT16 = 0xD8;

constexpr uint8_t STR8 = 0xD9;
constexpr uint8_t STR16 = 0xDA;
constexpr uint8_t STR32 = 0xDB;

constexpr uint8_t ARRAY16 = 0xDC;
constexpr uint8_t ARRAY32 = 0xDD;

constexpr uint8_t MAP16 = 0xDE;
constexpr uint8_t MAP32 = 0xDF;

// --- Top range marker: 0xE0..0xFF. ---
constexpr uint8_t NEGATIVE_FIXINT_MIN = 0xE0;
constexpr uint8_t NEGATIVE_FIXINT_MAX = 0xFF;

// Each range above is aligned to a power of two, so membership can be decided
// by masking off the payload bits and comparing against the range's first byte.

// True for 0x00..0x7F (top bit clear).
constexpr bool is_positive_fixint(uint8_t b) {
    return (b & 0x80) == POSITIVE_FIXINT_MIN;
}

// True for 0x80..0x8F (high nibble 0x8).
constexpr bool is_fixmap(uint8_t b) {
    return (b & 0xF0) == FIXMAP_MIN;
}

// True for 0x90..0x9F (high nibble 0x9).
constexpr bool is_fixarray(uint8_t b) {
    return (b & 0xF0) == FIXARRAY_MIN;
}

// True for 0xA0..0xBF (top three bits 101).
constexpr bool is_fixstr(uint8_t b) {
    return (b & 0xE0) == FIXSTR_MIN;
}

// True for 0xE0..0xFF (top three bits 111).
constexpr bool is_negative_fixint(uint8_t b) {
    return (b & 0xE0) == NEGATIVE_FIXINT_MIN;
}

} // namespace format
// Return type used throughout this header: either a T or the error_code
// explaining why no T could be produced.
template <typename T>
using result = std::expected<T, error_code>;
// Decodes the big-endian number stored in the sizeof(T) bytes that follow the
// byte at p[0] (p[0] itself is skipped, not inspected).
//
// p     start of the encoded value; bytes p[1]..p[sizeof(T)] are read.
// size  number of readable bytes at p.
//
// Returns the decoded value, error_code::lack when fewer than 1 + sizeof(T)
// bytes are available, or error_code::invalid when sizeof(T) is not 1, 2, 4
// or 8.
//
// The 4- and 8-byte paths assemble the value with shifts and only then copy
// the bit pattern into T, so the result does not depend on the byte order of
// the host (reversing the bytes and copying them is only correct on a
// little-endian machine).
template <typename T>
result<T> body_number(const uint8_t *p, int size) {
    if (size < 1 + static_cast<int>(sizeof(T))) {
        return std::unexpected(error_code::lack);
    }
    if constexpr (sizeof(T) == 1) {
        return static_cast<T>(p[1]);
    } else if constexpr (sizeof(T) == 2) {
        return static_cast<T>((p[1] << 8) | p[2]);
    } else if constexpr (sizeof(T) == 4) {
        uint32_t bits = 0;
        for (int i = 1; i <= 4; ++i) {
            bits = (bits << 8) | p[i];
        }
        // Copy the bit pattern instead of converting, so float payloads and
        // negative integers keep their exact representation.
        T val;
        __builtin_memcpy(&val, &bits, sizeof(T));
        return val;
    } else if constexpr (sizeof(T) == 8) {
        uint64_t bits = 0;
        for (int i = 1; i <= 8; ++i) {
            bits = (bits << 8) | p[i];
        }
        T val;
        __builtin_memcpy(&val, &bits, sizeof(T));
        return val;
    } else {
        return std::unexpected(error_code::invalid);
    }
}
// Pair of sizes returned by get_body_info().
// NOTE(review): presumably `header` is the number of bytes occupied by the
// marker plus any length field and `body` is the payload length that follows;
// confirm against get_body_info() in msgpack.cpp.
struct body_info {
int header;
uint32_t body;
};
// Inspects the encoded value starting at p (with `size` readable bytes) and
// reports its body_info, or an error_code. Declared here only; the body is not
// part of this header.
result<body_info> get_body_info(const uint8_t *p, int size);
// Serializes values into MessagePack by appending bytes to a caller-owned
// span_writer. The packer keeps only a reference to the writer, so the writer
// must outlive it.
//
// Every pack_* function returns pack_result: on success a reference to this
// packer (so calls can be chained through result), otherwise an error_code.
// Functions that are only declared here have their bodies outside this header.
class packer {
private:
    span_writer &m_buf;

    // Appends the bit pattern of n, most significant byte first.
    //
    // The value is first copied into an unsigned integer of the same width and
    // then emitted with shifts, so the output is big-endian regardless of the
    // host's byte order (walking the object's bytes from the highest address
    // down is only correct on a little-endian machine). Works for integers and
    // for float/double alike because only the bit pattern is used.
    template <typename T> void push_big_endian(T n) {
        static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
                      "push_big_endian supports 1-, 2-, 4- and 8-byte values only");
        using bits_t = std::conditional_t<sizeof(T) == 1, uint8_t,
                       std::conditional_t<sizeof(T) == 2, uint16_t,
                       std::conditional_t<sizeof(T) == 4, uint32_t, uint64_t>>>;
        bits_t bits;
        __builtin_memcpy(&bits, &n, sizeof(T));
        for (size_t shift = 8 * sizeof(T); shift != 0; shift -= 8) {
            m_buf.push_back(static_cast<uint8_t>(bits >> (shift - 8)));
        }
    }

    // Appends every element of r to the output buffer.
    template <class Range> void push(const Range &r) {
        m_buf.insert(m_buf.end(), std::begin(r), std::end(r));
    }

public:
    packer(span_writer &buf) : m_buf(buf) {}
    packer(const packer &) = delete;
    packer &operator=(const packer &) = delete;

    using pack_result = result<std::reference_wrapper<packer>>;

    pack_result pack_nil();
    pack_result pack_bool(bool v);

    // Writes n using the shortest encoding this function knows for its value.
    //
    // Signed T: 0..0x7F and -32..-1 become a single byte; everything else is
    // written as INT8/INT16/INT32/INT64, whichever is the narrowest that holds
    // n. Unsigned T: 0..0x7F becomes a single byte; larger values are written
    // as UINT8/UINT16/UINT32/UINT64. Always returns *this.
    template <typename T>
    pack_result pack_integer(T n) {
        if constexpr (std::is_signed_v<T>) {
            if (n >= 0 && n <= 0x7F) {
                m_buf.push_back(static_cast<uint8_t>(n));
            } else if (n >= -32 && n < 0) {
                // Negative fixint: the low byte of the two's-complement value
                // is the encoding.
                m_buf.push_back(static_cast<uint8_t>(n));
            } else if (n >= std::numeric_limits<int8_t>::min() && n <= std::numeric_limits<int8_t>::max()) {
                m_buf.push_back(format::INT8);
                m_buf.push_back(static_cast<uint8_t>(n));
            } else if (n >= std::numeric_limits<int16_t>::min() && n <= std::numeric_limits<int16_t>::max()) {
                m_buf.push_back(format::INT16);
                push_big_endian(static_cast<int16_t>(n));
            } else if (n >= std::numeric_limits<int32_t>::min() && n <= std::numeric_limits<int32_t>::max()) {
                m_buf.push_back(format::INT32);
                push_big_endian(static_cast<int32_t>(n));
            } else {
                m_buf.push_back(format::INT64);
                push_big_endian(static_cast<int64_t>(n));
            }
        } else {
            if (n <= 0x7F) {
                m_buf.push_back(static_cast<uint8_t>(n));
            } else if (n <= std::numeric_limits<uint8_t>::max()) {
                m_buf.push_back(format::UINT8);
                m_buf.push_back(static_cast<uint8_t>(n));
            } else if (n <= std::numeric_limits<uint16_t>::max()) {
                m_buf.push_back(format::UINT16);
                push_big_endian(static_cast<uint16_t>(n));
            } else if (n <= std::numeric_limits<uint32_t>::max()) {
                m_buf.push_back(format::UINT32);
                push_big_endian(static_cast<uint32_t>(n));
            } else {
                m_buf.push_back(format::UINT64);
                push_big_endian(static_cast<uint64_t>(n));
            }
        }
        return *this;
    }

    pack_result pack_uint32_fixed(uint32_t n);
    pack_result pack_float(float n);
    pack_result pack_double(double n);

    // Writes the elements of r as a string: a fixstr header for fewer than 32
    // elements, otherwise STR8/STR16/STR32 chosen by length, followed by the
    // elements themselves. Returns error_code::overflow (writing nothing) when
    // the length does not fit in 32 bits.
    template <class Range>
    pack_result pack_str(const Range &r) {
        auto sz = static_cast<size_t>(std::distance(std::begin(r), std::end(r)));
        if (sz < 32) {
            m_buf.push_back(format::FIXSTR_MIN | static_cast<uint8_t>(sz));
        } else if (sz <= std::numeric_limits<uint8_t>::max()) {
            m_buf.push_back(format::STR8);
            m_buf.push_back(static_cast<uint8_t>(sz));
        } else if (sz <= std::numeric_limits<uint16_t>::max()) {
            m_buf.push_back(format::STR16);
            push_big_endian(static_cast<uint16_t>(sz));
        } else if (sz <= std::numeric_limits<uint32_t>::max()) {
            m_buf.push_back(format::STR32);
            push_big_endian(static_cast<uint32_t>(sz));
        } else {
            return std::unexpected(error_code::overflow);
        }
        push(r);
        return *this;
    }

    pack_result pack_str(const char *s);

    // Writes the elements of r as binary data: BIN8/BIN16/BIN32 chosen by
    // length, followed by the elements. Returns error_code::overflow (writing
    // nothing) when the length does not fit in 32 bits.
    template <class Range>
    pack_result pack_bin(const Range &r) {
        auto sz = static_cast<size_t>(std::distance(std::begin(r), std::end(r)));
        if (sz <= std::numeric_limits<uint8_t>::max()) {
            m_buf.push_back(format::BIN8);
            m_buf.push_back(static_cast<uint8_t>(sz));
        } else if (sz <= std::numeric_limits<uint16_t>::max()) {
            m_buf.push_back(format::BIN16);
            push_big_endian(static_cast<uint16_t>(sz));
        } else if (sz <= std::numeric_limits<uint32_t>::max()) {
            m_buf.push_back(format::BIN32);
            push_big_endian(static_cast<uint32_t>(sz));
        } else {
            return std::unexpected(error_code::overflow);
        }
        push(r);
        return *this;
    }

    pack_result pack_array(size_t n);
    pack_result pack_map(size_t n);
    pack_result pack_ext16_header(char type, uint16_t len);
    pack_result pack_bin16_header(uint16_t len);

    // Writes an extension value: header, then the type byte, then the
    // elements of r. Lengths of exactly 1, 2, 4, 8 and 16 use the FIXEXT
    // markers (which carry no length field); every other length uses
    // EXT8/EXT16/EXT32. Returns error_code::overflow (writing nothing) when
    // the length does not fit in 32 bits.
    template <class Range>
    pack_result pack_ext(char type, const Range &r) {
        auto sz = static_cast<size_t>(std::distance(std::begin(r), std::end(r)));
        switch (sz) {
        case 1: m_buf.push_back(format::FIXEXT1); break;
        case 2: m_buf.push_back(format::FIXEXT2); break;
        case 4: m_buf.push_back(format::FIXEXT4); break;
        case 8: m_buf.push_back(format::FIXEXT8); break;
        case 16: m_buf.push_back(format::FIXEXT16); break;
        default:
            if (sz <= std::numeric_limits<uint8_t>::max()) {
                m_buf.push_back(format::EXT8);
                m_buf.push_back(static_cast<uint8_t>(sz));
            } else if (sz <= std::numeric_limits<uint16_t>::max()) {
                m_buf.push_back(format::EXT16);
                push_big_endian(static_cast<uint16_t>(sz));
            } else if (sz <= std::numeric_limits<uint32_t>::max()) {
                m_buf.push_back(format::EXT32);
                push_big_endian(static_cast<uint32_t>(sz));
            } else {
                return std::unexpected(error_code::overflow);
            }
        }
        m_buf.push_back(static_cast<uint8_t>(type));
        push(r);
        return *this;
    }

    // --- pack(): overload set that picks the encoding from the argument type. ---

    // Integers other than bool.
    template <typename T>
    requires std::is_integral_v<T> && (!std::is_same_v<T, bool>)
    pack_result pack(T n) { return pack_integer(n); }

    // Enums are written as their underlying integer value.
    template <typename T>
    requires std::is_enum_v<T>
    pack_result pack(T v) { return pack_integer(static_cast<std::underlying_type_t<T>>(v)); }

    pack_result pack(bool v);
    pack_result pack(float v);
    pack_result pack(double v);
    pack_result pack(const char *v);
    pack_result pack(std::string_view v);
    pack_result pack(const std::string &v);
    pack_result pack(const std::vector<uint8_t> &v);

    // Any vector other than vector<uint8_t>: an array header followed by each
    // element packed in order. Stops at, and returns, the first error.
    template <typename T>
    requires (!std::is_same_v<T, uint8_t>)
    pack_result pack(const std::vector<T> &v) {
        auto r = pack_array(v.size());
        if (!r) return r;
        // const auto& (rather than auto&) also binds to the temporary bool
        // that iterating a const std::vector<bool> yields.
        for (const auto &elem : v) {
            r = r->get().pack(elem);
            if (!r) return r;
        }
        return r;
    }

    // Fixed-size byte arrays are written as binary data.
    template <size_t N>
    pack_result pack(const std::array<uint8_t, N> &v) { return pack_bin(v); }

    // Tuples: an array header for sizeof...(Ts) elements, then each element.
    template <typename... Ts>
    pack_result pack(const std::tuple<Ts...> &t) {
        auto r = pack_array(sizeof...(Ts));
        if (!r) return r;
        return pack_tuple_elements(t, std::index_sequence_for<Ts...>{});
    }

    // Types exposing as_tuple() and a static ext_id: the tuple is packed into
    // a temporary buffer, which is then written as an extension value whose
    // type byte is T::ext_id.
    // NOTE(review): the staging buffer is 256 bytes; what happens when the
    // packed tuple is larger depends on span_writer, which is not visible
    // here -- confirm.
    template <typename T>
    requires requires(const T &v) { { T::ext_id } -> std::convertible_to<int8_t>; v.as_tuple(); }
    pack_result pack(const T &v) {
        uint8_t ext_buf[256];
        span_writer ext_writer(ext_buf, sizeof(ext_buf));
        packer inner(ext_writer);
        auto r = inner.pack(v.as_tuple());
        // r holds an error here, never a reference to the local `inner`.
        if (!r) return r;
        return pack_ext(T::ext_id, inner.get_payload());
    }

    // Types exposing as_tuple() but no ext_id are written as a plain tuple.
    template <typename T>
    requires (requires(const T &v) { v.as_tuple(); } && !requires { { T::ext_id } -> std::convertible_to<int8_t>; })
    pack_result pack(const T &v) {
        return pack(v.as_tuple());
    }

private:
    // Packs std::get<Is>(t)... in order. After the first failure the remaining
    // elements are skipped and that error is returned.
    template <typename Tuple, size_t... Is>
    pack_result pack_tuple_elements(const Tuple &t, std::index_sequence<Is...>) {
        pack_result r = *this;
        ((r = r ? r->get().pack(std::get<Is>(t)) : r), ...);
        return r;
    }

public:
    // The writer this packer appends to.
    const span_writer &get_payload() const { return m_buf; }
};
// Read-only cursor over a MessagePack-encoded byte range. A parser is a
// (pointer, remaining-size) pair positioned on the first byte of a value; it
// does not own the bytes. Navigation functions return a new parser instead of
// mutating this one. Functions that are only declared here have their bodies
// outside this header.
class parser {
    const uint8_t *m_p = nullptr;
    int m_size = 0;

    // First byte of the current value, or error_code::empty when no bytes
    // remain.
    result<uint8_t> header_byte() const {
        if (m_size < 1) return std::unexpected(error_code::empty);
        return m_p[0];
    }

    // Decodes the body following the header byte as a Wire value and converts
    // it to T with static_cast, forwarding any error from body_number.
    template <typename T, typename Wire>
    result<T> number_as() const {
        auto n = body_number<Wire>(m_p, m_size);
        if (!n) return std::unexpected(n.error());
        return static_cast<T>(*n);
    }

public:
    parser() = default;
    parser(const std::vector<uint8_t> &v)
        : m_p(v.data()), m_size(static_cast<int>(v.size())) {}
    // A negative size is treated as an empty range.
    parser(const uint8_t *p, int size)
        : m_p(p), m_size(size < 0 ? 0 : size) {}

    bool is_empty() const { return m_size == 0; }
    const uint8_t *data() const { return m_p; }
    int size() const { return m_size; }

    // Returns a parser positioned n bytes further on.
    //
    // Fails with error_code::lack when n is larger than the remaining size or
    // negative. The negative case must be rejected explicitly: it would
    // otherwise move the cursor backwards and enlarge the remaining size (for
    // example when a caller's length computation wrapped on conversion to
    // int), letting later reads run outside the original buffer.
    result<parser> advance(int n) const {
        if (n < 0 || n > m_size) return std::unexpected(error_code::lack);
        return parser(m_p + n, m_size - n);
    }

    result<parser> next() const;

    bool is_nil() const;
    bool is_bool() const;
    bool is_number() const;
    bool is_string() const;
    bool is_binary() const;
    bool is_ext() const;
    bool is_array() const;
    bool is_map() const;

    result<bool> get_bool() const;
    result<std::string_view> get_string() const;
    result<std::string_view> get_binary_view() const;
    result<std::tuple<int8_t, std::string_view>> get_ext() const;

    // Reads the current value as a number and converts it to T.
    //
    // Accepts positive and negative fixints, UINT8..UINT64, INT8..INT64,
    // FLOAT32 and FLOAT64. Returns error_code::empty when no bytes remain,
    // error_code::type_error for any other header byte, and forwards
    // body_number's error when the body is truncated.
    //
    // The conversion to T is a plain static_cast with no range check, so a
    // value that does not fit T is converted silently.
    template <typename T>
    result<T> get_number() const {
        auto h = header_byte();
        if (!h) return std::unexpected(h.error());
        const uint8_t b = *h;
        if (format::is_positive_fixint(b)) return static_cast<T>(b);
        // Reinterpret as int8_t first so the value is sign-extended.
        if (format::is_negative_fixint(b)) return static_cast<T>(static_cast<int8_t>(b));
        switch (b) {
        case format::UINT8: return number_as<T, uint8_t>();
        case format::UINT16: return number_as<T, uint16_t>();
        case format::UINT32: return number_as<T, uint32_t>();
        case format::UINT64: return number_as<T, uint64_t>();
        case format::INT8: return number_as<T, int8_t>();
        case format::INT16: return number_as<T, int16_t>();
        case format::INT32: return number_as<T, int32_t>();
        case format::INT64: return number_as<T, int64_t>();
        case format::FLOAT32: return number_as<T, float>();
        case format::FLOAT64: return number_as<T, double>();
        default:
            return std::unexpected(error_code::type_error);
        }
    }

    result<uint32_t> count() const;
    result<parser> first_item() const;
    parser operator[](int index) const;
};
// Reads an enum: the value is decoded as the enum's underlying integer type
// and then cast to T. `out` is written only on success. Returns whatever the
// underlying-type unpack returned (the parser past the value, or its error).
template <typename T>
requires std::is_enum_v<T>
result<parser> unpack(const parser &p, T &out) {
    using raw_t = std::underlying_type_t<T>;
    raw_t raw;
    auto rest = unpack(p, raw);
    if (rest) {
        out = static_cast<T>(raw);
    }
    return rest;
}
// Reads an integer (any integral type except bool) through
// parser::get_number<T>(). On success stores it in `out` and returns p.next();
// on failure leaves `out` untouched and returns get_number's error.
template <typename T>
requires std::is_integral_v<T> && (!std::is_same_v<T, bool>)
result<parser> unpack(const parser &p, T &out) {
    const auto parsed = p.get_number<T>();
    if (parsed) {
        out = *parsed;
        return p.next();
    }
    return std::unexpected(parsed.error());
}
// Non-template unpack overloads; their bodies are not part of this header.
// Like the templates in this header, each takes the parser positioned on the
// value and an output argument, and returns a result<parser>.
result<parser> unpack(const parser &p, bool &out);
result<parser> unpack(const parser &p, std::string_view &out);
result<parser> unpack(const parser &p, std::string &out);
result<parser> unpack(const parser &p, std::vector<uint8_t> &out);
result<parser> unpack(const parser &p, std::span<const uint8_t> &out);
// Reads a fixed-size byte array from the view returned by
// parser::get_binary_view(). The encoded length must be exactly N, otherwise
// error_code::type_error is returned and `out` is left untouched. On success
// returns p.next().
template <size_t N>
result<parser> unpack(const parser &p, std::array<uint8_t, N> &out) {
    const auto bytes = p.get_binary_view();
    if (!bytes) {
        return std::unexpected(bytes.error());
    }
    if (bytes->size() != N) {
        return std::unexpected(error_code::type_error);
    }
    // The length was just checked, so exactly N elements are available.
    std::copy_n(bytes->begin(), N, out.begin());
    return p.next();
}
// Reads a sequence into a vector (any element type except uint8_t, which has
// its own overload).
//
// The element count comes from p.count(); the elements are then unpacked one
// after another starting at p.first_item(). Returns p.next() on success, or
// the first error encountered. On failure `out` may already have been resized
// and partially filled.
//
// The count is taken from the input, so it is checked against the number of
// bytes remaining before `out` is resized: every encoded element occupies at
// least one byte, so a count larger than the remaining size can never be
// satisfied. Without this check a few bytes of input could demand an
// allocation of up to 2^32 - 1 elements.
template <typename T>
requires (!std::is_same_v<T, uint8_t>)
result<parser> unpack(const parser &p, std::vector<T> &out) {
    auto cnt = p.count();
    if (!cnt) return std::unexpected(cnt.error());
    if (p.size() < 0 || *cnt > static_cast<uint32_t>(p.size())) {
        return std::unexpected(error_code::lack);
    }
    out.resize(*cnt);
    result<parser> cur = p.first_item();
    for (size_t i = 0; i < *cnt; i++) {
        if (!cur) return cur;
        cur = unpack(*cur, out[i]);
    }
    if (!cur) return cur;
    return p.next();
}
// Unpacks std::get<Is>(t)... in index order, starting at p.first_item() and
// feeding each element the parser returned by the previous one. Once an
// element fails, the remaining ones are skipped. Returns the parser positioned
// after the last element read, or the first error.
template <typename... Ts, size_t... Is>
result<parser> unpack_tuple_elements(const parser &p, std::tuple<Ts...> &t, std::index_sequence<Is...>) {
    result<parser> cur = p.first_item();
    if (!cur) {
        return cur;
    }
    // Reads one element, but only while no earlier element has failed.
    [[maybe_unused]] const auto read_one = [&cur](auto &slot) {
        if (cur) {
            cur = unpack(*cur, slot);
        }
    };
    (read_one(std::get<Is>(t)), ...);
    return cur;
}
// Reads a tuple. p.count() must equal the number of tuple elements, otherwise
// error_code::type_error is returned before anything is read. The elements are
// then unpacked in order; on success the result is p.next(), on failure the
// first element error.
template <typename... Ts>
result<parser> unpack(const parser &p, std::tuple<Ts...> &t) {
    const auto arity = p.count();
    if (!arity) {
        return std::unexpected(arity.error());
    }
    if (*arity != sizeof...(Ts)) {
        return std::unexpected(error_code::type_error);
    }
    auto walked = unpack_tuple_elements(p, t, std::index_sequence_for<Ts...>{});
    if (walked) {
        return p.next();
    }
    return walked;
}
// Reads a type that exposes as_tuple() but has no ext_id. The tuple returned
// by out.as_tuple() is filled element by element; p.count() must equal the
// tuple's size, otherwise error_code::type_error is returned. On success the
// result is p.next().
template <typename T>
requires (requires(T &v) { v.as_tuple(); } && !requires { { T::ext_id } -> std::convertible_to<int8_t>; })
result<parser> unpack(const parser &p, T &out) {
    auto fields = out.as_tuple();
    constexpr size_t arity = std::tuple_size_v<decltype(fields)>;

    const auto encoded = p.count();
    if (!encoded) {
        return std::unexpected(encoded.error());
    }
    if (*encoded != arity) {
        return std::unexpected(error_code::type_error);
    }

    auto walked = unpack_tuple_elements(p, fields, std::make_index_sequence<arity>{});
    if (walked) {
        return p.next();
    }
    return walked;
}
// Reads a type that exposes as_tuple() and a static ext_id.
//
// The value must be an extension whose type equals T::ext_id; its payload is
// parsed with a nested parser and unpacked into the tuple returned by
// out.as_tuple(). Returns p.next() on success.
//
// Errors: get_ext()'s error when the value is not a readable extension;
// error_code::type_error when the extension type differs from T::ext_id or
// when the payload's element count differs from the tuple's size; otherwise
// the first element error.
//
// The element-count check mirrors the overload for types without ext_id.
// Without it a payload with fewer elements than the tuple would have its
// trailing bytes read as fields, and one with extra elements would be
// accepted silently.
template <typename T>
requires requires(T &v) { { T::ext_id } -> std::convertible_to<int8_t>; v.as_tuple(); }
result<parser> unpack(const parser &p, T &out) {
    auto ext = p.get_ext();
    if (!ext) return std::unexpected(ext.error());
    auto [ext_type, ext_data] = *ext;
    if (ext_type != T::ext_id) return std::unexpected(error_code::type_error);

    // The nested parser covers the extension payload only.
    parser inner(reinterpret_cast<const uint8_t *>(ext_data.data()),
                 static_cast<int>(ext_data.size()));

    auto tup = out.as_tuple();
    constexpr size_t arity = std::tuple_size_v<decltype(tup)>;
    auto cnt = inner.count();
    if (!cnt) return std::unexpected(cnt.error());
    if (*cnt != arity) return std::unexpected(error_code::type_error);

    auto r = unpack_tuple_elements(inner, tup, std::make_index_sequence<arity>{});
    if (!r) return r;
    return p.next();
}
} // namespace msgpack