Zero-copy encode: pack response body in place, single shared tx_buf, drain rx loop

This commit is contained in:
Ian Gulliver
2026-04-10 22:48:28 +09:00
parent 58db392bf3
commit 8408603390
8 changed files with 98 additions and 57 deletions

View File

@@ -1,6 +1,7 @@
#pragma once #pragma once
#include <cstdint> #include <cstdint>
#include <cstddef> #include <cstddef>
#include <span>
namespace halfsiphash { namespace halfsiphash {
@@ -34,7 +35,7 @@ inline void sipround(uint32_t &v0, uint32_t &v1, uint32_t &v2, uint32_t &v3) {
} // namespace detail } // namespace detail
// Compute HalfSipHash-2-4 with an 8-byte key, returning a 32-bit hash. // Compute HalfSipHash-2-4 with an 8-byte key, returning a 32-bit hash.
inline uint32_t hash32(const uint8_t *data, size_t len, const uint8_t key[8]) { inline uint32_t hash32(std::span<const uint8_t> data, const uint8_t key[8]) {
using namespace detail; using namespace detail;
uint32_t k0 = load_le32(key); uint32_t k0 = load_le32(key);
@@ -45,8 +46,8 @@ inline uint32_t hash32(const uint8_t *data, size_t len, const uint8_t key[8]) {
uint32_t v2 = UINT32_C(0x6c796765) ^ k0; uint32_t v2 = UINT32_C(0x6c796765) ^ k0;
uint32_t v3 = UINT32_C(0x74656462) ^ k1; uint32_t v3 = UINT32_C(0x74656462) ^ k1;
const uint8_t *end = data + len - (len % 4); const uint8_t *end = data.data() + data.size() - (data.size() % 4);
for (const uint8_t *p = data; p != end; p += 4) { for (const uint8_t *p = data.data(); p != end; p += 4) {
uint32_t m = load_le32(p); uint32_t m = load_le32(p);
v3 ^= m; v3 ^= m;
sipround(v0, v1, v2, v3); sipround(v0, v1, v2, v3);
@@ -54,8 +55,8 @@ inline uint32_t hash32(const uint8_t *data, size_t len, const uint8_t key[8]) {
v0 ^= m; v0 ^= m;
} }
uint32_t b = static_cast<uint32_t>(len) << 24; uint32_t b = static_cast<uint32_t>(data.size()) << 24;
switch (len & 3) { switch (data.size() & 3) {
case 3: b |= static_cast<uint32_t>(end[2]) << 16; [[fallthrough]]; case 3: b |= static_cast<uint32_t>(end[2]) << 16; [[fallthrough]];
case 2: b |= static_cast<uint32_t>(end[1]) << 8; [[fallthrough]]; case 2: b |= static_cast<uint32_t>(end[1]) << 8; [[fallthrough]];
case 1: b |= static_cast<uint32_t>(end[0]); break; case 1: b |= static_cast<uint32_t>(end[0]); break;

View File

@@ -236,6 +236,12 @@ public:
return *this; return *this;
} }
// Packs `n` as a uint32 regardless of its magnitude: the format::UINT32
// marker byte is appended first, then `n` is handed to push_big_endian.
// Unlike a size-minimising pack, the encoded length does not depend on the
// value, which lets callers reserve space for the field before the value is
// known. NOTE(review): push_big_endian is defined outside this hunk; it
// presumably appends the four value bytes most-significant first — confirm.
// Returns *this (as pack_result).
pack_result pack_uint32_fixed(uint32_t n) {
m_buf.push_back(format::UINT32);
push_big_endian(n);
return *this;
}
pack_result pack_float(float n) { pack_result pack_float(float n) {
m_buf.push_back(format::FLOAT32); m_buf.push_back(format::FLOAT32);
push_big_endian(n); push_big_endian(n);
@@ -322,6 +328,19 @@ public:
return *this; return *this;
} }
// Writes only the header of an ext16 item; no payload bytes are emitted here.
// Order on the wire as written below: format::EXT16 marker, the 16-bit `len`
// via push_big_endian, then the extension `type` as a single byte.
// `len` is the number of payload bytes the caller is expected to supply
// separately (this function does not check or write them).
// Returns *this (as pack_result).
pack_result pack_ext16_header(char type, uint16_t len) {
m_buf.push_back(format::EXT16);
push_big_endian(len);
// The type id is stored as its raw byte value.
m_buf.push_back(static_cast<uint8_t>(type));
return *this;
}
// Writes only the header of a bin16 item: the format::BIN16 marker followed
// by the 16-bit `len` via push_big_endian. The `len` payload bytes themselves
// are not written here; the caller is expected to place them after the header.
// Returns *this (as pack_result).
pack_result pack_bin16_header(uint16_t len) {
m_buf.push_back(format::BIN16);
push_big_endian(len);
return *this;
}
template <class Range> template <class Range>
pack_result pack_ext(char type, const Range &r) { pack_result pack_ext(char type, const Range &r) {
auto sz = static_cast<size_t>(std::distance(std::begin(r), std::end(r))); auto sz = static_cast<size_t>(std::distance(std::begin(r), std::end(r)));

View File

@@ -15,4 +15,4 @@ using net_handler = std::function<size_t(std::span<const uint8_t> payload, span_
bool net_init(); bool net_init();
const net_state& net_get_state(); const net_state& net_get_state();
void net_set_handler(net_handler handler); void net_set_handler(net_handler handler);
void net_poll(); void net_poll(std::span<uint8_t> tx);

View File

@@ -2,6 +2,7 @@
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <span>
class span_writer { class span_writer {
uint8_t *m_data; uint8_t *m_data;
@@ -10,6 +11,7 @@ class span_writer {
public: public:
span_writer(uint8_t *data, size_t capacity) : m_data(data), m_capacity(capacity) {} span_writer(uint8_t *data, size_t capacity) : m_data(data), m_capacity(capacity) {}
span_writer(std::span<uint8_t> buf) : m_data(buf.data()), m_capacity(buf.size()) {}
void push_back(uint8_t v) { void push_back(uint8_t v) {
if (m_size < m_capacity) m_data[m_size++] = v; if (m_size < m_capacity) m_data[m_size++] = v;
@@ -32,4 +34,12 @@ public:
uint8_t *end() { return m_data + m_size; } uint8_t *end() { return m_data + m_size; }
const uint8_t *begin() const { return m_data; } const uint8_t *begin() const { return m_data; }
const uint8_t *end() const { return m_data + m_size; } const uint8_t *end() const { return m_data + m_size; }
// Returns a new writer over the tail of this writer's storage, starting
// `offset` bytes from the beginning and extending to the end of the capacity.
// The returned writer is a separate object with its own size counter; bytes
// written through it are not reflected in this writer's size().
//
// `offset` is clamped to the capacity. Without the clamp, an offset larger
// than the capacity would make `m_capacity - offset` wrap around to a huge
// value and hand back a writer that believes it owns memory past the buffer.
// With the clamp, an out-of-range offset yields an empty (zero-capacity)
// writer, so subsequent push_back calls on it are dropped.
span_writer subspan(size_t offset) {
    const size_t start = offset < m_capacity ? offset : m_capacity;
    return span_writer(m_data + start, m_capacity - start);
}
// Returns a new writer over at most `len` bytes of this writer's storage,
// starting `offset` bytes from the beginning. The returned writer is a
// separate object with its own size counter.
//
// Both arguments are clamped to the underlying capacity: `offset` is limited
// to the capacity, and `len` is limited to the bytes remaining after
// `offset`. For in-range arguments the result is the same window as before;
// for out-of-range arguments the window is shrunk (possibly to zero bytes)
// instead of extending past the end of the buffer.
span_writer subspan(size_t offset, size_t len) {
    const size_t start = offset < m_capacity ? offset : m_capacity;
    const size_t available = m_capacity - start;
    return span_writer(m_data + start, len < available ? len : available);
}
}; };

View File

@@ -97,27 +97,39 @@ struct DecodedMessage {
std::vector<uint8_t> payload; std::vector<uint8_t> payload;
}; };
inline size_t pack_envelope_into(span_writer &out, uint32_t message_id, const uint8_t *payload, size_t payload_len) { static constexpr size_t ext16_header_len = 4;
uint32_t checksum = halfsiphash::hash32(payload, payload_len, hash_key); static constexpr size_t array3_header_len = 1;
uint8_t env_buf[512]; static constexpr size_t uint32_fixed_len = 5;
span_writer env_body(env_buf, sizeof(env_buf)); static constexpr size_t bin16_header_len = 3;
msgpack::packer env_p(env_body);
env_p.pack_array(3); static constexpr size_t envelope_hdr_len =
env_p.pack(message_id); ext16_header_len + array3_header_len +
env_p.pack(checksum); uint32_fixed_len + uint32_fixed_len + bin16_header_len;
env_p.pack_bin(std::span<const uint8_t>{payload, payload_len});
msgpack::packer outer(out); static constexpr size_t response_prefix_len = envelope_hdr_len + ext16_header_len;
outer.pack_ext(Envelope::ext_id, env_body);
return out.size();
}
template <typename T> template <typename T>
inline size_t encode_response_into(span_writer &out, uint32_t message_id, const T &msg) { inline size_t encode_response_into(span_writer &out, uint32_t message_id, const T &msg) {
uint8_t inner_buf[256]; auto body = out.subspan(response_prefix_len);
msgpack::packer inner(inner_buf, sizeof(inner_buf)); msgpack::packer body_p(body);
inner.pack(msg); body_p.pack(msg.as_tuple());
auto &pl = inner.get_payload();
return pack_envelope_into(out, message_id, pl.data(), pl.size()); auto inner_ext = out.subspan(envelope_hdr_len, ext16_header_len);
msgpack::packer(inner_ext).pack_ext16_header(T::ext_id, static_cast<uint16_t>(body.size()));
size_t bin_len = inner_ext.size() + body.size();
uint32_t checksum = halfsiphash::hash32({inner_ext.data(), bin_len}, hash_key);
auto env_hdr = out.subspan(0, envelope_hdr_len);
size_t env_body_len = array3_header_len + uint32_fixed_len + uint32_fixed_len + bin16_header_len + bin_len;
msgpack::packer hdr(env_hdr);
hdr.pack_ext16_header(Envelope::ext_id, static_cast<uint16_t>(env_body_len));
hdr.pack_array(3);
hdr.pack_uint32_fixed(message_id);
hdr.pack_uint32_fixed(checksum);
hdr.pack_bin16_header(static_cast<uint16_t>(bin_len));
return response_prefix_len + body.size();
} }
inline msgpack::result<DecodedMessage> try_decode(const uint8_t *data, size_t len) { inline msgpack::result<DecodedMessage> try_decode(const uint8_t *data, size_t len) {
@@ -127,7 +139,7 @@ inline msgpack::result<DecodedMessage> try_decode(const uint8_t *data, size_t le
auto r = msgpack::unpack(p, env); auto r = msgpack::unpack(p, env);
if (!r) return std::unexpected(r.error()); if (!r) return std::unexpected(r.error());
uint32_t expected = halfsiphash::hash32(env.payload.data(), env.payload.size(), hash_key); uint32_t expected = halfsiphash::hash32(env.payload, hash_key);
if (env.checksum != expected) return std::unexpected(msgpack::error_code::invalid); if (env.checksum != expected) return std::unexpected(msgpack::error_code::invalid);
msgpack::parser inner(env.payload.data(), static_cast<int>(env.payload.size())); msgpack::parser inner(env.payload.data(), static_cast<int>(env.payload.size()));
@@ -154,7 +166,7 @@ inline msgpack::result<T> decode_response(const uint8_t *data, size_t len) {
auto r = msgpack::unpack(p, env); auto r = msgpack::unpack(p, env);
if (!r) return std::unexpected(r.error()); if (!r) return std::unexpected(r.error());
uint32_t expected = halfsiphash::hash32(env.payload.data(), env.payload.size(), hash_key); uint32_t expected = halfsiphash::hash32(env.payload, hash_key);
if (env.checksum != expected) return std::unexpected(msgpack::error_code::invalid); if (env.checksum != expected) return std::unexpected(msgpack::error_code::invalid);
msgpack::parser inner(env.payload.data(), static_cast<int>(env.payload.size())); msgpack::parser inner(env.payload.data(), static_cast<int>(env.payload.size()));

View File

@@ -29,7 +29,7 @@ void dispatch_schedule_ms(uint32_t ms, std::function<void()> fn) {
static usb_cdc usb; static usb_cdc usb;
static static_vector<uint8_t, 256> usb_rx_buf; static static_vector<uint8_t, 256> usb_rx_buf;
static uint8_t tx_buf[1514]; static std::array<uint8_t, 1514> tx_buf;
net_set_handler([&](std::span<const uint8_t> payload, span_writer &out) -> size_t { net_set_handler([&](std::span<const uint8_t> payload, span_writer &out) -> size_t {
auto msg = try_decode(payload.data(), payload.size()); auto msg = try_decode(payload.data(), payload.size());
@@ -45,7 +45,7 @@ void dispatch_schedule_ms(uint32_t ms, std::function<void()> fn) {
dlog_if_slow("tud_task", 1000, [&]{ tud_task(); }); dlog_if_slow("tud_task", 1000, [&]{ tud_task(); });
dlog_if_slow("drain", 1000, [&]{ usb.drain(); }); dlog_if_slow("drain", 1000, [&]{ usb.drain(); });
dlog_if_slow("timers", 1000, [&]{ timers.run(); }); dlog_if_slow("timers", 1000, [&]{ timers.run(); });
dlog_if_slow("net_poll", 1000, [&]{ net_poll(); }); dlog_if_slow("net_poll", 1000, [&]{ net_poll(std::span{tx_buf}); });
while (tud_cdc_available()) { while (tud_cdc_available()) {
uint8_t byte; uint8_t byte;
@@ -63,16 +63,16 @@ void dispatch_schedule_ms(uint32_t ms, std::function<void()> fn) {
auto it = handler_map.find(msg->type_id); auto it = handler_map.find(msg->type_id);
if (it != handler_map.end()) { if (it != handler_map.end()) {
span_writer out(tx_buf, sizeof(tx_buf)); span_writer out(tx_buf);
size_t resp_len = it->second(msg->message_id, msg->payload, out); size_t resp_len = it->second(msg->message_id, msg->payload, out);
if (resp_len > 0) { if (resp_len > 0) {
if (resp_len > usb.tx.free()) { if (resp_len > usb.tx.free()) {
span_writer err_out(tx_buf, sizeof(tx_buf)); span_writer err_out(tx_buf);
size_t err_len = encode_response_into(err_out, msg->message_id, size_t err_len = encode_response_into(err_out, msg->message_id,
DeviceError{2, "response too large: " + std::to_string(resp_len)}); DeviceError{2, "response too large: " + std::to_string(resp_len)});
usb.send(std::span<const uint8_t>{tx_buf, err_len}); usb.send(std::span<const uint8_t>{tx_buf.data(), err_len});
} else { } else {
usb.send(std::span<const uint8_t>{tx_buf, resp_len}); usb.send(std::span<const uint8_t>{tx_buf.data(), resp_len});
} }
} }
} }

View File

@@ -139,9 +139,7 @@ static void handle_arp(const uint8_t* frame, size_t len) {
send_raw(&reply, sizeof(reply)); send_raw(&reply, sizeof(reply));
} }
static uint8_t tx_buf[1514]; static void handle_udp(const uint8_t* frame, size_t len, span_writer &tx) {
static void handle_udp(const uint8_t* frame, size_t len) {
if (len < sizeof(udp_header)) return; if (len < sizeof(udp_header)) return;
auto& pkt = *reinterpret_cast<const udp_header*>(frame); auto& pkt = *reinterpret_cast<const udp_header*>(frame);
@@ -156,14 +154,14 @@ static void handle_udp(const uint8_t* frame, size_t len) {
auto* payload = frame + sizeof(udp_header); auto* payload = frame + sizeof(udp_header);
size_t payload_len = udp_len - 8; size_t payload_len = udp_len - 8;
span_writer resp(tx_buf + sizeof(udp_header), sizeof(tx_buf) - sizeof(udp_header)); auto resp = tx.subspan(sizeof(udp_header));
size_t resp_len = msg_handler(std::span<const uint8_t>{payload, payload_len}, resp); size_t resp_len = msg_handler(std::span<const uint8_t>{payload, payload_len}, resp);
if (resp_len == 0) return; if (resp_len == 0) return;
size_t ip_total = 20 + 8 + resp_len; size_t ip_total = 20 + 8 + resp_len;
size_t reply_len = sizeof(eth_header) + ip_total; size_t reply_len = sizeof(eth_header) + ip_total;
auto& rip = *reinterpret_cast<ipv4_header*>(tx_buf); auto& rip = *reinterpret_cast<ipv4_header*>(tx.data());
rip.eth.dst = pkt.ip.eth.src; rip.eth.dst = pkt.ip.eth.src;
rip.eth.src = state.mac; rip.eth.src = state.mac;
rip.eth.ethertype = ETH_IPV4; rip.eth.ethertype = ETH_IPV4;
@@ -179,16 +177,16 @@ static void handle_udp(const uint8_t* frame, size_t len) {
rip.dst = pkt.ip.src; rip.dst = pkt.ip.src;
rip.checksum = ip_checksum(rip.ip_start(), 20); rip.checksum = ip_checksum(rip.ip_start(), 20);
auto& rudp = *reinterpret_cast<udp_header*>(tx_buf); auto& rudp = *reinterpret_cast<udp_header*>(tx.data());
rudp.src_port = PICOMAP_PORT; rudp.src_port = PICOMAP_PORT;
rudp.dst_port = pkt.src_port; rudp.dst_port = pkt.src_port;
rudp.length = __builtin_bswap16(8 + resp_len); rudp.length = __builtin_bswap16(8 + resp_len);
rudp.checksum = 0; rudp.checksum = 0;
send_raw(tx_buf, reply_len); send_raw(tx.data(), reply_len);
} }
static void handle_icmp(const uint8_t* frame, size_t len) { static void handle_icmp(const uint8_t* frame, size_t len, span_writer &tx) {
auto& ip = *reinterpret_cast<const ipv4_header*>(frame); auto& ip = *reinterpret_cast<const ipv4_header*>(frame);
size_t ip_hdr_len = ip.ip_header_len(); size_t ip_hdr_len = ip.ip_header_len();
size_t ip_total = ip.ip_total_len(); size_t ip_total = ip.ip_total_len();
@@ -202,12 +200,11 @@ static void handle_icmp(const uint8_t* frame, size_t len) {
if (icmp_len < sizeof(icmp_echo)) return; if (icmp_len < sizeof(icmp_echo)) return;
if (icmp.type != 8) return; if (icmp.type != 8) return;
uint8_t reply_buf[1514];
size_t reply_len = sizeof(eth_header) + ip_total; size_t reply_len = sizeof(eth_header) + ip_total;
if (reply_len > sizeof(reply_buf)) return; if (reply_len > tx.capacity()) return;
memcpy(reply_buf, frame, reply_len); memcpy(tx.data(), frame, reply_len);
auto& rip = *reinterpret_cast<ipv4_header*>(reply_buf); auto& rip = *reinterpret_cast<ipv4_header*>(tx.data());
rip.eth.dst = ip.eth.src; rip.eth.dst = ip.eth.src;
rip.eth.src = state.mac; rip.eth.src = state.mac;
rip.src = state.ip; rip.src = state.ip;
@@ -216,30 +213,30 @@ static void handle_icmp(const uint8_t* frame, size_t len) {
rip.checksum = 0; rip.checksum = 0;
rip.checksum = ip_checksum(rip.ip_start(), ip_hdr_len); rip.checksum = ip_checksum(rip.ip_start(), ip_hdr_len);
auto& ricmp = *reinterpret_cast<icmp_echo*>(reply_buf + sizeof(eth_header) + ip_hdr_len); auto& ricmp = *reinterpret_cast<icmp_echo*>(tx.data() + sizeof(eth_header) + ip_hdr_len);
ricmp.type = 0; ricmp.type = 0;
ricmp.checksum = 0; ricmp.checksum = 0;
ricmp.checksum = ip_checksum(&ricmp, icmp_len); ricmp.checksum = ip_checksum(&ricmp, icmp_len);
send_raw(reply_buf, reply_len); send_raw(tx.data(), reply_len);
} }
static void handle_ipv4(const uint8_t* frame, size_t len) { static void handle_ipv4(const uint8_t* frame, size_t len, span_writer &tx) {
if (len < sizeof(ipv4_header)) return; if (len < sizeof(ipv4_header)) return;
auto& ip = *reinterpret_cast<const ipv4_header*>(frame); auto& ip = *reinterpret_cast<const ipv4_header*>(frame);
if ((ip.ver_ihl >> 4) != 4) return; if ((ip.ver_ihl >> 4) != 4) return;
switch (ip.protocol) { switch (ip.protocol) {
case 1: case 1:
handle_icmp(frame, len); handle_icmp(frame, len, tx);
break; break;
case 17: case 17:
handle_udp(frame, len); handle_udp(frame, len, tx);
break; break;
} }
} }
static void process_frame(const uint8_t* frame, size_t len) { static void process_frame(const uint8_t* frame, size_t len, span_writer &tx) {
if (len < sizeof(eth_header)) return; if (len < sizeof(eth_header)) return;
auto& eth = *reinterpret_cast<const eth_header*>(frame); auto& eth = *reinterpret_cast<const eth_header*>(frame);
@@ -250,7 +247,7 @@ static void process_frame(const uint8_t* frame, size_t len) {
handle_arp(frame, len); handle_arp(frame, len);
break; break;
case ETH_IPV4: case ETH_IPV4:
handle_ipv4(frame, len); handle_ipv4(frame, len, tx);
break; break;
} }
} }
@@ -289,13 +286,15 @@ void net_set_handler(net_handler handler) {
msg_handler = std::move(handler); msg_handler = std::move(handler);
} }
void net_poll() { void net_poll(std::span<uint8_t> tx) {
if (!w6300::irq_pending) return; if (!w6300::irq_pending) return;
w6300::irq_pending = false; w6300::irq_pending = false;
w6300::clear_interrupt(w6300::ik_int_all); w6300::clear_interrupt(w6300::ik_int_all);
if (w6300::get_socket_recv_buf(raw_socket) == 0) return;
static uint8_t rx_buf[1518]; static uint8_t rx_buf[1518];
auto result = w6300::recv(raw_socket, std::span{rx_buf}); while (w6300::get_socket_recv_buf(raw_socket) > 0) {
if (!result) return; auto result = w6300::recv(raw_socket, std::span{rx_buf});
process_frame(rx_buf, *result); if (!result) break;
span_writer tx_writer(tx);
process_frame(rx_buf, *result, tx_writer);
}
} }

View File

@@ -29,7 +29,7 @@ static ResponseTest test_discovery() {
ResponseTest resp; ResponseTest resp;
resp.pass = true; resp.pass = true;
uint8_t req_buf[256]; uint8_t req_buf[1514];
span_writer req_out(req_buf, sizeof(req_buf)); span_writer req_out(req_buf, sizeof(req_buf));
size_t req_len = encode_request_into(req_out, 0, RequestInfo{}); size_t req_len = encode_request_into(req_out, 0, RequestInfo{});
auto send_result = w6300::send(test_socket, std::span<const uint8_t>{req_buf, req_len}); auto send_result = w6300::send(test_socket, std::span<const uint8_t>{req_buf, req_len});