Wakeup/delay fixes, and actually add retries for incoming and outgoing.

This commit is contained in:
Ian Gulliver
2016-02-21 16:47:27 -08:00
parent 835a144d6e
commit 8f36ad0db8
4 changed files with 76 additions and 41 deletions

View File

@@ -236,19 +236,12 @@ void rand_fill(void *value, size_t size) {
#define RETRY_MIN_MS 2000
#define RETRY_MAX_MS 64000
#define RETRY_MULT 2
#define RETRY_MAX_JITTER_DIV 2
uint32_t retry_get_delay_ms(uint32_t prev_delay) {
uint32_t delay = prev_delay * RETRY_MULT;
delay = delay < RETRY_MIN_MS ? RETRY_MIN_MS : delay;
delay = delay > RETRY_MAX_MS ? RETRY_MAX_MS : delay;
uint32_t retry_get_delay_ms(uint32_t attempt) {
uint32_t max_delay = RETRY_MIN_MS * (1 << attempt);
max_delay = max_delay > RETRY_MAX_MS ? RETRY_MAX_MS : max_delay;
uint32_t max_jitter = delay / RETRY_MAX_JITTER_DIV;
uint32_t jitter;
rand_fill(&jitter, sizeof(jitter));
delay += jitter % max_jitter;
delay = delay > RETRY_MAX_MS ? RETRY_MAX_MS : delay;
return delay;
return jitter % max_delay;
}

View File

@@ -11,6 +11,7 @@
#include <unistd.h>
#include "common.h"
#include "wakeup.h"
#include "incoming.h"
struct incoming {
@@ -18,6 +19,7 @@ struct incoming {
char id[UUID_LEN];
char *node;
char *service;
uint32_t attempt;
incoming_connection_handler handler;
void *passthrough;
struct incoming *next;
@@ -25,6 +27,15 @@ struct incoming {
static struct incoming *incoming_head = NULL;
static void incoming_resolve_wrapper(struct peer *);
static void incoming_retry(struct incoming *incoming) {
uint32_t delay = retry_get_delay_ms(incoming->attempt++);
fprintf(stderr, "I %s: Will retry in %ds\n", incoming->id, delay / 1000);
incoming->peer.event_handler = incoming_resolve_wrapper;
wakeup_add((struct peer *) incoming, delay);
}
static void incoming_handler(struct peer *peer) {
struct incoming *incoming = (struct incoming *) peer;
@@ -58,24 +69,7 @@ static void incoming_del(struct incoming *incoming) {
free(incoming);
}
void incoming_cleanup() {
struct incoming *iter = incoming_head;
while (iter) {
struct incoming *next = iter->next;
incoming_del(iter);
iter = next;
}
}
void incoming_new(char *node, char *service, incoming_connection_handler handler, void *passthrough) {
struct incoming *incoming = malloc(sizeof(*incoming));
incoming->peer.event_handler = incoming_handler;
uuid_gen(incoming->id);
incoming->node = strdup(node);
incoming->service = strdup(service);
incoming->handler = handler;
incoming->passthrough = passthrough;
static void incoming_resolve(struct incoming *incoming) {
fprintf(stderr, "I %s: Resolving %s/%s...\n", incoming->id, incoming->node, incoming->service);
struct addrinfo hints = {
@@ -88,8 +82,7 @@ void incoming_new(char *node, char *service, incoming_connection_handler handler
int gai_err = getaddrinfo(incoming->node, incoming->service, &hints, &addrs);
if (gai_err) {
fprintf(stderr, "I %s: Failed to resolve %s/%s: %s\n", incoming->id, incoming->node, incoming->service, gai_strerror(gai_err));
// TODO: retry
free(incoming);
incoming_retry(incoming);
return;
}
@@ -119,13 +112,39 @@ void incoming_new(char *node, char *service, incoming_connection_handler handler
if (addr == NULL) {
fprintf(stderr, "I %s: Failed to bind any addresses for %s/%s...\n", incoming->id, incoming->node, incoming->service);
// TODO: retry
free(incoming);
incoming_retry(incoming);
return;
}
incoming->attempt = 0;
peer_epoll_add((struct peer *) incoming, EPOLLIN);
}
static void incoming_resolve_wrapper(struct peer *peer) {
incoming_resolve((struct incoming *) peer);
}
void incoming_cleanup() {
struct incoming *iter = incoming_head;
while (iter) {
struct incoming *next = iter->next;
incoming_del(iter);
iter = next;
}
}
void incoming_new(char *node, char *service, incoming_connection_handler handler, void *passthrough) {
struct incoming *incoming = malloc(sizeof(*incoming));
incoming->peer.event_handler = incoming_handler;
uuid_gen(incoming->id);
incoming->node = strdup(node);
incoming->service = strdup(service);
incoming->attempt = 0;
incoming->handler = handler;
incoming->passthrough = passthrough;
incoming->next = incoming_head;
incoming_head = incoming;
incoming_resolve(incoming);
}

View File

@@ -9,6 +9,7 @@
#include <unistd.h>
#include "common.h"
#include "wakeup.h"
#include "outgoing.h"
struct outgoing {
@@ -18,6 +19,7 @@ struct outgoing {
char *service;
struct addrinfo *addrs;
struct addrinfo *addr;
uint32_t attempt;
outgoing_connection_handler handler;
void *passthrough;
struct outgoing *next;
@@ -27,12 +29,20 @@ static struct outgoing *outgoing_head = NULL;
static void outgoing_connect_result(struct outgoing *, int);
static void outgoing_resolve(struct outgoing *);
static void outgoing_resolve_wrapper(struct peer *);
static void outgoing_retry(struct outgoing *outgoing) {
uint32_t delay = retry_get_delay_ms(outgoing->attempt++);
fprintf(stderr, "O %s: Will retry in %ds\n", outgoing->id, delay / 1000);
outgoing->peer.event_handler = outgoing_resolve_wrapper;
wakeup_add((struct peer *) outgoing, delay);
}
static void outgoing_connect_next(struct outgoing *outgoing) {
if (outgoing->addr == NULL) {
freeaddrinfo(outgoing->addrs);
fprintf(stderr, "O %s: Can't connect to any addresses of %s/%s\n", outgoing->id, outgoing->node, outgoing->service);
// TODO: timed retry
outgoing_retry(outgoing);
return;
}
@@ -73,6 +83,7 @@ static void outgoing_connect_result(struct outgoing *outgoing, int result) {
case 0:
fprintf(stderr, "O %s: Connected to %s/%s\n", outgoing->id, hbuf, sbuf);
freeaddrinfo(outgoing->addrs);
outgoing->attempt = 0;
// We listen for just hangup on this fd. We pass a duplicate to the handler, so our epoll setings are independent.
outgoing->peer.event_handler = outgoing_disconnect_handler;
@@ -107,13 +118,17 @@ static void outgoing_resolve(struct outgoing *outgoing) {
int gai_err = getaddrinfo(outgoing->node, outgoing->service, &hints, &outgoing->addrs);
if (gai_err) {
fprintf(stderr, "O %s: Failed to resolve %s/%s: %s\n", outgoing->id, outgoing->node, outgoing->service, gai_strerror(gai_err));
// TODO: timed retry
outgoing_retry(outgoing);
return;
}
outgoing->addr = outgoing->addrs;
outgoing_connect_next(outgoing);
}
static void outgoing_resolve_wrapper(struct peer *peer) {
outgoing_resolve((struct outgoing *) peer);
}
static void outgoing_del(struct outgoing *outgoing) {
assert(!close(outgoing->peer.fd));
free(outgoing->node);
@@ -135,6 +150,7 @@ void outgoing_new(char *node, char *service, outgoing_connection_handler handler
uuid_gen(outgoing->id);
outgoing->node = strdup(node);
outgoing->service = strdup(service);
outgoing->attempt = 0;
outgoing->handler = handler;
outgoing->passthrough = passthrough;

View File

@@ -36,8 +36,9 @@ static int wakeup_write_fd;
static uint64_t wakeup_get_time_ms() {
struct timespec tp;
assert(!clock_gettime(CLOCK_MONOTONIC_COARSE, &tp));
#define MS_PER_S 1000
#define NS_PER_MS 1000000
return tp.tv_sec + (tp.tv_nsec / NS_PER_MS);
return (tp.tv_sec * MS_PER_S) + (tp.tv_nsec / NS_PER_MS);
}
static void wakeup_request_add(struct wakeup_entry **head, struct wakeup_request *request) {
@@ -79,9 +80,16 @@ static void *wakeup_main(void *arg) {
struct wakeup_entry *head = NULL;
while (1) {
int delay = -1;
if (head) {
// This call is the whole reason we need a separate thread: we want to avoid getting the time inside the tight main thread loop.
delay = head->request.absolute_time_ms - wakeup_get_time_ms();
delay = delay < 0 ? 0 : delay;
}
#define MAX_EVENTS 1
struct epoll_event events[MAX_EVENTS];
int nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
int nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, delay);
if (nfds == -1 && errno == EINTR) {
continue;
}
@@ -146,10 +154,9 @@ void wakeup_add(struct peer *peer, uint32_t delay_ms) {
int pipefd[2];
assert(!pipe2(pipefd, O_NONBLOCK));
struct wakeup_request request = {
.fd = pipefd[1],
.absolute_time_ms = wakeup_get_time_ms() + delay_ms,
};
struct wakeup_request request = { 0 };
request.fd = pipefd[1],
request.absolute_time_ms = wakeup_get_time_ms() + delay_ms;
assert(write(wakeup_write_fd, &request, sizeof(request)) == sizeof(request));
struct wakeup_peer *outer_peer = malloc(sizeof(*outer_peer));