Require 3 consecutive ping failures before marking unreachable

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Ian Gulliver
2026-01-25 20:05:29 -08:00
parent 0b65998d07
commit c701d26f0e

33
ping.go
View File

@@ -16,18 +16,22 @@ type pendingPing struct {
} }
type PingManager struct { type PingManager struct {
mu sync.Mutex mu sync.Mutex
conn *icmp.PacketConn conn *icmp.PacketConn
pending map[uint16]*pendingPing pending map[uint16]*pendingPing
nextID uint16 nextID uint16
minID uint16 minID uint16
failures map[string]int
} }
const pingFailureThreshold = 3
func NewPingManager() *PingManager { func NewPingManager() *PingManager {
pm := &PingManager{ pm := &PingManager{
pending: map[uint16]*pendingPing{}, pending: map[uint16]*pendingPing{},
nextID: 1000, nextID: 1000,
minID: 1000, minID: 1000,
failures: map[string]int{},
} }
conn, err := icmp.ListenPacket("ip4:icmp", "0.0.0.0") conn, err := icmp.ListenPacket("ip4:icmp", "0.0.0.0")
@@ -154,13 +158,22 @@ func (t *Tendrils) pingNode(node *Node) {
for _, ipStr := range ips { for _, ipStr := range ips {
reachable := t.ping.Ping(ipStr, 2*time.Second) reachable := t.ping.Ping(ipStr, 2*time.Second)
t.ping.mu.Lock()
if reachable { if reachable {
t.ping.failures[ipStr] = 0
t.ping.mu.Unlock()
if t.errors.ClearUnreachable(node, ipStr) { if t.errors.ClearUnreachable(node, ipStr) {
log.Printf("[ping] %s (%s) is now reachable", nodeName, ipStr) log.Printf("[ping] %s (%s) is now reachable", nodeName, ipStr)
} }
} else { } else {
if t.errors.SetUnreachable(node, ipStr) { t.ping.failures[ipStr]++
log.Printf("[ping] %s (%s) is now unreachable", nodeName, ipStr) failures := t.ping.failures[ipStr]
t.ping.mu.Unlock()
if failures >= pingFailureThreshold {
if t.errors.SetUnreachable(node, ipStr) {
log.Printf("[ping] %s (%s) is now unreachable", nodeName, ipStr)
}
} }
} }
} }