Add port flap and port down error tracking with faster ping

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Ian Gulliver
2026-01-31 13:01:07 -08:00
parent d1b4de01e8
commit 5a7596b456
5 changed files with 88 additions and 15 deletions

View File

@@ -12,6 +12,8 @@ const (
ErrorTypeNew = "new"
ErrorTypeUnreachable = "unreachable"
ErrorTypeHighUtilization = "high_utilization"
ErrorTypePortFlap = "port_flap"
ErrorTypePortDown = "port_down"
)
type Error struct {
@@ -139,6 +141,58 @@ func (e *ErrorTracker) AddUtilizationError(node *Node, portName string, utilizat
e.t.NotifyUpdate()
}
// AddPortFlap records a port-flap error for portName on node.
//
// Errors are keyed by "flap:<node.ID>:<portName>". When an entry already
// exists under that key only its LastUpdated timestamp is refreshed;
// otherwise a new Error of type ErrorTypePortFlap is stored with the next
// sequential "err-N" ID. NotifyUpdate is invoked in both cases, while e.mu
// is still held.
func (e *ErrorTracker) AddPortFlap(node *Node, portName string) {
	e.mu.Lock()
	defer e.mu.Unlock()

	seenAt := time.Now()
	errKey := "flap:" + node.ID + ":" + portName

	entry, found := e.errors[errKey]
	if !found {
		// First occurrence for this node/port: allocate an ID and register it.
		e.nextID++
		entry = &Error{
			ID:        fmt.Sprintf("err-%d", e.nextID),
			NodeID:    node.ID,
			NodeName:  node.DisplayName(),
			Port:      portName,
			Type:      ErrorTypePortFlap,
			FirstSeen: seenAt,
		}
		e.errors[errKey] = entry
	}

	// Both new and pre-existing entries end up stamped with the current time.
	entry.LastUpdated = seenAt
	e.t.NotifyUpdate()
}
// AddPortDown records a port-down error for portName on node.
//
// Errors are keyed by "down:<node.ID>:<portName>". When an entry already
// exists under that key only its LastUpdated timestamp is refreshed;
// otherwise a new Error of type ErrorTypePortDown is stored with the next
// sequential "err-N" ID. NotifyUpdate is invoked in both cases, while e.mu
// is still held.
func (e *ErrorTracker) AddPortDown(node *Node, portName string) {
	e.mu.Lock()
	defer e.mu.Unlock()

	seenAt := time.Now()
	errKey := "down:" + node.ID + ":" + portName

	entry, found := e.errors[errKey]
	if !found {
		// First occurrence for this node/port: allocate an ID and register it.
		e.nextID++
		entry = &Error{
			ID:        fmt.Sprintf("err-%d", e.nextID),
			NodeID:    node.ID,
			NodeName:  node.DisplayName(),
			Port:      portName,
			Type:      ErrorTypePortDown,
			FirstSeen: seenAt,
		}
		e.errors[errKey] = entry
	}

	// Both new and pre-existing entries end up stamped with the current time.
	entry.LastUpdated = seenAt
	e.t.NotifyUpdate()
}
func (e *ErrorTracker) ClearError(errorID string) {
e.mu.Lock()
defer e.mu.Unlock()

View File

@@ -246,7 +246,7 @@ func (n *Nodes) startNodePoller(node *Node) {
go func() {
pollTicker := time.NewTicker(10 * time.Second)
pingTicker := time.NewTicker(5 * time.Second)
pingTicker := time.NewTicker(3 * time.Second)
defer pollTicker.Stop()
defer pingTicker.Stop()

View File

@@ -172,7 +172,7 @@ func (t *Tendrils) pingNode(node *Node) {
anyReachable := false
for _, ipStr := range ips {
if t.ping.Ping(ipStr, 2*time.Second) {
if t.ping.Ping(ipStr, 1*time.Second) {
anyReachable = true
break
}

20
snmp.go
View File

@@ -17,6 +17,7 @@ type ifaceCounters struct {
outPkts uint64
inBytes uint64
outBytes uint64
uptime uint64
timestamp time.Time
}
@@ -252,9 +253,15 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
status, hasStatus := ifOperStatus[ifIndex]
isUp := hasStatus && status == 1
if !isUp {
if iface.Up {
log.Printf("[ERROR] port down on %s %s", node.DisplayName(), name)
t.errors.AddPortDown(node, name)
}
iface.Up = false
iface.Stats = nil
continue
}
iface.Up = true
stats := &InterfaceStats{}
@@ -282,11 +289,15 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
inPkts := ifHCInUcastPkts[ifIndex] + ifHCInMcastPkts[ifIndex] + ifHCInBcastPkts[ifIndex]
outPkts := ifHCOutUcastPkts[ifIndex] + ifHCOutMcastPkts[ifIndex] + ifHCOutBcastPkts[ifIndex]
if hasInBytes && hasOutBytes {
key := node.ID + ":" + name
ifaceTracker.mu.Lock()
prev, hasPrev := ifaceTracker.counters[key]
if hasPrev {
if prev.uptime > 0 && stats.Uptime > 0 && stats.Uptime < prev.uptime {
log.Printf("[ERROR] port flap on %s %s: uptime dropped from %d to %d seconds", node.DisplayName(), name, prev.uptime, stats.Uptime)
t.errors.AddPortFlap(node, name)
}
if hasInBytes && hasOutBytes {
elapsed := now.Sub(prev.timestamp).Seconds()
if elapsed > 0 {
stats.InPktsRate = float64(inPkts-prev.inPkts) / elapsed
@@ -307,15 +318,20 @@ func (t *Tendrils) queryInterfaceStats(snmp *gosnmp.GoSNMP, node *Node, ifNames
}
}
}
}
storedUptime := stats.Uptime
if storedUptime == 0 && hasPrev {
storedUptime = prev.uptime
}
ifaceTracker.counters[key] = &ifaceCounters{
inPkts: inPkts,
outPkts: outPkts,
inBytes: inBytes,
outBytes: outBytes,
uptime: storedUptime,
timestamp: now,
}
ifaceTracker.mu.Unlock()
}
if poe, ok := poeStats[name]; ok {
stats.PoE = poe

View File

@@ -360,6 +360,7 @@ type Interface struct {
Name string `json:"name,omitempty"`
MAC MAC `json:"mac"`
IPs IPSet `json:"ips,omitempty"`
Up bool `json:"up,omitempty"`
Stats *InterfaceStats `json:"stats,omitempty"`
}
@@ -368,6 +369,7 @@ func (i *Interface) MarshalJSON() ([]byte, error) {
Name string `json:"name,omitempty"`
MAC MAC `json:"mac"`
IPs []string `json:"ips,omitempty"`
Up bool `json:"up,omitempty"`
Stats *InterfaceStats `json:"stats,omitempty"`
}
var ips []string
@@ -378,6 +380,7 @@ func (i *Interface) MarshalJSON() ([]byte, error) {
Name: i.Name,
MAC: i.MAC,
IPs: ips,
Up: i.Up,
Stats: i.Stats,
})
}